@@ 0,0 1,1904 @@
+
+#
+ LOD aware terrain vectorization using dual marching cubes
+
+ VVF -> volumetric vertex field
+
+# Registers `,` as an infix operator with precedence 40 that maps to `_`, so the
+# `a, b` expressions used by the edge helpers later in this file build value packs.
+# NOTE(review): associativity follows from the `<` variant of define-infix — confirm.
+define-infix< 40 , _
+
+using import glm
+using import glsl
+using import itertools
+using import Array
+using import Box
+using import struct
+using import Option
+import ..lib.tukan.use
+
+import tukan.voxel
+let voxel = tukan.voxel
+
+using import tukan.gl
+using import tukan.bitmap
+using import tukan.packing
+using import tukan.random
+using import tukan.color
+using import tukan.perfect_hash
+using import tukan.raytrace
+using import tukan.sdf
+using import tukan.sdl
+using import tukan.rotation
+using import tukan.brdf
+using import tukan.normal
+using import tukan.noise
+using import tukan.projection
+using import tukan.derivative
+using import tukan.isosurface
+using import tukan.hash
+using import tukan.spherical
+using import tukan.ResourceGroup
+using import tukan.logcell
+using import .testfragment
+
+let RG = (ResourceGroup "RG")
+from (import tukan.math) let expmix
+
+# Feature switches, consumed by the static-if branches further down.
+SAMPLE_CAMERA_OFFSET := false # map-translation adds the camera position when true
+PROJECT_FINAL_VERTEX := false
+VISUALIZE_IDS := false # rasterize-vert outputs a per-vertex hash instead of the normal
+POST_TRANSFORM := false # true is worse
+OCCLUSION_CULLING := false
+FOG := false
+USE_FLAT_SHADING := true # generate-quad writes one face normal per triangle
+BALANCE_QUADS := true # generate-quad compares the two quad diagonals before emitting
+USE_COMPLEX_SURFACE := true # matmapf adds noise to the base shape
+HIGH_QUALITY_FEATURES := true # feature-vertex-normal uses tet-feature-vertex-normal
+SOFT_WORLD_SAMPLING := true # generate-world scales its sampling offsets by 3.0
+USE_CATMULL_CLARK := false # selects the subdividing mesher in generate-cell-verts
+BLOCKY_WORLD := false # generate-cell-verts stores vec3 0 as every cell vertex
+USE_VVF_PACKING := true # pack-vvf stores vertex and flags in a single u32
+AVERAGE_LOD_VERTICES := false # generate-world-lod averages instead of picking one vertex
+
+# NOTE(review): no use of this offset is visible in the reviewed part of the file
+FETCH_UV_OFFSET := 0.5
+#FETCH_UV_OFFSET := 0.0
+
+# to reach fog density D at depth Z, FOG_RATE = -log2(1 - D)/Z
+FOG_RATE := 0.02 # 50% at 50 units, 75% at 100 units
+
+# 20 * 2^20 vertices
+# NOTE(review): presumably the capacity of the vertex buffers — allocation is not visible here
+let MAX_VERTICES = (20 * (1 << 20))
+
+# world volume resolution in cells, and the scale applied to normalized
+# [-1..1] cell positions when sampling the distance field (see sample-field)
+let WORLD_SIZE = (uvec3 256)
+let WORLD_SCALE = (vec3 256.0)
+let MAX_WORLD_LOD_I = 8
+let MAX_WORLD_LOD = (MAX_WORLD_LOD_I as f32)
+
+let CUBE_SIZE = (uvec3 256)
+let LOD_OFFSET = -6
+let LOD_NORMAL_OFFSET = -1
+# local_size edge length of the world generation/update kernels
+let GROUP_SIZE = 4
+let MAX_SECTOR_LOD_I = 2
+# cells per sector edge: 1 << 2 = 4
+let SECTOR_GROUP_SIZE = (1 << MAX_SECTOR_LOD_I)
+
+let SUBDIVIDE_RADIUS = 6
+#let SUBDIVIDE_RADIUS = 1
+let SUBDIVIDE_SCALE = 16
+
+MAX_CASCADE_DEPTH := 6 # MAX_WORLD_LOD_I - MAX_SECTOR_LOD_I
+#MAX_CASCADE_DEPTH := 3 # MAX_WORLD_LOD_I - MAX_SECTOR_LOD_I
+SECTOR_CAPACITY := MAX_CASCADE_DEPTH * 100000
+# 1 << 6 = 64 sectors per axis at the finest cascade level
+CASCADE_SIZE := (1 << MAX_CASCADE_DEPTH)
+CASCADE_CENTER := (CASCADE_SIZE // 2)
+
+# binding points of the storage buffers and images declared below
+let BINDING_BUF_SECTOR_IN = 1
+let BINDING_BUF_FACE_BRIDGE_IN = 2
+let BINDING_BUF_VERTEX_IN = 3
+let BINDING_BUF_VERTEX_OUT = 4
+let BINDING_BUF_DRAW_VOXELS_CMD = 5
+let BINDING_BUF_DISPATCH_CMD = 6
+let BINDING_IMG_ZBUFFER = 7
+let BINDING_IMG_WORLD_IN = 8
+let BINDING_IMG_WORLD_OUT = 9
+let BINDING_IMG_WORLD_INOUT = 10
+
+# uniform locations
+let UNIFORM_LEVEL = 1
+let UNIFORM_PROGRAM = 2
+let UNIFORM_MOUSE_STATE = 12
+let UNIFORM_SECTOR_OFFSET = 13
+let UNIFORM_SCREEN_SAMPLER = 4
+let UNIFORM_WORLD_SAMPLER = 7
+
+
+let LEVELS = 8
+
+let SEARCH_R = 5
+let SEARCH_Rf = (SEARCH_R as f32)
+
+let sqrt3 = (sqrt 3.0)
+
+run-stage;
+
+# One mesh vertex as stored in the vertex buffers: position and normal, both vec4.
+struct Vertex plain
+ pos : vec4
+ normal : vec4
+
+# Layout of a vertex buffer: a vertex counter followed by an unsized vertex array.
+struct Vertices plain
+ count : u32
+ # each entry holds a vertex
+ entries : (array Vertex)
+
+# Bit masks tested against Sector.flags, one bit per sector face
+# (+X, -X, +Y, -Y, +Z, -Z); see blend-factor in generate-cell-verts.
+let
+ BLEND+X = 1:u32
+ BLEND-X = 2:u32
+ BLEND+Y = 4:u32
+ BLEND-Y = 8:u32
+ BLEND+Z = 16:u32
+ BLEND-Z = 32:u32
+
+# One sector to be meshed: an encoded cell key (unpacked with decode-cell in
+# generate-cell-verts) plus per-face blend flags.
+struct Sector plain
+ key : u32
+ flags : u32 # six face bits indicating where the next highest LOD level is
+
+# Layout of a sector buffer: an unsized array of Sector records.
+struct Sectors plain
+ keys : (array Sector)
+
+# read-only list of sectors consumed by generate-cell-verts
+buffer sector-in : Sectors
+ binding = BINDING_BUF_SECTOR_IN
+ \ readonly coherent
+
+# read-only sector list with the same layout as sector-in
+# NOTE(review): no reader of this buffer is visible in the reviewed part of the file
+buffer face-bridge-in : Sectors
+ binding = BINDING_BUF_FACE_BRIDGE_IN
+ \ readonly coherent
+
+# read-only vertex list fetched by rasterize-vert
+buffer vertex-in : Vertices
+ binding = BINDING_BUF_VERTEX_IN
+ \ readonly coherent
+
+# vertex list appended to by generate-quad (count is advanced with atomicAdd)
+buffer vertex-out : Vertices
+ binding = BINDING_BUF_VERTEX_OUT
+ \ coherent
+
+# packed surfel image read back by mixdown
+uniform smp-screen : sampler2D
+ location = UNIFORM_SCREEN_SAMPLER
+
+# bit field tested by update-world (bit value 4 and bit value 1 select the two edit modes)
+uniform mouse-state : i32
+ location = UNIFORM_MOUSE_STATE
+
+# added to gl_WorkGroupID.x to index sector-in in generate-cell-verts
+uniform sector-offset : u32
+ location = UNIFORM_SECTOR_OFFSET
+
+# Distance from `p` to the surface of a radius-0.5 sphere centred at the origin
+# (negative inside).
+fn simple-sphere (p)
+    let dist-to-center = (length p)
+    dist-to-center - 0.5
+
+# Animated test SDF: a radius-0.5 sphere combined with eight small spheres
+# centred at (±d, ±d, ±d) with d = sqrt(0.5) * 0.4. The small radius r moves
+# between 0.1 and 0.2 with shglobals.time. The four z = +d spheres feed the
+# sdSmoothOr, the four z = -d spheres are multiplied by -1.0 before the
+# sdSmoothAnd; both blends use 0.1 as their last argument.
+# NOTE(review): the exact argument nesting is inferred from operator arity — confirm.
+fn nine-spheres (p)
+ let x = (deref shglobals.time)
+ r := (mix 0.1 0.2 ((sin x) * 0.5 + 0.5))
+ d := (sqrt 0.5) * 0.4
+ sdSmoothAnd
+ sdSmoothOr
+ (length p) - 0.5
+ min
+ (length (p - (vec3 d d d))) - r
+ (length (p - (vec3 -d d d))) - r
+ (length (p - (vec3 d -d d))) - r
+ (length (p - (vec3 -d -d d))) - r
+ 0.1
+ * -1.0
+ min
+ (length (p - (vec3 d d -d))) - r
+ (length (p - (vec3 -d d -d))) - r
+ (length (p - (vec3 d -d -d))) - r
+ (length (p - (vec3 -d -d -d))) - r
+ 0.1
+
+# Animated test SDF: two radius-0.25 spheres mirrored along z, smoothly merged
+# with sdSmoothOr (last argument 0.2). Their distance from the origin
+# oscillates between 0 and 0.5 with shglobals.time.
+fn twoballs (p)
+    let t = (deref shglobals.time)
+    let sep = (((sin t) * 0.5 + 0.5) * 0.5)
+    let upper = ((length (p - (vec3 0.0 0 sep))) - 0.25)
+    let lower = ((length (p - (vec3 0.0 0 -sep))) - 0.25)
+    sdSmoothOr upper lower 0.2
+
+# Animated material SDF made of two tori. p1 is p with its xz components
+# rotated by -time, p2 is p with its xy components rotated by +time. Each
+# torus is wrapped in sdmDist with its own sdMaterial and the two are merged
+# through the 'sdSmoothOr method with 0.2 as the last argument.
+fn doubletori (p)
+ let x = (deref shglobals.time)
+ let xz =
+ anglevector-rotate
+ anglevector -x
+ p.xz
+ let p1 =
+ vec3
+ xz.x
+ p.y
+ xz.y
+ let p2 =
+ vec3
+ anglevector-rotate
+ anglevector x
+ p.xy
+ p.z
+
+ 'sdSmoothOr
+ sdmDist
+ sdTorus p1.xzy (vec2 0.5 0.05)
+ sdMaterial
+ vec4 1.0 0.5 0.3 1.0
+ sdmDist
+ sdTorus p2 (vec2 0.4 0.1)
+ sdMaterial
+ vec4 0.3 0.5 1.0 1.0
+ roughness = 0.1
+ 0.2
+
+# Static test SDF: two sdBox shapes (extent argument vec3 0.33), one evaluated
+# on p rotated in the xy plane by -x and one on p rotated in the yz plane by
+# x * 0.917, merged with sdSmoothOr (last argument 0.001). The angle x is a
+# fixed constant; the time-driven variant is left in the trailing comment.
+fn two-boxes (p)
+ let x = 8.1923 # (deref shglobals.time)
+ let p1 =
+ vec3
+ anglevector-rotate
+ anglevector -x
+ p.xy
+ p.z
+ let p2 =
+ vec3
+ p.x
+ anglevector-rotate
+ anglevector (x * 0.917)
+ p.yz
+ sdSmoothOr
+ sdBox p1 (vec3 0.33)
+ sdBox p2 (vec3 0.33)
+ 0.001
+
+# Animated material SDF combining, through nested 'sdSmoothOr / 'sdSmoothAnd
+# method calls: an sdUberprim, two sdBox shapes at p - d and p + d (d = 0.3,
+# sz = 0.5), and a negated sdSphere centred at (0, 0, -0.5) whose radius r moves
+# between 1.0 and 0.2 with shglobals.time. Every shape carries its own
+# sdMaterial.
+# NOTE(review): the nesting of the blend calls and which lines the `#sdSphere`
+# comment swallows cannot be recovered from this view — confirm against the file.
+fn two-boxes-merge (p)
+ let d =
+ vec3 0.3
+ let sz =
+ vec3 0.5
+ let x = (deref shglobals.time)
+ r := (mix 1.0 0.2 ((sin x) * 0.5 + 0.5))
+ 'sdSmoothAnd
+ 'sdSmoothOr
+ sdmDist
+ #sdSphere
+ p - (vec3 0.01)
+ 1.0
+ sdUberprim
+ p - (vec3 0.01)
+ vec4 1.0 1.0 0.05 0.05
+ vec3 0.5 0.05 0.0
+ sdMaterial
+ vec4 1.0 0.9 0.3 1.0
+ metallic = 1.0
+ 'sdSmoothOr
+ sdmDist
+ sdBox (p - d) sz
+ sdMaterial
+ vec4 1.0 0.5 0.3 1.0
+ sdmDist
+ sdBox (p + d) sz
+ sdMaterial
+ vec4 0.3 0.5 1.0 1.0
+ roughness = 0.2
+ 0.5
+ 0.05
+ sdmDist
+ -
+ sdSphere
+ p - (vec3 0.0 0.0 -0.5)
+ r
+ sdMaterial
+ vec4 0.5 0.3 1.0 1.0
+ 0.1
+
+# Test SDF: a single origin-centred sdBox with extent argument vec3 0.33.
+fn one-box (p)
+    let extent = (vec3 0.33)
+    sdBox p extent
+
+# Scene distance function with material: returns an sdmDist built from a
+# distance chosen by a static-if chain and a fixed sdMaterial
+# (vec4 0.5 0.3 1.0 1.0).
+# The first three branches of the chain are switched off by literal 0
+# conditions, so the final `else` is the one that is compiled: it evaluates
+# doubletori at p / 300 (scaled back by 300) as DD, accumulates five octaves of
+# triquad-noise3 on p * 0.2 (octave i scaled by 2^i, amplitude 1 / 2^i), and
+# when USE_COMPLEX_SURFACE is set adds the noise * 40.0 to (abs DD) - 10.0.
+# NOTE(review): several `#do` / `#sdmDist` style comments disable the lines
+# nested under them; which operands of the final `min` are live depends on
+# indentation that is not visible here — confirm against the file.
+fn matmapf (p)
+ #p := p + 1.0
+ #p := p + (vec3 1 0 0) * shglobals.time
+ #p := (sdDomainRep p 2.0)
+ #do
+ p := p.yzx * 2.0
+ (two-boxes-merge p) * 0.5
+ #doubletori p
+ #sdmDist
+ simple-sphere p
+ sdMaterial
+ vec4 0.5 0.3 1.0 1.0
+ #scale := 20.0
+ sdmDist
+ static-if 0
+ scale := 100.0
+ (two-boxes (p / scale)) * scale
+ elseif 0
+ scale := 100.0
+ (nine-spheres (p / scale)) * scale
+ elseif 0
+ #p := p * 0.2
+ let d =
+ static-if USE_COMPLEX_SURFACE
+ ((triquad-noise3 p) * 2.0 - 1.0) - 0.05
+ else -inf
+
+ #let d =
+ min d
+ ((triquad-noise3 (p / (vec3 100 2.0 2.0))) * 2.0 - 1.0) * 100.0 - 10.0
+
+ R := ((length p) - 30.0)
+ let d =
+ max
+ max
+ (length p) - 200.0
+ d
+ -R
+ #d * scale
+ d
+ else
+ scale := 300.0
+ DD := (doubletori (p / scale)) as f32 * scale
+ S := (length p)
+ Z := (length p.xy)
+ #p := p * 0.01
+ p := p * 0.2
+ local d = 0.0
+ for i in (range 5)
+ s := (exp2 (i as f32))
+ d += ((triquad-noise3 (p * s)) * 2.0 - 1.0) / s
+ min
+ #do
+ S - 10.0
+ static-if USE_COMPLEX_SURFACE
+ +
+ (abs DD) - 10.0
+
+ (deref d) * 40.0
+ else
+ (abs DD) - 10.0
+
+
+ #Z - 30.0
+ #- (Z - 20.0)
+ sdMaterial
+ vec4 0.5 0.3 1.0 1.0
+
+# single sphere:
+ 256^3: 89240 cells (best: 0.3ms)
+ 1024^3: 1427240 cells (best: 3.5ms)
+# min: 104567 at 256^3
+# Scalar scene distance: evaluates matmapf at p and converts the result to f32.
+# The commented lines keep an optional rotation of p and the alternative test
+# shapes (simple-sphere, twoballs, doubletori, ...) that can be swapped in.
+fn mapf (p)
+ #let x = ((radians (deref shglobals.time)) * 10.0)
+ #let xz =
+ anglevector-rotate
+ anglevector -x
+ p.xz
+ #let p =
+ vec3
+ xz.x
+ p.y
+ xz.y
+ #simple-sphere p
+ #twoballs p
+ #doubletori p
+ #nine-spheres p
+ #two-boxes p
+ #one-box p
+ (matmapf p) as f32
+
+# alias of the analytic scene function under a second name
+let orig_mapf = mapf
+
+# Negated result of sdNormalFast for the scene function mapf at p; r is
+# forwarded unchanged as the last sdNormalFast argument.
+fn normalmapf (p r)
+    let n = (sdNormalFast mapf p r)
+    - n
+
+# parameters of the onion (shell) mapping below
+let ONION_NEAR = 0.6
+let ONION_FAR = 100.0
+let ONION_LAYERS = 32.0
+
+# Maps p to a point on a shell: p.z is remapped from [-1..1] to a clamped
+# [0..1] value which the `embed` block turns into a radius r that grows
+# geometrically from ONION_NEAR; p.xy is decoded into a direction with
+# unpack_normal_snorm. Returns the pair (direction * r, r).
+# The `#embed` / `#p :=` lines keep alternative radius and direction mappings.
+# NOTE(review): which of the following lines each `#` comment disables depends
+# on indentation that is not visible here — confirm against the file.
+fn map_onion_radius (p)
+ #r := (clamp ((p.z * 0.5 + 0.5) * 0.5 + 0.5) 0.0 1.0)
+ r := (clamp (p.z * 0.5 + 0.5) 0.0 1.0)
+ #r := (p.z * 0.5 + 0.5) * ONION_LAYERS
+
+ # roughly square layers
+ embed
+ Z := CUBE_SIZE.z as f32
+ r := ONION_NEAR * ((1 + 2 * (sqrt pi) / (Z * 0.5)) ** (r * Z))
+
+ # exponential interpolation
+ #embed
+ r := (expmix ONION_NEAR ONION_FAR r 0.5)
+
+ # infinite perspective projection
+ #embed
+ r := r * 0.9999
+ k := 0.01
+ r := r / (k * (1.0 - r))
+
+ p := (unpack_normal_snorm p.xy)
+ #p :=
+ spherical-surface
+ L1-spherical
+ octahedral-surface
+ tile-guyou ((p.xy - 1.0) / 2.0)
+
+ _ (p * r) r
+
+# Position-only variant of map_onion_radius: returns the mapped point and
+# drops the radius.
+fn map_onion (p)
+    let mapped radius = (map_onion_radius p)
+    mapped
+
+# Pass-through vertex mapping: returns p unchanged.
+fn map_identity (p)
+    p
+
+# Pass-through mapping that additionally returns a constant 1.0 as second value.
+fn map_identity_radius (p)
+    _ p 1.0
+
+# active vertex mapping pair; the onion mapping is kept as an alternative
+#let map_vertex map_vertex_rlimit = map_onion map_onion_radius
+let map_vertex map_vertex_rlimit = map_identity map_identity_radius
+
+# Returns tpos, shifted by the xyz of column 3 of shglobals.view-inverse when
+# SAMPLE_CAMERA_OFFSET is enabled.
+inline map-translation (tpos)
+    static-if SAMPLE_CAMERA_OFFSET
+        let offset = ((shglobals.view-inverse @ 3) . xyz)
+        tpos + offset
+    else
+        tpos
+
+
+# Builds the projection via 'ifp-perspective on ProjectionSetup, passing
+# (1 / aspect, 1.0) and 0.1 as arguments.
+fn calc-projection ()
+    let inv-aspect = (/ (deref shglobals.aspect))
+    let scale = (vec2 inv-aspect 1.0)
+    'ifp-perspective ProjectionSetup scale 0.1
+
+# Multiplies a direction by the upper-left 3x3 of the view matrix.
+inline transform-dist (p)
+    let m = (mat3 shglobals.view)
+    m * p
+
+# Multiplies a direction by the upper-left 3x3 of the inverse view matrix.
+inline transform-invert-dist (p)
+    let m = (mat3 shglobals.view-inverse)
+    m * p
+
+# Transforms a position (w = 1) by the inverse view matrix and returns xyz.
+inline transform-invert-pos (p)
+    let hp = (vec4 p 1)
+    let v = ((deref shglobals.view-inverse) * hp)
+    v.xyz
+
+# Transforms a position (w = 1) by the view matrix and returns xyz.
+inline transform-pos (p)
+    let hp = (vec4 p 1)
+    let v = ((deref shglobals.view) * hp)
+    v.xyz
+
+# Solves for a zero crossing between two samples v0 and v1 along `dir`.
+# Each sample is read as a vec4 whose .w supplies a value (c0, c1) and whose
+# .xyz is projected onto `dir` and scaled by 0.01 (g0, g1). The closed-form
+# expression yields two candidates c - w and c + w, both remapped with
+# * 2.0 - 1.0; the one with the smaller magnitude is returned.
+# NOTE(review): g1 == g0 makes d a division by zero, and a negative radicand
+# makes w NaN; no guard is visible here.
+fn gradient-root (v0 v1 dir)
+ c0 := v0.w
+ c1 := v1.w
+ g0 := (dot v0.xyz dir) * 0.01
+ g1 := (dot v1.xyz dir) * 0.01
+ d := 0.5 / (g1 - g0)
+ c := d * (c0 - c1 - g0 + g1)
+ w := d * (sqrt (c0 * c0 - 2.0 * c0 * c1 + c1 * c1 + 2.0 * (c0 + c1) * g0 + g0 * g0 - 2.0 * (c0 + c1 + g0) * g1 + g1 * g1))
+ q0 := (c - w) * 2.0 - 1.0
+ q1 := (c + w) * 2.0 - 1.0
+ ? ((abs q0) < (abs q1)) q0 q1
+
+# Area of the triangle A B C: half the length of the cross product of two edges.
+fn triangle-area (A B C)
+    let e1 = (B - A)
+    let e2 = (C - A)
+    (length (cross e1 e2)) / 2.0
+
+# Trilinear blend of the eight values v @ 0 .. v @ 7. The components of p are
+# used directly as mix factors: x selects within each pair, y between pairs,
+# z between the two groups of four.
+fn trimix (v p)
+    let c000 c001 c010 c011 c100 c101 c110 c111 =
+        va-map
+            inline (i) (copy (v @ i))
+            va-range 8
+    # collapse along x
+    let x00 = (mix c000 c001 p.x)
+    let x01 = (mix c010 c011 p.x)
+    let x10 = (mix c100 c101 p.x)
+    let x11 = (mix c110 c111 p.x)
+    # collapse along y, then z
+    let y0 = (mix x00 x01 p.y)
+    let y1 = (mix x10 x11 p.y)
+    mix y0 y1 p.z
+
+# Surface point estimate for a single tetrahedron of a cube cell.
+# cd holds the eight corner values, indexed by x | (y << 1) | (z << 2) as
+# implied by the bit decode below. tetverts packs the corner sequence
+# 1 3 2 6 4 5 1 in 3-bit groups; tetidx selects two consecutive entries
+# (k1, k3), and the tetrahedron is (corner 0, k1, corner 7, k3) in [-1..1]^3.
+# tetfaces classifies the four values: for count 1 the centroid of the three
+# interpolated edge points is returned, for count 2 the centroid of four, and
+# otherwise the centroid of the tetrahedron corners.
+# NOTE(review): `idxs` is assigned but not used.
+fn tet-feature-vertex-normal2 (cd tetidx)
+ let tetverts = 0x6cc99:u32
+ k := tetidx * 3:u32
+ k1 := (tetverts >> k) & 7:u32
+ k3 := (tetverts >> (k + 3:u32)) & 7:u32
+
+ let idxs = (ivec4 0 k1 7 k3)
+
+ local p : (array vec3 4)
+ p @ 0 = (vec3 -1 -1 -1)
+ p @ 1 = (vec3 (k1 & 1) ((k1 >> 1) & 1) ((k1 >> 2) & 1)) * 2 - 1
+ p @ 2 = (vec3 1 1 1)
+ p @ 3 = (vec3 (k3 & 1) ((k3 >> 1) & 1) ((k3 >> 2) & 1)) * 2 - 1
+ let d = (vec4 (cd @ 0) (cd @ k1) (cd @ 7) (cd @ k3))
+
+ inline tf (i0 i1)
+ mix (p @ i0) (p @ i1) (tetlerp (d @ i0) (d @ i1))
+
+ let c i = (tetfaces d)
+ switch c
+ case 1:u32
+ p0 := (tf i.x i.y)
+ p1 := (tf i.x i.z)
+ p2 := (tf i.x i.w)
+ (p0 + p1 + p2) / 3
+ case 2:u32
+ p0 := (tf i.x i.z)
+ p1 := (tf i.x i.w)
+ p2 := (tf i.y i.w)
+ p3 := (tf i.y i.z)
+ (p0 + p1 + p2 + p3) / 4
+ default
+ (+ (p @ 0) (p @ 1) (p @ 2) (p @ 3)) / 4
+
+# Feature vertex of a cube cell from its eight corner values cd.
+# The cube is split into six tetrahedra that share the corner 0 - corner 7
+# diagonal (same tetverts decode as tet-feature-vertex-normal2). For every
+# tetrahedron where tetfaces reports a surface polygon, the polygon centroid
+# is accumulated weighted by its area (without the factor 1/2, floored at
+# 1e-5). Returns the weighted average in [-1..1]^3, or vec3 0 when no
+# tetrahedron contributed.
+# NOTE(review): `idxs` and `vc` are assigned but not used.
+fn tet-feature-vertex-normal (cd)
+ local outv = (vec4 0)
+ for tetidx in (range 6:u32)
+ let tetverts = 0x6cc99:u32
+ k := tetidx * 3:u32
+ k1 := (tetverts >> k) & 7:u32
+ k3 := (tetverts >> (k + 3:u32)) & 7:u32
+
+ let idxs = (ivec4 0 k1 7 k3)
+
+ local p : (array vec3 4)
+ p @ 0 = (vec3 -1 -1 -1)
+ p @ 1 = (vec3 (k1 & 1) ((k1 >> 1) & 1) ((k1 >> 2) & 1)) * 2 - 1
+ p @ 2 = (vec3 1 1 1)
+ p @ 3 = (vec3 (k3 & 1) ((k3 >> 1) & 1) ((k3 >> 2) & 1)) * 2 - 1
+ let d = (vec4 (cd @ 0) (cd @ k1) (cd @ 7) (cd @ k3))
+
+ let c i = (tetfaces d)
+ if (c == 0:u32)
+ continue;
+ let vc = (c * 3)
+ inline tf (i0 i1)
+ mix (p @ i0) (p @ i1) (tetlerp (d @ i0) (d @ i1))
+
+ fn triangle-area (A B C)
+ # removed factor 1/2
+ length (cross (B - A) (C - A))
+
+ let pc A =
+ if (c == 1:u32)
+ p0 := (tf i.x i.y)
+ p1 := (tf i.x i.z)
+ p2 := (tf i.x i.w)
+ pc := ((p0 + p1 + p2) / 3)
+ A := (triangle-area p0 p1 p2)
+ _ pc A
+ else # elseif (c == 2:u32)
+ p0 := (tf i.x i.z)
+ p1 := (tf i.x i.w)
+ p2 := (tf i.y i.w)
+ p3 := (tf i.y i.z)
+ pc := ((p0 + p1 + p2 + p3) / 4)
+ A :=
+ +
+ (triangle-area p0 p1 p2)
+ (triangle-area p0 p3 p2)
+ _ pc A
+ outv += (vec4 pc 1) * (max 1e-5 A)
+ ? (outv.w == 0) (vec3 0) (outv.xyz / outv.w)
+
+# Computes the feature vertex and gradient of one cube cell.
+# v holds the eight corner values (v @ 0 .. v @ 7). The gradient g is the sum
+# of the four body-diagonal differences, each multiplied by its diagonal
+# direction; it is not normalized (the normalize is commented out).
+# The feature vertex fv is chosen by a static-if chain: with
+# HIGH_QUALITY_FEATURES it comes from tet-feature-vertex-normal; the remaining
+# branches keep an inverse-lerp estimate along the diagonals and an iterative
+# projection onto the trilinear field as alternatives.
+# Returns the pair (fv, g).
+# NOTE(review): the `#embed` and `#else` comments disable the lines nested
+# under them; their extent depends on indentation not visible here.
+fn feature-vertex-normal (v)
+ let c000 c001 c010 c011 c100 c101 c110 c111 =
+ va-map
+ inline (i) (copy (v @ i))
+ va-range 8
+ # unsigned body diagonal vectors
+ n00 := (vec3 1 1 1)
+ n01 := (vec3 -1 1 1)
+ n10 := (vec3 1 -1 1)
+ n11 := (vec3 -1 -1 1)
+ # sign flipped differences along body diagonal vectors
+ g00 := (c000 - c111)
+ g01 := (c001 - c110)
+ g10 := (c010 - c101)
+ g11 := (c011 - c100)
+ # normal vector (gradient)
+ g :=
+ do #normalize
+ +
+ g00 * n00
+ g01 * n01
+ g10 * n10
+ g11 * n11
+
+ # corner weights of cube in [-1..1]³ domain
+ let fv =
+ static-if HIGH_QUALITY_FEATURES
+ tet-feature-vertex-normal v
+ elseif 0
+ #embed
+ # position of zero in interval [-1..1] (inverse lerp)
+ d00 := (gradient-root c000 c111 n00)
+ d01 := (gradient-root c001 c110 n01)
+ d10 := (gradient-root c010 c101 n10)
+ d11 := (gradient-root c011 c100 n11)
+ embed
+ # position of zero in interval [-1..1] (inverse lerp)
+ d00 := (c000 + c111) / g00
+ d01 := (c001 + c110) / g01
+ d10 := (c010 + c101) / g10
+ d11 := (c011 + c100) / g11
+
+ # projected points
+ v00 := d00 * n00
+ v01 := d01 * n01
+ v10 := d10 * n10
+ v11 := d11 * n11
+
+ let verts... =
+ ? ((abs d00) <= 1.0)
+ vec4 v00 1
+ vec4 0
+ ? ((abs d01) <= 1.0)
+ vec4 v01 1
+ vec4 0
+ ? ((abs d10) <= 1.0)
+ vec4 v10 1
+ vec4 0
+ ? ((abs d11) <= 1.0)
+ vec4 v11 1
+ vec4 0
+
+ d := (+ verts... )
+
+ # feature vertex
+ fv := (d.xyz / (max 1.0 d.w))
+
+ fv
+ else
+ inline mapf (p)
+ p := p * 0.5 + 0.5
+ mix
+ mix
+ mix c000 c001 p.x
+ mix c010 c011 p.x
+ p.y
+ mix
+ mix c100 c101 p.x
+ mix c110 c111 p.x
+ p.y
+ p.z
+ inline grad (p)
+ sdNormalFast mapf p
+ local p = (vec3 0)
+ for i in (range 20)
+ d := (mapf p)
+ g := (grad p)
+ p = (p - d * g)
+ ;
+ deref p
+ #else
+ let c000 c001 c010 c011 c100 c101 c110 c111 =
+ va-map
+ inline (i) (copy ((v @ i) . w))
+ va-range 8
+ # isoplane point
+ d := (+ c000 c001 c010 c011 c100 c101 c110 c111) / 8
+ g :=
+ /
+ vec3
+ (+ c001 c101 c011 c111) - (+ c000 c100 c010 c110)
+ (+ c010 c011 c110 c111) - (+ c000 c001 c100 c101)
+ (+ c100 c110 c101 c111) - (+ c000 c010 c001 c011)
+ 8
+ l := (length g)
+ g := g / l
+ d := d / l
+ k := -d / (dot g g)
+ k * g
+ _ fv g
+
+#do
+ let k0 = 0
+ let k1 = 1
+ local ww =
+ arrayof f32
+ \ k1 k0 k1 k1
+ \ k1 k1 k1 k1
+ print
+ feature-plane ww
+
+ #print
+ GL.MAX_COMPUTE_SHARED_MEMORY_SIZE
+
+ if true
+ exit 0
+#run-stage;
+
+# Storage format of the world volume. The active `embed` selects RGBA32UI
+# texels (the uvec4 written by pack-vvf); the commented `#embed` variants keep
+# earlier single-channel float formats.
+#embed
+ let WORLD_PIXELFMT = GL.R32F
+ let WORLD_IMAGETYPE = (image3D r32f)
+#embed
+ let WORLD_PIXELFMT = GL.R16F
+ let WORLD_IMAGETYPE = (image3D r16f)
+#embed
+ let WORLD_PIXELFMT = GL.R8
+ let WORLD_IMAGETYPE = (image3D r8)
+embed
+ let WORLD_PIXELFMT = GL.RGBA32UI
+ let WORLD_IMAGETYPE = (uimage3D rgba32ui)
+
+# read-only source level of the world volume (generate-world-lod)
+uniform world-in : WORLD_IMAGETYPE
+ binding = BINDING_IMG_WORLD_IN
+ \ coherent readonly restrict
+
+# write-only destination of generate-world and generate-world-lod
+uniform world-out : WORLD_IMAGETYPE
+ binding = BINDING_IMG_WORLD_OUT
+ \ coherent writeonly restrict
+
+# read/write view of the world volume, edited in place by update-world
+uniform world-inout : WORLD_IMAGETYPE
+ binding = BINDING_IMG_WORLD_INOUT
+ \ coherent restrict
+
+# sampler over the world volume, fetched per LOD with texelFetch in generate-cell-verts
+uniform smp-world : usampler3D
+ location = UNIFORM_WORLD_SAMPLER
+
+# Encodes a cell vertex and its flag bits as one world texel (uvec4).
+# With USE_VVF_PACKING the vertex is packed with packSnorm4x8 (fourth
+# component 0) and the flags are OR-ed in shifted left by 24; the other three
+# channels are 0. Otherwise the three floats are bitcast to u32 and the flags
+# occupy the fourth channel.
+fn pack-vvf (vertex cflags)
+    static-if USE_VVF_PACKING
+        let snorm-bits = (packSnorm4x8 (vec4 vertex 0))
+        let flag-bits = (cflags << 24:u32)
+        uvec4 (snorm-bits | flag-bits) 0 0 0
+    else
+        let x = (bitcast vertex.x u32)
+        let y = (bitcast vertex.y u32)
+        let z = (bitcast vertex.z u32)
+        uvec4 x y z cflags
+
+# Inverse of pack-vvf: returns the pair (vertex, flags) decoded from a world
+# texel. With USE_VVF_PACKING both come from data.x (flags are data.x >> 24);
+# otherwise the vertex is bitcast from data.xyz and the flags are data.w.
+fn unpack-vvf (data)
+    static-if USE_VVF_PACKING
+        let vertex = ((unpackSnorm4x8 data.x) . xyz as vec-type)
+        let cflags = (data.x >> 24:u32)
+        _ vertex cflags
+    else
+        let vertex =
+            vec3
+                bitcast data.x f32
+                bitcast data.y f32
+                bitcast data.z f32
+        _ vertex (copy data.w)
+
+# Samples the distance function `mapf` at the eight corners of world cell
+# `ipos` and derives the cell's feature vertex and flags.
+# Corner i sits at offset (i & 1, (i >> 1) & 1, (i >> 2) & 1); cell positions
+# are normalized to [-1..1] and multiplied by WORLD_SCALE before sampling.
+# Returns (fv, flags): fv is the first result of feature-vertex-normal, flag
+# value 1 is set when corner 0 is negative, flag value 2 when the minimum and
+# maximum corner values have opposite signs or one of them is zero.
+# NOTE(review): the first `pos` binding (cell center) is not used; the loop
+# rebinds `pos` per corner.
+inline sample-field (ipos mapf)
+ rd := (2.0 / (vec3 WORLD_SIZE))
+ fpos := (vec3 ipos) * rd - 1.0
+ pos := (fpos + 0.5 * rd) * WORLD_SCALE
+
+ local cd : (array f32 8)
+ local mind = inf
+ local maxd = -inf
+ for i in (range 8)
+ let x y z = (i & 1) ((i >> 1) & 1) ((i >> 2) & 1)
+ pos := (fpos + (vec3 x y z) * rd) * WORLD_SCALE
+ let d = (mapf pos)
+ cd @ i = d
+ mind = (min mind d)
+ maxd = (max maxd d)
+ let fv = (feature-vertex-normal cd)
+ _ fv
+ |
+ ? ((cd @ 0) < 0) 1:u32 0:u32
+ ? (mind * maxd <= 0) 2:u32 0:u32
+
+# Compute kernel (GROUP_SIZE^3 local size) that fills the finest world level:
+# one invocation per cell, invocations outside WORLD_SIZE return early.
+# The cell is evaluated with sample-field using `samplef`, which averages mapf
+# over the 2x2x2 offsets d in {-1, +1}^3, scaled by 3.0 (SOFT_WORLD_SAMPLING)
+# or 1.0 and by a quarter cell (0.25 * rd * WORLD_SCALE). The packed result is
+# stored to world-out at ipos.
+fn generate-world ()
+ local_size GROUP_SIZE GROUP_SIZE GROUP_SIZE
+ ipos := (uvec3 gl_GlobalInvocationID.xyz)
+ if (any? (ipos >= WORLD_SIZE))
+ return;
+ rd := (2.0 / (vec3 WORLD_SIZE))
+ inline samplef (pos)
+ static-if 0
+ mapf pos
+ else
+ local v = 0.0
+ N := 2
+ N:u32 := N as u32
+ for x y z in (dim N N N)
+ d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0
+ d := d *
+ do
+ static-if SOFT_WORLD_SAMPLING 3.0
+ else 1.0
+ lpos := pos + d * (0.25 * rd * WORLD_SCALE)
+ #d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0
+ #lpos := pos + (d * 3.0 * rd) * WORLD_SCALE
+ s := (mapf lpos)
+ #w := (/ (+ (abs d.x) (abs d.y) (abs d.z)))
+ v += s
+ (copy v) / (N * N * N)
+ imageStore world-out ipos (pack-vvf (sample-field ipos samplef))
+#
+ rd := (2.0 / (vec3 WORLD_SIZE))
+ fpos := (vec3 ipos) * rd - 1.0
+ pos := (fpos + 0.5 * rd) * WORLD_SCALE
+
+ local cd : (array f32 8)
+ local mind = inf
+ local maxd = -inf
+ for i in (range 8)
+ let x y z = (i & 1) ((i >> 1) & 1) ((i >> 2) & 1)
+ pos := (fpos + (vec3 x y z) * rd) * WORLD_SCALE
+ let d =
+ static-if 0
+ mapf pos
+ elseif 1
+ local v = 0.0
+ N := 2
+ N:u32 := N as u32
+ for x y z in (dim N N N)
+ d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0
+ d := d *
+ do
+ static-if SOFT_WORLD_SAMPLING 3.0
+ else 1.0
+ lpos := pos + d * (0.25 * rd * WORLD_SCALE)
+ #d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0
+ #lpos := pos + (d * 3.0 * rd) * WORLD_SCALE
+ s := (mapf lpos)
+ #w := (/ (+ (abs d.x) (abs d.y) (abs d.z)))
+ v += s
+ (copy v) / (N * N * N)
+ elseif 0
+ local v = (vec4 0)
+ N := 8
+ N:u32 := N as u32
+ for x y z in (dim N N N)
+ d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0
+ #d := d * 16.0
+ d := d * 3.0
+ lpos := pos + d * (0.5 * rd * WORLD_SCALE)
+ #d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0
+ #lpos := pos + (d * 3.0 * rd) * WORLD_SCALE
+ s := (- (sign (mapf lpos)))
+ w := (/ (+ (abs d.x) (abs d.y) (abs d.z)))
+ v += s * w * (vec4 d 1)
+ ((copy v.w) + 1.0) / 2.0
+ else
+ grad := (normalmapf pos (rd * 0.5))
+ local bits = 0
+ N := 8
+ N:u32 := N as u32
+ for x y z in (dim N N N)
+ lpos := (fpos + ((vec3 x y z) / N) * rd) * WORLD_SCALE
+ #d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0
+ #lpos := pos + (d * 3.0 * rd) * WORLD_SCALE
+ if ((mapf lpos) <= (rd.x / N))
+ bits += 1
+ bits as f32 / (N * N * N)
+ cd @ i = d
+ mind = (min mind d)
+ maxd = (max maxd d)
+ ;
+ let fv = (feature-vertex-normal cd)
+ data :=
+ pack-vvf fv
+ |
+ ? ((cd @ 0) < 0) 1:u32 0:u32
+ ? (mind * maxd <= 0) 2:u32 0:u32
+ imageStore world-out ipos data
+ ;
+
+# True when the cell at `pos` in image `smp` straddles the surface: gathers
+# flag bit 1 of the cell and its seven +x/+y/+z neighbours into an 8-bit mask
+# and reports whether the mask is neither all clear nor all set.
+inline vertex-valid? (smp pos)
+    local mask = 0:u32
+    for corner in (range 8)
+        let offset = (ivec3 (corner & 1) ((corner >> 1) & 1) ((corner >> 2) & 1))
+        let __ cf = (unpack-vvf (imageLoad smp (pos + offset)))
+        mask |= (cf & 1) << corner as u32
+    (mask != 0:u32) & (mask != 255:u32)
+
+# Compute kernel (GROUP_SIZE^3 local size) that edits the finest world level
+# in place with a spherical brush.
+# The brush field `samplef` is (length origin) - 40.0, where origin is the
+# sample position minus 2x the translation column of shglobals.view-inverse
+# minus 40x its column 2. The brush is evaluated with sample-field
+# (fv, cfbrush) and the stored cell is read from world-inout (vx, cf).
+# The stored vertex is kept when vertex-valid? holds for the cell, otherwise
+# the brush vertex is used. Flags are merged depending on mouse-state:
+#   bit value 4: inside bit = cf | cfbrush            (add material)
+#   bit value 1: inside bit = cf & (cfbrush ^ 1)      (remove material)
+# In both modes flag value 2 is the OR of both cells; with neither bit set
+# the invocation returns without writing.
+# NOTE(review): `brush-vertex-valid?` and the initial `local svx` are only
+# referenced by commented-out code.
+fn update-world ()
+ local_size GROUP_SIZE GROUP_SIZE GROUP_SIZE
+ ipos := (uvec3 gl_GlobalInvocationID.xyz)
+ if (any? (ipos >= WORLD_SIZE))
+ return;
+ inline samplef (pos)
+ origin := pos - 2.0 * (shglobals.view-inverse @ 3) . xyz
+ origin := origin - (shglobals.view-inverse @ 2) . xyz * 40.0
+ (length origin) - 40.0
+ let fv cfbrush = (sample-field ipos samplef)
+ let vx cf = (unpack-vvf (imageLoad world-inout ipos))
+
+ brush-vertex-valid? := ((cfbrush & 2) != 0)
+
+ local svx = (vec4 0)
+ ipos := (ivec3 ipos)
+ svx := (? (vertex-valid? world-inout ipos) vx fv)
+ #svx += (vec4 fv 1) * (? brush-vertex-valid? 0.1 0.0)
+ #svx += (vec4 vx 1) * (? (vertex-valid? world-inout ipos) 1.0 0.0)
+ #svx := (? (svx.w == 0) (vec3 0) (svx.xyz / svx.w))
+
+ #let fv = vx
+ let fv cf =
+ if (mouse-state & 4)
+ #sdSmoothOr d brush 5.0
+ _ svx
+ |
+ (cf | cfbrush) & 1
+ (cf | cfbrush) & 2
+ elseif (mouse-state & 1)
+ #sdSmoothAnd d -brush 2.0
+ _ svx
+ |
+ (cf & (cfbrush ^ 1)) & 1
+ (cf | cfbrush) & 2
+ else
+ return;
+
+ imageStore world-inout ipos (pack-vvf fv cf)
+ ;
+
+#uniform lodlevel : i32
+
+#vvv print
+#fold (w = 0.0) for x y z in (dim 3 3 3)
+ d := 3
+ p := (ivec3 x y z) - 1
+ #ap := (ivec3 (abs p.x) (abs p.y) (abs p.z))
+ ap := (vec3 (abs p.x) (abs p.y) (abs p.z))
+ #w1 := (exp2 (- (ap.x + ap.y + ap.z)))
+ w1 := (exp2 (- (length (vec3 p))))
+ print (x - 1) (y - 1) (z - 1) (w1 / 10.910761)
+ w + w1
+
+
+#do
+ vvv print
+ fold (w = 0.0) for x y z in (dim 4 4 4)
+ # blur kernel 4x4x4
+ # total sum of weights is S = 2 ** (3*d)
+ # w = (3 ** d) / (3 ** (abs(p.x) + abs(p.y) + abs(p.z))) / S
+ # exp2 ((log2 3.0) * (d - ((abs p.x) + (abs p.y) + (abs p.z))) - (log2 2.0) * 3 * d)
+ d := 3
+ ofs := (ivec3 x y z)
+ p := (vec3 ((ivec3 x y z) >> 1))
+ # simulate adding 8 samples
+ w +
+ exp2 ((log2 3.0) * (3 - (p.x + p.y + p.z)) - (log2 2.0) * 9)
+
+
+#if true
+ exit 0
+#run-stage;
+
+# Weighted 3x3x3 neighbourhood fetch around `ipos` from image `src`.
+# Every tap at offset p in {-1, 0, 1}^3 is weighted by
+# 1 / (1 << (3 + |p.x| + |p.y| + |p.z|)): 1/8 for the centre, 1/16 per face,
+# 1/32 per edge and 1/64 per corner neighbour, which sums to 1. Taps that
+# fall outside the image get weight 0.
+# Returns the accumulated vec4.
+inline windowed-fetch (src ipos)
+    isize := (imageSize src)
+    iz := (ivec3 0)
+    fold (v = (vec4 0.0)) for x y z in (dim 3 3 3)
+        p := (ivec3 x y z) - 1
+        ap := (ivec3 (abs p.x) (abs p.y) (abs p.z))
+        # fetch and bounds-check the neighbour texel; the previous code read
+        # `ipos` itself for all 27 taps, so the kernel never left the centre
+        spos := ipos + p
+        value := (imageLoad src spos)
+        outofbounds? := (any? ((spos < iz) | (spos >= isize)))
+        + v
+            * value
+                ? outofbounds? 0.0
+                    / (1 << (3 + ap.x + ap.y + ap.z))
+
+# Compute kernel (GROUP_SIZE^3 local size) that builds one coarser world
+# level: each output cell opos of world-out covers the 2x2x2 input cells of
+# world-in starting at ibpos = opos << 1.
+# Vertex: every child vertex is re-expressed in the parent cell's [-1..1]
+# range (0.5 * vx + child offset - 0.5). Without AVERAGE_LOD_VERTICES the
+# child that passes vertex-valid? with the smallest squared length is kept;
+# with it, valid children are averaged with weight 1 / L. If no child is
+# valid the vertex is vec3 0.
+# Flags: flag value 1 is the OR of the inside bit at ibpos and of the three
+# ANDs of the inside bits two input cells away in opposite directions along
+# each axis; flag value 2 is set when a valid child vertex was found.
+# NOTE(review): `isize`, `iz` and `w` are computed but not used; in the
+# averaging path L == 0 would divide by zero.
+fn generate-world-lod ()
+ local_size GROUP_SIZE GROUP_SIZE GROUP_SIZE
+ opos := (ivec3 gl_GlobalInvocationID.xyz)
+ if (any? (opos >= (imageSize world-out)))
+ return;
+ isize := (imageSize world-in)
+ iz := (ivec3 0)
+ ibpos := opos << 1
+
+ local vertex =
+ do
+ static-if AVERAGE_LOD_VERTICES
+ vec4 0
+ else
+ vec3 0
+ local bestdist = inf
+ local failed = true
+ for x y z in (dim 2 2 2)
+ ipos := ibpos + (ivec3 x y z)
+ let vx cf = (unpack-vvf (imageLoad world-in ipos))
+ vx := (0.5 * vx + (vec3 x y z) - 0.5)
+ L := (dot vx vx)
+ #if (((cf & 2) != 0) & (L < bestdist))
+ static-if AVERAGE_LOD_VERTICES
+ if (vertex-valid? world-in ipos)
+ vertex += (vec4 vx 1) * (1 / L)
+ failed = false
+ else
+ if ((vertex-valid? world-in ipos) & (L < bestdist))
+ vertex = vx
+ bestdist = L
+ failed = false
+ vx :=
+ ? failed
+ vec3 0
+ static-if AVERAGE_LOD_VERTICES
+ vertex.xyz / vertex.w
+ else
+ copy vertex
+ inline fetch (pos)
+ let __ cf = (unpack-vvf (imageLoad world-in (ibpos + pos * 2)))
+ cf & 1
+ let w =
+ +
+ 2 * (fetch (ivec3 0))
+ (fetch (ivec3 1 0 0))
+ (fetch (ivec3 -1 0 0))
+ (fetch (ivec3 0 1 0))
+ (fetch (ivec3 0 -1 0))
+ (fetch (ivec3 0 0 1))
+ (fetch (ivec3 0 0 -1))
+
+ let cf =
+ |
+ #(? (w / 8 < 0.38) 0:u32 1:u32)
+ |
+ (fetch (ivec3 0))
+ (fetch (ivec3 1 0 0)) & (fetch (ivec3 -1 0 0))
+ (fetch (ivec3 0 1 0)) & (fetch (ivec3 0 -1 0))
+ (fetch (ivec3 0 0 1)) & (fetch (ivec3 0 0 -1))
+ ? failed 0:u32 2:u32
+
+ #let __ cf = (unpack-vvf ((imageLoad world-in ibpos) . r))
+ #let cf = (? (bcount < 3) 0:u32 1:u32)
+ imageStore world-out opos (pack-vvf vx cf)
+ ;
+
+#inline mapf (p lod)
+#
+ z := 0.5 * (exp2 (-lod * 1.0))
+ let d =
+ (textureLod smp-world ((p / WORLD_SCALE) * 0.5 + 0.5) lod) . r
+ #d := (textureLod smp-world ((p / WORLD_SCALE) * 0.5 + 0.5) 2) . r
+ slimit := (min d (1.0 - d))
+ s := 1.0
+ #min
+ (clamp gx -slimit slimit) / gx
+ (clamp gy -slimit slimit) / gy
+ (clamp gz -slimit slimit) / gz
+ (z - d)
+
+#inline matmapf (p lod)
+#
+ sdmDist (mapf p lod)
+ sdMaterial
+ vec4 0.5 0.3 1.0 1.0
+
+# LOD-aware normal: negated sdNormalFast of `mapf` evaluated at LOD
+# max(0, lod + LOD_NORMAL_OFFSET), with a step r of one finest-level cell
+# ((1 / WORLD_SIZE.x) * WORLD_SCALE). Rebinds the earlier normalmapf (p r).
+# NOTE(review): this calls mapf with two arguments, but the only live mapf in
+# this file takes one; the two-argument version above is commented out. The
+# function presumably only compiles if it is never instantiated — confirm.
+fn normalmapf (p lod)
+ r := (1.0 / WORLD_SIZE.x) * WORLD_SCALE
+ -
+ sdNormalFast
+ inline (p)
+ mapf p (max 0.0 (lod + LOD_NORMAL_OFFSET))
+ \ p r
+
+# A sector is meshed from its SECTOR_GROUP_SIZE^3 cells plus a one-cell
+# border on every side: 4 + 2 = 6 samples per axis, 216 samples in total,
+# 64 cells in the sector itself.
+let SECTOR_SAMPLE_SIZE = (SECTOR_GROUP_SIZE + 2)
+let SECTOR_SAMPLE_VOLUME = (SECTOR_SAMPLE_SIZE ** 3)
+let SECTOR_GROUP_VOLUME = (SECTOR_GROUP_SIZE ** 3)
+
+# prints the combined size of the three shared arrays declared below
+dump "shared memory requirements"
+ +
+ (sizeof u32) * SECTOR_SAMPLE_VOLUME
+ (sizeof vec3) * SECTOR_SAMPLE_VOLUME
+ (sizeof vec3) * SECTOR_SAMPLE_VOLUME
+
+# per-workgroup staging arrays of generate-cell-verts, indexed by id2svindex
+shared cell-corner-flags : (array u32 SECTOR_SAMPLE_VOLUME)
+shared cell-vertex : (array vec3 SECTOR_SAMPLE_VOLUME)
+shared cell-normal : (array vec3 SECTOR_SAMPLE_VOLUME)
+
+#fn id2index (id)
+ #id.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + id.y * gl_WorkGroupSize.x + id.x
+ id.z * (SECTOR_GROUP_SIZE * SECTOR_GROUP_SIZE) + id.y * SECTOR_GROUP_SIZE + id.x
+
+# Converts a linear cell index (x fastest, then y, then z) into the cell's
+# coordinates within a SECTOR_GROUP_SIZE^3 sector.
+fn index2id (idx)
+    let rest = (idx // SECTOR_GROUP_SIZE)
+    ivec3
+        idx % SECTOR_GROUP_SIZE
+        rest % SECTOR_GROUP_SIZE
+        rest // SECTOR_GROUP_SIZE
+
+# Linear index into the shared sample arrays for sector-local cell `id`.
+# The sample volume carries a one-cell border, so coordinate -1 maps to 0.
+fn id2svindex (id)
+    let q = (id + 1)
+    (q.z * SECTOR_SAMPLE_SIZE + q.y) * SECTOR_SAMPLE_SIZE + q.x
+
+# Inverse of id2svindex: converts a linear index into the shared sample
+# arrays back to sector-local cell coordinates (border cells come out as -1
+# or SECTOR_GROUP_SIZE).
+fn svindex2id (idx)
+    let rest = (idx // SECTOR_SAMPLE_SIZE)
+    let padded =
+        ivec3
+            idx % SECTOR_SAMPLE_SIZE
+            rest % SECTOR_SAMPLE_SIZE
+            rest // SECTOR_SAMPLE_SIZE
+    padded - 1
+
+# local_size.x of generate-cell-verts; equals SECTOR_GROUP_VOLUME (4^3), so
+# each invocation of a workgroup meshes one cell of the sector
+NATIVE_LANE_WIDTH := 64
+
+# Face normal direction of the triangle v1 v2 v3: cross product of the
+# normalized edges (v3 - v1) and (v2 - v1), using only xyz.
+# The result is not renormalized, so its length is the sine of the angle
+# between the two edges.
+fn normal (v1 v2 v3)
+    let e31 = (normalize ((v3 - v1) . xyz))
+    let e21 = (normalize ((v2 - v1) . xyz))
+    cross e31 e21
+
+# Returns a copy of vertex v that keeps its position and takes n (w = 0) as normal.
+inline swapnormal (v n)
+    Vertex
+        v.pos
+        vec4 n 0
+
+# Appends one quad, as two triangles (six vertices), to vertex-out.
+# With BALANCE_QUADS the squared lengths of the two diagonals (v00-v11 and
+# v01-v10) are compared and the corner order is rotated by one when v00-v11 is
+# not the shorter one, which changes the diagonal the quad is split along.
+# Space for the six vertices is reserved with atomicAdd on vertex-out.count.
+# With USE_FLAT_SHADING each triangle's vertices get that triangle's face
+# normal (see `normal`); otherwise the incoming vertices are stored as is.
+# NOTE(review): the reserved offset is not checked against any buffer
+# capacity before writing.
+fn generate-quad (v00 v01 v10 v11)
+ let v00 v10 v11 v01 =
+ static-if BALANCE_QUADS
+ let du = (v11.pos.xyz - v00.pos.xyz)
+ let dv = (v10.pos.xyz - v01.pos.xyz)
+ if ((dot du du) < (dot dv dv))
+ _ v00 v10 v11 v01
+ else
+ _ v10 v11 v01 v00
+ else
+ _ v00 v10 v11 v01
+
+ # generate quad
+ let ofs = (atomicAdd vertex-out.count 6)
+ entries := vertex-out.entries
+ static-if USE_FLAT_SHADING
+ n0 := (normal v00.pos v10.pos v11.pos)
+ n1 := (normal v11.pos v01.pos v00.pos)
+ #n1 := (normal v00.pos v11.pos v01.pos)
+
+ entries @ (ofs + 0) = (swapnormal v00 n0)
+ entries @ (ofs + 1) = (swapnormal v10 n0)
+ entries @ (ofs + 2) = (swapnormal v11 n0)
+ entries @ (ofs + 3) = (swapnormal v11 n1)
+ entries @ (ofs + 4) = (swapnormal v01 n1)
+ entries @ (ofs + 5) = (swapnormal v00 n1)
+ else
+ entries @ (ofs + 0) = v00
+ entries @ (ofs + 1) = v10
+ entries @ (ofs + 2) = v11
+ entries @ (ofs + 3) = v11
+ entries @ (ofs + 4) = v01
+ entries @ (ofs + 5) = v00
+ ;
+
+# Compute kernel that meshes one sector per workgroup (NATIVE_LANE_WIDTH
+# invocations along x) and appends the quads to vertex-out.
+#
+# Setup: the sector record is read from sector-in at gl_WorkGroupID.x +
+# sector-offset; decode-cell yields its level and position, from which the
+# sector's world-space origin/scale and the world LOD to fetch are derived.
+#
+# Stage 1 (before the barrier): the invocations cooperatively fill the shared
+# arrays for the SECTOR_SAMPLE_SIZE^3 sample volume in SECTOR_SAMPLE_PASSES
+# passes. For every sample cell the inside bits of the cell and its seven
+# +x/+y/+z neighbours are gathered into cell-corner-flags; if they are mixed,
+# cell-vertex receives the cell's vertex. When the sector has face flags set,
+# the vertex is blended towards vertex1, the vertex of the parent cell one
+# LOD up re-expressed in this cell's range, using a per-axis step factor
+# selected by the BLEND masks.
+#
+# Stage 2 (after the barrier): each invocation takes one cell (index2id of its
+# lane index). In the default path the three cube edges meeting at corner 7
+# are tested; for every edge whose end points differ in their inside bit a
+# quad is emitted from the vertices of the four cells around that edge, with
+# the winding flipped according to corner 7's bit. With USE_CATMULL_CLARK a
+# subdivided variant is built from face, edge and centre points instead.
+#
+# NOTE(review): the `(shglobals.frame // 10) % 2` branch replaces vertices of
+# cells without flag value 2 by vec3 -10 on alternating frame groups; this
+# looks like a debugging aid — confirm it is intended.
+fn generate-cell-verts ()
+ local_size NATIVE_LANE_WIDTH 1 1
+ sector := (copy (sector-in.keys @ (gl_WorkGroupID.x + sector-offset)))
+ sector-flags := (copy sector.flags)
+ let lvl sectorpos... = (decode-cell sector.key)
+ #let lvl sectorpos... = (decode-cell 1:u32)
+ lsectorpos := (ivec3 sectorpos...)
+ sectorlod := (MAX_CASCADE_DEPTH - lvl)
+ coord := (ivec4 lsectorpos sectorlod)
+
+ sector-scale := (f32 (1 << sectorlod)) / CASCADE_SIZE
+ sector-origin := (vec3 lsectorpos) * sector-scale - 0.5
+ sector-origin := sector-origin * WORLD_SCALE
+ sector-scale := sector-scale * WORLD_SCALE
+ lod := MAX_WORLD_LOD_I - (lvl as i32)
+
+ lane-idx := (copy gl_LocalInvocationIndex)
+ sectorpos := (lsectorpos << MAX_SECTOR_LOD_I)
+
+ #if
+ for x y z in (dim 3 3 3)
+ w := (texelFetch smp-world (lsectorpos + (ivec3 x y z) - 1) (sectorlod as i32)) . r
+ if (w != 0.0)
+ break false
+ else true
+ return;
+
+ # number of passes needed for all lanes together to cover the sample volume
+ SECTOR_SAMPLE_PASSES := (SECTOR_SAMPLE_VOLUME + NATIVE_LANE_WIDTH - 1) // NATIVE_LANE_WIDTH
+
+ dump "SECTOR_SAMPLE_PASSES" SECTOR_SAMPLE_PASSES
+
+ lod := (lod - MAX_SECTOR_LOD_I)
+ for i in (range SECTOR_SAMPLE_PASSES)
+ # index in shared buffers
+ idx := (i * NATIVE_LANE_WIDTH) as u32 + lane-idx
+ if (idx >= SECTOR_SAMPLE_VOLUME)
+ break;
+ lpos := (svindex2id idx)
+ #idx := (id2index lpos)
+ gpos := lpos + sectorpos
+
+ inline fetch (pos)
+ unpack-vvf (texelFetch smp-world pos lod)
+
+ let vertex cf = (fetch gpos)
+ let vertex1 =
+ unpack-vvf (texelFetch smp-world (gpos >> 1) (lod + 1))
+ vertex1 := (vertex1 - (vec3 (gpos & 1))) * 2.0 + 1.0
+ local cflags = (cf & 1)
+ for i in (range 1 8)
+ let x y z = (i & 1) ((i >> 1) & 1) ((i >> 2) & 1)
+ #d := (((x ^ y ^ z) & 1) * 2 - 1) as f32
+ wpos := gpos + (ivec3 x y z)
+ let vx cf = (fetch wpos)
+ cflags |= ((cf & 1) << (i as u32))
+ ;
+ cflags := (deref cflags)
+ cell-corner-flags @ idx = cflags
+ if ((cflags != 0) & (cflags != 0xff))
+ cell-vertex @ idx =
+ do
+ static-if BLOCKY_WORLD
+ vec3 0
+ else
+ if
+ &
+ (((shglobals.frame // 10) % 2) == 0)
+ (cf & 2) == 0
+ vec3 -10
+ elseif (sector-flags == 0:u32) vertex
+ else
+ inline blend-factor (x f+1 f-2)
+ ? ((sector-flags & (f+1 | f-2)) != 0)
+ ? ((sector-flags & f+1) != 0) x (1 - x)
+ 0.0
+ #w := (vec3 lpos) / (SECTOR_GROUP_SIZE - 1)
+ w := (step (vec3 (SECTOR_GROUP_SIZE // 2)) (vec3 lpos))
+ w :=
+ vec3
+ blend-factor w.x BLEND+X BLEND-X
+ blend-factor w.y BLEND+Y BLEND-Y
+ blend-factor w.z BLEND+Z BLEND-Z
+ mix vertex vertex1
+ max w.x w.y w.z
+
+ #cell-normal @ idx = n
+ ;
+ ;
+ # all shared samples must be written before any lane reads its neighbours
+ barrier;
+
+ # from here on each lane works on exactly one cell of the sector
+ lpos := (index2id lane-idx)
+ idx := (id2svindex lpos)
+
+ cell-scale := sector-scale * (1.0 / SECTOR_GROUP_SIZE)
+ inline transform-vertex (v dpos)
+ cell-origin := sector-origin + cell-scale * (vec3 dpos)
+ cell-origin + cell-scale * (v * 0.5 + 0.5)
+
+ cflags := (copy (cell-corner-flags @ idx))
+
+ static-if USE_CATMULL_CLARK
+ inline getvertex (offset)
+ dpos := lpos + offset
+ idx := (id2svindex dpos)
+ v := (copy (cell-vertex @ idx))
+ transform-vertex v dpos
+
+ local verts : (array vec4 7)
+ local n = 0
+ va-map
+ inline (i)
+ verts @ i = (vec4 0)
+ va-range 7
+ let C00- C00+ C0-0 C0+0 C-00 C+00 C000 = (va-range 7)
+
+ let v000 = (getvertex (ivec3 0 0 0))
+ #inline check-edge (IDX ofs mask)
+ x := (cflags & mask)
+ if ((x != mask) & ((x ^ mask) != mask))
+ m := (v000 + (getvertex ofs))
+ w @ C00- = (vec4 m 2)
+
+ inline edgebits (a b)
+ (cflags >> a as u32) & 1, (cflags >> b as u32) & 1
+
+ inline edge (a b)
+ let u v = (edgebits a b)
+ (u != v), (u == 0)
+
+ inline collect-plane (Du fpermute)
+ Dv := (Du + 1) % 3
+ Dw := (Du + 2) % 3
+ for u v in (dim 2 2)
+ i0 := (u << Du) | (v << Dv)
+ i1 := i0 ^ (1 << Dw)
+ let set? flip? = (edge i0 i1)
+ if set?
+ v00 := v000
+ let du dv = (u * 2 - 1) (v * 2 - 1)
+ v01 := (getvertex (fpermute du 0))
+ v10 := (getvertex (fpermute 0 dv))
+ v11 := (getvertex (fpermute du dv))
+ # face vertices
+ fv := ((v00 + v01 + v10 + v11) / 4)
+ # edge vertices
+ ev01 := (v00 + v01) / 2
+ ev10 := (v00 + v10) / 2
+ verts @ C000 += (vec4 (fv + ev01 + ev10) 3)
+ verts @ (Du * 2 + u) += (vec4 (fv + ev01) 2)
+ verts @ (Dv * 2 + v) += (vec4 (fv + ev10) 2)
+ n += 1
+ ;
+
+ collect-plane 0 # XY
+ inline (du dv) (ivec3 du dv 0)
+ collect-plane 1 # YZ
+ inline (du dv) (ivec3 0 du dv)
+ collect-plane 2 # ZX
+ inline (du dv) (ivec3 dv 0 du)
+
+ n := n as f32
+ verts @ C000 += n * (n - 3) * (vec4 v000 1)
+
+ inline build-plane (Du fpermute)
+ Dv := (Du + 1) % 3
+ Dw := (Du + 2) % 3
+ for u v in (dim 2 2)
+ i0 := (u << Du) | (v << Dv)
+ i1 := i0 ^ (1 << Dw)
+ let set? flip? = (edge i0 i1)
+ if set?
+ flip? := flip? ^ ((u ^ v) == 1)
+ v00 := v000
+ let du dv = (u * 2 - 1) (v * 2 - 1)
+ v01 := (getvertex (fpermute du 0))
+ v10 := (getvertex (fpermute 0 dv))
+ v11 := (getvertex (fpermute du dv))
+ # face vertices
+ fv := (v00 + v01 + v10 + v11) / 4
+ # edge vertices
+ ev01 := (copy (verts @ (Du * 2 + u)))
+ ev01 := ev01.xyz / ev01.w
+ ev10 := (copy (verts @ (Dv * 2 + v)))
+ ev10 := ev10.xyz / ev10.w
+ # center vertex
+ cv := (copy (verts @ C000))
+ cv := cv.xyz / cv.w
+ let ev01 ev10 =
+ if flip? (_ ev10 ev01)
+ else (_ ev01 ev10)
+ generate-quad
+ Vertex (vec4 cv 1) (vec4 0)
+ Vertex (vec4 ev01 1) (vec4 0)
+ Vertex (vec4 ev10 1) (vec4 0)
+ Vertex (vec4 fv 1) (vec4 0)
+
+ build-plane 0 # XY
+ inline (du dv) (ivec3 du dv 0)
+ build-plane 1 # YZ
+ inline (du dv) (ivec3 0 du dv)
+ build-plane 2 # ZX
+ inline (du dv) (ivec3 dv 0 du)
+
+ else
+ inline getidxvertex (idx dpos)
+ v := (copy (cell-vertex @ idx))
+ #n := (copy (cell-normal @ idx))
+ Vertex
+ vec4 (transform-vertex v dpos) 1
+ vec4 0 0 1 0
+
+ inline getvertex (offset)
+ dpos := lpos + offset
+ idx := (id2svindex dpos)
+ getidxvertex idx dpos
+
+ v00 := (getidxvertex idx lpos)
+ centerbit := (cflags >> 7) & 1
+ flip? := (centerbit != 0)
+
+ for i in (range 3)
+ if ((centerbit ^ ((cflags >> ((1 << i as u32) ^ 7)) & 1)) != 0)
+ v1 := ((ivec3 0b100 0b001 0b010) >> i) & 1
+ v2 := ((ivec3 0b010 0b100 0b001) >> i) & 1
+ let v1 v2 =
+ if flip? (_ v2 v1)
+ else (_ v1 v2)
+ let v01 = (getvertex v1)
+ let v10 = (getvertex v2)
+ let v11 = (getvertex (v1 | v2))
+ generate-quad v00 v01 v10 v11
+
+ ;
+
+# Interpolants passed from rasterize-vert (.out) to rasterize-frag (.in).
+# Note that `normal` rebinds the name of the face-normal helper defined above.
+inout normal : vec3
+inout depthval : f32
+inout albedo : vec4
+inout matdata : vec4
+# Vertex shader: fetches vertex gl_VertexID from vertex-in, transforms its
+# position into view space with transform-pos and projects it with the
+# projection from calc-projection.
+# Outputs: the stored vertex normal (or, with VISUALIZE_IDS, a hash of the
+# vertex index remapped to [-1..1]), the view-space z as depth, and constant
+# albedo (vec4 1) and material data (vec4 1 0 0 0).
+# NOTE(review): `lod` and `tcoord` are only referenced by commented-out code;
+# the `#embed` rotation block is disabled.
+fn rasterize-vert ()
+ let vertex-index = ((deref gl_VertexID) as u32)
+ let vin = (deref (vertex-in.entries @ vertex-index))
+ let coord = (vec3 vin.pos.xyz)
+ let lod = (vin.pos.w as f32)
+
+ let tcoord = coord
+
+ #let dist = (matmapf tcoord lod)
+ #let material =
+ dist.material
+ #let dist = dist0
+ #let material =
+ 'mix dist0.material dist1.material l
+ let n = (vec3 vin.normal.xyz)
+ #normalmapf tcoord lod # (r * 0.5)
+
+ # rotate it a little
+ #embed
+ let a = ((deref shglobals.time) * 0.2)
+ let c s = (cos a) (sin a)
+
+ n :=
+ vec3
+ c * n.x - s * n.z
+ n.y
+ s * n.x + c * n.z
+
+ coord :=
+ vec3
+ c * coord.x - s * coord.z
+ coord.y
+ s * coord.x + c * coord.z
+
+ #coord :=
+ coord + (vec3 0 0 1)
+ #n := (transform-dist n)
+
+ #if PROJECT_FINAL_VERTEX
+
+ let coord = (transform-pos coord)
+
+ let proj =
+ calc-projection;
+
+ let pcoord =
+ 'project proj
+ vec4 coord 1.0
+
+ normal.out =
+ #(viridis (lod / MAX_WORLD_LOD)) * 2.0 - 1.0
+ do
+ static-if VISUALIZE_IDS ((vec3hash (vertex-index as f32)) * 2.0 - 1.0)
+ else n
+ depthval.out = coord.z
+ albedo.out = (vec4 1) #material.albedo
+ matdata.out =
+ #vec4 material.roughness material.metallic 0 0
+ vec4 1 0 0 0
+ gl_Position = pcoord
+ ;
+
+ fn pack-surfel-data (normal depth color matdata)
+     # Encodes one surfel as a vec4 whose lanes carry raw 32-bit payloads:
+     #   x = normal, run through pack_normal_snorm and packSnorm2x16
+     #   y = matdata, packed with packUnorm4x8
+     #   z = color, packed with packUnorm4x8
+     #   w = depth, stored unmodified
+     # unpack-surfel-data reads the lanes back in the same layout.
+     packed-normal := (bitcast (packSnorm2x16 (pack_normal_snorm normal)) f32)
+     packed-matdata := (bitcast (packUnorm4x8 matdata) f32)
+     packed-color := (bitcast (packUnorm4x8 color) f32)
+     vec4 packed-normal packed-matdata packed-color depth
+
+ fn unpack-surfel-data (frag)
+     # Decodes a vec4 written by pack-surfel-data.
+     # Returns four values in this order: normal, depth, color, matdata.
+     #   frag.x -> normal  (unpackSnorm2x16, then unpack_normal_snorm)
+     #   frag.y -> matdata (unpackUnorm4x8)
+     #   frag.z -> color   (unpackUnorm4x8)
+     #   frag.w -> depth   (passed through)
+     surfel-normal := (unpack_normal_snorm (unpackSnorm2x16 (bitcast frag.x u32)))
+     surfel-matdata := (unpackUnorm4x8 (bitcast frag.y u32))
+     surfel-color := (unpackUnorm4x8 (bitcast frag.z u32))
+     _ surfel-normal frag.w surfel-color surfel-matdata
+
+ out out_Color : vec4
+     binding = 0
+ fn rasterize-frag ()
+     # Fragment shader of the rasterize pass: gathers the interpolated
+     # varyings and writes them to out_Color as one packed surfel.
+     surfel-normal := (deref normal.in)
+     surfel-depth := (deref depthval.in)
+     surfel-albedo := (deref albedo.in)
+     surfel-matdata := (deref matdata.in)
+     out_Color =
+         pack-surfel-data surfel-normal surfel-depth surfel-albedo surfel-matdata
+
+#uniform img-target-rgba32f : (image2D rgba32f)
+ binding = IMAGE_TARGET_RGBA32F
+ \ coherent writeonly restrict
+
+ fn mixdown (uv) # shading pass: unpacks the surfel under the current fragment and lights it with one directional light; not called at present (shader has it commented out)
+ #let t = (deref shglobals.time)
+ let size = # NOTE(review): size is never read in this function
+ vec2 (deref shglobals.size)
+ let uv2 =
+ (uv * 2.0 - 1.0) * (vec2 shglobals.aspect 1) # uv remapped by * 2 - 1 and scaled by the aspect ratio on x
+ let uv = (ivec2 ((deref gl_FragCoord) . xy + 0.5)) # shadows the uv parameter with an integer pixel coordinate; NOTE(review): gl_FragCoord.xy is at pixel centers (n + 0.5) by default, so adding 0.5 before truncation would address the next texel -- confirm whether that is intended
+
+ let col =
+ texelFetch smp-screen uv 0
+ let normal depth color matdata = (unpack-surfel-data col)
+ if (depth == 0.0) # stored depth of exactly 0 is treated as "nothing here": return a fixed background color
+ return
+ vec4 0.1 0.1 0.2 1
+ let proj =
+ calc-projection;
+ let coord =
+ vec3 ((uv2 * 2.0 - 1.0) * depth / proj.aspect) depth # NOTE(review): uv2 already had the * 2.0 - 1.0 remap applied above, so it is applied a second time here -- confirm intended
+ let rd = (normalize coord) # direction from the origin towards the reconstructed position
+
+ let l =
+ normalize
+ vec3 0.5 -1.0 0.25
+ let lambert =
+ max 0.0 (dot normal l)
+
+ let albedo = color.rgb
+ let ambient =
+ ((dot normal (vec3 0.0 -1.0 0.0)) * 0.5 + 0.5) * 0.1 # between 0 and 0.1 depending on how closely the normal aligns with (0, -1, 0)
+ let exposure = 2.0
+
+ let roughness = matdata.x
+ let metallic = matdata.y
+ let color =
+ linear->sRGB
+ tonemap
+ * exposure
+ +
+ ambient * albedo * (1.0 - metallic)
+ * lambert
+ BRDF albedo
+ roughness
+ metallic
+ \ l rd normal
+
+ return # NOTE(review): the lines after "vec4 color 1.0" look like a commented-out alternative output hanging off "#vec4"; the indentation needed to confirm that is not recoverable from this listing
+ vec4 color 1.0
+ #vec4
+ #\ uv 0.0
+ #normal * 0.5 + 0.5
+ vec3
+ (dot normal (normalize (vec3 0 -1 0))) * 0.5 + 0.5
+ #normhue depth
+ #normhue (radius / 16.0)
+ #normhue (w / 8.0)
+ 1.0
+
+ fn visualize-buffer (uv)
+     # Debug view of the surfel buffer: shows the stored normal remapped from
+     # [-1, 1] to [0, 1] as a color, optionally blended towards a fog color by
+     # depth when FOG is enabled.
+     #
+     # uv is accepted so the function has the same signature as mixdown, but
+     # it is not read: the texel is addressed through gl_FragCoord. The
+     # former locals `size` and `uv2` were computed and never used, so they
+     # have been removed.
+     #
+     # NOTE(review): gl_FragCoord.xy is at pixel centers (n + 0.5) by default,
+     # so adding 0.5 before truncation would address the next texel; kept
+     # unchanged here -- confirm whether that is intended.
+     pixel := (ivec2 ((deref gl_FragCoord) . xy + 0.5))
+     let normal depth color matdata =
+         unpack-surfel-data (texelFetch smp-screen pixel 0)
+     let fog-color = (vec4 0.6 0.8 1 1)
+
+     # a stored depth of exactly 0 means nothing was rasterized at this pixel
+     if (depth == 0.0)
+         return fog-color
+
+     let shaded = (vec4 (normal * 0.5 + 0.5) 0.0)
+     let shaded =
+         static-if FOG
+             # exponential fog: blend weight 1 - 2^(-depth * FOG_RATE)
+             mix shaded fog-color
+                 1.0 - (exp2 (-depth * FOG_RATE))
+         else shaded
+
+     return shaded
+
+ fn shader (uv) # fragment entry point returned by main; selects which full-screen pass is shown
+ #mixdown uv
+ visualize-buffer uv # currently the debug visualization; mixdown above is the disabled alternative
+
+################################################################################
+
+ global sector-queue : (Array u32 SECTOR_CAPACITY) # encoded cells still to be examined by collect-sectors
+ global sectors : (Array Sector SECTOR_CAPACITY) # cells selected by collect-sectors; uploaded to the sector buffer in per-frame-setup
+
+ fn subdivide1d? (p t S)
+     """"Per-axis subdivision test.
+         p : i32 = tile position on this axis
+         t : i32 = camera position on this axis
+         S : i32 = tile size
+         Returns true when (t - p) lies in [1 - S, 2 * S - 2], i.e. when the
+         camera is close enough to the tile on this axis.
+     rel := t - p
+     # signed margin: negative means "inside the subdivision range"
+     let margin =
+         if (rel >= 0)
+             rel - 2 * S + 1
+         else
+             -rel - S
+     margin < 0
+
+ fn subdivide3d? (T t) # true when tile T should be split further for camera position t; one of three variants is selected at compile time below
+ """"T : ivec4 = tile position [0 .. 1<<level]x[0 .. 1<<level]x[0 .. 1<<level], tile level 0..N
+ t : ivec3 = camera position
+ if (T.w <= 0) false # tiles at level 0 (or below) are never subdivided
+ else
+ static-if 0
+ R := (1 << T.w) as f32 * 0.5 # disabled variant: half the tile edge length
+ p0 := (vec3 ((T.xyz as vec-type) << T.w))
+ pc := p0 + R
+ l := (length ((vec3 t) - pc))
+ ((R / l) * SUBDIVIDE_SCALE) > 1.0 # subdivide when R / distance-to-tile-center exceeds 1 / SUBDIVIDE_SCALE
+ elseif 1
+ # subdivide if the camera's tile at this level is within SUBDIVIDE_RADIUS tiles of T on every axis
+ t := ((t >> T.w) - T.xyz) # camera position in units of this tile's size, relative to T
+ #(max (abs t.x) (abs t.y) (abs t.z)) <= 1
+ (max (abs t.x) (abs t.y) (abs t.z)) <= SUBDIVIDE_RADIUS # largest per-axis offset (Chebyshev distance) in tiles
+ else
+ # subdivide if distance to camera falls below 1:2 threshold
+ S := 1 << T.w
+ p := (T.xyz as vec-type) << T.w
+ &
+ subdivide1d? p.x t.x S
+ subdivide1d? p.y t.y S
+ subdivide1d? p.z t.z S
+
+ inline encode-face-bridge-flags (ratiox ratioy ratioz)
+     # Packs three per-axis values into one integer, two bits apart:
+     # x in bits 0-1, y in bits 2-3, z in bits 4-5. Inputs are not masked.
+     ratiox | (ratioy << 2) | (ratioz << 4)
+
+ inline decode-face-bridge-flags (flags)
+     # Inverse of encode-face-bridge-flags: converts flags to i32 and returns
+     # the three 2-bit fields as (x, y, z).
+     bits := flags as i32
+     rx := bits & 3
+     ry := (bits >> 2) & 3
+     rz := (bits >> 4) & 3
+     _ rx ry rz
+
+ fn collect-sectors ()
+     # Rebuilds `sectors`, the list of octree cells to mesh this frame.
+     #
+     # Starting from the root cell, every queued cell is either kept (it is
+     # appended to `sectors`) or replaced by its eight children (appended to
+     # `sector-queue`), depending on subdivide3d? for the current camera
+     # position. The queue is consumed in order while it grows.
+     'clear sector-queue
+     'clear sectors
+     'append sector-queue
+         encode-cell 0:u32 0:u32 0:u32 0:u32
+
+     # The camera position does not change while the queue is processed, so
+     # it is computed once here rather than once per visited cell.
+     cam := cpu_shglobals.view-inverse * (vec4 0 0 0 1)
+     p := (ivec3 ((cam.xyz / WORLD_SCALE + 0.5) * CASCADE_SIZE))
+     #p := (ivec3 (CASCADE_CENTER + cam.xyz))
+
+     for code in sector-queue
+         let lvl x y z = (decode-cell code)
+         coord := (ivec4 x y z (MAX_CASCADE_DEPTH - lvl))
+         if (not (subdivide3d? coord p))
+             # Kept cell. For each of the six face neighbours, look at the
+             # parent-level cell that contains it: when that cell is not
+             # subdivided the corresponding BLEND bit is set.
+             lod := coord.w + 1
+             inline blend-flag (offset flag)
+                 ? (subdivide3d? (ivec4 ((coord.xyz + offset) // 2) lod) p)
+                     \ 0:u32 flag
+             let flags =
+                 |
+                     blend-flag (ivec3 1 0 0) BLEND+X
+                     blend-flag (ivec3 -1 0 0) BLEND-X
+                     blend-flag (ivec3 0 1 0) BLEND+Y
+                     blend-flag (ivec3 0 -1 0) BLEND-Y
+                     blend-flag (ivec3 0 0 1) BLEND+Z
+                     blend-flag (ivec3 0 0 -1) BLEND-Z
+             'append sectors (Sector code flags)
+         else
+             # Subdivided cell: enqueue all eight children. `mask` is the
+             # inverted octant of the camera relative to the cell center, so
+             # the child in the camera's octant is enqueued last.
+             c0 := (ivec3 (coord.xyz as vec-type << coord.w))
+             c1 := (ivec3 ((coord.xyz + 1) << coord.w))
+             c := (c0 + c1) // 2
+             mask :=
+                 |
+                     ? (p.x >= c.x) 1:u32 0:u32
+                     ? (p.y >= c.y) 2:u32 0:u32
+                     ? (p.z >= c.z) 4:u32 0:u32
+             mask := mask ^ 7
+             for index in (range 8:u32)
+                 'append sector-queue
+                     child-cell code (index ^ mask)
+     ;
+
+################################################################################
+
+ inline main () # creates the GL resources and returns (per-frame-setup, shader) for render-fragment-shader
+ global fb-scene-color = (GL.Texture GL.TEXTURE_2D) # RGBA32F color target that receives the packed surfels written by rasterize-frag
+ 'setup fb-scene-color
+ size = (ivec2 2048 2048)
+ format = GL.RGBA32F
+ do
+ let h = 2048 # NOTE(review): h is never used
+ GL.ClearTexImage fb-scene-color 0 GL.RGBA GL.FLOAT null # null data clears the texture to zeros
+
+ global rb-scene-depth = (GL.Renderbuffer)
+ setup-renderbuffer rb-scene-depth 2048 2048
+ format = GL.DEPTH_COMPONENT
+ global fb-scene = (GL.Framebuffer) # offscreen framebuffer: fb-scene-color plus the depth renderbuffer
+ setup-framebuffer fb-scene
+ color = fb-scene-color
+ rb-depth = rb-scene-depth
+
+ global vao-empty = (GL.VertexArray) # no vertex attributes are set up on it here; rasterize-vert reads its vertices from a buffer via gl_VertexID
+
+ global pg-rasterize = (GL.Program)
+ call
+ attach-shaders (deref pg-rasterize)
+ vertex = rasterize-vert
+ fragment = rasterize-frag
+ #debug = true
+
+ global rg : (Option RG)
+
+ fn per-frame-setup (size pg-test frame) # runs each frame: generates or edits the world volume, rebuilds the sector list, meshes it with a compute pass and rasterizes the result into fb-scene
+ let rg =
+ 'force-unwrap rg
+ from (methodsof rg) let static program compute-program indirect-draw-arrays-setup
+
+ GL.BindTextureUnit 0 fb-scene-color
+ GL.Uniform smp-screen 0 # the full-screen pass samples the surfel buffer from texture unit 0
+
+ let world =
+ static GL.Texture
+ inline ()
+ let tex = (GL.Texture GL.TEXTURE_3D)
+ 'setup tex
+ size = (ivec3 WORLD_SIZE)
+ format = WORLD_PIXELFMT
+ lod = true
+ #min-filter = GL.LINEAR_MIPMAP_LINEAR
+ #mag-filter = GL.LINEAR
+ tex
+
+ # generate mipmaps
+ let pg-genworldlod = (compute-program generate-world-lod)
+ if (frame == 0) # first frame only: fill the world volume, then build its mip levels
+ report "generating world..."
+ # generate world
+ let pg-genworld = (compute-program generate-world)
+ GL.UseProgram pg-genworld
+ GL.BindImageTexture BINDING_IMG_WORLD_OUT world 0 GL.TRUE 0
+ GL.WRITE_ONLY
+ WORLD_PIXELFMT
+ GL.DispatchCompute (unpack ((WORLD_SIZE + GROUP_SIZE - 1) // GROUP_SIZE)) # workgroup count rounded up so the whole volume is covered
+ GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT)
+ report "generating world mipmaps..."
+ GL.UseProgram pg-genworldlod
+ for lod in (range 1 (MAX_WORLD_LOD_I + 1)) # each level is computed from the previous one (level lod - 1 is bound as input)
+ GL.BindImageTexture BINDING_IMG_WORLD_IN world (lod - 1) GL.TRUE 0 GL.READ_ONLY WORLD_PIXELFMT
+ GL.BindImageTexture BINDING_IMG_WORLD_OUT world lod GL.TRUE 0 GL.WRITE_ONLY WORLD_PIXELFMT
+ GL.DispatchCompute (unpack (((WORLD_SIZE >> (lod as u32)) + GROUP_SIZE - 1) // GROUP_SIZE))
+ GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT)
+ report "done."
+
+ do
+ # edit world
+ # generate world
+ let pg-updateworld = (compute-program update-world)
+ local mx = 0; local my = 0
+ mstate := (SDL_GetMouseState &mx &my) # button bitmask; mx and my receive the cursor position but are not read here
+ if (mstate != 0) # only while at least one mouse button is held
+ #print mstate
+ GL.UseProgram pg-updateworld
+ GL.Uniform mouse-state (mstate as i32)
+ GL.BindImageTexture BINDING_IMG_WORLD_INOUT world 0 GL.TRUE 0
+ GL.READ_WRITE
+ WORLD_PIXELFMT
+ GL.DispatchCompute (unpack ((WORLD_SIZE + GROUP_SIZE - 1) // GROUP_SIZE))
+ GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT)
+ # generate mipmaps
+ GL.UseProgram pg-genworldlod # NOTE(review): the loop below repeats the mip-chain loop of the frame == 0 branch
+ for lod in (range 1 (MAX_WORLD_LOD_I + 1))
+ GL.BindImageTexture BINDING_IMG_WORLD_IN world (lod - 1) GL.TRUE 0 GL.READ_ONLY WORLD_PIXELFMT
+ GL.BindImageTexture BINDING_IMG_WORLD_OUT world lod GL.TRUE 0 GL.WRITE_ONLY WORLD_PIXELFMT
+ GL.DispatchCompute (unpack (((WORLD_SIZE >> (lod as u32)) + GROUP_SIZE - 1) // GROUP_SIZE))
+ GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT)
+
+ # update terrain
+
+ collect-sectors;
+ sector-count := (countof sectors)
+ if (frame % 120 == 0)
+ print (sector-count as i32) "sectors"
+ let sector_buffer_sz = ((sizeof Sector) * SECTOR_CAPACITY)
+ let sector_buffer =
+ static GL.Buffer
+ inline ()
+ let buf = (GL.Buffer)
+ GL.NamedBufferData buf (i32 sector_buffer_sz) null GL.DYNAMIC_READ
+ buf
+ GL.NamedBufferSubData sector_buffer 0 # upload this frame's sector list (only the first sector-count entries)
+ i32 (sector-count * (sizeof Sector))
+ & (sectors @ 0)
+
+ let vertex_buffer_sz = ((sizeof Vertices) + (sizeof Vertex) * MAX_VERTICES)
+ let vertex_buffer =
+ static GL.Buffer
+ inline ()
+ let buf = (GL.Buffer)
+ GL.NamedBufferData buf (i32 vertex_buffer_sz) null GL.STREAM_COPY
+ buf
+
+ do
+ # clear vertex buffer count
+ let ptr =
+ GL.MapNamedBufferRange vertex_buffer 0 (sizeof u32) # maps only the first u32 of the buffer
+ | GL.MAP_WRITE_BIT
+ GL.MAP_INVALIDATE_BUFFER_BIT
+ #GL.MAP_UNSYNCHRONIZED_BIT
+ let ptr = (bitcast ptr (mutable pointer Vertices))
+ ptr.count = 0:u32 # reset the vertex counter before the meshing pass
+ GL.UnmapNamedBuffer vertex_buffer
+ GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+ BINDING_BUF_VERTEX_OUT
+ vertex_buffer
+ \ 0:i64 (i64 vertex_buffer_sz)
+ GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+ BINDING_BUF_SECTOR_IN
+ sector_buffer
+ \ 0:i64 (i64 (sector-count * (sizeof Sector)))
+ GL.BindTextureUnit 1 world
+ let pg-gen-cell = (compute-program generate-cell-verts)
+ GL.UseProgram pg-gen-cell
+ GL.Uniform smp-world 1
+ MAX_WORKGROUPS := 32768:u32 # sectors are dispatched in batches of at most this many workgroups
+ for i in (range 0:u32 (sector-count as u32) MAX_WORKGROUPS)
+ offset := i
+ GL.Uniform sector-offset offset # index of the first sector handled by this batch
+ sz := (min MAX_WORKGROUPS (sector-count as u32 - i))
+ GL.DispatchCompute sz 1 1 # one workgroup per sector of the batch
+
+ GL.MemoryBarrier GL.SHADER_STORAGE_BARRIER_BIT # NOTE(review): print-in-count below reads the buffer through a mapping; the GL reference lists GL_BUFFER_UPDATE_BARRIER_BIT for mapped reads of shader writes -- confirm whether it should be added
+
+ #############
+
+ inline print-in-count ()
+ let ptr =
+ GL.MapNamedBufferRange vertex_buffer 0 (sizeof u32)
+ GL.MAP_READ_BIT
+ let ptr = (bitcast ptr (pointer Vertices))
+ print (ptr.count / 3) "triangles" # three vertices per triangle (the buffer is drawn as GL.TRIANGLES below)
+ GL.UnmapNamedBuffer vertex_buffer
+
+ if ((frame % 60) == 0)
+ print-in-count;
+
+ vvv bind setup-draw-arrays exec-draw-arrays
+ indirect-draw-arrays-setup
+ inline ()
+ _
+ deref vertex-in.count # first of four values; presumably the vertex count of an indirect draw command (count, instances, first, base instance) -- confirm against indirect-draw-arrays-setup
+ 1
+ 0
+ 0
+
+ GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+ BINDING_BUF_VERTEX_IN
+ vertex_buffer
+ \ 0:i64 (i64 vertex_buffer_sz)
+ setup-draw-arrays;
+
+ do
+ GL.BindFramebuffer GL.FRAMEBUFFER fb-scene
+ GL.Viewport 0 0 (i32 size.x) (i32 size.y)
+ GL.ClearColor 0 0 0 0 # cleared texels carry depth 0, which the full-screen passes treat as empty
+ GL.DepthFunc GL.GREATER # together with ClearDepthf 0 below, the fragment with the greatest depth value wins
+ GL.ClearDepthf 0
+ GL.DepthRangef -1 1 # NOTE(review): glDepthRangef clamps both arguments to [0, 1], so -1 behaves as 0 -- confirm intent
+ #GL.Enable GL.CULL_FACE
+ GL.Disable GL.CULL_FACE # culling is off, so the CullFace mode set on the next line has no effect
+ GL.CullFace GL.BACK
+ GL.Enable GL.DEPTH_TEST
+ GL.Clear
+ |
+ GL.COLOR_BUFFER_BIT
+ GL.DEPTH_BUFFER_BIT
+ GL.STENCIL_BUFFER_BIT
+
+ GL.UseProgram pg-rasterize
+ #GL.BindTextureUnit 1 world
+ #GL.Uniform smp-world 1
+ GL.BindVertexArray vao-empty
+ exec-draw-arrays GL.TRIANGLES
+
+ GL.Disable GL.DEPTH_TEST
+ GL.Disable GL.CULL_FACE
+ GL.BindFramebuffer GL.FRAMEBUFFER 0 # back to the default framebuffer
+
+ let per-frame-setup =
+ static-typify per-frame-setup ivec2 GL.Program i32 # instantiate per-frame-setup for (size : ivec2, pg-test : GL.Program, frame : i32)
+ rg = (RG) # create the resource group that per-frame-setup force-unwraps
+
+ _ per-frame-setup shader
+
+ fn program ()
+     # Entry point: hands `main` to render-fragment-shader with a
+     # 1024 x 1024 size.
+     render-fragment-shader main (size = (ivec2 1024))
+         #debug = true
+
+
+ static-if true # compile-time switch: with true, `program` is called directly
+ program;
+ else
+ define f # alternative path: compile `program` first, then call the compiled function
+ compile
+ typify program
+ 'O3 # compile flag
+
+ f;