# HG changeset patch # User Leonard Ritter # Date 1645182264 -3600 # Fri Feb 18 12:04:24 2022 +0100 # Node ID 77196f1820478fbb5c531b29705083d9bfbdc7f4 # Parent 0c2ada78d5cbe307456d114637488a58599e9461 * first VVF implementation diff --git a/testing/test_cascade_dmc_cc_vvf.sc b/testing/test_cascade_dmc_cc_vvf.sc new file mode 100644 --- /dev/null +++ b/testing/test_cascade_dmc_cc_vvf.sc @@ -0,0 +1,1904 @@ + +# + LOD aware terrain vectorization using dual marching cubes + + VVF -> volumetric vertex field + +define-infix< 40 , _ + +using import glm +using import glsl +using import itertools +using import Array +using import Box +using import struct +using import Option +import ..lib.tukan.use + +import tukan.voxel +let voxel = tukan.voxel + +using import tukan.gl +using import tukan.bitmap +using import tukan.packing +using import tukan.random +using import tukan.color +using import tukan.perfect_hash +using import tukan.raytrace +using import tukan.sdf +using import tukan.sdl +using import tukan.rotation +using import tukan.brdf +using import tukan.normal +using import tukan.noise +using import tukan.projection +using import tukan.derivative +using import tukan.isosurface +using import tukan.hash +using import tukan.spherical +using import tukan.ResourceGroup +using import tukan.logcell +using import .testfragment + +let RG = (ResourceGroup "RG") +from (import tukan.math) let expmix + +SAMPLE_CAMERA_OFFSET := false +PROJECT_FINAL_VERTEX := false +VISUALIZE_IDS := false +POST_TRANSFORM := false # true is worse +OCCLUSION_CULLING := false +FOG := false +USE_FLAT_SHADING := true +BALANCE_QUADS := true +USE_COMPLEX_SURFACE := true +HIGH_QUALITY_FEATURES := true +SOFT_WORLD_SAMPLING := true +USE_CATMULL_CLARK := false +BLOCKY_WORLD := false +USE_VVF_PACKING := true +AVERAGE_LOD_VERTICES := false + +FETCH_UV_OFFSET := 0.5 +#FETCH_UV_OFFSET := 0.0 + +# to reach fog density D at depth Z, FOG_RATE = -log2(1 - D)/Z +FOG_RATE := 0.02 # 50% at 50 units (75% at 100 units) + +let MAX_VERTICES = (20 
* (1 << 20)) + +let WORLD_SIZE = (uvec3 256) +let WORLD_SCALE = (vec3 256.0) +let MAX_WORLD_LOD_I = 8 +let MAX_WORLD_LOD = (MAX_WORLD_LOD_I as f32) + +let CUBE_SIZE = (uvec3 256) +let LOD_OFFSET = -6 +let LOD_NORMAL_OFFSET = -1 +let GROUP_SIZE = 4 +let MAX_SECTOR_LOD_I = 2 +let SECTOR_GROUP_SIZE = (1 << MAX_SECTOR_LOD_I) + +let SUBDIVIDE_RADIUS = 6 +#let SUBDIVIDE_RADIUS = 1 +let SUBDIVIDE_SCALE = 16 + +MAX_CASCADE_DEPTH := 6 # MAX_WORLD_LOD_I - MAX_SECTOR_LOD_I +#MAX_CASCADE_DEPTH := 3 # MAX_WORLD_LOD_I - MAX_SECTOR_LOD_I +SECTOR_CAPACITY := MAX_CASCADE_DEPTH * 100000 +CASCADE_SIZE := (1 << MAX_CASCADE_DEPTH) +CASCADE_CENTER := (CASCADE_SIZE // 2) + +let BINDING_BUF_SECTOR_IN = 1 +let BINDING_BUF_FACE_BRIDGE_IN = 2 +let BINDING_BUF_VERTEX_IN = 3 +let BINDING_BUF_VERTEX_OUT = 4 +let BINDING_BUF_DRAW_VOXELS_CMD = 5 +let BINDING_BUF_DISPATCH_CMD = 6 +let BINDING_IMG_ZBUFFER = 7 +let BINDING_IMG_WORLD_IN = 8 +let BINDING_IMG_WORLD_OUT = 9 +let BINDING_IMG_WORLD_INOUT = 10 + +let UNIFORM_LEVEL = 1 +let UNIFORM_PROGRAM = 2 +let UNIFORM_MOUSE_STATE = 12 +let UNIFORM_SECTOR_OFFSET = 13 +let UNIFORM_SCREEN_SAMPLER = 4 +let UNIFORM_WORLD_SAMPLER = 7 + + +let LEVELS = 8 + +let SEARCH_R = 5 +let SEARCH_Rf = (SEARCH_R as f32) + +let sqrt3 = (sqrt 3.0) + +run-stage; + +struct Vertex plain + pos : vec4 + normal : vec4 + +struct Vertices plain + count : u32 + # each entry holds a vertex + entries : (array Vertex) + +let + BLEND+X = 1:u32 + BLEND-X = 2:u32 + BLEND+Y = 4:u32 + BLEND-Y = 8:u32 + BLEND+Z = 16:u32 + BLEND-Z = 32:u32 + +struct Sector plain + key : u32 + flags : u32 # six face bits indicating where the next highest LOD level is + +struct Sectors plain + keys : (array Sector) + +buffer sector-in : Sectors + binding = BINDING_BUF_SECTOR_IN + \ readonly coherent + +buffer face-bridge-in : Sectors + binding = BINDING_BUF_FACE_BRIDGE_IN + \ readonly coherent + +buffer vertex-in : Vertices + binding = BINDING_BUF_VERTEX_IN + \ readonly coherent + +buffer vertex-out : Vertices 
+ binding = BINDING_BUF_VERTEX_OUT + \ coherent + +uniform smp-screen : sampler2D + location = UNIFORM_SCREEN_SAMPLER + +uniform mouse-state : i32 + location = UNIFORM_MOUSE_STATE + +uniform sector-offset : u32 + location = UNIFORM_SECTOR_OFFSET + +fn simple-sphere (p) + (length p) - 0.5 + +fn nine-spheres (p) + let x = (deref shglobals.time) + r := (mix 0.1 0.2 ((sin x) * 0.5 + 0.5)) + d := (sqrt 0.5) * 0.4 + sdSmoothAnd + sdSmoothOr + (length p) - 0.5 + min + (length (p - (vec3 d d d))) - r + (length (p - (vec3 -d d d))) - r + (length (p - (vec3 d -d d))) - r + (length (p - (vec3 -d -d d))) - r + 0.1 + * -1.0 + min + (length (p - (vec3 d d -d))) - r + (length (p - (vec3 -d d -d))) - r + (length (p - (vec3 d -d -d))) - r + (length (p - (vec3 -d -d -d))) - r + 0.1 + +fn twoballs (p) + let x = (deref shglobals.time) + x := ((sin x) * 0.5 + 0.5) * 0.5 + sdSmoothOr + (length (p - (vec3 0.0 0 x))) - 0.25 + (length (p - (vec3 0.0 0 -x))) - 0.25 + 0.2 + +fn doubletori (p) + let x = (deref shglobals.time) + let xz = + anglevector-rotate + anglevector -x + p.xz + let p1 = + vec3 + xz.x + p.y + xz.y + let p2 = + vec3 + anglevector-rotate + anglevector x + p.xy + p.z + + 'sdSmoothOr + sdmDist + sdTorus p1.xzy (vec2 0.5 0.05) + sdMaterial + vec4 1.0 0.5 0.3 1.0 + sdmDist + sdTorus p2 (vec2 0.4 0.1) + sdMaterial + vec4 0.3 0.5 1.0 1.0 + roughness = 0.1 + 0.2 + +fn two-boxes (p) + let x = 8.1923 # (deref shglobals.time) + let p1 = + vec3 + anglevector-rotate + anglevector -x + p.xy + p.z + let p2 = + vec3 + p.x + anglevector-rotate + anglevector (x * 0.917) + p.yz + sdSmoothOr + sdBox p1 (vec3 0.33) + sdBox p2 (vec3 0.33) + 0.001 + +fn two-boxes-merge (p) + let d = + vec3 0.3 + let sz = + vec3 0.5 + let x = (deref shglobals.time) + r := (mix 1.0 0.2 ((sin x) * 0.5 + 0.5)) + 'sdSmoothAnd + 'sdSmoothOr + sdmDist + #sdSphere + p - (vec3 0.01) + 1.0 + sdUberprim + p - (vec3 0.01) + vec4 1.0 1.0 0.05 0.05 + vec3 0.5 0.05 0.0 + sdMaterial + vec4 1.0 0.9 0.3 1.0 + metallic = 1.0 + 
'sdSmoothOr + sdmDist + sdBox (p - d) sz + sdMaterial + vec4 1.0 0.5 0.3 1.0 + sdmDist + sdBox (p + d) sz + sdMaterial + vec4 0.3 0.5 1.0 1.0 + roughness = 0.2 + 0.5 + 0.05 + sdmDist + - + sdSphere + p - (vec3 0.0 0.0 -0.5) + r + sdMaterial + vec4 0.5 0.3 1.0 1.0 + 0.1 + +fn one-box (p) + sdBox p (vec3 0.33) + +fn matmapf (p) + #p := p + 1.0 + #p := p + (vec3 1 0 0) * shglobals.time + #p := (sdDomainRep p 2.0) + #do + p := p.yzx * 2.0 + (two-boxes-merge p) * 0.5 + #doubletori p + #sdmDist + simple-sphere p + sdMaterial + vec4 0.5 0.3 1.0 1.0 + #scale := 20.0 + sdmDist + static-if 0 + scale := 100.0 + (two-boxes (p / scale)) * scale + elseif 0 + scale := 100.0 + (nine-spheres (p / scale)) * scale + elseif 0 + #p := p * 0.2 + let d = + static-if USE_COMPLEX_SURFACE + ((triquad-noise3 p) * 2.0 - 1.0) - 0.05 + else -inf + + #let d = + min d + ((triquad-noise3 (p / (vec3 100 2.0 2.0))) * 2.0 - 1.0) * 100.0 - 10.0 + + R := ((length p) - 30.0) + let d = + max + max + (length p) - 200.0 + d + -R + #d * scale + d + else + scale := 300.0 + DD := (doubletori (p / scale)) as f32 * scale + S := (length p) + Z := (length p.xy) + #p := p * 0.01 + p := p * 0.2 + local d = 0.0 + for i in (range 5) + s := (exp2 (i as f32)) + d += ((triquad-noise3 (p * s)) * 2.0 - 1.0) / s + min + #do + S - 10.0 + static-if USE_COMPLEX_SURFACE + + + (abs DD) - 10.0 + + (deref d) * 40.0 + else + (abs DD) - 10.0 + + + #Z - 30.0 + #- (Z - 20.0) + sdMaterial + vec4 0.5 0.3 1.0 1.0 + +# single sphere: + 256^3: 89240 cells (best: 0.3ms) + 1024^3: 1427240 cells (best: 3.5ms) +# min: 104567 at 256^3 +fn mapf (p) + #let x = ((radians (deref shglobals.time)) * 10.0) + #let xz = + anglevector-rotate + anglevector -x + p.xz + #let p = + vec3 + xz.x + p.y + xz.y + #simple-sphere p + #twoballs p + #doubletori p + #nine-spheres p + #two-boxes p + #one-box p + (matmapf p) as f32 + +let orig_mapf = mapf + +fn normalmapf (p r) + - (sdNormalFast mapf p r) + +let ONION_NEAR = 0.6 +let ONION_FAR = 100.0 +let ONION_LAYERS 
= 32.0 + +fn map_onion_radius (p) + #r := (clamp ((p.z * 0.5 + 0.5) * 0.5 + 0.5) 0.0 1.0) + r := (clamp (p.z * 0.5 + 0.5) 0.0 1.0) + #r := (p.z * 0.5 + 0.5) * ONION_LAYERS + + # roughly square layers + embed + Z := CUBE_SIZE.z as f32 + r := ONION_NEAR * ((1 + 2 * (sqrt pi) / (Z * 0.5)) ** (r * Z)) + + # exponential interpolation + #embed + r := (expmix ONION_NEAR ONION_FAR r 0.5) + + # infinite perspective projection + #embed + r := r * 0.9999 + k := 0.01 + r := r / (k * (1.0 - r)) + + p := (unpack_normal_snorm p.xy) + #p := + spherical-surface + L1-spherical + octahedral-surface + tile-guyou ((p.xy - 1.0) / 2.0) + + _ (p * r) r + +fn map_onion (p) + let p r = (map_onion_radius p) + p + +fn map_identity (p) p +fn map_identity_radius (p) (_ p 1.0) + +#let map_vertex map_vertex_rlimit = map_onion map_onion_radius +let map_vertex map_vertex_rlimit = map_identity map_identity_radius + +inline map-translation (tpos) + static-if SAMPLE_CAMERA_OFFSET + tpos + (shglobals.view-inverse @ 3) . xyz + else tpos + + +fn calc-projection () + let aspect = (vec2 (/ (deref shglobals.aspect)) 1.0) + 'ifp-perspective ProjectionSetup aspect 0.1 + +inline transform-dist (p) + (mat3 shglobals.view) * p + +inline transform-invert-dist (p) + (mat3 shglobals.view-inverse) * p + +inline transform-invert-pos (p) + v := (deref shglobals.view-inverse) * (vec4 p 1) + v.xyz + +inline transform-pos (p) + v := (deref shglobals.view) * (vec4 p 1) + v.xyz + +fn gradient-root (v0 v1 dir) + c0 := v0.w + c1 := v1.w + g0 := (dot v0.xyz dir) * 0.01 + g1 := (dot v1.xyz dir) * 0.01 + d := 0.5 / (g1 - g0) + c := d * (c0 - c1 - g0 + g1) + w := d * (sqrt (c0 * c0 - 2.0 * c0 * c1 + c1 * c1 + 2.0 * (c0 + c1) * g0 + g0 * g0 - 2.0 * (c0 + c1 + g0) * g1 + g1 * g1)) + q0 := (c - w) * 2.0 - 1.0 + q1 := (c + w) * 2.0 - 1.0 + ? 
((abs q0) < (abs q1)) q0 q1 + +fn triangle-area (A B C) + (length (cross (B - A) (C - A))) / 2.0 + +fn trimix (v p) + # corner weights of cube in [-1..1]³ domain + let c000 c001 c010 c011 c100 c101 c110 c111 = + va-map + inline (i) (copy (v @ i)) + va-range 8 + mix + mix + mix c000 c001 p.x + mix c010 c011 p.x + p.y + mix + mix c100 c101 p.x + mix c110 c111 p.x + p.y + p.z + +fn tet-feature-vertex-normal2 (cd tetidx) + let tetverts = 0x6cc99:u32 + k := tetidx * 3:u32 + k1 := (tetverts >> k) & 7:u32 + k3 := (tetverts >> (k + 3:u32)) & 7:u32 + + let idxs = (ivec4 0 k1 7 k3) + + local p : (array vec3 4) + p @ 0 = (vec3 -1 -1 -1) + p @ 1 = (vec3 (k1 & 1) ((k1 >> 1) & 1) ((k1 >> 2) & 1)) * 2 - 1 + p @ 2 = (vec3 1 1 1) + p @ 3 = (vec3 (k3 & 1) ((k3 >> 1) & 1) ((k3 >> 2) & 1)) * 2 - 1 + let d = (vec4 (cd @ 0) (cd @ k1) (cd @ 7) (cd @ k3)) + + inline tf (i0 i1) + mix (p @ i0) (p @ i1) (tetlerp (d @ i0) (d @ i1)) + + let c i = (tetfaces d) + switch c + case 1:u32 + p0 := (tf i.x i.y) + p1 := (tf i.x i.z) + p2 := (tf i.x i.w) + (p0 + p1 + p2) / 3 + case 2:u32 + p0 := (tf i.x i.z) + p1 := (tf i.x i.w) + p2 := (tf i.y i.w) + p3 := (tf i.y i.z) + (p0 + p1 + p2 + p3) / 4 + default + (+ (p @ 0) (p @ 1) (p @ 2) (p @ 3)) / 4 + +fn tet-feature-vertex-normal (cd) + local outv = (vec4 0) + for tetidx in (range 6:u32) + let tetverts = 0x6cc99:u32 + k := tetidx * 3:u32 + k1 := (tetverts >> k) & 7:u32 + k3 := (tetverts >> (k + 3:u32)) & 7:u32 + + let idxs = (ivec4 0 k1 7 k3) + + local p : (array vec3 4) + p @ 0 = (vec3 -1 -1 -1) + p @ 1 = (vec3 (k1 & 1) ((k1 >> 1) & 1) ((k1 >> 2) & 1)) * 2 - 1 + p @ 2 = (vec3 1 1 1) + p @ 3 = (vec3 (k3 & 1) ((k3 >> 1) & 1) ((k3 >> 2) & 1)) * 2 - 1 + let d = (vec4 (cd @ 0) (cd @ k1) (cd @ 7) (cd @ k3)) + + let c i = (tetfaces d) + if (c == 0:u32) + continue; + let vc = (c * 3) + inline tf (i0 i1) + mix (p @ i0) (p @ i1) (tetlerp (d @ i0) (d @ i1)) + + fn triangle-area (A B C) + # removed factor 1/2 + length (cross (B - A) (C - A)) + + let pc A = + if (c 
== 1:u32) + p0 := (tf i.x i.y) + p1 := (tf i.x i.z) + p2 := (tf i.x i.w) + pc := ((p0 + p1 + p2) / 3) + A := (triangle-area p0 p1 p2) + _ pc A + else # elseif (c == 2:u32) + p0 := (tf i.x i.z) + p1 := (tf i.x i.w) + p2 := (tf i.y i.w) + p3 := (tf i.y i.z) + pc := ((p0 + p1 + p2 + p3) / 4) + A := + + + (triangle-area p0 p1 p2) + (triangle-area p0 p3 p2) + _ pc A + outv += (vec4 pc 1) * (max 1e-5 A) + ? (outv.w == 0) (vec3 0) (outv.xyz / outv.w) + +fn feature-vertex-normal (v) + let c000 c001 c010 c011 c100 c101 c110 c111 = + va-map + inline (i) (copy (v @ i)) + va-range 8 + # unsigned body diagonal vectors + n00 := (vec3 1 1 1) + n01 := (vec3 -1 1 1) + n10 := (vec3 1 -1 1) + n11 := (vec3 -1 -1 1) + # sign flipped differences along body diagonal vectors + g00 := (c000 - c111) + g01 := (c001 - c110) + g10 := (c010 - c101) + g11 := (c011 - c100) + # normal vector (gradient) + g := + do #normalize + + + g00 * n00 + g01 * n01 + g10 * n10 + g11 * n11 + + # corner weights of cube in [-1..1]³ domain + let fv = + static-if HIGH_QUALITY_FEATURES + tet-feature-vertex-normal v + elseif 0 + #embed + # position of zero in interval [-1..1] (inverse lerp) + d00 := (gradient-root c000 c111 n00) + d01 := (gradient-root c001 c110 n01) + d10 := (gradient-root c010 c101 n10) + d11 := (gradient-root c011 c100 n11) + embed + # position of zero in interval [-1..1] (inverse lerp) + d00 := (c000 + c111) / g00 + d01 := (c001 + c110) / g01 + d10 := (c010 + c101) / g10 + d11 := (c011 + c100) / g11 + + # projected points + v00 := d00 * n00 + v01 := d01 * n01 + v10 := d10 * n10 + v11 := d11 * n11 + + let verts... = + ? ((abs d00) <= 1.0) + vec4 v00 1 + vec4 0 + ? ((abs d01) <= 1.0) + vec4 v01 1 + vec4 0 + ? ((abs d10) <= 1.0) + vec4 v10 1 + vec4 0 + ? ((abs d11) <= 1.0) + vec4 v11 1 + vec4 0 + + d := (+ verts... 
) + + # feature vertex + fv := (d.xyz / (max 1.0 d.w)) + + fv + else + inline mapf (p) + p := p * 0.5 + 0.5 + mix + mix + mix c000 c001 p.x + mix c010 c011 p.x + p.y + mix + mix c100 c101 p.x + mix c110 c111 p.x + p.y + p.z + inline grad (p) + sdNormalFast mapf p + local p = (vec3 0) + for i in (range 20) + d := (mapf p) + g := (grad p) + p = (p - d * g) + ; + deref p + #else + let c000 c001 c010 c011 c100 c101 c110 c111 = + va-map + inline (i) (copy ((v @ i) . w)) + va-range 8 + # isoplane point + d := (+ c000 c001 c010 c011 c100 c101 c110 c111) / 8 + g := + / + vec3 + (+ c001 c101 c011 c111) - (+ c000 c100 c010 c110) + (+ c010 c011 c110 c111) - (+ c000 c001 c100 c101) + (+ c100 c110 c101 c111) - (+ c000 c010 c001 c011) + 8 + l := (length g) + g := g / l + d := d / l + k := -d / (dot g g) + k * g + _ fv g + +#do + let k0 = 0 + let k1 = 1 + local ww = + arrayof f32 + \ k1 k0 k1 k1 + \ k1 k1 k1 k1 + print + feature-plane ww + + #print + GL.MAX_COMPUTE_SHARED_MEMORY_SIZE + + if true + exit 0 +#run-stage; + +#embed + let WORLD_PIXELFMT = GL.R32F + let WORLD_IMAGETYPE = (image3D r32f) +#embed + let WORLD_PIXELFMT = GL.R16F + let WORLD_IMAGETYPE = (image3D r16f) +#embed + let WORLD_PIXELFMT = GL.R8 + let WORLD_IMAGETYPE = (image3D r8) +embed + let WORLD_PIXELFMT = GL.RGBA32UI + let WORLD_IMAGETYPE = (uimage3D rgba32ui) + +uniform world-in : WORLD_IMAGETYPE + binding = BINDING_IMG_WORLD_IN + \ coherent readonly restrict + +uniform world-out : WORLD_IMAGETYPE + binding = BINDING_IMG_WORLD_OUT + \ coherent writeonly restrict + +uniform world-inout : WORLD_IMAGETYPE + binding = BINDING_IMG_WORLD_INOUT + \ coherent restrict + +uniform smp-world : usampler3D + location = UNIFORM_WORLD_SAMPLER + +fn pack-vvf (vertex cflags) + static-if USE_VVF_PACKING + uvec4 + (packSnorm4x8 (vec4 vertex 0)) | (cflags << 24:u32) + \ 0 0 0 + else + uvec4 + bitcast vertex.x u32 + bitcast vertex.y u32 + bitcast vertex.z u32 + cflags + +fn unpack-vvf (data) + static-if USE_VVF_PACKING + _ + 
(unpackSnorm4x8 data.x) . xyz as vec-type + data.x >> 24:u32 + else + _ + vec3 + bitcast data.x f32 + bitcast data.y f32 + bitcast data.z f32 + copy data.w + +inline sample-field (ipos mapf) + rd := (2.0 / (vec3 WORLD_SIZE)) + fpos := (vec3 ipos) * rd - 1.0 + pos := (fpos + 0.5 * rd) * WORLD_SCALE + + local cd : (array f32 8) + local mind = inf + local maxd = -inf + for i in (range 8) + let x y z = (i & 1) ((i >> 1) & 1) ((i >> 2) & 1) + pos := (fpos + (vec3 x y z) * rd) * WORLD_SCALE + let d = (mapf pos) + cd @ i = d + mind = (min mind d) + maxd = (max maxd d) + let fv = (feature-vertex-normal cd) + _ fv + | + ? ((cd @ 0) < 0) 1:u32 0:u32 + ? (mind * maxd <= 0) 2:u32 0:u32 + +fn generate-world () + local_size GROUP_SIZE GROUP_SIZE GROUP_SIZE + ipos := (uvec3 gl_GlobalInvocationID.xyz) + if (any? (ipos >= WORLD_SIZE)) + return; + rd := (2.0 / (vec3 WORLD_SIZE)) + inline samplef (pos) + static-if 0 + mapf pos + else + local v = 0.0 + N := 2 + N:u32 := N as u32 + for x y z in (dim N N N) + d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0 + d := d * + do + static-if SOFT_WORLD_SAMPLING 3.0 + else 1.0 + lpos := pos + d * (0.25 * rd * WORLD_SCALE) + #d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0 + #lpos := pos + (d * 3.0 * rd) * WORLD_SCALE + s := (mapf lpos) + #w := (/ (+ (abs d.x) (abs d.y) (abs d.z))) + v += s + (copy v) / (N * N * N) + imageStore world-out ipos (pack-vvf (sample-field ipos samplef)) +# + rd := (2.0 / (vec3 WORLD_SIZE)) + fpos := (vec3 ipos) * rd - 1.0 + pos := (fpos + 0.5 * rd) * WORLD_SCALE + + local cd : (array f32 8) + local mind = inf + local maxd = -inf + for i in (range 8) + let x y z = (i & 1) ((i >> 1) & 1) ((i >> 2) & 1) + pos := (fpos + (vec3 x y z) * rd) * WORLD_SCALE + let d = + static-if 0 + mapf pos + elseif 1 + local v = 0.0 + N := 2 + N:u32 := N as u32 + for x y z in (dim N N N) + d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0 + d := d * + do + static-if SOFT_WORLD_SAMPLING 3.0 + else 1.0 + lpos := pos + d * (0.25 * rd * WORLD_SCALE) + #d := ((vec3 
x y z) / (N - 1)) * 2.0 - 1.0 + #lpos := pos + (d * 3.0 * rd) * WORLD_SCALE + s := (mapf lpos) + #w := (/ (+ (abs d.x) (abs d.y) (abs d.z))) + v += s + (copy v) / (N * N * N) + elseif 0 + local v = (vec4 0) + N := 8 + N:u32 := N as u32 + for x y z in (dim N N N) + d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0 + #d := d * 16.0 + d := d * 3.0 + lpos := pos + d * (0.5 * rd * WORLD_SCALE) + #d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0 + #lpos := pos + (d * 3.0 * rd) * WORLD_SCALE + s := (- (sign (mapf lpos))) + w := (/ (+ (abs d.x) (abs d.y) (abs d.z))) + v += s * w * (vec4 d 1) + ((copy v.w) + 1.0) / 2.0 + else + grad := (normalmapf pos (rd * 0.5)) + local bits = 0 + N := 8 + N:u32 := N as u32 + for x y z in (dim N N N) + lpos := (fpos + ((vec3 x y z) / N) * rd) * WORLD_SCALE + #d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0 + #lpos := pos + (d * 3.0 * rd) * WORLD_SCALE + if ((mapf lpos) <= (rd.x / N)) + bits += 1 + bits as f32 / (N * N * N) + cd @ i = d + mind = (min mind d) + maxd = (max maxd d) + ; + let fv = (feature-vertex-normal cd) + data := + pack-vvf fv + | + ? ((cd @ 0) < 0) 1:u32 0:u32 + ? (mind * maxd <= 0) 2:u32 0:u32 + imageStore world-out ipos data + ; + +inline vertex-valid? (smp pos) + local flags = 0:u32 + for i in (range 8) + pos := pos + (ivec3 (i & 1) ((i >> 1) & 1) ((i >> 2) & 1)) + let vx cf = (unpack-vvf (imageLoad smp pos)) + flags |= (cf & 1) << i as u32 + (flags != 0:u32) & (flags != 255:u32) + +fn update-world () + local_size GROUP_SIZE GROUP_SIZE GROUP_SIZE + ipos := (uvec3 gl_GlobalInvocationID.xyz) + if (any? (ipos >= WORLD_SIZE)) + return; + inline samplef (pos) + origin := pos - 2.0 * (shglobals.view-inverse @ 3) . xyz + origin := origin - (shglobals.view-inverse @ 2) . xyz * 40.0 + (length origin) - 40.0 + let fv cfbrush = (sample-field ipos samplef) + let vx cf = (unpack-vvf (imageLoad world-inout ipos)) + + brush-vertex-valid? := ((cfbrush & 2) != 0) + + local svx = (vec4 0) + ipos := (ivec3 ipos) + svx := (? (vertex-valid? 
world-inout ipos) vx fv) + #svx += (vec4 fv 1) * (? brush-vertex-valid? 0.1 0.0) + #svx += (vec4 vx 1) * (? (vertex-valid? world-inout ipos) 1.0 0.0) + #svx := (? (svx.w == 0) (vec3 0) (svx.xyz / svx.w)) + + #let fv = vx + let fv cf = + if (mouse-state & 4) + #sdSmoothOr d brush 5.0 + _ svx + | + (cf | cfbrush) & 1 + (cf | cfbrush) & 2 + elseif (mouse-state & 1) + #sdSmoothAnd d -brush 2.0 + _ svx + | + (cf & (cfbrush ^ 1)) & 1 + (cf | cfbrush) & 2 + else + return; + + imageStore world-inout ipos (pack-vvf fv cf) + ; + +#uniform lodlevel : i32 + +#vvv print +#fold (w = 0.0) for x y z in (dim 3 3 3) + d := 3 + p := (ivec3 x y z) - 1 + #ap := (ivec3 (abs p.x) (abs p.y) (abs p.z)) + ap := (vec3 (abs p.x) (abs p.y) (abs p.z)) + #w1 := (exp2 (- (ap.x + ap.y + ap.z))) + w1 := (exp2 (- (length (vec3 p)))) + print (x - 1) (y - 1) (z - 1) (w1 / 10.910761) + w + w1 + + +#do + vvv print + fold (w = 0.0) for x y z in (dim 4 4 4) + # blur kernel 4x4x4 + # total sum of weights is S = 2 ** (3*d) + # w = (3 ** d) / (3 ** (abs(p.x) + abs(p.y) + abs(p.z))) / S + # exp2 ((log2 3.0) * (d - ((abs p.x) + (abs p.y) + (abs p.z))) - (log2 2.0) * 3 * d) + d := 3 + ofs := (ivec3 x y z) + p := (vec3 ((ivec3 x y z) >> 1)) + # simulate adding 8 samples + w + + exp2 ((log2 3.0) * (3 - (p.x + p.y + p.z)) - (log2 2.0) * 9) + + +#if true + exit 0 +#run-stage; + +inline windowed-fetch (src ipos) # weighted 3x3x3 fetch from image src around texel ipos; returns the folded weighted sum + isize := (imageSize src) + iz := (ivec3 0) + fold (v = (vec4 0.0)) for x y z in (dim 3 3 3) + p := (ivec3 x y z) - 1 # neighbour offset, each component in -1..1 + ap := (ivec3 (abs p.x) (abs p.y) (abs p.z)) + value := (imageLoad src (ipos + p)) # fixed: load the neighbour at ipos + p (previously every tap re-read ipos) + outofbounds? := (any? (((ipos + p) < iz) | ((ipos + p) >= isize))) # fixed: bounds-test the tap position rather than ipos + + v + * value + ? outofbounds? 0.0 + / (1 << (3 + ap.x + ap.y + ap.z)) # tap weight 1/8 .. 1/64 by summed |offset|; the 27 weights total 1, out-of-bounds taps get 0 + +fn generate-world-lod () + local_size GROUP_SIZE GROUP_SIZE GROUP_SIZE + opos := (ivec3 gl_GlobalInvocationID.xyz) + if (any? 
(opos >= (imageSize world-out))) + return; + isize := (imageSize world-in) + iz := (ivec3 0) + ibpos := opos << 1 + + local vertex = + do + static-if AVERAGE_LOD_VERTICES + vec4 0 + else + vec3 0 + local bestdist = inf + local failed = true + for x y z in (dim 2 2 2) + ipos := ibpos + (ivec3 x y z) + let vx cf = (unpack-vvf (imageLoad world-in ipos)) + vx := (0.5 * vx + (vec3 x y z) - 0.5) + L := (dot vx vx) + #if (((cf & 2) != 0) & (L < bestdist)) + static-if AVERAGE_LOD_VERTICES + if (vertex-valid? world-in ipos) + vertex += (vec4 vx 1) * (1 / L) + failed = false + else + if ((vertex-valid? world-in ipos) & (L < bestdist)) + vertex = vx + bestdist = L + failed = false + vx := + ? failed + vec3 0 + static-if AVERAGE_LOD_VERTICES + vertex.xyz / vertex.w + else + copy vertex + inline fetch (pos) + let __ cf = (unpack-vvf (imageLoad world-in (ibpos + pos * 2))) + cf & 1 + let w = + + + 2 * (fetch (ivec3 0)) + (fetch (ivec3 1 0 0)) + (fetch (ivec3 -1 0 0)) + (fetch (ivec3 0 1 0)) + (fetch (ivec3 0 -1 0)) + (fetch (ivec3 0 0 1)) + (fetch (ivec3 0 0 -1)) + + let cf = + | + #(? (w / 8 < 0.38) 0:u32 1:u32) + | + (fetch (ivec3 0)) + (fetch (ivec3 1 0 0)) & (fetch (ivec3 -1 0 0)) + (fetch (ivec3 0 1 0)) & (fetch (ivec3 0 -1 0)) + (fetch (ivec3 0 0 1)) & (fetch (ivec3 0 0 -1)) + ? failed 0:u32 2:u32 + + #let __ cf = (unpack-vvf ((imageLoad world-in ibpos) . r)) + #let cf = (? (bcount < 3) 0:u32 1:u32) + imageStore world-out opos (pack-vvf vx cf) + ; + +#inline mapf (p lod) +# + z := 0.5 * (exp2 (-lod * 1.0)) + let d = + (textureLod smp-world ((p / WORLD_SCALE) * 0.5 + 0.5) lod) . r + #d := (textureLod smp-world ((p / WORLD_SCALE) * 0.5 + 0.5) 2) . 
r + slimit := (min d (1.0 - d)) + s := 1.0 + #min + (clamp gx -slimit slimit) / gx + (clamp gy -slimit slimit) / gy + (clamp gz -slimit slimit) / gz + (z - d) + +#inline matmapf (p lod) +# + sdmDist (mapf p lod) + sdMaterial + vec4 0.5 0.3 1.0 1.0 + +fn normalmapf (p lod) + r := (1.0 / WORLD_SIZE.x) * WORLD_SCALE + - + sdNormalFast + inline (p) + mapf p (max 0.0 (lod + LOD_NORMAL_OFFSET)) + \ p r + +let SECTOR_SAMPLE_SIZE = (SECTOR_GROUP_SIZE + 2) +let SECTOR_SAMPLE_VOLUME = (SECTOR_SAMPLE_SIZE ** 3) +let SECTOR_GROUP_VOLUME = (SECTOR_GROUP_SIZE ** 3) + +dump "shared memory requirements" + + + (sizeof u32) * SECTOR_SAMPLE_VOLUME + (sizeof vec3) * SECTOR_SAMPLE_VOLUME + (sizeof vec3) * SECTOR_SAMPLE_VOLUME + +shared cell-corner-flags : (array u32 SECTOR_SAMPLE_VOLUME) +shared cell-vertex : (array vec3 SECTOR_SAMPLE_VOLUME) +shared cell-normal : (array vec3 SECTOR_SAMPLE_VOLUME) + +#fn id2index (id) + #id.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + id.y * gl_WorkGroupSize.x + id.x + id.z * (SECTOR_GROUP_SIZE * SECTOR_GROUP_SIZE) + id.y * SECTOR_GROUP_SIZE + id.x + +fn index2id (idx) + x := idx % SECTOR_GROUP_SIZE + idx := idx // SECTOR_GROUP_SIZE + y := idx % SECTOR_GROUP_SIZE + z := idx // SECTOR_GROUP_SIZE + ivec3 x y z + +fn id2svindex (id) + id := id + 1 + id.z * (SECTOR_SAMPLE_SIZE * SECTOR_SAMPLE_SIZE) + id.y * SECTOR_SAMPLE_SIZE + id.x + +fn svindex2id (idx) + x := idx % SECTOR_SAMPLE_SIZE + idx := idx // SECTOR_SAMPLE_SIZE + y := idx % SECTOR_SAMPLE_SIZE + z := idx // SECTOR_SAMPLE_SIZE + (ivec3 x y z) - 1 + +NATIVE_LANE_WIDTH := 64 + +fn normal (v1 v2 v3) + cross + normalize + (v3 - v1) . xyz + normalize + (v2 - v1) . 
xyz + +inline swapnormal (v n) + Vertex v.pos (vec4 n 0) + +fn generate-quad (v00 v01 v10 v11) + let v00 v10 v11 v01 = + static-if BALANCE_QUADS + let du = (v11.pos.xyz - v00.pos.xyz) + let dv = (v10.pos.xyz - v01.pos.xyz) + if ((dot du du) < (dot dv dv)) + _ v00 v10 v11 v01 + else + _ v10 v11 v01 v00 + else + _ v00 v10 v11 v01 + + # generate quad + let ofs = (atomicAdd vertex-out.count 6) + entries := vertex-out.entries + static-if USE_FLAT_SHADING + n0 := (normal v00.pos v10.pos v11.pos) + n1 := (normal v11.pos v01.pos v00.pos) + #n1 := (normal v00.pos v11.pos v01.pos) + + entries @ (ofs + 0) = (swapnormal v00 n0) + entries @ (ofs + 1) = (swapnormal v10 n0) + entries @ (ofs + 2) = (swapnormal v11 n0) + entries @ (ofs + 3) = (swapnormal v11 n1) + entries @ (ofs + 4) = (swapnormal v01 n1) + entries @ (ofs + 5) = (swapnormal v00 n1) + else + entries @ (ofs + 0) = v00 + entries @ (ofs + 1) = v10 + entries @ (ofs + 2) = v11 + entries @ (ofs + 3) = v11 + entries @ (ofs + 4) = v01 + entries @ (ofs + 5) = v00 + ; + +fn generate-cell-verts () + local_size NATIVE_LANE_WIDTH 1 1 + sector := (copy (sector-in.keys @ (gl_WorkGroupID.x + sector-offset))) + sector-flags := (copy sector.flags) + let lvl sectorpos... = (decode-cell sector.key) + #let lvl sectorpos... = (decode-cell 1:u32) + lsectorpos := (ivec3 sectorpos...) + sectorlod := (MAX_CASCADE_DEPTH - lvl) + coord := (ivec4 lsectorpos sectorlod) + + sector-scale := (f32 (1 << sectorlod)) / CASCADE_SIZE + sector-origin := (vec3 lsectorpos) * sector-scale - 0.5 + sector-origin := sector-origin * WORLD_SCALE + sector-scale := sector-scale * WORLD_SCALE + lod := MAX_WORLD_LOD_I - (lvl as i32) + + lane-idx := (copy gl_LocalInvocationIndex) + sectorpos := (lsectorpos << MAX_SECTOR_LOD_I) + + #if + for x y z in (dim 3 3 3) + w := (texelFetch smp-world (lsectorpos + (ivec3 x y z) - 1) (sectorlod as i32)) . 
r + if (w != 0.0) + break false + else true + return; + + SECTOR_SAMPLE_PASSES := (SECTOR_SAMPLE_VOLUME + NATIVE_LANE_WIDTH - 1) // NATIVE_LANE_WIDTH + + dump "SECTOR_SAMPLE_PASSES" SECTOR_SAMPLE_PASSES + + lod := (lod - MAX_SECTOR_LOD_I) + for i in (range SECTOR_SAMPLE_PASSES) + # index in shared buffers + idx := (i * NATIVE_LANE_WIDTH) as u32 + lane-idx + if (idx >= SECTOR_SAMPLE_VOLUME) + break; + lpos := (svindex2id idx) + #idx := (id2index lpos) + gpos := lpos + sectorpos + + inline fetch (pos) + unpack-vvf (texelFetch smp-world pos lod) + + let vertex cf = (fetch gpos) + let vertex1 = + unpack-vvf (texelFetch smp-world (gpos >> 1) (lod + 1)) + vertex1 := (vertex1 - (vec3 (gpos & 1))) * 2.0 + 1.0 + local cflags = (cf & 1) + for i in (range 1 8) + let x y z = (i & 1) ((i >> 1) & 1) ((i >> 2) & 1) + #d := (((x ^ y ^ z) & 1) * 2 - 1) as f32 + wpos := gpos + (ivec3 x y z) + let vx cf = (fetch wpos) + cflags |= ((cf & 1) << (i as u32)) + ; + cflags := (deref cflags) + cell-corner-flags @ idx = cflags + if ((cflags != 0) & (cflags != 0xff)) + cell-vertex @ idx = + do + static-if BLOCKY_WORLD + vec3 0 + else + if + & + (((shglobals.frame // 10) % 2) == 0) + (cf & 2) == 0 + vec3 -10 + elseif (sector-flags == 0:u32) vertex + else + inline blend-factor (x f+1 f-2) + ? ((sector-flags & (f+1 | f-2)) != 0) + ? 
((sector-flags & f+1) != 0) x (1 - x) + 0.0 + #w := (vec3 lpos) / (SECTOR_GROUP_SIZE - 1) + w := (step (vec3 (SECTOR_GROUP_SIZE // 2)) (vec3 lpos)) + w := + vec3 + blend-factor w.x BLEND+X BLEND-X + blend-factor w.y BLEND+Y BLEND-Y + blend-factor w.z BLEND+Z BLEND-Z + mix vertex vertex1 + max w.x w.y w.z + + #cell-normal @ idx = n + ; + ; + barrier; + + lpos := (index2id lane-idx) + idx := (id2svindex lpos) + + cell-scale := sector-scale * (1.0 / SECTOR_GROUP_SIZE) + inline transform-vertex (v dpos) + cell-origin := sector-origin + cell-scale * (vec3 dpos) + cell-origin + cell-scale * (v * 0.5 + 0.5) + + cflags := (copy (cell-corner-flags @ idx)) + + static-if USE_CATMULL_CLARK + inline getvertex (offset) + dpos := lpos + offset + idx := (id2svindex dpos) + v := (copy (cell-vertex @ idx)) + transform-vertex v dpos + + local verts : (array vec4 7) + local n = 0 + va-map + inline (i) + verts @ i = (vec4 0) + va-range 7 + let C00- C00+ C0-0 C0+0 C-00 C+00 C000 = (va-range 7) + + let v000 = (getvertex (ivec3 0 0 0)) + #inline check-edge (IDX ofs mask) + x := (cflags & mask) + if ((x != mask) & ((x ^ mask) != mask)) + m := (v000 + (getvertex ofs)) + w @ C00- = (vec4 m 2) + + inline edgebits (a b) + (cflags >> a as u32) & 1, (cflags >> b as u32) & 1 + + inline edge (a b) + let u v = (edgebits a b) + (u != v), (u == 0) + + inline collect-plane (Du fpermute) + Dv := (Du + 1) % 3 + Dw := (Du + 2) % 3 + for u v in (dim 2 2) + i0 := (u << Du) | (v << Dv) + i1 := i0 ^ (1 << Dw) + let set? flip? = (edge i0 i1) + if set? 
+                    # NOTE(review): this is the tail of a definition that starts above the
+                    # visible excerpt. One statement per line was restored from a
+                    # newline-collapsed copy; absolute nesting depth is an assumption —
+                    # TODO confirm against the repository file.
+                    v00 := v000
+                    let du dv = (u * 2 - 1) (v * 2 - 1)
+                    v01 := (getvertex (fpermute du 0))
+                    v10 := (getvertex (fpermute 0 dv))
+                    v11 := (getvertex (fpermute du dv))
+                    # face vertices
+                    fv := ((v00 + v01 + v10 + v11) / 4)
+                    # edge vertices
+                    ev01 := (v00 + v01) / 2
+                    ev10 := (v00 + v10) / 2
+                    # accumulate weighted sums; the w component carries the total weight
+                    # that build-plane later divides by (xyz / w)
+                    verts @ C000 += (vec4 (fv + ev01 + ev10) 3)
+                    verts @ (Du * 2 + u) += (vec4 (fv + ev01) 2)
+                    verts @ (Dv * 2 + v) += (vec4 (fv + ev10) 2)
+                    n += 1
+            ;
+
+        collect-plane 0 # XY
+            inline (du dv) (ivec3 du dv 0)
+        collect-plane 1 # YZ
+            inline (du dv) (ivec3 0 du dv)
+        collect-plane 2 # ZX
+            inline (du dv) (ivec3 dv 0 du)
+
+        # n counts the contributions gathered by the three collect-plane passes
+        n := n as f32
+        verts @ C000 += n * (n - 3) * (vec4 v000 1)
+
+        # Second pass over the same edges: emits one quad per set edge from the
+        # normalized center (cv), edge (ev01, ev10) and face (fv) points.
+        inline build-plane (Du fpermute)
+            Dv := (Du + 1) % 3
+            Dw := (Du + 2) % 3
+            for u v in (dim 2 2)
+                i0 := (u << Du) | (v << Dv)
+                i1 := i0 ^ (1 << Dw)
+                let set? flip? = (edge i0 i1)
+                if set?
+                    flip? := flip? ^ ((u ^ v) == 1)
+                    v00 := v000
+                    let du dv = (u * 2 - 1) (v * 2 - 1)
+                    v01 := (getvertex (fpermute du 0))
+                    v10 := (getvertex (fpermute 0 dv))
+                    v11 := (getvertex (fpermute du dv))
+                    # face vertices
+                    fv := (v00 + v01 + v10 + v11) / 4
+                    # edge vertices
+                    ev01 := (copy (verts @ (Du * 2 + u)))
+                    ev01 := ev01.xyz / ev01.w
+                    ev10 := (copy (verts @ (Dv * 2 + v)))
+                    ev10 := ev10.xyz / ev10.w
+                    # center vertex
+                    cv := (copy (verts @ C000))
+                    cv := cv.xyz / cv.w
+                    # swapping the two edge points reverses the quad's vertex order
+                    let ev01 ev10 =
+                        if flip? (_ ev10 ev01)
+                        else (_ ev01 ev10)
+                    generate-quad
+                        Vertex (vec4 cv 1) (vec4 0)
+                        Vertex (vec4 ev01 1) (vec4 0)
+                        Vertex (vec4 ev10 1) (vec4 0)
+                        Vertex (vec4 fv 1) (vec4 0)
+
+        build-plane 0 # XY
+            inline (du dv) (ivec3 du dv 0)
+        build-plane 1 # YZ
+            inline (du dv) (ivec3 0 du dv)
+        build-plane 2 # ZX
+            inline (du dv) (ivec3 dv 0 du)
+
+    else
+        # Builds a Vertex for the cell stored at `idx`; the normal slot is a fixed
+        # (0 0 1 0) placeholder because the cell-normal fetch is commented out.
+        inline getidxvertex (idx dpos)
+            v := (copy (cell-vertex @ idx))
+            #n := (copy (cell-normal @ idx))
+            Vertex
+                vec4 (transform-vertex v dpos) 1
+                vec4 0 0 1 0
+
+        # Vertex of the cell at `lpos + offset`.
+        inline getvertex (offset)
+            dpos := lpos + offset
+            idx := (id2svindex dpos)
+            getidxvertex idx dpos
+
+        v00 := (getidxvertex idx lpos)
+        centerbit := (cflags >> 7) & 1
+        flip? := (centerbit != 0)
+
+        # one candidate quad per axis i: emitted when bit 7 of cflags differs from
+        # bit ((1 << i) ^ 7) of cflags
+        for i in (range 3)
+            if ((centerbit ^ ((cflags >> ((1 << i as u32) ^ 7)) & 1)) != 0)
+                v1 := ((ivec3 0b100 0b001 0b010) >> i) & 1
+                v2 := ((ivec3 0b010 0b100 0b001) >> i) & 1
+                # swap the two in-plane offsets to reverse the vertex order
+                let v1 v2 =
+                    if flip? (_ v2 v1)
+                    else (_ v1 v2)
+                let v01 = (getvertex v1)
+                let v10 = (getvertex v2)
+                let v11 = (getvertex (v1 | v2))
+                generate-quad v00 v01 v10 v11
+
+    ;
+
+# Interface variables shared by rasterize-vert (written via .out) and
+# rasterize-frag (read via .in).
+inout normal : vec3
+inout depthval : f32
+inout albedo : vec4
+inout matdata : vec4
+# Vertex stage of the rasterization pass.
+# Fetches entry gl_VertexID from vertex-in.entries, runs the position through
+# transform-pos, projects it with calc-projection / 'project and forwards the
+# normal, coord.z as depth, and constant albedo / material data.
+fn rasterize-vert ()
+    let vertex-index = ((deref gl_VertexID) as u32)
+    let vin = (deref (vertex-in.entries @ vertex-index))
+    let coord = (vec3 vin.pos.xyz)
+    # pos.w is read as the level of detail; lod is only referenced by
+    # commented-out code below
+    let lod = (vin.pos.w as f32)
+
+    let tcoord = coord
+
+    #let dist = (matmapf tcoord lod)
+    #let material =
+        dist.material
+    #let dist = dist0
+    #let material =
+        'mix dist0.material dist1.material l
+    let n = (vec3 vin.normal.xyz)
+        #normalmapf tcoord lod # (r * 0.5)
+
+    # NOTE(review): the rotation block below is laid out as part of the `#embed`
+    # comment (i.e. disabled). That depends on indentation lost in this copy —
+    # TODO confirm against the repository file.
+    # rotate it a little
+    #embed
+        let a = ((deref shglobals.time) * 0.2)
+        let c s = (cos a) (sin a)
+
+        n :=
+            vec3
+                c * n.x - s * n.z
+                n.y
+                s * n.x + c * n.z
+
+        coord :=
+            vec3
+                c * coord.x - s * coord.z
+                coord.y
+                s * coord.x + c * coord.z
+
+        #coord :=
+            coord + (vec3 0 0 1)
+    #n := (transform-dist n)
+
+    #if PROJECT_FINAL_VERTEX
+
+    let coord = (transform-pos coord)
+
+    let proj =
+        calc-projection;
+
+    let pcoord =
+        'project proj
+            vec4 coord 1.0
+
+    # with VISUALIZE_IDS the normal channel carries a per-vertex hash remapped
+    # from [0, 1] to [-1, 1] instead of the real normal
+    normal.out =
+        #(viridis (lod / MAX_WORLD_LOD)) * 2.0 - 1.0
+        do
+            static-if VISUALIZE_IDS ((vec3hash (vertex-index as f32)) * 2.0 - 1.0)
+            else n
+    depthval.out = coord.z
+    albedo.out = (vec4 1) #material.albedo
+    matdata.out =
+        #vec4 material.roughness material.metallic 0 0
+        vec4 1 0 0 0
+    gl_Position = pcoord
+    ;
+
+# Packs one surfel into a vec4 of f32 lanes:
+#   x = normal  (pack_normal_snorm, then packSnorm2x16, bit-cast to f32)
+#   y = matdata (packUnorm4x8, bit-cast to f32)
+#   z = color   (packUnorm4x8, bit-cast to f32)
+#   w = depth   (stored unmodified)
+fn pack-surfel-data (normal depth color matdata)
+    let normal =
+        bitcast (packSnorm2x16 (pack_normal_snorm normal)) f32
+        #bitcast (packSnorm4x8 (vec4 (normalize normal) 0)) f32
+    let color =
+        bitcast (packUnorm4x8 color) f32
+    let matdata =
+        bitcast (packUnorm4x8 matdata) f32
+    vec4 normal matdata color depth
+
+# Inverse of pack-surfel-data.
+# Returns four values in this order: normal, depth, color, matdata.
+fn unpack-surfel-data (frag)
+    let normal =
+        unpack_normal_snorm (unpackSnorm2x16 (bitcast frag.x u32))
+        #vec3 ((unpackSnorm4x8 (bitcast frag.x u32)) . xyz)
+    let matdata =
+        unpackUnorm4x8 (bitcast frag.y u32)
+    let color =
+        unpackUnorm4x8 (bitcast frag.z u32)
+    _
+        normal
+        frag.w
+        color
+        matdata
+
+out out_Color : vec4
+    binding = 0
+# Fragment stage of the rasterization pass: writes the interpolated inputs as
+# one packed surfel.
+fn rasterize-frag ()
+    out_Color =
+        pack-surfel-data
+            deref normal.in
+            deref depthval.in
+            deref albedo.in
+            deref matdata.in
+
+#uniform img-target-rgba32f : (image2D rgba32f)
+    binding = IMAGE_TARGET_RGBA32F
+    \ coherent writeonly restrict
+
+# Shades the surfel under the current fragment with ambient + Lambert * BRDF,
+# tonemaps and converts to sRGB. Texels with depth == 0 get a flat background.
+# NOTE(review): not called at present — `shader` has `#mixdown uv` commented out.
+fn mixdown (uv)
+    #let t = (deref shglobals.time)
+    let size =
+        vec2 (deref shglobals.size)
+    let uv2 =
+        (uv * 2.0 - 1.0) * (vec2 shglobals.aspect 1)
+    # from here on `uv` is an integer texel coordinate derived from gl_FragCoord
+    let uv = (ivec2 ((deref gl_FragCoord) . xy + 0.5))
+
+    let col =
+        texelFetch smp-screen uv 0
+    let normal depth color matdata = (unpack-surfel-data col)
+    # depth 0 matches the colour the scene framebuffer is cleared to (0 0 0 0)
+    if (depth == 0.0)
+        return
+            vec4 0.1 0.1 0.2 1
+    let proj =
+        calc-projection;
+    # NOTE(review): uv2 was already remapped with (uv * 2 - 1) above and is
+    # remapped again here — looks unintended; confirm before re-enabling mixdown.
+    let coord =
+        vec3 ((uv2 * 2.0 - 1.0) * depth / proj.aspect) depth
+    let rd = (normalize coord)
+
+    let l =
+        normalize
+            vec3 0.5 -1.0 0.25
+    let lambert =
+        max 0.0 (dot normal l)
+
+    let albedo = color.rgb
+    let ambient =
+        ((dot normal (vec3 0.0 -1.0 0.0)) * 0.5 + 0.5) * 0.1
+    let exposure = 2.0
+
+    let roughness = matdata.x
+    let metallic = matdata.y
+    let color =
+        linear->sRGB
+            tonemap
+                * exposure
+                    +
+                        ambient * albedo * (1.0 - metallic)
+                        * lambert
+                            BRDF albedo
+                                roughness
+                                metallic
+                                \ l rd normal
+
+    return
+        vec4 color 1.0
+        #vec4
+            #\ uv 0.0
+            #normal * 0.5 + 0.5
+            vec3
+                (dot normal (normalize (vec3 0 -1 0))) * 0.5 + 0.5
+            #normhue depth
+            #normhue (radius / 16.0)
+            #normhue (w / 8.0)
+            1.0
+
+# Debug view of the surfel buffer: shows the normal remapped to [0, 1] as RGB.
+# Texels with depth == 0 return the fog colour; with FOG enabled the result is
+# blended towards it by 1 - exp2(-depth * FOG_RATE).
+fn visualize-buffer (uv)
+    #let t = (deref shglobals.time)
+    let size =
+        vec2 (deref shglobals.size)
+    let uv2 =
+        (uv * 2.0 - 1.0) * (vec2 shglobals.aspect 1)
+    # from here on `uv` is an integer texel coordinate derived from gl_FragCoord
+    let uv = (ivec2 ((deref gl_FragCoord) . xy + 0.5))
+
+    let col =
+        texelFetch smp-screen uv 0
+    let normal depth color matdata = (unpack-surfel-data col)
+    let fog-color = (vec4 0.6 0.8 1 1)
+
+    if (depth == 0.0)
+        return fog-color
+
+    let col =
+        vec4
+            normal * 0.5 + 0.5
+            #normhue depth
+            #normhue (radius / 16.0)
+            #color
+            0.0
+    let col =
+        static-if FOG
+            mix col fog-color
+                1.0 - (exp2 (-depth * FOG_RATE))
+        else col
+
+    return col
+
+# Fragment entry point returned by `main`; currently forwards to
+# visualize-buffer.
+fn shader (uv)
+    #mixdown uv
+    visualize-buffer uv
+
+################################################################################
+
+# sector-queue: work list of encoded cells used by collect-sectors
+# sectors: leaf sectors produced by collect-sectors, uploaded once per frame
+global sector-queue : (Array u32 SECTOR_CAPACITY)
+global sectors : (Array Sector SECTOR_CAPACITY)
+
+# One-axis subdivision test. With d = t - p the result is true exactly when
+# -S < d < 2 * S - 1.
+fn subdivide1d? (p t S)
+    """"p : i32 = tile position
+        t : i32 = camera position
+        S : i32 = tile size
+    # subdivide if distance to camera falls below 1:2 threshold
+    t := t - p
+    t :=
+        ? (t >= 0)
+            t - 2 * S + 1
+            -t - S
+    t < 0
+
+# NOTE(review): the docstring and the first branch of this function are
+# truncated in this copy (text between `1<` and `1.0` is missing); the damaged
+# line is kept verbatim. Restore it from the repository before relying on it.
+# The visible `elseif 1` branch shifts the camera position right by T.w,
+# subtracts T.xyz and subdivides when the largest absolute component is
+# <= SUBDIVIDE_RADIUS.
+fn subdivide3d? (T t)
+    """"T : ivec4 = tile position [0 .. 1< 1.0
+    elseif 1
+        # subdivide if 3x3 tile contains camera
+        t := ((t >> T.w) - T.xyz)
+        #(max (abs t.x) (abs t.y) (abs t.z)) <= 1
+        (max (abs t.x) (abs t.y) (abs t.z)) <= SUBDIVIDE_RADIUS
+    else
+        # subdivide if distance to camera falls below 1:2 threshold
+        S := 1 << T.w
+        p := (T.xyz as vec-type) << T.w
+        &
+            subdivide1d? p.x t.x S
+            subdivide1d? p.y t.y S
+            subdivide1d? p.z t.z S
+
+# Packs three values into one flag word at bit offsets 0, 2 and 4.
+inline encode-face-bridge-flags (ratiox ratioy ratioz)
+    | ratiox (ratioy << 2) (ratioz << 4)
+
+# Inverse of encode-face-bridge-flags: returns the three 2-bit fields
+# (each masked with 3) as i32 values.
+inline decode-face-bridge-flags (flags)
+    flags := flags as i32
+    _ (flags & 3) ((flags >> 2) & 3) ((flags >> 4) & 3)
+
+# Rebuilds `sectors` for the current camera position.
+# Starts from the root cell and walks sector-queue: a cell for which
+# subdivide3d? is false becomes a leaf Sector, any other cell pushes its eight
+# children onto the queue.
+fn collect-sectors ()
+    'clear sector-queue
+    'clear sectors
+    'append sector-queue
+        encode-cell 0:u32 0:u32 0:u32 0:u32
+
+    # NOTE(review): the queue grows while it is being iterated; this presumably
+    # relies on the Array generator re-reading the element count every step —
+    # TODO confirm.
+    for code in sector-queue
+        #while (not (empty? queue))
+        #code := ('pop queue)
+        # NOTE(review): q and p do not depend on `code` and could be computed
+        # once before the loop.
+        q := cpu_shglobals.view-inverse * (vec4 0 0 0 1)
+
+        p := (ivec3 ((q.xyz / WORLD_SCALE + 0.5) * CASCADE_SIZE))
+        #p := (ivec3 (CASCADE_CENTER + q.xyz))
+        let lvl x y z = (decode-cell code)
+        coord := (ivec4 x y z (MAX_CASCADE_DEPTH - lvl))
+        if (not (subdivide3d? coord p))
+            # leaf: test the six face neighbours one level up; a face gets its
+            # BLEND flag when that coarser neighbour cell does not subdivide
+            lod := coord.w + 1
+            let flags =
+                |
+                    ? (subdivide3d?
+                        (ivec4 ((coord.xyz + (ivec3 1 0 0)) // 2) lod) p)
+                        \ 0:u32 BLEND+X
+                    ? (subdivide3d?
+                        (ivec4 ((coord.xyz + (ivec3 -1 0 0)) // 2) lod) p)
+                        \ 0:u32 BLEND-X
+                    ? (subdivide3d?
+                        (ivec4 ((coord.xyz + (ivec3 0 1 0)) // 2) lod) p)
+                        \ 0:u32 BLEND+Y
+                    ? (subdivide3d?
+                        (ivec4 ((coord.xyz + (ivec3 0 -1 0)) // 2) lod) p)
+                        \ 0:u32 BLEND-Y
+                    ? (subdivide3d?
+                        (ivec4 ((coord.xyz + (ivec3 0 0 1)) // 2) lod) p)
+                        \ 0:u32 BLEND+Z
+                    ? (subdivide3d?
+                        (ivec4 ((coord.xyz + (ivec3 0 0 -1)) // 2) lod) p)
+                        \ 0:u32 BLEND-Z
+            'append sectors (Sector code flags)
+        else
+            # c is the cell centre; mask gets one bit per axis on which the
+            # camera lies at or beyond the centre, then all three bits are
+            # inverted. Children are queued in the order index ^ mask.
+            c0 := (ivec3 (coord.xyz as vec-type << coord.w))
+            c1 := (ivec3 ((coord.xyz + 1) << coord.w))
+            c := (c0 + c1) // 2
+            mask :=
+                |
+                    ? (p.x >= c.x) 1:u32 0:u32
+                    ? (p.y >= c.y) 2:u32 0:u32
+                    ? (p.z >= c.z) 4:u32 0:u32
+            mask := mask ^ 7
+            for index in (range 8:u32)
+                'append sector-queue
+                    child-cell code (index ^ mask)
+    #tilecount := ((countof tiles) as i32)
+    ;
+
+################################################################################
+
+# Creates the GL resources (2048x2048 RGBA32F colour target, depth
+# renderbuffer, framebuffer, empty VAO, rasterize program) and returns the
+# typified per-frame callback together with the fragment shader function.
+inline main ()
+    global fb-scene-color = (GL.Texture GL.TEXTURE_2D)
+    'setup fb-scene-color
+        size = (ivec2 2048 2048)
+        format = GL.RGBA32F
+    do
+        # NOTE(review): `h` is never used.
+        let h = 2048
+        GL.ClearTexImage fb-scene-color 0 GL.RGBA GL.FLOAT null
+
+    global rb-scene-depth = (GL.Renderbuffer)
+    setup-renderbuffer rb-scene-depth 2048 2048
+        format = GL.DEPTH_COMPONENT
+    global fb-scene = (GL.Framebuffer)
+    setup-framebuffer fb-scene
+        color = fb-scene-color
+        rb-depth = rb-scene-depth
+
+    # the vertex stage pulls its data from a storage buffer by gl_VertexID, so
+    # the VAO bound for drawing is empty
+    global vao-empty = (GL.VertexArray)
+
+    global pg-rasterize = (GL.Program)
+    call
+        attach-shaders (deref pg-rasterize)
+            vertex = rasterize-vert
+            fragment = rasterize-frag
+            #debug = true
+
+    global rg : (Option RG)
+
+    # Per-frame work: (re)generate / edit the world volume, collect sectors,
+    # generate cell vertices on the GPU, then rasterize them into fb-scene.
+    # `pg-test` is not referenced in the body.
+    fn per-frame-setup (size pg-test frame)
+        let rg =
+            'force-unwrap rg
+        from (methodsof rg) let static program compute-program indirect-draw-arrays-setup
+
+        # texture unit 0 feeds smp-screen, which mixdown / visualize-buffer read
+        GL.BindTextureUnit 0 fb-scene-color
+        GL.Uniform smp-screen 0
+
+        let world =
+            static GL.Texture
+                inline ()
+                    let tex = (GL.Texture GL.TEXTURE_3D)
+                    'setup tex
+                        size = (ivec3 WORLD_SIZE)
+                        format = WORLD_PIXELFMT
+                        lod = true
+                        #min-filter = GL.LINEAR_MIPMAP_LINEAR
+                        #mag-filter = GL.LINEAR
+                    tex
+
+        # generate mipmaps
+        let pg-genworldlod = (compute-program generate-world-lod)
+        if (frame == 0)
+            report "generating world..."
+            # generate world
+            let pg-genworld = (compute-program generate-world)
+            GL.UseProgram pg-genworld
+            GL.BindImageTexture BINDING_IMG_WORLD_OUT world 0 GL.TRUE 0
+                GL.WRITE_ONLY
+                WORLD_PIXELFMT
+            # group count per axis is WORLD_SIZE / GROUP_SIZE rounded up
+            GL.DispatchCompute (unpack ((WORLD_SIZE + GROUP_SIZE - 1) // GROUP_SIZE))
+            GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT)
+            report "generating world mipmaps..."
+            # NOTE(review): this mip-chain loop is repeated verbatim in the edit
+            # path below; candidate for a shared inline helper.
+            GL.UseProgram pg-genworldlod
+            for lod in (range 1 (MAX_WORLD_LOD_I + 1))
+                GL.BindImageTexture BINDING_IMG_WORLD_IN world (lod - 1) GL.TRUE 0 GL.READ_ONLY WORLD_PIXELFMT
+                GL.BindImageTexture BINDING_IMG_WORLD_OUT world lod GL.TRUE 0 GL.WRITE_ONLY WORLD_PIXELFMT
+                GL.DispatchCompute (unpack (((WORLD_SIZE >> (lod as u32)) + GROUP_SIZE - 1) // GROUP_SIZE))
+                GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT)
+            report "done."
+
+        do
+            # edit world
+            # generate world
+            let pg-updateworld = (compute-program update-world)
+            local mx = 0; local my = 0
+            mstate := (SDL_GetMouseState &mx &my)
+            # only run the edit pass while SDL reports a non-zero mouse state
+            if (mstate != 0)
+                #print mstate
+                GL.UseProgram pg-updateworld
+                GL.Uniform mouse-state (mstate as i32)
+                GL.BindImageTexture BINDING_IMG_WORLD_INOUT world 0 GL.TRUE 0
+                    GL.READ_WRITE
+                    WORLD_PIXELFMT
+                GL.DispatchCompute (unpack ((WORLD_SIZE + GROUP_SIZE - 1) // GROUP_SIZE))
+                GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT)
+                # NOTE(review): placing the mip regeneration inside this `if` is
+                # an assumption about the lost indentation — TODO confirm.
+                # generate mipmaps
+                GL.UseProgram pg-genworldlod
+                for lod in (range 1 (MAX_WORLD_LOD_I + 1))
+                    GL.BindImageTexture BINDING_IMG_WORLD_IN world (lod - 1) GL.TRUE 0 GL.READ_ONLY WORLD_PIXELFMT
+                    GL.BindImageTexture BINDING_IMG_WORLD_OUT world lod GL.TRUE 0 GL.WRITE_ONLY WORLD_PIXELFMT
+                    GL.DispatchCompute (unpack (((WORLD_SIZE >> (lod as u32)) + GROUP_SIZE - 1) // GROUP_SIZE))
+                    GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT)
+
+        # update terrain
+
+        collect-sectors;
+        sector-count := (countof sectors)
+        if (frame % 120 == 0)
+            print (sector-count as i32) "sectors"
+        # the GPU buffer is allocated once for SECTOR_CAPACITY entries; each
+        # frame only the first sector-count entries are uploaded
+        let sector_buffer_sz = ((sizeof Sector) * SECTOR_CAPACITY)
+        let sector_buffer =
+            static GL.Buffer
+                inline ()
+                    let buf = (GL.Buffer)
+                    GL.NamedBufferData buf (i32 sector_buffer_sz) null GL.DYNAMIC_READ
+                    buf
+        # NOTE(review): `sectors @ 0` assumes at least one sector was collected
+        GL.NamedBufferSubData sector_buffer 0
+            i32 (sector-count * (sizeof Sector))
+            & (sectors @ 0)
+
+        let vertex_buffer_sz = ((sizeof Vertices) + (sizeof Vertex) * MAX_VERTICES)
+        let vertex_buffer =
+            static GL.Buffer
+                inline ()
+                    let buf = (GL.Buffer)
+                    GL.NamedBufferData buf (i32 vertex_buffer_sz) null GL.STREAM_COPY
+                    buf
+
+        do
+            # clear vertex buffer count
+            let ptr =
+                GL.MapNamedBufferRange vertex_buffer 0 (sizeof u32)
+                    | GL.MAP_WRITE_BIT
+                        GL.MAP_INVALIDATE_BUFFER_BIT
+                        #GL.MAP_UNSYNCHRONIZED_BIT
+            let ptr = (bitcast ptr (mutable pointer Vertices))
+            ptr.count = 0:u32
+            GL.UnmapNamedBuffer vertex_buffer
+        GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+            BINDING_BUF_VERTEX_OUT
+            vertex_buffer
+            \ 0:i64 (i64 vertex_buffer_sz)
+        GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+            BINDING_BUF_SECTOR_IN
+            sector_buffer
+            \ 0:i64 (i64 (sector-count * (sizeof Sector)))
+        GL.BindTextureUnit 1 world
+        let pg-gen-cell = (compute-program generate-cell-verts)
+        GL.UseProgram pg-gen-cell
+        GL.Uniform smp-world 1
+        # dispatch one workgroup per sector, in batches of at most
+        # MAX_WORKGROUPS; sector-offset tells the shader where the batch starts
+        MAX_WORKGROUPS := 32768:u32
+        for i in (range 0:u32 (sector-count as u32) MAX_WORKGROUPS)
+            offset := i
+            GL.Uniform sector-offset offset
+            sz := (min MAX_WORKGROUPS (sector-count as u32 - i))
+            GL.DispatchCompute sz 1 1
+
+        GL.MemoryBarrier GL.SHADER_STORAGE_BARRIER_BIT
+
+        #############
+
+        # Reads the vertex counter back from the buffer and prints it as a
+        # triangle count (count / 3; the draw below uses GL.TRIANGLES).
+        inline print-in-count ()
+            let ptr =
+                GL.MapNamedBufferRange vertex_buffer 0 (sizeof u32)
+                    GL.MAP_READ_BIT
+            let ptr = (bitcast ptr (pointer Vertices))
+            print (ptr.count / 3) "triangles"
+            GL.UnmapNamedBuffer vertex_buffer
+
+        if ((frame % 60) == 0)
+            print-in-count;
+
+        # the inline passed to indirect-draw-arrays-setup returns four values;
+        # the first is the vertex count read from vertex-in.count
+        vvv bind setup-draw-arrays exec-draw-arrays
+        indirect-draw-arrays-setup
+            inline ()
+                _
+                    deref vertex-in.count
+                    1
+                    0
+                    0
+
+        GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+            BINDING_BUF_VERTEX_IN
+            vertex_buffer
+            \ 0:i64 (i64 vertex_buffer_sz)
+        setup-draw-arrays;
+
+        do
+            GL.BindFramebuffer GL.FRAMEBUFFER fb-scene
+            GL.Viewport 0 0 (i32 size.x) (i32 size.y)
+            GL.ClearColor 0 0 0 0
+            # depth is cleared to 0 and the test is GREATER, so larger depth
+            # values win
+            GL.DepthFunc GL.GREATER
+            GL.ClearDepthf 0
+            # NOTE(review): core glDepthRangef clamps its arguments to [0, 1] —
+            # confirm that -1 is intended here.
+            GL.DepthRangef -1 1
+            #GL.Enable GL.CULL_FACE
+            GL.Disable GL.CULL_FACE
+            GL.CullFace GL.BACK
+            GL.Enable GL.DEPTH_TEST
+            GL.Clear
+                |
+                    GL.COLOR_BUFFER_BIT
+                    GL.DEPTH_BUFFER_BIT
+                    GL.STENCIL_BUFFER_BIT
+
+            GL.UseProgram pg-rasterize
+            #GL.BindTextureUnit 1 world
+            #GL.Uniform smp-world 1
+            GL.BindVertexArray vao-empty
+            exec-draw-arrays GL.TRIANGLES
+
+            # restore state and return to the default framebuffer
+            GL.Disable GL.DEPTH_TEST
+            GL.Disable GL.CULL_FACE
+            GL.BindFramebuffer GL.FRAMEBUFFER 0
+
+    let per-frame-setup =
+        static-typify per-frame-setup ivec2 GL.Program i32
+    rg = (RG)
+
+    _ per-frame-setup shader
+
+# Runs `main` through render-fragment-shader with a 1024 x 1024 size.
+fn program ()
+    render-fragment-shader main
+        #debug = true
+        size = (ivec2 1024)
+
+
+# `static-if true` always takes the first branch; the else branch (compile
+# `program` with 'O3, then call it) is kept as a switchable alternative.
+static-if true
+    program;
+else
+    define f
+        compile
+            typify program
+            'O3
+
+    f;