77196f182047 — Leonard Ritter 2 years ago
* first VVF implementation
1 files changed, 1904 insertions(+), 0 deletions(-)

A => testing/test_cascade_dmc_cc_vvf.sc
A => testing/test_cascade_dmc_cc_vvf.sc +1904 -0
@@ 0,0 1,1904 @@ 
+
+#
+    LOD aware terrain vectorization using dual marching cubes
+
+    VVF -> volumetric vertex field
+
+define-infix< 40 , _
+
+using import glm
+using import glsl
+using import itertools
+using import Array
+using import Box
+using import struct
+using import Option
+import ..lib.tukan.use
+
+import tukan.voxel
+let voxel = tukan.voxel
+
+using import tukan.gl
+using import tukan.bitmap
+using import tukan.packing
+using import tukan.random
+using import tukan.color
+using import tukan.perfect_hash
+using import tukan.raytrace
+using import tukan.sdf
+using import tukan.sdl
+using import tukan.rotation
+using import tukan.brdf
+using import tukan.normal
+using import tukan.noise
+using import tukan.projection
+using import tukan.derivative
+using import tukan.isosurface
+using import tukan.hash
+using import tukan.spherical
+using import tukan.ResourceGroup
+using import tukan.logcell
+using import .testfragment
+
+let RG = (ResourceGroup "RG")
+from (import tukan.math) let expmix
+
+# Compile-time feature switches; the ones referenced in this part of the file
+# are consumed through static-if.
+SAMPLE_CAMERA_OFFSET := false # true: map-translation adds the translation column of shglobals.view-inverse
+PROJECT_FINAL_VERTEX := false
+VISUALIZE_IDS := false
+POST_TRANSFORM := false # true is worse
+OCCLUSION_CULLING := false
+FOG := false
+USE_FLAT_SHADING := true # generate-quad writes one face normal per triangle
+BALANCE_QUADS := true # generate-quad splits each quad along its shorter diagonal
+USE_COMPLEX_SURFACE := true # matmapf adds a noise term to the test surface
+HIGH_QUALITY_FEATURES := true # feature-vertex-normal uses tet-feature-vertex-normal
+SOFT_WORLD_SAMPLING := true # generate-world spreads its 2x2x2 sub-samples 3x wider
+USE_CATMULL_CLARK := false
+BLOCKY_WORLD := false
+USE_VVF_PACKING := true # pack-vvf / unpack-vvf keep vertex and flags in one 32-bit word
+AVERAGE_LOD_VERTICES := false # generate-world-lod: weighted average instead of closest child vertex
+
+FETCH_UV_OFFSET := 0.5
+#FETCH_UV_OFFSET := 0.0
+
+# to reach fog density D at depth Z, FOG_RATE = -log2(1 - D)/Z
+FOG_RATE := 0.02 # 75% at 100 units (50% at 50 units)
+
+# 20 * 2^20 entries
+# NOTE(review): presumably the capacity of the vertex buffers - confirm at the allocation site
+let MAX_VERTICES = (20 * (1 << 20))
+
+# voxel grid resolution; sample-field maps cell indices to [-1 .. 1) and then
+# multiplies by WORLD_SCALE
+let WORLD_SIZE = (uvec3 256)
+let WORLD_SCALE = (vec3 256.0)
+# 8 == log2 256
+let MAX_WORLD_LOD_I = 8
+let MAX_WORLD_LOD = (MAX_WORLD_LOD_I as f32)
+
+let CUBE_SIZE = (uvec3 256)
+let LOD_OFFSET = -6
+# added to the lod before sampling in the lod-aware normalmapf
+let LOD_NORMAL_OFFSET = -1
+# edge length of the cubic compute work group used by the world kernels
+let GROUP_SIZE = 4
+let MAX_SECTOR_LOD_I = 2
+# 1 << 2 == 4 cells per sector edge
+let SECTOR_GROUP_SIZE = (1 << MAX_SECTOR_LOD_I)
+
+let SUBDIVIDE_RADIUS = 6
+#let SUBDIVIDE_RADIUS = 1
+let SUBDIVIDE_SCALE = 16
+
+MAX_CASCADE_DEPTH := 6 # MAX_WORLD_LOD_I - MAX_SECTOR_LOD_I
+#MAX_CASCADE_DEPTH := 3 # MAX_WORLD_LOD_I - MAX_SECTOR_LOD_I
+SECTOR_CAPACITY := MAX_CASCADE_DEPTH * 100000
+# 1 << 6 == 64 sectors per cascade edge; the center is at 32
+CASCADE_SIZE := (1 << MAX_CASCADE_DEPTH)
+CASCADE_CENTER := (CASCADE_SIZE // 2)
+
+# binding points used by the buffer and image declarations below
+let BINDING_BUF_SECTOR_IN = 1
+let BINDING_BUF_FACE_BRIDGE_IN = 2
+let BINDING_BUF_VERTEX_IN = 3
+let BINDING_BUF_VERTEX_OUT = 4
+let BINDING_BUF_DRAW_VOXELS_CMD = 5
+let BINDING_BUF_DISPATCH_CMD = 6
+let BINDING_IMG_ZBUFFER = 7
+let BINDING_IMG_WORLD_IN = 8
+let BINDING_IMG_WORLD_OUT = 9
+let BINDING_IMG_WORLD_INOUT = 10
+
+# uniform locations used by the uniform declarations below
+let UNIFORM_LEVEL = 1
+let UNIFORM_PROGRAM = 2
+let UNIFORM_MOUSE_STATE = 12
+let UNIFORM_SECTOR_OFFSET = 13
+let UNIFORM_SCREEN_SAMPLER = 4
+let UNIFORM_WORLD_SAMPLER = 7
+
+
+let LEVELS = 8
+
+let SEARCH_R = 5
+let SEARCH_Rf = (SEARCH_R as f32)
+
+let sqrt3 = (sqrt 3.0)
+
+run-stage;
+
+# One output vertex as written by generate-quad.
+struct Vertex plain
+    pos : vec4
+    # swapnormal stores the normal as (vec4 n 0)
+    normal : vec4
+
+# Vertex stream: a counter followed by the vertex entries.
+struct Vertices plain
+    # generate-quad advances this by 6 per quad using atomicAdd
+    count : u32
+    # each entry holds a vertex
+    entries : (array Vertex)
+
+# One bit per signed axis direction.
+# NOTE(review): presumably these are the six face bits stored in Sector.flags - confirm at the use site
+let
+    BLEND+X = 1:u32
+    BLEND-X = 2:u32
+    BLEND+Y = 4:u32
+    BLEND-Y = 8:u32
+    BLEND+Z = 16:u32
+    BLEND-Z = 32:u32
+
+# One sector record as read by generate-cell-verts.
+struct Sector plain
+    # packed level and position; unpacked with decode-cell
+    key : u32
+    flags : u32 # six face bits indicating where the next highest LOD level is
+
+# Unsized list of sector records.
+struct Sectors plain
+    keys : (array Sector)
+
+# read-only list of sectors; generate-cell-verts indexes it by work group id
+# plus the sector-offset uniform
+buffer sector-in : Sectors
+    binding = BINDING_BUF_SECTOR_IN
+    \ readonly coherent
+
+buffer face-bridge-in : Sectors
+    binding = BINDING_BUF_FACE_BRIDGE_IN
+    \ readonly coherent
+
+buffer vertex-in : Vertices
+    binding = BINDING_BUF_VERTEX_IN
+    \ readonly coherent
+
+# vertex stream that generate-quad appends to
+buffer vertex-out : Vertices
+    binding = BINDING_BUF_VERTEX_OUT
+    \ coherent
+
+uniform smp-screen : sampler2D
+    location = UNIFORM_SCREEN_SAMPLER
+
+# bit mask tested by update-world (bits 4 and 1 select the brush operation)
+uniform mouse-state : i32
+    location = UNIFORM_MOUSE_STATE
+
+# added to the work group id when generate-cell-verts indexes sector-in
+uniform sector-offset : u32
+    location = UNIFORM_SECTOR_OFFSET
+
+# Signed distance from `p` to a sphere of radius 0.5 centered at the origin.
+fn simple-sphere (p)
+    radius := 0.5
+    (length p) - radius
+
+# Animated test shape built from nine spheres: a radius 0.5 sphere combined
+# (sdSmoothOr) with the four small spheres at the +z corners, then combined
+# (sdSmoothAnd) with the negated distance of the four small spheres at the
+# -z corners. The small radius oscillates with shglobals.time.
+fn nine-spheres (p)
+    t := (deref shglobals.time)
+    # small sphere radius, oscillating in [0.1 .. 0.2]
+    r := (mix 0.1 0.2 ((sin t) * 0.5 + 0.5))
+    # per-axis offset of the small sphere centers
+    d := (sqrt 0.5) * 0.4
+    # distance to the small sphere centered at (cx cy cz)
+    inline ball (cx cy cz)
+        (length (p - (vec3 cx cy cz))) - r
+    added :=
+        min
+            ball d d d
+            ball -d d d
+            ball d -d d
+            ball -d -d d
+    carved :=
+        min
+            ball d d -d
+            ball -d d -d
+            ball d -d -d
+            ball -d -d -d
+    body := (sdSmoothOr ((length p) - 0.5) added 0.1)
+    sdSmoothAnd body (-1.0 * carved) 0.1
+
+# Two spheres of radius 0.25 placed symmetrically on the z axis and blended
+# with sdSmoothOr; their distance from the origin oscillates with
+# shglobals.time.
+fn twoballs (p)
+    t := (deref shglobals.time)
+    # center offset along z, oscillating in [0 .. 0.5]
+    offset := ((sin t) * 0.5 + 0.5) * 0.5
+    upper := (length (p - (vec3 0.0 0 offset))) - 0.25
+    lower := (length (p - (vec3 0.0 0 -offset))) - 0.25
+    sdSmoothOr upper lower 0.2
+
+# Two tori with materials, counter-rotating over shglobals.time and merged
+# with the 'sdSmoothOr method of the first sdmDist value (blend 0.2).
+# The result carries material data; matmapf converts it with `as f32`.
+fn doubletori (p)
+    let x = (deref shglobals.time)
+    # p rotated by -x in the xz plane
+    let xz =
+        anglevector-rotate
+            anglevector -x
+            p.xz
+    let p1 =
+        vec3
+            xz.x
+            p.y
+            xz.y
+    # p rotated by +x in the xy plane
+    let p2 =
+        vec3
+            anglevector-rotate
+                anglevector x
+                p.xy
+            p.z
+
+    'sdSmoothOr
+        sdmDist
+            sdTorus p1.xzy (vec2 0.5 0.05)
+            sdMaterial
+                vec4 1.0 0.5 0.3 1.0
+        sdmDist
+            sdTorus p2 (vec2 0.4 0.1)
+            sdMaterial
+                vec4 0.3 0.5 1.0 1.0
+                roughness = 0.1
+        0.2
+
+# Two boxes (sdBox with size argument (vec3 0.33)) rotated against each
+# other by a fixed angle and merged with sdSmoothOr using a very small
+# blend of 0.001.
+fn two-boxes (p)
+    angle := 8.1923 # (deref shglobals.time)
+    extent := (vec3 0.33)
+    # first box: p rotated by -angle in the xy plane
+    rot-xy := (anglevector-rotate (anglevector -angle) p.xy)
+    # second box: p rotated by angle * 0.917 in the yz plane
+    rot-yz := (anglevector-rotate (anglevector (angle * 0.917)) p.yz)
+    box-a := (sdBox (vec3 rot-xy p.z) extent)
+    box-b := (sdBox (vec3 p.x rot-yz) extent)
+    sdSmoothOr box-a box-b 0.001
+
+# Material test scene: an sdUberprim shape merged with two offset boxes
+# ('sdSmoothOr), then combined via 'sdSmoothAnd with the negated distance of
+# a sphere whose radius oscillates with shglobals.time.
+# Returns an sdmDist value (distance plus material).
+fn two-boxes-merge (p)
+    # box center offset (applied as -d and +d) and box size argument
+    let d =
+        vec3 0.3
+    let sz =
+        vec3 0.5
+    let x = (deref shglobals.time)
+    # sphere radius, oscillating between 1.0 and 0.2
+    r := (mix 1.0 0.2 ((sin x) * 0.5 + 0.5))
+    'sdSmoothAnd
+        'sdSmoothOr
+            sdmDist
+                #sdSphere
+                    p - (vec3 0.01)
+                    1.0
+                sdUberprim
+                    p - (vec3 0.01)
+                    vec4 1.0 1.0 0.05 0.05
+                    vec3 0.5 0.05 0.0
+                sdMaterial
+                    vec4 1.0 0.9 0.3 1.0
+                    metallic = 1.0
+            'sdSmoothOr
+                sdmDist
+                    sdBox (p - d) sz
+                    sdMaterial
+                        vec4 1.0 0.5 0.3 1.0
+                sdmDist
+                    sdBox (p + d) sz
+                    sdMaterial
+                        vec4 0.3 0.5 1.0 1.0
+                        roughness = 0.2
+                0.5
+            0.05
+        sdmDist
+            -
+                sdSphere
+                    p - (vec3 0.0 0.0 -0.5)
+                    r
+            sdMaterial
+                vec4 0.5 0.3 1.0 1.0
+        0.1
+
+# Distance from `p` to a single origin-centered sdBox with size argument
+# (vec3 0.33).
+fn one-box (p)
+    extent := (vec3 0.33)
+    sdBox p extent
+
+# World distance field with a constant material attached.
+# The static-if chain selects one test scene at compile time; with the
+# literal conditions as written only the final `else` branch is compiled:
+#   min of (length p) - 10 and |doubletori| - 10 (doubletori evaluated at
+#   p / 300 and scaled back by 300), the latter offset by five octaves of
+#   triquad-noise3 times 40 when USE_COMPLEX_SURFACE is set.
+# Returns an sdmDist value; mapf converts it to a plain f32 distance.
+fn matmapf (p)
+    #p := p + 1.0
+    #p := p + (vec3 1 0 0) * shglobals.time
+    #p := (sdDomainRep p 2.0)
+    #do
+        p := p.yzx * 2.0
+        (two-boxes-merge p) * 0.5
+    #doubletori p
+    #sdmDist
+        simple-sphere p
+        sdMaterial
+            vec4 0.5 0.3 1.0 1.0
+    #scale := 20.0
+    sdmDist
+        static-if 0
+            scale := 100.0
+            (two-boxes (p / scale)) * scale
+        elseif 0
+            scale := 100.0
+            (nine-spheres (p / scale)) * scale
+        elseif 0
+            #p := p * 0.2
+            let d =
+                static-if USE_COMPLEX_SURFACE
+                    ((triquad-noise3 p) * 2.0 - 1.0) - 0.05
+                else -inf
+
+            #let d =
+                min d
+                    ((triquad-noise3 (p / (vec3 100 2.0 2.0))) * 2.0 - 1.0) * 100.0 - 10.0
+
+            R := ((length p) - 30.0)
+            let d =
+                max
+                    max
+                        (length p) - 200.0
+                        d
+                    -R
+            #d * scale
+            d
+        else
+            scale := 300.0
+            DD := (doubletori (p / scale)) as f32 * scale
+            S := (length p)
+            # Z is only referenced by the commented-out terms at the end
+            Z := (length p.xy)
+            #p := p * 0.01
+            p := p * 0.2
+            # sum of five noise octaves, each remapped to [-1 .. 1] and divided by 2^i
+            local d = 0.0
+            for i in (range 5)
+                s := (exp2 (i as f32))
+                d += ((triquad-noise3 (p * s)) * 2.0 - 1.0) / s
+            min
+                #do
+                S - 10.0
+                static-if USE_COMPLEX_SURFACE
+                    +
+                        (abs DD) - 10.0
+
+                        (deref d) * 40.0
+                else
+                    (abs DD) - 10.0
+
+
+                #Z - 30.0
+                #- (Z - 20.0)
+        sdMaterial
+            vec4 0.5 0.3 1.0 1.0
+
+# single sphere:
+    256^3: 89240 cells (best: 0.3ms)
+    1024^3: 1427240 cells (best: 3.5ms)
+# min: 104567 at 256^3
+# Scalar world distance field: matmapf converted to f32. The commented-out
+# lines are alternative test scenes and an optional time-based rotation.
+fn mapf (p)
+    #let x = ((radians (deref shglobals.time)) * 10.0)
+    #let xz =
+        anglevector-rotate
+            anglevector -x
+            p.xz
+    #let p =
+        vec3
+            xz.x
+            p.y
+            xz.y
+    #simple-sphere p
+    #twoballs p
+    #doubletori p
+    #nine-spheres p
+    #two-boxes p
+    #one-box p
+    (matmapf p) as f32
+
+# alias for this definition of mapf
+let orig_mapf = mapf
+
+# Negated result of sdNormalFast for the field `mapf`, evaluated at `p` with
+# `r` forwarded as sdNormalFast's third argument.
+fn normalmapf (p r)
+    n := (sdNormalFast mapf p r)
+    - n
+
+# Onion mapping parameters. Only ONION_NEAR is used by the active code path;
+# ONION_FAR and ONION_LAYERS appear only in commented-out alternatives.
+let ONION_NEAR = 0.6
+let ONION_FAR = 100.0
+let ONION_LAYERS = 32.0
+
+# Maps a cube coordinate `p` to a direction scaled by a radius.
+#   p.z -> clamped to [0 .. 1], then turned into the radius
+#          ONION_NEAR * (1 + 2 * sqrt(pi) / (Z / 2)) ** (r * Z), Z = CUBE_SIZE.z
+#   p.xy -> direction via unpack_normal_snorm
+# Returns two values: the scaled position and the radius.
+# The map_vertex alias defined after this function selects map_identity, so
+# this mapping is not the active one.
+fn map_onion_radius (p)
+    #r := (clamp ((p.z * 0.5 + 0.5) * 0.5 + 0.5) 0.0 1.0)
+    r := (clamp (p.z * 0.5 + 0.5) 0.0 1.0)
+    #r := (p.z * 0.5 + 0.5) * ONION_LAYERS
+
+    # roughly square layers
+    embed
+        Z := CUBE_SIZE.z as f32
+        r := ONION_NEAR * ((1 + 2 * (sqrt pi) / (Z * 0.5)) ** (r * Z))
+
+    # exponential interpolation
+    #embed
+        r := (expmix ONION_NEAR ONION_FAR r 0.5)
+
+    # infinite perspective projection
+    #embed
+        r := r * 0.9999
+        k := 0.01
+        r := r / (k * (1.0 - r))
+
+    p := (unpack_normal_snorm p.xy)
+    #p :=
+        spherical-surface
+            L1-spherical
+                octahedral-surface
+                    tile-guyou ((p.xy - 1.0) / 2.0)
+
+    _ (p * r) r
+
+# Position-only variant of map_onion_radius: the radius result is dropped.
+fn map_onion (p)
+    let mapped radius = (map_onion_radius p)
+    mapped
+
+# Identity vertex mapping.
+fn map_identity (p)
+    p
+
+# Identity vertex mapping that also returns a constant radius of 1.0.
+fn map_identity_radius (p)
+    _ p 1.0
+
+#let map_vertex map_vertex_rlimit = map_onion map_onion_radius
+# active vertex mapping: identity
+let map_vertex = map_identity
+let map_vertex_rlimit = map_identity_radius
+
+# Returns `tpos`, offset by the xyz part of column 3 of
+# shglobals.view-inverse when SAMPLE_CAMERA_OFFSET is enabled.
+inline map-translation (tpos)
+    static-if SAMPLE_CAMERA_OFFSET
+        offset := (shglobals.view-inverse @ 3) . xyz
+        tpos + offset
+    else tpos
+
+
+# Builds the projection via ProjectionSetup's 'ifp-perspective method, using
+# (1 / shglobals.aspect, 1.0) as the aspect vector and 0.1 as the last
+# argument.
+fn calc-projection ()
+    inv-aspect := (/ (deref shglobals.aspect))
+    'ifp-perspective ProjectionSetup (vec2 inv-aspect 1.0) 0.1
+
+# Multiplies `p` by the mat3 part of shglobals.view (no translation).
+inline transform-dist (p)
+    m := (mat3 shglobals.view)
+    m * p
+
+# Multiplies `p` by the mat3 part of shglobals.view-inverse (no translation).
+inline transform-invert-dist (p)
+    m := (mat3 shglobals.view-inverse)
+    m * p
+
+# Transforms the point `p` (w = 1) by shglobals.view-inverse and returns xyz.
+inline transform-invert-pos (p)
+    ((deref shglobals.view-inverse) * (vec4 p 1)) . xyz
+
+# Transforms the point `p` (w = 1) by shglobals.view and returns xyz.
+inline transform-pos (p)
+    ((deref shglobals.view) * (vec4 p 1)) . xyz
+
+# Root finder for two samples `v0` and `v1` (w = value, xyz = gradient)
+# along direction `dir`. The directional derivatives are scaled by 0.01,
+# the two candidates c - w and c + w are remapped with * 2 - 1, and the one
+# with the smaller magnitude is returned.
+# NOTE(review): the radicand equals
+#   (c0 - c1)^2 + 2 (c0 + c1) (g0 - g1) + (g0 - g1)^2
+# which looks like the discriminant of a quadratic fit - confirm the intended
+# derivation.
+# NOTE(review): divides by (g1 - g0) and takes an unguarded sqrt; the only
+# call sites visible here are commented out.
+fn gradient-root (v0 v1 dir)
+    c0 := v0.w
+    c1 := v1.w
+    g0 := (dot v0.xyz dir) * 0.01
+    g1 := (dot v1.xyz dir) * 0.01
+    d := 0.5 / (g1 - g0)
+    c := d * (c0 - c1 - g0 + g1)
+    w := d * (sqrt (c0 * c0 - 2.0 * c0 * c1 + c1 * c1 + 2.0 * (c0 + c1) * g0 + g0 * g0 - 2.0 * (c0 + c1 + g0) * g1 + g1 * g1))
+    q0 := (c - w) * 2.0 - 1.0
+    q1 := (c + w) * 2.0 - 1.0
+    ? ((abs q0) < (abs q1)) q0 q1
+
+# Area of the triangle A B C: half the length of the cross product of two
+# edge vectors starting at A.
+fn triangle-area (A B C)
+    edge-ab := B - A
+    edge-ac := C - A
+    (length (cross edge-ab edge-ac)) / 2.0
+
+# Trilinear blend of the eight corner values `v` (index bit 0 = x, bit 1 = y,
+# bit 2 = z) with `p` used directly as the three mix factors.
+# NOTE(review): the original comment speaks of a [-1..1]³ domain, but `p` is
+# passed to mix without remapping - confirm which range callers use.
+fn trimix (v p)
+    # corner values, copied out of the array
+    let c000 c001 c010 c011 c100 c101 c110 c111 =
+        va-map
+            inline (i) (copy (v @ i))
+            va-range 8
+    # blend along x on the four x-parallel edges
+    x00 := (mix c000 c001 p.x)
+    x01 := (mix c010 c011 p.x)
+    x10 := (mix c100 c101 p.x)
+    x11 := (mix c110 c111 p.x)
+    # blend along y on the two z faces, then along z
+    y0 := (mix x00 x01 p.y)
+    y1 := (mix x10 x11 p.y)
+    mix y0 y1 p.z
+
+# Feature point of a single tetrahedron `tetidx` of the cube whose eight
+# corner values are `cd`, in [-1 .. 1] cell coordinates.
+# Each tetrahedron uses cube corners 0, k1, 7 and k3, where k1 and k3 are
+# consecutive 3-bit entries of the packed table 0x6cc99 (1 3 2 6 4 5 1).
+# The case count `c` and vertex order `i` come from tetfaces:
+#   c == 1 -> mean of three interpolated edge points
+#   c == 2 -> mean of four interpolated edge points
+#   else   -> centroid of the tetrahedron
+# NOTE(review): the edge points are presumably zero crossings; that depends
+# on tetlerp, which is defined elsewhere.
+fn tet-feature-vertex-normal2 (cd tetidx)
+    let tetverts = 0x6cc99:u32
+    k := tetidx * 3:u32
+    k1 := (tetverts >> k) & 7:u32
+    k3 := (tetverts >> (k + 3:u32)) & 7:u32
+
+    # not referenced in this function
+    let idxs = (ivec4 0 k1 7 k3)
+
+    # tetrahedron corners; bit 0/1/2 of a corner index select +x/+y/+z
+    local p : (array vec3 4)
+    p @ 0 = (vec3 -1 -1 -1)
+    p @ 1 = (vec3 (k1 & 1) ((k1 >> 1) & 1) ((k1 >> 2) & 1)) * 2 - 1
+    p @ 2 = (vec3 1 1 1)
+    p @ 3 = (vec3 (k3 & 1) ((k3 >> 1) & 1) ((k3 >> 2) & 1)) * 2 - 1
+    let d = (vec4 (cd @ 0) (cd @ k1) (cd @ 7) (cd @ k3))
+
+    # point on the edge i0-i1, interpolated by tetlerp of the two corner values
+    inline tf (i0 i1)
+        mix (p @ i0) (p @ i1) (tetlerp (d @ i0) (d @ i1))
+
+    let c i = (tetfaces d)
+    switch c
+    case 1:u32
+        p0 := (tf i.x i.y)
+        p1 := (tf i.x i.z)
+        p2 := (tf i.x i.w)
+        (p0 + p1 + p2) / 3
+    case 2:u32
+        p0 := (tf i.x i.z)
+        p1 := (tf i.x i.w)
+        p2 := (tf i.y i.w)
+        p3 := (tf i.y i.z)
+        (p0 + p1 + p2 + p3) / 4
+    default
+        (+ (p @ 0) (p @ 1) (p @ 2) (p @ 3)) / 4
+
+# Feature vertex of a cube cell from its eight corner values `cd`, in
+# [-1 .. 1] cell coordinates.
+# The cube is split into six tetrahedra sharing the 0-7 diagonal (corner
+# table 0x6cc99 = 1 3 2 6 4 5 1). For every tetrahedron with a non-zero case
+# count from tetfaces, the centroid of its triangle or quad of interpolated
+# edge points is accumulated, weighted by the patch area (at least 1e-5).
+# Returns the weighted mean, or (vec3 0) when nothing was accumulated.
+fn tet-feature-vertex-normal (cd)
+    # xyz = weighted centroid sum, w = weight sum
+    local outv = (vec4 0)
+    for tetidx in (range 6:u32)
+        let tetverts = 0x6cc99:u32
+        k := tetidx * 3:u32
+        k1 := (tetverts >> k) & 7:u32
+        k3 := (tetverts >> (k + 3:u32)) & 7:u32
+
+        # not referenced in this function
+        let idxs = (ivec4 0 k1 7 k3)
+
+        local p : (array vec3 4)
+        p @ 0 = (vec3 -1 -1 -1)
+        p @ 1 = (vec3 (k1 & 1) ((k1 >> 1) & 1) ((k1 >> 2) & 1)) * 2 - 1
+        p @ 2 = (vec3 1 1 1)
+        p @ 3 = (vec3 (k3 & 1) ((k3 >> 1) & 1) ((k3 >> 2) & 1)) * 2 - 1
+        let d = (vec4 (cd @ 0) (cd @ k1) (cd @ 7) (cd @ k3))
+
+        let c i = (tetfaces d)
+        if (c == 0:u32)
+            continue;
+        # not referenced in this function
+        let vc = (c * 3)
+        inline tf (i0 i1)
+            mix (p @ i0) (p @ i1) (tetlerp (d @ i0) (d @ i1))
+
+        # local override of the file-level triangle-area: returns twice the area
+        fn triangle-area (A B C)
+            # removed factor 1/2
+            length (cross (B - A) (C - A))
+
+        # pc = patch centroid, A = (doubled) patch area
+        let pc A =
+            if (c == 1:u32)
+                p0 := (tf i.x i.y)
+                p1 := (tf i.x i.z)
+                p2 := (tf i.x i.w)
+                pc := ((p0 + p1 + p2) / 3)
+                A := (triangle-area p0 p1 p2)
+                _ pc A
+            else # elseif (c == 2:u32)
+                p0 := (tf i.x i.z)
+                p1 := (tf i.x i.w)
+                p2 := (tf i.y i.w)
+                p3 := (tf i.y i.z)
+                pc := ((p0 + p1 + p2 + p3) / 4)
+                A :=
+                    +
+                        (triangle-area p0 p1 p2)
+                        (triangle-area p0 p3 p2)
+                _ pc A
+        outv += (vec4 pc 1) * (max 1e-5 A)
+    ? (outv.w == 0) (vec3 0) (outv.xyz / outv.w)
+
+# Computes a cell's feature vertex and a gradient estimate from the eight
+# corner values `v` (index bit 0 = x, bit 1 = y, bit 2 = z).
+# Returns two values:
+#   fv - the feature vertex; with HIGH_QUALITY_FEATURES this is the result of
+#        tet-feature-vertex-normal, otherwise (given the literal `elseif 0`)
+#        20 descent steps on the trilinear interpolant starting at the origin
+#   g  - the unnormalized sum of the four sign-flipped body-diagonal
+#        differences times their diagonal vectors
+fn feature-vertex-normal (v)
+    let c000 c001 c010 c011 c100 c101 c110 c111 =
+        va-map
+            inline (i) (copy (v @ i))
+            va-range 8
+    # unsigned body diagonal vectors
+    n00 := (vec3 1 1 1)
+    n01 := (vec3 -1 1 1)
+    n10 := (vec3 1 -1 1)
+    n11 := (vec3 -1 -1 1)
+    # sign flipped differences along body diagonal vectors
+    g00 := (c000 - c111)
+    g01 := (c001 - c110)
+    g10 := (c010 - c101)
+    g11 := (c011 - c100)
+    # normal vector (gradient)
+    g :=
+        do  #normalize
+            +
+                g00 * n00
+                g01 * n01
+                g10 * n10
+                g11 * n11
+
+    # corner weights of cube in [-1..1]³ domain
+    let fv =
+        static-if HIGH_QUALITY_FEATURES
+            tet-feature-vertex-normal v
+        elseif 0
+            #embed
+                # position of zero in interval [-1..1] (inverse lerp)
+                d00 := (gradient-root c000 c111 n00)
+                d01 := (gradient-root c001 c110 n01)
+                d10 := (gradient-root c010 c101 n10)
+                d11 := (gradient-root c011 c100 n11)
+            embed
+                # position of zero in interval [-1..1] (inverse lerp)
+                d00 := (c000 + c111) / g00
+                d01 := (c001 + c110) / g01
+                d10 := (c010 + c101) / g10
+                d11 := (c011 + c100) / g11
+
+            # projected points
+            v00 := d00 * n00
+            v01 := d01 * n01
+            v10 := d10 * n10
+            v11 := d11 * n11
+
+            let verts... =
+                ? ((abs d00) <= 1.0)
+                    vec4 v00 1
+                    vec4 0
+                ? ((abs d01) <= 1.0)
+                    vec4 v01 1
+                    vec4 0
+                ? ((abs d10) <= 1.0)
+                    vec4 v10 1
+                    vec4 0
+                ? ((abs d11) <= 1.0)
+                    vec4 v11 1
+                    vec4 0
+
+            d := (+ verts... )
+
+            # feature vertex
+            fv := (d.xyz / (max 1.0 d.w))
+
+            fv
+        else
+            # trilinear interpolant of the corner values over [-1 .. 1]
+            inline mapf (p)
+                p := p * 0.5 + 0.5
+                mix
+                    mix
+                        mix c000 c001 p.x
+                        mix c010 c011 p.x
+                        p.y
+                    mix
+                        mix c100 c101 p.x
+                        mix c110 c111 p.x
+                        p.y
+                    p.z
+            inline grad (p)
+                sdNormalFast mapf p
+            # 20 fixed steps of p <- p - value * gradient, starting at the cell center
+            local p = (vec3 0)
+            for i in (range 20)
+                d := (mapf p)
+                g := (grad p)
+                p = (p - d * g)
+                ;
+            deref p
+        #else
+            let c000 c001 c010 c011 c100 c101 c110 c111 =
+                va-map
+                    inline (i) (copy ((v @ i) . w))
+                    va-range 8
+            # isoplane point
+            d := (+ c000 c001 c010 c011 c100 c101 c110 c111) / 8
+            g :=
+                /
+                    vec3
+                        (+ c001 c101 c011 c111) - (+ c000 c100 c010 c110)
+                        (+ c010 c011 c110 c111) - (+ c000 c001 c100 c101)
+                        (+ c100 c110 c101 c111) - (+ c000 c010 c001 c011)
+                    8
+            l := (length g)
+            g := g / l
+            d := d / l
+            k := -d / (dot g g)
+            k * g
+    _ fv g
+
+#do
+    let k0 = 0
+    let k1 = 1
+    local ww =
+        arrayof f32
+            \ k1 k0 k1 k1
+            \ k1 k1 k1 k1
+    print
+        feature-plane ww
+
+    #print
+        GL.MAX_COMPUTE_SHARED_MEMORY_SIZE
+
+    if true
+        exit 0
+#run-stage;
+
+# World texel format. The commented-out alternatives are single-channel
+# float formats; the active choice is RGBA32UI, which is what pack-vvf
+# produces (a uvec4 per cell).
+#embed
+    let WORLD_PIXELFMT = GL.R32F
+    let WORLD_IMAGETYPE = (image3D r32f)
+#embed
+    let WORLD_PIXELFMT = GL.R16F
+    let WORLD_IMAGETYPE = (image3D r16f)
+#embed
+    let WORLD_PIXELFMT = GL.R8
+    let WORLD_IMAGETYPE = (image3D r8)
+embed
+    let WORLD_PIXELFMT = GL.RGBA32UI
+    let WORLD_IMAGETYPE = (uimage3D rgba32ui)
+
+# read-only source image, read by generate-world-lod
+uniform world-in : WORLD_IMAGETYPE
+    binding = BINDING_IMG_WORLD_IN
+    \ coherent readonly restrict
+
+# write-only destination image, written by generate-world and generate-world-lod
+uniform world-out : WORLD_IMAGETYPE
+    binding = BINDING_IMG_WORLD_OUT
+    \ coherent writeonly restrict
+
+# read/write image, edited in place by update-world
+uniform world-inout : WORLD_IMAGETYPE
+    binding = BINDING_IMG_WORLD_INOUT
+    \ coherent restrict
+
+uniform smp-world : usampler3D
+    location = UNIFORM_WORLD_SAMPLER
+
+# Encodes a cell's feature vertex and corner flags as one uvec4 texel.
+#   USE_VVF_PACKING on:  x = packSnorm4x8 of (vertex, 0) with `cflags` OR-ed
+#                        into bits 24..31; y, z and w are zero
+#   USE_VVF_PACKING off: x, y, z = raw f32 bits of the vertex, w = cflags
+fn pack-vvf (vertex cflags)
+    static-if USE_VVF_PACKING
+        xyz-bits := (packSnorm4x8 (vec4 vertex 0))
+        flag-bits := cflags << 24:u32
+        uvec4 (xyz-bits | flag-bits) 0 0 0
+    else
+        uvec4 (bitcast vertex.x u32) (bitcast vertex.y u32) (bitcast vertex.z u32) cflags
+
+# Inverse of pack-vvf: decodes a texel into two values, the feature vertex
+# and the corner flags.
+#   USE_VVF_PACKING on:  vertex = xyz of unpackSnorm4x8 data.x,
+#                        flags = data.x >> 24
+#   USE_VVF_PACKING off: vertex = data.xyz reinterpreted as f32,
+#                        flags = data.w
+fn unpack-vvf (data)
+    static-if USE_VVF_PACKING
+        _
+            (unpackSnorm4x8 data.x) . xyz as vec-type
+            data.x >> 24:u32
+    else
+        _
+            vec3
+                bitcast data.x f32
+                bitcast data.y f32
+                bitcast data.z f32
+            copy data.w
+
+# Evaluates the distance function `mapf` at the eight corners of voxel cell
+# `ipos` and derives the cell's feature vertex and corner flags.
+#   ipos - integer cell index inside WORLD_SIZE
+#   mapf - callable taking a world position and returning a distance
+# Returns two values:
+#   the feature vertex (first result of feature-vertex-normal), and
+#   a flag word: bit 0 set when corner 0 is negative, bit 1 set when the
+#   corner values change sign or touch zero (min * max <= 0).
+inline sample-field (ipos mapf)
+    # size of one cell in the normalized [-1 .. 1] domain
+    rd := (2.0 / (vec3 WORLD_SIZE))
+    # minimum corner of the cell in the normalized domain
+    fpos := (vec3 ipos) * rd - 1.0
+
+    local cd : (array f32 8)
+    local mind = inf
+    local maxd = -inf
+    for i in (range 8)
+        # corner offsets: bit 0 = x, bit 1 = y, bit 2 = z
+        let x y z = (i & 1) ((i >> 1) & 1) ((i >> 2) & 1)
+        pos := (fpos + (vec3 x y z) * rd) * WORLD_SCALE
+        let d = (mapf pos)
+        cd @ i = d
+        mind = (min mind d)
+        maxd = (max maxd d)
+    let fv = (feature-vertex-normal cd)
+    _ fv
+        |
+            ? ((cd @ 0) < 0) 1:u32 0:u32
+            ? (mind * maxd <= 0) 2:u32 0:u32
+
+# Compute kernel (GROUP_SIZE^3 work group): fills one world-out texel per
+# invocation with the packed feature vertex and corner flags of that cell.
+# Invocations outside WORLD_SIZE return without writing.
+fn generate-world ()
+    local_size GROUP_SIZE GROUP_SIZE GROUP_SIZE
+    ipos := (uvec3 gl_GlobalInvocationID.xyz)
+    if (any? (ipos >= WORLD_SIZE))
+        return;
+    rd := (2.0 / (vec3 WORLD_SIZE))
+    # field used for the corner samples: the mean of mapf over a 2x2x2 set of
+    # offsets around `pos`, spread by +-0.25 cells (x3 with SOFT_WORLD_SAMPLING)
+    inline samplef (pos)
+        static-if 0
+            mapf pos
+        else
+            local v = 0.0
+            N := 2
+            N:u32 := N as u32
+            for x y z in (dim N N N)
+                d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0
+                d := d *
+                    do
+                        static-if SOFT_WORLD_SAMPLING 3.0
+                        else 1.0
+                lpos := pos + d * (0.25 * rd * WORLD_SCALE)
+                #d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0
+                #lpos := pos + (d * 3.0 * rd) * WORLD_SCALE
+                s := (mapf lpos)
+                #w := (/ (+ (abs d.x) (abs d.y) (abs d.z)))
+                v += s
+            (copy v) / (N * N * N)
+    imageStore world-out ipos (pack-vvf (sample-field ipos samplef))
+#
+    rd := (2.0 / (vec3 WORLD_SIZE))
+    fpos := (vec3 ipos) * rd - 1.0
+    pos := (fpos + 0.5 * rd) * WORLD_SCALE
+
+    local cd : (array f32 8)
+    local mind = inf
+    local maxd = -inf
+    for i in (range 8)
+        let x y z = (i & 1) ((i >> 1) & 1) ((i >> 2) & 1)
+        pos := (fpos + (vec3 x y z) * rd) * WORLD_SCALE
+        let d =
+            static-if 0
+                mapf pos
+            elseif 1
+                local v = 0.0
+                N := 2
+                N:u32 := N as u32
+                for x y z in (dim N N N)
+                    d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0
+                    d := d *
+                        do
+                            static-if SOFT_WORLD_SAMPLING 3.0
+                            else 1.0
+                    lpos := pos + d * (0.25 * rd * WORLD_SCALE)
+                    #d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0
+                    #lpos := pos + (d * 3.0 * rd) * WORLD_SCALE
+                    s := (mapf lpos)
+                    #w := (/ (+ (abs d.x) (abs d.y) (abs d.z)))
+                    v += s
+                (copy v) / (N * N * N)
+            elseif 0
+                local v = (vec4 0)
+                N := 8
+                N:u32 := N as u32
+                for x y z in (dim N N N)
+                    d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0
+                    #d := d * 16.0
+                    d := d * 3.0
+                    lpos := pos + d * (0.5 * rd * WORLD_SCALE)
+                    #d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0
+                    #lpos := pos + (d * 3.0 * rd) * WORLD_SCALE
+                    s := (- (sign (mapf lpos)))
+                    w := (/ (+ (abs d.x) (abs d.y) (abs d.z)))
+                    v += s * w * (vec4 d 1)
+                ((copy v.w) + 1.0) / 2.0
+            else
+                grad := (normalmapf pos (rd * 0.5))
+                local bits = 0
+                N := 8
+                N:u32 := N as u32
+                for x y z in (dim N N N)
+                    lpos := (fpos + ((vec3 x y z) / N) * rd) * WORLD_SCALE
+                    #d := ((vec3 x y z) / (N - 1)) * 2.0 - 1.0
+                    #lpos := pos + (d * 3.0 * rd) * WORLD_SCALE
+                    if ((mapf lpos) <= (rd.x / N))
+                        bits += 1
+                bits as f32 / (N * N * N)
+        cd @ i = d
+        mind = (min mind d)
+        maxd = (max maxd d)
+        ;
+    let fv = (feature-vertex-normal cd)
+    data :=
+        pack-vvf fv
+            |
+                ? ((cd @ 0) < 0) 1:u32 0:u32
+                ? (mind * maxd <= 0) 2:u32 0:u32
+    imageStore world-out ipos data
+    ;
+
+# True when the 2x2x2 block of texels starting at `pos` in image `smp` has
+# mixed bit-0 corner flags, i.e. neither all clear nor all set.
+inline vertex-valid? (smp pos)
+    local flags = 0:u32
+    for i in (range 8)
+        # shadows `pos` for this iteration only; offsets do not accumulate
+        pos := pos + (ivec3 (i & 1) ((i >> 1) & 1) ((i >> 2) & 1))
+        let vx cf = (unpack-vvf (imageLoad smp pos))
+        # collect bit 0 of each texel's flags into bit i
+        flags |= (cf & 1) << i as u32
+    (flags != 0:u32) & (flags != 255:u32)
+
+# Compute kernel (GROUP_SIZE^3 work group): edits world-inout in place with
+# a spherical brush derived from shglobals.view-inverse.
+#   mouse-state & 4 -> bit 0 of the stored flags is OR-ed with the brush's
+#   mouse-state & 1 -> bit 0 is cleared wherever the brush has it set
+#   otherwise       -> the invocation returns without writing
+# In both edit modes bit 1 is the OR of the stored and brush flags, and the
+# stored vertex is kept when vertex-valid? holds for the cell, otherwise the
+# brush's feature vertex is used.
+fn update-world ()
+    local_size GROUP_SIZE GROUP_SIZE GROUP_SIZE
+    ipos := (uvec3 gl_GlobalInvocationID.xyz)
+    if (any? (ipos >= WORLD_SIZE))
+        return;
+    # brush: sphere of radius 40, positioned from columns 3 and 2 of
+    # shglobals.view-inverse
+    inline samplef (pos)
+        origin := pos - 2.0 * (shglobals.view-inverse @ 3) . xyz
+        origin := origin - (shglobals.view-inverse @ 2) . xyz * 40.0
+        (length origin) - 40.0
+    let fv cfbrush = (sample-field ipos samplef)
+    let vx cf = (unpack-vvf (imageLoad world-inout ipos))
+
+    # only referenced by the commented-out blend below
+    brush-vertex-valid? := ((cfbrush & 2) != 0)
+
+    # this local is shadowed by the `svx :=` binding two lines down
+    local svx = (vec4 0)
+    ipos := (ivec3 ipos)
+    svx := (? (vertex-valid? world-inout ipos) vx fv)
+    #svx += (vec4 fv 1) * (? brush-vertex-valid? 0.1 0.0)
+    #svx += (vec4 vx 1) * (? (vertex-valid? world-inout ipos) 1.0 0.0)
+    #svx := (? (svx.w == 0) (vec3 0) (svx.xyz / svx.w))
+
+    #let fv = vx
+    let fv cf =
+        if (mouse-state & 4)
+            #sdSmoothOr d brush 5.0
+            _  svx
+                |
+                    (cf | cfbrush) & 1
+                    (cf | cfbrush) & 2
+        elseif (mouse-state & 1)
+            #sdSmoothAnd d -brush 2.0
+            _  svx
+                |
+                    (cf & (cfbrush ^ 1)) & 1
+                    (cf | cfbrush) & 2
+        else
+            return;
+
+    imageStore world-inout ipos (pack-vvf fv cf)
+    ;
+
+#uniform lodlevel : i32
+
+#vvv print
+#fold (w = 0.0) for x y z in (dim 3 3 3)
+    d := 3
+    p := (ivec3 x y z) - 1
+    #ap := (ivec3 (abs p.x) (abs p.y) (abs p.z))
+    ap := (vec3 (abs p.x) (abs p.y) (abs p.z))
+    #w1 := (exp2 (- (ap.x + ap.y + ap.z)))
+    w1 := (exp2 (- (length (vec3 p))))
+    print (x - 1) (y - 1) (z - 1) (w1 / 10.910761)
+    w + w1
+
+
+#do
+    vvv print
+    fold (w = 0.0) for x y z in (dim 4 4 4)
+        # blur kernel 4x4x4
+        # total sum of weights is S = 2 ** (3*d)
+        # w = (3 ** d) / (3 ** (abs(p.x) + abs(p.y) + abs(p.z))) / S
+        # exp2 ((log2 3.0) * (d - ((abs p.x) + (abs p.y) + (abs p.z))) - (log2 2.0) * 3 * d)
+        d := 3
+        ofs := (ivec3 x y z)
+        p := (vec3 ((ivec3 x y z) >> 1))
+        # simulate adding 8 samples
+        w +
+            exp2 ((log2 3.0) * (3 - (p.x + p.y + p.z)) - (log2 2.0) * 9)
+
+
+#if true
+    exit 0
+#run-stage;
+
+# Weighted 3x3x3 window fetch from image `src` centered on `ipos`.
+# Each neighbor at offset p in {-1, 0, 1}^3 contributes its texel times
+# 1 / 2^(3 + |p.x| + |p.y| + |p.z|); the 27 weights sum to 1. Neighbors
+# outside the image contribute zero.
+#   src  - image to read from
+#   ipos - signed integer texel position of the window center
+# Returns the accumulated vec4.
+inline windowed-fetch (src ipos)
+    isize := (imageSize src)
+    iz := (ivec3 0)
+    fold (v = (vec4 0.0)) for x y z in (dim 3 3 3)
+        p := (ivec3 x y z) - 1
+        ap := (ivec3 (abs p.x) (abs p.y) (abs p.z))
+        # sample and bounds-check the neighbor texel, not the window center
+        spos := ipos + p
+        value := (imageLoad src spos)
+        outofbounds? := (any? ((spos < iz) | (spos >= isize)))
+        + v
+            * value
+                ? outofbounds? 0.0
+                    / (1 << (3 + ap.x + ap.y + ap.z))
+
+# Compute kernel (GROUP_SIZE^3 work group): builds one texel of the next
+# coarser level (world-out) from the 2x2x2 child texels of world-in.
+#   vertex - child vertices are moved into the parent cell's [-1 .. 1] frame
+#            (0.5 * vx + child offset - 0.5); the valid child closest to the
+#            parent center wins, or, with AVERAGE_LOD_VERTICES, the valid
+#            children are averaged with weight 1 / squared distance
+#   flags  - bit 0: child 0's bit 0, OR-ed with the AND of the bit 0 of each
+#            opposing pair of neighbors two texels away along every axis;
+#            bit 1: set when at least one valid child vertex was found
+# NOTE(review): the neighbor fetches use offsets of -2 texels and are not
+# bounds-checked here - confirm the behavior at the image border.
+fn generate-world-lod ()
+    local_size GROUP_SIZE GROUP_SIZE GROUP_SIZE
+    opos := (ivec3 gl_GlobalInvocationID.xyz)
+    if (any? (opos >= (imageSize world-out)))
+        return;
+    # isize and iz are not referenced below
+    isize := (imageSize world-in)
+    iz := (ivec3 0)
+    # position of the first child texel in world-in
+    ibpos := opos << 1
+
+    local vertex =
+        do
+            static-if AVERAGE_LOD_VERTICES
+                vec4 0
+            else
+                vec3 0
+    local bestdist = inf
+    local failed = true
+    for x y z in (dim 2 2 2)
+        ipos := ibpos + (ivec3 x y z)
+        let vx cf = (unpack-vvf (imageLoad world-in ipos))
+        vx := (0.5 * vx + (vec3 x y z) - 0.5)
+        # squared distance of the child vertex from the parent cell center
+        L := (dot vx vx)
+        #if (((cf & 2) != 0) & (L < bestdist))
+        static-if AVERAGE_LOD_VERTICES
+            if (vertex-valid? world-in ipos)
+                vertex += (vec4 vx 1) * (1 / L)
+                failed = false
+        else
+            if ((vertex-valid? world-in ipos) & (L < bestdist))
+                vertex = vx
+                bestdist = L
+                failed = false
+    vx :=
+        ? failed
+            vec3 0
+            static-if AVERAGE_LOD_VERTICES
+                vertex.xyz / vertex.w
+            else
+                copy vertex
+    # bit 0 of the flags of the texel `pos` parent cells away
+    inline fetch (pos)
+        let __ cf = (unpack-vvf (imageLoad world-in (ibpos + pos * 2)))
+        cf & 1
+    # only referenced by the commented-out threshold in `cf` below
+    let w =
+        +
+            2 * (fetch (ivec3 0))
+            (fetch (ivec3 1 0 0))
+            (fetch (ivec3 -1 0 0))
+            (fetch (ivec3 0 1 0))
+            (fetch (ivec3 0 -1 0))
+            (fetch (ivec3 0 0 1))
+            (fetch (ivec3 0 0 -1))
+
+    let cf =
+        |
+            #(? (w / 8 < 0.38) 0:u32 1:u32)
+            |
+                (fetch (ivec3 0))
+                (fetch (ivec3 1 0 0)) & (fetch (ivec3 -1 0 0))
+                (fetch (ivec3 0 1 0)) & (fetch (ivec3 0 -1 0))
+                (fetch (ivec3 0 0 1)) & (fetch (ivec3 0 0 -1))
+            ? failed 0:u32 2:u32
+
+    #let __ cf = (unpack-vvf ((imageLoad world-in ibpos) . r))
+    #let cf = (? (bcount < 3) 0:u32 1:u32)
+    imageStore world-out opos (pack-vvf vx cf)
+    ;
+
+#inline mapf (p lod)
+#
+    z := 0.5 * (exp2 (-lod * 1.0))
+    let d =
+        (textureLod smp-world ((p / WORLD_SCALE) * 0.5 + 0.5) lod) . r
+    #d := (textureLod smp-world ((p / WORLD_SCALE) * 0.5 + 0.5) 2) . r
+    slimit := (min d (1.0 - d))
+    s := 1.0
+        #min
+            (clamp gx -slimit slimit) / gx
+            (clamp gy -slimit slimit) / gy
+            (clamp gz -slimit slimit) / gz
+    (z - d)
+
+#inline matmapf (p lod)
+#
+    sdmDist (mapf p lod)
+        sdMaterial
+            vec4 0.5 0.3 1.0 1.0
+
+# LOD-aware variant: negated sdNormalFast of the field at `p`, sampled at
+# lod + LOD_NORMAL_OFFSET (clamped to >= 0) with an offset radius of one
+# world cell. This redefines the earlier two-argument normalmapf.
+# NOTE(review): calls mapf with two arguments, but the only two-argument
+# mapf visible here is commented out - confirm which mapf is in scope where
+# this is instantiated.
+fn normalmapf (p lod)
+    r := (1.0 / WORLD_SIZE.x) * WORLD_SCALE
+    -
+        sdNormalFast
+            inline (p)
+                mapf p (max 0.0 (lod + LOD_NORMAL_OFFSET))
+            \ p r
+
+# sector edge plus two extra cells; id2svindex shifts ids by +1, so ids from
+# -1 to SECTOR_GROUP_SIZE are addressable
+let SECTOR_SAMPLE_SIZE = (SECTOR_GROUP_SIZE + 2)
+let SECTOR_SAMPLE_VOLUME = (SECTOR_SAMPLE_SIZE ** 3)
+let SECTOR_GROUP_VOLUME = (SECTOR_GROUP_SIZE ** 3)
+
+# dumps the combined byte size of the three shared arrays declared below
+dump "shared memory requirements"
+    +
+        (sizeof u32) * SECTOR_SAMPLE_VOLUME
+        (sizeof vec3) * SECTOR_SAMPLE_VOLUME
+        (sizeof vec3) * SECTOR_SAMPLE_VOLUME
+
+# per-sample shared storage, one entry per id2svindex slot
+shared cell-corner-flags : (array u32 SECTOR_SAMPLE_VOLUME)
+shared cell-vertex : (array vec3 SECTOR_SAMPLE_VOLUME)
+shared cell-normal : (array vec3 SECTOR_SAMPLE_VOLUME)
+
+#fn id2index (id)
+    #id.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + id.y * gl_WorkGroupSize.x + id.x
+    id.z * (SECTOR_GROUP_SIZE * SECTOR_GROUP_SIZE) + id.y * SECTOR_GROUP_SIZE + id.x
+
+# Converts a linear index (x fastest, then y, then z) into a 3D cell id
+# within a SECTOR_GROUP_SIZE cube.
+fn index2id (idx)
+    rest := idx // SECTOR_GROUP_SIZE
+    ivec3
+        idx % SECTOR_GROUP_SIZE
+        rest % SECTOR_GROUP_SIZE
+        rest // SECTOR_GROUP_SIZE
+
+# Converts a 3D cell id into a linear index of the SECTOR_SAMPLE_SIZE cube
+# (x fastest); the id is shifted by +1 on every axis first.
+fn id2svindex (id)
+    stride-y := SECTOR_SAMPLE_SIZE
+    stride-z := (SECTOR_SAMPLE_SIZE * SECTOR_SAMPLE_SIZE)
+    cell := id + 1
+    cell.z * stride-z + cell.y * stride-y + cell.x
+
+# Inverse of id2svindex: converts a linear index of the SECTOR_SAMPLE_SIZE
+# cube back into a 3D cell id, undoing the +1 shift.
+fn svindex2id (idx)
+    rest := idx // SECTOR_SAMPLE_SIZE
+    shifted :=
+        ivec3
+            idx % SECTOR_SAMPLE_SIZE
+            rest % SECTOR_SAMPLE_SIZE
+            rest // SECTOR_SAMPLE_SIZE
+    shifted - 1
+
+# work group width (x) used by generate-cell-verts
+NATIVE_LANE_WIDTH := 64
+
+# Face normal direction of the triangle v1 v2 v3: the cross product of the
+# normalized edges (v3 - v1) and (v2 - v1), using only the xyz parts.
+# NOTE(review): the cross product of two unit vectors has length sin(angle),
+# so the result is not unit length unless the edges are perpendicular -
+# confirm whether consumers renormalize.
+fn normal (v1 v2 v3)
+    cross
+        normalize
+            (v3 - v1) . xyz
+        normalize
+            (v2 - v1) . xyz
+
+# Returns a copy of vertex `v` whose normal is replaced by (n, 0).
+inline swapnormal (v n)
+    Vertex
+        v.pos
+        vec4 n 0
+
+# Appends one quad, as two triangles (six vertices), to vertex-out.
+#   v00 v01 v10 v11 - the quad's corner vertices
+# With BALANCE_QUADS the quad is split along the shorter of its two
+# diagonals (v00-v11 or v10-v01); the corner order is rotated, not flipped.
+# With USE_FLAT_SHADING every triangle gets its own face normal from
+# `normal`; otherwise the vertices are stored unchanged.
+# NOTE(review): the reserved range is not checked against the buffer
+# capacity here - confirm the buffer is sized for the worst case.
+fn generate-quad (v00 v01 v10 v11)
+    let v00 v10 v11 v01 =
+        static-if BALANCE_QUADS
+            let du = (v11.pos.xyz - v00.pos.xyz)
+            let dv = (v10.pos.xyz - v01.pos.xyz)
+            if ((dot du du) < (dot dv dv))
+                _ v00 v10 v11 v01
+            else
+                _ v10 v11 v01 v00
+        else
+            _ v00 v10 v11 v01
+
+    # generate quad
+    # reserve six consecutive entries; ofs is the counter value before the add
+    let ofs = (atomicAdd vertex-out.count 6)
+    entries := vertex-out.entries
+    static-if USE_FLAT_SHADING
+        n0 := (normal v00.pos v10.pos v11.pos)
+        n1 := (normal v11.pos v01.pos v00.pos)
+        #n1 := (normal v00.pos v11.pos v01.pos)
+
+        entries @ (ofs + 0) = (swapnormal v00 n0)
+        entries @ (ofs + 1) = (swapnormal v10 n0)
+        entries @ (ofs + 2) = (swapnormal v11 n0)
+        entries @ (ofs + 3) = (swapnormal v11 n1)
+        entries @ (ofs + 4) = (swapnormal v01 n1)
+        entries @ (ofs + 5) = (swapnormal v00 n1)
+    else
+        entries @ (ofs + 0) = v00
+        entries @ (ofs + 1) = v10
+        entries @ (ofs + 2) = v11
+        entries @ (ofs + 3) = v11
+        entries @ (ofs + 4) = v01
+        entries @ (ofs + 5) = v00
+    ;
+
+fn generate-cell-verts ()
+    # Compute entry point; one workgroup of NATIVE_LANE_WIDTH lanes handles
+    # one sector taken from sector-in.keys.
+    # Phase 1: the lanes cooperatively fill cell-corner-flags and cell-vertex
+    # for every cell of the sampled region (sector plus border).
+    # Phase 2 (after the barrier): each lane takes one sector cell and emits
+    # its quads through generate-quad.
+    local_size NATIVE_LANE_WIDTH 1 1
+    # sector-offset shifts the workgroup id, which lets the host split the
+    # sector list over several dispatches
+    sector := (copy (sector-in.keys @ (gl_WorkGroupID.x + sector-offset)))
+    sector-flags := (copy sector.flags)
+    let lvl sectorpos... = (decode-cell sector.key)
+    #let lvl sectorpos... = (decode-cell 1:u32)
+    lsectorpos := (ivec3 sectorpos...)
+    sectorlod := (MAX_CASCADE_DEPTH - lvl)
+    coord := (ivec4 lsectorpos sectorlod)
+
+    # sector origin and edge length in world units
+    sector-scale := (f32 (1 << sectorlod)) / CASCADE_SIZE
+    sector-origin := (vec3 lsectorpos) * sector-scale - 0.5
+    sector-origin := sector-origin * WORLD_SCALE
+    sector-scale := sector-scale * WORLD_SCALE
+    lod := MAX_WORLD_LOD_I - (lvl as i32)
+
+    lane-idx := (copy gl_LocalInvocationIndex)
+    # sector position expressed in cells of the sector's own grid
+    sectorpos := (lsectorpos << MAX_SECTOR_LOD_I)
+
+    #if
+        for x y z in (dim 3 3 3)
+            w := (texelFetch smp-world (lsectorpos + (ivec3 x y z) - 1) (sectorlod as i32)) . r
+            if (w != 0.0)
+                break false
+        else true
+        return;
+
+    # number of rounds the lanes need to cover the whole sampled region
+    SECTOR_SAMPLE_PASSES := (SECTOR_SAMPLE_VOLUME + NATIVE_LANE_WIDTH - 1) // NATIVE_LANE_WIDTH
+
+    dump "SECTOR_SAMPLE_PASSES" SECTOR_SAMPLE_PASSES
+
+    # mip level of smp-world that holds this sector's cells
+    lod := (lod - MAX_SECTOR_LOD_I)
+    for i in (range SECTOR_SAMPLE_PASSES)
+        # index in shared buffers
+        idx := (i * NATIVE_LANE_WIDTH) as u32 + lane-idx
+        if (idx >= SECTOR_SAMPLE_VOLUME)
+            break;
+        lpos := (svindex2id idx)
+        #idx := (id2index lpos)
+        gpos := lpos + sectorpos
+
+        inline fetch (pos)
+            unpack-vvf (texelFetch smp-world pos lod)
+
+        let vertex cf = (fetch gpos)
+        # vertex of the enclosing cell one mip level up ...
+        let vertex1 =
+            unpack-vvf (texelFetch smp-world (gpos >> 1) (lod + 1))
+        # ... shifted and scaled by the cell's position inside that parent;
+        # this appears to re-express it in this cell's local frame
+        vertex1 := (vertex1 - (vec3 (gpos & 1))) * 2.0 + 1.0
+        # bit i of cflags = low flag bit of the texel at corner offset
+        # (i & 1, (i >> 1) & 1, (i >> 2) & 1); bit 0 is this cell's own texel
+        local cflags = (cf & 1)
+        for i in (range 1 8)
+            let x y z = (i & 1) ((i >> 1) & 1) ((i >> 2) & 1)
+            #d := (((x ^ y ^ z) & 1) * 2 - 1) as f32
+            wpos := gpos + (ivec3 x y z)
+            let vx cf = (fetch wpos)
+            cflags |= ((cf & 1) << (i as u32))
+            ;
+        cflags := (deref cflags)
+        cell-corner-flags @ idx = cflags
+        # only cells whose corner bits are mixed get a vertex
+        if ((cflags != 0) & (cflags != 0xff))
+            cell-vertex @ idx =
+                do
+                    static-if BLOCKY_WORLD
+                        vec3 0
+                    else
+                        # NOTE(review): on alternating 10-frame intervals,
+                        # cells whose own texel has flag bit 1 clear get the
+                        # vertex vec3 -10; this looks like a debugging aid -
+                        # confirm what bit 1 encodes
+                        if
+                            &
+                                (((shglobals.frame // 10) % 2) == 0)
+                                (cf & 2) == 0
+                            vec3 -10
+                        elseif (sector-flags == 0:u32) vertex
+                        else
+                            # per axis: x when the + flag is set, (1 - x) when
+                            # only the - flag is set, 0 when neither is set
+                            inline blend-factor (x f+1 f-2)
+                                ? ((sector-flags & (f+1 | f-2)) != 0)
+                                    ? ((sector-flags & f+1) != 0) x (1 - x)
+                                    0.0
+                            #w := (vec3 lpos) / (SECTOR_GROUP_SIZE - 1)
+                            # 0 below the sector midpoint, 1 from it onwards
+                            w := (step (vec3 (SECTOR_GROUP_SIZE // 2)) (vec3 lpos))
+                            w :=
+                                vec3
+                                    blend-factor w.x BLEND+X BLEND-X
+                                    blend-factor w.y BLEND+Y BLEND-Y
+                                    blend-factor w.z BLEND+Z BLEND-Z
+                            # the weights are 0 or 1, so this picks either the
+                            # cell's own vertex or the parent-level one
+                            mix vertex vertex1
+                                max w.x w.y w.z
+
+            #cell-normal @ idx = n
+            ;
+        ;
+    # the shared arrays must be complete before any lane reads neighbours
+    barrier;
+
+    # phase 2: the lane index selects one cell of the sector grid
+    # NOTE(review): this covers every sector cell only if NATIVE_LANE_WIDTH
+    # equals SECTOR_GROUP_VOLUME - confirm
+    lpos := (index2id lane-idx)
+    idx := (id2svindex lpos)
+
+    cell-scale := sector-scale * (1.0 / SECTOR_GROUP_SIZE)
+    # map a cell-local vertex (range -1..1 across the cell) to world space
+    inline transform-vertex (v dpos)
+        cell-origin := sector-origin + cell-scale * (vec3 dpos)
+        cell-origin + cell-scale * (v * 0.5 + 0.5)
+
+    cflags := (copy (cell-corner-flags @ idx))
+
+    static-if USE_CATMULL_CLARK
+        # variant that accumulates weighted face/edge/centre points in
+        # `verts` first and then emits quads built from those points
+        inline getvertex (offset)
+            dpos := lpos + offset
+            idx := (id2svindex dpos)
+            v := (copy (cell-vertex @ idx))
+            transform-vertex v dpos
+
+        # xyz accumulates weighted positions, w accumulates the weights
+        local verts : (array vec4 7)
+        local n = 0
+        va-map
+            inline (i)
+                verts @ i = (vec4 0)
+            va-range 7
+        let C00- C00+ C0-0 C0+0 C-00 C+00 C000 = (va-range 7)
+
+        let v000 = (getvertex (ivec3 0 0 0))
+        #inline check-edge (IDX ofs mask)
+            x := (cflags & mask)
+            if ((x != mask) & ((x ^ mask) != mask))
+                m := (v000 + (getvertex ofs))
+                w @ C00- = (vec4 m 2)
+
+        inline edgebits (a b)
+            (cflags >> a as u32) & 1, (cflags >> b as u32) & 1
+
+        # returns (corner bits differ, first corner bit is 0)
+        inline edge (a b)
+            let u v = (edgebits a b)
+            (u != v), (u == 0)
+
+        inline collect-plane (Du fpermute)
+            Dv := (Du + 1) % 3
+            Dw := (Du + 2) % 3
+            for u v in (dim 2 2)
+                i0 := (u << Du) | (v << Dv)
+                i1 := i0 ^ (1 << Dw)
+                let set? flip? = (edge i0 i1)
+                if set?
+                    v00 := v000
+                    let du dv = (u * 2 - 1) (v * 2 - 1)
+                    v01 := (getvertex (fpermute du  0))
+                    v10 := (getvertex (fpermute  0 dv))
+                    v11 := (getvertex (fpermute du dv))
+                    # face vertices
+                    fv := ((v00 + v01 + v10 + v11) / 4)
+                    # edge vertices
+                    ev01 := (v00 + v01) / 2
+                    ev10 := (v00 + v10) / 2
+                    verts @ C000 += (vec4 (fv + ev01 + ev10) 3)
+                    verts @ (Du * 2 + u) += (vec4 (fv + ev01) 2)
+                    verts @ (Dv * 2 + v) += (vec4 (fv + ev10) 2)
+                    n += 1
+                    ;
+
+        collect-plane 0 # XY
+            inline (du dv) (ivec3 du dv 0)
+        collect-plane 1 # YZ
+            inline (du dv) (ivec3 0 du dv)
+        collect-plane 2 # ZX
+            inline (du dv) (ivec3 dv 0 du)
+
+        n := n as f32
+        verts @ C000 += n * (n - 3) * (vec4 v000 1)
+
+        inline build-plane (Du fpermute)
+            Dv := (Du + 1) % 3
+            Dw := (Du + 2) % 3
+            for u v in (dim 2 2)
+                i0 := (u << Du) | (v << Dv)
+                i1 := i0 ^ (1 << Dw)
+                let set? flip? = (edge i0 i1)
+                if set?
+                    flip? := flip? ^ ((u ^ v) == 1)
+                    v00 := v000
+                    let du dv = (u * 2 - 1) (v * 2 - 1)
+                    v01 := (getvertex (fpermute du  0))
+                    v10 := (getvertex (fpermute  0 dv))
+                    v11 := (getvertex (fpermute du dv))
+                    # face vertices
+                    fv := (v00 + v01 + v10 + v11) / 4
+                    # edge vertices
+                    ev01 := (copy (verts @ (Du * 2 + u)))
+                    ev01 := ev01.xyz / ev01.w
+                    ev10 := (copy (verts @ (Dv * 2 + v)))
+                    ev10 := ev10.xyz / ev10.w
+                    # center vertex
+                    cv := (copy (verts @ C000))
+                    cv := cv.xyz / cv.w
+                    let ev01 ev10 =
+                        if flip? (_ ev10 ev01)
+                        else (_ ev01 ev10)
+                    generate-quad
+                        Vertex (vec4   cv 1) (vec4 0)
+                        Vertex (vec4 ev01 1) (vec4 0)
+                        Vertex (vec4 ev10 1) (vec4 0)
+                        Vertex (vec4   fv 1) (vec4 0)
+
+        build-plane 0 # XY
+            inline (du dv) (ivec3 du dv 0)
+        build-plane 1 # YZ
+            inline (du dv) (ivec3 0 du dv)
+        build-plane 2 # ZX
+            inline (du dv) (ivec3 dv 0 du)
+
+    else
+        # direct variant: one quad per crossed edge, built from the vertices
+        # of this cell and three neighbouring cells
+        inline getidxvertex (idx dpos)
+            v := (copy (cell-vertex @ idx))
+            #n := (copy (cell-normal @ idx))
+            Vertex
+                vec4 (transform-vertex v dpos) 1
+                vec4 0 0 1 0
+
+        inline getvertex (offset)
+            dpos := lpos + offset
+            idx := (id2svindex dpos)
+            getidxvertex idx dpos
+
+        v00 := (getidxvertex idx lpos)
+        # flag of corner 7, the (1 1 1) corner of this cell
+        centerbit := (cflags >> 7) & 1
+        flip? := (centerbit != 0)
+
+        for i in (range 3)
+            # compare corner 7 with the corner that differs from it only in
+            # axis i; a mismatch means that edge is crossed
+            if ((centerbit ^ ((cflags >> ((1 << i as u32) ^ 7)) & 1)) != 0)
+                # unit offsets along the two other axes
+                v1 := ((ivec3 0b100 0b001 0b010) >> i) & 1
+                v2 := ((ivec3 0b010 0b100 0b001) >> i) & 1
+                # swapping the two offsets reverses the quad's winding
+                let v1 v2 =
+                    if flip? (_ v2 v1)
+                    else (_ v1 v2)
+                let v01 = (getvertex v1)
+                let v10 = (getvertex v2)
+                let v11 = (getvertex (v1 | v2))
+                generate-quad v00 v01 v10 v11
+
+    ;
+
+# values written by rasterize-vert (.out) and read by rasterize-frag (.in)
+inout normal : vec3
+inout depthval : f32
+inout albedo : vec4
+inout matdata : vec4
+fn rasterize-vert ()
+    # Vertex stage of the raster pass: reads the vertex selected by
+    # gl_VertexID from vertex-in.entries, runs its position through
+    # transform-pos and the projection, and forwards normal, depth and
+    # constant material values to the fragment stage.
+    let vertex-index = ((deref gl_VertexID) as u32)
+    let vin = (deref (vertex-in.entries @ vertex-index))
+    let coord = (vec3 vin.pos.xyz)
+    # `lod` and `tcoord` are only referenced by the disabled code below
+    let lod = (vin.pos.w as f32)
+
+    let tcoord = coord
+
+    #let dist = (matmapf tcoord lod)
+    #let material =
+        dist.material
+    #let dist = dist0
+    #let material =
+        'mix dist0.material dist1.material l
+    let n = (vec3 vin.normal.xyz)
+    #normalmapf tcoord lod # (r * 0.5)
+
+    # rotate it a little
+    #embed
+        let a = ((deref shglobals.time) * 0.2)
+        let c s = (cos a) (sin a)
+
+        n :=
+            vec3
+                c * n.x - s * n.z
+                n.y
+                s * n.x + c * n.z
+
+        coord :=
+            vec3
+                c * coord.x - s * coord.z
+                coord.y
+                s * coord.x + c * coord.z
+
+    #coord :=
+        coord + (vec3 0 0 1)
+    #n := (transform-dist n)
+
+    #if PROJECT_FINAL_VERTEX
+
+    let coord = (transform-pos coord)
+
+    let proj =
+        calc-projection;
+
+    let pcoord =
+        'project proj
+            vec4 coord 1.0
+
+    # with VISUALIZE_IDS a hash of the vertex index replaces the normal
+    normal.out =
+        #(viridis (lod / MAX_WORLD_LOD)) * 2.0 - 1.0
+        do
+            static-if VISUALIZE_IDS ((vec3hash (vertex-index as f32)) * 2.0 - 1.0)
+            else n
+    # z of the position returned by transform-pos
+    depthval.out = coord.z
+    albedo.out = (vec4 1) #material.albedo
+    matdata.out =
+        #vec4 material.roughness material.metallic 0 0
+        vec4 1 0 0 0
+    gl_Position = pcoord
+    ;
+
+fn pack-surfel-data (normal depth color matdata)
+    # Pack one surfel into a vec4 of float-typed bit patterns:
+    #   x = normal  (pack_normal_snorm, then packSnorm2x16)
+    #   y = matdata (packUnorm4x8)
+    #   z = color   (packUnorm4x8)
+    #   w = depth, stored unchanged
+    inline as-float (bits)
+        bitcast bits f32
+    vec4
+        as-float (packSnorm2x16 (pack_normal_snorm normal))
+        as-float (packUnorm4x8 matdata)
+        as-float (packUnorm4x8 color)
+        depth
+
+fn unpack-surfel-data (frag)
+    # Inverse of pack-surfel-data. Returns four values in the order
+    # normal, depth, color, matdata.
+    inline as-bits (value)
+        bitcast value u32
+    _
+        unpack_normal_snorm (unpackSnorm2x16 (as-bits frag.x))
+        frag.w
+        unpackUnorm4x8 (as-bits frag.z)
+        unpackUnorm4x8 (as-bits frag.y)
+
+out out_Color : vec4
+    binding = 0
+fn rasterize-frag ()
+    # Fragment stage of the raster pass: packs the interpolated inputs into
+    # a single vec4 and writes it to out_Color.
+    let surface-normal = (deref normal.in)
+    let surface-depth = (deref depthval.in)
+    let surface-albedo = (deref albedo.in)
+    let surface-matdata = (deref matdata.in)
+    out_Color =
+        pack-surfel-data surface-normal surface-depth surface-albedo surface-matdata
+
+#uniform img-target-rgba32f : (image2D rgba32f)
+    binding = IMAGE_TARGET_RGBA32F
+    \ coherent writeonly restrict
+
+fn mixdown (uv)
+    # Shade one pixel from the packed surfel buffer bound to smp-screen:
+    # unpack the surfel under the current fragment, light it with a single
+    # directional light plus a small ambient term, then tonemap and convert
+    # to sRGB. Pixels whose stored depth is 0 get a fixed background colour.
+    let uv2 =
+        (uv * 2.0 - 1.0) * (vec2 shglobals.aspect 1)
+    # gl_FragCoord.xy already lies on the pixel centre (index + 0.5), so
+    # truncating it yields the texel index. Adding another 0.5 first, as this
+    # code used to, selects texel (x + 1, y + 1) instead.
+    let uv = (ivec2 ((deref gl_FragCoord) . xy))
+
+    let col =
+        texelFetch smp-screen uv 0
+    let normal depth color matdata = (unpack-surfel-data col)
+    if (depth == 0.0)
+        return
+            vec4 0.1 0.1 0.2 1
+    let proj =
+        calc-projection;
+    # NOTE(review): uv2 has already been remapped with * 2 - 1 above and is
+    # remapped a second time here - confirm this is intended
+    let coord =
+        vec3 ((uv2 * 2.0 - 1.0) * depth / proj.aspect) depth
+    let rd = (normalize coord)
+
+    # fixed light direction
+    let l =
+        normalize
+            vec3 0.5 -1.0 0.25
+    let lambert =
+        max 0.0 (dot normal l)
+
+    let albedo = color.rgb
+    let ambient =
+        ((dot normal (vec3 0.0 -1.0 0.0)) * 0.5 + 0.5) * 0.1
+    let exposure = 2.0
+
+    let roughness = matdata.x
+    let metallic = matdata.y
+    let color =
+        linear->sRGB
+            tonemap
+                * exposure
+                    +
+                        ambient * albedo * (1.0 - metallic)
+                        * lambert
+                            BRDF albedo
+                                roughness
+                                metallic
+                                \ l rd normal
+
+    return
+        vec4 color 1.0
+        #vec4
+            #\ uv 0.0
+            #normal * 0.5 + 0.5
+            vec3
+                (dot normal (normalize (vec3 0 -1 0))) * 0.5 + 0.5
+            #normhue depth
+            #normhue (radius / 16.0)
+            #normhue (w / 8.0)
+            1.0
+
+fn visualize-buffer (uv)
+    # Debug view of the packed surfel buffer bound to smp-screen: shows the
+    # stored normal remapped from -1..1 to 0..1 as a colour, with alpha 0.
+    # Pixels whose stored depth is 0 get the fog colour; with FOG enabled the
+    # result is additionally blended towards the fog colour by depth.
+    # `uv` is accepted for interface parity with mixdown but not used.
+
+    # gl_FragCoord.xy already lies on the pixel centre (index + 0.5), so
+    # truncating it yields the texel index. Adding another 0.5 first, as this
+    # code used to, selects texel (x + 1, y + 1) instead.
+    let texel = (ivec2 ((deref gl_FragCoord) . xy))
+
+    let col =
+        texelFetch smp-screen texel 0
+    let normal depth color matdata = (unpack-surfel-data col)
+    let fog-color = (vec4 0.6 0.8 1 1)
+
+    if (depth == 0.0)
+        return fog-color
+
+    let col =
+        vec4
+            normal * 0.5 + 0.5
+            #normhue depth
+            #normhue (radius / 16.0)
+            #color
+            0.0
+    let col =
+        static-if FOG
+            mix col fog-color
+                1.0 - (exp2 (-depth * FOG_RATE))
+        else col
+
+    return col
+
+fn shader (uv)
+    # Screen pass handed out by `main`. Currently shows the debug view;
+    # the shaded mixdown path is disabled.
+    #mixdown uv
+    visualize-buffer uv
+
+################################################################################
+
+# work list of encoded cells visited by collect-sectors
+global sector-queue : (Array u32 SECTOR_CAPACITY)
+# sectors selected by collect-sectors, uploaded to the GPU every frame
+global sectors : (Array Sector SECTOR_CAPACITY)
+
+fn subdivide1d? (p t S)
+    """"One-axis subdivision test.
+        p : i32 = tile position
+        t : i32 = camera position
+        S : i32 = tile size
+        True when the camera offset d = t - p satisfies -S < d < 2 * S - 1.
+    offset := t - p
+    # signed margin when the camera is at or past the tile position
+    ahead := offset - 2 * S + 1
+    # signed margin when the camera is before the tile position
+    behind := -offset - S
+    (? (offset >= 0) ahead behind) < 0
+
+fn subdivide3d? (T t)
+    """"T : ivec4 = tile position [0 .. 1<<level]x[0 .. 1<<level]x[0 .. 1<<level], tile level 0..N
+        t : ivec3 = camera position
+    # tiles at level 0 (or below) are never subdivided
+    if (T.w <= 0) false
+    else
+        # compile-time choice between three criteria; only the `elseif 1`
+        # branch is active, the other two are kept as alternatives
+        static-if 0
+            R := (1 << T.w) as f32 * 0.5
+            p0 := (vec3 ((T.xyz as vec-type) << T.w))
+            pc := p0 + R
+            l := (length ((vec3 t) - pc))
+            ((R / l) * SUBDIVIDE_SCALE) > 1.0
+        elseif 1
+            # subdivide if 3x3 tile contains camera
+            # camera position in tiles of this level, relative to the tile
+            t := ((t >> T.w) - T.xyz)
+            #(max (abs t.x) (abs t.y) (abs t.z)) <= 1
+            # largest per-axis tile distance must stay within the radius
+            (max (abs t.x) (abs t.y) (abs t.z)) <= SUBDIVIDE_RADIUS
+        else
+            # subdivide if distance to camera falls below 1:2 threshold
+            S := 1 << T.w
+            p := (T.xyz as vec-type) << T.w
+            &
+                subdivide1d? p.x t.x S
+                subdivide1d? p.y t.y S
+                subdivide1d? p.z t.z S
+
+inline encode-face-bridge-flags (ratiox ratioy ratioz)
+    # Combine three ratios into one value: x in bits 0-1, y shifted to
+    # bit 2, z shifted to bit 4.
+    let y-bits = (ratioy << 2)
+    let z-bits = (ratioz << 4)
+    | ratiox y-bits z-bits
+
+inline decode-face-bridge-flags (flags)
+    # Inverse of encode-face-bridge-flags: returns the three 2-bit fields
+    # found at bits 0, 2 and 4 as i32 values, in x y z order.
+    let bits = (flags as i32)
+    inline field (shift)
+        (bits >> shift) & 3
+    _ (field 0) (field 2) (field 4)
+
+fn collect-sectors ()
+    # Rebuild `sectors`, the list of cells to vectorize this frame.
+    # `sector-queue` serves as the work list: it is seeded with the root
+    # cell, and the loop below appends child cells to it while iterating, so
+    # appended cells are visited in turn. A cell that subdivide3d? rejects is
+    # emitted as a sector; any other cell is replaced by its eight children.
+    'clear sector-queue
+    'clear sectors
+    'append sector-queue
+        encode-cell 0:u32 0:u32 0:u32 0:u32
+
+    # Camera position: the origin pushed through the inverse view matrix,
+    # normalized by WORLD_SCALE and scaled to the CASCADE_SIZE grid. It is the
+    # same for every queue entry, so it is computed once up front.
+    q := cpu_shglobals.view-inverse * (vec4 0 0 0 1)
+    p := (ivec3 ((q.xyz / WORLD_SCALE + 0.5) * CASCADE_SIZE))
+
+    for code in sector-queue
+        let lvl x y z = (decode-cell code)
+        coord := (ivec4 x y z (MAX_CASCADE_DEPTH - lvl))
+        if (not (subdivide3d? coord p))
+            lod := coord.w + 1
+            # For the face neighbour at `offset`: take the tile one level up
+            # that contains it. If that tile is not subdivided, set `flag`.
+            inline blend-flag (offset flag)
+                let parent = (ivec4 ((coord.xyz + offset) // 2) lod)
+                ? (subdivide3d? parent p) 0:u32 flag
+            let flags =
+                |
+                    blend-flag (ivec3 1 0 0) BLEND+X
+                    blend-flag (ivec3 -1 0 0) BLEND-X
+                    blend-flag (ivec3 0 1 0) BLEND+Y
+                    blend-flag (ivec3 0 -1 0) BLEND-Y
+                    blend-flag (ivec3 0 0 1) BLEND+Z
+                    blend-flag (ivec3 0 0 -1) BLEND-Z
+            'append sectors (Sector code flags)
+        else
+            # centre of the cell, in the same units as `p`
+            c0 := (ivec3 (coord.xyz as vec-type << coord.w))
+            c1 := (ivec3 ((coord.xyz + 1) << coord.w))
+            c := (c0 + c1) // 2
+            # one bit per axis: set when the camera is at or past the centre
+            mask :=
+                |
+                    ? (p.x >= c.x) 1:u32 0:u32
+                    ? (p.y >= c.y) 2:u32 0:u32
+                    ? (p.z >= c.z) 4:u32 0:u32
+            # after the inversion, index 0 maps to the complement of the
+            # camera's bits and index 7 to the camera's bits themselves
+            # (presumably octant selectors - confirm against child-cell)
+            mask := mask ^ 7
+            for index in (range 8:u32)
+                'append sector-queue
+                    child-cell code (index ^ mask)
+    ;
+
+################################################################################
+
+# Creates the GL objects used by the demo and returns two values: the
+# per-frame setup function and the screen-pass shader.
+inline main ()
+    # off-screen colour target that receives the packed surfels
+    global fb-scene-color = (GL.Texture GL.TEXTURE_2D)
+    'setup fb-scene-color
+        size = (ivec2 2048 2048)
+        format = GL.RGBA32F
+    do
+        let h = 2048
+        GL.ClearTexImage fb-scene-color 0 GL.RGBA GL.FLOAT null
+
+    global rb-scene-depth = (GL.Renderbuffer)
+    setup-renderbuffer rb-scene-depth 2048 2048
+        format = GL.DEPTH_COMPONENT
+    global fb-scene = (GL.Framebuffer)
+    setup-framebuffer fb-scene
+        color = fb-scene-color
+        rb-depth = rb-scene-depth
+
+    # no vertex attributes are bound; rasterize-vert reads its vertices from
+    # vertex-in.entries by gl_VertexID
+    global vao-empty = (GL.VertexArray)
+
+    global pg-rasterize = (GL.Program)
+    call
+        attach-shaders (deref pg-rasterize)
+            vertex = rasterize-vert
+            fragment = rasterize-frag
+            #debug = true
+
+    global rg : (Option RG)
+
+    # Per-frame work: create or edit the world texture, collect sectors,
+    # run the vertex-generating compute pass, then rasterize into fb-scene.
+    fn per-frame-setup (size pg-test frame)
+        let rg =
+            'force-unwrap rg
+        from (methodsof rg) let static program compute-program indirect-draw-arrays-setup
+
+        GL.BindTextureUnit 0 fb-scene-color
+        GL.Uniform smp-screen 0
+
+        let world =
+            static GL.Texture
+                inline ()
+                    let tex = (GL.Texture GL.TEXTURE_3D)
+                    'setup tex
+                        size = (ivec3 WORLD_SIZE)
+                        format = WORLD_PIXELFMT
+                        lod = true
+                        #min-filter = GL.LINEAR_MIPMAP_LINEAR
+                        #mag-filter = GL.LINEAR
+                    tex
+
+        # generate mipmaps
+        let pg-genworldlod = (compute-program generate-world-lod)
+        if (frame == 0)
+            report "generating world..."
+            # generate world
+            let pg-genworld = (compute-program generate-world)
+            GL.UseProgram pg-genworld
+            GL.BindImageTexture BINDING_IMG_WORLD_OUT world 0 GL.TRUE 0
+                GL.WRITE_ONLY
+                WORLD_PIXELFMT
+            GL.DispatchCompute (unpack ((WORLD_SIZE + GROUP_SIZE - 1) // GROUP_SIZE))
+            GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT)
+            report "generating world mipmaps..."
+            GL.UseProgram pg-genworldlod
+            # each level is produced from the level below it
+            for lod in (range 1 (MAX_WORLD_LOD_I + 1))
+                GL.BindImageTexture BINDING_IMG_WORLD_IN world (lod - 1) GL.TRUE 0 GL.READ_ONLY WORLD_PIXELFMT
+                GL.BindImageTexture BINDING_IMG_WORLD_OUT world lod GL.TRUE 0 GL.WRITE_ONLY WORLD_PIXELFMT
+                GL.DispatchCompute (unpack (((WORLD_SIZE >> (lod as u32)) + GROUP_SIZE - 1) // GROUP_SIZE))
+                GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT)
+            report "done."
+
+        do
+            # edit world
+            # generate world
+            let pg-updateworld = (compute-program update-world)
+            local mx = 0; local my = 0
+            mstate := (SDL_GetMouseState &mx &my)
+            # a non-zero state means at least one mouse button is held
+            if (mstate != 0)
+                #print mstate
+                GL.UseProgram pg-updateworld
+                GL.Uniform mouse-state (mstate as i32)
+                GL.BindImageTexture BINDING_IMG_WORLD_INOUT world 0 GL.TRUE 0
+                    GL.READ_WRITE
+                    WORLD_PIXELFMT
+                GL.DispatchCompute (unpack ((WORLD_SIZE + GROUP_SIZE - 1) // GROUP_SIZE))
+                GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT)
+                # generate mipmaps
+                GL.UseProgram pg-genworldlod
+                for lod in (range 1 (MAX_WORLD_LOD_I + 1))
+                    GL.BindImageTexture BINDING_IMG_WORLD_IN world (lod - 1) GL.TRUE 0 GL.READ_ONLY WORLD_PIXELFMT
+                    GL.BindImageTexture BINDING_IMG_WORLD_OUT world lod GL.TRUE 0 GL.WRITE_ONLY WORLD_PIXELFMT
+                    GL.DispatchCompute (unpack (((WORLD_SIZE >> (lod as u32)) + GROUP_SIZE - 1) // GROUP_SIZE))
+                    GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT)
+
+        # update terrain
+
+        collect-sectors;
+        sector-count := (countof sectors)
+        if (frame %  120 == 0)
+            print (sector-count as i32) "sectors"
+        # the buffer is allocated once at full capacity; only the sectors in
+        # use are uploaded each frame
+        let sector_buffer_sz = ((sizeof Sector) * SECTOR_CAPACITY)
+        let sector_buffer =
+            static GL.Buffer
+                inline ()
+                    let buf = (GL.Buffer)
+                    GL.NamedBufferData buf (i32 sector_buffer_sz) null GL.DYNAMIC_READ
+                    buf
+        GL.NamedBufferSubData sector_buffer 0
+            i32 (sector-count * (sizeof Sector))
+            & (sectors @ 0)
+
+        let vertex_buffer_sz = ((sizeof Vertices) + (sizeof Vertex) * MAX_VERTICES)
+        let vertex_buffer =
+            static GL.Buffer
+                inline ()
+                    let buf = (GL.Buffer)
+                    GL.NamedBufferData buf (i32 vertex_buffer_sz) null GL.STREAM_COPY
+                    buf
+
+        do
+            # clear vertex buffer count
+            let ptr =
+                GL.MapNamedBufferRange vertex_buffer 0 (sizeof u32)
+                    | GL.MAP_WRITE_BIT
+                        GL.MAP_INVALIDATE_BUFFER_BIT
+                        #GL.MAP_UNSYNCHRONIZED_BIT
+            let ptr = (bitcast ptr (mutable pointer Vertices))
+            ptr.count = 0:u32
+            GL.UnmapNamedBuffer vertex_buffer
+        GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+            BINDING_BUF_VERTEX_OUT
+            vertex_buffer
+            \ 0:i64 (i64 vertex_buffer_sz)
+        GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+            BINDING_BUF_SECTOR_IN
+            sector_buffer
+            \ 0:i64 (i64 (sector-count * (sizeof Sector)))
+        GL.BindTextureUnit 1 world
+        let pg-gen-cell = (compute-program generate-cell-verts)
+        GL.UseProgram pg-gen-cell
+        GL.Uniform smp-world 1
+        # one workgroup per sector, issued in chunks of at most
+        # MAX_WORKGROUPS; sector-offset tells the shader where a chunk starts
+        MAX_WORKGROUPS := 32768:u32
+        for i in (range 0:u32 (sector-count as u32) MAX_WORKGROUPS)
+            offset := i
+            GL.Uniform sector-offset offset
+            sz := (min MAX_WORKGROUPS (sector-count as u32 - i))
+            GL.DispatchCompute sz 1 1
+
+        GL.MemoryBarrier GL.SHADER_STORAGE_BARRIER_BIT
+
+        #############
+
+        # reads the vertex counter back from the buffer and prints it as a
+        # triangle count
+        inline print-in-count ()
+            let ptr =
+                GL.MapNamedBufferRange vertex_buffer 0 (sizeof u32)
+                    GL.MAP_READ_BIT
+            let ptr = (bitcast ptr (pointer Vertices))
+            print (ptr.count / 3) "triangles"
+            GL.UnmapNamedBuffer vertex_buffer
+
+        if ((frame % 60) == 0)
+            print-in-count;
+
+        # the draw arguments are produced from vertex-in.count:
+        # (vertex count, 1, 0, 0)
+        vvv bind setup-draw-arrays exec-draw-arrays
+        indirect-draw-arrays-setup
+            inline ()
+                _
+                    deref vertex-in.count
+                    1
+                    0
+                    0
+
+        GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+            BINDING_BUF_VERTEX_IN
+            vertex_buffer
+            \ 0:i64 (i64 vertex_buffer_sz)
+        setup-draw-arrays;
+
+        do
+            GL.BindFramebuffer GL.FRAMEBUFFER fb-scene
+            GL.Viewport 0 0 (i32 size.x) (i32 size.y)
+            GL.ClearColor 0 0 0 0
+            # depth is cleared to 0 and the test is GREATER, so larger depth
+            # values win
+            GL.DepthFunc GL.GREATER
+            GL.ClearDepthf 0
+            GL.DepthRangef -1 1
+            #GL.Enable GL.CULL_FACE
+            GL.Disable GL.CULL_FACE
+            GL.CullFace GL.BACK
+            GL.Enable GL.DEPTH_TEST
+            GL.Clear
+                |
+                    GL.COLOR_BUFFER_BIT
+                    GL.DEPTH_BUFFER_BIT
+                    GL.STENCIL_BUFFER_BIT
+
+            GL.UseProgram pg-rasterize
+            #GL.BindTextureUnit 1 world
+            #GL.Uniform smp-world 1
+            GL.BindVertexArray vao-empty
+            exec-draw-arrays GL.TRIANGLES
+
+            # restore state and switch back to the default framebuffer
+            GL.Disable GL.DEPTH_TEST
+            GL.Disable GL.CULL_FACE
+            GL.BindFramebuffer GL.FRAMEBUFFER 0
+
+    let per-frame-setup =
+        static-typify per-frame-setup ivec2 GL.Program i32
+    rg = (RG)
+
+    _ per-frame-setup shader
+
+fn program ()
+    # Hands `main` to render-fragment-shader with a size of 1024.
+    render-fragment-shader main
+        #debug = true
+        size = (ivec2 1024)
+
+
+# entry point: call `program` directly; the disabled branch compiles it
+# with 'O3 first and calls the compiled function instead
+static-if true
+    program;
+else
+    define f
+        compile
+            typify program
+            'O3
+
+    f;