7468f43b71ed — Leonard Ritter a month ago
* check-in subjective MT
1 files changed, 925 insertions(+), 0 deletions(-)

A => testing/test_subjective_mt.sc
A => testing/test_subjective_mt.sc +925 -0
@@ 0,0 1,925 @@ 
+
+#
+    marching tetrahedra
+
+
+#
+    tetrahedral marching triangles
+
+    step 1: identify by mask for each tet if its relevant vertices are partially set
+    mask has 8 bits, 3 tets per bitarray = 2 x 24 bits
+    all bits set or all bits not set = ignore, otherwise generate surface
+
+    one tet is handled per thread
+
+
+using import glm
+using import glsl
+using import Array
+using import Box
+using import struct
+import ..lib.tukan.use
+
+import tukan.voxel
+let voxel = tukan.voxel
+
+using import tukan.gl
+using import tukan.bitmap
+using import tukan.packing
+using import tukan.random
+using import tukan.color
+using import tukan.perfect_hash
+using import tukan.raytrace
+using import tukan.sdf
+using import tukan.sdl
+using import tukan.rotation
+using import tukan.brdf
+using import tukan.normal
+using import tukan.projection
+using import tukan.derivative
+using import tukan.isosurface
+using import tukan.hash
+using import .testfragment
+
+# reserve 10MB for each voxel buffer
+    at 4 bytes per voxel
+let MAX_VOXELS = ((10 * (1 << 20)) // 4)
+# instance count of the final draw call; the sizes of the level 7 and simplify
+# compute dispatches are derived from it as well
+let POINTLIMIT = (1600000 * 6)
+
+# binding points of the shader storage buffers
+let BINDING_BUF_CELLS_IN = 1
+let BINDING_BUF_CELLS_OUT = 2
+let BINDING_BUF_DRAW_VOXELS_CMD = 3
+# NOTE(review): not referenced anywhere in the visible code
+let BINDING_BUF_DISPATCH_CMD = 4
+
+# only referenced by a commented-out image uniform further down
+let IMAGE_TARGET_RGBA32F = 1
+
+# explicit uniform locations
+let UNIFORM_LEVEL = 1
+let UNIFORM_PROGRAM = 2
+let UNIFORM_SCREEN_SAMPLER = 3
+
+# values of u-program; supershader switches on them to pick the compute pass
+let ProgramVoxelizeInit = 0
+let ProgramVoxelize = 1
+let ProgramSimplify = 2
+
+# NOTE(review): LEVELS, SEARCH_R and SEARCH_Rf are not referenced in the visible code
+let LEVELS = 8
+
+let SEARCH_R = 5
+let SEARCH_Rf = (SEARCH_R as f32)
+
+# scales the half cell size in the surface proximity test of subdivide-cell
+let sqrt3 = (sqrt 3.0)
+
+run-stage;
+
+# Layout of a cell list buffer: an entry counter followed by an unsized array
+# of u32 keys.
+struct CellVals plain
+    # number of entries; the compute passes advance it with atomicAdd
+    count : u32
+    # each entry holds a key
+    entries : (array u32)
+
+# cell list read by the current pass
+buffer buf-cells-in : CellVals
+    binding = BINDING_BUF_CELLS_IN
+    \ readonly coherent
+
+# cell list the current pass appends to
+buffer buf-cells-out : CellVals
+    binding = BINDING_BUF_CELLS_OUT
+    \ coherent
+
+# selects the compute pass run by supershader (one of the Program* constants)
+uniform u-program : i32
+    location = UNIFORM_PROGRAM
+
+# subdivision level of the current pass; cell sizes are derived from it
+uniform u-level : i32
+    location = UNIFORM_LEVEL
+
+# texture sampled by the fullscreen pass; the host binds fb-scene-color to it
+uniform smp-screen : sampler2D
+    location = UNIFORM_SCREEN_SAMPLER
+
+# Argument record for an indirect draw, all fields defaulting to zero.
+# NOTE(review): only commented-out host code refers to this buffer in the
+# visible source; no shader reads or writes it.
+struct DrawElementsIndirectCommand plain
+    count : u32 = 0
+    instanceCount : u32 = 0
+    firstIndex : u32 = 0
+    baseVertex : u32 = 0
+    baseInstance : u32 = 0
+
+buffer buf-draw-voxels-cmd : DrawElementsIndirectCommand
+    binding = BINDING_BUF_DRAW_VOXELS_CMD
+
+# distance from p to the surface of a sphere of radius 0.5 at the origin
+fn simple-sphere (p)
+    let radius = 0.5
+    (length p) - radius
+
+# Test field: a sphere of radius 0.5 combined with eight small animated
+# spheres placed at the corners (+-d, +-d, +-d).
+fn nine-spheres (p)
+    let x = (deref shglobals.time)
+    # radius of the small spheres, oscillating between 0.1 and 0.2 over time
+    r := (mix 0.1 0.2 ((sin x) * 0.5 + 0.5))
+    # per-axis offset of the small spheres from the origin
+    d := (sqrt 0.5) * 0.4
+    sdSmoothAnd
+        # main sphere blended with the four small spheres at z = +d
+        sdSmoothOr
+            (length p) - 0.5
+            min
+                (length (p - (vec3 d d d))) - r
+                (length (p - (vec3 -d d d))) - r
+                (length (p - (vec3 d -d d))) - r
+                (length (p - (vec3 -d -d d))) - r
+            0.1
+        # the four small spheres at z = -d enter with a negated distance
+        * -1.0
+            min
+                (length (p - (vec3 d d -d))) - r
+                (length (p - (vec3 -d d -d))) - r
+                (length (p - (vec3 d -d -d))) - r
+                (length (p - (vec3 -d -d -d))) - r
+        0.1
+
+# Test field: two spheres of radius 0.25 that move apart and back together
+# along the z axis over time, blended with sdSmoothOr.
+fn twoballs (p)
+    let t = (deref shglobals.time)
+    # distance of each sphere center from the origin, between 0 and 0.5
+    let offset = (((sin t) * 0.5 + 0.5) * 0.5)
+    let radius = 0.25
+    sdSmoothOr
+        (length (p - (vec3 0.0 0 offset))) - radius
+        (length (p - (vec3 0.0 0 (- offset)))) - radius
+        0.2
+
+# Test field with materials: two tori rotating in opposite directions, each
+# carrying its own material, blended with the 'sdSmoothOr method.
+fn doubletori (p)
+    let x = (deref shglobals.time)
+    # p with its xz components rotated by -time
+    let xz =
+        anglevector-rotate
+            anglevector -x
+            p.xz
+    let p1 =
+        vec3
+            xz.x
+            p.y
+            xz.y
+    # p with its xy components rotated by +time
+    let p2 =
+        vec3
+            anglevector-rotate
+                anglevector x
+                p.xy
+            p.z
+
+    'sdSmoothOr
+        sdmDist
+            sdTorus p1.xzy (vec2 0.5 0.05)
+            sdMaterial
+                vec4 1.0 0.5 0.3 1.0
+        sdmDist
+            sdTorus p2 (vec2 0.4 0.1)
+            sdMaterial
+                vec4 0.3 0.5 1.0 1.0
+                roughness = 0.1
+        0.2
+
+# Test field: two boxes with half extent 0.33 that rotate about different axes
+# at slightly different speeds, blended with sdSmoothOr.
+fn two-boxes (p)
+    let x = (deref shglobals.time)
+    # p with its xy components rotated by -time
+    let p1 =
+        vec3
+            anglevector-rotate
+                anglevector -x
+                p.xy
+            p.z
+    # p with its yz components rotated by 0.917 * time
+    let p2 =
+        vec3
+            p.x
+            anglevector-rotate
+                anglevector (x * 0.917)
+                p.yz
+    sdSmoothOr
+        sdBox p1 (vec3 0.33)
+        sdBox p2 (vec3 0.33)
+        0.2
+
+# Test field with materials: an sdUberprim shape and two offset boxes blended
+# together, then combined via 'sdSmoothAnd with a negated animated sphere.
+fn two-boxes-merge (p)
+    # offset of the two boxes from the origin (applied as -d and +d)
+    let d =
+        vec3 0.3
+    # box extent passed to sdBox
+    let sz =
+        vec3 0.5
+    let x = (deref shglobals.time)
+    # radius of the negated sphere, oscillating between 1.0 and 0.2
+    r := (mix 1.0 0.2 ((sin x) * 0.5 + 0.5))
+    'sdSmoothAnd
+        'sdSmoothOr
+            sdmDist
+                #sdSphere
+                    p - (vec3 0.01)
+                    1.0
+                sdUberprim
+                    p - (vec3 0.01)
+                    vec4 1.0 1.0 0.05 0.05
+                    vec3 0.5 0.05 0.0
+                sdMaterial
+                    vec4 1.0 0.9 0.3 1.0
+                    metallic = 1.0
+            'sdSmoothOr
+                sdmDist
+                    sdBox (p - d) sz
+                    sdMaterial
+                        vec4 1.0 0.5 0.3 1.0
+                sdmDist
+                    sdBox (p + d) sz
+                    sdMaterial
+                        vec4 0.3 0.5 1.0 1.0
+                        roughness = 0.2
+                0.5
+            0.05
+        sdmDist
+            # negated sphere distance
+            -
+                sdSphere
+                    p - (vec3 0.0 0.0 -0.5)
+                    r
+            sdMaterial
+                vec4 0.5 0.3 1.0 1.0
+        0.1
+
+# Test field: a single box at the origin; 0.33 is the extent handed to sdBox.
+fn one-box (p)
+    let extent = (vec3 0.33)
+    sdBox p extent
+
+# Scene field including material: the active scene is simple-sphere, evaluated
+# in a domain that scrolls along x over time and is passed through sdDomainRep
+# with an argument of 2.0. The commented lines select alternative scenes.
+fn matmapf (p)
+    p := p + 1.0
+    # scroll along x with time
+    p := p + (vec3 1 0 0) * shglobals.time
+    p := (sdDomainRep p 2.0)
+    #do
+        p := p.yzx * 2.0
+        (two-boxes-merge p) * 0.5
+    #doubletori p
+    sdmDist
+        simple-sphere p
+        sdMaterial
+            vec4 0.5 0.3 1.0 1.0
+
+# single sphere:
+    256^3: 89240 cells (best: 0.3ms)
+    1024^3: 1427240 cells (best: 3.5ms)
+# min: 104567 at 256^3
+# Scene field as a plain f32: evaluates matmapf and converts the result with
+# `as f32`. The commented lines are alternative scenes and an optional
+# rotation of the input.
+fn mapf (p)
+    #let x = ((radians (deref shglobals.time)) * 10.0)
+    #let xz =
+        anglevector-rotate
+            anglevector -x
+            p.xz
+    #let p =
+        vec3
+            xz.x
+            p.y
+            xz.y
+    #simple-sphere p
+    #twoballs p
+    #doubletori p
+    #nine-spheres p
+    #two-boxes p
+    #one-box p
+    (matmapf p) as f32
+
+# Negated result of sdNormalFast for the scene field mapf at p; r is handed
+# through to sdNormalFast unchanged.
+fn normalmapf (p r)
+    let n = (sdNormalFast mapf p r)
+    - n
+
+# radius range used by map_onion_radius
+let ONION_NEAR = 0.6
+let ONION_FAR = 100.0
+# NOTE(review): only referenced from commented-out code in the visible source
+let ONION_LAYERS = 32.0
+
+# Interpolates from a (x = 0) to b (x = 1) in log2 space; C shifts the curve.
+# With C = 1.0 this reduces to exp2 (mix (log2 a) (log2 b) x).
+fn expmix (a b x C)
+    let log-start = (log2 (b * (1.0 - C) + a * C))
+    let log-end = (log2 b)
+    let blended = (exp2 (log-start * (1.0 - x) + log-end * x))
+    (b * (C - 1.0) + blended) / C
+
+# Maps a point of the -1..1 voxel domain to scene space: p.xy is decoded by
+# unpack_normal_snorm and scaled by a radius that grows from ONION_NEAR to
+# ONION_FAR with p.z. Returns the mapped point and r * 2.5, which callers use
+# to scale their distance threshold.
+fn map_onion_radius (p)
+    #r := (clamp ((p.z * 0.5 + 0.5) * 0.5 + 0.5) 0.0 1.0)
+    # p.z remapped from -1..1 to 0..1
+    r := (clamp (p.z * 0.5 + 0.5) 0.0 1.0)
+    #r := (p.z * 0.5 + 0.5) * ONION_LAYERS
+    #r := (1 + 2 * (sqrt pi) / (ONION_LAYERS * 0.5)) ** r
+    #r := (exp2 (mix (log2 ONION_NEAR) (log2 ONION_FAR) r))
+    # rebinds r to the radius for that fraction
+    r := (expmix ONION_NEAR ONION_FAR r 1.0)
+    _ ((unpack_normal_snorm p.xy) * r) (r * 2.5)
+
+# Position-only variant of map_onion_radius: the second result is dropped.
+fn map_onion (p)
+    let mapped scale = (map_onion_radius p)
+    mapped
+
+# identity vertex mapping: returns p unchanged
+fn map_identity (p)
+    p
+# identity vertex mapping with a constant scale of 1.0 as second result
+fn map_identity_radius (p)
+    _ p 1.0
+
+# active vertex mapping; the commented line switches to the identity mapping
+# NOTE(review): map_vertex_rlimit is bound here but subdivide-cell calls
+# map_onion_radius directly, so swapping this line alone does not affect it
+let map_vertex map_vertex_rlimit = map_onion map_onion_radius
+#let map_vertex map_vertex_rlimit = map_identity map_identity_radius
+# when true, rasterize-vert maps the final vertex through map_vertex
+PROJECT_FINAL_VERTEX := true
+# when true, rasterize-vert outputs a hash of the cell key instead of the normal
+VISUALIZE_IDS := false
+
+# Splits the cell identified by `key` into its eight children at level u-level
+# and appends the keys of the children that pass the surface proximity test
+# to buf-cells-out.
+# NOTE(review): the append is not checked against the capacity of the output
+# buffer (MAX_VOXELS entries on the host side).
+fn subdivide-cell (key)
+    let level = ((deref u-level) as u32)
+    # half the edge length of a child cell, d is the full edge length
+    let r = (/ (f32 (1:u32 << level)))
+    let d = (2.0 * r)
+    #let rlimit =
+        if (level == 8:u32) r
+        else (sqrt3 * r)
+
+    # make room for the 3-bit child index in the low bits
+    key := (key << 3:u32)
+
+    # center of child 0 in the -1..1 domain
+    ucoord := (unpack-morton3x10 key)
+    coord := (vec3 ucoord) * d - 1.0 + r
+
+    # keys collected by this invocation and how many of them are valid
+    global cells : (array u32 8)
+    global written = 0:u32
+
+    # records child `i` (center offset `ofs`) when the absolute field value at
+    # its mapped center is below sqrt3 * r scaled by the factor vr returned by
+    # map_onion_radius
+    fn test-cell (i ofs key coord r)
+        let rlimit = (sqrt3 * r)
+        key := key | i
+        pos := coord + ofs
+        let vx vr = (map_onion_radius pos)
+        let dist = (mapf vx)
+        let hit = ((abs dist) < (rlimit * vr))
+        if hit
+            cells @ (deref written) = key
+            written += 1:u32
+
+    # shorthand that passes the surrounding key, coord and r along
+    inline test-cell (i ofs)
+        test-cell i ofs key coord r
+
+    # bit 0 of the child index selects +x, bit 1 +y, bit 2 +z
+    test-cell 0:u32 (vec3 0 0 0)
+    test-cell 1:u32 (vec3 d 0 0)
+    test-cell 2:u32 (vec3 0 d 0)
+    test-cell 3:u32 (vec3 d d 0)
+    test-cell 4:u32 (vec3 0 0 d)
+    test-cell 5:u32 (vec3 d 0 d)
+    test-cell 6:u32 (vec3 0 d d)
+    test-cell 7:u32 (vec3 d d d)
+
+    # commit
+    # one atomicAdd reserves a contiguous range for all collected keys
+    if (written != 0:u32)
+        let id = (atomicAdd buf-cells-out.count (deref written))
+        for i in (range (deref written))
+            buf-cells-out.entries @ (id + i) = (cells @ i)
+
+# Classifies the eight corners of the cell identified by `key` (at level
+# u-level - 1) as inside or outside, and appends one entry to buf-cells-out for
+# every one of the six tetrahedra around the 0-7 diagonal whose four corners
+# do not all lie on the same side of the surface.
+#
+# Entry layout: bits 0..2 tetrahedron index, bits 3.. the cell key, bits 28..31
+# the inside flags of the four tetrahedron corners (corner 0, k1, corner 7, k3).
+fn simplify-cell (key)
+    let level = (((deref u-level) as u32) - 1)
+    # half edge length and full edge length of the cell
+    let r = (/ (f32 (1:u32 << level)))
+    let d = (2.0 * r)
+
+    # minimum corner of the cell in the -1..1 domain
+    ucoord := (unpack-morton3x10 key)
+    coord := (vec3 ucoord) * d - 1.0
+    # make room for the 3-bit tetrahedron index
+    key := (key << 3:u32)
+
+    # bit i is set when the field is negative at corner i
+    global mask = 0:u32
+
+    fn check-cell (i ofs coord)
+        let dist = (mapf (map_vertex (coord + ofs)))
+        let bit = (? (dist < 0.0) 1:u32 0:u32)
+        mask |= (bit << i)
+
+    # shorthand that passes the surrounding coord along
+    inline check-cell (i ofs)
+        check-cell i ofs coord
+
+    #do
+        let coord = (coord + r)
+        let n =
+            normalmapf coord r
+        embed
+            let a = ((deref shglobals.time) * 0.2)
+            let c s = (cos a) (sin a)
+
+            n :=
+                vec3
+                    c * n.x - s * n.z
+                    n.y
+                    s * n.x + c * n.z
+
+            coord :=
+                vec3
+                    c * coord.x - s * coord.z
+                    coord.y
+                    s * coord.x + c * coord.z
+
+        coord :=
+            coord + (vec3 0 0 1)
+
+        if ((dot n (normalize coord)) < -0.3)
+            return;
+
+    # bit 0 of the corner index selects +x, bit 1 +y, bit 2 +z
+    check-cell 0:u32 (vec3 0 0 0)
+    check-cell 1:u32 (vec3 d 0 0)
+    check-cell 2:u32 (vec3 0 d 0)
+    check-cell 3:u32 (vec3 d d 0)
+    check-cell 4:u32 (vec3 0 0 d)
+    check-cell 5:u32 (vec3 d 0 d)
+    check-cell 6:u32 (vec3 0 d d)
+    check-cell 7:u32 (vec3 d d d)
+    let mask = (deref mask)
+
+    # cells that are entirely inside or entirely outside produce nothing
+    if ((mask != 0:u32) & (mask != 255:u32))
+        # entries collected by this invocation and how many of them are valid
+        global cells : (array u32 8)
+        global written = 0:u32
+
+        # records tetrahedron `i` when its four corners are neither all
+        # inside nor all outside. `checkmask` has one bit set per corner of
+        # the tetrahedron, `mask` holds the inside bits of the whole cell.
+        fn test-cell (i k1 k3 checkmask mask key packedmask)
+            # Only the corners of this tetrahedron may take part in the test.
+            # Comparing checkmask against the unmasked cell mask let nearly
+            # every tetrahedron of a mixed cell through.
+            let tetmask = (mask & checkmask)
+            let hit = ((tetmask != 0:u32) & (tetmask != checkmask))
+            if hit
+                cells @ (deref written) = key | i | (packedmask << 28:u32)
+                written += 1:u32
+
+        inline test-cell (i)
+            # packed 3-bit corner indices 1 3 2 6 4 5 1; tetrahedron i uses
+            # entries i and i + 1 next to the fixed corners 0 and 7
+            let tetverts = 0x6cc99:u32
+            k := i * 3:u32
+            k1 := (tetverts >> k) & 7:u32
+            k3 := (tetverts >> (k + 3:u32)) & 7:u32
+            # inside bits of the corners 0, k1, 7 and k3, in that order
+            packedmask :=
+                |
+                    mask & 1:u32
+                    ((mask >> k1) & 1:u32) << 1:u32
+                    ((mask >> 7:u32) & 1:u32) << 2:u32
+                    ((mask >> k3) & 1:u32) << 3:u32
+            # one bit per corner of this tetrahedron
+            checkmask :=
+                |
+                    (1:u32 << 0:u32) | (1:u32 << 7:u32)
+                    1:u32 << k1
+                    1:u32 << k3
+            static-assert (constant? checkmask)
+            test-cell i k1 k3 checkmask mask key packedmask
+
+        test-cell 0:u32
+        test-cell 1:u32
+        test-cell 2:u32
+        test-cell 3:u32
+        test-cell 4:u32
+        test-cell 5:u32
+
+        # commit
+        # one atomicAdd reserves a contiguous range for all collected entries
+        if (written != 0:u32)
+            let id = (atomicAdd buf-cells-out.count (deref written))
+            for i in (range (deref written))
+                buf-cells-out.entries @ (id + i) = (cells @ i)
+
+# First compute pass: every invocation subdivides the cell whose key is its
+# own global invocation index, so no input buffer is read.
+fn voxelize-init ()
+    subdivide-cell (deref gl_GlobalInvocationID.x)
+
+# Refinement pass: each invocation subdivides one entry of buf-cells-in;
+# invocations beyond the entry count do nothing.
+fn voxelize ()
+    let slot = (deref gl_GlobalInvocationID.x)
+    if (slot < buf-cells-in.count)
+        let key = (deref (buf-cells-in.entries @ slot))
+        subdivide-cell key
+
+# Tetrahedron extraction pass: each invocation runs simplify-cell on one entry
+# of buf-cells-in; invocations beyond the entry count do nothing.
+fn simplify ()
+    let slot = (deref gl_GlobalInvocationID.x)
+    if (slot < buf-cells-in.count)
+        let key = (deref (buf-cells-in.entries @ slot))
+        simplify-cell key
+
+# Compute entry point shared by all passes (64 invocations per work group);
+# the uniform u-program decides which pass runs, unknown values do nothing.
+fn supershader ()
+    local_size 64 1 1
+    switch (deref u-program)
+    case ProgramVoxelizeInit
+        voxelize-init;
+    case ProgramVoxelize
+        voxelize;
+    case ProgramSimplify
+        simplify;
+    default
+        ;;
+
+# Builds the projection via the 'ifp-perspective method of ProjectionSetup,
+# from (1 / aspect, 1) and the constant 0.1.
+fn calc-projection ()
+    let inv-aspect = (/ (deref shglobals.aspect))
+    'ifp-perspective ProjectionSetup (vec2 inv-aspect 1.0) 0.1
+
+# applies the upper-left 3x3 part of the view matrix to p (no translation)
+inline transform-dist (p)
+    let linear = (mat3 shglobals.view)
+    linear * p
+
+# applies the upper-left 3x3 part of the inverse view matrix to p (no translation)
+inline transform-invert-dist (p)
+    let linear = (mat3 shglobals.view-inverse)
+    linear * p
+
+# transforms the point p by the inverse view matrix and returns its xyz part
+inline transform-invert-pos (p)
+    let hp = (vec4 p 1)
+    let moved = ((deref shglobals.view-inverse) * hp)
+    moved.xyz
+
+# transforms the point p by the view matrix and returns its xyz part
+inline transform-pos (p)
+    let hp = (vec4 p 1)
+    let moved = ((deref shglobals.view) * hp)
+    moved.xyz
+
+# values handed from rasterize-vert to rasterize-frag
+inout normal : vec3
+inout depthval : f32
+inout albedo : vec4
+inout matdata : vec4
+# Vertex stage: every instance is one tetrahedron entry of buf-cells-in and
+# gl_VertexID selects one vertex of the surface patch inside it. The vertex is
+# placed on a tetrahedron edge where the field changes sign, shaded with the
+# material and normal found there, and projected. Instances beyond the entry
+# count emit the position (0 0 0 inf).
+fn rasterize-vert ()
+    let index = ((deref gl_InstanceID) as u32)
+    if (index < buf-cells-in.count)
+        let vertex-index = ((deref gl_VertexID) as u32)
+        # unpack the entry written by simplify-cell: tetrahedron index,
+        # per-corner inside flags and the key of the cell
+        let key = (deref (buf-cells-in.entries @ index))
+        tetidx := (key & 7:u32)
+        signs := (key >> 28:u32)
+        key := (key >> 3:u32) & 0x1ffffff
+
+        # cell size and minimum corner, computed as in simplify-cell
+        let level = (((deref u-level) as u32) - 1)
+        let r = (/ (f32 (1:u32 << level)))
+        let d = (2.0 * r)
+        ucoord := (unpack-morton3x10 key)
+        coord := (vec3 ucoord) * d - 1.0
+
+        # same packed corner table as in simplify-cell
+        let tetverts = 0x6cc99:u32
+        k := tetidx * 3:u32
+        k1 := (tetverts >> k) & 7:u32
+        k3 := (tetverts >> (k + 3:u32)) & 7:u32
+
+        # the four corners of the tetrahedron: cell corner 0, corner k1,
+        # cell corner 7 and corner k3
+        local p : (array vec3 4)
+        p @ 0 = coord
+        p @ 1 = coord + ((vec3 ((uvec3 k1 (k1 >> 1:u32) (k1 >> 2:u32)) & 1:u32)) * d)
+        p @ 2 = coord + (vec3 d d d)
+        p @ 3 = coord + ((vec3 ((uvec3 k3 (k3 >> 1:u32) (k3 >> 2:u32)) & 1:u32)) * d)
+
+        # inside flags turned into -1.0 (inside) / 1.0 (outside) per corner;
+        # from here on d no longer means the cell size
+        let d =
+            vec4
+                ? ((signs & 1:u32) == 1:u32) -1.0 1.0
+                ? ((signs & 2:u32) == 2:u32) -1.0 1.0
+                ? ((signs & 4:u32) == 4:u32) -1.0 1.0
+                ? ((signs & 8:u32) == 8:u32) -1.0 1.0
+
+        # NOTE(review): tetfaces is defined elsewhere; c and i are presumably a
+        # face count and a corner ordering - confirm against its definition
+        let c i = (tetfaces d)
+        # two 2-bit lookup tables, indexed by c and the vertex index, choose
+        # the two slots of i whose corners bound the edge this vertex lies on
+        let shift = ((((c - 1) << 2) | vertex-index) * 2)
+        let i0 i1 =
+            (0x5000 >> shift) & 3
+            (0xeef9 >> shift) & 3
+        let i0 i1 = (deref (i @ i0)) (deref (i @ i1))
+        # field values at both edge end points
+        let dist0 dist1 =
+            mapf (map_vertex (deref (p @ i0)))
+            mapf (map_vertex (deref (p @ i1)))
+        let d0 d1 = (dist0 as f32) (dist1 as f32)
+        # interpolation factor along the edge, as computed by tetlerp
+        let l = (tetlerp d0 d1)
+        let coord = (mix (deref (p @ i0)) (deref (p @ i1)) l)
+        #let coord = (deref (p @ vertex-index))
+        # material and normal are evaluated at the interpolated position
+        let dist = (matmapf (map_vertex coord))
+        let material =
+            dist.material
+        #let dist = dist0
+        #let material =
+            'mix dist0.material dist1.material l
+        let n =
+            normalmapf (map_vertex coord) (r * 0.5)
+
+        # rotate it a little
+        #embed
+            let a = ((deref shglobals.time) * 0.2)
+            let c s = (cos a) (sin a)
+
+            n :=
+                vec3
+                    c * n.x - s * n.z
+                    n.y
+                    s * n.x + c * n.z
+
+            coord :=
+                vec3
+                    c * coord.x - s * coord.z
+                    coord.y
+                    s * coord.x + c * coord.z
+
+        #coord :=
+            coord + (vec3 0 0 1)
+        #n := (transform-dist n)
+
+        # move from the voxel domain to scene space, then into view space
+        let coord =
+            if PROJECT_FINAL_VERTEX
+                map_vertex coord
+            else coord
+        coord := (transform-pos coord)
+
+        let proj =
+            calc-projection;
+
+        let pcoord =
+            'project proj
+                vec4 coord 1.0
+
+        normal.out =
+            do
+                static-if VISUALIZE_IDS (vec3hash (key as f32))
+                else n
+        # view space z; the fullscreen pass treats a stored 0.0 as background
+        depthval.out = coord.z
+        albedo.out = material.albedo
+        matdata.out =
+            vec4 material.roughness material.metallic 0 0
+        gl_Position = pcoord
+        return;
+
+    # no entry for this instance
+    gl_Position = (vec4 0 0 0 inf)
+    ;
+
+# Packs one surfel into a vec4 of raw 32-bit words:
+# x = normal (2 x snorm16), y = matdata (4 x unorm8), z = color (4 x unorm8),
+# w = depth as is.
+fn pack-surfel-data (normal depth color matdata)
+    let normal-bits = (bitcast (packSnorm2x16 (pack_normal_snorm normal)) f32)
+    let color-bits = (bitcast (packUnorm4x8 color) f32)
+    let matdata-bits = (bitcast (packUnorm4x8 matdata) f32)
+    vec4 normal-bits matdata-bits color-bits depth
+
+# Inverse of pack-surfel-data. Returns four values in the order
+# normal, depth, color, matdata.
+fn unpack-surfel-data (frag)
+    let n = (unpack_normal_snorm (unpackSnorm2x16 (bitcast frag.x u32)))
+    let material-bits = (unpackUnorm4x8 (bitcast frag.y u32))
+    let rgba = (unpackUnorm4x8 (bitcast frag.z u32))
+    _ n frag.w rgba material-bits
+
+out out_Color : vec4
+    binding = 0
+# Fragment stage: packs the interpolated surfel attributes into the single
+# output; the normal is renormalized after interpolation.
+fn rasterize-frag ()
+    let n = (normalize (deref normal.in))
+    let z = (deref depthval.in)
+    let rgba = (deref albedo.in)
+    let mat = (deref matdata.in)
+    out_Color = (pack-surfel-data n z rgba mat)
+
+#uniform img-target-rgba32f : (image2D rgba32f)
+    binding = IMAGE_TARGET_RGBA32F
+    \ coherent writeonly restrict
+
+# NOTE(review): u-level is already declared with the same type and location
+# near the top of the file; this second declaration looks redundant - confirm
+# before removing it
+uniform u-level : i32
+    location = UNIFORM_LEVEL
+
+# Fullscreen shading pass: fetches the packed surfel under the current
+# fragment, returns a fixed background color when its depth is 0.0, and
+# otherwise lights it with one directional light plus an ambient term,
+# followed by tonemap and linear->sRGB.
+# NOTE(review): the function `shader` currently calls visualize-buffer instead.
+fn mixdown (uv)
+    #let t = (deref shglobals.time)
+    # NOTE(review): size is not used below
+    let size =
+        vec2 (deref shglobals.size)
+    let uv2 =
+        (uv * 2.0 - 1.0) * (vec2 shglobals.aspect 1)
+    # from here on uv is the integer texel coordinate of this fragment
+    let uv = (ivec2 ((deref gl_FragCoord) . xy + 0.5))
+
+    let col =
+        texelFetch smp-screen uv 0
+    let normal depth color matdata = (unpack-surfel-data col)
+    # nothing was rasterized here
+    if (depth == 0.0)
+        return
+            vec4 0.1 0.1 0.2 1
+    let proj =
+        calc-projection;
+    # NOTE(review): uv2 has already been through `* 2.0 - 1.0` above and is
+    # remapped a second time here - confirm this is intended
+    let coord =
+        vec3 ((uv2 * 2.0 - 1.0) * depth / proj.aspect) depth
+    let rd = (normalize coord)
+
+    # direction of the light
+    let l =
+        normalize
+            vec3 0.5 -1.0 0.25
+    let lambert =
+        max 0.0 (dot normal l)
+
+    let albedo = color.rgb
+    # ambient term between 0 and 0.1, depending on how far the normal points
+    # towards -y
+    let ambient =
+        ((dot normal (vec3 0.0 -1.0 0.0)) * 0.5 + 0.5) * 0.1
+    let exposure = 2.0
+
+    let roughness = matdata.x
+    let metallic = matdata.y
+    let color =
+        linear->sRGB
+            tonemap
+                * exposure
+                    +
+                        ambient * albedo * (1.0 - metallic)
+                        * lambert
+                            BRDF albedo
+                                roughness
+                                metallic
+                                \ l rd normal
+
+    return
+        vec4 color 1.0
+        #vec4
+            #\ uv 0.0
+            #normal * 0.5 + 0.5
+            vec3
+                (dot normal (normalize (vec3 0 -1 0))) * 0.5 + 0.5
+            #normhue depth
+            #normhue (radius / 16.0)
+            #normhue (w / 8.0)
+            1.0
+
+# Debug view of the surfel buffer: fetches the packed surfel under the current
+# fragment and shows its normal remapped from -1..1 to 0..1. Fragments whose
+# stored depth is 0.0 (nothing rasterized) come out black.
+# `uv` is accepted for interface compatibility with mixdown; the texel is
+# addressed through gl_FragCoord.
+fn visualize-buffer (uv)
+    #let t = (deref shglobals.time)
+    # integer texel coordinate of this fragment
+    let pixel = (ivec2 ((deref gl_FragCoord) . xy + 0.5))
+
+    let col =
+        texelFetch smp-screen pixel 0
+    let normal depth color matdata = (unpack-surfel-data col)
+    if (depth == 0.0)
+        return
+            vec4 0 0 0 1
+
+    return
+        vec4
+            normal * 0.5 + 0.5
+            #normhue depth
+            #normhue (radius / 16.0)
+            #color
+            1.0
+
+# Fragment entry point of the fullscreen pass. Shows the raw normal buffer;
+# enable the mixdown line instead to get the lit image.
+fn shader (uv)
+    #mixdown uv
+    let rgba = (visualize-buffer uv)
+    rgba
+
+# One-time setup: creates the cell list buffers, the offscreen render target
+# and both GL programs, then returns the per-frame callback together with the
+# fullscreen fragment shader.
+inline main ()
+
+    # NOTE(review): the overview below describes per-thread octree traversal
+    # and position/color/material streams, which the code in this file does
+    # not implement - it looks like an earlier design note
+    # pass overview:
+        in: compute 4 * 4 * 4
+        per thread:
+            traverse 1/64th of octree
+        for each leaf found:
+            increase atomic counter in indirect draw call argument to obtain index
+            append position + normal as vec4 (xyz, packed normal)
+            append color as i32
+                gamma compressed RGB10 values
+                values >= 256 are emissive
+                2 bits left for ?
+            append material props as i32
+                * roughness
+                * metallic
+                * film thickness
+                * waxiness?
+                * hue rotation?
+                * UV activeness?
+                * snowy glitter?
+
+        in: indirect draw call argument
+        out: rasterized voxel cubes
+
+    # must match the number of GL.Buffer entries in cell_buffers
+    let NUM_BUFFERS = 5
+
+    global cell_buffers =
+        arrayof GL.uint
+            GL.Buffer;
+            GL.Buffer;
+            GL.Buffer;
+            GL.Buffer;
+            GL.Buffer;
+    # one u32 counter followed by MAX_VOXELS u32 entries (see CellVals)
+    let cell_buffer_sz = ((sizeof u32) * (1 + MAX_VOXELS))
+    # allocate storage for every buffer
+    # NOTE(review): the initial bindings cover the binding points
+    # BINDING_BUF_CELLS_IN .. BINDING_BUF_CELLS_IN + 4, which includes the
+    # points named for the draw and dispatch command buffers; bind-buffers
+    # rebinds IN and OUT before every pass
+    for i in (range (NUM_BUFFERS as u32))
+        let buf = (cell_buffers @ i)
+        GL.NamedBufferData buf (i32 cell_buffer_sz) null GL.STREAM_COPY
+        GL.BindBufferRange GL.SHADER_STORAGE_BUFFER (BINDING_BUF_CELLS_IN + i)
+            \ buf 0:i64 (i64 cell_buffer_sz)
+
+    #global draw_voxels_cmd = (GL.CreateBuffer)
+    #setup-ssbo draw_voxels_cmd buf-draw-voxels-cmd
+    #let draw_voxels_cmd_sz = (sizeof DrawElementsIndirectCommand)
+    #GL.NamedBufferData draw_voxels_cmd (i32 draw_voxels_cmd_sz) null GL.STREAM_DRAW
+    #GL.BindBufferRange GL.SHADER_STORAGE_BUFFER BINDING_BUF_DRAW_VOXELS_CMD draw_voxels_cmd 0:i64 (i64 draw_voxels_cmd_sz)
+
+    #global tx_position_normal = (GL.CreateTexture GL.TEXTURE_BUFFER)
+    #GL.TextureBuffer tx_position_normal GL.RGBA32UI position_normal
+
+    # 2048 x 2048 RGBA32F target that receives the packed surfels
+    global fb-scene-color = (GL.Texture GL.TEXTURE_2D)
+    'setup fb-scene-color
+        size = (ivec2 2048 2048)
+        format = GL.RGBA32F
+    do
+        # NOTE(review): h is not used
+        let h = 2048
+        GL.ClearTexImage fb-scene-color 0 GL.RGBA GL.FLOAT null
+
+    global rb-scene-depth = (GL.Renderbuffer)
+    setup-renderbuffer rb-scene-depth 2048 2048
+        format = GL.DEPTH_COMPONENT
+    global fb-scene = (GL.Framebuffer)
+    setup-framebuffer fb-scene
+        color = fb-scene-color
+        rb-depth = rb-scene-depth
+
+    # the draw call takes no vertex attributes; rasterize-vert reads the cell
+    # buffer through gl_InstanceID and gl_VertexID
+    global vao-empty = (GL.VertexArray)
+
+    global pg-rasterize = (GL.Program)
+    call
+        attach-shaders (deref pg-rasterize)
+            vertex = rasterize-vert
+            fragment = rasterize-frag
+            #debug = true
+
+    global pg-supershader = (GL.Program)
+    call
+        attach-shaders (deref pg-supershader)
+            compute = supershader
+            #debug = true
+
+    # Per-frame work: reset the counters, run the compute passes
+    # (init at level 6 -> voxelize at level 7 -> simplify at level 8) and
+    # rasterize the resulting tetrahedra into fb-scene.
+    inline per-frame-setup (size)
+
+        GL.BindTextureUnit 0 fb-scene-color
+        GL.Uniform smp-screen 0
+
+        # reset the entry counter of every cell buffer to zero
+        for i in (range NUM_BUFFERS)
+            let buf = (cell_buffers @ i)
+            let ptr =
+                GL.MapNamedBufferRange buf 0 (sizeof u32)
+                    | GL.MAP_WRITE_BIT
+                        GL.MAP_INVALIDATE_BUFFER_BIT
+                        #GL.MAP_UNSYNCHRONIZED_BIT
+            let ptr = (bitcast ptr (mutable pointer CellVals))
+            ptr.count = 0:u32
+            GL.UnmapNamedBuffer buf
+
+        #local cmd = (DrawElementsIndirectCommand)
+        #bind-ssbo draw_voxels_cmd buf-draw-voxels-cmd &cmd
+        #GL.NamedBufferSubData draw_voxels_cmd 0 draw_voxels_cmd_sz &cmd
+
+        # binds cell buffer i0 as input and i1 as output of the next pass
+        inline bind-buffers (i0 i1)
+            GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+                BINDING_BUF_CELLS_IN
+                cell_buffers @ i0
+                \ 0:i64 (i64 cell_buffer_sz)
+            GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+                BINDING_BUF_CELLS_OUT
+                cell_buffers @ i1
+                \ 0:i64 (i64 cell_buffer_sz)
+
+        GL.UseProgram pg-supershader
+
+        # pass 1: 8^3 work groups of 64 invocations; each invocation uses its
+        # index as key and tests its eight children at level 6 -> buffer 1
+        do
+            GL.Uniform u-program ProgramVoxelizeInit
+            GL.Uniform u-level 6
+            bind-buffers 0 1
+            GL.DispatchCompute ((8 ** 3) as u32) 1 1
+            GL.MemoryBarrier GL.SHADER_STORAGE_BARRIER_BIT
+            #GL.MemoryBarrier (GL.ALL_BARRIER_BITS as u32)
+
+        #do
+            GL.UseProgram pg-voxelize
+            GL.Uniform u-level 7
+            bind-buffers 1 2
+            GL.DispatchCompute 84 1 1
+            GL.MemoryBarrier GL.SHADER_STORAGE_BARRIER_BIT
+            #GL.MemoryBarrier (GL.ALL_BARRIER_BITS as u32)
+
+        GL.Uniform u-program ProgramVoxelize
+
+        #do
+            GL.Uniform u-level 6
+            bind-buffers 1 2
+            #GL.DispatchCompute 21 1 1
+            #GL.DispatchCompute 346 1 1
+            GL.DispatchCompute 594 1 1
+            GL.MemoryBarrier GL.SHADER_STORAGE_BARRIER_BIT
+            #GL.MemoryBarrier (GL.ALL_BARRIER_BITS as u32)
+
+        # pass 2: refine buffer 1 to level 7 -> buffer 2; the dispatch size is
+        # a fixed upper bound, surplus invocations exit on the count check
+        do
+            GL.Uniform u-level 7
+            bind-buffers 1 2
+            #GL.DispatchCompute 84 1 1
+            #GL.DispatchCompute 1395 1 1
+            GL.DispatchCompute ((POINTLIMIT // 15) // 64) 1 1
+            GL.MemoryBarrier GL.SHADER_STORAGE_BARRIER_BIT
+            #GL.MemoryBarrier (GL.ALL_BARRIER_BITS as u32)
+
+        # pass 3: extract tetrahedra from the level 7 cells -> buffer 3
+        do
+            GL.Uniform u-program ProgramSimplify
+            GL.Uniform u-level 8
+            bind-buffers 2 3
+            GL.DispatchCompute ((POINTLIMIT // 3) // 64) 1 1
+            GL.MemoryBarrier GL.SHADER_STORAGE_BARRIER_BIT
+            #GL.MemoryBarrier (GL.ALL_BARRIER_BITS as u32)
+
+        #do
+            for i in (range NUM_BUFFERS)
+                let buf = (cell_buffers @ i)
+                let ptr =
+                    GL.MapNamedBufferRange buf 0 (sizeof u32)
+                        GL.MAP_READ_BIT
+                let ptr = (bitcast ptr (pointer CellVals))
+                print i "=" ptr.count "/" ((ptr.count + 63:u32) // 64:u32)
+                GL.UnmapNamedBuffer buf
+
+        # pass 4: rasterize the tetrahedra of buffer 3 into fb-scene
+        do
+            GL.BindFramebuffer GL.FRAMEBUFFER fb-scene
+            GL.Viewport 0 0 (i32 size.x) (i32 size.y)
+            GL.ClearColor 0 0 0 0
+            # depth is cleared to 0 and larger values win the depth test
+            GL.DepthFunc GL.GREATER
+            GL.ClearDepthf 0
+            GL.DepthRangef -1 1
+            GL.Enable GL.CULL_FACE
+            GL.CullFace GL.BACK
+            GL.Enable GL.DEPTH_TEST
+            GL.Clear
+                |
+                    GL.COLOR_BUFFER_BIT
+                    GL.DEPTH_BUFFER_BIT
+                    GL.STENCIL_BUFFER_BIT
+
+            GL.UseProgram pg-rasterize
+            bind-buffers 3 0
+            GL.Uniform u-level 8
+            GL.BindVertexArray vao-empty
+            #GL.DrawArrays GL.POINTS 0 POINTLIMIT
+            #GL.DrawArraysInstanced GL.POINTS 0 1 POINTLIMIT
+            # one 4-vertex strip per instance; instances beyond the entry
+            # count are given the position (0 0 0 inf) by rasterize-vert
+            GL.DrawArraysInstanced GL.TRIANGLE_STRIP 0 4 POINTLIMIT
+
+            GL.Disable GL.DEPTH_TEST
+            GL.Disable GL.CULL_FACE
+            GL.BindFramebuffer GL.FRAMEBUFFER 0
+
+    _ per-frame-setup shader
+
+# Runs the test through render-fragment-shader, with `main` supplying the
+# setup and a size of 512 x 512.
+fn program ()
+    let window-size = (ivec2 512)
+    render-fragment-shader main
+        #debug = true
+        size = window-size
+
+
+# Runs `program` directly. The disabled else branch instead compiles a typified
+# `program` with 'O3 and calls the compiled function.
+static-if true
+    program;
+else
+    define f
+        compile
+            typify program
+            'O3
+
+    f;