ea8156aa78ce — Leonard Ritter 4 months ago
* initial support for occlusion culling
1 files changed, 174 insertions(+), 20 deletions(-)

M testing/test_cascade_dmc_cc_vvf.sc
M testing/test_cascade_dmc_cc_vvf.sc +174 -20
@@ 53,7 53,7 @@ OCCLUSION_CULLING := false
 FOG := false
 USE_FLAT_SHADING := true
 BALANCE_QUADS := true
-USE_COMPLEX_SURFACE := true
+USE_COMPLEX_SURFACE := false
 HIGH_QUALITY_FEATURES := true
 SOFT_WORLD_SAMPLING := true
 USE_CATMULL_CLARK := false

          
@@ 62,7 62,8 @@ USE_VVF_PACKING := true
 AVERAGE_LOD_VERTICES := false
 USE_PERSPECTIVE_SUBDIVISION := false
 CULL_BACKFACES := true
-VISUALIZE_HZB := true
+VISUALIZE_HZB := false
+DRAW_LINES := false
 
 FETCH_UV_OFFSET := 0.5
 #FETCH_UV_OFFSET := 0.0

          
@@ 70,7 71,8 @@ FETCH_UV_OFFSET := 0.5
 # to reach fog density D at depth Z, FOG_RATE = -log2(1 - D)/Z
 FOG_RATE := 0.02 # by the formula above: 50% at 50 units (75% at 100 units)
 
-let MAX_VERTICES = (20 * (1 << 20))
+InstanceVertexCount := 4
+let MAX_VERTICES = (4 * 2 * (1 << 20))
 MAX_SECTORS_PER_FRAME := 64 << 5
 
 let WORLD_SIZE = (uvec3 256)

          
@@ 95,10 97,8 @@ SECTOR_CAPACITY := MAX_CASCADE_DEPTH * 1
 CASCADE_SIZE := (1 << MAX_CASCADE_DEPTH)
 CASCADE_CENTER := (CASCADE_SIZE // 2)
 
-InstanceVertexCount := 4
-
 HZB_LEVELS := 10
-HZB_LOD_VIZ := 5
+HZB_LOD_VIZ := 8
 HZB_SIZE := (1 << HZB_LEVELS)
 
 let BINDING_BUF_SECTOR_IN = 1

          
@@ 160,8 160,8 @@ let
 struct Sector plain
     key : u32 # encoded cell key (unpacked with decode-cell); the culling pass skips sectors whose key is 0
     flags : u32 # six face bits indicating where the next highest LOD level is
-    offset-min = 0xffffffff:u32 # index of first triangle in triangle buffer
-    offset-max = 0:u32 # index after last triangle in triangle buffer
+    offset-min = 0xffffffff:u32 # index of first vertex in vertex buffer
+    offset-max = 0:u32 # index after last vertex in vertex buffer; generate-quad advances it atomically by 4 per emitted quad
 
 struct Sectors plain
     keys : (array Sector)

          
@@ 1142,6 1142,7 @@ dump "shared memory requirements"
 shared cell-corner-flags : (array u32 SECTOR_SAMPLE_VOLUME)
 shared cell-vertex : (array vec4 SECTOR_SAMPLE_VOLUME)
 shared cell-normal : (array vec3 SECTOR_SAMPLE_VOLUME)
+shared quad-count : u32
 
 #fn id2index (id)
     #id.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + id.y * gl_WorkGroupSize.x + id.x

          
@@ 1183,10 1184,8 @@ fn generate-quad (v00 v01 v10 v11)
             _ v00 v10 v11 v01
 
     # generate quad
-    let ofs = (atomicAdd vertex-out.count 4)
     sector := (sector-inout.keys @ gl_WorkGroupID.x)
-    atomicMin sector.offset-min ofs
-    atomicMax sector.offset-max (ofs + 4)
+    let ofs = (atomicAdd sector.offset-max 4)
     entries := vertex-out.entries
     static-if USE_FLAT_SHADING
         n0 := (triangle-normal v00.pos v11.pos v10.pos)

          
@@ 1362,6 1361,8 @@ fn generate-cell-verts ()
             cell-normal @ idx = (normalize normal_accum)
             ;
         ;
+    if (lane-idx == 0)
+        quad-count = 0
     barrier;
 
     lpos := (index2id lane-idx)

          
@@ 1374,6 1375,22 @@ fn generate-cell-verts ()
 
     cflags := (copy (cell-corner-flags @ idx))
 
+    centerbit := (cflags >> 7) & 1
+    flip? := (centerbit != 0)
+
+    for i in (range 3)
+        if ((centerbit ^ ((cflags >> ((1 << i as u32) ^ 7)) & 1)) != 0)
+            static-if USE_CATMULL_CLARK
+                atomicAdd quad-count 4
+            else
+                atomicAdd quad-count 1
+    barrier;
+    if (lane-idx == 0)
+        let ofs = (atomicAdd vertex-out.count (4 * quad-count))
+        sector.offset-min = ofs
+        sector.offset-max = ofs
+    barrier;
+
     static-if USE_CATMULL_CLARK
         inline getvertex (offset)
             dpos := lpos + offset

          
@@ 1986,6 2003,10 @@ inline main ()
                     GL.DispatchCompute (unpack (((WORLD_SIZE >> (lod as u32)) + GROUP_SIZE - 1) // GROUP_SIZE))
                     GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT)
 
+        let sector_counts =
+            static-array u32 2
+                inline () 0
+
         let vertex_buffer_sz = ((sizeof Vertices) + (sizeof Vertex) * MAX_VERTICES)
         let vertex_buffers =
             static-array GL.Buffer 3

          
@@ 2006,11 2027,13 @@ inline main ()
         # update terrain
 
         next-terrain-doublebuffer-index := terrain-doublebuffer-index ^ 1
+        next_sector_count := sector_counts @ next-terrain-doublebuffer-index
         next_sector_buffer := sector_buffers @ next-terrain-doublebuffer-index
         next_vertex_buffer := vertex_buffers @ next-terrain-doublebuffer-index
 
         if ('fresh? terrain-job)
             collect-sectors;
+            next_sector_count = (countof sectors) as u32
             GL.NamedBufferSubData next_sector_buffer 0
                 i32 ((countof sectors) * (sizeof Sector))
                 & (sectors @ 0)

          
@@ 2044,24 2067,27 @@ inline main ()
         #############
 
         vertex_buffer := vertex_buffers @ terrain-doublebuffer-index
+        sector_buffer := sector_buffers @ terrain-doublebuffer-index
 
-        inline print-in-count ()
+        inline print-in-count (label vertex_buffer) # debug readback: prints the counter at the start of `vertex_buffer` as a primitive count, tagged with `label`
             let ptr =
                 GL.MapNamedBufferRange vertex_buffer 0 (sizeof u32) # maps only the first sizeof(u32) bytes of the buffer
                     GL.MAP_READ_BIT
             let ptr = (bitcast ptr (pointer Vertices)) # only `count` is read below; assumes it lies within the mapped u32 — TODO confirm against Vertices layout
-            print (ptr.count // InstanceVertexCount) "primitives"
+            print label ":" (ptr.count // InstanceVertexCount) "primitives"
             GL.UnmapNamedBuffer vertex_buffer
 
         if ((frame % 60) == 0)
-            print-in-count;
+            print-in-count "all" vertex_buffer
 
         vvv bind setup-draw-arrays exec-draw-arrays
         indirect-draw-arrays-setup # fills the indirect draw arguments from the vertex counter; presumably (vertex count, instance count, first, base instance) — confirm against indirect-draw-arrays-setup
             inline ()
                 _
                     InstanceVertexCount
-                    (deref vertex-in.count) // InstanceVertexCount
+                    min
+                        (MAX_VERTICES // InstanceVertexCount) as u32 # the buffer holds MAX_VERTICES vertices, so at most MAX_VERTICES // InstanceVertexCount whole instances
+                        (deref vertex-in.count) // InstanceVertexCount
                     0
                     0
 

          
@@ 2134,6 2160,132 @@ inline main ()
                 GL.DispatchCompute size size 1
                 GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT)
 
+        frame_vertex_buffer := vertex_buffers @ 2
+
+        sector_count := sector_counts @ terrain-doublebuffer-index
+
+        GL.ClearNamedBufferSubData frame_vertex_buffer GL.R32UI 0 (sizeof u32) GL.RED_INTEGER GL.UNSIGNED_INT null
+        if (sector_count != 0)
+            GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+                BINDING_BUF_SECTOR_IN
+                sector_buffer
+                \ 0:i64 (i64 (sector_count * (sizeof Sector)))
+            GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+                BINDING_BUF_VERTEX_IN
+                vertex_buffer
+                \ 0:i64 (i64 vertex_buffer_sz)
+            GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+                BINDING_BUF_VERTEX_OUT
+                frame_vertex_buffer
+                \ 0:i64 (i64 vertex_buffer_sz)
+            GL.UseProgram
+                compute-program
+                    fn () # culling kernel: each workgroup handles the sector at index gl_WorkGroupID.x and copies its vertices to vertex-out unless the sector is rejected
+                        let LANE_COUNT = 384
+                        local_size LANE_COUNT 1 1
+                        sector := (copy (sector-in.keys @ gl_WorkGroupID.x))
+                        if (sector.key == 0) # zero key: nothing to do for this slot (presumably an unused sector — confirm)
+                            return;
+                        from sector let offset-min offset-max key
+                        let sz = (offset-max as i32 - offset-min as i32) # number of vertices in [offset-min, offset-max)
+                        if (sz <= 0)
+                            return;
+                        let lvl sectorpos... = (decode-cell key)
+                        sectorlod := MAX_CASCADE_DEPTH - lvl
+                        lsectorpos := (ivec3 sectorpos...)
+                        sector-scale := (f32 (1 << sectorlod)) / CASCADE_SIZE
+                        sector-origin := (vec3 lsectorpos) * sector-scale - 0.5
+                        sector-origin := sector-origin * WORLD_SCALE
+                        sector-scale := sector-scale * WORLD_SCALE
+
+                        let proj =
+                            calc-projection;
+                        inline project (p) # maps a unit-cube corner of the sector box to (uv remapped by * 0.5 + 0.5, z after transform-pos)
+                            d := 1.0 / SECTOR_GROUP_SIZE
+                            p := p * (1.0 + d * 4.0) - d * 2.0 # grow the unit box by 2 * d on every side
+                            p := p * sector-scale + sector-origin
+                            p := (transform-pos p)
+                            z := p.z
+                            p :=
+                                'project proj
+                                    vec4 p 1.0
+                            uv := (p.xy / p.w) #* 4.0
+                            uv := uv * 0.5 + 0.5
+                            vec3 uv z
+
+                        let points... =
+                            va-map project
+                                vec3 0 0 0
+                                vec3 1 0 0
+                                vec3 0 1 0
+                                vec3 1 1 0
+                                vec3 0 0 1
+                                vec3 1 0 1
+                                vec3 0 1 1
+                                vec3 1 1 1
+                        let minv = (min points...) # component-wise bounds of the eight projected corners
+                        let maxv = (max points...)
+                        let size = (maxv - minv)
+                        if ((min maxv.x maxv.y) < 0.0) # bounding rect lies entirely below uv 0 on x or y: reject
+                            return;
+                        if ((max minv.x minv.y) > 1.0) # bounding rect lies entirely above uv 1 on x or y: reject
+                            return;
+                        if (maxv.z < 0.0) # every corner has z < 0 after transform-pos (presumably behind the viewer — confirm sign convention)
+                            return;
+                        let depth = minv.z # smallest corner z is the value tested against the HZB
+                        do
+                            let w = (min size.x size.y)
+                            let lod =
+                                if (w == 0) 0
+                                else
+                                    w := HZB_LEVELS + (log2 w) # HZB mip chosen from the smaller uv extent of the rect
+                                    w := (clamp w 0.0 (HZB_LEVELS as f32))
+                                    w as i32
+                            center := (minv + maxv) / 2
+                            let sz = (HZB_SIZE >> lod) # edge length in texels used for this lod
+                            let uv =
+                                ivec2
+                                    center.xy * (sz as f32) + 0.5
+                            let occluded? =
+                                for x y in (dim 3 3) # examine the 3x3 texel neighbourhood around the rect center
+                                    uv := uv + (ivec2 x y) - 1
+                                    uv := (clamp uv (ivec2 0) (ivec2 (sz - 1)))
+                                    z := (texelFetch smp-hzb uv lod) . r
+                                    if (depth < z) # a texel whose HZB value exceeds the box's min z: treat the sector as visible
+                                        break false
+                                else true
+                            if occluded?
+                                return;
+
+                        texelFetch smp-hzb (ivec2 0) 0 # NOTE(review): result is unused; presumably leftover debug code or a way to keep smp-hzb referenced — confirm
+
+                        id := (copy gl_LocalInvocationIndex)
+                        shared dstofs : u32
+                        sz as:= u32
+                        if (id == 0)
+                            dstofs = (atomicAdd vertex-out.count sz) # lane 0 reserves sz output slots for the whole workgroup
+                        barrier;
+                        let srcofs = offset-min
+                        let dstofs = (copy dstofs)
+                        BANK_COUNT := (32 * 3 * 4 + LANE_COUNT - 1) // LANE_COUNT # evaluates to 1 with LANE_COUNT = 384, so at most 384 vertices are copied; NOTE(review): confirm sz cannot exceed that
+                        for k in (range (BANK_COUNT as u32))
+                            i := k * LANE_COUNT + id
+                            if (i >= sz)
+                                return;
+                            vertex-out.entries @ (dstofs + i) = vertex-in.entries @ (srcofs + i)
+            GL.BindTextureUnit 1 hzb.depth-texture
+            GL.Uniform smp-hzb 1
+            GL.DispatchCompute sector_count 1 1
+            GL.MemoryBarrier GL.SHADER_STORAGE_BARRIER_BIT
+
+        if ((frame % 60) == 0)
+            print-in-count "culled" frame_vertex_buffer
+        GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+            BINDING_BUF_VERTEX_IN
+            frame_vertex_buffer
+            \ 0:i64 (i64 vertex_buffer_sz)
+        setup-draw-arrays;
+
         do
             GL.BindFramebuffer GL.FRAMEBUFFER fb-scene
             GL.Viewport 0 0 (i32 size.x) (i32 size.y)

          
@@ 2156,9 2308,10 @@ inline main ()
             GL.UseProgram pg-rasterize
             GL.BindVertexArray vao-empty
             #exec-draw-arrays GL.TRIANGLES
-            exec-draw-arrays GL.TRIANGLE_FAN
-            #exec-draw-arrays GL.LINE_LOOP
-
+            static-if DRAW_LINES
+                exec-draw-arrays GL.LINE_LOOP
+            else
+                exec-draw-arrays GL.TRIANGLE_FAN
             GL.Disable GL.DEPTH_TEST
             GL.Disable GL.CULL_FACE
             GL.BindFramebuffer GL.FRAMEBUFFER 0

          
@@ 2172,8 2325,9 @@ inline main ()
         GL.BindTextureUnit 0 fb-scene-color
         GL.Uniform smp-screen 0
         #GL.BindImageTexture BINDING_IMG_HZB_IN hzb.depth-texture HZB_LOD_VIZ GL.TRUE 0 GL.READ_ONLY HZB_PIXELFMT
-        GL.BindTextureUnit 1 hzb.depth-texture
-        GL.Uniform smp-hzb 1
+        static-if VISUALIZE_HZB
+            GL.BindTextureUnit 1 hzb.depth-texture
+            GL.Uniform smp-hzb 1
         'draw screen
 
     let per-frame-setup =