# HG changeset patch # User Leonard Ritter # Date 1645806634 -3600 # Fri Feb 25 17:30:34 2022 +0100 # Node ID ea8156aa78ce49016b30a275ce7656d40c922505 # Parent 34ad1d66754752fa7fe1bbb2cfd8d706214c2ad2 * initial support for occlusion culling diff --git a/testing/test_cascade_dmc_cc_vvf.sc b/testing/test_cascade_dmc_cc_vvf.sc --- a/testing/test_cascade_dmc_cc_vvf.sc +++ b/testing/test_cascade_dmc_cc_vvf.sc @@ -53,7 +53,7 @@ FOG := false USE_FLAT_SHADING := true BALANCE_QUADS := true -USE_COMPLEX_SURFACE := true +USE_COMPLEX_SURFACE := false HIGH_QUALITY_FEATURES := true SOFT_WORLD_SAMPLING := true USE_CATMULL_CLARK := false @@ -62,7 +62,8 @@ AVERAGE_LOD_VERTICES := false USE_PERSPECTIVE_SUBDIVISION := false CULL_BACKFACES := true -VISUALIZE_HZB := true +VISUALIZE_HZB := false +DRAW_LINES := false FETCH_UV_OFFSET := 0.5 #FETCH_UV_OFFSET := 0.0 @@ -70,7 +71,8 @@ # to reach fog density D at depth Z, FOG_RATE = -log2(1 - D)/Z FOG_RATE := 0.02 # 50% at 100 units -let MAX_VERTICES = (20 * (1 << 20)) +InstanceVertexCount := 4 +let MAX_VERTICES = (4 * 2 * (1 << 20)) MAX_SECTORS_PER_FRAME := 64 << 5 let WORLD_SIZE = (uvec3 256) @@ -95,10 +97,8 @@ CASCADE_SIZE := (1 << MAX_CASCADE_DEPTH) CASCADE_CENTER := (CASCADE_SIZE // 2) -InstanceVertexCount := 4 - HZB_LEVELS := 10 -HZB_LOD_VIZ := 5 +HZB_LOD_VIZ := 8 HZB_SIZE := (1 << HZB_LEVELS) let BINDING_BUF_SECTOR_IN = 1 @@ -160,8 +160,8 @@ struct Sector plain key : u32 flags : u32 # six face bits indicating where the next highest LOD level is - offset-min = 0xffffffff:u32 # index of first triangle in triangle buffer - offset-max = 0:u32 # index after last triangle in triangle buffer + offset-min = 0xffffffff:u32 # index of first vertex in vertex buffer + offset-max = 0:u32 # index after last vertex in vertex buffer struct Sectors plain keys : (array Sector) @@ -1142,6 +1142,7 @@ shared cell-corner-flags : (array u32 SECTOR_SAMPLE_VOLUME) shared cell-vertex : (array vec4 SECTOR_SAMPLE_VOLUME) shared cell-normal : (array vec3 SECTOR_SAMPLE_VOLUME) +shared quad-count : u32 #fn id2index (id) #id.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + id.y * gl_WorkGroupSize.x + id.x @@ -1183,10 +1184,8 @@ _ v00 v10 v11 v01 # generate quad - let ofs = (atomicAdd vertex-out.count 4) sector := (sector-inout.keys @ gl_WorkGroupID.x) - atomicMin sector.offset-min ofs - atomicMax sector.offset-max (ofs + 4) + let ofs = (atomicAdd sector.offset-max 4) entries := vertex-out.entries static-if USE_FLAT_SHADING n0 := (triangle-normal v00.pos v11.pos v10.pos) @@ -1362,6 +1361,8 @@ cell-normal @ idx = (normalize normal_accum) ; ; + if (lane-idx == 0) + quad-count = 0 barrier; lpos := (index2id lane-idx) @@ -1374,6 +1375,22 @@ cflags := (copy (cell-corner-flags @ idx)) + centerbit := (cflags >> 7) & 1 + flip? := (centerbit != 0) + + for i in (range 3) + if ((centerbit ^ ((cflags >> ((1 << i as u32) ^ 7)) & 1)) != 0) + static-if USE_CATMULL_CLARK + atomicAdd quad-count 4 + else + atomicAdd quad-count 1 + barrier; + if (lane-idx == 0) + let ofs = (atomicAdd vertex-out.count (4 * quad-count)) + sector.offset-min = ofs + sector.offset-max = ofs + barrier; + static-if USE_CATMULL_CLARK inline getvertex (offset) dpos := lpos + offset @@ -1986,6 +2003,10 @@ GL.DispatchCompute (unpack (((WORLD_SIZE >> (lod as u32)) + GROUP_SIZE - 1) // GROUP_SIZE)) GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT) + let sector_counts = + static-array u32 2 + inline () 0 + let vertex_buffer_sz = ((sizeof Vertices) + (sizeof Vertex) * MAX_VERTICES) let vertex_buffers = static-array GL.Buffer 3 @@ -2006,11 +2027,13 @@ # update terrain next-terrain-doublebuffer-index := terrain-doublebuffer-index ^ 1 + next_sector_count := sector_counts @ next-terrain-doublebuffer-index next_sector_buffer := sector_buffers @ next-terrain-doublebuffer-index next_vertex_buffer := vertex_buffers @ next-terrain-doublebuffer-index if ('fresh? terrain-job) collect-sectors; + next_sector_count = (countof sectors) as u32 GL.NamedBufferSubData next_sector_buffer 0 i32 ((countof sectors) * (sizeof Sector)) & (sectors @ 0) @@ -2044,24 +2067,27 @@ ############# vertex_buffer := vertex_buffers @ terrain-doublebuffer-index + sector_buffer := sector_buffers @ terrain-doublebuffer-index - inline print-in-count () + inline print-in-count (label vertex_buffer) let ptr = GL.MapNamedBufferRange vertex_buffer 0 (sizeof u32) GL.MAP_READ_BIT let ptr = (bitcast ptr (pointer Vertices)) - print (ptr.count // InstanceVertexCount) "primitives" + print label ":" (ptr.count // InstanceVertexCount) "primitives" GL.UnmapNamedBuffer vertex_buffer if ((frame % 60) == 0) - print-in-count; + print-in-count "all" vertex_buffer vvv bind setup-draw-arrays exec-draw-arrays indirect-draw-arrays-setup inline () _ InstanceVertexCount - (deref vertex-in.count) // InstanceVertexCount + min + MAX_VERTICES as u32 + (deref vertex-in.count) // InstanceVertexCount 0 0 @@ -2134,6 +2160,132 @@ GL.DispatchCompute size size 1 GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT) + frame_vertex_buffer := vertex_buffers @ 2 + + sector_count := sector_counts @ terrain-doublebuffer-index + + GL.ClearNamedBufferSubData frame_vertex_buffer GL.R32UI 0 (sizeof u32) GL.RED_INTEGER GL.UNSIGNED_INT null + if (sector_count != 0) + GL.BindBufferRange GL.SHADER_STORAGE_BUFFER + BINDING_BUF_SECTOR_IN + sector_buffer + \ 0:i64 (i64 (sector_count * (sizeof Sector))) + GL.BindBufferRange GL.SHADER_STORAGE_BUFFER + BINDING_BUF_VERTEX_IN + vertex_buffer + \ 0:i64 (i64 vertex_buffer_sz) + GL.BindBufferRange GL.SHADER_STORAGE_BUFFER + BINDING_BUF_VERTEX_OUT + frame_vertex_buffer + \ 0:i64 (i64 vertex_buffer_sz) + GL.UseProgram + compute-program + fn () + let LANE_COUNT = 384 + local_size LANE_COUNT 1 1 + sector := (copy (sector-in.keys @ gl_WorkGroupID.x)) + if (sector.key == 0) + return; + from sector let offset-min offset-max key + let sz = (offset-max as i32 - offset-min as i32) + if (sz <= 0) + return; + let lvl sectorpos... = (decode-cell key) + sectorlod := MAX_CASCADE_DEPTH - lvl + lsectorpos := (ivec3 sectorpos...) + sector-scale := (f32 (1 << sectorlod)) / CASCADE_SIZE + sector-origin := (vec3 lsectorpos) * sector-scale - 0.5 + sector-origin := sector-origin * WORLD_SCALE + sector-scale := sector-scale * WORLD_SCALE + + let proj = + calc-projection; + inline project (p) + d := 1.0 / SECTOR_GROUP_SIZE + p := p * (1.0 + d * 4.0) - d * 2.0 + p := p * sector-scale + sector-origin + p := (transform-pos p) + z := p.z + p := + 'project proj + vec4 p 1.0 + uv := (p.xy / p.w) #* 4.0 + uv := uv * 0.5 + 0.5 + vec3 uv z + + let points... = + va-map project + vec3 0 0 0 + vec3 1 0 0 + vec3 0 1 0 + vec3 1 1 0 + vec3 0 0 1 + vec3 1 0 1 + vec3 0 1 1 + vec3 1 1 1 + let minv = (min points...) + let maxv = (max points...) + let size = (maxv - minv) + if ((min maxv.x maxv.y) < 0.0) + return; + if ((max minv.x minv.y) > 1.0) + return; + if (maxv.z < 0.0) + return; + let depth = minv.z + do + let w = (min size.x size.y) + let lod = + if (w == 0) 0 + else + w := HZB_LEVELS + (log2 w) + w := (clamp w 0.0 (HZB_LEVELS as f32)) + w as i32 + center := (minv + maxv) / 2 + let sz = (HZB_SIZE >> lod) + let uv = + ivec2 + center.xy * (sz as f32) + 0.5 + let occluded? = + for x y in (dim 3 3) + uv := uv + (ivec2 x y) - 1 + uv := (clamp uv (ivec2 0) (ivec2 (sz - 1))) + z := (texelFetch smp-hzb uv lod) . r + if (depth < z) + break false + else true + if occluded? + return; + + texelFetch smp-hzb (ivec2 0) 0 + + id := (copy gl_LocalInvocationIndex) + shared dstofs : u32 + sz as:= u32 + if (id == 0) + dstofs = (atomicAdd vertex-out.count sz) + barrier; + let srcofs = offset-min + let dstofs = (copy dstofs) + BANK_COUNT := (32 * 3 * 4 + LANE_COUNT - 1) // LANE_COUNT + for k in (range (BANK_COUNT as u32)) + i := k * LANE_COUNT + id + if (i >= sz) + return; + vertex-out.entries @ (dstofs + i) = vertex-in.entries @ (srcofs + i) + GL.BindTextureUnit 1 hzb.depth-texture + GL.Uniform smp-hzb 1 + GL.DispatchCompute sector_count 1 1 + GL.MemoryBarrier GL.SHADER_STORAGE_BARRIER_BIT + + if ((frame % 60) == 0) + print-in-count "culled" frame_vertex_buffer + GL.BindBufferRange GL.SHADER_STORAGE_BUFFER + BINDING_BUF_VERTEX_IN + frame_vertex_buffer + \ 0:i64 (i64 vertex_buffer_sz) + setup-draw-arrays; + do GL.BindFramebuffer GL.FRAMEBUFFER fb-scene GL.Viewport 0 0 (i32 size.x) (i32 size.y) @@ -2156,9 +2308,10 @@ GL.UseProgram pg-rasterize GL.BindVertexArray vao-empty #exec-draw-arrays GL.TRIANGLES - exec-draw-arrays GL.TRIANGLE_FAN - #exec-draw-arrays GL.LINE_LOOP - + static-if DRAW_LINES + exec-draw-arrays GL.LINE_LOOP + else + exec-draw-arrays GL.TRIANGLE_FAN GL.Disable GL.DEPTH_TEST GL.Disable GL.CULL_FACE GL.BindFramebuffer GL.FRAMEBUFFER 0 @@ -2172,8 +2325,9 @@ GL.BindTextureUnit 0 fb-scene-color GL.Uniform smp-screen 0 #GL.BindImageTexture BINDING_IMG_HZB_IN hzb.depth-texture HZB_LOD_VIZ GL.TRUE 0 GL.READ_ONLY HZB_PIXELFMT - GL.BindTextureUnit 1 hzb.depth-texture - GL.Uniform smp-hzb 1 + static-if VISUALIZE_HZB + GL.BindTextureUnit 1 hzb.depth-texture + GL.Uniform smp-hzb 1 'draw screen let per-frame-setup =