@@ 53,7 53,7 @@ OCCLUSION_CULLING := false
FOG := false
USE_FLAT_SHADING := true
BALANCE_QUADS := true
-USE_COMPLEX_SURFACE := true
+USE_COMPLEX_SURFACE := false
HIGH_QUALITY_FEATURES := true
SOFT_WORLD_SAMPLING := true
USE_CATMULL_CLARK := false
@@ 62,7 62,8 @@ USE_VVF_PACKING := true
AVERAGE_LOD_VERTICES := false
USE_PERSPECTIVE_SUBDIVISION := false
CULL_BACKFACES := true
-VISUALIZE_HZB := true
+VISUALIZE_HZB := false # when true, the HZB depth texture is bound to smp-hzb for the final screen draw
+DRAW_LINES := false # when true, geometry is drawn with GL.LINE_LOOP instead of GL.TRIANGLE_FAN
FETCH_UV_OFFSET := 0.5
#FETCH_UV_OFFSET := 0.0
@@ 70,7 71,8 @@ FETCH_UV_OFFSET := 0.5
# to reach fog density D at depth Z, FOG_RATE = -log2(1 - D)/Z
FOG_RATE := 0.02 # 50% at 100 units
-let MAX_VERTICES = (20 * (1 << 20))
+InstanceVertexCount := 4 # vertices per drawn instance; each generated quad claims 4 vertex slots
+let MAX_VERTICES = (4 * 2 * (1 << 20)) # number of Vertex entries every vertex buffer is sized for
MAX_SECTORS_PER_FRAME := 64 << 5
let WORLD_SIZE = (uvec3 256)
@@ 95,10 97,8 @@ SECTOR_CAPACITY := MAX_CASCADE_DEPTH * 1
CASCADE_SIZE := (1 << MAX_CASCADE_DEPTH)
CASCADE_CENTER := (CASCADE_SIZE // 2)
-InstanceVertexCount := 4
-
HZB_LEVELS := 10
-HZB_LOD_VIZ := 5
+HZB_LOD_VIZ := 8
HZB_SIZE := (1 << HZB_LEVELS)
let BINDING_BUF_SECTOR_IN = 1
@@ 160,8 160,8 @@ let
struct Sector plain
key : u32
flags : u32 # six face bits indicating where the next highest LOD level is
- offset-min = 0xffffffff:u32 # index of first triangle in triangle buffer
- offset-max = 0:u32 # index after last triangle in triangle buffer
+ offset-min = 0xffffffff:u32 # index of the sector's first vertex in the vertex buffer
+ offset-max = 0:u32 # index after the sector's last vertex; generate-quad advances it by 4 per quad
struct Sectors plain
keys : (array Sector)
@@ 1142,6 1142,7 @@ dump "shared memory requirements"
shared cell-corner-flags : (array u32 SECTOR_SAMPLE_VOLUME)
shared cell-vertex : (array vec4 SECTOR_SAMPLE_VOLUME)
shared cell-normal : (array vec3 SECTOR_SAMPLE_VOLUME)
+shared quad-count : u32 # workgroup-wide quad tally; lane 0 reserves 4 * quad-count vertices with one atomicAdd
#fn id2index (id)
#id.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + id.y * gl_WorkGroupSize.x + id.x
@@ 1183,10 1184,8 @@ fn generate-quad (v00 v01 v10 v11)
_ v00 v10 v11 v01
# generate quad
- let ofs = (atomicAdd vertex-out.count 4)
sector := (sector-inout.keys @ gl_WorkGroupID.x)
- atomicMin sector.offset-min ofs
- atomicMax sector.offset-max (ofs + 4)
+ let ofs = (atomicAdd sector.offset-max 4)
entries := vertex-out.entries
static-if USE_FLAT_SHADING
n0 := (triangle-normal v00.pos v11.pos v10.pos)
@@ 1362,6 1361,8 @@ fn generate-cell-verts ()
cell-normal @ idx = (normalize normal_accum)
;
;
+ if (lane-idx == 0)
+ quad-count = 0
barrier;
lpos := (index2id lane-idx)
@@ 1374,6 1375,22 @@ fn generate-cell-verts ()
cflags := (copy (cell-corner-flags @ idx))
+ centerbit := (cflags >> 7) & 1 # bit 7 of this cell's corner flags
+ flip? := (centerbit != 0)
+
+ for i in (range 3)
+ if ((centerbit ^ ((cflags >> ((1 << i as u32) ^ 7)) & 1)) != 0) # bit 7 differs from bit 6, 5 or 3 (i = 0, 1, 2)
+ static-if USE_CATMULL_CLARK
+ atomicAdd quad-count 4 # this mode counts four quads per differing bit pair
+ else
+ atomicAdd quad-count 1
+ barrier;
+ if (lane-idx == 0)
+ let ofs = (atomicAdd vertex-out.count (4 * quad-count)) # reserve 4 vertex slots per counted quad
+ sector.offset-min = ofs
+ sector.offset-max = ofs # range starts empty; generate-quad grows it with atomicAdd
+ barrier; # NOTE(review): confirm this also makes lane 0's writes to sector.offset-min/max visible to the other lanes
+
static-if USE_CATMULL_CLARK
inline getvertex (offset)
dpos := lpos + offset
@@ 1986,6 2003,10 @@ inline main ()
GL.DispatchCompute (unpack (((WORLD_SIZE >> (lod as u32)) + GROUP_SIZE - 1) // GROUP_SIZE))
GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT)
+ let sector_counts =
+ static-array u32 2 # one sector count per terrain double-buffer index
+ inline () 0
+
let vertex_buffer_sz = ((sizeof Vertices) + (sizeof Vertex) * MAX_VERTICES)
let vertex_buffers =
static-array GL.Buffer 3
@@ 2006,11 2027,13 @@ inline main ()
# update terrain
next-terrain-doublebuffer-index := terrain-doublebuffer-index ^ 1
+ next_sector_count := sector_counts @ next-terrain-doublebuffer-index
next_sector_buffer := sector_buffers @ next-terrain-doublebuffer-index
next_vertex_buffer := vertex_buffers @ next-terrain-doublebuffer-index
if ('fresh? terrain-job)
collect-sectors;
+ next_sector_count = (countof sectors) as u32
GL.NamedBufferSubData next_sector_buffer 0
i32 ((countof sectors) * (sizeof Sector))
& (sectors @ 0)
@@ 2044,24 2067,27 @@ inline main ()
#############
vertex_buffer := vertex_buffers @ terrain-doublebuffer-index
+ sector_buffer := sector_buffers @ terrain-doublebuffer-index
- inline print-in-count ()
+ inline print-in-count (label vertex_buffer) # prints label and the buffer's vertex count, expressed in primitives
let ptr =
GL.MapNamedBufferRange vertex_buffer 0 (sizeof u32)
GL.MAP_READ_BIT
let ptr = (bitcast ptr (pointer Vertices))
- print (ptr.count // InstanceVertexCount) "primitives"
+ print label ":" (ptr.count // InstanceVertexCount) "primitives" # only the first u32 is mapped, read here as ptr.count
GL.UnmapNamedBuffer vertex_buffer
if ((frame % 60) == 0)
- print-in-count;
+ print-in-count "all" vertex_buffer
vvv bind setup-draw-arrays exec-draw-arrays
indirect-draw-arrays-setup
inline ()
_
InstanceVertexCount
- (deref vertex-in.count) // InstanceVertexCount
+ min
+ (MAX_VERTICES // InstanceVertexCount) as u32 # cap in instances, not vertices, so the draw stays inside the vertex buffer
+ (deref vertex-in.count) // InstanceVertexCount
0
0
@@ 2134,6 2160,132 @@ inline main ()
GL.DispatchCompute size size 1
GL.MemoryBarrier (GL.TEXTURE_FETCH_BARRIER_BIT | GL.SHADER_IMAGE_ACCESS_BARRIER_BIT)
+ frame_vertex_buffer := vertex_buffers @ 2 # third vertex buffer: receives the vertices that survive culling
+
+ sector_count := sector_counts @ terrain-doublebuffer-index
+
+ GL.ClearNamedBufferSubData frame_vertex_buffer GL.R32UI 0 (sizeof u32) GL.RED_INTEGER GL.UNSIGNED_INT null # zero the leading u32 (the vertex count)
+ if (sector_count != 0)
+ GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+ BINDING_BUF_SECTOR_IN
+ sector_buffer
+ \ 0:i64 (i64 (sector_count * (sizeof Sector)))
+ GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+ BINDING_BUF_VERTEX_IN
+ vertex_buffer
+ \ 0:i64 (i64 vertex_buffer_sz)
+ GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+ BINDING_BUF_VERTEX_OUT
+ frame_vertex_buffer
+ \ 0:i64 (i64 vertex_buffer_sz)
+ GL.UseProgram
+ compute-program
+ fn ()
+ let LANE_COUNT = 384
+ local_size LANE_COUNT 1 1
+ sector := (copy (sector-in.keys @ gl_WorkGroupID.x)) # one workgroup per sector
+ if (sector.key == 0)
+ return;
+ from sector let offset-min offset-max key
+ let sz = (offset-max as i32 - offset-min as i32) # vertex count of this sector's range
+ if (sz <= 0)
+ return;
+ let lvl sectorpos... = (decode-cell key)
+ sectorlod := MAX_CASCADE_DEPTH - lvl
+ lsectorpos := (ivec3 sectorpos...)
+ sector-scale := (f32 (1 << sectorlod)) / CASCADE_SIZE
+ sector-origin := (vec3 lsectorpos) * sector-scale - 0.5
+ sector-origin := sector-origin * WORLD_SCALE
+ sector-scale := sector-scale * WORLD_SCALE
+
+ let proj =
+ calc-projection;
+ inline project (p) # maps a unit-cube corner to (uv, z): uv from the projected position, z from transform-pos
+ d := 1.0 / SECTOR_GROUP_SIZE
+ p := p * (1.0 + d * 4.0) - d * 2.0 # widen the unit cube by 2*d on every side
+ p := p * sector-scale + sector-origin
+ p := (transform-pos p)
+ z := p.z
+ p :=
+ 'project proj
+ vec4 p 1.0
+ uv := (p.xy / p.w) #* 4.0
+ uv := uv * 0.5 + 0.5
+ vec3 uv z
+
+ let points... =
+ va-map project
+ vec3 0 0 0
+ vec3 1 0 0
+ vec3 0 1 0
+ vec3 1 1 0
+ vec3 0 0 1
+ vec3 1 0 1
+ vec3 0 1 1
+ vec3 1 1 1
+ let minv = (min points...)
+ let maxv = (max points...)
+ let size = (maxv - minv)
+ if ((min maxv.x maxv.y) < 0.0) # bounds end below 0 on x or y
+ return;
+ if ((max minv.x minv.y) > 1.0) # bounds start above 1 on x or y
+ return;
+ if (maxv.z < 0.0) # every corner has negative z
+ return;
+ let depth = minv.z # smallest z among the eight corners
+ do
+ let w = (min size.x size.y)
+ let lod =
+ if (w == 0) 0
+ else
+ w := HZB_LEVELS + (log2 w) # a larger uv extent selects a higher lod index
+ w := (clamp w 0.0 (HZB_LEVELS as f32))
+ w as i32
+ center := (minv + maxv) / 2
+ let sz = (HZB_SIZE >> lod)
+ let uv =
+ ivec2
+ center.xy * (sz as f32) + 0.5
+ let occluded? =
+ for x y in (dim 3 3) # 3x3 texel neighbourhood around uv; false as soon as one texel has depth < z
+ uv := uv + (ivec2 x y) - 1
+ uv := (clamp uv (ivec2 0) (ivec2 (sz - 1)))
+ z := (texelFetch smp-hzb uv lod) . r
+ if (depth < z)
+ break false
+ else true
+ if occluded?
+ return;
+
+ texelFetch smp-hzb (ivec2 0) 0 # NOTE(review): result is unused; presumably keeps smp-hzb referenced, confirm or remove
+
+ id := (copy gl_LocalInvocationIndex)
+ shared dstofs : u32
+ sz as:= u32
+ if (id == 0)
+ dstofs = (atomicAdd vertex-out.count sz) # lane 0 reserves sz output slots for the whole workgroup
+ barrier;
+ let srcofs = offset-min
+ let dstofs = (copy dstofs)
+ BANK_COUNT := (32 * 3 * 4 + LANE_COUNT - 1) // LANE_COUNT # evaluates to 1 for LANE_COUNT = 384; NOTE(review): confirm sz never exceeds 384, otherwise reserved slots stay unwritten
+ for k in (range (BANK_COUNT as u32))
+ i := k * LANE_COUNT + id
+ if (i >= sz)
+ return;
+ vertex-out.entries @ (dstofs + i) = vertex-in.entries @ (srcofs + i) # NOTE(review): dstofs + i is not checked against MAX_VERTICES
+ GL.BindTextureUnit 1 hzb.depth-texture
+ GL.Uniform smp-hzb 1
+ GL.DispatchCompute sector_count 1 1
+ GL.MemoryBarrier GL.SHADER_STORAGE_BARRIER_BIT
+
+ if ((frame % 60) == 0)
+ print-in-count "culled" frame_vertex_buffer # NOTE(review): this maps the buffer for reading; confirm whether GL.BUFFER_UPDATE_BARRIER_BIT is also required
+ GL.BindBufferRange GL.SHADER_STORAGE_BUFFER
+ BINDING_BUF_VERTEX_IN
+ frame_vertex_buffer
+ \ 0:i64 (i64 vertex_buffer_sz)
+ setup-draw-arrays; # runs with frame_vertex_buffer bound as the vertex input
+
do
GL.BindFramebuffer GL.FRAMEBUFFER fb-scene
GL.Viewport 0 0 (i32 size.x) (i32 size.y)
@@ 2156,9 2308,10 @@ inline main ()
GL.UseProgram pg-rasterize
GL.BindVertexArray vao-empty
#exec-draw-arrays GL.TRIANGLES
- exec-draw-arrays GL.TRIANGLE_FAN
- #exec-draw-arrays GL.LINE_LOOP
-
+ static-if DRAW_LINES # compile-time choice of the primitive mode passed to exec-draw-arrays
+ exec-draw-arrays GL.LINE_LOOP
+ else
+ exec-draw-arrays GL.TRIANGLE_FAN
GL.Disable GL.DEPTH_TEST
GL.Disable GL.CULL_FACE
GL.BindFramebuffer GL.FRAMEBUFFER 0
@@ 2172,8 2325,9 @@ inline main ()
GL.BindTextureUnit 0 fb-scene-color
GL.Uniform smp-screen 0
#GL.BindImageTexture BINDING_IMG_HZB_IN hzb.depth-texture HZB_LOD_VIZ GL.TRUE 0 GL.READ_ONLY HZB_PIXELFMT
- GL.BindTextureUnit 1 hzb.depth-texture
- GL.Uniform smp-hzb 1
+ static-if VISUALIZE_HZB # bind the HZB depth texture for the screen draw only when the flag is set
+ GL.BindTextureUnit 1 hzb.depth-texture
+ GL.Uniform smp-hzb 1
'draw screen
let per-frame-setup =