1010import dev .engine_room .flywheel .backend .compile .IndirectPrograms ;
1111import dev .engine_room .flywheel .backend .gl .GlFence ;
1212import dev .engine_room .flywheel .backend .gl .buffer .GlBuffer ;
13+ import dev .engine_room .flywheel .backend .gl .buffer .GlBufferUsage ;
1314import dev .engine_room .flywheel .lib .memory .FlwMemoryTracker ;
1415import dev .engine_room .flywheel .lib .memory .MemoryBlock ;
1516import it .unimi .dsi .fastutil .PriorityQueue ;
@@ -22,6 +23,8 @@ public class StagingBuffer {
2223 private static final int STORAGE_FLAGS = GL45C .GL_MAP_PERSISTENT_BIT | GL45C .GL_MAP_WRITE_BIT | GL45C .GL_CLIENT_STORAGE_BIT ;
2324 private static final int MAP_FLAGS = GL45C .GL_MAP_PERSISTENT_BIT | GL45C .GL_MAP_WRITE_BIT | GL45C .GL_MAP_FLUSH_EXPLICIT_BIT | GL45C .GL_MAP_INVALIDATE_BUFFER_BIT ;
2425
26+ private static final int SSBO_ALIGNMENT = GL45 .glGetInteger (GL45 .GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT );
27+
2528 private final int vbo ;
2629 private final long map ;
2730 private final long capacity ;
@@ -30,7 +33,7 @@ public class StagingBuffer {
3033 private final OverflowStagingBuffer overflow = new OverflowStagingBuffer ();
3134 private final TransferList transfers = new TransferList ();
3235 private final PriorityQueue <FencedRegion > fencedRegions = new ObjectArrayFIFOQueue <>();
33- private final GlBuffer scatterBuffer = new GlBuffer ();
36+ private final GlBuffer scatterBuffer = new GlBuffer (GlBufferUsage . STREAM_COPY );
3437 private final ScatterList scatterList = new ScatterList ();
3538
3639 /**
@@ -252,7 +255,6 @@ private void dispatchComputeCopies() {
252255 .bind ();
253256
254257 // These bindings don't change between dstVbos.
255- GL45 .glBindBufferBase (GL45C .GL_SHADER_STORAGE_BUFFER , 0 , scatterBuffer .handle ());
256258 GL45 .glBindBufferBase (GL45C .GL_SHADER_STORAGE_BUFFER , 1 , vbo );
257259
258260 int dstVbo ;
@@ -274,7 +276,25 @@ private void dispatchComputeCopies() {
274276 }
275277
276278 private void dispatchScatter (int dstVbo ) {
277- scatterBuffer .upload (scatterList .ptr (), scatterList .usedBytes ());
279+ var scatterSize = scatterList .usedBytes ();
280+
281+ // If there's still enough space in the staging buffer, let's write the scatter list into it directly.
282+ long alignedPos = pos + SSBO_ALIGNMENT - 1 - (pos + SSBO_ALIGNMENT - 1 ) % SSBO_ALIGNMENT ;
283+
284+ long remaining = capacity - alignedPos ;
285+ if (scatterSize <= remaining && scatterSize <= totalAvailable ) {
286+ MemoryUtil .memCopy (scatterList .ptr (), map + alignedPos , scatterSize );
287+ GL45 .glBindBufferRange (GL45C .GL_SHADER_STORAGE_BUFFER , 0 , vbo , alignedPos , scatterSize );
288+
289+ long alignmentCost = alignedPos - pos ;
290+
291+ usedCapacity += scatterSize + alignmentCost ;
292+ totalAvailable -= scatterSize + alignmentCost ;
293+ pos += scatterSize + alignmentCost ;
294+ } else {
295+ scatterBuffer .upload (scatterList .ptr (), scatterSize );
296+ GL45 .glBindBufferBase (GL45C .GL_SHADER_STORAGE_BUFFER , 0 , scatterBuffer .handle ());
297+ }
278298
279299 GL45 .glBindBufferBase (GL45C .GL_SHADER_STORAGE_BUFFER , 2 , dstVbo );
280300
0 commit comments