Skip to content

Commit 22a28e9

Browse files
author
Exoridus
committed
fix(webgpu): pack multi-batch sprite flush into a single vertex upload
When a flush contained more than one batch (e.g. due to a blend-mode change, texture-slot overflow, or pipeline switch), every batch called device.queue.writeBuffer(vertexBuffer, offset: 0, ...) before queue.submit(commandBuffer). Per the WebGPU queue model, every writeBuffer issued before a submit completes before that submit's command buffers execute — so only the LAST batch's vertex data was actually in the buffer when the command buffer ran. Every drawIndexed in the pass then read the same (final) vertex range, and earlier batches rendered with the wrong sprites' positions/UVs/colors. Symptom in the blendmodes example: "Normal" renders correctly (single batch), but any non-Normal blend mode produced a distorted background (actually a bunny rendered at the background's draw slot) and misplaced or missing bunnies. Fix: walk the batches once before beginRenderPass, pack every batch's vertices into the CPU-side vertex buffer at its own sprite offset, and grow the GPU buffers to hold the total sprite count. Then issue a single queue.writeBuffer for the whole packed region, and have each drawIndexed use firstIndex = firstSprite * 6 to target its own range. The pre-built quad indices (0,1,2,0,2,3 per sprite) already match a linear sprite layout, so no per-batch index rewriting is needed. Also corrects the _ensureBatchCapacity call to size the buffers for the total number of sprites in a flush (after the previous fix it was sized for the largest single batch, which was enough to avoid in-loop growth but is too small for the packed layout).
1 parent fe85f6f commit 22a28e9

1 file changed

Lines changed: 66 additions & 35 deletions

File tree

src/rendering/webgpu/WebGpuSpriteRenderer.ts

Lines changed: 66 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -299,22 +299,52 @@ export class WebGpuSpriteRenderer extends AbstractWebGpuRenderer<Sprite> {
299299
return;
300300
}
301301

302-
// Grow vertex/index buffers up front for the largest batch in this
303-
// flush. _ensureBatchCapacity destroys the old buffers and creates new
304-
// ones when capacity grows, so it must not run after setVertexBuffer /
305-
// setIndexBuffer — otherwise the pass's bindings point at destroyed
306-
// buffers and the submit is invalid.
307-
let maxBatchSpriteCount = 0;
308-
for (let start = 0; start < this._drawCallCount;) {
309-
const batchRange = this._getBatchRange(start);
310-
const batchSpriteCount = batchRange.end - batchRange.start;
311-
if (batchSpriteCount > maxBatchSpriteCount) {
312-
maxBatchSpriteCount = batchSpriteCount;
313-
}
314-
start = batchRange.end;
302+
// Grow vertex/index buffers up front for the TOTAL sprite count. Two
303+
// reasons this must happen before the render pass begins:
304+
// 1. _ensureBatchCapacity destroys old buffers and creates new ones
305+
// when capacity grows, so running it after setVertexBuffer /
306+
// setIndexBuffer would leave the pass bound to destroyed buffers.
307+
// 2. All batches are packed into the vertex buffer at distinct
308+
// sprite offsets, so the buffer must hold every sprite in the
309+
// flush, not just one batch worth.
310+
if (this._drawCallCount > 0) {
311+
this._ensureBatchCapacity(this._drawCallCount);
315312
}
316-
if (maxBatchSpriteCount > 0) {
317-
this._ensureBatchCapacity(maxBatchSpriteCount);
313+
314+
// Walk the batches once, packing each batch's vertex data into the
315+
// CPU-side buffer at its own sprite-aligned offset. Each batch's
316+
// metadata is recorded for the draw loop below.
317+
//
318+
// This replaces an earlier per-batch queue.writeBuffer(..., offset: 0)
319+
// pattern where every writeBuffer targeted the same GPU offset. All
320+
// writeBuffers in a frame execute before queue.submit(commandBuffer),
321+
// so only the last batch's vertex data survived — which meant any
322+
// flush containing more than one batch rendered every batch using
323+
// the LAST batch's vertices (background vanished, sprites duplicated
324+
// at wrong sizes, etc. whenever blend mode / texture slot / pipeline
325+
// caused a split into multiple batches).
326+
const batchPlan: Array<{
327+
firstSprite: number;
328+
spriteCount: number;
329+
blendMode: BlendModes;
330+
textures: Array<Texture | RenderTexture>;
331+
}> = [];
332+
let packedSpriteCount = 0;
333+
334+
for (let start = 0; start < this._drawCallCount;) {
335+
const batch = this._getBatchRange(start);
336+
const spriteCount = batch.end - batch.start;
337+
338+
this._writeBatchVertexData(batch, packedSpriteCount);
339+
batchPlan.push({
340+
firstSprite: packedSpriteCount,
341+
spriteCount,
342+
blendMode: batch.blendMode,
343+
textures: batch.textures,
344+
});
345+
346+
packedSpriteCount += spriteCount;
347+
start = batch.end;
318348
}
319349

320350
const viewMatrix = renderManager.view.getTransform();
@@ -347,34 +377,35 @@ export class WebGpuSpriteRenderer extends AbstractWebGpuRenderer<Sprite> {
347377
}
348378

349379
if (this._drawCallCount > 0 && !maskClipsAll) {
380+
// Single upload for the whole packed vertex buffer — every batch
381+
// reads from its own sprite range via drawIndexed's firstIndex.
382+
device.queue.writeBuffer(
383+
this._vertexBuffer!,
384+
0,
385+
this._vertexData,
386+
0,
387+
packedSpriteCount * spriteVertexCount * vertexStrideBytes,
388+
);
389+
350390
pass.setBindGroup(0, uniformBindGroup);
351391
pass.setVertexBuffer(0, this._vertexBuffer!);
352392
pass.setIndexBuffer(this._indexBuffer!, 'uint32');
353393

354-
for (let start = 0; start < this._drawCallCount;) {
355-
const batch = this._getBatchRange(start);
356-
const pipeline = this._getPipeline(batch.blendMode, renderManager.renderTargetFormat);
357-
const spriteCount = batch.end - batch.start;
394+
for (const plan of batchPlan) {
395+
const pipeline = this._getPipeline(plan.blendMode, renderManager.renderTargetFormat);
396+
const textureBindGroup = this._createTextureBindGroup(device, renderManager, plan.textures);
358397

359-
this._writeBatchVertexData(batch);
360-
361-
device.queue.writeBuffer(
362-
this._vertexBuffer!,
398+
pass.setPipeline(pipeline);
399+
pass.setBindGroup(1, textureBindGroup);
400+
pass.drawIndexed(
401+
plan.spriteCount * spriteIndexCount,
402+
1,
403+
plan.firstSprite * spriteIndexCount,
363404
0,
364-
this._vertexData,
365405
0,
366-
spriteCount * spriteVertexCount * vertexStrideBytes,
367406
);
368-
369-
const textureBindGroup = this._createTextureBindGroup(device, renderManager, batch.textures);
370-
371-
pass.setPipeline(pipeline);
372-
pass.setBindGroup(1, textureBindGroup);
373-
pass.drawIndexed(batch.spriteCount * spriteIndexCount, 1, 0, 0, 0);
374407
renderManager.stats.batches++;
375408
renderManager.stats.drawCalls++;
376-
377-
start = batch.end;
378409
}
379410
}
380411

@@ -434,14 +465,14 @@ export class WebGpuSpriteRenderer extends AbstractWebGpuRenderer<Sprite> {
434465
this._indexBuffer = indexBuffer;
435466
}
436467

437-
private _writeBatchVertexData(batch: WebGpuSpriteBatchRange): void {
468+
private _writeBatchVertexData(batch: WebGpuSpriteBatchRange, firstSprite: number): void {
438469
const renderManager = this._renderManager;
439470

440471
if (!renderManager) {
441472
return;
442473
}
443474

444-
let vertexOffset = 0;
475+
let vertexOffset = firstSprite * spriteVertexCount * wordsPerVertex;
445476

446477
for (let drawCallIndex = batch.start; drawCallIndex < batch.end; drawCallIndex++) {
447478
const drawCall = this._drawCalls[drawCallIndex];

0 commit comments

Comments
 (0)