From 6f16cf5491102ef1f5d21cd6c201562c0a915372 Mon Sep 17 00:00:00 2001 From: shaoboyan Date: Wed, 31 Dec 2025 16:32:37 +0800 Subject: [PATCH 1/5] Operation tests for setImmediates Implement operation tests for setImmediates in ComputePassEncoder, RenderPassEncoder, and RenderBundleEncoder. - Add basic execution tests for scalar, vector, and struct types. - Add tests for partial updates and multiple updates (using range verification). - Add tests for pipeline switching (no inheritance). - Add tests for large data (maxImmediateSize) with range verification. - Add tests for TypedArray arguments with offsets. - Add tests for mixing render pass and bundle execution. --- .../programmable/immediate.spec.ts | 974 ++++++++++++++++++ 1 file changed, 974 insertions(+) create mode 100644 src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts diff --git a/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts b/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts new file mode 100644 index 000000000000..a2bda7c7bf9e --- /dev/null +++ b/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts @@ -0,0 +1,974 @@ +export const description = ` +Operation tests for immediate data usage in RenderPassEncoder, ComputePassEncoder, and RenderBundleEncoder. +`; + +import { makeTestGroup } from '../../../../../common/framework/test_group.js'; +import { getGPU } from '../../../../../common/util/navigator_gpu.js'; +import { + assert, + kTypedArrayBufferViews, + kTypedArrayBufferViewKeys, + supportsImmediateData, + unreachable, +} from '../../../../../common/util/util.js'; +import { AllFeaturesMaxLimitsGPUTest } from '../../../../gpu_test.js'; +import { HostSharableTypes, kVectorContainerTypes, ScalarType } from '../../../../shader/types.js'; +import { + kProgrammableEncoderTypes, + ProgrammableEncoderType, +} from '../../../../util/command_buffer_maker.js'; + +class ImmediateDataOperationTest extends AllFeaturesMaxLimitsGPUTest { + override async init() { + await super.init(); + + if (!supportsImmediateData(getGPU(this.rec))) { + this.skip('Immediate data not supported'); + return; + } + } + + skipIfStorageBuffersInFragmentStageNotAvailable(encoderType: ProgrammableEncoderType) { + if (!this.isCompatibility) { + return; + } + const needsStorageBuffersInFragmentStage = + encoderType === 'render pass' || encoderType === 'render bundle'; + this.skipIf( + needsStorageBuffersInFragmentStage && + !(this.device.limits.maxStorageBuffersInFragmentStage! >= 1), + `maxStorageBuffersInFragmentStage(${this.device.limits.maxStorageBuffersInFragmentStage}) < 1` + ); + } +} + +function createPipeline( + t: AllFeaturesMaxLimitsGPUTest, + encoderType: ProgrammableEncoderType, + wgslDecl: string, + wgslUsage: string, + immediateSize: number +) { + const layout = t.device.createPipelineLayout({ + bindGroupLayouts: [ + t.device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT, + buffer: { type: 'storage' }, + }, + { + binding: 1, + visibility: GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT, + buffer: { type: 'uniform', hasDynamicOffset: true }, + }, + ], + }), + ], + immediateSize, + }); + + const fullCode = ` + ${wgslDecl} + @group(0) @binding(0) var output: array; + @group(0) @binding(1) var outIndex: u32; + + ${wgslUsage} + `; + + if (encoderType === 'compute pass') { + return t.device.createComputePipeline({ + layout, + compute: { + module: t.device.createShaderModule({ code: fullCode }), + entryPoint: 'cs_main', + }, + }); + } else { + return t.device.createRenderPipeline({ + layout, + vertex: { + module: t.device.createShaderModule({ + code: ` + @vertex fn vs_main(@builtin(vertex_index) vIdx: u32) -> @builtin(position) vec4f { + // Map vIdx 0..3 to pixel centers. + // Uses a 4x1 texture. + // vIdx 0 -> pixel 0 -> x in [-1, -0.5] + // vIdx 1 -> pixel 1 -> x in [-0.5, 0] + let x = (f32(vIdx) + 0.5) / 2.0 - 1.0; + return vec4f(x, 0.0, 0.0, 1.0); + } + `, + }), + entryPoint: 'vs_main', + }, + fragment: { + module: t.device.createShaderModule({ code: fullCode }), + entryPoint: 'fs_main', + targets: [{ format: 'r32uint' }], + }, + primitive: { + topology: 'point-list', + }, + }); + } +} + +function executePass( + t: AllFeaturesMaxLimitsGPUTest, + encoderType: ProgrammableEncoderType, + commandEncoder: GPUCommandEncoder, + fn: (pass: GPURenderPassEncoder | GPUComputePassEncoder | GPURenderBundleEncoder) => void +) { + if (encoderType === 'compute pass') { + const pass = commandEncoder.beginComputePass(); + fn(pass); + pass.end(); + } else { + const renderTargetTexture = t.createTextureTracked({ + size: [4, 1, 1], + format: 'r32uint', + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + }); + + if (encoderType === 'render pass') { + const pass = commandEncoder.beginRenderPass({ + colorAttachments: [ + { + view: renderTargetTexture.createView(), + loadOp: 'clear', + storeOp: 'store', + clearValue: { r: 0, g: 0, b: 0, a: 0 }, + }, + ], + }); + fn(pass); + pass.end(); + } else { + // Render Bundle + const bundleEncoder = t.device.createRenderBundleEncoder({ + colorFormats: ['r32uint'], + }); + fn(bundleEncoder); + const bundle = bundleEncoder.finish(); + + const pass = commandEncoder.beginRenderPass({ + colorAttachments: [ + { + view: renderTargetTexture.createView(), + loadOp: 'clear', + storeOp: 'store', + clearValue: { r: 0, g: 0, b: 0, a: 0 }, + }, + ], + }); + pass.executeBundles([bundle]); + pass.end(); + } + } +} + +function runAndCheck( + t: AllFeaturesMaxLimitsGPUTest, + encoderType: ProgrammableEncoderType, + pipeline: GPURenderPipeline | GPUComputePipeline, + setImmediatesFn: ( + encoder: GPUComputePassEncoder | GPURenderPassEncoder | GPURenderBundleEncoder + ) => void, + expectedValues: number[] +) { + const outputBuffer = t.createBufferTracked({ + size: 4 * 4, // 4 u32s + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); + const indexUniformBuffer = t.createBufferTracked({ + size: 256 * 4, // Enough for dynamic offsets + usage: GPUBufferUsage.UNIFORM, + mappedAtCreation: true, + }); + // Initialize indices 0, 1, 2, 3 at offsets 0, 256, 512, 768 + const indexData = new Uint32Array(indexUniformBuffer.getMappedRange()); + indexData[0] = 0; + indexData[256 / 4] = 1; + indexData[512 / 4] = 2; + indexData[768 / 4] = 3; + indexUniformBuffer.unmap(); + + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { binding: 0, resource: { buffer: outputBuffer } }, + { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, + ], + }); + + const commandEncoder = t.device.createCommandEncoder(); + executePass(t, encoderType, commandEncoder, encoder => { + if (encoderType === 'compute pass') { + const pass = encoder as GPUComputePassEncoder; + pass.setPipeline(pipeline as GPUComputePipeline); + pass.setBindGroup(0, bindGroup, [0]); // Index 0 + setImmediatesFn(pass); + pass.dispatchWorkgroups(1); + } else if (encoderType === 'render pass') { + const pass = encoder as GPURenderPassEncoder; + pass.setPipeline(pipeline as GPURenderPipeline); + pass.setBindGroup(0, bindGroup, [0]); + setImmediatesFn(pass); + pass.draw(1, 1, 0, 0); // Vertex 0 -> Pixel 0 + } else if (encoderType === 'render bundle') { + const bundleEncoder = encoder as GPURenderBundleEncoder; + bundleEncoder.setPipeline(pipeline as GPURenderPipeline); + bundleEncoder.setBindGroup(0, bindGroup, [0]); + setImmediatesFn(bundleEncoder); + bundleEncoder.draw(1, 1, 0, 0); + } else { + unreachable(); + } + }); + + t.device.queue.submit([commandEncoder.finish()]); + + t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array(expectedValues)); +} + +export const g = makeTestGroup(ImmediateDataOperationTest); + +g.test('basic_execution') + .desc('Verify immediate data is correctly passed to shaders.') + .params(u => + u.combine('encoderType', kProgrammableEncoderTypes).expandWithParams(function* () { + // Scalars + for (const s of HostSharableTypes) { + yield { dataType: s, scalarType: s, vectorSize: 1 }; + } + // Vectors + for (const v of kVectorContainerTypes) { + const size = parseInt(v[3]); + for (const s of HostSharableTypes) { + yield { dataType: `${v}<${s}>`, scalarType: s, vectorSize: size }; + } + } + // Struct + yield { dataType: 'struct', scalarType: undefined, vectorSize: undefined }; + }) + ) + .fn(t => { + const { encoderType, dataType, scalarType, vectorSize } = t.params; + t.skipIf(scalarType === 'f16', 'Immediate data blocks do not yet support f16 types'); + t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); + + let wgslDecl = ''; + let wgslUsage = ''; + let immediateSize = 0; + let expected: number[] = []; + let inputData: Uint32Array; + + if (dataType === 'struct') { + immediateSize = 8; + wgslDecl = ` + struct S { a: u32, b: u32 } + var data: S; + `; + wgslUsage = ` + @compute @workgroup_size(1) fn cs_main() { + output[0] = data.a; output[1] = data.b; + } + @fragment fn fs_main() -> @location(0) vec4u { + output[0] = data.a; output[1] = data.b; + return vec4u(0); + } + `; + inputData = new Uint32Array([0xdeadbeef, 0xcafebabe]); + expected = [0xdeadbeef, 0xcafebabe]; + } else { + // Non-struct types (scalar or vector) + const sType = scalarType as ScalarType; + const vSize = vectorSize as number; + + immediateSize = vSize * 4; + wgslDecl = `var data: ${dataType};`; + + let readCode = ''; + for (let i = 0; i < vSize; i++) { + let valExpr = vSize === 1 ? 'data' : `data[${i}]`; + if (sType === 'i32' || sType === 'f32') { + valExpr = `bitcast(${valExpr})`; + } + readCode += `output[${i}] = ${valExpr};`; + } + + wgslUsage = ` + @compute @workgroup_size(1) fn cs_main() { + ${readCode} + } + @fragment fn fs_main() -> @location(0) vec4u { + ${readCode} + return vec4u(0); + } + `; + + inputData = new Uint32Array(vSize); + for (let i = 0; i < vSize; i++) { + if (sType === 'u32') { + const val = 0x10000000 + i; + inputData[i] = val; + expected.push(val); + } else if (sType === 'i32') { + const val = -1000 - i; + inputData[i] = new Uint32Array(new Int32Array([val]).buffer)[0]; + expected.push(inputData[i]); + } else if (sType === 'f32') { + const val = 1.5 + i; + inputData[i] = new Uint32Array(new Float32Array([val]).buffer)[0]; + expected.push(inputData[i]); + } else { + unreachable(`Unhandled scalar type: ${sType}`); + } + } + } + + const pipeline = createPipeline(t, encoderType, wgslDecl, wgslUsage, immediateSize); + + runAndCheck( + t, + encoderType, + pipeline, + encoder => { + encoder.setImmediates!(0, inputData.buffer, 0, inputData.buffer.byteLength); + }, + expected + ); + }); + +g.test('update_data') + .desc('Verify setImmediates updates data correctly within a pass, including partial updates.') + .params(u => u.combine('encoderType', kProgrammableEncoderTypes)) + .fn(t => { + const { encoderType } = t.params; + t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); + const immediateSize = 16; + const wgslDecl = 'var data: vec4;'; + const wgslUsage = ` + @compute @workgroup_size(1) fn cs_main() { + let base = outIndex * 4; + output[base + 0] = data[0]; + output[base + 1] = data[1]; + output[base + 2] = data[2]; + output[base + 3] = data[3]; + } + @fragment fn fs_main() -> @location(0) vec4u { + let base = outIndex * 4; + output[base + 0] = data[0]; + output[base + 1] = data[1]; + output[base + 2] = data[2]; + output[base + 3] = data[3]; + return vec4u(0); + } + `; + + const pipeline = createPipeline(t, encoderType, wgslDecl, wgslUsage, immediateSize); + + const outputBuffer = t.createBufferTracked({ + size: 4 * 4 * 3, // 3 steps, 4 u32s each + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); + + const indexUniformBuffer = t.createBufferTracked({ + size: 256 * 3, + usage: GPUBufferUsage.UNIFORM, + mappedAtCreation: true, + }); + const indexData = new Uint32Array(indexUniformBuffer.getMappedRange()); + indexData[0] = 0; + indexData[256 / 4] = 1; + indexData[512 / 4] = 2; + indexUniformBuffer.unmap(); + + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { binding: 0, resource: { buffer: outputBuffer } }, + { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, + ], + }); + + const commandEncoder = t.device.createCommandEncoder(); + + const runStep = ( + pass: GPURenderPassEncoder | GPUComputePassEncoder | GPURenderBundleEncoder, + stepIndex: number, + data: Uint32Array, + dstOffset: number = 0 + ) => { + pass.setBindGroup(0, bindGroup, [stepIndex * 256]); + pass.setImmediates!(dstOffset, data, 0, data.length); + + if (encoderType === 'compute pass') { + (pass as GPUComputePassEncoder).dispatchWorkgroups(1); + } else if (encoderType === 'render pass') { + (pass as GPURenderPassEncoder).draw(1, 1, 0, 0); + } else { + (pass as GPURenderBundleEncoder).draw(1, 1, 0, 0); + } + }; + + executePass(t, encoderType, commandEncoder, enc => { + if (encoderType === 'compute pass') { + (enc as GPUComputePassEncoder).setPipeline(pipeline as GPUComputePipeline); + } else if (encoderType === 'render pass') { + (enc as GPURenderPassEncoder).setPipeline(pipeline as GPURenderPipeline); + } else { + (enc as GPURenderBundleEncoder).setPipeline(pipeline as GPURenderPipeline); + } + + // Step 1: Full set [1, 2, 3, 4] + runStep(enc, 0, new Uint32Array([1, 2, 3, 4])); + + // Step 2: Full update [5, 6, 7, 8] + runStep(enc, 1, new Uint32Array([5, 6, 7, 8])); + + // Step 3: Partial update offset 4 bytes (index 1) with [9, 10] -> [5, 9, 10, 8] + runStep(enc, 2, new Uint32Array([9, 10]), 4); + }); + + t.device.queue.submit([commandEncoder.finish()]); + + const expected = new Uint32Array([1, 2, 3, 4, 5, 6, 7, 8, 5, 9, 10, 8]); + t.expectGPUBufferValuesEqual(outputBuffer, expected); + }); + +g.test('pipeline_switch') + .desc( + `Verify immediate data is correctly set after switching pipelines. + - compatible: Both pipelines use the same immediateSize (same layout). + - incompatible: Pipelines use different immediateSize values (different layouts). + In both cases, immediates must be reset correctly between draws/dispatches.` + ) + .params(u => + u + .combine('encoderType', ['render pass', 'compute pass'] as const) + .combine('compatibility', ['compatible', 'incompatible'] as const) + ) + .fn(t => { + const { encoderType, compatibility } = t.params; + t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); + + // Pipeline A always uses vec4 (16 bytes). + const wgslDeclA = 'var data: vec4;'; + const wgslUsageA = ` + @compute @workgroup_size(1) fn cs_main() { + let base = outIndex * 4; + output[base] = data.x; output[base+1] = data.y; output[base+2] = data.z; output[base+3] = data.w; + } + @fragment fn fs_main() -> @location(0) vec4u { + let base = outIndex * 4; + output[base] = data.x; output[base+1] = data.y; output[base+2] = data.z; output[base+3] = data.w; + return vec4u(0); + } + `; + + let wgslDeclB: string; + let wgslUsageB: string; + let immediateSizeB: number; + + if (compatibility === 'compatible') { + // Pipeline B has the same immediate layout as A (vec4, 16 bytes). + wgslDeclB = wgslDeclA; + wgslUsageB = wgslUsageA; + immediateSizeB = 16; + } else { + // Pipeline B uses vec2 (8 bytes) — different/incompatible layout. + wgslDeclB = 'var data: vec2;'; + wgslUsageB = ` + @compute @workgroup_size(1) fn cs_main() { + let base = outIndex * 4; + output[base] = data.x; output[base+1] = data.y; output[base+2] = 0u; output[base+3] = 0u; + } + @fragment fn fs_main() -> @location(0) vec4u { + let base = outIndex * 4; + output[base] = data.x; output[base+1] = data.y; output[base+2] = 0u; output[base+3] = 0u; + return vec4u(0); + } + `; + immediateSizeB = 8; + } + + const pipelineA = createPipeline(t, encoderType, wgslDeclA, wgslUsageA, 16); + const pipelineB = createPipeline(t, encoderType, wgslDeclB, wgslUsageB, immediateSizeB); + + const outputBuffer = t.createBufferTracked({ + size: 32, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); + const indexUniformBuffer = t.createBufferTracked({ + size: 512, + usage: GPUBufferUsage.UNIFORM, + mappedAtCreation: true, + }); + const indexData = new Uint32Array(indexUniformBuffer.getMappedRange()); + indexData[0] = 0; + indexData[256 / 4] = 1; + indexUniformBuffer.unmap(); + + const bindGroupA = t.device.createBindGroup({ + layout: pipelineA.getBindGroupLayout(0), + entries: [ + { binding: 0, resource: { buffer: outputBuffer } }, + { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, + ], + }); + const bindGroupB = t.device.createBindGroup({ + layout: pipelineB.getBindGroupLayout(0), + entries: [ + { binding: 0, resource: { buffer: outputBuffer } }, + { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, + ], + }); + + const commandEncoder = t.device.createCommandEncoder(); + + if (compatibility === 'compatible') { + executePass(t, encoderType, commandEncoder, enc => { + if (encoderType === 'compute pass') { + const pass = enc as GPUComputePassEncoder; + // 1. Set Pipeline A, set immediates [1, 2, 3, 4], dispatch + pass.setPipeline(pipelineA as GPUComputePipeline); + pass.setBindGroup(0, bindGroupA, [0]); + pass.setImmediates!(0, new Uint32Array([1, 2, 3, 4]), 0, 4); + pass.dispatchWorkgroups(1); + + // 2. Switch to Pipeline B (compatible), set new immediates [5, 6, 7, 8], dispatch + pass.setPipeline(pipelineB as GPUComputePipeline); + pass.setBindGroup(0, bindGroupB, [256]); + pass.setImmediates!(0, new Uint32Array([5, 6, 7, 8]), 0, 4); + pass.dispatchWorkgroups(1); + } else { + const pass = enc as GPURenderPassEncoder; + // 1. Set Pipeline A, set immediates [1, 2, 3, 4], draw + pass.setPipeline(pipelineA as GPURenderPipeline); + pass.setBindGroup(0, bindGroupA, [0]); + pass.setImmediates!(0, new Uint32Array([1, 2, 3, 4]), 0, 4); + pass.draw(1, 1, 0, 0); + + // 2. Switch to Pipeline B (compatible), set new immediates [5, 6, 7, 8], draw + pass.setPipeline(pipelineB as GPURenderPipeline); + pass.setBindGroup(0, bindGroupB, [256]); + pass.setImmediates!(0, new Uint32Array([5, 6, 7, 8]), 0, 4); + pass.draw(1, 1, 1, 0); + } + }); + + t.device.queue.submit([commandEncoder.finish()]); + + const expected = new Uint32Array([1, 2, 3, 4, 5, 6, 7, 8]); + t.expectGPUBufferValuesEqual(outputBuffer, expected); + } else { + // Incompatible: Pipeline A uses 16 bytes, Pipeline B uses 8 bytes. + executePass(t, encoderType, commandEncoder, enc => { + if (encoderType === 'compute pass') { + const pass = enc as GPUComputePassEncoder; + // 1. Set Pipeline A (16 bytes), set immediates [1, 2, 3, 4], dispatch + pass.setPipeline(pipelineA as GPUComputePipeline); + pass.setBindGroup(0, bindGroupA, [0]); + pass.setImmediates!(0, new Uint32Array([1, 2, 3, 4]), 0, 4); + pass.dispatchWorkgroups(1); + + // 2. Switch to Pipeline B (8 bytes), set new immediates [5, 6], dispatch + pass.setPipeline(pipelineB as GPUComputePipeline); + pass.setBindGroup(0, bindGroupB, [256]); + pass.setImmediates!(0, new Uint32Array([5, 6]), 0, 2); + pass.dispatchWorkgroups(1); + } else { + const pass = enc as GPURenderPassEncoder; + // 1. Set Pipeline A (16 bytes), set immediates [1, 2, 3, 4], draw + pass.setPipeline(pipelineA as GPURenderPipeline); + pass.setBindGroup(0, bindGroupA, [0]); + pass.setImmediates!(0, new Uint32Array([1, 2, 3, 4]), 0, 4); + pass.draw(1, 1, 0, 0); + + // 2. Switch to Pipeline B (8 bytes), set new immediates [5, 6], draw + pass.setPipeline(pipelineB as GPURenderPipeline); + pass.setBindGroup(0, bindGroupB, [256]); + pass.setImmediates!(0, new Uint32Array([5, 6]), 0, 2); + pass.draw(1, 1, 1, 0); + } + }); + + t.device.queue.submit([commandEncoder.finish()]); + + // Pipeline A wrote [1, 2, 3, 4]; Pipeline B wrote [5, 6, 0, 0] (only 2 fields, rest zeroed). + const expected = new Uint32Array([1, 2, 3, 4, 5, 6, 0, 0]); + t.expectGPUBufferValuesEqual(outputBuffer, expected); + } + }); + +g.test('use_max_immediate_size') + .desc('Verify setImmediates with maxImmediateSize.') + .params(u => u.combine('encoderType', kProgrammableEncoderTypes)) + .fn(t => { + const { encoderType } = t.params; + t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); + assert(t.device.limits.maxImmediateSize !== undefined); + const maxImmediateSize = t.device.limits.maxImmediateSize; + // Create a pipeline that reads the first and last u32 of the immediate data + const count = maxImmediateSize / 4; + const members: string[] = []; + for (let i = 0; i < count; i++) { + members.push(`m${i}: u32`); + } + const wgslDecl = `struct Large { ${members.join(', ')} } var data: Large;`; + const wgslUsage = ` + @compute @workgroup_size(1) fn cs_main() { + output[0] = data.m0; + output[1] = data.m${count - 1}; + } + @fragment fn fs_main() -> @location(0) vec4u { + output[0] = data.m0; + output[1] = data.m${count - 1}; + return vec4u(0); + } + `; + + const pipeline = createPipeline(t, encoderType, wgslDecl, wgslUsage, maxImmediateSize); + + const outputBuffer = t.createBufferTracked({ + size: 8, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); + const indexUniformBuffer = t.makeBufferWithContents( + new Uint32Array(256 / 4), + GPUBufferUsage.UNIFORM + ); + + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { binding: 0, resource: { buffer: outputBuffer } }, + { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, + ], + }); + + const commandEncoder = t.device.createCommandEncoder(); + executePass(t, encoderType, commandEncoder, enc => { + const data = new Uint32Array(count); + data[0] = 0xdeadbeef; + data[count - 1] = 0xcafebabe; + + if (encoderType === 'compute pass') { + const pass = enc as GPUComputePassEncoder; + pass.setPipeline(pipeline as GPUComputePipeline); + pass.setBindGroup(0, bindGroup, [0]); + pass.setImmediates!(0, data, 0, count); + pass.dispatchWorkgroups(1); + } else { + const pass = enc as GPURenderPassEncoder | GPURenderBundleEncoder; + pass.setPipeline(pipeline as GPURenderPipeline); + pass.setBindGroup(0, bindGroup, [0]); + pass.setImmediates!(0, data, 0, count); + pass.draw(1, 1, 0, 0); + } + }); + + t.device.queue.submit([commandEncoder.finish()]); + t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array([0xdeadbeef, 0xcafebabe])); + }); + +g.test('typed_array_arguments') + .desc('Verify srcOffset and srcSize are in elements for TypedArrays.') + .params(u => + u + .combine('typedArray', kTypedArrayBufferViewKeys) + .combine('encoderType', kProgrammableEncoderTypes) + ) + .fn(t => { + const { typedArray, encoderType } = t.params; + t.skipIf(typedArray === 'Float16Array', 'TODO(#4297): Float16Array not yet supported'); + t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); + const Ctor = kTypedArrayBufferViews[typedArray]; + const elementSize = Ctor.BYTES_PER_ELEMENT; + + // Write a known pattern. + // Use a buffer of 8 bytes. + // Pattern: 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 + const bytePattern = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]; + + // Create a large buffer to test offset. + // Use offset = 2 elements. + // Need enough data. + // Construct the TypedArray such that at index `offset`, the pattern exists. + const offset = 2; + const elementCount = 8 / elementSize; // Write 8 bytes. + + // Total elements needed: offset + elementCount + padding + const totalElements = offset + elementCount + 2; + const arr = new Ctor(totalElements); + + // Fill with non-zero value. + const fillView = new Uint8Array(arr.buffer); + fillView.fill(0xaa); + + // The bytes at `arr[offset]...` should match `bytePattern`. + // Use a DataView on the array's buffer to set the bytes. + const buffer = arr.buffer; + const byteOffset = arr.byteOffset + offset * elementSize; + const view = new DataView(buffer); + + for (let i = 0; i < 8; i++) { + view.setUint8(byteOffset + i, bytePattern[i]); + } + + // Now `arr` contains the pattern at `offset`. + + // Shader: read 2 u32s (8 bytes). + const wgslDecl = 'var data: array;'; + const wgslUsage = ` + @compute @workgroup_size(1) fn cs_main() { + output[0] = data[0]; + output[1] = data[1]; + } + @fragment fn fs_main() -> @location(0) vec4u { + output[0] = data[0]; + output[1] = data[1]; + return vec4u(0); + } + `; + + const pipeline = createPipeline(t, encoderType, wgslDecl, wgslUsage, 8); + + const outputBuffer = t.createBufferTracked({ + size: 8, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); + const indexUniformBuffer = t.makeBufferWithContents( + new Uint32Array(256 / 4), + GPUBufferUsage.UNIFORM + ); + + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { binding: 0, resource: { buffer: outputBuffer } }, + { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, + ], + }); + + const commandEncoder = t.device.createCommandEncoder(); + executePass(t, encoderType, commandEncoder, enc => { + if (encoderType === 'compute pass') { + const pass = enc as GPUComputePassEncoder; + pass.setPipeline(pipeline as GPUComputePipeline); + pass.setBindGroup(0, bindGroup, [0]); + pass.setImmediates!(0, arr, offset, elementCount); + pass.dispatchWorkgroups(1); + } else { + const pass = enc as GPURenderPassEncoder | GPURenderBundleEncoder; + pass.setPipeline(pipeline as GPURenderPipeline); + pass.setBindGroup(0, bindGroup, [0]); + pass.setImmediates!(0, arr, offset, elementCount); + pass.draw(1, 1, 0, 0); + } + }); + + t.device.queue.submit([commandEncoder.finish()]); + + // Expected: 0x04030201, 0x08070605 (Little Endian) + const expected = new Uint32Array([0x04030201, 0x08070605]); + t.expectGPUBufferValuesEqual(outputBuffer, expected); + }); + +g.test('multiple_updates_before_draw_or_dispatch') + .desc( + 'Verify that multiple setImmediates calls before a draw or dispatch result in the latest content being used (merging updates).' + ) + .params(u => u.combine('encoderType', kProgrammableEncoderTypes)) + .fn(t => { + const { encoderType } = t.params; + t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); + // Use vec4 to allow partial updates. + const wgslDecl = 'var data: vec4;'; + const wgslUsage = ` + @compute @workgroup_size(1) fn cs_main() { + output[0] = data.x; output[1] = data.y; output[2] = data.z; output[3] = data.w; + } + @fragment fn fs_main() -> @location(0) vec4u { + output[0] = data.x; output[1] = data.y; output[2] = data.z; output[3] = data.w; + return vec4u(0); + } + `; + const pipeline = createPipeline(t, encoderType, wgslDecl, wgslUsage, 16); + + runAndCheck( + t, + encoderType, + pipeline, + encoder => { + // 1. Set all to [1, 2, 3, 4] + encoder.setImmediates!(0, new Uint32Array([1, 2, 3, 4]), 0, 4); + // 2. Update middle two to [5, 6] -> [1, 5, 6, 4] + encoder.setImmediates!(4, new Uint32Array([5, 6]), 0, 2); + // 3. Update last to [7] -> [1, 5, 6, 7] + encoder.setImmediates!(12, new Uint32Array([7]), 0, 1); + }, + [1, 5, 6, 7] + ); + }); + +g.test('render_pass_and_bundle_mix') + .desc('Verify interaction between executeBundles and direct render pass commands.') + .fn(t => { + t.skipIfStorageBuffersInFragmentStageNotAvailable('render pass'); + const wgslDecl = 'var data: vec2;'; + const wgslUsage = ` + @fragment fn fs_main() -> @location(0) vec4u { + let base = outIndex * 2; + output[base] = data.x; + output[base + 1] = data.y; + return vec4u(0); + } + `; + // Use 'render pass' type to create the pipeline, but it works for bundle too. + // Immediate size: vec2 = 2 * 4 bytes = 8 bytes. + const pipeline = createPipeline(t, 'render pass', wgslDecl, wgslUsage, 8) as GPURenderPipeline; + + const outputBuffer = t.createBufferTracked({ + size: 16, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); + const indexUniformBuffer = t.createBufferTracked({ + size: 512, + usage: GPUBufferUsage.UNIFORM, + mappedAtCreation: true, + }); + const idxData = new Uint32Array(indexUniformBuffer.getMappedRange()); + idxData[0] = 0; + idxData[256 / 4] = 1; + indexUniformBuffer.unmap(); + + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { binding: 0, resource: { buffer: outputBuffer } }, + { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, + ], + }); + + // Bundle: Set [1, 10], Draw (Index 0) + const bundleEncoder = t.device.createRenderBundleEncoder({ colorFormats: ['r32uint'] }); + bundleEncoder.setPipeline(pipeline); + bundleEncoder.setBindGroup(0, bindGroup, [0]); + bundleEncoder.setImmediates!(0, new Uint32Array([1, 10]), 0, 2); + bundleEncoder.draw(1, 1, 0, 0); + const bundle = bundleEncoder.finish(); + + const renderTargetTexture = t.createTextureTracked({ + size: [4, 1, 1], + format: 'r32uint', + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + }); + const commandEncoder = t.device.createCommandEncoder(); + const pass = commandEncoder.beginRenderPass({ + colorAttachments: [ + { + view: renderTargetTexture.createView(), + loadOp: 'clear', + storeOp: 'store', + clearValue: { r: 0, g: 0, b: 0, a: 0 }, + }, + ], + }); + + // Execute Bundle + pass.executeBundles([bundle]); + + // Pass: Set [2, 20], Draw (Index 1) + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup, [256]); + pass.setImmediates!(0, new Uint32Array([2, 20]), 0, 2); + pass.draw(1, 1, 1, 0); + + pass.end(); + t.device.queue.submit([commandEncoder.finish()]); + + t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array([1, 10, 2, 20])); + }); + +g.test('render_bundle_isolation') + .desc('Verify that immediate data state is isolated between bundles executed in the same pass.') + .fn(t => { + t.skipIfStorageBuffersInFragmentStageNotAvailable('render bundle'); + const wgslDecl = 'var data: vec2;'; + const wgslUsage = ` + @fragment fn fs_main() -> @location(0) vec4u { + let base = outIndex * 2; + output[base] = data.x; + output[base + 1] = data.y; + return vec4u(0); + } + `; + const pipeline = createPipeline(t, 'render pass', wgslDecl, wgslUsage, 8) as GPURenderPipeline; + + const outputBuffer = t.createBufferTracked({ + size: 16, // 2 draws * 2 u32s * 4 bytes + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); + const indexUniformBuffer = t.createBufferTracked({ + size: 512, + usage: GPUBufferUsage.UNIFORM, + mappedAtCreation: true, + }); + const idxData = new Uint32Array(indexUniformBuffer.getMappedRange()); + idxData[0] = 0; + idxData[256 / 4] = 1; + indexUniformBuffer.unmap(); + + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { binding: 0, resource: { buffer: outputBuffer } }, + { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, + ], + }); + + // Bundle A: Set [1, 2], Draw (Index 0) + const bundleEncoderA = t.device.createRenderBundleEncoder({ colorFormats: ['r32uint'] }); + bundleEncoderA.setPipeline(pipeline); + bundleEncoderA.setBindGroup(0, bindGroup, [0]); + bundleEncoderA.setImmediates!(0, new Uint32Array([1, 2]), 0, 2); + bundleEncoderA.draw(1, 1, 0, 0); + const bundleA = bundleEncoderA.finish(); + + // Bundle B: Set [3, 4], Draw (Index 1) + const bundleEncoderB = t.device.createRenderBundleEncoder({ colorFormats: ['r32uint'] }); + bundleEncoderB.setPipeline(pipeline); + bundleEncoderB.setBindGroup(0, bindGroup, [256]); + bundleEncoderB.setImmediates!(0, new Uint32Array([3, 4]), 0, 2); + bundleEncoderB.draw(1, 1, 1, 0); + const bundleB = bundleEncoderB.finish(); + + const renderTargetTexture = t.createTextureTracked({ + size: [4, 1, 1], + format: 'r32uint', + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + }); + const commandEncoder = t.device.createCommandEncoder(); + const pass = commandEncoder.beginRenderPass({ + colorAttachments: [ + { + view: renderTargetTexture.createView(), + loadOp: 'clear', + storeOp: 'store', + clearValue: { r: 0, g: 0, b: 0, a: 0 }, + }, + ], + }); + + // Execute Bundles + pass.executeBundles([bundleA, bundleB]); + + pass.end(); + t.device.queue.submit([commandEncoder.finish()]); + + t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array([1, 2, 3, 4])); + }); From 079a9cd5d89915d838819b2206d409d02bb448cb Mon Sep 17 00:00:00 2001 From: shaoboyan Date: Mon, 9 Mar 2026 16:41:10 +0800 Subject: [PATCH 2/5] Address comments --- .../programmable/immediate.spec.ts | 481 ++++++------------ 1 file changed, 165 insertions(+), 316 deletions(-) diff --git a/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts b/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts index a2bda7c7bf9e..3f50558b3df1 100644 --- a/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts +++ b/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts @@ -46,7 +46,7 @@ function createPipeline( t: AllFeaturesMaxLimitsGPUTest, encoderType: ProgrammableEncoderType, wgslDecl: string, - wgslUsage: string, + copyCode: string, immediateSize: number ) { const layout = t.device.createPipelineLayout({ @@ -74,7 +74,13 @@ function createPipeline( @group(0) @binding(0) var output: array; @group(0) @binding(1) var outIndex: u32; - ${wgslUsage} + @compute @workgroup_size(1) fn cs_main() { + ${copyCode} + } + @fragment fn fs_main() -> @location(0) vec4u { + ${copyCode} + return vec4u(0); + } `; if (encoderType === 'compute pass') { @@ -82,7 +88,6 @@ function createPipeline( layout, compute: { module: t.device.createShaderModule({ code: fullCode }), - entryPoint: 'cs_main', }, }); } else { @@ -92,20 +97,16 @@ function createPipeline( module: t.device.createShaderModule({ code: ` @vertex fn vs_main(@builtin(vertex_index) vIdx: u32) -> @builtin(position) vec4f { - // Map vIdx 0..3 to pixel centers. - // Uses a 4x1 texture. - // vIdx 0 -> pixel 0 -> x in [-1, -0.5] - // vIdx 1 -> pixel 1 -> x in [-0.5, 0] + // Map vIdx 0..3 to pixel centers in a 4x1 render target. + // firstVertex selects which pixel to render to. let x = (f32(vIdx) + 0.5) / 2.0 - 1.0; return vec4f(x, 0.0, 0.0, 1.0); } `, }), - entryPoint: 'vs_main', }, fragment: { module: t.device.createShaderModule({ code: fullCode }), - entryPoint: 'fs_main', targets: [{ format: 'r32uint' }], }, primitive: { @@ -115,7 +116,35 @@ function createPipeline( } } -function executePass( +/** Dispatch or draw based on encoder type. firstVertex selects the output pixel for render passes. */ +function dispatchOrDraw( + encoderType: ProgrammableEncoderType, + encoder: GPUComputePassEncoder | GPURenderPassEncoder | GPURenderBundleEncoder, + firstVertex: number = 0 +) { + if (encoderType === 'compute pass') { + (encoder as GPUComputePassEncoder).dispatchWorkgroups(1); + } else { + (encoder as GPURenderPassEncoder | GPURenderBundleEncoder).draw(1, 1, firstVertex, 0); + } +} + +/** Create a uniform buffer with output indices at 256-byte aligned offsets for dynamic binding. */ +function createOutputIndexBuffer(t: AllFeaturesMaxLimitsGPUTest, count: number): GPUBuffer { + const buffer = t.createBufferTracked({ + size: 256 * count, + usage: GPUBufferUsage.UNIFORM, + mappedAtCreation: true, + }); + const data = new Uint32Array(buffer.getMappedRange()); + for (let i = 0; i < count; i++) { + data[(i * 256) / 4] = i; + } + buffer.unmap(); + return buffer; +} + +function encodeForPassType( t: AllFeaturesMaxLimitsGPUTest, encoderType: ProgrammableEncoderType, commandEncoder: GPUCommandEncoder, @@ -179,21 +208,11 @@ function runAndCheck( expectedValues: number[] ) { const outputBuffer = t.createBufferTracked({ - size: 4 * 4, // 4 u32s + size: expectedValues.length * 4, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, }); - const indexUniformBuffer = t.createBufferTracked({ - size: 256 * 4, // Enough for dynamic offsets - usage: GPUBufferUsage.UNIFORM, - mappedAtCreation: true, - }); - // Initialize indices 0, 1, 2, 3 at offsets 0, 256, 512, 768 - const indexData = new Uint32Array(indexUniformBuffer.getMappedRange()); - indexData[0] = 0; - indexData[256 / 4] = 1; - indexData[512 / 4] = 2; - indexData[768 / 4] = 3; - indexUniformBuffer.unmap(); + // Simple uniform for outIndex = 0. Dynamic offset is always [0]. + const indexUniformBuffer = t.makeBufferWithContents(new Uint32Array([0]), GPUBufferUsage.UNIFORM); const bindGroup = t.device.createBindGroup({ layout: pipeline.getBindGroupLayout(0), @@ -204,28 +223,17 @@ function runAndCheck( }); const commandEncoder = t.device.createCommandEncoder(); - executePass(t, encoderType, commandEncoder, encoder => { + encodeForPassType(t, encoderType, commandEncoder, encoder => { if (encoderType === 'compute pass') { - const pass = encoder as GPUComputePassEncoder; - pass.setPipeline(pipeline as GPUComputePipeline); - pass.setBindGroup(0, bindGroup, [0]); // Index 0 - setImmediatesFn(pass); - pass.dispatchWorkgroups(1); - } else if (encoderType === 'render pass') { - const pass = encoder as GPURenderPassEncoder; - pass.setPipeline(pipeline as GPURenderPipeline); - pass.setBindGroup(0, bindGroup, [0]); - setImmediatesFn(pass); - pass.draw(1, 1, 0, 0); // Vertex 0 -> Pixel 0 - } else if (encoderType === 'render bundle') { - const bundleEncoder = encoder as GPURenderBundleEncoder; - bundleEncoder.setPipeline(pipeline as GPURenderPipeline); - bundleEncoder.setBindGroup(0, bindGroup, [0]); - setImmediatesFn(bundleEncoder); - bundleEncoder.draw(1, 1, 0, 0); + (encoder as GPUComputePassEncoder).setPipeline(pipeline as GPUComputePipeline); } else { - unreachable(); + (encoder as GPURenderPassEncoder | GPURenderBundleEncoder).setPipeline( + pipeline as GPURenderPipeline + ); } + encoder.setBindGroup(0, bindGroup, [0]); + setImmediatesFn(encoder); + dispatchOrDraw(encoderType, encoder); }); t.device.queue.submit([commandEncoder.finish()]); @@ -260,7 +268,7 @@ g.test('basic_execution') t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); let wgslDecl = ''; - let wgslUsage = ''; + let copyCode = ''; let immediateSize = 0; let expected: number[] = []; let inputData: Uint32Array; @@ -271,15 +279,7 @@ g.test('basic_execution') struct S { a: u32, b: u32 } var data: S; `; - wgslUsage = ` - @compute @workgroup_size(1) fn cs_main() { - output[0] = data.a; output[1] = data.b; - } - @fragment fn fs_main() -> @location(0) vec4u { - output[0] = data.a; output[1] = data.b; - return vec4u(0); - } - `; + copyCode = 'output[0] = data.a; output[1] = data.b;'; inputData = new Uint32Array([0xdeadbeef, 0xcafebabe]); expected = [0xdeadbeef, 0xcafebabe]; } else { @@ -290,24 +290,13 @@ g.test('basic_execution') immediateSize = vSize * 4; wgslDecl = `var data: ${dataType};`; + // bitcast is identity for u32, so we can use it unconditionally. let readCode = ''; for (let i = 0; i < vSize; i++) { - let valExpr = vSize === 1 ? 'data' : `data[${i}]`; - if (sType === 'i32' || sType === 'f32') { - valExpr = `bitcast(${valExpr})`; - } - readCode += `output[${i}] = ${valExpr};`; + const valExpr = vSize === 1 ? 'data' : `data[${i}]`; + readCode += `output[${i}] = bitcast(${valExpr});\n`; } - - wgslUsage = ` - @compute @workgroup_size(1) fn cs_main() { - ${readCode} - } - @fragment fn fs_main() -> @location(0) vec4u { - ${readCode} - return vec4u(0); - } - `; + copyCode = readCode; inputData = new Uint32Array(vSize); for (let i = 0; i < vSize; i++) { @@ -329,14 +318,14 @@ g.test('basic_execution') } } - const pipeline = createPipeline(t, encoderType, wgslDecl, wgslUsage, immediateSize); + const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, immediateSize); runAndCheck( t, encoderType, pipeline, encoder => { - encoder.setImmediates!(0, inputData.buffer, 0, inputData.buffer.byteLength); + encoder.setImmediates!(0, inputData, 0, inputData.length); }, expected ); @@ -350,41 +339,22 @@ g.test('update_data') t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); const immediateSize = 16; const wgslDecl = 'var data: vec4;'; - const wgslUsage = ` - @compute @workgroup_size(1) fn cs_main() { - let base = outIndex * 4; - output[base + 0] = data[0]; - output[base + 1] = data[1]; - output[base + 2] = data[2]; - output[base + 3] = data[3]; - } - @fragment fn fs_main() -> @location(0) vec4u { - let base = outIndex * 4; - output[base + 0] = data[0]; - output[base + 1] = data[1]; - output[base + 2] = data[2]; - output[base + 3] = data[3]; - return vec4u(0); - } + const copyCode = ` + let base = outIndex * 4; + output[base + 0] = data[0]; + output[base + 1] = data[1]; + output[base + 2] = data[2]; + output[base + 3] = data[3]; `; - const pipeline = createPipeline(t, encoderType, wgslDecl, wgslUsage, immediateSize); + const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, immediateSize); const outputBuffer = t.createBufferTracked({ size: 4 * 4 * 3, // 3 steps, 4 u32s each usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, }); - const indexUniformBuffer = t.createBufferTracked({ - size: 256 * 3, - usage: GPUBufferUsage.UNIFORM, - mappedAtCreation: true, - }); - const indexData = new Uint32Array(indexUniformBuffer.getMappedRange()); - indexData[0] = 0; - indexData[256 / 4] = 1; - indexData[512 / 4] = 2; - indexUniformBuffer.unmap(); + const indexUniformBuffer = createOutputIndexBuffer(t, 3); const bindGroup = t.device.createBindGroup({ layout: pipeline.getBindGroupLayout(0), @@ -396,6 +366,7 @@ g.test('update_data') const commandEncoder = t.device.createCommandEncoder(); + /** Set bind group with dynamic offset for output index, set immediates, and dispatch/draw. */ const runStep = ( pass: GPURenderPassEncoder | GPUComputePassEncoder | GPURenderBundleEncoder, stepIndex: number, @@ -404,23 +375,16 @@ g.test('update_data') ) => { pass.setBindGroup(0, bindGroup, [stepIndex * 256]); pass.setImmediates!(dstOffset, data, 0, data.length); - - if (encoderType === 'compute pass') { - (pass as GPUComputePassEncoder).dispatchWorkgroups(1); - } else if (encoderType === 'render pass') { - (pass as GPURenderPassEncoder).draw(1, 1, 0, 0); - } else { - (pass as GPURenderBundleEncoder).draw(1, 1, 0, 0); - } + dispatchOrDraw(encoderType, pass); }; - executePass(t, encoderType, commandEncoder, enc => { + encodeForPassType(t, encoderType, commandEncoder, enc => { if (encoderType === 'compute pass') { (enc as GPUComputePassEncoder).setPipeline(pipeline as GPUComputePipeline); - } else if (encoderType === 'render pass') { - (enc as GPURenderPassEncoder).setPipeline(pipeline as GPURenderPipeline); } else { - (enc as GPURenderBundleEncoder).setPipeline(pipeline as GPURenderPipeline); + (enc as GPURenderPassEncoder | GPURenderBundleEncoder).setPipeline( + pipeline as GPURenderPipeline + ); } // Step 1: Full set [1, 2, 3, 4] @@ -442,166 +406,100 @@ g.test('update_data') g.test('pipeline_switch') .desc( `Verify immediate data is correctly set after switching pipelines. - - compatible: Both pipelines use the same immediateSize (same layout). - - incompatible: Pipelines use different immediateSize values (different layouts). - In both cases, immediates must be reset correctly between draws/dispatches.` + - sameImmediateSize=true: Both pipelines use the same immediateSize. + - sameImmediateSize=false: Pipelines use different immediateSize values. + In both cases, immediates must be set correctly between draws/dispatches.` ) .params(u => u .combine('encoderType', ['render pass', 'compute pass'] as const) - .combine('compatibility', ['compatible', 'incompatible'] as const) + .combine('sameImmediateSize', [true, false] as const) ) .fn(t => { - const { encoderType, compatibility } = t.params; + const { encoderType, sameImmediateSize } = t.params; t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); // Pipeline A always uses vec4 (16 bytes). const wgslDeclA = 'var data: vec4;'; - const wgslUsageA = ` - @compute @workgroup_size(1) fn cs_main() { - let base = outIndex * 4; - output[base] = data.x; output[base+1] = data.y; output[base+2] = data.z; output[base+3] = data.w; - } - @fragment fn fs_main() -> @location(0) vec4u { - let base = outIndex * 4; - output[base] = data.x; output[base+1] = data.y; output[base+2] = data.z; output[base+3] = data.w; - return vec4u(0); - } - `; + const copyCodeA = ` + let base = outIndex * 4; + output[base] = data.x; output[base+1] = data.y; output[base+2] = data.z; output[base+3] = data.w; + `; let wgslDeclB: string; - let wgslUsageB: string; + let copyCodeB: string; let immediateSizeB: number; - if (compatibility === 'compatible') { + if (sameImmediateSize) { // Pipeline B has the same immediate layout as A (vec4, 16 bytes). wgslDeclB = wgslDeclA; - wgslUsageB = wgslUsageA; + copyCodeB = copyCodeA; immediateSizeB = 16; } else { // Pipeline B uses vec2 (8 bytes) — different/incompatible layout. wgslDeclB = 'var data: vec2;'; - wgslUsageB = ` - @compute @workgroup_size(1) fn cs_main() { - let base = outIndex * 4; - output[base] = data.x; output[base+1] = data.y; output[base+2] = 0u; output[base+3] = 0u; - } - @fragment fn fs_main() -> @location(0) vec4u { - let base = outIndex * 4; - output[base] = data.x; output[base+1] = data.y; output[base+2] = 0u; output[base+3] = 0u; - return vec4u(0); - } - `; + copyCodeB = ` + let base = outIndex * 4; + output[base] = data.x; output[base+1] = data.y; output[base+2] = 0u; output[base+3] = 0u; + `; immediateSizeB = 8; } - const pipelineA = createPipeline(t, encoderType, wgslDeclA, wgslUsageA, 16); - const pipelineB = createPipeline(t, encoderType, wgslDeclB, wgslUsageB, immediateSizeB); + const pipelineA = createPipeline(t, encoderType, wgslDeclA, copyCodeA, 16); + const pipelineB = createPipeline(t, encoderType, wgslDeclB, copyCodeB, immediateSizeB); const outputBuffer = t.createBufferTracked({ size: 32, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, }); - const indexUniformBuffer = t.createBufferTracked({ - size: 512, - usage: GPUBufferUsage.UNIFORM, - mappedAtCreation: true, - }); - const indexData = new Uint32Array(indexUniformBuffer.getMappedRange()); - indexData[0] = 0; - indexData[256 / 4] = 1; - indexUniformBuffer.unmap(); + const indexUniformBuffer = createOutputIndexBuffer(t, 2); - const bindGroupA = t.device.createBindGroup({ + // Use a single bind group. Both pipelines have the same bind group layout, + // so the same bind group is compatible with both. This verifies that + // previously bound descriptor sets remain valid after a pipeline switch + // (important for Vulkan correctness). + const bindGroup = t.device.createBindGroup({ layout: pipelineA.getBindGroupLayout(0), entries: [ { binding: 0, resource: { buffer: outputBuffer } }, { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, ], }); - const bindGroupB = t.device.createBindGroup({ - layout: pipelineB.getBindGroupLayout(0), - entries: [ - { binding: 0, resource: { buffer: outputBuffer } }, - { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, - ], - }); - const commandEncoder = t.device.createCommandEncoder(); + const immA = new Uint32Array([1, 2, 3, 4]); + const immB = sameImmediateSize ? new Uint32Array([1, 2, 3, 4]) : new Uint32Array([1, 2]); - if (compatibility === 'compatible') { - executePass(t, encoderType, commandEncoder, enc => { - if (encoderType === 'compute pass') { - const pass = enc as GPUComputePassEncoder; - // 1. Set Pipeline A, set immediates [1, 2, 3, 4], dispatch - pass.setPipeline(pipelineA as GPUComputePipeline); - pass.setBindGroup(0, bindGroupA, [0]); - pass.setImmediates!(0, new Uint32Array([1, 2, 3, 4]), 0, 4); - pass.dispatchWorkgroups(1); - - // 2. Switch to Pipeline B (compatible), set new immediates [5, 6, 7, 8], dispatch - pass.setPipeline(pipelineB as GPUComputePipeline); - pass.setBindGroup(0, bindGroupB, [256]); - pass.setImmediates!(0, new Uint32Array([5, 6, 7, 8]), 0, 4); - pass.dispatchWorkgroups(1); - } else { - const pass = enc as GPURenderPassEncoder; - // 1. Set Pipeline A, set immediates [1, 2, 3, 4], draw - pass.setPipeline(pipelineA as GPURenderPipeline); - pass.setBindGroup(0, bindGroupA, [0]); - pass.setImmediates!(0, new Uint32Array([1, 2, 3, 4]), 0, 4); - pass.draw(1, 1, 0, 0); - - // 2. Switch to Pipeline B (compatible), set new immediates [5, 6, 7, 8], draw - pass.setPipeline(pipelineB as GPURenderPipeline); - pass.setBindGroup(0, bindGroupB, [256]); - pass.setImmediates!(0, new Uint32Array([5, 6, 7, 8]), 0, 4); - pass.draw(1, 1, 1, 0); - } - }); - - t.device.queue.submit([commandEncoder.finish()]); + const commandEncoder = t.device.createCommandEncoder(); + encodeForPassType(t, encoderType, commandEncoder, enc => { + // Pipeline A: set immediates [1, 2, 3, 4], dispatch/draw at outIndex 0. + if (encoderType === 'compute pass') { + (enc as GPUComputePassEncoder).setPipeline(pipelineA as GPUComputePipeline); + } else { + (enc as GPURenderPassEncoder).setPipeline(pipelineA as GPURenderPipeline); + } + enc.setBindGroup(0, bindGroup, [0]); + enc.setImmediates!(0, immA, 0, immA.length); + dispatchOrDraw(encoderType, enc, 0); - const expected = new Uint32Array([1, 2, 3, 4, 5, 6, 7, 8]); - t.expectGPUBufferValuesEqual(outputBuffer, expected); - } else { - // Incompatible: Pipeline A uses 16 bytes, Pipeline B uses 8 bytes. - executePass(t, encoderType, commandEncoder, enc => { - if (encoderType === 'compute pass') { - const pass = enc as GPUComputePassEncoder; - // 1. Set Pipeline A (16 bytes), set immediates [1, 2, 3, 4], dispatch - pass.setPipeline(pipelineA as GPUComputePipeline); - pass.setBindGroup(0, bindGroupA, [0]); - pass.setImmediates!(0, new Uint32Array([1, 2, 3, 4]), 0, 4); - pass.dispatchWorkgroups(1); - - // 2. Switch to Pipeline B (8 bytes), set new immediates [5, 6], dispatch - pass.setPipeline(pipelineB as GPUComputePipeline); - pass.setBindGroup(0, bindGroupB, [256]); - pass.setImmediates!(0, new Uint32Array([5, 6]), 0, 2); - pass.dispatchWorkgroups(1); - } else { - const pass = enc as GPURenderPassEncoder; - // 1. Set Pipeline A (16 bytes), set immediates [1, 2, 3, 4], draw - pass.setPipeline(pipelineA as GPURenderPipeline); - pass.setBindGroup(0, bindGroupA, [0]); - pass.setImmediates!(0, new Uint32Array([1, 2, 3, 4]), 0, 4); - pass.draw(1, 1, 0, 0); - - // 2. Switch to Pipeline B (8 bytes), set new immediates [5, 6], draw - pass.setPipeline(pipelineB as GPURenderPipeline); - pass.setBindGroup(0, bindGroupB, [256]); - pass.setImmediates!(0, new Uint32Array([5, 6]), 0, 2); - pass.draw(1, 1, 1, 0); - } - }); + // Pipeline B: switch pipeline, update dynamic offset for outIndex 1. + if (encoderType === 'compute pass') { + (enc as GPUComputePassEncoder).setPipeline(pipelineB as GPUComputePipeline); + } else { + (enc as GPURenderPassEncoder).setPipeline(pipelineB as GPURenderPipeline); + } + enc.setBindGroup(0, bindGroup, [256]); + enc.setImmediates!(0, immB, 0, immB.length); + dispatchOrDraw(encoderType, enc, 1); + }); - t.device.queue.submit([commandEncoder.finish()]); + t.device.queue.submit([commandEncoder.finish()]); - // Pipeline A wrote [1, 2, 3, 4]; Pipeline B wrote [5, 6, 0, 0] (only 2 fields, rest zeroed). - const expected = new Uint32Array([1, 2, 3, 4, 5, 6, 0, 0]); - t.expectGPUBufferValuesEqual(outputBuffer, expected); - } + // Pipeline A wrote [1, 2, 3, 4] at outIndex 0. + // Pipeline B wrote [1, 2, 3, 4] (same size) or [1, 2, 0, 0] (different size) at outIndex 1. + const expected = sameImmediateSize + ? new Uint32Array([1, 2, 3, 4, 1, 2, 3, 4]) + : new Uint32Array([1, 2, 3, 4, 1, 2, 0, 0]); + t.expectGPUBufferValuesEqual(outputBuffer, expected); }); g.test('use_max_immediate_size') @@ -619,19 +517,12 @@ g.test('use_max_immediate_size') members.push(`m${i}: u32`); } const wgslDecl = `struct Large { ${members.join(', ')} } var data: Large;`; - const wgslUsage = ` - @compute @workgroup_size(1) fn cs_main() { - output[0] = data.m0; - output[1] = data.m${count - 1}; - } - @fragment fn fs_main() -> @location(0) vec4u { - output[0] = data.m0; - output[1] = data.m${count - 1}; - return vec4u(0); - } + const copyCode = ` + output[0] = data.m0; + output[1] = data.m${count - 1}; `; - const pipeline = createPipeline(t, encoderType, wgslDecl, wgslUsage, maxImmediateSize); + const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, maxImmediateSize); const outputBuffer = t.createBufferTracked({ size: 8, @@ -651,24 +542,21 @@ g.test('use_max_immediate_size') }); const commandEncoder = t.device.createCommandEncoder(); - executePass(t, encoderType, commandEncoder, enc => { + encodeForPassType(t, encoderType, commandEncoder, enc => { const data = new Uint32Array(count); data[0] = 0xdeadbeef; data[count - 1] = 0xcafebabe; if (encoderType === 'compute pass') { - const pass = enc as GPUComputePassEncoder; - pass.setPipeline(pipeline as GPUComputePipeline); - pass.setBindGroup(0, bindGroup, [0]); - pass.setImmediates!(0, data, 0, count); - pass.dispatchWorkgroups(1); + (enc as GPUComputePassEncoder).setPipeline(pipeline as GPUComputePipeline); } else { - const pass = enc as GPURenderPassEncoder | GPURenderBundleEncoder; - pass.setPipeline(pipeline as GPURenderPipeline); - pass.setBindGroup(0, bindGroup, [0]); - pass.setImmediates!(0, data, 0, count); - pass.draw(1, 1, 0, 0); + (enc as GPURenderPassEncoder | GPURenderBundleEncoder).setPipeline( + pipeline as GPURenderPipeline + ); } + enc.setBindGroup(0, bindGroup, [0]); + enc.setImmediates!(0, data, 0, count); + dispatchOrDraw(encoderType, enc); }); t.device.queue.submit([commandEncoder.finish()]); @@ -723,19 +611,12 @@ g.test('typed_array_arguments') // Shader: read 2 u32s (8 bytes). const wgslDecl = 'var data: array;'; - const wgslUsage = ` - @compute @workgroup_size(1) fn cs_main() { - output[0] = data[0]; - output[1] = data[1]; - } - @fragment fn fs_main() -> @location(0) vec4u { - output[0] = data[0]; - output[1] = data[1]; - return vec4u(0); - } + const copyCode = ` + output[0] = data[0]; + output[1] = data[1]; `; - const pipeline = createPipeline(t, encoderType, wgslDecl, wgslUsage, 8); + const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, 8); const outputBuffer = t.createBufferTracked({ size: 8, @@ -755,20 +636,17 @@ g.test('typed_array_arguments') }); const commandEncoder = t.device.createCommandEncoder(); - executePass(t, encoderType, commandEncoder, enc => { + encodeForPassType(t, encoderType, commandEncoder, enc => { if (encoderType === 'compute pass') { - const pass = enc as GPUComputePassEncoder; - pass.setPipeline(pipeline as GPUComputePipeline); - pass.setBindGroup(0, bindGroup, [0]); - pass.setImmediates!(0, arr, offset, elementCount); - pass.dispatchWorkgroups(1); + (enc as GPUComputePassEncoder).setPipeline(pipeline as GPUComputePipeline); } else { - const pass = enc as GPURenderPassEncoder | GPURenderBundleEncoder; - pass.setPipeline(pipeline as GPURenderPipeline); - pass.setBindGroup(0, bindGroup, [0]); - pass.setImmediates!(0, arr, offset, elementCount); - pass.draw(1, 1, 0, 0); + (enc as GPURenderPassEncoder | GPURenderBundleEncoder).setPipeline( + pipeline as GPURenderPipeline + ); } + enc.setBindGroup(0, bindGroup, [0]); + enc.setImmediates!(0, arr, offset, elementCount); + dispatchOrDraw(encoderType, enc); }); t.device.queue.submit([commandEncoder.finish()]); @@ -788,16 +666,9 @@ g.test('multiple_updates_before_draw_or_dispatch') t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); // Use vec4 to allow partial updates. const wgslDecl = 'var data: vec4;'; - const wgslUsage = ` - @compute @workgroup_size(1) fn cs_main() { - output[0] = data.x; output[1] = data.y; output[2] = data.z; output[3] = data.w; - } - @fragment fn fs_main() -> @location(0) vec4u { - output[0] = data.x; output[1] = data.y; output[2] = data.z; output[3] = data.w; - return vec4u(0); - } - `; - const pipeline = createPipeline(t, encoderType, wgslDecl, wgslUsage, 16); + const copyCode = + 'output[0] = data.x; output[1] = data.y; output[2] = data.z; output[3] = data.w;'; + const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, 16); runAndCheck( t, @@ -820,31 +691,20 @@ g.test('render_pass_and_bundle_mix') .fn(t => { t.skipIfStorageBuffersInFragmentStageNotAvailable('render pass'); const wgslDecl = 'var data: vec2;'; - const wgslUsage = ` - @fragment fn fs_main() -> @location(0) vec4u { - let base = outIndex * 2; - output[base] = data.x; - output[base + 1] = data.y; - return vec4u(0); - } + const copyCode = ` + let base = outIndex * 2; + output[base] = data.x; + output[base + 1] = data.y; `; // Use 'render pass' type to create the pipeline, but it works for bundle too. // Immediate size: vec2 = 2 * 4 bytes = 8 bytes. - const pipeline = createPipeline(t, 'render pass', wgslDecl, wgslUsage, 8) as GPURenderPipeline; + const pipeline = createPipeline(t, 'render pass', wgslDecl, copyCode, 8) as GPURenderPipeline; const outputBuffer = t.createBufferTracked({ size: 16, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, }); - const indexUniformBuffer = t.createBufferTracked({ - size: 512, - usage: GPUBufferUsage.UNIFORM, - mappedAtCreation: true, - }); - const idxData = new Uint32Array(indexUniformBuffer.getMappedRange()); - idxData[0] = 0; - idxData[256 / 4] = 1; - indexUniformBuffer.unmap(); + const indexUniformBuffer = createOutputIndexBuffer(t, 2); const bindGroup = t.device.createBindGroup({ layout: pipeline.getBindGroupLayout(0), @@ -899,29 +759,18 @@ g.test('render_bundle_isolation') .fn(t => { t.skipIfStorageBuffersInFragmentStageNotAvailable('render bundle'); const wgslDecl = 'var data: vec2;'; - const wgslUsage = ` - @fragment fn fs_main() -> @location(0) vec4u { - let base = outIndex * 2; - output[base] = data.x; - output[base + 1] = data.y; - return vec4u(0); - } + const copyCode = ` + let base = outIndex * 2; + output[base] = data.x; + output[base + 1] = data.y; `; - const pipeline = createPipeline(t, 'render pass', wgslDecl, wgslUsage, 8) as GPURenderPipeline; + const pipeline = createPipeline(t, 'render pass', wgslDecl, copyCode, 8) as GPURenderPipeline; const outputBuffer = t.createBufferTracked({ size: 16, // 2 draws * 2 u32s * 4 bytes usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, }); - const indexUniformBuffer = t.createBufferTracked({ - size: 512, - usage: GPUBufferUsage.UNIFORM, - mappedAtCreation: true, - }); - const idxData = new Uint32Array(indexUniformBuffer.getMappedRange()); - idxData[0] = 0; - idxData[256 / 4] = 1; - indexUniformBuffer.unmap(); + const indexUniformBuffer = createOutputIndexBuffer(t, 2); const bindGroup = t.device.createBindGroup({ layout: pipeline.getBindGroupLayout(0), From f57ee4ecc4526926fe1938ce271a308ea5a7c696 Mon Sep 17 00:00:00 2001 From: shaoboyan Date: Tue, 10 Mar 2026 16:46:46 +0800 Subject: [PATCH 3/5] Iterate --- .../programmable/immediate.spec.ts | 325 ++++++++++-------- 1 file changed, 191 insertions(+), 134 deletions(-) diff --git a/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts b/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts index 3f50558b3df1..a7f3a63a8ee4 100644 --- a/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts +++ b/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts @@ -5,7 +5,6 @@ Operation tests for immediate data usage in RenderPassEncoder, ComputePassEncode import { makeTestGroup } from '../../../../../common/framework/test_group.js'; import { getGPU } from '../../../../../common/util/navigator_gpu.js'; import { - assert, kTypedArrayBufferViews, kTypedArrayBufferViewKeys, supportsImmediateData, @@ -47,27 +46,30 @@ function createPipeline( encoderType: ProgrammableEncoderType, wgslDecl: string, copyCode: string, - immediateSize: number + immediateSize: number, + pipelineLayout?: GPUPipelineLayout ) { - const layout = t.device.createPipelineLayout({ - bindGroupLayouts: [ - t.device.createBindGroupLayout({ - entries: [ - { - binding: 0, - visibility: GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT, - buffer: { type: 'storage' }, - }, - { - binding: 1, - visibility: GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT, - buffer: { type: 'uniform', hasDynamicOffset: true }, - }, - ], - }), - ], - immediateSize, - }); + const layout = + pipelineLayout || + t.device.createPipelineLayout({ + bindGroupLayouts: [ + t.device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT, + buffer: { type: 'storage' }, + }, + { + binding: 1, + visibility: GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT | GPUShaderStage.VERTEX, + buffer: { type: 'uniform', hasDynamicOffset: true }, + }, + ], + }), + ], + immediateSize, + }); const fullCode = ` ${wgslDecl} @@ -96,10 +98,13 @@ function createPipeline( vertex: { module: t.device.createShaderModule({ code: ` - @vertex fn vs_main(@builtin(vertex_index) vIdx: u32) -> @builtin(position) vec4f { - // Map vIdx 0..3 to pixel centers in a 4x1 render target. - // firstVertex selects which pixel to render to. - let x = (f32(vIdx) + 0.5) / 2.0 - 1.0; + // Re-declare outIndex in the vertex shader + @group(0) @binding(1) var outIndex: u32; + + @vertex fn vs_main() -> @builtin(position) vec4f { + // Map outIndex 0..3 to pixel centers in a 4x1 render target. + // x = (f32(outIndex) + 0.5) / 2.0 - 1.0 + let x = (f32(outIndex) + 0.5) / 2.0 - 1.0; return vec4f(x, 0.0, 0.0, 1.0); } `, @@ -116,20 +121,23 @@ function createPipeline( } } -/** Dispatch or draw based on encoder type. firstVertex selects the output pixel for render passes. */ +/** Dispatch or draw based on encoder type. */ function dispatchOrDraw( encoderType: ProgrammableEncoderType, - encoder: GPUComputePassEncoder | GPURenderPassEncoder | GPURenderBundleEncoder, - firstVertex: number = 0 + encoder: GPUComputePassEncoder | GPURenderPassEncoder | GPURenderBundleEncoder ) { if (encoderType === 'compute pass') { (encoder as GPUComputePassEncoder).dispatchWorkgroups(1); } else { - (encoder as GPURenderPassEncoder | GPURenderBundleEncoder).draw(1, 1, firstVertex, 0); + (encoder as GPURenderPassEncoder | GPURenderBundleEncoder).draw(1); // 1 Vertex over 1 Instance } } -/** Create a uniform buffer with output indices at 256-byte aligned offsets for dynamic binding. */ +/** + * Create a uniform buffer with output indices at 256-byte aligned offsets for dynamic binding. + * This is used instead of other mechanisms to provide an output index (like firstVertex or immediates) + * because it works across all shader stages without consuming the immediate data capability that we are actively testing. + */ function createOutputIndexBuffer(t: AllFeaturesMaxLimitsGPUTest, count: number): GPUBuffer { const buffer = t.createBufferTracked({ size: 256 * count, @@ -198,6 +206,20 @@ function encodeForPassType( } } +function setPipeline( + encoderType: ProgrammableEncoderType, + encoder: GPUComputePassEncoder | GPURenderPassEncoder | GPURenderBundleEncoder, + pipeline: GPURenderPipeline | GPUComputePipeline +) { + if (encoderType === 'compute pass') { + (encoder as GPUComputePassEncoder).setPipeline(pipeline as GPUComputePipeline); + } else { + (encoder as GPURenderPassEncoder | GPURenderBundleEncoder).setPipeline( + pipeline as GPURenderPipeline + ); + } +} + function runAndCheck( t: AllFeaturesMaxLimitsGPUTest, encoderType: ProgrammableEncoderType, @@ -224,13 +246,7 @@ function runAndCheck( const commandEncoder = t.device.createCommandEncoder(); encodeForPassType(t, encoderType, commandEncoder, encoder => { - if (encoderType === 'compute pass') { - (encoder as GPUComputePassEncoder).setPipeline(pipeline as GPUComputePipeline); - } else { - (encoder as GPURenderPassEncoder | GPURenderBundleEncoder).setPipeline( - pipeline as GPURenderPipeline - ); - } + setPipeline(encoderType, encoder, pipeline); encoder.setBindGroup(0, bindGroup, [0]); setImmediatesFn(encoder); dispatchOrDraw(encoderType, encoder); @@ -291,12 +307,10 @@ g.test('basic_execution') wgslDecl = `var data: ${dataType};`; // bitcast is identity for u32, so we can use it unconditionally. - let readCode = ''; for (let i = 0; i < vSize; i++) { const valExpr = vSize === 1 ? 'data' : `data[${i}]`; - readCode += `output[${i}] = bitcast(${valExpr});\n`; + copyCode += `output[${i}] = bitcast(${valExpr});\n`; } - copyCode = readCode; inputData = new Uint32Array(vSize); for (let i = 0; i < vSize; i++) { @@ -379,13 +393,7 @@ g.test('update_data') }; encodeForPassType(t, encoderType, commandEncoder, enc => { - if (encoderType === 'compute pass') { - (enc as GPUComputePassEncoder).setPipeline(pipeline as GPUComputePipeline); - } else { - (enc as GPURenderPassEncoder | GPURenderBundleEncoder).setPipeline( - pipeline as GPURenderPipeline - ); - } + setPipeline(encoderType, enc, pipeline); // Step 1: Full set [1, 2, 3, 4] runStep(enc, 0, new Uint32Array([1, 2, 3, 4])); @@ -445,8 +453,32 @@ g.test('pipeline_switch') immediateSizeB = 8; } - const pipelineA = createPipeline(t, encoderType, wgslDeclA, copyCodeA, 16); - const pipelineB = createPipeline(t, encoderType, wgslDeclB, copyCodeB, immediateSizeB); + const bindGroupLayout = t.device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT, + buffer: { type: 'storage' }, + }, + { + binding: 1, + visibility: GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT | GPUShaderStage.VERTEX, + buffer: { type: 'uniform', hasDynamicOffset: true }, + }, + ], + }); + + const layoutA = t.device.createPipelineLayout({ + bindGroupLayouts: [bindGroupLayout], + immediateSize: 16, + }); + const pipelineA = createPipeline(t, encoderType, wgslDeclA, copyCodeA, 16, layoutA); + + const layoutB = t.device.createPipelineLayout({ + bindGroupLayouts: [bindGroupLayout], + immediateSize: immediateSizeB, + }); + const pipelineB = createPipeline(t, encoderType, wgslDeclB, copyCodeB, immediateSizeB, layoutB); const outputBuffer = t.createBufferTracked({ size: 32, @@ -459,37 +491,29 @@ g.test('pipeline_switch') // previously bound descriptor sets remain valid after a pipeline switch // (important for Vulkan correctness). const bindGroup = t.device.createBindGroup({ - layout: pipelineA.getBindGroupLayout(0), + layout: bindGroupLayout, entries: [ { binding: 0, resource: { buffer: outputBuffer } }, { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, ], }); - const immA = new Uint32Array([1, 2, 3, 4]); - const immB = sameImmediateSize ? new Uint32Array([1, 2, 3, 4]) : new Uint32Array([1, 2]); + const immData = new Uint32Array([1, 2, 3, 4]); const commandEncoder = t.device.createCommandEncoder(); encodeForPassType(t, encoderType, commandEncoder, enc => { // Pipeline A: set immediates [1, 2, 3, 4], dispatch/draw at outIndex 0. - if (encoderType === 'compute pass') { - (enc as GPUComputePassEncoder).setPipeline(pipelineA as GPUComputePipeline); - } else { - (enc as GPURenderPassEncoder).setPipeline(pipelineA as GPURenderPipeline); - } + setPipeline(encoderType, enc, pipelineA); enc.setBindGroup(0, bindGroup, [0]); - enc.setImmediates!(0, immA, 0, immA.length); - dispatchOrDraw(encoderType, enc, 0); + enc.setImmediates!(0, immData, 0, immData.length); + dispatchOrDraw(encoderType, enc); // Pipeline B: switch pipeline, update dynamic offset for outIndex 1. - if (encoderType === 'compute pass') { - (enc as GPUComputePassEncoder).setPipeline(pipelineB as GPUComputePipeline); - } else { - (enc as GPURenderPassEncoder).setPipeline(pipelineB as GPURenderPipeline); - } + setPipeline(encoderType, enc, pipelineB); enc.setBindGroup(0, bindGroup, [256]); - enc.setImmediates!(0, immB, 0, immB.length); - dispatchOrDraw(encoderType, enc, 1); + // We purposefully don't call setImmediates here to verify that the + // immediates set prior to the pipeline switch are preserved. + dispatchOrDraw(encoderType, enc); }); t.device.queue.submit([commandEncoder.finish()]); @@ -508,8 +532,13 @@ g.test('use_max_immediate_size') .fn(t => { const { encoderType } = t.params; t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); - assert(t.device.limits.maxImmediateSize !== undefined); + const maxImmediateSize = t.device.limits.maxImmediateSize; + if (maxImmediateSize === undefined) { + t.skip('maxImmediateSize limit is undefined'); + return; + } + // Create a pipeline that reads the first and last u32 of the immediate data const count = maxImmediateSize / 4; const members: string[] = []; @@ -528,10 +557,7 @@ g.test('use_max_immediate_size') size: 8, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, }); - const indexUniformBuffer = t.makeBufferWithContents( - new Uint32Array(256 / 4), - GPUBufferUsage.UNIFORM - ); + const indexUniformBuffer = createOutputIndexBuffer(t, 1); const bindGroup = t.device.createBindGroup({ layout: pipeline.getBindGroupLayout(0), @@ -547,13 +573,7 @@ g.test('use_max_immediate_size') data[0] = 0xdeadbeef; data[count - 1] = 0xcafebabe; - if (encoderType === 'compute pass') { - (enc as GPUComputePassEncoder).setPipeline(pipeline as GPUComputePipeline); - } else { - (enc as GPURenderPassEncoder | GPURenderBundleEncoder).setPipeline( - pipeline as GPURenderPipeline - ); - } + setPipeline(encoderType, enc, pipeline); enc.setBindGroup(0, bindGroup, [0]); enc.setImmediates!(0, data, 0, count); dispatchOrDraw(encoderType, enc); @@ -564,69 +584,91 @@ g.test('use_max_immediate_size') }); g.test('typed_array_arguments') - .desc('Verify srcOffset and srcSize are in elements for TypedArrays.') + .desc('Verify dataOffset and dataSize arguments work correctly for all TypedArray types.') .params(u => u .combine('typedArray', kTypedArrayBufferViewKeys) .combine('encoderType', kProgrammableEncoderTypes) + .beginSubcases() + .combine('dataOffset', [undefined, 0, 2]) + .combine('dataSize', [undefined, 2]) + .filter(t => { + // WebGPU requires the byte size of the provided data to `setImmediates` to be a multiple of 4. + const elementSize = kTypedArrayBufferViews[t.typedArray].BYTES_PER_ELEMENT; + const actualDataOffset = t.dataOffset ?? 0; + + // Let's test a larger size to accommodate BigUint64Array/BigInt64Array matrices more easily + const maxElementsIn64Bytes = 64 / elementSize; + const actualDataSize = t.dataSize ?? maxElementsIn64Bytes - actualDataOffset; + const byteSize = actualDataSize * elementSize; + + return byteSize <= 64 && byteSize % 4 === 0; + }) ) .fn(t => { - const { typedArray, encoderType } = t.params; + const { typedArray, encoderType, dataOffset, dataSize } = t.params; t.skipIf(typedArray === 'Float16Array', 'TODO(#4297): Float16Array not yet supported'); t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); const Ctor = kTypedArrayBufferViews[typedArray]; const elementSize = Ctor.BYTES_PER_ELEMENT; - // Write a known pattern. - // Use a buffer of 8 bytes. - // Pattern: 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 - const bytePattern = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]; - - // Create a large buffer to test offset. - // Use offset = 2 elements. - // Need enough data. - // Construct the TypedArray such that at index `offset`, the pattern exists. - const offset = 2; - const elementCount = 8 / elementSize; // Write 8 bytes. - - // Total elements needed: offset + elementCount + padding - const totalElements = offset + elementCount + 2; - const arr = new Ctor(totalElements); - - // Fill with non-zero value. - const fillView = new Uint8Array(arr.buffer); - fillView.fill(0xaa); - - // The bytes at `arr[offset]...` should match `bytePattern`. - // Use a DataView on the array's buffer to set the bytes. - const buffer = arr.buffer; - const byteOffset = arr.byteOffset + offset * elementSize; - const view = new DataView(buffer); - - for (let i = 0; i < 8; i++) { - view.setUint8(byteOffset + i, bytePattern[i]); - } - - // Now `arr` contains the pattern at `offset`. - - // Shader: read 2 u32s (8 bytes). - const wgslDecl = 'var data: array;'; + // Use a baseline of 16 u32s (64 bytes). + const immediateSize = 64; + const wgslDecl = ` + struct ImmediateData { + m0: vec4, + m1: vec4, + m2: vec4, + m3: vec4 + } + var data: ImmediateData; + `; const copyCode = ` - output[0] = data[0]; - output[1] = data[1]; + output[0] = data.m0.x; + output[1] = data.m0.y; + output[2] = data.m0.z; + output[3] = data.m0.w; + output[4] = data.m1.x; + output[5] = data.m1.y; + output[6] = data.m1.z; + output[7] = data.m1.w; + output[8] = data.m2.x; + output[9] = data.m2.y; + output[10] = data.m2.z; + output[11] = data.m2.w; + output[12] = data.m3.x; + output[13] = data.m3.y; + output[14] = data.m3.z; + output[15] = data.m3.w; `; + const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, immediateSize); - const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, 8); + const actualDataOffset = dataOffset ?? 0; + // We want to write `dataSize` elements. If dataSize is undefined, it defaults to: + // array.length - dataOffset + const maxElementsIn64Bytes = 64 / elementSize; + const actualDataSize = dataSize ?? maxElementsIn64Bytes - actualDataOffset; + + // Create a typed array buffer. If dataSize is undefined, we shouldn't add padding + // because setImmediates uses the rest of the array, which would exceed our intended size or break the % 4 rule. + const paddingElements = dataSize === undefined ? 0 : 4; + const arr = new Ctor(actualDataOffset + actualDataSize + paddingElements); + const view = new DataView(arr.buffer); + + for (let i = 0; i < actualDataSize; i++) { + // Just fill some bytes. E.g., repeating byte pattern. + for (let b = 0; b < elementSize; b++) { + view.setUint8((actualDataOffset + i) * elementSize + b, 0x10 + b + i); + } + } + + const commandEncoder = t.device.createCommandEncoder(); const outputBuffer = t.createBufferTracked({ - size: 8, + size: 64, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, }); - const indexUniformBuffer = t.makeBufferWithContents( - new Uint32Array(256 / 4), - GPUBufferUsage.UNIFORM - ); - + const indexUniformBuffer = createOutputIndexBuffer(t, 1); const bindGroup = t.device.createBindGroup({ layout: pipeline.getBindGroupLayout(0), entries: [ @@ -635,24 +677,39 @@ g.test('typed_array_arguments') ], }); - const commandEncoder = t.device.createCommandEncoder(); encodeForPassType(t, encoderType, commandEncoder, enc => { - if (encoderType === 'compute pass') { - (enc as GPUComputePassEncoder).setPipeline(pipeline as GPUComputePipeline); + setPipeline(encoderType, enc, pipeline); + enc.setBindGroup(0, bindGroup, [0]); + + // Initialize pipeline storage to a clear pattern mapping to up to 64 bytes + const clearData = new Uint32Array(16); + for (let i = 0; i < 16; i++) clearData[i] = 0xaaaaaaaa + i * 0x11111111; + enc.setImmediates!(0, clearData); + + if (dataSize === undefined) { + if (dataOffset === undefined) { + enc.setImmediates!(0, arr); + } else { + enc.setImmediates!(0, arr, dataOffset); + } } else { - (enc as GPURenderPassEncoder | GPURenderBundleEncoder).setPipeline( - pipeline as GPURenderPipeline - ); + enc.setImmediates!(0, arr, dataOffset, dataSize); } - enc.setBindGroup(0, bindGroup, [0]); - enc.setImmediates!(0, arr, offset, elementCount); + dispatchOrDraw(encoderType, enc); }); - t.device.queue.submit([commandEncoder.finish()]); - // Expected: 0x04030201, 0x08070605 (Little Endian) - const expected = new Uint32Array([0x04030201, 0x08070605]); + const expected = new Uint32Array(16); + for (let i = 0; i < 16; i++) expected[i] = 0xaaaaaaaa + i * 0x11111111; + const expectedView = new Uint8Array(expected.buffer); + + for (let i = 0; i < actualDataSize; i++) { + for (let b = 0; b < elementSize; b++) { + expectedView[i * elementSize + b] = 0x10 + b + i; + } + } + t.expectGPUBufferValuesEqual(outputBuffer, expected); }); From 1ddbf941afa4cf8e5f590444578cabef828f7adb Mon Sep 17 00:00:00 2001 From: shaoboyan Date: Wed, 11 Mar 2026 11:31:41 +0800 Subject: [PATCH 4/5] Iterating on typed array case and use outIndex every where --- .../programmable/immediate.spec.ts | 179 +++++++++--------- 1 file changed, 93 insertions(+), 86 deletions(-) diff --git a/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts b/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts index a7f3a63a8ee4..6db9ab8f7c1b 100644 --- a/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts +++ b/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts @@ -5,8 +5,10 @@ Operation tests for immediate data usage in RenderPassEncoder, ComputePassEncode import { makeTestGroup } from '../../../../../common/framework/test_group.js'; import { getGPU } from '../../../../../common/util/navigator_gpu.js'; import { + assert, kTypedArrayBufferViews, kTypedArrayBufferViewKeys, + memcpy, supportsImmediateData, unreachable, } from '../../../../../common/util/util.js'; @@ -135,8 +137,9 @@ function dispatchOrDraw( /** * Create a uniform buffer with output indices at 256-byte aligned offsets for dynamic binding. - * This is used instead of other mechanisms to provide an output index (like firstVertex or immediates) - * because it works across all shader stages without consuming the immediate data capability that we are actively testing. + * A uniform buffer with dynamic offsets is used to provide the output index because: + * 1. It works uniformly across all shader stages (compute, vertex, fragment). + * 2. It doesn't consume the immediate data capability that these tests are actively exercising. */ function createOutputIndexBuffer(t: AllFeaturesMaxLimitsGPUTest, count: number): GPUBuffer { const buffer = t.createBufferTracked({ @@ -229,11 +232,18 @@ function runAndCheck( ) => void, expectedValues: number[] ) { + assert(expectedValues.length > 0, 'expectedValues must not be empty'); const outputBuffer = t.createBufferTracked({ size: expectedValues.length * 4, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, }); - // Simple uniform for outIndex = 0. Dynamic offset is always [0]. + // A dynamic-offset uniform buffer supplies outIndex = 0 here. + // We use a uniform buffer (rather than e.g. firstVertex via @builtin(vertex_index)) because: + // - It works across all shader stages (compute, vertex, fragment). + // - firstVertex is emulated via root constants on D3D12, which is the same mechanism + // backing var, so using it could mask bugs in the path under test. + // The pipeline layout declares hasDynamicOffset, so we must always pass a dynamic offset + // array — even though this simple helper only ever uses offset [0]. const indexUniformBuffer = t.makeBufferWithContents(new Uint32Array([0]), GPUBufferUsage.UNIFORM); const bindGroup = t.device.createBindGroup({ @@ -339,7 +349,7 @@ g.test('basic_execution') encoderType, pipeline, encoder => { - encoder.setImmediates!(0, inputData, 0, inputData.length); + encoder.setImmediates!(0, inputData); }, expected ); @@ -368,6 +378,8 @@ g.test('update_data') usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, }); + // Uniform buffer with output indices [0, 1, 2] at 256-byte aligned offsets, + // used to direct each dispatch/draw step to a separate region of the output buffer. const indexUniformBuffer = createOutputIndexBuffer(t, 3); const bindGroup = t.device.createBindGroup({ @@ -388,7 +400,7 @@ g.test('update_data') dstOffset: number = 0 ) => { pass.setBindGroup(0, bindGroup, [stepIndex * 256]); - pass.setImmediates!(dstOffset, data, 0, data.length); + pass.setImmediates!(dstOffset, data); dispatchOrDraw(encoderType, pass); }; @@ -453,6 +465,11 @@ g.test('pipeline_switch') immediateSizeB = 8; } + // Same source data for both cases; dataSize controls how many elements are written. + const immDataB = new Uint32Array([5, 6, 7, 8]); + const immDataSizeB = sameImmediateSize ? undefined : immediateSizeB / 4; + const expectedB = sameImmediateSize ? [5, 6, 7, 8] : [5, 6, 0, 0]; + const bindGroupLayout = t.device.createBindGroupLayout({ entries: [ { @@ -481,15 +498,11 @@ g.test('pipeline_switch') const pipelineB = createPipeline(t, encoderType, wgslDeclB, copyCodeB, immediateSizeB, layoutB); const outputBuffer = t.createBufferTracked({ - size: 32, + size: 16, // 4 u32s at outIndex 0 usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, }); - const indexUniformBuffer = createOutputIndexBuffer(t, 2); + const indexUniformBuffer = createOutputIndexBuffer(t, 1); - // Use a single bind group. Both pipelines have the same bind group layout, - // so the same bind group is compatible with both. This verifies that - // previously bound descriptor sets remain valid after a pipeline switch - // (important for Vulkan correctness). const bindGroup = t.device.createBindGroup({ layout: bindGroupLayout, entries: [ @@ -498,32 +511,26 @@ g.test('pipeline_switch') ], }); - const immData = new Uint32Array([1, 2, 3, 4]); - const commandEncoder = t.device.createCommandEncoder(); encodeForPassType(t, encoderType, commandEncoder, enc => { - // Pipeline A: set immediates [1, 2, 3, 4], dispatch/draw at outIndex 0. + // Only set bind group once between bind group compatible pipelines. setPipeline(encoderType, enc, pipelineA); enc.setBindGroup(0, bindGroup, [0]); - enc.setImmediates!(0, immData, 0, immData.length); - dispatchOrDraw(encoderType, enc); + enc.setImmediates!(0, new Uint32Array([1, 2, 3, 4])); - // Pipeline B: switch pipeline, update dynamic offset for outIndex 1. + // Switch to Pipeline B without re-setting the bind group. + // The bind group set under Pipeline A must remain valid. setPipeline(encoderType, enc, pipelineB); - enc.setBindGroup(0, bindGroup, [256]); - // We purposefully don't call setImmediates here to verify that the - // immediates set prior to the pipeline switch are preserved. + // Same source data; dataSize controls how many elements are written. + // Passing undefined for srcOffset/srcSize relies on WebIDL defaults (0 / array.length). + enc.setImmediates!(0, immDataB, undefined, immDataSizeB); dispatchOrDraw(encoderType, enc); }); t.device.queue.submit([commandEncoder.finish()]); - // Pipeline A wrote [1, 2, 3, 4] at outIndex 0. - // Pipeline B wrote [1, 2, 3, 4] (same size) or [1, 2, 0, 0] (different size) at outIndex 1. - const expected = sameImmediateSize - ? new Uint32Array([1, 2, 3, 4, 1, 2, 3, 4]) - : new Uint32Array([1, 2, 3, 4, 1, 2, 0, 0]); - t.expectGPUBufferValuesEqual(outputBuffer, expected); + // Pipeline B's draw used the bind group set under Pipeline A. + t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array(expectedB)); }); g.test('use_max_immediate_size') @@ -575,7 +582,7 @@ g.test('use_max_immediate_size') setPipeline(encoderType, enc, pipeline); enc.setBindGroup(0, bindGroup, [0]); - enc.setImmediates!(0, data, 0, count); + enc.setImmediates!(0, data); dispatchOrDraw(encoderType, enc); }); @@ -590,19 +597,16 @@ g.test('typed_array_arguments') .combine('typedArray', kTypedArrayBufferViewKeys) .combine('encoderType', kProgrammableEncoderTypes) .beginSubcases() - .combine('dataOffset', [undefined, 0, 2]) - .combine('dataSize', [undefined, 2]) - .filter(t => { - // WebGPU requires the byte size of the provided data to `setImmediates` to be a multiple of 4. - const elementSize = kTypedArrayBufferViews[t.typedArray].BYTES_PER_ELEMENT; - const actualDataOffset = t.dataOffset ?? 0; - - // Let's test a larger size to accommodate BigUint64Array/BigInt64Array matrices more easily - const maxElementsIn64Bytes = 64 / elementSize; - const actualDataSize = t.dataSize ?? maxElementsIn64Bytes - actualDataOffset; - const byteSize = actualDataSize * elementSize; - - return byteSize <= 64 && byteSize % 4 === 0; + .expandWithParams(function* (p) { + const elementSize = kTypedArrayBufferViews[p.typedArray].BYTES_PER_ELEMENT; + // Smallest element count that produces a 4-byte-aligned byte size. + const smallCount = Math.max(1, Math.ceil(4 / elementSize)); + yield { dataOffset: undefined, dataSize: undefined }; + yield { dataOffset: 0, dataSize: undefined }; + yield { dataOffset: smallCount, dataSize: undefined }; + yield { dataOffset: undefined, dataSize: smallCount }; + yield { dataOffset: 0, dataSize: smallCount }; + yield { dataOffset: smallCount, dataSize: smallCount }; }) ) .fn(t => { @@ -612,8 +616,9 @@ g.test('typed_array_arguments') const Ctor = kTypedArrayBufferViews[typedArray]; const elementSize = Ctor.BYTES_PER_ELEMENT; - // Use a baseline of 16 u32s (64 bytes). - const immediateSize = 64; + // 64 bytes of immediate data (4 x vec4). This size must match the WGSL struct below. + const kImmediateByteSize = 64; + const kImmediateU32Count = kImmediateByteSize / 4; const wgslDecl = ` struct ImmediateData { m0: vec4, @@ -641,31 +646,39 @@ g.test('typed_array_arguments') output[14] = data.m3.z; output[15] = data.m3.w; `; - const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, immediateSize); + const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, kImmediateByteSize); const actualDataOffset = dataOffset ?? 0; - // We want to write `dataSize` elements. If dataSize is undefined, it defaults to: - // array.length - dataOffset - const maxElementsIn64Bytes = 64 / elementSize; - const actualDataSize = dataSize ?? maxElementsIn64Bytes - actualDataOffset; - - // Create a typed array buffer. If dataSize is undefined, we shouldn't add padding - // because setImmediates uses the rest of the array, which would exceed our intended size or break the % 4 rule. - const paddingElements = dataSize === undefined ? 0 : 4; + const maxElements = kImmediateByteSize / elementSize; + const actualDataSize = dataSize ?? maxElements - actualDataOffset; + + // Validate that the byte size is 4-byte aligned and fits in the immediate block. + const byteSize = actualDataSize * elementSize; + assert( + byteSize <= kImmediateByteSize && byteSize % 4 === 0, + `byteSize ${byteSize} must be <= ${kImmediateByteSize} and a multiple of 4` + ); + + // When dataSize is explicit, add padding elements to verify setImmediates + // respects the dataSize boundary and doesn't read beyond it. + // When dataSize is undefined, no padding since setImmediates reads to the end of the array. + const paddingElements = dataSize !== undefined ? 4 : 0; const arr = new Ctor(actualDataOffset + actualDataSize + paddingElements); const view = new DataView(arr.buffer); + // Fill the data region with a recognizable byte pattern. for (let i = 0; i < actualDataSize; i++) { - // Just fill some bytes. E.g., repeating byte pattern. for (let b = 0; b < elementSize; b++) { view.setUint8((actualDataOffset + i) * elementSize + b, 0x10 + b + i); } } - const commandEncoder = t.device.createCommandEncoder(); + // Baseline clear pattern for the full immediate block. + const clearData = new Uint32Array(kImmediateU32Count); + for (let i = 0; i < kImmediateU32Count; i++) clearData[i] = 0xaaaaaaaa + i * 0x11111111; const outputBuffer = t.createBufferTracked({ - size: 64, + size: kImmediateByteSize, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, }); const indexUniformBuffer = createOutputIndexBuffer(t, 1); @@ -677,38 +690,32 @@ g.test('typed_array_arguments') ], }); + const commandEncoder = t.device.createCommandEncoder(); encodeForPassType(t, encoderType, commandEncoder, enc => { setPipeline(encoderType, enc, pipeline); enc.setBindGroup(0, bindGroup, [0]); - // Initialize pipeline storage to a clear pattern mapping to up to 64 bytes - const clearData = new Uint32Array(16); - for (let i = 0; i < 16; i++) clearData[i] = 0xaaaaaaaa + i * 0x11111111; + // Initialize immediates to the baseline clear pattern. enc.setImmediates!(0, clearData); - if (dataSize === undefined) { - if (dataOffset === undefined) { - enc.setImmediates!(0, arr); - } else { - enc.setImmediates!(0, arr, dataOffset); - } - } else { - enc.setImmediates!(0, arr, dataOffset, dataSize); - } + // Overwrite with typed array data using the parametrized offset/size. + // Passing undefined for dataOffset/dataSize uses the WebIDL default (0 / array.length). + enc.setImmediates!(0, arr, dataOffset, dataSize); dispatchOrDraw(encoderType, enc); }); t.device.queue.submit([commandEncoder.finish()]); - const expected = new Uint32Array(16); - for (let i = 0; i < 16; i++) expected[i] = 0xaaaaaaaa + i * 0x11111111; - const expectedView = new Uint8Array(expected.buffer); - - for (let i = 0; i < actualDataSize; i++) { - for (let b = 0; b < elementSize; b++) { - expectedView[i * elementSize + b] = 0x10 + b + i; - } - } + // Build expected: baseline pattern with the written typed-array bytes overlaid at offset 0. + const expected = new Uint32Array(clearData); + memcpy( + { + src: arr.buffer, + start: actualDataOffset * elementSize, + length: actualDataSize * elementSize, + }, + { dst: expected.buffer, start: 0 } + ); t.expectGPUBufferValuesEqual(outputBuffer, expected); }); @@ -733,11 +740,11 @@ g.test('multiple_updates_before_draw_or_dispatch') pipeline, encoder => { // 1. Set all to [1, 2, 3, 4] - encoder.setImmediates!(0, new Uint32Array([1, 2, 3, 4]), 0, 4); + encoder.setImmediates!(0, new Uint32Array([1, 2, 3, 4])); // 2. Update middle two to [5, 6] -> [1, 5, 6, 4] - encoder.setImmediates!(4, new Uint32Array([5, 6]), 0, 2); + encoder.setImmediates!(4, new Uint32Array([5, 6])); // 3. Update last to [7] -> [1, 5, 6, 7] - encoder.setImmediates!(12, new Uint32Array([7]), 0, 1); + encoder.setImmediates!(12, new Uint32Array([7])); }, [1, 5, 6, 7] ); @@ -775,8 +782,8 @@ g.test('render_pass_and_bundle_mix') const bundleEncoder = t.device.createRenderBundleEncoder({ colorFormats: ['r32uint'] }); bundleEncoder.setPipeline(pipeline); bundleEncoder.setBindGroup(0, bindGroup, [0]); - bundleEncoder.setImmediates!(0, new Uint32Array([1, 10]), 0, 2); - bundleEncoder.draw(1, 1, 0, 0); + bundleEncoder.setImmediates!(0, new Uint32Array([1, 10])); + bundleEncoder.draw(1); const bundle = bundleEncoder.finish(); const renderTargetTexture = t.createTextureTracked({ @@ -802,8 +809,8 @@ g.test('render_pass_and_bundle_mix') // Pass: Set [2, 20], Draw (Index 1) pass.setPipeline(pipeline); pass.setBindGroup(0, bindGroup, [256]); - pass.setImmediates!(0, new Uint32Array([2, 20]), 0, 2); - pass.draw(1, 1, 1, 0); + pass.setImmediates!(0, new Uint32Array([2, 20])); + pass.draw(1); pass.end(); t.device.queue.submit([commandEncoder.finish()]); @@ -841,16 +848,16 @@ g.test('render_bundle_isolation') const bundleEncoderA = t.device.createRenderBundleEncoder({ colorFormats: ['r32uint'] }); bundleEncoderA.setPipeline(pipeline); bundleEncoderA.setBindGroup(0, bindGroup, [0]); - bundleEncoderA.setImmediates!(0, new Uint32Array([1, 2]), 0, 2); - bundleEncoderA.draw(1, 1, 0, 0); + bundleEncoderA.setImmediates!(0, new Uint32Array([1, 2])); + bundleEncoderA.draw(1); const bundleA = bundleEncoderA.finish(); // Bundle B: Set [3, 4], Draw (Index 1) const bundleEncoderB = t.device.createRenderBundleEncoder({ colorFormats: ['r32uint'] }); bundleEncoderB.setPipeline(pipeline); bundleEncoderB.setBindGroup(0, bindGroup, [256]); - bundleEncoderB.setImmediates!(0, new Uint32Array([3, 4]), 0, 2); - bundleEncoderB.draw(1, 1, 1, 0); + bundleEncoderB.setImmediates!(0, new Uint32Array([3, 4])); + bundleEncoderB.draw(1); const bundleB = bundleEncoderB.finish(); const renderTargetTexture = t.createTextureTracked({ From af03a72cda32de20db8ac82f799689a0e33afeb6 Mon Sep 17 00:00:00 2001 From: shaoboyan Date: Mon, 16 Mar 2026 14:50:54 +0800 Subject: [PATCH 5/5] Address comments --- .../command_buffer/programmable/immediate.spec.ts | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts b/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts index 6db9ab8f7c1b..13512fbac35a 100644 --- a/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts +++ b/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts @@ -442,8 +442,7 @@ g.test('pipeline_switch') // Pipeline A always uses vec4 (16 bytes). const wgslDeclA = 'var data: vec4;'; const copyCodeA = ` - let base = outIndex * 4; - output[base] = data.x; output[base+1] = data.y; output[base+2] = data.z; output[base+3] = data.w; + output[0] = data.x; output[1] = data.y; output[2] = data.z; output[3] = data.w; `; let wgslDeclB: string; @@ -459,8 +458,7 @@ g.test('pipeline_switch') // Pipeline B uses vec2 (8 bytes) — different/incompatible layout. wgslDeclB = 'var data: vec2;'; copyCodeB = ` - let base = outIndex * 4; - output[base] = data.x; output[base+1] = data.y; output[base+2] = 0u; output[base+3] = 0u; + output[0] = data.x; output[1] = data.y; output[2] = 0u; output[3] = 0u; `; immediateSizeB = 8; } @@ -667,10 +665,10 @@ g.test('typed_array_arguments') const view = new DataView(arr.buffer); // Fill the data region with a recognizable byte pattern. - for (let i = 0; i < actualDataSize; i++) { - for (let b = 0; b < elementSize; b++) { - view.setUint8((actualDataOffset + i) * elementSize + b, 0x10 + b + i); - } + const dataByteOffset = actualDataOffset * elementSize; + const dataByteSize = actualDataSize * elementSize; + for (let byte = 0; byte < dataByteSize; byte++) { + view.setUint8(dataByteOffset + byte, 0x10 + byte); } // Baseline clear pattern for the full immediate block.