Skip to content

Commit d5d3d5e

Browse files
Addressing Copilot review
1 parent fd62012 commit d5d3d5e

3 files changed

Lines changed: 37 additions & 32 deletions

File tree

src/client/Presets/HbmBandwidth.hpp

Lines changed: 35 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ int HbmBandwidthPreset(EnvVars& ev,
216216

217217
// Check for consistency across ranks
218218
IS_UNIFORM(blockSizes, "BLOCKSIZES");
219-
IS_UNIFORM(blockSizes, "CRITERIA");
219+
IS_UNIFORM(criteria, "CRITERIA");
220220
IS_UNIFORM(elemBytes, "ELEM_BYTES");
221221
// GPU_INDICES may be different per rank
222222
IS_UNIFORM(memTypeIdx, "MEM_TYPE");
@@ -225,7 +225,7 @@ int HbmBandwidthPreset(EnvVars& ev,
225225
IS_UNIFORM(numSesList, "NUM_SUB_EXECS");
226226
IS_UNIFORM(prewarmMsec, "PREWARM_MSEC");
227227
IS_UNIFORM(showDetails, "SHOW_DETAILS");
228-
IS_UNIFORM(showDetails, "SHOW_EXTRA");
228+
IS_UNIFORM(showExtra, "SHOW_EXTRA");
229229
IS_UNIFORM(temporalMask, "TEMPORAL_MASK");
230230
IS_UNIFORM(unrolls, "UNROLLS");
231231
IS_UNIFORM(useWallClock, "USE_WALLCLOCK");
@@ -262,17 +262,18 @@ int HbmBandwidthPreset(EnvVars& ev,
262262
if (!gpuIndices.empty()) {
263263
for (auto gpuIdx : gpuIndices) {
264264
if (gpuIdx < 0 || gpuIdx >= numDetectedGpus) {
265-
Utils::Print("[ERROR] GPU_INDICES index out of range (%d) (rank %d)", gpuIdx, myRank);
265+
Utils::Print("[ERROR] GPU_INDICES index out of range (%d) (rank %d)\n", gpuIdx, myRank);
266266
return 1;
267267
}
268268
}
269269
}
270270

271-
if (numBuffers <= 1) {
271+
if (numBuffers < 1) {
272272
Utils::Print("[ERROR] NUM_BUFFERS must be a positive number (not %d)\n", numBuffers);
273+
return 1;
273274
}
274275
if (numBuffers > numIterations) {
275-
Utils::Print("[WARN] NUM_BUFFERS (%d) exceeds NUM_ITERATIONS ($d), so some buffers will not be used\n",
276+
Utils::Print("[WARN] NUM_BUFFERS (%d) exceeds NUM_ITERATIONS (%d), so some buffers will not be used\n",
276277
numBuffers, numIterations);
277278
numBuffers = numIterations;
278279
}
@@ -307,6 +308,7 @@ int HbmBandwidthPreset(EnvVars& ev,
307308

308309
if (unrolls.empty()) {
309310
Utils::Print("[ERROR] UNROLLS may not be empty");
311+
return 1;
310312
}
311313
for (auto unroll : unrolls) {
312314
if (unroll != 1 && unroll != 2 && unroll != 4 && unroll != 8 && unroll != 16) {
@@ -383,33 +385,29 @@ int HbmBandwidthPreset(EnvVars& ev,
383385
for (int gpuIdx : gpuIndices) {
384386
HIP_CALL(hipSetDevice(gpuIdx));
385387

386-
// Determine wall clock rate for this GPU
387-
int wallClockRate;
388-
if (useWallClock) {
389-
#if defined(__NVCC__)
390-
wallClockRate = 1000000;
391-
#else
392-
HIP_CALL(hipDeviceGetAttribute(&wallClockRate, hipDeviceAttributeWallClockRate, gpuIdx));
393-
#endif
394-
}
395-
396388
// Create streams/events for this GPU
397389
hipStream_t stream;
398390
hipEvent_t startEvent, stopEvent;
399391
HIP_CALL(hipStreamCreate(&stream));
400392
HIP_CALL(hipEventCreate(&startEvent));
401393
HIP_CALL(hipEventCreate(&stopEvent));
402394

403-
// Allocate pinned host memory closest to this GPU to capture timestamps
404-
long long* minStartCycle;
405-
long long* maxStopCycle;
406-
if (Utils::AllocateMemory({MEM_CPU_CLOSEST, gpuIdx, myRank}, sizeof(int64_t), (void**)&minStartCycle)) {
407-
Utils::Print("[ERROR] Unable to allocate pinned host memory on rank %d closest to GPU %d\n", myRank, gpuIdx);
408-
return 1;
409-
}
410-
if (Utils::AllocateMemory({MEM_CPU_CLOSEST, gpuIdx, myRank}, sizeof(int64_t), (void**)&maxStopCycle)) {
411-
Utils::Print("[ERROR] Unable to allocate pinned host memory on rank %d closest to GPU %d\n", myRank, gpuIdx);
412-
return 1;
395+
// Allocate pinned host memory closest to this GPU to capture timestamps (if enabled)
396+
int wallClockRate;
397+
long long* minStartCycle = nullptr;
398+
long long* maxStopCycle = nullptr;
399+
400+
if (useWallClock) {
401+
#if defined(__NVCC__)
402+
wallClockRate = 1000000;
403+
#else
404+
HIP_CALL(hipDeviceGetAttribute(&wallClockRate, hipDeviceAttributeWallClockRate, gpuIdx));
405+
#endif
406+
if (Utils::AllocateMemory({MEM_CPU_CLOSEST, gpuIdx, myRank}, sizeof(int64_t), (void**)&minStartCycle) ||
407+
Utils::AllocateMemory({MEM_CPU_CLOSEST, gpuIdx, myRank}, sizeof(int64_t), (void**)&maxStopCycle)) {
408+
Utils::Print("[ERROR] Unable to allocate pinned host memory on rank %d closest to GPU %d\n", myRank, gpuIdx);
409+
return 1;
410+
}
413411
}
414412

415413
// Allocate and initialize each GPU buffer
@@ -458,7 +456,7 @@ int HbmBandwidthPreset(EnvVars& ev,
458456
int currBufferIdx = 0;
459457
auto prewarmEnd = std::chrono::steady_clock::now() + std::chrono::milliseconds(prewarmMsec);
460458
do {
461-
kernel<<<gridDim, blockDim, 0, stream>>>(inputBuffers[currBufferIdx++], NULL, numSteps, minStartCycle, maxStopCycle);
459+
kernel<<<gridDim, blockDim, 0, stream>>>(inputBuffers[currBufferIdx++], nullptr, numSteps, minStartCycle, maxStopCycle);
462460
HIP_CALL(hipStreamSynchronize(stream));
463461
if (currBufferIdx == numBuffers) currBufferIdx = 0;
464462
} while (std::chrono::steady_clock::now() < prewarmEnd);
@@ -473,15 +471,13 @@ int HbmBandwidthPreset(EnvVars& ev,
473471
if (!useWallClock) {
474472
HIP_CALL(hipEventRecord(startEvent, stream));
475473
}
476-
kernel<<<gridDim, blockDim, 0, stream>>>(inputBuffers[currBufferIdx++], NULL, numSteps,
477-
useWallClock ? minStartCycle : NULL, maxStopCycle);
474+
kernel<<<gridDim, blockDim, 0, stream>>>(inputBuffers[currBufferIdx++], nullptr, numSteps, minStartCycle, maxStopCycle);
478475
if (!useWallClock) {
479476
HIP_CALL(hipEventRecord(stopEvent, stream));
480477
}
481478
#else
482-
hipExtLaunchKernelGGL(kernel, gridDim, blockDim, 0, stream, useWallClock ? NULL : startEvent, useWallClock ? NULL : stopEvent, 0,
483-
inputBuffers[currBufferIdx], nullptr , numSteps,
484-
useWallClock ? minStartCycle : NULL, maxStopCycle);
479+
hipExtLaunchKernelGGL(kernel, gridDim, blockDim, 0, stream, useWallClock ? nullptr : startEvent, useWallClock ? nullptr : stopEvent, 0,
480+
inputBuffers[currBufferIdx++], nullptr, numSteps, minStartCycle, maxStopCycle);
485481
#endif
486482
HIP_CALL(hipStreamSynchronize(stream));
487483
if (currBufferIdx == numBuffers) currBufferIdx = 0;
@@ -543,6 +539,14 @@ int HbmBandwidthPreset(EnvVars& ev,
543539
}
544540
}
545541

542+
if (useWallClock) {
543+
if (Utils::DeallocateMemory(MEM_CPU_CLOSEST, minStartCycle, sizeof(int64_t)) ||
544+
Utils::DeallocateMemory(MEM_CPU_CLOSEST, maxStopCycle, sizeof(int64_t))) {
545+
Utils::Print("[ERROR] Unable to deallocate pinned host memory on rank %d closest to GPU %d\n", myRank, gpuIdx);
546+
return 1;
547+
}
548+
}
549+
546550
// Cleanup streams and events
547551
HIP_CALL(hipStreamDestroy(stream));
548552
HIP_CALL(hipEventDestroy(startEvent));

src/client/Presets/Presets.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ THE SOFTWARE.
4545
typedef int (*PresetFunc)(EnvVars& ev,
4646
size_t const numBytesPerTransfer,
4747
std::string const presetName,
48-
bool const bytesSpecified);
48+
[[maybe_unused]] bool const bytesSpecified);
4949

5050
std::map<std::string, std::pair<PresetFunc, std::string>> presetFuncMap =
5151
{

src/client/Utilities.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ THE SOFTWARE.
2323
#pragma once
2424
#include <unordered_map>
2525
#include <unordered_set>
26+
#include <type_traits>
2627
#include "TransferBench.hpp"
2728

2829
namespace TransferBench::Utils

0 commit comments

Comments
 (0)