Skip to content

Commit 39503d9

Browse files
committed
gfxsweep draft
1 parent 1ef9c51 commit 39503d9

2 files changed

Lines changed: 154 additions & 0 deletions

File tree

src/client/Presets/GfxSweep.hpp

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/*
2+
Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
3+
4+
Permission is hereby granted, free of charge, to any person obtaining a copy
5+
of this software and associated documentation files (the "Software"), to deal
6+
in the Software without restriction, including without limitation the rights
7+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8+
copies of the Software, and to permit persons to whom the Software is
9+
furnished to do so, subject to the following conditions:
10+
11+
The above copyright notice and this permission notice shall be included in
12+
all copies or substantial portions of the Software.
13+
14+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20+
THE SOFTWARE.
21+
*/
22+
23+
#include "EnvVars.hpp"
24+
25+
namespace {

// Decides whether `spec` can be handed to the transfer parser as-is.
// Returns true when the first non-whitespace character begins a number:
// either a decimal digit, or a '-' that is immediately followed by a digit.
// Empty and all-whitespace strings yield false.
bool LooksLikeFullTransferLine(std::string const& spec)
{
  // The <cctype> classifiers require a value representable as unsigned char.
  auto const isBlank   = [](char ch) { return isspace(static_cast<unsigned char>(ch)) != 0; };
  auto const isNumeral = [](char ch) { return isdigit(static_cast<unsigned char>(ch)) != 0; };

  // Advance past any leading whitespace.
  std::string::const_iterator cursor = spec.begin();
  for (; cursor != spec.end() && isBlank(*cursor); ++cursor) {
  }

  if (cursor == spec.end())
    return false;

  if (*cursor != '-')
    return isNumeral(*cursor);

  // A leading minus only counts when a digit follows it directly.
  std::string::const_iterator const following = cursor + 1;
  return following != spec.end() && isNumeral(*following);
}

} // namespace
40+
41+
int GfxSweepPreset(EnvVars& ev,
42+
size_t const numBytesPerTransfer,
43+
std::string const presetName,
44+
[[maybe_unused]] bool const bytesSpecified)
45+
{
46+
(void)presetName;
47+
48+
ev.useSingleStream = 1;
49+
50+
int showMinOnly = EnvVars::GetEnvVar("SHOW_MIN_ONLY", 1);
51+
int verbose = EnvVars::GetEnvVar("VERBOSE", 0);
52+
std::vector<int> blockList = EnvVars::GetEnvVarArray("BLOCKSIZES", {256});
53+
std::vector<int> unrollList = EnvVars::GetEnvVarArray("UNROLLS", {1, 2, 3, 4, 6, 8});
54+
std::vector<int> numSesList = EnvVars::GetEnvVarArray("NUM_SUB_EXECS", {4, 8, 12, 16, 24, 32});
55+
56+
std::string const spec = EnvVars::GetEnvVar("GFX_SWEEP_TRANSFER", "G0->G0->G0");
57+
std::string const line = LooksLikeFullTransferLine(spec) ? spec : (std::string("1 1 ") + spec);
58+
59+
std::vector<TransferBench::Transfer> transfers;
60+
TransferBench::Utils::CheckForError(TransferBench::ParseTransfers(line, transfers));
61+
62+
if (transfers.size() != 1) {
63+
TransferBench::Utils::Print(
64+
"[ERROR] gfxsweep expects exactly one transfer after parsing (got %zu). "
65+
"Set GFX_SWEEP_TRANSFER to a single SRC EXE DST triplet or one basic/advanced line that expands to one transfer.\n",
66+
transfers.size());
67+
return 1;
68+
}
69+
70+
if (transfers[0].exeDevice.exeType != TransferBench::EXE_GPU_GFX) {
71+
TransferBench::Utils::Print(
72+
"[ERROR] gfxsweep requires a GPU GFX (G) executor; parsed executor type is not GFX.\n");
73+
return 1;
74+
}
75+
76+
transfers[0].numBytes = numBytesPerTransfer;
77+
78+
ev.DisplayEnvVars();
79+
if (!ev.hideEnv) {
80+
if (!ev.outputToCsv)
81+
printf("[GfxSweep Related]\n");
82+
ev.Print("GFX_SWEEP_TRANSFER", spec, "Transfer spec (see config file format)");
83+
ev.Print("BLOCKSIZES", blockList.size(), EnvVars::ToStr(blockList).c_str());
84+
ev.Print("NUM_SUB_EXECS", numSesList.size(), EnvVars::ToStr(numSesList).c_str());
85+
ev.Print("SHOW_MIN_ONLY", showMinOnly, showMinOnly ? "Showing only slowest sub-executor aggregate" : "Showing slowest and fastest");
86+
ev.Print("UNROLLS", unrollList.size(), EnvVars::ToStr(unrollList).c_str());
87+
ev.Print("VERBOSE", verbose, verbose ? "Display test results" : "Display summary only");
88+
printf("\n");
89+
}
90+
91+
printf("GFX sweep (single transfer):\n");
92+
printf("============================\n");
93+
printf("- Parsed line: %s\n", line.c_str());
94+
printf("- %lu bytes per transfer\n", static_cast<unsigned long>(numBytesPerTransfer));
95+
96+
TransferBench::ConfigOptions cfg = ev.ToConfigOptions();
97+
98+
std::map<std::pair<int, int>, TransferBench::TestResults> results;
99+
100+
for (int blockSize : blockList) {
101+
printf("Blocksize: %d\n", blockSize);
102+
ev.gfxBlockSize = cfg.gfx.blockSize = blockSize;
103+
104+
printf("#CUs\\Unroll");
105+
for (int u : unrollList) {
106+
printf(" %d(Min) ", u);
107+
if (!showMinOnly)
108+
printf(" %d(Max) ", u);
109+
}
110+
printf("\n");
111+
112+
for (int c : numSesList) {
113+
printf(" %5d ", c);
114+
fflush(stdout);
115+
for (int u : unrollList) {
116+
ev.gfxUnroll = cfg.gfx.unrollFactor = u;
117+
transfers[0].numSubExecs = c;
118+
119+
double minBandwidth = std::numeric_limits<double>::max();
120+
double maxBandwidth = std::numeric_limits<double>::min();
121+
TransferBench::TestResults result;
122+
if (TransferBench::RunTransfers(cfg, transfers, result)) {
123+
for (auto const& exeResult : result.exeResults) {
124+
minBandwidth = std::min(minBandwidth, exeResult.second.avgBandwidthGbPerSec);
125+
maxBandwidth = std::max(maxBandwidth, exeResult.second.avgBandwidthGbPerSec);
126+
}
127+
results[std::make_pair(c, u)] = result;
128+
} else {
129+
minBandwidth = 0.0;
130+
}
131+
printf(" %7.2f ", minBandwidth);
132+
if (!showMinOnly)
133+
printf(" %7.2f ", maxBandwidth);
134+
fflush(stdout);
135+
}
136+
printf("\n");
137+
fflush(stdout);
138+
}
139+
140+
if (verbose) {
141+
int testNum = 0;
142+
for (int c : numSesList) {
143+
for (int u : unrollList) {
144+
printf("SubExecs: %d Unroll %d\n", c, u);
145+
TransferBench::Utils::PrintResults(ev, ++testNum, transfers, results[std::make_pair(c, u)]);
146+
}
147+
}
148+
}
149+
}
150+
151+
return 0;
152+
}

src/client/Presets/Presets.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ THE SOFTWARE.
3030
#include "AllToAll.hpp"
3131
#include "AllToAllN.hpp"
3232
#include "AllToAllSweep.hpp"
33+
#include "GfxSweep.hpp"
3334
#include "HbmBandwidth.hpp"
3435
#include "HealthCheck.hpp"
3536
#include "NicRings.hpp"
@@ -52,6 +53,7 @@ std::map<std::string, std::pair<PresetFunc, std::string>> presetFuncMap =
5253
{"a2a", {AllToAllPreset, "Tests parallel transfers between all pairs of GPU devices"}},
5354
{"a2a_n", {AllToAllRdmaPreset, "Tests parallel transfers between all pairs of GPU devices using Nearest NIC RDMA transfers"}},
5455
{"a2asweep", {AllToAllSweepPreset, "Test GFX-based all-to-all transfers swept across different CU and GFX unroll counts"}},
56+
{"gfxsweep", {GfxSweepPreset, "Sweep BLOCKSIZES, UNROLLS, and NUM_SUB_EXECS for one GFX transfer (GFX_SWEEP_TRANSFER)"}},
5557
{"hbm", {HbmBandwidthPreset, "Tests HBM bandwidth"}},
5658
{"healthcheck", {HealthCheckPreset, "Simple bandwidth health check (MI300X series only)"}},
5759
{"nicrings", {NicRingsPreset, "Tests NIC rings created across identical NIC indices across ranks"}},

0 commit comments

Comments
 (0)