Skip to content

Commit 0228b69

Browse files
committed
fix
1 parent 39503d9 commit 0228b69

1 file changed

Lines changed: 101 additions & 67 deletions

File tree

src/client/Presets/GfxSweep.hpp

Lines changed: 101 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ THE SOFTWARE.
2222

2323
#include "EnvVars.hpp"
2424

25+
#include <map>
26+
#include <tuple>
27+
2528
namespace {
2629

2730
bool LooksLikeFullTransferLine(std::string const& spec)
@@ -41,25 +44,32 @@ bool LooksLikeFullTransferLine(std::string const& spec)
4144
int GfxSweepPreset(EnvVars& ev,
4245
size_t const numBytesPerTransfer,
4346
std::string const presetName,
44-
[[maybe_unused]] bool const bytesSpecified)
47+
bool const bytesSpecified)
4548
{
46-
(void)presetName;
47-
48-
ev.useSingleStream = 1;
49-
5049
int showMinOnly = EnvVars::GetEnvVar("SHOW_MIN_ONLY", 1);
5150
int verbose = EnvVars::GetEnvVar("VERBOSE", 0);
52-
std::vector<int> blockList = EnvVars::GetEnvVarArray("BLOCKSIZES", {256});
53-
std::vector<int> unrollList = EnvVars::GetEnvVarArray("UNROLLS", {1, 2, 3, 4, 6, 8});
54-
std::vector<int> numSesList = EnvVars::GetEnvVarArray("NUM_SUB_EXECS", {4, 8, 12, 16, 24, 32});
55-
56-
std::string const spec = EnvVars::GetEnvVar("GFX_SWEEP_TRANSFER", "G0->G0->G0");
51+
std::vector<int> blockList = EnvVars::GetEnvVarArray("BLOCKSIZES", {256});
52+
std::vector<int> unrollList = EnvVars::GetEnvVarArray("UNROLLS", {1, 2, 3, 4, 6, 8});
53+
std::vector<int> numSesList = EnvVars::GetEnvVarArray("NUM_SUB_EXECS", {4, 8, 12, 16, 24, 32});
54+
std::vector<int> wordSizeList = EnvVars::GetEnvVarArray("WORDSIZES", {4});
55+
std::vector<int> temporalList = EnvVars::GetEnvVarArray("TEMPORAL_MODES", {0});
56+
std::vector<int> waveOrderList = EnvVars::GetEnvVarArray("WAVE_ORDERS", {0});
57+
58+
std::string const spec = EnvVars::GetEnvVar("GFX_SWEEP_TRANSFER",
59+
TransferBench::GetNumRanks() > 1 ? "G0->G0->G0" : "R0G0->R0G0->R0G0");
5760
std::string const line = LooksLikeFullTransferLine(spec) ? spec : (std::string("1 1 ") + spec);
5861

5962
std::vector<TransferBench::Transfer> transfers;
6063
TransferBench::Utils::CheckForError(TransferBench::ParseTransfers(line, transfers));
6164

6265
if (transfers.size() != 1) {
66+
if (TransferBench::GetNumRanks() > 1 && transfers.size() > 1) {
67+
TransferBench::Utils::Print(
68+
"[WARN] gfxsweep: In Multinode setting, omitted rank fields on SRC/DST/EXE are filled per rank, "
69+
"and transfers without ranks specified will expand to multiple parallel copy per node. "
70+
"gfxsweep expects exactly one entry here and forbid such entries; for a local sweep use a single rank (`-np 1`), "
71+
"or adjust GFX_SWEEP_TRANSFER / rank syntax so expansion yields one transfer.\n");
72+
}
6373
TransferBench::Utils::Print(
6474
"[ERROR] gfxsweep expects exactly one transfer after parsing (got %zu). "
6575
"Set GFX_SWEEP_TRANSFER to a single SRC EXE DST triplet or one basic/advanced line that expands to one transfer.\n",
@@ -75,74 +85,98 @@ int GfxSweepPreset(EnvVars& ev,
7585

7686
transfers[0].numBytes = numBytesPerTransfer;
7787

78-
ev.DisplayEnvVars();
79-
if (!ev.hideEnv) {
80-
if (!ev.outputToCsv)
81-
printf("[GfxSweep Related]\n");
82-
ev.Print("GFX_SWEEP_TRANSFER", spec, "Transfer spec (see config file format)");
83-
ev.Print("BLOCKSIZES", blockList.size(), EnvVars::ToStr(blockList).c_str());
84-
ev.Print("NUM_SUB_EXECS", numSesList.size(), EnvVars::ToStr(numSesList).c_str());
85-
ev.Print("SHOW_MIN_ONLY", showMinOnly, showMinOnly ? "Showing only slowest sub-executor aggregate" : "Showing slowest and fastest");
86-
ev.Print("UNROLLS", unrollList.size(), EnvVars::ToStr(unrollList).c_str());
87-
ev.Print("VERBOSE", verbose, verbose ? "Display test results" : "Display summary only");
88-
printf("\n");
88+
if (TransferBench::Utils::RankDoesOutput()) {
89+
ev.DisplayEnvVars();
90+
if (!ev.hideEnv) {
91+
if (!ev.outputToCsv)
92+
TransferBench::Utils::Print("[GfxSweep Related]\n");
93+
ev.Print("GFX_SWEEP_TRANSFER", spec, "Transfer spec (see config file format)");
94+
ev.Print("BLOCKSIZES", blockList.size(), EnvVars::ToStr(blockList).c_str());
95+
ev.Print("NUM_SUB_EXECS", numSesList.size(), EnvVars::ToStr(numSesList).c_str());
96+
ev.Print("WORDSIZES", wordSizeList.size(), EnvVars::ToStr(wordSizeList).c_str());
97+
ev.Print("TEMPORAL_MODES", temporalList.size(), EnvVars::ToStr(temporalList).c_str());
98+
ev.Print("WAVE_ORDERS", waveOrderList.size(), EnvVars::ToStr(waveOrderList).c_str());
99+
ev.Print("SHOW_MIN_ONLY", showMinOnly, showMinOnly ? "Showing only slowest sub-executor aggregate" : "Showing slowest and fastest");
100+
ev.Print("UNROLLS", unrollList.size(), EnvVars::ToStr(unrollList).c_str());
101+
ev.Print("VERBOSE", verbose, verbose ? "Display test results" : "Display summary only");
102+
TransferBench::Utils::Print("\n");
103+
}
89104
}
90105

91-
printf("GFX sweep (single transfer):\n");
92-
printf("============================\n");
93-
printf("- Parsed line: %s\n", line.c_str());
94-
printf("- %lu bytes per transfer\n", static_cast<unsigned long>(numBytesPerTransfer));
106+
TransferBench::Utils::Print("GFX sweep (single transfer):\n");
107+
TransferBench::Utils::Print("============================\n");
108+
TransferBench::Utils::Print("- Parsed line: %s\n", line.c_str());
109+
TransferBench::Utils::Print("- %lu bytes per transfer\n", static_cast<unsigned long>(numBytesPerTransfer));
95110

96111
TransferBench::ConfigOptions cfg = ev.ToConfigOptions();
97112

98-
std::map<std::pair<int, int>, TransferBench::TestResults> results;
113+
using GfxSweepKey = std::tuple<int, int, int, int, int, int>; // block, wordSize, temporal, waveOrder, subExecs, unroll
114+
std::map<GfxSweepKey, TransferBench::TestResults> results;
99115

100116
for (int blockSize : blockList) {
101-
printf("Blocksize: %d\n", blockSize);
102117
ev.gfxBlockSize = cfg.gfx.blockSize = blockSize;
103118

104-
printf("#CUs\\Unroll");
105-
for (int u : unrollList) {
106-
printf(" %d(Min) ", u);
107-
if (!showMinOnly)
108-
printf(" %d(Max) ", u);
109-
}
110-
printf("\n");
111-
112-
for (int c : numSesList) {
113-
printf(" %5d ", c);
114-
fflush(stdout);
115-
for (int u : unrollList) {
116-
ev.gfxUnroll = cfg.gfx.unrollFactor = u;
117-
transfers[0].numSubExecs = c;
118-
119-
double minBandwidth = std::numeric_limits<double>::max();
120-
double maxBandwidth = std::numeric_limits<double>::min();
121-
TransferBench::TestResults result;
122-
if (TransferBench::RunTransfers(cfg, transfers, result)) {
123-
for (auto const& exeResult : result.exeResults) {
124-
minBandwidth = std::min(minBandwidth, exeResult.second.avgBandwidthGbPerSec);
125-
maxBandwidth = std::max(maxBandwidth, exeResult.second.avgBandwidthGbPerSec);
119+
for (int wordSize : wordSizeList) {
120+
ev.gfxWordSize = cfg.gfx.wordSize = wordSize;
121+
122+
for (int temporalMode : temporalList) {
123+
ev.gfxTemporal = cfg.gfx.temporalMode = temporalMode;
124+
125+
for (int waveOrder : waveOrderList) {
126+
ev.gfxWaveOrder = cfg.gfx.waveOrder = waveOrder;
127+
128+
TransferBench::Utils::Print("Blocksize: %d WORD_SIZE: %d TEMPORAL: %d WAVE_ORDER: %d\n",
129+
blockSize, wordSize, temporalMode, waveOrder);
130+
131+
TransferBench::Utils::Print("#CUs\\Unroll");
132+
for (int u : unrollList) {
133+
TransferBench::Utils::Print(" %d(Min) ", u);
134+
if (!showMinOnly)
135+
TransferBench::Utils::Print(" %d(Max) ", u);
136+
}
137+
TransferBench::Utils::Print("\n");
138+
139+
for (int c : numSesList) {
140+
TransferBench::Utils::Print(" %5d ", c);
141+
fflush(stdout);
142+
for (int u : unrollList) {
143+
ev.gfxUnroll = cfg.gfx.unrollFactor = u;
144+
transfers[0].numSubExecs = c;
145+
146+
double minBandwidth = std::numeric_limits<double>::max();
147+
double maxBandwidth = std::numeric_limits<double>::min();
148+
TransferBench::TestResults result;
149+
GfxSweepKey const key = std::make_tuple(blockSize, wordSize, temporalMode, waveOrder, c, u);
150+
if (TransferBench::RunTransfers(cfg, transfers, result)) {
151+
for (auto const& exeResult : result.exeResults) {
152+
minBandwidth = std::min(minBandwidth, exeResult.second.avgBandwidthGbPerSec);
153+
maxBandwidth = std::max(maxBandwidth, exeResult.second.avgBandwidthGbPerSec);
154+
}
155+
results[key] = result;
156+
} else {
157+
minBandwidth = 0.0;
158+
}
159+
TransferBench::Utils::Print(" %7.2f ", minBandwidth);
160+
if (!showMinOnly)
161+
TransferBench::Utils::Print(" %7.2f ", maxBandwidth);
162+
fflush(stdout);
163+
}
164+
TransferBench::Utils::Print("\n");
165+
fflush(stdout);
126166
}
127-
results[std::make_pair(c, u)] = result;
128-
} else {
129-
minBandwidth = 0.0;
130-
}
131-
printf(" %7.2f ", minBandwidth);
132-
if (!showMinOnly)
133-
printf(" %7.2f ", maxBandwidth);
134-
fflush(stdout);
135-
}
136-
printf("\n");
137-
fflush(stdout);
138-
}
139167

140-
if (verbose) {
141-
int testNum = 0;
142-
for (int c : numSesList) {
143-
for (int u : unrollList) {
144-
printf("SubExecs: %d Unroll %d\n", c, u);
145-
TransferBench::Utils::PrintResults(ev, ++testNum, transfers, results[std::make_pair(c, u)]);
168+
if (verbose) {
169+
int testNum = 0;
170+
for (int c : numSesList) {
171+
for (int u : unrollList) {
172+
GfxSweepKey const key = std::make_tuple(blockSize, wordSize, temporalMode, waveOrder, c, u);
173+
TransferBench::Utils::Print(
174+
"Blocksize: %d WORD_SIZE: %d TEMPORAL: %d WAVE_ORDER: %d SubExecs: %d Unroll: %d\n",
175+
blockSize, wordSize, temporalMode, waveOrder, c, u);
176+
TransferBench::Utils::PrintResults(ev, ++testNum, transfers, results[key]);
177+
}
178+
}
179+
}
146180
}
147181
}
148182
}

0 commit comments

Comments
 (0)