Skip to content

Commit 137f03d

Browse files
Adding new bytesSpecified argument to presets
1 parent 5e8b30b commit 137f03d

15 files changed

Lines changed: 69 additions & 50 deletions

src/client/Presets/AllToAll.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,10 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
2020
THE SOFTWARE.
2121
*/
2222

23-
int AllToAllPreset(EnvVars& ev,
24-
size_t const numBytesPerTransfer,
25-
std::string const presetName)
23+
int AllToAllPreset(EnvVars& ev,
24+
size_t const numBytesPerTransfer,
25+
std::string const presetName,
26+
bool const bytesSpecified)
2627
{
2728
enum
2829
{

src/client/Presets/AllToAllN.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,10 @@ THE SOFTWARE.
2222

2323
#include "EnvVars.hpp"
2424

25-
int AllToAllRdmaPreset(EnvVars& ev,
26-
size_t const numBytesPerTransfer,
27-
std::string const presetName)
25+
int AllToAllRdmaPreset(EnvVars& ev,
26+
size_t const numBytesPerTransfer,
27+
std::string const presetName,
28+
bool const bytesSpecified)
2829
{
2930
if (TransferBench::GetNumRanks() > 1) {
3031
Utils::Print("[ERROR]a2an preset currently not supported for multi-node\n");

src/client/Presets/AllToAllSweep.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,10 @@ THE SOFTWARE.
2222

2323
#include "EnvVars.hpp"
2424

25-
int AllToAllSweepPreset(EnvVars& ev,
26-
size_t const numBytesPerTransfer,
27-
std::string const presetName)
25+
int AllToAllSweepPreset(EnvVars& ev,
26+
size_t const numBytesPerTransfer,
27+
std::string const presetName,
28+
bool const bytesSpecified)
2829
{
2930
if (TransferBench::GetNumRanks() > 1) {
3031
Utils::Print("[ERROR] All to All Sweep preset currently not supported for multi-node\n");

src/client/Presets/HbmBandwidth.hpp

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -167,10 +167,14 @@ struct HbmBwResult
167167
double bw[3]; // MAX | AVG | MIN
168168
};
169169

170-
int HbmBandwidthPreset(EnvVars& ev,
171-
size_t const numBytesPerTransfer,
172-
std::string const presetName)
170+
int HbmBandwidthPreset(EnvVars& ev,
171+
size_t const numBytesPerTransfer,
172+
std::string const presetName,
173+
bool const bytesSpecified)
173174
{
175+
// If bytes aren't specified, default to 1GB
176+
size_t numBytesAtLeast = (bytesSpecified ? numBytesPerTransfer : 1024 * 1024 * 1024);
177+
174178
// Determine rank information
175179
int numRanks = TransferBench::GetNumRanks();
176180
int myRank = TransferBench::GetRank();
@@ -322,7 +326,7 @@ int HbmBandwidthPreset(EnvVars& ev,
322326
ev.Print("CRITERIA" , criteria , "Reporting highest %s bandwidth (0=MAX,1=AVG,2=MIN)", criteria == 0 ? "MAX" : criteria == 1 ? "AVG" : "MIN");
323327
ev.Print("ELEM_BYTES" , EnvVars::ToStr(elemBytes).c_str() , "Element sizes in bytes to sweep over (must contain only 4,8 or 16)");
324328
ev.Print("GPU_INDICES" , EnvVars::ToStr(gpuIndices).c_str(), "GPU indices to test. Leave empty for all");
325-
ev.Print("MEM_TYPE" , memTypeIdx , "Using %s GPU memory (%s)", devMemTypeStr.c_str(), Utils::GetAllGpuMemTypeStr().c_str());
329+
ev.Print("MEM_TYPE" , memTypeIdx , "Using %s memory (%s)", devMemTypeStr.c_str(), Utils::GetAllGpuMemTypeStr().c_str());
326330
ev.Print("NUM_BUFFERS" , numBuffers , "Number of buffers to rotate through (1 per iteration)");
327331
ev.Print("NUM_ITERATIONS", numIterations , "Number of iterations to time");
328332
ev.Print("NUM_SUB_EXECS" , EnvVars::ToStr(numSesList).c_str(), "Number of subexecutors to sweep over (default to all available)");
@@ -344,16 +348,16 @@ int HbmBandwidthPreset(EnvVars& ev,
344348

345349
// Determine how much memory to allocate based on sweep setting
346350
// During each Step each threadblock works on BLOCKSIZE * UNROLL * ELEM_BYTES bytes
347-
// Each buffer will be allocated as the smallest multiple of this, larger than numBytesPerTransfer,
351+
// Each buffer will be allocated as the smallest multiple of this that is at least numBytesAtLeast,
348352
// NOTE: It's not safe to just base this on the maximum values in each sweep parameter,
349-
// (e.g if maximum size divides numBytesPerTransfer perfectly) so looping over entire space is safer
353+
// (e.g. if the maximum size divides numBytesAtLeast perfectly), so looping over the entire space is safer
350354
size_t largestTotalBytesPerBuffer = 0;
351355
for (int numSubExec : numSesList) {
352356
for (int blockSize : blockSizes) {
353357
for (int unroll : unrolls) {
354358
for (int elemByte : elemBytes) {
355359
size_t totalBytesPerStep = numSubExec * blockSize * unroll * elemByte;
356-
size_t numSteps = std::max((size_t)1, (numBytesPerTransfer + totalBytesPerStep - 1) / totalBytesPerStep);
360+
size_t numSteps = std::max((size_t)1, (numBytesAtLeast + totalBytesPerStep - 1) / totalBytesPerStep);
357361
size_t totalBytesPerBuffer = numSteps * totalBytesPerStep;
358362
if (totalBytesPerBuffer > largestTotalBytesPerBuffer) largestTotalBytesPerBuffer = totalBytesPerBuffer;
359363
}
@@ -372,7 +376,7 @@ int HbmBandwidthPreset(EnvVars& ev,
372376
// Calculate total number of tests that will be executed per GPU
373377
size_t numTests = numSesList.size() * blockSizes.size() * unrolls.size() * elemBytes.size() * (temporalMask == 3 ? 2 : 1);
374378

375-
Utils::Print("Testing (%lu configs per GPU): ", numTests);
379+
Utils::Print("Testing on at least %lu bytes (%lu configs per GPU): ", numBytesAtLeast, numTests);
376380
fflush(stdout);
377381
}
378382

@@ -439,7 +443,7 @@ int HbmBandwidthPreset(EnvVars& ev,
439443
for (int elemByte : elemBytes) {
440444
int elemByteIdx = (int)log2(elemByte) - 2;
441445
size_t totalBytesPerStep = numSubExec * blockSize * unroll * elemByte;
442-
size_t numSteps = std::max((size_t)1, (numBytesPerTransfer + totalBytesPerStep - 1) / totalBytesPerStep);
446+
size_t numSteps = std::max((size_t)1, (numBytesAtLeast + totalBytesPerStep - 1) / totalBytesPerStep);
443447
size_t totalBytes = numSteps * totalBytesPerStep;
444448

445449
for (int useNt = 0; useNt <= 1; useNt++) {

src/client/Presets/HealthCheck.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -439,9 +439,10 @@ int TestHbmPerformance(int modelId, bool verbose)
439439
return hasFail;
440440
}
441441

442-
int HealthCheckPreset(EnvVars& ev,
443-
size_t const numBytesPerTransfer,
444-
std::string const presetName)
442+
int HealthCheckPreset(EnvVars& ev,
443+
size_t const numBytesPerTransfer,
444+
std::string const presetName,
445+
bool const bytesSpecified)
445446
{
446447
if (TransferBench::GetNumRanks() > 1) {
447448
Utils::Print("[ERROR] Healthcheck preset currently not supported for multi-node\n");

src/client/Presets/NicPeerToPeer.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,10 @@ int GetClosestDeviceToNic(MemType memType, int nicIdx, int rank) {
122122
TransferBench::GetClosestGpuToNic(nicIdx, rank);
123123
}
124124

125-
int NicPeerToPeerPreset(EnvVars& ev,
126-
size_t const numBytesPerTransfer,
127-
std::string const presetName)
125+
int NicPeerToPeerPreset(EnvVars& ev,
126+
size_t const numBytesPerTransfer,
127+
std::string const presetName,
128+
bool const bytesSpecified)
128129
{
129130
if (Utils::GetNumRankGroups() > 1) {
130131
Utils::Print("[ERROR] NIC p2p preset can only be run across ranks that are homogenous\n");

src/client/Presets/NicRings.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,10 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
2020
THE SOFTWARE.
2121
*/
2222

23-
int NicRingsPreset(EnvVars& ev,
24-
size_t const numBytesPerTransfer,
25-
std::string const presetName)
23+
int NicRingsPreset(EnvVars& ev,
24+
size_t const numBytesPerTransfer,
25+
std::string const presetName,
26+
bool const bytesSpecified)
2627
{
2728

2829
// Check for single homogenous group

src/client/Presets/OneToAll.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,10 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
2020
THE SOFTWARE.
2121
*/
2222

23-
int OneToAllPreset(EnvVars& ev,
24-
size_t const numBytesPerTransfer,
25-
std::string const presetName)
23+
int OneToAllPreset(EnvVars& ev,
24+
size_t const numBytesPerTransfer,
25+
std::string const presetName,
26+
bool const bytesSpecified)
2627
{
2728
if (TransferBench::GetNumRanks() > 1) {
2829
Utils::Print("[ERROR] One-to-All preset currently not supported for multi-node\n");

src/client/Presets/PeerToPeer.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,10 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
2020
THE SOFTWARE.
2121
*/
2222

23-
int PeerToPeerPreset(EnvVars& ev,
24-
size_t const numBytesPerTransfer,
25-
std::string const presetName)
23+
int PeerToPeerPreset(EnvVars& ev,
24+
size_t const numBytesPerTransfer,
25+
std::string const presetName,
26+
bool const bytesSpecified)
2627
{
2728
if (TransferBench::GetNumRanks() > 1) {
2829
Utils::Print("[ERROR] Peer-to-peer preset currently not supported for multi-node\n");

src/client/Presets/PodAllToAll.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,10 @@ void StrideGenerate(std::vector<int>& list, int k) {
4141
list = std::move(out);
4242
}
4343

44-
int PodAllToAllPreset(EnvVars& ev,
45-
size_t const numBytesPerTransfer,
46-
std::string const presetName)
44+
int PodAllToAllPreset(EnvVars& ev,
45+
size_t const numBytesPerTransfer,
46+
std::string const presetName,
47+
bool const bytesSpecified)
4748
{
4849
enum
4950
{

0 commit comments

Comments
 (0)