@@ -318,18 +318,19 @@ int HbmBandwidthPreset(EnvVars& ev,
318318 {
319319 if (!ev.outputToCsv ) Utils::Print (" [HBM Bandwidth Related]\n " );
320320 if (Utils::RankDoesOutput ()) {
321- ev.Print (" BLOCKSIZES" , EnvVars::ToStr (blockSizes).c_str (), " Threadblock sizes to sweep over" );
322- ev.Print (" CRITERIA" , criteria , " Reporting highest %s bandwidth" , criteria == 0 ? " MAX" : criteria == 1 ? " AVG" : " MIN" );
323- ev.Print (" ELEM_BYTES" , EnvVars::ToStr (elemBytes).c_str () , " Element sizes in bytes to sweep over" );
321+ ev.Print (" BLOCKSIZES" , EnvVars::ToStr (blockSizes).c_str (), " Threadblock sizes to sweep over (multiple of 128 up to 1024) " );
322+ ev.Print (" CRITERIA" , criteria , " Reporting highest %s bandwidth (0=MAX,1=AVG,2=MIN) " , criteria == 0 ? " MAX" : criteria == 1 ? " AVG" : " MIN" );
323+ ev.Print (" ELEM_BYTES" , EnvVars::ToStr (elemBytes).c_str () , " Element sizes in bytes to sweep over (must contain only 4,8 or 16) " );
324324 ev.Print (" GPU_INDICES" , EnvVars::ToStr (gpuIndices).c_str (), " GPU indices to test. Leave empty for all" );
325325 ev.Print (" MEM_TYPE" , memTypeIdx , " Using %s GPU memory (%s)" , devMemTypeStr.c_str (), Utils::GetAllGpuMemTypeStr ().c_str ());
326326 ev.Print (" NUM_BUFFERS" , numBuffers , " Number of buffers to rotate through (1 per iteration)" );
327327 ev.Print (" NUM_ITERATIONS" , numIterations , " Number of iterations to time" );
328- ev.Print (" NUM_SUB_EXECS" , EnvVars::ToStr (numSesList).c_str (), " Number of subexecutors to sweep over" );
328+ ev.Print (" NUM_SUB_EXECS" , EnvVars::ToStr (numSesList).c_str (), " Number of subexecutors to sweep over (default to all available) " );
329329 ev.Print (" PREWARM_MSEC" , prewarmMsec , " Prewarm duration in msec" );
330- ev.Print (" SHOW_DETAILS" , showDetails , " Show sweep details (ignored for multi-rank)" );
330+ ev.Print (" SHOW_DETAILS" , showDetails , " Show sweep details (ignored for multi-rank). Setting to 2 shows per iteration output" );
331+ ev.Print (" SHOW_EXTRA" , showExtra , " Show best sweep config details" );
331332 ev.Print (" TEMPORAL_MASK" , temporalMask , " Temporal mask (1 = temporal, 2 = non-temporal, 3 = both)" );
332- ev.Print (" UNROLLS" , EnvVars::ToStr (unrolls).c_str () , " Unroll factors to sweep over" );
333+ ev.Print (" UNROLLS" , EnvVars::ToStr (unrolls).c_str () , " Unroll factors to sweep over (must contain only 1,2,4,8 or 16) " );
333334 ev.Print (" USE_WALLCLOCK" , useWallClock , useWallClock ? " Using GPU wall-clock for timing" : " Using events for timing" );
334335 Utils::Print (" \n " );
335336 }
0 commit comments