File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -14,7 +14,7 @@ Documentation for TransferBench is available at
1414- Adding NIC_CQ_POLL_BATCH to control CQ poll batch size for NIC transfers
1515- New "hbm" preset which sweeps and tests local HBM read performance
1616- Added a new TB_WALLCLOCK_RATE that will override GPU GFX wallclock rate if it returns 0 (debug)
17- - Adding new batched-DMA executor "B", which utilizes the hipMemcpyBatchAsync API introduced in HIP 7.0
17+ - Adding new batched-DMA executor "B", which utilizes the hipMemcpyBatchAsync API introduced in HIP 7.1
1818- Added new bmasweep preset that compares DMA to batched DMA execution for parallel transfers to other GPUs
1919
2020### Modified
Original file line number Diff line number Diff line change @@ -79,9 +79,8 @@ THE SOFTWARE.
7979/// @endcond
8080
8181// Batched DMA executor is only supported with HIP >= 7.1
82- #if defined(__HIP_PLATFORM_AMD__) && \
83- defined (HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR >= 7 ) && \
84- defined(HIP_VERSION_MINOR) && (HIP_VERSION_MINOR >= 1 )
82+ #if defined(__HIP_PLATFORM_AMD__) && defined(HIP_VERSION_MAJOR) && defined(HIP_VERSION_MINOR) && \
83+ ((HIP_VERSION_MAJOR > 7 ) || (HIP_VERSION_MAJOR == 7 && HIP_VERSION_MINOR >= 1 ))
8584#define BMA_EXEC_ENABLED
8685#endif
8786
@@ -3892,7 +3891,7 @@ static bool IsConfiguredGid(union ibv_gid const& gid)
38923891 rss.batchSrcs .clear ();
38933892 rss.batchBytes .clear ();
38943893
3895- if (transfer.exeDevice .exeType == EXE_GPU_BMDA ) {
3894+ if (transfer.exeDevice .exeType == EXE_GPU_BDMA ) {
38963895 for (int i = 0 ; i < transfer.numSubExecs ; ++i) {
38973896 for (int j = 0 ; j < (int )rss.dstMem .size (); j++) {
38983897 rss.batchSrcs .push_back (subExecParam[i].src [0 ]);
You can’t perform that action at this time.
0 commit comments