diff --git a/sycl/test-e2e/USM/P2P/p2p_usm_residency.cpp b/sycl/test-e2e/USM/P2P/p2p_usm_residency.cpp index bc8cb748b19bf..268695004054b 100644 --- a/sycl/test-e2e/USM/P2P/p2p_usm_residency.cpp +++ b/sycl/test-e2e/USM/P2P/p2p_usm_residency.cpp @@ -17,9 +17,12 @@ // different pattern, enables P2P access from dev0 to dev1, then uses dev0's // queue to copy the data to the host and verifies correctness. // -// Phase 3 (negative): Allocates memory on dev0, enables then disables P2P -// access from dev1, and verifies that a subsequent device-to-device memcpy -// via dev1's queue throws an exception. +// Phase 3: Allocates memory on dev0, enables then disables P2P access from +// dev1, and verifies that a subsequent device-to-device memcpy via dev1's +// queue still succeeds (P2P controls residency, not hardware data transfer). +// +// Phase 4: Verify that a device-to-device memcpy succeeds after enabling P2P +// and that the transferred data matches the fill pattern. // // REQUIRES: level_zero && two-or-more-gpu-devices // UNSUPPORTED: level_zero_v1_adapter @@ -76,13 +79,14 @@ static bool testP2PRead(context &ctx, queue &srcQueue, device &srcDev, } // Allocate N ints on srcQueue's device, fill with fillVal, enable P2P, then -// disable P2P, and verify that a device-to-device memcpy from dstQueue fails -// (since dstDev should no longer be able to access srcDev's allocations after -// P2P is disabled). -static bool testP2PReadFailsAfterDisable(context &ctx, queue &srcQueue, - device &srcDev, queue &dstQueue, - device &dstDev, size_t N, int fillVal, - const char *label) { +// disable P2P, and verify that a device-to-device memcpy from dstQueue still +// succeeds. P2P access controls memory residency, not hardware data transfer: +// Level Zero can still move data between devices via the interconnect +// regardless of residency state. +static bool testP2PCopySucceedsAfterDisable(context &ctx, queue &srcQueue, + device &srcDev, queue &dstQueue, + device &dstDev, size_t N, + int fillVal, const char *label) { int *src = sycl::malloc_device(N, srcQueue); if (!src) { std::cout << label << ": device alloc failed (src). Skipping.\n"; @@ -98,88 +102,40 @@ static bool testP2PReadFailsAfterDisable(context &ctx, queue &srcQueue, srcQueue.fill(src, fillVal, N).wait(); - // Enable then disable P2P: dstDev should no longer be able to access - // allocations on srcDev. + // Enable then disable P2P. std::cout << "Enabling P2P (temporarily).\n"; dstDev.ext_oneapi_enable_peer_access(srcDev); - std::cout << "Disabling P2P: dstDev should no longer access srcDev.\n"; + std::cout << "Disabling P2P.\n"; dstDev.ext_oneapi_disable_peer_access(srcDev); - // Attempt a device-to-device memcpy from src (on srcDev) to dst (on dstDev) - // via dstQueue after P2P has been revoked — this should fail. - bool gotException = false; - try { - dstQueue.memcpy(dst, src, N * sizeof(int)).wait(); - } catch (sycl::exception &e) { - std::cout << label << ": memcpy threw exception: " << e.what() << "\n"; - gotException = true; - } - - sycl::free(dst, ctx); - sycl::free(src, ctx); - - if (!gotException) { - std::cout << label - << ": FAIL — device-to-device memcpy succeeded after P2P was " - "disabled\n"; - return false; - } - std::cout << label << ": OK (memcpy correctly failed after P2P disable)\n"; - return true; -} - -// Allocate N ints on srcQueue's device, fill with fillVal, and verify that a -// device-to-device memcpy from dstQueue fails without ever enabling P2P (since -// dstDev must not access srcDev's allocations when P2P has never been enabled). -static bool testP2PReadFailsWithoutEnable(context &ctx, queue &srcQueue, - device &srcDev, queue &dstQueue, - device &dstDev, size_t N, int fillVal, - const char *label) { - (void)srcDev; - (void)dstDev; - - int *src = sycl::malloc_device(N, srcQueue); - if (!src) { - std::cout << label << ": device alloc failed (src). Skipping.\n"; - return true; - } - - int *dst = sycl::malloc_device(N, dstQueue); - if (!dst) { - std::cout << label << ": device alloc failed (dst). Skipping.\n"; - sycl::free(src, ctx); - return true; - } - - srcQueue.fill(src, fillVal, N).wait(); - - // Attempt a device-to-device memcpy without ever enabling P2P — must fail. - bool gotException = false; + // Attempt a device-to-device memcpy after P2P has been revoked — must still + // succeed because the adapter no longer blocks cross-device copies. + bool copyOk = true; try { dstQueue.memcpy(dst, src, N * sizeof(int)).wait(); } catch (sycl::exception &e) { std::cout << label << ": memcpy threw exception: " << e.what() << "\n"; - gotException = true; + copyOk = false; } sycl::free(dst, ctx); sycl::free(src, ctx); - if (!gotException) { + if (!copyOk) { std::cout << label - << ": FAIL — device-to-device memcpy succeeded without P2P\n"; + << ": FAIL — device-to-device memcpy failed after P2P disable\n"; return false; } - std::cout << label << ": OK (memcpy correctly failed without P2P)\n"; + std::cout << label << ": OK (memcpy succeeded after P2P disable)\n"; return true; } -// Verify the transition from blocked to permitted using the same allocation: -// first attempt a device-to-device memcpy from dstQueue without P2P (must -// fail), then enable P2P and retry the copy (must succeed with correct data). -static bool testP2PReadFailsThenSucceedsAfterEnable( - context &ctx, queue &srcQueue, device &srcDev, queue &dstQueue, - device &dstDev, size_t N, int fillVal, const char *label) { +// Verify that a device-to-device memcpy from dstQueue succeeds after P2P is +// enabled and that the transferred data matches the fill pattern. +static bool testP2PReadSucceedsAfterEnable(context &ctx, queue &srcQueue, + device &srcDev, queue &dstQueue, + device &dstDev, size_t N, + int fillVal, const char *label) { int *src = sycl::malloc_device(N, srcQueue); if (!src) { std::cout << label << ": device alloc failed (src). Skipping.\n"; @@ -195,27 +151,10 @@ static bool testP2PReadFailsThenSucceedsAfterEnable( srcQueue.fill(src, fillVal, N).wait(); - // Without P2P the copy must fail. - bool gotException = false; - try { - dstQueue.memcpy(dst, src, N * sizeof(int)).wait(); - } catch (sycl::exception &e) { - std::cout << label << ": first memcpy (no P2P) threw: " << e.what() << "\n"; - gotException = true; - } - - if (!gotException) { - std::cout << label << ": FAIL — first memcpy succeeded without P2P\n"; - sycl::free(dst, ctx); - sycl::free(src, ctx); - return false; - } - // Enable P2P: dstDev may now access allocations on srcDev. std::cout << label << ": enabling P2P.\n"; dstDev.ext_oneapi_enable_peer_access(srcDev); - // Retry — must succeed now. bool copyOk = true; std::vector result(N, 0); try { @@ -223,8 +162,7 @@ static bool testP2PReadFailsThenSucceedsAfterEnable( // Read back to host for verification. dstQueue.memcpy(result.data(), dst, N * sizeof(int)).wait(); } catch (sycl::exception &e) { - std::cout << label << ": second memcpy (P2P enabled) threw: " << e.what() - << "\n"; + std::cout << label << ": memcpy (P2P enabled) threw: " << e.what() << "\n"; copyOk = false; } @@ -243,7 +181,7 @@ static bool testP2PReadFailsThenSucceedsAfterEnable( return false; } } - std::cout << label << ": OK (failed without P2P, succeeded after enable)\n"; + std::cout << label << ": OK\n"; return true; } @@ -302,24 +240,17 @@ int main() { "Phase 2 (dev0 reads dev1)")) return 1; - // Phase 3: verify that memcpy fails after P2P is disabled. - std::cout << "Phase 3: verify memcpy fails after P2P is disabled.\n"; - if (!testP2PReadFailsAfterDisable(ctx, q0, dev0, q1, dev1, N, 0x77, - "Phase 3 (dev1 reads dev0 after disable)")) - return 1; - - // Phase 4: verify that memcpy fails without ever enabling P2P. - std::cout << "Phase 4: verify memcpy fails without ever enabling P2P.\n"; - if (!testP2PReadFailsWithoutEnable(ctx, q0, dev0, q1, dev1, N, 0x99, - "Phase 4 (dev1 reads dev0 without P2P)")) + // Phase 3: verify that memcpy still succeeds after P2P is disabled. + std::cout << "Phase 3: verify memcpy still succeeds after P2P is disabled.\n"; + if (!testP2PCopySucceedsAfterDisable( + ctx, q0, dev0, q1, dev1, N, 0x77, + "Phase 3 (dev1 reads dev0 after disable)")) return 1; - // Phase 5: verify the transition from blocked to permitted. - std::cout << "Phase 5: verify memcpy fails without P2P then succeeds after " - "enabling it.\n"; - if (!testP2PReadFailsThenSucceedsAfterEnable( - ctx, q0, dev0, q1, dev1, N, 0xAA, - "Phase 5 (dev1 reads dev0: fail then succeed)")) + // Phase 4: verify that memcpy succeeds after enabling P2P. + std::cout << "Phase 4: verify memcpy succeeds after enabling P2P.\n"; + if (!testP2PReadSucceedsAfterEnable(ctx, q0, dev0, q1, dev1, N, 0xAA, + "Phase 4 (dev1 reads dev0 after enable)")) return 1; std::cout << "PASS\n"; diff --git a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp index ed63a1c4ebe7a..942db4013d1ee 100644 --- a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp @@ -352,61 +352,11 @@ ur_result_t ur_command_list_manager::appendKernelLaunch( return UR_RESULT_SUCCESS; } -// Check P2P access for a device-to-device memcpy. Returns -// UR_RESULT_ERROR_INVALID_OPERATION when the destination is device memory, -// the source is device memory residing on a different device, and peer access -// between those two devices has not been enabled. In all other cases -// (host/shared memory, same device, or unknown allocation type) returns -// UR_RESULT_SUCCESS so the copy can proceed. -static ur_result_t checkP2PAccess(ze_context_handle_t zeContext, - const void *pDst, const void *pSrc, - ur_context_handle_t urContext, - ur_device_handle_t urDevice) { - ZeStruct dstProps; - ze_device_handle_t dstZeDevice = nullptr; - if (ZE_CALL_NOCHECK(zeMemGetAllocProperties, - (zeContext, pDst, &dstProps, &dstZeDevice)) != - ZE_RESULT_SUCCESS || - dstProps.type != ZE_MEMORY_TYPE_DEVICE) { - return UR_RESULT_SUCCESS; - } - - ZeStruct srcProps; - ze_device_handle_t srcZeDevice = nullptr; - if (ZE_CALL_NOCHECK(zeMemGetAllocProperties, - (zeContext, pSrc, &srcProps, &srcZeDevice)) != - ZE_RESULT_SUCCESS || - srcProps.type != ZE_MEMORY_TYPE_DEVICE || !srcZeDevice || - srcZeDevice == urDevice->ZeDevice) { - return UR_RESULT_SUCCESS; - } - - auto *srcDevice = - urContext->getPlatform()->getDeviceFromNativeHandle(srcZeDevice); - if (!srcDevice || !srcDevice->Id.has_value() || !urDevice->Id.has_value() || - urDevice->Id.value() >= srcDevice->peers.size()) { - return UR_RESULT_SUCCESS; - } - - std::scoped_lock lock(srcDevice->Mutex); - if (srcDevice->peers[urDevice->Id.value()] != - ur_device_handle_t_::PeerStatus::ENABLED) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - return UR_RESULT_SUCCESS; -} - ur_result_t ur_command_list_manager::appendUSMMemcpy( bool blocking, void *pDst, const void *pSrc, size_t size, wait_list_view &waitListView, ur_event_handle_t phEvent) { TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMMemcpy"); - // Verify P2P access when copying between device allocations on different - // devices. Copies to/from host or shared memory always succeed. - UR_CALL(checkP2PAccess(hContext.get()->getZeHandle(), pDst, pSrc, - hContext.get(), hDevice.get())); - auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_MEMCPY); auto [pWaitEvents, numWaitEvents, _] = waitListView; diff --git a/unified-runtime/test/adapters/level_zero/v2/memory_residency.cpp b/unified-runtime/test/adapters/level_zero/v2/memory_residency.cpp index 83797172a67b9..8d91eaa5a4ead 100644 --- a/unified-runtime/test/adapters/level_zero/v2/memory_residency.cpp +++ b/unified-runtime/test/adapters/level_zero/v2/memory_residency.cpp @@ -277,54 +277,6 @@ TEST_P(urMemoryMultiResidencyTest, [](uint8_t b) { return b == fillPattern; })); } -// Verify that the end-to-end P2P data transfer in -// enablePeerAccessStateMachineAndSourceAllocation fails when P2P is disabled. -// The adapter returns UR_RESULT_ERROR_INVALID_OPERATION from urEnqueueUSMMemcpy -// because the source pointer on devices[0] is not accessible from devices[1]. -TEST_P(urMemoryMultiResidencyTest, - enablePeerAccessStateMachineAndSourceAllocationFailsWithoutP2P) { - constexpr size_t allocSize = kAllocSize; - uint64_t initialMemFreeSource = 0; - ASSERT_SUCCESS(urDeviceGetInfo(devices[0], UR_DEVICE_INFO_GLOBAL_MEM_FREE, - sizeof(uint64_t), &initialMemFreeSource, - nullptr)); - if (initialMemFreeSource < allocSize) { - GTEST_SKIP() << "Not enough source device memory available"; - } - - // Allocate on devices[0] WITHOUT enabling P2P. - void *ptr = nullptr; - ASSERT_SUCCESS( - urUSMDeviceAlloc(context, devices[0], nullptr, nullptr, allocSize, &ptr)); - - // Fill ptr on devices[0] with a known pattern using devices[0]'s queue. - static constexpr uint8_t fillPattern = 0xAB; - ur_queue_handle_t srcQueue = nullptr; - ASSERT_SUCCESS(urQueueCreate(context, devices[0], nullptr, &srcQueue)); - ASSERT_SUCCESS(urEnqueueUSMFill(srcQueue, ptr, sizeof(fillPattern), - &fillPattern, allocSize, 0, nullptr, - nullptr)); - ASSERT_SUCCESS(urQueueFinish(srcQueue)); - urQueueRelease(srcQueue); - - // Attempt P2P copy: devices[1]'s queue reads ptr from devices[0] — should - // fail because P2P is disabled. - void *dstPtr = nullptr; - ASSERT_SUCCESS(urUSMDeviceAlloc(context, devices[1], nullptr, nullptr, - allocSize, &dstPtr)); - ur_queue_handle_t peerQueue = nullptr; - ASSERT_SUCCESS(urQueueCreate(context, devices[1], nullptr, &peerQueue)); - - ur_result_t copyResult = urEnqueueUSMMemcpy(peerQueue, true, dstPtr, ptr, - allocSize, 0, nullptr, nullptr); - - urQueueRelease(peerQueue); - urUSMFree(context, dstPtr); - ASSERT_SUCCESS(urUSMFree(context, ptr)); - - ASSERT_EQ(copyResult, UR_RESULT_ERROR_INVALID_OPERATION); -} - // Verify that disabling peer access succeeds and that a second disable attempt // returns UR_RESULT_ERROR_INVALID_OPERATION (access already disabled). // Source-device free memory is not checked because deferred frees from earlier @@ -415,9 +367,9 @@ TEST_P(urMemoryMultiResidencyTest, p2pReadSucceedsWithPeerAccessEnabled) { [](uint8_t b) { return b == fillPattern; })); } -// Verify the transition from blocked to permitted: attempt a USM copy from -// devices[1]'s queue without P2P (must fail), then enable P2P on the same -// allocation and retry the copy (must succeed with correct data). +// Verify that a USM copy from devices[1]'s queue succeeds after P2P is +// enabled, and that the data transferred matches the fill pattern written on +// devices[0]. TEST_P(urMemoryMultiResidencyTest, p2pReadSucceedsAfterEnablingAccess) { constexpr size_t allocSize = kAllocSize; static constexpr uint8_t fillPattern = 0xCD; @@ -457,11 +409,6 @@ TEST_P(urMemoryMultiResidencyTest, p2pReadSucceedsAfterEnablingAccess) { ur_queue_handle_t peerQueue = nullptr; ASSERT_SUCCESS(urQueueCreate(context, devices[1], nullptr, &peerQueue)); - // Without P2P the copy must be rejected. - ur_result_t copyWithoutP2P = urEnqueueUSMMemcpy( - peerQueue, true, dstPtr, srcPtr, allocSize, 0, nullptr, nullptr); - ASSERT_EQ(copyWithoutP2P, UR_RESULT_ERROR_INVALID_OPERATION); - // Enable P2P: devices[1] can now access allocations on devices[0]. ASSERT_SUCCESS(urUsmP2PEnablePeerAccessExp(devices[1], devices[0])); peerAccessEnabled = true; @@ -488,12 +435,13 @@ TEST_P(urMemoryMultiResidencyTest, p2pReadSucceedsAfterEnablingAccess) { [](uint8_t b) { return b == fillPattern; })); } -// Verify that revoking peer access from devices[1] to devices[0] prevents -// subsequent USM copies from devices[1]'s queue. A successful copy is first +// Verify that a USM copy from devices[1]'s queue succeeds even after peer +// access has been revoked. P2P access controls memory residency, not hardware +// data transfer: Level Zero can still move data between devices via the +// interconnect regardless of residency state. A successful copy is first // performed with P2P enabled to confirm the setup is correct; then P2P is -// disabled and the same copy is expected to fail with -// UR_RESULT_ERROR_INVALID_OPERATION. -TEST_P(urMemoryMultiResidencyTest, p2pReadFailsAfterRevokingAccess) { +// disabled and the same copy is expected to succeed. +TEST_P(urMemoryMultiResidencyTest, p2pCopySucceedsAfterRevokingAccess) { constexpr size_t allocSize = kAllocSize; uint64_t initialMemFreeSource = 0; @@ -534,7 +482,7 @@ TEST_P(urMemoryMultiResidencyTest, p2pReadFailsAfterRevokingAccess) { ASSERT_SUCCESS(urUsmP2PDisablePeerAccessExp(devices[1], devices[0])); peerAccessEnabled = false; - // Copy must now fail: devices[1] can no longer access srcPtr on devices[0]. + // Copy must still succeed: P2P controls residency, not hardware transfer. ur_result_t copyResult = urEnqueueUSMMemcpy(peerQueue, true, dstPtr, srcPtr, allocSize, 0, nullptr, nullptr); @@ -542,7 +490,7 @@ TEST_P(urMemoryMultiResidencyTest, p2pReadFailsAfterRevokingAccess) { urUSMFree(context, dstPtr); ASSERT_SUCCESS(urUSMFree(context, srcPtr)); - ASSERT_EQ(copyResult, UR_RESULT_ERROR_INVALID_OPERATION); + ASSERT_SUCCESS(copyResult); } // Verify that a USM allocation on devices[0] is NOT made resident on