From da5b343e0a8ab0105814ebeb51c01e73905c3c34 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 27 May 2026 14:59:11 +0000 Subject: [PATCH] [UR][L0v2] Remove checkP2PAccess() from appendUSMMemcpy The checkP2PAccess() helper in command_list_manager.cpp rejected device-to-device USM memcpy operations between devices without P2P access enabled, returning UR_RESULT_ERROR_INVALID_OPERATION. Remove the check: P2P access controls memory residency (pinning), not hardware data transfer. Level Zero can move data between devices via the interconnect regardless of residency state, so blocking the copy at the UR level is unnecessary and overly restrictive. Update tests to match the new behavior. Signed-off-by: Lukasz Dorau --- sycl/test-e2e/USM/P2P/p2p_usm_residency.cpp | 149 +++++------------- .../level_zero/v2/command_list_manager.cpp | 50 ------ .../level_zero/v2/memory_residency.cpp | 74 ++------- 3 files changed, 51 insertions(+), 222 deletions(-) diff --git a/sycl/test-e2e/USM/P2P/p2p_usm_residency.cpp b/sycl/test-e2e/USM/P2P/p2p_usm_residency.cpp index bc8cb748b19bf..268695004054b 100644 --- a/sycl/test-e2e/USM/P2P/p2p_usm_residency.cpp +++ b/sycl/test-e2e/USM/P2P/p2p_usm_residency.cpp @@ -17,9 +17,12 @@ // different pattern, enables P2P access from dev0 to dev1, then uses dev0's // queue to copy the data to the host and verifies correctness. // -// Phase 3 (negative): Allocates memory on dev0, enables then disables P2P -// access from dev1, and verifies that a subsequent device-to-device memcpy -// via dev1's queue throws an exception. +// Phase 3: Allocates memory on dev0, enables then disables P2P access from +// dev1, and verifies that a subsequent device-to-device memcpy via dev1's +// queue still succeeds (P2P controls residency, not hardware data transfer). +// +// Phase 4: Verify that a device-to-device memcpy succeeds after enabling P2P +// and that the transferred data matches the fill pattern. // // REQUIRES: level_zero && two-or-more-gpu-devices // UNSUPPORTED: level_zero_v1_adapter @@ -76,13 +79,14 @@ static bool testP2PRead(context &ctx, queue &srcQueue, device &srcDev, } // Allocate N ints on srcQueue's device, fill with fillVal, enable P2P, then -// disable P2P, and verify that a device-to-device memcpy from dstQueue fails -// (since dstDev should no longer be able to access srcDev's allocations after -// P2P is disabled). -static bool testP2PReadFailsAfterDisable(context &ctx, queue &srcQueue, - device &srcDev, queue &dstQueue, - device &dstDev, size_t N, int fillVal, - const char *label) { +// disable P2P, and verify that a device-to-device memcpy from dstQueue still +// succeeds. P2P access controls memory residency, not hardware data transfer: +// Level Zero can still move data between devices via the interconnect +// regardless of residency state. +static bool testP2PCopySucceedsAfterDisable(context &ctx, queue &srcQueue, + device &srcDev, queue &dstQueue, + device &dstDev, size_t N, + int fillVal, const char *label) { int *src = sycl::malloc_device(N, srcQueue); if (!src) { std::cout << label << ": device alloc failed (src). Skipping.\n"; @@ -98,88 +102,40 @@ static bool testP2PReadFailsAfterDisable(context &ctx, queue &srcQueue, srcQueue.fill(src, fillVal, N).wait(); - // Enable then disable P2P: dstDev should no longer be able to access - // allocations on srcDev. + // Enable then disable P2P. std::cout << "Enabling P2P (temporarily).\n"; dstDev.ext_oneapi_enable_peer_access(srcDev); - std::cout << "Disabling P2P: dstDev should no longer access srcDev.\n"; + std::cout << "Disabling P2P.\n"; dstDev.ext_oneapi_disable_peer_access(srcDev); - // Attempt a device-to-device memcpy from src (on srcDev) to dst (on dstDev) - // via dstQueue after P2P has been revoked — this should fail. - bool gotException = false; - try { - dstQueue.memcpy(dst, src, N * sizeof(int)).wait(); - } catch (sycl::exception &e) { - std::cout << label << ": memcpy threw exception: " << e.what() << "\n"; - gotException = true; - } - - sycl::free(dst, ctx); - sycl::free(src, ctx); - - if (!gotException) { - std::cout << label - << ": FAIL — device-to-device memcpy succeeded after P2P was " - "disabled\n"; - return false; - } - std::cout << label << ": OK (memcpy correctly failed after P2P disable)\n"; - return true; -} - -// Allocate N ints on srcQueue's device, fill with fillVal, and verify that a -// device-to-device memcpy from dstQueue fails without ever enabling P2P (since -// dstDev must not access srcDev's allocations when P2P has never been enabled). -static bool testP2PReadFailsWithoutEnable(context &ctx, queue &srcQueue, - device &srcDev, queue &dstQueue, - device &dstDev, size_t N, int fillVal, - const char *label) { - (void)srcDev; - (void)dstDev; - - int *src = sycl::malloc_device(N, srcQueue); - if (!src) { - std::cout << label << ": device alloc failed (src). Skipping.\n"; - return true; - } - - int *dst = sycl::malloc_device(N, dstQueue); - if (!dst) { - std::cout << label << ": device alloc failed (dst). Skipping.\n"; - sycl::free(src, ctx); - return true; - } - - srcQueue.fill(src, fillVal, N).wait(); - - // Attempt a device-to-device memcpy without ever enabling P2P — must fail. - bool gotException = false; + // Attempt a device-to-device memcpy after P2P has been revoked — must still + // succeed because the adapter no longer blocks cross-device copies. + bool copyOk = true; try { dstQueue.memcpy(dst, src, N * sizeof(int)).wait(); } catch (sycl::exception &e) { std::cout << label << ": memcpy threw exception: " << e.what() << "\n"; - gotException = true; + copyOk = false; } sycl::free(dst, ctx); sycl::free(src, ctx); - if (!gotException) { + if (!copyOk) { std::cout << label - << ": FAIL — device-to-device memcpy succeeded without P2P\n"; + << ": FAIL — device-to-device memcpy failed after P2P disable\n"; return false; } - std::cout << label << ": OK (memcpy correctly failed without P2P)\n"; + std::cout << label << ": OK (memcpy succeeded after P2P disable)\n"; return true; } -// Verify the transition from blocked to permitted using the same allocation: -// first attempt a device-to-device memcpy from dstQueue without P2P (must -// fail), then enable P2P and retry the copy (must succeed with correct data). -static bool testP2PReadFailsThenSucceedsAfterEnable( - context &ctx, queue &srcQueue, device &srcDev, queue &dstQueue, - device &dstDev, size_t N, int fillVal, const char *label) { +// Verify that a device-to-device memcpy from dstQueue succeeds after P2P is +// enabled and that the transferred data matches the fill pattern. +static bool testP2PReadSucceedsAfterEnable(context &ctx, queue &srcQueue, + device &srcDev, queue &dstQueue, + device &dstDev, size_t N, + int fillVal, const char *label) { int *src = sycl::malloc_device(N, srcQueue); if (!src) { std::cout << label << ": device alloc failed (src). Skipping.\n"; @@ -195,27 +151,10 @@ static bool testP2PReadFailsThenSucceedsAfterEnable( srcQueue.fill(src, fillVal, N).wait(); - // Without P2P the copy must fail. - bool gotException = false; - try { - dstQueue.memcpy(dst, src, N * sizeof(int)).wait(); - } catch (sycl::exception &e) { - std::cout << label << ": first memcpy (no P2P) threw: " << e.what() << "\n"; - gotException = true; - } - - if (!gotException) { - std::cout << label << ": FAIL — first memcpy succeeded without P2P\n"; - sycl::free(dst, ctx); - sycl::free(src, ctx); - return false; - } - // Enable P2P: dstDev may now access allocations on srcDev. std::cout << label << ": enabling P2P.\n"; dstDev.ext_oneapi_enable_peer_access(srcDev); - // Retry — must succeed now. bool copyOk = true; std::vector result(N, 0); try { @@ -223,8 +162,7 @@ static bool testP2PReadFailsThenSucceedsAfterEnable( // Read back to host for verification. dstQueue.memcpy(result.data(), dst, N * sizeof(int)).wait(); } catch (sycl::exception &e) { - std::cout << label << ": second memcpy (P2P enabled) threw: " << e.what() - << "\n"; + std::cout << label << ": memcpy (P2P enabled) threw: " << e.what() << "\n"; copyOk = false; } @@ -243,7 +181,7 @@ static bool testP2PReadFailsThenSucceedsAfterEnable( return false; } } - std::cout << label << ": OK (failed without P2P, succeeded after enable)\n"; + std::cout << label << ": OK\n"; return true; } @@ -302,24 +240,17 @@ int main() { "Phase 2 (dev0 reads dev1)")) return 1; - // Phase 3: verify that memcpy fails after P2P is disabled. - std::cout << "Phase 3: verify memcpy fails after P2P is disabled.\n"; - if (!testP2PReadFailsAfterDisable(ctx, q0, dev0, q1, dev1, N, 0x77, - "Phase 3 (dev1 reads dev0 after disable)")) - return 1; - - // Phase 4: verify that memcpy fails without ever enabling P2P. - std::cout << "Phase 4: verify memcpy fails without ever enabling P2P.\n"; - if (!testP2PReadFailsWithoutEnable(ctx, q0, dev0, q1, dev1, N, 0x99, - "Phase 4 (dev1 reads dev0 without P2P)")) + // Phase 3: verify that memcpy still succeeds after P2P is disabled. + std::cout << "Phase 3: verify memcpy still succeeds after P2P is disabled.\n"; + if (!testP2PCopySucceedsAfterDisable( + ctx, q0, dev0, q1, dev1, N, 0x77, + "Phase 3 (dev1 reads dev0 after disable)")) return 1; - // Phase 5: verify the transition from blocked to permitted. - std::cout << "Phase 5: verify memcpy fails without P2P then succeeds after " - "enabling it.\n"; - if (!testP2PReadFailsThenSucceedsAfterEnable( - ctx, q0, dev0, q1, dev1, N, 0xAA, - "Phase 5 (dev1 reads dev0: fail then succeed)")) + // Phase 4: verify that memcpy succeeds after enabling P2P. + std::cout << "Phase 4: verify memcpy succeeds after enabling P2P.\n"; + if (!testP2PReadSucceedsAfterEnable(ctx, q0, dev0, q1, dev1, N, 0xAA, + "Phase 4 (dev1 reads dev0 after enable)")) return 1; std::cout << "PASS\n"; diff --git a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp index ed63a1c4ebe7a..942db4013d1ee 100644 --- a/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp @@ -352,61 +352,11 @@ ur_result_t ur_command_list_manager::appendKernelLaunch( return UR_RESULT_SUCCESS; } -// Check P2P access for a device-to-device memcpy. Returns -// UR_RESULT_ERROR_INVALID_OPERATION when the destination is device memory, -// the source is device memory residing on a different device, and peer access -// between those two devices has not been enabled. In all other cases -// (host/shared memory, same device, or unknown allocation type) returns -// UR_RESULT_SUCCESS so the copy can proceed. -static ur_result_t checkP2PAccess(ze_context_handle_t zeContext, - const void *pDst, const void *pSrc, - ur_context_handle_t urContext, - ur_device_handle_t urDevice) { - ZeStruct dstProps; - ze_device_handle_t dstZeDevice = nullptr; - if (ZE_CALL_NOCHECK(zeMemGetAllocProperties, - (zeContext, pDst, &dstProps, &dstZeDevice)) != - ZE_RESULT_SUCCESS || - dstProps.type != ZE_MEMORY_TYPE_DEVICE) { - return UR_RESULT_SUCCESS; - } - - ZeStruct srcProps; - ze_device_handle_t srcZeDevice = nullptr; - if (ZE_CALL_NOCHECK(zeMemGetAllocProperties, - (zeContext, pSrc, &srcProps, &srcZeDevice)) != - ZE_RESULT_SUCCESS || - srcProps.type != ZE_MEMORY_TYPE_DEVICE || !srcZeDevice || - srcZeDevice == urDevice->ZeDevice) { - return UR_RESULT_SUCCESS; - } - - auto *srcDevice = - urContext->getPlatform()->getDeviceFromNativeHandle(srcZeDevice); - if (!srcDevice || !srcDevice->Id.has_value() || !urDevice->Id.has_value() || - urDevice->Id.value() >= srcDevice->peers.size()) { - return UR_RESULT_SUCCESS; - } - - std::scoped_lock lock(srcDevice->Mutex); - if (srcDevice->peers[urDevice->Id.value()] != - ur_device_handle_t_::PeerStatus::ENABLED) { - return UR_RESULT_ERROR_INVALID_OPERATION; - } - - return UR_RESULT_SUCCESS; -} - ur_result_t ur_command_list_manager::appendUSMMemcpy( bool blocking, void *pDst, const void *pSrc, size_t size, wait_list_view &waitListView, ur_event_handle_t phEvent) { TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMMemcpy"); - // Verify P2P access when copying between device allocations on different - // devices. Copies to/from host or shared memory always succeed. - UR_CALL(checkP2PAccess(hContext.get()->getZeHandle(), pDst, pSrc, - hContext.get(), hDevice.get())); - auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_MEMCPY); auto [pWaitEvents, numWaitEvents, _] = waitListView; diff --git a/unified-runtime/test/adapters/level_zero/v2/memory_residency.cpp b/unified-runtime/test/adapters/level_zero/v2/memory_residency.cpp index 83797172a67b9..8d91eaa5a4ead 100644 --- a/unified-runtime/test/adapters/level_zero/v2/memory_residency.cpp +++ b/unified-runtime/test/adapters/level_zero/v2/memory_residency.cpp @@ -277,54 +277,6 @@ TEST_P(urMemoryMultiResidencyTest, [](uint8_t b) { return b == fillPattern; })); } -// Verify that the end-to-end P2P data transfer in -// enablePeerAccessStateMachineAndSourceAllocation fails when P2P is disabled. -// The adapter returns UR_RESULT_ERROR_INVALID_OPERATION from urEnqueueUSMMemcpy -// because the source pointer on devices[0] is not accessible from devices[1]. -TEST_P(urMemoryMultiResidencyTest, - enablePeerAccessStateMachineAndSourceAllocationFailsWithoutP2P) { - constexpr size_t allocSize = kAllocSize; - uint64_t initialMemFreeSource = 0; - ASSERT_SUCCESS(urDeviceGetInfo(devices[0], UR_DEVICE_INFO_GLOBAL_MEM_FREE, - sizeof(uint64_t), &initialMemFreeSource, - nullptr)); - if (initialMemFreeSource < allocSize) { - GTEST_SKIP() << "Not enough source device memory available"; - } - - // Allocate on devices[0] WITHOUT enabling P2P. - void *ptr = nullptr; - ASSERT_SUCCESS( - urUSMDeviceAlloc(context, devices[0], nullptr, nullptr, allocSize, &ptr)); - - // Fill ptr on devices[0] with a known pattern using devices[0]'s queue. - static constexpr uint8_t fillPattern = 0xAB; - ur_queue_handle_t srcQueue = nullptr; - ASSERT_SUCCESS(urQueueCreate(context, devices[0], nullptr, &srcQueue)); - ASSERT_SUCCESS(urEnqueueUSMFill(srcQueue, ptr, sizeof(fillPattern), - &fillPattern, allocSize, 0, nullptr, - nullptr)); - ASSERT_SUCCESS(urQueueFinish(srcQueue)); - urQueueRelease(srcQueue); - - // Attempt P2P copy: devices[1]'s queue reads ptr from devices[0] — should - // fail because P2P is disabled. - void *dstPtr = nullptr; - ASSERT_SUCCESS(urUSMDeviceAlloc(context, devices[1], nullptr, nullptr, - allocSize, &dstPtr)); - ur_queue_handle_t peerQueue = nullptr; - ASSERT_SUCCESS(urQueueCreate(context, devices[1], nullptr, &peerQueue)); - - ur_result_t copyResult = urEnqueueUSMMemcpy(peerQueue, true, dstPtr, ptr, - allocSize, 0, nullptr, nullptr); - - urQueueRelease(peerQueue); - urUSMFree(context, dstPtr); - ASSERT_SUCCESS(urUSMFree(context, ptr)); - - ASSERT_EQ(copyResult, UR_RESULT_ERROR_INVALID_OPERATION); -} - // Verify that disabling peer access succeeds and that a second disable attempt // returns UR_RESULT_ERROR_INVALID_OPERATION (access already disabled). // Source-device free memory is not checked because deferred frees from earlier @@ -415,9 +367,9 @@ TEST_P(urMemoryMultiResidencyTest, p2pReadSucceedsWithPeerAccessEnabled) { [](uint8_t b) { return b == fillPattern; })); } -// Verify the transition from blocked to permitted: attempt a USM copy from -// devices[1]'s queue without P2P (must fail), then enable P2P on the same -// allocation and retry the copy (must succeed with correct data). +// Verify that a USM copy from devices[1]'s queue succeeds after P2P is +// enabled, and that the data transferred matches the fill pattern written on +// devices[0]. TEST_P(urMemoryMultiResidencyTest, p2pReadSucceedsAfterEnablingAccess) { constexpr size_t allocSize = kAllocSize; static constexpr uint8_t fillPattern = 0xCD; @@ -457,11 +409,6 @@ TEST_P(urMemoryMultiResidencyTest, p2pReadSucceedsAfterEnablingAccess) { ur_queue_handle_t peerQueue = nullptr; ASSERT_SUCCESS(urQueueCreate(context, devices[1], nullptr, &peerQueue)); - // Without P2P the copy must be rejected. - ur_result_t copyWithoutP2P = urEnqueueUSMMemcpy( - peerQueue, true, dstPtr, srcPtr, allocSize, 0, nullptr, nullptr); - ASSERT_EQ(copyWithoutP2P, UR_RESULT_ERROR_INVALID_OPERATION); - // Enable P2P: devices[1] can now access allocations on devices[0]. ASSERT_SUCCESS(urUsmP2PEnablePeerAccessExp(devices[1], devices[0])); peerAccessEnabled = true; @@ -488,12 +435,13 @@ TEST_P(urMemoryMultiResidencyTest, p2pReadSucceedsAfterEnablingAccess) { [](uint8_t b) { return b == fillPattern; })); } -// Verify that revoking peer access from devices[1] to devices[0] prevents -// subsequent USM copies from devices[1]'s queue. A successful copy is first +// Verify that a USM copy from devices[1]'s queue succeeds even after peer +// access has been revoked. P2P access controls memory residency, not hardware +// data transfer: Level Zero can still move data between devices via the +// interconnect regardless of residency state. A successful copy is first // performed with P2P enabled to confirm the setup is correct; then P2P is -// disabled and the same copy is expected to fail with -// UR_RESULT_ERROR_INVALID_OPERATION. -TEST_P(urMemoryMultiResidencyTest, p2pReadFailsAfterRevokingAccess) { +// disabled and the same copy is expected to succeed. +TEST_P(urMemoryMultiResidencyTest, p2pCopySucceedsAfterRevokingAccess) { constexpr size_t allocSize = kAllocSize; uint64_t initialMemFreeSource = 0; @@ -534,7 +482,7 @@ TEST_P(urMemoryMultiResidencyTest, p2pReadFailsAfterRevokingAccess) { ASSERT_SUCCESS(urUsmP2PDisablePeerAccessExp(devices[1], devices[0])); peerAccessEnabled = false; - // Copy must now fail: devices[1] can no longer access srcPtr on devices[0]. + // Copy must still succeed: P2P controls residency, not hardware transfer. ur_result_t copyResult = urEnqueueUSMMemcpy(peerQueue, true, dstPtr, srcPtr, allocSize, 0, nullptr, nullptr); @@ -542,7 +490,7 @@ TEST_P(urMemoryMultiResidencyTest, p2pReadFailsAfterRevokingAccess) { urUSMFree(context, dstPtr); ASSERT_SUCCESS(urUSMFree(context, srcPtr)); - ASSERT_EQ(copyResult, UR_RESULT_ERROR_INVALID_OPERATION); + ASSERT_SUCCESS(copyResult); } // Verify that a USM allocation on devices[0] is NOT made resident on