Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 13 additions & 28 deletions sycl/source/detail/sycl_mem_obj_t.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,16 +165,6 @@ class SYCLMemObjT : public SYCLMemObjI {
has_property<property::image::use_host_ptr>();
}

/// Reports whether the runtime may read directly from \p HostPtr.
///
/// \param HostPtr       user-provided host pointer to inspect.
/// \param RequiredAlign alignment, in bytes, that the pointer is checked
///                      against; 0 is treated as "no alignment requirement".
/// \return true if \p HostPtr is a multiple of \p RequiredAlign, or if
///         useHostPtr() returns true regardless of alignment.
bool canReadHostPtr(void *HostPtr, const size_t RequiredAlign) {
  // Test for zero first: a modulo by zero is undefined behaviour, and a zero
  // alignment cannot be violated by any address.
  const bool Aligned =
      RequiredAlign == 0 ||
      (reinterpret_cast<std::uintptr_t>(HostPtr) % RequiredAlign) == 0;
  return Aligned || useHostPtr();
}

/// Reports whether \p HostPtr can be kept as the object's own storage.
///
/// A pointer flagged read-only (MHostPtrReadOnly) is never reusable;
/// otherwise the answer is whatever canReadHostPtr() reports for the same
/// pointer and alignment.
bool canReuseHostPtr(void *HostPtr, const size_t RequiredAlign) {
  if (MHostPtrReadOnly)
    return false;
  return canReadHostPtr(HostPtr, RequiredAlign);
}

void handleHostData(void *HostPtr, const size_t RequiredAlign) {
MHostPtrProvided = true;
if (!MHostPtrReadOnly && HostPtr) {
Expand All @@ -184,22 +174,23 @@ class SYCLMemObjT : public SYCLMemObjI {
}

if (HostPtr) {
if (canReuseHostPtr(HostPtr, RequiredAlign)) {
MUserPtr = HostPtr;
} else if (canReadHostPtr(HostPtr, RequiredAlign)) {
MUserPtr = HostPtr;
// Pass the user pointer to UR unchanged. Each adapter is responsible
// for handling pointers it cannot use directly (misaligned, not
// importable, etc.) by allocating its own backing storage and copying.
MUserPtr = HostPtr;

// For a read-only host pointer we still need a writable backing store
// if the user later creates a write accessor. Defer the allocation
// until that happens. This is adapter-independent: the language rule
// is that we may not write through a const user pointer.
if (MHostPtrReadOnly) {
std::lock_guard<std::mutex> Lock(MCreateShadowCopyMtx);
MCreateShadowCopy = [this, RequiredAlign, HostPtr]() -> void {
setAlign(RequiredAlign);
MShadowCopy = allocateHostMem();
MUserPtr = MShadowCopy;
std::memcpy(MUserPtr, HostPtr, MSizeInBytes);
};
} else {
setAlign(RequiredAlign);
MShadowCopy = allocateHostMem();
MUserPtr = MShadowCopy;
std::memcpy(MUserPtr, HostPtr, MSizeInBytes);
}
}
}
Expand All @@ -218,22 +209,16 @@ class SYCLMemObjT : public SYCLMemObjI {
if (!MHostPtrReadOnly)
set_final_data_from_storage();

if (canReuseHostPtr(HostPtr.get(), RequiredAlign)) {
MUserPtr = HostPtr.get();
} else if (canReadHostPtr(HostPtr.get(), RequiredAlign)) {
MUserPtr = HostPtr.get();
MUserPtr = HostPtr.get();

if (MHostPtrReadOnly) {
std::lock_guard<std::mutex> Lock(MCreateShadowCopyMtx);
MCreateShadowCopy = [this, RequiredAlign, HostPtr]() -> void {
setAlign(RequiredAlign);
MShadowCopy = allocateHostMem();
MUserPtr = MShadowCopy;
std::memcpy(MUserPtr, HostPtr.get(), MSizeInBytes);
};
} else {
setAlign(RequiredAlign);
MShadowCopy = allocateHostMem();
MUserPtr = MShadowCopy;
std::memcpy(MUserPtr, HostPtr.get(), MSizeInBytes);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,13 @@ bool maybeImportUSM(ze_driver_handle_t hTranslatedDriver,
if (ret == UR_RESULT_SUCCESS && properties.type == ZE_MEMORY_TYPE_UNKNOWN) {
// Promote the host ptr to USM host memory
ZeUSMImport.doZeUSMImport(hTranslatedDriver, ptr, size);
return true;

// doZeUSMImport silently ignores driver-level failures (e.g., misaligned
// ptr), so re-query to confirm the import actually succeeded before
// reporting it to callers.
ret = getMemoryAttrs(hContext, ptr, nullptr, &properties);
return ret == UR_RESULT_SUCCESS &&
properties.type != ZE_MEMORY_TYPE_UNKNOWN;
}
return false;
}
Expand Down
12 changes: 12 additions & 0 deletions unified-runtime/source/adapters/opencl/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,18 @@
#include <set>
#include <unordered_map>

/// Returns the context's cached command queue, creating it on first use.
///
/// Creation happens under SyncQueueMtx. The queue is created on the first
/// device in Devices with no queue properties. Failure is only checked by an
/// assert, so in builds where asserts are compiled out the result of a failed
/// creation is returned as-is.
cl_command_queue ur_context_handle_t_::getSyncQueue() {
  std::lock_guard<std::mutex> Guard(SyncQueueMtx);
  if (SyncQueue)
    return SyncQueue;

  cl_int Status;
  SyncQueue =
      clCreateCommandQueue(CLContext, Devices[0]->CLDevice, 0, &Status);
  assert(Status == CL_SUCCESS);
  (void)Status; // Silence the unused-variable warning when asserts are off.
  return SyncQueue;
}

ur_result_t
ur_context_handle_t_::makeWithNative(native_type Ctx, uint32_t DevCount,
const ur_device_handle_t *phDevices,
Expand Down
10 changes: 10 additions & 0 deletions unified-runtime/source/adapters/opencl/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "common/ur_ref_count.hpp"
#include "device.hpp"

#include <mutex>
#include <vector>

struct ur_context_handle_t_ : ur::opencl::handle_base {
Expand All @@ -24,6 +25,9 @@ struct ur_context_handle_t_ : ur::opencl::handle_base {
bool IsNativeHandleOwned = true;
ur::RefCount RefCount;

cl_command_queue SyncQueue = nullptr;
std::mutex SyncQueueMtx;

ur_context_handle_t_(const ur_context_handle_t_ &) = delete;
ur_context_handle_t_ &operator=(const ur_context_handle_t_ &) = delete;

Expand All @@ -39,7 +43,13 @@ struct ur_context_handle_t_ : ur::opencl::handle_base {
static ur_result_t makeWithNative(native_type Ctx, uint32_t DevCount,
const ur_device_handle_t *phDevices,
ur_context_handle_t &Context);

cl_command_queue getSyncQueue();

~ur_context_handle_t_() noexcept {
if (SyncQueue) {
clReleaseCommandQueue(SyncQueue);
}
// If we're reasonably sure this context is about to be destroyed we should
// clear the ext function pointer cache. This isn't foolproof sadly but it
// should drastically reduce the chances of the pathological case described
Expand Down
62 changes: 56 additions & 6 deletions unified-runtime/source/adapters/opencl/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,10 +336,53 @@ ur_result_t ur_mem_handle_t_::makeWithNative(native_type NativeMem,
return UR_RESULT_SUCCESS;
}

/// Reports whether \p HostPtr satisfies every device in \p hContext for
/// direct use: it must be non-null, every device must report
/// CL_DEVICE_HOST_UNIFIED_MEMORY, and the address must be a multiple of the
/// largest CL_DEVICE_MEM_BASE_ADDR_ALIGN among the devices.
///
/// If any device query fails the function answers false, so the caller takes
/// its conservative path instead of trusting default-initialised values.
///
/// \param hContext context whose devices are queried.
/// \param HostPtr  user host pointer to validate; may be null.
/// \return true only when all of the conditions above hold.
static bool canUseHostPtrDirectly(ur_context_handle_t hContext, void *HostPtr) {
  if (!HostPtr)
    return false;

  cl_uint MaxAlignBits = 0;

  for (uint32_t I = 0; I < hContext->DeviceCount; ++I) {
    const auto Device = hContext->Devices[I]->CLDevice;

    // One device without host-unified memory (or a failed query) is enough
    // to rule out direct use, so stop at the first such device.
    cl_bool Unified = CL_FALSE;
    if (clGetDeviceInfo(Device, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(Unified),
                        &Unified, nullptr) != CL_SUCCESS ||
        !Unified)
      return false;

    // A failed alignment query must not be read as "0 bits required":
    // that would let a misaligned pointer through.
    cl_uint AlignBits = 0;
    if (clGetDeviceInfo(Device, CL_DEVICE_MEM_BASE_ADDR_ALIGN,
                        sizeof(AlignBits), &AlignBits, nullptr) != CL_SUCCESS)
      return false;

    if (AlignBits > MaxAlignBits)
      MaxAlignBits = AlignBits;
  }

  // CL_DEVICE_MEM_BASE_ADDR_ALIGN is reported in bits; convert to bytes.
  const size_t RequiredAlign = MaxAlignBits / 8;

  // A zero requirement is trivially met and must not reach the modulo.
  return RequiredAlign == 0 ||
         (reinterpret_cast<uintptr_t>(HostPtr) % RequiredAlign) == 0;
}

UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate(
ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size,
const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) {
cl_int RetErr = CL_INVALID_OPERATION;

void *HostPtr = pProperties ? pProperties->pHost : nullptr;
cl_mem_flags CLFlags = convertURMemFlagsToCL(flags);

bool NeedsWriteBack = false;
if (HostPtr && (CLFlags & CL_MEM_USE_HOST_PTR)) {
if (!canUseHostPtrDirectly(hContext, HostPtr)) {
CLFlags = (CLFlags & ~CL_MEM_USE_HOST_PTR) | CL_MEM_COPY_HOST_PTR;
NeedsWriteBack = true;
}
}

if (pProperties) {
// TODO: need to check if all properties are supported by OpenCL RT and
// ignore unsupported
Expand Down Expand Up @@ -377,11 +420,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate(
PropertiesIntel.push_back(0);

try {
cl_mem Buffer = FuncPtr(
CLContext, PropertiesIntel.data(), static_cast<cl_mem_flags>(flags),
size, pProperties->pHost, static_cast<cl_int *>(&RetErr));
cl_mem Buffer =
FuncPtr(CLContext, PropertiesIntel.data(), CLFlags, size, HostPtr,
static_cast<cl_int *>(&RetErr));
CL_RETURN_ON_FAILURE(RetErr);
auto URMem = std::make_unique<ur_mem_handle_t_>(Buffer, hContext);
if (NeedsWriteBack) {
URMem->WriteBackPtr = HostPtr;
URMem->Size = size;
}
*phBuffer = URMem.release();
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
Expand All @@ -392,13 +439,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate(
}
}

void *HostPtr = pProperties ? pProperties->pHost : nullptr;
try {
cl_mem Buffer =
clCreateBuffer(hContext->CLContext, static_cast<cl_mem_flags>(flags),
size, HostPtr, static_cast<cl_int *>(&RetErr));
clCreateBuffer(hContext->CLContext, CLFlags, size, HostPtr,
static_cast<cl_int *>(&RetErr));
CL_RETURN_ON_FAILURE(RetErr);
auto URMem = std::make_unique<ur_mem_handle_t_>(Buffer, hContext);
if (NeedsWriteBack) {
URMem->WriteBackPtr = HostPtr;
URMem->Size = size;
}
*phBuffer = URMem.release();
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
Expand Down
8 changes: 8 additions & 0 deletions unified-runtime/source/adapters/opencl/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ struct ur_mem_handle_t_ : ur::opencl::handle_base {
bool IsNativeHandleOwned = true;
ur::RefCount RefCount;

void *WriteBackPtr = nullptr;
size_t Size = 0;

ur_mem_handle_t_(const ur_mem_handle_t_ &) = delete;
ur_mem_handle_t_ &operator=(const ur_mem_handle_t_ &) = delete;

Expand All @@ -31,6 +34,11 @@ struct ur_mem_handle_t_ : ur::opencl::handle_base {
}

~ur_mem_handle_t_() {
if (WriteBackPtr && IsNativeHandleOwned) {
cl_command_queue Q = Context->getSyncQueue();
clEnqueueReadBuffer(Q, CLMemory, CL_TRUE, 0, Size, WriteBackPtr, 0,
nullptr, nullptr);
}
urContextRelease(Context);
if (IsNativeHandleOwned) {
clReleaseMemObject(CLMemory);
Expand Down
Loading