Fix #107356: Cycles: improve oneAPI error handling
This commit is contained in:
parent
8775cf804e
commit
1dcc8e6ffa
@ -302,6 +302,12 @@ void OneapiDevice::mem_copy_to(device_memory &mem)
|
|||||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* After getting runtime errors we need to avoid performing oneAPI runtime operations
|
||||||
|
* because the associated GPU context may be in an invalid state at this point. */
|
||||||
|
if (have_error()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (mem.type == MEM_GLOBAL) {
|
if (mem.type == MEM_GLOBAL) {
|
||||||
global_free(mem);
|
global_free(mem);
|
||||||
global_alloc(mem);
|
global_alloc(mem);
|
||||||
@ -334,6 +340,12 @@ void OneapiDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t
|
|||||||
<< " data " << size << " bytes";
|
<< " data " << size << " bytes";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* After getting runtime errors we need to avoid performing oneAPI runtime operations
|
||||||
|
* because the associated GPU context may be in an invalid state at this point. */
|
||||||
|
if (have_error()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
assert(device_queue_);
|
assert(device_queue_);
|
||||||
|
|
||||||
assert(size != 0);
|
assert(size != 0);
|
||||||
@ -357,6 +369,12 @@ void OneapiDevice::mem_zero(device_memory &mem)
|
|||||||
<< string_human_readable_size(mem.memory_size()) << ")\n";
|
<< string_human_readable_size(mem.memory_size()) << ")\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* After getting runtime errors we need to avoid performing oneAPI runtime operations
|
||||||
|
* because the associated GPU context may be in an invalid state at this point. */
|
||||||
|
if (have_error()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (!mem.device_pointer) {
|
if (!mem.device_pointer) {
|
||||||
mem_alloc(mem);
|
mem_alloc(mem);
|
||||||
}
|
}
|
||||||
@ -602,33 +620,33 @@ bool OneapiDevice::usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t n
|
|||||||
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
|
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
|
||||||
OneapiDevice::check_usm(queue_, dest, true);
|
OneapiDevice::check_usm(queue_, dest, true);
|
||||||
OneapiDevice::check_usm(queue_, src, true);
|
OneapiDevice::check_usm(queue_, src, true);
|
||||||
sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
|
|
||||||
# ifdef WITH_CYCLES_DEBUG
|
|
||||||
try {
|
try {
|
||||||
|
sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
|
||||||
|
# ifdef WITH_CYCLES_DEBUG
|
||||||
/* NOTE(@nsirgien) Waiting on memory operation may give more precise error
|
/* NOTE(@nsirgien) Waiting on memory operation may give more precise error
|
||||||
* messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
|
* messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
|
||||||
*/
|
*/
|
||||||
mem_event.wait_and_throw();
|
mem_event.wait_and_throw();
|
||||||
return true;
|
return true;
|
||||||
|
# else
|
||||||
|
sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context());
|
||||||
|
sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context());
|
||||||
|
bool from_device_to_host = dest_type == sycl::usm::alloc::host &&
|
||||||
|
src_type == sycl::usm::alloc::device;
|
||||||
|
bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown ||
|
||||||
|
src_type == sycl::usm::alloc::unknown;
|
||||||
|
/* NOTE(@sirgienko) Host-side blocking wait on this operation is mandatory, otherwise the host
|
||||||
|
* may not wait until the end of the transfer before using the memory.
|
||||||
|
*/
|
||||||
|
if (from_device_to_host || host_or_device_memop_with_offset)
|
||||||
|
mem_event.wait();
|
||||||
|
return true;
|
||||||
|
# endif
|
||||||
}
|
}
|
||||||
catch (sycl::exception const &e) {
|
catch (sycl::exception const &e) {
|
||||||
oneapi_error_string_ = e.what();
|
oneapi_error_string_ = e.what();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
# else
|
|
||||||
sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context());
|
|
||||||
sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context());
|
|
||||||
bool from_device_to_host = dest_type == sycl::usm::alloc::host &&
|
|
||||||
src_type == sycl::usm::alloc::device;
|
|
||||||
bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown ||
|
|
||||||
src_type == sycl::usm::alloc::unknown;
|
|
||||||
/* NOTE(@sirgienko) Host-side blocking wait on this operation is mandatory, otherwise the host
|
|
||||||
* may not wait until the end of the transfer before using the memory.
|
|
||||||
*/
|
|
||||||
if (from_device_to_host || host_or_device_memop_with_offset)
|
|
||||||
mem_event.wait();
|
|
||||||
return true;
|
|
||||||
# endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OneapiDevice::usm_memset(SyclQueue *queue_,
|
bool OneapiDevice::usm_memset(SyclQueue *queue_,
|
||||||
@ -639,23 +657,22 @@ bool OneapiDevice::usm_memset(SyclQueue *queue_,
|
|||||||
assert(queue_);
|
assert(queue_);
|
||||||
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
|
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
|
||||||
OneapiDevice::check_usm(queue_, usm_ptr, true);
|
OneapiDevice::check_usm(queue_, usm_ptr, true);
|
||||||
sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes);
|
|
||||||
# ifdef WITH_CYCLES_DEBUG
|
|
||||||
try {
|
try {
|
||||||
|
sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes);
|
||||||
|
# ifdef WITH_CYCLES_DEBUG
|
||||||
/* NOTE(@nsirgien) Waiting on memory operation may give more precise error
|
/* NOTE(@nsirgien) Waiting on memory operation may give more precise error
|
||||||
* messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
|
* messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
|
||||||
*/
|
*/
|
||||||
mem_event.wait_and_throw();
|
mem_event.wait_and_throw();
|
||||||
|
# else
|
||||||
|
(void)mem_event;
|
||||||
|
# endif
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
catch (sycl::exception const &e) {
|
catch (sycl::exception const &e) {
|
||||||
oneapi_error_string_ = e.what();
|
oneapi_error_string_ = e.what();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
# else
|
|
||||||
(void)mem_event;
|
|
||||||
return true;
|
|
||||||
# endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OneapiDevice::queue_synchronize(SyclQueue *queue_)
|
bool OneapiDevice::queue_synchronize(SyclQueue *queue_)
|
||||||
|
Loading…
Reference in New Issue
Block a user