forked from bartvdbraak/blender
Cycles: add hardware raytracing support to oneAPI device
Updated Embree 4 library with GPU support is required for it to be compiled - compatiblity with Embree 3 and Embree 4 without GPU support is maintained. Enabling hardware raytracing is an opt-in user setting for now. Pull Request: https://projects.blender.org/blender/blender/pulls/106266
This commit is contained in:
parent
887022257d
commit
3f8c995109
@ -281,6 +281,9 @@ endif()
|
|||||||
|
|
||||||
if(WITH_CYCLES_EMBREE)
|
if(WITH_CYCLES_EMBREE)
|
||||||
add_definitions(-DWITH_EMBREE)
|
add_definitions(-DWITH_EMBREE)
|
||||||
|
if(WITH_CYCLES_DEVICE_ONEAPI AND EMBREE_SYCL_SUPPORT)
|
||||||
|
add_definitions(-DWITH_EMBREE_GPU)
|
||||||
|
endif()
|
||||||
add_definitions(-DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION})
|
add_definitions(-DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION})
|
||||||
include_directories(
|
include_directories(
|
||||||
SYSTEM
|
SYSTEM
|
||||||
|
@ -1544,6 +1544,13 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
|||||||
default=False,
|
default=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
use_oneapirt: BoolProperty(
|
||||||
|
name="Embree on GPU (Experimental)",
|
||||||
|
description="Embree GPU execution will allow to use hardware ray tracing on Intel GPUs, which will provide better performance. "
|
||||||
|
"However this support is experimental and some scenes may render incorrectly",
|
||||||
|
default=False,
|
||||||
|
)
|
||||||
|
|
||||||
kernel_optimization_level: EnumProperty(
|
kernel_optimization_level: EnumProperty(
|
||||||
name="Kernel Optimization",
|
name="Kernel Optimization",
|
||||||
description="Kernels can be optimized based on scene content. Optimized kernels are requested at the start of a render. "
|
description="Kernels can be optimized based on scene content. Optimized kernels are requested at the start of a render. "
|
||||||
@ -1763,6 +1770,11 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
|||||||
col.prop(self, "kernel_optimization_level")
|
col.prop(self, "kernel_optimization_level")
|
||||||
col.prop(self, "use_metalrt")
|
col.prop(self, "use_metalrt")
|
||||||
|
|
||||||
|
if compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu:
|
||||||
|
row = layout.row()
|
||||||
|
row.use_property_split = True
|
||||||
|
row.prop(self, "use_oneapirt")
|
||||||
|
|
||||||
def draw(self, context):
|
def draw(self, context):
|
||||||
self.draw_impl(self.layout, context)
|
self.draw_impl(self.layout, context)
|
||||||
|
|
||||||
|
@ -112,9 +112,26 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences,
|
|||||||
device.has_peer_memory = false;
|
device.has_peer_memory = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (get_boolean(cpreferences, "use_metalrt")) {
|
bool accumulated_use_hardware_raytracing = false;
|
||||||
device.use_metalrt = true;
|
foreach (
|
||||||
|
DeviceInfo &info,
|
||||||
|
(device.multi_devices.size() != 0 ? device.multi_devices : vector<DeviceInfo>({device}))) {
|
||||||
|
if (info.type == DEVICE_METAL && !get_boolean(cpreferences, "use_metalrt")) {
|
||||||
|
info.use_hardware_raytracing = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (info.type == DEVICE_ONEAPI && !get_boolean(cpreferences, "use_oneapirt")) {
|
||||||
|
info.use_hardware_raytracing = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* There is an accumulative logic here, because Multidevices are support only for
|
||||||
|
* the same backend + CPU in Blender right now, and both oneAPI and Metal have a
|
||||||
|
* global boolean backend setting (see above) for enabling/disabling HW RT,
|
||||||
|
* so all subdevices in the multidevice should enable (or disable) HW RT
|
||||||
|
* simultaneously (and CPU device are expected to ignore "use_hardware_raytracing" setting) */
|
||||||
|
accumulated_use_hardware_raytracing |= info.use_hardware_raytracing;
|
||||||
}
|
}
|
||||||
|
device.use_hardware_raytracing = accumulated_use_hardware_raytracing;
|
||||||
|
|
||||||
if (preview) {
|
if (preview) {
|
||||||
/* Disable specialization for preview renders. */
|
/* Disable specialization for preview renders. */
|
||||||
|
@ -1034,6 +1034,14 @@ void *CCL_python_module_init()
|
|||||||
Py_INCREF(Py_False);
|
Py_INCREF(Py_False);
|
||||||
#endif /* WITH_EMBREE */
|
#endif /* WITH_EMBREE */
|
||||||
|
|
||||||
|
#ifdef WITH_EMBREE_GPU
|
||||||
|
PyModule_AddObject(mod, "with_embree_gpu", Py_True);
|
||||||
|
Py_INCREF(Py_True);
|
||||||
|
#else /* WITH_EMBREE_GPU */
|
||||||
|
PyModule_AddObject(mod, "with_embree_gpu", Py_False);
|
||||||
|
Py_INCREF(Py_False);
|
||||||
|
#endif /* WITH_EMBREE_GPU */
|
||||||
|
|
||||||
if (ccl::openimagedenoise_supported()) {
|
if (ccl::openimagedenoise_supported()) {
|
||||||
PyModule_AddObject(mod, "with_openimagedenoise", Py_True);
|
PyModule_AddObject(mod, "with_openimagedenoise", Py_True);
|
||||||
Py_INCREF(Py_True);
|
Py_INCREF(Py_True);
|
||||||
|
@ -354,7 +354,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
|
|||||||
info.has_guiding = true;
|
info.has_guiding = true;
|
||||||
info.has_profiling = true;
|
info.has_profiling = true;
|
||||||
info.has_peer_memory = false;
|
info.has_peer_memory = false;
|
||||||
info.use_metalrt = false;
|
info.use_hardware_raytracing = false;
|
||||||
info.denoisers = DENOISER_ALL;
|
info.denoisers = DENOISER_ALL;
|
||||||
|
|
||||||
foreach (const DeviceInfo &device, subdevices) {
|
foreach (const DeviceInfo &device, subdevices) {
|
||||||
@ -403,7 +403,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
|
|||||||
info.has_guiding &= device.has_guiding;
|
info.has_guiding &= device.has_guiding;
|
||||||
info.has_profiling &= device.has_profiling;
|
info.has_profiling &= device.has_profiling;
|
||||||
info.has_peer_memory |= device.has_peer_memory;
|
info.has_peer_memory |= device.has_peer_memory;
|
||||||
info.use_metalrt |= device.use_metalrt;
|
info.use_hardware_raytracing |= device.use_hardware_raytracing;
|
||||||
info.denoisers &= device.denoisers;
|
info.denoisers &= device.denoisers;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -71,15 +71,16 @@ class DeviceInfo {
|
|||||||
string description;
|
string description;
|
||||||
string id; /* used for user preferences, should stay fixed with changing hardware config */
|
string id; /* used for user preferences, should stay fixed with changing hardware config */
|
||||||
int num;
|
int num;
|
||||||
bool display_device; /* GPU is used as a display device. */
|
bool display_device; /* GPU is used as a display device. */
|
||||||
bool has_nanovdb; /* Support NanoVDB volumes. */
|
bool has_nanovdb; /* Support NanoVDB volumes. */
|
||||||
bool has_light_tree; /* Support light tree. */
|
bool has_light_tree; /* Support light tree. */
|
||||||
bool has_osl; /* Support Open Shading Language. */
|
bool has_osl; /* Support Open Shading Language. */
|
||||||
bool has_guiding; /* Support path guiding. */
|
bool has_guiding; /* Support path guiding. */
|
||||||
bool has_profiling; /* Supports runtime collection of profiling info. */
|
bool has_profiling; /* Supports runtime collection of profiling info. */
|
||||||
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
|
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
|
||||||
bool has_gpu_queue; /* Device supports GPU queue. */
|
bool has_gpu_queue; /* Device supports GPU queue. */
|
||||||
bool use_metalrt; /* Use MetalRT to accelerate ray queries (Metal only). */
|
bool use_hardware_raytracing; /* Use hardware ray tracing to accelerate ray queries in a backend.
|
||||||
|
*/
|
||||||
KernelOptimizationLevel kernel_optimization_level; /* Optimization level applied to path tracing
|
KernelOptimizationLevel kernel_optimization_level; /* Optimization level applied to path tracing
|
||||||
* kernels (Metal only). */
|
* kernels (Metal only). */
|
||||||
DenoiserTypeMask denoisers; /* Supported denoiser types. */
|
DenoiserTypeMask denoisers; /* Supported denoiser types. */
|
||||||
@ -101,7 +102,7 @@ class DeviceInfo {
|
|||||||
has_profiling = false;
|
has_profiling = false;
|
||||||
has_peer_memory = false;
|
has_peer_memory = false;
|
||||||
has_gpu_queue = false;
|
has_gpu_queue = false;
|
||||||
use_metalrt = false;
|
use_hardware_raytracing = false;
|
||||||
denoisers = DENOISER_NONE;
|
denoisers = DENOISER_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,7 +3,9 @@
|
|||||||
|
|
||||||
#include "device/kernel.h"
|
#include "device/kernel.h"
|
||||||
|
|
||||||
#include "util/log.h"
|
#ifndef __KERNEL_ONEAPI__
|
||||||
|
# include "util/log.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
@ -153,10 +155,13 @@ const char *device_kernel_as_string(DeviceKernel kernel)
|
|||||||
case DEVICE_KERNEL_NUM:
|
case DEVICE_KERNEL_NUM:
|
||||||
break;
|
break;
|
||||||
};
|
};
|
||||||
|
#ifndef __KERNEL_ONEAPI__
|
||||||
LOG(FATAL) << "Unhandled kernel " << static_cast<int>(kernel) << ", should never happen.";
|
LOG(FATAL) << "Unhandled kernel " << static_cast<int>(kernel) << ", should never happen.";
|
||||||
|
#endif
|
||||||
return "UNKNOWN";
|
return "UNKNOWN";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef __KERNEL_ONEAPI__
|
||||||
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel)
|
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel)
|
||||||
{
|
{
|
||||||
os << device_kernel_as_string(kernel);
|
os << device_kernel_as_string(kernel);
|
||||||
@ -178,5 +183,6 @@ string device_kernel_mask_as_string(DeviceKernelMask mask)
|
|||||||
|
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
@ -3,11 +3,13 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "kernel/types.h"
|
#ifndef __KERNEL_ONEAPI__
|
||||||
|
# include "kernel/types.h"
|
||||||
|
|
||||||
#include "util/string.h"
|
# include "util/string.h"
|
||||||
|
|
||||||
#include <ostream> // NOLINT
|
# include <ostream> // NOLINT
|
||||||
|
#endif
|
||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
@ -15,9 +17,12 @@ bool device_kernel_has_shading(DeviceKernel kernel);
|
|||||||
bool device_kernel_has_intersection(DeviceKernel kernel);
|
bool device_kernel_has_intersection(DeviceKernel kernel);
|
||||||
|
|
||||||
const char *device_kernel_as_string(DeviceKernel kernel);
|
const char *device_kernel_as_string(DeviceKernel kernel);
|
||||||
|
|
||||||
|
#ifndef __KERNEL_ONEAPI__
|
||||||
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel);
|
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel);
|
||||||
|
|
||||||
typedef uint64_t DeviceKernelMask;
|
typedef uint64_t DeviceKernelMask;
|
||||||
string device_kernel_mask_as_string(DeviceKernelMask mask);
|
string device_kernel_mask_as_string(DeviceKernelMask mask);
|
||||||
|
#endif
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
@ -100,12 +100,12 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
|||||||
}
|
}
|
||||||
case METAL_GPU_AMD: {
|
case METAL_GPU_AMD: {
|
||||||
max_threads_per_threadgroup = 128;
|
max_threads_per_threadgroup = 128;
|
||||||
use_metalrt = info.use_metalrt;
|
use_metalrt = info.use_hardware_raytracing;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case METAL_GPU_APPLE: {
|
case METAL_GPU_APPLE: {
|
||||||
max_threads_per_threadgroup = 512;
|
max_threads_per_threadgroup = 512;
|
||||||
use_metalrt = info.use_metalrt;
|
use_metalrt = info.use_hardware_raytracing;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -87,7 +87,8 @@ Device *device_oneapi_create(const DeviceInfo &info, Stats &stats, Profiler &pro
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef WITH_ONEAPI
|
#ifdef WITH_ONEAPI
|
||||||
static void device_iterator_cb(const char *id, const char *name, int num, void *user_ptr)
|
static void device_iterator_cb(
|
||||||
|
const char *id, const char *name, int num, bool hwrt_support, void *user_ptr)
|
||||||
{
|
{
|
||||||
vector<DeviceInfo> *devices = (vector<DeviceInfo> *)user_ptr;
|
vector<DeviceInfo> *devices = (vector<DeviceInfo> *)user_ptr;
|
||||||
|
|
||||||
@ -112,6 +113,12 @@ static void device_iterator_cb(const char *id, const char *name, int num, void *
|
|||||||
/* NOTE(@nsirgien): Seems not possible to know from SYCL/oneAPI or Level0. */
|
/* NOTE(@nsirgien): Seems not possible to know from SYCL/oneAPI or Level0. */
|
||||||
info.display_device = false;
|
info.display_device = false;
|
||||||
|
|
||||||
|
# if WITH_EMBREE_GPU
|
||||||
|
info.use_hardware_raytracing = hwrt_support;
|
||||||
|
# else
|
||||||
|
info.use_hardware_raytracing = false;
|
||||||
|
# endif
|
||||||
|
|
||||||
devices->push_back(info);
|
devices->push_back(info);
|
||||||
VLOG_INFO << "Added device \"" << name << "\" with id \"" << info.id << "\".";
|
VLOG_INFO << "Added device \"" << name << "\" with id \"" << info.id << "\".";
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,19 @@
|
|||||||
# include "util/debug.h"
|
# include "util/debug.h"
|
||||||
# include "util/log.h"
|
# include "util/log.h"
|
||||||
|
|
||||||
|
# ifdef WITH_EMBREE_GPU
|
||||||
|
# include "bvh/embree.h"
|
||||||
|
# endif
|
||||||
|
|
||||||
# include "kernel/device/oneapi/globals.h"
|
# include "kernel/device/oneapi/globals.h"
|
||||||
|
# include "kernel/device/oneapi/kernel.h"
|
||||||
|
|
||||||
|
# if defined(WITH_EMBREE_GPU) && defined(EMBREE_SYCL_SUPPORT) && !defined(SYCL_LANGUAGE_VERSION)
|
||||||
|
/* These declarations are missing from embree headers when compiling from a compiler that doesn't
|
||||||
|
* support SYCL. */
|
||||||
|
extern "C" RTCDevice rtcNewSYCLDevice(sycl::context context, const char *config);
|
||||||
|
extern "C" bool rtcIsSYCLDeviceSupported(const sycl::device sycl_device);
|
||||||
|
# endif
|
||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
@ -22,16 +34,29 @@ static void queue_error_cb(const char *message, void *user_ptr)
|
|||||||
OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||||
: Device(info, stats, profiler),
|
: Device(info, stats, profiler),
|
||||||
device_queue_(nullptr),
|
device_queue_(nullptr),
|
||||||
|
# ifdef WITH_EMBREE_GPU
|
||||||
|
embree_device(nullptr),
|
||||||
|
embree_scene(nullptr),
|
||||||
|
# endif
|
||||||
texture_info_(this, "texture_info", MEM_GLOBAL),
|
texture_info_(this, "texture_info", MEM_GLOBAL),
|
||||||
kg_memory_(nullptr),
|
kg_memory_(nullptr),
|
||||||
kg_memory_device_(nullptr),
|
kg_memory_device_(nullptr),
|
||||||
kg_memory_size_(0)
|
kg_memory_size_(0)
|
||||||
{
|
{
|
||||||
need_texture_info_ = false;
|
need_texture_info_ = false;
|
||||||
|
use_hardware_raytracing = info.use_hardware_raytracing;
|
||||||
|
|
||||||
oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);
|
oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);
|
||||||
|
|
||||||
bool is_finished_ok = create_queue(device_queue_, info.num);
|
bool is_finished_ok = create_queue(device_queue_,
|
||||||
|
info.num,
|
||||||
|
# ifdef WITH_EMBREE_GPU
|
||||||
|
use_hardware_raytracing ? &embree_device : nullptr
|
||||||
|
# else
|
||||||
|
nullptr
|
||||||
|
# endif
|
||||||
|
);
|
||||||
|
|
||||||
if (is_finished_ok == false) {
|
if (is_finished_ok == false) {
|
||||||
set_error("oneAPI queue initialization error: got runtime exception \"" +
|
set_error("oneAPI queue initialization error: got runtime exception \"" +
|
||||||
oneapi_error_string_ + "\"");
|
oneapi_error_string_ + "\"");
|
||||||
@ -42,6 +67,16 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi
|
|||||||
assert(device_queue_);
|
assert(device_queue_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ifdef WITH_EMBREE_GPU
|
||||||
|
use_hardware_raytracing = use_hardware_raytracing && (embree_device != nullptr);
|
||||||
|
# else
|
||||||
|
use_hardware_raytracing = false;
|
||||||
|
# endif
|
||||||
|
|
||||||
|
if (use_hardware_raytracing) {
|
||||||
|
VLOG_INFO << "oneAPI will use hardware ray tracing for intersection acceleration.";
|
||||||
|
}
|
||||||
|
|
||||||
size_t globals_segment_size;
|
size_t globals_segment_size;
|
||||||
is_finished_ok = kernel_globals_size(globals_segment_size);
|
is_finished_ok = kernel_globals_size(globals_segment_size);
|
||||||
if (is_finished_ok == false) {
|
if (is_finished_ok == false) {
|
||||||
@ -64,6 +99,11 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi
|
|||||||
|
|
||||||
OneapiDevice::~OneapiDevice()
|
OneapiDevice::~OneapiDevice()
|
||||||
{
|
{
|
||||||
|
# ifdef WITH_EMBREE_GPU
|
||||||
|
if (embree_device)
|
||||||
|
rtcReleaseDevice(embree_device);
|
||||||
|
# endif
|
||||||
|
|
||||||
texture_info_.free();
|
texture_info_.free();
|
||||||
usm_free(device_queue_, kg_memory_);
|
usm_free(device_queue_, kg_memory_);
|
||||||
usm_free(device_queue_, kg_memory_device_);
|
usm_free(device_queue_, kg_memory_device_);
|
||||||
@ -82,13 +122,36 @@ bool OneapiDevice::check_peer_access(Device * /*peer_device*/)
|
|||||||
|
|
||||||
BVHLayoutMask OneapiDevice::get_bvh_layout_mask() const
|
BVHLayoutMask OneapiDevice::get_bvh_layout_mask() const
|
||||||
{
|
{
|
||||||
return BVH_LAYOUT_BVH2;
|
return use_hardware_raytracing ? BVH_LAYOUT_EMBREE : BVH_LAYOUT_BVH2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ifdef WITH_EMBREE_GPU
|
||||||
|
void OneapiDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||||
|
{
|
||||||
|
if (embree_device && bvh->params.bvh_layout == BVH_LAYOUT_EMBREE) {
|
||||||
|
BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
|
||||||
|
if (refit) {
|
||||||
|
bvh_embree->refit(progress);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
bvh_embree->build(progress, &stats, embree_device);
|
||||||
|
}
|
||||||
|
if (bvh->params.top_level) {
|
||||||
|
embree_scene = bvh_embree->scene;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
Device::build_bvh(bvh, progress, refit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
bool OneapiDevice::load_kernels(const uint requested_features)
|
bool OneapiDevice::load_kernels(const uint requested_features)
|
||||||
{
|
{
|
||||||
assert(device_queue_);
|
assert(device_queue_);
|
||||||
|
|
||||||
|
kernel_features = requested_features;
|
||||||
|
|
||||||
bool is_finished_ok = oneapi_run_test_kernel(device_queue_);
|
bool is_finished_ok = oneapi_run_test_kernel(device_queue_);
|
||||||
if (is_finished_ok == false) {
|
if (is_finished_ok == false) {
|
||||||
set_error("oneAPI test kernel execution: got a runtime exception \"" + oneapi_error_string_ +
|
set_error("oneAPI test kernel execution: got a runtime exception \"" + oneapi_error_string_ +
|
||||||
@ -327,6 +390,16 @@ void OneapiDevice::const_copy_to(const char *name, void *host, size_t size)
|
|||||||
<< string_human_readable_number(size) << " bytes. ("
|
<< string_human_readable_number(size) << " bytes. ("
|
||||||
<< string_human_readable_size(size) << ")";
|
<< string_human_readable_size(size) << ")";
|
||||||
|
|
||||||
|
# ifdef WITH_EMBREE_GPU
|
||||||
|
if (strcmp(name, "data") == 0) {
|
||||||
|
assert(size <= sizeof(KernelData));
|
||||||
|
|
||||||
|
/* Update scene handle(since it is different for each device on multi devices) */
|
||||||
|
KernelData *const data = (KernelData *)host;
|
||||||
|
data->device_bvh = embree_scene;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
ConstMemMap::iterator i = const_mem_map_.find(name);
|
ConstMemMap::iterator i = const_mem_map_.find(name);
|
||||||
device_vector<uchar> *data;
|
device_vector<uchar> *data;
|
||||||
|
|
||||||
@ -446,7 +519,9 @@ void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_
|
|||||||
# endif
|
# endif
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OneapiDevice::create_queue(SyclQueue *&external_queue, int device_index)
|
bool OneapiDevice::create_queue(SyclQueue *&external_queue,
|
||||||
|
int device_index,
|
||||||
|
void *embree_device_pointer)
|
||||||
{
|
{
|
||||||
bool finished_correct = true;
|
bool finished_correct = true;
|
||||||
try {
|
try {
|
||||||
@ -457,6 +532,11 @@ bool OneapiDevice::create_queue(SyclQueue *&external_queue, int device_index)
|
|||||||
sycl::queue *created_queue = new sycl::queue(devices[device_index],
|
sycl::queue *created_queue = new sycl::queue(devices[device_index],
|
||||||
sycl::property::queue::in_order());
|
sycl::property::queue::in_order());
|
||||||
external_queue = reinterpret_cast<SyclQueue *>(created_queue);
|
external_queue = reinterpret_cast<SyclQueue *>(created_queue);
|
||||||
|
# ifdef WITH_EMBREE_GPU
|
||||||
|
if (embree_device_pointer) {
|
||||||
|
*((RTCDevice *)embree_device_pointer) = rtcNewSYCLDevice(created_queue->get_context(), "");
|
||||||
|
}
|
||||||
|
# endif
|
||||||
}
|
}
|
||||||
catch (sycl::exception const &e) {
|
catch (sycl::exception const &e) {
|
||||||
finished_correct = false;
|
finished_correct = false;
|
||||||
@ -625,7 +705,8 @@ bool OneapiDevice::enqueue_kernel(KernelContext *kernel_context,
|
|||||||
size_t global_size,
|
size_t global_size,
|
||||||
void **args)
|
void **args)
|
||||||
{
|
{
|
||||||
return oneapi_enqueue_kernel(kernel_context, kernel, global_size, args);
|
return oneapi_enqueue_kernel(
|
||||||
|
kernel_context, kernel, global_size, kernel_features, use_hardware_raytracing, args);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
|
/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
|
||||||
@ -830,12 +911,17 @@ void OneapiDevice::iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_p
|
|||||||
std::string name = device.get_info<sycl::info::device::name>();
|
std::string name = device.get_info<sycl::info::device::name>();
|
||||||
# else
|
# else
|
||||||
std::string name = "SYCL Host Task (Debug)";
|
std::string name = "SYCL Host Task (Debug)";
|
||||||
|
# endif
|
||||||
|
# ifdef WITH_EMBREE_GPU
|
||||||
|
bool hwrt_support = rtcIsSYCLDeviceSupported(device);
|
||||||
|
# else
|
||||||
|
bool hwrt_support = false;
|
||||||
# endif
|
# endif
|
||||||
std::string id = "ONEAPI_" + platform_name + "_" + name;
|
std::string id = "ONEAPI_" + platform_name + "_" + name;
|
||||||
if (device.has(sycl::aspect::ext_intel_pci_address)) {
|
if (device.has(sycl::aspect::ext_intel_pci_address)) {
|
||||||
id.append("_" + device.get_info<sycl::ext::intel::info::device::pci_address>());
|
id.append("_" + device.get_info<sycl::ext::intel::info::device::pci_address>());
|
||||||
}
|
}
|
||||||
(cb)(id.c_str(), name.c_str(), num, user_ptr);
|
(cb)(id.c_str(), name.c_str(), num, hwrt_support, user_ptr);
|
||||||
num++;
|
num++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -16,15 +16,16 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
class DeviceQueue;
|
class DeviceQueue;
|
||||||
|
|
||||||
typedef void (*OneAPIDeviceIteratorCallback)(const char *id,
|
typedef void (*OneAPIDeviceIteratorCallback)(
|
||||||
const char *name,
|
const char *id, const char *name, int num, bool hwrt_support, void *user_ptr);
|
||||||
int num,
|
|
||||||
void *user_ptr);
|
|
||||||
|
|
||||||
class OneapiDevice : public Device {
|
class OneapiDevice : public Device {
|
||||||
private:
|
private:
|
||||||
SyclQueue *device_queue_;
|
SyclQueue *device_queue_;
|
||||||
|
# if WITH_EMBREE_GPU
|
||||||
|
RTCDevice embree_device;
|
||||||
|
RTCScene embree_scene;
|
||||||
|
# endif
|
||||||
using ConstMemMap = map<string, device_vector<uchar> *>;
|
using ConstMemMap = map<string, device_vector<uchar> *>;
|
||||||
ConstMemMap const_mem_map_;
|
ConstMemMap const_mem_map_;
|
||||||
device_vector<TextureInfo> texture_info_;
|
device_vector<TextureInfo> texture_info_;
|
||||||
@ -34,6 +35,8 @@ class OneapiDevice : public Device {
|
|||||||
size_t kg_memory_size_ = (size_t)0;
|
size_t kg_memory_size_ = (size_t)0;
|
||||||
size_t max_memory_on_device_ = (size_t)0;
|
size_t max_memory_on_device_ = (size_t)0;
|
||||||
std::string oneapi_error_string_;
|
std::string oneapi_error_string_;
|
||||||
|
bool use_hardware_raytracing = false;
|
||||||
|
unsigned int kernel_features = 0;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||||
@ -41,10 +44,12 @@ class OneapiDevice : public Device {
|
|||||||
OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
||||||
|
|
||||||
virtual ~OneapiDevice();
|
virtual ~OneapiDevice();
|
||||||
|
# if WITH_EMBREE_GPU
|
||||||
|
void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
|
||||||
|
# endif
|
||||||
bool check_peer_access(Device *peer_device) override;
|
bool check_peer_access(Device *peer_device) override;
|
||||||
|
|
||||||
bool load_kernels(const uint requested_features) override;
|
bool load_kernels(const uint kernel_features) override;
|
||||||
|
|
||||||
void load_texture_info();
|
void load_texture_info();
|
||||||
|
|
||||||
@ -114,7 +119,7 @@ class OneapiDevice : public Device {
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
void check_usm(SyclQueue *queue, const void *usm_ptr, bool allow_host);
|
void check_usm(SyclQueue *queue, const void *usm_ptr, bool allow_host);
|
||||||
bool create_queue(SyclQueue *&external_queue, int device_index);
|
bool create_queue(SyclQueue *&external_queue, int device_index, void *embree_device);
|
||||||
void free_queue(SyclQueue *queue);
|
void free_queue(SyclQueue *queue);
|
||||||
void *usm_aligned_alloc_host(SyclQueue *queue, size_t memory_size, size_t alignment);
|
void *usm_aligned_alloc_host(SyclQueue *queue, size_t memory_size, size_t alignment);
|
||||||
void *usm_alloc_device(SyclQueue *queue, size_t memory_size);
|
void *usm_alloc_device(SyclQueue *queue, size_t memory_size);
|
||||||
|
@ -96,10 +96,13 @@ set(SRC_KERNEL_DEVICE_ONEAPI_HEADERS
|
|||||||
device/oneapi/compat.h
|
device/oneapi/compat.h
|
||||||
device/oneapi/context_begin.h
|
device/oneapi/context_begin.h
|
||||||
device/oneapi/context_end.h
|
device/oneapi/context_end.h
|
||||||
|
device/oneapi/context_intersect_begin.h
|
||||||
|
device/oneapi/context_intersect_end.h
|
||||||
device/oneapi/globals.h
|
device/oneapi/globals.h
|
||||||
device/oneapi/image.h
|
device/oneapi/image.h
|
||||||
device/oneapi/kernel.h
|
device/oneapi/kernel.h
|
||||||
device/oneapi/kernel_templates.h
|
device/oneapi/kernel_templates.h
|
||||||
|
device/cpu/bvh.h
|
||||||
)
|
)
|
||||||
|
|
||||||
set(SRC_KERNEL_CLOSURE_HEADERS
|
set(SRC_KERNEL_CLOSURE_HEADERS
|
||||||
@ -764,7 +767,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
|
|||||||
|
|
||||||
# Set defaults for spir64 and spir64_gen options
|
# Set defaults for spir64 and spir64_gen options
|
||||||
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64)
|
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64)
|
||||||
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-large-register-file -ze-opt-regular-grf-kernel integrator_intersect'")
|
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-regular-grf-kernel integrator_intersect -ze-opt-large-grf-kernel shade -ze-opt-no-local-to-generic'")
|
||||||
endif()
|
endif()
|
||||||
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen)
|
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen)
|
||||||
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}" CACHE STRING "Extra build options for spir64_gen target")
|
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}" CACHE STRING "Extra build options for spir64_gen target")
|
||||||
@ -798,6 +801,59 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
|
|||||||
-I"${NANOVDB_INCLUDE_DIR}")
|
-I"${NANOVDB_INCLUDE_DIR}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(WITH_CYCLES_EMBREE AND EMBREE_SYCL_SUPPORT)
|
||||||
|
list(APPEND sycl_compiler_flags
|
||||||
|
-DWITH_EMBREE
|
||||||
|
-DWITH_EMBREE_GPU
|
||||||
|
-DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION}
|
||||||
|
-I"${EMBREE_INCLUDE_DIRS}")
|
||||||
|
|
||||||
|
if(WIN32)
|
||||||
|
list(APPEND sycl_compiler_flags
|
||||||
|
-ladvapi32.lib
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(next_library_mode "")
|
||||||
|
foreach(library ${EMBREE_LIBRARIES})
|
||||||
|
string(TOLOWER "${library}" library_lower)
|
||||||
|
if(("${library_lower}" STREQUAL "optimized") OR
|
||||||
|
("${library_lower}" STREQUAL "debug"))
|
||||||
|
set(next_library_mode "${library_lower}")
|
||||||
|
else()
|
||||||
|
if(next_library_mode STREQUAL "")
|
||||||
|
list(APPEND EMBREE_TBB_LIBRARIES_optimized ${library})
|
||||||
|
list(APPEND EMBREE_TBB_LIBRARIES_debug ${library})
|
||||||
|
else()
|
||||||
|
list(APPEND EMBREE_TBB_LIBRARIES_${next_library_mode} ${library})
|
||||||
|
endif()
|
||||||
|
set(next_library_mode "")
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
foreach(library ${TBB_LIBRARIES})
|
||||||
|
string(TOLOWER "${library}" library_lower)
|
||||||
|
if(("${library_lower}" STREQUAL "optimized") OR
|
||||||
|
("${library_lower}" STREQUAL "debug"))
|
||||||
|
set(next_library_mode "${library_lower}")
|
||||||
|
else()
|
||||||
|
if(next_library_mode STREQUAL "")
|
||||||
|
list(APPEND EMBREE_TBB_LIBRARIES_optimized ${library})
|
||||||
|
list(APPEND EMBREE_TBB_LIBRARIES_debug ${library})
|
||||||
|
else()
|
||||||
|
list(APPEND EMBREE_TBB_LIBRARIES_${next_library_mode} ${library})
|
||||||
|
endif()
|
||||||
|
set(next_library_mode "")
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
|
list(APPEND sycl_compiler_flags
|
||||||
|
"$<$<CONFIG:Release>:${EMBREE_TBB_LIBRARIES_optimized}>"
|
||||||
|
"$<$<CONFIG:RelWithDebInfo>:${EMBREE_TBB_LIBRARIES_optimized}>"
|
||||||
|
"$<$<CONFIG:MinSizeRel>:${EMBREE_TBB_LIBRARIES_optimized}>"
|
||||||
|
"$<$<CONFIG:Debug>:${EMBREE_TBB_LIBRARIES_debug}>"
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
if(WITH_CYCLES_DEBUG)
|
if(WITH_CYCLES_DEBUG)
|
||||||
list(APPEND sycl_compiler_flags -DWITH_CYCLES_DEBUG)
|
list(APPEND sycl_compiler_flags -DWITH_CYCLES_DEBUG)
|
||||||
endif()
|
endif()
|
||||||
|
@ -13,8 +13,13 @@
|
|||||||
# include <embree3/rtcore_scene.h>
|
# include <embree3/rtcore_scene.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "kernel/device/cpu/compat.h"
|
#ifdef __KERNEL_ONEAPI__
|
||||||
#include "kernel/device/cpu/globals.h"
|
# include "kernel/device/oneapi/compat.h"
|
||||||
|
# include "kernel/device/oneapi/globals.h"
|
||||||
|
#else
|
||||||
|
# include "kernel/device/cpu/compat.h"
|
||||||
|
# include "kernel/device/cpu/globals.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "kernel/bvh/types.h"
|
#include "kernel/bvh/types.h"
|
||||||
#include "kernel/bvh/util.h"
|
#include "kernel/bvh/util.h"
|
||||||
@ -33,11 +38,18 @@ using numhit_t = uint8_t;
|
|||||||
using numhit_t = uint32_t;
|
using numhit_t = uint32_t;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define CYCLES_EMBREE_USED_FEATURES \
|
#ifdef __KERNEL_ONEAPI__
|
||||||
(RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
|
static constexpr sycl::specialization_id<RTCFeatureFlags> oneapi_embree_features{
|
||||||
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
|
(const RTCFeatureFlags)(0)};
|
||||||
RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
|
# define CYCLES_EMBREE_USED_FEATURES \
|
||||||
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE)
|
(kernel_handler.get_specialization_constant<oneapi_embree_features>())
|
||||||
|
#else
|
||||||
|
# define CYCLES_EMBREE_USED_FEATURES \
|
||||||
|
(RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
|
||||||
|
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
|
||||||
|
RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
|
||||||
|
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE)
|
||||||
|
#endif
|
||||||
|
|
||||||
#define EMBREE_IS_HAIR(x) (x & 1)
|
#define EMBREE_IS_HAIR(x) (x & 1)
|
||||||
|
|
||||||
@ -252,7 +264,8 @@ ccl_device_inline void kernel_embree_convert_sss_hit(KernelGlobals kg,
|
|||||||
* Things like recording subsurface or shadow hits for later evaluation
|
* Things like recording subsurface or shadow hits for later evaluation
|
||||||
* as well as filtering for volume objects happen here.
|
* as well as filtering for volume objects happen here.
|
||||||
* Cycles' own BVH does that directly inside the traversal calls. */
|
* Cycles' own BVH does that directly inside the traversal calls. */
|
||||||
ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNArguments *args)
|
ccl_device_forceinline void kernel_embree_filter_intersection_func_impl(
|
||||||
|
const RTCFilterFunctionNArguments *args)
|
||||||
{
|
{
|
||||||
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
||||||
assert(args->N == 1);
|
assert(args->N == 1);
|
||||||
@ -263,7 +276,11 @@ ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNA
|
|||||||
#else
|
#else
|
||||||
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
|
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef __KERNEL_ONEAPI__
|
||||||
|
KernelGlobalsGPU *kg = nullptr;
|
||||||
|
#else
|
||||||
const KernelGlobalsCPU *kg = ctx->kg;
|
const KernelGlobalsCPU *kg = ctx->kg;
|
||||||
|
#endif
|
||||||
const Ray *cray = ctx->ray;
|
const Ray *cray = ctx->ray;
|
||||||
|
|
||||||
if (kernel_embree_is_self_intersection(
|
if (kernel_embree_is_self_intersection(
|
||||||
@ -277,7 +294,7 @@ ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNA
|
|||||||
* as well as filtering for volume objects happen here.
|
* as well as filtering for volume objects happen here.
|
||||||
* Cycles' own BVH does that directly inside the traversal calls.
|
* Cycles' own BVH does that directly inside the traversal calls.
|
||||||
*/
|
*/
|
||||||
ccl_device void kernel_embree_filter_occluded_shadow_all_func(
|
ccl_device_forceinline void kernel_embree_filter_occluded_shadow_all_func_impl(
|
||||||
const RTCFilterFunctionNArguments *args)
|
const RTCFilterFunctionNArguments *args)
|
||||||
{
|
{
|
||||||
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
||||||
@ -290,7 +307,11 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
|
|||||||
#else
|
#else
|
||||||
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
|
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef __KERNEL_ONEAPI__
|
||||||
|
KernelGlobalsGPU *kg = nullptr;
|
||||||
|
#else
|
||||||
const KernelGlobalsCPU *kg = ctx->kg;
|
const KernelGlobalsCPU *kg = ctx->kg;
|
||||||
|
#endif
|
||||||
const Ray *cray = ctx->ray;
|
const Ray *cray = ctx->ray;
|
||||||
|
|
||||||
Intersection current_isect;
|
Intersection current_isect;
|
||||||
@ -326,7 +347,7 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Test if we need to record this transparent intersection. */
|
/* Test if we need to record this transparent intersection. */
|
||||||
const numhit_t max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
|
const numhit_t max_record_hits = min(ctx->max_hits, numhit_t(INTEGRATOR_SHADOW_ISECT_SIZE));
|
||||||
if (ctx->num_recorded_hits < max_record_hits) {
|
if (ctx->num_recorded_hits < max_record_hits) {
|
||||||
/* If maximum number of hits was reached, replace the intersection with the
|
/* If maximum number of hits was reached, replace the intersection with the
|
||||||
* highest distance. We want to find the N closest intersections. */
|
* highest distance. We want to find the N closest intersections. */
|
||||||
@ -363,7 +384,7 @@ ccl_device void kernel_embree_filter_occluded_shadow_all_func(
|
|||||||
*args->valid = 0;
|
*args->valid = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
|
ccl_device_forceinline void kernel_embree_filter_occluded_local_func_impl(
|
||||||
const RTCFilterFunctionNArguments *args)
|
const RTCFilterFunctionNArguments *args)
|
||||||
{
|
{
|
||||||
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
||||||
@ -376,7 +397,11 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
|
|||||||
#else
|
#else
|
||||||
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
|
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef __KERNEL_ONEAPI__
|
||||||
|
KernelGlobalsGPU *kg = nullptr;
|
||||||
|
#else
|
||||||
const KernelGlobalsCPU *kg = ctx->kg;
|
const KernelGlobalsCPU *kg = ctx->kg;
|
||||||
|
#endif
|
||||||
const Ray *cray = ctx->ray;
|
const Ray *cray = ctx->ray;
|
||||||
|
|
||||||
/* Check if it's hitting the correct object. */
|
/* Check if it's hitting the correct object. */
|
||||||
@ -462,7 +487,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func(
|
|||||||
*args->valid = 0;
|
*args->valid = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
|
ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func_impl(
|
||||||
const RTCFilterFunctionNArguments *args)
|
const RTCFilterFunctionNArguments *args)
|
||||||
{
|
{
|
||||||
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
/* Current implementation in Cycles assumes only single-ray intersection queries. */
|
||||||
@ -475,7 +500,11 @@ ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func(
|
|||||||
#else
|
#else
|
||||||
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
|
CCLIntersectContext *ctx = (CCLIntersectContext *)(args->context);
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef __KERNEL_ONEAPI__
|
||||||
|
KernelGlobalsGPU *kg = nullptr;
|
||||||
|
#else
|
||||||
const KernelGlobalsCPU *kg = ctx->kg;
|
const KernelGlobalsCPU *kg = ctx->kg;
|
||||||
|
#endif
|
||||||
const Ray *cray = ctx->ray;
|
const Ray *cray = ctx->ray;
|
||||||
|
|
||||||
/* Append the intersection to the end of the array. */
|
/* Append the intersection to the end of the array. */
|
||||||
@ -513,14 +542,14 @@ ccl_device_forceinline void kernel_embree_filter_occluded_func(
|
|||||||
|
|
||||||
switch (ctx->type) {
|
switch (ctx->type) {
|
||||||
case CCLIntersectContext::RAY_SHADOW_ALL:
|
case CCLIntersectContext::RAY_SHADOW_ALL:
|
||||||
kernel_embree_filter_occluded_shadow_all_func(args);
|
kernel_embree_filter_occluded_shadow_all_func_impl(args);
|
||||||
break;
|
break;
|
||||||
case CCLIntersectContext::RAY_LOCAL:
|
case CCLIntersectContext::RAY_LOCAL:
|
||||||
case CCLIntersectContext::RAY_SSS:
|
case CCLIntersectContext::RAY_SSS:
|
||||||
kernel_embree_filter_occluded_local_func(args);
|
kernel_embree_filter_occluded_local_func_impl(args);
|
||||||
break;
|
break;
|
||||||
case CCLIntersectContext::RAY_VOLUME_ALL:
|
case CCLIntersectContext::RAY_VOLUME_ALL:
|
||||||
kernel_embree_filter_occluded_volume_all_func(args);
|
kernel_embree_filter_occluded_volume_all_func_impl(args);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CCLIntersectContext::RAY_REGULAR:
|
case CCLIntersectContext::RAY_REGULAR:
|
||||||
@ -569,7 +598,63 @@ ccl_device void kernel_embree_filter_occluded_func_backface_cull(
|
|||||||
|
|
||||||
kernel_embree_filter_occluded_func(args);
|
kernel_embree_filter_occluded_func(args);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __KERNEL_ONEAPI__
|
||||||
|
/* Static wrappers so we can call the callbacks from out side the ONEAPIKernelContext class */
|
||||||
|
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
|
||||||
|
kernel_embree_filter_intersection_func_static(const RTCFilterFunctionNArguments *args)
|
||||||
|
{
|
||||||
|
RTCHit *hit = (RTCHit *)args->hit;
|
||||||
|
CCLFirstHitContext *ctx = (CCLFirstHitContext *)(args->context);
|
||||||
|
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
|
||||||
|
context->kernel_embree_filter_intersection_func_impl(args);
|
||||||
|
}
|
||||||
|
|
||||||
|
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
|
||||||
|
kernel_embree_filter_occluded_shadow_all_func_static(const RTCFilterFunctionNArguments *args)
|
||||||
|
{
|
||||||
|
RTCHit *hit = (RTCHit *)args->hit;
|
||||||
|
CCLShadowContext *ctx = (CCLShadowContext *)(args->context);
|
||||||
|
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
|
||||||
|
context->kernel_embree_filter_occluded_shadow_all_func_impl(args);
|
||||||
|
}
|
||||||
|
|
||||||
|
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
|
||||||
|
kernel_embree_filter_occluded_local_func_static(const RTCFilterFunctionNArguments *args)
|
||||||
|
{
|
||||||
|
RTCHit *hit = (RTCHit *)args->hit;
|
||||||
|
CCLLocalContext *ctx = (CCLLocalContext *)(args->context);
|
||||||
|
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
|
||||||
|
context->kernel_embree_filter_occluded_local_func_impl(args);
|
||||||
|
}
|
||||||
|
|
||||||
|
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
|
||||||
|
kernel_embree_filter_occluded_volume_all_func_static(const RTCFilterFunctionNArguments *args)
|
||||||
|
{
|
||||||
|
RTCHit *hit = (RTCHit *)args->hit;
|
||||||
|
CCLVolumeContext *ctx = (CCLVolumeContext *)(args->context);
|
||||||
|
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
|
||||||
|
context->kernel_embree_filter_occluded_volume_all_func_impl(args);
|
||||||
|
}
|
||||||
|
|
||||||
|
# define kernel_embree_filter_intersection_func \
|
||||||
|
ONEAPIKernelContext::kernel_embree_filter_intersection_func_static
|
||||||
|
# define kernel_embree_filter_occluded_shadow_all_func \
|
||||||
|
ONEAPIKernelContext::kernel_embree_filter_occluded_shadow_all_func_static
|
||||||
|
# define kernel_embree_filter_occluded_local_func \
|
||||||
|
ONEAPIKernelContext::kernel_embree_filter_occluded_local_func_static
|
||||||
|
# define kernel_embree_filter_occluded_volume_all_func \
|
||||||
|
ONEAPIKernelContext::kernel_embree_filter_occluded_volume_all_func_static
|
||||||
|
#else
|
||||||
|
# define kernel_embree_filter_intersection_func kernel_embree_filter_intersection_func_impl
|
||||||
|
# if EMBREE_MAJOR_VERSION >= 4
|
||||||
|
# define kernel_embree_filter_occluded_shadow_all_func \
|
||||||
|
kernel_embree_filter_occluded_shadow_all_func_impl
|
||||||
|
# define kernel_embree_filter_occluded_local_func kernel_embree_filter_occluded_local_func_impl
|
||||||
|
# define kernel_embree_filter_occluded_volume_all_func \
|
||||||
|
kernel_embree_filter_occluded_volume_all_func_impl
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Scene intersection. */
|
/* Scene intersection. */
|
||||||
@ -583,7 +668,15 @@ ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
|
|||||||
#if EMBREE_MAJOR_VERSION >= 4
|
#if EMBREE_MAJOR_VERSION >= 4
|
||||||
CCLFirstHitContext ctx;
|
CCLFirstHitContext ctx;
|
||||||
rtcInitRayQueryContext(&ctx);
|
rtcInitRayQueryContext(&ctx);
|
||||||
|
# ifdef __KERNEL_ONEAPI__
|
||||||
|
/* NOTE(sirgienko) Cycles GPU backends passes NULL to KernelGlobals and
|
||||||
|
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
|
||||||
|
* as a class context (Metal, oneAPI). So we need to pass this context here
|
||||||
|
* in order to have an access to it later in Embree filter functions on GPU. */
|
||||||
|
ctx.kg = (KernelGlobals)this;
|
||||||
|
# else
|
||||||
ctx.kg = kg;
|
ctx.kg = kg;
|
||||||
|
# endif
|
||||||
#else
|
#else
|
||||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
|
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
|
||||||
rtcInitIntersectContext(&ctx);
|
rtcInitIntersectContext(&ctx);
|
||||||
@ -596,7 +689,7 @@ ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
|
|||||||
#if EMBREE_MAJOR_VERSION >= 4
|
#if EMBREE_MAJOR_VERSION >= 4
|
||||||
RTCIntersectArguments args;
|
RTCIntersectArguments args;
|
||||||
rtcInitIntersectArguments(&args);
|
rtcInitIntersectArguments(&args);
|
||||||
args.filter = (RTCFilterFunctionN)kernel_embree_filter_intersection_func;
|
args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_intersection_func);
|
||||||
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
|
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
|
||||||
args.context = &ctx;
|
args.context = &ctx;
|
||||||
rtcIntersect1(kernel_data.device_bvh, &ray_hit, &args);
|
rtcIntersect1(kernel_data.device_bvh, &ray_hit, &args);
|
||||||
@ -625,7 +718,15 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
|
|||||||
# if EMBREE_MAJOR_VERSION >= 4
|
# if EMBREE_MAJOR_VERSION >= 4
|
||||||
CCLLocalContext ctx;
|
CCLLocalContext ctx;
|
||||||
rtcInitRayQueryContext(&ctx);
|
rtcInitRayQueryContext(&ctx);
|
||||||
|
# ifdef __KERNEL_ONEAPI__
|
||||||
|
/* NOTE(sirgienko) Cycles GPU backends passes NULL to KernelGlobals and
|
||||||
|
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
|
||||||
|
* as a class context (Metal, oneAPI). So we need to pass this context here
|
||||||
|
* in order to have an access to it later in Embree filter functions on GPU. */
|
||||||
|
ctx.kg = (KernelGlobals)this;
|
||||||
|
# else
|
||||||
ctx.kg = kg;
|
ctx.kg = kg;
|
||||||
|
# endif
|
||||||
# else
|
# else
|
||||||
CCLIntersectContext ctx(kg,
|
CCLIntersectContext ctx(kg,
|
||||||
has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
|
has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
|
||||||
@ -646,7 +747,7 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
|
|||||||
# if EMBREE_MAJOR_VERSION >= 4
|
# if EMBREE_MAJOR_VERSION >= 4
|
||||||
RTCOccludedArguments args;
|
RTCOccludedArguments args;
|
||||||
rtcInitOccludedArguments(&args);
|
rtcInitOccludedArguments(&args);
|
||||||
args.filter = (RTCFilterFunctionN)(kernel_embree_filter_occluded_local_func);
|
args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_occluded_local_func);
|
||||||
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
|
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
|
||||||
args.context = &ctx;
|
args.context = &ctx;
|
||||||
# endif
|
# endif
|
||||||
@ -692,7 +793,7 @@ ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
|
|||||||
|
|
||||||
#ifdef __SHADOW_RECORD_ALL__
|
#ifdef __SHADOW_RECORD_ALL__
|
||||||
ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
|
ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
|
||||||
IntegratorShadowStateCPU *state,
|
IntegratorShadowState state,
|
||||||
ccl_private const Ray *ray,
|
ccl_private const Ray *ray,
|
||||||
uint visibility,
|
uint visibility,
|
||||||
uint max_hits,
|
uint max_hits,
|
||||||
@ -702,7 +803,15 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
|
|||||||
# if EMBREE_MAJOR_VERSION >= 4
|
# if EMBREE_MAJOR_VERSION >= 4
|
||||||
CCLShadowContext ctx;
|
CCLShadowContext ctx;
|
||||||
rtcInitRayQueryContext(&ctx);
|
rtcInitRayQueryContext(&ctx);
|
||||||
|
# ifdef __KERNEL_ONEAPI__
|
||||||
|
/* NOTE(sirgienko) Cycles GPU backends passes NULL to KernelGlobals and
|
||||||
|
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
|
||||||
|
* as a class context (Metal, oneAPI). So we need to pass this context here
|
||||||
|
* in order to have an access to it later in Embree filter functions on GPU. */
|
||||||
|
ctx.kg = (KernelGlobals)this;
|
||||||
|
# else
|
||||||
ctx.kg = kg;
|
ctx.kg = kg;
|
||||||
|
# endif
|
||||||
# else
|
# else
|
||||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
|
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
|
||||||
rtcInitIntersectContext(&ctx);
|
rtcInitIntersectContext(&ctx);
|
||||||
@ -718,7 +827,8 @@ ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
|
|||||||
# if EMBREE_MAJOR_VERSION >= 4
|
# if EMBREE_MAJOR_VERSION >= 4
|
||||||
RTCOccludedArguments args;
|
RTCOccludedArguments args;
|
||||||
rtcInitOccludedArguments(&args);
|
rtcInitOccludedArguments(&args);
|
||||||
args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_shadow_all_func;
|
args.filter = reinterpret_cast<RTCFilterFunctionN>(
|
||||||
|
kernel_embree_filter_occluded_shadow_all_func);
|
||||||
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
|
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
|
||||||
args.context = &ctx;
|
args.context = &ctx;
|
||||||
rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
|
rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
|
||||||
@ -742,7 +852,15 @@ ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
|
|||||||
# if EMBREE_MAJOR_VERSION >= 4
|
# if EMBREE_MAJOR_VERSION >= 4
|
||||||
CCLVolumeContext ctx;
|
CCLVolumeContext ctx;
|
||||||
rtcInitRayQueryContext(&ctx);
|
rtcInitRayQueryContext(&ctx);
|
||||||
|
# ifdef __KERNEL_ONEAPI__
|
||||||
|
/* NOTE(sirgienko) Cycles GPU backends passes NULL to KernelGlobals and
|
||||||
|
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
|
||||||
|
* as a class context (Metal, oneAPI). So we need to pass this context here
|
||||||
|
* in order to have an access to it later in Embree filter functions on GPU. */
|
||||||
|
ctx.kg = (KernelGlobals)this;
|
||||||
|
# else
|
||||||
ctx.kg = kg;
|
ctx.kg = kg;
|
||||||
|
# endif
|
||||||
# else
|
# else
|
||||||
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
|
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
|
||||||
rtcInitIntersectContext(&ctx);
|
rtcInitIntersectContext(&ctx);
|
||||||
@ -756,7 +874,8 @@ ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
|
|||||||
# if EMBREE_MAJOR_VERSION >= 4
|
# if EMBREE_MAJOR_VERSION >= 4
|
||||||
RTCOccludedArguments args;
|
RTCOccludedArguments args;
|
||||||
rtcInitOccludedArguments(&args);
|
rtcInitOccludedArguments(&args);
|
||||||
args.filter = (RTCFilterFunctionN)kernel_embree_filter_occluded_volume_all_func;
|
args.filter = reinterpret_cast<RTCFilterFunctionN>(
|
||||||
|
kernel_embree_filter_occluded_volume_all_func);
|
||||||
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
|
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
|
||||||
args.context = &ctx;
|
args.context = &ctx;
|
||||||
rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
|
rtcOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
|
||||||
|
@ -128,6 +128,12 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|||||||
}
|
}
|
||||||
ccl_gpu_kernel_postfix
|
ccl_gpu_kernel_postfix
|
||||||
|
|
||||||
|
/* Intersection kernels need access to the kernel handler for specialization constants to work
|
||||||
|
* properly. */
|
||||||
|
#ifdef __KERNEL_ONEAPI__
|
||||||
|
# include "kernel/device/oneapi/context_intersect_begin.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||||
ccl_gpu_kernel_signature(integrator_intersect_closest,
|
ccl_gpu_kernel_signature(integrator_intersect_closest,
|
||||||
ccl_global const int *path_index_array,
|
ccl_global const int *path_index_array,
|
||||||
|
@ -5,6 +5,11 @@
|
|||||||
|
|
||||||
#define __KERNEL_GPU__
|
#define __KERNEL_GPU__
|
||||||
#define __KERNEL_ONEAPI__
|
#define __KERNEL_ONEAPI__
|
||||||
|
#define __KERNEL_64_BIT__
|
||||||
|
|
||||||
|
#ifdef WITH_EMBREE_GPU
|
||||||
|
# define __KERNEL_GPU_RAYTRACING__
|
||||||
|
#endif
|
||||||
|
|
||||||
#define CCL_NAMESPACE_BEGIN
|
#define CCL_NAMESPACE_BEGIN
|
||||||
#define CCL_NAMESPACE_END
|
#define CCL_NAMESPACE_END
|
||||||
@ -57,17 +62,19 @@
|
|||||||
#define ccl_gpu_kernel_threads(block_num_threads)
|
#define ccl_gpu_kernel_threads(block_num_threads)
|
||||||
|
|
||||||
#ifndef WITH_ONEAPI_SYCL_HOST_TASK
|
#ifndef WITH_ONEAPI_SYCL_HOST_TASK
|
||||||
# define ccl_gpu_kernel_signature(name, ...) \
|
# define __ccl_gpu_kernel_signature(name, ...) \
|
||||||
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
|
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
|
||||||
size_t kernel_global_size, \
|
size_t kernel_global_size, \
|
||||||
size_t kernel_local_size, \
|
size_t kernel_local_size, \
|
||||||
sycl::handler &cgh, \
|
sycl::handler &cgh, \
|
||||||
__VA_ARGS__) { \
|
__VA_ARGS__) { \
|
||||||
(kg); \
|
(kg); \
|
||||||
cgh.parallel_for<class kernel_##name>( \
|
cgh.parallel_for( \
|
||||||
sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
|
sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
|
||||||
[=](sycl::nd_item<1> item) {
|
[=](sycl::nd_item<1> item) {
|
||||||
|
|
||||||
|
# define ccl_gpu_kernel_signature __ccl_gpu_kernel_signature
|
||||||
|
|
||||||
# define ccl_gpu_kernel_postfix \
|
# define ccl_gpu_kernel_postfix \
|
||||||
}); \
|
}); \
|
||||||
}
|
}
|
||||||
|
18
intern/cycles/kernel/device/oneapi/context_intersect_begin.h
Normal file
18
intern/cycles/kernel/device/oneapi/context_intersect_begin.h
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
/* SPDX-License-Identifier: Apache-2.0
|
||||||
|
* Copyright 2023 Intel Corporation */
|
||||||
|
|
||||||
|
#if !defined(WITH_ONEAPI_SYCL_HOST_TASK) && defined(WITH_EMBREE_GPU)
|
||||||
|
# undef ccl_gpu_kernel_signature
|
||||||
|
# define ccl_gpu_kernel_signature(name, ...) \
|
||||||
|
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
|
||||||
|
size_t kernel_global_size, \
|
||||||
|
size_t kernel_local_size, \
|
||||||
|
sycl::handler &cgh, \
|
||||||
|
__VA_ARGS__) \
|
||||||
|
{ \
|
||||||
|
(kg); \
|
||||||
|
cgh.parallel_for( \
|
||||||
|
sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
|
||||||
|
[=](sycl::nd_item<1> item, sycl::kernel_handler oneapi_kernel_handler) { \
|
||||||
|
((ONEAPIKernelContext*)kg)->kernel_handler = oneapi_kernel_handler;
|
||||||
|
#endif
|
@ -0,0 +1,7 @@
|
|||||||
|
/* SPDX-License-Identifier: Apache-2.0
|
||||||
|
* Copyright 2023 Intel Corporation */
|
||||||
|
|
||||||
|
#if !defined(WITH_ONEAPI_SYCL_HOST_TASK) && defined(WITH_EMBREE_GPU)
|
||||||
|
# undef ccl_gpu_kernel_signature
|
||||||
|
# define ccl_gpu_kernel_signature __ccl_gpu_kernel_signature
|
||||||
|
#endif
|
@ -31,6 +31,8 @@ typedef struct KernelGlobalsGPU {
|
|||||||
size_t nd_item_group_range_0;
|
size_t nd_item_group_range_0;
|
||||||
size_t nd_item_global_id_0;
|
size_t nd_item_global_id_0;
|
||||||
size_t nd_item_global_range_0;
|
size_t nd_item_global_range_0;
|
||||||
|
#else
|
||||||
|
sycl::kernel_handler kernel_handler;
|
||||||
#endif
|
#endif
|
||||||
} KernelGlobalsGPU;
|
} KernelGlobalsGPU;
|
||||||
|
|
||||||
|
@ -16,9 +16,22 @@
|
|||||||
|
|
||||||
# include "kernel/device/gpu/kernel.h"
|
# include "kernel/device/gpu/kernel.h"
|
||||||
|
|
||||||
|
# include "device/kernel.cpp"
|
||||||
|
|
||||||
static OneAPIErrorCallback s_error_cb = nullptr;
|
static OneAPIErrorCallback s_error_cb = nullptr;
|
||||||
static void *s_error_user_ptr = nullptr;
|
static void *s_error_user_ptr = nullptr;
|
||||||
|
|
||||||
|
# ifdef WITH_EMBREE_GPU
|
||||||
|
static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_BASIC_FEATURES =
|
||||||
|
(const RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE |
|
||||||
|
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS |
|
||||||
|
RTC_FEATURE_FLAG_POINT | RTC_FEATURE_FLAG_MOTION_BLUR);
|
||||||
|
static const RTCFeatureFlags CYCLES_ONEAPI_EMBREE_ALL_FEATURES =
|
||||||
|
(const RTCFeatureFlags)(CYCLES_ONEAPI_EMBREE_BASIC_FEATURES |
|
||||||
|
RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE |
|
||||||
|
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE);
|
||||||
|
# endif
|
||||||
|
|
||||||
void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr)
|
void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr)
|
||||||
{
|
{
|
||||||
s_error_cb = cb;
|
s_error_cb = cb;
|
||||||
@ -144,13 +157,55 @@ size_t oneapi_kernel_preferred_local_size(SyclQueue *queue,
|
|||||||
|
|
||||||
bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
|
bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
|
||||||
{
|
{
|
||||||
# ifdef SYCL_SKIP_KERNELS_PRELOAD
|
|
||||||
(void)queue_;
|
|
||||||
(void)requested_features;
|
|
||||||
# else
|
|
||||||
assert(queue_);
|
assert(queue_);
|
||||||
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
|
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
|
||||||
|
# ifdef WITH_EMBREE_GPU
|
||||||
|
/* Preloading intersection kernels is mandatory with Embree on GPU execution,
|
||||||
|
* because AoT will be not fully performant. */
|
||||||
|
try {
|
||||||
|
sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
|
||||||
|
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
|
||||||
|
{queue->get_device()});
|
||||||
|
|
||||||
|
for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
|
||||||
|
const std::string &kernel_name = kernel_id.get_name();
|
||||||
|
|
||||||
|
/* NOTE(@nsirgien): Names in this conditions below should match names from
|
||||||
|
* oneapi_call macro in oneapi_enqueue_kernel below */
|
||||||
|
/* Also, here we handle only intersection kernels (and skip the rest) */
|
||||||
|
if (kernel_name.find("_intersect_") == std::string::npos) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (((requested_features & KERNEL_FEATURE_VOLUME) == 0) &&
|
||||||
|
kernel_name.find("_intersect_volume") != std::string::npos) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle =
|
||||||
|
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
|
||||||
|
|
||||||
|
one_kernel_bundle.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||||
|
CYCLES_ONEAPI_EMBREE_BASIC_FEATURES);
|
||||||
|
sycl::build(one_kernel_bundle);
|
||||||
|
|
||||||
|
one_kernel_bundle.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||||
|
CYCLES_ONEAPI_EMBREE_ALL_FEATURES);
|
||||||
|
sycl::build(one_kernel_bundle);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (sycl::exception const &e) {
|
||||||
|
if (s_error_cb) {
|
||||||
|
s_error_cb(e.what(), s_error_user_ptr);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
|
# ifdef SYCL_SKIP_KERNELS_PRELOAD
|
||||||
|
(void)queue_;
|
||||||
|
(void)kernel_features;
|
||||||
|
# else
|
||||||
try {
|
try {
|
||||||
sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
|
sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
|
||||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
|
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
|
||||||
@ -195,6 +250,8 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
|
|||||||
bool oneapi_enqueue_kernel(KernelContext *kernel_context,
|
bool oneapi_enqueue_kernel(KernelContext *kernel_context,
|
||||||
int kernel,
|
int kernel,
|
||||||
size_t global_size,
|
size_t global_size,
|
||||||
|
const uint kernel_features,
|
||||||
|
bool use_hardware_raytracing,
|
||||||
void **args)
|
void **args)
|
||||||
{
|
{
|
||||||
bool success = true;
|
bool success = true;
|
||||||
@ -246,8 +303,24 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
|
|||||||
# pragma GCC diagnostic error "-Wswitch"
|
# pragma GCC diagnostic error "-Wswitch"
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
# ifdef WITH_EMBREE_GPU
|
||||||
|
bool is_with_rthw_kernel = device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
|
||||||
|
device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
|
||||||
|
device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
|
||||||
|
device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK;
|
||||||
|
const RTCFeatureFlags used_embree_features = (is_with_rthw_kernel && with_hwrt &&
|
||||||
|
!with_curve_features) ?
|
||||||
|
CYCLES_ONEAPI_EMBREE_BASIC_FEATURES :
|
||||||
|
CYCLES_ONEAPI_EMBREE_ALL_FEATURES;
|
||||||
|
# endif
|
||||||
|
|
||||||
try {
|
try {
|
||||||
queue->submit([&](sycl::handler &cgh) {
|
queue->submit([&](sycl::handler &cgh) {
|
||||||
|
# ifdef WITH_EMBREE_GPU
|
||||||
|
if (is_with_rthw_kernel)
|
||||||
|
cgh.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||||
|
used_embree_features);
|
||||||
|
# endif
|
||||||
switch (device_kernel) {
|
switch (device_kernel) {
|
||||||
case DEVICE_KERNEL_INTEGRATOR_RESET: {
|
case DEVICE_KERNEL_INTEGRATOR_RESET: {
|
||||||
oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_reset);
|
oneapi_call(kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_reset);
|
||||||
@ -549,4 +622,5 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
|
|||||||
# endif
|
# endif
|
||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* WITH_ONEAPI */
|
#endif /* WITH_ONEAPI */
|
||||||
|
@ -47,10 +47,14 @@ CYCLES_KERNEL_ONEAPI_EXPORT size_t oneapi_kernel_preferred_local_size(
|
|||||||
CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_enqueue_kernel(KernelContext *context,
|
CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_enqueue_kernel(KernelContext *context,
|
||||||
int kernel,
|
int kernel,
|
||||||
size_t global_size,
|
size_t global_size,
|
||||||
|
const unsigned int kernel_features,
|
||||||
|
bool use_hardware_raytracing,
|
||||||
void **args);
|
void **args);
|
||||||
CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_load_kernels(SyclQueue *queue,
|
CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_load_kernels(SyclQueue *queue,
|
||||||
const unsigned int requested_features);
|
const unsigned int kernel_features,
|
||||||
|
bool use_hardware_raytracing);
|
||||||
# ifdef __cplusplus
|
# ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
||||||
# endif
|
# endif
|
||||||
#endif /* WITH_ONEAPI */
|
#endif /* WITH_ONEAPI */
|
||||||
|
@ -3,8 +3,9 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#if !defined(__KERNEL_GPU__) && defined(WITH_EMBREE)
|
#if (!defined(__KERNEL_GPU__) || (defined(__KERNEL_ONEAPI__) && defined(WITH_EMBREE_GPU))) && \
|
||||||
# if EMBREE_MAJOR_VERSION >= 4
|
defined(WITH_EMBREE)
|
||||||
|
# if EMBREE_MAJOR_VERSION == 4
|
||||||
# include <embree4/rtcore.h>
|
# include <embree4/rtcore.h>
|
||||||
# include <embree4/rtcore_scene.h>
|
# include <embree4/rtcore_scene.h>
|
||||||
# else
|
# else
|
||||||
|
@ -4,7 +4,6 @@
|
|||||||
#ifndef __UTIL_VECTOR_H__
|
#ifndef __UTIL_VECTOR_H__
|
||||||
#define __UTIL_VECTOR_H__
|
#define __UTIL_VECTOR_H__
|
||||||
|
|
||||||
#include <cassert>
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user