Cycles: update DPCPP to 2022-12 release
We also backport a patch to program_manager to it as
61e51015a5
helps avoid unnecessary recompilation when enumerating available
kernels.
This commit is contained in:
parent
5cdf0c9ee9
commit
887022257d
@ -156,6 +156,7 @@ download_source(OPENCLHEADERS)
|
||||
download_source(ICDLOADER)
|
||||
download_source(MP11)
|
||||
download_source(SPIRV_HEADERS)
|
||||
download_source(UNIFIED_RUNTIME)
|
||||
download_source(IGC)
|
||||
download_source(IGC_LLVM)
|
||||
download_source(IGC_OPENCL_CLANG)
|
||||
|
@ -41,6 +41,7 @@ set(DPCPP_EXTRA_ARGS
|
||||
-DLEVEL_ZERO_LIBRARY=${LIBDIR}/level-zero/lib/${LIBPREFIX}ze_loader${SHAREDLIBEXT}
|
||||
-DLEVEL_ZERO_INCLUDE_DIR=${LIBDIR}/level-zero/include
|
||||
-DLLVM_EXTERNAL_SPIRV_HEADERS_SOURCE_DIR=${BUILD_DIR}/spirvheaders/src/external_spirvheaders/
|
||||
-DUNIFIED_RUNTIME_SOURCE_DIR=${BUILD_DIR}/unifiedruntime/src/external_unifiedruntime/
|
||||
# Below here is copied from an invocation of buildbot/config.py
|
||||
-DLLVM_ENABLE_ASSERTIONS=ON
|
||||
-DLLVM_TARGETS_TO_BUILD=X86
|
||||
@ -107,6 +108,7 @@ add_dependencies(
|
||||
external_mp11
|
||||
external_level-zero
|
||||
external_spirvheaders
|
||||
external_unifiedruntime
|
||||
)
|
||||
|
||||
if(BUILD_MODE STREQUAL Release AND WIN32)
|
||||
|
@ -59,3 +59,13 @@ ExternalProject_Add(external_spirvheaders
|
||||
BUILD_COMMAND echo .
|
||||
INSTALL_COMMAND echo .
|
||||
)
|
||||
|
||||
ExternalProject_Add(external_unifiedruntime
|
||||
URL file://${PACKAGE_DIR}/${UNIFIED_RUNTIME_FILE}
|
||||
URL_HASH ${UNIFIED_RUNTIME_HASH_TYPE}=${UNIFIED_RUNTIME_HASH}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
PREFIX ${BUILD_DIR}/unifiedruntime
|
||||
CONFIGURE_COMMAND echo .
|
||||
BUILD_COMMAND echo .
|
||||
INSTALL_COMMAND echo .
|
||||
)
|
||||
|
@ -599,15 +599,15 @@ set(OPENPGL_HASH db63f5dac5cfa8c110ede241f0c413f00db0c4748697381c4fa23e0f9e82a75
|
||||
set(OPENPGL_HASH_TYPE SHA256)
|
||||
set(OPENPGL_FILE openpgl-${OPENPGL_VERSION}.tar.gz)
|
||||
|
||||
set(LEVEL_ZERO_VERSION v1.8.5)
|
||||
set(LEVEL_ZERO_VERSION v1.8.8)
|
||||
set(LEVEL_ZERO_URI https://github.com/oneapi-src/level-zero/archive/refs/tags/${LEVEL_ZERO_VERSION}.tar.gz)
|
||||
set(LEVEL_ZERO_HASH b6e9663bbcc53c148d32376998298bec6f7c434ef2218c61fa708963e3a09394)
|
||||
set(LEVEL_ZERO_HASH 3553ae8fa0d2d69c4210a8f3428bd6612bd8bb8a627faf52c3658a01851e66d2)
|
||||
set(LEVEL_ZERO_HASH_TYPE SHA256)
|
||||
set(LEVEL_ZERO_FILE level-zero-${LEVEL_ZERO_VERSION}.tar.gz)
|
||||
|
||||
set(DPCPP_VERSION 20221019)
|
||||
set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/sycl-nightly/${DPCPP_VERSION}.tar.gz)
|
||||
set(DPCPP_HASH 2f533946e91ce3829431758ea17b0b834b960c1a796e9e4563c86e03eb9603a2)
|
||||
set(DPCPP_VERSION 2022-12)
|
||||
set(DPCPP_URI https://github.com/intel/llvm/archive/refs/tags/${DPCPP_VERSION}.tar.gz)
|
||||
set(DPCPP_HASH 13151d5ae79f7c9c4a9b072a0c486ae7b3c4993e301bb1268c92214451025790)
|
||||
set(DPCPP_HASH_TYPE SHA256)
|
||||
set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
|
||||
|
||||
@ -620,9 +620,9 @@ set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
|
||||
# will take care of building them, unpack is being done in dpcpp_deps.cmake
|
||||
|
||||
# Source llvm/lib/SYCLLowerIR/CMakeLists.txt
|
||||
set(VCINTRINSICS_VERSION abce9184b7a3a7fe1b02289b9285610d9dc45465)
|
||||
set(VCINTRINSICS_VERSION 782fbf7301dc73acaa049a4324c976ad94f587f7)
|
||||
set(VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/${VCINTRINSICS_VERSION}.tar.gz)
|
||||
set(VCINTRINSICS_HASH 3e9fd471246b87633b26f7e15e17ab7733d357458c53d5c5881c03929d6c551f)
|
||||
set(VCINTRINSICS_HASH f4c0ccad8c1f77760364c551c65e8e1cf194d058889fa46d3b1b2d19ec4dc33f)
|
||||
set(VCINTRINSICS_HASH_TYPE SHA256)
|
||||
set(VCINTRINSICS_FILE vc-intrinsics-${VCINTRINSICS_VERSION}.tar.gz)
|
||||
|
||||
@ -657,6 +657,13 @@ set(SPIRV_HEADERS_HASH ec8ecb471a62672697846c436501638ab25447ae9d4a6761e0bfe8a9a
|
||||
set(SPIRV_HEADERS_HASH_TYPE SHA256)
|
||||
set(SPIRV_HEADERS_FILE SPIR-V-Headers-${SPIRV_HEADERS_VERSION}.tar.gz)
|
||||
|
||||
# Source llvm/sycl/plugins/unified_runtime/CMakeLists.txt
|
||||
set(UNIFIED_RUNTIME_VERSION fd711c920acc4434cb52ff18b078c082d9d7f44d)
|
||||
set(UNIFIED_RUNTIME_URI https://github.com/oneapi-src/unified-runtime/archive/${UNIFIED_RUNTIME_VERSION}.tar.gz)
|
||||
set(UNIFIED_RUNTIME_HASH 535ca2ee78f68c5e7e62b10f1bbabd909179488885566e6d9b1fc50e8a1be65f)
|
||||
set(UNIFIED_RUNTIME_HASH_TYPE SHA256)
|
||||
set(UNIFIED_RUNTIME_FILE unified-runtime-${UNIFIED_RUNTIME_VERSION}.tar.gz)
|
||||
|
||||
######################
|
||||
### DPCPP DEPS END ###
|
||||
######################
|
||||
|
@ -14,6 +14,7 @@ graph[autosize = false, size = "25.7,8.3!", resolution = 300];
|
||||
external_dpcpp -- external_mp11;
|
||||
external_dpcpp -- external_level_zero;
|
||||
external_dpcpp -- external_spirvheaders;
|
||||
external_dpcpp -- external_unifiedruntime;
|
||||
external_embree -- external_tbb;
|
||||
external_ffmpeg -- external_zlib;
|
||||
external_ffmpeg -- external_openjpeg;
|
||||
|
@ -34,3 +34,156 @@ diff -Naur llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice
|
||||
libsycldevice-obj
|
||||
libsycldevice-spv)
|
||||
|
||||
diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp
|
||||
index 17eeaafae194..09e6d2217aaa 100644
|
||||
--- a/sycl/source/detail/program_manager/program_manager.cpp
|
||||
+++ b/sycl/source/detail/program_manager/program_manager.cpp
|
||||
@@ -1647,46 +1647,120 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState(
|
||||
}
|
||||
assert(BinImages.size() > 0 && "Expected to find at least one device image");
|
||||
|
||||
+ // Ignore images with incompatible state. Image is considered compatible
|
||||
+ // with a target state if an image is already in the target state or can
|
||||
+ // be brought to target state by compiling/linking/building.
|
||||
+ //
|
||||
+ // Example: an image in "executable" state is not compatible with
|
||||
+ // "input" target state - there is no operation to convert the image it
|
||||
+ // to "input" state. An image in "input" state is compatible with
|
||||
+ // "executable" target state because it can be built to get into
|
||||
+ // "executable" state.
|
||||
+ for (auto It = BinImages.begin(); It != BinImages.end();) {
|
||||
+ if (getBinImageState(*It) > TargetState)
|
||||
+ It = BinImages.erase(It);
|
||||
+ else
|
||||
+ ++It;
|
||||
+ }
|
||||
+
|
||||
std::vector<device_image_plain> SYCLDeviceImages;
|
||||
- for (RTDeviceBinaryImage *BinImage : BinImages) {
|
||||
- const bundle_state ImgState = getBinImageState(BinImage);
|
||||
-
|
||||
- // Ignore images with incompatible state. Image is considered compatible
|
||||
- // with a target state if an image is already in the target state or can
|
||||
- // be brought to target state by compiling/linking/building.
|
||||
- //
|
||||
- // Example: an image in "executable" state is not compatible with
|
||||
- // "input" target state - there is no operation to convert the image it
|
||||
- // to "input" state. An image in "input" state is compatible with
|
||||
- // "executable" target state because it can be built to get into
|
||||
- // "executable" state.
|
||||
- if (ImgState > TargetState)
|
||||
- continue;
|
||||
|
||||
- for (const sycl::device &Dev : Devs) {
|
||||
+ // If a non-input state is requested, we can filter out some compatible
|
||||
+ // images and return only those with the highest compatible state for each
|
||||
+ // device-kernel pair. This map tracks how many kernel-device pairs need each
|
||||
+ // image, so that any unneeded ones are skipped.
|
||||
+ // TODO this has no effect if the requested state is input, consider having
|
||||
+ // a separate branch for that case to avoid unnecessary tracking work.
|
||||
+ struct DeviceBinaryImageInfo {
|
||||
+ std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
|
||||
+ bundle_state State = bundle_state::input;
|
||||
+ int RequirementCounter = 0;
|
||||
+ };
|
||||
+ std::unordered_map<RTDeviceBinaryImage *, DeviceBinaryImageInfo> ImageInfoMap;
|
||||
+
|
||||
+ for (const sycl::device &Dev : Devs) {
|
||||
+ // Track the highest image state for each requested kernel.
|
||||
+ using StateImagesPairT =
|
||||
+ std::pair<bundle_state, std::vector<RTDeviceBinaryImage *>>;
|
||||
+ using KernelImageMapT =
|
||||
+ std::map<kernel_id, StateImagesPairT, LessByNameComp>;
|
||||
+ KernelImageMapT KernelImageMap;
|
||||
+ if (!KernelIDs.empty())
|
||||
+ for (const kernel_id &KernelID : KernelIDs)
|
||||
+ KernelImageMap.insert({KernelID, {}});
|
||||
+
|
||||
+ for (RTDeviceBinaryImage *BinImage : BinImages) {
|
||||
if (!compatibleWithDevice(BinImage, Dev) ||
|
||||
!doesDevSupportImgAspects(Dev, *BinImage))
|
||||
continue;
|
||||
|
||||
- std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
|
||||
- // Collect kernel names for the image
|
||||
- {
|
||||
- std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
|
||||
- KernelIDs = m_BinImg2KernelIDs[BinImage];
|
||||
- // If the image does not contain any non-service kernels we can skip it.
|
||||
- if (!KernelIDs || KernelIDs->empty())
|
||||
- continue;
|
||||
+ auto InsertRes = ImageInfoMap.insert({BinImage, {}});
|
||||
+ DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second;
|
||||
+ if (InsertRes.second) {
|
||||
+ ImgInfo.State = getBinImageState(BinImage);
|
||||
+ // Collect kernel names for the image
|
||||
+ {
|
||||
+ std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
|
||||
+ ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
|
||||
+ }
|
||||
}
|
||||
+ const bundle_state ImgState = ImgInfo.State;
|
||||
+ const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
|
||||
+ ImgInfo.KernelIDs;
|
||||
+ int &ImgRequirementCounter = ImgInfo.RequirementCounter;
|
||||
|
||||
- DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
|
||||
- BinImage, Ctx, Devs, ImgState, KernelIDs, /*PIProgram=*/nullptr);
|
||||
+ // If the image does not contain any non-service kernels we can skip it.
|
||||
+ if (!ImageKernelIDs || ImageKernelIDs->empty())
|
||||
+ continue;
|
||||
|
||||
- SYCLDeviceImages.push_back(
|
||||
- createSyclObjFromImpl<device_image_plain>(Impl));
|
||||
- break;
|
||||
+ // Update tracked information.
|
||||
+ for (kernel_id &KernelID : *ImageKernelIDs) {
|
||||
+ StateImagesPairT *StateImagesPair;
|
||||
+ // If only specific kernels are requested, ignore the rest.
|
||||
+ if (!KernelIDs.empty()) {
|
||||
+ auto It = KernelImageMap.find(KernelID);
|
||||
+ if (It == KernelImageMap.end())
|
||||
+ continue;
|
||||
+ StateImagesPair = &It->second;
|
||||
+ } else
|
||||
+ StateImagesPair = &KernelImageMap[KernelID];
|
||||
+
|
||||
+ auto &[KernelImagesState, KernelImages] = *StateImagesPair;
|
||||
+
|
||||
+ if (KernelImages.empty()) {
|
||||
+ KernelImagesState = ImgState;
|
||||
+ KernelImages.push_back(BinImage);
|
||||
+ ++ImgRequirementCounter;
|
||||
+ } else if (KernelImagesState < ImgState) {
|
||||
+ for (RTDeviceBinaryImage *Img : KernelImages) {
|
||||
+ auto It = ImageInfoMap.find(Img);
|
||||
+ assert(It != ImageInfoMap.end());
|
||||
+ assert(It->second.RequirementCounter > 0);
|
||||
+ --(It->second.RequirementCounter);
|
||||
+ }
|
||||
+ KernelImages.clear();
|
||||
+ KernelImages.push_back(BinImage);
|
||||
+ KernelImagesState = ImgState;
|
||||
+ ++ImgRequirementCounter;
|
||||
+ } else if (KernelImagesState == ImgState) {
|
||||
+ KernelImages.push_back(BinImage);
|
||||
+ ++ImgRequirementCounter;
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
+ for (const auto &ImgInfoPair : ImageInfoMap) {
|
||||
+ if (ImgInfoPair.second.RequirementCounter == 0)
|
||||
+ continue;
|
||||
+
|
||||
+ DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
|
||||
+ ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State,
|
||||
+ ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr);
|
||||
+
|
||||
+ SYCLDeviceImages.push_back(createSyclObjFromImpl<device_image_plain>(Impl));
|
||||
+ }
|
||||
+
|
||||
return SYCLDeviceImages;
|
||||
}
|
||||
|
||||
|
@ -40,12 +40,12 @@ bool device_oneapi_init()
|
||||
if (getenv("SYCL_CACHE_TRESHOLD") == nullptr) {
|
||||
_putenv_s("SYCL_CACHE_THRESHOLD", "0");
|
||||
}
|
||||
if (getenv("SYCL_DEVICE_FILTER") == nullptr) {
|
||||
if (getenv("ONEAPI_DEVICE_SELECTOR") == nullptr) {
|
||||
if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
|
||||
_putenv_s("SYCL_DEVICE_FILTER", "level_zero");
|
||||
_putenv_s("ONEAPI_DEVICE_SELECTOR", "level_zero:*");
|
||||
}
|
||||
else {
|
||||
_putenv_s("SYCL_DEVICE_FILTER", "level_zero,cuda,hip");
|
||||
_putenv_s("ONEAPI_DEVICE_SELECTOR", "!opencl:*");
|
||||
}
|
||||
}
|
||||
if (getenv("SYCL_ENABLE_PCI") == nullptr) {
|
||||
@ -58,10 +58,10 @@ bool device_oneapi_init()
|
||||
setenv("SYCL_CACHE_PERSISTENT", "1", false);
|
||||
setenv("SYCL_CACHE_THRESHOLD", "0", false);
|
||||
if (getenv("CYCLES_ONEAPI_ALL_DEVICES") == nullptr) {
|
||||
setenv("SYCL_DEVICE_FILTER", "level_zero", false);
|
||||
setenv("ONEAPI_DEVICE_SELECTOR", "level_zero:*", false);
|
||||
}
|
||||
else {
|
||||
setenv("SYCL_DEVICE_FILTER", "level_zero,cuda,hip", false);
|
||||
setenv("ONEAPI_DEVICE_SELECTOR", "!opencl:*", false);
|
||||
}
|
||||
setenv("SYCL_ENABLE_PCI", "1", false);
|
||||
setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
|
||||
|
Loading…
Reference in New Issue
Block a user