Merge branch 'blender-v4.2-release'
This commit is contained in:
commit
4547260bda
54
intern/cycles/cmake/zstd_compress.cpp
Normal file
54
intern/cycles/cmake/zstd_compress.cpp
Normal file
@ -0,0 +1,54 @@
|
||||
/* SPDX-FileCopyrightText: 2024 Blender Foundation
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 */
|
||||
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
#include <zstd.h>
|
||||
|
||||
int main(int argc, const char **argv)
|
||||
{
|
||||
if (argc < 3) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* TODO: This might fail for non-ASCII paths on Windows... */
|
||||
std::ifstream in(argv[1], std::ios_base::binary);
|
||||
std::ofstream out(argv[2], std::ios_base::binary);
|
||||
if (!in || !out) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
in.seekg(0, std::ios_base::end);
|
||||
size_t in_size = in.tellg();
|
||||
in.seekg(0, std::ios_base::beg);
|
||||
if (!in) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<char> in_data(in_size);
|
||||
in.read(in_data.data(), in_size);
|
||||
if (!in) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t out_size = ZSTD_compressBound(in_size);
|
||||
if (ZSTD_isError(out_size)) {
|
||||
return -1;
|
||||
}
|
||||
std::vector<char> out_data(out_size);
|
||||
|
||||
out_size = ZSTD_compress(out_data.data(), out_data.size(), in_data.data(), in_data.size(), 19);
|
||||
if (ZSTD_isError(out_size)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
out.write(out_data.data(), out_size);
|
||||
if (!out) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -256,7 +256,7 @@ string CUDADevice::compile_kernel(const string &common_cflags,
|
||||
/* Attempt to use kernel provided with Blender. */
|
||||
if (!use_adaptive_compilation()) {
|
||||
if (!force_ptx) {
|
||||
const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
|
||||
const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin.zst", name, major, minor));
|
||||
VLOG_INFO << "Testing for pre-compiled kernel " << cubin << ".";
|
||||
if (path_exists(cubin)) {
|
||||
VLOG_INFO << "Using precompiled kernel.";
|
||||
@ -268,7 +268,7 @@ string CUDADevice::compile_kernel(const string &common_cflags,
|
||||
int ptx_major = major, ptx_minor = minor;
|
||||
while (ptx_major >= 3) {
|
||||
const string ptx = path_get(
|
||||
string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, ptx_minor));
|
||||
string_printf("lib/%s_compute_%d%d.ptx.zst", name, ptx_major, ptx_minor));
|
||||
VLOG_INFO << "Testing for pre-compiled kernel " << ptx << ".";
|
||||
if (path_exists(ptx)) {
|
||||
VLOG_INFO << "Using precompiled kernel.";
|
||||
@ -440,7 +440,7 @@ bool CUDADevice::load_kernels(const uint kernel_features)
|
||||
string cubin_data;
|
||||
CUresult result;
|
||||
|
||||
if (path_read_text(cubin, cubin_data)) {
|
||||
if (path_read_compressed_text(cubin, cubin_data)) {
|
||||
result = cuModuleLoadData(&cuModule, cubin_data.c_str());
|
||||
}
|
||||
else {
|
||||
|
@ -231,7 +231,7 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
|
||||
|
||||
/* Attempt to use kernel provided with Blender. */
|
||||
if (!use_adaptive_compilation()) {
|
||||
const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, arch.c_str()));
|
||||
const string fatbin = path_get(string_printf("lib/%s_%s.fatbin.zst", name, arch.c_str()));
|
||||
VLOG_INFO << "Testing for pre-compiled kernel " << fatbin << ".";
|
||||
if (path_exists(fatbin)) {
|
||||
VLOG_INFO << "Using precompiled kernel.";
|
||||
@ -387,7 +387,7 @@ bool HIPDevice::load_kernels(const uint kernel_features)
|
||||
string fatbin_data;
|
||||
hipError_t result;
|
||||
|
||||
if (path_read_text(fatbin, fatbin_data))
|
||||
if (path_read_compressed_text(fatbin, fatbin_data))
|
||||
result = hipModuleLoadData(&hipModule, fatbin_data.c_str());
|
||||
else
|
||||
result = hipErrorFileNotFound;
|
||||
|
@ -141,7 +141,7 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name,
|
||||
const std::string arch = hipDeviceArch(hipDevId);
|
||||
|
||||
if (!use_adaptive_compilation()) {
|
||||
const string fatbin = path_get(string_printf("lib/%s_rt_gfx.hipfb", name));
|
||||
const string fatbin = path_get(string_printf("lib/%s_rt_gfx.hipfb.zst", name));
|
||||
VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
|
||||
if (path_exists(fatbin)) {
|
||||
VLOG(1) << "Using precompiled kernel.";
|
||||
@ -309,8 +309,7 @@ bool HIPRTDevice::load_kernels(const uint kernel_features)
|
||||
string fatbin_data;
|
||||
hipError_t result;
|
||||
|
||||
if (path_read_text(fatbin, fatbin_data)) {
|
||||
|
||||
if (path_read_compressed_text(fatbin, fatbin_data)) {
|
||||
result = hipModuleLoadData(&hipModule, fatbin_data.c_str());
|
||||
}
|
||||
else
|
||||
|
@ -216,7 +216,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
||||
"";
|
||||
string ptx_filename;
|
||||
if (need_optix_kernels) {
|
||||
ptx_filename = path_get("lib/kernel_optix" + suffix + ".ptx");
|
||||
ptx_filename = path_get("lib/kernel_optix" + suffix + ".ptx.zst");
|
||||
if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) {
|
||||
std::string optix_include_dir = get_optix_include_dir();
|
||||
if (optix_include_dir.empty()) {
|
||||
@ -348,7 +348,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
|
||||
string cflags = compile_kernel_get_common_cflags(kernel_features);
|
||||
ptx_filename = compile_kernel(cflags, ("kernel" + suffix).c_str(), "optix", true);
|
||||
}
|
||||
if (ptx_filename.empty() || !path_read_text(ptx_filename, ptx_data)) {
|
||||
if (ptx_filename.empty() || !path_read_compressed_text(ptx_filename, ptx_data)) {
|
||||
set_error(string_printf("Failed to load OptiX kernel from '%s'", ptx_filename.c_str()));
|
||||
return false;
|
||||
}
|
||||
@ -798,8 +798,8 @@ bool OptiXDevice::load_osl_kernels()
|
||||
osl_modules.resize(osl_kernels.size() + 1);
|
||||
|
||||
{ /* Load and compile PTX module with OSL services. */
|
||||
string ptx_data, ptx_filename = path_get("lib/kernel_optix_osl_services.ptx");
|
||||
if (!path_read_text(ptx_filename, ptx_data)) {
|
||||
string ptx_data, ptx_filename = path_get("lib/kernel_optix_osl_services.ptx.zst");
|
||||
if (!path_read_compressed_text(ptx_filename, ptx_data)) {
|
||||
set_error(string_printf("Failed to load OptiX OSL services kernel from '%s'",
|
||||
ptx_filename.c_str()));
|
||||
return false;
|
||||
|
@ -417,6 +417,11 @@ set(LIB
|
||||
|
||||
)
|
||||
|
||||
# Zstd compressor for kernels
|
||||
add_executable(zstd_compress ../cmake/zstd_compress.cpp)
|
||||
target_include_directories(zstd_compress SYSTEM PRIVATE ${ZSTD_INCLUDE_DIRS})
|
||||
target_link_libraries(zstd_compress ${ZSTD_LIBRARIES} ${PTHREADS_LIBRARIES})
|
||||
|
||||
# CUDA module
|
||||
|
||||
if(WITH_CYCLES_CUDA_BINARIES)
|
||||
@ -456,6 +461,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
set(format "cubin")
|
||||
endif()
|
||||
set(cuda_file ${name}_${arch}.${format})
|
||||
set(cuda_file_compressed ${cuda_file}.zst)
|
||||
|
||||
set(kernel_sources ${sources})
|
||||
if(NOT ${prev_arch} STREQUAL "none")
|
||||
@ -518,9 +524,14 @@ if(WITH_CYCLES_CUDA_BINARIES)
|
||||
DEPENDS ${kernel_sources})
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${cuda_file_compressed}
|
||||
COMMAND "$<TARGET_FILE:zstd_compress>" ${cuda_file} ${cuda_file_compressed}
|
||||
DEPENDS ${cuda_file})
|
||||
|
||||
unset(_cuda_nvcc_args)
|
||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
list(APPEND cuda_cubins ${cuda_file})
|
||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file_compressed}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
list(APPEND cuda_cubins ${cuda_file_compressed})
|
||||
|
||||
unset(cuda_debug_flags)
|
||||
endmacro()
|
||||
@ -604,6 +615,7 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
||||
macro(CYCLES_HIP_KERNEL_ADD arch name flags sources experimental)
|
||||
set(format "fatbin")
|
||||
set(hip_file ${name}_${arch}.${format})
|
||||
set(hip_file_compressed ${hip_file}.zst)
|
||||
set(kernel_sources ${sources})
|
||||
|
||||
set(hip_kernel_src "/device/hip/${name}.cpp")
|
||||
@ -658,8 +670,12 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
||||
OUTPUT ${hip_file}
|
||||
COMMAND ${hip_command} ${hip_flags}
|
||||
DEPENDS ${kernel_sources})
|
||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
list(APPEND hip_fatbins ${hip_file})
|
||||
add_custom_command(
|
||||
OUTPUT ${hip_file_compressed}
|
||||
COMMAND "$<TARGET_FILE:zstd_compress>" ${hip_file} ${hip_file_compressed}
|
||||
DEPENDS ${hip_file})
|
||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file_compressed}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
list(APPEND hip_fatbins ${hip_file_compressed})
|
||||
endmacro()
|
||||
|
||||
foreach(arch ${CYCLES_HIP_BINARIES_ARCH})
|
||||
@ -681,6 +697,7 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
|
||||
${SRC_UTIL_HEADERS})
|
||||
set(bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.bc)
|
||||
set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.hipfb)
|
||||
set(hiprt_file_compressed ${hiprt_file}.zst)
|
||||
set(kernel_sources ${hiprt_sources})
|
||||
set(hiprt_kernel_src "/device/hiprt/kernel.cpp")
|
||||
if(WIN32)
|
||||
@ -745,8 +762,12 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
|
||||
OUTPUT ${hiprt_file}
|
||||
COMMAND ${hiprt_link_command} ${hiprt_link_flags}
|
||||
DEPENDS ${bitcode_file})
|
||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file})
|
||||
add_custom_command(
|
||||
OUTPUT ${hiprt_file_compressed}
|
||||
COMMAND "$<TARGET_FILE:zstd_compress>" ${hiprt_file} ${hiprt_file_compressed}
|
||||
DEPENDS ${hiprt_file})
|
||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file_compressed}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file_compressed})
|
||||
cycles_set_solution_folder(cycles_kernel_hiprt)
|
||||
endif()
|
||||
|
||||
@ -755,6 +776,7 @@ endif()
|
||||
if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
|
||||
macro(cycles_optix_kernel_add name input flags)
|
||||
set(output "${CMAKE_CURRENT_BINARY_DIR}/${name}.ptx")
|
||||
set(output_compressed "${output}.zst")
|
||||
|
||||
set(cuda_flags ${flags}
|
||||
-I "${OPTIX_INCLUDE_DIR}"
|
||||
@ -796,9 +818,14 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
|
||||
WORKING_DIRECTORY
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
|
||||
list(APPEND optix_ptx ${output})
|
||||
add_custom_command(
|
||||
OUTPUT ${output_compressed}
|
||||
COMMAND "$<TARGET_FILE:zstd_compress>" ${output} ${output_compressed}
|
||||
DEPENDS ${output})
|
||||
|
||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
list(APPEND optix_ptx ${output_compressed})
|
||||
|
||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output_compressed}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
endmacro()
|
||||
|
||||
cycles_optix_kernel_add(
|
||||
|
@ -7,6 +7,7 @@ set(INC
|
||||
)
|
||||
|
||||
set(INC_SYS
|
||||
${ZSTD_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
set(SRC
|
||||
@ -32,6 +33,7 @@ set(SRC
|
||||
|
||||
set(LIB
|
||||
${TBB_LIBRARIES}
|
||||
${ZSTD_LIBRARIES}
|
||||
)
|
||||
|
||||
set(SRC_HEADERS
|
||||
|
@ -19,6 +19,8 @@ OIIO_NAMESPACE_USING
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <zstd.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
# define DIR_SEP '\\'
|
||||
# define DIR_SEP_ALT '/'
|
||||
@ -704,6 +706,36 @@ bool path_read_binary(const string &path, vector<uint8_t> &binary)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool path_read_compressed_binary(const string &path, vector<uint8_t> &binary)
|
||||
{
|
||||
if (!string_endswith(path, ".zst")) {
|
||||
return path_read_binary(path, binary);
|
||||
}
|
||||
|
||||
vector<uint8_t> compressed;
|
||||
if (!path_read_binary(path, compressed)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t full_size = ZSTD_getFrameContentSize(compressed.data(), compressed.size());
|
||||
|
||||
if (full_size == ZSTD_CONTENTSIZE_ERROR) {
|
||||
/* Potentially corrupted file? */
|
||||
return false;
|
||||
}
|
||||
if (full_size == ZSTD_CONTENTSIZE_UNKNOWN) {
|
||||
/* Technically this is an optional field, but we can expect it to be set for now.
|
||||
* Otherwise we'd need streaming decompression and repeated resizing of the vector. */
|
||||
return false;
|
||||
}
|
||||
|
||||
binary.resize(full_size);
|
||||
|
||||
size_t err = ZSTD_decompress(binary.data(), binary.size(), compressed.data(), compressed.size());
|
||||
|
||||
return ZSTD_isError(err) == 0;
|
||||
}
|
||||
|
||||
bool path_read_text(const string &path, string &text)
|
||||
{
|
||||
vector<uint8_t> binary;
|
||||
@ -719,6 +751,21 @@ bool path_read_text(const string &path, string &text)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool path_read_compressed_text(const string &path, string &text)
|
||||
{
|
||||
vector<uint8_t> binary;
|
||||
|
||||
if (!path_exists(path) || !path_read_compressed_binary(path, binary)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const char *str = (const char *)&binary[0];
|
||||
size_t size = binary.size();
|
||||
text = string(str, size);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
uint64_t path_modified_time(const string &path)
|
||||
{
|
||||
path_stat_t st;
|
||||
|
@ -50,6 +50,9 @@ bool path_write_text(const string &path, string &text);
|
||||
bool path_read_binary(const string &path, vector<uint8_t> &binary);
|
||||
bool path_read_text(const string &path, string &text);
|
||||
|
||||
bool path_read_compressed_binary(const string &path, vector<uint8_t> &binary);
|
||||
bool path_read_compressed_text(const string &path, string &text);
|
||||
|
||||
/* File manipulation. */
|
||||
bool path_remove(const string &path);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user