Merge branch 'blender-v4.2-release'

This commit is contained in:
Lukas Stockner 2024-06-23 01:23:51 +02:00
commit 4547260bda
9 changed files with 152 additions and 20 deletions

@ -0,0 +1,54 @@
/* SPDX-FileCopyrightText: 2024 Blender Foundation
*
* SPDX-License-Identifier: Apache-2.0 */
#include <cstdint>
#include <fstream>
#include <vector>
#include <zstd.h>
/* Compress a single file with Zstandard.
 *
 * Usage: zstd_compress <input> <output>
 *
 * The whole input file is read into memory, compressed in one shot at
 * compression level 19 and written to the output path.
 *
 * Returns 0 on success and -1 on any failure (missing arguments, I/O error or
 * compression error). */
int main(int argc, const char **argv)
{
  if (argc < 3) {
    return -1;
  }

  /* TODO: This might fail for non-ASCII paths on Windows... */
  std::ifstream in(argv[1], std::ios_base::binary);
  if (!in) {
    return -1;
  }

  /* Determine the input size by seeking to the end of the file. */
  in.seekg(0, std::ios_base::end);
  const std::streamoff in_end = in.tellg();
  in.seekg(0, std::ios_base::beg);
  /* `tellg()` reports failure as -1, reject it before converting to an unsigned size. */
  if (!in || in_end < 0) {
    return -1;
  }
  const size_t in_size = static_cast<size_t>(in_end);

  std::vector<char> in_data(in_size);
  in.read(in_data.data(), in_size);
  if (!in) {
    return -1;
  }

  /* Worst-case size of the compressed data, used to size the output buffer. */
  size_t out_size = ZSTD_compressBound(in_size);
  if (ZSTD_isError(out_size)) {
    return -1;
  }

  std::vector<char> out_data(out_size);
  out_size = ZSTD_compress(out_data.data(), out_data.size(), in_data.data(), in_data.size(), 19);
  if (ZSTD_isError(out_size)) {
    return -1;
  }

  /* Only open (and thereby truncate) the output once there is data to write, so that a failure
   * above does not leave an empty output file behind. */
  std::ofstream out(argv[2], std::ios_base::binary);
  if (!out) {
    return -1;
  }

  out.write(out_data.data(), out_size);
  /* Close explicitly before checking the stream state: the data may still sit in the stream
   * buffer, and an error during the implicit flush in the destructor would go unnoticed. */
  out.close();
  if (!out) {
    return -1;
  }

  return 0;
}

@ -256,7 +256,7 @@ string CUDADevice::compile_kernel(const string &common_cflags,
/* Attempt to use kernel provided with Blender. */
if (!use_adaptive_compilation()) {
if (!force_ptx) {
const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin.zst", name, major, minor));
VLOG_INFO << "Testing for pre-compiled kernel " << cubin << ".";
if (path_exists(cubin)) {
VLOG_INFO << "Using precompiled kernel.";
@ -268,7 +268,7 @@ string CUDADevice::compile_kernel(const string &common_cflags,
int ptx_major = major, ptx_minor = minor;
while (ptx_major >= 3) {
const string ptx = path_get(
string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, ptx_minor));
string_printf("lib/%s_compute_%d%d.ptx.zst", name, ptx_major, ptx_minor));
VLOG_INFO << "Testing for pre-compiled kernel " << ptx << ".";
if (path_exists(ptx)) {
VLOG_INFO << "Using precompiled kernel.";
@ -440,7 +440,7 @@ bool CUDADevice::load_kernels(const uint kernel_features)
string cubin_data;
CUresult result;
if (path_read_text(cubin, cubin_data)) {
if (path_read_compressed_text(cubin, cubin_data)) {
result = cuModuleLoadData(&cuModule, cubin_data.c_str());
}
else {

@ -231,7 +231,7 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
/* Attempt to use kernel provided with Blender. */
if (!use_adaptive_compilation()) {
const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, arch.c_str()));
const string fatbin = path_get(string_printf("lib/%s_%s.fatbin.zst", name, arch.c_str()));
VLOG_INFO << "Testing for pre-compiled kernel " << fatbin << ".";
if (path_exists(fatbin)) {
VLOG_INFO << "Using precompiled kernel.";
@ -387,7 +387,7 @@ bool HIPDevice::load_kernels(const uint kernel_features)
string fatbin_data;
hipError_t result;
if (path_read_text(fatbin, fatbin_data))
if (path_read_compressed_text(fatbin, fatbin_data))
result = hipModuleLoadData(&hipModule, fatbin_data.c_str());
else
result = hipErrorFileNotFound;

@ -141,7 +141,7 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name,
const std::string arch = hipDeviceArch(hipDevId);
if (!use_adaptive_compilation()) {
const string fatbin = path_get(string_printf("lib/%s_rt_gfx.hipfb", name));
const string fatbin = path_get(string_printf("lib/%s_rt_gfx.hipfb.zst", name));
VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
if (path_exists(fatbin)) {
VLOG(1) << "Using precompiled kernel.";
@ -309,8 +309,7 @@ bool HIPRTDevice::load_kernels(const uint kernel_features)
string fatbin_data;
hipError_t result;
if (path_read_text(fatbin, fatbin_data)) {
if (path_read_compressed_text(fatbin, fatbin_data)) {
result = hipModuleLoadData(&hipModule, fatbin_data.c_str());
}
else

@ -216,7 +216,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
"";
string ptx_filename;
if (need_optix_kernels) {
ptx_filename = path_get("lib/kernel_optix" + suffix + ".ptx");
ptx_filename = path_get("lib/kernel_optix" + suffix + ".ptx.zst");
if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) {
std::string optix_include_dir = get_optix_include_dir();
if (optix_include_dir.empty()) {
@ -348,7 +348,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
string cflags = compile_kernel_get_common_cflags(kernel_features);
ptx_filename = compile_kernel(cflags, ("kernel" + suffix).c_str(), "optix", true);
}
if (ptx_filename.empty() || !path_read_text(ptx_filename, ptx_data)) {
if (ptx_filename.empty() || !path_read_compressed_text(ptx_filename, ptx_data)) {
set_error(string_printf("Failed to load OptiX kernel from '%s'", ptx_filename.c_str()));
return false;
}
@ -798,8 +798,8 @@ bool OptiXDevice::load_osl_kernels()
osl_modules.resize(osl_kernels.size() + 1);
{ /* Load and compile PTX module with OSL services. */
string ptx_data, ptx_filename = path_get("lib/kernel_optix_osl_services.ptx");
if (!path_read_text(ptx_filename, ptx_data)) {
string ptx_data, ptx_filename = path_get("lib/kernel_optix_osl_services.ptx.zst");
if (!path_read_compressed_text(ptx_filename, ptx_data)) {
set_error(string_printf("Failed to load OptiX OSL services kernel from '%s'",
ptx_filename.c_str()));
return false;

@ -417,6 +417,11 @@ set(LIB
)
# Zstd compressor for kernels
add_executable(zstd_compress ../cmake/zstd_compress.cpp)
target_include_directories(zstd_compress SYSTEM PRIVATE ${ZSTD_INCLUDE_DIRS})
target_link_libraries(zstd_compress ${ZSTD_LIBRARIES} ${PTHREADS_LIBRARIES})
# CUDA module
if(WITH_CYCLES_CUDA_BINARIES)
@ -456,6 +461,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(format "cubin")
endif()
set(cuda_file ${name}_${arch}.${format})
set(cuda_file_compressed ${cuda_file}.zst)
set(kernel_sources ${sources})
if(NOT ${prev_arch} STREQUAL "none")
@ -518,9 +524,14 @@ if(WITH_CYCLES_CUDA_BINARIES)
DEPENDS ${kernel_sources})
endif()
add_custom_command(
OUTPUT ${cuda_file_compressed}
COMMAND "$<TARGET_FILE:zstd_compress>" ${cuda_file} ${cuda_file_compressed}
DEPENDS ${cuda_file})
unset(_cuda_nvcc_args)
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND cuda_cubins ${cuda_file})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file_compressed}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND cuda_cubins ${cuda_file_compressed})
unset(cuda_debug_flags)
endmacro()
@ -604,6 +615,7 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
macro(CYCLES_HIP_KERNEL_ADD arch name flags sources experimental)
set(format "fatbin")
set(hip_file ${name}_${arch}.${format})
set(hip_file_compressed ${hip_file}.zst)
set(kernel_sources ${sources})
set(hip_kernel_src "/device/hip/${name}.cpp")
@ -658,8 +670,12 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
OUTPUT ${hip_file}
COMMAND ${hip_command} ${hip_flags}
DEPENDS ${kernel_sources})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND hip_fatbins ${hip_file})
add_custom_command(
OUTPUT ${hip_file_compressed}
COMMAND "$<TARGET_FILE:zstd_compress>" ${hip_file} ${hip_file_compressed}
DEPENDS ${hip_file})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file_compressed}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND hip_fatbins ${hip_file_compressed})
endmacro()
foreach(arch ${CYCLES_HIP_BINARIES_ARCH})
@ -681,6 +697,7 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
${SRC_UTIL_HEADERS})
set(bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.bc)
set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.hipfb)
set(hiprt_file_compressed ${hiprt_file}.zst)
set(kernel_sources ${hiprt_sources})
set(hiprt_kernel_src "/device/hiprt/kernel.cpp")
if(WIN32)
@ -745,8 +762,12 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
OUTPUT ${hiprt_file}
COMMAND ${hiprt_link_command} ${hiprt_link_flags}
DEPENDS ${bitcode_file})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file}" ${CYCLES_INSTALL_PATH}/lib)
add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file})
add_custom_command(
OUTPUT ${hiprt_file_compressed}
COMMAND "$<TARGET_FILE:zstd_compress>" ${hiprt_file} ${hiprt_file_compressed}
DEPENDS ${hiprt_file})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file_compressed}" ${CYCLES_INSTALL_PATH}/lib)
add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file_compressed})
cycles_set_solution_folder(cycles_kernel_hiprt)
endif()
@ -755,6 +776,7 @@ endif()
if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
macro(cycles_optix_kernel_add name input flags)
set(output "${CMAKE_CURRENT_BINARY_DIR}/${name}.ptx")
set(output_compressed "${output}.zst")
set(cuda_flags ${flags}
-I "${OPTIX_INCLUDE_DIR}"
@ -796,9 +818,14 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
WORKING_DIRECTORY
"${CMAKE_CURRENT_SOURCE_DIR}")
list(APPEND optix_ptx ${output})
add_custom_command(
OUTPUT ${output_compressed}
COMMAND "$<TARGET_FILE:zstd_compress>" ${output} ${output_compressed}
DEPENDS ${output})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND optix_ptx ${output_compressed})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output_compressed}" ${CYCLES_INSTALL_PATH}/lib)
endmacro()
cycles_optix_kernel_add(

@ -7,6 +7,7 @@ set(INC
)
set(INC_SYS
${ZSTD_INCLUDE_DIRS}
)
set(SRC
@ -32,6 +33,7 @@ set(SRC
set(LIB
${TBB_LIBRARIES}
${ZSTD_LIBRARIES}
)
set(SRC_HEADERS

@ -19,6 +19,8 @@ OIIO_NAMESPACE_USING
#include <sys/stat.h>
#include <zstd.h>
#if defined(_WIN32)
# define DIR_SEP '\\'
# define DIR_SEP_ALT '/'
@ -704,6 +706,36 @@ bool path_read_binary(const string &path, vector<uint8_t> &binary)
return true;
}
/* Read the file at `path` into `binary`, decompressing it when the path ends in ".zst".
 *
 * Paths without the ".zst" suffix are forwarded unchanged to `path_read_binary()`.
 * For ".zst" paths the decompressed size is taken from the Zstandard frame header,
 * so frames that do not store their content size are rejected.
 *
 * Returns true when `binary` holds the complete file contents, false otherwise. */
bool path_read_compressed_binary(const string &path, vector<uint8_t> &binary)
{
  if (!string_endswith(path, ".zst")) {
    return path_read_binary(path, binary);
  }

  vector<uint8_t> compressed;
  if (!path_read_binary(path, compressed)) {
    return false;
  }

  /* Keep the full `unsigned long long` result: narrowing it to `size_t` before the comparisons
   * would make the sentinel checks below fail on platforms where `size_t` is 32 bits wide. */
  const unsigned long long content_size = ZSTD_getFrameContentSize(compressed.data(),
                                                                   compressed.size());

  if (content_size == ZSTD_CONTENTSIZE_ERROR) {
    /* Potentially corrupted file? */
    return false;
  }

  if (content_size == ZSTD_CONTENTSIZE_UNKNOWN) {
    /* Technically this is an optional field, but we can expect it to be set for now.
     * Otherwise we'd need streaming decompression and repeated resizing of the vector. */
    return false;
  }

  /* Reject sizes that can not be represented (and therefore not allocated) as `size_t`. */
  const size_t full_size = static_cast<size_t>(content_size);
  if (full_size != content_size) {
    return false;
  }

  binary.resize(full_size);

  const size_t result = ZSTD_decompress(
      binary.data(), binary.size(), compressed.data(), compressed.size());
  if (ZSTD_isError(result)) {
    return false;
  }

  /* On success the return value is the number of bytes written; it has to match the size
   * announced by the frame header, otherwise part of `binary` would be left unfilled. */
  return result == full_size;
}
bool path_read_text(const string &path, string &text)
{
vector<uint8_t> binary;
@ -719,6 +751,21 @@ bool path_read_text(const string &path, string &text)
return true;
}
bool path_read_compressed_text(const string &path, string &text)
{
vector<uint8_t> binary;
if (!path_exists(path) || !path_read_compressed_binary(path, binary)) {
return false;
}
const char *str = (const char *)&binary[0];
size_t size = binary.size();
text = string(str, size);
return true;
}
uint64_t path_modified_time(const string &path)
{
path_stat_t st;

@ -50,6 +50,9 @@ bool path_write_text(const string &path, string &text);
bool path_read_binary(const string &path, vector<uint8_t> &binary);
bool path_read_text(const string &path, string &text);
bool path_read_compressed_binary(const string &path, vector<uint8_t> &binary);
bool path_read_compressed_text(const string &path, string &text);
/* File manipulation. */
bool path_remove(const string &path);