Merge branch 'master' into blender2.8

Conflicts:
	source/blender/blenloader/intern/versioning_270.c
	source/blender/depsgraph/intern/depsgraph_tag.cc
	source/blender/editors/mask/mask_draw.c
This commit is contained in:
Bastien Montagne 2017-05-19 09:36:14 +02:00
commit 1f46da922a
49 changed files with 599 additions and 338 deletions

@ -284,7 +284,7 @@ NO_BUILD=false
NO_CONFIRM=false
USE_CXX11=true # Mandatory in blender2.8
PYTHON_VERSION="3.5.2"
PYTHON_VERSION="3.5.3"
PYTHON_VERSION_MIN="3.5"
PYTHON_FORCE_BUILD=false
PYTHON_FORCE_REBUILD=false
@ -361,8 +361,7 @@ ALEMBIC_FORCE_BUILD=false
ALEMBIC_FORCE_REBUILD=false
ALEMBIC_SKIP=false
# Version??
OPENCOLLADA_VERSION="1.3"
OPENCOLLADA_VERSION="1.6.47"
OPENCOLLADA_FORCE_BUILD=false
OPENCOLLADA_FORCE_REBUILD=false
OPENCOLLADA_SKIP=false
@ -729,7 +728,10 @@ _boost_version_nodots=`echo "$BOOST_VERSION" | sed -r 's/\./_/g'`
BOOST_SOURCE=( "http://sourceforge.net/projects/boost/files/boost/$BOOST_VERSION/boost_$_boost_version_nodots.tar.bz2/download" )
BOOST_BUILD_MODULES="--with-system --with-filesystem --with-thread --with-regex --with-locale --with-date_time --with-wave --with-iostreams --with-python --with-program_options"
OCIO_USE_REPO=true
OCIO_SOURCE=( "https://github.com/imageworks/OpenColorIO/tarball/v$OCIO_VERSION" )
OCIO_SOURCE_REPO=( "https://github.com/imageworks/OpenColorIO.git" )
OCIO_SOURCE_REPO_UID="6de971097c7f552300f669ed69ca0b6cf5a70843"
OPENEXR_USE_REPO=false
OPENEXR_SOURCE=( "http://download.savannah.nongnu.org/releases/openexr/openexr-$OPENEXR_VERSION.tar.gz" )
@ -778,7 +780,7 @@ ALEMBIC_SOURCE=( "https://github.com/alembic/alembic/archive/${ALEMBIC_VERSION}.
# ALEMBIC_SOURCE_REPO_BRANCH="master"
OPENCOLLADA_SOURCE=( "https://github.com/KhronosGroup/OpenCOLLADA.git" )
OPENCOLLADA_REPO_UID="3335ac164e68b2512a40914b14c74db260e6ff7d"
OPENCOLLADA_REPO_UID="22b1f4ff026881b4d2804d397730286ab7e3d090"
OPENCOLLADA_REPO_BRANCH="master"
FFMPEG_SOURCE=( "http://ffmpeg.org/releases/ffmpeg-$FFMPEG_VERSION.tar.bz2" )
@ -1260,7 +1262,7 @@ compile_OCIO() {
fi
# To be changed each time we make edits that would modify the compiled result!
ocio_magic=1
ocio_magic=2
_init_ocio
# Clean install if needed!
@ -1277,14 +1279,27 @@ compile_OCIO() {
if [ ! -d $_src ]; then
INFO "Downloading OpenColorIO-$OCIO_VERSION"
mkdir -p $SRC
download OCIO_SOURCE[@] $_src.tar.gz
INFO "Unpacking OpenColorIO-$OCIO_VERSION"
tar -C $SRC --transform "s,(.*/?)imageworks-OpenColorIO[^/]*(.*),\1OpenColorIO-$OCIO_VERSION\2,x" \
-xf $_src.tar.gz
if [ "$OCIO_USE_REPO" = true ]; then
git clone ${OCIO_SOURCE_REPO[0]} $_src
else
download OCIO_SOURCE[@] $_src.tar.gz
INFO "Unpacking OpenColorIO-$OCIO_VERSION"
tar -C $SRC --transform "s,(.*/?)imageworks-OpenColorIO[^/]*(.*),\1OpenColorIO-$OCIO_VERSION\2,x" \
-xf $_src.tar.gz
fi
fi
cd $_src
if [ "$OCIO_USE_REPO" = true ]; then
# XXX For now, always update from latest repo...
git pull origin master
git checkout $OCIO_SOURCE_REPO_UID
git reset --hard
fi
# Always refresh the whole build!
if [ -d build ]; then
rm -rf build
@ -1490,7 +1505,6 @@ compile_OPENEXR() {
if [ "$OPENEXR_USE_REPO" = true ]; then
# XXX For now, always update from latest repo...
git pull origin master
# Stick to same rev as windows' libs...
git checkout $OPENEXR_SOURCE_REPO_UID
git reset --hard
oiio_src_path="../OpenEXR"

@ -176,6 +176,7 @@ public:
KernelFunctions<void(*)(int, TilesInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int, bool)> filter_divide_shadow_kernel;
KernelFunctions<void(*)(int, TilesInfo*, int, int, int, int, float*, float*, int*, int, int, bool)> filter_get_feature_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_detect_outliers_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel;
@ -210,6 +211,7 @@ public:
REGISTER_KERNEL(shader),
REGISTER_KERNEL(filter_divide_shadow),
REGISTER_KERNEL(filter_get_feature),
REGISTER_KERNEL(filter_detect_outliers),
REGISTER_KERNEL(filter_combine_halves),
REGISTER_KERNEL(filter_nlm_calc_difference),
REGISTER_KERNEL(filter_nlm_blur),
@ -594,6 +596,26 @@ public:
return true;
}
bool denoising_detect_outliers(device_ptr image_ptr,
device_ptr variance_ptr,
device_ptr depth_ptr,
device_ptr output_ptr,
DenoisingTask *task)
{
for(int y = task->rect.y; y < task->rect.w; y++) {
for(int x = task->rect.x; x < task->rect.z; x++) {
filter_detect_outliers_kernel()(x, y,
(float*) image_ptr,
(float*) variance_ptr,
(float*) depth_ptr,
(float*) output_ptr,
&task->rect.x,
task->buffer.pass_stride);
}
}
return true;
}
void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
{
float *render_buffer = (float*)tile.buffer;
@ -632,6 +654,7 @@ public:
denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
denoising.functions.detect_outliers = function_bind(&CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
denoising.functions.set_tiles = function_bind(&CPUDevice::denoising_set_tiles, this, _1, &denoising);
denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h);

@ -949,7 +949,7 @@ public:
cuda_push_context();
int4 rect = task->rect;
int w = rect.z-rect.x;
int w = align_up(rect.z-rect.x, 4);
int h = rect.w-rect.y;
int r = task->nlm_state.r;
int f = task->nlm_state.f;
@ -1248,6 +1248,38 @@ public:
return !have_error();
}
bool denoising_detect_outliers(device_ptr image_ptr,
device_ptr variance_ptr,
device_ptr depth_ptr,
device_ptr output_ptr,
DenoisingTask *task)
{
if(have_error())
return false;
cuda_push_context();
CUfunction cuFilterDetectOutliers;
cuda_assert(cuModuleGetFunction(&cuFilterDetectOutliers, cuFilterModule, "kernel_cuda_filter_detect_outliers"));
cuda_assert(cuFuncSetCacheConfig(cuFilterDetectOutliers, CU_FUNC_CACHE_PREFER_L1));
CUDA_GET_BLOCKSIZE(cuFilterDetectOutliers,
task->rect.z-task->rect.x,
task->rect.w-task->rect.y);
void *args[] = {&image_ptr,
&variance_ptr,
&depth_ptr,
&output_ptr,
&task->rect,
&task->buffer.pass_stride};
CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args);
cuda_assert(cuCtxSynchronize());
cuda_pop_context();
return !have_error();
}
void denoise(RenderTile &rtile, const DeviceTask &task)
{
DenoisingTask denoising(this);
@ -1258,6 +1290,7 @@ public:
denoising.functions.non_local_means = function_bind(&CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
denoising.functions.combine_halves = function_bind(&CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
denoising.functions.get_feature = function_bind(&CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
denoising.functions.detect_outliers = function_bind(&CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
denoising.functions.set_tiles = function_bind(&CUDADevice::denoising_set_tiles, this, _1, &denoising);
denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);

@ -139,9 +139,9 @@ bool DenoisingTask::run_denoising()
nlm_state.temporary_2_ptr = *nlm_temporary_2;
nlm_state.temporary_3_ptr = *nlm_temporary_3;
int mean_from[] = { 0, 1, 2, 6, 7, 8, 12 };
int variance_from[] = { 3, 4, 5, 9, 10, 11, 13 };
int pass_to[] = { 1, 2, 3, 0, 5, 6, 7 };
int mean_from[] = { 0, 1, 2, 12, 6, 7, 8 };
int variance_from[] = { 3, 4, 5, 13, 9, 10, 11};
int pass_to[] = { 1, 2, 3, 0, 5, 6, 7};
for(int pass = 0; pass < 7; pass++) {
device_sub_ptr feature_pass(device, buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
/* Get the unfiltered pass and its variance from the RenderBuffers. */
@ -159,11 +159,25 @@ bool DenoisingTask::run_denoising()
int mean_to[] = { 8, 9, 10};
int variance_to[] = {11, 12, 13};
int num_color_passes = 3;
device_only_memory<float> temp_color;
temp_color.resize(3*buffer.pass_stride);
device->mem_alloc("Denoising temporary color", temp_color, MEM_READ_WRITE);
for(int pass = 0; pass < num_color_passes; pass++) {
device_sub_ptr color_pass (device, buffer.mem, mean_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
device_sub_ptr color_pass(device, temp_color, pass*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
device_sub_ptr color_var_pass(device, buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass);
}
{
device_sub_ptr depth_pass (device, buffer.mem, 0, buffer.pass_stride, MEM_READ_WRITE);
device_sub_ptr color_var_pass(device, buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
device_sub_ptr output_pass (device, buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
functions.detect_outliers(temp_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
}
device->mem_free(temp_color);
}
storage.w = filter_area.z;

@ -82,6 +82,11 @@ public:
device_ptr mean_ptr,
device_ptr variance_ptr
)> get_feature;
function<bool(device_ptr image_ptr,
device_ptr variance_ptr,
device_ptr depth_ptr,
device_ptr output_ptr
)> detect_outliers;
function<bool(device_ptr*)> set_tiles;
} functions;

@ -411,6 +411,11 @@ protected:
device_ptr mean_ptr,
device_ptr variance_ptr,
DenoisingTask *task);
bool denoising_detect_outliers(device_ptr image_ptr,
device_ptr variance_ptr,
device_ptr depth_ptr,
device_ptr output_ptr,
DenoisingTask *task);
bool denoising_set_tiles(device_ptr *buffers,
DenoisingTask *task);

@ -216,6 +216,7 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
denoising_program = OpenCLProgram(this, "denoising", "filter.cl", "");
denoising_program.add_kernel(ustring("filter_divide_shadow"));
denoising_program.add_kernel(ustring("filter_get_feature"));
denoising_program.add_kernel(ustring("filter_detect_outliers"));
denoising_program.add_kernel(ustring("filter_combine_halves"));
denoising_program.add_kernel(ustring("filter_construct_transform"));
denoising_program.add_kernel(ustring("filter_nlm_calc_difference"));
@ -910,6 +911,33 @@ bool OpenCLDeviceBase::denoising_get_feature(int mean_offset,
return true;
}
bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr,
device_ptr variance_ptr,
device_ptr depth_ptr,
device_ptr output_ptr,
DenoisingTask *task)
{
cl_mem image_mem = CL_MEM_PTR(image_ptr);
cl_mem variance_mem = CL_MEM_PTR(variance_ptr);
cl_mem depth_mem = CL_MEM_PTR(depth_ptr);
cl_mem output_mem = CL_MEM_PTR(output_ptr);
cl_kernel ckFilterDetectOutliers = denoising_program(ustring("filter_detect_outliers"));
kernel_set_args(ckFilterDetectOutliers, 0,
image_mem,
variance_mem,
depth_mem,
output_mem,
task->rect,
task->buffer.pass_stride);
enqueue_kernel(ckFilterDetectOutliers,
task->rect.z-task->rect.x,
task->rect.w-task->rect.y);
return true;
}
bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buffers,
DenoisingTask *task)
{
@ -942,6 +970,7 @@ void OpenCLDeviceBase::denoise(RenderTile &rtile, const DeviceTask &task)
denoising.functions.non_local_means = function_bind(&OpenCLDeviceBase::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
denoising.functions.combine_halves = function_bind(&OpenCLDeviceBase::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
denoising.functions.get_feature = function_bind(&OpenCLDeviceBase::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
denoising.functions.detect_outliers = function_bind(&OpenCLDeviceBase::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
denoising.render_buffer.samples = rtile.sample;

@ -10,8 +10,23 @@ set(INC_SYS
set(SRC
kernels/cpu/kernel.cpp
kernels/cpu/kernel_sse2.cpp
kernels/cpu/kernel_sse3.cpp
kernels/cpu/kernel_sse41.cpp
kernels/cpu/kernel_avx.cpp
kernels/cpu/kernel_avx2.cpp
kernels/cpu/kernel_split.cpp
kernels/cpu/kernel_split_sse2.cpp
kernels/cpu/kernel_split_sse3.cpp
kernels/cpu/kernel_split_sse41.cpp
kernels/cpu/kernel_split_avx.cpp
kernels/cpu/kernel_split_avx2.cpp
kernels/cpu/filter.cpp
kernels/cpu/filter_sse2.cpp
kernels/cpu/filter_sse3.cpp
kernels/cpu/filter_sse41.cpp
kernels/cpu/filter_avx.cpp
kernels/cpu/filter_avx2.cpp
kernels/opencl/kernel.cl
kernels/opencl/kernel_state_buffer_size.cl
kernels/opencl/kernel_split.cl
@ -406,48 +421,28 @@ set_source_files_properties(kernels/cpu/kernel.cpp PROPERTIES COMPILE_FLAGS "${C
set_source_files_properties(kernels/cpu/kernel_split.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/filter.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
if(NOT WITH_CYCLES_NATIVE_ONLY)
list(APPEND SRC
kernels/cpu/kernel_sse2.cpp
kernels/cpu/kernel_sse3.cpp
kernels/cpu/kernel_sse41.cpp
kernels/cpu/kernel_avx.cpp
kernels/cpu/kernel_avx2.cpp
kernels/cpu/kernel_split_sse2.cpp
kernels/cpu/kernel_split_sse3.cpp
kernels/cpu/kernel_split_sse41.cpp
kernels/cpu/kernel_split_avx.cpp
kernels/cpu/kernel_split_avx2.cpp
kernels/cpu/filter_sse2.cpp
kernels/cpu/filter_sse3.cpp
kernels/cpu/filter_sse41.cpp
kernels/cpu/filter_avx.cpp
kernels/cpu/filter_avx2.cpp
)
if(CXX_HAS_SSE)
set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/filter_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/filter_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/filter_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
endif()
if(CXX_HAS_SSE)
set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/filter_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/filter_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/filter_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
endif()
if(CXX_HAS_AVX)
set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/filter_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
endif()
if(CXX_HAS_AVX)
set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/filter_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
endif()
if(CXX_HAS_AVX2)
set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/filter_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
endif()
if(CXX_HAS_AVX2)
set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/filter_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
endif()
add_library(cycles_kernel

@ -91,18 +91,15 @@ ccl_device_forceinline float3 mf_sample_vndf(const float3 wi, const float2 alpha
return normalize(make_float3(-slope_x, -slope_y, 1.0f));
}
/* === Phase functions: Glossy, Diffuse and Glass === */
/* === Phase functions: Glossy and Glass === */
/* Phase function for reflective materials, either without a fresnel term (for compatibility) or with the conductive fresnel term. */
ccl_device_forceinline float3 mf_sample_phase_glossy(const float3 wi, float3 *n, float3 *k, float3 *weight, const float3 wm)
/* Phase function for reflective materials. */
ccl_device_forceinline float3 mf_sample_phase_glossy(const float3 wi, float3 *weight, const float3 wm)
{
if(n && k)
*weight *= fresnel_conductor(dot(wi, wm), *n, *k);
return -wi + 2.0f * wm * dot(wi, wm);
}
ccl_device_forceinline float3 mf_eval_phase_glossy(const float3 w, const float lambda, const float3 wo, const float2 alpha, float3 *n, float3 *k)
ccl_device_forceinline float3 mf_eval_phase_glossy(const float3 w, const float lambda, const float3 wo, const float2 alpha)
{
if(w.z > 0.9999f)
return make_float3(0.0f, 0.0f, 0.0f);
@ -123,30 +120,9 @@ ccl_device_forceinline float3 mf_eval_phase_glossy(const float3 w, const float l
else
phase *= D_ggx_aniso(wh, alpha);
if(n && k) {
/* Apply conductive fresnel term. */
return phase * fresnel_conductor(dotW_WH, *n, *k);
}
return make_float3(phase, phase, phase);
}
/* Phase function for rough lambertian diffuse surfaces. */
ccl_device_forceinline float3 mf_sample_phase_diffuse(const float3 wm, const float randu, const float randv)
{
float3 tm, bm;
make_orthonormals(wm, &tm, &bm);
float2 disk = concentric_sample_disk(randu, randv);
return disk.x*tm + disk.y*bm + safe_sqrtf(1.0f - disk.x*disk.x - disk.y*disk.y)*wm;
}
ccl_device_forceinline float3 mf_eval_phase_diffuse(const float3 w, const float3 wm)
{
const float v = max(0.0f, dot(w, wm)) * M_1_PI_F;
return make_float3(v, v, v);
}
/* Phase function for dielectric transmissive materials, including both reflection and refraction according to the dielectric fresnel term. */
ccl_device_forceinline float3 mf_sample_phase_glass(const float3 wi, const float eta, const float3 wm, const float randV, bool *outside)
{
@ -282,11 +258,6 @@ ccl_device_forceinline float mf_ggx_aniso_pdf(const float3 wi, const float3 wo,
return 0.25f * D_ggx_aniso(normalize(wi+wo), alpha) / ((1.0f + mf_lambda(wi, alpha)) * wi.z) + (1.0f - mf_ggx_albedo(sqrtf(alpha.x*alpha.y))) * wo.z;
}
ccl_device_forceinline float mf_diffuse_pdf(const float3 wo)
{
return M_1_PI_F * wo.z;
}
ccl_device_forceinline float mf_glass_pdf(const float3 wi, const float3 wo, const float alpha, const float eta)
{
float3 wh;
@ -315,13 +286,6 @@ ccl_device_forceinline float mf_glass_pdf(const float3 wi, const float3 wo, cons
#define MF_MULTI_GLASS
#include "kernel/closure/bsdf_microfacet_multi_impl.h"
/* The diffuse phase function is not implemented as a node yet. */
#if 0
#define MF_PHASE_FUNCTION diffuse
#define MF_MULTI_DIFFUSE
#include "kernel/closure/bsdf_microfacet_multi_impl.h"
#endif
#define MF_PHASE_FUNCTION glossy
#define MF_MULTI_GLOSSY
#include "kernel/closure/bsdf_microfacet_multi_impl.h"
@ -428,7 +392,7 @@ ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc
*pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y));
else
*pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x);
return mf_eval_glossy(localI, localO, true, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, NULL, NULL, bsdf->ior, use_fresnel, bsdf->extra->cspec0);
return mf_eval_glossy(localI, localO, true, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, use_fresnel, bsdf->extra->cspec0);
}
ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, ccl_addr_space uint *lcg_state)
@ -442,6 +406,10 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals *kg, const ShaderC
*omega_in = 2*dot(Z, I)*Z - I;
*pdf = 1e6f;
*eval = make_float3(1e6f, 1e6f, 1e6f);
#ifdef __RAY_DIFFERENTIALS__
*domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx;
*domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy;
#endif
return LABEL_REFLECT|LABEL_SINGULAR;
}
@ -456,7 +424,7 @@ ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals *kg, const ShaderC
float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z));
float3 localO;
*eval = mf_sample_glossy(localI, &localO, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, NULL, NULL, bsdf->ior, use_fresnel, bsdf->extra->cspec0);
*eval = mf_sample_glossy(localI, &localO, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, use_fresnel, bsdf->extra->cspec0);
if(is_aniso)
*pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y));
else

@ -26,24 +26,16 @@
* the balance heuristic isn't necessarily optimal anymore.
*/
ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
float3 wi,
float3 wo,
const bool wo_outside,
const float3 color,
const float alpha_x,
const float alpha_y,
ccl_addr_space uint *lcg_state
#ifdef MF_MULTI_GLASS
, const float eta
, bool use_fresnel
, const float3 cspec0
#elif defined(MF_MULTI_GLOSSY)
, float3 *n, float3 *k
, const float eta
, bool use_fresnel
, const float3 cspec0
#endif
)
float3 wi,
float3 wo,
const bool wo_outside,
const float3 color,
const float alpha_x,
const float alpha_y,
ccl_addr_space uint *lcg_state,
const float eta,
bool use_fresnel,
const float3 cspec0)
{
/* Evaluating for a shallower incoming direction produces less noise, and the properties of the BSDF guarantee reciprocity. */
bool swapped = false;
@ -77,44 +69,29 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
/* Analytically compute single scattering for lower noise. */
float3 eval;
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
const float3 wh = normalize(wi+wo);
#ifdef MF_MULTI_GLASS
eval = mf_eval_phase_glass(-wi, lambda_r, wo, wo_outside, alpha, eta);
if(wo_outside)
eval *= -lambda_r / (shadowing_lambda - lambda_r);
else
eval *= -lambda_r * beta(-lambda_r, shadowing_lambda+1.0f);
float F0 = fresnel_dielectric_cos(1.0f, eta);
if(use_fresnel) {
throughput = interpolate_fresnel_color(wi, normalize(wi + wo), eta, F0, cspec0);
eval *= throughput;
}
#elif defined(MF_MULTI_DIFFUSE)
/* Diffuse has no special closed form for the single scattering bounce */
eval = make_float3(0.0f, 0.0f, 0.0f);
#else /* MF_MULTI_GLOSSY */
const float3 wh = normalize(wi+wo);
const float G2 = 1.0f / (1.0f - (lambda_r + 1.0f) + shadowing_lambda);
float val = G2 * 0.25f / wi.z;
if(alpha.x == alpha.y)
val *= D_ggx(wh, alpha.x);
else
val *= D_ggx_aniso(wh, alpha);
if(n && k) {
eval = fresnel_conductor(dot(wh, wi), *n, *k) * val;
}
else {
eval = make_float3(val, val, val);
}
eval = make_float3(val, val, val);
#endif
float F0 = fresnel_dielectric_cos(1.0f, eta);
if(use_fresnel) {
throughput = interpolate_fresnel_color(wi, wh, eta, F0, cspec0);
eval = throughput * val;
eval *= throughput;
}
#endif
float3 wr = -wi;
float hr = 1.0f;
@ -129,13 +106,6 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
float3 wm = mf_sample_vndf(-wr, alpha, make_float2(lcg_step_float_addrspace(lcg_state),
lcg_step_float_addrspace(lcg_state)));
#ifdef MF_MULTI_DIFFUSE
if(order == 0) {
/* Compute single-scattering for diffuse. */
const float G2_G1 = -lambda_r / (shadowing_lambda - lambda_r);
eval += throughput * G2_G1 * mf_eval_phase_diffuse(wo, wm);
}
#endif
#ifdef MF_MULTI_GLASS
if(order == 0 && use_fresnel) {
/* Evaluate amount of scattering towards wo on this microfacet. */
@ -156,10 +126,8 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
phase = mf_eval_phase_glass(wr, lambda_r, wo, wo_outside, alpha, eta);
else
phase = mf_eval_phase_glass(wr, lambda_r, -wo, !wo_outside, alpha, 1.0f/eta);
#elif defined(MF_MULTI_DIFFUSE)
phase = mf_eval_phase_diffuse(wo, wm);
#else /* MF_MULTI_GLOSSY */
phase = mf_eval_phase_glossy(wr, lambda_r, wo, alpha, n, k) * throughput;
phase = mf_eval_phase_glossy(wr, lambda_r, wo, alpha) * throughput;
#endif
eval += throughput * phase * mf_G1(wo_outside? wo: -wo, mf_C1((outside == wo_outside)? hr: -hr), shadowing_lambda);
}
@ -181,25 +149,17 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
else if(use_fresnel && order > 0) {
throughput *= interpolate_fresnel_color(wi_prev, wm, eta, F0, cspec0);
}
#elif defined(MF_MULTI_DIFFUSE)
wr = mf_sample_phase_diffuse(wm,
lcg_step_float_addrspace(lcg_state),
lcg_step_float_addrspace(lcg_state));
#else /* MF_MULTI_GLOSSY */
if(use_fresnel && order > 0) {
throughput *= interpolate_fresnel_color(-wr, wm, eta, F0, cspec0);
}
wr = mf_sample_phase_glossy(-wr, n, k, &throughput, wm);
wr = mf_sample_phase_glossy(-wr, &throughput, wm);
#endif
lambda_r = mf_lambda(wr, alpha);
#if defined(MF_MULTI_GLOSSY) || defined(MF_MULTI_GLASS)
if(!use_fresnel)
throughput *= color;
#else
throughput *= color;
#endif
C1_r = mf_C1(hr);
G1_r = mf_G1(wr, C1_r, lambda_r);
@ -215,18 +175,16 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
* escaped the surface in wo. The function returns the throughput between wi and wo.
* Without reflection losses due to coloring or fresnel absorption in conductors, the sampling is optimal.
*/
ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3 *wo, const float3 color, const float alpha_x, const float alpha_y, ccl_addr_space uint *lcg_state
#ifdef MF_MULTI_GLASS
, const float eta
, bool use_fresnel
, const float3 cspec0
#elif defined(MF_MULTI_GLOSSY)
, float3 *n, float3 *k
, const float eta
, bool use_fresnel
, const float3 cspec0
#endif
)
ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(
float3 wi,
float3 *wo,
const float3 color,
const float alpha_x,
const float alpha_y,
ccl_addr_space uint *lcg_state,
const float eta,
bool use_fresnel,
const float3 cspec0)
{
const float2 alpha = make_float2(alpha_x, alpha_y);
@ -237,17 +195,11 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3
float C1_r = 1.0f;
float G1_r = 0.0f;
bool outside = true;
#ifdef MF_MULTI_GLASS
float F0 = fresnel_dielectric_cos(1.0f, eta);
if(use_fresnel) {
throughput = interpolate_fresnel_color(wi, normalize(wi + wr), eta, F0, cspec0);
}
#elif defined(MF_MULTI_GLOSSY)
float F0 = fresnel_dielectric_cos(1.0f, eta);
if(use_fresnel) {
throughput = interpolate_fresnel_color(wi, normalize(wi + wr), eta, F0, cspec0);
}
#endif
int order;
for(order = 0; order < 10; order++) {
@ -262,13 +214,8 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3
lcg_step_float_addrspace(lcg_state)));
/* First-bounce color is already accounted for in mix weight. */
#if defined(MF_MULTI_GLASS) || defined(MF_MULTI_GLOSSY)
if(!use_fresnel && order > 0)
throughput *= color;
#else
if(order > 0)
throughput *= color;
#endif
/* Bounce from the microfacet. */
#ifdef MF_MULTI_GLASS
@ -294,10 +241,6 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3
throughput *= t_color;
}
}
#elif defined(MF_MULTI_DIFFUSE)
wr = mf_sample_phase_diffuse(wm,
lcg_step_float_addrspace(lcg_state),
lcg_step_float_addrspace(lcg_state));
#else /* MF_MULTI_GLOSSY */
if(use_fresnel) {
float3 t_color = interpolate_fresnel_color(-wr, wm, eta, F0, cspec0);
@ -307,7 +250,7 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3
else
throughput *= t_color;
}
wr = mf_sample_phase_glossy(-wr, n, k, &throughput, wm);
wr = mf_sample_phase_glossy(-wr, &throughput, wm);
#endif
/* Update random walk parameters. */
@ -319,6 +262,5 @@ ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3
}
#undef MF_MULTI_GLASS
#undef MF_MULTI_DIFFUSE
#undef MF_MULTI_GLOSSY
#undef MF_PHASE_FUNCTION

@ -16,7 +16,7 @@
CCL_NAMESPACE_BEGIN
#define ccl_get_feature(buffer, pass) buffer[(pass)*pass_stride]
#define ccl_get_feature(buffer, pass) (buffer)[(pass)*pass_stride]
/* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y).
* pixel_buffer always points to the current pixel in the first pass. */
@ -32,7 +32,7 @@ ccl_device_inline void filter_get_features(int2 pixel, ccl_global float ccl_rest
{
features[0] = pixel.x;
features[1] = pixel.y;
features[2] = ccl_get_feature(buffer, 0);
features[2] = fabsf(ccl_get_feature(buffer, 0));
features[3] = ccl_get_feature(buffer, 1);
features[4] = ccl_get_feature(buffer, 2);
features[5] = ccl_get_feature(buffer, 3);
@ -50,7 +50,7 @@ ccl_device_inline void filter_get_feature_scales(int2 pixel, ccl_global float cc
{
scales[0] = fabsf(pixel.x - mean[0]);
scales[1] = fabsf(pixel.y - mean[1]);
scales[2] = fabsf(ccl_get_feature(buffer, 0) - mean[2]);
scales[2] = fabsf(fabsf(ccl_get_feature(buffer, 0)) - mean[2]);
scales[3] = len_squared(make_float3(ccl_get_feature(buffer, 1) - mean[3],
ccl_get_feature(buffer, 2) - mean[4],
ccl_get_feature(buffer, 3) - mean[5]));
@ -107,7 +107,7 @@ ccl_device_inline void filter_get_design_row_transform(int2 p_pixel,
math_vector_zero(design_row+1, rank);
design_row_add(design_row, rank, transform, stride, 0, q_pixel.x - p_pixel.x);
design_row_add(design_row, rank, transform, stride, 1, q_pixel.y - p_pixel.y);
design_row_add(design_row, rank, transform, stride, 2, ccl_get_feature(q_buffer, 0) - ccl_get_feature(p_buffer, 0));
design_row_add(design_row, rank, transform, stride, 2, fabsf(ccl_get_feature(q_buffer, 0)) - fabsf(ccl_get_feature(p_buffer, 0)));
design_row_add(design_row, rank, transform, stride, 3, ccl_get_feature(q_buffer, 1) - ccl_get_feature(p_buffer, 1));
design_row_add(design_row, rank, transform, stride, 4, ccl_get_feature(q_buffer, 2) - ccl_get_feature(p_buffer, 2));
design_row_add(design_row, rank, transform, stride, 5, ccl_get_feature(q_buffer, 3) - ccl_get_feature(p_buffer, 3));

@ -37,7 +37,7 @@ ccl_device_inline void filter_get_features_sse(__m128 x, __m128 y, __m128 active
{
features[0] = x;
features[1] = y;
features[2] = ccl_get_feature_sse(0);
features[2] = _mm_fabs_ps(ccl_get_feature_sse(0));
features[3] = ccl_get_feature_sse(1);
features[4] = ccl_get_feature_sse(2);
features[5] = ccl_get_feature_sse(3);
@ -58,7 +58,7 @@ ccl_device_inline void filter_get_feature_scales_sse(__m128 x, __m128 y, __m128
scales[0] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(x, mean[0])), active_pixels);
scales[1] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(y, mean[1])), active_pixels);
scales[2] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(ccl_get_feature_sse(0), mean[2])), active_pixels);
scales[2] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(_mm_fabs_ps(ccl_get_feature_sse(0)), mean[2])), active_pixels);
__m128 diff, scale;
diff = _mm_sub_ps(ccl_get_feature_sse(1), mean[3]);

@ -104,6 +104,57 @@ ccl_device void kernel_filter_get_feature(int sample,
}
}
ccl_device void kernel_filter_detect_outliers(int x, int y,
ccl_global float *image,
ccl_global float *variance,
ccl_global float *depth,
ccl_global float *out,
int4 rect,
int pass_stride)
{
int buffer_w = align_up(rect.z - rect.x, 4);
int n = 0;
float values[25];
for(int y1 = max(y-2, rect.y); y1 < min(y+3, rect.w); y1++) {
for(int x1 = max(x-2, rect.x); x1 < min(x+3, rect.z); x1++) {
int idx = (y1-rect.y)*buffer_w + (x1-rect.x);
float L = average(make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]));
/* Find the position of L. */
int i;
for(i = 0; i < n; i++) {
if(values[i] > L) break;
}
/* Make space for L by shifting all following values to the right. */
for(int j = n; j > i; j--) {
values[j] = values[j-1];
}
/* Insert L. */
values[i] = L;
n++;
}
}
int idx = (y-rect.y)*buffer_w + (x-rect.x);
float L = average(make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]));
float ref = 2.0f*values[(int)(n*0.75f)];
float fac = 1.0f;
if(L > ref) {
/* If the pixel is an outlier, negate the depth value to mark it as one.
* Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */
depth[idx] = -depth[idx];
fac = ref/L;
variance[idx ] *= fac*fac;
variance[idx + pass_stride] *= fac*fac;
variance[idx+2*pass_stride] *= fac*fac;
}
out[idx ] = fac*image[idx];
out[idx + pass_stride] = fac*image[idx + pass_stride];
out[idx+2*pass_stride] = fac*image[idx+2*pass_stride];
}
/* Combine A/B buffers.
* Calculates the combined mean and the buffer variance. */
ccl_device void kernel_filter_combine_halves(int x, int y,

@ -54,7 +54,10 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
float p_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + p_offset, pass_stride));
float q_std_dev = sqrtf(filter_get_pixel_variance(variance_pass + q_offset, pass_stride));
if(average(fabs(p_color - q_color)) > 3.0f*(p_std_dev + q_std_dev + 1e-3f)) {
/* If the pixel was flagged as an outlier during prefiltering, skip it.
* Otherwise, perform the regular confidence interval test. */
if(ccl_get_feature(buffer + q_offset, 0) < 0.0f ||
average(fabs(p_color - q_color)) > 2.0f*(p_std_dev + q_std_dev + 1e-3f)) {
return;
}

@ -356,7 +356,16 @@ ccl_device_inline void kernel_write_result(KernelGlobals *kg, ccl_global float *
# endif
if(kernel_data.film.pass_denoising_clean) {
float3 noisy, clean;
path_radiance_split_denoising(kg, L, &noisy, &clean);
#ifdef __SHADOW_TRICKS__
if(is_shadow_catcher) {
noisy = L_sum;
clean = make_float3(0.0f, 0.0f, 0.0f);
}
else
#endif /* __SHADOW_TRICKS__ */
{
path_radiance_split_denoising(kg, L, &noisy, &clean);
}
kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR,
sample, noisy);
kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_clean,

@ -407,7 +407,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
}
#endif /* __SUBSURFACE__ */
#if defined(__EMISSION__) && defined(__BRANCHED_PATH__)
#if defined(__EMISSION__)
if(kernel_data.integrator.use_direct_light) {
int all = (kernel_data.integrator.sample_all_lights_indirect) ||
(state->flag & PATH_RAY_SHADOW_CATCHER);
@ -421,7 +421,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
L,
all);
}
#endif /* defined(__EMISSION__) && defined(__BRANCHED_PATH__) */
#endif /* defined(__EMISSION__) */
if(!kernel_path_surface_bounce(kg, rng, sd, &throughput, state, L, ray))
break;

@ -16,7 +16,7 @@
CCL_NAMESPACE_BEGIN
#if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__) || defined(__SHADOW_TRICKS__)
#if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__) || defined(__SHADOW_TRICKS__) || defined(__BAKING__)
/* branched path tracing: connect path directly to position on one or more lights and add it to L */
ccl_device_noinline void kernel_branched_path_surface_connect_light(
KernelGlobals *kg,

@ -43,6 +43,14 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
int buffer_denoising_offset,
bool use_split_variance);
void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
ccl_global float *image,
ccl_global float *variance,
ccl_global float *depth,
ccl_global float *output,
int *rect,
int pass_stride);
void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
float *mean,
float *variance,

@ -91,6 +91,21 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample,
#endif
}
void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y,
ccl_global float *image,
ccl_global float *variance,
ccl_global float *depth,
ccl_global float *output,
int *rect,
int pass_stride)
{
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers);
#else
kernel_filter_detect_outliers(x, y, image, variance, depth, output, load_int4(rect), pass_stride);
#endif
}
void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
float *mean,
float *variance,

@ -22,45 +22,49 @@
#include "kernel/kernel_compat_cpu.h"
#ifndef __SPLIT_KERNEL__
# include "kernel/kernel_math.h"
# include "kernel/kernel_types.h"
#ifndef KERNEL_STUB
# ifndef __SPLIT_KERNEL__
# include "kernel/kernel_math.h"
# include "kernel/kernel_types.h"
# include "kernel/split/kernel_split_data.h"
# include "kernel/kernel_globals.h"
# include "kernel/split/kernel_split_data.h"
# include "kernel/kernel_globals.h"
# include "kernel/kernels/cpu/kernel_cpu_image.h"
# include "kernel/kernel_film.h"
# include "kernel/kernel_path.h"
# include "kernel/kernel_path_branched.h"
# include "kernel/kernel_bake.h"
# include "kernel/kernels/cpu/kernel_cpu_image.h"
# include "kernel/kernel_film.h"
# include "kernel/kernel_path.h"
# include "kernel/kernel_path_branched.h"
# include "kernel/kernel_bake.h"
# else
# include "kernel/split/kernel_split_common.h"
# include "kernel/split/kernel_data_init.h"
# include "kernel/split/kernel_path_init.h"
# include "kernel/split/kernel_scene_intersect.h"
# include "kernel/split/kernel_lamp_emission.h"
# include "kernel/split/kernel_do_volume.h"
# include "kernel/split/kernel_queue_enqueue.h"
# include "kernel/split/kernel_indirect_background.h"
# include "kernel/split/kernel_shader_setup.h"
# include "kernel/split/kernel_shader_sort.h"
# include "kernel/split/kernel_shader_eval.h"
# include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h"
# include "kernel/split/kernel_subsurface_scatter.h"
# include "kernel/split/kernel_direct_lighting.h"
# include "kernel/split/kernel_shadow_blocked_ao.h"
# include "kernel/split/kernel_shadow_blocked_dl.h"
# include "kernel/split/kernel_next_iteration_setup.h"
# include "kernel/split/kernel_indirect_subsurface.h"
# include "kernel/split/kernel_buffer_update.h"
# endif /* __SPLIT_KERNEL__ */
#else
# include "kernel/split/kernel_split_common.h"
# include "kernel/split/kernel_data_init.h"
# include "kernel/split/kernel_path_init.h"
# include "kernel/split/kernel_scene_intersect.h"
# include "kernel/split/kernel_lamp_emission.h"
# include "kernel/split/kernel_do_volume.h"
# include "kernel/split/kernel_queue_enqueue.h"
# include "kernel/split/kernel_indirect_background.h"
# include "kernel/split/kernel_shader_setup.h"
# include "kernel/split/kernel_shader_sort.h"
# include "kernel/split/kernel_shader_eval.h"
# include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h"
# include "kernel/split/kernel_subsurface_scatter.h"
# include "kernel/split/kernel_direct_lighting.h"
# include "kernel/split/kernel_shadow_blocked_ao.h"
# include "kernel/split/kernel_shadow_blocked_dl.h"
# include "kernel/split/kernel_next_iteration_setup.h"
# include "kernel/split/kernel_indirect_subsurface.h"
# include "kernel/split/kernel_buffer_update.h"
#endif
#ifdef KERNEL_STUB
# include "util/util_debug.h"
# define STUB_ASSERT(arch, name) assert(!(#name " kernel stub for architecture " #arch " was called!"))
#endif
# ifdef __SPLIT_KERNEL__
# include "kernel/split/kernel_data_init.h"
# endif /* __SPLIT_KERNEL__ */
#endif /* KERNEL_STUB */
CCL_NAMESPACE_BEGIN
@ -191,20 +195,26 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
{ \
STUB_ASSERT(KERNEL_ARCH, name); \
}
# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
{ \
STUB_ASSERT(KERNEL_ARCH, name); \
}
#else
# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \
void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
{ \
kernel_##name(kg); \
}
#endif /* KERNEL_STUB */
#define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \
void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \
{ \
ccl_local type locals; \
kernel_##name(kg, &locals); \
}
#endif /* KERNEL_STUB */
DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)

@ -86,6 +86,22 @@ kernel_cuda_filter_get_feature(int sample,
}
}
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_filter_detect_outliers(float *image,
float *variance,
float *depth,
float *output,
int4 prefilter_rect,
int pass_stride)
{
int x = prefilter_rect.x + blockDim.x*blockIdx.x + threadIdx.x;
int y = prefilter_rect.y + blockDim.y*blockIdx.y + threadIdx.y;
if(x < prefilter_rect.z && y < prefilter_rect.w) {
kernel_filter_detect_outliers(x, y, image, variance, depth, output, prefilter_rect, pass_stride);
}
}
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_filter_combine_halves(float *mean, float *variance, float *a, float *b, int4 prefilter_rect, int r)

@ -78,6 +78,20 @@ __kernel void kernel_ocl_filter_get_feature(int sample,
}
}
__kernel void kernel_ocl_filter_detect_outliers(ccl_global float *image,
ccl_global float *variance,
ccl_global float *depth,
ccl_global float *output,
int4 prefilter_rect,
int pass_stride)
{
int x = prefilter_rect.x + get_global_id(0);
int y = prefilter_rect.y + get_global_id(1);
if(x < prefilter_rect.z && y < prefilter_rect.w) {
kernel_filter_detect_outliers(x, y, image, variance, depth, output, prefilter_rect, pass_stride);
}
}
__kernel void kernel_ocl_filter_combine_halves(ccl_global float *mean,
ccl_global float *variance,
ccl_global float *a,

@ -34,17 +34,17 @@ shader node_principled_bsdf(
float Clearcoat = 0.0,
float ClearcoatGloss = 1.0,
float IOR = 1.45,
float Transparency = 0.0,
float RefractionRoughness = 0.0,
float Transmission = 0.0,
float TransmissionRoughness = 0.0,
normal Normal = N,
normal ClearcoatNormal = N,
normal Tangent = normalize(dPdu),
output closure color BSDF = 0)
{
float f = max(IOR, 1e-5);
float diffuse_weight = (1.0 - clamp(Metallic, 0.0, 1.0)) * (1.0 - clamp(Transparency, 0.0, 1.0));
float transp = clamp(Transparency, 0.0, 1.0) * (1.0 - clamp(Metallic, 0.0, 1.0));
float specular_weight = (1.0 - transp);
float diffuse_weight = (1.0 - clamp(Metallic, 0.0, 1.0)) * (1.0 - clamp(Transmission, 0.0, 1.0));
float final_transmission = clamp(Transmission, 0.0, 1.0) * (1.0 - clamp(Metallic, 0.0, 1.0));
float specular_weight = (1.0 - final_transmission);
vector T = Tangent;
@ -90,7 +90,7 @@ shader node_principled_bsdf(
}
}
if (transp > 1e-5) {
if (final_transmission > 1e-5) {
color Cspec0 = BaseColor * SpecularTint + color(1.0, 1.0, 1.0) * (1.0 - SpecularTint);
float eta = backfacing() ? 1.0 / f : f;
@ -98,18 +98,18 @@ shader node_principled_bsdf(
float cosNO = dot(Normal, I);
float Fr = fresnel_dielectric_cos(cosNO, eta);
float refl_roughness = Roughness;
if (Roughness <= 1e-2)
refl_roughness = 0.0;
float refl_roughness = Roughness;
if (Roughness <= 1e-2)
refl_roughness = 0.0;
float refraction_roughness = refl_roughness;
float transmission_roughness = refl_roughness;
if (distribution == "GGX")
refraction_roughness = 1.0 - (1.0 - refl_roughness) * (1.0 - RefractionRoughness);
transmission_roughness = 1.0 - (1.0 - refl_roughness) * (1.0 - TransmissionRoughness);
BSDF = BSDF + transp * (Fr * microfacet_ggx_fresnel(Normal, refl_roughness * refl_roughness, eta, BaseColor, Cspec0) +
(1.0 - Fr) * BaseColor * microfacet_ggx_refraction(Normal, refraction_roughness * refraction_roughness, eta));
BSDF = BSDF + final_transmission * (Fr * microfacet_ggx_fresnel(Normal, refl_roughness * refl_roughness, eta, BaseColor, Cspec0) +
(1.0 - Fr) * BaseColor * microfacet_ggx_refraction(Normal, transmission_roughness * transmission_roughness, eta));
} else {
BSDF = BSDF + transp * microfacet_multi_ggx_glass_fresnel(Normal, Roughness * Roughness, eta, BaseColor, Cspec0);
BSDF = BSDF + final_transmission * microfacet_multi_ggx_glass_fresnel(Normal, Roughness * Roughness, eta, BaseColor, Cspec0);
}
}

@ -67,6 +67,10 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
unsigned int num_samples,
ccl_global float *buffer)
{
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, data_init);
#else
#ifdef __KERNEL_OPENCL__
kg->data = data;
#endif
@ -143,6 +147,8 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
*(rng_state + index) = hash_int_2d(x, y);
}
}
#endif /* KERENL_STUB */
}
CCL_NAMESPACE_END

@ -79,14 +79,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
#ifdef __PRINCIPLED__
case CLOSURE_BSDF_PRINCIPLED_ID: {
uint specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset, sheen_offset,
sheen_tint_offset, clearcoat_offset, clearcoat_gloss_offset, eta_offset, transparency_offset,
anisotropic_rotation_offset, refraction_roughness_offset;
sheen_tint_offset, clearcoat_offset, clearcoat_gloss_offset, eta_offset, transmission_offset,
anisotropic_rotation_offset, transmission_roughness_offset;
uint4 data_node2 = read_node(kg, offset);
float3 T = stack_load_float3(stack, data_node.y);
decode_node_uchar4(data_node.z, &specular_offset, &roughness_offset, &specular_tint_offset, &anisotropic_offset);
decode_node_uchar4(data_node.w, &sheen_offset, &sheen_tint_offset, &clearcoat_offset, &clearcoat_gloss_offset);
decode_node_uchar4(data_node2.x, &eta_offset, &transparency_offset, &anisotropic_rotation_offset, &refraction_roughness_offset);
decode_node_uchar4(data_node2.x, &eta_offset, &transmission_offset, &anisotropic_rotation_offset, &transmission_roughness_offset);
// get Disney principled parameters
float metallic = param1;
@ -99,9 +99,9 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
float sheen_tint = stack_load_float(stack, sheen_tint_offset);
float clearcoat = stack_load_float(stack, clearcoat_offset);
float clearcoat_gloss = stack_load_float(stack, clearcoat_gloss_offset);
float transparency = stack_load_float(stack, transparency_offset);
float transmission = stack_load_float(stack, transmission_offset);
float anisotropic_rotation = stack_load_float(stack, anisotropic_rotation_offset);
float refraction_roughness = stack_load_float(stack, refraction_roughness_offset);
float transmission_roughness = stack_load_float(stack, transmission_roughness_offset);
float eta = fmaxf(stack_load_float(stack, eta_offset), 1e-5f);
ClosureType distribution = stack_valid(data_node2.y) ? (ClosureType) data_node2.y : CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID;
@ -118,10 +118,10 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
float fresnel = fresnel_dielectric_cos(cosNO, ior);
// calculate weights of the diffuse and specular part
float diffuse_weight = (1.0f - saturate(metallic)) * (1.0f - saturate(transparency));
float diffuse_weight = (1.0f - saturate(metallic)) * (1.0f - saturate(transmission));
float transp = saturate(transparency) * (1.0f - saturate(metallic));
float specular_weight = (1.0f - transp);
float final_transmission = saturate(transmission) * (1.0f - saturate(metallic));
float specular_weight = (1.0f - final_transmission);
// get the base color
uint4 data_base_color = read_node(kg, offset);
@ -300,8 +300,8 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
#ifdef __CAUSTICS_TRICKS__
if(kernel_data.integrator.caustics_reflective || kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) {
#endif
if(transp > CLOSURE_WEIGHT_CUTOFF) {
float3 glass_weight = weight * transp;
if(final_transmission > CLOSURE_WEIGHT_CUTOFF) {
float3 glass_weight = weight * final_transmission;
float3 cspec0 = base_color * specular_tint + make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint);
if(roughness <= 5e-2f || distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) { /* use single-scatter GGX */
@ -342,12 +342,12 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
bsdf->N = N;
if(distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID)
refraction_roughness = 1.0f - (1.0f - refl_roughness) * (1.0f - refraction_roughness);
transmission_roughness = 1.0f - (1.0f - refl_roughness) * (1.0f - transmission_roughness);
else
refraction_roughness = refl_roughness;
transmission_roughness = refl_roughness;
bsdf->alpha_x = refraction_roughness * refraction_roughness;
bsdf->alpha_y = refraction_roughness * refraction_roughness;
bsdf->alpha_x = transmission_roughness * transmission_roughness;
bsdf->alpha_y = transmission_roughness * transmission_roughness;
bsdf->ior = ior;
/* setup bsdf */

@ -30,6 +30,16 @@
CCL_NAMESPACE_BEGIN
/* Some helpers to silence warning in templated function. */
static bool isfinite(uchar /*value*/)
{
return false;
}
static bool isfinite(half /*value*/)
{
return false;
}
ImageManager::ImageManager(const DeviceInfo& info)
{
need_update = true;
@ -639,6 +649,37 @@ bool ImageManager::file_load_image(Image *img,
}
}
}
/* Make sure we don't have buggy values. */
if(FileFormat == TypeDesc::FLOAT) {
/* For RGBA buffers we put all channels to 0 if either of them is not
* finite. This way we avoid possible artifacts caused by fully changed
* hue.
*/
if(is_rgba) {
for(size_t i = 0; i < num_pixels; i += 4) {
StorageType *pixel = &pixels[i*4];
if(!isfinite(pixel[0]) ||
!isfinite(pixel[1]) ||
!isfinite(pixel[2]) ||
!isfinite(pixel[3]))
{
pixel[0] = 0;
pixel[1] = 0;
pixel[2] = 0;
pixel[3] = 0;
}
}
}
else {
for(size_t i = 0; i < num_pixels; ++i) {
StorageType *pixel = &pixels[i];
if(!isfinite(pixel[0])) {
pixel[0] = 0;
}
}
}
}
/* Scale image down if needed. */
if(pixels_storage.size() > 0) {
float scale_factor = 1.0f;
while(max_size * scale_factor > texture_limit) {

@ -486,18 +486,10 @@ static void background_cdf(int start,
float2 *cond_cdf)
{
/* Conditional CDFs (rows, U direction). */
/* NOTE: It is possible to have some NaN pixels on background
* which will ruin CDF causing wrong shading. We replace such
* pixels with black.
*/
for(int i = start; i < end; i++) {
float sin_theta = sinf(M_PI_F * (i + 0.5f) / res);
float3 env_color = (*pixels)[i * res];
float ave_luminance = average(env_color);
/* TODO(sergey): Consider adding average_safe(). */
if(!isfinite(ave_luminance)) {
ave_luminance = 0.0f;
}
cond_cdf[i * cdf_count].x = ave_luminance * sin_theta;
cond_cdf[i * cdf_count].y = 0.0f;
@ -505,9 +497,6 @@ static void background_cdf(int start,
for(int j = 1; j < res; j++) {
env_color = (*pixels)[i * res + j];
ave_luminance = average(env_color);
if(!isfinite(ave_luminance)) {
ave_luminance = 0.0f;
}
cond_cdf[i * cdf_count + j].x = ave_luminance * sin_theta;
cond_cdf[i * cdf_count + j].y = cond_cdf[i * cdf_count + j - 1].y + cond_cdf[i * cdf_count + j - 1].x / res;

@ -2316,8 +2316,8 @@ NODE_DEFINE(PrincipledBsdfNode)
SOCKET_IN_FLOAT(clearcoat, "Clearcoat", 0.0f);
SOCKET_IN_FLOAT(clearcoat_gloss, "Clearcoat Gloss", 0.0f);
SOCKET_IN_FLOAT(ior, "IOR", 0.0f);
SOCKET_IN_FLOAT(transparency, "Transparency", 0.0f);
SOCKET_IN_FLOAT(refraction_roughness, "Refraction Roughness", 0.0f);
SOCKET_IN_FLOAT(transmission, "Transmission", 0.0f);
SOCKET_IN_FLOAT(transmission_roughness, "Transmission Roughness", 0.0f);
SOCKET_IN_FLOAT(anisotropic_rotation, "Anisotropic Rotation", 0.0f);
SOCKET_IN_NORMAL(normal, "Normal", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_NORMAL);
SOCKET_IN_NORMAL(clearcoat_normal, "Clearcoat Normal", make_float3(0.0f, 0.0f, 0.0f), SocketType::LINK_NORMAL);
@ -2352,7 +2352,7 @@ void PrincipledBsdfNode::attributes(Shader *shader, AttributeRequestSet *attribu
void PrincipledBsdfNode::compile(SVMCompiler& compiler, ShaderInput *p_metallic, ShaderInput *p_subsurface, ShaderInput *p_subsurface_radius,
ShaderInput *p_specular, ShaderInput *p_roughness, ShaderInput *p_specular_tint, ShaderInput *p_anisotropic,
ShaderInput *p_sheen, ShaderInput *p_sheen_tint, ShaderInput *p_clearcoat, ShaderInput *p_clearcoat_gloss,
ShaderInput *p_ior, ShaderInput *p_transparency, ShaderInput *p_anisotropic_rotation, ShaderInput *p_refraction_roughness)
ShaderInput *p_ior, ShaderInput *p_transmission, ShaderInput *p_anisotropic_rotation, ShaderInput *p_transmission_roughness)
{
ShaderInput *base_color_in = input("Base Color");
ShaderInput *subsurface_color_in = input("Subsurface Color");
@ -2376,8 +2376,8 @@ void PrincipledBsdfNode::compile(SVMCompiler& compiler, ShaderInput *p_metallic,
int clearcoat_offset = compiler.stack_assign(p_clearcoat);
int clearcoat_gloss_offset = compiler.stack_assign(p_clearcoat_gloss);
int ior_offset = compiler.stack_assign(p_ior);
int transparency_offset = compiler.stack_assign(p_transparency);
int refraction_roughness_offset = compiler.stack_assign(p_refraction_roughness);
int transmission_offset = compiler.stack_assign(p_transmission);
int transmission_roughness_offset = compiler.stack_assign(p_transmission_roughness);
int anisotropic_rotation_offset = compiler.stack_assign(p_anisotropic_rotation);
int subsurface_radius_offset = compiler.stack_assign(p_subsurface_radius);
@ -2393,7 +2393,7 @@ void PrincipledBsdfNode::compile(SVMCompiler& compiler, ShaderInput *p_metallic,
compiler.encode_uchar4(specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset),
compiler.encode_uchar4(sheen_offset, sheen_tint_offset, clearcoat_offset, clearcoat_gloss_offset));
compiler.add_node(compiler.encode_uchar4(ior_offset, transparency_offset, anisotropic_rotation_offset, refraction_roughness_offset),
compiler.add_node(compiler.encode_uchar4(ior_offset, transmission_offset, anisotropic_rotation_offset, transmission_roughness_offset),
distribution, SVM_STACK_INVALID, SVM_STACK_INVALID);
float3 bc_default = get_float3(base_color_in->socket_type);
@ -2419,8 +2419,8 @@ void PrincipledBsdfNode::compile(SVMCompiler& compiler)
{
compile(compiler, input("Metallic"), input("Subsurface"), input("Subsurface Radius"), input("Specular"),
input("Roughness"), input("Specular Tint"), input("Anisotropic"), input("Sheen"), input("Sheen Tint"),
input("Clearcoat"), input("Clearcoat Gloss"), input("IOR"), input("Transparency"),
input("Anisotropic Rotation"), input("Refraction Roughness"));
input("Clearcoat"), input("Clearcoat Gloss"), input("IOR"), input("Transmission"),
input("Anisotropic Rotation"), input("Transmission Roughness"));
}
void PrincipledBsdfNode::compile(OSLCompiler& compiler)

@ -378,13 +378,13 @@ public:
void compile(SVMCompiler& compiler, ShaderInput *metallic, ShaderInput *subsurface, ShaderInput *subsurface_radius,
ShaderInput *specular, ShaderInput *roughness, ShaderInput *specular_tint, ShaderInput *anisotropic,
ShaderInput *sheen, ShaderInput *sheen_tint, ShaderInput *clearcoat, ShaderInput *clearcoat_gloss,
ShaderInput *ior, ShaderInput *transparency, ShaderInput *anisotropic_rotation, ShaderInput *refraction_roughness);
ShaderInput *ior, ShaderInput *transmission, ShaderInput *anisotropic_rotation, ShaderInput *transmission_roughness);
float3 base_color;
float3 subsurface_color, subsurface_radius;
float metallic, subsurface, specular, roughness, specular_tint, anisotropic,
sheen, sheen_tint, clearcoat, clearcoat_gloss, ior, transparency,
anisotropic_rotation, refraction_roughness;
sheen, sheen_tint, clearcoat, clearcoat_gloss, ior, transmission,
anisotropic_rotation, transmission_roughness;
float3 normal, clearcoat_normal, tangent;
float surface_mix_weight;
ClosureType distribution, distribution_orig;

@ -479,7 +479,7 @@ void Session::release_tile(RenderTile& rtile)
{
thread_scoped_lock tile_lock(tile_mutex);
progress.add_finished_tile();
progress.add_finished_tile(rtile.task == RenderTile::DENOISE);
bool delete_tile;
@ -912,7 +912,7 @@ void Session::update_status_time(bool show_pause, bool show_done)
int progressive_sample = tile_manager.state.sample;
int num_samples = tile_manager.get_num_effective_samples();
int tile = progress.get_finished_tiles();
int tile = progress.get_rendered_tiles();
int num_tiles = tile_manager.state.num_tiles;
/* update status */
@ -920,11 +920,12 @@ void Session::update_status_time(bool show_pause, bool show_done)
if(!params.progressive) {
const bool is_cpu = params.device.type == DEVICE_CPU;
const bool rendering_finished = (tile == num_tiles);
const bool is_last_tile = (tile + 1) == num_tiles;
substatus = string_printf("Path Tracing Tile %d/%d", tile, num_tiles);
if(device->show_samples() || (is_cpu && is_last_tile)) {
if(!rendering_finished && (device->show_samples() || (is_cpu && is_last_tile))) {
/* Some devices automatically support showing the sample number:
* - CUDADevice
* - OpenCLDevice when using the megakernel (the split kernel renders multiple
@ -936,6 +937,9 @@ void Session::update_status_time(bool show_pause, bool show_done)
*/
substatus += string_printf(", Sample %d/%d", progress.get_current_sample(), num_samples);
}
if(params.use_denoising) {
substatus += string_printf(", Denoised %d tiles", progress.get_denoised_tiles());
}
}
else if(tile_manager.num_samples == INT_MAX)
substatus = string_printf("Path Tracing Sample %d", progressive_sample+1);

@ -241,6 +241,10 @@ void Shader::set_graph(ShaderGraph *graph_)
delete graph_bump;
graph = graph_;
graph_bump = NULL;
/* Store info here before graph optimization to make sure that
* nodes that get optimized away still count. */
has_volume_connected = (graph->output()->input("Volume")->link != NULL);
}
void Shader::tag_update(Scene *scene)
@ -432,15 +436,14 @@ void ShaderManager::device_update_common(Device *device,
flag |= SD_HAS_VOLUME;
has_volumes = true;
/* in this case we can assume transparent surface */
if(!shader->has_surface)
flag |= SD_HAS_ONLY_VOLUME;
/* todo: this could check more fine grained, to skip useless volumes
* enclosed inside an opaque bsdf.
*/
flag |= SD_HAS_TRANSPARENT_SHADOW;
}
/* in this case we can assume transparent surface */
if(shader->has_volume_connected && !shader->has_surface)
flag |= SD_HAS_ONLY_VOLUME;
if(shader->heterogeneous_volume && shader->has_volume_spatial_varying)
flag |= SD_HETEROGENEOUS_VOLUME;
if(shader->has_bssrdf_bump)

@ -105,6 +105,15 @@ public:
bool need_update;
bool need_update_attributes;
/* If the shader has only volume components, the surface is assumed to
* be transparent.
* However, graph optimization might remove the volume subgraph, but
* since the user connected something to the volume output the surface
* should still be transparent.
* Therefore, has_volume_connected stores whether some volume subtree
* was connected before optimization. */
bool has_volume_connected;
/* information about shader after compiling */
bool has_surface;
bool has_surface_emission;

@ -786,12 +786,10 @@ static string line_directive(const string& base, const string& path, int line)
return string_printf("#line %d \"%s\"", line, escaped_path.c_str());
}
static string path_source_replace_includes_recursive(
const string& base,
const string& source,
const string& path,
const string& source_filename)
const string& source_filepath)
{
/* Our own little c preprocessor that replaces #includes with the file
* contents, to work around issue of OpenCL drivers not supporting
@ -809,23 +807,22 @@ static string path_source_replace_includes_recursive(
if(string_startswith(token, "include")) {
token = string_strip(token.substr(7, token.size() - 7));
if(token[0] == '"') {
size_t n_start = 1;
size_t n_end = token.find("\"", n_start);
string filename = token.substr(n_start, n_end - n_start);
string text, filepath = path_join(path, filename);
const size_t n_start = 1;
const size_t n_end = token.find("\"", n_start);
const string filename = token.substr(n_start, n_end - n_start);
string filepath = path_join(base, filename);
if(!path_exists(filepath)) {
filepath = path_join(path_dirname(source_filepath),
filename);
}
string text;
if(path_read_text(filepath, text)) {
/* Replace include directories with both current path
* and path extracted from the include file.
* Not totally robust, but works fine for Cycles kernel
* and avoids having list of include directories.x
*/
text = path_source_replace_includes(
text, path_dirname(filepath), filename);
text = path_source_replace_includes(text, path, filename);
text = path_source_replace_includes_recursive(
base, text, filepath);
/* Use line directives for better error messages. */
line = line_directive(base, filepath, 1)
+ token.replace(0, n_end + 1, "\n" + text + "\n")
+ line_directive(base, path_join(path, source_filename), i + 1);
+ line_directive(base, source_filepath, i + 1);
}
}
}
@ -840,7 +837,10 @@ string path_source_replace_includes(const string& source,
const string& path,
const string& source_filename)
{
return path_source_replace_includes_recursive(path, source, path, source_filename);
return path_source_replace_includes_recursive(
path,
source,
path_join(path, source_filename));
}
FILE *path_fopen(const string& path, const string& mode)

@ -37,7 +37,8 @@ public:
pixel_samples = 0;
total_pixel_samples = 0;
current_tile_sample = 0;
finished_tiles = 0;
rendered_tiles = 0;
denoised_tiles = 0;
start_time = time_dt();
render_start_time = time_dt();
status = "Initializing";
@ -75,7 +76,8 @@ public:
pixel_samples = 0;
total_pixel_samples = 0;
current_tile_sample = 0;
finished_tiles = 0;
rendered_tiles = 0;
denoised_tiles = 0;
start_time = time_dt();
render_start_time = time_dt();
status = "Initializing";
@ -177,7 +179,8 @@ public:
pixel_samples = 0;
current_tile_sample = 0;
finished_tiles = 0;
rendered_tiles = 0;
denoised_tiles = 0;
}
void set_total_pixel_samples(uint64_t total_pixel_samples_)
@ -209,11 +212,16 @@ public:
set_update();
}
void add_finished_tile()
void add_finished_tile(bool denoised)
{
thread_scoped_lock lock(progress_mutex);
finished_tiles++;
if(denoised) {
denoised_tiles++;
}
else {
rendered_tiles++;
}
}
int get_current_sample()
@ -223,9 +231,14 @@ public:
return current_tile_sample;
}
int get_finished_tiles()
int get_rendered_tiles()
{
return finished_tiles;
return rendered_tiles;
}
int get_denoised_tiles()
{
return denoised_tiles;
}
/* status messages */
@ -318,7 +331,7 @@ protected:
int current_tile_sample;
/* Stores the number of tiles that's already finished.
* Used to determine whether all but the last tile are finished rendering, in which case the current_tile_sample is displayed. */
int finished_tiles;
int rendered_tiles, denoised_tiles;
double start_time, render_start_time;

@ -1636,6 +1636,12 @@ void blo_do_versions_270(FileData *fd, Library *UNUSED(lib), Main *main)
}
{
for (Scene *scene = main->scene.first; scene; scene = scene->id.next) {
if (scene->r.im_format.exr_codec == R_IMF_EXR_CODEC_DWAB) {
scene->r.im_format.exr_codec = R_IMF_EXR_CODEC_DWAA;
}
}
if (!DNA_struct_elem_find(fd->filesdna, "View3DDebug", "char", "background")) {
bScreen *screen;

@ -1912,7 +1912,6 @@ static BMOpDefine bmo_wireframe_def = {
{"use_even_offset", BMO_OP_SLOT_BOOL},
{"use_crease", BMO_OP_SLOT_BOOL},
{"crease_weight", BMO_OP_SLOT_FLT},
{"thickness", BMO_OP_SLOT_FLT},
{"use_relative_offset", BMO_OP_SLOT_BOOL},
{"material_offset", BMO_OP_SLOT_INT},
{{'\0'}},

@ -62,6 +62,7 @@
#define EDGE_NET _FLAG_WALK
/* tag verts we've visit */
#define VERT_VISIT _FLAG_WALK
#define VERT_IN_QUEUE _FLAG_WALK_ALT
struct VertOrder {
float angle;
@ -512,13 +513,21 @@ bool BM_face_split_edgenet(
} while ((l_iter = l_iter->next) != l_first);
#endif
/* Note: 'VERT_IN_QUEUE' is often not needed at all,
* however in rare cases verts are added multiple times to the queue,
* that on it's own is harmless but in _very_ rare cases,
* the queue will overflow its maximum size,
* so we better be strict about this! see: T51539 */
for (i = 0; i < edge_net_len; i++) {
BM_ELEM_API_FLAG_ENABLE(edge_net[i], EDGE_NET);
BM_ELEM_API_FLAG_DISABLE(edge_net[i]->v1, VERT_IN_QUEUE);
BM_ELEM_API_FLAG_DISABLE(edge_net[i]->v2, VERT_IN_QUEUE);
}
l_iter = l_first = BM_FACE_FIRST_LOOP(f);
do {
BM_ELEM_API_FLAG_ENABLE(l_iter->e, EDGE_NET);
BM_ELEM_API_FLAG_DISABLE(l_iter->v, VERT_IN_QUEUE);
} while ((l_iter = l_iter->next) != l_first);
float face_normal_matrix[3][3];
@ -527,8 +536,10 @@ bool BM_face_split_edgenet(
/* any vert can be used to begin with */
STACK_PUSH(vert_queue, l_first->v);
BM_ELEM_API_FLAG_ENABLE(l_first->v, VERT_IN_QUEUE);
while ((v = STACK_POP(vert_queue))) {
BM_ELEM_API_FLAG_DISABLE(v, VERT_IN_QUEUE);
if (bm_face_split_edgenet_find_loop(
v, f->no, face_normal_matrix,
edge_order, edge_order_len, face_verts, &face_verts_len))
@ -558,8 +569,12 @@ bool BM_face_split_edgenet(
* (verts between boundary and manifold edges) */
l_iter = l_first = BM_FACE_FIRST_LOOP(f_new);
do {
if (bm_face_split_edgenet_find_loop_pair_exists(l_iter->v)) {
/* Avoid adding to queue multiple times (not common but happens). */
if (!BM_ELEM_API_FLAG_TEST(l_iter->v, VERT_IN_QUEUE) &&
bm_face_split_edgenet_find_loop_pair_exists(l_iter->v))
{
STACK_PUSH(vert_queue, l_iter->v);
BM_ELEM_API_FLAG_ENABLE(l_iter->v, VERT_IN_QUEUE);
}
} while ((l_iter = l_iter->next) != l_first);
}

@ -2295,7 +2295,7 @@ static int interp_range(const float *frac, int n, const float f, float *r_rest)
/* Interpolate given vmesh to make one with target nseg border vertices on the profiles */
static VMesh *interp_vmesh(BevelParams *bp, VMesh *vm0, int nseg)
{
int n, ns0, nseg2, odd, i, j, k, j0, k0, k0prev;
int n, ns0, nseg2, odd, i, j, k, j0, k0, k0prev, j0inc, k0inc;
float *prev_frac, *frac, *new_frac, *prev_new_frac;
float f, restj, restk, restkprev;
float quad[4][3], co[3], center[3];
@ -2339,10 +2339,12 @@ static VMesh *interp_vmesh(BevelParams *bp, VMesh *vm0, int nseg)
copy_v3_v3(co, mesh_vert_canon(vm0, i, j0, k0)->co);
}
else {
j0inc = (restj < BEVEL_EPSILON || j0 == ns0) ? 0 : 1;
k0inc = (restk < BEVEL_EPSILON || k0 == ns0) ? 0 : 1;
copy_v3_v3(quad[0], mesh_vert_canon(vm0, i, j0, k0)->co);
copy_v3_v3(quad[1], mesh_vert_canon(vm0, i, j0, k0 + 1)->co);
copy_v3_v3(quad[2], mesh_vert_canon(vm0, i, j0 + 1, k0 + 1)->co);
copy_v3_v3(quad[3], mesh_vert_canon(vm0, i, j0 + 1, k0)->co);
copy_v3_v3(quad[1], mesh_vert_canon(vm0, i, j0, k0 + k0inc)->co);
copy_v3_v3(quad[2], mesh_vert_canon(vm0, i, j0 + j0inc, k0 + k0inc)->co);
copy_v3_v3(quad[3], mesh_vert_canon(vm0, i, j0 + j0inc, k0)->co);
interp_bilinear_quad_v3(quad, restk, restj, co);
}
copy_v3_v3(mesh_vert(vm1, i, j, k)->co, co);

@ -784,13 +784,12 @@ void ED_mask_draw_region(Mask *mask, ARegion *ar,
/* apply transformation so mask editing tools will assume drawing from the origin in normalized space */
gpuPushMatrix();
gpuTranslate2f(x + xofs, y + yofs);
gpuScale2f(zoomx, zoomy);
if (stabmat) {
gpuMultMatrix(stabmat);
}
gpuTranslate2f(x + xofs, y + yofs);
gpuScale2f(maxdim * zoomx, maxdim * zoomy);
gpuScale2f(maxdim, maxdim);
if (do_draw_cb) {
ED_region_draw_cb_draw(C, ar, REGION_DRAW_PRE_VIEW);

@ -1892,7 +1892,6 @@ static bool save_image_doit(bContext *C, SpaceImage *sima, wmOperator *op, SaveI
}
else {
colormanaged_ibuf = IMB_colormanagement_imbuf_for_write(ibuf, save_as_render, true, &imf->view_settings, &imf->display_settings, imf);
IMB_metadata_copy(colormanaged_ibuf, ibuf);
ok = BKE_imbuf_write_as(colormanaged_ibuf, simopts->filepath, imf, save_copy);
save_imbuf_post(ibuf, colormanaged_ibuf);
}

@ -119,7 +119,7 @@ void meshobject_foreachScreenVert(
data.clip_flag = clip_flag;
if (clip_flag & V3D_PROJ_TEST_CLIP_BB) {
ED_view3d_clipping_local(vc->rv3d, vc->obedit->obmat); /* for local clipping lookups */
ED_view3d_clipping_local(vc->rv3d, vc->obact->obmat);
}
dm->foreachMappedVert(dm, meshobject_foreachScreenVert__mapFunc, &data, DM_FOREACH_NOP);

@ -2655,7 +2655,7 @@ void node_bsdf_toon(vec4 color, float size, float tsmooth, vec3 N, out vec4 resu
void node_bsdf_principled(vec4 base_color, float subsurface, vec3 subsurface_radius, vec4 subsurface_color, float metallic, float specular,
float specular_tint, float roughness, float anisotropic, float anisotropic_rotation, float sheen, float sheen_tint, float clearcoat,
float clearcoat_gloss, float ior, float transparency, float refraction_roughness, vec3 N, vec3 CN, vec3 T, vec3 I, out vec4 result)
float clearcoat_gloss, float ior, float transmission, float transmission_roughness, vec3 N, vec3 CN, vec3 T, vec3 I, out vec4 result)
{
/* ambient light */
// TODO: set ambient light to an appropriate value

@ -1759,9 +1759,14 @@ void IMB_colormanagement_transform_from_byte_threaded(float *float_buffer, unsig
return;
}
if (STREQ(from_colorspace, to_colorspace)) {
/* If source and destination color spaces are identical, skip
* threading overhead and simply do nothing
/* Because this function always takes a byte buffer and returns a float buffer, it must
* always do byte-to-float conversion of some kind. To avoid threading overhead
* IMB_buffer_float_from_byte is used when color spaces are identical. See T51002.
*/
IMB_buffer_float_from_byte(float_buffer, byte_buffer,
IB_PROFILE_SRGB, IB_PROFILE_SRGB,
true,
width, height, width, width);
return;
}
cm_processor = IMB_colormanagement_colorspace_processor_new(from_colorspace, to_colorspace);
@ -2060,6 +2065,10 @@ ImBuf *IMB_colormanagement_imbuf_for_write(ImBuf *ibuf, bool save_as_render, boo
}
}
if (colormanaged_ibuf != ibuf) {
IMB_metadata_copy(colormanaged_ibuf, ibuf);
}
return colormanaged_ibuf;
}

@ -81,7 +81,9 @@ bool IMB_metadata_get_field(struct ImBuf *img, const char *key, char *field, con
void IMB_metadata_copy(struct ImBuf *dimb, struct ImBuf *simb)
{
BLI_assert(dimb != simb);
if (simb->metadata) {
IMB_metadata_free(dimb);
dimb->metadata = IDP_CopyProperty(simb->metadata);
}
}

@ -213,7 +213,7 @@ struct ImBuf *imb_load_photoshop(const char *filename, int flags, char colorspac
in = ImageInput::create(filename);
if (!in) {
std::cerr << __func__ << ": ImageInput::create() failed:" << std::endl
<< OpenImageIO::geterror() << std::endl;
<< OIIO_NAMESPACE::geterror() << std::endl;
return NULL;
}

@ -1026,15 +1026,16 @@ void IMB_exr_set_channel(void *handle, const char *layname, const char *passname
ExrChannel *echan;
char name[EXR_TOT_MAXNAME + 1];
if (layname) {
if (layname && layname[0] != '\0') {
char lay[EXR_LAY_MAXNAME + 1], pass[EXR_PASS_MAXNAME + 1];
BLI_strncpy(lay, layname, EXR_LAY_MAXNAME);
BLI_strncpy(pass, passname, EXR_PASS_MAXNAME);
BLI_snprintf(name, sizeof(name), "%s.%s", lay, pass);
}
else
else {
BLI_strncpy(name, passname, EXR_TOT_MAXNAME - 1);
}
echan = (ExrChannel *)BLI_findstring(&data->channels, name, offsetof(ExrChannel, name));
@ -1043,8 +1044,9 @@ void IMB_exr_set_channel(void *handle, const char *layname, const char *passname
echan->ystride = ystride;
echan->rect = rect;
}
else
else {
printf("IMB_exr_set_channel error %s\n", name);
}
}
float *IMB_exr_channel_rect(void *handle, const char *layname, const char *passname, const char *viewname)

@ -95,7 +95,8 @@ EnumPropertyItem rna_enum_exr_codec_items[] = {
{R_IMF_EXR_CODEC_B44, "B44", 0, "B44 (lossy)", ""},
{R_IMF_EXR_CODEC_B44A, "B44A", 0, "B44A (lossy)", ""},
{R_IMF_EXR_CODEC_DWAA, "DWAA", 0, "DWAA (lossy)", ""},
{R_IMF_EXR_CODEC_DWAB, "DWAB", 0, "DWAB (lossy)", ""},
/* NOTE: Commented out for until new OpenEXR is released, see T50673. */
/* {R_IMF_EXR_CODEC_DWAB, "DWAB", 0, "DWAB (lossy)", ""}, */
{0, NULL, 0, NULL, NULL}
};
#endif

@ -45,8 +45,8 @@ static bNodeSocketTemplate sh_node_bsdf_principled_in[] = {
{ SOCK_FLOAT, 1, N_("Clearcoat"), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, PROP_FACTOR},
{ SOCK_FLOAT, 1, N_("Clearcoat Gloss"), 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, PROP_FACTOR},
{ SOCK_FLOAT, 1, N_("IOR"), 1.45f, 0.0f, 0.0f, 0.0f, 0.0f, 1000.0f},
{ SOCK_FLOAT, 1, N_("Transparency"), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, PROP_FACTOR},
{ SOCK_FLOAT, 1, N_("Refraction Roughness"), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, PROP_FACTOR},
{ SOCK_FLOAT, 1, N_("Transmission"), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, PROP_FACTOR},
{ SOCK_FLOAT, 1, N_("Transmission Roughness"),0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, PROP_FACTOR},
{ SOCK_VECTOR, 1, N_("Normal"), 0.0f, 0.0f, 0.0f, 1.0f, -1.0f, 1.0f, PROP_NONE, SOCK_HIDE_VALUE},
{ SOCK_VECTOR, 1, N_("Clearcoat Normal"), 0.0f, 0.0f, 0.0f, 1.0f, -1.0f, 1.0f, PROP_NONE, SOCK_HIDE_VALUE},
{ SOCK_VECTOR, 1, N_("Tangent"), 0.0f, 0.0f, 0.0f, 1.0f, -1.0f, 1.0f, PROP_NONE, SOCK_HIDE_VALUE},
@ -86,7 +86,7 @@ static void node_shader_update_principled(bNodeTree *UNUSED(ntree), bNode *node)
int distribution = node->custom1;
for (sock = node->inputs.first; sock; sock = sock->next) {
if (STREQ(sock->name, "Refraction Roughness")) {
if (STREQ(sock->name, "Transmission Roughness")) {
if (distribution == SHD_GLOSSY_GGX)
sock->flag &= ~SOCK_UNAVAIL;
else

@ -62,6 +62,7 @@
#include "renderpipeline.h"
#include "texture.h"
#include "zbuf.h"
#include "render_result.h"
/* ------------------------------------------------------------------------- */
@ -143,8 +144,8 @@ static Render *envmap_render_copy(Render *re, EnvMap *env)
/* set up renderdata */
render_copy_renderdata(&envre->r, &re->r);
envre->r.mode &= ~(R_BORDER | R_PANORAMA | R_ORTHO | R_MBLUR);
BLI_listbase_clear(&envre->r.layers);
BLI_listbase_clear(&envre->r.views);
BLI_freelistN(&envre->r.layers);
BLI_freelistN(&envre->r.views);
envre->r.filtertype = 0;
envre->r.tilex = envre->r.xsch / 2;
envre->r.tiley = envre->r.ysch / 2;
@ -491,11 +492,18 @@ static void render_envmap(Render *re, EnvMap *env)
env_rotate_scene(envre, tmat, 0);
if (re->test_break(re->tbh) == 0) {
RenderLayer *rl = envre->result->layers.first;
int y;
float *alpha;
float *rect;
if (envre->result->do_exr_tile) {
BLI_rw_mutex_lock(&envre->resultmutex, THREAD_LOCK_WRITE);
render_result_exr_file_end(envre);
BLI_rw_mutex_unlock(&envre->resultmutex);
}
RenderLayer *rl = envre->result->layers.first;
/* envmap is rendered independently of multiview */
rect = RE_RenderLayerGetPass(rl, RE_PASSNAME_COMBINED, "");
ibuf = IMB_allocImBuf(envre->rectx, envre->recty, 24, IB_rect | IB_rectfloat);