e949d6da5b
* Store compact ray differentials in ShaderData and compute full differentials on demand. This reduces register pressure on the GPU. * Remove BSDF differential code that was effectively doing nothing as the differential orientation was discarded when making it compact. This gives a 1-5% speedup with RTX A6000 + OptiX in our benchmarks, with the bigger speedups in simpler scenes. Renders appear to be identical except for the Both displacement option that does both displacement and bump. Differential Revision: https://developer.blender.org/D15677
46 lines
1.4 KiB
C
46 lines
1.4 KiB
C
/* SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2022 Blender Foundation */
|
|
|
|
#pragma once
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
/* Bump Eval Nodes */
|
|
|
|
/* Begin bump evaluation.
 *
 * Backs up the shading position `sd->P` and its compact differential `sd->dP` on the
 * SVM stack, then, if the undisplaced position attribute is available, overwrites both
 * in `sd` with values derived from that attribute.
 *
 * Stack slots written: `offset + 0` receives `sd->P` (float3) and `offset + 3` receives
 * `sd->dP` (float). `svm_node_leave_bump_eval` reads the same slots to undo the change. */
ccl_device_noinline void svm_node_enter_bump_eval(KernelGlobals kg,
                                                  ccl_private ShaderData *sd,
                                                  ccl_private float *stack,
                                                  uint offset)
{
  /* Back up the current state so it can be restored once bump evaluation is done. */
  stack_store_float3(stack, offset + 0, sd->P);
  stack_store_float(stack, offset + 3, sd->dP);

  /* Without an undisplaced position attribute there is nothing to substitute. */
  const AttributeDescriptor undisplaced_desc = find_attribute(
      kg, sd, ATTR_STD_POSITION_UNDISPLACED);
  if (undisplaced_desc.offset == ATTR_STD_NOT_FOUND) {
    return;
  }

  /* Fetch the undisplaced position together with its dx/dy derivatives. */
  differential3 undisplaced_dP;
  float3 undisplaced_P = primitive_surface_attribute_float3(
      kg, sd, undisplaced_desc, &undisplaced_dP.dx, &undisplaced_dP.dy);

  /* Run the position and both derivative vectors through the object transform helpers. */
  object_position_transform(kg, sd, &undisplaced_P);
  object_dir_transform(kg, sd, &undisplaced_dP.dx);
  object_dir_transform(kg, sd, &undisplaced_dP.dy);

  /* Shade as if the surface were undisplaced; `sd->dP` is stored in compact form. */
  sd->P = undisplaced_P;
  sd->dP = differential_make_compact(undisplaced_dP);
}
|
|
|
|
/* End bump evaluation.
 *
 * Reloads the position and compact differential from the stack slots at `offset + 0`
 * (float3) and `offset + 3` (float) and writes them back into `sd->P` and `sd->dP`.
 * These are the slots that `svm_node_enter_bump_eval` fills when given the same
 * `offset`. `kg` is not used in this function. */
ccl_device_noinline void svm_node_leave_bump_eval(KernelGlobals kg,
                                                  ccl_private ShaderData *sd,
                                                  ccl_private float *stack,
                                                  uint offset)
{
  /* Read back the saved values. */
  const float3 saved_P = stack_load_float3(stack, offset + 0);
  const float saved_dP = stack_load_float(stack, offset + 3);

  /* Put the shader data back the way it was before bump evaluation started. */
  sd->P = saved_P;
  sd->dP = saved_dP;
}
|
|
|
|
CCL_NAMESPACE_END
|