Cycles / Blackbody node:

* Replaced the brute-force version with a lookup table, which speeds it up a lot.
Patch by Philipp Oeser (lichtwerk) with some cleanup and changes by myself. Thanks! 

ToDo:
* Temperature values between 800 and 804 Kelvin are wrong in SVM, check on this.
This commit is contained in:
Thomas Dinges 2013-06-16 16:08:11 +00:00
parent 0429595440
commit a841813cd9
10 changed files with 258 additions and 55 deletions

@ -807,6 +807,12 @@ typedef struct KernelBSSRDF {
int pad1, pad2;
} KernelBSSRDF;
/* Kernel-side settings for the blackbody converter node. */
typedef struct KernelBLACKBODY {
	/* Offset of the blackbody table, as returned when the table was added to
	 * the scene lookup tables; passed to lookup_table_read() by the SVM node. */
	int table_offset;

	/* Unused padding members (presumably to keep the struct at four ints). */
	int pad1;
	int pad2;
	int pad3;
} KernelBLACKBODY;
typedef struct KernelData {
KernelCamera cam;
KernelFilm film;
@ -816,6 +822,7 @@ typedef struct KernelData {
KernelBVH bvh;
KernelCurves curve_kernel_data;
KernelBSSRDF bssrdf;
KernelBLACKBODY blackbody;
} KernelData;
CCL_NAMESPACE_END

@ -36,70 +36,52 @@ CCL_NAMESPACE_BEGIN
__device void svm_node_blackbody(KernelGlobals *kg, ShaderData *sd, float *stack, uint temperature_offset, uint col_offset)
{
/* ToDo: move those defines to kernel_types.h ? */
float bb_drapper = 800.0f;
float bb_max_table_range = 12000.0f;
float bb_table_xpower = 1.5f;
float bb_table_ypower = 5.0f;
float bb_table_spacing = 2.0f;
/* Output */
float3 color_rgb;
float3 color_rgb = make_float3(0.0f, 0.0f, 0.0f);
/* Input */
float temperature = stack_load_float(stack, temperature_offset);
/* ToDo: Use a lookup table to speed this up and outsource it from the kernel */
float X = 0, Y = 0, Z = 0;
const float cie_colour_match[81][3] = {
{0.0014,0.0000,0.0065}, {0.0022,0.0001,0.0105}, {0.0042,0.0001,0.0201},
{0.0076,0.0002,0.0362}, {0.0143,0.0004,0.0679}, {0.0232,0.0006,0.1102},
{0.0435,0.0012,0.2074}, {0.0776,0.0022,0.3713}, {0.1344,0.0040,0.6456},
{0.2148,0.0073,1.0391}, {0.2839,0.0116,1.3856}, {0.3285,0.0168,1.6230},
{0.3483,0.0230,1.7471}, {0.3481,0.0298,1.7826}, {0.3362,0.0380,1.7721},
{0.3187,0.0480,1.7441}, {0.2908,0.0600,1.6692}, {0.2511,0.0739,1.5281},
{0.1954,0.0910,1.2876}, {0.1421,0.1126,1.0419}, {0.0956,0.1390,0.8130},
{0.0580,0.1693,0.6162}, {0.0320,0.2080,0.4652}, {0.0147,0.2586,0.3533},
{0.0049,0.3230,0.2720}, {0.0024,0.4073,0.2123}, {0.0093,0.5030,0.1582},
{0.0291,0.6082,0.1117}, {0.0633,0.7100,0.0782}, {0.1096,0.7932,0.0573},
{0.1655,0.8620,0.0422}, {0.2257,0.9149,0.0298}, {0.2904,0.9540,0.0203},
{0.3597,0.9803,0.0134}, {0.4334,0.9950,0.0087}, {0.5121,1.0000,0.0057},
{0.5945,0.9950,0.0039}, {0.6784,0.9786,0.0027}, {0.7621,0.9520,0.0021},
{0.8425,0.9154,0.0018}, {0.9163,0.8700,0.0017}, {0.9786,0.8163,0.0014},
{1.0263,0.7570,0.0011}, {1.0567,0.6949,0.0010}, {1.0622,0.6310,0.0008},
{1.0456,0.5668,0.0006}, {1.0026,0.5030,0.0003}, {0.9384,0.4412,0.0002},
{0.8544,0.3810,0.0002}, {0.7514,0.3210,0.0001}, {0.6424,0.2650,0.0000},
{0.5419,0.2170,0.0000}, {0.4479,0.1750,0.0000}, {0.3608,0.1382,0.0000},
{0.2835,0.1070,0.0000}, {0.2187,0.0816,0.0000}, {0.1649,0.0610,0.0000},
{0.1212,0.0446,0.0000}, {0.0874,0.0320,0.0000}, {0.0636,0.0232,0.0000},
{0.0468,0.0170,0.0000}, {0.0329,0.0119,0.0000}, {0.0227,0.0082,0.0000},
{0.0158,0.0057,0.0000}, {0.0114,0.0041,0.0000}, {0.0081,0.0029,0.0000},
{0.0058,0.0021,0.0000}, {0.0041,0.0015,0.0000}, {0.0029,0.0010,0.0000},
{0.0020,0.0007,0.0000}, {0.0014,0.0005,0.0000}, {0.0010,0.0004,0.0000},
{0.0007,0.0002,0.0000}, {0.0005,0.0002,0.0000}, {0.0003,0.0001,0.0000},
{0.0002,0.0001,0.0000}, {0.0002,0.0001,0.0000}, {0.0001,0.0000,0.0000},
{0.0001,0.0000,0.0000}, {0.0001,0.0000,0.0000}, {0.0000,0.0000,0.0000}
};
const float c1 = 3.74183e-16; // 2*pi*h*c^2, W*m^2
const float c2 = 1.4388e-2; // h*c/k, m*K, h is Planck's const, k is Boltzmann's
const float dlambda = 5.0f * 1e-9; // in meters
for (int i = 0; i < 81; ++i) {
float lambda = 380.0f + 5.0f * i;
float wlm = lambda * 1e-9; // Wavelength in meters
// N.B. spec_intens returns result in W/m^2 but it's a differential,
// needs to be scaled by dlambda!
float spec_intens = (c1 * powf(wlm, -5.0)) / (expf(c2 / (wlm * temperature)) -1.0f);
float Me = spec_intens * dlambda;
X += Me * cie_colour_match[i][0];
Y += Me * cie_colour_match[i][1];
Z += Me * cie_colour_match[i][2];
if (temperature < bb_drapper) {
/* just return very very dim red */
color_rgb = make_float3(1.0e-6f,0.0f,0.0f);
}
else if (temperature <= bb_max_table_range) {
/* This is the overall size of the table (317*3+3) */
const int lookuptablesize = 954;
const float lookuptablesizef = 954.0f;
/* Convert to RGB */
color_rgb = xyz_to_rgb(X, Y, Z);
/* reconstruct a proper index for the table lookup, compared to OSL we don't look up two colors
just one (the OSL-lerp is also automatically done for us by "lookup_table_read") */
float t = powf ((temperature - bb_drapper) / bb_table_spacing, 1.0f/bb_table_xpower);
/* Clamp to zero if values are smaller */
color_rgb = max(color_rgb, make_float3(0.0f, 0.0f, 0.0f));
int blackbody_table_offset = kernel_data.blackbody.table_offset;
/* Scale color by luminance */
color_rgb /= Y;
/* Retrieve colors from the lookup table */
float lutval = t/lookuptablesizef;
float R = lookup_table_read(kg, lutval, blackbody_table_offset, lookuptablesize);
lutval = (t + 317.0f*1.0f)/lookuptablesizef;
float G = lookup_table_read(kg, lutval, blackbody_table_offset, lookuptablesize);
lutval = (t + 317.0f*2.0f)/lookuptablesizef;
float B = lookup_table_read(kg, lutval, blackbody_table_offset, lookuptablesize);
R = powf(R, bb_table_ypower);
G = powf(G, bb_table_ypower);
B = powf(B, bb_table_ypower);
/* Luminance */
float l = linear_rgb_to_gray(make_float3(R, G, B));
color_rgb = make_float3(R, G, B);
color_rgb /= l;
}
if (stack_valid(col_offset))
stack_store_float3(stack, col_offset, color_rgb);

@ -16,6 +16,7 @@ set(INC_SYS
set(SRC
attribute.cpp
background.cpp
blackbody.cpp
buffers.cpp
bssrdf.cpp
camera.cpp
@ -43,6 +44,7 @@ set(SRC
set(SRC_HEADERS
attribute.h
background.h
blackbody.h
buffers.h
bssrdf.h
camera.h

@ -0,0 +1,145 @@
/*
* Adapted from Open Shading Language with this license:
*
* Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
* All Rights Reserved.
*
* Modifications Copyright 2013, Blender Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Sony Pictures Imageworks nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blackbody.h"
#include "util_color.h"
#include "util_math.h"
CCL_NAMESPACE_BEGIN
/* Build the lookup table used by the Blackbody node.
 *
 * The table holds 317 temperature samples per color channel, stored channel
 * after channel (all R, then all G, then all B), followed by three padding
 * floats that are never written here, for 317*3+3 = 954 entries in total.
 * This layout has to stay in sync with svm_node_blackbody(), which reads the
 * channels at offsets 0, 317 and 634 of a 954 entry table.
 *
 * Sample i corresponds to the temperature
 *     T = pow(i, 1.5) * 2 + 800   (Kelvin)
 * and every stored value is the linear RGB component raised to 1/5; the
 * reader undoes this by raising the looked up value to the power 5.
 *
 * Returns the filled table by value. */
vector<float> blackbody_table()
{
	/* quoted from OSLs opcolor.cpp
	In order to speed up the blackbody computation, we have a table
	storing the precomputed BB values for a range of temperatures. Less
	than BB_DRAPER always returns 0. Greater than BB_MAX_TABLE_RANGE
	does the full computation, we think it'll be rare to inquire higher
	temperatures.
	Since the bb function is so nonlinear, we actually space the table
	entries nonlinearly, with the relationship between the table index i
	and the temperature T as follows:
	i = ((T-Draper)/spacing)^(1/xpower)
	T = pow(i, xpower) * spacing + Draper
	And furthermore, we store in the table the true value raised ^(1/5).
	I tuned this a bit, and with the current values we can have all
	blackbody results accurate to within 0.1% with a table size of 317
	(about 5 KB of data).
	*/

	/* Color matching function samples (X, Y, Z weights), one row per
	 * wavelength from 380nm upwards in 5nm steps (81 rows). */
	const float cie_colour_match[81][3] = {
		{0.0014,0.0000,0.0065}, {0.0022,0.0001,0.0105}, {0.0042,0.0001,0.0201},
		{0.0076,0.0002,0.0362}, {0.0143,0.0004,0.0679}, {0.0232,0.0006,0.1102},
		{0.0435,0.0012,0.2074}, {0.0776,0.0022,0.3713}, {0.1344,0.0040,0.6456},
		{0.2148,0.0073,1.0391}, {0.2839,0.0116,1.3856}, {0.3285,0.0168,1.6230},
		{0.3483,0.0230,1.7471}, {0.3481,0.0298,1.7826}, {0.3362,0.0380,1.7721},
		{0.3187,0.0480,1.7441}, {0.2908,0.0600,1.6692}, {0.2511,0.0739,1.5281},
		{0.1954,0.0910,1.2876}, {0.1421,0.1126,1.0419}, {0.0956,0.1390,0.8130},
		{0.0580,0.1693,0.6162}, {0.0320,0.2080,0.4652}, {0.0147,0.2586,0.3533},
		{0.0049,0.3230,0.2720}, {0.0024,0.4073,0.2123}, {0.0093,0.5030,0.1582},
		{0.0291,0.6082,0.1117}, {0.0633,0.7100,0.0782}, {0.1096,0.7932,0.0573},
		{0.1655,0.8620,0.0422}, {0.2257,0.9149,0.0298}, {0.2904,0.9540,0.0203},
		{0.3597,0.9803,0.0134}, {0.4334,0.9950,0.0087}, {0.5121,1.0000,0.0057},
		{0.5945,0.9950,0.0039}, {0.6784,0.9786,0.0027}, {0.7621,0.9520,0.0021},
		{0.8425,0.9154,0.0018}, {0.9163,0.8700,0.0017}, {0.9786,0.8163,0.0014},
		{1.0263,0.7570,0.0011}, {1.0567,0.6949,0.0010}, {1.0622,0.6310,0.0008},
		{1.0456,0.5668,0.0006}, {1.0026,0.5030,0.0003}, {0.9384,0.4412,0.0002},
		{0.8544,0.3810,0.0002}, {0.7514,0.3210,0.0001}, {0.6424,0.2650,0.0000},
		{0.5419,0.2170,0.0000}, {0.4479,0.1750,0.0000}, {0.3608,0.1382,0.0000},
		{0.2835,0.1070,0.0000}, {0.2187,0.0816,0.0000}, {0.1649,0.0610,0.0000},
		{0.1212,0.0446,0.0000}, {0.0874,0.0320,0.0000}, {0.0636,0.0232,0.0000},
		{0.0468,0.0170,0.0000}, {0.0329,0.0119,0.0000}, {0.0227,0.0082,0.0000},
		{0.0158,0.0057,0.0000}, {0.0114,0.0041,0.0000}, {0.0081,0.0029,0.0000},
		{0.0058,0.0021,0.0000}, {0.0041,0.0015,0.0000}, {0.0029,0.0010,0.0000},
		{0.0020,0.0007,0.0000}, {0.0014,0.0005,0.0000}, {0.0010,0.0004,0.0000},
		{0.0007,0.0002,0.0000}, {0.0005,0.0002,0.0000}, {0.0003,0.0001,0.0000},
		{0.0002,0.0001,0.0000}, {0.0002,0.0001,0.0000}, {0.0001,0.0000,0.0000},
		{0.0001,0.0000,0.0000}, {0.0001,0.0000,0.0000}, {0.0000,0.0000,0.0000}
	};

	const double c1 = 3.74183e-16; // 2*pi*h*c^2, W*m^2
	const double c2 = 1.4388e-2; // h*c/k, m*K
	// h is Planck's const, k is Boltzmann's
	const float dlambda = 5.0f * 1e-9; // in meters

	/* Number of temperature samples per color channel. The svm node hard codes
	 * the same value for its channel offsets, keep both in sync. */
	const int table_entries = 317;

	/* Blackbody table from 800 to 12k Kelvin (317 entries per channel, plus
	 * three padding floats at the end) */
	vector<float> table(table_entries*3+3);

	/* ToDo: move those defines to kernel_types.h ? */
	const float bb_drapper = 800.0f;
	const float bb_table_xpower = 1.5f;
	const float bb_table_ypower = 5.0f;
	const float bb_table_spacing = 2.0f;

	/* ToDo: bring this back to what OSL does with the lastTemperature limit ? */
	/* Note the strict upper bound: each channel only owns table_entries slots.
	 * Running one extra iteration (i == table_entries) would store its R and G
	 * values on top of the G and B values of entry 0, i.e. corrupt the colors
	 * for the lowest temperatures. Entry 316 already lies above 12000 Kelvin,
	 * so the whole supported range stays covered. */
	for (int i = 0; i < table_entries; ++i) {
		float Temperature = powf (float(i), bb_table_xpower) * bb_table_spacing + bb_drapper;

		float X = 0.0f;
		float Y = 0.0f;
		float Z = 0.0f;

		/* from OSL "spectrum_to_XYZ" */
		for (int n = 0; n < 81; ++n) {
			float lambda = 380.0f + 5.0f * n;
			double wlm = lambda * 1e-9; // Wavelength in meters
			// N.B. spec_intens returns result in W/m^2 but it's a differential,
			// needs to be scaled by dlambda!
			float spec_intens = float((c1 * powf(wlm,-5.0)) / (expf(c2 / (wlm * Temperature)) -1.0f));
			float Me = spec_intens * dlambda;

			X += Me * cie_colour_match[n][0];
			Y += Me * cie_colour_match[n][1];
			Z += Me * cie_colour_match[n][2];
		}

		/* Convert from xyz color space */
		float3 col = xyz_to_rgb(X, Y, Z);

		/* Clamp to zero if values are smaller */
		col = max(col, make_float3(0.0f, 0.0f, 0.0f));

		/* Store the fifth root, the reader raises the value to the power 5 again */
		col.x = powf(col.x, 1.0f / bb_table_ypower);
		col.y = powf(col.y, 1.0f / bb_table_ypower);
		col.z = powf(col.z, 1.0f / bb_table_ypower);

		/* Store in table in RRRGGGBBB format */
		table[i] = col.x;
		table[i+table_entries*1] = col.y;
		table[i+table_entries*2] = col.z;
	}

	return table;
}
CCL_NAMESPACE_END

@ -0,0 +1,30 @@
/*
* Copyright 2011, Blender Foundation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __BLACKBODY_H__
#define __BLACKBODY_H__
#include "util_vector.h"
CCL_NAMESPACE_BEGIN
/* Builds and returns the blackbody color lookup table: 317*3+3 floats,
 * stored channel after channel (R, then G, then B, 317 samples each), with
 * every value raised to the power 1/5. Sample i belongs to the temperature
 * pow(i, 1.5) * 2 + 800 Kelvin. */
vector<float> blackbody_table();
CCL_NAMESPACE_END
#endif /* __BLACKBODY_H__ */

@ -188,6 +188,7 @@ public:
virtual bool has_surface_emission() { return false; }
virtual bool has_surface_transparent() { return false; }
virtual bool has_surface_bssrdf() { return false; }
/* Whether this node is a blackbody converter. Returns false here;
 * BlackbodyNode returns true. */
virtual bool has_converter_blackbody() { return false; }
vector<ShaderInput*> inputs;
vector<ShaderOutput*> outputs;

@ -465,6 +465,8 @@ public:
/* Shader node for the blackbody converter. */
class BlackbodyNode : public ShaderNode {
public:
SHADER_NODE_CLASS(BlackbodyNode)
/* Marks this node as a blackbody converter. SVMCompiler::generate_svm_nodes()
 * checks this to set has_converter_blackbody on the shader being compiled. */
bool has_converter_blackbody() { return true; }
};
class MathNode : public ShaderNode {

@ -17,6 +17,7 @@
*/
#include "bssrdf.h"
#include "blackbody.h"
#include "device.h"
#include "graph.h"
#include "light.h"
@ -49,6 +50,7 @@ Shader::Shader()
has_surface_transparent = false;
has_surface_emission = false;
has_surface_bssrdf = false;
has_converter_blackbody = false;
has_volume = false;
has_displacement = false;
@ -125,6 +127,7 @@ ShaderManager::ShaderManager()
{
need_update = true;
bssrdf_table_offset = TABLE_OFFSET_INVALID;
blackbody_table_offset = TABLE_OFFSET_INVALID;
}
ShaderManager::~ShaderManager()
@ -216,6 +219,7 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc
uint *shader_flag = dscene->shader_flag.resize(shader_flag_size);
uint i = 0;
bool has_surface_bssrdf = false;
bool has_converter_blackbody = false;
foreach(Shader *shader, scene->shaders) {
uint flag = 0;
@ -230,6 +234,8 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc
flag |= SD_HOMOGENEOUS_VOLUME;
if(shader->has_surface_bssrdf)
has_surface_bssrdf = true;
if(shader->has_converter_blackbody)
has_converter_blackbody = true;
shader_flag[i++] = flag;
shader_flag[i++] = shader->pass_id;
@ -255,6 +261,21 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc
scene->lookup_tables->remove_table(bssrdf_table_offset);
bssrdf_table_offset = TABLE_OFFSET_INVALID;
}
/* blackbody lookup table */
KernelBLACKBODY *kblackbody = &dscene->data.blackbody;
if(has_converter_blackbody && blackbody_table_offset == TABLE_OFFSET_INVALID) {
vector<float> table = blackbody_table();
blackbody_table_offset = scene->lookup_tables->add_table(dscene, table);
kblackbody->table_offset = (int)blackbody_table_offset;
}
else if(!has_converter_blackbody && blackbody_table_offset != TABLE_OFFSET_INVALID) {
scene->lookup_tables->remove_table(blackbody_table_offset);
blackbody_table_offset = TABLE_OFFSET_INVALID;
}
}
void ShaderManager::device_free_common(Device *device, DeviceScene *dscene, Scene *scene)
@ -264,6 +285,11 @@ void ShaderManager::device_free_common(Device *device, DeviceScene *dscene, Scen
bssrdf_table_offset = TABLE_OFFSET_INVALID;
}
if(blackbody_table_offset != TABLE_OFFSET_INVALID) {
scene->lookup_tables->remove_table(blackbody_table_offset);
blackbody_table_offset = TABLE_OFFSET_INVALID;
}
device->tex_free(dscene->shader_flag);
dscene->shader_flag.clear();
}

@ -76,6 +76,7 @@ public:
bool has_volume;
bool has_displacement;
bool has_surface_bssrdf;
bool has_converter_blackbody;
/* requested mesh attributes */
AttributeRequestSet attributes;
@ -141,6 +142,7 @@ protected:
AttributeIDMap unique_attribute_id;
size_t bssrdf_table_offset;
size_t blackbody_table_offset;
};
CCL_NAMESPACE_END

@ -389,6 +389,11 @@ void SVMCompiler::generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNo
nodes_done = true;
foreach(ShaderNode *node, nodes) {
/* Detect if we have a blackbody converter, to prepare lookup table */
if(node->has_converter_blackbody())
current_shader->has_converter_blackbody = true;
if(done.find(node) == done.end()) {
bool inputs_done = true;
@ -672,6 +677,7 @@ void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int in
shader->has_surface_emission = false;
shader->has_surface_transparent = false;
shader->has_surface_bssrdf = false;
shader->has_converter_blackbody = false;
shader->has_volume = false;
shader->has_displacement = false;