From 65b25df8018cdef8952d2e49ad66f55c6b0b3e2b Mon Sep 17 00:00:00 2001 From: Lukas Stockner Date: Sun, 21 Oct 2018 03:41:31 +0200 Subject: [PATCH 1/6] Cycles: Overhaul ensure_valid_reflection to fix issues with normal- and bumpmapping This function is supposed to prevent the black artifacts caused by strong normal- or bumpmapping, but failed in some cases. Now the code correctly handles all test files and previous issues I am aware of and also has extensive comments describing the algorithm and the math behind it. Basically, the main problem was that there can be multiple valid solutions that fulfil the reflection angle criterion, but I had assumed that only one would exist and therefore simply picked the first solution with a positive term in sqrt(). Now, the code uses additional validity checks and a simple heuristic to pick the best valid solution. Additionally, the code messed up very shallow reflections even if the normal map strength was zero due to the constant limit for the outgoing ray angle, which caused shallow incoming rays to fail the initial test even when reflected directly on Ng. Now, the code accounts for this by reducing the threshold in the case of a shallow incoming ray, ensuring that at least N=Ng is always a valid solution. 
Reviewers: brecht Differential Revision: https://developer.blender.org/D3816 --- intern/cycles/kernel/kernel_montecarlo.h | 93 ++++++++++++++++++++---- intern/cycles/kernel/shaders/stdosl.h | 62 ++++++++++++---- 2 files changed, 126 insertions(+), 29 deletions(-) diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h index 9b96bb80c32..ce68aa16af8 100644 --- a/intern/cycles/kernel/kernel_montecarlo.h +++ b/intern/cycles/kernel/kernel_montecarlo.h @@ -187,7 +187,10 @@ ccl_device float2 regular_polygon_sample(float corners, float rotation, float u, ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N) { float3 R = 2*dot(N, I)*N - I; - if(dot(Ng, R) >= 0.05f) { + + /* Reflection rays may always be at least as shallow as the incoming ray. */ + float threshold = min(0.9f*dot(Ng, I), 0.01f); + if(dot(Ng, R) >= threshold) { return N; } @@ -195,22 +198,86 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N) * The X axis is found by normalizing the component of N that's orthogonal to Ng. * The Y axis isn't actually needed. */ - float3 X = normalize(N - dot(N, Ng)*Ng); + float NdotNg = dot(N, Ng); + float3 X = normalize(N - NdotNg*Ng); - /* Calculate N.z and N.x in the local coordinate system. */ - float Iz = dot(I, Ng); - float Ix2 = sqr(dot(I, X)), Iz2 = sqr(Iz); - float Ix2Iz2 = Ix2 + Iz2; + /* Calculate N.z and N.x in the local coordinate system. + * + * The goal of this computation is to find a N' that is rotated towards Ng just enough + * to lift R' above the threshold (here called t), therefore dot(R', Ng) = t. + * + * According to the standard reflection equation, this means that we want dot(2*dot(N', I)*N' - I, Ng) = t. + * + * Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get 2*dot(N', I)*N'.z - I.z = t. 
+ * + * The rotation is simple to express in the coordinate system we formed - since N lies in the X-Z-plane, we know that + * N' will also lie in the X-Z-plane, so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z . + * + * Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2). + * + * With these simplifications, we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t. + * + * The only unknown here is N'.z, so we can solve for that. + * + * The equation has four solutions in general: + * + * N'.z = +-sqrt(0.5*(+-sqrt(I.x^2*(I.x^2 + I.z^2 - t^2)) + t*I.z + I.x^2 + I.z^2)/(I.x^2 + I.z^2)) + * We can simplify this expression a bit by grouping terms: + * + * a = I.x^2 + I.z^2 + * b = sqrt(I.x^2 * (a - t^2)) + * c = I.z*t + a + * N'.z = +-sqrt(0.5*(+-b + c)/a) + * + * Two solutions can immediately be discarded because they're negative so N' would lie in the lower hemisphere. + */ + float Ix = dot(I, X), Iz = dot(I, Ng); + float Ix2 = sqr(Ix), Iz2 = sqr(Iz); + float a = Ix2 + Iz2; - float a = safe_sqrtf(Ix2*(Ix2Iz2 - sqr(0.05f))); - float b = Iz*0.05f + Ix2Iz2; - float c = (a + b > 0.0f)? (a + b) : (-a + b); + float b = safe_sqrtf(Ix2*(a - sqr(threshold))); + float c = Iz*threshold + a; - float Nz = safe_sqrtf(0.5f * c * (1.0f / Ix2Iz2)); - float Nx = safe_sqrtf(1.0f - sqr(Nz)); + /* Evaluate both solutions. + * In many cases one can be immediately discarded (if N'.z would be imaginary or larger than one), so check for that first. + * If no option is viable (might happen in extreme cases like N being in the wrong hemisphere), give up and return Ng. */ + float fac = 0.5f/a; + float N1_z2 = fac*(b+c), N2_z2 = fac*(-b+c); + bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f)); + bool valid2 = (N2_z2 > 1e-5f) && (N2_z2 <= (1.0f + 1e-5f)); - /* Transform back into global coordinates. */ - return Nx*X + Nz*Ng; + float2 N_new; + if(valid1 && valid2) { + /* If both are possible, do the expensive reflection-based check. 
*/ + float2 N1 = make_float2(safe_sqrtf(1.0f - N1_z2), safe_sqrtf(N1_z2)); + float2 N2 = make_float2(safe_sqrtf(1.0f - N2_z2), safe_sqrtf(N2_z2)); + + float R1 = 2*(N1.x*Ix + N1.y*Iz)*N1.y - Iz; + float R2 = 2*(N2.x*Ix + N2.y*Iz)*N2.y - Iz; + + valid1 = (R1 >= 1e-5f); + valid2 = (R2 >= 1e-5f); + if(valid1 && valid2) { + /* If both solutions are valid, return the one with the shallower reflection since it will be closer to the input + * (if the original reflection wasn't shallow, we would not be in this part of the function). */ + N_new = (R1 < R2)? N1 : N2; + } + else { + /* If only one reflection is valid (= positive), pick that one. */ + N_new = (R1 > R2)? N1 : N2; + } + + } + else if(valid1 || valid2) { + /* Only one solution passes the N'.z criterium, so pick that one. */ + float Nz2 = valid1? N1_z2 : N2_z2; + N_new = make_float2(safe_sqrtf(1.0f - Nz2), safe_sqrtf(Nz2)); + } + else { + return Ng; + } + + return N_new.x*X + N_new.y*Ng; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h index 4a8378796ba..f1235500f2b 100644 --- a/intern/cycles/kernel/shaders/stdosl.h +++ b/intern/cycles/kernel/shaders/stdosl.h @@ -284,33 +284,63 @@ point rotate (point p, float angle, point a, point b) normal ensure_valid_reflection(normal Ng, vector I, normal N) { + /* The implementation here mirrors the one in kernel_montecarlo.h, + * check there for an explanation of the algorithm. */ + float sqr(float x) { return x*x; } vector R = 2*dot(N, I)*N - I; - if (dot(Ng, R) >= 0.05) { + + float threshold = min(0.9*dot(Ng, I), 0.01); + if(dot(Ng, R) >= threshold) { return N; } - /* Form coordinate system with Ng as the Z axis and N inside the X-Z-plane. - * The X axis is found by normalizing the component of N that's orthogonal to Ng. - * The Y axis isn't actually needed. 
- */ - vector X = normalize(N - dot(N, Ng)*Ng); + float NdotNg = dot(N, Ng); + vector X = normalize(N - NdotNg*Ng); - /* Calculate N.z and N.x in the local coordinate system. */ float Ix = dot(I, X), Iz = dot(I, Ng); - float Ix2 = sqr(dot(I, X)), Iz2 = sqr(dot(I, Ng)); - float Ix2Iz2 = Ix2 + Iz2; + float Ix2 = sqr(Ix), Iz2 = sqr(Iz); + float a = Ix2 + Iz2; - float a = sqrt(Ix2*(Ix2Iz2 - sqr(0.05))); - float b = Iz*0.05 + Ix2Iz2; - float c = (a + b > 0.0)? (a + b) : (-a + b); + float b = sqrt(Ix2*(a - sqr(threshold))); + float c = Iz*threshold + a; - float Nz = sqrt(0.5 * c * (1.0 / Ix2Iz2)); - float Nx = sqrt(1.0 - sqr(Nz)); + float fac = 0.5/a; + float N1_z2 = fac*(b+c), N2_z2 = fac*(-b+c); + int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5)); + int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5)); - /* Transform back into global coordinates. */ - return Nx*X + Nz*Ng; + float N_new_x, N_new_z; + if(valid1 && valid2) { + float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2); + float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2); + + float R1 = 2*(N1_x*Ix + N1_z*Iz)*N1_z - Iz; + float R2 = 2*(N2_x*Ix + N2_z*Iz)*N2_z - Iz; + + valid1 = (R1 >= 1e-5); + valid2 = (R2 >= 1e-5); + if(valid1 && valid2) { + N_new_x = (R1 < R2)? N1_x : N2_x; + N_new_z = (R1 < R2)? N1_z : N2_z; + } + else { + N_new_x = (R1 > R2)? N1_x : N2_x; + N_new_z = (R1 > R2)? N1_z : N2_z; + } + + } + else if(valid1 || valid2) { + float Nz2 = valid1? N1_z2 : N2_z2; + N_new_x = sqrt(1.0 - Nz2); + N_new_z = sqrt(Nz2); + } + else { + return Ng; + } + + return N_new_x*X + N_new_z*Ng; } From f1673d20fa645b895cfa9d67e76dd29df7c739a2 Mon Sep 17 00:00:00 2001 From: Lukas Stockner Date: Thu, 30 Aug 2018 23:28:10 +0200 Subject: [PATCH 2/6] Cycles: Expose noisy image pass by default when rendering with denoiser Apparently quite a few users would like to have the noisy pass available when using the denoiser, and since it's being generated anyways we might as well expose it by default. 
Reviewers: brecht Differential Revision: https://developer.blender.org/D3608 --- intern/cycles/blender/addon/engine.py | 36 +++++++++++++------------- intern/cycles/blender/blender_sync.cpp | 35 +++++++++++++------------ intern/cycles/render/buffers.cpp | 35 ++++++++++++++++++------- 3 files changed, 62 insertions(+), 44 deletions(-) diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py index 87dcbe486c7..16ec7bc314b 100644 --- a/intern/cycles/blender/addon/engine.py +++ b/intern/cycles/blender/addon/engine.py @@ -254,21 +254,21 @@ def register_passes(engine, scene, srl): if crl.use_pass_volume_indirect: engine.register_pass(scene, srl, "VolumeInd", 3, "RGB", 'COLOR') cscene = scene.cycles - if crl.use_denoising and crl.denoising_store_passes and not cscene.use_progressive_refine: - engine.register_pass(scene, srl, "Denoising Normal", 3, "XYZ", 'VECTOR') - engine.register_pass(scene, srl, "Denoising Normal Variance", 3, "XYZ", 'VECTOR') - engine.register_pass(scene, srl, "Denoising Albedo", 3, "RGB", 'COLOR') - engine.register_pass(scene, srl, "Denoising Albedo Variance", 3, "RGB", 'COLOR') - engine.register_pass(scene, srl, "Denoising Depth", 1, "Z", 'VALUE') - engine.register_pass(scene, srl, "Denoising Depth Variance", 1, "Z", 'VALUE') - engine.register_pass(scene, srl, "Denoising Shadow A", 3, "XYV", 'VECTOR') - engine.register_pass(scene, srl, "Denoising Shadow B", 3, "XYV", 'VECTOR') - engine.register_pass(scene, srl, "Denoising Image", 3, "RGB", 'COLOR') - engine.register_pass(scene, srl, "Denoising Image Variance", 3, "RGB", 'COLOR') - - clean_options = ("denoising_diffuse_direct", "denoising_diffuse_indirect", - "denoising_glossy_direct", "denoising_glossy_indirect", - "denoising_transmission_direct", "denoising_transmission_indirect", - "denoising_subsurface_direct", "denoising_subsurface_indirect") - if any(getattr(crl, option) for option in clean_options): - engine.register_pass(scene, srl, "Denoising Clean", 3, 
"RGB", 'COLOR') + if crl.use_denoising: + engine.register_pass(scene, srl, "Noisy Image", 3, "RGBA", 'COLOR') + if crl.denoising_store_passes: + engine.register_pass(scene, srl, "Denoising Normal", 3, "XYZ", 'VECTOR') + engine.register_pass(scene, srl, "Denoising Normal Variance", 3, "XYZ", 'VECTOR') + engine.register_pass(scene, srl, "Denoising Albedo", 3, "RGB", 'COLOR') + engine.register_pass(scene, srl, "Denoising Albedo Variance", 3, "RGB", 'COLOR') + engine.register_pass(scene, srl, "Denoising Depth", 1, "Z", 'VALUE') + engine.register_pass(scene, srl, "Denoising Depth Variance", 1, "Z", 'VALUE') + engine.register_pass(scene, srl, "Denoising Shadow A", 3, "XYV", 'VECTOR') + engine.register_pass(scene, srl, "Denoising Shadow B", 3, "XYV", 'VECTOR') + engine.register_pass(scene, srl, "Denoising Image Variance", 3, "RGB", 'COLOR') + clean_options = ("denoising_diffuse_direct", "denoising_diffuse_indirect", + "denoising_glossy_direct", "denoising_glossy_indirect", + "denoising_transmission_direct", "denoising_transmission_indirect", + "denoising_subsurface_direct", "denoising_subsurface_indirect") + if any(getattr(crl, option) for option in clean_options): + engine.register_pass(scene, srl, "Denoising Clean", 3, "RGB", 'COLOR') diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 5e47252e336..8ae52beb1c1 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -525,6 +525,9 @@ PassType BlenderSync::get_pass_type(BL::RenderPass& b_pass) int BlenderSync::get_denoising_pass(BL::RenderPass& b_pass) { string name = b_pass.name(); + + if(name == "Noisy Image") return DENOISING_PASS_COLOR; + if(name.substr(0, 10) != "Denoising ") { return -1; } @@ -539,7 +542,6 @@ int BlenderSync::get_denoising_pass(BL::RenderPass& b_pass) MAP_PASS("Depth Variance", DENOISING_PASS_DEPTH_VAR); MAP_PASS("Shadow A", DENOISING_PASS_SHADOW_A); MAP_PASS("Shadow B", DENOISING_PASS_SHADOW_B); - 
MAP_PASS("Image", DENOISING_PASS_COLOR); MAP_PASS("Image Variance", DENOISING_PASS_COLOR_VAR); MAP_PASS("Clean", DENOISING_PASS_CLEAN); #undef MAP_PASS @@ -573,20 +575,8 @@ array BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay, scene->film->denoising_flags = 0; PointerRNA crp = RNA_pointer_get(&b_srlay.ptr, "cycles"); - if(get_boolean(crp, "denoising_store_passes") && - get_boolean(crp, "use_denoising")) + if(get_boolean(crp, "use_denoising")) { - b_engine.add_pass("Denoising Normal", 3, "XYZ", b_srlay.name().c_str()); - b_engine.add_pass("Denoising Normal Variance", 3, "XYZ", b_srlay.name().c_str()); - b_engine.add_pass("Denoising Albedo", 3, "RGB", b_srlay.name().c_str()); - b_engine.add_pass("Denoising Albedo Variance", 3, "RGB", b_srlay.name().c_str()); - b_engine.add_pass("Denoising Depth", 1, "Z", b_srlay.name().c_str()); - b_engine.add_pass("Denoising Depth Variance", 1, "Z", b_srlay.name().c_str()); - b_engine.add_pass("Denoising Shadow A", 3, "XYV", b_srlay.name().c_str()); - b_engine.add_pass("Denoising Shadow B", 3, "XYV", b_srlay.name().c_str()); - b_engine.add_pass("Denoising Image", 3, "RGB", b_srlay.name().c_str()); - b_engine.add_pass("Denoising Image Variance", 3, "RGB", b_srlay.name().c_str()); - #define MAP_OPTION(name, flag) if(!get_boolean(crp, name)) scene->film->denoising_flags |= flag; MAP_OPTION("denoising_diffuse_direct", DENOISING_CLEAN_DIFFUSE_DIR); MAP_OPTION("denoising_diffuse_indirect", DENOISING_CLEAN_DIFFUSE_IND); @@ -598,8 +588,21 @@ array BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay, MAP_OPTION("denoising_subsurface_indirect", DENOISING_CLEAN_SUBSURFACE_IND); #undef MAP_OPTION - if(scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES) { - b_engine.add_pass("Denoising Clean", 3, "RGB", b_srlay.name().c_str()); + b_engine.add_pass("Noisy Image", 4, "RGBA", b_srlay.name().c_str()); + if(get_boolean(crp, "denoising_store_passes")) { + b_engine.add_pass("Denoising Normal", 3, "XYZ", b_srlay.name().c_str()); + 
b_engine.add_pass("Denoising Normal Variance", 3, "XYZ", b_srlay.name().c_str()); + b_engine.add_pass("Denoising Albedo", 3, "RGB", b_srlay.name().c_str()); + b_engine.add_pass("Denoising Albedo Variance", 3, "RGB", b_srlay.name().c_str()); + b_engine.add_pass("Denoising Depth", 1, "Z", b_srlay.name().c_str()); + b_engine.add_pass("Denoising Depth Variance", 1, "Z", b_srlay.name().c_str()); + b_engine.add_pass("Denoising Shadow A", 3, "XYV", b_srlay.name().c_str()); + b_engine.add_pass("Denoising Shadow B", 3, "XYV", b_srlay.name().c_str()); + b_engine.add_pass("Denoising Image Variance", 3, "RGB", b_srlay.name().c_str()); + + if(scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES) { + b_engine.add_pass("Denoising Clean", 3, "RGB", b_srlay.name().c_str()); + } } } #ifdef __KERNEL_DEBUG__ diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index 4cd8b3726d3..e6021f4b37d 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -160,11 +160,12 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp (offset == DENOISING_PASS_DEPTH_VAR) || (offset == DENOISING_PASS_COLOR_VAR); + float scale_exposure = scale; if(offset == DENOISING_PASS_COLOR || offset == DENOISING_PASS_CLEAN) { - scale *= exposure; + scale_exposure *= exposure; } else if(offset == DENOISING_PASS_COLOR_VAR) { - scale *= exposure*exposure; + scale_exposure *= exposure*exposure; } offset += params.get_denoising_offset(); @@ -181,14 +182,14 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp if(components == 1) { for(int i = 0; i < size; i++, mean += pass_stride, var += pass_stride, pixels++) { - pixels[0] = max(0.0f, var[0] - mean[0]*mean[0]*invsample)*scale; + pixels[0] = max(0.0f, var[0] - mean[0]*mean[0]*invsample)*scale_exposure; } } else if(components == 3) { for(int i = 0; i < size; i++, mean += pass_stride, var += pass_stride, pixels += 3) { - pixels[0] = max(0.0f, var[0] - 
mean[0]*mean[0]*invsample)*scale; - pixels[1] = max(0.0f, var[1] - mean[1]*mean[1]*invsample)*scale; - pixels[2] = max(0.0f, var[2] - mean[2]*mean[2]*invsample)*scale; + pixels[0] = max(0.0f, var[0] - mean[0]*mean[0]*invsample)*scale_exposure; + pixels[1] = max(0.0f, var[1] - mean[1]*mean[1]*invsample)*scale_exposure; + pixels[2] = max(0.0f, var[2] - mean[2]*mean[2]*invsample)*scale_exposure; } } else { @@ -200,14 +201,28 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp if(components == 1) { for(int i = 0; i < size; i++, in += pass_stride, pixels++) { - pixels[0] = in[0]*scale; + pixels[0] = in[0]*scale_exposure; } } else if(components == 3) { for(int i = 0; i < size; i++, in += pass_stride, pixels += 3) { - pixels[0] = in[0]*scale; - pixels[1] = in[1]*scale; - pixels[2] = in[2]*scale; + pixels[0] = in[0]*scale_exposure; + pixels[1] = in[1]*scale_exposure; + pixels[2] = in[2]*scale_exposure; + } + } + else if(components == 4) { + assert(offset == DENOISING_PASS_COLOR); + + /* Since the alpha channel is not involved in denoising, output the Combined alpha channel. */ + assert(params.passes[0].type == PASS_COMBINED); + float *in_combined = buffer.data(); + + for(int i = 0; i < size; i++, in += pass_stride, in_combined += pass_stride, pixels += 4) { + pixels[0] = in[0]*scale_exposure; + pixels[1] = in[1]*scale_exposure; + pixels[2] = in[2]*scale_exposure; + pixels[3] = saturate(in_combined[3]*scale); } } else { From c0b3e3daebd36a483e659d32e6517f2fb9b0e277 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Fri, 26 Oct 2018 19:26:06 +0200 Subject: [PATCH 3/6] Fix T57393: Cycles OSL bevel and AO not working after OSL upgrade. 
--- intern/cycles/kernel/osl/osl_services.cpp | 28 +++++++++++++++++++++-- intern/cycles/kernel/osl/osl_services.h | 7 +++++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 7902381440b..81348f5594d 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -884,6 +884,23 @@ bool OSLRenderServices::has_userdata(ustring name, TypeDesc type, OSL::ShaderGlo return false; /* never called by OSL */ } +TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring filename) +{ + if (filename.length() && filename[0] == '@') { + /* Dummy, we don't use texture handles for builtin textures but need + * to tell the OSL runtime optimizer that this is a valid texture. */ + return NULL; + } + else { + return texturesys()->get_texture_handle(filename); + } +} + +bool OSLRenderServices::good(TextureSystem::TextureHandle *texture_handle) +{ + return texturesys()->good(texture_handle); +} + bool OSLRenderServices::texture(ustring filename, TextureHandle *texture_handle, TexturePerthread *texture_thread_info, @@ -894,7 +911,8 @@ bool OSLRenderServices::texture(ustring filename, int nchannels, float *result, float *dresultds, - float *dresultdt) + float *dresultdt, + ustring *errormessage) { OSL::TextureSystem *ts = osl_ts; ShaderData *sd = (ShaderData *)(sg->renderstate); @@ -1156,7 +1174,13 @@ bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg, ustring filenam TypeDesc datatype, void *data) { OSL::TextureSystem *ts = osl_ts; - return ts->get_texture_info(filename, subimage, dataname, datatype, data); + if (filename.length() && filename[0] == '@') { + /* Special builtin textures. 
*/ + return false; + } + else { + return ts->get_texture_info(filename, subimage, dataname, datatype, data); + } } int OSLRenderServices::pointcloud_search(OSL::ShaderGlobals *sg, ustring filename, const OSL::Vec3 &center, diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h index 50044746fd1..5dcaa4d7445 100644 --- a/intern/cycles/kernel/osl/osl_services.h +++ b/intern/cycles/kernel/osl/osl_services.h @@ -93,6 +93,10 @@ public: bool getmessage(OSL::ShaderGlobals *sg, ustring source, ustring name, TypeDesc type, void *val, bool derivatives); + TextureSystem::TextureHandle *get_texture_handle(ustring filename); + + bool good(TextureSystem::TextureHandle *texture_handle); + bool texture(ustring filename, TextureSystem::TextureHandle *texture_handle, TexturePerthread *texture_thread_info, @@ -103,7 +107,8 @@ public: int nchannels, float *result, float *dresultds, - float *dresultdt); + float *dresultdt, + ustring *errormessage); bool texture3d(ustring filename, TextureHandle *texture_handle, From e58c6cf0c678849cf9c348a8df5e0ec24a6abd4d Mon Sep 17 00:00:00 2001 From: Stefan Werner Date: Sun, 28 Oct 2018 05:37:41 -0400 Subject: [PATCH 4/6] Cycles: Added Cryptomatte output. This allows for extra output passes that encode automatic object and material masks for the entire scene. It is an implementation of the Cryptomatte standard as introduced by Psyop. A good future extension would be to add a manifest to the export and to do plenty of testing to ensure that it is fully compatible with other renderers and compositing programs that use Cryptomatte. Internally, it adds the ability for Cycles to have several passes of the same type that are distinguished by their name. 
Differential Revision: https://developer.blender.org/D3538 --- intern/cycles/blender/addon/engine.py | 11 ++ intern/cycles/blender/addon/properties.py | 31 +++- intern/cycles/blender/addon/ui.py | 11 ++ intern/cycles/blender/blender_object.cpp | 17 +++ intern/cycles/blender/blender_session.cpp | 6 +- intern/cycles/blender/blender_sync.cpp | 46 +++++- intern/cycles/blender/blender_sync.h | 6 +- intern/cycles/device/device_cpu.cpp | 15 +- intern/cycles/kernel/CMakeLists.txt | 1 + intern/cycles/kernel/geom/geom_object.h | 18 +++ intern/cycles/kernel/kernel_globals.h | 8 + intern/cycles/kernel/kernel_id_passes.h | 94 ++++++++++++ intern/cycles/kernel/kernel_passes.h | 43 +++++- intern/cycles/kernel/kernel_shader.h | 5 + intern/cycles/kernel/kernel_types.h | 25 ++- intern/cycles/kernel/kernels/cuda/kernel.cu | 30 +++- intern/cycles/kernel/kernels/opencl/kernel.cl | 12 +- .../kernel/split/kernel_buffer_update.h | 13 ++ intern/cycles/render/CMakeLists.txt | 2 + intern/cycles/render/buffers.cpp | 21 ++- intern/cycles/render/buffers.h | 4 +- intern/cycles/render/coverage.cpp | 143 ++++++++++++++++++ intern/cycles/render/coverage.h | 49 ++++++ intern/cycles/render/film.cpp | 37 +++-- intern/cycles/render/film.h | 13 +- intern/cycles/render/object.cpp | 5 + intern/cycles/render/object.h | 1 + intern/cycles/render/shader.cpp | 4 + intern/cycles/util/CMakeLists.txt | 2 + intern/cycles/util/util_atomic.h | 28 ++++ intern/cycles/util/util_murmurhash.cpp | 125 +++++++++++++++ intern/cycles/util/util_murmurhash.h | 30 ++++ 32 files changed, 810 insertions(+), 46 deletions(-) create mode 100644 intern/cycles/kernel/kernel_id_passes.h create mode 100644 intern/cycles/render/coverage.cpp create mode 100644 intern/cycles/render/coverage.h create mode 100644 intern/cycles/util/util_murmurhash.cpp create mode 100644 intern/cycles/util/util_murmurhash.h diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py index 16ec7bc314b..2cdeb97a32d 100644 --- 
a/intern/cycles/blender/addon/engine.py +++ b/intern/cycles/blender/addon/engine.py @@ -254,6 +254,17 @@ def register_passes(engine, scene, srl): if crl.use_pass_volume_indirect: engine.register_pass(scene, srl, "VolumeInd", 3, "RGB", 'COLOR') cscene = scene.cycles + + if crl.use_pass_crypto_object: + for i in range(0, crl.pass_crypto_depth, 2): + engine.register_pass(scene, srl, "CryptoObject" + '{:02d}'.format(i), 4, "RGBA", 'COLOR') + if crl.use_pass_crypto_material: + for i in range(0, crl.pass_crypto_depth, 2): + engine.register_pass(scene, srl, "CryptoMaterial" + '{:02d}'.format(i), 4, "RGBA", 'COLOR') + if srl.cycles.use_pass_crypto_asset: + for i in range(0, srl.cycles.pass_crypto_depth, 2): + engine.register_pass(scene, srl, "CryptoAsset" + '{:02d}'.format(i), 4, "RGBA", 'COLOR') + if crl.use_denoising: engine.register_pass(scene, srl, "Noisy Image", 3, "RGBA", 'COLOR') if crl.denoising_store_passes: diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 80b83c94012..848b76eb02f 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -1339,7 +1339,36 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup): default=False, update=update_render_passes, ) - + cls.use_pass_crypto_object = BoolProperty( + name="Cryptomatte Object", + description="Cryptomatte Object pass", + default=False, + update=update_render_passes, + ) + cls.use_pass_crypto_material = BoolProperty( + name="Cryptomatte Material", + description="Cryptomatte Material pass", + default=False, + update=update_render_passes, + ) + cls.use_pass_crypto_asset = BoolProperty( + name="Cryptomatte Asset", + description="Cryptomatte Asset pass", + default=False, + update=update_render_passes, + ) + cls.pass_crypto_depth = IntProperty( + name="Cryptomatte Levels", + description="Describes how many unique IDs per pixel are written to Cryptomatte", + default=6, min=2, max=16, step=2, + 
update=update_render_passes, + ) + cls.pass_crypto_accurate = BoolProperty( + name="Cryptomatte Accurate", + description="Gerenate a more accurate Cryptomatte pass, CPU only, may render slower and use more memory", + default=True, + update=update_render_passes, + ) @classmethod def unregister(cls): del bpy.types.SceneRenderLayer.cycles diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 5edbcb19672..6f11d3c313d 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -563,6 +563,17 @@ class CYCLES_RENDER_PT_layer_passes(CyclesButtonsPanel, Panel): col.prop(crl, "pass_debug_bvh_intersections") col.prop(crl, "pass_debug_ray_bounces") + crl = rl.cycles + layout.label("Cryptomatte:") + row = layout.row(align=True) + row.prop(crl, "use_pass_crypto_object", text="Object", toggle=True) + row.prop(crl, "use_pass_crypto_material", text="Material", toggle=True) + row.prop(crl, "use_pass_crypto_asset", text="Asset", toggle=True) + row = layout.row(align=True) + row.prop(crl, "pass_crypto_depth") + row = layout.row(align=True) + row.active = use_cpu(context) + row.prop(crl, "pass_crypto_accurate", text="Accurate Mode") class CYCLES_RENDER_PT_views(CyclesButtonsPanel, Panel): bl_label = "Views" diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp index 0fab9ab3531..a05c982b367 100644 --- a/intern/cycles/blender/blender_object.cpp +++ b/intern/cycles/blender/blender_object.cpp @@ -384,6 +384,23 @@ Object *BlenderSync::sync_object(BL::Object& b_parent, object_updated = true; } + /* sync the asset name for Cryptomatte */ + BL::Object parent = b_ob.parent(); + ustring parent_name; + if(parent) { + while(parent.parent()) { + parent = parent.parent(); + } + parent_name = parent.name(); + } + else { + parent_name = b_ob.name(); + } + if(object->asset_name != parent_name) { + object->asset_name = parent_name; + object_updated = true; + } + /* object sync * transform 
comparison should not be needed, but duplis don't work perfect * in the depsgraph and may not signal changes, so this is a workaround */ diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index a07131d04ae..e9e14a9b6c9 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -405,7 +405,7 @@ void BlenderSession::render() BL::RenderLayer b_rlay = *b_single_rlay; /* add passes */ - array passes = sync->sync_render_passes(b_rlay, *b_layer_iter, session_params); + vector passes = sync->sync_render_passes(b_rlay, *b_layer_iter, session_params); buffer_params.passes = passes; PointerRNA crl = RNA_pointer_get(&b_layer_iter->ptr, "cycles"); @@ -700,7 +700,7 @@ void BlenderSession::do_write_update_render_result(BL::RenderResult& b_rr, bool read = false; if(pass_type != PASS_NONE) { /* copy pixels */ - read = buffers->get_pass_rect(pass_type, exposure, sample, components, &pixels[0]); + read = buffers->get_pass_rect(pass_type, exposure, sample, components, &pixels[0], b_pass.name()); } else { int denoising_offset = BlenderSync::get_denoising_pass(b_pass); @@ -719,7 +719,7 @@ void BlenderSession::do_write_update_render_result(BL::RenderResult& b_rr, else { /* copy combined pass */ BL::RenderPass b_combined_pass(b_rlay.passes.find_by_name("Combined", b_rview_name.c_str())); - if(buffers->get_pass_rect(PASS_COMBINED, exposure, sample, 4, &pixels[0])) + if(buffers->get_pass_rect(PASS_COMBINED, exposure, sample, 4, &pixels[0], "Combined")) b_combined_pass.rect(&pixels[0]); } diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 8ae52beb1c1..076734d105f 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -40,6 +40,8 @@ CCL_NAMESPACE_BEGIN +static const char *cryptomatte_prefix = "Crypto"; + /* Constructor */ BlenderSync::BlenderSync(BL::RenderEngine& b_engine, @@ -517,6 +519,9 @@ PassType 
BlenderSync::get_pass_type(BL::RenderPass& b_pass) MAP_PASS("Debug Ray Bounces", PASS_RAY_BOUNCES); #endif MAP_PASS("Debug Render Time", PASS_RENDER_TIME); + if(string_startswith(name, cryptomatte_prefix)) { + return PASS_CRYPTOMATTE; + } #undef MAP_PASS return PASS_NONE; @@ -549,11 +554,11 @@ int BlenderSync::get_denoising_pass(BL::RenderPass& b_pass) return -1; } -array BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay, - BL::SceneRenderLayer& b_srlay, - const SessionParams &session_params) +vector BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay, + BL::SceneRenderLayer& b_srlay, + const SessionParams &session_params) { - array passes; + vector passes; Pass::add(PASS_COMBINED, passes); if(!session_params.device.advanced_shading) { @@ -636,6 +641,39 @@ array BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay, Pass::add(PASS_VOLUME_INDIRECT, passes); } + /* Cryptomatte stores two ID/weight pairs per RGBA layer. + * User facing paramter is the number of pairs. */ + int crypto_depth = min(16, get_int(crp, "pass_crypto_depth")) / 2; + scene->film->cryptomatte_depth = crypto_depth; + scene->film->cryptomatte_passes = CRYPT_NONE; + if(get_boolean(crp, "use_pass_crypto_object")) { + for(int i = 0; i < crypto_depth; ++i) { + string passname = cryptomatte_prefix + string_printf("Object%02d", i); + b_engine.add_pass(passname.c_str(), 4, "RGBA", b_srlay.name().c_str()); + Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str()); + } + scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_OBJECT); + } + if(get_boolean(crp, "use_pass_crypto_material")) { + for(int i = 0; i < crypto_depth; ++i) { + string passname = cryptomatte_prefix + string_printf("Material%02d", i); + b_engine.add_pass(passname.c_str(), 4, "RGBA", b_srlay.name().c_str()); + Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str()); + } + scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_MATERIAL); + } + 
if(get_boolean(crp, "use_pass_crypto_asset")) { + for(int i = 0; i < crypto_depth; ++i) { + string passname = cryptomatte_prefix + string_printf("Asset%02d", i); + b_engine.add_pass(passname.c_str(), 4, "RGBA", b_srlay.name().c_str()); + Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str()); + } + scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_ASSET); + } + if(get_boolean(crp, "pass_crypto_accurate") && scene->film->cryptomatte_passes != CRYPT_NONE) { + scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_ACCURATE); + } + return passes; } diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h index 5e63f76033d..eb84bedb118 100644 --- a/intern/cycles/blender/blender_sync.h +++ b/intern/cycles/blender/blender_sync.h @@ -66,9 +66,9 @@ public: void **python_thread_state, const char *layer = 0); void sync_render_layers(BL::SpaceView3D& b_v3d, const char *layer); - array sync_render_passes(BL::RenderLayer& b_rlay, - BL::SceneRenderLayer& b_srlay, - const SessionParams &session_params); + vector sync_render_passes(BL::RenderLayer& b_rlay, + BL::SceneRenderLayer& b_srlay, + const SessionParams &session_params); void sync_integrator(); void sync_camera(BL::RenderSettings& b_render, BL::Object& b_override, diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 7eb73dea3ef..eb816e1fdd0 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -41,6 +41,7 @@ #include "kernel/osl/osl_globals.h" #include "render/buffers.h" +#include "render/coverage.h" #include "util/util_debug.h" #include "util/util_foreach.h" @@ -677,8 +678,15 @@ public: void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg) { + const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE; + scoped_timer timer(&tile.buffers->render_time); + Coverage coverage(kg, tile); + if(use_coverage) 
{ + coverage.init_path_trace(); + } + float *render_buffer = (float*)tile.buffer; int start_sample = tile.start_sample; int end_sample = tile.start_sample + tile.num_samples; @@ -691,6 +699,9 @@ public: for(int y = tile.y; y < tile.y + tile.h; y++) { for(int x = tile.x; x < tile.x + tile.w; x++) { + if(use_coverage) { + coverage.init_pixel(x, y); + } path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride); } @@ -700,6 +711,9 @@ public: task.update_progress(&tile, tile.w*tile.h); } + if(use_coverage) { + coverage.finalize(); + } } void denoise(DenoisingTask& denoising, RenderTile &tile) @@ -760,7 +774,6 @@ public: } else if(tile.task == RenderTile::DENOISE) { denoise(denoising, tile); - task.update_progress(&tile, tile.w*tile.h); } diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index b48ed649a8c..08efede36df 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -96,6 +96,7 @@ set(SRC_HEADERS kernel_emission.h kernel_film.h kernel_globals.h + kernel_id_passes.h kernel_jitter.h kernel_light.h kernel_math.h diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index cfe17e63627..0eb8ce2cf8b 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -304,6 +304,24 @@ ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd) return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id; } +/* Cryptomatte ID */ + +ccl_device_inline float object_cryptomatte_id(KernelGlobals *kg, int object) +{ + if(object == OBJECT_NONE) + return 0.0f; + + return kernel_tex_fetch(__objects, object).cryptomatte_object; +} + +ccl_device_inline float object_cryptomatte_asset_id(KernelGlobals *kg, int object) +{ + if(object == OBJECT_NONE) + return 0; + + return kernel_tex_fetch(__objects, object).cryptomatte_asset; +} + /* Particle data from which object was instanced */ 
ccl_device_inline uint particle_index(KernelGlobals *kg, int particle) diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index 74cfacb5bc1..37402f42863 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -21,6 +21,7 @@ #ifdef __KERNEL_CPU__ # include "util/util_vector.h" +# include "util/util_map.h" #endif #ifdef __KERNEL_OPENCL__ @@ -42,6 +43,8 @@ struct OSLThreadData; struct OSLShadingSystem; # endif +typedef unordered_map CoverageMap; + struct Intersection; struct VolumeStep; @@ -68,6 +71,11 @@ typedef struct KernelGlobals { VolumeStep *decoupled_volume_steps[2]; int decoupled_volume_steps_index; + /* A buffer for storing per-pixel coverage for Cryptomatte. */ + CoverageMap *coverage_object; + CoverageMap *coverage_material; + CoverageMap *coverage_asset; + /* split kernel */ SplitData split_data; SplitParams split_param_data; diff --git a/intern/cycles/kernel/kernel_id_passes.h b/intern/cycles/kernel/kernel_id_passes.h new file mode 100644 index 00000000000..486c61d2ae5 --- /dev/null +++ b/intern/cycles/kernel/kernel_id_passes.h @@ -0,0 +1,94 @@ +/* +* Copyright 2018 Blender Foundation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +CCL_NAMESPACE_BEGIN + +ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, int num_slots, float id, float weight) +{ + kernel_assert(id != ID_NONE); + if(weight == 0.0f) { + return; + } + + for(int slot = 0; slot < num_slots; slot++) { + ccl_global float2 *id_buffer = (ccl_global float2*)buffer; +#ifdef __ATOMIC_PASS_WRITE__ + /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ + if(id_buffer[slot].x == ID_NONE) { + /* Use an atomic to claim this slot. + * If a different thread got here first, try again from this slot on. */ + float old_id = atomic_compare_and_swap_float(buffer+slot*2, ID_NONE, id); + if(old_id != ID_NONE && old_id != id) { + continue; + } + atomic_add_and_fetch_float(buffer+slot*2+1, weight); + break; + } + /* If there already is a slot for that ID, add the weight. + * If no slot was found, add it to the last. */ + else if(id_buffer[slot].x == id || slot == num_slots - 1) { + atomic_add_and_fetch_float(buffer+slot*2+1, weight); + break; + } +#else /* __ATOMIC_PASS_WRITE__ */ + /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ + if(id_buffer[slot].x == ID_NONE) { + id_buffer[slot].x = id; + id_buffer[slot].y = weight; + break; + } + /* If there already is a slot for that ID, add the weight. + * If no slot was found, add it to the last. */ + else if(id_buffer[slot].x == id || slot == num_slots - 1) { + id_buffer[slot].y += weight; + break; + } +#endif /* __ATOMIC_PASS_WRITE__ */ + } +} + +ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots) +{ + ccl_global float2 *id_buffer = (ccl_global float2*)buffer; + for(int slot = 1; slot < num_slots; ++slot) { + if(id_buffer[slot].x == ID_NONE) { + return; + } + /* Since we're dealing with a tiny number of elements, insertion sort should be fine. 
*/ + int i = slot; + while(i > 0 && id_buffer[i].y > id_buffer[i - 1].y) { + float2 swap = id_buffer[i]; + id_buffer[i] = id_buffer[i - 1]; + id_buffer[i - 1] = swap; + --i; + } + } +} + +#ifdef __KERNEL_GPU__ +/* post-sorting for Cryptomatte */ +ccl_device void kernel_cryptomatte_post(KernelGlobals *kg, ccl_global float *buffer, uint sample, int x, int y, int offset, int stride) +{ + if(sample - 1 == kernel_data.integrator.aa_samples) { + int index = offset + x + y * stride; + int pass_stride = kernel_data.film.pass_stride; + ccl_global float *cryptomatte_buffer = buffer + index * pass_stride + kernel_data.film.pass_cryptomatte; + kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth); + } +} +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h index 458aa6c2a97..e256a1819ed 100644 --- a/intern/cycles/kernel/kernel_passes.h +++ b/intern/cycles/kernel/kernel_passes.h @@ -14,12 +14,14 @@ * limitations under the License. 
*/ -CCL_NAMESPACE_BEGIN - #if defined(__SPLIT_KERNEL__) || defined(__KERNEL_CUDA__) #define __ATOMIC_PASS_WRITE__ #endif +#include "kernel/kernel_id_passes.h" + +CCL_NAMESPACE_BEGIN + ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, float value) { ccl_global float *buf = buffer; @@ -189,6 +191,23 @@ ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg, } #endif /* __KERNEL_DEBUG__ */ +#ifdef __KERNEL_CPU__ +#define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) kernel_write_id_pass_cpu(buffer, depth * 2, id, matte_weight, kg->coverage_##name) +ccl_device_inline size_t kernel_write_id_pass_cpu(float *buffer, size_t depth, float id, float matte_weight, CoverageMap *map) +{ + if(map) { + (*map)[id] += matte_weight; + return 0; + } +#else /* __KERNEL_CPU__ */ +#define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) kernel_write_id_slots_gpu(buffer, depth * 2, id, matte_weight) +ccl_device_inline size_t kernel_write_id_slots_gpu(ccl_global float *buffer, size_t depth, float id, float matte_weight) +{ +#endif /* __KERNEL_CPU__ */ + kernel_write_id_slots(buffer, depth, id, matte_weight); + return depth * 2; +} + ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L, ShaderData *sd, ccl_addr_space PathState *state, float3 throughput) { @@ -242,6 +261,26 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl } } + if(kernel_data.film.cryptomatte_passes) { + const float matte_weight = average(throughput) * (1.0f - average(shader_bsdf_transparency(kg, sd))); + if(matte_weight > 0.0f) { + ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte; + if(kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { + float id = object_cryptomatte_id(kg, sd->object); + cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, object); + } + if(kernel_data.film.cryptomatte_passes & 
CRYPT_MATERIAL) { + float id = shader_cryptomatte_id(kg, sd->shader); + cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, material); + } + if(kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { + float id = object_cryptomatte_asset_id(kg, sd->object); + cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, asset); + } + } + } + + if(light_flag & PASSMASK_COMPONENT(DIFFUSE)) L->color_diffuse += shader_bsdf_diffuse(kg, sd)*throughput; if(light_flag & PASSMASK_COMPONENT(GLOSSY)) diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index e834b701f96..af883aa715b 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -1276,4 +1276,9 @@ ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect } #endif /* __TRANSPARENT_SHADOWS__ */ +ccl_device float shader_cryptomatte_id(KernelGlobals *kg, int shader) +{ + return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index e93100a6442..f46b06f87f9 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -53,6 +53,7 @@ CCL_NAMESPACE_BEGIN #define OBJECT_NONE (~0) #define PRIM_NONE (~0) #define LAMP_NONE (~0) +#define ID_NONE (0.0f) #define VOLUME_STACK_SIZE 32 @@ -415,6 +416,7 @@ typedef enum PassType { PASS_RAY_BOUNCES, #endif PASS_RENDER_TIME, + PASS_CRYPTOMATTE, PASS_CATEGORY_MAIN_END = 31, PASS_MIST = 32, @@ -443,6 +445,14 @@ typedef enum PassType { #define PASS_ANY (~0) +typedef enum CryptomatteType { + CRYPT_NONE = 0, + CRYPT_OBJECT = (1 << 0), + CRYPT_MATERIAL = (1 << 1), + CRYPT_ASSET = (1 << 2), + CRYPT_ACCURATE = (1 << 3), +} CryptomatteType; + typedef enum DenoisingPassOffsets { DENOISING_PASS_NORMAL = 0, DENOISING_PASS_NORMAL_VAR = 3, @@ 
-1260,17 +1270,20 @@ typedef struct KernelFilm { int pass_shadow; float pass_shadow_scale; int filter_table_offset; + int cryptomatte_passes; + int cryptomatte_depth; + int pass_cryptomatte; int pass_mist; float mist_start; float mist_inv_depth; float mist_falloff; - + int pass_denoising_data; int pass_denoising_clean; int denoising_flags; - int pad1, pad2, pad3; + int pad1, pad2; /* XYZ to rendering color space transform. float4 instead of float3 to * ensure consistent padding/alignment across devices. */ @@ -1460,7 +1473,11 @@ typedef struct KernelObject { uint patch_map_offset; uint attribute_map_offset; uint motion_offset; - uint pad; + uint pad1; + + float cryptomatte_object; + float cryptomatte_asset; + float pad2, pad3; } KernelObject; static_assert_align(KernelObject, 16); @@ -1540,7 +1557,7 @@ static_assert_align(KernelParticle, 16); typedef struct KernelShader { float constant_emission[3]; - float pad1; + float cryptomatte_id; int flags; int pass_id; int pad2, pad3; diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu index 8a180a509e8..af311027f78 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel.cu @@ -40,14 +40,21 @@ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) kernel_cuda_path_trace(WorkTile *tile, uint total_work_size) { int work_index = ccl_global_id(0); - - if(work_index < total_work_size) { - uint x, y, sample; + bool thread_is_active = work_index < total_work_size; + uint x, y, sample; + KernelGlobals kg; + if(thread_is_active) { get_work_pixel(tile, work_index, &x, &y, &sample); - KernelGlobals kg; kernel_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); } + + if(kernel_data.film.cryptomatte_passes) { + __syncthreads(); + if(thread_is_active) { + kernel_cryptomatte_post(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); + } + } } #ifdef __BRANCHED_PATH__ @@ -56,14 +63,21 @@ 
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS) kernel_cuda_branched_path_trace(WorkTile *tile, uint total_work_size) { int work_index = ccl_global_id(0); - - if(work_index < total_work_size) { - uint x, y, sample; + bool thread_is_active = work_index < total_work_size; + uint x, y, sample; + KernelGlobals kg; + if(thread_is_active) { get_work_pixel(tile, work_index, &x, &y, &sample); - KernelGlobals kg; kernel_branched_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); } + + if(kernel_data.film.cryptomatte_passes) { + __syncthreads(); + if(thread_is_active) { + kernel_cryptomatte_post(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); + } + } } #endif diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl index 63128d0aecf..de1f5088629 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel.cl @@ -66,9 +66,17 @@ __kernel void kernel_ocl_path_trace( int x = sx + ccl_global_id(0); int y = sy + ccl_global_id(1); - - if(x < sx + sw && y < sy + sh) + bool thread_is_active = x < sx + sw && y < sy + sh; + if(thread_is_active) { kernel_path_trace(kg, buffer, sample, x, y, offset, stride); + } + if(kernel_data.film.cryptomatte_passes) { + /* Make sure no thread is writing to the buffers. 
*/ + ccl_barrier(CCL_LOCAL_MEM_FENCE); + if(thread_is_active) { + kernel_cryptomatte_post(kg, buffer, sample, x, y, offset, stride); + } + } } #else /* __COMPILE_ONLY_MEGAKERNEL__ */ diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h index 180c0b57077..18eec6372f1 100644 --- a/intern/cycles/kernel/split/kernel_buffer_update.h +++ b/intern/cycles/kernel/split/kernel_buffer_update.h @@ -80,8 +80,10 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + bool ray_was_updated = false; if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { + ray_was_updated = true; uint sample = state->sample; uint buffer_offset = kernel_split_state.buffer_offset[ray_index]; ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; @@ -92,6 +94,17 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); } + if(kernel_data.film.cryptomatte_passes) { + /* Make sure no thread is writing to the buffers. 
*/ + ccl_barrier(CCL_LOCAL_MEM_FENCE); + if(ray_was_updated && state->sample - 1 == kernel_data.integrator.aa_samples) { + uint buffer_offset = kernel_split_state.buffer_offset[ray_index]; + ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; + ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte; + kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth); + } + } + if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) { /* We have completed current work; So get next work */ ccl_global uint *work_pools = kernel_split_params.work_pools; diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt index 7d2220f37f9..c0ce7368771 100644 --- a/intern/cycles/render/CMakeLists.txt +++ b/intern/cycles/render/CMakeLists.txt @@ -15,6 +15,7 @@ set(SRC buffers.cpp camera.cpp constant_fold.cpp + coverage.cpp film.cpp graph.cpp image.cpp @@ -46,6 +47,7 @@ set(SRC_HEADERS buffers.h camera.h constant_fold.h + coverage.h film.h graph.h image.h diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index e6021f4b37d..dd20efb3dde 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -233,7 +233,7 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp return true; } -bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels) +bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels, const string &name) { if(buffer.data() == NULL) { return false; @@ -249,6 +249,14 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int continue; } + /* Tell Cryptomatte passes apart by their name. 
*/ + if(pass.type == PASS_CRYPTOMATTE) { + if(pass.name != name) { + pass_offset += pass.components; + continue; + } + } + float *in = buffer.data() + pass_offset; int pass_stride = params.get_passes_size(); @@ -385,6 +393,17 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int pixels[3] = f.w*invw; } } + else if(type == PASS_CRYPTOMATTE) { + for(int i = 0; i < size; i++, in += pass_stride, pixels += 4) { + float4 f = make_float4(in[0], in[1], in[2], in[3]); + /* x and z contain integer IDs, don't rescale them. + y and w contain matte weights, they get scaled. */ + pixels[0] = f.x; + pixels[1] = f.y * scale; + pixels[2] = f.z; + pixels[3] = f.w * scale; + } + } else { for(int i = 0; i < size; i++, in += pass_stride, pixels += 4) { float4 f = make_float4(in[0], in[1], in[2], in[3]); diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index 1b06ffe33a6..a8f019dddd6 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -50,7 +50,7 @@ public: int full_height; /* passes */ - array passes; + vector passes; bool denoising_data_pass; /* If only some light path types should be denoised, an additional pass is needed. 
*/ bool denoising_clean_pass; @@ -84,7 +84,7 @@ public: void zero(); bool copy_from_device(); - bool get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels); + bool get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels, const string &name); bool get_denoising_pass_rect(int offset, float exposure, int sample, int components, float *pixels); }; diff --git a/intern/cycles/render/coverage.cpp b/intern/cycles/render/coverage.cpp new file mode 100644 index 00000000000..72ef4cda3ff --- /dev/null +++ b/intern/cycles/render/coverage.cpp @@ -0,0 +1,143 @@ +/* + * Copyright 2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "render/coverage.h" +#include "kernel/kernel_compat_cpu.h" +#include "kernel/split/kernel_split_data.h" +#include "kernel/kernel_globals.h" +#include "kernel/kernel_id_passes.h" +#include "kernel/kernel_types.h" +#include "util/util_map.h" +#include "util/util_vector.h" + +CCL_NAMESPACE_BEGIN + +static bool crypomatte_comp(const pair& i, const pair j) { return i.first > j.first; } + +void Coverage::finalize() +{ + int pass_offset = 0; + if(kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { + finalize_buffer(coverage_object, pass_offset); + pass_offset += kernel_data.film.cryptomatte_depth * 4; + } + if(kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { + finalize_buffer(coverage_material, pass_offset); + pass_offset += kernel_data.film.cryptomatte_depth * 4; + } + if(kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { + finalize_buffer(coverage_asset, pass_offset); + } +} + +void Coverage::init_path_trace() +{ + kg->coverage_object = kg->coverage_material = kg->coverage_asset = NULL; + + if(kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE) { + if(kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { + coverage_object.clear(); + coverage_object.resize(tile.w * tile.h); + } + if(kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { + coverage_material.clear(); + coverage_material.resize(tile.w * tile.h); + } + if(kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { + coverage_asset.clear(); + coverage_asset.resize(tile.w * tile.h); + } + } +} + +void Coverage::init_pixel(int x, int y) +{ + if(kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE) { + const int pixel_index = tile.w * (y - tile.y) + x - tile.x; + if(kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { + kg->coverage_object = &coverage_object[pixel_index]; + } + if(kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { + kg->coverage_material = &coverage_material[pixel_index]; + } + if(kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { + kg->coverage_asset = 
&coverage_asset[pixel_index]; + } + } +} + +void Coverage::finalize_buffer(vector & coverage, const int pass_offset) +{ + if(kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE) { + flatten_buffer(coverage, pass_offset); + } + else { + sort_buffer(pass_offset); + } +} + +void Coverage::flatten_buffer(vector &coverage, const int pass_offset) +{ + /* Sort the coverage map and write it to the output */ + int pixel_index = 0; + int pass_stride = tile.buffers->params.get_passes_size(); + for(int y = 0; y < tile.h; ++y) { + for(int x = 0; x < tile.w; ++x) { + const CoverageMap& pixel = coverage[pixel_index]; + if(!pixel.empty()) { + /* buffer offset */ + int index = x + y * tile.stride; + float *buffer = (float*)tile.buffer + index*pass_stride; + + /* sort the cryptomatte pixel */ + vector > sorted_pixel; + for(CoverageMap::const_iterator it = pixel.begin(); it != pixel.end(); ++it) { + sorted_pixel.push_back(std::make_pair(it->second, it->first)); + } + sort(sorted_pixel.begin(), sorted_pixel.end(), crypomatte_comp); + int num_slots = 2 * (kernel_data.film.cryptomatte_depth); + if(sorted_pixel.size() > num_slots) { + float leftover = 0.0f; + for(vector >::iterator it = sorted_pixel.begin()+num_slots; it != sorted_pixel.end(); ++it) { + leftover += it->first; + } + sorted_pixel[num_slots-1].first += leftover; + } + int limit = min(num_slots, sorted_pixel.size()); + for(int i = 0; i < limit; ++i) { + kernel_write_id_slots(buffer + kernel_data.film.pass_cryptomatte + pass_offset, 2 * (kernel_data.film.cryptomatte_depth), sorted_pixel[i].second, sorted_pixel[i].first); + } + } + ++pixel_index; + } + } +} + +void Coverage::sort_buffer(const int pass_offset) +{ + /* Sort the ID/weight slots already stored in the tile buffer, in place */ + int pass_stride = tile.buffers->params.get_passes_size(); + for(int y = 0; y < tile.h; ++y) { + for(int x = 0; x < tile.w; ++x) { + /* buffer offset */ + int index = x + y*tile.stride; + float *buffer = (float*)tile.buffer + index*pass_stride; + 
kernel_sort_id_slots(buffer + kernel_data.film.pass_cryptomatte + pass_offset, 2 * (kernel_data.film.cryptomatte_depth)); + } + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/render/coverage.h b/intern/cycles/render/coverage.h new file mode 100644 index 00000000000..16176ce4beb --- /dev/null +++ b/intern/cycles/render/coverage.h @@ -0,0 +1,49 @@ +/* + * Copyright 2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "render/buffers.h" +#include "kernel/kernel_compat_cpu.h" +#include "kernel/split/kernel_split_data.h" +#include "kernel/kernel_globals.h" +#include "util/util_map.h" +#include "util/util_vector.h" + +#ifndef __COVERAGE_H__ +#define __COVERAGE_H__ + +CCL_NAMESPACE_BEGIN + +class Coverage { +public: + Coverage(KernelGlobals *kg_, RenderTile &tile_) : kg(kg_), tile(tile_) { } + void init_path_trace(); + void init_pixel(int x, int y); + void finalize(); +private: + vectorcoverage_object; + vectorcoverage_material; + vectorcoverage_asset; + KernelGlobals *kg; + RenderTile &tile; + void finalize_buffer(vector&coverage, const int pass_offset); + void flatten_buffer(vector&coverage, const int pass_offset); + void sort_buffer(const int pass_offset); +}; + + +CCL_NAMESPACE_END + +#endif /* __COVERAGE_H__ */ diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp index 8f3596ade58..d0f15496e50 100644 --- a/intern/cycles/render/film.cpp +++ b/intern/cycles/render/film.cpp @@ -38,11 +38,14 @@ 
static bool compare_pass_order(const Pass& a, const Pass& b) return (a.components > b.components); } -void Pass::add(PassType type, array& passes) +void Pass::add(PassType type, vector& passes, const char *name) { - for(size_t i = 0; i < passes.size(); i++) - if(passes[i].type == type) + for(size_t i = 0; i < passes.size(); i++) { + if(passes[i].type == type && + (name ? (passes[i].name == name) : passes[i].name.empty())) { return; + } + } Pass pass; @@ -50,6 +53,9 @@ void Pass::add(PassType type, array& passes) pass.filter = true; pass.exposure = false; pass.divide_type = PASS_NONE; + if(name) { + pass.name = name; + } switch(type) { case PASS_NONE: @@ -155,13 +161,15 @@ void Pass::add(PassType type, array& passes) pass.components = 4; pass.exposure = true; break; - + case PASS_CRYPTOMATTE: + pass.components = 4; + break; default: assert(false); break; } - passes.push_back_slow(pass); + passes.push_back(pass); /* order from by components, to ensure alignment so passes with size 4 * come first and then passes with size 1 */ @@ -171,19 +179,19 @@ void Pass::add(PassType type, array& passes) Pass::add(pass.divide_type, passes); } -bool Pass::equals(const array& A, const array& B) +bool Pass::equals(const vector& A, const vector& B) { if(A.size() != B.size()) return false; for(int i = 0; i < A.size(); i++) - if(A[i].type != B[i].type) + if(A[i].type != B[i].type || A[i].name != B[i].name) return false; return true; } -bool Pass::contains(const array& passes, PassType type) +bool Pass::contains(const vector& passes, PassType type) { for(size_t i = 0; i < passes.size(); i++) if(passes[i].type == type) @@ -290,6 +298,7 @@ Film::Film() use_light_visibility = false; filter_table_offset = TABLE_OFFSET_INVALID; + cryptomatte_passes = CRYPT_NONE; need_update = true; } @@ -314,6 +323,8 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->pass_stride = 0; kfilm->use_light_pass = use_light_visibility || use_sample_clamp; + bool have_cryptomatte = 
false; + for(size_t i = 0; i < passes.size(); i++) { Pass& pass = passes[i]; @@ -434,7 +445,10 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) #endif case PASS_RENDER_TIME: break; - + case PASS_CRYPTOMATTE: + kfilm->pass_cryptomatte = have_cryptomatte ? min(kfilm->pass_cryptomatte, kfilm->pass_stride) : kfilm->pass_stride; + have_cryptomatte = true; + break; default: assert(false); break; @@ -471,6 +485,9 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->mist_inv_depth = (mist_depth > 0.0f)? 1.0f/mist_depth: 0.0f; kfilm->mist_falloff = mist_falloff; + kfilm->cryptomatte_passes = cryptomatte_passes; + kfilm->cryptomatte_depth = cryptomatte_depth; + pass_stride = kfilm->pass_stride; denoising_data_offset = kfilm->pass_denoising_data; denoising_clean_offset = kfilm->pass_denoising_clean; @@ -490,7 +507,7 @@ bool Film::modified(const Film& film) return !Node::equals(film) || !Pass::equals(passes, film.passes); } -void Film::tag_passes_update(Scene *scene, const array& passes_) +void Film::tag_passes_update(Scene *scene, const vector& passes_) { if(Pass::contains(passes, PASS_UV) != Pass::contains(passes_, PASS_UV)) { scene->mesh_manager->tag_update(scene); diff --git a/intern/cycles/render/film.h b/intern/cycles/render/film.h index 6ab2eea79b8..57f1bf4eb64 100644 --- a/intern/cycles/render/film.h +++ b/intern/cycles/render/film.h @@ -45,10 +45,11 @@ public: bool filter; bool exposure; PassType divide_type; + string name; - static void add(PassType type, array& passes); - static bool equals(const array& A, const array& B); - static bool contains(const array& passes, PassType); + static void add(PassType type, vector& passes, const char* name = NULL); + static bool equals(const vector& A, const vector& B); + static bool contains(const vector& passes, PassType); }; class Film : public Node { @@ -56,7 +57,7 @@ public: NODE_DECLARE float exposure; - array passes; + vector passes; bool denoising_data_pass; 
bool denoising_clean_pass; int denoising_flags; @@ -76,6 +77,8 @@ public: bool use_light_visibility; bool use_sample_clamp; + CryptomatteType cryptomatte_passes; + int cryptomatte_depth; bool need_update; @@ -86,7 +89,7 @@ public: void device_free(Device *device, DeviceScene *dscene, Scene *scene); bool modified(const Film& film); - void tag_passes_update(Scene *scene, const array& passes_); + void tag_passes_update(Scene *scene, const vector& passes_); void tag_update(Scene *scene); }; diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index e3f35c366d6..a56a8a6ec58 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -28,6 +28,7 @@ #include "util/util_map.h" #include "util/util_progress.h" #include "util/util_vector.h" +#include "util/util_murmurhash.h" #include "subd/subd_patch_table.h" @@ -483,6 +484,10 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s kobject.numverts = mesh->verts.size(); kobject.patch_map_offset = 0; kobject.attribute_map_offset = 0; + uint32_t hash_name = util_murmur_hash3(ob->name.c_str(), ob->name.length(), 0); + uint32_t hash_asset = util_murmur_hash3(ob->asset_name.c_str(), ob->asset_name.length(), 0); + kobject.cryptomatte_object = util_hash_to_float(hash_name); + kobject.cryptomatte_asset = util_hash_to_float(hash_asset); /* Object flag. 
*/ if(ob->use_holdout) { diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h index b80c4aef70b..bd44b35aba3 100644 --- a/intern/cycles/render/object.h +++ b/intern/cycles/render/object.h @@ -48,6 +48,7 @@ public: BoundBox bounds; uint random_id; int pass_id; + ustring asset_name; vector attributes; uint visibility; array motion; diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index ac605305b94..8d0cec7b14e 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -30,6 +30,7 @@ #include "render/tables.h" #include "util/util_foreach.h" +#include "util/util_murmurhash.h" #ifdef WITH_OCIO # include @@ -523,12 +524,15 @@ void ShaderManager::device_update_common(Device *device, if(shader->is_constant_emission(&constant_emission)) flag |= SD_HAS_CONSTANT_EMISSION; + uint32_t cryptomatte_id = util_murmur_hash3(shader->name.c_str(), shader->name.length(), 0); + /* regular shader */ kshader->flags = flag; kshader->pass_id = shader->pass_id; kshader->constant_emission[0] = constant_emission.x; kshader->constant_emission[1] = constant_emission.y; kshader->constant_emission[2] = constant_emission.z; + kshader->cryptomatte_id = util_hash_to_float(cryptomatte_id); kshader++; has_transparent_shadow |= (flag & SD_HAS_TRANSPARENT_SHADOW) != 0; diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index 291f9a9fcae..4f623c5dfb7 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -15,6 +15,7 @@ set(SRC util_logging.cpp util_math_cdf.cpp util_md5.cpp + util_murmurhash.cpp util_path.cpp util_string.cpp util_simd.cpp @@ -64,6 +65,7 @@ set(SRC_HEADERS util_math_int4.h util_math_matrix.h util_md5.h + util_murmurhash.h util_opengl.h util_optimization.h util_param.h diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h index f3c7ae546a0..e17e99d0acd 100644 --- a/intern/cycles/util/util_atomic.h +++ 
b/intern/cycles/util/util_atomic.h @@ -23,6 +23,7 @@ #include "atomic_ops.h" #define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x)) +#define atomic_compare_and_swap_float(p, old_val, new_val) atomic_cas_float((p), (old_val), (new_val)) #define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) #define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_add_uint32((p), -1) @@ -57,6 +58,20 @@ ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *so return new_value.float_value; } +ccl_device_inline float atomic_compare_and_swap_float(volatile ccl_global float *dest, + const float old_val, const float new_val) +{ + union { + unsigned int int_value; + float float_value; + } new_value, prev_value, result; + prev_value.float_value = old_val; + new_value.float_value = new_val; + result.int_value = atomic_cmpxchg((volatile ccl_global unsigned int *)dest, + prev_value.int_value, new_value.int_value); + return result.float_value; +} + #define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x)) #define atomic_fetch_and_inc_uint32(p) atomic_inc((p)) #define atomic_fetch_and_dec_uint32(p) atomic_dec((p)) @@ -75,6 +90,19 @@ ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *so #define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) #define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1) +ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest, + const float old_val, const float new_val) +{ + union { + unsigned int int_value; + float float_value; + } new_value, prev_value, result; + prev_value.float_value = old_val; + new_value.float_value = new_val; + result.int_value = atomicCAS((unsigned int *)dest, prev_value.int_value,new_value.int_value); + return result.float_value; +} + #define CCL_LOCAL_MEM_FENCE #define ccl_barrier(flags) __syncthreads() diff --git a/intern/cycles/util/util_murmurhash.cpp b/intern/cycles/util/util_murmurhash.cpp 
new file mode 100644 index 00000000000..c1f81e61b72 --- /dev/null +++ b/intern/cycles/util/util_murmurhash.cpp @@ -0,0 +1,125 @@ +/* + * Copyright 2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This is taken from alShaders/Cryptomatte/MurmurHash3.h: + * + * MurmurHash3 was written by Austin Appleby, and is placed in the public + * domain. The author hereby disclaims copyright to this source code. + * + */ + +#include "util/util_algorithm.h" +#include "util/util_murmurhash.h" + +#if defined(_MSC_VER) +# include +# define ROTL32(x,y) _rotl(x,y) +# define ROTL64(x,y) _rotl64(x,y) +# define BIG_CONSTANT(x) (x) +#else +ccl_device_inline uint32_t rotl32(uint32_t x, int8_t r) +{ + return (x << r) | (x >> (32 - r)); +} +# define ROTL32(x,y) rotl32(x,y) +# define BIG_CONSTANT(x) (x##LLU) +#endif + +CCL_NAMESPACE_BEGIN + +/* Block read - if your platform needs to do endian-swapping or can only + * handle aligned reads, do the conversion here. 
*/ +ccl_device_inline uint32_t mm_hash_getblock32(const uint32_t *p, int i) +{ + return p[i]; +} + +/* Finalization mix - force all bits of a hash block to avalanche */ +ccl_device_inline uint32_t mm_hash_fmix32 ( uint32_t h ) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; +} + +uint32_t util_murmur_hash3(const void *key, int len, uint32_t seed) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); + + for(int i = -nblocks; i; i++) { + uint32_t k1 = mm_hash_getblock32(blocks,i); + + k1 *= c1; + k1 = ROTL32(k1,15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1,13); + h1 = h1 * 5 + 0xe6546b64; + } + + const uint8_t *tail = (const uint8_t*)(data + nblocks*4); + + uint32_t k1 = 0; + + switch(len & 3) { + case 3: + k1 ^= tail[2] << 16; + ATTR_FALLTHROUGH; + case 2: + k1 ^= tail[1] << 8; + ATTR_FALLTHROUGH; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = ROTL32(k1,15); + k1 *= c2; + h1 ^= k1; + } + + h1 ^= len; + h1 = mm_hash_fmix32(h1); + return h1; +} + +/* This is taken from the cryptomatte specification 1.0 */ +float util_hash_to_float(uint32_t hash) +{ + uint32_t mantissa = hash & (( 1 << 23) - 1); + uint32_t exponent = (hash >> 23) & ((1 << 8) - 1); + exponent = max(exponent, (uint32_t) 1); + exponent = min(exponent, (uint32_t) 254); + exponent = exponent << 23; + uint32_t sign = (hash >> 31); + sign = sign << 31; + uint32_t float_bits = sign | exponent | mantissa; + float f; + memcpy(&f, &float_bits, sizeof(uint32_t)); + return f; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_murmurhash.h b/intern/cycles/util/util_murmurhash.h new file mode 100644 index 00000000000..824ed59cb16 --- /dev/null +++ b/intern/cycles/util/util_murmurhash.h @@ -0,0 +1,30 @@ +/* + * Copyright 2018 Blender Foundation + * + * Licensed under 
the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef __UTIL_MURMURHASH_H__ +#define __UTIL_MURMURHASH_H__ + +#include "util/util_types.h" + +CCL_NAMESPACE_BEGIN + +uint32_t util_murmur_hash3(const void *key, int len, uint32_t seed); +float util_hash_to_float(uint32_t hash); + +CCL_NAMESPACE_END + +#endif /* __UTIL_MURMURHASH_H__ */ From 47953dee79202e1074ddfd0f85a2f5fa6a73d3dd Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sun, 28 Oct 2018 14:53:08 +0100 Subject: [PATCH 5/6] Fix Linux build after Cryptomatte commit. 
--- build_files/cmake/macros.cmake | 2 +- intern/cycles/util/util_murmurhash.cpp | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/build_files/cmake/macros.cmake b/build_files/cmake/macros.cmake index e2af3667f2a..acbbdef6dd4 100644 --- a/build_files/cmake/macros.cmake +++ b/build_files/cmake/macros.cmake @@ -666,10 +666,10 @@ function(SETUP_BLENDER_SORTED_LIBS) bf_intern_mikktspace bf_intern_dualcon bf_intern_cycles + cycles_device cycles_render cycles_graph cycles_bvh - cycles_device cycles_kernel cycles_util cycles_subd diff --git a/intern/cycles/util/util_murmurhash.cpp b/intern/cycles/util/util_murmurhash.cpp index c1f81e61b72..68b2f2031be 100644 --- a/intern/cycles/util/util_murmurhash.cpp +++ b/intern/cycles/util/util_murmurhash.cpp @@ -21,20 +21,22 @@ * */ +#include +#include + #include "util/util_algorithm.h" #include "util/util_murmurhash.h" #if defined(_MSC_VER) -# include -# define ROTL32(x,y) _rotl(x,y) -# define ROTL64(x,y) _rotl64(x,y) +# define ROTL32(x,y) _rotl(x,y) +# define ROTL64(x,y) _rotl64(x,y) # define BIG_CONSTANT(x) (x) #else ccl_device_inline uint32_t rotl32(uint32_t x, int8_t r) { return (x << r) | (x >> (32 - r)); } -# define ROTL32(x,y) rotl32(x,y) +# define ROTL32(x,y) rotl32(x,y) # define BIG_CONSTANT(x) (x##LLU) #endif From e3d2df038001af8c67707c20a27e21571b1fdff6 Mon Sep 17 00:00:00 2001 From: Roel Koster Date: Sun, 28 Oct 2018 13:03:50 +0100 Subject: [PATCH 6/6] Fix snaps appearing in system bookmarks on Linux. 
Differential Revision: https://developer.blender.org/D3838 --- source/blender/editors/space_file/fsmenu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/blender/editors/space_file/fsmenu.c b/source/blender/editors/space_file/fsmenu.c index 38b8dd37d46..f925409d882 100644 --- a/source/blender/editors/space_file/fsmenu.c +++ b/source/blender/editors/space_file/fsmenu.c @@ -605,6 +605,8 @@ void fsmenu_read_system(struct FSMenu *fsmenu, int read_bookmarks) /* not sure if this is right, but seems to give the relevant mnts */ if (!STREQLEN(mnt->mnt_fsname, "/dev", 4)) continue; + if (STREQLEN(mnt->mnt_fsname, "/dev/loop", 9)) + continue; len = strlen(mnt->mnt_dir); if (len && mnt->mnt_dir[len - 1] != '/') {