diff --git a/build_files/cmake/macros.cmake b/build_files/cmake/macros.cmake index 9d679dc9b8f..16a62ee4f61 100644 --- a/build_files/cmake/macros.cmake +++ b/build_files/cmake/macros.cmake @@ -672,10 +672,10 @@ function(SETUP_BLENDER_SORTED_LIBS) bf_intern_mikktspace bf_intern_dualcon bf_intern_cycles + cycles_device cycles_render cycles_graph cycles_bvh - cycles_device cycles_kernel cycles_util cycles_subd diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py index 1604422211b..25708a0f888 100644 --- a/intern/cycles/blender/addon/engine.py +++ b/intern/cycles/blender/addon/engine.py @@ -257,21 +257,32 @@ def register_passes(engine, scene, srl): if crl.use_pass_volume_indirect: engine.register_pass(scene, srl, "VolumeInd", 3, "RGB", 'COLOR') cscene = scene.cycles - if crl.use_denoising and crl.denoising_store_passes and not cscene.use_progressive_refine: - engine.register_pass(scene, srl, "Denoising Normal", 3, "XYZ", 'VECTOR') - engine.register_pass(scene, srl, "Denoising Normal Variance", 3, "XYZ", 'VECTOR') - engine.register_pass(scene, srl, "Denoising Albedo", 3, "RGB", 'COLOR') - engine.register_pass(scene, srl, "Denoising Albedo Variance", 3, "RGB", 'COLOR') - engine.register_pass(scene, srl, "Denoising Depth", 1, "Z", 'VALUE') - engine.register_pass(scene, srl, "Denoising Depth Variance", 1, "Z", 'VALUE') - engine.register_pass(scene, srl, "Denoising Shadow A", 3, "XYV", 'VECTOR') - engine.register_pass(scene, srl, "Denoising Shadow B", 3, "XYV", 'VECTOR') - engine.register_pass(scene, srl, "Denoising Image", 3, "RGB", 'COLOR') - engine.register_pass(scene, srl, "Denoising Image Variance", 3, "RGB", 'COLOR') - clean_options = ("denoising_diffuse_direct", "denoising_diffuse_indirect", - "denoising_glossy_direct", "denoising_glossy_indirect", - "denoising_transmission_direct", "denoising_transmission_indirect", - "denoising_subsurface_direct", "denoising_subsurface_indirect") - if any(getattr(crl, option) for option in 
clean_options): - engine.register_pass(scene, srl, "Denoising Clean", 3, "RGB", 'COLOR') + if crl.use_pass_crypto_object: + for i in range(0, crl.pass_crypto_depth, 2): + engine.register_pass(scene, srl, "CryptoObject" + '{:02d}'.format(i), 4, "RGBA", 'COLOR') + if crl.use_pass_crypto_material: + for i in range(0, crl.pass_crypto_depth, 2): + engine.register_pass(scene, srl, "CryptoMaterial" + '{:02d}'.format(i), 4, "RGBA", 'COLOR') + if srl.cycles.use_pass_crypto_asset: + for i in range(0, srl.cycles.pass_crypto_depth, 2): + engine.register_pass(scene, srl, "CryptoAsset" + '{:02d}'.format(i), 4, "RGBA", 'COLOR') + + if crl.use_denoising: + engine.register_pass(scene, srl, "Noisy Image", 3, "RGBA", 'COLOR') + if crl.denoising_store_passes: + engine.register_pass(scene, srl, "Denoising Normal", 3, "XYZ", 'VECTOR') + engine.register_pass(scene, srl, "Denoising Normal Variance", 3, "XYZ", 'VECTOR') + engine.register_pass(scene, srl, "Denoising Albedo", 3, "RGB", 'COLOR') + engine.register_pass(scene, srl, "Denoising Albedo Variance", 3, "RGB", 'COLOR') + engine.register_pass(scene, srl, "Denoising Depth", 1, "Z", 'VALUE') + engine.register_pass(scene, srl, "Denoising Depth Variance", 1, "Z", 'VALUE') + engine.register_pass(scene, srl, "Denoising Shadow A", 3, "XYV", 'VECTOR') + engine.register_pass(scene, srl, "Denoising Shadow B", 3, "XYV", 'VECTOR') + engine.register_pass(scene, srl, "Denoising Image Variance", 3, "RGB", 'COLOR') + clean_options = ("denoising_diffuse_direct", "denoising_diffuse_indirect", + "denoising_glossy_direct", "denoising_glossy_indirect", + "denoising_transmission_direct", "denoising_transmission_indirect", + "denoising_subsurface_direct", "denoising_subsurface_indirect") + if any(getattr(crl, option) for option in clean_options): + engine.register_pass(scene, srl, "Denoising Clean", 3, "RGB", 'COLOR') diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index a4a1f597455..05f94ebc37a 100644 --- 
a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -1347,6 +1347,36 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup): default=False, update=update_render_passes, ) + use_pass_crypto_object: BoolProperty( + name="Cryptomatte Object", + description="Cryptomatte Object pass", + default=False, + update=update_render_passes, + ) + use_pass_crypto_material: BoolProperty( + name="Cryptomatte Material", + description="Cryptomatte Material pass", + default=False, + update=update_render_passes, + ) + use_pass_crypto_asset: BoolProperty( + name="Cryptomatte Asset", + description="Cryptomatte Asset pass", + default=False, + update=update_render_passes, + ) + pass_crypto_depth: IntProperty( + name="Cryptomatte Levels", + description="Describes how many unique IDs per pixel are written to Cryptomatte", + default=6, min=2, max=16, step=2, + update=update_render_passes, + ) + pass_crypto_accurate: BoolProperty( + name="Cryptomatte Accurate", + description="Gerenate a more accurate Cryptomatte pass, CPU only, may render slower and use more memory", + default=True, + update=update_render_passes, + ) @classmethod def register(cls): diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 0b73a684a55..bce909e345a 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -817,6 +817,17 @@ class CYCLES_RENDER_PT_layer_passes(CyclesButtonsPanel, Panel): col.prop(cycles_view_layer, "pass_debug_bvh_intersections") col.prop(cycles_view_layer, "pass_debug_ray_bounces") + layout.label("Cryptomatte:") + row = layout.row(align=True) + row.prop(cycles_view_layer, "use_pass_crypto_object", text="Object", toggle=True) + row.prop(cycles_view_layer, "use_pass_crypto_material", text="Material", toggle=True) + row.prop(cycles_view_layer, "use_pass_crypto_asset", text="Asset", toggle=True) + row = layout.row(align=True) + row.prop(cycles_view_layer, "pass_crypto_depth") + row = 
layout.row(align=True) + row.active = use_cpu(context) + row.prop(cycles_view_layer, "pass_crypto_accurate", text="Accurate Mode") + class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel): bl_label = "Denoising" diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp index 408a92f1f3a..dcadc735b8e 100644 --- a/intern/cycles/blender/blender_object.cpp +++ b/intern/cycles/blender/blender_object.cpp @@ -424,6 +424,23 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph, object_updated = true; } + /* sync the asset name for Cryptomatte */ + BL::Object parent = b_ob.parent(); + ustring parent_name; + if(parent) { + while(parent.parent()) { + parent = parent.parent(); + } + parent_name = parent.name(); + } + else { + parent_name = b_ob.name(); + } + if(object->asset_name != parent_name) { + object->asset_name = parent_name; + object_updated = true; + } + /* object sync * transform comparison should not be needed, but duplis don't work perfect * in the depsgraph and may not signal changes, so this is a workaround */ diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index 4ac0e1f21c1..1ff15284bc1 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -407,7 +407,7 @@ void BlenderSession::render(BL::Depsgraph& b_depsgraph_) BL::RenderLayer b_rlay = *b_single_rlay; /* add passes */ - array passes = sync->sync_render_passes(b_rlay, b_view_layer, session_params); + vector passes = sync->sync_render_passes(b_rlay, b_view_layer, session_params); buffer_params.passes = passes; PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles"); @@ -711,7 +711,7 @@ void BlenderSession::do_write_update_render_result(BL::RenderResult& b_rr, bool read = false; if(pass_type != PASS_NONE) { /* copy pixels */ - read = buffers->get_pass_rect(pass_type, exposure, sample, components, &pixels[0]); + read = buffers->get_pass_rect(pass_type, 
exposure, sample, components, &pixels[0], b_pass.name()); } else { int denoising_offset = BlenderSync::get_denoising_pass(b_pass); @@ -730,7 +730,7 @@ void BlenderSession::do_write_update_render_result(BL::RenderResult& b_rr, else { /* copy combined pass */ BL::RenderPass b_combined_pass(b_rlay.passes.find_by_name("Combined", b_rview_name.c_str())); - if(buffers->get_pass_rect(PASS_COMBINED, exposure, sample, 4, &pixels[0])) + if(buffers->get_pass_rect(PASS_COMBINED, exposure, sample, 4, &pixels[0], "Combined")) b_combined_pass.rect(&pixels[0]); } diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 4989746ae6a..42489438780 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -40,6 +40,8 @@ CCL_NAMESPACE_BEGIN +static const char *cryptomatte_prefix = "Crypto"; + /* Constructor */ BlenderSync::BlenderSync(BL::RenderEngine& b_engine, @@ -449,6 +451,9 @@ PassType BlenderSync::get_pass_type(BL::RenderPass& b_pass) MAP_PASS("Debug Ray Bounces", PASS_RAY_BOUNCES); #endif MAP_PASS("Debug Render Time", PASS_RENDER_TIME); + if(string_startswith(name, cryptomatte_prefix)) { + return PASS_CRYPTOMATTE; + } #undef MAP_PASS return PASS_NONE; @@ -457,6 +462,9 @@ PassType BlenderSync::get_pass_type(BL::RenderPass& b_pass) int BlenderSync::get_denoising_pass(BL::RenderPass& b_pass) { string name = b_pass.name(); + + if(name == "Noisy Image") return DENOISING_PASS_COLOR; + if(name.substr(0, 10) != "Denoising ") { return -1; } @@ -471,7 +479,6 @@ int BlenderSync::get_denoising_pass(BL::RenderPass& b_pass) MAP_PASS("Depth Variance", DENOISING_PASS_DEPTH_VAR); MAP_PASS("Shadow A", DENOISING_PASS_SHADOW_A); MAP_PASS("Shadow B", DENOISING_PASS_SHADOW_B); - MAP_PASS("Image", DENOISING_PASS_COLOR); MAP_PASS("Image Variance", DENOISING_PASS_COLOR_VAR); MAP_PASS("Clean", DENOISING_PASS_CLEAN); #undef MAP_PASS @@ -479,11 +486,11 @@ int BlenderSync::get_denoising_pass(BL::RenderPass& b_pass) return -1; 
} -array BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay, - BL::ViewLayer& b_view_layer, - const SessionParams &session_params) +vector BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay, + BL::ViewLayer& b_view_layer, + const SessionParams &session_params) { - array passes; + vector passes; Pass::add(PASS_COMBINED, passes); if(!session_params.device.advanced_shading) { @@ -505,20 +512,7 @@ array BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay, scene->film->denoising_flags = 0; PointerRNA crp = RNA_pointer_get(&b_view_layer.ptr, "cycles"); - if(get_boolean(crp, "denoising_store_passes") && - get_boolean(crp, "use_denoising")) - { - b_engine.add_pass("Denoising Normal", 3, "XYZ", b_view_layer.name().c_str()); - b_engine.add_pass("Denoising Normal Variance", 3, "XYZ", b_view_layer.name().c_str()); - b_engine.add_pass("Denoising Albedo", 3, "RGB", b_view_layer.name().c_str()); - b_engine.add_pass("Denoising Albedo Variance", 3, "RGB", b_view_layer.name().c_str()); - b_engine.add_pass("Denoising Depth", 1, "Z", b_view_layer.name().c_str()); - b_engine.add_pass("Denoising Depth Variance", 1, "Z", b_view_layer.name().c_str()); - b_engine.add_pass("Denoising Shadow A", 3, "XYV", b_view_layer.name().c_str()); - b_engine.add_pass("Denoising Shadow B", 3, "XYV", b_view_layer.name().c_str()); - b_engine.add_pass("Denoising Image", 3, "RGB", b_view_layer.name().c_str()); - b_engine.add_pass("Denoising Image Variance", 3, "RGB", b_view_layer.name().c_str()); - + if(get_boolean(crp, "use_denoising")) { #define MAP_OPTION(name, flag) if(!get_boolean(crp, name)) scene->film->denoising_flags |= flag; MAP_OPTION("denoising_diffuse_direct", DENOISING_CLEAN_DIFFUSE_DIR); MAP_OPTION("denoising_diffuse_indirect", DENOISING_CLEAN_DIFFUSE_IND); @@ -530,8 +524,21 @@ array BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay, MAP_OPTION("denoising_subsurface_indirect", DENOISING_CLEAN_SUBSURFACE_IND); #undef MAP_OPTION - if(scene->film->denoising_flags & 
DENOISING_CLEAN_ALL_PASSES) { - b_engine.add_pass("Denoising Clean", 3, "RGB", b_view_layer.name().c_str()); + b_engine.add_pass("Noisy Image", 4, "RGBA", b_view_layer.name().c_str()); + if(get_boolean(crp, "denoising_store_passes")) { + b_engine.add_pass("Denoising Normal", 3, "XYZ", b_view_layer.name().c_str()); + b_engine.add_pass("Denoising Normal Variance", 3, "XYZ", b_view_layer.name().c_str()); + b_engine.add_pass("Denoising Albedo", 3, "RGB", b_view_layer.name().c_str()); + b_engine.add_pass("Denoising Albedo Variance", 3, "RGB", b_view_layer.name().c_str()); + b_engine.add_pass("Denoising Depth", 1, "Z", b_view_layer.name().c_str()); + b_engine.add_pass("Denoising Depth Variance", 1, "Z", b_view_layer.name().c_str()); + b_engine.add_pass("Denoising Shadow A", 3, "XYV", b_view_layer.name().c_str()); + b_engine.add_pass("Denoising Shadow B", 3, "XYV", b_view_layer.name().c_str()); + b_engine.add_pass("Denoising Image Variance", 3, "RGB", b_view_layer.name().c_str()); + + if(scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES) { + b_engine.add_pass("Denoising Clean", 3, "RGB", b_view_layer.name().c_str()); + } } } #ifdef __KERNEL_DEBUG__ @@ -565,6 +572,39 @@ array BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay, Pass::add(PASS_VOLUME_INDIRECT, passes); } + /* Cryptomatte stores two ID/weight pairs per RGBA layer. + * User facing parameter is the number of pairs. 
*/ + int crypto_depth = min(16, get_int(crp, "pass_crypto_depth")) / 2; + scene->film->cryptomatte_depth = crypto_depth; + scene->film->cryptomatte_passes = CRYPT_NONE; + if(get_boolean(crp, "use_pass_crypto_object")) { + for(int i = 0; i < crypto_depth; ++i) { + string passname = cryptomatte_prefix + string_printf("Object%02d", i); + b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str()); + Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str()); + } + scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_OBJECT); + } + if(get_boolean(crp, "use_pass_crypto_material")) { + for(int i = 0; i < crypto_depth; ++i) { + string passname = cryptomatte_prefix + string_printf("Material%02d", i); + b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str()); + Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str()); + } + scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_MATERIAL); + } + if(get_boolean(crp, "use_pass_crypto_asset")) { + for(int i = 0; i < crypto_depth; ++i) { + string passname = cryptomatte_prefix + string_printf("Asset%02d", i); + b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str()); + Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str()); + } + scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_ASSET); + } + if(get_boolean(crp, "pass_crypto_accurate") && scene->film->cryptomatte_passes != CRYPT_NONE) { + scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_ACCURATE); + } + return passes; } diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h index e63ef9e5e47..d2b362be24d 100644 --- a/intern/cycles/blender/blender_sync.h +++ b/intern/cycles/blender/blender_sync.h @@ -67,9 +67,9 @@ public: int width, int height, void **python_thread_state); void sync_view_layer(BL::SpaceView3D& b_v3d, BL::ViewLayer& 
b_view_layer); - array sync_render_passes(BL::RenderLayer& b_render_layer, - BL::ViewLayer& b_view_layer, - const SessionParams &session_params); + vector sync_render_passes(BL::RenderLayer& b_render_layer, + BL::ViewLayer& b_view_layer, + const SessionParams &session_params); void sync_integrator(); void sync_camera(BL::RenderSettings& b_render, BL::Object& b_override, diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 7eb73dea3ef..eb816e1fdd0 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -41,6 +41,7 @@ #include "kernel/osl/osl_globals.h" #include "render/buffers.h" +#include "render/coverage.h" #include "util/util_debug.h" #include "util/util_foreach.h" @@ -677,8 +678,15 @@ public: void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg) { + const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE; + scoped_timer timer(&tile.buffers->render_time); + Coverage coverage(kg, tile); + if(use_coverage) { + coverage.init_path_trace(); + } + float *render_buffer = (float*)tile.buffer; int start_sample = tile.start_sample; int end_sample = tile.start_sample + tile.num_samples; @@ -691,6 +699,9 @@ public: for(int y = tile.y; y < tile.y + tile.h; y++) { for(int x = tile.x; x < tile.x + tile.w; x++) { + if(use_coverage) { + coverage.init_pixel(x, y); + } path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride); } @@ -700,6 +711,9 @@ public: task.update_progress(&tile, tile.w*tile.h); } + if(use_coverage) { + coverage.finalize(); + } } void denoise(DenoisingTask& denoising, RenderTile &tile) @@ -760,7 +774,6 @@ public: } else if(tile.task == RenderTile::DENOISE) { denoise(denoising, tile); - task.update_progress(&tile, tile.w*tile.h); } diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index b48ed649a8c..08efede36df 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ 
b/intern/cycles/kernel/CMakeLists.txt @@ -96,6 +96,7 @@ set(SRC_HEADERS kernel_emission.h kernel_film.h kernel_globals.h + kernel_id_passes.h kernel_jitter.h kernel_light.h kernel_math.h diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index cfe17e63627..0eb8ce2cf8b 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -304,6 +304,24 @@ ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd) return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id; } +/* Cryptomatte ID */ + +ccl_device_inline float object_cryptomatte_id(KernelGlobals *kg, int object) +{ + if(object == OBJECT_NONE) + return 0.0f; + + return kernel_tex_fetch(__objects, object).cryptomatte_object; +} + +ccl_device_inline float object_cryptomatte_asset_id(KernelGlobals *kg, int object) +{ + if(object == OBJECT_NONE) + return 0; + + return kernel_tex_fetch(__objects, object).cryptomatte_asset; +} + /* Particle data from which object was instanced */ ccl_device_inline uint particle_index(KernelGlobals *kg, int particle) diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index 74cfacb5bc1..37402f42863 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -21,6 +21,7 @@ #ifdef __KERNEL_CPU__ # include "util/util_vector.h" +# include "util/util_map.h" #endif #ifdef __KERNEL_OPENCL__ @@ -42,6 +43,8 @@ struct OSLThreadData; struct OSLShadingSystem; # endif +typedef unordered_map CoverageMap; + struct Intersection; struct VolumeStep; @@ -68,6 +71,11 @@ typedef struct KernelGlobals { VolumeStep *decoupled_volume_steps[2]; int decoupled_volume_steps_index; + /* A buffer for storing per-pixel coverage for Cryptomatte. 
*/ + CoverageMap *coverage_object; + CoverageMap *coverage_material; + CoverageMap *coverage_asset; + /* split kernel */ SplitData split_data; SplitParams split_param_data; diff --git a/intern/cycles/kernel/kernel_id_passes.h b/intern/cycles/kernel/kernel_id_passes.h new file mode 100644 index 00000000000..486c61d2ae5 --- /dev/null +++ b/intern/cycles/kernel/kernel_id_passes.h @@ -0,0 +1,94 @@ +/* +* Copyright 2018 Blender Foundation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +CCL_NAMESPACE_BEGIN + +ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, int num_slots, float id, float weight) +{ + kernel_assert(id != ID_NONE); + if(weight == 0.0f) { + return; + } + + for(int slot = 0; slot < num_slots; slot++) { + ccl_global float2 *id_buffer = (ccl_global float2*)buffer; +#ifdef __ATOMIC_PASS_WRITE__ + /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ + if(id_buffer[slot].x == ID_NONE) { + /* Use an atomic to claim this slot. + * If a different thread got here first, try again from this slot on. */ + float old_id = atomic_compare_and_swap_float(buffer+slot*2, ID_NONE, id); + if(old_id != ID_NONE && old_id != id) { + continue; + } + atomic_add_and_fetch_float(buffer+slot*2+1, weight); + break; + } + /* If there already is a slot for that ID, add the weight. + * If no slot was found, add it to the last. 
*/ + else if(id_buffer[slot].x == id || slot == num_slots - 1) { + atomic_add_and_fetch_float(buffer+slot*2+1, weight); + break; + } +#else /* __ATOMIC_PASS_WRITE__ */ + /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ + if(id_buffer[slot].x == ID_NONE) { + id_buffer[slot].x = id; + id_buffer[slot].y = weight; + break; + } + /* If there already is a slot for that ID, add the weight. + * If no slot was found, add it to the last. */ + else if(id_buffer[slot].x == id || slot == num_slots - 1) { + id_buffer[slot].y += weight; + break; + } +#endif /* __ATOMIC_PASS_WRITE__ */ + } +} + +ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots) +{ + ccl_global float2 *id_buffer = (ccl_global float2*)buffer; + for(int slot = 1; slot < num_slots; ++slot) { + if(id_buffer[slot].x == ID_NONE) { + return; + } + /* Since we're dealing with a tiny number of elements, insertion sort should be fine. */ + int i = slot; + while(i > 0 && id_buffer[i].y > id_buffer[i - 1].y) { + float2 swap = id_buffer[i]; + id_buffer[i] = id_buffer[i - 1]; + id_buffer[i - 1] = swap; + --i; + } + } +} + +#ifdef __KERNEL_GPU__ +/* post-sorting for Cryptomatte */ +ccl_device void kernel_cryptomatte_post(KernelGlobals *kg, ccl_global float *buffer, uint sample, int x, int y, int offset, int stride) +{ + if(sample - 1 == kernel_data.integrator.aa_samples) { + int index = offset + x + y * stride; + int pass_stride = kernel_data.film.pass_stride; + ccl_global float *cryptomatte_buffer = buffer + index * pass_stride + kernel_data.film.pass_cryptomatte; + kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth); + } +} +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h index 9b96bb80c32..ce68aa16af8 100644 --- a/intern/cycles/kernel/kernel_montecarlo.h +++ b/intern/cycles/kernel/kernel_montecarlo.h @@ -187,7 +187,10 @@ ccl_device float2 
regular_polygon_sample(float corners, float rotation, float u, ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N) { float3 R = 2*dot(N, I)*N - I; - if(dot(Ng, R) >= 0.05f) { + + /* Reflection rays may always be at least as shallow as the incoming ray. */ + float threshold = min(0.9f*dot(Ng, I), 0.01f); + if(dot(Ng, R) >= threshold) { return N; } @@ -195,22 +198,86 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N) * The X axis is found by normalizing the component of N that's orthogonal to Ng. * The Y axis isn't actually needed. */ - float3 X = normalize(N - dot(N, Ng)*Ng); + float NdotNg = dot(N, Ng); + float3 X = normalize(N - NdotNg*Ng); - /* Calculate N.z and N.x in the local coordinate system. */ - float Iz = dot(I, Ng); - float Ix2 = sqr(dot(I, X)), Iz2 = sqr(Iz); - float Ix2Iz2 = Ix2 + Iz2; + /* Calculate N.z and N.x in the local coordinate system. + * + * The goal of this computation is to find a N' that is rotated towards Ng just enough + * to lift R' above the threshold (here called t), therefore dot(R', Ng) = t. + * + * According to the standard reflection equation, this means that we want dot(2*dot(N', I)*N' - I, Ng) = t. + * + * Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get 2*dot(N', I)*N'.z - I.z = t. + * + * The rotation is simple to express in the coordinate system we formed - since N lies in the X-Z-plane, we know that + * N' will also lie in the X-Z-plane, so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z . + * + * Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2). + * + * With these simplifications, we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t. + * + * The only unknown here is N'.z, so we can solve for that. 
+ * + * The equation has four solutions in general: + * + * N'.z = +-sqrt(0.5*(+-sqrt(I.x^2*(I.x^2 + I.z^2 - t^2)) + t*I.z + I.x^2 + I.z^2)/(I.x^2 + I.z^2)) + * We can simplify this expression a bit by grouping terms: + * + * a = I.x^2 + I.z^2 + * b = sqrt(I.x^2 * (a - t^2)) + * c = I.z*t + a + * N'.z = +-sqrt(0.5*(+-b + c)/a) + * + * Two solutions can immediately be discarded because they're negative so N' would lie in the lower hemisphere. + */ + float Ix = dot(I, X), Iz = dot(I, Ng); + float Ix2 = sqr(Ix), Iz2 = sqr(Iz); + float a = Ix2 + Iz2; - float a = safe_sqrtf(Ix2*(Ix2Iz2 - sqr(0.05f))); - float b = Iz*0.05f + Ix2Iz2; - float c = (a + b > 0.0f)? (a + b) : (-a + b); + float b = safe_sqrtf(Ix2*(a - sqr(threshold))); + float c = Iz*threshold + a; - float Nz = safe_sqrtf(0.5f * c * (1.0f / Ix2Iz2)); - float Nx = safe_sqrtf(1.0f - sqr(Nz)); + /* Evaluate both solutions. + * In many cases one can be immediately discarded (if N'.z would be imaginary or larger than one), so check for that first. + * If no option is viable (might happen in extreme cases like N being in the wrong hemisphere), give up and return Ng. */ + float fac = 0.5f/a; + float N1_z2 = fac*(b+c), N2_z2 = fac*(-b+c); + bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f)); + bool valid2 = (N2_z2 > 1e-5f) && (N2_z2 <= (1.0f + 1e-5f)); - /* Transform back into global coordinates. */ - return Nx*X + Nz*Ng; + float2 N_new; + if(valid1 && valid2) { + /* If both are possible, do the expensive reflection-based check. 
*/ + float2 N1 = make_float2(safe_sqrtf(1.0f - N1_z2), safe_sqrtf(N1_z2)); + float2 N2 = make_float2(safe_sqrtf(1.0f - N2_z2), safe_sqrtf(N2_z2)); + + float R1 = 2*(N1.x*Ix + N1.y*Iz)*N1.y - Iz; + float R2 = 2*(N2.x*Ix + N2.y*Iz)*N2.y - Iz; + + valid1 = (R1 >= 1e-5f); + valid2 = (R2 >= 1e-5f); + if(valid1 && valid2) { + /* If both solutions are valid, return the one with the shallower reflection since it will be closer to the input + * (if the original reflection wasn't shallow, we would not be in this part of the function). */ + N_new = (R1 < R2)? N1 : N2; + } + else { + /* If only one reflection is valid (= positive), pick that one. */ + N_new = (R1 > R2)? N1 : N2; + } + + } + else if(valid1 || valid2) { + /* Only one solution passes the N'.z criterion, so pick that one. */ + float Nz2 = valid1? N1_z2 : N2_z2; + N_new = make_float2(safe_sqrtf(1.0f - Nz2), safe_sqrtf(Nz2)); + } + else { + return Ng; + } + + return N_new.x*X + N_new.y*Ng; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h index 458aa6c2a97..e256a1819ed 100644 --- a/intern/cycles/kernel/kernel_passes.h +++ b/intern/cycles/kernel/kernel_passes.h @@ -14,12 +14,14 @@ * limitations under the License. 
*/ -CCL_NAMESPACE_BEGIN - #if defined(__SPLIT_KERNEL__) || defined(__KERNEL_CUDA__) #define __ATOMIC_PASS_WRITE__ #endif +#include "kernel/kernel_id_passes.h" + +CCL_NAMESPACE_BEGIN + ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, float value) { ccl_global float *buf = buffer; @@ -189,6 +191,23 @@ ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg, } #endif /* __KERNEL_DEBUG__ */ +#ifdef __KERNEL_CPU__ +#define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) kernel_write_id_pass_cpu(buffer, depth * 2, id, matte_weight, kg->coverage_##name) +ccl_device_inline size_t kernel_write_id_pass_cpu(float *buffer, size_t depth, float id, float matte_weight, CoverageMap *map) +{ + if(map) { + (*map)[id] += matte_weight; + return 0; + } +#else /* __KERNEL_CPU__ */ +#define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) kernel_write_id_slots_gpu(buffer, depth * 2, id, matte_weight) +ccl_device_inline size_t kernel_write_id_slots_gpu(ccl_global float *buffer, size_t depth, float id, float matte_weight) +{ +#endif /* __KERNEL_CPU__ */ + kernel_write_id_slots(buffer, depth, id, matte_weight); + return depth * 2; +} + ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L, ShaderData *sd, ccl_addr_space PathState *state, float3 throughput) { @@ -242,6 +261,26 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl } } + if(kernel_data.film.cryptomatte_passes) { + const float matte_weight = average(throughput) * (1.0f - average(shader_bsdf_transparency(kg, sd))); + if(matte_weight > 0.0f) { + ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte; + if(kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { + float id = object_cryptomatte_id(kg, sd->object); + cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, object); + } + if(kernel_data.film.cryptomatte_passes & 
CRYPT_MATERIAL) { + float id = shader_cryptomatte_id(kg, sd->shader); + cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, material); + } + if(kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { + float id = object_cryptomatte_asset_id(kg, sd->object); + cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, asset); + } + } + } + + if(light_flag & PASSMASK_COMPONENT(DIFFUSE)) L->color_diffuse += shader_bsdf_diffuse(kg, sd)*throughput; if(light_flag & PASSMASK_COMPONENT(GLOSSY)) diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index e834b701f96..af883aa715b 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -1276,4 +1276,9 @@ ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect } #endif /* __TRANSPARENT_SHADOWS__ */ +ccl_device float shader_cryptomatte_id(KernelGlobals *kg, int shader) +{ + return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index e93100a6442..f46b06f87f9 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -53,6 +53,7 @@ CCL_NAMESPACE_BEGIN #define OBJECT_NONE (~0) #define PRIM_NONE (~0) #define LAMP_NONE (~0) +#define ID_NONE (0.0f) #define VOLUME_STACK_SIZE 32 @@ -415,6 +416,7 @@ typedef enum PassType { PASS_RAY_BOUNCES, #endif PASS_RENDER_TIME, + PASS_CRYPTOMATTE, PASS_CATEGORY_MAIN_END = 31, PASS_MIST = 32, @@ -443,6 +445,14 @@ typedef enum PassType { #define PASS_ANY (~0) +typedef enum CryptomatteType { + CRYPT_NONE = 0, + CRYPT_OBJECT = (1 << 0), + CRYPT_MATERIAL = (1 << 1), + CRYPT_ASSET = (1 << 2), + CRYPT_ACCURATE = (1 << 3), +} CryptomatteType; + typedef enum DenoisingPassOffsets { DENOISING_PASS_NORMAL = 0, DENOISING_PASS_NORMAL_VAR = 3, @@ 
-1260,17 +1270,20 @@ typedef struct KernelFilm { int pass_shadow; float pass_shadow_scale; int filter_table_offset; + int cryptomatte_passes; + int cryptomatte_depth; + int pass_cryptomatte; int pass_mist; float mist_start; float mist_inv_depth; float mist_falloff; - + int pass_denoising_data; int pass_denoising_clean; int denoising_flags; - int pad1, pad2, pad3; + int pad1, pad2; /* XYZ to rendering color space transform. float4 instead of float3 to * ensure consistent padding/alignment across devices. */ @@ -1460,7 +1473,11 @@ typedef struct KernelObject { uint patch_map_offset; uint attribute_map_offset; uint motion_offset; - uint pad; + uint pad1; + + float cryptomatte_object; + float cryptomatte_asset; + float pad2, pad3; } KernelObject; static_assert_align(KernelObject, 16); @@ -1540,7 +1557,7 @@ static_assert_align(KernelParticle, 16); typedef struct KernelShader { float constant_emission[3]; - float pad1; + float cryptomatte_id; int flags; int pass_id; int pad2, pad3; diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu index 8a180a509e8..af311027f78 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel.cu @@ -40,14 +40,21 @@ CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) kernel_cuda_path_trace(WorkTile *tile, uint total_work_size) { int work_index = ccl_global_id(0); - - if(work_index < total_work_size) { - uint x, y, sample; + bool thread_is_active = work_index < total_work_size; + uint x, y, sample; + KernelGlobals kg; + if(thread_is_active) { get_work_pixel(tile, work_index, &x, &y, &sample); - KernelGlobals kg; kernel_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); } + + if(kernel_data.film.cryptomatte_passes) { + __syncthreads(); + if(thread_is_active) { + kernel_cryptomatte_post(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); + } + } } #ifdef __BRANCHED_PATH__ @@ -56,14 +63,21 @@ 
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS) kernel_cuda_branched_path_trace(WorkTile *tile, uint total_work_size) { int work_index = ccl_global_id(0); - - if(work_index < total_work_size) { - uint x, y, sample; + bool thread_is_active = work_index < total_work_size; + uint x, y, sample; + KernelGlobals kg; + if(thread_is_active) { get_work_pixel(tile, work_index, &x, &y, &sample); - KernelGlobals kg; kernel_branched_path_trace(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); } + + if(kernel_data.film.cryptomatte_passes) { + __syncthreads(); + if(thread_is_active) { + kernel_cryptomatte_post(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride); + } + } } #endif diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl index 63128d0aecf..de1f5088629 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel.cl @@ -66,9 +66,17 @@ __kernel void kernel_ocl_path_trace( int x = sx + ccl_global_id(0); int y = sy + ccl_global_id(1); - - if(x < sx + sw && y < sy + sh) + bool thread_is_active = x < sx + sw && y < sy + sh; + if(thread_is_active) { kernel_path_trace(kg, buffer, sample, x, y, offset, stride); + } + if(kernel_data.film.cryptomatte_passes) { + /* Make sure no thread is writing to the buffers. 
*/ + ccl_barrier(CCL_LOCAL_MEM_FENCE); + if(thread_is_active) { + kernel_cryptomatte_post(kg, buffer, sample, x, y, offset, stride); + } + } } #else /* __COMPILE_ONLY_MEGAKERNEL__ */ diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 7902381440b..81348f5594d 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -884,6 +884,23 @@ bool OSLRenderServices::has_userdata(ustring name, TypeDesc type, OSL::ShaderGlo return false; /* never called by OSL */ } +TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring filename) +{ + if (filename.length() && filename[0] == '@') { + /* Dummy, we don't use texture handles for builtin textures but need + * to tell the OSL runtime optimizer that this is a valid texture. */ + return NULL; + } + else { + return texturesys()->get_texture_handle(filename); + } +} + +bool OSLRenderServices::good(TextureSystem::TextureHandle *texture_handle) +{ + return texturesys()->good(texture_handle); +} + bool OSLRenderServices::texture(ustring filename, TextureHandle *texture_handle, TexturePerthread *texture_thread_info, @@ -894,7 +911,8 @@ bool OSLRenderServices::texture(ustring filename, int nchannels, float *result, float *dresultds, - float *dresultdt) + float *dresultdt, + ustring *errormessage) { OSL::TextureSystem *ts = osl_ts; ShaderData *sd = (ShaderData *)(sg->renderstate); @@ -1156,7 +1174,13 @@ bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg, ustring filenam TypeDesc datatype, void *data) { OSL::TextureSystem *ts = osl_ts; - return ts->get_texture_info(filename, subimage, dataname, datatype, data); + if (filename.length() && filename[0] == '@') { + /* Special builtin textures. 
*/ + return false; + } + else { + return ts->get_texture_info(filename, subimage, dataname, datatype, data); + } } int OSLRenderServices::pointcloud_search(OSL::ShaderGlobals *sg, ustring filename, const OSL::Vec3 ¢er, diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h index 50044746fd1..5dcaa4d7445 100644 --- a/intern/cycles/kernel/osl/osl_services.h +++ b/intern/cycles/kernel/osl/osl_services.h @@ -93,6 +93,10 @@ public: bool getmessage(OSL::ShaderGlobals *sg, ustring source, ustring name, TypeDesc type, void *val, bool derivatives); + TextureSystem::TextureHandle *get_texture_handle(ustring filename); + + bool good(TextureSystem::TextureHandle *texture_handle); + bool texture(ustring filename, TextureSystem::TextureHandle *texture_handle, TexturePerthread *texture_thread_info, @@ -103,7 +107,8 @@ public: int nchannels, float *result, float *dresultds, - float *dresultdt); + float *dresultdt, + ustring *errormessage); bool texture3d(ustring filename, TextureHandle *texture_handle, diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h index 4a8378796ba..f1235500f2b 100644 --- a/intern/cycles/kernel/shaders/stdosl.h +++ b/intern/cycles/kernel/shaders/stdosl.h @@ -284,33 +284,63 @@ point rotate (point p, float angle, point a, point b) normal ensure_valid_reflection(normal Ng, vector I, normal N) { + /* The implementation here mirrors the one in kernel_montecarlo.h, + * check there for an explanation of the algorithm. */ + float sqr(float x) { return x*x; } vector R = 2*dot(N, I)*N - I; - if (dot(Ng, R) >= 0.05) { + + float threshold = min(0.9*dot(Ng, I), 0.01); + if(dot(Ng, R) >= threshold) { return N; } - /* Form coordinate system with Ng as the Z axis and N inside the X-Z-plane. - * The X axis is found by normalizing the component of N that's orthogonal to Ng. - * The Y axis isn't actually needed. 
- */ - vector X = normalize(N - dot(N, Ng)*Ng); + float NdotNg = dot(N, Ng); + vector X = normalize(N - NdotNg*Ng); - /* Calculate N.z and N.x in the local coordinate system. */ float Ix = dot(I, X), Iz = dot(I, Ng); - float Ix2 = sqr(dot(I, X)), Iz2 = sqr(dot(I, Ng)); - float Ix2Iz2 = Ix2 + Iz2; + float Ix2 = sqr(Ix), Iz2 = sqr(Iz); + float a = Ix2 + Iz2; - float a = sqrt(Ix2*(Ix2Iz2 - sqr(0.05))); - float b = Iz*0.05 + Ix2Iz2; - float c = (a + b > 0.0)? (a + b) : (-a + b); + float b = sqrt(Ix2*(a - sqr(threshold))); + float c = Iz*threshold + a; - float Nz = sqrt(0.5 * c * (1.0 / Ix2Iz2)); - float Nx = sqrt(1.0 - sqr(Nz)); + float fac = 0.5/a; + float N1_z2 = fac*(b+c), N2_z2 = fac*(-b+c); + int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5)); + int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5)); - /* Transform back into global coordinates. */ - return Nx*X + Nz*Ng; + float N_new_x, N_new_z; + if(valid1 && valid2) { + float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2); + float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2); + + float R1 = 2*(N1_x*Ix + N1_z*Iz)*N1_z - Iz; + float R2 = 2*(N2_x*Ix + N2_z*Iz)*N2_z - Iz; + + valid1 = (R1 >= 1e-5); + valid2 = (R2 >= 1e-5); + if(valid1 && valid2) { + N_new_x = (R1 < R2)? N1_x : N2_x; + N_new_z = (R1 < R2)? N1_z : N2_z; + } + else { + N_new_x = (R1 > R2)? N1_x : N2_x; + N_new_z = (R1 > R2)? N1_z : N2_z; + } + + } + else if(valid1 || valid2) { + float Nz2 = valid1? 
N1_z2 : N2_z2; + N_new_x = sqrt(1.0 - Nz2); + N_new_z = sqrt(Nz2); + } + else { + return Ng; + } + + return N_new_x*X + N_new_z*Ng; } diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h index 180c0b57077..18eec6372f1 100644 --- a/intern/cycles/kernel/split/kernel_buffer_update.h +++ b/intern/cycles/kernel/split/kernel_buffer_update.h @@ -80,8 +80,10 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + bool ray_was_updated = false; if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { + ray_was_updated = true; uint sample = state->sample; uint buffer_offset = kernel_split_state.buffer_offset[ray_index]; ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; @@ -92,6 +94,17 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); } + if(kernel_data.film.cryptomatte_passes) { + /* Make sure no thread is writing to the buffers. 
*/ + ccl_barrier(CCL_LOCAL_MEM_FENCE); + if(ray_was_updated && state->sample - 1 == kernel_data.integrator.aa_samples) { + uint buffer_offset = kernel_split_state.buffer_offset[ray_index]; + ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; + ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte; + kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth); + } + } + if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) { /* We have completed current work; So get next work */ ccl_global uint *work_pools = kernel_split_params.work_pools; diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt index 7d2220f37f9..c0ce7368771 100644 --- a/intern/cycles/render/CMakeLists.txt +++ b/intern/cycles/render/CMakeLists.txt @@ -15,6 +15,7 @@ set(SRC buffers.cpp camera.cpp constant_fold.cpp + coverage.cpp film.cpp graph.cpp image.cpp @@ -46,6 +47,7 @@ set(SRC_HEADERS buffers.h camera.h constant_fold.h + coverage.h film.h graph.h image.h diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index de2b38340e9..0318834ff33 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -160,11 +160,12 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp (offset == DENOISING_PASS_DEPTH_VAR) || (offset == DENOISING_PASS_COLOR_VAR); + float scale_exposure = scale; if(offset == DENOISING_PASS_COLOR || offset == DENOISING_PASS_CLEAN) { - scale *= exposure; + scale_exposure *= exposure; } else if(offset == DENOISING_PASS_COLOR_VAR) { - scale *= exposure*exposure; + scale_exposure *= exposure*exposure; } offset += params.get_denoising_offset(); @@ -181,14 +182,14 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp if(components == 1) { for(int i = 0; i < size; i++, mean += pass_stride, var += pass_stride, pixels++) { - pixels[0] = max(0.0f, var[0] - 
mean[0]*mean[0]*invsample)*scale; + pixels[0] = max(0.0f, var[0] - mean[0]*mean[0]*invsample)*scale_exposure; } } else if(components == 3) { for(int i = 0; i < size; i++, mean += pass_stride, var += pass_stride, pixels += 3) { - pixels[0] = max(0.0f, var[0] - mean[0]*mean[0]*invsample)*scale; - pixels[1] = max(0.0f, var[1] - mean[1]*mean[1]*invsample)*scale; - pixels[2] = max(0.0f, var[2] - mean[2]*mean[2]*invsample)*scale; + pixels[0] = max(0.0f, var[0] - mean[0]*mean[0]*invsample)*scale_exposure; + pixels[1] = max(0.0f, var[1] - mean[1]*mean[1]*invsample)*scale_exposure; + pixels[2] = max(0.0f, var[2] - mean[2]*mean[2]*invsample)*scale_exposure; } } else { @@ -200,14 +201,28 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp if(components == 1) { for(int i = 0; i < size; i++, in += pass_stride, pixels++) { - pixels[0] = in[0]*scale; + pixels[0] = in[0]*scale_exposure; } } else if(components == 3) { for(int i = 0; i < size; i++, in += pass_stride, pixels += 3) { - pixels[0] = in[0]*scale; - pixels[1] = in[1]*scale; - pixels[2] = in[2]*scale; + pixels[0] = in[0]*scale_exposure; + pixels[1] = in[1]*scale_exposure; + pixels[2] = in[2]*scale_exposure; + } + } + else if(components == 4) { + assert(offset == DENOISING_PASS_COLOR); + + /* Since the alpha channel is not involved in denoising, output the Combined alpha channel. 
*/ + assert(params.passes[0].type == PASS_COMBINED); + float *in_combined = buffer.data(); + + for(int i = 0; i < size; i++, in += pass_stride, in_combined += pass_stride, pixels += 4) { + pixels[0] = in[0]*scale_exposure; + pixels[1] = in[1]*scale_exposure; + pixels[2] = in[2]*scale_exposure; + pixels[3] = saturate(in_combined[3]*scale); } } else { @@ -218,7 +233,7 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp return true; } -bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels) +bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels, const string &name) { if(buffer.data() == NULL) { return false; @@ -234,6 +249,14 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int continue; } + /* Tell Cryptomatte passes apart by their name. */ + if(pass.type == PASS_CRYPTOMATTE) { + if(pass.name != name) { + pass_offset += pass.components; + continue; + } + } + float *in = buffer.data() + pass_offset; int pass_stride = params.get_passes_size(); @@ -370,6 +393,17 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int pixels[3] = f.w*invw; } } + else if(type == PASS_CRYPTOMATTE) { + for(int i = 0; i < size; i++, in += pass_stride, pixels += 4) { + float4 f = make_float4(in[0], in[1], in[2], in[3]); + /* x and z contain integer IDs, don't rescale them. + y and w contain matte weights, they get scaled. 
*/ + pixels[0] = f.x; + pixels[1] = f.y * scale; + pixels[2] = f.z; + pixels[3] = f.w * scale; + } + } else { for(int i = 0; i < size; i++, in += pass_stride, pixels += 4) { float4 f = make_float4(in[0], in[1], in[2], in[3]); diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index 1b06ffe33a6..a8f019dddd6 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -50,7 +50,7 @@ public: int full_height; /* passes */ - array passes; + vector passes; bool denoising_data_pass; /* If only some light path types should be denoised, an additional pass is needed. */ bool denoising_clean_pass; @@ -84,7 +84,7 @@ public: void zero(); bool copy_from_device(); - bool get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels); + bool get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels, const string &name); bool get_denoising_pass_rect(int offset, float exposure, int sample, int components, float *pixels); }; diff --git a/intern/cycles/render/coverage.cpp b/intern/cycles/render/coverage.cpp new file mode 100644 index 00000000000..72ef4cda3ff --- /dev/null +++ b/intern/cycles/render/coverage.cpp @@ -0,0 +1,143 @@ +/* + * Copyright 2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "render/coverage.h" +#include "kernel/kernel_compat_cpu.h" +#include "kernel/split/kernel_split_data.h" +#include "kernel/kernel_globals.h" +#include "kernel/kernel_id_passes.h" +#include "kernel/kernel_types.h" +#include "util/util_map.h" +#include "util/util_vector.h" + +CCL_NAMESPACE_BEGIN + +static bool crypomatte_comp(const pair& i, const pair j) { return i.first > j.first; } + +void Coverage::finalize() +{ + int pass_offset = 0; + if(kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { + finalize_buffer(coverage_object, pass_offset); + pass_offset += kernel_data.film.cryptomatte_depth * 4; + } + if(kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { + finalize_buffer(coverage_material, pass_offset); + pass_offset += kernel_data.film.cryptomatte_depth * 4; + } + if(kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { + finalize_buffer(coverage_asset, pass_offset); + } +} + +void Coverage::init_path_trace() +{ + kg->coverage_object = kg->coverage_material = kg->coverage_asset = NULL; + + if(kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE) { + if(kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { + coverage_object.clear(); + coverage_object.resize(tile.w * tile.h); + } + if(kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { + coverage_material.clear(); + coverage_material.resize(tile.w * tile.h); + } + if(kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { + coverage_asset.clear(); + coverage_asset.resize(tile.w * tile.h); + } + } +} + +void Coverage::init_pixel(int x, int y) +{ + if(kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE) { + const int pixel_index = tile.w * (y - tile.y) + x - tile.x; + if(kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { + kg->coverage_object = &coverage_object[pixel_index]; + } + if(kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { + kg->coverage_material = &coverage_material[pixel_index]; + } + if(kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { + kg->coverage_asset = 
&coverage_asset[pixel_index]; + } + } +} + +void Coverage::finalize_buffer(vector & coverage, const int pass_offset) +{ + if(kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE) { + flatten_buffer(coverage, pass_offset); + } + else { + sort_buffer(pass_offset); + } +} + +void Coverage::flatten_buffer(vector &coverage, const int pass_offset) +{ + /* Sort the coverage map and write it to the output */ + int pixel_index = 0; + int pass_stride = tile.buffers->params.get_passes_size(); + for(int y = 0; y < tile.h; ++y) { + for(int x = 0; x < tile.w; ++x) { + const CoverageMap& pixel = coverage[pixel_index]; + if(!pixel.empty()) { + /* buffer offset */ + int index = x + y * tile.stride; + float *buffer = (float*)tile.buffer + index*pass_stride; + + /* sort the cryptomatte pixel */ + vector > sorted_pixel; + for(CoverageMap::const_iterator it = pixel.begin(); it != pixel.end(); ++it) { + sorted_pixel.push_back(std::make_pair(it->second, it->first)); + } + sort(sorted_pixel.begin(), sorted_pixel.end(), crypomatte_comp); + int num_slots = 2 * (kernel_data.film.cryptomatte_depth); + if(sorted_pixel.size() > num_slots) { + float leftover = 0.0f; + for(vector >::iterator it = sorted_pixel.begin()+num_slots; it != sorted_pixel.end(); ++it) { + leftover += it->first; + } + sorted_pixel[num_slots-1].first += leftover; + } + int limit = min(num_slots, sorted_pixel.size()); + for(int i = 0; i < limit; ++i) { + kernel_write_id_slots(buffer + kernel_data.film.pass_cryptomatte + pass_offset, 2 * (kernel_data.film.cryptomatte_depth), sorted_pixel[i].second, sorted_pixel[i].first); + } + } + ++pixel_index; + } + } +} + +void Coverage::sort_buffer(const int pass_offset) +{ + /* Sort the coverage map and write it to the output */ + int pass_stride = tile.buffers->params.get_passes_size(); + for(int y = 0; y < tile.h; ++y) { + for(int x = 0; x < tile.w; ++x) { + /* buffer offset */ + int index = x + y*tile.stride; + float *buffer = (float*)tile.buffer + index*pass_stride; + 
kernel_sort_id_slots(buffer + kernel_data.film.pass_cryptomatte + pass_offset, 2 * (kernel_data.film.cryptomatte_depth)); + } + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/render/coverage.h b/intern/cycles/render/coverage.h new file mode 100644 index 00000000000..16176ce4beb --- /dev/null +++ b/intern/cycles/render/coverage.h @@ -0,0 +1,49 @@ +/* + * Copyright 2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "render/buffers.h" +#include "kernel/kernel_compat_cpu.h" +#include "kernel/split/kernel_split_data.h" +#include "kernel/kernel_globals.h" +#include "util/util_map.h" +#include "util/util_vector.h" + +#ifndef __COVERAGE_H__ +#define __COVERAGE_H__ + +CCL_NAMESPACE_BEGIN + +class Coverage { +public: + Coverage(KernelGlobals *kg_, RenderTile &tile_) : kg(kg_), tile(tile_) { } + void init_path_trace(); + void init_pixel(int x, int y); + void finalize(); +private: + vectorcoverage_object; + vectorcoverage_material; + vectorcoverage_asset; + KernelGlobals *kg; + RenderTile &tile; + void finalize_buffer(vector&coverage, const int pass_offset); + void flatten_buffer(vector&coverage, const int pass_offset); + void sort_buffer(const int pass_offset); +}; + + +CCL_NAMESPACE_END + +#endif /* __COVERAGE_H__ */ diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp index 8f3596ade58..d0f15496e50 100644 --- a/intern/cycles/render/film.cpp +++ b/intern/cycles/render/film.cpp @@ -38,11 +38,14 @@ 
static bool compare_pass_order(const Pass& a, const Pass& b) return (a.components > b.components); } -void Pass::add(PassType type, array& passes) +void Pass::add(PassType type, vector& passes, const char *name) { - for(size_t i = 0; i < passes.size(); i++) - if(passes[i].type == type) + for(size_t i = 0; i < passes.size(); i++) { + if(passes[i].type == type && + (name ? (passes[i].name == name) : passes[i].name.empty())) { return; + } + } Pass pass; @@ -50,6 +53,9 @@ void Pass::add(PassType type, array& passes) pass.filter = true; pass.exposure = false; pass.divide_type = PASS_NONE; + if(name) { + pass.name = name; + } switch(type) { case PASS_NONE: @@ -155,13 +161,15 @@ void Pass::add(PassType type, array& passes) pass.components = 4; pass.exposure = true; break; - + case PASS_CRYPTOMATTE: + pass.components = 4; + break; default: assert(false); break; } - passes.push_back_slow(pass); + passes.push_back(pass); /* order from by components, to ensure alignment so passes with size 4 * come first and then passes with size 1 */ @@ -171,19 +179,19 @@ void Pass::add(PassType type, array& passes) Pass::add(pass.divide_type, passes); } -bool Pass::equals(const array& A, const array& B) +bool Pass::equals(const vector& A, const vector& B) { if(A.size() != B.size()) return false; for(int i = 0; i < A.size(); i++) - if(A[i].type != B[i].type) + if(A[i].type != B[i].type || A[i].name != B[i].name) return false; return true; } -bool Pass::contains(const array& passes, PassType type) +bool Pass::contains(const vector& passes, PassType type) { for(size_t i = 0; i < passes.size(); i++) if(passes[i].type == type) @@ -290,6 +298,7 @@ Film::Film() use_light_visibility = false; filter_table_offset = TABLE_OFFSET_INVALID; + cryptomatte_passes = CRYPT_NONE; need_update = true; } @@ -314,6 +323,8 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->pass_stride = 0; kfilm->use_light_pass = use_light_visibility || use_sample_clamp; + bool have_cryptomatte = 
false; + for(size_t i = 0; i < passes.size(); i++) { Pass& pass = passes[i]; @@ -434,7 +445,10 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) #endif case PASS_RENDER_TIME: break; - + case PASS_CRYPTOMATTE: + kfilm->pass_cryptomatte = have_cryptomatte ? min(kfilm->pass_cryptomatte, kfilm->pass_stride) : kfilm->pass_stride; + have_cryptomatte = true; + break; default: assert(false); break; @@ -471,6 +485,9 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->mist_inv_depth = (mist_depth > 0.0f)? 1.0f/mist_depth: 0.0f; kfilm->mist_falloff = mist_falloff; + kfilm->cryptomatte_passes = cryptomatte_passes; + kfilm->cryptomatte_depth = cryptomatte_depth; + pass_stride = kfilm->pass_stride; denoising_data_offset = kfilm->pass_denoising_data; denoising_clean_offset = kfilm->pass_denoising_clean; @@ -490,7 +507,7 @@ bool Film::modified(const Film& film) return !Node::equals(film) || !Pass::equals(passes, film.passes); } -void Film::tag_passes_update(Scene *scene, const array& passes_) +void Film::tag_passes_update(Scene *scene, const vector& passes_) { if(Pass::contains(passes, PASS_UV) != Pass::contains(passes_, PASS_UV)) { scene->mesh_manager->tag_update(scene); diff --git a/intern/cycles/render/film.h b/intern/cycles/render/film.h index 6ab2eea79b8..57f1bf4eb64 100644 --- a/intern/cycles/render/film.h +++ b/intern/cycles/render/film.h @@ -45,10 +45,11 @@ public: bool filter; bool exposure; PassType divide_type; + string name; - static void add(PassType type, array& passes); - static bool equals(const array& A, const array& B); - static bool contains(const array& passes, PassType); + static void add(PassType type, vector& passes, const char* name = NULL); + static bool equals(const vector& A, const vector& B); + static bool contains(const vector& passes, PassType); }; class Film : public Node { @@ -56,7 +57,7 @@ public: NODE_DECLARE float exposure; - array passes; + vector passes; bool denoising_data_pass; 
bool denoising_clean_pass; int denoising_flags; @@ -76,6 +77,8 @@ public: bool use_light_visibility; bool use_sample_clamp; + CryptomatteType cryptomatte_passes; + int cryptomatte_depth; bool need_update; @@ -86,7 +89,7 @@ public: void device_free(Device *device, DeviceScene *dscene, Scene *scene); bool modified(const Film& film); - void tag_passes_update(Scene *scene, const array& passes_); + void tag_passes_update(Scene *scene, const vector& passes_); void tag_update(Scene *scene); }; diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index e3f35c366d6..a56a8a6ec58 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -28,6 +28,7 @@ #include "util/util_map.h" #include "util/util_progress.h" #include "util/util_vector.h" +#include "util/util_murmurhash.h" #include "subd/subd_patch_table.h" @@ -483,6 +484,10 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s kobject.numverts = mesh->verts.size(); kobject.patch_map_offset = 0; kobject.attribute_map_offset = 0; + uint32_t hash_name = util_murmur_hash3(ob->name.c_str(), ob->name.length(), 0); + uint32_t hash_asset = util_murmur_hash3(ob->asset_name.c_str(), ob->asset_name.length(), 0); + kobject.cryptomatte_object = util_hash_to_float(hash_name); + kobject.cryptomatte_asset = util_hash_to_float(hash_asset); /* Object flag. 
*/ if(ob->use_holdout) { diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h index b80c4aef70b..bd44b35aba3 100644 --- a/intern/cycles/render/object.h +++ b/intern/cycles/render/object.h @@ -48,6 +48,7 @@ public: BoundBox bounds; uint random_id; int pass_id; + ustring asset_name; vector attributes; uint visibility; array motion; diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index e428e174712..13075a1ee2c 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -30,6 +30,7 @@ #include "render/tables.h" #include "util/util_foreach.h" +#include "util/util_murmurhash.h" #ifdef WITH_OCIO # include @@ -524,12 +525,15 @@ void ShaderManager::device_update_common(Device *device, if(shader->is_constant_emission(&constant_emission)) flag |= SD_HAS_CONSTANT_EMISSION; + uint32_t cryptomatte_id = util_murmur_hash3(shader->name.c_str(), shader->name.length(), 0); + /* regular shader */ kshader->flags = flag; kshader->pass_id = shader->pass_id; kshader->constant_emission[0] = constant_emission.x; kshader->constant_emission[1] = constant_emission.y; kshader->constant_emission[2] = constant_emission.z; + kshader->cryptomatte_id = util_hash_to_float(cryptomatte_id); kshader++; has_transparent_shadow |= (flag & SD_HAS_TRANSPARENT_SHADOW) != 0; diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index 291f9a9fcae..4f623c5dfb7 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -15,6 +15,7 @@ set(SRC util_logging.cpp util_math_cdf.cpp util_md5.cpp + util_murmurhash.cpp util_path.cpp util_string.cpp util_simd.cpp @@ -64,6 +65,7 @@ set(SRC_HEADERS util_math_int4.h util_math_matrix.h util_md5.h + util_murmurhash.h util_opengl.h util_optimization.h util_param.h diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h index f3c7ae546a0..e17e99d0acd 100644 --- a/intern/cycles/util/util_atomic.h +++ 
b/intern/cycles/util/util_atomic.h @@ -23,6 +23,7 @@ #include "atomic_ops.h" #define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x)) +#define atomic_compare_and_swap_float(p, old_val, new_val) atomic_cas_float((p), (old_val), (new_val)) #define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) #define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_add_uint32((p), -1) @@ -57,6 +58,20 @@ ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *so return new_value.float_value; } +ccl_device_inline float atomic_compare_and_swap_float(volatile ccl_global float *dest, + const float old_val, const float new_val) +{ + union { + unsigned int int_value; + float float_value; + } new_value, prev_value, result; + prev_value.float_value = old_val; + new_value.float_value = new_val; + result.int_value = atomic_cmpxchg((volatile ccl_global unsigned int *)dest, + prev_value.int_value, new_value.int_value); + return result.float_value; +} + #define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x)) #define atomic_fetch_and_inc_uint32(p) atomic_inc((p)) #define atomic_fetch_and_dec_uint32(p) atomic_dec((p)) @@ -75,6 +90,19 @@ ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *so #define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) #define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1) +ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest, + const float old_val, const float new_val) +{ + union { + unsigned int int_value; + float float_value; + } new_value, prev_value, result; + prev_value.float_value = old_val; + new_value.float_value = new_val; + result.int_value = atomicCAS((unsigned int *)dest, prev_value.int_value,new_value.int_value); + return result.float_value; +} + #define CCL_LOCAL_MEM_FENCE #define ccl_barrier(flags) __syncthreads() diff --git a/intern/cycles/util/util_murmurhash.cpp b/intern/cycles/util/util_murmurhash.cpp 
new file mode 100644 index 00000000000..68b2f2031be --- /dev/null +++ b/intern/cycles/util/util_murmurhash.cpp @@ -0,0 +1,127 @@ +/* + * Copyright 2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This is taken from alShaders/Cryptomatte/MurmurHash3.h: + * + * MurmurHash3 was written by Austin Appleby, and is placed in the public + * domain. The author hereby disclaims copyright to this source code. + * + */ + +#include +#include + +#include "util/util_algorithm.h" +#include "util/util_murmurhash.h" + +#if defined(_MSC_VER) +# define ROTL32(x,y) _rotl(x,y) +# define ROTL64(x,y) _rotl64(x,y) +# define BIG_CONSTANT(x) (x) +#else +ccl_device_inline uint32_t rotl32(uint32_t x, int8_t r) +{ + return (x << r) | (x >> (32 - r)); +} +# define ROTL32(x,y) rotl32(x,y) +# define BIG_CONSTANT(x) (x##LLU) +#endif + +CCL_NAMESPACE_BEGIN + +/* Block read - if your platform needs to do endian-swapping or can only + * handle aligned reads, do the conversion here. 
*/ +ccl_device_inline uint32_t mm_hash_getblock32(const uint32_t *p, int i) +{ + return p[i]; +} + +/* Finalization mix - force all bits of a hash block to avalanche */ +ccl_device_inline uint32_t mm_hash_fmix32 ( uint32_t h ) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; +} + +uint32_t util_murmur_hash3(const void *key, int len, uint32_t seed) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); + + for(int i = -nblocks; i; i++) { + uint32_t k1 = mm_hash_getblock32(blocks,i); + + k1 *= c1; + k1 = ROTL32(k1,15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1,13); + h1 = h1 * 5 + 0xe6546b64; + } + + const uint8_t *tail = (const uint8_t*)(data + nblocks*4); + + uint32_t k1 = 0; + + switch(len & 3) { + case 3: + k1 ^= tail[2] << 16; + ATTR_FALLTHROUGH; + case 2: + k1 ^= tail[1] << 8; + ATTR_FALLTHROUGH; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = ROTL32(k1,15); + k1 *= c2; + h1 ^= k1; + } + + h1 ^= len; + h1 = mm_hash_fmix32(h1); + return h1; +} + +/* This is taken from the cryptomatte specification 1.0 */ +float util_hash_to_float(uint32_t hash) +{ + uint32_t mantissa = hash & (( 1 << 23) - 1); + uint32_t exponent = (hash >> 23) & ((1 << 8) - 1); + exponent = max(exponent, (uint32_t) 1); + exponent = min(exponent, (uint32_t) 254); + exponent = exponent << 23; + uint32_t sign = (hash >> 31); + sign = sign << 31; + uint32_t float_bits = sign | exponent | mantissa; + float f; + memcpy(&f, &float_bits, sizeof(uint32_t)); + return f; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_murmurhash.h b/intern/cycles/util/util_murmurhash.h new file mode 100644 index 00000000000..824ed59cb16 --- /dev/null +++ b/intern/cycles/util/util_murmurhash.h @@ -0,0 +1,30 @@ +/* + * Copyright 2018 Blender Foundation + * + * Licensed under 
the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef __UTIL_MURMURHASH_H__ +#define __UTIL_MURMURHASH_H__ + +#include "util/util_types.h" + +CCL_NAMESPACE_BEGIN + +uint32_t util_murmur_hash3(const void *key, int len, uint32_t seed); /* 32-bit hash of the len bytes at key, starting from seed */ +float util_hash_to_float(uint32_t hash); /* float built from the bits of hash, exponent field clamped to [1, 254] */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_MURMURHASH_H__ */ diff --git a/release/scripts/addons b/release/scripts/addons index 2e14c2aa697..6c3a46dc113 160000 --- a/release/scripts/addons +++ b/release/scripts/addons @@ -1 +1 @@ -Subproject commit 2e14c2aa69726b472f8758f62ad839eddaf63bfe +Subproject commit 6c3a46dc113de870a03191e4c0685238b0823acd diff --git a/release/scripts/addons_contrib b/release/scripts/addons_contrib index 311b03bd2ce..15b25a42783 160000 --- a/release/scripts/addons_contrib +++ b/release/scripts/addons_contrib @@ -1 +1 @@ -Subproject commit 311b03bd2ce4c5b3c3fc5e2b58a4ee1a629ea6a9 +Subproject commit 15b25a42783d1e516b5298d70b582fae2559ae17 diff --git a/source/blender/editors/space_file/fsmenu.c b/source/blender/editors/space_file/fsmenu.c index 931e8627b71..768bdba3520 100644 --- a/source/blender/editors/space_file/fsmenu.c +++ b/source/blender/editors/space_file/fsmenu.c @@ -603,6 +603,8 @@ void fsmenu_read_system(struct FSMenu *fsmenu, int read_bookmarks) /* not sure if this is right, but seems to give the relevant mnts */ if (!STREQLEN(mnt->mnt_fsname, "/dev", 4)) continue; + if (STREQLEN(mnt->mnt_fsname, "/dev/loop", 9)) + continue; len = strlen(mnt->mnt_dir); if (len && 
mnt->mnt_dir[len - 1] != '/') { diff --git a/source/tools b/source/tools index 5162393c104..11656ebaf7f 160000 --- a/source/tools +++ b/source/tools @@ -1 +1 @@ -Subproject commit 5162393c104d6d5f0314183a084875fff68f28bb +Subproject commit 11656ebaf7f912cdb1b5eb39c5d0a3b5d492c1aa