Realtime Compositor: Implement Keying node

This patch implements the Keying node for the realtime compositor. To
ease the implementation, some morphological operators were moved into
the algorithms module, and a mechanism to steal data between results
was added to the Result class.

Pull Request: https://projects.blender.org/blender/blender/pulls/108393
Omar Emara authored and committed 2023-06-24 13:02:33 +02:00
parent ec428c3f7f
commit c9e6399fe1
20 changed files with 781 additions and 129 deletions

@ -68,10 +68,14 @@ set(SRC
COM_texture_pool.hh
COM_utilities.hh
algorithms/intern/algorithm_parallel_reduction.cc
algorithms/intern/morphological_distance.cc
algorithms/intern/morphological_distance_feather.cc
algorithms/intern/parallel_reduction.cc
algorithms/intern/smaa.cc
algorithms/intern/symmetric_separable_blur.cc
algorithms/COM_algorithm_morphological_distance.hh
algorithms/COM_algorithm_morphological_distance_feather.hh
algorithms/COM_algorithm_parallel_reduction.hh
algorithms/COM_algorithm_smaa.hh
algorithms/COM_algorithm_symmetric_separable_blur.hh
@ -135,6 +139,11 @@ set(GLSL_SRC
shaders/compositor_glare_streaks_filter.glsl
shaders/compositor_id_mask.glsl
shaders/compositor_image_crop.glsl
shaders/compositor_keying_compute_image.glsl
shaders/compositor_keying_compute_matte.glsl
shaders/compositor_keying_extract_chroma.glsl
shaders/compositor_keying_replace_chroma.glsl
shaders/compositor_keying_tweak_matte.glsl
shaders/compositor_map_uv.glsl
shaders/compositor_morphological_distance.glsl
shaders/compositor_morphological_distance_feather.glsl
@ -239,6 +248,7 @@ set(SRC_SHADER_CREATE_INFOS
shaders/infos/compositor_glare_info.hh
shaders/infos/compositor_id_mask_info.hh
shaders/infos/compositor_image_crop_info.hh
shaders/infos/compositor_keying_info.hh
shaders/infos/compositor_map_uv_info.hh
shaders/infos/compositor_morphological_distance_feather_info.hh
shaders/infos/compositor_morphological_distance_info.hh

@ -164,6 +164,20 @@ class Result {
* the discussion above for more information. */
void pass_through(Result &target);
/* Steal the allocated data from the given source result and assign it to this result, then
* remove any references to the data from the source result. It is assumed that:
*
* - Both results are of the same type.
* - This result is not allocated but the source result is allocated.
* - Neither of the results is a proxy one, that is, has a master result.
*
* This is different from proxy results and the pass_through mechanism in that it can be used on
* temporary results. It is most useful in multi-step compositor operations where some steps are
* optional; in that case, the intermediate results can be temporary results whose data is
* eventually stolen by the actual output of the operation. See the uses of the method for
* practical examples. */
void steal_data(Result &source);
/* Transform the result by the given transformation. This effectively pre-multiplies the given
* transformation by the current transformation of the domain of the result. */
void transform(const float3x3 &transformation);
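
To illustrate the steal_data() contract documented above, here is a minimal sketch of a multi-step operation. The helper name and the omitted dispatch are hypothetical; only Result::Temporary(), allocate_texture() and steal_data() are APIs from this patch.

#include "COM_context.hh"
#include "COM_result.hh"

namespace blender::realtime_compositor {

/* Hypothetical two-step operation: the first step writes into a temporary result, and the
 * unallocated output then takes over that texture instead of copying it. */
static void example_two_step_operation(Context &context, Result &input, Result &output)
{
  Result intermediate = Result::Temporary(ResultType::Float, context.texture_pool());
  intermediate.allocate_texture(input.domain());
  /* ... dispatch a shader that fills `intermediate` from `input` ... */

  /* `output` was not allocated; it takes over the temporary's texture, and the temporary no
   * longer references it, so it is not released separately. */
  output.steal_data(intermediate);
}

}  // namespace blender::realtime_compositor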

@ -0,0 +1,18 @@
/* SPDX-FileCopyrightText: 2023 Blender Foundation
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "COM_context.hh"
#include "COM_result.hh"
namespace blender::realtime_compositor {
/* Dilate or erode the given input using a morphological operator with a circular structuring
* element of radius equivalent to the absolute value of the given distance parameter. A positive
* distance corresponds to a dilate operator, while a negative distance corresponds to an erode
* operator. */
void morphological_distance(Context &context, Result &input, Result &output, int distance);
} // namespace blender::realtime_compositor
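
A hedged usage sketch (hypothetical caller, not part of the patch): the sign of the distance selects between the dilate and erode shaders, and the algorithm allocates the output from the input's domain.

#include "COM_algorithm_morphological_distance.hh"
#include "COM_context.hh"
#include "COM_result.hh"

namespace blender::realtime_compositor {

/* Hypothetical helper: dilate a mask by 5 pixels and erode a copy of it by 3 pixels. */
static void dilate_and_erode_mask(Context &context, Result &mask, Result &dilated, Result &eroded)
{
  morphological_distance(context, mask, dilated, 5);  /* Positive distance: dilate. */
  morphological_distance(context, mask, eroded, -3);  /* Negative distance: erode. */
}

}  // namespace blender::realtime_compositor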

@ -0,0 +1,22 @@
/* SPDX-FileCopyrightText: 2023 Blender Foundation
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "DNA_scene_types.h"
#include "COM_context.hh"
#include "COM_result.hh"
namespace blender::realtime_compositor {
/* Dilate or erode the given input using a morphological inverse distance operation evaluated at
* the given falloff. The radius of the structuring element is equivalent to the absolute value of
* the given distance parameter. A positive distance corresponds to a dilate operator, while a
* negative distance corresponds to an erode operator. See the implementation and shader for more
* information. */
void morphological_distance_feather(
Context &context, Result &input, Result &output, int distance, int falloff_type = PROP_SMOOTH);
} // namespace blender::realtime_compositor

@ -6,6 +6,8 @@
#include "BLI_math_vector_types.hh"
#include "DNA_scene_types.h"
#include "COM_context.hh"
#include "COM_result.hh"
@ -22,8 +24,8 @@ void symmetric_separable_blur(Context &context,
Result &input,
Result &output,
float2 radius,
int filter_type,
bool extend_bounds,
bool gamma_correct);
int filter_type = R_FILTER_GAUSS,
bool extend_bounds = false,
bool gamma_correct = false);
} // namespace blender::realtime_compositor

@ -0,0 +1,46 @@
/* SPDX-FileCopyrightText: 2023 Blender Foundation
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_math_base.hh"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "COM_context.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
#include "COM_algorithm_morphological_distance.hh"
namespace blender::realtime_compositor {
static const char *get_shader_name(int distance)
{
if (distance > 0) {
return "compositor_morphological_distance_dilate";
}
return "compositor_morphological_distance_erode";
}
void morphological_distance(Context &context, Result &input, Result &output, int distance)
{
GPUShader *shader = context.shader_manager().get(get_shader_name(distance));
GPU_shader_bind(shader);
/* Pass the absolute value of the distance. We have specialized shaders for each sign. */
GPU_shader_uniform_1i(shader, "radius", math::abs(distance));
input.bind_as_texture(shader, "input_tx");
output.allocate_texture(input.domain());
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, input.domain().size);
GPU_shader_unbind();
output.unbind_as_image();
input.unbind_as_texture();
}
} // namespace blender::realtime_compositor

@ -0,0 +1,107 @@
/* SPDX-FileCopyrightText: 2023 Blender Foundation
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_math_base.hh"
#include "BLI_math_vector_types.hh"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "COM_algorithm_symmetric_separable_blur.hh"
#include "COM_context.hh"
#include "COM_morphological_distance_feather_weights.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
namespace blender::realtime_compositor {
static const char *get_shader_name(int distance)
{
if (distance > 0) {
return "compositor_morphological_distance_feather_dilate";
}
return "compositor_morphological_distance_feather_erode";
}
static Result horizontal_pass(Context &context, Result &input, int distance, int falloff_type)
{
GPUShader *shader = context.shader_manager().get(get_shader_name(distance));
GPU_shader_bind(shader);
input.bind_as_texture(shader, "input_tx");
const MorphologicalDistanceFeatherWeights &weights =
context.cache_manager().morphological_distance_feather_weights.get(falloff_type,
math::abs(distance));
weights.bind_weights_as_texture(shader, "weights_tx");
weights.bind_distance_falloffs_as_texture(shader, "falloffs_tx");
/* We allocate an output image of a transposed size, that is, with a height equivalent to the
* width of the input and vice versa. This is done as a performance optimization. The shader
* will process the image horizontally and write it to the intermediate output transposed. Then
* the vertical pass will execute the same horizontal pass shader, but since its input is
* transposed, it will effectively do a vertical pass and write to the output transposed,
* effectively undoing the transposition in the horizontal pass. This is done to improve
* spatial cache locality in the shader and to avoid having two separate shaders for each of
* the passes. */
const Domain domain = input.domain();
const int2 transposed_domain = int2(domain.size.y, domain.size.x);
Result output = Result::Temporary(ResultType::Float, context.texture_pool());
output.allocate_texture(transposed_domain);
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, domain.size);
GPU_shader_unbind();
input.unbind_as_texture();
weights.unbind_weights_as_texture();
weights.unbind_distance_falloffs_as_texture();
output.unbind_as_image();
return output;
}
static void vertical_pass(Context &context,
Result &original_input,
Result &horizontal_pass_result,
Result &output,
int distance,
int falloff_type)
{
GPUShader *shader = context.shader_manager().get(get_shader_name(distance));
GPU_shader_bind(shader);
horizontal_pass_result.bind_as_texture(shader, "input_tx");
const MorphologicalDistanceFeatherWeights &weights =
context.cache_manager().morphological_distance_feather_weights.get(falloff_type,
math::abs(distance));
weights.bind_weights_as_texture(shader, "weights_tx");
weights.bind_distance_falloffs_as_texture(shader, "falloffs_tx");
const Domain domain = original_input.domain();
output.allocate_texture(domain);
output.bind_as_image(shader, "output_img");
/* Notice that the domain is transposed, see the note on the horizontal pass function for more
* information on the reasoning behind this. */
compute_dispatch_threads_at_least(shader, int2(domain.size.y, domain.size.x));
GPU_shader_unbind();
horizontal_pass_result.unbind_as_texture();
weights.unbind_weights_as_texture();
weights.unbind_distance_falloffs_as_texture();
output.unbind_as_image();
}
void morphological_distance_feather(
Context &context, Result &input, Result &output, int distance, int falloff_type)
{
Result horizontal_pass_result = horizontal_pass(context, input, distance, falloff_type);
vertical_pass(context, input, horizontal_pass_result, output, distance, falloff_type);
horizontal_pass_result.release();
}
} // namespace blender::realtime_compositor
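
The transposed intermediate used by the horizontal and vertical passes above can be illustrated with a small CPU-side sketch (illustration only, not part of the patch, and using a plain box blur instead of the feather weights): a single routine blurs along rows and writes its result transposed, so applying it twice blurs rows and then columns, and the double transposition restores the original orientation.

#include <vector>

/* Blur each row of a width x height image and write the result transposed, i.e. the blurred
 * pixel (x, y) lands at position (y, x) of a height x width output. */
std::vector<float> blur_rows_into_transposed(const std::vector<float> &image,
                                             int width,
                                             int height,
                                             int radius)
{
  std::vector<float> transposed(image.size(), 0.0f);
  for (int y = 0; y < height; y++) {
    for (int x = 0; x < width; x++) {
      float sum = 0.0f;
      int count = 0;
      for (int i = -radius; i <= radius; i++) {
        const int sx = x + i;
        if (sx >= 0 && sx < width) {
          sum += image[y * width + sx];
          count++;
        }
      }
      /* Transposed write: row x of the output, column y. */
      transposed[x * height + y] = sum / float(count);
    }
  }
  return transposed;
}

/* First call: horizontal pass with a transposed output. Second call on the height x width
 * intermediate: the same row blur now runs along the original columns and transposes the image
 * back to width x height. */
std::vector<float> separable_box_blur(const std::vector<float> &image,
                                      int width,
                                      int height,
                                      int radius)
{
  const std::vector<float> horizontal = blur_rows_into_transposed(image, width, height, radius);
  return blur_rows_into_transposed(horizontal, height, width, radius);
}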

@ -10,6 +10,7 @@
#include "GPU_texture.h"
#include "COM_context.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
#include "COM_algorithm_symmetric_separable_blur.hh"
@ -18,6 +19,15 @@
namespace blender::realtime_compositor {
static const char *get_blur_shader(ResultType type)
{
if (type == ResultType::Float) {
return "compositor_symmetric_separable_blur_float";
}
return "compositor_symmetric_separable_blur_color";
}
static Result horizontal_pass(Context &context,
Result &input,
float radius,
@ -25,7 +35,7 @@ static Result horizontal_pass(Context &context,
bool extend_bounds,
bool gamma_correct)
{
GPUShader *shader = context.shader_manager().get("compositor_symmetric_separable_blur");
GPUShader *shader = context.shader_manager().get(get_blur_shader(input.type()));
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "extend_bounds", extend_bounds);
@ -53,7 +63,7 @@ static Result horizontal_pass(Context &context,
* pass. */
const int2 transposed_domain = int2(domain.size.y, domain.size.x);
Result output = Result::Temporary(ResultType::Color, context.texture_pool());
Result output = Result::Temporary(input.type(), context.texture_pool());
output.allocate_texture(transposed_domain);
output.bind_as_image(shader, "output_img");
@ -76,7 +86,7 @@ static void vertical_pass(Context &context,
bool extend_bounds,
bool gamma_correct)
{
GPUShader *shader = context.shader_manager().get("compositor_symmetric_separable_blur");
GPUShader *shader = context.shader_manager().get(get_blur_shader(original_input.type()));
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "extend_bounds", extend_bounds);

@ -2,6 +2,7 @@
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_assert.h"
#include "BLI_math_matrix_types.hh"
#include "BLI_math_vector_types.hh"
@ -133,6 +134,33 @@ void Result::pass_through(Result &target)
target.master_ = this;
}
void Result::steal_data(Result &source)
{
BLI_assert(type_ == source.type_);
BLI_assert(!is_allocated() && source.is_allocated());
BLI_assert(master_ == nullptr && source.master_ == nullptr);
is_single_value_ = source.is_single_value_;
texture_ = source.texture_;
texture_pool_ = source.texture_pool_;
domain_ = source.domain_;
switch (type_) {
case ResultType::Float:
float_value_ = source.float_value_;
break;
case ResultType::Vector:
vector_value_ = source.vector_value_;
break;
case ResultType::Color:
color_value_ = source.color_value_;
break;
}
source.texture_ = nullptr;
source.texture_pool_ = nullptr;
}
void Result::transform(const float3x3 &transformation)
{
domain_.transform(transformation);
@ -235,6 +263,7 @@ void Result::release()
reference_count_--;
if (reference_count_ == 0) {
texture_pool_->release(texture_);
texture_ = nullptr;
}
}

@ -0,0 +1,21 @@
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec4 key = texture_load(key_tx, texel);
vec4 color = texture_load(input_tx, texel);
float matte = texture_load(matte_tx, texel).x;
/* Multiply the image by the matte, premultiplying the alpha. */
color *= matte;
/* Color despill. */
ivec3 key_argmax = argmax(key.rgb);
float weighted_average = mix(color[key_argmax.y], color[key_argmax.z], despill_balance);
color[key_argmax.x] -= (color[key_argmax.x] - weighted_average) * despill_factor;
imageStore(output_img, texel, color);
}
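
Spelled out, with p, a and b being the channel indices argmax.x, argmax.y and argmax.z of the key colour as documented for argmax below, the despill above computes:

  average = mix(c_a, c_b, balance) = (1 - balance) * c_a + balance * c_b
  c_p <- c_p - factor * (c_p - average)

so with a despill factor of 1 the channel c_p is pulled all the way to the weighted average of the other two channels.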

@ -0,0 +1,32 @@
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
float compute_saturation(vec4 color, ivec3 argmax)
{
float weighted_average = mix(color[argmax.y], color[argmax.z], key_balance);
return (color[argmax.x] - weighted_average) * abs(1.0 - weighted_average);
}
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec4 input_color = texture_load(input_tx, texel);
/* We assume that the keying screen will not be overexposed in the image, so if the input
* brightness is high, we assume the pixel is opaque. */
if (min_v3(input_color) > 1.0f) {
imageStore(output_img, texel, vec4(1.0));
return;
}
vec4 key_color = texture_load(key_tx, texel);
ivec3 key_argmax = argmax(key_color.rgb);
float input_saturation = compute_saturation(input_color, key_argmax);
float key_saturation = compute_saturation(key_color, key_argmax);
float matte = 1.0f - clamp(input_saturation / key_saturation, 0.0, 1.0);
imageStore(output_img, texel, vec4(matte));
}
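
In the same notation, the saturation measure and the matte computed above are:

  saturation(c) = (c_p - mix(c_a, c_b, balance)) * abs(1 - mix(c_a, c_b, balance))
  matte = 1 - clamp(saturation(input) / saturation(key), 0, 1)

so a pixel whose saturation reaches that of the key colour itself is keyed out (matte 0), a pixel with no such saturation stays opaque (matte 1), and pixels brighter than 1 in all channels are forced opaque by the early return above.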

@ -0,0 +1,12 @@
#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec4 color_ycca;
rgba_to_ycca_itu_709(texture_load(input_tx, texel), color_ycca);
imageStore(output_img, texel, color_ycca);
}

@ -0,0 +1,17 @@
#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec4 color_ycca;
rgba_to_ycca_itu_709(texture_load(input_tx, texel), color_ycca);
color_ycca.yz = texture_load(new_chroma_tx, texel).yz;
vec4 color_rgba;
ycca_to_rgba_itu_709(color_ycca, color_rgba);
imageStore(output_img, texel, color_rgba);
}

@ -0,0 +1,54 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
float matte = texture_load(input_matte_tx, texel).x;
/* Search the neighbourhood around the current matte value and identify whether it lies along the
* edges of the matte. This needs to be computed only when we need to compute the edges output
* or tweak the levels of the matte. */
bool is_edge = false;
if (compute_edges || black_level != 0.0 || white_level != 1.0) {
/* Count the number of neighbours whose matte is sufficiently similar to the current matte,
* as controlled by the edge_tolerance factor. */
int count = 0;
for (int j = -edge_search_radius; j <= edge_search_radius; j++) {
for (int i = -edge_search_radius; i <= edge_search_radius; i++) {
float neighbour_matte = texture_load(input_matte_tx, texel + ivec2(i, j)).x;
count += int(distance(matte, neighbour_matte) < edge_tolerance);
}
}
/* If the number of neighbours that are sufficiently similar to the center matte is less than
* 90% of the total number of neighbours, then the variance is high in that area and it is
* considered an edge. */
is_edge = count < ((edge_search_radius * 2 + 1) * (edge_search_radius * 2 + 1)) * 0.9;
}
float tweaked_matte = matte;
/* Remap the matte using the black and white levels, but only for areas that are not on the edge
* of the matte to preserve details. Also check that the levels are not equal to avoid division
* by zero. */
if (!is_edge && white_level != black_level) {
tweaked_matte = clamp((matte - black_level) / (white_level - black_level), 0.0, 1.0);
}
/* Exclude unwanted areas using the provided garbage matte; a value of 1 means unwanted, so
* invert the garbage matte and take the minimum. */
if (apply_garbage_matte) {
float garbage_matte = texture_load(garbage_matte_tx, texel).x;
tweaked_matte = min(tweaked_matte, 1.0 - garbage_matte);
}
/* Include wanted areas that were incorrectly keyed using the provided core matte. */
if (apply_core_matte) {
float core_matte = texture_load(core_matte_tx, texel).x;
tweaked_matte = max(tweaked_matte, core_matte);
}
imageStore(output_matte_img, texel, vec4(tweaked_matte));
imageStore(output_edges_img, texel, vec4(is_edge ? 1.0 : 0.0));
}
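
As a concrete example of the edge test above (numbers for illustration only): with an edge search radius of 3, the window holds (2 * 3 + 1)^2 = 49 samples, so the texel is flagged as an edge when fewer than 0.9 * 49 = 44.1 of them, that is 44 or fewer, are within edge_tolerance of the center matte.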

@ -0,0 +1,57 @@
/* SPDX-FileCopyrightText: 2023 Blender Foundation
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(compositor_keying_extract_chroma)
.local_group_size(16, 16)
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_keying_extract_chroma.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_keying_replace_chroma)
.local_group_size(16, 16)
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_2D, "new_chroma_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_keying_replace_chroma.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_keying_compute_matte)
.local_group_size(16, 16)
.push_constant(Type::FLOAT, "key_balance")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_2D, "key_tx")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_keying_compute_matte.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_keying_tweak_matte)
.local_group_size(16, 16)
.push_constant(Type::BOOL, "compute_edges")
.push_constant(Type::BOOL, "apply_core_matte")
.push_constant(Type::BOOL, "apply_garbage_matte")
.push_constant(Type::INT, "edge_search_radius")
.push_constant(Type::FLOAT, "edge_tolerance")
.push_constant(Type::FLOAT, "black_level")
.push_constant(Type::FLOAT, "white_level")
.sampler(0, ImageType::FLOAT_2D, "input_matte_tx")
.sampler(1, ImageType::FLOAT_2D, "garbage_matte_tx")
.sampler(2, ImageType::FLOAT_2D, "core_matte_tx")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_matte_img")
.image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_edges_img")
.compute_source("compositor_keying_tweak_matte.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_keying_compute_image)
.local_group_size(16, 16)
.push_constant(Type::FLOAT, "despill_factor")
.push_constant(Type::FLOAT, "despill_balance")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_2D, "matte_tx")
.sampler(2, ImageType::FLOAT_2D, "key_tx")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_keying_compute_image.glsl")
.do_static_compilation(true);

@ -4,13 +4,21 @@
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur)
GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur_shared)
.local_group_size(16, 16)
.push_constant(Type::BOOL, "extend_bounds")
.push_constant(Type::BOOL, "gamma_correct_input")
.push_constant(Type::BOOL, "gamma_uncorrect_output")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_1D, "weights_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_symmetric_separable_blur.glsl")
.compute_source("compositor_symmetric_separable_blur.glsl");
GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur_float)
.additional_info("compositor_symmetric_separable_blur_shared")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur_color)
.additional_info("compositor_symmetric_separable_blur_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.do_static_compilation(true);

@ -54,6 +54,20 @@ mat2 rot2_from_angle(float a)
return mat2(c, -s, s, c);
}
/* Computes the full argmax of the given vector, that is, the index of the greatest component will
* be in the returned x component, the index of the smallest component will be in the returned z
* component, and the index of the middle component will be in the returned y component.
*
* This is computed by utilizing the fact that booleans are converted to the integers 0 and 1 for
* false and true respectively. So if we compare every component against every other component
* using the greaterThan comparator and sum the results, each component receives the number of
* components it is greater than: 2 for the greatest component, 1 for the middle component, and 0
* for the smallest component. */
ivec3 argmax(vec3 v)
{
return ivec3(greaterThan(v, v.xxx)) + ivec3(greaterThan(v, v.yyy)) +
ivec3(greaterThan(v, v.zzz));
}
#define min3(a, b, c) min(a, min(b, c))
#define min4(a, b, c, d) min(a, min3(b, c, d))
#define min5(a, b, c, d, e) min(a, min4(b, c, d, e))

@ -19,7 +19,8 @@
#include "GPU_state.h"
#include "GPU_texture.h"
#include "COM_morphological_distance_feather_weights.hh"
#include "COM_algorithm_morphological_distance.hh"
#include "COM_algorithm_morphological_distance_feather.hh"
#include "COM_node_operation.hh"
#include "COM_utilities.hh"
@ -175,33 +176,7 @@ class DilateErodeOperation : public NodeOperation {
void execute_distance()
{
GPUShader *shader = shader_manager().get(get_morphological_distance_shader_name());
GPU_shader_bind(shader);
/* Pass the absolute value of the distance. We have specialized shaders for each sign. */
GPU_shader_uniform_1i(shader, "radius", math::abs(get_distance()));
const Result &input_mask = get_input("Mask");
input_mask.bind_as_texture(shader, "input_tx");
const Domain domain = compute_domain();
Result &output_mask = get_result("Mask");
output_mask.allocate_texture(domain);
output_mask.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, domain.size);
GPU_shader_unbind();
output_mask.unbind_as_image();
input_mask.unbind_as_texture();
}
const char *get_morphological_distance_shader_name()
{
if (get_distance() > 0) {
return "compositor_morphological_distance_dilate";
}
return "compositor_morphological_distance_erode";
morphological_distance(context(), get_input("Mask"), get_result("Mask"), get_distance());
}
/* ------------------------------------------
@ -244,87 +219,11 @@ class DilateErodeOperation : public NodeOperation {
void execute_distance_feather()
{
GPUTexture *horizontal_pass_result = execute_distance_feather_horizontal_pass();
execute_distance_feather_vertical_pass(horizontal_pass_result);
}
GPUTexture *execute_distance_feather_horizontal_pass()
{
GPUShader *shader = shader_manager().get(get_morphological_distance_feather_shader_name());
GPU_shader_bind(shader);
const Result &input_image = get_input("Mask");
input_image.bind_as_texture(shader, "input_tx");
const MorphologicalDistanceFeatherWeights &weights =
context().cache_manager().morphological_distance_feather_weights.get(
node_storage(bnode()).falloff, math::abs(get_distance()));
weights.bind_weights_as_texture(shader, "weights_tx");
weights.bind_distance_falloffs_as_texture(shader, "falloffs_tx");
/* We allocate an output image of a transposed size, that is, with a height equivalent to the
* width of the input and vice versa. This is done as a performance optimization. The shader
* will process the image horizontally and write it to the intermediate output transposed. Then
* the vertical pass will execute the same horizontal pass shader, but since its input is
* transposed, it will effectively do a vertical pass and write to the output transposed,
* effectively undoing the transposition in the horizontal pass. This is done to improve
* spatial cache locality in the shader and to avoid having two separate shaders for each of
* the passes. */
const Domain domain = compute_domain();
const int2 transposed_domain = int2(domain.size.y, domain.size.x);
GPUTexture *horizontal_pass_result = texture_pool().acquire_color(transposed_domain);
const int image_unit = GPU_shader_get_sampler_binding(shader, "output_img");
GPU_texture_image_bind(horizontal_pass_result, image_unit);
compute_dispatch_threads_at_least(shader, domain.size);
GPU_shader_unbind();
input_image.unbind_as_texture();
weights.unbind_weights_as_texture();
weights.unbind_distance_falloffs_as_texture();
GPU_texture_image_unbind(horizontal_pass_result);
return horizontal_pass_result;
}
void execute_distance_feather_vertical_pass(GPUTexture *horizontal_pass_result)
{
GPUShader *shader = shader_manager().get(get_morphological_distance_feather_shader_name());
GPU_shader_bind(shader);
GPU_memory_barrier(GPU_BARRIER_TEXTURE_FETCH);
const int texture_image_unit = GPU_shader_get_sampler_binding(shader, "input_tx");
GPU_texture_bind(horizontal_pass_result, texture_image_unit);
const MorphologicalDistanceFeatherWeights &weights =
context().cache_manager().morphological_distance_feather_weights.get(
node_storage(bnode()).falloff, math::abs(get_distance()));
weights.bind_weights_as_texture(shader, "weights_tx");
weights.bind_distance_falloffs_as_texture(shader, "falloffs_tx");
const Domain domain = compute_domain();
Result &output_image = get_result("Mask");
output_image.allocate_texture(domain);
output_image.bind_as_image(shader, "output_img");
/* Notice that the domain is transposed, see the note on the horizontal pass method for more
* information on the reasoning behind this. */
compute_dispatch_threads_at_least(shader, int2(domain.size.y, domain.size.x));
GPU_shader_unbind();
output_image.unbind_as_image();
weights.unbind_weights_as_texture();
weights.unbind_distance_falloffs_as_texture();
GPU_texture_unbind(horizontal_pass_result);
}
const char *get_morphological_distance_feather_shader_name()
{
if (get_distance() > 0) {
return "compositor_morphological_distance_feather_dilate";
}
return "compositor_morphological_distance_feather_erode";
morphological_distance_feather(context(),
get_input("Mask"),
get_result("Mask"),
get_distance(),
node_storage(bnode()).falloff);
}
/* ---------------

@ -7,13 +7,22 @@
*/
#include "BLI_math_base.h"
#include "BLI_math_vector_types.hh"
#include "DNA_movieclip_types.h"
#include "DNA_scene_types.h"
#include "UI_interface.h"
#include "UI_resources.h"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "COM_algorithm_morphological_distance.hh"
#include "COM_algorithm_morphological_distance_feather.hh"
#include "COM_algorithm_symmetric_separable_blur.hh"
#include "COM_node_operation.hh"
#include "COM_utilities.hh"
#include "node_composite_util.hh"
@ -21,12 +30,18 @@
namespace blender::nodes::node_composite_keying_cc {
NODE_STORAGE_FUNCS(NodeKeyingData)
static void cmp_node_keying_declare(NodeDeclarationBuilder &b)
{
b.add_input<decl::Color>("Image").default_value({0.8f, 0.8f, 0.8f, 1.0f});
b.add_input<decl::Color>("Key Color").default_value({1.0f, 1.0f, 1.0f, 1.0f});
b.add_input<decl::Float>("Garbage Matte").hide_value();
b.add_input<decl::Float>("Core Matte").hide_value();
b.add_input<decl::Color>("Image")
.default_value({0.8f, 0.8f, 0.8f, 1.0f})
.compositor_domain_priority(0);
b.add_input<decl::Color>("Key Color")
.default_value({1.0f, 1.0f, 1.0f, 1.0f})
.compositor_domain_priority(1);
b.add_input<decl::Float>("Garbage Matte").hide_value().compositor_domain_priority(2);
b.add_input<decl::Float>("Core Matte").hide_value().compositor_domain_priority(3);
b.add_output<decl::Color>("Image");
b.add_output<decl::Float>("Matte");
b.add_output<decl::Float>("Edges");
@ -72,10 +87,277 @@ class KeyingOperation : public NodeOperation {
void execute() override
{
get_input("Image").pass_through(get_result("Image"));
get_result("Matte").allocate_invalid();
get_result("Edges").allocate_invalid();
context().set_info_message("Viewport compositor setup not fully supported");
Result blurred_input = compute_blurred_input();
Result matte = compute_matte(blurred_input);
blurred_input.release();
/* This also computes the edges output if needed. */
Result tweaked_matte = compute_tweaked_matte(matte);
matte.release();
Result &output_image = get_result("Image");
Result &output_matte = get_result("Matte");
if (output_image.should_compute() || output_matte.should_compute()) {
Result blurred_matte = compute_blurred_matte(tweaked_matte);
tweaked_matte.release();
Result morphed_matte = compute_morphed_matte(blurred_matte);
blurred_matte.release();
Result feathered_matte = compute_feathered_matte(morphed_matte);
morphed_matte.release();
if (output_image.should_compute()) {
compute_image(feathered_matte);
}
if (output_matte.should_compute()) {
output_matte.steal_data(feathered_matte);
}
else {
feathered_matte.release();
}
}
}
Result compute_blurred_input()
{
/* No blur needed, return the original input. We also increment the reference count of the
* input because the caller will release it after the call, and we want to extend its life
* since it is now returned as the output. */
const float blur_size = node_storage(bnode()).blur_pre;
if (blur_size == 0.0f) {
Result output = get_input("Image");
output.increment_reference_count();
return output;
}
Result chroma = extract_input_chroma();
Result blurred_chroma = Result::Temporary(ResultType::Color, context().texture_pool());
symmetric_separable_blur(context(), chroma, blurred_chroma, float2(blur_size), R_FILTER_BOX);
chroma.release();
Result blurred_input = replace_input_chroma(blurred_chroma);
blurred_chroma.release();
return blurred_input;
}
Result extract_input_chroma()
{
GPUShader *shader = context().shader_manager().get("compositor_keying_extract_chroma");
GPU_shader_bind(shader);
Result &input = get_input("Image");
input.bind_as_texture(shader, "input_tx");
Result output = Result::Temporary(ResultType::Color, context().texture_pool());
output.allocate_texture(input.domain());
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, input.domain().size);
GPU_shader_unbind();
input.unbind_as_texture();
output.unbind_as_image();
return output;
}
Result replace_input_chroma(Result &new_chroma)
{
GPUShader *shader = context().shader_manager().get("compositor_keying_replace_chroma");
GPU_shader_bind(shader);
Result &input = get_input("Image");
input.bind_as_texture(shader, "input_tx");
new_chroma.bind_as_texture(shader, "new_chroma_tx");
Result output = Result::Temporary(ResultType::Color, context().texture_pool());
output.allocate_texture(input.domain());
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, input.domain().size);
GPU_shader_unbind();
input.unbind_as_texture();
new_chroma.unbind_as_texture();
output.unbind_as_image();
return output;
}
Result compute_matte(Result &input)
{
GPUShader *shader = context().shader_manager().get("compositor_keying_compute_matte");
GPU_shader_bind(shader);
GPU_shader_uniform_1f(shader, "key_balance", node_storage(bnode()).screen_balance);
input.bind_as_texture(shader, "input_tx");
Result &key_color = get_input("Key Color");
key_color.bind_as_texture(shader, "key_tx");
Result output = Result::Temporary(ResultType::Float, context().texture_pool());
output.allocate_texture(input.domain());
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, input.domain().size);
GPU_shader_unbind();
input.unbind_as_texture();
key_color.unbind_as_texture();
output.unbind_as_image();
return output;
}
Result compute_tweaked_matte(Result &input_matte)
{
Result &output_edges = get_result("Edges");
const float black_level = node_storage(bnode()).clip_black;
const float white_level = node_storage(bnode()).clip_white;
const bool core_matte_exists = node().input_by_identifier("Core Matte")->is_logically_linked();
const bool garbage_matte_exists =
node().input_by_identifier("Garbage Matte")->is_logically_linked();
/* The edges output is not needed and the matte is not tweaked, so return the original matte.
* We also increment the reference count of the input because the caller will release it after
* the call, and we want to extend its life since it is now returned as the output. */
if (!output_edges.should_compute() && (black_level == 0.0f && white_level == 1.0f) &&
!core_matte_exists && !garbage_matte_exists)
{
Result output_matte = input_matte;
input_matte.increment_reference_count();
return output_matte;
}
GPUShader *shader = context().shader_manager().get("compositor_keying_tweak_matte");
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "compute_edges", output_edges.should_compute());
GPU_shader_uniform_1b(shader, "apply_core_matte", core_matte_exists);
GPU_shader_uniform_1b(shader, "apply_garbage_matte", garbage_matte_exists);
GPU_shader_uniform_1i(shader, "edge_search_radius", node_storage(bnode()).edge_kernel_radius);
GPU_shader_uniform_1f(shader, "edge_tolerance", node_storage(bnode()).edge_kernel_tolerance);
GPU_shader_uniform_1f(shader, "black_level", black_level);
GPU_shader_uniform_1f(shader, "white_level", white_level);
input_matte.bind_as_texture(shader, "input_matte_tx");
Result &garbage_matte = get_input("Garbage Matte");
garbage_matte.bind_as_texture(shader, "garbage_matte_tx");
Result &core_matte = get_input("Core Matte");
core_matte.bind_as_texture(shader, "core_matte_tx");
Result output_matte = Result::Temporary(ResultType::Float, context().texture_pool());
output_matte.allocate_texture(input_matte.domain());
output_matte.bind_as_image(shader, "output_matte_img");
output_edges.allocate_texture(input_matte.domain());
output_edges.bind_as_image(shader, "output_edges_img");
compute_dispatch_threads_at_least(shader, input_matte.domain().size);
GPU_shader_unbind();
input_matte.unbind_as_texture();
garbage_matte.unbind_as_texture();
core_matte.unbind_as_texture();
output_matte.unbind_as_image();
output_edges.unbind_as_image();
return output_matte;
}
Result compute_blurred_matte(Result &input_matte)
{
const float blur_size = node_storage(bnode()).blur_post;
/* No blur needed, return the original matte. We also increment the reference count of the
* input because the caller will release it after the call, and we want to extend its life
* since it is now returned as the output. */
if (blur_size == 0.0f) {
Result output_matte = input_matte;
input_matte.increment_reference_count();
return output_matte;
}
Result blurred_matte = Result::Temporary(ResultType::Float, context().texture_pool());
symmetric_separable_blur(context(), input_matte, blurred_matte, float2(blur_size));
return blurred_matte;
}
Result compute_morphed_matte(Result &input_matte)
{
const int distance = node_storage(bnode()).dilate_distance;
/* No morphology needed, return the original matte. We also increment the reference count of
* the input because the caller will release it after the call, and we want to extend its life
* since it is now returned as the output. */
if (distance == 0) {
Result output_matte = input_matte;
input_matte.increment_reference_count();
return output_matte;
}
Result morphed_matte = Result::Temporary(ResultType::Float, context().texture_pool());
morphological_distance(context(), input_matte, morphed_matte, distance);
return morphed_matte;
}
Result compute_feathered_matte(Result &input_matte)
{
const int distance = node_storage(bnode()).feather_distance;
/* No feathering needed, return the original matte. We also increment the reference count of
* the input because the caller will release it after the call, and we want to extend its life
* since it is now returned as the output. */
if (distance == 0) {
Result output_matte = input_matte;
input_matte.increment_reference_count();
return output_matte;
}
Result feathered_matte = Result::Temporary(ResultType::Float, context().texture_pool());
morphological_distance_feather(
context(), input_matte, feathered_matte, distance, node_storage(bnode()).feather_falloff);
return feathered_matte;
}
void compute_image(Result &matte)
{
GPUShader *shader = context().shader_manager().get("compositor_keying_compute_image");
GPU_shader_bind(shader);
GPU_shader_uniform_1f(shader, "despill_factor", node_storage(bnode()).despill_factor);
GPU_shader_uniform_1f(shader, "despill_balance", node_storage(bnode()).despill_balance);
Result &input = get_input("Image");
input.bind_as_texture(shader, "input_tx");
Result &key = get_input("Key Color");
key.bind_as_texture(shader, "key_tx");
matte.bind_as_texture(shader, "matte_tx");
Result &output = get_result("Image");
output.allocate_texture(matte.domain());
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, input.domain().size);
GPU_shader_unbind();
input.unbind_as_texture();
key.unbind_as_texture();
matte.unbind_as_texture();
output.unbind_as_image();
}
};
@ -99,8 +381,6 @@ void register_node_type_cmp_keying()
node_type_storage(
&ntype, "NodeKeyingData", node_free_standard_storage, node_copy_standard_storage);
ntype.get_compositor_operation = file_ns::get_compositor_operation;
ntype.realtime_compositor_unsupported_message = N_(
"Node not supported in the Viewport compositor");
nodeRegisterType(&ntype);
}