2018-10-28 09:37:41 +00:00
|
|
|
/*
|
2019-05-01 11:14:11 +00:00
|
|
|
* Copyright 2018 Blender Foundation
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
2018-10-28 09:37:41 +00:00
|
|
|
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
|
2019-04-17 04:17:24 +00:00
|
|
|
/* Accumulate `weight` for `id` into a list of (ID, weight) slots.
 *
 * buffer:    `num_slots` consecutive float pairs, accessed as float2 where `.x` holds the
 *            ID and `.y` the accumulated weight. A slot whose ID equals ID_NONE is
 *            treated as empty.
 * num_slots: number of float2 slots available in `buffer`.
 * id:        ID to accumulate; must not be ID_NONE (asserted).
 * weight:    weight to add; a weight of exactly zero is ignored.
 *
 * Slots are scanned front to back. The weight goes to the first slot that already holds
 * `id`, or to the first empty slot, which is then claimed for `id`. If neither exists, the
 * weight is added to the last slot regardless of the ID stored there, so it is never
 * dropped. */
ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
                                             int num_slots,
                                             float id,
                                             float weight)
{
  kernel_assert(id != ID_NONE);
  if (weight == 0.0f) {
    return;
  }

  /* Neither the float2 view nor the index of the last slot depends on the loop variable. */
  ccl_global float2 *id_buffer = (ccl_global float2 *)buffer;
  const int last_slot = num_slots - 1;

  for (int slot = 0; slot < num_slots; slot++) {
#ifdef __ATOMIC_PASS_WRITE__
    /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
    if (id_buffer[slot].x == ID_NONE) {
      /* Use an atomic to claim this slot. The returned value is the ID that was stored
       * in the slot before the exchange was attempted. */
      float old_id = atomic_compare_and_swap_float(buffer + slot * 2, ID_NONE, id);
      if (old_id != ID_NONE && old_id != id && slot != last_slot) {
        /* A different thread claimed this slot for another ID first,
         * continue the search with the next slot. */
        continue;
      }
      /* Either the slot now holds our ID, or this is the last slot and it was taken by
       * another ID in the meantime. In the latter case fall back to the last slot, same as
       * when no slot was found, instead of leaving the loop without writing the weight. */
      atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight);
      break;
    }
    /* If there already is a slot for that ID, add the weight.
     * If no slot was found, add it to the last. */
    else if (id_buffer[slot].x == id || slot == last_slot) {
      atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight);
      break;
    }
#else  /* __ATOMIC_PASS_WRITE__ */
    /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
    if (id_buffer[slot].x == ID_NONE) {
      id_buffer[slot].x = id;
      id_buffer[slot].y = weight;
      break;
    }
    /* If there already is a slot for that ID, add the weight.
     * If no slot was found, add it to the last. */
    else if (id_buffer[slot].x == id || slot == last_slot) {
      id_buffer[slot].y += weight;
      break;
    }
#endif /* __ATOMIC_PASS_WRITE__ */
  }
}
|
|
|
|
|
|
|
|
/* Sort the (ID, weight) slots in `buffer` by descending weight.
 *
 * buffer:    `num_slots` consecutive float pairs, accessed as float2 where `.x` holds the
 *            ID and `.y` the weight.
 * num_slots: number of float2 slots in `buffer`.
 *
 * Sorting stops at the first slot past index 0 whose ID is ID_NONE; that slot and
 * everything after it are left untouched. */
ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots)
{
  ccl_global float2 *slots = (ccl_global float2 *)buffer;

  /* The element count is tiny, so a plain insertion sort is sufficient. */
  for (int slot = 1; slot < num_slots && slots[slot].x != ID_NONE; ++slot) {
    /* Take the entry out, shift every lighter entry in front of it one slot back,
     * then drop it into the gap that opened up. */
    const float2 entry = slots[slot];
    int dst = slot;
    for (; dst > 0 && entry.y > slots[dst - 1].y; --dst) {
      slots[dst] = slots[dst - 1];
    }
    if (dst != slot) {
      slots[dst] = entry;
    }
  }
}
|
|
|
|
|
|
|
|
#ifdef __KERNEL_GPU__
|
|
|
|
/* post-sorting for Cryptomatte */
|
2019-04-17 04:17:24 +00:00
|
|
|
ccl_device void kernel_cryptomatte_post(
|
|
|
|
KernelGlobals *kg, ccl_global float *buffer, uint sample, int x, int y, int offset, int stride)
|
2018-10-28 09:37:41 +00:00
|
|
|
{
|
2019-04-17 04:17:24 +00:00
|
|
|
if (sample - 1 == kernel_data.integrator.aa_samples) {
|
|
|
|
int index = offset + x + y * stride;
|
|
|
|
int pass_stride = kernel_data.film.pass_stride;
|
|
|
|
ccl_global float *cryptomatte_buffer = buffer + index * pass_stride +
|
|
|
|
kernel_data.film.pass_cryptomatte;
|
|
|
|
kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
|
|
|
|
}
|
2018-10-28 09:37:41 +00:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
CCL_NAMESPACE_END
|