blender/intern/cycles/util/util_atomic.h

/*
 * Copyright 2014 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __UTIL_ATOMIC_H__
#define __UTIL_ATOMIC_H__

#ifndef __KERNEL_GPU__

/* Using atomic ops header from Blender. */
#include "atomic_ops.h"

/* Atomically raise `*maximum_value` to at least `value`.
 *
 * maximum_value: location holding the running maximum, shared between threads.
 * value:         candidate for the new maximum.
 *
 * Relies on atomic_cas_z() returning the value that was stored at the location
 * before the exchange attempt (the same contract the comparison against
 * `prev_value` already depended on).
 */
ATOMIC_INLINE void atomic_update_max_z(size_t *maximum_value, size_t value)
{
	size_t prev_value = *maximum_value;
	while(prev_value < value) {
		const size_t observed_value = atomic_cas_z(maximum_value, prev_value, value);
		if(observed_value == prev_value) {
			/* Exchange succeeded, the stored maximum is now `value`. */
			break;
		}
		/* Another thread modified the maximum in the meantime: retry against
		 * what it actually stored. The loop condition ends the retry once the
		 * stored value is already >= `value`.
		 */
		prev_value = observed_value;
	}
}

/* Float add-and-fetch for non-GPU builds: thin alias for
 * atomic_add_and_fetch_fl() (presumably declared in atomic_ops.h -- confirm).
 */
#define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x))

/* Fetch-and-increment expressed as a fetch-and-add of 1. */
#define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)

/* Non-GPU builds: the fence flag is a plain 0 and ccl_barrier() expands to a
 * no-op expression; the `flags` argument is discarded.
 */
#define CCL_LOCAL_MEM_FENCE 0
#define ccl_barrier(flags) (void)0

#else  /* __KERNEL_GPU__ */

#ifdef __KERNEL_OPENCL__

/* Atomically add `operand` to the float stored at `source` and return the
 * resulting sum.
 *
 * The float is updated through a compare-and-exchange on its 32-bit pattern:
 * read the current value, compute the sum, and publish it only if the location
 * still holds the bit pattern that was read. Otherwise start over.
 *
 * Float atomics implementation credits:
 *   http://suhorukov.blogspot.in/2011/12/opencl-11-atomic-operations-on-floating.html
 */
ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *source,
                                        const float operand)
{
	/* Same storage viewed either as a float or as its raw bits. */
	union {
		unsigned int bits;
		float number;
	} expected, desired;
	volatile ccl_global unsigned int *target = (volatile ccl_global unsigned int *)source;
	for(;;) {
		expected.number = *source;
		desired.number = expected.number + operand;
		/* atomic_cmpxchg() hands back what was stored before the attempt;
		 * a match with `expected` means the new sum was written.
		 */
		if(atomic_cmpxchg(target, expected.bits, desired.bits) == expected.bits) {
			break;
		}
	}
	return desired.number;
}

/* OpenCL builds: the uint32 helpers forward directly to atomic_add() and
 * atomic_inc().
 */
#define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x))
#define atomic_fetch_and_inc_uint32(p) atomic_inc((p))

/* OpenCL builds: the fence flag is CLK_LOCAL_MEM_FENCE and ccl_barrier()
 * forwards its `flags` argument to barrier().
 */
#define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM_FENCE
#define ccl_barrier(flags) barrier(flags)

#endif  /* __KERNEL_OPENCL__ */

#ifdef __KERNEL_CUDA__

/* CUDA builds: the result of atomicAdd() plus `x` is used as the add-and-fetch
 * result.
 * NOTE: `x` appears twice in the expansion, so an argument with side effects
 * is evaluated twice.
 */
#define atomic_add_and_fetch_float(p, x) (atomicAdd((float*)(p), (float)(x)) + (float)(x))

/* uint32 helpers: fetch-and-add maps onto atomicAdd(), and fetch-and-increment
 * is a fetch-and-add of 1.
 */
#define atomic_fetch_and_add_uint32(p, x) atomicAdd((unsigned int*)(p), (unsigned int)(x))
#define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)

/* CUDA builds: CCL_LOCAL_MEM_FENCE expands to nothing (unlike the non-GPU
 * definition, which is 0), and ccl_barrier() ignores `flags` and always
 * calls __syncthreads().
 */
#define CCL_LOCAL_MEM_FENCE
#define ccl_barrier(flags) __syncthreads()

#endif  /* __KERNEL_CUDA__ */

#endif  /* __KERNEL_GPU__ */

#endif /* __UTIL_ATOMIC_H__ */
Cycles: Use lock in the memory statistics. CPU rendering is allowed to allocate memory from multiple threads, which means statistics need to be aware of this. 2014-12-02 10:36:44 +00:00			`/*`
			`* Copyright 2014 Blender Foundation`
			`*`
			`* Licensed under the Apache License, Version 2.0 (the "License");`
			`* you may not use this file except in compliance with the License.`
			`* You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS,`
			`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
Cleanup: Fix Cycles Apache header. This was already mixed a bit, but the dot belongs there. 2014-12-25 01:50:24 +00:00			`* limitations under the License.`
Cycles: Use lock in the memory statistics. CPU rendering is allowed to allocate memory from multiple threads, which means statistics need to be aware of this. 2014-12-02 10:36:44 +00:00			`*/`

			`#ifndef __UTIL_ATOMIC_H__`
			`#define __UTIL_ATOMIC_H__`

Cycles: Move utility atomics function to util_atomic.h No functional changes, just better to keep all atomic function in a single place, they might become handy later. 2015-05-21 10:48:50 +00:00			`#ifndef __KERNEL_GPU__`

Cycles: Use lock in the memory statistics. CPU rendering is allowed to allocate memory from multiple threads, which means statistics need to be aware of this. 2014-12-02 10:36:44 +00:00			`/* Using atomic ops header from Blender. */`
			`#include "atomic_ops.h"`

			`ATOMIC_INLINE void atomic_update_max_z(size_t *maximum_value, size_t value)`
			`{`
			`size_t prev_value = *maximum_value;`
Cycles: Code cleanup, spaces around keywords This inconsistency drove me totally crazy, it's really confusing when it's inconsistent especially when you work on both Cycles and Blender sides. Shouldn't cause merge PITA, it's whitespace changes only, Git should be able to merge it nicely. 2015-03-27 19:15:15 +00:00			`while(prev_value < value) {`
			`if(atomic_cas_z(maximum_value, prev_value, value) != prev_value) {`
Cycles: Use lock in the memory statistics. CPU rendering is allowed to allocate memory from multiple threads, which means statistics need to be aware of this. 2014-12-02 10:36:44 +00:00			`break;`
			`}`
			`}`
			`}`

Cycles: Add more atomic operations 2017-02-22 12:41:18 +00:00			`#define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x))`

			`#define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)`

			`#define CCL_LOCAL_MEM_FENCE 0`
			`#define ccl_barrier(flags) (void)0`

Cycles: Move utility atomics function to util_atomic.h No functional changes, just better to keep all atomic function in a single place, they might become handy later. 2015-05-21 10:48:50 +00:00			`#else /* __KERNEL_GPU__ */`

			`#ifdef __KERNEL_OPENCL__`

			`/* Float atomics implementation credits:`
			`* http://suhorukov.blogspot.in/2011/12/opencl-11-atomic-operations-on-floating.html`
			`*/`
Cycles: Add more atomic operations 2017-02-22 12:41:18 +00:00			`ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *source,`
Cycles: Move utility atomics function to util_atomic.h No functional changes, just better to keep all atomic function in a single place, they might become handy later. 2015-05-21 10:48:50 +00:00			`const float operand)`
			`{`
			`union {`
			`unsigned int int_value;`
			`float float_value;`
			`} new_value;`
			`union {`
			`unsigned int int_value;`
			`float float_value;`
			`} prev_value;`
			`do {`
			`prev_value.float_value = *source;`
			`new_value.float_value = prev_value.float_value + operand;`
Cycles: Code cleanup, spaces around keyword and brace 2015-06-01 13:11:57 +00:00			`} while(atomic_cmpxchg((volatile ccl_global unsigned int *)source,`
			`prev_value.int_value,`
			`new_value.int_value) != prev_value.int_value);`
Cycles: Add more atomic operations 2017-02-22 12:41:18 +00:00			`return new_value.float_value;`
Cycles: Move utility atomics function to util_atomic.h No functional changes, just better to keep all atomic function in a single place, they might become handy later. 2015-05-21 10:48:50 +00:00			`}`

Cycles: Add more atomic operations 2017-02-22 12:41:18 +00:00			`#define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x))`
			`#define atomic_fetch_and_inc_uint32(p) atomic_inc((p))`

			`#define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM_FENCE`
			`#define ccl_barrier(flags) barrier(flags)`

Cycles: Move utility atomics function to util_atomic.h No functional changes, just better to keep all atomic function in a single place, they might become handy later. 2015-05-21 10:48:50 +00:00			`#endif /* __KERNEL_OPENCL__ */`

Cycles: Add more atomic operations 2017-02-22 12:41:18 +00:00			`#ifdef __KERNEL_CUDA__`

			`#define atomic_add_and_fetch_float(p, x) (atomicAdd((float*)(p), (float)(x)) + (float)(x))`

			`#define atomic_fetch_and_add_uint32(p, x) atomicAdd((unsigned int*)(p), (unsigned int)(x))`
			`#define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)`

			`#define CCL_LOCAL_MEM_FENCE`
			`#define ccl_barrier(flags) __syncthreads()`

			`#endif /* __KERNEL_CUDA__ */`

Cycles: Move utility atomics function to util_atomic.h No functional changes, just better to keep all atomic function in a single place, they might become handy later. 2015-05-21 10:48:50 +00:00			`#endif /* __KERNEL_GPU__ */`

Cycles: Use lock in the memory statistics. CPU rendering is allowed to allocate memory from multiple threads, which means statistics need to be aware of this. 2014-12-02 10:36:44 +00:00			`#endif /* __UTIL_ATOMIC_H__ */`