From ba3ae9ea273f7e596607281ffd77871e5a44fca7 Mon Sep 17 00:00:00 2001
From: Bastien Montagne
Date: Mon, 9 May 2016 17:03:08 +0200
Subject: [PATCH] Cleanup and refactor our atomic library.

This commit:
* Removes most of the dirty internal details from the public atomic_ops.h file
  and moves them into the private /intern subdir.
* Removes unused 'architectures' (__APPLE__ and jemalloc).
* Splits each implementation into its own file.
* Uses the C99 limits.h system header to determine pointer and int sizes,
  instead of a fixed, hardcoded list of architectures.
* Introduces new 'faked' atomic ops for floats.

Note that we may add many more real and 'faked' atomic operations over
integers and floats (multiplication, division, bit shifts, bitwise booleans,
etc.) as needs arise.

Reviewers: sergey, campbellbarton

Differential Revision: https://developer.blender.org/D1982
---
 intern/atomic/atomic_ops.h              | 505 +++---------------------
 intern/atomic/intern/atomic_ops_ext.h   | 146 +++++++
 intern/atomic/intern/atomic_ops_msvc.h  | 102 +++++
 intern/atomic/intern/atomic_ops_unix.h  | 180 +++++++++
 intern/atomic/intern/atomic_ops_utils.h | 110 ++++++
 source/blender/blenkernel/intern/pbvh.c |  11 +-
 6 files changed, 590 insertions(+), 464 deletions(-)
 create mode 100644 intern/atomic/intern/atomic_ops_ext.h
 create mode 100644 intern/atomic/intern/atomic_ops_msvc.h
 create mode 100644 intern/atomic/intern/atomic_ops_unix.h
 create mode 100644 intern/atomic/intern/atomic_ops_utils.h

diff --git a/intern/atomic/atomic_ops.h b/intern/atomic/atomic_ops.h
index dd1bdd2328d..e4e1bdc1c09 100644
--- a/intern/atomic/atomic_ops.h
+++ b/intern/atomic/atomic_ops.h
@@ -1,11 +1,11 @@
 /*
- * Adopted from jemalloc with this license:
+ * Original code from jemalloc with this license:
 *
 * Copyright (C) 2002-2013 Jason Evans .
 * All rights reserved.
 * Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
 * Copyright (C) 2009-2013 Facebook, Inc. All rights reserved.
-
+ *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright notice(s),
@@ -13,7 +13,7 @@
 * 2. Redistributions in binary form must reproduce the above copyright notice(s),
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
-
+ *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
@@ -24,64 +24,59 @@
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 Blender Foundation. + * All rights reserved. + * + * The Original Code is: adapted from jemalloc. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** + * \file atomic_ops.h + * \ingroup Atomic + * + * \author Copyright (C) 2016 Blender Foundation, adapted from jemalloc. + * \brief Provides wrapper around system-specific atomic primitives, and some extensions (faked-atomic operations + * over float numbers). */ #ifndef __ATOMIC_OPS_H__ #define __ATOMIC_OPS_H__ -#include - -#if defined (__APPLE__) -# include -#elif defined(_MSC_VER) -# define NOGDI -# ifndef NOMINMAX -# define NOMINMAX -# endif -# define WIN32_LEAN_AND_MEAN -# include -#elif defined(__arm__) +#if defined(__arm__) /* Attempt to fix compilation error on Debian armel kernel. * arm7 architecture does have both 32 and 64bit atomics, however * it's gcc doesn't have __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n defined. */ # define JE_FORCE_SYNC_COMPARE_AND_SWAP_1 -# define JE_FORCE_SYNC_COMPARE_AND_SWAP_8 # define JE_FORCE_SYNC_COMPARE_AND_SWAP_4 +# define JE_FORCE_SYNC_COMPARE_AND_SWAP_8 #endif -/* needed for int types */ -#include "../../source/blender/blenlib/BLI_sys_types.h" -#include -#include +#include "intern/atomic_ops_utils.h" -/* little macro so inline keyword works */ -#if defined(_MSC_VER) -# define ATOMIC_INLINE static __forceinline -#else -# if (defined(__APPLE__) && defined(__ppc__)) -/* static inline __attribute__ here breaks osx ppc gcc42 build */ -# define ATOMIC_INLINE static __attribute__((always_inline)) -# else -# define ATOMIC_INLINE static inline __attribute__((always_inline)) -# endif -#endif - -/* This is becoming a bit nastier that it was originally foreseen, - * consider using autoconfig detection instead. - */ -#if defined(_M_X64) || defined(__amd64__) || defined(__x86_64__) || defined(__s390x__) || defined(__powerpc64__) || defined(__aarch64__) || (defined(__sparc__) && defined(__arch64__)) || defined(__alpha__) || defined(__mips64) -# define LG_SIZEOF_PTR 3 -# define LG_SIZEOF_INT 2 -#else -# define LG_SIZEOF_PTR 2 -# define LG_SIZEOF_INT 2 -#endif - -/************************/ +/******************************************************************************/ /* Function prototypes. */ -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new); @@ -102,420 +97,22 @@ ATOMIC_INLINE unsigned atomic_add_u(unsigned *p, unsigned x); ATOMIC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x); ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new); -/******************************************************************************/ -/* 64-bit operations. 
*/ -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 -ATOMIC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - return __sync_add_and_fetch(p, x); -} - -ATOMIC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - return __sync_sub_and_fetch(p, x); -} - -ATOMIC_INLINE uint64_t -atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) -{ - return __sync_val_compare_and_swap(v, old, _new); -} -#elif (defined(_MSC_VER)) -ATOMIC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x; -} - -ATOMIC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)) - x; -} - -ATOMIC_INLINE uint64_t -atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) -{ - return InterlockedCompareExchange64((int64_t *)v, _new, old); -} -#elif (defined(__APPLE__)) -ATOMIC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - return (uint64_t)OSAtomicAdd64((int64_t)x, (int64_t *)p); -} - -ATOMIC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - return (uint64_t)OSAtomicAdd64(-((int64_t)x), (int64_t *)p); -} - -ATOMIC_INLINE uint64_t -atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) -{ - uint64_t init_val = *v; - OSAtomicCompareAndSwap64((int64_t)old, (int64_t)_new, (int64_t *)v); - return init_val; -} -# elif (defined(__amd64__) || defined(__x86_64__)) -ATOMIC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - return x; -} - -ATOMIC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - x = (uint64_t)(-(int64_t)x); - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - return x; -} - -ATOMIC_INLINE uint64_t -atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) -{ - uint64_t ret; - asm volatile ( - "lock; cmpxchgq %2,%1" - : "=a" (ret), "+m" (*v) - : "r" (_new), "0" (old) - : "memory"); - return ret; -} - -# elif (defined(JEMALLOC_ATOMIC9)) -ATOMIC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - /* - * atomic_fetchadd_64() doesn't exist, but we only ever use this - * function on LP64 systems, so atomic_fetchadd_long() will do. - */ - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return atomic_fetchadd_long(p, (unsigned long)x) + x; -} - -ATOMIC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x; -} - -ATOMIC_INLINE uint64_t -atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) -{ - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return atomic_cmpset_long(v, old, _new); -} -# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) -ATOMIC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - return __sync_add_and_fetch(p, x); -} - -ATOMIC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - return __sync_sub_and_fetch(p, x); -} - -ATOMIC_INLINE uint64_t -atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) -{ - return __sync_val_compare_and_swap(v, old, _new); -} -# else -# error "Missing implementation for 64-bit atomic operations" -# endif -#endif +/* WARNING! 
Float 'atomics' are really faked ones, those are actually closer to some kind of spinlock-sync'ed operation, + * which means they are only efficient if collisions are highly unlikely (i.e. if probability of two threads + * working on the same pointer at the same time is very low). */ +ATOMIC_INLINE float atomic_add_fl(float *p, const float x); /******************************************************************************/ -/* 32-bit operations. */ -#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 -ATOMIC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - return __sync_add_and_fetch(p, x); -} +/* Include system-dependent implementations. */ -ATOMIC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - return __sync_sub_and_fetch(p, x); -} - -ATOMIC_INLINE uint32_t -atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) -{ - return __sync_val_compare_and_swap(v, old, _new); -} -#elif (defined(_MSC_VER)) -ATOMIC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - return InterlockedExchangeAdd(p, x) + x; -} - -ATOMIC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - return InterlockedExchangeAdd(p, -((int32_t)x)) - x; -} - -ATOMIC_INLINE uint32_t -atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) -{ - return InterlockedCompareExchange((long *)v, _new, old); -} -#elif (defined(__APPLE__)) -ATOMIC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - return (uint32_t)OSAtomicAdd32((int32_t)x, (int32_t *)p); -} - -ATOMIC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - return (uint32_t)OSAtomicAdd32(-((int32_t)x), (int32_t *)p); -} - -ATOMIC_INLINE uint32_t -atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) -{ - uint32_t init_val = *v; - OSAtomicCompareAndSwap32((int32_t)old, (int32_t)_new, (int32_t *)v); - return init_val; -} -#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -ATOMIC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - return x; -} - -ATOMIC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - x = (uint32_t)(-(int32_t)x); - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - return x; -} - -ATOMIC_INLINE uint32_t -atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) -{ - uint32_t ret; - asm volatile ( - "lock; cmpxchgl %2,%1" - : "=a" (ret), "+m" (*v) - : "r" (_new), "0" (old) - : "memory"); - return ret; -} -#elif (defined(JEMALLOC_ATOMIC9)) -ATOMIC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - return atomic_fetchadd_32(p, x) + x; -} - -ATOMIC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - return atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x; -} - -ATOMIC_INLINE uint32_t -atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) -{ - return atomic_cmpset_32(v, old, _new); -} -#elif defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4) -ATOMIC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - return __sync_add_and_fetch(p, x); -} - -ATOMIC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - return __sync_sub_and_fetch(p, x); -} - -ATOMIC_INLINE uint32_t -atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) -{ - return __sync_val_compare_and_swap(v, old, _new); -} +/* Note that we are using _unix flavor as fallback here (it will raise precompiler errors as needed). 
*/ +#if defined(_MSC_VER) +# include "intern/atomic_ops_msvc.h" #else -# error "Missing implementation for 32-bit atomic operations" +# include "intern/atomic_ops_unix.h" #endif -/******************************************************************************/ -/* 8-bit operations. */ -#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 -ATOMIC_INLINE uint8_t -atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b) -{ - return __sync_fetch_and_or(p, b); -} -ATOMIC_INLINE uint8_t -atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b) -{ - return __sync_fetch_and_and(p, b); -} -#elif (defined(_MSC_VER)) -#include -#pragma intrinsic(_InterlockedAnd8) -ATOMIC_INLINE uint8_t -atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b) -{ -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) - return InterlockedOr8((char *)p, (char)b); -#else - return _InterlockedOr8((char *)p, (char)b); -#endif -} -ATOMIC_INLINE uint8_t -atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b) -{ -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) - return InterlockedAnd8((char *)p, (char)b); -#else - return _InterlockedAnd8((char *)p, (char)b); -#endif -} -#elif defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_1) -ATOMIC_INLINE uint8_t -atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b) -{ - return __sync_fetch_and_or(p, b); -} -ATOMIC_INLINE uint8_t -atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b) -{ - return __sync_fetch_and_and(p, b); -} -#else -# error "Missing implementation for 8-bit atomic operations" -#endif - -/******************************************************************************/ -/* size_t operations. */ -ATOMIC_INLINE size_t -atomic_add_z(size_t *p, size_t x) -{ - assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR); - -#if (LG_SIZEOF_PTR == 3) - return (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_PTR == 2) - return (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x); -#endif -} - -ATOMIC_INLINE size_t -atomic_sub_z(size_t *p, size_t x) -{ - assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR); - -#if (LG_SIZEOF_PTR == 3) - return (size_t)atomic_add_uint64((uint64_t *)p, - (uint64_t)-((int64_t)x)); -#elif (LG_SIZEOF_PTR == 2) - return (size_t)atomic_add_uint32((uint32_t *)p, - (uint32_t)-((int32_t)x)); -#endif -} - -ATOMIC_INLINE size_t -atomic_cas_z(size_t *v, size_t old, size_t _new) -{ - assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR); - -#if (LG_SIZEOF_PTR == 3) - return (size_t)atomic_cas_uint64((uint64_t *)v, - (uint64_t)old, - (uint64_t)_new); -#elif (LG_SIZEOF_PTR == 2) - return (size_t)atomic_cas_uint32((uint32_t *)v, - (uint32_t)old, - (uint32_t)_new); -#endif -} - -/******************************************************************************/ -/* unsigned operations. 
*/ -ATOMIC_INLINE unsigned -atomic_add_u(unsigned *p, unsigned x) -{ - assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT); - -#if (LG_SIZEOF_INT == 3) - return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_INT == 2) - return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x); -#endif -} - -ATOMIC_INLINE unsigned -atomic_sub_u(unsigned *p, unsigned x) -{ - assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT); - -#if (LG_SIZEOF_INT == 3) - return (unsigned)atomic_add_uint64((uint64_t *)p, - (uint64_t)-((int64_t)x)); -#elif (LG_SIZEOF_INT == 2) - return (unsigned)atomic_add_uint32((uint32_t *)p, - (uint32_t)-((int32_t)x)); -#endif -} - -ATOMIC_INLINE unsigned -atomic_cas_u(unsigned *v, unsigned old, unsigned _new) -{ - assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT); - -#if (LG_SIZEOF_PTR == 3) - return (unsigned)atomic_cas_uint64((uint64_t *)v, - (uint64_t)old, - (uint64_t)_new); -#elif (LG_SIZEOF_PTR == 2) - return (unsigned)atomic_cas_uint32((uint32_t *)v, - (uint32_t)old, - (uint32_t)_new); -#endif -} +/* Include 'fake' atomic extensions, built over real atomic primitives. */ +#include "intern/atomic_ops_ext.h" #endif /* __ATOMIC_OPS_H__ */ diff --git a/intern/atomic/intern/atomic_ops_ext.h b/intern/atomic/intern/atomic_ops_ext.h new file mode 100644 index 00000000000..4065299d2ea --- /dev/null +++ b/intern/atomic/intern/atomic_ops_ext.h @@ -0,0 +1,146 @@ +/* + * Original code from jemalloc with this license: + * + * Copyright (C) 2002-2013 Jason Evans . + * All rights reserved. + * Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. + * Copyright (C) 2009-2013 Facebook, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * 1. Redistributions of source code must retain the above copyright notice(s), + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice(s), + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 Blender Foundation. + * All rights reserved. + * + * The Original Code is: adapted from jemalloc. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +#ifndef __ATOMIC_OPS_EXT_H__ +#define __ATOMIC_OPS_EXT_H__ + +#include "atomic_ops_utils.h" + +/******************************************************************************/ +/* size_t operations. */ +ATOMIC_INLINE size_t atomic_add_z(size_t *p, size_t x) +{ + assert(sizeof(size_t) == LG_SIZEOF_PTR); + +#if (LG_SIZEOF_PTR == 8) + return (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_PTR == 4) + return (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + +ATOMIC_INLINE size_t atomic_sub_z(size_t *p, size_t x) +{ + assert(sizeof(size_t) == LG_SIZEOF_PTR); + +#if (LG_SIZEOF_PTR == 8) + return (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); +#elif (LG_SIZEOF_PTR == 4) + return (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); +#endif +} + +ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new) +{ + assert(sizeof(size_t) == LG_SIZEOF_PTR); + +#if (LG_SIZEOF_PTR == 8) + return (size_t)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new); +#elif (LG_SIZEOF_PTR == 4) + return (size_t)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new); +#endif +} + +/******************************************************************************/ +/* unsigned operations. */ +ATOMIC_INLINE unsigned atomic_add_u(unsigned *p, unsigned x) +{ + assert(sizeof(unsigned) == LG_SIZEOF_INT); + +#if (LG_SIZEOF_INT == 8) + return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_INT == 4) + return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + +ATOMIC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x) +{ + assert(sizeof(unsigned) == LG_SIZEOF_INT); + +#if (LG_SIZEOF_INT == 8) + return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); +#elif (LG_SIZEOF_INT == 4) + return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); +#endif +} + +ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new) +{ + assert(sizeof(unsigned) == LG_SIZEOF_INT); + +#if (LG_SIZEOF_INT == 8) + return (unsigned)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new); +#elif (LG_SIZEOF_INT == 4) + return (unsigned)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new); +#endif +} + +/******************************************************************************/ +/* float operations. */ + +ATOMIC_INLINE float atomic_add_fl(float *p, const float x) +{ + assert(sizeof(float) == sizeof(uint32_t)); + + float oldval, newval; + uint32_t prevval; + + do { /* Note that since collisions are unlikely, loop will nearly always run once. 
*/ + oldval = *p; + newval = oldval + x; + prevval = atomic_cas_uint32((uint32_t *)p, *(uint32_t *)(&oldval), *(uint32_t *)(&newval)); + } while (UNLIKELY(prevval != *(uint32_t *)(&oldval))); + + return newval; +} + +#endif /* __ATOMIC_OPS_EXT_H__ */ diff --git a/intern/atomic/intern/atomic_ops_msvc.h b/intern/atomic/intern/atomic_ops_msvc.h new file mode 100644 index 00000000000..bd9186e7864 --- /dev/null +++ b/intern/atomic/intern/atomic_ops_msvc.h @@ -0,0 +1,102 @@ +/* + * Adopted from jemalloc with this license: + * + * Copyright (C) 2002-2013 Jason Evans . + * All rights reserved. + * Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. + * Copyright (C) 2009-2013 Facebook, Inc. All rights reserved. + + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * 1. Redistributions of source code must retain the above copyright notice(s), + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice(s), + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ATOMIC_OPS_MSVC_H__ +#define __ATOMIC_OPS_MSVC_H__ + +#include "atomic_ops_utils.h" + +#define NOGDI +#ifndef NOMINMAX +# define NOMINMAX +#endif +#define WIN32_LEAN_AND_MEAN + +#include +#include + +/******************************************************************************/ +/* 64-bit operations. */ +#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) +ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) +{ + return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x; +} + +ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)) - x; +} + +ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) +{ + return InterlockedCompareExchange64((int64_t *)v, _new, old); +} +#endif + +/******************************************************************************/ +/* 32-bit operations. */ +ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) +{ + return InterlockedExchangeAdd(p, x) + x; +} + +ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + return InterlockedExchangeAdd(p, -((int32_t)x)) - x; +} + +ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) +{ + return InterlockedCompareExchange((long *)v, _new, old); +} + +/******************************************************************************/ +/* 8-bit operations. 
*/ + +#pragma intrinsic(_InterlockedAnd8) +ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b) +{ +#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) + return InterlockedAnd8((char *)p, (char)b); +#else + return _InterlockedAnd8((char *)p, (char)b); +#endif +} + +#pragma intrinsic(_InterlockedOr8) +ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b) +{ +#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) + return InterlockedOr8((char *)p, (char)b); +#else + return _InterlockedOr8((char *)p, (char)b); +#endif +} + +#endif /* __ATOMIC_OPS_MSVC_H__ */ diff --git a/intern/atomic/intern/atomic_ops_unix.h b/intern/atomic/intern/atomic_ops_unix.h new file mode 100644 index 00000000000..0a0b988bd72 --- /dev/null +++ b/intern/atomic/intern/atomic_ops_unix.h @@ -0,0 +1,180 @@ +/* + * Original code from jemalloc with this license: + * + * Copyright (C) 2002-2013 Jason Evans . + * All rights reserved. + * Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. + * Copyright (C) 2009-2013 Facebook, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * 1. Redistributions of source code must retain the above copyright notice(s), + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice(s), + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 Blender Foundation. + * All rights reserved. + * + * The Original Code is: adapted from jemalloc. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +#ifndef __ATOMIC_OPS_GCC_H__ +#define __ATOMIC_OPS_GCC_H__ + +#include "atomic_ops_utils.h" + +/******************************************************************************/ +/* 64-bit operations. 
*/ +#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) +# if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) +ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) +{ + return __sync_add_and_fetch(p, x); +} + +ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + return __sync_sub_and_fetch(p, x); +} + +ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) +{ + return __sync_val_compare_and_swap(v, old, _new); +} +# elif (defined(__amd64__) || defined(__x86_64__)) +ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) +{ + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + return x; +} + +ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + x = (uint64_t)(-(int64_t)x); + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + return x; +} + +ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) +{ + uint64_t ret; + asm volatile ( + "lock; cmpxchgq %2,%1" + : "=a" (ret), "+m" (*v) + : "r" (_new), "0" (old) + : "memory"); + return ret; +} +# else +# error "Missing implementation for 64-bit atomic operations" +# endif +#endif + +/******************************************************************************/ +/* 32-bit operations. */ +#if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) +ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) +{ + return __sync_add_and_fetch(p, x); +} + +ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + return __sync_sub_and_fetch(p, x); +} + +ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) +{ + return __sync_val_compare_and_swap(v, old, _new); +} +#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) +{ + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + return x; +} + +ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + x = (uint32_t)(-(int32_t)x); + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + return x; +} + +ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) +{ + uint32_t ret; + asm volatile ( + "lock; cmpxchgl %2,%1" + : "=a" (ret), "+m" (*v) + : "r" (_new), "0" (old) + : "memory"); + return ret; +} +#else +# error "Missing implementation for 32-bit atomic operations" +#endif + +/******************************************************************************/ +/* 8-bit operations. 
*/ +#if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_1)) +ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b) +{ + return __sync_fetch_and_and(p, b); +} +ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b) +{ + return __sync_fetch_and_or(p, b); +} +#else +# error "Missing implementation for 8-bit atomic operations" +#endif + +#endif /* __ATOMIC_OPS_GCC_H__ */ diff --git a/intern/atomic/intern/atomic_ops_utils.h b/intern/atomic/intern/atomic_ops_utils.h new file mode 100644 index 00000000000..fcbb2346243 --- /dev/null +++ b/intern/atomic/intern/atomic_ops_utils.h @@ -0,0 +1,110 @@ +/* + * Original code from jemalloc with this license: + * + * Copyright (C) 2002-2013 Jason Evans . + * All rights reserved. + * Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. + * Copyright (C) 2009-2013 Facebook, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * 1. Redistributions of source code must retain the above copyright notice(s), + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice(s), + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 Blender Foundation. + * All rights reserved. + * + * The Original Code is: adapted from jemalloc. 
+ * + * ***** END GPL LICENSE BLOCK ***** + */ + +#ifndef __ATOMIC_OPS_UTILS_H__ +#define __ATOMIC_OPS_UTILS_H__ + +/* needed for int types */ +#include "../../../source/blender/blenlib/BLI_sys_types.h" +#include +#include + +#include + +/* little macro so inline keyword works */ +#if defined(_MSC_VER) +# define ATOMIC_INLINE static __forceinline +#else +# if (defined(__APPLE__) && defined(__ppc__)) +/* static inline __attribute__ here breaks osx ppc gcc42 build */ +# define ATOMIC_INLINE static __attribute__((always_inline)) +# else +# define ATOMIC_INLINE static inline __attribute__((always_inline)) +# endif +#endif + +#ifndef LIKELY +# ifdef __GNUC__ +# define LIKELY(x) __builtin_expect(!!(x), 1) +# define UNLIKELY(x) __builtin_expect(!!(x), 0) +# else +# define LIKELY(x) (x) +# define UNLIKELY(x) (x) +# endif +#endif + +#ifdef UINTPTR_MAX +# if (UINTPTR_MAX == 0xFFFFFFFF) +# define LG_SIZEOF_PTR 4 +# elif (UINTPTR_MAX == 0xFFFFFFFFFFFFFFFF) +# define LG_SIZEOF_PTR 8 +# endif +#elif defined(__WORDSIZE) /* Fallback for older glibc and cpp */ +# if (__WORDSIZE == 32) +# define LG_SIZEOF_PTR 4 +# elif (__WORDSIZE == 64) +# define LG_SIZEOF_PTR 8 +# endif +#endif + +#ifndef LG_SIZEOF_PTR +# error "Cannot find pointer size" +#endif + +#if (UINT_MAX == 0xFFFFFFFF) +# define LG_SIZEOF_INT 4 +#elif (UINT_MAX == 0xFFFFFFFFFFFFFFFF) +# define LG_SIZEOF_INT 8 +#else +# error "Cannot find int size" +#endif + +#endif /* __ATOMIC_OPS_UTILS_H__ */ diff --git a/source/blender/blenkernel/intern/pbvh.c b/source/blender/blenkernel/intern/pbvh.c index 330b5922c9a..d73f087a3fe 100644 --- a/source/blender/blenkernel/intern/pbvh.c +++ b/source/blender/blenkernel/intern/pbvh.c @@ -979,16 +979,7 @@ static void pbvh_update_normals_accum_task_cb(void *userdata, const int n) * Not exact equivalent though, since atomicity is only ensured for one component * of the vector at a time, but here it shall not make any sensible difference. */ for (int k = 3; k--; ) { - /* Atomic float addition. - * Note that since collision are unlikely, loop will nearly always run once. */ - float oldval, newval; - uint32_t prevval; - do { - oldval = vnors[v][k]; - newval = oldval + fn[k]; - prevval = atomic_cas_uint32( - (uint32_t *)&vnors[v][k], *(uint32_t *)(&oldval), *(uint32_t *)(&newval)); - } while (UNLIKELY(prevval != *(uint32_t *)(&oldval))); + atomic_add_fl(&vnors[v][k], fn[k]); } } }
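
Usage sketch (illustrative only, not part of the diff above): the new atomic_add_fl() is a
compare-and-swap-loop 'faked' atomic, so it is meant for accumulation patterns like
pbvh_update_normals_accum_task_cb() above, where collisions on any single float are rare.
The standalone program below is a minimal sketch assuming intern/atomic is on the include
path (so that atomic_ops.h and its own includes resolve inside a Blender source tree) and
that pthreads are available; the worker() helper, thread count, and iteration count are
made up for the example.

/* Build (assumed layout): cc -I intern/atomic example.c -lpthread */
#include <stdio.h>
#include <pthread.h>

#include "atomic_ops.h"

#define NUM_THREADS 4
#define NUM_ADDS 100000

static float total = 0.0f;

static void *worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < NUM_ADDS; i++) {
		/* Internally a compare-and-swap loop; it retries only when another
		 * thread modified the same float in between. */
		atomic_add_fl(&total, 0.25f);
	}
	return NULL;
}

int main(void)
{
	pthread_t threads[NUM_THREADS];

	for (int i = 0; i < NUM_THREADS; i++) {
		pthread_create(&threads[i], NULL, worker, NULL);
	}
	for (int i = 0; i < NUM_THREADS; i++) {
		pthread_join(threads[i], NULL);
	}

	/* 4 * 100000 * 0.25 = 100000.0, exactly representable in a float. */
	printf("total = %f\n", total);
	return 0;
}

Note that this toy program deliberately hammers a single address just to show that no
updates are lost; per the WARNING comment in atomic_ops.h, real callers should keep
collisions rare (pbvh.c spreads the additions across many vertex normals), otherwise a
plain mutex may well be cheaper.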