From ba3ae9ea273f7e596607281ffd77871e5a44fca7 Mon Sep 17 00:00:00 2001
From: Bastien Montagne
Date: Mon, 9 May 2016 17:03:08 +0200
Subject: [PATCH] Cleanup and refactor our atomic library.

This commit:
* Removes most of the dirty internal details from the public atomic_ops.h file
  and moves them into the private /intern subdir.
* Removes unused 'architectures' (__APPLE__ and jemalloc).
* Splits each implementation into its own file.
* Uses the C99 limits.h system header to determine pointer and int sizes,
  instead of a fixed, hardcoded list of architectures.
* Introduces new 'faked' atomic ops for floats.

Note that we may add many more real and 'faked' atomic operations over
integers and floats (multiplication, division, bit shifts, bitwise booleans,
etc.) as needs arise.

Reviewers: sergey, campbellbarton

Differential Revision: https://developer.blender.org/D1982
---
 intern/atomic/atomic_ops.h              | 505 +++---------------------
 intern/atomic/intern/atomic_ops_ext.h   | 146 +++++++
 intern/atomic/intern/atomic_ops_msvc.h  | 102 +++++
 intern/atomic/intern/atomic_ops_unix.h  | 180 +++++++++
 intern/atomic/intern/atomic_ops_utils.h | 110 ++++++
 source/blender/blenkernel/intern/pbvh.c |  11 +-
 6 files changed, 590 insertions(+), 464 deletions(-)
 create mode 100644 intern/atomic/intern/atomic_ops_ext.h
 create mode 100644 intern/atomic/intern/atomic_ops_msvc.h
 create mode 100644 intern/atomic/intern/atomic_ops_unix.h
 create mode 100644 intern/atomic/intern/atomic_ops_utils.h

diff --git a/intern/atomic/atomic_ops.h b/intern/atomic/atomic_ops.h
index dd1bdd2328d..e4e1bdc1c09 100644
--- a/intern/atomic/atomic_ops.h
+++ b/intern/atomic/atomic_ops.h
@@ -1,11 +1,11 @@
 /*
- * Adopted from jemalloc with this license:
+ * Original code from jemalloc with this license:
 *
 * Copyright (C) 2002-2013 Jason Evans .
 * All rights reserved.
 * Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
 * Copyright (C) 2009-2013 Facebook, Inc. All rights reserved.
-
+ *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright notice(s),
@@ -13,7 +13,7 @@
 * 2. Redistributions in binary form must reproduce the above copyright notice(s),
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
-
+ *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
@@ -24,64 +24,59 @@
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 Blender Foundation. + * All rights reserved. + * + * The Original Code is: adapted from jemalloc. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** + * \file atomic_ops.h + * \ingroup Atomic + * + * \author Copyright (C) 2016 Blender Foundation, adapted from jemalloc. + * \brief Provides wrapper around system-specific atomic primitives, and some extensions (faked-atomic operations + * over float numbers). */ #ifndef __ATOMIC_OPS_H__ #define __ATOMIC_OPS_H__ -#include - -#if defined (__APPLE__) -# include -#elif defined(_MSC_VER) -# define NOGDI -# ifndef NOMINMAX -# define NOMINMAX -# endif -# define WIN32_LEAN_AND_MEAN -# include -#elif defined(__arm__) +#if defined(__arm__) /* Attempt to fix compilation error on Debian armel kernel. * arm7 architecture does have both 32 and 64bit atomics, however * it's gcc doesn't have __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n defined. */ # define JE_FORCE_SYNC_COMPARE_AND_SWAP_1 -# define JE_FORCE_SYNC_COMPARE_AND_SWAP_8 # define JE_FORCE_SYNC_COMPARE_AND_SWAP_4 +# define JE_FORCE_SYNC_COMPARE_AND_SWAP_8 #endif -/* needed for int types */ -#include "../../source/blender/blenlib/BLI_sys_types.h" -#include -#include +#include "intern/atomic_ops_utils.h" -/* little macro so inline keyword works */ -#if defined(_MSC_VER) -# define ATOMIC_INLINE static __forceinline -#else -# if (defined(__APPLE__) && defined(__ppc__)) -/* static inline __attribute__ here breaks osx ppc gcc42 build */ -# define ATOMIC_INLINE static __attribute__((always_inline)) -# else -# define ATOMIC_INLINE static inline __attribute__((always_inline)) -# endif -#endif - -/* This is becoming a bit nastier that it was originally foreseen, - * consider using autoconfig detection instead. - */ -#if defined(_M_X64) || defined(__amd64__) || defined(__x86_64__) || defined(__s390x__) || defined(__powerpc64__) || defined(__aarch64__) || (defined(__sparc__) && defined(__arch64__)) || defined(__alpha__) || defined(__mips64) -# define LG_SIZEOF_PTR 3 -# define LG_SIZEOF_INT 2 -#else -# define LG_SIZEOF_PTR 2 -# define LG_SIZEOF_INT 2 -#endif - -/************************/ +/******************************************************************************/ /* Function prototypes. */ -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new); @@ -102,420 +97,22 @@ ATOMIC_INLINE unsigned atomic_add_u(unsigned *p, unsigned x); ATOMIC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x); ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new); -/******************************************************************************/ -/* 64-bit operations. 
*/ -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 -ATOMIC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - return __sync_add_and_fetch(p, x); -} - -ATOMIC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - return __sync_sub_and_fetch(p, x); -} - -ATOMIC_INLINE uint64_t -atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) -{ - return __sync_val_compare_and_swap(v, old, _new); -} -#elif (defined(_MSC_VER)) -ATOMIC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x; -} - -ATOMIC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)) - x; -} - -ATOMIC_INLINE uint64_t -atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) -{ - return InterlockedCompareExchange64((int64_t *)v, _new, old); -} -#elif (defined(__APPLE__)) -ATOMIC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - return (uint64_t)OSAtomicAdd64((int64_t)x, (int64_t *)p); -} - -ATOMIC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - return (uint64_t)OSAtomicAdd64(-((int64_t)x), (int64_t *)p); -} - -ATOMIC_INLINE uint64_t -atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) -{ - uint64_t init_val = *v; - OSAtomicCompareAndSwap64((int64_t)old, (int64_t)_new, (int64_t *)v); - return init_val; -} -# elif (defined(__amd64__) || defined(__x86_64__)) -ATOMIC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - return x; -} - -ATOMIC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - x = (uint64_t)(-(int64_t)x); - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - return x; -} - -ATOMIC_INLINE uint64_t -atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) -{ - uint64_t ret; - asm volatile ( - "lock; cmpxchgq %2,%1" - : "=a" (ret), "+m" (*v) - : "r" (_new), "0" (old) - : "memory"); - return ret; -} - -# elif (defined(JEMALLOC_ATOMIC9)) -ATOMIC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - /* - * atomic_fetchadd_64() doesn't exist, but we only ever use this - * function on LP64 systems, so atomic_fetchadd_long() will do. - */ - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return atomic_fetchadd_long(p, (unsigned long)x) + x; -} - -ATOMIC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x; -} - -ATOMIC_INLINE uint64_t -atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) -{ - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return atomic_cmpset_long(v, old, _new); -} -# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) -ATOMIC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - return __sync_add_and_fetch(p, x); -} - -ATOMIC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - return __sync_sub_and_fetch(p, x); -} - -ATOMIC_INLINE uint64_t -atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) -{ - return __sync_val_compare_and_swap(v, old, _new); -} -# else -# error "Missing implementation for 64-bit atomic operations" -# endif -#endif +/* WARNING! 
Float 'atomics' are really faked ones, those are actually closer to some kind of spinlock-sync'ed operation, + * which means they are only efficient if collisions are highly unlikely (i.e. if probability of two threads + * working on the same pointer at the same time is very low). */ +ATOMIC_INLINE float atomic_add_fl(float *p, const float x); /******************************************************************************/ -/* 32-bit operations. */ -#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 -ATOMIC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - return __sync_add_and_fetch(p, x); -} +/* Include system-dependent implementations. */ -ATOMIC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - return __sync_sub_and_fetch(p, x); -} - -ATOMIC_INLINE uint32_t -atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) -{ - return __sync_val_compare_and_swap(v, old, _new); -} -#elif (defined(_MSC_VER)) -ATOMIC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - return InterlockedExchangeAdd(p, x) + x; -} - -ATOMIC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - return InterlockedExchangeAdd(p, -((int32_t)x)) - x; -} - -ATOMIC_INLINE uint32_t -atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) -{ - return InterlockedCompareExchange((long *)v, _new, old); -} -#elif (defined(__APPLE__)) -ATOMIC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - return (uint32_t)OSAtomicAdd32((int32_t)x, (int32_t *)p); -} - -ATOMIC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - return (uint32_t)OSAtomicAdd32(-((int32_t)x), (int32_t *)p); -} - -ATOMIC_INLINE uint32_t -atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) -{ - uint32_t init_val = *v; - OSAtomicCompareAndSwap32((int32_t)old, (int32_t)_new, (int32_t *)v); - return init_val; -} -#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -ATOMIC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - return x; -} - -ATOMIC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - x = (uint32_t)(-(int32_t)x); - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - return x; -} - -ATOMIC_INLINE uint32_t -atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) -{ - uint32_t ret; - asm volatile ( - "lock; cmpxchgl %2,%1" - : "=a" (ret), "+m" (*v) - : "r" (_new), "0" (old) - : "memory"); - return ret; -} -#elif (defined(JEMALLOC_ATOMIC9)) -ATOMIC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - return atomic_fetchadd_32(p, x) + x; -} - -ATOMIC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - return atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x; -} - -ATOMIC_INLINE uint32_t -atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) -{ - return atomic_cmpset_32(v, old, _new); -} -#elif defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4) -ATOMIC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - return __sync_add_and_fetch(p, x); -} - -ATOMIC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - return __sync_sub_and_fetch(p, x); -} - -ATOMIC_INLINE uint32_t -atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) -{ - return __sync_val_compare_and_swap(v, old, _new); -} +/* Note that we are using _unix flavor as fallback here (it will raise precompiler errors as needed). 
*/ +#if defined(_MSC_VER) +# include "intern/atomic_ops_msvc.h" #else -# error "Missing implementation for 32-bit atomic operations" +# include "intern/atomic_ops_unix.h" #endif -/******************************************************************************/ -/* 8-bit operations. */ -#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 -ATOMIC_INLINE uint8_t -atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b) -{ - return __sync_fetch_and_or(p, b); -} -ATOMIC_INLINE uint8_t -atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b) -{ - return __sync_fetch_and_and(p, b); -} -#elif (defined(_MSC_VER)) -#include -#pragma intrinsic(_InterlockedAnd8) -ATOMIC_INLINE uint8_t -atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b) -{ -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) - return InterlockedOr8((char *)p, (char)b); -#else - return _InterlockedOr8((char *)p, (char)b); -#endif -} -ATOMIC_INLINE uint8_t -atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b) -{ -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) - return InterlockedAnd8((char *)p, (char)b); -#else - return _InterlockedAnd8((char *)p, (char)b); -#endif -} -#elif defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_1) -ATOMIC_INLINE uint8_t -atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b) -{ - return __sync_fetch_and_or(p, b); -} -ATOMIC_INLINE uint8_t -atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b) -{ - return __sync_fetch_and_and(p, b); -} -#else -# error "Missing implementation for 8-bit atomic operations" -#endif - -/******************************************************************************/ -/* size_t operations. */ -ATOMIC_INLINE size_t -atomic_add_z(size_t *p, size_t x) -{ - assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR); - -#if (LG_SIZEOF_PTR == 3) - return (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_PTR == 2) - return (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x); -#endif -} - -ATOMIC_INLINE size_t -atomic_sub_z(size_t *p, size_t x) -{ - assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR); - -#if (LG_SIZEOF_PTR == 3) - return (size_t)atomic_add_uint64((uint64_t *)p, - (uint64_t)-((int64_t)x)); -#elif (LG_SIZEOF_PTR == 2) - return (size_t)atomic_add_uint32((uint32_t *)p, - (uint32_t)-((int32_t)x)); -#endif -} - -ATOMIC_INLINE size_t -atomic_cas_z(size_t *v, size_t old, size_t _new) -{ - assert(sizeof(size_t) == 1 << LG_SIZEOF_PTR); - -#if (LG_SIZEOF_PTR == 3) - return (size_t)atomic_cas_uint64((uint64_t *)v, - (uint64_t)old, - (uint64_t)_new); -#elif (LG_SIZEOF_PTR == 2) - return (size_t)atomic_cas_uint32((uint32_t *)v, - (uint32_t)old, - (uint32_t)_new); -#endif -} - -/******************************************************************************/ -/* unsigned operations. 
*/ -ATOMIC_INLINE unsigned -atomic_add_u(unsigned *p, unsigned x) -{ - assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT); - -#if (LG_SIZEOF_INT == 3) - return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_INT == 2) - return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x); -#endif -} - -ATOMIC_INLINE unsigned -atomic_sub_u(unsigned *p, unsigned x) -{ - assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT); - -#if (LG_SIZEOF_INT == 3) - return (unsigned)atomic_add_uint64((uint64_t *)p, - (uint64_t)-((int64_t)x)); -#elif (LG_SIZEOF_INT == 2) - return (unsigned)atomic_add_uint32((uint32_t *)p, - (uint32_t)-((int32_t)x)); -#endif -} - -ATOMIC_INLINE unsigned -atomic_cas_u(unsigned *v, unsigned old, unsigned _new) -{ - assert(sizeof(unsigned) == 1 << LG_SIZEOF_INT); - -#if (LG_SIZEOF_PTR == 3) - return (unsigned)atomic_cas_uint64((uint64_t *)v, - (uint64_t)old, - (uint64_t)_new); -#elif (LG_SIZEOF_PTR == 2) - return (unsigned)atomic_cas_uint32((uint32_t *)v, - (uint32_t)old, - (uint32_t)_new); -#endif -} +/* Include 'fake' atomic extensions, built over real atomic primitives. */ +#include "intern/atomic_ops_ext.h" #endif /* __ATOMIC_OPS_H__ */ diff --git a/intern/atomic/intern/atomic_ops_ext.h b/intern/atomic/intern/atomic_ops_ext.h new file mode 100644 index 00000000000..4065299d2ea --- /dev/null +++ b/intern/atomic/intern/atomic_ops_ext.h @@ -0,0 +1,146 @@ +/* + * Original code from jemalloc with this license: + * + * Copyright (C) 2002-2013 Jason Evans . + * All rights reserved. + * Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. + * Copyright (C) 2009-2013 Facebook, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * 1. Redistributions of source code must retain the above copyright notice(s), + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice(s), + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 Blender Foundation. + * All rights reserved. + * + * The Original Code is: adapted from jemalloc. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +#ifndef __ATOMIC_OPS_EXT_H__ +#define __ATOMIC_OPS_EXT_H__ + +#include "atomic_ops_utils.h" + +/******************************************************************************/ +/* size_t operations. */ +ATOMIC_INLINE size_t atomic_add_z(size_t *p, size_t x) +{ + assert(sizeof(size_t) == LG_SIZEOF_PTR); + +#if (LG_SIZEOF_PTR == 8) + return (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_PTR == 4) + return (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + +ATOMIC_INLINE size_t atomic_sub_z(size_t *p, size_t x) +{ + assert(sizeof(size_t) == LG_SIZEOF_PTR); + +#if (LG_SIZEOF_PTR == 8) + return (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); +#elif (LG_SIZEOF_PTR == 4) + return (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); +#endif +} + +ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new) +{ + assert(sizeof(size_t) == LG_SIZEOF_PTR); + +#if (LG_SIZEOF_PTR == 8) + return (size_t)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new); +#elif (LG_SIZEOF_PTR == 4) + return (size_t)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new); +#endif +} + +/******************************************************************************/ +/* unsigned operations. */ +ATOMIC_INLINE unsigned atomic_add_u(unsigned *p, unsigned x) +{ + assert(sizeof(unsigned) == LG_SIZEOF_INT); + +#if (LG_SIZEOF_INT == 8) + return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_INT == 4) + return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + +ATOMIC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x) +{ + assert(sizeof(unsigned) == LG_SIZEOF_INT); + +#if (LG_SIZEOF_INT == 8) + return (unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x)); +#elif (LG_SIZEOF_INT == 4) + return (unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x)); +#endif +} + +ATOMIC_INLINE unsigned atomic_cas_u(unsigned *v, unsigned old, unsigned _new) +{ + assert(sizeof(unsigned) == LG_SIZEOF_INT); + +#if (LG_SIZEOF_INT == 8) + return (unsigned)atomic_cas_uint64((uint64_t *)v, (uint64_t)old, (uint64_t)_new); +#elif (LG_SIZEOF_INT == 4) + return (unsigned)atomic_cas_uint32((uint32_t *)v, (uint32_t)old, (uint32_t)_new); +#endif +} + +/******************************************************************************/ +/* float operations. */ + +ATOMIC_INLINE float atomic_add_fl(float *p, const float x) +{ + assert(sizeof(float) == sizeof(uint32_t)); + + float oldval, newval; + uint32_t prevval; + + do { /* Note that since collisions are unlikely, loop will nearly always run once. 
*/ + oldval = *p; + newval = oldval + x; + prevval = atomic_cas_uint32((uint32_t *)p, *(uint32_t *)(&oldval), *(uint32_t *)(&newval)); + } while (UNLIKELY(prevval != *(uint32_t *)(&oldval))); + + return newval; +} + +#endif /* __ATOMIC_OPS_EXT_H__ */ diff --git a/intern/atomic/intern/atomic_ops_msvc.h b/intern/atomic/intern/atomic_ops_msvc.h new file mode 100644 index 00000000000..bd9186e7864 --- /dev/null +++ b/intern/atomic/intern/atomic_ops_msvc.h @@ -0,0 +1,102 @@ +/* + * Adopted from jemalloc with this license: + * + * Copyright (C) 2002-2013 Jason Evans . + * All rights reserved. + * Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. + * Copyright (C) 2009-2013 Facebook, Inc. All rights reserved. + + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * 1. Redistributions of source code must retain the above copyright notice(s), + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice(s), + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ATOMIC_OPS_MSVC_H__ +#define __ATOMIC_OPS_MSVC_H__ + +#include "atomic_ops_utils.h" + +#define NOGDI +#ifndef NOMINMAX +# define NOMINMAX +#endif +#define WIN32_LEAN_AND_MEAN + +#include +#include + +/******************************************************************************/ +/* 64-bit operations. */ +#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) +ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) +{ + return InterlockedExchangeAdd64((int64_t *)p, (int64_t)x) + x; +} + +ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + return InterlockedExchangeAdd64((int64_t *)p, -((int64_t)x)) - x; +} + +ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) +{ + return InterlockedCompareExchange64((int64_t *)v, _new, old); +} +#endif + +/******************************************************************************/ +/* 32-bit operations. */ +ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) +{ + return InterlockedExchangeAdd(p, x) + x; +} + +ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + return InterlockedExchangeAdd(p, -((int32_t)x)) - x; +} + +ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) +{ + return InterlockedCompareExchange((long *)v, _new, old); +} + +/******************************************************************************/ +/* 8-bit operations. 
*/ + +#pragma intrinsic(_InterlockedAnd8) +ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b) +{ +#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) + return InterlockedAnd8((char *)p, (char)b); +#else + return _InterlockedAnd8((char *)p, (char)b); +#endif +} + +#pragma intrinsic(_InterlockedOr8) +ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b) +{ +#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) + return InterlockedOr8((char *)p, (char)b); +#else + return _InterlockedOr8((char *)p, (char)b); +#endif +} + +#endif /* __ATOMIC_OPS_MSVC_H__ */ diff --git a/intern/atomic/intern/atomic_ops_unix.h b/intern/atomic/intern/atomic_ops_unix.h new file mode 100644 index 00000000000..0a0b988bd72 --- /dev/null +++ b/intern/atomic/intern/atomic_ops_unix.h @@ -0,0 +1,180 @@ +/* + * Original code from jemalloc with this license: + * + * Copyright (C) 2002-2013 Jason Evans . + * All rights reserved. + * Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. + * Copyright (C) 2009-2013 Facebook, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * 1. Redistributions of source code must retain the above copyright notice(s), + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice(s), + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 Blender Foundation. + * All rights reserved. + * + * The Original Code is: adapted from jemalloc. + * + * ***** END GPL LICENSE BLOCK ***** + */ + +#ifndef __ATOMIC_OPS_GCC_H__ +#define __ATOMIC_OPS_GCC_H__ + +#include "atomic_ops_utils.h" + +/******************************************************************************/ +/* 64-bit operations. 
*/ +#if (LG_SIZEOF_PTR == 8 || LG_SIZEOF_INT == 8) +# if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) +ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) +{ + return __sync_add_and_fetch(p, x); +} + +ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + return __sync_sub_and_fetch(p, x); +} + +ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) +{ + return __sync_val_compare_and_swap(v, old, _new); +} +# elif (defined(__amd64__) || defined(__x86_64__)) +ATOMIC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) +{ + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + return x; +} + +ATOMIC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + x = (uint64_t)(-(int64_t)x); + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + return x; +} + +ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new) +{ + uint64_t ret; + asm volatile ( + "lock; cmpxchgq %2,%1" + : "=a" (ret), "+m" (*v) + : "r" (_new), "0" (old) + : "memory"); + return ret; +} +# else +# error "Missing implementation for 64-bit atomic operations" +# endif +#endif + +/******************************************************************************/ +/* 32-bit operations. */ +#if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) +ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) +{ + return __sync_add_and_fetch(p, x); +} + +ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + return __sync_sub_and_fetch(p, x); +} + +ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) +{ + return __sync_val_compare_and_swap(v, old, _new); +} +#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +ATOMIC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) +{ + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + return x; +} + +ATOMIC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + x = (uint32_t)(-(int32_t)x); + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + return x; +} + +ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new) +{ + uint32_t ret; + asm volatile ( + "lock; cmpxchgl %2,%1" + : "=a" (ret), "+m" (*v) + : "r" (_new), "0" (old) + : "memory"); + return ret; +} +#else +# error "Missing implementation for 32-bit atomic operations" +#endif + +/******************************************************************************/ +/* 8-bit operations. 
*/ +#if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_1)) +ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b) +{ + return __sync_fetch_and_and(p, b); +} +ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b) +{ + return __sync_fetch_and_or(p, b); +} +#else +# error "Missing implementation for 8-bit atomic operations" +#endif + +#endif /* __ATOMIC_OPS_GCC_H__ */ diff --git a/intern/atomic/intern/atomic_ops_utils.h b/intern/atomic/intern/atomic_ops_utils.h new file mode 100644 index 00000000000..fcbb2346243 --- /dev/null +++ b/intern/atomic/intern/atomic_ops_utils.h @@ -0,0 +1,110 @@ +/* + * Original code from jemalloc with this license: + * + * Copyright (C) 2002-2013 Jason Evans . + * All rights reserved. + * Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. + * Copyright (C) 2009-2013 Facebook, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * 1. Redistributions of source code must retain the above copyright notice(s), + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice(s), + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 Blender Foundation. + * All rights reserved. + * + * The Original Code is: adapted from jemalloc. 
+ * + * ***** END GPL LICENSE BLOCK ***** + */ + +#ifndef __ATOMIC_OPS_UTILS_H__ +#define __ATOMIC_OPS_UTILS_H__ + +/* needed for int types */ +#include "../../../source/blender/blenlib/BLI_sys_types.h" +#include +#include + +#include + +/* little macro so inline keyword works */ +#if defined(_MSC_VER) +# define ATOMIC_INLINE static __forceinline +#else +# if (defined(__APPLE__) && defined(__ppc__)) +/* static inline __attribute__ here breaks osx ppc gcc42 build */ +# define ATOMIC_INLINE static __attribute__((always_inline)) +# else +# define ATOMIC_INLINE static inline __attribute__((always_inline)) +# endif +#endif + +#ifndef LIKELY +# ifdef __GNUC__ +# define LIKELY(x) __builtin_expect(!!(x), 1) +# define UNLIKELY(x) __builtin_expect(!!(x), 0) +# else +# define LIKELY(x) (x) +# define UNLIKELY(x) (x) +# endif +#endif + +#ifdef UINTPTR_MAX +# if (UINTPTR_MAX == 0xFFFFFFFF) +# define LG_SIZEOF_PTR 4 +# elif (UINTPTR_MAX == 0xFFFFFFFFFFFFFFFF) +# define LG_SIZEOF_PTR 8 +# endif +#elif defined(__WORDSIZE) /* Fallback for older glibc and cpp */ +# if (__WORDSIZE == 32) +# define LG_SIZEOF_PTR 4 +# elif (__WORDSIZE == 64) +# define LG_SIZEOF_PTR 8 +# endif +#endif + +#ifndef LG_SIZEOF_PTR +# error "Cannot find pointer size" +#endif + +#if (UINT_MAX == 0xFFFFFFFF) +# define LG_SIZEOF_INT 4 +#elif (UINT_MAX == 0xFFFFFFFFFFFFFFFF) +# define LG_SIZEOF_INT 8 +#else +# error "Cannot find int size" +#endif + +#endif /* __ATOMIC_OPS_UTILS_H__ */ diff --git a/source/blender/blenkernel/intern/pbvh.c b/source/blender/blenkernel/intern/pbvh.c index 330b5922c9a..d73f087a3fe 100644 --- a/source/blender/blenkernel/intern/pbvh.c +++ b/source/blender/blenkernel/intern/pbvh.c @@ -979,16 +979,7 @@ static void pbvh_update_normals_accum_task_cb(void *userdata, const int n) * Not exact equivalent though, since atomicity is only ensured for one component * of the vector at a time, but here it shall not make any sensible difference. */ for (int k = 3; k--; ) { - /* Atomic float addition. - * Note that since collision are unlikely, loop will nearly always run once. */ - float oldval, newval; - uint32_t prevval; - do { - oldval = vnors[v][k]; - newval = oldval + fn[k]; - prevval = atomic_cas_uint32( - (uint32_t *)&vnors[v][k], *(uint32_t *)(&oldval), *(uint32_t *)(&newval)); - } while (UNLIKELY(prevval != *(uint32_t *)(&oldval))); + atomic_add_fl(&vnors[v][k], fn[k]); } } }
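
Usage sketch (illustrative only, not part of the diff above): the new atomic_add_fl() is a
compare-and-swap-loop 'faked' atomic, so it is meant for accumulation patterns like
pbvh_update_normals_accum_task_cb() above, where collisions on any single float are rare.
The standalone program below is a minimal sketch assuming intern/atomic is on the include
path (so that atomic_ops.h and its own includes resolve inside a Blender source tree) and
that pthreads are available; the worker() helper, thread count, and iteration count are
made up for the example.

/* Build (assumed layout): cc -I intern/atomic example.c -lpthread */
#include <stdio.h>
#include <pthread.h>

#include "atomic_ops.h"

#define NUM_THREADS 4
#define NUM_ADDS 100000

static float total = 0.0f;

static void *worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < NUM_ADDS; i++) {
		/* Internally a compare-and-swap loop; it retries only when another
		 * thread modified the same float in between. */
		atomic_add_fl(&total, 0.25f);
	}
	return NULL;
}

int main(void)
{
	pthread_t threads[NUM_THREADS];

	for (int i = 0; i < NUM_THREADS; i++) {
		pthread_create(&threads[i], NULL, worker, NULL);
	}
	for (int i = 0; i < NUM_THREADS; i++) {
		pthread_join(threads[i], NULL);
	}

	/* 4 * 100000 * 0.25 = 100000.0, exactly representable in a float. */
	printf("total = %f\n", total);
	return 0;
}

Note that this toy program deliberately hammers a single address just to show that no
updates are lost; per the WARNING comment in atomic_ops.h, real callers should keep
collisions rare (pbvh.c spreads the additions across many vertex normals), otherwise a
plain mutex may well be cheaper.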