Cycles: Replace own aligned allocator with system one

This replaces our own implementation of aligned malloc with the system's
aligned allocation functions, which differ depending on the operating system.

This is probably a barely noticeable change, but at the same time it
might reduce the amount of wasted memory.
This commit is contained in:
Sergey Sharybin 2015-02-14 17:29:47 +05:00
parent 24976dd29d
commit 01067fe51c
4 changed files with 113 additions and 27 deletions

@ -9,6 +9,7 @@ set(INC_SYS
)
set(SRC
util_aligned_malloc.cpp
util_cache.cpp
util_logging.cpp
util_md5.cpp
@ -33,6 +34,7 @@ endif()
set(SRC_HEADERS
util_algorithm.h
util_aligned_malloc.h
util_args.h
util_atomic.h
util_boundbox.h

@ -0,0 +1,73 @@
/*
* Copyright 2011-2015 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "util_aligned_malloc.h"
/* Adapted from Libmv. */
#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__NetBSD__)
/* Needed for memalign on Linux and _aligned_malloc on Windows. */
# ifdef FREE_WINDOWS
/* Make sure _aligned_malloc is included. */
# ifdef __MSVCRT_VERSION__
# undef __MSVCRT_VERSION__
# endif
# define __MSVCRT_VERSION__ 0x0700
# endif /* FREE_WINDOWS */
# include <malloc.h>
#else
/* Apple's malloc is 16-byte aligned, and does not have malloc.h, so include
* stdlib instead.
*/
# include <cstdlib>
#endif
CCL_NAMESPACE_BEGIN
/* Allocate a block of `size` bytes whose address is aligned to at least
 * `alignment` bytes, using the aligned allocator of the current platform.
 *
 * Returns NULL when the arguments are invalid or the allocation fails.
 * The block has to be released with util_aligned_free().
 */
void *util_aligned_malloc(int size, int alignment)
{
	/* Both arguments are plain ints, so a byte count computed as size_t by
	 * the caller may arrive here as a negative value. Reject that (and a
	 * non-positive alignment) instead of letting it be converted into a
	 * huge unsigned request further down.
	 */
	if(size < 0 || alignment <= 0) {
		return NULL;
	}
#ifdef _WIN32
	return _aligned_malloc(size, alignment);
#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__)
	/* Mac OS X malloc() only guarantees 16-byte alignment, so go through
	 * posix_memalign() there as well. This honors larger alignment requests
	 * without relying on an assert, which is compiled out of release builds.
	 */
	void *result;
	if(posix_memalign(&result, alignment, size)) {
		/* Non-zero means allocation error:
		 * either no allocation or bad alignment value.
		 */
		return NULL;
	}
	return result;
#else  /* This is for Linux. */
	return memalign(alignment, size);
#endif
}
/* Release a block previously returned by util_aligned_malloc().
 *
 * Windows pairs _aligned_malloc() with its own deallocator; every other
 * platform hands the pointer to the regular free().
 */
void util_aligned_free(void *ptr)
{
#if !defined(_WIN32)
	free(ptr);
#else
	_aligned_free(ptr);
#endif
}
CCL_NAMESPACE_END

@ -0,0 +1,30 @@
/*
* Copyright 2011-2015 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __UTIL_ALIGNED_MALLOC_H__
#define __UTIL_ALIGNED_MALLOC_H__
CCL_NAMESPACE_BEGIN
/* Allocate a block of `size` bytes whose address is aligned to at least
 * `alignment` bytes.
 *
 * The implementation forwards to an allocator chosen per platform, so the
 * set of accepted alignment values depends on that allocator.
 * NOTE(review): failure is presumably reported as a NULL return on every
 * platform; callers should check the result.
 *
 * The returned block must be released with util_aligned_free().
 */
void *util_aligned_malloc(int size, int alignment);
/* Free memory allocated by util_aligned_malloc.
 *
 * Use this rather than calling free() directly: on Windows the block is
 * released through a dedicated deallocator.
 */
void util_aligned_free(void *ptr);
CCL_NAMESPACE_END
#endif /* __UTIL_ALIGNED_MALLOC_H__ */

@ -22,32 +22,13 @@
#include <string.h>
#include <vector>
#include "util_aligned_malloc.h"
#include "util_types.h"
CCL_NAMESPACE_BEGIN
using std::vector;
/* Allocate `size` bytes aligned to `alignment` bytes on top of plain malloc().
 *
 * `alignment` must be a power of two: the rounding below relies on masking
 * with ~(alignment - 1). The address returned by malloc() is stored in the
 * pointer-sized slot directly in front of the returned block so that
 * free_aligned() can recover it.
 *
 * Returns NULL when the padded size would overflow or malloc() fails.
 */
static inline void *malloc_aligned(size_t size, size_t alignment)
{
	/* One pointer slot for the bookkeeping plus worst-case alignment padding. */
	const size_t overhead = sizeof(void*) + alignment - 1;

	/* Without this check the sum below can wrap around and silently
	 * allocate a block that is far smaller than requested.
	 */
	if(size > (size_t)-1 - overhead) {
		return NULL;
	}

	char *data = (char*)malloc(size + overhead);
	if(data == NULL) {
		return NULL;
	}

	/* Round the first address past the bookkeeping slot up to the alignment. */
	const size_t base = (size_t)data;
	const size_t aligned = (base + sizeof(void*) + alignment - 1) & ~(alignment - 1);
	char *result = data + (aligned - base);

	/* Remember the real start of the allocation for free_aligned(). */
	*(((void**)result) - 1) = data;
	return result;
}
static inline void free_aligned(void *ptr)
{
if(ptr) {
void *data = *(((void**)ptr) - 1);
free(data);
}
}
/* Array
*
* Simplified version of vector, serving multiple purposes:
@ -74,7 +55,7 @@ public:
datasize = 0;
}
else {
data = (T*)malloc_aligned(sizeof(T)*newsize, alignment);
data = (T*)util_aligned_malloc(sizeof(T)*newsize, alignment);
datasize = newsize;
}
}
@ -91,7 +72,7 @@ public:
datasize = 0;
}
else {
data = (T*)malloc_aligned(sizeof(T)*from.datasize, alignment);
data = (T*)util_aligned_malloc(sizeof(T)*from.datasize, alignment);
memcpy(data, from.data, from.datasize*sizeof(T));
datasize = from.datasize;
}
@ -105,7 +86,7 @@ public:
data = NULL;
if(datasize > 0) {
data = (T*)malloc_aligned(sizeof(T)*datasize, alignment);
data = (T*)util_aligned_malloc(sizeof(T)*datasize, alignment);
memcpy(data, &from[0], datasize*sizeof(T));
}
@ -114,7 +95,7 @@ public:
~array()
{
free_aligned(data);
util_aligned_free(data);
}
void resize(size_t newsize)
@ -123,10 +104,10 @@ public:
clear();
}
else if(newsize != datasize) {
T *newdata = (T*)malloc_aligned(sizeof(T)*newsize, alignment);
T *newdata = (T*)util_aligned_malloc(sizeof(T)*newsize, alignment);
if(data) {
memcpy(newdata, data, ((datasize < newsize)? datasize: newsize)*sizeof(T));
free_aligned(data);
util_aligned_free(data);
}
data = newdata;
@ -136,7 +117,7 @@ public:
void clear()
{
free_aligned(data);
util_aligned_free(data);
data = NULL;
datasize = 0;
}