forked from bartvdbraak/blender
Cycles: ensure any SSE data is allocated 16-byte aligned. This happens automatically
on many platforms but is not guaranteed everywhere.
This commit is contained in:
parent
5da48f425f
commit
240fb6fa26
@ -552,6 +552,30 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __device_inline const __m12
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef __KERNEL_GPU__
|
||||||
|
|
||||||
|
/* Allocate `size` bytes whose address is a multiple of `alignment`.
 *
 * Over-allocates with malloc() and stores the pointer malloc() returned in
 * the pointer-sized slot directly before the returned address, which is where
 * free_aligned() reads it back from. The result must therefore be released
 * with free_aligned(), never with free().
 *
 * `alignment` must be a non-zero power of two. Returns NULL when the
 * alignment is invalid, when the padded size would overflow size_t, or when
 * malloc() fails. */
static inline void *malloc_aligned(size_t size, size_t alignment)
{
	/* the mask arithmetic below is only valid for powers of two */
	if(alignment == 0 || (alignment & (alignment - 1)) != 0)
		return NULL;

	/* room for the bookkeeping pointer plus worst-case alignment padding */
	const size_t overhead = sizeof(void*) + alignment - 1;

	/* refuse requests whose padded size would wrap around */
	if(overhead < alignment - 1 || size > ~(size_t)0 - overhead)
		return NULL;

	char *data = (char*)malloc(size + overhead);

	if(data == NULL)
		return NULL;

	/* skip the bookkeeping slot, then round up to the requested alignment */
	char *aligned = data + sizeof(void*);
	const size_t misalign = (size_t)aligned & (alignment - 1);

	if(misalign != 0)
		aligned += alignment - misalign;

	/* remember the original allocation right before the aligned block */
	*(((void**)aligned) - 1) = data;

	return aligned;
}
|
||||||
|
|
||||||
|
/* Release a block whose original malloc() address is stored in the
 * pointer-sized slot directly in front of `ptr`.
 *
 * That stored address is read back and passed to free(). A NULL `ptr`
 * is ignored. */
static inline void free_aligned(void *ptr)
{
	if(ptr == NULL)
		return;

	/* step back one pointer to reach the saved allocation address */
	void **slot = (void**)ptr - 1;
	free(*slot);
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
|
||||||
#endif /* __UTIL_TYPES_H__ */
|
#endif /* __UTIL_TYPES_H__ */
|
||||||
|
@ -24,18 +24,22 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "util_types.h"
|
||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
using std::vector;
|
using std::vector;
|
||||||
|
|
||||||
/* Array
|
/* Array
|
||||||
*
|
*
|
||||||
* Simplified version of vector, serving two purposes:
|
* Simplified version of vector, serving multiple purposes:
|
||||||
* - somewhat faster in that it does not clear memory on resize/alloc,
|
* - somewhat faster in that it does not clear memory on resize/alloc,
|
||||||
* this was actually showing up in profiles quite significantly
|
* this was actually showing up in profiles quite significantly. it
|
||||||
* - if this is used, we are not tempted to use inefficient operations */
|
* also does not run any constructors/destructors
|
||||||
|
* - if this is used, we are not tempted to use inefficient operations
|
||||||
|
* - aligned allocation for SSE data types */
|
||||||
|
|
||||||
template<typename T>
|
template<typename T, size_t alignment = 16>
|
||||||
class array
|
class array
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -52,7 +56,7 @@ public:
|
|||||||
datasize = 0;
|
datasize = 0;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
data = new T[newsize];
|
data = (T*)malloc_aligned(sizeof(T)*newsize, alignment);
|
||||||
datasize = newsize;
|
datasize = newsize;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -69,7 +73,7 @@ public:
|
|||||||
datasize = 0;
|
datasize = 0;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
data = new T[from.datasize];
|
data = (T*)malloc_aligned(sizeof(T)*from.datasize, alignment);
|
||||||
memcpy(data, from.data, from.datasize*sizeof(T));
|
memcpy(data, from.data, from.datasize*sizeof(T));
|
||||||
datasize = from.datasize;
|
datasize = from.datasize;
|
||||||
}
|
}
|
||||||
@ -83,7 +87,10 @@ public:
|
|||||||
data = NULL;
|
data = NULL;
|
||||||
|
|
||||||
if(datasize > 0) {
|
if(datasize > 0) {
|
||||||
data = new T[datasize];
|
data = (T*)malloc_aligned(sizeof(T)*datasize, alignment);
|
||||||
|
memcpy(data, &from[0], datasize*sizeof(T));
|
||||||
|
free_aligned(data);
|
||||||
|
data = (T*)malloc_aligned(sizeof(T)*datasize, alignment);
|
||||||
memcpy(data, &from[0], datasize*sizeof(T));
|
memcpy(data, &from[0], datasize*sizeof(T));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -92,7 +99,7 @@ public:
|
|||||||
|
|
||||||
~array()
|
~array()
|
||||||
{
|
{
|
||||||
delete [] data;
|
free_aligned(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
void resize(size_t newsize)
|
void resize(size_t newsize)
|
||||||
@ -100,10 +107,10 @@ public:
|
|||||||
if(newsize == 0) {
|
if(newsize == 0) {
|
||||||
clear();
|
clear();
|
||||||
}
|
}
|
||||||
else {
|
else if(newsize != datasize) {
|
||||||
T *newdata = new T[newsize];
|
T *newdata = (T*)malloc_aligned(sizeof(T)*newsize, alignment);
|
||||||
memcpy(newdata, data, ((datasize < newsize)? datasize: newsize)*sizeof(T));
|
memcpy(newdata, data, ((datasize < newsize)? datasize: newsize)*sizeof(T));
|
||||||
delete [] data;
|
free_aligned(data);
|
||||||
|
|
||||||
data = newdata;
|
data = newdata;
|
||||||
datasize = newsize;
|
datasize = newsize;
|
||||||
@ -112,7 +119,7 @@ public:
|
|||||||
|
|
||||||
void clear()
|
void clear()
|
||||||
{
|
{
|
||||||
delete [] data;
|
free_aligned(data);
|
||||||
data = NULL;
|
data = NULL;
|
||||||
datasize = 0;
|
datasize = 0;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user