forked from bartvdbraak/blender
Cycles: ensure any SSE data is allocated 16-byte aligned. This happens
automatically on many platforms but is not guaranteed everywhere.
This commit is contained in:
parent
5da48f425f
commit
240fb6fa26
@ -552,6 +552,30 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __device_inline const __m12
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
|
||||
/* Allocate `size` bytes whose start address is a multiple of `alignment`.
 *
 * A larger block is requested from malloc(), the returned address is rounded
 * up to the next multiple of `alignment`, and the pointer originally returned
 * by malloc() is stored in the pointer-sized slot directly in front of the
 * aligned address so that free_aligned() can recover and release it.
 *
 * size:      number of usable bytes requested.
 * alignment: required alignment in bytes, must be a non-zero power of two.
 *
 * Returns the aligned pointer, or NULL when the alignment is invalid, when
 * the padded size does not fit in size_t, or when malloc() fails. The result
 * must be released with free_aligned(), never with free() directly. */
static inline void *malloc_aligned(size_t size, size_t alignment)
{
	/* the rounding mask below is only valid for non-zero powers of two */
	if(alignment == 0 || (alignment & (alignment - 1)) != 0)
		return NULL;

	/* worst case bookkeeping: one pointer slot plus alignment padding */
	const size_t overhead = sizeof(void*) + alignment - 1;

	/* refuse requests whose padded size would wrap around, otherwise a
	 * far too small buffer would be handed out */
	if(size > (size_t)-1 - overhead)
		return NULL;

	void *data = malloc(size + overhead);

	/* without this check the header write below would go through a
	 * near-null address */
	if(!data)
		return NULL;

	/* leave room for the header slot, then round up to the alignment */
	size_t address = (size_t)((char*)data + sizeof(void*));
	address = (address + alignment - 1) & ~(alignment - 1);

	/* remember the real allocation right in front of the aligned block */
	void *aligned = (void*)address;
	((void**)aligned)[-1] = data;

	return aligned;
}
|
||||
|
||||
/* Release a block obtained from malloc_aligned().
 *
 * The pointer-sized slot directly in front of `ptr` holds the address that
 * was originally returned by malloc(); that address is what gets passed to
 * free(). A null `ptr` is accepted and ignored. */
static inline void free_aligned(void *ptr)
{
	if(!ptr)
		return;

	/* header slot written by the allocator sits just before the block */
	void **header = (void**)ptr - 1;
	free(*header);
}
|
||||
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __UTIL_TYPES_H__ */
|
||||
|
@ -24,18 +24,22 @@
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
|
||||
#include "util_types.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
using std::vector;
|
||||
|
||||
/* Array
|
||||
*
|
||||
* Simplified version of vector, serving two purposes:
|
||||
* Simplified version of vector, serving multiple purposes:
|
||||
* - somewhat faster in that it does not clear memory on resize/alloc,
|
||||
* this was actually showing up in profiles quite significantly
|
||||
* - if this is used, we are not tempted to use inefficient operations */
|
||||
* this was actually showing up in profiles quite significantly. it
|
||||
* also does not run any constructors/destructors
|
||||
* - if this is used, we are not tempted to use inefficient operations
|
||||
* - aligned allocation for SSE data types */
|
||||
|
||||
template<typename T>
|
||||
template<typename T, size_t alignment = 16>
|
||||
class array
|
||||
{
|
||||
public:
|
||||
@ -52,7 +56,7 @@ public:
|
||||
datasize = 0;
|
||||
}
|
||||
else {
|
||||
data = new T[newsize];
|
||||
data = (T*)malloc_aligned(sizeof(T)*newsize, alignment);
|
||||
datasize = newsize;
|
||||
}
|
||||
}
|
||||
@ -69,7 +73,7 @@ public:
|
||||
datasize = 0;
|
||||
}
|
||||
else {
|
||||
data = new T[from.datasize];
|
||||
data = (T*)malloc_aligned(sizeof(T)*from.datasize, alignment);
|
||||
memcpy(data, from.data, from.datasize*sizeof(T));
|
||||
datasize = from.datasize;
|
||||
}
|
||||
@ -83,7 +87,10 @@ public:
|
||||
data = NULL;
|
||||
|
||||
if(datasize > 0) {
|
||||
data = new T[datasize];
|
||||
data = (T*)malloc_aligned(sizeof(T)*datasize, alignment);
|
||||
memcpy(data, &from[0], datasize*sizeof(T));
|
||||
free_aligned(data);
|
||||
data = (T*)malloc_aligned(sizeof(T)*datasize, alignment);
|
||||
memcpy(data, &from[0], datasize*sizeof(T));
|
||||
}
|
||||
|
||||
@ -92,7 +99,7 @@ public:
|
||||
|
||||
~array()
|
||||
{
|
||||
delete [] data;
|
||||
free_aligned(data);
|
||||
}
|
||||
|
||||
void resize(size_t newsize)
|
||||
@ -100,10 +107,10 @@ public:
|
||||
if(newsize == 0) {
|
||||
clear();
|
||||
}
|
||||
else {
|
||||
T *newdata = new T[newsize];
|
||||
else if(newsize != datasize) {
|
||||
T *newdata = (T*)malloc_aligned(sizeof(T)*newsize, alignment);
|
||||
memcpy(newdata, data, ((datasize < newsize)? datasize: newsize)*sizeof(T));
|
||||
delete [] data;
|
||||
free_aligned(data);
|
||||
|
||||
data = newdata;
|
||||
datasize = newsize;
|
||||
@ -112,7 +119,7 @@ public:
|
||||
|
||||
void clear()
|
||||
{
|
||||
delete [] data;
|
||||
free_aligned(data);
|
||||
data = NULL;
|
||||
datasize = 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user