Speedup for guarded allocator
- Re-arrange locks so that no actual memory allocation (which is relatively slow) happens inside the lock. The operating system will take care, on its own, of any locks that might be needed there. - Use a spin lock instead of a mutex: only list operations happen inside the lock, so there is no need for a mutex here. - Use atomic operations for the memory-in-use and total-used-blocks counters. This makes the guarded allocator almost as fast as the non-guarded one in files from the Tube project. There is still MemHead/MemTail overhead, which might be bad for CPU cache utilization. TODO: We need a smarter 32/64-bit compile-time check; currently, I'm afraid, only the x86 CPU family is detected reliably.
This commit is contained in:
parent
c4f6340f7d
commit
1a81197819
@ -44,6 +44,7 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* TODO(sergey): check on other 64bit platforms. */
|
||||
#if defined(_M_X64) || defined(__amd64__) || defined(__x86_64__)
|
||||
# define LG_SIZEOF_PTR 3
|
||||
# define LG_SIZEOF_INT 3
|
||||
|
@ -25,6 +25,7 @@
|
||||
|
||||
set(INC
|
||||
.
|
||||
../atomic
|
||||
)
|
||||
|
||||
set(INC_SYS
|
||||
|
@ -38,6 +38,6 @@ if env['WITH_BF_CXX_GUARDEDALLOC']:
|
||||
sources.append('cpp/mallocn.cpp')
|
||||
defs.append('WITH_CXX_GUARDEDALLOC')
|
||||
|
||||
incs = '.'
|
||||
incs = '. ../atomic'
|
||||
|
||||
env.BlenderLib ('bf_intern_guardedalloc', sources, Split(incs), defs, libtype=['intern','player'], priority = [5,150] )
|
||||
|
@ -50,6 +50,8 @@
|
||||
|
||||
#include "MEM_guardedalloc.h"
|
||||
|
||||
#include "atomic_ops.h"
|
||||
|
||||
/* should always be defined except for experimental cases */
|
||||
#ifdef WITH_GUARDEDALLOC
|
||||
|
||||
@ -210,8 +212,20 @@ static const char *check_memlist(MemHead *memh);
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
|
||||
static volatile int totblock = 0;
|
||||
static volatile uintptr_t mem_in_use = 0, mmap_in_use = 0, peak_mem = 0;
|
||||
/* TODO(sergey): need smarter check for 64bit platform. */
|
||||
#if defined(_M_X64) || defined(__amd64__) || defined(__x86_64__)
|
||||
typedef uint64_t mem_uintptr_t;
|
||||
# define mem_atomic_uint_sub atomic_sub_uint64
|
||||
# define mem_atomic_uint_add atomic_add_uint64
|
||||
#else
|
||||
typedef int32_t mem_int_t;
|
||||
typedef uint32_t mem_uintptr_t;
|
||||
# define mem_atomic_uint_sub atomic_sub_uint32
|
||||
# define mem_atomic_uint_add atomic_add_uint32
|
||||
#endif
|
||||
|
||||
static mem_uintptr_t totblock = 0;
|
||||
static mem_uintptr_t mem_in_use = 0, mmap_in_use = 0, peak_mem = 0;
|
||||
|
||||
static volatile struct localListBase _membase;
|
||||
static volatile struct localListBase *membase = &_membase;
|
||||
@ -493,31 +507,29 @@ static void make_memhead_header(MemHead *memh, size_t len, const char *str)
|
||||
|
||||
memt = (MemTail *)(((char *) memh) + sizeof(MemHead) + len);
|
||||
memt->tag3 = MEMTAG3;
|
||||
|
||||
|
||||
mem_atomic_uint_add(&totblock, 1);
|
||||
mem_atomic_uint_add(&mem_in_use, len);
|
||||
|
||||
mem_lock_thread();
|
||||
addtail(membase, &memh->next);
|
||||
if (memh->next) {
|
||||
memh->nextname = MEMNEXT(memh->next)->name;
|
||||
}
|
||||
|
||||
totblock++;
|
||||
mem_in_use += len;
|
||||
|
||||
peak_mem = mem_in_use > peak_mem ? mem_in_use : peak_mem;
|
||||
mem_unlock_thread();
|
||||
}
|
||||
|
||||
void *MEM_mallocN(size_t len, const char *str)
|
||||
{
|
||||
MemHead *memh;
|
||||
|
||||
mem_lock_thread();
|
||||
|
||||
len = (len + 3) & ~3; /* allocate in units of 4 */
|
||||
|
||||
memh = (MemHead *)malloc(len + sizeof(MemHead) + sizeof(MemTail));
|
||||
|
||||
if (memh) {
|
||||
make_memhead_header(memh, len, str);
|
||||
mem_unlock_thread();
|
||||
if (malloc_debug_memset && len)
|
||||
memset(memh + 1, 255, len);
|
||||
|
||||
@ -528,7 +540,6 @@ void *MEM_mallocN(size_t len, const char *str)
|
||||
#endif
|
||||
return (++memh);
|
||||
}
|
||||
mem_unlock_thread();
|
||||
print_error("Malloc returns null: len=" SIZET_FORMAT " in %s, total %u\n",
|
||||
SIZET_ARG(len), str, (unsigned int) mem_in_use);
|
||||
return NULL;
|
||||
@ -538,15 +549,12 @@ void *MEM_callocN(size_t len, const char *str)
|
||||
{
|
||||
MemHead *memh;
|
||||
|
||||
mem_lock_thread();
|
||||
|
||||
len = (len + 3) & ~3; /* allocate in units of 4 */
|
||||
|
||||
memh = (MemHead *)calloc(len + sizeof(MemHead) + sizeof(MemTail), 1);
|
||||
|
||||
if (memh) {
|
||||
make_memhead_header(memh, len, str);
|
||||
mem_unlock_thread();
|
||||
#ifdef DEBUG_MEMCOUNTER
|
||||
if (_mallocn_count == DEBUG_MEMCOUNTER_ERROR_VAL)
|
||||
memcount_raise(__func__);
|
||||
@ -554,7 +562,6 @@ void *MEM_callocN(size_t len, const char *str)
|
||||
#endif
|
||||
return (++memh);
|
||||
}
|
||||
mem_unlock_thread();
|
||||
print_error("Calloc returns null: len=" SIZET_FORMAT " in %s, total %u\n",
|
||||
SIZET_ARG(len), str, (unsigned int) mem_in_use);
|
||||
return NULL;
|
||||
@ -565,8 +572,6 @@ void *MEM_mapallocN(size_t len, const char *str)
|
||||
{
|
||||
MemHead *memh;
|
||||
|
||||
mem_lock_thread();
|
||||
|
||||
len = (len + 3) & ~3; /* allocate in units of 4 */
|
||||
|
||||
memh = mmap(NULL, len + sizeof(MemHead) + sizeof(MemTail),
|
||||
@ -575,7 +580,8 @@ void *MEM_mapallocN(size_t len, const char *str)
|
||||
if (memh != (MemHead *)-1) {
|
||||
make_memhead_header(memh, len, str);
|
||||
memh->mmap = 1;
|
||||
mmap_in_use += len;
|
||||
mem_atomic_uint_add(&mmap_in_use, len);
|
||||
mem_lock_thread();
|
||||
peak_mem = mmap_in_use > peak_mem ? mmap_in_use : peak_mem;
|
||||
mem_unlock_thread();
|
||||
#ifdef DEBUG_MEMCOUNTER
|
||||
@ -586,7 +592,6 @@ void *MEM_mapallocN(size_t len, const char *str)
|
||||
return (++memh);
|
||||
}
|
||||
else {
|
||||
mem_unlock_thread();
|
||||
print_error("Mapalloc returns null, fallback to regular malloc: "
|
||||
"len=" SIZET_FORMAT " in %s, total %u\n",
|
||||
SIZET_ARG(len), str, (unsigned int) mmap_in_use);
|
||||
@ -844,7 +849,6 @@ void MEM_freeN(void *vmemh)
|
||||
return;
|
||||
}
|
||||
|
||||
mem_lock_thread();
|
||||
if ((memh->tag1 == MEMTAG1) &&
|
||||
(memh->tag2 == MEMTAG2) &&
|
||||
((memh->len & 0x3) == 0))
|
||||
@ -858,8 +862,6 @@ void MEM_freeN(void *vmemh)
|
||||
/* after tags !!! */
|
||||
rem_memblock(memh);
|
||||
|
||||
mem_unlock_thread();
|
||||
|
||||
return;
|
||||
}
|
||||
MemorY_ErroR(memh->name, "end corrupt");
|
||||
@ -869,7 +871,9 @@ void MEM_freeN(void *vmemh)
|
||||
}
|
||||
}
|
||||
else {
|
||||
mem_lock_thread();
|
||||
name = check_memlist(memh);
|
||||
mem_unlock_thread();
|
||||
if (name == NULL)
|
||||
MemorY_ErroR("free", "pointer not in memlist");
|
||||
else
|
||||
@ -879,8 +883,6 @@ void MEM_freeN(void *vmemh)
|
||||
totblock--;
|
||||
/* here a DUMP should happen */
|
||||
|
||||
mem_unlock_thread();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@ -927,6 +929,7 @@ static void remlink(volatile localListBase *listbase, void *vlink)
|
||||
|
||||
static void rem_memblock(MemHead *memh)
|
||||
{
|
||||
mem_lock_thread();
|
||||
remlink(membase, &memh->next);
|
||||
if (memh->prev) {
|
||||
if (memh->next)
|
||||
@ -934,9 +937,10 @@ static void rem_memblock(MemHead *memh)
|
||||
else
|
||||
MEMNEXT(memh->prev)->nextname = NULL;
|
||||
}
|
||||
mem_unlock_thread();
|
||||
|
||||
totblock--;
|
||||
mem_in_use -= memh->len;
|
||||
mem_atomic_uint_sub(&totblock, 1);
|
||||
mem_atomic_uint_sub(&mem_in_use, memh->len);
|
||||
|
||||
#ifdef DEBUG_MEMDUPLINAME
|
||||
if (memh->need_free_name)
|
||||
@ -944,7 +948,7 @@ static void rem_memblock(MemHead *memh)
|
||||
#endif
|
||||
|
||||
if (memh->mmap) {
|
||||
mmap_in_use -= memh->len;
|
||||
mem_atomic_uint_sub(&mmap_in_use, memh->len);
|
||||
if (munmap(memh, memh->len + sizeof(MemHead) + sizeof(MemTail)))
|
||||
printf("Couldn't unmap memory %s\n", memh->name);
|
||||
}
|
||||
|
@ -111,7 +111,7 @@ static TaskScheduler *task_scheduler = NULL;
|
||||
* BLI_end_threads(&lb);
|
||||
*
|
||||
************************************************ */
|
||||
static pthread_mutex_t _malloc_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
static SpinLock _malloc_lock;
|
||||
static pthread_mutex_t _image_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
static pthread_mutex_t _image_draw_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
static pthread_mutex_t _viewer_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
@ -138,22 +138,25 @@ typedef struct ThreadSlot {
|
||||
|
||||
static void BLI_lock_malloc_thread(void)
|
||||
{
|
||||
pthread_mutex_lock(&_malloc_lock);
|
||||
BLI_spin_lock(&_malloc_lock);
|
||||
}
|
||||
|
||||
static void BLI_unlock_malloc_thread(void)
|
||||
{
|
||||
pthread_mutex_unlock(&_malloc_lock);
|
||||
BLI_spin_unlock(&_malloc_lock);
|
||||
}
|
||||
|
||||
void BLI_threadapi_init(void)
|
||||
{
|
||||
mainid = pthread_self();
|
||||
|
||||
BLI_spin_init(&_malloc_lock);
|
||||
}
|
||||
|
||||
void BLI_threadapi_exit(void)
|
||||
{
|
||||
BLI_task_scheduler_free(task_scheduler);
|
||||
BLI_spin_end(&_malloc_lock);
|
||||
}
|
||||
|
||||
TaskScheduler *BLI_task_scheduler_get(void)
|
||||
|
@ -29,6 +29,7 @@ add_definitions(-DWITH_DNA_GHASH)
|
||||
|
||||
blender_include_dirs(
|
||||
../../../../intern/guardedalloc
|
||||
../../../../intern/atomic
|
||||
../../blenlib
|
||||
..
|
||||
)
|
||||
|
@ -46,6 +46,7 @@ dna = env.Clone()
|
||||
makesdna_tool.Append(CCFLAGS = '-DBASE_HEADER="\\"source/blender/makesdna/\\"" ')
|
||||
|
||||
makesdna_tool.Append (CPPPATH = ['#/intern/guardedalloc',
|
||||
'#/intern/atomic',
|
||||
'../../makesdna', '../../bmesh'])
|
||||
|
||||
if env['OURPLATFORM'] == 'linuxcross':
|
||||
|
@ -36,6 +36,7 @@ incs = [
|
||||
'.',
|
||||
'./intern',
|
||||
'#/intern/guardedalloc',
|
||||
'#/intern/atomic',
|
||||
'#/intern/memutil',
|
||||
'#/extern/glew/include',
|
||||
'#/intern/audaspace/intern',
|
||||
|
@ -276,6 +276,7 @@ blender_include_dirs(
|
||||
../../../../intern/audaspace/intern
|
||||
../../../../intern/cycles/blender
|
||||
../../../../intern/guardedalloc
|
||||
../../../../intern/atomic
|
||||
../../../../intern/memutil
|
||||
../../../../intern/smoke/extern
|
||||
)
|
||||
|
@ -58,6 +58,7 @@ extern "C"
|
||||
#endif // __cplusplus
|
||||
#include "MEM_guardedalloc.h"
|
||||
#include "BKE_blender.h"
|
||||
#include "BKE_depsgraph.h"
|
||||
#include "BKE_global.h"
|
||||
#include "BKE_icons.h"
|
||||
#include "BKE_image.h"
|
||||
|
Loading…
Reference in New Issue
Block a user