Sculpt/dyntopo: Make the omp threads configurable to overcome performance issues
- autodetect optimal default, which typically avoids HT threads - can store setting in .blend per scene - this does not touch the general omp max threads, because I found other areas where the calculations are a good fit for a huge core count - Intel notes that some of the older generation processors with HyperThreading would not provide a significant performance boost for FPU-intensive applications. On those systems you might want to set OMP_NUM_THREADS = total number of cores (not total number of hardware threads).
This commit is contained in:
parent
e05d35bfaf
commit
277fb1a31f
@ -1283,7 +1283,8 @@ class VIEW3D_PT_sculpt_options(Panel, View3DPaintPanel):
|
||||
|
||||
def draw(self, context):
|
||||
layout = self.layout
|
||||
|
||||
scene = context.scene
|
||||
|
||||
toolsettings = context.tool_settings
|
||||
sculpt = toolsettings.sculpt
|
||||
capabilities = sculpt.brush.sculpt_capabilities
|
||||
@ -1293,6 +1294,14 @@ class VIEW3D_PT_sculpt_options(Panel, View3DPaintPanel):
|
||||
col.label(text="Gravity:")
|
||||
col.prop(sculpt, "gravity", slider=True, text="Factor")
|
||||
col.prop(sculpt, "gravity_object")
|
||||
|
||||
col.separator()
|
||||
col.label(text="OpenMP Threads:")
|
||||
col.row(align=True).prop(scene, "omp_mode", expand=True)
|
||||
sub = col.column(align=True)
|
||||
sub.enabled = scene.omp_mode == 'MANUAL'
|
||||
sub.prop(scene, "omp_num_threads")
|
||||
col.separator()
|
||||
|
||||
layout.prop(sculpt, "use_threaded", text="Threaded Sculpt")
|
||||
layout.prop(sculpt, "show_low_resolution")
|
||||
|
@ -137,6 +137,8 @@ bool BKE_scene_check_rigidbody_active(const struct Scene *scene);
|
||||
int BKE_scene_num_threads(const struct Scene *scene);
|
||||
int BKE_render_num_threads(const struct RenderData *r);
|
||||
|
||||
int BKE_scene_num_omp_threads(const struct Scene *scene);
|
||||
void BKE_scene_omp_threads_update(const struct Scene *scene);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -638,6 +638,9 @@ Scene *BKE_scene_add(Main *bmain, const char *name)
|
||||
|
||||
sce->gm.exitkey = 218; // Blender key code for ESC
|
||||
|
||||
sce->omp_mode = SCE_OMP_AUTO;
|
||||
sce->omp_num_threads = 1;
|
||||
|
||||
sound_create_scene(sce);
|
||||
|
||||
/* color management */
|
||||
@ -1868,3 +1871,10 @@ int BKE_scene_num_threads(const Scene *scene)
|
||||
return BKE_render_num_threads(&scene->r);
|
||||
}
|
||||
|
||||
int BKE_scene_num_omp_threads(const struct Scene *scene)
|
||||
{
|
||||
if (scene->omp_mode == SCE_OMP_AUTO)
|
||||
return BLI_omp_thread_count();
|
||||
else
|
||||
return scene->omp_num_threads;
|
||||
}
|
||||
|
@ -75,6 +75,8 @@ int BLI_system_thread_count(void); /* gets the number of threads the system
|
||||
void BLI_system_num_threads_override_set(int num);
|
||||
int BLI_system_num_threads_override_get(void);
|
||||
|
||||
int BLI_omp_thread_count(void); /* gets the number of openmp threads the system can make use of */
|
||||
|
||||
/* Global Mutex Locks
|
||||
*
|
||||
* One custom lock available now. can be extended. */
|
||||
|
@ -54,10 +54,25 @@
|
||||
# include <sys/time.h>
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__) && defined(_OPENMP) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 2) && !defined(__clang__)
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#if defined(_OPENMP) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 2) && !defined(__clang__)
|
||||
# define USE_APPLE_OMP_FIX
|
||||
#endif
|
||||
|
||||
/* How many cores the system has, not counting HT siblings (aka physical cores).
 *
 * Queries the "hw.physicalcpu" sysctl. Falls back to 1 when the query fails
 * or reports a non-positive value, so callers never receive an
 * uninitialized or unusable thread count. */
static int system_physical_thread_count(void)
{
	int ptcount = 0;
	size_t ptcount_len = sizeof(ptcount);

	/* sysctlbyname() returns non-zero on failure and leaves ptcount untouched. */
	if (sysctlbyname("hw.physicalcpu", &ptcount, &ptcount_len, NULL, 0) != 0 || ptcount < 1) {
		return 1;
	}

	return ptcount;
}
|
||||
#endif // __APPLE__
|
||||
|
||||
#ifdef USE_APPLE_OMP_FIX
|
||||
/* ************** libgomp (Apple gcc 4.2.1) TLS bug workaround *************** */
|
||||
extern pthread_key_t gomp_tls_key;
|
||||
@ -335,6 +350,22 @@ void BLI_end_threads(ListBase *threadbase)
|
||||
|
||||
/* System Information */
|
||||
|
||||
/* Gets the number of OpenMP threads the system can make use of.
 *
 * - Built without OpenMP: always 1.
 * - Built with OpenMP on Apple: the value of system_physical_thread_count().
 * - Built with OpenMP elsewhere: the value of omp_get_num_procs(). */
int BLI_omp_thread_count(void)
{
#if !defined(_OPENMP)
	return 1;
#elif defined(__APPLE__)
	return system_physical_thread_count();
#else
	return omp_get_num_procs();
#endif
}
|
||||
|
||||
/* how many threads are native on this system? */
|
||||
int BLI_system_thread_count(void)
|
||||
{
|
||||
|
@ -67,6 +67,7 @@
|
||||
#include "BKE_multires.h"
|
||||
#include "BKE_paint.h"
|
||||
#include "BKE_report.h"
|
||||
#include "BKE_scene.h"
|
||||
#include "BKE_lattice.h" /* for armature_deform_verts */
|
||||
#include "BKE_node.h"
|
||||
#include "BKE_object.h"
|
||||
@ -1541,10 +1542,10 @@ static void do_multires_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *no
|
||||
|
||||
grid_hidden = BKE_pbvh_grid_hidden(ss->pbvh);
|
||||
|
||||
thread_num = 0;
|
||||
#ifdef _OPENMP
|
||||
if (sd->flags & SCULPT_USE_OPENMP)
|
||||
thread_num = omp_get_thread_num();
|
||||
thread_num = omp_get_thread_num();
|
||||
#else
|
||||
thread_num = 0;
|
||||
#endif
|
||||
tmpgrid_co = ss->cache->tmpgrid_co[thread_num];
|
||||
tmprow_co = ss->cache->tmprow_co[thread_num];
|
||||
@ -3769,7 +3770,7 @@ static void sculpt_init_mirror_clipping(Object *ob, SculptSession *ss)
|
||||
}
|
||||
}
|
||||
|
||||
static void sculpt_omp_start(Sculpt *sd, SculptSession *ss)
|
||||
static void sculpt_omp_start(Scene *scene, Sculpt *sd, SculptSession *ss)
|
||||
{
|
||||
StrokeCache *cache = ss->cache;
|
||||
|
||||
@ -3779,15 +3780,17 @@ static void sculpt_omp_start(Sculpt *sd, SculptSession *ss)
|
||||
* Justification: Empirically I've found that two threads per
|
||||
* processor gives higher throughput. */
|
||||
if (sd->flags & SCULPT_USE_OPENMP) {
|
||||
cache->num_threads = omp_get_num_procs();
|
||||
cache->num_threads = BKE_scene_num_omp_threads(scene);
|
||||
}
|
||||
else {
|
||||
cache->num_threads = 1;
|
||||
}
|
||||
omp_set_num_threads(cache->num_threads);
|
||||
#else
|
||||
(void)sd;
|
||||
cache->num_threads = 1;
|
||||
#endif
|
||||
// printf("Sculpt omp threadcount: %d\n", cache->num_threads);
|
||||
if (ss->multires) {
|
||||
int i, gridsize, array_mem_size;
|
||||
BKE_pbvh_node_get_grids(ss->pbvh, NULL, NULL, NULL, NULL,
|
||||
@ -4002,7 +4005,7 @@ static void sculpt_update_cache_invariants(bContext *C, Sculpt *sd, SculptSessio
|
||||
cache->previous_vertex_rotation = 0;
|
||||
cache->init_dir_set = false;
|
||||
|
||||
sculpt_omp_start(sd, ss);
|
||||
sculpt_omp_start(scene, sd, ss);
|
||||
}
|
||||
|
||||
static void sculpt_update_brush_delta(UnifiedPaintSettings *ups, Object *ob, Brush *brush)
|
||||
@ -4626,6 +4629,12 @@ static void sculpt_stroke_done(const bContext *C, struct PaintStroke *UNUSED(str
|
||||
WM_event_add_notifier(C, NC_OBJECT | ND_DRAW, ob);
|
||||
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
if (!(sd->flags & SCULPT_USE_OPENMP))
|
||||
omp_set_num_threads(BLI_system_thread_count());
|
||||
// printf("Reseted to omp threadcount: %d\n", BLI_system_thread_count());
|
||||
#endif
|
||||
|
||||
sculpt_brush_exit_tex(sd);
|
||||
}
|
||||
|
||||
|
@ -1224,6 +1224,10 @@ typedef struct Scene {
|
||||
|
||||
/* RigidBody simulation world+settings */
|
||||
struct RigidBodyWorld *rigidbody_world;
|
||||
|
||||
/* Openmp Global Settings */
|
||||
int omp_num_threads;
|
||||
int omp_mode;
|
||||
} Scene;
|
||||
|
||||
|
||||
@ -1769,6 +1773,10 @@ typedef enum SculptFlags {
|
||||
#define USER_UNIT_OPT_SPLIT 1
|
||||
#define USER_UNIT_ROT_RADIANS 2
|
||||
|
||||
/* OpenMP settings */
|
||||
#define SCE_OMP_AUTO 0
|
||||
#define SCE_OMP_MANUAL 1
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -43,6 +43,7 @@
|
||||
#include "BKE_freestyle.h"
|
||||
#include "BKE_editmesh.h"
|
||||
#include "BKE_paint.h"
|
||||
#include "BKE_scene.h"
|
||||
|
||||
#include "RNA_define.h"
|
||||
#include "RNA_enum_types.h"
|
||||
@ -680,6 +681,17 @@ static char *rna_RenderSettings_path(PointerRNA *UNUSED(ptr))
|
||||
return BLI_sprintfN("render");
|
||||
}
|
||||
|
||||
/* RNA update callback: forwards the scene to BKE_scene_omp_threads_update().
 * The Main and PointerRNA arguments are not used.
 *
 * NOTE(review): the property definitions shown alongside this function do not
 * visibly register it via an RNA update hook - confirm it is actually wired up. */
static void rna_omp_threads_update(Main *UNUSED(bmain), Scene *scene, PointerRNA *UNUSED(ptr))
{
	BKE_scene_omp_threads_update(scene);
}
|
||||
|
||||
/* RNA getter used by the "omp_num_threads" property.
 *
 * Treats ptr->data as the Scene and returns BKE_scene_num_omp_threads() for
 * it, rather than reading the stored field directly. */
static int rna_omp_threads_get(PointerRNA *ptr)
{
	return BKE_scene_num_omp_threads((Scene *)ptr->data);
}
|
||||
|
||||
static int rna_RenderSettings_threads_get(PointerRNA *ptr)
|
||||
{
|
||||
RenderData *rd = (RenderData *)ptr->data;
|
||||
@ -5088,6 +5100,12 @@ void RNA_def_scene(BlenderRNA *brna)
|
||||
{0, NULL, 0, NULL, NULL}
|
||||
};
|
||||
|
||||
static EnumPropertyItem omp_threads_mode_items[] = {
|
||||
{SCE_OMP_AUTO, "AUTO", 0, "Auto-detect", "Automatically determine the number of threads, based on CPUs"},
|
||||
{SCE_OMP_MANUAL, "MANUAL", 0, "Manual", "Manually determine the number of threads"},
|
||||
{0, NULL, 0, NULL, NULL}
|
||||
};
|
||||
|
||||
/* Struct definition */
|
||||
srna = RNA_def_struct(brna, "Scene", "ID");
|
||||
RNA_def_struct_ui_text(srna, "Scene",
|
||||
@ -5450,6 +5468,17 @@ void RNA_def_scene(BlenderRNA *brna)
|
||||
RNA_def_property_struct_type(prop, "ColorManagedSequencerColorspaceSettings");
|
||||
RNA_def_property_ui_text(prop, "Sequencer Color Space Settings", "Settings of color space sequencer is working in");
|
||||
|
||||
prop = RNA_def_property(srna, "omp_num_threads", PROP_INT, PROP_NONE);
|
||||
RNA_def_property_range(prop, 1, BLENDER_MAX_THREADS);
|
||||
RNA_def_property_int_funcs(prop, "rna_omp_threads_get", NULL, NULL);
|
||||
RNA_def_property_ui_text(prop, "OpenMP Threads",
|
||||
"Number of CPU threads to use simultaneously for openmp"
|
||||
"(for multi-core/CPU systems)");
|
||||
|
||||
prop = RNA_def_property(srna, "omp_mode", PROP_ENUM, PROP_NONE);
|
||||
RNA_def_property_enum_items(prop, omp_threads_mode_items);
|
||||
RNA_def_property_ui_text(prop, "OpenMP Mode", "Determine the amount of openmp threads used");
|
||||
|
||||
/* Nestled Data */
|
||||
/* *** Non-Animated *** */
|
||||
RNA_define_animate_sdna(false);
|
||||
|
Loading…
Reference in New Issue
Block a user