Attempted fix for bug #8349: QMC raytracing being very slow on some
computers, probably due to slow multithreaded malloc. Now each render
thread keeps a list of QMC samplers that it fills as needed while
rendering (since it is hard to predict the actual number needed in
advance due to ray recursion).
This commit is contained in:
Brecht Van Lommel 2008-04-14 10:14:59 +00:00
parent 3dcb3cc4cd
commit 6da7b60cd3
4 changed files with 77 additions and 47 deletions

@ -64,8 +64,10 @@ typedef struct SampleTables
/* One quasi-Monte Carlo sampler. Samplers are kept in per-thread lists
 * (they are appended with BLI_addtail in get_thread_qmcsampler) and are
 * handed out and returned through the 'used' flag rather than being
 * allocated and freed for every use. */
typedef struct QMCSampler
{
/* list links, so a sampler can be stored in a ListBase */
struct QMCSampler *next, *prev;
/* sampler type; callers pass SAMP_TYPE_HALTON or SAMP_TYPE_HAMMERSLEY */
int type;
/* sample count this sampler was created for (sizes samp2d) */
int tot;
/* set to 1 by get_thread_qmcsampler, reset to 0 by release_thread_qmcsampler */
int used;
/* sample table, allocated in QMC_initSampler as 2*tot doubles */
double *samp2d;
/* two doubles per thread slot; presumably the per-pixel offsets written by
 * QMC_initPixel -- TODO confirm against that function */
double offs[BLENDER_MAX_THREADS][2];
} QMCSampler;
@ -150,7 +152,7 @@ struct Render
/* samples */
SampleTables *samples;
float jit[32][2];
QMCSampler *qsa;
ListBase *qmcsamplers;
/* shadow counter, detect shadow-reuse for shaders */
int shadowsamplenr[BLENDER_MAX_THREADS];
@ -455,7 +457,6 @@ typedef struct LampRen {
struct ShadBuf *shb;
float *jitter;
QMCSampler *qsa;
float imat[3][3];
float spottexfac;

@ -104,9 +104,7 @@ extern void ray_trace(ShadeInput *, ShadeResult *);
extern void ray_ao(ShadeInput *, float *);
extern void init_jitter_plane(LampRen *lar);
extern void init_ao_sphere(struct World *wrld);
extern void init_lamp_hammersley(LampRen *lar);
extern void free_lamp_qmcsampler(LampRen *lar);
extern void init_render_hammersley(Render *re);
extern void init_render_qmcsampler(Render *re);
extern void free_render_qmcsampler(Render *re);
#endif /* RENDER_EXT_H */

@ -3684,9 +3684,6 @@ static GroupObject *add_render_lamp(Render *re, Object *ob)
if(re->r.mode & R_SHADOW) {
if ((lar->mode & LA_SHAD_RAY) && (lar->ray_samp_method == LA_SAMP_HAMMERSLEY)) {
init_lamp_hammersley(lar);
}
if(la->type==LA_AREA && (lar->mode & LA_SHAD_RAY) && (lar->ray_samp_method == LA_SAMP_CONSTANT)) {
init_jitter_plane(lar);
}
@ -4372,7 +4369,6 @@ void RE_Database_Free(Render *re)
freeshadowbuf(lar);
if(lar->jitter) MEM_freeN(lar->jitter);
if(lar->shadsamp) MEM_freeN(lar->shadsamp);
if(lar->qsa) free_lamp_qmcsampler(lar);
curvemapping_free(lar->curfalloff);
}
@ -4410,8 +4406,7 @@ void RE_Database_Free(Render *re)
re->wrld.aotables= NULL;
re->scene->world->aotables= NULL;
}
if((re->r.mode & R_RAYTRACE) && (re->wrld.mode & WO_AMB_OCC) &&
(re->wrld.ao_samp_method == WO_AOSAMP_HAMMERSLEY) && (re->qsa))
if(re->r.mode & R_RAYTRACE)
free_render_qmcsampler(re);
if(re->r.mode & R_RAYTRACE) freeraytree(re);
@ -4786,11 +4781,12 @@ void RE_Database_FromScene(Render *re, Scene *scene, int use_camera_view)
}
init_render_world(re); /* do first, because of ambient. also requires re->osa set correct */
if((re->r.mode & R_RAYTRACE) && (re->wrld.mode & WO_AMB_OCC)) {
if (re->wrld.ao_samp_method == WO_AOSAMP_HAMMERSLEY)
init_render_hammersley(re);
else if (re->wrld.ao_samp_method == WO_AOSAMP_CONSTANT)
init_ao_sphere(&re->wrld);
if(re->r.mode & R_RAYTRACE) {
init_render_qmcsampler(re);
if(re->wrld.mode & WO_AMB_OCC)
if (re->wrld.ao_samp_method == WO_AOSAMP_CONSTANT)
init_ao_sphere(&re->wrld);
}
/* still bad... doing all */
@ -5439,11 +5435,12 @@ void RE_Database_Baking(Render *re, Scene *scene, int type, Object *actob)
}
init_render_world(re); /* do first, because of ambient. also requires re->osa set correct */
if((re->r.mode & R_RAYTRACE) && (re->wrld.mode & WO_AMB_OCC)) {
if (re->wrld.ao_samp_method == WO_AOSAMP_HAMMERSLEY)
init_render_hammersley(re);
else if (re->wrld.ao_samp_method == WO_AOSAMP_CONSTANT)
init_ao_sphere(&re->wrld);
if(re->r.mode & R_RAYTRACE) {
init_render_qmcsampler(re);
if(re->wrld.mode & WO_AMB_OCC)
if (re->wrld.ao_samp_method == WO_AOSAMP_CONSTANT)
init_ao_sphere(&re->wrld);
}
/* still bad... doing all */

@ -40,8 +40,9 @@
#include "BKE_utildefines.h"
#include "BLI_arithb.h"
#include "BLI_rand.h"
#include "BLI_blenlib.h"
#include "BLI_jitter.h"
#include "BLI_rand.h"
#include "PIL_time.h"
@ -729,8 +730,8 @@ static void hammersley_create(double *out, int n)
struct QMCSampler *QMC_initSampler(int type, int tot)
{
QMCSampler *qsa = MEM_mallocN(sizeof(QMCSampler), "qmc sampler");
qsa->samp2d = MEM_mallocN(2*sizeof(double)*tot, "qmc sample table");
QMCSampler *qsa = MEM_callocN(sizeof(QMCSampler), "qmc sampler");
qsa->samp2d = MEM_callocN(2*sizeof(double)*tot, "qmc sample table");
qsa->tot = tot;
qsa->type = type;
@ -871,27 +872,55 @@ static void QMC_sampleHemiCosine(float *vec, QMCSampler *qsa, int thread, int nu
#endif
/* called from convertBlenderScene.c */
/* samples don't change per pixel, so build the samples in advance for efficiency */
void init_lamp_hammersley(LampRen *lar)
/* Allocates re->qmcsamplers: a zero-initialised array of BLENDER_MAX_THREADS
 * ListBase entries, one (initially empty) sampler list per thread slot.
 * No samplers are created here; get_thread_qmcsampler adds them on demand.
 * The array is released again by free_render_qmcsampler. */
void init_render_qmcsampler(Render *re)
{
/* NOTE(review): the next line refers to 'lar', which is not a parameter of
 * this function; it looks like the pre-change body left in by the diff
 * rendering (no +/- markers visible) -- confirm */
lar->qsa = QMC_initSampler(SAMP_TYPE_HAMMERSLEY, lar->ray_totsamp);
re->qmcsamplers= MEM_callocN(sizeof(ListBase)*BLENDER_MAX_THREADS, "QMCListBase");
}
void init_render_hammersley(Render *re)
/* Returns a sampler of the given type and sample count from the list that
 * belongs to 'thread', marked as in use (used = 1).
 *
 * re:     render whose qmcsamplers array holds one list per thread slot
 * thread: index into re->qmcsamplers
 * type:   sampler type to match; passed to QMC_initSampler for new samplers
 * tot:    sample count to match; passed to QMC_initSampler for new samplers
 *
 * An existing sampler is reused only if its type and tot both match and it
 * is not currently in use. Otherwise a new sampler is created and appended
 * to the thread's list. The caller gives the sampler back with
 * release_thread_qmcsampler(). */
QMCSampler *get_thread_qmcsampler(Render *re, int thread, int type, int tot)
{
/* NOTE(review): the next line assigns re->qsa and looks like the pre-change
 * body left in by the diff rendering (no +/- markers visible) -- confirm */
re->qsa = QMC_initSampler(SAMP_TYPE_HAMMERSLEY, (re->wrld.aosamp * re->wrld.aosamp));
QMCSampler *qsa;
/* create qmc samplers as needed, since recursion makes it hard to
 * predict how many are needed */
for(qsa=re->qmcsamplers[thread].first; qsa; qsa=qsa->next) {
/* reuse: same type, same sample count, and not handed out at the moment */
if(qsa->type == type && qsa->tot == tot && !qsa->used) {
qsa->used= 1;
return qsa;
}
}
/* no idle match in this thread's list: make a new one and keep it in the
 * list so later calls can reuse it */
qsa= QMC_initSampler(type, tot);
qsa->used= 1;
BLI_addtail(&re->qmcsamplers[thread], qsa);
return qsa;
}
void free_lamp_qmcsampler(LampRen *lar)
/* Hands a sampler obtained from get_thread_qmcsampler() back for reuse by
 * clearing its 'used' flag. The sampler is not freed and stays in its list.
 * The 're' and 'thread' arguments are not read by the 'used' assignment. */
void release_thread_qmcsampler(Render *re, int thread, QMCSampler *qsa)
{
/* NOTE(review): the next two lines refer to 'lar', which is not a parameter
 * of this function; they look like the pre-change body left in by the diff
 * rendering (no +/- markers visible) -- confirm */
QMC_freeSampler(lar->qsa);
lar->qsa = NULL;
qsa->used= 0;
}
/* Frees every sampler in every per-thread list of re->qmcsamplers, then the
 * list array itself, and resets re->qmcsamplers to NULL.
 * Does nothing for the lists when re->qmcsamplers is NULL. */
void free_render_qmcsampler(Render *re)
{
/* NOTE(review): the next two lines use re->qsa and look like the pre-change
 * body left in by the diff rendering (no +/- markers visible) -- confirm */
QMC_freeSampler(re->qsa);
re->qsa = NULL;
QMCSampler *qsa, *next;
int a;
if(re->qmcsamplers) {
/* walk all BLENDER_MAX_THREADS slots; unused slots simply have empty lists */
for(a=0; a<BLENDER_MAX_THREADS; a++) {
for(qsa=re->qmcsamplers[a].first; qsa; qsa=next) {
/* read the link before freeing; qsa is presumably invalid after
 * QMC_freeSampler -- confirm against its definition */
next= qsa->next;
QMC_freeSampler(qsa);
}
re->qmcsamplers[a].first= re->qmcsamplers[a].last= NULL;
}
MEM_freeN(re->qmcsamplers);
re->qmcsamplers= NULL;
}
}
static int adaptive_sample_variance(int samples, float *col, float *colsq, float thresh)
@ -968,7 +997,7 @@ static void trace_refract(float *col, ShadeInput *shi, ShadeResult *shr)
else samp_type = SAMP_TYPE_HAMMERSLEY;
/* all samples are generated per pixel */
qsa = QMC_initSampler(samp_type, max_samples);
qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples);
QMC_initPixel(qsa, shi->thread);
} else
max_samples = 1;
@ -1026,7 +1055,8 @@ static void trace_refract(float *col, ShadeInput *shi, ShadeResult *shr)
col[2] /= (float)samples;
col[3] /= (float)samples;
if (qsa) QMC_freeSampler(qsa);
if (qsa)
release_thread_qmcsampler(&R, shi->thread, qsa);
}
static void trace_reflect(float *col, ShadeInput *shi, ShadeResult *shr, float fresnelfac)
@ -1053,7 +1083,7 @@ static void trace_reflect(float *col, ShadeInput *shi, ShadeResult *shr, float f
else samp_type = SAMP_TYPE_HAMMERSLEY;
/* all samples are generated per pixel */
qsa = QMC_initSampler(samp_type, max_samples);
qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples);
QMC_initPixel(qsa, shi->thread);
} else
max_samples = 1;
@ -1131,7 +1161,8 @@ static void trace_reflect(float *col, ShadeInput *shi, ShadeResult *shr, float f
col[1] /= (float)samples;
col[2] /= (float)samples;
if (qsa) QMC_freeSampler(qsa);
if (qsa)
release_thread_qmcsampler(&R, shi->thread, qsa);
}
/* extern call from render loop */
@ -1546,9 +1577,9 @@ void ray_ao_qmc(ShadeInput *shi, float *shadfac)
max_samples /= speedfac;
if (max_samples < 5) max_samples = 5;
qsa = QMC_initSampler(SAMP_TYPE_HALTON, max_samples);
qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples);
} else if (R.wrld.ao_samp_method==WO_AOSAMP_HAMMERSLEY)
qsa = R.qsa;
qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples);
QMC_initPixel(qsa, shi->thread);
@ -1621,7 +1652,8 @@ void ray_ao_qmc(ShadeInput *shi, float *shadfac)
shadfac[0]= shadfac[1]= shadfac[2]= 1.0f - fac/(float)samples;
}
if ((qsa) && (qsa->type == SAMP_TYPE_HALTON)) QMC_freeSampler(qsa);
if (qsa)
release_thread_qmcsampler(&R, shi->thread, qsa);
}
/* extern call from shade_lamp_loop, ambient occlusion calculus */
@ -1787,11 +1819,11 @@ static void ray_shadow_qmc(ShadeInput *shi, LampRen *lar, float *lampco, float *
/* sampling init */
if (lar->ray_samp_method==LA_SAMP_HALTON) {
qsa = QMC_initSampler(SAMP_TYPE_HALTON, max_samples);
qsa_jit = QMC_initSampler(SAMP_TYPE_HALTON, max_samples);
qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples);
qsa_jit = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples);
} else if (lar->ray_samp_method==LA_SAMP_HAMMERSLEY) {
qsa = lar->qsa;
qsa_jit = QMC_initSampler(SAMP_TYPE_HAMMERSLEY, max_samples);
qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples);
qsa_jit = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples);
}
QMC_initPixel(qsa, shi->thread);
@ -1921,8 +1953,10 @@ static void ray_shadow_qmc(ShadeInput *shi, LampRen *lar, float *lampco, float *
} else
shadfac[3]= 1.0f-fac/samples;
if (qsa_jit) QMC_freeSampler(qsa_jit);
if ((qsa) && (qsa->type == SAMP_TYPE_HALTON)) QMC_freeSampler(qsa);
if (qsa_jit)
release_thread_qmcsampler(&R, shi->thread, qsa_jit);
if (qsa)
release_thread_qmcsampler(&R, shi->thread, qsa);
}
static void ray_shadow_jitter(ShadeInput *shi, LampRen *lar, float *lampco, float *shadfac, Isect *isec)