forked from bartvdbraak/blender
CUDA: use streams and async to avoid busy-waiting
This is my first stab at this and is based on this IRC conversation: <mib2berlin> brecht: this is meaning as reminder only, I know you have other things to do > http://openvidia.sourceforge.net/index.php/Optimization_Notes#avoiding_busy_waits <brecht> mib2berlin: thanks, bookmarked. Only tested on Ubuntu 14.04 / CUDA 5.0, but I'll do some more testing tomorrow. I am also unsure about the placement and the lifetime of the stream and the event, but creating / deleting these seems to incur a non-trivial cost. Reviewers: brecht Reviewed By: brecht CC: mib2berlin, dingto Differential Revision: https://developer.blender.org/D262
This commit is contained in:
parent
abf18033f3
commit
84f9587540
@ -41,6 +41,8 @@ public:
|
|||||||
CUdevice cuDevice;
|
CUdevice cuDevice;
|
||||||
CUcontext cuContext;
|
CUcontext cuContext;
|
||||||
CUmodule cuModule;
|
CUmodule cuModule;
|
||||||
|
CUstream cuStream;
|
||||||
|
CUevent tileDone;
|
||||||
map<device_ptr, bool> tex_interp_map;
|
map<device_ptr, bool> tex_interp_map;
|
||||||
int cuDevId;
|
int cuDevId;
|
||||||
int cuDevArchitecture;
|
int cuDevArchitecture;
|
||||||
@ -207,6 +209,9 @@ public:
|
|||||||
if(cuda_error_(result, "cuCtxCreate"))
|
if(cuda_error_(result, "cuCtxCreate"))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
cuda_assert(cuStreamCreate(&cuStream, 0))
|
||||||
|
cuda_assert(cuEventCreate(&tileDone, 0x1))
|
||||||
|
|
||||||
int major, minor;
|
int major, minor;
|
||||||
cuDeviceComputeCapability(&major, &minor, cuDevId);
|
cuDeviceComputeCapability(&major, &minor, cuDevId);
|
||||||
cuDevArchitecture = major*100 + minor*10;
|
cuDevArchitecture = major*100 + minor*10;
|
||||||
@ -223,6 +228,8 @@ public:
|
|||||||
{
|
{
|
||||||
task_pool.stop();
|
task_pool.stop();
|
||||||
|
|
||||||
|
cuda_assert(cuEventDestroy(tileDone))
|
||||||
|
cuda_assert(cuStreamDestroy(cuStream))
|
||||||
cuda_assert(cuCtxDestroy(cuContext))
|
cuda_assert(cuCtxDestroy(cuContext))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -645,9 +652,10 @@ public:
|
|||||||
|
|
||||||
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1))
|
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1))
|
||||||
cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1))
|
cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1))
|
||||||
cuda_assert(cuLaunchGrid(cuPathTrace, xblocks, yblocks))
|
cuda_assert(cuLaunchGridAsync(cuPathTrace, xblocks, yblocks, cuStream))
|
||||||
|
|
||||||
cuda_assert(cuCtxSynchronize())
|
cuda_assert(cuEventRecord(tileDone, cuStream ))
|
||||||
|
cuda_assert(cuEventSynchronize(tileDone))
|
||||||
|
|
||||||
cuda_pop_context();
|
cuda_pop_context();
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user