forked from bartvdbraak/blender
Cycles: make CUDA code a bit more robust to host/device alloc failures.
Fixes a few corner cases found while stress-testing host-mapped memory.
This commit is contained in:
parent
e84966ac42
commit
6699454fb6
@ -650,7 +650,7 @@ public:
|
||||
|
||||
void generic_copy_to(device_memory& mem)
|
||||
{
|
||||
if(mem.device_pointer) {
|
||||
if(mem.host_pointer && mem.device_pointer) {
|
||||
CUDAContextScope scope(this);
|
||||
cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size()));
|
||||
}
|
||||
@ -715,11 +715,11 @@ public:
|
||||
size_t offset = elem*y*w;
|
||||
size_t size = elem*w*h;
|
||||
|
||||
if(mem.device_pointer) {
|
||||
if(mem.host_pointer && mem.device_pointer) {
|
||||
cuda_assert(cuMemcpyDtoH((uchar*)mem.host_pointer + offset,
|
||||
(CUdeviceptr)(mem.device_pointer + offset), size));
|
||||
}
|
||||
else {
|
||||
else if(mem.host_pointer) {
|
||||
memset((char*)mem.host_pointer + offset, 0, size);
|
||||
}
|
||||
}
|
||||
@ -1118,13 +1118,17 @@ public:
|
||||
|
||||
int shift_stride = stride*h;
|
||||
int num_shifts = (2*r+1)*(2*r+1);
|
||||
int mem_size = sizeof(float)*shift_stride*2*num_shifts;
|
||||
int mem_size = sizeof(float)*shift_stride*num_shifts;
|
||||
int channel_offset = 0;
|
||||
|
||||
CUdeviceptr temporary_mem;
|
||||
cuda_assert(cuMemAlloc(&temporary_mem, mem_size));
|
||||
CUdeviceptr difference = temporary_mem;
|
||||
CUdeviceptr blurDifference = temporary_mem + sizeof(float)*shift_stride * num_shifts;
|
||||
device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem");
|
||||
temporary_mem.alloc_to_device(2*mem_size);
|
||||
|
||||
if(have_error())
|
||||
return false;
|
||||
|
||||
CUdeviceptr difference = cuda_device_ptr(temporary_mem.device_pointer);
|
||||
CUdeviceptr blurDifference = difference + mem_size;
|
||||
|
||||
CUdeviceptr weightAccum = task->nlm_state.temporary_3_ptr;
|
||||
cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float)*shift_stride));
|
||||
@ -1156,7 +1160,7 @@ public:
|
||||
CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args);
|
||||
}
|
||||
|
||||
cuMemFree(temporary_mem);
|
||||
temporary_mem.free();
|
||||
|
||||
{
|
||||
CUfunction cuNLMNormalize;
|
||||
@ -1225,10 +1229,14 @@ public:
|
||||
int num_shifts = (2*r+1)*(2*r+1);
|
||||
int mem_size = sizeof(float)*shift_stride*num_shifts;
|
||||
|
||||
CUdeviceptr temporary_mem;
|
||||
cuda_assert(cuMemAlloc(&temporary_mem, 2*mem_size));
|
||||
CUdeviceptr difference = temporary_mem;
|
||||
CUdeviceptr blurDifference = temporary_mem + mem_size;
|
||||
device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem");
|
||||
temporary_mem.alloc_to_device(2*mem_size);
|
||||
|
||||
if(have_error())
|
||||
return false;
|
||||
|
||||
CUdeviceptr difference = cuda_device_ptr(temporary_mem.device_pointer);
|
||||
CUdeviceptr blurDifference = difference + mem_size;
|
||||
|
||||
{
|
||||
CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
|
||||
@ -1268,7 +1276,7 @@ public:
|
||||
CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args);
|
||||
}
|
||||
|
||||
cuMemFree(temporary_mem);
|
||||
temporary_mem.free();
|
||||
|
||||
{
|
||||
CUfunction cuFinalize;
|
||||
|
@ -86,7 +86,7 @@ void device_memory::device_free()
|
||||
|
||||
void device_memory::device_copy_to()
|
||||
{
|
||||
if(data_size) {
|
||||
if(host_pointer) {
|
||||
device->mem_copy_to(*this);
|
||||
}
|
||||
}
|
||||
|
@ -151,6 +151,10 @@ bool RenderBuffers::copy_from_device()
|
||||
|
||||
bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int sample, int components, float *pixels)
|
||||
{
|
||||
if(buffer.data() == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
float invsample = 1.0f/sample;
|
||||
float scale = invsample;
|
||||
bool variance = (offset == DENOISING_PASS_NORMAL_VAR) ||
|
||||
@ -218,6 +222,10 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp
|
||||
|
||||
bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels)
|
||||
{
|
||||
if(buffer.data() == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int pass_offset = 0;
|
||||
|
||||
for(size_t j = 0; j < params.passes.size(); j++) {
|
||||
|
@ -703,7 +703,7 @@ void ImageManager::device_load_image(Device *device,
|
||||
|
||||
/* Slot assignment */
|
||||
int flat_slot = type_index_to_flattened_slot(slot, type);
|
||||
string name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot);
|
||||
img->mem_name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot);
|
||||
|
||||
/* Free previous texture in slot. */
|
||||
if(img->mem) {
|
||||
@ -715,7 +715,7 @@ void ImageManager::device_load_image(Device *device,
|
||||
/* Create new texture. */
|
||||
if(type == IMAGE_DATA_TYPE_FLOAT4) {
|
||||
device_vector<float4> *tex_img
|
||||
= new device_vector<float4>(device, name.c_str(), MEM_TEXTURE);
|
||||
= new device_vector<float4>(device, img->mem_name.c_str(), MEM_TEXTURE);
|
||||
|
||||
if(!file_load_image<TypeDesc::FLOAT, float>(img,
|
||||
type,
|
||||
@ -741,7 +741,7 @@ void ImageManager::device_load_image(Device *device,
|
||||
}
|
||||
else if(type == IMAGE_DATA_TYPE_FLOAT) {
|
||||
device_vector<float> *tex_img
|
||||
= new device_vector<float>(device, name.c_str(), MEM_TEXTURE);
|
||||
= new device_vector<float>(device, img->mem_name.c_str(), MEM_TEXTURE);
|
||||
|
||||
if(!file_load_image<TypeDesc::FLOAT, float>(img,
|
||||
type,
|
||||
@ -764,7 +764,7 @@ void ImageManager::device_load_image(Device *device,
|
||||
}
|
||||
else if(type == IMAGE_DATA_TYPE_BYTE4) {
|
||||
device_vector<uchar4> *tex_img
|
||||
= new device_vector<uchar4>(device, name.c_str(), MEM_TEXTURE);
|
||||
= new device_vector<uchar4>(device, img->mem_name.c_str(), MEM_TEXTURE);
|
||||
|
||||
if(!file_load_image<TypeDesc::UINT8, uchar>(img,
|
||||
type,
|
||||
@ -790,7 +790,7 @@ void ImageManager::device_load_image(Device *device,
|
||||
}
|
||||
else if(type == IMAGE_DATA_TYPE_BYTE) {
|
||||
device_vector<uchar> *tex_img
|
||||
= new device_vector<uchar>(device, name.c_str(), MEM_TEXTURE);
|
||||
= new device_vector<uchar>(device, img->mem_name.c_str(), MEM_TEXTURE);
|
||||
|
||||
if(!file_load_image<TypeDesc::UINT8, uchar>(img,
|
||||
type,
|
||||
@ -812,7 +812,7 @@ void ImageManager::device_load_image(Device *device,
|
||||
}
|
||||
else if(type == IMAGE_DATA_TYPE_HALF4) {
|
||||
device_vector<half4> *tex_img
|
||||
= new device_vector<half4>(device, name.c_str(), MEM_TEXTURE);
|
||||
= new device_vector<half4>(device, img->mem_name.c_str(), MEM_TEXTURE);
|
||||
|
||||
if(!file_load_image<TypeDesc::HALF, half>(img,
|
||||
type,
|
||||
@ -837,7 +837,7 @@ void ImageManager::device_load_image(Device *device,
|
||||
}
|
||||
else if(type == IMAGE_DATA_TYPE_HALF) {
|
||||
device_vector<half> *tex_img
|
||||
= new device_vector<half>(device, name.c_str(), MEM_TEXTURE);
|
||||
= new device_vector<half>(device, img->mem_name.c_str(), MEM_TEXTURE);
|
||||
|
||||
if(!file_load_image<TypeDesc::HALF, half>(img,
|
||||
type,
|
||||
|
@ -111,6 +111,7 @@ public:
|
||||
InterpolationType interpolation;
|
||||
ExtensionType extension;
|
||||
|
||||
string mem_name;
|
||||
device_memory *mem;
|
||||
|
||||
int users;
|
||||
|
@ -644,7 +644,7 @@ void ObjectManager::device_update_flags(Device *,
|
||||
|
||||
void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Scene *scene)
|
||||
{
|
||||
if(scene->objects.size() == 0) {
|
||||
if(dscene->objects.size() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user