Fix OpenCL group size performance issue on Intel GPUs
Contributed by Intel. On some scenes, such as classroom, this speeds up rendering by 1.97x on particular integrated GPUs. With other benchmarks and GPUs, the speedup is between 0.99x and 1.14x.
This commit is contained in:
parent
bb32ecadb5
commit
91a5dbbd17
@@ -569,6 +569,11 @@ class OpenCLSplitKernel : public DeviceSplitKernel {
|
||||
size_t num_elements = max_elements_for_max_buffer_size(kg, data, max_buffer_size);
|
||||
int2 global_size = make_int2(max(round_down((int)sqrt(num_elements), 64), 64),
|
||||
(int)sqrt(num_elements));
|
||||
|
||||
if (device->info.description.find("Intel") != string::npos) {
|
||||
global_size = make_int2(min(512, global_size.x), min(512, global_size.y));
|
||||
}
|
||||
|
||||
VLOG(1) << "Global size: " << global_size << ".";
|
||||
return global_size;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user