Fix OpenCL group size performance issue on Intel GPUs

Contributed by Intel. On some scenes like classroom with particular integrated
GPUs this speeds up rendering 1.97x. With other benchmarks and GPUs it's
between 0.99-1.14x.
This commit is contained in:
Brecht Van Lommel 2021-05-17 13:32:50 +02:00
parent bb32ecadb5
commit 91a5dbbd17

@ -569,6 +569,11 @@ class OpenCLSplitKernel : public DeviceSplitKernel {
size_t num_elements = max_elements_for_max_buffer_size(kg, data, max_buffer_size);
int2 global_size = make_int2(max(round_down((int)sqrt(num_elements), 64), 64),
(int)sqrt(num_elements));
if (device->info.description.find("Intel") != string::npos) {
global_size = make_int2(min(512, global_size.x), min(512, global_size.y));
}
VLOG(1) << "Global size: " << global_size << ".";
return global_size;
}