8dd7b5b26b
This patch tunes the integrator state sizing for Metal (`num_concurrent_states` and `num_concurrent_busy_states`). On all GPU architectures, we adjust the busy:total states ratio to be 1:4, which gives better rendering performance than the previous 1:16 ratio (independent of total state count). This gives a small performance uplift (e.g. 2-3% on M1 Ultra). Additionally, for M2 architectures, we double the overall state size if there is available headroom. Inclusive of the first change, we can expect an uplift of close to 10% in the future, as this results in larger dispatch sizes and minimises work submission overheads. In order to make an accurate determination of available headroom, we defer the calculation of `num_concurrent_states` and `num_concurrent_busy_states` until the time of integrator state allocation (i.e. after all of the scene data has been allocated). We also refactor `alloc_integrator_soa` to calculate an *exact* single-state-size in a first pass, right before allocating the integrator SoA buffers in a second pass. Reviewed By: brecht. Differential Revision: https://developer.blender.org/D16313
57 lines
1.3 KiB
C++
57 lines
1.3 KiB
C++
/* SPDX-License-Identifier: Apache-2.0
|
|
* Copyright 2011-2022 Blender Foundation */
|
|
|
|
#pragma once
|
|
|
|
#ifdef WITH_HIP
|
|
|
|
# include "device/kernel.h"
|
|
# include "device/memory.h"
|
|
# include "device/queue.h"
|
|
|
|
# include "device/hip/util.h"
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
/* Forward declaration: this header only refers to HIPDevice through a pointer. */
class HIPDevice;
|
|
/* Forward declaration: this header only refers to device_memory by reference. */
class device_memory;
|
|
|
|
/* Base class for HIP queues. */
|
|
class HIPDeviceQueue : public DeviceQueue {
|
|
public:
|
|
HIPDeviceQueue(HIPDevice *device);
|
|
~HIPDeviceQueue();
|
|
|
|
virtual int num_concurrent_states(const size_t state_size) const override;
|
|
virtual int num_concurrent_busy_states(const size_t state_size) const override;
|
|
|
|
virtual void init_execution() override;
|
|
|
|
virtual bool enqueue(DeviceKernel kernel,
|
|
const int work_size,
|
|
DeviceKernelArguments const &args) override;
|
|
|
|
virtual bool synchronize() override;
|
|
|
|
virtual void zero_to_device(device_memory &mem) override;
|
|
virtual void copy_to_device(device_memory &mem) override;
|
|
virtual void copy_from_device(device_memory &mem) override;
|
|
|
|
virtual hipStream_t stream()
|
|
{
|
|
return hip_stream_;
|
|
}
|
|
|
|
virtual unique_ptr<DeviceGraphicsInterop> graphics_interop_create() override;
|
|
|
|
protected:
|
|
HIPDevice *hip_device_;
|
|
hipStream_t hip_stream_;
|
|
|
|
void assert_success(hipError_t result, const char *operation);
|
|
};
|
|
|
|
CCL_NAMESPACE_END
|
|
|
|
#endif /* WITH_HIP */
|