forked from bartvdbraak/blender
Cycles: various fixes for HIP and compilation of HIP binaries
* Additional structs added to the hipew loader for device props
* Adds hipRTC functions to the loader for future usage
* Enables CPU+GPU usage for HIP
* Cleanup to the adaptive kernel compilation process
* Fix for kernel compilation failures with HIP with latest master

Ref T92393, D12958
This commit is contained in:
parent
d1fcf93f03
commit
d092933abb
141
extern/hipew/include/hipew.h
vendored
141
extern/hipew/include/hipew.h
vendored
@ -425,6 +425,105 @@ typedef struct HIPdevprop_st {
|
|||||||
int textureAlign;
|
int textureAlign;
|
||||||
} HIPdevprop;
|
} HIPdevprop;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
// 32-bit Atomics
|
||||||
|
unsigned hasGlobalInt32Atomics : 1; ///< 32-bit integer atomics for global memory.
|
||||||
|
unsigned hasGlobalFloatAtomicExch : 1; ///< 32-bit float atomic exch for global memory.
|
||||||
|
unsigned hasSharedInt32Atomics : 1; ///< 32-bit integer atomics for shared memory.
|
||||||
|
unsigned hasSharedFloatAtomicExch : 1; ///< 32-bit float atomic exch for shared memory.
|
||||||
|
unsigned hasFloatAtomicAdd : 1; ///< 32-bit float atomic add in global and shared memory.
|
||||||
|
|
||||||
|
// 64-bit Atomics
|
||||||
|
unsigned hasGlobalInt64Atomics : 1; ///< 64-bit integer atomics for global memory.
|
||||||
|
unsigned hasSharedInt64Atomics : 1; ///< 64-bit integer atomics for shared memory.
|
||||||
|
|
||||||
|
// Doubles
|
||||||
|
unsigned hasDoubles : 1; ///< Double-precision floating point.
|
||||||
|
|
||||||
|
// Warp cross-lane operations
|
||||||
|
unsigned hasWarpVote : 1; ///< Warp vote instructions (__any, __all).
|
||||||
|
unsigned hasWarpBallot : 1; ///< Warp ballot instructions (__ballot).
|
||||||
|
unsigned hasWarpShuffle : 1; ///< Warp shuffle operations. (__shfl_*).
|
||||||
|
unsigned hasFunnelShift : 1; ///< Funnel two words into one with shift&mask caps.
|
||||||
|
|
||||||
|
// Sync
|
||||||
|
unsigned hasThreadFenceSystem : 1; ///< __threadfence_system.
|
||||||
|
unsigned hasSyncThreadsExt : 1; ///< __syncthreads_count, syncthreads_and, syncthreads_or.
|
||||||
|
|
||||||
|
// Misc
|
||||||
|
unsigned hasSurfaceFuncs : 1; ///< Surface functions.
|
||||||
|
unsigned has3dGrid : 1; ///< Grid and group dims are 3D (rather than 2D).
|
||||||
|
unsigned hasDynamicParallelism : 1; ///< Dynamic parallelism.
|
||||||
|
} hipDeviceArch_t;
|
||||||
|
|
||||||
|
typedef struct hipDeviceProp_t {
|
||||||
|
char name[256]; ///< Device name.
|
||||||
|
size_t totalGlobalMem; ///< Size of global memory region (in bytes).
|
||||||
|
size_t sharedMemPerBlock; ///< Size of shared memory region (in bytes).
|
||||||
|
int regsPerBlock; ///< Registers per block.
|
||||||
|
int warpSize; ///< Warp size.
|
||||||
|
int maxThreadsPerBlock; ///< Max work items per work group or workgroup max size.
|
||||||
|
int maxThreadsDim[3]; ///< Max number of threads in each dimension (XYZ) of a block.
|
||||||
|
int maxGridSize[3]; ///< Max grid dimensions (XYZ).
|
||||||
|
int clockRate; ///< Max clock frequency of the multiProcessors in khz.
|
||||||
|
int memoryClockRate; ///< Max global memory clock frequency in khz.
|
||||||
|
int memoryBusWidth; ///< Global memory bus width in bits.
|
||||||
|
size_t totalConstMem; ///< Size of constant memory region (in bytes).
|
||||||
|
int major; ///< Major compute capability. On HCC, this is an approximation and features may
|
||||||
|
///< differ from CUDA CC. See the arch feature flags for portable ways to query
|
||||||
|
///< feature caps.
|
||||||
|
int minor; ///< Minor compute capability. On HCC, this is an approximation and features may
|
||||||
|
///< differ from CUDA CC. See the arch feature flags for portable ways to query
|
||||||
|
///< feature caps.
|
||||||
|
int multiProcessorCount; ///< Number of multi-processors (compute units).
|
||||||
|
int l2CacheSize; ///< L2 cache size.
|
||||||
|
int maxThreadsPerMultiProcessor; ///< Maximum resident threads per multi-processor.
|
||||||
|
int computeMode; ///< Compute mode.
|
||||||
|
int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*"
|
||||||
|
///< instructions. New for HIP.
|
||||||
|
hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP.
|
||||||
|
int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently.
|
||||||
|
int pciDomainID; ///< PCI Domain ID
|
||||||
|
int pciBusID; ///< PCI Bus ID.
|
||||||
|
int pciDeviceID; ///< PCI Device ID.
|
||||||
|
size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor.
|
||||||
|
int isMultiGpuBoard; ///< 1 if device is on a multi-GPU board, 0 if not.
|
||||||
|
int canMapHostMemory; ///< Check whether HIP can map host memory
|
||||||
|
int gcnArch; ///< DEPRECATED: use gcnArchName instead
|
||||||
|
char gcnArchName[256]; ///< AMD GCN Arch Name.
|
||||||
|
int integrated; ///< APU vs dGPU
|
||||||
|
int cooperativeLaunch; ///< HIP device supports cooperative launch
|
||||||
|
int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple devices
|
||||||
|
int maxTexture1DLinear; ///< Maximum size for 1D textures bound to linear memory
|
||||||
|
int maxTexture1D; ///< Maximum number of elements in 1D images
|
||||||
|
int maxTexture2D[2]; ///< Maximum dimensions (width, height) of 2D images, in image elements
|
||||||
|
int maxTexture3D[3]; ///< Maximum dimensions (width, height, depth) of 3D images, in image elements
|
||||||
|
unsigned int* hdpMemFlushCntl; ///< Address of HDP_MEM_COHERENCY_FLUSH_CNTL register
|
||||||
|
unsigned int* hdpRegFlushCntl; ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register
|
||||||
|
size_t memPitch; ///<Maximum pitch in bytes allowed by memory copies
|
||||||
|
size_t textureAlignment; ///<Alignment requirement for textures
|
||||||
|
size_t texturePitchAlignment; ///<Pitch alignment requirement for texture references bound to pitched memory
|
||||||
|
int kernelExecTimeoutEnabled; ///<Run time limit for kernels executed on the device
|
||||||
|
int ECCEnabled; ///<Device has ECC support enabled
|
||||||
|
int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0
|
||||||
|
int cooperativeMultiDeviceUnmatchedFunc; ///< HIP device supports cooperative launch on multiple
|
||||||
|
///devices with unmatched functions
|
||||||
|
int cooperativeMultiDeviceUnmatchedGridDim; ///< HIP device supports cooperative launch on multiple
|
||||||
|
///devices with unmatched grid dimensions
|
||||||
|
int cooperativeMultiDeviceUnmatchedBlockDim; ///< HIP device supports cooperative launch on multiple
|
||||||
|
///devices with unmatched block dimensions
|
||||||
|
int cooperativeMultiDeviceUnmatchedSharedMem; ///< HIP device supports cooperative launch on multiple
|
||||||
|
///devices with unmatched shared memories
|
||||||
|
int isLargeBar; ///< 1: if it is a large PCI bar device, else 0
|
||||||
|
int asicRevision; ///< Revision of the GPU in this device
|
||||||
|
int managedMemory; ///< Device supports allocating managed memory on this system
|
||||||
|
int directManagedMemAccessFromHost; ///< Host can directly access managed memory on the device without migration
|
||||||
|
int concurrentManagedAccess; ///< Device can coherently access managed memory concurrently with the CPU
|
||||||
|
int pageableMemoryAccess; ///< Device supports coherently accessing pageable memory
|
||||||
|
///< without calling hipHostRegister on it
|
||||||
|
int pageableMemoryAccessUsesHostPageTables; ///< Device accesses pageable memory via the host's page tables
|
||||||
|
} hipDeviceProp_t;
|
||||||
|
|
||||||
typedef enum HIPpointer_attribute_enum {
|
typedef enum HIPpointer_attribute_enum {
|
||||||
HIP_POINTER_ATTRIBUTE_CONTEXT = 1,
|
HIP_POINTER_ATTRIBUTE_CONTEXT = 1,
|
||||||
HIP_POINTER_ATTRIBUTE_MEMORY_TYPE = 2,
|
HIP_POINTER_ATTRIBUTE_MEMORY_TYPE = 2,
|
||||||
@ -951,6 +1050,25 @@ typedef enum HIPGLmap_flags_enum {
|
|||||||
HIP_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
|
HIP_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
|
||||||
} HIPGLmap_flags;
|
} HIPGLmap_flags;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* hipRTC related
|
||||||
|
*/
|
||||||
|
typedef struct _hiprtcProgram* hiprtcProgram;
|
||||||
|
|
||||||
|
typedef enum hiprtcResult {
|
||||||
|
HIPRTC_SUCCESS = 0,
|
||||||
|
HIPRTC_ERROR_OUT_OF_MEMORY = 1,
|
||||||
|
HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
|
||||||
|
HIPRTC_ERROR_INVALID_INPUT = 3,
|
||||||
|
HIPRTC_ERROR_INVALID_PROGRAM = 4,
|
||||||
|
HIPRTC_ERROR_INVALID_OPTION = 5,
|
||||||
|
HIPRTC_ERROR_COMPILATION = 6,
|
||||||
|
HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
|
||||||
|
HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
|
||||||
|
HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
|
||||||
|
HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
|
||||||
|
HIPRTC_ERROR_INTERNAL_ERROR = 11
|
||||||
|
} hiprtcResult;
|
||||||
|
|
||||||
/* Function types. */
|
/* Function types. */
|
||||||
typedef hipError_t HIPAPI thipGetErrorName(hipError_t error, const char** pStr);
|
typedef hipError_t HIPAPI thipGetErrorName(hipError_t error, const char** pStr);
|
||||||
@ -958,6 +1076,7 @@ typedef hipError_t HIPAPI thipInit(unsigned int Flags);
|
|||||||
typedef hipError_t HIPAPI thipDriverGetVersion(int* driverVersion);
|
typedef hipError_t HIPAPI thipDriverGetVersion(int* driverVersion);
|
||||||
typedef hipError_t HIPAPI thipGetDevice(hipDevice_t* device, int ordinal);
|
typedef hipError_t HIPAPI thipGetDevice(hipDevice_t* device, int ordinal);
|
||||||
typedef hipError_t HIPAPI thipGetDeviceCount(int* count);
|
typedef hipError_t HIPAPI thipGetDeviceCount(int* count);
|
||||||
|
typedef hipError_t HIPAPI thipGetDeviceProperties(hipDeviceProp_t* props, int deviceId);
|
||||||
typedef hipError_t HIPAPI thipDeviceGetName(char* name, int len, hipDevice_t dev);
|
typedef hipError_t HIPAPI thipDeviceGetName(char* name, int len, hipDevice_t dev);
|
||||||
typedef hipError_t HIPAPI thipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
|
typedef hipError_t HIPAPI thipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
|
||||||
typedef hipError_t HIPAPI thipDeviceComputeCapability(int* major, int* minor, hipDevice_t dev);
|
typedef hipError_t HIPAPI thipDeviceComputeCapability(int* major, int* minor, hipDevice_t dev);
|
||||||
@ -1071,6 +1190,16 @@ typedef hipError_t HIPAPI thipGraphicsMapResources(unsigned int count, hipGraphi
|
|||||||
typedef hipError_t HIPAPI thipGraphicsUnmapResources(unsigned int count, hipGraphicsResource* resources, hipStream_t hStream);
|
typedef hipError_t HIPAPI thipGraphicsUnmapResources(unsigned int count, hipGraphicsResource* resources, hipStream_t hStream);
|
||||||
typedef hipError_t HIPAPI thipGraphicsGLRegisterBuffer(hipGraphicsResource* pCudaResource, GLuint buffer, unsigned int Flags);
|
typedef hipError_t HIPAPI thipGraphicsGLRegisterBuffer(hipGraphicsResource* pCudaResource, GLuint buffer, unsigned int Flags);
|
||||||
typedef hipError_t HIPAPI thipGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices, unsigned int hipDeviceCount, hipGLDeviceList deviceList);
|
typedef hipError_t HIPAPI thipGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices, unsigned int hipDeviceCount, hipGLDeviceList deviceList);
|
||||||
|
/* hiprtcGetErrorString returns a human-readable message string for `result`, not a result code. */
typedef const char* HIPAPI thiprtcGetErrorString(hiprtcResult result);
|
||||||
|
typedef hiprtcResult HIPAPI thiprtcAddNameExpression(hiprtcProgram prog, const char* name_expression);
|
||||||
|
typedef hiprtcResult HIPAPI thiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options);
|
||||||
|
typedef hiprtcResult HIPAPI thiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name, int numHeaders, const char** headers, const char** includeNames);
|
||||||
|
typedef hiprtcResult HIPAPI thiprtcDestroyProgram(hiprtcProgram* prog);
|
||||||
|
typedef hiprtcResult HIPAPI thiprtcGetLoweredName(hiprtcProgram prog, const char* name_expression, const char** lowered_name);
|
||||||
|
typedef hiprtcResult HIPAPI thiprtcGetProgramLog(hiprtcProgram prog, char* log);
|
||||||
|
typedef hiprtcResult HIPAPI thiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet);
|
||||||
|
typedef hiprtcResult HIPAPI thiprtcGetCode(hiprtcProgram prog, char* code);
|
||||||
|
typedef hiprtcResult HIPAPI thiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet);
|
||||||
|
|
||||||
|
|
||||||
/* Function declarations. */
|
/* Function declarations. */
|
||||||
@ -1079,6 +1208,7 @@ extern thipInit *hipInit;
|
|||||||
extern thipDriverGetVersion *hipDriverGetVersion;
|
extern thipDriverGetVersion *hipDriverGetVersion;
|
||||||
extern thipGetDevice *hipGetDevice;
|
extern thipGetDevice *hipGetDevice;
|
||||||
extern thipGetDeviceCount *hipGetDeviceCount;
|
extern thipGetDeviceCount *hipGetDeviceCount;
|
||||||
|
extern thipGetDeviceProperties *hipGetDeviceProperties;
|
||||||
extern thipDeviceGetName *hipDeviceGetName;
|
extern thipDeviceGetName *hipDeviceGetName;
|
||||||
extern thipDeviceGetAttribute *hipDeviceGetAttribute;
|
extern thipDeviceGetAttribute *hipDeviceGetAttribute;
|
||||||
extern thipDeviceComputeCapability *hipDeviceComputeCapability;
|
extern thipDeviceComputeCapability *hipDeviceComputeCapability;
|
||||||
@ -1187,6 +1317,17 @@ extern thipGraphicsUnmapResources *hipGraphicsUnmapResources;
|
|||||||
extern thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
|
extern thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
|
||||||
extern thipGLGetDevices *hipGLGetDevices;
|
extern thipGLGetDevices *hipGLGetDevices;
|
||||||
|
|
||||||
|
extern thiprtcGetErrorString* hiprtcGetErrorString;
|
||||||
|
extern thiprtcAddNameExpression* hiprtcAddNameExpression;
|
||||||
|
extern thiprtcCompileProgram* hiprtcCompileProgram;
|
||||||
|
extern thiprtcCreateProgram* hiprtcCreateProgram;
|
||||||
|
extern thiprtcDestroyProgram* hiprtcDestroyProgram;
|
||||||
|
extern thiprtcGetLoweredName* hiprtcGetLoweredName;
|
||||||
|
extern thiprtcGetProgramLog* hiprtcGetProgramLog;
|
||||||
|
extern thiprtcGetProgramLogSize* hiprtcGetProgramLogSize;
|
||||||
|
extern thiprtcGetCode* hiprtcGetCode;
|
||||||
|
extern thiprtcGetCodeSize* hiprtcGetCodeSize;
|
||||||
|
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
HIPEW_SUCCESS = 0,
|
HIPEW_SUCCESS = 0,
|
||||||
|
23
extern/hipew/src/hipew.c
vendored
23
extern/hipew/src/hipew.c
vendored
@ -70,6 +70,7 @@ thipInit *hipInit;
|
|||||||
thipDriverGetVersion *hipDriverGetVersion;
|
thipDriverGetVersion *hipDriverGetVersion;
|
||||||
thipGetDevice *hipGetDevice;
|
thipGetDevice *hipGetDevice;
|
||||||
thipGetDeviceCount *hipGetDeviceCount;
|
thipGetDeviceCount *hipGetDeviceCount;
|
||||||
|
thipGetDeviceProperties *hipGetDeviceProperties;
|
||||||
thipDeviceGetName *hipDeviceGetName;
|
thipDeviceGetName *hipDeviceGetName;
|
||||||
thipDeviceGetAttribute *hipDeviceGetAttribute;
|
thipDeviceGetAttribute *hipDeviceGetAttribute;
|
||||||
thipDeviceComputeCapability *hipDeviceComputeCapability;
|
thipDeviceComputeCapability *hipDeviceComputeCapability;
|
||||||
@ -178,6 +179,17 @@ thipGraphicsResourceGetMappedPointer *hipGraphicsResourceGetMappedPointer;
|
|||||||
thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
|
thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
|
||||||
thipGLGetDevices *hipGLGetDevices;
|
thipGLGetDevices *hipGLGetDevices;
|
||||||
|
|
||||||
|
thiprtcGetErrorString* hiprtcGetErrorString;
|
||||||
|
thiprtcAddNameExpression* hiprtcAddNameExpression;
|
||||||
|
thiprtcCompileProgram* hiprtcCompileProgram;
|
||||||
|
thiprtcCreateProgram* hiprtcCreateProgram;
|
||||||
|
thiprtcDestroyProgram* hiprtcDestroyProgram;
|
||||||
|
thiprtcGetLoweredName* hiprtcGetLoweredName;
|
||||||
|
thiprtcGetProgramLog* hiprtcGetProgramLog;
|
||||||
|
thiprtcGetProgramLogSize* hiprtcGetProgramLogSize;
|
||||||
|
thiprtcGetCode* hiprtcGetCode;
|
||||||
|
thiprtcGetCodeSize* hiprtcGetCodeSize;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static DynamicLibrary dynamic_library_open_find(const char **paths) {
|
static DynamicLibrary dynamic_library_open_find(const char **paths) {
|
||||||
@ -242,6 +254,7 @@ static int hipewHipInit(void) {
|
|||||||
HIP_LIBRARY_FIND_CHECKED(hipDriverGetVersion);
|
HIP_LIBRARY_FIND_CHECKED(hipDriverGetVersion);
|
||||||
HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
|
HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
|
||||||
HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount);
|
HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount);
|
||||||
|
HIP_LIBRARY_FIND_CHECKED(hipGetDeviceProperties);
|
||||||
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
|
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
|
||||||
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
|
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
|
||||||
HIP_LIBRARY_FIND_CHECKED(hipDeviceComputeCapability);
|
HIP_LIBRARY_FIND_CHECKED(hipDeviceComputeCapability);
|
||||||
@ -346,6 +359,16 @@ static int hipewHipInit(void) {
|
|||||||
HIP_LIBRARY_FIND_CHECKED(hipGraphicsGLRegisterBuffer);
|
HIP_LIBRARY_FIND_CHECKED(hipGraphicsGLRegisterBuffer);
|
||||||
HIP_LIBRARY_FIND_CHECKED(hipGLGetDevices);
|
HIP_LIBRARY_FIND_CHECKED(hipGLGetDevices);
|
||||||
#endif
|
#endif
|
||||||
|
HIP_LIBRARY_FIND_CHECKED(hiprtcGetErrorString);
|
||||||
|
HIP_LIBRARY_FIND_CHECKED(hiprtcAddNameExpression);
|
||||||
|
HIP_LIBRARY_FIND_CHECKED(hiprtcCompileProgram);
|
||||||
|
HIP_LIBRARY_FIND_CHECKED(hiprtcCreateProgram);
|
||||||
|
HIP_LIBRARY_FIND_CHECKED(hiprtcDestroyProgram);
|
||||||
|
HIP_LIBRARY_FIND_CHECKED(hiprtcGetLoweredName);
|
||||||
|
HIP_LIBRARY_FIND_CHECKED(hiprtcGetProgramLog);
|
||||||
|
HIP_LIBRARY_FIND_CHECKED(hiprtcGetProgramLogSize);
|
||||||
|
HIP_LIBRARY_FIND_CHECKED(hiprtcGetCode);
|
||||||
|
HIP_LIBRARY_FIND_CHECKED(hiprtcGetCodeSize);
|
||||||
result = HIPEW_SUCCESS;
|
result = HIPEW_SUCCESS;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -1329,7 +1329,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
|||||||
elif entry.type == 'CPU':
|
elif entry.type == 'CPU':
|
||||||
cpu_devices.append(entry)
|
cpu_devices.append(entry)
|
||||||
# Extend all GPU devices with CPU.
|
# Extend all GPU devices with CPU.
|
||||||
if compute_device_type != 'CPU' and compute_device_type != 'HIP':
|
if compute_device_type != 'CPU':
|
||||||
devices.extend(cpu_devices)
|
devices.extend(cpu_devices)
|
||||||
return devices
|
return devices
|
||||||
|
|
||||||
|
@ -208,7 +208,7 @@ bool HIPDevice::use_adaptive_compilation()
|
|||||||
return DebugFlags().hip.adaptive_compile;
|
return DebugFlags().hip.adaptive_compile;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Common NVCC flags which stays the same regardless of shading model,
|
/* Common HIPCC flags which stays the same regardless of shading model,
|
||||||
* kernel sources md5 and only depends on compiler or compilation settings.
|
* kernel sources md5 and only depends on compiler or compilation settings.
|
||||||
*/
|
*/
|
||||||
string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
|
string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
|
||||||
@ -239,11 +239,13 @@ string HIPDevice::compile_kernel(const uint kernel_features,
|
|||||||
int major, minor;
|
int major, minor;
|
||||||
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
|
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
|
||||||
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
|
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
|
||||||
|
hipDeviceProp_t props;
|
||||||
|
hipGetDeviceProperties(&props, hipDevId);
|
||||||
|
|
||||||
/* Attempt to use kernel provided with Blender. */
|
/* Attempt to use kernel provided with Blender. */
|
||||||
if (!use_adaptive_compilation()) {
|
if (!use_adaptive_compilation()) {
|
||||||
if (!force_ptx) {
|
if (!force_ptx) {
|
||||||
const string fatbin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
|
const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, props.gcnArchName));
|
||||||
VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
|
VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
|
||||||
if (path_exists(fatbin)) {
|
if (path_exists(fatbin)) {
|
||||||
VLOG(1) << "Using precompiled kernel.";
|
VLOG(1) << "Using precompiled kernel.";
|
||||||
@ -283,17 +285,21 @@ string HIPDevice::compile_kernel(const uint kernel_features,
|
|||||||
const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
|
const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
|
||||||
|
|
||||||
const char *const kernel_ext = "genco";
|
const char *const kernel_ext = "genco";
|
||||||
|
std::string options;
|
||||||
# ifdef _WIN32
|
# ifdef _WIN32
|
||||||
const char *const options =
|
options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -ffast-math");
|
||||||
"save-temps -Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp";
|
|
||||||
# else
|
# else
|
||||||
const char *const options =
|
options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -O3 -ffast-math");
|
||||||
"save-temps -Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -O3 -ggdb";
|
|
||||||
# endif
|
# endif
|
||||||
|
# ifdef _DEBUG
|
||||||
|
options.append(" -save-temps");
|
||||||
|
# endif
|
||||||
|
options.append(" --amdgpu-target=").append(props.gcnArchName);
|
||||||
|
|
||||||
const string include_path = source_path;
|
const string include_path = source_path;
|
||||||
const char *const kernel_arch = force_ptx ? "compute" : "sm";
|
const char *const kernel_arch = props.gcnArchName;
|
||||||
const string fatbin_file = string_printf(
|
const string fatbin_file = string_printf(
|
||||||
"cycles_%s_%s_%d%d_%s", name, kernel_arch, major, minor, kernel_md5.c_str());
|
"cycles_%s_%s_%s", name, kernel_arch, kernel_md5.c_str());
|
||||||
const string fatbin = path_cache_get(path_join("kernels", fatbin_file));
|
const string fatbin = path_cache_get(path_join("kernels", fatbin_file));
|
||||||
VLOG(1) << "Testing for locally compiled kernel " << fatbin << ".";
|
VLOG(1) << "Testing for locally compiled kernel " << fatbin << ".";
|
||||||
if (path_exists(fatbin)) {
|
if (path_exists(fatbin)) {
|
||||||
@ -350,7 +356,7 @@ string HIPDevice::compile_kernel(const uint kernel_features,
|
|||||||
|
|
||||||
string command = string_printf("%s -%s -I %s --%s %s -o \"%s\"",
|
string command = string_printf("%s -%s -I %s --%s %s -o \"%s\"",
|
||||||
hipcc,
|
hipcc,
|
||||||
options,
|
options.c_str(),
|
||||||
include_path.c_str(),
|
include_path.c_str(),
|
||||||
kernel_ext,
|
kernel_ext,
|
||||||
source_path.c_str(),
|
source_path.c_str(),
|
||||||
|
@ -487,9 +487,6 @@ endif()
|
|||||||
# HIP module
|
# HIP module
|
||||||
|
|
||||||
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
||||||
# 64 bit only
|
|
||||||
set(HIP_BITS 64)
|
|
||||||
|
|
||||||
# build for each arch
|
# build for each arch
|
||||||
set(hip_sources device/hip/kernel.cpp
|
set(hip_sources device/hip/kernel.cpp
|
||||||
${SRC_HEADERS}
|
${SRC_HEADERS}
|
||||||
@ -504,32 +501,41 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
|||||||
set(hip_fatbins)
|
set(hip_fatbins)
|
||||||
|
|
||||||
macro(CYCLES_HIP_KERNEL_ADD arch prev_arch name flags sources experimental)
|
macro(CYCLES_HIP_KERNEL_ADD arch prev_arch name flags sources experimental)
|
||||||
if(${arch} MATCHES "compute_.*")
|
set(format "fatbin")
|
||||||
set(format "ptx")
|
|
||||||
else()
|
|
||||||
set(format "fatbin")
|
|
||||||
endif()
|
|
||||||
set(hip_file ${name}_${arch}.${format})
|
set(hip_file ${name}_${arch}.${format})
|
||||||
|
|
||||||
set(kernel_sources ${sources})
|
set(kernel_sources ${sources})
|
||||||
if(NOT ${prev_arch} STREQUAL "none")
|
if(NOT ${prev_arch} STREQUAL "none")
|
||||||
if(${prev_arch} MATCHES "compute_.*")
|
set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.fatbin)
|
||||||
set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx)
|
|
||||||
else()
|
|
||||||
set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.fatbin)
|
|
||||||
endif()
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(hip_kernel_src "/device/hip/${name}.cpp")
|
set(hip_kernel_src "/device/hip/${name}.cpp")
|
||||||
|
|
||||||
set(hip_flags ${flags}
|
if(WIN32)
|
||||||
|
set(hip_command ${CMAKE_COMMAND})
|
||||||
|
set(hip_flags
|
||||||
|
-E env "HIP_PATH=${HIP_ROOT_DIR}" "PATH=${HIP_PERL_PATH}"
|
||||||
|
${HIP_HIPCC_EXECUTABLE}.bat)
|
||||||
|
else()
|
||||||
|
set(hip_command ${HIP_HIPCC_EXECUTABLE})
|
||||||
|
set(hip_flags)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(hip_flags
|
||||||
|
${hip_flags}
|
||||||
|
--amdgpu-target=${arch}
|
||||||
|
${HIP_HIPCC_FLAGS}
|
||||||
|
--genco
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}${hip_kernel_src}
|
||||||
|
${flags}
|
||||||
-D CCL_NAMESPACE_BEGIN=
|
-D CCL_NAMESPACE_BEGIN=
|
||||||
-D CCL_NAMESPACE_END=
|
-D CCL_NAMESPACE_END=
|
||||||
-D HIPCC
|
-D HIPCC
|
||||||
-m ${HIP_BITS}
|
|
||||||
-I ${CMAKE_CURRENT_SOURCE_DIR}/..
|
-I ${CMAKE_CURRENT_SOURCE_DIR}/..
|
||||||
-I ${CMAKE_CURRENT_SOURCE_DIR}/device/hip
|
-I ${CMAKE_CURRENT_SOURCE_DIR}/device/hip
|
||||||
--use_fast_math
|
-Wno-parentheses-equality
|
||||||
|
-Wno-unused-value
|
||||||
|
--hipcc-func-supp
|
||||||
|
-ffast-math
|
||||||
-o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
|
-o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
|
||||||
|
|
||||||
if(${experimental})
|
if(${experimental})
|
||||||
@ -541,20 +547,9 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
|||||||
set(hip_flags ${hip_flags} -D __KERNEL_DEBUG__)
|
set(hip_flags ${hip_flags} -D __KERNEL_DEBUG__)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_NANOVDB)
|
add_custom_target(
|
||||||
set(hip_flags ${hip_flags}
|
${hip_file}
|
||||||
-D WITH_NANOVDB
|
COMMAND ${hip_command} ${hip_flags}
|
||||||
-I "${NANOVDB_INCLUDE_DIR}")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
add_custom_command(
|
|
||||||
OUTPUT ${hip_file}
|
|
||||||
COMMAND ${HIP_HIPCC_EXECUTABLE}
|
|
||||||
-arch=${arch}
|
|
||||||
${HIP_HIPCC_FLAGS}
|
|
||||||
--${format}
|
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}${hip_kernel_src}
|
|
||||||
${hip_flags}
|
|
||||||
DEPENDS ${kernel_sources})
|
DEPENDS ${kernel_sources})
|
||||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file}" ${CYCLES_INSTALL_PATH}/lib)
|
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file}" ${CYCLES_INSTALL_PATH}/lib)
|
||||||
list(APPEND hip_fatbins ${hip_file})
|
list(APPEND hip_fatbins ${hip_file})
|
||||||
|
@ -27,10 +27,10 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
/* Not actually used, just a NULL pointer that gets passed everywhere, which we
|
/* Not actually used, just a NULL pointer that gets passed everywhere, which we
|
||||||
* hope gets optimized out by the compiler. */
|
* hope gets optimized out by the compiler. */
|
||||||
struct KernelGlobals {
|
struct KernelGlobalsGPU {
|
||||||
/* NOTE: Keep the size in sync with SHADOW_STACK_MAX_HITS. */
|
|
||||||
int unused[1];
|
int unused[1];
|
||||||
};
|
};
|
||||||
|
typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals;
|
||||||
|
|
||||||
/* Global scene data and textures */
|
/* Global scene data and textures */
|
||||||
__constant__ KernelData __data;
|
__constant__ KernelData __data;
|
||||||
|
Loading…
Reference in New Issue
Block a user