Merge branch 'blender-v4.1-release' into main
This commit is contained in:
commit
7453c5ed67
@ -88,10 +88,8 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
|
||||
default_storage_mode = MTLResourceStorageModeManaged;
|
||||
|
||||
if (@available(macos 11.0, *)) {
|
||||
if ([mtlDevice hasUnifiedMemory]) {
|
||||
default_storage_mode = MTLResourceStorageModeShared;
|
||||
}
|
||||
if ([mtlDevice hasUnifiedMemory]) {
|
||||
default_storage_mode = MTLResourceStorageModeShared;
|
||||
}
|
||||
|
||||
switch (device_vendor) {
|
||||
@ -243,14 +241,13 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
mtlAncillaryArgEncoder = [mtlDevice newArgumentEncoderWithArguments:ancillary_desc];
|
||||
|
||||
// preparing the blas arg encoder
|
||||
if (@available(macos 11.0, *)) {
|
||||
if (use_metalrt) {
|
||||
MTLArgumentDescriptor *arg_desc_blas = [[MTLArgumentDescriptor alloc] init];
|
||||
arg_desc_blas.dataType = MTLDataTypeInstanceAccelerationStructure;
|
||||
arg_desc_blas.access = MTLArgumentAccessReadOnly;
|
||||
mtlBlasArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_blas ]];
|
||||
[arg_desc_blas release];
|
||||
}
|
||||
|
||||
if (use_metalrt) {
|
||||
MTLArgumentDescriptor *arg_desc_blas = [[MTLArgumentDescriptor alloc] init];
|
||||
arg_desc_blas.dataType = MTLDataTypeInstanceAccelerationStructure;
|
||||
arg_desc_blas.access = MTLArgumentAccessReadOnly;
|
||||
mtlBlasArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_blas ]];
|
||||
[arg_desc_blas release];
|
||||
}
|
||||
|
||||
for (int i = 0; i < ancillary_desc.count; i++) {
|
||||
@ -1150,11 +1147,9 @@ void MetalDevice::tex_alloc(device_texture &mem)
|
||||
}
|
||||
}
|
||||
MTLStorageMode storage_mode = MTLStorageModeManaged;
|
||||
if (@available(macos 10.15, *)) {
|
||||
/* Intel GPUs don't support MTLStorageModeShared for MTLTextures. */
|
||||
if ([mtlDevice hasUnifiedMemory] && device_vendor != METAL_GPU_INTEL) {
|
||||
storage_mode = MTLStorageModeShared;
|
||||
}
|
||||
/* Intel GPUs don't support MTLStorageModeShared for MTLTextures. */
|
||||
if ([mtlDevice hasUnifiedMemory] && device_vendor != METAL_GPU_INTEL) {
|
||||
storage_mode = MTLStorageModeShared;
|
||||
}
|
||||
|
||||
/* General variables for both architectures */
|
||||
@ -1332,14 +1327,12 @@ void MetalDevice::tex_alloc(device_texture &mem)
|
||||
}
|
||||
}
|
||||
|
||||
if (@available(macos 10.14, *)) {
|
||||
/* Optimize the texture for GPU access. */
|
||||
id<MTLCommandBuffer> commandBuffer = [mtlGeneralCommandQueue commandBuffer];
|
||||
id<MTLBlitCommandEncoder> blitCommandEncoder = [commandBuffer blitCommandEncoder];
|
||||
[blitCommandEncoder optimizeContentsForGPUAccess:mtlTexture];
|
||||
[blitCommandEncoder endEncoding];
|
||||
[commandBuffer commit];
|
||||
}
|
||||
/* Optimize the texture for GPU access. */
|
||||
id<MTLCommandBuffer> commandBuffer = [mtlGeneralCommandQueue commandBuffer];
|
||||
id<MTLBlitCommandEncoder> blitCommandEncoder = [commandBuffer blitCommandEncoder];
|
||||
[blitCommandEncoder optimizeContentsForGPUAccess:mtlTexture];
|
||||
[blitCommandEncoder endEncoding];
|
||||
[commandBuffer commit];
|
||||
|
||||
/* Set Mapping and tag that we need to (re-)upload to device */
|
||||
texture_slot_map[slot] = mtlTexture;
|
||||
@ -1418,27 +1411,25 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
bvh_metal->motion_blur = motion_blur;
|
||||
if (bvh_metal->build(progress, mtlDevice, mtlGeneralCommandQueue, refit)) {
|
||||
|
||||
if (@available(macos 11.0, *)) {
|
||||
if (bvh->params.top_level) {
|
||||
bvhMetalRT = bvh_metal;
|
||||
if (bvh->params.top_level) {
|
||||
bvhMetalRT = bvh_metal;
|
||||
|
||||
// allocate required buffers for BLAS array
|
||||
uint64_t count = bvhMetalRT->blas_array.size();
|
||||
uint64_t bufferSize = mtlBlasArgEncoder.encodedLength * count;
|
||||
blas_buffer = [mtlDevice newBufferWithLength:bufferSize options:default_storage_mode];
|
||||
stats.mem_alloc(blas_buffer.allocatedSize);
|
||||
// allocate required buffers for BLAS array
|
||||
uint64_t count = bvhMetalRT->blas_array.size();
|
||||
uint64_t bufferSize = mtlBlasArgEncoder.encodedLength * count;
|
||||
blas_buffer = [mtlDevice newBufferWithLength:bufferSize options:default_storage_mode];
|
||||
stats.mem_alloc(blas_buffer.allocatedSize);
|
||||
|
||||
for (uint64_t i = 0; i < count; ++i) {
|
||||
if (bvhMetalRT->blas_array[i]) {
|
||||
[mtlBlasArgEncoder setArgumentBuffer:blas_buffer
|
||||
offset:i * mtlBlasArgEncoder.encodedLength];
|
||||
[mtlBlasArgEncoder setAccelerationStructure:bvhMetalRT->blas_array[i] atIndex:0];
|
||||
}
|
||||
}
|
||||
if (default_storage_mode == MTLResourceStorageModeManaged) {
|
||||
[blas_buffer didModifyRange:NSMakeRange(0, blas_buffer.length)];
|
||||
for (uint64_t i = 0; i < count; ++i) {
|
||||
if (bvhMetalRT->blas_array[i]) {
|
||||
[mtlBlasArgEncoder setArgumentBuffer:blas_buffer
|
||||
offset:i * mtlBlasArgEncoder.encodedLength];
|
||||
[mtlBlasArgEncoder setAccelerationStructure:bvhMetalRT->blas_array[i] atIndex:0];
|
||||
}
|
||||
}
|
||||
if (default_storage_mode == MTLResourceStorageModeManaged) {
|
||||
[blas_buffer didModifyRange:NSMakeRange(0, blas_buffer.length)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -465,19 +465,18 @@ void MetalKernelPipeline::compile()
|
||||
device_kernel_as_string(device_kernel);
|
||||
|
||||
NSError *error = NULL;
|
||||
if (@available(macOS 11.0, *)) {
|
||||
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
|
||||
func_desc.name = [@(function_name.c_str()) copy];
|
||||
|
||||
if (pso_type != PSO_GENERIC) {
|
||||
func_desc.constantValues = GetConstantValues(&kernel_data_);
|
||||
}
|
||||
else {
|
||||
func_desc.constantValues = GetConstantValues();
|
||||
}
|
||||
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
|
||||
func_desc.name = [@(function_name.c_str()) copy];
|
||||
|
||||
function = [mtlLibrary newFunctionWithDescriptor:func_desc error:&error];
|
||||
if (pso_type != PSO_GENERIC) {
|
||||
func_desc.constantValues = GetConstantValues(&kernel_data_);
|
||||
}
|
||||
else {
|
||||
func_desc.constantValues = GetConstantValues();
|
||||
}
|
||||
|
||||
function = [mtlLibrary newFunctionWithDescriptor:func_desc error:&error];
|
||||
|
||||
if (function == nil) {
|
||||
NSString *err = [error localizedDescription];
|
||||
@ -489,52 +488,50 @@ void MetalKernelPipeline::compile()
|
||||
function.label = [@(function_name.c_str()) copy];
|
||||
|
||||
if (use_metalrt) {
|
||||
if (@available(macOS 11.0, *)) {
|
||||
/* create the id<MTLFunction> for each intersection function */
|
||||
const char *function_names[] = {
|
||||
"__anyhit__cycles_metalrt_visibility_test_tri",
|
||||
"__anyhit__cycles_metalrt_visibility_test_box",
|
||||
"__anyhit__cycles_metalrt_shadow_all_hit_tri",
|
||||
"__anyhit__cycles_metalrt_shadow_all_hit_box",
|
||||
"__anyhit__cycles_metalrt_volume_test_tri",
|
||||
"__anyhit__cycles_metalrt_volume_test_box",
|
||||
"__anyhit__cycles_metalrt_local_hit_tri",
|
||||
"__anyhit__cycles_metalrt_local_hit_box",
|
||||
"__anyhit__cycles_metalrt_local_hit_tri_prim",
|
||||
"__anyhit__cycles_metalrt_local_hit_box_prim",
|
||||
"__intersection__curve",
|
||||
"__intersection__curve_shadow",
|
||||
"__intersection__point",
|
||||
"__intersection__point_shadow",
|
||||
};
|
||||
assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM);
|
||||
/* create the id<MTLFunction> for each intersection function */
|
||||
const char *function_names[] = {
|
||||
"__anyhit__cycles_metalrt_visibility_test_tri",
|
||||
"__anyhit__cycles_metalrt_visibility_test_box",
|
||||
"__anyhit__cycles_metalrt_shadow_all_hit_tri",
|
||||
"__anyhit__cycles_metalrt_shadow_all_hit_box",
|
||||
"__anyhit__cycles_metalrt_volume_test_tri",
|
||||
"__anyhit__cycles_metalrt_volume_test_box",
|
||||
"__anyhit__cycles_metalrt_local_hit_tri",
|
||||
"__anyhit__cycles_metalrt_local_hit_box",
|
||||
"__anyhit__cycles_metalrt_local_hit_tri_prim",
|
||||
"__anyhit__cycles_metalrt_local_hit_box_prim",
|
||||
"__intersection__curve",
|
||||
"__intersection__curve_shadow",
|
||||
"__intersection__point",
|
||||
"__intersection__point_shadow",
|
||||
};
|
||||
assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM);
|
||||
|
||||
MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
|
||||
for (int i = 0; i < METALRT_FUNC_NUM; i++) {
|
||||
const char *function_name = function_names[i];
|
||||
desc.name = [@(function_name) copy];
|
||||
MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
|
||||
for (int i = 0; i < METALRT_FUNC_NUM; i++) {
|
||||
const char *function_name = function_names[i];
|
||||
desc.name = [@(function_name) copy];
|
||||
|
||||
if (pso_type != PSO_GENERIC) {
|
||||
desc.constantValues = GetConstantValues(&kernel_data_);
|
||||
}
|
||||
else {
|
||||
desc.constantValues = GetConstantValues();
|
||||
}
|
||||
|
||||
NSError *error = NULL;
|
||||
rt_intersection_function[i] = [mtlLibrary newFunctionWithDescriptor:desc error:&error];
|
||||
|
||||
if (rt_intersection_function[i] == nil) {
|
||||
NSString *err = [error localizedDescription];
|
||||
string errors = [err UTF8String];
|
||||
|
||||
error_str = string_printf(
|
||||
"Error getting intersection function \"%s\": %s", function_name, errors.c_str());
|
||||
break;
|
||||
}
|
||||
|
||||
rt_intersection_function[i].label = [@(function_name) copy];
|
||||
if (pso_type != PSO_GENERIC) {
|
||||
desc.constantValues = GetConstantValues(&kernel_data_);
|
||||
}
|
||||
else {
|
||||
desc.constantValues = GetConstantValues();
|
||||
}
|
||||
|
||||
NSError *error = NULL;
|
||||
rt_intersection_function[i] = [mtlLibrary newFunctionWithDescriptor:desc error:&error];
|
||||
|
||||
if (rt_intersection_function[i] == nil) {
|
||||
NSString *err = [error localizedDescription];
|
||||
string errors = [err UTF8String];
|
||||
|
||||
error_str = string_printf(
|
||||
"Error getting intersection function \"%s\": %s", function_name, errors.c_str());
|
||||
break;
|
||||
}
|
||||
|
||||
rt_intersection_function[i].label = [@(function_name) copy];
|
||||
}
|
||||
}
|
||||
|
||||
@ -611,23 +608,19 @@ void MetalKernelPipeline::compile()
|
||||
computePipelineStateDescriptor.buffers[1].mutability = MTLMutabilityImmutable;
|
||||
computePipelineStateDescriptor.buffers[2].mutability = MTLMutabilityImmutable;
|
||||
|
||||
if (@available(macos 10.14, *)) {
|
||||
computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = threads_per_threadgroup;
|
||||
}
|
||||
computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = threads_per_threadgroup;
|
||||
computePipelineStateDescriptor.threadGroupSizeIsMultipleOfThreadExecutionWidth = true;
|
||||
|
||||
computePipelineStateDescriptor.computeFunction = function;
|
||||
|
||||
if (@available(macOS 11.0, *)) {
|
||||
/* Attach the additional functions to an MTLLinkedFunctions object */
|
||||
if (linked_functions) {
|
||||
computePipelineStateDescriptor.linkedFunctions = [[MTLLinkedFunctions alloc] init];
|
||||
computePipelineStateDescriptor.linkedFunctions.functions = linked_functions;
|
||||
}
|
||||
computePipelineStateDescriptor.maxCallStackDepth = 1;
|
||||
if (use_metalrt) {
|
||||
computePipelineStateDescriptor.maxCallStackDepth = 8;
|
||||
}
|
||||
/* Attach the additional functions to an MTLLinkedFunctions object */
|
||||
if (linked_functions) {
|
||||
computePipelineStateDescriptor.linkedFunctions = [[MTLLinkedFunctions alloc] init];
|
||||
computePipelineStateDescriptor.linkedFunctions.functions = linked_functions;
|
||||
}
|
||||
computePipelineStateDescriptor.maxCallStackDepth = 1;
|
||||
if (use_metalrt) {
|
||||
computePipelineStateDescriptor.maxCallStackDepth = 8;
|
||||
}
|
||||
|
||||
MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;
|
||||
@ -669,23 +662,21 @@ void MetalKernelPipeline::compile()
|
||||
loading_existing_archive = path_cache_kernel_exists_and_mark_used(metalbin_path);
|
||||
creating_new_archive = !loading_existing_archive;
|
||||
|
||||
if (@available(macOS 11.0, *)) {
|
||||
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
|
||||
if (loading_existing_archive) {
|
||||
archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
|
||||
}
|
||||
NSError *error = nil;
|
||||
archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:&error];
|
||||
if (!archive) {
|
||||
const char *err = error ? [[error localizedDescription] UTF8String] : nullptr;
|
||||
metal_printf("newBinaryArchiveWithDescriptor failed: %s\n", err ? err : "nil");
|
||||
}
|
||||
[archiveDesc release];
|
||||
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
|
||||
if (loading_existing_archive) {
|
||||
archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
|
||||
}
|
||||
NSError *error = nil;
|
||||
archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:&error];
|
||||
if (!archive) {
|
||||
const char *err = error ? [[error localizedDescription] UTF8String] : nullptr;
|
||||
metal_printf("newBinaryArchiveWithDescriptor failed: %s\n", err ? err : "nil");
|
||||
}
|
||||
[archiveDesc release];
|
||||
|
||||
if (loading_existing_archive) {
|
||||
pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss;
|
||||
computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil];
|
||||
}
|
||||
if (loading_existing_archive) {
|
||||
pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss;
|
||||
computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil];
|
||||
}
|
||||
}
|
||||
|
||||
@ -792,19 +783,16 @@ void MetalKernelPipeline::compile()
|
||||
num_threads_per_block = std::max(num_threads_per_block, (int)pipeline.threadExecutionWidth);
|
||||
}
|
||||
|
||||
if (@available(macOS 11.0, *)) {
|
||||
if (ShaderCache::running) {
|
||||
if (creating_new_archive || recreate_archive) {
|
||||
if (![archive serializeToURL:[NSURL fileURLWithPath:@(metalbin_path.c_str())]
|
||||
error:&error])
|
||||
{
|
||||
metal_printf("Failed to save binary archive to %s, error:\n%s\n",
|
||||
metalbin_path.c_str(),
|
||||
[[error localizedDescription] UTF8String]);
|
||||
}
|
||||
else {
|
||||
path_cache_kernel_mark_added_and_clear_old(metalbin_path);
|
||||
}
|
||||
if (ShaderCache::running) {
|
||||
if (creating_new_archive || recreate_archive) {
|
||||
if (![archive serializeToURL:[NSURL fileURLWithPath:@(metalbin_path.c_str())] error:&error])
|
||||
{
|
||||
metal_printf("Failed to save binary archive to %s, error:\n%s\n",
|
||||
metalbin_path.c_str(),
|
||||
[[error localizedDescription] UTF8String]);
|
||||
}
|
||||
else {
|
||||
path_cache_kernel_mark_added_and_clear_old(metalbin_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -815,20 +803,18 @@ void MetalKernelPipeline::compile()
|
||||
|
||||
if (use_metalrt && linked_functions) {
|
||||
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
|
||||
if (@available(macOS 11.0, *)) {
|
||||
MTLIntersectionFunctionTableDescriptor *ift_desc =
|
||||
[[MTLIntersectionFunctionTableDescriptor alloc] init];
|
||||
ift_desc.functionCount = table_functions[table].count;
|
||||
intersection_func_table[table] = [this->pipeline
|
||||
newIntersectionFunctionTableWithDescriptor:ift_desc];
|
||||
MTLIntersectionFunctionTableDescriptor *ift_desc =
|
||||
[[MTLIntersectionFunctionTableDescriptor alloc] init];
|
||||
ift_desc.functionCount = table_functions[table].count;
|
||||
intersection_func_table[table] = [this->pipeline
|
||||
newIntersectionFunctionTableWithDescriptor:ift_desc];
|
||||
|
||||
/* Finally write the function handles into this pipeline's table */
|
||||
int size = (int)[table_functions[table] count];
|
||||
for (int i = 0; i < size; i++) {
|
||||
id<MTLFunctionHandle> handle = [pipeline
|
||||
functionHandleWithFunction:table_functions[table][i]];
|
||||
[intersection_func_table[table] setFunction:handle atIndex:i];
|
||||
}
|
||||
/* Finally write the function handles into this pipeline's table */
|
||||
int size = (int)[table_functions[table] count];
|
||||
for (int i = 0; i < size; i++) {
|
||||
id<MTLFunctionHandle> handle = [pipeline
|
||||
functionHandleWithFunction:table_functions[table][i]];
|
||||
[intersection_func_table[table] setFunction:handle atIndex:i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -21,38 +21,32 @@ MetalDeviceQueue::MetalDeviceQueue(MetalDevice *device)
|
||||
: DeviceQueue(device), metal_device_(device), stats_(device->stats)
|
||||
{
|
||||
@autoreleasepool {
|
||||
if (@available(macos 11.0, *)) {
|
||||
command_buffer_desc_ = [[MTLCommandBufferDescriptor alloc] init];
|
||||
command_buffer_desc_.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
|
||||
}
|
||||
command_buffer_desc_ = [[MTLCommandBufferDescriptor alloc] init];
|
||||
command_buffer_desc_.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
|
||||
|
||||
mtlDevice_ = device->mtlDevice;
|
||||
mtlCommandQueue_ = device->mtlComputeCommandQueue;
|
||||
|
||||
if (@available(macos 10.14, *)) {
|
||||
shared_event_ = [mtlDevice_ newSharedEvent];
|
||||
shared_event_id_ = 1;
|
||||
shared_event_ = [mtlDevice_ newSharedEvent];
|
||||
shared_event_id_ = 1;
|
||||
|
||||
/* Shareable event listener */
|
||||
event_queue_ = dispatch_queue_create("com.cycles.metal.event_queue", NULL);
|
||||
shared_event_listener_ = [[MTLSharedEventListener alloc] initWithDispatchQueue:event_queue_];
|
||||
}
|
||||
/* Shareable event listener */
|
||||
event_queue_ = dispatch_queue_create("com.cycles.metal.event_queue", NULL);
|
||||
shared_event_listener_ = [[MTLSharedEventListener alloc] initWithDispatchQueue:event_queue_];
|
||||
|
||||
wait_semaphore_ = dispatch_semaphore_create(0);
|
||||
|
||||
if (@available(macos 10.14, *)) {
|
||||
if (getenv("CYCLES_METAL_PROFILING")) {
|
||||
/* Enable per-kernel timing breakdown (shown at end of render). */
|
||||
timing_shared_event_ = [mtlDevice_ newSharedEvent];
|
||||
label_command_encoders_ = true;
|
||||
}
|
||||
if (getenv("CYCLES_METAL_DEBUG")) {
|
||||
/* Enable very verbose tracing (shows every dispatch). */
|
||||
verbose_tracing_ = true;
|
||||
label_command_encoders_ = true;
|
||||
}
|
||||
timing_shared_event_id_ = 1;
|
||||
if (getenv("CYCLES_METAL_PROFILING")) {
|
||||
/* Enable per-kernel timing breakdown (shown at end of render). */
|
||||
timing_shared_event_ = [mtlDevice_ newSharedEvent];
|
||||
label_command_encoders_ = true;
|
||||
}
|
||||
if (getenv("CYCLES_METAL_DEBUG")) {
|
||||
/* Enable very verbose tracing (shows every dispatch). */
|
||||
verbose_tracing_ = true;
|
||||
label_command_encoders_ = true;
|
||||
}
|
||||
timing_shared_event_id_ = 1;
|
||||
|
||||
setup_capture();
|
||||
}
|
||||
@ -104,28 +98,26 @@ void MetalDeviceQueue::setup_capture()
|
||||
label_command_encoders_ = true;
|
||||
|
||||
if (auto capture_url = getenv("CYCLES_DEBUG_METAL_CAPTURE_URL")) {
|
||||
if (@available(macos 10.15, *)) {
|
||||
if ([captureManager supportsDestination:MTLCaptureDestinationGPUTraceDocument]) {
|
||||
if ([captureManager supportsDestination:MTLCaptureDestinationGPUTraceDocument]) {
|
||||
|
||||
MTLCaptureDescriptor *captureDescriptor = [[MTLCaptureDescriptor alloc] init];
|
||||
captureDescriptor.captureObject = mtlCaptureScope_;
|
||||
captureDescriptor.destination = MTLCaptureDestinationGPUTraceDocument;
|
||||
captureDescriptor.outputURL = [NSURL fileURLWithPath:@(capture_url)];
|
||||
MTLCaptureDescriptor *captureDescriptor = [[MTLCaptureDescriptor alloc] init];
|
||||
captureDescriptor.captureObject = mtlCaptureScope_;
|
||||
captureDescriptor.destination = MTLCaptureDestinationGPUTraceDocument;
|
||||
captureDescriptor.outputURL = [NSURL fileURLWithPath:@(capture_url)];
|
||||
|
||||
NSError *error;
|
||||
if (![captureManager startCaptureWithDescriptor:captureDescriptor error:&error]) {
|
||||
NSString *err = [error localizedDescription];
|
||||
printf("Start capture failed: %s\n", [err UTF8String]);
|
||||
}
|
||||
else {
|
||||
printf("Capture started (URL: %s)\n", capture_url);
|
||||
is_capturing_to_disk_ = true;
|
||||
}
|
||||
NSError *error;
|
||||
if (![captureManager startCaptureWithDescriptor:captureDescriptor error:&error]) {
|
||||
NSString *err = [error localizedDescription];
|
||||
printf("Start capture failed: %s\n", [err UTF8String]);
|
||||
}
|
||||
else {
|
||||
printf("Capture to file is not supported\n");
|
||||
printf("Capture started (URL: %s)\n", capture_url);
|
||||
is_capturing_to_disk_ = true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
printf("Capture to file is not supported\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -188,13 +180,11 @@ void MetalDeviceQueue::end_capture()
|
||||
printf("[mtlCaptureScope_ endScope]\n");
|
||||
|
||||
if (is_capturing_to_disk_) {
|
||||
if (@available(macos 10.15, *)) {
|
||||
[[MTLCaptureManager sharedCaptureManager] stopCapture];
|
||||
has_captured_to_disk_ = true;
|
||||
is_capturing_to_disk_ = false;
|
||||
is_capturing_ = false;
|
||||
printf("Capture stopped\n");
|
||||
}
|
||||
[[MTLCaptureManager sharedCaptureManager] stopCapture];
|
||||
has_captured_to_disk_ = true;
|
||||
is_capturing_to_disk_ = false;
|
||||
is_capturing_ = false;
|
||||
printf("Capture stopped\n");
|
||||
}
|
||||
}
|
||||
|
||||
@ -208,14 +198,9 @@ MetalDeviceQueue::~MetalDeviceQueue()
|
||||
close_compute_encoder();
|
||||
close_blit_encoder();
|
||||
|
||||
if (@available(macos 10.14, *)) {
|
||||
[shared_event_listener_ release];
|
||||
[shared_event_ release];
|
||||
}
|
||||
|
||||
if (@available(macos 11.0, *)) {
|
||||
[command_buffer_desc_ release];
|
||||
}
|
||||
[shared_event_listener_ release];
|
||||
[shared_event_ release];
|
||||
[command_buffer_desc_ release];
|
||||
|
||||
if (mtlCaptureScope_) {
|
||||
[mtlCaptureScope_ release];
|
||||
@ -343,10 +328,8 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
|
||||
id<MTLComputeCommandEncoder> mtlComputeCommandEncoder = get_compute_encoder(kernel);
|
||||
|
||||
if (@available(macos 10.14, *)) {
|
||||
if (timing_shared_event_) {
|
||||
command_encoder_labels_.push_back({kernel, work_size, timing_shared_event_id_});
|
||||
}
|
||||
if (timing_shared_event_) {
|
||||
command_encoder_labels_.push_back({kernel, work_size, timing_shared_event_id_});
|
||||
}
|
||||
|
||||
/* Determine size requirement for argument buffer. */
|
||||
@ -395,10 +378,8 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
|
||||
/* Allocate an argument buffer. */
|
||||
MTLResourceOptions arg_buffer_options = MTLResourceStorageModeManaged;
|
||||
if (@available(macOS 11.0, *)) {
|
||||
if ([mtlDevice_ hasUnifiedMemory]) {
|
||||
arg_buffer_options = MTLResourceStorageModeShared;
|
||||
}
|
||||
if ([mtlDevice_ hasUnifiedMemory]) {
|
||||
arg_buffer_options = MTLResourceStorageModeShared;
|
||||
}
|
||||
|
||||
id<MTLBuffer> arg_buffer = temp_buffer_pool_.get_buffer(mtlDevice_,
|
||||
@ -598,27 +579,25 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
threadsPerThreadgroup:size_threads_per_threadgroup];
|
||||
|
||||
[mtlCommandBuffer_ addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
|
||||
/* Enhanced command buffer errors are only available in 11.0+ */
|
||||
if (@available(macos 11.0, *)) {
|
||||
string str;
|
||||
if (command_buffer.status != MTLCommandBufferStatusCompleted) {
|
||||
str = string_printf("Command buffer not completed. status = %d. ",
|
||||
int(command_buffer.status));
|
||||
}
|
||||
if (command_buffer.error) {
|
||||
@autoreleasepool {
|
||||
const char *errCStr = [[NSString stringWithFormat:@"%@", command_buffer.error]
|
||||
UTF8String];
|
||||
str += string_printf("(%s.%s):\n%s\n",
|
||||
kernel_type_as_string(metal_kernel_pso->pso_type),
|
||||
device_kernel_as_string(kernel),
|
||||
errCStr);
|
||||
}
|
||||
}
|
||||
if (!str.empty()) {
|
||||
metal_device_->set_error(str);
|
||||
/* Enhanced command buffer errors */
|
||||
string str;
|
||||
if (command_buffer.status != MTLCommandBufferStatusCompleted) {
|
||||
str = string_printf("Command buffer not completed. status = %d. ",
|
||||
int(command_buffer.status));
|
||||
}
|
||||
if (command_buffer.error) {
|
||||
@autoreleasepool {
|
||||
const char *errCStr = [[NSString stringWithFormat:@"%@", command_buffer.error]
|
||||
UTF8String];
|
||||
str += string_printf("(%s.%s):\n%s\n",
|
||||
kernel_type_as_string(metal_kernel_pso->pso_type),
|
||||
device_kernel_as_string(kernel),
|
||||
errCStr);
|
||||
}
|
||||
}
|
||||
if (!str.empty()) {
|
||||
metal_device_->set_error(str);
|
||||
}
|
||||
}];
|
||||
|
||||
if (verbose_tracing_ || is_capturing_) {
|
||||
@ -681,44 +660,40 @@ bool MetalDeviceQueue::synchronize()
|
||||
if (mtlCommandBuffer_) {
|
||||
scoped_timer timer;
|
||||
|
||||
if (@available(macos 10.14, *)) {
|
||||
if (timing_shared_event_) {
|
||||
/* For per-kernel timing, add event handlers to measure & accumulate dispatch times. */
|
||||
__block double completion_time = 0;
|
||||
for (uint64_t i = command_buffer_start_timing_id_; i < timing_shared_event_id_; i++) {
|
||||
[timing_shared_event_
|
||||
notifyListener:shared_event_listener_
|
||||
atValue:i
|
||||
block:^(id<MTLSharedEvent> /*sharedEvent*/, uint64_t value) {
|
||||
completion_time = timer.get_time() - completion_time;
|
||||
last_completion_time_ = completion_time;
|
||||
for (auto label : command_encoder_labels_) {
|
||||
if (label.timing_id == value) {
|
||||
TimingStats &stat = timing_stats_[label.kernel];
|
||||
stat.num_dispatches++;
|
||||
stat.total_time += completion_time;
|
||||
stat.total_work_size += label.work_size;
|
||||
}
|
||||
if (timing_shared_event_) {
|
||||
/* For per-kernel timing, add event handlers to measure & accumulate dispatch times. */
|
||||
__block double completion_time = 0;
|
||||
for (uint64_t i = command_buffer_start_timing_id_; i < timing_shared_event_id_; i++) {
|
||||
[timing_shared_event_
|
||||
notifyListener:shared_event_listener_
|
||||
atValue:i
|
||||
block:^(id<MTLSharedEvent> /*sharedEvent*/, uint64_t value) {
|
||||
completion_time = timer.get_time() - completion_time;
|
||||
last_completion_time_ = completion_time;
|
||||
for (auto label : command_encoder_labels_) {
|
||||
if (label.timing_id == value) {
|
||||
TimingStats &stat = timing_stats_[label.kernel];
|
||||
stat.num_dispatches++;
|
||||
stat.total_time += completion_time;
|
||||
stat.total_work_size += label.work_size;
|
||||
}
|
||||
}];
|
||||
}
|
||||
}
|
||||
}];
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t shared_event_id_ = this->shared_event_id_++;
|
||||
|
||||
if (@available(macos 10.14, *)) {
|
||||
__block dispatch_semaphore_t block_sema = wait_semaphore_;
|
||||
[shared_event_ notifyListener:shared_event_listener_
|
||||
atValue:shared_event_id_
|
||||
block:^(id<MTLSharedEvent> /*sharedEvent*/, uint64_t /*value*/) {
|
||||
dispatch_semaphore_signal(block_sema);
|
||||
}];
|
||||
__block dispatch_semaphore_t block_sema = wait_semaphore_;
|
||||
[shared_event_ notifyListener:shared_event_listener_
|
||||
atValue:shared_event_id_
|
||||
block:^(id<MTLSharedEvent> /*sharedEvent*/, uint64_t /*value*/) {
|
||||
dispatch_semaphore_signal(block_sema);
|
||||
}];
|
||||
|
||||
[mtlCommandBuffer_ encodeSignalEvent:shared_event_ value:shared_event_id_];
|
||||
[mtlCommandBuffer_ commit];
|
||||
dispatch_semaphore_wait(wait_semaphore_, DISPATCH_TIME_FOREVER);
|
||||
}
|
||||
[mtlCommandBuffer_ encodeSignalEvent:shared_event_ value:shared_event_id_];
|
||||
[mtlCommandBuffer_ commit];
|
||||
dispatch_semaphore_wait(wait_semaphore_, DISPATCH_TIME_FOREVER);
|
||||
|
||||
[mtlCommandBuffer_ release];
|
||||
|
||||
@ -897,42 +872,40 @@ id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel
|
||||
{
|
||||
bool concurrent = (kernel < DEVICE_KERNEL_INTEGRATOR_NUM);
|
||||
|
||||
if (@available(macos 10.14, *)) {
|
||||
if (timing_shared_event_) {
|
||||
/* Close the current encoder to ensure we're able to capture per-encoder timing data. */
|
||||
close_compute_encoder();
|
||||
}
|
||||
|
||||
if (mtlComputeEncoder_) {
|
||||
if (mtlComputeEncoder_.dispatchType == concurrent ? MTLDispatchTypeConcurrent :
|
||||
MTLDispatchTypeSerial)
|
||||
{
|
||||
/* declare usage of MTLBuffers etc */
|
||||
prepare_resources(kernel);
|
||||
|
||||
return mtlComputeEncoder_;
|
||||
}
|
||||
close_compute_encoder();
|
||||
}
|
||||
|
||||
close_blit_encoder();
|
||||
|
||||
if (!mtlCommandBuffer_) {
|
||||
mtlCommandBuffer_ = [mtlCommandQueue_ commandBuffer];
|
||||
[mtlCommandBuffer_ retain];
|
||||
}
|
||||
|
||||
mtlComputeEncoder_ = [mtlCommandBuffer_
|
||||
computeCommandEncoderWithDispatchType:concurrent ? MTLDispatchTypeConcurrent :
|
||||
MTLDispatchTypeSerial];
|
||||
|
||||
[mtlComputeEncoder_ retain];
|
||||
[mtlComputeEncoder_ setLabel:@(device_kernel_as_string(kernel))];
|
||||
|
||||
/* declare usage of MTLBuffers etc */
|
||||
prepare_resources(kernel);
|
||||
if (timing_shared_event_) {
|
||||
/* Close the current encoder to ensure we're able to capture per-encoder timing data. */
|
||||
close_compute_encoder();
|
||||
}
|
||||
|
||||
if (mtlComputeEncoder_) {
|
||||
if (mtlComputeEncoder_.dispatchType == concurrent ? MTLDispatchTypeConcurrent :
|
||||
MTLDispatchTypeSerial)
|
||||
{
|
||||
/* declare usage of MTLBuffers etc */
|
||||
prepare_resources(kernel);
|
||||
|
||||
return mtlComputeEncoder_;
|
||||
}
|
||||
close_compute_encoder();
|
||||
}
|
||||
|
||||
close_blit_encoder();
|
||||
|
||||
if (!mtlCommandBuffer_) {
|
||||
mtlCommandBuffer_ = [mtlCommandQueue_ commandBuffer];
|
||||
[mtlCommandBuffer_ retain];
|
||||
}
|
||||
|
||||
mtlComputeEncoder_ = [mtlCommandBuffer_
|
||||
computeCommandEncoderWithDispatchType:concurrent ? MTLDispatchTypeConcurrent :
|
||||
MTLDispatchTypeSerial];
|
||||
|
||||
[mtlComputeEncoder_ retain];
|
||||
[mtlComputeEncoder_ setLabel:@(device_kernel_as_string(kernel))];
|
||||
|
||||
/* declare usage of MTLBuffers etc */
|
||||
prepare_resources(kernel);
|
||||
|
||||
return mtlComputeEncoder_;
|
||||
}
|
||||
|
||||
@ -962,10 +935,8 @@ void MetalDeviceQueue::close_compute_encoder()
|
||||
[mtlComputeEncoder_ release];
|
||||
mtlComputeEncoder_ = nil;
|
||||
|
||||
if (@available(macos 10.14, *)) {
|
||||
if (timing_shared_event_) {
|
||||
[mtlCommandBuffer_ encodeSignalEvent:timing_shared_event_ value:timing_shared_event_id_++];
|
||||
}
|
||||
if (timing_shared_event_) {
|
||||
[mtlCommandBuffer_ encodeSignalEvent:timing_shared_event_ value:timing_shared_event_id_++];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -890,16 +890,15 @@ GHOST_TSuccess GHOST_SystemCocoa::getPixelAtCursor(float r_color[3]) const
|
||||
* This behavior could confuse users, especially when trying to pick a color from another app,
|
||||
* potentially capturing the wallpaper under that app window.
|
||||
*/
|
||||
if (@available(macOS 11.0, *)) {
|
||||
/* Although these methods are documented as available for macOS 10.15, they are not actually
|
||||
* shipped, leading to a crash if used on macOS 10.15.
|
||||
*
|
||||
* Ref: https://developer.apple.com/forums/thread/683860?answerId=684400022#684400022
|
||||
*/
|
||||
if (!CGPreflightScreenCaptureAccess()) {
|
||||
CGRequestScreenCaptureAccess();
|
||||
return GHOST_kFailure;
|
||||
}
|
||||
|
||||
/* Although these methods are documented as available for macOS 10.15, they are not actually
|
||||
* shipped, leading to a crash if used on macOS 10.15.
|
||||
*
|
||||
* Ref: https://developer.apple.com/forums/thread/683860?answerId=684400022#684400022
|
||||
*/
|
||||
if (!CGPreflightScreenCaptureAccess()) {
|
||||
CGRequestScreenCaptureAccess();
|
||||
return GHOST_kFailure;
|
||||
}
|
||||
|
||||
CGEventRef event = CGEventCreate(nil);
|
||||
|
@ -306,66 +306,50 @@ bool MTLBackend::metal_is_supported()
|
||||
return false;
|
||||
}
|
||||
|
||||
if (@available(macOS 10.15, *)) {
|
||||
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
|
||||
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
|
||||
|
||||
/* Debug: Enable low power GPU with Environment Var: METAL_FORCE_INTEL. */
|
||||
static const char *forceIntelStr = getenv("METAL_FORCE_INTEL");
|
||||
bool forceIntel = forceIntelStr ? (atoi(forceIntelStr) != 0) : false;
|
||||
/* Debug: Enable low power GPU with Environment Var: METAL_FORCE_INTEL. */
|
||||
static const char *forceIntelStr = getenv("METAL_FORCE_INTEL");
|
||||
bool forceIntel = forceIntelStr ? (atoi(forceIntelStr) != 0) : false;
|
||||
|
||||
if (forceIntel) {
|
||||
NSArray<id<MTLDevice>> *allDevices = MTLCopyAllDevices();
|
||||
for (id<MTLDevice> _device in allDevices) {
|
||||
if (_device.lowPower) {
|
||||
device = _device;
|
||||
}
|
||||
if (forceIntel) {
|
||||
NSArray<id<MTLDevice>> *allDevices = MTLCopyAllDevices();
|
||||
for (id<MTLDevice> _device in allDevices) {
|
||||
if (_device.lowPower) {
|
||||
device = _device;
|
||||
}
|
||||
}
|
||||
|
||||
/* If Intel, we must be on macOS 11.2+ for full Metal backend support. */
|
||||
NSString *gpu_name = [device name];
|
||||
const char *vendor = [gpu_name UTF8String];
|
||||
if ((strstr(vendor, "Intel") || strstr(vendor, "INTEL"))) {
|
||||
if (@available(macOS 11.2, *)) {
|
||||
/* Intel device supported -- Carry on.
|
||||
* NOTE: @available syntax cannot be negated. */
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Metal Viewport requires argument buffer tier-2 support and Barycentric Coordinates.
|
||||
* These are available on most hardware configurations supporting Metal 2.2. */
|
||||
bool supports_argument_buffers_tier2 = ([device argumentBuffersSupport] ==
|
||||
MTLArgumentBuffersTier2);
|
||||
bool supports_barycentrics = [device supportsShaderBarycentricCoordinates] ||
|
||||
supports_barycentric_whitelist(device);
|
||||
bool supported_metal_version = [device supportsFamily:MTLGPUFamilyMac2];
|
||||
|
||||
bool result = supports_argument_buffers_tier2 && supports_barycentrics &&
|
||||
supported_os_version && supported_metal_version;
|
||||
|
||||
if (G.debug & G_DEBUG_GPU) {
|
||||
if (!supports_argument_buffers_tier2) {
|
||||
printf("[Metal] Device does not support argument buffers tier 2\n");
|
||||
}
|
||||
if (!supports_barycentrics) {
|
||||
printf("[Metal] Device does not support barycentrics coordinates\n");
|
||||
}
|
||||
if (!supported_metal_version) {
|
||||
printf("[Metal] Device does not support metal 2.2 or higher\n");
|
||||
}
|
||||
|
||||
if (result) {
|
||||
printf("Device with name %s supports metal minimum requirements\n",
|
||||
[[device name] UTF8String]);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
return false;
|
||||
|
||||
/* Metal Viewport requires argument buffer tier-2 support and Barycentric Coordinates.
|
||||
* These are available on most hardware configurations supporting Metal 2.2. */
|
||||
bool supports_argument_buffers_tier2 = ([device argumentBuffersSupport] ==
|
||||
MTLArgumentBuffersTier2);
|
||||
bool supports_barycentrics = [device supportsShaderBarycentricCoordinates] ||
|
||||
supports_barycentric_whitelist(device);
|
||||
bool supported_metal_version = [device supportsFamily:MTLGPUFamilyMac2];
|
||||
|
||||
bool result = supports_argument_buffers_tier2 && supports_barycentrics && supported_os_version &&
|
||||
supported_metal_version;
|
||||
|
||||
if (G.debug & G_DEBUG_GPU) {
|
||||
if (!supports_argument_buffers_tier2) {
|
||||
printf("[Metal] Device does not support argument buffers tier 2\n");
|
||||
}
|
||||
if (!supports_barycentrics) {
|
||||
printf("[Metal] Device does not support barycentrics coordinates\n");
|
||||
}
|
||||
if (!supported_metal_version) {
|
||||
printf("[Metal] Device does not support metal 2.2 or higher\n");
|
||||
}
|
||||
|
||||
if (result) {
|
||||
printf("Device with name %s supports metal minimum requirements\n",
|
||||
[[device name] UTF8String]);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void MTLBackend::capabilities_init(MTLContext *ctx)
|
||||
|
@ -53,15 +53,13 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin()
|
||||
BLI_assert(MTLCommandBufferManager::num_active_cmd_bufs <
|
||||
GHOST_ContextCGL::max_command_buffer_count);
|
||||
|
||||
if (@available(macos 11.0, *)) {
|
||||
if (G.debug & G_DEBUG_GPU) {
|
||||
/* Debug: Enable Advanced Errors for GPU work execution. */
|
||||
MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init];
|
||||
desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
|
||||
desc.retainedReferences = YES;
|
||||
BLI_assert(context_.queue != nil);
|
||||
active_command_buffer_ = [context_.queue commandBufferWithDescriptor:desc];
|
||||
}
|
||||
if (G.debug & G_DEBUG_GPU) {
|
||||
/* Debug: Enable Advanced Errors for GPU work execution. */
|
||||
MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init];
|
||||
desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
|
||||
desc.retainedReferences = YES;
|
||||
BLI_assert(context_.queue != nil);
|
||||
active_command_buffer_ = [context_.queue commandBufferWithDescriptor:desc];
|
||||
}
|
||||
|
||||
/* Ensure command buffer is created if debug command buffer unavailable. */
|
||||
@ -151,14 +149,12 @@ bool MTLCommandBufferManager::submit(bool wait)
|
||||
|
||||
/* Command buffer execution debugging can return an error message if
|
||||
* execution has failed or encountered GPU-side errors. */
|
||||
if (@available(macos 11.0, *)) {
|
||||
if (G.debug & G_DEBUG_GPU) {
|
||||
if (G.debug & G_DEBUG_GPU) {
|
||||
|
||||
NSError *error = [active_command_buffer_ error];
|
||||
if (error != nil) {
|
||||
NSLog(@"%@", error);
|
||||
BLI_assert(false);
|
||||
}
|
||||
NSError *error = [active_command_buffer_ error];
|
||||
if (error != nil) {
|
||||
NSLog(@"%@", error);
|
||||
BLI_assert(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -578,86 +574,80 @@ bool MTLCommandBufferManager::insert_memory_barrier(eGPUBarrier barrier_bits,
|
||||
eGPUStageBarrierBits before_stages,
|
||||
eGPUStageBarrierBits after_stages)
|
||||
{
|
||||
/* Only supporting Metal on 10.14 onward anyway - Check required for warnings. */
|
||||
if (@available(macOS 10.14, *)) {
|
||||
/* Apple Silicon does not support memory barriers for RenderCommandEncoder's.
|
||||
* We do not currently need these due to implicit API guarantees. However, render->render
|
||||
* resource dependencies are only evaluated at RenderCommandEncoder boundaries due to work
|
||||
* execution on TBDR architecture.
|
||||
*
|
||||
* NOTE: Render barriers are therefore inherently expensive. Where possible, opt for local
|
||||
* synchronization using raster order groups, or, prefer compute to avoid subsequent passes
|
||||
* re-loading pass attachments which are not needed. */
|
||||
const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
|
||||
if (is_tile_based_arch && (active_command_encoder_type_ != MTL_COMPUTE_COMMAND_ENCODER)) {
|
||||
if (active_command_encoder_type_ == MTL_RENDER_COMMAND_ENCODER) {
|
||||
end_active_command_encoder();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Apple Silicon does not support memory barriers for RenderCommandEncoder's.
|
||||
* We do not currently need these due to implicit API guarantees. However, render->render
|
||||
* resource dependencies are only evaluated at RenderCommandEncoder boundaries due to work
|
||||
* execution on TBDR architecture.
|
||||
*
|
||||
* NOTE: Render barriers are therefore inherently expensive. Where possible, opt for local
|
||||
* synchronization using raster order groups, or, prefer compute to avoid subsequent passes
|
||||
* re-loading pass attachments which are not needed. */
|
||||
const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
|
||||
if (is_tile_based_arch && (active_command_encoder_type_ != MTL_COMPUTE_COMMAND_ENCODER)) {
|
||||
if (active_command_encoder_type_ == MTL_RENDER_COMMAND_ENCODER) {
|
||||
end_active_command_encoder();
|
||||
/* Resolve scope. */
|
||||
MTLBarrierScope scope = 0;
|
||||
if (barrier_bits & GPU_BARRIER_SHADER_IMAGE_ACCESS || barrier_bits & GPU_BARRIER_TEXTURE_FETCH) {
|
||||
bool is_compute = (active_command_encoder_type_ != MTL_RENDER_COMMAND_ENCODER);
|
||||
scope |= (is_compute ? 0 : MTLBarrierScopeRenderTargets) | MTLBarrierScopeTextures;
|
||||
}
|
||||
if (barrier_bits & GPU_BARRIER_SHADER_STORAGE ||
|
||||
barrier_bits & GPU_BARRIER_VERTEX_ATTRIB_ARRAY || barrier_bits & GPU_BARRIER_ELEMENT_ARRAY ||
|
||||
barrier_bits & GPU_BARRIER_UNIFORM || barrier_bits & GPU_BARRIER_BUFFER_UPDATE)
|
||||
{
|
||||
scope = scope | MTLBarrierScopeBuffers;
|
||||
}
|
||||
|
||||
if (scope != 0) {
|
||||
/* Issue barrier based on encoder. */
|
||||
switch (active_command_encoder_type_) {
|
||||
case MTL_NO_COMMAND_ENCODER:
|
||||
case MTL_BLIT_COMMAND_ENCODER: {
|
||||
/* No barrier to be inserted. */
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Rendering. */
|
||||
case MTL_RENDER_COMMAND_ENCODER: {
|
||||
/* Currently flagging both stages -- can use bits above to filter on stage type --
|
||||
* though full barrier is safe for now. */
|
||||
MTLRenderStages before_stage_flags = 0;
|
||||
MTLRenderStages after_stage_flags = 0;
|
||||
if (before_stages & GPU_BARRIER_STAGE_VERTEX &&
|
||||
!(before_stages & GPU_BARRIER_STAGE_FRAGMENT))
|
||||
{
|
||||
before_stage_flags = before_stage_flags | MTLRenderStageVertex;
|
||||
}
|
||||
if (before_stages & GPU_BARRIER_STAGE_FRAGMENT) {
|
||||
before_stage_flags = before_stage_flags | MTLRenderStageFragment;
|
||||
}
|
||||
if (after_stages & GPU_BARRIER_STAGE_VERTEX) {
|
||||
after_stage_flags = after_stage_flags | MTLRenderStageVertex;
|
||||
}
|
||||
if (after_stages & GPU_BARRIER_STAGE_FRAGMENT) {
|
||||
after_stage_flags = MTLRenderStageFragment;
|
||||
}
|
||||
|
||||
id<MTLRenderCommandEncoder> rec = this->get_active_render_command_encoder();
|
||||
BLI_assert(rec != nil);
|
||||
[rec memoryBarrierWithScope:scope
|
||||
afterStages:after_stage_flags
|
||||
beforeStages:before_stage_flags];
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Resolve scope. */
|
||||
MTLBarrierScope scope = 0;
|
||||
if (barrier_bits & GPU_BARRIER_SHADER_IMAGE_ACCESS || barrier_bits & GPU_BARRIER_TEXTURE_FETCH)
|
||||
{
|
||||
bool is_compute = (active_command_encoder_type_ != MTL_RENDER_COMMAND_ENCODER);
|
||||
scope |= (is_compute ? 0 : MTLBarrierScopeRenderTargets) | MTLBarrierScopeTextures;
|
||||
}
|
||||
if (barrier_bits & GPU_BARRIER_SHADER_STORAGE ||
|
||||
barrier_bits & GPU_BARRIER_VERTEX_ATTRIB_ARRAY ||
|
||||
barrier_bits & GPU_BARRIER_ELEMENT_ARRAY || barrier_bits & GPU_BARRIER_UNIFORM ||
|
||||
barrier_bits & GPU_BARRIER_BUFFER_UPDATE)
|
||||
{
|
||||
scope = scope | MTLBarrierScopeBuffers;
|
||||
}
|
||||
|
||||
if (scope != 0) {
|
||||
/* Issue barrier based on encoder. */
|
||||
switch (active_command_encoder_type_) {
|
||||
case MTL_NO_COMMAND_ENCODER:
|
||||
case MTL_BLIT_COMMAND_ENCODER: {
|
||||
/* No barrier to be inserted. */
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Rendering. */
|
||||
case MTL_RENDER_COMMAND_ENCODER: {
|
||||
/* Currently flagging both stages -- can use bits above to filter on stage type --
|
||||
* though full barrier is safe for now. */
|
||||
MTLRenderStages before_stage_flags = 0;
|
||||
MTLRenderStages after_stage_flags = 0;
|
||||
if (before_stages & GPU_BARRIER_STAGE_VERTEX &&
|
||||
!(before_stages & GPU_BARRIER_STAGE_FRAGMENT))
|
||||
{
|
||||
before_stage_flags = before_stage_flags | MTLRenderStageVertex;
|
||||
}
|
||||
if (before_stages & GPU_BARRIER_STAGE_FRAGMENT) {
|
||||
before_stage_flags = before_stage_flags | MTLRenderStageFragment;
|
||||
}
|
||||
if (after_stages & GPU_BARRIER_STAGE_VERTEX) {
|
||||
after_stage_flags = after_stage_flags | MTLRenderStageVertex;
|
||||
}
|
||||
if (after_stages & GPU_BARRIER_STAGE_FRAGMENT) {
|
||||
after_stage_flags = MTLRenderStageFragment;
|
||||
}
|
||||
|
||||
id<MTLRenderCommandEncoder> rec = this->get_active_render_command_encoder();
|
||||
BLI_assert(rec != nil);
|
||||
[rec memoryBarrierWithScope:scope
|
||||
afterStages:after_stage_flags
|
||||
beforeStages:before_stage_flags];
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Compute. */
|
||||
case MTL_COMPUTE_COMMAND_ENCODER: {
|
||||
id<MTLComputeCommandEncoder> rec = this->get_active_compute_command_encoder();
|
||||
BLI_assert(rec != nil);
|
||||
[rec memoryBarrierWithScope:scope];
|
||||
return true;
|
||||
}
|
||||
/* Compute. */
|
||||
case MTL_COMPUTE_COMMAND_ENCODER: {
|
||||
id<MTLComputeCommandEncoder> rec = this->get_active_compute_command_encoder();
|
||||
BLI_assert(rec != nil);
|
||||
[rec memoryBarrierWithScope:scope];
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -314,16 +314,13 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
|
||||
MTLCompileOptions *options = [[[MTLCompileOptions alloc] init] autorelease];
|
||||
options.languageVersion = MTLLanguageVersion2_2;
|
||||
options.fastMathEnabled = YES;
|
||||
options.preserveInvariance = YES;
|
||||
|
||||
if (@available(macOS 11.00, *)) {
|
||||
options.preserveInvariance = YES;
|
||||
|
||||
/* Raster order groups for tile data in struct require Metal 2.3.
|
||||
* Retaining Metal 2.2. for old shaders to maintain backwards
|
||||
* compatibility for existing features. */
|
||||
if (info->subpass_inputs_.size() > 0) {
|
||||
options.languageVersion = MTLLanguageVersion2_3;
|
||||
}
|
||||
/* Raster order groups for tile data in struct require Metal 2.3.
|
||||
* Retaining Metal 2.2. for old shaders to maintain backwards
|
||||
* compatibility for existing features. */
|
||||
if (info->subpass_inputs_.size() > 0) {
|
||||
options.languageVersion = MTLLanguageVersion2_3;
|
||||
}
|
||||
#if defined(MAC_OS_VERSION_14_0)
|
||||
if (@available(macOS 14.00, *)) {
|
||||
@ -364,14 +361,6 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
|
||||
/* Inject unique context ID to avoid cross-context shader cache collisions.
|
||||
* Required on macOS 11.0. */
|
||||
NSString *source_with_header = source_with_header_a;
|
||||
if (@available(macos 11.0, *)) {
|
||||
/* Pass-through. Availability syntax requirement, expression cannot be negated. */
|
||||
}
|
||||
else {
|
||||
source_with_header = [source_with_header_a
|
||||
stringByAppendingString:[NSString stringWithFormat:@"\n\n#define MTL_CONTEXT_IND %d\n",
|
||||
context_->context_id]];
|
||||
}
|
||||
[source_with_header retain];
|
||||
|
||||
/* Prepare Shader Library. */
|
||||
|
@ -2793,9 +2793,7 @@ std::string MSLGeneratorInterface::generate_msl_vertex_out_struct(ShaderStage sh
|
||||
* by ensuring that vertex position is consistently calculated between subsequent passes
|
||||
* with maximum precision. */
|
||||
out << "\tfloat4 _default_position_ [[position]]";
|
||||
if (@available(macos 11.0, *)) {
|
||||
out << " [[invariant]]";
|
||||
}
|
||||
out << " [[invariant]]";
|
||||
out << ";" << std::endl;
|
||||
}
|
||||
else {
|
||||
@ -2806,9 +2804,7 @@ std::string MSLGeneratorInterface::generate_msl_vertex_out_struct(ShaderStage sh
|
||||
|
||||
/* Use invariance if available. See above for detail. */
|
||||
out << "\tfloat4 " << this->vertex_output_varyings[0].name << " [[position]];";
|
||||
if (@available(macos 11.0, *)) {
|
||||
out << " [[invariant]]";
|
||||
}
|
||||
out << " [[invariant]]";
|
||||
out << ";" << std::endl;
|
||||
first_attr_is_position = true;
|
||||
}
|
||||
|
@ -2345,15 +2345,9 @@ void gpu::MTLTexture::ensure_baked()
|
||||
/* Override storage mode if memoryless attachments are being used.
|
||||
* NOTE: Memoryless textures can only be supported on TBDR GPUs. */
|
||||
if (gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) {
|
||||
if (@available(macOS 11.00, *)) {
|
||||
const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
|
||||
if (is_tile_based_arch) {
|
||||
texture_descriptor_.storageMode = MTLStorageModeMemoryless;
|
||||
}
|
||||
}
|
||||
else {
|
||||
MTL_LOG_WARNING(
|
||||
"GPU_TEXTURE_USAGE_MEMORYLESS is not available on macOS versions prior to 11.0");
|
||||
const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
|
||||
if (is_tile_based_arch) {
|
||||
texture_descriptor_.storageMode = MTLStorageModeMemoryless;
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user