Merge branch 'blender-v4.1-release' into main

This commit is contained in:
Brecht Van Lommel 2024-02-16 19:31:31 +01:00
commit 7453c5ed67
9 changed files with 389 additions and 489 deletions

@ -88,10 +88,8 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
default_storage_mode = MTLResourceStorageModeManaged;
if (@available(macos 11.0, *)) {
if ([mtlDevice hasUnifiedMemory]) {
default_storage_mode = MTLResourceStorageModeShared;
}
if ([mtlDevice hasUnifiedMemory]) {
default_storage_mode = MTLResourceStorageModeShared;
}
switch (device_vendor) {
@ -243,14 +241,13 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
mtlAncillaryArgEncoder = [mtlDevice newArgumentEncoderWithArguments:ancillary_desc];
// preparing the blas arg encoder
if (@available(macos 11.0, *)) {
if (use_metalrt) {
MTLArgumentDescriptor *arg_desc_blas = [[MTLArgumentDescriptor alloc] init];
arg_desc_blas.dataType = MTLDataTypeInstanceAccelerationStructure;
arg_desc_blas.access = MTLArgumentAccessReadOnly;
mtlBlasArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_blas ]];
[arg_desc_blas release];
}
if (use_metalrt) {
MTLArgumentDescriptor *arg_desc_blas = [[MTLArgumentDescriptor alloc] init];
arg_desc_blas.dataType = MTLDataTypeInstanceAccelerationStructure;
arg_desc_blas.access = MTLArgumentAccessReadOnly;
mtlBlasArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_blas ]];
[arg_desc_blas release];
}
for (int i = 0; i < ancillary_desc.count; i++) {
@ -1150,11 +1147,9 @@ void MetalDevice::tex_alloc(device_texture &mem)
}
}
MTLStorageMode storage_mode = MTLStorageModeManaged;
if (@available(macos 10.15, *)) {
/* Intel GPUs don't support MTLStorageModeShared for MTLTextures. */
if ([mtlDevice hasUnifiedMemory] && device_vendor != METAL_GPU_INTEL) {
storage_mode = MTLStorageModeShared;
}
/* Intel GPUs don't support MTLStorageModeShared for MTLTextures. */
if ([mtlDevice hasUnifiedMemory] && device_vendor != METAL_GPU_INTEL) {
storage_mode = MTLStorageModeShared;
}
/* General variables for both architectures */
@ -1332,14 +1327,12 @@ void MetalDevice::tex_alloc(device_texture &mem)
}
}
if (@available(macos 10.14, *)) {
/* Optimize the texture for GPU access. */
id<MTLCommandBuffer> commandBuffer = [mtlGeneralCommandQueue commandBuffer];
id<MTLBlitCommandEncoder> blitCommandEncoder = [commandBuffer blitCommandEncoder];
[blitCommandEncoder optimizeContentsForGPUAccess:mtlTexture];
[blitCommandEncoder endEncoding];
[commandBuffer commit];
}
/* Optimize the texture for GPU access. */
id<MTLCommandBuffer> commandBuffer = [mtlGeneralCommandQueue commandBuffer];
id<MTLBlitCommandEncoder> blitCommandEncoder = [commandBuffer blitCommandEncoder];
[blitCommandEncoder optimizeContentsForGPUAccess:mtlTexture];
[blitCommandEncoder endEncoding];
[commandBuffer commit];
/* Set Mapping and tag that we need to (re-)upload to device */
texture_slot_map[slot] = mtlTexture;
@ -1418,27 +1411,25 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
bvh_metal->motion_blur = motion_blur;
if (bvh_metal->build(progress, mtlDevice, mtlGeneralCommandQueue, refit)) {
if (@available(macos 11.0, *)) {
if (bvh->params.top_level) {
bvhMetalRT = bvh_metal;
if (bvh->params.top_level) {
bvhMetalRT = bvh_metal;
// allocate required buffers for BLAS array
uint64_t count = bvhMetalRT->blas_array.size();
uint64_t bufferSize = mtlBlasArgEncoder.encodedLength * count;
blas_buffer = [mtlDevice newBufferWithLength:bufferSize options:default_storage_mode];
stats.mem_alloc(blas_buffer.allocatedSize);
// allocate required buffers for BLAS array
uint64_t count = bvhMetalRT->blas_array.size();
uint64_t bufferSize = mtlBlasArgEncoder.encodedLength * count;
blas_buffer = [mtlDevice newBufferWithLength:bufferSize options:default_storage_mode];
stats.mem_alloc(blas_buffer.allocatedSize);
for (uint64_t i = 0; i < count; ++i) {
if (bvhMetalRT->blas_array[i]) {
[mtlBlasArgEncoder setArgumentBuffer:blas_buffer
offset:i * mtlBlasArgEncoder.encodedLength];
[mtlBlasArgEncoder setAccelerationStructure:bvhMetalRT->blas_array[i] atIndex:0];
}
}
if (default_storage_mode == MTLResourceStorageModeManaged) {
[blas_buffer didModifyRange:NSMakeRange(0, blas_buffer.length)];
for (uint64_t i = 0; i < count; ++i) {
if (bvhMetalRT->blas_array[i]) {
[mtlBlasArgEncoder setArgumentBuffer:blas_buffer
offset:i * mtlBlasArgEncoder.encodedLength];
[mtlBlasArgEncoder setAccelerationStructure:bvhMetalRT->blas_array[i] atIndex:0];
}
}
if (default_storage_mode == MTLResourceStorageModeManaged) {
[blas_buffer didModifyRange:NSMakeRange(0, blas_buffer.length)];
}
}
}

@ -465,19 +465,18 @@ void MetalKernelPipeline::compile()
device_kernel_as_string(device_kernel);
NSError *error = NULL;
if (@available(macOS 11.0, *)) {
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
func_desc.name = [@(function_name.c_str()) copy];
if (pso_type != PSO_GENERIC) {
func_desc.constantValues = GetConstantValues(&kernel_data_);
}
else {
func_desc.constantValues = GetConstantValues();
}
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
func_desc.name = [@(function_name.c_str()) copy];
function = [mtlLibrary newFunctionWithDescriptor:func_desc error:&error];
if (pso_type != PSO_GENERIC) {
func_desc.constantValues = GetConstantValues(&kernel_data_);
}
else {
func_desc.constantValues = GetConstantValues();
}
function = [mtlLibrary newFunctionWithDescriptor:func_desc error:&error];
if (function == nil) {
NSString *err = [error localizedDescription];
@ -489,52 +488,50 @@ void MetalKernelPipeline::compile()
function.label = [@(function_name.c_str()) copy];
if (use_metalrt) {
if (@available(macOS 11.0, *)) {
/* create the id<MTLFunction> for each intersection function */
const char *function_names[] = {
"__anyhit__cycles_metalrt_visibility_test_tri",
"__anyhit__cycles_metalrt_visibility_test_box",
"__anyhit__cycles_metalrt_shadow_all_hit_tri",
"__anyhit__cycles_metalrt_shadow_all_hit_box",
"__anyhit__cycles_metalrt_volume_test_tri",
"__anyhit__cycles_metalrt_volume_test_box",
"__anyhit__cycles_metalrt_local_hit_tri",
"__anyhit__cycles_metalrt_local_hit_box",
"__anyhit__cycles_metalrt_local_hit_tri_prim",
"__anyhit__cycles_metalrt_local_hit_box_prim",
"__intersection__curve",
"__intersection__curve_shadow",
"__intersection__point",
"__intersection__point_shadow",
};
assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM);
/* create the id<MTLFunction> for each intersection function */
const char *function_names[] = {
"__anyhit__cycles_metalrt_visibility_test_tri",
"__anyhit__cycles_metalrt_visibility_test_box",
"__anyhit__cycles_metalrt_shadow_all_hit_tri",
"__anyhit__cycles_metalrt_shadow_all_hit_box",
"__anyhit__cycles_metalrt_volume_test_tri",
"__anyhit__cycles_metalrt_volume_test_box",
"__anyhit__cycles_metalrt_local_hit_tri",
"__anyhit__cycles_metalrt_local_hit_box",
"__anyhit__cycles_metalrt_local_hit_tri_prim",
"__anyhit__cycles_metalrt_local_hit_box_prim",
"__intersection__curve",
"__intersection__curve_shadow",
"__intersection__point",
"__intersection__point_shadow",
};
assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM);
MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
for (int i = 0; i < METALRT_FUNC_NUM; i++) {
const char *function_name = function_names[i];
desc.name = [@(function_name) copy];
MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
for (int i = 0; i < METALRT_FUNC_NUM; i++) {
const char *function_name = function_names[i];
desc.name = [@(function_name) copy];
if (pso_type != PSO_GENERIC) {
desc.constantValues = GetConstantValues(&kernel_data_);
}
else {
desc.constantValues = GetConstantValues();
}
NSError *error = NULL;
rt_intersection_function[i] = [mtlLibrary newFunctionWithDescriptor:desc error:&error];
if (rt_intersection_function[i] == nil) {
NSString *err = [error localizedDescription];
string errors = [err UTF8String];
error_str = string_printf(
"Error getting intersection function \"%s\": %s", function_name, errors.c_str());
break;
}
rt_intersection_function[i].label = [@(function_name) copy];
if (pso_type != PSO_GENERIC) {
desc.constantValues = GetConstantValues(&kernel_data_);
}
else {
desc.constantValues = GetConstantValues();
}
NSError *error = NULL;
rt_intersection_function[i] = [mtlLibrary newFunctionWithDescriptor:desc error:&error];
if (rt_intersection_function[i] == nil) {
NSString *err = [error localizedDescription];
string errors = [err UTF8String];
error_str = string_printf(
"Error getting intersection function \"%s\": %s", function_name, errors.c_str());
break;
}
rt_intersection_function[i].label = [@(function_name) copy];
}
}
@ -611,23 +608,19 @@ void MetalKernelPipeline::compile()
computePipelineStateDescriptor.buffers[1].mutability = MTLMutabilityImmutable;
computePipelineStateDescriptor.buffers[2].mutability = MTLMutabilityImmutable;
if (@available(macos 10.14, *)) {
computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = threads_per_threadgroup;
}
computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = threads_per_threadgroup;
computePipelineStateDescriptor.threadGroupSizeIsMultipleOfThreadExecutionWidth = true;
computePipelineStateDescriptor.computeFunction = function;
if (@available(macOS 11.0, *)) {
/* Attach the additional functions to an MTLLinkedFunctions object */
if (linked_functions) {
computePipelineStateDescriptor.linkedFunctions = [[MTLLinkedFunctions alloc] init];
computePipelineStateDescriptor.linkedFunctions.functions = linked_functions;
}
computePipelineStateDescriptor.maxCallStackDepth = 1;
if (use_metalrt) {
computePipelineStateDescriptor.maxCallStackDepth = 8;
}
/* Attach the additional functions to an MTLLinkedFunctions object */
if (linked_functions) {
computePipelineStateDescriptor.linkedFunctions = [[MTLLinkedFunctions alloc] init];
computePipelineStateDescriptor.linkedFunctions.functions = linked_functions;
}
computePipelineStateDescriptor.maxCallStackDepth = 1;
if (use_metalrt) {
computePipelineStateDescriptor.maxCallStackDepth = 8;
}
MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;
@ -669,23 +662,21 @@ void MetalKernelPipeline::compile()
loading_existing_archive = path_cache_kernel_exists_and_mark_used(metalbin_path);
creating_new_archive = !loading_existing_archive;
if (@available(macOS 11.0, *)) {
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
if (loading_existing_archive) {
archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
}
NSError *error = nil;
archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:&error];
if (!archive) {
const char *err = error ? [[error localizedDescription] UTF8String] : nullptr;
metal_printf("newBinaryArchiveWithDescriptor failed: %s\n", err ? err : "nil");
}
[archiveDesc release];
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
if (loading_existing_archive) {
archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
}
NSError *error = nil;
archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:&error];
if (!archive) {
const char *err = error ? [[error localizedDescription] UTF8String] : nullptr;
metal_printf("newBinaryArchiveWithDescriptor failed: %s\n", err ? err : "nil");
}
[archiveDesc release];
if (loading_existing_archive) {
pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss;
computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil];
}
if (loading_existing_archive) {
pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss;
computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil];
}
}
@ -792,19 +783,16 @@ void MetalKernelPipeline::compile()
num_threads_per_block = std::max(num_threads_per_block, (int)pipeline.threadExecutionWidth);
}
if (@available(macOS 11.0, *)) {
if (ShaderCache::running) {
if (creating_new_archive || recreate_archive) {
if (![archive serializeToURL:[NSURL fileURLWithPath:@(metalbin_path.c_str())]
error:&error])
{
metal_printf("Failed to save binary archive to %s, error:\n%s\n",
metalbin_path.c_str(),
[[error localizedDescription] UTF8String]);
}
else {
path_cache_kernel_mark_added_and_clear_old(metalbin_path);
}
if (ShaderCache::running) {
if (creating_new_archive || recreate_archive) {
if (![archive serializeToURL:[NSURL fileURLWithPath:@(metalbin_path.c_str())] error:&error])
{
metal_printf("Failed to save binary archive to %s, error:\n%s\n",
metalbin_path.c_str(),
[[error localizedDescription] UTF8String]);
}
else {
path_cache_kernel_mark_added_and_clear_old(metalbin_path);
}
}
}
@ -815,20 +803,18 @@ void MetalKernelPipeline::compile()
if (use_metalrt && linked_functions) {
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
if (@available(macOS 11.0, *)) {
MTLIntersectionFunctionTableDescriptor *ift_desc =
[[MTLIntersectionFunctionTableDescriptor alloc] init];
ift_desc.functionCount = table_functions[table].count;
intersection_func_table[table] = [this->pipeline
newIntersectionFunctionTableWithDescriptor:ift_desc];
MTLIntersectionFunctionTableDescriptor *ift_desc =
[[MTLIntersectionFunctionTableDescriptor alloc] init];
ift_desc.functionCount = table_functions[table].count;
intersection_func_table[table] = [this->pipeline
newIntersectionFunctionTableWithDescriptor:ift_desc];
/* Finally write the function handles into this pipeline's table */
int size = (int)[table_functions[table] count];
for (int i = 0; i < size; i++) {
id<MTLFunctionHandle> handle = [pipeline
functionHandleWithFunction:table_functions[table][i]];
[intersection_func_table[table] setFunction:handle atIndex:i];
}
/* Finally write the function handles into this pipeline's table */
int size = (int)[table_functions[table] count];
for (int i = 0; i < size; i++) {
id<MTLFunctionHandle> handle = [pipeline
functionHandleWithFunction:table_functions[table][i]];
[intersection_func_table[table] setFunction:handle atIndex:i];
}
}
}

@ -21,38 +21,32 @@ MetalDeviceQueue::MetalDeviceQueue(MetalDevice *device)
: DeviceQueue(device), metal_device_(device), stats_(device->stats)
{
@autoreleasepool {
if (@available(macos 11.0, *)) {
command_buffer_desc_ = [[MTLCommandBufferDescriptor alloc] init];
command_buffer_desc_.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
}
command_buffer_desc_ = [[MTLCommandBufferDescriptor alloc] init];
command_buffer_desc_.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
mtlDevice_ = device->mtlDevice;
mtlCommandQueue_ = device->mtlComputeCommandQueue;
if (@available(macos 10.14, *)) {
shared_event_ = [mtlDevice_ newSharedEvent];
shared_event_id_ = 1;
shared_event_ = [mtlDevice_ newSharedEvent];
shared_event_id_ = 1;
/* Shareable event listener */
event_queue_ = dispatch_queue_create("com.cycles.metal.event_queue", NULL);
shared_event_listener_ = [[MTLSharedEventListener alloc] initWithDispatchQueue:event_queue_];
}
/* Shareable event listener */
event_queue_ = dispatch_queue_create("com.cycles.metal.event_queue", NULL);
shared_event_listener_ = [[MTLSharedEventListener alloc] initWithDispatchQueue:event_queue_];
wait_semaphore_ = dispatch_semaphore_create(0);
if (@available(macos 10.14, *)) {
if (getenv("CYCLES_METAL_PROFILING")) {
/* Enable per-kernel timing breakdown (shown at end of render). */
timing_shared_event_ = [mtlDevice_ newSharedEvent];
label_command_encoders_ = true;
}
if (getenv("CYCLES_METAL_DEBUG")) {
/* Enable very verbose tracing (shows every dispatch). */
verbose_tracing_ = true;
label_command_encoders_ = true;
}
timing_shared_event_id_ = 1;
if (getenv("CYCLES_METAL_PROFILING")) {
/* Enable per-kernel timing breakdown (shown at end of render). */
timing_shared_event_ = [mtlDevice_ newSharedEvent];
label_command_encoders_ = true;
}
if (getenv("CYCLES_METAL_DEBUG")) {
/* Enable very verbose tracing (shows every dispatch). */
verbose_tracing_ = true;
label_command_encoders_ = true;
}
timing_shared_event_id_ = 1;
setup_capture();
}
@ -104,28 +98,26 @@ void MetalDeviceQueue::setup_capture()
label_command_encoders_ = true;
if (auto capture_url = getenv("CYCLES_DEBUG_METAL_CAPTURE_URL")) {
if (@available(macos 10.15, *)) {
if ([captureManager supportsDestination:MTLCaptureDestinationGPUTraceDocument]) {
if ([captureManager supportsDestination:MTLCaptureDestinationGPUTraceDocument]) {
MTLCaptureDescriptor *captureDescriptor = [[MTLCaptureDescriptor alloc] init];
captureDescriptor.captureObject = mtlCaptureScope_;
captureDescriptor.destination = MTLCaptureDestinationGPUTraceDocument;
captureDescriptor.outputURL = [NSURL fileURLWithPath:@(capture_url)];
MTLCaptureDescriptor *captureDescriptor = [[MTLCaptureDescriptor alloc] init];
captureDescriptor.captureObject = mtlCaptureScope_;
captureDescriptor.destination = MTLCaptureDestinationGPUTraceDocument;
captureDescriptor.outputURL = [NSURL fileURLWithPath:@(capture_url)];
NSError *error;
if (![captureManager startCaptureWithDescriptor:captureDescriptor error:&error]) {
NSString *err = [error localizedDescription];
printf("Start capture failed: %s\n", [err UTF8String]);
}
else {
printf("Capture started (URL: %s)\n", capture_url);
is_capturing_to_disk_ = true;
}
NSError *error;
if (![captureManager startCaptureWithDescriptor:captureDescriptor error:&error]) {
NSString *err = [error localizedDescription];
printf("Start capture failed: %s\n", [err UTF8String]);
}
else {
printf("Capture to file is not supported\n");
printf("Capture started (URL: %s)\n", capture_url);
is_capturing_to_disk_ = true;
}
}
else {
printf("Capture to file is not supported\n");
}
}
}
@ -188,13 +180,11 @@ void MetalDeviceQueue::end_capture()
printf("[mtlCaptureScope_ endScope]\n");
if (is_capturing_to_disk_) {
if (@available(macos 10.15, *)) {
[[MTLCaptureManager sharedCaptureManager] stopCapture];
has_captured_to_disk_ = true;
is_capturing_to_disk_ = false;
is_capturing_ = false;
printf("Capture stopped\n");
}
[[MTLCaptureManager sharedCaptureManager] stopCapture];
has_captured_to_disk_ = true;
is_capturing_to_disk_ = false;
is_capturing_ = false;
printf("Capture stopped\n");
}
}
@ -208,14 +198,9 @@ MetalDeviceQueue::~MetalDeviceQueue()
close_compute_encoder();
close_blit_encoder();
if (@available(macos 10.14, *)) {
[shared_event_listener_ release];
[shared_event_ release];
}
if (@available(macos 11.0, *)) {
[command_buffer_desc_ release];
}
[shared_event_listener_ release];
[shared_event_ release];
[command_buffer_desc_ release];
if (mtlCaptureScope_) {
[mtlCaptureScope_ release];
@ -343,10 +328,8 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
id<MTLComputeCommandEncoder> mtlComputeCommandEncoder = get_compute_encoder(kernel);
if (@available(macos 10.14, *)) {
if (timing_shared_event_) {
command_encoder_labels_.push_back({kernel, work_size, timing_shared_event_id_});
}
if (timing_shared_event_) {
command_encoder_labels_.push_back({kernel, work_size, timing_shared_event_id_});
}
/* Determine size requirement for argument buffer. */
@ -395,10 +378,8 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
/* Allocate an argument buffer. */
MTLResourceOptions arg_buffer_options = MTLResourceStorageModeManaged;
if (@available(macOS 11.0, *)) {
if ([mtlDevice_ hasUnifiedMemory]) {
arg_buffer_options = MTLResourceStorageModeShared;
}
if ([mtlDevice_ hasUnifiedMemory]) {
arg_buffer_options = MTLResourceStorageModeShared;
}
id<MTLBuffer> arg_buffer = temp_buffer_pool_.get_buffer(mtlDevice_,
@ -598,27 +579,25 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
threadsPerThreadgroup:size_threads_per_threadgroup];
[mtlCommandBuffer_ addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
/* Enhanced command buffer errors are only available in 11.0+ */
if (@available(macos 11.0, *)) {
string str;
if (command_buffer.status != MTLCommandBufferStatusCompleted) {
str = string_printf("Command buffer not completed. status = %d. ",
int(command_buffer.status));
}
if (command_buffer.error) {
@autoreleasepool {
const char *errCStr = [[NSString stringWithFormat:@"%@", command_buffer.error]
UTF8String];
str += string_printf("(%s.%s):\n%s\n",
kernel_type_as_string(metal_kernel_pso->pso_type),
device_kernel_as_string(kernel),
errCStr);
}
}
if (!str.empty()) {
metal_device_->set_error(str);
/* Enhanced command buffer errors */
string str;
if (command_buffer.status != MTLCommandBufferStatusCompleted) {
str = string_printf("Command buffer not completed. status = %d. ",
int(command_buffer.status));
}
if (command_buffer.error) {
@autoreleasepool {
const char *errCStr = [[NSString stringWithFormat:@"%@", command_buffer.error]
UTF8String];
str += string_printf("(%s.%s):\n%s\n",
kernel_type_as_string(metal_kernel_pso->pso_type),
device_kernel_as_string(kernel),
errCStr);
}
}
if (!str.empty()) {
metal_device_->set_error(str);
}
}];
if (verbose_tracing_ || is_capturing_) {
@ -681,44 +660,40 @@ bool MetalDeviceQueue::synchronize()
if (mtlCommandBuffer_) {
scoped_timer timer;
if (@available(macos 10.14, *)) {
if (timing_shared_event_) {
/* For per-kernel timing, add event handlers to measure & accumulate dispatch times. */
__block double completion_time = 0;
for (uint64_t i = command_buffer_start_timing_id_; i < timing_shared_event_id_; i++) {
[timing_shared_event_
notifyListener:shared_event_listener_
atValue:i
block:^(id<MTLSharedEvent> /*sharedEvent*/, uint64_t value) {
completion_time = timer.get_time() - completion_time;
last_completion_time_ = completion_time;
for (auto label : command_encoder_labels_) {
if (label.timing_id == value) {
TimingStats &stat = timing_stats_[label.kernel];
stat.num_dispatches++;
stat.total_time += completion_time;
stat.total_work_size += label.work_size;
}
if (timing_shared_event_) {
/* For per-kernel timing, add event handlers to measure & accumulate dispatch times. */
__block double completion_time = 0;
for (uint64_t i = command_buffer_start_timing_id_; i < timing_shared_event_id_; i++) {
[timing_shared_event_
notifyListener:shared_event_listener_
atValue:i
block:^(id<MTLSharedEvent> /*sharedEvent*/, uint64_t value) {
completion_time = timer.get_time() - completion_time;
last_completion_time_ = completion_time;
for (auto label : command_encoder_labels_) {
if (label.timing_id == value) {
TimingStats &stat = timing_stats_[label.kernel];
stat.num_dispatches++;
stat.total_time += completion_time;
stat.total_work_size += label.work_size;
}
}];
}
}
}];
}
}
uint64_t shared_event_id_ = this->shared_event_id_++;
if (@available(macos 10.14, *)) {
__block dispatch_semaphore_t block_sema = wait_semaphore_;
[shared_event_ notifyListener:shared_event_listener_
atValue:shared_event_id_
block:^(id<MTLSharedEvent> /*sharedEvent*/, uint64_t /*value*/) {
dispatch_semaphore_signal(block_sema);
}];
__block dispatch_semaphore_t block_sema = wait_semaphore_;
[shared_event_ notifyListener:shared_event_listener_
atValue:shared_event_id_
block:^(id<MTLSharedEvent> /*sharedEvent*/, uint64_t /*value*/) {
dispatch_semaphore_signal(block_sema);
}];
[mtlCommandBuffer_ encodeSignalEvent:shared_event_ value:shared_event_id_];
[mtlCommandBuffer_ commit];
dispatch_semaphore_wait(wait_semaphore_, DISPATCH_TIME_FOREVER);
}
[mtlCommandBuffer_ encodeSignalEvent:shared_event_ value:shared_event_id_];
[mtlCommandBuffer_ commit];
dispatch_semaphore_wait(wait_semaphore_, DISPATCH_TIME_FOREVER);
[mtlCommandBuffer_ release];
@ -897,42 +872,40 @@ id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel
{
bool concurrent = (kernel < DEVICE_KERNEL_INTEGRATOR_NUM);
if (@available(macos 10.14, *)) {
if (timing_shared_event_) {
/* Close the current encoder to ensure we're able to capture per-encoder timing data. */
close_compute_encoder();
}
if (mtlComputeEncoder_) {
if (mtlComputeEncoder_.dispatchType == concurrent ? MTLDispatchTypeConcurrent :
MTLDispatchTypeSerial)
{
/* declare usage of MTLBuffers etc */
prepare_resources(kernel);
return mtlComputeEncoder_;
}
close_compute_encoder();
}
close_blit_encoder();
if (!mtlCommandBuffer_) {
mtlCommandBuffer_ = [mtlCommandQueue_ commandBuffer];
[mtlCommandBuffer_ retain];
}
mtlComputeEncoder_ = [mtlCommandBuffer_
computeCommandEncoderWithDispatchType:concurrent ? MTLDispatchTypeConcurrent :
MTLDispatchTypeSerial];
[mtlComputeEncoder_ retain];
[mtlComputeEncoder_ setLabel:@(device_kernel_as_string(kernel))];
/* declare usage of MTLBuffers etc */
prepare_resources(kernel);
if (timing_shared_event_) {
/* Close the current encoder to ensure we're able to capture per-encoder timing data. */
close_compute_encoder();
}
if (mtlComputeEncoder_) {
if (mtlComputeEncoder_.dispatchType == concurrent ? MTLDispatchTypeConcurrent :
MTLDispatchTypeSerial)
{
/* declare usage of MTLBuffers etc */
prepare_resources(kernel);
return mtlComputeEncoder_;
}
close_compute_encoder();
}
close_blit_encoder();
if (!mtlCommandBuffer_) {
mtlCommandBuffer_ = [mtlCommandQueue_ commandBuffer];
[mtlCommandBuffer_ retain];
}
mtlComputeEncoder_ = [mtlCommandBuffer_
computeCommandEncoderWithDispatchType:concurrent ? MTLDispatchTypeConcurrent :
MTLDispatchTypeSerial];
[mtlComputeEncoder_ retain];
[mtlComputeEncoder_ setLabel:@(device_kernel_as_string(kernel))];
/* declare usage of MTLBuffers etc */
prepare_resources(kernel);
return mtlComputeEncoder_;
}
@ -962,10 +935,8 @@ void MetalDeviceQueue::close_compute_encoder()
[mtlComputeEncoder_ release];
mtlComputeEncoder_ = nil;
if (@available(macos 10.14, *)) {
if (timing_shared_event_) {
[mtlCommandBuffer_ encodeSignalEvent:timing_shared_event_ value:timing_shared_event_id_++];
}
if (timing_shared_event_) {
[mtlCommandBuffer_ encodeSignalEvent:timing_shared_event_ value:timing_shared_event_id_++];
}
}
}

@ -890,16 +890,15 @@ GHOST_TSuccess GHOST_SystemCocoa::getPixelAtCursor(float r_color[3]) const
* This behavior could confuse users, especially when trying to pick a color from another app,
* potentially capturing the wallpaper under that app window.
*/
if (@available(macOS 11.0, *)) {
/* Although these methods are documented as available for macOS 10.15, they are not actually
* shipped, leading to a crash if used on macOS 10.15.
*
* Ref: https://developer.apple.com/forums/thread/683860?answerId=684400022#684400022
*/
if (!CGPreflightScreenCaptureAccess()) {
CGRequestScreenCaptureAccess();
return GHOST_kFailure;
}
/* Although these methods are documented as available for macOS 10.15, they are not actually
* shipped, leading to a crash if used on macOS 10.15.
*
* Ref: https://developer.apple.com/forums/thread/683860?answerId=684400022#684400022
*/
if (!CGPreflightScreenCaptureAccess()) {
CGRequestScreenCaptureAccess();
return GHOST_kFailure;
}
CGEventRef event = CGEventCreate(nil);

@ -306,66 +306,50 @@ bool MTLBackend::metal_is_supported()
return false;
}
if (@available(macOS 10.15, *)) {
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
/* Debug: Enable low power GPU with Environment Var: METAL_FORCE_INTEL. */
static const char *forceIntelStr = getenv("METAL_FORCE_INTEL");
bool forceIntel = forceIntelStr ? (atoi(forceIntelStr) != 0) : false;
/* Debug: Enable low power GPU with Environment Var: METAL_FORCE_INTEL. */
static const char *forceIntelStr = getenv("METAL_FORCE_INTEL");
bool forceIntel = forceIntelStr ? (atoi(forceIntelStr) != 0) : false;
if (forceIntel) {
NSArray<id<MTLDevice>> *allDevices = MTLCopyAllDevices();
for (id<MTLDevice> _device in allDevices) {
if (_device.lowPower) {
device = _device;
}
if (forceIntel) {
NSArray<id<MTLDevice>> *allDevices = MTLCopyAllDevices();
for (id<MTLDevice> _device in allDevices) {
if (_device.lowPower) {
device = _device;
}
}
/* If Intel, we must be on macOS 11.2+ for full Metal backend support. */
NSString *gpu_name = [device name];
const char *vendor = [gpu_name UTF8String];
if ((strstr(vendor, "Intel") || strstr(vendor, "INTEL"))) {
if (@available(macOS 11.2, *)) {
/* Intel device supported -- Carry on.
* NOTE: @available syntax cannot be negated. */
}
else {
return false;
}
}
/* Metal Viewport requires argument buffer tier-2 support and Barycentric Coordinates.
* These are available on most hardware configurations supporting Metal 2.2. */
bool supports_argument_buffers_tier2 = ([device argumentBuffersSupport] ==
MTLArgumentBuffersTier2);
bool supports_barycentrics = [device supportsShaderBarycentricCoordinates] ||
supports_barycentric_whitelist(device);
bool supported_metal_version = [device supportsFamily:MTLGPUFamilyMac2];
bool result = supports_argument_buffers_tier2 && supports_barycentrics &&
supported_os_version && supported_metal_version;
if (G.debug & G_DEBUG_GPU) {
if (!supports_argument_buffers_tier2) {
printf("[Metal] Device does not support argument buffers tier 2\n");
}
if (!supports_barycentrics) {
printf("[Metal] Device does not support barycentrics coordinates\n");
}
if (!supported_metal_version) {
printf("[Metal] Device does not support metal 2.2 or higher\n");
}
if (result) {
printf("Device with name %s supports metal minimum requirements\n",
[[device name] UTF8String]);
}
}
return result;
}
return false;
/* Metal Viewport requires argument buffer tier-2 support and Barycentric Coordinates.
* These are available on most hardware configurations supporting Metal 2.2. */
bool supports_argument_buffers_tier2 = ([device argumentBuffersSupport] ==
MTLArgumentBuffersTier2);
bool supports_barycentrics = [device supportsShaderBarycentricCoordinates] ||
supports_barycentric_whitelist(device);
bool supported_metal_version = [device supportsFamily:MTLGPUFamilyMac2];
bool result = supports_argument_buffers_tier2 && supports_barycentrics && supported_os_version &&
supported_metal_version;
if (G.debug & G_DEBUG_GPU) {
if (!supports_argument_buffers_tier2) {
printf("[Metal] Device does not support argument buffers tier 2\n");
}
if (!supports_barycentrics) {
printf("[Metal] Device does not support barycentrics coordinates\n");
}
if (!supported_metal_version) {
printf("[Metal] Device does not support metal 2.2 or higher\n");
}
if (result) {
printf("Device with name %s supports metal minimum requirements\n",
[[device name] UTF8String]);
}
}
return result;
}
void MTLBackend::capabilities_init(MTLContext *ctx)

@ -53,15 +53,13 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin()
BLI_assert(MTLCommandBufferManager::num_active_cmd_bufs <
GHOST_ContextCGL::max_command_buffer_count);
if (@available(macos 11.0, *)) {
if (G.debug & G_DEBUG_GPU) {
/* Debug: Enable Advanced Errors for GPU work execution. */
MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init];
desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
desc.retainedReferences = YES;
BLI_assert(context_.queue != nil);
active_command_buffer_ = [context_.queue commandBufferWithDescriptor:desc];
}
if (G.debug & G_DEBUG_GPU) {
/* Debug: Enable Advanced Errors for GPU work execution. */
MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init];
desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
desc.retainedReferences = YES;
BLI_assert(context_.queue != nil);
active_command_buffer_ = [context_.queue commandBufferWithDescriptor:desc];
}
/* Ensure command buffer is created if debug command buffer unavailable. */
@ -151,14 +149,12 @@ bool MTLCommandBufferManager::submit(bool wait)
/* Command buffer execution debugging can return an error message if
* execution has failed or encountered GPU-side errors. */
if (@available(macos 11.0, *)) {
if (G.debug & G_DEBUG_GPU) {
if (G.debug & G_DEBUG_GPU) {
NSError *error = [active_command_buffer_ error];
if (error != nil) {
NSLog(@"%@", error);
BLI_assert(false);
}
NSError *error = [active_command_buffer_ error];
if (error != nil) {
NSLog(@"%@", error);
BLI_assert(false);
}
}
}
@ -578,86 +574,80 @@ bool MTLCommandBufferManager::insert_memory_barrier(eGPUBarrier barrier_bits,
eGPUStageBarrierBits before_stages,
eGPUStageBarrierBits after_stages)
{
/* Only supporting Metal on 10.14 onward anyway - Check required for warnings. */
if (@available(macOS 10.14, *)) {
/* Apple Silicon does not support memory barriers for RenderCommandEncoder's.
* We do not currently need these due to implicit API guarantees. However, render->render
* resource dependencies are only evaluated at RenderCommandEncoder boundaries due to work
* execution on TBDR architecture.
*
* NOTE: Render barriers are therefore inherently expensive. Where possible, opt for local
* synchronization using raster order groups, or, prefer compute to avoid subsequent passes
* re-loading pass attachments which are not needed. */
const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
if (is_tile_based_arch && (active_command_encoder_type_ != MTL_COMPUTE_COMMAND_ENCODER)) {
if (active_command_encoder_type_ == MTL_RENDER_COMMAND_ENCODER) {
end_active_command_encoder();
return true;
}
return false;
}
/* Apple Silicon does not support memory barriers for RenderCommandEncoder's.
* We do not currently need these due to implicit API guarantees. However, render->render
* resource dependencies are only evaluated at RenderCommandEncoder boundaries due to work
* execution on TBDR architecture.
*
* NOTE: Render barriers are therefore inherently expensive. Where possible, opt for local
* synchronization using raster order groups, or, prefer compute to avoid subsequent passes
* re-loading pass attachments which are not needed. */
const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
if (is_tile_based_arch && (active_command_encoder_type_ != MTL_COMPUTE_COMMAND_ENCODER)) {
if (active_command_encoder_type_ == MTL_RENDER_COMMAND_ENCODER) {
end_active_command_encoder();
/* Resolve scope. */
MTLBarrierScope scope = 0;
if (barrier_bits & GPU_BARRIER_SHADER_IMAGE_ACCESS || barrier_bits & GPU_BARRIER_TEXTURE_FETCH) {
bool is_compute = (active_command_encoder_type_ != MTL_RENDER_COMMAND_ENCODER);
scope |= (is_compute ? 0 : MTLBarrierScopeRenderTargets) | MTLBarrierScopeTextures;
}
if (barrier_bits & GPU_BARRIER_SHADER_STORAGE ||
barrier_bits & GPU_BARRIER_VERTEX_ATTRIB_ARRAY || barrier_bits & GPU_BARRIER_ELEMENT_ARRAY ||
barrier_bits & GPU_BARRIER_UNIFORM || barrier_bits & GPU_BARRIER_BUFFER_UPDATE)
{
scope = scope | MTLBarrierScopeBuffers;
}
if (scope != 0) {
/* Issue barrier based on encoder. */
switch (active_command_encoder_type_) {
case MTL_NO_COMMAND_ENCODER:
case MTL_BLIT_COMMAND_ENCODER: {
/* No barrier to be inserted. */
return false;
}
/* Rendering. */
case MTL_RENDER_COMMAND_ENCODER: {
/* Currently flagging both stages -- can use bits above to filter on stage type --
* though full barrier is safe for now. */
MTLRenderStages before_stage_flags = 0;
MTLRenderStages after_stage_flags = 0;
if (before_stages & GPU_BARRIER_STAGE_VERTEX &&
!(before_stages & GPU_BARRIER_STAGE_FRAGMENT))
{
before_stage_flags = before_stage_flags | MTLRenderStageVertex;
}
if (before_stages & GPU_BARRIER_STAGE_FRAGMENT) {
before_stage_flags = before_stage_flags | MTLRenderStageFragment;
}
if (after_stages & GPU_BARRIER_STAGE_VERTEX) {
after_stage_flags = after_stage_flags | MTLRenderStageVertex;
}
if (after_stages & GPU_BARRIER_STAGE_FRAGMENT) {
after_stage_flags = MTLRenderStageFragment;
}
id<MTLRenderCommandEncoder> rec = this->get_active_render_command_encoder();
BLI_assert(rec != nil);
[rec memoryBarrierWithScope:scope
afterStages:after_stage_flags
beforeStages:before_stage_flags];
return true;
}
return false;
}
/* Resolve scope. */
MTLBarrierScope scope = 0;
if (barrier_bits & GPU_BARRIER_SHADER_IMAGE_ACCESS || barrier_bits & GPU_BARRIER_TEXTURE_FETCH)
{
bool is_compute = (active_command_encoder_type_ != MTL_RENDER_COMMAND_ENCODER);
scope |= (is_compute ? 0 : MTLBarrierScopeRenderTargets) | MTLBarrierScopeTextures;
}
if (barrier_bits & GPU_BARRIER_SHADER_STORAGE ||
barrier_bits & GPU_BARRIER_VERTEX_ATTRIB_ARRAY ||
barrier_bits & GPU_BARRIER_ELEMENT_ARRAY || barrier_bits & GPU_BARRIER_UNIFORM ||
barrier_bits & GPU_BARRIER_BUFFER_UPDATE)
{
scope = scope | MTLBarrierScopeBuffers;
}
if (scope != 0) {
/* Issue barrier based on encoder. */
switch (active_command_encoder_type_) {
case MTL_NO_COMMAND_ENCODER:
case MTL_BLIT_COMMAND_ENCODER: {
/* No barrier to be inserted. */
return false;
}
/* Rendering. */
case MTL_RENDER_COMMAND_ENCODER: {
/* Currently flagging both stages -- can use bits above to filter on stage type --
* though full barrier is safe for now. */
MTLRenderStages before_stage_flags = 0;
MTLRenderStages after_stage_flags = 0;
if (before_stages & GPU_BARRIER_STAGE_VERTEX &&
!(before_stages & GPU_BARRIER_STAGE_FRAGMENT))
{
before_stage_flags = before_stage_flags | MTLRenderStageVertex;
}
if (before_stages & GPU_BARRIER_STAGE_FRAGMENT) {
before_stage_flags = before_stage_flags | MTLRenderStageFragment;
}
if (after_stages & GPU_BARRIER_STAGE_VERTEX) {
after_stage_flags = after_stage_flags | MTLRenderStageVertex;
}
if (after_stages & GPU_BARRIER_STAGE_FRAGMENT) {
after_stage_flags = MTLRenderStageFragment;
}
id<MTLRenderCommandEncoder> rec = this->get_active_render_command_encoder();
BLI_assert(rec != nil);
[rec memoryBarrierWithScope:scope
afterStages:after_stage_flags
beforeStages:before_stage_flags];
return true;
}
/* Compute. */
case MTL_COMPUTE_COMMAND_ENCODER: {
id<MTLComputeCommandEncoder> rec = this->get_active_compute_command_encoder();
BLI_assert(rec != nil);
[rec memoryBarrierWithScope:scope];
return true;
}
/* Compute. */
case MTL_COMPUTE_COMMAND_ENCODER: {
id<MTLComputeCommandEncoder> rec = this->get_active_compute_command_encoder();
BLI_assert(rec != nil);
[rec memoryBarrierWithScope:scope];
return true;
}
}
}

@ -314,16 +314,13 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
MTLCompileOptions *options = [[[MTLCompileOptions alloc] init] autorelease];
options.languageVersion = MTLLanguageVersion2_2;
options.fastMathEnabled = YES;
options.preserveInvariance = YES;
if (@available(macOS 11.00, *)) {
options.preserveInvariance = YES;
/* Raster order groups for tile data in struct require Metal 2.3.
* Retaining Metal 2.2. for old shaders to maintain backwards
* compatibility for existing features. */
if (info->subpass_inputs_.size() > 0) {
options.languageVersion = MTLLanguageVersion2_3;
}
/* Raster order groups for tile data in struct require Metal 2.3.
* Retaining Metal 2.2. for old shaders to maintain backwards
* compatibility for existing features. */
if (info->subpass_inputs_.size() > 0) {
options.languageVersion = MTLLanguageVersion2_3;
}
#if defined(MAC_OS_VERSION_14_0)
if (@available(macOS 14.00, *)) {
@ -364,14 +361,6 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
/* Inject unique context ID to avoid cross-context shader cache collisions.
* Required on macOS 11.0. */
NSString *source_with_header = source_with_header_a;
if (@available(macos 11.0, *)) {
/* Pass-through. Availability syntax requirement, expression cannot be negated. */
}
else {
source_with_header = [source_with_header_a
stringByAppendingString:[NSString stringWithFormat:@"\n\n#define MTL_CONTEXT_IND %d\n",
context_->context_id]];
}
[source_with_header retain];
/* Prepare Shader Library. */

@ -2793,9 +2793,7 @@ std::string MSLGeneratorInterface::generate_msl_vertex_out_struct(ShaderStage sh
* by ensuring that vertex position is consistently calculated between subsequent passes
* with maximum precision. */
out << "\tfloat4 _default_position_ [[position]]";
if (@available(macos 11.0, *)) {
out << " [[invariant]]";
}
out << " [[invariant]]";
out << ";" << std::endl;
}
else {
@ -2806,9 +2804,7 @@ std::string MSLGeneratorInterface::generate_msl_vertex_out_struct(ShaderStage sh
/* Use invariance if available. See above for detail. */
out << "\tfloat4 " << this->vertex_output_varyings[0].name << " [[position]];";
if (@available(macos 11.0, *)) {
out << " [[invariant]]";
}
out << " [[invariant]]";
out << ";" << std::endl;
first_attr_is_position = true;
}

@ -2345,15 +2345,9 @@ void gpu::MTLTexture::ensure_baked()
/* Override storage mode if memoryless attachments are being used.
* NOTE: Memoryless textures can only be supported on TBDR GPUs. */
if (gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) {
if (@available(macOS 11.00, *)) {
const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
if (is_tile_based_arch) {
texture_descriptor_.storageMode = MTLStorageModeMemoryless;
}
}
else {
MTL_LOG_WARNING(
"GPU_TEXTURE_USAGE_MEMORYLESS is not available on macOS versions prior to 11.0");
const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
if (is_tile_based_arch) {
texture_descriptor_.storageMode = MTLStorageModeMemoryless;
}
}