From 716c0980d7424dffafe222c5579851b11193301a Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Wed, 1 Jun 2022 04:58:13 -0500 Subject: [PATCH 01/12] VideoBackends: Add Metal renderer --- Source/Core/Core/CMakeLists.txt | 4 + Source/Core/VideoBackends/CMakeLists.txt | 4 + Source/Core/VideoBackends/D3D/D3DRender.cpp | 4 +- Source/Core/VideoBackends/D3D/D3DRender.h | 4 +- .../VideoBackends/D3D12/D3D12Renderer.cpp | 4 +- .../Core/VideoBackends/D3D12/D3D12Renderer.h | 4 +- .../Core/VideoBackends/Metal/CMakeLists.txt | 37 + Source/Core/VideoBackends/Metal/MRCHelpers.h | 81 ++ .../Core/VideoBackends/Metal/MTLBoundingBox.h | 30 + .../VideoBackends/Metal/MTLBoundingBox.mm | 97 ++ Source/Core/VideoBackends/Metal/MTLMain.mm | 157 ++++ .../Core/VideoBackends/Metal/MTLObjectCache.h | 91 ++ .../VideoBackends/Metal/MTLObjectCache.mm | 175 ++++ .../Core/VideoBackends/Metal/MTLPerfQuery.h | 20 + .../Core/VideoBackends/Metal/MTLPerfQuery.mm | 4 + Source/Core/VideoBackends/Metal/MTLPipeline.h | 64 ++ .../Core/VideoBackends/Metal/MTLPipeline.mm | 66 ++ Source/Core/VideoBackends/Metal/MTLRenderer.h | 91 ++ .../Core/VideoBackends/Metal/MTLRenderer.mm | 709 +++++++++++++++ Source/Core/VideoBackends/Metal/MTLShader.h | 30 + .../VideoBackends/Metal/MTLStateTracker.h | 266 ++++++ .../VideoBackends/Metal/MTLStateTracker.mm | 849 ++++++++++++++++++ Source/Core/VideoBackends/Metal/MTLTexture.h | 77 ++ Source/Core/VideoBackends/Metal/MTLTexture.mm | 179 ++++ Source/Core/VideoBackends/Metal/MTLUtil.h | 54 ++ Source/Core/VideoBackends/Metal/MTLUtil.mm | 358 ++++++++ .../VideoBackends/Metal/MTLVertexManager.h | 30 + .../VideoBackends/Metal/MTLVertexManager.mm | 89 ++ .../Core/VideoBackends/Metal/VideoBackend.h | 27 + Source/Core/VideoBackends/OGL/OGLRender.cpp | 4 +- Source/Core/VideoBackends/OGL/OGLRender.h | 4 +- .../Core/VideoBackends/Vulkan/VKRenderer.cpp | 4 +- Source/Core/VideoBackends/Vulkan/VKRenderer.h | 4 +- .../Core/VideoCommon/FramebufferShaderGen.cpp | 5 + Source/Core/VideoCommon/RenderBase.h | 4 +- Source/Core/VideoCommon/ShaderGenCommon.cpp | 3 + Source/Core/VideoCommon/Spirv.cpp | 2 +- Source/Core/VideoCommon/TextureCacheBase.cpp | 3 +- .../VideoCommon/TextureConversionShader.cpp | 116 ++- Source/Core/VideoCommon/UberShaderPixel.cpp | 2 +- Source/Core/VideoCommon/VideoBackendBase.cpp | 4 + Source/Core/VideoCommon/VideoCommon.h | 1 + 42 files changed, 3714 insertions(+), 47 deletions(-) create mode 100644 Source/Core/VideoBackends/Metal/CMakeLists.txt create mode 100644 Source/Core/VideoBackends/Metal/MRCHelpers.h create mode 100644 Source/Core/VideoBackends/Metal/MTLBoundingBox.h create mode 100644 Source/Core/VideoBackends/Metal/MTLBoundingBox.mm create mode 100644 Source/Core/VideoBackends/Metal/MTLMain.mm create mode 100644 Source/Core/VideoBackends/Metal/MTLObjectCache.h create mode 100644 Source/Core/VideoBackends/Metal/MTLObjectCache.mm create mode 100644 Source/Core/VideoBackends/Metal/MTLPerfQuery.h create mode 100644 Source/Core/VideoBackends/Metal/MTLPerfQuery.mm create mode 100644 Source/Core/VideoBackends/Metal/MTLPipeline.h create mode 100644 Source/Core/VideoBackends/Metal/MTLPipeline.mm create mode 100644 Source/Core/VideoBackends/Metal/MTLRenderer.h create mode 100644 Source/Core/VideoBackends/Metal/MTLRenderer.mm create mode 100644 Source/Core/VideoBackends/Metal/MTLShader.h create mode 100644 Source/Core/VideoBackends/Metal/MTLStateTracker.h create mode 100644 Source/Core/VideoBackends/Metal/MTLStateTracker.mm create mode 100644 Source/Core/VideoBackends/Metal/MTLTexture.h create mode 100644 Source/Core/VideoBackends/Metal/MTLTexture.mm create mode 100644 Source/Core/VideoBackends/Metal/MTLUtil.h create mode 100644 Source/Core/VideoBackends/Metal/MTLUtil.mm create mode 100644 Source/Core/VideoBackends/Metal/MTLVertexManager.h create mode 100644 Source/Core/VideoBackends/Metal/MTLVertexManager.mm create mode 100644 Source/Core/VideoBackends/Metal/VideoBackend.h diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 99a143f2bd..f3a6d722e2 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -631,6 +631,10 @@ if(ENABLE_VULKAN) target_link_libraries(core PUBLIC videovulkan) endif() +if(APPLE) + target_link_libraries(core PUBLIC videometal) +endif() + if(USE_MGBA) target_sources(core PRIVATE HW/GBACore.cpp diff --git a/Source/Core/VideoBackends/CMakeLists.txt b/Source/Core/VideoBackends/CMakeLists.txt index 3ab2d4c4e4..55b44f8c5d 100644 --- a/Source/Core/VideoBackends/CMakeLists.txt +++ b/Source/Core/VideoBackends/CMakeLists.txt @@ -8,6 +8,10 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows") add_subdirectory(D3D12) endif() +if(APPLE) + add_subdirectory(Metal) +endif() + if(ENABLE_VULKAN) add_subdirectory(Vulkan) endif() diff --git a/Source/Core/VideoBackends/D3D/D3DRender.cpp b/Source/Core/VideoBackends/D3D/D3DRender.cpp index 24d7526110..988f9fb6ad 100644 --- a/Source/Core/VideoBackends/D3D/D3DRender.cpp +++ b/Source/Core/VideoBackends/D3D/D3DRender.cpp @@ -150,8 +150,8 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) D3D::context->DrawIndexed(num_indices, base_index, base_vertex); } -void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) +void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) { D3D::stateman->SetComputeShader(static_cast(shader)->GetD3DComputeShader()); D3D::stateman->SyncComputeBindings(); diff --git a/Source/Core/VideoBackends/D3D/D3DRender.h b/Source/Core/VideoBackends/D3D/D3DRender.h index ba247e3ec8..a0d25bc270 100644 --- a/Source/Core/VideoBackends/D3D/D3DRender.h +++ b/Source/Core/VideoBackends/D3D/D3DRender.h @@ -57,8 +57,8 @@ public: float far_depth) override; void Draw(u32 base_vertex, u32 num_vertices) override; void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; - void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) override; + void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override; void BindBackbuffer(const ClearColor& clear_color = {}) override; void PresentBackbuffer() override; void SetFullscreen(bool enable_fullscreen) override; diff --git a/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp b/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp index d4eb207a45..b1cb3378ed 100644 --- a/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp +++ b/Source/Core/VideoBackends/D3D12/D3D12Renderer.cpp @@ -365,8 +365,8 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) g_dx_context->GetCommandList()->DrawIndexedInstanced(num_indices, 1, base_index, base_vertex, 0); } -void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) +void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) { SetRootSignatures(); SetDescriptorHeaps(); diff --git a/Source/Core/VideoBackends/D3D12/D3D12Renderer.h b/Source/Core/VideoBackends/D3D12/D3D12Renderer.h index eaf4544022..9624199aa4 100644 --- a/Source/Core/VideoBackends/D3D12/D3D12Renderer.h +++ b/Source/Core/VideoBackends/D3D12/D3D12Renderer.h @@ -69,8 +69,8 @@ public: float far_depth) override; void Draw(u32 base_vertex, u32 num_vertices) override; void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; - void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) override; + void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override; void BindBackbuffer(const ClearColor& clear_color = {}) override; void PresentBackbuffer() override; diff --git a/Source/Core/VideoBackends/Metal/CMakeLists.txt b/Source/Core/VideoBackends/Metal/CMakeLists.txt new file mode 100644 index 0000000000..a3becc1d2e --- /dev/null +++ b/Source/Core/VideoBackends/Metal/CMakeLists.txt @@ -0,0 +1,37 @@ +add_library(videometal + MRCHelpers.h + MTLBoundingBox.mm + MTLBoundingBox.h + MTLMain.mm + MTLObjectCache.h + MTLObjectCache.mm + MTLPerfQuery.mm + MTLPerfQuery.h + MTLPipeline.mm + MTLPipeline.h + MTLRenderer.mm + MTLRenderer.h + MTLShader.h + MTLStateTracker.mm + MTLStateTracker.h + MTLTexture.mm + MTLTexture.h + MTLUtil.mm + MTLUtil.h + MTLVertexManager.mm + MTLVertexManager.h + VideoBackend.h +) + +find_library(METAL_LIBRARY Metal) +find_library(QUARTZCORE_LIBRARY QuartzCore) + +target_link_libraries(videometal +PUBLIC + common + videocommon +PRIVATE + spirv_cross + ${METAL_LIBRARY} + ${QUARTZCORE_LIBRARY} +) diff --git a/Source/Core/VideoBackends/Metal/MRCHelpers.h b/Source/Core/VideoBackends/Metal/MRCHelpers.h new file mode 100644 index 0000000000..1da2e0c8d1 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MRCHelpers.h @@ -0,0 +1,81 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +// clang-format off +#ifndef __OBJC__ + #error This header is for use with Objective-C++ only. +#endif +#if __has_feature(objc_arc) + #error This file is for manual reference counting! Compile without -fobjc-arc +#endif +// clang-format on + +#pragma once + +#include +#include + +/// Managed Obj-C pointer +template +class MRCOwned +{ + T ptr; + MRCOwned(T ptr) : ptr(ptr) {} + +public: + MRCOwned() : ptr(nullptr) {} + MRCOwned(std::nullptr_t) : ptr(nullptr) {} + MRCOwned(MRCOwned&& other) : ptr(other.ptr) { other.ptr = nullptr; } + MRCOwned(const MRCOwned& other) : ptr(other.ptr) { [ptr retain]; } + ~MRCOwned() + { + if (ptr) + [ptr release]; + } + operator T() const { return ptr; } + MRCOwned& operator=(const MRCOwned& other) + { + [other.ptr retain]; + if (ptr) + [ptr release]; + ptr = other.ptr; + return *this; + } + MRCOwned& operator=(MRCOwned&& other) + { + std::swap(ptr, other.ptr); + return *this; + } + void Reset() + { + [ptr release]; + ptr = nullptr; + } + T Disown() + { + T tmp = ptr; + ptr = nullptr; + return tmp; + } + T Get() const { return ptr; } + static MRCOwned Transfer(T ptr) { return MRCOwned(ptr); } + static MRCOwned Retain(T ptr) + { + [ptr retain]; + return MRCOwned(ptr); + } +}; + +/// Take ownership of an Obj-C pointer (equivalent to __bridge_transfer) +template +static inline MRCOwned MRCTransfer(T ptr) +{ + return MRCOwned::Transfer(ptr); +} + +/// Retain an Obj-C pointer (equivalent to __bridge) +template +static inline MRCOwned MRCRetain(T ptr) +{ + return MRCOwned::Retain(ptr); +} diff --git a/Source/Core/VideoBackends/Metal/MTLBoundingBox.h b/Source/Core/VideoBackends/Metal/MTLBoundingBox.h new file mode 100644 index 0000000000..d4a67389a2 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLBoundingBox.h @@ -0,0 +1,30 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoCommon/BoundingBox.h" + +#include + +#include "VideoBackends/Metal/MRCHelpers.h" + +namespace Metal +{ +class BoundingBox final : public ::BoundingBox +{ +public: + ~BoundingBox() override; + + bool Initialize() override; + +protected: + std::vector Read(u32 index, u32 length) override; + void Write(u32 index, const std::vector& values) override; + +private: + BBoxType* m_cpu_buffer_ptr; + MRCOwned> m_download_fence; + MRCOwned> m_upload_fence; + MRCOwned> m_cpu_buffer; + MRCOwned> m_gpu_buffer; +}; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLBoundingBox.mm b/Source/Core/VideoBackends/Metal/MTLBoundingBox.mm new file mode 100644 index 0000000000..89c446b040 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLBoundingBox.mm @@ -0,0 +1,97 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLBoundingBox.h" + +#include "VideoBackends/Metal/MTLObjectCache.h" +#include "VideoBackends/Metal/MTLStateTracker.h" + +static constexpr size_t BUFFER_SIZE = sizeof(BBoxType) * NUM_BBOX_VALUES; + +Metal::BoundingBox::~BoundingBox() +{ + if (g_state_tracker) + g_state_tracker->SetBBoxBuffer(nullptr, nullptr, nullptr); +} + +bool Metal::BoundingBox::Initialize() +{ + const MTLResourceOptions gpu_storage_mode = + g_features.unified_memory ? MTLResourceStorageModeShared : MTLResourceStorageModePrivate; + const MTLResourceOptions gpu_options = gpu_storage_mode | MTLResourceHazardTrackingModeUntracked; + const id dev = g_device; + m_upload_fence = MRCTransfer([dev newFence]); + [m_upload_fence setLabel:@"BBox Upload Fence"]; + m_download_fence = MRCTransfer([dev newFence]); + [m_download_fence setLabel:@"BBox Download Fence"]; + m_gpu_buffer = MRCTransfer([dev newBufferWithLength:BUFFER_SIZE options:gpu_options]); + if (g_features.unified_memory) + { + [m_gpu_buffer setLabel:@"BBox Buffer"]; + m_cpu_buffer_ptr = static_cast([m_gpu_buffer contents]); + } + else + { + m_cpu_buffer = MRCTransfer([dev newBufferWithLength:BUFFER_SIZE + options:MTLResourceStorageModeShared]); + m_cpu_buffer_ptr = static_cast([m_cpu_buffer contents]); + [m_gpu_buffer setLabel:@"BBox GPU Buffer"]; + [m_cpu_buffer setLabel:@"BBox CPU Buffer"]; + } + g_state_tracker->SetBBoxBuffer(m_gpu_buffer, m_upload_fence, m_download_fence); + return true; +} + +std::vector Metal::BoundingBox::Read(u32 index, u32 length) +{ + @autoreleasepool + { + g_state_tracker->EndRenderPass(); + if (!g_features.unified_memory) + { + id download = [g_state_tracker->GetRenderCmdBuf() blitCommandEncoder]; + [download setLabel:@"BBox Download"]; + [download waitForFence:m_download_fence]; + [download copyFromBuffer:m_gpu_buffer + sourceOffset:0 + toBuffer:m_cpu_buffer + destinationOffset:0 + size:BUFFER_SIZE]; + [download endEncoding]; + } + g_state_tracker->FlushEncoders(); + g_state_tracker->WaitForFlushedEncoders(); + return std::vector(m_cpu_buffer_ptr + index, m_cpu_buffer_ptr + index + length); + } +} + +void Metal::BoundingBox::Write(u32 index, const std::vector& values) +{ + const u32 size = values.size() * sizeof(BBoxType); + if (g_features.unified_memory && !g_state_tracker->HasUnflushedData() && + !g_state_tracker->GPUBusy()) + { + // We can just write directly to the buffer! + memcpy(m_cpu_buffer_ptr + index, values.data(), size); + } + else + { + @autoreleasepool + { + StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::Other, size, + StateTracker::AlignMask::Other); + memcpy(map.cpu_buffer, values.data(), size); + g_state_tracker->EndRenderPass(); + id upload = [g_state_tracker->GetRenderCmdBuf() blitCommandEncoder]; + [upload setLabel:@"BBox Upload"]; + [upload waitForFence:m_download_fence]; + [upload copyFromBuffer:map.gpu_buffer + sourceOffset:map.gpu_offset + toBuffer:m_gpu_buffer + destinationOffset:index * sizeof(BBoxType) + size:size]; + [upload updateFence:m_upload_fence]; + [upload endEncoding]; + } + } +} diff --git a/Source/Core/VideoBackends/Metal/MTLMain.mm b/Source/Core/VideoBackends/Metal/MTLMain.mm new file mode 100644 index 0000000000..2b54d93784 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLMain.mm @@ -0,0 +1,157 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/VideoBackend.h" + +#include +#include +#include + +#include "Common/Common.h" +#include "Common/MsgHandler.h" + +#include "VideoBackends/Metal/MTLObjectCache.h" +#include "VideoBackends/Metal/MTLPerfQuery.h" +#include "VideoBackends/Metal/MTLRenderer.h" +#include "VideoBackends/Metal/MTLStateTracker.h" +#include "VideoBackends/Metal/MTLUtil.h" +#include "VideoBackends/Metal/MTLVertexManager.h" + +#include "VideoCommon/FramebufferManager.h" +#include "VideoCommon/VideoCommon.h" +#include "VideoCommon/VideoConfig.h" + +std::string Metal::VideoBackend::GetName() const +{ + return NAME; +} + +std::string Metal::VideoBackend::GetDisplayName() const +{ + // i18n: Apple's Metal graphics API (https://developer.apple.com/metal/) + return _trans("Metal"); +} + +std::optional Metal::VideoBackend::GetWarningMessage() const +{ + if (Util::GetAdapterList().empty()) + { + return _trans("No Metal-compatible GPUs were found. " + "Use the OpenGL backend or upgrade your computer/GPU"); + } + + return std::nullopt; +} + +static bool WindowSystemTypeSupportsMetal(WindowSystemType type) +{ + switch (type) + { + case WindowSystemType::MacOS: + return true; + default: + return false; + } +} + +bool Metal::VideoBackend::Initialize(const WindowSystemInfo& wsi) +{ + @autoreleasepool + { + if (!WindowSystemTypeSupportsMetal(wsi.type) || !wsi.render_surface) + { + PanicAlertFmt("Bad WindowSystemInfo for Metal renderer."); + return false; + } + + auto devs = Util::GetAdapterList(); + if (devs.empty()) + { + PanicAlertFmt("No Metal GPUs detected."); + return false; + } + + Util::PopulateBackendInfo(&g_Config); + Util::PopulateBackendInfoAdapters(&g_Config, devs); + + // Since we haven't called InitializeShared yet, iAdapter may be out of range, + // so we have to check it ourselves. + size_t selected_adapter_index = static_cast(g_Config.iAdapter); + if (selected_adapter_index >= devs.size()) + { + WARN_LOG_FMT(VIDEO, "Metal adapter index out of range, selecting default adapter."); + selected_adapter_index = 0; + } + MRCOwned> adapter = std::move(devs[selected_adapter_index]); + Util::PopulateBackendInfoFeatures(&g_Config, adapter); + + // With the backend information populated, we can now initialize videocommon. + InitializeShared(); + + MRCOwned layer = MRCRetain(static_cast(wsi.render_surface)); + [layer setDevice:adapter]; + if (Util::ToAbstract([layer pixelFormat]) == AbstractTextureFormat::Undefined) + [layer setPixelFormat:MTLPixelFormatBGRA8Unorm]; + CGSize size = [layer bounds].size; + float scale = [layer contentsScale]; + + ObjectCache::Initialize(std::move(adapter)); + g_state_tracker = std::make_unique(); + g_renderer = std::make_unique(std::move(layer), size.width * scale, + size.height * scale, scale); + g_vertex_manager = std::make_unique(); + g_perf_query = std::make_unique(); + g_framebuffer_manager = std::make_unique(); + g_texture_cache = std::make_unique(); + g_shader_cache = std::make_unique(); + + if (!g_vertex_manager->Initialize() || !g_shader_cache->Initialize() || + !g_renderer->Initialize() || !g_framebuffer_manager->Initialize() || + !g_texture_cache->Initialize()) + { + PanicAlertFmt("Failed to initialize renderer classes"); + Shutdown(); + return false; + } + + g_shader_cache->InitializeShaderCache(); + + return true; + } +} + +void Metal::VideoBackend::Shutdown() +{ + g_shader_cache->Shutdown(); + g_renderer->Shutdown(); + + g_shader_cache.reset(); + g_texture_cache.reset(); + g_framebuffer_manager.reset(); + g_perf_query.reset(); + g_vertex_manager.reset(); + g_renderer.reset(); + g_state_tracker.reset(); + ObjectCache::Shutdown(); + ShutdownShared(); +} + +void Metal::VideoBackend::InitBackendInfo() +{ + @autoreleasepool + { + Util::PopulateBackendInfo(&g_Config); + Util::PopulateBackendInfoAdapters(&g_Config, Util::GetAdapterList()); + } +} + +void Metal::VideoBackend::PrepareWindow(WindowSystemInfo& wsi) +{ + if (wsi.type != WindowSystemType::MacOS) + return; + NSView* view = static_cast(wsi.render_surface); + CAMetalLayer* layer = [CAMetalLayer layer]; + [view setWantsLayer:YES]; + [view setLayer:layer]; + wsi.render_surface = layer; +} diff --git a/Source/Core/VideoBackends/Metal/MTLObjectCache.h b/Source/Core/VideoBackends/Metal/MTLObjectCache.h new file mode 100644 index 0000000000..d8a10d2d51 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLObjectCache.h @@ -0,0 +1,91 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "VideoBackends/Metal/MRCHelpers.h" + +#include "VideoCommon/RenderState.h" + +namespace Metal +{ +extern MRCOwned> g_device; +extern MRCOwned> g_queue; + +struct DepthStencilSelector +{ + u8 value; + + DepthStencilSelector() : value(0) {} + DepthStencilSelector(bool update_enable, enum CompareMode cmp) + : value(update_enable | (static_cast(cmp) << 1)) + { + } + DepthStencilSelector(DepthState state) + : DepthStencilSelector(state.testenable ? state.updateenable : false, + state.testenable ? state.func : CompareMode::Always) + { + } + + bool UpdateEnable() const { return value & 1; } + enum CompareMode CompareMode() const { return static_cast(value >> 1); } + + bool operator==(const DepthStencilSelector& other) { return value == other.value; } + bool operator!=(const DepthStencilSelector& other) { return !(*this == other); } + static constexpr size_t N_VALUES = 1 << 4; +}; + +struct SamplerSelector +{ + u8 value; + SamplerSelector() : value(0) {} + SamplerSelector(SamplerState state) + { + value = (static_cast(state.tm0.min_filter.Value()) << 0) | + (static_cast(state.tm0.mag_filter.Value()) << 1) | + (static_cast(state.tm0.mipmap_filter.Value()) << 2) | + (static_cast(state.tm0.anisotropic_filtering) << 3); + value |= (static_cast(state.tm0.wrap_u.Value()) + + 3 * static_cast(state.tm0.wrap_v.Value())) + << 4; + } + FilterMode MinFilter() const { return static_cast(value & 1); } + FilterMode MagFilter() const { return static_cast((value >> 1) & 1); } + FilterMode MipFilter() const { return static_cast((value >> 2) & 1); } + WrapMode WrapU() const { return static_cast((value >> 4) % 3); } + WrapMode WrapV() const { return static_cast((value >> 4) / 3); } + bool AnisotropicFiltering() const { return ((value >> 3) & 1); } + + bool operator==(const SamplerSelector& other) { return value == other.value; } + bool operator!=(const SamplerSelector& other) { return !(*this == other); } + static constexpr size_t N_VALUES = (1 << 4) * 9; +}; + +class ObjectCache +{ + ObjectCache(); + +public: + ~ObjectCache(); + + static void Initialize(MRCOwned> device); + static void Shutdown(); + + id GetDepthStencil(DepthStencilSelector sel) { return m_dss[sel.value]; } + + id GetSampler(SamplerSelector sel) { return m_samplers[sel.value]; } + + id GetSampler(SamplerState state) { return GetSampler(SamplerSelector(state)); } + + void ReloadSamplers(); + +private: + MRCOwned> m_dss[DepthStencilSelector::N_VALUES]; + MRCOwned> m_samplers[SamplerSelector::N_VALUES]; +}; + +extern std::unique_ptr g_object_cache; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLObjectCache.mm b/Source/Core/VideoBackends/Metal/MTLObjectCache.mm new file mode 100644 index 0000000000..22809f5937 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLObjectCache.mm @@ -0,0 +1,175 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLObjectCache.h" + +#include "VideoCommon/VideoConfig.h" + +MRCOwned> Metal::g_device; +MRCOwned> Metal::g_queue; +std::unique_ptr Metal::g_object_cache; + +static void SetupDepthStencil( + MRCOwned> (&dss)[Metal::DepthStencilSelector::N_VALUES]); +static void SetupSamplers(MRCOwned> (&samplers)[Metal::SamplerSelector::N_VALUES]); + +Metal::ObjectCache::ObjectCache() +{ + SetupDepthStencil(m_dss); + SetupSamplers(m_samplers); +} + +Metal::ObjectCache::~ObjectCache() +{ +} + +void Metal::ObjectCache::Initialize(MRCOwned> device) +{ + g_device = std::move(device); + g_queue = MRCTransfer([g_device newCommandQueue]); + g_object_cache = std::unique_ptr(new ObjectCache); +} + +void Metal::ObjectCache::Shutdown() +{ + g_object_cache.reset(); + g_queue = nullptr; + g_device = nullptr; +} + +// MARK: Depth Stencil State + +// clang-format off + +static MTLCompareFunction Convert(CompareMode mode) +{ + const bool invert_depth = !g_Config.backend_info.bSupportsReversedDepthRange; + switch (mode) + { + case CompareMode::Never: return MTLCompareFunctionNever; + case CompareMode::Less: return invert_depth ? MTLCompareFunctionGreater + : MTLCompareFunctionLess; + case CompareMode::Equal: return MTLCompareFunctionEqual; + case CompareMode::LEqual: return invert_depth ? MTLCompareFunctionGreaterEqual + : MTLCompareFunctionLessEqual; + case CompareMode::Greater: return invert_depth ? MTLCompareFunctionLess + : MTLCompareFunctionGreater; + case CompareMode::NEqual: return MTLCompareFunctionNotEqual; + case CompareMode::GEqual: return invert_depth ? MTLCompareFunctionLessEqual + : MTLCompareFunctionGreaterEqual; + case CompareMode::Always: return MTLCompareFunctionAlways; + } +} + +static const char* to_string(MTLCompareFunction compare) +{ + switch (compare) + { + case MTLCompareFunctionNever: return "Never"; + case MTLCompareFunctionGreater: return "Greater"; + case MTLCompareFunctionEqual: return "Equal"; + case MTLCompareFunctionGreaterEqual: return "GEqual"; + case MTLCompareFunctionLess: return "Less"; + case MTLCompareFunctionNotEqual: return "NEqual"; + case MTLCompareFunctionLessEqual: return "LEqual"; + case MTLCompareFunctionAlways: return "Always"; + } +} + +// clang-format on + +static void SetupDepthStencil( + MRCOwned> (&dss)[Metal::DepthStencilSelector::N_VALUES]) +{ + auto desc = MRCTransfer([MTLDepthStencilDescriptor new]); + Metal::DepthStencilSelector sel; + for (size_t i = 0; i < std::size(dss); ++i) + { + sel.value = i; + MTLCompareFunction mcompare = Convert(sel.CompareMode()); + [desc setDepthWriteEnabled:sel.UpdateEnable()]; + [desc setDepthCompareFunction:mcompare]; + [desc setLabel:[NSString stringWithFormat:@"DSS %s%s", to_string(mcompare), + sel.UpdateEnable() ? "+Write" : ""]]; + dss[i] = MRCTransfer([Metal::g_device newDepthStencilStateWithDescriptor:desc]); + } +} + +// MARK: Samplers + +// clang-format off + +static MTLSamplerMinMagFilter ConvertMinMag(FilterMode filter) +{ + switch (filter) + { + case FilterMode::Linear: return MTLSamplerMinMagFilterLinear; + case FilterMode::Near: return MTLSamplerMinMagFilterNearest; + } +} + +static MTLSamplerMipFilter ConvertMip(FilterMode filter) +{ + switch (filter) + { + case FilterMode::Linear: return MTLSamplerMipFilterLinear; + case FilterMode::Near: return MTLSamplerMipFilterNearest; + } +} + +static MTLSamplerAddressMode Convert(WrapMode wrap) +{ + switch (wrap) + { + case WrapMode::Clamp: return MTLSamplerAddressModeClampToEdge; + case WrapMode::Mirror: return MTLSamplerAddressModeMirrorRepeat; + case WrapMode::Repeat: return MTLSamplerAddressModeRepeat; + } +} + +static const char* to_string(FilterMode filter) +{ + switch (filter) + { + case FilterMode::Linear: return "Ln"; + case FilterMode::Near: return "Pt"; + } +} +static const char* to_string(WrapMode wrap) +{ + switch (wrap) + { + case WrapMode::Clamp: return "C"; + case WrapMode::Mirror: return "M"; + case WrapMode::Repeat: return "R"; + } +} + +// clang-format on + +static void SetupSamplers(MRCOwned> (&samplers)[Metal::SamplerSelector::N_VALUES]) +{ + auto desc = MRCTransfer([MTLSamplerDescriptor new]); + Metal::SamplerSelector sel; + for (size_t i = 0; i < std::size(samplers); i++) + { + sel.value = i; + [desc setMinFilter:ConvertMinMag(sel.MinFilter())]; + [desc setMagFilter:ConvertMinMag(sel.MagFilter())]; + [desc setMipFilter:ConvertMip(sel.MipFilter())]; + [desc setSAddressMode:Convert(sel.WrapU())]; + [desc setTAddressMode:Convert(sel.WrapV())]; + [desc setMaxAnisotropy:1 << (sel.AnisotropicFiltering() ? g_ActiveConfig.iMaxAnisotropy : 0)]; + [desc setLabel:MRCTransfer([[NSString alloc] + initWithFormat:@"%s%s%s %s%s%s", to_string(sel.MinFilter()), + to_string(sel.MagFilter()), to_string(sel.MipFilter()), + to_string(sel.WrapU()), to_string(sel.WrapV()), + sel.AnisotropicFiltering() ? "(AF)" : ""])]; + samplers[i] = MRCTransfer([Metal::g_device newSamplerStateWithDescriptor:desc]); + } +} + +void Metal::ObjectCache::ReloadSamplers() +{ + SetupSamplers(m_samplers); +} diff --git a/Source/Core/VideoBackends/Metal/MTLPerfQuery.h b/Source/Core/VideoBackends/Metal/MTLPerfQuery.h new file mode 100644 index 0000000000..793cf8cec5 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLPerfQuery.h @@ -0,0 +1,20 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "VideoCommon/PerfQueryBase.h" + +namespace Metal +{ +class PerfQuery final : public PerfQueryBase +{ +public: + void EnableQuery(PerfQueryGroup type) override {} + void DisableQuery(PerfQueryGroup type) override {} + void ResetQuery() override {} + u32 GetQueryResult(PerfQueryType type) override { return 0; } + void FlushResults() override {} + bool IsFlushed() const override { return true; } +}; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLPerfQuery.mm b/Source/Core/VideoBackends/Metal/MTLPerfQuery.mm new file mode 100644 index 0000000000..2892bdc747 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLPerfQuery.mm @@ -0,0 +1,4 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLPerfQuery.h" diff --git a/Source/Core/VideoBackends/Metal/MTLPipeline.h b/Source/Core/VideoBackends/Metal/MTLPipeline.h new file mode 100644 index 0000000000..d5a642aedd --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLPipeline.h @@ -0,0 +1,64 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "VideoBackends/Metal/MRCHelpers.h" +#include "VideoBackends/Metal/MTLObjectCache.h" +#include "VideoBackends/Metal/MTLShader.h" + +#include "VideoCommon/AbstractPipeline.h" +#include "VideoCommon/AbstractShader.h" + +namespace Metal +{ +class Pipeline final : public AbstractPipeline +{ +public: + explicit Pipeline(MRCOwned> pipeline, + MTLRenderPipelineReflection* reflection, MTLPrimitiveType prim, + MTLCullMode cull, DepthState depth, AbstractPipelineUsage usage); + + id Get() const { return m_pipeline; } + MTLPrimitiveType Prim() const { return m_prim; } + MTLCullMode Cull() const { return m_cull; } + DepthStencilSelector DepthStencil() const { return m_depth_stencil; } + AbstractPipelineUsage Usage() const { return m_usage; } + u32 GetTextures() const { return m_textures; } + u32 GetSamplers() const { return m_samplers; } + u32 GetVertexBuffers() const { return m_vertex_buffers; } + u32 GetFragmentBuffers() const { return m_fragment_buffers; } + bool UsesVertexBuffer(u32 index) const { return m_vertex_buffers & (1 << index); } + bool UsesFragmentBuffer(u32 index) const { return m_fragment_buffers & (1 << index); } + +private: + MRCOwned> m_pipeline; + MTLPrimitiveType m_prim; + MTLCullMode m_cull; + DepthStencilSelector m_depth_stencil; + AbstractPipelineUsage m_usage; + u32 m_textures = 0; + u32 m_samplers = 0; + u32 m_vertex_buffers = 0; + u32 m_fragment_buffers = 0; +}; + +class ComputePipeline : public Shader +{ +public: + explicit ComputePipeline(ShaderStage stage, MTLComputePipelineReflection* reflection, + std::string msl, MRCOwned> shader, + MRCOwned> pipeline); + + id GetComputePipeline() const { return m_compute_pipeline; } + bool UsesTexture(u32 index) const { return m_textures & (1 << index); } + bool UsesBuffer(u32 index) const { return m_buffers & (1 << index); } + +private: + MRCOwned> m_compute_pipeline; + u32 m_textures = 0; + u32 m_buffers = 0; +}; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLPipeline.mm b/Source/Core/VideoBackends/Metal/MTLPipeline.mm new file mode 100644 index 0000000000..405ea76973 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLPipeline.mm @@ -0,0 +1,66 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLPipeline.h" + +#include "Common/MsgHandler.h" + +static void MarkAsUsed(u32* list, u32 start, u32 length) +{ + for (u32 i = start; i < start + length; ++i) + *list |= 1 << i; +} + +static void GetArguments(NSArray* arguments, u32* textures, u32* samplers, + u32* buffers) +{ + for (MTLArgument* argument in arguments) + { + const u32 idx = [argument index]; + const u32 length = [argument arrayLength]; + if (idx + length > 32) + { + PanicAlertFmt("Making a MTLPipeline with high argument index {:d}..<{:d} for {:s}", // + idx, idx + length, [[argument name] UTF8String]); + continue; + } + switch ([argument type]) + { + case MTLArgumentTypeTexture: + if (textures) + MarkAsUsed(textures, idx, length); + else + PanicAlertFmt("Vertex function wants a texture!"); + break; + case MTLArgumentTypeSampler: + if (samplers) + MarkAsUsed(samplers, idx, length); + else + PanicAlertFmt("Vertex function wants a sampler!"); + break; + case MTLArgumentTypeBuffer: + MarkAsUsed(buffers, idx, length); + break; + default: + break; + } + } +} + +Metal::Pipeline::Pipeline(MRCOwned> pipeline, + MTLRenderPipelineReflection* reflection, MTLPrimitiveType prim, + MTLCullMode cull, DepthState depth, AbstractPipelineUsage usage) + : m_pipeline(std::move(pipeline)), m_prim(prim), m_cull(cull), m_depth_stencil(depth), + m_usage(usage) +{ + GetArguments([reflection vertexArguments], nullptr, nullptr, &m_vertex_buffers); + GetArguments([reflection fragmentArguments], &m_textures, &m_samplers, &m_fragment_buffers); +} + +Metal::ComputePipeline::ComputePipeline(ShaderStage stage, MTLComputePipelineReflection* reflection, + std::string msl, MRCOwned> shader, + MRCOwned> pipeline) + : Shader(stage, std::move(msl), std::move(shader)), m_compute_pipeline(std::move(pipeline)) +{ + GetArguments([reflection arguments], &m_textures, nullptr, &m_buffers); +} diff --git a/Source/Core/VideoBackends/Metal/MTLRenderer.h b/Source/Core/VideoBackends/Metal/MTLRenderer.h new file mode 100644 index 0000000000..6208e3f57e --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLRenderer.h @@ -0,0 +1,91 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "VideoCommon/RenderBase.h" + +#include "VideoBackends/Metal/MRCHelpers.h" + +namespace Metal +{ +class Framebuffer; +class Texture; + +class Renderer final : public ::Renderer +{ +public: + Renderer(MRCOwned layer, int width, int height, float layer_scale); + ~Renderer() override; + + bool IsHeadless() const override; + + bool Initialize() override; + + std::unique_ptr CreateTexture(const TextureConfig& config, + std::string_view name) override; + std::unique_ptr + CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override; + std::unique_ptr + CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override; + + std::unique_ptr CreateShaderFromSource(ShaderStage stage, std::string_view source, + std::string_view name) override; + std::unique_ptr CreateShaderFromBinary(ShaderStage stage, const void* data, + size_t length, + std::string_view name) override; + std::unique_ptr CreateShaderFromMSL(ShaderStage stage, std::string msl, + std::string_view glsl, std::string_view name); + std::unique_ptr + CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; + std::unique_ptr CreatePipeline(const AbstractPipelineConfig& config, + const void* cache_data = nullptr, + size_t cache_data_length = 0) override; + + void Flush() override; + void WaitForGPUIdle() override; + void OnConfigChanged(u32 bits) override; + + void ClearScreen(const MathUtil::Rectangle& rc, bool color_enable, bool alpha_enable, + bool z_enable, u32 color, u32 z) override; + + void SetPipeline(const AbstractPipeline* pipeline) override; + void SetFramebuffer(AbstractFramebuffer* framebuffer) override; + void SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) override; + void SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value = {}, + float depth_value = 0.0f) override; + void SetScissorRect(const MathUtil::Rectangle& rc) override; + void SetTexture(u32 index, const AbstractTexture* texture) override; + void SetSamplerState(u32 index, const SamplerState& state) override; + void SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) override; + void UnbindTexture(const AbstractTexture* texture) override; + void SetViewport(float x, float y, float width, float height, float near_depth, + float far_depth) override; + void Draw(u32 base_vertex, u32 num_vertices) override; + void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; + void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override; + void BindBackbuffer(const ClearColor& clear_color = {}) override; + void PresentBackbuffer() override; + +protected: + std::unique_ptr<::BoundingBox> CreateBoundingBox() const override; + +private: + MRCOwned m_layer; + MRCOwned> m_drawable; + std::unique_ptr m_bb_texture; + std::unique_ptr m_backbuffer; + u32 m_texture_counter = 0; + u32 m_staging_texture_counter = 0; + std::array m_shader_counter = {}; + u32 m_pipeline_counter = 0; + + void CheckForSurfaceChange(); + void CheckForSurfaceResize(); + void SetupSurface(); +}; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLRenderer.mm b/Source/Core/VideoBackends/Metal/MTLRenderer.mm new file mode 100644 index 0000000000..b552b11722 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLRenderer.mm @@ -0,0 +1,709 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLRenderer.h" + +#include "VideoBackends/Metal/MTLBoundingBox.h" +#include "VideoBackends/Metal/MTLObjectCache.h" +#include "VideoBackends/Metal/MTLPipeline.h" +#include "VideoBackends/Metal/MTLStateTracker.h" +#include "VideoBackends/Metal/MTLTexture.h" +#include "VideoBackends/Metal/MTLUtil.h" +#include "VideoBackends/Metal/MTLVertexManager.h" + +#include "VideoCommon/FramebufferManager.h" +#include "VideoCommon/NativeVertexFormat.h" +#include "VideoCommon/VertexShaderGen.h" +#include "VideoCommon/VideoBackendBase.h" + +Metal::Renderer::Renderer(MRCOwned layer, int width, int height, float layer_scale) + : ::Renderer(width, height, layer_scale, Util::ToAbstract([layer pixelFormat])), + m_layer(std::move(layer)) +{ + UpdateActiveConfig(); +} + +Metal::Renderer::~Renderer() = default; + +bool Metal::Renderer::IsHeadless() const +{ + return m_layer == nullptr; +} + +bool Metal::Renderer::Initialize() +{ + if (!::Renderer::Initialize()) + return false; + SetupSurface(); + g_state_tracker->FlushEncoders(); + return true; +} + +// MARK: Texture Creation + +std::unique_ptr Metal::Renderer::CreateTexture(const TextureConfig& config, + std::string_view name) +{ + @autoreleasepool + { + MRCOwned desc = MRCTransfer([MTLTextureDescriptor new]); + [desc setTextureType:config.samples > 1 ? MTLTextureType2DMultisampleArray : + MTLTextureType2DArray]; + [desc setPixelFormat:Util::FromAbstract(config.format)]; + [desc setWidth:config.width]; + [desc setHeight:config.height]; + [desc setMipmapLevelCount:config.levels]; + [desc setArrayLength:config.layers]; + [desc setSampleCount:config.samples]; + [desc setStorageMode:MTLStorageModePrivate]; + MTLTextureUsage usage = MTLTextureUsageShaderRead; + if (config.IsRenderTarget()) + usage |= MTLTextureUsageRenderTarget; + if (config.IsComputeImage()) + usage |= MTLTextureUsageShaderWrite; + [desc setUsage:usage]; + id texture = [g_device newTextureWithDescriptor:desc]; + if (!texture) + return nullptr; + + if (name.empty()) + [texture setLabel:[NSString stringWithFormat:@"Texture %d", m_texture_counter++]]; + else + [texture setLabel:MRCTransfer([[NSString alloc] initWithBytes:name.data() + length:name.size() + encoding:NSUTF8StringEncoding])]; + return std::make_unique(MRCTransfer(texture), config); + } +} + +std::unique_ptr +Metal::Renderer::CreateStagingTexture(StagingTextureType type, const TextureConfig& config) +{ + @autoreleasepool + { + const size_t stride = config.GetStride(); + const size_t buffer_size = stride * static_cast(config.height); + + MTLResourceOptions options = MTLStorageModeShared; + if (type == StagingTextureType::Upload) + options |= MTLResourceCPUCacheModeWriteCombined; + + id buffer = [g_device newBufferWithLength:buffer_size options:options]; + if (!buffer) + return nullptr; + [buffer + setLabel:[NSString stringWithFormat:@"Staging Texture %d", m_staging_texture_counter++]]; + return std::make_unique(MRCTransfer(buffer), type, config); + } +} + +std::unique_ptr +Metal::Renderer::CreateFramebuffer(AbstractTexture* color_attachment, + AbstractTexture* depth_attachment) +{ + AbstractTexture* const either_attachment = color_attachment ? color_attachment : depth_attachment; + return std::make_unique( + color_attachment, depth_attachment, either_attachment->GetWidth(), + either_attachment->GetHeight(), either_attachment->GetLayers(), + either_attachment->GetSamples()); +} + +// MARK: Pipeline Creation + +namespace Metal +{ +class VertexFormat : public NativeVertexFormat +{ +public: + VertexFormat(const PortableVertexDeclaration& vtx_decl) + : NativeVertexFormat(vtx_decl), m_desc(MRCTransfer([MTLVertexDescriptor new])) + { + [[[m_desc layouts] objectAtIndexedSubscript:0] setStride:vtx_decl.stride]; + SetAttribute(SHADER_POSITION_ATTRIB, vtx_decl.position); + SetAttributes(SHADER_NORMAL_ATTRIB, vtx_decl.normals); + SetAttributes(SHADER_COLOR0_ATTRIB, vtx_decl.colors); + SetAttributes(SHADER_TEXTURE0_ATTRIB, vtx_decl.texcoords); + SetAttribute(SHADER_POSMTX_ATTRIB, vtx_decl.posmtx); + } + + MTLVertexDescriptor* Get() const { return m_desc; } + +private: + template + void SetAttributes(u32 attribute, const AttributeFormat (&format)[N]) + { + for (size_t i = 0; i < N; i++) + SetAttribute(attribute + i, format[i]); + } + void SetAttribute(u32 attribute, const AttributeFormat& format) + { + if (!format.enable) + return; + MTLVertexAttributeDescriptor* desc = [[m_desc attributes] objectAtIndexedSubscript:attribute]; + [desc setFormat:ConvertFormat(format.type, format.components, format.integer)]; + [desc setOffset:format.offset]; + [desc setBufferIndex:0]; + } + + static MTLVertexFormat ConvertFormat(ComponentFormat format, int count, bool int_format) + { + static constexpr MTLVertexFormat formats[2][5][4] = { + [false] = { + [static_cast(ComponentFormat::UByte)] = { MTLVertexFormatUCharNormalized, MTLVertexFormatUChar2Normalized, MTLVertexFormatUChar3Normalized, MTLVertexFormatUChar4Normalized }, + [static_cast(ComponentFormat::Byte)] = { MTLVertexFormatCharNormalized, MTLVertexFormatChar2Normalized, MTLVertexFormatChar3Normalized, MTLVertexFormatChar4Normalized }, + [static_cast(ComponentFormat::UShort)] = { MTLVertexFormatUShortNormalized, MTLVertexFormatUShort2Normalized, MTLVertexFormatUShort3Normalized, MTLVertexFormatUShort4Normalized }, + [static_cast(ComponentFormat::Short)] = { MTLVertexFormatShortNormalized, MTLVertexFormatShort2Normalized, MTLVertexFormatShort3Normalized, MTLVertexFormatShort4Normalized }, + [static_cast(ComponentFormat::Float)] = { MTLVertexFormatFloat, MTLVertexFormatFloat2, MTLVertexFormatFloat3, MTLVertexFormatFloat4 }, + }, + [true] = { + [static_cast(ComponentFormat::UByte)] = { MTLVertexFormatUChar, MTLVertexFormatUChar2, MTLVertexFormatUChar3, MTLVertexFormatUChar4 }, + [static_cast(ComponentFormat::Byte)] = { MTLVertexFormatChar, MTLVertexFormatChar2, MTLVertexFormatChar3, MTLVertexFormatChar4 }, + [static_cast(ComponentFormat::UShort)] = { MTLVertexFormatUShort, MTLVertexFormatUShort2, MTLVertexFormatUShort3, MTLVertexFormatUShort4 }, + [static_cast(ComponentFormat::Short)] = { MTLVertexFormatShort, MTLVertexFormatShort2, MTLVertexFormatShort3, MTLVertexFormatShort4 }, + [static_cast(ComponentFormat::Float)] = { MTLVertexFormatFloat, MTLVertexFormatFloat2, MTLVertexFormatFloat3, MTLVertexFormatFloat4 }, + }, + }; + return formats[int_format][static_cast(format)][count - 1]; + } + + MRCOwned m_desc; +}; +} // namespace Metal + +std::unique_ptr Metal::Renderer::CreateShaderFromSource(ShaderStage stage, + std::string_view source, + std::string_view name) +{ + std::optional msl = Util::TranslateShaderToMSL(stage, source); + if (!msl.has_value()) + { + PanicAlertFmt("Failed to convert shader {} to MSL", name); + return nullptr; + } + + return CreateShaderFromMSL(stage, std::move(*msl), source, name); +} + +std::unique_ptr Metal::Renderer::CreateShaderFromBinary(ShaderStage stage, + const void* data, + size_t length, + std::string_view name) +{ + return CreateShaderFromMSL(stage, std::string(static_cast(data), length), {}, name); +} + +// clang-format off + +static const char* StageFilename(ShaderStage stage) +{ + switch (stage) + { + case ShaderStage::Vertex: return "vs"; + case ShaderStage::Geometry: return "gs"; + case ShaderStage::Pixel: return "ps"; + case ShaderStage::Compute: return "cs"; + } +} + +static NSString* GenericShaderName(ShaderStage stage) +{ + switch (stage) + { + case ShaderStage::Vertex: return @"Vertex shader %d"; + case ShaderStage::Geometry: return @"Geometry shader %d"; + case ShaderStage::Pixel: return @"Pixel shader %d"; + case ShaderStage::Compute: return @"Compute shader %d"; + } +} + +// clang-format on + +std::unique_ptr Metal::Renderer::CreateShaderFromMSL(ShaderStage stage, + std::string msl, + std::string_view glsl, + std::string_view name) +{ + @autoreleasepool + { + NSError* err = nullptr; + auto DumpBadShader = [&](std::string_view msg) { + static int counter = 0; + std::string filename = VideoBackendBase::BadShaderFilename(StageFilename(stage), counter++); + std::ofstream stream(filename); + if (stream.good()) + { + stream << msl << std::endl; + stream << "/*" << std::endl; + stream << msg << std::endl; + stream << "Error:" << std::endl; + stream << [[err localizedDescription] UTF8String] << std::endl; + if (!glsl.empty()) + { + stream << "Original GLSL:" << std::endl; + stream << glsl << std::endl; + } + else + { + stream << "Shader was created with cached MSL so no GLSL is available." << std::endl; + } + } + + stream << std::endl; + stream << "Dolphin Version: " << Common::GetScmRevStr() << std::endl; + stream << "Video Backend: " << g_video_backend->GetDisplayName() << std::endl; + stream << "*/" << std::endl; + stream.close(); + + PanicAlertFmt("{} (written to {})\n", msg, filename); + }; + + auto lib = MRCTransfer([g_device newLibraryWithSource:[NSString stringWithUTF8String:msl.data()] + options:nil + error:&err]); + if (err) + { + DumpBadShader(fmt::format("Failed to compile {}", name)); + return nullptr; + } + auto fn = MRCTransfer([lib newFunctionWithName:@"main0"]); + if (!fn) + { + DumpBadShader(fmt::format("Shader {} is missing its main0 function", name)); + return nullptr; + } + if (!name.empty()) + [fn setLabel:MRCTransfer([[NSString alloc] initWithBytes:name.data() + length:name.size() + encoding:NSUTF8StringEncoding])]; + else + [fn setLabel:[NSString stringWithFormat:GenericShaderName(stage), + m_shader_counter[static_cast(stage)]++]]; + [lib setLabel:[fn label]]; + if (stage == ShaderStage::Compute) + { + MTLComputePipelineReflection* reflection = nullptr; + auto desc = [MTLComputePipelineDescriptor new]; + [desc setComputeFunction:fn]; + [desc setLabel:[fn label]]; + MRCOwned> pipeline = + MRCTransfer([g_device newComputePipelineStateWithDescriptor:desc + options:MTLPipelineOptionArgumentInfo + reflection:&reflection + error:&err]); + if (err) + { + DumpBadShader(fmt::format("Failed to compile compute pipeline {}", name)); + return nullptr; + } + return std::make_unique(stage, reflection, std::move(msl), std::move(fn), + std::move(pipeline)); + } + return std::make_unique(stage, std::move(msl), std::move(fn)); + } +} + +std::unique_ptr +Metal::Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) +{ + @autoreleasepool + { + return std::make_unique(vtx_decl); + } +} + +static MTLPrimitiveTopologyClass GetClass(PrimitiveType prim) +{ + switch (prim) + { + case PrimitiveType::Points: + return MTLPrimitiveTopologyClassPoint; + case PrimitiveType::Lines: + return MTLPrimitiveTopologyClassLine; + case PrimitiveType::Triangles: + case PrimitiveType::TriangleStrip: + return MTLPrimitiveTopologyClassTriangle; + } +} + +static MTLPrimitiveType Convert(PrimitiveType prim) +{ + switch (prim) + { + case PrimitiveType::Points: return MTLPrimitiveTypePoint; + case PrimitiveType::Lines: return MTLPrimitiveTypeLine; + case PrimitiveType::Triangles: return MTLPrimitiveTypeTriangle; + case PrimitiveType::TriangleStrip: return MTLPrimitiveTypeTriangleStrip; + } +} + +static MTLCullMode Convert(CullMode cull) +{ + switch (cull) + { + case CullMode::None: + case CullMode::All: // Handled by disabling rasterization + return MTLCullModeNone; + case CullMode::Front: + return MTLCullModeFront; + case CullMode::Back: + return MTLCullModeBack; + } +} + +static MTLBlendFactor Convert(DstBlendFactor factor, bool src1) +{ + static constexpr MTLBlendFactor factors[2][8] = { + [false] = { + [static_cast(DstBlendFactor::Zero)] = MTLBlendFactorZero, + [static_cast(DstBlendFactor::One)] = MTLBlendFactorOne, + [static_cast(DstBlendFactor::SrcClr)] = MTLBlendFactorSourceColor, + [static_cast(DstBlendFactor::InvSrcClr)] = MTLBlendFactorOneMinusSourceColor, + [static_cast(DstBlendFactor::SrcAlpha)] = MTLBlendFactorSourceAlpha, + [static_cast(DstBlendFactor::InvSrcAlpha)] = MTLBlendFactorOneMinusSourceAlpha, + [static_cast(DstBlendFactor::DstAlpha)] = MTLBlendFactorDestinationAlpha, + [static_cast(DstBlendFactor::InvDstAlpha)] = MTLBlendFactorOneMinusDestinationAlpha, + }, + [true] = { + [static_cast(DstBlendFactor::Zero)] = MTLBlendFactorZero, + [static_cast(DstBlendFactor::One)] = MTLBlendFactorOne, + [static_cast(DstBlendFactor::SrcClr)] = MTLBlendFactorSourceColor, + [static_cast(DstBlendFactor::InvSrcClr)] = MTLBlendFactorOneMinusSource1Color, + [static_cast(DstBlendFactor::SrcAlpha)] = MTLBlendFactorSource1Alpha, + [static_cast(DstBlendFactor::InvSrcAlpha)] = MTLBlendFactorOneMinusSource1Alpha, + [static_cast(DstBlendFactor::DstAlpha)] = MTLBlendFactorDestinationAlpha, + [static_cast(DstBlendFactor::InvDstAlpha)] = MTLBlendFactorOneMinusDestinationAlpha, + }, + }; + return factors[src1][static_cast(factor)]; +} + +static MTLBlendFactor Convert(SrcBlendFactor factor, bool src1) +{ + static constexpr MTLBlendFactor factors[2][8] = { + [false] = { + [static_cast(SrcBlendFactor::Zero)] = MTLBlendFactorZero, + [static_cast(SrcBlendFactor::One)] = MTLBlendFactorOne, + [static_cast(SrcBlendFactor::DstClr)] = MTLBlendFactorDestinationColor, + [static_cast(SrcBlendFactor::InvDstClr)] = MTLBlendFactorOneMinusDestinationColor, + [static_cast(SrcBlendFactor::SrcAlpha)] = MTLBlendFactorSourceAlpha, + [static_cast(SrcBlendFactor::InvSrcAlpha)] = MTLBlendFactorOneMinusSourceAlpha, + [static_cast(SrcBlendFactor::DstAlpha)] = MTLBlendFactorDestinationAlpha, + [static_cast(SrcBlendFactor::InvDstAlpha)] = MTLBlendFactorOneMinusDestinationAlpha, + }, + [true] = { + [static_cast(SrcBlendFactor::Zero)] = MTLBlendFactorZero, + [static_cast(SrcBlendFactor::One)] = MTLBlendFactorOne, + [static_cast(SrcBlendFactor::DstClr)] = MTLBlendFactorDestinationColor, + [static_cast(SrcBlendFactor::InvDstClr)] = MTLBlendFactorOneMinusDestinationColor, + [static_cast(SrcBlendFactor::SrcAlpha)] = MTLBlendFactorSource1Alpha, + [static_cast(SrcBlendFactor::InvSrcAlpha)] = MTLBlendFactorOneMinusSource1Alpha, + [static_cast(SrcBlendFactor::DstAlpha)] = MTLBlendFactorDestinationAlpha, + [static_cast(SrcBlendFactor::InvDstAlpha)] = MTLBlendFactorOneMinusDestinationAlpha, + }, + }; + return factors[src1][static_cast(factor)]; +} + +std::unique_ptr +Metal::Renderer::CreatePipeline(const AbstractPipelineConfig& config, const void* cache_data, + size_t cache_data_length) +{ + @autoreleasepool + { + assert(!config.geometry_shader); + auto desc = MRCTransfer([MTLRenderPipelineDescriptor new]); + [desc setLabel:[NSString stringWithFormat:@"Pipeline %d", m_pipeline_counter++]]; + [desc setVertexFunction:static_cast(config.vertex_shader)->GetShader()]; + [desc setFragmentFunction:static_cast(config.pixel_shader)->GetShader()]; + if (config.vertex_format) + [desc setVertexDescriptor:static_cast(config.vertex_format)->Get()]; + RasterizationState rs = config.rasterization_state; + [desc setInputPrimitiveTopology:GetClass(rs.primitive)]; + if (rs.cullmode == CullMode::All) + [desc setRasterizationEnabled:NO]; + MTLRenderPipelineColorAttachmentDescriptor* color0 = [desc colorAttachments][0]; + BlendingState bs = config.blending_state; + MTLColorWriteMask mask = MTLColorWriteMaskNone; + if (bs.colorupdate) + mask |= MTLColorWriteMaskRed | MTLColorWriteMaskGreen | MTLColorWriteMaskBlue; + if (bs.alphaupdate) + mask |= MTLColorWriteMaskAlpha; + [color0 setWriteMask:mask]; + if (bs.blendenable) + { + [color0 setBlendingEnabled:YES]; + [color0 setSourceRGBBlendFactor: Convert(bs.srcfactor, bs.usedualsrc)]; + [color0 setSourceAlphaBlendFactor: Convert(bs.srcfactoralpha, bs.usedualsrc)]; + [color0 setDestinationRGBBlendFactor: Convert(bs.dstfactor, bs.usedualsrc)]; + [color0 setDestinationAlphaBlendFactor:Convert(bs.dstfactoralpha, bs.usedualsrc)]; + [color0 setRgbBlendOperation: bs.subtract ? MTLBlendOperationReverseSubtract : MTLBlendOperationAdd]; + [color0 setAlphaBlendOperation:bs.subtractAlpha ? MTLBlendOperationReverseSubtract : MTLBlendOperationAdd]; + } + FramebufferState fs = config.framebuffer_state; + [color0 setPixelFormat:Util::FromAbstract(fs.color_texture_format)]; + [desc setDepthAttachmentPixelFormat:Util::FromAbstract(fs.depth_texture_format)]; + if (Util::HasStencil(fs.depth_texture_format)) + [desc setStencilAttachmentPixelFormat:Util::FromAbstract(fs.depth_texture_format)]; + NSError* err = nullptr; + MTLRenderPipelineReflection* reflection = nullptr; + id pipe = + [g_device newRenderPipelineStateWithDescriptor:desc + options:MTLPipelineOptionArgumentInfo + reflection:&reflection + error:&err]; + if (err) + { + PanicAlertFmt("Failed to compile pipeline for {} and {}: {}", + [[[desc vertexFunction] label] UTF8String], + [[[desc fragmentFunction] label] UTF8String], + [[err localizedDescription] UTF8String]); + return nullptr; + } + return std::make_unique(MRCTransfer(pipe), reflection, Convert(rs.primitive), + Convert(rs.cullmode), config.depth_state, config.usage); + } +} + +void Metal::Renderer::Flush() +{ + @autoreleasepool + { + g_state_tracker->FlushEncoders(); + } +} + +void Metal::Renderer::WaitForGPUIdle() +{ + @autoreleasepool + { + g_state_tracker->FlushEncoders(); + g_state_tracker->WaitForFlushedEncoders(); + } +} + +void Metal::Renderer::OnConfigChanged(u32 bits) +{ + if (bits & CONFIG_CHANGE_BIT_VSYNC) + [m_layer setDisplaySyncEnabled:g_ActiveConfig.bVSyncActive]; + + if (bits & CONFIG_CHANGE_BIT_ANISOTROPY) + { + g_object_cache->ReloadSamplers(); + g_state_tracker->ReloadSamplers(); + } +} + +void Metal::Renderer::ClearScreen(const MathUtil::Rectangle& rc, bool color_enable, + bool alpha_enable, bool z_enable, u32 color, u32 z) +{ + MathUtil::Rectangle target_rc = Renderer::ConvertEFBRectangle(rc); + target_rc.ClampUL(0, 0, m_target_width, m_target_height); + + // All Metal render passes are fullscreen, so we can only run a fast clear if the target is too + if (target_rc == MathUtil::Rectangle(0, 0, m_target_width, m_target_height)) + { + // Determine whether the EFB has an alpha channel. If it doesn't, we can clear the alpha + // channel to 0xFF. This hopefully allows us to use the fast path in most cases. + if (bpmem.zcontrol.pixel_format == PixelFormat::RGB565_Z16 || + bpmem.zcontrol.pixel_format == PixelFormat::RGB8_Z24 || + bpmem.zcontrol.pixel_format == PixelFormat::Z24) + { + // Force alpha writes, and clear the alpha channel. This is different to the other backends, + // where the existing values of the alpha channel are preserved. + alpha_enable = true; + color &= 0x00FFFFFF; + } + + bool c_ok = (color_enable && alpha_enable) || + g_state_tracker->GetCurrentFramebuffer()->GetColorFormat() == + AbstractTextureFormat::Undefined; + bool z_ok = z_enable || g_state_tracker->GetCurrentFramebuffer()->GetDepthFormat() == + AbstractTextureFormat::Undefined; + if (c_ok && z_ok) + { + @autoreleasepool + { + // clang-format off + MTLClearColor clear_color = MTLClearColorMake( + static_cast((color >> 16) & 0xFF) / 255.0, + static_cast((color >> 8) & 0xFF) / 255.0, + static_cast((color >> 0) & 0xFF) / 255.0, + static_cast((color >> 24) & 0xFF) / 255.0); + // clang-format on + float z_normalized = static_cast(z & 0xFFFFFF) / 16777216.0f; + if (!g_Config.backend_info.bSupportsReversedDepthRange) + z_normalized = 1.f - z_normalized; + g_state_tracker->BeginClearRenderPass(clear_color, z_normalized); + return; + } + } + } + + g_state_tracker->EnableEncoderLabel(false); + g_framebuffer_manager->ClearEFB(rc, color_enable, alpha_enable, z_enable, color, z); + g_state_tracker->EnableEncoderLabel(true); +} + +void Metal::Renderer::SetPipeline(const AbstractPipeline* pipeline) +{ + g_state_tracker->SetPipeline(static_cast(pipeline)); +} + +void Metal::Renderer::SetFramebuffer(AbstractFramebuffer* framebuffer) +{ + // Shouldn't be bound as a texture. + if (AbstractTexture* color = framebuffer->GetColorAttachment()) + g_state_tracker->UnbindTexture(static_cast(color)->GetMTLTexture()); + if (AbstractTexture* depth = framebuffer->GetDepthAttachment()) + g_state_tracker->UnbindTexture(static_cast(depth)->GetMTLTexture()); + + m_current_framebuffer = framebuffer; + g_state_tracker->SetCurrentFramebuffer(static_cast(framebuffer)); +} + +void Metal::Renderer::SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) +{ + @autoreleasepool + { + SetFramebuffer(framebuffer); + g_state_tracker->BeginRenderPass(MTLLoadActionDontCare); + } +} + +void Metal::Renderer::SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, + const ClearColor& color_value, float depth_value) +{ + @autoreleasepool + { + SetFramebuffer(framebuffer); + MTLClearColor color = + MTLClearColorMake(color_value[0], color_value[1], color_value[2], color_value[3]); + g_state_tracker->BeginClearRenderPass(color, depth_value); + } +} + +void Metal::Renderer::SetScissorRect(const MathUtil::Rectangle& rc) +{ + g_state_tracker->SetScissor(rc); +} + +void Metal::Renderer::SetTexture(u32 index, const AbstractTexture* texture) +{ + g_state_tracker->SetTexture( + index, texture ? static_cast(texture)->GetMTLTexture() : nullptr); +} + +void Metal::Renderer::SetSamplerState(u32 index, const SamplerState& state) +{ + g_state_tracker->SetSampler(index, state); +} + +void Metal::Renderer::SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) +{ + g_state_tracker->SetComputeTexture(static_cast(texture)); +} + +void Metal::Renderer::UnbindTexture(const AbstractTexture* texture) +{ + g_state_tracker->UnbindTexture(static_cast(texture)->GetMTLTexture()); +} + +void Metal::Renderer::SetViewport(float x, float y, float width, float height, float near_depth, + float far_depth) +{ + g_state_tracker->SetViewport(x, y, width, height, near_depth, far_depth); +} + +void Metal::Renderer::Draw(u32 base_vertex, u32 num_vertices) +{ + @autoreleasepool + { + g_state_tracker->Draw(base_vertex, num_vertices); + } +} + +void Metal::Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) +{ + @autoreleasepool + { + g_state_tracker->DrawIndexed(base_index, num_indices, base_vertex); + } +} + +void Metal::Renderer::DispatchComputeShader(const AbstractShader* shader, // + u32 groupsize_x, u32 groupsize_y, u32 groupsize_z, + u32 groups_x, u32 groups_y, u32 groups_z) +{ + @autoreleasepool + { + g_state_tracker->SetPipeline(static_cast(shader)); + g_state_tracker->DispatchComputeShader(groupsize_x, groupsize_y, groupsize_z, // + groups_x, groups_y, groups_z); + } +} + +void Metal::Renderer::BindBackbuffer(const ClearColor& clear_color) +{ + @autoreleasepool + { + CheckForSurfaceChange(); + CheckForSurfaceResize(); + m_drawable = MRCRetain([m_layer nextDrawable]); + m_bb_texture->SetMTLTexture(MRCRetain([m_drawable texture])); + SetAndClearFramebuffer(m_backbuffer.get(), clear_color); + } +} + +void Metal::Renderer::PresentBackbuffer() +{ + @autoreleasepool + { + g_state_tracker->EndRenderPass(); + if (m_drawable) + { + [g_state_tracker->GetRenderCmdBuf() + addScheduledHandler:[drawable = std::move(m_drawable)](id) { [drawable present]; }]; + m_bb_texture->SetMTLTexture(nullptr); + m_drawable = nullptr; + } + g_state_tracker->FlushEncoders(); + } +} + +std::unique_ptr<::BoundingBox> Metal::Renderer::CreateBoundingBox() const +{ + return std::make_unique(); +} + +void Metal::Renderer::CheckForSurfaceChange() +{ + if (!m_surface_changed.TestAndClear()) + return; + m_layer = MRCRetain(static_cast(m_new_surface_handle)); + m_new_surface_handle = nullptr; + SetupSurface(); +} + +void Metal::Renderer::CheckForSurfaceResize() +{ + if (!m_surface_resized.TestAndClear()) + return; + SetupSurface(); +} + +void Metal::Renderer::SetupSurface() +{ + CGSize size = [m_layer bounds].size; + // TODO: Update m_backbuffer_scale (need to make doing that not break everything) + const float backbuffer_scale = [m_layer contentsScale]; + size.width *= backbuffer_scale; + size.height *= backbuffer_scale; + [m_layer setDrawableSize:size]; + m_backbuffer_width = size.width; + m_backbuffer_height = size.height; + TextureConfig cfg(m_backbuffer_width, m_backbuffer_height, 1, 1, 1, m_backbuffer_format, + AbstractTextureFlag_RenderTarget); + m_bb_texture = std::make_unique(nullptr, cfg); + m_backbuffer = std::make_unique(m_bb_texture.get(), nullptr, // + m_backbuffer_width, m_backbuffer_height, 1, 1); +} diff --git a/Source/Core/VideoBackends/Metal/MTLShader.h b/Source/Core/VideoBackends/Metal/MTLShader.h new file mode 100644 index 0000000000..053d84c2b9 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLShader.h @@ -0,0 +1,30 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "VideoBackends/Metal/MRCHelpers.h" + +#include "VideoCommon/AbstractPipeline.h" +#include "VideoCommon/AbstractShader.h" + +namespace Metal +{ +class Shader : public AbstractShader +{ +public: + explicit Shader(ShaderStage stage, std::string msl, MRCOwned> shader) + : AbstractShader(stage), m_msl(std::move(msl)), m_shader(std::move(shader)) + { + } + + id GetShader() const { return m_shader; } + BinaryData GetBinary() const override { return BinaryData(m_msl.begin(), m_msl.end()); } + +private: + std::string m_msl; + MRCOwned> m_shader; +}; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.h b/Source/Core/VideoBackends/Metal/MTLStateTracker.h new file mode 100644 index 0000000000..d9c79774e0 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.h @@ -0,0 +1,266 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include + +#include "Common/Assert.h" +#include "Common/CommonTypes.h" +#include "Common/MathUtil.h" + +#include "VideoBackends/Metal/MRCHelpers.h" +#include "VideoBackends/Metal/MTLObjectCache.h" +#include "VideoBackends/Metal/MTLTexture.h" +#include "VideoBackends/Metal/MTLUtil.h" + +#include "VideoCommon/RenderBase.h" + +namespace Metal +{ +class Pipeline; +class ComputePipeline; + +class StateTracker +{ +public: + enum class UploadBuffer + { + Other, + Uniform, + Vertex, + Index, + Texels, + Last = Texels + }; + + struct Map + { + id gpu_buffer; + size_t gpu_offset; + void* cpu_buffer; + }; + + enum class AlignMask : size_t + { + Other = 15, + Uniform = 255, + }; + + StateTracker(StateTracker&&) = delete; + explicit StateTracker(); + ~StateTracker(); + + Framebuffer* GetCurrentFramebuffer() { return m_current_framebuffer; }; + void SetCurrentFramebuffer(Framebuffer* framebuffer); + void BeginClearRenderPass(MTLClearColor color, float depth); + void BeginRenderPass(MTLLoadAction load_action); + void BeginRenderPass(MTLRenderPassDescriptor* descriptor); + void BeginComputePass(); + MTLRenderPassDescriptor* GetRenderPassDescriptor(Framebuffer* framebuffer, + MTLLoadAction load_action); + + void EndRenderPass(); + void FlushEncoders(); + void WaitForFlushedEncoders(); + bool HasUnflushedData() { return static_cast(m_current_render_cmdbuf); } + bool GPUBusy() + { + return m_current_draw != 1 + m_last_finished_draw.load(std::memory_order_acquire); + } + void ReloadSamplers(); + + void SetPipeline(const Pipeline* pipe); + void SetPipeline(const ComputePipeline* pipe); + void SetScissor(const MathUtil::Rectangle& rect); + void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth); + void SetTexture(u32 idx, id texture); + void SetSampler(u32 idx, const SamplerState& sampler); + void SetComputeTexture(const Texture* texture); + void InvalidateUniforms(bool vertex, bool fragment); + void SetUtilityUniform(const void* buffer, size_t size); + void SetTexelBuffer(id buffer, u32 offset0, u32 offset1); + void SetVerticesAndIndices(Map vertices, Map indices); + void SetBBoxBuffer(id bbox, id upload, id download); + void SetVertexBufferNow(u32 idx, id buffer, u32 offset); + void SetFragmentBufferNow(u32 idx, id buffer, u32 offset); + /// Use around utility draws that are commonly used immediately before gx draws to the same buffer + void EnableEncoderLabel(bool enabled) { m_flags.should_apply_label = enabled; } + void UnbindTexture(id texture); + + void Draw(u32 base_vertex, u32 num_vertices); + void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex); + void DispatchComputeShader(u32 groupsize_x, u32 groupsize_y, u32 groupsize_z, u32 groups_x, + u32 groups_y, u32 groups_z); + void ResolveTexture(id src, id dst, u32 layer, u32 level); + + size_t Align(size_t amt, AlignMask align) + { + return (amt + static_cast(align)) & ~static_cast(align); + } + Map AllocateForTextureUpload(size_t amt); + Map Allocate(UploadBuffer buffer_idx, size_t amt, AlignMask align) + { + Preallocate(buffer_idx, amt); + return CommitPreallocation(buffer_idx, amt, align); + } + void* Preallocate(UploadBuffer buffer_idx, size_t amt); + /// Must follow a call to Preallocate where amt is >= to the one provided here + Map CommitPreallocation(UploadBuffer buffer_idx, size_t amt, AlignMask align) + { + DEBUG_ASSERT((m_upload_buffers[static_cast(buffer_idx)].usage.Pos() & + static_cast(align)) == 0); + return CommitPreallocation(buffer_idx, Align(amt, align)); + } + id GetUploadEncoder(); + id GetTextureUploadEncoder(); + id GetRenderCmdBuf(); + +private: + class UsageTracker + { + struct UsageEntry + { + u64 drawno; + size_t pos; + }; + std::vector m_usage; + size_t m_size = 0; + size_t m_pos = 0; + + public: + size_t Size() { return m_size; } + size_t Pos() { return m_pos; } + bool PrepareForAllocation(u64 last_draw, size_t amt); + size_t Allocate(u64 current_draw, size_t amt); + void Reset(size_t new_size); + }; + + struct CPUBuffer + { + UsageTracker usage; + MRCOwned> mtlbuffer; + void* buffer = nullptr; + }; + + struct BufferPair + { + UsageTracker usage; + MRCOwned> cpubuffer; + MRCOwned> gpubuffer; + void* buffer = nullptr; + size_t last_upload = 0; + }; + + struct Backref; + + std::shared_ptr m_backref; + MRCOwned> m_fence; + MRCOwned> m_upload_cmdbuf; + MRCOwned> m_upload_encoder; + MRCOwned> m_texture_upload_cmdbuf; + MRCOwned> m_texture_upload_encoder; + MRCOwned> m_current_render_cmdbuf; + MRCOwned> m_last_render_cmdbuf; + MRCOwned> m_current_render_encoder; + MRCOwned> m_current_compute_encoder; + MRCOwned m_render_pass_desc[3]; + MRCOwned m_resolve_pass_desc; + Framebuffer* m_current_framebuffer; + CPUBuffer m_texture_upload_buffer; + BufferPair m_upload_buffers[static_cast(UploadBuffer::Last) + 1]; + u64 m_current_draw = 1; + std::atomic m_last_finished_draw{0}; + + MRCOwned> m_dummy_texture; + + // MARK: State + u8 m_dirty_textures; + u8 m_dirty_samplers; + union Flags + { + struct + { + // clang-format off + bool has_gx_vs_uniform : 1; + bool has_gx_ps_uniform : 1; + bool has_utility_vs_uniform : 1; + bool has_utility_ps_uniform : 1; + bool has_compute_texture : 1; + bool has_pipeline : 1; + bool has_scissor : 1; + bool has_viewport : 1; + bool has_vertices : 1; + bool has_texel_buffer : 1; + bool bbox_fence : 1; + bool should_apply_label : 1; + // clang-format on + }; + u16 bits = 0; + void NewEncoder() + { + Flags reset_mask; + // Set the flags you *don't* want to reset + reset_mask.should_apply_label = true; + bits &= reset_mask.bits; + } + } m_flags; + + /// Things that represent the state of the encoder + struct Current + { + NSString* label; + id pipeline; + std::array, 2> vertex_buffers; + std::array, 2> fragment_buffers; + u32 width; + u32 height; + MathUtil::Rectangle scissor_rect; + Util::Viewport viewport; + MTLDepthClipMode depth_clip_mode; + MTLCullMode cull_mode; + DepthStencilSelector depth_stencil; + } m_current; + + /// Things that represent what we'd *like* to have on the encoder for the next draw + struct State + { + MathUtil::Rectangle scissor_rect; + Util::Viewport viewport; + const Pipeline* render_pipeline = nullptr; + const ComputePipeline* compute_pipeline = nullptr; + std::array, 8> textures = {}; + std::array, 8> samplers = {}; + std::array sampler_min_lod; + std::array sampler_max_lod; + std::array sampler_states; + const Texture* compute_texture = nullptr; + std::unique_ptr utility_uniform; + u32 utility_uniform_size = 0; + u32 utility_uniform_capacity = 0; + id bbox = nullptr; + id bbox_upload_fence = nullptr; + id bbox_download_fence = nullptr; + id vertices = nullptr; + id indices = nullptr; + id texels = nullptr; + u32 vertices_offset; + u32 indices_offset; + u32 texel_buffer_offset0; + u32 texel_buffer_offset1; + } m_state; + + void SetSamplerForce(u32 idx, const SamplerState& sampler); + void Sync(BufferPair& buffer); + Map CommitPreallocation(UploadBuffer buffer_idx, size_t actual_amt); + void CheckViewport(); + void CheckScissor(); + void PrepareRender(); + void PrepareCompute(); +}; + +extern std::unique_ptr g_state_tracker; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm new file mode 100644 index 0000000000..ea2c8cd328 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm @@ -0,0 +1,849 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLStateTracker.h" + +#include +#include + +#include "Common/Assert.h" +#include "Common/BitUtils.h" + +#include "VideoBackends/Metal/MTLObjectCache.h" +#include "VideoBackends/Metal/MTLPipeline.h" +#include "VideoBackends/Metal/MTLTexture.h" +#include "VideoBackends/Metal/MTLUtil.h" + +#include "VideoCommon/PixelShaderManager.h" +#include "VideoCommon/Statistics.h" +#include "VideoCommon/VertexShaderManager.h" +#include "VideoCommon/VideoConfig.h" + +std::unique_ptr Metal::g_state_tracker; + +struct Metal::StateTracker::Backref +{ + std::mutex mtx; + StateTracker* state_tracker; + explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {} +}; + +static NSString* GetName(Metal::StateTracker::UploadBuffer buffer) +{ + // clang-format off + switch (buffer) + { + case Metal::StateTracker::UploadBuffer::Texels: return @"Texels"; + case Metal::StateTracker::UploadBuffer::Vertex: return @"Vertices"; + case Metal::StateTracker::UploadBuffer::Index: return @"Indices"; + case Metal::StateTracker::UploadBuffer::Uniform: return @"Uniforms"; + case Metal::StateTracker::UploadBuffer::Other: return @"Generic Upload"; + } + // clang-format on +} + +// MARK: - UsageTracker + +bool Metal::StateTracker::UsageTracker::PrepareForAllocation(u64 last_draw, size_t amt) +{ + auto removeme = std::find_if(m_usage.begin(), m_usage.end(), + [last_draw](UsageEntry usage) { return usage.drawno > last_draw; }); + if (removeme != m_usage.begin()) + m_usage.erase(m_usage.begin(), removeme); + + bool still_in_use = false; + const bool needs_wrap = m_pos + amt > m_size; + if (!m_usage.empty()) + { + size_t used = m_usage.front().pos; + if (needs_wrap) + still_in_use = used >= m_pos || used < amt; + else + still_in_use = used >= m_pos && used < m_pos + amt; + } + if (needs_wrap) + m_pos = 0; + + return still_in_use || amt > m_size; +} + +size_t Metal::StateTracker::UsageTracker::Allocate(u64 current_draw, size_t amt) +{ + if (m_usage.empty() || m_usage.back().drawno != current_draw) + m_usage.push_back({current_draw, m_pos}); + size_t ret = m_pos; + m_pos += amt; + return ret; +} + +void Metal::StateTracker::UsageTracker::Reset(size_t new_size) +{ + m_usage.clear(); + m_size = new_size; + m_pos = 0; +} + +// MARK: - StateTracker + +Metal::StateTracker::StateTracker() : m_backref(std::make_shared(this)) +{ + m_flags.should_apply_label = true; + m_fence = MRCTransfer([g_device newFence]); + for (MRCOwned& rpdesc : m_render_pass_desc) + { + rpdesc = MRCTransfer([MTLRenderPassDescriptor new]); + [[rpdesc depthAttachment] setStoreAction:MTLStoreActionStore]; + [[rpdesc stencilAttachment] setStoreAction:MTLStoreActionStore]; + } + m_resolve_pass_desc = MRCTransfer([MTLRenderPassDescriptor new]); + auto color0 = [[m_resolve_pass_desc colorAttachments] objectAtIndexedSubscript:0]; + [color0 setLoadAction:MTLLoadActionLoad]; + [color0 setStoreAction:MTLStoreActionMultisampleResolve]; + MTLTextureDescriptor* texdesc = + [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm + width:1 + height:1 + mipmapped:NO]; + [texdesc setTextureType:MTLTextureType2DArray]; + [texdesc setUsage:MTLTextureUsageShaderRead]; + [texdesc setStorageMode:MTLStorageModePrivate]; + m_dummy_texture = MRCTransfer([g_device newTextureWithDescriptor:texdesc]); + [m_dummy_texture setLabel:@"Dummy Texture"]; + for (size_t i = 0; i < std::size(m_state.samplers); ++i) + { + SetSamplerForce(i, RenderState::GetLinearSamplerState()); + SetTexture(i, m_dummy_texture); + } +} + +Metal::StateTracker::~StateTracker() +{ + FlushEncoders(); + std::lock_guard lock(m_backref->mtx); + m_backref->state_tracker = nullptr; +} + +// MARK: BufferPair Ops + +Metal::StateTracker::Map Metal::StateTracker::AllocateForTextureUpload(size_t amt) +{ + amt = (amt + 15) & ~15ull; + CPUBuffer& buffer = m_texture_upload_buffer; + u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire); + bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt); + if (__builtin_expect(needs_new, false)) + { + // Orphan buffer + size_t newsize = std::max(buffer.usage.Size() * 2, 4096); + while (newsize < amt) + newsize *= 2; + MTLResourceOptions options = + MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined; + buffer.mtlbuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]); + [buffer.mtlbuffer setLabel:@"Texture Upload Buffer"]; + ASSERT_MSG(VIDEO, buffer.mtlbuffer, "Failed to allocate MTLBuffer (out of memory?)"); + buffer.buffer = [buffer.mtlbuffer contents]; + buffer.usage.Reset(newsize); + } + + size_t pos = buffer.usage.Allocate(m_current_draw, amt); + + Map ret = {buffer.mtlbuffer, pos, reinterpret_cast(buffer.buffer) + pos}; + DEBUG_ASSERT(pos <= buffer.usage.Size() && + "Previous code should have guaranteed there was enough space"); + return ret; +} + +void* Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt) +{ + BufferPair& buffer = m_upload_buffers[static_cast(buffer_idx)]; + u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire); + size_t base_pos = buffer.usage.Pos(); + bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt); + bool needs_upload = needs_new || buffer.usage.Pos() == 0; + if (!g_features.unified_memory && needs_upload) + { + if (base_pos != buffer.last_upload) + { + id encoder = GetUploadEncoder(); + [encoder copyFromBuffer:buffer.cpubuffer + sourceOffset:buffer.last_upload + toBuffer:buffer.gpubuffer + destinationOffset:buffer.last_upload + size:base_pos - buffer.last_upload]; + } + buffer.last_upload = 0; + } + if (__builtin_expect(needs_new, false)) + { + // Orphan buffer + size_t newsize = std::max(buffer.usage.Size() * 2, 4096); + while (newsize < amt) + newsize *= 2; + MTLResourceOptions options = + MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined; + buffer.cpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]); + [buffer.cpubuffer setLabel:GetName(buffer_idx)]; + ASSERT_MSG(VIDEO, buffer.cpubuffer, "Failed to allocate MTLBuffer (out of memory?)"); + buffer.buffer = [buffer.cpubuffer contents]; + buffer.usage.Reset(newsize); + if (!g_features.unified_memory) + { + options = MTLResourceStorageModePrivate | MTLResourceHazardTrackingModeUntracked; + buffer.gpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]); + [buffer.gpubuffer setLabel:GetName(buffer_idx)]; + ASSERT_MSG(VIDEO, buffer.gpubuffer, "Failed to allocate MTLBuffer (out of memory?)"); + } + } + return reinterpret_cast(buffer.buffer) + buffer.usage.Pos(); +} + +Metal::StateTracker::Map Metal::StateTracker::CommitPreallocation(UploadBuffer buffer_idx, + size_t amt) +{ + BufferPair& buffer = m_upload_buffers[static_cast(buffer_idx)]; + size_t pos = buffer.usage.Allocate(m_current_draw, amt); + Map ret = {nil, pos, reinterpret_cast(buffer.buffer) + pos}; + ret.gpu_buffer = g_features.unified_memory ? buffer.cpubuffer : buffer.gpubuffer; + DEBUG_ASSERT(pos <= buffer.usage.Size() && + "Previous code should have guaranteed there was enough space"); + return ret; +} + +void Metal::StateTracker::Sync(BufferPair& buffer) +{ + if (g_features.unified_memory || buffer.usage.Pos() == buffer.last_upload) + return; + + id encoder = GetUploadEncoder(); + [encoder copyFromBuffer:buffer.cpubuffer + sourceOffset:buffer.last_upload + toBuffer:buffer.gpubuffer + destinationOffset:buffer.last_upload + size:buffer.usage.Pos() - buffer.last_upload]; + buffer.last_upload = buffer.usage.Pos(); +} + +// MARK: Render Pass / Encoder Management + +id Metal::StateTracker::GetUploadEncoder() +{ + if (!m_upload_cmdbuf) + { + @autoreleasepool + { + m_upload_cmdbuf = MRCRetain([g_queue commandBuffer]); + [m_upload_cmdbuf setLabel:@"Vertex Upload"]; + m_upload_encoder = MRCRetain([m_upload_cmdbuf blitCommandEncoder]); + [m_upload_encoder setLabel:@"Vertex Upload"]; + } + } + return m_upload_encoder; +} + +id Metal::StateTracker::GetTextureUploadEncoder() +{ + if (!m_texture_upload_cmdbuf) + { + @autoreleasepool + { + m_texture_upload_cmdbuf = MRCRetain([g_queue commandBuffer]); + [m_texture_upload_cmdbuf setLabel:@"Texture Upload"]; + m_texture_upload_encoder = MRCRetain([m_texture_upload_cmdbuf blitCommandEncoder]); + [m_texture_upload_encoder setLabel:@"Texture Upload"]; + } + } + return m_texture_upload_encoder; +} + +id Metal::StateTracker::GetRenderCmdBuf() +{ + if (!m_current_render_cmdbuf) + { + @autoreleasepool + { + m_current_render_cmdbuf = MRCRetain([g_queue commandBuffer]); + [m_current_render_cmdbuf setLabel:@"Draw"]; + } + } + return m_current_render_cmdbuf; +} + +void Metal::StateTracker::SetCurrentFramebuffer(Framebuffer* framebuffer) +{ + if (framebuffer == m_current_framebuffer) + return; + EndRenderPass(); + m_current_framebuffer = framebuffer; +} + +MTLRenderPassDescriptor* Metal::StateTracker::GetRenderPassDescriptor(Framebuffer* framebuffer, + MTLLoadAction load_action) +{ + const AbstractTextureFormat depth_fmt = framebuffer->GetDepthFormat(); + MTLRenderPassDescriptor* desc; + if (depth_fmt == AbstractTextureFormat::Undefined) + desc = m_render_pass_desc[0]; + else if (!Util::HasStencil(depth_fmt)) + desc = m_render_pass_desc[1]; + else + desc = m_render_pass_desc[2]; + desc.colorAttachments[0].texture = framebuffer->GetColor(); + desc.colorAttachments[0].loadAction = load_action; + if (depth_fmt != AbstractTextureFormat::Undefined) + { + desc.depthAttachment.texture = framebuffer->GetDepth(); + desc.depthAttachment.loadAction = load_action; + if (Util::HasStencil(depth_fmt)) + { + desc.stencilAttachment.texture = framebuffer->GetDepth(); + desc.stencilAttachment.loadAction = load_action; + } + } + return desc; +} + +void Metal::StateTracker::BeginClearRenderPass(MTLClearColor color, float depth) +{ + Framebuffer* framebuffer = m_current_framebuffer; + MTLRenderPassDescriptor* desc = GetRenderPassDescriptor(framebuffer, MTLLoadActionClear); + desc.colorAttachments[0].clearColor = color; + if (framebuffer->GetDepthFormat() != AbstractTextureFormat::Undefined) + { + desc.depthAttachment.clearDepth = depth; + if (Util::HasStencil(framebuffer->GetDepthFormat())) + desc.stencilAttachment.clearStencil = 0; + } + BeginRenderPass(desc); +} + +void Metal::StateTracker::BeginRenderPass(MTLLoadAction load_action) +{ + if (m_current_render_encoder) + return; + BeginRenderPass(GetRenderPassDescriptor(m_current_framebuffer, load_action)); +} + +void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor) +{ + EndRenderPass(); + m_current_render_encoder = + MRCRetain([GetRenderCmdBuf() renderCommandEncoderWithDescriptor:descriptor]); + if (!g_features.unified_memory) + [m_current_render_encoder waitForFence:m_fence beforeStages:MTLRenderStageVertex]; + AbstractTexture* attachment = m_current_framebuffer->GetColorAttachment(); + if (!attachment) + attachment = m_current_framebuffer->GetDepthAttachment(); + static_assert(std::is_trivially_copyable::value, + "Make sure we can memset this"); + memset(&m_current, 0, sizeof(m_current)); + m_current.width = attachment->GetWidth(); + m_current.height = attachment->GetHeight(); + m_current.scissor_rect = MathUtil::Rectangle(0, 0, m_current.width, m_current.height); + m_current.viewport = { + 0.f, 0.f, static_cast(m_current.width), static_cast(m_current.height), + 0.f, 1.f}; + m_current.depth_stencil = DepthStencilSelector(false, CompareMode::Always); + m_current.depth_clip_mode = MTLDepthClipModeClip; + m_current.cull_mode = MTLCullModeNone; + m_flags.NewEncoder(); + m_dirty_samplers = 0xff; + m_dirty_textures = 0xff; + CheckScissor(); + CheckViewport(); + ASSERT_MSG(VIDEO, m_current_render_encoder, "Failed to create render encoder!"); +} + +void Metal::StateTracker::BeginComputePass() +{ + EndRenderPass(); + m_current_compute_encoder = MRCRetain([GetRenderCmdBuf() computeCommandEncoder]); + [m_current_compute_encoder setLabel:@"Compute"]; + if (!g_features.unified_memory) + [m_current_compute_encoder waitForFence:m_fence]; + m_flags.NewEncoder(); + m_dirty_samplers = 0xff; + m_dirty_textures = 0xff; +} + +void Metal::StateTracker::EndRenderPass() +{ + if (m_current_render_encoder) + { + if (m_flags.bbox_fence && m_state.bbox_download_fence) + [m_current_render_encoder updateFence:m_state.bbox_download_fence + afterStages:MTLRenderStageFragment]; + [m_current_render_encoder endEncoding]; + m_current_render_encoder = nullptr; + } + if (m_current_compute_encoder) + { + [m_current_compute_encoder endEncoding]; + m_current_compute_encoder = nullptr; + } +} + +void Metal::StateTracker::FlushEncoders() +{ + if (!m_current_render_cmdbuf) + return; + EndRenderPass(); + for (int i = 0; i <= static_cast(UploadBuffer::Last); ++i) + Sync(m_upload_buffers[i]); + if (g_features.unified_memory) + { + ASSERT(!m_upload_cmdbuf && "Should never be used!"); + } + else if (m_upload_cmdbuf) + { + [m_upload_encoder updateFence:m_fence]; + [m_upload_encoder endEncoding]; + [m_upload_cmdbuf commit]; + m_upload_encoder = nullptr; + m_upload_cmdbuf = nullptr; + } + if (m_texture_upload_cmdbuf) + { + [m_texture_upload_encoder endEncoding]; + [m_texture_upload_cmdbuf commit]; + m_texture_upload_encoder = nullptr; + m_texture_upload_cmdbuf = nullptr; + } + [m_current_render_cmdbuf + addCompletedHandler:[backref = m_backref, draw = m_current_draw](id buf) { + std::lock_guard guard(backref->mtx); + if (StateTracker* tracker = backref->state_tracker) + { + // We can do the update non-atomically because we only ever update under the lock + u64 newval = std::max(draw, tracker->m_last_finished_draw.load(std::memory_order_relaxed)); + tracker->m_last_finished_draw.store(newval, std::memory_order_release); + } + }]; + [m_current_render_cmdbuf commit]; + m_last_render_cmdbuf = std::move(m_current_render_cmdbuf); + m_current_render_cmdbuf = nullptr; + m_current_draw++; +} + +void Metal::StateTracker::WaitForFlushedEncoders() +{ + [m_last_render_cmdbuf waitUntilCompleted]; +} + +void Metal::StateTracker::ReloadSamplers() +{ + for (size_t i = 0; i < std::size(m_state.samplers); ++i) + m_state.samplers[i] = g_object_cache->GetSampler(m_state.sampler_states[i]); +} + +// MARK: State Setters + +void Metal::StateTracker::SetPipeline(const Pipeline* pipe) +{ + if (pipe != m_state.render_pipeline) + { + m_state.render_pipeline = pipe; + m_flags.has_pipeline = false; + } +} + +void Metal::StateTracker::SetPipeline(const ComputePipeline* pipe) +{ + if (pipe != m_state.compute_pipeline) + { + m_state.compute_pipeline = pipe; + m_flags.has_pipeline = false; + } +} + +void Metal::StateTracker::SetScissor(const MathUtil::Rectangle& rect) +{ + m_state.scissor_rect = rect; + CheckScissor(); +} + +void Metal::StateTracker::CheckScissor() +{ + auto clipped = m_state.scissor_rect; + clipped.ClampUL(0, 0, m_current.width, m_current.height); + m_flags.has_scissor = clipped == m_current.scissor_rect; +} + +void Metal::StateTracker::SetViewport(float x, float y, float width, float height, float near_depth, + float far_depth) +{ + m_state.viewport = {x, y, width, height, near_depth, far_depth}; + CheckViewport(); +} + +void Metal::StateTracker::CheckViewport() +{ + m_flags.has_viewport = + 0 == memcmp(&m_state.viewport, &m_current.viewport, sizeof(m_current.viewport)); +} + +void Metal::StateTracker::SetTexture(u32 idx, id texture) +{ + ASSERT(idx < std::size(m_state.textures)); + if (!texture) + texture = m_dummy_texture; + if (m_state.textures[idx] != texture) + { + m_state.textures[idx] = texture; + m_dirty_textures |= 1 << idx; + } +} + +void Metal::StateTracker::SetSamplerForce(u32 idx, const SamplerState& sampler) +{ + m_state.samplers[idx] = g_object_cache->GetSampler(sampler); + m_state.sampler_min_lod[idx] = sampler.tm1.min_lod; + m_state.sampler_max_lod[idx] = sampler.tm1.max_lod; + m_state.sampler_states[idx] = sampler; + m_dirty_samplers |= 1 << idx; +} + +void Metal::StateTracker::SetSampler(u32 idx, const SamplerState& sampler) +{ + ASSERT(idx < std::size(m_state.samplers)); + if (m_state.sampler_states[idx] != sampler) + SetSamplerForce(idx, sampler); +} + +void Metal::StateTracker::SetComputeTexture(const Texture* texture) +{ + if (m_state.compute_texture != texture) + { + m_state.compute_texture = texture; + m_flags.has_compute_texture = false; + } +} + +void Metal::StateTracker::UnbindTexture(id texture) +{ + for (size_t i = 0; i < std::size(m_state.textures); ++i) + { + if (m_state.textures[i] == texture) + { + m_state.textures[i] = m_dummy_texture; + m_dirty_textures |= 1 << i; + } + } +} + +void Metal::StateTracker::InvalidateUniforms(bool vertex, bool fragment) +{ + m_flags.has_gx_vs_uniform &= !vertex; + m_flags.has_gx_ps_uniform &= !fragment; +} + +void Metal::StateTracker::SetUtilityUniform(const void* buffer, size_t size) +{ + if (m_state.utility_uniform_capacity < size) + { + m_state.utility_uniform = std::unique_ptr(new u8[size]); + m_state.utility_uniform_capacity = size; + } + m_state.utility_uniform_size = size; + memcpy(m_state.utility_uniform.get(), buffer, size); + m_flags.has_utility_vs_uniform = false; + m_flags.has_utility_ps_uniform = false; +} + +void Metal::StateTracker::SetTexelBuffer(id buffer, u32 offset0, u32 offset1) +{ + m_state.texels = buffer; + m_state.texel_buffer_offset0 = offset0; + m_state.texel_buffer_offset1 = offset1; + m_flags.has_texel_buffer = false; +} + +void Metal::StateTracker::SetVerticesAndIndices(Map vertices, Map indices) +{ + m_state.vertices = vertices.gpu_buffer; + m_state.indices = indices.gpu_buffer; + m_state.vertices_offset = vertices.gpu_offset; + m_state.indices_offset = indices.gpu_offset; + m_flags.has_vertices = false; +} + +void Metal::StateTracker::SetBBoxBuffer(id bbox, id upload, + id download) +{ + m_state.bbox = bbox; + m_state.bbox_upload_fence = upload; + m_state.bbox_download_fence = download; +} + +void Metal::StateTracker::SetVertexBufferNow(u32 idx, id buffer, u32 offset) +{ + if (idx < std::size(m_current.vertex_buffers) && m_current.vertex_buffers[idx] == buffer) + { + [m_current_render_encoder setVertexBufferOffset:offset atIndex:idx]; + } + else + { + [m_current_render_encoder setVertexBuffer:buffer offset:offset atIndex:idx]; + m_current.vertex_buffers[idx] = buffer; + } +} + +void Metal::StateTracker::SetFragmentBufferNow(u32 idx, id buffer, u32 offset) +{ + if (idx < std::size(m_current.fragment_buffers) && m_current.fragment_buffers[idx] == buffer) + { + [m_current_render_encoder setFragmentBufferOffset:offset atIndex:idx]; + } + else + { + [m_current_render_encoder setFragmentBuffer:buffer offset:offset atIndex:idx]; + m_current.fragment_buffers[idx] = buffer; + } +} + +// MARK: Render + +// clang-format off +static constexpr NSString* LABEL_GX = @"GX Draw"; +static constexpr NSString* LABEL_UTIL = @"Utility Draw"; +// clang-format on + +static NSRange RangeOfBits(u32 value) +{ + ASSERT(value && "Value must be nonzero"); + u32 low = Common::CountTrailingZeros(value); + u32 high = 31 - Common::CountLeadingZeros(value); + return NSMakeRange(low, high + 1 - low); +} + +void Metal::StateTracker::PrepareRender() +{ + if (!m_current_render_encoder) + BeginRenderPass(MTLLoadActionLoad); + id enc = m_current_render_encoder; + const Pipeline* pipe = m_state.render_pipeline; + bool is_gx = pipe->Usage() == AbstractPipelineUsage::GX; + NSString* label = is_gx ? LABEL_GX : LABEL_UTIL; + if (m_flags.should_apply_label && m_current.label != label) + { + m_current.label = label; + [m_current_render_encoder setLabel:label]; + } + if (!m_flags.has_pipeline) + { + m_flags.has_pipeline = true; + if (pipe->Get() != m_current.pipeline) + { + m_current.pipeline = pipe->Get(); + [enc setRenderPipelineState:pipe->Get()]; + } + if (pipe->Cull() != m_current.cull_mode) + { + m_current.cull_mode = pipe->Cull(); + [enc setCullMode:pipe->Cull()]; + } + if (pipe->DepthStencil() != m_current.depth_stencil) + { + m_current.depth_stencil = pipe->DepthStencil(); + [enc setDepthStencilState:g_object_cache->GetDepthStencil(m_current.depth_stencil)]; + } + MTLDepthClipMode clip = is_gx && g_ActiveConfig.backend_info.bSupportsDepthClamp ? + MTLDepthClipModeClamp : + MTLDepthClipModeClip; + if (clip != m_current.depth_clip_mode) + { + m_current.depth_clip_mode = clip; + [enc setDepthClipMode:clip]; + } + if (is_gx && m_state.bbox_upload_fence && !m_flags.bbox_fence && pipe->UsesFragmentBuffer(2)) + { + m_flags.bbox_fence = true; + [enc waitForFence:m_state.bbox_upload_fence beforeStages:MTLRenderStageFragment]; + [enc setFragmentBuffer:m_state.bbox offset:0 atIndex:2]; + } + } + if (!m_flags.has_viewport) + { + m_flags.has_viewport = true; + m_current.viewport = m_state.viewport; + MTLViewport metal; + metal.originX = m_state.viewport.x; + metal.originY = m_state.viewport.y; + metal.width = m_state.viewport.width; + metal.height = m_state.viewport.height; + metal.znear = m_state.viewport.near_depth; + metal.zfar = m_state.viewport.far_depth; + [enc setViewport:metal]; + } + if (!m_flags.has_scissor) + { + m_flags.has_scissor = true; + m_current.scissor_rect = m_state.scissor_rect; + m_current.scissor_rect.ClampUL(0, 0, m_current.width, m_current.height); + MTLScissorRect metal; + metal.x = m_current.scissor_rect.left; + metal.y = m_current.scissor_rect.top; + metal.width = m_current.scissor_rect.right - m_current.scissor_rect.left; + metal.height = m_current.scissor_rect.bottom - m_current.scissor_rect.top; + [enc setScissorRect:metal]; + } + if (!m_flags.has_vertices && pipe->UsesVertexBuffer(0)) + { + m_flags.has_vertices = true; + if (m_state.vertices) + SetVertexBufferNow(0, m_state.vertices, m_state.vertices_offset); + } + if (u8 dirty = m_dirty_textures & pipe->GetTextures()) + { + m_dirty_textures &= ~pipe->GetTextures(); + NSRange range = RangeOfBits(dirty); + [enc setFragmentTextures:&m_state.textures[range.location] withRange:range]; + } + if (u8 dirty = m_dirty_samplers & pipe->GetSamplers()) + { + m_dirty_samplers &= ~pipe->GetSamplers(); + NSRange range = RangeOfBits(dirty); + [enc setFragmentSamplerStates:&m_state.samplers[range.location] + lodMinClamps:m_state.sampler_min_lod.data() + lodMaxClamps:m_state.sampler_max_lod.data() + withRange:range]; + } + if (is_gx) + { + // GX draw + if (!m_flags.has_gx_vs_uniform) + { + m_flags.has_gx_vs_uniform = true; + Map map = Allocate(UploadBuffer::Uniform, sizeof(VertexShaderConstants), AlignMask::Uniform); + memcpy(map.cpu_buffer, &VertexShaderManager::constants, sizeof(VertexShaderConstants)); + SetVertexBufferNow(1, map.gpu_buffer, map.gpu_offset); + if (pipe->UsesFragmentBuffer(1)) + SetFragmentBufferNow(1, map.gpu_buffer, map.gpu_offset); + ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, + Align(sizeof(VertexShaderConstants), AlignMask::Uniform)); + } + if (!m_flags.has_gx_ps_uniform) + { + m_flags.has_gx_ps_uniform = true; + Map map = Allocate(UploadBuffer::Uniform, sizeof(PixelShaderConstants), AlignMask::Uniform); + memcpy(map.cpu_buffer, &PixelShaderManager::constants, sizeof(PixelShaderConstants)); + SetFragmentBufferNow(0, map.gpu_buffer, map.gpu_offset); + ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, + Align(sizeof(PixelShaderConstants), AlignMask::Uniform)); + } + } + else + { + // Utility draw + if (!m_flags.has_utility_vs_uniform && pipe->UsesVertexBuffer(1)) + { + m_flags.has_utility_vs_uniform = true; + m_flags.has_gx_vs_uniform = false; + [enc setVertexBytes:m_state.utility_uniform.get() + length:m_state.utility_uniform_size + atIndex:1]; + } + if (!m_flags.has_utility_ps_uniform && pipe->UsesFragmentBuffer(0)) + { + m_flags.has_utility_ps_uniform = true; + m_flags.has_gx_ps_uniform = false; + [enc setFragmentBytes:m_state.utility_uniform.get() + length:m_state.utility_uniform_size + atIndex:0]; + } + if (!m_flags.has_texel_buffer && pipe->UsesFragmentBuffer(2)) + { + m_flags.has_texel_buffer = true; + SetFragmentBufferNow(2, m_state.texels, m_state.texel_buffer_offset0); + } + } +} + +void Metal::StateTracker::PrepareCompute() +{ + if (!m_current_compute_encoder) + BeginComputePass(); + id enc = m_current_compute_encoder; + const ComputePipeline* pipe = m_state.compute_pipeline; + if (!m_flags.has_pipeline) + { + m_flags.has_pipeline = true; + [enc setComputePipelineState:pipe->GetComputePipeline()]; + } + if (!m_flags.has_compute_texture && pipe->UsesTexture(0)) + { + m_flags.has_compute_texture = true; + [enc setTexture:m_state.compute_texture->GetMTLTexture() atIndex:0]; + } + // Compute and render can't happen at the same time, so just reuse one of the flags + if (!m_flags.has_utility_vs_uniform && pipe->UsesBuffer(0)) + { + m_flags.has_utility_vs_uniform = true; + [enc setBytes:m_state.utility_uniform.get() length:m_state.utility_uniform_size atIndex:0]; + } + if (!m_flags.has_texel_buffer && pipe->UsesBuffer(2)) + { + m_flags.has_texel_buffer = true; + [enc setBuffer:m_state.texels offset:m_state.texel_buffer_offset0 atIndex:2]; + if (pipe->UsesBuffer(3)) + [enc setBuffer:m_state.texels offset:m_state.texel_buffer_offset1 atIndex:3]; + } +} + +void Metal::StateTracker::Draw(u32 base_vertex, u32 num_vertices) +{ + PrepareRender(); + [m_current_render_encoder drawPrimitives:m_state.render_pipeline->Prim() + vertexStart:base_vertex + vertexCount:num_vertices]; +} + +void Metal::StateTracker::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) +{ + PrepareRender(); + if (!base_vertex) + { + [m_current_render_encoder + drawIndexedPrimitives:m_state.render_pipeline->Prim() + indexCount:num_indices + indexType:MTLIndexTypeUInt16 + indexBuffer:m_state.indices + indexBufferOffset:m_state.indices_offset + base_index * sizeof(u16)]; + } + else + { + [m_current_render_encoder + drawIndexedPrimitives:m_state.render_pipeline->Prim() + indexCount:num_indices + indexType:MTLIndexTypeUInt16 + indexBuffer:m_state.indices + indexBufferOffset:m_state.indices_offset + base_index * sizeof(u16) + instanceCount:1 + baseVertex:base_vertex + baseInstance:0]; + } +} + +void Metal::StateTracker::DispatchComputeShader(u32 groupsize_x, u32 groupsize_y, u32 groupsize_z, + u32 groups_x, u32 groups_y, u32 groups_z) +{ + PrepareCompute(); + [m_current_compute_encoder + dispatchThreadgroups:MTLSizeMake(groups_x, groups_y, groups_z) + threadsPerThreadgroup:MTLSizeMake(groupsize_x, groupsize_y, groupsize_z)]; +} + +void Metal::StateTracker::ResolveTexture(id src, id dst, u32 layer, + u32 level) +{ + EndRenderPass(); + auto color0 = [[m_resolve_pass_desc colorAttachments] objectAtIndexedSubscript:0]; + [color0 setTexture:src]; + [color0 setResolveTexture:dst]; + [color0 setResolveSlice:layer]; + [color0 setResolveLevel:level]; + id enc = + [GetRenderCmdBuf() renderCommandEncoderWithDescriptor:m_resolve_pass_desc]; + [enc setLabel:@"Multisample Resolve"]; + [enc endEncoding]; +} diff --git a/Source/Core/VideoBackends/Metal/MTLTexture.h b/Source/Core/VideoBackends/Metal/MTLTexture.h new file mode 100644 index 0000000000..7c01f1de4b --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLTexture.h @@ -0,0 +1,77 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "VideoCommon/AbstractFramebuffer.h" +#include "VideoCommon/AbstractStagingTexture.h" +#include "VideoCommon/AbstractTexture.h" + +#include "VideoBackends/Metal/MRCHelpers.h" + +namespace Metal +{ +class Texture final : public AbstractTexture +{ +public: + explicit Texture(MRCOwned> tex, const TextureConfig& config); + ~Texture(); + + void CopyRectangleFromTexture(const AbstractTexture* src, + const MathUtil::Rectangle& src_rect, u32 src_layer, + u32 src_level, const MathUtil::Rectangle& dst_rect, + u32 dst_layer, u32 dst_level) override; + void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, + u32 layer, u32 level) override; + void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, + size_t buffer_size) override; + + id GetMTLTexture() const { return m_tex; } + void SetMTLTexture(MRCOwned> tex) { m_tex = std::move(tex); } + +private: + MRCOwned> m_tex; +}; + +class StagingTexture final : public AbstractStagingTexture +{ +public: + StagingTexture(MRCOwned> buffer, StagingTextureType type, + const TextureConfig& config); + ~StagingTexture(); + + void CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, + u32 src_layer, u32 src_level, + const MathUtil::Rectangle& dst_rect) override; + void CopyToTexture(const MathUtil::Rectangle& src_rect, AbstractTexture* dst, + const MathUtil::Rectangle& dst_rect, u32 dst_layer, + u32 dst_level) override; + + bool Map() override; + void Unmap() override; + void Flush() override; + +private: + MRCOwned> m_buffer; + MRCOwned> m_wait_buffer; +}; + +class Framebuffer final : public AbstractFramebuffer +{ +public: + Framebuffer(AbstractTexture* color, AbstractTexture* depth, u32 width, u32 height, u32 layers, + u32 samples); + ~Framebuffer(); + + id GetColor() const + { + return static_cast(GetColorAttachment())->GetMTLTexture(); + } + id GetDepth() const + { + return static_cast(GetDepthAttachment())->GetMTLTexture(); + } +}; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLTexture.mm b/Source/Core/VideoBackends/Metal/MTLTexture.mm new file mode 100644 index 0000000000..67c114caa7 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLTexture.mm @@ -0,0 +1,179 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLTexture.h" + +#include "Common/Align.h" +#include "Common/Assert.h" + +#include "VideoBackends/Metal/MTLStateTracker.h" + +Metal::Texture::Texture(MRCOwned> tex, const TextureConfig& config) + : AbstractTexture(config), m_tex(std::move(tex)) +{ +} + +Metal::Texture::~Texture() +{ + if (g_state_tracker) + g_state_tracker->UnbindTexture(m_tex); +} + +void Metal::Texture::CopyRectangleFromTexture(const AbstractTexture* src, + const MathUtil::Rectangle& src_rect, + u32 src_layer, u32 src_level, + const MathUtil::Rectangle& dst_rect, + u32 dst_layer, u32 dst_level) +{ + g_state_tracker->EndRenderPass(); + id msrc = static_cast(src)->GetMTLTexture(); + id blit = [g_state_tracker->GetRenderCmdBuf() blitCommandEncoder]; + MTLSize size = MTLSizeMake(src_rect.right - src_rect.left, src_rect.bottom - src_rect.top, 1); + [blit setLabel:@"Texture Copy"]; + [blit copyFromTexture:msrc + sourceSlice:src_layer + sourceLevel:src_level + sourceOrigin:MTLOriginMake(src_rect.left, src_rect.top, 0) + sourceSize:size + toTexture:m_tex + destinationSlice:dst_layer + destinationLevel:dst_level + destinationOrigin:MTLOriginMake(dst_rect.left, dst_rect.top, 0)]; + [blit endEncoding]; +} + +void Metal::Texture::ResolveFromTexture(const AbstractTexture* src, + const MathUtil::Rectangle& rect, u32 layer, u32 level) +{ + ASSERT(rect == MathUtil::Rectangle(0, 0, src->GetWidth(), src->GetHeight())); + id src_tex = static_cast(src)->GetMTLTexture(); + g_state_tracker->ResolveTexture(src_tex, m_tex, layer, level); +} + +void Metal::Texture::Load(u32 level, u32 width, u32 height, u32 row_length, // + const u8* buffer, size_t buffer_size) +{ + @autoreleasepool + { + const u32 block_size = GetBlockSizeForFormat(GetFormat()); + const u32 num_rows = Common::AlignUp(height, block_size) / block_size; + const u32 source_pitch = CalculateStrideForFormat(m_config.format, row_length); + const u32 upload_size = source_pitch * num_rows; + StateTracker::Map map = g_state_tracker->AllocateForTextureUpload(upload_size); + memcpy(map.cpu_buffer, buffer, upload_size); + id encoder = g_state_tracker->GetTextureUploadEncoder(); + [encoder copyFromBuffer:map.gpu_buffer + sourceOffset:map.gpu_offset + sourceBytesPerRow:source_pitch + sourceBytesPerImage:upload_size + sourceSize:MTLSizeMake(width, height, 1) + toTexture:m_tex + destinationSlice:0 + destinationLevel:level + destinationOrigin:MTLOriginMake(0, 0, 0)]; + } +} + +Metal::StagingTexture::StagingTexture(MRCOwned> buffer, StagingTextureType type, + const TextureConfig& config) + : AbstractStagingTexture(type, config), m_buffer(std::move(buffer)) +{ + m_map_pointer = static_cast([m_buffer contents]); + m_map_stride = config.GetStride(); +} + +Metal::StagingTexture::~StagingTexture() = default; + +void Metal::StagingTexture::CopyFromTexture(const AbstractTexture* src, + const MathUtil::Rectangle& src_rect, // + u32 src_layer, u32 src_level, + const MathUtil::Rectangle& dst_rect) +{ + @autoreleasepool + { + const size_t stride = m_config.GetStride(); + const u32 offset = dst_rect.top * stride + dst_rect.left * m_texel_size; + const MTLSize size = + MTLSizeMake(src_rect.right - src_rect.left, src_rect.bottom - src_rect.top, 1); + g_state_tracker->EndRenderPass(); + m_wait_buffer = MRCRetain(g_state_tracker->GetRenderCmdBuf()); + id download_encoder = [m_wait_buffer blitCommandEncoder]; + [download_encoder setLabel:@"Texture Download"]; + [download_encoder copyFromTexture:static_cast(src)->GetMTLTexture() + sourceSlice:src_layer + sourceLevel:src_level + sourceOrigin:MTLOriginMake(src_rect.left, src_rect.top, 0) + sourceSize:size + toBuffer:m_buffer + destinationOffset:offset + destinationBytesPerRow:stride + destinationBytesPerImage:stride * size.height]; + [download_encoder endEncoding]; + m_needs_flush = true; + } +} + +void Metal::StagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, // + AbstractTexture* dst, + const MathUtil::Rectangle& dst_rect, // + u32 dst_layer, u32 dst_level) +{ + @autoreleasepool + { + const size_t stride = m_config.GetStride(); + const u32 offset = dst_rect.top * stride + dst_rect.left * m_texel_size; + const MTLSize size = + MTLSizeMake(src_rect.right - src_rect.left, src_rect.bottom - src_rect.top, 1); + g_state_tracker->EndRenderPass(); + m_wait_buffer = MRCRetain(g_state_tracker->GetRenderCmdBuf()); + id upload_encoder = [m_wait_buffer blitCommandEncoder]; + [upload_encoder setLabel:@"Texture Upload"]; + [upload_encoder copyFromBuffer:m_buffer + sourceOffset:offset + sourceBytesPerRow:stride + sourceBytesPerImage:stride * size.height + sourceSize:size + toTexture:static_cast(dst)->GetMTLTexture() + destinationSlice:dst_layer + destinationLevel:dst_level + destinationOrigin:MTLOriginMake(dst_rect.left, dst_rect.top, 0)]; + [upload_encoder endEncoding]; + m_needs_flush = true; + } +} + +bool Metal::StagingTexture::Map() +{ + // Always mapped + return true; +} + +void Metal::StagingTexture::Unmap() +{ + // Always mapped +} + +void Metal::StagingTexture::Flush() +{ + m_needs_flush = false; + if (!m_wait_buffer) + return; + if ([m_wait_buffer status] != MTLCommandBufferStatusCompleted) + { + // Flush while we wait, since who knows how long we'll be sitting here + g_state_tracker->FlushEncoders(); + [m_wait_buffer waitUntilCompleted]; + } + m_wait_buffer = nullptr; +} + +Metal::Framebuffer::Framebuffer(AbstractTexture* color, AbstractTexture* depth, // + u32 width, u32 height, u32 layers, u32 samples) + : AbstractFramebuffer(color, depth, + color ? color->GetFormat() : AbstractTextureFormat::Undefined, // + depth ? depth->GetFormat() : AbstractTextureFormat::Undefined, // + width, height, layers, samples) +{ +} + +Metal::Framebuffer::~Framebuffer() = default; diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.h b/Source/Core/VideoBackends/Metal/MTLUtil.h new file mode 100644 index 0000000000..385f508648 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLUtil.h @@ -0,0 +1,54 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "VideoCommon/AbstractShader.h" +#include "VideoCommon/TextureConfig.h" +#include "VideoCommon/VideoConfig.h" + +#include "VideoBackends/Metal/MRCHelpers.h" + +namespace Metal +{ +struct DeviceFeatures +{ + bool unified_memory; + bool subgroup_ops; +}; + +extern DeviceFeatures g_features; + +namespace Util +{ +struct Viewport +{ + float x; + float y; + float width; + float height; + float near_depth; + float far_depth; +}; + +/// Gets the list of Metal devices, ordered so the system default device is first +std::vector>> GetAdapterList(); +void PopulateBackendInfo(VideoConfig* config); +void PopulateBackendInfoAdapters(VideoConfig* config, + const std::vector>>& adapters); +void PopulateBackendInfoFeatures(VideoConfig* config, id device); + +AbstractTextureFormat ToAbstract(MTLPixelFormat format); +MTLPixelFormat FromAbstract(AbstractTextureFormat format); +static inline bool HasStencil(AbstractTextureFormat format) +{ + return format == AbstractTextureFormat::D24_S8 || format == AbstractTextureFormat::D32F_S8; +} + +std::optional TranslateShaderToMSL(ShaderStage stage, std::string_view source); + +} // namespace Util +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.mm b/Source/Core/VideoBackends/Metal/MTLUtil.mm new file mode 100644 index 0000000000..b851a3d256 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLUtil.mm @@ -0,0 +1,358 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLUtil.h" + +#include +#include + +#include + +#include "Common/MsgHandler.h" + +#include "VideoCommon/Spirv.h" + +Metal::DeviceFeatures Metal::g_features; + +std::vector>> Metal::Util::GetAdapterList() +{ + std::vector>> list; + id default_dev = MTLCreateSystemDefaultDevice(); + if (default_dev) + list.push_back(MRCTransfer(default_dev)); + + auto devices = MRCTransfer(MTLCopyAllDevices()); + for (id device in devices.Get()) + { + if (device != default_dev) + list.push_back(MRCRetain(device)); + } + return list; +} + +void Metal::Util::PopulateBackendInfo(VideoConfig* config) +{ + config->backend_info.api_type = APIType::Metal; + config->backend_info.bUsesLowerLeftOrigin = false; + config->backend_info.bSupportsExclusiveFullscreen = false; + config->backend_info.bSupportsDualSourceBlend = true; + config->backend_info.bSupportsPrimitiveRestart = true; + config->backend_info.bSupportsGeometryShaders = false; + config->backend_info.bSupportsComputeShaders = true; + config->backend_info.bSupports3DVision = false; + config->backend_info.bSupportsEarlyZ = true; + config->backend_info.bSupportsBindingLayout = true; + config->backend_info.bSupportsBBox = true; + config->backend_info.bSupportsGSInstancing = false; + config->backend_info.bSupportsPostProcessing = true; + config->backend_info.bSupportsPaletteConversion = true; + config->backend_info.bSupportsClipControl = true; + config->backend_info.bSupportsSSAA = true; + config->backend_info.bSupportsFragmentStoresAndAtomics = true; + config->backend_info.bSupportsReversedDepthRange = false; + config->backend_info.bSupportsLogicOp = false; + config->backend_info.bSupportsMultithreading = false; + config->backend_info.bSupportsGPUTextureDecoding = true; + config->backend_info.bSupportsCopyToVram = true; + config->backend_info.bSupportsBitfield = true; + config->backend_info.bSupportsDynamicSamplerIndexing = true; + config->backend_info.bSupportsFramebufferFetch = false; + config->backend_info.bSupportsBackgroundCompiling = true; + config->backend_info.bSupportsLargePoints = true; + config->backend_info.bSupportsPartialDepthCopies = true; + config->backend_info.bSupportsDepthReadback = true; + config->backend_info.bSupportsShaderBinaries = false; + config->backend_info.bSupportsPipelineCacheData = false; + config->backend_info.bSupportsCoarseDerivatives = false; + config->backend_info.bSupportsTextureQueryLevels = true; + config->backend_info.bSupportsLodBiasInSampler = false; + config->backend_info.bSupportsSettingObjectNames = true; +} + +void Metal::Util::PopulateBackendInfoAdapters(VideoConfig* config, + const std::vector>>& adapters) +{ + config->backend_info.Adapters.clear(); + for (id adapter : adapters) + { + config->backend_info.Adapters.push_back([[adapter name] UTF8String]); + } +} + +void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id device) +{ +#if TARGET_OS_OSX + config->backend_info.bSupportsDepthClamp = true; + config->backend_info.bSupportsST3CTextures = true; + config->backend_info.bSupportsBPTCTextures = true; +#else + bool supports_mac1 = false; + bool supports_apple4 = false; + if (@available(iOS 13, *)) + { + supports_mac1 = [device supportsFamily:MTLGPUFamilyMac1]; + supports_apple4 = [device supportsFamily:MTLGPUFamilyApple4]; + } + else + { + supports_apple4 = [device supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]; + } + config->backend_info.bSupportsDepthClamp = supports_mac1 || supports_apple4; + config->backend_info.bSupportsST3CTextures = supports_mac1; + config->backend_info.bSupportsBPTCTextures = supports_mac1; + config->backend_info.bSupportsFramebufferFetch = true; +#endif + if (char* env = getenv("MTL_UNIFIED_MEMORY")) + g_features.unified_memory = env[0] == '1' || env[0] == 'y' || env[0] == 'Y'; + else if (@available(macOS 10.15, iOS 13.0, *)) + g_features.unified_memory = [device hasUnifiedMemory]; + else + g_features.unified_memory = false; + + g_features.subgroup_ops = false; + if (@available(macOS 10.15, iOS 13, *)) + { + // Requires SIMD-scoped reduction operations + g_features.subgroup_ops = + [device supportsFamily:MTLGPUFamilyMac2] || [device supportsFamily:MTLGPUFamilyApple6]; + config->backend_info.bSupportsFramebufferFetch = [device supportsFamily:MTLGPUFamilyApple1]; + } + if ([[device name] containsString:@"AMD"]) + { + // Broken + g_features.subgroup_ops = false; + } +} + +// clang-format off + +AbstractTextureFormat Metal::Util::ToAbstract(MTLPixelFormat format) +{ + switch (format) + { + case MTLPixelFormatRGBA8Unorm: return AbstractTextureFormat::RGBA8; + case MTLPixelFormatBGRA8Unorm: return AbstractTextureFormat::BGRA8; + case MTLPixelFormatBC1_RGBA: return AbstractTextureFormat::DXT1; + case MTLPixelFormatBC2_RGBA: return AbstractTextureFormat::DXT3; + case MTLPixelFormatBC3_RGBA: return AbstractTextureFormat::DXT5; + case MTLPixelFormatBC7_RGBAUnorm: return AbstractTextureFormat::BPTC; + case MTLPixelFormatR16Unorm: return AbstractTextureFormat::R16; + case MTLPixelFormatDepth16Unorm: return AbstractTextureFormat::D16; + case MTLPixelFormatDepth24Unorm_Stencil8: return AbstractTextureFormat::D24_S8; + case MTLPixelFormatR32Float: return AbstractTextureFormat::R32F; + case MTLPixelFormatDepth32Float: return AbstractTextureFormat::D32F; + case MTLPixelFormatDepth32Float_Stencil8: return AbstractTextureFormat::D32F_S8; + default: return AbstractTextureFormat::Undefined; + } +} + +MTLPixelFormat Metal::Util::FromAbstract(AbstractTextureFormat format) +{ + switch (format) + { + case AbstractTextureFormat::RGBA8: return MTLPixelFormatRGBA8Unorm; + case AbstractTextureFormat::BGRA8: return MTLPixelFormatBGRA8Unorm; + case AbstractTextureFormat::DXT1: return MTLPixelFormatBC1_RGBA; + case AbstractTextureFormat::DXT3: return MTLPixelFormatBC2_RGBA; + case AbstractTextureFormat::DXT5: return MTLPixelFormatBC3_RGBA; + case AbstractTextureFormat::BPTC: return MTLPixelFormatBC7_RGBAUnorm; + case AbstractTextureFormat::R16: return MTLPixelFormatR16Unorm; + case AbstractTextureFormat::D16: return MTLPixelFormatDepth16Unorm; + case AbstractTextureFormat::D24_S8: return MTLPixelFormatDepth24Unorm_Stencil8; + case AbstractTextureFormat::R32F: return MTLPixelFormatR32Float; + case AbstractTextureFormat::D32F: return MTLPixelFormatDepth32Float; + case AbstractTextureFormat::D32F_S8: return MTLPixelFormatDepth32Float_Stencil8; + case AbstractTextureFormat::Undefined: return MTLPixelFormatInvalid; + } +} + +// clang-format on + +// MARK: Shader Translation + +static const std::string_view SHADER_HEADER = R"( +// Target GLSL 4.5. +#version 450 core +// Always available on Metal +#extension GL_EXT_shader_8bit_storage : require +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require + +#define ATTRIBUTE_LOCATION(x) layout(location = x) +#define FRAGMENT_OUTPUT_LOCATION(x) layout(location = x) +#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y) layout(location = x, index = y) +#define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1)) +#define SAMPLER_BINDING(x) layout(set = 1, binding = x) +#define TEXEL_BUFFER_BINDING(x) layout(set = 1, binding = (x + 8)) +#define SSBO_BINDING(x) layout(std430, set = 2, binding = x) +#define INPUT_ATTACHMENT_BINDING(x, y, z) layout(set = x, binding = y, input_attachment_index = z) +#define VARYING_LOCATION(x) layout(location = x) +#define FORCE_EARLY_Z layout(early_fragment_tests) in + +// Metal framebuffer fetch helpers. +#define FB_FETCH_VALUE subpassLoad(in_ocol0) + +// hlsl to glsl function translation +#define API_METAL 1 +#define float2 vec2 +#define float3 vec3 +#define float4 vec4 +#define uint2 uvec2 +#define uint3 uvec3 +#define uint4 uvec4 +#define int2 ivec2 +#define int3 ivec3 +#define int4 ivec4 +#define frac fract +#define lerp mix + +// These were changed in Vulkan +#define gl_VertexID gl_VertexIndex +#define gl_InstanceID gl_InstanceIndex +)"; +static const std::string_view COMPUTE_SHADER_HEADER = R"( +// Target GLSL 4.5. +#version 450 core +// Always available on Metal +#extension GL_EXT_shader_8bit_storage : require +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require + +// All resources are packed into one descriptor set for compute. +#define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1)) +#define SSBO_BINDING(x) layout(std430, set = 2, binding = x) +#define IMAGE_BINDING(format, x) layout(format, set = 1, binding = x) + +// hlsl to glsl function translation +#define API_METAL 1 +#define float2 vec2 +#define float3 vec3 +#define float4 vec4 +#define uint2 uvec2 +#define uint3 uvec3 +#define uint4 uvec4 +#define int2 ivec2 +#define int3 ivec3 +#define int4 ivec4 +#define frac fract +#define lerp mix +)"; +static const std::string_view SUBGROUP_HELPER_HEADER = R"( +#extension GL_KHR_shader_subgroup_basic : enable +#extension GL_KHR_shader_subgroup_arithmetic : enable +#extension GL_KHR_shader_subgroup_ballot : enable + +#define SUPPORTS_SUBGROUP_REDUCTION 1 +#define CAN_USE_SUBGROUP_REDUCTION true +#define IS_HELPER_INVOCATION gl_HelperInvocation +#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect()) +#define SUBGROUP_MIN(value) value = subgroupMin(value) +#define SUBGROUP_MAX(value) value = subgroupMax(value) +)"; + +static const std::string_view MSL_HEADER = + // We know our shader generator leaves unused variables. + "#pragma clang diagnostic ignored \"-Wunused-variable\"\n" + // These are usually when the compiler doesn't think a switch is exhaustive + "#pragma clang diagnostic ignored \"-Wreturn-type\"\n"; + +static constexpr spirv_cross::MSLResourceBinding +MakeResourceBinding(spv::ExecutionModel stage, u32 set, u32 binding, // + u32 msl_buffer, u32 msl_texture, u32 msl_sampler) +{ + spirv_cross::MSLResourceBinding resource; + resource.stage = stage; + resource.desc_set = set; + resource.binding = binding; + resource.msl_buffer = msl_buffer; + resource.msl_texture = msl_texture; + resource.msl_sampler = msl_sampler; + return resource; +} + +std::optional Metal::Util::TranslateShaderToMSL(ShaderStage stage, + std::string_view source) +{ + std::string full_source; + + std::string_view header = stage == ShaderStage::Compute ? COMPUTE_SHADER_HEADER : SHADER_HEADER; + full_source.reserve(header.size() + SUBGROUP_HELPER_HEADER.size() + source.size()); + + full_source.append(header); + if (Metal::g_features.subgroup_ops) + full_source.append(SUBGROUP_HELPER_HEADER); + full_source.append(source); + + std::optional code; + switch (stage) + { + case ShaderStage::Vertex: + code = SPIRV::CompileVertexShader(full_source, APIType::Metal, glslang::EShTargetSpv_1_3); + break; + case ShaderStage::Geometry: + PanicAlertFmt("Tried to compile geometry shader for Metal, but Metal doesn't support them!"); + break; + case ShaderStage::Pixel: + code = SPIRV::CompileFragmentShader(full_source, APIType::Metal, glslang::EShTargetSpv_1_3); + break; + case ShaderStage::Compute: + code = SPIRV::CompileComputeShader(full_source, APIType::Metal, glslang::EShTargetSpv_1_3); + break; + } + if (!code.has_value()) + return std::nullopt; + + // clang-format off + static const spirv_cross::MSLResourceBinding resource_bindings[] = { + MakeResourceBinding(spv::ExecutionModelVertex, 0, 0, 1, 0, 0), // vs/ubo + MakeResourceBinding(spv::ExecutionModelVertex, 0, 1, 1, 0, 0), // vs/ubo + MakeResourceBinding(spv::ExecutionModelFragment, 0, 0, 0, 0, 0), // vs/ubo + MakeResourceBinding(spv::ExecutionModelFragment, 0, 1, 1, 0, 0), // vs/ubo + MakeResourceBinding(spv::ExecutionModelFragment, 1, 0, 0, 0, 0), // ps/samp0 + MakeResourceBinding(spv::ExecutionModelFragment, 1, 1, 0, 1, 1), // ps/samp1 + MakeResourceBinding(spv::ExecutionModelFragment, 1, 2, 0, 2, 2), // ps/samp2 + MakeResourceBinding(spv::ExecutionModelFragment, 1, 3, 0, 3, 3), // ps/samp3 + MakeResourceBinding(spv::ExecutionModelFragment, 1, 4, 0, 4, 4), // ps/samp4 + MakeResourceBinding(spv::ExecutionModelFragment, 1, 5, 0, 5, 5), // ps/samp5 + MakeResourceBinding(spv::ExecutionModelFragment, 1, 6, 0, 6, 6), // ps/samp6 + MakeResourceBinding(spv::ExecutionModelFragment, 1, 7, 0, 7, 7), // ps/samp7 + MakeResourceBinding(spv::ExecutionModelFragment, 1, 8, 0, 8, 8), // ps/samp8 + MakeResourceBinding(spv::ExecutionModelFragment, 2, 0, 2, 0, 0), // ps/ssbo + MakeResourceBinding(spv::ExecutionModelGLCompute, 0, 1, 0, 0, 0), // cs/ubo + MakeResourceBinding(spv::ExecutionModelGLCompute, 1, 0, 0, 0, 0), // cs/output_image + MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 0, 2, 0, 0), // cs/ssbo + MakeResourceBinding(spv::ExecutionModelGLCompute, 2, 1, 3, 0, 0), // cs/ssbo + }; + + spirv_cross::CompilerMSL::Options options; +#if TARGET_OS_OSX + options.platform = spirv_cross::CompilerMSL::Options::macOS; +#elif TARGET_OS_IOS + options.platform = spirv_cross::CompilerMSL::Options::iOS; +#else + #error What platform is this? +#endif + // clang-format on + + spirv_cross::CompilerMSL compiler(std::move(*code)); + + if (@available(macOS 11, iOS 14, *)) + options.set_msl_version(2, 3); + else if (@available(macOS 10.15, iOS 13, *)) + options.set_msl_version(2, 2); + else if (@available(macOS 10.14, iOS 12, *)) + options.set_msl_version(2, 1); + else + options.set_msl_version(2, 0); + options.use_framebuffer_fetch_subpasses = true; + compiler.set_msl_options(options); + + for (auto& binding : resource_bindings) + compiler.add_msl_resource_binding(binding); + + std::string msl(MSL_HEADER); + msl += compiler.compile(); + return msl; +} diff --git a/Source/Core/VideoBackends/Metal/MTLVertexManager.h b/Source/Core/VideoBackends/Metal/MTLVertexManager.h new file mode 100644 index 0000000000..29c715f1fd --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLVertexManager.h @@ -0,0 +1,30 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "VideoBackends/Metal/MTLUtil.h" +#include "VideoCommon/VertexManagerBase.h" + +namespace Metal +{ +class VertexManager final : public VertexManagerBase +{ +public: + VertexManager(); + ~VertexManager() override; + + void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override; + bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset) override; + bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, u32* out_offset, + const void* palette_data, u32 palette_size, + TexelBufferFormat palette_format, u32* out_palette_offset) override; + +protected: + void ResetBuffer(u32 vertex_stride) override; + void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, + u32* out_base_index) override; + void UploadUniforms() override; +}; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLVertexManager.mm b/Source/Core/VideoBackends/Metal/MTLVertexManager.mm new file mode 100644 index 0000000000..3683e79ba6 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLVertexManager.mm @@ -0,0 +1,89 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLVertexManager.h" + +#include "VideoBackends/Metal/MTLStateTracker.h" + +#include "VideoCommon/PixelShaderManager.h" +#include "VideoCommon/Statistics.h" +#include "VideoCommon/VertexShaderManager.h" + +Metal::VertexManager::VertexManager() +{ +} + +Metal::VertexManager::~VertexManager() = default; + +void Metal::VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) +{ + g_state_tracker->SetUtilityUniform(uniforms, uniforms_size); +} + +bool Metal::VertexManager::UploadTexelBuffer(const void* data, u32 data_size, + TexelBufferFormat format, u32* out_offset) +{ + *out_offset = 0; + StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::Texels, data_size, + StateTracker::AlignMask::Other); + memcpy(map.cpu_buffer, data, data_size); + g_state_tracker->SetTexelBuffer(map.gpu_buffer, map.gpu_offset, 0); + return true; +} + +bool Metal::VertexManager::UploadTexelBuffer(const void* data, u32 data_size, + TexelBufferFormat format, u32* out_offset, + const void* palette_data, u32 palette_size, + TexelBufferFormat palette_format, + u32* out_palette_offset) +{ + *out_offset = 0; + *out_palette_offset = 0; + + const u32 aligned_data_size = g_state_tracker->Align(data_size, StateTracker::AlignMask::Other); + const u32 total_size = aligned_data_size + palette_size; + StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::Texels, total_size, + StateTracker::AlignMask::Other); + memcpy(map.cpu_buffer, data, data_size); + memcpy(static_cast(map.cpu_buffer) + aligned_data_size, palette_data, palette_size); + g_state_tracker->SetTexelBuffer(map.gpu_buffer, map.gpu_offset, + map.gpu_offset + aligned_data_size); + return true; +} + +void Metal::VertexManager::ResetBuffer(u32 vertex_stride) +{ + const u32 max_vertex_size = 65535 * vertex_stride; + void* vertex = g_state_tracker->Preallocate(StateTracker::UploadBuffer::Vertex, max_vertex_size); + void* index = + g_state_tracker->Preallocate(StateTracker::UploadBuffer::Index, MAXIBUFFERSIZE * sizeof(u16)); + + m_cur_buffer_pointer = m_base_buffer_pointer = static_cast(vertex); + m_end_buffer_pointer = m_base_buffer_pointer + max_vertex_size; + m_index_generator.Start(static_cast(index)); +} + +void Metal::VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, + u32* out_base_vertex, u32* out_base_index) +{ + const u32 vsize = num_vertices * vertex_stride; + const u32 isize = num_indices * sizeof(u16); + StateTracker::Map vmap = g_state_tracker->CommitPreallocation( + StateTracker::UploadBuffer::Vertex, vsize, StateTracker::AlignMask::Other); + StateTracker::Map imap = g_state_tracker->CommitPreallocation( + StateTracker::UploadBuffer::Index, isize, StateTracker::AlignMask::Other); + + ADDSTAT(g_stats.this_frame.bytes_vertex_streamed, vsize); + ADDSTAT(g_stats.this_frame.bytes_index_streamed, isize); + + g_state_tracker->SetVerticesAndIndices(vmap, imap); + *out_base_vertex = 0; + *out_base_index = 0; +} + +void Metal::VertexManager::UploadUniforms() +{ + g_state_tracker->InvalidateUniforms(VertexShaderManager::dirty, PixelShaderManager::dirty); + VertexShaderManager::dirty = false; + PixelShaderManager::dirty = false; +} diff --git a/Source/Core/VideoBackends/Metal/VideoBackend.h b/Source/Core/VideoBackends/Metal/VideoBackend.h new file mode 100644 index 0000000000..3a567718e2 --- /dev/null +++ b/Source/Core/VideoBackends/Metal/VideoBackend.h @@ -0,0 +1,27 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "VideoCommon/VideoBackendBase.h" + +namespace Metal +{ +class VideoBackend : public VideoBackendBase +{ +public: + bool Initialize(const WindowSystemInfo& wsi) override; + void Shutdown() override; + + std::string GetName() const override; + std::string GetDisplayName() const override; + std::optional GetWarningMessage() const override; + + void InitBackendInfo() override; + + void PrepareWindow(WindowSystemInfo& wsi) override; + + static constexpr const char* NAME = "Metal"; +}; +} // namespace Metal diff --git a/Source/Core/VideoBackends/OGL/OGLRender.cpp b/Source/Core/VideoBackends/OGL/OGLRender.cpp index 93df1ccc29..78f9d8e176 100644 --- a/Source/Core/VideoBackends/OGL/OGLRender.cpp +++ b/Source/Core/VideoBackends/OGL/OGLRender.cpp @@ -903,8 +903,8 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) } } -void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) +void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) { glUseProgram(static_cast(shader)->GetGLComputeProgramID()); glDispatchCompute(groups_x, groups_y, groups_z); diff --git a/Source/Core/VideoBackends/OGL/OGLRender.h b/Source/Core/VideoBackends/OGL/OGLRender.h index 02266776ab..72da03ef29 100644 --- a/Source/Core/VideoBackends/OGL/OGLRender.h +++ b/Source/Core/VideoBackends/OGL/OGLRender.h @@ -125,8 +125,8 @@ public: float far_depth) override; void Draw(u32 base_vertex, u32 num_vertices) override; void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; - void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) override; + void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override; void BindBackbuffer(const ClearColor& clear_color = {}) override; void PresentBackbuffer() override; diff --git a/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp b/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp index db5037f427..36fa325ae7 100644 --- a/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp @@ -641,8 +641,8 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) base_vertex, 0); } -void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) +void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) { StateTracker::GetInstance()->SetComputeShader(static_cast(shader)); if (StateTracker::GetInstance()->BindCompute()) diff --git a/Source/Core/VideoBackends/Vulkan/VKRenderer.h b/Source/Core/VideoBackends/Vulkan/VKRenderer.h index 4e69bbe577..11fc7dd96e 100644 --- a/Source/Core/VideoBackends/Vulkan/VKRenderer.h +++ b/Source/Core/VideoBackends/Vulkan/VKRenderer.h @@ -77,8 +77,8 @@ public: float far_depth) override; void Draw(u32 base_vertex, u32 num_vertices) override; void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; - void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) override; + void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) override; void BindBackbuffer(const ClearColor& clear_color = {}) override; void PresentBackbuffer() override; void SetFullscreen(bool enable_fullscreen) override; diff --git a/Source/Core/VideoCommon/FramebufferShaderGen.cpp b/Source/Core/VideoCommon/FramebufferShaderGen.cpp index 345578d071..6426b259b4 100644 --- a/Source/Core/VideoCommon/FramebufferShaderGen.cpp +++ b/Source/Core/VideoCommon/FramebufferShaderGen.cpp @@ -34,6 +34,7 @@ void EmitSamplerDeclarations(ShaderCode& code, u32 start = 0, u32 end = 1, switch (GetAPIType()) { case APIType::D3D: + case APIType::Metal: case APIType::OpenGL: case APIType::Vulkan: { @@ -55,6 +56,7 @@ void EmitSampleTexture(ShaderCode& code, u32 n, std::string_view coords) switch (GetAPIType()) { case APIType::D3D: + case APIType::Metal: case APIType::OpenGL: case APIType::Vulkan: code.Write("texture(samp{}, {})", n, coords); @@ -72,6 +74,7 @@ void EmitTextureLoad(ShaderCode& code, u32 n, std::string_view coords) switch (GetAPIType()) { case APIType::D3D: + case APIType::Metal: case APIType::OpenGL: case APIType::Vulkan: code.Write("texelFetch(samp{}, ({}).xyz, ({}).w)", n, coords, coords); @@ -89,6 +92,7 @@ void EmitVertexMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_col switch (GetAPIType()) { case APIType::D3D: + case APIType::Metal: case APIType::OpenGL: case APIType::Vulkan: { @@ -138,6 +142,7 @@ void EmitPixelMainDeclaration(ShaderCode& code, u32 num_tex_inputs, u32 num_colo switch (GetAPIType()) { case APIType::D3D: + case APIType::Metal: case APIType::OpenGL: case APIType::Vulkan: { diff --git a/Source/Core/VideoCommon/RenderBase.h b/Source/Core/VideoCommon/RenderBase.h index 403d028525..8824e6ff77 100644 --- a/Source/Core/VideoCommon/RenderBase.h +++ b/Source/Core/VideoCommon/RenderBase.h @@ -113,8 +113,8 @@ public: virtual void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) {} // Dispatching compute shaders with currently-bound state. - virtual void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, - u32 groups_z) + virtual void DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, + u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) { } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index 511643e83a..b9965421cc 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -60,6 +60,9 @@ std::string GetDiskShaderCacheFileName(APIType api_type, const char* type, bool case APIType::D3D: filename += "D3D"; break; + case APIType::Metal: + filename += "Metal"; + break; case APIType::OpenGL: filename += "OpenGL"; break; diff --git a/Source/Core/VideoCommon/Spirv.cpp b/Source/Core/VideoCommon/Spirv.cpp index b10b4eb92b..0fc22e1731 100644 --- a/Source/Core/VideoCommon/Spirv.cpp +++ b/Source/Core/VideoCommon/Spirv.cpp @@ -55,7 +55,7 @@ CompileShaderToSPV(EShLanguage stage, APIType api_type, glslang::TShader::ForbidIncluder includer; EProfile profile = ECoreProfile; EShMessages messages = static_cast(EShMsgDefault | EShMsgSpvRules); - if (api_type == APIType::Vulkan) + if (api_type == APIType::Vulkan || api_type == APIType::Metal) messages = static_cast(messages | EShMsgVulkanRules); int default_version = 450; diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 2f896027ad..07bbe0aa2e 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -2924,7 +2924,8 @@ bool TextureCacheBase::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, con auto dispatch_groups = TextureConversionShaderTiled::GetDispatchCount(info, aligned_width, aligned_height); - g_renderer->DispatchComputeShader(shader, dispatch_groups.first, dispatch_groups.second, 1); + g_renderer->DispatchComputeShader(shader, info->group_size_x, info->group_size_y, 1, + dispatch_groups.first, dispatch_groups.second, 1); // Copy from decoding texture -> final texture // This is because we don't want to have to create compute view for every layer diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index 1e7f2b64b5..459b12b9c8 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -519,10 +519,49 @@ UBO_BINDING(std140, 1) uniform UBO { uint u_palette_offset; }; -TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer; -#ifdef HAS_PALETTE -TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer; +#if defined(API_METAL) + +#if defined(TEXEL_BUFFER_FORMAT_R8) + SSBO_BINDING(0) readonly buffer Input { uint8_t s_input_buffer[]; }; + #define FETCH(offset) uint(s_input_buffer[offset]) +#elif defined(TEXEL_BUFFER_FORMAT_R16) + SSBO_BINDING(0) readonly buffer Input { uint16_t s_input_buffer[]; }; + #define FETCH(offset) uint(s_input_buffer[offset]) +#elif defined(TEXEL_BUFFER_FORMAT_RGBA8) + SSBO_BINDING(0) readonly buffer Input { u8vec4 s_input_buffer[]; }; + #define FETCH(offset) uvec4(s_input_buffer[offset]) +#elif defined(TEXEL_BUFFER_FORMAT_R32G32) + SSBO_BINDING(0) readonly buffer Input { uvec2 s_input_buffer[]; }; + #define FETCH(offset) s_input_buffer[offset] +#else + #error No texel buffer? #endif + +#ifdef HAS_PALETTE + SSBO_BINDING(1) readonly buffer Palette { uint16_t s_palette_buffer[]; }; + #define FETCH_PALETTE(offset) uint(s_palette_buffer[offset]) +#endif + +#else + +TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer; + +#if defined(TEXEL_BUFFER_FORMAT_R8) || defined(TEXEL_BUFFER_FORMAT_R16) + #define FETCH(offset) texelFetch(s_input_buffer, int((offset) + u_src_offset)).r +#elif defined(TEXEL_BUFFER_FORMAT_RGBA8) + #define FETCH(offset) texelFetch(s_input_buffer, int((offset) + u_src_offset)) +#elif defined(TEXEL_BUFFER_FORMAT_R32G32) + #define FETCH(offset) texelFetch(s_input_buffer, int((offset) + u_src_offset)).rg +#else + #error No texel buffer? +#endif + +#ifdef HAS_PALETTE + TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer; + #define FETCH_PALETTE(offset) texelFetch(s_palette_buffer, int((offset) + u_palette_offset)).r +#endif + +#endif // defined(API_METAL) IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image; #define GROUP_MEMORY_BARRIER_WITH_SYNC memoryBarrierShared(); barrier(); @@ -563,7 +602,7 @@ uint GetTiledTexelOffset(uint2 block_size, uint2 coords) { uint2 block = coords / block_size; uint2 offset = coords % block_size; - uint buffer_pos = u_src_offset; + uint buffer_pos = 0; buffer_pos += block.y * u_src_row_stride; buffer_pos += block.x * (block_size.x * block_size.y); buffer_pos += offset.y * block_size.x; @@ -575,7 +614,7 @@ uint GetTiledTexelOffset(uint2 block_size, uint2 coords) uint4 GetPaletteColor(uint index) { // Fetch and swap BE to LE. - uint val = Swap16(texelFetch(s_palette_buffer, int(u_palette_offset + index)).x); + uint val = Swap16(FETCH_PALETTE(index)); uint4 color; #if defined(PALETTE_FORMAT_IA8) @@ -633,14 +672,14 @@ static const std::map s_decoding_shader_info{ // the size of the buffer elements. uint2 block = coords.xy / 8u; uint2 offset = coords.xy % 8u; - uint buffer_pos = u_src_offset; + uint buffer_pos = 0; buffer_pos += block.y * u_src_row_stride; buffer_pos += block.x * 32u; buffer_pos += offset.y * 4u; buffer_pos += offset.x / 2u; // Select high nibble for odd texels, low for even. - uint val = texelFetch(s_input_buffer, int(buffer_pos)).x; + uint val = FETCH(buffer_pos); uint i; if ((coords.x & 1u) == 0u) i = Convert4To8((val >> 4)); @@ -663,7 +702,7 @@ static const std::map s_decoding_shader_info{ // Tiled in 8x4 blocks, 8 bits per pixel uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords); - uint val = texelFetch(s_input_buffer, int(buffer_pos)).x; + uint val = FETCH(buffer_pos); uint i = Convert4To8((val & 0x0Fu)); uint a = Convert4To8((val >> 4)); uint4 color = uint4(i, i, i, a); @@ -681,7 +720,7 @@ static const std::map s_decoding_shader_info{ // Tiled in 8x4 blocks, 8 bits per pixel uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords); - uint i = texelFetch(s_input_buffer, int(buffer_pos)).x; + uint i = FETCH(buffer_pos); uint4 color = uint4(i, i, i, i); float4 norm_color = float4(color) / 255.0; @@ -697,7 +736,7 @@ static const std::map s_decoding_shader_info{ // Tiled in 4x4 blocks, 16 bits per pixel uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords); - uint val = texelFetch(s_input_buffer, int(buffer_pos)).x; + uint val = FETCH(buffer_pos); uint a = (val & 0xFFu); uint i = (val >> 8); uint4 color = uint4(i, i, i, a); @@ -714,7 +753,7 @@ static const std::map s_decoding_shader_info{ // Tiled in 4x4 blocks uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords); - uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x); + uint val = Swap16(FETCH(buffer_pos)); uint4 color; color.x = Convert5To8(bitfieldExtract(val, 11, 5)); @@ -736,7 +775,7 @@ static const std::map s_decoding_shader_info{ // Tiled in 4x4 blocks uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords); - uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x); + uint val = Swap16(FETCH(buffer_pos)); uint4 color; if ((val & 0x8000u) != 0u) @@ -771,7 +810,7 @@ static const std::map s_decoding_shader_info{ // for the entire block, then the GB channels afterwards. uint2 block = coords.xy / 4u; uint2 offset = coords.xy % 4u; - uint buffer_pos = u_src_offset; + uint buffer_pos = 0; // Our buffer has 16-bit elements, so the offsets here are half what they would be in bytes. buffer_pos += block.y * u_src_row_stride; @@ -780,8 +819,8 @@ static const std::map s_decoding_shader_info{ buffer_pos += offset.x; // The two GB channels follow after the block's AR channels. - uint val1 = texelFetch(s_input_buffer, int(buffer_pos + 0u)).x; - uint val2 = texelFetch(s_input_buffer, int(buffer_pos + 16u)).x; + uint val1 = FETCH(buffer_pos + 0u); + uint val2 = FETCH(buffer_pos + 16u); uint4 color; color.a = (val1 & 0xFFu); @@ -835,14 +874,14 @@ static const std::map s_decoding_shader_info{ // Calculate tiled block coordinates. uint2 tile_block_coords = block_coords / 2u; uint2 subtile_block_coords = block_coords % 2u; - uint buffer_pos = u_src_offset; + uint buffer_pos = 0; buffer_pos += tile_block_coords.y * u_src_row_stride; buffer_pos += tile_block_coords.x * 4u; buffer_pos += subtile_block_coords.y * 2u; buffer_pos += subtile_block_coords.x; // Read the entire DXT block to shared memory. - uint2 raw_data = texelFetch(s_input_buffer, int(buffer_pos)).xy; + uint2 raw_data = FETCH(buffer_pos); shared_temp[block_in_group] = raw_data; } @@ -921,14 +960,14 @@ static const std::map s_decoding_shader_info{ // the size of the buffer elements. uint2 block = coords.xy / 8u; uint2 offset = coords.xy % 8u; - uint buffer_pos = u_src_offset; + uint buffer_pos = 0; buffer_pos += block.y * u_src_row_stride; buffer_pos += block.x * 32u; buffer_pos += offset.y * 4u; buffer_pos += offset.x / 2u; // Select high nibble for odd texels, low for even. - uint val = texelFetch(s_input_buffer, int(buffer_pos)).x; + uint val = FETCH(buffer_pos); uint index = ((coords.x & 1u) == 0u) ? (val >> 4) : (val & 0x0Fu); float4 norm_color = GetPaletteColorNormalized(index); imageStore(output_image, int3(int2(coords), 0), norm_color); @@ -945,7 +984,7 @@ static const std::map s_decoding_shader_info{ // Tiled in 8x4 blocks, 8 bits per pixel uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords); - uint index = texelFetch(s_input_buffer, int(buffer_pos)).x; + uint index = FETCH(buffer_pos); float4 norm_color = GetPaletteColorNormalized(index); imageStore(output_image, int3(int2(coords), 0), norm_color); } @@ -960,7 +999,7 @@ static const std::map s_decoding_shader_info{ // Tiled in 4x4 blocks, 16 bits per pixel uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords); - uint index = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x) & 0x3FFFu; + uint index = Swap16(FETCH(buffer_pos)) & 0x3FFFu; float4 norm_color = GetPaletteColorNormalized(index); imageStore(output_image, int3(int2(coords), 0), norm_color); } @@ -976,8 +1015,8 @@ static const std::map s_decoding_shader_info{ DEFINE_MAIN(8, 8) { uint2 uv = gl_GlobalInvocationID.xy; - int buffer_pos = int(u_src_offset + (uv.y * u_src_row_stride) + (uv.x / 2u)); - float4 yuyv = float4(texelFetch(s_input_buffer, buffer_pos)); + uint buffer_pos = (uv.y * u_src_row_stride) + (uv.x / 2u); + float4 yuyv = float4(FETCH(buffer_pos)); float y = (uv.x & 1u) != 0u ? yuyv.b : yuyv.r; @@ -1034,6 +1073,25 @@ std::string GenerateDecodingShader(TextureFormat format, std::optionalbuffer_format) + { + case TEXEL_BUFFER_FORMAT_R8_UINT: + ss << "#define TEXEL_BUFFER_FORMAT_R8 1\n"; + break; + case TEXEL_BUFFER_FORMAT_R16_UINT: + ss << "#define TEXEL_BUFFER_FORMAT_R16 1\n"; + break; + case TEXEL_BUFFER_FORMAT_RGBA8_UINT: + ss << "#define TEXEL_BUFFER_FORMAT_RGBA8 1\n"; + break; + case TEXEL_BUFFER_FORMAT_R32G32_UINT: + ss << "#define TEXEL_BUFFER_FORMAT_R32G32 1\n"; + break; + case NUM_TEXEL_BUFFER_FORMATS: + ASSERT(0); + break; + } + ss << decoding_shader_header; ss << info->shader_body; @@ -1121,7 +1179,10 @@ float4 DecodePixel(int val) ss << "\n"; - ss << "TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp0;\n"; + if (api_type == APIType::Metal) + ss << "SSBO_BINDING(0) readonly buffer Palette { uint16_t palette[]; };\n"; + else + ss << "TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp0;\n"; ss << "SAMPLER_BINDING(1) uniform sampler2DArray samp1;\n"; ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n"; @@ -1143,9 +1204,12 @@ float4 DecodePixel(int val) ss << "void main() {\n"; ss << " float3 coords = v_tex0;\n"; ss << " int src = int(round(texture(samp1, coords).r * multiplier));\n"; - ss << " src = int(texelFetch(samp0, src + texel_buffer_offset).r);\n"; + if (api_type == APIType::Metal) + ss << " src = int(palette[uint(src)]);\n"; + else + ss << " src = int(texelFetch(samp0, src + texel_buffer_offset).r);\n"; - ss << " src = ((src << 8) & 0xFF00) | (src >> 8);\n"; + ss << " src = ((src << 8) | (src >> 8)) & 0xFFFF;\n"; ss << " ocol0 = DecodePixel(src);\n"; ss << "}\n"; diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index 0c8475123e..22708f3d66 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -82,7 +82,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config, #ifdef __APPLE__ // Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK) // if we want to use it. - if (api_type == APIType::Vulkan) + if (api_type == APIType::Vulkan || api_type == APIType::Metal) { if (use_dual_source) { diff --git a/Source/Core/VideoCommon/VideoBackendBase.cpp b/Source/Core/VideoCommon/VideoBackendBase.cpp index 6028b7b14a..2a3c15db0a 100644 --- a/Source/Core/VideoCommon/VideoBackendBase.cpp +++ b/Source/Core/VideoCommon/VideoBackendBase.cpp @@ -35,6 +35,9 @@ #ifdef HAS_VULKAN #include "VideoBackends/Vulkan/VideoBackend.h" #endif +#ifdef __APPLE__ +#include "VideoBackends/Metal/VideoBackend.h" +#endif #include "VideoCommon/AsyncRequests.h" #include "VideoCommon/BPStructs.h" @@ -227,6 +230,7 @@ const std::vector>& VideoBackendBase::GetAvail #ifdef __APPLE__ // Emplace the Vulkan backend at the beginning so it takes precedence over OpenGL. backends.emplace(backends.begin(), std::make_unique()); + backends.push_back(std::make_unique()); #else backends.push_back(std::make_unique()); #endif diff --git a/Source/Core/VideoCommon/VideoCommon.h b/Source/Core/VideoCommon/VideoCommon.h index 390224eda4..3b6540c822 100644 --- a/Source/Core/VideoCommon/VideoCommon.h +++ b/Source/Core/VideoCommon/VideoCommon.h @@ -39,6 +39,7 @@ enum class APIType OpenGL, D3D, Vulkan, + Metal, Nothing }; From a5ef9dfd5316f8db28f9da16710eeeed9ab1e19e Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sat, 11 Jun 2022 18:39:24 -0500 Subject: [PATCH 02/12] VideoBackends:Metal: Use DriverDetails for bugs --- Source/Core/VideoBackends/Metal/MTLUtil.mm | 23 ++++++++++++++++++---- Source/Core/VideoCommon/DriverDetails.cpp | 10 ++++++++++ Source/Core/VideoCommon/DriverDetails.h | 4 +++- Source/Core/VideoCommon/PixelShaderGen.cpp | 2 +- 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.mm b/Source/Core/VideoBackends/Metal/MTLUtil.mm index b851a3d256..b20242e82a 100644 --- a/Source/Core/VideoBackends/Metal/MTLUtil.mm +++ b/Source/Core/VideoBackends/Metal/MTLUtil.mm @@ -10,6 +10,7 @@ #include "Common/MsgHandler.h" +#include "VideoCommon/DriverDetails.h" #include "VideoCommon/Spirv.h" Metal::DeviceFeatures Metal::g_features; @@ -81,6 +82,21 @@ void Metal::Util::PopulateBackendInfoAdapters(VideoConfig* config, void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id device) { + // Initialize DriverDetails first so we can use it later + DriverDetails::Vendor vendor = DriverDetails::VENDOR_UNKNOWN; + if ([[device name] containsString:@"NVIDIA"]) + vendor = DriverDetails::VENDOR_NVIDIA; + else if ([[device name] containsString:@"AMD"]) + vendor = DriverDetails::VENDOR_ATI; + else if ([[device name] containsString:@"Intel"]) + vendor = DriverDetails::VENDOR_INTEL; + else if ([[device name] containsString:@"Apple"]) + vendor = DriverDetails::VENDOR_APPLE; + const NSOperatingSystemVersion cocoa_ver = [[NSProcessInfo processInfo] operatingSystemVersion]; + double version = cocoa_ver.majorVersion * 100 + cocoa_ver.minorVersion; + DriverDetails::Init(DriverDetails::API_METAL, vendor, DriverDetails::DRIVER_APPLE, version, + DriverDetails::Family::UNKNOWN); + #if TARGET_OS_OSX config->backend_info.bSupportsDepthClamp = true; config->backend_info.bSupportsST3CTextures = true; @@ -117,11 +133,10 @@ void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id [device supportsFamily:MTLGPUFamilyMac2] || [device supportsFamily:MTLGPUFamilyApple6]; config->backend_info.bSupportsFramebufferFetch = [device supportsFamily:MTLGPUFamilyApple1]; } - if ([[device name] containsString:@"AMD"]) - { - // Broken + if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_SUBGROUP_INVOCATION_ID)) g_features.subgroup_ops = false; - } + if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING)) + config->backend_info.bSupportsDynamicSamplerIndexing = false; } // clang-format off diff --git a/Source/Core/VideoCommon/DriverDetails.cpp b/Source/Core/VideoCommon/DriverDetails.cpp index d4d828e12e..f255460d74 100644 --- a/Source/Core/VideoCommon/DriverDetails.cpp +++ b/Source/Core/VideoCommon/DriverDetails.cpp @@ -98,6 +98,8 @@ constexpr BugInfo m_known_bugs[] = { BUG_BROKEN_DUAL_SOURCE_BLENDING, -1.0, -1.0, true}, {API_VULKAN, OS_OSX, VENDOR_INTEL, DRIVER_PORTABILITY, Family::UNKNOWN, BUG_BROKEN_DUAL_SOURCE_BLENDING, -1.0, -1.0, true}, + {API_METAL, OS_OSX, VENDOR_INTEL, DRIVER_APPLE, Family::UNKNOWN, + BUG_BROKEN_DUAL_SOURCE_BLENDING, -1.0, -1.0, true}, {API_OPENGL, OS_ALL, VENDOR_IMGTEC, DRIVER_IMGTEC, Family::UNKNOWN, BUG_BROKEN_BITWISE_OP_NEGATION, -1.0, 108.4693462, true}, {API_VULKAN, OS_WINDOWS, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_PRIMITIVE_RESTART, -1.0, @@ -120,6 +122,8 @@ constexpr BugInfo m_known_bugs[] = { BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true}, {API_VULKAN, OS_OSX, VENDOR_ALL, DRIVER_PORTABILITY, Family::UNKNOWN, BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true}, + {API_METAL, OS_OSX, VENDOR_ALL, DRIVER_APPLE, Family::UNKNOWN, BUG_BROKEN_REVERSED_DEPTH_RANGE, + -1.0, -1.0, true}, {API_VULKAN, OS_ALL, VENDOR_ARM, DRIVER_ARM, Family::UNKNOWN, BUG_SLOW_CACHED_READBACK_MEMORY, -1.0, -1.0, true}, {API_VULKAN, OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN, @@ -130,6 +134,8 @@ constexpr BugInfo m_known_bugs[] = { -1.0, -1.0, true}, {API_VULKAN, OS_OSX, VENDOR_ATI, DRIVER_PORTABILITY, Family::UNKNOWN, BUG_BROKEN_SUBGROUP_INVOCATION_ID, -1.0, -1.0, true}, + {API_METAL, OS_OSX, VENDOR_ATI, DRIVER_APPLE, Family::UNKNOWN, + BUG_BROKEN_SUBGROUP_INVOCATION_ID, -1.0, -1.0, true}, {API_OPENGL, OS_ANDROID, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, BUG_BROKEN_MULTITHREADED_SHADER_PRECOMPILATION, -1.0, -1.0, true}, {API_VULKAN, OS_ANDROID, VENDOR_ALL, DRIVER_ALL, Family::UNKNOWN, @@ -140,8 +146,12 @@ constexpr BugInfo m_known_bugs[] = { -1.0, -1.0, true}, {API_VULKAN, OS_OSX, VENDOR_APPLE, DRIVER_PORTABILITY, Family::UNKNOWN, BUG_BROKEN_DISCARD_WITH_EARLY_Z, -1.0, -1.0, true}, + {API_METAL, OS_OSX, VENDOR_APPLE, DRIVER_APPLE, Family::UNKNOWN, + BUG_BROKEN_DISCARD_WITH_EARLY_Z, -1.0, -1.0, true}, {API_VULKAN, OS_OSX, VENDOR_INTEL, DRIVER_PORTABILITY, Family::UNKNOWN, BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING, -1.0, -1.0, true}, + {API_METAL, OS_OSX, VENDOR_INTEL, DRIVER_APPLE, Family::UNKNOWN, + BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING, -1.0, -1.0, true}, }; static std::map m_bugs; diff --git a/Source/Core/VideoCommon/DriverDetails.h b/Source/Core/VideoCommon/DriverDetails.h index 646fa00f74..b7a7a5689f 100644 --- a/Source/Core/VideoCommon/DriverDetails.h +++ b/Source/Core/VideoCommon/DriverDetails.h @@ -13,7 +13,8 @@ namespace DriverDetails enum API { API_OPENGL = (1 << 0), - API_VULKAN = (1 << 1) + API_VULKAN = (1 << 1), + API_METAL = (1 << 2), }; // Enum of supported operating systems @@ -64,6 +65,7 @@ enum Driver DRIVER_IMGTEC, // Official PowerVR driver DRIVER_VIVANTE, // Official Vivante driver DRIVER_PORTABILITY, // Vulkan via Metal on macOS + DRIVER_APPLE, // Metal on macOS DRIVER_UNKNOWN // Unknown driver, default to official hardware driver }; diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index bf46764c0e..79d2bd1d52 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -810,7 +810,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos #ifdef __APPLE__ // Framebuffer fetch is only supported by Metal, so ensure that we're running Vulkan (MoltenVK) // if we want to use it. - if (api_type == APIType::Vulkan) + if (api_type == APIType::Vulkan || api_type == APIType::Metal) { if (!uid_data->no_dual_src) { From 61705b05da4b18cca54b54319cd5463e3cf945f5 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sun, 12 Jun 2022 00:20:00 -0500 Subject: [PATCH 03/12] VideoBackends:Metal: FBFetch for Intel GPUs --- Source/Core/VideoBackends/Metal/MTLUtil.mm | 88 ++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.mm b/Source/Core/VideoBackends/Metal/MTLUtil.mm index b20242e82a..edb250e43f 100644 --- a/Source/Core/VideoBackends/Metal/MTLUtil.mm +++ b/Source/Core/VideoBackends/Metal/MTLUtil.mm @@ -80,6 +80,89 @@ void Metal::Util::PopulateBackendInfoAdapters(VideoConfig* config, } } +static bool DetectIntelGPUFBFetch(id dev) +{ + // Even though it's nowhere in the feature set tables, some Intel GPUs support fbfetch! + // Annoyingly, the Haswell compiler successfully makes a pipeline but actually miscompiles it and + // doesn't insert any fbfetch instructions. + // The Broadwell compiler inserts the Skylake fbfetch instruction, + // but Broadwell doesn't support that. It seems to make the shader not do anything. + // So we actually have to test the thing + + static constexpr const char* shader = R"( +vertex float4 fs_triangle(uint vid [[vertex_id]]) { + return float4(vid & 1 ? 3 : -1, vid & 2 ? 3 : -1, 0, 1); +} +fragment float4 fbfetch_test(float4 in [[color(0), raster_order_group(0)]]) { + return in * 2; +} +)"; + auto lib = MRCTransfer([dev newLibraryWithSource:[NSString stringWithUTF8String:shader] + options:nil + error:nil]); + if (!lib) + return false; + auto pdesc = MRCTransfer([MTLRenderPipelineDescriptor new]); + [pdesc setVertexFunction:MRCTransfer([lib newFunctionWithName:@"fs_triangle"])]; + [pdesc setFragmentFunction:MRCTransfer([lib newFunctionWithName:@"fbfetch_test"])]; + [[pdesc colorAttachments][0] setPixelFormat:MTLPixelFormatRGBA8Unorm]; + auto pipe = MRCTransfer([dev newRenderPipelineStateWithDescriptor:pdesc error:nil]); + if (!pipe) + return false; + auto buf = MRCTransfer([dev newBufferWithLength:4 options:MTLResourceStorageModeShared]); + auto tdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm + width:1 + height:1 + mipmapped:false]; + [tdesc setUsage:MTLTextureUsageRenderTarget]; + auto tex = MRCTransfer([dev newTextureWithDescriptor:tdesc]); + auto q = MRCTransfer([dev newCommandQueue]); + u32 px = 0x11223344; + memcpy([buf contents], &px, 4); + id cmdbuf = [q commandBuffer]; + id upload_encoder = [cmdbuf blitCommandEncoder]; + [upload_encoder copyFromBuffer:buf + sourceOffset:0 + sourceBytesPerRow:4 + sourceBytesPerImage:4 + sourceSize:MTLSizeMake(1, 1, 1) + toTexture:tex + destinationSlice:0 + destinationLevel:0 + destinationOrigin:MTLOriginMake(0, 0, 0)]; + [upload_encoder endEncoding]; + auto rpdesc = MRCTransfer([MTLRenderPassDescriptor new]); + [[rpdesc colorAttachments][0] setTexture:tex]; + [[rpdesc colorAttachments][0] setLoadAction:MTLLoadActionLoad]; + [[rpdesc colorAttachments][0] setStoreAction:MTLStoreActionStore]; + id renc = [cmdbuf renderCommandEncoderWithDescriptor:rpdesc]; + [renc setRenderPipelineState:pipe]; + [renc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3]; + [renc endEncoding]; + id download_encoder = [cmdbuf blitCommandEncoder]; + [download_encoder copyFromTexture:tex + sourceSlice:0 + sourceLevel:0 + sourceOrigin:MTLOriginMake(0, 0, 0) + sourceSize:MTLSizeMake(1, 1, 1) + toBuffer:buf + destinationOffset:0 + destinationBytesPerRow:4 + destinationBytesPerImage:4]; + [download_encoder endEncoding]; + [cmdbuf commit]; + [cmdbuf waitUntilCompleted]; + u32 outpx; + memcpy(&outpx, [buf contents], 4); + // Proper fbfetch will double contents, Haswell will return black, and Broadwell will do nothing + if (outpx == 0x22446688) + return true; // Skylake+ + else if (outpx == 0x11223344) + return false; // Broadwell + else + return false; // Haswell +} + void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id device) { // Initialize DriverDetails first so we can use it later @@ -135,6 +218,11 @@ void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id } if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_SUBGROUP_INVOCATION_ID)) g_features.subgroup_ops = false; +#if TARGET_OS_OSX + if (@available(macOS 11, *)) + if (vendor == DriverDetails::VENDOR_INTEL) + config->backend_info.bSupportsFramebufferFetch |= DetectIntelGPUFBFetch(device); +#endif if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_DYNAMIC_SAMPLER_INDEXING)) config->backend_info.bSupportsDynamicSamplerIndexing = false; } From e32213d031f30e82d2d19ce26ef3a0262c061bd8 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Mon, 13 Jun 2022 00:56:25 -0500 Subject: [PATCH 04/12] VideoBackends:Metal: Create samplers on demand --- Source/Core/VideoBackends/Metal/MTLObjectCache.h | 8 +++++++- Source/Core/VideoBackends/Metal/MTLObjectCache.mm | 15 ++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/Source/Core/VideoBackends/Metal/MTLObjectCache.h b/Source/Core/VideoBackends/Metal/MTLObjectCache.h index d8a10d2d51..2f2bcd48dc 100644 --- a/Source/Core/VideoBackends/Metal/MTLObjectCache.h +++ b/Source/Core/VideoBackends/Metal/MTLObjectCache.h @@ -76,13 +76,19 @@ public: id GetDepthStencil(DepthStencilSelector sel) { return m_dss[sel.value]; } - id GetSampler(SamplerSelector sel) { return m_samplers[sel.value]; } + id GetSampler(SamplerSelector sel) + { + if (__builtin_expect(!m_samplers[sel.value], false)) + m_samplers[sel.value] = CreateSampler(sel); + return m_samplers[sel.value]; + } id GetSampler(SamplerState state) { return GetSampler(SamplerSelector(state)); } void ReloadSamplers(); private: + MRCOwned> CreateSampler(SamplerSelector sel); MRCOwned> m_dss[DepthStencilSelector::N_VALUES]; MRCOwned> m_samplers[SamplerSelector::N_VALUES]; }; diff --git a/Source/Core/VideoBackends/Metal/MTLObjectCache.mm b/Source/Core/VideoBackends/Metal/MTLObjectCache.mm index 22809f5937..afee912016 100644 --- a/Source/Core/VideoBackends/Metal/MTLObjectCache.mm +++ b/Source/Core/VideoBackends/Metal/MTLObjectCache.mm @@ -11,12 +11,10 @@ std::unique_ptr Metal::g_object_cache; static void SetupDepthStencil( MRCOwned> (&dss)[Metal::DepthStencilSelector::N_VALUES]); -static void SetupSamplers(MRCOwned> (&samplers)[Metal::SamplerSelector::N_VALUES]); Metal::ObjectCache::ObjectCache() { SetupDepthStencil(m_dss); - SetupSamplers(m_samplers); } Metal::ObjectCache::~ObjectCache() @@ -147,13 +145,11 @@ static const char* to_string(WrapMode wrap) // clang-format on -static void SetupSamplers(MRCOwned> (&samplers)[Metal::SamplerSelector::N_VALUES]) +MRCOwned> Metal::ObjectCache::CreateSampler(SamplerSelector sel) { - auto desc = MRCTransfer([MTLSamplerDescriptor new]); - Metal::SamplerSelector sel; - for (size_t i = 0; i < std::size(samplers); i++) + @autoreleasepool { - sel.value = i; + auto desc = MRCTransfer([MTLSamplerDescriptor new]); [desc setMinFilter:ConvertMinMag(sel.MinFilter())]; [desc setMagFilter:ConvertMinMag(sel.MagFilter())]; [desc setMipFilter:ConvertMip(sel.MipFilter())]; @@ -165,11 +161,12 @@ static void SetupSamplers(MRCOwned> (&samplers)[Metal::Sampl to_string(sel.MagFilter()), to_string(sel.MipFilter()), to_string(sel.WrapU()), to_string(sel.WrapV()), sel.AnisotropicFiltering() ? "(AF)" : ""])]; - samplers[i] = MRCTransfer([Metal::g_device newSamplerStateWithDescriptor:desc]); + return MRCTransfer([Metal::g_device newSamplerStateWithDescriptor:desc]); } } void Metal::ObjectCache::ReloadSamplers() { - SetupSamplers(m_samplers); + for (auto& sampler : m_samplers) + sampler = nullptr; } From 5742ccf8dedd382b17138cf0a40fa1102c66283c Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Mon, 13 Jun 2022 02:24:33 -0500 Subject: [PATCH 05/12] VideoBackends:Metal: Cache pipelines Metal pipelines hold less stuff than Dolphin pipelines, so duplicates will appear --- .../Core/VideoBackends/Metal/CMakeLists.txt | 3 + .../Core/VideoBackends/Metal/MTLObjectCache.h | 9 + .../VideoBackends/Metal/MTLObjectCache.mm | 326 ++++++++++++++++++ Source/Core/VideoBackends/Metal/MTLPipeline.h | 31 +- .../Core/VideoBackends/Metal/MTLPipeline.mm | 12 +- Source/Core/VideoBackends/Metal/MTLRenderer.h | 1 - .../Core/VideoBackends/Metal/MTLRenderer.mm | 211 +----------- Source/Core/VideoBackends/Metal/MTLShader.h | 8 +- Source/Core/VideoBackends/Metal/MTLShader.mm | 19 + .../VideoBackends/Metal/MTLVertexFormat.h | 23 ++ .../VideoBackends/Metal/MTLVertexFormat.mm | 143 ++++++++ 11 files changed, 555 insertions(+), 231 deletions(-) create mode 100644 Source/Core/VideoBackends/Metal/MTLShader.mm create mode 100644 Source/Core/VideoBackends/Metal/MTLVertexFormat.h create mode 100644 Source/Core/VideoBackends/Metal/MTLVertexFormat.mm diff --git a/Source/Core/VideoBackends/Metal/CMakeLists.txt b/Source/Core/VideoBackends/Metal/CMakeLists.txt index a3becc1d2e..744cb2f755 100644 --- a/Source/Core/VideoBackends/Metal/CMakeLists.txt +++ b/Source/Core/VideoBackends/Metal/CMakeLists.txt @@ -11,6 +11,7 @@ add_library(videometal MTLPipeline.h MTLRenderer.mm MTLRenderer.h + MTLShader.mm MTLShader.h MTLStateTracker.mm MTLStateTracker.h @@ -18,6 +19,8 @@ add_library(videometal MTLTexture.h MTLUtil.mm MTLUtil.h + MTLVertexFormat.mm + MTLVertexFormat.h MTLVertexManager.mm MTLVertexManager.h VideoBackend.h diff --git a/Source/Core/VideoBackends/Metal/MTLObjectCache.h b/Source/Core/VideoBackends/Metal/MTLObjectCache.h index 2f2bcd48dc..86668e614d 100644 --- a/Source/Core/VideoBackends/Metal/MTLObjectCache.h +++ b/Source/Core/VideoBackends/Metal/MTLObjectCache.h @@ -10,8 +10,12 @@ #include "VideoCommon/RenderState.h" +struct AbstractPipelineConfig; +class AbstractPipeline; + namespace Metal { +class Shader; extern MRCOwned> g_device; extern MRCOwned> g_queue; @@ -87,7 +91,12 @@ public: void ReloadSamplers(); + std::unique_ptr CreatePipeline(const AbstractPipelineConfig& config); + void ShaderDestroyed(const Shader* shader); + private: + class Internal; + std::unique_ptr m_internal; MRCOwned> CreateSampler(SamplerSelector sel); MRCOwned> m_dss[DepthStencilSelector::N_VALUES]; MRCOwned> m_samplers[SamplerSelector::N_VALUES]; diff --git a/Source/Core/VideoBackends/Metal/MTLObjectCache.mm b/Source/Core/VideoBackends/Metal/MTLObjectCache.mm index afee912016..e9b836ce1c 100644 --- a/Source/Core/VideoBackends/Metal/MTLObjectCache.mm +++ b/Source/Core/VideoBackends/Metal/MTLObjectCache.mm @@ -3,6 +3,20 @@ #include "VideoBackends/Metal/MTLObjectCache.h" +#include +#include +#include + +#include "Common/Assert.h" +#include "Common/MsgHandler.h" + +#include "VideoBackends/Metal/MTLPipeline.h" +#include "VideoBackends/Metal/MTLUtil.h" +#include "VideoBackends/Metal/MTLVertexFormat.h" + +#include "VideoCommon/AbstractPipeline.h" +#include "VideoCommon/NativeVertexFormat.h" +#include "VideoCommon/VertexShaderGen.h" #include "VideoCommon/VideoConfig.h" MRCOwned> Metal::g_device; @@ -14,6 +28,7 @@ static void SetupDepthStencil( Metal::ObjectCache::ObjectCache() { + m_internal = std::make_unique(); SetupDepthStencil(m_dss); } @@ -170,3 +185,314 @@ void Metal::ObjectCache::ReloadSamplers() for (auto& sampler : m_samplers) sampler = nullptr; } + +// MARK: Pipelines + +static MTLPrimitiveTopologyClass GetClass(PrimitiveType prim) +{ + switch (prim) + { + case PrimitiveType::Points: + return MTLPrimitiveTopologyClassPoint; + case PrimitiveType::Lines: + return MTLPrimitiveTopologyClassLine; + case PrimitiveType::Triangles: + case PrimitiveType::TriangleStrip: + return MTLPrimitiveTopologyClassTriangle; + } +} + +static MTLPrimitiveType Convert(PrimitiveType prim) +{ + // clang-format off + switch (prim) + { + case PrimitiveType::Points: return MTLPrimitiveTypePoint; + case PrimitiveType::Lines: return MTLPrimitiveTypeLine; + case PrimitiveType::Triangles: return MTLPrimitiveTypeTriangle; + case PrimitiveType::TriangleStrip: return MTLPrimitiveTypeTriangleStrip; + } + // clang-format on +} + +static MTLCullMode Convert(CullMode cull) +{ + switch (cull) + { + case CullMode::None: + case CullMode::All: // Handled by disabling rasterization + return MTLCullModeNone; + case CullMode::Front: + return MTLCullModeFront; + case CullMode::Back: + return MTLCullModeBack; + } +} + +static MTLBlendFactor Convert(DstBlendFactor factor, bool usedualsrc) +{ + // clang-format off + switch (factor) + { + case DstBlendFactor::Zero: return MTLBlendFactorZero; + case DstBlendFactor::One: return MTLBlendFactorOne; + case DstBlendFactor::SrcClr: return MTLBlendFactorSourceColor; + case DstBlendFactor::InvSrcClr: return MTLBlendFactorOneMinusSourceColor; + case DstBlendFactor::SrcAlpha: return usedualsrc ? MTLBlendFactorSource1Alpha + : MTLBlendFactorSourceAlpha; + case DstBlendFactor::InvSrcAlpha: return usedualsrc ? MTLBlendFactorOneMinusSource1Alpha + : MTLBlendFactorOneMinusSourceAlpha; + case DstBlendFactor::DstAlpha: return MTLBlendFactorDestinationAlpha; + case DstBlendFactor::InvDstAlpha: return MTLBlendFactorOneMinusDestinationAlpha; + } + // clang-format on +} + +static MTLBlendFactor Convert(SrcBlendFactor factor, bool usedualsrc) +{ + // clang-format off + switch (factor) + { + case SrcBlendFactor::Zero: return MTLBlendFactorZero; + case SrcBlendFactor::One: return MTLBlendFactorOne; + case SrcBlendFactor::DstClr: return MTLBlendFactorDestinationColor; + case SrcBlendFactor::InvDstClr: return MTLBlendFactorOneMinusDestinationColor; + case SrcBlendFactor::SrcAlpha: return usedualsrc ? MTLBlendFactorSource1Alpha + : MTLBlendFactorSourceAlpha; + case SrcBlendFactor::InvSrcAlpha: return usedualsrc ? MTLBlendFactorOneMinusSource1Alpha + : MTLBlendFactorOneMinusSourceAlpha; + case SrcBlendFactor::DstAlpha: return MTLBlendFactorDestinationAlpha; + case SrcBlendFactor::InvDstAlpha: return MTLBlendFactorOneMinusDestinationAlpha; + } + // clang-format on +} + +class Metal::ObjectCache::Internal +{ +public: + using StoredPipeline = std::pair>, PipelineReflection>; + /// Holds only the things that are actually used in a Metal pipeline + struct PipelineID + { + struct VertexAttribute + { + // Just hold the things that might differ while using the same shader + // (Really only a thing for ubershaders) + u8 offset : 6; + u8 components : 2; + VertexAttribute() = default; + explicit VertexAttribute(AttributeFormat format) + : offset(format.offset), components(format.components - 1) + { + if (!format.enable) + offset = 0x3F; // Set it to something unlikely + } + }; + template + static void CopyAll(std::array& output, const AttributeFormat (&input)[N]) + { + for (size_t i = 0; i < N; ++i) + output[i] = VertexAttribute(input[i]); + } + PipelineID(const AbstractPipelineConfig& cfg) + { + memset(this, 0, sizeof(*this)); + if (const NativeVertexFormat* v = cfg.vertex_format) + { + const PortableVertexDeclaration& decl = v->GetVertexDeclaration(); + v_stride = v->GetVertexStride(); + v_position = VertexAttribute(decl.position); + CopyAll(v_normals, decl.normals); + CopyAll(v_colors, decl.colors); + CopyAll(v_texcoords, decl.texcoords); + v_posmtx = VertexAttribute(decl.posmtx); + } + vertex_shader = static_cast(cfg.vertex_shader); + fragment_shader = static_cast(cfg.pixel_shader); + framebuffer.color_texture_format = cfg.framebuffer_state.color_texture_format.Value(); + framebuffer.depth_texture_format = cfg.framebuffer_state.depth_texture_format.Value(); + blend.colorupdate = cfg.blending_state.colorupdate.Value(); + blend.alphaupdate = cfg.blending_state.alphaupdate.Value(); + if (cfg.blending_state.blendenable) + { + // clang-format off + blend.blendenable = true; + blend.usedualsrc = cfg.blending_state.usedualsrc.Value(); + blend.srcfactor = cfg.blending_state.srcfactor.Value(); + blend.dstfactor = cfg.blending_state.dstfactor.Value(); + blend.srcfactoralpha = cfg.blending_state.srcfactoralpha.Value(); + blend.dstfactoralpha = cfg.blending_state.dstfactoralpha.Value(); + blend.subtract = cfg.blending_state.subtract.Value(); + blend.subtractAlpha = cfg.blending_state.subtractAlpha.Value(); + // clang-format on + } + // Throw extras in bits we don't otherwise use + if (cfg.rasterization_state.cullmode == CullMode::All) + blend.hex |= 1 << 29; + if (cfg.rasterization_state.primitive == PrimitiveType::Points) + blend.hex |= 1 << 30; + else if (cfg.rasterization_state.primitive == PrimitiveType::Lines) + blend.hex |= 1 << 31; + } + PipelineID() { memset(this, 0, sizeof(*this)); } + PipelineID(const PipelineID& other) { memcpy(this, &other, sizeof(*this)); } + PipelineID& operator=(const PipelineID& other) + { + memcpy(this, &other, sizeof(*this)); + return *this; + } + bool operator<(const PipelineID& other) const + { + return memcmp(this, &other, sizeof(*this)) < 0; + } + bool operator==(const PipelineID& other) const + { + return memcmp(this, &other, sizeof(*this)) == 0; + } + + u8 v_stride; + VertexAttribute v_position; + std::array v_normals; + std::array v_colors; + std::array v_texcoords; + VertexAttribute v_posmtx; + const Shader* vertex_shader; + const Shader* fragment_shader; + BlendingState blend; + FramebufferState framebuffer; + }; + + std::mutex m_mtx; + std::condition_variable m_cv; + std::map m_pipelines; + std::map> m_shaders; + std::array m_pipeline_counter; + + StoredPipeline CreatePipeline(const AbstractPipelineConfig& config) + { + @autoreleasepool + { + ASSERT(!config.geometry_shader); + auto desc = MRCTransfer([MTLRenderPipelineDescriptor new]); + [desc setVertexFunction:static_cast(config.vertex_shader)->GetShader()]; + [desc setFragmentFunction:static_cast(config.pixel_shader)->GetShader()]; + if (config.usage == AbstractPipelineUsage::GX) + { + if ([[[desc vertexFunction] label] containsString:@"Uber"]) + [desc + setLabel:[NSString stringWithFormat:@"GX Uber Pipeline %d", m_pipeline_counter[0]++]]; + else + [desc setLabel:[NSString stringWithFormat:@"GX Pipeline %d", m_pipeline_counter[1]++]]; + } + else + { + [desc setLabel:[NSString stringWithFormat:@"Utility Pipeline %d", m_pipeline_counter[2]++]]; + } + if (config.vertex_format) + [desc setVertexDescriptor:static_cast(config.vertex_format)->Get()]; + RasterizationState rs = config.rasterization_state; + [desc setInputPrimitiveTopology:GetClass(rs.primitive)]; + if (rs.cullmode == CullMode::All) + [desc setRasterizationEnabled:NO]; + MTLRenderPipelineColorAttachmentDescriptor* color0 = + [[desc colorAttachments] objectAtIndexedSubscript:0]; + BlendingState bs = config.blending_state; + MTLColorWriteMask mask = MTLColorWriteMaskNone; + if (bs.colorupdate) + mask |= MTLColorWriteMaskRed | MTLColorWriteMaskGreen | MTLColorWriteMaskBlue; + if (bs.alphaupdate) + mask |= MTLColorWriteMaskAlpha; + [color0 setWriteMask:mask]; + if (bs.blendenable) + { + // clang-format off + [color0 setBlendingEnabled:YES]; + [color0 setSourceRGBBlendFactor: Convert(bs.srcfactor, bs.usedualsrc)]; + [color0 setSourceAlphaBlendFactor: Convert(bs.srcfactoralpha, bs.usedualsrc)]; + [color0 setDestinationRGBBlendFactor: Convert(bs.dstfactor, bs.usedualsrc)]; + [color0 setDestinationAlphaBlendFactor:Convert(bs.dstfactoralpha, bs.usedualsrc)]; + [color0 setRgbBlendOperation: bs.subtract ? MTLBlendOperationReverseSubtract : MTLBlendOperationAdd]; + [color0 setAlphaBlendOperation:bs.subtractAlpha ? MTLBlendOperationReverseSubtract : MTLBlendOperationAdd]; + // clang-format on + } + FramebufferState fs = config.framebuffer_state; + [color0 setPixelFormat:Util::FromAbstract(fs.color_texture_format)]; + [desc setDepthAttachmentPixelFormat:Util::FromAbstract(fs.depth_texture_format)]; + if (Util::HasStencil(fs.depth_texture_format)) + [desc setStencilAttachmentPixelFormat:Util::FromAbstract(fs.depth_texture_format)]; + NSError* err = nullptr; + MTLRenderPipelineReflection* reflection = nullptr; + id pipe = + [g_device newRenderPipelineStateWithDescriptor:desc + options:MTLPipelineOptionArgumentInfo + reflection:&reflection + error:&err]; + if (err) + { + PanicAlertFmt("Failed to compile pipeline for {} and {}: {}", + [[[desc vertexFunction] label] UTF8String], + [[[desc fragmentFunction] label] UTF8String], + [[err localizedDescription] UTF8String]); + return std::make_pair(nullptr, PipelineReflection()); + } + + return std::make_pair(MRCTransfer(pipe), PipelineReflection(reflection)); + } + } + + StoredPipeline GetOrCreatePipeline(const AbstractPipelineConfig& config) + { + std::unique_lock lock(m_mtx); + PipelineID pid(config); + auto it = m_pipelines.find(pid); + if (it != m_pipelines.end()) + { + while (!it->second.first && !it->second.second.textures) + m_cv.wait(lock); // Wait for whoever's already compiling this + return it->second; + } + // Reserve the spot now, so other threads know we're making it + it = m_pipelines.insert({pid, {nullptr, PipelineReflection()}}).first; + lock.unlock(); + StoredPipeline pipe = CreatePipeline(config); + lock.lock(); + if (pipe.first) + it->second = pipe; + else + it->second.second.textures = 1; // Abuse this as a "failed to create pipeline" flag + m_shaders[pid.vertex_shader].push_back(pid); + m_shaders[pid.fragment_shader].push_back(pid); + lock.unlock(); + m_cv.notify_all(); // Wake up anyone who might be waiting + return pipe; + } + + void ShaderDestroyed(const Shader* shader) + { + std::lock_guard lock(m_mtx); + auto it = m_shaders.find(shader); + if (it == m_shaders.end()) + return; + // It's unlikely, but if a shader is destroyed, a new one could be made with the same address + // (Also, we know it won't be used anymore, so there's no reason to keep these around) + for (const PipelineID& pid : it->second) + m_pipelines.erase(pid); + m_shaders.erase(it); + } +}; + +std::unique_ptr +Metal::ObjectCache::CreatePipeline(const AbstractPipelineConfig& config) +{ + Internal::StoredPipeline pipeline = m_internal->GetOrCreatePipeline(config); + if (!pipeline.first) + return nullptr; + return std::make_unique( + std::move(pipeline.first), pipeline.second, Convert(config.rasterization_state.primitive), + Convert(config.rasterization_state.cullmode), config.depth_state, config.usage); +} + +void Metal::ObjectCache::ShaderDestroyed(const Shader* shader) +{ + m_internal->ShaderDestroyed(shader); +} diff --git a/Source/Core/VideoBackends/Metal/MTLPipeline.h b/Source/Core/VideoBackends/Metal/MTLPipeline.h index d5a642aedd..aa4fadbdbf 100644 --- a/Source/Core/VideoBackends/Metal/MTLPipeline.h +++ b/Source/Core/VideoBackends/Metal/MTLPipeline.h @@ -14,24 +14,34 @@ namespace Metal { +struct PipelineReflection +{ + u32 textures = 0; + u32 samplers = 0; + u32 vertex_buffers = 0; + u32 fragment_buffers = 0; + PipelineReflection() = default; + explicit PipelineReflection(MTLRenderPipelineReflection* reflection); +}; + class Pipeline final : public AbstractPipeline { public: explicit Pipeline(MRCOwned> pipeline, - MTLRenderPipelineReflection* reflection, MTLPrimitiveType prim, - MTLCullMode cull, DepthState depth, AbstractPipelineUsage usage); + const PipelineReflection& reflection, MTLPrimitiveType prim, MTLCullMode cull, + DepthState depth, AbstractPipelineUsage usage); id Get() const { return m_pipeline; } MTLPrimitiveType Prim() const { return m_prim; } MTLCullMode Cull() const { return m_cull; } DepthStencilSelector DepthStencil() const { return m_depth_stencil; } AbstractPipelineUsage Usage() const { return m_usage; } - u32 GetTextures() const { return m_textures; } - u32 GetSamplers() const { return m_samplers; } - u32 GetVertexBuffers() const { return m_vertex_buffers; } - u32 GetFragmentBuffers() const { return m_fragment_buffers; } - bool UsesVertexBuffer(u32 index) const { return m_vertex_buffers & (1 << index); } - bool UsesFragmentBuffer(u32 index) const { return m_fragment_buffers & (1 << index); } + u32 GetTextures() const { return m_reflection.textures; } + u32 GetSamplers() const { return m_reflection.samplers; } + u32 GetVertexBuffers() const { return m_reflection.vertex_buffers; } + u32 GetFragmentBuffers() const { return m_reflection.fragment_buffers; } + bool UsesVertexBuffer(u32 index) const { return m_reflection.vertex_buffers & (1 << index); } + bool UsesFragmentBuffer(u32 index) const { return m_reflection.fragment_buffers & (1 << index); } private: MRCOwned> m_pipeline; @@ -39,10 +49,7 @@ private: MTLCullMode m_cull; DepthStencilSelector m_depth_stencil; AbstractPipelineUsage m_usage; - u32 m_textures = 0; - u32 m_samplers = 0; - u32 m_vertex_buffers = 0; - u32 m_fragment_buffers = 0; + PipelineReflection m_reflection; }; class ComputePipeline : public Shader diff --git a/Source/Core/VideoBackends/Metal/MTLPipeline.mm b/Source/Core/VideoBackends/Metal/MTLPipeline.mm index 405ea76973..5afaa477a1 100644 --- a/Source/Core/VideoBackends/Metal/MTLPipeline.mm +++ b/Source/Core/VideoBackends/Metal/MTLPipeline.mm @@ -47,14 +47,18 @@ static void GetArguments(NSArray* arguments, u32* textures, u32* s } } +Metal::PipelineReflection::PipelineReflection(MTLRenderPipelineReflection* reflection) +{ + GetArguments([reflection vertexArguments], nullptr, nullptr, &vertex_buffers); + GetArguments([reflection fragmentArguments], &textures, &samplers, &fragment_buffers); +} + Metal::Pipeline::Pipeline(MRCOwned> pipeline, - MTLRenderPipelineReflection* reflection, MTLPrimitiveType prim, + const PipelineReflection& reflection, MTLPrimitiveType prim, MTLCullMode cull, DepthState depth, AbstractPipelineUsage usage) : m_pipeline(std::move(pipeline)), m_prim(prim), m_cull(cull), m_depth_stencil(depth), - m_usage(usage) + m_usage(usage), m_reflection(reflection) { - GetArguments([reflection vertexArguments], nullptr, nullptr, &m_vertex_buffers); - GetArguments([reflection fragmentArguments], &m_textures, &m_samplers, &m_fragment_buffers); } Metal::ComputePipeline::ComputePipeline(ShaderStage stage, MTLComputePipelineReflection* reflection, diff --git a/Source/Core/VideoBackends/Metal/MTLRenderer.h b/Source/Core/VideoBackends/Metal/MTLRenderer.h index 6208e3f57e..f8b97dc0d3 100644 --- a/Source/Core/VideoBackends/Metal/MTLRenderer.h +++ b/Source/Core/VideoBackends/Metal/MTLRenderer.h @@ -82,7 +82,6 @@ private: u32 m_texture_counter = 0; u32 m_staging_texture_counter = 0; std::array m_shader_counter = {}; - u32 m_pipeline_counter = 0; void CheckForSurfaceChange(); void CheckForSurfaceResize(); diff --git a/Source/Core/VideoBackends/Metal/MTLRenderer.mm b/Source/Core/VideoBackends/Metal/MTLRenderer.mm index b552b11722..e8e033c8aa 100644 --- a/Source/Core/VideoBackends/Metal/MTLRenderer.mm +++ b/Source/Core/VideoBackends/Metal/MTLRenderer.mm @@ -9,11 +9,10 @@ #include "VideoBackends/Metal/MTLStateTracker.h" #include "VideoBackends/Metal/MTLTexture.h" #include "VideoBackends/Metal/MTLUtil.h" +#include "VideoBackends/Metal/MTLVertexFormat.h" #include "VideoBackends/Metal/MTLVertexManager.h" #include "VideoCommon/FramebufferManager.h" -#include "VideoCommon/NativeVertexFormat.h" -#include "VideoCommon/VertexShaderGen.h" #include "VideoCommon/VideoBackendBase.h" Metal::Renderer::Renderer(MRCOwned layer, int width, int height, float layer_scale) @@ -110,66 +109,6 @@ Metal::Renderer::CreateFramebuffer(AbstractTexture* color_attachment, // MARK: Pipeline Creation -namespace Metal -{ -class VertexFormat : public NativeVertexFormat -{ -public: - VertexFormat(const PortableVertexDeclaration& vtx_decl) - : NativeVertexFormat(vtx_decl), m_desc(MRCTransfer([MTLVertexDescriptor new])) - { - [[[m_desc layouts] objectAtIndexedSubscript:0] setStride:vtx_decl.stride]; - SetAttribute(SHADER_POSITION_ATTRIB, vtx_decl.position); - SetAttributes(SHADER_NORMAL_ATTRIB, vtx_decl.normals); - SetAttributes(SHADER_COLOR0_ATTRIB, vtx_decl.colors); - SetAttributes(SHADER_TEXTURE0_ATTRIB, vtx_decl.texcoords); - SetAttribute(SHADER_POSMTX_ATTRIB, vtx_decl.posmtx); - } - - MTLVertexDescriptor* Get() const { return m_desc; } - -private: - template - void SetAttributes(u32 attribute, const AttributeFormat (&format)[N]) - { - for (size_t i = 0; i < N; i++) - SetAttribute(attribute + i, format[i]); - } - void SetAttribute(u32 attribute, const AttributeFormat& format) - { - if (!format.enable) - return; - MTLVertexAttributeDescriptor* desc = [[m_desc attributes] objectAtIndexedSubscript:attribute]; - [desc setFormat:ConvertFormat(format.type, format.components, format.integer)]; - [desc setOffset:format.offset]; - [desc setBufferIndex:0]; - } - - static MTLVertexFormat ConvertFormat(ComponentFormat format, int count, bool int_format) - { - static constexpr MTLVertexFormat formats[2][5][4] = { - [false] = { - [static_cast(ComponentFormat::UByte)] = { MTLVertexFormatUCharNormalized, MTLVertexFormatUChar2Normalized, MTLVertexFormatUChar3Normalized, MTLVertexFormatUChar4Normalized }, - [static_cast(ComponentFormat::Byte)] = { MTLVertexFormatCharNormalized, MTLVertexFormatChar2Normalized, MTLVertexFormatChar3Normalized, MTLVertexFormatChar4Normalized }, - [static_cast(ComponentFormat::UShort)] = { MTLVertexFormatUShortNormalized, MTLVertexFormatUShort2Normalized, MTLVertexFormatUShort3Normalized, MTLVertexFormatUShort4Normalized }, - [static_cast(ComponentFormat::Short)] = { MTLVertexFormatShortNormalized, MTLVertexFormatShort2Normalized, MTLVertexFormatShort3Normalized, MTLVertexFormatShort4Normalized }, - [static_cast(ComponentFormat::Float)] = { MTLVertexFormatFloat, MTLVertexFormatFloat2, MTLVertexFormatFloat3, MTLVertexFormatFloat4 }, - }, - [true] = { - [static_cast(ComponentFormat::UByte)] = { MTLVertexFormatUChar, MTLVertexFormatUChar2, MTLVertexFormatUChar3, MTLVertexFormatUChar4 }, - [static_cast(ComponentFormat::Byte)] = { MTLVertexFormatChar, MTLVertexFormatChar2, MTLVertexFormatChar3, MTLVertexFormatChar4 }, - [static_cast(ComponentFormat::UShort)] = { MTLVertexFormatUShort, MTLVertexFormatUShort2, MTLVertexFormatUShort3, MTLVertexFormatUShort4 }, - [static_cast(ComponentFormat::Short)] = { MTLVertexFormatShort, MTLVertexFormatShort2, MTLVertexFormatShort3, MTLVertexFormatShort4 }, - [static_cast(ComponentFormat::Float)] = { MTLVertexFormatFloat, MTLVertexFormatFloat2, MTLVertexFormatFloat3, MTLVertexFormatFloat4 }, - }, - }; - return formats[int_format][static_cast(format)][count - 1]; - } - - MRCOwned m_desc; -}; -} // namespace Metal - std::unique_ptr Metal::Renderer::CreateShaderFromSource(ShaderStage stage, std::string_view source, std::string_view name) @@ -311,157 +250,11 @@ Metal::Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_d } } -static MTLPrimitiveTopologyClass GetClass(PrimitiveType prim) -{ - switch (prim) - { - case PrimitiveType::Points: - return MTLPrimitiveTopologyClassPoint; - case PrimitiveType::Lines: - return MTLPrimitiveTopologyClassLine; - case PrimitiveType::Triangles: - case PrimitiveType::TriangleStrip: - return MTLPrimitiveTopologyClassTriangle; - } -} - -static MTLPrimitiveType Convert(PrimitiveType prim) -{ - switch (prim) - { - case PrimitiveType::Points: return MTLPrimitiveTypePoint; - case PrimitiveType::Lines: return MTLPrimitiveTypeLine; - case PrimitiveType::Triangles: return MTLPrimitiveTypeTriangle; - case PrimitiveType::TriangleStrip: return MTLPrimitiveTypeTriangleStrip; - } -} - -static MTLCullMode Convert(CullMode cull) -{ - switch (cull) - { - case CullMode::None: - case CullMode::All: // Handled by disabling rasterization - return MTLCullModeNone; - case CullMode::Front: - return MTLCullModeFront; - case CullMode::Back: - return MTLCullModeBack; - } -} - -static MTLBlendFactor Convert(DstBlendFactor factor, bool src1) -{ - static constexpr MTLBlendFactor factors[2][8] = { - [false] = { - [static_cast(DstBlendFactor::Zero)] = MTLBlendFactorZero, - [static_cast(DstBlendFactor::One)] = MTLBlendFactorOne, - [static_cast(DstBlendFactor::SrcClr)] = MTLBlendFactorSourceColor, - [static_cast(DstBlendFactor::InvSrcClr)] = MTLBlendFactorOneMinusSourceColor, - [static_cast(DstBlendFactor::SrcAlpha)] = MTLBlendFactorSourceAlpha, - [static_cast(DstBlendFactor::InvSrcAlpha)] = MTLBlendFactorOneMinusSourceAlpha, - [static_cast(DstBlendFactor::DstAlpha)] = MTLBlendFactorDestinationAlpha, - [static_cast(DstBlendFactor::InvDstAlpha)] = MTLBlendFactorOneMinusDestinationAlpha, - }, - [true] = { - [static_cast(DstBlendFactor::Zero)] = MTLBlendFactorZero, - [static_cast(DstBlendFactor::One)] = MTLBlendFactorOne, - [static_cast(DstBlendFactor::SrcClr)] = MTLBlendFactorSourceColor, - [static_cast(DstBlendFactor::InvSrcClr)] = MTLBlendFactorOneMinusSource1Color, - [static_cast(DstBlendFactor::SrcAlpha)] = MTLBlendFactorSource1Alpha, - [static_cast(DstBlendFactor::InvSrcAlpha)] = MTLBlendFactorOneMinusSource1Alpha, - [static_cast(DstBlendFactor::DstAlpha)] = MTLBlendFactorDestinationAlpha, - [static_cast(DstBlendFactor::InvDstAlpha)] = MTLBlendFactorOneMinusDestinationAlpha, - }, - }; - return factors[src1][static_cast(factor)]; -} - -static MTLBlendFactor Convert(SrcBlendFactor factor, bool src1) -{ - static constexpr MTLBlendFactor factors[2][8] = { - [false] = { - [static_cast(SrcBlendFactor::Zero)] = MTLBlendFactorZero, - [static_cast(SrcBlendFactor::One)] = MTLBlendFactorOne, - [static_cast(SrcBlendFactor::DstClr)] = MTLBlendFactorDestinationColor, - [static_cast(SrcBlendFactor::InvDstClr)] = MTLBlendFactorOneMinusDestinationColor, - [static_cast(SrcBlendFactor::SrcAlpha)] = MTLBlendFactorSourceAlpha, - [static_cast(SrcBlendFactor::InvSrcAlpha)] = MTLBlendFactorOneMinusSourceAlpha, - [static_cast(SrcBlendFactor::DstAlpha)] = MTLBlendFactorDestinationAlpha, - [static_cast(SrcBlendFactor::InvDstAlpha)] = MTLBlendFactorOneMinusDestinationAlpha, - }, - [true] = { - [static_cast(SrcBlendFactor::Zero)] = MTLBlendFactorZero, - [static_cast(SrcBlendFactor::One)] = MTLBlendFactorOne, - [static_cast(SrcBlendFactor::DstClr)] = MTLBlendFactorDestinationColor, - [static_cast(SrcBlendFactor::InvDstClr)] = MTLBlendFactorOneMinusDestinationColor, - [static_cast(SrcBlendFactor::SrcAlpha)] = MTLBlendFactorSource1Alpha, - [static_cast(SrcBlendFactor::InvSrcAlpha)] = MTLBlendFactorOneMinusSource1Alpha, - [static_cast(SrcBlendFactor::DstAlpha)] = MTLBlendFactorDestinationAlpha, - [static_cast(SrcBlendFactor::InvDstAlpha)] = MTLBlendFactorOneMinusDestinationAlpha, - }, - }; - return factors[src1][static_cast(factor)]; -} - std::unique_ptr Metal::Renderer::CreatePipeline(const AbstractPipelineConfig& config, const void* cache_data, size_t cache_data_length) { - @autoreleasepool - { - assert(!config.geometry_shader); - auto desc = MRCTransfer([MTLRenderPipelineDescriptor new]); - [desc setLabel:[NSString stringWithFormat:@"Pipeline %d", m_pipeline_counter++]]; - [desc setVertexFunction:static_cast(config.vertex_shader)->GetShader()]; - [desc setFragmentFunction:static_cast(config.pixel_shader)->GetShader()]; - if (config.vertex_format) - [desc setVertexDescriptor:static_cast(config.vertex_format)->Get()]; - RasterizationState rs = config.rasterization_state; - [desc setInputPrimitiveTopology:GetClass(rs.primitive)]; - if (rs.cullmode == CullMode::All) - [desc setRasterizationEnabled:NO]; - MTLRenderPipelineColorAttachmentDescriptor* color0 = [desc colorAttachments][0]; - BlendingState bs = config.blending_state; - MTLColorWriteMask mask = MTLColorWriteMaskNone; - if (bs.colorupdate) - mask |= MTLColorWriteMaskRed | MTLColorWriteMaskGreen | MTLColorWriteMaskBlue; - if (bs.alphaupdate) - mask |= MTLColorWriteMaskAlpha; - [color0 setWriteMask:mask]; - if (bs.blendenable) - { - [color0 setBlendingEnabled:YES]; - [color0 setSourceRGBBlendFactor: Convert(bs.srcfactor, bs.usedualsrc)]; - [color0 setSourceAlphaBlendFactor: Convert(bs.srcfactoralpha, bs.usedualsrc)]; - [color0 setDestinationRGBBlendFactor: Convert(bs.dstfactor, bs.usedualsrc)]; - [color0 setDestinationAlphaBlendFactor:Convert(bs.dstfactoralpha, bs.usedualsrc)]; - [color0 setRgbBlendOperation: bs.subtract ? MTLBlendOperationReverseSubtract : MTLBlendOperationAdd]; - [color0 setAlphaBlendOperation:bs.subtractAlpha ? MTLBlendOperationReverseSubtract : MTLBlendOperationAdd]; - } - FramebufferState fs = config.framebuffer_state; - [color0 setPixelFormat:Util::FromAbstract(fs.color_texture_format)]; - [desc setDepthAttachmentPixelFormat:Util::FromAbstract(fs.depth_texture_format)]; - if (Util::HasStencil(fs.depth_texture_format)) - [desc setStencilAttachmentPixelFormat:Util::FromAbstract(fs.depth_texture_format)]; - NSError* err = nullptr; - MTLRenderPipelineReflection* reflection = nullptr; - id pipe = - [g_device newRenderPipelineStateWithDescriptor:desc - options:MTLPipelineOptionArgumentInfo - reflection:&reflection - error:&err]; - if (err) - { - PanicAlertFmt("Failed to compile pipeline for {} and {}: {}", - [[[desc vertexFunction] label] UTF8String], - [[[desc fragmentFunction] label] UTF8String], - [[err localizedDescription] UTF8String]); - return nullptr; - } - return std::make_unique(MRCTransfer(pipe), reflection, Convert(rs.primitive), - Convert(rs.cullmode), config.depth_state, config.usage); - } + return g_object_cache->CreatePipeline(config); } void Metal::Renderer::Flush() diff --git a/Source/Core/VideoBackends/Metal/MTLShader.h b/Source/Core/VideoBackends/Metal/MTLShader.h index 053d84c2b9..018346bb6d 100644 --- a/Source/Core/VideoBackends/Metal/MTLShader.h +++ b/Source/Core/VideoBackends/Metal/MTLShader.h @@ -15,13 +15,11 @@ namespace Metal class Shader : public AbstractShader { public: - explicit Shader(ShaderStage stage, std::string msl, MRCOwned> shader) - : AbstractShader(stage), m_msl(std::move(msl)), m_shader(std::move(shader)) - { - } + explicit Shader(ShaderStage stage, std::string msl, MRCOwned> shader); + ~Shader(); id GetShader() const { return m_shader; } - BinaryData GetBinary() const override { return BinaryData(m_msl.begin(), m_msl.end()); } + BinaryData GetBinary() const override; private: std::string m_msl; diff --git a/Source/Core/VideoBackends/Metal/MTLShader.mm b/Source/Core/VideoBackends/Metal/MTLShader.mm new file mode 100644 index 0000000000..a1afcb160b --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLShader.mm @@ -0,0 +1,19 @@ + +#include "VideoBackends/Metal/MTLShader.h" + +#include "VideoBackends/Metal/MTLObjectCache.h" + +Metal::Shader::Shader(ShaderStage stage, std::string msl, MRCOwned> shader) + : AbstractShader(stage), m_msl(std::move(msl)), m_shader(std::move(shader)) +{ +} + +Metal::Shader::~Shader() +{ + g_object_cache->ShaderDestroyed(this); +} + +AbstractShader::BinaryData Metal::Shader::GetBinary() const +{ + return BinaryData(m_msl.begin(), m_msl.end()); +} diff --git a/Source/Core/VideoBackends/Metal/MTLVertexFormat.h b/Source/Core/VideoBackends/Metal/MTLVertexFormat.h new file mode 100644 index 0000000000..e4babc057b --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLVertexFormat.h @@ -0,0 +1,23 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "VideoBackends/Metal/MRCHelpers.h" + +#include "VideoCommon/NativeVertexFormat.h" + +namespace Metal +{ +class VertexFormat : public NativeVertexFormat +{ +public: + VertexFormat(const PortableVertexDeclaration& vtx_decl); + + MTLVertexDescriptor* Get() const { return m_desc; } + + MRCOwned m_desc; +}; +} // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLVertexFormat.mm b/Source/Core/VideoBackends/Metal/MTLVertexFormat.mm new file mode 100644 index 0000000000..6453c5189a --- /dev/null +++ b/Source/Core/VideoBackends/Metal/MTLVertexFormat.mm @@ -0,0 +1,143 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VideoBackends/Metal/MTLVertexFormat.h" + +#include "VideoCommon/VertexShaderGen.h" + +static MTLVertexFormat ConvertFormat(ComponentFormat format, int count, bool int_format) +{ + // clang-format off + if (int_format) + { + switch (format) + { + case ComponentFormat::UByte: + switch (count) + { + case 1: return MTLVertexFormatUChar; + case 2: return MTLVertexFormatUChar2; + case 3: return MTLVertexFormatUChar3; + case 4: return MTLVertexFormatUChar4; + default: return MTLVertexFormatInvalid; + } + case ComponentFormat::Byte: + switch (count) + { + case 1: return MTLVertexFormatChar; + case 2: return MTLVertexFormatChar2; + case 3: return MTLVertexFormatChar3; + case 4: return MTLVertexFormatChar4; + default: return MTLVertexFormatInvalid; + } + case ComponentFormat::UShort: + switch (count) + { + case 1: return MTLVertexFormatUShort; + case 2: return MTLVertexFormatUShort2; + case 3: return MTLVertexFormatUShort3; + case 4: return MTLVertexFormatUShort4; + default: return MTLVertexFormatInvalid; + } + case ComponentFormat::Short: + switch (count) + { + case 1: return MTLVertexFormatShort; + case 2: return MTLVertexFormatShort2; + case 3: return MTLVertexFormatShort3; + case 4: return MTLVertexFormatShort4; + default: return MTLVertexFormatInvalid; + } + case ComponentFormat::Float: + switch (count) + { + case 1: return MTLVertexFormatFloat; + case 2: return MTLVertexFormatFloat2; + case 3: return MTLVertexFormatFloat3; + case 4: return MTLVertexFormatFloat4; + default: return MTLVertexFormatInvalid; + } + } + } + else + { + switch (format) + { + case ComponentFormat::UByte: + switch (count) + { + case 1: return MTLVertexFormatUCharNormalized; + case 2: return MTLVertexFormatUChar2Normalized; + case 3: return MTLVertexFormatUChar3Normalized; + case 4: return MTLVertexFormatUChar4Normalized; + default: return MTLVertexFormatInvalid; + } + case ComponentFormat::Byte: + switch (count) + { + case 1: return MTLVertexFormatCharNormalized; + case 2: return MTLVertexFormatChar2Normalized; + case 3: return MTLVertexFormatChar3Normalized; + case 4: return MTLVertexFormatChar4Normalized; + default: return MTLVertexFormatInvalid; + } + case ComponentFormat::UShort: + switch (count) + { + case 1: return MTLVertexFormatUShortNormalized; + case 2: return MTLVertexFormatUShort2Normalized; + case 3: return MTLVertexFormatUShort3Normalized; + case 4: return MTLVertexFormatUShort4Normalized; + default: return MTLVertexFormatInvalid; + } + case ComponentFormat::Short: + switch (count) + { + case 1: return MTLVertexFormatShortNormalized; + case 2: return MTLVertexFormatShort2Normalized; + case 3: return MTLVertexFormatShort3Normalized; + case 4: return MTLVertexFormatShort4Normalized; + default: return MTLVertexFormatInvalid; + } + case ComponentFormat::Float: + switch (count) + { + case 1: return MTLVertexFormatFloat; + case 2: return MTLVertexFormatFloat2; + case 3: return MTLVertexFormatFloat3; + case 4: return MTLVertexFormatFloat4; + default: return MTLVertexFormatInvalid; + } + } + } + // clang-format on +} + +static void SetAttribute(MTLVertexDescriptor* desc, u32 attribute, const AttributeFormat& format) +{ + if (!format.enable) + return; + MTLVertexAttributeDescriptor* attr_desc = [[desc attributes] objectAtIndexedSubscript:attribute]; + [attr_desc setFormat:ConvertFormat(format.type, format.components, format.integer)]; + [attr_desc setOffset:format.offset]; + [attr_desc setBufferIndex:0]; +} + +template +static void SetAttributes(MTLVertexDescriptor* desc, u32 attribute, + const AttributeFormat (&format)[N]) +{ + for (size_t i = 0; i < N; ++i) + SetAttribute(desc, attribute + i, format[i]); +} + +Metal::VertexFormat::VertexFormat(const PortableVertexDeclaration& vtx_decl) + : NativeVertexFormat(vtx_decl), m_desc(MRCTransfer([MTLVertexDescriptor new])) +{ + [[[m_desc layouts] objectAtIndexedSubscript:0] setStride:vtx_decl.stride]; + SetAttribute(m_desc, SHADER_POSITION_ATTRIB, vtx_decl.position); + SetAttributes(m_desc, SHADER_NORMAL_ATTRIB, vtx_decl.normals); + SetAttributes(m_desc, SHADER_COLOR0_ATTRIB, vtx_decl.colors); + SetAttributes(m_desc, SHADER_TEXTURE0_ATTRIB, vtx_decl.texcoords); + SetAttribute(m_desc, SHADER_POSMTX_ATTRIB, vtx_decl.posmtx); +} From c0fd1281713f0ffe8bcfb628f8d328c7a1b5ae09 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Mon, 20 Jun 2022 20:24:36 -0500 Subject: [PATCH 06/12] VideoBackends:Metal: Use base vertex --- .../VideoBackends/Metal/MTLStateTracker.h | 7 ++- .../VideoBackends/Metal/MTLStateTracker.mm | 49 +++++++------------ .../VideoBackends/Metal/MTLVertexManager.h | 4 ++ .../VideoBackends/Metal/MTLVertexManager.mm | 25 ++++++---- 4 files changed, 41 insertions(+), 44 deletions(-) diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.h b/Source/Core/VideoBackends/Metal/MTLStateTracker.h index d9c79774e0..ac930767c4 100644 --- a/Source/Core/VideoBackends/Metal/MTLStateTracker.h +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.h @@ -46,6 +46,7 @@ public: enum class AlignMask : size_t { + None = 0, Other = 15, Uniform = 255, }; @@ -83,7 +84,7 @@ public: void InvalidateUniforms(bool vertex, bool fragment); void SetUtilityUniform(const void* buffer, size_t size); void SetTexelBuffer(id buffer, u32 offset0, u32 offset1); - void SetVerticesAndIndices(Map vertices, Map indices); + void SetVerticesAndIndices(id vertices, id indices); void SetBBoxBuffer(id bbox, id upload, id download); void SetVertexBufferNow(u32 idx, id buffer, u32 offset); void SetFragmentBufferNow(u32 idx, id buffer, u32 offset); @@ -107,7 +108,7 @@ public: Preallocate(buffer_idx, amt); return CommitPreallocation(buffer_idx, amt, align); } - void* Preallocate(UploadBuffer buffer_idx, size_t amt); + std::pair Preallocate(UploadBuffer buffer_idx, size_t amt); /// Must follow a call to Preallocate where amt is >= to the one provided here Map CommitPreallocation(UploadBuffer buffer_idx, size_t amt, AlignMask align) { @@ -247,8 +248,6 @@ private: id vertices = nullptr; id indices = nullptr; id texels = nullptr; - u32 vertices_offset; - u32 indices_offset; u32 texel_buffer_offset0; u32 texel_buffer_offset1; } m_state; diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm index ea2c8cd328..8451f413da 100644 --- a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm @@ -154,7 +154,7 @@ Metal::StateTracker::Map Metal::StateTracker::AllocateForTextureUpload(size_t am return ret; } -void* Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt) +std::pair Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt) { BufferPair& buffer = m_upload_buffers[static_cast(buffer_idx)]; u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire); @@ -195,7 +195,8 @@ void* Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt) ASSERT_MSG(VIDEO, buffer.gpubuffer, "Failed to allocate MTLBuffer (out of memory?)"); } } - return reinterpret_cast(buffer.buffer) + buffer.usage.Pos(); + size_t pos = buffer.usage.Pos(); + return std::make_pair(reinterpret_cast(buffer.buffer) + pos, pos); } Metal::StateTracker::Map Metal::StateTracker::CommitPreallocation(UploadBuffer buffer_idx, @@ -558,13 +559,14 @@ void Metal::StateTracker::SetTexelBuffer(id buffer, u32 offset0, u32 m_flags.has_texel_buffer = false; } -void Metal::StateTracker::SetVerticesAndIndices(Map vertices, Map indices) +void Metal::StateTracker::SetVerticesAndIndices(id vertices, id indices) { - m_state.vertices = vertices.gpu_buffer; - m_state.indices = indices.gpu_buffer; - m_state.vertices_offset = vertices.gpu_offset; - m_state.indices_offset = indices.gpu_offset; - m_flags.has_vertices = false; + if (m_state.vertices != vertices) + { + m_flags.has_vertices = false; + m_state.vertices = vertices; + } + m_state.indices = indices; } void Metal::StateTracker::SetBBoxBuffer(id bbox, id upload, @@ -691,7 +693,7 @@ void Metal::StateTracker::PrepareRender() { m_flags.has_vertices = true; if (m_state.vertices) - SetVertexBufferNow(0, m_state.vertices, m_state.vertices_offset); + SetVertexBufferNow(0, m_state.vertices, 0); } if (u8 dirty = m_dirty_textures & pipe->GetTextures()) { @@ -801,27 +803,14 @@ void Metal::StateTracker::Draw(u32 base_vertex, u32 num_vertices) void Metal::StateTracker::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) { PrepareRender(); - if (!base_vertex) - { - [m_current_render_encoder - drawIndexedPrimitives:m_state.render_pipeline->Prim() - indexCount:num_indices - indexType:MTLIndexTypeUInt16 - indexBuffer:m_state.indices - indexBufferOffset:m_state.indices_offset + base_index * sizeof(u16)]; - } - else - { - [m_current_render_encoder - drawIndexedPrimitives:m_state.render_pipeline->Prim() - indexCount:num_indices - indexType:MTLIndexTypeUInt16 - indexBuffer:m_state.indices - indexBufferOffset:m_state.indices_offset + base_index * sizeof(u16) - instanceCount:1 - baseVertex:base_vertex - baseInstance:0]; - } + [m_current_render_encoder drawIndexedPrimitives:m_state.render_pipeline->Prim() + indexCount:num_indices + indexType:MTLIndexTypeUInt16 + indexBuffer:m_state.indices + indexBufferOffset:base_index * sizeof(u16) + instanceCount:1 + baseVertex:base_vertex + baseInstance:0]; } void Metal::StateTracker::DispatchComputeShader(u32 groupsize_x, u32 groupsize_y, u32 groupsize_z, diff --git a/Source/Core/VideoBackends/Metal/MTLVertexManager.h b/Source/Core/VideoBackends/Metal/MTLVertexManager.h index 29c715f1fd..8914d5fe84 100644 --- a/Source/Core/VideoBackends/Metal/MTLVertexManager.h +++ b/Source/Core/VideoBackends/Metal/MTLVertexManager.h @@ -26,5 +26,9 @@ protected: void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, u32* out_base_index) override; void UploadUniforms() override; + +private: + u32 m_vertex_offset; + u32 m_base_vertex; }; } // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLVertexManager.mm b/Source/Core/VideoBackends/Metal/MTLVertexManager.mm index 3683e79ba6..8fb3e25de4 100644 --- a/Source/Core/VideoBackends/Metal/MTLVertexManager.mm +++ b/Source/Core/VideoBackends/Metal/MTLVertexManager.mm @@ -54,31 +54,36 @@ bool Metal::VertexManager::UploadTexelBuffer(const void* data, u32 data_size, void Metal::VertexManager::ResetBuffer(u32 vertex_stride) { const u32 max_vertex_size = 65535 * vertex_stride; - void* vertex = g_state_tracker->Preallocate(StateTracker::UploadBuffer::Vertex, max_vertex_size); - void* index = + const u32 vertex_alloc = max_vertex_size + vertex_stride - 1; // for alignment + auto vertex = g_state_tracker->Preallocate(StateTracker::UploadBuffer::Vertex, vertex_alloc); + auto index = g_state_tracker->Preallocate(StateTracker::UploadBuffer::Index, MAXIBUFFERSIZE * sizeof(u16)); - m_cur_buffer_pointer = m_base_buffer_pointer = static_cast(vertex); + // Align the base vertex + m_base_vertex = (vertex.second + vertex_stride - 1) / vertex_stride; + m_vertex_offset = m_base_vertex * vertex_stride - vertex.second; + m_cur_buffer_pointer = m_base_buffer_pointer = static_cast(vertex.first) + m_vertex_offset; m_end_buffer_pointer = m_base_buffer_pointer + max_vertex_size; - m_index_generator.Start(static_cast(index)); + m_index_generator.Start(static_cast(index.first)); } void Metal::VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, u32* out_base_index) { - const u32 vsize = num_vertices * vertex_stride; + const u32 vsize = num_vertices * vertex_stride + m_vertex_offset; const u32 isize = num_indices * sizeof(u16); StateTracker::Map vmap = g_state_tracker->CommitPreallocation( - StateTracker::UploadBuffer::Vertex, vsize, StateTracker::AlignMask::Other); + StateTracker::UploadBuffer::Vertex, vsize, StateTracker::AlignMask::None); StateTracker::Map imap = g_state_tracker->CommitPreallocation( - StateTracker::UploadBuffer::Index, isize, StateTracker::AlignMask::Other); + StateTracker::UploadBuffer::Index, isize, StateTracker::AlignMask::None); ADDSTAT(g_stats.this_frame.bytes_vertex_streamed, vsize); ADDSTAT(g_stats.this_frame.bytes_index_streamed, isize); - g_state_tracker->SetVerticesAndIndices(vmap, imap); - *out_base_vertex = 0; - *out_base_index = 0; + DEBUG_ASSERT(vmap.gpu_offset + m_vertex_offset == m_base_vertex * vertex_stride); + g_state_tracker->SetVerticesAndIndices(vmap.gpu_buffer, imap.gpu_buffer); + *out_base_vertex = m_base_vertex; + *out_base_index = imap.gpu_offset / sizeof(u16); } void Metal::VertexManager::UploadUniforms() From 6ee0248eab7622d2db19f28f120b447b5db11379 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Tue, 21 Jun 2022 02:07:35 -0500 Subject: [PATCH 07/12] VideoBackends:Metal: MSAA support --- Source/Core/VideoBackends/D3D/D3DMain.cpp | 1 + .../Core/VideoBackends/D3D12/VideoBackend.cpp | 1 + Source/Core/VideoBackends/Metal/MTLMain.mm | 11 +++- .../VideoBackends/Metal/MTLObjectCache.mm | 2 + Source/Core/VideoBackends/Metal/MTLUtil.mm | 10 ++++ .../Core/VideoBackends/Null/NullBackend.cpp | 1 + Source/Core/VideoBackends/OGL/OGLMain.cpp | 1 + Source/Core/VideoBackends/Software/SWmain.cpp | 1 + .../VideoBackends/Vulkan/VulkanContext.cpp | 1 + .../Core/VideoCommon/FramebufferManager.cpp | 52 +++++++++++++++++-- Source/Core/VideoCommon/FramebufferManager.h | 2 + .../Core/VideoCommon/FramebufferShaderGen.cpp | 16 ++++++ .../Core/VideoCommon/FramebufferShaderGen.h | 1 + Source/Core/VideoCommon/VideoConfig.h | 1 + 14 files changed, 95 insertions(+), 6 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/D3DMain.cpp b/Source/Core/VideoBackends/D3D/D3DMain.cpp index 6fc2c285a1..b8d7fb5871 100644 --- a/Source/Core/VideoBackends/D3D/D3DMain.cpp +++ b/Source/Core/VideoBackends/D3D/D3DMain.cpp @@ -111,6 +111,7 @@ void VideoBackend::FillBackendInfo() g_Config.backend_info.bSupportsLodBiasInSampler = true; g_Config.backend_info.bSupportsLogicOp = D3D::SupportsLogicOp(g_Config.iAdapter); g_Config.backend_info.bSupportsSettingObjectNames = true; + g_Config.backend_info.bSupportsPartialMultisampleResolve = true; g_Config.backend_info.Adapters = D3DCommon::GetAdapterNames(); g_Config.backend_info.AAModes = D3D::GetAAModes(g_Config.iAdapter); diff --git a/Source/Core/VideoBackends/D3D12/VideoBackend.cpp b/Source/Core/VideoBackends/D3D12/VideoBackend.cpp index 197d309332..b0611a8690 100644 --- a/Source/Core/VideoBackends/D3D12/VideoBackend.cpp +++ b/Source/Core/VideoBackends/D3D12/VideoBackend.cpp @@ -86,6 +86,7 @@ void VideoBackend::FillBackendInfo() g_Config.backend_info.bSupportsTextureQueryLevels = true; g_Config.backend_info.bSupportsLodBiasInSampler = true; g_Config.backend_info.bSupportsSettingObjectNames = true; + g_Config.backend_info.bSupportsPartialMultisampleResolve = true; // We can only check texture support once we have a device. if (g_dx_context) diff --git a/Source/Core/VideoBackends/Metal/MTLMain.mm b/Source/Core/VideoBackends/Metal/MTLMain.mm index 2b54d93784..f294f297f2 100644 --- a/Source/Core/VideoBackends/Metal/MTLMain.mm +++ b/Source/Core/VideoBackends/Metal/MTLMain.mm @@ -141,7 +141,16 @@ void Metal::VideoBackend::InitBackendInfo() @autoreleasepool { Util::PopulateBackendInfo(&g_Config); - Util::PopulateBackendInfoAdapters(&g_Config, Util::GetAdapterList()); + auto adapters = Util::GetAdapterList(); + Util::PopulateBackendInfoAdapters(&g_Config, adapters); + if (!adapters.empty()) + { + // Use the selected adapter, or the first to fill features. + size_t index = static_cast(g_Config.iAdapter); + if (index >= adapters.size()) + index = 0; + Util::PopulateBackendInfoFeatures(&g_Config, adapters[index]); + } } } diff --git a/Source/Core/VideoBackends/Metal/MTLObjectCache.mm b/Source/Core/VideoBackends/Metal/MTLObjectCache.mm index e9b836ce1c..3181e1fdee 100644 --- a/Source/Core/VideoBackends/Metal/MTLObjectCache.mm +++ b/Source/Core/VideoBackends/Metal/MTLObjectCache.mm @@ -311,6 +311,7 @@ public: fragment_shader = static_cast(cfg.pixel_shader); framebuffer.color_texture_format = cfg.framebuffer_state.color_texture_format.Value(); framebuffer.depth_texture_format = cfg.framebuffer_state.depth_texture_format.Value(); + framebuffer.samples = cfg.framebuffer_state.samples.Value(); blend.colorupdate = cfg.blending_state.colorupdate.Value(); blend.alphaupdate = cfg.blending_state.alphaupdate.Value(); if (cfg.blending_state.blendenable) @@ -416,6 +417,7 @@ public: // clang-format on } FramebufferState fs = config.framebuffer_state; + [desc setRasterSampleCount:fs.samples]; [color0 setPixelFormat:Util::FromAbstract(fs.color_texture_format)]; [desc setDepthAttachmentPixelFormat:Util::FromAbstract(fs.depth_texture_format)]; if (Util::HasStencil(fs.depth_texture_format)) diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.mm b/Source/Core/VideoBackends/Metal/MTLUtil.mm index edb250e43f..b80b235436 100644 --- a/Source/Core/VideoBackends/Metal/MTLUtil.mm +++ b/Source/Core/VideoBackends/Metal/MTLUtil.mm @@ -68,6 +68,8 @@ void Metal::Util::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsTextureQueryLevels = true; config->backend_info.bSupportsLodBiasInSampler = false; config->backend_info.bSupportsSettingObjectNames = true; + // Metal requires multisample resolve to be done on a render pass + config->backend_info.bSupportsPartialMultisampleResolve = false; } void Metal::Util::PopulateBackendInfoAdapters(VideoConfig* config, @@ -201,6 +203,14 @@ void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id config->backend_info.bSupportsBPTCTextures = supports_mac1; config->backend_info.bSupportsFramebufferFetch = true; #endif + + config->backend_info.AAModes.clear(); + for (u32 i = 1; i <= 64; i <<= 1) + { + if ([device supportsTextureSampleCount:i]) + config->backend_info.AAModes.push_back(i); + } + if (char* env = getenv("MTL_UNIFIED_MEMORY")) g_features.unified_memory = env[0] == '1' || env[0] == 'y' || env[0] == 'Y'; else if (@available(macOS 10.15, iOS 13.0, *)) diff --git a/Source/Core/VideoBackends/Null/NullBackend.cpp b/Source/Core/VideoBackends/Null/NullBackend.cpp index 5d04a5adac..b68c9cfd94 100644 --- a/Source/Core/VideoBackends/Null/NullBackend.cpp +++ b/Source/Core/VideoBackends/Null/NullBackend.cpp @@ -59,6 +59,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsTextureQueryLevels = false; g_Config.backend_info.bSupportsLodBiasInSampler = false; g_Config.backend_info.bSupportsSettingObjectNames = false; + g_Config.backend_info.bSupportsPartialMultisampleResolve = true; // aamodes: We only support 1 sample, so no MSAA g_Config.backend_info.Adapters.clear(); diff --git a/Source/Core/VideoBackends/OGL/OGLMain.cpp b/Source/Core/VideoBackends/OGL/OGLMain.cpp index a286625b34..24a65f746d 100644 --- a/Source/Core/VideoBackends/OGL/OGLMain.cpp +++ b/Source/Core/VideoBackends/OGL/OGLMain.cpp @@ -93,6 +93,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsShaderBinaries = false; g_Config.backend_info.bSupportsPipelineCacheData = false; g_Config.backend_info.bSupportsLodBiasInSampler = true; + g_Config.backend_info.bSupportsPartialMultisampleResolve = true; // TODO: There is a bug here, if texel buffers or SSBOs/atomics are not supported the graphics // options will show the option when it is not supported. The only way around this would be diff --git a/Source/Core/VideoBackends/Software/SWmain.cpp b/Source/Core/VideoBackends/Software/SWmain.cpp index a74b2eb1c9..80dc4603f2 100644 --- a/Source/Core/VideoBackends/Software/SWmain.cpp +++ b/Source/Core/VideoBackends/Software/SWmain.cpp @@ -88,6 +88,7 @@ void VideoSoftware::InitBackendInfo() g_Config.backend_info.bSupportsTextureQueryLevels = false; g_Config.backend_info.bSupportsLodBiasInSampler = false; g_Config.backend_info.bSupportsSettingObjectNames = false; + g_Config.backend_info.bSupportsPartialMultisampleResolve = true; // aamodes g_Config.backend_info.AAModes = {1}; diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index a6cb201d72..cad326c83a 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -294,6 +294,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsTextureQueryLevels = true; // Assumed support. config->backend_info.bSupportsLodBiasInSampler = false; // Dependent on OS. config->backend_info.bSupportsSettingObjectNames = false; // Dependent on features. + config->backend_info.bSupportsPartialMultisampleResolve = true; // Assumed support. } void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list) diff --git a/Source/Core/VideoCommon/FramebufferManager.cpp b/Source/Core/VideoCommon/FramebufferManager.cpp index d4fdfd0ff2..76c265c76a 100644 --- a/Source/Core/VideoCommon/FramebufferManager.cpp +++ b/Source/Core/VideoCommon/FramebufferManager.cpp @@ -188,12 +188,23 @@ bool FramebufferManager::CreateEFBFramebuffer() // Create resolved textures if MSAA is on if (g_ActiveConfig.MultisamplingEnabled()) { + u32 flags = 0; + if (!g_ActiveConfig.backend_info.bSupportsPartialMultisampleResolve) + flags |= AbstractTextureFlag_RenderTarget; m_efb_resolve_color_texture = g_renderer->CreateTexture( TextureConfig(efb_color_texture_config.width, efb_color_texture_config.height, 1, - efb_color_texture_config.layers, 1, efb_color_texture_config.format, 0), + efb_color_texture_config.layers, 1, efb_color_texture_config.format, flags), "EFB color resolve texture"); if (!m_efb_resolve_color_texture) return false; + + if (!g_ActiveConfig.backend_info.bSupportsPartialMultisampleResolve) + { + m_efb_color_resolve_framebuffer = + g_renderer->CreateFramebuffer(m_efb_resolve_color_texture.get(), nullptr); + if (!m_efb_color_resolve_framebuffer) + return false; + } } // We also need one to convert the D24S8 to R32F if that is being used (Adreno). @@ -248,12 +259,27 @@ AbstractTexture* FramebufferManager::ResolveEFBColorTexture(const MathUtil::Rect clamped_region.ClampUL(0, 0, GetEFBWidth(), GetEFBHeight()); // Resolve to our already-created texture. - for (u32 layer = 0; layer < GetEFBLayers(); layer++) + if (g_ActiveConfig.backend_info.bSupportsPartialMultisampleResolve) { - m_efb_resolve_color_texture->ResolveFromTexture(m_efb_color_texture.get(), clamped_region, - layer, 0); + for (u32 layer = 0; layer < GetEFBLayers(); layer++) + { + m_efb_resolve_color_texture->ResolveFromTexture(m_efb_color_texture.get(), clamped_region, + layer, 0); + } + } + else + { + m_efb_color_texture->FinishedRendering(); + g_renderer->BeginUtilityDrawing(); + g_renderer->SetAndDiscardFramebuffer(m_efb_color_resolve_framebuffer.get()); + g_renderer->SetPipeline(m_efb_color_resolve_pipeline.get()); + g_renderer->SetTexture(0, m_efb_color_texture.get()); + g_renderer->SetSamplerState(0, RenderState::GetPointSamplerState()); + g_renderer->SetViewportAndScissor(clamped_region); + g_renderer->Draw(0, 3); + m_efb_resolve_color_texture->FinishedRendering(); + g_renderer->EndUtilityDrawing(); } - m_efb_resolve_color_texture->FinishedRendering(); return m_efb_resolve_color_texture.get(); } @@ -487,6 +513,22 @@ bool FramebufferManager::CompileReadbackPipelines() m_efb_depth_resolve_pipeline = g_renderer->CreatePipeline(config); if (!m_efb_depth_resolve_pipeline) return false; + + if (!g_ActiveConfig.backend_info.bSupportsPartialMultisampleResolve) + { + config.framebuffer_state.color_texture_format = GetEFBColorFormat(); + auto color_resolve_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Pixel, + FramebufferShaderGen::GenerateResolveColorPixelShader(GetEFBSamples()), + "Color resolve pixel shader"); + if (!color_resolve_shader) + return false; + + config.pixel_shader = color_resolve_shader.get(); + m_efb_color_resolve_pipeline = g_renderer->CreatePipeline(config); + if (!m_efb_color_resolve_pipeline) + return false; + } } // EFB restore pipeline diff --git a/Source/Core/VideoCommon/FramebufferManager.h b/Source/Core/VideoCommon/FramebufferManager.h index b9144f83ef..2d27fef09b 100644 --- a/Source/Core/VideoCommon/FramebufferManager.h +++ b/Source/Core/VideoCommon/FramebufferManager.h @@ -170,7 +170,9 @@ protected: std::unique_ptr m_efb_framebuffer; std::unique_ptr m_efb_convert_framebuffer; + std::unique_ptr m_efb_color_resolve_framebuffer; std::unique_ptr m_efb_depth_resolve_framebuffer; + std::unique_ptr m_efb_color_resolve_pipeline; std::unique_ptr m_efb_depth_resolve_pipeline; // Pipeline for restoring the contents of the EFB from a save state diff --git a/Source/Core/VideoCommon/FramebufferShaderGen.cpp b/Source/Core/VideoCommon/FramebufferShaderGen.cpp index 6426b259b4..49684bb518 100644 --- a/Source/Core/VideoCommon/FramebufferShaderGen.cpp +++ b/Source/Core/VideoCommon/FramebufferShaderGen.cpp @@ -338,6 +338,22 @@ std::string GenerateColorPixelShader() return code.GetBuffer(); } +std::string GenerateResolveColorPixelShader(u32 samples) +{ + ShaderCode code; + EmitSamplerDeclarations(code, 0, 1, true); + EmitPixelMainDeclaration(code, 1, 0); + code.Write("{{\n" + " int layer = int(v_tex0.z);\n" + " int3 coords = int3(int2(gl_FragCoord.xy), layer);\n" + " ocol0 = float4(0.0f);\n"); + code.Write(" for (int i = 0; i < {}; i++)\n", samples); + code.Write(" ocol0 += texelFetch(samp0, coords, i);\n"); + code.Write(" ocol0 /= {}.0f;\n", samples); + code.Write("}}\n"); + return code.GetBuffer(); +} + std::string GenerateResolveDepthPixelShader(u32 samples) { ShaderCode code; diff --git a/Source/Core/VideoCommon/FramebufferShaderGen.h b/Source/Core/VideoCommon/FramebufferShaderGen.h index 9d487f7a5a..8a6aac553f 100644 --- a/Source/Core/VideoCommon/FramebufferShaderGen.h +++ b/Source/Core/VideoCommon/FramebufferShaderGen.h @@ -15,6 +15,7 @@ std::string GenerateScreenQuadVertexShader(); std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors); std::string GenerateTextureCopyVertexShader(); std::string GenerateTextureCopyPixelShader(); +std::string GenerateResolveColorPixelShader(u32 samples); std::string GenerateResolveDepthPixelShader(u32 samples); std::string GenerateClearVertexShader(); std::string GenerateEFBPokeVertexShader(); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 6ce7bed379..8de5d1e593 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -238,6 +238,7 @@ struct VideoConfig final bool bSupportsTextureQueryLevels = false; bool bSupportsLodBiasInSampler = false; bool bSupportsSettingObjectNames = false; + bool bSupportsPartialMultisampleResolve = false; } backend_info; // Utility From c48035908cc1821625c135b9a96e09e47264ed45 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Tue, 21 Jun 2022 04:07:54 -0500 Subject: [PATCH 08/12] VideoBackends:Metal: Use unified memory path by default on all GPUs --- Source/Core/VideoBackends/Metal/MTLUtil.mm | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.mm b/Source/Core/VideoBackends/Metal/MTLUtil.mm index b80b235436..85ac96b342 100644 --- a/Source/Core/VideoBackends/Metal/MTLUtil.mm +++ b/Source/Core/VideoBackends/Metal/MTLUtil.mm @@ -211,12 +211,13 @@ void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id config->backend_info.AAModes.push_back(i); } + // The unified memory path (using shared buffers for everything) performs noticeably better with + // bbox even on discrete GPUs (20fps vs 15fps in Super Paper Mario elevator), so default to that. + // The separate buffer + manual upload path is left available for testing and comparison. if (char* env = getenv("MTL_UNIFIED_MEMORY")) g_features.unified_memory = env[0] == '1' || env[0] == 'y' || env[0] == 'Y'; - else if (@available(macOS 10.15, iOS 13.0, *)) - g_features.unified_memory = [device hasUnifiedMemory]; else - g_features.unified_memory = false; + g_features.unified_memory = true; g_features.subgroup_ops = false; if (@available(macOS 10.15, iOS 13, *)) From ee3f2b8fcb953151fb6617f31d7a96bb7ebc12a5 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sun, 26 Jun 2022 01:04:53 -0500 Subject: [PATCH 09/12] VideoBackends:Metal: Implement PerfQuery --- .../Core/VideoBackends/Metal/MTLPerfQuery.h | 26 +++-- .../Core/VideoBackends/Metal/MTLPerfQuery.mm | 86 +++++++++++++++++ .../VideoBackends/Metal/MTLStateTracker.h | 11 +++ .../VideoBackends/Metal/MTLStateTracker.mm | 94 ++++++++++++++++++- 4 files changed, 210 insertions(+), 7 deletions(-) diff --git a/Source/Core/VideoBackends/Metal/MTLPerfQuery.h b/Source/Core/VideoBackends/Metal/MTLPerfQuery.h index 793cf8cec5..993b92e93c 100644 --- a/Source/Core/VideoBackends/Metal/MTLPerfQuery.h +++ b/Source/Core/VideoBackends/Metal/MTLPerfQuery.h @@ -3,6 +3,9 @@ #pragma once +#include +#include + #include "VideoCommon/PerfQueryBase.h" namespace Metal @@ -10,11 +13,22 @@ namespace Metal class PerfQuery final : public PerfQueryBase { public: - void EnableQuery(PerfQueryGroup type) override {} - void DisableQuery(PerfQueryGroup type) override {} - void ResetQuery() override {} - u32 GetQueryResult(PerfQueryType type) override { return 0; } - void FlushResults() override {} - bool IsFlushed() const override { return true; } + void EnableQuery(PerfQueryGroup type) override; + void DisableQuery(PerfQueryGroup type) override; + void ResetQuery() override; + u32 GetQueryResult(PerfQueryType type) override; + void FlushResults() override; + bool IsFlushed() const override; + + /// Notify PerfQuery of a new pending encoder + /// One call to ReturnResults should be made for every call to IncCount + void IncCount() { m_query_count.fetch_add(1, std::memory_order_relaxed); } + /// May be called from any thread + void ReturnResults(const u64* data, const PerfQueryGroup* groups, size_t count, u32 query_id); + +private: + u32 m_current_query = 0; + std::mutex m_results_mtx; + std::condition_variable m_cv; }; } // namespace Metal diff --git a/Source/Core/VideoBackends/Metal/MTLPerfQuery.mm b/Source/Core/VideoBackends/Metal/MTLPerfQuery.mm index 2892bdc747..42139e63bf 100644 --- a/Source/Core/VideoBackends/Metal/MTLPerfQuery.mm +++ b/Source/Core/VideoBackends/Metal/MTLPerfQuery.mm @@ -2,3 +2,89 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "VideoBackends/Metal/MTLPerfQuery.h" + +#include "VideoBackends/Metal/MTLStateTracker.h" + +void Metal::PerfQuery::EnableQuery(PerfQueryGroup type) +{ + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + g_state_tracker->EnablePerfQuery(type, m_current_query); +} + +void Metal::PerfQuery::DisableQuery(PerfQueryGroup type) +{ + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + g_state_tracker->DisablePerfQuery(); +} + +void Metal::PerfQuery::ResetQuery() +{ + std::lock_guard lock(m_results_mtx); + m_current_query++; + for (std::atomic& result : m_results) + result.store(0, std::memory_order_relaxed); +} + +u32 Metal::PerfQuery::GetQueryResult(PerfQueryType type) +{ + u32 result = 0; + if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC) + { + result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed); + } + else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT) + { + result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed); + } + else if (type == PQ_BLEND_INPUT) + { + result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) + + m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed); + } + else if (type == PQ_EFB_COPY_CLOCKS) + { + result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed); + } + + return result; +} + +void Metal::PerfQuery::FlushResults() +{ + if (IsFlushed()) + return; + + // There's a possibility that some active performance queries are unflushed + g_state_tracker->FlushEncoders(); + + std::unique_lock lock(m_results_mtx); + while (!IsFlushed()) + m_cv.wait(lock); +} + +bool Metal::PerfQuery::IsFlushed() const +{ + return m_query_count.load(std::memory_order_acquire) == 0; +} + +void Metal::PerfQuery::ReturnResults(const u64* data, const PerfQueryGroup* groups, size_t count, + u32 query_id) +{ + { + std::lock_guard lock(m_results_mtx); + if (m_current_query == query_id) + { + for (size_t i = 0; i < count; ++i) + { + u64 native_res_result = data[i] * (EFB_WIDTH * EFB_HEIGHT) / + (g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight()); + + native_res_result /= g_ActiveConfig.iMultisamples; + + m_results[groups[i]].fetch_add(native_res_result, std::memory_order_relaxed); + } + } + m_query_count.fetch_sub(1, std::memory_order_release); + } + m_cv.notify_one(); +} diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.h b/Source/Core/VideoBackends/Metal/MTLStateTracker.h index ac930767c4..1807f345e4 100644 --- a/Source/Core/VideoBackends/Metal/MTLStateTracker.h +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.h @@ -17,6 +17,7 @@ #include "VideoBackends/Metal/MTLTexture.h" #include "VideoBackends/Metal/MTLUtil.h" +#include "VideoCommon/PerfQueryBase.h" #include "VideoCommon/RenderBase.h" namespace Metal @@ -90,6 +91,8 @@ public: void SetFragmentBufferNow(u32 idx, id buffer, u32 offset); /// Use around utility draws that are commonly used immediately before gx draws to the same buffer void EnableEncoderLabel(bool enabled) { m_flags.should_apply_label = enabled; } + void EnablePerfQuery(PerfQueryGroup group, u32 query_id); + void DisablePerfQuery(); void UnbindTexture(id texture); void Draw(u32 base_vertex, u32 num_vertices); @@ -157,8 +160,10 @@ private: }; struct Backref; + struct PerfQueryTracker; std::shared_ptr m_backref; + std::vector> m_perf_query_tracker_cache; MRCOwned> m_fence; MRCOwned> m_upload_cmdbuf; MRCOwned> m_upload_encoder; @@ -224,7 +229,9 @@ private: MTLDepthClipMode depth_clip_mode; MTLCullMode cull_mode; DepthStencilSelector depth_stencil; + PerfQueryGroup perf_query_group; } m_current; + std::shared_ptr m_current_perf_query; /// Things that represent what we'd *like* to have on the encoder for the next draw struct State @@ -250,8 +257,12 @@ private: id texels = nullptr; u32 texel_buffer_offset0; u32 texel_buffer_offset1; + PerfQueryGroup perf_query_group = static_cast(-1); } m_state; + u32 m_perf_query_tracker_counter = 0; + + std::shared_ptr NewPerfQueryTracker(); void SetSamplerForce(u32 idx, const SamplerState& sampler); void Sync(BufferPair& buffer); Map CommitPreallocation(UploadBuffer buffer_idx, size_t actual_amt); diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm index 8451f413da..66ae33b733 100644 --- a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm @@ -10,6 +10,7 @@ #include "Common/BitUtils.h" #include "VideoBackends/Metal/MTLObjectCache.h" +#include "VideoBackends/Metal/MTLPerfQuery.h" #include "VideoBackends/Metal/MTLPipeline.h" #include "VideoBackends/Metal/MTLTexture.h" #include "VideoBackends/Metal/MTLUtil.h" @@ -19,6 +20,8 @@ #include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoConfig.h" +static constexpr u32 PERF_QUERY_BUFFER_SIZE = 512; + std::unique_ptr Metal::g_state_tracker; struct Metal::StateTracker::Backref @@ -28,6 +31,14 @@ struct Metal::StateTracker::Backref explicit Backref(StateTracker* state_tracker) : state_tracker(state_tracker) {} }; +struct Metal::StateTracker::PerfQueryTracker +{ + MRCOwned> buffer; + const u64* contents; + std::vector groups; + u32 query_id; +}; + static NSString* GetName(Metal::StateTracker::UploadBuffer buffer) { // clang-format off @@ -328,8 +339,12 @@ void Metal::StateTracker::BeginRenderPass(MTLLoadAction load_action) void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor) { EndRenderPass(); + if (m_current_perf_query) + [descriptor setVisibilityResultBuffer:m_current_perf_query->buffer]; m_current_render_encoder = MRCRetain([GetRenderCmdBuf() renderCommandEncoderWithDescriptor:descriptor]); + if (m_current_perf_query) + [descriptor setVisibilityResultBuffer:nil]; if (!g_features.unified_memory) [m_current_render_encoder waitForFence:m_fence beforeStages:MTLRenderStageVertex]; AbstractTexture* attachment = m_current_framebuffer->GetColorAttachment(); @@ -347,6 +362,7 @@ void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor) m_current.depth_stencil = DepthStencilSelector(false, CompareMode::Always); m_current.depth_clip_mode = MTLDepthClipModeClip; m_current.cull_mode = MTLCullModeNone; + m_current.perf_query_group = static_cast(-1); m_flags.NewEncoder(); m_dirty_samplers = 0xff; m_dirty_textures = 0xff; @@ -411,15 +427,23 @@ void Metal::StateTracker::FlushEncoders() m_texture_upload_cmdbuf = nullptr; } [m_current_render_cmdbuf - addCompletedHandler:[backref = m_backref, draw = m_current_draw](id buf) { + addCompletedHandler:[backref = m_backref, draw = m_current_draw, + q = std::move(m_current_perf_query)](id buf) { std::lock_guard guard(backref->mtx); if (StateTracker* tracker = backref->state_tracker) { // We can do the update non-atomically because we only ever update under the lock u64 newval = std::max(draw, tracker->m_last_finished_draw.load(std::memory_order_relaxed)); tracker->m_last_finished_draw.store(newval, std::memory_order_release); + if (q) + { + if (PerfQuery* query = static_cast(g_perf_query.get())) + query->ReturnResults(q->contents, q->groups.data(), q->groups.size(), q->query_id); + tracker->m_perf_query_tracker_cache.emplace_back(std::move(q)); + } } }]; + m_current_perf_query = nullptr; [m_current_render_cmdbuf commit]; m_last_render_cmdbuf = std::move(m_current_render_cmdbuf); m_current_render_cmdbuf = nullptr; @@ -603,6 +627,57 @@ void Metal::StateTracker::SetFragmentBufferNow(u32 idx, id buffer, u3 } } +std::shared_ptr Metal::StateTracker::NewPerfQueryTracker() +{ + static_cast(g_perf_query.get())->IncCount(); + // The cache is repopulated asynchronously + std::lock_guard lock(m_backref->mtx); + if (m_perf_query_tracker_cache.empty()) + { + // Make a new one + @autoreleasepool + { + std::shared_ptr tracker = std::make_shared(); + const MTLResourceOptions options = + MTLResourceStorageModeShared | MTLResourceHazardTrackingModeUntracked; + id buffer = [g_device newBufferWithLength:PERF_QUERY_BUFFER_SIZE * sizeof(u64) + options:options]; + [buffer setLabel:[NSString stringWithFormat:@"PerfQuery Buffer %d", + m_perf_query_tracker_counter++]]; + tracker->buffer = MRCTransfer(buffer); + tracker->contents = static_cast([buffer contents]); + return tracker; + } + } + else + { + // Reuse an old one + std::shared_ptr tracker = std::move(m_perf_query_tracker_cache.back()); + m_perf_query_tracker_cache.pop_back(); + return tracker; + } +} + +void Metal::StateTracker::EnablePerfQuery(PerfQueryGroup group, u32 query_id) +{ + m_state.perf_query_group = group; + if (!m_current_perf_query || m_current_perf_query->query_id != query_id || + m_current_perf_query->groups.size() == PERF_QUERY_BUFFER_SIZE) + { + if (m_current_render_encoder) + EndRenderPass(); + if (!m_current_perf_query) + m_current_perf_query = NewPerfQueryTracker(); + m_current_perf_query->groups.clear(); + m_current_perf_query->query_id = query_id; + } +} + +void Metal::StateTracker::DisablePerfQuery() +{ + m_state.perf_query_group = static_cast(-1); +} + // MARK: Render // clang-format off @@ -620,6 +695,9 @@ static NSRange RangeOfBits(u32 value) void Metal::StateTracker::PrepareRender() { + // BeginRenderPass needs this + if (m_state.perf_query_group != static_cast(-1) && !m_current_perf_query) + m_current_perf_query = NewPerfQueryTracker(); if (!m_current_render_encoder) BeginRenderPass(MTLLoadActionLoad); id enc = m_current_render_encoder; @@ -710,6 +788,20 @@ void Metal::StateTracker::PrepareRender() lodMaxClamps:m_state.sampler_max_lod.data() withRange:range]; } + if (m_state.perf_query_group != m_current.perf_query_group) + { + m_current.perf_query_group = m_state.perf_query_group; + if (m_state.perf_query_group == static_cast(-1)) + { + [enc setVisibilityResultMode:MTLVisibilityResultModeDisabled offset:0]; + } + else + { + [enc setVisibilityResultMode:MTLVisibilityResultModeCounting + offset:m_current_perf_query->groups.size() * 8]; + m_current_perf_query->groups.push_back(m_state.perf_query_group); + } + } if (is_gx) { // GX draw From 5065767abdadac6cb0b2597ccb1ea47af3549bb4 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sun, 3 Jul 2022 20:47:31 -0500 Subject: [PATCH 10/12] VideoBackends:Metal: Avoid submitting draw calls with no vertices/indices --- Source/Core/VideoBackends/Metal/MTLStateTracker.mm | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm index 66ae33b733..b221644640 100644 --- a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm @@ -80,6 +80,10 @@ bool Metal::StateTracker::UsageTracker::PrepareForAllocation(u64 last_draw, size size_t Metal::StateTracker::UsageTracker::Allocate(u64 current_draw, size_t amt) { + // Allocation of zero bytes would make the buffer think it's full + // Zero bytes is useless anyways, so don't mark usage in that case + if (!amt) + return m_pos; if (m_usage.empty() || m_usage.back().drawno != current_draw) m_usage.push_back({current_draw, m_pos}); size_t ret = m_pos; @@ -886,6 +890,8 @@ void Metal::StateTracker::PrepareCompute() void Metal::StateTracker::Draw(u32 base_vertex, u32 num_vertices) { + if (!num_vertices) + return; PrepareRender(); [m_current_render_encoder drawPrimitives:m_state.render_pipeline->Prim() vertexStart:base_vertex @@ -894,6 +900,8 @@ void Metal::StateTracker::Draw(u32 base_vertex, u32 num_vertices) void Metal::StateTracker::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) { + if (!num_indices) // Happens in Metroid Prime, Metal API validation doesn't like this + return; PrepareRender(); [m_current_render_encoder drawIndexedPrimitives:m_state.render_pipeline->Prim() indexCount:num_indices From a41345127f697515ceb65ed349032e9ed3abc7f9 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Thu, 21 Jul 2022 20:07:23 -0500 Subject: [PATCH 11/12] VideoBackends:Metal: Remove unified memory config Not worth the extra code --- .../VideoBackends/Metal/MTLBoundingBox.mm | 35 +---- .../VideoBackends/Metal/MTLStateTracker.h | 19 +-- .../VideoBackends/Metal/MTLStateTracker.mm | 120 ++---------------- Source/Core/VideoBackends/Metal/MTLTexture.mm | 3 +- Source/Core/VideoBackends/Metal/MTLUtil.h | 1 - Source/Core/VideoBackends/Metal/MTLUtil.mm | 8 -- 6 files changed, 21 insertions(+), 165 deletions(-) diff --git a/Source/Core/VideoBackends/Metal/MTLBoundingBox.mm b/Source/Core/VideoBackends/Metal/MTLBoundingBox.mm index 89c446b040..4c1a6ebd48 100644 --- a/Source/Core/VideoBackends/Metal/MTLBoundingBox.mm +++ b/Source/Core/VideoBackends/Metal/MTLBoundingBox.mm @@ -16,28 +16,16 @@ Metal::BoundingBox::~BoundingBox() bool Metal::BoundingBox::Initialize() { - const MTLResourceOptions gpu_storage_mode = - g_features.unified_memory ? MTLResourceStorageModeShared : MTLResourceStorageModePrivate; - const MTLResourceOptions gpu_options = gpu_storage_mode | MTLResourceHazardTrackingModeUntracked; + const MTLResourceOptions gpu_options = + MTLResourceStorageModeShared | MTLResourceHazardTrackingModeUntracked; const id dev = g_device; m_upload_fence = MRCTransfer([dev newFence]); [m_upload_fence setLabel:@"BBox Upload Fence"]; m_download_fence = MRCTransfer([dev newFence]); [m_download_fence setLabel:@"BBox Download Fence"]; m_gpu_buffer = MRCTransfer([dev newBufferWithLength:BUFFER_SIZE options:gpu_options]); - if (g_features.unified_memory) - { - [m_gpu_buffer setLabel:@"BBox Buffer"]; - m_cpu_buffer_ptr = static_cast([m_gpu_buffer contents]); - } - else - { - m_cpu_buffer = MRCTransfer([dev newBufferWithLength:BUFFER_SIZE - options:MTLResourceStorageModeShared]); - m_cpu_buffer_ptr = static_cast([m_cpu_buffer contents]); - [m_gpu_buffer setLabel:@"BBox GPU Buffer"]; - [m_cpu_buffer setLabel:@"BBox CPU Buffer"]; - } + [m_gpu_buffer setLabel:@"BBox Buffer"]; + m_cpu_buffer_ptr = static_cast([m_gpu_buffer contents]); g_state_tracker->SetBBoxBuffer(m_gpu_buffer, m_upload_fence, m_download_fence); return true; } @@ -47,18 +35,6 @@ std::vector Metal::BoundingBox::Read(u32 index, u32 length) @autoreleasepool { g_state_tracker->EndRenderPass(); - if (!g_features.unified_memory) - { - id download = [g_state_tracker->GetRenderCmdBuf() blitCommandEncoder]; - [download setLabel:@"BBox Download"]; - [download waitForFence:m_download_fence]; - [download copyFromBuffer:m_gpu_buffer - sourceOffset:0 - toBuffer:m_cpu_buffer - destinationOffset:0 - size:BUFFER_SIZE]; - [download endEncoding]; - } g_state_tracker->FlushEncoders(); g_state_tracker->WaitForFlushedEncoders(); return std::vector(m_cpu_buffer_ptr + index, m_cpu_buffer_ptr + index + length); @@ -68,8 +44,7 @@ std::vector Metal::BoundingBox::Read(u32 index, u32 length) void Metal::BoundingBox::Write(u32 index, const std::vector& values) { const u32 size = values.size() * sizeof(BBoxType); - if (g_features.unified_memory && !g_state_tracker->HasUnflushedData() && - !g_state_tracker->GPUBusy()) + if (!g_state_tracker->HasUnflushedData() && !g_state_tracker->GPUBusy()) { // We can just write directly to the buffer! memcpy(m_cpu_buffer_ptr + index, values.data(), size); diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.h b/Source/Core/VideoBackends/Metal/MTLStateTracker.h index 1807f345e4..3e7bac832d 100644 --- a/Source/Core/VideoBackends/Metal/MTLStateTracker.h +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.h @@ -34,6 +34,7 @@ public: Uniform, Vertex, Index, + TextureData, Texels, Last = Texels }; @@ -105,7 +106,6 @@ public: { return (amt + static_cast(align)) & ~static_cast(align); } - Map AllocateForTextureUpload(size_t amt); Map Allocate(UploadBuffer buffer_idx, size_t amt, AlignMask align) { Preallocate(buffer_idx, amt); @@ -119,7 +119,6 @@ public: static_cast(align)) == 0); return CommitPreallocation(buffer_idx, Align(amt, align)); } - id GetUploadEncoder(); id GetTextureUploadEncoder(); id GetRenderCmdBuf(); @@ -143,28 +142,18 @@ private: void Reset(size_t new_size); }; - struct CPUBuffer + struct Buffer { UsageTracker usage; MRCOwned> mtlbuffer; void* buffer = nullptr; }; - struct BufferPair - { - UsageTracker usage; - MRCOwned> cpubuffer; - MRCOwned> gpubuffer; - void* buffer = nullptr; - size_t last_upload = 0; - }; - struct Backref; struct PerfQueryTracker; std::shared_ptr m_backref; std::vector> m_perf_query_tracker_cache; - MRCOwned> m_fence; MRCOwned> m_upload_cmdbuf; MRCOwned> m_upload_encoder; MRCOwned> m_texture_upload_cmdbuf; @@ -176,8 +165,7 @@ private: MRCOwned m_render_pass_desc[3]; MRCOwned m_resolve_pass_desc; Framebuffer* m_current_framebuffer; - CPUBuffer m_texture_upload_buffer; - BufferPair m_upload_buffers[static_cast(UploadBuffer::Last) + 1]; + Buffer m_upload_buffers[static_cast(UploadBuffer::Last) + 1]; u64 m_current_draw = 1; std::atomic m_last_finished_draw{0}; @@ -264,7 +252,6 @@ private: std::shared_ptr NewPerfQueryTracker(); void SetSamplerForce(u32 idx, const SamplerState& sampler); - void Sync(BufferPair& buffer); Map CommitPreallocation(UploadBuffer buffer_idx, size_t actual_amt); void CheckViewport(); void CheckScissor(); diff --git a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm index b221644640..3ab6224f83 100644 --- a/Source/Core/VideoBackends/Metal/MTLStateTracker.mm +++ b/Source/Core/VideoBackends/Metal/MTLStateTracker.mm @@ -44,11 +44,12 @@ static NSString* GetName(Metal::StateTracker::UploadBuffer buffer) // clang-format off switch (buffer) { - case Metal::StateTracker::UploadBuffer::Texels: return @"Texels"; - case Metal::StateTracker::UploadBuffer::Vertex: return @"Vertices"; - case Metal::StateTracker::UploadBuffer::Index: return @"Indices"; - case Metal::StateTracker::UploadBuffer::Uniform: return @"Uniforms"; - case Metal::StateTracker::UploadBuffer::Other: return @"Generic Upload"; + case Metal::StateTracker::UploadBuffer::TextureData: return @"Texture Data"; + case Metal::StateTracker::UploadBuffer::Texels: return @"Texels"; + case Metal::StateTracker::UploadBuffer::Vertex: return @"Vertices"; + case Metal::StateTracker::UploadBuffer::Index: return @"Indices"; + case Metal::StateTracker::UploadBuffer::Uniform: return @"Uniforms"; + case Metal::StateTracker::UploadBuffer::Other: return @"Generic Upload"; } // clang-format on } @@ -103,7 +104,6 @@ void Metal::StateTracker::UsageTracker::Reset(size_t new_size) Metal::StateTracker::StateTracker() : m_backref(std::make_shared(this)) { m_flags.should_apply_label = true; - m_fence = MRCTransfer([g_device newFence]); for (MRCOwned& rpdesc : m_render_pass_desc) { rpdesc = MRCTransfer([MTLRenderPassDescriptor new]); @@ -140,10 +140,9 @@ Metal::StateTracker::~StateTracker() // MARK: BufferPair Ops -Metal::StateTracker::Map Metal::StateTracker::AllocateForTextureUpload(size_t amt) +std::pair Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt) { - amt = (amt + 15) & ~15ull; - CPUBuffer& buffer = m_texture_upload_buffer; + Buffer& buffer = m_upload_buffers[static_cast(buffer_idx)]; u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire); bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt); if (__builtin_expect(needs_new, false)) @@ -155,61 +154,11 @@ Metal::StateTracker::Map Metal::StateTracker::AllocateForTextureUpload(size_t am MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined; buffer.mtlbuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]); - [buffer.mtlbuffer setLabel:@"Texture Upload Buffer"]; + [buffer.mtlbuffer setLabel:GetName(buffer_idx)]; ASSERT_MSG(VIDEO, buffer.mtlbuffer, "Failed to allocate MTLBuffer (out of memory?)"); buffer.buffer = [buffer.mtlbuffer contents]; buffer.usage.Reset(newsize); } - - size_t pos = buffer.usage.Allocate(m_current_draw, amt); - - Map ret = {buffer.mtlbuffer, pos, reinterpret_cast(buffer.buffer) + pos}; - DEBUG_ASSERT(pos <= buffer.usage.Size() && - "Previous code should have guaranteed there was enough space"); - return ret; -} - -std::pair Metal::StateTracker::Preallocate(UploadBuffer buffer_idx, size_t amt) -{ - BufferPair& buffer = m_upload_buffers[static_cast(buffer_idx)]; - u64 last_draw = m_last_finished_draw.load(std::memory_order_acquire); - size_t base_pos = buffer.usage.Pos(); - bool needs_new = buffer.usage.PrepareForAllocation(last_draw, amt); - bool needs_upload = needs_new || buffer.usage.Pos() == 0; - if (!g_features.unified_memory && needs_upload) - { - if (base_pos != buffer.last_upload) - { - id encoder = GetUploadEncoder(); - [encoder copyFromBuffer:buffer.cpubuffer - sourceOffset:buffer.last_upload - toBuffer:buffer.gpubuffer - destinationOffset:buffer.last_upload - size:base_pos - buffer.last_upload]; - } - buffer.last_upload = 0; - } - if (__builtin_expect(needs_new, false)) - { - // Orphan buffer - size_t newsize = std::max(buffer.usage.Size() * 2, 4096); - while (newsize < amt) - newsize *= 2; - MTLResourceOptions options = - MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined; - buffer.cpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]); - [buffer.cpubuffer setLabel:GetName(buffer_idx)]; - ASSERT_MSG(VIDEO, buffer.cpubuffer, "Failed to allocate MTLBuffer (out of memory?)"); - buffer.buffer = [buffer.cpubuffer contents]; - buffer.usage.Reset(newsize); - if (!g_features.unified_memory) - { - options = MTLResourceStorageModePrivate | MTLResourceHazardTrackingModeUntracked; - buffer.gpubuffer = MRCTransfer([g_device newBufferWithLength:newsize options:options]); - [buffer.gpubuffer setLabel:GetName(buffer_idx)]; - ASSERT_MSG(VIDEO, buffer.gpubuffer, "Failed to allocate MTLBuffer (out of memory?)"); - } - } size_t pos = buffer.usage.Pos(); return std::make_pair(reinterpret_cast(buffer.buffer) + pos, pos); } @@ -217,46 +166,17 @@ std::pair Metal::StateTracker::Preallocate(UploadBuffer buffer_id Metal::StateTracker::Map Metal::StateTracker::CommitPreallocation(UploadBuffer buffer_idx, size_t amt) { - BufferPair& buffer = m_upload_buffers[static_cast(buffer_idx)]; + Buffer& buffer = m_upload_buffers[static_cast(buffer_idx)]; size_t pos = buffer.usage.Allocate(m_current_draw, amt); Map ret = {nil, pos, reinterpret_cast(buffer.buffer) + pos}; - ret.gpu_buffer = g_features.unified_memory ? buffer.cpubuffer : buffer.gpubuffer; + ret.gpu_buffer = buffer.mtlbuffer; DEBUG_ASSERT(pos <= buffer.usage.Size() && "Previous code should have guaranteed there was enough space"); return ret; } -void Metal::StateTracker::Sync(BufferPair& buffer) -{ - if (g_features.unified_memory || buffer.usage.Pos() == buffer.last_upload) - return; - - id encoder = GetUploadEncoder(); - [encoder copyFromBuffer:buffer.cpubuffer - sourceOffset:buffer.last_upload - toBuffer:buffer.gpubuffer - destinationOffset:buffer.last_upload - size:buffer.usage.Pos() - buffer.last_upload]; - buffer.last_upload = buffer.usage.Pos(); -} - // MARK: Render Pass / Encoder Management -id Metal::StateTracker::GetUploadEncoder() -{ - if (!m_upload_cmdbuf) - { - @autoreleasepool - { - m_upload_cmdbuf = MRCRetain([g_queue commandBuffer]); - [m_upload_cmdbuf setLabel:@"Vertex Upload"]; - m_upload_encoder = MRCRetain([m_upload_cmdbuf blitCommandEncoder]); - [m_upload_encoder setLabel:@"Vertex Upload"]; - } - } - return m_upload_encoder; -} - id Metal::StateTracker::GetTextureUploadEncoder() { if (!m_texture_upload_cmdbuf) @@ -349,8 +269,6 @@ void Metal::StateTracker::BeginRenderPass(MTLRenderPassDescriptor* descriptor) MRCRetain([GetRenderCmdBuf() renderCommandEncoderWithDescriptor:descriptor]); if (m_current_perf_query) [descriptor setVisibilityResultBuffer:nil]; - if (!g_features.unified_memory) - [m_current_render_encoder waitForFence:m_fence beforeStages:MTLRenderStageVertex]; AbstractTexture* attachment = m_current_framebuffer->GetColorAttachment(); if (!attachment) attachment = m_current_framebuffer->GetDepthAttachment(); @@ -380,8 +298,6 @@ void Metal::StateTracker::BeginComputePass() EndRenderPass(); m_current_compute_encoder = MRCRetain([GetRenderCmdBuf() computeCommandEncoder]); [m_current_compute_encoder setLabel:@"Compute"]; - if (!g_features.unified_memory) - [m_current_compute_encoder waitForFence:m_fence]; m_flags.NewEncoder(); m_dirty_samplers = 0xff; m_dirty_textures = 0xff; @@ -409,20 +325,6 @@ void Metal::StateTracker::FlushEncoders() if (!m_current_render_cmdbuf) return; EndRenderPass(); - for (int i = 0; i <= static_cast(UploadBuffer::Last); ++i) - Sync(m_upload_buffers[i]); - if (g_features.unified_memory) - { - ASSERT(!m_upload_cmdbuf && "Should never be used!"); - } - else if (m_upload_cmdbuf) - { - [m_upload_encoder updateFence:m_fence]; - [m_upload_encoder endEncoding]; - [m_upload_cmdbuf commit]; - m_upload_encoder = nullptr; - m_upload_cmdbuf = nullptr; - } if (m_texture_upload_cmdbuf) { [m_texture_upload_encoder endEncoding]; diff --git a/Source/Core/VideoBackends/Metal/MTLTexture.mm b/Source/Core/VideoBackends/Metal/MTLTexture.mm index 67c114caa7..fd0358e10e 100644 --- a/Source/Core/VideoBackends/Metal/MTLTexture.mm +++ b/Source/Core/VideoBackends/Metal/MTLTexture.mm @@ -59,7 +59,8 @@ void Metal::Texture::Load(u32 level, u32 width, u32 height, u32 row_length, // const u32 num_rows = Common::AlignUp(height, block_size) / block_size; const u32 source_pitch = CalculateStrideForFormat(m_config.format, row_length); const u32 upload_size = source_pitch * num_rows; - StateTracker::Map map = g_state_tracker->AllocateForTextureUpload(upload_size); + StateTracker::Map map = g_state_tracker->Allocate(StateTracker::UploadBuffer::TextureData, + upload_size, StateTracker::AlignMask::Other); memcpy(map.cpu_buffer, buffer, upload_size); id encoder = g_state_tracker->GetTextureUploadEncoder(); [encoder copyFromBuffer:map.gpu_buffer diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.h b/Source/Core/VideoBackends/Metal/MTLUtil.h index 385f508648..dfedecd7c6 100644 --- a/Source/Core/VideoBackends/Metal/MTLUtil.h +++ b/Source/Core/VideoBackends/Metal/MTLUtil.h @@ -16,7 +16,6 @@ namespace Metal { struct DeviceFeatures { - bool unified_memory; bool subgroup_ops; }; diff --git a/Source/Core/VideoBackends/Metal/MTLUtil.mm b/Source/Core/VideoBackends/Metal/MTLUtil.mm index 85ac96b342..50916a38e3 100644 --- a/Source/Core/VideoBackends/Metal/MTLUtil.mm +++ b/Source/Core/VideoBackends/Metal/MTLUtil.mm @@ -211,14 +211,6 @@ void Metal::Util::PopulateBackendInfoFeatures(VideoConfig* config, id config->backend_info.AAModes.push_back(i); } - // The unified memory path (using shared buffers for everything) performs noticeably better with - // bbox even on discrete GPUs (20fps vs 15fps in Super Paper Mario elevator), so default to that. - // The separate buffer + manual upload path is left available for testing and comparison. - if (char* env = getenv("MTL_UNIFIED_MEMORY")) - g_features.unified_memory = env[0] == '1' || env[0] == 'y' || env[0] == 'Y'; - else - g_features.unified_memory = true; - g_features.subgroup_ops = false; if (@available(macOS 10.15, iOS 13, *)) { From 6559c6b8ee8d35694282f3dcfaf09f96f73874db Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Thu, 21 Jul 2022 20:10:10 -0500 Subject: [PATCH 12/12] VideoBackends:Multiple: Grammar fixes --- Source/Core/VideoBackends/Metal/MTLRenderer.mm | 2 +- Source/Core/VideoBackends/Vulkan/VKRenderer.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Core/VideoBackends/Metal/MTLRenderer.mm b/Source/Core/VideoBackends/Metal/MTLRenderer.mm index e8e033c8aa..3639b01241 100644 --- a/Source/Core/VideoBackends/Metal/MTLRenderer.mm +++ b/Source/Core/VideoBackends/Metal/MTLRenderer.mm @@ -301,7 +301,7 @@ void Metal::Renderer::ClearScreen(const MathUtil::Rectangle& rc, bool color bpmem.zcontrol.pixel_format == PixelFormat::RGB8_Z24 || bpmem.zcontrol.pixel_format == PixelFormat::Z24) { - // Force alpha writes, and clear the alpha channel. This is different to the other backends, + // Force alpha writes, and clear the alpha channel. This is different from the other backends, // where the existing values of the alpha channel are preserved. alpha_enable = true; color &= 0x00FFFFFF; diff --git a/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp b/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp index 36fa325ae7..23dba4613b 100644 --- a/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp @@ -155,7 +155,7 @@ void Renderer::ClearScreen(const MathUtil::Rectangle& rc, bool color_enable bpmem.zcontrol.pixel_format == PixelFormat::RGB8_Z24 || bpmem.zcontrol.pixel_format == PixelFormat::Z24) { - // Force alpha writes, and clear the alpha channel. This is different to the other backends, + // Force alpha writes, and clear the alpha channel. This is different from the other backends, // where the existing values of the alpha channel are preserved. alpha_enable = true; color &= 0x00FFFFFF;