From 3dce1df00e3bf28707270c7d0c58fd1b6cabe3be Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Sat, 11 Jun 2022 15:49:08 +0200
Subject: [PATCH] JitArm64: Implement "soft MMU"

This is used when fastmem isn't available. Instead of always falling
back to the C++ code in MMU.cpp, the JIT translates addresses on its
own by looking them up in a table that Dolphin constructs. This is
slower than fastmem, but faster than the old non-fastmem code.

This is primarily useful for iOS, since that's the only major platform
nowadays where you can't reliably get fastmem. I think it would make
sense to merge this feature to master despite this, since there's
nothing actually iOS-specific about the feature. It would be of use
for me when I have to disable fastmem to stop Android Studio from
constantly breaking on segfaults, for instance.

Co-authored-by: OatmealDome
---
 Source/Core/Core/HW/Memmap.cpp                | 50 ++++++++++++++-----
 Source/Core/Core/HW/Memmap.h                  |  5 ++
 Source/Core/Core/PowerPC/JitArm64/Jit.h       | 18 +++----
 .../PowerPC/JitArm64/JitArm64_BackPatch.cpp   | 50 +++++++++++++------
 .../PowerPC/JitArm64/JitArm64_RegCache.cpp    |  2 +-
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp  |  8 +--
 Source/Core/Core/PowerPC/MMU.h                |  3 +-
 7 files changed, 94 insertions(+), 42 deletions(-)

diff --git a/Source/Core/Core/HW/Memmap.cpp b/Source/Core/Core/HW/Memmap.cpp
index 4f2aaa1a5d..f61e974bf3 100644
--- a/Source/Core/Core/HW/Memmap.cpp
+++ b/Source/Core/Core/HW/Memmap.cpp
@@ -47,6 +47,8 @@ namespace Memory
 // Store the MemArena here
 u8* physical_base = nullptr;
 u8* logical_base = nullptr;
+u8* physical_page_mappings_base = nullptr;
+u8* logical_page_mappings_base = nullptr;
 static bool is_fastmem_arena_initialized = false;
 
 // The MemArena class
@@ -223,6 +225,9 @@ static std::array<PhysicalMemoryRegion, 4> s_physical_regions;
 
 static std::vector<LogicalMemoryEntry> logical_mapped_entries;
 
+static std::array<u8*, PowerPC::BAT_PAGE_COUNT> s_physical_page_mappings;
+static std::array<u8*, PowerPC::BAT_PAGE_COUNT> s_logical_page_mappings;
+
 void Init()
 {
   const auto get_mem1_size = [] {
@@ -280,6 +285,8 @@ void Init()
   }
   g_arena.GrabSHMSegment(mem_size);
 
+  s_physical_page_mappings.fill(nullptr);
+
   // Create an anonymous view of the physical memory
   for (const PhysicalMemoryRegion& region : s_physical_regions)
   {
@@ -295,8 +302,17 @@ void Init()
                    region.physical_address, region.size);
       exit(0);
     }
+
+    for (u32 i = 0; i < region.size; i += PowerPC::BAT_PAGE_SIZE)
+    {
+      const size_t index = (i + region.physical_address) >> PowerPC::BAT_INDEX_SHIFT;
+      s_physical_page_mappings[index] = *region.out_pointer + i;
+    }
   }
 
+  physical_page_mappings_base = reinterpret_cast<u8*>(s_physical_page_mappings.data());
+  logical_page_mappings_base = reinterpret_cast<u8*>(s_logical_page_mappings.data());
+
   InitMMIO(wii);
 
   Clear();
@@ -347,14 +363,14 @@ bool InitFastmemArena()
 
 void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
 {
-  if (!is_fastmem_arena_initialized)
-    return;
-
   for (auto& entry : logical_mapped_entries)
   {
     g_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
   }
   logical_mapped_entries.clear();
+
+  s_logical_page_mappings.fill(nullptr);
+
   for (u32 i = 0; i < dbat_table.size(); ++i)
   {
     if (dbat_table[i] & PowerPC::BAT_PHYSICAL_BIT)
@@ -375,19 +391,27 @@ void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
         if (intersection_start < intersection_end)
         {
           // Found an overlapping region; map it.
-          u32 position = physical_region.shm_position + intersection_start - mapping_address;
-          u8* base = logical_base + logical_address + intersection_start - translated_address;
-          u32 mapped_size = intersection_end - intersection_start;
-
-          void* mapped_pointer = g_arena.MapInMemoryRegion(position, mapped_size, base);
-          if (!mapped_pointer)
+          if (is_fastmem_arena_initialized)
           {
-            PanicAlertFmt("Memory::UpdateLogicalMemory(): Failed to map memory region at 0x{:08X} "
-                          "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.",
-                          intersection_start, mapped_size, logical_address);
-            exit(0);
+            u32 position = physical_region.shm_position + intersection_start - mapping_address;
+            u8* base = logical_base + logical_address + intersection_start - translated_address;
+            u32 mapped_size = intersection_end - intersection_start;
+
+            void* mapped_pointer = g_arena.MapInMemoryRegion(position, mapped_size, base);
+            if (!mapped_pointer)
+            {
+              PanicAlertFmt(
+                  "Memory::UpdateLogicalMemory(): Failed to map memory region at 0x{:08X} "
+                  "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.",
+                  intersection_start, mapped_size, logical_address);
+              exit(0);
+            }
+
+            logical_mapped_entries.push_back({mapped_pointer, mapped_size});
           }
-          logical_mapped_entries.push_back({mapped_pointer, mapped_size});
+
+          s_logical_page_mappings[i] =
+              *physical_region.out_pointer + intersection_start - mapping_address;
         }
       }
     }
diff --git a/Source/Core/Core/HW/Memmap.h b/Source/Core/Core/HW/Memmap.h
index 5a15a1e343..32065f2053 100644
--- a/Source/Core/Core/HW/Memmap.h
+++ b/Source/Core/Core/HW/Memmap.h
@@ -27,6 +27,11 @@ namespace Memory
 extern u8* physical_base;
 extern u8* logical_base;
 
+// This page table is used for a "soft MMU" implementation when
+// setting up the full memory map in process memory isn't possible.
+extern u8* physical_page_mappings_base;
+extern u8* logical_page_mappings_base;
+
 // The actual memory used for backing the memory map.
 extern u8* m_pRAM;
 extern u8* m_pEXRAM;
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index e89c2b1a9b..454b4f68a7 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -216,9 +216,6 @@ protected:
   void DumpCode(const u8* start, const u8* end);
 
   // This enum is used for selecting an implementation of EmitBackpatchRoutine.
-  //
-  // The below descriptions of each enum entry apply when jo.fastmem_arena is true.
-  // If jo.fastmem_arena is false, the slow C++ code is always called instead.
   enum class MemAccessMode
   {
     // Always calls the slow C++ code. For performance reasons, should generally only be used if
@@ -227,8 +224,10 @@ protected:
     // Only emits fast access code. Must only be used if the guest address is known in advance
     // and IsOptimizableRAMAddress returns true for it, otherwise Dolphin will likely crash!
     AlwaysUnsafe,
-    // Best in most cases. Tries to run fast access code, and if that fails, uses backpatching to
-    // replace the code with a call to the slow C++ code.
+    // Best in most cases. If backpatching is possible (!emitting_routine && jo.fastmem_arena):
+    // Tries to run fast access code, and if that fails, uses backpatching to replace the code
+    // with a call to the slow C++ code. Otherwise: Checks whether the fast access code will work,
+    // then branches to either the fast access code or the slow C++ code.
     Auto,
   };
 
@@ -249,11 +248,12 @@ protected:
   //
   // Additional scratch registers are used in the following situations:
   //
-  // mode == Auto && emitting_routine: X2
-  // mode == Auto && emitting_routine && (flags & BackPatchInfo::FLAG_STORE): X0
-  // mode == Auto && emitting_routine && !(flags & BackPatchInfo::FLAG_STORE): X3
+  // emitting_routine && (mode == Auto || (mode != AlwaysSafe && !jo.fastmem_arena)): X2
+  // emitting_routine && mode == Auto && (flags & BackPatchInfo::FLAG_STORE): X0
+  // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
+  // !emitting_routine && mode != AlwaysSafe && !jo.fastmem_arena: X30
   //
-  // mode != AlwaysUnsafe || !jo.fastmem_arena:
+  // mode != AlwaysUnsafe:
   //   X30 (plus most other registers, unless marked in gprs_to_push and fprs_to_push)
   void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
                             Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
index e5ce801175..80bacaed6a 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
@@ -60,8 +60,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
 {
   const u32 access_size = BackPatchInfo::GetFlagSize(flags);
 
-  const bool emit_fastmem = jo.fastmem_arena && mode != MemAccessMode::AlwaysSafe;
-  const bool emit_slowmem = !jo.fastmem_arena || mode != MemAccessMode::AlwaysUnsafe;
+  const bool emit_fastmem = mode != MemAccessMode::AlwaysSafe;
+  const bool emit_slowmem = mode != MemAccessMode::AlwaysUnsafe;
 
   bool in_far_code = false;
   const u8* fastmem_start = GetCodePtr();
@@ -69,7 +69,29 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
 
   if (emit_fastmem)
   {
-    if (emit_slowmem && emitting_routine)
+    ARM64Reg memory_base = MEM_REG;
+    ARM64Reg memory_offset = addr;
+
+    if (!jo.fastmem_arena)
+    {
+      const ARM64Reg temp = emitting_routine ? ARM64Reg::W2 : ARM64Reg::W30;
+
+      memory_base = EncodeRegTo64(temp);
+      memory_offset = ARM64Reg::W8;  // TODO
+
+      LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
+      LDR(memory_base, MEM_REG, ArithOption(temp, true));
+
+      if (emit_slowmem)
+      {
+        FixupBranch pass = CBNZ(memory_base);
+        slowmem_fixup = B();
+        SetJumpTarget(pass);
+      }
+
+      AND(memory_offset, addr, LogicalImm(PowerPC::BAT_PAGE_SIZE - 1, 32));
+    }
+    else if (emit_slowmem && emitting_routine)
     {
       const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W0 : ARM64Reg::W3;
       const ARM64Reg temp2 = ARM64Reg::W2;
 
@@ -82,11 +104,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
       ARM64Reg temp = ARM64Reg::D0;
       temp = ByteswapBeforeStore(this, &m_float_emit, temp, EncodeRegToDouble(RS), flags, true);
 
-      m_float_emit.STR(access_size, temp, MEM_REG, addr);
+      m_float_emit.STR(access_size, temp, memory_base, memory_offset);
     }
     else if ((flags & BackPatchInfo::FLAG_LOAD) && (flags & BackPatchInfo::FLAG_FLOAT))
     {
-      m_float_emit.LDR(access_size, EncodeRegToDouble(RS), MEM_REG, addr);
+      m_float_emit.LDR(access_size, EncodeRegToDouble(RS), memory_base, memory_offset);
 
       ByteswapAfterLoad(this, &m_float_emit, EncodeRegToDouble(RS), EncodeRegToDouble(RS), flags,
                         true, false);
@@ -97,28 +119,28 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
       temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);
 
       if (flags & BackPatchInfo::FLAG_SIZE_32)
-        STR(temp, MEM_REG, addr);
+        STR(temp, memory_base, memory_offset);
       else if (flags & BackPatchInfo::FLAG_SIZE_16)
-        STRH(temp, MEM_REG, addr);
+        STRH(temp, memory_base, memory_offset);
       else
-        STRB(temp, MEM_REG, addr);
+        STRB(temp, memory_base, memory_offset);
     }
     else if (flags & BackPatchInfo::FLAG_ZERO_256)
     {
       // This literally only stores 32bytes of zeros to the target address
       ARM64Reg temp = ARM64Reg::X30;
-      ADD(temp, addr, MEM_REG);
+      ADD(temp, memory_base, memory_offset);
       STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 0);
       STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 16);
     }
     else
     {
       if (flags & BackPatchInfo::FLAG_SIZE_32)
-        LDR(RS, MEM_REG, addr);
+        LDR(RS, memory_base, memory_offset);
       else if (flags & BackPatchInfo::FLAG_SIZE_16)
-        LDRH(RS, MEM_REG, addr);
+        LDRH(RS, memory_base, memory_offset);
       else if (flags & BackPatchInfo::FLAG_SIZE_8)
-        LDRB(RS, MEM_REG, addr);
+        LDRB(RS, memory_base, memory_offset);
 
       ByteswapAfterLoad(this, &m_float_emit, RS, RS, flags, true, false);
     }
@@ -134,7 +156,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
     in_far_code = true;
     SwitchToFarCode();
 
-    if (!emitting_routine)
+    if (jo.fastmem_arena && !emitting_routine)
     {
       FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_end];
       fastmem_area->fastmem_code = fastmem_start;
@@ -264,7 +286,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
 
   if (in_far_code)
   {
-    if (emitting_routine)
+    if (slowmem_fixup)
     {
       FixupBranch done = B();
       SwitchToNearCode();
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
index 7a077f6ce2..af0fc0386b 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
@@ -393,7 +393,7 @@ void Arm64GPRCache::GetAllocationOrder()
       ARM64Reg::W11,
       ARM64Reg::W10,
       ARM64Reg::W9,
-      ARM64Reg::W8,
+      // ARM64Reg::W8,
       ARM64Reg::W7,
       ARM64Reg::W6,
       ARM64Reg::W5,
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index a506ce02e1..e50ec27929 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -93,10 +93,10 @@ void JitArm64::GenerateAsm()
   // set the mem_base based on MSR flags
   LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
   FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
-  MOVP2R(MEM_REG, Memory::physical_base);
+  MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::physical_base : Memory::physical_page_mappings_base);
   FixupBranch membaseend = B();
   SetJumpTarget(physmem);
-  MOVP2R(MEM_REG, Memory::logical_base);
+  MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::logical_base : Memory::logical_page_mappings_base);
   SetJumpTarget(membaseend);
 
   // iCache[(address >> 2) & iCache_Mask];
@@ -141,10 +141,10 @@ void JitArm64::GenerateAsm()
   // set the mem_base based on MSR flags and jump to next block.
   LDR(IndexType::Unsigned, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
   FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
-  MOVP2R(MEM_REG, Memory::physical_base);
+  MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::physical_base : Memory::physical_page_mappings_base);
   BR(ARM64Reg::X0);
   SetJumpTarget(physmem);
-  MOVP2R(MEM_REG, Memory::logical_base);
+  MOVP2R(MEM_REG, jo.fastmem_arena ? Memory::logical_base : Memory::logical_page_mappings_base);
   BR(ARM64Reg::X0);
 
   // Call JIT
diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h
index 726e861f62..6eda9a22b7 100644
--- a/Source/Core/Core/PowerPC/MMU.h
+++ b/Source/Core/Core/PowerPC/MMU.h
@@ -197,11 +197,12 @@ TranslateResult JitCache_TranslateAddress(u32 address);
 
 constexpr int BAT_INDEX_SHIFT = 17;
 constexpr u32 BAT_PAGE_SIZE = 1 << BAT_INDEX_SHIFT;
+constexpr u32 BAT_PAGE_COUNT = 1 << (32 - BAT_INDEX_SHIFT);
 constexpr u32 BAT_MAPPED_BIT = 0x1;
 constexpr u32 BAT_PHYSICAL_BIT = 0x2;
 constexpr u32 BAT_WI_BIT = 0x4;
 constexpr u32 BAT_RESULT_MASK = UINT32_C(~0x7);
-using BatTable = std::array<u32, 1 << (32 - BAT_INDEX_SHIFT)>;  // 128 KB
+using BatTable = std::array<u32, BAT_PAGE_COUNT>;  // 128 KB
 extern BatTable ibat_table;
 extern BatTable dbat_table;
 inline bool TranslateBatAddess(const BatTable& bat_table, u32* address, bool* wi)
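
Note for readers: the address translation that the emitted LSR/LDR/CBNZ/AND
sequence in EmitBackpatchRoutine performs is equivalent to the C++ sketch
below. This is illustration only, not code from the patch; "PageMappings" and
"Translate" are hypothetical names standing in for the emitted code and for
the s_physical_page_mappings/s_logical_page_mappings table that MEM_REG points
at when jo.fastmem_arena is false.

// Illustration only -- a minimal C++ model of the soft-MMU lookup.
// Constants mirror PowerPC/MMU.h.
#include <array>
#include <cstdint>

constexpr int BAT_INDEX_SHIFT = 17;
constexpr uint32_t BAT_PAGE_SIZE = 1 << BAT_INDEX_SHIFT;          // 128 KiB pages
constexpr uint32_t BAT_PAGE_COUNT = 1 << (32 - BAT_INDEX_SHIFT);  // 32768 entries

// One host pointer per guest page; nullptr marks pages (e.g. MMIO) that must
// go through the slow C++ path instead.
using PageMappings = std::array<uint8_t*, BAT_PAGE_COUNT>;

uint8_t* Translate(const PageMappings& page_mappings, uint32_t guest_addr)
{
  // LSR temp, addr, #BAT_INDEX_SHIFT ; LDR memory_base, [MEM_REG, temp, LSL #3]
  uint8_t* page = page_mappings[guest_addr >> BAT_INDEX_SHIFT];

  // CBNZ memory_base, pass ; B slowmem_fixup
  if (page == nullptr)
    return nullptr;  // fall back to the C++ code in MMU.cpp

  // AND memory_offset, addr, #(BAT_PAGE_SIZE - 1) ; then LDR/STR [base, offset]
  return page + (guest_addr & (BAT_PAGE_SIZE - 1));
}

When mode is AlwaysUnsafe the patch omits the nullptr check entirely, which is
why that mode must only be used for addresses that IsOptimizableRAMAddress
approves.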