From 5b5124d9fff8fc005fe9e960736ce7748d1d629a Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Wed, 14 Jun 2023 23:27:58 +0200
Subject: [PATCH 1/2] JitArm64: Use immediates more in fres routine

Not sure why I didn't do this to begin with. Maybe I was under the
impression that the most significant bit of a 12-bit immediate was a
sign bit.
---
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index cfa2965879..96663bfdb3 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -287,8 +287,7 @@ void JitArm64::GenerateFres()
   CMP(ARM64Reg::X2, 895);
   FixupBranch small_exponent = B(CCFlags::CC_LO);
 
-  MOVI2R(ARM64Reg::X4, 1148LL);
-  CMP(ARM64Reg::X2, ARM64Reg::X4);
+  CMP(ARM64Reg::X2, 1148);
   FixupBranch large_exponent = B(CCFlags::CC_HI);
 
   UBFX(ARM64Reg::X2, ARM64Reg::X1, 47, 5); // Grab upper part of mantissa
@@ -320,8 +319,7 @@ void JitArm64::GenerateFres()
   RET();
 
   SetJumpTarget(large_exponent);
-  MOVI2R(ARM64Reg::X4, 0x7FF);
-  CMP(ARM64Reg::X2, ARM64Reg::X4);
+  CMP(ARM64Reg::X2, 0x7FF);
   CSEL(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X3, CCFlags::CC_EQ);
   RET();
 }

From 9c13436e2b01237b2b8979da0f399d7c412e789a Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Thu, 15 Jun 2023 22:02:39 +0200
Subject: [PATCH 2/2] JitArm64: Improve instruction scheduling in fres routine

We want to have a low number of instructions between the LDP and the
MADD so that the MADD can start immediately after the LDP finishes even
if we're on a lower-end in-order CPU.
---
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index 96663bfdb3..03b3c7487d 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -293,12 +293,12 @@ void JitArm64::GenerateFres()
   UBFX(ARM64Reg::X2, ARM64Reg::X1, 47, 5); // Grab upper part of mantissa
   MOVP2R(ARM64Reg::X3, &Common::fres_expected);
   ADD(ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X2, ArithOption(ARM64Reg::X2, ShiftType::LSL, 3));
-  LDP(IndexType::Signed, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::X2, 0);
   UBFX(ARM64Reg::X1, ARM64Reg::X1, 37, 10); // Grab lower part of mantissa
+  LDP(IndexType::Signed, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::X2, 0);
   MOVI2R(ARM64Reg::W4, 1);
-  AND(ARM64Reg::X0, ARM64Reg::X0, LogicalImm(Common::DOUBLE_SIGN | Common::DOUBLE_EXP, 64));
   MADD(ARM64Reg::W1, ARM64Reg::W3, ARM64Reg::W1, ARM64Reg::W4);
   SUB(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W1, ArithOption(ARM64Reg::W1, ShiftType::LSR, 1));
+  AND(ARM64Reg::X0, ARM64Reg::X0, LogicalImm(Common::DOUBLE_SIGN | Common::DOUBLE_EXP, 64));
   ORR(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X1, ArithOption(ARM64Reg::X1, ShiftType::LSL, 29));
   RET();
 