diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 4137b8e23f..d14d998462 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -430,6 +430,8 @@ add_library(core PowerPC/Interpreter/Interpreter_Tables.cpp PowerPC/Interpreter/Interpreter.cpp PowerPC/Interpreter/Interpreter.h + PowerPC/JitCommon/DivUtils.cpp + PowerPC/JitCommon/DivUtils.h PowerPC/JitCommon/JitAsmCommon.cpp PowerPC/JitCommon/JitAsmCommon.h PowerPC/JitCommon/JitBase.cpp diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index c1237463d8..358e7a8ea6 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -96,7 +96,7 @@ public: void GenerateConstantOverflow(bool overflow); void GenerateConstantOverflow(s64 val); - void GenerateOverflow(); + void GenerateOverflow(Gen::CCFlags cond = Gen::CCFlags::CC_NO); void FinalizeCarryOverflow(bool oe, bool inv = false); void FinalizeCarry(Gen::CCFlags cond); void FinalizeCarry(bool ca); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index dbe45feec2..184b3c82ef 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -16,10 +16,12 @@ #include "Core/PowerPC/Jit64/Jit.h" #include "Core/PowerPC/Jit64/RegCache/JitRegCache.h" #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" +#include "Core/PowerPC/JitCommon/DivUtils.h" #include "Core/PowerPC/PPCAnalyst.h" #include "Core/PowerPC/PowerPC.h" using namespace Gen; +using namespace JitCommon; void Jit64::GenerateConstantOverflow(s64 val) { @@ -42,9 +44,9 @@ void Jit64::GenerateConstantOverflow(bool overflow) } // We could do overflow branchlessly, but unlike carry it seems to be quite a bit rarer. 
-void Jit64::GenerateOverflow()
+void Jit64::GenerateOverflow(Gen::CCFlags cond)
 {
-  FixupBranch jno = J_CC(CC_NO);
+  FixupBranch jno = J_CC(cond);
   // XER[OV/SO] = 1
   MOV(8, PPCSTATE(xer_so_ov), Imm8(XER_OV_MASK | XER_SO_MASK));
   FixupBranch exit = J();
@@ -1342,6 +1344,215 @@ void Jit64::divwx(UGeckoInstruction inst)
         GenerateConstantOverflow(false);
     }
   }
+  else if (gpr.IsImm(a))
+  {
+    // Constant dividend
+    const u32 dividend = gpr.Imm32(a);
+
+    if (dividend == 0)
+    {
+      if (inst.OE)
+      {
+        RCOpArg Rb = gpr.Use(b, RCMode::Read);
+        RegCache::Realize(Rb);
+
+        CMP_or_TEST(32, Rb, Imm32(0));
+        GenerateOverflow(CC_NZ);
+      }
+
+      // Zero divided by anything is always zero
+      gpr.SetImmediate32(d, 0);
+    }
+    else
+    {
+      RCX64Reg Rb = gpr.Bind(b, RCMode::Read);
+      RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+      // no register choice
+      RCX64Reg eax = gpr.Scratch(EAX);
+      RCX64Reg edx = gpr.Scratch(EDX);
+      RegCache::Realize(Rb, Rd, eax, edx);
+
+      // Check for divisor == 0
+      TEST(32, Rb, Rb);
+
+      FixupBranch normal_path;
+
+      if (dividend == 0x80000000)
+      {
+        // Divisor is 0, proceed to overflow case
+        const FixupBranch overflow = J_CC(CC_Z);
+        // Otherwise, check for divisor == -1
+        CMP(32, Rb, Imm32(0xFFFFFFFF));
+        normal_path = J_CC(CC_NE);
+
+        SetJumpTarget(overflow);
+      }
+      else
+      {
+        // Divisor is not 0, take normal path
+        normal_path = J_CC(CC_NZ);
+        // Otherwise, proceed to overflow case
+      }
+
+      // Set Rd to all ones or all zeroes
+      if (dividend & 0x80000000)
+        MOV(32, Rd, Imm32(0xFFFFFFFF));
+      else
+        XOR(32, Rd, Rd);
+
+      if (inst.OE)
+        GenerateConstantOverflow(true);
+
+      const FixupBranch done = J();
+
+      SetJumpTarget(normal_path);
+
+      MOV(32, eax, Imm32(dividend));
+      CDQ();
+      IDIV(32, Rb);
+      MOV(32, Rd, eax);
+
+      if (inst.OE)
+        GenerateConstantOverflow(false);
+
+      SetJumpTarget(done);
+    }
+  }
+  else if (gpr.IsImm(b))
+  {
+    // Constant divisor
+    const s32 divisor = gpr.SImm32(b);
+    RCOpArg Ra = gpr.Use(a, RCMode::Read);
+    RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
+    RegCache::Realize(Ra, Rd);
+
+    // Handle 0, 1, and -1 explicitly
+    if (divisor == 0)
+    {
+      if (d != a)
+        MOV(32, Rd, Ra);
+      SAR(32, Rd, Imm8(31));
+      if (inst.OE)
+        GenerateConstantOverflow(true);
+    }
+    else if (divisor == 1)
+    {
+      if (d != a)
+        MOV(32, Rd, Ra);
+      if (inst.OE)
+        GenerateConstantOverflow(false);
+    }
+    else if (divisor == -1)
+    {
+      if (d != a)
+        MOV(32, Rd, Ra);
+
+      NEG(32, Rd);
+      const FixupBranch normal = J_CC(CC_NO);
+
+      MOV(32, Rd, Imm32(0xFFFFFFFF));
+      if (inst.OE)
+        GenerateConstantOverflow(true);
+      const FixupBranch done = J();
+
+      SetJumpTarget(normal);
+      if (inst.OE)
+        GenerateConstantOverflow(false);
+
+      SetJumpTarget(done);
+    }
+    else if (divisor == 2 || divisor == -2)
+    {
+      X64Reg tmp = RSCRATCH;
+      if (Ra.IsSimpleReg() && Ra.GetSimpleReg() != Rd)
+        tmp = Ra.GetSimpleReg();
+      else
+        MOV(32, R(tmp), Ra);
+
+      MOV(32, Rd, R(tmp));
+      SHR(32, Rd, Imm8(31));
+      ADD(32, Rd, R(tmp));
+      SAR(32, Rd, Imm8(1));
+
+      if (divisor < 0)
+        NEG(32, Rd);
+
+      if (inst.OE)
+        GenerateConstantOverflow(false);
+    }
+    else if (MathUtil::IsPow2(divisor) || MathUtil::IsPow2(-static_cast<s64>(divisor)))
+    {
+      // Widen to 64 bits first: negating or taking abs of 0x80000000 overflows in s32.
+      const u32 abs_val = static_cast<u32>(std::abs(static_cast<s64>(divisor)));
+
+      X64Reg tmp = RSCRATCH;
+      if (Ra.IsSimpleReg() && Ra.GetSimpleReg() != Rd)
+        tmp = Ra.GetSimpleReg();
+      else
+        MOV(32, R(tmp), Ra);
+
+      // Bias negative dividends by (2^k - 1) so the arithmetic shift rounds toward zero.
+      TEST(32, R(tmp), R(tmp));
+      LEA(32, Rd, MDisp(tmp, abs_val - 1));
+      CMOVcc(32, Rd, R(tmp), CC_NS);
+      SAR(32, Rd, Imm8(IntLog2(abs_val)));
+
+      if (divisor < 0)
+        NEG(32, Rd);
+
+      if (inst.OE)
+        GenerateConstantOverflow(false);
+    }
+    else
+    {
+      // Optimize signed 32-bit integer division by a constant
+      const Magic m = SignedDivisionConstants(divisor);
+
+      // RSCRATCH = sign-extended dividend n
+      MOVSX(64, 32, RSCRATCH, Ra);
+
+      if (divisor > 0 && m.multiplier < 0)
+      {
+        // Rd = (hi32(n * M) + n) >> shift, RSCRATCH = sign bit of n
+        IMUL(64, Rd, R(RSCRATCH), Imm32(m.multiplier));
+        SHR(64, Rd, Imm8(32));
+        ADD(32, Rd, R(RSCRATCH));
+        SHR(32, R(RSCRATCH), Imm8(31));
+        SAR(32, Rd, Imm8(m.shift));
+      }
+      else if (divisor < 0 && m.multiplier > 0)
+      {
+        // q = hi32(n * M) - n; afterwards Rd = sign bit of q, RSCRATCH = q >> shift
+        IMUL(64, Rd, R(RSCRATCH), Imm32(m.multiplier));
+        SHR(64, Rd, Imm8(32));
+        SUB(32, Rd, R(RSCRATCH));
+        MOV(32, R(RSCRATCH), Rd);
+        SHR(32, Rd, Imm8(31));
+        SAR(32, R(RSCRATCH), Imm8(m.shift));
+      }
+      else if (m.multiplier > 0)
+      {
+        // Rd = (n * M) >> (32 + shift), RSCRATCH = sign bit of n
+        IMUL(64, Rd, R(RSCRATCH), Imm32(m.multiplier));
+        SHR(32, R(RSCRATCH), Imm8(31));
+        SAR(64, R(Rd), Imm8(32 + m.shift));
+      }
+      else
+      {
+        // Rd = (n * M) >> (32 + shift), RSCRATCH = sign bit of the 64-bit product
+        IMUL(64, RSCRATCH, R(RSCRATCH), Imm32(m.multiplier));
+        MOV(64, Rd, R(RSCRATCH));
+        SHR(64, R(RSCRATCH), Imm8(63));
+        SAR(64, R(Rd), Imm8(32 + m.shift));
+      }
+
+      // Add the sign bit computed above so that the quotient is rounded toward zero
+      ADD(32, Rd, R(RSCRATCH));
+
+      if (inst.OE)
+        GenerateConstantOverflow(false);
+    }
+  }
   else
   {
     RCOpArg Ra = gpr.Use(a, RCMode::Read);
@@ -1364,7 +1575,6 @@ void Jit64::divwx(UGeckoInstruction inst)
 
     SetJumpTarget(overflow);
     SAR(32, eax, Imm8(31));
-    MOV(32, Rd, eax);
     if (inst.OE)
     {
       GenerateConstantOverflow(true);
@@ -1376,12 +1586,13 @@ void Jit64::divwx(UGeckoInstruction inst)
 
     CDQ();
     IDIV(32, Rb);
-    MOV(32, Rd, eax);
     if (inst.OE)
     {
       GenerateConstantOverflow(false);
     }
 
+    SetJumpTarget(done);
+    MOV(32, Rd, eax);
   }
   if (inst.Rc)
     ComputeRC(d);
diff --git a/Source/Core/Core/PowerPC/JitCommon/DivUtils.cpp b/Source/Core/Core/PowerPC/JitCommon/DivUtils.cpp
new file mode 100644
index 0000000000..c19b7c8091
--- /dev/null
+++ b/Source/Core/Core/PowerPC/JitCommon/DivUtils.cpp
@@ -0,0 +1,64 @@
+// Copyright 2021 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include <cstdlib>
+
+#include "Core/PowerPC/JitCommon/DivUtils.h"
+
+namespace JitCommon
+{
+Magic SignedDivisionConstants(s32 d)
+{
+  const u32 two31 = 2147483648;
+
+  // Widen before taking the absolute value: std::abs(s32) overflows for d == -2^31.
+  const u32 ad = static_cast<u32>(std::abs(static_cast<s64>(d)));
+  // The reference algorithm uses 2^31 for positive divisors and 2^31 + 1 for negative ones.
+  // Using 2^31 - 1 instead makes anc too small and yields off-by-one constants for some
+  // negative divisors (e.g. 0x80000000 / -3).
+  const u32 t = two31 + (d < 0);
+  const u32 anc = t - 1 - t % ad;
+  u32 q1 = two31 / anc;
+  u32 r1 = two31 - q1 * anc;
+  u32 q2 = two31 / ad;
+  u32 r2 = two31 - q2 * ad;
+
+  s32 p = 31;
+  u32 delta;
+
+  // Increase p until the loop condition below no longer holds
+  do
+  {
+    p++;
+
+    // Update q1 = 2^p / anc and r1 = 2^p % anc
+    q1 *= 2;
+    r1 *= 2;
+    if (r1 >= anc)
+    {
+      q1++;
+      r1 -= anc;
+    }
+
+    // Update q2 = 2^p / ad and r2 = 2^p % ad
+    q2 *= 2;
+    r2 *= 2;
+    if (r2 >= ad)
+    {
+      q2++;
+      r2 -= ad;
+    }
+    delta = ad - r2;
+  } while (q1 < delta || (q1 == delta && r1 == 0));
+
+  Magic mag;
+  mag.multiplier = static_cast<s32>(q2 + 1);
+  if (d < 0)
+    mag.multiplier = -mag.multiplier;
+  mag.shift = static_cast<u8>(p - 32);
+
+  return mag;
+}
+
+} // namespace JitCommon
diff --git a/Source/Core/Core/PowerPC/JitCommon/DivUtils.h b/Source/Core/Core/PowerPC/JitCommon/DivUtils.h
new file mode 100644
index 0000000000..b243e2654b
--- /dev/null
+++ b/Source/Core/Core/PowerPC/JitCommon/DivUtils.h
@@ -0,0 +1,25 @@
+// Copyright 2021 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "Common/CommonTypes.h"
+
+namespace JitCommon
+{
+// Constants for replacing a signed 32-bit division with a multiplication and a shift.
+struct Magic
+{
+  // Signed multiplier applied to the dividend
+  s32 multiplier;
+  // Right shift applied to the upper 32 bits of the product
+  u8 shift;
+};
+
+// Calculate the constants required to optimize a signed 32-bit integer division.
+// Taken from The PowerPC Compiler Writer's Guide and LLVM.
+// Divisor must not be -1, 0, or 1.
+Magic SignedDivisionConstants(s32 divisor);
+
+} // namespace JitCommon
diff --git a/Source/Core/DolphinLib.vcxproj b/Source/Core/DolphinLib.vcxproj
index 5485304bdd..211b0a5423 100644
--- a/Source/Core/DolphinLib.vcxproj
+++ b/Source/Core/DolphinLib.vcxproj
@@ -27,6 +27,12 @@ {41279555-f94f-4ebc-99de-af863c10c5c4} + + + + + + \ No newline at end of file
diff --git a/Source/UnitTests/DivUtilsTest.cpp b/Source/UnitTests/DivUtilsTest.cpp
new file mode 100644
index 0000000000..7802fc8496
--- /dev/null
+++ b/Source/UnitTests/DivUtilsTest.cpp
@@ -0,0 +1,40 @@
+// Copyright 2021 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include <limits>
+
+#include <gtest/gtest.h>
+
+#include "Core/PowerPC/JitCommon/DivUtils.h"
+
+using namespace JitCommon;
+
+TEST(DivUtils, Signed)
+{
+  Magic m3 = SignedDivisionConstants(3);
+  Magic m5 = SignedDivisionConstants(5);
+  Magic m7 = SignedDivisionConstants(7);
+  Magic minus3 = SignedDivisionConstants(-3);
+  Magic minus5 = SignedDivisionConstants(-5);
+  Magic minus7 = SignedDivisionConstants(-7);
+  Magic min_divisor = SignedDivisionConstants(std::numeric_limits<s32>::min());
+
+  EXPECT_EQ(0x55555556, m3.multiplier);
+  EXPECT_EQ(0, m3.shift);
+  EXPECT_EQ(0x66666667, m5.multiplier);
+  EXPECT_EQ(1, m5.shift);
+  EXPECT_EQ(-0x6DB6DB6D, m7.multiplier);
+  EXPECT_EQ(2, m7.shift);
+
+  EXPECT_EQ(0x55555555, minus3.multiplier);
+  EXPECT_EQ(1, minus3.shift);
+  EXPECT_EQ(-0x66666667, minus5.multiplier);
+  EXPECT_EQ(1, minus5.shift);
+  EXPECT_EQ(0x6DB6DB6D, minus7.multiplier);
+  EXPECT_EQ(2, minus7.shift);
+
+  // The most negative divisor must not overflow while taking its absolute value
+  EXPECT_EQ(0x7FFFFFFF, min_divisor.multiplier);
+  EXPECT_EQ(30, min_divisor.shift);
+}
diff --git a/Source/UnitTests/UnitTests.vcxproj b/Source/UnitTests/UnitTests.vcxproj
index d0b0c27fb6..a178911b22 100644
--- a/Source/UnitTests/UnitTests.vcxproj
+++ b/Source/UnitTests/UnitTests.vcxproj
@@ -69,6 +69,7 @@ +