diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 0191cbd846..35904a8184 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -80,9 +80,6 @@ void JitArm64::fp_arith(UGeckoInstruction inst) const bool fma = use_b && use_c; const bool negate_result = (op5 & ~0x1) == 30; - // Addition and subtraction can't generate new NaNs, they can only take NaNs from inputs - const bool can_generate_nan = (op5 & ~0x1) != 20; - const bool output_is_single = inst.OPCD == 59; const bool inaccurate_fma = op5 > 25 && !Config::Get(Config::SESSION_USE_FMA); const bool round_c = use_c && output_is_single && !js.op->fprIsSingle[inst.FC]; @@ -203,45 +200,35 @@ void JitArm64::fp_arith(UGeckoInstruction inst) if (use_c && VA != VC && (!use_b || VB != VC)) inputs.push_back(VC); - // If any inputs are NaNs, pick the first NaN of them and set its quiet bit - for (size_t i = 0; i < inputs.size(); ++i) + // If any inputs are NaNs, pick the first NaN of them and set its quiet bit. + // However, we can skip checking the last input, because if exactly one input is NaN, AArch64 + // arithmetic instructions automatically pick that NaN and make it quiet, just like we want. 
+ for (size_t i = 0; i < inputs.size() - 1; ++i) { - // Skip checking if the input is a NaN if it's the last input and we're guaranteed to have at - // least one NaN input - const bool check_input = can_generate_nan || i != inputs.size() - 1; - const ARM64Reg input = inputs[i]; - FixupBranch skip; - if (check_input) - { - m_float_emit.FCMP(input); - skip = B(CCFlags::CC_VC); - } + + m_float_emit.FCMP(input); + FixupBranch skip = B(CCFlags::CC_VC); // Make the NaN quiet m_float_emit.FADD(VD, input, input); nan_fixups.push_back(B()); - if (check_input) - SetJumpTarget(skip); + SetJumpTarget(skip); } std::optional nan_early_fixup; - if (can_generate_nan) + if (negate_result) { - // There was no NaN in any of the inputs, so the NaN must have been generated by the - // arithmetic instruction. In this case, the result is already correct. - if (negate_result) - { - if (result_reg != VD) - m_float_emit.MOV(EncodeRegToDouble(VD), EncodeRegToDouble(result_reg)); - nan_fixups.push_back(B()); - } - else - { - nan_early_fixup = B(); - } + // If we have a NaN, we must not execute FNEG. 
+ if (result_reg != VD) + m_float_emit.MOV(EncodeRegToDouble(VD), EncodeRegToDouble(result_reg)); + nan_fixups.push_back(B()); + } + else + { + nan_early_fixup = B(); } SwitchToNearCode(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index 5b876649cc..6f211b1078 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -384,45 +384,38 @@ void JitArm64::ps_sumX(UGeckoInstruction inst) m_float_emit.DUP(size, reg_encoder(V0), reg_encoder(VB), 1); - FixupBranch a_nan_done, b_nan_done; + FixupBranch a_nan_done; if (m_accurate_nans) { - const auto check_nan = [&](ARM64Reg input) { - m_float_emit.FCMP(scalar_reg_encoder(input)); - FixupBranch not_nan = B(CCFlags::CC_VC); - FixupBranch nan = B(); - SetJumpTarget(not_nan); + m_float_emit.FCMP(scalar_reg_encoder(VA)); + FixupBranch a_not_nan = B(CCFlags::CC_VC); + FixupBranch a_nan = B(); + SetJumpTarget(a_not_nan); - SwitchToFarCode(); - SetJumpTarget(nan); + SwitchToFarCode(); + SetJumpTarget(a_nan); - if (upper) - { - m_float_emit.FADD(scalar_reg_encoder(V0), scalar_reg_encoder(input), - scalar_reg_encoder(input)); - m_float_emit.TRN1(size, reg_encoder(VD), reg_encoder(VC), reg_encoder(V0)); - } - else if (d != c) - { - m_float_emit.FADD(scalar_reg_encoder(VD), scalar_reg_encoder(input), - scalar_reg_encoder(input)); - m_float_emit.INS(size, VD, 1, VC, 1); - } - else - { - m_float_emit.FADD(scalar_reg_encoder(V0), scalar_reg_encoder(input), - scalar_reg_encoder(input)); - m_float_emit.INS(size, VD, 0, V0, 0); - } + if (upper) + { + m_float_emit.FADD(scalar_reg_encoder(V0), scalar_reg_encoder(VA), scalar_reg_encoder(VA)); + m_float_emit.TRN1(size, reg_encoder(VD), reg_encoder(VC), reg_encoder(V0)); + } + else if (d != c) + { + m_float_emit.FADD(scalar_reg_encoder(VD), scalar_reg_encoder(VA), scalar_reg_encoder(VA)); + m_float_emit.INS(size, VD, 1, VC, 1); + } + else + { + 
m_float_emit.FADD(scalar_reg_encoder(V0), scalar_reg_encoder(VA), scalar_reg_encoder(VA)); + m_float_emit.INS(size, VD, 0, V0, 0); + } - FixupBranch nan_done = B(); - SwitchToNearCode(); + a_nan_done = B();  // assigns the outer a_nan_done; redeclaring it here would shadow it + SwitchToNearCode(); - return nan_done; - }; - - a_nan_done = check_nan(VA); - b_nan_done = check_nan(V0); + // If exactly one input is NaN, AArch64 arithmetic instructions automatically pick that NaN + // and make it quiet, just like we want. So if rA isn't NaN, we can skip checking rB. } if (upper) @@ -442,10 +435,7 @@ void JitArm64::ps_sumX(UGeckoInstruction inst) } if (m_accurate_nans) - { SetJumpTarget(a_nan_done); - SetJumpTarget(b_nan_done); - } fpr.Unlock(V0); if (temp_gpr != ARM64Reg::INVALID_REG)