forked from dolphin-emu/dolphin
Add BitSet and, as a test, convert some JitRegCache stuff to it.
This is a higher level, more concise wrapper for bitsets which supports efficiently counting and iterating over set bits. It's similar to std::bitset, but the latter does not support efficient iteration (and at least in libc++, the count algorithm is subpar, not that it really matters). The converted uses include both bitsets and, notably, considerably less efficient regular arrays (for in/out registers in PPCAnalyst). Unfortunately, this may slightly pessimize unoptimized builds.
This commit is contained in:
@@ -249,21 +249,15 @@ static bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
|
||||
// That is, check that none of b's outputs matches any of a's inputs,
|
||||
// and that none of a's outputs matches any of b's inputs.
|
||||
// The latter does not apply if a is a cmp, of course, but doesn't hurt to check.
|
||||
for (int j = 0; j < 3; j++)
|
||||
{
|
||||
int regInA = a.regsIn[j];
|
||||
int regInB = b.regsIn[j];
|
||||
// register collision: b outputs to one of a's inputs
|
||||
if (regInA >= 0 && (b.regsOut[0] == regInA || b.regsOut[1] == regInA))
|
||||
return false;
|
||||
// register collision: a outputs to one of b's inputs
|
||||
if (regInB >= 0 && (a.regsOut[0] == regInB || a.regsOut[1] == regInB))
|
||||
return false;
|
||||
// register collision: b outputs to one of a's outputs (overwriting it)
|
||||
for (int k = 0; k < 2; k++)
|
||||
if (b.regsOut[k] >= 0 && (b.regsOut[k] == a.regsOut[0] || b.regsOut[k] == a.regsOut[1]))
|
||||
return false;
|
||||
}
|
||||
// register collision: b outputs to one of a's inputs
|
||||
if (b.regsOut & a.regsIn)
|
||||
return false;
|
||||
// register collision: a outputs to one of b's inputs
|
||||
if (a.regsOut & b.regsIn)
|
||||
return false;
|
||||
// register collision: b outputs to one of a's outputs (overwriting it)
|
||||
if (b.regsOut & a.regsOut)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -520,42 +514,41 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
|
||||
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 467) // mtspr
|
||||
code->outputCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER;
|
||||
|
||||
int numOut = 0;
|
||||
int numIn = 0;
|
||||
int numFloatIn = 0;
|
||||
code->regsIn = BitSet32(0);
|
||||
code->regsOut = BitSet32(0);
|
||||
if (opinfo->flags & FL_OUT_A)
|
||||
{
|
||||
code->regsOut[numOut++] = code->inst.RA;
|
||||
code->regsOut[code->inst.RA] = true;
|
||||
block->m_gpa->SetOutputRegister(code->inst.RA, index);
|
||||
}
|
||||
if (opinfo->flags & FL_OUT_D)
|
||||
{
|
||||
code->regsOut[numOut++] = code->inst.RD;
|
||||
code->regsOut[code->inst.RD] = true;
|
||||
block->m_gpa->SetOutputRegister(code->inst.RD, index);
|
||||
}
|
||||
if (opinfo->flags & FL_OUT_S)
|
||||
{
|
||||
code->regsOut[numOut++] = code->inst.RS;
|
||||
code->regsOut[code->inst.RS] = true;
|
||||
block->m_gpa->SetOutputRegister(code->inst.RS, index);
|
||||
}
|
||||
if ((opinfo->flags & FL_IN_A) || ((opinfo->flags & FL_IN_A0) && code->inst.RA != 0))
|
||||
{
|
||||
code->regsIn[numIn++] = code->inst.RA;
|
||||
code->regsIn[code->inst.RA] = true;
|
||||
block->m_gpa->SetInputRegister(code->inst.RA, index);
|
||||
}
|
||||
if (opinfo->flags & FL_IN_B)
|
||||
{
|
||||
code->regsIn[numIn++] = code->inst.RB;
|
||||
code->regsIn[code->inst.RB] = true;
|
||||
block->m_gpa->SetInputRegister(code->inst.RB, index);
|
||||
}
|
||||
if (opinfo->flags & FL_IN_C)
|
||||
{
|
||||
code->regsIn[numIn++] = code->inst.RC;
|
||||
code->regsIn[code->inst.RC] = true;
|
||||
block->m_gpa->SetInputRegister(code->inst.RC, index);
|
||||
}
|
||||
if (opinfo->flags & FL_IN_S)
|
||||
{
|
||||
code->regsIn[numIn++] = code->inst.RS;
|
||||
code->regsIn[code->inst.RS] = true;
|
||||
block->m_gpa->SetInputRegister(code->inst.RS, index);
|
||||
}
|
||||
|
||||
@@ -564,24 +557,17 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
|
||||
code->fregOut = code->inst.FD;
|
||||
else if (opinfo->flags & FL_OUT_FLOAT_S)
|
||||
code->fregOut = code->inst.FS;
|
||||
code->fregsIn = BitSet32(0);
|
||||
if (opinfo->flags & FL_IN_FLOAT_A)
|
||||
code->fregsIn[numFloatIn++] = code->inst.FA;
|
||||
code->fregsIn[code->inst.FA] = true;
|
||||
if (opinfo->flags & FL_IN_FLOAT_B)
|
||||
code->fregsIn[numFloatIn++] = code->inst.FB;
|
||||
code->fregsIn[code->inst.FB] = true;
|
||||
if (opinfo->flags & FL_IN_FLOAT_C)
|
||||
code->fregsIn[numFloatIn++] = code->inst.FC;
|
||||
code->fregsIn[code->inst.FC] = true;
|
||||
if (opinfo->flags & FL_IN_FLOAT_D)
|
||||
code->fregsIn[numFloatIn++] = code->inst.FD;
|
||||
code->fregsIn[code->inst.FD] = true;
|
||||
if (opinfo->flags & FL_IN_FLOAT_S)
|
||||
code->fregsIn[numFloatIn++] = code->inst.FS;
|
||||
|
||||
// Set remaining register slots as unused (-1)
|
||||
for (int j = numIn; j < 3; j++)
|
||||
code->regsIn[j] = -1;
|
||||
for (int j = numOut; j < 2; j++)
|
||||
code->regsOut[j] = -1;
|
||||
for (int j = numFloatIn; j < 4; j++)
|
||||
code->fregsIn[j] = -1;
|
||||
code->fregsIn[code->inst.FS] = true;
|
||||
|
||||
switch (opinfo->type)
|
||||
{
|
||||
@@ -797,7 +783,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
|
||||
// Scan for flag dependencies; assume the next block (or any branch that can leave the block)
|
||||
// wants flags, to be safe.
|
||||
bool wantsCR0 = true, wantsCR1 = true, wantsFPRF = true, wantsCA = true;
|
||||
u32 fprInUse = 0, gprInUse = 0, gprInReg = 0, fprInXmm = 0;
|
||||
BitSet32 fprInUse, gprInUse, gprInReg, fprInXmm;
|
||||
for (int i = block->m_num_instructions - 1; i >= 0; i--)
|
||||
{
|
||||
bool opWantsCR0 = code[i].wantsCR0;
|
||||
@@ -822,30 +808,20 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
|
||||
code[i].fprInXmm = fprInXmm;
|
||||
// TODO: if there's no possible endblocks or exceptions in between, tell the regcache
|
||||
// we can throw away a register if it's going to be overwritten later.
|
||||
for (int j = 0; j < 3; j++)
|
||||
if (code[i].regsIn[j] >= 0)
|
||||
{
|
||||
gprInUse |= 1 << code[i].regsIn[j];
|
||||
gprInReg |= 1 << code[i].regsIn[j];
|
||||
}
|
||||
for (int j = 0; j < 4; j++)
|
||||
if (code[i].fregsIn[j] >= 0)
|
||||
{
|
||||
fprInUse |= 1 << code[i].fregsIn[j];
|
||||
if (strncmp(code[i].opinfo->opname, "stfd", 4))
|
||||
fprInXmm |= 1 << code[i].fregsIn[j];
|
||||
}
|
||||
gprInUse |= code[i].regsIn;
|
||||
gprInReg |= code[i].regsIn;
|
||||
fprInUse |= code[i].fregsIn;
|
||||
if (strncmp(code[i].opinfo->opname, "stfd", 4))
|
||||
fprInXmm |= code[i].fregsIn;
|
||||
// For now, we need to count output registers as "used" though; otherwise the flush
|
||||
// will result in a redundant store (e.g. store to regcache, then store again to
|
||||
// the same location later).
|
||||
for (int j = 0; j < 2; j++)
|
||||
if (code[i].regsOut[j] >= 0)
|
||||
gprInUse |= 1 << code[i].regsOut[j];
|
||||
gprInUse |= code[i].regsOut;
|
||||
if (code[i].fregOut >= 0)
|
||||
{
|
||||
fprInUse |= 1 << code[i].fregOut;
|
||||
fprInUse[code[i].fregOut] = true;
|
||||
if (strncmp(code[i].opinfo->opname, "stfd", 4))
|
||||
fprInXmm |= 1 << code[i].fregOut;
|
||||
fprInXmm[code[i].fregOut] = true;
|
||||
}
|
||||
}
|
||||
return address;
|
||||
|
||||
Reference in New Issue
Block a user