diff --git a/Source/Core/AudioCommon/Src/Mixer.cpp b/Source/Core/AudioCommon/Src/Mixer.cpp index 146314088d..2c526cf3bd 100644 --- a/Source/Core/AudioCommon/Src/Mixer.cpp +++ b/Source/Core/AudioCommon/Src/Mixer.cpp @@ -32,7 +32,7 @@ unsigned int CMixer::Mix(short* samples, unsigned int numSamples) return numSamples; } - unsigned int numLeft = Common::AtomicLoad(m_numSamples); + unsigned int numLeft = GetNumSamples(); if (m_AIplaying) { if (numLeft < numSamples)//cannot do much about this m_AIplaying = false; @@ -43,6 +43,16 @@ unsigned int CMixer::Mix(short* samples, unsigned int numSamples) m_AIplaying = true; } + // Cache access in non-volatile variable + // This is the only function changing the read value, so it's safe to + // cache it locally although it's written here. + // The writing pointer will be modified outside, but it will only increase, + // so we will just ignore new written data while interpolating. + // Without this cache, the compiler wouldn't be allowed to optimize the + // interpolation loop. + u32 indexR = Common::AtomicLoad(m_indexR); + u32 indexW = Common::AtomicLoad(m_indexW); + if (m_AIplaying) { numLeft = (numLeft > numSamples) ? numSamples : numLeft; @@ -57,7 +67,7 @@ unsigned int CMixer::Mix(short* samples, unsigned int numSamples) for (unsigned int i = 0; i < numLeft * 2; i += 8) { - _mm_storeu_si128((__m128i *)&samples[i], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&m_buffer[(m_indexR + i) & INDEX_MASK]), sr_mask)); + _mm_storeu_si128((__m128i *)&samples[i], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&m_buffer[(indexR + i) & INDEX_MASK]), sr_mask)); } } else @@ -65,38 +75,38 @@ unsigned int CMixer::Mix(short* samples, unsigned int numSamples) { for (unsigned int i = 0; i < numLeft * 2; i+=2) { - samples[i] = Common::swap16(m_buffer[(m_indexR + i + 1) & INDEX_MASK]); - samples[i+1] = Common::swap16(m_buffer[(m_indexR + i) & INDEX_MASK]); + samples[i] = Common::swap16(m_buffer[(indexR + i + 1) & INDEX_MASK]); + samples[i+1] = Common::swap16(m_buffer[(indexR + i) & INDEX_MASK]); } } - m_indexR += numLeft * 2; + indexR += numLeft * 2; } else //linear interpolation { //render numleft sample pairs to samples[] - //advance m_indexR with sample position + //advance indexR with sample position //remember fractional offset static u32 frac = 0; const u32 ratio = (u32)( 65536.0f * (float)AudioInterface::GetAIDSampleRate() / (float)m_sampleRate ); for (u32 i = 0; i < numLeft * 2; i+=2) { - u32 m_indexR2 = m_indexR + 2; //next sample - if ((m_indexR2 & INDEX_MASK) == (m_indexW & INDEX_MASK)) //..if it exists - m_indexR2 = m_indexR; + u32 indexR2 = indexR + 2; //next sample + if ((indexR2 & INDEX_MASK) == (indexW & INDEX_MASK)) //..if it exists + indexR2 = indexR; - s16 l1 = Common::swap16(m_buffer[m_indexR & INDEX_MASK]); //current - s16 l2 = Common::swap16(m_buffer[m_indexR2 & INDEX_MASK]); //next + s16 l1 = Common::swap16(m_buffer[indexR & INDEX_MASK]); //current + s16 l2 = Common::swap16(m_buffer[indexR2 & INDEX_MASK]); //next int sampleL = ((l1 << 16) + (l2 - l1) * (u16)frac) >> 16; samples[i+1] = sampleL; - s16 r1 = Common::swap16(m_buffer[(m_indexR + 1) & INDEX_MASK]); //current - s16 r2 = Common::swap16(m_buffer[(m_indexR2 + 1) & INDEX_MASK]); //next + s16 r1 = Common::swap16(m_buffer[(indexR + 1) & INDEX_MASK]); //current + s16 r2 = Common::swap16(m_buffer[(indexR2 + 1) & INDEX_MASK]); //next int sampleR = ((r1 << 16) + (r2 - r1) * (u16)frac) >> 16; samples[i] = sampleR; frac += ratio; - m_indexR += 2 * (u16)(frac >> 16); + indexR += 2 * (u16)(frac >> 16); frac &= 0xffff; } } @@ -109,12 +119,15 @@ unsigned int CMixer::Mix(short* samples, unsigned int numSamples) if (numSamples > numLeft) { unsigned short s[2]; - s[0] = Common::swap16(m_buffer[(m_indexR - 1) & INDEX_MASK]); - s[1] = Common::swap16(m_buffer[(m_indexR - 2) & INDEX_MASK]); + s[0] = Common::swap16(m_buffer[(indexR - 1) & INDEX_MASK]); + s[1] = Common::swap16(m_buffer[(indexR - 2) & INDEX_MASK]); for (unsigned int i = numLeft*2; i < numSamples*2; i+=2) *(u32*)(samples+i) = *(u32*)(s); // memset(&samples[numLeft * 2], 0, (numSamples - numLeft) * 4); } + + // Flush cached variable + Common::AtomicStore(m_indexR, indexR); //when logging, also throttle HLE audio if (m_logAudio) { @@ -135,19 +148,21 @@ unsigned int CMixer::Mix(short* samples, unsigned int numSamples) AudioInterface::Callback_GetStreaming(samples, numSamples, m_sampleRate); } - - Common::AtomicAdd(m_numSamples, -(s32)numLeft); - return numSamples; } void CMixer::PushSamples(const short *samples, unsigned int num_samples) { + // Cache access in non-volatile variable + // indexR isn't allowed to cache in the audio throttling loop as it + // needs to get updates to not deadlock. + u32 indexW = Common::AtomicLoad(m_indexW); + if (m_throttle) { // The auto throttle function. This loop will put a ceiling on the CPU MHz. - while (num_samples + Common::AtomicLoad(m_numSamples) > MAX_SAMPLES) + while (num_samples * 2 + ((indexW - Common::AtomicLoad(m_indexR)) & INDEX_MASK) >= MAX_SAMPLES * 2) { if (*PowerPC::GetStatePtr() != PowerPC::CPU_RUNNING || soundStream->IsMuted()) break; @@ -160,37 +175,47 @@ void CMixer::PushSamples(const short *samples, unsigned int num_samples) } // Check if we have enough free space - if (num_samples + Common::AtomicLoad(m_numSamples) > MAX_SAMPLES) + // indexW == m_indexR results in empty buffer, so indexR must always be smaller than indexW + if (num_samples * 2 + ((indexW - Common::AtomicLoad(m_indexR)) & INDEX_MASK) >= MAX_SAMPLES * 2) return; // AyuanX: Actual re-sampling work has been moved to sound thread // to alleviate the workload on main thread // and we simply store raw data here to make fast mem copy - int over_bytes = num_samples * 4 - (MAX_SAMPLES * 2 - (m_indexW & INDEX_MASK)) * sizeof(short); + int over_bytes = num_samples * 4 - (MAX_SAMPLES * 2 - (indexW & INDEX_MASK)) * sizeof(short); if (over_bytes > 0) { - memcpy(&m_buffer[m_indexW & INDEX_MASK], samples, num_samples * 4 - over_bytes); + memcpy(&m_buffer[indexW & INDEX_MASK], samples, num_samples * 4 - over_bytes); memcpy(&m_buffer[0], samples + (num_samples * 4 - over_bytes) / sizeof(short), over_bytes); } else { - memcpy(&m_buffer[m_indexW & INDEX_MASK], samples, num_samples * 4); + memcpy(&m_buffer[indexW & INDEX_MASK], samples, num_samples * 4); } - - m_indexW += num_samples * 2; - - if (AudioInterface::GetAIDSampleRate() == m_sampleRate) - Common::AtomicAdd(m_numSamples, num_samples); - else if ((AudioInterface::GetAIDSampleRate() == 32000) && (m_sampleRate == 48000)) - Common::AtomicAdd(m_numSamples, num_samples * 3 / 2); - else - Common::AtomicAdd(m_numSamples, num_samples * 2 / 3); - + + Common::AtomicAdd(m_indexW, num_samples * 2); + return; } unsigned int CMixer::GetNumSamples() { - return Common::AtomicLoad(m_numSamples); + // Guess how many samples would be available after interpolation. + // As interpolation needs at least on sample from the future to + // linear interpolate between them, one sample less is available. + // We also can't say the current interpolation state (specially + // the frac), so to be sure, subtract one again to be sure not + // to underflow the fifo. + + u32 numSamples = ((Common::AtomicLoad(m_indexW) - Common::AtomicLoad(m_indexR)) & INDEX_MASK) / 2; + + if (AudioInterface::GetAIDSampleRate() == m_sampleRate) + numSamples = numSamples; // 1:1 + else if (m_sampleRate == 48000 && AudioInterface::GetAIDSampleRate() == 32000) + numSamples = numSamples * 3 / 2 - 2; // most common case + else + numSamples = numSamples * m_sampleRate / AudioInterface::GetAIDSampleRate() - 2; + + return numSamples; } diff --git a/Source/Core/AudioCommon/Src/Mixer.h b/Source/Core/AudioCommon/Src/Mixer.h index 6dcbe78ece..6d797d5c3e 100644 --- a/Source/Core/AudioCommon/Src/Mixer.h +++ b/Source/Core/AudioCommon/Src/Mixer.h @@ -23,7 +23,6 @@ public: , m_channels(2) , m_HLEready(false) , m_logAudio(0) - , m_numSamples(0) , m_indexW(0) , m_indexR(0) , m_AIplaying(true) @@ -97,9 +96,8 @@ protected: bool m_throttle; short m_buffer[MAX_SAMPLES * 2]; - volatile u32 m_numSamples; - u32 m_indexW; - u32 m_indexR; + volatile u32 m_indexW; + volatile u32 m_indexR; bool m_AIplaying; std::mutex m_csMixing;