From 367e1b4d4c90b39978b0920faf31e65a922ff462 Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 8 Aug 2016 22:07:18 +0200 Subject: [PATCH 1/3] PixelEngine: Drop write-only variables. --- Source/Core/VideoCommon/CommandProcessor.cpp | 16 ---------------- Source/Core/VideoCommon/PixelEngine.cpp | 6 ------ 2 files changed, 22 deletions(-) diff --git a/Source/Core/VideoCommon/CommandProcessor.cpp b/Source/Core/VideoCommon/CommandProcessor.cpp index cd471aae44..6e841d5fca 100644 --- a/Source/Core/VideoCommon/CommandProcessor.cpp +++ b/Source/Core/VideoCommon/CommandProcessor.cpp @@ -38,8 +38,6 @@ static u16 m_tokenReg; static std::atomic s_interrupt_set; static std::atomic s_interrupt_waiting; -static std::atomic s_interrupt_token_waiting; -static std::atomic s_interrupt_finish_waiting; static bool IsOnThread() { @@ -65,8 +63,6 @@ void DoState(PointerWrap& p) p.Do(s_interrupt_set); p.Do(s_interrupt_waiting); - p.Do(s_interrupt_token_waiting); - p.Do(s_interrupt_finish_waiting); } static inline void WriteLow(volatile u32& _reg, u16 lowbits) @@ -112,8 +108,6 @@ void Init() s_interrupt_set.store(false); s_interrupt_waiting.store(false); - s_interrupt_finish_waiting.store(false); - s_interrupt_token_waiting.store(false); et_UpdateInterrupts = CoreTiming::RegisterEvent("CPInterrupt", UpdateInterrupts_Wrapper); } @@ -356,16 +350,6 @@ bool IsInterruptWaiting() return s_interrupt_waiting.load(); } -void SetInterruptTokenWaiting(bool waiting) -{ - s_interrupt_token_waiting.store(waiting); -} - -void SetInterruptFinishWaiting(bool waiting) -{ - s_interrupt_finish_waiting.store(waiting); -} - void SetCPStatusFromGPU() { // breakpoint diff --git a/Source/Core/VideoCommon/PixelEngine.cpp b/Source/Core/VideoCommon/PixelEngine.cpp index 3a223619c1..d7fd67c18f 100644 --- a/Source/Core/VideoCommon/PixelEngine.cpp +++ b/Source/Core/VideoCommon/PixelEngine.cpp @@ -257,14 +257,12 @@ static void SetToken_OnMainThread(u64 userdata, s64 cyclesLate) s_signal_token_interrupt.store(1); 
UpdateInterrupts(); } - CommandProcessor::SetInterruptTokenWaiting(false); } static void SetFinish_OnMainThread(u64 userdata, s64 cyclesLate) { s_signal_finish_interrupt.store(1); UpdateInterrupts(); - CommandProcessor::SetInterruptFinishWaiting(false); Core::FrameUpdateOnCPUThread(); } @@ -278,8 +276,6 @@ void SetToken(const u16 _token, const int _bSetTokenAcknowledge) s_signal_token_interrupt.store(1); } - CommandProcessor::SetInterruptTokenWaiting(true); - if (!SConfig::GetInstance().bCPUThread || Fifo::UseDeterministicGPUThread()) CoreTiming::ScheduleEvent(0, et_SetTokenOnMainThread, _token | (_bSetTokenAcknowledge << 16)); else @@ -291,8 +287,6 @@ void SetToken(const u16 _token, const int _bSetTokenAcknowledge) // THIS IS EXECUTED FROM VIDEO THREAD (BPStructs.cpp) when a new frame has been drawn void SetFinish() { - CommandProcessor::SetInterruptFinishWaiting(true); - if (!SConfig::GetInstance().bCPUThread || Fifo::UseDeterministicGPUThread()) CoreTiming::ScheduleEvent(0, et_SetFinishOnMainThread, 0); else From a051db979231cb8440aefbac9f8baf9e5a627916 Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 8 Aug 2016 22:35:01 +0200 Subject: [PATCH 2/3] PixelEngine: Cleanup PE Token. This also changes the behavior slightly: the PE Token handling should now match that of the more commonly used PE Finish flag. 
--- Source/Core/Core/State.cpp | 2 +- Source/Core/VideoCommon/PixelEngine.cpp | 72 ++++++++-------------- Source/Core/VideoCommon/VideoBackendBase.h | 5 -- 3 files changed, 27 insertions(+), 52 deletions(-) diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp index 4d097d7eb5..feee9730f5 100644 --- a/Source/Core/Core/State.cpp +++ b/Source/Core/Core/State.cpp @@ -70,7 +70,7 @@ static Common::Event g_compressAndDumpStateSyncEvent; static std::thread g_save_thread; // Don't forget to increase this after doing changes on the savestate system -static const u32 STATE_VERSION = 54; // Last changed in PR 3782 +static const u32 STATE_VERSION = 55; // Maps savestate versions to Dolphin versions. // Versions after 42 don't need to be added to this list, diff --git a/Source/Core/VideoCommon/PixelEngine.cpp b/Source/Core/VideoCommon/PixelEngine.cpp index d7fd67c18f..43b6399e6f 100644 --- a/Source/Core/VideoCommon/PixelEngine.cpp +++ b/Source/Core/VideoCommon/PixelEngine.cpp @@ -90,10 +90,10 @@ static UPEDstAlphaConfReg m_DstAlphaConf; static UPEAlphaModeConfReg m_AlphaModeConf; static UPEAlphaReadReg m_AlphaRead; static UPECtrlReg m_Control; -// static u16 m_Token; // token value most recently encountered +static std::atomic s_Token; // token value most recently encountered -static std::atomic s_signal_token_interrupt; -static std::atomic s_signal_finish_interrupt; +static bool s_signal_token_interrupt; +static bool s_signal_finish_interrupt; static int et_SetTokenOnMainThread; static int et_SetFinishOnMainThread; @@ -112,14 +112,13 @@ void DoState(PointerWrap& p) p.Do(m_AlphaModeConf); p.Do(m_AlphaRead); p.DoPOD(m_Control); + p.Do(s_Token); p.Do(s_signal_token_interrupt); p.Do(s_signal_finish_interrupt); } static void UpdateInterrupts(); -static void UpdateTokenInterrupt(bool active); -static void UpdateFinishInterrupt(bool active); static void SetToken_OnMainThread(u64 userdata, s64 cyclesLate); static void SetFinish_OnMainThread(u64 userdata, s64 
cyclesLate); @@ -131,9 +130,10 @@ void Init() m_DstAlphaConf.Hex = 0; m_AlphaModeConf.Hex = 0; m_AlphaRead.Hex = 0; + s_Token = 0; - s_signal_token_interrupt.store(0); - s_signal_finish_interrupt.store(0); + s_signal_token_interrupt = false; + s_signal_finish_interrupt = false; et_SetTokenOnMainThread = CoreTiming::RegisterEvent("SetToken", SetToken_OnMainThread); et_SetFinishOnMainThread = CoreTiming::RegisterEvent("SetFinish", SetFinish_OnMainThread); @@ -191,10 +191,10 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base) UPECtrlReg tmpCtrl(val); if (tmpCtrl.PEToken) - s_signal_token_interrupt.store(0); + s_signal_token_interrupt = false; if (tmpCtrl.PEFinish) - s_signal_finish_interrupt.store(0); + s_signal_finish_interrupt = false; m_Control.PETokenEnable = tmpCtrl.PETokenEnable; m_Control.PEFinishEnable = tmpCtrl.PEFinishEnable; @@ -206,7 +206,9 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base) })); // Token register, readonly. - mmio->Register(base | PE_TOKEN_REG, MMIO::DirectRead(&CommandProcessor::fifo.PEToken), + mmio->Register(base | PE_TOKEN_REG, + + MMIO::ComplexRead([](u32) { return s_Token.load(); }), MMIO::InvalidWrite()); // BBOX registers, readonly and need to update a flag. 
@@ -223,45 +225,23 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base) static void UpdateInterrupts() { // check if there is a token-interrupt - UpdateTokenInterrupt((s_signal_token_interrupt.load() & m_Control.PETokenEnable) != 0); + ProcessorInterface::SetInterrupt(INT_CAUSE_PE_TOKEN, + s_signal_token_interrupt && m_Control.PETokenEnable); // check if there is a finish-interrupt - UpdateFinishInterrupt((s_signal_finish_interrupt.load() & m_Control.PEFinishEnable) != 0); + ProcessorInterface::SetInterrupt(INT_CAUSE_PE_FINISH, + s_signal_finish_interrupt && m_Control.PEFinishEnable); } -static void UpdateTokenInterrupt(bool active) -{ - ProcessorInterface::SetInterrupt(INT_CAUSE_PE_TOKEN, active); -} - -static void UpdateFinishInterrupt(bool active) -{ - ProcessorInterface::SetInterrupt(INT_CAUSE_PE_FINISH, active); -} - -// TODO(mb2): Refactor SetTokenINT_OnMainThread(u64 userdata, int cyclesLate). -// Think about the right order between tokenVal and tokenINT... one day maybe. -// Cleanup++ - -// Called only if BPMEM_PE_TOKEN_INT_ID is ack by GP static void SetToken_OnMainThread(u64 userdata, s64 cyclesLate) { - // XXX: No 16-bit atomic store available, so cheat and use 32-bit. - // That's what we've always done. We're counting on fifo.PEToken to be - // 4-byte padded. 
- Common::AtomicStore(*(volatile u32*)&CommandProcessor::fifo.PEToken, userdata & 0xffff); - INFO_LOG(PIXELENGINE, "VIDEO Backend raises INT_CAUSE_PE_TOKEN (btw, token: %04x)", - CommandProcessor::fifo.PEToken); - if (userdata >> 16) - { - s_signal_token_interrupt.store(1); - UpdateInterrupts(); - } + s_signal_token_interrupt = true; + UpdateInterrupts(); } static void SetFinish_OnMainThread(u64 userdata, s64 cyclesLate) { - s_signal_finish_interrupt.store(1); + s_signal_finish_interrupt = true; UpdateInterrupts(); Core::FrameUpdateOnCPUThread(); @@ -271,16 +251,16 @@ static void SetFinish_OnMainThread(u64 userdata, s64 cyclesLate) // THIS IS EXECUTED FROM VIDEO THREAD void SetToken(const u16 _token, const int _bSetTokenAcknowledge) { + INFO_LOG(PIXELENGINE, "VIDEO Backend raises INT_CAUSE_PE_TOKEN (btw, token: %04x)", _token); + s_Token.store(_token); + if (_bSetTokenAcknowledge) // set token INT { - s_signal_token_interrupt.store(1); + if (!SConfig::GetInstance().bCPUThread || Fifo::UseDeterministicGPUThread()) + CoreTiming::ScheduleEvent(0, et_SetTokenOnMainThread, 0); + else + CoreTiming::ScheduleEvent_Threadsafe(0, et_SetTokenOnMainThread, 0); } - - if (!SConfig::GetInstance().bCPUThread || Fifo::UseDeterministicGPUThread()) - CoreTiming::ScheduleEvent(0, et_SetTokenOnMainThread, _token | (_bSetTokenAcknowledge << 16)); - else - CoreTiming::ScheduleEvent_Threadsafe(0, et_SetTokenOnMainThread, - _token | (_bSetTokenAcknowledge << 16)); } // SetFinish diff --git a/Source/Core/VideoCommon/VideoBackendBase.h b/Source/Core/VideoCommon/VideoBackendBase.h index 09570f63f8..d5cb8a4259 100644 --- a/Source/Core/VideoCommon/VideoBackendBase.h +++ b/Source/Core/VideoCommon/VideoBackendBase.h @@ -43,11 +43,6 @@ struct SCPFifoStruct volatile u32 CPReadPointer; volatile u32 CPBreakpoint; volatile u32 SafeCPReadPointer; - // Super Monkey Ball Adventure require this. - // Because the read&check-PEToken-loop stays in its JITed block I suppose. 
- // So no possiblity to ack the Token irq by the scheduler until some sort of PPC watchdog do its - // mess. - volatile u16 PEToken; volatile u32 bFF_GPLinkEnable; volatile u32 bFF_GPReadEnable; From 82bdc4ef8647d759775d0b1bc9b88203e845b77a Mon Sep 17 00:00:00 2001 From: degasus Date: Wed, 10 Aug 2016 09:44:08 +0200 Subject: [PATCH 3/3] PixelEngine: Delay token updates by events. To still get a speedup, record whether an event is already queued; if one is, don't raise a new event. --- Source/Core/VideoCommon/PixelEngine.cpp | 105 ++++++++++++++++-------- Source/Core/VideoCommon/PixelEngine.h | 2 +- 2 files changed, 71 insertions(+), 36 deletions(-) diff --git a/Source/Core/VideoCommon/PixelEngine.cpp b/Source/Core/VideoCommon/PixelEngine.cpp index 43b6399e6f..f774b54064 100644 --- a/Source/Core/VideoCommon/PixelEngine.cpp +++ b/Source/Core/VideoCommon/PixelEngine.cpp @@ -4,9 +4,8 @@ // http://www.nvidia.com/object/General_FAQ.html#t6 !!!!! -#include <atomic> +#include <mutex> -#include "Common/Atomic.h" #include "Common/ChunkFile.h" #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" @@ -90,13 +89,18 @@ static UPEDstAlphaConfReg m_DstAlphaConf; static UPEAlphaModeConfReg m_AlphaModeConf; static UPEAlphaReadReg m_AlphaRead; static UPECtrlReg m_Control; -static std::atomic s_Token; // token value most recently encountered + +std::mutex s_token_finish_mutex; +static u16 s_token; +static u16 s_token_pending; +static bool s_token_interrupt_pending; +static bool s_finish_interrupt_pending; +static bool s_event_raised; static bool s_signal_token_interrupt; static bool s_signal_finish_interrupt; -static int et_SetTokenOnMainThread; -static int et_SetFinishOnMainThread; +static int et_SetTokenFinishOnMainThread; enum { @@ -112,15 +116,19 @@ void DoState(PointerWrap& p) p.Do(m_AlphaModeConf); p.Do(m_AlphaRead); p.DoPOD(m_Control); - p.Do(s_Token); + + p.Do(s_token); + p.Do(s_token_pending); + p.Do(s_token_interrupt_pending); + p.Do(s_finish_interrupt_pending); + p.Do(s_event_raised); 
p.Do(s_signal_token_interrupt); p.Do(s_signal_finish_interrupt); } static void UpdateInterrupts(); -static void SetToken_OnMainThread(u64 userdata, s64 cyclesLate); -static void SetFinish_OnMainThread(u64 userdata, s64 cyclesLate); +static void SetTokenFinish_OnMainThread(u64 userdata, s64 cyclesLate); void Init() { @@ -130,13 +138,17 @@ void Init() m_DstAlphaConf.Hex = 0; m_AlphaModeConf.Hex = 0; m_AlphaRead.Hex = 0; - s_Token = 0; + + s_token = 0; + s_token_pending = 0; + s_token_interrupt_pending = false; + s_finish_interrupt_pending = false; + s_event_raised = false; s_signal_token_interrupt = false; s_signal_finish_interrupt = false; - et_SetTokenOnMainThread = CoreTiming::RegisterEvent("SetToken", SetToken_OnMainThread); - et_SetFinishOnMainThread = CoreTiming::RegisterEvent("SetFinish", SetFinish_OnMainThread); + et_SetTokenFinishOnMainThread = CoreTiming::RegisterEvent("SetTokenFinish", SetTokenFinish_OnMainThread); } void RegisterMMIO(MMIO::Mapping* mmio, u32 base) @@ -207,8 +219,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base) // Token register, readonly. mmio->Register(base | PE_TOKEN_REG, - - MMIO::ComplexRead([](u32) { return s_Token.load(); }), + MMIO::ComplexRead([](u32) { return s_token; }), MMIO::InvalidWrite()); // BBOX registers, readonly and need to update a flag. 
@@ -233,46 +244,70 @@ static void UpdateInterrupts() s_signal_finish_interrupt && m_Control.PEFinishEnable); } -static void SetToken_OnMainThread(u64 userdata, s64 cyclesLate) +static void SetTokenFinish_OnMainThread(u64 userdata, s64 cyclesLate) { - s_signal_token_interrupt = true; - UpdateInterrupts(); + std::unique_lock lk(s_token_finish_mutex); + s_event_raised = false; + + s_token = s_token_pending; + + if (s_token_interrupt_pending) + { + s_token_interrupt_pending = false; + s_signal_token_interrupt = true; + UpdateInterrupts(); + } + + if (s_finish_interrupt_pending) + { + s_finish_interrupt_pending = false; + s_signal_finish_interrupt = true; + UpdateInterrupts(); + lk.unlock(); + Core::FrameUpdateOnCPUThread(); + } } -static void SetFinish_OnMainThread(u64 userdata, s64 cyclesLate) +// Raise the event handler above on the CPU thread. +// s_token_finish_mutex must be locked. +// THIS IS EXECUTED FROM VIDEO THREAD +static void RaiseEvent() { - s_signal_finish_interrupt = true; - UpdateInterrupts(); + if (s_event_raised) + return; - Core::FrameUpdateOnCPUThread(); + s_event_raised = true; + if (!SConfig::GetInstance().bCPUThread || Fifo::UseDeterministicGPUThread()) + CoreTiming::ScheduleEvent(0, et_SetTokenFinishOnMainThread, 0); + else + CoreTiming::ScheduleEvent_Threadsafe(0, et_SetTokenFinishOnMainThread, 0); } // SetToken // THIS IS EXECUTED FROM VIDEO THREAD -void SetToken(const u16 _token, const int _bSetTokenAcknowledge) +void SetToken(const u16 token, const bool interrupt) { - INFO_LOG(PIXELENGINE, "VIDEO Backend raises INT_CAUSE_PE_TOKEN (btw, token: %04x)", _token); - s_Token.store(_token); + INFO_LOG(PIXELENGINE, "VIDEO Backend raises INT_CAUSE_PE_TOKEN (btw, token: %04x)", token); - if (_bSetTokenAcknowledge) // set token INT - { - if (!SConfig::GetInstance().bCPUThread || Fifo::UseDeterministicGPUThread()) - CoreTiming::ScheduleEvent(0, et_SetTokenOnMainThread, 0); - else - CoreTiming::ScheduleEvent_Threadsafe(0, et_SetTokenOnMainThread, 0); - } 
+ std::lock_guard lk(s_token_finish_mutex); + + s_token_pending = token; + s_token_interrupt_pending |= interrupt; + + RaiseEvent(); } // SetFinish // THIS IS EXECUTED FROM VIDEO THREAD (BPStructs.cpp) when a new frame has been drawn void SetFinish() { - if (!SConfig::GetInstance().bCPUThread || Fifo::UseDeterministicGPUThread()) - CoreTiming::ScheduleEvent(0, et_SetFinishOnMainThread, 0); - else - CoreTiming::ScheduleEvent_Threadsafe(0, et_SetFinishOnMainThread, 0); - INFO_LOG(PIXELENGINE, "VIDEO Set Finish"); + + std::lock_guard lk(s_token_finish_mutex); + + s_finish_interrupt_pending |= true; + + RaiseEvent(); } UPEAlphaReadReg GetAlphaReadMode() diff --git a/Source/Core/VideoCommon/PixelEngine.h b/Source/Core/VideoCommon/PixelEngine.h index df978a2673..53b6ee6b1a 100644 --- a/Source/Core/VideoCommon/PixelEngine.h +++ b/Source/Core/VideoCommon/PixelEngine.h @@ -61,7 +61,7 @@ void DoState(PointerWrap& p); void RegisterMMIO(MMIO::Mapping* mmio, u32 base); // gfx backend support -void SetToken(const u16 _token, const int _bSetTokenAcknowledge); +void SetToken(const u16 token, const bool interrupt); void SetFinish(); UPEAlphaReadReg GetAlphaReadMode();