| 
									
										
										
										
											2013-04-17 23:29:41 -04:00
										 |  |  | // Copyright 2013 Dolphin Emulator Project
 | 
					
						
							| 
									
										
										
										
											2015-05-18 01:08:10 +02:00
										 |  |  | // Licensed under GPLv2+
 | 
					
						
							| 
									
										
										
										
											2013-04-17 23:29:41 -04:00
										 |  |  | // Refer to the license.txt file included.
 | 
					
						
							| 
									
										
										
										
											2013-01-31 23:11:53 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-01 10:56:13 -05:00
										 |  |  | #include "VideoBackends/OGL/StreamBuffer.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-11-27 11:56:22 +01:00
										 |  |  | #include "Common/Align.h"
 | 
					
						
							| 
									
										
										
										
											2017-02-01 10:56:13 -05:00
										 |  |  | #include "Common/CommonFuncs.h"
 | 
					
						
							| 
									
										
										
										
											2015-09-19 04:40:00 +12:00
										 |  |  | #include "Common/GL/GLUtil.h"
 | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  | #include "Common/MemoryUtil.h"
 | 
					
						
							| 
									
										
										
										
											2014-02-17 05:18:15 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | #include "VideoBackends/OGL/Render.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "VideoCommon/DriverDetails.h"
 | 
					
						
							|  |  |  | #include "VideoCommon/OnScreenDisplay.h"
 | 
					
						
							| 
									
										
										
										
											2013-01-31 23:11:53 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | namespace OGL | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | // moved out of constructor, so m_buffer is allowed to be const
 | 
					
						
							| 
									
										
										
										
											2015-12-21 10:09:03 -05:00
										 |  |  | static u32 GenBuffer() | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |   u32 id; | 
					
						
							|  |  |  |   glGenBuffers(1, &id); | 
					
						
							|  |  |  |   return id; | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2013-02-01 15:15:25 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-05 11:51:05 +02:00
										 |  |  | StreamBuffer::StreamBuffer(u32 type, u32 size) | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |     : m_buffer(GenBuffer()), m_buffertype(type), m_size(ROUND_UP_POW2(size)), | 
					
						
							|  |  |  |       m_bit_per_slot(IntLog2(ROUND_UP_POW2(size) / SYNC_POINTS)) | 
					
						
							| 
									
										
										
										
											2013-01-31 23:11:53 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |   m_iterator = 0; | 
					
						
							|  |  |  |   m_used_iterator = 0; | 
					
						
							|  |  |  |   m_free_iterator = 0; | 
					
						
							| 
									
										
										
										
											2013-01-31 23:11:53 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | StreamBuffer::~StreamBuffer() | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |   glDeleteBuffers(1, &m_buffer); | 
					
						
							| 
									
										
										
										
											2013-01-31 23:11:53 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-11 00:17:29 -05:00
										 |  |  | /* Shared synchronization code for ring buffers
 | 
					
						
							| 
									
										
										
										
											2014-03-29 11:05:44 +01:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2015-01-11 00:17:29 -05:00
										 |  |  |  * The next three functions are to create/delete/use the OpenGL synchronization. | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  |  * ARB_sync (OpenGL 3.2) is used and required. | 
					
						
							| 
									
										
										
										
											2014-03-29 11:05:44 +01:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  |  * To reduce overhead, the complete buffer is split up into SYNC_POINTS chunks. | 
					
						
							|  |  |  |  * For each of these chunks, there is a fence which checks if this chunk is still in use. | 
					
						
							| 
									
										
										
										
											2014-03-29 11:05:44 +01:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |  * As our API allows to alloc more memory than it has to use, we have to catch how much is already | 
					
						
							|  |  |  |  * written. | 
					
						
							| 
									
										
										
										
											2014-03-29 11:05:44 +01:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  |  * m_iterator      - writing position | 
					
						
							|  |  |  |  * m_free_iterator - last position checked if free | 
					
						
							|  |  |  |  * m_used_iterator - last position known to be written | 
					
						
							| 
									
										
										
										
											2014-03-29 11:05:44 +01:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |  * So on alloc, we have to wait for all slots between m_free_iterator and m_iterator (and set | 
					
						
							|  |  |  |  * m_free_iterator to m_iterator afterwards). | 
					
						
							| 
									
										
										
										
											2014-03-29 11:05:44 +01:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2015-01-11 00:17:29 -05:00
										 |  |  |  * We also assume that this buffer is accessed by the GPU between the Unmap and Map function, | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  |  * so we may create the fences on the start of mapping. | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |  * Same here, new fences for the chunks between m_used_iterator and m_iterator (also update | 
					
						
							|  |  |  |  * m_used_iterator). | 
					
						
							| 
									
										
										
										
											2014-03-29 11:05:44 +01:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2015-01-11 00:17:29 -05:00
										 |  |  |  * As ring buffers have an ugly behavior on rollover, have fun to read this code ;) | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  |  */ | 
					
						
							| 
									
										
										
										
											2013-02-01 16:43:08 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | void StreamBuffer::CreateFences() | 
					
						
							| 
									
										
										
										
											2013-01-31 23:11:53 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |   for (int i = 0; i < SYNC_POINTS; i++) | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     m_fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | } | 
					
						
							|  |  |  | void StreamBuffer::DeleteFences() | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |   for (int i = Slot(m_free_iterator) + 1; i < SYNC_POINTS; i++) | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     glDeleteSync(m_fences[i]); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   for (int i = 0; i < Slot(m_iterator); i++) | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     glDeleteSync(m_fences[i]); | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2014-06-05 11:51:05 +02:00
										 |  |  | void StreamBuffer::AllocMemory(u32 size) | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |   // insert waiting slots for used memory
 | 
					
						
							|  |  |  |   for (int i = Slot(m_used_iterator); i < Slot(m_iterator); i++) | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     m_fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   m_used_iterator = m_iterator; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // wait for new slots to end of buffer
 | 
					
						
							|  |  |  |   for (int i = Slot(m_free_iterator) + 1; i <= Slot(m_iterator + size) && i < SYNC_POINTS; i++) | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     glClientWaitSync(m_fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); | 
					
						
							|  |  |  |     glDeleteSync(m_fences[i]); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   m_free_iterator = m_iterator + size; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // if buffer is full
 | 
					
						
							|  |  |  |   if (m_iterator + size >= m_size) | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     // insert waiting slots in unused space at the end of the buffer
 | 
					
						
							|  |  |  |     for (int i = Slot(m_used_iterator); i < SYNC_POINTS; i++) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       m_fences[i] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // move to the start
 | 
					
						
							|  |  |  |     m_used_iterator = m_iterator = 0;  // offset 0 is always aligned
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // wait for space at the start
 | 
					
						
							|  |  |  |     for (int i = 0; i <= Slot(m_iterator + size); i++) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       glClientWaitSync(m_fences[i], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); | 
					
						
							|  |  |  |       glDeleteSync(m_fences[i]); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     m_free_iterator = m_iterator + size; | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2013-10-29 01:23:17 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-11 00:17:29 -05:00
										 |  |  | /* The usual way to stream data to the GPU.
 | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  |  * Described here: https://www.opengl.org/wiki/Buffer_Object_Streaming#Unsynchronized_buffer_mapping
 | 
					
						
							|  |  |  |  * Just do unsync appends until the buffer is full. | 
					
						
							|  |  |  |  * When it's full, orphan (alloc a new buffer and free the old one) | 
					
						
							| 
									
										
										
										
											2014-03-29 11:05:44 +01:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  |  * As reallocation is an overhead, this method isn't as fast as it is known to be. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | class MapAndOrphan : public StreamBuffer | 
					
						
							| 
									
										
										
										
											2013-02-01 12:30:08 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | public: | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |   MapAndOrphan(u32 type, u32 size) : StreamBuffer(type, size) | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     glBindBuffer(m_buffertype, m_buffer); | 
					
						
							|  |  |  |     glBufferData(m_buffertype, m_size, nullptr, GL_STREAM_DRAW); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   ~MapAndOrphan() {} | 
					
						
							|  |  |  |   std::pair<u8*, u32> Map(u32 size) override | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     if (m_iterator + size >= m_size) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |       glBufferData(m_buffertype, m_size, nullptr, GL_STREAM_DRAW); | 
					
						
							|  |  |  |       m_iterator = 0; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     u8* pointer = (u8*)glMapBufferRange(m_buffertype, m_iterator, size, | 
					
						
							|  |  |  |                                         GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | | 
					
						
							|  |  |  |                                             GL_MAP_UNSYNCHRONIZED_BIT); | 
					
						
							|  |  |  |     return std::make_pair(pointer, m_iterator); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   void Unmap(u32 used_size) override | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     glFlushMappedBufferRange(m_buffertype, 0, used_size); | 
					
						
							|  |  |  |     glUnmapBuffer(m_buffertype); | 
					
						
							|  |  |  |     m_iterator += used_size; | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | }; | 
					
						
							| 
									
										
										
										
											2013-01-31 23:11:53 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | /* A modified streaming way without reallocation
 | 
					
						
							|  |  |  |  * This one fixes the reallocation overhead of the MapAndOrphan one. | 
					
						
							|  |  |  |  * So it alloc a ring buffer on initialization. | 
					
						
							| 
									
										
										
										
											2015-01-11 00:17:29 -05:00
										 |  |  |  * But with this limited resource, we have to care about the CPU-GPU distance. | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  |  * Else this fifo may overflow. | 
					
						
							|  |  |  |  * So we had traded orphan vs syncing. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | class MapAndSync : public StreamBuffer | 
					
						
							| 
									
										
										
										
											2013-01-31 23:11:53 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | public: | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |   MapAndSync(u32 type, u32 size) : StreamBuffer(type, size) | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     CreateFences(); | 
					
						
							|  |  |  |     glBindBuffer(m_buffertype, m_buffer); | 
					
						
							|  |  |  |     glBufferData(m_buffertype, m_size, nullptr, GL_STREAM_DRAW); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   ~MapAndSync() { DeleteFences(); } | 
					
						
							|  |  |  |   std::pair<u8*, u32> Map(u32 size) override | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     AllocMemory(size); | 
					
						
							|  |  |  |     u8* pointer = (u8*)glMapBufferRange(m_buffertype, m_iterator, size, | 
					
						
							|  |  |  |                                         GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | | 
					
						
							|  |  |  |                                             GL_MAP_UNSYNCHRONIZED_BIT); | 
					
						
							|  |  |  |     return std::make_pair(pointer, m_iterator); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   void Unmap(u32 used_size) override | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     glFlushMappedBufferRange(m_buffertype, 0, used_size); | 
					
						
							|  |  |  |     glUnmapBuffer(m_buffertype); | 
					
						
							|  |  |  |     m_iterator += used_size; | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | }; | 
					
						
							| 
									
										
										
										
											2013-12-27 10:56:03 -06:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-11 00:17:29 -05:00
										 |  |  | /* Streaming fifo without mapping overhead.
 | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  |  * This one usually requires ARB_buffer_storage (OpenGL 4.4). | 
					
						
							| 
									
										
										
										
											2015-01-11 00:17:29 -05:00
										 |  |  |  * And is usually not available on OpenGL3 GPUs. | 
					
						
							| 
									
										
										
										
											2014-03-29 11:05:44 +01:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  |  * ARB_buffer_storage allows us to render from a mapped buffer. | 
					
						
							|  |  |  |  * So we map it persistently in the initialization. | 
					
						
							| 
									
										
										
										
											2014-03-29 11:05:44 +01:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  |  * Unsync mapping sounds like an easy task, but it isn't for threaded drivers. | 
					
						
							|  |  |  |  * So every mapping on current close-source driver _will_ end in | 
					
						
							|  |  |  |  * at least a round trip time between two threads. | 
					
						
							| 
									
										
										
										
											2014-03-29 11:05:44 +01:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  |  * As persistently mapped buffer can't use orphaning, we also have to sync. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | class BufferStorage : public StreamBuffer | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | public: | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |   BufferStorage(u32 type, u32 size, bool _coherent = false) | 
					
						
							|  |  |  |       : StreamBuffer(type, size), coherent(_coherent) | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     CreateFences(); | 
					
						
							|  |  |  |     glBindBuffer(m_buffertype, m_buffer); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // PERSISTANT_BIT to make sure that the buffer can be used while mapped
 | 
					
						
							|  |  |  |     // COHERENT_BIT is set so we don't have to use a MemoryBarrier on write
 | 
					
						
							|  |  |  |     // CLIENT_STORAGE_BIT is set since we access the buffer more frequently on the client side then
 | 
					
						
							|  |  |  |     // server side
 | 
					
						
							|  |  |  |     glBufferStorage(m_buffertype, m_size, nullptr, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | | 
					
						
							|  |  |  |                                                        (coherent ? GL_MAP_COHERENT_BIT : 0)); | 
					
						
							|  |  |  |     m_pointer = (u8*)glMapBufferRange( | 
					
						
							|  |  |  |         m_buffertype, 0, m_size, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | | 
					
						
							|  |  |  |                                      (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT)); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   ~BufferStorage() | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     DeleteFences(); | 
					
						
							|  |  |  |     glUnmapBuffer(m_buffertype); | 
					
						
							|  |  |  |     glBindBuffer(m_buffertype, 0); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   std::pair<u8*, u32> Map(u32 size) override | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     AllocMemory(size); | 
					
						
							|  |  |  |     return std::make_pair(m_pointer + m_iterator, m_iterator); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   void Unmap(u32 used_size) override | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     if (!coherent) | 
					
						
							|  |  |  |       glFlushMappedBufferRange(m_buffertype, m_iterator, used_size); | 
					
						
							|  |  |  |     m_iterator += used_size; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   u8* m_pointer; | 
					
						
							|  |  |  |   const bool coherent; | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* --- AMD only ---
 | 
					
						
							|  |  |  |  * Another streaming fifo without mapping overhead. | 
					
						
							|  |  |  |  * As we can't orphan without mapping, we have to sync. | 
					
						
							| 
									
										
										
										
											2014-03-29 11:05:44 +01:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2015-01-11 00:17:29 -05:00
										 |  |  |  * This one uses AMD_pinned_memory which is available on all AMD GPUs. | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  |  * OpenGL 4.4 drivers should use BufferStorage. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | class PinnedMemory : public StreamBuffer | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | public: | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |   PinnedMemory(u32 type, u32 size) : StreamBuffer(type, size) | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     CreateFences(); | 
					
						
							| 
									
										
										
										
											2016-11-27 11:56:22 +01:00
										 |  |  |     m_pointer = static_cast<u8*>(Common::AllocateAlignedMemory( | 
					
						
							|  |  |  |         Common::AlignUp(m_size, ALIGN_PINNED_MEMORY), ALIGN_PINNED_MEMORY)); | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |     glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, m_buffer); | 
					
						
							| 
									
										
										
										
											2016-11-27 11:56:22 +01:00
										 |  |  |     glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, | 
					
						
							|  |  |  |                  Common::AlignUp(m_size, ALIGN_PINNED_MEMORY), m_pointer, GL_STREAM_COPY); | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |     glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0); | 
					
						
							|  |  |  |     glBindBuffer(m_buffertype, m_buffer); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   ~PinnedMemory() | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     DeleteFences(); | 
					
						
							|  |  |  |     glBindBuffer(m_buffertype, 0); | 
					
						
							|  |  |  |     glFinish();  // ogl pipeline must be flushed, else this buffer can be in use
 | 
					
						
							| 
									
										
										
										
											2016-08-07 13:03:07 -04:00
										 |  |  |     Common::FreeAlignedMemory(m_pointer); | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |     m_pointer = nullptr; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   std::pair<u8*, u32> Map(u32 size) override | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     AllocMemory(size); | 
					
						
							|  |  |  |     return std::make_pair(m_pointer + m_iterator, m_iterator); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   void Unmap(u32 used_size) override { m_iterator += used_size; } | 
					
						
							|  |  |  |   u8* m_pointer; | 
					
						
							|  |  |  |   static const u32 ALIGN_PINNED_MEMORY = 4096; | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Fifo based on the glBufferSubData call.
 | 
					
						
							|  |  |  |  * As everything must be copied before glBufferSubData returns, | 
					
						
							|  |  |  |  * an additional memcpy in the driver will be done. | 
					
						
							|  |  |  |  * So this is a huge overhead, only use it if required. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | class BufferSubData : public StreamBuffer | 
					
						
							| 
									
										
										
										
											2013-08-29 21:03:48 +02:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | public: | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |   BufferSubData(u32 type, u32 size) : StreamBuffer(type, size) | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     glBindBuffer(m_buffertype, m_buffer); | 
					
						
							|  |  |  |     glBufferData(m_buffertype, size, nullptr, GL_STATIC_DRAW); | 
					
						
							|  |  |  |     m_pointer = new u8[m_size]; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   ~BufferSubData() { delete[] m_pointer; } | 
					
						
							|  |  |  |   std::pair<u8*, u32> Map(u32 size) override { return std::make_pair(m_pointer, 0); } | 
					
						
							|  |  |  |   void Unmap(u32 used_size) override { glBufferSubData(m_buffertype, 0, used_size, m_pointer); } | 
					
						
							|  |  |  |   u8* m_pointer; | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Fifo based on the glBufferData call.
 | 
					
						
							|  |  |  |  * Some trashy drivers stall in BufferSubData. | 
					
						
							|  |  |  |  * So here we use glBufferData, which realloc this buffer every time. | 
					
						
							|  |  |  |  * This may avoid stalls, but it is a bigger overhead than BufferSubData. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | class BufferData : public StreamBuffer | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | public: | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |   BufferData(u32 type, u32 size) : StreamBuffer(type, size) | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     glBindBuffer(m_buffertype, m_buffer); | 
					
						
							|  |  |  |     m_pointer = new u8[m_size]; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   ~BufferData() { delete[] m_pointer; } | 
					
						
							|  |  |  |   std::pair<u8*, u32> Map(u32 size) override { return std::make_pair(m_pointer, 0); } | 
					
						
							|  |  |  |   void Unmap(u32 used_size) override | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     glBufferData(m_buffertype, used_size, m_pointer, GL_STREAM_DRAW); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   u8* m_pointer; | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-21 10:15:17 -05:00
										 |  |  | // Chooses the best streaming method based on the supported extensions and known issues
 | 
					
						
							|  |  |  | std::unique_ptr<StreamBuffer> StreamBuffer::Create(u32 type, u32 size) | 
					
						
							| 
									
										
										
										
											2014-01-23 00:47:49 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |   // without basevertex support, only streaming methods whith uploads everything to zero works fine:
 | 
					
						
							|  |  |  |   if (!g_ogl_config.bSupportsGLBaseVertex) | 
					
						
							|  |  |  |   { | 
					
						
							| 
									
										
										
										
											2016-10-30 00:56:18 +02:00
										 |  |  |     if (!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_BUFFER_STREAM)) | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |       return std::make_unique<BufferSubData>(type, size); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // BufferData is by far the worst way, only use it if needed
 | 
					
						
							|  |  |  |     return std::make_unique<BufferData>(type, size); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // Prefer the syncing buffers over the orphaning one
 | 
					
						
							|  |  |  |   if (g_ogl_config.bSupportsGLSync) | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     // pinned memory is much faster than buffer storage on AMD cards
 | 
					
						
							|  |  |  |     if (g_ogl_config.bSupportsGLPinnedMemory && | 
					
						
							| 
									
										
										
										
											2016-10-30 00:56:18 +02:00
										 |  |  |         !(DriverDetails::HasBug(DriverDetails::BUG_BROKEN_PINNED_MEMORY) && | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |           type == GL_ELEMENT_ARRAY_BUFFER)) | 
					
						
							|  |  |  |       return std::make_unique<PinnedMemory>(type, size); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // buffer storage works well in most situations
 | 
					
						
							| 
									
										
										
										
											2016-10-30 00:56:18 +02:00
										 |  |  |     bool coherent = DriverDetails::HasBug(DriverDetails::BUG_BROKEN_EXPLICIT_FLUSH); | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |     if (g_ogl_config.bSupportsGLBufferStorage && | 
					
						
							| 
									
										
										
										
											2016-10-30 00:56:18 +02:00
										 |  |  |         !(DriverDetails::HasBug(DriverDetails::BUG_BROKEN_BUFFER_STORAGE) && | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |           type == GL_ARRAY_BUFFER) && | 
					
						
							| 
									
										
										
										
											2016-10-30 00:56:18 +02:00
										 |  |  |         !(DriverDetails::HasBug(DriverDetails::BUG_INTEL_BROKEN_BUFFER_STORAGE) && | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |           type == GL_ELEMENT_ARRAY_BUFFER)) | 
					
						
							|  |  |  |       return std::make_unique<BufferStorage>(type, size, coherent); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // don't fall back to MapAnd* for Nvidia drivers
 | 
					
						
							| 
									
										
										
										
											2016-10-30 00:56:18 +02:00
										 |  |  |     if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_UNSYNC_MAPPING)) | 
					
						
							| 
									
										
										
										
											2016-06-24 10:43:46 +02:00
										 |  |  |       return std::make_unique<BufferSubData>(type, size); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // mapping fallback
 | 
					
						
							|  |  |  |     if (g_ogl_config.bSupportsGLSync) | 
					
						
							|  |  |  |       return std::make_unique<MapAndSync>(type, size); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // default fallback, should work everywhere, but isn't the best way to do this job
 | 
					
						
							|  |  |  |   return std::make_unique<MapAndOrphan>(type, size); | 
					
						
							| 
									
										
										
										
											2013-08-29 21:03:48 +02:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2013-01-31 23:11:53 +01:00
										 |  |  | } |