diff --git a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp index f653dfbf01..290dc510cf 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp @@ -44,6 +44,8 @@ static GLuint s_encode_VBO = 0; static GLuint s_encode_VAO = 0; static TargetRectangle s_cached_sourceRc; +static GLuint s_PBO = 0; // for readback with different strides + static const char *VProgram = "ATTRIN vec2 rawpos;\n" "ATTRIN vec2 tex0;\n" @@ -186,6 +188,8 @@ void Init() glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, renderBufferWidth, renderBufferHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glGenBuffers(1, &s_PBO); + CreatePrograms(); } @@ -196,6 +200,7 @@ void Shutdown() glDeleteFramebuffers(1, &s_texConvFrameBuffer); glDeleteBuffers(1, &s_encode_VBO ); glDeleteVertexArrays(1, &s_encode_VAO ); + glDeleteBuffers(1, &s_PBO); s_rgbToYuyvProgram.Destroy(); s_yuyvToRgbProgram.Destroy(); @@ -206,6 +211,7 @@ void Shutdown() s_srcTexture = 0; s_dstTexture = 0; s_texConvFrameBuffer = 0; + s_PBO = 0; } void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc, @@ -267,25 +273,37 @@ void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc, // TODO: make this less slow. int writeStride = bpmem.copyMipMapStrideChannels * 32; + int dstSize = dstWidth*dstHeight*4; + int readHeight = readStride / dstWidth / 4; // 4 bytes per pixel + int readLoops = dstHeight / readHeight; - if (writeStride != readStride && toTexture) + if (writeStride != readStride && readLoops > 1 && toTexture) { // writing to a texture of a different size + // also copy more then one block line, so the different strides matters + // copy into one pbo first, map this buffer, and then memcpy into gc memory + // in this way, we only have one vram->ram transfer, but maybe a bigger + // cpu overhead because of the pbo + glBindBuffer(GL_PIXEL_PACK_BUFFER, s_PBO); + glBufferData(GL_PIXEL_PACK_BUFFER, dstSize, NULL, GL_STREAM_READ); + glReadPixels(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE, 0); + u8* pbo = (u8*)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, dstSize, GL_MAP_READ_BIT); - int readHeight = readStride / dstWidth; - readHeight /= 4; // 4 bytes per pixel - - int readStart = 0; - int readLoops = dstHeight / readHeight; + //int readStart = 0; for (int i = 0; i < readLoops; i++) { - glReadPixels(0, readStart, (GLsizei)dstWidth, (GLsizei)readHeight, GL_BGRA, GL_UNSIGNED_BYTE, destAddr); - readStart += readHeight; + memcpy(destAddr, pbo, readStride); + pbo += readStride; destAddr += writeStride; } + + glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); } else + { glReadPixels(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE, destAddr); + } GL_REPORT_ERRORD();