forked from dolphin-emu/dolphin
		
	git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3379 8ced0084-cf51-0410-be5f-012b33b47a6e
		
			
				
	
	
		
			358 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			358 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
// Copyright (C) 2003-2009 Dolphin Project.
 | 
						|
 | 
						|
// This program is free software: you can redistribute it and/or modify
 | 
						|
// it under the terms of the GNU General Public License as published by
 | 
						|
// the Free Software Foundation, version 2.0.
 | 
						|
 | 
						|
// This program is distributed in the hope that it will be useful,
 | 
						|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
// GNU General Public License 2.0 for more details.
 | 
						|
 | 
						|
// A copy of the GPL 2.0 should have been included with the program.
 | 
						|
// If not, see http://www.gnu.org/licenses/
 | 
						|
 | 
						|
// Official SVN repository and contact information can be found at
 | 
						|
// http://code.google.com/p/dolphin-emu/
 | 
						|
 | 
						|
// Fast image conversion using OpenGL shaders.
 | 
						|
// This kind of stuff would be a LOT nicer with OpenCL.
 | 
						|
 | 
						|
#include "TextureConverter.h"
 | 
						|
#include "TextureConversionShader.h"
 | 
						|
#include "PixelShaderCache.h"
 | 
						|
#include "VertexShaderManager.h"
 | 
						|
#include "Globals.h"
 | 
						|
#include "Config.h"
 | 
						|
#include "ImageWrite.h"
 | 
						|
#include "Render.h"
 | 
						|
 | 
						|
namespace TextureConverter
 | 
						|
{
 | 
						|
 | 
						|
static GLuint s_texConvFrameBuffer = 0;
 | 
						|
static GLuint s_srcTexture = 0;			// for decoding from RAM
 | 
						|
static GLuint s_dstRenderBuffer = 0;	// for encoding to RAM
 | 
						|
 | 
						|
const int renderBufferWidth = 1024;
 | 
						|
const int renderBufferHeight = 1024;
 | 
						|
 | 
						|
static FRAGMENTSHADER s_rgbToYuyvProgram;
 | 
						|
static FRAGMENTSHADER s_yuyvToRgbProgram;
 | 
						|
 | 
						|
// Not all slots are taken - but who cares.
 | 
						|
const u32 NUM_ENCODING_PROGRAMS = 64;
 | 
						|
static FRAGMENTSHADER s_encodingPrograms[NUM_ENCODING_PROGRAMS];
 | 
						|
 | 
						|
void CreateRgbToYuyvProgram()
 | 
						|
{
 | 
						|
	// Output is BGRA because that is slightly faster than RGBA.
 | 
						|
 | 
						|
	// TODO: Use the dot() function for faster dot products. Probably mostly helps ATI (nvidia is scalar anyway).
 | 
						|
	const char *FProgram =
 | 
						|
	"uniform samplerRECT samp0 : register(s0);\n"	
 | 
						|
	"void main(\n"
 | 
						|
	"  out float4 ocol0 : COLOR0,\n"
 | 
						|
	"  in float2 uv0 : TEXCOORD0)\n"
 | 
						|
	"{\n"		
 | 
						|
	"  float2 uv1 = float2(uv0.x + 1.0f, uv0.y);\n"
 | 
						|
	"  float3 c0 = texRECT(samp0, uv0).rgb;\n"
 | 
						|
	"  float3 c1 = texRECT(samp0, uv1).rgb;\n"
 | 
						|
 | 
						|
	"  float y0 = (0.257f * c0.r) + (0.504f * c0.g) + (0.098f * c0.b) + 0.0625f;\n"
 | 
						|
	"  float u0 =-(0.148f * c0.r) - (0.291f * c0.g) + (0.439f * c0.b) + 0.5f;\n"
 | 
						|
	"  float v0 = (0.439f * c0.r) - (0.368f * c0.g) - (0.071f * c0.b) + 0.5f;\n"
 | 
						|
	"  float y1 = (0.257f * c1.r) + (0.504f * c1.g) + (0.098f * c1.b) + 0.0625f;\n"
 | 
						|
	"  float u1 =-(0.148f * c1.r) - (0.291f * c1.g) + (0.439f * c1.b) + 0.5f;\n"
 | 
						|
	"  float v1 = (0.439f * c1.r) - (0.368f * c1.g) - (0.071f * c1.b) + 0.5f;\n"
 | 
						|
 | 
						|
	"  ocol0 = float4(y1, (u0 + u1) / 2, y0, (v0 + v1) / 2);\n"
 | 
						|
	"}\n";
 | 
						|
 | 
						|
	if (!PixelShaderCache::CompilePixelShader(s_rgbToYuyvProgram, FProgram)) {
 | 
						|
        ERROR_LOG(VIDEO, "Failed to create RGB to YUYV fragment program");
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
void CreateYuyvToRgbProgram()
 | 
						|
{
 | 
						|
	const char *FProgram =
 | 
						|
	"uniform samplerRECT samp0 : register(s0);\n"	
 | 
						|
	"void main(\n"
 | 
						|
	"  out float4 ocol0 : COLOR0,\n"
 | 
						|
	"  in float2 uv0 : TEXCOORD0)\n"
 | 
						|
	"{\n"		
 | 
						|
	"  float4 c0 = texRECT(samp0, uv0).rgba;\n"
 | 
						|
 | 
						|
	"  float f = step(0.5, frac(uv0.x));\n"
 | 
						|
	"  float y = lerp(c0.b, c0.r, f);\n"
 | 
						|
	"  float yComp = 1.164f * (y - 0.0625f);\n"
 | 
						|
	"  float uComp = c0.g - 0.5f;\n"
 | 
						|
	"  float vComp = c0.a - 0.5f;\n"
 | 
						|
 | 
						|
    "  ocol0 = float4(yComp + (1.596f * vComp),\n"
 | 
						|
	"                 yComp - (0.813f * vComp) - (0.391f * uComp),\n"
 | 
						|
	"                 yComp + (2.018f * uComp),\n"
 | 
						|
	"                 1.0f);\n"
 | 
						|
	"}\n";
 | 
						|
 | 
						|
	if (!PixelShaderCache::CompilePixelShader(s_yuyvToRgbProgram, FProgram)) {
 | 
						|
        ERROR_LOG(VIDEO, "Failed to create YUYV to RGB fragment program");
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
// Returns the cached encoding fragment program for `format`, generating and
// compiling it on first use.
//
// `format` is used directly as an index into s_encodingPrograms, so it must be
// strictly less than NUM_ENCODING_PROGRAMS. An out-of-range value raises a
// PanicAlert and slot 0 is returned instead.
//
// When generation or compilation fails the slot is returned as-is; callers in
// this file test glprogid == 0 to detect an unusable program.
FRAGMENTSHADER &GetOrCreateEncodingShader(u32 format)
{
	// '>=' rather than '>': an index equal to the array size is already one
	// past the last valid slot.
	if (format >= NUM_ENCODING_PROGRAMS)
	{
		PanicAlert("Unknown texture copy format: 0x%x\n", format);
		return s_encodingPrograms[0];
	}

	if (s_encodingPrograms[format].glprogid == 0)
	{
		const char* shader = TextureConversionShader::GenerateEncodingShader(format);

		// Never hand a null source string to the compiler.
		if (!shader)
		{
			ERROR_LOG(VIDEO, "Failed to generate encoding fragment program source");
			return s_encodingPrograms[format];
		}

#if defined(_DEBUG) || defined(DEBUGFAST)
		// Optionally dump every generated shader to its own numbered file.
		if (g_Config.iLog & CONF_SAVESHADERS) {
			static int counter = 0;
			char szTemp[MAX_PATH];
			sprintf(szTemp, "%s/enc_%04i.txt", FULL_DUMP_DIR, counter++);

			SaveData(szTemp, shader);
		}
#endif

		if (!PixelShaderCache::CompilePixelShader(s_encodingPrograms[format], shader)) {
			// Include the Cg compiler listing so the failure can be diagnosed.
			const char* error = cgGetLastListing(g_cgcontext);
			ERROR_LOG(VIDEO, "Failed to create encoding fragment program: %s",
			          error ? error : "(no compiler listing)");
		}
	}

	return s_encodingPrograms[format];
}
 | 
						|
 | 
						|
void Init()
 | 
						|
{
 | 
						|
	glGenFramebuffersEXT(1, &s_texConvFrameBuffer);
 | 
						|
 | 
						|
	glGenRenderbuffersEXT(1, &s_dstRenderBuffer);
 | 
						|
	glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, s_dstRenderBuffer);
 | 
						|
	glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGBA, renderBufferWidth, renderBufferHeight);
 | 
						|
 | 
						|
	glGenTextures(1, &s_srcTexture);
 | 
						|
	glBindTexture(GL_TEXTURE_RECTANGLE_ARB, s_srcTexture);
 | 
						|
	glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
 | 
						|
    glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
 | 
						|
 | 
						|
	CreateRgbToYuyvProgram();
 | 
						|
	CreateYuyvToRgbProgram();
 | 
						|
}
 | 
						|
 | 
						|
void Shutdown()
 | 
						|
{
 | 
						|
	glDeleteTextures(1, &s_srcTexture);	
 | 
						|
	glDeleteRenderbuffersEXT(1, &s_dstRenderBuffer);
 | 
						|
	glDeleteFramebuffersEXT(1, &s_texConvFrameBuffer);
 | 
						|
 | 
						|
	s_rgbToYuyvProgram.Destroy();
 | 
						|
	s_yuyvToRgbProgram.Destroy();
 | 
						|
 | 
						|
	for (int i = 0; i < NUM_ENCODING_PROGRAMS; i++)
 | 
						|
		s_encodingPrograms[i].Destroy();
 | 
						|
 | 
						|
	s_srcTexture = 0;
 | 
						|
	s_dstRenderBuffer = 0;
 | 
						|
	s_texConvFrameBuffer = 0;
 | 
						|
}
 | 
						|
 | 
						|
// Runs `shader` over `srcTexture`, rendering into the converter's renderbuffer,
// then reads the result back into `destAddr`.
//
//   shader       - fragment program to run; its glprogid is bound as-is.
//   srcTexture   - rectangle texture bound to texture unit 0 as the source.
//   sourceRc     - texture coordinates assigned to the corners of the quad.
//   destAddr     - receives dstWidth * dstHeight pixels, read back as
//                  GL_BGRA / GL_UNSIGNED_BYTE. Must not be null.
//   dstWidth,
//   dstHeight    - size in pixels of the viewport and of the read-back region.
//   linearFilter - selects GL_LINEAR (true) or GL_NEAREST (false) sampling.
//                  The filter is set on srcTexture itself and is not restored.
//
// NOTE(review): the renderbuffer is renderBufferWidth x renderBufferHeight;
// requests larger than that are not rejected here - confirm callers stay
// within it.
void EncodeToRamUsingShader(FRAGMENTSHADER& shader, GLuint srcTexture, const TRectangle& sourceRc,
                            u8* destAddr, int dstWidth, int dstHeight, bool linearFilter)
{
	// Bail out before touching any GL state: glReadPixels below would write
	// through a null pointer, and a non-positive size has nothing to encode.
	if (!destAddr || dstWidth <= 0 || dstHeight <= 0)
	{
		ERROR_LOG(VIDEO, "EncodeToRamUsingShader: invalid destination, nothing encoded");
		return;
	}

	Renderer::ResetGLState();

	// switch to texture converter frame buffer
	// attach render buffer as color destination
	Renderer::SetFramebuffer(s_texConvFrameBuffer);
	glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, s_dstRenderBuffer);
	glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_EXT, s_dstRenderBuffer);
	GL_REPORT_ERRORD();

	// Only stage 0 is used by the conversion pass.
	for (int i = 1; i < 8; ++i)
		TextureMngr::DisableStage(i);

	// set source texture
	glActiveTexture(GL_TEXTURE0);
	glEnable(GL_TEXTURE_RECTANGLE_ARB);
	glBindTexture(GL_TEXTURE_RECTANGLE_ARB, srcTexture);

	if (linearFilter)
	{
		glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
		glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
	}
	else
	{
		glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
		glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	}
	GL_REPORT_ERRORD();

	glViewport(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight);

	glEnable(GL_FRAGMENT_PROGRAM_ARB);
	glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader.glprogid);

	// Draw a quad covering the whole viewport.
	glBegin(GL_QUADS);
	glTexCoord2f((float)sourceRc.left, (float)sourceRc.top);     glVertex2f(-1,-1);
	glTexCoord2f((float)sourceRc.left, (float)sourceRc.bottom);  glVertex2f(-1,1);
	glTexCoord2f((float)sourceRc.right, (float)sourceRc.bottom); glVertex2f(1,1);
	glTexCoord2f((float)sourceRc.right, (float)sourceRc.top);    glVertex2f(1,-1);
	glEnd();
	GL_REPORT_ERRORD();

	// .. and then readback the results.
	// TODO: make this less slow.
	glReadPixels(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE, destAddr);
	GL_REPORT_ERRORD();

	// Undo the converter's state changes, then restore the renderer state
	// exactly once, pairing with the single ResetGLState() above.
	Renderer::SetFramebuffer(0);
	VertexShaderManager::SetViewportChanged();

	glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);
	TextureMngr::DisableStage(0);

	Renderer::RestoreGLState();
	GL_REPORT_ERRORD();
}
 | 
						|
 | 
						|
// Encodes a rectangle of the color or depth target into emulated RAM at
// `address`, using the encoding shader for the resulting texture format.
//
//   address         - destination address, translated with Memory_GetPtr.
//   bFromZBuffer    - read from the depth target instead of the color target.
//   bIsIntensityFmt - affects whether _GX_TF_CTF is added to color formats.
//   copyfmt         - raw copy format; combined with the flags above to form
//                     the format used for shader lookup and size computation.
//   bScaleByHalf    - halve the output size and sample with a stride of 2.
//   source          - source rectangle.
//
// Returns without doing anything if no usable shader exists for the format or
// if the destination address does not translate to a pointer.
void EncodeToRam(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, bool bScaleByHalf, const TRectangle& source)
{
	u32 format = copyfmt;

	if (bFromZBuffer)
	{
		format |= _GX_TF_ZTF;
		// NOTE(review): 11 is a raw copy-format value mapped straight to
		// GX_TF_Z16; its symbolic name is not visible here.
		if (copyfmt == 11)
			format = GX_TF_Z16;
		else if (format < GX_TF_Z8 || format > GX_TF_Z24X8)
			format |= _GX_TF_CTF;
	}
	else
	{
		if (copyfmt > GX_TF_RGBA8 || (copyfmt < GX_TF_RGB565 && !bIsIntensityFmt))
			format |= _GX_TF_CTF;
	}

	FRAGMENTSHADER& texconv_shader = GetOrCreateEncodingShader(format);
	if (texconv_shader.glprogid == 0)
		return;

	u8 *dest_ptr = Memory_GetPtr(address);
	// The read-back writes through this pointer, so refuse to continue
	// without a valid destination.
	if (!dest_ptr)
	{
		ERROR_LOG(VIDEO, "EncodeToRam: destination address has no backing memory");
		return;
	}

	u32 source_texture = bFromZBuffer ? Renderer::ResolveAndGetDepthTarget(source) : Renderer::ResolveAndGetRenderTarget(source);
	int width = source.right - source.left;
	int height = source.bottom - source.top;

	// Computed from the unscaled size, i.e. before bScaleByHalf is applied.
	int size_in_bytes = TexDecoder_GetTextureSizeInBytes(width, height, format);

	// Invalidate any existing texture covering this memory range.
	// TODO - don't delete the texture if it already exists, just replace the contents.
	TextureMngr::InvalidateRange(address, size_in_bytes);

	if (bScaleByHalf)
	{
		// Hm. Shouldn't this only scale destination, not source?
		// The bloom in Beyond Good & Evil is a good test case - due to this problem,
		// it goes very wrong. Compare by switching back and forth between Copy textures to RAM and GL Texture.
		// This also affects the shadows in Burnout 2 badly.
		width /= 2;
		height /= 2;
	}

	// Block dimensions minus one, used as alignment masks below.
	u16 blkW = TextureConversionShader::GetBlockWidthInTexels(format) - 1;
	u16 blkH = TextureConversionShader::GetBlockHeightInTexels(format) - 1;
	u16 samples = TextureConversionShader::GetEncodedSampleCount(format);

	// only copy on cache line boundaries
	// extra pixels are copied but not displayed in the resulting texture
	s32 expandedWidth = (width + blkW) & (~blkW);
	s32 expandedHeight = (height + blkH) & (~blkH);

	float MValueX = Renderer::GetTargetScaleX();
	float MValueY = Renderer::GetTargetScaleY();

	float top = Renderer::GetTargetHeight() - (source.top + expandedHeight) * MValueY;

	float sampleStride = bScaleByHalf?2.0f:1.0f;

	TextureConversionShader::SetShaderParameters((float)expandedWidth, expandedHeight * MValueY, source.left * MValueX, top, sampleStride * MValueX, sampleStride * MValueY);

	// Each output pixel carries `samples` encoded samples, so the rendered
	// width is the expanded width divided by that count.
	TRectangle scaledSource;
	scaledSource.top = 0;
	scaledSource.bottom = expandedHeight;
	scaledSource.left = 0;
	scaledSource.right = expandedWidth / samples;

	EncodeToRamUsingShader(texconv_shader, source_texture, scaledSource, dest_ptr, expandedWidth / samples, expandedHeight, bScaleByHalf);
}
 | 
						|
 | 
						|
// Encodes `srcTexture` into `destAddr` with the RGB -> YUYV program.
// The program packs two source texels into each output texel, so the pass is
// rendered at half of dstWidth. Nearest-neighbour sampling is always used.
void EncodeToRamYUYV(GLuint srcTexture, const TRectangle& sourceRc,
                     u8* destAddr, int dstWidth, int dstHeight)
{
	const int packedWidth = dstWidth / 2;
	const bool useLinearFilter = false;

	EncodeToRamUsingShader(s_rgbToYuyvProgram, srcTexture, sourceRc,
	                       destAddr, packedWidth, dstHeight, useLinearFilter);
}
 | 
						|
 | 
						|
 | 
						|
// Should be scale free.
 | 
						|
void DecodeToTexture(u8* srcAddr, int srcWidth, int srcHeight, GLuint destTexture)
 | 
						|
{
 | 
						|
	Renderer::ResetGLState();
 | 
						|
 | 
						|
	float srcFormatFactor = 0.5f;
 | 
						|
	float srcFmtWidth = srcWidth * srcFormatFactor;
 | 
						|
 | 
						|
	// swich to texture converter frame buffer
 | 
						|
	// attach destTexture as color destination
 | 
						|
	Renderer::SetFramebuffer(s_texConvFrameBuffer);
 | 
						|
	glBindTexture(GL_TEXTURE_RECTANGLE_ARB, destTexture);	
 | 
						|
	glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, destTexture, 0);
 | 
						|
 | 
						|
    for (int i = 1; i < 8; ++i)
 | 
						|
		TextureMngr::DisableStage(i);
 | 
						|
 | 
						|
	// activate source texture
 | 
						|
	// set srcAddr as data for source texture
 | 
						|
	glActiveTexture(GL_TEXTURE0);
 | 
						|
	glEnable(GL_TEXTURE_RECTANGLE_ARB);
 | 
						|
	glBindTexture(GL_TEXTURE_RECTANGLE_ARB, s_srcTexture);
 | 
						|
 | 
						|
	// TODO: make this less slow.  (How?)
 | 
						|
    glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, (GLsizei)srcFmtWidth, (GLsizei)srcHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr);	
 | 
						|
 | 
						|
	glViewport(0, 0, srcWidth, srcHeight);
 | 
						|
 | 
						|
	glEnable(GL_FRAGMENT_PROGRAM_ARB);
 | 
						|
    glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, s_yuyvToRgbProgram.glprogid);	
 | 
						|
 | 
						|
	GL_REPORT_ERRORD();
 | 
						|
 | 
						|
    glBegin(GL_QUADS);
 | 
						|
	glTexCoord2f(srcFmtWidth, (float)srcHeight); glVertex2f(1,-1);
 | 
						|
	glTexCoord2f(srcFmtWidth, 0); glVertex2f(1,1);
 | 
						|
	glTexCoord2f(0, 0); glVertex2f(-1,1);
 | 
						|
	glTexCoord2f(0, (float)srcHeight); glVertex2f(-1,-1);
 | 
						|
    glEnd();	
 | 
						|
 | 
						|
	// reset state
 | 
						|
	glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);
 | 
						|
    TextureMngr::DisableStage(0);
 | 
						|
 | 
						|
	VertexShaderManager::SetViewportChanged();
 | 
						|
 | 
						|
	Renderer::RestoreGLState();
 | 
						|
    GL_REPORT_ERRORD();
 | 
						|
}
 | 
						|
 | 
						|
}  // namespace
 |