| 
									
										
										
										
											2008-12-08 05:30:24 +00:00
										 |  |  | // Copyright (C) 2003-2008 Dolphin Project.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // This program is free software: you can redistribute it and/or modify
 | 
					
						
							|  |  |  | // it under the terms of the GNU General Public License as published by
 | 
					
						
							|  |  |  | // the Free Software Foundation, version 2.0.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // This program is distributed in the hope that it will be useful,
 | 
					
						
							|  |  |  | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
					
						
							|  |  |  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
					
						
							|  |  |  | // GNU General Public License 2.0 for more details.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // A copy of the GPL 2.0 should have been included with the program.
 | 
					
						
							|  |  |  | // If not, see http://www.gnu.org/licenses/
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Official SVN repository and contact information can be found at
 | 
					
						
							|  |  |  | // http://code.google.com/p/dolphin-emu/
 | 
					
						
							|  |  |  | #include "Common.h"
 | 
					
						
							|  |  |  | #include "x64Emitter.h"
 | 
					
						
							|  |  |  | #include "ABI.h"
 | 
					
						
							|  |  |  | #include "CPUDetect.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | namespace Gen | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	static u8 *code; | 
					
						
							|  |  |  | 	static bool enableBranchHints = false; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void SetCodePtr(u8 *ptr) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		code = ptr; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	const u8 *GetCodePtr() | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		return code; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	u8 *GetWritableCodePtr() | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		return code; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void ReserveCodeSpace(int bytes) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		for (int i = 0; i < bytes; i++) | 
					
						
							|  |  |  | 			*code++ = 0xCC; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	const u8 *AlignCode4() | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		int c = int((u64)code & 3); | 
					
						
							|  |  |  | 		if (c) | 
					
						
							|  |  |  | 			ReserveCodeSpace(4-c); | 
					
						
							|  |  |  | 		return code; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	const u8 *AlignCode16() | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		int c = int((u64)code & 15); | 
					
						
							|  |  |  | 		if (c) | 
					
						
							|  |  |  | 			ReserveCodeSpace(16-c); | 
					
						
							|  |  |  | 		return code; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	const u8 *AlignCodePage() | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		int c = int((u64)code & 4095); | 
					
						
							|  |  |  | 		if (c) | 
					
						
							|  |  |  | 			ReserveCodeSpace(4096-c); | 
					
						
							|  |  |  | 		return code; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	inline void Write8(u8 value) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		*code++ = value; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	inline void Write16(u16 value) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		*(u16*)code = value; | 
					
						
							|  |  |  | 		code += 2; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	inline void Write32(u32 value) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		*(u32*)code = value; | 
					
						
							|  |  |  | 		code += 4; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	inline void Write64(u64 value) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		*(u64*)code = value; | 
					
						
							|  |  |  | 		code += 8; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void WriteModRM( int mod, int rm, int reg ) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		Write8((u8)((mod << 6) | ((rm & 7) << 3) | (reg & 7))); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void WriteSIB(int scale, int index, int base) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		Write8((u8)((scale << 6) | ((index & 7) << 3) | (base & 7))); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void OpArg::WriteRex(bool op64, int customOp) const | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | #ifdef _M_X64
 | 
					
						
							|  |  |  | 		u8 op = 0x40; | 
					
						
							|  |  |  | 		if (customOp == -1)       customOp = operandReg; | 
					
						
							|  |  |  | 		if (op64)                 op |= 8; | 
					
						
							|  |  |  | 		if (customOp >> 3)        op |= 4; | 
					
						
							|  |  |  | 		if (indexReg >> 3)        op |= 2; | 
					
						
							|  |  |  | 		if (offsetOrBaseReg >> 3) op |= 1; //TODO investigate if this is dangerous
 | 
					
						
							|  |  |  | 		if (op != 0x40) | 
					
						
							|  |  |  | 			Write8(op); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  | 		_dbg_assert_(DYNA_REC, (operandReg >> 3) == 0); | 
					
						
							|  |  |  | 		_dbg_assert_(DYNA_REC, (indexReg >> 3) == 0); | 
					
						
							|  |  |  | 		_dbg_assert_(DYNA_REC, (offsetOrBaseReg >> 3) == 0); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void OpArg::WriteRest(int extraBytes, X64Reg _operandReg) const | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (_operandReg == 0xff) | 
					
						
							|  |  |  | 			_operandReg = (X64Reg)this->operandReg; | 
					
						
							|  |  |  | 		int mod = 0; | 
					
						
							|  |  |  | 		int ireg = indexReg; | 
					
						
							|  |  |  | 		bool SIB = false; | 
					
						
							|  |  |  | 		int _offsetOrBaseReg = this->offsetOrBaseReg; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (scale == SCALE_RIP) //Also, on 32-bit, just an immediate address
 | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			// Oh, RIP addressing. 
 | 
					
						
							|  |  |  | 			_offsetOrBaseReg = 5; | 
					
						
							|  |  |  | 			WriteModRM(0, _operandReg&7, 5); | 
					
						
							|  |  |  | 			//TODO : add some checks
 | 
					
						
							|  |  |  | #ifdef _M_X64
 | 
					
						
							|  |  |  | 			u64 ripAddr = (u64)code + 4 + extraBytes; | 
					
						
							|  |  |  | 			s32 offs = (s32)((s64)offset - (s64)ripAddr); | 
					
						
							|  |  |  | 			Write32((u32)offs); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  | 			Write32((u32)offset); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 			return; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (scale == 0) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			// Oh, no memory, Just a reg.
 | 
					
						
							|  |  |  | 			mod = 3; //11
 | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else if (scale >= 1) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			//Ah good, no scaling.
 | 
					
						
							|  |  |  | 			if (scale == SCALE_ATREG && !((_offsetOrBaseReg&7) == 4 || (_offsetOrBaseReg&7) == 5)) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				//Okay, we're good. No SIB necessary.
 | 
					
						
							|  |  |  | 				int ioff = (int)offset; | 
					
						
							|  |  |  | 				if (ioff == 0) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					mod = 0; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				else if (ioff<-128 || ioff>127) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					mod = 2; //32-bit displacement
 | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				else | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					mod = 1; //8-bit displacement
 | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else //if (scale != SCALE_ATREG)
 | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				if ((_offsetOrBaseReg & 7) == 4) //this would occupy the SIB encoding :(
 | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					//So we have to fake it with SIB encoding :(
 | 
					
						
							|  |  |  | 					SIB = true; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 				if (scale >= SCALE_1 && scale < SCALE_ATREG) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					SIB = true; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 				if (scale == SCALE_ATREG && _offsetOrBaseReg == 4)  | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					SIB = true; | 
					
						
							|  |  |  | 					ireg = 4; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 				//Okay, we're fine. Just disp encoding.
 | 
					
						
							|  |  |  | 				//We need displacement. Which size?
 | 
					
						
							|  |  |  | 				int ioff = (int)(s64)offset; | 
					
						
							|  |  |  | 				if (ioff < -128 || ioff > 127) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					mod = 2; //32-bit displacement
 | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				else | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					mod = 1; //8-bit displacement
 | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// Okay. Time to do the actual writing
 | 
					
						
							|  |  |  | 		// ModRM byte:
 | 
					
						
							|  |  |  | 		int oreg = _offsetOrBaseReg; | 
					
						
							|  |  |  | 		if (SIB) | 
					
						
							|  |  |  | 			oreg = 4; | 
					
						
							|  |  |  | 		 | 
					
						
							|  |  |  | 		// TODO(ector): WTF is this if about? I don't remember writing it :-)
 | 
					
						
							|  |  |  | 		//if (RIP)
 | 
					
						
							|  |  |  | 		//    oreg = 5;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		WriteModRM(mod, _operandReg&7, oreg&7); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (SIB) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			//SIB byte
 | 
					
						
							|  |  |  | 			int ss; | 
					
						
							|  |  |  | 			switch (scale) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 			case 0: _offsetOrBaseReg = 4; ss = 0; break; //RSP 
 | 
					
						
							|  |  |  | 			case 1: ss = 0; break; | 
					
						
							|  |  |  | 			case 2: ss = 1; break; | 
					
						
							|  |  |  | 			case 4: ss = 2; break; | 
					
						
							|  |  |  | 			case 8: ss = 3; break; | 
					
						
							|  |  |  | 			case SCALE_ATREG: ss = 0; break; | 
					
						
							|  |  |  | 			default: _assert_msg_(DYNA_REC, 0, "Invalid scale for SIB byte"); ss = 0; break; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			Write8((u8)((ss << 6) | ((ireg&7)<<3) | (_offsetOrBaseReg&7))); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (mod == 1) //8-bit disp
 | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Write8((u8)(s8)(s32)offset); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else if (mod == 2) //32-bit disp
 | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Write32((u32)offset); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// W = operand extended width (1 if 64-bit)
 | 
					
						
							|  |  |  | 	// R = register# upper bit
 | 
					
						
							|  |  |  | 	// X = scale amnt upper bit
 | 
					
						
							|  |  |  | 	// B = base register# upper bit
 | 
					
						
							|  |  |  | 	void Rex(int w, int r, int x, int b) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		w = w ? 1 : 0;		 | 
					
						
							|  |  |  | 		r = r ? 1 : 0; | 
					
						
							|  |  |  | 		x = x ? 1 : 0; | 
					
						
							|  |  |  | 		b = b ? 1 : 0; | 
					
						
							|  |  |  | 		u8 rx = (u8)(0x40 | (w << 3) | (r << 2) | (x << 1) | (b)); | 
					
						
							|  |  |  | 		if (rx != 0x40) | 
					
						
							|  |  |  | 			Write8(rx); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void JMP(const u8 *addr, bool force5Bytes) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		u64 fn = (u64)addr; | 
					
						
							|  |  |  | 		if (!force5Bytes) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			s32 distance = (s32)(fn - ((u64)code + 2)); //TODO - sanity check
 | 
					
						
							|  |  |  | 			//8 bits will do
 | 
					
						
							|  |  |  | 			Write8(0xEB); | 
					
						
							|  |  |  | 			Write8((u8)(s8)distance); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			s32 distance = (s32)(fn - ((u64)code + 5)); //TODO - sanity check
 | 
					
						
							|  |  |  | 			Write8(0xE9); | 
					
						
							|  |  |  | 			Write32((u32)(s32)distance); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void JMPptr(const OpArg &arg2) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		OpArg arg = arg2; | 
					
						
							|  |  |  | 		if (arg.IsImm()) _assert_msg_(DYNA_REC, 0, "JMPptr - Imm argument"); | 
					
						
							|  |  |  | 		arg.operandReg = 4; | 
					
						
							|  |  |  | 		arg.WriteRex(false); | 
					
						
							|  |  |  | 		Write8(0xFF); | 
					
						
							|  |  |  | 		arg.WriteRest(); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	//Can be used to trap other processors, before overwriting their code
 | 
					
						
							|  |  |  | 	// not used in dolphin
 | 
					
						
							|  |  |  | 	void JMPself() | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		Write8(0xEB); | 
					
						
							|  |  |  | 		Write8(0xFE); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void CALLptr(OpArg arg) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (arg.IsImm()) _assert_msg_(DYNA_REC, 0, "CALLptr - Imm argument"); | 
					
						
							|  |  |  | 		arg.operandReg = 2; | 
					
						
							|  |  |  | 		arg.WriteRex(false); | 
					
						
							|  |  |  | 		Write8(0xFF); | 
					
						
							|  |  |  | 		arg.WriteRest(); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void CALL(void *fnptr) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		u64 distance = u64(fnptr) - (u64(code) + 5); | 
					
						
							|  |  |  | 		if (distance >= 0x0000000080000000ULL | 
					
						
							|  |  |  | 		 && distance <  0xFFFFFFFF80000000ULL) { | 
					
						
							|  |  |  | 			PanicAlert("CALL out of range (%p calls %p)", code, fnptr); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		Write8(0xE8); | 
					
						
							|  |  |  | 		Write32(u32(distance)); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	FixupBranch J(bool force5bytes) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		FixupBranch branch; | 
					
						
							|  |  |  | 		branch.type = force5bytes ? 1 : 0; | 
					
						
							|  |  |  | 		branch.ptr = code + (force5bytes ? 5 : 2); | 
					
						
							|  |  |  | 		if (!force5bytes) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			//8 bits will do
 | 
					
						
							|  |  |  | 			Write8(0xEB); | 
					
						
							|  |  |  | 			Write8(0); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Write8(0xE9); | 
					
						
							|  |  |  | 			Write32(0); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		return branch; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // These are to be used with Jcc only.
 | 
					
						
							|  |  |  | 	// Found in intel manual 2A 
 | 
					
						
							|  |  |  | 	// These do not really make a difference for any current X86 CPU, 
 | 
					
						
							|  |  |  | 	// but are provided here for future use
 | 
					
						
							|  |  |  | 	void HINT_NOT_TAKEN() { if (enableBranchHints) Write8(0x2E); } | 
					
						
							|  |  |  | 	void HINT_TAKEN()     { if (enableBranchHints) Write8(0x3E); } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	FixupBranch J_CC(CCFlags conditionCode, bool force5bytes) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		FixupBranch branch; | 
					
						
							|  |  |  | 		branch.type = force5bytes ? 1 : 0; | 
					
						
							|  |  |  | 		branch.ptr = code + (force5bytes ? 5 : 2); | 
					
						
							|  |  |  | 		if (!force5bytes) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			//8 bits will do
 | 
					
						
							|  |  |  | 			Write8(0x70 + conditionCode); | 
					
						
							|  |  |  | 			Write8(0); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Write8(0x0F); | 
					
						
							|  |  |  | 			Write8(0x80 + conditionCode); | 
					
						
							|  |  |  | 			Write32(0); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		return branch; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		u64 fn = (u64)addr; | 
					
						
							|  |  |  | 		if (!force5Bytes) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			s32 distance = (s32)(fn - ((u64)code + 2)); //TODO - sanity check
 | 
					
						
							|  |  |  | 			//8 bits will do
 | 
					
						
							|  |  |  | 			Write8(0x70 + conditionCode); | 
					
						
							|  |  |  | 			Write8((u8)(s8)distance); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			s32 distance = (s32)(fn - ((u64)code + 6)); //TODO - sanity check
 | 
					
						
							|  |  |  | 			Write8(0x0F); | 
					
						
							|  |  |  | 			Write8(0x80 + conditionCode); | 
					
						
							|  |  |  | 			Write32((u32)(s32)distance); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void SetJumpTarget(const FixupBranch &branch) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (branch.type == 0) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			branch.ptr[-1] = (u8)(s8)(code - branch.ptr); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else if (branch.type == 1) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			((s32*)branch.ptr)[-1] = (s32)(code - branch.ptr); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  | 	void INC(int bits, OpArg arg) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (arg.IsImm()) _assert_msg_(DYNA_REC, 0, "INC - Imm argument"); | 
					
						
							|  |  |  | 		arg.operandReg = 0; | 
					
						
							|  |  |  | 		if (bits == 16) {Write8(0x66);} | 
					
						
							|  |  |  | 		arg.WriteRex(bits == 64); | 
					
						
							|  |  |  | 		Write8(bits == 8 ? 0xFE : 0xFF); | 
					
						
							|  |  |  | 		arg.WriteRest(); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	void DEC(int bits, OpArg arg) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (arg.IsImm()) _assert_msg_(DYNA_REC, 0, "DEC - Imm argument"); | 
					
						
							|  |  |  | 		arg.operandReg = 1; | 
					
						
							|  |  |  | 		if (bits == 16) {Write8(0x66);} | 
					
						
							|  |  |  | 		arg.WriteRex(bits == 64); | 
					
						
							|  |  |  | 		Write8(bits == 8 ? 0xFE : 0xFF); | 
					
						
							|  |  |  | 		arg.WriteRest(); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	//Single byte opcodes
 | 
					
						
							|  |  |  | 	//There is no PUSHAD/POPAD in 64-bit mode.
 | 
					
						
							|  |  |  | 	void INT3() {Write8(0xCC);} | 
					
						
							|  |  |  | 	void RET()  {Write8(0xC3);} | 
					
						
							|  |  |  | 	void RET_FAST()  {Write8(0xF3); Write8(0xC3);} //two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a ret
 | 
					
						
							|  |  |  | 	 | 
					
						
							|  |  |  | 	void NOP(int count) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		// TODO: look up the fastest nop sleds for various sizes
 | 
					
						
							|  |  |  | 		int i; | 
					
						
							|  |  |  | 		switch (count) { | 
					
						
							|  |  |  | 		case 1: | 
					
						
							|  |  |  | 			Write8(0x90); | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		case 2: | 
					
						
							|  |  |  | 			Write8(0x66); | 
					
						
							|  |  |  | 			Write8(0x90); | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		default: | 
					
						
							|  |  |  | 			for (i = 0; i < count; i++) { | 
					
						
							|  |  |  | 				Write8(0x90); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	}  | 
					
						
							|  |  |  | 	 | 
					
						
							|  |  |  | 	void PAUSE() {Write8(0xF3); NOP();} //use in tight spinloops for energy saving on some cpu
 | 
					
						
							|  |  |  | 	void CLC()  {Write8(0xF8);} //clear carry
 | 
					
						
							|  |  |  | 	void CMC()  {Write8(0xF5);} //flip carry
 | 
					
						
							|  |  |  | 	void STC()  {Write8(0xF9);} //set carry
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	//TODO: xchg ah, al ???
 | 
					
						
							|  |  |  | 	void XCHG_AHAL() | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		Write8(0x86); | 
					
						
							|  |  |  | 		Write8(0xe0); | 
					
						
							|  |  |  | 		// alt. 86 c4
 | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	//These two can not be executed on early Intel 64-bit CPU:s, only on AMD!
 | 
					
						
							|  |  |  | 	void LAHF() {Write8(0x9F);} | 
					
						
							|  |  |  | 	void SAHF() {Write8(0x9E);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void PUSHF() {Write8(0x9C);} | 
					
						
							|  |  |  | 	void POPF()  {Write8(0x9D);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void LFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xE8);} | 
					
						
							|  |  |  | 	void MFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF0);} | 
					
						
							|  |  |  | 	void SFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF8);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void WriteSimple1Byte(int bits, u8 byte, X64Reg reg) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (bits == 16) {Write8(0x66);} | 
					
						
							|  |  |  | 		Rex(bits == 64, 0, 0, (int)reg >> 3); | 
					
						
							|  |  |  | 		Write8(byte + ((int)reg & 7)); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (bits == 16) {Write8(0x66);} | 
					
						
							|  |  |  | 		Rex(bits==64, 0, 0, (int)reg >> 3); | 
					
						
							|  |  |  | 		Write8(byte1); | 
					
						
							|  |  |  | 		Write8(byte2 + ((int)reg & 7)); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void CWD(int bits) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (bits == 16) {Write8(0x66);} | 
					
						
							|  |  |  | 		Rex(bits == 64, 0, 0, 0); | 
					
						
							|  |  |  | 		Write8(0x99); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void CBW(int bits) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (bits == 8) {Write8(0x66);} | 
					
						
							|  |  |  | 		Rex(bits == 32, 0, 0, 0); | 
					
						
							|  |  |  | 		Write8(0x98); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	//Simple opcodes
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	//push/pop do not need wide to be 64-bit
 | 
					
						
							|  |  |  | 	void PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);} | 
					
						
							|  |  |  | 	void POP(X64Reg reg)  {WriteSimple1Byte(32, 0x58, reg);} | 
					
						
							|  |  |  | 	 | 
					
						
							|  |  |  | 	void PUSH(int bits, const OpArg ®)  | 
					
						
							|  |  |  | 	{  | 
					
						
							|  |  |  | 		if (reg.IsSimpleReg()) | 
					
						
							|  |  |  | 			PUSH(reg.GetSimpleReg()); | 
					
						
							|  |  |  | 		else if (reg.IsImm()) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			switch (reg.GetImmBits()) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 			case 8: | 
					
						
							|  |  |  | 				Write8(0x6A); | 
					
						
							|  |  |  | 				Write8((u8)(s8)reg.offset); | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			case 16: | 
					
						
							|  |  |  | 				Write8(0x66); | 
					
						
							|  |  |  | 				Write8(0x68); | 
					
						
							|  |  |  | 				Write16((u16)(s16)(s32)reg.offset); | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			case 32: | 
					
						
							|  |  |  | 				Write8(0x68); | 
					
						
							|  |  |  | 				Write32((u32)reg.offset); | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			default: | 
					
						
							|  |  |  | 				_assert_msg_(DYNA_REC, 0, "PUSH - Bad imm bits"); | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			//INT3();
 | 
					
						
							|  |  |  | 			if (bits == 16) | 
					
						
							|  |  |  | 				Write8(0x66); | 
					
						
							|  |  |  | 			reg.WriteRex(bits == 64); | 
					
						
							|  |  |  | 			Write8(0xFF); | 
					
						
							|  |  |  | 			reg.WriteRest(0,(X64Reg)6); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void POP(int /*bits*/, const OpArg ®) | 
					
						
							|  |  |  | 	{  | 
					
						
							|  |  |  | 		if (reg.IsSimpleReg()) | 
					
						
							|  |  |  | 			POP(reg.GetSimpleReg()); | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 			INT3(); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void BSWAP(int bits, X64Reg reg) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (bits >= 32) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			WriteSimple2Byte(bits, 0x0F, 0xC8, reg); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else if (bits == 16) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			//fake 16-bit bswap, TODO replace with xchg ah, al where appropriate
 | 
					
						
							|  |  |  | 			WriteSimple2Byte(false, 0x0F, 0xC8, reg); | 
					
						
							|  |  |  | 			SHR(32, R(reg), Imm8(16)); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else if (bits == 8) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			_assert_msg_(DYNA_REC, 0, "BSWAP - Wrong number of bits"); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Undefined opcode - reserved
 | 
					
						
							|  |  |  | 	// If we ever need a way to always cause a non-breakpoint hard exception...
 | 
					
						
							|  |  |  | 	void UD2() | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		Write8(0x0F); | 
					
						
							|  |  |  | 		Write8(0x0B); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void PREFETCH(PrefetchLevel level, OpArg arg) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (arg.IsImm()) _assert_msg_(DYNA_REC, 0, "PREFETCH - Imm argument");; | 
					
						
							|  |  |  | 		arg.operandReg = (u8)level; | 
					
						
							|  |  |  | 		arg.WriteRex(false); | 
					
						
							|  |  |  | 		Write8(0x0F); | 
					
						
							|  |  |  | 		Write8(0x18); | 
					
						
							|  |  |  | 		arg.WriteRest(); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void SETcc(CCFlags flag, OpArg dest) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (dest.IsImm()) _assert_msg_(DYNA_REC, 0, "SETcc - Imm argument"); | 
					
						
							|  |  |  | 		dest.operandReg = 0; | 
					
						
							|  |  |  | 		dest.WriteRex(false); | 
					
						
							|  |  |  | 		Write8(0x0F); | 
					
						
							|  |  |  | 		Write8(0x90 + (u8)flag); | 
					
						
							|  |  |  | 		dest.WriteRest(); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "CMOVcc - Imm argument"); | 
					
						
							|  |  |  | 		src.operandReg = dest; | 
					
						
							|  |  |  | 		src.WriteRex(bits == 64); | 
					
						
							|  |  |  | 		Write8(0x0F); | 
					
						
							|  |  |  | 		Write8(0x40 + (u8)flag); | 
					
						
							|  |  |  | 		src.WriteRest(); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void WriteMulDivType(int bits, OpArg src, int ext) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "WriteMulDivType - Imm argument"); | 
					
						
							|  |  |  | 		src.operandReg = ext; | 
					
						
							|  |  |  | 		if (bits == 16) Write8(0x66); | 
					
						
							|  |  |  | 		src.WriteRex(bits == 64); | 
					
						
							|  |  |  | 		if (bits == 8) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Write8(0xF6); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else  | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Write8(0xF7); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		src.WriteRest(); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void MUL(int bits, OpArg src)  {WriteMulDivType(bits, src, 4);} | 
					
						
							|  |  |  | 	void DIV(int bits, OpArg src)  {WriteMulDivType(bits, src, 6);} | 
					
						
							|  |  |  | 	void IMUL(int bits, OpArg src) {WriteMulDivType(bits, src, 5);} | 
					
						
							|  |  |  | 	void IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);} | 
					
						
							|  |  |  | 	void NEG(int bits, OpArg src)  {WriteMulDivType(bits, src, 3);} | 
					
						
							|  |  |  | 	void NOT(int bits, OpArg src)  {WriteMulDivType(bits, src, 2);} | 
					
						
							|  |  |  | 	 | 
					
						
							|  |  |  | 	void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "WriteBitSearchType - Imm argument"); | 
					
						
							|  |  |  | 		src.operandReg = (u8)dest; | 
					
						
							|  |  |  | 		if (bits == 16) Write8(0x66); | 
					
						
							|  |  |  | 		src.WriteRex(bits == 64); | 
					
						
							|  |  |  | 		Write8(0x0F); | 
					
						
							|  |  |  | 		Write8(byte2); | 
					
						
							|  |  |  | 		src.WriteRest(); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void MOVNTI(int bits, OpArg dest, X64Reg src) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (bits <= 16) _assert_msg_(DYNA_REC, 0, "MOVNTI - bits<=16"); | 
					
						
							|  |  |  | 		WriteBitSearchType(bits, src, dest, 0xC3); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void BSF(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBC);} //bottom bit to top bit
 | 
					
						
							|  |  |  | 	void BSR(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBD);} //top bit to bottom bit
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "MOVSX - Imm argument"); | 
					
						
							|  |  |  | 		if (dbits == sbits) { | 
					
						
							|  |  |  | 			MOV(dbits, R(dest), src); | 
					
						
							|  |  |  | 			return; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		src.operandReg = (u8)dest; | 
					
						
							|  |  |  | 		if (dbits == 16) Write8(0x66); | 
					
						
							|  |  |  | 		src.WriteRex(dbits == 64); | 
					
						
							|  |  |  | 		if (sbits == 8) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Write8(0x0F); | 
					
						
							|  |  |  | 			Write8(0xBE); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else if (sbits == 16) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Write8(0x0F); | 
					
						
							|  |  |  | 			Write8(0xBF); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else if (sbits == 32 && dbits == 64) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Write8(0x63); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Crash(); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		src.WriteRest(); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "MOVZX - Imm argument"); | 
					
						
							|  |  |  | 		if (dbits == sbits) { | 
					
						
							|  |  |  | 			MOV(dbits, R(dest), src); | 
					
						
							|  |  |  | 			return; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		src.operandReg = (u8)dest; | 
					
						
							|  |  |  | 		if (dbits == 16) Write8(0x66); | 
					
						
							|  |  |  | 		src.WriteRex(dbits == 64); | 
					
						
							|  |  |  | 		if (sbits == 8) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Write8(0x0F); | 
					
						
							|  |  |  | 			Write8(0xB6); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else if (sbits == 16) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Write8(0x0F); | 
					
						
							|  |  |  | 			Write8(0xB7); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Crash(); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		src.WriteRest(); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void LEA(int bits, X64Reg dest, OpArg src) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "LEA - Imm argument"); | 
					
						
							|  |  |  | 		src.operandReg = (u8)dest; | 
					
						
							|  |  |  | 		if (bits == 16) Write8(0x66); //TODO: performance warning
 | 
					
						
							|  |  |  | 		src.WriteRex(bits == 64); | 
					
						
							|  |  |  | 		Write8(0x8D); | 
					
						
							|  |  |  | 		src.WriteRest(); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	//shift can be either imm8 or cl
 | 
					
						
							|  |  |  | 	void WriteShift(int bits, OpArg dest, OpArg &shift, int ext) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		bool writeImm = false; | 
					
						
							|  |  |  | 		if (dest.IsImm()) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			_assert_msg_(DYNA_REC, 0, "WriteShift - can't shift imms"); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8)) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			_assert_msg_(DYNA_REC, 0, "WriteShift - illegal argument");  | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		dest.operandReg = ext; | 
					
						
							|  |  |  | 		if (bits == 16) Write8(0x66); | 
					
						
							|  |  |  | 		dest.WriteRex(bits == 64); | 
					
						
							|  |  |  | 		if (shift.GetImmBits() == 8) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			//ok an imm
 | 
					
						
							|  |  |  | 			u8 imm = (u8)shift.offset; | 
					
						
							|  |  |  | 			if (imm == 1) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				Write8(bits == 8 ? 0xD0 : 0xD1); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				writeImm = true; | 
					
						
							|  |  |  | 				Write8(bits == 8 ? 0xC0 : 0xC1); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Write8(bits == 8 ? 0xD2 : 0xD3); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		dest.WriteRest(writeImm ? 1 : 0); | 
					
						
							|  |  |  | 		if (writeImm) | 
					
						
							|  |  |  | 			Write8((u8)shift.offset); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// large rotates and shift are slower on intel than amd
 | 
					
						
							|  |  |  | 	// intel likes to rotate by 1, and the op is smaller too
 | 
					
						
							|  |  |  | 	void ROL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 0);} | 
					
						
							|  |  |  | 	void ROR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 1);} | 
					
						
							|  |  |  | 	void RCL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 2);} | 
					
						
							|  |  |  | 	void RCR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 3);} | 
					
						
							|  |  |  | 	void SHL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 4);} | 
					
						
							|  |  |  | 	void SHR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 5);} | 
					
						
							|  |  |  | 	void SAR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 7);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void OpArg::WriteSingleByteOp(u8 op, X64Reg _operandReg, int bits) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (bits == 16) | 
					
						
							|  |  |  | 			Write8(0x66); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		this->operandReg = (u8)_operandReg; | 
					
						
							|  |  |  | 		WriteRex(bits == 64); | 
					
						
							|  |  |  | 		Write8(op); | 
					
						
							|  |  |  | 		WriteRest(); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	//todo : add eax special casing
 | 
					
						
							|  |  |  | 	struct NormalOpDef | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, ext; | 
					
						
							|  |  |  | 	}; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	const NormalOpDef nops[11] =  | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		{0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0}, //ADD
 | 
					
						
							|  |  |  | 		{0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 2}, //ADC
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		{0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 5}, //SUB
 | 
					
						
							|  |  |  | 		{0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 3}, //SBB
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		{0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 4}, //AND
 | 
					
						
							|  |  |  | 		{0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 1}, //OR
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		{0x30, 0x31, 0x32, 0x33, 0x80, 0x81, 0x83, 6}, //XOR
 | 
					
						
							|  |  |  | 		{0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0}, //MOV
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		{0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0}, //TEST (to == from)
 | 
					
						
							|  |  |  | 		{0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 7}, //CMP
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		{0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 7}, //XCHG
 | 
					
						
							|  |  |  | 	}; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	//operand can either be immediate or register
 | 
					
						
							|  |  |  | 	void OpArg::WriteNormalOp(bool toRM, NormalOp op, const OpArg &operand, int bits) const | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		X64Reg _operandReg = (X64Reg)this->operandReg; | 
					
						
							|  |  |  | 		if (IsImm()) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			_assert_msg_(DYNA_REC, 0, "WriteNormalOp - Imm argument, wrong order"); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (bits == 16) | 
					
						
							|  |  |  | 			Write8(0x66); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		int immToWrite = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (operand.IsImm()) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			_operandReg = (X64Reg)0; | 
					
						
							|  |  |  | 			WriteRex(bits == 64); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			if (!toRM) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				_assert_msg_(DYNA_REC, 0, "WriteNormalOp - Writing to Imm (!toRM)"); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			if (operand.scale == SCALE_IMM8 && bits == 8)  | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				Write8(nops[op].imm8); | 
					
						
							|  |  |  | 				_assert_msg_(DYNA_REC, code[-1] != 0xCC, "ARGH1"); | 
					
						
							|  |  |  | 				immToWrite = 8; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else if ((operand.scale == SCALE_IMM16 && bits == 16) || | 
					
						
							|  |  |  |                                  (operand.scale == SCALE_IMM32 && bits == 32) ||  | 
					
						
							|  |  |  |                                  (operand.scale == SCALE_IMM32 && bits == 64)) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				Write8(nops[op].imm32); | 
					
						
							|  |  |  | 				_assert_msg_(DYNA_REC, code[-1] != 0xCC, "ARGH2"); | 
					
						
							|  |  |  | 				immToWrite = 32; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else if ((operand.scale == SCALE_IMM8 && bits == 16) || | 
					
						
							|  |  |  |                                  (operand.scale == SCALE_IMM8 && bits == 32) || | 
					
						
							|  |  |  |                                  (operand.scale == SCALE_IMM8 && bits == 64)) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				Write8(nops[op].simm8); | 
					
						
							|  |  |  | 				_assert_msg_(DYNA_REC, code[-1] != 0xCC, "ARGH3"); | 
					
						
							|  |  |  | 				immToWrite = 8; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else if (operand.scale == SCALE_IMM64 && bits == 64) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				if (op == nrmMOV) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					Write8(0xB8 + (offsetOrBaseReg & 7)); | 
					
						
							|  |  |  | 					Write64((u64)operand.offset); | 
					
						
							|  |  |  | 					return; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				_assert_msg_(DYNA_REC, 0, "WriteNormalOp - Only MOV can take 64-bit imm"); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				_assert_msg_(DYNA_REC, 0, "WriteNormalOp - Unhandled case"); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			_operandReg = (X64Reg)nops[op].ext; //pass extension in REG of ModRM
 | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			_operandReg = (X64Reg)operand.offsetOrBaseReg; | 
					
						
							|  |  |  | 			WriteRex(bits == 64, _operandReg); | 
					
						
							|  |  |  | 			// mem/reg or reg/reg op
 | 
					
						
							|  |  |  | 			if (toRM) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				Write8(bits == 8 ? nops[op].toRm8 : nops[op].toRm32); | 
					
						
							|  |  |  | 				_assert_msg_(DYNA_REC, code[-1] != 0xCC, "ARGH4"); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				Write8(bits == 8 ? nops[op].fromRm8 : nops[op].fromRm32); | 
					
						
							|  |  |  | 				_assert_msg_(DYNA_REC, code[-1] != 0xCC, "ARGH5"); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		WriteRest(immToWrite>>3, _operandReg); | 
					
						
							|  |  |  | 		switch (immToWrite) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 		case 0: | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		case 8: | 
					
						
							|  |  |  | 			Write8((u8)operand.offset); | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		case 32: | 
					
						
							|  |  |  | 			Write32((u32)operand.offset); | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		default: | 
					
						
							|  |  |  | 			_assert_msg_(DYNA_REC, 0, "WriteNormalOp - Unhandled case"); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void WriteNormalOp(int bits, NormalOp op, const OpArg &a1, const OpArg &a2) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (a1.IsImm()) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			//Booh! Can't write to an imm
 | 
					
						
							|  |  |  | 			_assert_msg_(DYNA_REC, 0, "WriteNormalOp - a1 cannot be imm"); | 
					
						
							|  |  |  | 			return; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if (a2.IsImm()) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			a1.WriteNormalOp(true, op, a2, bits); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			if (a1.IsSimpleReg()) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				a2.WriteNormalOp(false, op, a1, bits); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				a1.WriteNormalOp(true, op, a2, bits); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void ADD (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmADD, a1, a2);} | 
					
						
							|  |  |  | 	void ADC (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmADC, a1, a2);} | 
					
						
							|  |  |  | 	void SUB (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmSUB, a1, a2);} | 
					
						
							|  |  |  | 	void SBB (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmSBB, a1, a2);} | 
					
						
							|  |  |  | 	void AND (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmAND, a1, a2);} | 
					
						
							|  |  |  | 	void OR  (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmOR , a1, a2);} | 
					
						
							|  |  |  | 	void XOR (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmXOR, a1, a2);} | 
					
						
							|  |  |  | 	void MOV (int bits, const OpArg &a1, const OpArg &a2)  | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		_assert_msg_(DYNA_REC, !a1.IsSimpleReg() || !a2.IsSimpleReg() || a1.GetSimpleReg() != a2.GetSimpleReg(), "Redundant MOV @ %p",  | 
					
						
							|  |  |  | 					 code);  | 
					
						
							|  |  |  | 		WriteNormalOp(bits, nrmMOV, a1, a2); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	void TEST(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmTEST, a1, a2);} | 
					
						
							|  |  |  | 	void CMP (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmCMP, a1, a2);} | 
					
						
							|  |  |  | 	void XCHG(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmXCHG, a1, a2);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (bits == 8) { | 
					
						
							|  |  |  | 			_assert_msg_(DYNA_REC, 0, "IMUL - illegal bit size!"); | 
					
						
							|  |  |  | 			return; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if (a1.IsImm()) { | 
					
						
							|  |  |  | 			_assert_msg_(DYNA_REC, 0, "IMUL - second arg cannot be imm!"); | 
					
						
							|  |  |  | 			return; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if (!a2.IsImm()) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			_assert_msg_(DYNA_REC, 0, "IMUL - third arg must be imm!"); | 
					
						
							|  |  |  | 			return; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (bits == 16) | 
					
						
							|  |  |  | 			Write8(0x66); | 
					
						
							|  |  |  | 		a1.WriteRex(bits == 64, regOp); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (a2.GetImmBits() == 8) { | 
					
						
							|  |  |  | 			Write8(0x6B); | 
					
						
							|  |  |  | 			a1.WriteRest(1, regOp); | 
					
						
							|  |  |  | 			Write8((u8)a2.offset); | 
					
						
							|  |  |  | 		} else { | 
					
						
							|  |  |  | 			Write8(0x69); | 
					
						
							|  |  |  | 			if (a2.GetImmBits() == 16 && bits == 16) { | 
					
						
							|  |  |  | 				a1.WriteRest(2, regOp); | 
					
						
							|  |  |  | 				Write16((u16)a2.offset); | 
					
						
							|  |  |  | 			} else if (a2.GetImmBits() == 32 && | 
					
						
							|  |  |  | 				(bits == 32 || bits == 64)) { | 
					
						
							|  |  |  | 					a1.WriteRest(4, regOp); | 
					
						
							|  |  |  | 					Write32((u32)a2.offset); | 
					
						
							|  |  |  | 			} else { | 
					
						
							|  |  |  | 				_assert_msg_(DYNA_REC, 0, "IMUL - unhandled case!"); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void IMUL(int bits, X64Reg regOp, OpArg a) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (bits == 8) { | 
					
						
							|  |  |  | 			_assert_msg_(DYNA_REC, 0, "IMUL - illegal bit size!"); | 
					
						
							|  |  |  | 			return; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if (a.IsImm()) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			IMUL(bits, regOp, R(regOp), a) ; | 
					
						
							|  |  |  | 			return ; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (bits == 16) | 
					
						
							|  |  |  | 			Write8(0x66); | 
					
						
							|  |  |  | 		a.WriteRex(bits == 64, regOp); | 
					
						
							|  |  |  | 		Write8(0x0F); | 
					
						
							|  |  |  | 		Write8(0xAF); | 
					
						
							|  |  |  | 		a.WriteRest(0, regOp); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void WriteSSEOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (size == 64 && packed) | 
					
						
							|  |  |  | 			Write8(0x66); //this time, override goes upwards
 | 
					
						
							|  |  |  | 		if (!packed) | 
					
						
							|  |  |  | 			Write8(size == 64 ? 0xF2 : 0xF3); | 
					
						
							|  |  |  | 		arg.operandReg = regOp; | 
					
						
							|  |  |  | 		arg.WriteRex(false); | 
					
						
							|  |  |  | 		Write8(0x0F); | 
					
						
							|  |  |  | 		Write8(sseOp); | 
					
						
							|  |  |  | 		arg.WriteRest(extrabytes); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x6E, true, dest, arg, 0);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void MOVQ_xmm(X64Reg dest, OpArg arg) { | 
					
						
							|  |  |  | #ifdef _M_X64
 | 
					
						
							|  |  |  | 			// Alternate encoding
 | 
					
						
							|  |  |  | 			// This does not display correctly in MSVC's debugger, it thinks it's a MOVD
 | 
					
						
							|  |  |  | 			arg.operandReg = dest; | 
					
						
							|  |  |  | 			Write8(0x66); | 
					
						
							|  |  |  | 			arg.WriteRex(true); | 
					
						
							|  |  |  | 			Write8(0x0f); | 
					
						
							|  |  |  | 			Write8(0x6E); | 
					
						
							|  |  |  | 			arg.WriteRest(0); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  | 			arg.operandReg = dest; | 
					
						
							|  |  |  | 			Write8(0xF3); | 
					
						
							|  |  |  | 			Write8(0x0f); | 
					
						
							|  |  |  | 			Write8(0x7E); | 
					
						
							|  |  |  | 			arg.WriteRest(0); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(64, 0x7E, true, src, arg, 0);} | 
					
						
							|  |  |  | 	void MOVQ_xmm(OpArg arg, X64Reg src) { | 
					
						
							|  |  |  | 		if (src > 7) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			// Alternate encoding
 | 
					
						
							|  |  |  | 			// This does not display correctly in MSVC's debugger, it thinks it's a MOVD
 | 
					
						
							|  |  |  | 			arg.operandReg = src; | 
					
						
							|  |  |  | 			Write8(0x66); | 
					
						
							|  |  |  | 			arg.WriteRex(true); | 
					
						
							|  |  |  | 			Write8(0x0f); | 
					
						
							|  |  |  | 			Write8(0x7E); | 
					
						
							|  |  |  | 			arg.WriteRest(0); | 
					
						
							|  |  |  | 		} else { | 
					
						
							|  |  |  | 			// INT3();
 | 
					
						
							|  |  |  | 			arg.operandReg = src; | 
					
						
							|  |  |  | 			arg.WriteRex(false); | 
					
						
							|  |  |  | 			Write8(0x66); | 
					
						
							|  |  |  | 			Write8(0x0f); | 
					
						
							|  |  |  | 			Write8(0xD6); | 
					
						
							|  |  |  | 			arg.WriteRest(0); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void WriteMXCSR(OpArg arg, int ext) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (arg.IsImm() || arg.IsSimpleReg())  | 
					
						
							|  |  |  | 			_assert_msg_(DYNA_REC, 0, "MXCSR - invalid operand"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		arg.operandReg = ext; | 
					
						
							|  |  |  | 		arg.WriteRex(false); | 
					
						
							|  |  |  | 		Write8(0x0F); | 
					
						
							|  |  |  | 		Write8(0xAE); | 
					
						
							|  |  |  | 		arg.WriteRest(); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	enum NormalSSEOps | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		sseCMP =         0xC2,  | 
					
						
							|  |  |  | 		sseADD =         0x58, //ADD
 | 
					
						
							|  |  |  | 		sseSUB =		 0x5C, //SUB
 | 
					
						
							|  |  |  | 		sseAND =		 0x54, //AND
 | 
					
						
							|  |  |  | 		sseANDN =		 0x55, //ANDN
 | 
					
						
							|  |  |  | 		sseOR  =         0x56,  | 
					
						
							|  |  |  | 		sseXOR  =        0x57, | 
					
						
							|  |  |  | 		sseMUL =		 0x59, //MUL,
 | 
					
						
							|  |  |  | 		sseDIV =		 0x5E, //DIV
 | 
					
						
							|  |  |  | 		sseMIN =		 0x5D, //MIN
 | 
					
						
							|  |  |  | 		sseMAX =		 0x5F, //MAX
 | 
					
						
							|  |  |  | 		sseCOMIS =		 0x2F, //COMIS
 | 
					
						
							|  |  |  | 		sseUCOMIS =		 0x2E, //UCOMIS
 | 
					
						
							|  |  |  | 		sseSQRT =		 0x51, //SQRT
 | 
					
						
							|  |  |  | 		sseRSQRT =		 0x52, //RSQRT (NO DOUBLE PRECISION!!!)
 | 
					
						
							|  |  |  | 		sseMOVAPfromRM = 0x28, //MOVAP from RM
 | 
					
						
							|  |  |  | 		sseMOVAPtoRM =	 0x29, //MOVAP to RM
 | 
					
						
							|  |  |  | 		sseMOVUPfromRM = 0x10, //MOVUP from RM
 | 
					
						
							|  |  |  | 		sseMOVUPtoRM =	 0x11, //MOVUP to RM
 | 
					
						
							|  |  |  | 		sseMASKMOVDQU =  0xF7, | 
					
						
							|  |  |  | 		sseLDDQU      =  0xF0, | 
					
						
							|  |  |  | 		sseSHUF       =  0xC6, | 
					
						
							|  |  |  | 		sseMOVNTDQ     = 0xE7, | 
					
						
							|  |  |  | 		sseMOVNTP      = 0x2B, | 
					
						
							|  |  |  | 	}; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void STMXCSR(OpArg memloc) {WriteMXCSR(memloc, 3);} | 
					
						
							|  |  |  | 	void LDMXCSR(OpArg memloc) {WriteMXCSR(memloc, 2);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void MOVNTDQ(OpArg arg, X64Reg regOp)   {WriteSSEOp(64, sseMOVNTDQ, true, regOp, arg);} | 
					
						
							|  |  |  | 	void MOVNTPS(OpArg arg, X64Reg regOp)   {WriteSSEOp(32, sseMOVNTP, true, regOp, arg);} | 
					
						
							|  |  |  | 	void MOVNTPD(OpArg arg, X64Reg regOp)   {WriteSSEOp(64, sseMOVNTP, true, regOp, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void ADDSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(32, sseADD, false, regOp, arg);} | 
					
						
							|  |  |  | 	void ADDSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(64, sseADD, false, regOp, arg);} | 
					
						
							|  |  |  | 	void SUBSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(32, sseSUB, false, regOp, arg);} | 
					
						
							|  |  |  | 	void SUBSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(64, sseSUB, false, regOp, arg);} | 
					
						
							|  |  |  | 	void CMPSS(X64Reg regOp, OpArg arg, u8 compare)   {WriteSSEOp(32, sseCMP, false, regOp, arg,1); Write8(compare);} | 
					
						
							|  |  |  | 	void CMPSD(X64Reg regOp, OpArg arg, u8 compare)   {WriteSSEOp(64, sseCMP, false, regOp, arg,1); Write8(compare);} | 
					
						
							|  |  |  | 	void MULSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(32, sseMUL, false, regOp, arg);} | 
					
						
							|  |  |  | 	void MULSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(64, sseMUL, false, regOp, arg);} | 
					
						
							|  |  |  | 	void DIVSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(32, sseDIV, false, regOp, arg);} | 
					
						
							|  |  |  | 	void DIVSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(64, sseDIV, false, regOp, arg);} | 
					
						
							|  |  |  | 	void MINSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(32, sseMIN, false, regOp, arg);} | 
					
						
							|  |  |  | 	void MINSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(64, sseMIN, false, regOp, arg);} | 
					
						
							|  |  |  | 	void MAXSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(32, sseMAX, false, regOp, arg);} | 
					
						
							|  |  |  | 	void MAXSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(64, sseMAX, false, regOp, arg);} | 
					
						
							|  |  |  | 	void SQRTSS(X64Reg regOp, OpArg arg)  {WriteSSEOp(32, sseSQRT, false, regOp, arg);} | 
					
						
							|  |  |  | 	void SQRTSD(X64Reg regOp, OpArg arg)  {WriteSSEOp(64, sseSQRT, false, regOp, arg);} | 
					
						
							|  |  |  | 	void RSQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseRSQRT, false, regOp, arg);} | 
					
						
							|  |  |  | 	void RSQRTSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseRSQRT, false, regOp, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void ADDPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(32, sseADD, true, regOp, arg);} | 
					
						
							|  |  |  | 	void ADDPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(64, sseADD, true, regOp, arg);} | 
					
						
							|  |  |  | 	void SUBPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(32, sseSUB, true, regOp, arg);} | 
					
						
							|  |  |  | 	void SUBPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(64, sseSUB, true, regOp, arg);} | 
					
						
							|  |  |  | 	void CMPPS(X64Reg regOp, OpArg arg, u8 compare)   {WriteSSEOp(32, sseCMP, true, regOp, arg,1); Write8(compare);} | 
					
						
							|  |  |  | 	void CMPPD(X64Reg regOp, OpArg arg, u8 compare)   {WriteSSEOp(64, sseCMP, true, regOp, arg,1); Write8(compare);} | 
					
						
							|  |  |  | 	void ANDPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(32, sseAND, true, regOp, arg);} | 
					
						
							|  |  |  | 	void ANDPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(64, sseAND, true, regOp, arg);} | 
					
						
							|  |  |  | 	void ANDNPS(X64Reg regOp, OpArg arg)  {WriteSSEOp(32, sseANDN, true, regOp, arg);} | 
					
						
							|  |  |  | 	void ANDNPD(X64Reg regOp, OpArg arg)  {WriteSSEOp(64, sseANDN, true, regOp, arg);} | 
					
						
							|  |  |  | 	void ORPS(X64Reg regOp, OpArg arg)    {WriteSSEOp(32, sseOR, true, regOp, arg);} | 
					
						
							|  |  |  | 	void ORPD(X64Reg regOp, OpArg arg)    {WriteSSEOp(64, sseOR, true, regOp, arg);} | 
					
						
							|  |  |  | 	void XORPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(32, sseXOR, true, regOp, arg);} | 
					
						
							|  |  |  | 	void XORPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(64, sseXOR, true, regOp, arg);} | 
					
						
							|  |  |  | 	void MULPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(32, sseMUL, true, regOp, arg);} | 
					
						
							|  |  |  | 	void MULPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(64, sseMUL, true, regOp, arg);} | 
					
						
							|  |  |  | 	void DIVPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(32, sseDIV, true, regOp, arg);} | 
					
						
							|  |  |  | 	void DIVPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(64, sseDIV, true, regOp, arg);} | 
					
						
							|  |  |  | 	void MINPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(32, sseMIN, true, regOp, arg);} | 
					
						
							|  |  |  | 	void MINPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(64, sseMIN, true, regOp, arg);} | 
					
						
							|  |  |  | 	void MAXPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(32, sseMAX, true, regOp, arg);} | 
					
						
							|  |  |  | 	void MAXPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(64, sseMAX, true, regOp, arg);} | 
					
						
							|  |  |  | 	void SQRTPS(X64Reg regOp, OpArg arg)  {WriteSSEOp(32, sseSQRT, true, regOp, arg);} | 
					
						
							|  |  |  | 	void SQRTPD(X64Reg regOp, OpArg arg)  {WriteSSEOp(64, sseSQRT, true, regOp, arg);} | 
					
						
							|  |  |  | 	void RSQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseRSQRT, true, regOp, arg);} | 
					
						
							|  |  |  | 	void RSQRTPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseRSQRT, true, regOp, arg);} | 
					
						
							|  |  |  | 	void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(32, sseSHUF, true, regOp, arg,1); Write8(shuffle);}  | 
					
						
							|  |  |  | 	void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(64, sseSHUF, true, regOp, arg,1); Write8(shuffle);}  | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void COMISS(X64Reg regOp, OpArg arg)  {WriteSSEOp(32, sseCOMIS, true, regOp, arg);} //weird that these should be packed
 | 
					
						
							|  |  |  | 	void COMISD(X64Reg regOp, OpArg arg)  {WriteSSEOp(64, sseCOMIS, true, regOp, arg);} //ordered
 | 
					
						
							|  |  |  | 	void UCOMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseUCOMIS, true, regOp, arg);} //unordered
 | 
					
						
							|  |  |  | 	void UCOMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseUCOMIS, true, regOp, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void MOVAPS(X64Reg regOp, OpArg arg)  {WriteSSEOp(32, sseMOVAPfromRM, true, regOp, arg);} | 
					
						
							|  |  |  | 	void MOVAPD(X64Reg regOp, OpArg arg)  {WriteSSEOp(64, sseMOVAPfromRM, true, regOp, arg);} | 
					
						
							|  |  |  | 	void MOVAPS(OpArg arg, X64Reg regOp)  {WriteSSEOp(32, sseMOVAPtoRM, true, regOp, arg);} | 
					
						
							|  |  |  | 	void MOVAPD(OpArg arg, X64Reg regOp)  {WriteSSEOp(64, sseMOVAPtoRM, true, regOp, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void MOVUPS(X64Reg regOp, OpArg arg)  {WriteSSEOp(32, sseMOVUPfromRM, true, regOp, arg);} | 
					
						
							|  |  |  | 	void MOVUPD(X64Reg regOp, OpArg arg)  {WriteSSEOp(64, sseMOVUPfromRM, true, regOp, arg);} | 
					
						
							|  |  |  | 	void MOVUPS(OpArg arg, X64Reg regOp)  {WriteSSEOp(32, sseMOVUPtoRM, true, regOp, arg);} | 
					
						
							|  |  |  | 	void MOVUPD(OpArg arg, X64Reg regOp)  {WriteSSEOp(64, sseMOVUPtoRM, true, regOp, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void MOVSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(32, sseMOVUPfromRM, false, regOp, arg);} | 
					
						
							|  |  |  | 	void MOVSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(64, sseMOVUPfromRM, false, regOp, arg);} | 
					
						
							|  |  |  | 	void MOVSS(OpArg arg, X64Reg regOp)   {WriteSSEOp(32, sseMOVUPtoRM, false, regOp, arg);} | 
					
						
							|  |  |  | 	void MOVSD(OpArg arg, X64Reg regOp)   {WriteSSEOp(64, sseMOVUPtoRM, false, regOp, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0x5A, true, regOp, arg);} | 
					
						
							|  |  |  | 	void CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x5A, true, regOp, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void CVTSD2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x5A, false, regOp, arg);} | 
					
						
							|  |  |  | 	void CVTSS2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0x5A, false, regOp, arg);} | 
					
						
							|  |  |  | 	void CVTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0xF2, false, regOp, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void CVTDQ2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0xE6, false, regOp, arg);} | 
					
						
							|  |  |  | 	void CVTDQ2PS(X64Reg regOp, const OpArg &arg) {WriteSSEOp(32, 0x5B, true, regOp, arg);} | 
					
						
							|  |  |  | 	void CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0xE6, false, regOp, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void MASKMOVDQU(X64Reg dest, X64Reg src)  {WriteSSEOp(64, sseMASKMOVDQU, true, dest, R(src));} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(32, 0x50, true, dest, arg);} | 
					
						
							|  |  |  | 	void MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x50, true, dest, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void LDDQU(X64Reg dest, OpArg arg)    {WriteSSEOp(64, sseLDDQU, false, dest, arg);} // For integer data only
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void UNPCKLPD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x14, true, dest, arg);} | 
					
						
							|  |  |  | 	void UNPCKHPD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x15, true, dest, arg);} | 
					
						
							|  |  |  | 	 | 
					
						
							|  |  |  | 	void MOVDDUP(X64Reg regOp, OpArg arg)  | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (cpu_info.bSSE3) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			WriteSSEOp(64, 0x12, false, regOp, arg); //SSE3 movddup
 | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			// Simulate this instruction with SSE2 instructions
 | 
					
						
							|  |  |  | 			if (!arg.IsSimpleReg(regOp)) | 
					
						
							|  |  |  | 				MOVQ_xmm(regOp, arg); | 
					
						
							|  |  |  | 			UNPCKLPD(regOp, R(regOp)); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	//There are a few more left
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Also some integer instrucitons are missing
 | 
					
						
							|  |  |  | 	void PACKSSDW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x6B, true, dest, arg);} | 
					
						
							|  |  |  | 	void PACKSSWB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x63, true, dest, arg);} | 
					
						
							|  |  |  | 	//void PACKUSDW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x66, true, dest, arg);} // WRONG
 | 
					
						
							|  |  |  | 	void PACKUSWB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x67, true, dest, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x60, true, dest, arg);} | 
					
						
							|  |  |  | 	void PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x61, true, dest, arg);} | 
					
						
							|  |  |  | 	void PUNPCKLDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x62, true, dest, arg);} | 
					
						
							|  |  |  | 	//void PUNPCKLQDQ(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x60, true, dest, arg);}
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// WARNING not REX compatible
 | 
					
						
							|  |  |  | 	void PSRAW(X64Reg reg, int shift) { | 
					
						
							|  |  |  | 		if (reg > 7) | 
					
						
							|  |  |  | 			PanicAlert("The PSRAW-emitter does not support regs above 7"); | 
					
						
							|  |  |  | 		Write8(0x66); | 
					
						
							|  |  |  | 		Write8(0x0f); | 
					
						
							|  |  |  | 		Write8(0x71); | 
					
						
							|  |  |  | 		Write8(0xE0 | reg); | 
					
						
							|  |  |  | 		Write8(shift); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	 | 
					
						
							|  |  |  | 	// WARNING not REX compatible
 | 
					
						
							|  |  |  | 	void PSRAD(X64Reg reg, int shift) { | 
					
						
							|  |  |  | 		if (reg > 7) | 
					
						
							|  |  |  | 			PanicAlert("The PSRAD-emitter does not support regs above 7"); | 
					
						
							|  |  |  | 		Write8(0x66); | 
					
						
							|  |  |  | 		Write8(0x0f); | 
					
						
							|  |  |  | 		Write8(0x72); | 
					
						
							|  |  |  | 		Write8(0xE0 | reg); | 
					
						
							|  |  |  | 		Write8(shift); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void PSHUFB(X64Reg dest, OpArg arg) { | 
					
						
							|  |  |  | 		if (!cpu_info.bSSSE3) { | 
					
						
							|  |  |  | 			PanicAlert("Trying to use PSHUFB on a system that doesn't support it. Bad programmer."); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		Write8(0x66); | 
					
						
							|  |  |  | 		arg.operandReg = dest; | 
					
						
							|  |  |  | 		arg.WriteRex(false); | 
					
						
							|  |  |  | 		Write8(0x0f); | 
					
						
							|  |  |  | 		Write8(0x38); | 
					
						
							|  |  |  | 		Write8(0x00); | 
					
						
							|  |  |  | 		arg.WriteRest(0); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void PAND(X64Reg dest, OpArg arg)     {WriteSSEOp(64, 0xDB, true, dest, arg);} | 
					
						
							|  |  |  | 	void PANDN(X64Reg dest, OpArg arg)    {WriteSSEOp(64, 0xDF, true, dest, arg);} | 
					
						
							|  |  |  | 	void PXOR(X64Reg dest, OpArg arg)     {WriteSSEOp(64, 0xEF, true, dest, arg);} | 
					
						
							|  |  |  | 	void POR(X64Reg dest, OpArg arg)      {WriteSSEOp(64, 0xEB, true, dest, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void PADDB(X64Reg dest, OpArg arg)    {WriteSSEOp(64, 0xFC, true, dest, arg);} | 
					
						
							|  |  |  | 	void PADDW(X64Reg dest, OpArg arg)    {WriteSSEOp(64, 0xFD, true, dest, arg);} | 
					
						
							|  |  |  | 	void PADDD(X64Reg dest, OpArg arg)    {WriteSSEOp(64, 0xFE, true, dest, arg);} | 
					
						
							|  |  |  | 	void PADDQ(X64Reg dest, OpArg arg)    {WriteSSEOp(64, 0xD4, true, dest, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void PADDSB(X64Reg dest, OpArg arg)   {WriteSSEOp(64, 0xEC, true, dest, arg);} | 
					
						
							|  |  |  | 	void PADDSW(X64Reg dest, OpArg arg)   {WriteSSEOp(64, 0xED, true, dest, arg);} | 
					
						
							|  |  |  | 	void PADDUSB(X64Reg dest, OpArg arg)  {WriteSSEOp(64, 0xDC, true, dest, arg);} | 
					
						
							|  |  |  | 	void PADDUSW(X64Reg dest, OpArg arg)  {WriteSSEOp(64, 0xDD, true, dest, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void PSUBB(X64Reg dest, OpArg arg)    {WriteSSEOp(64, 0xF8, true, dest, arg);} | 
					
						
							|  |  |  | 	void PSUBW(X64Reg dest, OpArg arg)    {WriteSSEOp(64, 0xF9, true, dest, arg);} | 
					
						
							|  |  |  | 	void PSUBD(X64Reg dest, OpArg arg)    {WriteSSEOp(64, 0xFA, true, dest, arg);} | 
					
						
							|  |  |  | 	void PSUBQ(X64Reg dest, OpArg arg)    {WriteSSEOp(64, 0xDB, true, dest, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void PSUBSB(X64Reg dest, OpArg arg)   {WriteSSEOp(64, 0xE8, true, dest, arg);} | 
					
						
							|  |  |  | 	void PSUBSW(X64Reg dest, OpArg arg)   {WriteSSEOp(64, 0xE9, true, dest, arg);} | 
					
						
							|  |  |  | 	void PSUBUSB(X64Reg dest, OpArg arg)  {WriteSSEOp(64, 0xD8, true, dest, arg);} | 
					
						
							|  |  |  | 	void PSUBUSW(X64Reg dest, OpArg arg)  {WriteSSEOp(64, 0xD9, true, dest, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void PAVGB(X64Reg dest, OpArg arg)    {WriteSSEOp(64, 0xE0, true, dest, arg);} | 
					
						
							|  |  |  | 	void PAVGW(X64Reg dest, OpArg arg)    {WriteSSEOp(64, 0xE3, true, dest, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void PCMPEQB(X64Reg dest, OpArg arg)  {WriteSSEOp(64, 0x74, true, dest, arg);} | 
					
						
							|  |  |  | 	void PCMPEQW(X64Reg dest, OpArg arg)  {WriteSSEOp(64, 0x75, true, dest, arg);} | 
					
						
							|  |  |  | 	void PCMPEQD(X64Reg dest, OpArg arg)  {WriteSSEOp(64, 0x76, true, dest, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void PCMPGTB(X64Reg dest, OpArg arg)  {WriteSSEOp(64, 0x64, true, dest, arg);} | 
					
						
							|  |  |  | 	void PCMPGTW(X64Reg dest, OpArg arg)  {WriteSSEOp(64, 0x65, true, dest, arg);} | 
					
						
							|  |  |  | 	void PCMPGTD(X64Reg dest, OpArg arg)  {WriteSSEOp(64, 0x66, true, dest, arg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void PEXTRW(X64Reg dest, OpArg arg, u8 subreg)    {WriteSSEOp(64, 0x64, true, dest, arg); Write8(subreg);} | 
					
						
							|  |  |  | 	void PINSRW(X64Reg dest, OpArg arg, u8 subreg)    {WriteSSEOp(64, 0x64, true, dest, arg); Write8(subreg);} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void PMADDWD(X64Reg dest, OpArg arg)  {WriteSSEOp(64, 0xF5, true, dest, arg); } | 
					
						
							|  |  |  | 	void PSADBW(X64Reg dest, OpArg arg)   {WriteSSEOp(64, 0xF6, true, dest, arg);} | 
					
						
							|  |  |  | 	 | 
					
						
							|  |  |  | 	void PMAXSW(X64Reg dest, OpArg arg)   {WriteSSEOp(64, 0xEE, true, dest, arg); } | 
					
						
							|  |  |  | 	void PMAXUB(X64Reg dest, OpArg arg)   {WriteSSEOp(64, 0xDE, true, dest, arg); } | 
					
						
							|  |  |  | 	void PMINSW(X64Reg dest, OpArg arg)   {WriteSSEOp(64, 0xEA, true, dest, arg); } | 
					
						
							|  |  |  | 	void PMINUB(X64Reg dest, OpArg arg)   {WriteSSEOp(64, 0xDA, true, dest, arg); } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void PMOVMSKB(X64Reg dest, OpArg arg)    {WriteSSEOp(64, 0xD7, true, dest, arg); } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Prefixes
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void LOCK() { Write8(0xF0); } | 
					
						
							|  |  |  | 	void REP()  { Write8(0xF3); } | 
					
						
							|  |  |  | 	void REPNE(){ Write8(0xF2); } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void FWAIT() | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		Write8(0x9B); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	void RTDSC() { Write8(0x0F); Write8(0x31); } | 
					
						
							|  |  |  | 	 | 
					
						
							|  |  |  | // helper routines for setting pointers
 | 
					
						
							|  |  |  | void CallCdeclFunction3(void* fnptr, u32 arg0, u32 arg1, u32 arg2) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     using namespace Gen; | 
					
						
							|  |  |  | #ifdef _M_X64
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef _MSC_VER
 | 
					
						
							|  |  |  |     MOV(32, R(RCX), Imm32(arg0)); | 
					
						
							|  |  |  |     MOV(32, R(RDX), Imm32(arg1)); | 
					
						
							|  |  |  |     MOV(32, R(R8),  Imm32(arg2)); | 
					
						
							|  |  |  |     CALL(fnptr); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     MOV(32, R(RDI), Imm32(arg0)); | 
					
						
							|  |  |  |     MOV(32, R(RSI), Imm32(arg1)); | 
					
						
							|  |  |  |     MOV(32, R(RDX), Imm32(arg2)); | 
					
						
							|  |  |  |     CALL(fnptr); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     ABI_AlignStack(3 * 4); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg2)); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg1)); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg0)); | 
					
						
							|  |  |  |     CALL(fnptr); | 
					
						
							|  |  |  | #ifdef _WIN32
 | 
					
						
							|  |  |  |     // don't inc stack
 | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     ABI_RestoreStack(3 * 4); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void CallCdeclFunction4(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     using namespace Gen; | 
					
						
							|  |  |  | #ifdef _M_X64
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef _MSC_VER
 | 
					
						
							|  |  |  |     MOV(32, R(RCX), Imm32(arg0)); | 
					
						
							|  |  |  |     MOV(32, R(RDX), Imm32(arg1)); | 
					
						
							|  |  |  |     MOV(32, R(R8), Imm32(arg2)); | 
					
						
							|  |  |  |     MOV(32, R(R9), Imm32(arg3)); | 
					
						
							|  |  |  |     CALL(fnptr); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     MOV(32, R(RDI), Imm32(arg0)); | 
					
						
							|  |  |  |     MOV(32, R(RSI), Imm32(arg1)); | 
					
						
							|  |  |  |     MOV(32, R(RDX), Imm32(arg2)); | 
					
						
							|  |  |  |     MOV(32, R(RCX), Imm32(arg3)); | 
					
						
							|  |  |  |     CALL(fnptr); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     ABI_AlignStack(4 * 4); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg3)); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg2)); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg1)); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg0)); | 
					
						
							|  |  |  |     CALL(fnptr); | 
					
						
							|  |  |  | #ifdef _WIN32
 | 
					
						
							|  |  |  |     // don't inc stack
 | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     ABI_RestoreStack(4 * 4); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void CallCdeclFunction5(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     using namespace Gen; | 
					
						
							|  |  |  | #ifdef _M_X64
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef _MSC_VER
 | 
					
						
							|  |  |  |     MOV(32, R(RCX), Imm32(arg0)); | 
					
						
							|  |  |  |     MOV(32, R(RDX), Imm32(arg1)); | 
					
						
							|  |  |  |     MOV(32, R(R8),  Imm32(arg2)); | 
					
						
							|  |  |  |     MOV(32, R(R9),  Imm32(arg3)); | 
					
						
							|  |  |  | 	MOV(32, MDisp(RSP, 0x20), Imm32(arg4)); | 
					
						
							|  |  |  |     CALL(fnptr); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     MOV(32, R(RDI), Imm32(arg0)); | 
					
						
							|  |  |  |     MOV(32, R(RSI), Imm32(arg1)); | 
					
						
							|  |  |  |     MOV(32, R(RDX), Imm32(arg2)); | 
					
						
							|  |  |  |     MOV(32, R(RCX), Imm32(arg3)); | 
					
						
							|  |  |  |     MOV(32, R(R8),  Imm32(arg4)); | 
					
						
							|  |  |  |     CALL(fnptr); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     ABI_AlignStack(5 * 4); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg4)); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg3)); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg2)); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg1)); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg0)); | 
					
						
							|  |  |  |     CALL(fnptr); | 
					
						
							|  |  |  | #ifdef _WIN32
 | 
					
						
							|  |  |  |     // don't inc stack
 | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     ABI_RestoreStack(5 * 4); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void CallCdeclFunction6(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4, u32 arg5) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     using namespace Gen; | 
					
						
							|  |  |  | #ifdef _M_X64
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef _MSC_VER
 | 
					
						
							|  |  |  |     MOV(32, R(RCX), Imm32(arg0)); | 
					
						
							|  |  |  |     MOV(32, R(RDX), Imm32(arg1)); | 
					
						
							|  |  |  |     MOV(32, R(R8), Imm32(arg2)); | 
					
						
							|  |  |  |     MOV(32, R(R9), Imm32(arg3)); | 
					
						
							|  |  |  | 	MOV(32, MDisp(RSP, 0x20), Imm32(arg4)); | 
					
						
							|  |  |  | 	MOV(32, MDisp(RSP, 0x28), Imm32(arg5)); | 
					
						
							|  |  |  |     CALL(fnptr); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     MOV(32, R(RDI), Imm32(arg0)); | 
					
						
							|  |  |  |     MOV(32, R(RSI), Imm32(arg1)); | 
					
						
							|  |  |  |     MOV(32, R(RDX), Imm32(arg2)); | 
					
						
							|  |  |  |     MOV(32, R(RCX), Imm32(arg3)); | 
					
						
							|  |  |  |     MOV(32, R(R8), Imm32(arg4)); | 
					
						
							|  |  |  |     MOV(32, R(R9), Imm32(arg5)); | 
					
						
							|  |  |  |     CALL(fnptr); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     ABI_AlignStack(6 * 4); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg5)); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg4)); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg3)); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg2)); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg1)); | 
					
						
							|  |  |  |     PUSH(32, Imm32(arg0)); | 
					
						
							|  |  |  |     CALL(fnptr); | 
					
						
							|  |  |  | #ifdef _WIN32
 | 
					
						
							|  |  |  |     // don't inc stack
 | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     ABI_RestoreStack(6 * 4); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef _M_X64
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // See header
 | 
					
						
							|  |  |  | void ___CallCdeclImport3(void* impptr, u32 arg0, u32 arg1, u32 arg2) { | 
					
						
							|  |  |  |     MOV(32, R(RCX), Imm32(arg0)); | 
					
						
							|  |  |  |     MOV(32, R(RDX), Imm32(arg1)); | 
					
						
							|  |  |  |     MOV(32, R(R8), Imm32(arg2)); | 
					
						
							|  |  |  | 	CALLptr(M(impptr)); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | void ___CallCdeclImport4(void* impptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3) { | 
					
						
							|  |  |  |     MOV(32, R(RCX), Imm32(arg0)); | 
					
						
							|  |  |  |     MOV(32, R(RDX), Imm32(arg1)); | 
					
						
							|  |  |  |     MOV(32, R(R8), Imm32(arg2)); | 
					
						
							|  |  |  |     MOV(32, R(R9), Imm32(arg3)); | 
					
						
							|  |  |  | 	CALLptr(M(impptr)); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | void ___CallCdeclImport5(void* impptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4) { | 
					
						
							|  |  |  |     MOV(32, R(RCX), Imm32(arg0)); | 
					
						
							|  |  |  |     MOV(32, R(RDX), Imm32(arg1)); | 
					
						
							|  |  |  |     MOV(32, R(R8), Imm32(arg2)); | 
					
						
							|  |  |  |     MOV(32, R(R9), Imm32(arg3)); | 
					
						
							|  |  |  | 	MOV(32, MDisp(RSP, 0x20), Imm32(arg4)); | 
					
						
							|  |  |  | 	CALLptr(M(impptr)); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | void ___CallCdeclImport6(void* impptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4, u32 arg5) { | 
					
						
							|  |  |  |     MOV(32, R(RCX), Imm32(arg0)); | 
					
						
							|  |  |  |     MOV(32, R(RDX), Imm32(arg1)); | 
					
						
							|  |  |  |     MOV(32, R(R8), Imm32(arg2)); | 
					
						
							|  |  |  |     MOV(32, R(R9), Imm32(arg3)); | 
					
						
							|  |  |  | 	MOV(32, MDisp(RSP, 0x20), Imm32(arg4)); | 
					
						
							|  |  |  | 	MOV(32, MDisp(RSP, 0x28), Imm32(arg5)); | 
					
						
							|  |  |  | 	CALLptr(M(impptr)); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } |