/*
Copyright (C) 2001 StrmnNrmn

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/

// Static Recompilation
#include "stdafx.h"

#include "OSHLE/ultra_r4300.h"		// C0_COUNT

#include "Core/CPU.h"
#include "Core/Interrupt.h"
#include "Core/R4300.h"
#include "DynaRec/DynaRec.h"

#include "Core/Registers.h"			// For REG_?? / RegNames

#include "Debug/Debug.h"
#include "asm.h"
#include "Debug/DBGConsole.h"
#include "Utility/Profiler.h"
#include "Utility/PrintOpCode.h"

// Number of entries currently stored in g_pDynarecCodeTable
DWORD g_dwNumStaticEntries = 0;
// Allocated capacity of g_pDynarecCodeTable, in entries
DWORD g_dwStaticCodeTableSize = 0;
DWORD g_dwStaticCodeTableInitialSize = 30000;	// Capacity (in entries) used the first time the table is allocated
// Table of compiled fragments; a fragment's index in this table is stored in its dwEntry field
CDynarecCode **g_pDynarecCodeTable = NULL;

// Base of the address range reserved by SR_Init; generated code is written here
static BYTE	* g_pGlobalBuffer = NULL;
// Byte offset into g_pGlobalBuffer at which the next fragment's code will start
static DWORD g_dwGlobalBufferPtr = 0;
// Number of bytes at the start of g_pGlobalBuffer that have been committed so far
static DWORD g_dwGlobalBufferSize = 0;

/* Added by Lkb (24/8/2001)
The second buffer is used to hold conditionally executed code pieces that will usually not be executed

Example:

globalbuffer:
...
TEST a, a
JZ globalsecondbuffer_1234 # usually a is nonzero
MOV EAX, a
return_from_globalsecondbuffer_1234:

globalsecondbuffer:
...
globalsecondbuffer_1234:
CALL xxx
JMP return_from_globalsecondbuffer_1234

The code that uses this is responsible for keeping the pointer 16-byte aligned.
The second buffer must come AFTER the first in the memory layout, otherwise branch prediction will be screwed up.

This is the same algorithm used in the Linux schedule() function (/usr/src/linux/kernel/sched.c)

The only problem with this system is that 32-bit relative addresses (and thus 6-byte long instructions) are used.
However managing 8-bit +127/-128 relative displacements would be challenging since they would interfere with the normal code
Otherwise a 16-bit override prefix could be used (but is it advantageous?)
*/

// Base of the second buffer; SR_Init places it 192MB into the range reserved for g_pGlobalBuffer
static BYTE	* g_pGlobalSecondBuffer = NULL;
// Byte offset of the next free position within the second buffer
static DWORD g_dwGlobalSecondBufferPtr = 0;
// Number of bytes of the second buffer that have been committed so far
static DWORD g_dwGlobalSecondBufferSize = 0;

// Incremented on every call to SR_CompileCode
DWORD g_dwNumSRCompiled = 0;
// Incremented on every call to SR_OptimiseCode (including calls that return early)
DWORD g_dwNumSROptimised = 0;
// Incremented when a fragment is registered with zero compiled ops
DWORD g_dwNumSRFailed = 0;

// Flags reported by the emit functions through their pdwFlags argument.
// Any non-zero value ends the current block.
// SR_FLAGS_BRANCHES: when link optimisation is on, one more op (the delay slot) is emitted before stopping
#define SR_FLAGS_BRANCHES			0x00000001
// NOTE(review): not referenced in this part of the file - presumably a branch back to the block's own start; confirm
#define SR_FLAGS_BRANCHES_TO_START	0x00000002
// SR_FLAGS_HANDLE_BRANCH: together with SR_FLAGS_BRANCHES, asks SR_OptimiseCode to emit a direct jump to the target fragment
#define SR_FLAGS_HANDLE_BRANCH		0x00000004

// When TRUE, SR_OptimiseCode tries to link a block directly to its branch target
BOOL g_bOptimiseLink = TRUE;		// Needs to be FALSE for 007
// Signature of an instruction emitter.
// Return TRUE if compiled ok, FALSE otherwise
typedef BOOL (*SR_EmitInstructionType)(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags);

// Defined in SRInliner.cpp
BOOL SR_Emit_Inline_Function(CDynarecCode *pCode, void* pFunction);

//*****************************************************************************
// Optional statistics about instructions the optimising pass emitted without
// optimising. Set DR_COLLECT_UNOPT_STATS to 1 to collect and print them.
//*****************************************************************************
#define DR_COLLECT_UNOPT_STATS		0

#if DR_COLLECT_UNOPT_STATS 
#define UNOPTCOUNT_STRING_SIZE	8

// One row of the statistics: a (truncated) mnemonic and how often it was seen.
// This is a named struct rather than "typedef struct { ... } UnoptCount;"
// because compare() refers to the type by name, and the typedef name of an
// anonymous struct is not yet declared inside the struct's own body.
struct UnoptCount
{
	CHAR	szString[UNOPTCOUNT_STRING_SIZE+1];	// Mnemonic, NUL terminated
	DWORD	dwCount;							// Number of occurrences

	// qsort() comparator: orders entries by ascending dwCount
	static int __cdecl compare(const void *elem1, const void *elem2)
	{
		const DWORD dwCount1 = ((const UnoptCount*)elem1)->dwCount;
		const DWORD dwCount2 = ((const UnoptCount*)elem2)->dwCount;

		if (dwCount1 < dwCount2)
			return -1;
		if (dwCount1 > dwCount2)
			return +1;

		return 0;
	}
};

static std::vector<UnoptCount> g_UnoptCount;
#endif // DR_COLLECT_UNOPT_STATS

//*****************************************************************************
// Compile-time bookkeeping for the low 32 bits of one MIPS register:
// which Intel register (if any) it is cached in, and the state of that copy.
//*****************************************************************************
typedef struct tagCachedRegInfo
{
	REGCODE iCachedIReg;	// INVALID_CODE if uncached, else ???_CODE of intel reg we're cached in
	BOOL  bValid;			// If cached, TRUE if value in intel register is valid
	BOOL  bDirty;			// If cached, TRUE if value has been modified

} CachedRegInfo;

// Compile-time bookkeeping for the high 32 bits of one MIPS register.
// SetMIPSHi records a known constant here instead of emitting a store;
// LoadMIPSHi uses that constant when bUnk is FALSE.
typedef struct tagHiRegInfo
{
	BOOL	bDirty;			// Do we need to write information about this register back to memory?

	BOOL	bUnk;			// If TRUE, we don't know the value of this register
	BOOL	bSignExt;		// If TRUE, value is sign-extension of low value, dwValue is invalid
	DWORD	dwValue;		// If bUnk is FALSE, this is the value of the register (through setmipshi..)
} HiRegInfo;

// Per-MIPS-register cache state for the low halves (reset by SR_ResetRegCacheInfo)
static CachedRegInfo g_MIPSRegInfo[32];
// Per-MIPS-register knowledge about the high halves (reset by SR_ResetRegCacheInfo)
static HiRegInfo g_MIPSHiRegInfo[32];
// One slot per Intel register, reset to ~0.
// NOTE(review): presumably the MIPS register currently held, ~0 meaning none - confirm against users
static DWORD g_IntelRegInfo[8];

// Record whether the cached copy of the low half of 'mreg' holds a valid value.
inline void MarkMIPSAsValid( u32 mreg, BOOL valid )
{
	CachedRegInfo & info = g_MIPSRegInfo[mreg];
	info.bValid = valid;
}
// Returns the bValid flag recorded for the low half of 'mreg'.
inline BOOL IsMIPSValid( u32 mreg )
{
	const CachedRegInfo & info = g_MIPSRegInfo[mreg];
	return info.bValid;
}

// Record whether the cached copy of the low half of 'mreg' has been modified.
inline void MarkMIPSAsDirty( u32 mreg, BOOL dirty )
{
	CachedRegInfo & info = g_MIPSRegInfo[mreg];
	info.bDirty = dirty;
}
// Returns the bDirty flag recorded for the low half of 'mreg'.
inline BOOL IsMIPSDirty( u32 mreg )
{
	const CachedRegInfo & info = g_MIPSRegInfo[mreg];
	return info.bDirty;
}

// Returns the Intel register caching the low half of 'mreg', or INVALID_CODE if uncached.
inline REGCODE GetMIPSCachedReg( u32 mreg )
{
	const CachedRegInfo & info = g_MIPSRegInfo[mreg];
	return info.iCachedIReg;
}

// Record which Intel register caches the low half of 'mreg' (INVALID_CODE for none).
inline void SetMIPSCachedReg( u32 mreg, REGCODE reg )
{
	CachedRegInfo & info = g_MIPSRegInfo[mreg];
	info.iCachedIReg = reg;
}


// Record whether the value of the high half of 'mreg' is unknown at compile time.
inline void MarkMIPSHiAsUnknown( u32 mreg, BOOL unk )
{
	HiRegInfo & hi = g_MIPSHiRegInfo[mreg];
	hi.bUnk = unk;
}

// Record whether the high half of 'mreg' still has to be written back to memory.
inline void MarkMIPSHiAsDirty( u32 mreg, BOOL dirty )
{
	HiRegInfo & hi = g_MIPSHiRegInfo[mreg];
	hi.bDirty = dirty;
}

// Returns the bDirty flag recorded for the high half of 'mreg'.
inline BOOL IsMIPSHiDirty( u32 mreg )
{
	const HiRegInfo & hi = g_MIPSHiRegInfo[mreg];
	return hi.bDirty;
}

// Returns TRUE if the value of the high half of 'mreg' is not known at compile time.
inline BOOL IsMIPSHiUnknown( u32 mreg )
{
	const HiRegInfo & hi = g_MIPSHiRegInfo[mreg];
	return hi.bUnk;
}

// Record the compile-time constant held in the high half of 'mreg'.
inline void SetMIPSHiValue( u32 mreg, u32 value )
{
	HiRegInfo & hi = g_MIPSHiRegInfo[mreg];
	hi.dwValue = value;
}

// Returns the constant recorded for the high half of 'mreg' (meaningful only when it is not unknown).
inline u32 GetMIPSHiValue( u32 mreg )
{
	const HiRegInfo & hi = g_MIPSHiRegInfo[mreg];
	return hi.dwValue;
}


#include "DR_FP.inl"
#include "DR_Tables.inl"

//*****************************************************************************
// Forward declarations
//*****************************************************************************
// Code table management
static BOOL SR_EnsureTableSpace();
static void SR_AddCompiledCode(CDynarecCode *pCode, DWORD * pdwBase);

// Called by SR_OptimiseCode after the emit loop, before the block's exit code
static void SR_FlushMipsRegs(CDynarecCode * pCode);
static void SR_RestoreIntelRegs(CDynarecCode * pCode);

// Called by SR_OptimiseCode immediately before / after each instruction is emitted
static void SR_PreEmitCachedRegCheck(CDynarecCode *pCode);
static void SR_PostEmitCachedRegCheck(CDynarecCode *pCode, BOOL bOptimised);

static void SR_ResetRegCacheInfo();
static void SR_Stat_Analyze(CDynarecCode * pCode);
// Its address is embedded in generated code and called with the block's op count
static DWORD SR_CheckStuffCallback(DWORD dwNumCycles);

// this is really, really, really ugly - however until the dynarec support routines get better, this cannot be easily split
// NOTE(review): 'xor' is an alternative operator token in standard C++, so this parameter
// name only compiles with compilers that do not treat it as a keyword
static void SR_Emit_ReadWriteAddress(CDynarecCode* pCode, DWORD base, DWORD offset, DWORD xor, bool write = false, DWORD rt = ~0, bool baseimm = false, bool base_already_loaded = false, REGCODE destreg = EAX_CODE);


//*****************************************************************************
// Makes sure the Intel register caching the low half of mreg holds its value,
// emitting a load from g_qwGPR if the cached copy is not yet valid.
// Assumes: mreg is cached
//*****************************************************************************
static void EnsureCachedValidLo(CDynarecCode * pCode, DWORD mreg)
{
	// Nothing to emit if the cached copy is already usable
	if (IsMIPSValid( mreg ))
	{
		DPF(DEBUG_DYNREC, "  ++ Using cached value for %s...\\/", RegNames[mreg]);
		return;
	}

	DPF(DEBUG_DYNREC, "  ++ Cached value for %s is invalid - loading...\\/", RegNames[mreg]);

	// Emit: cached intel reg <- low word of the register in memory
	BYTE * pLoWord = ((BYTE *)&g_qwGPR[0]) + (lohalf(mreg)*4);
	pCode->MOV_REG_MEM(GetMIPSCachedReg( mreg ), pLoWord);
	MarkMIPSAsValid( mreg, TRUE );
}


//*****************************************************************************
// Emits code that loads the low half of MIPS register mreg into ireg, taking
// it from the caching Intel register when there is one.
//*****************************************************************************
static void LoadMIPSLo(CDynarecCode * pCode, REGCODE ireg, DWORD mreg)
{
	const REGCODE iSourceReg = GetMIPSCachedReg( mreg );

	if (iSourceReg == INVALID_CODE)
	{
		if (mreg == REG_r0)
		{
			DPF(DEBUG_DYNREC, "  ++ Clearing reg for r0 load...\\/");
			// MOV is used rather than XOR so that the flags are left untouched
			pCode->MOVI(ireg, 0);
		}
		else
		{
			// Uncached: read the low word straight from memory
			BYTE * pLoWord = ((BYTE *)&g_qwGPR[0]) + (lohalf(mreg)*4);
			pCode->MOV_REG_MEM(ireg, pLoWord);
		}
		return;
	}

	// Cached: make sure the cached copy is loaded, then copy it across
	EnsureCachedValidLo(pCode, mreg);
	pCode->MOV(ireg, iSourceReg);
}


//*****************************************************************************
// Emits code that loads the high half of MIPS register mreg into ireg. A
// constant recorded by SetMIPSHi is loaded as an immediate instead of being
// read from memory.
// TODO - if the reg is cached, check signed/zero/set flags?!?
//*****************************************************************************
static void LoadMIPSHi(CDynarecCode * pCode, REGCODE ireg, DWORD mreg)
{
	if (mreg == REG_r0)
	{
		DPF(DEBUG_DYNREC, "  ++ Clearing reg for r0 load...\\/");
		pCode->MOVI(ireg, 0);
		return;
	}

	if (IsMIPSHiUnknown( mreg ))
	{
		// Value not known at compile time: read the high word from memory
		BYTE * pHiWord = ((BYTE *)&g_qwGPR[0]) + (hihalf(mreg)*4);
		pCode->MOV_REG_MEM(ireg, pHiWord);
		return;
	}

	DPF(DEBUG_DYNREC, "  ++ We could optimise hi reg load here (loading 0x%08x)!!!", GetMIPSHiValue( mreg ));
	pCode->MOVI(ireg, GetMIPSHiValue( mreg ));
}

//*****************************************************************************
// Emits code that stores ireg into the low half of MIPS register mreg. If
// mreg is cached only the cached copy is updated (and marked dirty + valid).
//*****************************************************************************
static void StoreMIPSLo(CDynarecCode * pCode, DWORD mreg, REGCODE ireg)
{
	const REGCODE iTargetReg = GetMIPSCachedReg( mreg );

	if (iTargetReg == INVALID_CODE)
	{
		// Uncached: write directly to the register's low word in memory
		BYTE * pLoWord = ((BYTE *)&g_qwGPR[0]) + (lohalf(mreg)*4);
		pCode->MOV_MEM_REG(pLoWord, (ireg) );
		return;
	}

	DPF(DEBUG_DYNREC, "  ++ Updating cached value for %s...\\/", RegNames[mreg]);
	pCode->MOV(iTargetReg, ireg);
	MarkMIPSAsDirty( mreg, TRUE );
	MarkMIPSAsValid( mreg, TRUE );
}

//*****************************************************************************
// Emits code that stores ireg into the high half of MIPS register mreg in
// memory, and forgets any compile-time constant recorded for it.
// TODO - Keep record of signed/zero/set flags?
//*****************************************************************************
static void StoreMIPSHi(CDynarecCode * pCode, DWORD mreg, REGCODE ireg)
{
	BYTE * pHiWord = ((BYTE *)&g_qwGPR[0]) + (hihalf(mreg)*4);
	pCode->MOV_MEM_REG(pHiWord, ireg );

	// A recorded constant that was never written back is superseded by this store
	const BOOL bHadKnownValue = !IsMIPSHiUnknown( mreg );
	if (bHadKnownValue)
	{
		DPF(DEBUG_DYNREC, "  ++ We've successfully avoided a hiwriteback!");
	}

	// We no longer know the contents of the high register, and memory is up to date
	MarkMIPSHiAsUnknown( mreg, TRUE );
	MarkMIPSHiAsDirty( mreg, FALSE );
}

//*****************************************************************************
// Emits code that sets the low half of MIPS register mreg to the constant
// data. If mreg is cached only the cached copy is updated.
//*****************************************************************************
static void SetMIPSLo(CDynarecCode * pCode, DWORD mreg, DWORD data)
{
	const REGCODE iTargetReg = GetMIPSCachedReg( mreg );

	if (iTargetReg == INVALID_CODE)
	{
		// Uncached: store the immediate directly to the register's low word
		BYTE * pLoWord = ((BYTE *)&g_qwGPR[0]) + (lohalf(mreg)*4);
		pCode->MOVI_MEM( pLoWord, data);
		return;
	}

	DPF(DEBUG_DYNREC, "  ++ Setting cached value for %s...\\/", RegNames[mreg]);
	pCode->MOVI(iTargetReg, data);
	MarkMIPSAsDirty( mreg, TRUE );
	MarkMIPSAsValid( mreg, TRUE );
}
	
//*****************************************************************************
// Records that the high half of MIPS register mreg holds the constant data.
// No code is emitted here: the value is only remembered (and flagged dirty),
// where an immediate store to the high word would otherwise have been emitted.
//*****************************************************************************
static void SetMIPSHi(CDynarecCode * pCode, DWORD mreg, DWORD data)
{
	DPF(DEBUG_DYNREC, "  ++ We could stall writeback here!!!");

	SetMIPSHiValue( mreg, data );
	MarkMIPSHiAsDirty( mreg, TRUE );
	MarkMIPSHiAsUnknown( mreg, FALSE );
}

//*****************************************************************************
// Resets the code table state and reserves the address range used for
// generated code.
//   dwSize - initial capacity (in entries) for the code table
// Returns S_OK, or E_OUTOFMEMORY if the address range could not be reserved.
//*****************************************************************************
HRESULT SR_Init(DWORD dwSize)
{
	// Start with an empty, unallocated table
	g_dwStaticCodeTableInitialSize = dwSize;
	g_pDynarecCodeTable = NULL;
	g_dwStaticCodeTableSize = 0;
	g_dwNumStaticEntries = 0;

	// Only address space is reserved here; pages are committed on demand by
	// SR_AllocateCodeBuffer. A fixed range is used because generated code cannot
	// be moved once function pointers and jumps refer to it.
	LPVOID pReserved = VirtualAlloc(NULL,
									256 * 1024 * 1024,
									MEM_RESERVE,
									PAGE_EXECUTE_READWRITE);
	g_pGlobalBuffer = (BYTE*)pReserved;
	if (g_pGlobalBuffer == NULL)
		return E_OUTOFMEMORY;

	g_dwGlobalBufferSize = 0;
	g_dwGlobalBufferPtr = 0;

	// The second buffer lives in the last 64MB of the reserved range
	g_pGlobalSecondBuffer = g_pGlobalBuffer + 192 * 1024 * 1024;
	g_dwGlobalSecondBufferSize = 0;
	g_dwGlobalSecondBufferPtr = 0;

	return S_OK;
}

//*****************************************************************************
//
//*****************************************************************************
void SR_Reset()
{
	SR_Fini();
	SR_Init(g_dwStaticCodeTableInitialSize);
}

//*****************************************************************************
//
//*****************************************************************************
void SR_Fini()
{
	if (g_pDynarecCodeTable != NULL)
	{

		for (DWORD i = 0; i < g_dwNumStaticEntries; i++)
		{
			CDynarecCode * pCode = g_pDynarecCodeTable[i];
			if (pCode != NULL)
			{
				delete pCode;
				g_pDynarecCodeTable[i] = NULL;
			}
		}

		delete []g_pDynarecCodeTable;
		g_pDynarecCodeTable = NULL;
	}

	g_dwNumStaticEntries = 0;
	g_dwStaticCodeTableSize = 0;

	if (g_pGlobalBuffer != NULL)
	{
		// Decommit all the pages first
		VirtualFree(g_pGlobalBuffer, 256 * 1024 * 1024, MEM_DECOMMIT);
		// Now release
		VirtualFree(g_pGlobalBuffer, 0, MEM_RELEASE);
		g_pGlobalBuffer = NULL;
		g_pGlobalSecondBuffer = NULL;
	}

}

//*****************************************************************************
//
//*****************************************************************************
void SR_Stats()
{
	DWORD dwCount = 0;
	DWORD dwTotalInstrs = 0;
	DWORD dwTotalOptimised = 0;
	DWORD dwTotalInstrsExe = 0;
	DWORD dwMaxLen = 0;
	DWORD dwMaxCount = 0;

	DWORD dwTotalBlocksOptimised = 0;
	DWORD dwTotalOptInstrs = 0;
	DWORD dwTotalOptInstrsOptimised = 0;
	DWORD dwTotalOptInputBytes = 0;
	DWORD dwTotalOptOutputBytes = 0;
		
	DWORD dwTotalBlocksUnoptimised = 0;
	DWORD dwTotalUnoptInstrs = 0;
	DWORD dwTotalUnoptInstrsOptimised = 0;
	DWORD dwTotalUnoptInputBytes = 0;
	DWORD dwTotalUnoptOutputBytes = 0;

	DWORD i;

	for (i = 0; i < g_dwNumStaticEntries; i++)
	{
		CDynarecCode * pCode = g_pDynarecCodeTable[i];
		if (pCode != NULL)
		{
			DPF(DEBUG_DYNREC, "0x%08x, %s, %d ops, %d optimised",
				pCode->dwStartPC, pCode->dwOptimiseLevel ? "optimised": "unoptimised", 
				pCode->dwNumOps, pCode->dwNumOptimised);
			
			dwTotalInstrs += pCode->dwNumOps;
			dwTotalOptimised += pCode->dwNumOptimised;
			dwTotalInstrsExe += (pCode->dwNumOps * pCode->dwCount);
			dwCount += pCode->dwCount;

			if (pCode->dwOptimiseLevel == 0)
			{
				dwTotalUnoptInputBytes += pCode->dwNumOps * 4;
				dwTotalUnoptOutputBytes += pCode->dwCurrentPos;
				dwTotalUnoptInstrs += pCode->dwNumOps;
				dwTotalUnoptInstrsOptimised += pCode->dwNumOptimised;
				dwTotalBlocksUnoptimised++;
			}
			else
			{
				dwTotalOptInputBytes += pCode->dwNumOps * 4;
				dwTotalOptOutputBytes += pCode->dwCurrentPos;
				dwTotalOptInstrs += pCode->dwNumOps;
				dwTotalOptInstrsOptimised += pCode->dwNumOptimised;
				dwTotalBlocksOptimised++;
			}

			if (pCode->dwNumOps > dwMaxLen)
				dwMaxLen = pCode->dwNumOps;

			if (pCode->dwCount > dwMaxCount)
				dwMaxCount = pCode->dwCount;
		}
	}

	DBGConsole_Msg(0, "Dynarec Stats");		
	DBGConsole_Msg(0, "-------------");		
	DBGConsole_Msg(0, "%d Entries (%#.3f %% optimised)", g_dwNumStaticEntries, (float)dwTotalBlocksOptimised * 100.0f / (float)g_dwNumStaticEntries);
	DBGConsole_Msg(0, "%d Ops compiled in total (%#.3f %% optimised)", dwTotalInstrs, (float)dwTotalOptimised * 100.0f / (float)dwTotalInstrs);
	DBGConsole_Msg(0, "%d Ops executed in total", dwTotalInstrsExe);
	DBGConsole_Msg(0, "%d Calls", dwCount);
	DBGConsole_Msg(0, "%#.3f Average ops/call", (float)dwTotalInstrsExe / (float)dwCount);
	DBGConsole_Msg(0, "%d Longest run", dwMaxLen);
	DBGConsole_Msg(0, "%d Largest Count", dwMaxCount);
	DBGConsole_Msg(0, "");
	DBGConsole_Msg(0, "Unoptimised");
	DBGConsole_Msg(0, "---------");
	DBGConsole_Msg(0, "%d Input Bytes", dwTotalUnoptInputBytes);
	DBGConsole_Msg(0, "%d Output Bytes", dwTotalUnoptOutputBytes);
	DBGConsole_Msg(0, "%#.3f Average Expansion Ratio", (float)dwTotalUnoptOutputBytes/(float)dwTotalUnoptInputBytes);
	DBGConsole_Msg(0, "%#.3f%% optimised inplace", (float)dwTotalUnoptInstrsOptimised * 100.0f / (float)dwTotalUnoptInstrs);
	DBGConsole_Msg(0, "");
	DBGConsole_Msg(0, "Optimised");
	DBGConsole_Msg(0, "---------");
	DBGConsole_Msg(0, "%d Input Bytes", dwTotalOptInputBytes);
	DBGConsole_Msg(0, "%d Output Bytes", dwTotalOptOutputBytes);
	DBGConsole_Msg(0, "%#.3f Average Expansion Ratio", (float)dwTotalOptOutputBytes/(float)dwTotalOptInputBytes);
	DBGConsole_Msg(0, "%#.3f%% optimised inplace", (float)dwTotalOptInstrsOptimised * 100.0f / (float)dwTotalOptInstrs);


#if DR_COLLECT_UNOPT_STATS
	DBGConsole_Msg(0, "");

	DBGConsole_Msg(0, "Unoptimised Instrs:");
	DBGConsole_Msg(0, "-------------------:");
	DBGConsole_Msg(0, "Name  Count");

	qsort(&g_UnoptCount[0], g_UnoptCount.size(), sizeof(g_UnoptCount[0]), 
		UnoptCount::compare);

	for (i = 0; i < g_UnoptCount.size(); i++)
	{
		DBGConsole_Msg(0, "%s %d", g_UnoptCount[i].szString, g_UnoptCount[i].dwCount);
	}
#endif
}



//*****************************************************************************
// Ensures that there is space in the table for one more entry, growing the
// table (doubling, or using the initial size when it is empty) if required.
// Returns TRUE if index g_dwNumStaticEntries can be written, FALSE if the
// larger table could not be allocated.
//*****************************************************************************
BOOL SR_EnsureTableSpace()
{
	CDynarecCode ** pNewBuffer;

	// Check if current space is sufficient
	if ((g_dwNumStaticEntries+1) < g_dwStaticCodeTableSize)
		return TRUE;

	// Double the current buffer size, or set to initial size if currently empty
	DWORD dwNewSize = g_dwStaticCodeTableSize * 2;

	if (dwNewSize == 0)
		dwNewSize = g_dwStaticCodeTableInitialSize;

	// The caller writes to index g_dwNumStaticEntries as soon as we return TRUE,
	// so the new table must hold at least one more entry than is in use. This
	// covers an initial size of zero (and a doubling that wrapped around).
	if (dwNewSize <= g_dwNumStaticEntries)
		dwNewSize = g_dwNumStaticEntries + 1;

	pNewBuffer = new CDynarecCode *[dwNewSize];
	if (pNewBuffer == NULL)
		return FALSE;

	DBGConsole_Msg(0, "Resizing DR buffer to %d entries", dwNewSize);

	// Carry the existing entries across before dropping the old table
	if (g_dwNumStaticEntries > 0)
		memcpy(pNewBuffer, g_pDynarecCodeTable, g_dwNumStaticEntries * sizeof(CDynarecCode *));

	SAFE_DELETEARRAY(g_pDynarecCodeTable);
	g_pDynarecCodeTable = pNewBuffer;
	g_dwStaticCodeTableSize = dwNewSize;

	return TRUE;
}


//*****************************************************************************
// Registers pCode in g_pDynarecCodeTable and overwrites the instruction at
// pdwBase with a hack opcode carrying the table index, so the fragment can be
// found quickly when that address is executed. The replaced instruction is
// kept in pCode->dwOriginalOp.
//*****************************************************************************
void SR_AddCompiledCode(CDynarecCode *pCode, DWORD * pdwBase)
{
	if (!SR_EnsureTableSpace())
	{
		DBGConsole_Msg(0, "Warning - static code table size is too small!");
		// Should really abort, because we will keep trying to compile this op
		return;
	}

	const DWORD dwSlot = g_dwNumStaticEntries;

	pCode->dwEntry = dwSlot;
	pCode->dwOriginalOp = *pdwBase;

	// Default: run the compiled code. A fragment with no ops instead gets the
	// NOOPT marker, which makes us just execute the original mips.
	DWORD dwHackOp = make_op(OP_SRHACK_UNOPT);
	if (pCode->dwNumOps == 0)
	{
		g_dwNumSRFailed++;
		dwHackOp = make_op(OP_SRHACK_NOOPT);
	}
	*pdwBase = dwHackOp | dwSlot;

	g_pDynarecCodeTable[dwSlot] = pCode;
	g_dwNumStaticEntries = dwSlot + 1;
}


//*****************************************************************************
// PROLOGUE - placeholder for code emitted at the start of a fragment.
// Currently emits nothing.
//*****************************************************************************
static void PROLOGUE( CDynarecCode * pCode )
{
	(void)pCode;	// Intentionally unused
}

//*****************************************************************************
// EPILOGUE - emits the exit sequence of a fragment: stores dwPC to g_dwPC,
// calls the read-address lookup handler for that PC, stores the value left in
// EAX to g_pPCMemBase and returns.
//*****************************************************************************
static void EPILOGUE( CDynarecCode * pCode, u32 dwPC )
{
	// Encoding of: call dword ptr [g_ReadAddressLookupTable + eax*4]
	// (these three bytes are followed by the 32-bit table address)
	static const BYTE s_CallViaTable[3] = { 0xFF, 0x14, 0x85 };

	// g_dwPC = dwPC
	pCode->MOVI(EAX_CODE, dwPC);
	pCode->MOV_MEM_REG(&g_dwPC, EAX_CODE);

	// ECX keeps the full address, EAX becomes the table index (address >> 18)
	pCode->MOV(ECX_CODE, EAX_CODE);
	pCode->SHRI(EAX_CODE, 18);

	for (DWORD dwByte = 0; dwByte < 3; dwByte++)
	{
		pCode->EmitBYTE(s_CallViaTable[dwByte]);
	}
	pCode->EmitDWORD((DWORD)g_ReadAddressLookupTable);

	// g_pPCMemBase = value left in EAX by the handler
	pCode->MOV_MEM_REG(&g_pPCMemBase, EAX_CODE);
	pCode->RET();
}


//*****************************************************************************
// Points pCode at the next free, 16-byte aligned position in the global code
// buffer and resets its write position. More pages of the primary and second
// buffers are committed (1MB at a time) when less than 32k remains committed.
//
// The committed-size counters are only advanced when the commit succeeds, so
// a failed commit is retried on the next call instead of leaving the counters
// claiming memory that was never committed.
//*****************************************************************************
void SR_AllocateCodeBuffer(CDynarecCode * pCode)
{
	// Round up to 16 byte boundary
	g_dwGlobalBufferPtr = (g_dwGlobalBufferPtr + 15) & (~15);

	// This is a bit of a hack. We assume that no single entry will generate more than 
	// 32k of storage. If there appear to be problems with this assumption, this
	// value can be enlarged
	if (g_dwGlobalBufferPtr + 32768 > g_dwGlobalBufferSize)
	{
		// Increase by 1MB. The commit covers the range from the base of the
		// buffer up to the new size.
		const DWORD dwNewSize = g_dwGlobalBufferSize + 1024 * 1024;
		LPVOID pNewAddress = VirtualAlloc(g_pGlobalBuffer, dwNewSize, MEM_COMMIT, PAGE_EXECUTE_READWRITE);

		if (pNewAddress == NULL)
		{
			DBGConsole_Msg(0, "SR Buffer allocation failed"); // maybe this should be an abort?
		}
		else
		{
			g_dwGlobalBufferSize = dwNewSize;
			DBGConsole_Msg(0, "Allocated %dMB of storage for dynarec buffer",
				g_dwGlobalBufferSize / (1024*1024));
		}
	}

	if (g_dwGlobalSecondBufferPtr + 32768 > g_dwGlobalSecondBufferSize)
	{
		// Increase by 1MB
		const DWORD dwNewSecondSize = g_dwGlobalSecondBufferSize + 1024 * 1024;
		LPVOID pNewAddress = VirtualAlloc(g_pGlobalSecondBuffer, dwNewSecondSize, MEM_COMMIT, PAGE_EXECUTE_READWRITE);

		if (pNewAddress == NULL)
		{
			DBGConsole_Msg(0, "SR Second Buffer allocation failed"); // maybe this should be an abort?
		}
		else
		{
			g_dwGlobalSecondBufferSize = dwNewSecondSize;
			DBGConsole_Msg(0, "Allocated %dMB of storage for dynarec second buffer",
				g_dwGlobalSecondBufferSize / (1024*1024));
		}
	}

	pCode->pCodeBuffer = &g_pGlobalBuffer[g_dwGlobalBufferPtr];
	pCode->dwBuffSize = 0;
	pCode->dwCurrentPos = 0;   // Start of buffer
}

//*****************************************************************************
// Claims the bytes pCode has written: advances the global buffer offset past
// the fragment's generated code so the next fragment starts after it.
//*****************************************************************************
void SR_SetCodeBuffer(CDynarecCode * pCode)
{
	const DWORD dwBytesEmitted = pCode->dwCurrentPos;
	g_dwGlobalBufferPtr = g_dwGlobalBufferPtr + dwBytesEmitted;
}

//*****************************************************************************
// Compiles (without optimisation) the run of instructions starting at dwPC
// and registers the result in the code table.
//
// Returns the new fragment, or NULL if dwPC could not be read, the fragment
// object could not be allocated, or an emitter set pCode->dwWarn. A returned
// fragment may contain zero ops if the first instruction could not be
// compiled.
//*****************************************************************************
CDynarecCode * SR_CompileCode(DWORD dwPC)
{
	DWORD *pdwPCMemBase;

	g_dwNumSRCompiled++;
	
	if (InternalReadAddress(dwPC, (void**)&pdwPCMemBase) == MEM_UNUSED)
		return NULL;

	CDynarecCode *pCode = new CDynarecCode(dwPC, pdwPCMemBase[0]);
	if(pCode == NULL)
		return NULL;

	START_PROFILE( CProfiler::PROFILE_DYNAREC );
	
	// Initialise various fields
	SR_AllocateCodeBuffer(pCode);

	DWORD dwOp;
	DWORD dwFlags;
	BOOL bEmitOk;

	DWORD *pdwPC = pdwPCMemBase;
	// Loop forever, incrementing the PC each loop. We break out of the loop when
	// we reach an operation that we can't compile
	for ( ;; pdwPC++)
	{
		dwOp = *pdwPC;

		dwFlags = 0;			// Reset flags to 0 - Emit instructions don't do this

		// Emit the actual instruction
		bEmitOk = SR_EmitInstruction[R4300_OP(dwOp)](pCode, dwOp, &dwFlags);

		// Exit if instruction wasn't emitted ok
		if (pCode->dwWarn != 0)
			break;

		if (!bEmitOk)
			break;

		pCode->dwNumOps++;

		// Exit if one or more of the flags were set (later we can selectively add
		// code to handle counter adjustments here)
		if (dwFlags != 0)
			break;
	}

	#ifdef DAEDALUS_LOG
	char opstr[300];
	DWORD dwCurrPC = pCode->dwStartPC+pCode->dwNumOps*4;
	SprintOpCodeInfo(opstr, dwCurrPC, dwOp);
	DPF(DEBUG_DYNREC, "Stopped On: 0x%08x: %s", dwCurrPC, opstr);
	#endif


	// Emit the exit code; dwEndPC is the address of the last op compiled
	if (pCode->dwNumOps > 0)
	{
		pCode->dwEndPC = pCode->dwStartPC + ((pCode->dwNumOps-1)*4);

		EPILOGUE(pCode, pCode->dwEndPC);
	}

	// Check if there was an error along the way (e.g. memory allocation failed)
	if (pCode->dwWarn != 0)
	{
		DBGConsole_Msg(0, "Static Recompilation failed!");

		// pCodeBuffer points into the global code buffer (see SR_AllocateCodeBuffer),
		// which is VirtualAlloc'd memory and not a new[] allocation, so it must
		// not be passed to delete[]. Just drop the reference. The bytes written
		// are simply reused, because g_dwGlobalBufferPtr was never advanced.
		pCode->pCodeBuffer = NULL;
		delete pCode;

		// Keep the profiler start/stop calls balanced on the failure path too
		STOP_PROFILE( CProfiler::PROFILE_DYNAREC );
		return NULL;
	}

	SR_AddCompiledCode(pCode, pdwPCMemBase);

	SR_SetCodeBuffer(pCode);

	STOP_PROFILE( CProfiler::PROFILE_DYNAREC );


	return pCode;
}


//*****************************************************************************
// Recompiles an existing fragment with register caching enabled and, when
// g_bOptimiseLink is set, tries to link it directly to the fragment at its
// branch target (compiling and optimising that target first if needed).
//
//   pCode   - fragment to optimise; does nothing if it is already optimised
//   dwLevel - recursion depth, used for logging only
//
// On success the instruction at the fragment's start address is replaced
// with an OP_SRHACK_OPT opcode carrying the fragment's table index.
//*****************************************************************************
void SR_OptimiseCode(CDynarecCode *pCode, DWORD dwLevel)
{
	// Optimise the code more aggressively
	DWORD dwOp;
	DWORD dwFlags;
	BOOL bEmitOk;
	DWORD *pdwPCMemBase = (DWORD *)ReadAddress(pCode->dwStartPC);
	CDynarecCode * pCodeTarget = NULL;

	g_dwNumSROptimised++;

	// Avoid re-optimizing
	if (pCode->dwOptimiseLevel >= 1)
		return;


	START_PROFILE( CProfiler::PROFILE_DYNAREC );


	// Set before recursing so that a block branching to itself is not optimised twice
	pCode->dwOptimiseLevel = 1;

	// Branch target set from previous unoptimized compilation!
	if (g_bOptimiseLink)
	{
		if (pCode->dwBranchTarget != ~0)
		{
			// Get the opcode of the target pc..
			DWORD * pdwTarget;

			if (InternalReadAddress(pCode->dwBranchTarget, (void **)&pdwTarget))
			{
				DWORD dwTargetOp = pdwTarget[0];

				if (R4300_OP(dwTargetOp) == OP_SRHACK_UNOPT ||
					R4300_OP(dwTargetOp) == OP_SRHACK_OPT)
				{
					// Target is already compiled: look it up and make sure it is optimised
					DWORD dwEntry = dwTargetOp & 0x00FFFFFF;
					if (dwEntry < g_dwNumStaticEntries)
					{
						pCodeTarget = g_pDynarecCodeTable[dwEntry];

						DPF(DEBUG_DYNREC, " Target entry is %d", dwEntry);
						DPF(DEBUG_DYNREC, "   StartAddress: 0x%08x, OptimizeLevel: %d", pCodeTarget->dwStartPC, pCodeTarget->dwOptimiseLevel);

						if (pCodeTarget->dwOptimiseLevel == 0)
						{
							SR_OptimiseCode(pCodeTarget, dwLevel+1);
						}
					}
				}
				else if (R4300_OP(dwTargetOp) != OP_SRHACK_NOOPT &&
						 R4300_OP(dwTargetOp) != OP_PATCH)
				{
					// Uncompiled code - compile now?
					pCodeTarget = SR_CompileCode(pCode->dwBranchTarget);
					if (pCodeTarget != NULL && pCodeTarget->dwNumOps > 0)
						SR_OptimiseCode(pCodeTarget, dwLevel+1);

					// Only link to a target that ended up optimised. SR_CompileCode
					// can return NULL, so the pointer must be checked before use.
					if (pCodeTarget != NULL && pCodeTarget->dwOptimiseLevel == 0)
						pCodeTarget = NULL;

				}
				else
				{
				//	DBGConsole_Msg(0, "0x%08x Huh??: 0x%08x (%d)", pCode->dwBranchTarget, dwTargetOp, R4300_OP(dwTargetOp));
		
				}
			}
		}
	}
	// Initialise various fields
	SR_AllocateCodeBuffer(pCode);		// Restart using new buffer

	pCode->dwNumOps = 0;			// The emit instructions expect this to be the number of ops since dwStartPC
	pCode->dwNumOptimised = 0;
	//pCode->dwCRC = 0;
	pCode->dwWarn = 0;
	pCode->dwBranchTarget = ~0;
	

	// Set the sp when the update count equals this value
	pCode->bSpCachedInESI = FALSE;
	pCode->dwSetSpPostUpdate = ~0;


	#ifdef DAEDALUS_LOG
	DPF(DEBUG_DYNREC, "");
	DPF(DEBUG_DYNREC, "");
	DPF(DEBUG_DYNREC, "************************************");
	DPF(DEBUG_DYNREC, "Optimising block starting at 0x%08x (%d hits, %d deep):", pCode->dwStartPC, pCode->dwCount, dwLevel);
	#endif


	PROLOGUE( pCode );

	// Clear the fp status
	SR_FP_Init(pCode);
	SR_ResetRegCacheInfo();

	// Display stats for this block before we optimise
	#ifdef DAEDALUS_LOG
	pCode->DisplayRegStats();
	#endif
	

	SR_Stat_Analyze(pCode);

	pCode->ResetStats();

	
	// ESI is saved here and restored after the emit loop when sp caching is in use
	if (pCode->dwSetSpPostUpdate != ~0)
		pCode->PUSH(ESI_CODE);


	// Loop forever, incrementing the PC each loop. We break out of the loop when
	// we reach an operation that we can't compile
	dwFlags = 0;

	DWORD dwNumOptimisedCheck;
	BOOL bHasBeenOptimised;
	BOOL bKeepCompiling = TRUE;
	BOOL bEmitDelayedOp = FALSE;
	BOOL bHandleBranch = FALSE;
	
	while ( bKeepCompiling || bEmitDelayedOp )
	{
		g_dwFPTimeNow++;

		dwOp = pdwPCMemBase[pCode->dwNumOps];

		if (!pCode->bSpCachedInESI && pCode->dwRegUpdates[REG_sp] == pCode->dwSetSpPostUpdate)
		{
			DPF(DEBUG_DYNREC, "  Setting sp in ESI after update %d", pCode->dwRegUpdates[REG_sp]);
			
			pCode->bSpCachedInESI = TRUE;
			
			
			// Calculate SP address and store in esi:
			// Will use cached value if present
			/*LoadMIPSLo(pCode, EAX_CODE, REG_sp);

			pCode->MOV(ECX_CODE, EAX_CODE);			// For fastcall

			// Get top bits (offset into table) in ECX
			pCode->SHRI(EAX_CODE, 18);

			// call dword ptr [g_ReadAddressLookupTable + eax*4]
			pCode->EmitBYTE(0xFF);
			pCode->EmitBYTE(0x14);
			pCode->EmitBYTE(0x85);		// 0x8d for ecx
			pCode->EmitDWORD((DWORD)g_ReadAddressLookupTable);		// Won't work if g_ReadAddressLookupTable recreated
			pCode->MOV(ESI_CODE, EAX_CODE);*/
			

			SR_Emit_ReadWriteAddress(pCode, REG_sp, 0, 0, false, ~0, false, false, ESI_CODE);

		}

		dwFlags = 0;			// Reset flags to 0 - Emit instructions don't do this

		// Remember the count so we can tell whether this op was optimised
		dwNumOptimisedCheck = pCode->dwNumOptimised;


		SR_PreEmitCachedRegCheck(pCode);

		//SetVar32(g_dwPC, pCode->dwStartPC + (pCode->dwNumOps*4));

		// Emit the actual instruction
		bEmitOk = SR_EmitInstruction[R4300_OP(dwOp)](pCode, dwOp, &dwFlags);

		// Exit if instruction wasn't emitted ok
		if (!bEmitOk || pCode->dwWarn != 0)
		{
			bKeepCompiling = FALSE;
			break;
		}
	
		bHasBeenOptimised = pCode->dwNumOptimised != dwNumOptimisedCheck;
	
#ifdef DAEDALUS_LOG
		CHAR opstr[300];
		DWORD dwCurrPC = pCode->dwStartPC+pCode->dwNumOps*4;
		SprintOpCodeInfo(opstr, dwCurrPC, dwOp);
		DPF(DEBUG_DYNREC, "0x%08x: %c %s", dwCurrPC, bHasBeenOptimised ? '*' : ' ', opstr);
#endif

#if DR_COLLECT_UNOPT_STATS

		if (!bHasBeenOptimised && dwFlags == 0)
		{
			CHAR opstr[300];
			DWORD dwCurrPC = pCode->dwStartPC+pCode->dwNumOps*4;
			SprintOpCodeInfo(opstr, dwCurrPC, dwOp);
			opstr[UNOPTCOUNT_STRING_SIZE] = 0;

			LONG i;
			BOOL bFound = FALSE;

			for (i = 0; i < g_UnoptCount.size(); i++)
			{
				if (lstrcmpi(g_UnoptCount[i].szString, opstr) == 0)
				{
					g_UnoptCount[i].dwCount++;
					bFound = TRUE;
					break;
				}
			}
			if (!bFound)
			{
				UnoptCount uc;

				DBGConsole_Msg(0, "%s", opstr);
				lstrcpy(uc.szString, opstr);
				uc.dwCount = 0;
				g_UnoptCount.push_back(uc);
			}

		}
#endif

		SR_PostEmitCachedRegCheck(pCode, bHasBeenOptimised);
		
		pCode->dwNumOps++;

		// Exit if we've emitted a delayed op
		if (bEmitDelayedOp)
			break;

		// Exit if one or more of the flags were set 
		if (g_bOptimiseLink && (dwFlags & SR_FLAGS_BRANCHES))
		{
			// Go round once more to emit the op following the branch
			bEmitDelayedOp = TRUE;
			if (dwFlags & SR_FLAGS_HANDLE_BRANCH)
				bHandleBranch = TRUE;
		}
		else if (dwFlags != 0)
		{
			// Stop compiling!
			//DBGConsole_Msg(0, "Stopping because %08x", dwFlags);
			break;
		}
	}



	// Flush all FP regs that are currently cached
	SR_FP_FlushAllRegs();

	
	if (pCode->dwSetSpPostUpdate != ~0)
		pCode->POP(ESI_CODE);


	SR_FlushMipsRegs(pCode);

	SR_RestoreIntelRegs(pCode);


	
	// Ok, so we've now flushed all the registers
	// If we're executing an internal branch, execute it now
	if (bEmitDelayedOp)
	{
		DPF(DEBUG_DYNREC, "Emitting code to update delay info...");
		// Need to clear g_nDelay here, or set to EXEC_DELAY if it is currently DO_DELAY

		//
		if (bHandleBranch && pCodeTarget != NULL)
		{
			LONG nJump;

			// The relative jump distances below are the summed sizes (in bytes)
			// of the instructions being skipped - keep them in step with the code.

			// If NO_DELAY, skip all this, and return
			pCode->MOV_REG_MEM(EAX_CODE, &g_nDelay);
			pCode->TEST(EAX_CODE, EAX_CODE);			// NO_DELAY == 0, 
			
			pCode->JE(5+5+2+2+2+
			   10+5+
			   10);			// ->branch_not_taken

			pCode->MOVI(ECX_CODE, pCode->dwNumOps);			// 5
			pCode->MOVI(EAX_CODE, (DWORD)SR_CheckStuffCallback);	// Copy p to eax - 5
			pCode->CALL_EAX();								// 2
			pCode->TEST(EAX_CODE, EAX_CODE);				// 2
			pCode->JNE(10+5);		//-> stuff_to_do				// 2

			// Set delay instruction to NO_DELAY - have we handled the op?
			SetVar32(g_nDelay, NO_DELAY);		// 10 bytes
			// Branch to the top of the code..
			nJump = pCodeTarget->pCodeBuffer - (pCode->pCodeBuffer + pCode->dwCurrentPos + 5);		// Plus 5 for the jump
			pCode->JMP_DIRECT((DWORD)nJump);		// 5 bytes

// stuff_to_do
			// If g_nDelay was DO_DELAY, we come here
			SetVar32(g_nDelay, EXEC_DELAY);		// 10 bytes
// branch_not_taken
			// No delay, because the jump was not taken
			// If g_nDelay == NO_DELAY, then we jump here
		}
		else
		{
			DPF(DEBUG_DYNREC, "Unhandled branch");

			pCode->MOV_REG_MEM(EAX_CODE, &g_nDelay);
			pCode->CMPI(EAX_CODE, NO_DELAY);			// Delay set?
			
			pCode->JE(10);
			// Just let the core handle the jump for now...change from DO_DELAY to EXEC_DELAY
			SetVar32(g_nDelay, EXEC_DELAY);		// 10 bytes
		}
	}


	// dwEndPC is the address of the last op compiled
	pCode->dwEndPC = pCode->dwStartPC + ((pCode->dwNumOps-1)*4);

	EPILOGUE(pCode, pCode->dwEndPC);


	// Check if there was an error along the way (e.g. memory allocation failed)
	if (pCode->dwWarn != 0)
	{
		DBGConsole_Msg(0, "!!Optimisation Recompilation failed!");		
		//delete pCode;
	}
	else
	{

		SR_SetCodeBuffer(pCode);

		{
			//DWORD dwOp = pdwPCMemBase[0];
			//if (!OP_IS_A_HACK(dwOp))
			//	g_pdwOriginalMips[pCode->dwInstrNum] = dwOp;

			// Redirect execution at the start address to the optimised fragment
			pdwPCMemBase[0] = make_op(OP_SRHACK_OPT) | pCode->dwEntry;
		}
	}

	DPF(DEBUG_DYNREC, "************************************");
	DPF(DEBUG_DYNREC, "");

	STOP_PROFILE( CProfiler::PROFILE_DYNAREC );
}

//*****************************************************************************
// Initialise the status of what we know about all the mips registers
//*****************************************************************************
void SR_ResetRegCacheInfo()
{
	DWORD i;

	for (i = 0; i < 32; i++)
	{
		SetMIPSCachedReg( i, INVALID_CODE );
		MarkMIPSAsValid( i, FALSE );
		MarkMIPSAsDirty( i, FALSE );

		MarkMIPSHiAsDirty( i, FALSE );
		MarkMIPSHiAsUnknown( i, TRUE );
		SetMIPSHiValue( i, 0 );		// Ignored
	}

	// Init intel regs
	for (i = 0; i < 8; i++)
	{
		g_IntelRegInfo[i] = ~0;
	}
}


//*****************************************************************************
// Decide which MIPS registers to keep in Intel registers for this block,
// based on the per-register counters stored on pCode (dwRegReads,
// dwRegUpdates, dwRegBaseUse).
//
//  - The three most-used registers (r0 excluded) are ranked. The best goes to
//    EDI, the second best to EBX and the third best to ESI.
//  - ESI is only handed to the third-best register while
//    pCode->dwSetSpPostUpdate is still ~0; this function sets that field when
//    it decides sp should be cached as a base pointer instead.
//
// A PUSH is emitted for every Intel register that gets claimed, and the
// mapping is recorded in g_IntelRegInfo / the MIPS cached-reg state.
//*****************************************************************************
void SR_Stat_Analyze(CDynarecCode * pCode)
{
	DWORD i;
	
	// Determine which base reg to cache
	DWORD dwMaxBaseUseCount = 0;
	DWORD dwMaxBaseUseIndex = ~0;
	// Determine which general regs to cache ([0] = most used, [2] = third most)
	DWORD dwMaxUseCount[3] = { 0,0,0 };
	DWORD dwMaxUseIndex[3] = { ~0,~0,~0 };

	// Ignore reg0 - start at reg 1
	for (i = 1; i < 32; i++)
	{
		// Base reg suitability? Used as a base more than once and never updated
		if (pCode->dwRegBaseUse[i] > 1 &&
			pCode->dwRegUpdates[i] == 0)
		{
			DPF(DEBUG_DYNREC, "  Mmm - %s looks nice for base caching with %d uses", 
				RegNames[i], pCode->dwRegBaseUse[i]);

			if (pCode->dwRegBaseUse[i] > dwMaxBaseUseCount)
			{
				dwMaxBaseUseCount = pCode->dwRegBaseUse[i];
				dwMaxBaseUseIndex = i;
			}
		}

		// General caching suitability: total of reads, updates and base uses
		DWORD dwUses = pCode->dwRegReads[i] +
					   pCode->dwRegUpdates[i] + 
					   pCode->dwRegBaseUse[i];

		// TODO: When more load/store ops are optimised, add base usage to count.
		// TODO: Writes are more expensive with more generic ops, because we have to
		//       re-load the register if its contents have changed in the call!
		if (dwUses > 1)			
		{
			DPF(DEBUG_DYNREC, "  Mmm - %s looks nice for general caching with %d reads, %d writes, %d base uses", 
				RegNames[i], pCode->dwRegReads[i], pCode->dwRegUpdates[i], pCode->dwRegBaseUse[i]);

			// Insert into the top-three ranking, shifting lower entries down
			if (dwUses > dwMaxUseCount[0])
			{
				dwMaxUseCount[2] = dwMaxUseCount[1];
				dwMaxUseIndex[2] = dwMaxUseIndex[1];
				
				dwMaxUseCount[1] = dwMaxUseCount[0];
				dwMaxUseIndex[1] = dwMaxUseIndex[0];
				
				dwMaxUseCount[0] = dwUses;
				dwMaxUseIndex[0] = i;
			}
			else if (dwUses > dwMaxUseCount[1])
			{
				dwMaxUseCount[2] = dwMaxUseCount[1];
				dwMaxUseIndex[2] = dwMaxUseIndex[1];
				
				dwMaxUseCount[1] = dwUses;
				dwMaxUseIndex[1] = i;
			}
			else if (dwUses > dwMaxUseCount[2])
			{			
				dwMaxUseCount[2] = dwUses;
				dwMaxUseIndex[2] = i;
			}
		}	
	}


	
	// The best base register is only reported; it is not acted upon here
	if (dwMaxBaseUseIndex != ~0)
	{
		DPF(DEBUG_DYNREC, "  Best register for base pointer caching is %s", RegNames[dwMaxBaseUseIndex]);
	}
	// Only bother if the register is used as a base more than once
	if (pCode->dwRegBaseUse[REG_sp] > 1)
	{
		// Double the "preference" for base caching of sp, because it avoids
		// having to call ReadAddress
		if (dwMaxUseIndex[2] != ~0 && (pCode->dwRegBaseUse[REG_sp]*2) < dwMaxUseCount[2])
		{
			// The format string takes a single %s, so pass only the register name
			DPF(DEBUG_DYNREC, "  Caching %s rather than sp/base",
				RegNames[dwMaxUseIndex[2]]);
		}
		else
		{
			// If the register changes several times, ignore
			if (pCode->dwRegUpdates[REG_sp] > 1)
			{
				DPF(DEBUG_DYNREC, "  Unable to cache SP - %d updates", pCode->dwRegUpdates[REG_sp]);
			}
			else
			{
				DPF(DEBUG_DYNREC, "  Will cache sp in ESI after update %d", pCode->dwRegUpdates[REG_sp]);

				pCode->dwSetSpPostUpdate = pCode->dwRegUpdates[REG_sp];
			}
		}
	}


	
	// Registers are pushed EDI, then ESI, then EBX, so that popping from the
	// lowest register code to the highest (EBX < ESI < EDI) undoes the pushes.
	if (dwMaxUseIndex[0] != ~0)
	{
		DPF(DEBUG_DYNREC, "  Best register for general caching is %s", RegNames[dwMaxUseIndex[0]]);

		// Cache read reg in EDI:
		// Save edi for future use
		pCode->PUSH(EDI_CODE);

		// The cached copy starts out neither dirty nor valid
		SetMIPSCachedReg( dwMaxUseIndex[0], EDI_CODE );
		MarkMIPSAsDirty( dwMaxUseIndex[0], FALSE );
		MarkMIPSAsValid( dwMaxUseIndex[0], FALSE );

		g_IntelRegInfo[EDI_CODE] = dwMaxUseIndex[0];
	}

	// Do this here, as EBX < ESI < EDI 
	if (pCode->dwSetSpPostUpdate == ~0 && dwMaxUseIndex[2] != ~0)
	{
		DPF(DEBUG_DYNREC, "  3rd best register for general caching is %s", RegNames[dwMaxUseIndex[2]]);

		// Cache read reg in ESI:
		// Save esi for future use
		pCode->PUSH(ESI_CODE);

		SetMIPSCachedReg( dwMaxUseIndex[2], ESI_CODE );
		MarkMIPSAsDirty( dwMaxUseIndex[2], FALSE );
		MarkMIPSAsValid( dwMaxUseIndex[2], FALSE );
		g_IntelRegInfo[ESI_CODE] = dwMaxUseIndex[2];
	}

	if (dwMaxUseIndex[1] != ~0)
	{
		DPF(DEBUG_DYNREC, "  2nd best register for general caching is %s", RegNames[dwMaxUseIndex[1]]);

		// Cache read reg in EBX:
		// Save ebx for future use
		pCode->PUSH(EBX_CODE);
	
		SetMIPSCachedReg( dwMaxUseIndex[1], EBX_CODE );
		MarkMIPSAsDirty( dwMaxUseIndex[1], FALSE );
		MarkMIPSAsValid( dwMaxUseIndex[1], FALSE );		
		g_IntelRegInfo[EBX_CODE] = dwMaxUseIndex[1];
	}
}

//*****************************************************************************
// Emit code that writes pending register state back to g_qwGPR:
//  1. known high halves that are flagged dirty (stored as immediates)
//  2. low halves held in Intel registers that are both dirty and valid
// The corresponding dirty flags are cleared as the stores are emitted.
//*****************************************************************************
void SR_FlushMipsRegs(CDynarecCode * pCode)
{
	// Pass 1: high halves. r0 is skipped.
	for (DWORD dwHiReg = 1; dwHiReg < 32; dwHiReg++)
	{
		// Only a dirty high half with a known value can be written back
		if (!IsMIPSHiDirty( dwHiReg ) || IsMIPSHiUnknown( dwHiReg ))
			continue;

		DPF(DEBUG_DYNREC, "Writing back info for %s/hi: 0x%08x", RegNames[dwHiReg], GetMIPSHiValue( dwHiReg ));

		pCode->MOVI_MEM( (((BYTE *)&g_qwGPR[0]) + (hihalf(dwHiReg)*4)), GetMIPSHiValue( dwHiReg ));

		MarkMIPSHiAsDirty( dwHiReg, FALSE );
	}

	// Pass 2: low halves cached in Intel registers, lowest register code first
	for (LONG nIReg = 0; nIReg < 8; nIReg++)
	{
		const DWORD dwMReg = g_IntelRegInfo[nIReg];

		// ~0 means this Intel register caches nothing
		if (dwMReg == ~0)
			continue;

		if (!IsMIPSDirty( dwMReg ))
			continue;

		// A store is only emitted when the cached value is valid; the dirty
		// flag is cleared in either case
		if (IsMIPSValid( dwMReg ))
		{
			DPF(DEBUG_DYNREC, "Cached reg %s is dirty: flushing", RegNames[dwMReg]);

			pCode->MOV_MEM_REG((((BYTE *)&g_qwGPR[0]) + (lohalf(dwMReg)*4)), (REGCODE)nIReg );
		}

		MarkMIPSAsDirty( dwMReg, FALSE );
	}
}



//*****************************************************************************
// Emit a POP for every Intel register that currently caches a MIPS register,
// walking from the lowest register code to the highest, and mark the cached
// MIPS value as no longer valid.
//*****************************************************************************
void SR_RestoreIntelRegs(CDynarecCode * pCode)
{
	for (DWORD dwIReg = 0; dwIReg < 8; dwIReg++)
	{
		const DWORD dwMReg = g_IntelRegInfo[dwIReg];

		// ~0: this Intel register was never claimed, so there is nothing to pop
		if (dwMReg == ~0)
			continue;

		MarkMIPSAsValid( dwMReg, FALSE );

		// The register is popped whether or not its contents were flushed
		pCode->POP((REGCODE)dwIReg);
	}
}



//*****************************************************************************
// Snapshot of pCode->dwRegUpdates taken before an instruction is emitted.
// SR_PostEmitCachedRegCheck compares against it to see which registers the
// instruction updated.
//*****************************************************************************
static DWORD g_dwWriteCheck[32];

void SR_PreEmitCachedRegCheck(CDynarecCode *pCode)
{
	// Remember the current update count of every MIPS register
	for (DWORD dwReg = 0; dwReg != 32; ++dwReg)
	{
		g_dwWriteCheck[dwReg] = pCode->dwRegUpdates[dwReg];
	}
}

//*****************************************************************************
// After emitting an instruction: if it was NOT optimised, every register whose
// update count differs from the snapshot in g_dwWriteCheck has its cached
// state invalidated (low half marked not valid, high half marked unknown).
// Nothing is done for optimised instructions.
//*****************************************************************************
void SR_PostEmitCachedRegCheck(CDynarecCode *pCode, BOOL bOptimised)
{
	if (bOptimised)
		return;

	for (DWORD dwReg = 0; dwReg < 32; dwReg++)
	{
		// Unchanged update count: the instruction did not update this register
		if (g_dwWriteCheck[dwReg] == pCode->dwRegUpdates[dwReg])
			continue;

		DPF(DEBUG_DYNREC, "  Reg %s was updated", RegNames[dwReg]);

		// A low half cached in an Intel register must be reloaded
		if (GetMIPSCachedReg( dwReg ) != INVALID_CODE)
		{
			MarkMIPSAsValid( dwReg, FALSE );
		}

		// A known high half can no longer be trusted either
		if (!IsMIPSHiUnknown( dwReg ))
		{
			MarkMIPSHiAsUnknown( dwReg, TRUE );
			MarkMIPSHiAsDirty( dwReg, FALSE );
		}
	}
}

//*****************************************************************************
// Advance the COUNT register and the first CPU event's countdown by
// dwNumCycles.
//  - If the event is more than dwNumCycles away, both are adjusted by the full
//    amount and g_dwCPUStuffToDo is returned (zero when no flags are set).
//  - Otherwise COUNT is only advanced far enough to leave the event's
//    countdown at 1, and 1 is returned.
//*****************************************************************************
DWORD SR_CheckStuffCallback(DWORD dwNumCycles)
{
	DWORD * const pdwCount = (DWORD *)&g_qwCPR[0][C0_COUNT];

	if (g_pFirstCPUEvent->dwCount <= dwNumCycles)
	{
		// The event would fire inside this block: stop one cycle short of it
		const DWORD dwClamped = g_pFirstCPUEvent->dwCount - 1;

		*pdwCount = *pdwCount + dwClamped;
		g_pFirstCPUEvent->dwCount = 1;

		return 1;
	}

	// The event is still in the future: consume all the cycles
	*pdwCount = *pdwCount + dwNumCycles;
	g_pFirstCPUEvent->dwCount -= dwNumCycles;

	// Don't return 0 unconditionally - return the flags if any are set
	return g_dwCPUStuffToDo;
}




//*****************************************************************************
// Emit code that runs the handler pF for dwOp.
// ECX is loaded with dwOp, then the handler is either inlined (only attempted
// when dwOptimiseLevel >= 1) or called through EAX.
//*****************************************************************************
static void SR_Emit_Generic_R4300(CDynarecCode *pCode, DWORD dwOp, CPU_Instruction pF)
{
	const BOOL bOptimising = (pCode->dwOptimiseLevel >= 1);

	// Flush cached MIPS and FP registers before the generic handler runs.
	// The valid flags are updated after the op has been "executed".
	if (bOptimising)
	{
		SR_FlushMipsRegs(pCode);
		SR_FP_FlushAllRegs();
	}

	// Set up dwOp
	pCode->MOVI(ECX_CODE, dwOp);

	// Lkb: inlining is only attempted when dwOptimiseLevel is greater than
	// zero. This avoids inlining for functions that are only ever hit once,
	// saving buffer space and "one-off" overheads. (GCC support, 9/jun/2001)
	if (bOptimising && SR_Emit_Inline_Function(pCode, (void*)pF))
		return;

	// Not inlined: call the function through EAX
	pCode->MOVI(EAX_CODE, (DWORD)pF);
	pCode->CALL_EAX();
}




//*****************************************************************************
// Emitter for unknown opcodes: emits nothing and always returns FALSE.
//*****************************************************************************
BOOL SR_Emit_Unk(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// None of the arguments are used
	return FALSE;
}

// Breakpoint pseudo-op: the low 26 bits of dwOp index g_BreakPoints. The
// original opcode stored there is emitted in its place.
BOOL SR_Emit_DBG_BreakPoint(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	const DWORD dwOriginalOp = g_BreakPoints[dwOp & 0x03FFFFFF].dwOriginalOp;

	// Dispatch again, this time on the original opcode
	return SR_EmitInstruction[R4300_OP(dwOriginalOp)](pCode, dwOriginalOp, pdwFlags);
}


// Recompiler hack pseudo-op: the low 26 bits of dwOp index
// g_pDynarecCodeTable. The original opcode saved on that entry is emitted in
// its place. Returns FALSE (after logging) when the table slot is empty.
BOOL SR_Emit_SRHack_UnOpt(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	CDynarecCode * const pExisting = g_pDynarecCodeTable[dwOp & 0x03FFFFFF];

	if (pExisting == NULL)
	{
		DBGConsole_Msg(0, "No compiled code here!.");
		return FALSE;
	}

	// Recurse, using the original opcode
	const DWORD dwOriginalOp = pExisting->dwOriginalOp;
	return SR_EmitInstruction[R4300_OP(dwOriginalOp)](pCode, dwOriginalOp, pdwFlags);
}

// Recompiler hack pseudo-op: looks up the code entry named by the low 26 bits
// of dwOp and emits the original opcode recorded on it. An empty table slot is
// logged and reported by returning FALSE.
BOOL SR_Emit_SRHack_Opt(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	const DWORD dwSlot = dwOp & 0x03FFFFFF;

	if (g_pDynarecCodeTable[dwSlot] != NULL)
	{
		// Recurse, using the original opcode
		const DWORD dwOriginalOp = g_pDynarecCodeTable[dwSlot]->dwOriginalOp;
		return SR_EmitInstruction[R4300_OP(dwOriginalOp)](pCode, dwOriginalOp, pdwFlags);
	}

	DBGConsole_Msg(0, "No compiled code here!.");
	return FALSE;
}


// Recompiler hack pseudo-op: the low 26 bits of dwOp index
// g_pDynarecCodeTable. The original opcode saved on that entry is emitted in
// its place.
// Returns FALSE (after logging) when the table slot is empty, matching
// SR_Emit_SRHack_UnOpt and SR_Emit_SRHack_Opt; previously an empty slot was
// dereferenced without a check.
BOOL SR_Emit_SRHack_NoOpt(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	DWORD dwEntry = dwOp & 0x03FFFFFF;
	if (g_pDynarecCodeTable[dwEntry] == NULL)
	{
		DBGConsole_Msg(0, "No compiled code here!.");
		return FALSE;
	}

	dwOp = g_pDynarecCodeTable[dwEntry]->dwOriginalOp;

	// Recurse, using the original opcode
	return SR_EmitInstruction[R4300_OP(dwOp)](pCode, dwOp, pdwFlags);
}

// Second-level dispatch: selects the emitter from SR_EmitSpecialInstruction
// using the R4300_FNC field of the opcode.
BOOL SR_Emit_Special(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	const u32 dwSelector = R4300_FNC(dwOp);

	return SR_EmitSpecialInstruction[dwSelector](pCode, dwOp, pdwFlags);
}

// Second-level dispatch: selects the emitter from SR_EmitRegImmInstruction
// using the R4300_RT field of the opcode.
BOOL SR_Emit_RegImm(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	const u32 dwSelector = R4300_RT(dwOp);

	return SR_EmitRegImmInstruction[dwSelector](pCode, dwOp, pdwFlags);
}

// Second-level dispatch: selects the emitter from SR_EmitCop0Instruction
// using the cop0fmt field of the opcode.
BOOL SR_Emit_CoPro0(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	const u32 dwSelector = cop0fmt(dwOp);

	return SR_EmitCop0Instruction[dwSelector](pCode, dwOp, pdwFlags);
}

// Second-level dispatch: selects the emitter from SR_EmitCop1Instruction.
// NOTE(review): the index is extracted with cop0fmt rather than a
// COP1-specific macro - presumably both read the same field; confirm.
BOOL SR_Emit_CoPro1(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	const u32 dwSelector = cop0fmt(dwOp);

	return SR_EmitCop1Instruction[dwSelector](pCode, dwOp, pdwFlags);
}

// Emit the address translation for a load or store.
//
// Fast path (written to pCode):
//   destreg = base (+ offset)
//   EDX     = destreg >> 18
//   destreg ^= xor                                (only when xor != 0)
//   destreg += [pointer lookup table + EDX*4]
//   JS slow_path                                  (taken when the ADD sets the sign flag)
//   [destreg] = low word of rt                    (only when rt != ~0)
//
// Slow path (written by hand into g_pGlobalSecondBuffer, 16-byte aligned):
//   ECX = base (+ offset) (^ xor)
//   when rt != ~0: EAX = EDX (the table index), EDX = low word of rt
//   CALL [lookup table + index*4]
//   destreg = EAX                                 (only when destreg != EAX)
//   JMP back to the fast path, just after the code emitted above
//
// Parameters:
//   base                - immediate address when baseimm is set; otherwise a
//                         memory location when > 31, else a MIPS register index
//   offset              - added to the address when non-zero
//   xor                 - XORed into the address when non-zero
//   write               - selects the write lookup tables instead of the read ones
//   rt                  - MIPS register whose low word is stored; ~0 for none
//   base_already_loaded - destreg already holds the base, so no load is emitted
//   destreg             - Intel register that receives the translated pointer
//
// this is really, really, really ugly - however until the dynarec support routines get better, this cannot be easily split
void SR_Emit_ReadWriteAddress(CDynarecCode* pCode, DWORD base, DWORD offset, DWORD xor, bool write, DWORD rt, bool baseimm, bool base_already_loaded, REGCODE destreg)
{
	if(!base_already_loaded)
	{
		if(baseimm)
		{
			pCode->MOVI(destreg, base);
		}
		else if(base > 31) // memory location
		{
			pCode->MOV_REG_MEM(destreg, (void*)base);
		}
		else
		{
			LoadMIPSLo(pCode, destreg, base);
		}
	}

	// Add offset
	if (offset != 0)
	{
		pCode->ADDI(destreg, offset);
	}
	
	pCode->MOV(EDX_CODE, destreg);

	// it's here to improve parallelism
	// but maybe it would be better to put it after the JS
	if (xor)
	{
		pCode->XORI(destreg, xor);
	}

	pCode->SHRI (EDX_CODE, 18);

	// ADD destreg, [g_Read/WriteAddressPointerLookupTable + EDX*4]
	pCode->EmitBYTE (0x03);
	pCode->EmitBYTE ((destreg << 3) | 4);
	pCode->EmitBYTE (0x95);
	pCode->EmitDWORD (write ? (DWORD)g_WriteAddressPointerLookupTable : (DWORD)g_ReadAddressPointerLookupTable);

	// JS special
	pCode->EmitBYTE (0x0f);
	pCode->EmitBYTE (0x88);

	g_dwGlobalSecondBufferPtr = ((g_dwGlobalSecondBufferPtr - 1) & 0xfffffff0) + 0x10; // align to 16-byte boundary

	// rel32 is relative to the end of the 4-byte displacement being emitted
	pCode->EmitDWORD ((UINT_PTR)g_pGlobalSecondBuffer + g_dwGlobalSecondBufferPtr - (UINT_PTR)pCode->pCodeBuffer - pCode->dwCurrentPos - 4);

	// Capture rt's validity before LoadMIPSLo below can change it.
	// rt == ~0 means "no register", so the register state must not be
	// queried with that sentinel as an index.
	BOOL was_rt_valid = (rt != ~0) ? IsMIPSValid( rt ) : FALSE;

	if(rt != ~0)
	{
		LoadMIPSLo(pCode, EDX_CODE, rt);

		// MOV [destreg], EDX
		pCode->EmitBYTE (0x89);
		pCode->EmitBYTE (0x10 | destreg);
	}

	// FIXME: this is truly horrible

	// Slow path from here on: raw bytes are written into the second buffer.
	// MOV ECX, address
	if(baseimm)
	{
		g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0xb8 | ECX_CODE;
		*(DWORD*)(g_pGlobalSecondBuffer + g_dwGlobalSecondBufferPtr) = (DWORD)base;
		g_dwGlobalSecondBufferPtr += 4;
	}
	else if(base > 31) // memory location
	{
		g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0x8b;
		g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = (ECX_CODE << 3) | 0x5;
		*(DWORD*)(g_pGlobalSecondBuffer + g_dwGlobalSecondBufferPtr) = (DWORD)base;
		g_dwGlobalSecondBufferPtr += 4;
	}
	else
	{
		// similar to LoadMIPSLo
		REGCODE iCachedReg = GetMIPSCachedReg( base );

		if (iCachedReg != INVALID_CODE)
		{	
			if (ECX_CODE != iCachedReg)
			{
				// MOV ECX, cachedreg
				g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0x8b;
				g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0xc0 | (ECX_CODE<<3) | (WORD)iCachedReg;
			}
		}																																		
		else if (base == REG_r0)		
		{
			// XOR ECX, ECX
			g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0x31;
			g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0xc9;
		}
		else																
		{
			// MOV ECX, [low word of g_qwGPR[base]]
			g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0x8b;
			g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = (ECX_CODE << 3) | 0x5;
			*(DWORD*)(g_pGlobalSecondBuffer + g_dwGlobalSecondBufferPtr) = (DWORD)((BYTE*)&g_qwGPR[0] + (base * 8));
			g_dwGlobalSecondBufferPtr += 4;
		}
	}

	if (rt != ~0)
	{
		// MOV EAX, EDX  (keep the table index, EDX is about to receive rt)
		g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0x89;
		g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0xd0;

		// MOV EDX, rt
		// similar to LoadMIPSLo
		REGCODE iCachedReg = GetMIPSCachedReg( rt );

		if (iCachedReg != INVALID_CODE)
		{
			if (!was_rt_valid)	
			{
				// MOV cachedreg, [low word of g_qwGPR[rt]]
				g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0x8b;
				g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = ((WORD)iCachedReg << 3) | 0x5;
				*(DWORD*)(g_pGlobalSecondBuffer + g_dwGlobalSecondBufferPtr) = (DWORD)((BYTE*)&g_qwGPR[0] + (rt * 8));
				g_dwGlobalSecondBufferPtr += 4;

				MarkMIPSAsValid( rt, TRUE );
			}

			if (EDX_CODE != iCachedReg)
			{
				// MOV EDX, cachedreg
				g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0x8b;
				g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0xc0 | (EDX_CODE<<3) | (WORD)iCachedReg;
			}
		}																																		
		else if (rt == REG_r0)		
		{
			// XOR EDX, EDX
			g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0x31;
			g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0xd2;
		}
		else																
		{
			// MOV EDX, [low word of g_qwGPR[rt]]
			g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0x8b;
			g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = (EDX_CODE << 3) | 0x5;
			*(DWORD*)(g_pGlobalSecondBuffer + g_dwGlobalSecondBufferPtr) = (DWORD)((BYTE*)&g_qwGPR[0] + (rt * 8));
			g_dwGlobalSecondBufferPtr += 4;
		}
	}

	if(offset)
	{
		// ADD ECX, offset
		g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0x81;
		g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0xc0 | ECX_CODE;
		*(DWORD*)(g_pGlobalSecondBuffer + g_dwGlobalSecondBufferPtr) = offset;
		g_dwGlobalSecondBufferPtr += 4;
	}

	if(xor)
	{
		// XOR ECX, xor
		// NOTE(review): this is the imm8 form, so only the low byte of xor is
		// encoded here, unlike the XORI on the fast path - confirm that
		// callers only pass small values.
		g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0x83;
		g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0xf0 | ECX_CODE;
		g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = xor;
	}

	// CALL DWORD PTR [lookup table + index*4]
	// The index register is EAX when rt is given (EDX then holds the value),
	// otherwise EDX
	g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0xff;
	g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0x14;
	g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = (rt != ~0) ? 0x85 : 0x95;

	*(DWORD*)(g_pGlobalSecondBuffer + g_dwGlobalSecondBufferPtr) = write ? ((rt != ~0) ? (DWORD)g_WriteAddressValueLookupTable : (DWORD)g_WriteAddressLookupTable) : (DWORD)g_ReadAddressLookupTable;
	g_dwGlobalSecondBufferPtr += 4;

	if(destreg != EAX_CODE)
	{
		// MOV destreg, EAX
		g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0x8b;
		g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0xc0 | (destreg << 3) | (WORD)EAX_CODE;
	}

	// JMP after_pointer_null (back to the current end of the fast path)
	g_pGlobalSecondBuffer[g_dwGlobalSecondBufferPtr++] = 0xe9;
	*(DWORD*)(g_pGlobalSecondBuffer + g_dwGlobalSecondBufferPtr) = (UINT_PTR)pCode->pCodeBuffer + pCode->dwCurrentPos - (UINT_PTR)(g_pGlobalSecondBuffer + g_dwGlobalSecondBufferPtr + 4);
	g_dwGlobalSecondBufferPtr += 4;
}

#include "dr_branch.inl"
#include "dr_cop1.inl"
#include "dr_cop1_s.inl"
#include "dr_cop1_d.inl"
#include "dr_load.inl"
#include "dr_math.inl"
#include "dr_math_imm.inl"
#include "dr_store.inl"


// LL, LLD, LDC2, SC, SCD and SDC2 have no emitter: each stub below emits
// nothing and returns FALSE.
BOOL SR_Emit_LL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

BOOL SR_Emit_LLD(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

BOOL SR_Emit_LDC2(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

BOOL SR_Emit_SC(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

BOOL SR_Emit_SCD(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

BOOL SR_Emit_SDC2(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}




// BEQL is not emitted yet: dwPC would have to be set correctly when the
// branch is not taken. Always returns FALSE.
BOOL SR_Emit_BEQL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}
// BNEL is not emitted yet: dwPC would have to be set correctly when the
// branch is not taken. Always returns FALSE.
BOOL SR_Emit_BNEL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

// BLEZL is not emitted yet: dwPC would have to be set correctly when the
// branch is not taken. Always returns FALSE.
BOOL SR_Emit_BLEZL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

// BGTZL is not emitted yet: dwPC would have to be set correctly when the
// branch is not taken. Always returns FALSE.
BOOL SR_Emit_BGTZL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}



// DADDI: no specialised emitter, handled by the generic R4300_DADDI handler.
// Always returns TRUE.
BOOL SR_Emit_DADDI(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rt and rs with the block's usage bookkeeping
	pCode->Stat_D_S(R4300_RT(dwOp), R4300_RS(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_DADDI);

	return TRUE;
}

// DADDIU: no specialised emitter, handled by the generic R4300_DADDIU handler.
// Always returns TRUE.
BOOL SR_Emit_DADDIU(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rt and rs with the block's usage bookkeeping
	pCode->Stat_D_S(R4300_RT(dwOp), R4300_RS(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_DADDIU);

	return TRUE;
}

// LDL: no specialised emitter, handled by the generic R4300_LDL handler.
// Always returns TRUE.
BOOL SR_Emit_LDL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register the base (rs) and rt with the block's usage bookkeeping
	pCode->Stat_Load_B_D(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_LDL);

	return TRUE;
}


// LDR: no specialised emitter, handled by the generic R4300_LDR handler.
// Always returns TRUE.
BOOL SR_Emit_LDR(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register the base (rs) and rt with the block's usage bookkeeping
	pCode->Stat_Load_B_D(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_LDR);

	return TRUE;
}



// LWL: no specialised emitter, handled by the generic R4300_LWL handler.
// Always returns TRUE.
BOOL SR_Emit_LWL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register the base (rs) and rt with the block's usage bookkeeping
	pCode->Stat_Load_B_D(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_LWL);

	return TRUE;
}



// LWR: no specialised emitter, handled by the generic R4300_LWR handler.
// Always returns TRUE.
BOOL SR_Emit_LWR(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register the base (rs) and rt with the block's usage bookkeeping
	pCode->Stat_Load_B_D(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_LWR);

	return TRUE;
}

// LWU: no specialised emitter, handled by the generic R4300_LWU handler.
// Always returns TRUE.
BOOL SR_Emit_LWU(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register the base (rs) and rt with the block's usage bookkeeping
	pCode->Stat_Load_B_D(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_LWU);

	return TRUE;
}

// LD: no specialised emitter, handled by the generic R4300_LD handler.
// Always returns TRUE.
BOOL SR_Emit_LD(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register the base (rs) and rt with the block's usage bookkeeping
	pCode->Stat_Load_B_D(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_LD);

	return TRUE;
}



// SWL: no specialised emitter, handled by the generic R4300_SWL handler.
// Always returns TRUE.
BOOL SR_Emit_SWL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register the base (rs) and rt with the block's usage bookkeeping
	pCode->Stat_Save_B_S(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_SWL);

	return TRUE;
}


// SDL: no specialised emitter, handled by the generic R4300_SDL handler.
// Always returns TRUE.
BOOL SR_Emit_SDL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register the base (rs) and rt with the block's usage bookkeeping
	pCode->Stat_Save_B_S(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_SDL);

	return TRUE;
}

// SDR: no specialised emitter, handled by the generic R4300_SDR handler.
// Always returns TRUE.
BOOL SR_Emit_SDR(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register the base (rs) and rt with the block's usage bookkeeping
	pCode->Stat_Save_B_S(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_SDR);

	return TRUE;
}

// SWR: no specialised emitter, handled by the generic R4300_SWR handler.
// Always returns TRUE.
BOOL SR_Emit_SWR(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register the base (rs) and rt with the block's usage bookkeeping
	pCode->Stat_Save_B_S(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_SWR);

	return TRUE;
}

// CACHE is ignored: no code is emitted. When dwOptimiseLevel >= 1 the op is
// still counted in dwNumOptimised. Always returns TRUE.
BOOL SR_Emit_CACHE(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	const BOOL bCountAsOptimised = (pCode->dwOptimiseLevel >= 1);

	if (bCountAsOptimised)
		pCode->dwNumOptimised++;

	return TRUE;
}





// SD: no specialised emitter, handled by the generic R4300_SD handler.
// Always returns TRUE.
BOOL SR_Emit_SD(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register the base (rs) and rt with the block's usage bookkeeping
	pCode->Stat_Save_B_S(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_SD);

	return TRUE;
}


////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////


// Emitter for unknown SPECIAL opcodes: emits nothing and returns FALSE.
BOOL SR_Emit_Special_Unk(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}


// SLLV: no specialised emitter, handled by the generic R4300_Special_SLLV
// handler. Always returns TRUE.
BOOL SR_Emit_Special_SLLV(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rd, rs and rt with the block's usage bookkeeping
	pCode->Stat_D_S_S(R4300_RD(dwOp), R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_SLLV);

	return TRUE;
}

// SRLV: no specialised emitter, handled by the generic R4300_Special_SRLV
// handler. Always returns TRUE.
BOOL SR_Emit_Special_SRLV(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rd, rs and rt with the block's usage bookkeeping
	pCode->Stat_D_S_S(R4300_RD(dwOp), R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_SRLV);

	return TRUE;
}

// SRAV: no specialised emitter, handled by the generic R4300_Special_SRAV
// handler. Always returns TRUE.
BOOL SR_Emit_Special_SRAV(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rd, rs and rt with the block's usage bookkeeping
	pCode->Stat_D_S_S(R4300_RD(dwOp), R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_SRAV);

	return TRUE;
}

// DSLLV: no specialised emitter, handled by the generic R4300_Special_DSLLV
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DSLLV(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rd, rs and rt with the block's usage bookkeeping
	pCode->Stat_D_S_S(R4300_RD(dwOp), R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DSLLV);

	return TRUE;
}

// DSRLV: no specialised emitter, handled by the generic R4300_Special_DSRLV
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DSRLV(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rd, rs and rt with the block's usage bookkeeping
	pCode->Stat_D_S_S(R4300_RD(dwOp), R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DSRLV);

	return TRUE;
}

// DSRAV: no specialised emitter, handled by the generic R4300_Special_DSRAV
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DSRAV(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rd, rs and rt with the block's usage bookkeeping
	pCode->Stat_D_S_S(R4300_RD(dwOp), R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DSRAV);

	return TRUE;
}


// SYSCALL, BREAK and SYNC have no emitter: each stub below emits nothing and
// returns FALSE.
BOOL SR_Emit_Special_SYSCALL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

BOOL SR_Emit_Special_BREAK(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

BOOL SR_Emit_Special_SYNC(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}



// Move to MultHi/Lo is not very common, so MTHI has no specialised emitter:
// it is handled by the generic R4300_Special_MTHI handler. Always returns TRUE.
BOOL SR_Emit_Special_MTHI(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rs with the block's usage bookkeeping
	pCode->Stat_S(R4300_RS(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_MTHI);

	return TRUE;
}


// MTLO: no specialised emitter, handled by the generic R4300_Special_MTLO
// handler. Always returns TRUE.
BOOL SR_Emit_Special_MTLO(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rs with the block's usage bookkeeping
	pCode->Stat_S(R4300_RS(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_MTLO);

	return TRUE;
}


// MULT: no specialised emitter, handled by the generic R4300_Special_MULT
// handler. Always returns TRUE.
BOOL SR_Emit_Special_MULT(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rs and rt with the block's usage bookkeeping
	pCode->Stat_S_S(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_MULT);

	return TRUE;
}



// DIV: no specialised emitter, handled by the generic R4300_Special_DIV
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DIV(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rs and rt with the block's usage bookkeeping
	pCode->Stat_S_S(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DIV);

	return TRUE;
}


// DIVU: no specialised emitter, handled by the generic R4300_Special_DIVU
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DIVU(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rs and rt with the block's usage bookkeeping
	pCode->Stat_S_S(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DIVU);

	return TRUE;
}

// DMULT: no specialised emitter, handled by the generic R4300_Special_DMULT
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DMULT(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rs and rt with the block's usage bookkeeping
	pCode->Stat_S_S(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DMULT);

	return TRUE;
}

// DMULTU: no specialised emitter, handled by the generic R4300_Special_DMULTU
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DMULTU(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rs and rt with the block's usage bookkeeping
	pCode->Stat_S_S(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DMULTU);

	return TRUE;
}

// DDIV: no specialised emitter, handled by the generic R4300_Special_DDIV
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DDIV(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rs and rt with the block's usage bookkeeping
	pCode->Stat_S_S(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DDIV);

	return TRUE;
}

// DDIVU: no specialised emitter, handled by the generic R4300_Special_DDIVU
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DDIVU(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rs and rt with the block's usage bookkeeping
	pCode->Stat_S_S(R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DDIVU);

	return TRUE;
}

// TGE, TGEU, TLT, TLTU, TEQ and TNE have no emitter: each stub below emits
// nothing and returns FALSE.
BOOL SR_Emit_Special_TGE(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

BOOL SR_Emit_Special_TGEU(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

BOOL SR_Emit_Special_TLT(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

BOOL SR_Emit_Special_TLTU(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

BOOL SR_Emit_Special_TEQ(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

BOOL SR_Emit_Special_TNE(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}






// DSLL: no specialised emitter, handled by the generic R4300_Special_DSLL
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DSLL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rd and rt with the block's usage bookkeeping (sa is not recorded)
	pCode->Stat_D_S(R4300_RD(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DSLL);

	return TRUE;
}

// DSRL: no specialised emitter, handled by the generic R4300_Special_DSRL
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DSRL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rd and rt with the block's usage bookkeeping (sa is not recorded)
	pCode->Stat_D_S(R4300_RD(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DSRL);

	return TRUE;
}

// DSRA: no specialised emitter, handled by the generic R4300_Special_DSRA
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DSRA(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rd and rt with the block's usage bookkeeping (sa is not recorded)
	pCode->Stat_D_S(R4300_RD(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DSRA);

	return TRUE;
}

// DSLL32: no specialised emitter, handled by the generic R4300_Special_DSLL32
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DSLL32(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rd and rt with the block's usage bookkeeping (sa is not recorded)
	pCode->Stat_D_S(R4300_RD(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DSLL32);

	return TRUE;
}

// DSRL32: no specialised emitter, handled by the generic R4300_Special_DSRL32
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DSRL32(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rd and rt with the block's usage bookkeeping (sa is not recorded)
	pCode->Stat_D_S(R4300_RD(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DSRL32);

	return TRUE;
}

// DSRA32: no specialised emitter, handled by the generic R4300_Special_DSRA32
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DSRA32(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rd and rt with the block's usage bookkeeping (sa is not recorded)
	pCode->Stat_D_S(R4300_RD(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DSRA32);

	return TRUE;
}






// DADD: no specialised emitter, handled by the generic R4300_Special_DADD
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DADD(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rd, rs and rt with the block's usage bookkeeping
	pCode->Stat_D_S_S(R4300_RD(dwOp), R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DADD);

	return TRUE;
}

// DADDU: no specialised emitter, handled by the generic R4300_Special_DADDU
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DADDU(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rd, rs and rt with the block's usage bookkeeping
	pCode->Stat_D_S_S(R4300_RD(dwOp), R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DADDU);

	return TRUE;
}

// DSUB: no specialised emitter, handled by the generic R4300_Special_DSUB
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DSUB(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rd, rs and rt with the block's usage bookkeeping
	pCode->Stat_D_S_S(R4300_RD(dwOp), R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DSUB);

	return TRUE;
}

// DSUBU: no specialised emitter, handled by the generic R4300_Special_DSUBU
// handler. Always returns TRUE.
BOOL SR_Emit_Special_DSUBU(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Register rd, rs and rt with the block's usage bookkeeping
	pCode->Stat_D_S_S(R4300_RD(dwOp), R4300_RS(dwOp), R4300_RT(dwOp));

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_DSUBU);

	return TRUE;
}






////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////

// RegImm "Unk" handler (presumably the slot for unknown/unassigned RegImm
// encodings - confirm against the dispatch table).
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_RegImm_Unk(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

// RegImm/BLTZL: not recompiled.
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_RegImm_BLTZL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}
// RegImm/BGEZL: not recompiled.
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_RegImm_BGEZL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

// RegImm/TGEI: not recompiled.
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_RegImm_TGEI(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}
// RegImm/TGEIU: not recompiled.
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_RegImm_TGEIU(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}
// RegImm/TLTI: not recompiled.
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_RegImm_TLTI(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}
// RegImm/TLTIU: not recompiled.
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_RegImm_TLTIU(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}
// RegImm/TEQI: not recompiled.
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_RegImm_TEQI(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}
// RegImm/TNEI: not recompiled.
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_RegImm_TNEI(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

// RegImm/BLTZAL: not recompiled.
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_RegImm_BLTZAL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}
// RegImm/BGEZAL: not recompiled.
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_RegImm_BGEZAL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}
// RegImm/BLTZALL: not recompiled.
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_RegImm_BLTZALL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}
// RegImm/BGEZALL: not recompiled.
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_RegImm_BGEZALL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////

// Cop0 "Unk" handler (presumably the slot for unknown/unassigned Cop0
// encodings - confirm against the dispatch table).
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_Cop0_Unk(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

// Cop0/MFC0.
// Reads of C0_COUNT are refused (FALSE, nothing emitted) because COUNT is not
// maintained while recompiled code is executing. For every other register the
// rt field is reported through Stat_D and the op is handed to
// SR_Emit_Generic_R4300 with R4300_Cop0_MFC0; TRUE is returned.
// pdwFlags is not touched.
BOOL SR_Emit_Cop0_MFC0(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	const u32 dwCopReg = R4300_FS(dwOp);
	const u32 dwGprReg = R4300_RT(dwOp);

	// Operation being recompiled: g_qwGPR[rt] = (s64)(s32)g_qwCPR[0][fs]

	if (dwCopReg == C0_COUNT)
	{
		// Could emit an instruction to set COUNT here?
		return FALSE;
	}

	pCode->Stat_D(dwGprReg);
	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Cop0_MFC0);

	return TRUE;
}

// Cop0/MTC0.
// Writes to C0_COUNT or C0_COMPARE are refused (FALSE, nothing emitted);
// COUNT is not maintained while recompiled code is executing. For every other
// register the rt field is reported through Stat_S and the op is handed to
// SR_Emit_Generic_R4300 with R4300_Cop0_MTC0; TRUE is returned.
// pdwFlags is not touched.
BOOL SR_Emit_Cop0_MTC0(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	const u32 dwCopReg = R4300_FS(dwOp);
	const u32 dwGprReg = R4300_RT(dwOp);

	// Operation being recompiled: g_qwCPR[0][fs] = g_qwGPR[rt]

	if (dwCopReg == C0_COMPARE || dwCopReg == C0_COUNT)
	{
		// Could emit an instruction to set COUNT here?
		return FALSE;
	}

	pCode->Stat_S(dwGprReg);
	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Cop0_MTC0);

	return TRUE;
}

// Cop0/TLB: not recompiled.
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_Cop0_TLB(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////


// Cop1 "Unk" handler (presumably the slot for unknown/unassigned Cop1
// encodings - confirm against the dispatch table).
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_Cop1_Unk(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}

// Cop1 BC dispatcher.
// Uses the R4300_BC field of the op to select an entry of
// SR_EmitCop1BC1Instruction, forwards all three arguments to it unchanged and
// returns whatever that handler returns.
BOOL SR_Emit_Cop1_BCInstr(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	const u32 dwTableIndex = R4300_BC(dwOp);

	return SR_EmitCop1BC1Instruction[dwTableIndex](pCode, dwOp, pdwFlags);
}

// Cop1 S-format dispatcher.
// Uses the R4300_FNC field of the op to select an entry of
// SR_EmitCop1SInstruction, forwards all three arguments to it unchanged and
// returns whatever that handler returns.
BOOL SR_Emit_Cop1_SInstr(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	const u32 dwTableIndex = R4300_FNC(dwOp);

	return SR_EmitCop1SInstruction[dwTableIndex](pCode, dwOp, pdwFlags);
}

// Cop1 D-format dispatcher.
// Uses the R4300_FNC field of the op to select an entry of
// SR_EmitCop1DInstruction, forwards all three arguments to it unchanged and
// returns whatever that handler returns.
BOOL SR_Emit_Cop1_DInstr(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	const u32 dwTableIndex = R4300_FNC(dwOp);

	return SR_EmitCop1DInstruction[dwTableIndex](pCode, dwOp, pdwFlags);
}

// Cop1 W-format dispatcher.
// Only two function codes are handled: OP_CVT_S is forwarded to
// SR_Emit_Cop1_W_CVT_S and OP_CVT_D to SR_Emit_Cop1_W_CVT_D, and the result of
// the chosen handler is returned. Any other function code returns FALSE
// without emitting anything.
BOOL SR_Emit_Cop1_WInstr(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	const u32 dwFunc = R4300_FNC(dwOp);

	if (dwFunc == OP_CVT_S)
	{
		return SR_Emit_Cop1_W_CVT_S(pCode, dwOp, pdwFlags);
	}

	if (dwFunc == OP_CVT_D)
	{
		return SR_Emit_Cop1_W_CVT_D(pCode, dwOp, pdwFlags);
	}

	// Everything else in the W format is unsupported
	return FALSE;
}






// Cop1 L-format ops: none are recompiled.
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_Cop1_LInstr(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}


// BC1/BC1F.
// Sets SR_FLAGS_BRANCHES in *pdwFlags, applies SetVar32 to g_dwPC with the
// value StartPC + NumOps * 4, then hands the op to SR_Emit_Generic_R4300
// with R4300_BC1_BC1F. Always returns TRUE.
BOOL SR_BC1_BC1F(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Flag this op as a branch for the caller
	*pdwFlags = *pdwFlags | SR_FLAGS_BRANCHES;

	// PC = StartPC + NumOps * 4 (must precede the generic call below)
	SetVar32(g_dwPC, pCode->dwStartPC + (pCode->dwNumOps * 4));
	SR_Emit_Generic_R4300(pCode, dwOp, R4300_BC1_BC1F);

	return TRUE;
}

// BC1/BC1T.
// Sets SR_FLAGS_BRANCHES in *pdwFlags, applies SetVar32 to g_dwPC with the
// value StartPC + NumOps * 4, then hands the op to SR_Emit_Generic_R4300
// with R4300_BC1_BC1T. Always returns TRUE.
BOOL SR_BC1_BC1T(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Flag this op as a branch for the caller
	*pdwFlags = *pdwFlags | SR_FLAGS_BRANCHES;

	// PC = StartPC + NumOps * 4 (must precede the generic call below)
	SetVar32(g_dwPC, pCode->dwStartPC + (pCode->dwNumOps * 4));
	SR_Emit_Generic_R4300(pCode, dwOp, R4300_BC1_BC1T);

	return TRUE;
}

// BC1/BC1FL: not recompiled.
// "Likely" branch instructions can't be executed by the recompiler yet, so
// nothing is emitted, *pdwFlags is left untouched and FALSE is returned.
//
// The statements that used to follow the return were unreachable and have been
// removed. They followed the same sequence as SR_BC1_BC1F: OR
// SR_FLAGS_BRANCHES into *pdwFlags, SetVar32 g_dwPC to StartPC + NumOps * 4,
// then SR_Emit_Generic_R4300 with R4300_BC1_BC1FL, returning TRUE.
BOOL SR_BC1_BC1FL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Can't yet execute Likely Instructions
	return FALSE;
}

// BC1/BC1TL: not recompiled.
// "Likely" branch instructions can't be executed by the recompiler yet, so
// nothing is emitted, *pdwFlags is left untouched and FALSE is returned.
//
// The statements that used to follow the return were unreachable and have been
// removed. They followed the same sequence as SR_BC1_BC1T: OR
// SR_FLAGS_BRANCHES into *pdwFlags, SetVar32 g_dwPC to StartPC + NumOps * 4,
// then SR_Emit_Generic_R4300 with R4300_BC1_BC1TL, returning TRUE.
BOOL SR_BC1_BC1TL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	// Can't yet execute Likely Instructions
	return FALSE;
}


////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////


// Cop1 S-format "Unk" handler (presumably the slot for unknown/unsupported
// function codes - confirm against SR_EmitCop1SInstruction).
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_Cop1_S_Unk(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}
// Cop1 D-format "Unk" handler (presumably the slot for unknown/unsupported
// function codes - confirm against SR_EmitCop1DInstruction).
// Emits nothing, ignores all arguments and always returns FALSE.
BOOL SR_Emit_Cop1_D_Unk(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	return FALSE;
}




// Cop1/DMFC1.
// The rt field is reported through Stat_D and the fs field through
// Stat_Double_S, then the op is handed to SR_Emit_Generic_R4300 with
// R4300_Cop1_DMFC1. pdwFlags is not touched. Always returns TRUE.
BOOL SR_Emit_Cop1_DMFC1(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	const u32 dwGprReg = R4300_RT(dwOp);
	const u32 dwFpReg  = R4300_FS(dwOp);

	pCode->Stat_D(dwGprReg);
	pCode->Stat_Double_S(dwFpReg);

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Cop1_DMFC1);
	return TRUE;
}


// Cop1/DMTC1.
// The rt field is reported through Stat_S and the fs field through
// Stat_Double_D, then the op is handed to SR_Emit_Generic_R4300 with
// R4300_Cop1_DMTC1. pdwFlags is not touched. Always returns TRUE.
BOOL SR_Emit_Cop1_DMTC1(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
	const u32 dwGprReg = R4300_RT(dwOp);
	const u32 dwFpReg  = R4300_FS(dwOp);

	pCode->Stat_S(dwGprReg);
	pCode->Stat_Double_D(dwFpReg);

	SR_Emit_Generic_R4300(pCode, dwOp, R4300_Cop1_DMTC1);
	return TRUE;
}
