// Evaluates to non-zero when the hand-optimised emitters in this file must NOT
// be used, i.e. when the optimisation level of the code being generated is
// below 1. Expects a variable named pCode (with a dwOptimiseLevel member) to be
// in scope at the point of use.
// The expansion is fully parenthesised so it behaves as a single operand when
// it is combined with other operators (e.g. negation or arithmetic).
#define SR_MATH_IMM_OPTIMISE_FLAG		(pCode->dwOptimiseLevel < 1)
//#define SR_MATH_IMM_OPTIMISE_FLAG		1

// Debugging aid placed at the top of every emitter in this file. The
// replacement text is commented out, so it currently expands to nothing;
// removing the `//` makes every emitter return FALSE immediately.
#define TEST_DISABLE_SR_MATH_IMM		//return FALSE;


// Emits code for ADDI. No ADDI-specific code is generated: every argument is
// forwarded unchanged to SR_Emit_ADDIU and its result is handed back to the
// caller.
BOOL SR_Emit_ADDI(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
TEST_DISABLE_SR_MATH_IMM

	const BOOL bEmitted = SR_Emit_ADDIU(pCode, dwOp, pdwFlags);
	return bEmitted;
}

// Emits code for ADDIU rt, rs, imm (SR_Emit_ADDI forwards here as well):
//   g_qwGPR[rt] = (s64)g_qwGPR[rs] + (s64)(s32)imm
//
// When SR_MATH_IMM_OPTIMISE_FLAG is set the generic R4300_ADDIU handler is
// emitted. Otherwise specialised code is generated:
//   - rt == r0 : nothing is emitted (writes to r0 are dropped)
//   - rs == r0 : rt is simply set to the sign-extended immediate
//   - otherwise: the low words are added first, then the high words are added
//                with carry
//
// pdwFlags is not referenced. Always returns TRUE.
BOOL SR_Emit_ADDIU(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
TEST_DISABLE_SR_MATH_IMM
	const u32 dwRT = R4300_RT(dwOp);
	const u32 dwRS = R4300_RS(dwOp);

	pCode->Stat_D_S(dwRT, dwRS);

	if (SR_MATH_IMM_OPTIMISE_FLAG)
	{
		SR_Emit_Generic_R4300(pCode, dwOp, R4300_ADDIU);
		return TRUE;
	}

	// 16 bit immediate, sign-extended to 32 bits
	s32 nImm = (s32)(s16)R4300_IMM(dwOp);

	pCode->dwNumOptimised++;

	// Never write to r0
	if (dwRT == REG_r0)
		return TRUE;

	if (dwRS == REG_r0)
	{
		// 0 + imm: the result is just the immediate, sign-extended to 64 bits
		SetMIPSLo(pCode, dwRT, nImm);
		if (nImm < 0)
		{
			SetMIPSHi(pCode, dwRT, 0xFFFFFFFF);	// Negative: high word is all ones
		}
		else
		{
			SetMIPSHi(pCode, dwRT, 0);		// Non-negative: high word is zero
		}
		return TRUE;
	}

	// Low-word addition. Work directly in the register caching rt when there
	// is one; otherwise go through EAX.
	REGCODE iCachedReg = GetMIPSCachedReg( dwRT );

	if (iCachedReg == INVALID_CODE)
	{
		LoadMIPSLo(pCode, EAX_CODE, dwRS);
		pCode->ADDI(EAX_CODE, nImm);
		StoreMIPSLo(pCode, dwRT, EAX_CODE);
	}
	else if (dwRS == dwRT)
	{
		// rt += imm, performed on the cached register itself
		DPF(DEBUG_DYNREC, "  ++ ADDI1: Adding inplace");

		// The cached copy has to hold the current value before we add to it
		EnsureCachedValidLo(pCode, dwRT);

		pCode->ADDI(iCachedReg, nImm);
		MarkMIPSAsDirty( dwRT, TRUE );
	}
	else
	{
		// rt = rs + imm. The previous contents of the cached register are
		// overwritten, so whether it was valid does not matter.
		DPF(DEBUG_DYNREC, "  ++ ADDI2: Adding inplace");

		LoadMIPSLo(pCode, iCachedReg, dwRS);
		pCode->ADDI(iCachedReg, nImm);
		MarkMIPSAsDirty( dwRT, TRUE );
		MarkMIPSAsValid( dwRT, TRUE );
	}

	// High-word addition: add the sign extension of the immediate plus the
	// carry out of the low-word add.
	// WARN - the ADCI only works if nothing emitted since the low-word ADDI
	// (e.g. a store above) has modified the flags!
	LoadMIPSHi(pCode, EDX_CODE, dwRS);
	if (nImm < 0)
	{
		pCode->ADCI(EDX_CODE, -1);		// 0xFF
	}
	else
	{
		pCode->ADCI(EDX_CODE, 0);
	}
	StoreMIPSHi(pCode, dwRT, EDX_CODE);

	return TRUE;
}

// Emits code for ANDI rt, rs, imm:
//   g_qwGPR[rt] = g_qwGPR[rs] & (u64)imm      (imm is zero-extended)
//
// When SR_MATH_IMM_OPTIMISE_FLAG is set the generic R4300_ANDI handler is
// emitted. Otherwise the low word is ANDed with the immediate (in the
// register caching rt when there is one, else via EAX) and the high word of
// rt is set to 0, since the zero-extended immediate clears it.
//
// pdwFlags is not referenced. Always returns TRUE.
BOOL SR_Emit_ANDI(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
TEST_DISABLE_SR_MATH_IMM

	const u32 dwRT = R4300_RT(dwOp);
	const u32 dwRS = R4300_RS(dwOp);

	pCode->Stat_D_S(dwRT, dwRS);

	if (SR_MATH_IMM_OPTIMISE_FLAG)
	{
		SR_Emit_Generic_R4300(pCode, dwOp, R4300_ANDI);
		return TRUE;
	}

	// 16 bit immediate, zero-extended to 32 bits
	u32 dwMask = (u32)(u16)R4300_IMM(dwOp);

	pCode->dwNumOptimised++;

	// Never write to r0
	if (dwRT == REG_r0)
		return TRUE;

	REGCODE iDstReg = GetMIPSCachedReg( dwRT );

	if (iDstReg == INVALID_CODE)
	{
		// Destination is not cached: go through EAX.
		// (The load is a reg->reg copy if rs happens to be cached.)
		LoadMIPSLo(pCode, EAX_CODE, dwRS);
		pCode->ANDI(EAX_CODE, dwMask);
		StoreMIPSLo(pCode, dwRT, EAX_CODE);
	}
	else if (dwRT == dwRS)
	{
		// rt &= imm on the cached register; it must hold the current value first
		EnsureCachedValidLo(pCode, dwRT);

		DPF(DEBUG_DYNREC, "  ++ ANDI1: Anding inplace");
		pCode->ANDI(iDstReg, dwMask);

		MarkMIPSAsDirty( dwRT, TRUE );
	}
	else
	{
		// rt = rs & imm, built directly in rt's cached register. Its old
		// contents are overwritten, so validity is irrelevant.
		DPF(DEBUG_DYNREC, "  ++ ANDI2: Inplace, ignoring validity");
		LoadMIPSLo(pCode, iDstReg, dwRS);
		pCode->ANDI(iDstReg, dwMask);
		MarkMIPSAsDirty( dwRT, TRUE );
		MarkMIPSAsValid( dwRT, TRUE );
	}

	// The top half of the result is always 0
	SetMIPSHi(pCode, dwRT, 0);

	return TRUE;
}

// Emits code for ORI rt, rs, imm:
//   g_qwGPR[rt] = g_qwGPR[rs] | (u64)imm      (imm is zero-extended)
//
// When SR_MATH_IMM_OPTIMISE_FLAG is set the generic R4300_ORI handler is
// emitted. Otherwise:
//   - rt == r0 : nothing is emitted
//   - rs == r0 : rt is set to the zero-extended immediate
//   - otherwise: the low word is ORed with the immediate (in rt's cached
//                register when there is one, else via EAX); the high word is
//                copied from rs unless rs and rt are the same register
//
// pdwFlags is not referenced. Always returns TRUE.
BOOL SR_Emit_ORI(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
TEST_DISABLE_SR_MATH_IMM

	const u32 dwRT = R4300_RT(dwOp);
	const u32 dwRS = R4300_RS(dwOp);

	pCode->Stat_D_S(dwRT, dwRS);

	if (SR_MATH_IMM_OPTIMISE_FLAG)
	{
		SR_Emit_Generic_R4300(pCode, dwOp, R4300_ORI);
		return TRUE;
	}

	// 16 bit immediate, zero-extended to 32 bits
	u32 dwBits = (u32)(u16)R4300_IMM(dwOp);

	pCode->dwNumOptimised++;

	// Never write to r0
	if (dwRT == REG_r0)
		return TRUE;

	if (dwRS == REG_r0)
	{
		// 0 | imm: g_qwGPR[rt] = (u64)imm
		DPF(DEBUG_DYNREC, "  ++ ORI0: Setting value");
		SetMIPSLo(pCode, dwRT, dwBits);
		SetMIPSHi(pCode, dwRT, 0);
		return TRUE;
	}

	REGCODE iDstReg = GetMIPSCachedReg( dwRT );

	if (iDstReg == INVALID_CODE)
	{
		// Destination is not cached: go through EAX.
		// (The load is a reg->reg copy if rs happens to be cached.)
		LoadMIPSLo(pCode, EAX_CODE, dwRS);
		pCode->ORI(EAX_CODE, dwBits);		// Code ORI to ignore oring in 0?
		StoreMIPSLo(pCode, dwRT, EAX_CODE);
	}
	else if (dwRT == dwRS)
	{
		// rt |= imm on the cached register; it must hold the current value first
		EnsureCachedValidLo(pCode, dwRT);

		DPF(DEBUG_DYNREC, "  ++ ORI1: Oring inplace");
		pCode->ORI(iDstReg, dwBits);

		MarkMIPSAsDirty( dwRT, TRUE );
	}
	else
	{
		// rt = rs | imm, built directly in rt's cached register. Its old
		// contents are overwritten, so validity is irrelevant.
		DPF(DEBUG_DYNREC, "  ++ ORI2: Inplace, ignoring validity");
		LoadMIPSLo(pCode, iDstReg, dwRS);
		pCode->ORI(iDstReg, dwBits);
		MarkMIPSAsDirty( dwRT, TRUE );
		MarkMIPSAsValid( dwRT, TRUE );
	}

	// ORing with a zero-extended immediate leaves the high word untouched, so
	// it only needs copying across when source and destination differ.
	if (dwRT != dwRS)
	{
		LoadMIPSHi(pCode, EAX_CODE, dwRS);
		StoreMIPSHi(pCode, dwRT, EAX_CODE);
	}

	return TRUE;
}

// Emits code for XORI rt, rs, imm:
//   g_qwGPR[rt] = g_qwGPR[rs] ^ (u64)imm      (imm is zero-extended)
//
// When SR_MATH_IMM_OPTIMISE_FLAG is set the generic R4300_XORI handler is
// emitted. Otherwise the low word is XORed via EAX and, when rs and rt are
// different registers, the high word is copied across via EDX.
//
// pdwFlags is not referenced. Always returns TRUE.
BOOL SR_Emit_XORI(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
TEST_DISABLE_SR_MATH_IMM
	const u32 dwRT = R4300_RT(dwOp);
	const u32 dwRS = R4300_RS(dwOp);

	pCode->Stat_D_S(dwRT, dwRS);

	if (SR_MATH_IMM_OPTIMISE_FLAG)
	{
		SR_Emit_Generic_R4300(pCode, dwOp, R4300_XORI);
		return TRUE;
	}

	// 16 bit immediate, zero-extended to 32 bits
	u32 dwBits = (u32)(u16)R4300_IMM(dwOp);

	pCode->dwNumOptimised++;

	// Register 0 is never written
	if (dwRT == 0)
		return TRUE;

	LoadMIPSLo(pCode, EAX_CODE, dwRS);
	pCode->XORI(EAX_CODE, dwBits);

	// XORing the high word with 0 leaves it unchanged, so it is only copied
	// when the registers differ (nothing to do for e.g. XORI s1,s1,0xFFFF).
	const BOOL bSameReg = (dwRS == dwRT);
	if (!bSameReg)
	{
		LoadMIPSHi(pCode, EDX_CODE, dwRS);
		StoreMIPSHi(pCode, dwRT, EDX_CODE);
	}

	StoreMIPSLo(pCode, dwRT, EAX_CODE);

	return TRUE;
}


// Emits code for LUI rt, imm:
//   g_qwGPR[rt] = (s64)(s32)(imm << 16)
//
// When SR_MATH_IMM_OPTIMISE_FLAG is set the generic R4300_LUI handler is
// emitted. Otherwise rt is set directly: the low word receives imm << 16 and
// the high word its sign extension. Nothing is emitted when rt is register 0.
//
// pdwFlags is not referenced. Always returns TRUE.
//
// TODO: If a LUI of 0x80xx or 0xa0xx is performed, flag the register as a
// "ram pointer". This would let us optimise store ops for registers other
// than SP.
BOOL SR_Emit_LUI(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
TEST_DISABLE_SR_MATH_IMM

	const u32 dwRT = R4300_RT(dwOp);

	pCode->Stat_D(dwRT);

	if (SR_MATH_IMM_OPTIMISE_FLAG)
	{
		SR_Emit_Generic_R4300(pCode, dwOp, R4300_LUI);
	}
	else
	{
		pCode->dwNumOptimised++;

		// Shift as an unsigned value and convert afterwards. Left-shifting
		// the sign-extended immediate directly shifts a negative signed
		// value whenever bit 15 of the immediate is set (e.g. lui 0x8000),
		// which is undefined behaviour before C++20.
		s32 nData = (s32)((u32)(u16)R4300_IMM(dwOp) << 16);

		//g_qwGPR[dwRT] = (s64)(s32)(wData<<16);

		if (dwRT == 0) return TRUE;		// Don't set reg0

		// TODO: Check for the next op being ADDIU or ADDI (a common way of
		// setting an entire register) and combine the two.

		SetMIPSLo(pCode, dwRT, nData);
		if (nData >= 0)
		{
			SetMIPSHi(pCode, dwRT, 0);			// Clear top bits
		}
		else
		{
			SetMIPSHi(pCode, dwRT, 0xFFFFFFFF);	// Set top bits
		}
	}

	return TRUE;
}



// Emits code for SLTI rt, rs, imm (signed 64 bit comparison):
//   g_qwGPR[rt] = ((s64)g_qwGPR[rs] < (s64)(s32)imm) ? 1 : 0
//
// When SR_MATH_IMM_OPTIMISE_FLAG is set the generic R4300_SLTI handler is
// emitted. Otherwise the comparison is emitted in two steps:
//   1. the high word of rs is compared (signed, SETL) against the high word
//      of the sign-extended immediate (0 or 0xFFFFFFFF);
//   2. only if the high words are equal, the low words are compared
//      (unsigned, SETB) and that result replaces the first one.
// The 0/1 result built in EAX is stored to the low word of rt and the high
// word of rt is set to 0.
//
// pdwFlags is not referenced. Always returns TRUE.
BOOL SR_Emit_SLTI(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
TEST_DISABLE_SR_MATH_IMM

	const u32 dwRT = R4300_RT(dwOp);
	const u32 dwRS = R4300_RS(dwOp);

	pCode->Stat_D_S(dwRT, dwRS);

	if (SR_MATH_IMM_OPTIMISE_FLAG)
	{
		SR_Emit_Generic_R4300(pCode, dwOp, R4300_SLTI);
		return TRUE;
	}

	// 16 bit immediate, sign-extended to 32 bits
	s32 nImm = (s32)(s16)R4300_IMM(dwOp);

	pCode->dwNumOptimised++;

	// Don't set r0
	if (dwRT == 0)
		return TRUE;

	// TODO optimise for when cmp reg is cached/valid (compare direct)

	LoadMIPSHi(pCode, EDX_CODE, dwRS);
	pCode->XOR(EAX_CODE, EAX_CODE);			// Clear EAX so that SETcc AL works

	// Compare against the high word of the sign-extended immediate
	if (nImm >= 0)
	{
		pCode->CMPI(EDX_CODE, 0x00000000);	// 6 bytes
	}
	else
	{
		pCode->CMPI(EDX_CODE, 0xFFFFFFFF);	// 6 bytes
	}
	pCode->SETL(EAX_CODE);					// 3 bytes

	// The load doesn't affect flags, but it has to happen before the branch
	// to preserve our internal record of the register status.
	LoadMIPSLo(pCode, EDX_CODE, dwRS);

	// High words differ: the result is already decided, so skip the low-word
	// compare. The offset must match the size of the two instructions below.
	pCode->JNE(0x9);						// Next two instructions are 9 bytes
	pCode->CMPI(EDX_CODE, nImm);			// 6 bytes
	pCode->SETB(EAX_CODE);					// 3 bytes

	// Store result in RT
	StoreMIPSLo(pCode, dwRT, EAX_CODE);
	SetMIPSHi(pCode, dwRT, 0);

	return TRUE;
}

// Emits code for SLTIU rt, rs, imm. The immediate is sign-extended, but the
// emitted comparison itself is unsigned (SETB is used for both halves).
//
// When SR_MATH_IMM_OPTIMISE_FLAG is set the generic R4300_SLTIU handler is
// emitted. Otherwise the comparison is emitted in two steps:
//   1. the high word of rs is compared (unsigned, SETB) against the high word
//      of the sign-extended immediate (0 or 0xFFFFFFFF);
//   2. only if the high words are equal, the low words are compared
//      (unsigned, SETB) and that result replaces the first one.
// The 0/1 result built in EAX is stored to the low word of rt and the high
// word of rt is set to 0.
//
// pdwFlags is not referenced. Always returns TRUE.
BOOL SR_Emit_SLTIU(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
TEST_DISABLE_SR_MATH_IMM
	const u32 dwRT = R4300_RT(dwOp);
	const u32 dwRS = R4300_RS(dwOp);

	pCode->Stat_D_S(dwRT, dwRS);

	if (SR_MATH_IMM_OPTIMISE_FLAG)
	{
		SR_Emit_Generic_R4300(pCode, dwOp, R4300_SLTIU);
		return TRUE;
	}

	// 16 bit immediate, sign-extended to 32 bits
	s32 nImm = (s32)(s16)R4300_IMM(dwOp);

	pCode->dwNumOptimised++;

	// Don't set r0
	if (dwRT == 0)
		return TRUE;

	LoadMIPSHi(pCode, EDX_CODE, dwRS);
	pCode->XOR(EAX_CODE, EAX_CODE);			// Clear EAX so that SETcc AL works

	// Compare against the high word of the sign-extended immediate
	if (nImm >= 0)
	{
		pCode->CMPI(EDX_CODE, 0x00000000);	// 6 bytes
	}
	else
	{
		pCode->CMPI(EDX_CODE, 0xFFFFFFFF);	// 6 bytes
	}
	pCode->SETB(EAX_CODE);					// 3 bytes

	// The load has to be emitted before the branch so that our internal
	// record of the register status stays correct on both paths.
	LoadMIPSLo(pCode, EDX_CODE, dwRS);

	// High words differ: the result is already decided, so skip the low-word
	// compare. The offset must match the size of the two instructions below.
	pCode->JNE(0x9);						// Next two instructions are 9 bytes
	pCode->CMPI(EDX_CODE, nImm);			// 6 bytes
	pCode->SETB(EAX_CODE);					// 3 bytes

	// Store result in RT
	StoreMIPSLo(pCode, dwRT, EAX_CODE);
	SetMIPSHi(pCode, dwRT, 0);

	return TRUE;
}







// Emits code for SLL rd, rt, sa:
//   g_qwGPR[rd] = (s64)(s32)((((u32)g_qwGPR[rt]) << sa) & 0xFFFFFFFF)
//
// An all-zero opcode (NOP) emits nothing at all. When
// SR_MATH_IMM_OPTIMISE_FLAG is set the generic R4300_Special_SLL handler is
// emitted. Otherwise the low word is shifted and the high word of rd is
// filled with copies of bit 31 of the result (arithmetic shift right by 31).
// For rd == rt with a cached, valid register the shift is done in place on
// that register; every other case goes through EAX.
//
// pdwFlags is not referenced. Always returns TRUE.
BOOL SR_Emit_Special_SLL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
TEST_DISABLE_SR_MATH_IMM

	// Skip NOPs
	if (dwOp == 0)
	{
		pCode->dwNumOptimised++;
		return TRUE;
	}

	const u32 dwSA = R4300_SA(dwOp);
	const u32 dwRT = R4300_RT(dwOp);
	const u32 dwRD = R4300_RD(dwOp);

	pCode->Stat_D_S(dwRD, dwRT);

	if (SR_MATH_IMM_OPTIMISE_FLAG)
	{
		SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_SLL);
		return TRUE;
	}

	pCode->dwNumOptimised++;

	// Don't set reg0
	if (dwRD == 0)
		return TRUE;

	if (dwRD == dwRT)
	{
		// n = n << x: try to shift the cached register where it sits
		REGCODE iDstReg = GetMIPSCachedReg( dwRD );

		if (iDstReg != INVALID_CODE && IsMIPSValid( dwRD ))
		{
			DPF(DEBUG_DYNREC, "  ++ Shifting in place using cached reg...\\/");
			pCode->SHLI(iDstReg, dwSA);
			MarkMIPSAsDirty( dwRD, TRUE );
			// The register was already valid, so there is no need to mark it

			// High word = sign extension of the shifted low word
			pCode->MOV(EAX_CODE, iDstReg);
			pCode->SARI(EAX_CODE, 31);
			StoreMIPSHi(pCode, dwRD, EAX_CODE);

			return TRUE;
		}
	}

	// Normal version: either the registers differ, or rd is not held in a
	// valid cached register.
	LoadMIPSLo(pCode, EAX_CODE, dwRT);
	pCode->SHLI(EAX_CODE, dwSA);

	StoreMIPSLo(pCode, dwRD, EAX_CODE);
	pCode->SARI(EAX_CODE, 31);
	StoreMIPSHi(pCode, dwRD, EAX_CODE);

	return TRUE;
}

// Emits code for SRL rd, rt, sa:
//   g_qwGPR[rd] = (s64)(s32)(((u32)g_qwGPR[rt]) >> sa)
//
// When SR_MATH_IMM_OPTIMISE_FLAG is set the generic R4300_Special_SRL handler
// is emitted. Otherwise the low word of rt is shifted via EAX and written to
// rd. Nothing is emitted when rd is register 0.
//
// High word handling:
//   - sa != 0: a logical right shift always clears bit 31, so the sign
//              extension of the result is 0 and the high word is set to 0.
//   - sa == 0: nothing is shifted out, so bit 31 of rt survives and the
//              result must be sign-extended like any other 32 bit result.
//              (Writing 0 unconditionally would give the wrong high word for
//              values with bit 31 set.)
//
// pdwFlags is not referenced. Always returns TRUE.
BOOL SR_Emit_Special_SRL(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
TEST_DISABLE_SR_MATH_IMM
	const u32 dwSA = R4300_SA(dwOp);
	const u32 dwRT = R4300_RT(dwOp);
	const u32 dwRD = R4300_RD(dwOp);

	pCode->Stat_D_S(dwRD, dwRT);
	
	if (SR_MATH_IMM_OPTIMISE_FLAG)
	{
		SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_SRL);
	}
	else
	{
		//g_qwGPR[dwRD] = (s64)(s32)(((u32)g_qwGPR[dwRT]) >> dwSA);

		pCode->dwNumOptimised++;

		if (dwRD == 0) return TRUE;	// Don't set reg0

		LoadMIPSLo(pCode, EAX_CODE, dwRT);

		if (dwSA == 0)
		{
			// No shift: copy the low word and sign-extend it into the high
			// word (same sequence as used for SLL/SRA).
			StoreMIPSLo(pCode, dwRD, EAX_CODE);
			pCode->SARI(EAX_CODE, 31);
			StoreMIPSHi(pCode, dwRD, EAX_CODE);
		}
		else
		{
			pCode->SHRI(EAX_CODE, dwSA);
			// Bit 31 of the result is guaranteed to be 0 here, so the top
			// bits are always 0.
			SetMIPSHi(pCode, dwRD, 0);
			StoreMIPSLo(pCode, dwRD, EAX_CODE);
		}
	}
	return TRUE;
}

// Emits code for SRA rd, rt, sa:
//   g_qwGPR[rd] = (s64)(s32)(((s32)g_qwGPR[rt]) >> sa)
//
// When SR_MATH_IMM_OPTIMISE_FLAG is set the generic R4300_Special_SRA handler
// is emitted. Otherwise the low word of rt is shifted arithmetically via EAX
// and stored to rd; EAX is then shifted right by a further 31 bits so that it
// holds copies of the result's sign bit, and that is stored as the high word.
// Nothing is emitted when rd is register 0.
//
// pdwFlags is not referenced. Always returns TRUE.
BOOL SR_Emit_Special_SRA(CDynarecCode *pCode, DWORD dwOp, DWORD * pdwFlags)
{
TEST_DISABLE_SR_MATH_IMM
	const u32 dwSA = R4300_SA(dwOp);
	const u32 dwRT = R4300_RT(dwOp);
	const u32 dwRD = R4300_RD(dwOp);

	pCode->Stat_D_S(dwRD, dwRT);

	if (SR_MATH_IMM_OPTIMISE_FLAG)
	{
		SR_Emit_Generic_R4300(pCode, dwOp, R4300_Special_SRA);
		return TRUE;
	}

	// NOTE(review): the original author wanted to double check that this
	// handles the sign correctly.

	pCode->dwNumOptimised++;

	// Don't set reg0
	if (dwRD == 0)
		return TRUE;

	// Low word: arithmetic shift of the 32 bit value
	LoadMIPSLo(pCode, EAX_CODE, dwRT);
	pCode->SARI(EAX_CODE, dwSA);
	StoreMIPSLo(pCode, dwRD, EAX_CODE);

	// High word: the sign of the result replicated across all 32 bits.
	// (The original author suspected this might be redundant because the top
	// bits should match the source's - left in place as that is unverified.)
	pCode->SARI(EAX_CODE, 31);
	StoreMIPSHi(pCode, dwRD, EAX_CODE);

	return TRUE;
}