/*====================================================================

filename:     trx_ppc_rec_opcodes.cpp
project:      GCemu
created:      2004-6-18
mail:		  duddie@walla.com

Copyright (c) 2005 Duddie & Tratax

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

====================================================================*/
/*
 *	Tratax PowerPC recompiler
 *	trx_ppc_rec_opcodes.cpp
 *
 *  Opcode implementations for integer ops
 *
 *
 *	2004-7-17 started work based on assembly interpreter core
 *  2004-8-10 started rewrite for constant and register caching
 *  2004-8-19 split off fpu and paired single opcodes
 * Note:
 *
 * Very important:
 * Only EAX, ECX, EDX can be freely used. any other register needs to be preserved
 * because the register cache is using EBX, ESI, EDI, EBP
 *
 * Static branch prediction: 
 * - Predict forward  conditional branches NOT taken
 * - Predict backward conditional branches taken
 * - Predict Indirect branches NOT taken


- cmpli and cmpwi are called a lot with constant values. This makes CR predictable, which probably makes the conditional jumps
 that follow predictable as well, so those jumps could be removed. For now, just do the constant calculation and see if it works.
 Later, constant flags should be tracked for CR to see whether more can be done.

- More 'temporary' registers can be removed. For example, addi has no need for EAX: the destination cache register can be used immediately.
 
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "system/types.h"
#include "config.h"
#include "hardware/hw_io.h"
#include "cpu/trx_ppc_cpu.h"
#include "cpu/trx_ppc_rec_fpu_ps_opcodes_sse2.h"
#include "trx_ppc_rec.h"
#include "asm_x86.h"
#include "debug/tracers.h"

extern void gx_write_fifo32(uint32 data);
extern void gx_write_fifo16(uint16 data);
extern void gx_write_fifo8(uint8 data);


#pragma warning (disable:4311)

// Bit masks for the four flags of condition register field 0, which is
// the top nibble (bits 31..28) of a 32-bit word.
#define CR0_LT (1<<31)	// "less than" flag
#define CR0_GT (1<<30)	// "greater than" flag
#define CR0_EQ (1<<29)	// "equal" flag
#define CR0_SO (1<<28)	// "summary overflow" flag

//extern uint32 blockdebugstart;
//extern uint32 zero = 0, one = 1;
//extern uint32 cr_gt, cr_lt, cr_eq, cr_so;

// AND masks that clear one 4-bit field of a 32-bit word: entry n has
// nibble n (bits 4n+3..4n) zeroed and every other bit set.
// The compare generators in this file index it with (7 - crfD) to clear the
// destination CR field before OR-ing in the new flags.
static uint32 trx_ppc_cmp_and_mask[8] = 
{
	0xfffffff0,
	0xffffff0f,
	0xfffff0ff,
	0xffff0fff,
	0xfff0ffff,
	0xff0fffff,
	0xf0ffffff,
	0x0fffffff
};

// Emits the code that closes a recompiled block: ends the register cache
// (and the paired-single cache when use_sse is set), then emits a SUB that
// takes (block_instr + 1) off cpuslice_left.
void trx_rec_gen_blockend(void)
{ 
	regc_end();
	if(use_sse)
	{
		psc_end();
	}

	// Small counts use the 8-bit immediate form of SUB, anything above 127
	// needs the 32-bit immediate form.
	if((trxCPUrec.block_instr+1) <= 127)
	{
		gen_asm(SUB_M32I8, (uint32)&cpuslice_left, trxCPUrec.block_instr+1);
	}
	else
	{
		gen_asm(SUB_M32I32, (uint32)&cpuslice_left, trxCPUrec.block_instr+1);
	}
}

void trx_gen_breakpoint(void)
{
	trxCPUrec.blockend = BLOCKEND_STOP;
	trx_rec_gen_blockend();
	gen_asm(MOV_M32I32, (uint32)&trxCPUrec.npc, trxCPUrec.npc);
	gen_asm(MOV_M32I32, (uint32)&cpu_instr_breakpoint_flag, 1);
}

void trx_ppc_gen_cr0(void)
{
	// $$$$ VERY IMPORTANT $$$$
	// I assume that EAX contains the value that CR0 will be calculated from !
	gen_asm(CMP_RI32, EAX, 0);
	gen_asm(CMOVG_M, EAX, (uint32)&cr_gt);
	gen_asm(CMOVL_M, EAX, (uint32)&cr_lt);
	gen_asm(CMOVE_M, EAX, (uint32)&cr_eq);
	gen_asm(MOV_RM, ECX, (uint32)&trxCPUrec.cr);
	gen_asm(AND_RI32, ECX, 0x0fffffff);
	gen_asm(OR_RR, ECX, EAX);
	gen_asm(MOV_MR,(uint32)&trxCPUrec.cr, ECX); 
}

// Rotates a 32-bit word left by n bit positions.
//   data : value to rotate
//   n    : rotate count; only the low five bits are used (n & 0x1f)
// Returns the rotated value. A count of 0 (or any multiple of 32) returns
// data unchanged.
static inline uint32 trx_ppc_word_rotl(uint32 data, int n)
{
	n &= 0x1f;
	// Mask the complementary count as well: for n == 0 a plain (32-n) would
	// shift a 32-bit value by 32, which is undefined behaviour.
	return (data << n) | (data >> ((32-n) & 0x1f));
}

// Builds the 32-bit mask used by the rotate-and-mask instructions.
// Bit numbering follows the PowerPC convention: bit 0 is the most
// significant bit, bit 31 the least significant.
//   MB : first bit of the mask (only the low five bits are used)
//   ME : last bit of the mask  (only the low five bits are used)
// Returns a word with bits MB..ME set when MB <= ME; when MB > ME the mask
// wraps around, i.e. bits MB..31 and 0..ME are set (MB == ME+1 gives all ones).
//
// The mask is assembled from two unsigned shifts whose counts always stay in
// 0..31, so no shift overflows and the wrap-around case MB == ME+1 is handled
// without shifting by 32.
static inline uint32 trx_ppc_mask(int MB, int ME)
{
	const int mb = MB & 0x1f;
	const int me = ME & 0x1f;
	const uint32 from_mb = 0xffffffffu >> mb;		// bits mb..31
	const uint32 upto_me = 0xffffffffu << (31-me);	// bits 0..me

	if (mb <= me) {
		return from_mb & upto_me;
	}
	return from_mb | upto_me;
}

//==============================================================================
// Integer Arithmetic opcodes (22 total, 19 emulated)
// not emulated: subfmex
//

// tested
void trx_ppc_gen_addx()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	//trxCPUrec.gpr[rD] = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
	regc_load(EAX, rA);
	if(regc_is_constant(rB))
	{
		gen_asm(ADD_RI32, EAX, regc_getconstant(rB));
	}
	else
	{
		gen_asm(ADD_RR, EAX, regc_getcachereg(rB));
	}
	regc_store(EAX, rD);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1)	trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_addcx()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	//trxCPUrec.gpr[rD] = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
	// update xer
	//if (trxCPUrec.gpr[rD] < trxCPUrec.gpr[rA]) {
	//	trxCPUrec.xer_ca = 1;
	//} else {
	//	trxCPUrec.xer_ca = 0;
	//}
	regc_load(EAX, rA);
	if(regc_is_constant(rB))
	{
		gen_asm(ADD_RI32, EAX, regc_getconstant(rB));
	}
	else
	{
		gen_asm(ADD_RR, EAX, regc_getcachereg(rB));
	}
	gen_asm(SETC_M, (uint32)&trxCPUrec.xer_ca);
	regc_store(EAX, rD);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1)	trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_addex()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

//	a = trxCPUrec.gpr[rA];
//	b = trxCPUrec.gpr[rB];
//	ca = trxCPUrec.xer_ca;
//	trxCPUrec.gpr[rD] = a + b + ca;

	// update xer
//	if (trx_ppc_carry_3(~a, b, ca)) {
//		trxCPUrec.xer_ca = 1;
//	} else {
//		trxCPUrec.xer_ca = 0;
//	}
	regc_load(EAX, rA);
	if(regc_is_constant(rB))
	{
		gen_asm(ADD_RI32, EAX, regc_getconstant(rB));
	}
	else
	{
		gen_asm(ADD_RR, EAX, regc_getcachereg(rB));
	}
	gen_asm(SETC_R8, CL);
	gen_asm(ADD_RM, EAX, (uint32)&trxCPUrec.xer_ca);
	gen_asm(SETC_R8, CH);
	gen_asm(OR_R8R8, CL, CH);
	gen_asm(MOV_MR8, (uint32)&trxCPUrec.xer_ca, CL);
	regc_store(EAX, rD);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1)	trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_addi(void)
{
	uint32 rD, rA;
	sint16 imm;
	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	if(rA == 0)
	{
		regc_store_constant(imm, rD);
	}
	else
	{
		if(regc_is_constant(rA))
		{
			regc_store_constant(regc_load_constant(rA) + imm, rD);
		}
		else
		{
			regc_load(EAX, rA);
			gen_asm(ADD_RI32, EAX, imm);
			regc_store(EAX, rD);
		}
	}
}

// tested
void trx_ppc_gen_addic()
{
	uint32 rD, rA;
	sint16 imm;
	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	//a = trxCPUrec.gpr[rA];
	//trxCPUrec.gpr[rD] = a + imm;
	// update XER
	//if (trxCPUrec.gpr[rD] < a) {
	//	trxCPUrec.xer_ca = 1;
	//} else {
	//	trxCPUrec.xer_ca = 0;
	//}
	regc_load(EAX, rA);
	gen_asm(ADD_RI32, EAX, imm);
	gen_asm(SETC_M, (uint32)&trxCPUrec.xer_ca);
	regc_store(EAX, rD);
}

// tested
void trx_ppc_gen_addic_()
{
	uint32 rD, rA;
	sint16 imm;
	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	//a = trxCPUrec.gpr[rA];
	//trxCPUrec.gpr[rD] = a + imm;
	// update XER
	//if (trxCPUrec.gpr[rD] < a) {
	//	trxCPUrec.xer_ca = 1;
	//} else {
	//	trxCPUrec.xer_ca = 0;
	//}
	regc_load(EAX, rA);
	gen_asm(ADD_RI32, EAX, imm);
	gen_asm(SETC_M, (uint32)&trxCPUrec.xer_ca);
	regc_store(EAX, rD);
	// EAX must contain final value!
	trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_addis(void)
{
	uint32 rD, rA, imm;
	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode << 16;

	//if(rA == 0)
	//{
	//	trxCPUrec.gpr[rD] = imm;
	//}
	//else
	//{
	//	trxCPUrec.gpr[rD] = trxCPUrec.gpr[rA] + imm;
	//}
	if(rA == 0)
	{
		regc_store_constant(imm, rD);
	}
	else
	{
		if(regc_is_constant(rA))
		{
			regc_store_constant(regc_load_constant(rA) + imm, rD);
		}
		else
		{
			regc_load(EAX, rA);
			gen_asm(ADD_RI32, EAX, imm);
			regc_store(EAX, rD);
		}
	}
}

// tested
void trx_ppc_gen_addzex()
{
	uint32 rD, rA;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;

	//a = trxCPUrec.gpr[rA];
	//ca = trxCPUrec.xer_ca;
	//trxCPUrec.gpr[rD] = a + ca;
	//if ((a == 0xffffffff) && ca) {
	//	trxCPUrec.xer_ca = 1;
	//} else {
	//	trxCPUrec.xer_ca = 0;
	//}
	regc_load(EAX, rA);
	gen_asm(ADD_RM, EAX, (uint32)&trxCPUrec.xer_ca);
	gen_asm(SETC_M, (uint32)&trxCPUrec.xer_ca);
	regc_store(EAX, rD);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1)	trx_ppc_gen_cr0();
}

// unverified !!!
void trx_ppc_gen_addmex()
{
	uint32 rD, rA;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;

	// rd = (ra) + ca - 1;
	//a = trxCPUrec.gpr[rA];
	//ca = trxCPUrec.xer_ca;
	//trxCPUrec.gpr[rD] = a + ca;
	//if ((a == 0xffffffff) && ca) {
	//	trxCPUrec.xer_ca = 1;
	//} else {
	//	trxCPUrec.xer_ca = 0;
	//}
	regc_load(EAX, rA);
	gen_asm(ADD_RM, EAX, (uint32)&trxCPUrec.xer_ca);
	gen_asm(SUB_RI32, EAX, 1);
	gen_asm(SETC_M, (uint32)&trxCPUrec.xer_ca);
	regc_store(EAX, rD);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1)	trx_ppc_gen_cr0();
}

// untested
void trx_ppc_gen_divwx(void)
{
	uint32 rD, rA, rB;
	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	rB = (trxCPUrec.opcode >> 11)& 0x1f;

	//a = trxCPUrec.gpr[rA];
	//b = trxCPUrec.gpr[rB];
	//c = a / b;
	//trxCPUrec.gpr[rD] = c;
	regc_load(EAX, rA);
	regc_load(ECX, rB);
	gen_asm(CDQ);
	gen_asm(TEST_RR, ECX, ECX);
	gen_asm(CMOVE_M, ECX, (uint32)&one); // avoid div0 !
	gen_asm(IDIV_R, ECX);
	regc_store(EAX, rD);
	// EAX must contain final value!
	if(trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_divwux(void)
{
	uint32 rD, rA, rB;
	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	rB = (trxCPUrec.opcode >> 11)& 0x1f;

	//trxCPUrec.gpr[rD] = trxCPUrec.gpr[rA] / trxCPUrec.gpr[rB];
	regc_load(EAX, rA);
	regc_load(ECX, rB);
	gen_asm(XOR_RR, EDX, EDX);
	gen_asm(TEST_RR, ECX, ECX);
	gen_asm(CMOVE_M, ECX, (uint32)&one); // avoid div0 !
	gen_asm(DIV_R, ECX);
	regc_store(EAX, rD);
	// EAX must contain final value!
	if(trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_mulhwx()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	//sint64 a = trxCPUrec.gpr[rA];
	//sint64 b = trxCPUrec.gpr[rB];
	//sint64 c = a*b;
	//trxCPUrec.gpr[rD] = (sint32)(c>>32);
	regc_load(EAX, rA);
	if(regc_is_constant(rB))
	{
		regc_load(ECX, rB);		
		gen_asm(IMUL_R, ECX);
	}
	else
	{
		gen_asm(IMUL_R, regc_getcachereg(rB));
	}
	regc_store(EDX, rD);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1)
	{
		gen_asm(MOV_RR, EAX, EDX);
		trx_ppc_gen_cr0();
	}
}

// tested
void trx_ppc_gen_mulhwux()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	//uint64 a = trxCPUrec.gpr[rA];
	//uint64 b = trxCPUrec.gpr[rB];
	//uint64 c = a*b;
	//trxCPUrec.gpr[rD] = (uint32)(c>>32);
	regc_load(EAX, rA);
	if(regc_is_constant(rB))
	{
		regc_load(ECX, rB);		
		gen_asm(MUL_R, ECX);
	}
	else
	{
		gen_asm(MUL_R, regc_getcachereg(rB));
	}
	regc_store(EDX, rD);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1)	
	{
		gen_asm(MOV_RR, EAX, EDX);
		trx_ppc_gen_cr0();
	}
}

// tested
void trx_ppc_gen_mulli(void)
{
	uint32 rD, rA;
	sint16 imm;
	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	//trxCPUrec.gpr[rD] = trxCPUrec.gpr[rA] * imm;
	regc_load(EAX, rA);
	gen_asm(MOV_RI32, ECX, imm);
	gen_asm(MUL_R, ECX);
	regc_store(EAX, rD);
}

// tested
void trx_ppc_gen_mullwx()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	//trxCPUrec.gpr[rD] = trxCPUrec.gpr[rA] * trxCPUrec.gpr[rB];
	regc_load(EAX, rA);
	if(regc_is_constant(rB))
	{
		regc_load(ECX, rB);		
		gen_asm(MUL_R, ECX);
	}
	else
	{
		gen_asm(MUL_R, regc_getcachereg(rB));
	}
	regc_store(EAX, rD);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1)	trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_negx()
{
	uint32 rD, rA;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;

	//trxCPUrec.gpr[rD] = -(sint32)trxCPUrec.gpr[rA];
	regc_load(EAX, rA);
	gen_asm(NEG_R, EAX);
	regc_store(EAX, rD);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1)	trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_subfx()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	//trxCPUrec.gpr[rD] = ~trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB] + 1;
	regc_load(EAX, rA);
	gen_asm(NOT_R, EAX);
	if(regc_is_constant(rB))
	{
		gen_asm(ADD_RI32, EAX, regc_getconstant(rB));
	}
	else
	{
		gen_asm(ADD_RR, EAX, regc_getcachereg(rB));
	}
	gen_asm(ADD_RI8, EAX, 1);
	regc_store(EAX, rD);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_subfcx()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	//a = trxCPUrec.gpr[rA];
	//b = trxCPUrec.gpr[rB];
	//trxCPUrec.gpr[rD] = ~a + b + 1;
	// update xer
	//if (trx_ppc_carry_3(~a, b, 1)) {
	//	trxCPUrec.xer_ca = 1;
	//} else {
	//	trxCPUrec.xer_ca = 0;
	//}
	regc_load(EAX, rA);
	gen_asm(NOT_R, EAX);
	if(regc_is_constant(rB))
	{
		gen_asm(ADD_RI32, EAX, regc_getconstant(rB));
	}
	else
	{
		gen_asm(ADD_RR, EAX, regc_getcachereg(rB));
	}
	gen_asm(SETC_R8, CL);
	gen_asm(ADD_RI8, EAX, 1);
	gen_asm(SETC_R8, CH);
	gen_asm(OR_R8R8, CL, CH);
	regc_store(EAX, rD);
	gen_asm(MOV_MR8, (uint32)&trxCPUrec.xer_ca, CL);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_subfex()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	//a = trxCPUrec.gpr[rA];
	//b = trxCPUrec.gpr[rB];
	//ca = trxCPUrec.xer_ca;
	//trxCPUrec.gpr[rD] = ~a + b + ca;
	// update xer
	//if (trx_ppc_carry_3(~a, b, ca)) {
	//	trxCPUrec.xer_ca = 1;
	//} else {
	//	trxCPUrec.xer_ca = 0;
	//}
	regc_load(EAX, rA);
	gen_asm(NOT_R, EAX);
	if(regc_is_constant(rB))
	{
		gen_asm(ADD_RI32, EAX, regc_getconstant(rB));
	}
	else
	{
		gen_asm(ADD_RR, EAX, regc_getcachereg(rB));
	}
	gen_asm(SETC_R8, CL);
	gen_asm(ADD_RM, EAX, (uint32)&trxCPUrec.xer_ca);
	gen_asm(SETC_R8, CH);
	gen_asm(OR_R8R8, CL, CH);
	regc_store(EAX, rD);
	gen_asm(MOV_MR8, (uint32)&trxCPUrec.xer_ca, CL);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

// Status: tested
// subfze[.] rD,rA  --  rD = ~rA + xer_ca, xer_ca = carry out of the addition
// Only a single ADD is involved, so its x86 carry flag is the complete
// carry and is written straight to trxCPUrec.xer_ca with SETC. (An earlier
// version also emitted "OR CL,CH" here; CL/CH are never set in this sequence,
// so that instruction operated on stale values and has been dropped.)
// When the Rc bit (opcode bit 0) is set, CR0 is generated from EAX.
void trx_ppc_gen_subfzex()
{
	uint32 rD, rA;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;

	regc_load(EAX, rA);
	gen_asm(NOT_R, EAX);
	gen_asm(ADD_RM, EAX, (uint32)&trxCPUrec.xer_ca);
	gen_asm(SETC_M, (uint32)&trxCPUrec.xer_ca);
	regc_store(EAX, rD);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_subfic(void)
{
	uint32 rD, rA;
	sint16 imm;
	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;
	
	//a = trxCPUrec.gpr[rA];
	//trxCPUrec.gpr[rD] = ~a + imm + 1;
	// update XER
	//if (trx_ppc_carry_3(~a, imm, 1)) {
	//	trxCPUrec.xer_ca = 1;
	//} else {
	//	trxCPUrec.xer_ca = 0;
	//}
	regc_load(EAX, rA);
	gen_asm(NOT_R, EAX);
	gen_asm(ADD_RI32, EAX, imm);
	gen_asm(SETC_R8, CL);
	gen_asm(ADD_RI32, EAX, 1);
	gen_asm(SETC_R8, CH);
	gen_asm(OR_R8R8, CL, CH);
	regc_store(EAX, rD);
	gen_asm(MOV_MR8, (uint32)&trxCPUrec.xer_ca, CL);
}

//==============================================================================
// Integer Compare opcodes ( 4 total, 4 emulated)
// 


// tested only for cr = 0
void trx_ppc_gen_cmp(void)
{
	uint32 cr, rA, rB;
	uint32 shift, mask;
	cr = (trxCPUrec.opcode >> 23)& 0x7;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	rB = (trxCPUrec.opcode >> 11)& 0x1f;

	cr = 7-cr;
	shift = 28-(cr*4); 
	mask = trx_ppc_cmp_and_mask[cr];

	regc_load(EAX, rA);
	if(regc_is_constant(rB))
	{
		gen_asm(CMP_RI32, EAX, regc_getconstant(rB));
	}
	else
	{
		gen_asm(CMP_RR, EAX, regc_getcachereg(rB));
	}
	gen_asm(CMOVG_M, EAX, (uint32)&cr_gt);
	gen_asm(CMOVL_M, EAX, (uint32)&cr_lt);
	gen_asm(CMOVE_M, EAX, (uint32)&cr_eq);
	if(shift != 0)gen_asm(SHR_RI8, EAX, shift);
	gen_asm(MOV_RM, ECX, (uint32)&trxCPUrec.cr);
	gen_asm(AND_RI32, ECX, mask);
	gen_asm(OR_RR, ECX, EAX);
	gen_asm(MOV_MR, (uint32)&trxCPUrec.cr, ECX);
}

// tested only for cr = 0
void trx_ppc_gen_cmpi(void)
{
	uint32 cr, rA;
	sint32 imm;
	sint16 imm16;
	uint32 shift, mask;

	cr = (trxCPUrec.opcode >> 23)& 0x7;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm16 = trxCPUrec.opcode & 0xffff;
	imm = imm16;

	cr = 7-cr;
	shift = 28-(cr*4); 
	mask = trx_ppc_cmp_and_mask[cr];

	if(regc_is_constant(rA))
	{
		//printf("constant cmpi detected at %x\n", trxCPUrec.block_startPC);
		// signed compare
		uint32 res;
		sint32 a;

		a = regc_getconstant(rA);

		if (a < imm) {
			res = cr_lt;
		} else if (a > imm) {
			res = cr_gt;
		} else {
			res = cr_eq;
		}

		// mask off and set correct flags
		gen_asm(MOV_RM, ECX, (uint32)&trxCPUrec.cr);
		gen_asm(AND_RI32, ECX, mask);
		gen_asm(OR_RI32, ECX, res>>shift);
		gen_asm(MOV_MR, (uint32)&trxCPUrec.cr, ECX);
		return;
	}
	else
	{
		gen_asm(CMP_RI32, regc_getcachereg(rA), imm);
	}
	gen_asm(CMOVG_M, EAX, (uint32)&cr_gt);
	gen_asm(CMOVL_M, EAX, (uint32)&cr_lt);
	gen_asm(CMOVE_M, EAX, (uint32)&cr_eq);
	if(shift != 0)gen_asm(SHR_RI8, EAX, shift);
	gen_asm(MOV_RM, ECX, (uint32)&trxCPUrec.cr);
	gen_asm(AND_RI32, ECX, mask);
	gen_asm(OR_RR, ECX, EAX);
	gen_asm(MOV_MR, (uint32)&trxCPUrec.cr, ECX);
}

// tested only for cr = 0
void trx_ppc_gen_cmpl(void)
{
	uint32 cr, rA, rB;
	uint32 shift, mask;

	cr = (trxCPUrec.opcode >> 23)& 0x7;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	rB = (trxCPUrec.opcode >> 11)& 0x1f;

/*
	a = trxCPUrec.gpr[rA];
	b = trxCPUrec.gpr[rB];
	if (a < b) {
		res = 8;
	} else if (a > b) {
		res = 4;
	} else {
		res = 2;
	}
//	if (trxCPUrec.xer & XER_SO) res |= 1;

	// mask off and set correct flags
	cr = 7-cr;
	trxCPUrec.cr &= trx_ppc_cmp_and_mask[cr];
	trxCPUrec.cr |= res<<(cr*4);
*/

	cr = 7-cr;
	shift = 28-(cr*4); 
	mask = trx_ppc_cmp_and_mask[cr];

	regc_load(EAX, rA);
	if(regc_is_constant(rB))
	{
		gen_asm(CMP_RI32, EAX, regc_getconstant(rB));
	}
	else
	{
		gen_asm(CMP_RR, EAX, regc_getcachereg(rB));
	}
	gen_asm(CMOVA_M, EAX, (uint32)&cr_gt);
	gen_asm(CMOVB_M, EAX, (uint32)&cr_lt);
	gen_asm(CMOVE_M, EAX, (uint32)&cr_eq);
	if(shift != 0)gen_asm(SHR_RI8, EAX, shift);
	gen_asm(MOV_RM, ECX, (uint32)&trxCPUrec.cr);
	gen_asm(AND_RI32, ECX, mask);
	gen_asm(OR_RR, ECX, EAX);
	gen_asm(MOV_MR, (uint32)&trxCPUrec.cr, ECX);
}

// tested only for cr = 0
void trx_ppc_gen_cmpli(void)
{
	uint32 cr, rA;
	uint32 imm;
	uint32 shift, mask;

	cr = (trxCPUrec.opcode >> 23)& 0x7;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	cr = 7-cr;
	shift = 28-(cr*4); 
	mask = trx_ppc_cmp_and_mask[cr];

	if(regc_is_constant(rA))
	{
		//printf("constant cmpli detected at %x\n", trxCPUrec.block_startPC);
		// unsigned compare
		uint32 res;
		uint32 a;

		a = regc_getconstant(rA);

		if (a < imm) {
			res = cr_lt;
		} else if (a > imm) {
			res = cr_gt;
		} else {
			res = cr_eq;
		}

		// mask off and set correct flags
		gen_asm(MOV_RM, ECX, (uint32)&trxCPUrec.cr);
		gen_asm(AND_RI32, ECX, mask);
		gen_asm(OR_RI32, ECX, res>>shift);
		gen_asm(MOV_MR, (uint32)&trxCPUrec.cr, ECX);
		return;
	}
	else
	{
		gen_asm(CMP_RI32, regc_getcachereg(rA), imm);
	}
//	regc_load(EAX, rA);
//	gen_asm(CMP_RI32, EAX, imm);
	gen_asm(CMOVA_M, EAX, (uint32)&cr_gt);
	gen_asm(CMOVB_M, EAX, (uint32)&cr_lt);
	gen_asm(CMOVE_M, EAX, (uint32)&cr_eq);
	if(shift != 0)gen_asm(SHR_RI8, EAX, shift);
	gen_asm(MOV_RM, ECX, (uint32)&trxCPUrec.cr);
	gen_asm(AND_RI32, ECX, mask);
	gen_asm(OR_RR, ECX, EAX);
	gen_asm(MOV_MR, (uint32)&trxCPUrec.cr, ECX);
}

//==============================================================================
// Integer Logical opcodes
//

// tested
void trx_ppc_gen_andx()
{
	uint32 rS, rA, rB;

	rS = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

//	trxCPUrec.gpr[rA] = trxCPUrec.gpr[rS] & trxCPUrec.gpr[rB];
	regc_load(EAX, rS);
	if(regc_is_constant(rB))
	{
		gen_asm(AND_RI32, EAX, regc_getconstant(rB));
	}
	else
	{
		gen_asm(AND_RR, EAX, regc_getcachereg(rB));
	}
	regc_store(EAX, rA);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1)	trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_nandx()
{
	uint32 rS, rA, rB;

	rS = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

//	trxCPUrec.gpr[rA] = ~(trxCPUrec.gpr[rS] & trxCPUrec.gpr[rB]);
	regc_load(EAX, rS);
	if(regc_is_constant(rB))
	{
		gen_asm(AND_RI32, EAX, regc_getconstant(rB));
	}
	else
	{
		gen_asm(AND_RR, EAX, regc_getcachereg(rB));
	}
	gen_asm(NOT_R, EAX);
	regc_store(EAX, rA);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1)	trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_andcx()
{
	uint32 rS, rA, rB;

	rS = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

//	trxCPUrec.gpr[rA] = trxCPUrec.gpr[rS] & ~trxCPUrec.gpr[rB];
	regc_load(EAX, rB);
	gen_asm(NOT_R, EAX);
	if(regc_is_constant(rS))
	{
		gen_asm(AND_RI32, EAX, regc_getconstant(rS));
	}
	else
	{
		gen_asm(AND_RR, EAX, regc_getcachereg(rS));
	}
	regc_store(EAX, rA);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1)	trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_andi_(void)
{
	uint32 rS, rA;
	uint32 imm;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

//	trxCPUrec.gpr[rA] = trxCPUrec.gpr[rS] & imm;
	regc_load(EAX, rS);
	gen_asm(AND_RI32, EAX, imm);
	regc_store(EAX, rA);
	// EAX must contain final value!
	trx_ppc_gen_cr0();
}
// tested
void trx_ppc_gen_andis_(void)
{
	uint32 rS, rA;
	uint32 imm;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = (trxCPUrec.opcode << 16);

//	trxCPUrec.gpr[rA] = trxCPUrec.gpr[rS] & imm;
	regc_load(EAX, rS);
	gen_asm(AND_RI32, EAX, imm);
	regc_store(EAX, rA);
	// EAX must contain final value!
	trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_cntlzwx()
{
	uint rS, rA;

	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;

	//uint32 n=0;
	//uint32 x=0x80000000;
	//uint32 v=trxCPUrec.gpr[rS];
	//while (!(v & x)) {
	//	n++;
	//	if (n==32) break;
	//	x>>=1;
	//}
	//trxCPUrec.gpr[rA] = n;

	regc_load(EAX, rS);
	gen_asm(MOV_RI32, ECX, -1);
	gen_asm(BSR_RR, ECX, EAX);
	gen_asm(MOV_RI32, EAX, 31);
	gen_asm(SUB_RR, EAX, ECX);
	regc_store(EAX, rA);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_extsbx()
{
	uint32 rS, rA;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;

//	a  = trxCPUrec.gpr[rS];
//	trxCPUrec.gpr[rA] = a;
	regc_load(EAX, rS);
	gen_asm(MOVSX328_RR, EAX, EAX);
	regc_store(EAX, rA);
	// EAX must contain final value!
	if(trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_extshx()
{
	uint32 rS, rA;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;

	//a  = trxCPUrec.gpr[rS];
	//trxCPUrec.gpr[rA] = a;
	regc_load(EAX, rS);
	gen_asm(MOVSX3216_RR, EAX, EAX);
	regc_store(EAX, rA);
	// EAX must contain final value!
	if(trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_norx()
{
	int rS, rA, rB;
	rS = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	//trxCPUrec.gpr[rA] = ~(trxCPUrec.gpr[rS] | trxCPUrec.gpr[rB]);
	regc_load(EAX, rS);
	if(regc_is_constant(rB))
	{
		gen_asm(OR_RI32, EAX, regc_getconstant(rB));
	}
	else
	{
		gen_asm(OR_RR, EAX, regc_getcachereg(rB));
	}
	gen_asm(NOT_R, EAX);
	regc_store(EAX, rA);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_orx(void)
{
	uint32 rS, rA, rB;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	rB = (trxCPUrec.opcode >> 11)& 0x1f;

	//trxCPUrec.gpr[rA] = trxCPUrec.gpr[rS] | trxCPUrec.gpr[rB];
	regc_load(EAX, rS);
	if(regc_is_constant(rB))
	{
		gen_asm(OR_RI32, EAX, regc_getconstant(rB));
	}
	else
	{
		gen_asm(OR_RR, EAX, regc_getcachereg(rB));
	}
	regc_store(EAX, rA);
	// EAX must contain final value!
	if(trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_orcx(void)
{
	uint32 rS, rA, rB;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	rB = (trxCPUrec.opcode >> 11)& 0x1f;

	//trxCPUrec.gpr[rA] = trxCPUrec.gpr[rS] | ~trxCPUrec.gpr[rB];
	regc_load(EAX, rB);
	gen_asm(NOT_R, EAX);
	if(regc_is_constant(rS))
	{
		gen_asm(OR_RI32, EAX, regc_getconstant(rS));
	}
	else
	{
		gen_asm(OR_RR, EAX, regc_getcachereg(rS));
	}
	regc_store(EAX, rA);
	// EAX must contain final value!
	if(trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_ori(void)
{
	uint32 rS, rA;
	uint32 imm;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	// discard NOPs
	if((imm == 0) && (rS == 0) && (rA == 0))return;

	//	trxCPUrec.gpr[rA] = trxCPUrec.gpr[rS] | imm;
	if(regc_is_constant(rS))
	{
		regc_store_constant(regc_load_constant(rS) | imm, rA);
	}
	else
	{
		regc_load(EAX, rS);
		gen_asm(OR_RI32, EAX, imm);
		regc_store(EAX, rA);
	}
}

// tested
void trx_ppc_gen_oris(void)
{
	uint32 rS, rA;
	uint32 imm;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode <<16;

//	trxCPUrec.gpr[rA] = trxCPUrec.gpr[rS] | imm;
	if(regc_is_constant(rS))
	{
		regc_store_constant(regc_load_constant(rS) | imm, rA);
	}
	else
	{
		regc_load(EAX, rS);
		gen_asm(OR_RI32, EAX, imm);
		regc_store(EAX, rA);
	}
}

// tested
void trx_ppc_gen_xorx(void)
{
	uint32 rS, rA, rB;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	rB = (trxCPUrec.opcode >> 11)& 0x1f;

//	trxCPUrec.gpr[rA] = trxCPUrec.gpr[rS] ^ trxCPUrec.gpr[rB];
	regc_load(EAX, rS);
	if(regc_is_constant(rB))
	{
		gen_asm(XOR_RI32, EAX, regc_getconstant(rB));
	}
	else
	{
		gen_asm(XOR_RR, EAX, regc_getcachereg(rB));
	}
	regc_store(EAX, rA);
	// EAX must contain final value!
	if(trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_xori(void)
{
	uint32 rS, rA;
	uint32 imm;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

//	trxCPUrec.gpr[rA] = trxCPUrec.gpr[rS] ^ imm;
	if(regc_is_constant(rS))
	{
		regc_store_constant(regc_load_constant(rS) ^ imm, rA);
	}
	else
	{
		regc_load(EAX, rS);
		gen_asm(XOR_RI32, EAX, imm);
		regc_store(EAX, rA);
	}
}

// tested
void trx_ppc_gen_xoris(void)
{
	uint32 rS, rA;
	uint32 imm;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode <<16;

//	trxCPUrec.gpr[rA] = trxCPUrec.gpr[rS] ^ imm;
	if(regc_is_constant(rS))
	{
		regc_store_constant(regc_load_constant(rS) ^ imm, rA);
	}
	else
	{
		regc_load(EAX, rS);
		gen_asm(XOR_RI32, EAX, imm);
		regc_store(EAX, rA);
	}
}

// tested
void trx_ppc_gen_eqvx(void)
{
	uint32 rS, rA, rB;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	rB = (trxCPUrec.opcode >> 11)& 0x1f;

//	trxCPUrec.gpr[rA] = ~(trxCPUrec.gpr[rS] ^ trxCPUrec.gpr[rB]);
	regc_load(EAX, rS);
	if(regc_is_constant(rB))
	{
		gen_asm(XOR_RI32, EAX, regc_getconstant(rB));
	}
	else
	{
		gen_asm(XOR_RR, EAX, regc_getcachereg(rB));
	}
	gen_asm(NOT_R, EAX);
	regc_store(EAX, rA);
	// EAX must contain final value!
	if(trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

//==============================================================================
// Rotate and shift opcodes (7 opcodes, 7 emulated)
//
// tested
void trx_ppc_gen_rlwnmx()
{
	uint32 rS, rA, rB, MB, ME, mask;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	rB = (trxCPUrec.opcode >> 11)& 0x1f;
	MB = (trxCPUrec.opcode >> 6)& 0x1f;
	ME = (trxCPUrec.opcode >> 1)& 0x1f;

	//val = trx_ppc_word_rotl(trxCPUrec.gpr[rS], rB);
    mask = trx_ppc_mask(MB, ME);
	// finally apply it
	//trxCPUrec.gpr[rA] = val & mask;
	regc_load(EAX, rS);
	regc_load(ECX, rB);
	gen_asm(ROL_CL, EAX);
	gen_asm(AND_RI32, EAX, mask);
	regc_store(EAX, rA);
	// EAX must contain final value!
	if (trxCPUrec.opcode & 1) 	trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_rlwimix()
{
	uint32 rS, rA, SH, MB, ME, mask;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	SH = (trxCPUrec.opcode >> 11)& 0x1f;
	MB = (trxCPUrec.opcode >> 6)& 0x1f;
	ME = (trxCPUrec.opcode >> 1)& 0x1f;

//	val = trx_ppc_word_rotl(trxCPUrec.gpr[rS], SH);
	mask = trx_ppc_mask(MB, ME);
//	trxCPUrec.gpr[rA] = (val & mask) | (trxCPUrec.gpr[rA] & ~mask);

	regc_load(EAX, rS);
	if(SH != 0)gen_asm(ROL_RI8, EAX, SH);
	gen_asm(AND_RI32, EAX, mask);
	regc_load(ECX, rA);
	gen_asm(AND_RI32, ECX, ~mask);
	gen_asm(OR_RR, EAX, ECX);
	regc_store(EAX, rA);

	// EAX must contain final value!
	if (trxCPUrec.opcode & 1) 	trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_rlwinmx()
{
	uint32 rS, rA, SH, MB, ME, mask;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	SH = (trxCPUrec.opcode >> 11)& 0x1f;
	MB = (trxCPUrec.opcode >> 6)& 0x1f;
	ME = (trxCPUrec.opcode >> 1)& 0x1f;

	//val = trx_ppc_word_rotl(trxCPUrec.gpr[rS], SH);
    mask = trx_ppc_mask(MB, ME);
	// finally apply it
	//trxCPUrec.gpr[rA] = val & mask;
	regc_load(EAX, rS);
	if(SH != 0)gen_asm(ROL_RI8, EAX, SH);
	gen_asm(AND_RI32, EAX, mask);
	regc_store(EAX, rA);

	// EAX must contain final value!
	if (trxCPUrec.opcode & 1) 	trx_ppc_gen_cr0();
}

// tested
void trx_ppc_gen_slwx()
{
	uint32 rS, rA, rB;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	rB = (trxCPUrec.opcode >> 11)& 0x1f;
	//SH = trxCPUrec.gpr[rB] & 0x3f;
	//if (SH > 31) {
	//	trxCPUrec.gpr[rA] = 0;
	//} else {
	//	trxCPUrec.gpr[rA] = trxCPUrec.gpr[rS] << SH;
	//}
	regc_load(EAX, rS);
	regc_load(ECX, rB);
	gen_asm(CMP_RI32, ECX, 31);
	gen_asm(CMOVA_M, EAX, (uint32)&zero);
	gen_asm(SHL_CL, EAX);
	regc_store(EAX, rA);
	// EAX must contain final value!
	if(trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

// tested for positive numbers and negative numbers
// sraw[.] rA,rS,rB  --  arithmetic shift right of rS by the count in rB,
// result to rA, with xer_ca set when a negative value loses 1-bits.
//
// The generated code branches on the sign of rS. The two short jumps use
// hard-coded byte displacements (0x17 and 0x20), so they depend on the exact
// encoded size of the instructions between each jump and its target: do not
// add, remove or reorder instructions inside those ranges without
// recomputing the displacements.
// NOTE(review): the shift count is not masked to six bits here, and the
// negative path uses CL directly with SHL/SAR -- verify the behaviour for
// rB values of 32 and above.
// Clobbers ECX and EDX in the generated code.
// When the Rc bit (opcode bit 0) is set, CR0 is generated from EAX.
void trx_ppc_gen_srawx()
{
	uint32 rS, rA, rB;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	rB = (trxCPUrec.opcode >> 11)& 0x1f;

	regc_load(EAX, rS);
	regc_load(ECX, rB);
	gen_asm(TEST_RR, EAX, EAX);
	gen_asm(JS_I8, 0x17); // negative value: skip the positive-number sequence
// positive number: xer_ca = 0, and a count above 31 gives a zero result
	gen_asm(XOR_RR, EDX, EDX);
	gen_asm(MOV_MR, (uint32)&trxCPUrec.xer_ca, EDX);
	gen_asm(CMP_RI32, ECX, 31);
	gen_asm(CMOVA_M, EAX, (uint32)&zero);
	gen_asm(JMP_I8, 0x20); // skip the negative-number sequence
// negative number handling: xer_ca = 1 if any 1-bit is shifted out
	gen_asm(MOV_R8I8, DL, 0x20); // DL = 32 - count
	gen_asm(SUB_R8R8, DL, CL);
	gen_asm(MOV_R8R8, CH, CL); // keep the original count in CH
	gen_asm(MOV_R8R8, CL, DL);
	gen_asm(MOV_RR, EDX, EAX);
	gen_asm(SHL_CL, EDX); // what remains are the bits the SAR will drop
	gen_asm(SETNE_R8, DL); // DL = 1 if any of them is set
	gen_asm(MOV_R8R8, CL, CH); // restore the count
	gen_asm(TEST_R8R8, CL, CL);
	gen_asm(CMOVE_M, EDX, (uint32)&zero); // count of 0: nothing shifted out
	gen_asm(MOV_MR8, (uint32)&trxCPUrec.xer_ca, DL);
// done: both paths continue here
	gen_asm(SAR_CL, EAX);
	regc_store(EAX, rA);
	// EAX must contain final value!
	if(trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

// Status: tested for positive nonzero shifts and negative nonzero shifts
// srawi[.] rA,rS,SH  --  arithmetic shift right of rS by the constant SH,
// result to rA; xer_ca is set when rS is negative and any of the SH bits
// shifted out is 1, and cleared otherwise.
//
// SH == 0 is handled separately: the value is copied and xer_ca cleared.
// For SH != 0 the generated code branches on the sign of rS. The two short
// jumps use hard-coded byte displacements (0xa and 0xf), so they depend on
// the exact encoded size of the instructions between each jump and its
// target: do not change those sequences without recomputing them.
// Clobbers ECX or EDX in the generated code.
// When the Rc bit (opcode bit 0) is set, CR0 is generated from EAX.
void trx_ppc_gen_srawix()
{
	uint32 rS, rA, SH, mask;

	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	SH = (trxCPUrec.opcode >> 11)& 0x1f;

	// fixed shift amount
	if(SH == 0)
	{
		// clear xer_ca and copy value
		regc_load(EAX, rS);
		regc_store(EAX, rA);
		gen_asm(XOR_RR, ECX, ECX);
		gen_asm(MOV_MR, (uint32)&trxCPUrec.xer_ca, ECX);
	}
	else
	{
		// Bitmask of the SH low bits that get shifted out, used for the
		// xer_ca calculation. Computed only here: for SH == 0 the shift
		// count (32-SH) would be 32, which is undefined for a 32-bit value.
		mask = 0xffffffff >> (32-SH);

		regc_load(EAX, rS);
	// negative number check 
		gen_asm(TEST_RR, EAX, EAX);
		gen_asm(JS_I8, 0xa);
	// positive number: xer_ca = 0
		gen_asm(XOR_RR, EDX, EDX);
		gen_asm(MOV_MR, (uint32)&trxCPUrec.xer_ca, EDX);
		gen_asm(JMP_I8, 0xf);
	// negative number handling here !
		gen_asm(MOV_RR, EDX, EAX);
		gen_asm(AND_RI32, EDX, mask);
		gen_asm(SETNE_M, (uint32)&trxCPUrec.xer_ca);
	// both paths continue here
		gen_asm(SAR_RI8, EAX, SH);
		regc_store(EAX, rA);
	}
	// EAX must contain final value!
	if(trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

// tested (also for shift amounts above 31)
void trx_ppc_gen_srwx()
{
	uint32 rS, rA, rB;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	rB = (trxCPUrec.opcode >> 11)& 0x1f;

	//uint32 v = trxCPUrec.gpr[rB] & 0x3f;
	//if (v > 31) {
	//	trxCPUrec.gpr[rA] = 0;
	//} else {
	//	trxCPUrec.gpr[rA] = trxCPUrec.gpr[rS] >> v;
	//}
	regc_load(EAX, rS);
	regc_load(ECX, rB);
	gen_asm(CMP_RI32, ECX, 31);
	gen_asm(CMOVA_M, EAX, (uint32)&zero);
	gen_asm(SHR_CL, EAX);
	regc_store(EAX, rA);
	// EAX must contain final value!
	if(trxCPUrec.opcode & 1) trx_ppc_gen_cr0();
}

//==============================================================================
// Branching opcodes (4 opcodes, 4 emulated) 
//

// tested
void trx_ppc_gen_bx(void)
{
	uint32 LI, LK, AA, nextpc;
	LI = trxCPUrec.opcode &0x3fffffc;
	LK = (trxCPUrec.opcode & 1);
	AA = (trxCPUrec.opcode & 2);

	// sign extend
	if (LI&0x02000000) LI |= 0xfc000000;

	// absolute or relative branch ?
	if(!AA)
	{
		LI += trxCPUrec.pc;
	}
	// update link register ?
	//if (LK) trxCPUrec.lr = trxCPUrec.pc + 4; 
	//	trxCPUrec.npc = LI;
	if(LK)
	{
		nextpc = trxCPUrec.pc + 4;
		gen_asm(MOV_RI32, EAX, nextpc); 
		gen_asm(MOV_MR, (uint32)&trxCPUrec.lr, EAX);
	}
	// remove unconditional branches except when they loop (wait for interrupt)
	// loop detection ..
	if(LI == trxCPUrec.block_startPC)
	{
		printf("[Tratax Recompiler] Spinloop (unconditional branch loop) detected @%8.8x\n", trxCPUrec.block_startPC);
		gen_asm(MOV_M32I32, (uint32)&trxCPUrec.npc, LI);
		// subtract from cpuslice and encode jump to begin of block
		trx_rec_gen_blockend();
		gen_asm(JMP_M, (uint32)&translation_memory[block_start_x86_pos]);
		trxCPUrec.blockend = BLOCKEND_STOP;
	}
	else
	{
		trxCPUrec.npc = LI;
	}
}

// tested (only few cases!)
// Emits x86 code for bc / bca / bcl / bcla (conditional branch).
// BO is decoded without its lowest (prediction) bit, so the case labels below
// are the 4-bit values. Only BO = 2, 6, 8, 9 and 10 are implemented; any other
// value prints a message and terminates the emulator.
// Every conditional case emits: test, short jump over the "not taken" exit
// (hard-coded displacement 15), "not taken" exit, "taken" exit. Each exit
// stores the next pc and calls rec_patchmeup.
void trx_ppc_gen_bcx()
{
	uint32 BO, BI, LK, BD, AA, crmask, nextpc;
	BO = (trxCPUrec.opcode >> 22)& 0xf; // we dont care about the prediction bit!
	BI = (trxCPUrec.opcode >> 16)& 0x1f;
	// 16-bit displacement, sign extended
	BD = (trxCPUrec.opcode)&0xfffc;if (BD&0x8000) BD |= 0xffff0000;
	LK = (trxCPUrec.opcode & 1);
	AA = (trxCPUrec.opcode & 2);

/*
	reference interpreter implementation:

	// update link register ?
	if (LK) trxCPUrec.lr = trxCPUrec.npc; 

	// need to adjust count ? bit 2 set means decrement counter
	if (!(BO & 2)) trxCPUrec.ctr--;
	// test condition
	bool cr = (trxCPUrec.cr & (1<<(31-BI)));

	bool bo2 = (BO & 1);
	bool bo8 = (BO & 4); // branch condition true
	if (((BO & 2) || ((trxCPUrec.ctr!=0) ^ bo2))	&& ((BO & 8) || (!(cr ^ bo8)))) 
	{
		// absolute or relative branch ?
		if(!AA)
		{
			BD += (trxCPUrec.pc);
		}
		trxCPUrec.npc = BD;
	}
*/
	// relative branches are based on the current instruction address
	if(!AA)
	{
		BD += (trxCPUrec.pc);
	}
	// lr = address of the following instruction
	if(LK)
	{
		nextpc = trxCPUrec.pc + 4;
		gen_asm(MOV_RI32, EAX, nextpc); 
		gen_asm(MOV_MR, (uint32)&trxCPUrec.lr, EAX);
	}
	// counter-decrementing forms
	if (!(BO & 2))
	{
		gen_asm(SUB_M32I8, (uint32)&trxCPUrec.ctr, 1);
	}

	// unsigned constant: BI == 0 selects bit 31, which a signed 1 cannot be shifted into
	crmask = (1u<<(31-BI));

	trx_rec_gen_blockend();
	trxCPUrec.blockend = BLOCKEND_STOP;

	// idle loop detection here.
	//8013A5D4  800Dxxxx  GEKKO: lwz        r0, xxxx(sd2)
	//8013A5D8  28000000  GEKKO: cmplwi     r0, 0
	//8013A5DC  4182FFF8  GEKKO: beq+       0x8013A5D4
	if(!config_skipidle && config_cpumode != CPU_SELFCHECKMODE )
	{
		if((trxCPUrec.opcode == 0x4182FFF8) && (trxCPUrec.block_instr == 2))
		{
			// looks like candidate for idle loop
			uint32 op1, op2;

			// the two instructions preceding the branch
			op1 = mem_iread(trxCPUrec.pc-4);
			op2 = mem_iread(trxCPUrec.pc-8);

			if((op1 == 0x28000000) && ((op2 & 0xffff0000) == 0x800d0000))
			{
				// it IS idle loop .. special sauce for exit
				//gen_asm(BREAK);
				gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.cr);
				gen_asm(AND_RI32, EAX, crmask);
				gen_asm(JNE_I8, 15);
				// condition false: leave the loop through the normal exit
				gen_asm(MOV_M32I32, (uint32)&trxCPUrec.npc, trxCPUrec.npc);
				gen_asm(CALL_M, (uint32)&rec_patchmeup);
				// condition true: loop again, but drop the rest of the timeslice first
				gen_asm(MOV_M32I32, (uint32)&trxCPUrec.npc, BD);
				gen_asm(MOV_M32I32, (uint32)&cpuslice_left, -1); // give up timeslice here .. the big speed gain !
				gen_asm(JMP_M, (uint32)&translation_memory[block_start_x86_pos]);

				printf("[Tratax Recompiler] idle loop detected at %x and patched\n", trxCPUrec.pc);
				return;
			}
		}
	}

	switch(BO)
	{
	case 2: // branch when the CR bit is clear
		gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.cr);
		gen_asm(AND_RI32, EAX, crmask);
		gen_asm(JE_I8, 15);
		gen_asm(MOV_M32I32, (uint32)&trxCPUrec.npc, trxCPUrec.npc);
		gen_asm(CALL_M, (uint32)&rec_patchmeup);
		gen_asm(MOV_M32I32, (uint32)&trxCPUrec.npc, BD);
		gen_asm(CALL_M, (uint32)&rec_patchmeup);
		break;
	case 6: // branch when the CR bit is set
		gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.cr);
		gen_asm(AND_RI32, EAX, crmask);
		gen_asm(JNE_I8, 15);
		gen_asm(MOV_M32I32, (uint32)&trxCPUrec.npc, trxCPUrec.npc);
		gen_asm(CALL_M, (uint32)&rec_patchmeup);
		gen_asm(MOV_M32I32, (uint32)&trxCPUrec.npc, BD);
		gen_asm(CALL_M, (uint32)&rec_patchmeup);
		break;
	case 8: // branch when the decremented ctr is non-zero
		gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.ctr);
		gen_asm(TEST_RR, EAX, EAX);
		gen_asm(JNE_I8, 15);
		gen_asm(MOV_M32I32, (uint32)&trxCPUrec.npc, trxCPUrec.npc);
		gen_asm(CALL_M, (uint32)&rec_patchmeup);
		gen_asm(MOV_M32I32, (uint32)&trxCPUrec.npc, BD);
		gen_asm(CALL_M, (uint32)&rec_patchmeup);
		break;
	case 9: // branch when the decremented ctr is zero
		gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.ctr);
		gen_asm(TEST_RR, EAX, EAX);
		gen_asm(JE_I8, 15);
		gen_asm(MOV_M32I32, (uint32)&trxCPUrec.npc, trxCPUrec.npc);
		gen_asm(CALL_M, (uint32)&rec_patchmeup);
		gen_asm(MOV_M32I32, (uint32)&trxCPUrec.npc, BD);
		gen_asm(CALL_M, (uint32)&rec_patchmeup);
		break;
	case 10: // branch always ... can be optimized cause doesnt need to break block
		gen_asm(MOV_M32I32, (uint32)&trxCPUrec.npc, BD);
		gen_asm(CALL_M, (uint32)&rec_patchmeup);
		break;
	default: // unhandled branch
		printf("[Tratax Recompiler] trx_ppc_gen_bcx() unhandled case: %d\n", BO);
		exit(0);
		break;
	}
}

// tested only one case !
void trx_ppc_gen_bcctrx()
{
	uint32 BO, BI, LK, crmask, nextpc;
	BO = (trxCPUrec.opcode >> 22)& 0xf; // we dont care about the prediction bit!
	BI = (trxCPUrec.opcode >> 16)& 0x1f;
	LK = (trxCPUrec.opcode & 1);
/*
	// need to adjust count ? bit 2 set means decrement counter
	if (!(BO & 2)) trxCPUrec.ctr--;
	// test condition
	cr = (trxCPUrec.cr & (1<<(31-BI)));
	// when not branching just go to next instruction
	BD = trxCPUrec.npc;

	switch(BO)
	{
	case 0: if((trxCPUrec.ctr!=0)&&(!cr))BD = trxCPUrec.ctr & 0xfffffffc;break;
	case 1: if((trxCPUrec.ctr==0)&&(!cr))BD = trxCPUrec.ctr & 0xfffffffc;break;
	case 2: if(!cr)BD = trxCPUrec.lr & 0xfffffffc;break;
	case 4: if((trxCPUrec.ctr!=0)&&(cr))BD = trxCPUrec.ctr & 0xfffffffc;break;
	case 5: if((trxCPUrec.ctr==0)&&(cr))BD = trxCPUrec.ctr & 0xfffffffc;break;
	case 6: if(cr)BD = trxCPUrec.ctr & 0xfffffffc;break;
	case 8: if(trxCPUrec.ctr!=0)BD = trxCPUrec.ctr & 0xfffffffc;break;
	case 9: if(trxCPUrec.ctr==0)BD = trxCPUrec.ctr & 0xfffffffc;break;
	case 10: BD = trxCPUrec.ctr & 0xfffffffc;break;
	default: // illegal branch, skip it.
	break;
	}
	// update link register ?
	if (LK) trxCPUrec.lr = trxCPUrec.npc; 
	trxCPUrec.npc = BD;
*/
	if(LK)
	{
		nextpc = trxCPUrec.pc + 4;
		gen_asm(MOV_RI32, EAX, nextpc); 
		gen_asm(MOV_MR, (uint32)&trxCPUrec.lr, EAX);
	}
	if (!(BO & 2))
	{
		gen_asm(SUB_M32I8, (uint32)&trxCPUrec.ctr, 1);
	}

	crmask = (1<<(31-BI));

	trx_rec_gen_blockend();

	switch(BO)
	{
	case 10: // BD = trxCPUrec.ctr & 0xfffffffc;break;
		// unpredictable, so need to do lookup in translation_table all the time
		gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.ctr);
		gen_asm(MOV_MR, (uint32)&trxCPUrec.npc, EAX);
		break;
	default: // unhandled branch
		printf("[Tratax Recompiler] trx_ppc_gen_bcctrx() unhandled case: %d\n", BO);
		exit(0);
		break;
	}
	trxCPUrec.blockend = BLOCKEND_STOP;
}

// tested, only few cases
// Emits x86 code for bclr / bclrl (branch to link register).
// BO is decoded without its lowest (prediction) bit. Implemented forms:
//	BO == 2  : branch when the selected CR bit is clear
//	BO == 6  : branch when the selected CR bit is set
//	BO == 10 : branch always
// Any other BO prints a message and terminates the emulator.
// The branch target is lr & 0xfffffffc. Because lr is also the register
// written by the LK form, the old lr is copied to npc before the new link
// value (pc + 4) is stored.
// The conditional forms emit a short jump over the "not taken" exit; its
// hard-coded displacement (15, or 25 with the extra lr store) must match the
// size of the skipped instructions.
void trx_ppc_gen_bclrx(void)
{
	uint32 BO, BI, LK, crmask, nextpc;
	BO = (trxCPUrec.opcode >> 22)& 0xf; // we dont care about the prediction bit!
	BI = (trxCPUrec.opcode >> 16)& 0x1f;
	LK = (trxCPUrec.opcode & 1);

	nextpc = trxCPUrec.pc+4;
	// counter-decrementing forms
	if (!(BO & 2))
	{
		gen_asm(SUB_M32I8, (uint32)&trxCPUrec.ctr, 1);
	}

	// unsigned constant: BI == 0 selects bit 31, which a signed 1 cannot be shifted into
	crmask = (1u<<(31-BI));

	trx_rec_gen_blockend();

	switch(BO)
	{
	case 2: //if(!cr)BD = trxCPUrec.lr & 0xfffffffc;break;
		gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.cr);
		gen_asm(AND_RI32, EAX, crmask);
		// carefull ! lr register is used in this type of branch !
		if(LK)
		{
			gen_asm(JE_I8, 25);
			gen_asm(MOV_M32I32, (uint32)&trxCPUrec.lr, nextpc);
		}
		else
		{
			gen_asm(JE_I8, 15);
		}
		// not taken: continue with the following instruction
		gen_asm(MOV_M32I32, (uint32)&trxCPUrec.npc, trxCPUrec.npc);
		gen_asm(CALL_M, (uint32)&rec_patchmeup);
		// unpredictable, so need to do lookup in translation_table all the time
		gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.lr);
		// the two low bits of lr are not part of the branch target
		gen_asm(AND_RI32, EAX, 0xfffffffc);
		gen_asm(MOV_MR, (uint32)&trxCPUrec.npc, EAX);
		// carefull ! lr register is used in this type of branch !
		if(LK)gen_asm(MOV_M32I32, (uint32)&trxCPUrec.lr, nextpc);
		break;
	case 6: //if(cr)BD = trxCPUrec.lr & 0xfffffffc;break;
		gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.cr);
		gen_asm(AND_RI32, EAX, crmask);
		// carefull ! lr register is used in this type of branch !
		if(LK)
		{
			gen_asm(JNE_I8, 25);
			gen_asm(MOV_M32I32, (uint32)&trxCPUrec.lr, nextpc);
		}
		else
		{
			gen_asm(JNE_I8, 15);
		}
		// not taken: continue with the following instruction
		gen_asm(MOV_M32I32, (uint32)&trxCPUrec.npc, trxCPUrec.npc);
		gen_asm(CALL_M, (uint32)&rec_patchmeup);
		// unpredictable, so need to do lookup in translation_table all the time
		gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.lr);
		// the two low bits of lr are not part of the branch target
		gen_asm(AND_RI32, EAX, 0xfffffffc);
		gen_asm(MOV_MR, (uint32)&trxCPUrec.npc, EAX);
		// carefull ! lr register is used in this type of branch !
		if(LK)gen_asm(MOV_M32I32, (uint32)&trxCPUrec.lr, nextpc);
		break;
	case 10: // BD = trxCPUrec.lr & 0xfffffffc;break;
		// unpredictable, so need to do lookup in translation_table all the time
		gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.lr);
		// the two low bits of lr are not part of the branch target
		gen_asm(AND_RI32, EAX, 0xfffffffc);
		gen_asm(MOV_MR, (uint32)&trxCPUrec.npc, EAX);
		// carefull ! lr register is used in this type of branch !
		if(LK)gen_asm(MOV_M32I32, (uint32)&trxCPUrec.lr, nextpc);
		break;
	default: // unhandled branch
		printf("[Tratax Recompiler] trx_ppc_gen_bclrx() unhandled case: %d\n", BO);
		exit(0);
		break;
	}
	trxCPUrec.blockend = BLOCKEND_STOP;
}


//==============================================================================
// System control opcodes ( 2 opcodes , 1 emulated)

// Emits x86 code for mtsr: trxCPUrec.sr[SR & 0xf] = gpr[rS].
// The segment register index is known at translation time, so the store
// goes to a fixed address.
void trx_ppc_gen_mtsr()
{
	int rS, SR;

	rS=((trxCPUrec.opcode)>>21)&0x1f;
	SR=((trxCPUrec.opcode)>>16)&0x1f;

	// trxCPUrec.sr[SR & 0xf] = trxCPUrec.gpr[rS];
	regc_load(EAX, rS);
	gen_asm(MOV_MR, (uint32)&trxCPUrec.sr[SR & 0xf], EAX);
}

// Emits x86 code for mtsrin: trxCPUrec.sr[gpr[rB] >> 28] = gpr[rS].
// The index is computed at run time in ECX as a byte offset into the
// segment register array.
void trx_ppc_gen_mtsrin()
{
	const int src_reg = ((trxCPUrec.opcode)>>21)&0x1f;
	const int sel_reg = ((trxCPUrec.opcode)>>11)&0x1f;

	regc_load(EAX, src_reg);
	regc_load(ECX, sel_reg);
	// top nibble of the selector, pre-scaled by 4 (32-bit entries)
	gen_asm(SHR_RI8, ECX, 26);
	gen_asm(AND_RI32, ECX, 0xf<<2);
	gen_asm(MOV_MRRI32, EAX, ECX, (uint32)&trxCPUrec.sr[0]);
}

// Emits x86 code for mfsr: gpr[rD] = trxCPUrec.sr[SR & 0xf].
// The segment register index is fixed at translation time.
void trx_ppc_gen_mfsr()
{
	const int dest_reg = ((trxCPUrec.opcode)>>21)&0x1f;
	const int seg = ((trxCPUrec.opcode)>>16)&0x1f;

	gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.sr[seg & 0xf]);
	regc_store(EAX, dest_reg);
}

// Emits x86 code for mfsrin: gpr[rD] = trxCPUrec.sr[gpr[rB] >> 28].
// The index is computed at run time in ECX as a byte offset into the
// segment register array.
void trx_ppc_gen_mfsrin()
{
	const int dest_reg = ((trxCPUrec.opcode)>>21)&0x1f;
	const int sel_reg = ((trxCPUrec.opcode)>>11)&0x1f;

	regc_load(ECX, sel_reg);
	// top nibble of the selector, pre-scaled by 4 (32-bit entries)
	gen_asm(SHR_RI8, ECX, 26);
	gen_asm(AND_RI32, ECX, 0xf<<2);
	gen_asm(MOV_RMRI32, EAX, ECX, (uint32)&trxCPUrec.sr[0]);
	regc_store(EAX, dest_reg);
}

// used
void trx_ppc_gen_rfi()
{
	//trxCPUrec.msr = trxCPUrec.srr[1];
	//trxCPUrec.npc = trxCPUrec.srr[0] & 0xfffffffc;
	trx_rec_gen_blockend();
	
	gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_SRR1]); 
	gen_asm(MOV_MR, (uint32)&trxCPUrec.msr, EAX);
	gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_SRR0]);
	gen_asm(AND_RI32, EAX, 0xfffffffc);
	gen_asm(MOV_MR, (uint32)&trxCPUrec.npc, EAX);
	// in multi block mode, we need to exit immediately at exceptions!
	gen_asm(MOV_RM, EAX, (uint32)&cpuslice_left);
	gen_asm(MOV_RM, ECX, (uint32)&cpuslice);
	gen_asm(SUB_RR, ECX, EAX);
	gen_asm(MOV_MR, (uint32)&cpuslice, ECX);
	gen_asm(MOV_M32I32, (uint32)&cpuslice_left, -1);
//	cpuslice -= cpuslice_left;
//	cpuslice_left = -1;
	trxCPUrec.blockend = BLOCKEND_STOP;	
}

//==============================================================================
// Condition register opcodes (10 opcodes, 3 emulated)
// not emulated: crandc, crnand (crand and crnor are implemented below)
//
// used
void trx_ppc_gen_mtcrf()
{
	uint32 rS, crm, CRM;
	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	crm = (trxCPUrec.opcode >> 12) & 0xff;
	CRM = ((crm&0x80)?0xf0000000:0)|((crm&0x40)?0x0f000000:0)|((crm&0x20)?0x00f00000:0)|((crm&0x10)?0x000f0000:0)|
	      ((crm&0x08)?0x0000f000:0)|((crm&0x04)?0x00000f00:0)|((crm&0x02)?0x000000f0:0)|((crm&0x01)?0x0000000f:0);

	//trxCPUrec.cr = (trxCPUrec.gpr[rS] & CRM) | (trxCPUrec.cr & ~CRM);
	regc_load(EAX, rS);
	gen_asm(AND_RI32, EAX, CRM);
	gen_asm(MOV_RM, ECX, (uint32)&trxCPUrec.cr);
	gen_asm(AND_RI32, ECX, ~CRM);
	gen_asm(OR_RR, EAX, ECX);
	gen_asm(MOV_MR, (uint32)&trxCPUrec.cr, EAX);
}

// used
void trx_ppc_gen_mcrf()
{
	uint32 crD, crS, src_shift, dst_shift, mask;
	crD = (trxCPUrec.opcode >> 23)& 0x7;
	crS = (trxCPUrec.opcode >> 18)& 0x7;

	crD = 7-crD;
	crS = 7-crS;
	
//	c = (trxCPUrec.cr>>(crS*4)) & 0xf;
//	trxCPUrec.cr &= trx_ppc_cmp_and_mask[crD];
//	trxCPUrec.cr |= c<<(crD*4);

	src_shift = (crS*4);
	dst_shift = (crD*4);
	mask = trx_ppc_cmp_and_mask[crD];

	gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.cr);
	gen_asm(MOV_RR, ECX, EAX);
	gen_asm(SHR_RI8, EAX, src_shift);
	gen_asm(AND_RI32, EAX, 0xf);
	gen_asm(AND_RI32, ECX, mask);
	gen_asm(SHL_RI8, EAX, dst_shift);
	gen_asm(OR_RR, EAX, ECX);
	gen_asm(MOV_MR, (uint32)&trxCPUrec.cr, EAX);
}

// used
void trx_ppc_gen_crxor()
{
	int crD, crA, crB, mask;
	
	crD=((trxCPUrec.opcode)>>21)&0x1f;
	crA=((trxCPUrec.opcode)>>16)&0x1f;
	crB=((trxCPUrec.opcode)>>11)&0x1f;
	mask = ~(1<<(31-crD));
/*
	if ((!(trxCPUrec.cr & (1<<(31-crA))) && (trxCPUrec.cr & (1<<(31-crB))))
	  || ((trxCPUrec.cr & (1<<(31-crA))) && !(trxCPUrec.cr & (1<<(31-crB))))) {
		trxCPUrec.cr |= (1<<(31-crD));
	} else {
		trxCPUrec.cr &= ~(1<<(31-crD));
	}
*/
	gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.cr);
	gen_asm(MOV_RR, ECX, EAX);
	gen_asm(MOV_RR, EDX, EAX);
	gen_asm(SHR_RI8, ECX, (31-crA));
	gen_asm(SHR_RI8, EDX, (31-crB));
	gen_asm(XOR_RR, ECX, EDX);
	gen_asm(AND_RI32, ECX, 1);
	gen_asm(SHL_RI8, ECX, (31-crD));
	gen_asm(AND_RI32, EAX, mask);
	gen_asm(OR_RR, EAX, ECX);
	gen_asm(MOV_MR, (uint32)&trxCPUrec.cr, EAX);
}

// used
void trx_ppc_gen_cror()
{
	int crD, crA, crB, mask;
	
	crD=((trxCPUrec.opcode)>>21)&0x1f;
	crA=((trxCPUrec.opcode)>>16)&0x1f;
	crB=((trxCPUrec.opcode)>>11)&0x1f;
	mask = ~(1<<(31-crD));

	gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.cr);
	gen_asm(MOV_RR, ECX, EAX);
	gen_asm(MOV_RR, EDX, EAX);
	gen_asm(SHR_RI8, ECX, (31-crA));
	gen_asm(SHR_RI8, EDX, (31-crB));
	gen_asm(OR_RR, ECX, EDX);
	gen_asm(AND_RI32, ECX, 1);
	gen_asm(SHL_RI8, ECX, (31-crD));
	gen_asm(AND_RI32, EAX, mask);
	gen_asm(OR_RR, EAX, ECX);
	gen_asm(MOV_MR, (uint32)&trxCPUrec.cr, EAX);
}

// used
void trx_ppc_gen_crand()
{
	int crD, crA, crB, mask;
	
	crD=((trxCPUrec.opcode)>>21)&0x1f;
	crA=((trxCPUrec.opcode)>>16)&0x1f;
	crB=((trxCPUrec.opcode)>>11)&0x1f;
	mask = ~(1<<(31-crD));

	gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.cr);
	gen_asm(MOV_RR, ECX, EAX);
	gen_asm(MOV_RR, EDX, EAX);
	gen_asm(SHR_RI8, ECX, (31-crA));
	gen_asm(SHR_RI8, EDX, (31-crB));
	gen_asm(AND_RR, ECX, EDX);
	gen_asm(AND_RI32, ECX, 1);
	gen_asm(SHL_RI8, ECX, (31-crD));
	gen_asm(AND_RI32, EAX, mask);
	gen_asm(OR_RR, EAX, ECX);
	gen_asm(MOV_MR, (uint32)&trxCPUrec.cr, EAX);
}
// used
void trx_ppc_gen_crnor()
{
	int crD, crA, crB, mask;
	
	crD=((trxCPUrec.opcode)>>21)&0x1f;
	crA=((trxCPUrec.opcode)>>16)&0x1f;
	crB=((trxCPUrec.opcode)>>11)&0x1f;
	mask = ~(1<<(31-crD));

	gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.cr);
	gen_asm(MOV_RR, ECX, EAX);
	gen_asm(MOV_RR, EDX, EAX);
	gen_asm(SHR_RI8, ECX, (31-crA));
	gen_asm(SHR_RI8, EDX, (31-crB));
	gen_asm(OR_RR, ECX, EDX);
	gen_asm(NOT_R, ECX);
	gen_asm(AND_RI32, ECX, 1);
	gen_asm(SHL_RI8, ECX, (31-crD));
	gen_asm(AND_RI32, EAX, mask);
	gen_asm(OR_RR, EAX, ECX);
	gen_asm(MOV_MR, (uint32)&trxCPUrec.cr, EAX);
}

// used
void trx_ppc_gen_creqv()
{
	int crD, crA, crB, mask;
	
	crD=((trxCPUrec.opcode)>>21)&0x1f;
	crA=((trxCPUrec.opcode)>>16)&0x1f;
	crB=((trxCPUrec.opcode)>>11)&0x1f;
	mask = ~(1<<(31-crD));

	gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.cr);
	gen_asm(MOV_RR, ECX, EAX);
	gen_asm(MOV_RR, EDX, EAX);
	gen_asm(SHR_RI8, ECX, (31-crA));
	gen_asm(SHR_RI8, EDX, (31-crB));
	gen_asm(XOR_RR, ECX, EDX);
	gen_asm(NOT_R, ECX);
	gen_asm(AND_RI32, ECX, 1);
	gen_asm(SHL_RI8, ECX, (31-crD));
	gen_asm(AND_RI32, EAX, mask);
	gen_asm(OR_RR, EAX, ECX);
	gen_asm(MOV_MR, (uint32)&trxCPUrec.cr, EAX);
}

// used
void trx_ppc_gen_mfcr()
{
	uint32 rD;
	rD = (trxCPUrec.opcode >> 21)& 0x1f;

	//trxCPUrec.gpr[rD] = trxCPUrec.cr;
	
	gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.cr);
	regc_store(EAX, rD);
}

// used, not proven correct !
void trx_ppc_gen_mcrxr()
{
	uint32 crD, mask, src_shift;
	crD = (trxCPUrec.opcode >> 23)& 0x7;

	crD = 7-crD;
	
	//c = trxCPUrec.xer & 0xf; // bits 0 - 3 are copied
	//trxCPUrec.cr &= trx_ppc_cmp_and_mask[crD];
	//trxCPUrec.cr |= c<<(crD*4);
	//trxCPUrec.xer &= ~0xf; // and clear the bits

	mask = trx_ppc_cmp_and_mask[crD];
	src_shift = (crD*4);

	gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.xer);
	gen_asm(MOV_RR, EDX, EAX);
	gen_asm(AND_RI32, EAX, 0xf);
	gen_asm(MOV_RM, ECX, (uint32)&trxCPUrec.cr);
	gen_asm(AND_RI32, ECX, mask);
	gen_asm(SHL_RI8, EAX, src_shift);
	gen_asm(OR_RR, ECX, EAX);
	gen_asm(MOV_MR, (uint32)&trxCPUrec.cr, ECX);
	gen_asm(AND_RI32, EDX, ~0xf);
	gen_asm(MOV_MR, (uint32)&trxCPUrec.xer, EDX);
}

//==============================================================================
// Integer load opcodes 
//
// used
void trx_ppc_gen_lbz()
{
	uint32 rD, rA;
	sint16 imm;

	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	if(rA == 0)
	{
		//EA = 0 + imm;
//		printf("[Tratax recompiler] lbz constant access to %8.8x\n", imm);
		gen_asm(MOV_RI32, ECX, imm);
	}
	else
	{
		//EA = trxCPUrec.gpr[rA] + imm;
		if(regc_is_constant(rA))
		{
//			printf("[Tratax recompiler] lbz constant access at %8.8x\n", trxCPUrec.pc);
		}
		regc_load(ECX, rA);
		gen_asm(ADD_RI32, ECX, imm);
	}
//	trxCPUrec.gpr[rD] = mem_read8(EA);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_read8);
	regc_store(EAX, rD);
}

// used
void trx_ppc_gen_lzbu()
{
	uint32 rD, rA;
	sint16 imm;

	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	// illegal form!
	if(rD == rA)gen_asm(BREAK);

	if(regc_is_constant(rA))
	{
//		printf("[Tratax recompiler] lzbu constant access at %8.8x\n", trxCPUrec.pc);
	}
	//EA = trxCPUrec.gpr[rA] + imm;
	regc_load(ECX, rA);
	gen_asm(ADD_RI32, ECX, imm);
	regc_store(ECX, rA);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_read8);
	regc_store(EAX, rD);
}
// used 
void trx_ppc_gen_lbzx()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	if(rA == 0)
	{
		//EA = trxCPUrec.gpr[rB];
		if(regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] lzbx constant access at %8.8x\n", trxCPUrec.pc);
		}
		regc_load(ECX, rB);
	}
	else
	{
		if(regc_is_constant(rA) && regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] lzbx constant access at %8.8x\n", trxCPUrec.pc);
		}
		//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
		regc_load(EAX, rA);
		regc_load(ECX, rB);
		gen_asm(ADD_RR, ECX, EAX);
	}
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_read8);
	regc_store(EAX, rD);
}
// used 
void trx_ppc_gen_lbzux()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	// illegal form!
	if(rD == rA)gen_asm(BREAK);

	if(regc_is_constant(rA) && regc_is_constant(rB))
	{
//		printf("[Tratax recompiler] lbzux constant access at %8.8x\n", trxCPUrec.pc);
	}
	//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
	regc_load(EAX, rA);
	regc_load(ECX, rB);
	gen_asm(ADD_RR, ECX, EAX);
	regc_store(ECX, rA);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_read8);
	regc_store(EAX, rD);
}

// used
void trx_ppc_gen_lha()
{
	uint32 rD, rA;
	sint16 imm;

	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	if(rA == 0)
	{
		//EA = 0 + imm;
//		printf("[Tratax recompiler] lha constant access at %8.8x\n", trxCPUrec.pc);
		gen_asm(MOV_RI32, ECX, imm);
	}
	else
	{
		if(regc_is_constant(rA))
		{
//			printf("[Tratax recompiler] lha constant access at %8.8x\n", trxCPUrec.pc);
		}
		//EA = trxCPUrec.gpr[rA] + imm;
		regc_load(ECX, rA);
		gen_asm(ADD_RI32, ECX, imm);
	}
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_read16);
	gen_asm(MOVSX3216_RR, EAX, EAX);
	regc_store(EAX, rD);
}

// used
void trx_ppc_gen_lhau()
{
	uint32 rD, rA;
	sint16 imm;

	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	// illegal form!
	if(rD == rA)gen_asm(BREAK);

	if(regc_is_constant(rA))
	{
//		printf("[Tratax recompiler] lhau constant access at %8.8x\n", trxCPUrec.pc);
	}
	//EA = trxCPUrec.gpr[rA] + imm;
	regc_load(ECX, rA);
	gen_asm(ADD_RI32, ECX, imm);
	regc_store(ECX, rA);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_read16);
	gen_asm(MOVSX3216_RR, EAX, EAX);
	regc_store(EAX, rD);
}

// used
void trx_ppc_gen_lhax()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	if(rA == 0)
	{
		//EA = trxCPUrec.gpr[rB];
		if(regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] lhax constant access at %8.8x\n", trxCPUrec.pc);
		}
		regc_load(ECX, rB);
	}
	else
	{
		//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
		if(regc_is_constant(rA) && regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] lhax constant access at %8.8x\n", trxCPUrec.pc);
		}
		regc_load(EAX, rA);
		regc_load(ECX, rB);
		gen_asm(ADD_RR, ECX, EAX);
	}
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_read16);
	gen_asm(MOVSX3216_RR, EAX, EAX);
	regc_store(EAX, rD);
}

// used
void trx_ppc_gen_lhz()
{
	uint32 rD, rA;
	sint16 imm;

	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	if(rA == 0)
	{
//		printf("[Tratax recompiler] lhz constant access at %8.8x\n", trxCPUrec.pc);
		//EA = 0 + imm;
		gen_asm(MOV_RI32, ECX, imm);
	}
	else
	{
		if(regc_is_constant(rA))
		{
//			printf("[Tratax recompiler] lhz constant access at %8.8x\n", trxCPUrec.pc);
		}
		//EA = trxCPUrec.gpr[rA] + imm;
		regc_load(ECX, rA);
		gen_asm(ADD_RI32, ECX, imm);
	}
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_read16);
	regc_store(EAX, rD);
}

// used
void trx_ppc_gen_lhzu()
{
	uint32 rD, rA;
	sint16 imm;

	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	// illegal form!
	if(rD == rA)gen_asm(BREAK);

	if(regc_is_constant(rA))
	{
//		printf("[Tratax recompiler] lhzu constant access at %8.8x\n", trxCPUrec.pc);
	}
	regc_load(ECX, rA);
	gen_asm(ADD_RI32, ECX, imm);
	regc_store(ECX, rA);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_read16);
	regc_store(EAX, rD);
}

// used
// Emits x86 code for lhzx rD, rA, rB: gpr[rD] = mem_read16(EA),
// EA = (rA == 0 ? 0 : gpr[rA]) + gpr[rB]. The address is built in ECX.
void trx_ppc_gen_lhzx()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	if(rA == 0)
	{
		if(regc_is_constant(rB))
		{
			// constant address: no special handling yet
		}
		//EA = trxCPUrec.gpr[rB];
		// go through the register cache like every other operand read; a
		// direct read of trxCPUrec.gpr[rB] bypasses cached/constant values
		regc_load(ECX, rB);
	}
	else
	{
		if(regc_is_constant(rA) && regc_is_constant(rB))
		{
			// constant address: no special handling yet
		}
		//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
		regc_load(EAX, rA);
		regc_load(ECX, rB);
		gen_asm(ADD_RR, ECX, EAX);
	}
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_read16);
	regc_store(EAX, rD);
}

// used
// Emits x86 code for lhzux rD, rA, rB:
//	EA = (rA == 0 ? 0 : gpr[rA]) + gpr[rB]; gpr[rA] = EA;
//	gpr[rD] = mem_read16(EA).
// rD == rA is an illegal form; a BREAK is emitted in front of the code then.
void trx_ppc_gen_lhzux()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	// illegal form!
	if(rD == rA)gen_asm(BREAK);

	if(rA == 0)
	{
		if(regc_is_constant(rB))
		{
			// constant address: no special handling yet
		}
		//EA = trxCPUrec.gpr[rB];
		// go through the register cache like every other operand read; a
		// direct read of trxCPUrec.gpr[rB] bypasses cached/constant values
		regc_load(ECX, rB);
	}
	else
	{
		if(regc_is_constant(rA) && regc_is_constant(rB))
		{
			// constant address: no special handling yet
		}
		//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
		regc_load(EAX, rA);
		regc_load(ECX, rB);
		gen_asm(ADD_RR, ECX, EAX);
	}
	// update form: the effective address is written back to rA
	regc_store(ECX, rA);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_read16);
	regc_store(EAX, rD);
}

// used
// Emits x86 code for lhbrx rD, rA, rB: byte-reversed halfword load.
// EA = (rA == 0 ? 0 : gpr[rA]) + gpr[rB]; the two low bytes of the value
// returned by the read handler are swapped before it is stored to rD.
void trx_ppc_gen_lhbrx()
{
	uint32 rD, rA, rB;

	rD=((trxCPUrec.opcode)>>21)&0x1f;
	rA=((trxCPUrec.opcode)>>16)&0x1f;
	rB=((trxCPUrec.opcode)>>11)&0x1f;

	if(rA == 0)
	{
		if(regc_is_constant(rB))
		{
			// constant address: no special handling yet
		}
		//EA = trxCPUrec.gpr[rB];
		// go through the register cache like every other operand read; a
		// direct read of trxCPUrec.gpr[rB] bypasses cached/constant values
		regc_load(ECX, rB);
	}
	else
	{
		if(regc_is_constant(rA) && regc_is_constant(rB))
		{
			// constant address: no special handling yet
		}
		//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
		regc_load(EAX, rA);
		regc_load(ECX, rB);
		gen_asm(ADD_RR, ECX, EAX);
	}
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_read16);
	// swap the two bytes of the halfword
	gen_asm(XCHG_R8R8, AL, AH);
	regc_store(EAX, rD);
}

// used
void trx_ppc_gen_lwz()
{
	uint32 rD, rA;
	sint16 imm;

	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	// speed hack for stack access
	if((config_stackinram == 1) && (rA == 1))
	{
		regc_load(ECX, rA);
		gen_asm(AND_RI32, ECX, MEM_MASK32MB);
		gen_asm(MOV_RMRI32, EAX, ECX, (uint32)&gMemory[imm]);
		gen_asm(BSWAP_R, EAX);
		regc_store(EAX, rD);
		return;
	}

	if(rA == 0)
	{
		//EA = 0 + imm;
		gen_asm(MOV_RI32, ECX, imm);
	}
	else
	{
		if(regc_is_constant(rA))
		{
			uint32 EA;

			EA = regc_getconstant(rA) + imm;


			if(config_constantaddress == 1)
			{
				// memory based ?
				if(EA < 0xc8000000)
				{
					EA &= MEM_MASK32MB;
					gen_asm(MOV_RM, EAX, (uint32)&gMemory[EA]);
					gen_asm(BSWAP_R, EAX);
					regc_store(EAX, rD);
					return;
				}
				else
				{
	//				printf("[Tratax recompiler] lwz constant access at %8.8x to %8.8x\n", trxCPUrec.pc, EA);
				}
			}
		}
		//EA = trxCPUrec.gpr[rA] + imm;
		regc_load(ECX, rA);
		gen_asm(ADD_RI32, ECX, imm);
	}
	if(use_sse)psc_flushall();

	gen_asm(CALL_M, (uint32)p_rec_mem_read32);
	regc_store(EAX, rD);
}

// used
void trx_ppc_gen_lwzu()
{
	uint32 rD, rA;
	sint16 imm;

	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	// illegal form!
	if(rD == rA)gen_asm(BREAK);

	if(regc_is_constant(rA))
	{
		uint32 EA;

		EA = regc_getconstant(rA) + imm;

		if(config_constantaddress == 1)
		{
			// memory based ?
			if(EA < 0xc8000000)
			{
				EA &= MEM_MASK32MB;
				gen_asm(MOV_RM, EAX, (uint32)&gMemory[EA]);
				gen_asm(BSWAP_R, EAX);
				regc_store(EAX, rD);
				regc_store_constant(regc_getconstant(rA) + imm, rA);
				return;
			}
			else
			{
	//			printf("[Tratax recompiler] lwzu constant access at %8.8x to %8.8x\n", trxCPUrec.pc, EA);
			}
		}
	}
	//EA = trxCPUrec.gpr[rA] + imm;
	regc_load(ECX, rA);
	gen_asm(ADD_RI32, ECX, imm);
	regc_store(ECX, rA);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_read32);
	regc_store(EAX, rD);
}

// used
void trx_ppc_gen_lwzx()
{
	uint32 rD, rA, rB;

	rD=((trxCPUrec.opcode)>>21)&0x1f;
	rA=((trxCPUrec.opcode)>>16)&0x1f;
	rB=((trxCPUrec.opcode)>>11)&0x1f;

	if(rA == 0)
	{
		if(regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] lwzx constant access at %8.8x\n", trxCPUrec.pc);
		}
		//EA = trxCPUrec.gpr[rB];
		regc_load(ECX, rB);
	}
	else
	{
		if(regc_is_constant(rA) && regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] lwzx constant access at %8.8x\n", trxCPUrec.pc);
		}
		regc_load(EAX, rA);
		regc_load(ECX, rB);
		//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
		gen_asm(ADD_RR, ECX, EAX);
	}
//	trxCPUrec.gpr[rD] = mem_read32(EA);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_read32);
	regc_store(EAX, rD);
}

// used
void trx_ppc_gen_lwzux()
{
	uint32 rD, rA, rB;

	rD=((trxCPUrec.opcode)>>21)&0x1f;
	rA=((trxCPUrec.opcode)>>16)&0x1f;
	rB=((trxCPUrec.opcode)>>11)&0x1f;

	// illegal form!
	if(rD == rA)gen_asm(BREAK);

	if(regc_is_constant(rA) && regc_is_constant(rB))
	{
//		printf("[Tratax recompiler] lwzux constant access at %8.8x\n", trxCPUrec.pc);
	}
	regc_load(EAX, rA);
	regc_load(ECX, rB);
	//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
	gen_asm(ADD_RR, ECX, EAX);
	regc_store(ECX, rA);
	//	trxCPUrec.gpr[rD] = mem_read32(EA);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_read32);
	regc_store(EAX, rD);
}

// used
void trx_ppc_gen_lwbrx()
{
	uint32 rD, rA, rB;

	rD=((trxCPUrec.opcode)>>21)&0x1f;
	rA=((trxCPUrec.opcode)>>16)&0x1f;
	rB=((trxCPUrec.opcode)>>11)&0x1f;

	if(rA == 0)
	{
		if(regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] lwbrx constant access at %8.8x\n", trxCPUrec.pc);
		}
		//EA = trxCPUrec.gpr[rB];
		regc_load(ECX, rB);
	}
	else
	{
		if(regc_is_constant(rA) && regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] lwbrx constant access at %8.8x\n", trxCPUrec.pc);
		}
		regc_load(EAX, rA);
		regc_load(ECX, rB);
		//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
		gen_asm(ADD_RR, ECX, EAX);
	}
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_read32);
	gen_asm(BSWAP_R, EAX);
	regc_store(EAX, rD);
}

// used
void trx_ppc_gen_lmw(void)
{
	uint32 rD, rA;
	sint16 imm;

	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	if(rA == 0)
	{
		//EA = 0 + imm;
//		printf("[Tratax recompiler] lmw constant access at %8.8x\n", trxCPUrec.pc);
		gen_asm(MOV_RI32, ECX, imm);
	}
	else
	{
		//EA = trxCPUrec.gpr[rA] + imm;
		if(regc_is_constant(rA))
		{
//			printf("[Tratax recompiler] lmw constant access at %8.8x\n", trxCPUrec.pc);
		}
		regc_load(ECX, rA);
		gen_asm(ADD_RI32, ECX, imm);
	}
	while(rD <= 31)
	{
		//	trxCPUrec.gpr[rD] = mem_read32(EA);	
		gen_asm(PUSH_R, ECX);
		if(use_sse)psc_flushall();
		gen_asm(CALL_M, (uint32)p_rec_mem_read32);
		gen_asm(POP_R, ECX);
		regc_store(EAX, rD);
		rD++;
		//EA +=4;
		gen_asm(ADD_RI32, ECX, 4);
	}
}

// used
/*
 * lswi rD, rA, NB -- load string word immediate.
 *
 * Emits code that reads n bytes (n = NB, or 32 when NB == 0) one at a time
 * through the read8 helper and merges them, most significant byte first,
 * into successive registers starting at rD and wrapping from r31 to r0.
 * Each destination register is cleared before its first byte is merged.
 *
 * Reference behaviour (interpreter form):
 *     r = rD - 1; i = 0;
 *     while (n > 0) {
 *         if (i == 0) { r = (r + 1) & 31; gpr[r] = 0; }
 *         gpr[r] |= read8(EA) << (24 - i);
 *         i = (i + 8) & 31; EA++; n--;
 *     }
 *
 * A BREAK is still emitted in front of the sequence, as in the original
 * (presumably marking this opcode as not yet verified).
 *
 * NOTE(review): the address is formed with the low 16 opcode bits as a
 * displacement, like the D-form loads. lswi is an X-form instruction with
 * no displacement field (EA = rA|0) -- confirm against the interpreter.
 */
void trx_ppc_gen_lswi(void)
{
	uint32 rD, rA, NB, n, r, i;
	sint16 imm;

	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	NB = (trxCPUrec.opcode >> 11)&0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	// an NB field of 0 encodes a 32 byte transfer
	n = (NB == 0) ? 32 : NB;

	if(rA == 0)
	{
		//EA = 0 + imm;
		gen_asm(MOV_RI32, ECX, imm);
	}
	else
	{
		//EA = trxCPUrec.gpr[rA] + imm;
		if(regc_is_constant(rA))
		{
			// address is known at recompile time; no special handling yet
		}
		regc_load(ECX, rA);
		gen_asm(ADD_RI32, ECX, imm);
	}

	gen_asm(BREAK);
	r = rD - 1;
	i = 0;
	while(n > 0)
	{
		if(i==0)
		{
			// move on to the next destination register (wrapping) and clear it
			r++;
			r &= 31;
			gen_asm(XOR_RR, EAX, EAX);
			regc_store(EAX, r);
		}
		// keep the address alive across the helper call
		gen_asm(PUSH_R, ECX);
		if(use_sse)psc_flushall();
		gen_asm(CALL_M, (uint32)p_rec_mem_read8);
		gen_asm(POP_R, ECX);
		// shift the byte into its lane and merge with what is already there
		gen_asm(SHL_RI8, EAX, (24 - i));
		regc_load(EDX, r);
		gen_asm(OR_RR, EAX, EDX);
		// fix: write back to the register being assembled (r), not always rD
		regc_store(EAX, r);
		i += 8;
		if(i == 32)i=0;
		gen_asm(ADD_RI32, ECX, 1);
		n--;
	}
}

//==============================================================================
// Integer store opcodes
//
// used
void trx_ppc_gen_stb(void)
{
	uint32 rS, rA;
	sint16 imm;

	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	if(rA == 0)
	{
		//EA = 0 + imm;
//		printf("[Tratax recompiler] stb constant access at %8.8x\n", trxCPUrec.pc);
		gen_asm(MOV_RI32, ECX, imm);
	}
	else
	{
		//EA = trxCPUrec.gpr[rA] + imm;
		if(regc_is_constant(rA))
		{
//			printf("[Tratax recompiler] stb constant access at %8.8x\n", trxCPUrec.pc);
		}
		regc_load(ECX, rA);
		gen_asm(ADD_RI32, ECX, imm);
	}
	regc_load(EAX, rS);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_write8);
}

// used
void trx_ppc_gen_stbu(void)
{
	uint32 rS, rA;
	sint16 imm;

	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	//EA = trxCPUrec.gpr[rA] + imm;
	if(regc_is_constant(rA))
	{
//		printf("[Tratax recompiler] stbu constant access at %8.8x\n", trxCPUrec.pc);
	}
	regc_load(ECX, rA);
	gen_asm(ADD_RI32, ECX, imm);
	regc_load(EAX, rS);
	regc_store(ECX, rA);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_write8);
}

// used
void trx_ppc_gen_stbx(void)
{
	uint32 rS, rA, rB;

	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	if(rA == 0)
	{
		if(regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] stbx constant access at %8.8x\n", trxCPUrec.pc);
		}
		//EA = trxCPUrec.gpr[rB];
		regc_load(ECX, rB);
	}
	else
	{
		if(regc_is_constant(rA) && regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] stbx constant access at %8.8x\n", trxCPUrec.pc);
		}
		regc_load(EAX, rA);
		regc_load(ECX, rB);
		//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
		gen_asm(ADD_RR, ECX, EAX);
	}
	//	mem_write8(EA, trxCPUrec.gpr[rS]);
	regc_load(EAX, rS);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_write8);
}
// used
void trx_ppc_gen_stbux(void)
{
	uint32 rS, rA, rB;

	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
	if(regc_is_constant(rA) && regc_is_constant(rB))
	{
//		printf("[Tratax recompiler] stbux constant access at %8.8x\n", trxCPUrec.pc);
	}
	regc_load(EAX, rA);
	regc_load(ECX, rB);
	//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
	gen_asm(ADD_RR, ECX, EAX);
	regc_load(EAX, rS);
	// update
	regc_store(ECX, rA);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_write8);
}

// used
void trx_ppc_gen_sth(void)
{
	uint32 rS, rA;
	sint16 imm;

	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	if(rA == 0)
	{
		//EA = 0 + imm;
//		printf("[Tratax recompiler] sth constant access at %8.8x\n", trxCPUrec.pc);
		gen_asm(MOV_RI32, ECX, imm);
	}
	else
	{
		//EA = trxCPUrec.gpr[rA] + imm;
		if(regc_is_constant(rA))
		{
//			printf("[Tratax recompiler] sth constant access at %8.8x\n", trxCPUrec.pc);
		}
		regc_load(ECX, rA);
		gen_asm(ADD_RI32, ECX, imm);
	}
	//	mem_write16(EA, trxCPUrec.gpr[rS]);
	regc_load(EAX, rS);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_write16);
}

// used
void trx_ppc_gen_sthu(void)
{
	uint32 rS, rA;
	sint16 imm;

	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	if(regc_is_constant(rA))
	{
//		printf("[Tratax recompiler] sthu constant access at %8.8x\n", trxCPUrec.pc);
	}
	regc_load(ECX, rA);
	gen_asm(ADD_RI32, ECX, imm);
	regc_load(EAX, rS);
	regc_store(ECX, rA);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_write16);
}

// used
void trx_ppc_gen_sthx(void)
{
	uint32 rS, rA, rB;

	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	if(rA == 0)
	{
		if(regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] sthx constant access at %8.8x\n", trxCPUrec.pc);
		}
		//EA = trxCPUrec.gpr[rB];
		regc_load(ECX, rB);
	}
	else
	{
		if(regc_is_constant(rA) && regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] sthx constant access at %8.8x\n", trxCPUrec.pc);
		}
		regc_load(EAX, rA);
		regc_load(ECX, rB);
		//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
		gen_asm(ADD_RR, ECX, EAX);
	}
	regc_load(EAX, rS);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_write16);
}

// used
void trx_ppc_gen_sthux(void)
{
	uint32 rS, rA, rB;

	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
	if(regc_is_constant(rA) && regc_is_constant(rB))
	{
//		printf("[Tratax recompiler] stbux constant access at %8.8x\n", trxCPUrec.pc);
	}
	regc_load(EAX, rA);
	regc_load(ECX, rB);
	//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
	gen_asm(ADD_RR, ECX, EAX);
	regc_load(EAX, rS);
	// update
	regc_store(ECX, rA);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_write16);
}

// used
void trx_ppc_gen_sthbrx(void)
{
	uint32 rS, rA, rB;

	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	if(rA == 0)
	{
		if(regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] sthbrx constant access at %8.8x\n", trxCPUrec.pc);
		}
		//EA = trxCPUrec.gpr[rB];
		regc_load(ECX, rB);
	}
	else
	{
		if(regc_is_constant(rA) && regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] sthbrx constant access at %8.8x\n", trxCPUrec.pc);
		}
		regc_load(EAX, rA);
		regc_load(ECX, rB);
		//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
		gen_asm(ADD_RR, ECX, EAX);
	}
	regc_load(EAX, rS);
	gen_asm(XCHG_R8R8, AL, AH);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_write16);
}

// used
void trx_ppc_gen_stw(void)
{
	uint32 rS, rA;
	sint16 imm;

	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	// speed hack for stack access
	if((config_stackinram == 1) && (rA == 1))
	{
		regc_load(EAX, rS);
		regc_load(ECX, rA);
		gen_asm(BSWAP_R, EAX);
		gen_asm(AND_RI32, ECX, MEM_MASK32MB);
		gen_asm(MOV_MRRI32, EAX, ECX, (uint32)&gMemory[imm]);
		return;
	}

	if(rA == 0)
	{
		//EA = 0 + imm;
//		printf("[Tratax recompiler] stw constant access at %8.8x\n", trxCPUrec.pc);
		gen_asm(MOV_RI32, ECX, imm);
	}
	else
	{
		if(regc_is_constant(rA))
		{
			uint32 EA;

			EA = regc_getconstant(rA) + imm;

			if(config_constantaddress == 1)
			{
				// memory based ?
				if(EA < 0xc8000000)
				{
					EA &= MEM_MASK32MB;
					regc_load(EAX, rS);
					gen_asm(BSWAP_R, EAX);
					gen_asm(MOV_MR, (uint32)&gMemory[EA], EAX);
					return;
				}
				else
				{
					// gx fifo write ?
					if((EA & 0xffffff00) == 0xcc008000)
					{
						regc_load(EAX, rS);
						gen_asm(PUSH_R, EAX);
						gen_asm(CALL_M, (uint32)&gx_write_fifo32);//void gx_write_fifo32(uint32 data);
						gen_asm(ADD_RI8, ESP, 4);
						return;
					}
	//				printf("[Tratax recompiler] stw constant access at %8.8x to %8.8x\n", trxCPUrec.pc, EA);
				}
			}
		}
		regc_load(ECX, rA);
		gen_asm(ADD_RI32, ECX, imm);
	}
	regc_load(EAX, rS);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_write32);
}

// used
void trx_ppc_gen_stwu(void)
{
	uint32 rS, rA;
	sint16 imm;

	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	// speed hack for stack access (stack frame)
	if((config_stackinram == 1) && (rA == 1)&&(rS ==1))
	{
		regc_load(ECX, rA);
		gen_asm(MOV_RR, EAX, ECX);
		gen_asm(ADD_RI32, ECX, imm);
		regc_store(ECX, rA);
		gen_asm(BSWAP_R, EAX);
		gen_asm(AND_RI32, ECX, MEM_MASK32MB);
		gen_asm(MOV_MRRI32, EAX, ECX, (uint32)&gMemory[0]);
		return;
	}

	if(regc_is_constant(rA))
	{
		uint32 EA;

		EA = regc_getconstant(rA) + imm;

		if(config_constantaddress == 1)
		{
			// memory based ?
			if(EA < 0xc8000000)
			{
				EA &= MEM_MASK32MB;
				regc_load(EAX, rS);
				gen_asm(BSWAP_R, EAX);
				gen_asm(MOV_MR, (uint32)&gMemory[EA], EAX);
				regc_store_constant(regc_getconstant(rA) + imm, rA);
				return;
			}
			else
			{
	//			printf("[Tratax recompiler] stwu constant access at %8.8x to %8.8x\n", trxCPUrec.pc, EA);
			}
		}
	}
	regc_load(ECX, rA);
	gen_asm(ADD_RI32, ECX, imm);
	regc_load(EAX, rS);
	regc_store(ECX, rA);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_write32);
}

// used
void trx_ppc_gen_stwx()
{
	uint32 rS, rA, rB;

	rS = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	if(rA == 0)
	{
		if(regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] stwx constant access at %8.8x\n", trxCPUrec.pc);
		}
		//EA = trxCPUrec.gpr[rB];
		regc_load(ECX, rB);
	}
	else
	{
		if(regc_is_constant(rA) && regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] stwx constant access at %8.8x\n", trxCPUrec.pc);
		}
		regc_load(EAX, rA);
		regc_load(ECX, rB);
		//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
		gen_asm(ADD_RR, ECX, EAX);
	}
	regc_load(EAX, rS);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_write32);
}

// used
void trx_ppc_gen_stwux()
{
	uint32 rS, rA, rB;

	rS = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
	if(regc_is_constant(rA) && regc_is_constant(rB))
	{
//		printf("[Tratax recompiler] stwux constant access at %8.8x\n", trxCPUrec.pc);
	}
	regc_load(EAX, rA);
	regc_load(ECX, rB);
	//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
	gen_asm(ADD_RR, ECX, EAX);
	regc_load(EAX, rS);
	// update
	regc_store(ECX, rA);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_write32);
}

// used
void trx_ppc_gen_stwbrx()
{
	uint32 rS, rA, rB;

	rS = ((trxCPUrec.opcode)>>21)&0x1f;
	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	if(rA == 0)
	{
		if(regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] stwbrx constant access at %8.8x\n", trxCPUrec.pc);
		}
		//EA = trxCPUrec.gpr[rB];
		regc_load(ECX, rB);
	}
	else
	{
		if(regc_is_constant(rA) && regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] stwbrx constant access at %8.8x\n", trxCPUrec.pc);
		}
		regc_load(EAX, rA);
		regc_load(ECX, rB);
		//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
		gen_asm(ADD_RR, ECX, EAX);
	}
	regc_load(EAX, rS);
	gen_asm(BSWAP_R, EAX);
	if(use_sse)psc_flushall();
	gen_asm(CALL_M, (uint32)p_rec_mem_write32);
}

// used
void trx_ppc_gen_stmw(void)
{
	uint32 rS, rA;
	sint16 imm;

	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	if(rA == 0)
	{
		//EA = 0 + imm;
//		printf("[Tratax recompiler] stwm constant access at %8.8x\n", trxCPUrec.pc);
		gen_asm(MOV_RI32, ECX, imm);
	}
	else
	{
		//EA = trxCPUrec.gpr[rA] + imm;
		if(regc_is_constant(rA))
		{
//			printf("[Tratax recompiler] stwm constant access at %8.8x\n", trxCPUrec.pc);
		}
		regc_load(ECX, rA);
		gen_asm(ADD_RI32, ECX, imm);
	}
	while(rS <= 31)
	{
		regc_load(EAX, rS);
		gen_asm(PUSH_R, ECX);
		if(use_sse)psc_flushall();
		gen_asm(CALL_M, (uint32)p_rec_mem_write32);
		gen_asm(POP_R, ECX);
		//EA +=4; ECX could be destroyed by this function !
		gen_asm(ADD_RI32, ECX, 4);
		rS++;
	}
}

// used
void trx_ppc_gen_stswi(void)
{
	uint32 rS, rA, NB, n, r, i;
	sint16 imm;

	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	rA = (trxCPUrec.opcode >> 16)& 0x1f;
	NB = (trxCPUrec.opcode >> 11)&0x1f;
	imm = trxCPUrec.opcode & 0xffff;

	if(NB == 0)
	{
		n = 32;
	}
	else
	{
		n = NB;
	}

	if(rA == 0)
	{
		//EA = 0 + imm;
		gen_asm(MOV_RI32, ECX, imm);
	}
	else
	{
		//EA = trxCPUrec.gpr[rA] + imm;
		if(regc_is_constant(rA))
		{
		}
		regc_load(ECX, rA);
		gen_asm(ADD_RI32, ECX, imm);
	}
/*
	r = rS - 1;
	i = 0;
	while (n>0)
	{
		if(i==0)
		{
			r++;
			r &= 31;
		}
		mem_write8_int(EA, (trxCPUint.gpr[r] >> (24 - i)) & 0xff);
		i += 8;
		if(i == 32)i=0;
		EA++;
		n--;
	}
*/
	gen_asm(BREAK);
	r = rS - 1;
	i = 0;
	while(n > 0)
	{
		if(i==0)
		{
			r++;
			r &= 31;
		}
		regc_load(EAX, r);
		gen_asm(SHR_RI8, EAX, (24 - i));
		gen_asm(PUSH_R, ECX);
		if(use_sse)psc_flushall();
		gen_asm(CALL_M, (uint32)p_rec_mem_write8);
		gen_asm(POP_R, ECX);
		i += 8;
		if(i == 32)i=0;
		gen_asm(ADD_RI32, ECX, 1);
		n--;
	}
}

void trx_ppc_gen_dcbz()
{
	// data cache block zero
	uint32 rA, rB, i;

	rA = ((trxCPUrec.opcode)>>16)&0x1f;
	rB = ((trxCPUrec.opcode)>>11)&0x1f;

	if(rA == 0)
	{
		if(regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] dcbz constant access at %8.8x\n", trxCPUrec.pc);
		}
		//EA = trxCPUrec.gpr[rB];
		regc_load(ECX, rB);
	}
	else
	{
		if(regc_is_constant(rA) && regc_is_constant(rB))
		{
//			printf("[Tratax recompiler] dcbz constant access at %8.8x\n", trxCPUrec.pc);
		}
		regc_load(EAX, rA);
		regc_load(ECX, rB);
		//EA = trxCPUrec.gpr[rA] + trxCPUrec.gpr[rB];
		gen_asm(ADD_RR, ECX, EAX);
	}

	// could be optimized because cache always is in memory
	// 32 bytes in a cache block (8 x uint32)
	for(i = 0; i < 8; i++)
	{
		gen_asm(XOR_RR, EAX, EAX);
		gen_asm(PUSH_R, ECX);
		gen_asm(CALL_M, (uint32)p_rec_mem_write32);
		gen_asm(POP_R, ECX);
		//EA +=4; ECX could be destroyed by this function !
		gen_asm(ADD_RI32, ECX, 4);
	}
}
//==============================================================================
// Processor control opcodes 
//
// Code generator for the opcode this handler is registered for: emits no
// host code at all.
void trx_ppc_gen_icbc()
{
	// selfmodcheck routines will keep track of overwritten compiled code
}

/*
 * sc -- system call.
 *
 * Currently emits nothing: the generator returns immediately (original
 * note here: "flushcache"). The disabled sequence below is kept for
 * reference; it would end the block, save pc+4 to SRR0 and the masked MSR
 * to SRR1, clear MSR and continue at vector 0x00C00.
 */
void trx_ppc_gen_sc()
{
	return;

#if 0
	// srr[0] = npc; srr[1] = msr & 0x87c0ffff; msr = 0; npc = 0x00C00
	trx_rec_gen_blockend();

	gen_asm(MOV_RI32, EAX, trxCPUrec.pc+4);
	gen_asm(MOV_MR,(uint32)&trxCPUrec.spr[PPC_SRR0], EAX);
	gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.msr);
	gen_asm(AND_RI32, EAX, 0x87c0ffff);
	gen_asm(MOV_MR,(uint32)&trxCPUrec.spr[PPC_SRR1], EAX);
	gen_asm(XOR_RR, EAX, EAX);
	gen_asm(MOV_MR,(uint32)&trxCPUrec.msr, EAX);
	gen_asm(MOV_RI32, EAX, 0x00C00);
	gen_asm(MOV_MR,(uint32)&trxCPUrec.npc, EAX);
	trxCPUrec.blockend = BLOCKEND_STOP;
#endif
}

// used
void trx_ppc_gen_mfmsr()
{
	uint32 rD;
	rD=((trxCPUrec.opcode)>>21)&0x1f;

	//	trxCPUrec.gpr[rD] = trxCPUrec.msr;
	gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.msr);
	regc_store(EAX, rD);
}
// used
void trx_ppc_gen_mtmsr()
{
	uint32 rS;

	rS=((trxCPUrec.opcode)>>21)&0x1f;
	//	trxCPUrec.msr = trxCPUrec.gpr[rS];
	regc_load(EAX, rS);
	gen_asm(MOV_MR, (uint32)&trxCPUrec.msr, EAX);

	trx_rec_gen_blockend();

	// in multi block mode, we need to exit immediately at exceptions!
	gen_asm(MOV_RM, EAX, (uint32)&cpuslice_left);
	gen_asm(MOV_RM, ECX, (uint32)&cpuslice);
	gen_asm(SUB_RR, ECX, EAX);
	gen_asm(MOV_MR, (uint32)&cpuslice, ECX);
	gen_asm(MOV_M32I32, (uint32)&cpuslice_left, -1);
	gen_asm(MOV_M32I32, (uint32)&trxCPUrec.npc, trxCPUrec.pc+4);
	trxCPUrec.blockend = BLOCKEND_STOP;	
}
// used
void trx_ppc_gen_mfspr()
{
	uint32 rD, spr1, spr2;

	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	spr1 = (trxCPUrec.opcode >> 16)& 0x1f;
	spr2 = (trxCPUrec.opcode >> 11)& 0x1f;

	switch((spr2<<5)|spr1)
	{
		case 1: //trxCPUrec.gpr[rD] = trxCPUrec.xer; if(trxCPUrec.xer_ca)trxCPUrec.gpr[rD] |= XER_CA; return;
		{			
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.xer);
			gen_asm(MOV_RM, ECX, (uint32)&trxCPUrec.xer_ca);
			// dont forget to shift into place !
			gen_asm(SHL_RI8, ECX, 29);
			gen_asm(OR_RR, EAX, ECX);
			regc_store(EAX, rD);
			return; 
		}
		case 8: //trxCPUrec.gpr[rD] = trxCPUrec.lr; return;
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.lr);regc_store(EAX, rD);return; 
		case 9: //trxCPUrec.gpr[rD] = trxCPUrec.ctr; return;
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.ctr);regc_store(EAX, rD);return; 
	}
	// the rest is privileged instructions .. do we have a right to them ?
	if (trxCPUrec.msr & MSR_PR) 
	{
//		trx_ppc_exception(PPC_EXC_PROGRAM, PPC_EXC_PROGRAM_PRIV, 0);
		gen_asm(BREAK);
		return;
	}

	switch((spr2<<5)|spr1)
	{
		case 22: //trxCPUrec.gpr[rD] = trxCPUrec.spr[PPC_DEC]; return;
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_DEC]);regc_store(EAX, rD);break; 
		case 26: //trxCPUrec.gpr[rD] = trxCPUrec.srr[0]; return;
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_SRR0]);regc_store(EAX, rD);break; 
		case 27: //trxCPUrec.gpr[rD] = trxCPUrec.srr[1]; return;
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_SRR1]);regc_store(EAX, rD);break; 
		case 912: //trxCPUrec.gpr[rD] = trxCPUrec.spr[PPC_GQR0+0]; return;
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_GQR0+0]);regc_store(EAX, rD);break; 
		case 913: //trxCPUrec.gpr[rD] = trxCPUrec.spr[PPC_GQR0+1]; return;
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_GQR0+1]);regc_store(EAX, rD);break; 
		case 914: //trxCPUrec.gpr[rD] = trxCPUrec.spr[PPC_GQR0+2]; return;
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_GQR0+2]);regc_store(EAX, rD);break; 
		case 915: //trxCPUrec.gpr[rD] = trxCPUrec.spr[PPC_GQR0+3]; return;
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_GQR0+3]);regc_store(EAX, rD);break; 
		case 916: //trxCPUrec.gpr[rD] = trxCPUrec.spr[PPC_GQR0+4]; return;
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_GQR0+4]);regc_store(EAX, rD);break; 
		case 917: //trxCPUrec.gpr[rD] = trxCPUrec.spr[PPC_GQR0+5]; return;
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_GQR0+5]);regc_store(EAX, rD);break; 
		case 918: //trxCPUrec.gpr[rD] = trxCPUrec.spr[PPC_GQR0+6]; return;
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_GQR0+6]);regc_store(EAX, rD);break; 
		case 919: //trxCPUrec.gpr[rD] = trxCPUrec.spr[PPC_GQR0+7]; return;
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_GQR0+7]);regc_store(EAX, rD);break; 
		case 920:	// hid[2]
			//trxCPUrec.gpr[rD] = trxCPUrec.spr[PPC_HID2];
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_HID2]);regc_store(EAX, rD); return;
			break;
		case 921: 	// wpar
			//trxCPUrec.gpr[rD] = trxCPUrec.wpar;
			//gen_asm(BREAK);
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_WPAR]);
			// merge with data status
			gen_asm(OR_RM, EAX, (uint32)&wpar_hasdata);
			regc_store(EAX, rD);
			break;
		case 922:	// dma-u
			//trxCPUrec.gpr[rD] = trxCPUrec.spr[PPC_DMAU];
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_DMAU]);regc_store(EAX, rD);
			break;
		case 923:	// dma-l
			//trxCPUrec.gpr[rD] = trxCPUrec.spr[PPC_DMAL];
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_DMAL]);regc_store(EAX, rD);
			break;
		default:
			//trxCPUrec.gpr[rD] = trxCPUrec.spr[(spr2<<5)|spr1];
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[(spr2<<5)|spr1]);regc_store(EAX, rD);
			break;
	}
}

// used
void trx_ppc_gen_mftb()
{
	uint32 rD, spr1, spr2;

	rD = (trxCPUrec.opcode >> 21)& 0x1f;
	spr1 = (trxCPUrec.opcode >> 16)& 0x1f;
	spr2 = (trxCPUrec.opcode >> 11)& 0x1f;

	switch (spr2) {
	case 8:
		switch (spr1) {
		case 12: //trxCPUrec.gpr[rD] = (uint32)trxCPUrec.tb; return;
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_TBL]); regc_store(EAX, rD);return;
		case 13: //trxCPUrec.gpr[rD] = (uint32)(trxCPUrec.tb >> 32); return;
			gen_asm(MOV_RM, EAX, (uint32)&trxCPUrec.spr[PPC_TBH]); regc_store(EAX, rD);return;
		}
		break;
	}
}

// used
void trx_ppc_gen_mtspr()
{
	uint32 rS, spr1, spr2;

	rS = (trxCPUrec.opcode >> 21)& 0x1f;
	spr1 = (trxCPUrec.opcode >> 16)& 0x1f;
	spr2 = (trxCPUrec.opcode >> 11)& 0x1f;

	switch((spr2<<5)|spr1)
	{
		case 1:
			//trxCPUrec.xer = trxCPUrec.gpr[rS]; trxCPUrec.xer_ca = (trxCPUrec.xer >> 29) & 1; trxCPUrec.xer &= ~XER_CA; return;
			{
				regc_load(EAX, rS);
				gen_asm(MOV_RR, ECX, EAX);
				gen_asm(AND_RI32, ECX, ~XER_CA);
				gen_asm(MOV_MR, (uint32)&trxCPUrec.xer, ECX); 
				gen_asm(SHR_RI8, EAX, 29);
				gen_asm(AND_RI32, EAX, 1);
				gen_asm(MOV_MR, (uint32)&trxCPUrec.xer_ca, EAX); 
				return;
			}
		case 8:	//trxCPUrec.lr = trxCPUrec.gpr[rS]; return;
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.lr, EAX); return;
		case 9:	//trxCPUrec.ctr = trxCPUrec.gpr[rS]; return;
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.ctr, EAX); return;
	}

	// rest is priviliged material
	if (trxCPUrec.msr & MSR_PR) 
	{
//		trx_ppc_exception(PPC_EXC_PROGRAM, PPC_EXC_PROGRAM_PRIV, 0);
		gen_asm(BREAK);
		return;
	}

	switch((spr2<<5)|spr1)
	{
		case 22: //trxCPUrec.spr[PPC_DEC] = trxCPUrec.gpr[rS]; return;
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.spr[PPC_DEC], EAX); break;
		case 26: //trxCPUrec.srr[0] = trxCPUrec.gpr[rS]; return;
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.spr[PPC_SRR0], EAX); break;
		case 27: //trxCPUrec.srr[1] = trxCPUrec.gpr[rS]; return;
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.spr[PPC_SRR1], EAX); break;
		case 912: //trxCPUrec.spr[PPC_GQR0+0] = trxCPUrec.gpr[rS]; return;
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.spr[PPC_GQR0+0], EAX); break;
		case 913: //trxCPUrec.spr[PPC_GQR0+1] = trxCPUrec.gpr[rS]; return;
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.spr[PPC_GQR0+1], EAX); break;
		case 914: //trxCPUrec.spr[PPC_GQR0+2] = trxCPUrec.gpr[rS]; return;
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.spr[PPC_GQR0+2], EAX); break;
		case 915: //trxCPUrec.spr[PPC_GQR0+3] = trxCPUrec.gpr[rS]; return;
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.spr[PPC_GQR0+3], EAX); break;
		case 916: //trxCPUrec.spr[PPC_GQR0+4] = trxCPUrec.gpr[rS]; return;
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.spr[PPC_GQR0+4], EAX); break;
		case 917: //trxCPUrec.spr[PPC_GQR0+5] = trxCPUrec.gpr[rS]; return;
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.spr[PPC_GQR0+5], EAX); break;
		case 918: //trxCPUrec.spr[PPC_GQR0+6] = trxCPUrec.gpr[rS]; return;
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.spr[PPC_GQR0+6], EAX); break;
		case 919: //trxCPUrec.spr[PPC_GQR0+7] = trxCPUrec.gpr[rS]; return;
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.spr[PPC_GQR0+7], EAX); break;
		case 920: // hid[2]
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.spr[PPC_HID2], EAX); 
			break;
		case 921: // wpar
			//trxCPUrec.wpar = trxCPUrec.gpr[rS];
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.spr[PPC_WPAR], EAX); 
			gen_asm(CALL_M, (uint32)&wpar_set);
			break;
		case 922:// dma-u
			//trxCPUrec.spr[PPC_DMAU] = trxCPUrec.gpr[rS];
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.spr[PPC_DMAU], EAX); 
			break;
		case 923:// dma-l
			//trxCPUrec.spr[PPC_DMAL] = trxCPUrec.gpr[rS];
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.spr[PPC_DMAL], EAX); 
			gen_asm(CALL_M, (uint32)&dma_engine);
			break;
		case 1019:	// ictc
			//trxCPUrec.spr[1019] = trxCPUrec.gpr[rS];
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.spr[1019], EAX); 
			printf("[trxCPUrec] ICTC set to: %x\n", trxCPUrec.spr[1019]);
			exit(0);
			break;
		default:
			//trxCPUrec.spr[(spr2<<5)|spr1] = trxCPUrec.gpr[rS];
			regc_load(EAX, rS); gen_asm(MOV_MR, (uint32)&trxCPUrec.spr[(spr2<<5)|spr1], EAX);
			break;
	}
}

