/*====================================================================

filename:     trx_ppc_int_opcodes.cpp
project:      GCemu
created:      2004-6-18
mail:		  duddie@walla.com

Copyright (c) 2005 Duddie & Tratax

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

====================================================================*/
#include <stdio.h>
#include <stdlib.h>
#include "system/types.h"
#include "hardware/hw_io.h"
#include "cpu/trx_ppc_cpu.h"
#include "cpu/trx_ppc_int.h"

#if WITH_LOGGING
#include "elf_loader.h"
extern 	elf_file *elf;
#endif // WITH_LOGGING

// AND-masks that clear exactly one 4-bit field of a 32-bit word.
// Entry i has nibble i (counted from the least significant nibble) zeroed:
// entry 0 clears bits 3..0, entry 7 clears bits 31..28.
// The compare handlers in this file index it with (7 - crfD) to wipe the
// destination field of trxCPUint.cr before OR-ing in the new result.
static uint32 trx_ppc_cmp_and_mask[8] = 
{
	0xfffffff0,
	0xffffff0f,
	0xfffff0ff,
	0xffff0fff,
	0xfff0ffff,
	0xff0fffff,
	0xf0ffffff,
	0x0fffffff
};

// Returns true when the 32-bit sum a + b + c produces a carry out of bit 31.
// The sum is evaluated in two steps; a wrap-around in either step counts.
static inline bool trx_ppc_carry_3(uint32 a, uint32 b, uint32 c)
{
	const uint32 partial = a + b;		// first addition, may wrap
	const uint32 total = partial + c;	// second addition, may wrap

	return (partial < a) || (total < c);
}

// Rotates a 32-bit word left by n bit positions (n is taken modulo 32).
// The right-shift count is masked as well, so a rotate by 0 (or by any
// multiple of 32) never evaluates a shift by 32, which is undefined in C/C++.
static inline uint32 trx_ppc_word_rotl(uint32 data, int n)
{
	n &= 0x1f;
	return (data << n) | (data >> ((32 - n) & 0x1f));
}

// Builds the rotate-and-mask bit mask used by the rlw* handlers.
// Bit numbering is big-endian: bit 0 is the most significant bit.
//   MB <= ME : ones in bits MB..ME, zeros elsewhere
//   MB >  ME : wrap-around mask, ones in bits 0..ME and MB..31
// Both arguments must be in the range 0..31 (the callers extract them with
// & 0x1f). All arithmetic is unsigned, so no case shifts a signed 1 into the
// sign bit or shifts by 32; in particular MB == ME + 1 yields 0xffffffff.
static inline uint32 trx_ppc_mask(int MB, int ME)
{
	const uint32 from_mb = (uint32)0xffffffff >> MB;		// ones in bits MB..31
	const uint32 upto_me = (uint32)0xffffffff << (31 - ME);	// ones in bits 0..ME

	if (MB <= ME) {
		return from_mb & upto_me;
	}
	return from_mb | upto_me;
}

// Replaces the top nibble (bits 31..28) of trxCPUint.cr with a classification
// of r compared against zero as a SIGNED value (cmovg / cmovl are the signed
// conditions). The pattern is loaded from cr_gt, cr_lt or cr_eq, which are
// defined outside this block; the three conditions are mutually exclusive and
// cover every case, so eax is always loaded by exactly one of the cmov's.
// NOTE(review): presumably cr_gt/cr_lt/cr_eq hold the GT/LT/EQ bit already
// positioned inside bits 31..28 - confirm against their definition.
void trx_ppc_set_cr0(uint32 r)
{
	_asm
	{
		cmp r,0
		cmovg eax, cr_gt
		cmovl eax, cr_lt
		cmove eax, cr_eq
		mov ecx, dword ptr trxCPUint.cr
		and ecx, 0x0fffffff
		or ecx, eax
		mov dword ptr trxCPUint.cr, ecx
	}
}

//==============================================================================
// Integer Arithmetic opcodes (22 total, 21 emulated)
// not emulated: subfmex
//

// tested
// add[.]: rD = rA + rB. No carry state is touched.
// Register numbers come from trxCPUint.opcode (rD: bits 25..21, rA: bits 20..16,
// rB: bits 15..11). The operand addresses are handed to the asm through the
// pointers prd/pra/prb, which are declared outside this function.
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_addx()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	// C equivalent of the asm below:
	//trxCPUint.gpr[rD] = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];
	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		mov edx, dword ptr prb
		add eax, [edx]
		mov edx, dword ptr prd
		mov [edx], eax
	};
	/*
	gen_load(EAX, rA); // mov ebp, trxCPUint.gpr[rA] ; mov eax, ebp
	gen_load(ECX, rB); // mov edi, trxCPUint.gpr[rB] ; mov ecx, edi 
	gen_asm(ADD_RR, EAX, ECX); // add eax, ecx
	gen_store(EAX, rD); // mov esi, eax
	*/
	if (trxCPUint.opcode & 1)	trx_ppc_set_cr0(trxCPUint.gpr[rD]);
}

// tested
// addc[.]: rD = rA + rB, and the low byte of trxCPUint.xer_ca receives the
// carry out of the 32-bit addition (x86 CF captured with setc).
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_addcx()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	// C equivalent of the asm below:
	//trxCPUint.gpr[rD] = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];
	// update xer
	//if (trxCPUint.gpr[rD] < trxCPUint.gpr[rA]) {
	//	trxCPUint.xer_ca = 1;
	//} else {
	//	trxCPUint.xer_ca = 0;
	//}
	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		mov edx, dword ptr prb
		add eax, [edx]
		setc byte ptr trxCPUint.xer_ca
		mov edx, dword ptr prd
		mov [edx], eax
	};

	if (trxCPUint.opcode & 1)	trx_ppc_set_cr0(trxCPUint.gpr[rD]);
}

// tested
// adde[.]: rD = rA + rB + CA, where CA is read from trxCPUint.xer_ca as a
// 32-bit value. The new CA is the OR of the carries of the two additions
// (a + b, then + CA). When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_addex()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	// C equivalent of the asm below:
//	a = trxCPUint.gpr[rA];
//	b = trxCPUint.gpr[rB];
//	ca = trxCPUint.xer_ca;
//	trxCPUint.gpr[rD] = a + b + ca;

	// update xer
//	if (trx_ppc_carry_3(a, b, ca)) {
//		trxCPUint.xer_ca = 1;
//	} else {
//		trxCPUint.xer_ca = 0;
//	}
	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		mov edx, dword ptr prb
		add eax, [edx]	; // add a + b
		setc cl			// if carry happens, remember
		add eax, dword ptr trxCPUint.xer_ca; // add ca 
		setc ch
		or cl, ch		// any carry should be carried over
		mov byte ptr trxCPUint.xer_ca, cl
		mov edx, dword ptr prd
		mov [edx], eax
	};
	if (trxCPUint.opcode & 1)	trx_ppc_set_cr0(trxCPUint.gpr[rD]);
}

// tested
// addi: rD = (rA == 0 ? 0 : GPR[rA]) + sign-extended 16-bit immediate.
// A register number of 0 in the rA field selects the literal value 0 rather
// than the contents of GPR 0. Neither CA nor CR0 is modified.
void trx_ppc_int_addi(void)
{
	uint32 rD, rA;
	sint16 imm;
	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	// C equivalent of the asm below:
	//if(rA == 0)
	//{
	//	trxCPUint.gpr[rD] = 0 + imm;
	//}
	//else
	//{
	//	trxCPUint.gpr[rD] = trxCPUint.gpr[rA] + imm;
	//}

	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	if(rA == 0)
	{
		// immediate only: movsx performs the 16 -> 32 bit sign extension
		_asm
		{
			movsx eax, word ptr [imm]
			mov edx, dword ptr prd
			mov [edx], eax
		};
	}
	else
	{
		_asm
		{
			movsx eax, word ptr [imm]
			mov edx, dword ptr pra
			add eax, [edx]
			mov edx, dword ptr prd
			mov [edx], eax
		};
	}
}

// tested
// addic: rD = GPR[rA] + sign-extended 16-bit immediate; the low byte of
// trxCPUint.xer_ca receives the carry out of the addition.
// Unlike addi, rA == 0 is not special-cased here. CR0 is not modified.
void trx_ppc_int_addic()
{
	uint32 rD, rA;
	sint16 imm;
	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	// C equivalent of the asm below:
	//a = trxCPUint.gpr[rA];
	//trxCPUint.gpr[rD] = a + imm;
	// update XER
	//if (trxCPUint.gpr[rD] < a) {
	//	trxCPUint.xer_ca = 1;
	//} else {
	//	trxCPUint.xer_ca = 0;
	//}
	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		movsx eax, word ptr [imm]
		mov edx, dword ptr pra
		add eax, [edx]
		setc byte ptr trxCPUint.xer_ca
		mov edx, dword ptr prd
		mov [edx], eax
	};
}

// tested
// addic.: same computation as addic (rD = GPR[rA] + sign-extended immediate,
// carry written to trxCPUint.xer_ca), but CR0 is always updated from the result.
void trx_ppc_int_addic_()
{
	uint32 rD, rA;
	sint16 imm;
	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	// C equivalent of the asm below:
	//a = trxCPUint.gpr[rA];
	//trxCPUint.gpr[rD] = a + imm;
	// update XER
	//if (trxCPUint.gpr[rD] < a) {
	//	trxCPUint.xer_ca = 1;
	//} else {
	//	trxCPUint.xer_ca = 0;
	//}
	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		movsx eax, word ptr [imm]
		mov edx, dword ptr pra
		add eax, [edx]
		setc byte ptr trxCPUint.xer_ca
		mov edx, dword ptr prd
		mov [edx], eax
	};
	trx_ppc_set_cr0(trxCPUint.gpr[rD]);
}

// tested
// addis: rD = (rA == 0 ? 0 : GPR[rA]) + (immediate << 16).
// Shifting the whole opcode left by 16 leaves the 16-bit immediate in the
// upper half of imm and zeros in the lower half. CA and CR0 are not modified.
void trx_ppc_int_addis(void)
{
	uint32 rD, rA, imm;
	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode << 16;

	// C equivalent of the asm below:
	//if(rA == 0)
	//{
	//	trxCPUint.gpr[rD] = imm;
	//}
	//else
	//{
	//	trxCPUint.gpr[rD] = trxCPUint.gpr[rA] + imm;
	//}
	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	if(rA == 0)
	{
		_asm
		{
			mov eax, dword ptr [imm]
			mov edx, dword ptr prd
			mov [edx], eax
		};
	}
	else
	{
		_asm
		{
			mov eax, dword ptr [imm]
			mov edx, dword ptr pra
			add eax, [edx]
			mov edx, dword ptr prd
			mov [edx], eax
		};
	}
}

// tested
// addze[.]: rD = rA + CA. The old CA is read from trxCPUint.xer_ca and the
// carry out of the addition is written back to its low byte.
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_addzex()
{
	uint32 rD, rA;

	rD = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;

	// C equivalent of the asm below:
	//a = trxCPUint.gpr[rA];
	//ca = trxCPUint.xer_ca;
	//trxCPUint.gpr[rD] = a + ca;
	//if ((a == 0xffffffff) && ca) {
	//	trxCPUint.xer_ca = 1;
	//} else {
	//	trxCPUint.xer_ca = 0;
	//}

	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		add eax, trxCPUint.xer_ca
		setc byte ptr trxCPUint.xer_ca
		mov edx, dword ptr prd
		mov [edx], eax
	};

	if (trxCPUint.opcode & 1)	trx_ppc_set_cr0(trxCPUint.gpr[rD]);
}

// overflow case not fully tested !
// addme[.]: rD = rA + CA - 1, i.e. rA + CA + 0xffffffff.
// The PowerPC carry is the carry OUT of that addition, which is set for every
// input except rA == 0 with CA == 0. It must not be taken from the x86 borrow
// flag of a "sub 1": a borrow is the inverse of the carry wanted here.
// CA is therefore derived directly from (rA | CA) != 0, and the result is
// formed with lea so the flags from that test are not disturbed.
// trxCPUint.xer_ca is expected to hold 0 or 1.
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_addmex()
{
	uint32 rD, rA;

	rD = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;

	// C equivalent of the asm below:
	//a = trxCPUint.gpr[rA];
	//ca = trxCPUint.xer_ca;
	//trxCPUint.gpr[rD] = a + ca + 0xffffffff;
	//trxCPUint.xer_ca = (a || ca) ? 1 : 0;

	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		mov ecx, dword ptr trxCPUint.xer_ca
		mov edx, eax
		or edx, ecx				// ZF set only when rA == 0 and CA == 0
		setnz byte ptr trxCPUint.xer_ca
		lea eax, [eax+ecx-1]	// rA + CA - 1, flags untouched
		mov edx, dword ptr prd
		mov [edx], eax
	};

	if (trxCPUint.opcode & 1)	trx_ppc_set_cr0(trxCPUint.gpr[rD]);
}

// untested
// divw[.]: rD = (signed)rA / (signed)rB.
// Two operand combinations have no defined quotient and would raise a divide
// exception on the host if handed to idiv:
//   - rB == 0
//   - rA == 0x80000000 and rB == -1 (quotient does not fit in 32 bits)
// In both cases the division is skipped and rD keeps its previous value.
// When opcode bit 0 is set, CR0 is updated from whatever rD holds afterwards.
void trx_ppc_int_divwx(void)
{
	uint32 rD, rA, rB;
	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	rB = (trxCPUint.opcode >> 11)& 0x1f;

	// C equivalent of the asm below (for the defined cases):
	//a = trxCPUint.gpr[rA];
	//b = trxCPUint.gpr[rB];
	//c = a / b;
	//trxCPUint.gpr[rD] = c;

	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		mov edx, dword ptr prb
		mov ecx, [edx]
		test ecx, ecx
		je idiv0				// divide by zero: skip
		cmp ecx, -1
		jne idiv_do
		cmp eax, 0x80000000
		je idiv0				// 0x80000000 / -1 overflows: skip
idiv_do:
		cdq						// sign-extend eax into edx:eax
		idiv ecx				// eax = edx:eax / ecx
		mov edx, dword ptr prd
		mov [edx], eax
idiv0:
	};
	if(trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rD]);
}

// tested
// divwu[.]: rD = (unsigned)rA / (unsigned)rB.
// edx is zeroed so the 64-bit dividend edx:eax is the zero-extended rA.
// When rB is 0 the division and the store are skipped, so rD keeps its
// previous value. When opcode bit 0 is set, CR0 is updated from whatever rD
// holds afterwards.
void trx_ppc_int_divwux(void)
{
	uint32 rD, rA, rB;
	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	rB = (trxCPUint.opcode >> 11)& 0x1f;

	// C equivalent of the asm below (for rB != 0):
	//trxCPUint.gpr[rD] = trxCPUint.gpr[rA] / trxCPUint.gpr[rB];

	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		xor edx, edx
		mov ebx, dword ptr prb
		mov ecx, [ebx]
		test ecx, ecx
		je div0
		div ecx			; // eax = edx:[a] / [b] 
		mov edx, dword ptr prd
		mov [edx], eax
div0:
	};

	// Rc bit set: update CR0 from rD
	if(trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rD]);
}

// tested
// mulhw[.]: rD = upper 32 bits of the signed 64-bit product rA * rB.
// The one-operand imul leaves the product in edx:eax; only edx is stored.
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_mulhwx()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	// C equivalent of the asm below (operands taken as signed 32-bit values):
	//sint64 a = trxCPUint.gpr[rA];
	//sint64 b = trxCPUint.gpr[rB];
	//sint64 c = a*b;
	//trxCPUint.gpr[rD] = (sint32)(c>>32);

	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		mov ebx, dword ptr prb
		mov ecx, [ebx]
		imul ecx			; // edx:eax = eax * ecx 
		mov ebx, dword ptr prd
		mov [ebx], edx
	};

	if (trxCPUint.opcode & 1)	trx_ppc_set_cr0(trxCPUint.gpr[rD]);
}

// tested
// mulhwu[.]: rD = upper 32 bits of the unsigned 64-bit product rA * rB.
// mul leaves the product in edx:eax; only edx is stored.
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_mulhwux()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	// C equivalent of the asm below:
	//uint64 a = trxCPUint.gpr[rA];
	//uint64 b = trxCPUint.gpr[rB];
	//uint64 c = a*b;
	//trxCPUint.gpr[rD] = (uint32)(c>>32);

	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		mov ebx, dword ptr prb
		mov ecx, [ebx]
		mul ecx			; // edx:eax = eax * ecx 
		mov ebx, dword ptr prd
		mov [ebx], edx
	};

	if (trxCPUint.opcode & 1)	trx_ppc_set_cr0(trxCPUint.gpr[rD]);
}

// tested
// mulli: rD = low 32 bits of rA * sign-extended 16-bit immediate.
// The unsigned mul is sufficient because only eax (the low half of the
// product) is stored, and the low 32 bits of a product are the same for
// signed and unsigned multiplication. CR0 is not modified.
void trx_ppc_int_mulli(void)
{
	uint32 rD, rA;
	sint16 imm;
	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	// C equivalent of the asm below:
	//trxCPUint.gpr[rD] = trxCPUint.gpr[rA] * imm;

	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		movsx ecx, word ptr [imm]
		mov ebx, dword ptr pra
		mov eax, [ebx]
		mul ecx			; // edx:eax = eax * ecx 
		mov ebx, dword ptr prd
		mov [ebx], eax
	};

}

// tested
// mullw[.]: rD = low 32 bits of rA * rB (only eax of the edx:eax product is
// stored). When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_mullwx()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	// C equivalent of the asm below:
	//trxCPUint.gpr[rD] = trxCPUint.gpr[rA] * trxCPUint.gpr[rB];

	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		mov ebx, dword ptr prb
		mov ecx, [ebx]
		mul ecx			; // edx:eax = eax * ecx 
		mov ebx, dword ptr prd
		mov [ebx], eax
	};

	if (trxCPUint.opcode & 1)	trx_ppc_set_cr0(trxCPUint.gpr[rD]);
}

// tested
// neg[.]: rD = two's complement negation of rA.
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_negx()
{
	uint32 rD, rA;

	rD = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;

	// C equivalent of the asm below:
	//trxCPUint.gpr[rD] = -(sint32)trxCPUint.gpr[rA];

	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		neg eax			 
		mov ebx, dword ptr prd
		mov [ebx], eax
	};

	if (trxCPUint.opcode & 1)	trx_ppc_set_cr0(trxCPUint.gpr[rD]);
}

// tested
// subf[.]: rD = ~rA + rB + 1, which equals rB - rA in two's complement.
// No carry state is touched. When opcode bit 0 is set, CR0 is updated from
// the result.
void trx_ppc_int_subfx()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	// C equivalent of the asm below:
	//trxCPUint.gpr[rD] = ~trxCPUint.gpr[rA] + trxCPUint.gpr[rB] + 1;

	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		not eax
		mov edx, dword ptr prb
		add eax, [edx]
		inc eax
		mov edx, dword ptr prd
		mov [edx], eax
	};

	if (trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rD]);
}

// tested
// subfc[.]: rD = ~rA + rB + 1, and CA = carry out of that three-term sum.
// The sum is done in two additions; the carry of each is captured (cl, ch)
// and OR-ed, then written to the low byte of trxCPUint.xer_ca.
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_subfcx()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	// C equivalent of the asm below:
	//a = trxCPUint.gpr[rA];
	//b = trxCPUint.gpr[rB];
	//trxCPUint.gpr[rD] = ~a + b + 1;
	// update xer
	//if (trx_ppc_carry_3(~a, b, 1)) {
	//	trxCPUint.xer_ca = 1;
	//} else {
	//	trxCPUint.xer_ca = 0;
	//}

	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		not eax
		mov edx, dword ptr prb
		add eax, [edx]
		setc cl
		add eax, 1
		setc ch
		or cl, ch
		mov edx, dword ptr prd
		mov [edx], eax
		mov byte ptr trxCPUint.xer_ca, cl
	};

	if (trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rD]);
}

// tested
// subfe[.]: rD = ~rA + rB + CA, and the new CA = carry out of that sum.
// The old CA is read from trxCPUint.xer_ca; the carries of the two additions
// are OR-ed and written back to its low byte.
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_subfex()
{
	uint32 rD, rA, rB;

	rD = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	// C equivalent of the asm below:
	//a = trxCPUint.gpr[rA];
	//b = trxCPUint.gpr[rB];
	//ca = trxCPUint.xer_ca;
	//trxCPUint.gpr[rD] = ~a + b + ca;
	// update xer
	//if (trx_ppc_carry_3(~a, b, ca)) {
	//	trxCPUint.xer_ca = 1;
	//} else {
	//	trxCPUint.xer_ca = 0;
	//}

	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		not eax
		mov edx, dword ptr prb
		add eax, [edx]
		setc cl
		add eax, trxCPUint.xer_ca
		setc ch
		or cl, ch
		mov edx, dword ptr prd
		mov [edx], eax
		mov byte ptr trxCPUint.xer_ca, cl
	};

	if (trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rD]);
}

// tested
// subfze[.]: rD = ~rA + CA, and the new CA = carry out of that addition
// (only possible when rA == 0 and the old CA is 1).
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_subfzex()
{
	uint32 rD, rA;

	rD = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;

	// Reference C version (written against a different CPU state layout,
	// gCPU with a packed XER, than the trxCPUint fields used below):
	//subfzex()
	//uint32 a = gCPU.gpr[rA];
	//uint32 ca = ((gCPU.xer&XER_CA)?1:0);
	//gCPU.gpr[rD] = ~a + ca;
	//if (!a && ca) {
	//	gCPU.xer |= XER_CA;
	//} else {
	//	gCPU.xer &= ~XER_CA;
	//}

	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		not eax
		add eax, trxCPUint.xer_ca
		setc byte ptr trxCPUint.xer_ca
		mov edx, dword ptr prd
		mov [edx], eax
	};

	if (trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rD]);
}

// tested
// subfic: rD = ~rA + sign-extended 16-bit immediate + 1, and CA = carry out
// of that sum (OR of the carries of the two additions).
// ecx first holds the immediate; once it has been added, cl/ch are reused as
// the two carry flags. CR0 is not modified.
void trx_ppc_int_subfic(void)
{
	uint32 rD, rA;
	sint16 imm;
	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;
	
	// C equivalent of the asm below:
	//a = trxCPUint.gpr[rA];
	//trxCPUint.gpr[rD] = ~a + imm + 1;
	// update XER
	//if (trx_ppc_carry_3(~a, imm, 1)) {
	//	trxCPUint.xer_ca = 1;
	//} else {
	//	trxCPUint.xer_ca = 0;
	//}
	prd = &trxCPUint.gpr[rD];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		movsx ecx, word ptr [imm]
		mov edx, dword ptr pra
		mov eax, [edx]
		not eax
		add eax, ecx
		setc cl
		add eax, 1
		setc ch
		or cl, ch
		mov edx, dword ptr prd
		mov [edx], eax
		mov byte ptr trxCPUint.xer_ca, cl
	};

}

//==============================================================================
// Integer Compare opcodes ( 4 total, 4 emulated)
// 


// tested only for cr = 0
// cmp: signed comparison of rA with rB; the result replaces one 4-bit field
// of trxCPUint.cr selected by crfD (opcode bits 25..23).
// cr is turned into a nibble index counted from the least significant end
// (7 - crfD); shift = 28 - 4*cr = 4*crfD moves a pattern that sits in the top
// nibble down to the selected field, and mask clears that field.
// NOTE(review): presumably cr_gt/cr_lt/cr_eq (defined outside this block) hold
// the GT/LT/EQ bit inside bits 31..28 - confirm against their definition.
void trx_ppc_int_cmp(void)
{
	uint32 cr, rA, rB;
	uint32 shift, mask;
	cr = (trxCPUint.opcode >> 23)& 0x7;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	rB = (trxCPUint.opcode >> 11)& 0x1f;

/*
	Reference C version (a and b are signed here):
	a = trxCPUint.gpr[rA];
	b = trxCPUint.gpr[rB];
	if (a < b) {
		res = 8;
	} else if (a > b) {
		res = 4;
	} else {
		res = 2;
	}
	// mask off and set correct flags
	cr = 7-cr;
	trxCPUint.cr &= trx_ppc_cmp_and_mask[cr];
	trxCPUint.cr |= res<<(cr*4);
*/
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];

	cr = 7-cr;
	shift = 28-(cr*4); 
	mask = trx_ppc_cmp_and_mask[cr];

	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		mov edx, dword ptr prb
		mov ecx, [edx]
		cmp eax, ecx
		cmovg eax, cr_gt
		cmovl eax, cr_lt
		cmove eax, cr_eq
		mov ecx, dword ptr [shift]
		shr eax, cl
		mov ecx, dword ptr trxCPUint.cr
		and ecx, dword ptr [mask]
		or ecx, eax
		mov dword ptr trxCPUint.cr, ecx
	}
}

// tested only for cr = 0
// cmpi: signed comparison of rA with the sign-extended 16-bit immediate; the
// result replaces the 4-bit field of trxCPUint.cr selected by crfD (opcode
// bits 25..23). imm16 -> imm performs the 16 -> 32 bit sign extension in C.
// shift/mask are derived as in trx_ppc_int_cmp: shift = 4*crfD, mask clears
// the selected field.
void trx_ppc_int_cmpi(void)
{
	uint32 cr, rA;
	sint32 imm;
	sint16 imm16;
	uint32 shift, mask;

	cr = (trxCPUint.opcode >> 23)& 0x7;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm16 = trxCPUint.opcode & 0xffff;
	imm = imm16;

/*
	Reference C version (a is signed here):
	a = trxCPUint.gpr[rA];
	if (a < imm) {
		res = 8;
	} else if (a > imm) {
		res = 4;
	} else {
		res = 2;
	}

	// mask off and set correct flags
	cr = 7-cr;
	trxCPUint.cr &= trx_ppc_cmp_and_mask[cr];
	trxCPUint.cr |= res<<(cr*4);
*/
	pra = &trxCPUint.gpr[rA];

	cr = 7-cr;
	shift = 28-(cr*4); 
	mask = trx_ppc_cmp_and_mask[cr];

	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		mov ecx, dword ptr [imm]
		cmp eax, ecx
		cmovg eax, cr_gt
		cmovl eax, cr_lt
		cmove eax, cr_eq
		mov ecx, dword ptr [shift]
		shr eax, cl
		mov ecx, dword ptr trxCPUint.cr
		and ecx, dword ptr [mask]
		or ecx, eax
		mov dword ptr trxCPUint.cr, ecx
	}
}

// tested only for cr = 0
// cmpl: UNSIGNED comparison of rA with rB (cmova / cmovb are the unsigned
// conditions); the result replaces the 4-bit field of trxCPUint.cr selected
// by crfD (opcode bits 25..23). shift = 4*crfD, mask clears that field.
// The XER summary-overflow bit is not copied into the field (see the
// commented-out line in the reference code).
void trx_ppc_int_cmpl(void)
{
	uint32 cr, rA, rB;
	uint32 shift, mask;

	cr = (trxCPUint.opcode >> 23)& 0x7;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	rB = (trxCPUint.opcode >> 11)& 0x1f;

/*
	Reference C version:
	a = trxCPUint.gpr[rA];
	b = trxCPUint.gpr[rB];
	if (a < b) {
		res = 8;
	} else if (a > b) {
		res = 4;
	} else {
		res = 2;
	}
//	if (trxCPUint.xer & XER_SO) res |= 1;

	// mask off and set correct flags
	cr = 7-cr;
	trxCPUint.cr &= trx_ppc_cmp_and_mask[cr];
	trxCPUint.cr |= res<<(cr*4);
*/
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];

	cr = 7-cr;
	shift = 28-(cr*4); 
	mask = trx_ppc_cmp_and_mask[cr];

	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		mov edx, dword ptr prb
		mov ecx, [edx]
		cmp eax, ecx
		cmova eax, dword ptr [cr_gt]
		cmovb eax, dword ptr [cr_lt]
		cmove eax, dword ptr [cr_eq]
		mov ecx, dword ptr [shift]
		shr eax, cl
		mov ecx, dword ptr trxCPUint.cr
		and ecx, dword ptr [mask]
		or ecx, eax
		mov dword ptr trxCPUint.cr, ecx
	}
}

// tested only for cr = 0
// cmpli: UNSIGNED comparison of rA with the zero-extended 16-bit immediate;
// the result replaces the 4-bit field of trxCPUint.cr selected by crfD
// (opcode bits 25..23). shift = 4*crfD, mask clears that field.
// The XER summary-overflow bit is not copied into the field.
void trx_ppc_int_cmpli(void)
{
	uint32 cr, rA;
	uint32 imm;
	uint32 shift, mask;

	cr = (trxCPUint.opcode >> 23)& 0x7;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

/*
	Reference C version:
	a = trxCPUint.gpr[rA];
	if (a < imm) {
		res = 8;
	} else if (a > imm) {
		res = 4;
	} else {
		res = 2;
	}
//	if (trxCPUint.xer & XER_SO) res |= 1;

	// mask off and set correct flags
	cr = 7-cr;
	trxCPUint.cr &= trx_ppc_cmp_and_mask[cr];
	trxCPUint.cr |= res<<(cr*4);
*/
	pra = &trxCPUint.gpr[rA];

	cr = 7-cr;
	shift = 28-(cr*4); 
	mask = trx_ppc_cmp_and_mask[cr];

	_asm
	{
		mov edx, dword ptr pra
		mov eax, [edx]
		mov ecx, dword ptr [imm]
		cmp eax, ecx
		cmova eax, dword ptr [cr_gt]
		cmovb eax, dword ptr [cr_lt]
		cmove eax, dword ptr [cr_eq]
		mov ecx, dword ptr [shift]
		shr eax, cl
		mov ecx, dword ptr trxCPUint.cr
		and ecx, dword ptr [mask]
		or ecx, eax
		mov dword ptr trxCPUint.cr, ecx
	}
}

//==============================================================================
// Integer Logical opcodes (17 opcodes, 17 emulated)
// not emulated: (none)
//

// tested
// and[.]: rA = rS & rB. For the logical opcodes the source register is in
// opcode bits 25..21 (rS) and the destination in bits 20..16 (rA).
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_andx()
{
	uint32 rS, rA, rB;

	rS = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	// C equivalent of the asm below:
//	trxCPUint.gpr[rA] = trxCPUint.gpr[rS] & trxCPUint.gpr[rB];

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		mov edx, dword ptr prb
		and eax, [edx]
		mov edx, dword ptr pra
		mov [edx], eax
	};

	if (trxCPUint.opcode & 1)	trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested
// nand[.]: rA = ~(rS & rB).
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_nandx()
{
	uint32 rS, rA, rB;

	rS = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	// C equivalent of the asm below:
//	trxCPUint.gpr[rA] = ~(trxCPUint.gpr[rS] & trxCPUint.gpr[rB]);

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		mov edx, dword ptr prb
		and eax, [edx]
		not eax
		mov edx, dword ptr pra
		mov [edx], eax
	};

	if (trxCPUint.opcode & 1)	trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested
// andc[.]: rA = rS & ~rB (AND with complement).
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_andcx()
{
	uint32 rS, rA, rB;

	rS = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	// C equivalent of the asm below:
//	trxCPUint.gpr[rA] = trxCPUint.gpr[rS] & ~trxCPUint.gpr[rB];

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		mov edx, dword ptr prb
		mov ecx, [edx]
		not ecx
		and eax, ecx
		mov edx, dword ptr pra
		mov [edx], eax
	};

	if (trxCPUint.opcode & 1)	trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested
// andi.: rA = rS & zero-extended 16-bit immediate. CR0 is always updated.
// movzx reads the low 16 bits of the 32-bit local imm, which already has its
// upper half cleared by the & 0xffff above.
void trx_ppc_int_andi_(void)
{
	uint32 rS, rA;
	uint32 imm;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	// C equivalent of the asm below:
//	trxCPUint.gpr[rA] = trxCPUint.gpr[rS] & imm;

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		movzx ecx, word ptr [imm]
		mov edx, dword ptr prs
		mov eax, [edx]
		and eax, ecx
		mov edx, dword ptr pra
		mov [edx], eax
	};

	trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested
// andis.: rA = rS & (immediate << 16). CR0 is always updated.
// Shifting the whole opcode left by 16 leaves the 16-bit immediate in the
// upper half of imm and zeros in the lower half.
void trx_ppc_int_andis_(void)
{
	uint32 rS, rA;
	uint32 imm;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = (trxCPUint.opcode << 16);

	// C equivalent of the asm below:
//	trxCPUint.gpr[rA] = trxCPUint.gpr[rS] & imm;

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		mov ecx, dword ptr [imm]
		mov edx, dword ptr prs
		mov eax, [edx]
		and eax, ecx
		mov edx, dword ptr pra
		mov [edx], eax
	};

	trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested
// cntlzw[.]: rA = number of leading zero bits in rS (0..32).
// bsr yields the index of the highest set bit, so the count is 31 - index.
// For rS == 0 bsr sets ZF and leaves its destination architecturally
// undefined, so that case is detected through ZF and the index is forced to
// -1 explicitly (giving 31 - (-1) = 32) instead of relying on a preloaded
// register surviving the instruction.
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_cntlzwx()
{
	uint rS, rA;

	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;

	// C equivalent of the asm below:
	//uint32 n=0;
	//uint32 x=0x80000000;
	//uint32 v=trxCPUint.gpr[rS];
	//while (!(v & x)) {
	//	n++;
	//	if (n==32) break;
	//	x>>=1;
	//}
	//trxCPUint.gpr[rA] = n;

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		bsr ecx, eax
		jnz cntlzw_found		// ZF clear: ecx holds a valid bit index
		mov ecx, -1				// rS == 0: no bit set
cntlzw_found:
		mov eax, 31
		sub eax, ecx
		mov edx, dword ptr pra
		mov [edx], eax
	};

	if (trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested
// extsb[.]: rA = low byte of rS sign-extended to 32 bits (movsx eax, al).
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_extsbx()
{
	uint32 rS, rA;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;

	// Reference (a is a signed 8-bit temporary, so the assignment sign-extends):
//	a  = trxCPUint.gpr[rS];
//	trxCPUint.gpr[rA] = a;

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		movsx eax, al
		mov edx, dword ptr pra
		mov [edx], eax
	};

	if(trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested
// extsh[.]: rA = low halfword of rS sign-extended to 32 bits (movsx eax, ax).
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_extshx()
{
	uint32 rS, rA;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;

	// Reference (a is a signed 16-bit temporary, so the assignment sign-extends):
	//a  = trxCPUint.gpr[rS];
	//trxCPUint.gpr[rA] = a;
	
	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		movsx eax, ax
		mov edx, dword ptr pra
		mov [edx], eax
	};

	if(trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested
// nor[.]: rA = ~(rS | rB).
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_norx()
{
	int rS, rA, rB;
	rS = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	// C equivalent of the asm below:
	//trxCPUint.gpr[rA] = ~(trxCPUint.gpr[rS] | trxCPUint.gpr[rB]);

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		mov edx, dword ptr prb
		mov ecx, [edx]
		or eax, ecx
		not eax
		mov edx, dword ptr pra
		mov [edx], eax
	};

	if (trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested
// or[.]: rA = rS | rB.
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_orx(void)
{
	uint32 rS, rA, rB;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	rB = (trxCPUint.opcode >> 11)& 0x1f;

	// C equivalent of the asm below:
	//trxCPUint.gpr[rA] = trxCPUint.gpr[rS] | trxCPUint.gpr[rB];

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		mov edx, dword ptr prb
		mov ecx, [edx]
		or eax, ecx
		mov edx, dword ptr pra
		mov [edx], eax
	};

	if(trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested
// orc[.]: rA = rS | ~rB (OR with complement).
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_orcx(void)
{
	uint32 rS, rA, rB;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	rB = (trxCPUint.opcode >> 11)& 0x1f;

	// C equivalent of the asm below:
	//trxCPUint.gpr[rA] = trxCPUint.gpr[rS] | ~trxCPUint.gpr[rB];

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		mov edx, dword ptr prb
		mov ecx, [edx]
		not ecx
		or eax, ecx
		mov edx, dword ptr pra
		mov [edx], eax
	};

	if(trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested
// ori: rA = rS | zero-extended 16-bit immediate. CR0 is not modified.
void trx_ppc_int_ori(void)
{
	uint32 rS, rA;
	uint32 imm;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	// C equivalent of the asm below:
//	trxCPUint.gpr[rA] = trxCPUint.gpr[rS] | imm;
	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		mov ecx, dword ptr [imm]
		or eax, ecx
		mov edx, dword ptr pra
		mov [edx], eax
	};
}

// tested
// oris: rA = rS | (immediate << 16). CR0 is not modified.
// Shifting the whole opcode left by 16 leaves the 16-bit immediate in the
// upper half of imm and zeros in the lower half.
void trx_ppc_int_oris(void)
{
	uint32 rS, rA;
	uint32 imm;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode <<16;

	// C equivalent of the asm below:
//	trxCPUint.gpr[rA] = trxCPUint.gpr[rS] | imm;

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		mov ecx, dword ptr [imm]
		or eax, ecx
		mov edx, dword ptr pra
		mov [edx], eax
	};
}

// tested
// xor[.]: rA = rS ^ rB.
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_xorx(void)
{
	uint32 rS, rA, rB;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	rB = (trxCPUint.opcode >> 11)& 0x1f;

	// C equivalent of the asm below:
//	trxCPUint.gpr[rA] = trxCPUint.gpr[rS] ^ trxCPUint.gpr[rB];

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		mov edx, dword ptr prb
		mov ecx, [edx]
		xor eax, ecx
		mov edx, dword ptr pra
		mov [edx], eax
	};

	if(trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested
// xori: rA = rS ^ zero-extended 16-bit immediate. CR0 is not modified.
void trx_ppc_int_xori(void)
{
	uint32 rS, rA;
	uint32 imm;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	// C equivalent of the asm below:
//	trxCPUint.gpr[rA] = trxCPUint.gpr[rS] ^ imm;
	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		mov ecx, dword ptr [imm]
		xor eax, ecx
		mov edx, dword ptr pra
		mov [edx], eax
	};
}

// tested
// xoris: rA = rS ^ (immediate << 16). CR0 is not modified.
// Shifting the whole opcode left by 16 leaves the 16-bit immediate in the
// upper half of imm and zeros in the lower half.
void trx_ppc_int_xoris(void)
{
	uint32 rS, rA;
	uint32 imm;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode <<16;

	// C equivalent of the asm below:
//	trxCPUint.gpr[rA] = trxCPUint.gpr[rS] ^ imm;
	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		mov ecx, dword ptr [imm]
		xor eax, ecx
		mov edx, dword ptr pra
		mov [edx], eax
	};
}

// eqv[.]: rA = ~(rS ^ rB), i.e. a 1 bit wherever rS and rB agree.
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_eqvx()
{
	uint32 rS, rA, rB;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	rB = (trxCPUint.opcode >> 11)& 0x1f;

	// C equivalent of the asm below:
//	trxCPUint.gpr[rA] = ~(trxCPUint.gpr[rS] ^ trxCPUint.gpr[rB]);

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		mov edx, dword ptr prb
		mov ecx, [edx]
		xor eax, ecx
		not eax
		mov edx, dword ptr pra
		mov [edx], eax
	};

	if(trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

//==============================================================================
// Rotate and shift opcodes (7 opcodes, 7 emulated)
// not emulated:
//

// rlwnm[.]: rA = rotl32(rS, rB) & mask(MB, ME).
// The rotate count is the value of GPR[rB]; the x86 rol with a cl count only
// uses the low 5 bits for a 32-bit operand, so larger values wrap modulo 32.
// MB and ME come from opcode bits 10..6 and 5..1; the mask is computed in C
// by trx_ppc_mask before the asm runs.
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_rlwnmx()
{
	uint32 rS, rA, rB, MB, ME, mask;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	rB = (trxCPUint.opcode >> 11)& 0x1f;
	MB = (trxCPUint.opcode >> 6)& 0x1f;
	ME = (trxCPUint.opcode >> 1)& 0x1f;

	// Reference C version (rotate done by the asm below):
	//uint32 v = ppc_word_rotl(gCPU.gpr[rS], gCPU.gpr[rB]);
	mask = trx_ppc_mask(MB, ME);
	//gCPU.gpr[rA] = v & mask;

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov ebx, dword ptr prs
		mov eax, [ebx]
		mov ebx, dword ptr prb
		mov ecx, [ebx]
		rol eax,cl			;// val = rotl(rS, rB)
		mov ecx, dword ptr [mask]
		and eax, ecx		; (val & mask)
		mov ebx, dword ptr pra
		mov [ebx], eax
	};

	if (trxCPUint.opcode & 1) 	trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested
// rlwimi[.]: rotate rS left by the immediate SH and insert it into rA under
// the mask: rA = (rotl32(rS, SH) & mask) | (rA & ~mask).
// SH, MB and ME come from opcode bits 15..11, 10..6 and 5..1; the mask is
// computed in C by trx_ppc_mask before the asm runs.
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_rlwimix()
{
	uint32 rS, rA, SH, MB, ME, mask;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	SH = (trxCPUint.opcode >> 11)& 0x1f;
	MB = (trxCPUint.opcode >> 6)& 0x1f;
	ME = (trxCPUint.opcode >> 1)& 0x1f;

	// C equivalent (rotate and merge are done by the asm below):
//	val = trx_ppc_word_rotl(trxCPUint.gpr[rS], SH);
	mask = trx_ppc_mask(MB, ME);
//	trxCPUint.gpr[rA] = (val & mask) | (trxCPUint.gpr[rA] & ~mask);

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		mov ebx, dword ptr prs
		mov eax, [ebx]
		mov ecx, dword ptr [SH]
		rol eax,cl			;// val = rotl(rS, SH)
		mov ecx, dword ptr [mask]
		and eax, ecx		; (val & mask)
		mov ebx, dword ptr pra
		mov edx, [ebx]
		not ecx				
		and edx, ecx		; (rA & ~mask)
		or eax, edx			; (val & mask ) | (rA & ~mask) 
		mov [ebx], eax
	};

	if (trxCPUint.opcode & 1) 	trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested
// rlwinm[.]: rA = rotl32(rS, SH) & mask(MB, ME), with an immediate rotate
// count. SH, MB and ME come from opcode bits 15..11, 10..6 and 5..1; the mask
// is computed in C by trx_ppc_mask before the asm runs.
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_rlwinmx()
{
	uint32 rS, rA, SH, MB, ME, mask;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	SH = (trxCPUint.opcode >> 11)& 0x1f;
	MB = (trxCPUint.opcode >> 6)& 0x1f;
	ME = (trxCPUint.opcode >> 1)& 0x1f;

	// C equivalent (rotate and AND are done by the asm below):
	//val = trx_ppc_word_rotl(trxCPUint.gpr[rS], SH);
    mask = trx_ppc_mask(MB, ME);
	// finally apply it
	//trxCPUint.gpr[rA] = val & mask;

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		mov ecx, dword ptr [SH]
		rol eax,cl			;// val = rotl(rS, SH)
		mov ecx, dword ptr [mask]
		and eax, ecx		; (val & mask)
		mov edx, dword ptr pra
		mov [edx], eax
	};

	if (trxCPUint.opcode & 1) 	trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested
// slw[.]: rA = rS << n, where n = rB & 0x3f; n in 32..63 produces 0.
// Only the low 6 bits of rB form the shift amount, so rB is masked before it
// is compared against 31 (without the mask a value such as 64 would be
// treated as "too large" and zero the result instead of shifting by 0).
// For n > 31 the source is replaced by the external constant zero, after
// which the x86 shift (which itself only honours the low 5 bits of cl) is
// harmless.
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_slwx()
{
	uint32 rS, rA, rB;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	rB = (trxCPUint.opcode >> 11)& 0x1f;
	// C equivalent of the asm below:
	//SH = trxCPUint.gpr[rB] & 0x3f;
	//if (SH > 31) {
	//	trxCPUint.gpr[rA] = 0;
	//} else {
	//	trxCPUint.gpr[rA] = trxCPUint.gpr[rS] << SH;
	//}

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		mov edx, dword ptr prb
		mov ecx, [edx]
		and ecx, 0x3f				// shift amount is rB & 0x3f
		cmp ecx, 31					// values above 31 should give 0 result
		cmova eax, dword ptr [zero]
		shl eax,cl
		mov edx, dword ptr pra
		mov [edx], eax
	};

	if(trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested for positive numbers and negative numbers
// sraw[.]: rA = (signed)rS >> n, where n = rB & 0x3f.
//   n in 0..31 : ordinary arithmetic shift; CA = 1 iff rS is negative and at
//                least one 1 bit was shifted out, otherwise CA = 0.
//   n in 32..63: every bit is shifted out, so rA is filled with the sign bit
//                (0 or 0xffffffff) and CA equals the sign bit.
// The shift amount is masked to 6 bits first, and the 32..63 range is handled
// on its own path because the x86 sar/shl only honour the low 5 bits of cl.
// The low byte of trxCPUint.xer_ca receives CA.
// When opcode bit 0 is set, CR0 is updated from the result.
void trx_ppc_int_srawx()
{
	uint32 rS, rA, rB;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	rB = (trxCPUint.opcode >> 11)& 0x1f;

/*
	Reference C version:
	SH = trxCPUint.gpr[rB] & 0x3f;
	v = trxCPUint.gpr[rS];
	if (SH > 31) {
		trxCPUint.gpr[rA] = (v & 0x80000000) ? 0xffffffff : 0;
		trxCPUint.xer_ca = (v & 0x80000000) ? 1 : 0;
	} else {
		trxCPUint.gpr[rA] = (sint32)v >> SH;
		trxCPUint.xer_ca = ((v & 0x80000000) && (v & ((1 << SH) - 1))) ? 1 : 0;
	}
*/

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		mov edx, dword ptr prb
		mov ecx, [edx]
		and ecx, 0x3f			// shift amount is rB & 0x3f
		cmp ecx, 31
		ja sraw_large
		// shift amount 0..31
		mov edx, eax			// keep the unshifted value
		sar eax, cl				// result
		mov ebx, eax
		shl ebx, cl				// result shifted back = rS with the dropped bits cleared
		cmp ebx, edx
		setne bl				// bl = 1 if any 1 bit was shifted out
		shr edx, 31				// dl = sign bit of rS
		and bl, dl				// CA = negative && bits lost
		mov byte ptr trxCPUint.xer_ca, bl
		jmp sraw_store
sraw_large:
		// shift amount 32..63: result is all sign bits, CA = sign bit
		sar eax, 31
		mov ecx, eax
		and ecx, 1
		mov byte ptr trxCPUint.xer_ca, cl
sraw_store:
		mov edx, dword ptr pra
		mov [edx], eax
	};

	if(trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested for positive nonzero shifts and negative nonzero shifts
void trx_ppc_int_srawix()
{
	uint32 rS, rA, SH, mask;

	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	SH = (trxCPUint.opcode >> 11)& 0x1f;
	
	// calculate bitmask for XER_CA calculation
	mask = 0xffffffff >> (32-SH);
/*
	trxCPUint.gpr[rA] = trxCPUint.gpr[rS];
	trxCPUint.xer_ca = 0;
	if (trxCPUint.gpr[rA] & 0x80000000) {
		uint32 ca = 0;
		for (uint i=0; i < SH; i++) {
			if (trxCPUint.gpr[rA] & 1) ca = 1;
			trxCPUint.gpr[rA] >>= 1;
			trxCPUint.gpr[rA] |= 0x80000000;
		}
		if (ca) trxCPUint.xer_ca = 1;
	} else {
		if (SH > 31) {
			trxCPUint.gpr[rA] = 0;
		} else {
			trxCPUint.gpr[rA] >>= SH;
		}
	}     
*/
	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	// fixed shift amount
	if(SH == 0)
	{
		// clear xer_ca and copy value
		_asm
		{
			mov edx, dword ptr prs
			mov eax, [edx]
			mov edx, dword ptr pra
			mov [edx], eax
			mov byte ptr trxCPUint.xer_ca, 0
		}
	}
	else
	{
		// shift anywhere between 1 and 31 bits
		_asm
		{
			mov edx, dword ptr prs
			mov eax, [edx]
			mov ecx, dword ptr [SH]
			test eax, eax
			js srawi_negative
			; // positive number shift 
			mov byte ptr trxCPUint.xer_ca, 0			;// xer_ca is cleared
			jmp done
	srawi_negative:
			; // negative number shift, xer_ca is set if any 1 bits are shifted out
			mov edx, eax
			and edx, dword ptr [mask] 
			setne byte ptr trxCPUint.xer_ca			; // nonzero means XER_CA is 1 else 0
	done:
			sar eax,cl				; // just shift it and we're done			
			mov ebx, dword ptr pra
			mov [ebx], eax
		};		
	}
	if(trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

// tested (also for shift amounts above 31)
// Shift Right Word (srw / srw.)
// rA = rS shifted right logically by the low 6 bits of rB; amounts of 32..63 give 0.
// CR0 is updated from rA when bit 0 of the opcode is set.
void trx_ppc_int_srwx()
{
	uint32 rS, rA, rB;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	rB = (trxCPUint.opcode >> 11)& 0x1f;

	// reference implementation:
	//uint32 v = trxCPUint.gpr[rB] & 0x3f;
	//if (v > 31) {
	//	trxCPUint.gpr[rA] = 0;
	//} else {
	//	trxCPUint.gpr[rA] = trxCPUint.gpr[rS] >> v;
	//}

	prs = &trxCPUint.gpr[rS];
	pra = &trxCPUint.gpr[rA];
	prb = &trxCPUint.gpr[rB];
	_asm
	{
		mov edx, dword ptr prs
		mov eax, [edx]
		mov edx, dword ptr prb
		mov ecx, [edx]
		and ecx, 3fh			;// only the low 6 bits of rB form the shift amount
		cmp ecx, 31				;// values above 31 should give 0 result
		cmova eax, dword ptr [zero]
		shr eax,cl				; // just shift it and we're done			
		mov edx, dword ptr pra
		mov [edx],eax
	}

	if(trxCPUint.opcode & 1) trx_ppc_set_cr0(trxCPUint.gpr[rA]);
}

//==============================================================================
// Branching opcodes (4 opcodes, 4 emulated) 
//

// tested
// Unconditional branch (b / ba / bl / bla).
// Computes the target from the 24-bit LI field (sign extended, word aligned),
// relative to pc unless the AA bit is set, stores pc+4 in lr when the LK bit
// is set, and writes the target to npc. A branch back to the start of the
// current block ends the block.
void trx_ppc_int_bx(void)
{
	uint32 LI, LK, AA, nextpc;
	// LI keeps bits 2..25 of the opcode, i.e. the displacement already scaled by 4
	LI = trxCPUint.opcode &0x3fffffc;
	LK = (trxCPUint.opcode & 1);
	AA = (trxCPUint.opcode & 2);

	// sign extend the 26-bit displacement to 32 bits
	if (LI&0x02000000) LI |= 0xfc000000;

	// absolute or relative branch ?
	if(!AA)
	{
		LI += trxCPUint.pc;
	}
	// update link register ?
	//if (LK) trxCPUint.lr = trxCPUint.pc + 4; 
	// finally update target
	//trxCPUint.npc = LI;
	if(LK)
	{
#if WITH_LOGGING
		syslog(CPU,"%08x bl %s (%08x, %08x, %08x, %08x, %08x, %08x) -> %08x \n", CPUcurrmode->pc, elf_getfunctionname(elf, LI), CPUcurrmode->gpr[3], CPUcurrmode->gpr[4], CPUcurrmode->gpr[5], CPUcurrmode->gpr[6], CPUcurrmode->gpr[7], CPUcurrmode->gpr[8], LI);
#endif
		// lr = address of the instruction following this branch
		nextpc = trxCPUint.pc + 4;
		_asm
		{
			mov eax, dword ptr [nextpc]
			mov dword ptr trxCPUint.lr, eax
		}
	}
	// npc = branch target
	_asm
	{
		mov eax, dword ptr [LI]
		mov dword ptr trxCPUint.npc, eax
	}
	// remove unconditional branches except when they loop
	// loop detection: a branch to the start of the current block stops the block
	if(LI == trxCPUint.block_startPC)
	{
		//printf("[Tratax Recompiler] Spinloop (unconditional branch loop) detected @%8.8x\n", trxCPUint.block_startPC);
		trxCPUint.blockend = BLOCKEND_STOP;
	}
}

// tested (only few cases!)
// Conditional branch (bc / bca / bcl / bcla).
// BO is taken without its lowest (prediction) bit, giving a 4-bit value:
//   bit 8: do not test the condition      bit 4: condition value to branch on
//   bit 2: do not decrement ctr           bit 1: branch when ctr == 0 (else ctr != 0)
// Only the BO values 2, 6, 8, 9 and 10 are implemented; anything else aborts.
// npc is expected to already hold the fall-through address on entry; it is
// replaced by the target when the branch is taken. lr receives the
// fall-through address when the LK bit is set. The block always ends here.
void trx_ppc_int_bcx()
{
	uint32 BO, BI, LK, BD, AA, crmask;
	BO = (trxCPUint.opcode >> 22)& 0xf; // we dont care about the prediction bit!
	BI = (trxCPUint.opcode >> 16)& 0x1f;
	// 14-bit displacement, already scaled by 4, sign extended to 32 bits
	BD = (trxCPUint.opcode)&0xfffc;if (BD&0x8000) BD |= 0xffff0000;
	LK = (trxCPUint.opcode & 1);
	AA = (trxCPUint.opcode & 2);

	// absolute or relative branch ?
	if(!AA)
	{
		BD += (trxCPUint.pc);
	}
	// update link register ?
	if(LK)
	{
		_asm
		{
			mov eax, dword ptr trxCPUint.npc
			mov dword ptr trxCPUint.lr, eax
		}
	}
	// need to adjust count ? bit 2 clear means decrement counter
	if (!(BO & 2))
	{
		_asm
		{
			sub dword ptr trxCPUint.ctr, 1
		}
	}

	// CR bits are numbered from the most significant bit
	crmask = (1<<(31-BI));
	switch(BO)
	{
	case 2: // branch if CR bit is clear
		_asm
		{
			mov edx, trxCPUint.npc
			mov eax, trxCPUint.cr
			and eax, dword ptr [crmask]
			cmove edx, dword ptr [BD]
			mov dword ptr trxCPUint.npc, edx	
		}
		break;
	case 6: // branch if CR bit is set
		_asm
		{
			mov edx, trxCPUint.npc
			mov eax, trxCPUint.cr
			and eax, dword ptr [crmask]
			cmovne edx, dword ptr [BD]
			mov dword ptr trxCPUint.npc, edx	
		}
		break;
	case 8: // branch if decremented ctr != 0
		_asm
		{
			mov edx, trxCPUint.npc
			mov eax, trxCPUint.ctr
			test eax, eax
			cmovne edx, dword ptr [BD]
			mov dword ptr trxCPUint.npc, edx	
		}
		break;
	case 9: // branch if decremented ctr == 0
		_asm
		{
			mov ecx, trxCPUint.npc
			mov edx, dword ptr [BD]
			mov eax, dword ptr trxCPUint.ctr
			test eax, eax
			cmove ecx, edx
			mov dword ptr trxCPUint.npc, ecx
		}
		break;
	case 10: // branch always: the target must be loaded unconditionally
		_asm
		{
			mov ecx, dword ptr [BD]
			mov dword ptr trxCPUint.npc, ecx
		}
		break;
	default: // unhandled branch
		printf("[Tratax interpreter] trx_ppc_gen_bcx() unhandled case: %d\n", BO);
		exit(0);
		break;
	}
	// loop detection ..
	trxCPUint.blockend = BLOCKEND_STOP;
}

// tested only one case !
// Branch conditional to count register (bcctr / bcctrl).
// Only BO == 10 (branch always) is implemented: npc = ctr. Any other BO value
// hits a breakpoint (int 3) with BO loaded in eax.
// lr receives the current npc when the LK bit is set. The block always ends here.
void trx_ppc_int_bcctrx()
{
	uint32 BO, BI, LK, crmask;
	BO = (trxCPUint.opcode >> 22)& 0xf; // we dont care about the prediction bit!
	BI = (trxCPUint.opcode >> 16)& 0x1f;
	LK = (trxCPUint.opcode & 1);
/*
	Reference implementation covering all BO values (not compiled):

	// need to adjust count ? bit 2 set means decrement counter
	if (!(BO & 2)) trxCPUint.ctr--;
	// test condition
	cr = (trxCPUint.cr & (1<<(31-BI)));
	// when not branching just go to next instruction
	BD = trxCPUint.npc;

	switch(BO)
	{
	case 0: if((trxCPUint.ctr!=0)&&(!cr))BD = trxCPUint.ctr & 0xfffffffc;break;
	case 1: if((trxCPUint.ctr==0)&&(!cr))BD = trxCPUint.ctr & 0xfffffffc;break;
	case 2: if(!cr)BD = trxCPUint.lr & 0xfffffffc;break;
	case 4: if((trxCPUint.ctr!=0)&&(cr))BD = trxCPUint.ctr & 0xfffffffc;break;
	case 5: if((trxCPUint.ctr==0)&&(cr))BD = trxCPUint.ctr & 0xfffffffc;break;
	case 6: if(cr)BD = trxCPUint.ctr & 0xfffffffc;break;
	case 8: if(trxCPUint.ctr!=0)BD = trxCPUint.ctr & 0xfffffffc;break;
	case 9: if(trxCPUint.ctr==0)BD = trxCPUint.ctr & 0xfffffffc;break;
	case 10: BD = trxCPUint.ctr & 0xfffffffc;break;
	default: // illegal branch, skip it.
	break;
	}
	// update link register ?
	if (LK) trxCPUint.lr = trxCPUint.npc; 
	trxCPUint.npc = BD;
*/
	// update link register ?
	if(LK)
	{
		_asm
		{
			mov eax, dword ptr trxCPUint.npc
			mov dword ptr trxCPUint.lr, eax
		}
	}
	// BO bit 2 clear means decrement counter
	if (!(BO & 2))
	{
		_asm
		{
			sub dword ptr trxCPUint.ctr, 1
		}
	}

	// computed for the conditional forms; not read by the cases implemented below
	crmask = (1<<(31-BI));
	switch(BO)
	{
	case 10: // BD = trxCPUint.ctr & 0xfffffffc;break;
		// NOTE(review): unlike the reference above, ctr is copied without masking the low two bits
		_asm
		{
			mov eax, trxCPUint.ctr
			mov dword ptr trxCPUint.npc, eax	
		}
		break;
	default: // unhandled branch
		// trap into the debugger with the offending BO value in eax
		_asm
		{	
			mov eax, dword ptr [BO]
			int 3h;
		}
		break;
	}
	trxCPUint.blockend = BLOCKEND_STOP;
}

// tested, only few cases
// Branch conditional to link register (bclr / bclrl).
// Implemented BO values: 2 (branch if CR bit clear), 6 (branch if CR bit set)
// and 10 (branch always). Any other BO value prints a message and exits.
// The branch target is read from lr before lr is overwritten with pc+4 for
// the LK form. The block always ends here.
void trx_ppc_int_bclrx(void)
{
	uint32 BO, BI, LK, crmask, nextpc;
	BO = (trxCPUint.opcode >> 22)& 0xf; // we dont care about the prediction bit!
	BI = (trxCPUint.opcode >> 16)& 0x1f;
	LK = (trxCPUint.opcode & 1);

/*
	Reference implementation covering all BO values (not compiled):

	// need to adjust count ? bit 2 set means decrement counter
	if (!(BO & 2)) trxCPUint.ctr--;
	// test condition
	cr = (trxCPUint.cr & (1<<(31-BI)));
	// when not branching just go to next instruction
	BD = trxCPUint.npc;
	switch(BO)
	{
	case 0: if((trxCPUint.ctr!=0)&&(!cr))BD = trxCPUint.lr & 0xfffffffc;break;
	case 1: if((trxCPUint.ctr==0)&&(!cr))BD = trxCPUint.lr & 0xfffffffc;break;
	case 2: if(!cr)BD = trxCPUint.lr & 0xfffffffc;break;
	case 4: if((trxCPUint.ctr!=0)&&(cr))BD = trxCPUint.lr & 0xfffffffc;break;
	case 5: if((trxCPUint.ctr==0)&&(cr))BD = trxCPUint.lr & 0xfffffffc;break;
	case 6: if(cr)BD = trxCPUint.lr & 0xfffffffc;break;
	case 8: if(trxCPUint.ctr!=0)BD = trxCPUint.lr & 0xfffffffc;break;
	case 9: if(trxCPUint.ctr==0)BD = trxCPUint.lr & 0xfffffffc;break;
	case 10: BD = trxCPUint.lr & 0xfffffffc;break;
	default: // illegal branch, skip it.
	break;
	}
	// update link register ?
	if (LK) trxCPUint.lr = trxCPUint.npc; 
	trxCPUint.npc = BD;
*/
	// return address for the LK form, captured before npc is modified
	nextpc = trxCPUint.pc+4;
	// BO bit 2 clear means decrement counter
	if (!(BO & 2))
	{
		_asm
		{
			sub dword ptr trxCPUint.ctr, 1
		}
	}

	// CR bits are numbered from the most significant bit
	crmask = (1<<(31-BI));

	switch(BO)
	{
	case 2: //if(!cr)BD = trxCPUint.lr & 0xfffffffc;break;
		// npc = lr when the selected CR bit is clear, otherwise npc is left as is
		_asm
		{
			mov edx, trxCPUint.npc
			mov eax, trxCPUint.cr
			and eax, dword ptr [crmask]
			cmove edx, dword ptr [trxCPUint.lr]
			mov dword ptr trxCPUint.npc, edx	
		}
		break;
	case 6: //if(cr)BD = trxCPUint.lr & 0xfffffffc;break;
		// npc = lr when the selected CR bit is set, otherwise npc is left as is
		_asm
		{
			mov edx, trxCPUint.npc
			mov eax, trxCPUint.cr
			and eax, dword ptr [crmask]
			cmovne edx, dword ptr [trxCPUint.lr]
			mov dword ptr trxCPUint.npc, edx	
		}
		break;
	case 10: // BD = trxCPUint.lr & 0xfffffffc;break;
		// unconditional: npc = lr
		_asm
		{
			mov eax, dword ptr [trxCPUint.lr]
			mov dword ptr trxCPUint.npc, eax	
		}
		break;
	default: // unhandled branch
		printf("bclrx() unhandled case: %d\n", BO);
		exit(0);
		break;
	}
	// careful! the lr register is the branch target of this instruction,
	// so it may only be overwritten after the target has been read above
	if(LK)
	{
		_asm
		{
			mov eax, nextpc 
			mov trxCPUint.lr, eax
		}
	}
	trxCPUint.blockend = BLOCKEND_STOP;
}


//==============================================================================
// System control opcodes (segment register access: mtsr, mtsrin, mfsr, mfsrin; and rfi)
// used
void trx_ppc_int_mtsr()
{
	int rS, SR, rB;

	rS=((trxCPUint.opcode)>>21)&0x1f;
	SR=((trxCPUint.opcode)>>16)&0x1f;
	rB=((trxCPUint.opcode)>>11)&0x1f;

	trxCPUint.sr[SR & 0xf] = trxCPUint.gpr[rS];
}

void trx_ppc_int_mtsrin()
{
	int rS, rB;

	rS=((trxCPUint.opcode)>>21)&0x1f;
	rB=((trxCPUint.opcode)>>11)&0x1f;

	trxCPUint.sr[trxCPUint.gpr[rB] >> 28] = trxCPUint.gpr[rS];
}

void trx_ppc_int_mfsr()
{
	int rD, SR;

	rD=((trxCPUint.opcode)>>21)&0x1f;
	SR=((trxCPUint.opcode)>>16)&0x1f;

	trxCPUint.gpr[rD] = trxCPUint.sr[SR & 0xf];
}

void trx_ppc_int_mfsrin()
{
	int rD, rB;

	rD=((trxCPUint.opcode)>>21)&0x1f;
	rB=((trxCPUint.opcode)>>11)&0x1f;

	trxCPUint.gpr[rD] = trxCPUint.sr[trxCPUint.gpr[rB] >> 28];
}

// Return From Interrupt: restores msr from SRR1, resumes at SRR0 (word aligned),
// ends the current block and forces the current cpu slice to finish.
void trx_ppc_int_rfi()
{
	trxCPUint.msr = trxCPUint.spr[PPC_SRR1];
	trxCPUint.npc = trxCPUint.spr[PPC_SRR0] & 0xfffffffc;
	trxCPUint.blockend = BLOCKEND_STOP;

	// give back the unused part of the slice and mark it as exhausted
	// so execution leaves the slice immediately
	cpuslice = cpuslice - cpuslice_left;
	cpuslice_left = -1;
}

//==============================================================================
// Condition register opcodes
// emulated here: mtcrf, mcrf, crxor, cror, crnor, creqv, mfcr, mcrxr
// not emulated: crand, crandc, crnand, crorc
//
// used
void trx_ppc_int_mtcrf()
{
	uint32 rS, crm, CRM;
	rS = (trxCPUint.opcode >> 21)& 0x1f;
	crm = (trxCPUint.opcode >> 12) & 0xff;
	CRM = ((crm&0x80)?0xf0000000:0)|((crm&0x40)?0x0f000000:0)|((crm&0x20)?0x00f00000:0)|((crm&0x10)?0x000f0000:0)|
	      ((crm&0x08)?0x0000f000:0)|((crm&0x04)?0x00000f00:0)|((crm&0x02)?0x000000f0:0)|((crm&0x01)?0x0000000f:0);
	trxCPUint.cr = (trxCPUint.gpr[rS] & CRM) | (trxCPUint.cr & ~CRM);
}
// used
void trx_ppc_int_mcrf()
{
	uint32 crD, crS, c;
	crD = (trxCPUint.opcode >> 23)& 0x7;
	crS = (trxCPUint.opcode >> 18)& 0x7;

	crD = 7-crD;
	crS = 7-crS;
	
	c = (trxCPUint.cr>>(crS*4)) & 0xf;
	trxCPUint.cr &= trx_ppc_cmp_and_mask[crD];
	trxCPUint.cr |= c<<(crD*4);
}

// used
void trx_ppc_int_crxor()
{
	int crD, crA, crB;
	
	crD=((trxCPUint.opcode)>>21)&0x1f;
	crA=((trxCPUint.opcode)>>16)&0x1f;
	crB=((trxCPUint.opcode)>>11)&0x1f;

	if ((!(trxCPUint.cr & (1<<(31-crA))) && (trxCPUint.cr & (1<<(31-crB))))
	  || ((trxCPUint.cr & (1<<(31-crA))) && !(trxCPUint.cr & (1<<(31-crB))))) {
		trxCPUint.cr |= (1<<(31-crD));
	} else {
		trxCPUint.cr &= ~(1<<(31-crD));
	}
}
// used
void trx_ppc_int_cror()
{
	int crD, crA, crB;

	crD=((trxCPUint.opcode)>>21)&0x1f;
	crA=((trxCPUint.opcode)>>16)&0x1f;
	crB=((trxCPUint.opcode)>>11)&0x1f;

	uint32 t = (1<<(31-crA)) | (1<<(31-crB));
	if (trxCPUint.cr & t) {
		trxCPUint.cr |= (1<<(31-crD));
	} else {
		trxCPUint.cr &= ~(1<<(31-crD));
	}
}

// used
// Condition Register NOR: CR bit crD = !(CR bit crA | CR bit crB)
// (bits numbered from the most significant bit).
void trx_ppc_int_crnor()
{
	int crD, crA, crB;

	crD=((trxCPUint.opcode)>>21)&0x1f;
	crA=((trxCPUint.opcode)>>16)&0x1f;
	crB=((trxCPUint.opcode)>>11)&0x1f;

	uint32 t = (1<<(31-crA)) | (1<<(31-crB));
	// the negation must apply to the test result; "cr & !t" is always 0 because t is never 0
	if (!(trxCPUint.cr & t)) {
		trxCPUint.cr |= (1<<(31-crD));
	} else {
		trxCPUint.cr &= ~(1<<(31-crD));
	}
}

// used
void trx_ppc_int_creqv()
{
	int crD, crA, crB;

	crD=((trxCPUint.opcode)>>21)&0x1f;
	crA=((trxCPUint.opcode)>>16)&0x1f;
	crB=((trxCPUint.opcode)>>11)&0x1f;

	if (((trxCPUint.cr & (1<<(31-crA))) && (trxCPUint.cr & (1<<(31-crB))))
	  || (!(trxCPUint.cr & (1<<(31-crA))) && !(trxCPUint.cr & (1<<(31-crB))))) {
		trxCPUint.cr |= (1<<(31-crD));
	} else {
		trxCPUint.cr &= ~(1<<(31-crD));
	}
}

// used
void trx_ppc_int_mfcr()
{
	uint32 rD;
	rD = (trxCPUint.opcode >> 21)& 0x1f;

	trxCPUint.gpr[rD] = trxCPUint.cr;
}

// used
// Move to Condition Register from XER: CR field crD = XER[0-3] (SO, OV, CA, 0),
// then XER[0-3] is cleared.
// The carry is tracked separately in xer_ca (see mtspr/mfspr for XER), so it is
// merged in from there; SO and OV come from the top two bits of xer.
void trx_ppc_int_mcrxr()
{
	uint32 crD, c;
	crD = (trxCPUint.opcode >> 23)& 0x7;

	// nibble layout: SO = 8, OV = 4, CA = 2, reserved = 0
	c = (trxCPUint.xer >> 28) & 0xc;
	if (trxCPUint.xer_ca) c |= 2;
	// CR fields are numbered from the most significant nibble
	crD = 7-crD;
	
	trxCPUint.cr &= trx_ppc_cmp_and_mask[crD];
	trxCPUint.cr |= c<<(crD*4);

	// clear XER[0-3]: these are the four most significant bits, plus the separate carry flag.
	// The low bits of XER hold the string byte count and must be preserved.
	trxCPUint.xer &= 0x0fffffff;
	trxCPUint.xer_ca = 0;
}

//==============================================================================
// Integer load opcodes
// not emulated: lhaux
//
// used
void trx_ppc_int_lbz()
{
	uint32 rD, rA, EA;
	sint16 imm;
	uint8 r;

	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	if(rA == 0)	EA = 0 + imm;
	else EA = trxCPUint.gpr[rA] + imm;
	
	r = mem_read8_int(EA);	
	trxCPUint.gpr[rD] = r;
}

// used
void trx_ppc_int_lzbu()
{
	uint32 rD, rA, EA;
	sint16 imm;
	uint8	r;

	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	EA = trxCPUint.gpr[rA] + imm;
	
	r = mem_read8_int(EA);
	trxCPUint.gpr[rA] = EA;
	trxCPUint.gpr[rD] = r;
}
// used 
void trx_ppc_int_lbzx()
{
	uint32 rD, rA, rB, EA;
	uint8 r;

	rD = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	if(rA == 0)	EA = trxCPUint.gpr[rB];
	else EA = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];

	r = mem_read8_int(EA);
	trxCPUint.gpr[rD] = r;
}

// used 
void trx_ppc_int_lbzux()
{
	uint32 rD, rA, rB, EA;
	uint8 r;

	rD = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	EA = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];

	r = mem_read8_int(EA);
	trxCPUint.gpr[rD] = r;
	// update
	trxCPUint.gpr[rA] = EA;	
}

// used
void trx_ppc_int_lha()
{
	uint32 rD, rA, EA;
	sint16 r;
	sint16 imm;

	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	if(rA == 0)	EA = 0 + imm;
	else EA = trxCPUint.gpr[rA] + imm;
	
	r = mem_read16_int(EA);
	trxCPUint.gpr[rD] = r;
}

// used
void trx_ppc_int_lhau()
{
	uint32 rD, rA, EA;
	sint16 r;
	sint16 imm;

	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	if(rA == 0)	EA = 0 + imm;
	else EA = trxCPUint.gpr[rA] + imm;
	
	r = mem_read16_int(EA);

	trxCPUint.gpr[rA] = EA;
	trxCPUint.gpr[rD] = r;
}

// used
void trx_ppc_int_lhax()
{
	uint32 rD, rA, rB, EA;
	sint16 r;

	rD = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	if(rA == 0)	EA = trxCPUint.gpr[rB];
	else EA = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];
	
	r = mem_read16_int(EA);
	trxCPUint.gpr[rD] = r;
}

// used
void trx_ppc_int_lhz()
{
	uint32 rD, rA, EA;
	uint16 r;
	sint16 imm;

	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	if(rA == 0)	EA = 0 + imm;
	else EA = trxCPUint.gpr[rA] + imm;
	
	r = mem_read16_int(EA);
	trxCPUint.gpr[rD] = r;
}

// used
void trx_ppc_int_lhzu()
{
	uint32 rD, rA, EA;
	sint16 imm;
	uint16	r;

	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	EA = trxCPUint.gpr[rA] + imm;

	r = mem_read16_int(EA);
	trxCPUint.gpr[rA] = EA;
	trxCPUint.gpr[rD] = r;
}

// used
void trx_ppc_int_lhzx()
{
	uint32 rD, rA, rB, EA;
	uint16 r;

	rD = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	if(rA == 0)	EA = trxCPUint.gpr[rB];
	else EA = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];

	r = mem_read16_int(EA);
	trxCPUint.gpr[rD] = r;
}

// used
void trx_ppc_int_lhbrx()
{
	uint32 rD, rA, rB, EA;
	uint16 res;

	rD=((trxCPUint.opcode)>>21)&0x1f;
	rA=((trxCPUint.opcode)>>16)&0x1f;
	rB=((trxCPUint.opcode)>>11)&0x1f;

	if(rA == 0)	EA = trxCPUint.gpr[rB];
	else EA = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];

	res = mem_read16_int(EA);
	_asm
	{
		mov ax, res
		xchg al, ah
		mov res, ax
	};
	trxCPUint.gpr[rD] = res;
}

// used
void trx_ppc_int_lwz()
{
	uint32 rD, rA, EA;
	sint16 imm;

	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	if(rA == 0)	EA = 0 + imm;
	else EA = trxCPUint.gpr[rA] + imm;
	
	trxCPUint.gpr[rD] = mem_read32_int(EA);	
}

// used
void trx_ppc_int_lwzu()
{
	uint32 rD, rA, EA;
	sint16 imm;

	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	EA = trxCPUint.gpr[rA] + imm;

	trxCPUint.gpr[rA] = EA;
	trxCPUint.gpr[rD] = mem_read32_int(EA);	
}

// used
void trx_ppc_int_lwzx()
{
	uint32 rD, rA, rB, EA;

	rD=((trxCPUint.opcode)>>21)&0x1f;
	rA=((trxCPUint.opcode)>>16)&0x1f;
	rB=((trxCPUint.opcode)>>11)&0x1f;

	if(rA == 0)	EA = trxCPUint.gpr[rB];
	else EA = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];

	trxCPUint.gpr[rD] = mem_read32_int(EA);	
}

// used
void trx_ppc_int_lwzux()
{
	uint32 rD, rA, rB, EA;

	rD=((trxCPUint.opcode)>>21)&0x1f;
	rA=((trxCPUint.opcode)>>16)&0x1f;
	rB=((trxCPUint.opcode)>>11)&0x1f;

	EA = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];

	trxCPUint.gpr[rD] = mem_read32_int(EA);	
	trxCPUint.gpr[rA] = EA;
}

// used
void trx_ppc_int_lwbrx()
{
	uint32 rD, rA, rB, EA;
	uint32 res;

	rD=((trxCPUint.opcode)>>21)&0x1f;
	rA=((trxCPUint.opcode)>>16)&0x1f;
	rB=((trxCPUint.opcode)>>11)&0x1f;

	if(rA == 0)	EA = trxCPUint.gpr[rB];
	else EA = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];

	res = mem_read32_int(EA);
	_asm
	{
		mov eax, res
		bswap eax
		mov res, eax
	};
	trxCPUint.gpr[rD] = res;
}

// used
void trx_ppc_int_lmw(void)
{
	uint32 rD, rA, EA;
	sint16 imm;

	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	if(rA == 0)	EA = 0 + imm;
	else EA = trxCPUint.gpr[rA] + imm;

	while(rD <= 31)
	{
		trxCPUint.gpr[rD] = mem_read32_int(EA);	
		rD++;
		EA +=4;
	}
}

// used
// Load String Word Immediate: loads NB bytes (32 when NB == 0) starting at (rA|0)
// into consecutive registers beginning with rD, four bytes per register, most
// significant byte first. The register number wraps from 31 to 0 and the unused
// low bytes of the last register are zero.
void trx_ppc_int_lswi(void)
{
	uint32 rD, rA, NB, EA, n, r, i;

	rD = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	NB = ((trxCPUint.opcode)>>11)&0x1f;

	// lswi is an X-form instruction: the effective address is (rA|0) only.
	// The low 16 bits of the opcode hold NB and the extended opcode, not a displacement.
	if(rA == 0)	EA = 0;
	else EA = trxCPUint.gpr[rA];

	// NB == 0 encodes a length of 32 bytes
	if(NB == 0)
	{
		n = 32;
	}
	else
	{
		n = NB;
	}

	// r is advanced before the first byte of every register, so start one below rD
	r = rD - 1;
	// i is the bit offset of the next byte within the current register
	i = 0;
	while (n>0)
	{
		if(i==0)
		{
			r++;
			r &= 31;
			trxCPUint.gpr[r] = 0;
		}
		// register already wiped by above code, so OR is safe
		trxCPUint.gpr[r] |= mem_read8_int(EA) << (24 - i);
		i += 8;
		if(i == 32)i=0;
		EA++;
		n--;
	}
}

//==============================================================================
// Integer store opcodes
// not emulated: sthux
//
// used
// Store Byte: writes the low byte of gpr[rS] to (rA|0) + sign-extended 16-bit displacement.
void trx_ppc_int_stb(void)
{
	uint32 rS, rA, EA;
	sint16 imm;

	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	// rA = 0 is a legal form for stb (only the update form forbids it) and means
	// "no base register", exactly as in sth and stw
	if(rA == 0)	EA = 0 + imm;
	else EA = trxCPUint.gpr[rA] + imm;
	mem_write8_int(EA, (uint8)trxCPUint.gpr[rS]);
}

// used
void trx_ppc_int_stbu(void)
{
	uint32 rS, rA, EA;
	sint16 imm;

	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	// rA = 0 is illegal, we assume no illegal code will be run with this interpreter
	EA = trxCPUint.gpr[rA] + imm;
	mem_write8_int(EA, (uint8)trxCPUint.gpr[rS]);
	trxCPUint.gpr[rA] = EA;
}

// used
void trx_ppc_int_stbx(void)
{
	uint32 rS, rA, rB, EA;

	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	if(rA == 0)	EA = trxCPUint.gpr[rB];
	else EA = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];

	mem_write8_int(EA, (uint8)trxCPUint.gpr[rS]);
}

// used
void trx_ppc_int_stbux(void)
{
	uint32 rS, rA, rB, EA;

	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	EA = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];

	mem_write8_int(EA, (uint8)trxCPUint.gpr[rS]);
	trxCPUint.gpr[rA] = EA;
}

// used
void trx_ppc_int_sth(void)
{
	uint32 rS, rA, EA;
	sint16 imm;

	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	if(rA == 0)	EA = 0 + imm;
	else EA = trxCPUint.gpr[rA] + imm;
	mem_write16_int(EA, (uint16)trxCPUint.gpr[rS]);
}

// used
void trx_ppc_int_sthu(void)
{
	uint32 rS, rA, EA;
	sint16 imm;

	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	// rA = 0 is illegal, we assume no illegal code will be run with this interpreter
	EA = trxCPUint.gpr[rA] + imm;
	mem_write16_int(EA, (uint16)trxCPUint.gpr[rS]);
	trxCPUint.gpr[rA] = EA;
}

// used
void trx_ppc_int_sthx(void)
{
	uint32 rS, rA, rB, EA;

	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	if(rA == 0)	EA = trxCPUint.gpr[rB];
	else EA = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];

	mem_write16_int(EA, (uint16)trxCPUint.gpr[rS]);
}

// used
void trx_ppc_int_sthbrx(void)
{
	uint32 rS, rA, rB, EA;
	uint16 res;

	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	if(rA == 0)	EA = trxCPUint.gpr[rB];
	else EA = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];

	res = trxCPUint.gpr[rS];
	_asm
	{
		mov ax, res
		xchg al, ah
		mov res, ax
	}
	mem_write16_int(EA, res);
}

// used
void trx_ppc_int_stw(void)
{
	uint32 rS, rA, EA;
	sint16 imm;

	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	if(rA == 0)	EA = 0 + imm;
	else EA = trxCPUint.gpr[rA] + imm;
	mem_write32_int(EA, trxCPUint.gpr[rS]);
}

// used
void trx_ppc_int_stwu(void)
{
	uint32 rS, rA, EA;
	sint16 imm;

	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	// rA = 0 is illegal, we assume no illegal code will be run with this interpreter
	EA = trxCPUint.gpr[rA] + imm;
	mem_write32_int(EA, trxCPUint.gpr[rS]);
	trxCPUint.gpr[rA] = EA;
}

// used
void trx_ppc_int_stwx()
{
	uint32 rS, rA, rB, EA;

	rS = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	if(rA == 0)	EA = trxCPUint.gpr[rB];
	else EA = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];

	mem_write32_int(EA, trxCPUint.gpr[rS]);
}

// used
void trx_ppc_int_stfiwx()
{
	uint32 rS, rA, rB, EA;

	rS = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	if(rA == 0)	EA = trxCPUint.gpr[rB];
	else EA = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];

	mem_write32_int(EA, (uint32)trxCPUint.fpr[rS]);
}

// used
void trx_ppc_int_stwux()
{
	uint32 rS, rA, rB, EA;

	rS = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	EA = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];

	mem_write32_int(EA, trxCPUint.gpr[rS]);
	trxCPUint.gpr[rA] = EA;
}

// used
void trx_ppc_int_stwbrx()
{
	uint32 rS, rA, rB, EA;
	uint32 res;

	rS = ((trxCPUint.opcode)>>21)&0x1f;
	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	if(rA == 0)	EA = trxCPUint.gpr[rB];
	else EA = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];

	res = trxCPUint.gpr[rS];
	_asm
	{
		mov eax, res
		bswap eax
		mov res, eax
	};
	mem_write32_int(EA, res);
}

// used
void trx_ppc_int_stmw(void)
{
	uint32 rS, rA, EA;
	sint16 imm;

	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	imm = trxCPUint.opcode & 0xffff;

	if(rA == 0)	EA = 0 + imm;
	else EA = trxCPUint.gpr[rA] + imm;

	while(rS <= 31)
	{
		mem_write32_int(EA, trxCPUint.gpr[rS]);
		rS++;
		EA +=4;
	}
}

// used
// Store String Word Immediate: stores NB bytes (32 when NB == 0) to memory starting
// at (rA|0), taken from consecutive registers beginning with rS, four bytes per
// register, most significant byte first. The register number wraps from 31 to 0.
void trx_ppc_int_stswi(void)
{
	uint32 rS, rA, NB, EA, n, r, i;

	rS = (trxCPUint.opcode >> 21)& 0x1f;
	rA = (trxCPUint.opcode >> 16)& 0x1f;
	NB = ((trxCPUint.opcode)>>11)&0x1f;

	// stswi is an X-form instruction: the effective address is (rA|0) only.
	// The low 16 bits of the opcode hold NB and the extended opcode, not a displacement.
	if(rA == 0)	EA = 0;
	else EA = trxCPUint.gpr[rA];

	// NB == 0 encodes a length of 32 bytes
	if(NB == 0)
	{
		n = 32;
	}
	else
	{
		n = NB;
	}

	// r is advanced before the first byte of every register, so start one below rS
	r = rS - 1;
	// i is the bit offset of the next byte within the current register
	i = 0;
	while (n>0)
	{
		if(i==0)
		{
			r++;
			r &= 31;
		}
		mem_write8_int(EA, (trxCPUint.gpr[r] >> (24 - i)) & 0xff);
		i += 8;
		if(i == 32)i=0;
		EA++;
		n--;
	}
}

// Data Cache Block set to Zero: zeroes the 32-byte cache block that contains
// the byte addressed by (rA|0) + gpr[rB].
void trx_ppc_int_dcbz()
{
	uint32 rA, rB, EA, i;

	rA = ((trxCPUint.opcode)>>16)&0x1f;
	rB = ((trxCPUint.opcode)>>11)&0x1f;

	if(rA == 0)	EA = trxCPUint.gpr[rB];
	else EA = trxCPUint.gpr[rA] + trxCPUint.gpr[rB];

	// the instruction clears the whole block containing EA, so start at the
	// block boundary instead of spilling into the following block
	EA &= 0xffffffe0;

	// 32 bytes in a cache block
	for(i = 0; i < 8; i++)
	{
		mem_write32_int(EA, 0);
		EA += 4;
	}
}
//==============================================================================
// Processor control opcodes 
//
// System Call. Currently a no-op: the unconditional return below makes the
// exception dispatch and block termination that follow it unreachable.
void trx_ppc_int_sc()
{
	// flushcache 
	return;
	// disabled: raise the system call exception and end the current block
	trx_ppc_exception(PPC_EXC_SC);
	trxCPUint.blockend = BLOCKEND_STOP;
}
// used
void trx_ppc_int_mfmsr()
{
	uint32 rD;

	rD=((trxCPUint.opcode)>>21)&0x1f;
	trxCPUint.gpr[rD] = trxCPUint.msr;
}
// used
void trx_ppc_int_mtmsr()
{
	uint32 rS;

	rS=((trxCPUint.opcode)>>21)&0x1f;
	trxCPUint.msr = trxCPUint.gpr[rS];
	trxCPUint.blockend = BLOCKEND_STOP;
	// we need to exit immediately at exceptions!
	cpuslice -= cpuslice_left;
	cpuslice_left = -1;
}
// used
void trx_ppc_int_mfspr()
{
	uint32 rD, spr1, spr2;

	rD = (trxCPUint.opcode >> 21)& 0x1f;
	spr1 = (trxCPUint.opcode >> 16)& 0x1f;
	spr2 = (trxCPUint.opcode >> 11)& 0x1f;

	switch((spr2<<5)|spr1)
	{
		case 1: trxCPUint.gpr[rD] = trxCPUint.xer; if(trxCPUint.xer_ca)trxCPUint.gpr[rD] |= XER_CA; return;
		case 8: trxCPUint.gpr[rD] = trxCPUint.lr; return;
		case 9: trxCPUint.gpr[rD] = trxCPUint.ctr; return;
	}
	// the rest is privileged instructions .. do we have a right to them ?
	if (trxCPUint.msr & MSR_PR) 
	{
		trx_ppc_exception(PPC_EXC_PROGRAM, PPC_EXC_PROGRAM_PRIV);
		return;
	}

	switch((spr2<<5)|spr1)
	{
		case 22: trxCPUint.gpr[rD] = trxCPUint.spr[PPC_DEC]; break;
		//case 26: trxCPUint.gpr[rD] = trxCPUint.spr[PPC_SRR0]; break;
		//case 27: trxCPUint.gpr[rD] = trxCPUint.spr[PPC_SRR1]; break;
		case 912: trxCPUint.gpr[rD] = trxCPUint.spr[PPC_GQR0+0]; break;
		case 913: trxCPUint.gpr[rD] = trxCPUint.spr[PPC_GQR0+1]; break;
		case 914: trxCPUint.gpr[rD] = trxCPUint.spr[PPC_GQR0+2]; break;
		case 915: trxCPUint.gpr[rD] = trxCPUint.spr[PPC_GQR0+3]; break;
		case 916: trxCPUint.gpr[rD] = trxCPUint.spr[PPC_GQR0+4]; break;
		case 917: trxCPUint.gpr[rD] = trxCPUint.spr[PPC_GQR0+5]; break;
		case 918: trxCPUint.gpr[rD] = trxCPUint.spr[PPC_GQR0+6]; break;
		case 919: trxCPUint.gpr[rD] = trxCPUint.spr[PPC_GQR0+7]; break;
		case 920:// hid[2]
			trxCPUint.gpr[rD] = trxCPUint.spr[PPC_HID2];
			break;
		case 921:// wpar
			trxCPUint.gpr[rD] = trxCPUint.spr[PPC_WPAR];
			trxCPUint.gpr[rD] |= wpar_hasdata;
//			printf("[trxCPUint] WPAR read: %8.8x\n", trxCPUint.wpar);
			break;
		case 922:// dma-u
			trxCPUint.gpr[rD] = trxCPUint.spr[PPC_DMAU];
//			printf("[trxCPUint] DMAU read: %8.8x\n", trxCPUint.spr[PPC_DMAU]);
			break;
		case 923:// dma-l
			trxCPUint.gpr[rD] = trxCPUint.spr[PPC_DMAL];
//			printf("[trxCPUint] DMAL read: %8.8x\n", trxCPUint.spr[PPC_DMAL]);
			break;
		default:
			trxCPUint.gpr[rD] = trxCPUint.spr[(spr2<<5)|spr1];
			break;
	}
}

// used
void trx_ppc_int_mftb()
{
	uint32 rD, spr1, spr2;

	rD = (trxCPUint.opcode >> 21)& 0x1f;
	spr1 = (trxCPUint.opcode >> 16)& 0x1f;
	spr2 = (trxCPUint.opcode >> 11)& 0x1f;

	switch (spr2) {
	case 8:
		switch (spr1) {
		case 12: trxCPUint.gpr[rD] = (uint32)trxCPUint.spr[PPC_TBL]; return;
		case 13: trxCPUint.gpr[rD] = (uint32)(trxCPUint.spr[PPC_TBH]); return;
		}
		break;
	}
}

// used
void trx_ppc_int_mtspr()
{
	uint32 rS, spr1, spr2;

	rS = (trxCPUint.opcode >> 21)& 0x1f;
	spr1 = (trxCPUint.opcode >> 16)& 0x1f;
	spr2 = (trxCPUint.opcode >> 11)& 0x1f;

	switch((spr2<<5)|spr1)
	{
		case 1: trxCPUint.xer = trxCPUint.gpr[rS]; trxCPUint.xer_ca = (trxCPUint.xer >> 29) & 1; trxCPUint.xer &= ~XER_CA; return;
		case 8:	trxCPUint.lr = trxCPUint.gpr[rS]; return;
		case 9:	trxCPUint.ctr = trxCPUint.gpr[rS]; return;
	}

	// rest is priviliged material
	if (trxCPUint.msr & MSR_PR) 
	{
		trx_ppc_exception(PPC_EXC_PROGRAM, PPC_EXC_PROGRAM_PRIV);
		return;
	}

	switch((spr2<<5)|spr1)
	{
		case 22: trxCPUint.spr[PPC_DEC] = trxCPUint.gpr[rS]; break;
		//case 26: trxCPUint.spr[PPC_SRR0] = trxCPUint.gpr[rS]; break;
		//case 27: trxCPUint.spr[PPC_SRR1] = trxCPUint.gpr[rS]; break;
		case 912: trxCPUint.spr[PPC_GQR0+0] = trxCPUint.gpr[rS]; break;
		case 913: trxCPUint.spr[PPC_GQR0+1] = trxCPUint.gpr[rS]; break;
		case 914: trxCPUint.spr[PPC_GQR0+2] = trxCPUint.gpr[rS]; break;
		case 915: trxCPUint.spr[PPC_GQR0+3] = trxCPUint.gpr[rS]; break;
		case 916: trxCPUint.spr[PPC_GQR0+4] = trxCPUint.gpr[rS]; break;
		case 917: trxCPUint.spr[PPC_GQR0+5] = trxCPUint.gpr[rS]; break;
		case 918: trxCPUint.spr[PPC_GQR0+6] = trxCPUint.gpr[rS]; break;
		case 919: trxCPUint.spr[PPC_GQR0+7] = trxCPUint.gpr[rS]; break;
		case 920:// hid[2]
			trxCPUint.spr[PPC_HID2] = trxCPUint.gpr[rS];
//			printf("[trxCPUint] HID2 set to: %8.8x\n", trxCPUint.spr[PPC_HID2]);
//			printf("HID2_LSQE: %s\n", (trxCPUint.spr[PPC_HID2] & HID2_LSQE)?"on":"off");  
//			printf("HID2_WPE: %s\n", (trxCPUint.spr[PPC_HID2] & HID2_WPE)?"on":"off");  
//			printf("HID2_PSE: %s\n", (trxCPUint.spr[PPC_HID2] & HID2_PSE)?"on":"off");  
//			printf("HID2_LCE: %s\n", (trxCPUint.spr[PPC_HID2] & HID2_LCE)?"on":"off");  
			// mask off DMAQL bits, our DMA is ultra fast (always empty)
			trxCPUint.spr[PPC_HID2] &= 0xf0ffffff;
//			printf("HID2 DMAQL: %d\n", (trxCPUint.spr[PPC_HID2]>>24)&0xf);  
			break;
		case 921:// wpar
			trxCPUint.spr[PPC_WPAR] = trxCPUint.gpr[rS];
			wpar_set();
//			printf("[trxCPUint] WPAR set to: %8.8x\n", trxCPUint.wpar);
			break;
		case 922:// dma-u
			trxCPUint.spr[PPC_DMAU] = trxCPUint.gpr[rS];
//			printf("[trxCPUint] DMAU set to: %8.8x\n", trxCPUint.spr[PPC_DMAU]);
//			printf("DMA dst address: %8.8x\n", trxCPUint.spr[PPC_DMAU] & ~0x1f);
//			printf("DMA len upper: %d cachelines\n", (trxCPUint.spr[PPC_DMAU] & 0x1f)<<2);
			break;
		case 923:// dma-l
			trxCPUint.spr[PPC_DMAL] = trxCPUint.gpr[rS];
			dma_engine();
			break;
		case 1019:// ictc
			trxCPUint.spr[1019] = trxCPUint.gpr[rS];
			printf("[trxCPUint] ICTC set to: %8.8x\n", trxCPUint.spr[1019]);
			break;
		default:
			trxCPUint.spr[(spr2<<5)|spr1] = trxCPUint.gpr[rS];
			break;
	}
}
