#include <stdlib.h>
#include <string.h>

#include "global.h"
#include "crystal.h"
#include "cpu.h"
#include "cpu_memorymap.h"
#include "ppu.h"
#include "ppu_memorymap.h"
#include "palette.h"
#include "log.h"

/*
2c02, PPU

Games/programs with problems:
	- Games that change fine x mid-scanline: this can be fixed by resetting fine x the same way as normal x. But, supposedly this behaviour is accurate!
		- Castlevania 3 (at mist effect)
		- Double Dragon 2 (above statusbar)
		- Super Mario Bros 3 (at 1st bonus round)
		- World Grand Prix (road)
		- etc.
	- Megaman 3: garbage scanline at titlescreen and boss intro (game bug!)
	- Rad Racer: yellow scanline on the road when you steer to the far left (game bug!)
	
	- Commando (Japanese version too): very glitchy (game bug?)
	- Knight Rider (U) (Japanese version is fine): garbage scanline at title screen (game bug?)

TODO:
	- behaviour of sprite address during rendering (Akira relies on this)

*/

/* Forward declarations of the two routines that ppuupdate is switched between in this file. */
static void ppu_execute(void);
static void nothing(void);
/* Catch-up hook invoked at the start of every PPU register handler below.
   It has no initializer here: ppu_new_frame() points it at ppu_execute, and
   ppu_execute() points it at nothing once vblank has been reached. */
void (*ppuupdate)(void);

/* Complete PPU state. File-local; other modules only reach individual fields
   through the ppu_get_*_ptr() accessors. */
static struct {
	BYTE control1;	/* control register 1: last value written through ppu_write_control1 */
	BYTE control1_cold;	/* value captured by ppu_write_control1_cold, replayed by ppu_warm */
	WORD table_sp;	/* 0 or 0x1000, derived from control1 bit 3 */
	WORD table_tp;	/* 0 or 0x1000, derived from control1 bit 4 */
	BYTE address_inc;	/* 1 or 32, derived from control1 bit 2 */
	
	BYTE control2;	/* control register 2: last value written through ppu_write_control2 */
	BYTE control2_cold;	/* value captured by ppu_write_control2_cold, replayed by ppu_warm */
	WORD emphasis;	/* control2 bits 7-5 shifted left by one; ORed into the palette_local index */
	BYTE mono_mask;	/* BIN8(11110000) when control2 bit 0 is set, otherwise 0xff; ANDed with the palette entry */
	BYTE left_tc_disable_mask;	/* 0xff when control2 bit 1 is set, otherwise 0; ANDed with tile pixels while scc<8 */
	BYTE left_sc_disable_mask;	/* 0xff when control2 bit 2 is set, otherwise 0; ANDed with sprite pixels while scc<8 */
	BYTE enabled;	/* 1 when control2 bit 3 or bit 4 is set, otherwise 0 */

	
	BYTE status;	/* status register: bit 7 vblank, bit 6 sprite 0 hit, bit 5 sprite overflow */

	BYTE vblank;	/* set TRUE by ppu_execute when sc reaches 242, cleared by ppu_new_frame */
	BYTE hblank;	/* set TRUE at scanline cycle 256, cleared at cycle 340 */
	
	int cycles;	/* loaded from crystal->frame each frame; reduced by CRYSTAL_PPU_CYCLE_FIXED per PPU cycle */
	BYTE latch;	/* first/second write toggle shared by 0x2005 and 0x2006; cleared by a status read */
	BYTE sc;	/* scanline counter, incremented at scanline cycle 340 */
	WORD scc;	/* cycle counter within the current scanline */
	BYTE visible_scanline;	/* 0 after ppu_new_frame; afterwards (sc<241) as evaluated at cycle 340 */
	BYTE valid_scanline;	/* TRUE after ppu_new_frame; afterwards (sc<241) as evaluated at cycle 340 */
	
	int* cpu_cycles_ptr;	/* obtained from cpu_get_cycles_ptr(); ppu_execute runs while cycles is greater than this value */
	BYTE* ppu_memmap_palette_ptr;	/* obtained from ppu_memmap_get_palette_ptr(); indexed with the low 5 bits of a pixel */
	BYTE* ppu_memmap_buffer_ptr;	/* obtained from ppu_memmap_get_buffer_ptr(); read right after a pattern fetch to obtain pt2 */
	
	BYTE databus;	/* last value transferred through a PPU register; returned by ppu_read_openbus */
	WORD address;	/* current memory address, used by 0x2007 accesses and by rendering */
	WORD temp_address;	/* address being assembled by 0x2000/0x2005/0x2006 writes */
	BYTE fine_x;	/* low 3 bits of the first 0x2005 write */
	
	BYTE* sprite_ram;	/* 0x100 bytes allocated in ppu_init */
	BYTE sprite_ram_secondary[32+1];	/* 32 bytes of per-scanline sprite data; index 32 is reached when srs_offset&0x3f equals 0x20 */
	BYTE sprite_address;	/* set by 0x2003 writes, post-incremented by 0x2004 writes */
	BYTE sprite_address_old;	/* not referenced anywhere in this file */
	BYTE reading_sprites;	/* toggled on around sprite pattern fetches (FETCH_S1/FETCH_S2) */
	
	BYTE tile_buffer[256+16];	/* one scanline of tile pixels plus prefetch room; indexed with scc+fine_x */
	BYTE sprite_buffer[256+8];	/* one scanline of sprite pixels with flag bits in bits 7-5 */
	BYTE rendering;	/* TRUE while ppu_execute is running; checked by ppu_force_update */

	WORD* screen;	/* 256*240 WORDs allocated in ppu_init; read by ppu_update_screen */
	/*WORD screen_offset;*/
	DWORD screen_offset;	/* write position used by ppu_execute; advances by 4 per pixel */
	
	WORD nt;	/* pattern address assembled in FETCH_AT and used by FETCH_T1/FETCH_T2 */
	BYTE pt1;	/* pattern bits supplying bit 0 of each pixel (see _PTS_n) */
	BYTE pt2;	/* pattern bits supplying bit 1 of each pixel (see _PTS_n) */
	BYTE at;	/* attribute bits (palette select <<2) with bit 7 set; ORed into non-zero tile pixels */
	BYTE atshift;	/* shift applied to the attribute byte to select the 2-bit field */
	
	BYTE sprite_found;	/* non-zero while the remaining bytes of an in-range sprite are being copied */
	BYTE srs_offset;	/* write offset into sprite_ram_secondary during evaluation */
	WORD sr_offset;	/* read offset into sprite_ram during evaluation */
	BYTE sprite_0;	/* set when sprite_ram offset 0 was found in range on this scanline */
	BYTE sprite_y_max;	/* 7 or 15, derived from control1 bit 5 */
	WORD sprite_pa;	/* pattern address of the sprite row being fetched */
} ppu;

/* Local RGB copy of palette->rgb_emphasis, filled by ppu_fill_palette_local().
   Indexed as colour + 64*emphasis (64 colours for each of 8 emphasis settings). */
static struct {
	BYTE r,g,b;
} palette_local[64*8];

/* Accessors handing out the addresses of individual fields of the file-local
   PPU state, so other modules can read (or write) them directly. */

BYTE* ppu_get_enabled_ptr(void)
{
	return &ppu.enabled;
}

BYTE* ppu_get_valid_scanline_ptr(void)
{
	return &ppu.valid_scanline;
}

BYTE* ppu_get_control1_ptr(void)
{
	return &ppu.control1;
}

BYTE* ppu_get_control2_ptr(void)
{
	return &ppu.control2;
}

BYTE* ppu_get_reading_sprites_ptr(void)
{
	return &ppu.reading_sprites;
}

BYTE* ppu_get_rendering_ptr(void)
{
	return &ppu.rendering;
}

WORD* ppu_get_scc_ptr(void)
{
	return &ppu.scc;
}

BYTE* ppu_get_sc_ptr(void)
{
	return &ppu.sc;
}

BYTE* ppu_get_vblank_ptr(void)
{
	return &ppu.vblank;
}

BYTE* ppu_get_hblank_ptr(void)
{
	return &ppu.hblank;
}

WORD* ppu_get_address_ptr(void)
{
	return &ppu.address;
}

int* ppu_get_cycles_ptr(void)
{
	return &ppu.cycles;
}

/* Dispatch a PPU memory access through the per-page handler tables.
   The page is the address shifted right by 8; note that x is evaluated twice. */
#define READ_BYTE(x) ppu_read_memorymap[(x)>>8](x)
#define WRITE_BYTE(x,y) ppu_write_memorymap[(x)>>8](x,y)

/* One handler per 256-byte page; filled in by ppu_set_fpt(). */
static fp_ppu_read_memorymap ppu_read_memorymap[0x140]; /* last mirror in case of overflow */
static fp_ppu_write_memorymap ppu_write_memorymap[0x140];

/* Helpers for ppu_set_fpt(): assign the handler(s) of page i (the caller's loop variable).
   NOTE: _PSRW expands to TWO statements, so it must always be wrapped in braces
   when used as the body of an if/else. */
#define _PSR(r) ppu_read_memorymap[i]=r
#define _PSW(w) ppu_write_memorymap[i]=w
#define _PSRW(r,w) _PSR(r); _PSW(w)

/* Install read/write handlers into the per-page PPU memory map tables.

	type	low 3 bits select what is installed:
			0	the standard handlers for every pattern table and nametable bank
			1	funr1-4/funw1-4 on the four 1KB banks of pattern table 0x0000-0x0fff
			2	funr1-4/funw1-4 on the four 1KB banks of pattern table 0x1000-0x1fff
			3	funr1-4/funw1-4 on nametables 0-3 (and their mirrors)
			4	funr1/funw1 on every page whose low 6 bits equal custom
		bit 3 (value 8) additionally installs the *_override handlers on every bank
	custom	page number (address>>8, low 6 bits), only used when type is 4
	funrN/funwN	custom handlers; a NULL pointer leaves the current handler in place

   Every call also re-installs the palette handlers on the pages whose low 6 bits
   are 0x3f. The tables cover 0x140 pages, so pages 0x100-0x13f mirror 0x00-0x3f. */
void ppu_set_fpt (BYTE type,BYTE custom,fp_ppu_read_memorymap funr1,fp_ppu_read_memorymap funr2,fp_ppu_read_memorymap funr3,fp_ppu_read_memorymap funr4,fp_ppu_write_memorymap funw1,fp_ppu_write_memorymap funw2,fp_ppu_write_memorymap funw3,fp_ppu_write_memorymap funw4)
{
	int i;
	BYTE all=type&8;
	type&=7;
	for (i=0;i<0x140;i++) {
		/* i>>2 is the 1KB bank number; 16 banks repeat over the whole table */
		switch (i>>2&0xf) {
			/* patterntables 0x0-0xfff */
			case 0x0: if (type==0) { _PSRW(ppu_read_pt0a,ppu_write_pt0a); } if (all) { _PSRW(ppu_read_pt0a_override,ppu_write_pt0a_override); } if (type==1) { if (funr1!=NULL) _PSR(funr1); if (funw1!=NULL) _PSW(funw1); } break;
			case 0x1: if (type==0) { _PSRW(ppu_read_pt0b,ppu_write_pt0b); } if (all) { _PSRW(ppu_read_pt0b_override,ppu_write_pt0b_override); } if (type==1) { if (funr2!=NULL) _PSR(funr2); if (funw2!=NULL) _PSW(funw2); } break;
			case 0x2: if (type==0) { _PSRW(ppu_read_pt0c,ppu_write_pt0c); } if (all) { _PSRW(ppu_read_pt0c_override,ppu_write_pt0c_override); } if (type==1) { if (funr3!=NULL) _PSR(funr3); if (funw3!=NULL) _PSW(funw3); } break;
			case 0x3: if (type==0) { _PSRW(ppu_read_pt0d,ppu_write_pt0d); } if (all) { _PSRW(ppu_read_pt0d_override,ppu_write_pt0d_override); } if (type==1) { if (funr4!=NULL) _PSR(funr4); if (funw4!=NULL) _PSW(funw4); } break;
			/* patterntables 0x1000-0x1fff */
			case 0x4: if (type==0) { _PSRW(ppu_read_pt1a,ppu_write_pt1a); } if (all) { _PSRW(ppu_read_pt1a_override,ppu_write_pt1a_override); } if (type==2) { if (funr1!=NULL) _PSR(funr1); if (funw1!=NULL) _PSW(funw1); } break;
			case 0x5: if (type==0) { _PSRW(ppu_read_pt1b,ppu_write_pt1b); } if (all) { _PSRW(ppu_read_pt1b_override,ppu_write_pt1b_override); } if (type==2) { if (funr2!=NULL) _PSR(funr2); if (funw2!=NULL) _PSW(funw2); } break;
			case 0x6: if (type==0) { _PSRW(ppu_read_pt1c,ppu_write_pt1c); } if (all) { _PSRW(ppu_read_pt1c_override,ppu_write_pt1c_override); } if (type==2) { if (funr3!=NULL) _PSR(funr3); if (funw3!=NULL) _PSW(funw3); } break;
			case 0x7: if (type==0) { _PSRW(ppu_read_pt1d,ppu_write_pt1d); } if (all) { _PSRW(ppu_read_pt1d_override,ppu_write_pt1d_override); } if (type==2) { if (funr4!=NULL) _PSR(funr4); if (funw4!=NULL) _PSW(funw4); } break;
			/* nametables */
			case 0x8: case 0xc: if (type==0) { _PSRW(ppu_read_name0,ppu_write_name0); } if (all) { _PSRW(ppu_read_name0_override,ppu_write_name0_override); } if (type==3) { if (funr1!=NULL) _PSR(funr1); if (funw1!=NULL) _PSW(funw1); } break;
			case 0x9: case 0xd: if (type==0) { _PSRW(ppu_read_name1,ppu_write_name1); } if (all) { _PSRW(ppu_read_name1_override,ppu_write_name1_override); } if (type==3) { if (funr2!=NULL) _PSR(funr2); if (funw2!=NULL) _PSW(funw2); } break;
			case 0xa: case 0xe: if (type==0) { _PSRW(ppu_read_name2,ppu_write_name2); } if (all) { _PSRW(ppu_read_name2_override,ppu_write_name2_override); } if (type==3) { if (funr3!=NULL) _PSR(funr3); if (funw3!=NULL) _PSW(funw3); } break;
			case 0xb: case 0xf: if (type==0) { _PSRW(ppu_read_name3,ppu_write_name3); } if (all) { _PSRW(ppu_read_name3_override,ppu_write_name3_override); } if (type==3) { if (funr4!=NULL) _PSR(funr4); if (funw4!=NULL) _PSW(funw4); } break;
			
			default: break;
		}

		/* palette page: replaces whatever the nametable mirror case installed above */
		if ((i&0x3f)==0x3f) {
			_PSRW(ppu_read_palette,ppu_write_palette);
			/* _PSRW is two statements: without these braces only the read handler
			   was conditional and the write override was installed on every call */
			if (all) { _PSRW(ppu_read_palette_override,ppu_write_palette_override); }
		}
		
		/* single custom page, applied last so it wins over everything above */
		if (type==4) { if ((i&0x3f)==custom) { if (funr1!=NULL) _PSR(funr1); if (funw1!=NULL) _PSW(funw1); } }
	}
}

/* Read of a register with no readable contents: returns the value last left
   on the PPU data bus. The address argument is not used. */
BYTE __fastcall ppu_read_openbus(register WORD address)
{
	return ppu.databus;
}

/* 0x2000, control register 1
	7	enable nmi on vblank
	6	ppu layer (always 0 for NES, unused)
	5	8x16 sprite mode
	4	tile table address (0 or 0x1000)
	3	sprite table address (0 or 0x1000)
	2	address increment value (1 or 32)
	1,0	base nametable address (0x2000, 0x2400, 0x2800, 0x2c00) */
void __fastcall ppu_write_control1(register BYTE data)
{
	/* bring the PPU up to date before any register state changes */
	(*ppuupdate)();

	/* NMI enable going from 0 to 1 while the vblank flag is set raises an NMI at once */
	if ((data&BIN8(10000000)) && (ppu.status&BIN8(10000000)) && !(ppu.control1&BIN8(10000000))) {
		cpu_set_interrupt(INTERRUPT_NMI|(INT_EDGE_NMI<<8));
	}

	ppu.control1=data;
	ppu.databus=data;

	/* bit 3 -> sprite pattern table base, bit 4 -> tile pattern table base */
	ppu.table_sp=(data<<9)&0x1000;
	ppu.table_tp=(data<<8)&0x1000;

	ppu.address_inc=(ppu.control1&BIT(2))?32:1;

	/* bit 5 selects 8x16 sprites: highest row index becomes 15 instead of 7 */
	ppu.sprite_y_max=((data>>2)&8)|7;

	/* bits 1,0 become the nametable select bits (11,10) of the temporary address */
	ppu.temp_address=(ppu.temp_address&BIN16(11110011,11111111))|((data<<10)&BIN16(00001100,00000000));
}
/* 0x2000 handler installed by ppu_cold(): only remembers the value, which
   ppu_warm() later replays through ppu_write_control1(). */
void __fastcall ppu_write_control1_cold(register BYTE data)
{
	ppu.control1_cold=data;
}

/* 0x2001, control register 2
	7	more blue
	6	more green
	5	more red
	4	enable sprites
	3	enable tiles
	2	enable left sprite column (8 pixels)
	1	enable left tiles column (8 pixels)
	0	monochrome */
void __fastcall ppu_write_control2(register BYTE data)
{
	/* bring the PPU up to date before any register state changes */
	(*ppuupdate)();

	ppu.control2=data;
	ppu.databus=data;

	/* emphasis bits move up one position so they sit above the 6-bit colour */
	ppu.emphasis=(data&BIN8(11100000))<<1;

	/* masks are precomputed here so the pixel loop only has to AND with them */
	ppu.mono_mask=(data&1)?BIN8(11110000):0xff;
	ppu.left_tc_disable_mask=(data&2)?0xff:0;
	ppu.left_sc_disable_mask=(data&4)?0xff:0;

	/* rendering counts as enabled when tiles or sprites (or both) are on */
	ppu.enabled=(data&BIN8(00011000))!=0;
}
/* 0x2001 handler installed by ppu_cold(): only remembers the value, which
   ppu_warm() later replays through ppu_write_control2(). */
void __fastcall ppu_write_control2_cold(register BYTE data)
{
	ppu.control2_cold=data;
}

/* 0x2002, status
	7	vblank
	6	sprite 0 hit
	5	sprite overflow
   The low 5 bits of the result come from the previous data bus contents.
   Reading also clears the 0x2005/0x2006 write latch and the vblank flag. */
BYTE __fastcall ppu_read_status(register WORD address)
{
	BYTE value;

	(*ppuupdate)();

	value=ppu.status|(ppu.databus&BIN8(00011111));
	ppu.databus=value;

	ppu.latch=FALSE;
	ppu.status&=~BIT(7); /* vblank flag resets on read */

	return value;
}

/* 0x2003, sprite memory address: selects the sprite RAM byte that 0x2004 accesses */
void __fastcall ppu_write_sprite_memory_address(register BYTE data)
{
	(*ppuupdate)();

	ppu.sprite_address=data;
	ppu.databus=data;
}

/* 0x2004, sprite memory data (read).
   While a valid scanline is being rendered the value comes from the secondary
   sprite buffer, at a position that depends on the scanline cycle; otherwise
   it comes from sprite RAM at the current sprite address, with bits 4-2 of
   every attribute byte (address&3 == 2) masked off. */
BYTE __fastcall  ppu_read_sprite_memory_data(register WORD address)
{
	BYTE value;

	(*ppuupdate)();

	if (ppu.valid_scanline&ppu.enabled) {
		int slot;

		if (ppu.scc<256) slot=ppu.srs_offset&0x3f;
		else if (ppu.scc<320) slot=(ppu.scc>>1&BIN8(00011111))|(ppu.scc&BIN8(00000011));
		else slot=0;

		value=ppu.sprite_ram_secondary[slot];
	}
	else {
		value=ppu.sprite_ram[ppu.sprite_address];
		if ((ppu.sprite_address&3)==2) value&=BIN8(11100011);
	}

	ppu.databus=value;
	return value;
}
/* 0x2004, sprite memory data (write): stores the byte at the current sprite
   address and advances that address (it wraps, being a BYTE). */
void __fastcall ppu_write_sprite_memory_data(register BYTE data)
{
	(*ppuupdate)();

	ppu.sprite_ram[ppu.sprite_address++]=data;
	ppu.databus=data;
}

/* 0x2005, memory address (scroll).
   First write:  low 3 bits -> fine x, high 5 bits -> bits 4-0 of the temporary address.
   Second write: low 3 bits -> bits 14-12, high 5 bits -> bits 9-5 of the temporary address.
   The shared write latch decides which one this is and is toggled afterwards. */
void __fastcall ppu_write_scroll(register BYTE data)
{
	(*ppuupdate)();
	ppu.databus=data;

	if (!ppu.latch) {
		/* first write: horizontal scroll */
		ppu.fine_x=data&BIN8(00000111);
		ppu.temp_address=(ppu.temp_address&BIN16(01111111,11100000))|(data>>3);
	}
	else {
		/* second write: vertical scroll */
		const WORD fine_y=(data<<12)&BIN16(01110000,00000000);
		const WORD coarse_y=(data<<2)&BIN16(00000011,11100000);

		ppu.temp_address=(ppu.temp_address&BIN16(00001100,00011111))|fine_y|coarse_y;
	}

	ppu.latch^=1;
}

/* 0x2006, memory address.
   First write:  low 6 bits of data become bits 13-8 of the temporary address (bits 15-14 cleared).
   Second write: data becomes the low byte, and the result is copied to the live address.
   The shared write latch decides which one this is and is toggled afterwards. */
void __fastcall ppu_write_memory_address(register BYTE data)
{
	(*ppuupdate)();
	ppu.databus=data;

	if (!ppu.latch) {
		/* first write: high byte */
		ppu.temp_address=(ppu.temp_address&0x00ff)|((data<<8)&BIN16(00111111,00000000));
	}
	else {
		/* second write: low byte, then the address takes effect */
		ppu.temp_address=(ppu.temp_address&BIN16(01111111,00000000))|data;
		ppu.address=ppu.temp_address;
	}

	ppu.latch^=1;
}

/* 0x2007, memory data (read): reads through the page handler for the current
   address, then advances the address by the configured increment (1 or 32). */
BYTE __fastcall ppu_read_memory_data(register WORD address)
{
	BYTE value;

	(*ppuupdate)();

	value=READ_BYTE(ppu.address);
	ppu.databus=value;
	ppu.address+=ppu.address_inc;

	return value;
}
/* 0x2007, memory data (write): writes through the page handler for the current
   address, then advances the address by the configured increment (1 or 32). */
void __fastcall ppu_write_memory_data(register BYTE data)
{
	(*ppuupdate)();

	WRITE_BYTE(ppu.address,data);
	ppu.databus=data;

	ppu.address+=ppu.address_inc;
}



void ppu_init(void)
{
	memset(&ppu,0,sizeof(ppu));
	if ((ppu.sprite_ram=malloc(0x100))==NULL) { LOG(LOG_MISC|LOG_ERROR,"sprite RAM allocation error!\n"); exit(1); }
	memset(ppu.sprite_ram,0,0x100);
	LOG(LOG_VERBOSE,"sprite RAM allocated\n");

	if ((ppu.screen=malloc(sizeof(WORD)*256*240))==NULL) { LOG(LOG_MISC|LOG_ERROR,"screen allocation error!\n"); exit(1); }
	memset(ppu.screen,0,sizeof(WORD)*256*240);
	LOG(LOG_VERBOSE,"PPU screen allocated\n");

	ppu.mono_mask=ppu.left_sc_disable_mask=ppu.left_tc_disable_mask=0xff;
	ppu.address_inc=1;
	ppu.sprite_y_max=7;
	
	ppu.cpu_cycles_ptr=cpu_get_cycles_ptr();
	ppu.ppu_memmap_palette_ptr=ppu_memmap_get_palette_ptr();
	ppu.ppu_memmap_buffer_ptr=ppu_memmap_get_buffer_ptr();
	LOG(LOG_VERBOSE,"PPU initialised\n");
}

/* Releases the buffers allocated by ppu_init() and clears the pointers so a
   repeated call is harmless. */
void ppu_clean(void)
{
	/* free(NULL) is a no-op, so no guard is needed */
	free(ppu.sprite_ram);
	ppu.sprite_ram=NULL;

	free(ppu.screen);
	ppu.screen=NULL;

	LOG(LOG_VERBOSE,"sprite RAM deallocated\nPPU screen deallocated\nPPU cleaned\n");
}

/* Copies palette->rgb_emphasis into palette_local, flattening
   [emphasis][colour] into the single index colour + 64*emphasis. */
void ppu_fill_palette_local(void)
{
	int emph,colour;

	for (emph=0;emph<8;emph++) {
		for (colour=0;colour<64;colour++) {
			const int slot=emph*64+colour;

			palette_local[slot].r=palette->rgb_emphasis[emph][colour][0];
			palette_local[slot].g=palette->rgb_emphasis[emph][colour][1];
			palette_local[slot].b=palette->rgb_emphasis[emph][colour][2];
		}
	}
}

/* Runs the current update hook unless ppu_execute() is already in progress
   (guards against re-entering the renderer). */
__inline__ void ppu_force_update(void)
{
	if (!ppu.rendering) (*ppuupdate)();
}

/* Start of a frame: rewinds the scanline/cycle counters and output position,
   clears the status and vblank flags, reloads the cycle budget from the
   crystal and makes ppu_execute the active update hook again. */
void ppu_new_frame(void)
{
	ppu.scc=0;
	ppu.sc=0;
	ppu.vblank=0;
	ppu.screen_offset=0;
	ppu.visible_scanline=0;
	ppu.status=0;

	ppu.valid_scanline=TRUE;
	ppu.cycles=crystal->frame;
	ppuupdate=&ppu_execute;

	#if DEBUG_PPU
	LOG(LOG_PPU,"------- new frame ------- cc:%d\n",ppu.cycles);
	#endif
}

/* Cold state: writes to 0x2000/0x2001 are diverted to the *_cold handlers,
   which only record the value, and the status register is set to 0x80. */
void ppu_cold(void)
{
	/* divert both control registers */
	cpu_set_write_io2000(0x2000,ppu_write_control1_cold);
	cpu_set_write_io2000(0x2001,ppu_write_control2_cold);

	/* bit 7 (vblank) set, everything else clear */
	ppu.status=0x80;
}

/* Leaves the cold state: restores the standard 0x2000 I/O handlers, then
   replays the control values recorded while cold through the real handlers. */
void ppu_warm(void)
{
	cpu_set_io2000_std();

	/* apply whatever was written to 0x2000/0x2001 during the cold period */
	ppu_write_control1(ppu.control1_cold);
	ppu_write_control2(ppu.control2_cold);
}


/* Helper macros for ppu_execute(). They are statement sequences, not expressions:
   they use ppu_execute's locals tempb, tempw and xo as scratch, and several of them
   end in an unterminated if/else, so they must be used as "MACRO();" inside braces. */

/* RESET_X: copy the bits selected by 0x041f (bit 10 and bits 4-0) from the temporary
   address into the live address. RESET_Y: copy the whole temporary address. */
#define RESET_X()	ppu.address=(ppu.address&BIN16(11111011,11100000))|(ppu.temp_address&BIN16(00000100,00011111))
#define RESET_Y()	ppu.address=ppu.temp_address

/* INC_X: advance bits 4-0 of the address by one; on wrap to 0, toggle bit 10.
   INC_Y: advance bits 14-12 by one; on wrap, advance bits 9-5, and when those reach
   0x3c0 (row 30) reset them to 0 and toggle bit 11. */
#define INC_X()		tempb=(ppu.address+1)&BIN8(00011111); \
			ppu.address=(ppu.address&BIN16(11111111,11100000))|tempb; \
			if (tempb==0) ppu.address^=BIN16(00000100,00000000)
#define INC_Y()		tempw=(ppu.address+BIN16(00010000,00000000))&BIN16(01110000,00000000); \
			ppu.address=(ppu.address&BIN16(10001111,11111111))|tempw; \
			if (tempw==0) { \
				tempw=(ppu.address+BIN16(00000000,00100000))&BIN16(00000011,11100000); \
				if (tempw==BIN16(00000011,11000000)) { \
					tempw=0; \
					ppu.address^=BIN16(00001000,00000000); \
				} \
				ppu.address=(ppu.address&BIN16(11111100,00011111))|tempw; \
			}

/* Background fetch steps. Each one issues a read at the address of the item it is named
   after (nametable byte, attribute byte, pattern byte, pattern byte+8) but consumes the
   returned value as the item requested by the PREVIOUS step.
   NOTE(review): this only makes sense if the memory-map read handlers return the
   previously latched byte and keep the new one in *ppu_memmap_buffer_ptr - confirm
   against ppu_memorymap.c. */
#define FETCH_NT()	tempb=READ_BYTE((ppu.address&BIN16(00001111,11111111))|0x2000)
#define FETCH_AT()	ppu.atshift=(ppu.address&2)|(ppu.address>>4&4); \
			tempb=ppu.address>>12&7; \
			ppu.nt=ppu.table_tp|READ_BYTE((ppu.address&BIN16(00001100,00000000))|0x23c0|((ppu.address>>2&7)|(ppu.address>>4&BIN8(00111000))))<<4|tempb
#define FETCH_T1()	ppu.at=((READ_BYTE(ppu.nt)>>ppu.atshift)&3)<<2|0x80
#define FETCH_T2()	ppu.pt1=READ_BYTE(ppu.nt|8); \
			ppu.pt2=*ppu.ppu_memmap_buffer_ptr

/* _PTS_n: 2-bit value of pixel n of the current 8-pixel row (n=0 is taken from bit 7,
   i.e. the leftmost pixel); pt1 supplies bit 0 and pt2 supplies bit 1.
   The expansions are not parenthesized, so only use them as a whole right-hand side. */
#define _PTS_0		(ppu.pt1>>7&1)|(ppu.pt2>>6&2)
#define _PTS_1		(ppu.pt1>>6&1)|(ppu.pt2>>5&2)
#define _PTS_2		(ppu.pt1>>5&1)|(ppu.pt2>>4&2)
#define _PTS_3		(ppu.pt1>>4&1)|(ppu.pt2>>3&2)
#define _PTS_4		(ppu.pt1>>3&1)|(ppu.pt2>>2&2)
#define _PTS_5		(ppu.pt1>>2&1)|(ppu.pt2>>1&2)
#define _PTS_6		(ppu.pt1>>1&1)|(ppu.pt2&2)
#define _PTS_7		(ppu.pt1&1)|(ppu.pt2<<1&2)
/* SET_TBIT: store one tile pixel; a non-zero pixel gets the attribute bits (and bit 7)
   from ppu.at, a zero pixel is stored as 0.
   SET_TLINE: store the 8 pixels of the fetched row at tile_buffer[o..o+7]. */
#define SET_TBIT(o)	if (tempb) ppu.tile_buffer[o]=tempb|ppu.at; \
			else ppu.tile_buffer[o]=0
#define SET_TLINE(o)	tempw=o; \
			tempb=_PTS_0; SET_TBIT(tempw++); tempb=_PTS_1; SET_TBIT(tempw++); tempb=_PTS_2; SET_TBIT(tempw++); tempb=_PTS_3; SET_TBIT(tempw++); \
			tempb=_PTS_4; SET_TBIT(tempw++); tempb=_PTS_5; SET_TBIT(tempw++); tempb=_PTS_6; SET_TBIT(tempw++); tempb=_PTS_7; SET_TBIT(tempw)

/* EVAL_SPR_EMP: clear one byte of the secondary sprite buffer to 0xff (index scc>>1).
   EVAL_SPR_Y: one sprite evaluation step. While sprite_found is set, the next byte of the
   current sprite is copied to the secondary buffer. Otherwise the Y byte at sr_offset is
   range-checked against the current scanline: an in-range sprite starts a copy while fewer
   than 0x20 secondary bytes are used; once they are full, an in-range sprite sets the
   overflow flag (status bit 5), and an out-of-range one advances sr_offset by 5 rather
   than 4 (the |(srs_offset==0x20) term). */
#define EVAL_SPR_EMP()	ppu.sprite_ram_secondary[ppu.scc>>1]=0xff
#define EVAL_SPR_Y()	if (ppu.sr_offset<0x100) { \
				if (ppu.sprite_found) { \
					ppu.sprite_ram_secondary[ppu.srs_offset++]=ppu.sprite_ram[ppu.sr_offset++]; \
					ppu.sprite_found=ppu.sr_offset&3; \
				} \
				else { \
					ppu.sprite_ram_secondary[ppu.srs_offset&0x3f]=ppu.sprite_ram[ppu.sr_offset]; \
					tempb=ppu.sc-1-ppu.sprite_ram[ppu.sr_offset]; \
					if ((ppu.sprite_ram[ppu.sr_offset]<0xef)&(tempb<=ppu.sprite_y_max)) { \
						if (ppu.srs_offset<0x20) { \
							if (!ppu.sprite_0) ppu.sprite_0=ppu.sr_offset==0; \
							ppu.sprite_found=TRUE; \
							ppu.sr_offset++; ppu.srs_offset++; \
						} \
						else { \
							ppu.sr_offset+=4; \
							if (ppu.sr_offset&3) ppu.sr_offset+=(4-(ppu.sr_offset&3)); \
							ppu.srs_offset=0x60; \
							ppu.status|=BIT(5); \
						} \
					} \
					else ppu.sr_offset+=(4|(ppu.srs_offset==0x20)); \
				} \
			}

/* SET_SPRITE(x): compute ppu.sprite_pa, the pattern address of the row to fetch for the
   sprite whose 4 bytes start at secondary offset x. The row is mirrored when attribute
   bit 7 is set; in 8x16 mode (sprite_y_max&8) the table comes from bit 0 of the tile
   number instead of table_sp and rows 8-15 select the following tile. */
#define SET_SPRITE(x)	tempw=x; \
			tempb=ppu.sc-1-ppu.sprite_ram_secondary[tempw]; \
			if (ppu.sprite_ram_secondary[tempw+2]&0x80) tempb^=ppu.sprite_y_max; \
			if (ppu.sprite_y_max&8) { \
				if (tempb>7) tempb=(tempb&7)|0x10; \
				ppu.sprite_pa=(ppu.sprite_ram_secondary[tempw+1]<<12&0x1000)|(ppu.sprite_ram_secondary[tempw+1]<<4&BIN16(00001111,11100000))|tempb; \
			} \
			else ppu.sprite_pa=ppu.sprite_ram_secondary[tempw+1]<<4|tempb|ppu.table_sp

/* Sprite pattern fetches; reading_sprites is toggled on for the duration of the read so
   code outside this file (via ppu_get_reading_sprites_ptr) can tell them apart. */
#define FETCH_S1()	ppu.reading_sprites^=1; \
			tempb=READ_BYTE(ppu.sprite_pa); \
			ppu.reading_sprites^=1
#define FETCH_S2()	ppu.reading_sprites^=1; \
			ppu.pt1=READ_BYTE(ppu.sprite_pa|8); \
			ppu.pt2=*ppu.ppu_memmap_buffer_ptr; \
			ppu.reading_sprites^=1

/* SET_SBIT_OR: turn the attribute byte at secondary index tempb into the flag bits that
   are ORed into every non-zero pixel of this sprite: bit 7 = sprite pixel present,
   bit 6 = set when attribute bit 5 is clear (sprite drawn over opaque tiles), bit 4 set,
   bits 3-2 = attribute bits 1-0, bit 5 = sprite 0 marker (only for the first slot,
   fetched at cycle 263).
   SET_SBIT(x,y): write pixel value y to sprite_buffer[x] unless an earlier sprite already
   put a non-zero pixel there.
   SET_SLINE(x): draw the fetched row of the sprite whose attribute byte sits at secondary
   index x, if that slot was filled (x<srs_offset); attribute bit 6 draws right-to-left. */
#define SET_SBIT_OR()	if (~ppu.sprite_ram_secondary[tempb]&0x20) tempb=(ppu.sprite_ram_secondary[tempb]<<2&BIN8(00001100))|BIN8(11010000); \
			else tempb=(ppu.sprite_ram_secondary[tempb]<<2&BIN8(00001100))|BIN8(10010000); \
			if (ppu.sprite_0) { \
				if (ppu.scc==263) tempb|=0x20; \
				else ppu.sprite_0=FALSE; \
			}
#define SET_SBIT(x,y)	if ((ppu.sprite_buffer[x]&BIN8(00011111))==0) { \
				ppu.sprite_buffer[x]|=y; \
				if (ppu.sprite_buffer[x]&BIN8(00011111)) ppu.sprite_buffer[x]|=tempb; \
			}
#define SET_SLINE(x)	tempb=x; \
			if (tempb<ppu.srs_offset) { \
				if (ppu.sprite_ram_secondary[tempb]&0x40) { \
					tempw=ppu.sprite_ram_secondary[tempb+1]+7; \
					SET_SBIT_OR(); \
					SET_SBIT(tempw,_PTS_0); tempw--; SET_SBIT(tempw,_PTS_1); tempw--; SET_SBIT(tempw,_PTS_2); tempw--; SET_SBIT(tempw,_PTS_3); tempw--; \
					SET_SBIT(tempw,_PTS_4); tempw--; SET_SBIT(tempw,_PTS_5); tempw--; SET_SBIT(tempw,_PTS_6); tempw--; SET_SBIT(tempw,_PTS_7); \
				} \
				else { \
					tempw=ppu.sprite_ram_secondary[tempb+1]; \
					SET_SBIT_OR(); \
					SET_SBIT(tempw,_PTS_0); tempw++; SET_SBIT(tempw,_PTS_1); tempw++; SET_SBIT(tempw,_PTS_2); tempw++; SET_SBIT(tempw,_PTS_3); tempw++; \
					SET_SBIT(tempw,_PTS_4); tempw++; SET_SBIT(tempw,_PTS_5); tempw++; SET_SBIT(tempw,_PTS_6); tempw++; SET_SBIT(tempw,_PTS_7); \
				} \
			}

/* EMPTY_XO: zero the next 8 bytes through the xo pointer, advancing it. */
#define EMPTY_XO()	*(xo++)=0; *(xo++)=0; *(xo++)=0; *(xo++)=0; *(xo++)=0; *(xo++)=0; *(xo++)=0; *(xo++)=0

/* Runs the PPU forward, one PPU cycle per loop iteration, for as long as ppu.cycles is
   greater than the CPU cycle counter (*ppu.cpu_cycles_ptr). Each iteration:
     1. dispatches on the scanline cycle (ppu.scc) to perform that cycle's memory fetch,
        sprite evaluation step or buffer fill (see the helper macros above);
     2. outputs one pixel while on a visible scanline outside hblank;
     3. increments scc and subtracts CRYSTAL_PPU_CYCLE_FIXED from ppu.cycles.
   Cycle 340 closes the scanline. When the scanline counter reaches 242 the vblank flag
   (status bit 7) is set, an NMI is requested if control1 bit 7 is set, ppuupdate is
   switched to nothing() and the function returns; ppu_new_frame() re-arms it.
   ppu.rendering is TRUE for the duration of the call. */
static void ppu_execute(void)
{
	register BYTE tempb;
	register WORD tempw;
	BYTE* xo;
	BYTE i;

	/* nothing to do when the PPU has already caught up with the CPU */
	if (ppu.cycles<=*ppu.cpu_cycles_ptr) return;
	ppu.rendering=TRUE;
	
	ppu_execute_start:
	
	switch (ppu.scc) {
		/* visible area */
		/* memory fetch 1-128: NT, AT, T1, T2 */
		/* cycle 0-63: empty secondary sprite buffer in parallel */
		/* cycle 64-255: evaluate sprite y coordinates in parallel */
		/* NT (double cases are unallowed :( (duh) ) */
		case 1:  case 9:  case 17: case 25: case 33: case 41: case 49: case 57:
			if (ppu.valid_scanline&ppu.enabled) { FETCH_NT(); EVAL_SPR_EMP(); } break;
		case 65: case 73: case 81: case 89: case 97: case 105:case 113:case 121:
		case 129:case 137:case 145:case 153:case 161:case 169:case 177:case 185:case 193:case 201:case 209:case 217:case 225:case 233:case 241:case 249:
			if (ppu.valid_scanline&ppu.enabled) { FETCH_NT(); EVAL_SPR_Y(); } break;
		/* AT + INC X ( + INC Y ) */
		case 3:  case 11: case 19: case 27: case 35: case 43: case 51: case 59:
			if (ppu.valid_scanline&ppu.enabled) { FETCH_AT(); INC_X(); EVAL_SPR_EMP(); } break;
		case 67: case 75: case 83: case 91: case 99: case 107:case 115:case 123:
		case 131:case 139:case 147:case 155:case 163:case 171:case 179:case 187:case 195:case 203:case 211:case 219:case 227:case 235:case 243:/*case 251:*/
			if (ppu.valid_scanline&ppu.enabled) { FETCH_AT(); INC_X(); EVAL_SPR_Y(); } break;
		case 251: if (ppu.valid_scanline&ppu.enabled) { FETCH_AT(); INC_X(); INC_Y(); EVAL_SPR_Y(); } break;
		/* T1 */
		case 5:  case 13: case 21: case 29: case 37: case 45: case 53: case 61:
			if (ppu.valid_scanline&ppu.enabled) { FETCH_T1(); EVAL_SPR_EMP(); } break;
		case 69: case 77: case 85: case 93: case 101:case 109:case 117:case 125:
		case 133:case 141:case 149:case 157:case 165:case 173:case 181:case 189:case 197:case 205:case 213:case 221:case 229:case 237:case 245:case 253:
			if (ppu.valid_scanline&ppu.enabled) { FETCH_T1(); EVAL_SPR_Y(); } break;
		/* T2 + fill tile buffer */
		/* the row lands 16 pixels ahead of its fetch slot (scc+9 with scc = 8n+7), leaving room for the two prefetched tiles */
		case 7:  case 15: case 23: case 31: case 39: case 47: case 55: case 63:
			if (ppu.enabled) {
				if (ppu.valid_scanline) { FETCH_T2(); EVAL_SPR_EMP(); }
				if (ppu.visible_scanline) { SET_TLINE(ppu.scc+9); } 
			} break;
		case 71: case 79: case 87: case 95: case 103:case 111:case 119:case 127:
		case 135:case 143:case 151:case 159:case 167:case 175:case 183:case 191:case 199:case 207:case 215:case 223:case 231:case 239:case 247:case 255:
			if (ppu.enabled) {
				if (ppu.valid_scanline) { FETCH_T2(); EVAL_SPR_Y(); }
				if (ppu.visible_scanline) { SET_TLINE(ppu.scc+9); }
			} break;
		
		/* hblank */
		case 256:
			ppu.hblank=TRUE;
			if (ppu.valid_scanline) {
				ppu.sprite_pa=ppu.table_sp;
				/* clear both line buffers completely: 0x21*8 = 264 and 0x22*8 = 272 bytes */
				xo=ppu.sprite_buffer; for (i=0x21;i>0;i--) { EMPTY_XO(); }
				xo=ppu.tile_buffer; for (i=0x22;i>0;i--) { EMPTY_XO(); }
			}
			
			#if DEBUG_PPU
			LOG(LOG_PPU,"------- hblank ------- sc:%d cc:%d\n",ppu.sc,ppu.cycles);
			#endif
			
			break;
		
		/* memory fetch 129-160: NT (unused), NT (unused), S1, S2, sprites in parallel */
		/* NT */
		/*case 257:*/case 265:case 273:case 281:case 289:case 297:case 305:case 313:
			if (ppu.valid_scanline&ppu.enabled) { FETCH_NT(); } break;
		/* NT ( + RESET X ) */
		case 257:
			if (ppu.valid_scanline&ppu.enabled) { RESET_X(); FETCH_NT(); } break;
		/* RESET Y */
		/* only on scanline counter 0, i.e. once per frame */
		case 304:
			if (ppu.sc==0) if (ppu.enabled) { RESET_Y(); } break;
		/* NT + sprites */
		case 259:case 267:case 275:case 283:case 291:case 299:case 307:case 315:
			if (ppu.valid_scanline&ppu.enabled) { FETCH_NT(); SET_SPRITE(ppu.scc>>1&BIN8(00011100)); } break;
		/* S1 */
		case 261:case 269:case 277:case 285:case 293:case 301:case 309:case 317:
			if (ppu.valid_scanline&ppu.enabled) { FETCH_S1(); } break;
		/* S2 + fill sprite buffer */
		case 263:case 271:case 279:case 287:case 295:case 303:case 311:case 319:
			if (ppu.enabled) {
				if (ppu.valid_scanline) { FETCH_S2(); }
				if (ppu.sc<240) { SET_SLINE(ppu.scc>>1&BIN8(00011110)); }
			} break;
		
		/* memory fetch 161-168: NT, AT, T1, T2 (prefetch) */
		/* NT */
		case 321:case 329:
			if (ppu.valid_scanline&ppu.enabled) { FETCH_NT(); } break;
		/* AT + INC X */
		case 323:case 331:
			if (ppu.valid_scanline&ppu.enabled) { FETCH_AT(); INC_X(); } break;
		/* T1 */
		case 325: case 333:
			if (ppu.valid_scanline&ppu.enabled) { FETCH_T1(); } break;
		/* T2 + fill tile buffer */
		/* the two prefetched tiles go to tile_buffer offsets 0 and 8 for the next scanline */
		case 327: case 335:
			if (ppu.enabled) {
				if (ppu.valid_scanline)  { FETCH_T2(); }
				if (ppu.sc<240) { SET_TLINE(ppu.scc-327); }
			} break;
		
		/* memory fetch 169-170: NT (unused), NT (unused) */
		case 337:case 339:
			if (ppu.valid_scanline&ppu.enabled) { FETCH_NT(); } break;
		
		/* end */
		case 340:
			ppu.hblank=ppu.sprite_0=ppu.sprite_found=ppu.sr_offset=ppu.srs_offset=0;
			ppu.visible_scanline=ppu.valid_scanline=++ppu.sc<241;
			/* scc is a WORD: ~0 becomes 0xffff, so the increment at skip_pixel wraps it to 0 */
			ppu.scc=~0;
			if (ppu.valid_scanline&ppu.enabled) ppu.sprite_address=0;
			
			if (ppu.sc==242) {
				/* vblank reached: raise the flag, stop catching up until ppu_new_frame() */
				ppu.scc++;
				ppu.vblank=TRUE;
				ppuupdate=&nothing;
				ppu.status|=BIT(7);
				if (ppu.control1&BIT(7)) cpu_set_vblank_nmi_want();
				
				#if DEBUG_PPU
				LOG(LOG_PPU,"------- vblank ------- cc:%d\n",ppu.cycles);
				#endif
				
				goto ppu_execute_end;
			}
			/* drop a stale sprite 0 marker from the last sprite buffer pixel */
			ppu.sprite_buffer[0xff]&=BIN8(11011111);
			
			#if DEBUG_PPU
			LOG(LOG_PPU,"------- new scanline ------- sc:%d cc:%d\n",ppu.sc,ppu.cycles-crystal->ppu_cycle);
			#endif
			
			if (ppu.sc==1) {
				/* on odd frames one extra cycle is consumed here, before the first pixel */
				if (crystal->odd_frame) {
					ppu.cycles-=CRYSTAL_PPU_CYCLE_FIXED;
					ppu.scc++;
					cpu_kill_cycle();
				}
				else goto skip_pixel;
			}
			else goto skip_pixel;
			break;
		default:
			break;
	}
	
	/* draw 1 pixel */
	if (ppu.visible_scanline&!ppu.hblank) {
		
		if (ppu.control2&BIT(3)) tempb=ppu.tile_buffer[ppu.scc+ppu.fine_x];
		else {
			if (ppu.enabled) tempb=0;
			else {
				/* rendering fully off: show the palette entry the address points at, if any */
				if ((ppu.address&0x3f00)==0x3f00) tempb=ppu.address&0xff;
				else tempb=0;
				goto ppu_pixel_end;
			}
		}
		

		/* leftmost 8 pixels: apply the left-column masks from control register 2 */
		if (ppu.scc<8) {
			tempb&=ppu.left_tc_disable_mask;
			ppu.sprite_buffer[ppu.scc]&=ppu.left_sc_disable_mask;
		}
	
		/* sprite pixel flags: 0x80 = present, 0x40 = in front of tiles, 0x20 = sprite 0 */
		if (ppu.control2&BIT(4))
			if (ppu.sprite_buffer[ppu.scc]&0x80) {
				if (tempb&0x80) {
					if (ppu.sprite_buffer[ppu.scc]&0x20) ppu.status|=BIT(6);
					if (ppu.sprite_buffer[ppu.scc]&0x40) tempb=ppu.sprite_buffer[ppu.scc];
				}
				else tempb=ppu.sprite_buffer[ppu.scc];
			}
		
		ppu_pixel_end:
		
		/*ppu.screen[ppu.screen_offset++]=(ppu.ppu_memmap_palette_ptr[tempb&BIN8(00011111)]&ppu.mono_mask)|ppu.emphasis;*/
		
		/* NOTE(review): the pixel goes to `screen`, which is not declared in this file
		   (presumably a global byte buffer from a header), as b,g,r plus one skipped byte;
		   ppu.screen is not written here - confirm this is intended */
		tempw=(ppu.ppu_memmap_palette_ptr[tempb&BIN8(00011111)]&ppu.mono_mask)|ppu.emphasis;
		screen[ppu.screen_offset++]=palette_local[tempw].b;
		screen[ppu.screen_offset++]=palette_local[tempw].g;
		screen[ppu.screen_offset++]=palette_local[tempw].r;
		ppu.screen_offset++;
	}
	
	skip_pixel:
	
	/* account for the cycle just executed and continue until caught up with the CPU */
	ppu.scc++;
	ppu.cycles-=CRYSTAL_PPU_CYCLE_FIXED;
	if (ppu.cycles<=*ppu.cpu_cycles_ptr) goto ppu_execute_end;
	else goto ppu_execute_start;
	ppu_execute_end:
	ppu.rendering=FALSE;
}

/* Do-nothing update hook, installed as ppuupdate for the rest of the frame
   once vblank has started. */
static void nothing(void)
{
}


/* Expands the 256x240 buffer of palette_local indices in ppu.screen into the
   byte buffer `screen`, doubling every pixel horizontally and vertically.
   Each output pixel occupies 4 bytes (blue, green, red, one byte untouched)
   and an output row is 2048 bytes long. */
void ppu_update_screen(void)
{
	BYTE* out=screen;
	WORD src=0;
	int row,col;

	for (row=0;row<240;row++) {
		for (col=0;col<256;col++) {
			const BYTE red=palette_local[ppu.screen[src]].r;
			const BYTE green=palette_local[ppu.screen[src]].g;
			const BYTE blue=palette_local[ppu.screen[src]].b;

			/* top-left and top-right copies */
			out[0]=blue;  out[1]=green;  out[2]=red;
			out[4]=blue;  out[5]=green;  out[6]=red;

			/* the same two pixels on the output row below */
			out[2048]=blue;  out[2049]=green;  out[2050]=red;
			out[2052]=blue;  out[2053]=green;  out[2054]=red;

			out+=8;
			src++;
		}

		/* skip the row that was filled in alongside this one */
		out+=512*4;
	}
}
