/*******************************************************************************
  Snes9x - Portable Super Nintendo Entertainment System (TM) emulator.
 
  (c) Copyright 1996 - 2002 Gary Henderson (gary.henderson@ntlworld.com) and
                            Jerremy Koot (jkoot@snes9x.com)

  (c) Copyright 2001 - 2004 John Weidman (jweidman@slip.net)

  (c) Copyright 2002 - 2004 Brad Jorsch (anomie@users.sourceforge.net),
                            funkyass (funkyass@spam.shaw.ca),
                            Joel Yliluoma (http://iki.fi/bisqwit/)
                            Kris Bleakley (codeviolation@hotmail.com),
                            Matthew Kendora,
                            Nach (n-a-c-h@users.sourceforge.net),
                            Peter Bortas (peter@bortas.org) and
                            zones (kasumitokoduck@yahoo.com)

  C4 x86 assembler and some C emulation code
  (c) Copyright 2000 - 2003 zsKnight (zsknight@zsnes.com),
                            _Demo_ (_demo_@zsnes.com), and Nach

  C4 C++ code
  (c) Copyright 2003 Brad Jorsch

  DSP-1 emulator code
  (c) Copyright 1998 - 2004 Ivar (ivar@snes9x.com), _Demo_, Gary Henderson,
                            John Weidman, neviksti (neviksti@hotmail.com),
                            Kris Bleakley, Andreas Naive

  DSP-2 emulator code
  (c) Copyright 2003 Kris Bleakley, John Weidman, neviksti, Matthew Kendora, and
                     Lord Nightmare (lord_nightmare@users.sourceforge.net

  OBC1 emulator code
  (c) Copyright 2001 - 2004 zsKnight, pagefault (pagefault@zsnes.com) and
                            Kris Bleakley
  Ported from x86 assembler to C by sanmaiwashi

  SPC7110 and RTC C++ emulator code
  (c) Copyright 2002 Matthew Kendora with research by
                     zsKnight, John Weidman, and Dark Force

  S-DD1 C emulator code
  (c) Copyright 2003 Brad Jorsch with research by
                     Andreas Naive and John Weidman
 
  S-RTC C emulator code
  (c) Copyright 2001 John Weidman
  
  ST010 C++ emulator code
  (c) Copyright 2003 Feather, Kris Bleakley, John Weidman and Matthew Kendora

  Super FX x86 assembler emulator code 
  (c) Copyright 1998 - 2003 zsKnight, _Demo_, and pagefault 

  Super FX C emulator code 
  (c) Copyright 1997 - 1999 Ivar, Gary Henderson and John Weidman


  SH assembler code partly based on x86 assembler code
  (c) Copyright 2002 - 2004 Marcus Comstedt (marcus@mc.pp.se) 

 
  Specific ports contains the works of other authors. See headers in
  individual files.
 
  Snes9x homepage: http://www.snes9x.com
 
  Permission to use, copy, modify and distribute Snes9x in both binary and
  source form, for non-commercial purposes, is hereby granted without fee,
  providing that this license information and copyright notice appear with
  all copies and any derived work.
 
  This software is provided 'as-is', without any express or implied
  warranty. In no event shall the authors be held liable for any damages
  arising from the use of this software.
 
  Snes9x is freeware for PERSONAL USE only. Commercial users should
  seek permission of the copyright holders first. Commercial use includes
  charging money for Snes9x or software derived from Snes9x.
 
  The copyright holders request that bug fixes and improvements to the code
  should be forwarded to them so everyone can benefit from the modifications
  in future versions.
 
  Super NES and Super Nintendo Entertainment System are trademarks of
  Nintendo Co., Limited and its subsidiary companies.
*******************************************************************************/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <string.h>

#include "snes9x.h"

#include "memmap.h"
#include "ppu.h"
#include "cpuexec.h"
#include "dma.h"
#include "apu.h"
#include "gfx.h"
#include "sa1.h"
#include "spc7110.h"
#include "sdd1emu.h"

#ifdef DEBUGGER
#include "missing.h"
#endif

// Uncomment to compile in the DMA logging state below.
//#define DMA_DEBUG
#ifdef DMA_DEBUG
// Log file name and stream for DMA debugging. NOTE(review): none of these
// three names is referenced in the part of the file visible here.
static const char dma_log_name[] = "dma.log";
static FILE *dma_fs = NULL;
extern FILE *trace;
#endif

// ABus
// 64 KiB scratch buffer used by SetupSrcABus(): it receives S-DD1
// decompressed data, and SPC7110 data that wraps around the end of the
// decompression buffer, so the transfer loop can read one linear block.
static uint8 buffer[0x10000];

// Per-mode byte counts used by S9xDoHDMA() to advance the table/indirect
// address after each line (defined elsewhere).
extern int HDMA_ModeByteCounts [8];
// Per-channel host pointer to the next HDMA data byte; NULL means "not yet
// resolved" and makes S9xDoHDMA() look the pointer up again.
extern uint8 *HDMAMemPointers [8];
// Per-channel value HDMAMemPointers had when the current table entry was
// loaded; the difference between the two is the number of bytes consumed.
extern uint8 *HDMABasePointers [8];

// Uncomment to make HDMA fetch its data through S9xGetByte() (SNES address
// space) instead of through the cached host pointers.
// #define SETA010_HDMA_FROM_CART

#ifdef SETA010_HDMA_FROM_CART
uint32 HDMARawPointers[8];	// Cart address space pointer
#endif

// Charge one slow bus cycle to the CPU clock and run whatever has become due
// as a result: H-blank processing once CPU.NextEvent is reached, and the APU
// main loop once EXT.NextAPUTimerPos is reached.
//
// Wrapped in do { } while (0) so that "DMA_EXECUTE();" is a single statement
// and is safe inside an unbraced if/else.
#define DMA_EXECUTE() \
	do { \
		CPU.Cycles += SLOW_ONE_CYCLE; \
		if (CPU.Cycles >= CPU.NextEvent) \
			S9xDoHBlankProcessing (); \
		if (CPU.Cycles >= EXT.NextAPUTimerPos) \
			S9xAPUMainLoop (); \
	} while (0)

// Select the A-bus source for an A-bus -> B-bus DMA transfer, with special
// handling for cartridge co-processors.
//
//   d    - DMA channel being executed
//   c    - number of bytes about to be transferred
//   base - (out) host pointer the transfer loop reads from
//   p    - (out) initial 16-bit offset added to base
//   binc - (out) step applied to p after each byte (+1, -1, or 0 when the
//          A address is fixed)
//
// The caller reads each byte as *(base + p) and then does p += binc.
static inline void SetupSrcABus(SDMA *d, int c, uint8 *&base, uint16 &p, int &binc)
{
	//SDD1
	if (Settings.SDD1 && d->AAddressFixed && Memory.FillRAM [0x4801] > 0) {
		// Hacky support for pre-decompressed S-DD1 data
		uint8 *in_ptr = GetBasePointer((d->ABank << 16) | d->AAddress);
		// GetBasePointer can return NULL (the normal path below checks for
		// it too); fall back to ROM instead of offsetting a null pointer.
		if (!in_ptr)
			in_ptr = Memory.ROM;
		in_ptr += d->AAddress;
		SDD1_decompress(buffer, in_ptr, c);
		base = buffer;
		p = 0;
		binc = !d->AAddressDecrement ? 1 : -1;
	}
	//SPC7110
	else if(Settings.SPC7110 &&
			((d->AAddressFixed && d->AAddress == 0x4800) || d->ABank == 0x50)) {
		int i = (s7r.reg4806 << 8) | s7r.reg4805;
#ifdef SPC7110_DEBUG
		// %X expects an unsigned int, so the offset is cast to exactly that.
		printf("DMA Transfer of %04X bytes from %02X%02X%02X:%02X, offset of %04X, internal bank of %04X, multiplier %02X\n",
			   d->TransferBytes,
			   s7r.reg4803,
			   s7r.reg4802,
			   s7r.reg4801,
			   s7r.reg4804,
			   (unsigned)i,
			   s7r.bank50Internal,
			   s7r.AlignBy);
#endif
		// Starting position inside the circular decompression buffer.
		i = (i * s7r.AlignBy + s7r.bank50Internal) % DECOMP_BUFFER_SIZE;
		if((i + c) > DECOMP_BUFFER_SIZE) {
			// The request wraps past the end of bank50: linearise it into
			// the scratch buffer (tail first, then the wrapped head).
			int l0 = DECOMP_BUFFER_SIZE - i;
			memcpy(&buffer[0], &s7r.bank50[i], (size_t)l0);
			memcpy(&buffer[l0], &s7r.bank50[0], (size_t)(c - l0));
			base = buffer;
		}
		else
			base = &s7r.bank50[i];

		s7r.bank50Internal = (s7r.bank50Internal + c) % DECOMP_BUFFER_SIZE;

		// Subtract the transferred byte count from the 16-bit counter held
		// in registers $4809/$480A.
		int ic = (s7r.reg480A << 8) | s7r.reg4809;
		ic -= d->TransferBytes;
		s7r.reg4809 = ic & 0x00ff;
		s7r.reg480A = (ic & 0xff00) >> 8;
		p = 0;
		binc = 1;
	}
	//SA-1
	else if (d->BAddress == 0x18 && SA1.in_char_dma && (d->ABank & 0xf0) == 0x40) {
		// Character conversion output is placed in the last 64 KiB of the
		// ROM buffer and the transfer reads it from there.
		base = &Memory.ROM [CMemory::MAX_ROM_SIZE - 0x10000];
		S9xSA1CharConv (d, base, c);
		p = 0;
		binc = d->AAddressFixed ? 0 : (!d->AAddressDecrement ? 1 : -1);
	}
	//Normal
	else {
		base = GetBasePointer ((d->ABank << 16) + d->AAddress);
		if (!base)
			base = Memory.ROM;
		p = d->AAddress;
		binc = d->AAddressFixed ? 0 : (!d->AAddressDecrement ? 1 : -1);
	}
}

/**********************************************************************************************/
/* S9xDoDMA()                                                                                 */
/* This function performs the general DMA transfer for one channel.                           */
/**********************************************************************************************/
// Channel is the DMA channel number (0-7). The call is ignored when the
// channel is out of range or when another DMA is already running (CPU.InDMA).
//
// A TransferBytes value of 0 means 0x10000 bytes. With TransferDirection
// clear, bytes go from the A bus to B-bus register(s) $2100+BAddress; with it
// set, bytes are read from the B-bus register(s) and written to the A bus.
//
// After the transfer (or after skipping an invalid one):
//   - AAddress is advanced/retreated by the byte count unless it is fixed,
//     and mirrored into $43x2/$43x3;
//   - the byte-count registers $43x5/$43x6, TransferBytes and IndirectAddress
//     are cleared;
//   - the $4801 mirror is cleared when S-DD1 is enabled;
//   - (byte count + 1) slow cycles are charged one at a time via DMA_EXECUTE.

void S9xDoDMA (uint8 Channel)
{
	if (Channel > 7 || CPU.InDMA)
		return;

	CPU.InDMA = TRUE;

	SDMA *d = &DMA[Channel];

#ifdef DEBUGGER
	if (Settings.TraceDMA) {
		sprintf (String,
				 "DMA[%d]: %s Mode: %d 0x%02X%04X->0x21%02X Bytes: %d (%s) V-Line:%ld",
				 Channel,
				 d->TransferDirection ? "read" : "write",
				 d->TransferMode,
				 d->ABank,
				 d->AAddress,
				 d->BAddress,
				 d->TransferBytes,
				 d->AAddressFixed ? "fixed" : (d->AAddressDecrement ? "dec" : "inc"),
				 CPU.V_Counter);

		// Append in place. Passing String as its own "%s" argument makes
		// sprintf's source and destination overlap, which is undefined.
		if (d->BAddress == 0x18 || d->BAddress == 0x19 || d->BAddress == 0x39 || d->BAddress == 0x3a)
			sprintf (String + strlen (String),
					 " VRAM: %04X (%d,%d) %s",
					 PPU.VMA.Address,
					 PPU.VMA.Increment, PPU.VMA.FullGraphicCount,
					 PPU.VMA.High ? "word" : "byte");

		else if (d->BAddress == 0x22 || d->BAddress == 0x3b)
			sprintf (String + strlen (String),
					 " CGRAM: %02X (%x)",
					 PPU.CGADD,
					 PPU.CGFLIP);
		else if (d->BAddress == 0x04 || d->BAddress == 0x38)
			sprintf (String + strlen (String),
					 " OBJADDR: %04X",
					 PPU.OAMAddr);
		S9xMessage (S9X_TRACE, S9X_DMA_TRACE, String);
	}
#endif

	int count = d->TransferBytes;

	if (count == 0)
		count = 0x10000;

	// add cycles
	// (kept separately because the game-specific fixes below may shrink
	// count, while timing and the address update use the requested size)
	int cycles = count;

	uint16 AAddress = d->AAddress;
	int inc = d->AAddressFixed ? 0 : (!d->AAddressDecrement ? 1 : -1);

	//does an invalid DMA actually take time?
	// I'd say yes, since 'invalid' is probably just the WRAM chip
	// not being able to read and write itself at the same time
	if(((d->ABank == 0x7E || d->ABank == 0x7F) && d->BAddress == 0x80) ||
	   (SNESGameFixes.LetsPachi && count == 0x10000)/* patch */)
		goto update_address;

	uint8 Work;

	if (!d->TransferDirection) {
		/* XXX: DMA is potentially broken here for cases where we DMA across
		 * XXX: memmap boundries. A possible solution would be to re-call
		 * XXX: GetBasePointer whenever we cross a boundry, and when
		 * XXX: GetBasePointer returns (0) to take the 'slow path' and use
		 * XXX: S9xGetByte instead of *base. GetBasePointer() would want to
		 * XXX: return (0) for MAP_PPU and whatever else is a register range
		 * XXX: rather than a RAM/ROM block, and we'd want to detect MAP_PPU
		 * XXX: (or specifically, Address Bus B addresses $2100-$21FF in
		 * XXX: banks $00-$3F) specially and treat it as MAP_NONE (since
		 * XXX: PPU->PPU transfers don't work).
		 */

		switch (d->BAddress) {
		case 0x18:
		case 0x19:
			if (IPPU.RenderThisFrame)
				FLUSH_REDRAW ();
			break;
		}

		int binc;
		uint8 *base;
		uint16 p;

		SetupSrcABus(d, count, base, p, binc);

		// Modes 0, 2 and 6: every byte goes to the same register. The most
		// common destinations get dedicated fast-path macros.
		if (d->TransferMode == 0 || d->TransferMode == 2 || d->TransferMode == 6) {
			switch (d->BAddress) {
			case 0x04:
				do {
					Work = *(base + p);
					REGISTER_2104(Work);
					p += binc;
				} while (--count > 0);
				break;

			case 0x18:
#ifndef CORRECT_VRAM_READS
				IPPU.FirstVRAMRead = TRUE;
#endif
				if (!PPU.VMA.FullGraphicCount) {
					do {
						Work = *(base + p);
						REGISTER_2118_linear(Work);
						p += binc;
					} while (--count > 0);
				}
				else {
					do {
						Work = *(base + p);
						REGISTER_2118_tile(Work);
						p += binc;
					} while (--count > 0);
				}
				break;

			case 0x19:
#ifndef CORRECT_VRAM_READS
				IPPU.FirstVRAMRead = TRUE;
#endif
				if (!PPU.VMA.FullGraphicCount) {
					do {
						Work = *(base + p);
						REGISTER_2119_linear(Work);
						p += binc;
					} while (--count > 0);
				}
				else {
					do {
						Work = *(base + p);
						REGISTER_2119_tile(Work);
						p += binc;
					} while (--count > 0);
				}
				break;

			case 0x22:
				do {
					Work = *(base + p);
					REGISTER_2122(Work);
					p += binc;
				} while (--count > 0);
				break;

			case 0x80:
				do {
					Work = *(base + p);
					REGISTER_2180(Work);
					p += binc;
				} while (--count > 0);
				break;

			default:
				do {
					Work = *(base + p);
					S9xSetPPU (Work, 0x2100 + d->BAddress);
					p += binc;
				} while (--count > 0);
				break;
			}
		}
		// Modes 1 and 5: bytes alternate between register +0 and +1.
		else if (d->TransferMode == 1 || d->TransferMode == 5) {
			if (d->BAddress == 0x18) {
				// Write to V-RAM
#ifndef CORRECT_VRAM_READS
				IPPU.FirstVRAMRead = TRUE;
#endif
				if (!PPU.VMA.FullGraphicCount) {

					// Per-game fixes: each one recognises one exact DMA
					// setup and shortens the transfer.

					//"Chousenshi in tokyo dome" illegal DMA transfer exec.
					if(SNESGameFixes.ShinnichiFix &&
					   PPU.VMA.Address == 0x1000 &&
					   PPU.VMA.Increment == 1 &&
					   count == 0x10000 &&
					   d->ABank == 0x04 &&
					   d->AAddress == 0xAB00 &&
					   inc == 1) {
						count = count >> 3;
					}
					//"SD gundam gaiden2 entaku no kishi" illegal DMA transfer exec.
					else if(SNESGameFixes.SD_gundam_gaiden2_entaku_no_kishi &&
							PPU.VMA.Address == 0x1DA5 &&
							PPU.VMA.Increment == 1 &&
							count == 0x853A &&
							d->ABank == 0x7F &&
							d->AAddress == 0x6CF0 &&
							inc == 1) {
						count = count >> 3;
					}
					else if(SNESGameFixes.NBABullsVsBlazersPB &&
							PPU.VMA.Address == 0x4000 &&
							PPU.VMA.Increment == 1 &&
							count == 0x2100 &&
							d->ABank == 0x88 &&
							d->AAddress == 0x8802 &&
							inc == 1) {
						count &= ~0xFFF;
					}
					else if(SNESGameFixes.All_JapanProWrestle &&
							PPU.VMA.Address == 0x6008 &&
							PPU.VMA.Increment == 1 &&
							count == 0x5000 &&
							d->ABank == 0x7F &&
							d->AAddress == 0x0000 &&
							inc == 1) {
						count = count - 0x3000;
					}
					else if(SNESGameFixes.ikari_no_yousai &&
							PPU.VMA.Address == 0x4800 &&
							PPU.VMA.Increment == 1 &&
							count == 0x8000 &&
							d->ABank == 0x8F &&
							(d->AAddress == 0xC400 || d->AAddress == 0xCC00) &&
							inc == 1) {
						count = count >> 3;
					}

					while (count > 1) {
						Work = *(base + p);
						REGISTER_2118_linear(Work);
						p += binc;

						Work = *(base + p);
						REGISTER_2119_linear(Work);
						p += binc;

						count -= 2;
					}
					if (count == 1) {
						Work = *(base + p);
						REGISTER_2118_linear(Work);
						p += binc;
					}
				}
				else {
					while (count > 1) {
						Work = *(base + p);
						REGISTER_2118_tile(Work);
						p += binc;

						Work = *(base + p);
						REGISTER_2119_tile(Work);
						p += binc;

						count -= 2;
					}
					if (count == 1) {
						Work = *(base + p);
						REGISTER_2118_tile(Work);
						p += binc;
					}
				}
			}
			else {
				// DMA mode 1 general case
				while (count > 1) {
					Work = *(base + p);
					S9xSetPPU (Work, 0x2100 + d->BAddress);
					p += binc;

					Work = *(base + p);
					S9xSetPPU (Work, 0x2101 + d->BAddress);
					p += binc;

					count -= 2;
				}
				if (count == 1) {
					Work = *(base + p);
					S9xSetPPU (Work, 0x2100 + d->BAddress);
					p += binc;
				}
			}
		}
		// Modes 3 and 7: register pattern +0, +0, +1, +1.
		else if (d->TransferMode == 3 || d->TransferMode == 7) {
			do {
				Work = *(base + p);
				S9xSetPPU (Work, 0x2100 + d->BAddress);
				p += binc;
				if (count <= 1)
					break;

				Work = *(base + p);
				S9xSetPPU (Work, 0x2100 + d->BAddress);
				p += binc;
				if (count <= 2)
					break;

				Work = *(base + p);
				S9xSetPPU (Work, 0x2101 + d->BAddress);
				p += binc;
				if (count <= 3)
					break;

				Work = *(base + p);
				S9xSetPPU (Work, 0x2101 + d->BAddress);
				p += binc;

				count -= 4;
			} while (count > 0);
		}
		// Mode 4: register pattern +0, +1, +2, +3.
		else if (d->TransferMode == 4) {
			do {
				Work = *(base + p);
				S9xSetPPU (Work, 0x2100 + d->BAddress);
				p += binc;
				if (count <= 1)
					break;

				Work = *(base + p);
				S9xSetPPU (Work, 0x2101 + d->BAddress);
				p += binc;
				if (count <= 2)
					break;

				Work = *(base + p);
				S9xSetPPU (Work, 0x2102 + d->BAddress);
				p += binc;
				if (count <= 3)
					break;

				Work = *(base + p);
				S9xSetPPU (Work, 0x2103 + d->BAddress);
				p += binc;

				count -= 4;
			} while (count > 0);
		}
	}//if (!d->TransferDirection)
	else {
		/* XXX: DMA is potentially broken here for cases where the dest is
		 * XXX: in the Address Bus B range. Note that this bad dest may not
		 * XXX: cover the whole range of the DMA though, if we transfer
		 * XXX: 65536 bytes only 256 of them may be Address Bus B.
		 */

		// B-bus register offset used for the n-th byte of a transfer, per
		// mode. Every mode's pattern repeats with a period that divides 4,
		// so indexing with (n & 3) reproduces it for any length.
		static const uint8 ReadRegOffset [8][4] = {
			{0, 0, 0, 0},	// mode 0
			{0, 1, 0, 1},	// mode 1
			{0, 0, 0, 0},	// mode 2
			{0, 0, 1, 1},	// mode 3
			{0, 1, 2, 3},	// mode 4
			{0, 1, 0, 1},	// mode 5
			{0, 0, 0, 0},	// mode 6
			{0, 0, 1, 1}	// mode 7
		};

		// A mode outside 0-7 (e.g. the 0xff left by S9xResetDMA) transfers
		// nothing. Without this guard no byte would ever be consumed and the
		// loop could not terminate.
		if (d->TransferMode <= 7) {
			const uint8 *offsets = ReadRegOffset [d->TransferMode];

			for (int n = 0; count > 0; n++, count--) {
				Work = S9xGetPPU (0x2100 + d->BAddress + offsets [n & 3]);
				S9xSetByte (Work, (d->ABank << 16) + AAddress);
				AAddress += inc;
			}
		}
	}

update_address:
	// Super Punch-Out requires that the A-BUS address be updated after the
	// DMA transfer.
	if (inc > 0)
		d->AAddress += cycles;
	else if (inc < 0)
		d->AAddress -= cycles;
	Memory.FillRAM[0x4302 + (Channel << 4)] = (uint8) d->AAddress;
	Memory.FillRAM[0x4303 + (Channel << 4)] = d->AAddress >> 8;

	// Secret of the Mana requires that the DMA bytes transfer count be set to
	// zero when DMA has completed.
	Memory.FillRAM [0x4305 + (Channel << 4)] = 0;
	Memory.FillRAM [0x4306 + (Channel << 4)] = 0;

	d->IndirectAddress = 0;
	d->TransferBytes = 0;

	if(Settings.SDD1)
		Memory.FillRAM [0x4801] = 0;

	//CPU.Cycles += SLOW_ONE_CYCLE * (count + 1);
	cycles += 1;

	//"CIRCUIT USA" HDMA is not executed while executing DMA.
	// Cycles are charged one at a time so that DMA_EXECUTE can run H-blank
	// and APU processing at the points where they fall due.
	for(int c = 0; c < cycles; c++)
		DMA_EXECUTE();

	CPU.InDMA = FALSE;
}

#ifdef DEBUGGER
// Forward declaration of the sound-DSP trace hook, which is defined outside
// this file. S9xDoHDMA() calls it with just a message when
// Settings.TraceSoundDSP is set and an HDMA channel targets $2140-$2143; the
// seven integer arguments all default to 0.
void S9xTraceSoundDSP (const char *s, int i1 = 0, int i2 = 0, int i3 = 0,
					   int i4 = 0, int i5 = 0, int i6 = 0, int i7 = 0);
#endif

// B-bus register write used by H-DMA.
//
// For the VRAM data ports ($2118/$2119) and the WRAM data port ($2180) the
// write does not go through S9xSetPPU: only the port's address pointer is
// stepped and the byte is stored in the FillRAM register mirror.
// HDMA writes to WRAM break "NANGOKUSYONEN PAPUWA"; handling them this way
// removed the need for a ROM hack patch.
// Every other register is forwarded to S9xSetPPU unchanged.
//
//   d       - channel performing the write (not used here)
//   Byte    - value being written
//   Address - full B-bus register address ($21xx)
STATIC inline void HDMASetPPU(SDMA *d, uint8 Byte, uint16 Address)
{
	if (Address == 0x2118) {
		// Low VRAM port: the address steps here only when VMA.High is clear.
		if (!PPU.VMA.High)
			PPU.VMA.Address += PPU.VMA.Increment;
		Memory.FillRAM [0x2118] = Byte;
		return;
	}

	if (Address == 0x2119) {
		// High VRAM port: the address steps here only when VMA.High is set.
		if (PPU.VMA.High)
			PPU.VMA.Address += PPU.VMA.Increment;
		Memory.FillRAM [0x2119] = Byte;
		return;
	}

	if (Address == 0x2180) {
		// Step the WRAM port address, wrapping at 17 bits.
		++PPU.WRAM;
		PPU.WRAM &= 0x1FFFF;
		Memory.FillRAM [0x2180] = Byte;
		return;
	}

	S9xSetPPU (Byte, Address);
}

void S9xStartHDMA ()
{
	if (Settings.DisableHDMA)
		IPPU.HDMA = 0;
	else
#ifdef DEBUGGER
		missing.hdma_this_frame = IPPU.HDMA = Memory.FillRAM [0x420c];
#else
		IPPU.HDMA = Memory.FillRAM [0x420c];
#endif
	
	//per anomie timing post
	if(IPPU.HDMA != 0)
		CPU.Cycles += ONE_CYCLE * 3;
    
	IPPU.HDMAStarted = TRUE;

	for (int i = 0; i < 8; i++) {
		HDMAMemPointers [i] = NULL;
#ifdef SETA010_HDMA_FROM_CART
		HDMARawPointers [i] = 0;
#endif
		if(!(IPPU.HDMA & (1 << i)))
			continue;

		CPU.Cycles += SLOW_ONE_CYCLE;
		DMA [i].LineCount = 0;
		DMA [i].FirstLine = TRUE;
		DMA [i].Address = DMA [i].AAddress;
		if(DMA[i].HDMAIndirectAddressing)
			CPU.Cycles += (SLOW_ONE_CYCLE << 2);
    }
}

// Run one pass of H-DMA for every channel whose bit is set in `byte`.
//
// For each selected channel this loads a new table entry when the current one
// is exhausted (LineCount == 0), resolves the data pointer if necessary, and
// then writes one unit of data (1, 2 or 4 bytes depending on TransferMode) to
// the B-bus register(s) at $2100+BAddress via HDMASetPPU.
//
// Returns `byte` with the bits cleared for channels that are finished: those
// whose table entry yields a zero line count, and those whose source address
// cannot be resolved to a host pointer.
//
// CPU.InDMA is set for the duration of the call, so the S9xGetByte/S9xGetWord
// calls incur no cycle charges of their own; cycles are added here explicitly.
uint8 S9xDoHDMA (uint8 byte)
{
	// Bytes written per pass for each transfer mode.
	static const uint8 HDMAUnitSize [8] = {1, 2, 2, 4, 4, 4, 2, 4};
	// B-bus register offset for each byte of the unit, per transfer mode.
	static const uint8 HDMARegOffset [8][4] = {
		{0, 0, 0, 0},	// mode 0
		{0, 1, 0, 1},	// mode 1
		{0, 0, 0, 0},	// mode 2
		{0, 0, 1, 1},	// mode 3
		{0, 1, 2, 3},	// mode 4
		{0, 1, 0, 1},	// mode 5
		{0, 0, 0, 0},	// mode 6
		{0, 0, 1, 1}	// mode 7
	};

	CPU.InDMA = TRUE;
	CPU.Cycles += ONE_CYCLE * 3;

	uint8 mask = 1;
	for (int ch = 0; ch < 8; ch++, mask <<= 1) {
		if(!(byte & mask))
			continue;

		SDMA *d = &DMA[ch];

		if (d->LineCount == 0) {
			//remember, InDMA is set.
			//Get/Set incur no charges!
			uint8 line = S9xGetByte ((d->ABank << 16) + d->Address);
			CPU.Cycles += SLOW_ONE_CYCLE;

			if (line == 0x80) {
				d->Repeat = TRUE;
				d->LineCount = 128;
			}
			else {
				d->Repeat = !(line & 0x80);
				d->LineCount = line & 0x7f;
			}

			// Disable H-DMA'ing into V-RAM (register 2118) for Hook
			/* XXX: instead of d->BAddress == 0x18, make S9xSetPPU fail
			 * XXX: writes to $2118/9 when appropriate
			 */
			if (d->LineCount == 0) {
				// End of table: retire the channel and publish how far the
				// data pointer advanced in the $43x5/$43x6 mirror.
				byte &= ~mask;
				d->IndirectAddress += HDMAMemPointers [ch] - HDMABasePointers [ch];
				Memory.FillRAM [0x4305 + (ch << 4)] = (uint8) d->IndirectAddress;
				Memory.FillRAM [0x4306 + (ch << 4)] = d->IndirectAddress >> 8;
				continue;
			}

			d->Address++;
			d->FirstLine = TRUE;

			if (d->HDMAIndirectAddressing) {
				d->IndirectBank = Memory.FillRAM [0x4307 + (ch << 4)];
				//again, no cycle charges while InDMA is set!
				d->IndirectAddress = S9xGetWord ((d->ABank << 16) + d->Address);
				CPU.Cycles += SLOW_ONE_CYCLE << 2;
				d->Address += 2;
			}
			else {
				d->IndirectBank = d->ABank;
				d->IndirectAddress = d->Address;
			}

			HDMABasePointers [ch] = HDMAMemPointers [ch] =
				S9xGetMemPointer ((d->IndirectBank << 16) + d->IndirectAddress);
#ifdef SETA010_HDMA_FROM_CART
			HDMARawPointers [ch] = (d->IndirectBank << 16) + d->IndirectAddress;
#endif
		}
		else
			CPU.Cycles += SLOW_ONE_CYCLE;

		if (!HDMAMemPointers [ch]) {
			// No cached pointer (e.g. cleared by S9xStartHDMA): resolve it
			// from the current address.
			if (!d->HDMAIndirectAddressing) {
				d->IndirectBank = d->ABank;
				d->IndirectAddress = d->Address;
			}
#ifdef SETA010_HDMA_FROM_CART
			HDMARawPointers [ch] = (d->IndirectBank << 16) + d->IndirectAddress;
#endif
			if (!(HDMABasePointers [ch] = HDMAMemPointers [ch] =
				  S9xGetMemPointer ((d->IndirectBank << 16) + d->IndirectAddress))) {
				/* XXX: Instead of this, goto a slow path that first
				 * XXX: verifies src!=Address Bus B, then uses
				 * XXX: S9xGetByte(). Or make S9xGetByte return OpenBus
				 * XXX: (probably?) for Address Bus B while inDMA.
				 */
				byte &= ~mask;
				continue;
			}
			// Uncommenting the following line breaks Punchout - it starts
			// H-DMA during the frame.
			//d->FirstLine = TRUE;
		}

		// When Repeat is set, data is written only on the first line of an
		// entry; later lines just count down.
		if (d->Repeat && !d->FirstLine) {
			d->LineCount--;
			continue;
		}

		// uniracer hack
		if (d->BAddress == 0x04) {
			if(SNESGameFixes.Uniracers) {
				PPU.OAMAddr = 0x10c;
				PPU.OAMFlip = 0;
			}
		}

#ifdef DEBUGGER
		if (Settings.TraceSoundDSP && d->FirstLine &&
			d->BAddress >= 0x40 && d->BAddress <= 0x43)
			S9xTraceSoundDSP ("Spooling data!!!\n");
		if (Settings.TraceHDMA && d->FirstLine) {
			sprintf (String, "H-DMA[%d] (%d) 0x%02X%04X->0x21%02X %s, Count: %3d, Rep: %s, V-LINE: %3ld %02X%04X",
					 ch, d->TransferMode, d->IndirectBank,
					 d->IndirectAddress,
					 d->BAddress,
					 d->HDMAIndirectAddressing ? "ind" : "abs",
					 d->LineCount,
					 d->Repeat ? "yes" : "no ", CPU.V_Counter,
					 d->ABank, d->Address);
			S9xMessage (S9X_TRACE, S9X_HDMA_TRACE, String);
		}
#endif

		// A mode outside 0-7 (e.g. the 0xff left by S9xResetDMA) writes
		// nothing and leaves the addresses alone; indexing the per-mode
		// tables with it would read out of bounds.
		if (d->TransferMode <= 7) {
			const int unit = HDMAUnitSize [d->TransferMode];
			const uint8 *offsets = HDMARegOffset [d->TransferMode];

			for (int k = 0; k < unit; k++) {
#ifdef SETA010_HDMA_FROM_CART
				HDMASetPPU (d, S9xGetByte (HDMARawPointers [ch] + k),
							0x2100 + d->BAddress + offsets [k]);
#else
				HDMASetPPU (d, *(HDMAMemPointers [ch] + k),
							0x2100 + d->BAddress + offsets [k]);
#endif
				CPU.Cycles += SLOW_ONE_CYCLE;
			}

			// Both pointers advance by exactly the unit size, in every mode.
#ifdef SETA010_HDMA_FROM_CART
			HDMARawPointers [ch] += unit;
#endif
			HDMAMemPointers [ch] += unit;

			if (!d->HDMAIndirectAddressing)
				d->Address += HDMA_ModeByteCounts [d->TransferMode];
			d->IndirectAddress += HDMA_ModeByteCounts [d->TransferMode];
		}
		/* XXX: Check for d->IndirectAddress crossing a mapping boundry,
		 * XXX: and invalidate HDMAMemPointers[ch]
		 */
		d->FirstLine = FALSE;
		d->LineCount--;
	}
	CPU.InDMA = FALSE;
	return (byte);
}

// Put all eight DMA channels and their register mirrors into the reset state.
//
// Each channel's fields are set to all-ones (0xff / 0xffff) except the
// direction, indirect-addressing and decrement flags, which are cleared, and
// AAddressFixed, which is set. In the FillRAM mirror, registers $43x0-$43xB
// and $43xF of every channel are filled with 0xff.
//
// `full` is accepted for interface compatibility; it is not consulted here.
void S9xResetDMA (bool8 full)
{
	for (int ch = 0; ch < 8; ch++) {
		SDMA *chan = &DMA [ch];

		chan->TransferDirection = FALSE;
		chan->HDMAIndirectAddressing = FALSE;
		chan->AAddressFixed = TRUE;
		chan->AAddressDecrement = FALSE;
		chan->TransferMode = 0xff;
		chan->ABank = 0xff;
		chan->AAddress = 0xffff;
		chan->Address = 0xffff;
		chan->BAddress = 0xff;
		chan->TransferBytes = 0xffff;

		// Register mirror for this channel lives at $4300 + ch * 0x10.
		const int regs = 0x4300 + (ch << 4);
		for (int r = 0; r < 12; r++)
			Memory.FillRAM [regs + r] = 0xff;
		Memory.FillRAM [regs + 0xf] = 0xff;
	}
}

