/*
Copyright (C) 2001 StrmnNrmn

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/

#ifndef __MEMORY_H__
#define __MEMORY_H__

#include "Debug/DBGConsole.h"

#include "OSHLE/ultra_rcp.h"

// I've taken out the memory region checking (for now at least)
// I was getting some very strange bugs with the Memory_AllocRegion
// function (which I think was a compiler bug, but I wasn't sure).
// In any case, reads and writes to the hardware registers are
// relatively rare, and so the actual speedup is likely to be very
// slight.
//
// With the release-build variant below commented out, the macro always
// expands to its argument. The argument is parenthesized so that the
// expansion behaves as a single operand wherever it is used (for example
// under a leading ! or next to an arithmetic operator).
//#ifdef DAEDALUS_RELEASE_BUILD
//#define MEMORY_BOUNDS_CHECKING(x) 1
//#else
#define MEMORY_BOUNDS_CHECKING(x) (x)
//#endif

// Identifies each emulated memory buffer. The enumerators are used as
// indices into g_pMemoryBuffers, and NUM_MEM_BUFFERS sizes that array.
// The trailing comment on each entry is the size noted for that buffer.
enum MEMBANKTYPE
{
	// Slot 0 is deliberately unused: it simplifies the code so that we
	// don't have to check for illegal memory accesses.
	MEM_UNUSED = 0,

	MEM_RD_RAM,			// 4 or 8 MB (4/8 * 1024 * 1024)
	MEM_SP_MEM,			// 0x2000
	MEM_SP_PC_REG,		// 0x04
	MEM_PIF_RAM,		// 0x7C0 + 0x40

	MEM_RD_REG0,		// 0x30 (this has changed - used to be 1Mb)
	MEM_RD_REG4,		// 0x30
	MEM_RD_REG8,		// 0x30
	MEM_SP_REG,			// 0x20
	MEM_DPC_REG,		// 0x20
	MEM_DPS_REG,		// 0x10
	MEM_MI_REG,			// 0x10
	MEM_VI_REG,			// 0x38
	MEM_AI_REG,			// 0x18
	MEM_PI_REG,			// 0x34
	MEM_RI_REG,			// 0x20
	MEM_SI_REG,			// 0x1C

	MEM_SRAM,			// 0x40000
	MEM_CARTROM,		// Variable

	NUM_MEM_BUFFERS		// Count of the entries above (not a buffer itself)
};

// Byte counts for 4 MB and 8 MB (the two sizes noted for MEM_RD_RAM).
#define MEMORY_4_MEG (4 * 1024 * 1024)
#define MEMORY_8_MEG (8 * 1024 * 1024)


// Globals defined elsewhere (this header only declares them).
// NOTE(review): g_dwRamSize is presumably the current RDRAM size in bytes
// (MEMORY_4_MEG or MEMORY_8_MEG) - confirm against the definition.
extern DWORD g_dwRamSize;
// One buffer pointer per MEMBANKTYPE value; indexed with the MEM_* enumerators.
extern void *g_pMemoryBuffers[NUM_MEM_BUFFERS];


// Set-up / tear-down entry points for the emulated memory system.
// NOTE(review): the implementations are not visible in this header
// (presumably Memory.cpp) - confirm exact semantics and call order there.
HRESULT Memory_Init();
void Memory_Fini(void);
void Memory_Reset(DWORD dwMainMem);
void Memory_Cleanup();

// Cartridge buffer allocation / release.
// I had to put the __cdecl in for compatibility with unziprom
void* __cdecl Memory_AllocCart( u32 cart_size );
void __cdecl Memory_FreeCart();

// NOTE(review): presumably toggle handling of the SI region - confirm in the implementation.
void Memory_SiDisable();
void Memory_SiEnable();

// Handler signatures stored in the address lookup tables.
// MemFastFunction: takes an address and returns a pointer
//   (called through FuncTableReadAddress / FuncTableWriteAddress).
// MemWriteValueFunction: takes an address and a value to write
//   (called through FuncTableWriteValueAddress).
// InternalMemFastFunction: takes an address and an out-pointer and returns a DWORD
//   (called through InternalReadAddress / InternalWriteAddress).
//   NOTE(review): the meaning of the DWORD result is not visible here - confirm in the implementation.
typedef void * (*MemFastFunction )(DWORD dwAddress);
typedef void (*MemWriteValueFunction )(DWORD dwAddress, DWORD dwValue);
typedef DWORD (*InternalMemFastFunction)(DWORD dwAddress, void ** pTranslated);

/* Modified by Lkb (24/8/2001)
   These tables were declared as pointers and dynamically allocated.
   However, by declaring them as pointers to access the tables the interpreter must use code like this:

   MOV EAX, DWORD PTR [address_of_the_variable_g_ReadAddressLookupTable]
   MOV EAX, DWORD PTR [EAX + desired_offset]

   Instead, by declaring them as statically allocated arrays, the address of each table is "known" at compile time
   (at load time it may be relocated, but the code referencing it will be patched)
   and the interpreter can use code like this:

   MOV EAX, DWORD PTR [address_of_the_array_g_ReadAddressLookupTable + desired_offset]

   Note that dynarec-generated code is not affected by this

   The exotic construction is required to ensure page-alignment

   Memory.cpp also changed appropriately
*/

// Lookup tables used by the memory access helpers in this header. Each table
// has 0x4000 entries and is indexed by (address >> 18), i.e. one entry per
// 256 KB region of a 32-bit address space.
//
// For debugging, it's more important to be able to use the debugger, so
// non-release builds declare every table as an ordinary global array.
#ifndef DAEDALUS_RELEASE_BUILD
extern MemFastFunction g_ReadAddressLookupTable[0x4000];
extern MemFastFunction g_WriteAddressLookupTable[0x4000];
extern MemWriteValueFunction g_WriteAddressValueLookupTable[0x4000];
extern InternalMemFastFunction InternalReadFastTable[0x4000];
extern InternalMemFastFunction InternalWriteFastTable[0x4000];
extern void* g_ReadAddressPointerLookupTable[0x4000];
extern void* g_WriteAddressPointerLookupTable[0x4000];

#else /* DAEDALUS_RELEASE_BUILD */

// Release builds gather all of the tables into a single struct declared with
// PAGE_ALIGN alignment (ALIGNED_TYPE / ALIGNED_EXTERN are defined elsewhere).
#pragma pack(push, 1)

ALIGNED_TYPE(struct memory_tables_struct_t, PAGE_ALIGN)
{
	// Handler tables (function pointers)
	MemFastFunction _g_ReadAddressLookupTable[0x4000];
	MemFastFunction _g_WriteAddressLookupTable[0x4000];
	MemWriteValueFunction _g_WriteAddressValueLookupTable[0x4000];

	InternalMemFastFunction _InternalReadFastTable[0x4000];
	InternalMemFastFunction _InternalWriteFastTable[0x4000];

	// Pointer tables used by the fast path of ReadAddress / WriteAddress
	void* _g_ReadAddressPointerLookupTable[0x4000];
	void* _g_WriteAddressPointerLookupTable[0x4000];
};

ALIGNED_EXTERN(memory_tables_struct_t, memory_tables_struct, PAGE_ALIGN);
#pragma pack(pop)

// Map the usual table names onto the struct members, so code using the tables
// is written the same way in both build flavours.
#define g_ReadAddressLookupTable (memory_tables_struct._g_ReadAddressLookupTable)
#define g_WriteAddressLookupTable (memory_tables_struct._g_WriteAddressLookupTable)
#define g_WriteAddressValueLookupTable (memory_tables_struct._g_WriteAddressValueLookupTable)
#define InternalReadFastTable (memory_tables_struct._InternalReadFastTable)
#define InternalWriteFastTable (memory_tables_struct._InternalWriteFastTable)
#define g_ReadAddressPointerLookupTable (memory_tables_struct._g_ReadAddressPointerLookupTable)
#define g_WriteAddressPointerLookupTable (memory_tables_struct._g_WriteAddressPointerLookupTable)

#endif /* DAEDALUS_RELEASE_BUILD */

/* Added by Lkb (24/8/2001)
   These tables are used to implement a faster memory system similar to the one used in gnuboy (http://gnuboy.unix-fu.org/ - read docs/HACKING).
   However, instead of testing the entry for zero as gnuboy does, Daedalus checks the sign of the result of the addition.
   When the pointer table entry is valid, this should be faster, since instead of MOV/TEST/ADD (1+1+1 uops) it uses just ADD mem (2 uops).
   But when the pointer table entry is invalid, it may be slower because it computes the address twice.

   # Old system:
   .intel_syntax
   MOV EAX, address
   MOV ECX, EAX
   SHR EAX, 18
   CALL DWORD PTR [g_ReadAddressLookupTable + EAX*4]
   # --> (for RAM)
   ADD ECX, [rambase_variable]
   MOV EAX, ECX
   RET

   # gnuboy system:
   .intel_syntax   
   MOV EAX, address
   MOV EDX, EAX
   SHR EDX, 18
   MOV ECX, [g_ReadAddressPointerLookupTable + EDX*4]
   TEST ECX, ECX
   JS pointer_null # usually not taken - thus forward branch
   ADD EAX, ECX
pointer_null_return_x:
#   [...] <rest of function code>
#   RET

pointer_null_x:
   MOV ECX, EAX
   CALL DWORD PTR [g_ReadAddressLookupTable + EDX*4]
#  --> (for RAM)
#  ADD ECX, [rambase_variable]
#  MOV EAX, ECX
#  RET
#  <--
   JMP pointer_null_return

   # New system:
   .intel_syntax   
   MOV EAX, address
   MOV EDX, EAX
   SHR EDX, 18
   ADD EAX, [g_ReadAddressPointerLookupTable + EDX*4]
   JS pointer_null # usually not taken - thus forward branch
pointer_null_return_x:
#   [...] <rest of function code>
#   RET

pointer_null_x:
   MOV ECX, address
   CALL DWORD PTR [g_ReadAddressLookupTable + EDX*4]
#  --> (for RAM)
#  ADD ECX, [rambase_variable]
#  MOV EAX, ECX
#  RET
#  <--
   JMP pointer_null_return
   
   Note however that the compiler may generate worse code.

   The old system is still usable (and it is required even if the new one is used, since the new one falls back to the old one for accesses to memory-mapped hw registers and similar areas)

   TODO: instead of looking up TLB entries each time TLB-mapped memory is used, it is probably much faster to change the pointer table every time the TLB is modified
*/

// Computes the "invalid" marker value for pointer lookup table slot `entry`
// (presumably stored into the pointer tables by the set-up code - confirm
// in the implementation file).
//
// The value is 0xf0000000 minus the base address of the slot's region
// (entry << 18), so adding any address inside that region gives
// 0xf0000000 + offset. With 32-bit pointers that sum is negative when viewed
// as a signed integer, which is the condition ReadAddress / WriteAddress use
// to fall back to the handler tables.
//
// The shift is performed on an unsigned value: entry << 18 no longer fits in
// a signed 32-bit int once entry reaches 0x2000 (the tables have 0x4000
// slots), and overflowing a signed shift is undefined behaviour.
//
// NOTE(review): the result is computed in 32 bits and then converted to a
// pointer, so with 64-bit pointers the sign test would not see the sum as
// negative - confirm before building for a 64-bit target.
inline void* Memory_GetInvalidPointerTableEntry(int entry)
{
	return (void*)(0xf0000000 - ((unsigned int)entry << 18));
}

// Slow-path accessors: dispatch through the per-region handler tables.
// The handler is selected by (dwAddr >> 18) and is then called with dwAddr
// (and dwValue for the write-value form).
// Note that dwAddr appears twice in each expansion, so an argument with side
// effects (e.g. addr++) would be evaluated twice.
#define FuncTableReadAddress(dwAddr)  (void *)(g_ReadAddressLookupTable)[(dwAddr)>>18](dwAddr)
#define FuncTableWriteAddress(dwAddr)  (void *)(g_WriteAddressLookupTable)[(dwAddr)>>18](dwAddr)
#define FuncTableWriteValueAddress(dwAddr, dwValue)  (g_WriteAddressValueLookupTable)[(dwAddr)>>18](dwAddr, dwValue)

#if 0
#define ReadAddress FuncTableReadAddress
#define WriteAddress FuncTableWriteAddress
#define WriteValueAddress FuncTableWriteValueAddress
#else

// Translates addr into a pointer suitable for reading.
// Fast path: the read pointer-table entry for addr's region is added to addr;
// if the sum is non-negative as an INT_PTR it is returned as the pointer.
// Slow path: otherwise the region's handler is called via FuncTableReadAddress.
inline void* ReadAddress (u32 addr)
{
	const UINT_PTR translated = (UINT_PTR)g_ReadAddressPointerLookupTable[addr >> 18] + addr;

	// A negative sum marks a region with no direct pointer (expected to be rare).
#ifdef __GNUC__
	if(__builtin_expect ((INT_PTR)translated < 0, 0))
#else
	if((INT_PTR)translated < 0)
#endif
	{
		return FuncTableReadAddress (addr);
	}

	return (void*)(translated);
}

// Translates addr into a pointer suitable for writing.
// Fast path: the write pointer-table entry for addr's region is added to addr;
// if the sum is non-negative as an INT_PTR it is returned as the pointer.
// Slow path: otherwise the region's handler is called via FuncTableWriteAddress.
inline void* WriteAddress (u32 addr)
{
	const UINT_PTR translated = (UINT_PTR)g_WriteAddressPointerLookupTable[addr >> 18] + addr;

	// A negative sum marks a region with no direct pointer (expected to be rare).
#ifdef __GNUC__
	if(__builtin_expect ((INT_PTR)translated < 0, 0))
#else
	if((INT_PTR)translated < 0)
#endif
	{
		return FuncTableWriteAddress (addr);
	}

	return (void*)(translated);
}

// Writes the 32-bit value to addr.
// Fast path: the write pointer-table entry for addr's region is added to addr;
// if the sum is non-negative as an INT_PTR, value is stored directly through it.
// Slow path: otherwise the region's write-value handler is called via
// FuncTableWriteValueAddress.
inline void WriteValueAddress (u32 addr, u32 value)
{
	const UINT_PTR translated = (UINT_PTR)g_WriteAddressPointerLookupTable[addr >> 18] + addr;

	// A negative sum marks a region with no direct pointer (expected to be rare).
#ifdef __GNUC__
	if(__builtin_expect ((INT_PTR)translated < 0, 0))
#else
	if((INT_PTR)translated < 0)
#endif
	{
		FuncTableWriteValueAddress (addr, value);
		return;
	}

	*(u32*)(translated) = value;
}
#endif /* 0 */

// Dispatch through the "internal" handler tables: the handler is selected by
// (dwAddr >> 18) and called with dwAddr and pTrans (a void ** out-parameter,
// per InternalMemFastFunction). dwAddr appears twice in the expansion, so
// avoid arguments with side effects.
#define InternalReadAddress(dwAddr, pTrans)  (InternalReadFastTable)[(dwAddr)>>18](dwAddr, pTrans)
#define InternalWriteAddress(dwAddr, pTrans)  (InternalWriteFastTable)[(dwAddr)>>18](dwAddr, pTrans)

// Reads a 64-bit value at dwAddress (translated with ReadAddress) and returns
// it with its upper and lower 32-bit halves exchanged.
// NOTE(review): the swap presumably accounts for memory being held as
// host-order 32-bit words - confirm.
inline u64 Read64Bits(DWORD dwAddress)
{
	const u64 qwRaw = *(u64 *)ReadAddress(dwAddress);
	return (qwRaw << 32) + (qwRaw >> 32);
}

// Reads the 32-bit value at dwAddress (translated with ReadAddress).
inline u32 Read32Bits(const DWORD dwAddress)
{
	const u32 * const pSrc = (u32 *)ReadAddress(dwAddress);
	return *pSrc;
}

// Reads the 16-bit value at (dwAddress ^ 0x02), translated with ReadAddress.
// NOTE(review): the XOR presumably compensates for memory being held as
// host-order 32-bit words - confirm.
inline u16 Read16Bits(DWORD dwAddress)
{
	const u16 * const pSrc = (u16 *)ReadAddress(dwAddress ^ 0x02);
	return *pSrc;
}

// Reads the byte at (dwAddress ^ 0x03), translated with ReadAddress.
// NOTE(review): the XOR presumably compensates for memory being held as
// host-order 32-bit words - confirm.
inline BYTE Read8Bits(DWORD dwAddress)
{
	const u8 * const pSrc = (u8 *)ReadAddress(dwAddress ^ 0x03);
	return *pSrc;
}

// Useful defines for making code look nicer:
// Typed views (u8 / s8 / u16 / u32 pointers) of the start of the MEM_RD_RAM buffer.
#define g_pu8RamBase ((u8*)g_pMemoryBuffers[MEM_RD_RAM])
#define g_ps8RamBase ((s8*)g_pMemoryBuffers[MEM_RD_RAM])
#define g_pu16RamBase ((u16*)g_pMemoryBuffers[MEM_RD_RAM])
#define g_pu32RamBase ((u32*)g_pMemoryBuffers[MEM_RD_RAM])

// Typed views (u8 / s8 / u16 / u32 pointers) of the start of the MEM_SP_MEM buffer.
#define g_pu8SpMemBase ((u8*)g_pMemoryBuffers[MEM_SP_MEM])
#define g_ps8SpMemBase ((s8*)g_pMemoryBuffers[MEM_SP_MEM])
#define g_pu16SpMemBase ((u16*)g_pMemoryBuffers[MEM_SP_MEM])
#define g_pu32SpMemBase ((u32*)g_pMemoryBuffers[MEM_SP_MEM])


// Writes qwData, with its upper and lower 32-bit halves exchanged, to
// dwAddress (translated with WriteAddress).
inline void Write64Bits(DWORD dwAddress, u64 qwData)
{
	const u64 qwSwapped = (qwData << 32) + (qwData >> 32);
	u64 * const pDest = (u64 *)WriteAddress(dwAddress);
	*pDest = qwSwapped;
}

// Writes the 32-bit value dwData to dwAddress. This goes through
// WriteValueAddress (rather than storing through WriteAddress), so regions
// without a direct pointer are handled by their write-value handler.
inline void Write32Bits(const DWORD dwAddress, u32 dwData)
{
	WriteValueAddress(dwAddress, dwData);
}

// Writes the 16-bit value wData to (dwAddress ^ 0x2), translated with WriteAddress.
inline void Write16Bits(DWORD dwAddress, u16 wData)
{
	u16 * const pDest = (u16 *)WriteAddress(dwAddress ^ 0x2);
	*pDest = wData;
}
// Writes the byte nData to (dwAddress ^ 0x3), translated with WriteAddress.
inline void Write8Bits(DWORD dwAddress, u8 nData)
{
	u8 * const pDest = (u8 *)WriteAddress(dwAddress ^ 0x3);
	*pDest = nData;
}


//////////////////////////////////////////////////////////////
// Quick Read/Write methods that require a base returned by
// ReadAddress or InternalReadAddress etc

// Reads the 64-bit value at pBase + dwOffset and returns it with its upper
// and lower 32-bit halves exchanged. No address translation is performed.
inline u64 QuickRead64Bits(BYTE *pBase, DWORD dwOffset)
{
	const u64 * const pSrc = (u64 *)(pBase + dwOffset);
	const u64 qwRaw = *pSrc;
	return (qwRaw << 32) + (qwRaw >> 32);
}

// Reads the 32-bit value at pBase + dwOffset. No address translation is performed.
inline u32 QuickRead32Bits(BYTE *pBase, DWORD dwOffset)
{
	const u32 * const pSrc = (u32 *)(pBase + dwOffset);
	return *pSrc;
}

// Writes qwValue, with its upper and lower 32-bit halves exchanged, to
// pBase + dwOffset. No address translation is performed.
inline void QuickWrite64Bits(BYTE *pBase, DWORD dwOffset, u64 qwValue)
{
	u64 * const pDest = (u64 *)(pBase + dwOffset);
	*pDest = (qwValue << 32) + (qwValue >> 32);
}

// Writes the 32-bit value dwValue to pBase + dwOffset. No address translation is performed.
inline void QuickWrite32Bits(BYTE *pBase, DWORD dwOffset, u32 dwValue)
{
	u32 * const pDest = (u32 *)(pBase + dwOffset);
	*pDest = dwValue;
}


/////////////////////////////////////////////////////
/////////////////////////////////////////////////////
//               MI Register Macros                //
/////////////////////////////////////////////////////
/////////////////////////////////////////////////////


// Stores dwValue into the MI register addressed by dwReg.
// The slot is the DWORD at index (dwReg - MI_BASE_REG) / 4 of the MEM_MI_REG
// buffer; no range check is made.
inline void Memory_MI_SetRegister(DWORD dwReg, DWORD dwValue)
{
	DWORD * const pRegs = (DWORD *)g_pMemoryBuffers[MEM_MI_REG];
	const DWORD dwIndex = (dwReg - MI_BASE_REG) / 4;

	pRegs[dwIndex] = dwValue;
}

// ORs dwValue into the MI register addressed by dwReg (sets the given bits).
// The slot is the DWORD at index (dwReg - MI_BASE_REG) / 4 of the MEM_MI_REG
// buffer; no range check is made.
inline void Memory_MI_SetRegisterBits(DWORD dwReg, DWORD dwValue)
{
	const DWORD dwIndex = (dwReg - MI_BASE_REG) / 4;
	DWORD * const pReg = (DWORD *)g_pMemoryBuffers[MEM_MI_REG] + dwIndex;

	*pReg |= dwValue;
}

// Clears, in the MI register addressed by dwReg, every bit that is set in dwValue.
// The slot is the DWORD at index (dwReg - MI_BASE_REG) / 4 of the MEM_MI_REG
// buffer; no range check is made.
inline void Memory_MI_ClrRegisterBits(DWORD dwReg, DWORD dwValue)
{
	const DWORD dwIndex = (dwReg - MI_BASE_REG) / 4;
	DWORD * const pReg = (DWORD *)g_pMemoryBuffers[MEM_MI_REG] + dwIndex;

	*pReg &= ~dwValue;
}

// Returns the current value of the MI register addressed by dwReg.
// The slot is the DWORD at index (dwReg - MI_BASE_REG) / 4 of the MEM_MI_REG
// buffer; no range check is made.
inline DWORD Memory_MI_GetRegister(DWORD dwReg)
{
	const DWORD * const pRegs = (DWORD *)g_pMemoryBuffers[MEM_MI_REG];
	const DWORD dwIndex = (dwReg - MI_BASE_REG) / 4;

	return pRegs[dwIndex];
}

/////////////////////////////////////////////////////
/////////////////////////////////////////////////////
//               SP Register Macros                //
/////////////////////////////////////////////////////
/////////////////////////////////////////////////////


// Stores dwValue into the SP register addressed by dwReg.
// The slot is the DWORD at index (dwReg - SP_BASE_REG) / 4 of the MEM_SP_REG
// buffer; no range check is made.
inline void Memory_SP_SetRegister(DWORD dwReg, DWORD dwValue)
{
	DWORD * const pRegs = (DWORD *)g_pMemoryBuffers[MEM_SP_REG];
	const DWORD dwIndex = (dwReg - SP_BASE_REG) / 4;

	pRegs[dwIndex] = dwValue;
}

// ORs dwValue into the SP register addressed by dwReg (sets the given bits).
// The slot is the DWORD at index (dwReg - SP_BASE_REG) / 4 of the MEM_SP_REG
// buffer; no range check is made.
inline void Memory_SP_SetRegisterBits(DWORD dwReg, DWORD dwValue)
{
	const DWORD dwIndex = (dwReg - SP_BASE_REG) / 4;
	DWORD * const pReg = (DWORD *)g_pMemoryBuffers[MEM_SP_REG] + dwIndex;

	*pReg |= dwValue;
}

// Clears, in the SP register addressed by dwReg, every bit that is set in dwValue.
// The slot is the DWORD at index (dwReg - SP_BASE_REG) / 4 of the MEM_SP_REG
// buffer; no range check is made.
inline void Memory_SP_ClrRegisterBits(DWORD dwReg, DWORD dwValue)
{
	const DWORD dwIndex = (dwReg - SP_BASE_REG) / 4;
	DWORD * const pReg = (DWORD *)g_pMemoryBuffers[MEM_SP_REG] + dwIndex;

	*pReg &= ~dwValue;
}

// Returns the current value of the SP register addressed by dwReg.
// The slot is the DWORD at index (dwReg - SP_BASE_REG) / 4 of the MEM_SP_REG
// buffer; no range check is made.
inline DWORD Memory_SP_GetRegister(DWORD dwReg)
{
	const DWORD * const pRegs = (DWORD *)g_pMemoryBuffers[MEM_SP_REG];
	const DWORD dwIndex = (dwReg - SP_BASE_REG) / 4;

	return pRegs[dwIndex];
}


/////////////////////////////////////////////////////
/////////////////////////////////////////////////////
//               AI Register Macros                //
/////////////////////////////////////////////////////
/////////////////////////////////////////////////////


// Stores dwValue into the AI register addressed by dwReg.
// The slot is the DWORD at index (dwReg - AI_BASE_REG) / 4 of the MEM_AI_REG
// buffer; no range check is made.
inline void Memory_AI_SetRegister(DWORD dwReg, DWORD dwValue)
{
	DWORD * const pRegs = (DWORD *)g_pMemoryBuffers[MEM_AI_REG];
	const DWORD dwIndex = (dwReg - AI_BASE_REG) / 4;

	pRegs[dwIndex] = dwValue;
}

// ORs dwValue into the AI register addressed by dwReg (sets the given bits).
// The slot is the DWORD at index (dwReg - AI_BASE_REG) / 4 of the MEM_AI_REG
// buffer; no range check is made.
inline void Memory_AI_SetRegisterBits(DWORD dwReg, DWORD dwValue)
{
	const DWORD dwIndex = (dwReg - AI_BASE_REG) / 4;
	DWORD * const pReg = (DWORD *)g_pMemoryBuffers[MEM_AI_REG] + dwIndex;

	*pReg |= dwValue;
}

// Clears, in the AI register addressed by dwReg, every bit that is set in dwValue.
// The slot is the DWORD at index (dwReg - AI_BASE_REG) / 4 of the MEM_AI_REG
// buffer; no range check is made.
inline void Memory_AI_ClrRegisterBits(DWORD dwReg, DWORD dwValue)
{
	const DWORD dwIndex = (dwReg - AI_BASE_REG) / 4;
	DWORD * const pReg = (DWORD *)g_pMemoryBuffers[MEM_AI_REG] + dwIndex;

	*pReg &= ~dwValue;
}

// Returns the current value of the AI register addressed by dwReg.
// The slot is the DWORD at index (dwReg - AI_BASE_REG) / 4 of the MEM_AI_REG
// buffer; no range check is made.
inline DWORD Memory_AI_GetRegister(DWORD dwReg)
{
	const DWORD * const pRegs = (DWORD *)g_pMemoryBuffers[MEM_AI_REG];
	const DWORD dwIndex = (dwReg - AI_BASE_REG) / 4;

	return pRegs[dwIndex];
}

/////////////////////////////////////////////////////
/////////////////////////////////////////////////////
//               VI Register Macros                //
/////////////////////////////////////////////////////
/////////////////////////////////////////////////////


// Stores dwValue into the VI register addressed by dwReg.
// The slot is the DWORD at index (dwReg - VI_BASE_REG) / 4 of the MEM_VI_REG
// buffer; no range check is made.
inline void Memory_VI_SetRegister(DWORD dwReg, DWORD dwValue)
{
	DWORD * const pRegs = (DWORD *)g_pMemoryBuffers[MEM_VI_REG];
	const DWORD dwIndex = (dwReg - VI_BASE_REG) / 4;

	pRegs[dwIndex] = dwValue;
}

// ORs dwValue into the VI register addressed by dwReg (sets the given bits).
// The slot is the DWORD at index (dwReg - VI_BASE_REG) / 4 of the MEM_VI_REG
// buffer; no range check is made.
inline void Memory_VI_SetRegisterBits(DWORD dwReg, DWORD dwValue)
{
	const DWORD dwIndex = (dwReg - VI_BASE_REG) / 4;
	DWORD * const pReg = (DWORD *)g_pMemoryBuffers[MEM_VI_REG] + dwIndex;

	*pReg |= dwValue;
}

// Clears, in the VI register addressed by dwReg, every bit that is set in dwValue.
// The slot is the DWORD at index (dwReg - VI_BASE_REG) / 4 of the MEM_VI_REG
// buffer; no range check is made.
inline void Memory_VI_ClrRegisterBits(DWORD dwReg, DWORD dwValue)
{
	const DWORD dwIndex = (dwReg - VI_BASE_REG) / 4;
	DWORD * const pReg = (DWORD *)g_pMemoryBuffers[MEM_VI_REG] + dwIndex;

	*pReg &= ~dwValue;
}

// Returns the current value of the VI register addressed by dwReg.
// The slot is the DWORD at index (dwReg - VI_BASE_REG) / 4 of the MEM_VI_REG
// buffer; no range check is made.
inline DWORD Memory_VI_GetRegister(DWORD dwReg)
{
	const DWORD * const pRegs = (DWORD *)g_pMemoryBuffers[MEM_VI_REG];
	const DWORD dwIndex = (dwReg - VI_BASE_REG) / 4;

	return pRegs[dwIndex];
}
/////////////////////////////////////////////////////
/////////////////////////////////////////////////////
//               SI Register Macros                //
/////////////////////////////////////////////////////
/////////////////////////////////////////////////////


// Stores dwValue into the SI register addressed by dwReg.
// The slot is the DWORD at index (dwReg - SI_BASE_REG) / 4 of the MEM_SI_REG
// buffer; no range check is made.
inline void Memory_SI_SetRegister(DWORD dwReg, DWORD dwValue)
{
	DWORD * const pRegs = (DWORD *)g_pMemoryBuffers[MEM_SI_REG];
	const DWORD dwIndex = (dwReg - SI_BASE_REG) / 4;

	pRegs[dwIndex] = dwValue;
}

// ORs dwValue into the SI register addressed by dwReg (sets the given bits).
// The slot is the DWORD at index (dwReg - SI_BASE_REG) / 4 of the MEM_SI_REG
// buffer; no range check is made.
inline void Memory_SI_SetRegisterBits(DWORD dwReg, DWORD dwValue)
{
	const DWORD dwIndex = (dwReg - SI_BASE_REG) / 4;
	DWORD * const pReg = (DWORD *)g_pMemoryBuffers[MEM_SI_REG] + dwIndex;

	*pReg |= dwValue;
}

// Clears, in the SI register addressed by dwReg, every bit that is set in dwValue.
// The slot is the DWORD at index (dwReg - SI_BASE_REG) / 4 of the MEM_SI_REG
// buffer; no range check is made.
inline void Memory_SI_ClrRegisterBits(DWORD dwReg, DWORD dwValue)
{
	const DWORD dwIndex = (dwReg - SI_BASE_REG) / 4;
	DWORD * const pReg = (DWORD *)g_pMemoryBuffers[MEM_SI_REG] + dwIndex;

	*pReg &= ~dwValue;
}

// Returns the current value of the SI register addressed by dwReg.
// The slot is the DWORD at index (dwReg - SI_BASE_REG) / 4 of the MEM_SI_REG
// buffer; no range check is made.
inline DWORD Memory_SI_GetRegister(DWORD dwReg)
{
	const DWORD * const pRegs = (DWORD *)g_pMemoryBuffers[MEM_SI_REG];
	const DWORD dwIndex = (dwReg - SI_BASE_REG) / 4;

	return pRegs[dwIndex];
}

/////////////////////////////////////////////////////
/////////////////////////////////////////////////////
//               PI Register Macros                //
/////////////////////////////////////////////////////
/////////////////////////////////////////////////////


// Stores dwValue into the PI register addressed by dwReg.
// The slot is the DWORD at index (dwReg - PI_BASE_REG) / 4 of the MEM_PI_REG
// buffer; no range check is made.
inline void Memory_PI_SetRegister(DWORD dwReg, DWORD dwValue)
{
	DWORD * const pRegs = (DWORD *)g_pMemoryBuffers[MEM_PI_REG];
	const DWORD dwIndex = (dwReg - PI_BASE_REG) / 4;

	pRegs[dwIndex] = dwValue;
}

// ORs dwValue into the PI register addressed by dwReg (sets the given bits).
// The slot is the DWORD at index (dwReg - PI_BASE_REG) / 4 of the MEM_PI_REG
// buffer; no range check is made.
inline void Memory_PI_SetRegisterBits(DWORD dwReg, DWORD dwValue)
{
	const DWORD dwIndex = (dwReg - PI_BASE_REG) / 4;
	DWORD * const pReg = (DWORD *)g_pMemoryBuffers[MEM_PI_REG] + dwIndex;

	*pReg |= dwValue;
}

// Clears, in the PI register addressed by dwReg, every bit that is set in dwValue.
// The slot is the DWORD at index (dwReg - PI_BASE_REG) / 4 of the MEM_PI_REG
// buffer; no range check is made.
inline void Memory_PI_ClrRegisterBits(DWORD dwReg, DWORD dwValue)
{
	const DWORD dwIndex = (dwReg - PI_BASE_REG) / 4;
	DWORD * const pReg = (DWORD *)g_pMemoryBuffers[MEM_PI_REG] + dwIndex;

	*pReg &= ~dwValue;
}

// Returns the current value of the PI register addressed by dwReg.
// The slot is the DWORD at index (dwReg - PI_BASE_REG) / 4 of the MEM_PI_REG
// buffer; no range check is made.
inline DWORD Memory_PI_GetRegister(DWORD dwReg)
{
	const DWORD * const pRegs = (DWORD *)g_pMemoryBuffers[MEM_PI_REG];
	const DWORD dwIndex = (dwReg - PI_BASE_REG) / 4;

	return pRegs[dwIndex];
}

/////////////////////////////////////////////////////
/////////////////////////////////////////////////////
//               DPC Register Macros               //
/////////////////////////////////////////////////////
/////////////////////////////////////////////////////


// Stores dwValue into the DPC register addressed by dwReg.
// The slot is the DWORD at index (dwReg - DPC_BASE_REG) / 4 of the
// MEM_DPC_REG buffer; no range check is made.
inline void Memory_DPC_SetRegister(DWORD dwReg, DWORD dwValue)
{
	DWORD * const pRegs = (DWORD *)g_pMemoryBuffers[MEM_DPC_REG];
	const DWORD dwIndex = (dwReg - DPC_BASE_REG) / 4;

	pRegs[dwIndex] = dwValue;
}

// ORs dwValue into the DPC register addressed by dwReg (sets the given bits).
// The slot is the DWORD at index (dwReg - DPC_BASE_REG) / 4 of the
// MEM_DPC_REG buffer; no range check is made.
inline void Memory_DPC_SetRegisterBits(DWORD dwReg, DWORD dwValue)
{
	const DWORD dwIndex = (dwReg - DPC_BASE_REG) / 4;
	DWORD * const pReg = (DWORD *)g_pMemoryBuffers[MEM_DPC_REG] + dwIndex;

	*pReg |= dwValue;
}

// Clears, in the DPC register addressed by dwReg, every bit that is set in dwValue.
// The slot is the DWORD at index (dwReg - DPC_BASE_REG) / 4 of the
// MEM_DPC_REG buffer; no range check is made.
inline void Memory_DPC_ClrRegisterBits(DWORD dwReg, DWORD dwValue)
{
	const DWORD dwIndex = (dwReg - DPC_BASE_REG) / 4;
	DWORD * const pReg = (DWORD *)g_pMemoryBuffers[MEM_DPC_REG] + dwIndex;

	*pReg &= ~dwValue;
}

// Returns the current value of the DPC register addressed by dwReg.
// The slot is the DWORD at index (dwReg - DPC_BASE_REG) / 4 of the
// MEM_DPC_REG buffer; no range check is made.
inline DWORD Memory_DPC_GetRegister(DWORD dwReg)
{
	const DWORD * const pRegs = (DWORD *)g_pMemoryBuffers[MEM_DPC_REG];
	const DWORD dwIndex = (dwReg - DPC_BASE_REG) / 4;

	return pRegs[dwIndex];
}

#endif
