/*
Copyright (C) 2001 StrmnNrmn

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/

// Static Recompilation
#include "stdafx.h"

#include "DynaRec.h"
#include "DynaRecTables.h"
#include "DynaRecInternal.h"
#include "CodeBufferManager.h"
#include "CodeGenerator.h"

#include "OSHLE/ultra_r4300.h"		// C0_COUNT

#include "Core/CPU.h"
#include "Core/Interrupt.h"
#include "Core/R4300.h"

#include "Core/Registers.h"			// For REG_?? / RegNames

#include "Debug/Debug.h"
#include "Debug/DBGConsole.h"
#include "Utility/Profiler.h"
#include "Utility/PrintOpCode.h"


//*****************************************************************************
//
//*****************************************************************************
// Sentinel stored in CDynarecCode::dwBranchTarget when a block has no recorded
// branch target (DynaRec_OptimiseCode compares against and resets to this value).
static const u32 DAED_INVALID_BRANCH_TARGET = ~0;
// NOTE(review): not referenced in this part of the file - presumably marks an
// unallocated x86 register for code pulled in via DR_Tables.inl; confirm there.
static const u32		INTEL_REG_UNUSED( ~0 );

// Defined elsewhere; DynaRec_OptimiseCode increments it once per instruction it
// processes on DAEDALUS_W32 / DAEDALUS_XBOX builds.
extern u32				gFPTimeNow;

static const u32		gInitialCodeTableSize = 30000;	// Number of entries DynaRec_Reset asks DynaRec_Init to reserve in gpDynarecCodeTable
// Every block registered through SR_AddCompiledCode. A block's position in this
// table is stored in its dwEntry field and in the dynarec_index field of the
// hack opcode written over the block's first instruction.
static std::vector<CDynarecCode *>	gpDynarecCodeTable;

// Created by DynaRec_Init, destroyed by DynaRec_Fini; NULL outside that window.
static	CCodeBufferManager *		gpCodeBufferManager( NULL );


// Running counters:
//   g_dwNumSRCompiled  - calls to DynaRec_CompileCode (including ones that bail out)
//   g_dwNumSROptimised - calls to DynaRec_OptimiseCode (including early returns)
//   g_dwNumSRFailed    - blocks registered with zero compiled instructions
u32 g_dwNumSRCompiled = 0;
u32 g_dwNumSROptimised = 0;
u32 g_dwNumSRFailed = 0;


// When true, DynaRec_OptimiseCode resolves the block's branch target and keeps
// compiling through the delay slot so the two blocks can be linked.
static const bool gOptimiseLink = true;		// Needs to be FALSE for 007

// Defined in SRInliner.cpp
BOOL DynaRec_Emit_Inline_Function(CCodeGeneratorX86 * p_code_gen, void* pFunction);


//*****************************************************************************
//
//*****************************************************************************
//#define DR_COLLECT_UNOPT_STATS

#ifdef DR_COLLECT_UNOPT_STATS 
// Number of characters of the disassembled instruction text used as the key
// when grouping unoptimised instructions.
#define UNOPTCOUNT_STRING_SIZE	8

// One row of the "unoptimised instruction" report: a truncated instruction
// name, how many times it was seen, and the blocks it was seen in.
struct UnoptCount
{
	char							szString[UNOPTCOUNT_STRING_SIZE+1];	// Key text plus terminating NUL
	u32								Count;								// Number of occurrences recorded
	std::vector<CDynarecCode*>		Codes;								// One entry per occurrence

	// Ordering predicate for sort(): ascending by occurrence count.
	static bool compare(const UnoptCount & lhs, const UnoptCount & rhs)
	{
		return rhs.Count > lhs.Count;
	}
};

// All rows collected so far, in first-seen order until DynaRec_Stats sorts them.
static std::vector<UnoptCount> g_UnoptCount;

// Records one instruction that was emitted without being optimised.
static void		DynaRec_CollectUnoptimisedStats( CDynarecCode * pCode, OpCode op_code );
#endif


#include "DR_Tables.inl"

//*****************************************************************************
// Forward declarations
//*****************************************************************************
// Registers a compiled block in gpDynarecCodeTable and overwrites the word at
// pdwBase with a hack opcode referring to it. Defined later in this file.
static void SR_AddCompiledCode(CDynarecCode *pCode, u32 * pdwBase);

//*****************************************************************************
//
//*****************************************************************************
// Prepares the recompiler for use: empties the block table, reserves room for
// initial_table_size entries and creates/initialises the code buffer manager.
// Returns false if the manager could not be created or initialised.
bool DynaRec_Init(u32 initial_table_size)
{
	// Start from an empty table with capacity for the expected number of blocks.
	gpDynarecCodeTable.clear();
	gpDynarecCodeTable.reserve( initial_table_size );

	gpCodeBufferManager = CCodeBufferManager::Create();

	// Initialise() is only attempted when creation succeeded.
	if(gpCodeBufferManager != NULL && gpCodeBufferManager->Initialise())
	{
		return true;
	}

	return false;
}

//*****************************************************************************
//
//*****************************************************************************
// Discards all compiled blocks and code buffers, then re-initialises the
// recompiler with the default table capacity. The result of DynaRec_Init is
// not inspected here.
void DynaRec_Reset()
{
	DynaRec_Fini();								// Release everything currently held

	DynaRec_Init( gInitialCodeTableSize );		// Rebuild from scratch
}

//*****************************************************************************
//
//*****************************************************************************
void DynaRec_Fini()
{
	for (u32 i = 0; i < gpDynarecCodeTable.size(); i++)
	{
		delete gpDynarecCodeTable[i];
		gpDynarecCodeTable[i] = NULL;
	}

	gpDynarecCodeTable.clear();

	if( gpCodeBufferManager != NULL )
	{
		gpCodeBufferManager->Finalise();
		delete gpCodeBufferManager;
	}
	gpCodeBufferManager = NULL;
}

//*****************************************************************************
//
//*****************************************************************************
// Accessor for the file-private code buffer manager. Yields NULL before
// DynaRec_Init has run or after DynaRec_Fini.
CCodeBufferManager * DynaRec_GetCodeBufferManager()
{
	CCodeBufferManager * const manager = gpCodeBufferManager;
	return manager;
}

//*****************************************************************************
//
//*****************************************************************************
void DynaRec_Stats()
{
	std::map< u32, u32 >	hitcount_freqs;		// Frequency of each hitcount
	static const u32 HITCOUNT_BAND_SIZE( 100 );

	u32 dwCount = 0;
	u32 dwTotalInstrs = 0;
	u32 dwTotalOptimised = 0;
	u32 dwTotalInstrsExe = 0;
	u32 dwMaxLen = 0;
	u32 dwMaxCount = 0;

	u32 dwTotalBlocksOptimised = 0;
	u32 dwTotalOptInstrs = 0;
	u32 dwTotalOptInstrsOptimised = 0;
	u32 dwTotalOptInputBytes = 0;
	u32 dwTotalOptOutputBytes = 0;
		
	u32 dwTotalBlocksUnoptimised = 0;
	u32 dwTotalUnoptInstrs = 0;
	u32 dwTotalUnoptInstrsOptimised = 0;
	u32 dwTotalUnoptInputBytes = 0;
	u32 dwTotalUnoptOutputBytes = 0;

	for (u32 i = 0; i < gpDynarecCodeTable.size(); i++)
	{
		CDynarecCode * pCode = gpDynarecCodeTable[i];
		if (pCode != NULL)
		{
			DPF( DEBUG_DYNREC, DSPrintf( "0x%08x, %s, %d ops, %d optimised",
				pCode->dwStartPC, pCode->dwOptimiseLevel ? "optimised": "unoptimised", 
				pCode->dwNumOps, pCode->dwNumOptimised ) );
			
			dwTotalInstrs += pCode->dwNumOps;
			dwTotalOptimised += pCode->dwNumOptimised;
			dwTotalInstrsExe += (pCode->dwNumOps * pCode->HitCount);
			dwCount += pCode->HitCount;

			u32		frequency_band;
			
			if(pCode->HitCount < 100)
			{
				frequency_band = pCode->HitCount;
			}
			else
			{
				frequency_band = (pCode->HitCount / HITCOUNT_BAND_SIZE ) * HITCOUNT_BAND_SIZE;
			}

			hitcount_freqs[ frequency_band ]++;

			if (pCode->dwOptimiseLevel == 0)
			{
				dwTotalUnoptInputBytes += pCode->dwNumOps * 4;
				dwTotalUnoptOutputBytes += pCode->GetCompiledCodeSize();
				dwTotalUnoptInstrs += pCode->dwNumOps;
				dwTotalUnoptInstrsOptimised += pCode->dwNumOptimised;
				dwTotalBlocksUnoptimised++;
			}
			else
			{
				dwTotalOptInputBytes += pCode->dwNumOps * 4;
				dwTotalOptOutputBytes += pCode->GetCompiledCodeSize();
				dwTotalOptInstrs += pCode->dwNumOps;
				dwTotalOptInstrsOptimised += pCode->dwNumOptimised;
				dwTotalBlocksOptimised++;
			}

			if (pCode->dwNumOps > dwMaxLen)
				dwMaxLen = pCode->dwNumOps;

			if (pCode->HitCount > dwMaxCount)
				dwMaxCount = pCode->HitCount;
		}
	}

	DBGConsole_Msg(0, "Dynarec Stats");		
	DBGConsole_Msg(0, "-------------");		
	DBGConsole_Msg(0, "%d Entries (%#.3f %% optimised)", gpDynarecCodeTable.size(), (float)dwTotalBlocksOptimised * 100.0f / (float)gpDynarecCodeTable.size());
	DBGConsole_Msg(0, "%d Ops compiled in total (%#.3f %% optimised)", dwTotalInstrs, (float)dwTotalOptimised * 100.0f / (float)dwTotalInstrs);
	DBGConsole_Msg(0, "%d Ops executed in total", dwTotalInstrsExe);
	DBGConsole_Msg(0, "%d Calls", dwCount);
	DBGConsole_Msg(0, "%#.3f Average ops/call", (float)dwTotalInstrsExe / (float)dwCount);
	DBGConsole_Msg(0, "%d Longest run", dwMaxLen);
	DBGConsole_Msg(0, "%d Largest HitCount", dwMaxCount);
	DBGConsole_Msg(0, "");
	DBGConsole_Msg(0, "Unoptimised");
	DBGConsole_Msg(0, "---------");
	DBGConsole_Msg(0, "%d Input Bytes", dwTotalUnoptInputBytes);
	DBGConsole_Msg(0, "%d Output Bytes", dwTotalUnoptOutputBytes);
	DBGConsole_Msg(0, "%#.3f Average Expansion Ratio", (float)dwTotalUnoptOutputBytes/(float)dwTotalUnoptInputBytes);
	DBGConsole_Msg(0, "%#.3f%% optimised inplace", (float)dwTotalUnoptInstrsOptimised * 100.0f / (float)dwTotalUnoptInstrs);
	DBGConsole_Msg(0, "");
	DBGConsole_Msg(0, "Optimised");
	DBGConsole_Msg(0, "---------");
	DBGConsole_Msg(0, "%d Input Bytes", dwTotalOptInputBytes);
	DBGConsole_Msg(0, "%d Output Bytes", dwTotalOptOutputBytes);
	DBGConsole_Msg(0, "%#.3f Average Expansion Ratio", (float)dwTotalOptOutputBytes/(float)dwTotalOptInputBytes);
	DBGConsole_Msg(0, "%#.3f%% optimised inplace", (float)dwTotalOptInstrsOptimised * 100.0f / (float)dwTotalOptInstrs);


#ifdef DR_COLLECT_UNOPT_STATS
	DBGConsole_Msg(0, "");

	DBGConsole_Msg(0, "Unoptimised Instrs:");
	DBGConsole_Msg(0, "-------------------:");
	DBGConsole_Msg(0, "Name  Count  CombinedCount");

	sort( g_UnoptCount.begin(), g_UnoptCount.end(), UnoptCount::compare );

	for (u32 i = 0; i < g_UnoptCount.size(); i++)
	{
		u32		combined_count( 0 );
		for(u32 c = 0; c < g_UnoptCount[i].Codes.size(); ++c)
		{
			CDynarecCode *	pcode( g_UnoptCount[i].Codes[c] );
			if(pcode != NULL)
			{
				combined_count += pcode->HitCount;
			}
		}

		DBGConsole_Msg(0, "%s %5d %d", g_UnoptCount[i].szString, g_UnoptCount[i].Count, combined_count);
	}
#endif

	FILE * fh( fopen( "c:\\hitcounts.csv", "w" ) );
	if(fh)
	{
		fprintf(fh, "HitCount, Count\n" );
		for(std::map<u32,u32>::iterator it = hitcount_freqs.begin(); it != hitcount_freqs.end(); ++it)
		{
			u32		val( it->second );
			if(it->first >= 100)
			{
				val /= HITCOUNT_BAND_SIZE;
			}
			fprintf( fh, "%d,%d\n", it->first, val );
		}
		fclose(fh);
	}
}

//*****************************************************************************
// Add the compiled code to gpDynarecCodeTable. Insert hacked instruction
// into RDRAM to allow us to quickly branch to our compiled code
//*****************************************************************************
void SR_AddCompiledCode(CDynarecCode *pCode, u32 * pdwBase)
{
	// The block's index is the slot it is about to occupy at the end of the table.
	pCode->dwEntry = gpDynarecCodeTable.size();

	// Remember the instruction word that is about to be overwritten.
	pCode->mOriginalOp._u32 = *pdwBase;

	OpCode hack_op;
	hack_op.dynarec_index = pCode->dwEntry;

	if (pCode->dwNumOps != 0)
	{
		// At least one instruction was compiled - route execution to it.
		hack_op.op = OP_SRHACK_UNOPT;
	}
	else
	{
		// Nothing was compiled - this entry makes us just execute the original mips.
		g_dwNumSRFailed++;
		hack_op.op = OP_SRHACK_NOOPT;
	}

	// Patch the hack instruction in, then publish the block in the table.
	*pdwBase = hack_op._u32;
	gpDynarecCodeTable.push_back( pCode );
}

//*****************************************************************************
//
//*****************************************************************************
// Looks up a compiled block by its table index. Returns NULL when the index is
// past the end of the table.
CDynarecCode *		DynaRec_GetEntry( u32 entry )
{
	if(entry >= gpDynarecCodeTable.size())
		return NULL;

	return gpDynarecCodeTable[ entry ];
}


//*****************************************************************************
//
//*****************************************************************************
// Compiles (without optimisation) the run of instructions starting at dwPC and
// registers the resulting block, patching a hack opcode over the first
// instruction. Returns the new block, or NULL if dwPC cannot be mapped to
// readable memory or the block object could not be allocated.
CDynarecCode * DynaRec_CompileCode(u32 dwPC)
{
	DAEDALUS_PROFILE( "DynaRec_CompileCode" );

	// Counted up front, so attempts that bail out below are included.
	g_dwNumSRCompiled++;

	u32 * block_base;
	if (!Memory_GetInternalReadAddress(dwPC, (void**)&block_base))
		return NULL;

	CDynarecCode * pCode = new CDynarecCode(dwPC, block_base[0]);
	if(pCode == NULL)
		return NULL;

	// Give the block somewhere to emit into
	pCode->AllocateCodeBuffer( gpCodeBufferManager );

	// Walk forward one instruction at a time. We stop at the first instruction
	// that cannot be emitted, or immediately after one that raises any flag.
	OpCode op_code;
	u32 * cursor = block_base;
	for (;;)
	{
		op_code._u32 = *cursor;

		// The emit functions only ever set flags, so clear them ourselves
		u32 emit_flags = 0;

		if (!SR_EmitInstruction[op_code.op](pCode, op_code, &emit_flags))
			break;

		pCode->dwNumOps++;

		if (emit_flags != 0)
			break;

		++cursor;
	}

	#ifdef DAEDALUS_LOG
	char opstr[300];
	u32 dwCurrPC = pCode->dwStartPC+pCode->dwNumOps*4;
	SprintOpCodeInfo( opstr, dwCurrPC, op_code );
	DPF( DEBUG_DYNREC, DSPrintf( "Stopped On: 0x%08x: %s", dwCurrPC, opstr ) );
	#endif

	// Only blocks that actually contain code get an end address and epilogue
	if (pCode->dwNumOps > 0)
	{
		pCode->dwEndPC = pCode->dwStartPC + ((pCode->dwNumOps-1)*4);
		pCode->mpCodeGenerator->GenerateEpilogue( pCode->dwEndPC );
	}

	// Register the block (this patches the instruction at block_base) before
	// the code buffer is finalised.
	SR_AddCompiledCode(pCode, block_base);
	pCode->FinaliseCodeBuffer( gpCodeBufferManager );

	return pCode;
}

//*****************************************************************************
//
//*****************************************************************************
// Recompiles an already-compiled block at optimisation level 1.
//
//   pCode   - block to optimise; returns immediately if it is already at
//             level 1 or above.
//   dwLevel - recursion depth, used for logging only. Branch targets are
//             optimised recursively with dwLevel+1.
//
// When gOptimiseLink is set, the block's recorded branch target is looked up
// (compiling and optimising it on demand) so that the end of this block can
// branch straight into the target's compiled code. The block is then re-emitted
// into a fresh code buffer and the hack opcode at its start address is switched
// to OP_SRHACK_OPT.
void DynaRec_OptimiseCode(CDynarecCode *pCode, u32 dwLevel)
{
	DAEDALUS_PROFILE( "Dynarec_OptimiseCode" );

	// Optimise the code more aggressively
	OpCode op_code;
	u32 dwFlags;
	BOOL bEmitOk;
	u32 *pdwPCMemBase = (u32 *)ReadAddress(pCode->dwStartPC);
	CDynarecCode * pCodeTarget = NULL;

	// Counted before the early-out below, so repeat requests are included.
	g_dwNumSROptimised++;

	// Avoid re-optimizing
	if (pCode->dwOptimiseLevel >= 1)
		return;

	// Mark the block as optimised *before* recursing into its branch target so
	// that blocks which branch to each other cannot recurse forever.
	pCode->dwOptimiseLevel = 1;

	// Branch target set from previous unoptimized compilation!
	if (gOptimiseLink)
	{
		if (pCode->dwBranchTarget != DAED_INVALID_BRANCH_TARGET)
		{
			// Get the opcode of the target pc..
			u32 * pdwTarget;

			if (Memory_GetInternalReadAddress(pCode->dwBranchTarget, (void **)&pdwTarget))
			{
				OpCode target_op;
				target_op._u32 = pdwTarget[0];

				if (target_op.op == OP_SRHACK_UNOPT ||
					target_op.op == OP_SRHACK_OPT)
				{
					// Target has already been compiled - find its block.
					u32 dwEntry = target_op.dynarec_index;
					if (dwEntry < gpDynarecCodeTable.size())
					{
						pCodeTarget = gpDynarecCodeTable[dwEntry];

						// Guard against an empty table slot before touching the entry.
						if (pCodeTarget != NULL)
						{
							DPF( DEBUG_DYNREC, DSPrintf( " Target entry is %d", dwEntry ) );
							DPF( DEBUG_DYNREC, DSPrintf( "   StartAddress: 0x%08x, OptimizeLevel: %d", pCodeTarget->dwStartPC, pCodeTarget->dwOptimiseLevel ) );

							if (pCodeTarget->dwOptimiseLevel == 0)
							{
								DynaRec_OptimiseCode(pCodeTarget, dwLevel+1);
							}
						}
					}
				}
				else if (target_op.op != OP_SRHACK_NOOPT &&
						 target_op.op != OP_PATCH)
				{
					// Uncompiled code - compile now?
					pCodeTarget = DynaRec_CompileCode(pCode->dwBranchTarget);

					// DynaRec_CompileCode may return NULL, so every use of the
					// result has to sit behind this check.
					if (pCodeTarget != NULL)
					{
						if (pCodeTarget->dwNumOps > 0)
							DynaRec_OptimiseCode(pCodeTarget, dwLevel+1);

						// Only link to targets that ended up optimised.
						if (pCodeTarget->dwOptimiseLevel == 0)
							pCodeTarget = NULL;
					}
				}
				else
				{
				//	DBGConsole_Msg(0, "0x%08x Huh??: 0x%08x (%d)", pCode->dwBranchTarget, op_code, op_code.op);
		
				}
			}
		}
	}
	// Initialise various fields
	pCode->AllocateCodeBuffer( gpCodeBufferManager );		// Restart using new buffer

	pCode->dwNumOps = 0;			// The emit instructions expect this to be the number of ops since dwStartPC
	pCode->dwNumOptimised = 0;
	//pCode->dwCRC = 0;
	pCode->dwBranchTarget = DAED_INVALID_BRANCH_TARGET;
	
	#ifdef DAEDALUS_LOG
	DPF( DEBUG_DYNREC, "" );
	DPF( DEBUG_DYNREC, "" );
	DPF( DEBUG_DYNREC, "************************************" );
	DPF( DEBUG_DYNREC, DSPrintf( "Optimising block starting at 0x%08x (%d hits, %d deep):", pCode->dwStartPC, pCode->HitCount, dwLevel ) );
	#endif

	// The statistics gathered so far are handed to the code generator, then
	// reset so they can be collected afresh during this pass.
	pCode->mpCodeGenerator->GeneratePrologue();
	pCode->mpCodeGenerator->AnalyseStats( pCode->Stats );
	pCode->Stats.Reset();

	// Loop forever, incrementing the PC each loop. We break out of the loop when
	// we reach an operation that we can't compile
	dwFlags = 0;

	u32 dwNumOptimisedCheck;
	bool has_been_optimised;
	BOOL bKeepCompiling = TRUE;
	BOOL bEmitDelayedOp = FALSE;		// Set once a branch is seen: emit one more op (the delay slot), then stop
	BOOL bHandleBranch = FALSE;			// Set when the branch asked to be linked to its target
	
	while ( bKeepCompiling || bEmitDelayedOp )
	{
#if defined( DAEDALUS_W32 ) || defined( DAEDALUS_XBOX )
		gFPTimeNow++;
#endif

		op_code._u32 = pdwPCMemBase[pCode->dwNumOps];

		dwFlags = 0;			// Reset flags to 0 - Emit instructions don't do this
		dwNumOptimisedCheck = pCode->dwNumOptimised;

		pCode->mpCodeGenerator->PreEmitCachedRegCheck( pCode->Stats );

		//pCode->mpCodeGenerator->SetVar( &gCPUState.CurrentPC, pCode->dwStartPC + (pCode->dwNumOps*4) );

		// Emit the actual instruction
		bEmitOk = SR_EmitInstruction[op_code.op](pCode, op_code, &dwFlags);

		// Exit if instruction wasn't emitted ok
		if (!bEmitOk)
		{
			bKeepCompiling = FALSE;
			break;
		}

		// The emit function bumps dwNumOptimised when it produced optimised code
		has_been_optimised = pCode->dwNumOptimised != dwNumOptimisedCheck;
		
		#ifdef DAEDALUS_LOG
			CHAR opstr[300];
			u32 dwCurrPC = pCode->dwStartPC+pCode->dwNumOps*4;
			SprintOpCodeInfo( opstr, dwCurrPC, op_code );
			DPF( DEBUG_DYNREC, DSPrintf( "0x%08x: %c %s", dwCurrPC, has_been_optimised ? '*' : ' ', opstr ) );
		#endif

		#ifdef DR_COLLECT_UNOPT_STATS
			if (!has_been_optimised && dwFlags == 0)
			{
				DynaRec_CollectUnoptimisedStats( pCode, op_code );
			}
		#endif

		pCode->mpCodeGenerator->PostEmitCachedRegCheck( pCode->Stats, has_been_optimised );
		
		pCode->dwNumOps++;

		// Exit if we've emitted a delayed op
		if (bEmitDelayedOp)
			break;

		// Exit if one or more of the flags were set 
		if (gOptimiseLink && (dwFlags & SR_FLAGS_BRANCHES))
		{
			// A branch: go round once more for the delay slot, then stop.
			bEmitDelayedOp = TRUE;
			if (dwFlags & SR_FLAGS_HANDLE_BRANCH)
				bHandleBranch = TRUE;
		}
		else if (dwFlags != 0)
		{
			// Stop compiling!
			//DBGConsole_Msg(0, "Stopping because %08x", dwFlags);
			break;
		}
	}

	pCode->mpCodeGenerator->CleanupForExit();
	
	// Ok, so we've now flushed all the registers
	// If we're executing an internal branch, execute it now
	if (bEmitDelayedOp)
	{
		DPF(DEBUG_DYNREC, "Emitting code to update delay info...");
		// Need to clear gCPUState.Delay here, or set to EXEC_DELAY if it is currently DO_DELAY

		// Link directly to the target block when we have one, otherwise just end the block
		if (bHandleBranch && pCodeTarget != NULL)
		{
			pCode->mpCodeGenerator->GenerateBlockEndBranch( pCodeTarget, pCode->dwNumOps );
		}
		else
		{
			DPF(DEBUG_DYNREC, "Unhandled branch");
			pCode->mpCodeGenerator->GenerateBlockEnd();
		}
	}


	// NOTE(review): if the very first instruction failed to emit, dwNumOps is
	// still 0 here and this wraps to dwStartPC - 4. DynaRec_CompileCode guards
	// the equivalent computation; confirm whether that case can occur here.
	pCode->dwEndPC = pCode->dwStartPC + ((pCode->dwNumOps-1)*4);

	pCode->mpCodeGenerator->GenerateEpilogue(pCode->dwEndPC);

	pCode->FinaliseCodeBuffer( gpCodeBufferManager );

	{
		//OpCode op_code = pdwPCMemBase[0];
		//if (!OP_IS_A_HACK(op_code._u32))
		//	g_pdwOriginalMips[pCode->dwInstrNum] = op_code;

		// Re-patch the first instruction so it now refers to the optimised code
		OpCode new_op_code;

		new_op_code.op = OP_SRHACK_OPT;
		new_op_code.dynarec_index = pCode->dwEntry;

		pdwPCMemBase[0] = new_op_code._u32;
	}

	DPF(DEBUG_DYNREC, "************************************");
	DPF(DEBUG_DYNREC, "");
}

#ifdef DR_COLLECT_UNOPT_STATS 
//*****************************************************************************
//
//*****************************************************************************
// Records that op_code was emitted without optimisation in pCode. Occurrences
// are grouped by the first UNOPTCOUNT_STRING_SIZE characters of the
// instruction's disassembly, compared case-insensitively.
void	DynaRec_CollectUnoptimisedStats( CDynarecCode * pCode, OpCode op_code )
{
	char key[300];

	// Disassemble the instruction at its address within the block, then
	// truncate the text to the key length.
	const u32 instr_pc = pCode->dwStartPC+pCode->dwNumOps*4;
	SprintOpCodeInfo( key, instr_pc, op_code );
	key[UNOPTCOUNT_STRING_SIZE] = 0;

	// Already seen this key? Then just bump the existing row.
	for (std::vector<UnoptCount>::iterator row = g_UnoptCount.begin(); row != g_UnoptCount.end(); ++row)
	{
		if (stricmp(row->szString, key) != 0)
			continue;

		row->Count++;
		row->Codes.push_back( pCode );
		return;
	}

	// First sighting: announce it and start a new row.
	DBGConsole_Msg(0, "%s", key);

	UnoptCount fresh_row;
	strcpy(fresh_row.szString, key);
	fresh_row.Count = 1;
	fresh_row.Codes.push_back( pCode );
	g_UnoptCount.push_back(fresh_row);
}
#endif

//*****************************************************************************
//
//*****************************************************************************
// Emits a call to the generic handler pF for op_code, telling the code
// generator whether the block is being compiled at optimisation level 1 or above.
void SR_Emit_Generic_R4300(CDynarecCode *pCode, OpCode op_code, CPU_Instruction pF)
{
	pCode->mpCodeGenerator->GenerateGenericR4300( pCode->dwOptimiseLevel >= 1, op_code, pF );
}

