/*
*	Glide64 - Glide video plugin for Nintendo 64 emulators.
*	Copyright (c) 2002  Dave2001
*
*	This program is free software; you can redistribute it and/or modify
*	it under the terms of the GNU General Public License as published by
*	the Free Software Foundation; either version 2 of the License, or
*	any later version.
*
*	This program is distributed in the hope that it will be useful,
*	but WITHOUT ANY WARRANTY; without even the implied warranty of
*	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*	GNU General Public License for more details.
*
*	You should have received a copy of the GNU General Public License
*	along with this program; if not, write to the Free Software
*	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

//****************************************************************
//
// Glide64 - Glide Plugin for Nintendo 64 emulators (tested mostly with Project64)
// Project started on December 29th, 2001
//
// To modify Glide64:
// * Write your name and (optional)email, commented by your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me.
// * Do NOT send me the whole project or file that you modified.  Take out your modified code sections, and tell me where to put them.  If people sent the whole thing, I would have many different versions, but no idea how to combine them all.
//
// Official Glide64 development channel: #Glide64 on EFnet
//
// Original author: Dave2001 (Dave2999@hotmail.com)
// Other authors: Gonetz, Gugaman
//
//****************************************************************

//****************************************************************
// Size: 2, Format: 0

// Converts a 32-bit-per-texel texture to 16-bit ARGB4444, keeping the high
// nibble of every channel.
//
//   dst        - destination address (a DWORD that is used as a pointer)
//   src        - source address (a DWORD that is used as a pointer)
//   wid_64     - source row width in 64-bit words (two 32-bit texels each)
//   height     - number of rows to convert
//   line       - bytes added to the source pointer after every row
//   real_width - destination row width in texels; the padding skipped after
//                each row is (real_width - texels written) * 2 bytes
//   tile       - not used by this function
//
// Rows are handled in pairs.  The first row of a pair is read in memory order;
// the second row is read with the two 8-byte halves of every 16-byte group
// swapped (offsets +8,+12,+0,+4) - presumably to undo the N64 TMEM odd-line
// interleave, TODO confirm against the caller.
//
// Returns (1 << 16) | GR_TEXFMT_ARGB_4444.
DWORD Load32bRGBAas16bRGBA (DWORD dst, DWORD src, int wid_64, int height, int line, int real_width, int tile)
{
	if (height < 1) height = 1;

	// Every pass of the inner loops consumes TWO quadwords (16 source bytes),
	// so the loop counter is wid_64 / 2.  A width below 2 used to produce a
	// counter of 0, and because the loops are "dec ecx / jnz" that wrapped to
	// 0xFFFFFFFF and ran ~2^32 passes, trampling both buffers.  Round such a
	// row up to one full pass instead.
	if (wid_64 < 2)
	{
		// The row now consumes 16 bytes instead of the 8 the caller planned
		// for, so take the extra 8 out of the gap to keep the source row
		// stride unchanged.  (Assumes 'line' is the byte gap that follows a
		// wid_64-quadword row - TODO confirm against the caller.)
		line -= 8;
		wid_64 = 2;
		// NOTE(review): four texels are written per row in this case; when
		// real_width < 4 that reaches past the nominal row end, so confirm the
		// destination buffer has slack.
	}

	// Destination padding after each row, in bytes (2 bytes per output texel).
	int ext = (real_width - (wid_64 << 1)) << 1;

	// NOTE(review): an odd wid_64 >= 3 is rounded DOWN by the shift below, so
	// the last quadword of such a row is not converted.  Left as in the
	// original.
	wid_64 >>= 1;		// re-shift it, load twice as many quadwords

	__asm {
		mov esi,dword ptr [src]		// esi = read cursor
		mov edi,dword ptr [dst]		// edi = write cursor

		mov ecx,dword ptr [height]	// ecx = rows remaining
y_loop:
		push ecx			// park the row counter, ecx is reused below

		// ---- first row of the pair: source read in memory order ----
		mov ecx,dword ptr [wid_64]	// ecx = 16-byte groups in this row
x_loop:
		// Two source texels are packed into one output dword: the first one
		// in the low 16 bits, the second one in the high 16 bits.
		mov eax,dword ptr [esi]		// read first pixel
		add esi,4
		bswap eax			// source bytes are big-endian
		mov edx,eax

		xor ebx,ebx			// ebx accumulates the packed output
		shl eax,8	// 0x000000F0 -> 0x0000F000 (a)
		and eax,0x0000F000
		or ebx,eax
		shr edx,12	// 0x0000F000 -> 0x0000000F (b)
		mov eax,edx
		and eax,0x0000000F
		or ebx,eax
		shr edx,4	// 0x00F00000 went to 0x00000F00 -> 0x000000F0 (g)
		mov eax,edx
		and eax,0x000000F0
		or ebx,eax
		shr edx,4	// 0xF0000000 went to 0x000F0000 went to 0x0000F000 -> 0x00000F00 (r)
		and edx,0x00000F00
		or ebx,edx

		mov eax,dword ptr [esi]		// read second pixel
		add esi,4
		bswap eax
		mov edx,eax

		shl eax,24	// 0x000000F0 -> 0xF0000000 (a)
		and eax,0xF0000000
		or ebx,eax
					// 0x00F00000 -> 0x00F00000 (g)
		mov eax,edx
		and eax,0x00F00000
		or ebx,eax
		rol edx,4	// 0x0000F000 (did not shift) -> 0x000F0000 (b)
		mov eax,edx
		and eax,0x000F0000
		or ebx,eax
		shl edx,24	// 0xF0000000 went to 0x0000000F -> 0x0F000000 (r)
		and edx,0x0F000000
		or ebx,edx

		mov dword ptr [edi],ebx		// store two 16-bit texels
		add edi,4

		// * copy - same conversion for the second quadword of the group
		mov eax,dword ptr [esi]		// read first pixel
		add esi,4
		bswap eax
		mov edx,eax

		xor ebx,ebx
		shl eax,8	// 0x000000F0 -> 0x0000F000 (a)
		and eax,0x0000F000
		or ebx,eax
		shr edx,12	// 0x0000F000 -> 0x0000000F (b)
		mov eax,edx
		and eax,0x0000000F
		or ebx,eax
		shr edx,4	// 0x00F00000 went to 0x00000F00 -> 0x000000F0 (g)
		mov eax,edx
		and eax,0x000000F0
		or ebx,eax
		shr edx,4	// 0xF0000000 went to 0x000F0000 went to 0x0000F000 -> 0x00000F00 (r)
		and edx,0x00000F00
		or ebx,edx

		mov eax,dword ptr [esi]		// read second pixel
		add esi,4
		bswap eax
		mov edx,eax

		shl eax,24	// 0x000000F0 -> 0xF0000000 (a)
		and eax,0xF0000000
		or ebx,eax
					// 0x00F00000 -> 0x00F00000 (g)
		mov eax,edx
		and eax,0x00F00000
		or ebx,eax
		rol edx,4	// 0x0000F000 (did not shift) -> 0x000F0000 (b)
		mov eax,edx
		and eax,0x000F0000
		or ebx,eax
		shl edx,24	// 0xF0000000 went to 0x0000000F -> 0x0F000000 (r)
		and edx,0x0F000000
		or ebx,edx

		mov dword ptr [edi],ebx
		add edi,4
		// *

		dec ecx
		jnz x_loop

		pop ecx				// back to the row counter
		dec ecx
		jz end_y_loop			// odd height: no second row in this pair
		push ecx

		add esi,dword ptr [line]	// step both cursors to the next row
		add edi,dword ptr [ext]

		// ---- second row of the pair: 8-byte halves of each group swapped ----
		mov ecx,dword ptr [wid_64]
x_loop_2:
		mov eax,dword ptr [esi+8]		// read first pixel
		bswap eax
		mov edx,eax

		xor ebx,ebx
		shl eax,8	// 0x000000F0 -> 0x0000F000 (a)
		and eax,0x0000F000
		or ebx,eax
		shr edx,12	// 0x0000F000 -> 0x0000000F (b)
		mov eax,edx
		and eax,0x0000000F
		or ebx,eax
		shr edx,4	// 0x00F00000 went to 0x00000F00 -> 0x000000F0 (g)
		mov eax,edx
		and eax,0x000000F0
		or ebx,eax
		shr edx,4	// 0xF0000000 went to 0x000F0000 went to 0x0000F000 -> 0x00000F00 (r)
		and edx,0x00000F00
		or ebx,edx

		mov eax,dword ptr [esi+12]		// read second pixel
		bswap eax
		mov edx,eax

		shl eax,24	// 0x000000F0 -> 0xF0000000 (a)
		and eax,0xF0000000
		or ebx,eax
					// 0x00F00000 -> 0x00F00000 (g)
		mov eax,edx
		and eax,0x00F00000
		or ebx,eax
		rol edx,4	// 0x0000F000 (did not shift) -> 0x000F0000 (b)
		mov eax,edx
		and eax,0x000F0000
		or ebx,eax
		shl edx,24	// 0xF0000000 went to 0x0000000F -> 0x0F000000 (r)
		and edx,0x0F000000
		or ebx,edx

		mov dword ptr [edi],ebx
		add edi,4

		// * copy - now the first half of the group
		mov eax,dword ptr [esi+0]		// read first pixel
		bswap eax
		mov edx,eax

		xor ebx,ebx
		shl eax,8	// 0x000000F0 -> 0x0000F000 (a)
		and eax,0x0000F000
		or ebx,eax
		shr edx,12	// 0x0000F000 -> 0x0000000F (b)
		mov eax,edx
		and eax,0x0000000F
		or ebx,eax
		shr edx,4	// 0x00F00000 went to 0x00000F00 -> 0x000000F0 (g)
		mov eax,edx
		and eax,0x000000F0
		or ebx,eax
		shr edx,4	// 0xF0000000 went to 0x000F0000 went to 0x0000F000 -> 0x00000F00 (r)
		and edx,0x00000F00
		or ebx,edx

		mov eax,dword ptr [esi+4]		// read second pixel
		add esi,16			// whole 16-byte group consumed
		bswap eax
		mov edx,eax

		shl eax,24	// 0x000000F0 -> 0xF0000000 (a)
		and eax,0xF0000000
		or ebx,eax
					// 0x00F00000 -> 0x00F00000 (g)
		mov eax,edx
		and eax,0x00F00000
		or ebx,eax
		rol edx,4	// 0x0000F000 (did not shift) -> 0x000F0000 (b)
		mov eax,edx
		and eax,0x000F0000
		or ebx,eax
		shl edx,24	// 0xF0000000 went to 0x0000000F -> 0x0F000000 (r)
		and edx,0x0F000000
		or ebx,edx

		mov dword ptr [edi],ebx
		add edi,4
		// *

		dec ecx
		jnz x_loop_2
		
		add esi,dword ptr [line]
		add edi,dword ptr [ext]

		pop ecx
		dec ecx
		jnz y_loop

end_y_loop:
	}

	return (1 << 16) | GR_TEXFMT_ARGB_4444;
}

// Loads a 32-bit-per-texel texture as 32-bit ARGB8888, or hands the work to
// Load32bRGBAas16bRGBA when a 16-bit result is required.
//
//   dst        - destination address (a DWORD that is used as a pointer)
//   src        - source address (a DWORD that is used as a pointer)
//   wid_64     - source row width in 64-bit words (two 32-bit texels each)
//   height     - number of rows to convert
//   line       - bytes added to the source pointer after every row
//   real_width - destination row width in texels; the padding skipped after
//                each row is (real_width - texels written) * 4 bytes
//   tile       - tile index; compared with rdp.cur_tile to choose between
//                cmb.mod_0 and cmb.mod_1
//
// Each source dword is byte-swapped and rotated right by 8 bits, so source
// bytes b0 b1 b2 b3 become the value 0x(b3)(b0)(b1)(b2) - i.e. R,G,B,A bytes
// turn into 0xAARRGGBB.
//
// Rows are handled in pairs.  The first row of a pair is read in memory order;
// the second row is read with the two 8-byte halves of every 16-byte group
// swapped (offsets +8,+12,+0,+4) - presumably to undo the N64 TMEM odd-line
// interleave, TODO confirm against the caller.
//
// Returns (2 << 16) | GR_TEXFMT_ARGB_8888, or whatever Load32bRGBAas16bRGBA
// returns when the fallback is taken.
DWORD Load32bRGBA (DWORD dst, DWORD src, int wid_64, int height, int line, int real_width, int tile)
{
  // Fall back to the 16-bit loader when the modifier selected for this tile is
  // non-zero or sup_32bit_tex is false.
  int id = tile - rdp.cur_tile;
  DWORD mod = (id == 0) ? cmb.mod_0 : cmb.mod_1;
  if (mod || !sup_32bit_tex)
    return Load32bRGBAas16bRGBA(dst, src, wid_64, height, line, real_width, tile);

  if (height < 1) height = 1;

  // Every pass of the inner loops consumes TWO quadwords (16 source bytes),
  // so the loop counter is wid_64 / 2.  A width below 2 used to produce a
  // counter of 0, and because the loops are "dec ecx / jnz" that wrapped to
  // 0xFFFFFFFF and ran ~2^32 passes, trampling both buffers.  Round such a
  // row up to one full pass instead.
  if (wid_64 < 2)
  {
    // The row now consumes 16 bytes instead of the 8 the caller planned for,
    // so take the extra 8 out of the gap to keep the source row stride
    // unchanged.  (Assumes 'line' is the byte gap that follows a
    // wid_64-quadword row - TODO confirm against the caller.)
    line -= 8;
    wid_64 = 2;
    // NOTE(review): four texels are written per row in this case; when
    // real_width < 4 that reaches past the nominal row end, so confirm the
    // destination buffer has slack.
  }

  // Destination padding after each row, in bytes (4 bytes per output texel).
  int ext = (real_width - (wid_64 << 1)) << 2;

  // NOTE(review): an odd wid_64 >= 3 is rounded DOWN by the shift below, so
  // the last quadword of such a row is not converted.  Left as in the
  // original.
  wid_64 >>= 1;		// re-shift it, load twice as many quadwords

  __asm {
    mov esi,dword ptr [src]		// esi = read cursor
    mov edi,dword ptr [dst]		// edi = write cursor

    mov ecx,dword ptr [height]		// ecx = rows remaining
y_loop:
    push ecx				// park the row counter, ecx is reused below

    // ---- first row of the pair: source read in memory order ----
    mov ecx,dword ptr [wid_64]		// ecx = 16-byte groups in this row
x_loop:
    mov eax,dword ptr [esi]		// read first pixel
    add esi,4
    bswap eax				// source bytes are big-endian
    ror eax, 8				// move the last source byte to the top
    mov dword ptr [edi],eax
    add edi,4

    mov eax,dword ptr [esi]		// read second pixel
    add esi,4
    bswap eax
    ror eax, 8
    mov dword ptr [edi],eax
    add edi,4

    // * copy - same conversion for the second quadword of the group
    mov eax,dword ptr [esi]		// read first pixel
    add esi,4
    bswap eax
    ror eax, 8
    mov dword ptr [edi],eax
    add edi,4

    mov eax,dword ptr [esi]		// read second pixel
    add esi,4
    bswap eax
    ror eax, 8
    mov dword ptr [edi],eax
    add edi,4
    // *

    dec ecx
    jnz x_loop

    pop ecx				// back to the row counter
    dec ecx
    jz end_y_loop			// odd height: no second row in this pair
    push ecx

    add esi,dword ptr [line]		// step both cursors to the next row
    add edi,dword ptr [ext]

    // ---- second row of the pair: 8-byte halves of each group swapped ----
    mov ecx,dword ptr [wid_64]
x_loop_2:
    mov eax,dword ptr [esi+8]		// read first pixel
    bswap eax
    ror eax, 8
    mov dword ptr [edi],eax
    add edi,4

    mov eax,dword ptr [esi+12]		// read second pixel
    bswap eax
    ror eax, 8
    mov dword ptr [edi],eax
    add edi,4

    // * copy - now the first half of the group
    mov eax,dword ptr [esi+0]		// read first pixel
    bswap eax
    ror eax, 8
    mov dword ptr [edi],eax
    add edi,4

    mov eax,dword ptr [esi+4]		// read second pixel
    add esi,16				// whole 16-byte group consumed
    bswap eax
    ror eax, 8
    mov dword ptr [edi],eax
    add edi,4
    // *

    dec ecx
    jnz x_loop_2

    add esi,dword ptr [line]
    add edi,dword ptr [ext]

    pop ecx
    dec ecx
    jnz y_loop

end_y_loop:
  }
  return (2 << 16) | GR_TEXFMT_ARGB_8888;
}

