; Assembly routines for the Dualis GPU
; /Mic, 2005


.686p
.mmx
.model flat,stdcall
option casemap:none


; Pixel-type constant. The text BG renderer compares glFormat against this
; value to decide whether colors are repacked as 5-5-5-1 before being stored.
GL_UNSIGNED_SHORT_5_5_5_1	EQU 08034h

.data
	align 16
	PUBLIC objwidth
	PUBLIC objheight
	PUBLIC useGL
	PUBLIC useD3D
	
	; Sprite width table, 4 groups of 4 dwords (the last group is all zero).
	; NOTE(review): presumably indexed by (OAM shape * 4 + size) from the
	; C side; the indexing code is not in this part of the file - confirm.
	objwidth	dd 8,16,32,64,
			   16,32,32,64,
			   8,8,16,32,
			   0,0,0,0
			   
	; Sprite height table, laid out like objwidth (groups 1 and 2 swapped).
	objheight	dd 8,16,32,64,
		   	   8,8,16,32,
			   16,32,32,64,
			   0,0,0,0
	

	; Renderer selection flags tested by bg_line_text_bottom: a non-zero
	; useD3D always selects the plain store path; otherwise a non-zero useGL
	; together with glFormat == GL_UNSIGNED_SHORT_5_5_5_1 selects the
	; repacking path.
	useGL		dd 0
	useD3D		dd 0
	
	
.data?
	; Public variables
	; Uninitialised state shared with the rest of the emulator. Names without
	; a PUBLIC line (counter, p_amap, color0, extPlttPtr, dummy, count, delta,
	; col0) are scratch storage private to this file.
	align 16
	PUBLIC alphamap
	PUBLIC x2
	PUBLIC y2
	PUBLIC deltax
	PUBLIC deltay
	PUBLIC hTileMask	
	PUBLIC vTileMask	
	PUBLIC hScrMask	
	PUBLIC hScrShift	
	PUBLIC vScrMask	
	PUBLIC vScrShift	
	PUBLIC tileShift
	PUBLIC xtile
	PUBLIC hofs
	PUBLIC vofs
	PUBLIC VRAM
	PUBLIC VRAM_curr
	PUBLIC vscreen
	PUBLIC vscreen_curr
	PUBLIC scanline
	PUBLIC currPalette
	PUBLIC bgcnt
	PUBLIC vscreen_size
	PUBLIC numBlends
	PUBLIC useExtPltt
	PUBLIC lineaddr
	PUBLIC scrBase
	PUBLIC chrBase
	PUBLIC tileOffset
	PUBLIC core
	PUBLIC charBank
	PUBLIC screenBank
	PUBLIC objx2, objy2		
	PUBLIC objdx, objdy		
	PUBLIC objdmx, objdmy		
	PUBLIC xclip		
	PUBLIC clippedWidth	
	PUBLIC spriteCnt, spriteLimit, spriteTile, spriteBit6
	PUBLIC io_base, oam_base, pal_base
	PUBLIC prio_int
	PUBLIC cmTileShift
	PUBLIC glFormat, glFormatBackup
	PUBLIC patternTbl
	PUBLIC tileData
	PUBLIC newframe
	
	alphamap	dw 256 dup (?) 		; Per-pixel opacity bytes (0 / 0FFh) written by the blend and OGL-transparent paths
	currPalette	dd ?			; Pointer to the active 16-bit palette; entry 0 is used as the fill color
	x2		dd ?			; Current u coordinate, 8 fractional bits (renderers shift it right by 8)
	y2		dd ?			; Current v coordinate, 8 fractional bits
	deltax		dd ?			; Per-pixel u increment
	deltay		dd ?			; Per-pixel v increment
	hTileMask	dd ?			; Horizontal AND mask / upper bound applied to the integer u value
	vTileMask	dd ?			; Vertical AND mask / upper bound applied to the integer v value
	hScrMask	dd ?
	hScrShift	dd ?			; Shift count; the bitmap renderers use it to turn a row number into a byte offset
	vScrMask	dd ?
	vScrShift	dd ?
	tileShift	dd ?			; Shift applied to the tile row before adding the tile column
	xtile		dd ?			; Running tile column while a text line is drawn
	hofs		dd ?			; Horizontal scroll offset
	vofs		dd ?			; Vertical scroll offset
	core		dd ?
	VRAM		dd ?
	VRAM_curr	dd ?
	io_base		dd ?
	oam_base	dd ?
	pal_base	dd ?
	charBank	dd ?			; Base pointer for character / bitmap data
	screenBank	dd ?			; Base pointer for screen (map) data
	vscreen		dd ?
	vscreen_curr	dd ?			; Destination frame buffer; lines are 512 bytes apart
	scanline	dd ?			; 1-based line number (renderers use scanline-1)
	counter		dd ?			; Pixel counter used by the bitmap renderers
	p_amap		dd ?			; Write cursor into alphamap
	numBlends	dd ?
	vscreen_size 	dd ?
	useExtPltt	dd ?			; Zero, or the base address of the extended palettes (it is added to palette offsets)
	lineaddr	dd ?			; Destination address of the line being drawn, saved by the renderers
	color0		dd ?
	extPlttPtr	dd ?			; Copy of useExtPltt taken by the renderers
	scrBase		dd ?			; Screen base block number (shifted left by 11 = 2k units)
	chrBase		dd ?			; Character base block number (shifted left by 14 = 16k units)
	dummy		dd ?			; Scratch: charBank + (chrBase << 14) in the text renderers
	objx2		dd ?
	objy2		dd ?
	objdx		dd ?
	objdy		dd ?
	objdmx		dd ?
	objdmy		dd ?
	xclip		dd ?
	clippedWidth	dd ?
	spriteCnt	dd ?
	spriteLimit	dd ?
	spriteTile	dd ?
	spriteBit6	dd ?
	cmTileShift	dd ?
	prio_int	dd ?
	glFormat	dd ?
	glFormatBackup	dd ?
	patternTbl	dd ?
	tileData	dd ?
	newframe	dd ?			; bg_line_text_bottom draws transparently when (newframe & 3) != 0
	tileOffset	dd 2 dup(?)		; [0] = tile row * 8, [1] = (tile row XOR 7) * 8; indexed by the vflip bit
	count		dd ?			; Scratch pixel counter for the OGL paths (ECX is busy there)
	delta		dd ?			; Scratch source step (+1 / -1) for the OGL transparent path
	bgcnt		dw ?			; BG control word; bits 8-12 select the bitmap base, bit 13 (2000h) is the wrap flag
	col0		dw ?			; Saved copy of an extended palette's entry 0 while it is temporarily replaced


.code



; Store operation for renderers that need no color conversion:
; writes the 16-bit color in AX to the destination pixel at [EDI].
; Changes no registers or flags.
NOFLIP_AND_STORE MACRO
	mov 	word ptr [edi],ax
ENDM

; Convert xBGR to xRGB and store.
; In:    AX  = 15-bit color, EDI = destination pixel
; Out:   [EDI] = color with the 5-bit fields at bits 0-4 and 10-14 exchanged
;        (bits 5-9 unchanged, bit 15 cleared)
; Clobb: EAX, EBX, ECX, flags
FLIP_AND_STORE_RS MACRO
	mov 	ecx,eax
	mov 	ebx,eax
	shr 	eax,10
	and 	ecx,03E0h		; middle field stays where it is
	shl 	ebx,10
	and 	eax,001Fh		; old bits 10-14 -> bits 0-4
	and 	ebx,7C00h		; old bits 0-4 -> bits 10-14
	or 	ecx,ebx
	or 	eax,ecx
	mov 	[edi],ax
ENDM


; Convert xBGR to RGBx and store.
; In:    AX  = 15-bit color, EDI = destination pixel
; Out:   [EDI] = bits 0-4 moved to 11-15, bits 5-9 moved to 6-10,
;        bits 10-14 moved to 1-5, bit 0 cleared
; Clobb: EAX, EBX, ECX, flags
FLIP_SHIFT_AND_STORE_RS MACRO
	mov 	ebx,eax
	lea 	ecx,[eax+eax]		; whole color shifted left by one
	shl 	ebx,11
	shr 	eax,9
	and 	ecx,007C0h		; keep only the middle field (now bits 6-10)
	and 	ebx,0F800h		; old bits 0-4 -> bits 11-15
	and 	eax,0003Eh		; old bits 10-14 -> bits 1-5
	or 	ecx,ebx
	or 	eax,ecx
	mov 	[edi],ax
ENDM

; TEXT_BOTTOM_BGR2RGB flipOp,fillMode
;
; Expands to the complete body of a scanline renderer, including the final
; RET. It draws 256 pixels of a tiled BG sampled along (x2,y2) with the
; per-pixel step (deltax,deltay); both have 8 fractional bits.
;
;   flipOp   - name of a store macro. It is invoked with the 16-bit color in
;              AX and the destination in EDI, and may clobber EAX/EBX/ECX.
;              Only the extended-palette loop uses it.
;   fillMode - 0: every pixel is stored
;              1: pixels whose color index is 0 are skipped
;              2: nothing is stored; EDI is first advanced by
;                 (numBlends+1)*2*vscreen_size and lineaddr is not updated
;
; Reads map data from VRAM + (scrBase << 11) and 8-bit tile pixels from
; VRAM + (chrBase << 14). When useExtPltt is non-zero it is the base of the
; extended palettes and the map entry's top four bits pick a 512-byte slot;
; otherwise colors come from currPalette.
;
; All general registers are preserved (PUSHA/POPA); MMX state is released
; with EMMS before returning.
;
; NOTE(review): the standard-palette loop stores colors with a plain MOV and
; never calls flipOp - confirm that currPalette is already in the target
; format.
TEXT_BOTTOM_BGR2RGB MACRO flipOp,fillMode
	LOCAL __extpltt,__hloop,__ext_hloop,__done,__trans,__ext_trans
	
	pusha
	mov 	eax,[scanline]
	dec 	eax
	mov 	edi,[vscreen_curr]
	shl 	eax,9
	add 	edi,eax			; edi = vscreen+(scanline-1)*512

	mov 	esi,[VRAM]
	mov 	eax,[scrBase]
	shl 	eax,11			; Select 2k screen base block
	mov 	ebx,[scanline]
	dec 	ebx
	add 	ebx,[vofs]
	and 	ebx,0FFh			; temp fix !!
	mov 	ecx,ebx
	shr 	ebx,3
	shl 	ebx,5			; ebx = (line/8)*32 (not used below)
	add 	esi,eax			; esi = map base

	and 	ecx,7			; ecx = (scanline+yscroll)&7  (tile row)
	mov 	edx,[VRAM]
	mov 	eax,[chrBase]
	shl 	eax,14			; Select 16k chr base block
	shl 	ecx,3			; Multiply by 8 (number of bytes per row)
	add 	edx,eax			; edx = VRAM + chrBase, live until __done

	IF fillMode EQ 2
		mov 	eax,numBlends
		inc 	eax
		add 	eax,eax
		; Two-operand IMUL yields the same low 32 bits as MUL but leaves
		; EDX (the character base set up above) untouched.
		imul 	eax,dword ptr [vscreen_size]
		add 	edi,eax
	ELSE	
		mov 	[lineaddr],edi		; Save destination address for later use
	ENDIF
	
	push 	ebp
	movd 	mm0,[x2]		; mm0 = u
	movd 	mm1,[deltax]		; mm1 = du
	movd 	mm2,[y2]		; mm2 = v
	movd 	mm3,[deltay]		; mm3 = dv
	cmp 	useExtPltt,0
	jne 	__extpltt
	
	mov 	ebp,[currPalette]

	xor 	ecx,ecx			; ecx = pixel counter (kept on the stack inside the loop)
	__hloop:
		push 	ecx
		movd 	ebx,mm0 ;[x2]
		movd 	ebp,mm2 ;[y2]
		sar 	ebx,8		; drop the fractional bits
		sar 	ebp,8
		mov 	eax,ebx
		mov 	ecx,ebp
		sar 	ebx,3		; ebx = tile column
		sar 	ebp,3		; ebp = tile row
		and 	eax,7		; eax = pixel column inside the tile
		and 	ecx,7		; ecx = pixel row inside the tile
		and 	ebx,[hTileMask]
		and 	ebp,[vTileMask]
		push	ecx
		mov 	ecx,[tileShift]
		shl 	ebp,cl
		add 	ebx,ebp		; ebx = map index
		pop 	ecx
		movzx 	ebx,byte ptr [esi+ebx*2]	; Low byte of the map entry = tile number
		shl 	ecx,3
		shl 	ebx,6		; 64 bytes per tile
		mov 	ebp,[currPalette]
		add 	ebx,edx
		add 	eax,ecx
		movzx 	ecx,byte ptr [ebx+eax]	; ecx = color index
		IF fillMode EQ 0
			mov 	ax,[ebp + ecx*2]
			mov 	[edi],ax
		ELSEIF fillMode EQ 1
			test 	ecx,ecx
			jz 	__trans		; index 0 = transparent, leave the destination alone
			mov 	ax,[ebp + ecx*2]
			mov 	[edi],ax
			__trans:			
		ELSE
		ENDIF
		paddd 	mm0,mm1		; u += du
		paddd 	mm2,mm3		; v += dv
		add 	edi,2
		pop 	ecx
		inc 	ecx
		cmp 	ecx,256
		jne 	__hloop
		jmp 	__done


	__extpltt:
	mov 	ebp,useExtPltt
	mov 	extPlttPtr,ebp
	movd 	mm0,[x2]
	movd 	mm1,[deltax]
	movd 	mm2,[y2]
	movd 	mm3,[deltay]
	xor 	ecx,ecx
	__ext_hloop:
		push 	ecx
		movd 	ebx,mm0
		movd 	ebp,mm2
		sar 	ebx,8
		sar 	ebp,8
		mov 	eax,ebx			; EAX = u.whole
		mov 	ecx,ebp			; ECX = v.whole
		sar 	ebx,3
		sar 	ebp,3
		and 	eax,7			; EAX = tile pixel
		and 	ecx,7			; ECX = tile row
		and 	ebx,[hTileMask]
		and 	ebp,[vTileMask]
		push 	ecx
		mov 	ecx,[tileShift]
		shl 	ebp,cl
		add 	ebx,ebp
		pop 	ecx
		movzx 	ebx,word ptr [esi+ebx*2]	; Read map data
		shl 	ecx,3			; ECX = tile row * 8
		mov 	ebp,ebx
		and 	ebx,0FFh
		shr 	ebp,3
		shl 	ebx,6			; EBX = tile number * 64
		and 	ebp,01E00h			; EBP = palette# * 512
		add 	ebx,edx
		add 	ebp,extPlttPtr		
		add 	eax,ecx
		movzx 	ecx,byte ptr [ebx+eax]	; CL = pixel data
		IF fillMode EQ 0
			mov 	ax,[ebp + ecx*2]
			flipOp				; Flip and store color data
		ELSEIF fillMode EQ 1
			test 	ecx,ecx
			jz	__ext_trans
			mov 	ax,[ebp + ecx*2]
			flipOp				; Flip and store color data
			__ext_trans:			
		ELSE
		ENDIF
		paddd 	mm0,mm1			; u += du
		paddd 	mm2,mm3			; v += dv
		pop 	ecx
		add 	edi,2
		inc 	ecx
		cmp 	ecx,256
		jne 	__ext_hloop
	
	__done:
	pop 	ebp
	popa	
	emms
	ret
ENDM




;-----------------------------------------------------------------------
; bg_line_text_bottom
;
; Draws one 256-pixel line of a scrolled text (tiled) BG that uses 8-bit
; pixels and the extended palettes.
;
; In (globals): scanline, vscreen_curr, screenBank, scrBase, charBank,
;               chrBase, hofs, vofs, hTileMask, hScrMask, hScrShift,
;               vScrMask, useExtPltt (base of the extended palettes),
;               currPalette, newframe, useD3D, useGL, glFormat
; Out:          pixels at vscreen_curr + (scanline-1)*512; lineaddr is set
;               to that address. Scratch globals p_amap, tileOffset,
;               dummy, xtile, col0, count and delta are overwritten.
; Registers:    all preserved (PUSHA/POPA)
;
; The line is drawn as a partial first tile (8 - (hofs & 7) pixels), 31
; full tiles and a partial last tile (hofs & 7 pixels). Only the 31 full
; tiles honour the map entry's hflip bit.
;
; Two fill modes, selected by (newframe & 3):
;   zero     - opaque: entry 0 of the tile's palette is temporarily
;              replaced by currPalette[0] and restored from col0 afterwards
;   non-zero - transparent: pixels with index 0 are skipped
;
; When useD3D is 0, useGL is non-zero and glFormat equals
; GL_UNSIGNED_SHORT_5_5_5_1, every color is repacked (same bit operations
; as FLIP_SHIFT_AND_STORE_RS) before it is stored. In that mode the
; transparent paths for the first tile and the full tiles also write one
; byte per pixel (0 or 0FFh) through p_amap.
;-----------------------------------------------------------------------
bg_line_text_bottom PROC
	pusha
	mov 	eax,[scanline]
	dec 	eax
	mov 	edi,[vscreen_curr]
	shl 	eax,9
	add 	edi,eax				; edi = vscreen+(scanline-1)*512

	mov 	esi,[screenBank] 
	mov 	eax,[scrBase]
	shl 	eax,11				; Select 2k screen base block

	mov 	ebx,[scanline]
	dec 	ebx
	add 	ebx,[vofs]			; ebx = line inside the BG

	mov 	p_amap,OFFSET alphamap

	mov 	edx,ebx
	and 	edx,[vScrMask]
	shr 	edx,3
	mov 	ecx,[hTileMask]
	shr 	ecx,5
	add 	ecx,6
	shl 	edx,cl				; edx = ((line & vScrMask)/8) << (hTileMask/32 + 6)
	add 	esi,edx
	add 	esi,eax
	mov 	ecx,ebx

	shr 	ecx,3
	and 	ecx,01Fh
	shl 	ecx,6				; 64 bytes per map row (32 entries of 2 bytes)
	add 	esi,ecx				
	mov 	ecx,ebx

	and 	ecx,7				; ecx = (scanline+yscroll)&7  (tile row)

	mov 	ebx,ecx
	xor 	ebx,7				; "Inverted" tile row (used for vflipping)
	shl 	ebx,3
	shl 	ecx,3				; Multiply by 8 (number of bytes per row)
	mov 	[tileOffset+4],ebx
	mov 	[tileOffset],ecx

	mov 	[lineaddr],edi

	mov 	edx,[charBank] 
	mov 	eax,[chrBase]
	shl 	eax,14
	add 	edx,eax
	mov 	[dummy],edx			; dummy = charBank + (chrBase << 14)


	push	ebp
	mov 	ebp,[currPalette]

	; ---- First (partial) tile -------------------------------------------
	mov 	eax,[hofs]
	mov 	ebx,eax
	shr 	eax,3				; eax = xscroll/8
	mov 	[xtile],eax
	lea 	ebx,[eax*8]
	and 	eax,01Fh 
	and 	ebx,[hScrMask]
	mov 	ecx,[hScrShift]
	shl 	ebx,cl
	lea 	eax,[ebx+eax*2]
	xor 	ebx,ebx				; ebx = 0
	mov 	bx,[esi+eax]			; Get tile data

	mov 	ebp,ebx
	shr 	ebp,3
	and 	ebp,01E00h			; ebp = (top 4 bits of the entry) * 512
	add 	ebp,useExtPltt			; ebp = palette used by this tile
	mov 	ax,[ebp]
	mov 	col0,ax				; Remember the palette's real entry 0
		
	inc 	dword ptr [xtile]		; Next tile..
	mov 	eax,ebx
	and 	ebx,03FFh			; ebx = tile number
	shr 	eax,11				; Get vflip flag
	shl 	ebx,6				; ebx *= 8*8
	and 	eax,1
	add 	ebx,[dummy] 			; ebx += charBank + (chrBase << 14)
	add 	ebx,[tileOffset+eax*4]		; ebx += tile row offset (normal or vflipped)

	mov 	eax,[hofs]
	mov 	ecx,8
	and 	eax,7
	add 	ebx,eax
	sub 	ecx,eax				; ecx = 8 - (xscroll&7)  (length of first tile)

	cmp 	[useD3D],0
	jne 	@@m0_tile0_d3d
	cmp 	[useGL],0
	je 	@@m0_tile0
	cmp 	dword ptr [glFormat],GL_UNSIGNED_SHORT_5_5_5_1
	jne 	@@m0_tile0

	; Repacking path: ECX and ESI are needed as temporaries, so the pixel
	; count lives in [count] and ESI is saved on the stack.
	push 	esi
	mov 	count,ecx

	test 	[newframe],3
	mov	edx,p_amap			; MOV leaves the flags from TEST intact
	jnz	@@m0_tile0_draw_trans_ogl

	mov 	eax,[currPalette]
	mov 	ax,[eax]
	mov 	[ebp],ax			; Entry 0 := currPalette[0] while drawing opaque
	@@m0_tile0_draw_ogl:
		xor 	eax,eax			; Clear eax
		mov 	al,[ebx]		; Load pixel
		mov 	ax,[ebp + eax*2]	; Get 15-bit color value
		
		mov 	ecx,eax
		mov	esi,eax
		shl 	ecx,11
		shr 	eax,9
		and 	esi,003E0h
		and 	ecx,0F800h
		shl 	esi,1
		and 	eax,0003Eh
		or 	esi,ecx
		or 	eax,esi
		
		mov 	[edi],ax		; Store
		add 	edi,2			; Increase destination pointer
		inc 	ebx
		dec 	count
		jnz 	@@m0_tile0_draw_ogl
	pop 	esi
	mov 	ax,col0
	mov 	[ebp],ax			; Restore the palette's entry 0
	jmp 	@@m0_tile0_done

	@@m0_tile0_draw_trans_ogl:
		xor 	eax,eax			; Clear eax
		mov 	al,[ebx]		; Load pixel
		test	eax,eax
		jz	@@m0_tile0_trans_ogl	; Index 0: skip the pixel, AL = 0 goes to the alpha map
		mov 	ax,[ebp + eax*2]	; Get 15-bit color value
		
		mov 	ecx,eax
		mov	esi,eax
		shl 	ecx,11
		shr 	eax,9
		and 	esi,003E0h
		and 	ecx,0F800h
		shl 	esi,1
		and 	eax,0003Eh
		or 	esi,ecx
		or 	eax,esi
		
		mov 	[edi],ax		; Store
		mov	al,0FFh			; Opaque marker for the alpha map
		@@m0_tile0_trans_ogl:
		mov	[edx],al
		add 	edi,2			; Increase destination pointer
		inc 	edx
		inc 	ebx
		dec 	count
		jnz 	@@m0_tile0_draw_trans_ogl
	pop 	esi
	mov	p_amap,edx
	jmp 	@@m0_tile0_done
		
	
	@@m0_tile0_d3d:
	@@m0_tile0:
	test 	[newframe],3
	jnz	@@m0_tile0_draw_trans

	mov 	eax,[currPalette]
	mov 	ax,[eax]
	mov 	[ebp],ax
	@@m0_tile0_draw:
		xor 	eax,eax			; Clear eax
		mov 	al,[ebx]		; Load pixel
		mov 	ax,[ebp + eax*2]	; Get 15-bit color value
		mov 	[edi],ax		; Store
		add 	edi,2			; Increase destination pointer
		inc 	ebx
		dec 	ecx
		jnz 	@@m0_tile0_draw
	mov 	ax,col0
	mov 	[ebp],ax
	jmp 	@@m0_tile0_done
	
	@@m0_tile0_draw_trans:
		xor 	eax,eax				; Clear eax
		mov 	al,[ebx]			; Load pixel
		test	eax,eax
		jz @@m0_tile0_trans
			mov 	ax,[ebp + eax*2]	; Get 15-bit color value
			mov 	[edi],ax		; Store
		@@m0_tile0_trans:
		inc	p_amap				; Cursor advances, but no alpha byte is written on this path
		add 	edi,2				; Increase destination pointer
		inc 	ebx
		dec 	ecx
		jnz 	@@m0_tile0_draw_trans

	; ---- 31 full tiles ---------------------------------------------------
	@@m0_tile0_done:
	mov 	ecx,31 

	test 	[newframe],3
	jnz 	@@m0_draw_tiles_trans
	
	@@m0_draw_tiles:
		push 	ecx				; Save the tile counter
		mov 	eax,[xtile]
		lea 	ebx,[eax*8]
		and 	eax,01Fh 
		and 	ebx,[hScrMask]
		mov 	ecx,[hScrShift]
		shl 	ebx,cl
		lea 	eax,[ebx+eax*2]
		xor 	ebx,ebx				; ebx = 0
		mov 	bx,[esi+eax]			; Get tile data

		mov 	ebp,ebx
		shr 	ebp,3
		and 	ebp,01E00h
		add 	ebp,useExtPltt
		mov 	ax,[ebp]
		mov 	col0,ax
		mov 	eax,[currPalette]
		mov 	ax,[eax]
		mov 	[ebp],ax			; Entry 0 := currPalette[0] for this tile
		
		mov 	eax,ebx
		mov 	edx,ebx
		shr 	eax,11				; Get vflip flag
		inc 	dword ptr [xtile]
		and 	eax,1
		and 	edx,0400h			; Get hflip flag
		and 	ebx,03FFh
		shr 	edx,10
		shl 	ebx,6
		add 	ebx,[tileOffset+eax*4]
		add 	ebx,[dummy]
		mov 	eax,1
		lea 	ebx,[ebx+edx*8]
		sub 	eax,edx
		sub 	ebx,edx				; if (hflip==1) ebx+=7
		sub 	eax,edx
		mov 	edx,eax				; edx = (hflip==1) ? -1 : 1

		mov 	ecx,8

		cmp 	[useD3D],0
		jne 	@@m0_draw_d3d
		cmp 	[useGL],0
		je 	@@m0_draw
		cmp 	dword ptr [glFormat],GL_UNSIGNED_SHORT_5_5_5_1
		jne 	@@m0_draw
		
		push esi
		mov count,ecx
		@@m0_draw_ogl:
			xor 	eax,eax
			mov 	al,[ebx]
			mov 	ax,[ebp + eax*2]

			mov 	ecx,eax
			mov	esi,eax
			shl 	ecx,11
			shr 	eax,9
			and 	esi,003E0h
			and 	ecx,0F800h
			shl 	esi,1
			and 	eax,0003Eh
			or 	esi,ecx
			or 	eax,esi
	
			mov 	[edi],ax
			add 	edi,2
			add 	ebx,edx
			dec 	count 
			jnz 	@@m0_draw_ogl
		pop esi
		jmp 	@@m0_draw_done

		@@m0_draw:
		@@m0_draw_d3d:
			xor 	eax,eax
			mov 	al,[ebx]
			mov 	ax,[ebp + eax*2]
			mov 	[edi],ax
			add 	edi,2
			add 	ebx,edx
			dec 	ecx
			jnz 	@@m0_draw
			jmp 	@@m0_draw_done
			
		@@m0_draw_done:
		pop 	ecx
		mov 	ax,col0
		mov 	[ebp],ax			; Restore the palette's entry 0
		dec 	ecx
	jnz 	@@m0_draw_tiles
	jmp 	@@m0_draw_tiles_end
	
	@@m0_draw_tiles_trans:
		push 	ecx
		mov 	eax,[xtile]
		lea 	ebx,[eax*8]
		and 	eax,01Fh 
		and 	ebx,[hScrMask]
		mov 	ecx,[hScrShift]
		shl 	ebx,cl
		lea 	eax,[ebx+eax*2]
		xor 	ebx,ebx				; ebx = 0
		mov 	bx,[esi+eax]			; Get tile data

		mov 	ebp,ebx
		shr 	ebp,3
		and 	ebp,01E00h
		add 	ebp,useExtPltt
		
		mov 	eax,ebx
		mov 	edx,ebx
		shr 	eax,11				; Get vflip flag
		inc 	dword ptr [xtile]
		and 	eax,1
		and 	edx,0400h			; Get hflip flag
		and 	ebx,03FFh
		shr 	edx,10
		shl 	ebx,6
		add 	ebx,[tileOffset+eax*4]
		add 	ebx,[dummy]
		mov 	eax,1
		lea 	ebx,[ebx+edx*8]
		sub 	eax,edx
		sub 	ebx,edx				; if (hflip==1) ebx+=7
		sub 	eax,edx
		mov 	edx,eax				; edx = (hflip==1) ? -1 : 1

		mov 	ecx,8
		
		cmp 	[useD3D],0
		jne 	@@m0_draw_trans_d3d
		cmp 	[useGL],0
		je 	@@m0_draw_trans
		cmp 	dword ptr [glFormat],GL_UNSIGNED_SHORT_5_5_5_1
		jne 	@@m0_draw_trans
		
		; EDX becomes the alpha-map cursor here, so the source step is
		; kept in [delta] and the pixel count in [count].
		push 	esi
		mov 	count,ecx
		mov	delta,edx
		mov	edx,p_amap
		@@m0_draw_trans_ogl:
			xor 	eax,eax
			mov 	al,[ebx]
			test	eax,eax
			jz 	@@m0_trans_ogl
			mov 	ax,[ebp + eax*2]

			mov 	ecx,eax
			mov	esi,eax
			shl 	ecx,11
			shr 	eax,9
			and 	esi,003E0h
			and 	ecx,0F800h
			shl 	esi,1
			and 	eax,0003Eh
			or 	esi,ecx
			or 	eax,esi
	
			mov 	[edi],ax
			mov	al,0FFh
			@@m0_trans_ogl:
			add 	edi,2
			mov	[edx],al
			add 	ebx,delta ;edx
			inc	edx
			dec 	count 
			jnz 	@@m0_draw_trans_ogl
		pop 	esi
		mov 	p_amap,edx
		jmp 	@@m0_draw_trans_done
		
		@@m0_draw_trans_d3d:
		@@m0_draw_trans:
			xor 	eax,eax
			mov 	al,[ebx]
			test	eax,eax
			jz 	@@m0_trans
				mov 	ax,[ebp + eax*2]
				mov 	[edi],ax
			@@m0_trans:
			add 	edi,2
			add 	ebx,edx
			dec 	ecx
			jnz 	@@m0_draw_trans
		
		@@m0_draw_trans_done:
		pop 	ecx
		dec 	ecx
	jnz 	@@m0_draw_tiles_trans

	; ---- Last (partial) tile ---------------------------------------------
@@m0_draw_tiles_end:
	mov 	ebp,[currPalette]
@@m0_tile31:
	mov 	eax,[xtile]

	lea 	ebx,[eax*8]
	and 	eax,01Fh 
	and 	ebx,[hScrMask]
	mov 	ecx,[hScrShift]
	shl 	ebx,cl
	lea 	eax,[ebx+eax*2]
	xor 	ebx,ebx				; ebx = 0
	mov 	bx,[esi+eax]			; Get tile data

	mov 	ebp,ebx
	shr 	ebp,3
	and 	ebp,01E00h
	add 	ebp,useExtPltt
	mov 	ax,[ebp]
	mov 	col0,ax
	
	mov 	eax,ebx				; Get vflip flag
	and 	ebx,03FFh
	shr 	eax,11
	shl 	ebx,6
	and 	eax,1
	add 	ebx,[dummy]
	add 	ebx,[tileOffset+eax*4]

	mov 	ecx,[hofs]
	and 	ecx,7				; ecx = number of pixels still missing
	jz 	@@m0_tile31_end			; Line was tile-aligned: nothing left to draw

	cmp 	[useD3D],0
	jne 	@@m0_tile31_d3d
	cmp 	[useGL],0
	je 	@@m0_tile31_
	cmp 	dword ptr [glFormat],GL_UNSIGNED_SHORT_5_5_5_1
	jne 	@@m0_tile31_

	push 	esi
	mov 	count,ecx

	test 	[newframe],3
	jnz	@@m0_tile31_draw_trans_ogl

	mov 	eax,[currPalette]
	mov 	ax,[eax]
	mov 	[ebp],ax
	@@m0_tile31_draw_ogl:
		xor 	eax,eax
		mov 	al,[ebx]
		mov 	ax,[ebp + eax*2]

		mov 	ecx,eax
		mov	esi,eax
		shl 	ecx,11
		shr 	eax,9
		and 	esi,003E0h
		and 	ecx,0F800h
		shl 	esi,1
		and 	eax,0003Eh
		or 	esi,ecx
		or 	eax,esi
		
		mov 	[edi],ax
		add 	edi,2
		inc 	ebx
		dec 	count
		jnz 	@@m0_tile31_draw_ogl
	pop	esi
	mov 	ax,col0
	mov 	[ebp],ax
	jmp 	@@m0_tile31_end

	; Unlike the first tile and the full tiles, this path writes no alpha
	; map bytes.
	@@m0_tile31_draw_trans_ogl:
		xor 	eax,eax
		mov 	al,[ebx]
		test	eax,eax
		jz	@@m0_tile31_trans_ogl
		mov 	ax,[ebp + eax*2]

		mov 	ecx,eax
		mov	esi,eax
		shl 	ecx,11
		shr 	eax,9
		and 	esi,003E0h
		and 	ecx,0F800h
		shl 	esi,1
		and 	eax,0003Eh
		or 	esi,ecx
		or 	eax,esi
		
		mov 	[edi],ax
		@@m0_tile31_trans_ogl:
		add 	edi,2
		inc 	ebx
		dec 	count
		jnz 	@@m0_tile31_draw_trans_ogl
	pop	esi
	jmp 	@@m0_tile31_end
	
	
	@@m0_tile31_d3d:
	@@m0_tile31_:
	test 	[newframe],3
	jnz 	@@m0_tile31_draw_trans
	
	mov 	eax,[currPalette]
	mov 	ax,[eax]
	mov 	[ebp],ax
	@@m0_tile31_draw_d3d:
	@@m0_tile31_draw:
		xor 	eax,eax
		mov 	al,[ebx]
		mov 	ax,[ebp + eax*2]
		mov 	[edi],ax
		add 	edi,2
		inc 	ebx
		dec 	ecx
		jnz 	@@m0_tile31_draw
	mov 	ax,col0
	mov 	[ebp],ax
	jmp 	@@m0_tile31_end

	@@m0_tile31_draw_trans_d3d:
	@@m0_tile31_draw_trans:
		xor 	eax,eax
		mov 	al,[ebx]
		test 	eax,eax
		jz 	@@m0_tile31_trans
			mov 	ax,[ebp + eax*2]
			mov 	[edi],ax
		@@m0_tile31_trans:
		add 	edi,2
		inc 	ebx
		dec 	ecx
		jnz 	@@m0_tile31_draw_trans
	
@@m0_tile31_end:

	pop 	ebp
	popa
	ret
bg_line_text_bottom ENDP


;-----------------------------------------------------------------------
; bg_line_text_bottom2
;
; Alternative opaque renderer for one 256-pixel line of a scrolled text BG
; with 8-bit pixels and extended palettes. Colors are always stored
; unconverted.
;
; In (globals): scanline, vscreen_curr, screenBank, scrBase, charBank,
;               chrBase, hofs, vofs, hTileMask, hScrMask, hScrShift,
;               vScrMask, useExtPltt (base of the extended palettes),
;               currPalette
; Out:          pixels at vscreen_curr + (scanline-1)*512; lineaddr is set
;               to that address. Scratch globals tileOffset, dummy, xtile,
;               extPlttPtr and col0 are overwritten.
; Registers:    all preserved (PUSHA/POPA)
;
; For the first tile and the 31 full tiles, entry 0 of the tile's palette
; is temporarily replaced by currPalette[0] and restored from col0 after
; the tile has been drawn. The last (partial) tile is drawn with the
; palette as it is.
;
; The 31-tile loop used to contain debugging overrides that forced tile
; number 1 and palette slot 15 for every full tile, ignoring the map entry
; that had just been decoded. The decoded tile number and palette are now
; used, as the first/last tile code of this routine already does.
;-----------------------------------------------------------------------
bg_line_text_bottom2 PROC
	pusha
	mov 	eax,[scanline]
	dec 	eax
	mov 	edi,[vscreen_curr]
	shl 	eax,9
	add 	edi,eax			; edi = vscreen+(scanline-1)*512

	mov 	esi,[screenBank]
	mov 	eax,[scrBase]
	shl 	eax,11			; Select 2k screen base block
	mov 	ebx,[scanline]
	dec 	ebx
	add 	ebx,[vofs]

	mov 	edx,ebx
	and 	ebx,0FFh		; temp fix !!
	and 	edx,[vScrMask]
	mov 	cl,3
	shl 	edx,cl
	mov 	ecx,ebx
	shr 	ebx,3
	shl 	ebx,6			; ebx = (line/8)*64
	add 	esi,edx
	add 	esi,eax
	add 	esi,ebx			; esi = nametable

	and 	ecx,7			; ecx = (scanline+yscroll)&7  (tile row)
	mov 	edx,[charBank]
	mov 	ebx,ecx
	mov 	eax,[chrBase]
	xor 	ebx,7			; "Inverted" tile row (used for vflipping)
	shl 	eax,14			; Select 16k chr base block
	shl 	ebx,3
	shl 	ecx,3			; Multiply by 8 (number of bytes per row)
	add 	edx,eax			; edx = charBank + chrBase
	mov 	[tileOffset+4],ebx
	mov 	[dummy],edx
	mov 	[tileOffset],ecx

	mov 	[lineaddr],edi		; Save destination address for later use

	push 	ebp
	mov 	ebp,useExtPltt
	mov 	extPlttPtr,ebp
	
	; ---- First (partial) tile -------------------------------------------
	mov 	eax,[hofs]
	mov 	ebx,eax
	shr 	eax,3			; eax = xscroll/8
	mov 	[xtile],eax
	and 	eax,01Fh		; AND with the number of tiles per row
	and 	ebx,[hScrMask]
	mov 	ecx,[hScrShift]
	shl 	ebx,cl
	lea 	eax,[ebx+eax*2]
	xor 	ebx,ebx			; ebx = 0
	mov 	bx,[esi+eax]		; Get tile data
	inc 	dword ptr [xtile]	; Next tile..
	mov 	eax,ebx
	mov 	ebp,ebx

	shr 	eax,11			; Get vflip flag
	and 	ebx,03FFh		; ebx = tile number
	shr 	ebp,3
	shl 	ebx,6			; ebx *= 8*8

	and 	ebp,01E00h		; ebp = palette# * 512
	and 	eax,1
	add 	ebx,[dummy] 		; ebx += charBank + chrBase
	add 	ebp,extPlttPtr
	add 	ebx,[tileOffset+eax*4]	; ebx += tile row offset (normal or vflipped)

	mov 	ax,[ebp]
	mov 	col0,ax			; Remember the palette's real entry 0
	mov 	eax,[currPalette]
	mov 	ax,[eax]
	mov 	[ebp],ax		; Entry 0 := currPalette[0] while drawing

	mov 	eax,[hofs]
	mov 	ecx,8
	and 	eax,7
	add 	ebx,eax
	sub 	ecx,eax			; ecx = 8 - (xscroll&7)  (length of first tile)
	@@ltb_tile0_draw:	
		xor 	eax,eax			; Clear eax
		mov 	al,[ebx]		; Load pixel
		mov 	ax,[ebp + eax*2]	; Get 15-bit color value
		mov 	[edi],ax		; Store
		add 	edi,2			; Increase destination pointer
		inc 	ebx
		dec 	ecx
		jnz 	@@ltb_tile0_draw

	mov 	ax,col0
	mov 	[ebp],ax		; Restore the palette's entry 0
		
	; ---- 31 full tiles ---------------------------------------------------
	mov 	ecx,31 
	@@ltb_draw_tiles:
		push 	ecx			; Save the tile counter
		mov 	eax,[xtile]
		lea 	ebx,[eax*8]
		and 	eax,[hTileMask]
		and 	ebx,[hScrMask]
		mov 	ecx,[hScrShift]
		shl 	ebx,cl
		lea 	eax,[ebx+eax*2]
		xor 	ebx,ebx			; ebx = 0
		mov 	bx,[esi+eax]		; Get tile data
		mov 	eax,ebx
		mov 	edx,ebx
		mov 	ebp,ebx
		shr 	eax,11			; Get vflip flag
		inc 	dword ptr [xtile]
		shr 	ebp,3
		and 	eax,1
		and 	edx,0400h		; Get hflip flag
		and 	ebx,03FFh		; ebx = tile number
		shr 	edx,10
		shl 	ebx,6			; ebx = tile number * 64 (was forced to tile 1)
		and 	ebp,01E00h		; ebp = palette# * 512 (was forced to slot 15)
		add 	ebx,[tileOffset+eax*4]
		add 	ebp,extPlttPtr
		add 	ebx,[dummy]
		mov 	eax,1
		lea 	ebx,[ebx+edx*8]
		sub 	eax,edx
		sub 	ebx,edx			; if (hflip==1) ebx+=7
		sub 	eax,edx
		mov 	edx,eax			; edx = (hflip==1) ? -1 : 1

		mov 	ax,[ebp]
		mov 	col0,ax
		mov 	eax,[currPalette]
		mov 	ax,[eax]
		mov 	[ebp],ax
	
		mov 	ecx,8
		@@ltb_draw:
			xor 	eax,eax
			mov 	al,[ebx]
			mov 	ax,[ebp + eax*2]
			mov 	[edi],ax
			add 	edi,2
			add 	ebx,edx
			dec 	ecx
			jnz 	@@ltb_draw
		pop 	ecx

		mov 	ax,col0
		mov 	[ebp],ax
	
		dec ecx
		jnz 	@@ltb_draw_tiles

	; ---- Last (partial) tile ---------------------------------------------
	@@ltb_tile31:
	mov 	eax,[xtile]
	lea 	ebx,[eax*8]
	and 	eax,01fh
	and 	ebx,[hScrMask]
	mov 	ecx,[hScrShift]
	shl 	ebx,cl
	lea 	eax,[ebx+eax*2]
	xor 	ebx,ebx			; ebx = 0
	mov 	bx,[esi+eax]		; Get tile data
	mov 	eax,ebx			; Get vflip flag
	mov 	ebp,ebx

	shr 	eax,11
	and 	ebx,03FFh
	shr 	ebp,3
	shl 	ebx,6
	and 	ebp,01E00h		; Get palette #

	and 	eax,1
	add 	ebx,[dummy]
	add 	ebp,extPlttPtr
	add 	ebx,[tileOffset+eax*4]

	mov 	ecx,[hofs]
	and 	ecx,7			; ecx = number of pixels still missing
	jz 	@@ltb_tile31_end	; Line was tile-aligned: nothing left to draw
	@@ltb_tile31_draw:
		xor 	eax,eax
		mov 	al,[ebx]
		mov 	ax,[ebp + eax*2]
		mov 	[edi],ax
		add 	edi,2
		inc 	ebx
		dec 	ecx
		jnz 	@@ltb_tile31_draw

	@@ltb_tile31_end:
	pop 	ebp
	popa
	ret
bg_line_text_bottom2 ENDP



; RS_BOTTOM_BGR2RGB flipOp,fillMode
;
; Expands to the complete body of an extended rotation/scaling BG scanline
; renderer, including the final RET. It draws 256 pixels sampled along
; (x2,y2) with the per-pixel step (deltax,deltay); both have 8 fractional
; bits.
;
;   flipOp   - name of a store macro. It is invoked with the 16-bit color in
;              AX and the destination in EDI, and may clobber EAX/EBX/ECX.
;              Only the extended-palette loop uses it.
;   fillMode - 0: every pixel is stored
;              1: pixels whose color index is 0 are skipped
;              2: nothing is stored; EDI is first advanced by
;                 (numBlends+1)*2*vscreen_size and lineaddr is not updated
;
; Reads map data from screenBank + (scrBase << 11) and 8-bit tile pixels
; from charBank + (chrBase << 14). When useExtPltt is non-zero it is the
; base of the extended palettes and the map entry's top four bits pick a
; 512-byte slot; otherwise colors come from currPalette.
;
; All general registers are preserved (PUSHA/POPA); MMX state is released
; with EMMS before returning.
;
; NOTE(review): the standard-palette loop stores colors with a plain MOV and
; never calls flipOp - confirm that currPalette is already in the target
; format.
RS_BOTTOM_BGR2RGB MACRO flipOp,fillMode
	LOCAL __extpltt,__hloop,__ext_hloop,__done,__trans,__ext_trans
	
	pusha
	mov 	eax,[scanline]
	dec 	eax
	mov 	edi,[vscreen_curr]
	shl 	eax,9
	add 	edi,eax			; edi = vscreen+(scanline-1)*512

	mov 	esi,[screenBank] 
	mov 	eax,[scrBase]
	shl 	eax,11			; Select 2k screen base block
	mov 	ebx,[scanline]
	dec 	ebx
	add 	ebx,[vofs]
	and 	ebx,0FFh		; temp fix !!
	mov 	ecx,ebx
	shr 	ebx,3
	shl 	ebx,5			; ebx = (line/8)*32 (not used below)
	add 	esi,eax			; esi = map base

	and 	ecx,7			; ecx = (scanline+yscroll)&7  (tile row)
	mov 	edx,[charBank] 
	mov 	eax,[chrBase]
	shl 	eax,14			; Select 16k chr base block
	shl 	ecx,3			; Multiply by 8 (number of bytes per row)
	add 	edx,eax			; edx = charBank + chrBase, live until __done

	IF fillMode EQ 2
		mov 	eax,numBlends
		inc 	eax
		add 	eax,eax
		; Two-operand IMUL yields the same low 32 bits as MUL but leaves
		; EDX (the character base set up above) untouched.
		imul 	eax,dword ptr [vscreen_size]
		add 	edi,eax
	ELSE	
		mov 	[lineaddr],edi		; Save destination address for later use
	ENDIF
	
	push 	ebp
	movd 	mm0,[x2]		; mm0 = u
	movd 	mm1,[deltax]		; mm1 = du
	movd 	mm2,[y2]		; mm2 = v
	movd 	mm3,[deltay]		; mm3 = dv
	cmp 	useExtPltt,0
	jne 	__extpltt
	
	mov 	ebp,[currPalette]

	xor 	ecx,ecx			; ecx = pixel counter (kept on the stack inside the loop)
	__hloop:
		push 	ecx
		movd 	ebx,mm0 ;[x2]
		movd 	ebp,mm2 ;[y2]
		sar 	ebx,8		; drop the fractional bits
		sar 	ebp,8
		mov 	eax,ebx
		mov 	ecx,ebp
		sar 	ebx,3		; ebx = tile column
		sar 	ebp,3		; ebp = tile row
		and 	eax,7		; eax = pixel column inside the tile
		and 	ecx,7		; ecx = pixel row inside the tile
		and 	ebx,[hTileMask]
		and 	ebp,[vTileMask]
		push 	ecx
		mov 	ecx,[tileShift]
		shl 	ebp,cl
		add 	ebx,ebp		; ebx = map index
		pop 	ecx
		movzx 	ebx,byte ptr [esi+ebx*2]	; Low byte of the map entry = tile number
		shl 	ecx,3
		shl 	ebx,6		; 64 bytes per tile
		mov 	ebp,[currPalette]
		add 	ebx,edx
		add 	eax,ecx
		movzx 	ecx,byte ptr [ebx+eax]	; ecx = color index
		IF fillMode EQ 0
			mov 	ax,[ebp + ecx*2]
			mov 	[edi],ax
		ELSEIF fillMode EQ 1
			test 	ecx,ecx
			jz 	__trans		; index 0 = transparent, leave the destination alone
			mov 	ax,[ebp + ecx*2]
			mov 	[edi],ax
			__trans:			
		ELSE
		ENDIF
		paddd 	mm0,mm1		; u += du
		paddd 	mm2,mm3		; v += dv
		add 	edi,2
		pop 	ecx
		inc 	ecx
		cmp 	ecx,256
		jne 	__hloop
		jmp 	__done


	__extpltt:
	mov 	ebp,useExtPltt
	mov 	extPlttPtr,ebp
	movd 	mm0,[x2]
	movd 	mm1,[deltax]
	movd 	mm2,[y2]
	movd 	mm3,[deltay]
	xor 	ecx,ecx
	__ext_hloop:
		push 	ecx
		movd 	ebx,mm0
		movd 	ebp,mm2
		sar 	ebx,8
		sar 	ebp,8
		mov 	eax,ebx			; EAX = u.whole
		mov 	ecx,ebp			; ECX = v.whole
		sar 	ebx,3
		sar 	ebp,3
		and 	eax,7			; EAX = tile pixel
		and 	ecx,7			; ECX = tile row
		and 	ebx,[hTileMask]
		and 	ebp,[vTileMask]
		push 	ecx
		mov 	ecx,[tileShift]
		shl 	ebp,cl
		add 	ebx,ebp
		pop 	ecx
		movzx 	ebx,word ptr [esi+ebx*2]	; Read map data
		shl 	ecx,3				; ECX = tile row * 8
		mov 	ebp,ebx
		and 	ebx,0FFh
		shr 	ebp,3
		shl 	ebx,6				; EBX = tile number * 64
		and 	ebp,01E00h			; EBP = palette# * 512
		add 	ebx,edx
		add 	ebp,extPlttPtr		
		add 	eax,ecx
		movzx 	ecx,byte ptr [ebx+eax]		; CL = pixel data
		IF fillMode EQ 0
			mov 	ax,[ebp + ecx*2]
			flipOp				; Flip and store color data
		ELSEIF fillMode EQ 1
			test 	ecx,ecx
			jz 	__ext_trans
			mov 	ax,[ebp + ecx*2]
			flipOp				; Flip and store color data
			__ext_trans:			
		ELSE
		ENDIF
		paddd 	mm0,mm1			; u += du
		paddd 	mm2,mm3			; v += dv
		pop 	ecx
		add 	edi,2
		inc 	ecx
		cmp 	ecx,256
		jne 	__ext_hloop
	
	__done:
	pop	 ebp
	popa	
	emms
	ret
ENDM




;-----------------------------------------------------------------------
; extbg_line_rs_bottom
;
; Draw a bottom-most extended rotoscale BG: every one of the 256 pixels of
; the line is stored, unconverted.
;
; In (globals): scanline, vscreen_curr, screenBank, scrBase, charBank,
;               chrBase, x2, y2, deltax, deltay, hTileMask, vTileMask,
;               tileShift, currPalette, useExtPltt
; Out:          pixels at vscreen_curr + (scanline-1)*512; lineaddr is set
;               to that address. x2 and y2 are advanced in memory by 256
;               steps of deltax / deltay. With a non-zero useExtPltt,
;               extPlttPtr receives a copy of it.
; Registers:    all preserved (PUSHA/POPA)
;
; Inside the loops:
;   ESI = map base, EDX = character base, EDI = destination,
;   ECX = pixels left (parked on the stack while CL serves as shift count)
;-----------------------------------------------------------------------
extbg_line_rs_bottom PROC
	pusha

	; Destination: lines are 512 bytes apart, scanline is 1-based
	mov 	edi,[scanline]
	dec 	edi
	shl 	edi,9
	add 	edi,[vscreen_curr]
	mov 	[lineaddr],edi

	; Map data: 2k screen base blocks
	mov 	esi,[scrBase]
	shl 	esi,11
	add 	esi,[screenBank]

	; Tile data: 16k character base blocks
	mov 	edx,[chrBase]
	shl 	edx,14
	add 	edx,[charBank]

	push 	ebp
	mov 	ecx,256
	cmp 	useExtPltt,0
	jne 	@@rsb_ext_setup

	; ---- Standard palette: low byte of the map entry is the tile number --
	@@rsb_std_pixel:
		push 	ecx
		mov 	ebp,[y2]
		mov 	ebx,[x2]
		sar 	ebp,8			; EBP = v.whole
		sar 	ebx,8			; EBX = u.whole
		mov 	ecx,ebp
		mov 	eax,ebx
		and 	ecx,7			; ECX = row inside the tile
		and 	eax,7			; EAX = column inside the tile
		lea 	eax,[eax+ecx*8]		; EAX = byte offset inside the tile
		sar 	ebp,3
		sar 	ebx,3
		and 	ebp,[vTileMask]		; EBP = tile row
		and 	ebx,[hTileMask]		; EBX = tile column
		mov 	ecx,[tileShift]
		shl 	ebp,cl
		add 	ebx,ebp			; EBX = map index
		movzx 	ebx,byte ptr [esi+ebx*2]
		shl 	ebx,6			; 64 bytes per tile
		add 	ebx,edx
		movzx 	ecx,byte ptr [ebx+eax]	; ECX = color index
		mov 	ebp,[currPalette]
		mov 	ax,[ebp + ecx*2]
		mov 	[edi],ax
		add 	edi,2
		mov 	eax,[deltax]
		add 	[x2],eax		; u += du
		mov 	eax,[deltay]
		add 	[y2],eax		; v += dv
		pop 	ecx
		dec 	ecx
		jnz 	@@rsb_std_pixel
	jmp 	@@rsb_exit

	; ---- Extended palettes: the map entry also selects a 512-byte slot ---
	@@rsb_ext_setup:
	mov 	eax,useExtPltt
	mov 	extPlttPtr,eax
	@@rsb_ext_pixel:
		push 	ecx
		mov 	ebp,[y2]
		mov 	ebx,[x2]
		sar 	ebp,8			; EBP = v.whole
		sar 	ebx,8			; EBX = u.whole
		mov 	ecx,ebp
		mov 	eax,ebx
		and 	ecx,7			; ECX = row inside the tile
		and 	eax,7			; EAX = column inside the tile
		lea 	eax,[eax+ecx*8]		; EAX = byte offset inside the tile
		sar 	ebp,3
		sar 	ebx,3
		and 	ebp,[vTileMask]
		and 	ebx,[hTileMask]
		mov 	ecx,[tileShift]
		shl 	ebp,cl
		add 	ebx,ebp			; EBX = map index
		movzx 	ebx,word ptr [esi+ebx*2]	; Whole map entry
		mov 	ebp,ebx
		and 	ebx,0FFh		; EBX = tile number
		shr 	ebp,3
		shl 	ebx,6			; 64 bytes per tile
		and 	ebp,01E00h		; EBP = palette# * 512
		add 	ebp,extPlttPtr
		add 	ebx,edx
		movzx 	ecx,byte ptr [ebx+eax]	; ECX = color index
		mov 	ax,[ebp + ecx*2]
		mov 	[edi],ax
		add 	edi,2
		mov 	eax,[deltax]
		add 	[x2],eax		; u += du
		mov 	eax,[deltay]
		add 	[y2],eax		; v += dv
		pop 	ecx
		dec 	ecx
		jnz 	@@rsb_ext_pixel

	@@rsb_exit:
	pop 	ebp
	popa
	ret
extbg_line_rs_bottom ENDP


; Extended rot/scale BG line drawn over existing content: fill mode 1 of
; RS_BOTTOM_BGR2RGB skips pixels whose color index is 0, and
; NOFLIP_AND_STORE writes colors unconverted. The macro supplies the RET.
extbg_line_rs_trans PROC
	RS_BOTTOM_BGR2RGB NOFLIP_AND_STORE,1
extbg_line_rs_trans ENDP


; Placeholder: returns immediately without drawing anything.
extbg_line_rs_ablend PROC
	ret
extbg_line_rs_ablend ENDP



; Extended rot/scale BG line, every pixel stored (fill mode 0). In the
; extended-palette loop colors go through FLIP_AND_STORE_RS (xBGR -> xRGB).
; The macro supplies the RET.
extbg_line_rs_bottom_bgr2rgb_d3d PROC
	RS_BOTTOM_BGR2RGB FLIP_AND_STORE_RS,0
extbg_line_rs_bottom_bgr2rgb_d3d ENDP


; Extended rot/scale BG line, every pixel stored (fill mode 0). In the
; extended-palette loop colors go through FLIP_SHIFT_AND_STORE_RS
; (xBGR -> RGBx). The macro supplies the RET.
extbg_line_rs_bottom_bgr2rgb_ogl PROC
	RS_BOTTOM_BGR2RGB FLIP_SHIFT_AND_STORE_RS,0
extbg_line_rs_bottom_bgr2rgb_ogl ENDP


; Extended rot/scale BG line with index-0 pixels skipped (fill mode 1). In
; the extended-palette loop colors go through FLIP_AND_STORE_RS
; (xBGR -> xRGB). The macro supplies the RET.
extbg_line_rs_trans_bgr2rgb_d3d PROC
	RS_BOTTOM_BGR2RGB FLIP_AND_STORE_RS,1
extbg_line_rs_trans_bgr2rgb_d3d ENDP


; Extended rot/scale BG line with index-0 pixels skipped (fill mode 1). In
; the extended-palette loop colors go through FLIP_SHIFT_AND_STORE_RS
; (xBGR -> RGBx). The macro supplies the RET.
extbg_line_rs_trans_bgr2rgb_ogl PROC
	RS_BOTTOM_BGR2RGB FLIP_SHIFT_AND_STORE_RS,1
extbg_line_rs_trans_bgr2rgb_ogl ENDP


; Placeholder: returns immediately without drawing anything.
extbg_line_rs_ablend_bgr2rgb_d3d PROC
	ret
extbg_line_rs_ablend_bgr2rgb_d3d ENDP

; Placeholder: returns immediately without drawing anything.
extbg_line_rs_ablend_bgr2rgb_ogl PROC
	ret
extbg_line_rs_ablend_bgr2rgb_ogl ENDP


; LINE_256 drawTrans
;
; Expands to the complete body (including the final RET) of a scanline
; renderer for a 256-color bitmap BG sampled along (x2,y2) with the
; per-pixel step (deltax,deltay); both have 8 fractional bits.
;
;   drawTrans - 1: pixels with color index 0, and pixels outside the bitmap
;                  when wrapping is off, are filled with currPalette[0]
;               0: such pixels leave the destination untouched
;
; The bitmap starts at charBank + ((bgcnt >> 8) & 31) * 16k. A row's byte
; offset is (row << hScrShift). Bit 2000h of bgcnt selects wrapping:
; coordinates are ANDed with hTileMask / vTileMask when it is set, and
; range-checked against them when it is clear.
;
; Out: pixels at vscreen_curr + (scanline-1)*512; lineaddr is set to that
;      address and counter ends at 256.
; All general registers are preserved (PUSHA/POPA); MMX state is released
; with EMMS before returning.
;
; Fix: the out-of-range path (__blank) stored AX unconditionally. With
; drawTrans = 0, AX still held the rejected coordinate, so a garbage pixel
; was written where the destination should have been left alone. The store
; now happens only when drawTrans is 1.
LINE_256 MACRO drawTrans
	LOCAL __draw,__done,__nowrap,__trans,__opaque,__nw_draw,__nw_trans,__nw_opaque,__blank
	
	pusha
	mov 	eax,[scanline]
	dec 	eax
	mov 	edi,[vscreen_curr]
	mov 	ecx,eax
	mov 	esi,[charBank] ;[VRAM_curr]
	shl 	eax,9

	mov 	bx,bgcnt
	shr 	ebx,8
	and 	ebx,31		; The mask also discards the stale upper half of EBX
	shl 	ebx,14
	add 	esi,ebx		; ESI = VRAM + BG_BMPBASE

	add 	edi,eax		; EDI = vscreen + (scanline-1)*512
	push 	ebp
	mov 	[lineaddr],edi
	
	mov 	ecx,[hScrShift]	; CL = row shift for the rest of the routine

	movd 	mm0,[x2]	; mm0 = u
	movd 	mm1,[deltax]	; mm1 = du
	movd 	mm2,[y2]	; mm2 = v
	movd 	mm3,[deltay]	; mm3 = dv
	mov 	dword ptr [counter],0

	; Check the AreaOver bit
	test 	bgcnt,2000h
	jz 	__nowrap
	
	__draw:
		push 	esi
		movd 	eax,mm0
		sar 	eax,8
		and 	eax,[hTileMask]

		add 	esi,eax
		movd 	eax,mm2				; v
		sar 	eax,8				; shift out fractional bits
		and 	eax,[vTileMask]

		shl 	eax,cl
		add 	esi,eax
		mov 	al,[esi]
		and 	eax,255				; ZF = (color index == 0); survives the MOV below
		mov 	esi,[currPalette]
		IF drawTrans EQ 1
			jnz 	__opaque
				mov 	ax,[esi]	; Index 0: use color 0
			jmp 	__trans
			__opaque:	
				mov 	ax,[esi+eax*2]
			__trans:
			mov 	[edi],ax
		ELSE
			jz 	__trans			; Index 0: leave the destination alone
				mov 	ax,[esi+eax*2]
				mov 	[edi],ax
			__trans:
		ENDIF
		paddd 	mm0,mm1
		paddd 	mm2,mm3
		add 	edi,2
		inc 	dword ptr [counter] ;ecx
		mov 	eax,[hTileMask]
		pop 	esi
		cmp 	dword ptr [counter],256 
		jb 	__draw
		jmp 	__done

	__nowrap:
	__nw_draw:
		push 	esi
		movd 	eax,mm0
		sar 	eax,8
		js 	__blank		; u negative ?
		cmp 	eax,[hTileMask]
		jg 	__blank		; u > hTileMask ?

		add 	esi,eax
		movd 	eax,mm2		; v
		sar 	eax,8		; shift out fractional bits
		js 	__blank		; v negative ?
		cmp 	eax,[vTileMask]
		jg 	__blank		; v > vTileMask ?

		shl 	eax,cl
		add 	esi,eax
		mov 	al,[esi]
		and 	eax,255		; ZF = (color index == 0); survives the MOV below
		mov 	esi,[currPalette]
		IF drawTrans EQ 1
			jnz 	__nw_opaque
				mov 	ax,[esi]
			jmp 	__nw_trans
			__nw_opaque:	
				mov 	ax,[esi+eax*2]
			__nw_trans:
			mov 	[edi],ax
		ELSE
			jz 	__nw_trans
				mov 	ax,[esi+eax*2]
				mov 	[edi],ax
			__nw_trans:
		ENDIF
		paddd 	mm0,mm1
		paddd 	mm2,mm3
		add 	edi,2
		inc 	dword ptr [counter] ;ecx
		mov 	eax,[hTileMask]
		pop 	esi
		cmp 	dword ptr [counter],256 
		jb 	__nw_draw
		jmp 	__done

	; Sample point lies outside the bitmap
	__blank:
		IF drawTrans EQ 1
			mov 	esi,[currPalette]
			mov 	ax,[esi]
			mov 	[edi],ax	; Fill with color 0 (only when filling is requested)
		ENDIF
		paddd 	mm0,mm1
		paddd 	mm2,mm3
		add 	edi,2
		inc 	dword ptr [counter] ;ecx
		mov 	eax,[hTileMask]
		pop 	esi
		cmp 	dword ptr [counter],256 
		jb	__nw_draw
		jmp 	__done
	
	__done:
	pop 	ebp
	popa
	emms
	ret
ENDM


; 256-color bitmap BG line as the bottom layer: LINE_256 with drawTrans = 1
; fills index-0 and out-of-range pixels with currPalette[0]. The macro
; supplies the RET.
extbg_line_256_bottom PROC 
	LINE_256 1	
extbg_line_256_bottom ENDP



; 256-color bitmap BG line drawn over existing content: LINE_256 with
; drawTrans = 0 skips pixels whose color index is 0. The macro supplies
; the RET.
extbg_line_256_trans PROC
	LINE_256 0
extbg_line_256_trans ENDP


; LINE_256_A
;
; Expands to the complete body (including the final RET) of the blend
; variant of the 256-color bitmap BG renderer. Compared with LINE_256:
;   - the destination is advanced by (numBlends+1)*2*vscreen_size beyond
;     vscreen_curr + (scanline-1)*512, and lineaddr is not written
;   - one byte per pixel is written to alphamap: 0FFh where a pixel was
;     stored, 0 where the color index was 0 or the sample point was out of
;     range
;
; All general registers are preserved (PUSHA/POPA); MMX state is released
; with EMMS before returning. counter ends at 256.
;
; NOTE(review): unlike LINE_256, this macro never adds the bitmap base taken
; from bgcnt bits 8-12 to ESI, so it always samples from charBank itself.
; Confirm whether that is intended.
LINE_256_A MACRO
	LOCAL __draw,__done,__nowrap,__trans,__opaque,__nw_draw,__nw_trans,__nw_opaque,__blank
	
	pusha
	mov 	eax,[scanline]
	dec 	eax
	mov 	edi,[vscreen_curr]
	mov 	ecx,eax
	mov 	esi,[charBank] ;[VRAM_curr]
	shl 	eax,9
	add 	edi,eax
	push 	ebp

	; One-operand MUL overwrites EDX; that is harmless here because EDX is
	; only loaded further down.
	mov 	eax,numBlends
	inc 	eax
	add 	eax,eax
	mul 	dword ptr [vscreen_size]
	add 	edi,eax
	
	mov 	ecx,[hScrShift]		; CL = row shift for the rest of the routine

	movd 	mm0,[x2]		; mm0 = u
	movd 	mm1,[deltax]		; mm1 = du
	movd 	mm2,[y2]		; mm2 = v
	movd 	mm3,[deltay]		; mm3 = dv
	mov 	edx,OFFSET alphamap	; EDX = alpha map write cursor
	mov 	dword ptr [counter],0

	; Check the AreaOver bit
	test 	bgcnt,2000h
	jz 	__nowrap
	
	__draw:
		push 	esi
		movd 	eax,mm0
		sar 	eax,8
		and 	eax,[hTileMask]

		add 	esi,eax
		movd 	eax,mm2				; v
		sar 	eax,8				; shift out fractional bits
		and 	eax,[vTileMask]

		shl 	eax,cl
		add 	esi,eax
		mov 	al,[esi]
		and 	eax,255				; ZF = (color index == 0); survives the MOV below
		mov 	esi,[currPalette]
		jnz 	__opaque
			mov 	byte ptr [edx],0
		jmp 	__trans
		__opaque:	
			mov 	ax,[esi+eax*2]
			mov 	byte ptr [edx],0FFh
			mov 	[edi],ax
		__trans:
		inc 	edx	
		paddd 	mm0,mm1
		paddd 	mm2,mm3
		add 	edi,2
		inc 	dword ptr [counter] ;ecx
		mov 	eax,[hTileMask]
		pop 	esi
		cmp 	dword ptr [counter],256 
		jb 	__draw
		jmp 	__done

	__nowrap:
	__nw_draw:
		push 	esi
		movd 	eax,mm0
		sar 	eax,8
		js 	__blank		; u negative ?
		cmp 	eax,[hTileMask]
		jg 	__blank		; u > hTileMask ?

		add 	esi,eax
		movd 	eax,mm2		; v
		sar 	eax,8		; shift out fractional bits
		js 	__blank		; v negative ?
		cmp 	eax,[vTileMask]
		jg 	__blank		; v > vTileMask ?

		shl 	eax,cl
		add 	esi,eax
		mov 	al,[esi]
		and 	eax,255
		mov 	esi,[currPalette]
		jnz 	__nw_opaque
			mov 	byte ptr [edx],0
		jmp 	__nw_trans
		__nw_opaque:	
			mov 	ax,[esi+eax*2]
			mov 	byte ptr [edx],0FFh
			mov 	[edi],ax
		__nw_trans:
		inc 	edx
		paddd 	mm0,mm1
		paddd 	mm2,mm3
		add 	edi,2
		inc 	dword ptr [counter] ;ecx
		mov 	eax,[hTileMask]
		pop 	esi
		cmp 	dword ptr [counter],256 
		jb 	__nw_draw
		jmp 	__done
	; Sample point lies outside the bitmap: mark transparent, store nothing
	__blank:
		mov 	byte ptr [edx],0
		paddd 	mm0,mm1
		paddd 	mm2,mm3
		inc 	edx
		add 	edi,2
		inc 	dword ptr [counter] ;ecx
		mov 	eax,[hTileMask]
		pop 	esi
		cmp 	dword ptr [counter],256 
		jb 	__nw_draw
		jmp 	__done
	
	__done:
	pop 	ebp
	popa
	emms
	ret
ENDM

; Blend variant of the 256-color bitmap BG line: draws into the buffer
; selected by numBlends and fills alphamap. The macro supplies the RET.
extbg_line_256_ablend PROC
	LINE_256_A
extbg_line_256_ablend ENDP



;-----------------------------------------------------------------------
; extbg_line_dc_bottom
;
; Draws one 256-pixel line of a direct-color (16 bits per pixel) bitmap BG
; as the bottom layer. The bitmap is sampled along (x2,y2) with the
; per-pixel step (deltax,deltay); both have 8 fractional bits.
;
; A source pixel with bit 15 set is stored with that bit cleared. A pixel
; with bit 15 clear - or, when wrapping is off, a sample point outside
; 0..hTileMask / 0..vTileMask - is replaced by currPalette[0].
;
; In (globals): scanline, vscreen_curr, charBank, bgcnt (bits 8-12 = bitmap
;               base in 16k units, bit 2000h = wrap), hScrShift, x2, y2,
;               deltax, deltay, hTileMask, vTileMask, currPalette
; Out:          pixels at vscreen_curr + (scanline-1)*512; lineaddr is set
;               to that address and counter ends at 256.
; Registers:    all preserved (PUSHA/POPA)
;
; Inside the loops:
;   ESI = bitmap base      EDI = destination      CL  = row shift
;   EDX = u                EBP = v                EBX = dv
;-----------------------------------------------------------------------
extbg_line_dc_bottom PROC
	pusha

	; ESI = charBank + bitmap base
	movzx 	ebx,word ptr [bgcnt]
	shr 	ebx,8
	and 	ebx,31
	shl 	ebx,14
	mov 	esi,[charBank]
	add 	esi,ebx

	; EDI = vscreen + (scanline-1)*512
	mov 	edi,[scanline]
	dec 	edi
	shl 	edi,9
	add 	edi,[vscreen_curr]
	mov 	[lineaddr],edi

	push 	ebp

	; Rows are twice as wide in bytes as in pixels, hence the extra shift
	mov 	ecx,[hScrShift]
	inc 	ecx

	; Load the rotation/scaling parameters
	mov 	ebp,[y2]
	mov 	ebx,[deltay]
	mov 	edx,[x2]
	mov 	dword ptr [counter],0

	; Check the AreaOver bit
	test 	word ptr [bgcnt],2000h
	jz 	@@dcb_clip_pixel

	; ---- Wrapping: coordinates are masked ---------------------------------
	@@dcb_wrap_pixel:
		push 	esi
		mov 	eax,edx
		sar 	eax,8
		and 	eax,[hTileMask]
		lea 	esi,[esi+eax*2]		; Two bytes per pixel
		mov 	eax,ebp
		sar 	eax,8
		and 	eax,[vTileMask]
		shl 	eax,cl			; Row number -> byte offset
		mov 	ax,[esi+eax]		; Load pixel
		test 	eax,08000h		; Check alpha bit
		jnz 	@@dcb_wrap_direct
			mov 	esi,[currPalette]
			mov 	ax,[esi]		; Use color 0
			jmp 	@@dcb_wrap_store
		@@dcb_wrap_direct:
			and 	eax,07FFFh		; Keep 15-bit direct color value
		@@dcb_wrap_store:
		mov 	[edi],ax
		add 	edx,[deltax]		; u += du
		add 	ebp,ebx			; v += dv
		add 	edi,2
		pop 	esi
		inc 	dword ptr [counter]
		cmp 	dword ptr [counter],256
		jb 	@@dcb_wrap_pixel
	jmp 	@@dcb_exit

	; ---- No wrapping: coordinates are range-checked -----------------------
	@@dcb_clip_pixel:
		push 	esi
		mov 	eax,edx
		sar 	eax,8
		js 	@@dcb_clip_color0	; u negative
		cmp 	eax,[hTileMask]
		jg 	@@dcb_clip_color0	; u beyond the right edge
		lea 	esi,[esi+eax*2]
		mov 	eax,ebp
		sar 	eax,8
		js 	@@dcb_clip_color0	; v negative
		cmp 	eax,[vTileMask]
		jg 	@@dcb_clip_color0	; v beyond the bottom edge
		shl 	eax,cl
		mov 	ax,[esi+eax]
		test 	eax,08000h
		jz 	@@dcb_clip_color0	; Alpha bit clear
		and 	eax,07FFFh
		jmp 	@@dcb_clip_store
		; Outside the bitmap or transparent: display color 0
		@@dcb_clip_color0:
		mov 	esi,[currPalette]
		mov 	ax,[esi]
		@@dcb_clip_store:
		mov 	[edi],ax
		add 	edx,[deltax]
		add 	ebp,ebx
		add 	edi,2
		pop 	esi
		inc 	dword ptr [counter]
		cmp 	dword ptr [counter],256
		jb 	@@dcb_clip_pixel

	@@dcb_exit:
		pop 	ebp
		popa
		ret
extbg_line_dc_bottom ENDP



;-----------------------------------------------------------------------
; extbg_line_dc_trans
; Draws 256 pixels of a rotated/scaled 16-bit direct-color bitmap
; background over one line of vscreen. Only source pixels with bit 15
; set are stored (with bit 15 cleared); for all other pixels, and for
; coordinates outside the bitmap when area wrapping is off, the
; destination pixel is left unchanged.
;
; Reads : scanline, vscreen_curr, charBank, bgcnt, hScrShift, hTileMask,
;         vTileMask, x2, y2, deltax, deltay
; Writes: lineaddr (address of the destination line), counter (ends at
;         256), up to 256 words starting at lineaddr
; Regs  : all general registers are restored by popa before returning
;
; Register roles inside both loops:
;   ESI = bitmap base     EDI = destination pointer
;   EDX = x (fixed point, 8 fractional bits)   EBP = y (same format)
;   EBX = deltay          CL  = hScrShift+1
;-----------------------------------------------------------------------
extbg_line_dc_trans PROC
	pusha
	mov 	eax,[scanline]
	dec 	eax
	mov 	edi,[vscreen_curr]
	mov 	ecx,eax		; (not used: ECX is reloaded from hScrShift below)
	mov 	esi,[charBank]
	mov 	bx,bgcnt
	shr 	ebx,8
	and 	ebx,31		; EBX = bits 8-12 of bgcnt
	shl 	ebx,14		; scaled by 16 KB
	add 	esi,ebx		; ESI = VRAM + BG_BMPBASE
	shl 	eax,9
	add 	edi,eax		; EDI = vscreen + (scanline-1)*512

	mov 	[lineaddr],edi

	; Check the AreaOver bit
	test 	bgcnt,2000h
	jz 	@@ldct_nowrap
	push 	ebp			; balanced by the pop at @@ldct_done

	mov 	ecx,[hScrShift]
	inc 	ecx			; CL = shift for row index -> byte offset

	; Load the rotation/scaling parameters
	mov 	eax,y2
	mov 	ebp,eax
	mov 	eax,[deltay]
	mov 	ebx,eax
	mov 	edx,x2
	mov 	dword ptr [counter],0

	; Wrapping loop: coordinates are masked into the bitmap
	@@ldct_draw:
		push 	esi			; Save ESI
		mov 	eax,edx
		sar 	eax,8
		and 	eax,[hTileMask]

		lea 	esi,[esi+eax*2]
		mov 	eax,ebp			; v
		sar 	eax,8			; shift out fractional bits
		and 	eax,[vTileMask]

		shl 	eax,cl			; Multiply by number of bytes per line
		add 	esi,eax			; Add to source address
		mov 	ax,[esi]		; Load pixel
		test 	eax,08000h		; Check alpha bit
		jz 	@@ldct_trans		; clear: skip the store
			and 	eax,07FFFh	; Keep 15-bit direct color value
			mov 	[edi],ax	; Store pixel
		@@ldct_trans:
		add 	edx,[deltax]		; x += dx
		add 	ebp,ebx			; y += dy
		add 	edi,2
		inc 	dword ptr [counter]
		pop 	esi			; Restore ESI
		cmp 	dword ptr [counter],256
		jb 	@@ldct_draw
		jmp 	@@ldct_done

	; No area wrapping: out-of-range coordinates take the @@ldct_blank path
	@@ldct_nowrap:
		push 	ebp			; balanced by the pop at @@ldct_done

		mov 	ecx,[hScrShift]
		inc 	ecx

		mov 	eax,y2
		mov 	ebp,eax
		mov 	eax,[deltay]
		mov 	ebx,eax
		mov 	edx,x2
		mov 	dword ptr [counter],0

	@@ldct_nowrap_draw:
		push 	esi				; popped on both the normal and the blank path
		mov 	eax,edx
		sar 	eax,8
		js 	@@ldct_blank			; u+x<0 ?
		cmp 	eax,[hTileMask]
		jg 	@@ldct_blank			; u+x>line width ?

		lea 	esi,[esi+eax*2]
		mov 	eax,ebp				; v
		sar 	eax,8				; shift out fractional bits
		js 	@@ldct_blank			; v+y<0 ?
		cmp 	eax,[vTileMask]
		jg 	@@ldct_blank			; v+y>screen height ?

		shl 	eax,cl				; row index -> byte offset
		add 	esi,eax
		mov 	ax,[esi]			; load pixel
		test 	eax,08000h
		jz 	@@ldct_nw_trans			; bit 15 clear: skip the store
			and 	eax,07FFFh
			mov 	[edi],ax
		@@ldct_nw_trans:
		add 	edx,[deltax]
		add 	ebp,ebx
		add 	edi,2
		inc 	dword ptr [counter]
		pop 	esi
		cmp 	dword ptr [counter],256
		jb 	@@ldct_nowrap_draw
		jmp 	@@ldct_done

	; Area is outside the screen: nothing is stored, only the
	; coordinates, destination pointer and counter advance
	@@ldct_blank:
		add 	edx,[deltax]
		add 	ebp,ebx
		add 	edi,2
		inc 	dword ptr [counter]
		pop 	esi				; matches the push at @@ldct_nowrap_draw
		cmp 	dword ptr [counter],256
		jb 	@@ldct_nowrap_draw

	@@ldct_done:
		pop 	ebp
		popa
		ret
extbg_line_dc_trans ENDP



;-----------------------------------------------------------------------
; extbg_line_dc_ablend
; Draws 256 pixels of a rotated/scaled 16-bit direct-color bitmap
; background into a line located 2*(numBlends+1)*vscreen_size bytes
; past the normal vscreen line, and records per-pixel coverage in
; alphamap: byte i is set to 0FFh when pixel i was stored (source bit 15
; set) and to 0 otherwise (bit 15 clear, or outside the bitmap when
; area wrapping is off). Pixels that are not stored leave the
; destination unchanged.
;
; Reads : scanline, vscreen_curr, charBank, bgcnt, numBlends,
;         vscreen_size, hScrShift, hTileMask, vTileMask, x2, y2,
;         deltax, deltay
; Writes: p_amap (ends at alphamap+256), alphamap[0..255] (bytes),
;         counter (ends at 256), up to 256 destination words
; Regs  : all general registers are restored by popa before returning
;
; Register roles inside both loops:
;   ESI = bitmap base (temporarily reused for p_amap; saved on the stack)
;   EDI = destination pointer
;   EDX = x (fixed point, 8 fractional bits)   EBP = y (same format)
;   EBX = deltay          CL  = hScrShift+1
;-----------------------------------------------------------------------
extbg_line_dc_ablend PROC
	pusha
	mov 	eax,[scanline]
	dec 	eax
	mov 	edi,[vscreen_curr]
	mov 	ecx,eax		; (not used: ECX is reloaded from hScrShift below)
	mov 	esi,[charBank] ;[VRAM_curr]
	mov 	bx,bgcnt
	shr 	ebx,8
	and 	ebx,31		; EBX = bits 8-12 of bgcnt
	shl 	ebx,14		; scaled by 16 KB
	add 	esi,ebx		; ESI = VRAM + BG_BMPBASE
	shl 	eax,9
	add 	edi,eax		; EDI = vscreen + (scanline-1)*512

	; EDI += 2*(numBlends+1)*vscreen_size
	mov 	eax,numBlends
	inc 	eax
	add 	eax,eax
	mul 	dword ptr [vscreen_size]	; EDX:EAX = product; only EAX is used
	add 	edi,eax

	mov 	p_amap,OFFSET alphamap		; coverage bytes start at alphamap[0]

	; Check the AreaOver bit
	test 	bgcnt,2000h
	jz 	@@ldca_nowrap
	push 	ebp			; balanced by the pop at @@ldca_done

	mov 	ecx,[hScrShift]
	inc 	ecx			; CL = shift for row index -> byte offset

	; Load the rotation/scaling parameters
	mov 	eax,y2
	mov 	ebp,eax
	mov 	eax,[deltay]
	mov 	ebx,eax
	mov 	edx,x2
	mov 	dword ptr [counter],0

	; Wrapping loop: coordinates are masked into the bitmap
	@@ldca_draw:
		push 	esi			; Save ESI
		mov 	eax,edx
		sar 	eax,8
		and 	eax,[hTileMask]

		lea 	esi,[esi+eax*2]
		mov 	eax,ebp			; v
		sar 	eax,8			; shift out fractional bits
		and 	eax,[vTileMask]

		shl 	eax,cl			; Multiply by number of bytes per line
		add 	esi,eax			; Add to source address
		mov 	ax,[esi]			; Load pixel
		test	 eax,08000h			; Check alpha bit
		jnz 	@@ldca_opaque
			mov 	esi,p_amap
			mov 	byte ptr [esi],0	; transparent: coverage = 0, no pixel store
		jmp 	@@ldca_trans
		@@ldca_opaque:
			mov 	esi,p_amap
			and 	eax,07FFFh		; Keep 15-bit direct color value
			mov 	byte ptr [esi],0FFh	; opaque: coverage = 0FFh
			mov 	[edi],ax		; Store pixel
		@@ldca_trans:
		add 	edx,[deltax]			; x += dx
		add 	ebp,ebx				; y += dy
		inc 	dword ptr [p_amap]		; next coverage byte
		add 	edi,2
		inc 	dword ptr [counter]
		pop 	esi				; Restore ESI
		cmp 	dword ptr [counter],256
		jb 	@@ldca_draw
		jmp 	@@ldca_done

	; No area wrapping: out-of-range coordinates take the @@ldca_blank path
	@@ldca_nowrap:
		push 	ebp			; balanced by the pop at @@ldca_done

		mov 	ecx,[hScrShift]
		inc 	ecx

		mov 	eax,y2
		mov 	ebp,eax
		mov 	eax,[deltay]
		mov 	ebx,eax
		mov 	edx,x2
		mov 	dword ptr [counter],0

	@@ldca_nowrap_draw:
		push 	esi				; popped on both the normal and the blank path
		mov 	eax,edx
		sar 	eax,8
		js 	@@ldca_blank			; u+x<0 ?
		cmp 	eax,[hTileMask]
		jg 	@@ldca_blank			; u+x>line width ?

		lea 	esi,[esi+eax*2]
		mov 	eax,ebp				; v
		sar 	eax,8				; shift out fractional bits
		js 	@@ldca_blank			; v+y<0 ?
		cmp 	eax,[vTileMask]
		jg 	@@ldca_blank			; v+y>screen height ?

		shl 	eax,cl				; row index -> byte offset
		add 	esi,eax
		mov 	ax,[esi]			; load pixel
		test 	eax,08000h
		jnz 	@@ldca_nw_opaque
			mov 	esi,p_amap
			mov 	byte ptr [esi],0	; transparent: coverage = 0
		jmp 	@@ldca_nw_trans
		@@ldca_nw_opaque:
			mov 	esi,p_amap
			and 	eax,07FFFh
			mov 	byte ptr [esi],0FFh	; opaque: coverage = 0FFh
			mov 	[edi],ax
		@@ldca_nw_trans:
		add 	edx,[deltax]
		add 	ebp,ebx
		inc 	dword ptr [p_amap]
		add 	edi,2
		inc 	dword ptr [counter]
		pop 	esi
		cmp 	dword ptr [counter],256
		jb 	@@ldca_nowrap_draw
		jmp 	@@ldca_done

	; Area is outside the screen: mark the pixel as not covered
	; (coverage byte = 0) and leave the destination pixel untouched
	@@ldca_blank:
		mov 	esi,p_amap
		mov 	byte ptr [esi],0
		add 	edx,[deltax]
		add 	ebp,ebx
		inc 	dword ptr [p_amap]
		add 	edi,2
		inc 	dword ptr [counter]
		pop 	esi				; matches the push at @@ldca_nowrap_draw
		cmp 	dword ptr [counter],256
		jb 	@@ldca_nowrap_draw

	@@ldca_done:
		pop 	ebp
		popa
		ret
extbg_line_dc_ablend ENDP



; FLIP_DC
; Exchanges the two outer 5-bit fields of the 15-bit color in EAX
; (bits 0-4 <-> bits 10-14); bits 5-9 stay in place and everything from
; bit 15 upwards is cleared.
; In      : EAX (only bits 0-14 contribute)
; Out     : EAX = converted color
; Clobbers: EBX, EBP, flags
FLIP_DC MACRO
	mov 	ebp,eax
	mov 	ebx,eax
	and 	ebp,03E0h		; middle field, unchanged position
	shl 	ebx,10
	and 	ebx,7C00h		; low field moved up to bits 10-14
	shr 	eax,10
	and 	eax,001Fh		; high field moved down to bits 0-4
	or 	ebp,ebx
	or 	eax,ebp			; merge the three fields
ENDM


; FLIP_SHIFT_DC
; Exchanges the two outer 5-bit fields of the 15-bit color in EAX and
; moves the whole result up by one bit: input bits 0-4 end up in bits
; 11-15, bits 5-9 in bits 6-10, bits 10-14 in bits 1-5. Bit 0 and all
; bits above 15 are cleared.
; In      : EAX (only bits 0-14 contribute)
; Out     : EAX = converted color
; Clobbers: EBX, EBP, flags
FLIP_SHIFT_DC MACRO
	mov 	ebp,eax
	mov 	ebx,eax
	and 	ebp,003E0h
	shl 	ebp,1			; middle field -> bits 6-10
	shl 	ebx,11
	and 	ebx,0F800h		; low field -> bits 11-15
	shr 	eax,9
	and 	eax,0003Eh		; high field -> bits 1-5
	or 	ebp,ebx
	or 	eax,ebp			; merge the three fields
ENDM


; FLIP_AND_STORE_DC
; Converts the color in EAX exactly as FLIP_DC does (outer 5-bit fields
; exchanged, bit 15 and above cleared) and stores the low 16 bits at [EDI].
; In      : EAX = color, EDI = destination address
; Out     : EAX = converted color, word at [EDI] written
; Clobbers: EBX, EBP, flags
FLIP_AND_STORE_DC MACRO
	FLIP_DC				; EAX = converted color
	mov 	[edi],ax
ENDM


; FLIP_SHIFT_AND_STORE_DC
; Converts the color in EAX exactly as FLIP_SHIFT_DC does (outer 5-bit
; fields exchanged, result shifted up one bit) and stores the low 16
; bits at [EDI].
; In      : EAX = color, EDI = destination address
; Out     : EAX = converted color, word at [EDI] written
; Clobbers: EBX, EBP, flags
FLIP_SHIFT_AND_STORE_DC MACRO
	FLIP_SHIFT_DC			; EAX = converted color
	mov 	[edi],ax
ENDM


; FLIP_AND_STORE_DC_A
; Converts the color in EAX exactly as FLIP_DC does, writes 0FFh to the
; byte at [EDX] and then stores the converted color word at [EDI].
; In      : EAX = color, EDX = address of the byte to set, EDI = destination
; Out     : EAX = converted color
; Clobbers: EBX, EBP, flags
FLIP_AND_STORE_DC_A MACRO
	FLIP_DC				; EAX = converted color
	mov 	byte ptr [edx],0FFh
	mov 	[edi],ax
ENDM


; FLIP_SHIFT_AND_STORE_DC_A
; Converts the color in EAX exactly as FLIP_SHIFT_DC does, writes 0FFh
; to the byte at [EDX] and then stores the converted color word at [EDI].
; In      : EAX = color, EDX = address of the byte to set, EDI = destination
; Out     : EAX = converted color
; Clobbers: EBX, EBP, flags
FLIP_SHIFT_AND_STORE_DC_A MACRO
	FLIP_SHIFT_DC			; EAX = converted color
	mov 	byte ptr [edx],0FFh
	mov 	[edi],ax
ENDM


;-----------------------------------------------------------------------
; DC_BOTTOM_BGR2RGB flipOp
; Complete procedure body (prologue through ret) that draws 256 pixels
; of a rotated/scaled 16-bit direct-color bitmap background into one
; line of vscreen, converting every color with flipOp. Every
; destination pixel is written: source pixels with bit 15 clear, and
; (when area wrapping is off) coordinates outside the bitmap, receive
; the converted palette entry 0.
;
; flipOp: name of a parameterless macro that converts the color in EAX
;         in place. This file passes FLIP_DC or FLIP_SHIFT_DC, both of
;         which overwrite EBX and EBP; neither register is live in the
;         loops below.
;
; Reads : scanline, vscreen_curr, charBank, bgcnt, currPalette,
;         hScrShift, hTileMask, vTileMask, x2, y2, deltax, deltay
; Writes: color0 (converted palette entry 0), 256 destination words
; Regs  : general registers restored by popa; MMX state released with
;         emms before ret
;
; Loop state: mm0 = x, mm1 = deltax, mm2 = y, mm3 = deltay (fixed point,
; 8 fractional bits), EDX = pixel counter, ESI = bitmap base,
; EDI = destination pointer, CL = hScrShift+1.
;-----------------------------------------------------------------------
DC_BOTTOM_BGR2RGB MACRO flipOp
	LOCAL __nowrap,__draw,__opaque,__trans,__done,__nowrap_draw
	LOCAL __blank,__nw_opaque,__nw_trans

	pusha
	mov 	eax,[scanline]
	dec 	eax
	mov 	edi,[vscreen_curr]
	mov 	ecx,eax		; (not used: ECX is reloaded from hScrShift below)
	mov 	esi,[charBank]
	mov 	bx,bgcnt
	shr 	ebx,8
	and 	ebx,31		; EBX = bits 8-12 of bgcnt
	shl 	ebx,14		; scaled by 16 KB
	add 	esi,ebx		; ESI = VRAM + BG_BMPBASE
	shl 	eax,9
	add 	edi,eax		; EDI = vscreen + (scanline-1)*512

	; Convert palette entry 0 once; it is the fill color for
	; transparent and out-of-range pixels
	mov 	eax,[currPalette]
	movzx 	eax,word ptr [eax]
	flipOp
	mov 	color0,eax

	; Check the AreaOver bit
	test 	bgcnt,2000h
	jz 	__nowrap
	push 	ebp		; balanced by the pop at __done

	mov 	ecx,[hScrShift]
	inc 	ecx		; CL = shift for row index -> byte offset

	; Load the rotation/scaling parameters
	movd 	mm0,[x2]
	movd 	mm1,[deltax]
	movd 	mm2,[y2]
	movd 	mm3,[deltay]
	mov 	edx,0		; EDX = pixel counter

	; Wrapping loop: coordinates are masked into the bitmap
	__draw:
		push 	esi			; Save ESI
		movd 	eax,mm0
		sar 	eax,8
		and 	eax,[hTileMask]

		lea 	esi,[esi+eax*2]
		movd 	eax,mm2			; v
		sar 	eax,8			; shift out fractional bits
		and 	eax,[vTileMask]

		shl 	eax,cl			; Multiply by number of bytes per line
		add 	esi,eax			; Add to source address
		mov 	ax,[esi]		; Load pixel
		test 	eax,08000h		; Check alpha bit
		jnz 	__opaque
			mov 	ax,word ptr color0	; transparent: converted color 0
			mov 	[edi],ax
			jmp 	__trans
		__opaque:
			flipOp				; convert the pixel in EAX
			mov 	[edi],ax
		__trans:
		paddd 	mm0,mm1			; x += dx
		paddd 	mm2,mm3			; y += dy
		inc 	edx
		add 	edi,2
		pop 	esi			; Restore ESI
		cmp 	edx,256
		jb 	__draw
		jmp 	__done

	; Don't use area wrapping
	__nowrap:
		push 	ebp		; balanced by the pop at __done

		mov 	ecx,[hScrShift]
		inc 	ecx

		; Load the rotation/scaling parameters
		movd 	mm0,[x2]
		movd 	mm1,[deltax]
		movd 	mm2,[y2]
		movd 	mm3,[deltay]
		mov 	edx,0 ;p_amap

	__nowrap_draw:
		push 	esi			; popped on both the normal and the blank path
		movd 	eax,mm0
		sar 	eax,8
		js 	__blank			; u+x<0 ?
		cmp 	eax,[hTileMask]
		jg 	__blank			; u+x>line width ?

		lea 	esi,[esi+eax*2]
		movd 	eax,mm2			; v
		sar 	eax,8			; shift out fractional bits
		js 	__blank			; v+y<0 ?
		cmp 	eax,[vTileMask]
		jg 	__blank			; v+y>screen height ?

		shl 	eax,cl			; row index -> byte offset
		add 	esi,eax
		mov 	ax,[esi]		; load pixel
		test 	eax,08000h
		jnz 	__nw_opaque
			mov 	ax,word ptr color0	; transparent: converted color 0
			mov 	[edi],ax
			jmp 	__nw_trans
		__nw_opaque:
			flipOp				; convert the pixel in EAX
			mov 	[edi],ax
		__nw_trans:
		paddd 	mm0,mm1			; x += dx
		paddd 	mm2,mm3			; y += dy
		inc 	edx
		add 	edi,2
		pop 	esi
		cmp 	edx,256 ;dword ptr [counter],256
		jb 	__nowrap_draw
		jmp 	__done

	; Area is outside the screen, display color 0
	__blank:
		mov 	ax,word ptr color0
		mov 	[edi],ax
		paddd 	mm0,mm1			; x += dx
		paddd 	mm2,mm3			; y += dy
		inc 	edx
		add 	edi,2
		pop 	esi			; matches the push at __nowrap_draw
		cmp 	edx,256 ;dword ptr [counter],256
		jb 	__nowrap_draw

	__done:
		pop 	ebp
		popa
		emms				; leave MMX state before returning
		ret
ENDM


;-----------------------------------------------------------------------
; DC_TRANS_BGR2RGB flipOp
; Complete procedure body (prologue through ret) that draws 256 pixels
; of a rotated/scaled 16-bit direct-color bitmap background over one
; line of vscreen. Only source pixels with bit 15 set are handed to
; flipOp; all other pixels, and coordinates outside the bitmap when
; area wrapping is off, leave the destination unchanged.
;
; flipOp: name of a parameterless macro that receives the pixel in EAX
;         and the destination address in EDI. This macro performs no
;         store itself, so flipOp must do it; this file passes
;         FLIP_AND_STORE_DC or FLIP_SHIFT_AND_STORE_DC, both of which
;         overwrite EBX and EBP (not live in the loops below).
;
; Reads : scanline, vscreen_curr, charBank, bgcnt, hScrShift, hTileMask,
;         vTileMask, x2, y2, deltax, deltay
; Regs  : general registers restored by popa; MMX state released with
;         emms before ret
;
; Loop state: mm0 = x, mm1 = deltax, mm2 = y, mm3 = deltay (fixed point,
; 8 fractional bits), EDX = pixel counter, ESI = bitmap base,
; EDI = destination pointer, CL = hScrShift+1.
;-----------------------------------------------------------------------
DC_TRANS_BGR2RGB MACRO flipOp
	LOCAL __nowrap,__draw,__opaque,__trans,__done,__nowrap_draw
	LOCAL __blank,__nw_opaque,__nw_trans

	pusha
	mov 	eax,[scanline]
	dec 	eax
	mov 	edi,[vscreen_curr]
	mov 	ecx,eax		; (not used: ECX is reloaded from hScrShift below)
	mov 	esi,[charBank] ;[VRAM_curr]
	mov 	bx,bgcnt
	shr 	ebx,8
	and 	ebx,31		; EBX = bits 8-12 of bgcnt
	shl 	ebx,14		; scaled by 16 KB
	add 	esi,ebx		; ESI = VRAM + BG_BMPBASE
	shl 	eax,9
	add 	edi,eax		; EDI = vscreen + (scanline-1)*512

	; Check the AreaOver bit
	test 	bgcnt,2000h
	jz 	__nowrap
	push 	ebp		; balanced by the pop at __done

	mov 	ecx,[hScrShift]
	inc 	ecx		; CL = shift for row index -> byte offset

	; Load the rotation/scaling parameters
	movd 	mm0,[x2]
	movd 	mm1,[deltax]
	movd 	mm2,[y2]
	movd 	mm3,[deltay]
	;mov 	edx,p_amap
	;mov 	dword ptr [counter],0
	mov 	edx,0		; EDX = pixel counter

	; Wrapping loop: coordinates are masked into the bitmap
	__draw:
		push 	esi			; Save ESI
		movd 	eax,mm0
		sar 	eax,8
		and 	eax,[hTileMask]

		lea 	esi,[esi+eax*2]
		movd 	eax,mm2			; v
		sar 	eax,8			; shift out fractional bits
		and 	eax,[vTileMask]

		shl 	eax,cl			; Multiply by number of bytes per line
		add 	esi,eax			; Add to source address
		mov 	ax,[esi]		; Load pixel
		test 	eax,08000h		; Check alpha bit
		jz 	__trans			; clear: leave the destination pixel alone
		__opaque:
			flipOp			; convert and store (done by flipOp)
		__trans:
		paddd 	mm0,mm1			; x += dx
		paddd 	mm2,mm3			; y += dy
		inc 	edx
		add 	edi,2
		;inc 	dword ptr [counter]
		pop 	esi			; Restore ESI
		;cmp 	dword ptr [counter],256
		cmp 	edx,256
		jb 	__draw
		jmp 	__done

	; Don't use area wrapping
	__nowrap:
		push 	ebp		; balanced by the pop at __done

		mov 	ecx,[hScrShift]
		inc 	ecx

		; Load the rotation/scaling parameters
		movd 	mm0,[x2]
		movd 	mm1,[deltax]
		movd 	mm2,[y2]
		movd 	mm3,[deltay]
		mov 	edx,0 ;p_amap
		;mov 	dword ptr [counter],0

	__nowrap_draw:
		push 	esi			; popped on both the normal and the blank path
		movd 	eax,mm0
		sar 	eax,8
		js 	__blank			; u+x<0 ?
		cmp 	eax,[hTileMask]
		jg 	__blank			; u+x>line width ?

		lea 	esi,[esi+eax*2]
		movd 	eax,mm2			; v
		sar 	eax,8			; shift out fractional bits
		js 	__blank			; v+y<0 ?
		cmp 	eax,[vTileMask]
		jg 	__blank			; v+y>screen height ?

		shl 	eax,cl			; row index -> byte offset
		add 	esi,eax
		mov 	ax,[esi]		; load pixel
		test 	eax,08000h
		jz 	__nw_trans		; bit 15 clear: leave the destination pixel alone
		__nw_opaque:
			flipOp			; convert and store (done by flipOp)
		__nw_trans:
		paddd 	mm0,mm1			; x += dx
		paddd 	mm2,mm3			; y += dy
		inc 	edx
		add 	edi,2
		pop 	esi
		cmp 	edx,256
		jb 	__nowrap_draw
		jmp 	__done

	; Area is outside the screen: nothing is stored, only the
	; coordinates, destination pointer and counter advance
	__blank:
		paddd 	mm0,mm1			; x += dx
		paddd 	mm2,mm3			; y += dy
		inc 	edx
		add 	edi,2
		pop 	esi			; matches the push at __nowrap_draw
		cmp 	edx,256
		jb 	__nowrap_draw

	__done:
		pop 	ebp
		popa
		emms				; leave MMX state before returning
		ret
ENDM



; extbg_line_dc_bottom_bgr2rgb_ogl
; Direct-color background line, every pixel written, with colors
; converted by FLIP_SHIFT_DC (outer 5-bit fields exchanged and the
; result shifted up one bit). The body, including pusha/popa and ret,
; comes entirely from the DC_BOTTOM_BGR2RGB expansion.
extbg_line_dc_bottom_bgr2rgb_ogl PROC
	DC_BOTTOM_BGR2RGB FLIP_SHIFT_DC
extbg_line_dc_bottom_bgr2rgb_ogl ENDP


; extbg_line_dc_bottom_bgr2rgb_d3d
; Direct-color background line, every pixel written, with colors
; converted by FLIP_DC (outer 5-bit fields exchanged, bit 15 cleared).
; The body, including pusha/popa and ret, comes entirely from the
; DC_BOTTOM_BGR2RGB expansion.
extbg_line_dc_bottom_bgr2rgb_d3d PROC
	DC_BOTTOM_BGR2RGB FLIP_DC
extbg_line_dc_bottom_bgr2rgb_d3d ENDP


; extbg_line_dc_trans_bgr2rgb_ogl
; Direct-color background line where only pixels with bit 15 set are
; drawn; each drawn pixel is converted and stored by
; FLIP_SHIFT_AND_STORE_DC. The body, including pusha/popa and ret,
; comes entirely from the DC_TRANS_BGR2RGB expansion.
extbg_line_dc_trans_bgr2rgb_ogl PROC
	DC_TRANS_BGR2RGB FLIP_SHIFT_AND_STORE_DC
extbg_line_dc_trans_bgr2rgb_ogl ENDP


; extbg_line_dc_trans_bgr2rgb_d3d
; Direct-color background line where only pixels with bit 15 set are
; drawn; each drawn pixel is converted and stored by FLIP_AND_STORE_DC.
; The body, including pusha/popa and ret, comes entirely from the
; DC_TRANS_BGR2RGB expansion.
extbg_line_dc_trans_bgr2rgb_d3d PROC
	DC_TRANS_BGR2RGB FLIP_AND_STORE_DC
extbg_line_dc_trans_bgr2rgb_d3d ENDP



;-----------------------------------------------------------------------
; DC_ABLEND_BGR2RGB flipOp
; Complete procedure body (prologue through ret) that draws 256 pixels
; of a rotated/scaled 16-bit direct-color bitmap background into a line
; located 2*(numBlends+1)*vscreen_size bytes past the normal vscreen
; line, and records per-pixel coverage in alphamap: byte i is 0 when
; pixel i was not drawn (source bit 15 clear, or outside the bitmap when
; area wrapping is off). Pixels that are not drawn leave the destination
; unchanged.
;
; flipOp: name of a parameterless macro that receives the pixel in EAX,
;         the coverage-byte address in EDX and the destination address
;         in EDI. This macro performs no store for opaque pixels, so
;         flipOp must write both the coverage byte and the pixel; this
;         file passes FLIP_AND_STORE_DC_A or FLIP_SHIFT_AND_STORE_DC_A,
;         which write 0FFh to [EDX] and overwrite EBX and EBP (not live
;         in the loops below).
;
; Reads : scanline, vscreen_curr, charBank, bgcnt, numBlends,
;         vscreen_size, hScrShift, hTileMask, vTileMask, x2, y2,
;         deltax, deltay
; Writes: p_amap (= OFFSET alphamap), alphamap[0..255] (bytes),
;         counter (ends at 256), up to 256 destination words
; Regs  : general registers restored by popa; MMX state released with
;         emms before ret
;
; Loop state: mm0 = x, mm1 = deltax, mm2 = y, mm3 = deltay (fixed point,
; 8 fractional bits), EDX = coverage-byte pointer, ESI = bitmap base,
; EDI = destination pointer, CL = hScrShift+1.
;-----------------------------------------------------------------------
DC_ABLEND_BGR2RGB MACRO flipOp
	LOCAL __nowrap,__draw,__opaque,__trans,__done,__nowrap_draw
	LOCAL __blank,__nw_opaque,__nw_trans

	pusha
	mov 	eax,[scanline]
	dec 	eax
	mov 	edi,[vscreen_curr]
	mov 	ecx,eax		; (not used: ECX is reloaded from hScrShift below)
	mov 	esi,[charBank] ;[VRAM_curr]
	mov 	bx,bgcnt
	shr 	ebx,8
	and 	ebx,31		; EBX = bits 8-12 of bgcnt
	shl 	ebx,14		; scaled by 16 KB
	add 	esi,ebx		; ESI = VRAM + BG_BMPBASE
	shl 	eax,9
	add 	edi,eax		; EDI = vscreen + (scanline-1)*512

	; EDI += 2*(numBlends+1)*vscreen_size
	mov 	eax,numBlends
	inc 	eax
	add 	eax,eax
	mul 	dword ptr [vscreen_size]	; EDX:EAX = product; only EAX is used
	add 	edi,eax

	mov 	p_amap,OFFSET alphamap

	; Check the AreaOver bit.
	; bgcnt is accessed at its declared (word) size, as in the load above
	; and in the other background routines, so no bytes beyond the
	; variable are read.
	test 	bgcnt,2000h
	jz 	__nowrap
	push 	ebp		; balanced by the pop at __done

	mov 	ecx,[hScrShift]
	inc 	ecx		; CL = shift for row index -> byte offset

	; Load the rotation/scaling parameters
	movd 	mm0,[x2]

	movd 	mm1,[deltax]
	movd 	mm2,[y2]
	movd 	mm3,[deltay]
	mov 	edx,p_amap	; EDX = coverage-byte pointer
	mov 	dword ptr [counter],0

	; Wrapping loop: coordinates are masked into the bitmap
	__draw:
		push 	esi			; Save ESI
		movd 	eax,mm0
		sar 	eax,8
		and 	eax,[hTileMask]

		lea 	esi,[esi+eax*2]
		movd 	eax,mm2			; v
		sar 	eax,8			; shift out fractional bits
		and 	eax,[vTileMask]

		shl 	eax,cl			; Multiply by number of bytes per line
		add 	esi,eax			; Add to source address
		mov 	ax,[esi]		; Load pixel
		test 	eax,08000h		; Check alpha bit
		jnz 	__opaque
			mov 	byte ptr [edx],0	; transparent: coverage = 0, no pixel store
		jmp 	__trans
		__opaque:
			flipOp				; convert, set coverage, store (done by flipOp)
		__trans:
		paddd 	mm0,mm1			; x += dx
		paddd 	mm2,mm3			; y += dy
		inc 	edx			; next coverage byte
		add 	edi,2
		inc 	dword ptr [counter]
		pop 	esi			; Restore ESI
		cmp 	dword ptr [counter],256
		jb 	__draw
		jmp 	__done

	; Don't use area wrapping
	__nowrap:
		push 	ebp		; balanced by the pop at __done

		mov 	ecx,[hScrShift]
		inc 	ecx

		; Load the rotation/scaling parameters
		movd 	mm0,[x2]
		movd 	mm1,[deltax]
		movd 	mm2,[y2]
		movd 	mm3,[deltay]
		mov 	edx,p_amap
		mov 	dword ptr [counter],0

	__nowrap_draw:
		push 	esi			; popped on both the normal and the blank path
		movd 	eax,mm0
		sar 	eax,8
		js 	__blank			; u+x<0 ?
		cmp 	eax,[hTileMask]
		jg 	__blank			; u+x>line width ?

		lea 	esi,[esi+eax*2]
		movd 	eax,mm2			; v
		sar 	eax,8			; shift out fractional bits
		js 	__blank			; v+y<0 ?
		cmp 	eax,[vTileMask]
		jg 	__blank			; v+y>screen height ?

		shl 	eax,cl			; row index -> byte offset
		add 	esi,eax
		mov 	ax,[esi]		; load pixel
		test 	eax,08000h
		jnz 	__nw_opaque
			mov 	byte ptr [edx],0	; transparent: coverage = 0
		jmp 	__nw_trans
		__nw_opaque:
			flipOp				; convert, set coverage, store (done by flipOp)
		__nw_trans:
		paddd 	mm0,mm1			; x += dx
		paddd 	mm2,mm3			; y += dy
		inc 	edx
		add 	edi,2
		inc 	dword ptr [counter]
		pop 	esi
		cmp 	dword ptr [counter],256
		jb 	__nowrap_draw
		jmp 	__done

	; Area is outside the screen: mark the pixel as not covered
	; (coverage byte = 0) and leave the destination pixel untouched
	__blank:
		mov 	byte ptr [edx],0
		paddd 	mm0,mm1			; x += dx
		paddd 	mm2,mm3			; y += dy
		inc 	edx
		add 	edi,2
		inc 	dword ptr [counter]
		pop 	esi			; matches the push at __nowrap_draw
		cmp 	dword ptr [counter],256
		jb 	__nowrap_draw

	__done:
		pop 	ebp
		popa
		emms				; leave MMX state before returning
		ret
ENDM


; extbg_line_dc_ablend_bgr2rgb_ogl
; Direct-color background line for the blend path: drawn pixels are
; converted and stored by FLIP_SHIFT_AND_STORE_DC_A, which also sets the
; pixel's alphamap byte to 0FFh. The body, including pusha/popa and
; ret, comes entirely from the DC_ABLEND_BGR2RGB expansion.
extbg_line_dc_ablend_bgr2rgb_ogl PROC
	DC_ABLEND_BGR2RGB FLIP_SHIFT_AND_STORE_DC_A
extbg_line_dc_ablend_bgr2rgb_ogl ENDP


; extbg_line_dc_ablend_bgr2rgb_d3d
; Direct-color background line for the blend path: drawn pixels are
; converted and stored by FLIP_AND_STORE_DC_A, which also sets the
; pixel's alphamap byte to 0FFh. The body, including pusha/popa and
; ret, comes entirely from the DC_ABLEND_BGR2RGB expansion.
extbg_line_dc_ablend_bgr2rgb_d3d PROC
	DC_ABLEND_BGR2RGB FLIP_AND_STORE_DC_A
extbg_line_dc_ablend_bgr2rgb_d3d ENDP



;-----------------------------------------------------------------------
; mode6_line
; Draws 256 pixels of a rotated/scaled 8-bit paletted bitmap, read
; straight from [VRAM], into one line of vscreen. Each source byte
; indexes currPalette. Without area wrapping, coordinates outside the
; bitmap are drawn with palette entry 0.
;
; bgcnt bit 14 selects the layout:
;   0 -> hScrMask = 511,  vScrMask = 1023, 512 bytes per row
;   1 -> hScrMask = 1023, vScrMask = 511,  1024 bytes per row
;
; Reads : scanline, vscreen_curr, VRAM, bgcnt, x2, y2, deltax, deltay,
;         currPalette
; Writes: hScrMask, vScrMask, counter (ends at 256), 256 destination
;         words
; Regs  : all general registers are restored by popa before returning
;
; Register roles inside the loops:
;   ESI = bitmap base     EDI = destination pointer
;   EDX = x (fixed point, 8 fractional bits)   EBP = y (same format)
;   EBX = deltay          CL  = log2(bytes per row)
;-----------------------------------------------------------------------
mode6_line PROC
	pusha
	push 	ebp				; balanced by the pop at @@m6_done

	; EDI = vscreen + (scanline-1)*512
	mov 	edi,[scanline]
	dec 	edi
	shl 	edi,9
	add 	edi,[vscreen_curr]
	mov 	esi,[VRAM]

	; Derive the coordinate masks and the row shift from bgcnt bit 14
	movzx 	ecx,word ptr bgcnt
	shr 	ecx,14
	and 	ecx,1				; ECX = layout bit
	mov 	edx,512
	shl 	edx,cl
	dec 	edx
	mov 	[hScrMask],edx			; width-1
	mov 	edx,1024
	shr 	edx,cl
	dec 	edx
	mov 	[vScrMask],edx			; height-1
	add 	ecx,9				; CL = 9 or 10

	; Load the rotation/scaling parameters
	mov 	ebp,[y2]
	mov 	ebx,[deltay]
	mov 	edx,[x2]
	mov 	dword ptr [counter],0

	; Check the AreaOver bit
	test 	bgcnt,2000h
	jz 	@@m6_nowrap

	; Wrapping loop: both coordinates are masked into the bitmap
	@@m6_draw:
		push 	esi
		mov 	eax,ebp			; v
		sar 	eax,8			; shift out fractional bits
		and 	eax,[vScrMask]
		shl 	eax,cl			; Multiply by number of bytes per line
		add 	esi,eax			; ESI -> start of the source row
		mov 	eax,edx			; u
		sar 	eax,8
		and 	eax,[hScrMask]
		movzx 	eax,byte ptr [esi+eax]	; palette index
		mov 	esi,[currPalette]
		mov 	ax,[esi+eax*2]
		pop 	esi
		mov 	[edi],ax
		add 	edi,2
		add 	edx,[deltax]		; x += dx
		add 	ebp,ebx			; y += dy
		inc 	dword ptr [counter]
		cmp 	dword ptr [counter],256
		jne 	@@m6_draw
		jmp 	@@m6_done

	; No wrapping: any coordinate outside the bitmap selects color 0
	@@m6_nowrap:
	@@m6_nw_draw:
		push 	esi
		mov 	eax,ebp			; v
		sar 	eax,8			; shift out fractional bits
		js 	@@m6_blank		; v+y<0 ?
		cmp 	eax,[vScrMask]
		jg 	@@m6_blank		; v+y>screen height ?
		shl 	eax,cl
		add 	esi,eax			; ESI -> start of the source row
		mov 	eax,edx			; u
		sar 	eax,8
		js 	@@m6_blank		; u+x<0 ?
		cmp 	eax,[hScrMask]
		jg 	@@m6_blank		; u+x>line width ?
		movzx 	eax,byte ptr [esi+eax]	; palette index
		mov 	esi,[currPalette]
		mov 	ax,[esi+eax*2]
		jmp 	@@m6_nw_store
	@@m6_blank:
		mov 	esi,[currPalette]
		mov 	ax,[esi]		; color 0
	@@m6_nw_store:
		pop 	esi			; matches the push at @@m6_nw_draw
		mov 	[edi],ax
		add 	edx,[deltax]
		add 	ebp,ebx
		add 	edi,2
		inc 	dword ptr [counter]
		cmp 	dword ptr [counter],256
		jne 	@@m6_nw_draw

	@@m6_done:
		pop 	ebp
		popa
		ret
mode6_line ENDP



; OBJ_BGR2RGB_OGL reg1,reg2
; Exchanges the two outer 5-bit fields of the 15-bit color in EAX and
; moves the result up by one bit: input bits 0-4 end up in bits 11-15,
; bits 5-9 in bits 6-10, bits 10-14 in bits 1-5.
; reg1, reg2: two distinct 32-bit scratch registers other than EAX
; In      : EAX = color
; Out     : EAX = converted color
; Clobbers: reg1, reg2, flags
OBJ_BGR2RGB_OGL MACRO reg1,reg2
	mov 	reg2,eax
	mov 	reg1,eax
	and 	reg1,07C00h
	shr 	reg1,9			; high field -> bits 1-5
	and 	reg2,003E0h
	shl 	reg2,1			; middle field -> bits 6-10
	and 	eax,0001Fh
	shl 	eax,11			; low field -> bits 11-15
	or 	eax,reg2
	or 	eax,reg1
ENDM

; OBJ_BGR2RGB_D3D reg1,reg2
; Exchanges the two outer 5-bit fields of the 15-bit color in EAX
; (bits 0-4 <-> bits 10-14); bits 5-9 stay in place and bit 15 and
; above are cleared.
; reg1, reg2: two distinct 32-bit scratch registers other than EAX
; In      : EAX = color
; Out     : EAX = converted color
; Clobbers: reg1, reg2, flags
OBJ_BGR2RGB_D3D MACRO reg1,reg2
	mov 	reg2,eax
	mov 	reg1,eax
	and 	reg1,07C00h
	shr 	reg1,10			; high field -> bits 0-4
	and 	reg2,003E0h		; middle field, unchanged position
	and 	eax,0001Fh
	shl 	eax,10			; low field -> bits 10-14
	or 	eax,reg2
	or 	eax,reg1
ENDM


; OBJ_BGR2RGB_NULL reg1,reg2
; Deliberately empty conversion: expands to no instructions, so the
; color in EAX is used as loaded. It takes the same two arguments as
; OBJ_BGR2RGB_OGL / OBJ_BGR2RGB_D3D so that it can be passed as the
; flipOp of OBJ_EXPLTT and OBJ_EXPLTT_HFLIP.
OBJ_BGR2RGB_NULL MACRO reg1,reg2
ENDM


;-----------------------------------------------------------------------
; OBJ_EXPLTT flipOp
; Draws one row of an 8-bit paletted sprite, left to right. Source
; bytes equal to 0 are skipped (destination untouched); every other
; byte indexes the palette at EBX, the color is passed through flipOp
; and stored at [EDI].
;
; flipOp: name of a two-argument conversion macro (invoked as
;         "flipOp edx,ebp") that converts the color in EAX in place.
;
; On entry:
;   ECX = number of pixels to draw (only ECX & 7 is used here; the rest
;         of the width is taken from [clippedWidth])
;   ESI = address of the first source byte to draw
;   [ESP] = a saved source address pushed by the caller; it is popped
;         here
;   EDI = destination pointer, EBX = palette base
;   [hofs] = bytes to skip after 8 source bytes to reach the next group
;
; If ECX & 7 is non-zero, that many pixels are drawn first, the source
; pointer restarts from the popped address plus hofs + hofs/8 + 1, and
; [clippedWidth] is rounded down to a multiple of 8. Groups of 8 pixels
; are then drawn while [clippedWidth] >= 8.
;
; Exit: the macro never falls through; it leaves by jumping to
;       @@sprite_p_done, a label that the procedure expanding the macro
;       must supply.
; Clobbers: EAX, ECX, EDX, EBP (through flipOp), ESI, EDI, flags
;-----------------------------------------------------------------------
OBJ_EXPLTT MACRO flipOp
	LOCAL __odd_inner,__odd_pixel_invisible,__no_odd,__hloop,__hloop_inner,__pixel_invisible

	and 	ecx,7
	jz 	__no_odd

	; Leading partial group of 1..7 pixels
	__odd_inner:
		movzx 	eax,byte ptr [esi]	; palette index
		test 	eax,eax
		jz 	__odd_pixel_invisible	; index 0 is not drawn
			movzx 	eax,word ptr [ebx+eax*2]
			flipOp 	edx,ebp
			mov 	[edi],ax
		__odd_pixel_invisible:
		add 	edi,2
		inc 	esi
		dec 	ecx
		jnz 	__odd_inner

	; Restart from the saved address and move on to the next group
	pop 	esi
	mov 	ecx,[hofs]
	add 	esi,ecx
	shr 	ecx,3
	lea 	esi,[esi+ecx+1]			; ESI += hofs + hofs/8 + 1
	and 	dword ptr [clippedWidth],0FFFFFFF8h
	jmp 	__hloop

	__no_odd:
	pop 	esi

	; Full groups of 8 pixels
	__hloop:
		mov 	ecx,[clippedWidth]
		cmp 	ecx,8
		jl 	@@sprite_p_done
		mov 	ecx,8
		__hloop_inner:
			movzx 	eax,byte ptr [esi]	; palette index
			test 	eax,eax
			jz 	__pixel_invisible	; index 0 is not drawn
				movzx 	eax,word ptr [ebx+eax*2]
				flipOp 	edx,ebp
				mov 	[edi],ax
			__pixel_invisible:
			add 	edi,2
			inc 	esi
			dec 	ecx
			jnz 	__hloop_inner
		add 	esi,[hofs]		; skip to the next group of 8 source bytes
		sub 	[clippedWidth],8
		jmp 	__hloop
ENDM


;-----------------------------------------------------------------------
; OBJ_EXPLTT_HFLIP flipOp
; Draws one row of an 8-bit paletted sprite mirrored horizontally: the
; destination advances left to right while the source pointer is
; decremented before each read. Source bytes equal to 0 are skipped
; (destination untouched); every other byte indexes the palette at EBX,
; the color is passed through flipOp and stored at [EDI].
;
; flipOp: name of a two-argument conversion macro (invoked as
;         "flipOp edx,ebp") that converts the color in EAX in place.
;
; On entry:
;   ECX = number of pixels to draw (only ECX & 7 is used here; the rest
;         of the width is taken from [clippedWidth])
;   ESI = address one past the first source byte to draw
;   [ESP] = a saved source address pushed by the caller; it is popped
;         here
;   EDI = destination pointer, EBX = palette base
;   [hofs] = bytes to step back after 8 source bytes to reach the
;         previous group
;
; If ECX & 7 is non-zero, that many pixels are drawn first, the source
; pointer restarts from the popped address minus hofs + hofs/8 + 1, and
; [clippedWidth] is rounded down to a multiple of 8. Groups of 8 pixels
; are then drawn while [clippedWidth] >= 8.
;
; Exit: the macro never falls through; it leaves by jumping to
;       @@sprite_p_done, a label that the procedure expanding the macro
;       must supply.
; Clobbers: EAX, ECX, EDX, EBP (through flipOp), ESI, EDI, flags
;-----------------------------------------------------------------------
OBJ_EXPLTT_HFLIP MACRO flipOp
	LOCAL __odd_inner,__odd_pixel_invisible,__no_odd,__hloop,__hloop_inner,__pixel_invisible

	and 	ecx,7
	jz 	__no_odd

	; Leading partial group of 1..7 pixels, read backwards
	__odd_inner:
		dec 	esi
		movzx 	eax,byte ptr [esi]	; palette index
		test 	eax,eax
		jz 	__odd_pixel_invisible	; index 0 is not drawn
			movzx 	eax,word ptr [ebx+eax*2]
			flipOp 	edx,ebp
			mov 	[edi],ax
		__odd_pixel_invisible:
		add 	edi,2
		dec 	ecx
		jnz 	__odd_inner

	; Restart from the saved address and step back to the previous group
	pop 	esi
	mov 	ecx,[hofs]
	sub 	esi,ecx
	shr 	ecx,3
	sub 	esi,ecx
	dec 	esi				; ESI -= hofs + hofs/8 + 1
	and 	dword ptr [clippedWidth],0FFFFFFF8h
	jmp 	__hloop

	__no_odd:
	pop 	esi

	; Full groups of 8 pixels, read backwards
	__hloop:
		mov 	ecx,[clippedWidth]
		cmp 	ecx,8
		jl 	@@sprite_p_done
		mov 	ecx,8
		__hloop_inner:
			dec 	esi
			movzx 	eax,byte ptr [esi]	; palette index
			test 	eax,eax
			jz 	__pixel_invisible	; index 0 is not drawn
				movzx 	eax,word ptr [ebx+eax*2]
				flipOp 	edx,ebp
				mov 	[edi],ax
			__pixel_invisible:
			add 	edi,2
			dec 	ecx
			jnz 	__hloop_inner
		sub 	esi,[hofs]		; step back to the previous group of 8 source bytes
		sub 	[clippedWidth],8
		jmp 	__hloop
ENDM



obj_line PROC
	pusha
	mov 	eax,[scanline]
	shl 	eax,9
	add 	eax,[vscreen_curr]
	mov 	edi,eax				; EDI = vscreen + scanline*512

	mov 	ecx,0
	mov 	dword ptr [spriteCnt],127		; max # of sprites on this scanline

	@@sprite_p_loop:
		mov 	dword ptr [xclip],0
		mov 	ecx,[spriteCnt]
		mov 	edx,[oam_base]
			
		; Check for right priority
		mov 	ax,[4+edx+ecx*8]
		shr 	eax,10
		and 	eax,3
		cmp 	eax,[prio_int]
		jne 	@@next_sprite_p

		mov 	eax,[edx+ecx*8]			; Get attribute 0 and 1
		mov 	esi,eax
		mov 	edx,eax
		and 	esi,0C0000000h			; ss00 0000 0000 0000 0000 0000 0000 0000
		and 	edx,00000C000h			; SS00 0000 0000 0000
		shr 	esi,30				; 00ss
		shr 	edx,12				; SS00
		or 	esi,edx				; esi = SSss  (s=size, S=Shape)
		mov 	edx,[objheight+esi*4]
		push	ecx
		movd 	mm6,edx
		mov 	ecx,eax
		and 	ecx,0200h			; Double size flag
		shr 	ecx,9
		shl 	edx,cl
		dec 	edx

		movd 	mm0,eax
		and 	eax,0FFh			; EAX = sprite.y
		lea 	ebx,[eax+edx] 			; EBX = sprite.y+(height-1)
		and 	ebx,0FFh
		movd 	mm7,ecx
		inc 	edx
		movd 	mm4,edx
		shr 	edx,cl
		pop 	ecx
		dec 	edx
		cmp 	ebx,dword ptr [scanline] 	; is the entire sprite above this scanline?
		jl 	@@next_sprite_p
		cmp 	eax,dword ptr [scanline] 	; is the entire sprite below this scanline?
		jle 	@@sprite_p_y_ok
			lea 	ebx,[eax+edx]
			test 	ebx,0100h
			jz 	@@next_sprite_p
		@@sprite_p_y_ok:

		movd 	mm1,eax
		;//movd eax,mm1				// eax = sprite.y

		movd 	eax,mm0
		and 	eax,0C00h
		cmp 	eax,0C00h
		je 	@@sprite_p_bmp
		
		; Check the rotoscale bit
		movd 	eax,mm0
		test 	eax,0100h
		jnz 	@@sprite_p_rotoscale
		
		; Check the OBJ disable bit
		movd 	eax,mm0
		test 	eax,0200h
		jnz 	@@next_sprite_p

		; Check V-flip bit
		mov 	edx,0
		test 	eax,020000000h
		jz 	@@spnvf
			mov 	edx,[objheight+esi*4]
			dec 	edx
		@@spnvf:
		
		movd 	eax,mm1			; EAX = sprite.y
		push 	edi 			; save EDI
		mov 	ebx,[scanline]
		sub 	ebx,eax
		and 	ebx,0FFh
		xor 	ebx,edx
		
		mov 	edx,ebx
		
		push 	ecx
		movd 	ecx,mm0
		and 	ecx,02000h
		shr 	ecx,13
		and 	ebx,7
		add 	ecx,2
		shl 	ebx,cl
		pop 	ecx
		
		;and 	ebx,7
		;shl 	ebx,3 			; EBX = (line-sprite.y)*8
		shr 	edx,3			; EDX = (line-sprite.y)/8
		movd 	mm3,ebx

		test 	[spriteBit6],010h
		jz @@p_bit6_0
			push 	ecx
			movd 	ecx,mm0			; attribute 0
			and 	ecx,02000h		; color bit
			shr 	ecx,13			; 1 for 256x1, 0 for 16x16
			mov 	eax,[objwidth+esi*4]
			shr 	eax,3			; eax = width/8 (tiles per row)
			shl 	eax,cl			; eax *= (bpp==8) ? 2 : 1
			bsr 	cx,ax			; ecx = log2(tile_per_row)			; 
			shl 	edx,cl
			mov 	[spriteTile],edx
			pop 	ecx
			mov 	[hofs],56
			jmp 	@@p_bit6
		@@p_bit6_0:
			shl 	edx,5
			mov 	[spriteTile],edx
			mov 	[hofs],56
		@@p_bit6:

		movd	eax,mm0

		mov 	edx,[objwidth+esi*4]
		mov 	esi,ebx
		mov 	[dummy],edx
		mov 	[clippedWidth],edx
		mov 	ebx,eax
		dec 	edx
		and 	ebx,01FF0000h
		shr 	ebx,16			; sprite.x
		;/*add edx,ebx
		;cmp edx,0
		;jl @@sprite_p_done
		;cmp ebx,256
		;jge @@sprite_p_done*/

		cmp 	ebx,256
		jl 	@@sprite_p_x_ok
			mov 	eax,[dummy]
			add 	eax,ebx
			sub 	eax,513
			js 	@@sprite_p_done
			inc 	eax
			mov 	[clippedWidth],eax
			mov 	ebx,[dummy] 
			sub 	ebx,eax
			mov 	[xclip],ebx
			xor 	ebx,ebx
		@@sprite_p_x_ok:

		cmp 	dword ptr [clippedWidth],64
		ja 	@@sprite_p_done
		cmp 	dword ptr [clippedWidth],0
		jle 	@@sprite_p_done

		add 	esi,[patternTbl]
		mov 	edx,[oam_base]
		add 	edx,4
		mov 	eax,[edx+ecx*8]		; attribute 2
		movd 	mm2,eax
		mov 	ecx,[cmTileShift]
		and 	eax,03FFh
		shl 	eax,cl
		mov 	ecx,[spriteTile]
		shl 	ecx,5
		add 	eax,ecx
		;add 	eax,[spriteTile]
		;shl 	eax,5

		add 	esi,eax 		; eax = tile*32 + (line-sprite.y)*8 + patternTbl
		lea 	edi,[edi+ebx*2]		; edi += sprite.x*2
		mov 	ebx,[currPalette] 
		movd 	eax,mm0
		mov 	ecx,[clippedWidth] 

		test 	eax,02000h
		jz 	@@sprite_p_16x16

		test 	eax,010000000h
		jnz 	@@sprite_p_hflip

		mov 	eax,[xclip]
		and 	eax,038h
		shl 	eax,3
		add 	esi,eax
		mov 	eax,[xclip]
		and 	eax,7
		push 	esi
		add 	esi,eax 

		cmp 	dword ptr [useExtPltt],0
		je 	@@sprite_p_single_pltt
		movd 	ebx,mm2			; Attribute 2
		shr 	ebx,3
		and 	ebx,01E00h			; EBX = palette*512
		add 	ebx,[useExtPltt]

		cmp 	[useD3D],0
		jne 	@@sprite_p_ex_d3d
		cmp 	[useGL],0
		je 	@@sprite_p_single_pltt
		cmp 	dword ptr [glFormat],GL_UNSIGNED_SHORT_5_5_5_1
		jne 	@@sprite_p_single_pltt
		
		;;////////////////////////////////////////
		;// Extended palette OBJ w/ xBGR to RGBx
		;////////////////////////////////////////
		@@sprite_p_ex_ogl:
		OBJ_EXPLTT OBJ_BGR2RGB_OGL


		;////////////////////////////////////////
		;// Extended palette OBJ w/ xBGR to xRGB
		;////////////////////////////////////////
		@@sprite_p_ex_d3d:
		OBJ_EXPLTT OBJ_BGR2RGB_D3D


		;////////////////////////////////////////
		;// Regular 256-color OBJ
		;////////////////////////////////////////
		@@sprite_p_single_pltt:
		OBJ_EXPLTT OBJ_BGR2RGB_NULL



		@@sprite_p_hflip:
		mov 	eax,[dummy]		; EAX = width
		shr 	eax,3		
		dec 	eax
		shl 	eax,6		; EAX = ((width/8)-1) * 64
		add 	esi,eax
		add 	esi,8

		mov 	eax,[xclip]
		and 	eax,038h
		shl 	eax,3
		sub 	esi,eax

		mov 	eax,[xclip]
		and 	eax,7
		push 	esi
		sub 	esi,eax 

		cmp 	dword ptr [useExtPltt],0
		je 	@@sprite_p_hf_single_pltt
		movd 	ebx,mm2			; Attribute 2
		shr 	ebx,3
		and 	ebx,01E00h			; EBX = palette*512
		add 	ebx,[useExtPltt]

		cmp 	[useD3D],0
		jne 	@@sprite_p_hf_ex_d3d
		cmp 	[useGL],0
		je 	@@sprite_p_hf_single_pltt
		cmp 	dword ptr [glFormat],GL_UNSIGNED_SHORT_5_5_5_1
		jne 	@@sprite_p_hf_single_pltt

		;////////////////////////////////////////////////
		;// Extended palette H-flipped OBJ w/ xBGR to RGBx
		;////////////////////////////////////////////////
		@@sprite_p_hf_ex_ogl:
		OBJ_EXPLTT_HFLIP OBJ_BGR2RGB_OGL


		;////////////////////////////////////////////////
		;// Extended palette H-flipped OBJ w/ xBGR to xRGB
		;////////////////////////////////////////////////
		@@sprite_p_hf_ex_d3d:
		OBJ_EXPLTT_HFLIP OBJ_BGR2RGB_D3D


		;////////////////////////////////////////
		;// H-flipped 256-color OBJ
		;////////////////////////////////////////
		@@sprite_p_hf_single_pltt:
		OBJ_EXPLTT_HFLIP OBJ_BGR2RGB_NULL
		


		;////////////////////////////////////////
		;// 16-color OBJ
		;////////////////////////////////////////
		@@sprite_p_16x16:
		shr 	dword ptr [hofs],1
		test 	eax,010000000h			; Check the HFLIP flag
		jnz 	@@sprite_p_hflip_16x16

		movd 	eax,mm3				; (line-sprite.y)*8
		shr 	eax,1				; Divide by 2
		;sub 	esi,eax				; Subtract (line-sprite.y)*4 since each tile has half the number of bytes per row
		movd 	eax,mm2				; Attribute 2
		and 	eax,0F000h			; Palette
		shr 	eax,7				; Palette*32
		add 	ebx,eax

		mov 	eax,[xclip]
		and 	eax,038h
		shl 	eax,2
		add 	esi,eax
		mov 	eax,[xclip]
		and 	eax,7
		shr 	eax,1
		push 	esi
		add 	esi,eax 

		test 	ecx,7
		jz 	@@sprite_p_hloop_no_odd_4
		test 	ecx,1
		jz 	@@sprite_p_div2_4
		mov 	al,[esi]
		shr 	eax,4
		jz 	@@sprite_p_single_invisible_4
			mov 	ax,[ebx+eax*2]
			mov 	[edi],ax
		@@sprite_p_single_invisible_4:
			inc 	esi
			add 	edi,2
			dec 	ecx
		@@sprite_p_div2_4:

		and 	ecx,6
		jz 	@@sprite_p_hloop_only_single_4
		@@sprite_p_hloop_odd_4:
			@@sprite_p_hloop_odd_inner_4:
				xor 	eax,eax
				mov 	al,[esi]
				mov 	edx,eax
				and 	eax,0Fh
				jz 	@@sprite_p_odd_pixel_invisible_4
					mov 	ax,[ebx+eax*2]
					mov 	[edi],ax
				@@sprite_p_odd_pixel_invisible_4:
				shr 	edx,4
				jz 	@@sprite_p_odd_pixel_invisible_4_2
					mov 	ax,[ebx+edx*2]
					mov 	[edi+2],ax
				@@sprite_p_odd_pixel_invisible_4_2:
				inc 	esi
				add 	edi,4
				sub 	ecx,2
				jnz 	@@sprite_p_hloop_odd_inner_4
		@@sprite_p_hloop_only_single_4:
		pop 	esi
		mov 	ecx,[hofs]
		add 	esi,ecx
		shr 	ecx,3
		inc 	ecx
		add 	esi,ecx
		and 	[clippedWidth],0FFFFFFF8h
		jmp 	@@sprite_p_hloop_4
		@@sprite_p_hloop_no_odd_4:
		pop 	esi

		@@sprite_p_hloop_4:
			mov 	ecx,[clippedWidth]
			cmp 	ecx,8
			jl 	@@sprite_p_done
			mov 	ecx,4
			@@sprite_p_hloop_inner_4:
				xor 	eax,eax
				mov 	al,[esi]
				mov 	edx,eax
				and 	eax,0Fh
				jz 	@@sprite_p_pixel_invisible_4
					mov 	ax,[ebx+eax*2]
					mov 	[edi],ax
				@@sprite_p_pixel_invisible_4:
				shr 	edx,4
				jz 	@@sprite_p_pixel_invisible_4_2
					mov 	ax,[ebx+edx*2]
					mov 	[edi+2],ax
				@@sprite_p_pixel_invisible_4_2:
				inc 	esi
				add 	edi,4
				dec 	ecx
				jnz 	@@sprite_p_hloop_inner_4
			add 	esi,[hofs]
			sub 	[clippedWidth],8
			jmp 	@@sprite_p_hloop_4

		jmp 	@@sprite_p_done



		@@sprite_p_hflip_16x16:
		movd 	eax,mm3				; (line-sprite.y)*8
		shr 	eax,1				; Divide by 2
		;sub 	esi,eax				; Subtract (line-sprite.y)*4 since each tile has half the number of bytes per row
		movd 	eax,mm2				; Attribute 2
		and 	eax,0F000h			; Palette
		shr 	eax,7				; Palette*32
		add 	ebx,eax

		mov 	eax,[dummy]
		shr 	eax,3
		dec 	eax
		shl 	eax,5
		add 	esi,eax
		add 	esi,4

		mov 	eax,[xclip]
		and 	eax,038h
		shl 	eax,2
		sub 	esi,eax

		mov 	eax,[xclip]
		and 	eax,7
		shr 	eax,1
		push 	esi
		sub 	esi,eax 

		test 	ecx,7
		jz 	@@sprite_p_hloop_no_odd_4_hf

		test 	ecx,1
		jz 	@@sprite_p_div2_4_hf
		dec 	esi
		mov 	al,[esi]
		and 	eax,0Fh
		jz 	@@sprite_p_single_invisible_4_hf
			mov 	ax,[ebx+eax*2]
			mov 	[edi],ax
		@@sprite_p_single_invisible_4_hf:
			add 	edi,2
			dec 	ecx
		@@sprite_p_div2_4_hf:

		and 	ecx,6
		jz 	@@sprite_p_hloop_only_single_4_hf
		@@sprite_p_hloop_odd_4_hf:
			@@sprite_p_hloop_odd_inner_4_hf:
				xor 	eax,eax
				dec 	esi
				mov 	al,[esi]
				mov 	edx,eax
				shr 	eax,4
				jz 	@@sprite_p_odd_pixel_invisible_4_hf
					mov 	ax,[ebx+eax*2]
					mov 	[edi],ax
				@@sprite_p_odd_pixel_invisible_4_hf:
				and 	edx,0Fh
				jz 	@@sprite_p_odd_pixel_invisible_4_2_hf
					mov 	ax,[ebx+edx*2]
					mov 	[edi+2],ax
				@@sprite_p_odd_pixel_invisible_4_2_hf:
				add 	edi,4
				sub 	ecx,2
				jnz 	@@sprite_p_hloop_odd_inner_4_hf
		@@sprite_p_hloop_only_single_4_hf:
		pop 	esi
		mov 	ecx,[hofs]
		sub 	esi,ecx
		shr 	ecx,3
		inc 	ecx
		sub 	esi,ecx
		and 	[clippedWidth],0FFFFFFF8h
		jmp 	@@sprite_p_hloop_4_hf
		@@sprite_p_hloop_no_odd_4_hf:
		pop 	esi

		@@sprite_p_hloop_4_hf:
			mov 	ecx,[clippedWidth]
			cmp 	ecx,8
			jl 	@@sprite_p_done
			mov 	ecx,4
			@@sprite_p_hloop_inner_4_hf:
				xor 	eax,eax
				dec 	esi
				mov 	al,[esi]
				mov 	edx,eax
				shr 	eax,4
				jz 	@@sprite_p_pixel_invisible_4_hf
					mov 	ax,[ebx+eax*2]
					mov 	[edi],ax
				@@sprite_p_pixel_invisible_4_hf:
				and 	edx,0Fh
				jz 	@@sprite_p_pixel_invisible_4_2_hf
					mov 	ax,[ebx+edx*2]
					mov 	[edi+2],ax
				@@sprite_p_pixel_invisible_4_2_hf:
				add 	edi,4
				dec 	ecx
				jnz 	@@sprite_p_hloop_inner_4_hf
			sub 	esi,[hofs]
			sub 	[clippedWidth],8
			jmp 	@@sprite_p_hloop_4_hf

		jmp 	@@sprite_p_done



		;////////////////////////////////////////
		;// Bitmap OBJ
		;////////////////////////////////////////
		
		@@sprite_p_bmp:
		; Bitmap OBJ, non-rotated: clip the sprite horizontally, compute the
		; source address of the current row and jump to the pixel loop that
		; matches the active output format (useD3D / useGL + glFormat).
		; mm0 carries the packed attribute bits tested below; esi indexes the
		; objwidth/objheight tables.

		; Check the rotoscale bit
		movd 	eax,mm0
		test 	eax,0100h
		jnz 	@@sprite_p_bmp_rotoscale

		; Check the OBJ disable bit
		movd 	eax,mm0
		test 	eax,0200h
		jnz 	@@next_sprite_p

		mov 	edx,[oam_base]
		push 	ecx				; ecx is used as scratch until the matching pop below
		add 	edx,6
		movd 	ecx,mm7				; NOTE(review): overwritten by the next movd ecx before any use
		mov 	eax,[objwidth + esi*4]
		movd 	mm7,eax				; mm7 = unclipped width
		mov 	[clippedWidth],eax		; default: draw the full width
		movd 	ecx,mm0
		and 	ecx,03E000000h
		shr 	ecx,20				; ecx = (bits 25..29 of mm0) * 32
		add 	edx,ecx
		push 	ebp

		sub 	edx,ecx				; cancels the add above (nothing is read through edx in between)
		pop 	ebp
		sub 	edx,2				; edx = oam_base + 4
		pop 	ecx				; restore the value saved by push ecx
		push 	edi				; saved destination pointer; popped at @@sprite_p_done

		mov 	eax,[edx+ecx*8]			; Attribute 2
		movd 	ebx,mm0
		and 	eax,03FFh			; low 10 bits -> spriteTile
		and 	ebx,01FF0000h
		mov 	[spriteTile],eax
		shr 	ebx,16				; ebx = 9-bit x coordinate (bits 16..24 of mm0)
		cmp 	ebx,256
		jl 	@@sprite_p_bmp_x_ok		; x < 256: no left-edge clipping
		; x >= 256 is treated as x-512 (sprite starts left of the screen)
		movd 	eax,mm7
		add 	eax,ebx
		sub 	eax,513
		js 	@@sprite_p_done			; width + x - 512 <= 0: nothing visible
		inc 	eax				; eax = width + x - 512 = visible columns
		mov 	[clippedWidth],eax
		movd 	ebx,mm7
		sub 	ebx,eax
		mov 	[xclip],ebx			; xclip = columns cut off on the left
		xor 	ebx,ebx				; drawing starts at screen column 0
		@@sprite_p_bmp_x_ok:

		lea 	edi,[edi+ebx*2]			; advance destination by x 16-bit pixels

		xor 	eax,eax
		xor 	ebx,ebx
		mov 	[objy2],eax
		mov 	[objx2],ebx

		movd 	eax,mm1				; sprite.y
		mov 	ebx,[scanline]
		sub 	ebx,eax				; ebx = scanline-sprite.y
		shr 	edx,1				; NOTE(review): edx still holds oam_base+4 here, not a height;
							; it is not read again in this section before being reloaded

		mov 	eax,ebx
		add 	[objy2],eax			; objy2 = row inside the sprite

		xor 	ecx,ecx
		xor 	ebx,ebx
		add 	ebx,[xclip]
		mov 	eax,ebx
		add 	[objx2],eax			; objx2 = first source column (= xclip)

		movd 	ebx,mm0
		test 	ebx,02000h
		jnz 	@@sprite_p_done 		; bit 0x2000 set: sprite is not drawn by this path

		push 	ebp				; ebp becomes tileShift; each pixel loop pops it on exit
		mov 	ecx,[objwidth+esi*4]
		movd 	mm5,ecx				; mm5 = width (not read by the non-rotated loops below)
		mov 	eax,ecx
		shr 	eax,3				; NOTE(review): result is overwritten by the spriteTile load below
		mov 	edx,[objheight+esi*4]		; NOTE(review): overwritten by mov edx,[objx2] before any use
		movd 	mm6,[vofs]			; mm6 = vofs, used below as a shift count

		; esi = patternTbl + ((tile & hTileMask) << hScrShift)
		;                  + ((tile & vTileMask) << vScrShift)
		mov 	eax,[spriteTile]
		mov 	ebx,eax
		mov 	ecx,[hScrShift]
		and 	eax,[hTileMask]
		and 	ebx,[vTileMask]
		shl 	eax,cl
		mov 	ecx,[vScrShift]
		shl 	ebx,cl

		mov 	ecx,[clippedWidth]		; reloaded again before the loop; ecx is reused as shift count first
		mov 	esi,[patternTbl]
		add 	esi,eax
		add 	esi,ebx
		mov 	ebp,[tileShift]

		; Add the row offset: ((v/8) << vofs) + ((v&7) << (vofs-3))
		mov 	ebx,[objy2]
		movd 	ecx,mm6
		mov 	eax,ebx
		shr 	ebx,3		; ebx = v/8
		and 	eax,7		; eax = v & 7
		shl 	ebx,cl		; ebx <<= vofs
		sub 	ecx,3
		shl 	eax,cl		; eax <<= vofs-3
		add 	esi,eax
		add 	esi,ebx

		mov 	edx,[objx2]			; edx = current source column u

		mov 	ecx,[clippedWidth]		; ecx = number of pixels to emit

		mov 	dword ptr [objdx],1		; step one source column per output pixel

		; Select the pixel loop for the active output format
		cmp 	[useD3D],0
		jne 	@@sprite_p_bmp_hloop_d3d
		cmp 	[useGL],0
		je 	@@sprite_p_bmp_hloop
		cmp 	dword ptr [glFormat],GL_UNSIGNED_SHORT_5_5_5_1
		je 	@@sprite_p_bmp_hloop_ogl

		@@sprite_p_bmp_hloop:
			; Bitmap OBJ pixel loop, native pixel format.
			; In: edx = source column u, ecx = pixels left, esi = row base,
			;     ebp = tileShift, edi = destination. Pixels with bit 15 clear
			;     are skipped; bit 15 is stripped from the ones that are written.
			push 	ecx				; ecx doubles as the shift count below
			push 	esi				; esi is modified while addressing the pixel

			mov 	eax,edx 
			mov 	ebx,eax
			mov 	ecx,ebp
			shr 	eax,3
			and 	ebx,7
			shl 	eax,cl		; eax = (u/8) << tileShift
			lea 	esi,[esi+ebx*2]		; + (u & 7) 16-bit pixels
			mov 	ax,[esi+eax*2]
			test	eax,08000h
			jz 	@@sprite_p_bmp_transp
				and 	eax,07FFFh
				mov 	[edi],ax
			@@sprite_p_bmp_transp:
			add 	edx,[objdx] 		; next source column
			add 	edi,2			; next destination pixel
			pop 	esi
			pop 	ecx
			dec 	ecx
			jnz 	@@sprite_p_bmp_hloop
			pop 	ebp			; matches the push ebp done before the loop was entered
			jmp 	@@sprite_p_done

		@@sprite_p_bmp_hloop_ogl:
			; Bitmap OBJ pixel loop used when useGL is set and glFormat is
			; GL_UNSIGNED_SHORT_5_5_5_1. Same addressing as @@sprite_p_bmp_hloop;
			; visible pixels go through OBJ_BGR2RGB_OGL before being stored.
			push 	ecx				; ecx doubles as the shift count below
			push 	esi				; esi is modified while addressing the pixel
			mov 	eax,edx 
			mov 	ebx,eax
			mov 	ecx,ebp
			shr 	eax,3
			and 	ebx,7
			shl 	eax,cl		; eax = (u/8) << tileShift
			lea 	esi,[esi+ebx*2]		; + (u & 7) 16-bit pixels
			mov 	ax,[esi+eax*2]
			test 	eax,08000h
			jz 	@@sprite_p_bmp_transp_ogl	; bit 15 clear: leave destination untouched
				;mov 	ebx,eax
				;mov 	ecx,eax
				;and 	ebx,0x03E0
				;and 	ecx,0x7C00
				;and 	eax,0x001F
				;shl 	ebx,1
				;shr 	ecx,9
				;shl 	eax,11
				;or 	ebx,ecx
				;or 	ebx,eax
				; NOTE(review): the macro is defined outside this section; the store
				; below takes the result from ax, so the macro presumably converts
				; eax in place with ebx/ecx as scratch -- confirm against its definition.
				OBJ_BGR2RGB_OGL ebx,ecx
				mov 	[edi],ax
			@@sprite_p_bmp_transp_ogl:
			add 	edx,[objdx] 		; next source column
			add 	edi,2			; next destination pixel
			pop 	esi
			pop 	ecx
			dec 	ecx
			jnz 	@@sprite_p_bmp_hloop_ogl
		pop 	ebp				; matches the push ebp done before the loop was entered
		jmp 	@@sprite_p_done

		@@sprite_p_bmp_hloop_d3d:
			; Bitmap OBJ pixel loop used when useD3D is non-zero. Same addressing
			; as @@sprite_p_bmp_hloop; visible pixels go through OBJ_BGR2RGB_D3D
			; before being stored.
			push 	ecx				; ecx doubles as the shift count below
			push 	esi				; esi is modified while addressing the pixel
			mov 	eax,edx 
			mov 	ebx,eax
			mov 	ecx,ebp
			shr 	eax,3
			and 	ebx,7
			shl 	eax,cl		; eax = (u/8) << tileShift
			lea 	esi,[esi+ebx*2]		; + (u & 7) 16-bit pixels
			mov 	ax,[esi+eax*2]
			test 	eax,08000h
			jz 	@@sprite_p_bmp_transp_d3d	; bit 15 clear: leave destination untouched
				; NOTE(review): macro defined outside this section; result is
				; taken from ax by the store below -- confirm against its definition.
				OBJ_BGR2RGB_D3D ebx,ecx
				mov 	[edi],ax
			@@sprite_p_bmp_transp_d3d:
			add 	edx,[objdx] 		; next source column
			add 	edi,2			; next destination pixel
			pop 	esi
			pop 	ecx
			dec 	ecx
			jnz 	@@sprite_p_bmp_hloop_d3d
		pop 	ebp				; matches the push ebp done before the loop was entered
		jmp 	@@sprite_p_done


		@@sprite_p_bmp_rotoscale:
		; Bitmap OBJ with rotation/scaling: load the four 16-bit parameters,
		; clip horizontally, compute the starting texture coordinates
		; (objx2/objy2, shifted right by 8 in the pixel loops) for this
		; scanline and jump to the loop matching the output format.
		mov 	edx,[oam_base]
		push 	ecx				; ecx is used as scratch until the matching pop below
		add 	edx,6
		movd 	ecx,mm7				; cl = shift applied to the width
							; (presumably the double-size flag -- confirm where mm7 is set)
		mov 	eax,[objwidth + esi*4]
		shl 	eax,cl
		movd 	mm7,eax				; mm7 = on-screen width
		mov 	[clippedWidth],eax		; default: draw the full width
		movd 	ecx,mm0
		and 	ecx,03E000000h
		shr 	ecx,20				; ecx = (bits 25..29 of mm0) * 32 = parameter group offset
		add 	edx,ecx				; edx -> first parameter of the selected group
		push 	ebp

		movsx 	eax,word ptr [edx]		; PA-n
		movsx 	ebx,word ptr [edx+16]		; PC-n
		mov 	[objdx],eax			; per-pixel step of objx2
		mov 	[objdy],ebx			; per-pixel step of objy2

		movsx 	eax,word ptr [edx+8]		; PB-n
		movsx 	ebx,word ptr [edx+24]		; PD-n
		mov 	[objdmx],eax			; per-line step of objx2
		mov 	[objdmy],ebx			; per-line step of objy2

		sub 	edx,ecx
		pop 	ebp
		sub 	edx,2				; edx = oam_base + 4
		pop 	ecx				; restore the value saved by push ecx
		push 	edi				; saved destination pointer; popped at @@sprite_p_done

		mov 	eax,[edx+ecx*8]			; Attribute 2
		movd 	ebx,mm0
		and 	eax,03FFh			; low 10 bits -> spriteTile
		and 	ebx,01FF0000h
		mov 	[spriteTile],eax
		shr 	ebx,16				; ebx = 9-bit x coordinate (bits 16..24 of mm0)
		cmp 	ebx,256
		jl 	@@sprite_p_bmp_rs_x_ok		; x < 256: no left-edge clipping
		; x >= 256 is treated as x-512 (sprite starts left of the screen)
		movd 	eax,mm7
		add 	eax,ebx
		sub 	eax,513
		js 	@@sprite_p_done			; width + x - 512 <= 0: nothing visible
		inc 	eax				; eax = width + x - 512 = visible columns
		mov 	[clippedWidth],eax
		movd 	ebx,mm7
		sub 	ebx,eax
		mov 	[xclip],ebx			; xclip = columns cut off on the left
		xor 	ebx,ebx				; drawing starts at screen column 0
		@@sprite_p_bmp_rs_x_ok:

		lea 	edi,[edi+ebx*2]			; advance destination by x 16-bit pixels

		; Start from the texture centre: value << 7 == (value/2) << 8
		movd 	eax,mm6 			; presumably the sprite height -- confirm where mm6 is set
		mov 	ebx,[objwidth+esi*4]
		shl 	eax,7
		shl 	ebx,7
		mov 	[objy2],eax
		mov 	[objx2],ebx

		movd 	eax,mm1				; sprite.y
		mov 	ebx,[scanline]
		movd 	edx,mm4
		sub 	ebx,eax				; ebx = scanline-sprite.y
							; NOTE(review): @@sprite_p_rotoscale masks this with 0FFh ("wrap"); this path does not
		shr 	edx,1				; edx = height/2
		inc 	ebx				; ebx = line (1..height)
		sub 	ebx,edx				; ebx = line offset from the vertical centre

		; One-operand imul writes edx:eax; only eax is kept.
		mov 	eax,ebx
		imul 	dword ptr [objdmy]
		add 	[objy2],eax			; objy2 += line offset * PD

		mov 	eax,ebx
		imul 	dword ptr [objdmx]
		add 	[objx2],eax			; objx2 += line offset * PB

		movd 	ebx,mm7
		xor 	ecx,ecx
		shr 	ebx,1
		neg 	ebx
		add 	ebx,[xclip]			; ebx = xclip - width/2 = first column relative to the centre
		mov 	eax,ebx
		imul 	dword ptr [objdy]
		add 	[objy2],eax			; objy2 += column offset * PC
		mov 	eax,[objdx]
		imul 	ebx
		add 	[objx2],eax			; objx2 += column offset * PA

		movd 	ebx,mm0
		test 	ebx,02000h
		jnz 	@@sprite_p_done 		; bit 0x2000 set: sprite is not drawn by this path

		push 	ebp				; ebp becomes tileShift; each pixel loop pops it on exit
		mov 	ecx,[objwidth+esi*4]
		movd 	mm5,ecx				; mm5 = texture width, upper bound for u in the loops
		mov 	eax,ecx
		shr 	eax,3				; NOTE(review): result is overwritten by the spriteTile load below
		mov 	edx,[objheight+esi*4]		; edx = texture height, upper bound for v in the loops
		movd 	mm6,[vofs]			; mm6 = vofs, used in the loops as a shift count

		; esi = patternTbl + ((tile & hTileMask) << hScrShift)
		;                  + ((tile & vTileMask) << vScrShift)
		mov 	eax,[spriteTile]
		mov 	ebx,eax
		mov 	ecx,[hScrShift]
		and 	eax,[hTileMask]
		and 	ebx,[vTileMask]
		shl 	eax,cl
		mov 	ecx,[vScrShift]
		shl 	ebx,cl

		mov 	ecx,[clippedWidth]		; ecx = number of pixels to emit
		mov 	esi,[patternTbl]
		add 	esi,eax
		add 	esi,ebx
		mov 	ebp,[tileShift]

		; Select the pixel loop for the active output format
		cmp 	[useD3D],0
		jne 	@@sprite_p_bmp_rs_hloop_d3d
		cmp 	[useGL],0
		je 	@@sprite_p_bmp_rs_hloop
		cmp 	dword ptr [glFormat],GL_UNSIGNED_SHORT_5_5_5_1
		je 	@@sprite_p_bmp_rs_hloop_ogl

		@@sprite_p_bmp_rs_hloop:
			; Rotated/scaled bitmap OBJ pixel loop, native pixel format.
			; In: ecx = pixels left, edx = texture height, mm5 = texture width,
			;     mm6 = row shift (vofs), ebp = tileShift, esi = bitmap base,
			;     edi = destination. objx2/objy2 hold the texture coordinates
			;     (>> 8 gives u/v) and are stepped by objdx/objdy per pixel.
			; A visible pixel is stored and then falls through into the shared
			; step-and-loop tail that skipped pixels jump to.
			push 	ecx
			mov 	ebx,[objy2]
			push 	esi
			sar 	ebx,8			; ebx = v
			js 	@@sprite_p_bmp_rs_transp	; v < 0: outside the texture
			cmp 	ebx,edx
			jge 	@@sprite_p_bmp_rs_transp	; v >= height: outside the texture
			movd 	ecx,mm6
			mov 	eax,ebx
			shr 	ebx,3		; ebx = v/8
			and 	eax,7		; eax = v & 7
			shl 	ebx,cl		; ebx <<= vofs
			sub 	ecx,3
			shl 	eax,cl		; eax <<= vofs-3
			add 	esi,eax
			mov 	eax,[objx2]
			add 	esi,ebx
			sar 	eax,8			; eax = u
			js 	@@sprite_p_bmp_rs_transp	; u < 0: outside the texture
			movd 	ecx,mm5
			cmp 	eax,ecx
			jge 	@@sprite_p_bmp_rs_transp	; u >= width: outside the texture
			mov 	ebx,eax
			mov 	ecx,ebp
			shr 	eax,3
			and 	ebx,7
			shl 	eax,cl		; eax = (u/8) << tileShift
			lea 	esi,[esi+ebx*2]
			mov 	ax,[esi+eax*2]
			test 	eax,08000h
			jz 	@@sprite_p_bmp_rs_transp	; bit 15 clear: pixel is not drawn
			and 	eax,07FFFh
			mov 	[edi],ax

			; Shared tail: step the texture coordinates and the destination.
			@@sprite_p_bmp_rs_transp:
			mov 	eax,[objdx]
			mov 	ecx,[objdy]
			add 	[objx2],eax
			add 	edi,2
			add 	[objy2],ecx
			pop 	esi
			pop 	ecx
			dec 	ecx
			jnz 	@@sprite_p_bmp_rs_hloop
		pop 	ebp				; matches the push ebp done before the loop was entered
		jmp 	@@sprite_p_done

		@@sprite_p_bmp_rs_hloop_ogl:
			; Rotated/scaled bitmap OBJ pixel loop used when useGL is set and
			; glFormat is GL_UNSIGNED_SHORT_5_5_5_1.
			; In: ecx = pixels left, edx = texture height, mm5 = texture width,
			;     mm6 = row shift (vofs), ebp = tileShift, esi = bitmap base,
			;     edi = destination. objx2/objy2 (>> 8 gives u/v) are stepped by
			;     objdx/objdy per pixel.
			; A visible pixel is converted, stored and then falls through into
			; the shared step-and-loop tail that skipped pixels jump to.
			push 	ecx
			mov 	ebx,[objy2]
			push 	esi
			sar 	ebx,8			; ebx = v
			js 	@@sprite_p_bmp_rs_transp_ogl	; v < 0: outside the texture
			cmp 	ebx,edx
			jge 	@@sprite_p_bmp_rs_transp_ogl	; v >= height: outside the texture
			movd 	ecx,mm6
			mov 	eax,ebx
			shr 	ebx,3		; ebx = v/8
			and 	eax,7		; eax = v & 7
			shl 	ebx,cl		; ebx <<= vofs
			sub 	ecx,3
			shl 	eax,cl		; eax <<= vofs-3
			add 	esi,eax
			mov 	eax,[objx2]
			add 	esi,ebx
			sar 	eax,8			; eax = u
			js 	@@sprite_p_bmp_rs_transp_ogl	; u < 0: outside the texture
			movd 	ecx,mm5
			cmp 	eax,ecx
			jge 	@@sprite_p_bmp_rs_transp_ogl	; u >= width: outside the texture
			mov 	ebx,eax
			mov 	ecx,ebp
			shr 	eax,3
			and 	ebx,7
			shl 	eax,cl		; eax = (u/8) << tileShift
			lea 	esi,[esi+ebx*2]
			mov 	ax,[esi+eax*2]
			test 	eax,08000h
			jz 	@@sprite_p_bmp_rs_transp_ogl	; bit 15 clear: pixel is not drawn
			; Convert xBGR to RGBx
			mov 	ebx,eax
			mov 	ecx,eax
			and 	ebx,003E0h		; bits 5..9
			and 	ecx,07C00h		; bits 10..14
			and 	eax,0001Fh		; bits 0..4
			shl 	ebx,1			; -> bits 6..10
			shr 	ecx,9			; -> bits 1..5
			shl 	eax,11			; -> bits 11..15
			or 	ebx,ecx
			or 	ebx,eax
			mov 	[edi],bx

			; Shared tail: step the texture coordinates and the destination.
			@@sprite_p_bmp_rs_transp_ogl:
			mov 	eax,[objdx]
			mov 	ecx,[objdy]
			add 	[objx2],eax
			add 	edi,2
			add 	[objy2],ecx
			pop 	esi
			pop 	ecx
			dec 	ecx
			jnz 	@@sprite_p_bmp_rs_hloop_ogl
		pop 	ebp				; matches the push ebp done before the loop was entered
		jmp 	@@sprite_p_done

		@@sprite_p_bmp_rs_hloop_d3d:
			; Rotated/scaled bitmap OBJ pixel loop used when useD3D is non-zero.
			; In: ecx = pixels left, edx = texture height, mm5 = texture width,
			;     mm6 = row shift (vofs), ebp = tileShift, esi = bitmap base,
			;     edi = destination. objx2/objy2 (>> 8 gives u/v) are stepped by
			;     objdx/objdy per pixel.
			; A visible pixel is converted, stored and then falls through into
			; the shared step-and-loop tail that skipped pixels jump to.
			push 	ecx
			mov 	ebx,[objy2]
			push 	esi
			sar 	ebx,8			; ebx = v
			js 	@@sprite_p_bmp_rs_transp_d3d	; v < 0: outside the texture
			cmp 	ebx,edx
			jge 	@@sprite_p_bmp_rs_transp_d3d	; v >= height: outside the texture
			movd 	ecx,mm6
			mov 	eax,ebx
			shr 	ebx,3		; ebx = v/8
			and 	eax,7		; eax = v & 7
			shl 	ebx,cl		; ebx <<= vofs
			sub 	ecx,3
			shl 	eax,cl		; eax <<= vofs-3
			add 	esi,eax
			mov 	eax,[objx2]
			add 	esi,ebx
			sar 	eax,8			; eax = u
			js 	@@sprite_p_bmp_rs_transp_d3d	; u < 0: outside the texture
			movd 	ecx,mm5
			cmp 	eax,ecx
			jge 	@@sprite_p_bmp_rs_transp_d3d	; u >= width: outside the texture
			mov 	ebx,eax
			mov 	ecx,ebp
			shr 	eax,3
			and 	ebx,7
			shl 	eax,cl		; eax = (u/8) << tileShift
			lea 	esi,[esi+ebx*2]
			mov 	ax,[esi+eax*2]
			test 	eax,08000h
			jz 	@@sprite_p_bmp_rs_transp_d3d	; bit 15 clear: pixel is not drawn
			; Convert xBGR to xRGB
			mov 	ebx,eax
			mov 	ecx,eax
			and 	ebx,003E0h		; bits 5..9 stay in place
			and 	ecx,07C00h		; bits 10..14
			and 	eax,0001Fh		; bits 0..4
			shr 	ecx,10			; -> bits 0..4
			shl 	eax,10			; -> bits 10..14
			or 	ebx,ecx
			or 	ebx,eax
			mov 	[edi],bx

			; Shared tail: step the texture coordinates and the destination.
			@@sprite_p_bmp_rs_transp_d3d:
			mov 	eax,[objdx]
			mov 	ecx,[objdy]
			add 	[objx2],eax
			add 	edi,2
			add 	[objy2],ecx
			pop 	esi
			pop 	ecx
			dec 	ecx
			jnz 	@@sprite_p_bmp_rs_hloop_d3d
		pop 	ebp				; matches the push ebp done before the loop was entered
		jmp 	@@sprite_p_done
			

		@@sprite_p_rotoscale:
		; Tiled OBJ with rotation/scaling: load the four 16-bit parameters,
		; clip horizontally, compute the starting texture coordinates
		; (objx2/objy2, shifted right by 8 in the pixel loops) for this
		; scanline, then branch on bit 0x2000 of mm0 to the 16-colour
		; (@@sprite_p_rs_16x16) or the 256-colour setup that follows.
		mov 	edx,[oam_base]
		push 	ecx				; ecx is used as scratch until the matching pop below
		add 	edx,6
		movd 	ecx,mm7				; cl = shift applied to the width
							; (presumably the double-size flag -- confirm where mm7 is set)
		mov 	eax,[objwidth + esi*4]
		shl 	eax,cl
		movd 	mm7,eax				; mm7 = on-screen width
		mov 	[clippedWidth],eax		; default: draw the full width
		movd 	ecx,mm0
		and 	ecx,03E000000h
		shr 	ecx,20				; ecx = (bits 25..29 of mm0) * 32 = parameter group offset
		add 	edx,ecx				; edx -> first parameter of the selected group
		push	 ebp

		movsx 	eax,word ptr [edx]		; PA-n
		movsx 	ebx,word ptr [edx+16]		; PC-n
		mov 	[objdx],eax			; per-pixel step of objx2
		mov 	[objdy],ebx			; per-pixel step of objy2

		movsx 	eax,word ptr [edx+8]		; PB-n
		movsx 	ebx,word ptr [edx+24]		; PD-n
		mov 	[objdmx],eax			; per-line step of objx2
		mov 	[objdmy],ebx			; per-line step of objy2

		sub 	edx,ecx
		pop 	ebp
		sub 	edx,2				; edx = oam_base + 4
		pop 	ecx				; restore the value saved by push ecx
		push 	edi				; saved destination pointer; popped at @@sprite_p_done

		mov 	eax,[edx+ecx*8]			; attribute 2
		movd 	ebx,mm0
		mov 	[tileData],eax			; keep the unmasked value for the palette bits
		and 	eax,03FFh			; low 10 bits -> spriteTile
		and 	ebx,01FF0000h
		mov 	[spriteTile],eax
		shr 	ebx,16				; ebx = 9-bit x coordinate (bits 16..24 of mm0)
		cmp 	ebx,240				; NOTE(review): the bitmap OBJ paths compare against 256 here
		jl 	@@sprite_p_rs_x_ok
		; otherwise x is treated as x-512 (sprite starts left of the screen)
		movd 	eax,mm7
		add 	eax,ebx
		sub 	eax,513
		js 	@@sprite_p_done			; width + x - 512 <= 0: nothing visible
		inc 	eax				; eax = width + x - 512 = visible columns
		mov 	[clippedWidth],eax
		movd 	ebx,mm7
		sub 	ebx,eax
		mov 	[xclip],ebx			; xclip = columns cut off on the left
		xor 	ebx,ebx				; drawing starts at screen column 0
		@@sprite_p_rs_x_ok:

		lea 	edi,[edi+ebx*2]			; advance destination by x 16-bit pixels

		; Start from the texture centre: value << 7 == (value/2) << 8
		movd 	eax,mm6 			; presumably the sprite height -- confirm where mm6 is set
		mov 	ebx,[objwidth+esi*4]
		shl 	eax,7
		shl 	ebx,7
		mov 	[objy2],eax
		mov 	[objx2],ebx

		movd 	eax,mm1				; sprite.y
		mov 	ebx,[scanline]
		movd 	edx,mm4
		sub 	ebx,eax				; ebx = scanline-sprite.y
		and 	ebx,0FFh			; wrap
		shr 	edx,1				; edx = height/2
		inc 	ebx				; ebx = line (1..height)
		sub 	ebx,edx				; ebx = line offset from the vertical centre

		; One-operand imul writes edx:eax; only eax is kept.
		mov 	eax,ebx
		imul 	dword ptr [objdmy]
		add 	[objy2],eax			; objy2 += line offset * PD

		mov 	eax,ebx
		imul 	dword ptr [objdmx]
		add 	[objx2],eax			; objx2 += line offset * PB

		movd 	ebx,mm7
		xor 	ecx,ecx
		shr 	ebx,1
		neg 	ebx
		add 	ebx,[xclip]			; ebx = xclip - width/2 = first column relative to the centre
		mov 	eax,ebx
		imul 	dword ptr [objdy]
		add 	[objy2],eax			; objy2 += column offset * PC
		mov 	eax,[objdx]
		imul 	ebx
		add 	[objx2],eax			; objx2 += column offset * PA

		movd 	ebx,mm0
		test 	ebx,02000h
		jz 	@@sprite_p_rs_16x16		; bit clear: 4-bit pixels; set: fall through to 8-bit pixels

		; 256-colour rotoscale setup: choose the tile-row shift (mm6), the
		; palette base (ebp) and the tile data address (esi) for
		; @@sprite_p_rs_hloop.
		push 	ebp				; ebp becomes the palette base; the loop pops it on exit
		mov 	ecx,[objwidth+esi*4]
		movd 	mm5,ecx				; mm5 = texture width, upper bound for u in the loop
		mov 	eax,ecx
		mov 	ebp,[currPalette]
		shr 	eax,3				; eax = width in tiles
		mov 	edx,[objheight+esi*4]		; edx = texture height, upper bound for v in the loop
		test 	[spriteBit6],010h
		jz 	@@sprite_p_rs_2d
			; 1D mapping: shift = log2(2 * tiles per row); the loop adds another
			; << 5, i.e. (width/8) * 64 bytes per tile row
			shl 	eax,1
			bsr 	cx,ax
			movd 	mm6,ecx
			jmp 	@@sprite_p_rs_1d
		@@sprite_p_rs_2d:
			; 2D mapping: fixed shift 5; with the loop's << 5 that is 1024 bytes per tile row
			mov 	ecx,5
			movd 	mm6,ecx
		@@sprite_p_rs_1d:

		cmp 	dword ptr [useExtPltt],0
		je 	@@sprite_p_single_pltt_3
			; ebp = ((tileData >> 12) & 0Fh) * 512 + [useExtPltt]
			; NOTE(review): useExtPltt is added as a base address here, so it
			; presumably holds the extended palette pointer when non-zero -- confirm.
			mov 	ebp,[tileData]
			shr 	ebp,3
			and 	ebp,01E00h
			add 	ebp,[useExtPltt]
		@@sprite_p_single_pltt_3:

		mov 	ecx,[clippedWidth]		; ecx = number of pixels to emit
		mov 	eax,[spriteTile]
		mov 	esi,[patternTbl]
		shl 	eax,5				; tile number * 32 bytes
		add 	esi,eax

		@@sprite_p_rs_hloop:
			; Rotated/scaled tiled OBJ pixel loop, 8 bits per pixel.
			; In: ecx = pixels left, edx = texture height, mm5 = texture width,
			;     mm6 = tile-row shift, ebp = palette base, esi = tile data,
			;     edi = destination. objx2/objy2 (>> 8 gives u/v) are stepped by
			;     objdx/objdy per pixel. Colour index 0 is not drawn.
			; A visible pixel is stored and then falls through into the shared
			; step-and-loop tail that skipped pixels jump to.
			push 	ecx
			mov 	ebx,[objy2]
			push 	esi
			sar 	ebx,8			; ebx = v
			js 	@@sprite_p_rs_transp		; v < 0: outside the texture
			cmp 	ebx,edx
			jge 	@@sprite_p_rs_transp		; v >= height: outside the texture
			movd 	ecx,mm6
			mov 	eax,ebx
			shr 	ebx,3			; ebx = v/8
			and 	eax,7			; eax = v & 7
			shl 	ebx,cl
			lea 	esi,[esi+eax*8]		; 8 bytes per pixel row inside a tile
			mov 	eax,[objx2]
			shl 	ebx,5			; ebx = byte offset of the tile row
			sar 	eax,8			; eax = u
			js 	@@sprite_p_rs_transp		; u < 0: outside the texture
			movd 	ecx,mm5
			cmp 	eax,ecx
			jge 	@@sprite_p_rs_transp		; u >= width: outside the texture
			mov 	ecx,eax
			shr 	eax,3
			and 	ecx,7
			shl 	eax,6		; eax = (u/8)*64
			add 	esi,ecx			; + column inside the tile
			add 	esi,ebx
			mov 	al,[esi+eax]
			and 	eax,255			; keep only the colour index just loaded
			jz 	@@sprite_p_rs_transp		; index 0: pixel is not drawn
			mov 	bx,[ebp+eax*2]		; palette lookup
			mov 	[edi],bx

			; Shared tail: step the texture coordinates and the destination.
			@@sprite_p_rs_transp:
			mov 	eax,[objdx]
			mov 	ecx,[objdy]
			add 	[objx2],eax
			add 	edi,2
			add 	[objy2],ecx
			pop 	esi
			pop 	ecx
			dec 	ecx
			jnz 	@@sprite_p_rs_hloop
		pop 	ebp				; matches the push ebp done before the loop was entered
		jmp 	@@sprite_p_done


		@@sprite_p_rs_16x16:
		; 16-colour rotoscale setup: choose the palette bank (ebp), the
		; tile-row shift (mm6) and the tile data address (esi) for
		; @@sprite_p_rs_16x16_hloop.
		push 	ebp				; ebp becomes the palette base; the loop pops it on exit
		mov 	ecx,[objwidth+esi*4]
		movd 	mm5,ecx				; mm5 = texture width, upper bound for u in the loop
		mov 	eax,ecx
		mov 	ebp,[tileData]		; Attrib 2
		and 	ebp,0F000h
		shr 	ebp,7				; palette number * 32 bytes (16 entries of 2 bytes)
		add 	ebp,[currPalette]
		shr 	eax,3				; eax = width in tiles
		mov 	edx,[objheight+esi*4]		; edx = texture height, upper bound for v in the loop
		test 	[spriteBit6],010h
		jz 	@@sprite_p_rs_16x16_2d
			; 1D mapping: shift = log2(2 * tiles per row); the loop adds another
			; << 4, i.e. (width/8) * 32 bytes per tile row
			shl 	eax,1
			bsr 	cx,ax
			movd	mm6,ecx
			jmp 	@@sprite_p_rs_16x16_1d
		@@sprite_p_rs_16x16_2d:
			; 2D mapping: a tile row is 32 tiles of 32 bytes = 1024 bytes. The
			; loop shifts by mm6 and then by 4, so mm6 must be 6 here. The
			; previous value 5 gave 512 bytes, half the stride the 256-colour
			; path uses for the same tile matrix.
			mov 	ecx,6
			movd 	mm6,ecx
		@@sprite_p_rs_16x16_1d:

		mov 	ecx,[clippedWidth]		; ecx = number of pixels to emit
		mov 	eax,[spriteTile]
		mov 	esi,[patternTbl]
		shl 	eax,5				; tile number * 32 bytes
		add 	esi,eax

		@@sprite_p_rs_16x16_hloop:
			; Rotated/scaled tiled OBJ pixel loop, 4 bits per pixel.
			; In: ecx = pixels left, edx = texture height, mm5 = texture width,
			;     mm6 = tile-row shift, ebp = palette base, esi = tile data,
			;     edi = destination. objx2/objy2 (>> 8 gives u/v) are stepped by
			;     objdx/objdy per pixel. Colour index 0 is not drawn.
			; A visible pixel is stored and then falls through into the shared
			; step-and-loop tail that skipped pixels jump to.
			push 	ecx
			mov 	ebx,[objy2]
			push 	esi
			sar 	ebx,8			; ebx = v
			js 	@@sprite_p_rs_16x16_transp	; v < 0: outside the texture
			cmp 	ebx,edx
			jge 	@@sprite_p_rs_16x16_transp	; v >= height: outside the texture
			movd 	ecx,mm6
			mov 	eax,ebx
			shr 	ebx,3			; ebx = v/8
			and 	eax,7			; eax = v & 7
			shl 	ebx,cl
			lea 	esi,[esi+eax*4]		; 4 bytes per pixel row inside a tile
			mov 	eax,[objx2]
			shl 	ebx,4			; ebx = byte offset of the tile row
			sar 	eax,8			; eax = u
			js 	@@sprite_p_rs_16x16_transp	; u < 0: outside the texture
			movd 	ecx,mm5
			cmp 	eax,ecx
			jge 	@@sprite_p_rs_16x16_transp	; u >= width: outside the texture
			mov 	ecx,eax
			shr 	eax,3
			and 	ecx,7
			shl 	eax,5		; eax = (u/8)*32
			shr 	ecx,1			; ecx = byte inside the row, CF = odd pixel
			lea 	esi,[esi+ecx]		; lea leaves CF untouched for the setc below
			setc 	cl			; cl = 1 for the odd (high-nibble) pixel
			add 	esi,ebx
			shl 	ecx,2			; shift count 0 or 4
			mov 	al,[esi+eax]
			shr 	eax,cl
			and 	eax,15			; eax = 4-bit colour index
			jz 	@@sprite_p_rs_16x16_transp	; index 0: pixel is not drawn
			mov 	bx,[ebp+eax*2]		; palette lookup
			mov 	[edi],bx

			; Shared tail: step the texture coordinates and the destination.
			@@sprite_p_rs_16x16_transp:
			mov 	eax,[objdx]
			mov 	ecx,[objdy]
			add 	[objx2],eax
			add 	edi,2
			add 	[objy2],ecx
			pop 	esi
			pop 	ecx
			dec 	ecx
			jnz 	@@sprite_p_rs_16x16_hloop
		pop 	ebp				; matches the push ebp done before the loop was entered
		jmp 	@@sprite_p_done


		; Common exit for a sprite that got as far as pushing edi: restore the
		; destination pointer saved by the per-sprite setup code.
		@@sprite_p_done:
		pop 	edi

		; Advance to the next sprite; the loop continues while spriteCnt is
		; still non-negative after the decrement.
		@@next_sprite_p:
		dec 	dword ptr [spriteCnt]
		jns 	@@sprite_p_loop

	@@sprites_p_done:
	emms					; leave MMX state so the FPU can be used again
	popa					; presumably pairs with a pusha at procedure entry (not visible here)
	ret
obj_line ENDP

		


END
