#include "stdafx.h"
#include "common.h"

#pragma pack (1)

// ADDMAC(ofs): conditionally add the dot pattern at byte offset `ofs`.
// Must directly follow a `cmp`: when that compare is below-or-equal
// (unsigned) the adds are skipped via the label nope<ofs>.  Otherwise the
// dword in ebx is added at edi+ofs and edi+ofs+960, and the dword in edx at
// edi+ofs+320 and edi+ofs+640 (four consecutive rows of a 320-byte pitch).
// The label name is built from `ofs`, so each offset can appear only once
// per function.
#define ADDMAC(ofs)\
		__asm jbe nope##ofs	\
		__asm add [edi+ofs+  0],ebx	\
		__asm add [edi+ofs+320],edx	\
		__asm add [edi+ofs+640],edx	\
		__asm add [edi+ofs+960],ebx	\
		__asm nope##ofs:		


int holdrand = 1;	// not used by alexrand() below

// Returns the next value of the C runtime pseudo-random generator.
int alexrand()
{
	const int next = rand();
	return next;
}

// Values loaded into ebx / edx by quantum() before it uses ADDMAC:
// ADDMAC adds ebx to the first and fourth row and edx to the two rows between.
#define EBXVAL 01020201h
#define EDXVAL 02040402h

// quantum() pre-pass: each dword is shifted right by XXX bits, then masked with YYY.
#define XXX  1
#define YYY  07f7f7f7fh

// Debug helper: formats i as decimal followed by a newline and hands the
// text to OutputDebugString.
void superprint(int i)
{
	char text[88];
	sprintf(text, "%d\n", i);
	OutputDebugString(text);
}
int _temp;	// scratch used by quantum() to carry alexrand()'s result across popad

// quantum: noise-thresholded dot effect on a 320x200 byte buffer.
//   dest - buffer modified in place (64000 bytes are pre-scaled)
//   src  - source bytes that are compared against the noise table
//   seed - initial index into noisetab2
// Pass 1 shifts every dword of dest right by XXX and masks it with YYY.
// Pass 2 walks 196 rows of 79 dwords.  For each of the four source bytes the
// byte is compared with noisetab2[ebp+k]; when the source byte is above the
// noise byte (unsigned), ADDMAC(k) adds the EBXVAL/EDXVAL pattern to four
// rows of dest.  The current dest dword is then mapped byte by byte through
// cliptab128.  The 80th dword of every dest row is written as zero.
// Every time the remaining row count is a multiple of 8 the noise index is
// replaced by a fresh alexrand() value.
// ebp is used as a general register inside the block; pushad/popad restore it.
void quantum(void *dest, void*src,int seed)
{
	__asm
	{
		mov eax,dest
		mov edx,src
		mov ebx,seed

		pushad
		mov edi,eax
		mov esi,edx
		mov ebp,ebx

		
		; pass 1: scale all 64000 bytes, 8 bytes per iteration
		mov ecx,320*200/8
floop:  mov eax,[edi]
		mov ebx,[edi+4]
		shr eax,XXX
		shr ebx,XXX
		and eax,YYY
		and ebx,YYY
		mov [edi],eax
		mov [edi+4],ebx
		add edi,8
		dec ecx
		jnz floop
		sub edi,320*200			; rewind to the start of dest

		mov ebx,EBXVAL
		mov edx,EDXVAL

		

		mov ch,200-4			; ch = rows left
yloop:
		mov cl,320/4-1			; cl = dwords left in this row
xloop:
		mov	eax,[esi]
		;shr eax,2
		;and eax,3f3f3f3fh

		; NOISETABSIZE-1 is used as a mask - presumably a power of two (confirm)
		and ebp,NOISETABSIZE-1
		cmp al,byte ptr noisetab2[ebp]
		ADDMAC(0)
		cmp ah,byte ptr noisetab2[ebp+1]
		ADDMAC(1)
		shr eax,16
		cmp al,byte ptr noisetab2[ebp+2]
		ADDMAC(2)
		cmp ah,byte ptr noisetab2[ebp+3]
		ADDMAC(3)

		; clipping pass
		; ebx is borrowed as a zero-extended byte index and reloaded afterwards
		mov eax,[edi]
		xor ebx,ebx
		mov bl,al
		mov al,cliptab128[ebx]
		mov bl,ah
		mov ah,cliptab128[ebx]
		rol eax,16
		mov bl,al
		mov al,cliptab128[ebx]
		mov bl,ah
		mov ah,cliptab128[ebx]
		rol eax,16
		mov ebx,EBXVAL
		mov [edi],eax

		add ebp,4
		add edi,4
		add esi,4
		dec cl
		jnz xloop
		; last dword of the row: cleared in dest, skipped in src
		mov dword ptr [edi],0
		add edi,4
		add esi,4



		dec ch
		jz donet
		test ch,7
		jnz yloop
		
		; every 8th row: pick a new noise index; all registers are saved
		; around the call and the result travels through _temp
		pushad				
		call alexrand
		mov _temp,eax
		popad
		mov eax,_temp
		
		mov ebp,eax
		jmp yloop
donet:

		popad
		//ret
	}
}





// noisefade: in-place table remap of a 320x200 byte buffer with per-row
// shade and per-pixel noise.
//   destsrc - buffer read and rewritten (200 rows of 80 dwords)
//   yshades - one byte per row, added to the noise value
//   seed    - initial index into noisetab
// Each pixel p becomes the byte at table index (hi = noisetab[n] + yshades[row],
// lo = p).  The low 16 bits of ebx (loaded from fadeptr) are overwritten with
// that index, so the lookup uses the 64K block addressed by fadeptr's upper
// 16 bits - presumably fadeptr is 64K-aligned (confirm).
void noisefade(void *destsrc, unsigned char *yshades, int seed)
{
	__asm
	{
		mov eax,destsrc
		mov edx,yshades
		mov ebx,seed
	

		pushad
		; dest/src, yshades, randseed
		mov esi,eax
		mov ebp,ebx
		mov edi,edx

		mov ebx,fadeptr
		push 200			; row counter lives on the stack
yloop:
		mov ch,[edi]			; ch = shade for this row
		mov cl,80			; cl = dwords per row
		inc edi
xloop:
		and ebp,NOISETABSIZE-1
		; pixels 2 and 3 are looked up first, then shifted into the top half
		mov bh,byte ptr noisetab[ebp]
		add bh,ch
		mov bl,[esi+2]
		mov al,[ebx]
		mov bh,byte ptr noisetab[ebp+1]
		add bh,ch
		mov bl,[esi+3]
		mov ah,[ebx]
		shl eax,16
		mov bh,byte ptr noisetab[ebp+2]
		add bh,ch
		mov bl,[esi]
		mov al,[ebx]
		mov bh,byte ptr noisetab[ebp+3]
		add bh,ch
		mov bl,[esi+1]
		mov ah,[ebx]
		add ebp,4
		mov [esi],eax
		add esi,4
		dec cl
		jnz xloop
		dec byte ptr [esp]		; 200 fits in the low byte of the pushed dword
		jnz yloop
		pop eax				; discard the row counter
		popad
	}
}

// pixmac(ofs) (blurfade version): leaves in eax the sum of the pixel at
// esi+ofs and its left, right, upper and lower neighbours (320-byte pitch),
// shifted right by 3.  Expects bl == 0 and ah == 0 on entry; the `adc ah,bl`
// lines collect the carries of the last three adds.
// NOTE(review): the carry of the first add (left + right) is not collected,
// so 256 is lost when those two pixels sum above 255 - confirm this is intended.
#define pixmac(ofs)\
		__asm mov al,[esi+ofs-1]\
		__asm add al,[esi+ofs+1]\
		__asm add al,[esi+ofs+320]\
		__asm adc ah,bl\
		__asm add al,[esi+ofs-320]\
		__asm adc ah,bl\
		__asm add al,[esi+ofs]\
		__asm adc ah,bl\
		__asm shr eax,3		


// blurfade: writes 64000 bytes to dest, each computed by pixmac() from the
// corresponding src pixel and its four direct neighbours.
// The macro reads src[-320], src[-1], src[+1] and src[+320] for every pixel,
// including the first and last rows, so bytes outside the 64000-byte range
// are read - presumably src has readable margins (confirm).
void blurfade(void*dest, void *src)
{
	__asm
	{
		mov eax,dest
		mov edx,src
			
		pushad
		mov edi,eax
		mov esi,edx
		xor ebx,ebx			; bl = 0 for the adc in pixmac
		xor eax,eax			; eax is the pixmac accumulator

		mov ecx,320*200/4
again:
		; four pixels per iteration, packed into edx (pixels 2,3 first)
		pixmac(2)
		mov dl,al
		pixmac(3)
		mov dh,al
		shl edx,16
		pixmac(0)
		mov dl,al
		pixmac(1)
		mov dh,al
		mov [edi],edx
		add esi,4
		add edi,4
		dec ecx
		jnz again
		popad
		
	}
}



// Working state of scalespr(); kept in globals because the routine uses ebp
// as a general register.
int bmapwid,bmaphgt,bmapptr,tcol;
int xcount,ycount,xstart,ystart;
int xinc,yinc;
int destptr;

// edgetab: per-column source steps built by scalespr().
int edgetab[512*2];// times 512 dd (0)
// spritetab: brightness ramp built by scalespr(); only the first 256 entries are written there.
unsigned char spritetab[256*2];// times 256 db (0)

// Row range touched by addedge_() since the last reset; read by the fill routines.
int miny,maxy;


// scalespr: draws a bitmap stretched to the rectangle (x1,y1)-(x2,y2) of a
// 320x200 byte buffer, adding it to what is already there.
//   x1,y1,x2,y2 - destination rectangle; rejected when empty or fully outside
//   dest        - destination buffer (320-byte pitch)
//   bmpwid/bmphgt/bmpptr - source bitmap size and pixels
//   col         - per-step increment of the brightness ramp
// Steps:
//   1. spritetab[i] = high byte of a running sum that grows by col per entry
//      and is clamped to 7fffh once it reaches 128*256.
//   2. xinc/yinc = 65536*size/extent (16.16 source steps); the rectangle is
//      clipped to 0..320 x 0..200 and xstart/ystart are advanced accordingly.
//   3. edgetab[] receives, per destination column, the whole-texel step to
//      add to the source pointer (the first entry is the start offset).
//   4. Each pixel: dest = cliptab128[spritetab[texel] + dest] (8-bit add).
void scalespr(SLONG x1, SLONG y1, SLONG x2, SLONG y2, void *dest,
					 SLONG bmpwid, SLONG bmphgt, UBYTE *bmpptr, int col)
{
	//col=256;
	__asm
	{
		; copy the arguments into globals while the frame is still addressable
		mov eax,dest
		mov destptr,eax
		mov eax,bmpwid
		mov bmapwid,eax
		mov eax,bmphgt
		mov bmaphgt,eax
		mov eax,bmpptr
		mov bmapptr,eax
		mov eax,col
		mov tcol,eax

		mov eax,x1
		mov edx,y1
		mov ebx,x2
		mov ecx,y2
		
		pushad
		push eax
		;mov eax,[esp+36+4]
		;mov destptr,eax
		;mov eax,[esp+40+4]
		;mov bmapwid,eax
		;mov eax,[esp+44+4]
		;mov bmaphgt,eax
		;mov eax,[esp+48+4]
		;mov bmapptr,eax
		; build the 256-entry brightness ramp
		mov edi,offset spritetab
		mov esi,256
		xor eax,eax
sprloop:
		mov [edi],ah
		inc edi
		add eax,tcol ; [esp+52+4]
		;.if eax>=128*256
		cmp eax,128*256
		jl skkk
		mov eax,7fffh
skkk:
		dec esi
		jnz sprloop
		pop eax

		xchg edx,ebx

		;    x          y
		; eax,edx to ebx,ecx

#define _xres  320
#define _yres  200

		; trivial rejection
		cmp eax,_xres
		jge forgetit
		or edx,edx
		jle forgetit
		cmp ebx,_yres
		jge forgetit
		or ecx,ecx
		jle forgetit

		sub edx,eax			; edx = width
		jle forgetit
		sub ecx,ebx			; ecx = height
		jle forgetit
		push edx
		push eax
		mov ebp,edx
		xor edx,edx
		mov eax,65536
		imul eax,bmapwid
		div ebp
		mov xinc,eax
		xor edx,edx
		mov eax,65536
		imul eax,bmaphgt
		div ecx
		mov yinc,eax
		pop eax
		pop edx
		add edx,eax			; back to right / bottom coordinates
		add ecx,ebx

		mov xstart,0
		mov ystart,0

		; clip each side; a negative left/top advances the source start
		or eax,eax
		jge nc1
		imul eax,xinc
		neg eax
		mov xstart,eax
		xor eax,eax
nc1:	cmp edx,_xres
		jle nc2
		mov edx,_xres
nc2:	or ebx,ebx
		jge nc3
		imul ebx,yinc
		neg ebx
		mov ystart,ebx
		xor ebx,ebx
nc3:	cmp ecx,_yres
		jle nc4
		mov ecx,_yres
nc4:	sub edx,eax
		jle forgetit
		sub ecx,ebx
		jle forgetit

		mov xcount,edx
		mov ycount,ecx

		; build screen pointer
		imul ebx,_xres
		lea esi,[ebx+eax]
		add esi,destptr

		; build table
		; each entry is the integer part of the running 16.16 position;
		; only the fraction is carried to the next column
		mov ecx,xcount
		mov edi,offset edgetab
		mov eax,xstart
		mov ebp,xinc
tabloop:
		mov edx,eax
		sar edx,16
		mov [edi],edx
		and eax,65535
		add eax,ebp
		add edi,4
		dec ecx
		jnz tabloop

		;.esi = screen
		;.ebx = texture somehow
		;.edx = fadetable
		;.eax = scratch
		;.ebp = output colour buildup
		;.edi = table of ebx incs
		; ecx = x counter

		;mov edx,fadetable
		xor edx,edx			; edx is used as a zero-extended byte index
lineloop:
		mov edi,offset edgetab
		mov ecx,xcount

		push esi

		; ebx = address of the source row for this line
		mov ebx,ystart
		shr ebx,16
		imul ebx,bmapwid
		mov eax,yinc
		add ystart,eax
		add ebx,bmapptr

pixloop:
		; two pixels per iteration, written as one word; a trailing
		; single pixel leaves through justal
		add ebx,[edi]
		mov dl,[ebx]
		mov dl,byte ptr spritetab[edx]
		add dl,[esi]
		mov al,byte ptr cliptab128[edx]
		dec ecx
		jz  justal
		inc esi
		add edi,4

		add ebx,[edi]
		mov dl,[ebx]
		mov dl,byte ptr spritetab[edx]
		add dl,[esi]
		mov ah,byte ptr cliptab128[edx]
		add edi,4
		dec ecx
		mov [esi-1],ax
		jz	doneloop
		inc esi
		jmp pixloop
justal:
		mov [esi],al
doneloop:
		pop esi
		add esi,_xres
		dec ycount
		jnz lineloop
forgetit:
		popad
	}
}




// compose: combines 64000 bytes of src into dest through a two-input table.
// For every pixel: dest = table[(src << 8) | dest].
// The low 16 bits of ebx (loaded from tab) are overwritten with that index,
// so the table is the 64K block addressed by tab's upper 16 bits -
// presumably tab is 64K-aligned (confirm).
void compose(void*dest,void *src,void *tab)
{
	__asm
	{
		mov eax,dest
		mov edx,src
		mov ebx,tab

				pushad
		mov edi,eax
		mov esi,edx
		;mov ebx,ebx
		mov ecx,64000/4

again:
		; four pixels per iteration: bl = dest byte, bh = src byte
		mov edx,[esi]
		mov eax,[edi]
		mov bl,al
		mov bh,dl
		mov al,[ebx]
		mov bl,ah
		mov bh,dh
		mov ah,[ebx]
		rol eax,16
		rol edx,16
		mov bl,al
		mov bh,dl
		mov al,[ebx]
		mov bl,ah
		mov bh,dh
		mov ah,[ebx]
		rol eax,16
		add esi,4
		mov [edi],eax
		add edi,4
		dec ecx
		jnz again
		popad
		//ret

	
	}
}

// composesil: same table combine as compose(), but every src byte s is first
// remapped to ((s >> 2) & 3fh) + 10h before it becomes the high index byte.
// The table is addressed through the upper 16 bits of tab (low 16 bits of
// ebx are overwritten) - presumably tab is 64K-aligned (confirm).
void composesil(void*dest,void *src,void *tab)
{
	__asm
	{
		mov eax,dest
		mov edx,src
		mov ebx,tab

				pushad
		mov edi,eax
		mov esi,edx
		;mov ebx,ebx
		mov ecx,64000/4

again:
		; remap four src bytes at once, then look up bl = dest, bh = src'
		mov edx,[esi]
		shr edx,2
		mov eax,[edi]
		and edx,3f3f3f3fh
		mov bl,al
		add edx,10101010h
		mov bh,dl
		mov al,[ebx]


		mov bl,ah
		mov bh,dh
		mov ah,[ebx]
		rol eax,16
		rol edx,16
		mov bl,al
		mov bh,dl
		mov al,[ebx]
		mov bl,ah
		mov bh,dh
		mov ah,[ebx]
		rol eax,16
		add esi,4
		mov [edi],eax
		add edi,4
		dec ecx
		jnz again
		popad

	
	}
}

// composelight: same table combine as compose(), but every src byte s is
// first remapped to ((s >> 2) & 1fh) + 30h before it becomes the high index
// byte.  The table is addressed through the upper 16 bits of tab -
// presumably tab is 64K-aligned (confirm).
void composelight(void*dest,void *src,void *tab)
{
	__asm
	{
		mov eax,dest
		mov edx,src
		mov ebx,tab

		pushad
		mov edi,eax
		mov esi,edx
		;mov ebx,ebx
		mov ecx,64000/4

again:
		; remap four src bytes at once, then look up bl = dest, bh = src'
		mov edx,[esi]
		shr edx,2
		mov eax,[edi]
		and edx,1f1f1f1fh
		mov bl,al
		add edx,30303030h
		mov bh,dl
		mov al,[ebx]
		mov bl,ah
		mov bh,dh
		mov ah,[ebx]
		rol eax,16
		rol edx,16
		mov bl,al
		mov bh,dl
		mov al,[ebx]
		mov bl,ah
		mov bh,dh
		mov ah,[ebx]
		rol eax,16
		add esi,4
		mov [edi],eax
		add edi,4
		dec ecx
		jnz again
		popad

	
	}
}

// composedark: same table combine as compose(), but every src byte s is
// first remapped to 30h - ((s >> 2) & 1fh).  The negate-and-add works on the
// whole dword; because each masked byte is at most 1fh no borrow crosses a
// byte boundary, so the result is per-byte.  The table is addressed through
// the upper 16 bits of tab - presumably tab is 64K-aligned (confirm).
void composedark(void*dest,void *src,void *tab)
{
	__asm
	{
		mov eax,dest
		mov edx,src
		mov ebx,tab
	
				pushad
		mov edi,eax
		mov esi,edx
		;mov ebx,ebx
		mov ecx,64000/4

again:
		; remap four src bytes at once, then look up bl = dest, bh = src'
		mov edx,[esi]
		shr edx,2
		mov eax,[edi]
		and edx,1f1f1f1fh
		neg edx
		mov bl,al
		add edx,30303030h
		mov bh,dl
		mov al,[ebx]
		mov bl,ah
		mov bh,dh
		mov ah,[ebx]
		rol eax,16
		rol edx,16
		mov bl,al
		mov bh,dl
		mov al,[ebx]
		mov bl,ah
		mov bh,dh
		mov ah,[ebx]
		rol eax,16
		add esi,4
		mov [edi],eax
		add edi,4
		dec ecx
		jnz again
		popad

	}
}

// composelightdark: same table combine as compose(), but every src byte s is
// first remapped to ((s >> 1) & 3fh) + 10h before it becomes the high index
// byte.  The table is addressed through the upper 16 bits of tab -
// presumably tab is 64K-aligned (confirm).
void composelightdark(void*dest,void *src,void *tab)
{
	__asm
	{
		mov eax,dest
		mov edx,src
		mov ebx,tab

				pushad
		mov edi,eax
		mov esi,edx
		;mov ebx,ebx
		mov ecx,64000/4

again:
		; remap four src bytes at once, then look up bl = dest, bh = src'
		mov edx,[esi]
		shr edx,1
		mov eax,[edi]
		and edx,3f3f3f3fh
		mov bl,al
		add edx,10101010h



		mov bh,dl
		mov al,[ebx]
		mov bl,ah
		mov bh,dh
		mov ah,[ebx]
		rol eax,16
		rol edx,16
		mov bl,al
		mov bh,dl
		mov al,[ebx]
		mov bl,ah
		mov bh,dh
		mov ah,[ebx]
		rol eax,16
		add esi,4
		mov [edi],eax
		add edi,4
		dec ecx
		jnz again
		popad

	
	}
}






/////////////////////////////////////////////////////////////////////


// Resets the recorded row range to "empty": maxy goes to 0 and miny to 200,
// so the first edge added afterwards sets both bounds.
void resetedges()
{
	maxy = 0;
	miny = 200;
}

// addedge_: scan-converts one polygon edge into edgebuf (register interface,
// no C arguments, naked function).
//   in : eax,edx = start x,y     ebx,ecx = end x,y
//   out: eax,edx = the end point passed in (so edges can be chained);
//        ebx,ecx are left as modified; esi,edi,ebp are preserved
// edgebuf holds two dwords per row.  Edges running downwards are written to
// the dword at offset 4, edges running upwards (after being flipped) to the
// dword at offset 0.  Each stored value is the x position in 16.16, clamped
// to 0 .. 320*65536.  Rows outside 0..199 are clipped, horizontal edges are
// skipped, and miny/maxy are widened to cover the rows written.
__declspec( naked ) void addedge_() 
{
	__asm
	{
	

		push esi
		push edi
		push ebp
		push ebx
		push ecx
		; line eax,edx to ebx,ecx
		mov edi,offset edgebuf+4
		sub ebx,eax			; ebx = dx
		sub ecx,edx			; ecx = dy
		je forgetit
		jg down
		; upward edge: start from the other end and use the first dword
		add eax,ebx
		add edx,ecx
		neg ebx
		neg ecx
		sub edi,4
down:
		cmp edx,200
		jge forgetit
		lea ebp,[ecx+edx]		; ebp = last row + 1
		or ebp,ebp
		jle forgetit
		shl eax,16
		; x step per row = dx * reciptab[dy + 1024]
		mov esi,reciptab[ecx*4+1024*4]
		imul ebx,esi

		; eax = x, ebx = dx, edx=y1, ebp=y2, ecx=ycount
		or edx,edx
		jge notop
		; top clip
		add ecx,edx
		jle forgetit
		imul edx,ebx
		sub eax,edx
		xor edx,edx
notop:  sub ebp,200
		jle nobottom
		sub ecx,ebp
		jle forgetit
		xor ebp,ebp
nobottom:
		; eax = x, ebx = dx, edx=y1, ecx=ycount

		add ebp,200
		;.if ebp>_maxy
		cmp ebp,maxy
		jle sk1
		mov maxy,ebp
sk1:
		;.endif
		;.if edx<miny
		cmp edx,miny
		jge sk2
		mov miny,edx
sk2:
		;.endif

		xor ebp,ebp			; ebp = left clamp value
		mov esi,320*65536		; esi = right clamp value
		lea edi,[edi+edx*8]
		or ebx,ebx
		jle left
right:
		; x increases: clamp to 0 until on screen, then store x until it
		; passes the right limit, then clamp to the right limit
		or eax,eax
		jle roffl
rloop1:
		cmp eax,esi
		jge roffr
		mov [edi],eax
		add edi,8
		add eax,ebx
rjumpback:
		dec ecx
		jnz rloop1
		jmp forgetit
roffl:
		mov dword ptr [edi],ebp	; 0
		add edi,8
		add eax,ebx
		jg  rjumpback
		dec ecx
		jnz roffl
		jmp forgetit
roffr:
		mov dword ptr [edi],esi ; 320*65536
		add edi,8
		dec ecx
		jnz roffr
		jmp forgetit

left:
		; x decreases (or stays): mirror image of the case above
		cmp eax,esi
		jge loffr
lloop1:
		or eax,eax
		jle loffl
		mov [edi],eax
		add edi,8
		add eax,ebx
ljumpback:
		dec ecx
		jnz lloop1
		jmp forgetit
loffr:
		mov dword ptr [edi],esi ; 320
		add edi,8
		add eax,ebx
		cmp eax,esi
		jl  ljumpback
		dec ecx
		jnz loffr
		jmp forgetit
loffl:
		mov dword ptr [edi],ebp ; 0
		add edi,8
		dec ecx
		jnz loffl

forgetit:
		; the saved ecx/ebx (end point) are popped into edx/eax on purpose
		pop edx	;; pop end point into start point...
		pop eax
		pop ebp
		pop edi
		pop esi
		ret // LEAVE THIS
	}
}





	

// adjust should be 0 or xinc&suffix&
// calcincr(ecxofs,suffix,adjust): computes the per-row and per-column
// increments of one interpolated quantity and its start value.
//   in : ecx = base of the value array; v0,v1,v2 are the ints at
//              ecx+ecxofs, +4 and +8
//        ebp = pointer to the first point (x at [ebp], y at [ebp+4])
//        esi = row the start value should refer to
//        __x1,__y1,__x2,__y2,cval as left by calccmac
//   out: yinc<suffix> = ((v2-v1)*__x1 - (v1-v0)*__x2) * 65536 / cval
//        xinc<suffix> = ((v1-v0)*__y2 - (v2-v1)*__y1) * 65536 / cval
//        ebp = (v0 << 16) - xinc*x0 - yinc*y0 + yinc*esi + adjust + 32768
//   clobbers eax, edx, ebp.
// Numerators at or beyond the 16-bit range take the toobig paths, which
// shift by 12 before the divide and by 4 after it instead of 16 before.
// cval is used as a divisor without a zero check.
// adjust should be 0 or xinc&suffix&
#define calcincr(ecxofs,suffix,adjust) \
		__asm mov	eax,[ecx+4+ecxofs]\
		__asm sub	eax,[ecx+ecxofs]\
		__asm imul	eax,__x2\
		__asm mov	edx,[ecx+8+ecxofs]\
		__asm sub	edx,[ecx+4+ecxofs]\
		__asm imul	edx,__x1\
		__asm sub	eax,edx\
		__asm neg eax\
		__asm cmp eax,32767\
		__asm jge toobig1##suffix\
		__asm cmp eax,-32768\
		__asm jle toobig1##suffix\
		__asm cdq\
		__asm shld edx,eax,16\
		__asm shl eax,16\
		__asm idiv cval\
		__asm jmp toobig1a##suffix\
		__asm toobig1##suffix: \
		__asm cdq\
		__asm shld edx,eax,12\
		__asm shl eax,12\
		__asm idiv cval\
		__asm shl eax,4\
		__asm jmp toobig1a##suffix\
		__asm toobig2##suffix: \
		__asm cdq\
		__asm shld edx,eax,12\
		__asm shl eax,12\
		__asm idiv cval\
		__asm shl eax,4\
		__asm jmp toobig2a##suffix\
		__asm toobig1a##suffix: \
		__asm mov yinc##suffix,eax\
		__asm mov	eax,[ecx+8+ecxofs]\
		__asm sub	eax,[ecx+4+ecxofs]\
		__asm imul	eax,__y1\
		__asm mov	edx,[ecx+4+ecxofs]\
		__asm sub	edx,[ecx+0+ecxofs]\
		__asm imul	edx,__y2\
		__asm sub	eax,edx\
		__asm neg eax\
		__asm cmp eax,32767\
		__asm jge toobig2##suffix\
		__asm cmp eax,-32768\
		__asm jle toobig2##suffix\
		__asm cdq\
		__asm shld edx,eax,16\
		__asm shl eax,16\
		__asm idiv cval\
		__asm toobig2a##suffix: \
		__asm mov xinc##suffix,eax\
		__asm imul eax,[ebp]\
		__asm mov ebp,[ebp+4]\
		__asm imul ebp,yinc##suffix\
		__asm add ebp,eax\
		__asm mov eax,[ecx+ecxofs]\
		__asm neg ebp\
		__asm shl eax,16\
		__asm add ebp,eax\
		__asm mov eax,yinc##suffix\
		__asm imul eax,esi\
		__asm add ebp,adjust\
		__asm add ebp,32768\
		__asm add ebp,eax

// calcincrsub(suffix): out-of-line copies of the toobig1/toobig2 fallback
// paths that calcincr already contains inline.  The only uses in
// texgoufill()/gouraudfill() are commented out.  The expansion closes and
// reopens the surrounding __asm block, so it is only meaningful inside one.
#define calcincrsub(suffix)\
		} toobig1##suffix: __asm {\
		__asm cdq\
		__asm shld edx,eax,12\
		__asm shl eax,12\
		__asm idiv cval\
		__asm shl eax,4\
		__asm jmp toobig1a##suffix\
		} toobig2##suffix: __asm {\
		__asm cdq\
		__asm shld edx,eax,12\
		__asm shl eax,12\
		__asm idiv cval\
		__asm shl eax,4\
		__asm jmp toobig2a##suffix\


// a_static expands to nothing, so everything below has external linkage.
#define a_static

// Working state of the triangle fillers (texgoufill / gouraudfill).
// NOTE(review): texgoufill() reads `dword ptr ulo`, i.e. the two bytes of
// ulo plus the two bytes that follow it in memory.  That only yields the low
// half of xinc_u if xinc_u is placed directly after ulo with no padding;
// whether #pragma pack(1) guarantees this for separate globals should be
// confirmed for the compiler in use.
#pragma pack(1)
a_static int drawn,yinc_c,xinc_c,yinc_u;
a_static short ulo;
a_static int xinc_u;
a_static short _noname;
a_static int yinc_v;
a_static int xinc_v;
a_static int vhi;
a_static int uhi_vlo;
a_static int line_c;
a_static int line_u;
a_static int line_v;
a_static int line_edi;
// Edge deltas written by calccmac and read by calcincr.
a_static int __x1,__y1,__x2,__y2;

// val1 is used by texgoufill() to hold the pts pointer; the remaining
// variables are not referenced by the code visible in this file.
a_static int val1	;
a_static int ival1	;
a_static int yval1	;
a_static int iebp	;
a_static int yebp	;
a_static unsigned char val2	;
a_static unsigned char ival2	;
a_static unsigned char val3	;
a_static unsigned char ival3	;
a_static unsigned char yval2	;
a_static unsigned char yval3	;
a_static unsigned char pad1,pad2;




// texgoufill: fills the spans currently stored in edgebuf (rows miny..maxy-1)
// with a texture whose texels are shaded through a fade table.
//   dest - 320-byte-pitch destination buffer
//   col  - nine ints read by calcincr: three values at offset 0 (shade),
//          three at offset 12 (u) and three at offset 24 (v)
//   pts  - first point of the triangle; only its x and y are read
// Requires __x1/__y1/__x2/__y2/cval (see calccmac) and edgebuf to be set up.
// Per pixel: texel = [texptr block | v hi : u hi], output = [fadeptr block |
// shade : texel].  Both pointers have their low 16 bits overwritten, so
// presumably they are 64K-aligned (confirm).
// The u/v/shade fractions are carried in the upper halves of esi, ecx and
// edx and advanced with add/adc chains; the instruction order matters.
void texgoufill(void *dest, void *col, void *pts)
{
	/*
	__asm
	{
		__asm jmp asra;
	}
	__asm
	{
		__asm asra: __asm mov eax,4
	}
	*/
	__asm
	{
		mov eax,dest
		mov edx,col
		mov ebx,pts
	

		pushad
		mov val1,ebx ; source pts (first x,y used only)
		mov ebx,eax	; dest
		mov ecx,edx ; pts
		mov esi,miny
		mov edx,maxy
		sub edx,esi			; edx = number of rows
		jle forgetit
		lea edi,[esi*4+esi]		; edi = miny * 320
		shl edi,6
		add ebx,edi
		sub ebx,2			; pixels are written at [edi+2]
		mov line_edi,ebx

		; calc increments

		push edx

		mov ebp,val1
		calcincr(12,_u,xinc_u)
		mov line_u,ebp
		mov ebp,val1
		calcincr(24,_v,xinc_v)
		mov line_v,ebp
		mov ebp,val1
		calcincr(0,_c,0)
		mov line_c,ebp

		; pack the per-pixel steps: uhi_vlo = (xinc_u >> 16) + (xinc_v << 16),
		; vhi = (xinc_v >> 16) + carry of that add + sign of xinc_u
		mov eax,xinc_u
		cdq
		sar eax,16
		mov ecx,xinc_v
		shl ecx,16
		add eax,ecx
		mov uhi_vlo,eax
		mov eax,xinc_v
		adc edx,0
		sar eax,16
		add eax,edx

		mov dword ptr vhi,eax
		pop edx



		; work out the line values

		lea esi,edgebuf[esi*8]

#define count dword ptr [esp]
lineloop:
		; span = integer parts of the two edge values for this row
		mov edi,[esi]
		mov ecx,[esi+4]
		shr edi,16
		shr ecx,16
		sub ecx,edi
		jle nextline

		/*
		//
		cmp ecx,320
		jle ok1
		int 3
ok1:
		//
		*/

		push	esi
		push	edx
		push	ecx		; count on stack

		mov		ebx,texptr

		mov		ecx,xinc_v
		imul	ecx,edi
		add		ecx,line_v
		rol		ecx,16
		mov		bh,cl		; set up v: ecx, ebx

		mov		eax,xinc_u
		imul	eax,edi
		add		eax,line_u
		rol		eax,16
		mov		cl,al
		xor		al,al
		mov		esi,eax	; set up u (esi, cl)

		mov		eax,fadeptr
		mov		edx,xinc_c
		imul	edx,edi
		add		edx,line_c
		rol		edx,16
		mov		ah,dl
		mov		ebp,xinc_c
		shl		ebp,16		; set up colour: ebp, eax, edx, ch
		mov		ch,byte ptr xinc_c+2

		add		edi,line_edi ; set up edi



		; eax = |   table | col | tmp |
		; ebx = | texture | vhi | tmp |
		; ecx = |    v lo |dchi | uhi |
		; edx = |  col lo |  output   |
		; esi = |    u lo |  odd flag |
		; ebp = | dcol lo |           |
		; edi = | destination pointer |
		; increment variables: ulo, vhi, uhi_vlo, count. pref, stored on stack!!

		; NOTE(review): the single leading pixel is plotted when edi is even,
		; which leaves the word stores of the pair loop on odd addresses -
		; confirm whether the test was meant to be the other way round
		test edi,1
		jnz notodd
		; plot one odd pixel
		mov		bl,cl			; v		load ebx for read
		add		edx,ebp			; u 	ebp=colinclo. update col lo
		mov		al,[ebx]		; v		read texel
		adc		ah,ch			; u		ch=colinchi update col hi
		add		esi,dword ptr ulo			; v		update u lo.
		adc		ecx,dword ptr uhi_vlo		; u		update uhi / vlo
		mov		dl,[eax]		; v		look up the colour (note col is inced)
		adc		bh,byte ptr vhi			; u		update vhi

		;mov dl,cl ;****************

		mov		[edi+2],dl
		dec		count
		jz		doneline
		inc		edi
notodd:
		; plot in pairs
		shr		count,1
		jz		mustdraw1
		adc		esi,0	;	flag esi if odd pixel
xloop:
		mov		bl,cl			; v		load ebx for read
		add		edx,ebp			; u 	ebp=colinclo. update col lo
		mov		al,[ebx]		; v		read texel
		adc		ah,ch			; u		ch=colinchi update col hi
		add		esi,dword ptr ulo			; v		update u lo.
		adc		ecx,dword ptr uhi_vlo		; u		update uhi / vlo
		mov		dl,[eax]		; v		look up the colour (note col is inced)
		;mov dl,bh ;****************
		adc		bh,byte ptr vhi			; u		update vhi

		mov		bl,cl		    ; v		load ebx for NEXT read
		add		edx,ebp			; u 	ebp=colinclo. update col lo
		mov		al,[ebx]		; v		read texel
		adc		ah,ch			; u		ch=colinchi update col hi
		add		esi,dword ptr ulo			; v		update u lo
		adc		ecx,dword ptr uhi_vlo		; u		update uhi / vlo
		mov		dh,[eax]		; v		look up the colour (note col is inced)
		;mov dh,bh ;****************
		adc		bh,byte ptr vhi			; u		update vhi

		add		edi,2			;
		dec		count			; 		decrement count
		mov		[edi],dx
		jnz		xloop
		;------------------------------------

		test	esi,1
		jz		doneline
mustdraw1:
		; plot last pixel
		mov		bl,cl			; v		load ebx for read
		add		edx,ebp			; u 	ebp=colinclo. update col lo
		mov		al,[ebx]		; v		read texel
		adc		ah,ch			; u		ch=colinchi update col hi
		mov		dl,[eax]		; v		look up the colour (note col is inced)

		;mov dl,bh ;****************
		mov		[edi+2],dl

doneline:
		pop		ecx
		pop		edx
		pop		esi

nextline:
		dec edx
		jz forgetit
		add esi,8			; move on scan list
		add line_edi,320	; move on dest ptr
		mov eax,yinc_u
		mov	ebx,yinc_v
		mov ecx,yinc_c
		add line_u,eax
		add line_v,ebx
		add line_c,ecx
		jmp lineloop


forgetit:
		popad
		//ret
		jmp goaway


		//calcincrsub(_c)
		//calcincrsub(_u)
		//calcincrsub(_v)
goaway:
	}
}


// gouraudfill: fills the spans currently stored in edgebuf (rows
// miny..maxy-1) with a linearly interpolated byte value.
//   dest - 320-byte-pitch destination buffer
//   col  - three ints (the values at the triangle's points), read by calcincr
//   pts  - first point of the triangle; only its x and y are read
// Requires __x1/__y1/__x2/__y2/cval (see calccmac) and edgebuf to be set up.
// The value is kept as 16.16 in ebp per row; inside a span the integer byte
// lives in bl and the fraction in the top of ebp, stepped with add/adc.
void gouraudfill(void *dest, void *col, void *pts)
{
	__asm
	{
		mov eax,dest
		mov edx,col
		mov ebx,pts
	


		pushad
		mov ebp,ebx ; source pts (first x,y used only)
		mov ebx,eax	; dest
		mov ecx,edx ; pts
		mov esi,miny
		mov edx,maxy
		sub edx,esi			; edx = number of rows
		jle forgetit
		lea edi,[esi*4+esi]		; edi = miny * 320
		shl edi,6
		add ebx,edi

		; calc increments

		push edx
		calcincr(0,_c,xinc_c)
		pop edx

		lea esi,edgebuf[esi*8]

lineloop:
		; ebp stores col value for start of line
		push ebp
		push ebx
		push edx
		mov edi,[esi]
		mov ecx,[esi+4]
		shr edi,16
		shr ecx,16
		sub ecx,edi
		jle nextline


		; need to do ebp += (edx=xinc) * edi

		mov edx,xinc_c
		mov eax,edx
		imul eax,edi
		add ebp,eax

		; need edi = dest, ebp:bl = col, edx:bh = col inc, ecx = count

		lea edi,[ebx+edi]
		shld ebx,ebp,16
		rol edx,16
		shl ebp,16
		mov bh,dl

		mov eax,[edi+3]	; fill the cache...
		; up to three single pixels bring edi to a dword boundary
		test edi,1			; line up to dword boundary
		jz notodd
		dec ecx
		mov [edi],bl
		jz nextline
		add ebp,edx
		inc edi
		adc bl,bh
notodd:
		test edi,2
		jz notodd2
		dec ecx
		mov [edi],bl
		jz nextline
		add ebp,edx
		inc edi
		adc bl,bh
		dec ecx
		mov [edi],bl
		jz nextline
		add ebp,edx
		inc edi
		adc bl,bh
notodd2:
		push ecx
		mov eax,[edi+60]	; fill the cache...
		shr ecx,2
		jz endbit
pixloop:					; plot groups of 4
		;repeat 2
		mov al,bl
		add ebp,edx
		adc bl,bh
		mov ah,bl
		add ebp,edx
		adc bl,bh
		rol eax,16
		;endm
		;repeat 2
		mov al,bl
		add ebp,edx
		adc bl,bh
		mov ah,bl
		add ebp,edx
		adc bl,bh
		rol eax,16
		;endm

		mov [edi],eax
		add edi,4
		dec ecx
		jnz pixloop
endbit:
		pop ecx
		and ecx,3
		jz nextline
		dec ecx
		mov [edi],bl		;; plot the remaining (up to) 3 pixels
		jz nextline
		add ebp,edx
		adc bl,bh
		dec ecx
		mov [edi+1],bl
		jz nextline
		add ebp,edx
		adc bl,bh
		mov [edi+2],bl
nextline:
		; restore the per-row state and step it to the next row
		pop edx
		pop ebx
		pop ebp
		add ebp,yinc_c
		add esi,8
		add ebx,320
		dec edx
		jnz lineloop
forgetit:
		popad
		//ret
		jmp goaway
//calcincrsub(_c)
goaway:

	}
}

// calccmac(arse): cross product of the first two edges of the polygon at esi
// (points are x,y int pairs, 8 bytes each).  The argument is not used.
//   __x1 = p1.x - p0.x    __y1 = p1.y - p0.y
//   __x2 = p2.x - p1.x    __y2 = p2.y - p1.y
//   cval = __x1*__y2 - __y1*__x2
// Clobbers eax, ebx, edx.  The flags of the final subtraction are still
// valid afterwards (the closing mov does not change them), which callers use
// with `jle` to reject polygons whose cval is zero or negative.
#define calccmac(arse)	\
	__asm mov	eax,[esi+8+0]\
	__asm mov	ebx,[esi+16+4]\
	__asm sub	eax,[esi+0]\
	__asm sub	ebx,[esi+8+4]\
	__asm mov __x1,eax\
	__asm mov __y2,ebx\
	__asm imul	ebx\
	__asm mov	ebx,[esi+8+4]\
	__asm mov	edx,[esi+16+0]\
	__asm sub	ebx,[esi+4]\
	__asm sub	edx,[esi+8+0]\
	__asm mov __y1,ebx\
	__asm mov __x2,edx\
	__asm imul	ebx,edx\
	__asm sub	eax,ebx\
	__asm mov cval,eax\


// flattri: fills a single-colour triangle.
//   dest - destination buffer handed on to flatfill()
//   pts  - three points as x,y int pairs
//   col  - fill colour handed on to flatfill()
// Returns (in eax, there is no C return statement): cval when calccmac
// reports it as zero or negative and nothing is drawn; otherwise whatever
// flatfill() returns.
// miny/maxy are reset before the cval test, so they are modified even when
// the triangle is rejected.
int flattri(void *dest, void *pts, int col)
{
	__asm
	{
		//pushad
		mov eax,dest
		mov edx,pts
		mov ebx,col
	
		push esi
		push eax	; dest
		push ebx	; col
		mov esi,edx

		mov miny,200
		mov maxy,0
		
		calccmac(0)
		jle pforgetit

		
		; addedge_ returns the end point in eax,edx, so only the next
		; end point has to be loaded for each following edge
		mov eax,[esi]
		mov edx,[esi+4]
		mov ebx,[esi+8]
		mov ecx,[esi+12]
		call addedge_
		mov ebx,[esi+16]
		mov ecx,[esi+20]
		call addedge_
		mov ebx,[esi+0]
		mov ecx,[esi+4]
		call addedge_

		pop edx
		pop eax
		pop esi
		//jmp flatfill_
		; flatfill(dest, col): arguments are pushed right to left
		push edx
		push eax
		call flatfill
		add esp,8
		jmp _ret
pforgetit:
		; drop col and dest without disturbing eax (still cval)
		pop edx
		pop esi ; dont touch eax
		pop esi
_ret:
		//popad
		//ret
	}
}

// flatquad: fills a single-colour quadrilateral.
//   dest - destination buffer handed on to flatfill()
//   pts  - four points as x,y int pairs
//   col  - fill colour handed on to flatfill()
// The cval test uses the first three points only.
// Returns (in eax, there is no C return statement): cval when calccmac
// reports it as zero or negative and nothing is drawn; otherwise whatever
// flatfill() returns.
int flatquad(void *dest, void *pts, int col)
{
	__asm
	{
		mov eax,dest
		mov edx,pts
		mov ebx,col
		push esi
		push eax	; dest
		push ebx	; col
		mov esi,edx

		calccmac(0)
		jle pforgetit

		mov miny,200
		mov maxy,0
		; addedge_ returns the end point in eax,edx, so only the next
		; end point has to be loaded for each following edge
		mov eax,[esi]
		mov edx,[esi+4]
		mov ebx,[esi+8]
		mov ecx,[esi+12]
		call addedge_
		mov ebx,[esi+16]
		mov ecx,[esi+20]
		call addedge_
		mov ebx,[esi+24]
		mov ecx,[esi+28]
		call addedge_
		mov ebx,[esi+0]
		mov ecx,[esi+4]
		call addedge_

		pop edx
		pop eax
		pop esi
		;jmp flatfill
		; fall thru

		; flatfill(dest, col): arguments are pushed right to left
		push edx
		push eax		
		call flatfill
		add esp,8

		jmp _ret
pforgetit:
		; drop col and dest without disturbing eax (still cval)
		pop edx
		pop esi ; dont touch eax
		pop esi
_ret:
		//popad
		//ret
	}
}


// flatfill: fills the spans currently stored in edgebuf (rows miny..maxy-1)
// with one colour.
//   dest - 320-byte-pitch destination buffer; a null pointer skips drawing
//   col  - fill colour; its low byte is replicated into all four bytes of eax
// Returns (in eax): cval after drawing or when dest is null, -1 when
// maxy <= miny.
// NOTE(review): bits 8 and up of col are not cleared before the replication,
// so a col outside 0..255 ends up OR-ed into the fill dword - confirm callers
// only pass byte values.
// `rep stosd` / `rep stosb` rely on the direction flag being clear.
int flatfill(void *dest, int col)
{
	__asm
	{
		mov eax,dest
		mov edx,col
	


		or eax,eax
		jz goaway
		pushad
		mov dh,dl
		mov ebx,eax	; dest
		mov eax,edx ; col
		shl edx,16
		or  eax,edx
		mov esi,miny
		mov edx,maxy
		sub edx,esi			; edx = number of rows
		jle forgetit2
		lea edi,[esi*4+esi]		; edi = miny * 320
		shl edi,6
		add ebx,edi
		lea esi,edgebuf[esi*8]
lineloop:
		; span = integer parts of the two edge values for this row
		mov edi,[esi]
		mov ecx,[esi+4]
		shr edi,16
		shr ecx,16
		sub ecx,edi
		jle nextline
		lea edi,[ebx+edi]
		mov ebp,[edi+3]	; fill the cache...

		; up to three single bytes bring edi to a dword boundary
		test edi,1
		jz notodd
		dec ecx
		mov [edi],al
		jz nextline
		inc edi
notodd: test edi,2
		jz notodd2
		dec ecx
		mov [edi],al
		jz nextline
		inc edi
		dec ecx
		mov [edi],al
		jz nextline
		inc edi
notodd2:
		mov ebp,[edi+60]	 ; fill the cache some more
		mov ebp,ecx			; keep the count for the byte tail
		shr ecx,2
		rep stosd
		mov ecx,ebp
		and ecx,3
		rep stosb
nextline:
		add esi,8
		add ebx,320
		dec edx
		jnz lineloop
forgetit:
		popad
goaway:
		mov eax,cval
		//ret
		jmp xxgoaway
forgetit2:
		popad
		mov eax,-1
		//ret
xxgoaway:
	}
}

// checkfill: like flatfill(), but only writes pixels whose current value is
// 0 and counts how many it wrote.
//   dest - 320-byte-pitch destination buffer
//   col  - fill colour (only the low byte is stored)
// Returns (in eax): the number of pixels written (global `drawn`), or -1 when
// dest is null or maxy <= miny.
int checkfill(void *dest, int col)
{
	__asm
	{
		mov eax,dest
		mov edx,col
	
		or eax,eax
		jz goaway
		pushad
		mov dh,dl
		mov ebx,eax	; dest
		mov eax,edx ; col
		shl edx,16
		or  eax,edx
		mov esi,miny
		mov edx,maxy
		sub edx,esi			; edx = number of rows
		jle forgetit2
		mov drawn,0
		lea edi,[esi*4+esi]		; edi = miny * 320
		shl edi,6
		add ebx,edi
		lea esi,edgebuf[esi*8]
lineloop:
		; span = integer parts of the two edge values for this row
		mov edi,[esi]
		mov ecx,[esi+4]
		shr edi,16
		shr ecx,16
		sub ecx,edi
		jle nextline
		lea edi,[ebx+edi]

pixloop:
		cmp byte ptr [edi],0
		jne skipit			; already covered: leave it alone
		inc drawn
		mov [edi],al
skipit:
		inc edi
		dec ecx
		jnz pixloop

nextline:
		add esi,8
		add ebx,320
		dec edx
		jnz lineloop
forgetit:
		popad
		mov eax,drawn
		//ret
		jmp goawayxx


goaway:
		mov eax,-1
		//ret
		jmp goawayxx
forgetit2:
		popad
		mov eax,-1
		//ret
goawayxx:
	}
}





//;=========================================================================
//; focus stuff

// pw_ra: adds to the 16-bit sum in bx the five pixels that enter the blur
// window when it moves one pixel to the right of esi: column +3 of the three
// middle rows and column +2 of the rows two above and two below.
#define pw_ra \
		  __asm add bl,[esi-640+2]\
		  __asm adc bh,0\
		  __asm add bl,[esi-320+3]\
		  __asm adc bh,0\
		  __asm add bl,[esi+3]\
		  __asm adc bh,0\
		  __asm add bl,[esi+320+3]\
		  __asm adc bh,0\
		  __asm add bl,[esi+640+2]\
		  __asm adc bh,0

// pw_ls: subtracts from the 16-bit sum in bx the five pixels that leave the
// blur window on the same move: column -2 of the three middle rows and
// column -1 of the rows two above and two below.
#define pw_ls \
		  __asm sub bl,[esi-640-1]\
		  __asm sbb bh,0\
		  __asm sub bl,[esi-320-2]\
		  __asm sbb bh,0\
		  __asm sub bl,[esi-2]\
		  __asm sbb bh,0\
		  __asm sub bl,[esi+320-2]\
		  __asm sbb bh,0\
		  __asm sub bl,[esi+640-1]\
		  __asm sbb bh,0		\
		
// The blurfade version of pixmac is dropped; blurproc() uses the one below.
#undef pixmac		

// Lookup table indexed by the running window sum in ebx.
extern "C" unsigned char divide21[];

// pixmac (blurproc version): emits two output pixels.  For each one the
// current window sum in ebx is mapped through divide21, then the window is
// advanced by the given add/subtract steps (pw_ra / pw_ls, or nop at the row
// edges) and esi moves on one pixel.  The two results are stored as one word
// at edi, which advances by two.
#define pixmac(leftsub,rightadd,leftsub2,rightadd2) \
		__asm mov al,byte ptr divide21[ebx]\
		__asm rightadd\
		__asm leftsub		\
		__asm inc esi\
		__asm mov ah,byte ptr divide21[ebx]\
		__asm rightadd2\
		__asm leftsub2	\
		__asm inc esi\
		__asm mov [edi],ax\
		__asm inc edi\
		__asm inc edi
	   



// blurproc: 21-pixel blur (5x5 window without its corners) from src to dest,
// both 320 bytes per row.  195 rows are processed, starting at row 2.
// For each row the sum of the right half of the window is built from
// scratch, then the window slides across the row using pw_ra / pw_ls, and
// every output pixel is divide21[sum].
// NOTE(review): the second pixel of each row runs pw_ls, which subtracts
// column -1 of the three middle rows although the initial sum never included
// it; likewise the last pw_ra of the main loop adds column 320.  Confirm the
// buffers have blank side borders, or that this edge error is acceptable.
void blurproc(void *dest, void *src)
{
	__asm
	{
		mov eax,dest
		mov edx,src
	
		pushad
		mov esi,edx
		mov edi,eax
		add esi,320*2
		add edi,320*2
		mov ecx,195*256			; ch = 195 rows, cl = 0 (used as zero for adc)
lineloop:
		;  ***
		; *****
		; **!**
		; *****
		;  ***		; 21
		; line start - sum up the right half of the filter...
		xor ebx,ebx
		xor eax,eax
		mov bl,[esi-640]
		add bl,[esi-640+1]
		adc	bh,cl
		add bl,[esi-320+0]
		adc bh,cl
		add bl,[esi-320+1]
		adc bh,cl
		add bl,[esi-320+2]
		adc bh,cl
		add bl,[esi+0]
		adc bh,cl
		add bl,[esi+1]
		adc bh,cl
		add bl,[esi+2]
		adc bh,cl
		add bl,[esi+320+0]
		adc bh,cl
		add bl,[esi+320+1]
		adc bh,cl
		add bl,[esi+320+2]
		adc bh,cl
		add bl,[esi+640+0]
		adc bh,cl
		add bl,[esi+640+1]
		adc bh,cl
		; first 2 pixels are special
		// pixmac(pw_ls,pw_ra,pw_ls,pw_ra)
		//pixmac(0,1,1,1)
		pixmac( nop,pw_ra,pw_ls,pw_ra)
		; next 320-6 pixels are normal
		mov cl,(320-6)/2
xloop:  //pixmac(1,1,1,1)
		pixmac(pw_ls,pw_ra,pw_ls,pw_ra)
		dec cl
		jnz xloop			; cl is 0 again when the loop ends
		//pixmac(1,0,1,0)
		pixmac(pw_ls,nop ,pw_ls, nop)
		; next line please...
		; 318 pixels were written; skip the last two of the row
		inc edi
		inc esi
		inc edi
		inc esi
		dec ch
		jnz lineloop
		popad
		//ret
	}
}

// blendmac(dest,ofs): computes one blended pixel into the byte register dest.
//   al  = byte at esi+ofs, used as index into zmaptab1/2/3 (eax upper bits
//         must be zero)
//   For each of the three planes at ebp+ofs, +64000 and +128000 the table
//   byte at [ebx block | plane pixel : zmaptabN[al]] is read, and the three
//   results are added (8-bit add).
// The low 16 bits of ebx are overwritten, so the table is the 64K block
// addressed by ebx's upper 16 bits.
#define blendmac(dest,ofs)\
		__asm mov al,[esi+ofs]\
		__asm mov bl,byte ptr zmaptab1[eax]\
		__asm mov bh,[ebp+ofs]\
		__asm mov dest,[ebx]\
		__asm mov bl,byte ptr zmaptab2[eax]\
		__asm mov bh,[ebp+ofs+64000]\
		__asm add dest,[ebx]\
		__asm mov bl,byte ptr zmaptab3[eax]\
		__asm 	mov bh,[ebp+ofs+128000]\
		__asm add dest,[ebx]

		
// blendproc: per-pixel mix of three source planes, selected by a z buffer.
//   dest - 320-byte-pitch output
//   zbuf - one byte per pixel, indexes zmaptab1/2/3 inside blendmac
//   src  - three planes, 64000 bytes apart
// Rows 4..195 are processed (192 rows of 80 dwords); the first and last four
// rows of dest are not written.  The lookup table comes from multab, whose
// low 16 bits are overwritten - presumably it is 64K-aligned (confirm).
void blendproc(void *dest, void *zbuf, void *src)
{
	__asm
	{
		mov eax,dest
		mov edx,zbuf
		mov ebx,src

		pushad
		mov edi,eax ; dest
		mov esi,edx ; zbuffer
		mov ebp,ebx ; 3 blurred screens
		mov ebx,multab ; mul tab (64k)
		xor eax,eax
		xor edx,edx

		add esi,4*320
		add edi,4*320
		add ebp,4*320
		mov ecx,(200-8)*80
pixloop:
		; four pixels per iteration, packed into edx (pixels 2,3 first)
		blendmac(dl,2)
		blendmac(dh,3)
		shl edx,16
		blendmac(dl,0)
		blendmac(dh,1)
		add esi,4
		add ebp,4
		mov [edi],edx
		add edi,4

		dec ecx
		jnz pixloop
		popad
		//ret
	}
}


// unclippedline: adds an anti-aliased line from (x1,y1) to (x2,y2) to the
// 320-byte-pitch buffer at screenbuf.  No clipping is performed, so both end
// points must lie inside the buffer.
// The line is stepped along its major axis; at every step two neighbouring
// pixels across the minor axis receive linecol scaled (through the table at
// multab) by the fractional position and its complement, added to the
// existing pixel and mapped through cliptab128.
// The per-step slope is delta * reciptab[count + 1024].  The low 16 bits of
// edx (loaded from multab) are overwritten, so presumably multab is
// 64K-aligned (confirm).
void unclippedline(int x1,int y1,int x2,int y2)
{
	__asm
	{
		mov eax,x1
		mov edx,y1
		mov ebx,x2
		mov ecx,y2
	
		pushad
		; line eax,edx to ebx,ecx
		sub ebx,eax			; ebx = dx
		sub ecx,edx			; ecx = dy
		jge down
		; make dy non-negative by starting from the other end
		add eax,ebx
		add edx,ecx
		neg ebx
		neg ecx
down:
		; choose the major axis by comparing |dx| with dy
		test ebx,ebx
		jz zero
		jg posa
		neg ebx
		cmp ebx,ecx
		jge leftwards
		neg ebx
		jmp downwards
zero:
		test ecx,ecx
		jz doneit			; zero-length line: nothing to draw
posa:
		cmp ebx,ecx
		jge rightwards
downwards:
		; line is mostly downwards
		shl eax,16
		lea esi,[edx*4+edx]	; esi=screenmem
		shl esi,6
		add esi,screenbuf
		mov ebp,reciptab[ecx*4+1024*4]
		imul ebp,ebx

		or ecx,ecx
		jle doneit
		; eax=x, ebp=xinc, esi=screenmem, ecx=count
		xor ebx,ebx
		mov edx,multab
		mov dh,byte ptr linecol
loop1:
		; plot 2 pixels

		mov dl,ah

		; calc edi here
		mov edi,eax
		shr edi,16
		add edi,esi

		not dl
		mov bl,[edx]
		add bl,[edi]
		mov bl,cliptab128[ebx]
		mov [edi],bl

		mov dl,ah
		mov bl,[edx]
		add bl,[edi+1]
		mov bl,cliptab128[ebx]
		mov [edi+1],bl
		add eax,ebp
		add esi,320
		dec ecx
		jnz loop1
		jmp doneit

leftwards:
		; ebx already negged.
		; restart from the left end so that x always increases
		sub eax,ebx
		add edx,ecx
		neg ecx
rightwards:
		; mostly rightwards

		xchg ebx,ecx
		xchg eax,edx
		shl eax,16
		mov esi,edx
		add esi,screenbuf
		mov ebp,reciptab[ecx*4+1024*4]
		imul ebp,ebx

		or ecx,ecx
		jle doneit
		; eax=y, ebp=yinc, esi=screenmem, ecx=count
		xor ebx,ebx
		mov edx,multab
		mov dh,byte ptr linecol
loop2:
		; plot 2 pixels

		mov dl,ah

		; calc edi here
		mov edi,eax
		shr edi,16
		lea edi,[edi*4+edi]	; esi=screenmem
		shl edi,6
		add edi,esi

		not dl
		mov bl,[edx]
		add bl,[edi]
		mov bl,cliptab128[ebx]
		mov [edi],bl

		mov dl,ah
		mov bl,[edx]
		add bl,[edi+320]
		mov bl,cliptab128[ebx]
		mov [edi+320],bl
		add eax,ebp
		inc esi
		dec ecx
		jnz loop2

doneit:
		popad
		//ret

	}
}







;///////////////////////////////////////////////////////////////////////
;// blur routines

int _texture,_dest,_fadetab,_fadeinc,_sizeoffadetab;
int _blurwid=320;

extern void hblur(SLONG bmapwid, SLONG blurwid,
				  SLONG numdwords, SLONG numlines,
				  UBYTE *texture, UBYTE *dest,
				  UBYTE *fadetab,SLONG fadeinc, SLONG sizeoffadetab)
{
	__asm
	{
		mov eax,texture
		mov _texture,eax
		mov eax,dest
		mov _dest,eax
		mov eax,fadetab
		mov _fadetab,eax
		mov eax,fadeinc
		mov _fadeinc,eax
		mov eax,sizeoffadetab
		mov _sizeoffadetab,eax

		mov eax,bmapwid
		mov edx,blurwid
		mov ebx,numdwords
		mov ecx,numlines
	
		pushad
		mov ebp,eax
		;mov edx,edx
		mov al,bl
		mov ah,cl
		mov ecx,ebp
		mov esi,_texture;//[esp+36]
		mov edi,_dest;//[esp+40]
		mov ebp,_fadetab;//[esp+44]

		xor ebx,ebx
fillloop:
		rol ebx,16
		mov [ebp],bl
		rol ebx,16
		add ebx,_fadeinc;//[esp+48]
		cmp ebx,256*65536
		jl ok1
		mov ebx,256*65536-1
ok1:
		inc ebp
		dec _sizeoffadetab;//dword ptr [esp+52]
		jnz fillloop
		mov ebp,_fadetab;//[esp+44]

		; ecx = width of bitmap
		; edx = blur width
		; al  = number of dwords per line to draw
		; ah  = number of lines
		; esi = bitmap pointer (pad w 0s to left and right)
		; edi = dest screen
		; ebp = 1 line fade table

lineloop:
		push ecx
		push esi
		mov ecx,edx
		shr ecx,1
		sub esi,ecx

		mov ecx,edx
		xor ebx,ebx
startloop:
		add bl,[esi]
		inc esi
		adc bh,0
		dec ecx
		jnz startloop
		; ch is now 0
		sub esi,edx
		push edi
		push eax
		mov cl,al

pixloop:
		mov	al,[ebx+ebp]
		add bl,[esi+edx]
		adc bh,ch
		sub bl,[esi]
		sbb bh,ch
		mov	ah,[ebx+ebp]
		add bl,[esi+edx+1]
		adc bh,ch
		sub bl,[esi+1]
		sbb bh,ch
		rol eax,16
		mov	al,[ebx+ebp]
		add bl,[esi+edx+2]
		adc bh,ch
		sub bl,[esi+2]
		sbb bh,ch
		mov	ah,[ebx+ebp]
		add bl,[esi+edx+3]
		adc bh,ch
		sub bl,[esi+3]
		sbb bh,ch
		rol eax,16
		mov	[edi],eax
		add edi,4
		add esi,4
		dec cl
		jnz pixloop
np:
		pop eax
		pop edi
		pop esi
		pop ecx
		add edi,_blurwid
		
		add esi,ecx
		dec ah
		jnz lineloop
		popad
		//ret 5*4
	}
}

// Sets the destination pitch (in bytes) that hblur() adds to its output
// pointer after every line.
void setblurwid(int w)
{
	_blurwid = w;
}

int pintexptr;





/*
;bl:edx :edx contains u and lo v, bl contains hi v
;val2:val1 contains u/v incs
;ch:esi contains colour
;val3:ebp contains colour inc
;ival1,ival2,ival3,iebp are delta deltas
*/


//#define pinmapnsmacro macro WIDTH,HEIGHT,adjust

// drawpinmapns: the function body is the contents of pnimap.h, compiled with
// WIDTH=40, HEIGHT=25 and adjust=0.  The header is not visible here; how it
// uses the three macros must be checked there.
#define WIDTH 40
#define HEIGHT 25
#define adjust 0
void drawpinmapns(void *srctex, void *dest)
{
#include "pnimap.h"
}

// drawpinmaptex: the function body is the contents of pnimap.h, compiled
// with WIDTH=32, HEIGHT=32 and adjust=0.
// The three parameters are #undef'd first: redefining a macro with a
// different value while it is still defined is ill-formed (MSVC reports
// C4005), and #undef on a name that is not defined is harmless.
#undef WIDTH
#undef HEIGHT
#undef adjust
#define WIDTH 32
#define HEIGHT 32
#define adjust 0
void drawpinmaptex(void *srctex, void *dest)
{
#include "pnimap.h"
}


// drawpinmap48: the function body is the contents of pnimap.h, compiled
// with WIDTH=40, HEIGHT=30 and adjust=1.
// The three parameters are #undef'd first: redefining a macro with a
// different value while it is still defined is ill-formed (MSVC reports
// C4005), and #undef on a name that is not defined is harmless.
#undef WIDTH
#undef HEIGHT
#undef adjust
#define WIDTH 40
#define HEIGHT 30
#define adjust 1

void drawpinmap48(void *srctex, void *dest)
{
#include "pnimap.h"
}

// ghostcopy: combines 64000 bytes of src into dest through a two-input table.
// For every pixel: dest = table[(dest << 8) | src].
// ebx and ecx both start as ghostptr and have their low 16 bits overwritten
// with the index, so the table is the 64K block addressed by ghostptr's
// upper 16 bits - presumably ghostptr is 64K-aligned (confirm).
void ghostcopy(void *dest, void *src)
{

// gpixmac(ofs): looks up two neighbouring pixels at once; the results land
// in al (pixel ofs) and ah (pixel ofs+1).
#define	gpixmac(ofs) \
		__asm mov bl,[esi+ofs]\
		__asm mov bh,[edi+ofs]\
		__asm mov cl,[esi+ofs+1]\
		__asm mov ch,[edi+ofs+1]\
		__asm mov al,[ebx]\
		__asm mov ah,[ecx]
	__asm
	{
	
		mov eax,dest
		mov edx,src

		pushad
		mov edi,eax
		mov esi,edx
		mov ebx,ghostptr
		mov ebp,64000/4			; ebp = dword counter
		mov ecx,ebx
again:
		; pixels 2,3 go to the upper half of eax, pixels 0,1 to the lower
		gpixmac(2)
		shl eax,16
		gpixmac(0)
		add esi,4
		mov [edi],eax
		add edi,4
		dec ebp
		jnz again
		popad
	}
}
