#include <stdlib.h>

#include "context.h"
#include "depth.h"
#include "macros.h"
#include "texstate.h"
#include "triangle.h"
#include "vb.h"
#include "types.h"

#include "xsmesaP.h"
#include "glx_log.h"
#include "mesaglx/context.h"
#include "mesaglx/matrix.h"
#include "mesaglx/types.h"

#define GC XXGC
#include "gcstruct.h"
#include "pixmapstr.h"
#include "servermd.h" /* PixmapBytePad */
#include "scrnintstr.h"
#include "regionstr.h"
#include "windowstr.h"
#undef GC

#include "s3virgeglx.h"
#include "glx_symbols.h"

/*
 * Product lookup for two 5-bit values: alpha_table[a][b] == (a * b) / 31.
 * Filled in by SetupAlphaBlendTable(); all zero until then.
 */
static int alpha_table[32][32];


/*
 * SetupAlphaBlendTable
 * Fill alpha_table with (a * b) / 31 for every pair of 5-bit values,
 * so that 31 * 31 maps back to 31 and anything times 0 is 0.
 */
static void SetupAlphaBlendTable()
{
	int a;

	for (a = 0; a < 32; a++) {
		int *row = alpha_table[a];
		int b;

		for (b = 0; b < 32; b++) {
			row[b] = (a * b) / 31;
		}
	}
}

/*
 * AlphaMerge
 * Combine two packed words field by field.  Each word is treated as six
 * 5-bit fields starting at bits 0, 5, 10, 16, 21 and 26; every field of
 * src is multiplied with the matching field of dst through alpha_table
 * and the results are packed back at the same positions.  Bits 15 and 31
 * of the inputs are ignored and are always zero in the result.
 *
 * alpha_table must have been filled by SetupAlphaBlendTable() first.
 */
static int __inline__ AlphaMerge(int src, int dst)
{
	static const int field_shift[6] = { 26, 21, 16, 10, 5, 0 };
	const unsigned int s = (unsigned int)src;
	const unsigned int d = (unsigned int)dst;
	int merged = 0;
	int k;

	for (k = 0; k < 6; k++) {
		const int sh = field_shift[k];

		merged |= alpha_table[(s >> sh) & 0x1F][(d >> sh) & 0x1F] << sh;
	}
	return merged;
}

/*
 * BlendLightmapIntoBackBuffer
 * Software pass used when s3virgeglx.lightmapHack is set: copy the
 * lightmap buffer and the back buffer out of card memory, combine them
 * one int at a time with AlphaMerge(), and copy the result back over the
 * back buffer.
 *
 * The two scratch copies are kept between calls and are re-allocated
 * whenever a larger buffer shows up.  If they cannot be allocated the
 * error is reported and the blend is skipped, leaving the back buffer
 * as it was.
 */
static void BlendLightmapIntoBackBuffer( s3virgeBufferPtr buf )
{
    static int		*src = NULL, *dst = NULL;
    static size_t	capacity = 0;	/* bytes held by each scratch copy */
    static int		tableReady = 0;
    void		*lightmapMem, *backMem;
    size_t		bytes;
    int			words, i;

    s3virgeMsg(1, "Doing lightmap alpha blend.\n");

    /* number of ints to process; presumably two 16 bit pixels per int */
    words = buf->pitch * buf->height / 2;
    if ( words <= 0 ) {
	return;
    }
    bytes = (size_t)words * sizeof(int);

    if ( !tableReady ) {
	SetupAlphaBlendTable();
	tableReady = 1;
    }

    /* the old contents are never needed, so just drop and re-allocate */
    if ( bytes > capacity ) {
	free( src );
	free( dst );
	src = malloc( bytes );
	dst = malloc( bytes );
	if ( src == NULL || dst == NULL ) {
	    free( src );
	    free( dst );
	    src = NULL;
	    dst = NULL;
	    capacity = 0;
	    s3virgeError("Fatal error allocating lightmapHack memory!\n");
	    /* skip the blend; the next call will try to allocate again */
	    return;
	}
	capacity = bytes;
    }

    lightmapMem = (void *)(s3virgeglx.linearBase + s3virgeDB->lightmapBufferBlock->ofs);
    backMem = (void *)(s3virgeglx.linearBase + s3virgeDB->backBufferBlock->ofs);

    memcpy( src, lightmapMem, bytes );
    memcpy( dst, backMem, bytes );

    for ( i = 0; i < words; i++ ) {
	dst[i] = AlphaMerge( src[i], dst[i] );
    }

    memcpy( backMem, dst, bytes );
}

/*
 * s3virgeBackToFront
 * Blit the visible rectangles from the back buffer to the screen
 * Can't really do this from a direct context - don't have the clip
 * info, and getting it would be a lot slower than just sending a
 * request to the server to do the blit there.
 *
 * drawable must be a window with the same width and height as buf,
 * otherwise an error is reported and nothing is copied.  Nothing is
 * done either when the window's clip list is empty.  When
 * s3virgeglx.lightmapHack is set the lightmap buffer is blended into
 * the back buffer in software before the blit.
 */
void s3virgeBackToFront( DrawablePtr drawable, s3virgeBufferPtr buf ) {
    RegionPtr	prgnClip;
    BoxPtr	pbox;
    int 	i, nbox;
    int 	xorg, yorg;

    if ( ( drawable->width != buf->width ) ||
	 ( drawable->height != buf->height ) ||
	 ( drawable->type != DRAWABLE_WINDOW ) )
    {
	s3virgeError( "BackToFront(): bad drawable\n" );
	return;
    }

    prgnClip = &((WindowPtr)drawable)->clipList;
    pbox = REGION_RECTS( prgnClip );
    nbox = REGION_NUM_RECTS( prgnClip );

    if( !nbox ) {
	/* window is completely covered */
	return;
    }

    /* clip boxes are used as destination coordinates as they are;
       subtracting the drawable origin gives the source coordinates */
    xorg = drawable->x;
    yorg = drawable->y;

    s3virgeMsg(1, "Doing swap.\n");

    if (s3virgeglx.lightmapHack) {
	BlendLightmapIntoBackBuffer( buf );
    }

    EnsureDMAOn();

    /* add the blit commands to the dma buffer */
    if (s3virgeglx.dmaDriver == 0) {
	    /* dmaDriver == 0: write the registers directly; ten writes follow */
	    WAITFIFOEMPTY(10);
	    OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_CMDSET), (0xF << 27));

	    /* clip to the drawable: low bound in the high 16 bits */
	    OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_CLIP_L_R), (drawable->x << 16) | (drawable->x + drawable->width - 1));
	    OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_CLIP_T_B), (drawable->y << 16) | (drawable->y + drawable->height - 1));

	    OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_SRC_BASE), (s3virgeDB->backBufferBlock->ofs) & 0x003FFFF8 ); /* SRC_BASE */

	    OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_DEST_BASE), 0 ); /* DEST_BASE */
	    OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_DEST_SRC_STRIDE), ((s3virgeDB->pitch * 2) |
	            ((s3virgeglx.displayWidth * 2) << 16))); /* Stride */
	    OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_MONO_PAT0), ~(0));
	    OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_MONO_PAT1), ~(0));
	    OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_PAT_FG_COLOR), 0);
	    OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_CMDSET),
	    			0x01 | /* Autoexecute */
	    			0x02 | /*clip */
				0x04 | /* 16 bit */
			    	0x20 | /* draw */
			    	0x400 |
			    	(0x2 << 11) |
			    	(0xCC << 17) |
			    	0x3 << 25); /* l-r, t-b */

	    /* xy bitblt each visible rectangle */
	    for (i=nbox; i > 0; i--) {
	    	int	x = pbox->x1 - xorg;
		int	y = pbox->y1 - yorg;
		int	w = pbox->x2 - pbox->x1;
		int	h = pbox->y2 - pbox->y1;
		WAITFIFOEMPTY(3);
		/* width is written minus one, height as is */
		OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_BITBLT_WIDTH_HEIGHT), ((w - 1) << 16) | h);
		OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_BITBLT_SRC_X_Y), (x << 16) | y);
		OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_BITBLT_DEST_X_Y), (pbox->x1 << 16) | pbox->y1);
		pbox++;
	    }
	    WAITFIFOEMPTY(1);
	    OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_CMDSET), (0xF << 27));
	} else {
		/* same setup as above, queued as one run of twelve values
		   starting at SRC_BASE */
		DMAGETPTR(15);
		DMAOUTREG((S3VIRGE_BITBLT_REG | S3VIRGE_SRC_BASE), 12);
		DMAOUT((s3virgeDB->backBufferBlock->ofs & 0x003FFFF8 )); /* SRC_BASE */
		DMAOUT(0);
		DMAOUT((drawable->x << 16) | (drawable->x + drawable->width - 1));
		DMAOUT((drawable->y << 16) | (drawable->y + drawable->height - 1));
		DMAOUT(((s3virgeDB->pitch * 2) | ((s3virgeglx.displayWidth * 2) << 16))); /* Stride */
		DMAOUT(~(0));
		DMAOUT(~(0));
		DMAOUT(0);
		DMAOUT(0);
		DMAOUT(0);
		DMAOUT(0);
    		DMAOUT((0x01 | /* Autoexecute */
    			0x02 | /*clip */
			0x04 | /* 16 bit */
			0x20 | /* draw */
			0x400 |
			(0x2 << 11) |
			(0xCC << 17) |
			(0x3 << 25))); /* l-r, t-b */
		/* xy bitblt each visible rectangle */
		for (i=nbox; i > 0; i--) {
    			int	x = pbox->x1 - xorg;
			int	y = pbox->y1 - yorg;
			int	w = pbox->x2 - pbox->x1;
			int	h = pbox->y2 - pbox->y1;
			DMAGETPTR(3);
			DMAOUTREG((S3VIRGE_BITBLT_REG | S3VIRGE_BITBLT_WIDTH_HEIGHT), 3);
			DMAOUT(((w - 1) << 16) | h);
			DMAOUT((x << 16) | y);
			DMAOUT((pbox->x1 << 16) | pbox->y1);
			pbox++;
		}
		DMAFINISH();
	}
}


/*
 * ClearBox
 * Add hardware commands to draw a filled box for the
 * debugging display.
 *
 * x, y, w, h give the box position and size, r, g, b its colour.
 *
 * NOTE: the function currently returns immediately, so it draws
 * nothing and every statement after the return is unreachable.  The
 * register sequence is kept for when the boxes are turned back on.
 */
static void ClearBox( int x, int y, int w, int h, int r, int g, int b )
{
	int	color;

	/* boxes disabled: nothing below this line is executed */
	return;
	
	/* pack the colour with the 565 macro */
	color = S3VIRGEPACKCOLOR565(r, g, b);

	/* six register writes follow */
	WAITFIFOEMPTY(6);
        OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_CMDSET), (0xF << 27));
	/* hardware accelerated clear */
	OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_SRC_BASE), 0);
	/* the box goes into the back buffer */
	OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_DEST_BASE), (s3virgeDB->backBufferBlock->ofs));
	/* both halves of the stride register get pitch * 2 */
	OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_DEST_SRC_STRIDE), 
					(((s3virgeDB->pitch) * 2)) |
					((s3virgeDB->pitch) * 2 << 16));
	OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_MONO_PAT0), ~(0));
	OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_MONO_PAT1), ~(0));
	/* four more register writes follow */
	WAITFIFOEMPTY(4);
	OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_PAT_FG_COLOR), color);
	/* width minus one in the high 16 bits, height in the low bits */
	OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_BITBLT_WIDTH_HEIGHT), ((w - 1) << 16) | h );
	/* x in the high 16 bits, y in the low bits */
	OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_BITBLT_DEST_X_Y), (x << 16) | y);
	OUTREG( (S3VIRGE_BITBLT_REG | S3VIRGE_CMDSET), 0x16000120 | 0x4 | (0xF0 << 17));

}


/*
 * s3virgePerformanceBoxes
 * Draw some small boxes in the corner of the buffer, based on the
 * performance counters in s3virgeglx, and clear the counters that
 * were shown.
 *
 * is_direct is non-zero when called for direct (client) rendering.
 * Apart from the alternating box at the top, nothing is drawn or reset
 * unless s3virgeglx.boxes is set and s3virgeDB exists.
 */
void s3virgePerformanceBoxes( int is_direct )
{
    int		w, t;		/* only needed by the disabled section below */
    static int	phase;
    int		red, green, blue;

    /* alternate between two colours on successive calls; this happens
       before the enable check below */
    if ( !phase ) {
	phase = 1;
	if ( is_direct ) {
	    ClearBox( 4, 4, 8, 8, 255, 0, 0 );
	} else {
	    ClearBox( 4, 4, 8, 8, 0, 0, 255 );
	}
    } else {
	phase = 0;
	ClearBox( 4, 4, 8, 8, 255, 255, 255 );
    }

    if ( !( s3virgeglx.boxes && s3virgeDB ) ) {
	return;
    }

    /* a box to show we are active, so if it isn't seen it means that
       rendering is completely software based:
         purple = direct (client dma) rendering
         white  = server dma rendering
         grey   = server PDMA */
    if ( is_direct ) {
	red = 255;
	green = 0;
	blue = 255;
    } else if ( s3virgeglx.dmaDriver ) {
	red = 255;
	green = 255;
	blue = 255;
    } else {
	red = 128;
	green = 128;
	blue = 128;
    }
    ClearBox( 4, 4, 8, 8, red, green, blue );

    /* red: we had to wait for drawing to complete
       (software render or texture swap) */
    if ( s3virgeglx.c_drawWaits ) {
	ClearBox( 16, 4, 8, 8, 255, 0, 0 );
	s3virgeglx.c_drawWaits = 0;
    }

    /* blue: the register protection signal was hit */
    if ( s3virgeglx.c_signals ) {
	ClearBox( 28, 4, 8, 8, 0, 0, 255 );
	s3virgeglx.c_signals = 0;
    }

    /* yellow: textures were swapped */
    if ( s3virgeglx.c_textureSwaps ) {
	ClearBox( 40, 4, 8, 8, 255, 255, 0 );
	s3virgeglx.c_textureSwaps = 0;
    }

    /* green: we had to wait for dma to complete (full utilization)
       on the previous frame */
    if ( !s3virgeglx.hardwareWentIdle ) {
	ClearBox( 64, 4, 8, 8, 0, 255, 0 );
    }
    s3virgeglx.hardwareWentIdle = 0;


#if 0
    /* show buffer utilization */
    if ( s3virgeglx.c_dmaFlush > 1 ) {
	/* draw a solid bar if we flushed more than one buffer */
	ClearBox( 4, 16, 252, 4, 255, 32, 32 );
    } else {
	/* draw bars to represent the utilization of primary buffer */
	ClearBox( 4, 16, 252, 4, 32, 32, 32 );
	t = dma_buffer->maxBufferDwords;
	w = 252 * dma_buffer->bufferDwords / t;
	if ( w < 1 ) {
	    w = 1;
	}
	ClearBox( 4, 16, w, 4, 196, 128, 128 );
    }
#endif
    s3virgeglx.c_dmaFlush = 0;
}


/*
 * s3virgeGLXSwapBuffers
 * Show the back buffer of b on its front buffer.
 *
 * Returns early, without copying and without the counter report, when
 * b has no back image or the back image does not carry a valid s3virge
 * buffer.  Otherwise one of three paths is taken:
 *   - not running in the server: s3virgeDirectClientSwapBuffers()
 *   - back buffer block present: hardware blit plus dma flush
 *   - no back buffer block:      PutImage of the back image
 * Afterwards the per-frame counters are logged and cleared.
 */
void s3virgeGLXSwapBuffers( XSMesaBuffer b ) {
	s3virgeBufferPtr	backBuf;

	/* make sure mesa gives us everything; 3.1 leaves unsent verts
	   without this, but there is probably a better internal call
	   to get them sent... */
	if ( s3virgeCtx && s3virgeCtx->gl_ctx ) {
		glFlush();
	}

	s3virgeglx.swapBuffersCount += 1;

	/* when is this ever hit? */
	if ( !b->backimage ) {
		return;
	}

	backBuf = (s3virgeBufferPtr)b->backimage->devPriv;
	if ( !VALID_S3VIRGE_BUFFER( backBuf ) ) {
		s3virgeError( "BackToFront(): invalid back buffer\n" );
		return;
	}

	if ( !__glx_is_server ) {
		s3virgeDirectClientSwapBuffers( b );
	} else if ( backBuf->backBufferBlock ) {
		/* diagnostic drawing tools */
		s3virgePerformanceBoxes( 0 );

		/* hardware accelerated back to front blit */
		s3virgeBackToFront( (DrawablePtr)b->frontbuffer, backBuf );

		/* make sure all dma is going to get executed; if
		   everything has gone well, this will be the only
		   flush each frame */
		s3virgeDmaFlush();
	} else {
		/* software blit, using backimage's dimension
		   (not buffer's) */
		ValidateGC( b->frontbuffer, b->cleargc );

		(*b->cleargc->ops->PutImage)( (DrawablePtr)b->frontbuffer,
					      b->cleargc,
					      b->frontbuffer->depth,
					      0, 0,
					      b->backimage->width,
					      b->backimage->height,
					      0, ZPixmap, b->backimage->data );
	}

	/* report performance counters, then start the next frame at zero */
	s3virgeMsg( 9, "swapBuffers: c_gtrianges:%i c_ttriangles:%i  c_setup:%i c_textures:%i\n",
	       s3virgeglx.c_gtriangles, s3virgeglx.c_ttriangles,  
	       s3virgeglx.c_setupPointers, s3virgeglx.c_textureSwaps );

	s3virgeglx.c_setupPointers = 0;
	s3virgeglx.c_ttriangles = 0;
	s3virgeglx.c_gtriangles = 0;

	s3virgeMsg( 9, "---------------------------------------------------------\n" );
}

