/* -*- mode: C; c-basic-offset:8 -*- */
/*
 * GLX Hardware Device Driver for Matrox G200/G400
 * Copyright (C) 1999 Jeff Hartmann
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * JEFF HARTMANN, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 *
 * original by Jeff Hartmann <slicer@ionet.net>
 * 6/16/99: rewrite by John Carmack <johnc@idsoftware.com>
 */

#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/mman.h>
#include <stdio.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <signal.h>
#if defined(HAVE_ASM_SIGCONTEXT_H)
# include <asm/sigcontext.h>
#endif

#include "mm.h"
#include "mgadd.h"
#include "g200_mac.h"
#include "mgalib.h"
#include "hwlog.h"
#include "mgadirect.h"
#include "mgawarp.h"
#include "mgastate.h"
#include "mgaglx.h"
#include "hw_mtrr.h"

#include "pb.h"

/* public vars */

/* The dma buffer currently being filled.  mgaDmaResetBuffer() points it
 * at dmaBuffers[ mgaActiveDmaBuffer ]. */
mgaDma_buffer	*dma_buffer;			/* dmaBuffers[ activeDmaBuffer ] */


/* This will be overwritten from the default values when glx.so is
 * loaded on the client.
 *
 * mgaDoDmaFlush is the flush entry point invoked by
 * FlushOrFinishWithOverflowCheck(); it defaults to the in-server
 * implementation, mgaServerDmaFlush().
 */
void mgaServerDmaFlush( int wait );
void    (*mgaDoDmaFlush)( int ) = mgaServerDmaFlush;
/* index into dmaBuffers[]; toggled with ^= 1 after each non-empty flush */
hwUI32	mgaActiveDmaBuffer = 0;	



/* set by LockRegisters(), cleared by UnlockRegisters() */
static	hwUI32	registersLocked;		/* true when the registers are memory protected */

/* forward declarations for functions used before their definitions */
static void UnlockRegisters( void );
void mgaDmaResetBuffer( void );
static void mgaFlushPseudoDma( void );



/* Default debug flag word, defined here only when MGA_DEBUG is not
 * already a preprocessor macro.  Flags are enabled by uncommenting
 * the corresponding line below.
 */
#ifndef MGA_DEBUG
/* NOTE: must turn off 2d accel for ALWAYS_SYNC to work: */
int MGA_DEBUG = (0
/*  		  | MGA_DEBUG_OUTREG */
/*  		  | MGA_DEBUG_ALWAYS_SYNC */
/*  		  | MGA_DEBUG_VERBOSE_MSG */
		  );
#endif



/*
 * MgaSetSyncBusy
 * Writes the SYNC_DMA_BUSY marker into the DWGSYNC register and spins
 * until the register reads back that value.  There is no timeout.
 */
void MgaSetSyncBusy( void )
{
	OUTREG( MGAREG_DWGSYNC, SYNC_DMA_BUSY );

	/* X might have left something drawing, so keep polling until
	 * our magic value shows up in the register */
	for ( ;; ) {
		if ( INREG( MGAREG_DWGSYNC ) == SYNC_DMA_BUSY ) {
			break;
		}
	}
}

/* Intentionally empty.  Called repeatedly from the spin loop in
 * mgaWaitForDmaCompletion() between reads of the sync register. */
static void delay( void ) 
{
}

/*
 * mgaWaitForDmaCompletion
 *
 * Polls DWGSYNC until it no longer holds SYNC_DMA_BUSY, or until
 * roughly TIMEOUT_USEC microseconds have been measured with usec().
 * Afterwards, if the STATUS register does not have
 * STAT_endprdmasts_enable set, the dma address registers are dumped
 * to stderr and the process aborts.  If the registers were memory
 * protected they are unlocked before returning.
 *
 * Returns the number of polls that found the hardware still busy
 * (0 when mgaglx.skipDma is set or the hardware was already idle).
 */
#define	TIMEOUT_USEC 1000000

int mgaWaitForDmaCompletion( void )
{
	int	polls = 0;
	int	firstStamp = 0;		/* 0 means "not sampled yet" */
	int	lastStamp = 0;
	int	spin;

	if ( mgaglx.skipDma ) {
		return 0;
	}

	for ( ;; ) {
		int	syncValue = INREG( MGAREG_DWGSYNC );

		if ( syncValue != SYNC_DMA_BUSY ) {
			break;
		}
		polls++;

		lastStamp = usec();
		if ( firstStamp != 0 && lastStamp >= firstStamp ) {
			if ( lastStamp - firstStamp > TIMEOUT_USEC ) {
				hwMsg( 1, "waitForDmaCompletion timed out\n" );
				break;
			}
		} else {
			/* first sample, or the clock wrapped: restart timing */
			firstStamp = lastStamp;
		}

		/* spin in place a bit so we aren't hammering the register */
		for ( spin = 10000 ; spin > 0 ; spin-- ) {
			delay();
		}
	}

	hwMsg( 10, "waitForDmaCompletion, usec: %d\n", lastStamp - firstStamp );

	if ( !( INREG(MGAREG_STATUS) & STAT_endprdmasts_enable ) ) {
		/* dump the dma engine registers before giving up */
		fprintf(stderr, "waitForDmaCompletion: still going!\n" );
		fprintf(stderr, "PRIMADDRESS is 0x%lx END 0x%lx\n",
			INREG(MGAREG_PRIMADDRESS),
			INREG(MGAREG_PRIMEND));
		fprintf(stderr, "SECADDRESS is 0x%lx END 0x%lx\n",
			INREG(MGAREG_SECADDRESS),
			INREG(MGAREG_SECEND));
		fprintf(stderr, "SETUPADDRESS is 0x%lx END 0x%lx\n",
			INREG(MGAREG_SETUPADDRESS),
			INREG(MGAREG_SETUPEND));
		fprintf(stderr, "STATUS is 0x%lx\n",
			INREG(MGAREG_STATUS));
		abort();	/* don't even let the xserver try to clean up */
	}

	if ( registersLocked ) {
		UnlockRegisters();
	}

	return polls;
}

/*
 * mgaDmaResetBuffer
 * Selects dmaBuffers[ mgaActiveDmaBuffer ] as the current buffer,
 * empties it, and stamps its first five dwords with a marker pattern.
 * When both mgaDB and mgaCtx are set, the context is flagged as
 * needing a state re-emit (unless MGA_DEBUG_ALWAYS_SYNC is on).
 */
void mgaDmaResetBuffer( void )
{
	int	i;

	dma_buffer = dmaBuffers[ mgaActiveDmaBuffer ];
	dma_buffer->primaryDwords = 0;
	dma_buffer->secondaryDwords = 0;

	/* To make sure we don't send off a non initialized buffer to
	 * the card, fill the first quad (command dword plus four data
	 * dwords) with SOFTRAP instructions.  mgaServerDmaFlush()
	 * refuses to flush a buffer whose first dword still holds
	 * this value.
	 */
	for ( i = 0 ; i < 5 ; i++ ) {
		dma_buffer->virtualAddress[i] = 0x92929292;
	}

	if ( !mgaDB || !mgaCtx ) {
		return;
	}

	/* Each dma buffer is finished by a return to 2d state, as
	 * expected by the X server, so the 3d context has to be
	 * re-entered for the next buffer.  Could alternately do the
	 * fallback inside the signal handler, but this way is nicer
	 * if we ever support multiple direct clients.
	 */
	if ( MESA_VERBOSE & VERBOSE_DRIVER ) {
		fprintf(stderr, "needEnter3D and ENTER3D in mgaDmaResetBuffer\n");
	}
	if ( !( MGA_DEBUG & MGA_DEBUG_ALWAYS_SYNC ) ) {
		mgaCtx->new_state |= MGA_NEW_CONTEXT;
	}
}


/*
 * UnlockRegisters
 * Restores read/write access to the 0x3000-byte region at MGAMMIOBase
 * and clears registersLocked.  The mprotect() result is not checked.
 */
static void UnlockRegisters( void ) {
	mprotect( GLXSYM(MGAMMIOBase), 0x3000, PROT_READ | PROT_WRITE );
	registersLocked = 0;
}

/*
 * RegisterAccessSignalHandler
 * Installed by LockRegisters() to catch the fault raised when
 * something writes to the write-protected register window while dma
 * may still be running.  It waits for dma to complete;
 * mgaWaitForDmaCompletion() then unlocks the registers, so the
 * faulting access can proceed once the handler returns.
 *
 * On i386 Linux the handler is declared with a struct sigcontext
 * second parameter, from which the faulting address (cr2) is read to
 * tell a register access from a genuine segmentation fault.
 */
#if defined(__i386__) && defined(__linux__)
static void RegisterAccessSignalHandler( int signal, struct sigcontext sc) 
#else
static void RegisterAccessSignalHandler( int signal) 
#endif  
{

#if defined(__i386__) && defined(__linux__)
	/* The protected window is the half-open range
	 * [MGAMMIOBase, MGAMMIOBase + 0x3000), so an address equal to
	 * the upper bound is already outside it (hence >=).
	 * NOTE(review): assumes MGAMMIOBase arithmetic is in bytes,
	 * matching the byte length passed to mprotect() -- confirm.
	 */
	if(((void *)sc.cr2 < (void *)GLXSYM(MGAMMIOBase) )||
	   ((void *)sc.cr2 >= (void *)(GLXSYM(MGAMMIOBase) + 0x3000))){
		/* Oops, a real segmentation fault. This might be a good place
		 * to set a breakpoint */
		FatalError("Segmentation fault!\n");
	}
#endif

	if ( !registersLocked ) {
		/* a fault while we never locked anything is not ours */
		hwMsg( 10, "RegisterAccessSignalHandler() without registersLocked\n" );
		FatalError("RegisterAccessSignalHandler() without registersLocked\n");
	}
		
	/* someone has tried to access hardware registers, so make
	   sure dma is completed */
	hwMsg( 10, "RegisterAccessSignalHandler()\n" );
	mgaglx.c_signals++;
	mgaWaitForDmaCompletion();
	hwMsg( 10, "Leaving RASH()\n" );
}

/*
 * LockRegisters
 * Makes the 0x3000-byte register window at MGAMMIOBase read-only and
 * installs RegisterAccessSignalHandler for the resulting fault
 * (SIGBUS on FreeBSD, SIGSEGV elsewhere), then records that the
 * registers are locked.
 */
static void LockRegisters( void )
{
	/* cause a SIGSEGV if the X server tries to write a hardware register */
	mprotect( GLXSYM(MGAMMIOBase), 0x3000, /* PROT_NONE */ PROT_READ );

#ifdef __FreeBSD__
	signal( SIGBUS, RegisterAccessSignalHandler );
#else
	signal( SIGSEGV, (void (*)(int))RegisterAccessSignalHandler );
#endif

	registersLocked = 1;
}

/*
 * mgaFlushRealDma
 * Starts hardware dma on the current buffer by programming
 * PRIMADDRESS / PRIMEND with the physical range of the primary
 * commands.  Does nothing when mgaglx.skipDma is set.  In async mode
 * (dmaDriver == 3) the registers are locked afterwards.
 */
static void mgaFlushRealDma( void )
{
	int	primaryCount;
	hwUI32	primaryEnd;

	if ( mgaglx.skipDma ) {
		return;
	}
	hwMsg( 11, "mgaFlushRealDma()\n" );

	/* make sure any write combining data is flushed */
	FlushWriteCombining();

	/* physical address just past the last primary dword */
	primaryCount = dma_buffer->primaryDwords;
	primaryEnd = dma_buffer->physicalAddress + primaryCount*4;

	/* program the physical registers to start dma going */
	OUTREG( MGAREG_PRIMADDRESS, dma_buffer->physicalAddress );
	OUTREG( MGAREG_PRIMEND, primaryEnd | mgaglx.use_agp);

	if ( MGA_DEBUG & MGA_DEBUG_OUTREG ) {
		fprintf(stderr, 
			"Flush 0x%x..0x%x\n",
			dma_buffer->physicalAddress,
			primaryEnd);
	}

	/* if we are going to run async, throw a signal if X tries to
	 * use the hardware registers */
	if ( mgaglx.dmaDriver == 3 ) {
		LockRegisters();
	}
}

/*
 * mgaServerDmaFlush
 * Send all pending commands off to the hardware.
 * If we are running async, the hardware will be drawing
 * while we return to do other things.
 *
 * wait: non-zero to block until the dma has completed.
 *
 * Sequence:
 *  - an empty buffer is just reset in place (after an optional wait);
 *  - a buffer whose first dword still holds the reset marker is
 *    refused;
 *  - the front buffer setup commands and a DWGSYNC quad are appended;
 *  - if the server owns the VT, the previous dma is waited for and
 *    the buffer is dispatched through pseudo or real dma according
 *    to mgaglx.dmaDriver;
 *  - the active buffer index is toggled and the new buffer reset.
 */
void mgaServerDmaFlush( int wait ) {
	int		start, end;

	/* if the buffer doesn't contain any data, just change in place */
	if ( !dma_buffer->primaryDwords ) {
		if (wait) {
			mgaWaitForDmaCompletion();
		}

		mgaDmaResetBuffer();
		return;
	}

	/* mgaDmaResetBuffer() stamps this value into the first dword */
	if ( dma_buffer->virtualAddress[0] == 0x92929292 ) {
		hwMsg(1,"mgaServerDmaFlush: Not flushing uninitialized buffer\n");
		return;
	}

	mgaglx.c_dmaFlush++;
	

	/* Add the commands at the end of the buffer to go back to
	 * drawing on the front buffer the way the X server expects.
	 * These commands aren't added with the normal macros, because that
	 * might trigger a recursive overflow.  Enough space should have
	 * been left for all this after any overflow check.
	 *
	 * DEBUG_ALWAYS_SYNC requires X server acceleration be turned off,
	 * so don't do this.
	 */
	if ((MGA_DEBUG&MGA_DEBUG_ALWAYS_SYNC) == 0) {
		memcpy(dma_buffer->virtualAddress + dma_buffer->primaryDwords, 
		       mgaFrontBuffer->Setup, 
		       sizeof(hwUI32) * mgaFrontBuffer->SetupSize);
		dma_buffer->primaryDwords += mgaFrontBuffer->SetupSize;
	}

	/* add a draw sync command at the end so we can tell when dma is done:
	 * the quad writes 0 to DWGSYNC, replacing the SYNC_DMA_BUSY value
	 * that mgaWaitForDmaCompletion() polls for */
	dma_buffer->virtualAddress[ dma_buffer->primaryDwords ] =
		MGA_ADDRGEN( MGAREG_DMAPAD, MGAREG_DMAPAD, MGAREG_DMAPAD, 
			     MGAREG_DWGSYNC );
	dma_buffer->virtualAddress[ dma_buffer->primaryDwords + 1 ] = 0;
	dma_buffer->virtualAddress[ dma_buffer->primaryDwords + 2 ] = 0;
	dma_buffer->virtualAddress[ dma_buffer->primaryDwords + 3 ] = 0;
	dma_buffer->virtualAddress[ dma_buffer->primaryDwords + 4 ] = 0;
	
	dma_buffer->primaryDwords += 5;
	
	/* if we overran the buffers, there is buggy code!
	 * (this check runs after the trailer above has been written) */
	if ( dma_buffer->primaryDwords > dma_buffer->maxPrimaryDwords ) {
		FatalError( "Primary dma buffer overflowed by %i dwords!", 
		dma_buffer->primaryDwords - dma_buffer->maxPrimaryDwords );
	}
	if ( dma_buffer->secondaryDwords > dma_buffer->maxSecondaryDwords ) {
		FatalError( "Secondary dma buffer overflowed by %i dwords!",
		dma_buffer->secondaryDwords - dma_buffer->maxSecondaryDwords );
	}
	
	/* completely skip doing the dma if we are on another VT */
	if ( GLXSYM(xf86VTSema) ) {
		/* we are going to intentionally touch the registers, so don't
		 * trip the signal handler
		 */
		if ( registersLocked ) {
			UnlockRegisters();
		}

		/* wait for the last buffer to complete; a return of 0 means
		 * no poll found the hardware busy */
		if ( !mgaWaitForDmaCompletion() ) {
			mgaglx.hardwareWentIdle = 1;
		} else {
			mgaglx.hardwareWentIdle = 0;
		}

		MgaSetSyncBusy();

		/* collect timing information if we are going synchronously */
		if ( mgaglx.dmaDriver != 3 ) {
 			start = usec();
 		} else {
 			start = end = 0;
 		}
 	
 	
	 	/* actually send the data to the card:
	 	 * dmaDriver < 2 hand-feeds the buffer, otherwise real dma is
	 	 * used and waited on when dmaDriver == 2 or wait is set */
	 	if ( mgaglx.dmaDriver < 2 ) {
		 	mgaFlushPseudoDma();
		 } else {
	 		mgaFlushRealDma();
			if ( mgaglx.dmaDriver == 2 || wait ) {
				/* wait until the dma completes */
				mgaWaitForDmaCompletion();
			}
	 	}
 	
	 	if ( mgaglx.dmaDriver != 3 ) {
			end = usec();
		}

	} else {
		/* no dma was issued, so there is no timing to report */
		start = end = 0;
	}
 

	hwMsg(9, "flushmode %i, buffer %i: prim dwords:%i  sec dwords:%i  usec:%i\n", 
		mgaglx.dmaDriver,  mgaActiveDmaBuffer,
		dma_buffer->primaryDwords,
		dma_buffer->secondaryDwords, end - start );	

	/* swap to using the other buffer */
	mgaActiveDmaBuffer ^= 1;

	mgaDmaResetBuffer();	
}

/*
 * FlushOrFinishWithOverflowCheck
 * Common worker for mgaDmaFlush / mgaDmaFinish / mgaDmaOverflow.
 *
 * A finish can be caused by a software fallback inside a
 * renderStart/renderFinish or due to a vertex overflow, so the warp
 * commands must be handled here: an open warp series is closed
 * before the flush and reopened afterwards.
 *
 * finish: passed through to mgaDoDmaFlush() as its wait argument.
 */
static void FlushOrFinishWithOverflowCheck( int finish )
{
	const int	inWarpSeries = ( mgaglx.warp_seriesStart != NULL );

	if ( inWarpSeries ) {
		/* temporarily remove the overflow padding so the WARP
		 * secondary dma can be started */
		int	savedOverflowPoint = dma_buffer->primaryOverflowPoint;

		dma_buffer->primaryOverflowPoint = dma_buffer->maxPrimaryDwords;

		/* finish any unsent warp triangles */
		mgaWarpFinishPrimitives();

		/* put the overflow marker back */
		dma_buffer->primaryOverflowPoint = savedOverflowPoint;
	}

	mgaDoDmaFlush( finish );

	/* Overflow can happen anywhere, so normal update mechanisms
	 * aren't sufficient.  We can't just call mgaDDUpdateHwState,
	 * because we may have overflowed from inside that (especially
	 * with texture uploads), and the recursion would be Bad.
	 * This is done whether or not we were inside a primitive, so
	 * the rendering state that mgaDoDmaFlush forced to the front
	 * buffer gets reset.
	 */
	if ( mgaDB ) {
		mgaUpdateRegs( ~0 );
	}

	if ( !inWarpSeries ) {
		hwMsg( 9, "Overflow was not inside warp series\n" );
		return;
	}

	hwMsg( 9, "Resetting mgaWarpStartPrimitives after overflow\n" );
	mgaWarpStartPrimitives();
}
 
/*
 * mgaDmaFlush
 * Flushes the pending dma commands without asking for a wait
 * (passes 0 to FlushOrFinishWithOverflowCheck).
 */
void mgaDmaFlush( void ) {
	FlushOrFinishWithOverflowCheck( 0 );
}

/*
 * mgaDmaFinish
 * Flushes the pending dma commands and asks the flush routine to wait
 * (passes 1 to FlushOrFinishWithOverflowCheck).
 */
void mgaDmaFinish( void ) {
	FlushOrFinishWithOverflowCheck( 1 );
}



/*
 * mgaDmaOverflow
 * This is called when MGADMAGETPTR is at the end of the buffer.
 * Flushes the current buffer so a fresh one is available, then
 * verifies that newDwords fits below the new buffer's primary
 * overflow point.  Re-entering this function while it is running,
 * or asking for more than fits, is fatal.
 */
void mgaDmaOverflow( int newDwords )
{
	static int	inProgress;	/* guards against re-entry */

	hwMsg( 9, "mgaDmaOverflow(%i)\n", newDwords );

	/* we are so dead if this happens recursively */
	if ( inProgress ) {
		FatalError( "recursive mgaDmaOverflow\n" );
	}
	inProgress = 1;

	/* flush all the current commands so we will have another
	 * empty buffer.  If we are direct rendering, this will go to
	 * the server */
	FlushOrFinishWithOverflowCheck( 0 );
	mgaglx.c_overflows++;

	/* if something was asking for more dwords than an empty buffer
	 * holds, we are completely screwed */
	if ( dma_buffer->primaryDwords + newDwords >
	     dma_buffer->primaryOverflowPoint ) {
		FatalError("mgaDmaOverflow > maxPrimaryDwords");
	}

	inProgress = 0;
}

/*
 * mgaDmaSecondaryOverflow
 * Called when the secondary region of the current buffer cannot hold
 * another newDwords.  Fatal if the request exceeds the capacity of
 * an empty secondary region; otherwise the buffer is flushed through
 * mgaDmaOverflow() to obtain a fresh one.
 */
void mgaDmaSecondaryOverflow( int newDwords ) {
	/* if something was asking for more dwords than an empty buffer
	   holds, we are completely screwed */
	if ( newDwords > dma_buffer->maxSecondaryDwords ) {
		/* the limit tested here is the secondary one, so say so */
 		FatalError("mgaDmaSecondaryOverflow > maxSecondaryDwords");
	}

	mgaDmaOverflow( 0 );	/* no check on primary */
}


/*
 * dmaRegisterOverrun
 * Unconditionally fatal.  Per the original author's note below, it is
 * only reached when debugging code has been added to the DMA macros.
 */
void dmaRegisterOverrun( void ) {
	/* this is only when I am adding debugging code to the DMA macros */
	FatalError( "dmaRegisterOverrun\n" );
}


/*
 * mgaWaitDrawingEngine
 * Does not return until the drawing engine has finished drawing
 * pixels, so the framebuffer can safely be read or written for
 * software rendering.  Always returns 0.
 */
int mgaWaitDrawingEngine( void )
{
	mgaglx.c_drawWaits += 1;	/* counted for the performance block display */
	mgaDmaFinish();			/* drain all pending dma */

	return 0;
}



/* secondary dma is for ILOAD and VERTEX transfers, and can be properly
   simulated with pseudo dma if needed.  The data pointer should be from
   a previous mgaAllocSecondaryBuffer().

   transferType: transfer mode; must fit in the low two bits, where it
                 is OR-ed into the SECADDRESS value.
   data:         start of the payload; must lie in the secondary region
                 of the current dma buffer, i.e. at or after
                 virtualAddress + maxPrimaryDwords.
   dwords:       payload length in dwords, must be > 0.

   Any violation of the above is fatal.  Adds SECADDRESS / SECEND
   commands to the primary stream, then triggers an overflow flush if
   the primary overflow point has been passed. */
void mgaSecondaryDma( transferType_t transferType, hwUI32 *data, int dwords ) {
	int		base;
	int		offset;
	DMALOCALS;

	/* the secondary region starts right after the primary dwords */
	if ( data < 
	     dma_buffer->virtualAddress + dma_buffer->maxPrimaryDwords ) {
		FatalError( "mgaSecondaryDma error: below start\n" );
	}
	
	if ( dwords <= 0 ) {
		FatalError( "mgaSecondaryDma error: dwords <= 0\n" );
	}

	/* only the two low bits may be set */
	if ( transferType & ~3 ) {
		FatalError( "mgaSecondaryDma error: bad transferType\n" );
	}
	
	if ( data - dma_buffer->virtualAddress + dwords > 
	     dma_buffer->maxSecondaryDwords + dma_buffer->maxPrimaryDwords ) {
		FatalError( "mgaSecondaryDma error: past end\n" );
	}
	
	/* add dma commands to start a secondary dma channel */

	/* byte offset of the payload from the start of the buffer */
	offset = (char *)data - (char *)dma_buffer->virtualAddress;
	hwMsg( 9, "mgaSecondaryDma: %i, %i\n", offset, dwords * 4 );
	
	/* physical address of the payload */
	base = dma_buffer->physicalAddress + offset;
	
	/* this cannot be allowed to overflow, because the secondary
	buffer has already been allocated in this block */
	MGADMAGETPTR_NO_OVERFLOW(5);

	DMAOUTREG( MGAREG_SECADDRESS, ( base | transferType ) );
	DMAOUTREG( MGAREG_SECEND, ( ( base + dwords*4 ) | mgaglx.use_agp ) );

	/* don't pad the extra registers, because when secondary dma ends,
	it will always fetch a new quad */
	DMAADVANCECHOP();
	
	/* now check for overflow */
	if( dma_buffer->primaryDwords > dma_buffer->primaryOverflowPoint ) {
		mgaDmaOverflow( 0 );
	}
}



/*
 * mgaSetupDma
 * Adds SETUPADDRESS / SETUPEND commands to the primary stream for a
 * block that lives in the secondary region of the current dma buffer.
 *
 * data:   start of the block; must be at or after
 *         virtualAddress + maxPrimaryDwords.
 * dwords: block length in dwords, must be > 0.
 *
 * Any violation is fatal.  As in mgaSecondaryDma(), an overflow flush
 * is triggered afterwards if the primary overflow point was passed.
 */
void mgaSetupDma( hwUI32 *data, int dwords ) 
{
	int		base;
	int		offset;
	DMALOCALS;

	if ( data < dma_buffer->virtualAddress + dma_buffer->maxPrimaryDwords )
		FatalError( "mgaSetupDma error: below start\n" );
	
	if ( dwords <= 0 ) 
		FatalError( "mgaSetupDma error: dwords <= 0\n" );
	
	if ( data - dma_buffer->virtualAddress + dwords > 
	     dma_buffer->maxSecondaryDwords + dma_buffer->maxPrimaryDwords ) {
		FatalError( "mgaSetupDma error: past end\n" );
	}	

	/* byte offset of the block from the start of the buffer */
	offset = (char *)data - (char *)dma_buffer->virtualAddress;
	
	/* physical address of the block */
	base = dma_buffer->physicalAddress + offset;

	if (MGA_DEBUG&MGA_DEBUG_OUTREG)
		fprintf(stderr, 
			"mgaSetupDma: virt %p, offset %x, phys %x, bytes %i\n",
			data, offset, base, dwords * 4 );

	
	/* this cannot be allowed to overflow, because the setup
	buffer has already been allocated in this block */
	MGADMAGETPTR_NO_OVERFLOW(5);

	DMAOUTREG( MGAREG_SETUPADDRESS, ( base | SETADD_mode_vertlist ) );
	DMAOUTREG( MGAREG_SETUPEND, ( ( base + dwords*4 ) | mgaglx.use_agp ) );

	/* don't pad the extra registers, because when secondary dma ends,
	it will always fetch a new quad */
	DMAADVANCECHOP();
	
	/* now check for overflow */
	if( dma_buffer->primaryDwords > dma_buffer->primaryOverflowPoint ) 
		mgaDmaOverflow( 0 );	
}


/*
 * mgaAllocPrimaryAndSecondaryBuffer
 * Makes sure the current dma buffer has room for primaryDwords of
 * commands and secondaryDwords of payload, overflowing to a fresh
 * buffer if either would not fit, then returns the result of
 * mgaAllocSecondaryBuffer( secondaryDwords ).
 */
hwUI32 *mgaAllocPrimaryAndSecondaryBuffer( int primaryDwords, 
					    int secondaryDwords )
{
	/* if we won't be able to allocate enough of either kind,
	 * overflow now.  The secondary check comes first; the primary
	 * check then looks at whichever buffer is current afterwards. */
	if ( dma_buffer->maxSecondaryDwords <
	     dma_buffer->secondaryDwords + secondaryDwords ) {
		mgaDmaSecondaryOverflow( secondaryDwords );
	}

	if ( dma_buffer->primaryOverflowPoint <
	     dma_buffer->primaryDwords + primaryDwords ) {
		mgaDmaOverflow( primaryDwords );
	}

	return mgaAllocSecondaryBuffer( secondaryDwords );
}


/*
 * mgaFlushPseudoDma
 * Hand feed a dma buffer to the card, simulating secondary dma as needed.
 * This will parse the buffer even if skipdma is set, allowing safe
 * range checking.
 *
 * The primary stream is walked in groups of five dwords, each copied
 * to pseudoDmaVirtual.  A group whose first byte is the SECADDRESS
 * index is instead treated as a three-dword secondary dma start: the
 * referenced range is validated and copied with OPMODE switched to
 * the requested transfer mode.  OPMODE is saved on entry and restored
 * on exit.  Also sets mgaglx.hardwareWentIdle to 1.
 */
static void mgaFlushPseudoDma( void ) {
	hwUI32	save;		/* original OPMODE; only set and used when !skipDma */
	hwUI32	*src, *p;
	int	i, j;
	int	mode;
	int	dwords;
	int	count;

	count = dma_buffer->primaryDwords;

	hwMsg( 20, "primary pseudoDma: %i dwords\n", count );
	
	mgaglx.hardwareWentIdle = 1;
	
	if ( !mgaglx.skipDma ) {
		save = INREG(MGAREG_OPMODE);
		OUTREG( MGAREG_OPMODE,(save & OM_dmamod_MASK) | (TT_GENERAL<<2) );
	}
  
	/* destination that the dwords are hand-fed into */
	p = pseudoDmaVirtual;
	
  	src = dma_buffer->virtualAddress;
	for ( i = 0 ; i < count ; ) {
		src = dma_buffer->virtualAddress + i;
		
		/* check for a secondary dma start: compares the first byte
		 * in memory of the command dword against the register index */
		if ( *(hwUI8 *)src == ADRINDEX(MGAREG_SECADDRESS) ) {
			hwUI32	start, end;
			
			start = src[1];
			end = src[2];

			/* low two bits of the start word carry the transfer
			 * mode; the rest is a physical address, converted
			 * here to a byte offset into the buffer */
			mode = start & 3;
			start = ( start & ~3 ) - dma_buffer->physicalAddress;
			end = ( end & ~3 ) - dma_buffer->physicalAddress;
			
			/* the range must lie inside the secondary region */
			if ( ( start >> 2 ) < dma_buffer->maxPrimaryDwords ) {
				FatalError( "mgaFlushPseudoDma: start before buffer: %i", start );
			}
						
			if ( ( end >> 2 ) > dma_buffer->maxSecondaryDwords + dma_buffer->maxPrimaryDwords ) {
				FatalError( "mgaFlushPseudoDma: end after buffer: %i", end );
			}
			
			if ( start >= end ) {
				FatalError( "mgaFlushPseudoDma: start >= end: %i, %i", start, end );
			}
			
			dwords = ( end - start ) >> 2;
			
			src = dma_buffer->virtualAddress + ( start >> 2 );
			
			hwMsg( 20, "secondary pseudoDma: %i dwords, mode %i\n", dwords, mode );

			if ( !mgaglx.skipDma ) {			
				OUTREG( MGAREG_OPMODE,(save & OM_dmamod_MASK) | (mode<<2) );

				for ( j = 0 ; j < dwords ; j++ ) {
					p[j] = src[j];
				}
					
				/* we will fetch a new general index word immediately */
				OUTREG( MGAREG_OPMODE,(save & OM_dmamod_MASK) | (TT_GENERAL<<2) );
			}
			/* a secondary start occupies three dwords, not five */
			i += 3;
			continue;
		}
		
		/* ordinary quad: one command dword plus four data dwords */
		if ( !mgaglx.skipDma ) {
			p[0] = src[0];
			p[1] = src[1];
			p[2] = src[2];
			p[3] = src[3];
			p[4] = src[4];
		}
		
		i += 5;
		/* note: this check runs after the quad has been copied */
		if ( i > count ) {
			FatalError( "mgaFlushPseudoDma: didn't end with a full quad" );
		}
	}

	if ( !mgaglx.skipDma ) {
		OUTREG(MGAREG_OPMODE,save);
	}
}



/* mgaAllocSetupBuffer
 *
 * Setup DMA needs to get the physical address of the start of the
 * region returned.  The vertex data will be placed in the first
 * section of the buffer, followed directly by the elements (which are
 * really just pointers into the vertex data).  Only the elements will
 * be sent by the driver as dma - the vertices will be fetched in turn
 * by the card (I think).
 *
 * Maybe we should be putting the vertices directly into card
 * memory to cut down host->card traffic...
 *
 * JDC: I probably broke this when I rearranged the warp code
 *
 * dwords:     requested size; rounded up to a multiple of four.
 * phys_start: receives the physical address of the returned region.
 *
 * Returns the virtual address of the region, which starts on a
 * four-dword boundary within the dma buffer.  Fatal if the context
 * has pending new_state.
 */
hwUI32	*mgaAllocSetupBuffer( int dwords, hwUI32 *phys_start ) 
{
	hwUI32	firstDword;	/* index of the region from the buffer start */
	hwUI32	*region;

	if ( mgaCtx->new_state ) {
		FatalError("mgaAllocSetupBuffer: mgaCtx->new_state == %x\n",
			   mgaCtx->new_state);
	}

	/* make sure there is room; the 7 extra dwords cover the worst
	 * case of both alignment paddings below plus one spare */
	if ( dma_buffer->secondaryDwords + dwords + 7 > 
	     dma_buffer->maxSecondaryDwords ) {
		mgaDmaSecondaryOverflow( dwords + 7 );
	}

	firstDword = dma_buffer->maxPrimaryDwords + dma_buffer->secondaryDwords;

	/* align the start; the skipped dwords count as used */
	if ( firstDword & 3 ) {
		int	pad = 4 - (firstDword & 3);

		dma_buffer->secondaryDwords += pad;
		firstDword += pad;
	}

	/* round the length up as well */
	if ( dwords & 3 ) {
		int	pad = 4 - (dwords & 3);

		dwords += pad;
	}

	*phys_start = dma_buffer->physicalAddress + firstDword * 4;
	region = dma_buffer->virtualAddress + firstDword;
	dma_buffer->secondaryDwords += dwords;

	if ( MGA_DEBUG & MGA_DEBUG_OUTREG ) {
		fprintf(stderr, "allocated setup buffer, %d dwords, "
			"start %x (phys %x, virt %p)\n",
			dwords, firstDword, *phys_start, region);
	}

	return region;
}

