/* -*- mode: C; c-basic-offset:8 -*- */

/*
 * GLX Hardware Device Driver for Matrox G200/G400
 * Copyright (C) 1999 Jeff Hartmann
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * JEFF HARTMANN, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 *
 * original by Jeff Hartmann <slicer@ionet.net>
 * 6/16/99: rewrite by John Carmack <johnc@idsoftware.com>
 */

/*

This file is only entered at startup.  After mgaInitGLX completes,
nothing here will be executed again.

*/

#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/mman.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <signal.h>

#include "context.h"
#include "depth.h"
#include "macros.h"
#include "texstate.h"
#include "triangle.h"
#include "vb.h"
#include "types.h"

#include "xsmesaP.h"
#include "glx_log.h"
#include "glx_config.h"

#include "mesaglx/context.h"
#include "mesaglx/matrix.h"
#include "mesaglx/types.h"

#define GC XXGC
#include "gcstruct.h"
#include "pixmapstr.h"
#include "servermd.h" /* PixmapBytePad */
#include "scrnintstr.h"
#include "regionstr.h"
#include "windowstr.h"
#undef GC

#include "hw_misc.h"
#include "mm.h"
#include "mgadd.h"
#include "g200_mac.h"
#include "mgalib.h"
#include "hwlog.h"
#include "mgadirect.h"
#include "mgawarp.h"
#include "mgaglx.h"
#include "hw_mtrr.h"

#ifdef HAVE_LINUX_NEWAGP
/* AGP kernel module interface */
#include <linux/agpgart.h>
#include <sys/ioctl.h>
#endif


#if defined(USE_X86_ASM)
#include "X86/common_x86asm.h"
#endif


/*
  Configuration values from config file:

mga_dma=0	: virtual buffer, pseudo dma
mga_dma=1	: physical buffer, pseudo dma
mga_dma=2	: physical buffer, real dma
mga_dma=3	: physical buffer, real dma, async

mga_dmaadr=92	: put the physical buffer at offset 92 * 0x100000 
			: use mga_dmaadr=AGP to use agp memory
mga_dmasize=4	: use 4 * 0x100000 bytes of memory for the buffers

mga_cmdsize=4	: use 4 * 0x100000 bytes for commands, the rest for textures

mga_cardcmds=1	: put command buffers on card memory ( DOESN'T WORK! )
mga_systemtexture = 1	: put textures in main pci/agp memory instead of on card

*/

/* heap over the card's video memory; created and filled in by mgaInitGLX */
memHeap_t	*cardHeap;
hwUI32		cardPhysical;
unsigned char	*cardVirtual;

/* heap textures are allocated from; selected by ChooseTextureHeap */
memHeap_t	*textureHeap;
hwUI32		textureHeapPhysical;	/* 0 if we aren't using system memory texturing */
unsigned char	*textureHeapVirtual;	/* correct for either local or PCI heaps */

/* optional block of system (or AGP) memory; set up by AllocateSystemMemory */
memHeap_t	*sysmemHeap;		/* physical memory, if available */
hwUI32		sysmemBytes;		/* size of memory block */
hwUI32		sysmemPhysical;		/* 0 if we don't have a physical mapping */
unsigned char	*sysmemVirtual;

hwUI32	*pseudoDmaVirtual;		/* our local mapping of pdma window */


#ifdef HAVE_LINUX_NEWAGP
/* This is way too simple for now; we should probably
 * write better support for the new agp module,
 * but this is okay for now I suppose.
 */
int    gartfd;		/* descriptor for GLX_AGPDEVICE; opened in AllocateGARTMemory */
void   *gartbuf;	/* mmap of the AGP aperture, or NULL / MAP_FAILED */

#define MAX_AGP_KEYS 128
agp_info agpinfo;	/* filled in by the AGPIOC_INFO ioctl */
agp_allocate agpentries[MAX_AGP_KEYS];	/* one entry per allocated chunk of pages */
#define GLX_AGPDEVICE		"/dev/agpgart"
#define GLX_AGPINFO_IOCTL	AGPIOC_INFO
#endif


/* the two command buffers that are ping-ponged; set up by AllocateCommandBuffers */
mgaDma_buffer	*dmaBuffers[2];


/* backing store shared by both dmaBuffers[] entries */
static	hwUI32	bufferBytes;			/* size of buffer */
static	hwUI32	bufferPhysical;		/* 0 if we don't have a physical mapping */
static	unsigned char	*bufferVirtual;

/* used below but not defined in this part of the file */
void mgaDmaResetBuffer( void );
void MgaSetSyncBusy( void );


/*
 * MapPseudoDmaWindow
 * Reads PCI configuration offset 0x18 of the card, masks it down to an
 * 8MB boundary and maps 0x800000 bytes at that physical address into
 * our address space.  The result is stored in pseudoDmaVirtual.
 * (Offset 0x18 is presumably the base address register of the pseudo
 * dma / ILOAD aperture -- confirm against the chip documentation.)
 */
static void MapPseudoDmaWindow( void ) {
	hwUI32	pseudoDmaPhysical;

	pseudoDmaPhysical = pcibusRead( GLXSYM(MGAPciTag), 0x00000018 ) & 0xff800000;
	pseudoDmaVirtual =
		xf86MapVidMem( GLXSYM(vga256InfoRec).scrnIndex, EXTENDED_REGION,
			       (pointer) ((unsigned long) pseudoDmaPhysical),
			       0x800000 );

	/* %p needs a real pointer argument; handing it a bare hwUI32 is
	 * undefined for a varargs call, so convert explicitly */
	hwMsg( 1, "pseudoDmaPhysical : %p\n",
	       (void *) ((unsigned long) pseudoDmaPhysical) );
	hwMsg( 1, "pseudoDmaVirtual : %p\n", (void *) pseudoDmaVirtual );
}


/*
 * DmaBenchmark
 * Times one primary dma transfer of (roughly) `dwords` dwords starting
 * at `physical`, and logs the resulting throughput.
 *
 * bufferVirtual : our mapping of the memory the card will read
 * physical      : address of the same memory as seen by the card
 * dwords        : size of the buffer in 32 bit words
 *
 * The buffer is filled with 0x15151515 and terminated with a quad of
 * DWGSYNC writes so completion can be detected.
 */
static void DmaBenchmark( unsigned char *bufferVirtual, unsigned int physical, int dwords ) {
	int		*cmds = (int *)bufferVirtual;
	int		start, end;
	int		mb;
	float		fsec;
	hwUI32	dmaEnd;
	int		i;

	/* we need room for at least one five dword group (address dword
	 * plus four data dwords); anything smaller would make the
	 * terminator below land at a negative index */
	if ( dwords < 7 ) {
		hwMsg( 1, "DmaBenchmark: buffer of %i dwords is too small, skipping\n", dwords );
		return;
	}

	for ( i = 0 ; i < dwords-1 ; i++ ) {
		/* fill with dmapad */
		cmds[i] = 0x15151515;
	}

	/* only test a full quad of registers plus dwgsync */
	dwords = 5 * ((dwords-2)/5);

	/* add a draw sync command at the end so we can tell when dma is done */
	cmds[ dwords - 5 ] =
		MGA_ADDRGEN( MGAREG_DWGSYNC, MGAREG_DWGSYNC, MGAREG_DWGSYNC,
			     MGAREG_DWGSYNC );
	cmds[ dwords - 4 ] = 0;
	cmds[ dwords - 3 ] = 0;
	cmds[ dwords - 2 ] = 0;
	cmds[ dwords - 1 ] = 0;

	MgaSetSyncBusy();

	/* make sure any write combining data is flushed */
	FlushWriteCombining();

	dmaEnd = physical + dwords*4;
	start = usec();

	OUTREG( MGAREG_PRIMADDRESS, physical );
	OUTREG( MGAREG_PRIMEND, dmaEnd | mgaglx.use_agp);

	mgaWaitForDmaCompletion();

	end = usec();

	fsec = ( end - start ) / 1000000.0;

	/* a zero (timer too coarse) or negative (timer wrapped) interval
	 * cannot be turned into a meaningful rate; report 0 instead of
	 * converting an infinite or negative float to int */
	if ( fsec > 0 ) {
		mb = ( (float)dwords * 4 / 0x100000 ) / fsec;
	} else {
		mb = 0;
	}

	hwMsg( 1, "DmaBenchmark 0x%x bytes, %5.3f sec: %i mb/s\n", dwords*4, fsec, mb );
}

#if 0
/*
 * VisualDmaTest
 * This will scroll a half meg image on the screen using dma
 *
 * Compiled out; kept as a debugging aid.  The only reference to it in
 * this file is a commented-out call in mgaInitGLX.
 */
static void VisualDmaTest( void ) {
	int	*dest, *destPtr;
	int	cmd;
	int	i, j, k, r, g, b;
	DMALOCALS;
	int	*test;
	
	/* scratch memory that is only shuffled around between frames */
	test = malloc( 16*1024*1024 );
	
	/* start the primary commands */
	for ( j = 0 ; j < 1280-512 ; j += 5 ) {
	
	dest = mgaAllocSecondaryBuffer( 512*256 );

	/* fill in the secondary buffer */
	destPtr = dest;
        for ( i = 0 ; i < 512 ; i++ ) {
		for ( k = 0 ; k < 256 ; k++ ) {
			int	pix;
			
			/* two identical 565 pixels are packed into each dword */
			r = i * 255 / 512;
			g = k * 255 / 256;
			b = 128;
			pix = MGAPACKCOLOR565(r,g,b) |  
				( MGAPACKCOLOR565(r,g,b) << 16 );
			*destPtr++ = pix;
		}
	}

	
	/* XY destination, linear iload */
	cmd = DC_opcod_iload | 		/* image load */
	 	DC_atype_rpl |			/* raster replace mode */
		DC_linear_linear | 		/* linear source */
		DC_bltmod_bfcol |		/* source data is pre-formatted color */
		(0xC << DC_bop_SHIFT)  | 	/* use source bit op */
	 	DC_sgnzero_enable |		/* normal scanning direction */
	 	DC_shftzero_enable |	/* required for iload */
	 	DC_clipdis_enable;		/* don't use the clip rect */

	for ( i = 0 ; i < 512 ; i+=20 ) {
		MGADMAGETPTR( 20 );

		MGADMA_YDSTLEN( i, 512 );	/* top to bottom */
		MGADMA_FXBNDRY( j, 511+j );	/* full width */

		DMAOUTREG( MGAREG_AR0, 512 * 512 - 1);	/* source pixel count */
		DMAOUTREG( MGAREG_AR3, 0 );			/* required */
	
		/* pad if needed so the exec is at the end of a quad */
		DMAOUTREG( MGAREG_DMAPAD, 0 );
		DMAOUTREG( MGAREG_DMAPAD, 0 );
		DMAOUTREG( MGAREG_DMAPAD, 0 );
		
		MGADMA_DWGCTL_EXEC(cmd);
	
		DMAADVANCE();

		/* send the secondary data */	
		mgaSecondaryDma( TT_BLIT, dest, 512*256 );
	}
	
	mgaDmaFlush();
	
		/* pound on memory some */
		for ( i = 0 ; i < 4*1024*1024 ; i++ ) {
			test[i] = test[(i+2*1024*1024)&(4*1024*1024-1)];
		}
	}

	free( test );
}
#endif

/*
 * AllocatePhysicalDmaBuffer
 * Carves the command buffer out of sysmemHeap.  The size comes from
 * mgaglx.cmdSize (in megabytes, 4 if unset).  On success bufferVirtual
 * and bufferPhysical describe the block; on failure they are left
 * untouched.
 */
static void AllocatePhysicalDmaBuffer( void ) {
	PMemBlock	cmdBlock;

	/* work out how many bytes were asked for */
	bufferBytes = mgaglx.cmdSize;
	if ( bufferBytes != 0 ) {
		hwMsg(1,"using mga_cmdsize = %i\n", bufferBytes );
	} else {
		hwMsg(1,"defaulting to mga_cmdsize = 4\n" );
		bufferBytes = 4;
	}
	bufferBytes *= 0x100000;

	/* the warp pipes have to live in the same heap, so when the
	 * commands would take all of it, give a little back */
	if ( bufferBytes == sysmemBytes ) {
		bufferBytes -= mgaWarpPipeSize();
		hwMsg( 1, "Shrinking mga_cmdsize with %d bytes to make room for warp pipes\n"
		        , mgaWarpPipeSize());
	}

	cmdBlock = mmAllocMem( sysmemHeap, bufferBytes, 8, 0 );
	if ( cmdBlock ) {
		hwMsg( 1, "allocated 0x%x bytes from sysmemHeap for command buffers.\n"
			, bufferBytes );
		bufferVirtual = sysmemVirtual + mmOffset( cmdBlock );
		bufferPhysical = sysmemPhysical + mmOffset( cmdBlock );
		return;
	}

	hwMsg( 1, "failed to allocate 0x%x bytes from sysmemHeap for command buffers.\n"
		, bufferBytes );
}

/*
 * AllocateVirtualDmaBuffer
 * Allocates the command buffer from ordinary process memory.  The size
 * comes from mgaglx.cmdSize (in megabytes, 4 if unset).  The resulting
 * pointer is rounded up to a 4K boundary and stored in bufferVirtual;
 * the original malloc pointer is not kept, the buffer is never freed.
 * Aborts through FatalError if the memory cannot be obtained.
 */
static void AllocateVirtualDmaBuffer( void ) {
	unsigned char	*raw;

	/* determine total size of buffer */
	bufferBytes = mgaglx.cmdSize;
	if ( !bufferBytes ) {
		hwMsg(1,"defaulting to mga_cmdsize = 4\n" );
		bufferBytes = 4;
	} else {
		hwMsg(1,"using mga_cmdsize = %i\n", bufferBytes );
	}
	bufferBytes *= 0x100000;

	/* one extra page so the start can be rounded up */
	raw = malloc( bufferBytes + 0x1000 );
	if ( !raw ) {
		/* without this check a NULL result would be "aligned" to
		 * address 0x1000 and used as if it were valid */
		FatalError( "mgaglx: unable to allocate 0x%x bytes for command buffers\n",
			    (unsigned int) bufferBytes );
	}

	/* align it to page size, might help on something used as much as this */
	bufferVirtual = (pointer)(((unsigned long) raw & ~0xFFF) + 0x1000);
	hwMsg( 1, "allocated 0x%x bytes from virtual memory for command buffers.\n"
		, bufferBytes );
}

/*
 * OptimizeDMA
 * Sets bit 22 (enhmemacc) in the card's PCI configuration register
 * at offset 0x40, leaving the other bits as they were.
 */
static void OptimizeDMA( void ) {
	int	optionReg;

	optionReg = pcibusRead( GLXSYM(MGAPciTag), 0x40 );
	optionReg |= (1<<22);
	pcibusWrite( GLXSYM(MGAPciTag), 0x00000040, optionReg );
}


/*
 * IsSDRAM
 * Looks at bit 14 of the PCI configuration register at offset 0x40.
 * Returns 0 (and logs a message) when the bit is set, 1 otherwise.
 */
static int IsSDRAM( void ) {
	int	optionReg = pcibusRead( GLXSYM(MGAPciTag), 0x40 );

	if ( !( optionReg & ( 1<<14 ) ) ) {
		return 1;
	}

	hwMsg(1,"    SGRAM features enabled\n" );
	return 0;
}

  

/*
 * AllocateCommandBuffers
 * The dma command buffers can be either virtual or in the sysmemHeap
 * Some fraction of the buffer will be used for primary dma, and the rest
 * will be for secondary dma.
 */
#define	PRIMARY_FRACTION	8
#define	OVERFLOW_DWORDS		40	/* a warp secondary dma, X server registers, dwgsync, etc */
static void AllocateCommandBuffers( void ) {
	/* try to allocate the command buffers in sysmem */
	if ( mgaglx.dmaDriver > 0 ) {
		if ( sysmemHeap ) {
			AllocatePhysicalDmaBuffer();
		}
	}
		
	/* if we didn't get real memory, get a virtual buffer and use PDMA */
	if ( !bufferPhysical ) {
		mgaglx.dmaDriver = 0;
		AllocateVirtualDmaBuffer();
	}
	
	if (__glx_is_server) {
		/* benchmark the writing speed to the command buffer */
		MemoryBenchmark( bufferVirtual, bufferBytes / 4 );
		MemoryBenchmark( bufferVirtual, bufferBytes / 4 );

		/* benchmark the read speed of the card's dma */
		if ( mgaglx.dmaDriver >= 2 ) {
			DmaBenchmark( bufferVirtual, (unsigned int)bufferPhysical, bufferBytes / 4 );
	 		DmaBenchmark( bufferVirtual, (unsigned int)bufferPhysical, bufferBytes / 4 );
	 		DmaBenchmark( bufferVirtual, (unsigned int)bufferPhysical, bufferBytes / 4 );
	 		DmaBenchmark( bufferVirtual, (unsigned int)bufferPhysical, bufferBytes / 4 );
	 		DmaBenchmark( bufferVirtual, (unsigned int)bufferPhysical, bufferBytes / 4 );
	 		if ( !mgaglx.use_agp ) {
				OptimizeDMA();
		 		DmaBenchmark( bufferVirtual, (unsigned int)bufferPhysical, bufferBytes / 4 );
		 		DmaBenchmark( bufferVirtual, (unsigned int)bufferPhysical, bufferBytes / 4 );
		 		DmaBenchmark( bufferVirtual, (unsigned int)bufferPhysical, bufferBytes / 4 );
		 		DmaBenchmark( bufferVirtual, (unsigned int)bufferPhysical, bufferBytes / 4 );
		 		DmaBenchmark( bufferVirtual, (unsigned int)bufferPhysical, bufferBytes / 4 );
		 	}
		}
	}

	/* always leave enough room for a X server setup and DWGSYNC after overflow checks */

	/* setup the two buffers that will be ping-ponged */
	dmaBuffers[0] = malloc(sizeof(mgaDma_buffer));
	memset(dmaBuffers[0], '\0', sizeof(mgaDma_buffer));
	dmaBuffers[0]->virtualAddress = (hwUI32 *)bufferVirtual;
	dmaBuffers[0]->physicalAddress = bufferPhysical;
	dmaBuffers[0]->maxPrimaryDwords = ( bufferBytes >> 3 ) / PRIMARY_FRACTION;
	dmaBuffers[0]->primaryOverflowPoint = dmaBuffers[0]->maxPrimaryDwords - OVERFLOW_DWORDS;
	dmaBuffers[0]->maxSecondaryDwords = ( bufferBytes >> 3 ) - dmaBuffers[0]->maxPrimaryDwords;
	
	dmaBuffers[1] = malloc(sizeof(mgaDma_buffer));
	memset(dmaBuffers[1], '\0', sizeof(mgaDma_buffer));
	dmaBuffers[1]->virtualAddress = (hwUI32 *)bufferVirtual + bufferBytes/8;
	dmaBuffers[1]->physicalAddress = bufferPhysical + bufferBytes/2;
	dmaBuffers[1]->maxPrimaryDwords = ( bufferBytes >> 3 ) / PRIMARY_FRACTION;
	dmaBuffers[1]->primaryOverflowPoint = dmaBuffers[1]->maxPrimaryDwords - OVERFLOW_DWORDS;
	dmaBuffers[1]->maxSecondaryDwords = ( bufferBytes >> 3 ) - dmaBuffers[0]->maxPrimaryDwords;

	hwMsg( 1, "dmaBuffers[]->maxPrimaryDwords = %i\n", dmaBuffers[0]->maxPrimaryDwords );
	hwMsg( 1, "dmaBuffers[]->maxSecondaryDwords = %i\n", dmaBuffers[0]->maxSecondaryDwords );
	
	mgaDmaResetBuffer();
}

/*
 * CloseGART
 * Closes the agpgart device and unmaps the aperture if they are open.
 * This will be called when the direct rendering client closes down,
 * and from the failure paths of AllocateGARTMemory.
 * Does nothing when built without HAVE_LINUX_NEWAGP.
 */
void CloseGART( void )
{
#ifdef HAVE_LINUX_NEWAGP
	/* only positive descriptors are treated as open */
	if ( gartfd > 0 ) {
		close( gartfd );
		gartfd = -1;
	}

	/* nothing mapped, or the mapping attempt failed */
	if ( gartbuf == NULL || gartbuf == MAP_FAILED ) {
		return;
	}

	munmap( gartbuf, agpinfo.aper_size * 0x100000 );
	gartbuf = NULL;
#endif
}

#ifdef HAVE_LINUX_NEWAGP
/*
 * FreeAgpEntries
 * Hands the first `count` keys recorded in agpentries[] back to the
 * agpgart module.  Used to unwind a partially completed allocation.
 */
static void FreeAgpEntries( int count )
{
	int	m;

	for ( m = 0 ; m < count ; m++ ) {
		ioctl( gartfd, AGPIOC_DEALLOCATE, agpentries[m].key );
	}
}

/*
 * AllocateAndBindAgpChunk
 * Allocates `pageCount` pages into agpentries[index] and binds them at
 * page offset `pageStart` of the aperture.  `totalPages` is only used
 * for the log message.
 * Returns 0 on success.  On failure every entry obtained so far
 * (including this one, if it was allocated) is released and -1 is
 * returned.
 */
static int AllocateAndBindAgpChunk( int index, int pageCount, int pageStart, int totalPages )
{
	agp_allocate	*entry = agpentries + index;
	agp_bind	bind;

	entry->pg_count = pageCount;
	entry->type = 0;

	if ( ioctl( gartfd, AGPIOC_ALLOCATE, entry ) ) {
		/* free previous pages */
		FreeAgpEntries( index );
		hwMsg( 1, "AGPGART: allocation of %i pages failed\n", totalPages );
		return -1;
	}

	bind.key = entry->key;
	bind.pg_start = pageStart;

	if ( ioctl( gartfd, AGPIOC_BIND, &bind ) ) {
		/* this entry was allocated but never bound, release it too */
		FreeAgpEntries( index + 1 );
		hwMsg( 1, "AGPGART: bind of %i pages failed\n", totalPages );
		return -1;
	}

	return 0;
}
#endif /* HAVE_LINUX_NEWAGP */

/*
 * AllocateGARTMemory
 * Opens the agpgart device, maps the whole aperture into gartbuf and,
 * when running as the X server, programs the AGP mode and allocates
 * and binds `size` bytes (rounded up to 4K pages) at the start of the
 * aperture, in chunks of at most 1024 pages.
 *
 * Direct rendering clients only open and map the device.
 *
 * Returns 0 on success and -1 on failure.  On failure the device is
 * closed and the aperture unmapped again.  Without HAVE_LINUX_NEWAGP
 * this is a no-op that returns 0.
 */
static int AllocateGARTMemory(size_t size)
{
#ifdef HAVE_LINUX_NEWAGP
	static const char * const compFreqNames[8] = {
		"16 msec", "31 msec", "63 msec", "0.125 sec",
		"0.25 sec", "0.5 sec", "1 sec", "2 sec"
	};
	const int	chunkPages = 1024;
	int		pages = (size + 4095) / 4096;
	int		fullChunks, tailPages, chunksNeeded, k;
	int		mode_mask;
	agp_setup	modesetup;

	gartfd = open(GLX_AGPDEVICE, O_RDWR);
	if (gartfd == -1) {
		hwMsg(1, "unable to open " GLX_AGPDEVICE ": %s\n",
		      strerror(errno));
		return -1;
	}

	if (ioctl(gartfd, AGPIOC_ACQUIRE) != 0) {
		hwMsg(1, "error acquiring agp module: %s\n", strerror(errno));
		CloseGART();
		return -1;
	}

	if (ioctl(gartfd, AGPIOC_INFO, &agpinfo) != 0) {
		hwMsg(1, "error doing AGP info ioctl: %s\n", strerror(errno));
		hwMsg(1, "first attempt\n");
		CloseGART();
		return -1;
	}

	gartbuf = mmap(NULL, agpinfo.aper_size * 0x100000, PROT_READ | PROT_WRITE,
		       MAP_SHARED, gartfd, 0);
	if (gartbuf == MAP_FAILED) {
		hwMsg(1, "mmap() on " GLX_AGPDEVICE " failed: %s\n",
		      strerror(errno));
		CloseGART();
		return -1;
	}

	/* clients are done once the aperture is mapped */
	if (!__glx_is_server) {
		return 0;
	}

	/* This should be table driven for what
	 * agp mode registers we know to work.
	 * Currently it just sets whatever is
	 * there which is not right.
	 */
	mode_mask = glx_getint_secure("mga_gart_mode_mask");
	if (!mode_mask) {
		hwMsg(1, "no mga_gart_mode_mask defined: using mode 1\n");
		mode_mask = 1;
	}

	modesetup.agp_mode = (agpinfo.agp_mode & ~7) | (agpinfo.agp_mode & mode_mask);

	if (ioctl(gartfd, AGPIOC_SETUP, &modesetup) != 0) {
		hwMsg(1, "Error initializing AGP point to point connection\n");
		CloseGART();
		return -1;
	}

	/* Call information function a second time for the agp mode */
	if (ioctl(gartfd, AGPIOC_INFO, &agpinfo) != 0) {
		hwMsg(1, "error doing AGP info ioctl: %s\n", strerror(errno));
		hwMsg(1, "second attempt\n");
		CloseGART();
		return -1;
	}

	if ((agpinfo.agp_mode & 0x00000002 & mode_mask) && !mgaglx.isG400 ) {
		hwMsg(1, "enabling agp 2x pll encoding\n");
		OUTREG(MGAREG_AGP_PLL, AGP_PLL_agp2xpllen_enable);
	}

	if ( mgaglx.isG400 ) {
		int	cfgOr = INREG(MGAREG_CFG_OR);
		int	compFreq = MGA_GET_FIELD(CFG_OR_compfreq, cfgOr);

		if (cfgOr & CFG_OR_comp_or_enable) {
			hwMsg(1, "compensation override\n");
		} else {
			hwMsg(1, "internal compensation logic will be used\n");
		}

		hwMsg(1, "CFG_OR : 0x%x\n", cfgOr);
		if (compFreq >= 0 && compFreq < 8) {
			hwMsg(1, "AGP Compensation Frequency: (%i) %s\n",
			      compFreq, compFreqNames[compFreq]);
		} else {
			hwMsg(1, "AGP Compensation Frequency: (%i) Unknown!\n", compFreq);
		}
		hwMsg(1, "AGP Compensation Value: high=%i low=%i\n",
		      MGA_GET_FIELD(CFG_OR_comporup, cfgOr),
		      MGA_GET_FIELD(CFG_OR_compordn, cfgOr));
	}

	/* split the request into full chunks plus one partial chunk */
	fullChunks = pages / chunkPages;
	tailPages = pages % chunkPages;
	chunksNeeded = fullChunks + ( tailPages ? 1 : 0 );
	if (chunksNeeded > MAX_AGP_KEYS) {
		/* agpentries[] cannot describe a request this large */
		hwMsg(1, "AGPGART: %i pages need %i keys, only %i available\n",
		      pages, chunksNeeded, MAX_AGP_KEYS);
		CloseGART();
		return -1;
	}

	memset(agpentries, 0, sizeof(agp_allocate) * MAX_AGP_KEYS);

	for (k = 0; k < fullChunks; k++) {
		if (AllocateAndBindAgpChunk(k, chunkPages, k * chunkPages, pages)) {
			CloseGART();
			return -1;
		}
	}

	if (tailPages != 0) {
		/* the partial chunk follows directly after the full ones, so
		 * the bound range stays contiguous from page 0 */
		if (AllocateAndBindAgpChunk(fullChunks, tailPages,
					    fullChunks * chunkPages, pages)) {
			CloseGART();
			return -1;
		}
	}

	if (ioctl(gartfd, AGPIOC_RELEASE) != 0) {
		hwMsg(1, "error releasing agp module: %s\n", strerror(errno));
		hwMsg(1, "direct rendering will not work.\n");
		return 0;
	}
#endif /* HAVE_LINUX_NEWAGP */
	return 0;
}


/*
 * AllocateSystemMemory
 * Looks at the configuration (mga_dmasize / mga_dmaadr) to determine
 * if a block of physical memory has been left for graphics after the
 * memory available to the kernel, or if AGP memory should be used.
 * System memory can be used for dma command buffers or
 * textures.
 *
 * On success sysmemHeap, sysmemPhysical and sysmemVirtual describe
 * the block.  On every failure path all three are left at 0 / NULL.
 */
static void AllocateSystemMemory( void ) {
	int		fd;
	char		*adr;

	sysmemPhysical = 0;
	sysmemVirtual = NULL;
	sysmemHeap = NULL;

	/* pseudo dma on a virtual buffer needs no physical memory */
	if ( !mgaglx.dmaDriver ) {
		return;
	}

	/* determine total requested size of buffer */
	sysmemBytes = mgaglx.dmaSize;
	if ( !sysmemBytes ) {

#ifdef HAVE_LINUX_NEWAGP
		hwMsg( 1, "Defaulting to mga_dmasize = 4\n" );
		mgaglx.dmaSize = 4;
		sysmemBytes = 4;
#else
		hwMsg(1,"mga_dmasize not set, skipping physical allocation\n" );
		return;
#endif

	}
	sysmemBytes *= 0x100000;

#ifdef HAVE_LINUX_NEWAGP
	/* try AGP memory */
	adr = glx_getvar_secure( "mga_dmaadr" );

	if ( !adr ) {
		hwMsg(1, "defaulting to mga_dmaadr = agp\n");
	}

	if ( !adr || !strcasecmp( adr, "agp" ) ) {
		if ( AllocateGARTMemory( sysmemBytes ) ) {
			hwMsg(1, "AllocateGARTMemory failed.\n" );
			return;
		}

		sysmemPhysical = agpinfo.aper_base;
		sysmemVirtual = (unsigned char *)gartbuf;
		sysmemHeap = mmInit(0, sysmemBytes );

		/* %p needs pointer arguments, so convert the integers */
		hwMsg( 1, "AGP Aperture: %p\n", (void *) ((unsigned long) sysmemPhysical) );
		hwMsg( 1, "sysmemSize: %p\n", (void *) ((unsigned long) sysmemBytes) );

		mgaglx.use_agp = PDEA_pagpxfer_enable /* | PDEA_primnostart_enable */ ;
		hwMsg(1, "use_agp = %x\n", mgaglx.use_agp);

		SetWriteCombining( sysmemPhysical, agpinfo.aper_size * 0x100000);

		return;
	}

#else

	adr = glx_getvar_secure( "mga_dmaadr" );
	if ( adr && !strcasecmp( adr, "agp" )) {
		hwMsg( 1, "mga_dmaadr = agp isn't supported by this driver\n");
		return;
	}

#endif  /* HAVE_LINUX_NEWAGP */

	/* mgaglx.dmaAdr should be set to a value >= the mem= kernel parm */
	sysmemPhysical = mgaglx.dmaAdr;
	sysmemPhysical *= 0x100000;

	/* Check sysmemPhysical against /proc/meminfo */
	if ( !checkmemoffset( sysmemPhysical ) ) {
		hwMsg( 1, "unlikely mga_dmaadr=%i, skipping physical allocation\n", mgaglx.dmaAdr );
		sysmemPhysical = 0;
		return;
	}

	fd = open( "/dev/mem", O_RDWR );
	if ( fd < 0 ) {
		hwMsg( 1, "failed to open /dev/mem\n" );
		sysmemPhysical = 0;
		return;
	}

	sysmemVirtual = (unsigned char *) mmap( NULL, sysmemBytes, PROT_READ | PROT_WRITE,
						MAP_SHARED, fd, (off_t)sysmemPhysical );
	/* the mapping stays valid after the descriptor is closed, so there
	 * is no reason to keep /dev/mem open */
	close( fd );
	if ( sysmemVirtual == MAP_FAILED ) {
		hwMsg( 1, "failed to mmap sysmem\n" );
		sysmemVirtual = NULL;
		sysmemPhysical = 0;
		return;
	}

	/* FIXME: should verify the memory exists with read / write test */
	SetWriteCombining( sysmemPhysical, sysmemBytes );

	/* create a heap */
	sysmemHeap = mmInit(0, sysmemBytes );
	if ( !sysmemHeap ) {
		hwMsg( 1, "failed to create sysmemHeap\n" );
		munmap( sysmemVirtual, sysmemBytes );
		sysmemVirtual = NULL;
		sysmemPhysical = 0;
		return;
	}

	hwMsg( 1, "sysmemPhysical: %p\n", (void *) ((unsigned long) sysmemPhysical) );
	hwMsg( 1, "sysmemVirtual: %p\n", (void *) sysmemVirtual );
	hwMsg( 1, "sysmemSize: %p\n", (void *) ((unsigned long) sysmemBytes) );
}

/*
 * ChooseTextureHeap
 * Decides whether textures live in the cardHeap or in the sysmemHeap
 * and sets textureHeap, textureHeapVirtual and textureHeapPhysical
 * accordingly.  System memory is only chosen when a sysmemHeap exists,
 * mga_systemtexture is set, and at least 1MB remains beside the
 * command buffers.
 */
static void ChooseTextureHeap( void ) {
	const char	*stayOnCard = NULL;

	/* find the first reason, if any, to keep textures on the card */
	if ( !sysmemHeap ) {
		/* without a system memory heap, textures MUST be on the card */
		stayOnCard = "No sysmemHeap, textures must be stored on card\n";
	} else if ( !mgaglx.systemTexture ) {
		/* the heap is only wanted for dma commands */
		stayOnCard = "mga_systemtexture not set, textures will be stored on card\n";
	} else if ( sysmemBytes < bufferBytes + 1024*1024 ) {
		/* nothing worthwhile would be left for textures */
		stayOnCard = "sysmemBytes < bufferBytes + 1meg, textures will be stored on card\n";
	}

	if ( stayOnCard ) {
		textureHeap = cardHeap;
		textureHeapVirtual = (unsigned char *)GLXSYM(vgaLinearBase);
		textureHeapPhysical = 0;
		hwMsg( 1, "%s", stayOnCard );
		return;
	}

	textureHeap = sysmemHeap;
	textureHeapVirtual = sysmemVirtual;
	textureHeapPhysical = sysmemPhysical;

	hwMsg( 1, "Texturing from sysmemHeap\n" );
}


/*
 * mgaDmaInit
 * Brings up the dma system: reads the configuration (server only),
 * maps the pseudo dma window (server only), obtains system memory,
 * allocates the command buffers, picks the texture heap and resets
 * the first buffer.
 */
void mgaDmaInit(void) {

	mgaglx.use_agp = 0;

	/* Server init - queries the configuration.  The client
	 * gets these values from the server and initializes them in
	 * mgadirect.c
	 */
	if ( __glx_is_server ) {
		if ( !glx_getvar_secure( "mga_dma" ) ) {
			hwMsg( 1, "defaulting to mga_dma = 3\n" );
			mgaglx.dmaDriver = 3; /* default value */
		} else {
			mgaglx.dmaDriver = glx_getint_secure("mga_dma");
		}

		mgaglx.dmaSize = glx_getint_secure("mga_dmasize");
		mgaglx.dmaAdr = glx_getint_secure("mga_dmaadr");
		mgaglx.cmdSize = glx_getint("mga_cmdsize");
		/* Secure since it is unstable */
		mgaglx.cardCmds = glx_getint_secure("mga_cardcmds");
		mgaglx.systemTexture = glx_getint("mga_systemtexture");

		mgaglx.isSdram = IsSDRAM();

#if 0	/* we don't write to the framebuffer anymore */
		/* set write combining on the framebuffer */
		SetWriteCombining( GLXSYM(xf86AccelInfoRec).ServerInfoRec->physBase,
				   GLXSYM(xf86AccelInfoRec).ServerInfoRec->physSize );
#endif

		/* set up the matrox pdma memory window */
		MapPseudoDmaWindow();
	}

	/* setup dma is turned off for the pseudo dma drivers */
	if ( mgaglx.dmaDriver < 2 ) {
		if ( !mgaglx.noSetupDma ) {
			hwMsg(1, "mga_dma < 2 -- disabling setup dma\n");
			mgaglx.noSetupDma = 1;
		}
	}

	/* get some system memory and make it write combining if we can */
	AllocateSystemMemory();

	/* report the dma driver that is in effect */
	hwMsg(1,"mgaDmaInit: mga_dma = %i\n", mgaglx.dmaDriver );

	/* setup the two command buffers in the appropriate memory space */
	AllocateCommandBuffers();

	/* check for using a PCI texture heap */
	ChooseTextureHeap();

	/* prepare the first buffer for use */
	mgaDmaResetBuffer();
}

/*
 * CreateFrontBuffer
 * Called during initialization to set up parameters needed
 * for the swapbuffers blit.
 */
static mgaBufferPtr CreateFrontBuffer( void )
{
	int Attrib;
	mgaBufferPtr buf;
	int	size;
	int	maccess;
		  
	buf = malloc( sizeof( *buf ) );
	memset( buf, 0, sizeof( *buf ) );
	
	buf->magic = mgaBufferMagic;
	buf->width = GLXSYM(vga256InfoRec).virtualX;
	buf->height = GLXSYM(vga256InfoRec).virtualY;
	buf->pitch = GLXSYM(vga256InfoRec).displayWidth;

	switch( GLXSYM(vga256InfoRec).depth )  {
	case 15:
		Attrib = MGA_PF_555;
		buf->bytesPerPixel = 2;
		maccess = MA_pwidth_16 | MA_dit555_enable | 
		      MA_nodither_enable; /* xf86 default */
		break;
	case 16:
		Attrib = MGA_PF_565;
		buf->bytesPerPixel = 2;
		maccess = MA_pwidth_16 | MA_dit555_enable | 
		      MA_nodither_enable; /* xf86 default */
		break;
	case 24:
		if (GLXSYM(vgaBitsPerPixel) == 24) {
			/* we can't render to this, but we can blit to it */
			Attrib = MGA_PF_888;	
			buf->bytesPerPixel = 3;
			maccess = MA_pwidth_24;	
		} else {
			Attrib = MGA_PF_8888;
			buf->bytesPerPixel = 4;
			maccess = MA_pwidth_32;	
		}
		break;
	default:
		hwError("No support for %d bit depth.\n",GLXSYM(vgaBitsPerPixel));
		return NULL;
	}

	buf->SetupSize = MGA_SETUP_SIZE;
	buf->Setup[0] = MGA_SETUP_0;
	buf->Setup[5] = MGA_SETUP_5;
	buf->Setup[MGA_SETUP_MACCESS] = maccess;
	buf->Setup[MGA_SETUP_PITCH] = buf->pitch;
	buf->Setup[MGA_SETUP_DSTORG] = 0;	
	buf->Setup[MGA_SETUP_PLNWT] = ~0;
	buf->Setup[MGA_SETUP_ZORG] = 0;
	buf->Setup[MGA_SETUP_CXBNDRY] = 0x0fff0000;
	buf->Setup[MGA_SETUP_YTOP] = 0;
	buf->Setup[MGA_SETUP_YBOT] = 0x00ffffff;

	/* also reserve memory X uses for pixmap acceleration */
	size = buf->pitch * buf->height * buf->bytesPerPixel;
	if ( size < GLXSYM(xf86AccelInfoRec).PixmapCacheMemoryEnd ) {
		size = GLXSYM(xf86AccelInfoRec).PixmapCacheMemoryEnd;
	}	
	size = (size + 4095 ) & ~4095;
	buf->backBufferBlock = mmAllocMem( cardHeap,size, 7, 0 );
	
	mmMarkReserved(buf->backBufferBlock);

	return buf;
}



/*
 * det_hwGfx
 * Checks that the hardware and the current video mode are usable.
 * Records in mgaglx whether the chip is a G200 or a G400, but does
 * no other setup.
 * Returns GL_TRUE when a supported chip runs at depth 15, 16 or 24,
 * GL_FALSE otherwise.
 */
static GLboolean det_hwGfx() {
	hwMsg(1,"Detected 0x%x Chip ID\n", GLXSYM(MGAchipset));
	mgaglx.isG200 = MGA_IS_G200(GLXSYM(MGAchipset));
	mgaglx.isG400 = MGA_IS_G400(GLXSYM(MGAchipset));

	/* is this the best way check for mga presence? */
	if ( !mgaglx.isG200 && !mgaglx.isG400 ) {
		hwError("GLXSYM(MGAchipset) not set, no mga hardware?\n");
		return GL_FALSE;
	}

	/* FIXME: implement support for other depths... */
	switch ( GLXSYM(vga256InfoRec).depth ) {
	case 15:
	case 16:
	case 24:
		break;
	default:
		hwError("Unsupported depth: %d, only 15,16, and 24 bpp are supported right now\n",
			GLXSYM(vga256InfoRec).depth);
		return GL_FALSE;
	}

	return GL_TRUE;
}

/*
 * mgaInitLogging
 * Opens the log named by the hw_logfile setting and applies the
 * hw_loglevel setting (DBG_LEVEL_BASE if unset).  Direct rendering
 * clients append "_direct" to the file name so they don't stomp on
 * the server's log.
 */
void mgaInitLogging( void ) {
	char	*logName;

	/* open the logfile and set loglevel */
	logName = glx_getvar_secure("hw_logfile");
	if ( __glx_is_server ) {
		hwOpenLog( logName, "[mga] " );
	} else {
		/* direct rendering clients use a different file
		so they don't stomp on the server's log */
		if ( logName ) {
			char	newName[1024];

			/* the name comes from the configuration and may be
			 * arbitrarily long; snprintf truncates instead of
			 * writing past the end of newName */
			snprintf( newName, sizeof( newName ), "%s_direct", logName );
			hwOpenLog( newName, "[mga] " );
		}
		else {
		  /* hack, set the module to [mga] in on screen log */
		  hwOpenLog(NULL, "[mga] ");
		}
	}
	if (glx_getvar("hw_loglevel")) {
		hwSetLogLevel(glx_getint("hw_loglevel"));
	} else {
		hwSetLogLevel(DBG_LEVEL_BASE);
	}
}


/*
 * mgaDumpRegisters
 * Debugging aid: logs the 256 bytes of PCI configuration space and
 * the register ranges 0x1c00-0x1dff and 0x2180-0x2dff, one dword
 * per line.
 */
void mgaDumpRegisters( void ) {
	static const int	drawRanges[2][2] = {
		{ 0x1c00, 0x1dff },
		{ 0x2180, 0x2dff }
	};
	int	offset, value, range;

	hwMsg(1, "Configuration registers:\n" );
	offset = 0;
	while ( offset < 256 ) {
		value = pcibusRead( GLXSYM(MGAPciTag), offset );
		hwMsg(1, "0x%2x : 0x%8x\n", offset, value );
		offset += 4;
	}

	hwMsg(1, "Drawing registers:\n" );
	for ( range = 0 ; range < 2 ; range++ ) {
		for ( offset = drawRanges[range][0] ; offset < drawRanges[range][1] ; offset += 4 ) {
			value = INREG( offset );
			hwMsg(1, "0x%2x : 0x%8x\n", offset, value );
		}
	}
}

/*
 * mgaSoftReset
 * Soft reset (old, not used anymore).
 * Resets the 3D engine by asserting and then releasing the soft reset
 * bit.  Without it 3D drawing may not work correctly (esp. lines with
 * depth) after power on; if 3d lines aren't used everything should be
 * ok without the reset.
 */
void mgaSoftReset( void ) {
	/* in spec. minimum 10us */
	const int	resetHoldUsec = 20;

	OUTREG( MGAREG_RST, R_softreset_enable );
	usleep( resetHoldUsec );
	OUTREG( MGAREG_RST, R_softreset_disable );
}


/*
 * mgaInitStaticRegisters
 * Programs the registers that are written once here and never
 * changed afterwards: TEXTRANS and TEXTRANSHIGH are both set
 * to 0xffff.
 */
void mgaInitStaticRegisters( void ) {
	const int	allOnes16 = 0xffff;

	OUTREG( MGAREG_TEXTRANS, allOnes16 );
	OUTREG( MGAREG_TEXTRANSHIGH, allOnes16 );
}


/*
 * mgaInitGLX
 * Initial entry point of the mga hardware driver, called at X server
 * module load time, or libGL direct rendering init time.
 *
 * Sets up logging, verifies the hardware, creates the card memory
 * heap and the front buffer, initializes dma and warp, installs the
 * driver's GLXProcs hooks and reads the run time option switches.
 *
 * Returns GL_TRUE on success and GL_FALSE if the hardware is not
 * supported or the basic memory setup fails.
 */
GLboolean mgaInitGLX( void ) {
	mgaInitLogging();

	/* log what the X server told us about the screen */
	hwMsg( 1, "virtual (x, y) (%d, %d)\n",
	       GLXSYM(vga256InfoRec).virtualX, GLXSYM(vga256InfoRec).virtualY );
	hwMsg( 1, "width: %d\n", GLXSYM(vga256InfoRec).displayWidth );
	hwMsg( 1, "depth: %d\n", GLXSYM(vga256InfoRec).depth );
	hwMsg( 1, "memBase: 0x%08x\n", GLXSYM(vgaLinearBase) );
	hwMsg( 1, "videoRam: 0x%08x\n", GLXSYM(vga256InfoRec).videoRam );

	/* bail out unless this is a supported chip in a supported depth
	(in particular, not 8bpp) */
	if ( !det_hwGfx() ) {
		return GL_FALSE;
	}

	/* the card memory manager covers all of video ram */
	cardHeap = mmInit( 0, GLXSYM(vga256InfoRec).videoRam * 1024 );
	if ( !cardHeap ) {
		hwMsg( 1,"cardHeap creation failed, exiting!\n" );
		return GL_FALSE;	/* really shouldn't happen */
	}
	cardPhysical = (hwUI32)GLXSYM(xf86AccelInfoRec).ServerInfoRec->physBase;
	cardVirtual = (unsigned char *)GLXSYM(vgaLinearBase);

	/* claim the memory the desktop screen lives in and work out the
	hardware acceleration values needed to draw to it */
	mgaFrontBuffer = CreateFrontBuffer();
	if ( !mgaFrontBuffer ) {
		hwError("Cannot create front buffer.\n");
		return GL_FALSE;	/* really shouldn't happen */
	}

	/* the last 1KB belongs to the hardware cursor when it is active */
	if ( GLXSYM(MGAdac).isHwCursor ) {
		mmReserveMem( cardHeap, (GLXSYM(vga256InfoRec).videoRam-1)*1024, 1024 );
	}

	/* whatever is left can hold back buffers, depth buffers,
	and textures */
	mmDumpMemInfo( cardHeap );

	/* bring up the dma system */
	mgaDmaInit();

	/* write the registers that never change; only the server does this */
	if ( __glx_is_server ) {
		mgaInitStaticRegisters();
	}

	/* bring up warp */
	if ( !mgaWarpInit() ) {
		FatalError( "Warp initialization failed" );
	}

	/* FIXME: what other GLXProcs pointers should we change? */
	GLXProcs.AllowDirect = mgaGLXAllowDirect;
	GLXProcs.VendorPrivate = mgaGLXVendorPrivate;
	GLXProcs.SwapBuffers = mgaGLXSwapBuffers;
	GLXProcs.BindBuffer = mgaGLXBindBuffer;
	GLXProcs.MakeCurrent = mgaGLXMakeCurrent;
	GLXProcs.CreateDepthBuffer = mgaGLXCreateDepthBuffer;
	GLXProcs.DestroyImage = mgaGLXDestroyImage;
	GLXProcs.CreateImage = mgaGLXCreateImage;
	GLXProcs.DestroyContext = mgaGLXDestroyContext;
	GLXProcs.CreateContext = mgaGLXCreateContext;

	/* only direct rendering clients install this hook */
	if ( !__glx_is_server ) {
		GLXProcs.ValidateFrontBuffer = mgaClientGetGeometry;
	}

	/* these vars can be changed between invocations of direct clients */
	if ( glx_getint("mga_nullprims") ) {
		hwMsg( 1, "enabling mga_nullprims\n" );
		mgaglx.nullprims = 1;
	}

	if ( glx_getint("mga_skipdma") ) {
		hwMsg( 1, "enabling mga_skipdma\n" );
		mgaglx.skipDma = 1;
	}

	if ( glx_getint("hw_boxes") ) {
		hwMsg( 1, "enabling hw_boxes\n" );
		mgaglx.boxes = 1;
	}

	if ( glx_getint("mga_nofallback") ) {
		hwMsg( 1, "enabling mga_nofallback\n" );
		mgaglx.noFallback = 1;
	}

	/* forced on for SDRAM boards regardless of the setting */
	if ( glx_getint("mga_nosgram") || mgaglx.isSdram ) {
		hwMsg( 1, "enabling mga_nosgram\n" );
		mgaglx.nosgram = 1;
	}

	if ( glx_getint("mga_nosetupdma") ) {
		hwMsg( 1, "disabling mga_setupdma\n" );
		mgaglx.noSetupDma = 1;
	}

	if ( glx_getint("mga_vsync") ) {
		hwMsg( 1, "enabling mga_vsync\n" );
		mgaglx.waitVSync = 1;
	}

	if ( glx_getint("mga_no_fast_path") ) {
		hwMsg( 1, "enabling mga_no_fast_path\n" );
		mgaglx.noFastpath = 1;
	}

	/* test by blitting to screen */
	/* VisualDmaTest(); */

	/* mgaDumpRegisters(); */

	hwError("mgaInitGLX completed\n");
	return GL_TRUE;
}


