/* $Id: mach64dmainit.c,v 1.9 1999/12/12 10:44:17 johnc Exp $ */

/*
 * GLX Hardware Device Driver for ATI Rage Pro
 * Copyright (C) 1999 Gareth Hughes
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Based on MGA driver: mgadmainit.c ???
 *
 *    Gareth Hughes <garethh@bell-labs.com>
 */

/*
 * This file is only entered at startup.  After mach64InitGLX completes,
 * nothing here will be executed again.
 */

#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/mman.h>
#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <signal.h>

#include "context.h"
#include "depth.h"
#include "macros.h"
#include "texstate.h"
#include "triangle.h"
#include "vb.h"
#include "types.h"

#include "xsmesaP.h"
#include "glx_log.h"
#include "mesaglx/context.h"
#include "mesaglx/matrix.h"
#include "mesaglx/types.h"

#define GC XXGC
#include "gcstruct.h"
#include "pixmapstr.h"
#include "servermd.h" /* PixmapBytePad */
#include "scrnintstr.h"
#include "regionstr.h"
#include "windowstr.h"
#undef GC

#include "vga.h"
#include "vgaPCI.h"
#include "mach64.h"
#include "xaa/xf86xaa.h"

#include "mach64glx.h"

#ifndef NO_MTRR

#define MTRR_NEED_STRINGS
#include <errno.h>
#include <asm/mtrr.h>
#include <sys/ioctl.h>

static int mtrr;

#if defined(USE_X86_ASM)
#include "X86/common_x86asm.h"
#endif

#endif

/* AGP kernel module interface */

/*
 * Chipset identifiers carried in struct gart_info.type.
 * NOTE(review): presumably mirrors the enumeration used by the AGP GART
 * kernel module -- confirm against that module's header before relying on
 * the numeric values.
 */
enum chipset_type { NOT_SUPPORTED, INTEL_GENERIC, INTEL_LX, INTEL_BX, INTEL_GX,
                    VIA_GENERIC, VIA_VP3, VIA_MVP3, VIA_APOLLO_PRO, SIS_GENERIC};
/* AGP transfer modes carried in struct gart_info.mode. */
enum agp_mode { AGP_MODE4X, AGP_MODE2X, AGP_MODE1X };

/*
 * Payload of the GARTIOCINFO ioctl.
 * NOTE(review): field meanings are inferred from their names only
 * (aperture address, size, slot count) -- confirm units with the module.
 */
struct gart_info
{
    long physical;
    int size;
    int num_of_slots;
    enum chipset_type type;
    enum agp_mode mode;
};

/* Maximum size that can be allocated at once is 1024 entries */

/* Payload of the GARTIOCINSERT / GARTIOCREMOVE ioctls. */
struct gart_entry
{
    int ofs;
    int size;
};

/*
 * ioctl request codes for the GART device, all in the 'J' group.
 * None of them is issued by the code in this part of the file.
 */
#define GARTIOCINSERT _IOR('J', 1, struct gart_entry)
#define GARTIOCREMOVE _IOR('J', 2, struct gart_entry)
#define GARTIOCINFO   _IOW('J', 3, struct gart_info)
#define GARTIOCINIT   _IO ('J', 4)



/*

GLX_MACH64_DMA=0	: virtual buffer, pseudo dma
GLX_MACH64_DMA=1	: physical buffer, pseudo dma
GLX_MACH64_DMA=2	: physical buffer, real dma
GLX_MACH64_DMA=3	: physical buffer, real dma, async

GLX_MACH64_DMAADR=92	: put the physical buffer at offset 92 * 0x100000
			: use GLX_MACH64_DMAADR=AGP to use agp memory
GLX_MACH64_DMASIZE=4	: use 4 * 0x100000 bytes of memory for the buffers

GLX_MACH64_CMDSIZE=4	: use 4 * 0x100000 bytes for commands, the rest for textures
GLX_MACH64_TABLESIZE=16	: use 16 * 0x400 bytes for the descriptor tables

GLX_MACH64_SYSTEMTEXTURE = 1	: put textures in main pci/agp memory instead of on card

*/

/* heap covering the card's video memory; created in mach64InitGLX */
memHeap_t	*cardHeap;
mach64UI32	cardPhysical;

/* heap textures are allocated from; mach64InitGLX points it at cardHeap */
memHeap_t	*textureHeap;
mach64UI32	textureHeapPhysical;	/* 0 if we aren't using system memory texturing */
unsigned char	*textureHeapVirtual;	/* correct for either local or PCI heaps */

/* block of physical system memory mapped by AllocateSystemMemory */
memHeap_t	*sysmemHeap;		/* physical memory, if available */
mach64UI32	sysmemBytes;		/* size of memory block */
mach64UI32	sysmemPhysical;		/* 0 if we don't have a physical mapping */
unsigned char	*sysmemVirtual;

/*
 * AGP state.  In this file use_agp is only ever cleared (mach64DmaInit);
 * gartfd, gartbuf and gartinf are not touched here.
 * NOTE(review): the "PDEA_pagpxfer_enable" wording looks inherited from the
 * MGA driver -- confirm what value the mach64 code is meant to store.
 */
int	use_agp;	/* will be set to "PDEA_pagpxfer_enable" if enabled or 0 if not */
int	gartfd;
void	*gartbuf;
struct gart_info gartinf;


/* private vars */
/* the two command buffers that are ping-ponged; set up in AllocateCommandBuffers */
mach64Dma_buffer	*dmaBuffers[2];


/* backing store shared by both command buffers and their descriptor tables */
static	mach64UI32	bufferBytes;		/* size of buffer */
static	mach64UI32	bufferPhysical;		/* 0 if we don't have a physical mapping */
static	unsigned char	*bufferVirtual;

static mach64UI32	tableBytes;		/* size of descriptor table */


/* FIXME: GH */
/* non-zero when running inside the X server rather than a direct client */
extern int __glx_is_server;

/* defined elsewhere in the driver */
void mach64DmaResetBuffer( void );
void mach64SetSyncBusy( void );



/*
 * getenvSafe
 * getenv() wrapper that never returns NULL: an unset variable yields the
 * empty string, so the result can be passed straight to atoi().
 */
static const char *getenvSafe( const char *name )
{
    const char	*value = getenv( name );

    return value ? value : "";
}

/*=============================================*/


#ifndef NO_MTRR
/*
 * IsPowerOfTwo
 * Returns 1 when exactly one bit of val is set, 0 otherwise (including 0).
 *
 * The test is done on the unsigned bit pattern so that a value with only
 * bit 31 set is recognised without evaluating 1 << 31, which is undefined
 * for a signed int.
 */
static int IsPowerOfTwo( int val )
{
    unsigned int	bits = (unsigned int)val;

    /* clearing the lowest set bit leaves zero only for a single-bit value */
    return ( bits != 0 && ( bits & ( bits - 1 ) ) == 0 ) ? 1 : 0;
}

/*
 * mach64CloseMTRR
 * atexit() handler: closes the /proc/mtrr descriptor if one is open.
 */
static void mach64CloseMTRR( void )
{
    if ( mtrr > 0 ) {
	close( mtrr );
	mtrr = 0;
    }
}

/*
 * mach64OpenMTRR
 * Opens /proc/mtrr for writing and stores the descriptor in the file-level
 * 'mtrr' variable.  On failure the reason is logged and 'mtrr' is reset to
 * 0, the value the rest of this file treats as "MTRR support unavailable";
 * leaving the -1 returned by open() in place would slip past that check.
 */
static void mach64OpenMTRR( void )
{
    mtrr = open( "/proc/mtrr", O_WRONLY, 0 );
    if ( mtrr == -1 )
    {
	/* save errno before the logging calls get a chance to change it */
	int	err = errno;

	mtrr = 0;
	if ( err == ENOENT ) {
	    mach64Error( "/proc/mtrr not found: MTRR not enabled\n" );
	}  else {
	    mach64Error( "Error opening /proc/mtrr: %s\n", strerror( err ) );
	    mach64Error( "MTRR not enabled\n" );
	}
	return;
    }
    atexit( mach64CloseMTRR );
}


/*
 * CoverRangeWithMTRR
 * Splits [base, base + range) into fragments whose size is a power of two
 * and whose start is a multiple of that size, and registers each fragment
 * through MTRRIOC_SET_ENTRY with the given type.
 *
 *   base   start address of the range
 *   range  length of the range in bytes
 *   type   MTRR type to set, or -1 to only count the fragments needed
 *
 * Returns the number of fragments.  The smallest fragment produced is
 * 2048 bytes, so a range that is not a multiple of 2048 is over-covered by
 * its last fragment; the loop stops as soon as the remaining length is no
 * longer positive instead of running on with a negative remainder.
 */
int CoverRangeWithMTRR( int base, int range, int type )
{
    int		count = 0;

    mach64Msg( 1,"CoverRangeWithMTRR( 0x%x, 0x%x, %i )\n", base, range, type );

    while ( range > 0 )
    {
	/* see how big of an mtrr we can make; size must be a power of 2 */
	int	size = 2048;

	for ( ;; )
	{
	    int	next;

	    /* doubling any further would overflow a signed int */
	    if ( size >= 0x40000000 ) {
		break;
	    }
	    next = size * 2;

	    /* the base must be a multiple of the size */
	    if ( base % next != 0 ) {
		break;
	    }

	    /* and the fragment must not run past the end of the range */
	    if ( next > range ) {
		break;
	    }

	    size = next;
	}

	/* set it if we aren't just checking the number */
	if ( type != -1 )
	{
	    struct mtrr_sentry sentry;

	    sentry.base = base;
	    sentry.size = size;
	    sentry.type = type;

	    mach64Msg( 1, "MTRR fragment added: addr=0x%x size=0x%x type=%i\n",
		       sentry.base, sentry.size, sentry.type );
	    if ( ioctl( mtrr, MTRRIOC_SET_ENTRY, &sentry ) == -1 ) {
		mach64Error( "Error doing ioctl(2) on /proc/mtrr: %s\n",
			     strerror( errno ) );
	    }
	}

	base += size;
	range -= size;
	count++;
    }

    mach64Msg( 1, "------\n" );

    return count;
}

/*
 * SetWriteCombining
 *
 * Marks [physical, physical + bytes) as write-combining through /proc/mtrr.
 *
 * Any existing MTRR that overlaps the range is deleted first, and the parts
 * of it that lie outside the range are re-created with their original type
 * via CoverRangeWithMTRR.  Does nothing when no MTRR descriptor is open.
 */
static void SetWriteCombining( long physical, int bytes )
{
    struct mtrr_sentry sentry;
    struct mtrr_gentry gentry;
    int i;

    /* 0 = never opened, -1 = open() failed */
    if ( mtrr <= 0 ) {
	return;
    }

    /* remove any MTRR that cover the range */
    for ( i = 0 ; i < 128 ; i++ )
    {
	gentry.regnum = i;
	if ( ioctl( mtrr, MTRRIOC_GET_ENTRY, &gentry ) == -1 ) {
	    break;
	}
	mach64Msg( 1, "MTRR reg %i: addr=0x%x size=0x%x type=%i\n",
		   i, gentry.base, gentry.size, gentry.type );

	/* skip entries entirely above or entirely below our range */
	if ( gentry.base >= physical + bytes ) {
	    continue;
	}
	if ( gentry.base + gentry.size <= physical ) {
	    continue;
	}

	/* we must delete this entry */
	sentry.base = gentry.base;
	sentry.size = gentry.size;
	if ( ioctl( mtrr, MTRRIOC_DEL_ENTRY, &sentry ) == -1 ) {
	    mach64Error( "Error doing MTRRIOC_DEL_ENTRY on /proc/mtrr: %s\n",
			 strerror( errno ) );
	    /* the entry is still in place: re-creating fragments or
	       retrying this index would only repeat the failure forever */
	    continue;
	}
	mach64Msg( 1, "MTRRIOC_DEL_ENTRY succeeded\n" );

	/* recreate fragments around the new region if necessary */
	if ( gentry.base < physical ) {
	    /* leading part: old start up to the start of our range */
	    CoverRangeWithMTRR( gentry.base, physical - gentry.base,
				gentry.type );
	}
	if ( gentry.base + gentry.size > physical + bytes ) {
	    /* trailing part: end of our range up to the old end */
	    CoverRangeWithMTRR( physical + bytes,
				gentry.base + gentry.size - ( physical + bytes ),
				gentry.type );
	}

	/* because we deleted an entry, we need to check this index again */
	i--;
    }

    /* set this range to write combining */
    sentry.base = physical;
    sentry.size = bytes;
    sentry.type = 1; /* write-combining */

    if ( ioctl( mtrr, MTRRIOC_SET_ENTRY, &sentry ) == -1 ) {
	mach64Error( "Error doing ioctl(2) on /proc/mtrr: %s\n",
		     strerror( errno ) );
	mach64Error( "MTRR not enabled\n" );
    } else {
	mach64Msg( 1, "MTRR enabled: write-combining, addr=0x%x size=0x%x\n",
		  sentry.base, sentry.size );
    }

}
#endif

/*=============================================*/


/*
 * MemoryBenchmark
 * Times how long the CPU takes to fill 'dwords' 32-bit words of 'buffer'
 * with 0x15151515 and logs the resulting rate.  Afterwards one word near
 * the end of the buffer is overwritten with 0x15151593 for DmaBenchmark.
 *
 *   buffer  start of the memory to fill; NULL is ignored
 *   dwords  number of 32-bit words available at buffer
 *
 * Never writes outside the first 'dwords' words, and does not compute a
 * rate when the measured interval is zero or negative.
 */
static void MemoryBenchmark( void *buffer, int dwords )
{
    int		i;
    int		start, end;
    int		elapsed;
    int 	*base;

    base = (int *)buffer;
    if ( !base || dwords <= 0 ) {
	return;
    }

    start = usec();
    /* unrolled fill, eight words per pass */
    for ( i = 0 ; i + 8 <= dwords ; i += 8 ) {
	base[i] =
	    base[i+1] =
	    base[i+2] =
	    base[i+3] =
	    base[i+4] =
	    base[i+5] =
	    base[i+6] =
	    base[i+7] = 0x15151515;		/* dmapad nops */
    }
    /* remaining words when dwords is not a multiple of eight */
    for ( ; i < dwords ; i++ ) {
	base[i] = 0x15151515;
    }
    end = usec();

    elapsed = end - start;
    if ( elapsed > 0 ) {
	int	mb;

	/* 0x40000 dwords = 1 megabyte; usec() counts microseconds */
	mb = ( (float)dwords / 0x40000 ) * 1000000 / elapsed;
	mach64Msg( 1, "MemoryBenchmark: %i mb/s\n", mb );
    } else {
	mach64Msg( 1, "MemoryBenchmark: interval too short to measure\n" );
    }

    /* make the last command a DWGSYNC for DmaBenchmark */
    dwords -= dwords % 5;
    if ( dwords >= 5 ) {
	base[dwords-5] = 0x15151593;
    }
}

/*
 * DmaBenchmark
 * Times a DMA transfer of 'dwords' 32-bit words starting at physical
 * address 'physical' and logs the rate.
 *
 * The register writes that would start the transfer are currently compiled
 * out, so as written this only times mach64WaitForDmaCompletion().  When
 * the measured interval is not positive no rate is computed, which avoids
 * dividing by zero.
 */
static void DmaBenchmark( unsigned int physical, int dwords )
{
    int		start, end;
    int		mb;
    float	fsec;
    mach64UI32	dmaEnd;


    dmaEnd = physical + dwords*4;
    start = usec();

#if 0 /* FIXME: GH */
    OUTREG( MACH64REG_PRIMADDRESS, physical );
    OUTREG( MACH64REG_PRIMEND, dmaEnd | use_agp);
#endif

    mach64WaitForDmaCompletion();

    end = usec();

    if ( end - start <= 0 ) {
	mach64Msg( 1, "DmaBenchmark 0x%x bytes: interval too short to measure\n",
		   dwords*4 );
	return;
    }

    /* usec() counts microseconds */
    fsec = ( end - start ) / 1000000.0;

    /* bytes -> megabytes, divided by the elapsed seconds */
    mb = ( (float)dwords * 4 / 0x100000 ) / fsec;

    mach64Msg( 1, "DmaBenchmark 0x%x bytes, %5.3f sec: %i mb/s\n",
	       dwords*4, fsec, mb );
}


/*
 * NonVisualDmaTest
 * Queues 4 * 0x9f batches of sixteen harmless Z / alpha-test register
 * writes, logs the resulting descriptor table, then flushes the buffer to
 * the card.
 */
void NonVisualDmaTest( void )
{
    int		i, j, k;
    DMALOCALS;

    mach64Msg( 1, "Starting DMA test...\n" );

    for ( i = 0 ; i < 4 ; i++ )
    {
	for ( j = 0 ; j < 0x9f ; j++ )
	{
	    /* NOTE(review): 32 presumably = 16 register writes * 2 dwords
	       each -- confirm against the DMAOUTREG definition */
	    DMAGETPTR( 32 );

	    for ( k = 0 ; k < 4 ; k++ )
	    {
		/* Just send some dummy commands. */
		DMAOUTREG( MACH64REG_Z_CNTL,
			   Z_tst_disable | Z_test_zalways );
		DMAOUTREG( MACH64REG_ALPHA_TST_CNTL,
			   ALPHA_tst_disable | ALPHA_test_aalways );

		DMAOUTREG( MACH64REG_Z_CNTL,
			   Z_tst_disable | Z_test_znever );
		DMAOUTREG( MACH64REG_ALPHA_TST_CNTL,
			   ALPHA_tst_disable | ALPHA_test_anever );
	    }

	    DMAADVANCE();
	}

	/* a per-pass mach64DmaFlush() is intentionally not done here */
    }

    mach64Msg( 1, "  table entries: %d buffer cmds: %d\n", dma_buffer->tableDwords / 4, dma_buffer->bufferDwords / 2 );

    /* each table entry is four dwords; words 1 and 2 are 32-bit values, so
       they are printed as integers rather than with %p */
    for ( i = 0 ; i < dma_buffer->tableDwords / 4 ; i++ )
    {
	mach64Msg( 1, "    entry: %d addr: 0x%08x cmd: 0x%x\n", i, dma_buffer->virtualTable[4*i+1], dma_buffer->virtualTable[4*i+2] );
    }

    mach64Msg( 1, "Flushing buffers...\n" );
    mach64DmaFlush();
}


/*
 * AllocatePhysicalDmaBuffer
 * Carves the command buffer out of sysmemHeap.  The size comes from
 * mach64glx.cmdSize in megabytes (4 when unset).  On success bufferVirtual
 * and bufferPhysical are set; on failure they are left untouched.
 */
static void AllocatePhysicalDmaBuffer( void )
{
    PMemBlock	cmdBlock;

    /* determine total size of buffer */
    bufferBytes = mach64glx.cmdSize;
    if ( bufferBytes ) {
	mach64Msg(1,"using GLX_MACH64_CMDSIZE = %i\n", bufferBytes );
    } else {
	mach64Msg(1,"defaulting to GLX_MACH64_CMDSIZE = 4\n" );
	bufferBytes = 4;
    }
    bufferBytes *= 0x100000;

    cmdBlock = mmAllocMem( sysmemHeap, bufferBytes, 8, 0 );
    if ( cmdBlock ) {
	mach64Msg( 1, "allocated 0x%x bytes from sysmemHeap for command buffers.\n",
		   bufferBytes );
	/* the block offset applies to both views of the heap */
	bufferVirtual = sysmemVirtual + mmOffset( cmdBlock );
	bufferPhysical = sysmemPhysical + mmOffset( cmdBlock );
	return;
    }

    mach64Msg( 1, "failed to allocate 0x%x bytes from sysmemHeap for command buffers.\n",
	       bufferBytes );
}

/*
 * AllocateVirtualDmaBuffer
 * Allocates the command buffer from ordinary process memory.  The size
 * comes from mach64glx.cmdSize in megabytes (4 when unset).
 *
 * On success bufferVirtual points at a 4k-aligned region of bufferBytes
 * bytes.  On allocation failure an error is logged and bufferVirtual /
 * bufferBytes are set to NULL / 0, rather than deriving a bogus aligned
 * pointer from NULL.  The block is never freed, so the unaligned pointer
 * returned by malloc is not kept.
 */
static void AllocateVirtualDmaBuffer( void )
{
    unsigned char	*raw;

    /* determine total size of buffer */
    bufferBytes = mach64glx.cmdSize;
    if ( !bufferBytes ) {
	mach64Msg( 1, "defaulting to GLX_MACH64_CMDSIZE = 4\n" );
	bufferBytes = 4;
    } else {
	mach64Msg( 1, "using GLX_MACH64_CMDSIZE = %i\n", bufferBytes );
    }

    bufferBytes *= 0x100000;

    /* one extra page so the start can be rounded up */
    raw = malloc( bufferBytes + 0x1000);
    if ( !raw ) {
	mach64Error( "failed to allocate 0x%x bytes from virtual memory for command buffers.\n",
		     bufferBytes );
	bufferVirtual = NULL;
	bufferBytes = 0;
	return;
    }

    /* align it to page size, might help on something used as much as this */
    bufferVirtual = (pointer)(((unsigned long) raw & ~0xFFF) + 0x1000);
    mach64Msg( 1, "allocated 0x%x bytes from virtual memory for command buffers.\n",
	       bufferBytes );
}


/*
 * AllocateCommandBuffers
 * The dma command buffers can be either virtual or in the sysmemHeap
 */
#define	OVERFLOW_DWORDS	80

static void AllocateCommandBuffers( void )
{
    /* try to allocate the command buffers in either sysmem or cardmem */
    if ( mach64glx.dmaDriver > 0 ) {
	if ( sysmemHeap ) {
	    AllocatePhysicalDmaBuffer();
	}
    }

    /* if we didn't get real memory, get a virtual buffer and use PDMA */
    if ( !bufferPhysical ) {
	mach64glx.dmaDriver = 0;
	AllocateVirtualDmaBuffer();
    }

    if ( __glx_is_server )
    {
	/* benchmark the writing speed to the command buffer */
	MemoryBenchmark( bufferVirtual, bufferBytes / 4 );
	MemoryBenchmark( bufferVirtual, bufferBytes / 4 );
	MemoryBenchmark( bufferVirtual, bufferBytes / 4 );

	/* benchmark the read speed of the card's dma */
	if ( mach64glx.dmaDriver >= 2 )
	{
	    DmaBenchmark( (unsigned int)bufferPhysical, bufferBytes / 4 );
	    DmaBenchmark( (unsigned int)bufferPhysical, bufferBytes / 4 );
	    DmaBenchmark( (unsigned int)bufferPhysical, bufferBytes / 4 );
	    DmaBenchmark( (unsigned int)bufferPhysical, bufferBytes / 4 );
	    DmaBenchmark( (unsigned int)bufferPhysical, bufferBytes / 4 );
	}
    }

    /* determine size of descriptor tables */
    tableBytes = mach64glx.tableSize;
    if ( !tableBytes ) {
	mach64Msg( 1, "defaulting to GLX_MACH64_TABLESIZE = 16\n" );
	tableBytes = 16;
    } else {
	mach64Msg( 1, "using GLX_MACH64_TABLESIZE = %i\n", tableBytes );
    }
    tableBytes *= 0x400;

    /* always leave enough room for a X server setup and DWGSYNC after
       overflow checks */

    /*
     * GH: It it critical that the tables are aligned to their size, ie. the
     * default 16k table should be 16k-aligned.  We should really enforce
     * this...
     */

    /* setup the two buffers that will be ping-ponged */
    dmaBuffers[0] = malloc( sizeof(mach64Dma_buffer) );
    memset( dmaBuffers[0], '\0', sizeof(mach64Dma_buffer) );

    dmaBuffers[0]->virtualTable = (mach64UI32 *)bufferVirtual;
    dmaBuffers[0]->physicalTable = bufferPhysical;
    dmaBuffers[0]->virtualBuffer = (mach64UI32 *)bufferVirtual + tableBytes / 4;
    dmaBuffers[0]->physicalBuffer = bufferPhysical + tableBytes;
    dmaBuffers[0]->maxTableDwords = tableBytes / 4;
    dmaBuffers[0]->maxBufferDwords = bufferBytes / 8 - dmaBuffers[0]->maxTableDwords;
    dmaBuffers[0]->overflowBufferDwords = dmaBuffers[0]->maxBufferDwords - OVERFLOW_DWORDS;

    dmaBuffers[1] = malloc( sizeof(mach64Dma_buffer) );
    memset( dmaBuffers[1], '\0', sizeof(mach64Dma_buffer) );

    dmaBuffers[1]->virtualTable = (mach64UI32 *)bufferVirtual + bufferBytes / 8;
    dmaBuffers[1]->physicalTable = bufferPhysical + bufferBytes / 2;
    dmaBuffers[1]->virtualBuffer = (mach64UI32 *)bufferVirtual + bufferBytes / 8 + tableBytes / 4;
    dmaBuffers[1]->physicalBuffer = bufferPhysical + bufferBytes / 2 + tableBytes;
    dmaBuffers[1]->maxTableDwords = tableBytes / 4;
    dmaBuffers[1]->maxBufferDwords = bufferBytes / 8 - dmaBuffers[0]->maxTableDwords;
    dmaBuffers[1]->overflowBufferDwords = dmaBuffers[1]->maxBufferDwords - OVERFLOW_DWORDS;

    mach64Msg( 1, "dmaBuffers[]->maxTableDwords = %i\n",
	       dmaBuffers[0]->maxTableDwords );
    mach64Msg( 1, "dmaBuffers[]->maxBufferDwords = %i\n",
	       dmaBuffers[0]->maxBufferDwords );

    mach64Msg( 1, "dmaBuffers[0]->Table  phys: 0x%08x virt: 0x%08x\n",
	       dmaBuffers[0]->physicalTable, dmaBuffers[0]->virtualTable );
    mach64Msg( 1, "dmaBuffers[0]->Buffer phys: 0x%08x virt: 0x%08x\n",
	       dmaBuffers[0]->physicalBuffer, dmaBuffers[0]->virtualBuffer );

    mach64Msg( 1, "dmaBuffers[1]->Table phys: 0x%08x virt: 0x%08x\n",
	       dmaBuffers[1]->physicalTable, dmaBuffers[1]->virtualTable );
    mach64Msg( 1, "dmaBuffers[1]->Buffer phys: 0x%08x virt: 0x%08x\n",
	       dmaBuffers[1]->physicalBuffer, dmaBuffers[1]->virtualBuffer );

    mach64DmaResetBuffer();

    mmDumpMemInfo( sysmemHeap );
}


/*
 * AllocateSystemMemory
 * Looks at environment variables to determine if a block
 * of physical memory has been left for graphics after the
 * memory available to the kernel.
 * System memory can be used for dma command buffers or
 * textures.
 */
static void AllocateSystemMemory( void )
{
    int		fd;
    char	*adr;

    sysmemPhysical = 0;
    sysmemVirtual = 0;
    sysmemHeap = 0;

    if ( !mach64glx.dmaDriver ) {
	return;
    }

    /* determine total requested size of buffer */
    sysmemBytes = mach64glx.dmaSize;
    if ( !sysmemBytes ) {
	mach64Msg( 1, "GLX_MACH64_DMASIZE not set, skipping physical allocation\n" );
	return;
    }
    sysmemBytes *= 0x100000;

    /* try AGP memory */
    adr = getenv( "GLX_MACH64_DMAADR" );

    /* mach64glx.dmaDriverADR should be set to a value >= the
       mem= kernel parm */
    sysmemPhysical = mach64glx.dmaAdr;
    if ( sysmemPhysical < 16 ) {
	mach64Msg( 1, "unlikely GLX_MACH64_DMAADR=%i, skipping physical allocation\n", bufferPhysical );
	return;
    }
    sysmemPhysical *= 0x100000;



    /* FIXME!!!: should check sysmemPhysical against /proc/meminfo */

    fd = open( "/dev/mem", O_RDWR );
    if ( fd < 0 ) {
	mach64Msg( 1, "failed to open /dev/mem\n" );
	return;
    }

    sysmemVirtual = (unsigned char *)
	mmap( NULL, sysmemBytes, PROT_READ | PROT_WRITE,
	      MAP_SHARED, fd, (off_t)sysmemPhysical );
    if ( sysmemVirtual == MAP_FAILED ) {
	mach64Msg( 1, "failed to mmap sysmem\n" );
	close( fd );
	return;
    }

    /* FIXME: should verify the memory exists with read / write test */
#ifndef NO_MTRR
	/* set to write combining */
    if ( __glx_is_server )
    {
 	/* due to MTRR fragmentation issues, we can't do this for all
           memory ranges - except on the K6... */
  	if ( IsPowerOfTwo( sysmemPhysical ) ||
	     ( gl_identify_x86_cpu_features() & GL_CPU_3Dnow ) )
	{
	    mach64Msg( 1, "Setting write combining on system heap.\n" );
	    SetWriteCombining( sysmemPhysical, sysmemBytes );
  	}
	else
	{
	    mach64Msg( 1, "Can't set write combining on system heap, not power of two.\n" );
  	}
    }
#endif

    /* create a heap */
    sysmemHeap = mmInit( sysmemBytes );

    mach64Msg( 1, "sysmemPhysical: %p\n", sysmemPhysical );
    mach64Msg( 1, "sysmemVirtual: %p\n", sysmemVirtual );
    mach64Msg( 1, "sysmemSize: %p\n", sysmemBytes );

}

/*
 * mach64DmaInit
 *
 * Brings up the DMA subsystem: reads the tuning variables (server only),
 * opens /proc/mtrr (server only, unless NO_MTRR), maps system memory,
 * allocates the two command buffers and readies the first one.
 */
void mach64DmaInit(void)
{
    use_agp = 0;

    /* Only the server looks at the environment.  A direct client receives
     * these values from the server and stores them in mach64direct.c.
     */
    if ( __glx_is_server )
    {
	mach64glx.dmaDriver = atoi( getenvSafe("GLX_MACH64_DMA") );
	mach64glx.dmaSize = atoi( getenvSafe("GLX_MACH64_DMASIZE") );
	mach64glx.dmaAdr = atoi( getenvSafe("GLX_MACH64_DMAADR") );
	mach64glx.cmdSize = atoi( getenvSafe("GLX_MACH64_CMDSIZE") );
	mach64glx.tableSize = atoi( getenvSafe("GLX_MACH64_TABLESIZE") );

#ifndef NO_MTRR
	/* prepare to set write combining */
	mach64OpenMTRR();
#endif
    }

    /* get some system memory and make it write combining if we can */
    AllocateSystemMemory();

    /* report the DMA mode that was requested */
    mach64Msg( 1, "mach64DmaInit: GLX_MACH64_DMA = %i\n", mach64glx.dmaDriver );

    /* setup the two command buffers in the appropriate memory space */
    AllocateCommandBuffers();

    /* prepare the first buffer for use */
    mach64DmaResetBuffer();
}

/*
 * mach64ScratchRegTest
 *
 * Sanity check of register access: writes 0x55555555 and then 0xaaaaaaaa
 * to SCRATCH_REG0 and verifies each value reads back.  The second pattern
 * is only tried when the first succeeds.  The register's previous contents
 * are restored in every case.  Defined in the mach64 Programmer's Guide.
 */
static void mach64ScratchRegTest()
{
    int		saved;

    saved = INREG( MACH64REG_SCRATCH_REG0 );

    OUTREG( MACH64REG_SCRATCH_REG0, 0x55555555 );
    if ( INREG( MACH64REG_SCRATCH_REG0 ) == 0x55555555 )
    {
	mach64Msg( 1, "SCRATCH_REG0 read 1 successful\n" );

	OUTREG( MACH64REG_SCRATCH_REG0, 0xaaaaaaaa );
	if ( INREG( MACH64REG_SCRATCH_REG0 ) == 0xaaaaaaaa ) {
	    mach64Msg( 1, "SCRATCH_REG0 read 2 successful\n" );
	} else {
	    mach64Error( "Mach64 probe failed on read 2 of SCRATCH_REG0 %x\n",
			 MACH64REG_SCRATCH_REG0 );
	}
    }
    else
    {
	mach64Error( "Mach64 probe failed on read 1 of SCRATCH_REG0 %x\n",
		     MACH64REG_SCRATCH_REG0 );
    }

    /* put back whatever was there before the test */
    OUTREG( MACH64REG_SCRATCH_REG0, saved );
}


/*
 * det_hwGfx
 * Verifies that the current configuration is supported; does no setup.
 * At present the only live check is the colour depth: 15 and 16 are
 * accepted, anything else is reported and rejected.  The chip-ID check is
 * compiled out.
 */
GLboolean det_hwGfx()
{
#if 0	// JDC
   mach64Msg( 1, "Detected 0x%x Chip ID\n", mach64ChipType );

    /* is this the best way check for mach64 presence? */
    if ( !MACH64_IS_RAGEPRO( mach64ChipType ) ) {
	mach64Error( "mach64ChipType not set, no mach64 hardware?\n" );
	return GL_FALSE;
    }
#endif
    switch ( mach64glx.depth ) {
    case 15:
    case 16:
	return GL_TRUE;
    default:
	break;
    }

    mach64Error( "Unsupported depth: %d, only 15 and 16 bpp are supported right now\n",
		 mach64glx.depth );
    return GL_FALSE;
}

/*
 * mach64InitLogging
 *
 * Opens the log file named by GLX_MACH64_LOGFILE and sets the log level
 * from GLX_MACH64_LOGLEVEL (DBG_LEVEL_BASE when unset).
 *
 * The server passes the name (possibly NULL) straight to mach64OpenLog.
 * A direct rendering client appends "_direct" so it does not stomp on the
 * server's log, and opens nothing when the variable is unset.  The client
 * name is built with snprintf, so an over-long environment value is
 * truncated instead of overflowing the stack buffer.
 */
void mach64InitLogging( void )
{
    char	*logName;
    char	*logLevel;

    /* open the logfile and set loglevel */
    logName = getenv( "GLX_MACH64_LOGFILE" );
    if ( __glx_is_server )
    {
	mach64OpenLog( logName );
    }
    else if ( logName )
    {
	/* direct rendering clients use a different file
	   so they don't stomp on the server's log */
	char	newName[1024];

	snprintf( newName, sizeof(newName), "%s_direct", logName );
	mach64OpenLog( newName );
    }

    logLevel = getenv( "GLX_MACH64_LOGLEVEL" );
    if ( logLevel ) {
	mach64SetLogLevel( atoi( logLevel ) );
    } else {
	mach64SetLogLevel( DBG_LEVEL_BASE );
    }
}


/*
 * mach64DumpRegisters
 * Reads every 32-bit register at offsets 0x000 through 0x7fc, passes the
 * value through SWAP and caches it in mach64glx.registers[], indexed by
 * dword.  The PCI configuration dump is compiled out.
 */
void mach64DumpRegisters( void ) {
    int		offset, value;
#if 0
    mach64Msg( 1, "Configuration registers:\n" );
    for ( offset = 0 ; offset < 256 ; offset+=4 ) {
	value = pcibusRead( mach64PciTag, offset );
	mach64Msg(1, "0x%2x : 0x%8x\n", offset, value );
    }
#endif
    mach64Msg(1, "Drawing registers:\n" );

    offset = 0x0;
    while ( offset < 0x7ff ) {
	value = INREG( offset );
	value = SWAP(value);
	/* byte offset -> dword index */
	mach64glx.registers[offset>>2] = value;
	/* per-register logging is disabled:
	   mach64Msg(1, "0x%2x (%i_%2x): 0x%8x\n", offset, offset < 0x400, (offset>>2)&255, value ); */
	offset += 4;
    }
}

/*
 * GetXServerInfo
 * Copies the framebuffer geometry and the mapped framebuffer / register
 * base addresses from the X server's screen record into mach64glx, and
 * logs them.  This will be different on the fbdev server and the mach64
 * server.
 *
 * Returns 1 on success.  In the fbdev build it returns 0 when the fbdev
 * chipset string does not contain "Mach64".
 *
 * The fbdev branch is selected with __PPC__, the same macro the rest of
 * this file tests; the previous spelling "__PPC_" is not a macro this file
 * tests anywhere else, so that branch could not be selected.
 */
static int GetXServerInfo( void ) {
#ifdef __PPC__ 		/* FIXME: need a separate config for fbdev on intel */
    extern ScrnInfoRec fbdevInfoRec;
    extern int fbdevVirtBase;
    extern int fbdevRegBase;

    /* FIXME: do this better! */
    if ( !strstr( fbdevInfoRec.chipset, "Mach64" ) ) {
	return 0;
    }
    mach64glx.linearBase = fbdevVirtBase; // fbdevInfoRec.MemBase;
    mach64glx.MMIOBase = fbdevRegBase; // fbdevInfoRec.IObase;
    mach64glx.depth = fbdevInfoRec.depth;
    mach64glx.virtualX = fbdevInfoRec.virtualX;
    mach64glx.virtualY = fbdevInfoRec.virtualY;
    mach64glx.displayWidth = fbdevInfoRec.displayWidth;
    mach64glx.videoRam = fbdevInfoRec.videoRam;
    /* round the pixel size up to whole bytes */
    mach64glx.bytesPerPixel = ( fbdevInfoRec.bitsPerPixel + 7 ) / 8;
#else
    extern pointer mach64MemRegMap;

    mach64glx.linearBase = (int)mach64VideoMem;
    mach64glx.MMIOBase = (int)mach64MemRegMap;
    mach64glx.depth = mach64InfoRec.depth;
    mach64glx.virtualX = mach64InfoRec.virtualX;
    mach64glx.virtualY = mach64InfoRec.virtualY;
    mach64glx.displayWidth = mach64InfoRec.displayWidth;
    mach64glx.videoRam = mach64InfoRec.videoRam;
    /* round the pixel size up to whole bytes */
    mach64glx.bytesPerPixel = ( mach64InfoRec.bitsPerPixel + 7 ) / 8;

#endif

    mach64Msg( 1, "width: %d\n", mach64glx.virtualX );
    mach64Msg( 1, "height: %d\n", mach64glx.virtualY );
    mach64Msg( 1, "pitch: %d\n", mach64glx.displayWidth );
    mach64Msg( 1, "depth: %d\n", mach64glx.depth );
    mach64Msg( 1, "bytesPerPixel: %d\n", mach64glx.bytesPerPixel );
    mach64Msg( 1, "videoRam: %dk\n", mach64glx.videoRam );
    mach64Msg( 1, "memBase: 0x%08x\n", mach64glx.linearBase );
    mach64Msg( 1, "ioBase: 0x%08x\n", mach64glx.MMIOBase );

    return 1;
}

/*
 * mach64DrawTest2
 *
 * Start-up smoke test that figures out how to draw the things we need.
 * In order it:
 *   1. fills the start of the framebuffer from the CPU,
 *   2. queues a solid fill, a block copy and an (unfinished) triangle
 *      through the DMA macros and waits for them to finish,
 *   3. queues ten batches of one hundred 8x8 fills to exercise buffer
 *      switching,
 *   4. sleeps five seconds so the result can be looked at.
 *
 * NOTE(review): the CPU fill writes 1022*1024 ints regardless of
 * mach64glx.videoRam -- confirm the framebuffer is always at least that
 * large.
 */
static void mach64DrawTest2( void ) {
	DMALOCALS;
	int i;

	/* draw something with the cpu to show we found the framebuffer properly */
	for ( i = 0 ; i < 1022*1024 ; i++ ) {
		((int *)mach64glx.linearBase)[i] = i;
	}

	DMAGETPTR( 100 );

	/* simple fill */
	DMAOUTREG( MACH64REG_DP_FRGD_CLR, 0x12345678 );	/* random color */
	DMAOUTREG( MACH64REG_DP_WRITE_MASK, 0xffffffff );	/* write to all */
	DMAOUTREG( MACH64REG_DP_MIX, 0x00070003 );			/* bkground leave alone */
	DMAOUTREG( MACH64REG_DP_SRC,
		DP_bkgd_src_foreground | DP_frgd_src_foreground | DP_mono_src_1 );
	DMAOUTREG( MACH64REG_CLR_CMP_CNTL, 0 );			/* disable color compare */
	DMAOUTREG( MACH64REG_GUI_TRAJ_CNTL, 3 );			/* left to right, top to bottom */
	DMAOUTREG( MACH64REG_DST_X_Y, (100<<16)|120 );		/* start at (120, 100) */
	DMAOUTREG( MACH64REG_DST_WIDTH_HEIGHT, (100<<16)|120 );	/* 120 wide, 100 high */

	/* block copy */
	DMAOUTREG( MACH64REG_DP_PIX_WIDTH, 0x30333 );	/* all 15 bit */
	DMAOUTREG( MACH64REG_DP_WRITE_MASK, 0xffffffff );	/* write to all */
	DMAOUTREG( MACH64REG_DP_MIX, 0x00070003 );	/* bkground leave alone */
	DMAOUTREG( MACH64REG_DP_SRC, 0x300 );		/* blit */

	DMAOUTREG( MACH64REG_CLR_CMP_CNTL, 0 );			/* disable color compare */

	DMAOUTREG( MACH64REG_GUI_TRAJ_CNTL, 3 );			/* left to right, top to bottom */

	DMAOUTREG( MACH64REG_SRC_OFF_PITCH, ( (1024/8) << 22 ) | 0 );
	DMAOUTREG( MACH64REG_SRC_WIDTH1, 120 );	/* hangs without this... */
	DMAOUTREG( MACH64REG_SRC_Y_X, (0<<16)|0 );

	DMAOUTREG( MACH64REG_DST_X_Y, (100<<16)|300 );		/* start at (300, 100) */
	DMAOUTREG( MACH64REG_DST_WIDTH_HEIGHT, (100<<16)|120 );	/* 120 wide, 100 high */

	/* triangle rendering */
	/* DOESN'T WORK YET */
	DMAOUTREG( MACH64REG_SCALE_3D_CNTL,
		(3<<6) | (1<<16) );	/* enable setup for smooth, all source blend */
	DMAOUTREG( MACH64REG_SETUP_CNTL, 0 );
	DMAOUTREG( MACH64REG_Z_CNTL, 0 );
	DMAOUTREG( MACH64REG_ALPHA_TST_CNTL, 0 );

	DMAOUTREG( MACH64REG_DP_SRC, DP_bkgd_src_background | DP_frgd_src_3d_data | DP_mono_src_1 );

	/* NOTE(review): all three colours below go to VERTEX_1_ARGB; the
	   second and third were presumably meant for vertices 2 and 3 --
	   confirm against the register list */
	DMAOUTREG( MACH64REG_VERTEX_1_X_Y, (550<<18)|(100<<2) );
	DMAOUTREG( MACH64REG_VERTEX_1_ARGB, 0xff0000ff );
	DMAOUTREG( MACH64REG_VERTEX_2_X_Y, (500<<18)|(220<<2) );
	DMAOUTREG( MACH64REG_VERTEX_1_ARGB, 0xff00ff00 );
	DMAOUTREG( MACH64REG_VERTEX_3_X_Y, (600<<18)|(200<<2) );
	DMAOUTREG( MACH64REG_VERTEX_1_ARGB, 0xffff0000 );

	DMAOUTFLOAT( MACH64REG_ONE_OVER_AREA_UC, 1.0/100.0 );

	DMAOUTREG( MACH64REG_DP_SRC, 0x300 );		/* blit */

	DMAADVANCE();
	mach64DmaFinish();

	/* draw a bunch of stuff to test buffer switching */
{
/* these shadow the function-level i for the rest of this block */
int	i, j;

/* ten rows (j) of one hundred 8x8 squares (i), one DMA batch per row */
for ( j = 0 ; j < 100 ; j+= 10 ) {
//sleep(1);
	/* NOTE(review): 100 iterations of 8 register writes are queued
	   against a request of 1000 -- confirm the unit DMAGETPTR expects */
	DMAGETPTR( 1000 );
for ( i = 0 ; i < 1000 ; i+= 10 ) {
	DMAOUTREG( MACH64REG_DP_FRGD_CLR, 0x12345678 );	/* random color */
	DMAOUTREG( MACH64REG_DP_WRITE_MASK, 0xffffffff );	/* write to all */
	DMAOUTREG( MACH64REG_DP_MIX, 0x00070003 );			/* bkground leave alone */
	DMAOUTREG( MACH64REG_DP_SRC,
		DP_bkgd_src_foreground | DP_frgd_src_foreground | DP_mono_src_1 );
	DMAOUTREG( MACH64REG_CLR_CMP_CNTL, 0 );			/* disable color compare */
	DMAOUTREG( MACH64REG_GUI_TRAJ_CNTL, 3 );			/* left to right, top to bottom */
	DMAOUTREG( MACH64REG_DST_X_Y, (j<<16)|i );		/* start at (i, j) */
	DMAOUTREG( MACH64REG_DST_WIDTH_HEIGHT, (8<<16)|8 );	/* 8 wide, 8 high */
}
	DMAADVANCE();
	mach64DmaFinish();
}

}

	/* let it sit there a bit so we can see it */
	sleep( 5 );
}


/*
 * ReadAndLogBusCntl
 * Waits for the engine, reads BUS_CNTL, passes it through SWAP, logs the
 * result and returns it.
 */
static int ReadAndLogBusCntl( void ) {
	int	raw;

	MACH64_WAITFREE();
	raw = INREG( MACH64REG_BUS_CNTL );
	raw = SWAP(raw);
	mach64Msg( 1, "MACH64REG_BUS_CNTL = 0x%x\n",raw );

	return raw;
}

/*
 * EnableBlock1Registers
 * Sets BUS_ext_reg_enable in BUS_CNTL, logging the register value before
 * and after the change.
 */
void EnableBlock1Registers( void ) {
	int	busCntl;

	busCntl = ReadAndLogBusCntl();
	busCntl |= BUS_ext_reg_enable;
	OUTREG( MACH64REG_BUS_CNTL, SWAP( busCntl ) );

	/* read it back so the log shows whether the bit stuck */
	busCntl = ReadAndLogBusCntl();
}


/*
 * mach64InitGLX
 * This is the initial entry point for the mach64 hardware driver,
 * called at X server module load time, or libGL direct rendering
 * init time.
 *
 * Sequence: open the log, fetch the screen parameters, test and enable
 * register access, check the depth, snapshot the registers, create the
 * card memory heap and reserve the front buffer and register bank, verify
 * the font/pixmap caches are off (non-PPC builds), start the DMA system,
 * install the GLXProcs hooks, read the debug switches and run the draw
 * test.
 *
 * Returns GL_TRUE on success and GL_FALSE when the hardware is not
 * recognised, the depth is unsupported, the card heap cannot be created
 * or the caches are still enabled.
 */
GLboolean mach64InitGLX( void )
{
    /*
     * Begin usual GLX module initialization.
     */
    mach64InitLogging();

    if ( !GetXServerInfo() ) {
	mach64Msg( 1, "not a ragePro!\n" );
	return GL_FALSE;
    }

    /* Perform a quick register read/write test. */
    mach64ScratchRegTest();

    /* Enable the block 1 registers. */
    EnableBlock1Registers();

    /* check to make sure that we are on an appropriate chip and not
       running in 8bpp mode */
    if ( !det_hwGfx() ) {
	return GL_FALSE;
    }
    	/* see what the initial register state is like */
	mach64DumpRegisters();


    /* start up our card memory manager */
    /* videoRam is logged as kilobytes by GetXServerInfo, hence the * 1024 */
    cardHeap = mmInit( mach64glx.videoRam * 1024 );
    if ( !cardHeap ) {
	mach64Msg( 1, "cardHeap creation failed, exiting!\n" );
	return GL_FALSE;	/* really shouldn't happen */
    }
	textureHeap = cardHeap;	/* eventually we will get AGP texturing working */

    /* reserve memory for the front buffer */
    mmReserveMem( cardHeap, 0, mach64glx.displayWidth * mach64glx.virtualY * mach64glx.bytesPerPixel );

    /* reserve memory for the second bank of memory mapped registers */
    /* i.e. the last 1k of video memory */
    mmReserveMem( cardHeap, (mach64glx.videoRam-1) * 1024, 1024 );

#ifndef __PPC__
	/* make sure the user has turned off the pixmap and font cache */
	/* NOTE(review): this check runs after cardHeap has been created, so
	   the heap is left allocated on this failure path */
	if ( !OFLG_ISSET( OPTION_NO_FONT_CACHE, &mach64InfoRec.options ) ||
		!OFLG_ISSET( OPTION_NO_PIXMAP_CACHE, &mach64InfoRec.options ) ) {
		mach64Error( "Font and pixmap caches must be disabled to use the GLX module.\n" );
		mach64Error( "Make sure you have the following in your XF86config file:\n" );
		mach64Error( "Section \"Device\"\n" );
   		mach64Error( "	Option	\"no_font_cache\"\n" );
    		mach64Error( "	Option	\"no_pixmap_cache\"\n" );
		return GL_FALSE;
	}	
#endif
    /* the remaining memory is available for back buffers, depth
       buffers, and textures */
    mmDumpMemInfo( cardHeap );

    /* init the dma system */
    mach64DmaInit();


    /* FIXME: what other GLXProcs pointers should we change? */
    GLXProcs.CreateContext = mach64GLXCreateContext;
    GLXProcs.DestroyContext = mach64GLXDestroyContext;
    GLXProcs.SwapBuffers = mach64GLXSwapBuffers;
    GLXProcs.CreateImage = mach64GLXCreateImage;
    GLXProcs.DestroyImage = mach64GLXDestroyImage;
    GLXProcs.CreateDepthBuffer = mach64GLXCreateDepthBuffer;
    GLXProcs.MakeCurrent = mach64GLXMakeCurrent;
    GLXProcs.BindBuffer = mach64GLXBindBuffer;
    /* SwapBuffers is assigned a second time here, with the same value */
    GLXProcs.SwapBuffers = mach64GLXSwapBuffers;
#if 0 /* FIXME: GH */
    GLXProcs.VendorPrivate = mach64GLXVendorPrivate;
    GLXProcs.AllowDirect = mach64GLXAllowDirect;
#endif

    /* the only client-specific hook is currently compiled out */
    if ( !__glx_is_server ) {
#if 0 /* FIXME: GH */
	GLXProcs.ValidateFrontBuffer = mach64ClientGetGeometry;
#endif
    }

    /* these vars can be changed between invocations of direct clients */
    /* each switch is enabled simply by the variable being present */
    if ( getenv("GLX_MACH64_NULLPRIMS") ) {
	mach64Msg( 1, "enabling GLX_MACH64_NULLPRIMS\n" );
	mach64glx.nullprims = 1;
    }
    if ( getenv("GLX_MACH64_SKIPDMA") ) {
	mach64Msg( 1, "enabling GLX_MACH64_SKIPDMA\n" );
	mach64glx.skipDma = 1;
    }
    if ( getenv("GLX_MACH64_BOXES") ) {
	mach64Msg( 1, "enabling GLX_MACH64_BOXES\n" );
	mach64glx.boxes = 1;
    }
    if ( getenv("GLX_MACH64_NOFALLBACK") ) {
	mach64Msg( 1, "enabling GLX_MACH64_NOFALLBACK\n" );
	mach64glx.noFallback = 1;
    }

    	/* see if we can draw our basic primitives */
	/* NOTE(review): this runs unconditionally and ends with a five
	   second sleep -- confirm it is meant to stay enabled */
	mach64DrawTest2();

    /* NOTE(review): success is reported through mach64Error, presumably so
       it shows at every log level -- confirm */
    mach64Error( "mach64InitGLX completed\n" );
    return GL_TRUE;
}


/*
 * Local Variables:
 * mode: c
 * c-basic-offset: 4
 * End:
 */
