/* $Id: mach64dma.h,v 1.5 1999/12/11 20:59:49 gareth Exp $ */

/*
 * GLX Hardware Device Driver for ATI Rage Pro
 * Copyright (C) 1999 Gareth Hughes
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Based on MGA driver: mgadma.h ???
 *
 *    Gareth Hughes <garethh@bell-labs.com>
 */

#ifndef __MACH64_DMA_H__
#define __MACH64_DMA_H__

/*
 * The Rage Pro DMA stream operates in 4K chunks of dword pairs, in the
 * following format:
 *
 * ADDR (in MM offset format)
 * DATA
 * ADDR (in MM offset format)
 * DATA
 * ADDR (in MM offset format)
 * DATA
 * ....
 *
 * The data is accessed through descriptor tables, with each descriptor
 * entry consisting of 4 dwords:
 *
 * BM_FRAME_BUF_OFFSET	- Frame buffer offset for data transfer
 * BM_SYSTEM_MEM_ADDR	- Physical system memory address for data transfer
 * BM_COMMAND		- Count of bytes to transfer (4K max) + flags
 * RESERVED		- Always set to 0
 *
 * For more info, see the mach64 Programmer's Guide, section 8.10
 */

/*
 * Hardware registers and DMA buffers need to be in little-endian format.
 *
 * SWAP(a) yields `a` in the byte order the hardware expects.  When
 * __PPC__ is defined it calls _SWAP() (defined elsewhere); on every other
 * target it expands to its argument unchanged.  The argument is
 * parenthesized so that SWAP(x + y) * z groups the same way on all
 * targets.
 */
unsigned _SWAP(unsigned a);
#ifdef __PPC__
#define SWAP(a) _SWAP(a)
#else
#define SWAP(a) (a)
#endif


/*
 * Not sure how this compares with the G200, but the Rage Pro has two
 * banks of registers, with bank 0 at (aperture base + memmap offset - 1KB)
 * and bank 1 at (aperture base + memmap offset - 2KB).  But, to send them
 * via DMA, we need to encode them as memory map select rather than physical
 * offsets.
 */
/* Byte-offset ranges occupied by register bank 0 and bank 1. */
#define DWMREG0		0x0400
#define DWMREG0_END	0x07ff
#define DWMREG1		0x0000
#define DWMREG1_END	0x03ff

/*
 * ADRINDEX(r) converts a register byte offset into the dword index that
 * is written into the DMA stream:
 *   - offsets in [DWMREG0, DWMREG0_END] become (r - DWMREG0) / 4;
 *   - every other offset is treated as bank 1 and becomes
 *     ((r - DWMREG1) / 4) | 0x0100.
 *
 * Every use of the argument is parenthesized so that expressions such as
 * ADRINDEX( BASE | OFFSET ) are converted as a whole.  Note that the
 * argument is still evaluated more than once, so it must not have side
 * effects.
 */
#define ISREG0(r)	( ( (r) >= DWMREG0 ) && ( (r) <= DWMREG0_END ) )
#define ADRINDEX0(r)	( ( (r) - DWMREG0 ) >> 2 )
#define ADRINDEX1(r)	( ( ( (r) - DWMREG1 ) >> 2 ) | 0x0100 )
#define ADRINDEX(r)	( ISREG0(r) ? ADRINDEX0(r) : ADRINDEX1(r) )

/* Memory-map select indices MMREG0..MMREG0_END address register bank 0. */
#define MMREG0		0x0000
#define MMREG0_END	0x00ff

/*
 * MMSELECT(r) turns a memory-map select index into a register byte
 * offset: bank 0 indices map to DWMREG0 + 4 * r, all other indices map to
 * DWMREG1 + 4 * (low eight bits of r).  The argument is evaluated more
 * than once.
 */
#define ISMMREG0(r)	( ( MMREG0 <= (r) ) && ( MMREG0_END >= (r) ) )
#define MMSELECT0(r)	( DWMREG0 + ( (r) << 2 ) )
#define MMSELECT1(r)	( DWMREG1 + ( ( (r) & 0xff ) << 2 ) )
#define MMSELECT(r)	( !ISMMREG0(r) ? MMSELECT1(r) : MMSELECT0(r) )

/*
 * Dword slots 0..3.  NOTE(review): these appear to index the four dwords
 * of one descriptor table entry (frame buffer offset, system memory
 * address, command, reserved) in the order listed in the descriptor
 * description at the top of this header -- confirm against the users.
 */
#define DMA_FRAME_BUF_OFFSET	0
#define DMA_SYS_MEM_ADDR	1
#define DMA_COMMAND		2
#define DMA_RESERVED		3

/*
 * The main initialization of the entire mach64 hardware driver.
 * Takes no arguments and returns a GLboolean.
 * NOTE(review): presumably true on success -- confirm in the definition.
 */
GLboolean mach64InitGLX( void );

/*
 * A flush guarantees that all data added to the dma buffer is on its way
 * to the card and will eventually complete with no more intervention.
 *
 * When running with pseudo dma this is the same as a finish call; when
 * async dma is active the card executes the commands while the cpu is
 * doing other work.  A protected memory region keeps X server interaction
 * with the hardware registers safe.
 */
void mach64DmaFlush( void );

/*
 * The overflow function is called when a block can't be allocated in the
 * current dma buffer.  It flushes the current buffer and records some
 * information.
 *
 * newDwords: the number of dwords the caller wanted to reserve;
 *            DMAGETPTR() passes its length argument here.
 */
void mach64DmaOverflow( int newDwords );

/*
 * A finish guarantees that all dma commands have actually been consumed
 * by the card and that the engine is completely idle.  It is safe to do
 * software rendering after this returns.
 */
void mach64DmaFinish( void );

/* for routines that need to add a variable stream of register writes, use
the DMAGETPTR() / DMAOUTREG() / DMAADVANCE() interface.  Call
DMAGETPTR() with a length in dwords that is guaranteed to be greater
than what you will be writing.  Using a large value, like 100, does not
waste anything.  Program all the registers you need with DMAOUTREG().
When you have finished, call DMAADVANCE(), which will add any necessary
padding and commit the data to the dma buffer.

DMALOCALS must be included at the top of all functions that use these
macros to declare temporary variables.

*/

/*
 * Bookkeeping for one dma buffer together with its descriptor table.
 * All sizes are counted in dwords.
 *
 * NOTE(review): the physical / virtual member pairs presumably hold the
 * bus address and the CPU mapping of the same memory -- confirm where the
 * buffers are set up.
 */
typedef struct _dma_buffer
{
    mach64UI32	physicalTable;		/* descriptor table */
    mach64UI32 	*virtualTable;
    mach64UI32	physicalBuffer;
    /* base pointer that DMAGETPTR() offsets by bufferDwords */
    mach64UI32 	*virtualBuffer;

    mach64UI32	tableDwords;		/* amount currently allocated */
    mach64UI32	maxTableDwords;
    /* DMAADVANCE() adds the number of dwords just written to this count */
    mach64UI32	bufferDwords;		/* amount currently allocated */
    /* DMAGETPTR() calls mach64DmaOverflow() when a request does not fit
       between bufferDwords and this mark */
    mach64UI32	overflowBufferDwords;
    /* NOTE(review): presumably the full capacity of virtualBuffer -- confirm */
    mach64UI32	maxBufferDwords;
} mach64Dma_buffer;

/* cardHeap is the 8 / 16 / 32 megs of memory on the video card */
extern	memHeap_t	*cardHeap;

/* sysmemHeap is system memory we have been given after the area used
   by the kernel */
extern	memHeap_t	*sysmemHeap;
extern	mach64UI32	sysmemPhysical;	/* 0 if we don't have a physical mapping */
/* NOTE(review): presumably the CPU-visible mapping of the sysmemHeap
   memory -- confirm where it is assigned */
extern	unsigned char	*sysmemVirtual;

/* textureHeap will point to either cardHeap or sysmemHeap */
extern	memHeap_t	*textureHeap;
extern	mach64UI32	textureHeapPhysical;	/* 0 if we aren't using PCI texturing */
extern	unsigned char	*textureHeapVirtual;	/* correct for either local or PCI heaps */

/* the dma buffer that DMAGETPTR() and DMAADVANCE() operate on */
extern	mach64Dma_buffer	*dma_buffer;

/* These are only required by mach64direct.c:
 */
/* NOTE(review): presumably selects which entry of dmaBuffers[] is the
   current dma_buffer -- confirm in the dma implementation */
extern mach64UI32	mach64ActiveDmaBuffer;
/* Pointer to the flush routine in use; the meaning of its int argument
   is not visible from this header. */
extern void (*mach64DoDmaFlush)( int );
/* the two dma buffer descriptors */
extern mach64Dma_buffer	*dmaBuffers[2];

/* Returns an int.  NOTE(review): the meaning of the return value is not
   visible from this header -- check the definition. */
extern int mach64WaitForDmaCompletion( void );
extern void mach64DmaResetBuffer( void );

/*
 * Declares the scratch variables shared by the DMAGETPTR(), DMAOUTREG(),
 * DMAOUTFLOAT() and DMAADVANCE() macros:
 *   outcount - number of dwords written since the last DMAGETPTR()
 *   dma_ptr  - where those dwords are being written
 * The expansion carries no trailing semicolon; the caller supplies it.
 */
#define DMALOCALS							\
    int outcount;							\
    mach64UI32 *dma_ptr

/*
 * DMAGETPTR( length ) - reserve room for a run of register writes.
 *
 * If appending `length` dwords at the current fill position would pass
 * the buffer's overflow mark, mach64DmaOverflow( length ) is called
 * first.  Afterwards dma_ptr points at the first free dword of the
 * current dma buffer and outcount is reset to 0.  DMALOCALS must be in
 * scope.
 *
 * The room check is written as an addition so that it also fires when
 * bufferDwords has already moved past overflowBufferDwords; computing
 * the remaining room by subtraction would wrap around in that case
 * (NOTE(review): assumes mach64UI32 is an unsigned type) and skip the
 * overflow call.
 *
 * The body is wrapped in do { } while ( 0 ) so the macro behaves as one
 * statement under an unbraced if / else.  Invoke it with a trailing
 * semicolon, exactly like DMAOUTREG().  `length` is evaluated more than
 * once and must not have side effects.
 */
#define DMAGETPTR( length )						\
    do {								\
	if ( ( dma_buffer->bufferDwords + ( length ) ) >		\
	     dma_buffer->overflowBufferDwords ) {			\
	    mach64DmaOverflow( length );				\
	}								\
	/* read dma_buffer only after a possible overflow call */	\
	dma_ptr = ( dma_buffer->virtualBuffer +				\
		    dma_buffer->bufferDwords );				\
	outcount = 0;							\
    } while ( 0 )

/*
 * DMAOUTREG( reg, val ) - append one register write to the dma stream.
 *
 * Stores the ADRINDEX() encoding of `reg` followed by `val` at
 * dma_ptr[ outcount ] and advances outcount by two dwords.  Requires
 * DMALOCALS and a preceding DMAGETPTR().
 *
 * Both arguments are parenthesized before use so that compound
 * expressions such as DMAOUTREG( BASE | OFFSET, a | b ) are taken as a
 * whole.  `reg` is expanded by ADRINDEX() more than once, so it must not
 * have side effects.
 */
#define DMAOUTREG( reg, val )						\
    do {								\
	dma_ptr[ outcount++ ] = ADRINDEX( ( reg ) );			\
	dma_ptr[ outcount++ ] = ( val );				\
    } while ( 0 )

/*
 * DMAOUTFLOAT( reg, val ) - append one register write whose data dword
 * is the bit pattern of a float.
 *
 * Stores the ADRINDEX() encoding of `reg`, then `val` converted to float
 * and copied bit-for-bit into the next dword; outcount advances by two.
 * Requires DMALOCALS and a preceding DMAGETPTR().
 *
 * The bits are moved through a union instead of storing through a
 * (float *) cast of the dword pointer, which would access the buffer
 * through an incompatible type.  Like the cast it replaces, this relies
 * on float and mach64UI32 having the same size.
 *
 * Both arguments are parenthesized before use.  `reg` is expanded by
 * ADRINDEX() more than once, so it must not have side effects.
 */
#define DMAOUTFLOAT( reg, val )						\
    do {								\
	union { float f; mach64UI32 u; } dma_float_bits_;		\
	dma_ptr[ outcount++ ] = ADRINDEX( ( reg ) );			\
	dma_float_bits_.f = ( val );					\
	dma_ptr[ outcount++ ] = dma_float_bits_.u;			\
    } while ( 0 )


/*
 * DMAADVANCE() - commit the dwords written since the last DMAGETPTR().
 *
 * Adds outcount to dma_buffer->bufferDwords; the macro itself does
 * nothing else.  Requires DMALOCALS and a preceding DMAGETPTR().
 *
 * The expansion is a single do { } while ( 0 ) statement without a
 * trailing semicolon, so `if ( x ) DMAADVANCE(); else ...` parses as
 * intended.  Invoke it with a trailing semicolon, like DMAOUTREG().
 */
#define DMAADVANCE()							\
    do {								\
	dma_buffer->bufferDwords += outcount;				\
    } while ( 0 )


#endif
