
#include <stdio.h>

#include "types.h"
#include "enums.h"
#include "cva.h"
#include "vertices.h"

#include "mmath.h"
#include "mgalib.h"
#include "mgarender.h"
#include "mgastate.h"
#include "mgadd.h"
#include "mgavb.h"
#include "xsmesaP.h"

#if defined(USE_MMX_ASM)
#include "mmx.h"
#endif

#include "X86/common_x86asm.h"


/* Defined outside this file.  mgaDDFastPath() calls it to enlarge the
 * per-VB driver storage when VB->EltPtr->count * 12 exceeds the current
 * MGA_DRIVER_DATA(VB)->size.
 */
extern void mgaDDResizeVB( struct vertex_buffer *VB, GLuint size );

/* Defined outside this file.  mgaDDImmediateFastPath() calls it once,
 * right before the hardware vertices are built.
 */
extern void gl_fast_copy_vb( struct vertex_buffer *VB );
  
/* One dispatch entry per vertex setup combination.  The entries live in
 * mgaFastTab[] and are selected in the fast paths with
 * (mgaCtx->setupindex & VALID_SETUP).
 */
struct mga_fast_tab {
   /* Invoked by the fast paths as build_vertices( VB, do_cliptest ). */
   void (*build_vertices)( struct vertex_buffer *VB, GLuint do_cliptest );
   /* Stored into mgaCtx->interp before clipping; the clip macros call it
    * as interp( t, O, I, J ) to produce the new vertex O from I and J.
    */
   void (*interp)( GLfloat t, GLfloat *O, const GLfloat *I, const GLfloat *J );
};





/* Small arithmetic helpers used by the clip macros below.
 *
 * NEGATIVE(f)           - true when f is strictly below zero.
 * DIFFERENT_SIGNS(a,b)  - true when the product a*b is strictly negative,
 *                         i.e. exactly one operand is negative and neither
 *                         is zero.
 * LINTERP(T,A,B)        - linear blend A + T*(B-A).
 *
 * Every argument is parenthesized so that compound expressions such as
 * DIFFERENT_SIGNS(x - y, z) are evaluated as written by the caller.
 */
#define NEGATIVE(f)          ((f) < 0)
#define DIFFERENT_SIGNS(a,b) (((a) * (b)) < 0)
#define LINTERP( T, A, B )   ( (A) + (T) * ( (B) - (A) ) )


/* INTERP_RGBA(t, out, a, b): blend four colour channels.
 *
 * The active (#if 1) variant converts channel i of `a` and `b` to float
 * with UBYTE_COLOR_TO_FLOAT_COLOR, blends them with LINTERP(t, fa, fb) and
 * stores the result into out[i] through FLOAT_COLOR_TO_UBYTE_COLOR.
 *
 * The macro expands to a brace block that declares its own loop counter
 * (`i` here, `n` in the disabled variant), so the arguments must not refer
 * to a caller variable of that name.  The arguments are indexed directly
 * (a[i], b[i], out[i]) and are not parenthesized.
 */
#if 1
#define INTERP_RGBA(t, out, a, b) {			\
   int i;						\
   for (i = 0; i < 4; i++) {				\
      GLfloat fa = UBYTE_COLOR_TO_FLOAT_COLOR(a[i]);	\
      GLfloat fb = UBYTE_COLOR_TO_FLOAT_COLOR(b[i]);	\
      GLfloat fo = LINTERP(t, fa, fb);			\
      FLOAT_COLOR_TO_UBYTE_COLOR(out[i], fo);		\
   }							\
}
#else
/* Disabled fixed-point variant (t scaled by 256).
 * Need to check for possible over/underflow cases and verify if
 * there is a real speedup.
 */
#define INTERP_RGBA(t, out, a, b) {				\
   int n;							\
   const GLuint ti = FloatToInt(t*256.0F);			\
   const GLubyte *Ib = (const GLubyte *)&a[0];			\
   const GLubyte *Jb = (const GLubyte *)&b[0];			\
   GLubyte *Ob = (GLubyte *)&out[0];				\
								\
   for (n = 0 ; n < 4 ; n++)					\
      Ob[n] = (GLubyte) (Ib[n] + ((ti * (Jb[n] - Ib[n]))/256));	\
}
#endif


/* CLIP(SGN, V, PLANE): clip the current polygon against one plane.
 *
 * Only does anything when PLANE is set in `mask`.  The polygon is the list
 * of `n` vertex indices in inlist[in]; the result is written to the other
 * list (in is toggled) and `n` becomes the new vertex count.
 *
 * The signed distance of a vertex v is (SGN v[V]) + v[3]; a negative
 * distance is treated as outside.  The loop walks the edges J -> I, where
 * J starts as the last vertex of the polygon:
 *   - when the two distances have different signs a new vertex is created
 *     at verts[next_vert] via interp(t, O, in, out), where the first vertex
 *     passed is the one with the negative distance; its clipmask is
 *     cleared and it is appended to the output list;
 *   - every input vertex gets PLANE set in its clipmask; vertices whose
 *     distance is not negative are appended to the output and have PLANE
 *     cleared again.
 *
 * inlist[0] is redirected to vlist1 so later passes no longer write into
 * the list that was originally stored there.  If fewer than three vertices
 * remain, the macro RETURNS from the enclosing function.
 *
 * Uses these names from the enclosing scope: mask, inlist, in, n, i,
 * verts, vlist1, next_vert, clipmask, interp.  The inner declarations of
 * `in` and `out` (GLfloat pointers) shadow any outer variables of the same
 * name for the duration of the intersection code.
 */
#define CLIP(SGN,V,PLANE)					\
if (mask & PLANE) {						\
   GLuint *indata = inlist[in];					\
   GLuint *outdata = inlist[in ^= 1];				\
   GLuint nr = n;						\
   GLfloat *J = verts[indata[nr-1]].f;			\
   GLfloat dpJ = (SGN J[V]) + J[3];				\
								\
   inlist[0] = vlist1;						\
   for (i = n = 0 ; i < nr ; i++) {				\
      GLuint elt_i = indata[i];					\
      GLfloat *I = verts[elt_i].f;				\
      GLfloat dpI = (SGN I[V]) + I[3];				\
								\
      if (DIFFERENT_SIGNS(dpI, dpJ)) {				\
								\
	 GLfloat *O = verts[next_vert].f;			\
	 GLfloat t, *in, *out;					\
								\
	 if (NEGATIVE(dpI)) {					\
	     t = dpI / (dpI - dpJ);				\
	     in = I;						\
	     out = J;						\
	 }							\
         else							\
	 {							\
	    t = dpJ / (dpJ - dpI);				\
	    in = J;						\
	    out = I;						\
	 }							\
								\
	 interp(t, O, in, out);					\
								\
	 clipmask[next_vert] = 0;				\
	 outdata[n++] = next_vert++;				\
      }								\
								\
      clipmask[elt_i] |= PLANE;		/* don't set up */	\
								\
      if (!NEGATIVE(dpI)) {					\
	 outdata[n++] = elt_i;					\
	 clipmask[elt_i] &= ~PLANE;	/* set up after all */	\
      }								\
								\
      J = I;							\
      dpJ = dpI;						\
   }								\
								\
   if (n < 3) return;						\
}

/* LINE_CLIP(x, y, z, w, PLANE): clip the segment I-J against one plane.
 *
 * Only does anything when PLANE is set in `mask`.  The distances of both
 * endpoints are computed with DOT4V(vertex, x, y, z, w); a negative
 * distance is treated as outside.
 *
 *   - Distances of different sign: a new vertex is built at
 *     verts[next_vert] with interp(t, O, I, J), t = dpI / (dpI - dpJ), and
 *     its clipmask is cleared.  The endpoint with the negative distance is
 *     the one replaced: PLANE is set in the old endpoint's clipmask, and
 *     both the I/J pointer and elts[0]/elts[1] are redirected to the new
 *     vertex.
 *   - Same sign and dpI negative: the macro RETURNS from the enclosing
 *     function.
 *
 * Uses these names from the enclosing scope: mask, I, J, verts, next_vert,
 * interp, clipmask, elts.
 */
#define LINE_CLIP(x,y,z,w,PLANE)		\
if (mask & PLANE) {				\
   GLfloat dpI = DOT4V(I,x,y,z,w);		\
   GLfloat dpJ = DOT4V(J,x,y,z,w);		\
						\
   if (DIFFERENT_SIGNS(dpI, dpJ)) {		\
      GLfloat *O = verts[next_vert].f;	\
      GLfloat t = dpI / (dpI - dpJ);		\
						\
      interp(t, O, I, J);			\
						\
      clipmask[next_vert] = 0;			\
						\
      if (NEGATIVE(dpI)) {			\
  	 clipmask[elts[0]] |= PLANE;		\
	 I = O; elts[0] = next_vert++;		\
      } else {					\
  	 clipmask[elts[1]] |= PLANE;		\
	 J = O;	elts[1] = next_vert++;		\
      }						\
   }						\
   else if (NEGATIVE(dpI))			\
      return;					\
}


/* Clip one triangle against the six frustum planes selected by `mask`
 * and emit the surviving polygon as a triangle fan.
 *
 * p_elts       - in: points at the three vertex indices of the triangle;
 *                out: advanced past the triangles that were written.
 * verts        - vertex storage; clipping appends new vertices to it.
 * clipmask     - per-vertex clip flags, updated by the CLIP macro.
 * p_next_vert  - in/out: index of the next free slot in verts.
 * mask         - OR of the clip flags of the three input vertices.
 * interp       - builds an intersection vertex from two existing ones.
 *
 * If the polygon drops below three vertices the CLIP macro returns early;
 * *p_elts and *p_next_vert are then left as they were on entry.
 *
 * The names verts, clipmask, mask, interp, inlist, in, n, i, vlist1 and
 * next_vert are referenced by CLIP and must keep these spellings.
 */
static __inline void mga_tri_clip( GLuint **p_elts,
				   mgaVertex *verts,
				   GLubyte *clipmask,
				   GLuint *p_next_vert,
				   GLubyte mask,
				   mga_interp_func interp )
{
   GLuint vlist1[VB_MAX_CLIPPED_VERTS];
   GLuint vlist2[VB_MAX_CLIPPED_VERTS];
   GLuint *inlist[2];
   GLuint *dst = *p_elts;
   GLuint next_vert = *p_next_vert;
   GLuint *poly;
   GLuint in = 0;
   GLuint n = 3;
   GLuint i;

   /* The first pass reads the caller's triangle and writes to vlist2;
    * afterwards the passes ping-pong between vlist1 and vlist2.
    */
   inlist[0] = dst;
   inlist[1] = vlist2;

   CLIP(-,0,CLIP_RIGHT_BIT);
   CLIP(+,0,CLIP_LEFT_BIT);
   CLIP(-,1,CLIP_TOP_BIT);
   CLIP(+,1,CLIP_BOTTOM_BIT);
   CLIP(-,2,CLIP_FAR_BIT);
   CLIP(+,2,CLIP_NEAR_BIT);

   /* Fan the planar polygon out around its first vertex, three indices
    * per triangle.
    */
   poly = inlist[in];

   i = 2;
   while (i < n) {
      dst[0] = poly[0];
      dst[1] = poly[i-1];
      dst[2] = poly[i];
      dst += 3;
      i++;
   }

   *p_elts = dst;
   *p_next_vert = next_vert;
}


/* Clip one line segment against the frustum planes selected by `mask`.
 *
 * p_elts       - in: points at the two vertex indices of the segment (they
 *                may be rewritten to refer to new intersection vertices);
 *                out: advanced by two when the segment survives.
 * verts        - vertex storage; clipping appends new vertices to it.
 * clipmask     - per-vertex clip flags, updated by LINE_CLIP.
 * p_next_vert  - in/out: index of the next free slot in verts.
 * mask         - OR of the clip flags of both endpoints.
 * interp       - builds an intersection vertex from two existing ones.
 *
 * LINE_CLIP returns early when the segment lies completely outside a
 * plane; *p_elts and *p_next_vert are then not advanced, which drops the
 * segment.
 *
 * The plane coefficients use the same "distance >= 0 is inside" convention
 * as mga_tri_clip():
 *     RIGHT  -x + w      LEFT    x + w
 *     TOP    -y + w      BOTTOM  y + w
 *     FAR    -z + w      NEAR    z + w
 * The previous coefficients evaluated, for example, x - w for the LEFT
 * plane, which has the wrong sign and the wrong plane compared with the
 * flag being tested.
 * NOTE(review): this relies on DOT4V(v,a,b,c,d) being the plain dot product
 * v[0]*a + v[1]*b + v[2]*c + v[3]*d - confirm against its definition.
 */
static __inline void mga_line_clip( GLuint **p_elts,
				    mgaVertex *verts,
				    GLubyte *clipmask,
				    GLuint *p_next_vert,
				    GLubyte mask,
				    mga_interp_func interp )
{
   GLuint *elts = *p_elts;
   GLfloat *I = verts[elts[0]].f;
   GLfloat *J = verts[elts[1]].f;
   GLuint next_vert = *p_next_vert;

   LINE_CLIP(-1,0,0,1,CLIP_RIGHT_BIT);
   LINE_CLIP(1,0,0,1,CLIP_LEFT_BIT);
   LINE_CLIP(0,-1,0,1,CLIP_TOP_BIT);
   LINE_CLIP(0,1,0,1,CLIP_BOTTOM_BIT);
   LINE_CLIP(0,0,-1,1,CLIP_FAR_BIT);
   LINE_CLIP(0,0,1,1,CLIP_NEAR_BIT);

   *p_next_vert = next_vert;
   *p_elts += 2;
}





/* CLIP_POINT(e): append point `e` to the output element list when it is
 * not clipped, i.e. when its clip mask is zero.  This matches CLIP_LINE and
 * CLIP_TRIANGLE, which keep a primitive untouched when the OR of its
 * vertex masks is zero.  (The test used to be inverted and emitted only
 * the clipped points.)
 *
 * Wrapped in do/while(0) so the macro is a single statement and cannot
 * capture a following `else`.  `e` is evaluated twice.
 *
 * Uses `mask` and `out` from the enclosing scope.
 */
#define CLIP_POINT( e )				\
do {						\
   if (!mask[(e)])				\
      *out++ = (e);				\
} while (0)

/* CLIP_LINE(e1, e0): emit the segment (e1, e0) into the output list.
 *
 * The two indices are always stored at out[0..1] first.  Then:
 *   - neither endpoint clipped: keep them (out advances by two);
 *   - both endpoints share a clip flag: discard (out is not advanced);
 *   - otherwise: mga_line_clip() clips in place and advances out itself.
 *
 * Uses mask, out, verts, next_vert and interp from the enclosing scope.
 */
#define CLIP_LINE( e1, e0 )						\
do {									\
   GLubyte ormask = mask[e0] | mask[e1];				\
   out[0] = e1;								\
   out[1] = e0;								\
   if (!ormask) {							\
      out += 2;				/* fully visible */		\
   }									\
   else if (!(mask[e0] & mask[e1])) {					\
      mga_line_clip( &out, verts, mask, &next_vert, ormask, interp);	\
   }									\
} while (0)

/* CLIP_TRIANGLE(e2, e1, e0): emit the triangle (e2, e1, e0) into the
 * output list.
 *
 * The three indices are always stored at out[0..2] first.  Then:
 *   - no vertex clipped: keep them (out advances by three);
 *   - all three vertices share a clip flag: discard (out is not advanced);
 *   - otherwise: mga_tri_clip() clips in place and advances out itself.
 *
 * Uses mask, out, verts, next_vert and interp from the enclosing scope.
 */
#define CLIP_TRIANGLE( e2, e1, e0 )					\
do {									\
   GLubyte ormask;							\
   out[0] = e2;								\
   out[1] = e1;								\
   out[2] = e0;								\
   ormask = mask[e2] | mask[e1] | mask[e0];				\
   if (!ormask) {							\
      out += 3;				/* fully visible */		\
   }									\
   else if ( !(mask[e2] & mask[e1] & mask[e0])) {			\
      mga_tri_clip( &out, verts, mask, &next_vert, ormask, interp );	\
   }									\
} while (0)






/* Build a table of functions to clip each primitive type.  These
 * produce a list of elements in the appropriate 'reduced' primitive,
 * ie (points, lines, triangles) containing all the clipped and
 * unclipped primitives from the original list.
 *
 * This first instantiation of render_tmp.h is the indexed variant: every
 * vertex number is looked up through VB->EltPtr->data, and the output
 * list is written from the start of clipped_elements.  Generated names
 * carry the form mga_clip_<x>_elt (see TAG).
 */
#define LOCAL_VARS						\
   GLuint *elt = VB->EltPtr->data;				\
   mgaVertex *verts = MGA_DRIVER_DATA(VB)->verts;		\
   GLuint next_vert = MGA_DRIVER_DATA(VB)->last_vert;		\
   GLuint *out = MGA_DRIVER_DATA(VB)->clipped_elements.data;	\
   GLubyte *mask = VB->ClipMask;				\
   mga_interp_func interp = mgaCtx->interp;                     \
   (void) interp; (void) verts;

/* Store the number of elements written and the new end of the vertex
 * storage back into the driver data.
 */
#define POSTFIX							\
   MGA_DRIVER_DATA(VB)->clipped_elements.count =		\
          out - MGA_DRIVER_DATA(VB)->clipped_elements.data;	\
   MGA_DRIVER_DATA(VB)->last_vert = next_vert;		


#define INIT(x)

#define RENDER_POINTS(start, count)			\
do {							\
   GLuint i;						\
   for (i = start ; i < count ; i++ )			\
      CLIP_POINT( elt[i] );				\
} while (0)

#define RENDER_LINE(i1, i0)      			\
   CLIP_LINE(elt[i1], elt[i0])

/* With parity set the first two vertices are swapped. */
#define RENDER_TRI(i2, i1, i0, pv, parity)		\
do {							\
   GLuint e2 = elt[i2], e1 = elt[i1], e0 = elt[i0];	\
   if (parity) e2 = elt[i1], e1 = elt[i2];		\
   CLIP_TRIANGLE( e2, e1, e0 );				\
} while (0)

/* A quad is split into the triangles (i3,i2,i0) and (i2,i1,i0).  Both
 * triangles are wrapped in one do/while(0) so that the macro is a single
 * statement even under an unbraced if/for in the template.
 */
#define RENDER_QUAD(i3, i2, i1, i0, pv )		\
do {							\
   CLIP_TRIANGLE(elt[i3], elt[i2], elt[i0]);		\
   CLIP_TRIANGLE(elt[i2], elt[i1], elt[i0]);		\
} while (0)

#define TAG(x) mga_clip_##x##_elt
#include "render_tmp.h"


/* Second instantiation of render_tmp.h: the non-indexed variant.  Vertex
 * numbers are used directly, and output is appended after the
 * clipped_elements.count entries that are already in the list.  Generated
 * names carry the form mga_clip_<x>_immediate (see TAG).
 */
#define LOCAL_VARS						\
   GLuint next_vert = MGA_DRIVER_DATA(VB)->last_vert;		\
   mgaVertex *verts = MGA_DRIVER_DATA(VB)->verts;		\
   GLuint *out = MGA_DRIVER_DATA(VB)->clipped_elements.data +	\
                 MGA_DRIVER_DATA(VB)->clipped_elements.count;	\
   GLubyte *mask = VB->ClipMask;				\
   mga_interp_func interp = mgaCtx->interp;			\
   (void) interp; (void) verts;

/* Store the number of elements written and the new end of the vertex
 * storage back into the driver data.
 */
#define POSTFIX							\
   MGA_DRIVER_DATA(VB)->clipped_elements.count =		\
          out - MGA_DRIVER_DATA(VB)->clipped_elements.data;	\
   MGA_DRIVER_DATA(VB)->last_vert = next_vert;		



#define INIT(x)

#define RENDER_POINTS(start, count)		\
do {						\
   GLuint i;					\
   for (i = start ; i < count ; i++ )		\
      CLIP_POINT( i );				\
} while (0)

#define RENDER_LINE(i1, i0)			\
   CLIP_LINE(i1, i0)

/* With parity set the first two vertices are swapped. */
#define RENDER_TRI(i2, i1, i0, pv, parity)	\
do {						\
   GLuint e2 = i2, e1 = i1, e0 = i0;		\
   if (parity) e2 = i1, e1 = i2;		\
   CLIP_TRIANGLE( e2, e1, e0 );			\
} while (0)

/* A quad is split into the triangles (i3,i2,i0) and (i2,i1,i0).  Both
 * triangles are wrapped in one do/while(0) so that the macro is a single
 * statement even under an unbraced if/for in the template.
 */
#define RENDER_QUAD(i3, i2, i1, i0, pv )	\
do {						\
   CLIP_TRIANGLE(i3, i2, i0);			\
   CLIP_TRIANGLE(i2, i1, i0);			\
} while (0)

#define TAG(x) mga_clip_##x##_immediate
#include "render_tmp.h"


/* Per-primitive multiplier applied to VB->EltPtr->count when
 * mga_alloc_vert_elt_buf() sizes the element part of the setup buffer.
 * Indexed by ctx->CVA.elt_mode; only consulted when that mode reduces to
 * GL_TRIANGLES.
 *
 * NOTE(review): the entries presumably follow the GL primitive enum order
 * (points, lines, line loop, line strip, triangles, tri strip, tri fan,
 * quads, quad strip, polygon) - confirm.  If one output element is stored
 * per dword, a factor of 1 for quads and 2 for strips/fans/polygons looks
 * smaller than the number of triangle indices those primitives expand to;
 * verify against the element writer.
 */
static int elt_factor[GL_POLYGON+1] = {
   0,
   0,
   0,
   0,
   1,				/* tris */
   2,
   2,
   1,				/* quads */
   2,
   2,
};  

/* Allocate the setup (DMA) buffer for this VB and copy the vertices into
 * it.
 *
 * A buffer is requested only when the current element mode reduces to
 * triangles; otherwise mgaVB->vert_buf keeps whatever value the caller
 * left in it.  When a buffer is present, all mgaVB->last_vert vertices are
 * copied to its start and mgaVB->elt_buf is pointed at the first dword
 * after them.
 */
static void mga_alloc_vert_elt_buf( struct vertex_buffer *VB )
{
   mgaVertexBufferPtr mgaVB = MGA_DRIVER_DATA(VB);
   GLcontext *ctx = VB->ctx;
   GLuint vertsize = MGA_CONTEXT(ctx)->vertsize;

   /* Allocate space for vertices and elements (these must be located
    * in the same dma buffer).  This is only possible for triangle
    * primitives (tris, fans, strips, quads and polys).
    */
   if (gl_reduce_prim[ctx->CVA.elt_mode] == GL_TRIANGLES)
   {
      int factor = elt_factor[ctx->CVA.elt_mode];
      int sz = vertsize;

      /* 10-dword vertices occupy 12 dwords in the buffer (see the copy
       * loops below).
       */
      if (sz == 10)
	 sz = 12;

      mgaVB->vert_buf = mgaAllocSetupBuffer( mgaVB->last_vert * sz +
					     VB->EltPtr->count * factor + 4,
					     &mgaVB->vert_phys_start );
   }

   /* TODO: don't send clipped verts to the card...
    *
    * TODO: can we avoid building the clip-space vertices?  Or at
    *       least avoid writing the device-space coordinates over the
    *       clip-space ones, thus allowing better reuse of the verts
    *       in CVA mode?
    */
   if (!mgaVB->vert_buf)
      return;

   {
      int i, k;
      hwUI32 *buf = mgaVB->vert_buf, *buf4;
      mgaVertexPtr vert = mgaVB->verts;
#if defined(USE_MMX_ASM)
      /* gprof showed improvement... */
      mmx_t *buf2, *buf3 = (mmx_t *) buf;

      /* runtime detection of MMX */
      if (gl_x86_cpu_features & GL_CPU_MMX) {
	 switch (vertsize) {
	 case 10:
	    for (i = 0 ; i < mgaVB->last_vert ; i++) {
	       buf2 = (mmx_t *)&vert[i];
	       MOVE_10DWORDS_MMX(buf2, buf3);
	       /* Vertices must be 16-byte (4-dword) aligned. */
	       buf3 += 6;
	    }
	    break;
	 case 8:
	    for (i = 0 ; i < mgaVB->last_vert ; i++) {
	       buf2 = (mmx_t *)&vert[i];
	       MOVE_8DWORDS_MMX(buf2, buf3);
	       buf3 += 4;
	    }
	    break;
	 default:
	    FatalError("Bad vertex size");
	 }
	 emms();
	 buf = (hwUI32 *) buf3;
      }
      else
#endif
      switch (vertsize) {
      case 10:
	 /* Vertices must be 16-byte (4-dword) aligned: copy ten dwords,
	  * then skip two.
	  */
	 for (i = 0 ; i < mgaVB->last_vert ; i++) {
	    buf4 = (hwUI32 *)&vert[i];
	    for (k = 0 ; k < 10 ; k++)
	       *buf++ = *buf4++;
	    buf += 2;
	 }
	 break;
      case 8:
	 for (i = 0 ; i < mgaVB->last_vert ; i++) {
	    buf4 = (hwUI32 *)&vert[i];
	    for (k = 0 ; k < 8 ; k++)
	       *buf++ = *buf4++;
	 }
	 break;
      default:
	 FatalError("Bad vertex size");
      }

      /* Elements are written directly behind the vertices. */
      mgaVB->elt_buf = buf;
   }
}

static void mga_project_vertices( struct vertex_buffer *VB )
{
   mgaVertexBufferPtr mgaVB = MGA_DRIVER_DATA(VB);
   GLcontext *ctx = VB->ctx;
   GLfloat *m = MGA_CONTEXT(ctx)->map;
   GLmatrix *mat = &ctx->Viewport.WindowMap;

   m[MAT_SX] =   mat->m[MAT_SX];
   m[MAT_TX] =   mat->m[MAT_TX] - .5;
   m[MAT_SY] = (- mat->m[MAT_SY]);
   m[MAT_TY] = (- mat->m[MAT_TY]) + mgaDB->height - .5;
   m[MAT_SZ] =   mat->m[MAT_SZ] * (1.0 / 0x10000);
   m[MAT_TZ] =   mat->m[MAT_TZ] * (1.0 / 0x10000);

   gl_project_v16( mgaVB->verts[VB->CopyStart].f,
		   mgaVB->verts[mgaVB->last_vert].f,
		   m,
		   16 * 4 );

   mgaVB->vert_buf = 0;
   mgaVB->elt_buf = 0;

   if (!mgaglx.noSetupDma)
      mga_alloc_vert_elt_buf( VB );
}

static void mga_project_clipped_vertices( struct vertex_buffer *VB )
{
   mgaVertexBufferPtr mgaVB = MGA_DRIVER_DATA(VB);
   GLcontext *ctx = VB->ctx;
   GLfloat *m = MGA_CONTEXT(ctx)->map;
   GLmatrix *mat = &ctx->Viewport.WindowMap;

   m[MAT_SX] =   mat->m[MAT_SX];
   m[MAT_TX] =   mat->m[MAT_TX] - .5;
   m[MAT_SY] = (- mat->m[MAT_SY]);
   m[MAT_TY] = (- mat->m[MAT_TY]) + mgaDB->height - .5;
   m[MAT_SZ] =   mat->m[MAT_SZ] * (1.0 / 0x10000);
   m[MAT_TZ] =   mat->m[MAT_TZ] * (1.0 / 0x10000);

   gl_project_clipped_v16( mgaVB->verts[VB->CopyStart].f,
			   mgaVB->verts[mgaVB->last_vert].f,
			   m,
			   16 * 4,
			   VB->ClipMask + VB->CopyStart );

   mgaVB->vert_buf = 0;
   mgaVB->elt_buf = 0;

   if (!mgaglx.noSetupDma)
      mga_alloc_vert_elt_buf( VB );
}


/* Pack rgba and/or texture into the remaining half of a 32 byte vertex.
 *
 * The CLIP_* constants below are consumed by the mgafasttmp.h template.
 * NOTE(review): presumably CLIP_UBYTE_COLOR, CLIP_S0..CLIP_T1 are slot
 * numbers within a vertex and CLIP_UBYTE_B/G/R/A are byte positions inside
 * the colour slot - confirm against mgafasttmp.h.
 */
#define CLIP_UBYTE_COLOR  4
#define CLIP_UBYTE_B 0   
#define CLIP_UBYTE_G 1  
#define CLIP_UBYTE_R 2
#define CLIP_UBYTE_A 3   
#define CLIP_S0 6
#define CLIP_T0 7
#define CLIP_S1 8
#define CLIP_T1 9

/* mgafasttmp.h is included once per supported setup combination.  TYPE
 * selects the combination and TAG() gives the generated names a matching
 * suffix; the mga_init_fastpath<suffix>() functions called from
 * mgaDDFastPathInit() come from these inclusions.
 */

/* No colour, no texture. */
#define TYPE (0)
#define TAG(x) x
#include "mgafasttmp.h"

/* Colour only. */
#define TYPE (MGA_RGBA_BIT)
#define TAG(x) x##_RGBA
#include "mgafasttmp.h"

/* Texture unit 0 only. */
#define TYPE (MGA_TEX0_BIT)
#define TAG(x) x##_TMU0
#include "mgafasttmp.h"

/* Colour and texture unit 0. */
#define TYPE (MGA_RGBA_BIT|MGA_TEX0_BIT)
#define TAG(x) x##_RGBA_TMU0
#include "mgafasttmp.h"

/* Colour and both texture units. */
#define TYPE (MGA_RGBA_BIT|MGA_TEX0_BIT|MGA_TEX1_BIT)
#define TAG(x) x##_RGBA_TMU0_TMU1
#include "mgafasttmp.h"

/* Both texture units, no colour.
 *
 * This one *could* get away with sneaking tmu1 into the color and
 * specular slots, thus fitting inside a cache line.  Would be even
 * better if our warp code allowed us to send the smaller vertex to
 * the card.
 */
#define TYPE (MGA_TEX0_BIT|MGA_TEX1_BIT)
#define TAG(x) x##_TMU0_TMU1
#include "mgafasttmp.h"


/* Dispatch table for the fast paths, indexed by the setup bits
 * (mgaCtx->setupindex & VALID_SETUP).  Only the six combinations filled in
 * by mgaDDFastPathInit() are used, so the array is very sparsely
 * populated.  TODO: fix the indices.
 */
static struct mga_fast_tab mgaFastTab[0x80];

/* One-time initialisation of the fast path.
 *
 * Initialises the two clip render tables (indexed and immediate) and then
 * fills one mgaFastTab[] entry for every supported combination of the
 * RGBA / TEX0 / TEX1 setup bits.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * proper prototype and calls with arguments are diagnosed.
 */
void mgaDDFastPathInit( void )
{
   mga_clip_render_init_elt();
   mga_clip_render_init_immediate();

   mga_init_fastpath( &mgaFastTab[0] );
   mga_init_fastpath_RGBA( &mgaFastTab[MGA_RGBA_BIT] );
   mga_init_fastpath_TMU0( &mgaFastTab[MGA_TEX0_BIT] );
   mga_init_fastpath_RGBA_TMU0( &mgaFastTab[MGA_RGBA_BIT|MGA_TEX0_BIT] );
   mga_init_fastpath_TMU0_TMU1( &mgaFastTab[MGA_TEX0_BIT|MGA_TEX1_BIT] );
   mga_init_fastpath_RGBA_TMU0_TMU1( &mgaFastTab[MGA_RGBA_BIT|MGA_TEX0_BIT|
						MGA_TEX1_BIT] );
}

/* The setup bits that select an entry of mgaFastTab[]. */
#define VALID_SETUP (MGA_RGBA_BIT|MGA_TEX0_BIT|MGA_TEX1_BIT)


/* Fast path for indexed (CVA / element) rendering.
 *
 * Builds the hardware vertices for the whole VB, clips the element list
 * if any vertex is outside the frustum, projects to device space and
 * renders through mgaDDRenderElementsDirect().  If every vertex shares a
 * clip flag (ClipAndMask != 0) nothing is rendered.
 *
 * On the clipped route ctx->CVA.elt_mode and VB->EltPtr are switched to
 * the reduced primitive and the clipped element list and are left that
 * way on return; VB->ClipOrMask is restored.
 */
void mgaDDFastPath( struct vertex_buffer *VB )
{
   GLcontext *ctx = VB->ctx;
   GLenum prim = ctx->CVA.elt_mode;
   struct mga_fast_tab *tab = &mgaFastTab[mgaCtx->setupindex & VALID_SETUP];

   /* The clip test is always requested.  A narrower condition based on
    * ctx->Array.NewArrayState / VB->pipeline->new_state was once tried
    * here and is currently disabled.
    */
   GLuint do_cliptest = 1;

   gl_prepare_arrays_cva( VB );	                 /* still need this */

   /* Reserve enough space for the pathological case.
    */
   if (VB->EltPtr->count * 12 > MGA_DRIVER_DATA(VB)->size)
      mgaDDResizeVB( VB, VB->EltPtr->count * 12 );

   tab->build_vertices( VB, do_cliptest );       /* object->clip space */

   if (mgaCtx->new_state)
      mgaDDUpdateHwState( ctx );

   if (!VB->ClipOrMask) {
      /* Nothing is clipped: use the original element list. */
      mga_project_vertices( VB );               /* clip->device space  */
      mgaDDRenderElementsDirect( VB );
   }
   else if (!VB->ClipAndMask) {
      GLubyte saved_ormask = VB->ClipOrMask;

      mgaCtx->interp = tab->interp;

      /* Build the clipped element list in reduced-primitive form. */
      mga_clip_render_tab_elt[prim]( VB, 0, VB->EltPtr->count, 0 );

      ctx->CVA.elt_mode = gl_reduce_prim[prim];
      VB->EltPtr = &(MGA_DRIVER_DATA(VB)->clipped_elements);

      mga_project_clipped_vertices( VB );       /* clip->device space */

      /* Render the new list with clipping reported as already done. */
      VB->ClipOrMask = 0;
      mgaDDRenderElementsDirect( VB );
      VB->ClipOrMask = saved_ormask;
   }

   /* This indicates that there is no cached data to reuse.
    */
   VB->pipeline->data_valid = 0;
   VB->pipeline->new_state = 0;
}



/* This has a little work to do before it is ready for the big time.
 * It's currently a little buggy and no faster than the normal path.  
 */
void mgaDDImmediateFastPath( struct vertex_buffer *VB )
{
   mgaVertexBufferPtr mvb = MGA_DRIVER_DATA(VB);
   struct mga_fast_tab *tab = &mgaFastTab[mgaCtx->setupindex & VALID_SETUP];
   GLuint start = VB->CopyStart;

   /* Do this in mesa when pipeline requests untransformed vertex
    * copying:
    */
   VB->ObjPtr->start = VEC_ELT(VB->ObjPtr, GLfloat, start);
   VB->TexCoordPtr[0]->start = VEC_ELT(VB->TexCoordPtr[0], GLfloat, start);
   VB->TexCoordPtr[1]->start = VEC_ELT(VB->TexCoordPtr[1], GLfloat, start);
   VB->ColorPtr->start = VEC_ELT(VB->ColorPtr, GLubyte, start);

   gl_fast_copy_vb( VB );
   tab->build_vertices( VB, 1 );  

   if (VB->ClipOrMask) {
      if (!VB->ClipAndMask) {
	 GLubyte tmp = VB->ClipOrMask;
	 render_func *clip = mga_clip_render_tab_immediate;
	 GLuint i, next;
	 GLuint last_prim = gl_reduce_prim[VB->Primitive[VB->CopyStart]];
	 GLuint last_start = VB->CopyStart;
	 GLuint parity = VB->Parity;

	 mgaCtx->interp = tab->interp;
	 mvb->clipped_elements.count = last_start;

	 for (i = VB->CopyStart ; i < VB->Count ; i = next, parity = 0 ) {
	    GLenum prim = VB->Primitive[i];
	    next = VB->NextPrimitive[i];

	    if (gl_reduce_prim[prim] != last_prim) {
	       GLuint current = mvb->clipped_elements.count;

	       mvb->next_primitive[last_start] = current;
	       mvb->primitive[last_start] = last_prim;
	       last_prim = gl_reduce_prim[prim];
	       last_start = current;
	    }
	    
	    clip[prim]( VB, i, next, parity );  /* build element list */ 
	 }

	 mvb->next_primitive[last_start] = mvb->clipped_elements.count;
	 mvb->primitive[last_start] = last_prim;

	 mga_project_clipped_vertices( VB );    /* clip->device space */

	 VB->EltPtr = &mvb->clipped_elements;
	 VB->Primitive = mvb->primitive;
	 VB->NextPrimitive = mvb->next_primitive;
	 VB->ClipOrMask = 0;	 	 
	 VB->Parity = 0;
	 mgaDDRenderElementsImmediate( VB );     /* render using element list */
	 VB->ClipOrMask = tmp;
      }
   } else {
      mga_project_vertices( VB );               /* clip->device space  */
      mgaDDRenderDirect( VB );                   /* render the normal way */
   }

   /* This indicates that there is no cached data to reuse.  
    */
   VB->pipeline->data_valid = 0;
   VB->pipeline->new_state = 0;
}


