#include <tamtypes.h>
#include <string.h>
#include "math.h"

	/*
	const float fov = 45.0f;
	const float aspect = (float)SCREEN_HEIGHT / (float)SCREEN_WIDTH;
	float tanfov = (float)tan((fov/2)/180*M_PI);
	xscale = (float)((SCREEN_WIDTH  / 2) / tanfov);
	yscale = (float)((SCREEN_HEIGHT / 2) / aspect / tanfov);
	*/

/* Precomputed horizontal and vertical projection scale factors.
   The commented-out snippet above sketches how such values can be derived
   from a field of view; NOTE(review): confirm the exact screen dimensions
   that were used to produce these literals. */
#define xscale 772.56572644933608776932464478663
#define yscale 463.53943586960165266159478687198

/* Lookup table filled by math_init(): math_N rows of four floats each. */
float math_sinetable[math_N][4];
/* Two groups of four floats read by math_project_vertices() as two quadwords:
   the first group is multiplied into each vertex component-wise (x, y, z),
   the second group is added afterwards.  Aligned to 16 bytes because it is
   fetched with quadword loads.  Note that 32000/16 is integer division and
   evaluates to 2000. */
float __attribute__((aligned(16))) math_projection_constants[] = { xscale/2, yscale/2, 1000, 0,
																   32000/16+320, 32000/16+128, 0, 0 };

/*
 * Fill math_sinetable with one row per table step over a full turn.
 *
 * For the angle a0 = i * 2*pi / math_N, row i holds:
 *   [0]  sin(a0)
 *   [1]  cos(a0), i.e. the slope of the sine at a0
 *   [2]  quadratic coefficient
 *   [3]  cubic coefficient
 * where [2] and [3] are the cubic Hermite coefficients of the segment that
 * matches sine value and slope at a0 and at the following table angle.
 */
void math_init()
{
	const float step=(2*M_PI/math_N);
	int idx;

	for(idx=0;idx<math_N;idx++)
	{
		float *row=math_sinetable[idx];
		/* Angles at the start and at the end of this table segment. */
		float angle_lo=idx*(2*M_PI/math_N);
		float angle_hi=(idx+1)*(2*M_PI/math_N);
		float sin_lo=sin(angle_lo);
		float cos_lo=cos(angle_lo);
		float sin_hi=sin(angle_hi);
		float cos_hi=cos(angle_hi);

		row[0]=sin_lo;
		row[1]=cos_lo;
		row[2]=(3*(sin_hi-sin_lo)-step*(cos_hi+2*cos_lo))/(step*step);
		row[3]=(step*(cos_hi+cos_lo)-2*(sin_hi-sin_lo))/(step*step*step);
	}
}

/*
 * Project `count` vertices from `src` into `dst`.
 *
 * The active path is VU0 macro-mode assembly.  Per vertex it
 *   1. multiplies x, y, z by the first quadword of math_projection_constants,
 *   2. multiplies x and y by Q,
 *   3. adds the second quadword of math_projection_constants,
 *   4. converts x, y, z to fixed point with vftoi4 and stores the quadword.
 *
 * Q is currently produced by vrsqrt, i.e. 1/sqrt(z), whereas the portable
 * reference path kept under "#if 0" uses 2/z.  The original author left a
 * note that this should become a divide; it is left unchanged here because
 * altering it changes the rendered output.
 *
 * Both arrays are walked in 16-byte steps and accessed with quadword
 * loads/stores, so each element is expected to occupy one 16-byte aligned
 * quadword.
 *
 * dst    destination array, one quadword written per vertex
 * src    source array, one quadword read per vertex
 * count  number of vertices; values <= 0 do nothing
 */
void math_project_vertices(Vector2d* dst, Vector3d* src, int count)
{
	/* The assembly loop tests its counter only after a vertex has been
	   processed, so an empty batch would underflow the counter and run
	   away through memory.  Reject it up front. */
	if(count<=0)
		return;

#if 0
	/* Portable reference implementation. */
	while(count--)
	{
		float f= 2.0f / src->z;
		dst->x = (s32)((src->x*math_projection_constants[0]*f+math_projection_constants[4])*16);
		dst->y = (s32)((src->y*math_projection_constants[1]*f+math_projection_constants[5])*16);
		dst->z = (s32)((src->z * math_projection_constants[2])*16);
		dst++;
		src++;
	}
#else
	__asm__ __volatile__ (
		"lqc2		$vf10, 0(%3)						\n"
		"lqc2		$vf11, 16(%3)						\n"
		"move		$8, %0								\n"
		"move		$9, %1								\n"
		"move		$10, %2								\n"
		/* Numeric local label: stays unique even if this function is inlined. */
	"1:													\n"
		"lqc2		$vf8, 0($8)							\n"
		"vrsqrt 	Q, $vf0w, $vf8z						\n" /* FIXME (original author): should be a DIV */
		"vmul.xyz	$vf8xyz, $vf8xyz, $vf10xyz			\n"
		"vwaitq											\n"
		"vmulq.xy 	$vf8xy, $vf8xy, Q					\n"
		"vadd.xyz	$vf8xyz, $vf8xyz, $vf11xyz			\n"
		"vftoi4.xyz	$vf8xyz, $vf8xyz					\n"
		"sqc2		$vf8, 0($9)							\n"
		"addiu 		$8, 16								\n"
		"addiu 		$9, 16								\n"
		"addiu		$10, -1								\n"
		"bnez		$10, 1b								\n"
	 :
	 : "r" (src), "r" (dst), "r" (count), "r" (math_projection_constants)
	   /* $8-$10 are overwritten as scratch registers and memory is read and
	      written behind the compiler's back; both must be declared. */
	 : "$8", "$9", "$10", "memory");
#endif
}


/*
 * Transform `count` vertices from `src` by matrix `m` and store them in `dst`.
 *
 * The active path is VU0 macro-mode assembly.  The matrix is read as four
 * quadwords at byte offsets 0x00, 0x10, 0x20 and 0x30.  Per vertex the first
 * three are scaled by the vertex x, y and z respectively and summed, and the
 * fourth is added to the result.
 * NOTE(review): for this to agree with the reference path under "#if 0" the
 * quadwords must hold (xu,yu,zu), (xv,yv,zv), (xw,yw,zw) and (xx,yy,zz) --
 * confirm against the Matrix declaration.
 *
 * Both arrays are walked in 16-byte steps and accessed with quadword
 * loads/stores, so each element is expected to occupy one 16-byte aligned
 * quadword.
 *
 * dst    destination array, one quadword written per vertex
 * src    source array, one quadword read per vertex
 * m      transform to apply
 * count  number of vertices; values <= 0 do nothing
 */
void math_transform_vertices(Vector3d* dst, Vector3d* src, Matrix* m, int count)
{
	/* The assembly loop tests its counter only after a vertex has been
	   processed, so an empty batch would underflow the counter and run
	   away through memory.  Reject it up front. */
	if(count<=0)
		return;

#if 0
	/* Portable reference implementation. */
	Vector3d v;
	while(count--)
	{
		v.x=m->xu*src->x+m->xv*src->y+m->xw*src->z+m->xx;
		v.y=m->yu*src->x+m->yv*src->y+m->yw*src->z+m->yy;
		v.z=m->zu*src->x+m->zv*src->y+m->zw*src->z+m->zz;
		*dst = v;
		dst++;
		src++;
	}
#else
	__asm__ __volatile__(
		"lqc2		vf8, 0x0(%2)\n"
		"lqc2		vf9, 0x10(%2)\n"
		"lqc2		vf10, 0x20(%2)\n"
		"lqc2		vf11, 0x30(%2)\n"
		"move		$8, %0\n"
		"move		$9, %1\n"
		"move		$10, %3\n"
		/* Numeric local label: stays unique even if this function is inlined. */
	"1:\n"
		"lqc2		vf4, 0x0($8)\n"
		"vmulax.xyz	ACC,vf8,vf4x\n"
		"vmadday.xyz	ACC,vf9,vf4y\n"
		"vmaddz.xyz	vf5,vf10,vf4z\n"
		"vadd		vf5,vf11,vf5\n"
		"sqc2		vf5,0x0($9)\n"
		"addiu 		$8, 0x10\n"
		"addiu 		$9, 0x10\n"
		"addiu 		$10, -1\n"
		"bnez 		$10, 1b\n"
		:
		: "r" (src),"r" (dst),"r" (m), "r" (count)
		  /* $8-$10 are overwritten as scratch registers and memory is read and
		     written behind the compiler's back; both must be declared. */
		: "$8", "$9", "$10", "memory");
#endif
}

/*
 * Reset the twelve named coefficients of `m` to the identity transform:
 * ones on the xu/yv/zw diagonal, zeros everywhere else, including the
 * translation terms xx/yy/zz.
 */
void math_matrix_identity(Matrix *m)
{
	/* Diagonal. */
	m->xu=1.0f;
	m->yv=1.0f;
	m->zw=1.0f;

	/* Off-diagonal terms. */
	m->xv=0.0f;
	m->xw=0.0f;
	m->yu=0.0f;
	m->yw=0.0f;
	m->zu=0.0f;
	m->zv=0.0f;

	/* Translation. */
	m->xx=0.0f;
	m->yy=0.0f;
	m->zz=0.0f;
}

/*
 * Rotate the transform `m` in place by angle `a` (as accepted by sin/cos)
 * in the y/z plane.  For each of the four columns (u, v, w and the
 * translation) the y and z entries are replaced by
 *   y' = y*cos(a) - z*sin(a)
 *   z' = y*sin(a) + z*cos(a)
 * The x entries are left untouched.
 */
void math_matrix_rotatex(Matrix* m, float a)
{
	const float sn=sin(a);
	const float cs=cos(a);
	float y,z;

	y=m->yu; z=m->zu;
	m->yu=y*cs-z*sn;
	m->zu=y*sn+z*cs;

	y=m->yv; z=m->zv;
	m->yv=y*cs-z*sn;
	m->zv=y*sn+z*cs;

	y=m->yw; z=m->zw;
	m->yw=y*cs-z*sn;
	m->zw=y*sn+z*cs;

	y=m->yy; z=m->zz;
	m->yy=y*cs-z*sn;
	m->zz=y*sn+z*cs;
}

/*
 * Rotate the transform `m` in place by angle `a` (as accepted by sin/cos)
 * in the z/x plane.  For each of the four columns (u, v, w and the
 * translation) the z and x entries are replaced by
 *   z' = z*cos(a) - x*sin(a)
 *   x' = z*sin(a) + x*cos(a)
 * The y entries are left untouched.
 */
void math_matrix_rotatey(Matrix* m, float a)
{
	const float sn=sin(a);
	const float cs=cos(a);
	float z,x;

	z=m->zu; x=m->xu;
	m->zu=z*cs-x*sn;
	m->xu=z*sn+x*cs;

	z=m->zv; x=m->xv;
	m->zv=z*cs-x*sn;
	m->xv=z*sn+x*cs;

	z=m->zw; x=m->xw;
	m->zw=z*cs-x*sn;
	m->xw=z*sn+x*cs;

	z=m->zz; x=m->xx;
	m->zz=z*cs-x*sn;
	m->xx=z*sn+x*cs;
}

/*
 * Rotate the transform `m` in place by angle `a` (as accepted by sin/cos)
 * in the x/y plane.  For each of the four columns (u, v, w and the
 * translation) the x and y entries are replaced by
 *   x' = x*cos(a) - y*sin(a)
 *   y' = x*sin(a) + y*cos(a)
 * The z entries are left untouched.
 */
void math_matrix_rotatez(Matrix* m, float a)
{
	const float sn=sin(a);
	const float cs=cos(a);
	float x,y;

	x=m->xu; y=m->yu;
	m->xu=x*cs-y*sn;
	m->yu=x*sn+y*cs;

	x=m->xv; y=m->yv;
	m->xv=x*cs-y*sn;
	m->yv=x*sn+y*cs;

	x=m->xw; y=m->yw;
	m->xw=x*cs-y*sn;
	m->yw=x*sn+y*cs;

	x=m->xx; y=m->yy;
	m->xx=x*cs-y*sn;
	m->yy=x*sn+y*cs;
}

/*
 * Offset the translation terms of `m` by (x, y, z).  The other
 * coefficients are not touched.
 */
void math_matrix_move(Matrix *m, float x, float y, float z)
{
	m->xx = m->xx + x;
	m->yy = m->yy + y;
	m->zz = m->zz + z;
}

/*
 * Build in `m` a transform located at `pos` whose w axis points from `pos`
 * towards `target`.
 *
 * The basis is derived from the view direction and a fixed (0,1,0) up hint:
 *   w = target - pos
 *   u = w x up
 *   v = w x u
 * All three are normalized before being written into the matrix, and the
 * translation is set to `pos`.
 */
void math_matrix_lookat(Matrix *m, Vector3d* pos, Vector3d* target)
{
	Vector3d side;
	Vector3d up={0,1,0,0};
	Vector3d forward;

	math_matrix_identity(m);

	/* View direction, not yet normalized. */
	forward.x=target->x - pos->x;
	forward.y=target->y - pos->y;
	forward.z=target->z - pos->z;

	/* Derive the two remaining axes from the view direction. */
	math_vector_cross(&side, &forward, &up);
	math_vector_cross(&up, &forward, &side);

	math_vector_normalize(&side, &side);
	math_vector_normalize(&up, &up);
	math_vector_normalize(&forward, &forward);

	/* Each axis fills one column of the matrix. */
	m->xu=side.x;
	m->yu=side.y;
	m->zu=side.z;

	m->xv=up.x;
	m->yv=up.y;
	m->zv=up.z;

	m->xw=forward.x;
	m->yw=forward.y;
	m->zw=forward.z;

	/* The identity left the translation at zero, so this sets it to pos. */
	math_matrix_move(m, pos->x, pos->y, pos->z);
}

/*
 * Concatenate two transforms: `dst` receives the transform that is
 * equivalent to applying `a` first and `b` second.
 *
 * The product is assembled in a local matrix and copied out at the end,
 * so `dst` may be the same object as `a` or `b`.
 */
void math_matrix_multiply(Matrix *dst, Matrix* a, Matrix* b)
{
	Matrix product;

	/* x row of b against the columns of a. */
	product.xu = b->xu*a->xu + b->xv*a->yu + b->xw*a->zu;
	product.xv = b->xu*a->xv + b->xv*a->yv + b->xw*a->zv;
	product.xw = b->xu*a->xw + b->xv*a->yw + b->xw*a->zw;
	product.xx = b->xu*a->xx + b->xv*a->yy + b->xw*a->zz + b->xx;

	/* y row of b against the columns of a. */
	product.yu = b->yu*a->xu + b->yv*a->yu + b->yw*a->zu;
	product.yv = b->yu*a->xv + b->yv*a->yv + b->yw*a->zv;
	product.yw = b->yu*a->xw + b->yv*a->yw + b->yw*a->zw;
	product.yy = b->yu*a->xx + b->yv*a->yy + b->yw*a->zz + b->yy;

	/* z row of b against the columns of a. */
	product.zu = b->zu*a->xu + b->zv*a->yu + b->zw*a->zu;
	product.zv = b->zu*a->xv + b->zv*a->yv + b->zw*a->zv;
	product.zw = b->zu*a->xw + b->zv*a->yw + b->zw*a->zw;
	product.zz = b->zu*a->xx + b->zv*a->yy + b->zw*a->zz + b->zz;

	memcpy(dst, &product, sizeof(Matrix));
}

/*
 * Store the cross product a x b in `dst`, computed with the VU0 outer
 * product instructions.
 *
 * All three vectors are accessed as whole quadwords, so they are expected
 * to be 16-byte aligned.  Only x, y and z are computed; the fourth word
 * written to `dst` is whatever the VU register held before.
 *
 * dst  receives a x b (may alias a or b, inputs are loaded first)
 * a    left operand
 * b    right operand
 */
void math_vector_cross(Vector3d* dst, Vector3d* a, Vector3d* b)
{
	__asm__ __volatile__(
		"lqc2		$vf4, 0(%0)\n"
		"lqc2		$vf5, 0(%1)\n"

		"vopmula.xyz ACCxyz, $vf4xyz, $vf5xyz\n"
		"vopmsub.xyz $vf6xyz, $vf5xyz, $vf4xyz\n"

		"sqc2 		$vf6, 0(%2)\n"
		:
		: "r" (a),"r" (b),"r" (dst)
		  /* The asm reads *a and *b and writes *dst through plain pointer
		     operands; tell the compiler so it flushes and reloads them. */
		: "memory");
}

/*
 * Compute the unit normal of the triangle (a, b, c) and store it in `dst`.
 *
 * Equivalent scalar code:
 *   e0 = c - a
 *   e1 = c - b
 *   dst = normalize(e0 x e1)
 *
 * All vectors are accessed as whole quadwords, so they are expected to be
 * 16-byte aligned.  A degenerate triangle yields a zero-length cross
 * product, for which the reciprocal square root is not meaningful.
 *
 * dst      receives the normalized normal
 * a, b, c  triangle corners
 */
void math_vector_calc_normal(Vector3d* dst, Vector3d* a, Vector3d* b, Vector3d* c)
{
	__asm__ __volatile__(
		"lqc2		$vf4, 0(%0)\n"
		"lqc2		$vf5, 0(%1)\n"
		"lqc2		$vf6, 0(%2)\n"

		/* Edge vectors c-a and c-b. */
		"vsub.xyz	$vf4xyz,$vf6xyz,$vf4xyz\n"
		"vsub.xyz	$vf5xyz,$vf6xyz,$vf5xyz\n"

		/* Cross product of the two edges. */
		"vopmula.xyz ACCxyz, $vf4xyz, $vf5xyz\n"
		"vopmsub.xyz $vf6xyz, $vf5xyz, $vf4xyz\n"

		/* Squared length into vf4x, then scale by its reciprocal root. */
		"vmul.xyz 	$vf4xyz, $vf6xyz, $vf6xyz\n"
		"vaddy.x 	$vf4x, $vf4x, $vf4y\n"
		"vaddz.x 	$vf4x, $vf4x, $vf4z\n"
		"vrsqrt 		Q, $vf0w, $vf4x\n"
		"vwaitq\n"
		"vmulq.xyz 	$vf6xyz, $vf6xyz, Q\n"

		"sqc2 		$vf6, 0(%3)\n"
		:
		: "r" (a),"r" (b),"r" (c),"r" (dst)
		  /* The asm reads *a, *b, *c and writes *dst through plain pointer
		     operands; tell the compiler so it flushes and reloads them. */
		: "memory");
}

/*
 * Scale `src` to unit length and store the result in `dst`.
 *
 * Equivalent scalar code:
 *   len = 1.0f / sqrt(src->x*src->x + src->y*src->y + src->z*src->z);
 *   dst->x = src->x*len;  dst->y = src->y*len;  dst->z = src->z*len;
 *
 * Both vectors are accessed as whole quadwords, so they are expected to be
 * 16-byte aligned.  The fourth word of `src` is copied to `dst` unscaled.
 * A zero-length input has no meaningful result.
 *
 * dst  receives the normalized vector (may be the same object as src)
 * src  vector to normalize
 */
void math_vector_normalize(Vector3d* dst, Vector3d* src)
{
	__asm__ __volatile__(
		"lqc2		vf6, 0(%0)\n"
		/* Squared length into vf4x. */
		"vmul.xyz 	$vf4xyz, $vf6xyz, $vf6xyz\n"
		"vaddy.x 	$vf4x, $vf4x, $vf4y\n"
		"vaddz.x 	$vf4x, $vf4x, $vf4z\n"
		/* Q = 1/sqrt(length^2); vf0w is the constant 1.0. */
		"vrsqrt 		Q, $vf0w, $vf4x\n"
		"vwaitq\n"
		"vmulq.xyz 	$vf6xyz, $vf6xyz, Q\n"
		"sqc2 $vf6, 0(%1)\n"
		:
		: "r" (src),"r" (dst)
		  /* The asm reads *src and writes *dst through plain pointer
		     operands; tell the compiler so it flushes and reloads them. */
		: "memory");
}

/*
 * Compute a lighting term for a surface point.
 *
 * Equivalent scalar code:
 *   lv = normalize(lightpos - pos)
 *   return fabs(lv.x*n->x + lv.y*n->y + lv.z*n->z)
 *
 * The absolute value means both sides of a surface are lit equally.
 * All vectors are accessed as whole quadwords, so they are expected to be
 * 16-byte aligned.
 *
 * n         surface normal (used as given, not normalized here)
 * pos       surface position
 * lightpos  light position
 * returns   |dot(n, direction from pos to lightpos)|
 */
float math_calc_light(Vector3d* n, Vector3d* pos, Vector3d* lightpos)
{
	/* Scratch quadword: the asm stores its result here and x is returned. */
	Vector3d lv;

	__asm__ __volatile__(
		"lqc2		$vf5, 0(%1)\n"
		"lqc2		$vf6, 0(%2)\n"
		"lqc2		$vf7, 0(%0)\n"
		/* Vector from the surface point to the light. */
		"vsub.xyz	$vf6, $vf6, $vf5\n"

		/* Normalize it. */
		"vmul.xyz 	$vf4xyz, $vf6xyz, $vf6xyz\n"
		"vaddy.x 	$vf4x, $vf4x, $vf4y\n"
		"vaddz.x 	$vf4x, $vf4x, $vf4z\n"
		"vrsqrt 		Q, $vf0w, $vf4x\n"
		"vwaitq\n"
		"vmulq.xyz 	$vf6xyz, $vf6xyz, Q\n"

		/* Dot product with the normal, accumulated into vf4x. */
		"vmul.xyz 	$vf4xyz, $vf6xyz, $vf7xyz\n"
		"vaddy.x 	$vf4x, $vf4x, $vf4y\n"
		"vaddz.x 	$vf4x, $vf4x, $vf4z\n"
		"vabs.x		$vf4x, $vf4x\n"

		"sqc2 $vf4, 0(%3)\n"
		:
		: "r" (n), "r" (pos), "r" (lightpos), "r" (&lv)
		  /* lv is written only by the sqc2 above.  Without this clobber the
		     compiler sees no store to lv and may treat the read below as a
		     read of an uninitialized object. */
		: "memory");

	return lv.x;
}

/*
 * Component-wise product: dst = (a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w).
 *
 * All four words of each quadword take part.  The vectors are accessed as
 * whole quadwords, so they are expected to be 16-byte aligned.
 *
 * dst  receives the product (may alias a or b, inputs are loaded first)
 * a    first factor
 * b    second factor
 */
void math_vector_mul(Vector3d* dst, Vector3d* a, Vector3d* b)
{
	__asm__ __volatile__(
		"lqc2		$vf4, 0(%0)\n"
		"lqc2		$vf5, 0(%1)\n"

		"vmul.xyzw	$vf6xyzw,$vf4xyzw,$vf5xyzw\n"

		"sqc2 		$vf6, 0(%2)\n"
		:
		: "r" (a),"r" (b),"r" (dst)
		  /* The asm reads *a and *b and writes *dst through plain pointer
		     operands; tell the compiler so it flushes and reloads them. */
		: "memory");
}
