#define	LIBQDISPLAY_CORE
#include "../include/libqdisplay.h"

//#define	scalw(x, y)	(x * (1 << y))

/*
 * Transparency lookup tables, one pointer per liquid type.
 * NOTE(review): the trailing numbers look like the blend percentage of each
 * table; the tables are allocated and filled outside this part of the file,
 * so confirm there.
 */
unsigned char *waterTransparency;	// 50
unsigned char *slimeTransparency;	// 75
unsigned char *lavaTransparency;	// 90
unsigned char *preTransparency;		// do not calculate in the loop

/*
 * State of the texture currently being drawn. All of it is read by
 * draw_affine(); nothing in this part of the file assigns it, so it has to be
 * set up elsewhere before a span is drawn.
 */
unsigned char *texture;			// texel data indexed by draw_affine()
int textureRow;				// added to the v-derived index in the wall path
int textureMask1, textureMask2;		// liquid path: wrap masks for the u term (1) and the v term (2)
short int textureShift1, textureShift2;	// liquid path: shift of the warped u (1) and of the swim-table index (2)
short int textureMip;			// liquid path without FAST_WARP: right shift applied to the swim offsets
short int textureType;			// WALL_TYPE, SKY_TYPE, anything else selects the liquid path
/*
 * Scroll offsets of the two sky layers: advanced by update_water() and added
 * to the texture coordinates in the sky path of draw_affine().
 */
static int skyMovementX1 = 0, skyMovementY1 = 0;
static int skyMovementX2 = 0, skyMovementY2 = 0;
/*
 * Texture-mapping coefficients, written by compute_texture_gradients() and
 * read by draw_span(). Three rows of { base, per-x step, per-y step }:
 * tmap[0..2] for u, tmap[3..5] for v, tmap[6..8] for w (the divisor of u and v).
 */
static float tmap[9];
/*
 * Warp ("swim") offset tables for liquid surfaces, refilled by update_water().
 * swim_phase is the animation counter that update_water() increments.
 */
#ifndef	FAST_WARP
int swim_u[256], swim_v[256], swim_phase;
#else
// FAST_WARP keeps one table pair per mip level (swim_u0..swim_u3 / swim_v0..swim_v3);
// swim_um / swim_vm collect them as arrays of pointers in that order.
// NOTE(review): swim_u / swim_v are only pointers here and are not assigned in
// this part of the file -- presumably selected from swim_um / swim_vm elsewhere; confirm.
int *swim_u, *swim_v, swim_phase;
int swim_u0[WARP_X >> MIPMAP_0], swim_v0[WARP_X >> MIPMAP_0];
int swim_u1[WARP_X >> MIPMAP_1], swim_v1[WARP_X >> MIPMAP_1];
int swim_u2[WARP_X >> MIPMAP_2], swim_v2[WARP_X >> MIPMAP_2];
int swim_u3[WARP_X >> MIPMAP_3], swim_v3[WARP_X >> MIPMAP_3];
int *swim_um[MIPMAP_MAX] = {swim_u0, swim_u1, swim_u2, swim_u3};
int *swim_vm[MIPMAP_MAX] = {swim_v0, swim_v1, swim_v2, swim_v3};
#endif

/*
 * NOTE: a subdivision of 16 is quite coarse; it works most of the time, but you can sometimes see curved textures.
 *       If you have processor power to spare, use 8 instead!
 */
#define SUBDIV_SHIFT	4
#define SUBDIV		(1 << SUBDIV_SHIFT)
#define	SUBDIV_MASK	(SUBDIV - 1)

/*
 * Warping water.
 * 
 * tx = H_AMPL * sin(2.0 * PI * (y / V_PERIOD + t / T_PERIOD));
 * ty = V_AMPL * cos(2.0 * PI * (x / H_PERIOD + t / T_PERIOD));
 * 
 * - amplitudes: H_AMPL and V_AMPL,
 * - repetition periods: H_PERIOD and V_PERIOD,
 * - time repetition period: T_PERIOD.
 * 
 * - Ex: a period(spacing) of 128 and an amplitude of 8:
 * tx = 8.0 * sin(2.0 * PI * y / 128.0);
 * ty = 8.0 * cos(2.0 * PI * x / 128.0);
 */
/*
 * Per-call animation step: bumps swim_phase, rebuilds the liquid warp
 * tables for the new phase and scrolls both sky layers.
 *
 * Each table entry is sin/cos of (index + swim_phase) * .09817477 (the step
 * is 2*PI/64, so the wave repeats every 64 entries), scaled through
 * scalw(..., 19), i.e. by 524288.
 */
void update_water(void)
{
  short int pos;

  ++swim_phase;

#ifdef	FAST_WARP
  for (pos = 0; pos < WARP_X; pos++) {
    const double angle = (pos + swim_phase) * .09817477;
    const int offs_u = scalw(sin(angle), 19);		// * 524288
    const int offs_v = scalw(cos(angle), 19);		// * 524288

    // mip0 gets every entry at full scale
    swim_u0[pos] = offs_u;
    swim_v0[pos] = offs_v;

    // each coarser mip only takes the entries aligned to its step,
    // with the offset shifted down by the same amount
    if (pos & ((1 << MIPMAP_1) - 1))
      continue;
    swim_u1[pos >> MIPMAP_1] = offs_u >> MIPMAP_1;	// mip1
    swim_v1[pos >> MIPMAP_1] = offs_v >> MIPMAP_1;

    if (pos & ((1 << MIPMAP_2) - 1))
      continue;
    swim_u2[pos >> MIPMAP_2] = offs_u >> MIPMAP_2;	// mip2
    swim_v2[pos >> MIPMAP_2] = offs_v >> MIPMAP_2;

    if (pos & ((1 << MIPMAP_3) - 1))
      continue;
    swim_u3[pos >> MIPMAP_3] = offs_u >> MIPMAP_3;	// mip3
    swim_v3[pos >> MIPMAP_3] = offs_v >> MIPMAP_3;
  }
#else
  for (pos = 0; pos < 256; pos++) {
    const double angle = (pos + swim_phase) * .09817477;
    const int offs_u = scalw(sin(angle), 19);		// * 524288
    const int offs_v = scalw(cos(angle), 19);		// * 524288

    // single full-scale table; draw_affine() shifts it per mip level
    swim_u[pos] = offs_u;
    swim_v[pos] = offs_v;
  }
#endif

  // second sky layer scrolls twice as fast as the first
  skyMovementX1 += 0x00000100;
  skyMovementX2 += 0x00000200;
  skyMovementY1 += 0x00000001;
  skyMovementY2 += 0x00000002;
}

/*
 * Pick a mip level (0..3) for a face.
 *
 * Deliberately crude: takes the first vertex of the face's first edge, asks
 * dist2_from_viewer() for its distance measure and scales that down by 2^16
 * via scalw(..., -16). The level then rises at the thresholds 1, 4 and 16.
 */
short int compute_mip_level(__memBase, int face)
{
  double measure;
  short int level;
  int firstSurfEdge = bspMem->dfaces[face].firstedge;
  int edge = bspMem->dsurfedges[firstSurfEdge];

  // surf-edge entries may be negative; only the magnitude is used as index
  edge = (edge < 0) ? -edge : edge;

  measure = scalw(dist2_from_viewer(&bspMem->dvertexes[bspMem->dedges[edge].v[0]].point), -16);	// / 65536;

  // start at the coarsest level and refine while the measure is below each threshold
  level = 3;
  if (measure < 16)
    level = 2;
  if (measure < 4)
    level = 1;
  if (measure < 1)
    level = 0;
  return level;
}

/*
 * Fill tmap[] with the screen-space coefficients draw_span() evaluates for
 * the given texture.
 *
 * The texture's two gradient vectors and its origin are transformed first;
 * the (u,v) offset is folded into the origin so it does not have to be
 * subtracted per pixel. Each of the three rows (u, v, w) is stored as
 * { base, per-x step, per-y step }, with the screen centre folded into the
 * base term. The u and v rows are additionally scaled by 2^-mip via scalw().
 */
void compute_texture_gradients(__memBase, struct texture *Text, short int mip)
{
  vec3_t origin, gradU, gradV;
  float offsU, offsV;
  float base, stepX, stepY;
  int axis;

  // project vectors onto face's plane, and transform
  transform_vector(gradU, Text->textGradient.uv0);
  transform_vector(gradV, Text->textGradient.uv1);
  transform_point_raw(origin, Text->textGradient.scaled);

  offsU = Text->textGradient.u;
  offsV = Text->textGradient.v;

  // fold the (u,v) offset into the origin instead of subtracting it
  // every time a new (u,v) is computed
  for (axis = 0; axis < 3; axis++)
    origin[axis] += offsU * gradU[axis] + offsV * gradV[axis];

  // u row (origin x gradV); the base term is offset by the screen centre --
  // if that were folded into the transform translation it could be avoided
  stepY = origin[0] * gradV[2] - origin[2] * gradV[0];
  stepX = origin[1] * gradV[2] - origin[2] * gradV[1];
  base = origin[0] * gradV[1] - origin[1] * gradV[0];
  base -= stepX * xCenter + stepY * yCenter;
  tmap[0] = scalw(base, -mip);
  tmap[1] = scalw(stepX, -mip);
  tmap[2] = scalw(stepY, -mip);

  // v row (gradU x origin)
  stepY = origin[2] * gradU[0] - origin[0] * gradU[2];
  stepX = origin[2] * gradU[1] - origin[1] * gradU[2];
  base = origin[1] * gradU[0] - origin[0] * gradU[1];
  base -= stepX * xCenter + stepY * yCenter;
  tmap[3] = scalw(base, -mip);
  tmap[4] = scalw(stepX, -mip);
  tmap[5] = scalw(stepY, -mip);

  // w row (gradV x gradU); independent of the mip level, so not scaled
  stepY = gradV[0] * gradU[2] - gradV[2] * gradU[0];
  stepX = gradV[1] * gradU[2] - gradV[2] * gradU[1];
  base = gradV[0] * gradU[1] - gradV[1] * gradU[0];
  base -= stepX * xCenter + stepY * yCenter;
  tmap[6] = base;
  tmap[7] = stepX;
  tmap[8] = stepY;
}

// draw an affine (linear) span starting at dest, n pixels long,
// starting at (u,v) in the texture and stepping by (du,dv) each pixel
//static void draw_affine(int n, unsigned char *dest, int u, int v, int du, int dv)
/*
 * if we are in liquid, we can calculate the average pixel color
 * of the liquid texture (e.g. *lava1) and do a transparency blend with this color,
 * so we don't need to change the palette, and the accuracy
 * is better
 *
 * we can make the liquid with a falloff if we use the zbuffer,
 * we have the current z-value, and the z-value at that position
 * we sub them and calculate the transparency-level from that
 * (better use every 10th or like that transparency-level: first,
 *  the transparency is not so accurate, that every percent makes
 *  a change, second, all 100% transparency uses 6,5MB cached
 *  tables, both in memory and on disk (horror!))
 * the falloff is calculated from the brightness of the liquid texture
 * and the type, so brighter texture are more transparent than darker
 *
 * the liquid looks more real if the textures after it also warps
 * so we must determine, which textures lies in liquid, probably
 * we can use the same procedure as the liquid itself, maybe
 * we must project the liquids behaviour to the texture (uff)
 *
 * how to determine if a texture lies in liquid? as far as I know qbsp
 * splits the planes at every intersection of two polygons, which
 * means we do not need to split the polygon at the liquid line.
 * the disadvantage of marking textures as in-liquid is that while
 * we are in liquid, the textures outside the liquid don't warp
 *
 * probably we can do real waves in liquid; for that we do not change
 * the du or dv, but the z-value (upwards) of the polygon in a
 * reproducible, caustics-like manner. the difficulty is calculating these
 * values in the very inner loop (draw_affine); that's slow
 *
 * how to avoid this mipmap shifting and masking in liquid textures:
 * after building the waterblock convert the scanlines from this:
 *
 * +----+ example: memory-block-size is 64*64
 * |****|	   mipmap-size is 32*32
 * |    |	   memory is linear
 * |    |
 * |    |
 * +----+
 *
 * internally handled as this:	to this:
 *				
 * +--+				+----+ we need no shifting, no masking
 * |**|				|**  | and it is faster, 'cause the 
 * |**|				|**  | conversion could be cached and is
 * +--+				|    | out of the span-draw-inner-loop
 *				|    |
 * 				+----+
 *
 * we can even remove all the shifting, if we put the warp-textures
 * in a 256*64 block, so the offset is 0x0000xxyy (0b000000000000000000xxxxxx00yyyyyy)
 * that is a 16k-block per watertexture, not too much
 *
 * probably it could be faster, if we call a hook defined in the TextureCache
 */
/*
 * Draw an affine (linearly interpolated) run of n pixels starting at dest.
 *
 * n      number of pixels to write; must not be negative
 * dest   first destination pixel
 * u, v   texture coordinates of the first pixel (fixed point, the integer
 *        part sits above bit 16)
 * du, dv per-pixel increments of u and v
 *
 * Returns dest advanced by n, so consecutive runs can be chained.
 *
 * The pixel source depends on the file-scope textureType:
 *  - WALL_TYPE: plain texel fetch through the multMuls[] lookup.
 *  - SKY_TYPE:  two scrolling layers of one texture; the layer fetched without
 *               the 0x80 offset wins wherever its texel is non-zero.
 *  - otherwise: liquid; coordinates are displaced by the swim tables and the
 *               texel is combined with the pixel already in the frame buffer
 *               through pretransp().
 */
static inline unsigned char *draw_affine(int n, unsigned char *dest, int u, int v, int du, int dv)
{
  if (textureType == WALL_TYPE) {
    while (n--) {
      int iu = u >> 16;
      int iv = ((v >> 8) & 0x0000FF00) + textureRow;

#ifdef CALCULATE_PIXELDRAW
      // statistics: count every written pixel, and those that overwrite a non-zero one
      pixelDraw++;
      if(*dest)
        pixelOverdraw++;
#endif
      
      *dest++ = texture[multMuls[iv] + iu];
      u += du;
      v += dv;
    }
  }
  else if (textureType == SKY_TYPE) {
    /* TODO: skies */
    while (n--) {
      int iu;
      int iv;
      unsigned char pel, sum;
      
#ifdef CALCULATE_PIXELDRAW
      pixelDraw++;
      if(*dest)
        pixelOverdraw++;
#endif

      // first layer: fetched at offset 0x80, scrolled by skyMovement?1
      iu = ((u >> 8) + skyMovementX1) & 0x00007F00;
      iv = ((v >> 16) + skyMovementY1) & 0x0000007F;
      sum = texture[iu + iv + 0x80];
      // second layer: scrolled by skyMovement?2, replaces the first where non-zero
      iu = ((u >> 8) + skyMovementX2) & 0x00007F00;
      iv = ((v >> 16) + skyMovementY2) & 0x0000007F;
      if((pel = texture[iu + iv]))
        sum = pel;
      *dest++ = sum;

      u += du;
      v += dv;
    }
  }
  else {
    while (n--) {
      // each coordinate is displaced by a swim-table entry selected by the other one
#ifndef	FAST_WARP
      int iv = ((v + (swim_v[((u >> textureShift2) & 0xff)] >> textureMip)) >> 16) & textureMask2;
      int iu = ((u + (swim_u[((v >> textureShift2) & 0xff)] >> textureMip)) >> textureShift1) & textureMask1;
#else
      int iv = ((v + swim_v[(u >> 16)]) >> 16) & textureMask2;
      int iu = ((u + swim_u[(v >> 16)]) >> textureShift1) & textureMask1;
#endif
      // Read the current frame-buffer pixel, blend, store, and only then
      // advance. Writing this as "*dest++ = pretransp(..., *dest)" modifies
      // and reads dest without sequencing, which is undefined behaviour.
      *dest = pretransp(texture[iu + iv], *dest);
      dest++;
      u += du;
      v += dv;
    }
  }
  return dest;
}

// given a span (x0,y)..(x1,y), draw a perspective-correct span for it
/*
void draw_span(int y, int sx, int ex)
{
  float u0, v0, w0, u1, v1, w1, z;
  int len, e, last = 0;
  int u, v, du, dv;
  int i, j;
  
  float preu = tmap[0] + y * tmap[2];
  float prev = tmap[3] + y * tmap[5];
  float prew = tmap[6] + y * tmap[8];

  // compute (u,v) at left end
  u0 = preu + sx * tmap[1];
  v0 = prev + sx * tmap[4];
  w0 = prew + sx * tmap[7];
  z = (float)((float)(1) / w0);
  u0 = u0 * z;
  v0 = v0 * z;

  for (;;) {
    len = ex - sx;
    if (len > SUBDIV)
      len = SUBDIV;
    else
      last = 1;

    u = FLOAT_TO_FIX(u0);
    v = FLOAT_TO_FIX(v0);

    if (len == 1)
      // shortcut out to avoid divide by 0 below
      du = dv = 0;
    else {
      e = sx + len - last;
      u1 = preu + e * tmap[1];
      v1 = prev + e * tmap[4];
      w1 = prew + e * tmap[7];
      z = (float)((float)(1) / w1);
      u1 = u1 * z;
      v1 = v1 * z;

      if (len == SUBDIV) {
        du = (FLOAT_TO_FIX(u1) - u) >> SUBDIV_SHIFT;
        dv = (FLOAT_TO_FIX(v1) - v) >> SUBDIV_SHIFT;
      }
      else {
        du = FLOAT_TO_FIX((u1 - u0) / (len - last));
        dv = FLOAT_TO_FIX((v1 - v0) / (len - last));
      }
      if (du < 0)
        ++du;
      if (dv < 0)
        ++dv;
    }

    draw_affine(len, localDim.frameBuffer + multRows[y] + sx, u, v, du, dv);
    if (last)
      break;

    sx += len;
    u0 = u1;
    v0 = v1;
  }
}

// sx = 16, ex = 32
sx	len	last	/	e
16	16	1	15	31
// sx = 16, ex = 33
sx	len	last	/	e
16	17	0	*	32
33	0	1	-1	32	...
// sx = 16, ex = 34
sx	len	last	/	e
16	17	0	*	33
33	1	1	*	33	...
//
*/
/*
 * the zbuffer is interesting for dynamic model-draw etc.
 * the buffers values (1/z) are all under 0, we can try to
 * store them as 16bit-wide-fraction
 *
 * while(n--) {
 *   *zbuf++ = (w >> 16);
 *   w += dw;
 * }
 *
 */
/*
 * Draw one perspective-correct horizontal span on row y, covering the
 * pixels sx .. ex-1 (ex - sx pixels; nothing is drawn if that is <= 0).
 *
 * The exact (u,v) is evaluated from tmap[] only every SUBDIV pixels; the
 * pixels in between are interpolated linearly by draw_affine(). The
 * remainder of fewer than SUBDIV pixels is interpolated between the exact
 * values at its first and last pixel.
 */
void draw_span(int y, int sx, int ex)
{
  float w0, w1;
  float v0, v1;
  float u0, u1;
//int w, dw;						// 1/zbuffer
  int v, dv = 0;					// steps stay 0 for a one-pixel tail
  int u, du = 0;
  int slen, rlen, len, end;

  unsigned char *dest;
  float prew, prev, preu;

  len = ex - sx;
  if (len <= 0)						// empty or inverted span: nothing to draw
    return;

  dest = localDim.frameBuffer + multRows[y] + sx;
  // row-constant part of the three tmap rows
  prew = tmap[6] + y * tmap[8];
  prev = tmap[3] + y * tmap[5];
  preu = tmap[0] + y * tmap[2];

  // compute (u,v) at left end
  w0 = 1 / (prew + sx * tmap[7]);			// 1/zbuffer
  v0 = (prev + sx * tmap[4]) * w0;
  u0 = (preu + sx * tmap[1]) * w0;

  // full sub-spans of SUBDIV pixels each
  for(slen = len >> SUBDIV_SHIFT; slen > 0 ;slen--) {
  //w = FLOAT_TO_FIX(w0);				// 1/zbuffer
    v = FLOAT_TO_FIX(v0);
    u = FLOAT_TO_FIX(u0);

    end = sx + SUBDIV;
    w1 = 1 / (prew + end * tmap[7]);
    v1 = (prev + end * tmap[4]) * w1;
    u1 = (preu + end * tmap[1]) * w1;

  //dw = (FLOAT_TO_FIX(v1) - w) >> SUBDIV_SHIFT;	// 1/zbuffer
    dv = (FLOAT_TO_FIX(v1) - v) >> SUBDIV_SHIFT;
    du = (FLOAT_TO_FIX(u1) - u) >> SUBDIV_SHIFT;

    dest = draw_affine(SUBDIV, dest, u, v, du, dv);
    sx = end;

  //w0 = w1;						// 1/zbuffer
    v0 = v1;
    u0 = u1;
  }

  // rlen is the number of steps inside the tail, i.e. tail pixels - 1
  rlen = (len & SUBDIV_MASK) - 1;
  if (rlen < 0)						// length was a multiple of SUBDIV: no tail
    return;

//w = FLOAT_TO_FIX(w0);					// 1/zbuffer
  v = FLOAT_TO_FIX(v0);
  u = FLOAT_TO_FIX(u0);
  dv = 0;
  du = 0;
  if (rlen > 0) {					// a) do not calc if only draw 1 pixel
    end = sx + rlen;
    w1 = 1 / (prew + end * tmap[7]);
    v1 = (prev + end * tmap[4]) * w1;
    u1 = (preu + end * tmap[1]) * w1;

  //dw = FLOAT_TO_FIX((w1 - w0) / rlen);		// 1/zbuffer
    dv = FLOAT_TO_FIX((v1 - v0) / rlen);
    du = FLOAT_TO_FIX((u1 - u0) / rlen);
  }
							// a) but draw that pixel surely
  draw_affine(rlen + 1, dest, u, v, du, dv);		// for the last pixel the du and dv are thrown away
}
/*
// sx = 16, ex = 32
sx	len	slen	rlen	e
16	15		15	31
// sx = 16, ex = 33
sx	len	slen	rlen	e
16	16	1		32
// sx = 16, ex = 34
sx	len	slen	rlen	e
16	17	1		32
32			1	33	...
//
*/
