/*
 *
 *   qrash: the second portable demo in the world
 *
 *   Copyright (C) 1997  Queue Members Group Art Division
 *   Coded by Mad Max / Queue Members Group (Mike Shirobokov)
 *   <mad_max@qmg.rising.ru>
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 * 
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 */
#include <string.h>
#include <math.h>
#include "video.h"
#include "misc.h"
#include "asm-defs.h"

/* Set to true by vidInitVideo() and reset to false by vidCloseVideo(). */
bool vidInitialized = false;

/*
 * Video state shared by the routines in this file.
 *   vidSizeX, vidSizeY - visible page width/height in bytes/rows
 *                        (initialised to 320x240).
 *   vidBytesPerLine    - stride added to a page pointer to advance one row.
 *   vidPageSize        - number of bytes vidAllocPage() allocates and clears
 *                        for one page.
 *   vidFrameCount      - incremented once per vidShowPage() call.
 *   vidStartTime       - sysTimer() value taken at the first vidShowPage()
 *                        call; -1 until then.
 * vidBytesPerLine and vidPageSize are not assigned in this part of the file;
 * vidPageLength, vidMaxPage, vidHiResMode and vidBlackColor are neither
 * assigned nor read here.
 */
int vidSizeX=320, vidSizeY=240, vidPageSize, vidPageLength, vidBytesPerLine,
    vidMaxPage, vidHiResMode, vidBlackColor,
    vidFrameCount=0, vidStartTime= -1;

/* Copy of the last palette passed to vidSetPalette(), taken before gamma scaling. */
vidRGB vidCurrentPalette[256];

#ifdef __WATCOMC__

// Scratch storage for the loop bound used by the Watcom inline-assembly
// comb1/comb2 below: they store their count argument (esi) here on entry
// because esi and ebp are both reused as indices inside the loop.
static int n;

// comb1 (Watcom inline assembly): interleave two halves of a source row.
//
// Register map: eax - c1, ebx - b1, ecx - c, edi - b, esi - x,
// ebp - x+n (the caller in this file passes n = vidSizeX/2).
//
// For each x the code builds a 16-bit word in dx with dl = src[x] and
// dh = src[x+n] and stores it at dst+2*x, once for c1 -> c and once for
// b1 -> b.  The loop body is unrolled twice, so x advances by two per pass.
// ebp is saved and restored by hand with push/pop.
//
// The first load in the loop ("mov dl,[ecx+esi*2]") reads the destination
// and is overwritten by the next instruction; presumably it only touches
// the destination cache line - TODO confirm.
//
// NOTE(review): the loop ends with "jle", so one more pass runs when
// esi == n; the GCC variant of this routine uses "jl".  Confirm which bound
// is intended.
// NOTE(review): this variant stores into c with "mov", whereas the GCC and
// portable variants OR into c.  Confirm which behaviour is intended.
void
comb1( uchar* c1, uchar* b1, uchar* c, uchar* b, int n );
#pragma aux comb1 =\
"push ebp"\
"mov n,esi"\
"mov ebp,esi"\
"xor esi,esi"\
"_loop:"\
"mov dl,[ecx+esi*2]"\
"mov dl,[eax+esi]"\
"mov dh,[eax+ebp]"\
"mov [ecx+esi*2],dx"\
"mov dl,[ebx+esi]"\
"mov dh,[ebx+ebp]"\
"mov [edi+esi*2],dx"\
"inc esi"\
"inc ebp"\
"mov dl,[eax+esi]"\
"mov dh,[eax+ebp]"\
"mov [ecx+esi*2],dx"\
"mov dl,[ebx+esi]"\
"mov dh,[ebx+ebp]"\
"mov [edi+esi*2],dx"\
"inc esi"\
"inc ebp"\
"cmp esi,n"\
"jle _loop"\
"pop ebp"\
parm [eax] [ebx] [ecx] [edi] [esi]\
modify [eax ebx ecx edx esi edi];

// comb2 (Watcom inline assembly): same interleave as comb1 with the two
// bytes of every output word swapped.
//
// Register map: eax - c1, ebx - b1, ecx - c, edi - b, esi - x, ebp - x+n.
// The count argument arrives in esi and is saved in the file-scope
// variable n before esi is cleared and reused as the index.
//
// For each x the word in dx is built with dh = src[x] and dl = src[x+n]
// and stored at dst+2*x, once for c1 -> c and once for b1 -> b.  The loop
// body is unrolled twice; ebp is saved and restored by hand.
//
// The first load in the loop ("mov dh,[ecx+esi*2]") reads the destination
// and is overwritten by the next instruction; presumably it only touches
// the destination cache line - TODO confirm.
//
// NOTE(review): the loop ends with "jle", so one more pass runs when
// esi == n; the GCC variant of this routine uses "jl".  Confirm which bound
// is intended.
// NOTE(review): this variant stores into c with "mov", whereas the GCC and
// portable variants OR into c.  Confirm which behaviour is intended.
void
comb2( uchar* c1, uchar* b1, uchar* c, uchar* b, int n );
#pragma aux comb2 =\
"push ebp"\
"mov n,esi"\
"mov ebp,esi"\
"xor esi,esi"\
"_loop:"\
"mov dh,[ecx+esi*2]"\
"mov dh,[eax+esi]"\
"mov dl,[eax+ebp]"\
"mov [ecx+esi*2],dx"\
"mov dh,[ebx+esi]"\
"mov dl,[ebx+ebp]"\
"mov [edi+esi*2],dx"\
"inc esi"\
"inc ebp"\
"mov dh,[eax+esi]"\
"mov dl,[eax+ebp]"\
"mov [ecx+esi*2],dx"\
"mov dh,[ebx+esi]"\
"mov dl,[ebx+ebp]"\
"mov [edi+esi*2],dx"\
"inc esi"\
"inc ebp"\
"cmp esi,n"\
"jle _loop"\
"pop ebp"\
parm [eax] [ebx] [ecx] [edi] [esi]\
modify [eax ebx ecx edx esi edi];

#else
#ifdef i386

/*
 * comb1 (GCC inline assembly, i386): interleave two halves of a source row
 * into 16-bit words.
 *
 * Register use, fixed by the operand constraints: eax = c1, ebx = b1,
 * ecx = c, edi = b, esi = x (holds n on entry), ebp = x+n, dx = the word
 * being assembled.
 *
 * For each x the word has dl = src[x] and dh = src[x+n] and is written to
 * dst+2*x, once for c1 -> c and once for b1 -> b.  The loop body is unrolled
 * twice and x is compared against n only after both copies.  In the first
 * copy the word is ORed into c (orw); in the second copy it is stored with
 * movw.  Words written to b are always stored.
 *
 * NOTE(review): the loop bound lives in `n`, a .long emitted inside the
 * instruction stream (jumped over on entry) and written at run time; this
 * presumably requires the code to be in writable memory - confirm for the
 * target platform.
 * NOTE(review): `n`, `comb_start` and `loop1` are ordinary assembler labels;
 * if this statement were emitted more than once in a translation unit the
 * labels would presumably be defined twice - confirm.
 * NOTE(review): esi (%4) is declared as an input but is modified, ebp is
 * saved/restored by hand, and only edx is listed as clobbered (no "memory"
 * clobber although memory is written) - check against the GCC extended asm
 * rules.
 */
inline void
comb1( uchar* c1, uchar* b1, uchar* c, uchar* b, int n )
{
__asm__ __volatile__ (
"jmp comb_start         \n"
"n: .long 0             \n"
"comb_start:            \n"
"pushl %%ebp            \n"
"movl %4,n              \n"
"movl %4,%%ebp          \n"
"xorl %4,%4             \n"

/* first unrolled copy: c is ORed, b is stored */
"loop1:                 \n"
"movb (%0,%4),%%dl      \n"
"movb (%0,%%ebp),%%dh   \n"
"orw %%dx,(%2,%4,2)     \n"
"movb (%1,%4),%%dl      \n"
"movb (%1,%%ebp),%%dh   \n"
"incl %%ebp             \n"
"movw %%dx,(%3,%4,2)    \n"
"incl %4                \n"

/* second unrolled copy: both c and b are stored */
"movb (%0,%4),%%dl      \n"
"movb (%0,%%ebp),%%dh   \n"
"movw %%dx,(%2,%4,2)    \n"
"movb (%1,%4),%%dl      \n"
"movb (%1,%%ebp),%%dh   \n"
"incl %%ebp             \n"
"movw %%dx,(%3,%4,2)    \n"
"incl %4                \n"

/* continue while x < n */
"cmpl n,%4              \n"
"jl loop1               \n"
"popl %%ebp             \n"
:
: "a"(c1), "b"(b1), "c"(c), "D"(b), "S"(n)
: "%edx" );
}

/*
 * comb2 (GCC inline assembly, i386): same interleave as comb1 with the two
 * bytes of every output word swapped.
 *
 * Register use, fixed by the operand constraints: eax = c1, ebx = b1,
 * ecx = c, edi = b, esi = x (holds n on entry), ebp = x+n, dx = the word
 * being assembled.
 *
 * For each x the word has dh = src[x] and dl = src[x+n] and is written to
 * dst+2*x, once for c1 -> c and once for b1 -> b.  The loop body is unrolled
 * twice; in the first copy the word is ORed into c (orw), in the second it
 * is stored with movw.  Words written to b are always stored.
 *
 * NOTE(review): the symbol `n` used for the loop bound is not defined in
 * this asm statement; it is the label emitted by the i386 comb1 asm above,
 * so this routine depends on that code being assembled into the same unit.
 * NOTE(review): `loop2` is an ordinary assembler label; if this statement
 * were emitted more than once the label would presumably be defined twice -
 * confirm.
 * NOTE(review): esi (%4) is declared as an input but is modified, ebp is
 * saved/restored by hand, and only edx is listed as clobbered (no "memory"
 * clobber although memory is written) - check against the GCC extended asm
 * rules.
 */
inline void
comb2( uchar* c1, uchar* b1, uchar* c, uchar* b, int n )
{
__asm__ __volatile__ (
"pushl %%ebp            \n"
"movl %4,n              \n"
"movl %4,%%ebp          \n"
"xorl %4,%4             \n"
"loop2:                 \n"

/* first unrolled copy: c is ORed, b is stored */
"movb (%0,%4),%%dh      \n"
"movb (%0,%%ebp),%%dl   \n"
"orw %%dx,(%2,%4,2)     \n"
"movb (%1,%4),%%dh      \n"
"movb (%1,%%ebp),%%dl   \n"
"incl %%ebp             \n"
"movw %%dx,(%3,%4,2)    \n"
"incl %4                \n"

/* second unrolled copy: both c and b are stored */
"movb (%0,%4),%%dh      \n"
"movb (%0,%%ebp),%%dl   \n"
"movw %%dx,(%2,%4,2)    \n"
"movb (%1,%4),%%dh      \n"
"movb (%1,%%ebp),%%dl   \n"
"incl %%ebp             \n"
"movw %%dx,(%3,%4,2)    \n"
"incl %4                \n"

/* continue while x < n */
"cmpl n,%4          \n"
"jl loop2               \n"
"popl %%ebp             \n"

:
: "a"(c1), "b"(b1), "c"(c), "D"(b), "S"(n)
: "%edx" );
}

#else

/*
 * comb1 (portable fallback): interleave two halves of a source row.
 *
 * For x = 0 .. n-1:
 *   c[2*x]   |= c1[x];   c[2*x+1] |= c1[x+n];
 *   b[2*x]    = b1[x];   b[2*x+1]  = b1[x+n];
 *
 * The output is written byte by byte so the layout in memory is the same
 * on every host.  Writing through a (ushort*) cast instead would make the
 * byte order follow the host's endianness (swapping the roles of comb1 and
 * comb2 on little-endian machines relative to the i386 assembly) and would
 * be a type-punned, possibly misaligned access.
 *
 * The second half starts n bytes into the source row, matching the
 * assembly variants; the caller in this file passes n = vidSizeX/2.
 *
 *   c1, b1 - source rows (at least 2*n readable bytes each)
 *   c, b   - destination rows (at least 2*n writable bytes each)
 *   n      - number of byte pairs to produce
 */
inline void
comb1( uchar* c1, uchar* b1, uchar* c, uchar* b, int n )
{
  const uchar *c2 = c1+n, *b2 = b1+n;
  int x;
  for( x=0; x<n; x++ ) {
    c[2*x]   |= c1[x];
    c[2*x+1] |= c2[x];
    b[2*x]    = b1[x];
    b[2*x+1]  = b2[x];
  }
}

/*
 * comb2 (portable fallback): same as comb1 with the two bytes of each
 * output pair swapped.
 *
 * For x = 0 .. n-1:
 *   c[2*x]   |= c1[x+n];   c[2*x+1] |= c1[x];
 *   b[2*x]    = b1[x+n];   b[2*x+1]  = b1[x];
 *
 * Written byte by byte for the same reasons as comb1: host-independent
 * layout and no type-punned 16-bit stores.
 *
 *   c1, b1 - source rows (at least 2*n readable bytes each)
 *   c, b   - destination rows (at least 2*n writable bytes each)
 *   n      - number of byte pairs to produce
 */
inline void comb2( uchar* c1, uchar* b1, uchar* c, uchar* b, int n )
{
  const uchar *c2 = c1+n, *b2 = b1+n;
  int x;
  for( x=0; x<n; x++ ) {
    c[2*x]   |= c2[x];
    c[2*x+1] |= c1[x];
    b[2*x]    = b2[x];
    b[2*x+1]  = b1[x];
  }
}

#endif
#endif

/*
 * vidCombineSplitPage: run comb1/comb2 over a whole page, two rows at a
 * time.  Even rows go through comb1 and odd rows through comb2; all four
 * page pointers advance by vidBytesPerLine after every row.  vidSizeY/2 row
 * pairs are processed, so a trailing odd row is left untouched.
 *
 *   c1, b1 - source pages
 *   c, b   - destination pages
 */
void
vidCombineSplitPage( PAGE c1, PAGE b1, PAGE c, PAGE b )
{
  const int half_width = vidSizeX/2;
  const int stride = vidBytesPerLine;
  int pairs_left = vidSizeY/2;

  while( pairs_left > 0 ) {
    /* even row */
    comb1( c1, b1, c, b, half_width );
    c1 += stride;  b1 += stride;
    c  += stride;  b  += stride;

    /* odd row */
    comb2( c1, b1, c, b, half_width );
    c1 += stride;  b1 += stride;
    c  += stride;  b  += stride;

    pairs_left--;
  }
}

/*
 * vidMorphPalette: write into `to` the palette that lies step/steps of the
 * way from pal1 to pal2.  Each of the 256 entries is interpolated per
 * channel as  from + (target - from) * step / steps.
 *
 *   pal1, pal2 - start and end palettes
 *   to         - receives the blended palette
 *   step       - current position
 *   steps      - total number of positions (used as the divisor)
 */
void
vidMorphPalette( vidPalette pal1, vidPalette pal2, vidPalette to,
                      int step, int steps )
{
  int entry = 0;
  while( entry < 256 ) {
    const vidRGB* from = &pal1[entry];
    const vidRGB* target = &pal2[entry];
    vidRGB* out = &to[entry];

    out->r = from->r+(target->r-from->r)*step/steps;
    out->g = from->g+(target->g-from->g)*step/steps;
    out->b = from->b+(target->b-from->b)*step/steps;

    entry++;
  }
}

/*
 * vidClearPage: fill the visible part of a page with one byte value.
 * Only the first vidSizeX bytes of each of the vidSizeY rows are written;
 * rows are vidBytesPerLine apart.
 */
void
vidClearPage( PAGE page, int value ) {
  PAGE row = page;
  int rows_left = vidSizeY;
  while( rows_left > 0 ) {
    memset( row, value, vidSizeX );
    row += vidBytesPerLine;
    rows_left--;
  }
}

/*
 * Marker string describing why page allocations are staggered.  It is kept
 * because it has external linkage; vidFreePage() does not rely on it to
 * decide how a page was allocated.
 */
const char* cacheSign = "to make p5 2-way cache happy";
#define CACHE_OFFSET 32

/*
 * vidAllocPage: allocate one zero-filled page of vidPageSize bytes.
 *
 * Every allocation starts with a small header in front of the page.
 * Successive calls alternate the header size between CACHE_OFFSET and
 * 2*CACHE_OFFSET bytes, so consecutive pages start CACHE_OFFSET bytes apart
 * relative to their allocation base (the staggering cacheSign refers to).
 * The byte immediately before the returned pointer records the header size
 * so that vidFreePage() can recover the address returned by cmalloc().
 *
 * Returns the page pointer, or a null pointer if cmalloc() returned one.
 * Pages returned here must be released with vidFreePage().
 */
PAGE
vidAllocPage()
{
  static bool pad = false;
  int offset = pad ? 2*CACHE_OFFSET : CACHE_OFFSET;
  unsigned char* base = (unsigned char*)cmalloc( vidPageSize+offset );
  if( !base ) {
    return 0;
  }
  pad = !pad;
  /* offset is 32 or 64, so it always fits in the single header byte */
  base[offset-1] = (unsigned char)offset;
  memset( base+offset, 0, vidPageSize );
  return (PAGE)(base+offset);
}

/*
 * vidFreePage: release a page obtained from vidAllocPage().
 *
 * The header size stored just before the page gives the distance back to
 * the pointer originally returned by cmalloc().  A null page is ignored.
 */
void
vidFreePage( PAGE page )
{
  unsigned char* p = (unsigned char*)page;
  if( !p ) {
    return;
  }
  cfree( (PAGE)(p-p[-1]) );
}

/*
 * vidCopyPage: copy the visible part of frompage into topage.
 * vidSizeX bytes are copied for each of the vidSizeY rows; both pages use
 * a row stride of vidBytesPerLine.
 */
void
vidCopyPage( PAGE topage, PAGE frompage )
{
  PAGE dst = topage;
  PAGE src = frompage;
  int rows_left = vidSizeY;
  while( rows_left > 0 ) {
    memcpy( dst, src, vidSizeX );
    dst += vidBytesPerLine;
    src += vidBytesPerLine;
    rows_left--;
  }
}

/*
 * vidCopyPageT: copy frompage onto topage treating value 0 as transparent.
 * Within the visible vidSizeX x vidSizeY area, every non-zero source byte
 * replaces the destination byte; zero source bytes leave the destination
 * unchanged.  Rows are vidBytesPerLine apart in both pages.
 */
void
vidCopyPageT( PAGE topage, PAGE frompage )
{
  int rows_left = vidSizeY;
  while( rows_left > 0 ) {
    const int width = vidSizeX;
    for( int col=0; col<width; col++ ) {
      if( !frompage[col] ) {
        continue;               /* transparent: keep destination */
      }
      topage[col] = frompage[col];
    }
    topage += vidBytesPerLine;
    frompage += vidBytesPerLine;
    rows_left--;
  }
}

/*
 * vidGetClosestColor: return the index (0..255) of the palette entry whose
 * RGB value is nearest to (r, g, b).
 *
 * Candidates are compared by squared Euclidean distance in integer
 * arithmetic.  Taking the square root and truncating it to int, as an
 * ordering key, makes distinct distances compare equal and can select an
 * entry that is not the nearest one; the squared distance orders candidates
 * exactly and needs no floating point.  On ties the lowest index wins.
 *
 *   pal     - palette with 256 entries
 *   r, g, b - colour to match
 */
uchar
vidGetClosestColor( vidRGB* pal, uchar r, uchar g, uchar b )
{
  int best = 0;
  int best_dist = 0;
  for( int i=0; i<256; i++ ) {
    int dr = pal[i].r-r;
    int dg = pal[i].g-g;
    int db = pal[i].b-b;
    int d = dr*dr+dg*dg+db*db;
    if( i == 0 || d < best_dist ) {
      best_dist = d;
      best = i;
      if( d == 0 ) {
        break;                  /* exact match, nothing can be closer */
      }
    }
  }
  return (uchar)best;
}

/*
 * vidShowPage: present one frame.
 *
 * Records the start time on the first call, optionally draws a debug
 * frame-rate bar into the top-left corner of `color`, bumps vidFrameCount,
 * calls sysUpdateFrame() and hands the pages to vidDoShowPage().
 *
 * The debug bar consists of 2x2 blocks of value 255, one every three bytes,
 * on the first two rows of `color`.  Its length is
 * 3*sysTimerRes / (time since the previous call) and is clamped so that it
 * never extends past the end of a row.
 *
 * NOTE(review): the bar is drawn only when vidFrameCount is not a multiple
 * of 3 (`vidFrameCount%3`); confirm that this, rather than every third
 * frame, is the intended condition.
 *
 *   color, bw, dither - passed through unchanged to vidDoShowPage()
 */
void
vidShowPage( PAGE color, PAGE bw, uchar* dither )
{
  static int last_time = 0;     /* sysTimer() value at the previous call */

  if( vidStartTime == -1 ) {
    vidStartTime = sysTimer();
  }
  if( sysDebug && vidFrameCount%3 ) {
    int frame_time = (sysTimer()-last_time);
    int fps = frame_time ? 3*sysTimerRes / frame_time : 0;
    /* largest count for which i*3+1 is still inside the visible row */
    int max_ticks = (vidSizeX+1)/3;
    if( fps > max_ticks ) {
      fps = max_ticks;
    }
    for( int i=0; i<fps; i++ ) {
      color[i*3]=255; color[i*3+1]=255;
      color[i*3+vidBytesPerLine]=255; color[i*3+1+vidBytesPerLine]=255;
    }
  }
  last_time=sysTimer();
  vidFrameCount++;
  sysUpdateFrame();
  vidDoShowPage( color, bw, dither );
}

/* Number of gamma settings that F11 cycles through. */
#define N_GAMMA 4
/* Currently selected gamma setting, 0 .. N_GAMMA-1; read by vidSetPalette(). */
static int cur_gamma = 0;

/*
 * vid_kb_handler: keyboard callback registered by vidInitVideo().
 * F11 advances to the next gamma setting (wrapping around after the last
 * one) and re-applies the current palette so the change takes effect.
 *
 * Returns true when the key was F11, false for any other key.
 */
bool
vid_kb_handler( KB key )
{
  if( key == KB_F11 ) {
    cur_gamma = (cur_gamma+1)%N_GAMMA;
    vidSetPalette( vidCurrentPalette );
    return true;
  }
  return false;
}

/*
 * vidInitVideo: register this module's keyboard handler, run the backend
 * initialisation (vidDoInitVideo) and mark the video subsystem as
 * initialised so that vidCloseVideo() will shut it down later.
 */
void
vidInitVideo()
{
  // Install vid_kb_handler (F11 gamma cycling) before vidDoInitVideo() runs.
  sysRegisterKeyboardHandler( vid_kb_handler );

  vidDoInitVideo();
  vidInitialized = true;
}

/*
 * vidCloseVideo: shut the video subsystem down if it was initialised.
 *
 * Calls vidDoCloseVideo(), clears vidInitialized and, when sysDebug is set,
 * reports the average frame rate through vidMessage().
 *
 * The rate is computed in floating point so that the value handed to the
 * "%5.2f" conversion really is a double and keeps its fractional part.  It
 * is reported as 0.00 when no frame was ever shown (vidStartTime still -1)
 * or when no time has elapsed, instead of dividing by zero.
 */
void
vidCloseVideo()
{
  if( !vidInitialized ) {
    return;
  }
  vidDoCloseVideo();
  vidInitialized = false;
  if( sysDebug ) {
    char str[256];
    double fps = 0.0;
    if( vidStartTime != -1 ) {
      double elapsed = (double)sysTimer()-(double)vidStartTime;
      if( elapsed > 0 ) {
        fps = (double)vidFrameCount*sysTimerRes/elapsed;
      }
    }
    sprintf( str, "\nAverage frames per second: %5.2f", fps );
    vidMessage( str );
  }
}

/*
 * vidSetPalette: remember org_pal as the current palette and program the
 * hardware with a gamma-adjusted copy of it.
 *
 * The unmodified palette is stored in vidCurrentPalette (so a later gamma
 * change can be re-applied from the original values).  Each channel of the
 * working copy is multiplied by 1 + cur_gamma*0.2 and clamped to MAX_RGB-1.
 * The index of the darkest entry (smallest scaled r+g+b, lowest index on
 * ties) is passed to vidDoSetPalette() as the border colour.
 *
 * The darkest-entry search is seeded from entry 0; seeding the running
 * minimum with 0 would never be beaten by a non-negative sum and would
 * always leave the border at index 0.
 *
 * org_pal may be vidCurrentPalette itself: it is first copied into a local
 * palette, so the two memcpy() calls never operate on overlapping objects.
 */
void
vidSetPalette( vidPalette org_pal )
{
  vidPalette pal;
  memcpy( pal, org_pal, sizeof(vidPalette) );
  memcpy( vidCurrentPalette, pal, sizeof(vidPalette) );
  float scale = 1+cur_gamma*0.2;
  int border = 0, darkest = 0;
  for( int i=0; i<256; i++ ) {
    int r, g, b;
    r = pal[i].r * scale;
    pal[i].r = min( MAX_RGB-1, r );
    g = pal[i].g * scale;
    pal[i].g = min( MAX_RGB-1, g );
    b = pal[i].b * scale;
    pal[i].b = min( MAX_RGB-1, b );
    int sum = r+g+b;
    if( i == 0 || sum < darkest ) {
      border = i;
      darkest = sum;
    }
  }
  vidDoSetPalette( pal, border );
}
