// fpaq0pv3 - Stationary order 0 file compressor.
// (C) 2004, Matt Mahoney under GPL, http://www.gnu.org/licenses/gpl.txt
// To compile: g++ -O fpaq0.cpp
// 15 April 2007--modified predictor to improve speed, Ilia Muraviev
// 04 April 2008--Faster compression (20-30%) and decompression (10%), Nania Francesco Antonio
// 06 April 2008--Further speed optimization: c.speed +20%, d.speed +30% (from pv3),Shelwien
// 06 April 2008--Further speed optimization: c.speed +10-20%, d.speed + 5% (from pv4),Nania Francesco Antonio

#include <memory.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

typedef unsigned int uint;
typedef unsigned char byte;
typedef unsigned short word;

#define PSCALE 4096

//////////////////////////// Predictor /////////////////////////

// Adaptive bit predictor: keeps one probability estimate per context and
// nudges it towards each bit that is actually seen.
struct Predictor {
  int cxt;     // Context: last 0-8 bits with a leading 1
  int p[256];  // p[cxt] = probability that the next bit is 1, scaled by PSCALE

  // Every context starts at probability 1/2. cxt gets a defined value so that
  // P() and update() are safe even before the caller selects a context.
  Predictor(): cxt(0) {
    const int slots = (int)(sizeof(p)/sizeof(p[0]));
    for( int i=0; i<slots; i++ ) p[i] = PSCALE>>1;
  }

  // Assume a stationary order 0 stream of 9-bit symbols.
  // Returns the scaled probability of a 1 bit in the current context.
  int P() const {
    return p[cxt];
  }

  // Learn from the coded bit y (0 or 1) and append it to the context.
  // The estimate moves 1/32 of the remaining distance towards the observed
  // bit, so it always stays strictly between 0 and PSCALE.
  // cxt must index into p[] on entry; the caller resets cxt between symbols,
  // since it leaves the table range after eight consecutive updates.
  void update( int y ) {
    int &q = p[cxt];
    if( y ) {
      q += (PSCALE-q)>>5;
      cxt = cxt+cxt+1;
    } else {
      q -= q>>5;
      cxt = cxt+cxt;
    }
  }
};

struct Encoder {
  Predictor predictor;
  byte* ptr;         // Compressed data file
  uint x1, x2;            // Range, initially [0, 1), scaled by 2^32

  // In DECOMPRESS mode, initialize x to the first 4 bytes of the archive
  Encoder( byte* f ): ptr(f) {
    int i,c;
    x1 = 0x00000000;
    x2 = 0xFFFFFFFF;
  }

  void encode( int y ) {

    const uint xmid = x1 + ((x2-x1) >> 12) * predictor.P();
    if( y ) x2=xmid; else x1=xmid+1;

    predictor.update(y);
	news:
    switch ((x1^x2)<0x1000000 )
    {
    case 1:
    ptr[0] = x2>>24; ++ptr;
    (x1<<=8);
    (x2<<=8) |= 0xFF;
    goto news;
    }

  }

  void flush()
  {
    *ptr++ = x2>>24;
  }

};

// Binary arithmetic decoder reading from a caller-owned buffer. It mirrors
// Encoder step for step. No bounds checking is done on ptr.
struct Decoder {
  Predictor predictor;
  byte* ptr;         // Next unread byte of the compressed input buffer
  uint x1, x2;       // Range, initially [0, 1), scaled by 2^32
  uint x;            // Last 4 input bytes of archive

  // Start with the full range and load x with the first 4 bytes at f
  // (big-endian). At least 4 readable bytes must be available there.
  Decoder( byte* f ): ptr(f), x1(0x00000000), x2(0xFFFFFFFF) {
    // Widen each byte to uint before shifting, so a byte >= 0x80 is never
    // shifted into the sign bit of a promoted int.
    x = ((uint)ptr[0]<<24) | ((uint)ptr[1]<<16) | ((uint)ptr[2]<<8) | (uint)ptr[3];
    ptr += 4;
  }

  // Decode and return one bit (0 or 1) using the predictor's current context.
  int decode() {
    // Same range split as the encoder; x falls in the lower part for a 1.
    const uint xmid = x1 + ((x2-x1) >> 12) * predictor.P();
    int y;
    if( x<=xmid ) {
      y = 1;
      x2 = xmid;
    } else {
      y = 0;
      x1 = xmid+1;
    }
    predictor.update(y);

    // Drop the top byte while both bounds agree on it, pulling in one new
    // input byte each time.
    while( (x1^x2)<0x1000000 ) {
      x1 <<= 8;
      x2 = (x2<<8) | 0xFF;
      x  = (x<<8) | *ptr++;
    }

    return y;
  }

};


//////////////////////////// Buffers and file coding ////////////////////////////

// Allocate n usable bytes whose start address is a multiple of aln.
// aln must be a power of two (the mask arithmetic below relies on it).
// The block is over-allocated by aln-1 bytes and the pointer returned is the
// first aligned address inside it. The pointer from new[] is not kept, so the
// block cannot be released with delete[]; it is meant for buffers that live
// for the whole run.
byte* anew( int n, int aln ) {
  byte* raw = new byte[n+aln-1];
  // Use a pointer-sized integer: a 32-bit cast would truncate the address on
  // 64-bit targets.
  const uintptr_t mask = (uintptr_t)aln - 1;
  const uintptr_t misalign = (uintptr_t)raw & mask;
  if( misalign ) raw += (uintptr_t)aln - misalign;  // round up to the next boundary
  return raw;
}

// Sizes of the two shared I/O buffers, in bytes.
enum {
  ibsize = 1<<20,        // input buffer capacity
  iblim  = ibsize-512,   // DecodeRare refills once the read position reaches this offset
  obsize = 1<<13,        // output buffer capacity
  oblim  = obsize-4096   // EncodeRare writes out this many bytes once the write position reaches it
};

// Shared buffers, each aligned to a 4096-byte boundary.
byte *inpbuf = anew( ibsize, 4096 );
byte *outbuf = anew( obsize, 4096 );

void EncodeRare( FILE* in, FILE* out ) {
  Encoder e( outbuf );
  while(1) {
    int i,l = fread( inpbuf, 1,ibsize, in );
    byte c;
    for( i=0; i<l; i++ ) {
      e.predictor.cxt = 0;
      e.encode(0);
      e.predictor.cxt = 1;

      c = inpbuf[i];
      e.encode( (c>>7)&1 );
      e.encode( (c>>6)&1 );
      e.encode( (c>>5)&1 );
      e.encode( (c>>4)&1 );
      e.encode( (c>>3)&1 );
      e.encode( (c>>2)&1 );
      e.encode( (c>>1)&1 );
      e.encode( (c)&1    );

      byte* pth = &outbuf[oblim];
      if( e.ptr>=pth ) {
        int n = fwrite( outbuf,1,oblim, out );
        memcpy( outbuf,pth, e.ptr-pth );
        e.ptr -= oblim;
      }
    }
    if( l<ibsize ) break;
  };
  e.predictor.cxt = 0;
  e.encode(1);  // EOF code
  e.flush();
  fwrite( outbuf,1,e.ptr-outbuf, out );
}

void DecodeRare( FILE* in, FILE* out ) {
  int flag=1,i,l = fread( inpbuf, 1,ibsize, in );
  for( i=0; (l+i<ibsize) && (i<256); i++ ) inpbuf[l+i]=0;
  Decoder e( inpbuf );
  while( flag ) {
    byte* ptr;
    for( ptr=outbuf; ptr<&outbuf[obsize]; ) {
      e.predictor.cxt = 0;
      if( e.decode() ) { flag=0; break; }
      e.predictor.cxt = 1;
      byte b[] = { e.decode(), e.decode(), e.decode(), e.decode(), e.decode(), e.decode(), e.decode(), e.decode() };

// alternative ways of doing the same:
// 1.   typedef byte weird[2][2][2][2][2][2][2][2];
//      weird &c = *(weird*)0x100; // to remove null warning
//      *ptr++ = (byte)(uint)&c[b[0]][b[1]][b[2]][b[3]][b[4]][b[5]][b[6]][b[7]];
// 2.   ptr[0] = ((b[0]+b[0]+b[1])<<6) | ((b[2]+b[2]+b[3])<<4) | ((b[4]+b[4]+b[5])<<2) | (b[6]+b[6]+b[7]);

      ptr[0] = (b[0]<<7)|(b[1]<<6)|(b[2]<<5)|(b[3]<<4)|(b[4]<<3)|(b[5]<<2)|(b[6]<<1)|b[7];
      ptr+=1;

      byte* pth = &inpbuf[iblim];
      if( (e.ptr>=pth) && (e.ptr<&inpbuf[l]) ) {
        memcpy( inpbuf,e.ptr, l=&inpbuf[l]-e.ptr );
        l += fread( &inpbuf[l],1,ibsize-l, in );
        for( i=0; (l+i<ibsize) && (i<256); i++ ) inpbuf[l+i]=0;
        e.ptr = inpbuf;
      }
    }
    fwrite( outbuf,1,ptr-outbuf, out );
  };
}
//////////////////////////// main ////////////////////////////

// Command-line entry point: fpaq0pv3 c|d input output
// Returns 0 on success. Exits with status 1 on a usage error, when a file
// cannot be opened, or when the output cannot be flushed.
int main(int argc, char** argv) {

  // Check arguments: fpaq0 c/d input output.
  // The mode is validated before any file is opened, so a mistyped mode
  // cannot truncate the output file.
  const char mode = (argc==4) ? argv[1][0] : '\0';
  if( mode!='c' && mode!='d' ) {
    printf("To compress:   fpaq0pv3 c input output\n"
           "To decompress: fpaq0pv3 d input output\n");
    exit(1);
  }

  // Open files
  FILE *in  = fopen(argv[2], "rb"); if (!in ) perror(argv[2]), exit(1);
  FILE *out = fopen(argv[3], "wb"); if (!out) perror(argv[3]), exit(1);

  // Start timer
  clock_t start = clock();

  // Compress or decompress
  if( mode=='c' ) EncodeRare( in, out );
  else DecodeRare( in, out );

  // Record the stream positions while the files are still open
  const long in_bytes  = ftell(in);
  const long out_bytes = ftell(out);

  // Close files; a failed close of the output means buffered data was lost
  fclose(in);
  if( fclose(out)!=0 ) perror(argv[3]), exit(1);

  // Print results
  printf("%s (%ld bytes) -> %s (%ld bytes) in %1.2f s.\n",
    argv[2], in_bytes,
    argv[3], out_bytes,
    ((double)clock()-start)/CLOCKS_PER_SEC
  );

  return 0;
}

