/*

  Name      :  FPU Coprocessor + math helpers
  Hardware  :  uM-FPU V3.1
  Notes     :  replaced digitalWrite() calls with direct port manipulation for snappier number crunching
  
  I could have used a 256 byte sin lookup table but interfacing to an external FPU was way cooler.
  I thought demos were supposed to be calculated 'real-time' anyway.
  Check out the other math functions at the end of this file.
  
  Fpu.cpp - uM-FPU V3.1 floating point coprocessor library
  Copyright (c) 2008 Cam Thompson.
  Author: Cam Thompson, Micromega Corporation, <www.micromegacorp.com>
  Version: December 15, 2008

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

*/

#include <math.h>

#include "WProgram.h"
#include "fpu.h"


//---------- constructor ----------------------------------------------------

// Configures the SPI pins used to talk to the FPU: SCK, MOSI and the FPU
// slave-select pin become outputs, MISO becomes an input.
// NOTE(review): a global UMFPU object is defined in this file, so this
// constructor runs during static initialization — confirm that calling
// pinMode() that early is fine on the target core.
UMFPU::UMFPU()
{
  // initialize the SPI pins
  pinMode(SCK_PIN, OUTPUT);
  pinMode(MOSI_PIN, OUTPUT);
  pinMode(MISO_PIN, INPUT);
  pinMode(FPU_SS_PIN, OUTPUT);
}


// Exchanges a single byte over the hardware SPI unit.
//   data    : byte to shift out.
//   returns : byte that was shifted in during the same transfer.
// The SPI2X prescaler bit is set for the duration of the transfer and
// cleared again afterwards. There is no timeout on the completion wait.
byte fpu_transfer(byte data)
{
  SPSR = (1<<SPI2X);              // set prescaler bits - this speeds up transmission

  delayMicroseconds(1);  

  SPDR = data;			  // writing SPDR starts the transmission
  while (!(SPSR & (1<<SPIF)));     // busy-wait until SPIF reports the end of the transmission

  SPSR = (0<<SPI2X);              // clear prescaler bits - restore transmission speed

  return SPDR;			  // received byte: used by the read*() methods, ignored by the write*() methods
}


//------------------- begin -------------------------------------------------

void UMFPU::begin(void)
{
  begin(FPU_SS_PIN);
}

// Starts the FPU interface on the given slave-select pin.
//   pin : Arduino pin number wired to the FPU's select line; stored in _cs.
// Drives the pin HIGH, makes it an output and then resets the FPU.
void UMFPU::begin(byte pin)
{
  // initialize the chip select
  _cs = pin;
  // Level is written before the pin is switched to OUTPUT — presumably so the
  // select line never drives LOW by accident during setup.
  digitalWrite(_cs, HIGH);
  pinMode(_cs, OUTPUT);
  reset();
}


// Prepares the bus for an FPU transfer: loads SPCR with B01010000 (the same
// value reset() uses) and clears bit FPU_SS_PIN of PORTD, i.e. pulls the
// FPU select line LOW. Despite the name, every read/write in this file calls
// this *before* talking to the FPU and fpuOn() afterwards.
// NOTE(review): assumes the select line is bit FPU_SS_PIN of PORTD; the _cs
// member chosen in begin() is not consulted here.
void fpuOff(){
  SPCR = B01010000; 
  bitClear(PORTD,FPU_SS_PIN);
}

// Ends an FPU transfer: loads SPCR with B01011100 and sets bit FPU_SS_PIN of
// PORTD, i.e. drives the FPU select line HIGH again.
// NOTE(review): B01011100 differs from the FPU setting (B01010000) in bits 3
// and 2 — presumably the SPI configuration needed by another device sharing
// the bus; confirm against the rest of the project.
void fpuOn(){
  SPCR = B01011100;  
  bitSet(PORTD,FPU_SS_PIN);
}

//------------------- reset -------------------------------------------------

// Resets the FPU and (re)configures the hardware SPI unit for it.
// With the select line LOW and MOSI held HIGH, SCK is pulsed 80 times by
// hand; MOSI is then dropped, a short delay follows, SPCR/SPSR are loaded
// and the select line is released.
void UMFPU::reset()
{
  digitalWrite(_cs, LOW);

  // reset the FPU: 80 manual clock pulses with MOSI held high
  digitalWrite(MOSI_PIN, HIGH);
  for (byte i = 0; i < 80; i++)
  {
    digitalWrite(SCK_PIN, HIGH);
    digitalWrite(SCK_PIN, LOW);
  }
  digitalWrite(MOSI_PIN, LOW);
  // NOTE(review): delayMicro() is not defined in this file — confirm it
  // exists elsewhere and that 10 of its units satisfy the FPU's reset time.
  delayMicro(10);

  // configure the hardware SPI unit for the FPU
  SPCR = B01010000;
  SPSR = (1<<SPI2X); // set prescaler bits

  digitalWrite(_cs, HIGH);
}

//------------------- sync --------------------------------------------------

// Sends the SYNC opcode and returns the single byte the FPU answers with.
// Callers compare the result against SYNC_CHAR to confirm the FPU is alive.
byte UMFPU::sync()
{
  write(SYNC);
  const byte reply = read();
  return reply;
}

//------------------- wait --------------------------------------------------

// Blocks until the FPU reports that it is ready.
// Selects the FPU via fpuOff(), spins while bit PINB4 of PINB reads high,
// then deselects via fpuOn().
// NOTE(review): PINB4 is presumably the MISO line (the commented-out
// alternative polls MISO_PIN) — confirm for the target board.
// There is no timeout: if the line never goes low this never returns.
void UMFPU::wait(void)
{
fpuOff();
//  digitalWrite(_cs, LOW);

// Direct port read vs. digitalRead(): which is faster?
  while (bit_is_set(PINB,PINB4)) {};
//  while (digitalRead(MISO_PIN)) {};

fpuOn();
//  digitalWrite(_cs, HIGH);
}

//------------------- write -------------------------------------------------


// Sends one byte to the FPU inside a single select/deselect window.
void UMFPU::write(byte b1)
{
  fpuOff();            // select the FPU
  fpu_transfer(b1);
  fpuOn();             // deselect the FPU
}


// Sends two bytes to the FPU, in argument order, inside a single
// select/deselect window.
void UMFPU::write(byte b1, byte b2)
{
  fpuOff();            // select the FPU
  fpu_transfer(b1);
  fpu_transfer(b2);
  fpuOn();             // deselect the FPU
}

// Sends three bytes to the FPU, in argument order, inside a single
// select/deselect window.
void UMFPU::write(byte b1, byte b2, byte b3)
{
  fpuOff();            // select the FPU
  fpu_transfer(b1);
  fpu_transfer(b2);
  fpu_transfer(b3);
  fpuOn();             // deselect the FPU
}

// Sends four bytes to the FPU, in argument order, inside a single
// select/deselect window.
void UMFPU::write(byte b1, byte b2, byte b3, byte b4)
{
  fpuOff();            // select the FPU
  fpu_transfer(b1);
  fpu_transfer(b2);
  fpu_transfer(b3);
  fpu_transfer(b4);
  fpuOn();             // deselect the FPU
}

// Sends five bytes to the FPU, in argument order, inside a single
// select/deselect window.
void UMFPU::write(byte b1, byte b2, byte b3, byte b4, byte b5)
{ 
  fpuOff();            // select the FPU
  fpu_transfer(b1);
  fpu_transfer(b2);
  fpu_transfer(b3);
  fpu_transfer(b4);
  fpu_transfer(b5);
  fpuOn();             // deselect the FPU
}

// Sends six bytes to the FPU, in argument order, inside a single
// select/deselect window.
void UMFPU::write(byte b1, byte b2, byte b3, byte b4, byte b5, byte b6)
{
  fpuOff();            // select the FPU
  fpu_transfer(b1);
  fpu_transfer(b2);
  fpu_transfer(b3);
  fpu_transfer(b4);
  fpu_transfer(b5);
  fpu_transfer(b6);
  fpuOn();             // deselect the FPU
}


// Sends seven bytes to the FPU, in argument order, inside a single
// select/deselect window.
void UMFPU::write(byte b1, byte b2, byte b3, byte b4, byte b5, byte b6, byte b7)
{
  fpuOff();            // select the FPU
  fpu_transfer(b1);
  fpu_transfer(b2);
  fpu_transfer(b3);
  fpu_transfer(b4);
  fpu_transfer(b5);
  fpu_transfer(b6);
  fpu_transfer(b7);
  fpuOn();             // deselect the FPU
}



// Sends a word value to the FPU as two bytes.
// The value is stored in the shared conversion union _u and its bytes are
// sent as bval[1] then bval[0] — i.e. most-significant byte first, assuming
// a little-endian layout of _u (TODO confirm against the union in fpu.h).
void UMFPU::writeWord(int wval)
{
  fpuOff();            // select the FPU
  _u.wval[0] = wval;
  fpu_transfer(_u.bval[1]);
  fpu_transfer(_u.bval[0]);
  fpuOn();             // deselect the FPU
}

void UMFPU::writeLong(long lval)
{
  digitalWrite(_cs, LOW);
  _u.lval = lval;
  fpu_transfer(_u.bval[3]);
  fpu_transfer(_u.bval[2]);
  fpu_transfer(_u.bval[1]);
  fpu_transfer(_u.bval[0]);
  digitalWrite(_cs, HIGH);
}

// Sends a float to the FPU as its four raw bytes.
// The value is stored in the shared conversion union _u and sent as bval[3]
// down to bval[0] — most-significant byte first, assuming a little-endian
// layout of _u (TODO confirm against the union in fpu.h).
void UMFPU::writeFloat(float fval)
{
  fpuOff();            // select the FPU
  _u.fval = fval;
  fpu_transfer(_u.bval[3]);
  fpu_transfer(_u.bval[2]);
  fpu_transfer(_u.bval[1]);
  fpu_transfer(_u.bval[0]); 
  fpuOn();             // deselect the FPU
}

void UMFPU::writeString(char *s)
{
  digitalWrite(_cs, LOW);
  while (*s) fpu_transfer(*s++);
  fpu_transfer(0);
  digitalWrite(_cs, HIGH);
}


//------------------- read --------------------------------------------------
// Reads one byte from the FPU.
// Selects the FPU, waits 5 microseconds, clocks out a zero byte to shift the
// reply in, deselects and returns the received byte (also left in
// _u.bval[0]).
byte UMFPU::read(void)
{
fpuOff();
delayMicroseconds(5);   // fixed pause before the first read clock (used here instead of readDelay())
//  readDelay();
  _u.bval[0] = fpu_transfer(0);
  fpuOn();
  return _u.bval[0];
}

int UMFPU::readWord(void)
{
  digitalWrite(_cs, LOW);
  readDelay();
  _u.bval[1] = fpu_transfer(0);
  _u.bval[0] = fpu_transfer(0);
  digitalWrite(_cs, HIGH);
  return _u.wval[0];
}

long UMFPU::readLong(void)
{
  digitalWrite(_cs, LOW);
  readDelay();
  _u.bval[3] = fpu_transfer(0);
  _u.bval[2] = fpu_transfer(0);
  _u.bval[1] = fpu_transfer(0);
  _u.bval[0] = fpu_transfer(0);
  digitalWrite(_cs, HIGH);
  return _u.lval;
}

// Reads a float from the FPU.
// Selects the FPU, waits 5 microseconds, clocks in four bytes into bval[3]
// down to bval[0] of the shared conversion union _u, deselects and returns
// _u.fval.
float UMFPU::readFloat(void)
{
  fpuOff();
//  while (bit_is_set(PINB,PINB4)) {};
  delayMicroseconds(5);   // fixed pause before the first read clock (used here instead of readDelay())
//  readDelay();
  _u.bval[3] = fpu_transfer(0);
  _u.bval[2] = fpu_transfer(0);
  _u.bval[1] = fpu_transfer(0);
  _u.bval[0] = fpu_transfer(0);
  fpuOn();
  return _u.fval;
}

// Reads a string from the FPU into s using the READSTR opcode.
//   s       : destination buffer supplied by the caller.
//   returns : s.
char *UMFPU::readString(char *s)
{
  char *const filled = readString(s, READSTR);
  return filled;
}

char *UMFPU::readString(char *s, byte opcode)
{
  char *sb, c;

  wait();
  if (opcode != READSEL) opcode = READSTR;
  write(opcode);
  readDelay();

  digitalWrite(_cs, LOW);
  readDelay();
    sb = s;
  while (1) {
    c = fpu_transfer(0);
    if (!c) break;
    *s++ = c;
  }
  *s = '\0';
  digitalWrite(_cs, HIGH);
  return sb;
}

// Waits for the FPU to become ready, sends the READSTATUS opcode and
// returns the status byte the FPU answers with.
byte UMFPU::readStatus(void)
{
  wait();
  write(READSTATUS);
  const byte status = read();
  return status;
}

//------------------- readDelay ---------------------------------------------

// Short fixed pause (1 microsecond) inserted before read transfers.
// NOTE(review): read() and readFloat() use a 5 microsecond delay instead —
// confirm which value the FPU actually requires.
void UMFPU::readDelay(void)
{
  delayMicroseconds(1);
}

//---------- preinstantiate FPU object --------------------------------------

// Single global driver instance used by init_fpu(), fpu_cos() and fpu_sin().
UMFPU Fpu = UMFPU();

//  Store lookup table in flash (program) memory instead of SRAM.
//PROGMEM prog_char cosTable[360] = {127,127,127,127,127,127,126,126,126,125,125,125,124,124,123,123,122,121,121,120,119,119,118,117,116,115,114,113,112,111,110,109,108,107,105,104,103,101,100,99,  97,96,94,93,91,90,88,87,85,83,82,80,78,76,75,73,71,69,67,65,  64,62,60,58,56,54,52,50,48,46,43,41,39,37,35,33,31,29,26,24,  22,20,18,15,13,11,9,7,4,2,0,-2,-4,-7,-9,-11,-13,-15,-18,-20,  -22,-24,-26,-29,-31,-33,-35,-37,-39,-41,-43,-46,-48,-50,-52,-54,-56,-58,-60,-62,  -63,-65,-67,-69,-71,-73,-75,-76,-78,-80,-82,-83,-85,-87,-88,-90,-91,-93,-94,-96,  -97,-99,-100,-101,-103,-104,-105,-107,-108,-109,-110,-111,-112,-113,-114,-115,-116,-117,-118,-119,  -119,-120,-121,-121,-122,-123,-123,-124,-124,-125,-125,-125,-126,-126,-126,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-127,-126,-126,-126,-125,-125,-125,-124,-124,-123,-123,-122,-121,-121,-120,  -119,-119,-118,-117,-116,-115,-114,-113,-112,-111,-110,-109,-108,-107,-105,-104,-103,-101,-100,-99,-97,-96,-94,-93,-91,-90,-88,-87,-85,-83,-82,-80,-78,-76,-75,-73,-71,-69,-67,-65,-64,-62,-60,-58,-56,-54,-52,-50,-48,-46,-43,-41,-39,-37,-35,-33,-31,-29,-26,-24,-22,-20,-18,-15,-13,-11,-9,-7,-4,-2,0,2,4,7,9,11,13,15,18,20,  22,24,26,29,31,33,35,37,39,41,43,46,48,50,52,54,56,58,60,62,  64,65,67,69,71,73,75,76,78,80,82,83,85,87,88,90,91,93,94,96,97,99,100,101,103,104,105,107,108,109,110,111,112,113,114,115,116,117,118,119,119,120,121,121,122,123,123,124,124,125,125,125,126,126,126,127,127,127,127,127};

// Brings up the FPU on FPU_SS_PIN and verifies that it answers the sync
// request with SYNC_CHAR. If it does not, execution halts here forever.
void init_fpu(){
  Fpu.begin(FPU_SS_PIN);

  const bool fpuResponded = (Fpu.sync() == SYNC_CHAR);
  if (!fpuResponded)
  {
    // no FPU detected: hang
    for (;;) { }
  }

  // FPU detected (version printing is disabled):
  //    FpuSerial.printVersion();
  //    Serial.println();
}

// FPU version of cos(float angle).
// Sends angle to the FPU, issues its COS opcode, waits for completion and
// reads the float result back.
// NOTE(review): the unit expected for angle (radians vs. degrees) is decided
// by the FPU's COS instruction, not by this code — confirm in the datasheet.
float fpu_cos(float angle)
{
  /*
  // integer-angle variant (disabled)
  Fpu.write(SELECTA, 1,FSETI, angle,COS);
  Fpu.wait();
  Fpu.write(FREADA); // read from register 1
  return Fpu.readFloat();
  */
    Fpu.write(SELECTA, 1,FWRITE0);   // select register 1 as A; FWRITE0 is followed by the float payload
    Fpu.writeFloat(angle);           // the four bytes of angle
    Fpu.write(FSET0,COS);            // presumably A = reg0, then A = cos(A) — verify opcodes
    Fpu.wait();                      // block until the FPU is ready
    Fpu.write(FREADA); // read from register 1
    return Fpu.readFloat();  
}

// FPU version of sin(float angle).
// Sends angle to the FPU, issues its SIN opcode, waits for completion and
// reads the float result back.
// NOTE(review): the unit expected for angle (radians vs. degrees) is decided
// by the FPU's SIN instruction, not by this code — confirm in the datasheet.
float fpu_sin(float angle)
{
  
  /*
  // integer-angle variant (disabled)
  Fpu.write(SELECTA, 1,FSETI, angle,SIN);
  Fpu.wait();
  Fpu.write(FREADA); // read from register 1
  return Fpu.readFloat();  
*/

    Fpu.write(SELECTA, 1,FWRITE0);   // select register 1 as A; FWRITE0 is followed by the float payload
    Fpu.writeFloat(angle);           // the four bytes of angle
    Fpu.write(FSET0,SIN);            // presumably A = reg0, then A = sin(A) — verify opcodes
    Fpu.wait();                      // block until the FPU is ready
    Fpu.write(FREADA); // read from register 1
    return Fpu.readFloat();
}

// Cosine of an angle given in degrees.
//   angleF  : angle in degrees.
//   returns : cos(angleF), in the range [-1, 1].
//
// This was meant to read a 360-entry cosine table from program memory, but
// the table is commented out in this file and the function body was empty,
// so it fell off the end without returning a value (undefined behaviour).
// Until a table is restored the value is computed with cos() from <math.h>.
float lookup_cos(float angleF)
{
  const float degToRad = 0.017453292519943295f;   // PI / 180
  return cos(angleF * degToRad);
}
  
// Sine of an angle given in degrees.
//   angle   : angle in degrees.
//   returns : sin(angle), in the range [-1, 1].
//
// This was meant to read the cosine table at (angle + 270) degrees from
// program memory, but the table is commented out in this file and the
// function body was empty, so it fell off the end without returning a value
// (undefined behaviour). Until a table is restored the value is computed
// with sin() from <math.h>.
float lookup_sin(float angle)
{
  const float degToRad = 0.017453292519943295f;   // PI / 180
  return sin(angle * degToRad);
}

// Low-precision sine using a single parabola per half period.
//   x       : angle in degrees. Only one wrap step is applied, so after
//             conversion the angle must lie within -3*PI..3*PI
//             (-540..540 degrees) to land in the valid -PI..PI range.
//   returns : approximate sin(x).
float fastSine (float x)
{
  x = radians(x);

  // fold the angle into -PI..PI (single step)
  if (x < -3.14159265) {
    x += 6.28318531;
  } else if (x > 3.14159265) {
    x -= 6.28318531;
  }

  // parabola 4/PI * x -/+ 4/PI^2 * x^2; the sign of the square term follows x
  const float parabola = (x < 0)
      ? (1.27323954 * x + 0.405284735 * x * x)
      : (1.27323954 * x - 0.405284735 * x * x);
  return parabola;
}

// Low-precision cosine, computed as the parabola sine approximation of
// (x + PI/2).
//   x       : angle in degrees. Only one wrap step is applied, so after
//             conversion the angle must lie within -3*PI..3*PI
//             (-540..540 degrees).
//   returns : approximate cos(x).
float fastCoSine (float x)
{
  x = radians(x);

  // fold the angle into -PI..PI (single step)
  if (x < -3.14159265) {
    x += 6.28318531;
  } else if (x > 3.14159265) {
    x -= 6.28318531;
  }

  // cos(x) == sin(x + PI/2); re-fold if the shift pushed us past PI
  x += 1.57079632;
  if (x > 3.14159265) {
    x -= 6.28318531;
  }

  // parabola 4/PI * x -/+ 4/PI^2 * x^2; the sign of the square term follows x
  const float parabola = (x < 0)
      ? (1.27323954 * x + 0.405284735 * x * x)
      : (1.27323954 * x - 0.405284735 * x * x);
  return parabola;
}

// Higher-precision sine: the parabola approximation followed by one
// correction step that blends in a squared term with weight 0.225.
//   x       : angle in degrees. Only one wrap step is applied, so after
//             conversion the angle must lie within -3*PI..3*PI
//             (-540..540 degrees).
//   returns : approximate sin(x).
float fastSineAccurate (float x)
{
  x = radians(x);

  // fold the angle into -PI..PI (single step)
  if (x < -3.14159265) {
    x += 6.28318531;
  } else if (x > 3.14159265) {
    x -= 6.28318531;
  }

  // first pass: parabola; the sign of the square term follows x
  float approx;
  if (x < 0) {
    approx = 1.27323954 * x + .405284735 * x * x;
  } else {
    approx = 1.27323954 * x - 0.405284735 * x * x;
  }

  // second pass: the same correction for both halves, the squared term
  // keeping the sign of the first-pass value
  if (approx < 0) {
    approx = .225 * (approx * -approx - approx) + approx;
  } else {
    approx = .225 * (approx * approx - approx) + approx;
  }
  return approx;
}

// Higher-precision cosine: the parabola sine approximation of (x + PI/2)
// followed by one correction step with weight 0.225.
//   x       : angle in degrees. Only one wrap step is applied, so after
//             conversion the angle must lie within -3*PI..3*PI
//             (-540..540 degrees).
//   returns : approximate cos(x).
//
// The input is folded into -PI..PI before the PI/2 phase shift, exactly as
// fastCoSine() and fastSineAccurate() do. Without that fold, angles below
// -270 degrees reached the parabola outside its valid range and produced
// wrong values (e.g. about 0.74 instead of 0.5 for -300 degrees).
float fastCoSineAccurate (float x)
{
  float cosine;
  x = radians(x);

  // always wrap input angle to -PI..PI
  if (x < -3.14159265)
    x += 6.28318531;
  else
    if (x >  3.14159265)
      x -= 6.28318531;

  // compute cosine: sin(x + PI/2) = cos(x)
  x += 1.57079632;
  if (x >  3.14159265)
    x -= 6.28318531;

  // first pass: parabola; the sign of the square term follows x
  if (x < 0)
    cosine = 1.27323954 * x + 0.405284735 * x * x;
  else
    cosine = 1.27323954 * x - 0.405284735 * x * x;

  // second pass: correction, the squared term keeping the sign of the
  // first-pass value
  if (cosine < 0)
    cosine = .225 * (cosine *-cosine - cosine) + cosine;
  else
    cosine = .225 * (cosine * cosine - cosine) + cosine;

  return cosine;
}

// Integer division by shift-and-subtract (restoring long division), without
// using the '/' operator.
//   num     : dividend.
//   denom   : divisor.
//   returns : num / denom truncated toward zero; 0 when denom is 0.
//
// The loop walks the dividend from its top bit down using an unsigned mask,
// so it works for any width of int (the previous hard-coded bit index 31
// shifted past the width of a 16-bit int, which is undefined). Magnitudes
// are handled as unsigned values and the sign is applied at the end, so
// negative operands give the same result as the built-in operator.
int divide(int num, int denom)
{
  if (denom == 0) return 0;   // no meaningful quotient; avoid producing garbage

  const bool negative = (num < 0) != (denom < 0);
  // 0u - x is the magnitude of a negative x without signed overflow
  const unsigned int dividend = (num < 0) ? 0u - static_cast<unsigned int>(num)
                                          : static_cast<unsigned int>(num);
  const unsigned int divisor = (denom < 0) ? 0u - static_cast<unsigned int>(denom)
                                           : static_cast<unsigned int>(denom);

  unsigned int remainder = 0;
  unsigned int quotient = 0;

  // mask starts at the most-significant bit of unsigned int, whatever its width
  for (unsigned int mask = ~(~0u >> 1); mask != 0; mask >>= 1) {
    // bring down the next bit of the dividend
    remainder = (remainder << 1) | ((dividend & mask) ? 1u : 0u);
    quotient <<= 1;
    if (remainder >= divisor) {
      remainder -= divisor;
      quotient |= 1u;
    }
  }

  return static_cast<int>(negative ? 0u - quotient : quotient);
}

