/*--------------------------------------------------------------------
 * TITLE: Plasma Floating Point Library
 * AUTHOR: Steve Rhoads (rhoadss@yahoo.com)
 * DATE CREATED: 3/2/06
 * FILENAME: math.c
 * PROJECT: Plasma CPU core
 * COPYRIGHT: Software placed into the public domain by the author.
 *    Software 'as is' without warranty.  Author liable for nothing.
 * DESCRIPTION:
 *    Plasma Floating Point Library
 *
 * 2018-02-09 -- simulation passed all functions, and
 *               verified on M2e board
 *
 *--------------------------------------------------------------------
 * IEEE_fp = sign(1) | exponent(8) | fraction(23)
 * cos(x)=1-x^2/2!+x^4/4!-x^6/6!+...
 * exp(x)=1+x+x^2/2!+x^3/3!+...
 * ln(1+x)=x-x^2/2+x^3/3-x^4/4+...
 * atan(x)=x-x^3/3+x^5/5-x^7/7+...
 * pow(x,y)=exp(y*ln(x))
 * x=tan(a+b)=(tan(a)+tan(b))/(1-tan(a)*tan(b))
 * atan(x)=b+atan((x-tan(b))/(1+x*tan(b)))
 * ln(a*x)=ln(a)+ln(x); ln(x^n)=n*ln(x)
 *--------------------------------------------------------------------*/
// #include "rtos.h"
#include "math.h"

// USE_SW_MULT enables the software integer multiply/divide helpers guarded by
// #ifdef USE_SW_MULT near the end of this file.
// Because USE_SW_MULT is defined unconditionally on the next line, the #if
// below can never define USE_MULT64; unless USE_MULT64 is defined elsewhere,
// FP_Mult and FP_Div use their 16-bit partial-product code instead of
// OS_AsmMult.
#define USE_SW_MULT
#if !defined(WIN32) && !defined(USE_SW_MULT)
#define USE_MULT64
#endif


// FtoL reads the storage of a float object as an unsigned long; LtoF reads the
// storage of an integer object as a float.  Both take the address of X, so X
// must be an lvalue (a variable, not an expression).
// NOTE(review): this assumes sizeof(unsigned long) == sizeof(float) and that
// the compiler tolerates the pointer-cast type punning -- TODO confirm for the
// target toolchain.
#define FtoL(X) (*(unsigned long*)&(X))
#define LtoF(X) (*(float*)&(X))


/*
 * Return a_fp with its sign flipped.
 * Only bit 31 of the raw encoding is inverted; exponent and fraction are
 * passed through untouched.
 */
float FP_Neg(float a_fp)
{
   unsigned long bits = FtoL(a_fp) ^ 0x80000000;
   return LtoF(bits);
}


/*
 * Add two single-precision values using integer arithmetic only.
 *
 * Each operand is split into sign, 8-bit exponent and 24-bit fraction.  The
 * fraction of the operand with the smaller exponent is shifted right until
 * both share the larger exponent, the signed fractions are added, and the sum
 * is renormalised so that its leading 1 sits in bit 23.
 *
 * Bits shifted out during alignment and normalisation are discarded
 * (truncation, no rounding).
 *
 * Special cases:
 *  - An operand whose exponent field is 0 is treated as exactly zero, so
 *    FP_Add(0, x) returns x and FP_Add(0, 0) returns +0.
 *  - A result whose exponent falls below 1 is returned as +0.
 *  - A result whose exponent exceeds 254 is returned as a signed infinity
 *    encoding instead of letting the exponent spill into the sign bit.
 */
float FP_Add(float a_fp, float b_fp)
{
   unsigned long a, b, c;
   unsigned long as, bs, cs;     //sign
   long ae, af, be, bf, ce, cf;  //exponent and fraction
   a = FtoL(a_fp);
   b = FtoL(b_fp);
   as = a >> 31;                        //sign
   ae = (a >> 23) & 0xff;               //exponent
   af = a & 0x007fffff;                 //fraction
   if(ae)
      af |= 0x00800000;                 //hidden 1 bit exists only for non-zero values
   else
      af = 0;                           //exponent field 0: operand is zero
   bs = b >> 31;
   be = (b >> 23) & 0xff;
   bf = b & 0x007fffff;
   if(be)
      bf |= 0x00800000;
   else
      bf = 0;
   //Align the smaller operand to the larger exponent
   if(ae > be)
   {
      if(ae - be < 30)
         bf >>= ae - be;
      else
         bf = 0;
      ce = ae;
   }
   else
   {
      if(be - ae < 30)
         af >>= be - ae;
      else
         af = 0;
      ce = be;
   }
   cf = (as ? -af : af) + (bs ? -bf : bf);
   cs = cf < 0;
   if(cf < 0)
      cf = -cf;
   if(cf == 0)
   {
      c = 0;
      return LtoF(c);
   }
   //Carry out of bit 23: shift down and bump the exponent
   while(cf & 0xff000000)
   {
      ++ce;
      cf >>= 1;
   }
   //Cancellation: shift up until the leading 1 is back in bit 23
   while((cf & 0xff800000) == 0)
   {
      --ce;
      cf <<= 1;
   }
   //Range-check the exponent before shifting it into place so that a
   //negative or oversized value is never shifted into the encoding.
   if(ce < 1)
      c = 0;
   else if(ce > 254)
      c = (cs << 31) | 0x7f800000;
   else
      c = (cs << 31) | ((unsigned long)ce << 23) | (cf & 0x007fffff);

   return LtoF(c);
}


/*
 * Return a_fp - b_fp, computed as a_fp + (-b_fp) so that all of the
 * alignment and normalisation work stays in FP_Add.
 */
float FP_Sub(float a_fp, float b_fp)
{
   float negated_b = FP_Neg(b_fp);
   return FP_Add(a_fp, negated_b);
}


/*
 * Multiply two single-precision values using integer arithmetic only.
 *
 * Each operand is split into sign, 8-bit exponent and 24-bit fraction (the
 * hidden 1 bit is ORed in unconditionally).  The two fractions are multiplied
 * into a 64-bit product held in hi:lo, the product is shifted right by 23
 * bits, and the exponents are added with the bias of 127 removed.  The result
 * sign is the XOR of the operand signs.
 *
 * A result whose exponent ends up below 1 is returned as +0.  Low-order
 * product bits are discarded (truncation, no rounding).
 *
 * NOTE(review): because the hidden bit is ORed in even when the exponent
 * field is 0, a zero operand is treated like 2^-127, so 0*x can come back
 * non-zero when x is large.  Code that repeatedly scales a value with FP_Mult
 * until it crosses a threshold may depend on this; audit such callers before
 * changing it.
 * NOTE(review): ce is not checked against the maximum exponent before being
 * shifted into place, and a negative ce is shifted before the ce < 1 test.
 * NOTE(review): the partial-product code looks like it relies on
 * unsigned long being 32 bits wide (med1 << 16 dropping its carry bit) --
 * TODO confirm.
 */
float FP_Mult(float a_fp, float b_fp)
{
   unsigned long a, b, c;
   unsigned long as, af, bs, bf, cs, cf;
   long ae, be, ce;
#ifndef USE_MULT64
   unsigned long a2, a1, b2, b1, med1, med2;
#endif
   unsigned long hi, lo;
   a = FtoL(a_fp);
   b = FtoL(b_fp);
   //Unpack sign, exponent and fraction of both operands
   as = a >> 31;
   ae = (a >> 23) & 0xff;
   af = 0x00800000 | (a & 0x007fffff);
   bs = b >> 31;
   be = (b >> 23) & 0xff;
   bf = 0x00800000 | (b & 0x007fffff);
   cs = as ^ bs;
#ifndef USE_MULT64
   //24x24 -> 48 bit multiply built from 16-bit halves:
   //af = a2:a1, bf = b2:b1, product accumulated into hi:lo
   a1 = af & 0xffff;
   a2 = af >> 16;
   b1 = bf & 0xffff;
   b2 = bf >> 16;
   lo = a1 * b1;
   med1 = a2 * b1 + (lo >> 16);
   med2 = a1 * b2;
   hi = a2 * b2 + (med1 >> 16) + (med2 >> 16);
   med1 = (med1 & 0xffff) + (med2 & 0xffff);
   //carry out of the two middle terms goes into the high word
   hi += (med1 >> 16);
   lo = (med1 << 16) | (lo & 0xffff);
#else
   lo = OS_AsmMult(af, bf, &hi);
#endif
   //Take product bits 23 and up as the result fraction
   cf = (hi << 9) | (lo >> 23);
   //ae + be - 127
   ce = ae + be - 0x80 + 1;
   if(cf == 0)
      return LtoF(cf);
   //Shift the leading 1 down into bit 23
   while(cf & 0xff000000)
   {
      ++ce;
      cf >>= 1;
   }
   c = (cs << 31) | (ce << 23) | (cf & 0x007fffff);
   //Underflow is flushed to +0
   if(ce < 1)
      c = 0;
   return LtoF(c);
}


/*
 * Divide a_fp by b_fp using integer arithmetic only.
 *
 * The quotient fraction is built in two steps: a 12-bit estimate from a
 * 28-bit / 16-bit integer division, followed by one correction step that
 * multiplies the estimate back by the divisor, takes the remainder against
 * the dividend and divides that remainder by the top 16 divisor bits.
 * The result sign is the XOR of the operand signs and low-order bits are
 * truncated.
 *
 * Special cases (an exponent field of 0 is treated as zero):
 *  - 0 / x   returns +0 (including 0 / 0).
 *  - x / 0   returns a signed infinity encoding.
 *  - A result exponent below 1 is returned as +0.
 *  - A result exponent above 254 is returned as a signed infinity encoding
 *    instead of letting the exponent spill into the sign bit.
 */
float FP_Div(float a_fp, float b_fp)
{
   unsigned long a, b, c;
   unsigned long as, af, bs, bf, cs, cf;
   unsigned long a1, b1;
#ifndef USE_MULT64
   unsigned long a2, b2, med1, med2;
#endif
   unsigned long hi, lo;
   long ae, be, ce, d;
   a = FtoL(a_fp);
   b = FtoL(b_fp);
   as = a >> 31;
   ae = (a >> 23) & 0xff;
   af = 0x00800000 | (a & 0x007fffff);
   bs = b >> 31;
   be = (b >> 23) & 0xff;
   bf = 0x00800000 | (b & 0x007fffff);
   cs = as ^ bs;
   if(ae == 0)
   {
      //Zero dividend: without this the hidden bit would be divided as 2^-127
      c = 0;
      return LtoF(c);
   }
   if(be == 0)
   {
      //Zero divisor: saturate rather than produce a wrapped exponent
      c = (cs << 31) | 0x7f800000;
      return LtoF(c);
   }
   ce = ae - (be - 0x80) + 6 - 8;
   //First estimate: (af * 2^4) / (bf / 2^8), then scaled up by 2^12
   a1 = af << 4; //8
   b1 = bf >> 8;
   cf = a1 / b1;
   cf <<= 12; //8
   //Multiply the estimate back by the divisor (64-bit product in hi:lo)
#ifndef USE_MULT64
   a1 = cf & 0xffff;
   a2 = cf >> 16;
   b1 = bf & 0xffff;
   b2 = bf >> 16;
   lo = a1 * b1;
   med1 =a2 * b1 + (lo >> 16);
   med2 = a1 * b2;
   hi = a2 * b2 + (med1 >> 16) + (med2 >> 16);
   med1 = (med1 & 0xffff) + (med2 & 0xffff);
   hi += (med1 >> 16);
   lo = (med1 << 16) | (lo & 0xffff);
#else
   lo = OS_AsmMult(cf, bf, &hi);
#endif
   lo = (hi << 8) | (lo >> 24);
   d = af - lo;    //remainder
   assert(-0xffff < d && d < 0xffff);
   //Correction term: remainder scaled up and divided by the top divisor bits
   d <<= 16;
   b1 = bf >> 8;
   d = d / (long)b1;
   cf += d;
   if(cf == 0)
   {
      c = 0;
      return LtoF(c);
   }
   //Shift the leading 1 down into bit 23
   while(cf & 0xff000000)
   {
      ++ce;
      cf >>= 1;
   }
   //Range-check the exponent before shifting it into place
   if(ce < 1)
      c = 0;
   else if(ce > 254)
      c = (cs << 31) | 0x7f800000;
   else
      c = (cs << 31) | ((unsigned long)ce << 23) | (cf & 0x007fffff);
   return LtoF(c);
}


/*
 * Convert a float to a long, truncating toward zero.
 *
 * The 24-bit fraction (hidden bit included) is placed with its leading 1 in
 * bit 30 and then shifted right by (157 - exponent).  A shift of 31 or more
 * yields 0.  When the shift is zero or negative the fraction is returned
 * unshifted, so magnitudes of 2^31 and above do not produce a meaningful
 * value.
 */
long FP_ToLong(float a_fp)
{
   unsigned long bits = FtoL(a_fp);
   unsigned long negative = bits >> 31;
   long exponent = (bits >> 23) & 0xff;
   long mantissa = 0x00800000 | (bits & 0x007fffff);
   long rshift;

   mantissa <<= 7;                       //leading 1 now in bit 30
   rshift = (0x80 + 29) - exponent;
   if(rshift >= 31)
      mantissa = 0;                      //magnitude below 1
   else if(rshift > 0)
      mantissa >>= rshift;
   if(negative)
      mantissa = -mantissa;
   return mantissa;
}


/*
 * Convert a long to the nearest float at or below its magnitude
 * (bits that do not fit in the 24-bit fraction are truncated).
 *
 * The magnitude is taken in unsigned arithmetic so that the most negative
 * long converts correctly; negating it as a signed value overflows and left
 * the first normalisation loop unable to terminate.
 */
float FP_ToFloat(long af)
{
   unsigned long a;
   unsigned long as, ae, mag;
   as = af < 0 ? 1 : 0;
   mag = (unsigned long)af;
   if(as)
      mag = 0UL - mag;                   //well-defined even for the minimum long
   if(mag == 0)
   {
      a = 0;
      return LtoF(a);
   }
   ae = 0x80 + 22;                       //exponent for a value whose leading 1 is in bit 23
   //Too large: shift down until nothing remains above bit 23
   while(mag & ~0x00ffffffUL)
   {
      ++ae;
      mag >>= 1;
   }
   //Too small: shift up until the leading 1 reaches bit 23
   while((mag & 0x00800000) == 0)
   {
      --ae;
      mag <<= 1;
   }
   a = (as << 31) | (ae << 23) | (mag & 0x007fffff);
   return LtoF(a);
}


//0 iff a==b; 1 iff a>b; -1 iff a<b
/*
 * Three-way comparison of two floats by inspecting their raw encodings.
 * Equality means identical bit patterns, so +0 and -0 compare as different
 * (-0 is reported as smaller).  Otherwise the sign decides first, then the
 * exponent, then the fraction; for two negative values the ordering of the
 * magnitudes is reversed.
 */
int FP_Cmp(float a_fp, float b_fp)
{
   unsigned long lhs = FtoL(a_fp);
   unsigned long rhs = FtoL(b_fp);
   unsigned long lhs_neg, rhs_neg, lhs_exp, rhs_exp;
   int bigger;

   if(lhs == rhs)
      return 0;

   lhs_neg = lhs >> 31;
   rhs_neg = rhs >> 31;
   if(lhs_neg != rhs_neg)
      return lhs_neg > rhs_neg ? -1 : 1;

   //Same sign: a larger magnitude means "greater" only for positive values
   bigger = lhs_neg ? -1 : 1;

   lhs_exp = (lhs >> 23) & 0xff;
   rhs_exp = (rhs >> 23) & 0xff;
   if(lhs_exp != rhs_exp)
      return lhs_exp > rhs_exp ? bigger : -bigger;

   if((lhs & 0x007fffff) > (rhs & 0x007fffff))
      return bigger;
   return -bigger;
}


/*
 * Forwards to FP_Cmp: the result is negative when a < b, zero when the two
 * encodings are identical and positive when a > b.
 * NOTE(review): the name matches GCC's soft-float "less than" runtime
 * routine; presumably the compiler emits calls to it -- confirm.
 */
int __ltsf2(float a, float b)
{
   return FP_Cmp(a, b);
}

/*
 * Forwards to FP_Cmp: the result is negative when a < b, zero when the two
 * encodings are identical and positive when a > b.
 * NOTE(review): the name matches GCC's soft-float "less than or equal"
 * runtime routine; presumably the compiler emits calls to it -- confirm.
 */
int __lesf2(float a, float b)
{
   return FP_Cmp(a, b);
}

/*
 * Forwards to FP_Cmp: the result is positive when a > b, zero when the two
 * encodings are identical and negative when a < b.
 * NOTE(review): the name matches GCC's soft-float "greater than" runtime
 * routine; presumably the compiler emits calls to it -- confirm.
 */
int __gtsf2(float a, float b)
{
   return FP_Cmp(a, b);
}

/*
 * Forwards to FP_Cmp: the result is positive when a > b, zero when the two
 * encodings are identical and negative when a < b.
 * NOTE(review): the name matches GCC's soft-float "greater than or equal"
 * runtime routine; presumably the compiler emits calls to it -- confirm.
 */
int __gesf2(float a, float b)
{
   return FP_Cmp(a, b);
}

/*
 * Returns 0 when a and b have identical raw encodings and 1 otherwise.
 * The comparison is bitwise, so +0 and -0 are reported as different.
 * NOTE(review): the name matches GCC's soft-float equality routine, which is
 * expected to return zero for "equal" -- confirm against the toolchain docs.
 */
int __eqsf2(float a, float b)
{
   return FtoL(a) != FtoL(b);
}

/*
 * Returns 1 when a and b have different raw encodings and 0 when they are
 * identical.  The comparison is bitwise, so +0 and -0 are reported as
 * different.
 * NOTE(review): the name matches GCC's soft-float inequality routine, which
 * is expected to return non-zero for "not equal" -- confirm.
 */
int __nesf2(float a, float b)
{
   return FtoL(a) != FtoL(b);
}


/*
 * Square root by secant iteration on f(x) = x*x - a.
 *
 * The iteration starts from the points x = 1 and x = 10000 and runs for at
 * most 20 steps, stopping early when two successive residuals have identical
 * encodings (the update would otherwise divide by zero).
 *
 * An input whose exponent field is 0 (zero) returns +0 directly; the
 * iteration only creeps toward 0 and would stop far short of it within 20
 * steps.
 * NOTE(review): negative inputs are not rejected and very small positive
 * inputs may not have converged after 20 steps -- confirm the intended
 * input range.
 */
float FP_Sqrt(float a)
{
   float x1, y1, x2, y2, x3;
   long i;
   if(((FtoL(a) >> 23) & 0xff) == 0)
      return FP_ToFloat(0);
   x1 = FP_ToFloat(1);
   y1 = FP_Sub(FP_Mult(x1, x1), a);  //y1=x1*x1-a;
   x2 = FP_ToFloat(10000);
   y2 = FP_Sub(FP_Mult(x2, x2), a);
   for(i = 0; i < 20; ++i)
   {
      if(FtoL(y1) == FtoL(y2))
         return x2;
      //x3=x2-(x1-x2)*y2/(y1-y2);
      x3 = FP_Sub(x2, FP_Div(FP_Mult(FP_Sub(x1, x2), y2), FP_Sub(y1, y2)));
      x1 = x2;
      y1 = y2;
      x2 = x3;
      y2 = FP_Sub(FP_Mult(x2, x2), a);
   }
   return x2;
}


//   static const float fcos0 = 2.33;


/*
 * Cosine of an angle given in radians.
 *
 * Range reduction:
 *  1. Arguments outside [-2*PI, 2*PI] have their whole periods removed in a
 *     single step (rad - trunc(rad / 2PI) * 2PI).
 *  2. The remaining value is brought into [0, 2*PI] by adding or subtracting
 *     2*PI.  If such a step no longer changes the value (the argument is too
 *     large for one period to be representable) the function gives up and
 *     returns 1 instead of looping forever.
 *  3. The angle is folded into [0, PI/2] using cos(x) = -cos(x - PI) and
 *     cos(x) = -cos(PI - x), tracking the sign separately.
 *
 * The folded angle is evaluated with the series
 * 1 - x^2/2! + x^4/4! - ... up to the x^10 term.
 */
float FP_Cos(float rad)
{
   float PI = FP_Div(FP_ToFloat(314159265), FP_ToFloat(100000000));
   float PI_2 = FP_Div(PI, FP_ToFloat(2));
   float PI2  = FP_Mult(PI, FP_ToFloat(2));
   float zero = FP_ToFloat(0);
   float answer, x2, top, bottom, sign, next;
   long periods;
   int n;

   //Remove whole periods at once so large arguments do not need one
   //subtraction per period
   if(FP_Cmp(rad, PI2) > 0 || FP_Cmp(rad, FP_Neg(PI2)) < 0)
   {
      periods = FP_ToLong(FP_Div(rad, PI2));
      rad = FP_Sub(rad, FP_Mult(FP_ToFloat(periods), PI2));
   }
   while(FP_Cmp(rad, PI2) > 0)
   {
      next = FP_Sub(rad, PI2);
      if(FP_Cmp(next, rad) >= 0)
         return FP_ToFloat(1);      //no progress: argument too large to reduce
      rad = next;
   }
   while(FP_Cmp(rad, zero) < 0)
   {
      next = FP_Add(rad, PI2);
      if(FP_Cmp(next, rad) <= 0)
         return FP_ToFloat(1);      //no progress: argument too large to reduce
      rad = next;
   }
   answer = FP_ToFloat(1);
   sign = FP_ToFloat(1);
   if(FP_Cmp(rad, PI) >= 0)
   {
      rad = FP_Sub(rad, PI);
      sign = FP_ToFloat(-1);
   }
   if(FP_Cmp(rad, PI_2) >= 0)
   {
      rad = FP_Sub(PI, rad);
      sign = FP_Neg(sign);
   }
   x2 = FP_Mult(rad, rad);
   top = FP_ToFloat(1);
   bottom = FP_ToFloat(1);
   for(n = 2; n < 12; n += 2)
   {
      top = FP_Mult(top, FP_Neg(x2));                      //(-1)^k * x^n
      bottom = FP_Mult(bottom, FP_ToFloat((n - 1) * n));   //n!
      answer = FP_Add(answer, FP_Div(top, bottom));
   }
   return FP_Mult(answer, sign);
}


/*
 * Sine of an angle given in radians, obtained from the cosine routine via
 * sin(x) = cos(x - PI/2).
 */
float FP_Sin(float rad)
{
   float quarter_turn = FP_Div(FP_ToFloat(314159265), FP_ToFloat(200000000));
   float shifted = FP_Sub(rad, quarter_turn);
   return FP_Cos(shifted);
}


/*
 * Arc tangent of x, result in radians.
 *
 * Argument reduction:
 *  - |x| > 1 uses atan(x) = +/-PI/2 - atan(1/x), so the recursive call always
 *    sees an argument of magnitude below 1.
 *  - |x| > 0.45 uses atan(x) = atan(c) + atan((x - c) / (1 + x*c)) with
 *    c = atan_b = 0.37419668.  atan(c) is formed as b - 0.034633, which is
 *    approximately 0.358066; the subtracted constant is therefore part of the
 *    identity rather than an empirical correction.
 *  - Otherwise the series x - x^3/3 + x^5/5 - ... is summed up to the x^13
 *    term.
 */
float FP_Atan(float x)
{
   float b = FP_Div(FP_ToFloat(314159265), FP_ToFloat(800000000));
   float PI_2 = FP_Div(FP_ToFloat(314159265), FP_ToFloat(200000000));
   float atan_b = FP_Div(FP_ToFloat(37419668), FP_ToFloat(100000000));
   int n;
   float answer, x2, top;
   if(FP_Cmp(x, FP_ToFloat(0)) >= 0)
   {
      if(FP_Cmp(x, FP_ToFloat(1)) > 0)
         return FP_Sub(PI_2, FP_Atan(FP_Div(FP_ToFloat(1), x)));
   }
   else
   {
      //Mirror of the positive case: only x below -1 takes the reciprocal
      if(FP_Cmp(x, FP_ToFloat(-1)) < 0)
         return FP_Sub(FP_Neg(PI_2), FP_Atan(FP_Div(FP_ToFloat(1), x)));
   }
   if(FP_Cmp(x, FP_Div(FP_ToFloat(45), FP_ToFloat(100))) > 0)
   {
      //answer = (x - atan_b) / (1 + x * atan_b);
      answer = FP_Div(FP_Sub(x, atan_b), FP_Add(FP_ToFloat(1), FP_Mult(x, atan_b)));
      //answer = b + FP_Atan(answer) - 0.034633, i.e. atan(atan_b) + FP_Atan(answer)
      answer = FP_Sub(FP_Add(b, FP_Atan(answer)), FP_Div(FP_ToFloat(34633), FP_ToFloat(1000000)));
      return answer;
   }
   if(FP_Cmp(x, FP_Div(FP_ToFloat(-45), FP_ToFloat(100))) < 0)
   {
      //atan is odd: evaluate at -x and negate the result
      x = FP_Neg(x);
      //answer = (x - atan_b) / (1 + x * atan_b);
      answer = FP_Div(FP_Sub(x, atan_b), FP_Add(FP_ToFloat(1), FP_Mult(x, atan_b)));
      //answer = b + FP_Atan(answer) - 0.034633, i.e. atan(atan_b) + FP_Atan(answer)
      answer = FP_Sub(FP_Add(b, FP_Atan(answer)), FP_Div(FP_ToFloat(34633), FP_ToFloat(1000000)));
      return FP_Neg(answer);
   }
   answer = x;
   x2 = FP_Mult(FP_Neg(x), x);      //-x^2, so successive terms alternate in sign
   top = x;
   for(n = 3; n < 14; n += 2)
   {
      top = FP_Mult(top, x2);
      answer = FP_Add(answer, FP_Div(top, FP_ToFloat(n)));
   }
   return answer;
}


/*
 * Angle in radians of the point (x, y), in the range [-PI, PI].
 *
 * The quotient y/x is formed with FP_Div like every other operation in this
 * library, and the result of FP_Atan is moved into the correct quadrant when
 * x is negative.
 *
 * Special cases (an exponent field of 0 is treated as zero):
 *  - x == 0 and y == 0 returns 0.
 *  - x == 0 returns +PI/2 or -PI/2 according to the sign of y, without
 *    dividing by zero.
 *  - x < 0 with y == +0 returns +PI; with y == -0 or y < 0 the result is
 *    moved by -PI.
 */
float FP_Atan2(float y, float x)
{
   float PI = FP_Div(FP_ToFloat(314159265), FP_ToFloat(100000000));
   float zero = FP_ToFloat(0);
   float answer,r;
   int x_is_zero, y_is_zero;
   x_is_zero = ((FtoL(x) >> 23) & 0xff) == 0;
   y_is_zero = ((FtoL(y) >> 23) & 0xff) == 0;
   if(x_is_zero)
   {
      if(y_is_zero)
         return zero;
      answer = FP_Div(PI, FP_ToFloat(2));
      if(FP_Cmp(y, zero) < 0)
         answer = FP_Neg(answer);
      return answer;
   }
   r = FP_Div(y, x);
   answer = FP_Atan(r);
   if(FP_Cmp(x, zero) < 0)
   {
      //Left half-plane: FP_Atan only covers (-PI/2, PI/2)
      if(FP_Cmp(y, zero) >= 0)
         answer = FP_Add(answer, PI);
      else
         answer = FP_Sub(answer, PI);
   }
   return answer;
}


/*
 * e raised to the power x.
 *
 * The argument is brought into [-2, 2] by adding or subtracting 2 while
 * multiplying an accumulator by e^-2 or e^2; the reduced argument is then
 * evaluated with the series 1 + x + x^2/2! + ... up to the x^14 term.
 *
 * Arguments whose result cannot be represented are answered directly:
 *  - x above about 88.72 (ln of the largest float) returns the +infinity
 *    encoding.
 *  - x below about -87.33 (ln of 2^-126) returns 0.
 * This also bounds the reduction loops, which otherwise never finish once x
 * is so large that adding or subtracting 2 no longer changes it.
 */
float FP_Exp(float x)
{
   float e2     = FP_Div(FP_ToFloat(738905609), FP_ToFloat(100000000));
   float inv_e2 = FP_Div(FP_ToFloat(13533528), FP_ToFloat(100000000));
   float max_x  = FP_Div(FP_ToFloat(8872), FP_ToFloat(100));
   float min_x  = FP_Div(FP_ToFloat(-8733), FP_ToFloat(100));
   float answer, top, bottom, mult;
   unsigned long inf_bits = 0x7f800000;
   int n;

   if(FP_Cmp(x, max_x) > 0)
      return LtoF(inf_bits);
   if(FP_Cmp(x, min_x) < 0)
      return FP_ToFloat(0);

   mult = FP_ToFloat(1);
   while(FP_Cmp(x, FP_ToFloat(2)) > 0)
   {
      mult = FP_Mult(mult, e2);
      x = FP_Add(x, FP_ToFloat(-2));
   }
   while(FP_Cmp(x, FP_ToFloat(-2)) < 0)
   {
      mult = FP_Mult(mult, inv_e2);
      x = FP_Add(x, FP_ToFloat(2));
   }
   answer = FP_Add(FP_ToFloat(1), x);
   top = x;
   bottom = FP_ToFloat(1);
   for(n = 2; n < 15; ++n)
   {
      top = FP_Mult(top, x);                    //x^n
      bottom = FP_Mult(bottom, FP_ToFloat(n));  //n!
      answer = FP_Add(answer, FP_Div(top, bottom));
   }
   return FP_Mult(answer, mult);
}


/*
 * Natural logarithm of x.
 *
 * The argument is scaled into [0.5, 1.5] by multiplying by 1/16, 1/2 or 2,
 * adding the matching multiple of ln(2) to an offset (ln(a*x) = ln(a) +
 * ln(x)).  The scaled value is then evaluated with the series
 * ln(1+t) = t - t^2/2 + t^3/3 - ... up to the t^13 term.
 *
 * Inputs outside the domain are answered directly, because the scaling loops
 * cannot make progress on them:
 *  - An exponent field of 0 (zero of either sign) is treated as the smallest
 *    normal number and returns about -87.3365, i.e. ln(2^-126).
 *  - Negative numbers return the quiet-NaN bit pattern 0x7fc00000.
 *    NOTE(review): the rest of this library does not give NaN encodings any
 *    special treatment, so callers should check the argument themselves.
 */
float FP_Log(float x)
{
   float log_2       = FP_Div(FP_ToFloat(69314718), FP_ToFloat(100000000));
   float num_1p5     = FP_Div(FP_ToFloat(15), FP_ToFloat(10));      // 1.5
   float num_0p5     = FP_Div(FP_ToFloat(5), FP_ToFloat(10));       // 0.5
   float num_0p0625  = FP_Div(FP_ToFloat(625), FP_ToFloat(10000));  // 0.0625
   unsigned long bits;
   int n;
   float answer, top, add;
   bits = FtoL(x);
   if(((bits >> 23) & 0xff) == 0)
      return FP_Div(FP_ToFloat(-873365), FP_ToFloat(10000));
   if(bits >> 31)
   {
      bits = 0x7fc00000;
      return LtoF(bits);
   }
   add = FP_ToFloat(0);
   //Coarse reduction: divide by 16 and add 4*ln(2)
   while(FP_Cmp(x, FP_ToFloat(16)) > 0)
   {
      x = FP_Mult(x, num_0p0625);
      add = FP_Add(add, FP_Mult(log_2, FP_ToFloat(4)));
   }
   while(FP_Cmp(x, num_1p5) > 0)
   {
      x = FP_Mult(x, num_0p5);
      add = FP_Add(add, log_2);
   }
   while(FP_Cmp(x, num_0p5) < 0)
   {
      x = FP_Mult(x, FP_ToFloat(2));
      add = FP_Sub(add, log_2);
   }
   x = FP_Sub(x, FP_ToFloat(1));     //series variable t = x - 1
   answer = FP_ToFloat(0);
   top = FP_ToFloat(-1);
   for(n = 1; n < 14; ++n)
   {
      top = FP_Mult(top, FP_Neg(x)); //(-1)^(n+1) * t^n
      answer = FP_Add(answer, FP_Div(top, FP_ToFloat(n)));
   }
   return FP_Add(answer, add);
}


/*
 * x raised to the power y, computed as exp(y * ln(x)).
 * The product is formed with FP_Mult so that this routine, like the rest of
 * the library, does not depend on a compiler-provided float multiply.
 * The result is only meaningful where FP_Log(x) is, i.e. for x > 0.
 */
float FP_Pow(float x, float y)
{
   return FP_Exp(FP_Mult(y, FP_Log(x)));
}


/********************************************/
//These five functions will only be used if the flag "-mno-mul" is enabled
#ifdef USE_SW_MULT
/*
 * Shift-and-add multiplication: for every set bit of b, the correspondingly
 * shifted copy of a is added to the result.  The loop ends as soon as no set
 * bits remain in b.  Overflow wraps as usual for unsigned arithmetic.
 */
unsigned long __mulsi3(unsigned long a, unsigned long b)
{
   unsigned long product;
   for(product = 0; b != 0; b >>= 1, a <<= 1)
   {
      if((b & 1) != 0)
         product += a;
   }
   return product;
}


/*
 * Restoring (shift-and-subtract) division of a by b over 32 steps.
 * Returns the remainder when doMod is non-zero, otherwise the quotient.
 *
 * The divisor starts out shifted left by 31 places; its upper part is kept
 * in b and its lower part in a separate word.  A subtraction is attempted
 * only once the upper part has been shifted away (b < 2) and the lower part
 * is non-zero.  With b == 0 no subtraction ever happens, so the quotient is
 * 0 and the remainder is a.
 */
static unsigned long DivideMod(unsigned long a, unsigned long b, int doMod)
{
   unsigned long remainder = a;
   unsigned long quotient = 0;
   unsigned long divisor_low = b << 31;
   int step = 32;

   while(step-- > 0)
   {
      quotient <<= 1;
      if(b < 2 && divisor_low != 0 && remainder >= divisor_low)
      {
         remainder -= divisor_low;
         quotient |= 1;
      }
      //Shift the two-word divisor right by one; bit 1 of b drops into bit 31
      divisor_low = (divisor_low >> 1) | ((b & 2) << 30);
      b >>= 1;
   }
   return doMod ? remainder : quotient;
}


/*
 * Unsigned division: returns the quotient computed by DivideMod(a, b, 0).
 * NOTE(review): the name matches the compiler runtime routine for unsigned
 * 32-bit division; presumably the compiler emits calls to it -- confirm.
 */
unsigned long __udivsi3(unsigned long a, unsigned long b)
{
   return DivideMod(a, b, 0);
}


/*
 * Signed division, truncating toward zero.
 *
 * Both operands are reduced to their magnitudes, divided with DivideMod, and
 * the quotient is negated when exactly one operand was negative.  The
 * magnitudes and the final negation are computed in unsigned arithmetic so
 * that the most negative long does not overflow a signed negation.
 */
long __divsi3(long a, long b)
{
   unsigned long ua, ub, quotient;
   int negate = 0;
   ua = (unsigned long)a;
   ub = (unsigned long)b;
   if(a < 0)
   {
      ua = 0UL - ua;
      negate = !negate;
   }
   if(b < 0)
   {
      ub = 0UL - ub;
      negate = !negate;
   }
   quotient = DivideMod(ua, ub, 0);
   if(negate)
      quotient = 0UL - quotient;
   return (long)quotient;
}


/*
 * Unsigned remainder: returns the remainder computed by DivideMod(a, b, 1).
 * NOTE(review): the name matches the compiler runtime routine for unsigned
 * 32-bit modulo; presumably the compiler emits calls to it -- confirm.
 */
unsigned long __umodsi3(unsigned long a, unsigned long b)
{
   return DivideMod(a, b, 1);
}
#endif


















