#include <tamtypes.h>
#include <string.h>
#include "math.h"

/*
void math_project_vertices(Vector2d* dst, Vector3d* src, int count)
{

#if 0
	while(count--)
	{
		float f= 2.0f / src->z;
		dst->x = (s32)((src->x*math_projection_constants[0]*f+math_projection_constants[4])*16);
		dst->y = (s32)((src->y*math_projection_constants[1]*f+math_projection_constants[5])*16);
		dst->z = (s32)((src->z * math_projection_constants[2])*16);
		dst++;
		src++;
	}
#else
	__asm__ __volatile__ (
		"lqc2		$vf10, 0(%3)						\n"
		"lqc2		$vf11, 16(%3)						\n"
		"move		$8, %0								\n"
		"move		$9, %1								\n"
		"move		$10, %2								\n"
	"math_project_vertices.inner:						\n"
		"lqc2		$vf8, 0($8)							\n"
	    "vdiv 		Q, $vf0w, $vf8z						\n"
	    "vmul.xyz	$vf8xyz, $vf8xyz, $vf10xyz			\n"
	    "vwaitq											\n"
	    "vmulq.xy 	$vf8xy, $vf8xy, Q					\n"
	    "vadd.xyz	$vf8xyz, $vf8xyz, $vf11xyz			\n"
		"vftoi4.xyz	$vf8xyz, $vf8xyz					\n"
		"sqc2		$vf8, 0($9)							\n"
	    "addiu 		$8, 16								\n"
	    "addiu 		$9, 16								\n"
		"addiu		$10, -1								\n"
		"bnez		$10, math_project_vertices.inner	\n"
	 :: "r" (src), "r" (dst), "r" (count), "r" (math_projection_constants));
#endif
}*/


/*
 * Transform `count` 16-byte vertices from src into dst with matrix m,
 * using VU0 macro-mode instructions.
 *
 * With q0..q3 being the four quadwords at m+0x00, +0x10, +0x20 and +0x30,
 * every output quadword is
 *     q0 * v.x + q1 * v.y + q2 * v.z + q3 * v.w
 * where v is the input quadword.  The "#if 0" branch is the scalar
 * reference; it adds the translation terms unconditionally, so the two
 * only agree when the fourth lane of each source vertex is 1.0
 * (NOTE(review): presumably guaranteed by callers - confirm).
 *
 * dst, src - vertex arrays; accessed with lqc2/sqc2, so presumably they
 *            must be 16-byte aligned (TODO confirm against the typedefs).
 * m        - transform matrix (four quadwords).
 * count    - number of vertices; values <= 0 do nothing.
 */
void math_transform_vertices(Vector3d* dst, Vector3d* src, Matrix* m, int count)
{
#if 0
	Vector3d v;
	while(count--)
	{
		v.x=m->xu*src->x+m->xv*src->y+m->xw*src->z+m->xx;
		v.y=m->yu*src->x+m->yv*src->y+m->yw*src->z+m->yy;
		v.z=m->zu*src->x+m->zv*src->y+m->zw*src->z+m->zz;
		*dst = v;
		dst++;
		src++;
	}
#else
	/* The asm loop only tests its counter after the first pass, so a
	   non-positive count would otherwise wrap and run off the arrays. */
	if (count <= 0)
		return;

  __asm__ __volatile__(
		"lqc2		vf8, 0x0(%2)\n"
		"lqc2		vf9, 0x10(%2)\n"
		"lqc2		vf10, 0x20(%2)\n"
		"lqc2		vf11, 0x30(%2)\n"
  		"move		$8, %0\n"
  		"move		$9, %1\n"
  		"move		$10, %3\n"
	"math_transform_vertices.inner:\n"
		"lqc2		vf4, 0x0($8)\n"
		"vmulax.xyzw	ACC,vf8,vf4\n"
		"vmadday.xyzw	ACC,vf9,vf4\n"
		"vmaddaz.xyzw	ACC,vf10,vf4\n"
		"vmaddw.xyzw	vf4,vf11,vf4\n"
		"sqc2		vf4,0x0($9)\n"
	    "addiu 		$8, 0x10\n"
	    "addiu 		$9, 0x10\n"
	    "addiu 		$10, -1\n"
	    "bnez 		$10, math_transform_vertices.inner\n"
		 :: "r" (src),"r" (dst),"r" (m), "r" (count)
		 /* $8/$9/$10 are used as scratch and memory is read and
		    written behind the compiler's back. */
		 : "$8", "$9", "$10", "memory");
#endif
}

/*
 * Reset *m to the identity: xu, yv, zw and dummy4 become 1.0, every other
 * element (including dummy1..dummy3) becomes 0.0.
 */
void math_matrix_identity(Matrix *m)
{
	/* elements set to one */
	m->xu = 1.0f;
	m->yv = 1.0f;
	m->zw = 1.0f;
	m->dummy4 = 1.0f;

	/* everything else is cleared */
	m->xv = 0.0f;
	m->xw = 0.0f;
	m->xx = 0.0f;
	m->dummy1 = 0.0f;

	m->yu = 0.0f;
	m->yw = 0.0f;
	m->yy = 0.0f;
	m->dummy2 = 0.0f;

	m->zu = 0.0f;
	m->zv = 0.0f;
	m->zz = 0.0f;
	m->dummy3 = 0.0f;
}

/*
 * Rotate *m in place by angle a (as interpreted by sin/cos): each of the
 * four (y*, z*) element pairs is replaced by
 *     y' = y*cos(a) - z*sin(a)
 *     z' = y*sin(a) + z*cos(a)
 * The x* elements are left untouched.
 */
void math_matrix_rotatex(Matrix* m, float a)
{
	const float sn = sin(a);
	const float cs = cos(a);
	float y_old, z_old;

	/* u pair */
	y_old = m->yu;
	z_old = m->zu;
	m->yu = y_old*cs - z_old*sn;
	m->zu = y_old*sn + z_old*cs;

	/* v pair */
	y_old = m->yv;
	z_old = m->zv;
	m->yv = y_old*cs - z_old*sn;
	m->zv = y_old*sn + z_old*cs;

	/* w pair */
	y_old = m->yw;
	z_old = m->zw;
	m->yw = y_old*cs - z_old*sn;
	m->zw = y_old*sn + z_old*cs;

	/* fourth pair (yy, zz) */
	y_old = m->yy;
	z_old = m->zz;
	m->yy = y_old*cs - z_old*sn;
	m->zz = y_old*sn + z_old*cs;
}

/*
 * Rotate *m in place by angle a (as interpreted by sin/cos): each of the
 * four (z*, x*) element pairs is replaced by
 *     z' = z*cos(a) - x*sin(a)
 *     x' = z*sin(a) + x*cos(a)
 * The y* elements are left untouched.
 */
void math_matrix_rotatey(Matrix* m, float a)
{
	const float sn = sin(a);
	const float cs = cos(a);
	float z_old, x_old;

	/* u pair */
	z_old = m->zu;
	x_old = m->xu;
	m->zu = z_old*cs - x_old*sn;
	m->xu = z_old*sn + x_old*cs;

	/* v pair */
	z_old = m->zv;
	x_old = m->xv;
	m->zv = z_old*cs - x_old*sn;
	m->xv = z_old*sn + x_old*cs;

	/* w pair */
	z_old = m->zw;
	x_old = m->xw;
	m->zw = z_old*cs - x_old*sn;
	m->xw = z_old*sn + x_old*cs;

	/* fourth pair (zz, xx) */
	z_old = m->zz;
	x_old = m->xx;
	m->zz = z_old*cs - x_old*sn;
	m->xx = z_old*sn + x_old*cs;
}

/*
 * Rotate *m in place by angle a (as interpreted by sin/cos): each of the
 * four (x*, y*) element pairs is replaced by
 *     x' = x*cos(a) - y*sin(a)
 *     y' = x*sin(a) + y*cos(a)
 * The z* elements are left untouched.
 */
void math_matrix_rotatez(Matrix* m, float a)
{
	const float sn = sin(a);
	const float cs = cos(a);
	float x_old, y_old;

	/* u pair */
	x_old = m->xu;
	y_old = m->yu;
	m->xu = x_old*cs - y_old*sn;
	m->yu = x_old*sn + y_old*cs;

	/* v pair */
	x_old = m->xv;
	y_old = m->yv;
	m->xv = x_old*cs - y_old*sn;
	m->yv = x_old*sn + y_old*cs;

	/* w pair */
	x_old = m->xw;
	y_old = m->yw;
	m->xw = x_old*cs - y_old*sn;
	m->yw = x_old*sn + y_old*cs;

	/* fourth pair (xx, yy) */
	x_old = m->xx;
	y_old = m->yy;
	m->xx = x_old*cs - y_old*sn;
	m->yy = x_old*sn + y_old*cs;
}

/*
 * Add the offset (x, y, z) to the xx, yy and zz elements of *m.
 */
void math_matrix_move(Matrix *m, float x, float y, float z)
{
	m->xx = m->xx + x;
	m->yy = m->yy + y;
	m->zz = m->zz + z;
}

/*
 * Build in *m a view matrix for an observer at *pos looking towards
 * *target, starting from the fixed up hint (0,1,0).
 *
 * Three axes are derived: dir = target - pos, side = dir x up and a
 * recomputed up = dir x side; all three are normalised and written to the
 * u, v and w elements of m.  xx/yy/zz then receive the negated products of
 * those elements with pos.
 */
void math_matrix_lookat(Matrix *m, Vector3d* pos, Vector3d* target)
{
	Vector3d side;
	Vector3d up = {0,1,0,0};
	Vector3d dir;

	math_matrix_identity(m);

	/* viewing direction, not yet normalised */
	dir.x = target->x - pos->x;
	dir.y = target->y - pos->y;
	dir.z = target->z - pos->z;

	/* derive the two remaining axes from the direction and the up hint */
	math_vector_cross(&side, &dir, &up);
	math_vector_cross(&up, &dir, &side);

	math_vector_normalize(&side, &side);
	math_vector_normalize(&up, &up);
	math_vector_normalize(&dir, &dir);

	/* u elements <- side */
	m->xu = side.x;
	m->yu = side.y;
	m->zu = side.z;

	/* v elements <- up */
	m->xv = up.x;
	m->yv = up.y;
	m->zv = up.z;

	/* w elements <- dir */
	m->xw = dir.x;
	m->yw = dir.y;
	m->zw = dir.z;

	/* translation part, computed from the elements just stored */
	m->xx = -(m->xu*pos->x + m->xv*pos->y + m->xw*pos->z);
	m->yy = -(m->yu*pos->x + m->yv*pos->y + m->yw*pos->z);
	m->zz = -(m->zu*pos->x + m->zv*pos->y + m->zw*pos->z);
}

/*
 * Fill *m with a perspective projection for a fixed 3:4 aspect factor.
 *
 * fov   - angle handed directly to tan(); the top edge of the near plane
 *         is nearz * tan(fov).
 * nearz - near plane distance.
 * farz  - far plane distance.
 *
 * The frustum is symmetric (left = -right, bottom = -top), so the two
 * off-centre terms written to zu and zv always evaluate to 0.
 */
void math_matrix_project(Matrix *m, float fov, float nearz, float farz)
{
	const float ratio = 3.0/4.0;
	float left, right, bottom, top;
	float sx, sy, ox, oy, zc, zd;

	/* near-plane extents */
	top = nearz * tan(fov);
	bottom = -top;
	left = bottom * ratio;
	right = top * ratio;

	/* frustum coefficients */
	sx = (2.0f * nearz) / (right - left);
	sy = (2.0f * nearz) / (top - bottom);
	ox = (right + left) / (right - left);
	oy = (top + bottom) / (top - bottom);
	zc = -(farz + nearz) / ( farz - nearz);
	zd = -(2.0f * farz * nearz) / (farz - nearz);

	m->xu = sx;
	m->yu = 0;
	m->zu = ox;
	m->dummy1 = 0;

	m->xv = 0;
	m->yv = sy;
	m->zv = oy;
	m->dummy2 = 0;

	m->xw = 0;
	m->yw = 0;
	m->zw = zc;
	m->dummy3 = zd;

	m->xx = 0;
	m->yy = 0;
	m->zz = 1;
	m->dummy4 = 0;
}

/*
 * Multiply two 4x4 matrices on VU0 (macro mode) and store the result in *dst.
 *
 * The four quadwords of *a are loaded into vf16..vf19 and those of *b into
 * vf20..vf23.  Each quadword A of *a is then replaced by
 *     b0 * A.x + b1 * A.y + b2 * A.z + b3 * A.w
 * (b0..b3 being the quadwords of *b) and the four results are written to
 * dst+0x00..0x30.  All loads complete before the first store, so dst may
 * be the same object as a or b.
 *
 * NOTE(review): lqc2/sqc2 move whole quadwords, so the three matrices
 * presumably have to be 16-byte aligned - confirm against the Matrix typedef.
 */
void math_matrix_multiply(Matrix *dst, Matrix* a, Matrix* b)
{
	/* Earlier scalar version, kept for reference only (it handles just the
	   12 named rotation/translation elements):

	temp.xu=b->xu*a->xu+b->xv*a->yu+b->xw*a->zu;
	temp.xv=b->xu*a->xv+b->xv*a->yv+b->xw*a->zv;
	temp.xw=b->xu*a->xw+b->xv*a->yw+b->xw*a->zw;
	temp.xx=b->xu*a->xx+b->xv*a->yy+b->xw*a->zz+b->xx;

	temp.yu=b->yu*a->xu+b->yv*a->yu+b->yw*a->zu;
	temp.yv=b->yu*a->xv+b->yv*a->yv+b->yw*a->zv;
	temp.yw=b->yu*a->xw+b->yv*a->yw+b->yw*a->zw;
	temp.yy=b->yu*a->xx+b->yv*a->yy+b->yw*a->zz+b->yy;

	temp.zu=b->zu*a->xu+b->zv*a->yu+b->zw*a->zu;
	temp.zv=b->zu*a->xv+b->zv*a->yv+b->zw*a->zv;
	temp.zw=b->zu*a->xw+b->zv*a->yw+b->zw*a->zw;
	temp.zz=b->zu*a->xx+b->zv*a->yy+b->zw*a->zz+b->zz;*/
	asm __volatile__(
		"lqc2            vf16,0x00(%1)\n"
		"lqc2            vf17,0x10(%1)\n"
		"lqc2            vf18,0x20(%1)\n"
		"lqc2            vf19,0x30(%1)\n"
		"lqc2            vf20,0x00(%2)\n"
		"lqc2            vf21,0x10(%2)\n"
		"lqc2            vf22,0x20(%2)\n"
		"lqc2            vf23,0x30(%2)\n"
		"vmulax.xyzw     ACC,vf20,vf16\n"
		"vmadday.xyzw    ACC,vf21,vf16\n"
		"vmaddaz.xyzw    ACC,vf22,vf16\n"
		"vmaddw.xyzw     vf16,vf23,vf16\n"
		"vmulax.xyzw     ACC,vf20,vf17\n"
		"vmadday.xyzw    ACC,vf21,vf17\n"
		"vmaddaz.xyzw    ACC,vf22,vf17\n"
		"vmaddw.xyzw     vf17,vf23,vf17\n"
		"vmulax.xyzw     ACC,vf20,vf18\n"
		"vmadday.xyzw    ACC,vf21,vf18\n"
		"vmaddaz.xyzw    ACC,vf22,vf18\n"
		"vmaddw.xyzw     vf18,vf23,vf18\n"
		"vmulax.xyzw     ACC,vf20,vf19\n"
		"vmadday.xyzw    ACC,vf21,vf19\n"
		"vmaddaz.xyzw    ACC,vf22,vf19\n"
		"vmaddw.xyzw     vf19,vf23,vf19\n"
		"sqc2            vf16,0x00(%0)\n"
		"sqc2            vf17,0x10(%0)\n"
		"sqc2            vf18,0x20(%0)\n"
		"sqc2            vf19,0x30(%0)\n"
		: : "r"(dst), "r"(a), "r"(b) : "memory");
}

/*
 * Cross product on VU0: the x/y/z lanes of *dst receive (*a) x (*b),
 * computed with the vopmula/vopmsub outer-product pair.
 *
 * The whole quadword in vf6 is stored, but only its x/y/z lanes are
 * written by the computation, so the fourth lane of *dst is whatever vf6
 * held beforehand.
 *
 * NOTE(review): lqc2/sqc2 are quadword accesses, so all three pointers
 * presumably need 16-byte alignment - confirm against the Vector3d typedef.
 */
void math_vector_cross(Vector3d* dst, Vector3d* a, Vector3d* b)
{
	__asm__ __volatile__(
		"lqc2		$vf4, 0(%0)\n"
		"lqc2		$vf5, 0(%1)\n"

		"vopmula.xyz ACCxyz, $vf4xyz, $vf5xyz\n"
		"vopmsub.xyz $vf6xyz, $vf5xyz, $vf4xyz\n"				

		"sqc2 		$vf6, 0(%2)\n"
		:: "r" (a),"r" (b),"r" (dst)
		/* The asm reads *a / *b and writes *dst through plain pointers;
		   the clobber keeps the compiler from caching or reordering
		   those memory accesses across it. */
		: "memory");
}

/*
 * Unit normal of the triangle (a, b, c), computed on VU0:
 *     dst = normalize( (c - a) x (c - b) )
 * The scalar sketch below describes the same computation.
 *
 * Only the x/y/z lanes are computed; the fourth lane stored to *dst is the
 * untouched fourth lane of *c (vf6 is loaded from c and only its xyz lanes
 * are rewritten).
 *
 * NOTE(review): quadword loads/stores - pointers presumably need 16-byte
 * alignment; confirm against the Vector3d typedef.
 */
void math_vector_calc_normal(Vector3d* dst, Vector3d* a, Vector3d* b, Vector3d* c)
{
/*	float ax = c->x-a->x;
	float ay = c->y-a->y;
	float az = c->z-a->z;
	float bx = c->x-b->x;
	float by = c->y-b->y;
	float bz = c->z-b->z;	
    dst->x=ay*bz-az*by;
    dst->y=az*bx-ax*bz;
	dst->z=ax*by-ay*bx;	
	math_vector_normalize(dst,dst); */

  __asm__ __volatile__(
		"lqc2		$vf4, 0(%0)\n"
		"lqc2		$vf5, 0(%1)\n"
		"lqc2		$vf6, 0(%2)\n"

		"vsub.xyz	$vf4xyz,$vf6xyz,$vf4xyz\n"
		"vsub.xyz	$vf5xyz,$vf6xyz,$vf5xyz\n"

	    "vopmula.xyz ACCxyz, $vf4xyz, $vf5xyz\n"
	    "vopmsub.xyz $vf6xyz, $vf5xyz, $vf4xyz\n"				
		
	    "vmul.xyz 	$vf4xyz, $vf6xyz, $vf6xyz\n"
	    "vaddy.x 	$vf4x, $vf4x, $vf4y\n"
	    "vaddz.x 	$vf4x, $vf4x, $vf4z\n"
	    "vrsqrt 		Q, $vf0w, $vf4x\n"
	    "vwaitq\n"
	    "vmulq.xyz 	$vf6xyz, $vf6xyz, Q\n"
		
	    "sqc2 		$vf6, 0(%3)\n"
	 :: "r" (a),"r" (b),"r" (c),"r" (dst)
	 /* Inputs are read from and the result written to memory the
	    compiler cannot see into; make that explicit. */
	 : "memory");
}

/*
 * Scale the x/y/z lanes of *src to unit length on VU0 and store the
 * quadword to *dst (dst may equal src).  The scale factor is
 * 1/sqrt(x*x + y*y + z*z), obtained with vrsqrt; the fourth lane is copied
 * through unchanged.
 *
 * NOTE(review): quadword loads/stores - pointers presumably need 16-byte
 * alignment; the behaviour for a zero-length input depends on the VU0
 * vrsqrt semantics and is not handled here.
 */
void math_vector_normalize(Vector3d* dst, Vector3d* src)
{
	/*float len;
	len = 1.0f/sqrt(src->x*src->x+src->y*src->y+src->z*src->z);
	dst->x=src->x*len;
	dst->y=src->y*len;
	dst->z=src->z*len;	
	*/
	
  __asm__ __volatile__(
		"lqc2		vf6, 0(%0)\n"
	    "vmul.xyz 	$vf4xyz, $vf6xyz, $vf6xyz\n"
	    "vaddy.x 	$vf4x, $vf4x, $vf4y\n"
	    "vaddz.x 	$vf4x, $vf4x, $vf4z\n"
	    "vrsqrt 		Q, $vf0w, $vf4x\n"
	    "vwaitq\n"
	    "vmulq.xyz 	$vf6xyz, $vf6xyz, Q\n"
	    "sqc2 $vf6, 0(%1)\n"
	 :: "r" (src),"r" (dst)
	 /* *src is read and *dst written by the asm; without this the
	    compiler may reorder surrounding C accesses to those objects. */
	 : "memory");
}

/*
 * Lighting term for a surface point, computed on VU0.
 *
 * n        - surface normal (used as given, not re-normalised here).
 * pos      - position of the surface point.
 * lightpos - position of the light.
 *
 * Returns |dot(n, normalize(lightpos - pos))| over the x/y/z lanes.  Note
 * that, unlike the scalar sketch below, the asm takes the absolute value.
 *
 * NOTE(review): the result is stored with a quadword sqc2 into the local
 * `lv`, so Vector3d presumably carries 16-byte alignment - confirm.
 */
float math_calc_light(Vector3d* n, Vector3d* pos, Vector3d* lightpos)
{
/*	Vector3d lv;
	float f;
	lv.x = lightpos->x-pos->x;
	lv.y = lightpos->y-pos->y;
	lv.z = lightpos->z-pos->z;
	math_vector_normalize(&lv,&lv);
	f = lv.x*n->x+lv.y*n->y+lv.z*n->z; */

	Vector3d lv;
	
	  __asm__ __volatile__(
		"lqc2		$vf5, 0(%1)\n"
		"lqc2		$vf6, 0(%2)\n"
		"lqc2		$vf7, 0(%0)\n"
		"vsub.xyz	$vf6, $vf6, $vf5\n"

	    "vmul.xyz 	$vf4xyz, $vf6xyz, $vf6xyz\n"
	    "vaddy.x 	$vf4x, $vf4x, $vf4y\n"
	    "vaddz.x 	$vf4x, $vf4x, $vf4z\n"
	    "vrsqrt 		Q, $vf0w, $vf4x\n"
	    "vwaitq\n"
	    "vmulq.xyz 	$vf6xyz, $vf6xyz, Q\n"

		"vmul.xyz 	$vf4xyz, $vf6xyz, $vf7xyz\n"
	    "vaddy.x 	$vf4x, $vf4x, $vf4y\n"
	    "vaddz.x 	$vf4x, $vf4x, $vf4z\n"
	    "vabs.x		$vf4x, $vf4x\n"

	    "sqc2 $vf4, 0(%3)\n"
		
	 :: "r" (n), "r" (pos), "r" (lightpos), "r" (&lv)
	 /* `lv` is only ever written by the asm.  Without the memory
	    clobber the compiler is entitled to treat lv.x below as an
	    uninitialised read and return garbage. */
	 : "memory");
	
	return lv.x;
}

/*
 * Component-wise product on VU0: all four lanes of *dst receive the
 * product of the corresponding lanes of *a and *b.
 *
 * NOTE(review): quadword loads/stores - pointers presumably need 16-byte
 * alignment; confirm against the Vector3d typedef.
 */
void math_vector_mul(Vector3d* dst, Vector3d* a, Vector3d* b)
{
  __asm__ __volatile__(
		"lqc2		$vf4, 0(%0)\n"
		"lqc2		$vf5, 0(%1)\n"

		"vmul.xyzw	$vf6xyzw,$vf4xyzw,$vf5xyzw\n"
		
	    "sqc2 		$vf6, 0(%2)\n"
	 :: "r" (a),"r" (b),"r" (dst)
	 /* Memory is read and written through the pointer operands. */
	 : "memory" );	
}

/* Antilog table used by pow(): A[i] = 2^(-i/16) for i = 0..16.
* The decimal values are rounded to 24-bit precision.
* pow() searches this table to bracket the significand returned by
* frexpf(), and indexes it again to apply the fractional power of two
* of the result.
*/
static float A[] = {
	1.00000000000000000000E0,
	9.57603275775909423828125E-1,
	9.17004048824310302734375E-1,
	8.78126084804534912109375E-1,
	8.40896427631378173828125E-1,
	8.05245161056518554687500E-1,
	7.71105408668518066406250E-1,
	7.38413095474243164062500E-1,
	7.07106769084930419921875E-1,
	6.77127778530120849609375E-1,
	6.48419797420501708984375E-1,
	6.20928883552551269531250E-1,
	5.94603538513183593750000E-1,
	5.69394290447235107421875E-1,
	5.45253872871398925781250E-1,
	5.22136867046356201171875E-1,
	5.00000000000000000000E-1
};
/* Low-order continuation of A[], for even i only:
*     2^(-i/16)  =  A[i] + B[i/2]
* pow() subtracts A[i] and then B[i >> 1] from the significand so the
* table value is removed with extra precision.
*/
static float B[] = {
	0.00000000000000000000E0,
	-5.61963907099083340520586E-9,
	-1.23776636307969995237668E-8,
	4.03545234539989593104537E-9,
	1.21016171044789693621048E-8,
	-2.00949968760174979411038E-8,
	1.89881769396087499852802E-8,
	-6.53877009617774467211965E-9,
	0.00000000000000000000E0
};

/* Reciprocals of the antilog table: Ainv[i] = 1 / A[i] = 2^(i/16).
* The decimal values are full precision.
* pow() multiplies by Ainv[i] to form (x - A[i]) / A[i].
*/
static float Ainv[] = {
	1.00000000000000000000000E0,
	1.04427378242741384032197E0,
	1.09050773266525765920701E0,
	1.13878863475669165370383E0,
	1.18920711500272106671750E0,
	1.24185781207348404859368E0,
	1.29683955465100966593375E0,
	1.35425554693689272829801E0,
	1.41421356237309504880169E0,
	1.47682614593949931138691E0,
	1.54221082540794082361229E0,
	1.61049033194925430817952E0,
	1.68179283050742908606225E0,
	1.75625216037329948311216E0,
	1.83400808640934246348708E0,
	1.91520656139714729387261E0,
	2.00000000000000000000000E0
};

/* Range limits that pow() applies to 16 * (base-2 exponent of the result):
 * above MEXP it returns MAXNUMF, below MNEXP it returns 0.
 * The values are in sixteenths of a power of two, not raw exponents.
 */
#ifdef DEC
#define MEXP 2032.0
#define MNEXP -2032.0
#else
#define MEXP 2048.0
#define MNEXP -2400.0
#endif

/* log2(e) - 1 */
#define LOG2EA 0.44269504088896340736F
/* natural logarithm of 2 */
#define LOGE2F  0.693147180559945309F
/* value returned by these routines on overflow */
#define MAXNUMF 3.4028234663852885981170418348451692544e38
/* arguments above MAXLOGF make exp() return MAXNUMF,
 * arguments below MINLOGF make it return 0 */
#define MAXLOGF 88.72283905206835
#define MINLOGF -103.278929903431851103
/* log2(e) */
#define LOG2EF 1.44269504088896341

/* pow() is written in terms of F/Fa/Fb, G/Ga/Gb and H/Ha/Hb; these aliases
 * map all of them onto the small set of temporaries W, Wa, Wb and u that
 * pow() actually declares.  Note Ha and Hb share Wb.
 */
#define F W
#define Fa Wa
#define Fb Wb
#define G W
#define Ga Wa
#define Gb u
#define H W
#define Ha Wb
#define Hb Wb

/*
 * Round v toward minus infinity and return the result as a float.
 *
 * The previous macro, (float)(int)(x), rounded toward zero.  exp() in this
 * file relies on a true floor for its range reduction; with truncation
 * every sufficiently negative argument was reduced to roughly
 * (-1.04, -0.35] instead of [-0.5, 0.5], outside the interval its
 * polynomial is fitted for.
 *
 * Magnitudes of 2^23 and above are already integral in single precision
 * and are returned unchanged, which also keeps the int conversion in range.
 */
static float math_floorf_impl(float v)
{
	float t;

	if( v >= 8388608.0f || v <= -8388608.0f )
		return v;

	t = (float)(int)v;	/* truncates toward zero */
	if( t > v )		/* negative non-integer: step down by one */
		t -= 1.0f;
	return t;
}
#define floorf(x) math_floorf_impl(x)

/* Largest multiple of 1/16 that does not exceed x. */
#define reduc(x)  (0.0625 * floorf( 16 * (x) ))

/*
 * Return x * 2^pw2 by editing the exponent field of the IEEE single
 * directly.
 *
 * The float is viewed as two 16-bit halves and u.i[1] is taken to be the
 * half holding sign and exponent, i.e. a little-endian layout is assumed.
 *
 * Results:
 *   - x == 0                      -> 0.0
 *   - biased exponent above 254   -> MAXNUMF carrying the sign of x
 *   - biased exponent below 1     -> 0.0 unless DENORMAL is enabled
 *
 * The overflow limit used to be MEXP, but in this file MEXP is pow()'s
 * limit in sixteenths of a power of two (2048.0), so the test could never
 * fire here and an over-range exponent silently wrapped into the 8-bit
 * field.  A local limit is used instead.
 */
float ldexpf( float x, int pw2 )
{
	/* Largest biased exponent of a finite single; 255 encodes Inf/NaN. */
	enum { MAX_BIASED_EXP = 254 };
	union
	{
		float y;
		unsigned short i[2];
	} u;
	unsigned short *q;
	int e;

#ifdef UNK
	printf( "%s\n", unkmsg );
	return(0.0);
#endif

	u.y = x;
	q = &u.i[1];
	while( (e = ( *q >> 7) & 0xff) == 0 )
	{
		if( u.y == (float )0.0 )
		{
			return( 0.0 );
		}
		/* Input is denormal. */
		if( pw2 > 0 )
		{
			u.y *= 2.0;
			pw2 -= 1;
		}
		if( pw2 < 0 )
		{
			if( pw2 < -24 )
				return( 0.0 );
			u.y *= 0.5;
			pw2 += 1;
		}
		if( pw2 == 0 )
			return(u.y);
	}

	e += pw2;

	/* Handle overflow */
	if( e > MAX_BIASED_EXP )
	{
		if( *q & 0x8000 )
			return( -MAXNUMF );
		return( MAXNUMF );
	}

	/* clear the old exponent, keep sign and top mantissa bits */
	*q &= 0x807f;

	/* Handle denormalized results */
	if( e < 1 )
	{
#if DENORMAL
		if( e < -24 )
			return( 0.0 );
		*q |= 0x80; /* Set LSB of exponent. */
		/* For denormals, significant bits may be lost even
		when dividing by 2.  Construct 2^-(1-e) so the result
		is obtained with only one multiplication.  */
		u.y *= ldexpf(1.0f, e - 1);
		return(u.y);
#else
		return( 0.0 );
#endif
	}
	*q |= (e & 0xff) << 7;
	return(u.y);
}

/*
 * Split x into a significand and a power of two: returns s with
 * 0.5 <= |s| < 1 and stores e in *pw2 such that x == s * 2^e.
 *
 * The float is viewed as two 16-bit halves and u.i[1] is taken to be the
 * half holding sign and exponent, i.e. a little-endian layout is assumed.
 *
 * x == 0 yields 0.0 with *pw2 = 0.  Denormal inputs are treated the same
 * way unless DENORMAL is enabled.  Inf/NaN inputs get no special handling.
 *
 * Changes from the previous version: the DENORMAL branch used a variable
 * `k` that was never declared, and the exponent half was accessed through
 * a signed `short *`, which relied on implementation-defined conversions.
 */
float frexpf( float x, int *pw2 )
{
	union
	{
		float y;
		unsigned short i[2];
	} u;
	unsigned short *q;
	int i;
#if DENORMAL
	int k;
#endif

	u.y = x;

	q = &u.i[1];

	/* find the exponent (power of 2) */

	i  = ( *q >> 7) & 0xff;
	if( i == 0 )
	{
		if( u.y == 0.0 )
		{
			*pw2 = 0;
			return(0.0);
		}
		/* Number is denormal or zero */
#if DENORMAL
		/* Handle denormal number: scale up until an exponent appears,
		   counting the doublings in i. */
		do
		{
			u.y *= 2.0;
			i -= 1;
			k  = ( *q >> 7) & 0xff;
		}
		while( k == 0 );
		i = i + k;
#else
		*pw2 = 0;
		return( 0.0 );
#endif /* DENORMAL */
	}
	i -= 0x7e;	/* remove the bias so the significand lands in [0.5, 1) */
	*pw2 = i;
	*q &= 0x807f;	/* strip all exponent bits */
	*q |= 0x3f00;	/* mantissa between 0.5 and 1 */
	return( u.y );
}

/*
 * Raise x to the integer power nn by repeated squaring.
 *
 * Special cases:
 *   - x == 0: returns 1 for nn == 0, MAXNUMF for nn < 0, 0 for nn > 0
 *   - nn == 0: returns 1
 *   - estimated overflow:  returns MAXNUMF, negated when x is negative
 *     and nn is odd
 *   - estimated underflow: returns 0
 *
 * Fix relative to the previous version: the overflow path jumped to the
 * common epilogue with y = MAXNUMF.  There the result was negated for any
 * negative x (even when nn is even, e.g. (-1e20)^2) and, for nn < 0,
 * inverted to about 1/MAXNUMF although the estimate `s` already describes
 * the final answer.  The saturated value is now returned directly.
 */
float powif( float x, int nn )
{
	int n, e, sign, asign, lx;
	float w, y, s;

	if( x == 0.0 )
	{
		if( nn == 0 )
			return( 1.0 );
		else if( nn < 0 )
			return( MAXNUMF );
		else
			return( 0.0 );
	}

	if( nn == 0 )
		return( 1.0 );


	/* work on |x|; asign remembers that the base was negative */
	if( x < 0.0 )
	{
		asign = -1;
		x = -x;
	}
	else
		asign = 0;


	/* work on |nn|; sign remembers that the result must be inverted */
	if( nn < 0 )
	{
		sign = -1;
		n = -nn;
		/*
		x = 1.0/x;
		*/
	}
	else
	{
		sign = 0;
		n = nn;
	}

	/* Overflow detection */

	/* Calculate approximate logarithm of answer */
	s = frexpf( x, &lx );
	e = (lx - 1)*n;
	if( (e == 0) || (e > 64) || (e < -64) )
	{
		s = (s - 7.0710678118654752e-1) / (s +  7.0710678118654752e-1);
		s = (2.9142135623730950 * s - 0.5 + lx) * nn * LOGE2F;
	}
	else
	{
		/* |s| <= 64*ln(2) here, so neither range test below can fire */
		s = LOGE2F * e;
	}

	if( s > MAXLOGF )
	{
		/* s estimates the log of the final answer (nn, not n, was used
		   above), so saturate without the inversion step.  Only an odd
		   power of a negative base is negative. */
		if( asign && (n & 1) )
			return( -MAXNUMF );
		return( MAXNUMF );
	}

	if( s < MINLOGF )
		return(0.0);

	/* Handle tiny denormal answer, but with less accuracy
	* since roundoff error in 1.0/x will be amplified.
	* The precise demarcation should be the gradual underflow threshold.
	*/
	if( s < (-MAXLOGF+2.0) )
	{
		x = 1.0/x;
		sign = 0;
	}

	/* First bit of the power */
	if( n & 1 )
		y = x;

	else
	{
		y = 1.0;
		asign = 0;	/* even power: result is non-negative */
	}

	w = x;
	n >>= 1;
	while( n )
	{
		w = w * w;	/* arg to the 2-to-the-kth power */
		if( n & 1 )	/* if that bit is set, then include in product */
			y *= w;
		n >>= 1;
	}

	if( asign )
		y = -y; /* odd power of negative number */
	if( sign )
		y = 1.0/y;
	return(y);
}



/*
 * Single-precision x raised to the power y.
 *
 * - Integral y with |y| < 32768 is delegated to powif().
 * - x == 0 returns 1 when y == 0 and 0 otherwise.
 * - Negative x with non-integral y returns 0.
 * - Otherwise log2(x) is obtained from the A/B/Ainv tables plus a short
 *   polynomial, multiplied by y in pieces that are multiples of 1/16
 *   (reduc) to limit rounding error, and exponentiated again via A[] and
 *   ldexpf().  Results beyond MEXP / MNEXP (in sixteenths of a power of
 *   two) return MAXNUMF / 0.
 *
 * F, Fa, Fb, G, Ga, Gb, H, Ha and Hb are macro aliases for W, Wa, Wb and u.
 *
 * Fix relative to the previous version: for negative x with a large
 * integral y, the odd/even test on the exponent was performed on `w`,
 * which by then held a table value A[e] rather than y, so the sign of the
 * result did not depend on the parity of y.  The test now uses y itself.
 */
float pow( float x, float y )
{
	float u, w, z, W, Wa, Wb, ya, yb;
	/* float F, Fa, Fb, G, Ga, Gb, H, Ha, Hb */
	int e, i, nflg;


	nflg = 0;	/* flag = 1 if x<0 raised to integer power */
	w = floorf(y);
	if( w < 0 )
		z = -w;
	else
		z = w;
	if( (w == y) && (z < 32768.0) )
	{
		i = w;
		w = powif( x, i );
		return( w );
	}


	if( x <= 0.0F )
	{
		if( x == 0.0 )
		{
			if( y == 0.0 )
				return( 1.0 );  /*   0**0   */
			else  
				return( 0.0 );  /*   0**y   */
		}
		else
		{
			if( w != y )
			{
				/* noninteger power of negative number */
				return(0.0);
			}
			nflg = 1;
			if( x < 0 )
				x = -x;
		}
	}

	/* separate significand from exponent */
	x = frexpf( x, &e );

	/* find significand in antilog table A[] */
	i = 1;
	if( x <= A[9] )
		i = 9;
	if( x <= A[i+4] )
		i += 4;
	if( x <= A[i+2] )
		i += 2;
	if( x >= A[1] )
		i = -1;
	i += 1;


	/* Find (x - A[i])/A[i]
	* in order to compute log(x/A[i]):
	*
	* log(x) = log( a x/a ) = log(a) + log(x/a)
	*
	* log(x/a) = log(1+v),  v = x/a - 1 = (x-a)/a
	*/
	x -= A[i];
	x -= B[ i >> 1 ];
	x *= Ainv[i];


	/* rational approximation for log(1+v):
	*
	* log(1+v)  =  v  -  0.5 v^2  +  v^3 P(v)
	* Theoretical relative error of the approximation is 3.5e-11
	* on the interval 2^(1/16) - 1  > v > 2^(-1/16) - 1
	*/
	z = x*x;
	w = (((-0.1663883081054895  * x
		+ 0.2003770364206271) * x
		- 0.2500006373383951) * x
		+ 0.3333331095506474) * x * z;
	w -= 0.5 * z;

	/* Convert to base 2 logarithm:
	* multiply by log2(e)
	*/
	w = w + LOG2EA * w;
	/* Note x was not yet added in
	* to above rational approximation,
	* so do it now, while multiplying
	* by log2(e).
	*/
	z = w + LOG2EA * x;
	z = z + x;

	/* Compute exponent term of the base 2 logarithm. */
	w = -i;
	w *= 0.0625;  /* divide by 16 */
	w += e;
	/* Now base 2 log of x is w + z. */

	/* Multiply base 2 log by y, in extended precision. */

	/* separate y into large part ya
	* and small part yb less than 1/16
	*/
	ya = reduc(y);
	yb = y - ya;


	F = z * y  +  w * yb;
	Fa = reduc(F);
	Fb = F - Fa;

	G = Fa + w * ya;
	Ga = reduc(G);
	Gb = G - Ga;

	H = Fb + Gb;
	Ha = reduc(H);
	w = 16 * (Ga + Ha);

	/* Test the power of 2 for overflow */
	if( w > MEXP )
	{
		return( MAXNUMF );
	}

	if( w < MNEXP )
	{
		return( 0.0 );
	}

	e = w;
	Hb = H - Ha;

	if( Hb > 0.0 )
	{
		e += 1;
		Hb -= 0.0625;
	}

	/* Now the product y * log2(x)  =  Hb + e/16.0.
	*
	* Compute base 2 exponential of Hb,
	* where -0.0625 <= Hb <= 0.
	* Theoretical relative error of the approximation is 2.8e-12.
	*/
	/*  z  =  2**Hb - 1    */
	z = ((( 9.416993633606397E-003 * Hb
		+ 5.549356188719141E-002) * Hb
		+ 2.402262883964191E-001) * Hb
		+ 6.931471791490764E-001) * Hb;

	/* Express e/16 as an integer plus a negative number of 16ths.
	* Find lookup table entry for the fractional power of 2.
	*/
	if( e < 0 )
		i = -( -e >> 4 );
	else
		i = (e >> 4) + 1;
	e = (i << 4) - e;
	w = A[e];
	z = w + w * z;      /*    2**-e * ( 1 + (2**Hb-1) )    */
	z = ldexpf( z, i );  /* multiply by integer power of 2 */

	if( nflg )
	{
		/* For negative x,
		* find out if the integer exponent
		* is odd or even.  y is integral here, so it is even
		* exactly when twice the floor of y/2 gives y back.
		*/
		w = 2 * floorf( (float) 0.5 * y );
		if( w != y )
			z = -z; /* odd exponent */
	}

	return( z );
}

/*
 * Single-precision hyperbolic sine.
 *
 *   |xx| > MAXLOGF : returns MAXNUMF with the sign of xx
 *   |xx| > 1       : computed as (e^|xx| - e^-|xx|) / 2 via exp(), with
 *                    the sign of xx restored afterwards
 *   otherwise      : odd polynomial in xx
 */
float sinh(float xx)
{
	float exp(float);	/* defined further down in this file */
	const float x = xx;
	float z;

	/* z = |x| */
	z = (xx < 0) ? -x : x;

	if( z > MAXLOGF )
		return( (x > 0) ? MAXNUMF : -MAXNUMF );

	if( z > 1.0 )
	{
		z = exp(z);
		z = 0.5*z - (0.5/z);
		if( x < 0 )
			z = -z;
		return( z );
	}

	/* small argument: x + x^3 * P(x^2) */
	z = x * x;
	z = (( 2.03721912945E-4 * z
	     + 8.33028376239E-3) * z
	     + 1.66667160211E-1) * z * x
	     + x;
	return( z );
}


/* ln(2) split into a coarse part C1 and a small correction C2
 * (C1 + C2 is approximately 0.693147), subtracted in two steps by exp(). */
#define C1 0.693359375
#define C2 -2.12194440e-4

/*
 * Single-precision exponential function.
 *
 * Arguments above MAXLOGF return MAXNUMF; arguments below MINLOGF
 * return 0.  Otherwise the argument is split as xx = g + n*ln(2) with
 * integer n, e^g is evaluated with a polynomial, and the result is scaled
 * by 2^n through ldexpf().
 */
float exp(float xx)
{
	float g = xx;
	float t;
	int n;

	if( g > MAXLOGF )
		return( MAXNUMF );

	if( g < MINLOGF )
		return(0.0);

	/* Express e**x = e**g 2**n
	 *   = e**g e**( n loge(2) )
	 *   = e**( g + n loge(2) )
	 * n is chosen with this file's floorf() applied to x*log2(e) + 0.5.
	 */
	t = floorf( LOG2EF * g + 0.5 );
	g -= t * C1;
	g -= t * C2;
	n = t;

	/* e**g ~= 1 + g + g*g * P(g); the polynomial is fitted for
	 * g in [-0.5, +0.5] (theoretical peak relative error 4.2e-9). */
	t = g * g;
	t =
	((((( 1.9875691500E-4  * g
	   + 1.3981999507E-3) * g
	   + 8.3334519073E-3) * g
	   + 4.1665795894E-2) * g
	   + 1.6666665459E-1) * g
	   + 5.0000001201E-1) * t
	   + g
	   + 1.0;

	/* multiply by power of 2 */
	return( ldexpf( t, n ) );
}
