//output image: width and height in pixels, and bytes per pixel.
//draw() uses SCR_C as the pixel stride but only writes the first 3 bytes of each pixel.
#define SCR_W	(80*3)
#define SCR_H	(50*3)
#define SCR_C	4

//indices into bObj_t.tex[]; each entry is handed to getTexCol() by traceRay()
#define TEX_REF 0	//reflection: tints the colour coming back from the bounced ray (should there be modes? like add/sub/alpha?)
#define TEX_DIF 1	//diffusion (specularness, one channel): first channel blends mirror reflection against a randomized direction
#define TEX_COL 2	//color: added on top of the tinted reflection
#define TEX_TRS 3	//transparency: reserved, not read by traceRay() yet (should there be modes? like add/sub/alpha?)
#define MAX_TEX 4	//number of texture slots per object

//number of rays traced and averaged per pixel in draw()
#define STOC_SAMPLE	8

//map the C math names used below onto OpenCL built-ins
//(native_sqrt has implementation-defined precision)
#define sqrtf	native_sqrt
#define sinf	sin
#define cosf	cos

//One mesh vertex: eight packed floats.
//(A transformed XYZW point "p[4]" was once sketched here; it is not part of the layout.)
typedef struct {
	float v[3];		//position
	float t[2];		//texture coordinates
	float n[3];		//vertex normal, blended across the triangle by traceRay()
} vert_t;

//One triangle: three vertex indices plus a per-polygon normal.
typedef struct {
	unsigned int v[3];	//indices of the 3 corner vertices
	float n[3];			//polygon normal (traceRay() currently blends the vertex normals instead)
} poly_t;
/*
typedef struct{
	int		vnum;			//number of vertices
	int		pnum;			//number of polys
	int		fnum;			//number of frames
	vert_t*	vert;			//the vertices
	poly_t*	poly;			//the polys
	float	x, y, z;		//position
	float	h, p, b;		//rotation
	int		tex[MAX_TEX];	//texture index	really useful ? per poly or per object ?
	int		frame;			//current frame
	float	ff;				//float frame for slow animations
	vert_t	bbox[8];		//8 points of its bounding box
	float	bx, by, bz, br;	//bounding sphere position + radius
}obj_t;
*/
//Camera description: seven packed floats.
typedef struct {
	//position
	float x;
	float y;
	float z;
	//rotation
	float h;
	float p;
	float b;
	//distance from the camera to the projection plane (controls the field of view)
	float dst;
} cam_t;

//baked stuff
//One "baked" object: a window into the shared vertex/polygon arrays of scene_t.
typedef struct {
	int vOffs;			//added to a polygon's vertex indices when traceRay() shades a hit
	int pOffs;			//index of the object's first polygon in scene_t.p[]
	int vnum;			//vertex count
	int pnum;			//polygon count
	int tex[MAX_TEX];	//one value per TEX_* slot, interpreted by getTexCol()
	//TODO: bounding box
	//TODO: bounding sphere
} bObj_t;

//The whole scene in one fixed-size block: sky colours, object table and the
//vertex/polygon pools the objects point into.
typedef struct {
	//sky gradient, RGB per row. traceRay() blends rows 2 -> 3 for rays pointing
	//up (dir[1] > 0) and rows 1 -> 0 for rays pointing down.
	float sky[4][3];
	int objNum;			//number of used entries in o[]
	bObj_t o[64];		//object table
	int vnum;			//number of used entries in v[]
	int pnum;			//number of used entries in p[]
	vert_t v[2048];		//vertex pool
	poly_t p[2048];		//polygon pool
	//TODO: lights (count + array)
} scene_t;

//----------------------------------------------------------------------------//
/*
 * vecSub(R, A, B): R = A - B, component-wise on 3-element vectors.
 *
 * A macro rather than a function so the operands can be any indexable
 * expression (traceRay() passes members of the __global scene directly).
 * Wrapped in do { } while (0) so the three assignments act as ONE statement,
 * e.g. under an unbraced if; terminate every use with ';'.
 * Each argument is evaluated three times: do not pass expressions with side effects.
 */
#define vecSub(R,A,B) do { (R)[0]=(A)[0]-(B)[0]; (R)[1]=(A)[1]-(B)[1]; (R)[2]=(A)[2]-(B)[2]; } while (0)
/*
 * vecAdd(R, A, B): R = A + B, component-wise on 3-element vectors.
 *
 * R may be the same vector as A or B (each component is read before it is written).
 * Wrapped in do { } while (0) so the three assignments act as ONE statement,
 * e.g. under an unbraced if; terminate every use with ';'.
 * Each argument is evaluated three times: do not pass expressions with side effects.
 */
#define vecAdd(R,A,B) do { (R)[0]=(A)[0]+(B)[0]; (R)[1]=(A)[1]+(B)[1]; (R)[2]=(A)[2]+(B)[2]; } while (0)
/*
 * vecMul(R, A, B): R = A * B, where A is a 3-element vector and B a scalar.
 *
 * R may be the same vector as A (each component is read before it is written).
 * Wrapped in do { } while (0) so the three assignments act as ONE statement,
 * e.g. under an unbraced if; terminate every use with ';'.
 * Each argument is evaluated three times: do not pass expressions with side effects.
 */
#define vecMul(R,A,B) do { (R)[0]=(A)[0]*(B); (R)[1]=(A)[1]*(B); (R)[2]=(A)[2]*(B); } while (0)
//Squared length of the 3-vector v (sum of the squared components, no square root).
float vecLenSQ(const float v[3])
{
	return v[0] * v[0] + v[1] * v[1] + v[2] * v[2];
}
float vecLen(const float v[3])
{
	float l = v[0] * v[0] + v[1] * v[1] + v[2] * v[2];
	float ll;
	if (l == 0)return 0;
	if (l == 1)return 1;
	ll = sqrtf(l);
	return ll;
}
void vecNormal(float v[3])								//normalizes to 1.f
{
	float l = v[0] * v[0] + v[1] * v[1] + v[2] * v[2];
	if (l == 0)return;
	l = 1.f / sqrtf(l);
	v[0] *= l;
	v[1] *= l;
	v[2] *= l;
}
/*
 * vecDot(A, B): dot product a . b of two 3-element vectors, as an expression.
 *
 * Arguments are parenthesized so pointer expressions such as (p + 1) index
 * correctly. Each argument is evaluated three times: do not pass expressions
 * with side effects.
 */
#define vecDot(A,B) ((A)[0]*(B)[0]+(A)[1]*(B)[1]+(A)[2]*(B)[2])
/*
 * vecCross(r, a, b): cross product r = a x b of two 3-element float vectors.
 *
 * All three components are computed into temporaries before r is written, so
 * r may be the same vector as a or b.
 * Wrapped in do { } while (0) so it acts as ONE statement, e.g. under an
 * unbraced if; terminate every use with ';'.
 * Each argument is evaluated several times: do not pass expressions with side effects.
 */
#define vecCross(r,a,b)	do { \
		const float vecCross_x_ = (a)[1] * (b)[2] - (a)[2] * (b)[1]; \
		const float vecCross_y_ = (a)[2] * (b)[0] - (a)[0] * (b)[2]; \
		const float vecCross_z_ = (a)[0] * (b)[1] - (a)[1] * (b)[0]; \
		(r)[0] = vecCross_x_; \
		(r)[1] = vecCross_y_; \
		(r)[2] = vecCross_z_; \
	} while (0)
//Mirrors v about the normal n and stores the result in r:  r = v - 2 (v . n) n
//The formula only yields a true mirror direction when n has unit length.
void vecReflect(float r[3], const float n[3], const float v[3])
{
	//computed once, before r is touched
	const float twiceDot = 2 * vecDot(n, v);
	int i;

	for (i = 0; i < 3; i++)
		r[i] = v[i] - twiceDot * n[i];
}
//----------------------------------------------------------------------------//
//Resets the generator state held in *seed to the value i.
void _srand(unsigned int *seed,  unsigned int i)
{
	seed[0] = i;
}
/*
 * Advances the generator state in *seed and returns 16 pseudo-random bits.
 *
 * Linear congruential step on a 32-bit state:
 *     state = state * 0xDEECE66D + 0xB   (mod 2^32)
 * and the upper 16 bits of the new state are returned.
 *
 * This yields the same sequence as the earlier 48-bit-style formula
 * (multiplier 0x5DEECE66D, mask 2^48 - 1), whose result was truncated to the
 * 32-bit state anyway. That form multiplied in signed 64-bit arithmetic, where
 * the product can overflow; here everything stays unsigned, where wrap-around
 * is well defined.
 */
unsigned short int _rand(unsigned int *seed)
{
	//the mask keeps the state at 32 bits even if unsigned int were wider
	*seed = (*seed * 0xDEECE66Du + 0xBu) & 0xFFFFFFFFu;
	return (unsigned short int)(*seed >> 16);
}
//Shorthand for _srand()/_rand(). Both expand to a call that passes a variable
//named `seed`, so a pointer (or array) with that exact name must be in scope
//wherever they are used.
#define srand(A) _srand(seed,A)
#define rand() _rand(seed)
//----------------------------------------------------------------------------//
/*
 * Ray/triangle intersection (Moller-Trumbore style, both faces are hit).
 *
 * Returns 1 and stores the ray parameter t of the hit in *out when the ray
 * O + t*D crosses the triangle V1,V2,V3 in front of the origin; t equals the
 * distance from O when D has unit length. Returns 0 and leaves *out untouched
 * otherwise.
 */
int triangle_intersection(const float V1[3],  // Triangle vertices
	const float	V2[3],
	const float	V3[3],
	const float	O[3],	//Ray origin
	const float	D[3],	//Ray direction
	float* out)			//receives the ray parameter of the hit
{
	const float eps = 0.000001;
	float edgeA[3], edgeB[3];			//the two edges leaving V1
	float pvec[3], qvec[3], tvec[3];
	float det, invDet;
	float baryU, baryV;					//barycentric coordinates of the hit
	float dist;

	vecSub(edgeA, V2, V1);
	vecSub(edgeB, V3, V1);

	//the determinant is ~0 when the ray is parallel to (or inside) the triangle's plane
	vecCross(pvec, D, edgeB);
	det = vecDot(edgeA, pvec);
	if (det > -eps && det < eps) {
		return 0;
	}
	invDet = 1.f / det;

	//first barycentric coordinate, from the vector V1 -> ray origin
	vecSub(tvec, O, V1);
	baryU = vecDot(tvec, pvec) * invDet;
	if (baryU < 0.f || baryU > 1.f) {
		return 0;
	}

	//second barycentric coordinate; together they must stay inside the triangle
	vecCross(qvec, tvec, edgeA);
	baryV = vecDot(D, qvec) * invDet;
	if (baryV < 0.f || baryU + baryV  > 1.f) {
		return 0;
	}

	//position of the hit along the ray; hits at or behind the origin do not count
	dist = vecDot(edgeB, qvec) * invDet;
	if (!(dist > eps)) {
		return 0;
	}

	*out = dist;
	return 1;
}
//----------------------------------------------------------------------------//

/*
 * Looks up the RGB colour (each channel in 0..1) for a texture slot value.
 *
 *  texID  packed colour 0x[AA]RRGGBB; the top byte is ignored. When
 *         NO_TEXTURE_SUPPORTED_YET is defined, non-negative values are instead
 *         used as an index into the texture table and sampled at uv.
 *  uv     texture coordinates (only read by the texture-table path)
 *  col    receives red, green, blue
 *
 * The channels are extracted with shifts and masks rather than by reading the
 * int's bytes through a pointer, so the result does not depend on the byte
 * order of the device.
 */
void getTexCol(int texID, float uv[2], float col[3]){
#ifdef NO_TEXTURE_SUPPORTED_YET
	if (texID >= 0){
		bmp32_t* tx = &tex[texID];
		int u = (int)(uv[0] * tx->w) & (tx->w - 1);
		int v = (int)(uv[1] * tx->h) & (tx->h - 1);
		int taddr = (v << tx->shl) + u;

		col[0] = tx->pix[taddr * 4 + 0] / 255.f;
		col[1] = tx->pix[taddr * 4 + 1] / 255.f;
		col[2] = tx->pix[taddr * 4 + 2] / 255.f;
	}
	else
#endif
	{
		//treat tex ID (int) as [A]RGB; go through unsigned so the shifts are
		//well defined for negative IDs too
		const unsigned int rgb = (unsigned int)texID;

		col[0] = ((rgb >> 16) & 0xFFu) / 255.f;
		col[1] = ((rgb >> 8) & 0xFFu) / 255.f;
		col[2] = (rgb & 0xFFu) / 255.f;
	}
}
//----------------------------------------------------------------------------//
/*
 * Follows one stochastic light path through the scene and writes the colour
 * it gathers to c.
 *
 *  org     ray origin (not modified)
 *  dir     ray direction; normalized in place
 *  s       scene to intersect
 *  c       receives the resulting RGB colour (always written)
 *  bounce  number of reflections already spent on this path; a surface hit
 *          with bounce >= 3 is not followed any further
 *  seed    state of the random generator used by rand()
 *
 * Every triangle of every object is tested and the nearest hit wins. At a hit
 * with surface colour COL and reflection tint REF the path colour is
 *     COL + REF * (colour seen along the reflected ray)
 * and a path ends either in the sky gradient (no hit) or, once the bounce
 * limit is reached, in a constant grey of .5.
 *
 * OpenCL C does not support recursion, so the bounces run as a loop: `sum`
 * holds the colour gathered so far and `weight` the product of the reflection
 * tints met so far, which gives the same result as nesting the formula above.
 */
void traceRay(float org[3], float dir[3], __global scene_t* s, float c[3], int bounce,int* seed)
{
#define MAX_DIST (999999.f)
	const int maxBounce = 3;
	float rayOrg[3] = { org[0], org[1], org[2] };
	float rayDir[3];
	float weight[3] = { 1.f, 1.f, 1.f };
	float sum[3] = { 0.f, 0.f, 0.f };
	int k;

	//callers see their direction normalized
	vecNormal(dir);
	rayDir[0] = dir[0];
	rayDir[1] = dir[1];
	rayDir[2] = dir[2];

	for (;;)
	{
		float min_dst = MAX_DIST;
		int min_o = -1;
		int min_p = -1;
		int o, p;

		//brute-force search for the nearest triangle along the current ray
		for (o = 0; o < s->objNum; o++)
		{
			const int poff = s->o[o].pOffs;
			for (p = 0; p < s->o[o].pnum; p++)
			{
				//setup a triangle
				//NOTE(review): these vertex indices are used WITHOUT the object's
				//vOffs, while the shading code below adds vOffs to the same
				//indices. One of the two is wrong for objects whose vOffs is not
				//0 - confirm against the code that bakes the scene.
				int vi[3] = { s->p[poff + p].v[0], s->p[poff + p].v[1], s->p[poff + p].v[2] };
				float v[3][3] = {
					{ s->v[vi[0]].v[0], s->v[vi[0]].v[1], s->v[vi[0]].v[2] },
					{ s->v[vi[1]].v[0], s->v[vi[1]].v[1], s->v[vi[1]].v[2] },
					{ s->v[vi[2]].v[0], s->v[vi[2]].v[1], s->v[vi[2]].v[2] },
				};
				float dst;

				//check for collision; the small lower bound keeps a bounced ray
				//from re-hitting the surface it starts on
				if (triangle_intersection(v[0], v[1], v[2], rayOrg, rayDir, &dst))
				{
					if (dst > 0.0001f && dst < min_dst){
						min_dst = dst;
						min_o = o;
						min_p = poff + p;
					}
				}
			}
		}

		if (!(min_dst < MAX_DIST))
		{
			//no collision: the path ends in the sky gradient :)
			float sky[3];
			if (rayDir[1] > 0){
				const float a = rayDir[1], b = 1 - rayDir[1];
				sky[0] = s->sky[3][0] * a + s->sky[2][0] * b;
				sky[1] = s->sky[3][1] * a + s->sky[2][1] * b;
				sky[2] = s->sky[3][2] * a + s->sky[2][2] * b;
			}
			else{
				const float a = -rayDir[1], b = 1 + rayDir[1];
				sky[0] = s->sky[0][0] * a + s->sky[1][0] * b;
				sky[1] = s->sky[0][1] * a + s->sky[1][1] * b;
				sky[2] = s->sky[0][2] * a + s->sky[1][2] * b;
			}
			for (k = 0; k < 3; k++)
				c[k] = sum[k] + weight[k] * sky[k];
			return;
		}

		{
			//the three corners of the hit polygon, as indices into s->v[]
			const unsigned int i0 = s->p[min_p].v[0] + s->o[min_o].vOffs;
			const unsigned int i1 = s->p[min_p].v[1] + s->o[min_o].vOffs;
			const unsigned int i2 = s->p[min_p].v[2] + s->o[min_o].vOffs;
			float tmp[3], ref[3], hit[3];
			float n[3];		//local normal
			float uv[2];	//texture/uv coords
			float tc[3];	//texture color

			//collision point = origin + distance * direction
			vecMul(tmp, rayDir, min_dst);
			vecAdd(hit, rayOrg, tmp);

			//find the UV coords and compute the new normal
			{
				float f1[3], f2[3], f3[3];
				float ab[3];
				float ac[3];
				float a, a1, a2, a3;
				// vectors from the hit point to the vertices
				vecSub(f1, s->v[i0].v, hit);
				vecSub(f2, s->v[i1].v, hit);
				vecSub(f3, s->v[i2].v, hit);
				// each vertex is weighted by the area of the sub-triangle opposite
				// to it, relative to the whole triangle (the common factor 1/2 of
				// the areas cancels out)
				vecSub(ab, s->v[i0].v, s->v[i1].v);
				vecSub(ac, s->v[i0].v, s->v[i2].v);
				vecCross(tmp, ab, ac); a = vecLen(tmp);			// whole triangle
				vecCross(tmp, f2, f3); a1 = vecLen(tmp) / a;	// weight of vertex 0
				vecCross(tmp, f3, f1); a2 = vecLen(tmp) / a;	// weight of vertex 1
				vecCross(tmp, f1, f2); a3 = vecLen(tmp) / a;	// weight of vertex 2
				// blend the vertex normals...
				vecMul(n, s->v[i0].n, a1);
				vecMul(tmp, s->v[i1].n, a2);	vecAdd(n, n, tmp);
				vecMul(tmp, s->v[i2].n, a3);	vecAdd(n, n, tmp);
				// ...and the texture coordinates, with the same weights
				uv[0] = s->v[i0].t[0] * a1 +
					s->v[i1].t[0] * a2 +
					s->v[i2].t[0] * a3;
				uv[1] = s->v[i0].t[1] * a1 +
					s->v[i1].t[1] * a2 +
					s->v[i2].t[1] * a3;
			}

			vecReflect(ref, n/*gScene.p[min_p].n*/, rayDir);

			//check for diffusion: a first channel below 1 mixes the mirror
			//direction with a randomly perturbed normal
			getTexCol(s->o[min_o].tex[TEX_DIF], uv, tc);
			if (tc[0] < 1){
				float rn[3];
				rn[0] = (128 - (rand() % 255)) / 127.f;
				rn[1] = (128 - (rand() % 255)) / 127.f;
				rn[2] = (128 - (rand() % 255)) / 127.f;

				vecAdd(n, n, rn);
				vecNormal(n);
				vecMul(n, n, 1 - tc[0]);
				vecMul(ref, ref, tc[0]);
				vecAdd(ref, n, ref);
			}

			//texture mapping COLOR: plastic coloring, added as is
			getTexCol(s->o[min_o].tex[TEX_COL], uv, tc);
			for (k = 0; k < 3; k++)
				sum[k] += weight[k] * tc[k];

			//texture mapping REFLECTION: metal coloring, tints everything that
			//is seen through this bounce
			getTexCol(s->o[min_o].tex[TEX_REF], uv, tc);
			for (k = 0; k < 3; k++)
				weight[k] *= tc[k];

			//texture mapping TRANSPARENCY
			//TODO: launch a ray, or bunch of, and tint and mix accordingly
			// or we could instead say 50% transp, 50% chance the ray continues ... but then it's harder to tint and stuff
			//getTexCol(s->o[min_o].tex[TEX_TRS],uv,tc);

			if (bounce >= maxBounce){
				//nothing to show.. default to grey
				for (k = 0; k < 3; k++)
					c[k] = sum[k] + weight[k] * .5f;
				return;
			}

			//continue along the reflected ray
			bounce++;
			for (k = 0; k < 3; k++){
				rayOrg[k] = hit[k];
				rayDir[k] = ref[k];
			}
			vecNormal(rayDir);
		}
	}
}

//Placeholder kernel: it currently does nothing with its arguments.
//The disabled body below tried to point the scene at separately uploaded
//vertex/polygon buffers; scene_t stores v[] and p[] as fixed-size arrays, so
//those assignments cannot be enabled as written.
__kernel void uploadScene(__global scene_t *scene, __global vert_t *vert, __global poly_t *poly)
{/*
	disabled attempts, kept for reference:
//	(__global vert_t*)scene->v = vert;
//	(__global poly_t*)scene->p = poly;
	scene->v = vert;
	scene->p = poly;*/
}

/*
 * Renders one pixel per work-item.
 *
 *  scr       output image, SCR_W x SCR_H pixels of SCR_C bytes each; the first
 *            three bytes of a pixel receive the colour, the rest is left alone
 *  scene     scene to render
 *  randSeed  mixed into every pixel's random seed
 *
 * The ray starts at the origin and goes through the pixel on a plane at
 * z = 100. STOC_SAMPLE paths are traced along it and averaged.
 */
__kernel void draw(__global unsigned char* scr, __global scene_t *scene, unsigned int randSeed)
{
	const int	x = get_global_id(0),
				y = get_global_id(1);

	//work-items outside the image (e.g. a rounded-up global size) must not
	//write past the end of scr
	if (x >= SCR_W || y >= SCR_H) return;

	float dir[3] = { x - (SCR_W / 2), -y + (SCR_H / 2), 100 };
//	float col[3] = { x*0.01f, y*0.01f, 0 };	//debug, but very nice actually...
	float col[3] = { 0, 0, 0 };
	float org[3] = { 0, 0, 0 };

	//per-pixel generator state; the srand()/rand() macros pick it up by name.
	//It needs one real element: it is written by srand() and by every rand().
	unsigned int seed[1] = { 0 };
	//pretty darn complex but it's to assure a basic randomness to start from
	srand((int)(sinf(sinf(y*1.1f)*15.f + sinf((float)x*.06f) + x) * 1024 + 1024) + randSeed);

	vecNormal(dir);
	{
		int i;
		float acc[3];
		for (i = 0; i<STOC_SAMPLE; i++){
			//traceRay() takes the state as int*; it is only forwarded to _rand()
			traceRay(org, dir, scene, acc, 0, (int *)seed);
			vecAdd(col, col, acc);
		}
		vecMul(col, col, 1.f / STOC_SAMPLE);
	}
	//clamp color to 0..1
	{
		int i;
		for (i = 0; i < 3; i++){
			if (col[i] < 0) col[i] = 0;
			else if (col[i] > 1) col[i] = 1;
		}
	}
	//store color
	{
		const int px = (y*SCR_W + x)*SCR_C;
		scr[px    ] = (unsigned char)(col[0] * 255);
		scr[px + 1] = (unsigned char)(col[1] * 255);
		scr[px + 2] = (unsigned char)(col[2] * 255);
	}
}