// Multiplier applied to the cube scale when the shader is compiled with AOV defined.
float AOVMargin;

// Combined view-projection matrix; not referenced in the visible shader code.
float4x4 ViewProjection;
// Projection matrix, applied to view-space positions to obtain clip-space positions.
float4x4 Projection;
// View matrix, applied to world-space positions to obtain view-space positions.
float4x4 View;
// Matrix whose upper-left 3x3 part is used when transforming face normals.
float4x4 NormalMatrix;

// Not referenced in the visible shader code.
float Time;
// Not referenced in the visible shader code.
float3 CamPosition;

// Per-cube input record. Not used in the visible code; presumably it has the
// same field layout as CubeOutput (position/scale and axis/angle) - TODO confirm.
struct CubeInput
{
	float4 PositionScale: CUBE_POSITION_SCALE;
	float4 Rotation: CUBE_ROTATION;
};

// Per-cube record consumed by the geometry shader as a single point primitive.
struct CubeOutput
{
	// xyz: cube center position, w: cube scale (multiplies the +/-1 corner offsets).
	float4 PositionScale: CUBE_POSITION_SCALE;
	// xyz: rotation axis (normalized by the geometry shader), w: rotation angle.
	float4 Rotation: CUBE_ROTATION;
	// Color copied unchanged to every emitted vertex in the non-AOV path.
	float4 Color: CUBE_COLOR;
};

// Geometry shader output vertex. The layout depends on whether AOV is defined.
#ifdef AOV
struct GSOutput
{
	// Clip-space position of the cube corner.
	float4 Position: SV_Position;
	// Same clip-space position as Position, passed through a user semantic.
	float4 ClipPosition: CLIPOSITION;
	// xyz: cube center position, w: cube scale without the AOVMargin factor.
	float4 CubeCenterSize: CUBECENTERSIZE;
};
#else
struct GSOutput
{
	// Clip-space position of the cube corner.
	float4 Position: SV_Position;
	// Cube color taken from the input point.
	float4 Color: COLOR;
	// Face texture coordinate: the tcFromFace result remapped with *0.5+0.5.
	float2 TexCoord: TEXCOORD;
	// Face normal after transformation by the rotation / NormalMatrix product.
	float3 ViewNormal: VIEWNORMAL;
	// Corner position transformed by the View matrix.
	float3 ViewPosition: VIEWPOSITION;
};
#endif

// Builds a 3x3 axis-angle rotation matrix.
//   axis  - rotation axis; the formula expects a unit-length vector
//           (the caller in this file normalizes it before the call).
//   angle - rotation angle, passed directly to the cos/sin intrinsics.
// Returns the matrix whose element [0][1] carries the +sin*z term and
// whose element [1][0] carries the -sin*z term.
float3x3 rotation3x3(float3 axis, float angle)
{
	float c = cos(angle);
	float s = sin(angle);

	// Squared axis components, used on the diagonal.
	float xx = axis.x * axis.x;
	float yy = axis.y * axis.y;
	float zz = axis.z * axis.z;

	// Pairwise products of axis components, used off the diagonal.
	float xy = axis.x * axis.y;
	float xz = axis.x * axis.z;
	float yz = axis.y * axis.z;

	// Symmetric part of each off-diagonal pair.
	float symXY = xy - (c * xy);
	float symXZ = xz - (c * xz);
	float symYZ = yz - (c * yz);

	// Antisymmetric (sine) part of each off-diagonal pair.
	float sx = s * axis.x;
	float sy = s * axis.y;
	float sz = s * axis.z;

	// Constructor arguments are given row by row.
	return float3x3(
		xx + (c * (1.0f - xx)), symXY + sz,             symXZ - sy,
		symXY - sz,             yy + (c * (1.0f - yy)), symYZ + sx,
		symXZ + sy,             symYZ - sx,             zz + (c * (1.0f - zz)));
}


// Geometry shader: expands one cube point into six quads (24 vertices),
// each emitted as its own 4-vertex triangle strip.
//   p      - the single input point carrying position/scale, axis/angle and color.
//   stream - output triangle stream receiving the cube faces.
// With AOV defined, the cube is enlarged by AOVMargin and each vertex carries
// the clip position plus the cube center and original (un-margined) scale.
// Otherwise each vertex carries color, texture coordinate, normal and
// view-space position for the deferred path.
[maxvertexcount(24)]
void gsMain(in point CubeOutput p[1], inout TriangleStream<GSOutput> stream)
{
	float3 position = p[0].PositionScale.xyz;
	// Scale as supplied by the input, before any AOV margin is applied.
	float baseScale = p[0].PositionScale.w;
	#ifdef AOV
		float scale = baseScale * AOVMargin;
	#else
		float scale = baseScale;
	#endif
	float3x3 rotation = rotation3x3(normalize(p[0].Rotation.xyz), p[0].Rotation.w);

	// Unit cube corner offsets; indices 0-3 are the z=-1 face, 4-7 the z=+1 face.
	const float3 ov[8] =
	{
		float3(-1,-1,-1),
		float3( 1,-1,-1),
		float3( 1, 1,-1),
		float3(-1, 1,-1),
		float3(-1,-1, 1),
		float3( 1,-1, 1),
		float3( 1, 1, 1),
		float3(-1, 1, 1)
	};

	// Transform every corner once: world -> view -> clip. The view-space
	// result is kept so the emit macro does not redo the View multiply for
	// each of the 24 emitted vertices.
	float4 viewPos[8];
	float4 c[8];
	for (int i = 0; i < 8; i++)
	{
		float4 worldPos = float4(position + mul(rotation, ov[i])*scale, 1);
		viewPos[i] = mul(View, worldPos);
		c[i] = mul(Projection, viewPos[i]);
	}

	// NOTE(review): normals are transformed by mul(mul(rotation, view), n),
	// while positions use mul(rotation, ov) followed by mul(View, ...).
	// Confirm this multiplication order matches how NormalMatrix is uploaded.
	float3x3 view = float3x3(NormalMatrix[0].xyz, NormalMatrix[1].xyz, NormalMatrix[2].xyz);
	float3x3 worldView = mul(rotation, view);

	GSOutput g;
	#ifdef AOV
		// AOV
		g.CubeCenterSize.xyz = position;
		// Pass the original scale directly instead of dividing the margin
		// back out, which would produce NaN when AOVMargin is zero.
		g.CubeCenterSize.w = baseScale;
		#define emit(x) {g.ClipPosition=c[x];g.Position=c[x];stream.Append(g);}
		#define emit_quad(normal, x,y,z,w) {emit(x);emit(y);emit(z);emit(w);stream.RestartStrip();}
	#else
		// Deferred
		g.Color = p[0].Color;
		#define emit(x, n) {g.TexCoord = tcFromFace(ov[x], n)*0.5+0.5;  g.Position=c[x];g.ViewPosition=viewPos[x].xyz;stream.Append(g);}
		#define emit_quad(n, x,y,z,w) {g.ViewNormal=mul(worldView, n); emit(x,n);emit(y,n);emit(z,n);emit(w,n); stream.RestartStrip();}
	#endif

	// One quad per face: face normal in cube space, then four corner indices
	// in triangle-strip order.
	emit_quad(float3(0,-1,0), 1,5,0,4); 
	emit_quad(float3(-1,0,0), 0,4,3,7); 
	emit_quad(float3(0,1,0), 3,7,2,6);
	emit_quad(float3(1,0,0), 2,6,1,5);
	emit_quad(float3(0,0,1), 4,5,7,6); 
	emit_quad(float3(0,0,-1), 2,1,3,0);
}

