#define WIN32_LEAN_AND_MEAN
#define WIN32_EXTRA_LEAN

#include <sapi.h>
#include <Windows.h>
#include <d3d11.h>
#include <mmsystem.h>
#include <d3dcompiler.h>


#include "music\4klang.h"
#include "mmsystem.h"
#include "mmreg.h"
//#include <sal.h>
//#include <rpcsal.h>

// Sample storage for the whole soundtrack: MAX_SAMPLES frames times 2 (stereo, per the
// WaveHDR length comment). winmain fills it via _4klang_render (or zeroes it when NOSOUND
// is defined) and WaveHDR points at it for playback.
SAMPLE_TYPE	lpSoundBuffer[MAX_SAMPLES*2];  
// Wave output device handle; opened with waveOutOpen in winmain.
HWAVEOUT	hWaveOut;

#pragma data_seg(".wavefmt")
WAVEFORMATEX WaveFMT =
{
#ifdef FLOAT_32BIT	
	WAVE_FORMAT_IEEE_FLOAT,
#else
	WAVE_FORMAT_PCM,
#endif		
    2, // channels
    SAMPLE_RATE, // samples per sec
    SAMPLE_RATE*sizeof(SAMPLE_TYPE)*2, // bytes per sec
    sizeof(SAMPLE_TYPE)*2, // block alignment;
    sizeof(SAMPLE_TYPE)*8, // bits per sample
    0 // extension not needed
};

#pragma data_seg(".wavehdr")
WAVEHDR WaveHDR = 
{
	(LPSTR)lpSoundBuffer, 
	MAX_SAMPLES*sizeof(SAMPLE_TYPE)*2,			// MAX_SAMPLES*sizeof(float)*2(stereo)
	0, 
	0, 
	WHDR_PREPARED, 
	0, 
	0, 
	0
};


//#define SPEECH

#ifdef _DEBUG
#include <stdio.h>
// Debug builds: evaluate x once and, if it is a failing HRESULT, fault on the spot by
// storing through a null pointer so the debugger stops at the failing call. The store is
// volatile so it cannot be optimised away, and the do/while(0) wrapper makes the macro a
// single statement (safe in unbraced if/else). Callers supply the trailing semicolon.
#define D3DCHECK(x) do { if (FAILED((x))) ((volatile char*)NULL)[0]=0; } while (0)
//#define WIREFRAME 1

#else
#include "shader_code.h"
// Non-debug builds: just evaluate x and ignore the result. Written as an expression so it
// is also a single statement once the caller adds the semicolon.
#define D3DCHECK(x) (void)(x)
// FINAL selects the build variant without compile_shader: winmain compiles the embedded
// shader source inline and uses the FINAL swap chain description.
#define FINAL 1
//#define DONTCRASH 1
//#define SHADERFILE 1
#endif


// Defines a const GUID named `name` directly in this translation unit from its eleven
// components (DECLSPEC_SELECTANY lets duplicate definitions be merged).
// NOTE(review): presumably done so that no GUID library has to be linked -- confirm.
#define DEFINE_GUIDW(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) const GUID DECLSPEC_SELECTANY name = { l, w1, w2, { b1, b2,  b3,  b4,  b5,  b6,  b7,  b8 } }
// Interface id passed to IDXGISwapChain::GetBuffer in winmain to fetch the back buffer.
DEFINE_GUIDW(IID_ID3D11Texture2D,0x6f15aaf2,0xd208,0x4e89,0x9a,0xb4,0x48,0x95,0x35,0xd3,0x4f,0x9c);

// Define NOSOUND to skip _4klang_render; winmain then zero-fills lpSoundBuffer instead.
//#define NOSOUND
// Output size in pixels. Used for the window, the swap chain, the viewport, the depth and
// off-screen target textures, and the first two entries of cb_data.res.
#define WINWIDTH 1280
#define WINHEIGHT 720

#ifndef FINAL
// Bytecode of the most recently compiled shader (NULL before the first compile and after
// a failed one). compile_shader releases the previous blob before compiling again, so at
// most one blob is alive at a time.
ID3DBlob* scode;
// Device used to create the shader objects; in non-FINAL builds winmain stores the device
// it creates here.
ID3D11Device *pd3dDevice;

// Creates the shader object for `entry` from compiled bytecode and stores it in *shader.
// entry[0] selects the stage: 'v' vertex, 'p' pixel, 'g' geometry. For geometry shaders,
// entry[1]=='0' creates a plain geometry shader and anything else creates one with a
// stream-output declaration for SV_POSITION (4 components, no rasterized stream).
// Any other first character creates nothing. Creation results are not checked.
static void create_shader_from_blob(const char* entry, ID3DBlob* code, ID3D11DeviceChild** shader)
{
	static const D3D11_SO_DECLARATION_ENTRY sodecl={0,"SV_POSITION",0,0,4,0};
	const void* bytecode=code->lpVtbl->GetBufferPointer(code);
	SIZE_T length=code->lpVtbl->GetBufferSize(code);

	if (entry[0]=='v')
		pd3dDevice->lpVtbl->CreateVertexShader(pd3dDevice, bytecode, length, NULL, (ID3D11VertexShader**)shader);
	else if (entry[0]=='p')
		pd3dDevice->lpVtbl->CreatePixelShader(pd3dDevice, bytecode, length, NULL, (ID3D11PixelShader**)shader);
	else if (entry[0]=='g')
	{
		if (entry[1]=='0')
			pd3dDevice->lpVtbl->CreateGeometryShader(pd3dDevice, bytecode, length, NULL, (ID3D11GeometryShader**)shader);
		else
			pd3dDevice->lpVtbl->CreateGeometryShaderWithStreamOutput(pd3dDevice, bytecode, length, &sodecl, 1, NULL, 0, D3D11_SO_NO_RASTERIZED_STREAM, NULL, (ID3D11GeometryShader**)shader);
	}
}

// Compiles the HLSL entry point `entry` and creates the matching shader object in *shader.
//
//   entry   entry point name; its first character also selects the shader stage and the
//           target profile ("vs_5_0", "ps_5_0", "gs_5_0").
//   shader  receives the created shader object.
//
// _DEBUG builds compile shader.hlsl from disk. Any previous *shader is released first and
// *shader is left NULL when compilation fails, after the compiler output (if any) has been
// sent to the debugger. The caller must therefore pass an initialised pointer.
//
// Other non-FINAL builds compile the embedded shader_hlsl string (or shader.hlsl when
// SHADERFILE is defined). Without DONTCRASH a failed compile is not detected and the
// following shader creation dereferences a NULL blob; with DONTCRASH the compiler output
// is sent to the debugger and the process exits.
void compile_shader(const char* entry, ID3D11DeviceChild** shader)
{
	// Profile template; the first character is patched to the stage letter of the entry.
	static char entry_stub[]="ss_5_0";
	entry_stub[0]=entry[0];

	// Release the bytecode left over from the previous call. Without this every compile
	// (including every hot reload) leaked one blob.
	if (scode)
	{
		scode->lpVtbl->Release(scode);
		scode=NULL;
	}

#ifdef _DEBUG
	ID3DBlob* error=NULL;
	HRESULT hr;

	if (*shader)
	{
		(*shader)->lpVtbl->Release(*shader);
		(*shader)=NULL;
	}

	hr=D3DCompileFromFile(L"shader.hlsl", NULL, NULL, entry, entry_stub, 0, 0, &scode, &error);
	if (SUCCEEDED(hr))
		create_shader_from_blob(entry, scode, shader);
	else if (error)
		OutputDebugStringA((const char*)error->lpVtbl->GetBufferPointer(error));

	// The message blob can also be returned on success (warnings); release it either way.
	if (error)
		error->lpVtbl->Release(error);
#else

#ifdef DONTCRASH
	{
		ID3DBlob* error=NULL;
		HRESULT hr=D3DCompile(shader_hlsl, strlen(shader_hlsl), NULL, NULL, NULL, entry, entry_stub, 0, 0, &scode, &error);
		if (FAILED(hr))
		{
			// The compiler may fail without producing a message blob.
			if (error)
				OutputDebugStringA((const char*)error->lpVtbl->GetBufferPointer(error));
			ExitProcess(0);
		}
		if (error)
			error->lpVtbl->Release(error);
	}
#elif defined(SHADERFILE)
	D3DCompileFromFile(L"shader.hlsl", NULL, NULL, entry, entry_stub, 0, 0, &scode, NULL);
#else
	D3DCompile(shader_hlsl, strlen(shader_hlsl), NULL, NULL, NULL, entry, entry_stub, 0, 0, &scode, NULL);
#endif
	create_shader_from_blob(entry, scode, shader);
#endif
}
#endif

// CPU-side image of the shader constant buffer; the whole struct is uploaded with
// UpdateSubresource in winmain. The member order is load-bearing: winmain also passes the
// address of this struct to waveOutGetPosition cast to LPMMTIME, so the leading members
// double as an MMTIME.
typedef struct
{
	unsigned int	mmtime_tag;			// initialised to TIME_SAMPLES in cb_data; presumably overlays MMTIME.wType -- confirm
	unsigned int	current_time;		// presumably receives the playback position in samples from waveOutGetPosition -- confirm
	unsigned int	current_instance;	// never assigned explicitly in this file; NOTE(review): may be touched by the MMTIME write, confirm against sizeof(MMTIME)
	unsigned int	lod;				// set per draw pass in winmain (16 >> pass index)
	float			res[4];				// initialised to { WINWIDTH, WINHEIGHT, 0, 0 } in cb_data
}  constant_buffer_t;

// timer global variables
constant_buffer_t cb_data = { TIME_SAMPLES,  0, 0, 0, WINWIDTH, WINHEIGHT, 0, 0 };
const static UINT zeroes[4]={0,0,0,0};
const static D3D11_VIEWPORT vp = {0, 0, WINWIDTH, WINHEIGHT, 0, 1}; 
#ifdef FINAL
	const static DXGI_SWAP_CHAIN_DESC sd = {{WINWIDTH, WINHEIGHT, {60, 1},  DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED, DXGI_MODE_SCALING_UNSPECIFIED }, {1, 0}, DXGI_USAGE_RENDER_TARGET_OUTPUT, 1, NULL, FALSE, DXGI_SWAP_EFFECT_DISCARD, 0};
#else
	const static DXGI_SWAP_CHAIN_DESC sd = {{WINWIDTH, WINHEIGHT, {0, 0},  DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED, DXGI_MODE_SCALING_UNSPECIFIED }, {1, 0}, DXGI_USAGE_RENDER_TARGET_OUTPUT, 1, NULL, TRUE, DXGI_SWAP_EFFECT_SEQUENTIAL, 0};
#endif

// Main routine of the program (NOTE(review): presumably configured as the executable's
// entry point in the linker settings -- confirm). It creates the window and the D3D11
// device, creates the render targets and shaders, fills lpSoundBuffer, starts playback and
// then renders frames in a loop.
//
// The function is declared naked, which takes away the compiler-generated prolog and
// epilog; a minimal prolog is put back manually with the assembly below. The function
// never returns (it ends in ExitProcess), so no epilog is necessary.
//
// No result of device, window or audio setup is checked here apart from the D3DCHECK calls,
// which only test anything in _DEBUG builds.
__declspec( naked )  void __cdecl winmain()

{
	// Prolog: set up the frame pointer and reserve __LOCAL_SIZE bytes for the locals below.
	//__asm enter 0x10, 0;
	__asm 
	{
		push ebp
        mov ebp,esp
        sub esp,__LOCAL_SIZE
	}
	
	{ // Extra scope to make compiler accept the __declspec(naked) with local variables

#if 0
		{
	ISpVoice * pVoice = NULL;
	IClassFactory* factory = NULL;

	CoInitialize(NULL);
	ExitProcess(0);
		}
#endif

	// D3D 11 objects. These are locals of winmain; only pd3dDevice is a global, and only
	// in non-FINAL builds where compile_shader needs it.
	IDXGISwapChain *pSwapChain;
	ID3D11RenderTargetView *pRenderTargetView;	// view on the swap chain back buffer
	ID3D11DeviceContext *pImmediateContext;
	ID3D11VertexShader* pVertexShader;			// entry "v": full-screen pass
	ID3D11PixelShader* pPixelShader;			// entry "p": full-screen pass
	ID3D11Buffer* pConstantBuffer;
	ID3D11Texture2D* pDepthTexture;
	ID3D11DepthStencilView *pDepthRT;
	ID3D11VertexShader* pVertexTree;			// entry "vs": tree pass
	ID3D11PixelShader* pPixelTree;				// entry "ps": tree pass
/*	ID3D11VertexShader* pVertexPass;
	ID3D11GeometryShader* pGeometrySphere[2];*/
	ID3D11Texture2D*	pTargetTexture;			// off-screen colour target for the tree pass
	ID3D11RenderTargetView*	pTargetRT;			// render target view on pTargetTexture
	ID3D11ShaderResourceView* pTargetRV;		// shader resource view on pTargetTexture

#ifdef FINAL
	ID3D11Device *pd3dDevice;
#endif


	// the most simple window: created with the class name "edit", so this file registers
	// no window class and has no window procedure of its own
	HWND hWnd = CreateWindow(L"edit", 0, WS_POPUP | WS_VISIBLE, 0, 0, WINWIDTH, WINHEIGHT, 0, 0, 0, 0);

	// don't show the cursor
	ShowCursor(FALSE);

	// sd is const, so copy it to a writable local to fill in the window handle
	DXGI_SWAP_CHAIN_DESC temp;
	temp = sd;
	temp.OutputWindow = hWnd;

	// Create device, immediate context and swap chain in one call. The result is not
	// checked; the pointers are used unconditionally below.
 	D3D11CreateDeviceAndSwapChain(
			NULL,					// might fail with two adapters in machine
			D3D_DRIVER_TYPE_HARDWARE,
			NULL, 
#ifdef _DEBUG
			D3D11_CREATE_DEVICE_DEBUG,
#else
			0,//D3D11_CREATE_DEVICE_DEBUG,
#endif
			NULL,
			0,
			D3D11_SDK_VERSION,
			&temp,
			&pSwapChain,
			&pd3dDevice,
			NULL,
			&pImmediateContext);

	pSwapChain->lpVtbl->Present( pSwapChain, 0, 0 ); // clear

	// Create the render targets: a view on the swap chain back buffer, a D32_FLOAT depth
	// texture with its depth-stencil view, and an off-screen RGBA8 texture that is bound
	// both as a render target (tree pass) and as a shader resource (full-screen pass).
	const static D3D11_TEXTURE2D_DESC depthtex = { WINWIDTH, WINHEIGHT, 1, 1, DXGI_FORMAT_D32_FLOAT, { 1, 0 }, D3D11_USAGE_DEFAULT, D3D11_BIND_DEPTH_STENCIL, 0, 0 };
	const static D3D11_TEXTURE2D_DESC targtex = { WINWIDTH, WINHEIGHT, 1, 1, DXGI_FORMAT_R8G8B8A8_UNORM, { 1, 0 }, D3D11_USAGE_DEFAULT, D3D11_BIND_RENDER_TARGET|D3D11_BIND_SHADER_RESOURCE, 0, 0 };
	ID3D11Texture2D *pBackBuffer;
	D3DCHECK(pSwapChain->lpVtbl->GetBuffer( pSwapChain, 0, (REFIID ) &IID_ID3D11Texture2D, (LPVOID*)&(pBackBuffer) ));
	D3DCHECK(pd3dDevice->lpVtbl->CreateTexture2D(pd3dDevice, &depthtex, NULL, &pDepthTexture));
	D3DCHECK(pd3dDevice->lpVtbl->CreateTexture2D(pd3dDevice, &targtex, NULL, &pTargetTexture));
	D3DCHECK(pd3dDevice->lpVtbl->CreateRenderTargetView(pd3dDevice, (ID3D11Resource*)pBackBuffer, NULL, &pRenderTargetView ));
	D3DCHECK(pd3dDevice->lpVtbl->CreateRenderTargetView(pd3dDevice, pTargetTexture, NULL, &pTargetRT));
	D3DCHECK(pd3dDevice->lpVtbl->CreateDepthStencilView(pd3dDevice, pDepthTexture, NULL, &pDepthRT));
	D3DCHECK(pd3dDevice->lpVtbl->CreateShaderResourceView(pd3dDevice, pTargetTexture, NULL, &pTargetRV));

	// constant buffer: sized for one constant_buffer_t, created without initial data and
	// refreshed from cb_data with UpdateSubresource in the frame loop
	const static D3D11_BUFFER_DESC bd = { sizeof(constant_buffer_t), D3D11_USAGE_DEFAULT, D3D11_BIND_CONSTANT_BUFFER, 0, 0, 0 };
	D3DCHECK(pd3dDevice->lpVtbl->CreateBuffer(pd3dDevice, &bd, NULL, &pConstantBuffer));

	// default shaders
#ifdef _DEBUG
	// the _DEBUG compile_shader releases any non-NULL shader it is handed, so the
	// pointers must start out NULL
	pPixelShader=NULL;
	pVertexShader=NULL;
	pVertexTree=NULL;
	pPixelTree=NULL;
#endif
#ifndef FINAL
	compile_shader("v", &pVertexShader);
	compile_shader("p", &pPixelShader);
	compile_shader("vs", &pVertexTree);
	compile_shader("ps", &pPixelTree);
#else
	ID3DBlob* scode;

	// Two rounds over the embedded shader source: i==0 builds entry points "p" and "v"
	// (full-screen pass), i==1 builds "ps" and "vs" (tree pass). Compile results are not
	// checked and the bytecode blobs are not released.
	for (int i=0;i<2;i++)
	{
		D3DCompile(shader_hlsl,strlen(shader_hlsl),NULL,NULL,NULL,i?"ps":"p","ps_5_0",0,0,&scode,NULL);
		pd3dDevice->lpVtbl->CreatePixelShader(pd3dDevice,scode->lpVtbl->GetBufferPointer(scode),scode->lpVtbl->GetBufferSize(scode),NULL,i?&pPixelTree:&pPixelShader);
		D3DCompile(shader_hlsl,strlen(shader_hlsl),NULL,NULL,NULL,i?"vs":"v","vs_5_0",0,0,&scode,NULL);
		pd3dDevice->lpVtbl->CreateVertexShader(pd3dDevice,scode->lpVtbl->GetBufferPointer(scode),scode->lpVtbl->GetBufferSize(scode),NULL,i?&pVertexTree:&pVertexShader);
	}
#endif

	// Pipeline state that stays fixed for the whole run: triangle strips, the one
	// constant buffer in slot 0 of both the vertex and the pixel stage, full viewport.
	pImmediateContext->lpVtbl->IASetPrimitiveTopology(pImmediateContext, D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
	pImmediateContext->lpVtbl->PSSetConstantBuffers(pImmediateContext, 0, 1, &pConstantBuffer);
	pImmediateContext->lpVtbl->VSSetConstantBuffers(pImmediateContext, 0, 1, &pConstantBuffer);
	pImmediateContext->lpVtbl->RSSetViewports( pImmediateContext, 1, &vp );

#ifdef SPEECH
	ISpVoice * pVoice = NULL;
	if (SUCCEEDED(CoInitialize(NULL)) && (SUCCEEDED(CoCreateInstance(&CLSID_SpVoice, NULL, CLSCTX_ALL, &IID_ISpVoice, (void **)&pVoice))))
		pVoice->lpVtbl->Speak(pVoice, L"Hello", SPF_ASYNC|SPF_IS_NOT_XML, NULL);
#endif

	// Fill lpSoundBuffer. The first branch below is disabled; the active branch calls
	// _4klang_render, or zero-fills the buffer when NOSOUND is defined.
#if 0
	FILE *f=fopen("music.raw", "rb");
	fread(lpSoundBuffer, sizeof(SAMPLE_TYPE)*MAX_SAMPLES*2, 1, f);
	fclose(f);
	for (int i=0;i<MAX_SAMPLES*2;i++)
	{
//		float a=pow(abs(lpSoundBuffer[i]), 1.0f);
		float a=lpSoundBuffer[i];
		a=a<0 ? -a : a;
		a=((a*a)+a)/2;
		lpSoundBuffer[i]=lpSoundBuffer[i]<0 ? -a : a;
	}
	f=fopen("music2.raw", "wb");
	fwrite(lpSoundBuffer, sizeof(SAMPLE_TYPE)*MAX_SAMPLES*2, 1, f);
	fclose(f);


#else
#ifndef NOSOUND
	_4klang_render(lpSoundBuffer);
#if 0
	FILE *f=fopen("music.raw", "wb");
	fwrite(lpSoundBuffer, sizeof(SAMPLE_TYPE)*MAX_SAMPLES*2, 1, f);
	fclose(f);
#endif
#else
	memset(&lpSoundBuffer[0], 0, sizeof(SAMPLE_TYPE)*MAX_SAMPLES*2);
#endif
#endif

	// Start playback of the whole buffer on wave device id 0. WaveHDR is passed straight
	// to waveOutWrite; its dwFlags were statically initialised to WHDR_PREPARED and no
	// waveOutPrepareHeader call is made.
	waveOutOpen			( &hWaveOut, 0, &WaveFMT, NULL, 0, CALLBACK_NULL );
	waveOutWrite		( hWaveOut, &WaveHDR, sizeof(WaveHDR) );

	// Frame loop.
	do
	{
#ifdef _DEBUG
		// Remove at most one pending window message per frame; it is neither translated
		// nor dispatched.
		MSG msg;
		PeekMessage(&msg, hWnd, 0, 0, PM_REMOVE);
		// Hot reload: while Ctrl and S are both down, recompile all four shaders and
		// restart the soundtrack from the beginning.
		if ((GetAsyncKeyState(VK_CONTROL) && GetAsyncKeyState('S')))
		{
			compile_shader("v", &pVertexShader);
			compile_shader("p", &pPixelShader);
			compile_shader("vs", &pVertexTree);
			compile_shader("ps", &pPixelTree);

			// Reset the header flags to 2 before resubmitting the buffer.
			// NOTE(review): 2 is presumably WHDR_PREPARED, matching the static initialiser -- confirm.
			waveOutClose(hWaveOut); hWaveOut=0; WaveHDR.dwFlags=2;
			waveOutOpen( &hWaveOut, 0, &WaveFMT, NULL, 0, CALLBACK_NULL );
			waveOutWrite( hWaveOut, &WaveHDR, sizeof(WaveHDR) );
		}

		// If any shader is missing (compile_shader leaves NULL on a failed compile), show
		// a solid error colour and skip the rest of the frame.
		if (!pVertexShader || !pPixelShader || !pVertexTree || !pPixelTree/* || !pGeometrySphere[0] || !pGeometrySphere[1]*/)
		{
			static float ClearColorError[4] = { 1.0f, 0.125f, 0.3f, 1.0f };
			pImmediateContext->lpVtbl->ClearRenderTargetView(pImmediateContext, pRenderTargetView, ClearColorError );
			pSwapChain->lpVtbl->Present( pSwapChain, 1, 0 );
			continue;
		}
#endif

		// calculate the current demo time: the playback position is written straight into
		// the start of cb_data, which is laid out to double as an MMTIME (mmtime_tag holds
		// TIME_SAMPLES)
		waveOutGetPosition(hWaveOut, (LPMMTIME)&cb_data, sizeof(MMTIME));

#if 1
		// draw trees into the off-screen target, with depth testing against pDepthRT
		pImmediateContext->lpVtbl->OMSetRenderTargets(pImmediateContext, 1, &pTargetRT, pDepthRT);
		pImmediateContext->lpVtbl->ClearRenderTargetView(pImmediateContext, pTargetRT, (const FLOAT*)&zeroes);
		pImmediateContext->lpVtbl->ClearDepthStencilView(pImmediateContext, pDepthRT, D3D11_CLEAR_DEPTH, 1, 0);
		pImmediateContext->lpVtbl->VSSetShader(pImmediateContext, pVertexTree, NULL, 0);
		pImmediateContext->lpVtbl->PSSetShader(pImmediateContext, pPixelTree, NULL, 0);

		// Four instanced draws with lod = 16, 8, 4, 2. The constant buffer is re-uploaded
		// before each draw so the shader sees the current lod and time. Passes 0..2 draw
		// (lod+2)*2*lod*(lod*lod-1) vertices in 1, 64 and 4096 instances; pass 3 draws a
		// single instance of (1024+2)*2*1024 vertices.
		for (int i=0;i<4;i++)
		{
			cb_data.lod=16>>i;
			pImmediateContext->lpVtbl->UpdateSubresource(pImmediateContext, pConstantBuffer, 0, NULL, &cb_data, sizeof(constant_buffer_t), 0);
			pImmediateContext->lpVtbl->DrawInstanced(pImmediateContext, i==3?((1024+2)*2)*1024:((cb_data.lod+2)*2)*cb_data.lod*(cb_data.lod*cb_data.lod-1), i==3?1:1<<(i*6), 0, 0);
		}
#endif

		// draw fsquad: switch to the back buffer (no depth buffer), bind the off-screen
		// target as a shader resource and draw 3 vertices with the "v"/"p" shaders
		pImmediateContext->lpVtbl->OMSetRenderTargets(pImmediateContext, 1, &pRenderTargetView, NULL);
		pImmediateContext->lpVtbl->PSSetShaderResources(pImmediateContext, 0, 1, &pTargetRV);
		pImmediateContext->lpVtbl->VSSetShader(pImmediateContext, pVertexShader, NULL, 0);
		pImmediateContext->lpVtbl->PSSetShader(pImmediateContext, pPixelShader, NULL, 0);
		pImmediateContext->lpVtbl->Draw(pImmediateContext, 3, 0);
#ifndef FINAL
		// Unbind the shader resource again by binding a NULL view; the zeroes array is
		// reinterpreted as a one-element array of NULL view pointers.
		pImmediateContext->lpVtbl->PSSetShaderResources(pImmediateContext, 0, 1, (const ID3D11ShaderResourceView**)&zeroes);
#endif

		// Present: sync interval 0 in _DEBUG builds, 1 otherwise.
#ifdef _DEBUG
		pSwapChain->lpVtbl->Present( pSwapChain, 0, 0 );
#else
		pSwapChain->lpVtbl->Present( pSwapChain, 1, 0 );
#endif
	}
	// _DEBUG builds loop forever; other builds stop when Escape is down or the wave header
	// has been flagged WHDR_DONE.
#ifdef _DEBUG
	while (1);
#else
	while ((!GetAsyncKeyState(VK_ESCAPE)) && (!(WaveHDR.dwFlags & WHDR_DONE)));
#endif
	}

	// Terminate the process directly; nothing is released and the function never returns.
	ExitProcess(0);
}
