#define WIN32_LEAN_AND_MEAN
#define WIN32_EXTRA_LEAN
#include <windows.h>
#include "mzk.h"
#include "song.h"

//=================================================================================================================
//=================================================================================================================
// Candidate locations of the sample bank file; mzk_init tries them in order.
/*static*/ const char *paths[2] = { "drivers/gm.dls", "drivers/etc/gm.dls" };
// Divisors applied to the sample length of channels 15 and 16 (in that order)
// when mzk_init cuts those two samples short.
/*static*/ const int div[] = { 10, 3 };
// Per-channel gain: every interpolated sample value is multiplied by this factor.
/*static*/ int volumenes[NUMCHANNELS] = { 4, 4, 4, 4, 4, 4, 5, 4, 4, 2, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 5, 4, 1 };
// Per-channel echo attenuation: echo tap e is right-shifted by (echosv[c] + e) bits.
/*static*/ int echosv[NUMCHANNELS]    = { 5, 6, 8, 8, 3, 5, 7, 8, 8, 8, 8, 8, 8, 1, 1, 4, 6, 2, 0, 4, 5, 8, 4, 3 };
// Per-channel pan position: the left weight is 42*panns[c] and the right weight
// is 256 minus that (both out of 256).
/*static*/ int panns[NUMCHANNELS]     = { 3, 5, 1, 4, 3, 1, 1, 3, 4, 5, 2, 6, 4, 0, 4, 5, 1, 5, 2, 2, 3, 4, 5, 2 };
//=================================================================================================================
//=================================================================================================================
// Holds the 16-bit sample data of the channel currently being rendered
// (capacity: 44100*3 samples); it is reloaded from the sample file for every channel.
/*static*/ short sampleBuffer[44100*3];
// 32-bit mixing accumulator that all channels are summed into before the final
// conversion to 16 bits. It is sized for MZK_DURATION+10 duration units
// (presumably seconds - confirm in mzk.h) so that notes and echoes running past
// the end of the song need no bounds check while rendering.
/*static*/ int   ibuffer[(MZK_DURATION+10)*MZK_RATE*MZK_NUMCHANNELS];
//=================================================================================================================
//=================================================================================================================

// y = 2^x
// Computes 2 raised to the power f on the x87 FPU. f is split into a fractional
// part (|frac| < 1, which is the input range f2xm1 accepts) and an integer part
// that fscale applies as a power-of-two scale factor.
// Returns the result as a float; the argument slot is reused as the temporary.
static __forceinline float m2powf( float f )
{
    _asm fld   dword ptr [f]      // st0 = f
    _asm fld1                     // st0 = 1, st1 = f
    _asm fld   st(1)              // st0 = f, st1 = 1, st2 = f
    _asm fprem                    // st0 = remainder of f / 1 = fractional part (keeps the sign of f)
    _asm f2xm1                    // st0 = 2^frac - 1
    _asm faddp st(1), st          // add the 1 back and pop: st0 = 2^frac, st1 = f
    _asm fscale                   // st0 = 2^frac * 2^trunc(f) = 2^f
    _asm fstp  st(1)              // discard the saved f, leaving the result in st0
    _asm fstp  dword ptr [f]      // store the result over the argument and empty the FPU stack
    return f;
}

// float to integer conversion
// Loads x onto the x87 stack and stores it back as a 32-bit integer with fistp.
// fistp rounds according to the FPU's current rounding mode; nothing here forces
// truncation, so the result can differ from a C-style (int) cast.
static __forceinline int f2i( const float x )
{
    int res;
    _asm fld     dword ptr [x]      // st0 = x
    _asm fistp   dword ptr [res]    // res = st0 converted to integer, then pop st0
    return res;
}

void mzk_init( short *buffer )
{
    // open sample file
    int             m;
    HANDLE          h;
    for( h=INVALID_HANDLE_VALUE, m=0; h==INVALID_HANDLE_VALUE; m++ )
        h = (HANDLE)OpenFile( paths[m], (OFSTRUCT*)sampleBuffer, OF_READ );

    // render song
    const unsigned char *deltas_ptr = mzk_deltas_b0;
    const unsigned char *notas_ptr  = mzk_notas;
    for( int c=0; c<NUMCHANNELS; c++ )
    {
        // read samples
        unsigned long wr;
        int sampleLoops; 
        SetFilePointer( h, mzk_sampleOffsets[c], 0, FILE_BEGIN );
        ReadFile( h, &sampleLoops, 4, &wr, 0 );
        ReadFile( h, sampleBuffer, sampleLoops, &wr, 0 );
	    sampleLoops = sampleLoops/2;

        // cut samples
        if( c==15 || c==16 ) 
        {
            const int si = sampleLoops / div[c-15];
            for( m=0; m<256; m++ ) sampleBuffer[si+m] = (sampleBuffer[si+m]*(256-m))>>8;
            sampleLoops = si+256;
        }

        const int pl = 42*panns[c];
        const int pr = 256-pl;
        const int mi = *notas_ptr++;
        // render notes
        int lastmuestra = 0;
        const unsigned int nd = (unsigned int)mzk_numdeltas[c];
        for( unsigned int d=0; d<nd; d++ )
        {   
            const int delta = ((deltas_ptr[NUMNOTAS]<<8)+deltas_ptr[0]) * SPT; deltas_ptr++;
            const int nota  = mi + *notas_ptr++;
            const int freq  = f2i(30.02f*m2powf( float(nota)/12.0f ) );

            lastmuestra += delta;

            int p = 0;
            m = lastmuestra;
            do
            {
                // sample
                const int ipos = p>>10;
                const int fpos = p&1023;
                const int a = sampleBuffer[ipos+0];
                const int b = sampleBuffer[ipos+1];
                const int f = a+((fpos*(b-a))>>10);
                const int v = f * volumenes[c];
                const int vl = (v*pl)>>8;
                const int vr = (v*pr)>>8;

                #if 1
                int tm=m; if( tm>=0 )  { ibuffer[2*tm+0] += vl; ibuffer[2*tm+1] += vr; 
				int h = echosv[c];
                for( int e=0; e<6; e++ ) { ibuffer[2*(tm+(e<<13))+0] += (vl>>(h+e)); ibuffer[2*(tm+(e<<13))+1] += (vr>>(h+e)); }
                }
                #else
                // add (with some reverb)
                for( int e=0; e<6; e++ ) 
                {
                    const int q = m + (e<<13);
                    const int g = e + ((e==0)?0:2);
                    ibuffer[2*q+0] += (vl>>g);
                    ibuffer[2*q+1] += (vr>>g);
                }
                #endif

                m++;
	            p += freq;
            }while( (p>>10)<(sampleLoops-1) ); //&& m<(MZK_RATE*MZK_DURATION) ); // second condition can be removed because we allocate some extra space in buffer
        }
    }

    //----------------------------

#if 0
    for( int m=0; m<MZK_NUMSAMPLES; m++ ) 
    {
        int ix = ibuffer[m] / 4;
        if( ix>32767 ) ix=32767; else if( ix<-32767 ) ix=-32767;
        buffer[m] = ix;
    }
#else
    _asm lea esi, [ibuffer]
    _asm mov edi, buffer
    _asm mov ecx, (MZK_DURATION*MZK_NUMCHANNELS*MZK_RATE)
    _asm mov ebx, 32767
    _asm myloop:
    _asm     lodsd
    _asm     sar     eax, 2
    _asm     cmp     eax, ebx
    _asm     cmovg   eax, ebx
    _asm     neg     eax
    _asm     cmp     eax, ebx
    _asm     cmovg   eax, ebx
    _asm     neg     eax
    _asm     stosw
    _asm dec ecx
    _asm jnz myloop
#endif

//    CloseHandle( h );
}
