//-------------------------------------------------------------------------------------
//
// Copyright 2009 Intel Corporation
// All Rights Reserved
//
// Permission is granted to use, copy, distribute and prepare derivative works of this
// software for any purpose and without fee, provided, that the above copyright notice
// and this statement appear in all copies.  Intel makes no representations about the
// suitability of this software for any purpose.  THIS SOFTWARE IS PROVIDED "AS IS."
// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY,
// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE,
// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  Intel does not
// assume any responsibility for any errors which may appear in this software nor any
// responsibility to update it.
//

/////////////////////////////////////////////////////////////////////////////
// Based upon:
//
// Approximate Math Library for SSE / SSE2
//  Header File
//  Version 2.0
//  Author Alex Klimovitski, Intel GmbH
/////////////////////////////////////////////////////////////////////////////
#include <emmintrin.h>

#include "AMaths.h"
#include "AMaths_internal.h"

#ifdef AMATHS_ASM

// am_atanr2_ps
//
// Naked __stdcall routine implemented entirely in inline SSE assembly.
// Judging by its name and parameter names it is a packed-single atan2 variant
// that takes the reciprocal of x ("rx") rather than x itself -- TODO confirm
// against the declaration in AMaths.h.
//
// Assumptions this body relies on but this file does not establish (confirm):
//   - y is expected in xmm0 and rx in xmm1 on entry; the code reads both
//     registers without ever loading them.
//   - the result is left in xmm0 for the caller.
//   - the meanings of the _ps_* constants are inferred from their names only
//     (sign-bit mask, 1, -1, pi, pi/2, coefficients s0..s3 / t0..t3); they are
//     defined outside this file.
//
// What the instructions compute, per lane, with t = y * rx:
//   1. z = rcpps(t) in lanes where (1 < t) or !(-1 <= t), otherwise z = t.
//   2. a = t3*z / (s3 + z*z + t2 / (s2 + z*z + t1 / (s1 + z*z + t0 / (s0 + z*z))))
//      where every division is a multiply by rcpps (an approximate reciprocal).
//   3. r = (sign(z) | pi_o_2) - a in the lanes that used the reciprocal,
//      otherwise r = a.
//   4. result = r where 0 <= rx, otherwise r + (sign(y) | pi).
//
// Uses xmm0-xmm7 and ecx. Two 16-byte values are spilled to memory just below
// the 16-byte-aligned-down stack pointer; esp itself is never modified.
// NOTE(review): that scratch area lies below esp and is not reserved by this
// function -- confirm this is safe in every context the routine is called from.
__m128 __declspec(naked) __stdcall am_atanr2_ps(__m128 y, __m128 rx)
{
	__asm
	{
		movaps	xmm2, _ps_am_sign_mask	// xmm2 = sign mask
		xorps	xmm3, xmm3				// xmm3 = 0
		mov		ecx, esp				// ecx will become the aligned scratch base
		movaps	xmm5, _ps_am_1			// xmm5 = upper threshold for the range test
		andps	xmm2, xmm0				// xmm2 = sign bits of the first input (presumably y)
		mulps	xmm0, xmm1				// xmm0 = t = xmm0 * xmm1 (presumably y * rx)
		and		ecx, ~15				// round ecx down to a 16-byte boundary so movaps stores are aligned
		orps	xmm2, _ps_am_pi			// xmm2 = _ps_am_pi carrying the sign of the first input
		cmpleps	xmm3, xmm1				// xmm3 = mask of lanes where 0 <= second input (presumably rx)

		movaps	xmm6, _ps_am_m1			// xmm6 = lower threshold for the range test
		rcpps	xmm4, xmm0				// xmm4 = approximate 1 / t

		cmpltps	xmm5, xmm0				// xmm5 = mask: _ps_am_1 < t
		cmpnleps	xmm6, xmm0			// xmm6 = mask: !(_ps_am_m1 <= t); also true for unordered (NaN) lanes
		movaps	xmm1, _ps_atan_s0		// xmm1 = s0 (the second input is no longer needed in a register)
		orps	xmm5, xmm6				// xmm5 = mask of lanes that will use 1/t instead of t

		// Spill the two values needed only at the very end; every xmm register
		// is reused by the evaluation below.
		movaps	[ecx - 16], xmm2		// spill: signed _ps_am_pi
		movaps	[ecx - 32], xmm3		// spill: (0 <= second input) mask

		andps	xmm4, xmm5				// keep 1/t only in the masked lanes
		movaps	xmm2, _ps_atan_t0		// xmm2 = t0
		movaps	xmm7, xmm5				// xmm7 = copy of the mask, kept for the final select
		andnps	xmm5, xmm0				// xmm5 = t in the unmasked lanes
		movaps	xmm3, _ps_atan_s1		// xmm3 = s1
		orps	xmm4, xmm5				// xmm4 = z = mask ? 1/t : t
		movaps	xmm0, xmm4				// xmm0 = z (kept for the sign and the final numerator)

		movaps	xmm6, _ps_atan_t1		// xmm6 = t1
		mulps	xmm4, xmm4				// xmm4 = z*z

		// Innermost term: t0 / (s0 + z*z)
		addps	xmm1, xmm4				// xmm1 = s0 + z*z
		movaps	xmm5, _ps_atan_s2		// xmm5 = s2
		rcpps	xmm1, xmm1				// xmm1 ~= 1 / (s0 + z*z)
		mulps	xmm1, xmm2				// xmm1 = t0 / (s0 + z*z)
		movaps	xmm2, _ps_atan_t2		// xmm2 = t2
		addps	xmm3, xmm4				// xmm3 = s1 + z*z
		addps	xmm1, xmm3				// xmm1 = s1 + z*z + t0 / (...)

		// Next level: t1 / (previous sum), then add s2 + z*z
		movaps	xmm3, _ps_atan_s3		// xmm3 = s3
		rcpps	xmm1, xmm1				// xmm1 ~= 1 / (previous sum)
		mulps	xmm1, xmm6				// xmm1 = t1 / (previous sum)
		movaps	xmm6, _ps_atan_t3		// xmm6 = t3
		addps	xmm5, xmm4				// xmm5 = s2 + z*z
		addps	xmm1, xmm5				// xmm1 = s2 + z*z + t1 / (...)

		// Next level: t2 / (previous sum), then add s3 + z*z
		movaps	xmm5, _ps_am_sign_mask	// xmm5 = sign mask (used below to extract the sign of z)
		rcpps	xmm1, xmm1				// xmm1 ~= 1 / (previous sum)
		mulps	xmm1, xmm2				// xmm1 = t2 / (previous sum)
		addps	xmm3, xmm4				// xmm3 = s3 + z*z
		movaps	xmm4, _ps_am_pi_o_2		// xmm4 = _ps_am_pi_o_2 (z*z is no longer needed)
		mulps	xmm6, xmm0				// xmm6 = t3 * z (final numerator)
		addps	xmm1, xmm3				// xmm1 = s3 + z*z + t2 / (...)

		andps	xmm0, xmm5				// xmm0 = sign bits of z
		rcpps	xmm1, xmm1				// xmm1 ~= 1 / (outermost denominator)
		movaps	xmm3, [ecx - 32]		// reload the (0 <= second input) mask
		mulps	xmm1, xmm6				// xmm1 = a = t3*z / (outermost denominator)

		orps	xmm0, xmm4				// xmm0 = _ps_am_pi_o_2 carrying the sign of z
		subps	xmm0, xmm1				// xmm0 = signed pi_o_2 - a (value for the reciprocal lanes)
		movaps	xmm2, [ecx - 16]		// reload the signed _ps_am_pi

		// Select per lane: reciprocal lanes take (signed pi_o_2 - a), others take a.
		andps	xmm0, xmm7
		andnps	xmm7, xmm1
		orps	xmm0, xmm7				// xmm0 = r

		// Select per lane: where 0 <= second input keep r, otherwise r + signed pi.
		movaps	xmm1, xmm0				// xmm1 = r
		andps	xmm0, xmm3				// r in the lanes where the mask is set
		addps	xmm1, xmm2				// xmm1 = r + signed pi
		andnps	xmm3, xmm1				// (r + signed pi) in the remaining lanes
		orps	xmm0, xmm3				// xmm0 = final result

		// Return and pop 32 bytes of arguments (two 16-byte __m128 values).
		// NOTE(review): this assumes the caller reserved 32 bytes of stack for
		// the parameters -- confirm against how the compiler passes __m128 here.
		ret		32
	}
}

#endif