;
;                                                                      
;    Module: MMX Alpha Blending                                        
;                                                                      
;                                                                      
;                                            (C) 2000 Twilight3D, Ltd. 
;                                            http://www.twilight3d.com 
;

;-----------------------------------------------------------------------
; r_begin -- shared entry sequence for the blend routines.
;
; Must be expanded as the very first thing in a routine, while esp still
; points at the return address, because the arguments are addressed
; relative to esp:
;     [esp+4] dst   [esp+8] sr0   [esp+12] sr1   [esp+16] count
;
; A count of zero returns to the caller immediately. The pixel loops in
; this file are bottom-tested (dec ecx / jnz), so entering one with
; ecx = 0 would wrap around and run 2^32 iterations.
;
; On exit:
;     edi = dst, esi = sr0, edx = sr1, ecx = count (non-zero)
;     mm4 = [mmx_mult0], mm5 = [mmx_mult1], mm6 = [mask64], mm7 = 0
; Stack: one saved eax plus a pushad frame; r_end unwinds exactly that.
;
; mmx_mult0 / _mmx_mult0 (and the mmx_mult1 pair) label the same storage
; in this file, so a single spelling is used for every platform.
;-----------------------------------------------------------------------
%macro r_begin 0
	cmp     dword [esp+16], 0   ; count == 0 ?
	jne     %%have_pixels
	ret                         ; nothing pushed yet, no MMX state touched
%%have_pixels:
	push    eax
	lea     eax, [esp+4]        ; eax -> return address slot
	pushad
	mov     edi, [eax+4]        ; dst
	mov     esi, [eax+8]        ; sr0
	mov     edx, [eax+12]       ; sr1
	mov     ecx, [eax+16]       ; count
	movq    mm4, [mmx_mult0]
	movq    mm5, [mmx_mult1]
	movq    mm6, [mask64]
	pxor    mm7, mm7            ; mm7 = 0, used as the unpack/pack zero operand
%endmacro

; r_end -- shared exit sequence: undo the pushad frame and the extra eax
; pushed by r_begin, leave MMX state with emms, and return to the caller.
%macro r_end 0
	popad                       ; restore all general registers
	pop     eax                 ; drop the eax saved ahead of pushad
	emms                        ; clear MMX state before returning
	ret
%endmacro


	segment .data

		global _mmx_mult0
		global mmx_mult0_
		global mmx_mult0
		global _mmx_mult1
		global mmx_mult1_
		global mmx_mult1

; The three qwords below are read with movq by r_begin. Start them on an
; 8-byte boundary (relative to the section) so none of them straddles one;
; each is 8 bytes long, so the later ones stay aligned too.
		align 8, db 0

; Multiplier loaded into mm4 by r_begin. Exported under three spellings;
; all three labels name the same 8 bytes. Initialised to zero here.
_mmx_mult0:
mmx_mult0_:
mmx_mult0:
			dd	0x0, 0x0

; Multiplier loaded into mm5 by r_begin; same three-way export.
_mmx_mult1:
mmx_mult1_:
mmx_mult1:
			dd	0x0, 0x0


; 0x00ff in each of the four words; loaded into mm6 by r_begin and used
; as the xor operand in the routines.
mask64:     dd  0x00ff00ff, 0x00ff00ff


	segment .text

	; Function pointer table
	;
	; 100 dword entries, laid out as 10 groups of 10. The group follows
	; the first factor in the routine name and the position inside the
	; group follows the second factor, both in the order:
	;   zero, one, srccolor, srcinvcolor, srcalpha, srcinvalpha,
	;   dstcolor, dstinvcolor, dstalpha, dstinvalpha
	; so entry index = first * 10 + second.
	; NOTE(review): the table lives in the .text segment and is exported
	; under three spellings; how callers index it is not visible here.

		global _mmx_alpha
		global mmx_alpha_
		global mmx_alpha
_mmx_alpha:
mmx_alpha_:
mmx_alpha:
	; first factor: zero
	dd mmx_zero_zero
	dd mmx_zero_one
	dd mmx_zero_srccolor
	dd mmx_zero_srcinvcolor
	dd mmx_zero_srcalpha
	dd mmx_zero_srcinvalpha
	dd mmx_zero_dstcolor
	dd mmx_zero_dstinvcolor
	dd mmx_zero_dstalpha
	dd mmx_zero_dstinvalpha
	; first factor: one
	dd mmx_one_zero
	dd mmx_one_one
	dd mmx_one_srccolor
	dd mmx_one_srcinvcolor
	dd mmx_one_srcalpha
	dd mmx_one_srcinvalpha
	dd mmx_one_dstcolor
	dd mmx_one_dstinvcolor
	dd mmx_one_dstalpha
	dd mmx_one_dstinvalpha
	; first factor: srccolor
	dd mmx_srccolor_zero
	dd mmx_srccolor_one
	dd mmx_srccolor_srccolor
	dd mmx_srccolor_srcinvcolor
	dd mmx_srccolor_srcalpha
	dd mmx_srccolor_srcinvalpha
	dd mmx_srccolor_dstcolor
	dd mmx_srccolor_dstinvcolor
	dd mmx_srccolor_dstalpha
	dd mmx_srccolor_dstinvalpha
	; first factor: srcinvcolor
	dd mmx_srcinvcolor_zero
	dd mmx_srcinvcolor_one
	dd mmx_srcinvcolor_srccolor
	dd mmx_srcinvcolor_srcinvcolor
	dd mmx_srcinvcolor_srcalpha
	dd mmx_srcinvcolor_srcinvalpha
	dd mmx_srcinvcolor_dstcolor
	dd mmx_srcinvcolor_dstinvcolor
	dd mmx_srcinvcolor_dstalpha
	dd mmx_srcinvcolor_dstinvalpha
	; first factor: srcalpha
	dd mmx_srcalpha_zero
	dd mmx_srcalpha_one
	dd mmx_srcalpha_srccolor
	dd mmx_srcalpha_srcinvcolor
	dd mmx_srcalpha_srcalpha
	dd mmx_srcalpha_srcinvalpha
	dd mmx_srcalpha_dstcolor
	dd mmx_srcalpha_dstinvcolor
	dd mmx_srcalpha_dstalpha
	dd mmx_srcalpha_dstinvalpha
	; first factor: srcinvalpha
	dd mmx_srcinvalpha_zero
	dd mmx_srcinvalpha_one
	dd mmx_srcinvalpha_srccolor
	dd mmx_srcinvalpha_srcinvcolor
	dd mmx_srcinvalpha_srcalpha
	dd mmx_srcinvalpha_srcinvalpha
	dd mmx_srcinvalpha_dstcolor
	dd mmx_srcinvalpha_dstinvcolor
	dd mmx_srcinvalpha_dstalpha
	dd mmx_srcinvalpha_dstinvalpha
	; first factor: dstcolor
	dd mmx_dstcolor_zero
	dd mmx_dstcolor_one
	dd mmx_dstcolor_srccolor
	dd mmx_dstcolor_srcinvcolor
	dd mmx_dstcolor_srcalpha
	dd mmx_dstcolor_srcinvalpha
	dd mmx_dstcolor_dstcolor
	dd mmx_dstcolor_dstinvcolor
	dd mmx_dstcolor_dstalpha
	dd mmx_dstcolor_dstinvalpha
	; first factor: dstinvcolor
	dd mmx_dstinvcolor_zero
	dd mmx_dstinvcolor_one
	dd mmx_dstinvcolor_srccolor
	dd mmx_dstinvcolor_srcinvcolor
	dd mmx_dstinvcolor_srcalpha
	dd mmx_dstinvcolor_srcinvalpha
	dd mmx_dstinvcolor_dstcolor
	dd mmx_dstinvcolor_dstinvcolor
	dd mmx_dstinvcolor_dstalpha
	dd mmx_dstinvcolor_dstinvalpha
	; first factor: dstalpha
	dd mmx_dstalpha_zero
	dd mmx_dstalpha_one
	dd mmx_dstalpha_srccolor
	dd mmx_dstalpha_srcinvcolor
	dd mmx_dstalpha_srcalpha
	dd mmx_dstalpha_srcinvalpha
	dd mmx_dstalpha_dstcolor
	dd mmx_dstalpha_dstinvcolor
	dd mmx_dstalpha_dstalpha
	dd mmx_dstalpha_dstinvalpha
	; first factor: dstinvalpha
	dd mmx_dstinvalpha_zero
	dd mmx_dstinvalpha_one
	dd mmx_dstinvalpha_srccolor
	dd mmx_dstinvalpha_srcinvcolor
	dd mmx_dstinvalpha_srcalpha
	dd mmx_dstinvalpha_srcinvalpha
	dd mmx_dstinvalpha_dstcolor
	dd mmx_dstinvalpha_dstinvcolor
	dd mmx_dstinvalpha_dstalpha
	dd mmx_dstinvalpha_dstinvalpha



; mmx_zero_zero -- fill dst with `count` zero dwords.
; Pixels are cleared two at a time with a 64-bit store of mm7 (= 0); an odd
; count is finished with a single `rep stosd` of eax = 0.
; After r_begin: edi = dst, ecx = count, mm7 = 0.
mmx_zero_zero:
	r_begin
	xor     eax, eax            ; dword written by the tail store
	push    ecx                 ; keep the full count for the tail
	shr     ecx, 1              ; number of pixel pairs
	jz      .tail
.pair:
	movq    [edi], mm7
	add     edi, 8
	dec     ecx
	jnz     .pair
.tail:
	pop     ecx
	and     ecx, 1              ; 1 if a single pixel is left, else 0
	rep     stosd
	r_end


; mmx_zero_one -- returns immediately: no argument is read, no memory is
; written and no register is changed.
; NOTE(review): the other routines read sr1 and write dst, so this no-op is
; presumably relying on dst and sr1 being the same buffer -- confirm
; against the callers.
mmx_zero_one:
	ret


; mmx_zero_srccolor -- per byte of each 32-bit pixel: out = (s * d) >> 8,
; where s is the sr0 byte and d the sr1 byte. One pixel per iteration.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count, mm7 = 0.
mmx_zero_srccolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	punpcklbw mm0, mm7          ; bytes -> words
	punpcklbw mm1, mm7
	pmullw    mm0, mm1          ; s * d
	psrlw     mm0, 8
	packuswb  mm0, mm7          ; words -> bytes
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_zero_srcinvcolor -- per byte: out = ((s ^ 0xff) * d) >> 8,
; s = sr0 byte, d = sr1 byte. One 32-bit pixel per iteration.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm6 = 0x00ff per word, mm7 = 0.
mmx_zero_srcinvcolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	punpcklbw mm0, mm7
	punpcklbw mm1, mm7
	pxor      mm0, mm6          ; s ^ 0xff
	pmullw    mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_zero_srcalpha -- per byte: out = (a * d) >> 8, where d is the sr1
; byte and a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm7 = 0.
mmx_zero_srcalpha:
	r_begin
.pixel:
	movd      mm0, [esi]
	movd      mm1, [edx]        ; d
	punpcklbw mm0, mm0          ; \
	punpckhwd mm0, mm0          ;  > replicate sr0 byte 3 into 4 words
	punpcklbw mm1, mm7          ; d bytes -> words
	punpckhbw mm0, mm7          ; /
	pmullw    mm0, mm4
	psrlw     mm0, 8            ; a
	pmullw    mm0, mm1          ; a * d
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_zero_srcinvalpha -- per byte: out = ((a ^ 0x00ff) * d) >> 8, where d
; is the sr1 byte and a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm6 = 0x00ff per word, mm7 = 0.
mmx_zero_srcinvalpha:
	r_begin
.pixel:
	movd      mm0, [esi]
	movd      mm1, [edx]        ; d
	punpcklbw mm0, mm0          ; \
	punpckhwd mm0, mm0          ;  > replicate sr0 byte 3 into 4 words
	punpcklbw mm1, mm7          ; d bytes -> words
	punpckhbw mm0, mm7          ; /
	pmullw    mm0, mm4
	psrlw     mm0, 8            ; a
	pxor      mm0, mm6          ; a ^ 0x00ff
	pmullw    mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_zero_dstcolor -- per byte: out = (d * d) >> 8, d = sr1 byte.
; sr0 is not read. One 32-bit pixel per iteration.
; After r_begin: edi = dst, edx = sr1, ecx = count, mm7 = 0.
mmx_zero_dstcolor:
	r_begin
.pixel:
	movd      mm0, [edx]        ; d
	punpcklbw mm0, mm7          ; bytes -> words
	pmullw    mm0, mm0          ; d * d
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_zero_dstinvcolor -- per byte: out = (d * (d ^ 0xff)) >> 8, d = sr1
; byte. sr0 is not read. One 32-bit pixel per iteration.
; After r_begin: edi = dst, edx = sr1, ecx = count,
; mm6 = 0x00ff per word, mm7 = 0.
mmx_zero_dstinvcolor:
	r_begin
.pixel:
	movd      mm0, [edx]        ; d
	punpcklbw mm0, mm7
	movq      mm1, mm0
	pxor      mm1, mm6          ; d ^ 0xff
	pmullw    mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_zero_dstalpha -- per byte: out = (a * d) >> 8, where d is the sr1
; byte and a = (byte 3 of the sr1 pixel * mmx_mult1 word) >> 8.
; sr0 is not read. One 32-bit pixel per iteration.
; After r_begin: edi = dst, edx = sr1, ecx = count, mm5 = mmx_mult1, mm7 = 0.
;
; The colour operand (mm1) is unpacked with punpcklbw. movd places the
; pixel in the low dword and zeroes the high dword, so a high-half unpack
; (punpckhbw) yields four zero words and the routine would store 0 for
; every pixel.
mmx_zero_dstalpha:
	r_begin
.pixel:
	movd      mm0, [edx]
	movq      mm1, mm0          ; keep a copy for the colour bytes
	punpcklbw mm0, mm0          ; \
	punpckhwd mm0, mm0          ;  > replicate sr1 byte 3 into 4 words
	punpckhbw mm0, mm7          ; /
	punpcklbw mm1, mm7          ; d bytes -> words (low half holds the pixel)
	pmullw    mm0, mm5
	psrlw     mm0, 8            ; a
	pmullw    mm0, mm1          ; a * d
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_zero_dstinvalpha -- per byte: out = ((a ^ 0x00ff) * d) >> 8, where d
; is the sr1 byte and a = (byte 3 of the sr1 pixel * mmx_mult1 word) >> 8.
; sr0 is not read. One 32-bit pixel per iteration.
; After r_begin: edi = dst, edx = sr1, ecx = count, mm5 = mmx_mult1,
; mm6 = 0x00ff per word, mm7 = 0.
;
; The colour operand (mm1) is unpacked with punpcklbw. movd places the
; pixel in the low dword and zeroes the high dword, so a high-half unpack
; (punpckhbw) yields four zero words and the routine would store 0 for
; every pixel.
mmx_zero_dstinvalpha:
	r_begin
.pixel:
	movd      mm0, [edx]
	movq      mm1, mm0          ; keep a copy for the colour bytes
	punpcklbw mm0, mm0          ; \
	punpckhwd mm0, mm0          ;  > replicate sr1 byte 3 into 4 words
	punpckhbw mm0, mm7          ; /
	punpcklbw mm1, mm7          ; d bytes -> words (low half holds the pixel)
	pmullw    mm0, mm5
	psrlw     mm0, 8            ; a
	pxor      mm0, mm6          ; a ^ 0x00ff
	pmullw    mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_one_zero -- copy `count` dwords from sr0 to dst.
; Pixels are moved two at a time through mm0; an odd count is finished
; with a single `rep movsd`. sr1 is not read.
; After r_begin: edi = dst, esi = sr0, ecx = count.
mmx_one_zero:
	r_begin
	push    ecx                 ; keep the full count for the tail
	shr     ecx, 1              ; number of pixel pairs
	jz      .tail
.pair:
	movq    mm0, [esi]
	movq    [edi], mm0
	add     esi, 8
	add     edi, 8
	dec     ecx
	jnz     .pair
.tail:
	pop     ecx
	and     ecx, 1              ; 1 if a single pixel is left, else 0
	rep     movsd
	r_end


; mmx_one_one -- per byte: out = s + d with unsigned byte saturation
; (paddusb), s = sr0 byte, d = sr1 byte. One 32-bit pixel per iteration.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count.
mmx_one_one:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	paddusb   mm0, mm1
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_one_srccolor -- per byte: out = s + ((s * d) >> 8) with unsigned
; byte saturation, s = sr0 byte, d = sr1 byte.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count, mm7 = 0.
mmx_one_srccolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; packed s kept for the final add
	punpcklbw mm0, mm7
	punpcklbw mm1, mm7
	pmullw    mm0, mm1          ; s * d
	psrlw     mm0, 8
	packuswb  mm0, mm7
	paddusb   mm0, mm2
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_one_srcinvcolor -- per byte: out = s + (((s ^ 0xff) * d) >> 8) with
; unsigned byte saturation, s = sr0 byte, d = sr1 byte.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm6 = 0x00ff per word, mm7 = 0.
mmx_one_srcinvcolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; packed s kept for the final add
	punpcklbw mm0, mm7
	punpcklbw mm1, mm7
	pxor      mm0, mm6          ; s ^ 0xff
	pmullw    mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	paddusb   mm0, mm2
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_one_srcalpha -- per byte: out = s + ((a * d) >> 8) with unsigned
; byte saturation, where s = sr0 byte, d = sr1 byte and
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm7 = 0.
mmx_one_srcalpha:
	r_begin
.pixel:
	movd      mm0, [esi]
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; packed s kept for the final add
	punpcklbw mm0, mm0          ; \
	punpckhwd mm0, mm0          ;  > replicate sr0 byte 3 into 4 words
	punpcklbw mm1, mm7          ; d bytes -> words
	punpckhbw mm0, mm7          ; /
	pmullw    mm0, mm4
	psrlw     mm0, 8            ; a
	pmullw    mm0, mm1          ; a * d
	psrlw     mm0, 8
	packuswb  mm0, mm7
	paddusb   mm0, mm2
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_one_srcinvalpha -- per byte: out = s + (((a ^ 0x00ff) * d) >> 8)
; with unsigned byte saturation, where s = sr0 byte, d = sr1 byte and
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm6 = 0x00ff per word, mm7 = 0.
mmx_one_srcinvalpha:
	r_begin
.pixel:
	movd      mm0, [esi]
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; packed s kept for the final add
	punpcklbw mm0, mm0          ; \
	punpckhwd mm0, mm0          ;  > replicate sr0 byte 3 into 4 words
	punpcklbw mm1, mm7          ; d bytes -> words
	punpckhbw mm0, mm7          ; /
	pmullw    mm0, mm4
	psrlw     mm0, 8            ; a
	pxor      mm0, mm6          ; a ^ 0x00ff
	pmullw    mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	paddusb   mm0, mm2
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_one_dstcolor -- per byte: out = s + ((d * d) >> 8) with unsigned
; byte saturation, s = sr0 byte, d = sr1 byte.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count, mm7 = 0.
mmx_one_dstcolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s (stays packed)
	movd      mm1, [edx]        ; d
	punpcklbw mm1, mm7
	pmullw    mm1, mm1          ; d * d
	psrlw     mm1, 8
	packuswb  mm1, mm7
	paddusb   mm0, mm1
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_one_dstinvcolor -- per byte: out = s + ((d * (d ^ 0xff)) >> 8) with
; unsigned byte saturation, s = sr0 byte, d = sr1 byte.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm6 = 0x00ff per word, mm7 = 0.
;
; The loop label is written with its colon; a bare identifier alone on a
; line is only accepted by NASM as an "orphan label" and draws a warning.
mmx_one_dstinvcolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s (stays packed)
	movd      mm1, [edx]        ; d
	punpcklbw mm1, mm7
	movq      mm2, mm1
	pxor      mm2, mm6          ; d ^ 0xff
	pmullw    mm1, mm2
	psrlw     mm1, 8
	packuswb  mm1, mm7
	paddusb   mm0, mm1
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_one_dstalpha -- per byte: out = s + ((a * d) >> 8) with unsigned
; byte saturation, where s = sr0 byte, d = sr1 byte and
; a = (byte 3 of the sr1 pixel * mmx_mult1 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm5 = mmx_mult1, mm7 = 0.
mmx_one_dstalpha:
	r_begin
.pixel:
	movd      mm0, [edx]
	movd      mm2, [esi]        ; packed s kept for the final add
	movq      mm1, mm0          ; copy for the colour bytes
	punpcklbw mm0, mm0          ; \
	punpckhwd mm0, mm0          ;  > replicate sr1 byte 3 into 4 words
	punpckhbw mm0, mm7          ; /
	punpcklbw mm1, mm7          ; d bytes -> words
	pmullw    mm0, mm5
	psrlw     mm0, 8            ; a
	pmullw    mm0, mm1          ; a * d
	psrlw     mm0, 8
	packuswb  mm0, mm7
	paddusb   mm0, mm2
	movd      [edi], mm0
	add       edx, 4
	add       esi, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_one_dstinvalpha -- per byte: out = s + (((a ^ 0x00ff) * d) >> 8)
; with unsigned byte saturation, where s = sr0 byte, d = sr1 byte and
; a = (byte 3 of the sr1 pixel * mmx_mult1 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm5 = mmx_mult1, mm6 = 0x00ff per word, mm7 = 0.
mmx_one_dstinvalpha:
	r_begin
.pixel:
	movd      mm0, [edx]
	movd      mm2, [esi]        ; packed s kept for the final add
	movq      mm1, mm0          ; copy for the colour bytes
	punpcklbw mm0, mm0          ; \
	punpckhwd mm0, mm0          ;  > replicate sr1 byte 3 into 4 words
	punpckhbw mm0, mm7          ; /
	punpcklbw mm1, mm7          ; d bytes -> words
	pmullw    mm0, mm5
	psrlw     mm0, 8            ; a
	pxor      mm0, mm6          ; a ^ 0x00ff
	pmullw    mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	paddusb   mm0, mm2
	movd      [edi], mm0
	add       edx, 4
	add       esi, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srccolor_zero -- per byte: out = (s * s) >> 8, s = sr0 byte.
; sr1 is not read. One 32-bit pixel per iteration.
; After r_begin: edi = dst, esi = sr0, ecx = count, mm7 = 0.
mmx_srccolor_zero:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	punpcklbw mm0, mm7          ; bytes -> words
	pmullw    mm0, mm0          ; s * s
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edi, 4
	add       esi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srccolor_one -- per byte: out = ((s * s) >> 8) + d with unsigned
; byte saturation, s = sr0 byte, d = sr1 byte.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count, mm7 = 0.
mmx_srccolor_one:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d (stays packed)
	punpcklbw mm0, mm7
	pmullw    mm0, mm0          ; s * s
	psrlw     mm0, 8
	packuswb  mm0, mm7
	paddusb   mm0, mm1
	movd      [edi], mm0
	add       edx, 4
	add       edi, 4
	add       esi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srccolor_srccolor -- per byte: out = ((s * s) >> 8) + ((d * s) >> 8),
; added as words with unsigned saturation and packed back to bytes with
; unsigned saturation. s = sr0 byte, d = sr1 byte.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count, mm7 = 0.
mmx_srccolor_srccolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	punpcklbw mm0, mm7
	punpcklbw mm1, mm7
	pmullw    mm1, mm0          ; d * s
	pmullw    mm0, mm0          ; s * s
	psrlw     mm1, 8
	psrlw     mm0, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edx, 4
	add       edi, 4
	add       esi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srccolor_srcinvcolor -- per byte:
;   out = ((s * s) >> 8) + ((d * (s ^ 0xff)) >> 8)
; added as words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm6 = 0x00ff per word, mm7 = 0.
mmx_srccolor_srcinvcolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	punpcklbw mm0, mm7
	punpcklbw mm1, mm7
	movq      mm2, mm0
	pxor      mm2, mm6          ; s ^ 0xff
	pmullw    mm0, mm0          ; s * s
	pmullw    mm1, mm2          ; d * (s ^ 0xff)
	psrlw     mm0, 8
	psrlw     mm1, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edx, 4
	add       edi, 4
	add       esi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srccolor_srcalpha -- per byte:
;   out = ((s * s) >> 8) + ((d * a) >> 8)
; added as words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm7 = 0.
mmx_srccolor_srcalpha:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm0, mm7
	punpcklbw mm2, mm2
	punpcklbw mm1, mm7
	punpckhwd mm2, mm2
	pmullw    mm0, mm0          ; s * s
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	pmullw    mm2, mm4
	psrlw     mm2, 8            ; a
	pmullw    mm1, mm2          ; d * a
	psrlw     mm0, 8
	psrlw     mm1, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srccolor_srcinvalpha -- per byte:
;   out = ((s * s) >> 8) + ((d * (a ^ 0x00ff)) >> 8)
; added as words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm6 = 0x00ff per word, mm7 = 0.
mmx_srccolor_srcinvalpha:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm0, mm7
	punpcklbw mm2, mm2
	punpcklbw mm1, mm7
	punpckhwd mm2, mm2
	pmullw    mm0, mm0          ; s * s
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	pmullw    mm2, mm4
	psrlw     mm2, 8            ; a
	pxor      mm2, mm6          ; a ^ 0x00ff
	pmullw    mm1, mm2
	psrlw     mm0, 8
	psrlw     mm1, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srccolor_dstcolor -- per byte: out = ((s * s) >> 8) + ((d * d) >> 8),
; added as words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count, mm7 = 0.
mmx_srccolor_dstcolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	punpcklbw mm0, mm7
	punpcklbw mm1, mm7
	pmullw    mm0, mm0          ; s * s
	pmullw    mm1, mm1          ; d * d
	psrlw     mm0, 8
	psrlw     mm1, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edx, 4
	add       edi, 4
	add       esi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srccolor_dstinvcolor -- per byte:
;   out = ((s * s) >> 8) + ((d * (d ^ 0xff)) >> 8)
; added as words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm6 = 0x00ff per word, mm7 = 0.
mmx_srccolor_dstinvcolor:
	r_begin
.pixel:
	movd      mm1, [edx]        ; d
	movd      mm0, [esi]        ; s
	punpcklbw mm1, mm7
	punpcklbw mm0, mm7
	movq      mm2, mm1
	pmullw    mm0, mm0          ; s * s
	pxor      mm2, mm6          ; d ^ 0xff
	pmullw    mm1, mm2
	psrlw     mm0, 8
	psrlw     mm1, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edx, 4
	add       edi, 4
	add       esi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srccolor_dstalpha -- per byte:
;   out = ((s * s) >> 8) + ((d * a) >> 8)
; added as words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr1 pixel * mmx_mult1 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm5 = mmx_mult1, mm7 = 0.
mmx_srccolor_dstalpha:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm1          ; copy used to build a
	punpcklbw mm2, mm2
	punpcklbw mm0, mm7
	punpckhwd mm2, mm2
	punpcklbw mm1, mm7
	punpckhbw mm2, mm7          ; sr1 byte 3 in all 4 words
	pmullw    mm2, mm5
	psrlw     mm2, 8            ; a
	pmullw    mm0, mm0          ; s * s
	pmullw    mm1, mm2          ; d * a
	psrlw     mm0, 8
	psrlw     mm1, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srccolor_dstinvalpha -- per byte:
;   out = ((s * s) >> 8) + ((d * (a ^ 0x00ff)) >> 8)
; added as words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr1 pixel * mmx_mult1 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm5 = mmx_mult1, mm6 = 0x00ff per word, mm7 = 0.
mmx_srccolor_dstinvalpha:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm1          ; copy used to build a
	punpcklbw mm2, mm2
	punpcklbw mm0, mm7
	punpckhwd mm2, mm2
	punpcklbw mm1, mm7
	punpckhbw mm2, mm7          ; sr1 byte 3 in all 4 words
	pmullw    mm2, mm5
	psrlw     mm2, 8            ; a
	pxor      mm2, mm6          ; a ^ 0x00ff
	pmullw    mm0, mm0          ; s * s
	pmullw    mm1, mm2
	psrlw     mm0, 8
	psrlw     mm1, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvcolor_zero -- per byte: out = (s * (s ^ 0xff)) >> 8, s = sr0
; byte. sr1 is not read. One 32-bit pixel per iteration.
; After r_begin: edi = dst, esi = sr0, ecx = count,
; mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvcolor_zero:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	punpcklbw mm0, mm7
	movq      mm1, mm0
	pxor      mm1, mm6          ; s ^ 0xff
	pmullw    mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edi, 4
	add       esi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvcolor_one -- per byte: out = ((s * (s ^ 0xff)) >> 8) + d with
; unsigned byte saturation, s = sr0 byte, d = sr1 byte.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvcolor_one:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm2, [edx]        ; d (stays packed)
	punpcklbw mm0, mm7
	movq      mm1, mm0
	pxor      mm1, mm6          ; s ^ 0xff
	pmullw    mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	paddusb   mm0, mm2
	movd      [edi], mm0
	add       edx, 4
	add       edi, 4
	add       esi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvcolor_srccolor -- per byte:
;   out = ((s * (s ^ 0xff)) >> 8) + ((d * s) >> 8)
; added as words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvcolor_srccolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	punpcklbw mm0, mm7
	punpcklbw mm1, mm7
	movq      mm2, mm0
	pxor      mm2, mm6          ; s ^ 0xff
	pmullw    mm1, mm0          ; d * s
	pmullw    mm0, mm2          ; s * (s ^ 0xff)
	psrlw     mm1, 8
	psrlw     mm0, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edx, 4
	add       edi, 4
	add       esi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvcolor_srcinvcolor -- per byte, with i = s ^ 0xff:
;   out = ((s * i) >> 8) + ((d * i) >> 8)
; added as words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvcolor_srcinvcolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	punpcklbw mm0, mm7
	punpcklbw mm1, mm7
	movq      mm2, mm0
	pxor      mm2, mm6          ; i = s ^ 0xff
	pmullw    mm1, mm2          ; d * i
	pmullw    mm0, mm2          ; s * i
	psrlw     mm1, 8
	psrlw     mm0, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edx, 4
	add       edi, 4
	add       esi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvcolor_srcalpha -- per byte:
;   out = ((s * (s ^ 0xff)) >> 8) + ((d * a) >> 8)
; added as words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvcolor_srcalpha:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm3, mm0          ; copy that becomes s ^ 0xff
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm0, mm7
	punpcklbw mm2, mm2
	punpcklbw mm1, mm7
	punpckhwd mm2, mm2
	punpcklbw mm3, mm7
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	pxor      mm3, mm6          ; s ^ 0xff
	pmullw    mm2, mm4
	pmullw    mm0, mm3          ; s * (s ^ 0xff)
	psrlw     mm2, 8            ; a
	pmullw    mm1, mm2          ; d * a
	psrlw     mm0, 8
	psrlw     mm1, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvcolor_srcinvalpha -- per byte:
;   out = ((s * (s ^ 0xff)) >> 8) + ((d * (a ^ 0x00ff)) >> 8)
; added as words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvcolor_srcinvalpha:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm3, mm0          ; copy that becomes s ^ 0xff
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm0, mm7
	punpcklbw mm2, mm2
	punpcklbw mm1, mm7
	punpckhwd mm2, mm2
	punpcklbw mm3, mm7
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	pxor      mm3, mm6          ; s ^ 0xff
	pmullw    mm2, mm4
	pmullw    mm0, mm3          ; s * (s ^ 0xff)
	psrlw     mm2, 8            ; a
	pxor      mm2, mm6          ; a ^ 0x00ff
	pmullw    mm1, mm2
	psrlw     mm0, 8
	psrlw     mm1, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvcolor_dstcolor -- per byte:
;   out = ((s * (s ^ 0xff)) >> 8) + ((d * d) >> 8)
; added as words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvcolor_dstcolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	punpcklbw mm0, mm7
	punpcklbw mm1, mm7
	movq      mm2, mm0
	pxor      mm2, mm6          ; s ^ 0xff
	pmullw    mm0, mm2
	pmullw    mm1, mm1          ; d * d
	psrlw     mm0, 8
	psrlw     mm1, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edx, 4
	add       edi, 4
	add       esi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvcolor_dstinvcolor -- per byte:
;   out = ((s * (s ^ 0xff)) >> 8) + ((d * (d ^ 0xff)) >> 8)
; added as words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvcolor_dstinvcolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	punpcklbw mm0, mm7
	punpcklbw mm1, mm7
	movq      mm2, mm0
	movq      mm3, mm1
	pxor      mm2, mm6          ; s ^ 0xff
	pxor      mm3, mm6          ; d ^ 0xff
	pmullw    mm0, mm2
	pmullw    mm1, mm3
	psrlw     mm0, 8
	psrlw     mm1, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edx, 4
	add       edi, 4
	add       esi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvcolor_dstalpha -- per byte:
;   out = ((s * (s ^ 0xff)) >> 8) + ((d * a) >> 8)
; added as words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr1 pixel * mmx_mult1 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm5 = mmx_mult1, mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvcolor_dstalpha:
	r_begin
.pixel:
	movd      mm1, [edx]        ; d
	movd      mm0, [esi]        ; s
	movq      mm2, mm1          ; copy used to build a
	punpcklbw mm0, mm7
	punpcklbw mm2, mm2
	punpcklbw mm1, mm7
	punpckhwd mm2, mm2
	movq      mm3, mm0
	punpckhbw mm2, mm7          ; sr1 byte 3 in all 4 words
	pmullw    mm2, mm5
	pxor      mm3, mm6          ; s ^ 0xff
	psrlw     mm2, 8            ; a
	pmullw    mm0, mm3          ; s * (s ^ 0xff)
	pmullw    mm1, mm2          ; d * a
	psrlw     mm0, 8
	psrlw     mm1, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end

; mmx_srcinvcolor_dstinvalpha -- per byte:
;   out = ((s * (s ^ 0xff)) >> 8) + ((d * (a ^ 0x00ff)) >> 8)
; added as words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr1 pixel * mmx_mult1 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm5 = mmx_mult1, mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvcolor_dstinvalpha:
	r_begin
.pixel:
	movd      mm1, [edx]        ; d
	movd      mm0, [esi]        ; s
	movq      mm2, mm1          ; copy used to build a
	punpcklbw mm0, mm7
	punpcklbw mm2, mm2
	punpcklbw mm1, mm7
	punpckhwd mm2, mm2
	movq      mm3, mm0
	punpckhbw mm2, mm7          ; sr1 byte 3 in all 4 words
	pxor      mm3, mm6          ; s ^ 0xff
	pmullw    mm2, mm5
	pmullw    mm0, mm3          ; s * (s ^ 0xff)
	psrlw     mm2, 8            ; a
	pxor      mm2, mm6          ; a ^ 0x00ff
	pmullw    mm1, mm2
	psrlw     mm0, 8
	psrlw     mm1, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcalpha_zero -- per byte: out = (s * a) >> 8, where s = sr0 byte and
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8. sr1 is not read.
; After r_begin: edi = dst, esi = sr0, ecx = count, mm4 = mmx_mult0, mm7 = 0.
mmx_srcalpha_zero:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm2, mm2          ; \
	punpckhwd mm2, mm2          ;  > replicate sr0 byte 3 into 4 words
	punpckhbw mm2, mm7          ; /
	punpcklbw mm0, mm7          ; s bytes -> words
	pmullw    mm2, mm4
	psrlw     mm2, 8            ; a
	pmullw    mm0, mm2          ; s * a
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edi, 4
	add       esi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcalpha_one -- per byte: out = ((s * a) >> 8) + d with unsigned
; byte saturation, where s = sr0 byte, d = sr1 byte and
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm7 = 0.
mmx_srcalpha_one:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d (stays packed)
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm2, mm2          ; \
	punpckhwd mm2, mm2          ;  > replicate sr0 byte 3 into 4 words
	punpckhbw mm2, mm7          ; /
	pmullw    mm2, mm4
	psrlw     mm2, 8            ; a
	punpcklbw mm0, mm7          ; s bytes -> words
	pmullw    mm0, mm2          ; s * a
	psrlw     mm0, 8
	packuswb  mm0, mm7
	paddusb   mm0, mm1
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcalpha_srccolor -- per byte:
;   out = ((s * a) >> 8) + ((d * s) >> 8)
; added as words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm7 = 0.
mmx_srcalpha_srccolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm1, mm7
	punpcklbw mm2, mm2
	punpcklbw mm0, mm7
	punpckhwd mm2, mm2
	pmullw    mm1, mm0          ; d * s
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	psrlw     mm1, 8
	pmullw    mm2, mm4
	psrlw     mm2, 8            ; a
	pmullw    mm0, mm2          ; s * a
	psrlw     mm0, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcalpha_srcinvcolor -- per byte:
;   out = ((s * a) >> 8) + ((d * (s ^ 0xff)) >> 8)
; added as words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; mm0 is inverted in place for the d term and inverted back afterwards.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm6 = 0x00ff per word, mm7 = 0.
mmx_srcalpha_srcinvcolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm1, mm7
	punpcklbw mm0, mm7
	punpcklbw mm2, mm2
	pxor      mm0, mm6          ; s ^ 0xff
	punpckhwd mm2, mm2
	pmullw    mm1, mm0          ; d * (s ^ 0xff)
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	psrlw     mm1, 8
	pmullw    mm2, mm4
	pxor      mm0, mm6          ; back to s
	psrlw     mm2, 8            ; a
	pmullw    mm0, mm2          ; s * a
	psrlw     mm0, 8
	paddusw   mm0, mm1
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcalpha_srcalpha -- per byte: out = (s * a + d * a) >> 8, where the
; two 16-bit products are added with unsigned word saturation before the
; shift. s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm7 = 0.
mmx_srcalpha_srcalpha:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm0, mm7
	punpcklbw mm2, mm2
	punpcklbw mm1, mm7
	punpckhwd mm2, mm2
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	pmullw    mm2, mm4
	psrlw     mm2, 8            ; a
	pmullw    mm0, mm2          ; s * a
	pmullw    mm1, mm2          ; d * a
	paddusw   mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcalpha_srcinvalpha -- per byte:
;   out = (s * a + d * (a ^ 0x00ff)) >> 8
; where the two 16-bit products are added with unsigned word saturation
; before the shift. s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm6 = 0x00ff per word, mm7 = 0.
mmx_srcalpha_srcinvalpha:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm0, mm7
	punpcklbw mm2, mm2
	punpcklbw mm1, mm7
	punpckhwd mm2, mm2
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	pmullw    mm2, mm4
	psrlw     mm2, 8            ; a
	pmullw    mm0, mm2          ; s * a
	pxor      mm2, mm6          ; a ^ 0x00ff
	pmullw    mm1, mm2
	paddusw   mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcalpha_dstcolor -- per byte: out = (s * a + d * d) >> 8, where the
; two 16-bit products are added with unsigned word saturation before the
; shift. s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm7 = 0.
mmx_srcalpha_dstcolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm2, mm2
	punpcklbw mm0, mm7
	punpckhwd mm2, mm2
	punpcklbw mm1, mm7
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	pmullw    mm2, mm4
	psrlw     mm2, 8            ; a
	pmullw    mm0, mm2          ; s * a
	pmullw    mm1, mm1          ; d * d
	paddusw   mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcalpha_dstinvcolor -- per byte:
;   out = (s * a + d * (d ^ 0xff)) >> 8
; where the two 16-bit products are added with unsigned word saturation
; before the shift. s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm6 = 0x00ff per word, mm7 = 0.
mmx_srcalpha_dstinvcolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm2, mm2
	punpcklbw mm0, mm7
	punpckhwd mm2, mm2
	punpcklbw mm1, mm7
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	pmullw    mm2, mm4
	movq      mm3, mm1
	psrlw     mm2, 8            ; a
	pxor      mm3, mm6          ; d ^ 0xff
	pmullw    mm0, mm2          ; s * a
	pmullw    mm1, mm3
	paddusw   mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcalpha_dstalpha -- per byte: out = (s * as + d * ad) >> 8, where
; the two 16-bit products are added with unsigned word saturation before
; the shift. s = sr0 byte, d = sr1 byte,
;   as = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8
;   ad = (byte 3 of the sr1 pixel * mmx_mult1 word) >> 8
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm5 = mmx_mult1, mm7 = 0.
mmx_srcalpha_dstalpha:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build as
	movq      mm3, mm1          ; copy used to build ad
	punpcklbw mm2, mm2
	punpcklbw mm3, mm3
	punpckhwd mm2, mm2
	punpckhwd mm3, mm3
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	punpckhbw mm3, mm7          ; sr1 byte 3 in all 4 words
	pmullw    mm2, mm4
	pmullw    mm3, mm5
	psrlw     mm2, 8            ; as
	psrlw     mm3, 8            ; ad
	punpcklbw mm0, mm7
	punpcklbw mm1, mm7
	pmullw    mm0, mm2          ; s * as
	pmullw    mm1, mm3          ; d * ad
	paddusw   mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcalpha_dstinvalpha -- per byte:
;   out = (s * as + d * (ad ^ 0x00ff)) >> 8
; where the two 16-bit products are added with unsigned word saturation
; before the shift. s = sr0 byte, d = sr1 byte,
;   as = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8
;   ad = (byte 3 of the sr1 pixel * mmx_mult1 word) >> 8
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm5 = mmx_mult1, mm6 = 0x00ff per word, mm7 = 0.
mmx_srcalpha_dstinvalpha:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build as
	movq      mm3, mm1          ; copy used to build ad
	punpcklbw mm2, mm2
	punpcklbw mm3, mm3
	punpckhwd mm2, mm2
	punpckhwd mm3, mm3
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	punpckhbw mm3, mm7          ; sr1 byte 3 in all 4 words
	pmullw    mm2, mm4
	pmullw    mm3, mm5
	psrlw     mm2, 8            ; as
	psrlw     mm3, 8            ; ad
	pxor      mm3, mm6          ; ad ^ 0x00ff
	punpcklbw mm0, mm7
	punpcklbw mm1, mm7
	pmullw    mm0, mm2
	pmullw    mm1, mm3
	paddusw   mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvalpha_zero -- per byte: out = (s * (a ^ 0x00ff)) >> 8, where
; s = sr0 byte and a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; sr1 is not read.
; After r_begin: edi = dst, esi = sr0, ecx = count, mm4 = mmx_mult0,
; mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvalpha_zero:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm2, mm2
	punpcklbw mm0, mm7          ; s bytes -> words
	punpckhwd mm2, mm2
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	pmullw    mm2, mm4
	psrlw     mm2, 8            ; a
	pxor      mm2, mm6          ; a ^ 0x00ff
	pmullw    mm0, mm2
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvalpha_one -- per byte: out = ((s * (a ^ 0x00ff)) >> 8) + d
; with unsigned byte saturation, where s = sr0 byte, d = sr1 byte and
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvalpha_one:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d (stays packed)
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm2, mm2
	punpcklbw mm0, mm7          ; s bytes -> words
	punpckhwd mm2, mm2
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	pmullw    mm2, mm4
	psrlw     mm2, 8            ; a
	pxor      mm2, mm6          ; a ^ 0x00ff
	pmullw    mm0, mm2
	psrlw     mm0, 8
	packuswb  mm0, mm7
	paddusb   mm0, mm1
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvalpha_srccolor -- per byte:
;   out = (s * (a ^ 0x00ff) + d * s) >> 8
; where the two 16-bit products are added with unsigned word saturation
; before the shift. s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvalpha_srccolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm0, mm7
	punpcklbw mm2, mm2
	punpcklbw mm1, mm7
	punpckhwd mm2, mm2
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	pmullw    mm1, mm0          ; d * s
	pmullw    mm2, mm4
	psrlw     mm2, 8            ; a
	pxor      mm2, mm6          ; a ^ 0x00ff
	pmullw    mm0, mm2
	paddusw   mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvalpha_srcinvcolor -- per byte:
;   out = (s * (a ^ 0x00ff) + d * (s ^ 0xff)) >> 8
; where the two 16-bit products are added with unsigned word saturation
; before the shift. s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; mm0 is inverted in place for the d term and inverted back afterwards.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvalpha_srcinvcolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm0, mm7
	punpcklbw mm2, mm2
	punpcklbw mm1, mm7
	punpckhwd mm2, mm2
	pxor      mm0, mm6          ; s ^ 0xff
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	pmullw    mm1, mm0          ; d * (s ^ 0xff)
	pmullw    mm2, mm4
	psrlw     mm2, 8            ; a
	pxor      mm0, mm6          ; back to s
	pxor      mm2, mm6          ; a ^ 0x00ff
	pmullw    mm0, mm2
	paddusw   mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvalpha_srcalpha -- per byte:
;   out = (s * (a ^ 0x00ff) + d * a) >> 8
; where the two 16-bit products are added with unsigned word saturation
; before the shift. s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvalpha_srcalpha:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm1, mm7
	punpcklbw mm2, mm2
	punpcklbw mm0, mm7
	punpckhwd mm2, mm2
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	pmullw    mm2, mm4
	psrlw     mm2, 8            ; a
	pmullw    mm1, mm2          ; d * a
	pxor      mm2, mm6          ; a ^ 0x00ff
	pmullw    mm0, mm2
	paddusw   mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvalpha_srcinvalpha -- per byte, with i = a ^ 0x00ff:
;   out = (s * i + d * i) >> 8
; where the two 16-bit products are added with unsigned word saturation
; before the shift. s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvalpha_srcinvalpha:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm1, mm7
	punpcklbw mm2, mm2
	punpcklbw mm0, mm7
	punpckhwd mm2, mm2
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	pmullw    mm2, mm4
	psrlw     mm2, 8            ; a
	pxor      mm2, mm6          ; i = a ^ 0x00ff
	pmullw    mm1, mm2          ; d * i
	pmullw    mm0, mm2          ; s * i
	paddusw   mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvalpha_dstcolor -- per byte:
;   out = (s * (a ^ 0x00ff) + d * d) >> 8
; where the two 16-bit products are added with unsigned word saturation
; before the shift. s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvalpha_dstcolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm0, mm7
	punpcklbw mm2, mm2
	punpcklbw mm1, mm7
	punpckhwd mm2, mm2
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	pmullw    mm2, mm4
	psrlw     mm2, 8            ; a
	pxor      mm2, mm6          ; a ^ 0x00ff
	pmullw    mm0, mm2
	pmullw    mm1, mm1          ; d * d
	paddusw   mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvalpha_dstinvcolor -- per byte:
;   out = (s * (a ^ 0x00ff) + d * (d ^ 0xff)) >> 8
; where the two 16-bit products are added with unsigned word saturation
; before the shift. s = sr0 byte, d = sr1 byte,
; a = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvalpha_dstinvcolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build a
	punpcklbw mm0, mm7
	punpcklbw mm2, mm2
	punpcklbw mm1, mm7
	punpckhwd mm2, mm2
	movq      mm3, mm1
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	pmullw    mm2, mm4
	psrlw     mm2, 8            ; a
	pxor      mm3, mm6          ; d ^ 0xff
	pxor      mm2, mm6          ; a ^ 0x00ff
	pmullw    mm0, mm2
	pmullw    mm1, mm3
	paddusw   mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       esi, 4
	add       edx, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvalpha_dstalpha -- per byte:
;   out = (s * (as ^ 0x00ff) + d * ad) >> 8
; where the two 16-bit products are added with unsigned word saturation
; before the shift. s = sr0 byte, d = sr1 byte,
;   as = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8
;   ad = (byte 3 of the sr1 pixel * mmx_mult1 word) >> 8
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm5 = mmx_mult1, mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvalpha_dstalpha:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build as
	movq      mm3, mm1          ; copy used to build ad
	punpcklbw mm2, mm2
	punpcklbw mm3, mm3
	punpckhwd mm2, mm2
	punpckhwd mm3, mm3
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	punpckhbw mm3, mm7          ; sr1 byte 3 in all 4 words
	pmullw    mm2, mm4
	pmullw    mm3, mm5
	psrlw     mm2, 8            ; as
	psrlw     mm3, 8            ; ad
	pxor      mm2, mm6          ; as ^ 0x00ff
	punpcklbw mm0, mm7
	punpcklbw mm1, mm7
	pmullw    mm0, mm2
	pmullw    mm1, mm3
	paddusw   mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edx, 4
	add       esi, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_srcinvalpha_dstinvalpha -- per byte:
;   out = (s * (as ^ 0x00ff) + d * (ad ^ 0x00ff)) >> 8
; where the two 16-bit products are added with unsigned word saturation
; before the shift. s = sr0 byte, d = sr1 byte,
;   as = (byte 3 of the sr0 pixel * mmx_mult0 word) >> 8
;   ad = (byte 3 of the sr1 pixel * mmx_mult1 word) >> 8
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count,
; mm4 = mmx_mult0, mm5 = mmx_mult1, mm6 = 0x00ff per word, mm7 = 0.
mmx_srcinvalpha_dstinvalpha:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	movq      mm2, mm0          ; copy used to build as
	movq      mm3, mm1          ; copy used to build ad
	punpcklbw mm2, mm2
	punpcklbw mm3, mm3
	punpckhwd mm2, mm2
	punpckhwd mm3, mm3
	punpckhbw mm2, mm7          ; sr0 byte 3 in all 4 words
	punpckhbw mm3, mm7          ; sr1 byte 3 in all 4 words
	pmullw    mm2, mm4
	pmullw    mm3, mm5
	psrlw     mm2, 8            ; as
	psrlw     mm3, 8            ; ad
	pxor      mm2, mm6          ; as ^ 0x00ff
	pxor      mm3, mm6          ; ad ^ 0x00ff
	punpcklbw mm0, mm7
	punpcklbw mm1, mm7
	pmullw    mm0, mm2
	pmullw    mm1, mm3
	paddusw   mm0, mm1
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edx, 4
	add       esi, 4
	add       edi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_dstcolor_zero -- per byte: out = (s * d) >> 8, s = sr0 byte,
; d = sr1 byte. One 32-bit pixel per iteration.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count, mm7 = 0.
mmx_dstcolor_zero:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	punpcklbw mm0, mm7          ; bytes -> words
	punpcklbw mm1, mm7
	pmullw    mm0, mm1          ; s * d
	psrlw     mm0, 8
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edx, 4
	add       edi, 4
	add       esi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_dstcolor_one -- per byte: out = ((s * d) >> 8) + d, added as words
; and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count, mm7 = 0.
mmx_dstcolor_one:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	punpcklbw mm0, mm7
	punpcklbw mm1, mm7
	pmullw    mm0, mm1          ; s * d
	psrlw     mm0, 8
	paddusw   mm0, mm1          ; + d (still in word form)
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edx, 4
	add       edi, 4
	add       esi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_dstcolor_srccolor -- per byte: out = 2 * ((s * d) >> 8), doubled as
; words and packed to bytes, both with unsigned saturation.
; s = sr0 byte, d = sr1 byte.
; After r_begin: edi = dst, esi = sr0, edx = sr1, ecx = count, mm7 = 0.
mmx_dstcolor_srccolor:
	r_begin
.pixel:
	movd      mm0, [esi]        ; s
	movd      mm1, [edx]        ; d
	punpcklbw mm0, mm7
	punpcklbw mm1, mm7
	pmullw    mm0, mm1          ; s * d
	psrlw     mm0, 8
	paddusw   mm0, mm0          ; both terms are the same product
	packuswb  mm0, mm7
	movd      [edi], mm0
	add       edx, 4
	add       edi, 4
	add       esi, 4
	dec       ecx
	jnz       .pixel
	r_end


; mmx_dstcolor_srcinvcolor -- per 8-bit channel:
;   out = (s * d + d * (255 - s)) >> 8
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstcolor_srcinvcolor:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movd        mm0, [esi]          ; s
	punpcklbw   mm1, mm7            ; widen bytes to words (mm7 = 0)
	punpcklbw   mm0, mm7
	movq        mm2, mm0
	pxor        mm2, mm6            ; 255 - s
	pmullw      mm0, mm1            ; s * d
	pmullw      mm1, mm2            ; d * (255 - s)
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         edx, 4
	add         esi, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstcolor_srcalpha -- per 8-bit channel:
;   out = (s * d + d * sa) >> 8                     (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   sa = (top byte of s * mmx_mult0) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstcolor_srcalpha:
	r_begin
	.next_pixel:
	movd        mm0, [esi]          ; s
	movq        mm2, mm0
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of s
	pmullw      mm2, mm4
	psrlw       mm2, 8              ; sa
	movd        mm1, [edx]          ; d
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	pmullw      mm0, mm1            ; s * d
	pmullw      mm1, mm2            ; d * sa
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstcolor_srcinvalpha -- per 8-bit channel:
;   out = (s * d + d * (sa ^ 0xFF)) >> 8            (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   sa = (top byte of s * mmx_mult0) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstcolor_srcinvalpha:
	r_begin
	.next_pixel:
	movd        mm0, [esi]          ; s
	movq        mm2, mm0
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of s
	pmullw      mm2, mm4
	psrlw       mm2, 8              ; sa
	pxor        mm2, mm6            ; invert low 8 bits
	movd        mm1, [edx]          ; d
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	pmullw      mm0, mm1            ; s * d
	pmullw      mm1, mm2            ; d * inverted sa
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstcolor_dstcolor -- per 8-bit channel:
;   out = (s * d + d * d) >> 8                      (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstcolor_dstcolor:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movd        mm0, [esi]          ; s
	punpcklbw   mm1, mm7            ; widen bytes to words (mm7 = 0)
	punpcklbw   mm0, mm7
	pmullw      mm0, mm1            ; s * d (must precede squaring of mm1)
	pmullw      mm1, mm1            ; d * d
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         edx, 4
	add         esi, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstcolor_dstinvcolor -- per 8-bit channel:
;   out = (s * d + d * (255 - d)) >> 8
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
; Fix: a second "paddusw mm0, mm1" used to follow the shift, adding the
; unscaled 16-bit product d*(255-d) to the already-scaled result and
; driving nearly every channel to 255 on the final pack.
mmx_dstcolor_dstinvcolor:
	r_begin
	xloop67:
	movd        mm1, [edx]          ; d
	movd        mm0, [esi]          ; s
	punpcklbw   mm1, mm7            ; widen bytes to words (mm7 = 0)
	punpcklbw   mm0, mm7
	movq        mm2, mm1
	pmullw      mm0, mm1            ; s * d
	pxor        mm2, mm6            ; 255 - d
	pmullw      mm1, mm2            ; d * (255 - d)
	paddusw     mm0, mm1
	psrlw       mm0, 8              ; scale the sum back to 8 bits
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edi, 4
	add         edx, 4
	dec         ecx
	jnz         xloop67
	r_end


; mmx_dstcolor_dstalpha -- per 8-bit channel:
;   out = (s * d + d * da) >> 8                     (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstcolor_dstalpha:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movq        mm3, mm1
	punpcklbw   mm3, mm3
	punpckhwd   mm3, mm3
	punpckhbw   mm3, mm7            ; 4 words = top byte of d
	pmullw      mm3, mm5
	psrlw       mm3, 8              ; da
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	pmullw      mm0, mm1            ; s * d
	pmullw      mm1, mm3            ; d * da
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstcolor_dstinvalpha -- per 8-bit channel:
;   out = (s * d + d * (da ^ 0xFF)) >> 8            (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstcolor_dstinvalpha:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movq        mm3, mm1
	punpcklbw   mm3, mm3
	punpckhwd   mm3, mm3
	punpckhbw   mm3, mm7            ; 4 words = top byte of d
	pmullw      mm3, mm5
	psrlw       mm3, 8              ; da
	pxor        mm3, mm6            ; invert low 8 bits
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	pmullw      mm0, mm1            ; s * d
	pmullw      mm1, mm3            ; d * inverted da
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvcolor_zero -- per 8-bit channel: out = (s * (255 - d)) >> 8
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvcolor_zero:
	r_begin
	.next_pixel:
	movd        mm0, [esi]          ; s
	movd        mm1, [edx]          ; d
	punpcklbw   mm0, mm7            ; widen bytes to words (mm7 = 0)
	punpcklbw   mm1, mm7
	pxor        mm1, mm6            ; 255 - d
	pmullw      mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         edx, 4
	add         esi, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvcolor_one -- per 8-bit channel:
;   out = clamp255(((s * (255 - d)) >> 8) + d)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvcolor_one:
	r_begin
	.next_pixel:
	movd        mm0, [esi]          ; s
	movd        mm1, [edx]          ; d
	punpcklbw   mm0, mm7            ; widen bytes to words (mm7 = 0)
	punpcklbw   mm1, mm7
	movq        mm2, mm1
	pxor        mm2, mm6            ; 255 - d in a scratch copy, mm1 keeps d
	pmullw      mm0, mm2            ; s * (255 - d)
	psrlw       mm0, 8
	paddusw     mm0, mm1            ; + d
	packuswb    mm0, mm7            ; unsigned-saturating narrow
	movd        [edi], mm0
	add         edx, 4
	add         esi, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvcolor_srccolor -- per 8-bit channel:
;   out = clamp255(((s * (255 - d)) >> 8) + ((d * s) >> 8))
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvcolor_srccolor:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movd        mm0, [esi]          ; s
	punpcklbw   mm1, mm7            ; widen bytes to words (mm7 = 0)
	punpcklbw   mm0, mm7
	movq        mm2, mm1
	pxor        mm2, mm6            ; 255 - d
	pmullw      mm1, mm0            ; d * s (before mm0 is overwritten)
	pmullw      mm0, mm2            ; s * (255 - d)
	psrlw       mm0, 8
	psrlw       mm1, 8
	paddusw     mm0, mm1
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         edx, 4
	add         esi, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvcolor_srcinvcolor -- per 8-bit channel:
;   out = clamp255(((s * (255 - d)) >> 8) + ((d * (255 - s)) >> 8))
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvcolor_srcinvcolor:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movd        mm0, [esi]          ; s
	punpcklbw   mm1, mm7            ; widen bytes to words (mm7 = 0)
	punpcklbw   mm0, mm7
	movq        mm2, mm0
	movq        mm3, mm1
	pxor        mm2, mm6            ; 255 - s
	pxor        mm3, mm6            ; 255 - d
	pmullw      mm0, mm3            ; s * (255 - d)
	pmullw      mm1, mm2            ; d * (255 - s)
	psrlw       mm0, 8
	psrlw       mm1, 8
	paddusw     mm0, mm1
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         edx, 4
	add         esi, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvcolor_srcalpha -- per 8-bit channel:
;   out = (s * (255 - d) + d * sa) >> 8             (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   sa = (top byte of s * mmx_mult0) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvcolor_srcalpha:
	r_begin
	.next_pixel:
	movd        mm0, [esi]          ; s
	movq        mm2, mm0
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of s
	pmullw      mm2, mm4
	psrlw       mm2, 8              ; sa
	movd        mm1, [edx]          ; d
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	movq        mm3, mm1
	pxor        mm3, mm6            ; 255 - d
	pmullw      mm0, mm3            ; s * (255 - d)
	pmullw      mm1, mm2            ; d * sa
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvcolor_srcinvalpha -- per 8-bit channel:
;   out = (s * (255 - d) + d * (sa ^ 0xFF)) >> 8    (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   sa = (top byte of s * mmx_mult0) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvcolor_srcinvalpha:
	r_begin
	.next_pixel:
	movd        mm0, [esi]          ; s
	movq        mm2, mm0
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of s
	pmullw      mm2, mm4
	psrlw       mm2, 8              ; sa
	pxor        mm2, mm6            ; invert low 8 bits
	movd        mm1, [edx]          ; d
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	movq        mm3, mm1
	pxor        mm3, mm6            ; 255 - d
	pmullw      mm0, mm3            ; s * (255 - d)
	pmullw      mm1, mm2            ; d * inverted sa
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvcolor_dstcolor -- per 8-bit channel:
;   out = clamp255(((s * (255 - d)) >> 8) + ((d * d) >> 8))
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvcolor_dstcolor:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movd        mm0, [esi]          ; s
	punpcklbw   mm1, mm7            ; widen bytes to words (mm7 = 0)
	punpcklbw   mm0, mm7
	movq        mm2, mm1
	pxor        mm2, mm6            ; 255 - d
	pmullw      mm0, mm2            ; s * (255 - d)
	pmullw      mm1, mm1            ; d * d
	psrlw       mm0, 8
	psrlw       mm1, 8
	paddusw     mm0, mm1
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         edx, 4
	add         esi, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvcolor_dstinvcolor -- per 8-bit channel:
;   out = clamp255(((s * (255 - d)) >> 8) + ((d * (255 - d)) >> 8))
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvcolor_dstinvcolor:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movd        mm0, [esi]          ; s
	punpcklbw   mm1, mm7            ; widen bytes to words (mm7 = 0)
	punpcklbw   mm0, mm7
	movq        mm2, mm1
	pxor        mm2, mm6            ; 255 - d, shared by both terms
	pmullw      mm0, mm2            ; s * (255 - d)
	pmullw      mm1, mm2            ; d * (255 - d)
	psrlw       mm0, 8
	psrlw       mm1, 8
	paddusw     mm0, mm1
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         edx, 4
	add         esi, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvcolor_dstalpha -- per 8-bit channel:
;   out = (s * (255 - d) + d * da) >> 8             (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvcolor_dstalpha:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movq        mm2, mm1
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	psrlw       mm2, 8              ; da
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	movq        mm3, mm1
	pxor        mm3, mm6            ; 255 - d
	pmullw      mm0, mm3            ; s * (255 - d)
	pmullw      mm1, mm2            ; d * da
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvcolor_dstinvalpha -- per 8-bit channel:
;   out = clamp255(((s * (255 - d)) >> 8) + ((d * (da ^ 0xFF)) >> 8))
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvcolor_dstinvalpha:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movq        mm2, mm1
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	psrlw       mm2, 8              ; da
	pxor        mm2, mm6            ; invert low 8 bits
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	movq        mm3, mm1
	pxor        mm3, mm6            ; 255 - d
	pmullw      mm0, mm3            ; s * (255 - d)
	pmullw      mm1, mm2            ; d * inverted da
	psrlw       mm0, 8              ; each term is scaled before the add here
	psrlw       mm1, 8
	paddusw     mm0, mm1
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstalpha_zero -- per 8-bit channel: out = (s * da) >> 8
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstalpha_zero:
	r_begin
	.next_pixel:
	movd        mm0, [esi]          ; s
	movd        mm1, [edx]          ; d
	punpcklbw   mm0, mm7            ; widen s to words (mm7 = 0)
	punpcklbw   mm1, mm1
	punpckhwd   mm1, mm1
	punpckhbw   mm1, mm7            ; 4 words = top byte of d
	pmullw      mm1, mm5
	psrlw       mm1, 8              ; da
	pmullw      mm0, mm1            ; s * da
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstalpha_one -- per 8-bit channel: out = clamp255(((s * da) >> 8) + d)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstalpha_one:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movq        mm2, mm1            ; keep packed d for the byte-wise add
	punpcklbw   mm1, mm1
	punpckhwd   mm1, mm1
	punpckhbw   mm1, mm7            ; 4 words = top byte of d
	pmullw      mm1, mm5
	psrlw       mm1, 8              ; da
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen s to words
	pmullw      mm0, mm1            ; s * da
	psrlw       mm0, 8
	packuswb    mm0, mm7
	paddusb     mm0, mm2            ; + d, saturating per byte
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstalpha_srccolor -- per 8-bit channel:
;   out = clamp255(((s * da) >> 8) + ((d * s) >> 8))
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstalpha_srccolor:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movq        mm2, mm1
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	psrlw       mm2, 8              ; da
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	pmullw      mm1, mm0            ; d * s (before mm0 is overwritten)
	pmullw      mm0, mm2            ; s * da
	psrlw       mm1, 8
	psrlw       mm0, 8
	paddusw     mm0, mm1
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstalpha_srcinvcolor -- per 8-bit channel:
;   out = clamp255(((s * da) >> 8) + ((d * (255 - s)) >> 8))
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstalpha_srcinvcolor:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movq        mm2, mm1
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	psrlw       mm2, 8              ; da
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	movq        mm3, mm0
	pxor        mm3, mm6            ; 255 - s
	pmullw      mm1, mm3            ; d * (255 - s)
	pmullw      mm0, mm2            ; s * da
	psrlw       mm1, 8
	psrlw       mm0, 8
	paddusw     mm0, mm1
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstalpha_srcalpha -- per 8-bit channel:
;   out = (s * da + d * sa) >> 8                    (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   sa = (top byte of s * mmx_mult0) >> 8, da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstalpha_srcalpha:
	r_begin
	.next_pixel:
	movd        mm0, [esi]          ; s
	movq        mm3, mm0
	punpcklbw   mm3, mm3
	punpckhwd   mm3, mm3
	punpckhbw   mm3, mm7            ; 4 words = top byte of s
	pmullw      mm3, mm4
	psrlw       mm3, 8              ; sa
	movd        mm1, [edx]          ; d
	movq        mm2, mm1
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	psrlw       mm2, 8              ; da
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	pmullw      mm0, mm2            ; s * da
	pmullw      mm1, mm3            ; d * sa
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstalpha_srcinvalpha -- per 8-bit channel:
;   out = (s * da + d * (sa ^ 0xFF)) >> 8           (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   sa = (top byte of s * mmx_mult0) >> 8, da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstalpha_srcinvalpha:
	r_begin
	.next_pixel:
	movd        mm0, [esi]          ; s
	movq        mm3, mm0
	punpcklbw   mm3, mm3
	punpckhwd   mm3, mm3
	punpckhbw   mm3, mm7            ; 4 words = top byte of s
	pmullw      mm3, mm4
	psrlw       mm3, 8              ; sa
	pxor        mm3, mm6            ; invert low 8 bits
	movd        mm1, [edx]          ; d
	movq        mm2, mm1
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	psrlw       mm2, 8              ; da
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	pmullw      mm0, mm2            ; s * da
	pmullw      mm1, mm3            ; d * inverted sa
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstalpha_dstcolor -- per 8-bit channel:
;   out = (s * da + d * d) >> 8                     (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstalpha_dstcolor:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movq        mm2, mm1
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	psrlw       mm2, 8              ; da
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	pmullw      mm0, mm2            ; s * da
	pmullw      mm1, mm1            ; d * d
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstalpha_dstinvcolor -- per 8-bit channel:
;   out = (s * da + d * (255 - d)) >> 8             (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstalpha_dstinvcolor:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movq        mm2, mm1
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	psrlw       mm2, 8              ; da
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	movq        mm3, mm1
	pxor        mm3, mm6            ; 255 - d
	pmullw      mm0, mm2            ; s * da
	pmullw      mm1, mm3            ; d * (255 - d)
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstalpha_dstalpha -- per 8-bit channel:
;   out = (s * da + d * da) >> 8                    (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstalpha_dstalpha:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movq        mm2, mm1
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	psrlw       mm2, 8              ; da, shared by both terms
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	pmullw      mm0, mm2            ; s * da
	pmullw      mm1, mm2            ; d * da
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstalpha_dstinvalpha -- per 8-bit channel:
;   out = (s * da + d * (da ^ 0xFF)) >> 8           (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstalpha_dstinvalpha:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movq        mm2, mm1
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	psrlw       mm2, 8              ; da
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	pmullw      mm0, mm2            ; s * da (uses da before it is inverted)
	pxor        mm2, mm6            ; invert low 8 bits of da
	pmullw      mm1, mm2            ; d * inverted da
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvalpha_zero -- per 8-bit channel: out = (s * (da ^ 0xFF)) >> 8
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
; Fix: the sr1 pixel was fetched with an 8-byte movq, which touches the
; four bytes after the current pixel and therefore reads past the end of
; the sr1 buffer on the last iteration. Only the low dword feeds the
; unpack below, so a 4-byte movd yields the same result.
mmx_dstinvalpha_zero:
	r_begin
	xloop90:
	movd        mm0, [esi]          ; s
	movd        mm2, [edx]          ; d (4 bytes only)
	punpcklbw   mm2, mm2
	punpcklbw   mm0, mm7            ; widen s to words (mm7 = 0)
	punpckhwd   mm2, mm2
	add         esi, 4
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	add         edx, 4
	psrlw       mm2, 8              ; da
	pxor        mm2, mm6            ; invert low 8 bits
	pmullw      mm0, mm2            ; s * inverted da
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         edi, 4
	dec         ecx
	jnz         xloop90
	r_end


; mmx_dstinvalpha_one -- per 8-bit channel:
;   out = clamp255(((s * (da ^ 0xFF)) >> 8) + d)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvalpha_one:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d, kept packed for the byte-wise add
	movq        mm2, mm1
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	psrlw       mm2, 8              ; da
	pxor        mm2, mm6            ; invert low 8 bits
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen s to words
	pmullw      mm0, mm2            ; s * inverted da
	psrlw       mm0, 8
	packuswb    mm0, mm7
	paddusb     mm0, mm1            ; + d, saturating per byte
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvalpha_srccolor -- per 8-bit channel:
;   out = (s * (da ^ 0xFF) + d * s) >> 8            (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvalpha_srccolor:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movq        mm2, mm1
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	psrlw       mm2, 8              ; da
	pxor        mm2, mm6            ; invert low 8 bits
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	pmullw      mm1, mm0            ; d * s (before mm0 is overwritten)
	pmullw      mm0, mm2            ; s * inverted da
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvalpha_srcinvcolor -- per 8-bit channel:
;   out = (s * (da ^ 0xFF) + d * (255 - s)) >> 8    (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvalpha_srcinvcolor:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movq        mm2, mm1
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	psrlw       mm2, 8              ; da
	pxor        mm2, mm6            ; invert low 8 bits
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	movq        mm3, mm0
	pxor        mm3, mm6            ; 255 - s
	pmullw      mm1, mm3            ; d * (255 - s)
	pmullw      mm0, mm2            ; s * inverted da
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvalpha_srcalpha -- per 8-bit channel:
;   out = (s * (da ^ 0xFF) + d * sa) >> 8           (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   sa = (top byte of s * mmx_mult0) >> 8, da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvalpha_srcalpha:
	r_begin
	.next_pixel:
	movd        mm0, [esi]          ; s
	movq        mm2, mm0
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of s
	pmullw      mm2, mm4
	psrlw       mm2, 8              ; sa
	movd        mm1, [edx]          ; d
	movq        mm3, mm1
	punpcklbw   mm3, mm3
	punpckhwd   mm3, mm3
	punpckhbw   mm3, mm7            ; 4 words = top byte of d
	pmullw      mm3, mm5
	psrlw       mm3, 8              ; da
	pxor        mm3, mm6            ; invert low 8 bits
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	pmullw      mm1, mm2            ; d * sa
	pmullw      mm0, mm3            ; s * inverted da
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvalpha_srcinvalpha -- per 8-bit channel:
;   out = (s * (da ^ 0xFF) + d * (sa ^ 0xFF)) >> 8  (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   sa = (top byte of s * mmx_mult0) >> 8, da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvalpha_srcinvalpha:
	r_begin
	.next_pixel:
	movd        mm0, [esi]          ; s
	movq        mm2, mm0
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of s
	pmullw      mm2, mm4
	psrlw       mm2, 8              ; sa
	pxor        mm2, mm6            ; invert low 8 bits
	movd        mm1, [edx]          ; d
	movq        mm3, mm1
	punpcklbw   mm3, mm3
	punpckhwd   mm3, mm3
	punpckhbw   mm3, mm7            ; 4 words = top byte of d
	pmullw      mm3, mm5
	psrlw       mm3, 8              ; da
	pxor        mm3, mm6            ; invert low 8 bits
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	pmullw      mm1, mm2            ; d * inverted sa
	pmullw      mm0, mm3            ; s * inverted da
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvalpha_dstcolor -- per 8-bit channel:
;   out = (s * (da ^ 0xFF) + d * d) >> 8            (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvalpha_dstcolor:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movq        mm2, mm1
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	psrlw       mm2, 8              ; da
	pxor        mm2, mm6            ; invert low 8 bits
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	pmullw      mm0, mm2            ; s * inverted da
	pmullw      mm1, mm1            ; d * d
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvalpha_dstinvcolor -- per 8-bit channel:
;   out = (s * (da ^ 0xFF) + d * (255 - d)) >> 8    (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvalpha_dstinvcolor:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movq        mm2, mm1
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	psrlw       mm2, 8              ; da
	pxor        mm2, mm6            ; invert low 8 bits
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	movq        mm3, mm1
	pxor        mm3, mm6            ; 255 - d
	pmullw      mm0, mm2            ; s * inverted da
	pmullw      mm1, mm3            ; d * (255 - d)
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvalpha_dstalpha -- per 8-bit channel:
;   out = (s * (da ^ 0xFF) + d * da) >> 8           (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvalpha_dstalpha:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movq        mm2, mm1
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	psrlw       mm2, 8              ; da
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	pmullw      mm1, mm2            ; d * da (uses da before it is inverted)
	pxor        mm2, mm6            ; invert low 8 bits of da
	pmullw      mm0, mm2            ; s * inverted da
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end


; mmx_dstinvalpha_dstinvalpha -- per 8-bit channel:
;   out = (s * (da ^ 0xFF) + d * (da ^ 0xFF)) >> 8  (word sum saturates)
;   s = pixel at [esi] (sr0), d = pixel at [edx] (sr1), result to [edi]
;   da = (top byte of d * mmx_mult1) >> 8
;   count (ecx) must be >= 1: it is tested only after a pixel is written.
mmx_dstinvalpha_dstinvalpha:
	r_begin
	.next_pixel:
	movd        mm1, [edx]          ; d
	movq        mm2, mm1
	punpcklbw   mm2, mm2
	punpckhwd   mm2, mm2
	punpckhbw   mm2, mm7            ; 4 words = top byte of d
	pmullw      mm2, mm5
	psrlw       mm2, 8              ; da
	pxor        mm2, mm6            ; invert low 8 bits, shared by both terms
	movd        mm0, [esi]          ; s
	punpcklbw   mm0, mm7            ; widen bytes to words
	punpcklbw   mm1, mm7
	pmullw      mm0, mm2            ; s * inverted da
	pmullw      mm1, mm2            ; d * inverted da
	paddusw     mm0, mm1
	psrlw       mm0, 8
	packuswb    mm0, mm7
	movd        [edi], mm0
	add         esi, 4
	add         edx, 4
	add         edi, 4
	dec         ecx
	jnz         .next_pixel
	r_end
