;"endless creamy lollipopoid" variant of "codegrinder" by Kuemmel 
;main algorithm based on a shader by kusma (https://www.shadertoy.com/view/4d33RM)
;and the discussion on pout here => http://www.pouet.net/topic.php?which=10564
org 100h
use16
push 0a000h - 70	;center x axis
aas
pop es
mov al,13h
int 10h
mov ah,0x80			;do that here as ah needs to be zero before, al seems to be zero after int
mov fs,ax

;---full creamy palette generation
mov dx,0x3c9
palette:  				
	mov al,63
	out dx,al
	mov al,cl
	shr al,1
	out dx,al
	or al,00010011b
	out dx,al
loop palette
salc				;clear al for texture generation

;---seamless texture generation "borrowed" and slightly modified from 'lattice' by baze/3SC (2001)
texture:			
	mov	bx,cx
	rcl	dh,cl
	mov	ah,dh
	sar	ah,4
	adc	al,ah
	adc	al,byte[fs:bx+128]		;+127 looks also good
	;dec ax						;weirdo ;-)
	shr	al,1
	mov	byte[fs:bx],al
	not	bh
	mov	byte[fs:bx],al
loop texture

;---main intro routine	
fld1					;1
main_loop:
  mov ax,0xcccd  		
  mul di				;rrrola's trick => y=dh, x=dl
  sub dh,[si]			;center y axis
  xor bx,bx				;clear for correct addressing
  pusha
  fild  word[bx-8]		;dh = y			|1		   
  fild  word[bx-9]		;dl = x			|y			|1	  
  fmul	dword[bx+si]	;correct aspect ratio => sizecoding.org
  fld   st1				;y		       	|x			|y	  |1	  
  fmul  st0,st0			;y*y			|x			|y	  |1  	
  fld   st1				;x				|y*y	   	|x	  |y   	|1	
  fmul  st0,st0			;x*x	     	|y*y	   	|x	  |y	|1
  faddp st1,st0			;x*x+y*y	   	|x		  	|y	  |1
  fmul dword[si-258+f]	;(x*x+y*y)/f	|x	  		|y	  |1
  fsubr st0,st3			;1-xx*xx+yy*yy 	|x			|y	  |1	
  fabs				
  fsqrt
  fsubr st0,st3			;1-e			|x		  	|y	  |1
  fadd  st0,st3			;2-e			|x		  	|y	  |1
  fmul  st1,st0			;2-e			|x*(2-e)	|y	  |1
  fmulp st2,st0			;x*(2-e)		|y*(2-e)	|1	
  fistp word[bx-4]		;new y at al (only highbyte is interesting)
  fistp word[bx-5]		;new x at ah (overwrite former lowbyte)
  popa
  add al,[bp+si]		;inc y_movement
  sub ah,[bp+si]		;inc x_movement
  xchg ax,bx			;clear ax and load offset to bx
  fs xlatb				;texture addressing is al=(fs:bx+al) while al is zero
  stosb
loop main_loop
inc byte[bp+si]			;update global movement counter
in al,60h
  dec al
jnz main_loop
ret	
f dw 0x3150				;dword float constant for sphere size, first 2 bytes precision not needed
						;there is a similar number in code at +63, but shows more artifacts...so 2 Bytes more wasted ;-) 3160