;----------------------------------------------------------
; entry32 by st0ne
;
; Uncompress picture and display it
; The decoder part runs in 32-bit protected mode because it needs
; over 2 MB of memory and operates on 4-byte (dword) data.
; Look at hugi.nfo for more information on DMC algorithm.
;----------------------------------------------------------

.model tiny
.386
Ideal

;Shorthands used throughout the file.
;'&' abbreviates the 'offset' operator. The x* names expand to dword-sized,
;ebx-relative operands for the variables declared at the start of cod32.
;They address the right variable only while ebx = & _fpu; 'start' zeroes
;ebx and _fpu is placed at org 0000h.
&	equ	offset
xval	equ	dword ebx+(& _val-& _fpu)	;val: window of recently read input bytes
xmin	equ	dword ebx+(& _min-& _fpu)	;min: lower bound of the coding interval
xmax	equ	dword ebx+(& _max-& _fpu)	;max: upper bound of the coding interval
xfpu	equ	dword ebx			;_fpu itself: scratch dword for int<->float transfers

;One state of the prediction model (16 bytes).
;c0/c1 hold single-precision float bit patterns: they are initialised with
;3EA7EF9Eh and accessed with fld/fadd/fstp by the decoder.
;n0/n1 hold 32-bit addresses of other Node records.
;The decoder relies on this field order: adding (c1-c0) to a Node pointer
;makes the c0/n0 displacements select c1/n1 instead.
struc	Node			;binary tree element
c0	dd	?		;count of zeros (float)
c1	dd	?		;count of ones (float)
n0	dd	?		;next Node when 0 encountered
n1	dd	?		;next Node when 1 encountered
ends	Node

segment cod32 use32
assume cs:cod32,ds:cod32


;----------------------------------------------------------------------
; Initialization [27]

;Variable area at the very start of the segment, addressed ebx-relative
;through the xfpu/xmin/xmax/xval shorthands.
;_max, _min and _val reserve no bytes of their own: the 'org 000Ch' below
;rewinds the location counter to _max, so the first instructions of 'start'
;are assembled on top of them. Those instructions run only once, before the
;variables are first written (the author's size note gives them as 12 bytes,
;i.e. exactly the three dwords).
	org	0000h
_fpu	dd	000000FDh	;low word is loaded as FPU control word by start; afterwards fpu temp space
_esi	dd	& data		;data source: address of the next compressed byte
_edi	dd	00006000h	;data destination: address of the next decoded byte
_max	dd	?		;overlaid by code at start
_min	dd	?		;overlaid by code at start
_val	dd	?		;overlaid by code at start
	org	000Ch		;overlap variables and code

;Entry point of the 32-bit part.
;NOTE(review): the values of ax, bx and ebp on entry are set by code outside
;this file; ax is presumably a data selector and ebp the base of the node
;memory used below - confirm against the loader.
;Init selectors and fpu (12)
start:	mov	ds,ax
	mov	es,ax		;es is the segment used by stosb/stosd/movsd below
	mov	fs,ax
	mov	gs,bx		;remember cs for later
	xor	ebx,ebx		;& _fpu (ebx stays 0 as base for xfpu/xmin/xmax/xval)
	fldcw	[ebx]		;control word 00FDh from _fpu: 'float' precision, round even

;Uncompress palette, logo and text (3)
;cl counts the objects still to decode; it is saved on the stack for the
;duration of one object and restored after the decode loop.
	mov	cl,03h
@decod:	push	ecx
	
;----------------------------------------------------------------------
; Init prediction tree [66]

;Builds the initial model: 256 rows (index j) of 256 Node slots each, starting
;at ebp. Every row is a binary tree: slots 0..126 are inner nodes, slots
;127..254 are the last level, slot 255 is skipped. The successors of the last
;level point to slot 0 of rows 2*(i-127) and 2*(i-127)+1.
;Clear memory (11)
	mov	edi,ebp
	xor	eax,eax
	mov	ecx,00400000h
	rep	stosb			;zero 400000h bytes from ebp; leaves ecx=0, eax=0

;Init nodes[j][i] (8)
	mov	esi,3EA7EF9Eh		;0.328 as a float bit pattern
	mov	edi,ebp			;edi=&nodes[0][0]
	cdq				;edx=0 (eax is 0): for (j=0;j<256), counted in dl

;Init i=0..126 (18)
@flush:	mov	eax,edi			;&nodes[j][0]
	mov	cl,7Fh			;for (i=0;i<127); ecx is 0 here, so ecx=7Fh
@fl1:	xchg	eax,esi			;eax=0.328, esi=running node address
	stosd				;c0=0.328
	stosd				;c1=0.328
	xchg	eax,esi			;eax=running node address again
	add	eax,size Node
	stosd				;n0=&nodes[j][2i+1]
	add	eax,size Node
	stosd				;n1=&nodes[j][2i+2]
	loop	@fl1

;Init i=127..255 (20)
	mov	eax,ebp			;&nodes[0][0]
	mov	cl,80h			;128 iterations: i=127..254
@fl2:	xchg	eax,esi
	stosd				;c0=0.328
	stosd				;c1=0.328
	xchg	eax,esi
	stosd				;n0=&nodes[2*(i-127)][0]
	add	eax,256*size Node	;advance one row
_row	= dword $-4			;names the immediate dword of the add above
	stosd				;n1=&nodes[2*(i-127)+1][0]
	add	eax,[ebx+(& _row-& _fpu)]	;advance one more row, reading the stride from that immediate
	loop	@fl2

;Init loop (9)
	add	edi,size Node		;j++ (skip slot 255 so the row spans 256*size Node bytes)
	dec	dl			;dl wraps 0->FFh first, so the body runs 256 times
	jnz	@flush
	mov	esi,ebp

;Here, esi=p=&nodes[0][0] (start of binary tree), ecx=0 and
;edi=new=ebp+256*256*size Node, the first byte after the last row
;(start of nodes buffer for duplication)

;----------------------------------------------------------------------
; Decode using DMC [250]

;Sets the coding interval to [0,01000000h) and primes val with the first
;three input bytes.
;Init min,max,val (22)
	mov	[xmin],ecx	;min=0 (ecx is 0 after the preceding 'loop')
	mov	[xmax],01000000h
_big	= dword $-4		;names the immediate 01000000h so it can be re-read later
@e_val:	call	small @lodsb	;val=00aabbcc
	inc	ecx
	jnp	@e_val		;taken for cl=1,2 (odd parity), exits at cl=3: three calls in total
	
;Outer loop: one output byte per pass. dl collects the 8 decoded bits,
;cl counts the bits still to decode.
;Start do/while loop (4)
@e_do:	mov	dl,00h		;c=0
	mov	cl,08h

;Splits the interval in proportion to the counts of the current node (esi):
;mid=(max-min-1)*c0/(c0+c1), converted to integer by fistp.
;Predict a value (24)
@e_for:	fld	[dword esi+(Node).c1]
	fld	[dword esi+(Node).c0]
	fadd	st(1),st	;st(1)=c0+c1, st(0)=c0
	mov	eax,[xmax]
	sub	eax,[xmin]
	dec	eax
	mov	[xfpu],eax
	fild	[xfpu]		;mid=(max-min-1)
	fmulp			;*(p.c0)
	fdivrp	st(1)		;/(p.c0+p.c1)
	fistp	[xfpu]		;store as integer; FPU stack is empty again

;Keeps the split point strictly inside the interval: a zero offset becomes 1,
;and if min+mid+1 would equal max the split point is lowered by one.
;Adjust value when bounds reached (18)
	mov	eax,[xfpu]
	or	eax,eax
	jnz	@e_if1
	inc	eax		;mid++
@e_if1:	add	eax,[xmin]	;mid+=min
	inc	eax
	cmp	eax,[xmax]
	jne	@e_if2
	dec	eax		;mid--
@e_if2:	dec	eax		;undo the inc used for the comparison

;Decodes one bit into dh and narrows the interval accordingly.
;Update current character (21)
	cmp	eax,[xval]	;if (val>=mid)
	jg	@e_if3		;signed compare: mid>val selects the 0 branch
	mov	[xmin],eax	;min=mid
	mov	dh,01h		;bit=1
	jmp	@e_el3
@e_if3:	mov	[xmax],eax	;max=mid
	mov	dh,00h		;bit=0
@e_el3:	add	dl,dl
	add	dl,dh		;c=(c<<1)+bit

;Model update for the bit just decoded (dh). ecx is pointed at p or p+4 so
;that (Node).c0 and (Node).n0 relative to ecx address count[bit] and
;next[bit]. The bit counter in ecx is kept on the stack meanwhile.
;Prepare nodes pointers (13)
@up:	push	ecx
	mov	ecx,esi
	or	dh,dh
	jz	@up1
	add	ecx,& (Node).c1-& (Node).c0	;ecx=&(p->count[b])
@up1:	mov	esi,[ecx+(Node).n0]		;esi=(p->next[b])
	
;The float counts are compared as signed integers against 40000000h, the bit
;pattern of 2.0. Cloning happens only if p->count[b] >= 2.0 and the successor
;has at least 2.0 more total count than p->count[b]; a negative difference
;has its sign bit set and therefore also skips.
;Test if duplication needed (22)
	mov	eax,40000000h
	cmp	[dword ecx+(Node).c0],eax
	jl	@up2
	fld	[dword esi+(Node).c0]	;nx->c0
	fadd	[dword esi+(Node).c1]	;+nx->c1
	fsub	[dword ecx+(Node).c0]	;-p->cx
	fstp	[xfpu]
	cmp	[xfpu],eax
	jl	@up2

;Clones the successor nx into the free slot at edi. The clone receives the
;fraction r of each count and nx keeps the rest; n0/n1 are copied unchanged.
;Two copies of r are pushed because each loop pass pops one.
;Duplication (32)
	fld	[dword esi+(Node).c0]
	fadd	[dword esi+(Node).c1]
	fdivr	[dword ecx+(Node).c0]	;r=(p->cx)/(nx->c0+nx->c1)
	fld	st
	xor	eax,eax			;eax=0: pass counter for the parity test below
@dup:	fmul	[dword esi+(Node).c0]
	fst	[dword edi+(Node).c0]	;(new->c0)=r*(nx->c0)
	fsubr	[dword esi+(Node).c0]
	fstp	[dword esi+(Node).c0]	;(nx->c0)-=r*(nx->c0)
	cmpsd				;point to c1 (used only to add 4 to esi and edi)
	dec	eax
	jp	@dup			;do the same with c1: al=FFh has even parity (loop), al=FEh odd (exit)
	movsd				;copy n0 and point to n1
	movsd				;copy n1 and point to new++
	mov	[ecx+(Node).n0],edi	;(p->next[b])=new
	sub	[ecx+(Node).n0],size Node	;edi is already past the clone, step back to its start

;Update current node (10)
@up2:	fld1
	fadd	[dword ecx+(Node).c0]
	fstp	[dword ecx+(Node).c0]	;(p->count[b])++
	mov	esi,[ecx+(Node).n0]	;p=p->next[b] (the clone if one was made)
	pop	ecx			;restore the bit counter

;Renormalisation: when the interval is narrower than 256, one more input
;byte is shifted into val and min/max are rescaled the same way,
;x=(x<<8) & 00FFFF00h. If the rescaled max is not above min, max is reset
;to 01000000h.
;Process with next input char (20+35+7)
	mov	eax,[xmax]
	sub	eax,[xmin]
	cmp	eax,00000100h	;max-min<256
	jge	@e_nxt
	or	dh,dh
	jz	@e_if4
	dec	[xmax]		;max-- (only when the decoded bit was 1)
@e_if4:	call	small @lodsb	;val=((val<<8) & 00FFFF00h)+next input byte
	shl	[xmin],16
	shr	[xmin],8
	shl	[xmax],16
	shr	[xmax],8
	mov	eax,[xmax]
	cmp	eax,[xmin]
	jg	@e_nxt
	mov	eax,[ebx+(& _big-& _fpu)]	;re-read the immediate named _big
	mov	[xmax],eax	;01000000h
@e_nxt:	dec	ecx		;next of the 8 bits
	jnz	@e_for

;Stores the finished byte at the output position kept in _edi. edi normally
;holds the node allocation pointer, so it is swapped with _edi around the
;stosb. The object ends when val equals max-1.
;Write char and test end (22)
	mov	al,dl
	xchg	edi,[ebx+(& _edi-& _fpu)]	;edi=output pointer, _edi=node pointer
	stosb					;write byte, advance output pointer
	xchg	edi,[ebx+(& _edi-& _fpu)]	;swap back
	mov	eax,[xmax]
	dec	eax
	cmp	eax,[xval]
	jne	@e_do		;continue until val==max-1

;----------------------------------------------------------------------
; Process next object or return to 16 bit mode [18]

;Restores the object counter pushed at @decod and decodes the next object
;with a freshly initialised model. _esi and _edi are not reset, so input and
;output simply continue where the previous object stopped.
;Decode next object (9)
	pop	ecx
	dec	cl
	jnz	@decod

;The 66h operand-size prefix turns the following retf into its 16-bit form.
;NOTE(review): this presumably matches a 16-bit far return address left on
;the stack by the loader, which is not visible in this file - confirm.
;Jump back to 16b mode (9)
	mov	esi,00006003h		;start at +3 (3 bytes past the initial value of _edi)
	xor	edi,edi			;edi=0 (whole register, including the high word)
	db	66h
	retf				;retf 16b

;----------------------------------------------------------------------
; Read one byte of data to decompress [21]

;Shifts the next compressed byte into val.
;In:    ebx = & _fpu, _esi = address of the next input byte
;Out:   val = ((val<<8) & 00FFFF00h) + input byte, eax = new val, _esi advanced
;Keeps: esi (swapped with _esi around the lodsb)
;Must be called with 'call small': the 66h prefix before ret makes it pop a
;16-bit return address.
;Read data
@lodsb:	xchg	esi,[ebx+(& _esi-& _fpu)]	;esi=input pointer, _esi=caller's esi
	mov	eax,[xval]
	shl	eax,16
	shr	eax,8			;val=(val<<8) & 00FFFF00
	lodsb				;al=next input byte, esi++
	mov	[xval],eax		;val+=getchar()
	xchg	esi,[ebx+(& _esi-& _fpu)]	;restore esi, store the advanced input pointer
	db	66h
	ret

data:

ends	cod32
end	start