; TEXT.COM   by Chess Knight
;   version 2
;
; This program outputs a small fixed text file to stdout.
;
; "S-s-s-s-s", hissed Gollum.  "It must give us three guesseses,
; my preciouss, three guesseses."
;
; Created for 2nd size optimized code contest at
;   http://home.pages.de/~hugi-compo
;
; 2 transformations are performed.  First the text is decoded into an
; all caps version with spaces substituted for all CR/LF pairs.  Then
; some simple capitalization and end of line rules are applied to generate
; a 2nd (and final) version from the 1st version.
;
; The encoding method is as follows.  There are only 32 distinct characters
; after changing all characters to the same case:  the 26 letters and
; the 6 symbols EOL (combined CR/LF), space, comma, period, question mark,
; and dash.  (Actually, the text does not contain j, x, or z, and CR/LF's
; are converted to spaces).  The encoder tries to use previous text to
; guess the next character.  At most 2 guesses (an arbitrary limit), taken
; from the longest matching previous contexts, are each encoded with 1 bit
; indicating whether that guess is correct.  If the 0 to 2 guesses are all
; wrong, then a 5 bit code of the correct character follows.
;
; 0 is CR/LF, 1-26 is A-Z, and 27=' ', 28=',', 29='-', 30='?', 31='.'
; All capitals follow the '.' or '?' characters.
;
;  Assembled with Turbo Assembler V3.1
;    tasm /m read
;    tlink /t read
;  /m enables multiple passes so that jumps are optimized
;  /t tells the linker to make a COM file.
;
;--------------------------------------------------------------------
;
.386                    ;enables the PUSH immediate / PUSHA / POPA forms used below
ARTIF   EQU     04BH    ;artificial.  Used to get around limitations of TASM
                        ;Must be adjusted to size of program.
                        ;J200 loads this into BL to make BX address the list of
                        ;guessed characters; presumably chosen so that BX lands
                        ;in the scratch area after N_ALF -- re-verify on any
                        ;change in code size.

_COM    SEGMENT WORD PUBLIC 'CODE' USE16
        ORG     100H
_SPIT   PROC    NEAR                            ;program entry point (near proc)
        ASSUME  CS:_COM,DS:_COM
;
;  Register roles during decoding:
;  SI = offset of encoded data
;  DI = offset where decoded data goes
;  AH = number of bits to read before fetching next byte.
;  DX = shift register holding the encoded bits (see NXTBIT)
;  BX = offset of table for special characters - 27
;
        MOV     SI,OFFSET CMPRSTX
        MOV     DI,OFFSET OUTTXT
        CWD                     ;DX = sign extension of AX; assumes AX is
                                ;non-negative at entry so DX = 0 -- TODO confirm
;=========================================================================
;No more guesses, spit out next 5 bits as character
;Also reached by falling through from the start-up code, with whatever
;CWD left in DX acting as the first "code".
JNOCTX:

;------------------------------------------------------
; Decode 5 bit codes
; Entry:  DH=code (low 5 bits), DI = place to store decoded letter
; Exit:   AL=character, BX = offset of table of non-alphabetic characters - 27
        MOV     BX,OFFSET N_ALF-27      ;biased so XLAT maps codes 27-31 onto N_ALF[0..4]
        MOV     AL,DH
        AND     AL,1FH          ;keep only the 5 bit code
        CMP     AL,27           ;Test for non-letter characters:  " ,-?."
        JB      J360            ;codes 0-26 need no table lookup
        XLAT                    ;AL = N_ALF[AL-27], stored as character - 40H
J360:
        ADD     AL,40H          ;1-26 -> 'A'-'Z'; undoes the -40H bias of N_ALF
;------------------------------------------------------
        PUSH    OFFSET J400     ;fake a return address:  execution falls into
                                ;NXT5BT and its RETN continues at J400

;------------------------------------------------------
; Load 1 or 5 bits into DX
; NXT5BT shifts 5 bits into DX, NXTBIT shifts 1 bit.
; Entry: DX=coded bits, SI=place from which to get encoded data
;        AH=number of bits left to fetch (it is in DH only inside the
;        routine, while AX and DX are exchanged)
;        CH=0 for NXT5BT, CX=0 for NXTBIT
; Exit:  CX=0, DX shifted left by the requested number of bits,
;        AL unchanged, AH=updated bit counter, SI advanced past any byte read
NXT5BT:
        MOV     CL,4            ;4 + the INC below = 5 loop iterations
NXTBIT:
        INC     CX              ;CX=0 where all calls to NXTBIT are.
        XCHG    AX,DX           ;work on the bits in AX; counter is now in DH
J100:
        DEC     DH
        JG      J110            ;bits still left in the current byte
        LODSB                   ;Get next byte
        MOV     DH,8
J110:
        SHL     AX,1            ;top bit of AL moves into bit 0 of AH
        LOOP    J100
        XCHG    AX,DX           ;restore:  DX=bits, AH=counter
        RETN

;---------------------------------------------------
; Symbol matching:
;   Search all previous text for longest matching context
;   Take closest if tie.
;   Guesses=2 at most (For this text, 2 is better than 3 or 1.)
; DI = Current search position
; SI = Current end of text
;
; The text is scanned backward from the most recently decoded character.
; Each candidate position is compared, going backward, against the end of
; the text; the character that followed a candidate with the longest match
; is appended to a zero-terminated list of guesses starting at BX (BL=ARTIF).
; All registers are saved by PUSHA here and restored by POPA at JNFIND.
J200:
        MOV     BL,ARTIF        ;BX -> start of guess list (BH is assumed to
                                ;still hold the high byte set at JNOCTX)
        PUSHA
        STD                     ;CMPSB below walks backward through the text
        MOV     BP,2            ;DX must reach 2 (one matching character)
                                ;before a guess is recorded
        DEC     DI              ;DI -> last decoded character
        PUSH    DI              ;becomes SI (end of text) at JSRCH
JSRCH:
        POP     SI              ;SI = end of text again
        CWD                     ;DX = 0 (AX is expected to be non-negative here)
        MOV     [BX],DX         ;zero-terminate the guess list
        DEC     DI              ;next candidate, one character further back
        CMP     DI,OFFSET OUTTXT
        JBE     JNFIND          ;End of text, no more contexts
        PUSH    SI              ;CMPSB changes SI and DI; keep both for the
        PUSH    DI              ;  next candidate
JLONGR: INC     DX              ;DX = number of matching characters + 1
        CMPSB
        JE      JLONGR
        CMP     DX,BP           ;BP = size of longest found so far
        JL      JFULL           ;shorter than the best so far, ignore
        JE      JSAME           ;tie:  add another guess to the list
        MOV     BL,ARTIF        ;Found a longer context
        MOV     BP,DX           ;  so restart the list and record the new size
JSAME:
;        CMP     BL,ARTIF+2
;        JE      JFULL
        MOV     AH,[BP+DI+1]    ;Get character to guess
                                ;(DI moved back BP bytes, so this is the
                                ;character just after the candidate position;
                                ;BP addressing uses SS, presumably equal to DS)
        CMP     [BX-1],AH       ;If same as previous character,
        JE      JFULL           ;  skip it
        MOV     [BX],AH         ;Store the guess in memory
        INC     BX
JFULL:
        POP     DI              ;restore candidate position (SI is popped at JSRCH)
        JMP     JSRCH
;------------------------------------
;Found many contexts of size BP
;Search finished:  try each recorded guess against one bit of input.
JNFIND:
;        MOV     [BX],CL
        POPA                    ;restore the registers saved at J200; BX is
                                ;back at the start of the guess list
        MOV     [BX+2],CL       ;zero the third list entry:  at most 2 guesses
                                ;(CL is expected to be 0 here)
        CLD                     ;forward direction again for LODSB/STOSB
JBGUES:
        MOV     AL,[BX]         ;AL = next guess, 0 marks the end of the list
        OR      AL,AL
        JE      JNOCTX          ;No more guesses, give up.
        INC     BX
        CALL    NXTBIT          ;Get next bit
        TEST    DH,20H          ;Test bit (6th bit of AH after SHL AX,1)
                                ;(AH inside NXTBIT is DH here, after its XCHG)
        JNE     JBGUES          ;Bad guess, try again
;Correct guess
;J400 is also the return address pushed at the end of the JNOCTX code.
J400:
        STOSB                   ;append the character in AL to the decoded text
        CMP     DI,OFFSET OUTTXT+903-22
                                ;903-22:  presumably the size of the final text
                                ;less one byte per CR/LF pair -- TODO confirm
        JLE     J200            ;signed compare; NOTE(review): relies on these
                                ;offsets staying below 8000H
;------------------------------------------------------
; exit program
; Second transformation, output and exit.  The decoded text is read from
; OUTTXT+1 up to the end of the decoded data, and the formatted text is
; written directly after the decoded data.
;   SI = read position, DI = write position, DX = start of formatted text,
;   BL = mask OR'ed into the next letter (20H = lower case, 0 = leave as is)
        MOV     SI,OFFSET OUTTXT+1      ;skip the first decoded byte
                                ;(presumably the dummy character produced by
                                ;the first pass through JNOCTX)
        MOV     DX,DI           ;set DX for DOS text output while DI is right
        XOR     BX,BX           ;BL = 0:  first letter stays upper case
J500:
        MOV     CL,45           ;CX = 0 already
                                ;(only CH = 0 is actually needed here)
J505:
; EOL rules.  Change ' ' to CR/LF after 45 char. or after '?'
        CMP     AL,'?'          ;AL still holds the previously stored character
        LODSB                   ;does not change the flags set by the CMP
        LOOPNE  J510            ;falls through when the count runs out or the
                                ;previous character was '?'
        MOV     AX,0A0DH        ;replace the character just loaded by CR/LF
        STOSW
        JMP     J500
J510:

; Capitalization rules.  Leave in upper case if preceded by '?' or '.'
        CMP     AL,'.'
        JB      J520            ;characters below '.' are stored unchanged and
                                ;leave the mask in BL untouched
        OR      BL,AL           ;BL = character with the current mask applied
        SHR     AL,1
        AND     AL,20H          ;AL = mask for the next character:  20H after
                                ;a letter, 0 after '.' or '?'
        XCHG    AX,BX           ;AL = character to store, BL = new mask
                                ;(AH and BH are exchanged as well)
J520:

        STOSB
        CMP     SI,DX           ;DX also marks the end of the decoded text
        JB      J505
;        MOV     AX,0A0DH        ;Final CR/LF
        MOV     AL,0DH
        STOSW                   ;stores AL and AH; presumably AH still holds
                                ;0AH here -- TODO confirm

        MOV     AX,0924H        ;24H = '$'
        STOSB                   ;'$' terminates the string for service 9
; Call DOS interrupt 21H, service 9 to output text
        INT     21H
;  terminate
        RETN                    ;near return; presumably relies on the usual
                                ;.COM start-up stack to end the program

_SPIT   ENDP
;======================================================
; data
; CMPRSTX is the encoded text.  NXTBIT reads it one byte at a time with
; LODSB and shifts the bits out most significant bit first.
CMPRSTX DW      0d8deH, 02e30H, 0f7a4H, 0c861H, 0babbH, 05550H, 0b3eeH, 0fb9dH
        DW      0a6d9H, 0bbc9H, 0fa3eH, 03329H, 0c368H, 0ded3H, 0ee30H, 0acefH
        DW      073e5H, 076ebH, 067caH, 0d1d1H, 0fda5H, 0a2e3H, 03ebbH, 03c56H
        DW      069b9H, 04d87H, 0a1ddH, 02ae9H, 03b9cH, 0fb39H, 036dfH, 0c2fdH
        DW      03c5cH, 0a3b4H, 09bdeH, 0e9a5H, 0dfafH, 0fb93H, 053f6H, 04e9bH
        DW      0126dH, 065b9H, 03d87H, 0bbfbH, 0c2e8H, 06543H, 0f79eH, 0f597H
        DW      0efe5H, 0ef30H, 06391H, 07b94H, 0cbf6H, 0cdceH, 0f796H, 0f979H
        DW      07759H, 00338H, 012f6H, 0e1efH, 0f34bH, 0157bH, 0de97H, 034fdH
        DW      07b59H, 07687H, 0b89dH, 0bc6cH, 0af5bH, 03d3dH, 068e5H, 004e1H
        DW      093bfH, 040dbH, 09daeH, 039d9H, 09889H, 0ef40H, 04a44H, 0fd00H
        DW      0fe76H, 0c7fbH, 0e684H, 0bdfeH, 096f3H, 0fb5eH, 0e6d1H, 09be8H
        DW      04921H, 0bce3H, 0f134H, 04e87H, 07847H, 0936eH, 0f9beH, 0f4ccH
        DW      0a569H, 0debdH, 01d76H, 043e5H, 030bcH, 0feceH, 0edf4H, 07fc3H
        DW      02b2dH, 077baH, 0ca9aH, 0c286H, 000caH, 0df3eH, 07dbfH, 032f4H
        DW      0c294H, 0ae2dH, 0c9f2H, 036a7H, 0b26cH, 015edH, 0f382H, 03c02H
        DW      0afb9H, 0bebbH, 003e2H, 035f7H, 06eb7H, 06204H, 0ca11H, 0371dH
        DW      0813fH, 0f708H, 02f93H, 07927H, 020adH, 0c11dH, 0bcb4H, 0f7bcH
        DW      03aeeH, 09eccH, 0eed4H, 07defH, 0922eH, 09bbfH, 0c0e0H, 07708H
        DW      0cd86H, 079eeH, 0d9bbH, 0fe6dH, 08083H, 05910H, 0cb2eH, 02b3eH
        DW      0c38bH, 01136H, 04ea3H, 097cdH, 023efH, 0c77aH, 0f37eH, 0b269H
        DW      05f77H, 04fd7H, 0f47eH, 0dd5fH, 009dfH, 0bf43H, 0097bH, 0b47dH
        DW      0da9dH, 00700H, 08075H, 0fb01H, 05149H, 03d4eH, 0eb1dH, 0df1dH
        DW      01d30H, 0dfe4H, 0c8efH, 03ed8H, 0c30cH, 04109H, 09266H, 061a4H
        DW      05bf2H, 02c47H, 02647H, 0fe59H, 01ee6H, 080d2H, 04e0aH, 00ef7H
        DW      0d7f8H, 04c4fH, 0e4bdH, 003a4H, 04336H, 020bfH, 08a77H, 010bdH
        DW      0f08eH, 03a03H, 0e421H, 01c74H, 0edf0H, 0ba94H, 02f5dH, 0e8bbH
        DW      08732H
;        DB      000e0H
; N_ALF holds the characters for codes 27-31.  Each is stored minus 40H
; because the decoder adds 40H to every code after the table lookup.
N_ALF   DB      ' '-40H, ','-40H, '-'-40H, '?'-40H, '.'-40H ;, 0DH-41H

; CTXCHR is never referenced by name.  Presumably it is the scratch area
; that holds the guess list addressed through ARTIF and that absorbs
; backward context compares running below OUTTXT -- TODO confirm.
CTXCHR  DB      264 DUP(?)      ;Room here for lots of garbage
; OUTTXT marks the start of the decoded text.  The decoder writes well past
; this single word (up to OUTTXT+903-22), and the formatted output is built
; directly after the decoded text.
OUTTXT  DW      ?

_COM    ENDS
        END     _SPIT

