#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Name of the input file that main() opens and compresses. */
#define TEXT_FILE	"text.txt"
/* Size in bytes of the input buffer in main() and of the output buffer stext. */
#define BUF_SIZE	1024
/* compress() drops a text position from its match lists once it lies
 * MAX_DIST positions behind the position being added.
 */
#define	MAX_DIST	512

/* Byte is used to turn a (possibly signed) char into a 0..255 table index. */
typedef unsigned char Byte;
typedef unsigned short Word;

/* Node of a singly linked list of text positions.  compress() keeps one such
 * list per byte value and allocates every node with malloc().
 */
typedef struct _List {
  struct _List *next;	/* following node, or 0 at the end of the list */
  unsigned pos;		/* index into the text being compressed */
} List;

/* Code -> character table: the index of a character in this string is the
 * code that compress() emits for it (as a 6-bit field).
 */
char code_to_char_map[] = " \r\n$,-.?BINOPSTYabcdefghijklmnopqrstuvwxyz";
/* Character -> code table, indexed by byte value; -1 marks a byte that has
 * no code.  Filled in by build_char_to_code_map().
 */
int char_to_code_map[256];
/* Bit-packed output: appended to by to_squish(), printed by flush_squish(). */
char stext[BUF_SIZE];
/* slen is the number of bytes of stext in use.  sbit is the index of the bit
 * most recently written in stext[slen - 1]; 0 means the next bit starts a new
 * byte.
 */
int slen = 0, sbit = 0;

/*
 * Build char_to_code_map as the inverse of code_to_char_map: every byte value
 * that appears in code_to_char_map maps to its index there, and every other
 * byte value maps to -1.
 */
void build_char_to_code_map() {
  const char *entry;
  int byte_value;

  /* Start with every byte value marked as having no code. */
  byte_value = 256;
  while (byte_value-- > 0)
    char_to_code_map[byte_value] = -1;

  /* A character's code is its offset from the start of the table. */
  for (entry = code_to_char_map; *entry != '\0'; ++entry)
    char_to_code_map[(Byte)*entry] = (int)(entry - code_to_char_map);
}

void to_squish(int bits, int whatever) {
  int i;

  i = bits;
  while (i--) {
    if (sbit == 0) { stext[slen] = 0; ++slen; sbit = 7; } else --sbit;
    stext[slen - 1] |= ((whatever >> i) & 1) << sbit;
  }
}

void flush_squish() {
  int i;

  for (i = 0; i < slen; ++i) printf("  db %d\n", (int)(Byte)stext[i]);
  slen = 0; sbit = 0;
}

void compress(const char *ztext, size_t zlen) {
  static List *where[256];
  List *here, *there;
  unsigned i, j, k, best_pos, best_len;
  char c;

/* Compress. */

  for (i = 0; i < 256; ++i) where[i] = 0;
  i = 0;
  while (i < zlen) {
    c = ztext[i];
    best_pos = best_len = 0;
    for (here = where[(Byte)c]; here; here = here->next) {
      j = here->pos; k = i;
      while (j < i && k < zlen && k < i + 10 && ztext[j] == ztext[k])
	{ ++j; ++k; }
      if (k - i > best_len) { best_pos = here->pos; best_len = k - i; }
    }
    if (best_len < 3) {
      if (char_to_code_map[c] == -1) {
	fprintf(stderr, "invalid character 0x%02x\n", (int)(Byte)c);
	exit(1);
      }
      to_squish(6, char_to_code_map[(Byte)c]);
      j = 1;
    }
    else {
      to_squish(11, best_pos - i);
      to_squish(3, best_len - 3);
      j = best_len;
    }
    while (j--) {
      c = ztext[i];
      if (i >= MAX_DIST) {
	for (here = 0, there = where[(Byte)ztext[i - MAX_DIST]];
	     there->pos != i - MAX_DIST; here = there, there = there->next);
	if (!here) {
	  here = where[(Byte)ztext[i - MAX_DIST]];
	  where[(Byte)ztext[i - MAX_DIST]] = here->next;
	  free(here);
	}
	else
	  { here->next = there->next; free(there); }
      }
      here = where[(Byte)c];
      there = malloc(sizeof(List));
      there->next = here;
      there->pos = i;
      where[(Byte)c] = there;
      ++i;
    }
  }

/* Add terminator. */

  to_squish(6, strlen(code_to_char_map));

/* Write. */

  flush_squish();

/* Clean up. */

  for (i = 0; i < 256; ++i) {
    here = where[i];
    while (here) { there = here->next; free(here); here = there; }
  }
}

/*
 * Write a complete assembly listing to stdout: the decoder code, then the
 * compressed contents of TEXT_FILE as "db" lines under the `passage' label,
 * then the first part of the code-to-character table under the
 * `code_to_char_map' label (the decoder generates the trailing 26 letters
 * 'a'..'z' itself).
 *
 * At most BUF_SIZE bytes of TEXT_FILE are read, and a '$' is appended to the
 * text before compression.
 *
 * Returns 0 on success, 1 if TEXT_FILE cannot be opened or yields no data.
 */
int main(void) {
  size_t text_len;
  FILE *f;
  int i;
  int map_len;
  char buf[BUF_SIZE + 1];       /* one spare byte for the appended '$' */

/* Write out program code. */

  /* strlen() returns size_t; convert once so the %d arguments are ints. */
  map_len = (int)strlen(code_to_char_map);

  printf(
    "  org 0x0100\n"
    "start:\n"
    "  mov di,code_to_char_map+%d\n"
    "  mov al,'a'\n"
    "  mov cl,26\n"
    ".0:\n"
    "  stosb\n"
    "  inc ax\n"
    "  loop .0\n"
    "  push di\n"
    "  mov si,passage\n"
    "  mov bx,code_to_char_map\n"
    "  cwd\n"                   /* ah = 0 -> dx = 0 */
    ".1:\n"
    "  mov cl,6\n"
    "  call get_bits\n"
    "  cmp al,%d\n"
    "  ja expand\n"
    "  xlatb\n"
    "  stosb\n"
    "  jnz .1\n"
    "  pop dx\n"
    "  mov ah,0x09\n"
    "  int 0x21\n"              /* no `ret' here: execution falls through
                                 * into `get_bits' and relies on its `ret'
                                 * -- an ugly hack, but it'll work unless
                                 * the code in `get_bits' changes.
                                 */
    "get_bits:\n"
    "  xor al,al\n"
    ".0:\n"
    "  shl dl,1\n"
    "  jnz .1\n"
    "  mov dl,[si]\n"
    "  inc si\n"
    "  stc\n"
    "  rcl dl,1\n"
    ".1:\n"
    "  rcl al,1\n"
    "  loop .0\n"
    "  ret\n"
    "expand:\n"
    "  mov ah,al\n"
    "  mov cl,5\n"
    "  call get_bits\n"
    "  mov cl,3\n"
    "  shl al,cl\n"
    "  shr ax,cl\n"
    "  xchg bp,ax\n"
    "  call get_bits\n"
    "  push si\n"
    "  lea si,[di+bp+0xf800]\n"
    "  add al,3\n"
    "  mov cl,al\n"
    "  rep movsb\n"
    "  pop si\n"
    "  jmp short start.1\n"
    "passage:\n",
    map_len - 26,
    map_len);

/* Read data file. */

  f = fopen(TEXT_FILE, "rb");
  if (!f) {
    fprintf(stderr, "cannot open %s\n", TEXT_FILE);
    return 1;
  }
  text_len = fread(buf, 1, BUF_SIZE, f);
  fclose(f);                    /* close before any early return */
  if (text_len == 0) {
    fprintf(stderr, "no data read from %s\n", TEXT_FILE);
    return 1;
  }
  /* buf holds BUF_SIZE + 1 bytes, so this fits even after a full read. */
  buf[text_len++] = '$';

/* Compress. */

  build_char_to_code_map();
  compress(buf, text_len);

/* Write code-to-character map (less the last 26 letters). */

  printf("code_to_char_map:\n");
  for (i = 0; code_to_char_map[i + 26]; ++i)
    printf("  db %d\n", (int)(Byte)code_to_char_map[i]);

/* Done. */

  return 0;
}
