//////////////////////////////////////////////////////////////////////////////
//  Huffman compression                                                     //
//                                                                          //
//  Copyright (c) 1997 by Matjaz Trtnik aka maLi/MaLixa                     //
//  Email: mtrtnik@bigfoot.com                                              //
//  Web: http://www2.arnes.si/~ssdmalic/mali/                               //
//                                                                          //
//  The author takes no responsibility, if something in this document or    //
//  the accompanying classes causes any kind of data loss or damage to      //
//  your hardware.                                                          //
//                                                                          //
//  You can use this product strictly for *NON* commercial programs.        //
//  If you want to use it for commercial programs please contact author.    //
//                                                                          //
//  You are not permitted to distribute, sell or use any part of            //
//  this source for your software without special permision of author.      //
//                                                                          //
//////////////////////////////////////////////////////////////////////////////
#include <assert.h>
#include <malloc.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Capacity of every per-symbol table and node list declared below.
// NOTE(review): a byte has only 256 distinct values, so 1024 looks like
// generous slack rather than a requirement -- confirm before shrinking.
#define         NUMBYTES                         1024
// Capacity, in bits, of one stored Huffman code (size of the `bits` arrays).
#define         NUMBITS                            16
// File that main() compresses.
#define         INPUTFILENAME             "input.huf"
// File that huffman_decode() writes the decoded data to.
#define         OUTPUTFILENAME           "output.huf"
// File that huffman_encode() writes and main() reads back for decoding.
#define         TEMPFILENAME            "huffman.tmp"

// One node of the Huffman tree. Leaves carry an input byte; internal nodes
// join two subtrees and carry the sum of their frequencies.
typedef struct _HUFFNODE {
        char              byte;    // Leaf: the byte value; internal nodes are tagged with 'x'
        long              freq;    // Leaf: occurrence count; internal: sum of both children
        struct _HUFFNODE *parent;  // NULL until linked under a parent (stays NULL for the root)
        struct _HUFFNODE *left;    // Child reached by code bit 0 (NULL for leaves)
        struct _HUFFNODE *right;   // Child reached by code bit 1 (NULL for leaves)
} HUFFNODE;

// Code-table entry describing how one byte value is encoded.
typedef struct _HUFFTAB {
        char              byte;           // The original byte value this entry belongs to
        char              numbits;        // Length of the code in bits; 0 marks an unused entry
        long              freq;           // Occurrence count copied from the symbol's leaf node
        long              bits[NUMBITS];  // Code bits, one 0/1 value per element, root-to-leaf order
} HUFFTAB;

// Program-wide state shared by all routines in this file.
FILE       *f;                    // File handle reused by the read, encode and decode routines
long       fsize, numnodes, numbasenodes;  // Size in bytes of the file read last; total tree nodes; leaf count
long       freq_tab[NUMBYTES];    // Frequency table
HUFFNODE  *nodelist[NUMBYTES];    // Work list while building; afterwards all nodes, root at [numnodes-1]
HUFFNODE  *basenode[NUMBYTES];    // Leaf nodes only, one per byte value with a non-zero frequency
HUFFNODE  *templist[NUMBYTES];    // All nodes in creation order (leaves first, then internal nodes)
HUFFTAB    code_tab[NUMBYTES];    // Huffman code per byte value, indexed by the byte

/////////////////////////////////
//  Return file size in bytes  //
/////////////////////////////////
// Returns the size of the open file `f` in bytes, or -1L when the stream
// cannot be measured (ftell()/fseek() failure, e.g. a non-seekable stream).
// The current position of `f` is restored before returning.
long filesize(FILE *f)
{
  long saved, size;

  saved = ftell(f);
  if (saved < 0)
    return -1L;

  if (fseek(f, 0L, SEEK_END) != 0)
    return -1L;

  // May be -1 on failure; it is passed through to the caller after the
  // position has been put back.
  size = ftell(f);

  if (fseek(f, saved, SEEK_SET) != 0)
    return -1L;

  return size;
}

/////////////////////////////////////////////////////////
//  Read input file to memory to speed up compression  //
/////////////////////////////////////////////////////////
// Loads the whole file named `inputstream` into a freshly allocated buffer
// and returns it; the caller owns the buffer and releases it with free().
// The global `fsize` is set to the number of bytes read and the global `f`
// is used as the (closed again) file handle.
// Any failure (open, size query, allocation, short read) prints a message
// to stderr and terminates the program with EXIT_FAILURE, so the function
// never returns NULL.
char *huffman_readinputstream(char *inputstream)
{
  char *stream;

  // Read-only binary mode: the file is never written here, and "r+b" would
  // fail needlessly on files without write permission.
  f = fopen(inputstream, "rb");
  if (f == NULL) {
    perror(inputstream);
    exit(EXIT_FAILURE);
  }

  fsize = filesize(f);
  if (fsize < 0) {
    fprintf(stderr, "huffman: cannot determine size of %s\n", inputstream);
    fclose(f);
    exit(EXIT_FAILURE);
  }

  // Allocate at least one byte so an empty file still yields a valid
  // buffer; calloc(0, ...) is allowed to return NULL.
  stream = calloc(fsize > 0 ? (size_t)fsize : 1, sizeof(char));
  if (stream == NULL) {
    fprintf(stderr, "huffman: out of memory reading %s\n", inputstream);
    fclose(f);
    exit(EXIT_FAILURE);
  }

  if (fsize > 0 && fread(stream, 1, (size_t)fsize, f) != (size_t)fsize) {
    fprintf(stderr, "huffman: cannot read %s\n", inputstream);
    free(stream);
    fclose(f);
    exit(EXIT_FAILURE);
  }

  fclose(f);

  return stream;
}


////////////////////////////
//  Make frequency table  //
////////////////////////////
// Counts how often each byte value occurs in `stream` and stores the counts
// in the global `freq_tab`, indexed by byte value. The number of bytes
// examined is taken from the global `fsize`.
void huffman_buildfreq(char *stream)
{
  long  k;

  for (k = 0; k < NUMBYTES; k++)
    freq_tab[k] = 0;

  // Plain char may be signed: without the cast, bytes >= 0x80 would become
  // negative indices and write in front of the table.
  for (k = 0; k < fsize; k++)
    freq_tab[(unsigned char)stream[k]]++;
}

/////////////////////////////////////////////////////////
//  Sort node list table using straight selection sort //
/////////////////////////////////////////////////////////
// Orders nodes[0 .. numnodes-1] by ascending frequency using selection sort:
// for every position the lowest-frequency node of the remaining tail is
// located and swapped into place. The element count comes from the global
// `numnodes`; every one of those entries must be a valid node pointer.
void huffman_sortnodelist(HUFFNODE *nodes[])
{
  HUFFNODE  *swap;
  long       pos, scan, lowest;

  for (pos = 0; pos + 1 < numnodes; pos++) {

    // Find the first node with the smallest frequency in the unsorted tail
    lowest = pos;
    for (scan = pos + 1; scan < numnodes; scan++) {
      if (nodes[scan]->freq < nodes[lowest]->freq)
        lowest = scan;
    }

    // Already in place, nothing to exchange
    if (lowest == pos)
      continue;

    swap = nodes[pos];
    nodes[pos] = nodes[lowest];
    nodes[lowest] = swap;
  }
}

//////////////////////////
//  Build huffman tree  //
//////////////////////////
// Allocates a tree node with the given payload and links `left` and `right`
// (either may be NULL) underneath it. Terminates the program when memory
// runs out.
static HUFFNODE *huffman_newnode(char byte, long freq, HUFFNODE *left, HUFFNODE *right)
{
  HUFFNODE *node = calloc(1, sizeof *node);

  if (node == NULL) {
    fprintf(stderr, "huffman: out of memory while building the tree\n");
    exit(EXIT_FAILURE);
  }

  node->byte = byte;
  node->freq = freq;
  node->parent = NULL;
  node->left = left;
  node->right = right;

  if (left != NULL)
    left->parent = node;
  if (right != NULL)
    right->parent = node;

  return node;
}

// Swaps the lowest-frequency node of nodes[first .. count-1] into nodes[first].
static void huffman_selectmin(HUFFNODE *nodes[], long first, long count)
{
  HUFFNODE  *swap;
  long       j, lowest = first;

  for (j = first + 1; j < count; j++)
    if (nodes[j]->freq < nodes[lowest]->freq)
      lowest = j;

  swap = nodes[first];
  nodes[first] = nodes[lowest];
  nodes[lowest] = swap;
}

// Builds the Huffman tree for `stream` (length taken from the global `fsize`).
// On return:
//   basenode[0 .. numbasenodes-1]  leaf nodes, one per byte value that occurs
//   nodelist[0 .. numnodes-1]      every node in creation order; the root is
//                                  the last entry, nodelist[numnodes-1]
// An empty input produces no nodes at all (numnodes == 0).
void huffman_buildtree(char *stream)
{
  HUFFNODE  *node;
  HUFFNODE  *node1, *node2;  // Two nodes with smallest frequency
  long       k, live;        // live = number of still unmerged entries in nodelist

  // Build frequency table
  huffman_buildfreq(stream);

  // Build base nodes
  numbasenodes = 0;
  for (k = 0; k < NUMBYTES; k++) {
    if (freq_tab[k] > 0) {

      node = huffman_newnode((char)k, freq_tab[k], NULL, NULL);

      nodelist[numbasenodes] = node;
      basenode[numbasenodes] = node;
      templist[numbasenodes] = node;

      numbasenodes++;
    }
  }

  numnodes = numbasenodes;

  // A lone symbol would be leaf and root at once and end up with a
  // zero-length code, which cannot be written or decoded. Hang it under an
  // artificial root as the left child so that it receives the 1-bit code "0".
  if (numbasenodes == 1)
    templist[numnodes++] = huffman_newnode('x', nodelist[0]->freq, nodelist[0], NULL);

  // Build huffman tree: repeatedly merge the two least frequent nodes.
  // The unmerged part of nodelist shrinks by one per round while numnodes
  // grows, so its length is tracked locally; ordering numnodes entries here
  // would run into the NULL slots left behind at the tail.
  live = numbasenodes;
  while (live > 1) {

    // Bring the two nodes with the smallest frequency to the front
    huffman_selectmin(nodelist, 0, live);
    huffman_selectmin(nodelist, 1, live);

    node1 = nodelist[0];
    node2 = nodelist[1];

    // Make new node and link nodes
    node = huffman_newnode('x', node1->freq + node2->freq, node1, node2);

    // Add new node and remove old two
    nodelist[0] = node;
    for (k = 1; k < live - 1; k++)
      nodelist[k] = nodelist[k+1];
    nodelist[live - 1] = NULL;
    live--;

    templist[numnodes++] = node;
  }

  for (k = 0; k < numnodes; k++)
    nodelist[k] = templist[k];
}


////////////////////////////////////
//  Write huffman tree to output  //
////////////////////////////////////
// Prints the subtree rooted at `root` to stdout, one "byte = frequency" line
// per node: the node itself first, then its right subtree, then its left
// subtree. A NULL `root` prints nothing.
void huffman_writetree(HUFFNODE *root)
{
  if (root == NULL)
    return;

  // freq is a long, so it needs %ld (the previous %d was a type mismatch)
  printf("\n%c = %ld ", root->byte, root->freq);

  huffman_writetree(root->right);
  huffman_writetree(root->left);
}

///////////////////////////////
//  Build huffman code node  //
///////////////////////////////
// Derives the Huffman code of the leaf `node` by walking up to the root and
// stores it, together with the byte value and its frequency, in the global
// `code_tab` entry indexed by the byte value.
// A step from a left child contributes bit 0, any other step bit 1.
// Terminates the program if the code needs more than NUMBITS bits, because
// the table entry cannot hold it.
void huffman_buildcode(HUFFNODE *node)
{
  long           bitpos, b;
  long           bits[NUMBITS];
  long           freq = node->freq;
  // Index by the unsigned byte value; a signed char would turn bytes
  // >= 0x80 into negative table indices.
  unsigned char  byte = (unsigned char)node->byte;
  HUFFTAB       *entry = &code_tab[byte];

  bitpos = 0;

  while (node->parent != NULL) {

    // The tree may be deeper than the table allows; stop before writing
    // past the end of bits[].
    if (bitpos >= NUMBITS) {
      fprintf(stderr, "huffman: code for byte %u is longer than %d bits\n",
              (unsigned)byte, NUMBITS);
      exit(EXIT_FAILURE);
    }

    bits[bitpos++] = (node == node->parent->left) ? 0 : 1;

    node = node->parent;
  }

  // Codes are reversed since code building is done from leafs to root
  for (b = 0; b < bitpos; b++)
    entry->bits[b] = bits[bitpos-b-1];

  entry->numbits = (char)bitpos;
  entry->byte = (char)byte;
  entry->freq = freq;
}

////////////////////////////////////////////////////////
//  Return number of bits required to encode letter   //
//  Formula is: b = f(-log2(p));                      //
//  p    = probability of occurrence of the letter    //
//  f(x) = the closest integer greater or equal to x  //
////////////////////////////////////////////////////////
// Returns ceil(-log2(p)): the smallest whole number of bits b for which
// 2^-b <= p, where p is the probability of a symbol.
// The value is found by halving a threshold instead of calling log2(), which
// is exact for powers of two and cannot hit rounding errors.
// p >= 1 yields 0. p <= 0 (or NaN) has no meaningful code length and also
// yields 0 instead of invoking undefined behaviour.
long huffman_getcodebits(float p)
{
  long    bits = 0;
  double  threshold = 1.0;   // Always equals 2^-bits

  if (!(p > 0.0f))
    return 0;

  while (threshold > p) {
    threshold *= 0.5;
    bits++;
  }

  return bits;
}


////////////////////////////////////////////
//  Write some statistics data to screen  //
////////////////////////////////////////////
void huffman_writestatistic(void)
{
  long    k, b;
  double  p;

  for (k = 0; k < numbasenodes; k++) {
    p = (double)basenode[k]->freq / (double)fsize;
    printf("\n%c = %d ==> probability:%4.2f, bits to encode letter:%d ==> huffman code: ",basenode[k]->byte, basenode[k]->freq, p,huffman_getcodebits(p));
    for (b = 0; b < code_tab[basenode[k]->byte].numbits; b++)
      printf("%d",code_tab[basenode[k]->byte].bits[b]);

  }
}

////////////////////////
//  Huffman encoding  //
////////////////////////
// Writes the low 32 bits of `value` to `out` as four bytes, least
// significant byte first.
static void huffman_put32le(FILE *out, unsigned long value)
{
  int shift;

  for (shift = 0; shift < 32; shift += 8)
    fputc((int)((value >> shift) & 0xFFUL), out);
}

// Adds one bit to the byte being assembled in `*acc` (most significant bit
// first) and writes the byte to `out` once all eight positions are used.
static void huffman_putbit(FILE *out, unsigned char *acc, long *bitpos, long bit)
{
  if (bit)
    *acc |= (unsigned char)(1u << *bitpos);

  if (--*bitpos < 0) {
    fputc(*acc, out);
    *acc = 0;
    *bitpos = 7;
  }
}

// Compresses `stream` (length taken from the global `fsize`) with the codes
// in the global `code_tab` and writes the result to TEMPFILENAME, then
// prints the original size, compressed size and ratio to stdout.
//
// File layout:
//   4 bytes  number of symbols in the code table (little endian)
//   per symbol: 1 byte value, 1 byte code length, then the code bits packed
//               MSB first into (length / 8) + 1 bytes
//   4 bytes  total number of encoded data bits (little endian)
//   data bits packed MSB first, followed by the last, partly filled byte
//
// Both counters are written as exactly four little-endian bytes because that
// is what huffman_decode() reads; writing sizeof(long) bytes would produce
// eight on LP64 systems. Sizes that need more than 32 bits are therefore
// not representable.
// Terminates the program if the output file cannot be created or written.
void huffman_encode(char *stream)
{
  unsigned char  byte, b;
  long           huffsymb;
  long           k, j, bitpos, numbits = 0;

  f = fopen(TEMPFILENAME,"w+b");
  if (f == NULL) {
    perror(TEMPFILENAME);
    exit(EXIT_FAILURE);
  }

  // Count number of huffman symbols
  huffsymb = 0;
  for (k = 0; k < NUMBYTES; k++)
    if (code_tab[k].numbits > 0) huffsymb++;
  huffman_put32le(f, (unsigned long)huffsymb);

  // Write huffman codes as header
  for (k = 0; k < NUMBYTES; k++) {

    if (code_tab[k].numbits <= 0)
      continue;

    // Count number of all bits
    numbits += code_tab[k].freq * code_tab[k].numbits;

    // Original byte
    fputc((unsigned char)code_tab[k].byte, f);

    // Number of bits required to encode original letter
    fputc((unsigned char)code_tab[k].numbits, f);

    bitpos = 7; // 8 bits counter
    b = 0;      // New symbol
    for (j = 0; j < code_tab[k].numbits; j++)
      huffman_putbit(f, &b, &bitpos, code_tab[k].bits[j]);

    // The last byte is written even when it holds no bits: the decoder
    // always consumes (numbits / 8) + 1 code bytes per symbol.
    fputc(b, f);
  }

  huffman_put32le(f, (unsigned long)numbits);

  // Encode and write input data stream to file
  bitpos = 7; // 8 bits counter
  b = 0;      // New symbol
  for (k = 0; k < fsize; k++) {

    // Unsigned so that bytes >= 0x80 select the right table entry
    byte = (unsigned char)stream[k];

    for (j = 0; j < code_tab[byte].numbits; j++)
      huffman_putbit(f, &b, &bitpos, code_tab[byte].bits[j]);
  }
  fputc(b, f);

  if (ferror(f)) {
    fprintf(stderr, "huffman: error writing %s\n", TEMPFILENAME);
    fclose(f);
    exit(EXIT_FAILURE);
  }

  k = ftell(f);
  printf("\nOriginal size   : %ld bytes",fsize);
  printf("\nCompressed size : %ld bytes",k);
  printf("\nCompressed ratio: %4.2f\n",(float)k/(float)fsize);

  if (fclose(f) != 0) {
    perror(TEMPFILENAME);
    exit(EXIT_FAILURE);
  }
}

////////////////////////
//  Huffman decoding  //
////////////////////////
void huffman_decode(char *stream)
{
  HUFFNODE   *node;
  long        huffsymb;     // Number of huffman symbols
  char        numbits, numbytes, byte, b, bit;
  signed char bitpos;
  long        k, j, i, l = 0;


  huffsymb = stream[l] + (stream[l+1] << 8) + (stream[l+2] << 16) + (stream[l+3] << 24);
  l += 4;

  f = fopen(OUTPUTFILENAME,"w+b");

  // Read huffman code table
  for (k = 0; k < huffsymb; k++) {

    byte = stream[l++];
    numbits = stream[l++];
    numbytes = (numbits >> 3) + 1; // Get number of bytes needed to decode letter

    i = 0;
    for (j = 0; j < numbytes; j++) {
      b = stream[l++];
      bitpos = 8;
      while (numbits-- > 0 && bitpos-- > 0)
        code_tab[byte].bits[i++] = (b & (1 << bitpos)) >> bitpos;
    }
  }

  // Number of all encoded bits
  k = stream[l] + (stream[l+1] << 8) + (stream[l+2] << 16) + (stream[l+3] << 24);
  l += 4;

  // Number of all encoded bits
  node = nodelist[numnodes-1];
  b = stream[l++];
  bitpos = 7;
  while (k-- > 0) {
    bit = (b & (1 << bitpos)) >> bitpos--;
    if (bit == 0) {
      node = node->left;
      if (node->left == NULL) {
        fwrite(&node->byte, sizeof(node->byte), 1, f);
        node = nodelist[numnodes-1];
      }
    }
    else {
      node = node->right;
      if (node->right == NULL) {
        fwrite(&node->byte, sizeof(node->byte), 1, f);
        node = nodelist[numnodes-1];
      }
    }
    if (bitpos < 0) {
      b = stream[l++];
      bitpos = 7;
    }
  }
  fclose(f);
}

////////////////////
//  Main program  //
////////////////////
main()
{
  char   *stream;
  long    k;

  printf("Huffman compression\n");
  printf("Copyright (c) 1997 by Matjaz Trtnik\n");

  // Read input stream
  stream = huffman_readinputstream(INPUTFILENAME);

  // Build huffman tree
  huffman_buildtree(stream);

  // Get huffman codes
  memset(&code_tab, 0, sizeof(code_tab));
  for (k = 0; k < numbasenodes; k++)
    huffman_buildcode(basenode[k]);

  // Encode input stream and write it to file
  huffman_encode(stream);
  free(stream);

  // Read encoded stream
  stream = huffman_readinputstream(TEMPFILENAME);

  // Decode input stream and write original file
  huffman_decode(stream);
  free(stream);
  huffman_writestatistic();
  return 1;
}