// Compute the frequency of each word in the input stream.  An example
// program for skip lists.

// This version turns out to be slower than the Python version and the
// trie version in C, so far.

#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum { max_level = 64 };

struct skipnode;
typedef struct skipnode {
  char *word;
  int freq;
  int level;                    // at least 1
  struct skipnode *nexts[0];    // contains `level` pointers, longest skip last
} skipnode;

// Allocate a new skipnode, leaving its nexts and freq uninitialized.
static skipnode *new_skipnode(char *word, int level)
{
  assert(level >= 1);
  skipnode *new = malloc(sizeof(skipnode) + level * sizeof(skipnode*));
  if (!new) abort();

  new->word = strdup(word);
  new->level = level;
  /* for (int i = 0; i < level; i++) { */
  /*   new->nexts[i] = (void*)0xdeadbeef; */
  /* } */
  return new;
}

// We need a list head structure that is guaranteed to be at least as
// tall as any node in the list.  It’s convenient to be able to treat
// it as a skip list node, but one where we don’t care what data it
// holds.
static skipnode *new_skiplist()
{
  skipnode *rv = new_skipnode("XXX dummy data should not be used", max_level);
  rv->level = 1;
  rv->nexts[0] = NULL;
  return rv;
}

// Generate an exponentially distributed level >= 1.  Max return value
// is 64 on LP64.
static int generate_level()
{
  unsigned long raw = random();

  // I think there's an instruction on modern processors for this...
  int count = 1;
  while ((raw & 1) != 0) {
    count++;
    raw >>= 1;
  }

  assert(count <= max_level);
  return count;      // Using count/2 + 1 here reduces run time by 10%
}

// Increment the count for word.  For this simple program, I’ve
// inlined the skiplist node creation and searching subroutines.
static void increment(skipnode *head, char *word)
{
  skipnode *updates[max_level];

  skipnode *where = head;
  for (int i = head->level - 1; i >= 0; i--) {
    // Step past all the nodes of level i that precede the word.  This
    // is not written in the usual for-loop form because we want to
    // save the *previous* node at each level, in case we need to
    // insert.
    for (;;) {
      skipnode *next = where->nexts[i];
      if (!next || 0 >= strcmp(word, next->word)) break;
      where = next;
    }

    updates[i] = where;
  }

  // Did we find the word in the skip list already?
  skipnode *node = where->nexts[0];
  if (node && 0 == strcmp(word, node->word)) {
    node->freq++;
    return;
  }

  // If not, create and insert new node
  node = new_skipnode(word, generate_level());
  node->freq = 1;
  int copy_from_updates;
  if (node->level <= head->level) {
    copy_from_updates = node->level;
  } else {
    // We need to increase the level of the head.
    copy_from_updates = head->level;
    for (int i = head->level; i < node->level; i++) {
      head->nexts[i] = node;
      node->nexts[i] = NULL;
    }
    head->level = node->level;
  }

  for (int i = 0; i < copy_from_updates; i++) {
    node->nexts[i] = updates[i]->nexts[i];
    updates[i]->nexts[i] = node;
  }
}

static void iterate(skipnode *node,
                    void (*f)(void *userdata, char *word, int freq),
                    void *userdata)
{
  // Note we skip the head node, since its data is to be ignored.
  while ((node = node->nexts[0])) f(userdata, node->word, node->freq);
}

// Although for this program it isn’t really necessary, deallocation
// is kind of necessary in general, so not including it would give a
// false picture of the complexity of skip lists.
static void delete(skipnode *node)
{
  while (node) {
    skipnode *next = node->nexts[0];
    free(node->word);
    free(node);
    node = next;
  }
}

static void print_freq(void *userdata, char *word, int freq)
{
  printf("%8d %s\n", freq, word);
}

int main()
{
  skipnode *list = new_skiplist();
  char word[256];
  int wordlen = 0, c;

  while ((c = getchar()) != EOF) {
    if (isspace(c)) {
      if (wordlen) {
        word[wordlen] = '\0';
        increment(list, word);
        wordlen = 0;
      }
    } else {
      word[wordlen++] = c;
      assert(wordlen < sizeof(word));
    }
  }

  iterate(list, print_freq, NULL);

  delete(list);

  return 0;
}