/* Build a balanced binary tree in linear time from sorted data.

Builds the tree from a sorted input text file, bottom up, then
looks up line numbers in it.

The algorithm does not require advance knowledge of the number of
items, but it does require them to be sorted. It uses a stack of full
perfectly-balanced trees of strictly decreasing weight (number of
leaves), each a power of two. Upon adding a new leaf node to the top
of the stack, if the sizes of the trees in the stack are not strictly
decreasing from bottom to top, it merges them into bigger trees until
they are.

Leaf nodes look like {type = leaf_node, key = k, contents.leaf.val = v}.
Internal nodes look like {type = internal_node, key = kr,
contents.internal = { left = tl, right = tr} }, where kr is the leftmost
key in the subtree rooted at tr.

All internal nodes have two children, which are either leaf nodes or
internal nodes. Null pointers are only used as the representation of
an entirely empty tree, never as part of a nonempty tree.

I just wrote this C version to see if the Lua version was slow
because LuaJIT was slow or because the algorithm was slow. Seems
like it was the algorithm; this was only about 4× faster than
LuaJIT last time I benchmarked it.

I’m sure this is a well-known algorithm, but I don’t know who invented
it.

I did some test runs on my 1.6GHz Atom netbook, compiled with -O,
using commands like this:

    head -100 /usr/share/dict/words | \
        ./bbt $(head -100 /usr/share/dict/words | sort -R) \
        > tmp.out

Results are somewhat unreliable-looking:

* For 5 input terms:
        With tree: 741536 searches in 1.000000 seconds, 741536/sec
        With linear array: 730524 searches in 1.000002 seconds, 730522/sec
* For 10 input terms:
        With tree: 799524 searches in 1.028612 seconds, 777284/sec
        With linear array: 534141 searches in 1.000004 seconds, 534138/sec
* For 30 input terms:
        With tree: 782739 searches in 1.000008 seconds, 782732/sec
        With linear array: 215238 searches in 1.046392 seconds, 205695/sec
* For 100 input terms:
        With tree: 659934 searches in 1.000036 seconds, 659910/sec
        With linear array: 66132 searches in 1.003488 seconds, 65902/sec
* For 1000 input terms:
        With tree: 336663 searches in 1.004140 seconds, 335274/sec
        With linear array: 2997 searches in 1.031656 seconds, 2905/sec
* For 10 000:
        With tree: 189981 searches in 1.114791 seconds, 170418/sec
        With linear array: 9999 searches in 17.912256 seconds, 558/sec
* For 30 000 (this took 5 minutes):
        With tree: 119996 searches in 1.175557 seconds, 102075/sec
        With linear array: 29999 searches in 145.869930 seconds, 205/sec
* For the whole 98 569 word dictionary, but only doing 30 000 random 
  searches:
        < /usr/share/dict/words time ./bbt \
            $(< /usr/share/dict/words sort -R | head -30000) \
            > tmp.out-all
        With tree: 180000 searches in 1.079684 seconds, 166715/sec
        With linear array: 30000 searches in 516.880298 seconds, 58/sec

Note that the tree results for 10 000 terms are one-quarter as fast as
the results for 100 terms, rather than one-half as fast, as O(log N)
says they should be. For 30 000 terms, they’re 15.5% as fast, instead
of 44.7%, as the theory predicts.  I assume the extra factor of 3
comes from cache effects.

Contrary to my expectations, **the linear-search approach was never
faster than the tree approach**, even for 5 terms.

*/

#define _BSD_SOURCE 1           /* to get strdup() */

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <string.h>

#ifdef __GNUC__
#define attribute(x) __attribute__(x)
#else
#define attribute(x)
#endif

#define UNUSED attribute((unused))


/**************************/
/* Balanced binary trees. */
/**************************/

/* A fairly vanilla binary-tree node type. Because this code is for
 * purposes of demonstration rather than use, it's specialized to be a
 * map from strings to ints. */
typedef struct treenode {
  enum { internal_node, leaf_node } type;
  char *key;
  union {
    struct { int val; } leaf;
    struct { struct treenode *left, *right; } internal;
  } contents;
} treenode;

/* The internal state of the tree-building algorithm. */
enum { STACK_SIZE = 30 };
typedef struct tree_builder {
  int leaf_nodes;
  int stack_pointer;       /* Points at the slot the next push goes into. */
  struct tree { treenode *root; char *leftmost; } stack[STACK_SIZE];
} tree_builder;

/* Call this to initialize a new tree_builder structure. */
void
init_tree_builder(tree_builder *tb)
{
  tb->leaf_nodes = 0;
  tb->stack_pointer = 0;
  tb->stack[0].root = 0;        /* For the empty-input case */
}

/* Pop a tree off the tree_builder stack. */
static struct tree
pop(tree_builder *tb)
{
  assert(tb->stack_pointer > 0);
  return tb->stack[--tb->stack_pointer];
}

/* Push a tree onto the tree_builder stack. */
static void 
push(tree_builder *tb, treenode *node, char *leftmost)
{
  assert(tb->stack_pointer < sizeof(tb->stack)/sizeof(tb->stack[0]));
  tb->stack[tb->stack_pointer].leftmost = leftmost;
  tb->stack[tb->stack_pointer++].root = node;
}

/* Create a new treenode that is an internal node, or abort() if no space. */
static treenode *
new_internal_node(treenode *left, treenode *right, char *key)
{
  treenode *rv = malloc(sizeof(*rv));
  if (!rv) abort();

  rv->type = internal_node;
  rv->key = key;
  rv->contents.internal.left = left;
  rv->contents.internal.right = right;

  return rv;
}

/* Join the two top trees on the stack into a new unified tree on the stack */
static void
concatenate_trees(tree_builder *tb)
{
  struct tree left, right;

  right = pop(tb);
  left = pop(tb);
  push(tb, new_internal_node(left.root, right.root, right.leftmost),
       left.leftmost);
}

/* Create a new treenode for a name-value pair, or abort() if no space */
static treenode *
new_leaf_node(char *key, int val)
{
  treenode *rv = malloc(sizeof(*rv));
  if (!rv) abort();

  rv->type = leaf_node;
  rv->key = strdup(key);
  if (!rv->key) abort();
  rv->contents.leaf.val = val;

  return rv;
}

/* Adds a name-value pair to the tree builder. */
void
add_string(tree_builder *tb, char *key, int val)
{
  int x;
  treenode *node = new_leaf_node(key, val);

  push(tb, node, node->key);
  tb->leaf_nodes++;

  x = tb->leaf_nodes;
  while (!(x & 1)) {
    concatenate_trees(tb);
    x >>= 1;
  }
}

/* Return a single tree containing the entire contents of the tree builder */
treenode *
finish_tree(tree_builder *tb)
{
  while (tb->stack_pointer > 1) concatenate_trees(tb);
  return tb->stack[0].root;
}

/* Build a line-number tree from the contents of a text file. */
static treenode *
build_tree(FILE *input)
{
  char buf[256];                /* Uh, yeah. */
  char *nl;
  int line_no = 0;
  tree_builder tb;
  init_tree_builder(&tb);

  while (fgets(buf, sizeof(buf), input)) {
    if ((nl = strchr(buf, '\n'))) *nl = '\0';
    add_string(&tb, buf, ++line_no);
  }

  return finish_tree(&tb);
}

/* Find the nearest leafnode to the search key, or NULL if tree is empty. */
treenode *
search(treenode *tree, char *word)
{
  for (;;) {
    if (!tree) return NULL;
    if (tree->type == leaf_node) return tree;
    tree = (strcmp(tree->key, word) > 0) ? tree->contents.internal.left
                                         : tree->contents.internal.right;
  }
}

/* Holds the state of an iteration over a tree. */
typedef struct treenode_iterator {
  int stack_pointer;
  treenode *stack[STACK_SIZE];
} treenode_iterator;

/* Begins iteration over a tree. */
void
init_treenode_iterator(treenode_iterator *it, treenode *tree)
{
  it->stack_pointer = tree ? 1 : 0;
  it->stack[0] = tree;
}

/* Returns the next leaf node in the tree, or NULL if exhausted. */
treenode *
next_leafnode(treenode_iterator *it)
{
  treenode *next;
  if (it->stack_pointer == 0) return NULL;

  next = it->stack[--it->stack_pointer];
  while (next->type == internal_node) {
    it->stack[it->stack_pointer++] = next->contents.internal.right;
    next = next->contents.internal.left;
  }

  return next;
}


/********************************************************/
/* Simplest, dumbest symbol table, using linear search. */
/********************************************************/

/* How does it perform by comparison? */
/* XXX add code to find out! */

enum { entry_capacity = 131072, name_capacity = 1048576 };

typedef struct dumb_table {
  char names[name_capacity], *input_ptr, *name_ptr;
  void *values[entry_capacity];
  void **entry;          /* Points to the entry found by find_entry */
} dumb_table;

void
init_dumb_table(dumb_table *t)
{
  t->input_ptr = t->names;
  t->name_ptr = t->names;
}

/* Returns true if found; entry found is left in t->entry. */
int
find_entry(dumb_table *t, char *s)
{
  char *sp = t->names;
  for (t->entry = t->values; sp < t->name_ptr && 0 != strcmp(s, sp); 
       sp += strlen(sp)+1) {
    if (++t->entry == t->values + entry_capacity) abort();
  }
  return sp < t->name_ptr;
}

static inline void
read_character(dumb_table *t, char c) 
{
  if (t->names + name_capacity == t->input_ptr) abort(); /* buffer full. */
  *t->input_ptr++ = c; 
}

void discard_name(dumb_table *t) { t->input_ptr = t->name_ptr; }

void **
name_done(dumb_table *t) 
{
  read_character(t, 0);
  if (!find_entry(t, t->name_ptr)) return 0;
  discard_name(t); /* It was found, so we don’t need to save another copy. */
  return t->entry;
}

void
save_name(dumb_table *t, void *value) {
  *t->entry = value;
  t->name_ptr = t->input_ptr;
}


/*****************/
/* Main program. */
/*****************/

/* Print a representation of a treenode returned by search. */
static void
display(treenode *result, char *word)
{
  if (result) {
    printf("%s\t%s\t%d\n", word, result->key, result->contents.leaf.val);
  } else {
    printf("%s\tnil\n", word);
  }
}

/* Dumps out a tree for debugging. */
static void UNUSED
dump(treenode *nn, int depth)
{
  if (nn->type == internal_node) {
    dump(nn->contents.internal.left, depth + 1);
  }

  for (int ii=0; ii < depth; ii++) printf("  ");
  printf("%s", nn->key);
  if (nn->type == leaf_node) printf(" %d", nn->contents.leaf.val);
  printf("\n");

  if (nn->type == internal_node) {
    dump(nn->contents.internal.right, depth + 1);
  }
}

int 
main(int argc, char **argv)
{
  char **arg;
  treenode *tree = build_tree(stdin); /* e.g. /usr/share/dict/words */
  dumb_table dt;
  treenode_iterator it;
  treenode *each_leaf;
  int nsearches;
  struct timeval start, end, duration;
  double duration_float;

  gettimeofday(&start, 0);
  nsearches = 0;

  for (;;) {
    for (arg = &argv[1]; arg != &argv[argc]; arg++) {
      treenode *result = search(tree, *arg);
      if (nsearches < 100) display(result, *arg);
    }

    nsearches += argc-1;

    gettimeofday(&end, 0);
    timersub(&end, &start, &duration);
    if (duration.tv_sec) break;
  }

  duration_float = (duration.tv_sec + duration.tv_usec / (1000.0 * 1000));
  printf("With tree: %d searches in %f seconds, %d/sec\n",
         nsearches,
         duration_float,
         (int)(nsearches / duration_float));

  init_dumb_table(&dt);
  init_treenode_iterator(&it, tree);
  while ((each_leaf = next_leafnode(&it))) {
    char *s;
    for (s = each_leaf->key; *s; s++) read_character(&dt, *s);
    name_done(&dt);
    save_name(&dt, (void*)each_leaf->contents.leaf.val);
  }

  gettimeofday(&start, 0);
  nsearches = 0;

  for (;;) {
    for (arg = &argv[1]; arg != &argv[argc]; arg++) {
      if (find_entry(&dt, *arg)) {
        if (nsearches < 100) printf("%s\t%d\n", *arg, (int)*dt.entry);
      } else {
        if (nsearches < 100) printf("%s\tnil\n", *arg);
      }
    }

    nsearches += argc-1;

    gettimeofday(&end, 0);
    timersub(&end, &start, &duration);
    if (duration.tv_sec) break;
  }

  duration_float = (duration.tv_sec + duration.tv_usec / (1000.0 * 1000));
  printf("With linear array: %d searches in %f seconds, %d/sec\n",
         nsearches,
         duration_float,
         (int)(nsearches / duration_float));

  return 0;
}

