// Compute the frequency of each word in the input stream. An example // program for skip lists. // This version turns out to be slower than the Python version and the // trie version in C, so far. #include #include #include #include #include enum { max_level = 64 }; struct skipnode; typedef struct skipnode { char *word; int freq; int level; // at least 1 struct skipnode *nexts[0]; // contains `level` pointers, longest skip last } skipnode; // Allocate a new skipnode, leaving its nexts and freq uninitialized. static skipnode *new_skipnode(char *word, int level) { assert(level >= 1); skipnode *new = malloc(sizeof(skipnode) + level * sizeof(skipnode*)); if (!new) abort(); new->word = strdup(word); new->level = level; /* for (int i = 0; i < level; i++) { */ /* new->nexts[i] = (void*)0xdeadbeef; */ /* } */ return new; } // We need a list head structure that is guaranteed to be at least as // tall as any node in the list. It’s convenient to be able to treat // it as a skip list node, but one where we don’t care what data it // holds. static skipnode *new_skiplist() { skipnode *rv = new_skipnode("XXX dummy data should not be used", max_level); rv->level = 1; rv->nexts[0] = NULL; return rv; } // Generate an exponentially distributed level >= 1. Max return value // is 64 on LP64. static int generate_level() { unsigned long raw = random(); // I think there's an instruction on modern processors for this... int count = 1; while ((raw & 1) != 0) { count++; raw >>= 1; } assert(count <= max_level); return count; // Using count/2 + 1 here reduces run time by 10% } // Increment the count for word. For this simple program, I’ve // inlined the skiplist node creation and searching subroutines. static void increment(skipnode *head, char *word) { skipnode *updates[max_level]; skipnode *where = head; for (int i = head->level - 1; i >= 0; i--) { // Step past all the nodes of level i that precede the word. This // is not written in the usual for-loop form because we want to // save the *previous* node at each level, in case we need to // insert. for (;;) { skipnode *next = where->nexts[i]; if (!next || 0 >= strcmp(word, next->word)) break; where = next; } updates[i] = where; } // Did we find the word in the skip list already? skipnode *node = where->nexts[0]; if (node && 0 == strcmp(word, node->word)) { node->freq++; return; } // If not, create and insert new node node = new_skipnode(word, generate_level()); node->freq = 1; int copy_from_updates; if (node->level <= head->level) { copy_from_updates = node->level; } else { // We need to increase the level of the head. copy_from_updates = head->level; for (int i = head->level; i < node->level; i++) { head->nexts[i] = node; node->nexts[i] = NULL; } head->level = node->level; } for (int i = 0; i < copy_from_updates; i++) { node->nexts[i] = updates[i]->nexts[i]; updates[i]->nexts[i] = node; } } static void iterate(skipnode *node, void (*f)(void *userdata, char *word, int freq), void *userdata) { // Note we skip the head node, since its data is to be ignored. while ((node = node->nexts[0])) f(userdata, node->word, node->freq); } // Although for this program it isn’t really necessary, deallocation // is kind of necessary in general, so not including it would give a // false picture of the complexity of skip lists. static void delete(skipnode *node) { while (node) { skipnode *next = node->nexts[0]; free(node->word); free(node); node = next; } } static void print_freq(void *userdata, char *word, int freq) { printf("%8d %s\n", freq, word); } int main() { skipnode *list = new_skiplist(); char word[256]; int wordlen = 0, c; while ((c = getchar()) != EOF) { if (isspace(c)) { if (wordlen) { word[wordlen] = '\0'; increment(list, word); wordlen = 0; } } else { word[wordlen++] = c; assert(wordlen < sizeof(word)); } } iterate(list, print_freq, NULL); delete(list); return 0; }