// FP-persistent trie hash map using path copying. Based on Chris // Wellons’s and NRK’s ephemeral version // . Example using // an arena allocator and NUL-terminated unowned strings. #include #include #include #include #include #include #include "kmregion.h" // Trie node. The kids array is indexed by the lowest unused two bits // of the hash function. typedef struct trie { struct trie *kids[4]; char *k, *v; // Unowned. } trie; // Allocate a new trie node in the region, initializing all its // pointers. trie * new_node(char *k, char *v, trie **kids, kmregion *r) { trie *t = km_new(r, sizeof(*t)); t->k = k; t->v = v; memcpy(t->kids, kids, sizeof(t->kids)); return t; } typedef uint64_t hash_t; // djb2 hash with a different seed. static inline hash_t hash(char *s) { hash_t h = 53; while (*s) h = h * 33 + *s++; return h; } // Main recursive path-copying loop. trie * upsert_helper(char *k, char *v, trie *t, hash_t h, kmregion *r) { trie *kids[4] = {0}; if (!t) return new_node(k, v, kids, r); memcpy(kids, t->kids, sizeof(kids)); if (!strcmp(k, t->k)) return new_node(k, v, kids, r); kids[h & 3] = upsert_helper(k, v, kids[h & 3], h >> 2, r); return new_node(t->k, t->v, kids, r); } // Insert k=v into t, returning new t, allocating in r. trie * upsert(char *k, char *v, trie *t, kmregion *r) { return upsert_helper(k, v, t, hash(k), r); } // Return v associated with k, or NULL if not found. char * get(char *k, trie *t) { hash_t h = hash(k); while (t) { if (!strcmp(k, t->k)) return t->v; t = t->kids[h & 3]; h >>= 2; } return 0; // not found } // Copy a string into kmregion. Probably I should put this in // kmregion.h? But with the arguments reversed? static inline char * kstrdup(char *s, kmregion *r) { size_t n = strlen(s) + 1; char *t = km_new(r, n); memcpy(t, s, n); return t; } // Quick and dirty interactive testing harness. char inbuf[1024]; const char *delim = " \n"; __attribute__((weak)) // To permit linking elsewhere. int main(int argc, char **argv) { jmp_buf err; if (setjmp(err)) { fprintf(stderr, "Allocation error\n"); return 1; } kmregion *region = km_start(km_libc_disc, &err); trie *t = 0; trie *stack[64]; int sp; // Stack pointer. for (;;) { printf("⟥ "); fflush(stdout); if (!fgets(inbuf, sizeof(inbuf), stdin)) break; char *cmd = strtok(inbuf, delim); if (!cmd) continue; if (!strcmp(cmd, "g")) { // Get a key’s value char *k = strtok(0, delim); if (!k) { printf("g requires an argument\n"); continue; } char *v = get(k, t); printf(v ? "⟣ %s\n" : "not found\n", v); } else if (!strcmp(cmd, "s")) { // Set a key to a value char *k = strtok(0, delim); char *v = 0; if (k) v = strtok(0, delim); if (!v) { printf("s requires two arguments\n"); continue; } t = upsert(kstrdup(k, region), kstrdup(v, region), t, region); } else if (!strcmp(cmd, "(")) { // Push a context on the context stack if (sp == sizeof(stack) / sizeof(stack[0])) { printf("no\n"); continue; } stack[sp++] = t; printf("%d\n", sp); } else if (!strcmp(cmd, ")")) { // Pop a context if (!sp) { printf("no\n"); continue; } t = stack[--sp]; printf("%d\n", sp); } else { printf("g key, s key value, (, ), or ^D\n"); } } km_end(region); return 0; }