first commit

4 years ago · 991b63b5c9
commit 991b63b5c9
4 changed files with 935 additions and 0 deletions
--- a/20
+++ b/20
@ -0,0 +1,20 @@
+The MIT License (MIT)
+
+Copyright (c) 2020 Joshua J Baker
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/README.md
+++ b/README.md
@ -0,0 +1,140 @@
+# hashmap.c
+
+[Hash map](https://en.wikipedia.org/wiki/Hash_table) implementation in C. 
+
+## Features
+
+- [Open addressing](https://en.wikipedia.org/wiki/Hash_table#Open_addressing) using [Robin Hood](https://en.wikipedia.org/wiki/Hash_table#Robin_Hood_hashing) hashing
+- Generic interface with support for variable sized items.
+- Built-in [SipHash](https://en.wikipedia.org/wiki/SipHash) or [MurmurHash3](https://en.wikipedia.org/wiki/MurmurHash) and allows for alternative algorithms.
+- ANSI C (C99)
+- Supports custom allocators
+- Pretty darn good performance. 🚀
+
+## Example
+
+```c
+#include <stdio.h>
+#include <string.h>
+#include "hashmap.h"
+
+struct user {
+    char *name;
+    int age;
+};
+
+int user_compare(const void *a, const void *b) {
+    const struct user *ua = a;
+    const struct user *ub = b;
+    return strcmp(ua->name, ub->name);
+}
+
+bool user_iter(const void *item, void *udata) {
+    const struct user *user = item;
+    printf("%s (age=%d)\n", user->name, user->age);
+    return true;
+}
+
+uint64_t user_hash(const void *item, uint64_t seed0, uint64_t seed1) {
+    const struct user *user = item;
+    return hashmap_sip(user->name, strlen(user->name), seed0, seed1);
+}
+
+int main() {
+    // create a new hash map where each item is a `struct user`. The second
+    // argument is the initial capacity. The third and fourth arguments are 
+    // optional seeds that are passed to the following hash function.
+    struct hashmap *map = hashmap_new(sizeof(struct user), 0, 0, 0, 
+                                     user_hash, user_compare);
+
+    // Here we'll load some users into the hash map. Each set operation
+    // performs a copy of the data that is pointed to in the second argument.
+    hashmap_set(map, &(struct user){ .name="Dale", .age=44 });
+    hashmap_set(map, &(struct user){ .name="Roger", .age=68 });
+    hashmap_set(map, &(struct user){ .name="Jane", .age=47 });
+
+    struct user *user; 
+    
+    printf("\n-- get some users --\n");
+    user = hashmap_get(map, &(struct user){ .name="Jane" });
+    printf("%s age=%d\n", user->name, user->age);
+
+    user = hashmap_get(map, &(struct user){ .name="Roger" });
+    printf("%s age=%d\n", user->name, user->age);
+
+    user = hashmap_get(map, &(struct user){ .name="Dale" });
+    printf("%s age=%d\n", user->name, user->age);
+
+    user = hashmap_get(map, &(struct user){ .name="Tom" });
+    printf("%s\n", user?"exists":"not exists");
+
+    printf("\n-- iterate over all users --\n");
+    hashmap_scan(map, user_iter, NULL);
+
+    hashmap_free(map);
+}
+
+// output:
+// -- get some users --
+// Jane age=47
+// Roger age=68
+// Dale age=44
+// not exists
+// 
+// -- iterate over all users --
+// Dale (age=44)
+// Roger (age=68)
+// Jane (age=47)
+```
+
+## Functions
+
+### Basic
+
+```sh
+hashmap_new      # allocate a new hash map
+hashmap_free     # free the hash map
+hashmap_count    # returns the number of items in the hash map
+hashmap_set      # insert or replace an existing item and return the previous
+hashmap_get      # get an existing item
+hashmap_delete   # delete and return an item
+```
+
+### Iteration
+
+```sh
+hashmap_scan     # iterate over items in hash map
+```
+
+### Hash helpers
+
+```sh
+hashmap_sip      # returns hash value for data using SipHash-2-4
+hashmap_murmur   # returns hash value for data using MurmurHash3
+```
+
+## Testing and benchmarks
+
+```sh
+$ cc -DHASHMAP_TEST hashmap.c && ./a.out              # run tests
+$ cc -DHASHMAP_TEST -O3 hashmap.c && BENCH=1 ./a.out  # run benchmarks
+```
+
+The following benchmarks were run on my 2019 Macbook Pro (2.4 GHz 8-Core Intel Core i9) using gcc-9.
+The items are simple 4-byte ints. 
+The hash function is MurmurHash3. 
+Testing with 5,000,000 items.
+The `(cap)` results are hashmaps that are created with an initial capacity of 5,000,000.
+
+```
+set            5000000 ops in 0.708 secs, 142 ns/op, 7057960 op/sec, 26.84 bytes/op
+get            5000000 ops in 0.303 secs, 61 ns/op, 16492723 op/sec
+delete         5000000 ops in 0.486 secs, 97 ns/op, 10280873 op/sec
+set (cap)      5000000 ops in 0.429 secs, 86 ns/op, 11641660 op/sec
+get (cap)      5000000 ops in 0.303 secs, 61 ns/op, 16490493 op/sec
+delete (cap)   5000000 ops in 0.410 secs, 82 ns/op, 12200091 op/sec
+```
+
+## License
+
+hashmap.c source code is available under the MIT License.
--- a/hashmap.c
+++ b/hashmap.c
@ -0,0 +1,743 @@
+// Copyright 2020 Joshua J Baker. All rights reserved.
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#include "hashmap.h"
+
+// Optional allocator overrides installed by hashmap_set_allocator(). While a
+// pointer is NULL the matching libc routine (malloc/free) is used instead.
+// The names avoid a leading underscore, which is reserved at file scope.
+static void *(*hm_user_malloc)(size_t) = NULL;
+static void (*hm_user_free)(void *) = NULL;
+
+// hmmalloc / hmfree evaluate to the allocator functions currently in effect.
+#define hmmalloc (hm_user_malloc?hm_user_malloc:malloc)
+#define hmfree (hm_user_free?hm_user_free:free)
+
+// hashmap_set_allocator allows for configuring a custom allocator for
+// all hashmap library operations. This function, if needed, should be called
+// only once at startup and prior to calling hashmap_new().
+// Passing NULL for either argument restores the libc default for that role.
+void hashmap_set_allocator(void *(*malloc)(size_t), void (*free)(void*)) 
+{
+    hm_user_malloc = malloc;
+    hm_user_free = free;
+}
+
+#define panic(_msg_) { \
+    fprintf(stderr, "panic: %s (%s:%d)\n", (_msg_), __FILE__, __LINE__); \
+    exit(1); \
+}
+
+// bucket is the fixed-size header at the start of every slot in the bucket
+// array. The item bytes follow the header directly (see bucket_item).
+struct bucket {
+    uint64_t hash:48;   // low 48 bits of the item's hash (see get_hash)
+    uint64_t dib:16;    // 0 = empty slot; otherwise 1 at the home slot, +1 per probe step
+};
+
+// hashmap is an open addressed hash map using robinhood hashing.
+struct hashmap {
+    bool oom;           // cleared by set/delete; set when growing fails in hashmap_set
+    size_t elsize;      // size in bytes of one stored item
+    size_t cap;         // bucket count chosen at creation; deletes never shrink below it
+    uint64_t seed0;     // first seed handed to `hash`
+    uint64_t seed1;     // second seed handed to `hash`
+    uint64_t (*hash)(const void *item, uint64_t seed0, uint64_t seed1);
+    int (*compare)(const void *a, const void *b);  // items are equal when it returns 0
+    size_t bucketsz;    // bytes per slot: header + item, rounded up to sizeof(uintptr_t)
+    size_t nbuckets;    // current number of slots (a power of two)
+    size_t count;       // number of items currently stored
+    size_t mask;        // nbuckets-1, used to wrap slot indexes
+    size_t growat;      // item count at which hashmap_set doubles the table
+    size_t shrinkat;    // item count at or below which hashmap_delete halves the table
+    void *buckets;      // the slot array, nbuckets*bucketsz bytes
+    void *spare;        // one-slot scratch area placed right after this struct
+};
+
+// bucket_at returns the address of slot `index` in the map's bucket array.
+// Slots are `bucketsz` bytes apart, so the offset is computed in bytes.
+static struct bucket *bucket_at(struct hashmap *map, size_t index) {
+    char *base = map->buckets;
+    size_t offset = map->bucketsz * index;
+    return (struct bucket*)(base + offset);
+}
+
+// bucket_item returns a pointer to the item bytes stored in a slot, which
+// begin immediately after the slot's `struct bucket` header.
+static void *bucket_item(struct bucket *entry) {
+    char *bytes = (char*)entry;
+    return bytes + sizeof(struct bucket);
+}
+
+// get_hash runs the user supplied hash function over `key` with the map's
+// seeds and keeps only the low 48 bits, the width of `struct bucket.hash`.
+static uint64_t get_hash(struct hashmap *map, void *key) {
+    uint64_t full = map->hash(key, map->seed0, map->seed1);
+    return full & UINT64_C(0xFFFFFFFFFFFF);
+}
+
+// hashmap_new returns a new hash map, or NULL when memory cannot be allocated
+// or the requested sizes do not fit in a size_t.
+// Param `elsize` is the size of each element in the hash map. Every element
+// that is inserted, deleted, or retrieved will be this size.
+// Param `cap` is the default lower capacity of the hashmap. It is rounded up
+// to a power of two; values below 16 (including zero) become 16.
+// Params `seed0` and `seed1` are optional seed values that are passed to the 
+// following `hash` function. These can be any value you wish but it's often 
+// best to use randomly generated values.
+// Param `hash` is a function that generates a hash value for an item. It's
+// important that you provide a good hash function, otherwise the map will
+// perform poorly or be vulnerable to Denial-of-service attacks. This
+// implementation comes with two helper functions `hashmap_sip()` and
+// `hashmap_murmur()`.
+// Param `compare` is a function that compares items in the hash map. It uses
+// the same signature as a qsort comparator; the map only checks whether the
+// result is zero (items are equal) or not.
+// The hashmap must be freed with hashmap_free(). 
+struct hashmap *hashmap_new(size_t elsize, size_t cap, 
+                            uint64_t seed0, uint64_t seed1,
+                            uint64_t (*hash)(const void *item, 
+                                             uint64_t seed0, uint64_t seed1),
+                            int (*compare)(const void *a, const void *b))
+{
+    // Round the capacity up to a power of two (minimum 16) so that `mask`
+    // can wrap indexes. The counter is a size_t so very large requests
+    // cannot overflow a signed int; a request that cannot be rounded up is
+    // rejected instead of looping forever.
+    size_t ncap = 16;
+    while (ncap < cap) {
+        if (ncap > SIZE_MAX / 2) {
+            return NULL;
+        }
+        ncap *= 2;
+    }
+    cap = ncap;
+
+    // Reject element sizes for which the size computations below would wrap.
+    if (elsize > SIZE_MAX - sizeof(struct hashmap) - sizeof(struct bucket)
+                 - sizeof(uintptr_t))
+    {
+        return NULL;
+    }
+    // Each slot is a bucket header followed by the item, padded so that
+    // every slot starts on a uintptr_t-sized boundary.
+    size_t bucketsz = sizeof(struct bucket) + elsize;
+    while (bucketsz & (sizeof(uintptr_t)-1)) {
+        bucketsz++;
+    }
+    if (cap > SIZE_MAX / bucketsz) {
+        return NULL;
+    }
+
+    // The map header and its one-slot `spare` scratch area share a single
+    // allocation; `spare` starts right after the struct.
+    struct hashmap *map = hmmalloc(sizeof(struct hashmap)+bucketsz);
+    if (!map) {
+        return NULL;
+    }
+    memset(map, 0, sizeof(struct hashmap));
+    map->elsize = elsize;
+    map->bucketsz = bucketsz;
+    map->seed0 = seed0;
+    map->seed1 = seed1;
+    map->hash = hash;
+    map->compare = compare;
+    map->spare = ((char*)map)+sizeof(struct hashmap);
+    map->cap = cap;
+    map->nbuckets = cap;
+    map->mask = map->nbuckets-1;
+    map->buckets = hmmalloc(map->bucketsz*map->nbuckets);
+    if (!map->buckets) {
+        hmfree(map);
+        return NULL;
+    }
+    // A zeroed slot has dib == 0, which marks it as empty.
+    memset(map->buckets, 0, map->bucketsz*map->nbuckets);
+    map->growat = map->nbuckets*0.75;
+    map->shrinkat = map->nbuckets*0.10;
+    return map;    
+}
+
+// resize rebuilds the bucket array with room for at least `new_cap` slots and
+// re-inserts every stored item. Returns false, leaving `map` untouched, when
+// the temporary map cannot be allocated.
+static bool resize(struct hashmap *map, size_t new_cap) {
+    // The temporary map only lends its bucket array and scratch slot. The
+    // stored 48-bit hashes are reused, so `hash` is never called here; the
+    // seeds are still passed through in the correct order.
+    struct hashmap *map2 = hashmap_new(map->elsize, new_cap, map->seed0, 
+                                       map->seed1, map->hash, map->compare);
+    if (!map2) {
+        return false;
+    }
+    for (size_t i = 0; i < map->nbuckets; i++) {
+        struct bucket *entry = bucket_at(map, i);
+        if (!entry->dib) {
+            continue;
+        }
+        // The old slot doubles as the "carried" entry of the Robin Hood
+        // insertion; the old array is discarded afterwards, so it may be
+        // modified in place.
+        entry->dib = 1;
+        size_t j = entry->hash & map2->mask;
+        for (;;) {
+            struct bucket *bucket = bucket_at(map2, j);
+            if (bucket->dib == 0) {
+                memcpy(bucket, entry, map->bucketsz);
+                break;
+            }
+            if (bucket->dib < entry->dib) {
+                // The resident is closer to its home slot than the carried
+                // entry: swap them and keep probing with the evicted one.
+                memcpy(map2->spare, bucket, map->bucketsz);
+                memcpy(bucket, entry, map->bucketsz);
+                memcpy(entry, map2->spare, map->bucketsz);
+            }
+            j = (j + 1) & map2->mask;
+            entry->dib += 1;
+        }
+    }
+    // Adopt the new array and its derived limits, then drop the shell of the
+    // temporary map. `cap`, `count`, and `spare` of `map` stay as they were.
+    hmfree(map->buckets);
+    map->buckets = map2->buckets;
+    map->nbuckets = map2->nbuckets;
+    map->mask = map2->mask;
+    map->growat = map2->growat;
+    map->shrinkat = map2->shrinkat;
+    hmfree(map2);
+    return true;
+}
+
+// hashmap_set inserts `item` into the hash map, or replaces the stored item
+// that compares equal to it. The bytes of `item` are copied into the map.
+// Returns a pointer to a copy of the replaced item, or NULL when nothing was
+// replaced. The returned pointer refers to the map's internal scratch slot.
+// This operation may allocate memory. If the table has to grow and the
+// allocation fails, NULL is returned and the map's `oom` flag is set.
+void *hashmap_set(struct hashmap *map, void *item) {
+    if (!item) {
+        panic("item is null");
+    }
+    map->oom = false;
+    if (map->count == map->growat && !resize(map, map->nbuckets*2)) {
+        map->oom = true;
+        return NULL;
+    }
+
+    // Build the entry to insert in a stack buffer the size of one slot.
+    char scratch[map->bucketsz]; // VLA
+    struct bucket *carry = (void*)scratch;
+    carry->hash = get_hash(map, item);
+    carry->dib = 1;
+    memcpy(bucket_item(carry), item, map->elsize);
+
+    size_t pos = carry->hash & map->mask;
+    for (;; pos = (pos + 1) & map->mask) {
+        struct bucket *slot = bucket_at(map, pos);
+        if (slot->dib == 0) {
+            // Empty slot: the carried entry settles here.
+            memcpy(slot, carry, map->bucketsz);
+            map->count++;
+            return NULL;
+        }
+        if (carry->hash == slot->hash &&
+            map->compare(bucket_item(carry), bucket_item(slot)) == 0)
+        {
+            // Same key: save the old item in `spare` and overwrite it.
+            memcpy(map->spare, bucket_item(slot), map->elsize);
+            memcpy(bucket_item(slot), bucket_item(carry), map->elsize);
+            return map->spare;
+        }
+        if (slot->dib < carry->dib) {
+            // The resident is closer to its home slot than the carried
+            // entry: swap them and continue with the evicted one.
+            memcpy(map->spare, slot, map->bucketsz);
+            memcpy(slot, carry, map->bucketsz);
+            memcpy(carry, map->spare, map->bucketsz);
+        }
+        carry->dib += 1;
+    }
+}
+
+// hashmap_get looks up the stored item that compares equal to `key` and
+// returns a pointer to it inside the map, or NULL when no such item exists.
+void *hashmap_get(struct hashmap *map, void *key) {
+    if (!key) {
+        panic("key is null");
+    }
+    const uint64_t want = get_hash(map, key);
+    size_t pos = want & map->mask;
+    struct bucket *slot = bucket_at(map, pos);
+    // Probe forward from the home slot; an empty slot ends the search.
+    while (slot->dib) {
+        if (slot->hash == want &&
+            map->compare(key, bucket_item(slot)) == 0)
+        {
+            return bucket_item(slot);
+        }
+        pos = (pos + 1) & map->mask;
+        slot = bucket_at(map, pos);
+    }
+    return NULL;
+}
+
+// hashmap_delete removes the stored item that compares equal to `key` and
+// returns a pointer to a copy of it, or NULL when no such item exists. The
+// returned pointer refers to the map's internal scratch slot.
+void *hashmap_delete(struct hashmap *map, void *key) {
+    if (!key) {
+        panic("key is null");
+    }
+    map->oom = false;
+    const uint64_t want = get_hash(map, key);
+    size_t pos = want & map->mask;
+    struct bucket *slot = bucket_at(map, pos);
+
+    // Step 1: probe forward until the item or an empty slot is found.
+    for (;;) {
+        if (!slot->dib) {
+            return NULL;
+        }
+        if (slot->hash == want &&
+            map->compare(key, bucket_item(slot)) == 0)
+        {
+            break;
+        }
+        pos = (pos + 1) & map->mask;
+        slot = bucket_at(map, pos);
+    }
+
+    // Step 2: keep a copy for the caller, then shift the following entries
+    // one slot back until an empty slot or an entry in its home slot.
+    memcpy(map->spare, bucket_item(slot), map->elsize);
+    slot->dib = 0;
+    for (;;) {
+        struct bucket *hole = slot;
+        pos = (pos + 1) & map->mask;
+        slot = bucket_at(map, pos);
+        if (slot->dib <= 1) {
+            hole->dib = 0;
+            break;
+        }
+        memcpy(hole, slot, map->bucketsz);
+        hole->dib--;
+    }
+    map->count--;
+
+    // Step 3: shrink when sparsely filled, but never below the initial size.
+    if (map->nbuckets > map->cap && map->count <= map->shrinkat) {
+        // Ignore the return value. It's ok for the resize operation to
+        // fail to allocate enough memory because a shrink operation
+        // does not change the integrity of the data.
+        resize(map, map->nbuckets/2);
+    }
+    return map->spare;
+}
+
+// hashmap_count reports how many items are currently stored in the hash map.
+size_t hashmap_count(struct hashmap *map) {
+    size_t stored = map->count;
+    return stored;
+}
+
+// hashmap_free releases the bucket array and the map itself.
+// Passing NULL is allowed and does nothing.
+void hashmap_free(struct hashmap *map) {
+    if (map) {
+        hmfree(map->buckets);
+        hmfree(map);
+    }
+}
+
+// hashmap_scan calls `iter` once for every item in the hash map, in bucket
+// order, passing `udata` through unchanged.
+// Param `iter` can return false to stop iteration early.
+// Returns false if the iteration has been stopped early, true otherwise.
+bool hashmap_scan(struct hashmap *map, 
+                  bool (*iter)(const void *item, void *udata), void *udata)
+{
+    size_t i = 0;
+    while (i < map->nbuckets) {
+        struct bucket *slot = bucket_at(map, i);
+        i++;
+        // Empty slots (dib == 0) are skipped.
+        if (slot->dib && !iter(bucket_item(slot), udata)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+//-----------------------------------------------------------------------------
+// SipHash reference C implementation
+//
+// Copyright (c) 2012-2016 Jean-Philippe Aumasson
+// <jeanphilippe.aumasson@gmail.com>
+// Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
+//
+// To the extent possible under law, the author(s) have dedicated all copyright
+// and related and neighboring rights to this software to the public domain
+// worldwide. This software is distributed without any warranty.
+//
+// You should have received a copy of the CC0 Public Domain Dedication along
+// with this software. If not, see
+// <http://creativecommons.org/publicdomain/zero/1.0/>.
+//
+// default: SipHash-2-4
+//-----------------------------------------------------------------------------
+// SIP64 hashes the `inlen` bytes at `in` to a 64-bit value. `seed0` and
+// `seed1` supply the two 64-bit key words (k0 and k1).
+// The helper macros below are defined inside the function body but are not
+// #undef'd, so they stay defined for the rest of the translation unit.
+static uint64_t SIP64(const uint8_t *in, const size_t inlen, 
+                      uint64_t seed0, uint64_t seed1) 
+{
+// U8TO64_LE assembles 8 bytes at `p` into a little-endian 64-bit value. The
+// expansion is brace-enclosed, so it is only used as an initializer here.
+#define U8TO64_LE(p) \
+    {  (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \
+        ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \
+        ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \
+        ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) }
+// U64TO8_LE / U32TO8_LE store a value at `p` as little-endian bytes.
+// U64TO8_LE refers to U32TO8_LE before its definition, which is fine because
+// macros are expanded at the point of use.
+#define U64TO8_LE(p, v) \
+    { U32TO8_LE((p), (uint32_t)((v))); \
+      U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); }
+#define U32TO8_LE(p, v) \
+    { (p)[0] = (uint8_t)((v)); \
+      (p)[1] = (uint8_t)((v) >> 8); \
+      (p)[2] = (uint8_t)((v) >> 16); \
+      (p)[3] = (uint8_t)((v) >> 24); }
+// ROTL rotates the 64-bit value `x` left by `b` bits.
+#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
+// SIPROUND is one mixing round over the state words v0..v3.
+#define SIPROUND \
+    { v0 += v1; v1 = ROTL(v1, 13); \
+      v1 ^= v0; v0 = ROTL(v0, 32); \
+      v2 += v3; v3 = ROTL(v3, 16); \
+      v3 ^= v2; \
+      v0 += v3; v3 = ROTL(v3, 21); \
+      v3 ^= v0; \
+      v2 += v1; v1 = ROTL(v1, 17); \
+      v1 ^= v2; v2 = ROTL(v2, 32); }
+    // The key words are read from the in-memory bytes of the seeds in
+    // little-endian order.
+    // NOTE(review): on a big-endian host k0/k1 would therefore be the
+    // byte-swapped seeds - confirm if such targets matter.
+    uint64_t k0 = U8TO64_LE((uint8_t*)&seed0);
+    uint64_t k1 = U8TO64_LE((uint8_t*)&seed1);
+    uint64_t v3 = UINT64_C(0x7465646279746573) ^ k1;
+    uint64_t v2 = UINT64_C(0x6c7967656e657261) ^ k0;
+    uint64_t v1 = UINT64_C(0x646f72616e646f6d) ^ k1;
+    uint64_t v0 = UINT64_C(0x736f6d6570736575) ^ k0;
+    // `end` marks the end of the last complete 8-byte word of the input.
+    const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
+    // Absorb each complete word with two rounds.
+    for (; in != end; in += 8) {
+        uint64_t m = U8TO64_LE(in);
+        v3 ^= m;
+        SIPROUND; SIPROUND;
+        v0 ^= m;
+    }
+    // The final word holds the remaining 0..7 bytes in its low bytes and the
+    // input length (its low 8 bits survive the shift) in the top byte.
+    const int left = inlen & 7;
+    uint64_t b = ((uint64_t)inlen) << 56;
+    // Every case deliberately falls through to the next one so that all
+    // remaining bytes are gathered.
+    switch (left) {
+    case 7: b |= ((uint64_t)in[6]) << 48;
+    case 6: b |= ((uint64_t)in[5]) << 40;
+    case 5: b |= ((uint64_t)in[4]) << 32;
+    case 4: b |= ((uint64_t)in[3]) << 24;
+    case 3: b |= ((uint64_t)in[2]) << 16;
+    case 2: b |= ((uint64_t)in[1]) << 8;
+    case 1: b |= ((uint64_t)in[0]); break;
+    case 0: break;
+    }
+    v3 ^= b;
+    SIPROUND; SIPROUND;
+    v0 ^= b;
+    // Finalization: four more rounds, then fold the state into one word.
+    v2 ^= 0xff;
+    SIPROUND; SIPROUND; SIPROUND; SIPROUND;
+    b = v0 ^ v1 ^ v2 ^ v3;
+    // The result is written into `out` as little-endian bytes.
+    uint64_t out = 0;
+    U64TO8_LE((uint8_t*)&out, b);
+    return out;
+}
+
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+//
+// Murmur3_86_128
+//-----------------------------------------------------------------------------
+// MM86128 hashes the `len` bytes at `key` with `seed` and writes a 128-bit
+// result (four 32-bit words h1..h4, in that order) to the 16 bytes at `out`.
+// Neither `key` nor `out` needs any particular alignment: words are moved
+// with memcpy rather than through casted uint32_t pointers. Words are read
+// and written in the host's byte order, as before.
+// `len` is expected to be non-negative.
+static void MM86128(const void *key, const int len, uint32_t seed, void *out) {
+#define ROTL32(x, r) ((x << r) | (x >> (32 - r)))
+#define FMIX32(h) h^=h>>16; h*=0x85ebca6b; h^=h>>13; h*=0xc2b2ae35; h^=h>>16;
+    const uint8_t *data = (const uint8_t*)key;
+    const int nblocks = len / 16;
+    uint32_t h1 = seed;
+    uint32_t h2 = seed;
+    uint32_t h3 = seed;
+    uint32_t h4 = seed;
+    uint32_t c1 = 0x239b961b; 
+    uint32_t c2 = 0xab0e9789;
+    uint32_t c3 = 0x38b34ae5; 
+    uint32_t c4 = 0xa1e38b93;
+    // Body: mix every complete 16-byte block, first block first.
+    for (int i = 0; i < nblocks; i++) {
+        const uint8_t *block = data + i*16;
+        uint32_t k1;
+        uint32_t k2;
+        uint32_t k3;
+        uint32_t k4;
+        memcpy(&k1, block + 0, sizeof k1);
+        memcpy(&k2, block + 4, sizeof k2);
+        memcpy(&k3, block + 8, sizeof k3);
+        memcpy(&k4, block + 12, sizeof k4);
+        k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+        h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
+        k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+        h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
+        k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+        h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
+        k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+        h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
+    }
+    // Tail: the remaining 0..15 bytes. Every case deliberately falls through.
+    // Bytes are widened to uint32_t before shifting so that a byte >= 0x80
+    // shifted by 24 cannot overflow a signed int.
+    const uint8_t *tail = data + nblocks*16;
+    uint32_t k1 = 0;
+    uint32_t k2 = 0;
+    uint32_t k3 = 0;
+    uint32_t k4 = 0;
+    switch(len & 15) {
+    case 15: k4 ^= (uint32_t)tail[14] << 16; /* fallthrough */
+    case 14: k4 ^= (uint32_t)tail[13] << 8;  /* fallthrough */
+    case 13: k4 ^= (uint32_t)tail[12] << 0;
+             k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+             /* fallthrough */
+    case 12: k3 ^= (uint32_t)tail[11] << 24; /* fallthrough */
+    case 11: k3 ^= (uint32_t)tail[10] << 16; /* fallthrough */
+    case 10: k3 ^= (uint32_t)tail[ 9] << 8;  /* fallthrough */
+    case  9: k3 ^= (uint32_t)tail[ 8] << 0;
+             k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+             /* fallthrough */
+    case  8: k2 ^= (uint32_t)tail[ 7] << 24; /* fallthrough */
+    case  7: k2 ^= (uint32_t)tail[ 6] << 16; /* fallthrough */
+    case  6: k2 ^= (uint32_t)tail[ 5] << 8;  /* fallthrough */
+    case  5: k2 ^= (uint32_t)tail[ 4] << 0;
+             k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+             /* fallthrough */
+    case  4: k1 ^= (uint32_t)tail[ 3] << 24; /* fallthrough */
+    case  3: k1 ^= (uint32_t)tail[ 2] << 16; /* fallthrough */
+    case  2: k1 ^= (uint32_t)tail[ 1] << 8;  /* fallthrough */
+    case  1: k1 ^= (uint32_t)tail[ 0] << 0;
+             k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+    }
+    // Finalization: fold in the length and mix the four lanes together.
+    h1 ^= (uint32_t)len; h2 ^= (uint32_t)len;
+    h3 ^= (uint32_t)len; h4 ^= (uint32_t)len;
+    h1 += h2; h1 += h3; h1 += h4;
+    h2 += h1; h3 += h1; h4 += h1;
+    FMIX32(h1); FMIX32(h2); FMIX32(h3); FMIX32(h4);
+    h1 += h2; h1 += h3; h1 += h4;
+    h2 += h1; h3 += h1; h4 += h1;
+    const uint32_t result[4] = { h1, h2, h3, h4 };
+    memcpy(out, result, sizeof result);
+}
+
+// hashmap_sip returns a hash value for the `len` bytes at `data` using
+// SipHash-2-4, keyed with `seed0` and `seed1`.
+uint64_t hashmap_sip(const void *data, size_t len, 
+                     uint64_t seed0, uint64_t seed1)
+{
+    const uint8_t *bytes = data;
+    return SIP64(bytes, len, seed0, seed1);
+}
+
+// hashmap_murmur returns a hash value for `data` using Murmur3_86_128.
+// Only the first 8 bytes of the 128-bit digest are returned.
+// The underlying routine takes an `int` length and a 32-bit seed, so `len`
+// is narrowed to int, only the low 32 bits of `seed0` are used, and `seed1`
+// is ignored.
+uint64_t hashmap_murmur(const void *data, size_t len, 
+                        uint64_t seed0, uint64_t seed1)
+{
+    (void)seed1;
+    char out[16];
+    MM86128(data, (int)len, (uint32_t)seed0, &out);
+    // Copy the bytes out instead of dereferencing a casted pointer: a char
+    // array is not guaranteed to be aligned for uint64_t.
+    uint64_t lo;
+    memcpy(&lo, out, sizeof lo);
+    return lo;
+}
+
+//==============================================================================
+// TESTS AND BENCHMARKS
+// $ cc -DHASHMAP_TEST hashmap.c && ./a.out              # run tests
+// $ cc -DHASHMAP_TEST -O3 hashmap.c && BENCH=1 ./a.out  # run benchmarks
+//==============================================================================
+// #ifndef HASHMAP_TEST
+// #define HASHMAP_TEST
+// #endif
+#ifdef HASHMAP_TEST
+
+// deepcount walks every slot of the bucket array and counts the occupied
+// ones (dib != 0). The tests compare the result with `map->count`.
+static size_t deepcount(struct hashmap *map) {
+    size_t used = 0;
+    size_t i = 0;
+    while (i < map->nbuckets) {
+        used += bucket_at(map, i)->dib ? 1 : 0;
+        i++;
+    }
+    return used;
+}
+
+
+#pragma GCC diagnostic ignored "-Wextra"
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <assert.h>
+#include <stdio.h>
+#include "hashmap.h"
+
+// Bookkeeping for the test allocator: the number of live allocations and the
+// number of requested bytes that are still allocated.
+static uintptr_t total_allocs = 0;
+static uintptr_t total_mem = 0;
+
+// xmalloc is a counting malloc. It allocates one extra uintptr_t in front of
+// the caller's memory, stores the requested size there, and updates the
+// counters. Allocation failure is caught with assert().
+static void *xmalloc(size_t size) {
+    uintptr_t *header = malloc(sizeof(uintptr_t)+size);
+    assert(header);
+    header[0] = size;
+    total_mem += size;
+    total_allocs++;
+    return header + 1;
+}
+
+// xfree releases memory obtained from xmalloc and reverses its bookkeeping,
+// using the size stored just in front of `ptr`. NULL is ignored.
+static void xfree(void *ptr) {
+    if (!ptr) {
+        return;
+    }
+    uintptr_t *header = (uintptr_t*)((char*)ptr-sizeof(uintptr_t));
+    total_mem -= *header;
+    free(header);
+    total_allocs--;
+}
+
+// shuffle randomly permutes, in place, the `numels` elements of `elsize`
+// bytes each that start at `array`, using rand().
+// Arrays with fewer than two elements (or zero-sized elements) are left
+// untouched; without this guard `numels - 1` would wrap around for an empty
+// array.
+static void shuffle(void *array, size_t numels, size_t elsize) {
+    if (numels < 2 || elsize == 0) {
+        return;
+    }
+    char tmp[elsize];
+    char *arr = array;
+    for (size_t i = 0; i < numels - 1; i++) {
+        // Pick a partner index in [i, numels).
+        size_t j = i + rand() / (RAND_MAX / (numels - i) + 1);
+        if (j == i) {
+            // Nothing to swap; also avoids a memcpy of a region onto itself.
+            continue;
+        }
+        memcpy(tmp, arr + j * elsize, elsize);
+        memcpy(arr + j * elsize, arr + i * elsize, elsize);
+        memcpy(arr + i * elsize, tmp, elsize);
+    }
+}
+
+// iter_ints is a hashmap_scan callback for int items. `udata` points to an
+// `int *` array; the array entry indexed by the item's value is set to 1.
+// It always returns true, so the scan never stops early.
+static bool iter_ints(const void *item, void *udata) {
+    int **marks = udata;
+    const int *value = item;
+    (*marks)[*value] = 1;
+    return true;
+}
+
+// compare_ints orders two ints: negative when *a < *b, zero when equal,
+// positive when *a > *b. It compares instead of subtracting because the
+// difference of two ints can overflow (e.g. INT_MIN - 1).
+static int compare_ints(const void *a, const void *b) {
+    const int x = *(const int*)a;
+    const int y = *(const int*)b;
+    return (x > y) - (x < y);
+}
+
+// hash_int is the hash callback used by the tests and benchmarks: it hashes
+// the bytes of one int with hashmap_murmur.
+static uint64_t hash_int(const void *item, uint64_t seed0, uint64_t seed1) {
+    const size_t len = sizeof(int);
+    return hashmap_murmur(item, len, seed0, seed1);
+}
+
+// all runs the functional test suite against a map of ints.
+// Environment: SEED selects the rand() seed (default: current time) and N
+// the number of items (default: 2000).
+// The process exits with status 1 if any tracked allocation is still live at
+// the end.
+static void all(void) {
+    int seed = getenv("SEED")?atoi(getenv("SEED")):time(NULL);
+    int N = getenv("N")?atoi(getenv("N")):2000;
+    printf("seed=%d, count=%d, item_size=%zu\n", seed, N, sizeof(int));
+    srand(seed);
+
+    // test sip and murmur hashes
+    assert(hashmap_sip("hello", 5, 1, 2) == 2957200328589801622);
+    assert(hashmap_murmur("hello", 5, 1, 2) == 1682575153221130884);
+
+    int *vals = xmalloc(N * sizeof(int));
+    for (int i = 0; i < N; i++) {
+        vals[i] = i;
+    }
+
+    struct hashmap *map = hashmap_new(sizeof(int), 0, seed, seed, 
+                                      hash_int, compare_ints);
+    shuffle(vals, N, sizeof(int));
+    // Insert phase: add the values one at a time and check, around every
+    // insert, that counts agree and all earlier values are still present.
+    for (int i = 0; i < N; i++) {
+        assert(map->count == i);
+        assert(map->count == hashmap_count(map));
+        assert(map->count == deepcount(map));
+        int *v;
+        assert(!hashmap_get(map, &vals[i]));
+        assert(!hashmap_delete(map, &vals[i]));
+        assert(!hashmap_set(map, &vals[i]));
+        for (int j = 0; j < i; j++) {
+            v = hashmap_get(map, &vals[j]);
+            assert(v && *v == vals[j]);
+        }
+        // Setting an existing item returns the replaced one.
+        v = hashmap_set(map, &vals[i]);
+        assert(v && *v == vals[i]);
+        v = hashmap_get(map, &vals[i]);
+        assert(v && *v == vals[i]);
+        v = hashmap_delete(map, &vals[i]);
+        assert(v && *v == vals[i]);
+        assert(!hashmap_get(map, &vals[i]));
+        assert(!hashmap_delete(map, &vals[i]));
+        assert(!hashmap_set(map, &vals[i]));
+        assert(map->count == i+1);
+        assert(map->count == hashmap_count(map));
+        assert(map->count == deepcount(map));
+    }
+
+    // Scan phase: every value in [0, N) must be visited.
+    int *vals2 = xmalloc(N * sizeof(int));
+    memset(vals2, 0, N * sizeof(int));
+    assert(hashmap_scan(map, iter_ints, &vals2));
+    for (int i = 0; i < N; i++) {
+        assert(vals2[i] == 1);
+    }
+    xfree(vals2);
+
+    // Delete phase: remove the values in a new random order and check that
+    // the not yet deleted ones remain reachable.
+    shuffle(vals, N, sizeof(int));
+    for (int i = 0; i < N; i++) {
+        int *v;
+        v = hashmap_delete(map, &vals[i]);
+        assert(v && *v == vals[i]);
+        assert(!hashmap_get(map, &vals[i]));
+        assert(map->count == N-i-1);
+        assert(map->count == hashmap_count(map));
+        assert(map->count == deepcount(map));
+        for (int j = N-1; j > i; j--) {
+            v = hashmap_get(map, &vals[j]);
+            assert(v && *v == vals[j]);
+        }
+    }
+    // After all deletes the table must be back at its initial size.
+    assert(map->cap == map->nbuckets);
+
+    hashmap_free(map);
+
+    xfree(vals);
+
+    if (total_allocs != 0) {
+        // uintptr_t is not necessarily unsigned long; convert to match %lu.
+        fprintf(stderr, "total_allocs: expected 0, got %lu\n",
+            (unsigned long)total_allocs);
+        exit(1);
+    }
+}
+
+// bench runs `code` N times (the loop variable `i` is visible to `code`) and
+// prints one result line: the label `name`, the elapsed CPU time measured
+// with clock(), ns/op and op/sec, and - when the counters grew during the
+// run - bytes/op and allocs/op taken from total_mem and total_allocs.
+// `bytes` starts at 0 and the macro itself never changes it, so the GB/sec
+// part is only printed if `code` assigns to it.
+// NOTE(review): the callers in this file pass a braced block as `code`, which
+// makes `(code);` a parenthesized compound statement, i.e. the GNU C
+// statement-expression extension - confirm that is acceptable for the
+// compilers this test build targets.
+#define bench(name, N, code) {{ \
+    if (strlen(name) > 0) { \
+        printf("%-14s ", name); \
+    } \
+    size_t tmem = total_mem; \
+    size_t tallocs = total_allocs; \
+    uint64_t bytes = 0; \
+    clock_t begin = clock(); \
+    for (int i = 0; i < N; i++) { \
+        (code); \
+    } \
+    clock_t end = clock(); \
+    double elapsed_secs = (double)(end - begin) / CLOCKS_PER_SEC; \
+    double bytes_sec = (double)bytes/elapsed_secs; \
+    printf("%d ops in %.3f secs, %.0f ns/op, %.0f op/sec", \
+        N, elapsed_secs, \
+        elapsed_secs/(double)N*1e9, \
+        (double)N/elapsed_secs \
+    ); \
+    if (bytes > 0) { \
+        printf(", %.1f GB/sec", bytes_sec/1024/1024/1024); \
+    } \
+    if (total_mem > tmem) { \
+        size_t used_mem = total_mem-tmem; \
+        printf(", %.2f bytes/op", (double)used_mem/N); \
+    } \
+    if (total_allocs > tallocs) { \
+        size_t used_allocs = total_allocs-tallocs; \
+        printf(", %.2f allocs/op", (double)used_allocs/N); \
+    } \
+    printf("\n"); \
+}}
+
+// benchmarks times set/get/delete on a map of ints, first with the default
+// initial capacity and then with a capacity of N requested up front.
+// Environment: SEED selects the rand() seed (default: current time) and N
+// the number of items (default: 5000000).
+// The process exits with status 1 if any tracked allocation is still live at
+// the end.
+static void benchmarks(void) {
+    int seed = getenv("SEED")?atoi(getenv("SEED")):time(NULL);
+    int N = getenv("N")?atoi(getenv("N")):5000000;
+    printf("seed=%d, count=%d, item_size=%zu\n", seed, N, sizeof(int));
+    srand(seed);
+
+
+    int *vals = xmalloc(N * sizeof(int));
+    for (int i = 0; i < N; i++) {
+        vals[i] = i;
+    }
+
+    shuffle(vals, N, sizeof(int));
+
+    struct hashmap *map;
+    shuffle(vals, N, sizeof(int));
+
+    // Pass 1: the map starts at the default size and grows as needed.
+    map = hashmap_new(sizeof(int), 0, seed, seed, hash_int, compare_ints);
+    bench("set", N, {
+        int *v = hashmap_set(map, &vals[i]);
+        assert(!v);
+    })
+    shuffle(vals, N, sizeof(int));
+    bench("get", N, {
+        int *v = hashmap_get(map, &vals[i]);
+        assert(v && *v == vals[i]);
+    })
+    shuffle(vals, N, sizeof(int));
+    bench("delete", N, {
+        int *v = hashmap_delete(map, &vals[i]);
+        assert(v && *v == vals[i]);
+    })
+    hashmap_free(map);
+
+    // Pass 2: the map is created with room for N items.
+    map = hashmap_new(sizeof(int), N, seed, seed, hash_int, compare_ints);
+    bench("set (cap)", N, {
+        int *v = hashmap_set(map, &vals[i]);
+        assert(!v);
+    })
+    shuffle(vals, N, sizeof(int));
+    bench("get (cap)", N, {
+        int *v = hashmap_get(map, &vals[i]);
+        assert(v && *v == vals[i]);
+    })
+    shuffle(vals, N, sizeof(int));
+    bench("delete (cap)" , N, {
+        int *v = hashmap_delete(map, &vals[i]);
+        assert(v && *v == vals[i]);
+    })
+
+    hashmap_free(map);
+
+    
+    xfree(vals);
+
+    if (total_allocs != 0) {
+        // uintptr_t is not necessarily unsigned long; convert to match %lu.
+        fprintf(stderr, "total_allocs: expected 0, got %lu\n",
+            (unsigned long)total_allocs);
+        exit(1);
+    }
+}
+
+// Test driver: installs the counting allocator, then runs the benchmarks
+// when the BENCH environment variable is set and the test suite otherwise.
+int main() {
+    hashmap_set_allocator(xmalloc, xfree);
+
+    if (!getenv("BENCH")) {
+        printf("Running hashmap.c tests...\n");
+        all();
+        printf("PASSED\n");
+    } else {
+        printf("Running hashmap.c benchmarks...\n");
+        benchmarks();
+    }
+}
+
+
+#endif
+
+
+
--- a/hashmap.h
+++ b/hashmap.h
@ -0,0 +1,32 @@
+// Copyright 2020 Joshua J Baker. All rights reserved.
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file.
+
+#ifndef HASHMAP_H
+#define HASHMAP_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+// Opaque hash map handle; the definition lives in hashmap.c.
+struct hashmap;
+
+// Creates a map whose items are `elsize` bytes each. `cap` is the initial
+// capacity, `seed0`/`seed1` are passed to `hash`, and `compare` returns 0 for
+// equal items. Returns NULL if memory cannot be allocated.
+struct hashmap *hashmap_new(size_t elsize, size_t cap, 
+                            uint64_t seed0, uint64_t seed1,
+                            uint64_t (*hash)(const void *item, 
+                                             uint64_t seed0, uint64_t seed1),
+                            int (*compare)(const void *a, const void *b));
+// Frees the map; NULL is ignored.
+void hashmap_free(struct hashmap *map);
+// Returns the number of items stored in the map.
+size_t hashmap_count(struct hashmap *map);
+// Returns a pointer to the stored item equal to `item`, or NULL if absent.
+void *hashmap_get(struct hashmap *map, void *item);
+// Inserts a copy of `item`, or replaces the equal stored item. Returns the
+// replaced item, or NULL if nothing was replaced or growing the map failed.
+// The result points to scratch storage inside the map that later set/delete
+// calls overwrite.
+void *hashmap_set(struct hashmap *map, void *item);
+// Removes the stored item equal to `item` and returns it, or NULL if absent.
+// The result points to the same scratch storage as for hashmap_set.
+void *hashmap_delete(struct hashmap *map, void *item);
+// Calls `iter` for each stored item; `iter` returns false to stop early.
+// Returns false if the iteration was stopped early.
+bool hashmap_scan(struct hashmap *map,
+                  bool (*iter)(const void *item, void *udata), void *udata);
+// Installs a custom allocator used by all maps; call before hashmap_new().
+void hashmap_set_allocator(void *(*malloc)(size_t), void (*free)(void*));
+// Returns the SipHash-2-4 hash of `len` bytes at `data`.
+uint64_t hashmap_sip(const void *data, size_t len, 
+                     uint64_t seed0, uint64_t seed1);
+// Returns a MurmurHash3 (Murmur3_86_128) based hash of `len` bytes at `data`.
+uint64_t hashmap_murmur(const void *data, size_t len, 
+                        uint64_t seed0, uint64_t seed1);
+
+#endif