first commit

pull/2/head
tidwall 4 years ago
commit 991b63b5c9

@ -0,0 +1,20 @@
The MIT License (MIT)
Copyright (c) 2020 Joshua J Baker
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

@ -0,0 +1,140 @@
# hashmap.c
[Hash map](https://en.wikipedia.org/wiki/Hash_table) implementation in C.
## Features
- [Open addressing](https://en.wikipedia.org/wiki/Hash_table#Open_addressing) using [Robin Hood](https://en.wikipedia.org/wiki/Hash_table#Robin_Hood_hashing) hashing
- Generic interface with support for variable sized items.
- Built-in [SipHash](https://en.wikipedia.org/wiki/SipHash) or [MurmurHash3](https://en.wikipedia.org/wiki/MurmurHash) and allows for alternative algorithms.
- ANSI C (C99)
- Supports custom allocators
- Pretty darn good performance. 🚀
## Example
```c
#include <stdio.h>
#include <string.h>
#include "hashmap.h"
struct user {
char *name;
int age;
};
int user_compare(const void *a, const void *b) {
const struct user *ua = a;
const struct user *ub = b;
return strcmp(ua->name, ub->name);
}
bool user_iter(const void *item, void *udata) {
const struct user *user = item;
printf("%s (age=%d)\n", user->name, user->age);
return true;
}
uint64_t user_hash(const void *item, uint64_t seed0, uint64_t seed1) {
const struct user *user = item;
return hashmap_sip(user->name, strlen(user->name), seed0, seed1);
}
int main() {
// create a new hash map where each item is a `struct user`. The second
// argument is the initial capacity. The third and fourth arguments are
// optional seeds that are passed to the following hash function.
struct hashmap *map = hashmap_new(sizeof(struct user), 0, 0, 0,
user_hash, user_compare);
// Here we'll load some users into the hash map. Each set operation
// performs a copy of the data that is pointed to in the second argument.
hashmap_set(map, &(struct user){ .name="Dale", .age=44 });
hashmap_set(map, &(struct user){ .name="Roger", .age=68 });
hashmap_set(map, &(struct user){ .name="Jane", .age=47 });
struct user *user;
printf("\n-- get some users --\n");
user = hashmap_get(map, &(struct user){ .name="Jane" });
printf("%s age=%d\n", user->name, user->age);
user = hashmap_get(map, &(struct user){ .name="Roger" });
printf("%s age=%d\n", user->name, user->age);
user = hashmap_get(map, &(struct user){ .name="Dale" });
printf("%s age=%d\n", user->name, user->age);
user = hashmap_get(map, &(struct user){ .name="Tom" });
printf("%s\n", user?"exists":"not exists");
printf("\n-- iterate over all users --\n");
hashmap_scan(map, user_iter, NULL);
hashmap_free(map);
}
// output:
// -- get some users --
// Jane age=47
// Roger age=68
// Dale age=44
// not exists
//
// -- iterate over all users --
// Dale (age=44)
// Roger (age=68)
// Jane (age=47)
```
## Functions
### Basic
```sh
hashmap_new # allocate a new hash map
hashmap_free # free the hash map
hashmap_count # returns the number of items in the hash map
hashmap_set # insert or replace an existing item and return the previous
hashmap_get # get an existing item
hashmap_delete # delete and return an item
```
### Iteration
```sh
hashmap_scan # iterate over items in hash map
```
### Hash helpers
```sh
hashmap_sip # returns hash value for data using SipHash-2-4
hashmap_murmur # returns hash value for data using MurmurHash3
```
## Testing and benchmarks
```sh
$ cc -DHASHMAP_TEST hashmap.c && ./a.out # run tests
$ cc -DHASHMAP_TEST -O3 hashmap.c && BENCH=1 ./a.out # run benchmarks
```
The following benchmarks were run on my 2019 Macbook Pro (2.4 GHz 8-Core Intel Core i9) using gcc-9.
The items are simple 4-byte ints.
The hash function is MurmurHash3.
Testing with 5,000,000 items.
The `(cap)` results are hashmaps that are created with an inital capacity of 5,000,000.
```
set 5000000 ops in 0.708 secs, 142 ns/op, 7057960 op/sec, 26.84 bytes/op
get 5000000 ops in 0.303 secs, 61 ns/op, 16492723 op/sec
delete 5000000 ops in 0.486 secs, 97 ns/op, 10280873 op/sec
set (cap) 5000000 ops in 0.429 secs, 86 ns/op, 11641660 op/sec
get (cap) 5000000 ops in 0.303 secs, 61 ns/op, 16490493 op/sec
delete (cap) 5000000 ops in 0.410 secs, 82 ns/op, 12200091 op/sec
```
## License
hashmap.c source code is available under the MIT License.

@ -0,0 +1,743 @@
// Copyright 2020 Joshua J Baker. All rights reserved.
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <stddef.h>
#include "hashmap.h"
static void *(*_malloc)(size_t) = NULL;
static void (*_free)(void *) = NULL;
#define hmmalloc (_malloc?_malloc:malloc)
#define hmfree (_free?_free:free)
// hashmap_set_allocator allows for configuring a custom allocator for
// all hashmap library operations. This function, if needed, should be called
// only once at startup and a prior to calling hashmap_new().
void hashmap_set_allocator(void *(*malloc)(size_t), void (*free)(void*))
{
_malloc = malloc;
_free = free;
}
#define panic(_msg_) { \
fprintf(stderr, "panic: %s (%s:%d)\n", (_msg_), __FILE__, __LINE__); \
exit(1); \
}
struct bucket {
uint64_t hash:48;
uint64_t dib:16;
};
// hashmap is an open addressed hash map using robinhood hashing.
struct hashmap {
bool oom;
size_t elsize;
size_t cap;
uint64_t seed0;
uint64_t seed1;
uint64_t (*hash)(const void *item, uint64_t seed0, uint64_t seed1);
int (*compare)(const void *a, const void *b);
size_t bucketsz;
size_t nbuckets;
size_t count;
size_t mask;
size_t growat;
size_t shrinkat;
void *buckets;
void *spare;
};
static struct bucket *bucket_at(struct hashmap *map, size_t index) {
return (struct bucket*)(((char*)map->buckets)+(map->bucketsz*index));
}
static void *bucket_item(struct bucket *entry) {
return ((char*)entry)+sizeof(struct bucket);
}
static uint64_t get_hash(struct hashmap *map, void *key) {
return map->hash(key, map->seed0, map->seed1) << 16 >> 16;
}
// hashmap_new returns a new hash map.
// Param `elsize` is the size of each element in the tree. Every element that
// is inserted, deleted, or retrieved will be this size.
// Param `cap` is the default lower capacity of the hashmap. Setting this to
// zero will default to 16.
// Params `seed0` and `seed1` are optional seed values that are passed to the
// following `hash` function. These can be any value you wish but it's often
// best to use randomly generated values.
// Param `hash` is a function that generates a hash value for an item. It's
// important that you provice good hash function, otherwise will perform poorly
// or be vulnerable to Denial-of-service attacks. This implementation comes
// with two helper functions `hashmap_sip()` and `hashmap_murmur()`
// Param `compare` is a function that compares items in the tree. See the
// qsort stdlib function for an example of how this function works.
// The hashmap must be freed with hashmap_free().
struct hashmap *hashmap_new(size_t elsize, size_t cap,
uint64_t seed0, uint64_t seed1,
uint64_t (*hash)(const void *item,
uint64_t seed0, uint64_t seed1),
int (*compare)(const void *a, const void *b))
{
int ncap = 16;
if (cap < ncap) {
cap = ncap;
} else {
while (ncap < cap) {
ncap *= 2;
}
cap = ncap;
}
size_t bucketsz = sizeof(struct bucket) + elsize;
while (bucketsz & (sizeof(uintptr_t)-1)) {
bucketsz++;
}
struct hashmap *map = hmmalloc(sizeof(struct hashmap)+bucketsz);
if (!map) {
return NULL;
}
memset(map, 0, sizeof(struct hashmap));
map->elsize = elsize;
map->bucketsz = bucketsz;
map->seed0 = seed0;
map->seed1 = seed1;
map->hash = hash;
map->compare = compare;
map->spare = ((char*)map)+sizeof(struct hashmap);
map->cap = cap;
map->nbuckets = cap;
map->mask = map->nbuckets-1;
map->buckets = hmmalloc(map->bucketsz*map->nbuckets);
if (!map->buckets) {
hmfree(map);
return NULL;
}
memset(map->buckets, 0, map->bucketsz*map->nbuckets);
map->growat = map->nbuckets*0.75;
map->shrinkat = map->nbuckets*0.10;
return map;
}
static bool resize(struct hashmap *map, size_t new_cap) {
struct hashmap *map2 = hashmap_new(map->elsize, new_cap, map->seed1,
map->seed1, map->hash, map->compare);
if (!map2) {
return false;
}
for (size_t i = 0; i < map->nbuckets; i++) {
struct bucket *entry = bucket_at(map, i);
if (!entry->dib) {
continue;
}
entry->dib = 1;
size_t j = entry->hash & map2->mask;
for (;;) {
struct bucket *bucket = bucket_at(map2, j);
if (bucket->dib == 0) {
memcpy(bucket, entry, map->bucketsz);
break;
}
if (bucket->dib < entry->dib) {
memcpy(map2->spare, bucket, map->bucketsz);
memcpy(bucket, entry, map->bucketsz);
memcpy(entry, map2->spare, map->bucketsz);
}
j = (j + 1) & map2->mask;
entry->dib += 1;
}
}
hmfree(map->buckets);
map->buckets = map2->buckets;
map->nbuckets = map2->nbuckets;
map->mask = map2->mask;
map->growat = map2->growat;
map->shrinkat = map2->shrinkat;
hmfree(map2);
return true;
}
// hashmap_set inserts or replaces an item in the hash map. If an item is
// replaced then it is returned otherwise NULL is returned. This operation
// may allocate memory. If the system is unable to allocate additional
// memory then NULL is returned and hashmap_oom() returns true.
void *hashmap_set(struct hashmap *map, void *item) {
if (!item) {
panic("item is null");
}
map->oom = false;
if (map->count == map->growat) {
if (!resize(map, map->nbuckets*2)) {
map->oom = true;
return NULL;
}
}
char edata[map->bucketsz]; // VLA
struct bucket *entry = (void*)edata;
entry->hash = get_hash(map, item);
entry->dib = 1;
memcpy(bucket_item(entry), item, map->elsize);
size_t i = entry->hash & map->mask;
for (;;) {
struct bucket *bucket = bucket_at(map, i);
if (bucket->dib == 0) {
memcpy(bucket, entry, map->bucketsz);
map->count++;
return NULL;
}
if (entry->hash == bucket->hash &&
map->compare(bucket_item(entry), bucket_item(bucket)) == 0)
{
memcpy(map->spare, bucket_item(bucket), map->elsize);
memcpy(bucket_item(bucket), bucket_item(entry), map->elsize);
return map->spare;
}
if (bucket->dib < entry->dib) {
memcpy(map->spare, bucket, map->bucketsz);
memcpy(bucket, entry, map->bucketsz);
memcpy(entry, map->spare, map->bucketsz);
}
i = (i + 1) & map->mask;
entry->dib += 1;
}
}
// hashmap_get returns the item based on the provided key. If the item is not
// found then NULL is returned.
void *hashmap_get(struct hashmap *map, void *key) {
if (!key) {
panic("key is null");
}
uint64_t hash = get_hash(map, key);
size_t i = hash & map->mask;
for (;;) {
struct bucket *bucket = bucket_at(map, i);
if (!bucket->dib) {
return NULL;
}
if (bucket->hash == hash &&
map->compare(key, bucket_item(bucket)) == 0)
{
return bucket_item(bucket);
}
i = (i + 1) & map->mask;
}
}
// hashmap_delete removes an item from the hash map and returns it. If the
// item is not found then NULL is returned.
void *hashmap_delete(struct hashmap *map, void *key) {
if (!key) {
panic("key is null");
}
map->oom = false;
uint64_t hash = get_hash(map, key);
size_t i = hash & map->mask;
for (;;) {
struct bucket *bucket = bucket_at(map, i);
if (!bucket->dib) {
return NULL;
}
if (bucket->hash == hash &&
map->compare(key, bucket_item(bucket)) == 0)
{
memcpy(map->spare, bucket_item(bucket), map->elsize);
bucket->dib = 0;
for (;;) {
struct bucket *prev = bucket;
i = (i + 1) & map->mask;
bucket = bucket_at(map, i);
if (bucket->dib <= 1) {
prev->dib = 0;
break;
}
memcpy(prev, bucket, map->bucketsz);
prev->dib--;
}
map->count--;
if (map->nbuckets > map->cap && map->count <= map->shrinkat) {
// Ignore the return value. It's ok for the resize operation to
// fail to allocate enough memory because a shrink operation
// does not change the integrity of the data.
resize(map, map->nbuckets/2);
}
return map->spare;
}
i = (i + 1) & map->mask;
}
}
// hashmap_count returns the number of items in the hash map.
size_t hashmap_count(struct hashmap *map) {
return map->count;
}
// hashmap_free frees the hash map
void hashmap_free(struct hashmap *map) {
if (!map) return;
hmfree(map->buckets);
hmfree(map);
}
// hashmap_scan iterates over all items in the hash map
// Param `iter` can return false to stop iteration early.
// Returns false if the iteration has been stopped early.
bool hashmap_scan(struct hashmap *map,
bool (*iter)(const void *item, void *udata), void *udata)
{
for (size_t i = 0; i < map->nbuckets; i++) {
struct bucket *bucket = bucket_at(map, i);
if (bucket->dib) {
if (!iter(bucket_item(bucket), udata)) {
return false;
}
}
}
return true;
}
//-----------------------------------------------------------------------------
// SipHash reference C implementation
//
// Copyright (c) 2012-2016 Jean-Philippe Aumasson
// <jeanphilippe.aumasson@gmail.com>
// Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
//
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
//
// You should have received a copy of the CC0 Public Domain Dedication along
// with this software. If not, see
// <http://creativecommons.org/publicdomain/zero/1.0/>.
//
// default: SipHash-2-4
//-----------------------------------------------------------------------------
static uint64_t SIP64(const uint8_t *in, const size_t inlen,
uint64_t seed0, uint64_t seed1)
{
#define U8TO64_LE(p) \
{ (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \
((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \
((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \
((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) }
#define U64TO8_LE(p, v) \
{ U32TO8_LE((p), (uint32_t)((v))); \
U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); }
#define U32TO8_LE(p, v) \
{ (p)[0] = (uint8_t)((v)); \
(p)[1] = (uint8_t)((v) >> 8); \
(p)[2] = (uint8_t)((v) >> 16); \
(p)[3] = (uint8_t)((v) >> 24); }
#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
#define SIPROUND \
{ v0 += v1; v1 = ROTL(v1, 13); \
v1 ^= v0; v0 = ROTL(v0, 32); \
v2 += v3; v3 = ROTL(v3, 16); \
v3 ^= v2; \
v0 += v3; v3 = ROTL(v3, 21); \
v3 ^= v0; \
v2 += v1; v1 = ROTL(v1, 17); \
v1 ^= v2; v2 = ROTL(v2, 32); }
uint64_t k0 = U8TO64_LE((uint8_t*)&seed0);
uint64_t k1 = U8TO64_LE((uint8_t*)&seed1);
uint64_t v3 = UINT64_C(0x7465646279746573) ^ k1;
uint64_t v2 = UINT64_C(0x6c7967656e657261) ^ k0;
uint64_t v1 = UINT64_C(0x646f72616e646f6d) ^ k1;
uint64_t v0 = UINT64_C(0x736f6d6570736575) ^ k0;
const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
for (; in != end; in += 8) {
uint64_t m = U8TO64_LE(in);
v3 ^= m;
SIPROUND; SIPROUND;
v0 ^= m;
}
const int left = inlen & 7;
uint64_t b = ((uint64_t)inlen) << 56;
switch (left) {
case 7: b |= ((uint64_t)in[6]) << 48;
case 6: b |= ((uint64_t)in[5]) << 40;
case 5: b |= ((uint64_t)in[4]) << 32;
case 4: b |= ((uint64_t)in[3]) << 24;
case 3: b |= ((uint64_t)in[2]) << 16;
case 2: b |= ((uint64_t)in[1]) << 8;
case 1: b |= ((uint64_t)in[0]); break;
case 0: break;
}
v3 ^= b;
SIPROUND; SIPROUND;
v0 ^= b;
v2 ^= 0xff;
SIPROUND; SIPROUND; SIPROUND; SIPROUND;
b = v0 ^ v1 ^ v2 ^ v3;
uint64_t out = 0;
U64TO8_LE((uint8_t*)&out, b);
return out;
}
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
//
// Murmur3_86_128
//-----------------------------------------------------------------------------
static void MM86128(const void *key, const int len, uint32_t seed, void *out) {
#define ROTL32(x, r) ((x << r) | (x >> (32 - r)))
#define FMIX32(h) h^=h>>16; h*=0x85ebca6b; h^=h>>13; h*=0xc2b2ae35; h^=h>>16;
const uint8_t * data = (const uint8_t*)key;
const int nblocks = len / 16;
uint32_t h1 = seed;
uint32_t h2 = seed;
uint32_t h3 = seed;
uint32_t h4 = seed;
uint32_t c1 = 0x239b961b;
uint32_t c2 = 0xab0e9789;
uint32_t c3 = 0x38b34ae5;
uint32_t c4 = 0xa1e38b93;
const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
for (int i = -nblocks; i; i++) {
uint32_t k1 = blocks[i*4+0];
uint32_t k2 = blocks[i*4+1];
uint32_t k3 = blocks[i*4+2];
uint32_t k4 = blocks[i*4+3];
k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
}
const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
uint32_t k1 = 0;
uint32_t k2 = 0;
uint32_t k3 = 0;
uint32_t k4 = 0;
switch(len & 15) {
case 15: k4 ^= tail[14] << 16;
case 14: k4 ^= tail[13] << 8;
case 13: k4 ^= tail[12] << 0;
k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
case 12: k3 ^= tail[11] << 24;
case 11: k3 ^= tail[10] << 16;
case 10: k3 ^= tail[ 9] << 8;
case 9: k3 ^= tail[ 8] << 0;
k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
case 8: k2 ^= tail[ 7] << 24;
case 7: k2 ^= tail[ 6] << 16;
case 6: k2 ^= tail[ 5] << 8;
case 5: k2 ^= tail[ 4] << 0;
k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
case 4: k1 ^= tail[ 3] << 24;
case 3: k1 ^= tail[ 2] << 16;
case 2: k1 ^= tail[ 1] << 8;
case 1: k1 ^= tail[ 0] << 0;
k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
};
h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
h1 += h2; h1 += h3; h1 += h4;
h2 += h1; h3 += h1; h4 += h1;
FMIX32(h1); FMIX32(h2); FMIX32(h3); FMIX32(h4);
h1 += h2; h1 += h3; h1 += h4;
h2 += h1; h3 += h1; h4 += h1;
((uint32_t*)out)[0] = h1;
((uint32_t*)out)[1] = h2;
((uint32_t*)out)[2] = h3;
((uint32_t*)out)[3] = h4;
}
// hashmap_sip returns a hash value for `data` using SipHash-2-4.
uint64_t hashmap_sip(const void *data, size_t len,
uint64_t seed0, uint64_t seed1)
{
return SIP64((uint8_t*)data, len, seed0, seed1);
}
// hashmap_murmur returns a hash value for `data` using Murmur3_86_128.
uint64_t hashmap_murmur(const void *data, size_t len,
uint64_t seed0, uint64_t seed1)
{
char out[16];
MM86128(data, len, seed0, &out);
return *(uint64_t*)out;
}
//==============================================================================
// TESTS AND BENCHMARKS
// $ cc -DHASHMAP_TEST hashmap.c && ./a.out # run tests
// $ cc -DHASHMAP_TEST -O3 hashmap.c && BENCH=1 ./a.out # run benchmarks
//==============================================================================
// #ifndef HASHMAP_TEST
// #define HASHMAP_TEST
// #endif
#ifdef HASHMAP_TEST
static size_t deepcount(struct hashmap *map) {
size_t count = 0;
for (size_t i = 0; i < map->nbuckets; i++) {
if (bucket_at(map, i)->dib) {
count++;
}
}
return count;
}
#pragma GCC diagnostic ignored "-Wextra"
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <assert.h>
#include <stdio.h>
#include "hashmap.h"
static uintptr_t total_allocs = 0;
static uintptr_t total_mem = 0;
static void *xmalloc(size_t size) {
void *mem = malloc(sizeof(uintptr_t)+size);
assert(mem);
*(uintptr_t*)mem = size;
total_allocs++;
total_mem += size;
return (char*)mem+sizeof(uintptr_t);
}
static void xfree(void *ptr) {
if (ptr) {
total_mem -= *(uintptr_t*)((char*)ptr-sizeof(uintptr_t));
free((char*)ptr-sizeof(uintptr_t));
total_allocs--;
}
}
static void shuffle(void *array, size_t numels, size_t elsize) {
char tmp[elsize];
char *arr = array;
for (size_t i = 0; i < numels - 1; i++) {
int j = i + rand() / (RAND_MAX / (numels - i) + 1);
memcpy(tmp, arr + j * elsize, elsize);
memcpy(arr + j * elsize, arr + i * elsize, elsize);
memcpy(arr + i * elsize, tmp, elsize);
}
}
static bool iter_ints(const void *item, void *udata) {
int *vals = *(int**)udata;
vals[*(int*)item] = 1;
return true;
}
static int compare_ints(const void *a, const void *b) {
return *(int*)a - *(int*)b;
}
static uint64_t hash_int(const void *item, uint64_t seed0, uint64_t seed1) {
return hashmap_murmur(item, sizeof(int), seed0, seed1);
}
static void all() {
int seed = getenv("SEED")?atoi(getenv("SEED")):time(NULL);
int N = getenv("N")?atoi(getenv("N")):2000;
printf("seed=%d, count=%d, item_size=%zu\n", seed, N, sizeof(int));
srand(seed);
// test sip and murmur hashes
assert(hashmap_sip("hello", 5, 1, 2) == 2957200328589801622);
assert(hashmap_murmur("hello", 5, 1, 2) == 1682575153221130884);
int *vals = xmalloc(N * sizeof(int));
for (int i = 0; i < N; i++) {
vals[i] = i;
}
struct hashmap *map = hashmap_new(sizeof(int), 0, seed, seed,
hash_int, compare_ints);
shuffle(vals, N, sizeof(int));
for (int i = 0; i < N; i++) {
// // printf("== %d ==\n", vals[i]);
assert(map->count == i);
assert(map->count == hashmap_count(map));
assert(map->count == deepcount(map));
int *v;
assert(!hashmap_get(map, &vals[i]));
assert(!hashmap_delete(map, &vals[i]));
assert(!hashmap_set(map, &vals[i]));
for (int j = 0; j < i; j++) {
v = hashmap_get(map, &vals[j]);
assert(v && *v == vals[j]);
}
v = hashmap_set(map, &vals[i]);
assert(v && *v == vals[i]);
v = hashmap_get(map, &vals[i]);
assert(v && *v == vals[i]);
v = hashmap_delete(map, &vals[i]);
assert(v && *v == vals[i]);
assert(!hashmap_get(map, &vals[i]));
assert(!hashmap_delete(map, &vals[i]));
assert(!hashmap_set(map, &vals[i]));
assert(map->count == i+1);
assert(map->count == hashmap_count(map));
assert(map->count == deepcount(map));
}
int *vals2 = xmalloc(N * sizeof(int));
memset(vals2, 0, N * sizeof(int));
assert(hashmap_scan(map, iter_ints, &vals2));
for (int i = 0; i < N; i++) {
assert(vals2[i] == 1);
}
xfree(vals2);
shuffle(vals, N, sizeof(int));
for (int i = 0; i < N; i++) {
int *v;
v = hashmap_delete(map, &vals[i]);
assert(v && *v == vals[i]);
assert(!hashmap_get(map, &vals[i]));
assert(map->count == N-i-1);
assert(map->count == hashmap_count(map));
assert(map->count == deepcount(map));
for (int j = N-1; j > i; j--) {
v = hashmap_get(map, &vals[j]);
assert(v && *v == vals[j]);
}
}
assert(map->cap == map->nbuckets);
hashmap_free(map);
xfree(vals);
if (total_allocs != 0) {
fprintf(stderr, "total_allocs: expected 0, got %lu\n", total_allocs);
exit(1);
}
}
#define bench(name, N, code) {{ \
if (strlen(name) > 0) { \
printf("%-14s ", name); \
} \
size_t tmem = total_mem; \
size_t tallocs = total_allocs; \
uint64_t bytes = 0; \
clock_t begin = clock(); \
for (int i = 0; i < N; i++) { \
(code); \
} \
clock_t end = clock(); \
double elapsed_secs = (double)(end - begin) / CLOCKS_PER_SEC; \
double bytes_sec = (double)bytes/elapsed_secs; \
printf("%d ops in %.3f secs, %.0f ns/op, %.0f op/sec", \
N, elapsed_secs, \
elapsed_secs/(double)N*1e9, \
(double)N/elapsed_secs \
); \
if (bytes > 0) { \
printf(", %.1f GB/sec", bytes_sec/1024/1024/1024); \
} \
if (total_mem > tmem) { \
size_t used_mem = total_mem-tmem; \
printf(", %.2f bytes/op", (double)used_mem/N); \
} \
if (total_allocs > tallocs) { \
size_t used_allocs = total_allocs-tallocs; \
printf(", %.2f allocs/op", (double)used_allocs/N); \
} \
printf("\n"); \
}}
static void benchmarks() {
int seed = getenv("SEED")?atoi(getenv("SEED")):time(NULL);
int N = getenv("N")?atoi(getenv("N")):5000000;
printf("seed=%d, count=%d, item_size=%zu\n", seed, N, sizeof(int));
srand(seed);
int *vals = xmalloc(N * sizeof(int));
for (int i = 0; i < N; i++) {
vals[i] = i;
}
shuffle(vals, N, sizeof(int));
struct hashmap *map;
shuffle(vals, N, sizeof(int));
map = hashmap_new(sizeof(int), 0, seed, seed, hash_int, compare_ints);
bench("set", N, {
int *v = hashmap_set(map, &vals[i]);
assert(!v);
})
shuffle(vals, N, sizeof(int));
bench("get", N, {
int *v = hashmap_get(map, &vals[i]);
assert(v && *v == vals[i]);
})
shuffle(vals, N, sizeof(int));
bench("delete", N, {
int *v = hashmap_delete(map, &vals[i]);
assert(v && *v == vals[i]);
})
hashmap_free(map);
map = hashmap_new(sizeof(int), N, seed, seed, hash_int, compare_ints);
bench("set (cap)", N, {
int *v = hashmap_set(map, &vals[i]);
assert(!v);
})
shuffle(vals, N, sizeof(int));
bench("get (cap)", N, {
int *v = hashmap_get(map, &vals[i]);
assert(v && *v == vals[i]);
})
shuffle(vals, N, sizeof(int));
bench("delete (cap)" , N, {
int *v = hashmap_delete(map, &vals[i]);
assert(v && *v == vals[i]);
})
hashmap_free(map);
xfree(vals);
if (total_allocs != 0) {
fprintf(stderr, "total_allocs: expected 0, got %lu\n", total_allocs);
exit(1);
}
}
int main() {
hashmap_set_allocator(xmalloc, xfree);
if (getenv("BENCH")) {
printf("Running hashmap.c benchmarks...\n");
benchmarks();
} else {
printf("Running hashmap.c tests...\n");
all();
printf("PASSED\n");
}
}
#endif

@ -0,0 +1,32 @@
// Copyright 2020 Joshua J Baker. All rights reserved.
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.
#ifndef HASHMAP_H
#define HASHMAP_H
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
struct hashmap;
struct hashmap *hashmap_new(size_t elsize, size_t cap,
uint64_t seed0, uint64_t seed1,
uint64_t (*hash)(const void *item,
uint64_t seed0, uint64_t seed1),
int (*compare)(const void *a, const void *b));
void hashmap_free(struct hashmap *map);
size_t hashmap_count(struct hashmap *map);
void *hashmap_get(struct hashmap *map, void *item);
void *hashmap_set(struct hashmap *map, void *item);
void *hashmap_delete(struct hashmap *map, void *item);
bool hashmap_scan(struct hashmap *map,
bool (*iter)(const void *item, void *udata), void *udata);
void hashmap_set_allocator(void *(*malloc)(size_t), void (*free)(void*));
uint64_t hashmap_sip(const void *data, size_t len,
uint64_t seed0, uint64_t seed1);
uint64_t hashmap_murmur(const void *data, size_t len,
uint64_t seed0, uint64_t seed1);
#endif
Loading…
Cancel
Save