@ -9,22 +9,19 @@
# include <stddef.h>
# include <stddef.h>
# include "hashmap.h"
# include "hashmap.h"
static void * ( * _malloc ) ( size_t ) = NULL ;
# define GROW_AT 0.60
static void * ( * _realloc ) ( void * , size_t ) = NULL ;
# define SHRINK_AT 0.10
static void ( * _free ) ( void * ) = NULL ;
static void * ( * __malloc ) ( size_t ) = NULL ;
static void * ( * __realloc ) ( void * , size_t ) = NULL ;
static void ( * __free ) ( void * ) = NULL ;
// hashmap_set_allocator allows for configuring a custom allocator for
// hashmap_set_allocator allows for configuring a custom allocator for
// all hashmap library operations. This function, if needed, should be called
// all hashmap library operations. This function, if needed, should be called
// only once at startup and prior to calling hashmap_new().
// only once at startup and prior to calling hashmap_new().
void hashmap_set_allocator ( void * ( * malloc ) ( size_t ) , void ( * free ) ( void * ) )
void hashmap_set_allocator ( void * ( * malloc ) ( size_t ) , void ( * free ) ( void * ) ) {
{
__malloc = malloc ;
_malloc = malloc ;
__free = free ;
_free = free ;
}
# define panic(_msg_) { \
fprintf ( stderr , " panic: %s (%s:%d) \n " , ( _msg_ ) , __FILE__ , __LINE__ ) ; \
exit ( 1 ) ; \
}
}
struct bucket {
struct bucket {
@ -37,7 +34,6 @@ struct hashmap {
void * ( * malloc ) ( size_t ) ;
void * ( * malloc ) ( size_t ) ;
void * ( * realloc ) ( void * , size_t ) ;
void * ( * realloc ) ( void * , size_t ) ;
void ( * free ) ( void * ) ;
void ( * free ) ( void * ) ;
bool oom ;
size_t elsize ;
size_t elsize ;
size_t cap ;
size_t cap ;
uint64_t seed0 ;
uint64_t seed0 ;
@ -52,41 +48,50 @@ struct hashmap {
size_t mask ;
size_t mask ;
size_t growat ;
size_t growat ;
size_t shrinkat ;
size_t shrinkat ;
uint8_t growpower ;
bool oom ;
void * buckets ;
void * buckets ;
void * spare ;
void * spare ;
void * edata ;
void * edata ;
} ;
} ;
void hashmap_set_grow_by_power ( struct hashmap * map , size_t power ) {
map - > growpower = power < 1 ? 1 : power > 16 ? 16 : power ;
}
// bucket_at0 returns the address of the bucket at index `i` inside the raw
// bucket array `buckets`, where every bucket occupies `bucketsz` bytes.
static struct bucket *bucket_at0(void *buckets, size_t bucketsz, size_t i) {
    char *base = buckets;
    size_t offset = bucketsz * i;
    return (struct bucket *)(base + offset);
}
static struct bucket * bucket_at ( struct hashmap * map , size_t index ) {
static struct bucket * bucket_at ( struct hashmap * map , size_t index ) {
return ( struct bucket * ) ( ( ( char * ) map - > buckets ) + ( map - > bucketsz * index ) ) ;
return bucket_at0 ( map - > buckets , map - > bucketsz , index ) ;
}
}
static void * bucket_item ( struct bucket * entry ) {
static void * bucket_item ( struct bucket * entry ) {
return ( ( char * ) entry ) + sizeof ( struct bucket ) ;
return ( ( char * ) entry ) + sizeof ( struct bucket ) ;
}
}
// clip_hash discards the upper 16 bits of `hash`, keeping only the low
// 48 bits.
static uint64_t clip_hash(uint64_t hash) {
    uint64_t shifted_up = hash << 16;
    return shifted_up >> 16;
}
static uint64_t get_hash ( struct hashmap * map , const void * key ) {
static uint64_t get_hash ( struct hashmap * map , const void * key ) {
return map - > hash ( key , map - > seed0 , map - > seed1 ) < < 16 > > 16 ;
return clip_hash ( map - > hash ( key , map - > seed0 , map - > seed1 ) ) ;
}
}
// hashmap_new_with_allocator returns a new hash map using a custom allocator.
// hashmap_new_with_allocator returns a new hash map using a custom allocator.
// See hashmap_new for more information.
// See hashmap_new for more information.
struct hashmap * hashmap_new_with_allocator (
struct hashmap * hashmap_new_with_allocator ( void * ( * _malloc ) ( size_t ) ,
void * ( * _malloc ) ( size_t ) ,
void * ( * _realloc ) ( void * , size_t ) , void ( * _free ) ( void * ) ,
void * ( * _realloc ) ( void * , size_t ) ,
size_t elsize , size_t cap , uint64_t seed0 , uint64_t seed1 ,
void ( * _free ) ( void * ) ,
uint64_t ( * hash ) ( const void * item , uint64_t seed0 , uint64_t seed1 ) ,
size_t elsize , size_t cap ,
int ( * compare ) ( const void * a , const void * b , void * udata ) ,
uint64_t seed0 , uint64_t seed1 ,
uint64_t ( * hash ) ( const void * item ,
uint64_t seed0 , uint64_t seed1 ) ,
int ( * compare ) ( const void * a , const void * b ,
void * udata ) ,
void ( * elfree ) ( void * item ) ,
void ( * elfree ) ( void * item ) ,
void * udata )
void * udata )
{
{
_malloc = _malloc ? _malloc : malloc;
_malloc = _malloc ? _malloc : __malloc ? __malloc : malloc ;
_realloc = _realloc ? _realloc : realloc;
_realloc = _realloc ? _realloc : __realloc ? __realloc : realloc ;
_free = _free ? _free : free;
_free = _free ? _free : __free ? __free : free;
size_t ncap = 16 ;
size_t ncap = 16 ;
if ( cap < ncap ) {
if ( cap < ncap ) {
cap = ncap ;
cap = ncap ;
@ -96,6 +101,7 @@ struct hashmap *hashmap_new_with_allocator(
}
}
cap = ncap ;
cap = ncap ;
}
}
// printf("%d\n", (int)cap);
size_t bucketsz = sizeof ( struct bucket ) + elsize ;
size_t bucketsz = sizeof ( struct bucket ) + elsize ;
while ( bucketsz & ( sizeof ( uintptr_t ) - 1 ) ) {
while ( bucketsz & ( sizeof ( uintptr_t ) - 1 ) ) {
bucketsz + + ;
bucketsz + + ;
@ -126,15 +132,15 @@ struct hashmap *hashmap_new_with_allocator(
return NULL ;
return NULL ;
}
}
memset ( map - > buckets , 0 , map - > bucketsz * map - > nbuckets ) ;
memset ( map - > buckets , 0 , map - > bucketsz * map - > nbuckets ) ;
map - > growat = map - > nbuckets * 0.75 ;
map - > growpower = 1 ;
map - > shrinkat = map - > nbuckets * 0.10 ;
map - > growat = map - > nbuckets * GROW_AT ;
map - > shrinkat = map - > nbuckets * SHRINK_AT ;
map - > malloc = _malloc ;
map - > malloc = _malloc ;
map - > realloc = _realloc ;
map - > realloc = _realloc ;
map - > free = _free ;
map - > free = _free ;
return map ;
return map ;
}
}
// hashmap_new returns a new hash map.
// hashmap_new returns a new hash map.
// Param `elsize` is the size of each element in the map. Every element that
// Param `elsize` is the size of each element in the map. Every element that
// is inserted, deleted, or retrieved will be this size.
// is inserted, deleted, or retrieved will be this size.
@ -152,21 +158,15 @@ struct hashmap *hashmap_new_with_allocator(
// The hashmap must be freed with hashmap_free().
// The hashmap must be freed with hashmap_free().
// Param `elfree` is a function that frees a specific item. This should be NULL
// Param `elfree` is a function that frees a specific item. This should be NULL
// unless you're storing some kind of reference data in the hash.
// unless you're storing some kind of reference data in the hash.
struct hashmap * hashmap_new ( size_t elsize , size_t cap ,
struct hashmap * hashmap_new ( size_t elsize , size_t cap , uint64_t seed0 ,
uint64_t seed0 , uint64_t seed1 ,
uint64_t seed1 ,
uint64_t ( * hash ) ( const void * item ,
uint64_t ( * hash ) ( const void * item , uint64_t seed0 , uint64_t seed1 ) ,
uint64_t seed0 , uint64_t seed1 ) ,
int ( * compare ) ( const void * a , const void * b , void * udata ) ,
int ( * compare ) ( const void * a , const void * b ,
void * udata ) ,
void ( * elfree ) ( void * item ) ,
void ( * elfree ) ( void * item ) ,
void * udata )
void * udata )
{
{
return hashmap_new_with_allocator (
return hashmap_new_with_allocator ( NULL , NULL , NULL , elsize , cap , seed0 ,
( _malloc ? _malloc : malloc ) ,
seed1 , hash , compare , elfree , udata ) ;
( _realloc ? _realloc : realloc ) ,
( _free ? _free : free ) ,
elsize , cap , seed0 , seed1 , hash , compare , elfree , udata
) ;
}
}
static void free_elements ( struct hashmap * map ) {
static void free_elements ( struct hashmap * map ) {
@ -178,7 +178,6 @@ static void free_elements(struct hashmap *map) {
}
}
}
}
// hashmap_clear quickly clears the map.
// hashmap_clear quickly clears the map.
// Every item is called with the element-freeing function given in hashmap_new,
// Every item is called with the element-freeing function given in hashmap_new,
// if present, to free any data referenced in the elements of the hashmap.
// if present, to free any data referenced in the elements of the hashmap.
@ -204,15 +203,11 @@ void hashmap_clear(struct hashmap *map, bool update_cap) {
map - > shrinkat = map - > nbuckets * 0.10 ;
map - > shrinkat = map - > nbuckets * 0.10 ;
}
}
static bool resize0 ( struct hashmap * map , size_t new_cap ) {
static bool resize ( struct hashmap * map , size_t new_cap ) {
struct hashmap * map2 = hashmap_new_with_allocator ( map - > malloc , map - > realloc ,
struct hashmap * map2 = hashmap_new_with_allocator ( map - > malloc , map - > realloc , map - > free ,
map - > free , map - > elsize , new_cap , map - > seed0 , map - > seed1 , map - > hash ,
map - > elsize , new_cap , map - > seed0 ,
map - > compare , map - > elfree , map - > udata ) ;
map - > seed1 , map - > hash , map - > compare ,
if ( ! map2 ) return false ;
map - > elfree , map - > udata ) ;
if ( ! map2 ) {
return false ;
}
for ( size_t i = 0 ; i < map - > nbuckets ; i + + ) {
for ( size_t i = 0 ; i < map - > nbuckets ; i + + ) {
struct bucket * entry = bucket_at ( map , i ) ;
struct bucket * entry = bucket_at ( map , i ) ;
if ( ! entry - > dib ) {
if ( ! entry - > dib ) {
@ -220,7 +215,7 @@ static bool resize(struct hashmap *map, size_t new_cap) {
}
}
entry - > dib = 1 ;
entry - > dib = 1 ;
size_t j = entry - > hash & map2 - > mask ;
size_t j = entry - > hash & map2 - > mask ;
for ( ; ; ) {
while( 1 ) {
struct bucket * bucket = bucket_at ( map2 , j ) ;
struct bucket * bucket = bucket_at ( map2 , j ) ;
if ( bucket - > dib = = 0 ) {
if ( bucket - > dib = = 0 ) {
memcpy ( bucket , entry , map - > bucketsz ) ;
memcpy ( bucket , entry , map - > bucketsz ) ;
@ -245,80 +240,98 @@ static bool resize(struct hashmap *map, size_t new_cap) {
return true ;
return true ;
}
}
// hashmap_set inserts or replaces an item in the hash map. If an item is
static bool resize ( struct hashmap * map , size_t new_cap ) {
// replaced then it is returned otherwise NULL is returned. This operation
return resize0 ( map , new_cap ) ;
// may allocate memory. If the system is unable to allocate additional
// memory then NULL is returned and hashmap_oom() returns true.
void * hashmap_set ( struct hashmap * map , const void * item ) {
if ( ! item ) {
panic ( " item is null " ) ;
}
}
// hashmap_set_with_hash works like hashmap_set but you provide your
// own hash. The 'hash' callback provided to the hashmap_new function
// will not be called
const void * hashmap_set_with_hash ( struct hashmap * map , const void * item ,
uint64_t hash )
{
hash = clip_hash ( hash ) ;
map - > oom = false ;
map - > oom = false ;
if ( map - > count = = map - > growat ) {
if ( map - > count = = map - > growat ) {
if ( ! resize ( map , map - > nbuckets * 2 ) ) {
if ( ! resize ( map , map - > nbuckets * ( 1 < < map - > growpower ) ) ) {
map - > oom = true ;
map - > oom = true ;
return NULL ;
return NULL ;
}
}
}
}
struct bucket * entry = map - > edata ;
struct bucket * entry = map - > edata ;
entry - > hash = get_hash ( map , item ) ;
entry - > hash = hash ;
entry - > dib = 1 ;
entry - > dib = 1 ;
memcpy ( bucket_item ( entry ) , item , map - > elsize ) ;
void * eitem = bucket_item ( entry ) ;
memcpy ( eitem , item , map - > elsize ) ;
void * bitem ;
size_t i = entry - > hash & map - > mask ;
size_t i = entry - > hash & map - > mask ;
for ( ; ; ) {
while ( 1 ) {
struct bucket * bucket = bucket_at ( map , i ) ;
struct bucket * bucket = bucket_at ( map , i ) ;
if ( bucket - > dib = = 0 ) {
if ( bucket - > dib = = 0 ) {
memcpy ( bucket , entry , map - > bucketsz ) ;
memcpy ( bucket , entry , map - > bucketsz ) ;
map - > count + + ;
map - > count + + ;
return NULL ;
return NULL ;
}
}
if ( entry - > hash = = bucket - > hash & &
bitem = bucket_item ( bucket ) ;
map - > compare ( bucket_item ( entry ) , bucket_item ( bucket ) ,
if ( entry - > hash = = bucket - > hash & & ( ! map - > compare | |
map - > udata ) = = 0 )
map - > compare ( eitem , bitem , map - > udata ) = = 0 ) )
{
{
memcpy ( map - > spare , bucket_item ( bucket ) , map - > elsize ) ;
memcpy ( map - > spare , bitem , map - > elsize ) ;
memcpy ( bucket_item ( bucket ) , bucket_item ( entry ) , map - > elsize ) ;
memcpy ( bitem , eitem , map - > elsize ) ;
return map - > spare ;
return map - > spare ;
}
}
if ( bucket - > dib < entry - > dib ) {
if ( bucket - > dib < entry - > dib ) {
memcpy ( map - > spare , bucket , map - > bucketsz ) ;
memcpy ( map - > spare , bucket , map - > bucketsz ) ;
memcpy ( bucket , entry , map - > bucketsz ) ;
memcpy ( bucket , entry , map - > bucketsz ) ;
memcpy ( entry , map - > spare , map - > bucketsz ) ;
memcpy ( entry , map - > spare , map - > bucketsz ) ;
eitem = bucket_item ( entry ) ;
}
}
i = ( i + 1 ) & map - > mask ;
i = ( i + 1 ) & map - > mask ;
entry - > dib + = 1 ;
entry - > dib + = 1 ;
}
}
}
}
// hashmap_get returns the item based on the provided key. If the item is not
// hashmap_set inserts or replaces an item in the hash map. If an item is
// found then NULL is returned.
// replaced then it is returned otherwise NULL is returned. This operation
void * hashmap_get ( struct hashmap * map , const void * key ) {
// may allocate memory. If the system is unable to allocate additional
if ( ! key ) {
// memory then NULL is returned and hashmap_oom() returns true.
panic ( " key is null " ) ;
const void * hashmap_set ( struct hashmap * map , const void * item ) {
return hashmap_set_with_hash ( map , item , get_hash ( map , item ) ) ;
}
}
uint64_t hash = get_hash ( map , key ) ;
// hashmap_get_with_hash works like hashmap_get but you provide your
// own hash. The 'hash' callback provided to the hashmap_new function
// will not be called
const void * hashmap_get_with_hash ( struct hashmap * map , const void * key ,
uint64_t hash )
{
hash = clip_hash ( hash ) ;
size_t i = hash & map - > mask ;
size_t i = hash & map - > mask ;
for ( ; ; ) {
while ( 1 ) {
struct bucket * bucket = bucket_at ( map , i ) ;
struct bucket * bucket = bucket_at ( map , i ) ;
if ( ! bucket - > dib ) {
if ( ! bucket - > dib ) return NULL ;
return NULL ;
if ( bucket - > hash = = hash ) {
void * bitem = bucket_item ( bucket ) ;
if ( ! map - > compare | | map - > compare ( key , bitem , map - > udata ) = = 0 ) {
return bitem ;
}
}
if ( bucket - > hash = = hash & &
map - > compare ( key , bucket_item ( bucket ) , map - > udata ) = = 0 )
{
return bucket_item ( bucket ) ;
}
}
i = ( i + 1 ) & map - > mask ;
i = ( i + 1 ) & map - > mask ;
}
}
}
}
// hashmap_get looks up the item matching `key`. The key is hashed with the
// map's own hash callback and the lookup is delegated to
// hashmap_get_with_hash. Returns the stored item, or NULL when no matching
// item is found.
const void *hashmap_get(struct hashmap *map, const void *key) {
    uint64_t hash = get_hash(map, key);
    return hashmap_get_with_hash(map, key, hash);
}
// hashmap_probe returns the item in the bucket at position or NULL if an item
// hashmap_probe returns the item in the bucket at position or NULL if an item
// is not set for that bucket. The position is 'moduloed' by the number of
// is not set for that bucket. The position is 'moduloed' by the number of
// buckets in the hashmap.
// buckets in the hashmap.
void * hashmap_probe ( struct hashmap * map , uint64_t position ) {
const void * hashmap_probe ( struct hashmap * map , uint64_t position ) {
size_t i = position & map - > mask ;
size_t i = position & map - > mask ;
struct bucket * bucket = bucket_at ( map , i ) ;
struct bucket * bucket = bucket_at ( map , i ) ;
if ( ! bucket - > dib ) {
if ( ! bucket - > dib ) {
@ -327,27 +340,27 @@ void *hashmap_probe(struct hashmap *map, uint64_t position) {
return bucket_item ( bucket ) ;
return bucket_item ( bucket ) ;
}
}
// hashmap_delete_with_hash works like hashmap_delete but you provide your
// hashmap_delete removes an item from the hash map and returns it. If the
// own hash. The 'hash' callback provided to the hashmap_new function
// item is not found then NULL is returned.
// will not be called
void * hashmap_delete ( struct hashmap * map , void * key ) {
const void * hashmap_delete _with_hash ( struct hashmap * map , const void * key ,
if ( ! key ) {
uint64_t hash )
panic ( " key is null " ) ;
{
}
hash = clip_hash ( hash ) ;
map - > oom = false ;
map - > oom = false ;
uint64_t hash = get_hash ( map , key ) ;
size_t i = hash & map - > mask ;
size_t i = hash & map - > mask ;
for ( ; ; ) {
while ( 1 ) {
struct bucket * bucket = bucket_at ( map , i ) ;
struct bucket * bucket = bucket_at ( map , i ) ;
if ( ! bucket - > dib ) {
if ( ! bucket - > dib ) {
return NULL ;
return NULL ;
}
}
if ( bucket - > hash = = hash & &
void * bitem = bucket_item ( bucket ) ;
map - > compare ( key , bucket_item ( bucket ) , map - > udata ) = = 0 )
if ( bucket - > hash = = hash & & ( ! map - > compare | |
map - > compare ( key , bitem , map - > udata ) = = 0 ) )
{
{
memcpy ( map - > spare , bucket_item ( bucket ) , map - > elsize ) ;
memcpy ( map - > spare , bitem , map - > elsize ) ;
bucket - > dib = 0 ;
bucket - > dib = 0 ;
for ( ; ; ) {
while( 1 ) {
struct bucket * prev = bucket ;
struct bucket * prev = bucket ;
i = ( i + 1 ) & map - > mask ;
i = ( i + 1 ) & map - > mask ;
bucket = bucket_at ( map , i ) ;
bucket = bucket_at ( map , i ) ;
@ -371,6 +384,12 @@ void *hashmap_delete(struct hashmap *map, void *key) {
}
}
}
}
// hashmap_delete removes the item matching `key` from the hash map and
// returns it, or returns NULL if the item is not found. The key is hashed
// with the map's own hash callback and the removal is delegated to
// hashmap_delete_with_hash.
const void *hashmap_delete(struct hashmap *map, const void *key) {
    uint64_t hash = get_hash(map, key);
    return hashmap_delete_with_hash(map, key, hash);
}
// hashmap_count returns the number of items in the hash map.
// hashmap_count returns the number of items in the hash map.
size_t hashmap_count ( struct hashmap * map ) {
size_t hashmap_count ( struct hashmap * map ) {
return map - > count ;
return map - > count ;
@ -400,16 +419,13 @@ bool hashmap_scan(struct hashmap *map,
{
{
for ( size_t i = 0 ; i < map - > nbuckets ; i + + ) {
for ( size_t i = 0 ; i < map - > nbuckets ; i + + ) {
struct bucket * bucket = bucket_at ( map , i ) ;
struct bucket * bucket = bucket_at ( map , i ) ;
if ( bucket - > dib ) {
if ( bucket - > dib & & ! iter ( bucket_item ( bucket ) , udata ) ) {
if ( ! iter ( bucket_item ( bucket ) , udata ) ) {
return false ;
return false ;
}
}
}
}
}
return true ;
return true ;
}
}
// hashmap_iter iterates one key at a time yielding a reference to an
// hashmap_iter iterates one key at a time yielding a reference to an
// entry at each iteration. Useful to write simple loops and avoid writing
// entry at each iteration. Useful to write simple loops and avoid writing
// dedicated callbacks and udata structures, as in hashmap_scan.
// dedicated callbacks and udata structures, as in hashmap_scan.
@ -428,19 +444,14 @@ bool hashmap_scan(struct hashmap *map,
//
//
// The function returns true if an item was retrieved; false if the end of the
// The function returns true if an item was retrieved; false if the end of the
// iteration has been reached.
// iteration has been reached.
bool hashmap_iter ( struct hashmap * map , size_t * i , void * * item )
bool hashmap_iter ( struct hashmap * map , size_t * i , void * * item ) {
{
struct bucket * bucket ;
struct bucket * bucket ;
do {
do {
if ( * i > = map - > nbuckets ) return false ;
if ( * i > = map - > nbuckets ) return false ;
bucket = bucket_at ( map , * i ) ;
bucket = bucket_at ( map , * i ) ;
( * i ) + + ;
( * i ) + + ;
} while ( ! bucket - > dib ) ;
} while ( ! bucket - > dib ) ;
* item = bucket_item ( bucket ) ;
* item = bucket_item ( bucket ) ;
return true ;
return true ;
}
}
@ -462,8 +473,8 @@ bool hashmap_iter(struct hashmap *map, size_t *i, void **item)
//
//
// default: SipHash-2-4
// default: SipHash-2-4
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
static uint64_t SIP64 ( const uint8_t * in , const size_t inlen ,
static uint64_t SIP64 ( const uint8_t * in , const size_t inlen , uint64_t seed0 ,
uint64_t seed0 , uint64_t seed1 )
uint64_t seed1 )
{
{
# define U8TO64_LE(p) \
# define U8TO64_LE(p) \
{ ( ( ( uint64_t ) ( ( p ) [ 0 ] ) ) | ( ( uint64_t ) ( ( p ) [ 1 ] ) < < 8 ) | \
{ ( ( ( uint64_t ) ( ( p ) [ 0 ] ) ) | ( ( uint64_t ) ( ( p ) [ 1 ] ) < < 8 ) | \
@ -597,21 +608,155 @@ static uint64_t MM86128(const void *key, const int len, uint32_t seed) {
return ( ( ( uint64_t ) h2 ) < < 32 ) | h1 ;
return ( ( ( uint64_t ) h2 ) < < 32 ) | h1 ;
}
}
//-----------------------------------------------------------------------------
// xxHash Library
// Copyright (c) 2012-2021 Yann Collet
// All rights reserved.
//
// BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
//
// xxHash3
//-----------------------------------------------------------------------------
#define XXH_PRIME_1 11400714785074694791ULL
#define XXH_PRIME_2 14029467366897019727ULL
#define XXH_PRIME_3 1609587929392839161ULL
#define XXH_PRIME_4 9650029242287828579ULL
#define XXH_PRIME_5 2870177450012600261ULL

// XXH_read64 loads 8 bytes from `memptr` in host byte order. memcpy is used
// so the load is valid regardless of the pointer's alignment.
static uint64_t XXH_read64(const void *memptr) {
    uint64_t val;
    memcpy(&val, memptr, sizeof(val));
    return val;
}

// XXH_read32 loads 4 bytes from `memptr` in host byte order. memcpy is used
// so the load is valid regardless of the pointer's alignment.
static uint32_t XXH_read32(const void *memptr) {
    uint32_t val;
    memcpy(&val, memptr, sizeof(val));
    return val;
}

// XXH_rotl64 rotates `x` left by `r` bits. The count is reduced modulo 64 so
// that a shift by the full width of the type (undefined in C) cannot occur.
static uint64_t XXH_rotl64(uint64_t x, int r) {
    unsigned int n = (unsigned int)r & 63u;
    return (x << n) | (x >> ((64u - n) & 63u));
}

// XXH_round mixes one 8-byte input lane into the accumulator `acc`.
static uint64_t XXH_round(uint64_t acc, uint64_t lane) {
    acc += lane * XXH_PRIME_2;
    acc = XXH_rotl64(acc, 31);
    return acc * XXH_PRIME_1;
}

// XXH_merge folds a finished stripe accumulator `acc` into the hash `h64`.
static uint64_t XXH_merge(uint64_t h64, uint64_t acc) {
    h64 ^= XXH_round(0, acc);
    return h64 * XXH_PRIME_1 + XXH_PRIME_4;
}

// xxh3 returns a 64-bit hash of the `len` bytes at `data`, seeded with
// `seed`. Input is consumed in 32-byte stripes (four 8-byte lanes), then
// 8-byte words, then one optional 4-byte word, then single bytes, and the
// result is passed through a final avalanche.
//
// Multi-byte loads use host byte order, so results differ between
// little-endian and big-endian hosts.
//
// The number of unread bytes is tracked in `remaining` instead of comparing
// `p + k` against an end pointer: forming a pointer more than one past the
// end of the buffer is undefined behaviour, and so is offsetting a NULL
// `data` pointer. With this form a NULL `data` with `len == 0` is never
// dereferenced or offset.
//
// NOTE(review): despite the name, the round structure and constants look
// like the classic 64-bit xxHash (XXH64) rather than XXH3 -- confirm before
// relying on interoperability with other xxHash implementations.
static uint64_t xxh3(const void *data, size_t len, uint64_t seed) {
    const uint8_t *p = (const uint8_t *)data;
    size_t remaining = len;
    uint64_t h64;

    if (remaining >= 32) {
        uint64_t v1 = seed + XXH_PRIME_1 + XXH_PRIME_2;
        uint64_t v2 = seed + XXH_PRIME_2;
        uint64_t v3 = seed + 0;
        uint64_t v4 = seed - XXH_PRIME_1;

        do {
            v1 = XXH_round(v1, XXH_read64(p));
            v2 = XXH_round(v2, XXH_read64(p + 8));
            v3 = XXH_round(v3, XXH_read64(p + 16));
            v4 = XXH_round(v4, XXH_read64(p + 24));
            p += 32;
            remaining -= 32;
        } while (remaining >= 32);

        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) +
              XXH_rotl64(v4, 18);
        h64 = XXH_merge(h64, v1);
        h64 = XXH_merge(h64, v2);
        h64 = XXH_merge(h64, v3);
        h64 = XXH_merge(h64, v4);
    } else {
        h64 = seed + XXH_PRIME_5;
    }

    h64 += (uint64_t)len;

    // Remaining whole 8-byte words.
    while (remaining >= 8) {
        h64 ^= XXH_round(0, XXH_read64(p));
        h64 = XXH_rotl64(h64, 27) * XXH_PRIME_1 + XXH_PRIME_4;
        p += 8;
        remaining -= 8;
    }

    // At most one remaining 4-byte word.
    if (remaining >= 4) {
        h64 ^= (uint64_t)(XXH_read32(p)) * XXH_PRIME_1;
        h64 = XXH_rotl64(h64, 23) * XXH_PRIME_2 + XXH_PRIME_3;
        p += 4;
        remaining -= 4;
    }

    // Trailing bytes (0 to 3 of them).
    while (remaining > 0) {
        h64 ^= (uint64_t)(*p) * XXH_PRIME_5;
        h64 = XXH_rotl64(h64, 11) * XXH_PRIME_1;
        p++;
        remaining--;
    }

    // Final avalanche.
    h64 ^= h64 >> 33;
    h64 *= XXH_PRIME_2;
    h64 ^= h64 >> 29;
    h64 *= XXH_PRIME_3;
    h64 ^= h64 >> 32;
    return h64;
}
// hashmap_sip returns a hash value for `data` using SipHash-2-4.
// hashmap_sip returns a hash value for `data` using SipHash-2-4.
uint64_t hashmap_sip ( const void * data , size_t len ,
uint64_t hashmap_sip ( const void * data , size_t len , uint64_t seed0 ,
uint64_t seed0 , uint64_t seed1 )
uint64_t seed1 )
{
{
return SIP64 ( ( uint8_t * ) data , len , seed0 , seed1 ) ;
return SIP64 ( ( uint8_t * ) data , len , seed0 , seed1 ) ;
}
}
// hashmap_murmur returns a hash value for `data` using Murmur3_86_128.
// hashmap_murmur returns a hash value for `data` using Murmur3_86_128.
uint64_t hashmap_murmur ( const void * data , size_t len ,
uint64_t hashmap_murmur ( const void * data , size_t len , uint64_t seed0 ,
uint64_t seed0 , uint64_t seed1 )
uint64_t seed1 )
{
{
( void ) seed1 ;
( void ) seed1 ;
return MM86128 ( data , len , seed0 ) ;
return MM86128 ( data , len , seed0 ) ;
}
}
// hashmap_xxhash3 returns a hash value for `data` using this file's xxh3
// routine, seeded with `seed0`. `seed1` is accepted so the signature matches
// the other hash helpers, but it is ignored.
uint64_t hashmap_xxhash3(const void *data, size_t len, uint64_t seed0,
                         uint64_t seed1)
{
    (void)seed1;
    uint64_t digest = xxh3(data, len, seed0);
    return digest;
}
//==============================================================================
//==============================================================================
// TESTS AND BENCHMARKS
// TESTS AND BENCHMARKS
// $ cc -DHASHMAP_TEST hashmap.c && ./a.out # run tests
// $ cc -DHASHMAP_TEST hashmap.c && ./a.out # run tests
@ -630,10 +775,15 @@ static size_t deepcount(struct hashmap *map) {
}
}
# ifdef __GNUC__
# ifdef __GNUC__
# pragma GCC diagnostic ignored "-W unused-parameter "
# pragma GCC diagnostic ignored "-W pedantic "
# endif
# endif
# ifdef __clang__
# ifdef __clang__
# pragma GCC diagnostic ignored "-Wunknown-warning-option"
# pragma GCC diagnostic ignored "-Wcompound-token-split-by-macro"
# pragma GCC diagnostic ignored "-Wcompound-token-split-by-macro"
# pragma GCC diagnostic ignored "-Wgnu-statement-expression-from-macro-expansion"
# endif
# ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wunused-parameter"
# endif
# endif
# include <stdlib.h>
# include <stdlib.h>
@ -694,18 +844,22 @@ static int compare_strs(const void *a, const void *b, void *udata) {
}
}
static uint64_t hash_int ( const void * item , uint64_t seed0 , uint64_t seed1 ) {
static uint64_t hash_int ( const void * item , uint64_t seed0 , uint64_t seed1 ) {
return hashmap_murmur ( item , sizeof ( int ) , seed0 , seed1 ) ;
return hashmap_xxhash3 ( item , sizeof ( int ) , seed0 , seed1 ) ;
// return hashmap_sip(item, sizeof(int), seed0, seed1);
// return hashmap_murmur(item, sizeof(int), seed0, seed1);
}
}
static uint64_t hash_str ( const void * item , uint64_t seed0 , uint64_t seed1 ) {
static uint64_t hash_str ( const void * item , uint64_t seed0 , uint64_t seed1 ) {
return hashmap_murmur ( * ( char * * ) item , strlen ( * ( char * * ) item ) , seed0 , seed1 ) ;
return hashmap_xxhash3 ( * ( char * * ) item , strlen ( * ( char * * ) item ) , seed0 , seed1 ) ;
// return hashmap_sip(*(char**)item, strlen(*(char**)item), seed0, seed1);
// return hashmap_murmur(*(char**)item, strlen(*(char**)item), seed0, seed1);
}
}
static void free_str ( void * item ) {
static void free_str ( void * item ) {
xfree ( * ( char * * ) item ) ;
xfree ( * ( char * * ) item ) ;
}
}
static void all ( ) {
static void all ( void ) {
int seed = getenv ( " SEED " ) ? atoi ( getenv ( " SEED " ) ) : time ( NULL ) ;
int seed = getenv ( " SEED " ) ? atoi ( getenv ( " SEED " ) ) : time ( NULL ) ;
int N = getenv ( " N " ) ? atoi ( getenv ( " N " ) ) : 2000 ;
int N = getenv ( " N " ) ? atoi ( getenv ( " N " ) ) : 2000 ;
printf ( " seed=%d, count=%d, item_size=%zu \n " , seed , N , sizeof ( int ) ) ;
printf ( " seed=%d, count=%d, item_size=%zu \n " , seed , N , sizeof ( int ) ) ;
@ -716,6 +870,7 @@ static void all() {
// test sip and murmur hashes
// test sip and murmur hashes
assert ( hashmap_sip ( " hello " , 5 , 1 , 2 ) = = 2957200328589801622 ) ;
assert ( hashmap_sip ( " hello " , 5 , 1 , 2 ) = = 2957200328589801622 ) ;
assert ( hashmap_murmur ( " hello " , 5 , 1 , 2 ) = = 1682575153221130884 ) ;
assert ( hashmap_murmur ( " hello " , 5 , 1 , 2 ) = = 1682575153221130884 ) ;
assert ( hashmap_xxhash3 ( " hello " , 5 , 1 , 2 ) = = 2584346877953614258 ) ;
int * vals ;
int * vals ;
while ( ! ( vals = xmalloc ( N * sizeof ( int ) ) ) ) { }
while ( ! ( vals = xmalloc ( N * sizeof ( int ) ) ) ) { }
@ -733,7 +888,7 @@ static void all() {
assert ( map - > count = = ( size_t ) i ) ;
assert ( map - > count = = ( size_t ) i ) ;
assert ( map - > count = = hashmap_count ( map ) ) ;
assert ( map - > count = = hashmap_count ( map ) ) ;
assert ( map - > count = = deepcount ( map ) ) ;
assert ( map - > count = = deepcount ( map ) ) ;
int * v ;
const int * v ;
assert ( ! hashmap_get ( map , & vals [ i ] ) ) ;
assert ( ! hashmap_get ( map , & vals [ i ] ) ) ;
assert ( ! hashmap_delete ( map , & vals [ i ] ) ) ;
assert ( ! hashmap_delete ( map , & vals [ i ] ) ) ;
while ( true ) {
while ( true ) {
@ -788,7 +943,7 @@ static void all() {
shuffle ( vals , N , sizeof ( int ) ) ;
shuffle ( vals , N , sizeof ( int ) ) ;
for ( int i = 0 ; i < N ; i + + ) {
for ( int i = 0 ; i < N ; i + + ) {
int * v ;
const int * v ;
v = hashmap_delete ( map , & vals [ i ] ) ;
v = hashmap_delete ( map , & vals [ i ] ) ;
assert ( v & & * v = = vals [ i ] ) ;
assert ( v & & * v = = vals [ i ] ) ;
assert ( ! hashmap_get ( map , & vals [ i ] ) ) ;
assert ( ! hashmap_get ( map , & vals [ i ] ) ) ;
@ -841,7 +996,7 @@ static void all() {
for ( int i = 0 ; i < N ; i + + ) {
for ( int i = 0 ; i < N ; i + + ) {
char * str ;
char * str ;
while ( ! ( str = xmalloc ( 16 ) ) ) ;
while ( ! ( str = xmalloc ( 16 ) ) ) ;
s printf( str , " s%i " , i ) ;
s n printf( str , 16 , " s%i " , i ) ;
while ( ! hashmap_set ( map , & str ) ) ;
while ( ! hashmap_set ( map , & str ) ) ;
}
}
@ -851,7 +1006,7 @@ static void all() {
for ( int i = 0 ; i < N ; i + + ) {
for ( int i = 0 ; i < N ; i + + ) {
char * str ;
char * str ;
while ( ! ( str = xmalloc ( 16 ) ) ) ;
while ( ! ( str = xmalloc ( 16 ) ) ) ;
s printf( str , " s%i " , i ) ;
s n printf( str , 16 , " s%i " , i ) ;
while ( ! hashmap_set ( map , & str ) ) ;
while ( ! hashmap_set ( map , & str ) ) ;
}
}
@ -896,7 +1051,7 @@ static void all() {
printf ( " \n " ) ; \
printf ( " \n " ) ; \
} }
} }
static void benchmarks ( ) {
static void benchmarks ( void ) {
int seed = getenv ( " SEED " ) ? atoi ( getenv ( " SEED " ) ) : time ( NULL ) ;
int seed = getenv ( " SEED " ) ? atoi ( getenv ( " SEED " ) ) : time ( NULL ) ;
int N = getenv ( " N " ) ? atoi ( getenv ( " N " ) ) : 5000000 ;
int N = getenv ( " N " ) ? atoi ( getenv ( " N " ) ) : 5000000 ;
printf ( " seed=%d, count=%d, item_size=%zu \n " , seed , N , sizeof ( int ) ) ;
printf ( " seed=%d, count=%d, item_size=%zu \n " , seed , N , sizeof ( int ) ) ;
@ -916,17 +1071,17 @@ static void benchmarks() {
map = hashmap_new ( sizeof ( int ) , 0 , seed , seed , hash_int , compare_ints_udata ,
map = hashmap_new ( sizeof ( int ) , 0 , seed , seed , hash_int , compare_ints_udata ,
NULL , NULL ) ;
NULL , NULL ) ;
bench ( " set " , N , {
bench ( " set " , N , {
int * v = hashmap_set ( map , & vals [ i ] ) ;
const int * v = hashmap_set ( map , & vals [ i ] ) ;
assert ( ! v ) ;
assert ( ! v ) ;
} )
} )
shuffle ( vals , N , sizeof ( int ) ) ;
shuffle ( vals , N , sizeof ( int ) ) ;
bench ( " get " , N , {
bench ( " get " , N , {
int * v = hashmap_get ( map , & vals [ i ] ) ;
const int * v = hashmap_get ( map , & vals [ i ] ) ;
assert ( v & & * v = = vals [ i ] ) ;
assert ( v & & * v = = vals [ i ] ) ;
} )
} )
shuffle ( vals , N , sizeof ( int ) ) ;
shuffle ( vals , N , sizeof ( int ) ) ;
bench ( " delete " , N , {
bench ( " delete " , N , {
int * v = hashmap_delete ( map , & vals [ i ] ) ;
const int * v = hashmap_delete ( map , & vals [ i ] ) ;
assert ( v & & * v = = vals [ i ] ) ;
assert ( v & & * v = = vals [ i ] ) ;
} )
} )
hashmap_free ( map ) ;
hashmap_free ( map ) ;
@ -934,17 +1089,17 @@ static void benchmarks() {
map = hashmap_new ( sizeof ( int ) , N , seed , seed , hash_int , compare_ints_udata ,
map = hashmap_new ( sizeof ( int ) , N , seed , seed , hash_int , compare_ints_udata ,
NULL , NULL ) ;
NULL , NULL ) ;
bench ( " set (cap) " , N , {
bench ( " set (cap) " , N , {
int * v = hashmap_set ( map , & vals [ i ] ) ;
const int * v = hashmap_set ( map , & vals [ i ] ) ;
assert ( ! v ) ;
assert ( ! v ) ;
} )
} )
shuffle ( vals , N , sizeof ( int ) ) ;
shuffle ( vals , N , sizeof ( int ) ) ;
bench ( " get (cap) " , N , {
bench ( " get (cap) " , N , {
int * v = hashmap_get ( map , & vals [ i ] ) ;
const int * v = hashmap_get ( map , & vals [ i ] ) ;
assert ( v & & * v = = vals [ i ] ) ;
assert ( v & & * v = = vals [ i ] ) ;
} )
} )
shuffle ( vals , N , sizeof ( int ) ) ;
shuffle ( vals , N , sizeof ( int ) ) ;
bench ( " delete (cap) " , N , {
bench ( " delete (cap) " , N , {
int * v = hashmap_delete ( map , & vals [ i ] ) ;
const int * v = hashmap_delete ( map , & vals [ i ] ) ;
assert ( v & & * v = = vals [ i ] ) ;
assert ( v & & * v = = vals [ i ] ) ;
} )
} )
@ -959,7 +1114,7 @@ static void benchmarks() {
}
}
}
}
int main ( ) {
int main ( void ) {
hashmap_set_allocator ( xmalloc , xfree ) ;
hashmap_set_allocator ( xmalloc , xfree ) ;
if ( getenv ( " BENCH " ) ) {
if ( getenv ( " BENCH " ) ) {