librsync  2.3.4
hashtable.h
Go to the documentation of this file.
1/*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
2 *
3 * hashtable.h -- a generic open addressing hashtable.
4 *
5 * Copyright (C) 2003 by Donovan Baarda <abo@minkirri.apana.org.au>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20
21/** \file hashtable.h
22 * A generic open addressing hashtable.
23 *
24 * This is a minimal hashtable containing pointers to arbitrary entries with
25 * configurable hashtable size and support for custom hash() and cmp() methods.
26 * The cmp() method can either be a simple comparison between two keys, or can
27 * be against a special match object containing additional mutable state. This
28 * allows for things like deferred and cached evaluation of costly comparison
29 * data. The hash() function doesn't need to avoid clustering behaviour.
30 *
31 * It uses open addressing with quadratic probing for collisions. The
32 * MurmurHash3 finalization function is optionally used on the hash() output to
33 * avoid clustering and can be disabled by setting HASHTABLE_NMIX32. There is
34 * no support for removing entries, only adding them. Multiple entries with the
35 * same key can be added, and you can use a fancy cmp() function to find
36 * particular entries by more than just their key. There is an iterator for
37 * iterating through all entries in the hashtable. There are optional
38 * NAME_find() find/match/hashcmp/entrycmp stats counters that can be disabled
39 * by defining HASHTABLE_NSTATS. There is an optional simple k=1 bloom filter
40 * for speed that can be disabled by defining HASHTABLE_NBLOOM.
41 *
42 * The types and methods of the hashtable and its contents are specified by
43 * using \#define parameters set to their basenames (the prefixes for the *_t
44 * type and *_func() methods) before doing \#include "hashtable.h". This
45 * produces static inline type-safe methods that are either application
46 * optimized for speed or wrappers around void* implementation methods for
47 * compactness.
48 *
49 * \param ENTRY - the entry type basename.
50 *
51 * \param KEY - optional key type basename (default: ENTRY).
52 *
53 * \param MATCH - optional match type basename (default: KEY).
54 *
55 * \param NAME - optional hashtable type basename (default: ENTRY_hashtable).
56 *
57 * Example: \code
58 * typedef ... mykey_t;
59 * int mykey_hash(mykey_t const *e);
60 * int mykey_cmp(mykey_t *e, mykey_t const *o);
61 *
62 * typedef struct myentry {
63 * mykey_t key; // Inherit from mykey_t.
64 * ...extra entry value data...
65 * } myentry_t;
66 * void myentry_init(myentry_t *e, ...);
67 *
68 * #define ENTRY myentry
69 * #define KEY mykey
70 * #include "hashtable.h"
71 *
72 * hashtable_t *t;
73 * myentry_t entries[300];
74 * mykey_t k;
75 * myentry_t *e;
76 *
77 * t = myentry_hashtable_new(300);
78 * myentry_init(&entries[5], ...);
79 * myentry_hashtable_add(t, &entries[5]);
80 * k = ...;
81 * e = myentry_hashtable_find(t, &k);
82 *
83 * int i;
84 * for (e = myentry_hashtable_iter(t, &i); e != NULL;
85 * e = myentry_hashtable_next(t, &i))
86 * ...
87 *
88 * myentry_hashtable_free(t);
89 * \endcode
90 *
91 * The mykey_hash() and mykey_cmp() fuctions will typically take pointers to
92 * mykey/myentry instances the same as the pointers stored in the hashtable.
93 * However it is also possible for them to take "match objects" that are a
94 * "subclass" of the entry type that contain additional state for complicated
95 * comparision operations.
96 *
97 * Example: \code
98 * typedef struct mymatch {
99 * mykey_t key; // Inherit from mykey_t;
100 * ...extra match criteria and state data...
101 * } mymatch_t;
102 * int mymatch_cmp(mymatch_t *m, myentry_t const *e);
103 *
104 * #define ENTRY myentry
105 * #define KEY mykey
106 * #define MATCH mymatch
107 * #include "hashtable.h"
108 *
109 * ...
110 * mymatch_t m;
111 *
112 * t = myentry_hashtable_new(300);
113 * ...
114 * m = ...;
115 * e = myentry_hashtable_find(t, &m);
116 * \endcode
117 *
118 * The mymatch_cmp() function is only called for finding hashtable entries and
119 * can mutate the mymatch_t object for doing things like deferred and cached
120 * evaluation of expensive match data. It can also access the whole myentry_t
121 * object to match against more than just the key. */
122#ifndef HASHTABLE_H
123# define HASHTABLE_H
124
125# include <stdbool.h>
126
127/** The hashtable type. */
128typedef struct hashtable {
129 int size; /**< Size of allocated hashtable. */
130 int count; /**< Number of entries in hashtable. */
131 unsigned tmask; /**< Mask to get the hashtable index. */
132# ifndef HASHTABLE_NBLOOM
133 unsigned bshift; /**< Shift to get the bloomfilter index. */
134# endif
135# ifndef HASHTABLE_NSTATS
136 /* The following are for accumulating NAME_find() stats. */
137 long find_count; /**< The count of finds tried. */
138 long match_count; /**< The count of matches found. */
139 long hashcmp_count; /**< The count of hash compares done. */
140 long entrycmp_count; /**< The count of entry compares done. */
141# endif
142# ifndef HASHTABLE_NBLOOM
143 unsigned char *kbloom; /**< Bloom filter of hash keys with k=1. */
144# endif
145 void **etable; /**< Table of pointers to entries. */
146 unsigned ktable[]; /**< Table of hash keys. */
148
149/* void* implementations for the type-safe static inline wrappers below. */
150hashtable_t *_hashtable_new(int size);
151void _hashtable_free(hashtable_t *t);
152
153# ifndef HASHTABLE_NBLOOM
154static inline void hashtable_setbloom(hashtable_t *t, unsigned const h)
155{
156 /* Use upper bits for a "different hash". */
157 unsigned const i = h >> t->bshift;
158 t->kbloom[i / 8] |= (unsigned char)(1 << (i % 8));
159}
160
161static inline bool hashtable_getbloom(hashtable_t *t, unsigned const h)
162{
163 /* Use upper bits for a "different hash". */
164 unsigned const i = h >> t->bshift;
165 return (t->kbloom[i / 8] >> (i % 8)) & 1;
166}
167# endif
168
169/** MurmurHash3 finalization mix function. */
170static inline unsigned mix32(unsigned h)
171{
172 h ^= h >> 16;
173 h *= 0x85ebca6b;
174 h ^= h >> 13;
175 h *= 0xc2b2ae35;
176 h ^= h >> 16;
177 return h;
178}
179
180/** Ensure hash's are never zero. */
181static inline unsigned nozero(unsigned h)
182{
183 return h ? h : (unsigned)-1;
184}
185
186#endif /* !HASHTABLE_H */
187
188/* If ENTRY is defined, define type-dependent static inline methods. */
189#ifdef ENTRY
190
191# include <assert.h>
192# include <stddef.h>
193
194# define _JOIN2(x, y) x##y
195# define _JOIN(x, y) _JOIN2(x, y)
196
197# ifndef KEY
198# define KEY ENTRY
199# endif
200
201# ifndef MATCH
202# define MATCH KEY
203# endif
204
205# ifndef NAME
206# define NAME _JOIN(ENTRY, _hashtable)
207# endif
208
209# define ENTRY_t _JOIN(ENTRY, _t) /**< The entry type. */
210# define KEY_t _JOIN(KEY, _t) /**< The key type. */
211# define MATCH_t _JOIN(MATCH, _t) /**< The match type. */
212# define KEY_hash _JOIN(KEY, _hash) /**< The key hash(k) method. */
213# define MATCH_cmp _JOIN(MATCH, _cmp) /**< The match cmp(m, e) method. */
214/* The names for all the hashtable methods. */
215# define NAME_new _JOIN(NAME, _new)
216# define NAME_free _JOIN(NAME, _free)
217# define NAME_stats_init _JOIN(NAME, _stats_init)
218# define NAME_add _JOIN(NAME, _add)
219# define NAME_find _JOIN(NAME, _find)
220# define NAME_iter _JOIN(NAME, _iter)
221# define NAME_next _JOIN(NAME, _next)
222
223/* Modified hash() with/without mix32() and reserving zero for empty buckets. */
224# ifdef HASHTABLE_NMIX32
225# define _KEY_HASH(k) nozero(KEY_hash((KEY_t *)k))
226# else
227# define _KEY_HASH(k) nozero(mix32(KEY_hash((KEY_t *)k)))
228# endif
229
230/* Loop macro for probing table t for key hash hk, iterating with index i and
231 entry hash h, terminating at an empty bucket. */
232# define _for_probe(t, hk, i, h) \
233 unsigned const *const ktable = t->ktable;\
234 unsigned const tmask = t->tmask;\
235 unsigned i, s, h;\
236 for (i = hk & tmask, s = 0; (h = ktable[i]); i = (i + ++s) & tmask)
237
238/* Conditional macro for incrementing stats counters. */
239# ifndef HASHTABLE_NSTATS
240# define _stats_inc(c) (c++)
241# else
242# define _stats_inc(c)
243# endif
244
245/** Allocate and initialize a hashtable instance.
246 *
247 * The provided size is used as an indication of the number of entries you wish
248 * to add, but the allocated size will probably be larger depending on the
249 * implementation to enable optimisations or avoid degraded performance. It may
250 * be possible to fill the table beyond the requested size, but performance can
251 * start to degrade badly if it is over filled.
252 *
253 * \param size - The desired minimum size of the hash table.
254 *
255 * \return The initialized hashtable instance or NULL if it failed. */
256static inline hashtable_t *NAME_new(int size)
257{
258 return _hashtable_new(size);
259}
260
261/** Destroy and free a hashtable instance.
262 *
263 * This will free the hashtable, but will not free the entries in the
264 * hashtable. If you want to free the entries too, use a hashtable iterator to
265 * free the the entries first.
266 *
267 * \param *t - The hashtable to destroy and free. */
268static inline void NAME_free(hashtable_t *t)
269{
270 _hashtable_free(t);
271}
272
273/** Initialize hashtable stats counters.
274 *
275 * This will reset all the stats counters for the hashtable,
276 *
277 * \param *t - The hashtable to initializ stats for. */
278static inline void NAME_stats_init(hashtable_t *t)
279{
280# ifndef HASHTABLE_NSTATS
282# endif
283}
284
285/** Add an entry to a hashtable.
286 *
287 * This doesn't use MATCH_cmp() or do any checks for existing copies or
288 * instances, so it will add duplicates. If you want to avoid adding
289 * duplicates, use NAME_find() to check for existing entries first.
290 *
291 * \param *t - The hashtable to add to.
292 *
293 * \param *e - The entry object to add.
294 *
295 * \return The added entry, or NULL if the table is full. */
296static inline ENTRY_t *NAME_add(hashtable_t *t, ENTRY_t *e)
297{
298 unsigned he = _KEY_HASH(e);
299
300 assert(e != NULL);
301 if (t->count + 1 == t->size)
302 return NULL;
303# ifndef HASHTABLE_NBLOOM
304 hashtable_setbloom(t, he);
305# endif
306 _for_probe(t, he, i, h);
307 t->count++;
308 t->ktable[i] = he;
309 return t->etable[i] = e;
310}
311
312/** Find an entry in a hashtable.
313 *
314 * Uses MATCH_cmp() to find the first matching entry in the table in the same
315 * hash() bucket.
316 *
317 * \param *t - The hashtable to search.
318 *
319 * \param *m - The key or match object to search for.
320 *
321 * \return The first found entry, or NULL if nothing was found. */
322static inline ENTRY_t *NAME_find(hashtable_t *t, MATCH_t *m)
323{
324 assert(m != NULL);
325 unsigned hm = _KEY_HASH(m);
326 ENTRY_t *e;
327
328 _stats_inc(t->find_count);
329# ifndef HASHTABLE_NBLOOM
330 if (!hashtable_getbloom(t, hm))
331 return NULL;
332# endif
333 _for_probe(t, hm, i, he) {
334 _stats_inc(t->hashcmp_count);
335 if (hm == he) {
336 _stats_inc(t->entrycmp_count);
337 if (!MATCH_cmp(m, e = t->etable[i])) {
338 _stats_inc(t->match_count);
339 return e;
340 }
341 }
342 }
343 /* Also count the compare for the empty bucket. */
344 _stats_inc(t->hashcmp_count);
345 return NULL;
346}
347
348static inline ENTRY_t *NAME_next(hashtable_t *t, int *i);
349
350/** Initialize a iteration and return the first entry.
351 *
352 * This works together with NAME_next() for iterating through all entries in a
353 * hashtable.
354 *
355 * Example: \code
356 * for (e = NAME_iter(t, &i); e != NULL; e = NAME_next(t, &i))
357 * ...
358 * \endcode
359 *
360 * \param *t - the hashtable to iterate over.
361 *
362 * \param *i - the int iterator index to initialize.
363 *
364 * \return The first entry or NULL if the hashtable is empty. */
365static inline ENTRY_t *NAME_iter(hashtable_t *t, int *i)
366{
367 assert(t != NULL);
368 assert(i != NULL);
369 *i = 0;
370 return NAME_next(t, i);
371}
372
373/** Get the next entry from a hashtable iterator or NULL when finished.
374 *
375 * This works together with NAME_iter() for iterating through all entries in a
376 * hashtable.
377 *
378 * \param *t - the hashtable to iterate over.
379 *
380 * \param *i - the int iterator index to use.
381 *
382 * \return The next entry or NULL if the iterator is finished. */
383static inline ENTRY_t *NAME_next(hashtable_t *t, int *i)
384{
385 assert(t != NULL);
386 assert(i != NULL);
387 ENTRY_t *e = NULL;
388
389 while ((*i < t->size) && !(e = t->etable[(*i)++])) ;
390 return e;
391}
392
393# undef ENTRY
394# undef KEY
395# undef MATCH
396# undef NAME
397# undef ENTRY_t
398# undef KEY_t
399# undef MATCH_t
400# undef KEY_hash
401# undef MATCH_cmp
402# undef NAME_new
403# undef NAME_free
404# undef NAME_stats_init
405# undef NAME_add
406# undef NAME_find
407# undef NAME_iter
408# undef NAME_next
409# undef _KEY_HASH
410#endif /* ENTRY */
#define ENTRY_t
The entry type.
Definition: hashtable.h:209
static void NAME_stats_init(hashtable_t *t)
Initialize hashtable stats counters.
Definition: hashtable.h:278
static ENTRY_t * NAME_find(hashtable_t *t, MATCH_t *m)
Find an entry in a hashtable.
Definition: hashtable.h:322
static ENTRY_t * NAME_next(hashtable_t *t, int *i)
Get the next entry from a hashtable iterator or NULL when finished.
Definition: hashtable.h:383
#define MATCH_cmp
The match cmp(m, e) method.
Definition: hashtable.h:213
static ENTRY_t * NAME_iter(hashtable_t *t, int *i)
Initialize a iteration and return the first entry.
Definition: hashtable.h:365
static hashtable_t * NAME_new(int size)
Allocate and initialize a hashtable instance.
Definition: hashtable.h:256
struct hashtable hashtable_t
The hashtable type.
#define MATCH_t
The match type.
Definition: hashtable.h:211
static void NAME_free(hashtable_t *t)
Destroy and free a hashtable instance.
Definition: hashtable.h:268
static ENTRY_t * NAME_add(hashtable_t *t, ENTRY_t *e)
Add an entry to a hashtable.
Definition: hashtable.h:296
static unsigned mix32(unsigned h)
MurmurHash3 finalization mix function.
Definition: hashtable.h:170
static unsigned nozero(unsigned h)
Ensure hash's are never zero.
Definition: hashtable.h:181
The hashtable type.
Definition: hashtable.h:128
long find_count
The count of finds tried.
Definition: hashtable.h:137
unsigned ktable[]
Table of hash keys.
Definition: hashtable.h:146
long match_count
The count of matches found.
Definition: hashtable.h:138
int count
Number of entries in hashtable.
Definition: hashtable.h:130
unsigned char * kbloom
Bloom filter of hash keys with k=1.
Definition: hashtable.h:143
long entrycmp_count
The count of entry compares done.
Definition: hashtable.h:140
int size
Size of allocated hashtable.
Definition: hashtable.h:129
unsigned tmask
Mask to get the hashtable index.
Definition: hashtable.h:131
void ** etable
Table of pointers to entries.
Definition: hashtable.h:145
long hashcmp_count
The count of hash compares done.
Definition: hashtable.h:139
unsigned bshift
Shift to get the bloomfilter index.
Definition: hashtable.h:133