libcstl
Loading...
Searching...
No Matches
hash.h
Go to the documentation of this file.
1/*!
2 * @file
3 */
4
5#ifndef CSTL_HASH_H
6#define CSTL_HASH_H
7
8/*!
9 * @defgroup hash Hash table
10 * @ingroup lowlevel
11 * @brief A hash table utilizing separate chaining for collision resolution
12 */
13/*!
14 * @addtogroup hash
15 * @{
16 */
17
18#include "cstl/common.h"
19
20#include <stdbool.h>
21
22/*!
23 * @brief Function type for hashing a key into a bucket
24 *
25 * Each element inserted into the hash has an integer associated
26 * with it, aka a "key". This key must be "hashed" in order to fit
27 * into the hash table. The function that does this hashing must
28 * be of this type.
29 *
30 * @param[in] k The key to be hashed
31 * @param[in] m The size of the hash table
32 *
33 * @return A value in the range [0, m)
34 */
35typedef size_t cstl_hash_func_t(size_t k, size_t m);
36
37/*!
38 * @brief Node to anchor an element within a hash
39 *
40 * Users of the hash object declare this object within another
41 * object as follows:
42 * @code{.c}
43 * struct object {
44 * ...
45 * struct cstl_hash_node hnode;
46 * ...
47 * };
48 * @endcode
49 *
50 * When calling cstl_hash_init(), the caller passes the offset of @p hnode
51 * within their object as the @p off parameter of that function, e.g.
52 * @code{.c}
53 * offsetof(struct object, hnode)
54 * @endcode
55 */
57{
58 /*! @privatesection */
59 size_t key;
60 struct cstl_hash_node * next;
61
62 /*
63 * There's an optimization to be made here: when
64 * rehashing, it's possible that the node is rehashed
65 * but ends up in a bucket that has not yet been cleaned.
66 * if the *node* can be marked as having been cleaned,
67 * it can be skipped when the containing bucket is cleaned.
68 *
69 * whether or not carrying around that bit of state in
70 * this structure (for every node!) and the extra logic to
71 * accommodate the optimization are worthwhile is an open
72 * question. prevailing wisdom suggests that it is not.
73 */
74};
75
76/*!
77 * @brief Hash object
78 *
79 * Callers declare or allocate an object of this type to instantiate
80 * a hash. Users are encouraged to declare (and initialize) this
81 * object with the DECLARE_CSTL_HASH() macro. Any other declaration or
82 * allocation must be initialized via cstl_hash_init().
83 */
85{
86 /*! @privatesection */
87 struct
88 {
89 /*! @privatesection */
90 struct cstl_hash_bucket
91 {
92 /* list of nodes in the bucket */
93 struct cstl_hash_node * n;
94 /*
95 * clean state of the bucket.
96 * if cst matches the outer cst, then
97 * the bucket is clean.
98 */
99 bool cst;
100 } * at;
101
102 /*
103 * number of buckets in use and number of
104 * available buckets
105 */
106 size_t count, capacity;
107 cstl_hash_func_t * hash;
108 /*
109 * desired clean state. when the table is
110 * resized, this bit flips which causes all
111 * of the buckets to be regarded as dirty
112 */
113 bool cst;
114
115 struct
116 {
117 /*
118 * during a resize/rehash, @count are @hash
119 * are the desired parameters of the table
120 * after the rehash is complete. @clean is
121 * the index of the next bucket to be cleaned
122 * during the incremental sweep
123 *
124 * whether a rehash is in process is determined
125 * by whether or not @hash is NULL
126 */
127 size_t count, clean;
128 cstl_hash_func_t * hash;
129 } rh;
130 } bucket;
131
132 size_t count;
133 size_t off;
134};
135
136/*!
137 * @brief Constant initialization of a hash object
138 *
139 * @param TYPE The type of object that the hash will hold
140 * @param MEMB The name of the @p cstl_hash_node member within @p TYPE.
141 *
142 * @see cstl_hash_node for a description of the relationship between
143 * @p TYPE and @p MEMB
144 */
145#define CSTL_HASH_INITIALIZER(TYPE, MEMB) \
146 { \
147 .bucket = { \
148 .at = NULL, \
149 .count = 0, \
150 .capacity = 0, \
151 .hash = NULL, \
152 .cst = false, \
153 .rh = { \
154 .hash = NULL, \
155 }, \
156 }, \
157 .count = 0, \
158 .off = offsetof(TYPE, MEMB), \
159}
160/*!
161 * @brief (Statically) declare and initialize a hash
162 *
163 * @param NAME The name of the variable being declared
164 * @param TYPE The type of object that the hash will hold
165 * @param MEMB The name of the @p cstl_hash_node member within @p TYPE.
166 *
167 * @see cstl_hash_node for a description of the relationship between
168 * @p TYPE and @p MEMB
169 */
170#define DECLARE_CSTL_HASH(NAME, TYPE, MEMB) \
171 struct cstl_hash NAME = CSTL_HASH_INITIALIZER(TYPE, MEMB)
172
173/*!
174 * @brief Initialize a hash object
175 *
176 * @param[in,out] h A pointer to the object to be initialized
177 * @param[in] off The offset of the @p cstl_hash_node object within the
178 * object(s) that will be stored in the hash
179 *
180 * @note The hash object is not ready for use until it has been
181 * resized via the cstl_hash_resize() function
182 */
183static inline void cstl_hash_init(
184 struct cstl_hash * const h, const size_t off)
185{
186 h->bucket.at = NULL;
187 h->bucket.count = 0;
188 h->bucket.capacity = 0;
189 h->bucket.cst = 0;
190 h->bucket.hash = NULL;
191
192 h->bucket.rh.hash = NULL;
193
194 h->count = 0;
195 h->off = off;
196}
197
198/*!
199 * @brief Get the number of objects in the hash
200 *
201 * @param[in] h A pointer to the hash
202 *
203 * @return The number of objects in the hash
204 */
205size_t cstl_hash_size(const struct cstl_hash * const h)
206{
207 return h->count;
208}
209
210/*!
211 * @brief Get the average number of nodes per bucket
212 *
213 * @return The average number of nodes per bucket, i.e. the total number
214 * of nodes divided by the number of buckets
215 */
216float cstl_hash_load(const struct cstl_hash * const h)
217{
218 size_t count = h->bucket.count;
219 if (h->bucket.rh.hash != NULL) {
220 count = h->bucket.rh.count;
221 }
222 return (float)h->count / count;
223}
224
225/*!
226 * @brief Free memory associated with excess buckets
227 *
228 * As the table is resized, the number of buckets is increased or
229 * decreased. When decreasing, the memory associated with unused
230 * buckets is not released. This function releases that memory. The
231 * number of buckets is not changed.
232 *
233 * @param[in,out] h A pointer to a hash object
234 */
235void cstl_hash_shrink_to_fit(struct cstl_hash * h);
236
237/*!
238 * @brief Resize the hash table
239 *
240 * @param[in,out] h A pointer to the hash object
241 * @param[in] n The desired number of buckets in the table
242 * @param[in] f The function used by this hash object to hash keys. If this
243 * parameter is NULL, the existing hash function will be reused.
244 * If there is no existing hash function (because the hash object
245 * is in an initialized state), the cstl_hash_mul() function will
246 * be used.
247 *
248 * If @p n is zero, this function does nothing. Once this function has been
249 * called, cstl_hash_clear() must be called to re/de-initialize the object.
250 * If the function fails, the original hash object is undisturbed.
251 */
252void cstl_hash_resize(struct cstl_hash * h, size_t n, cstl_hash_func_t * f);
253
254/*!
255 * @brief Rehash the hash table
256 *
257 * After resizing or changing the hash function for the table, all
258 * nodes in the table must be moved to new buckets. This is done
259 * incrementally, spreading the time cost over multiple operations on
260 * the table. However, this function may be used to force the operation
261 * to run to completion immediately.
262 *
263 * If a rehash is not currently in progress, the function returns immediately.
264 *
265 * @param[in,out] h A pointer to the hash object
266 */
267void cstl_hash_rehash(struct cstl_hash * h);
268
269/*!
270 * @brief Insert an item into the hash
271 *
272 * @param[in] h A pointer to the hash object
273 * @param[in] k The key associated with the object being inserted
274 * @param[in] e A pointer to the object to insert
275 *
276 * If the caller maintains a pointer to the object or otherwise gets
277 * a pointer via cstl_hash_find() or cstl_hash_foreach(), the caller may modify
278 * the object as desired. However, the key associated with the object
279 * must not be changed.
280 */
281void cstl_hash_insert(struct cstl_hash * h, size_t k, void * e);
282
283/*!
284 * @brief Lookup/find a previously inserted object in the hash
285 *
286 * @param[in] h A pointer to the hash object
287 * @param[in] k The key associated with the object being sought
288 * @param[in] visit A pointer to a function that will be called for
289 * each object with a matching key. The called function
290 * should return a non-zero value when the desired object
291 * is found. The pointer may be NULL, in which case, the
292 * first object with a matching key will be returned.
293 * @param[in] priv A pointer, belonging to the caller, that will be passed
294 * to each invocation of the @p visit function
295 *
296 * @return A pointer to the object that was found
297 * @retval NULL No object with a matching key was found, or the @p visit
298 * function did not identify a matching object
299 */
300void * cstl_hash_find(struct cstl_hash * h, size_t k,
301 cstl_const_visit_func_t * visit, void * priv);
302
303/*!
304 * @brief Remove an object from the hash
305 *
306 * @param[in] h A pointer to the hash object
307 * @param[in] e A pointer to the object to be removed. This pointer must
308 * be to the *actual* object to be removed, not just to an
309 * object that would compare as equal
310 */
311void cstl_hash_erase(struct cstl_hash * h, void * e);
312
313/*!
314 * @brief Visit each object within a hash table
315 *
316 * @param[in] h A pointer to the hash object
317 * @param[in] visit A function to be called for each object in the table. The
318 * function should return zero to continue visiting objects
319 * or a non-zero value to terminate the foreach function.
320 * The visit function may alter the object (but not the key)
321 * and/or remove the *current* object from the table.
322 * @param[in] priv A pointer, belonging to the caller, that will be passed
323 * to each invocation of the @p visit function
324 *
325 * @note This function will force an in-progress rehash to complete. Consider
326 * cstl_hash_foreach_const() if the visited nodes do not need to be modified
327 * or removed.
328 *
329 * @return The value returned by the last invocation of @p visit or 0
330 */
331int cstl_hash_foreach(struct cstl_hash * h,
332 cstl_visit_func_t * visit, void * priv);
333
334/*!
335 * @brief Visit each object within a hash table
336 *
337 * @param[in] h A pointer to the hash object
338 * @param[in] visit A function to be called for each object in the table. The
339 * function should return zero to continue visiting objects
340 * or a non-zero value to terminate the foreach function.
341 * @param[in] priv A pointer, belonging to the caller, that will be passed
342 * to each invocation of the @p visit function
343 *
344 * @return The value returned by the last invocation of @p visit or 0
345 */
346int cstl_hash_foreach_const(const struct cstl_hash * h,
347 cstl_const_visit_func_t * visit, void * priv);
348
349/*!
350 * @brief Remove all elements from the hash
351 *
352 * @param[in] h A pointer to the hash
353 * @param[in] clr A pointer to a function to be called for each element in
354 * the hash. The function may be NULL.
355 *
356 * All elements are removed from the hash and the @p clr function is
357 * called for each element that was in the hash. The order in which
358 * the elements are removed and @p clr is called is not specified, but
359 * the callee may take ownership of an element at the time that @p clr
360 * is called for that element and not before.
361 *
362 * Upon return from this function, the hash contains no elements, and
363 * is as it was immediately after being initialized. No further operations
364 * on the hash are necessary to make it ready to go out of scope or be
365 * destroyed.
366 */
367void cstl_hash_clear(struct cstl_hash * h, cstl_xtor_func_t * clr);
368
369/*!
370 * @brief Swap the hash objects at the two given locations
371 *
372 * @param[in,out] a A pointer to a hash
373 * @param[in,out] b A pointer to a(nother) hash
374 *
375 * The hashes at the given locations will be swapped such that upon return,
376 * @p a will contain the hash previously pointed to by @p b and vice versa.
377 */
378static inline void cstl_hash_swap(struct cstl_hash * const a,
379 struct cstl_hash * const b)
380{
381 struct cstl_hash t;
382 cstl_swap(a, b, &t, sizeof(t));
383}
384
385/*!
386 * @name Built-in key hashing functions
387 * @{
388 */
389
390/*!
391 * @brief Hash by division
392 *
393 * The key is hashed by dividing it by the number of buckets in the
394 * table and returning the remainder.
395 *
396 * @param[in] k The key to be hashed
397 * @param[in] m The size of the hash table
398 *
399 * @return A value in the range [0, m)
400 */
401size_t cstl_hash_div(size_t k, size_t m);
402
403/*!
404 * @brief Hash by multiplication
405 *
406 * The key is hashed by multiplying it by a value @p phi and then
407 * multiplying the fractional portion of that result by @p m. In
408 * this case, the value @p phi is the golden ratio (1.618034), as
409 * suggested by Knuth.
410 *
411 * @param[in] k The key to be hashed
412 * @param[in] m The size of the hash table
413 *
414 * @return A value in the range [0, m)
415 */
416size_t cstl_hash_mul(size_t k, size_t m);
417
418/*!
419 * @}
420 */
421
422/*!
423 * @}
424 */
425
426#endif
static void cstl_swap(void *const x, void *const y, void *const t, const size_t sz)
Swap values at two memory locations via use of a third.
Definition common.h:137
void cstl_xtor_func_t(void *obj, void *priv)
Type for functions called to construct, clear, or destroy an object.
Definition common.h:97
int cstl_visit_func_t(void *obj, void *priv)
Type for visit callbacks from objects supporting foreach.
Definition common.h:65
int cstl_const_visit_func_t(const void *obj, void *priv)
Type for const visit callbacks from objects supporting foreach.
Definition common.h:80
void cstl_hash_resize(struct cstl_hash *h, size_t n, cstl_hash_func_t *f)
Resize the hash table.
Definition hash.c:255
size_t cstl_hash_func_t(size_t k, size_t m)
Function type for hashing a key into a bucket.
Definition hash.h:35
float cstl_hash_load(const struct cstl_hash *const h)
Get the average number of nodes per bucket.
Definition hash.h:216
void cstl_hash_erase(struct cstl_hash *h, void *e)
Remove an object from the hash.
Definition hash.c:434
int cstl_hash_foreach_const(const struct cstl_hash *h, cstl_const_visit_func_t *visit, void *priv)
Visit each object within a hash table.
Definition hash.c:225
void * cstl_hash_find(struct cstl_hash *h, size_t k, cstl_const_visit_func_t *visit, void *priv)
Lookup/find a previously inserted object in the hash.
Definition hash.c:386
int cstl_hash_foreach(struct cstl_hash *h, cstl_visit_func_t *visit, void *priv)
Visit each object within a hash table.
Definition hash.c:205
static void cstl_hash_swap(struct cstl_hash *const a, struct cstl_hash *const b)
Swap the hash objects at the two given locations.
Definition hash.h:378
size_t cstl_hash_div(size_t k, size_t m)
Hash by division.
Definition hash.c:10
void cstl_hash_insert(struct cstl_hash *h, size_t k, void *e)
Insert an item into the hash.
Definition hash.c:330
void cstl_hash_shrink_to_fit(struct cstl_hash *h)
Free memory associated with excess buckets.
Definition hash.c:315
size_t cstl_hash_mul(size_t k, size_t m)
Hash by multiplication.
Definition hash.c:15
size_t cstl_hash_size(const struct cstl_hash *const h)
Get the number of objects in the hash.
Definition hash.h:205
static void cstl_hash_init(struct cstl_hash *const h, const size_t off)
Initialize a hash object.
Definition hash.h:183
void cstl_hash_clear(struct cstl_hash *h, cstl_xtor_func_t *clr)
Remove all elements from the hash.
Definition hash.c:474
void cstl_hash_rehash(struct cstl_hash *h)
Rehash the hash table.
Definition hash.c:120
Node to anchor an element within a hash.
Definition hash.h:57
Hash object.
Definition hash.h:85