libcstl
Loading...
Searching...
No Matches
hash.c
Go to the documentation of this file.
1/*!
2 * @file
3 */
4
5#include "cstl/hash.h"
6
7#include <stdlib.h>
8#include <math.h>
9
10size_t cstl_hash_div(const size_t k, const size_t m)
11{
12 return k % m;
13}
14
15size_t cstl_hash_mul(const size_t k, const size_t m)
16{
17 static const float phi = 1.61803398875f;
18 const float M = phi * k;
19 return (M - floorf(M)) * m;
20}
21
22/*!
23 * @private
24 *
25 * Given a cstl_hash_node, return the containing object
26 */
27static void * __cstl_hash_element(const struct cstl_hash * const h,
28 const struct cstl_hash_node * const n)
29{
30 return (void *)((uintptr_t)n - h->off);
31}
32
33/*!
34 * @private
35 *
36 * Given an object, return the contained cstl_hash_node
37 */
38static struct cstl_hash_node * __cstl_hash_node(
39 const struct cstl_hash * const h, const void * const e)
40{
41 return (void *)((uintptr_t)e + h->off);
42}
43
44/*! @private */
45static struct cstl_hash_bucket * __cstl_hash_get_bucket(
46 struct cstl_hash * const h, const size_t k,
47 cstl_hash_func_t * const hash, const size_t count)
48{
49 const size_t i = hash(k, count);
50 if (i >= count) {
51 abort();
52 }
53 return &h->bucket.at[i];
54}
55
56#ifndef NO_DOC
57#define HASH_LIST_FOREACH(HEAD, CURR, NEXT) \
58 NEXT = HEAD; while ((CURR = NEXT) != NULL && (NEXT = (CURR)->next, !0))
59#define HASH_LIST_INSERT(HEAD, N) \
60 (N)->next = HEAD; HEAD = N
61#endif
62
63/*! @private */
64static void cstl_clean_bucket(
65 struct cstl_hash * const h, struct cstl_hash_bucket * const bk)
66{
67 /* only clean dirty buckets */
68 if (h->bucket.cst != bk->cst) {
69 struct cstl_hash_node * n, * nn;
70
71 /*
72 * move the list of nodes to a temporary location.
73 * this prevents any confusion if the cleaning results
74 * in a node being hashed back into the same bucket
75 */
76 n = bk->n;
77 bk->n = NULL;
78
79 /*
80 * for each node in the list, find it's new bucket
81 * and put the node into it. no need to remove from
82 * the current bucket since it's a singly linked list
83 */
84 HASH_LIST_FOREACH(n, n, nn) {
85 struct cstl_hash_bucket * const _bk =
86 __cstl_hash_get_bucket(
87 h, n->key, h->bucket.rh.hash, h->bucket.rh.count);
88 HASH_LIST_INSERT(_bk->n, n);
89 }
90
91 /* the bucket is clean, now */
92 bk->cst = h->bucket.cst;
93 }
94}
95
96/*! @private */
97static void __cstl_hash_rehash(struct cstl_hash * const h, size_t n)
98{
99 /* skip over already-cleaned buckets */
100 while (h->bucket.rh.clean < h->bucket.count
101 && h->bucket.at[h->bucket.rh.clean].cst == h->bucket.cst) {
102 h->bucket.rh.clean++;
103 }
104
105 while (h->bucket.rh.clean < h->bucket.count && n > 0) {
106 cstl_clean_bucket(h, &h->bucket.at[h->bucket.rh.clean]);
107 h->bucket.rh.clean++;
108 n--;
109 }
110
111 if (h->bucket.rh.clean >= h->bucket.count) {
112 /* everything is clean; mark the rehash as complete */
113 h->bucket.count = h->bucket.rh.count;
114 h->bucket.hash = h->bucket.rh.hash;
115
116 h->bucket.rh.hash = NULL;
117 }
118}
119
120void cstl_hash_rehash(struct cstl_hash * const h)
121{
122 if (h->bucket.rh.hash != NULL) {
123 __cstl_hash_rehash(h, SIZE_MAX);
124 }
125}
126
127/*!
128 * @private
129 *
130 * Given a key, return the associated hash bucket
131 */
132static struct cstl_hash_bucket * cstl_hash_get_bucket(
133 struct cstl_hash * const h, const size_t k)
134{
135 struct cstl_hash_bucket * bk;
136
137 bk = __cstl_hash_get_bucket(h, k, h->bucket.hash, h->bucket.count);
138
139 /*
140 * if rh.hash != NULL, then a rehash (following a resize or
141 * changing of the hash function) is in progress. try to
142 * clean a few buckets to move the process along.
143 */
144 if (h->bucket.rh.hash != NULL) {
145 struct cstl_hash_bucket * const _bk =
146 __cstl_hash_get_bucket(
147 h, k, h->bucket.rh.hash, h->bucket.rh.count);
148
149 /*
150 * for the given key, clean the bucket at the old
151 * location and at the new one. this ensures that
152 * regularly used buckets are cleaned quickly.
153 */
154 cstl_clean_bucket(h, bk);
155 cstl_clean_bucket(h, _bk);
156
157 /*
158 * also clean one more bucket. this ensures that
159 * infrequently used buckets get cleaned and the
160 * rehash completes in a reasonable amount of time
161 */
162 __cstl_hash_rehash(h, 1);
163
164 bk = _bk;
165 }
166
167 return bk;
168}
169
170/*!
171 * @private
172 *
173 * Visit each element within a particular bucket in the hash table
174 */
175static int cstl_hash_bucket_foreach(
176 const struct cstl_hash * const h, struct cstl_hash_node * n,
177 cstl_visit_func_t * const visit, void * const p)
178{
179 struct cstl_hash_node * nn;
180 int res = 0;
181
182 HASH_LIST_FOREACH(n, n, nn) {
183 if ((res = visit(__cstl_hash_element(h, n), p)) != 0) {
184 break;
185 }
186 }
187
188 return res;
189}
190
191/*! @private */
192static int __cstl_hash_foreach(const struct cstl_hash * const h,
193 cstl_visit_func_t * const visit, void * const p)
194{
195 int res;
196 unsigned int i;
197
198 for (i = 0, res = 0; i < h->bucket.count && res == 0; i++) {
199 res = cstl_hash_bucket_foreach(h, h->bucket.at[i].n, visit, p);
200 }
201
202 return res;
203}
204
205int cstl_hash_foreach(struct cstl_hash * const h,
206 cstl_visit_func_t * const visit, void * const p)
207{
209 return __cstl_hash_foreach(h, visit, p);
210}
211
212struct cstl_hash_foreach_visit_priv
213{
215 void * priv;
216};
217
218/*! @private */
219static int cstl_hash_foreach_visit(void * const e, void * const p)
220{
221 struct cstl_hash_foreach_visit_priv * const hfvp = p;
222 return hfvp->visit(e, hfvp->priv);
223}
224
225int cstl_hash_foreach_const(const struct cstl_hash * const h,
226 cstl_const_visit_func_t * const visit,
227 void * const p)
228{
229 /*
230 * the caller's function will be given a const pointer to
231 * the objects in the table, but internally, the foreach
232 * function generates a callback with a non-const pointer.
233 * we use an intermediate function that receives the non-const
234 * pointer but then calls the user's function which receives
235 * a const pointer.
236 */
237 struct cstl_hash_foreach_visit_priv hfvp;
238 hfvp.visit = visit;
239 hfvp.priv = p;
240 return __cstl_hash_foreach(h, cstl_hash_foreach_visit, &hfvp);
241}
242
243/*! @private */
244static void __cstl_hash_set_capacity(
245 struct cstl_hash * const h, const size_t sz)
246{
247 struct cstl_hash_bucket * const at =
248 realloc(h->bucket.at, sizeof(*at) * sz);
249 if (at != NULL) {
250 h->bucket.at = at;
251 h->bucket.capacity = sz;
252 }
253}
254
255void cstl_hash_resize(struct cstl_hash * const h,
256 const size_t count, cstl_hash_func_t * const hash)
257{
258 if (count > 0) {
259 if (count > h->bucket.capacity) {
260 __cstl_hash_set_capacity(h, count);
261 }
262
263 if (h->bucket.at != NULL
264 && count <= h->bucket.capacity
265 && (count != h->bucket.count
266 || (hash != NULL
267 && hash != h->bucket.hash))) {
268 unsigned int i;
269
270 /*
271 * can't resize until the previous one
272 * has finished. force it to finish now.
273 */
275
276 h->bucket.cst = !h->bucket.cst;
277
278 /*
279 * buckets successfully allocated. initialize them.
280 */
281 for (i = h->bucket.count; i < count; i++) {
282 h->bucket.at[i].n = NULL;
283 /* newly added buckets are clean */
284 h->bucket.at[i].cst = h->bucket.cst;
285 }
286
287 /*
288 * set the new hash function. there is no requirement
289 * that it be the same function as the current one
290 */
291 if (hash != NULL) {
292 h->bucket.rh.hash = hash;
293 } else if (h->bucket.hash != NULL) {
294 h->bucket.rh.hash = h->bucket.hash;
295 } else {
296 h->bucket.rh.hash = cstl_hash_mul;
297 }
298 h->bucket.rh.count = count;
299 h->bucket.rh.clean = 0;
300
301 if (h->bucket.hash == NULL) {
302 /* first resize */
303 h->bucket.hash = h->bucket.rh.hash;
304 h->bucket.count = h->bucket.rh.count;
305
306 h->bucket.rh.hash = NULL;
307 }
308 } /*
309 * else, allocation of buckets failed.
310 * nothing to clean up; just exit
311 */
312 }
313}
314
315void cstl_hash_shrink_to_fit(struct cstl_hash * const h)
316{
317 size_t count;
318
319 count = h->bucket.count;
320 if (h->bucket.rh.hash != NULL) {
321 count = h->bucket.rh.count;
322 }
323
324 if (h->bucket.capacity > count) {
326 __cstl_hash_set_capacity(h, h->bucket.count);
327 }
328}
329
330void cstl_hash_insert(struct cstl_hash * const h,
331 const size_t k, void * const e)
332{
333 struct cstl_hash_bucket * const bk = cstl_hash_get_bucket(h, k);
334 struct cstl_hash_node * const hn = __cstl_hash_node(h, e);
335
336 hn->key = k;
337
338 /*
339 * the bucket is a singly-linked/forward list.
340 * insert the new object at the front of the list.
341 */
342 HASH_LIST_INSERT(bk->n, hn);
343
344 h->count++;
345}
346
347/*! @private */
348struct cstl_hash_find_priv
349{
350 /* the hash, obviously */
351 struct cstl_hash * h;
352 /* the key being sought */
353 size_t k;
355 void * p;
356 /*
357 * initially null, set to the found object
358 * if the visit function indicates that it
359 * is the object being sought
360 */
361 const void * e;
362};
363
364/*! @private */
365static int cstl_hash_find_visit(void * const e, void * const p)
366{
367 struct cstl_hash_find_priv * const hfp = p;
368
369 /* only visit nodes with matching keys */
370 if (__cstl_hash_node(hfp->h, e)->key == hfp->k) {
371 /*
372 * if there is no visit function or the visit function
373 * indicates that this object is the correct one, set
374 * the pointer into the private structure and return
375 * 1 to stop the search
376 */
377 if (hfp->visit == NULL || hfp->visit(e, hfp->p) != 0) {
378 hfp->e = e;
379 return 1;
380 }
381 }
382
383 return 0;
384}
385
386void * cstl_hash_find(struct cstl_hash * const h, const size_t k,
387 cstl_const_visit_func_t * const visit, void * const p)
388{
389 struct cstl_hash_find_priv hfp;
390
391 hfp.h = h;
392 hfp.k = k;
393 hfp.visit = visit;
394 hfp.p = p;
395 hfp.e = NULL;
396
397 cstl_hash_bucket_foreach(
398 h, cstl_hash_get_bucket(h, k)->n, cstl_hash_find_visit, &hfp);
399 return (void *)hfp.e;
400}
401
402/*! @private */
403struct cstl_hash_erase_priv
404{
405 /*
406 * initially a pointer to the bucket. as the
407 * search progresses, this is a pointer to the
408 * pointer to the current object. maintaining
409 * this pointer allows the current object to
410 * be unlinked from the list if/when it's found
411 */
412 struct cstl_hash_node ** n;
413 /*
414 * a pointer to the object being sought, the
415 * object that will be removed from the hash
416 */
417 const void * e;
418};
419
420/*! @private */
421static int cstl_hash_erase_visit(void * const e, void * const p)
422{
423 struct cstl_hash_erase_priv * const hep = p;
424
425 if (hep->e == e) {
426 return 1;
427 }
428
429 /* not found; advance bucket pointer */
430 hep->n = &(*hep->n)->next;
431 return 0;
432}
433
434void cstl_hash_erase(struct cstl_hash * const h, void * const e)
435{
436 /*
437 * the object's key determines which
438 * bucket to search for the object
439 */
440 struct cstl_hash_bucket * const bk =
441 cstl_hash_get_bucket(h, __cstl_hash_node(h, e)->key);
442 struct cstl_hash_erase_priv hep;
443
444 hep.n = &bk->n;
445 hep.e = e;
446
447 /*
448 * if the foreach function returns non-zero, then
449 * the object was found and removed.
450 */
451 if (cstl_hash_bucket_foreach(
452 h, bk->n, cstl_hash_erase_visit, &hep) != 0) {
453 /* object found; splice it out of the list */
454 *hep.n = (*hep.n)->next;
455 h->count--;
456 }
457}
458
459/*! @private */
460struct cstl_hash_clear_priv
461{
462 cstl_xtor_func_t * clr;
463 void * priv;
464};
465
466/*! @private */
467static int cstl_hash_clear_visit(void * const e, void * const p)
468{
469 struct cstl_hash_clear_priv * const hcp = p;
470 hcp->clr(e, hcp->priv);
471 return 0;
472}
473
474void cstl_hash_clear(struct cstl_hash * const h, cstl_xtor_func_t * const clr)
475{
476 if (clr != NULL) {
477 struct cstl_hash_clear_priv hcp;
478
479 /* call the caller's clear function for each object */
480 hcp.clr = clr;
481 hcp.priv = NULL;
482 __cstl_hash_foreach(h, cstl_hash_clear_visit, &hcp);
483 }
484
485 free(h->bucket.at);
486 h->bucket.at = NULL;
487
488 h->bucket.count = 0;
489 h->bucket.capacity = 0;
490
491 h->bucket.rh.hash = NULL;
492
493 h->count = 0;
494}
495
496#ifdef __cfg_test__
497// GCOV_EXCL_START
498#include "internal/check.h"
499
500#include <stdlib.h>
501
502struct integer
503{
504 int v;
505 struct cstl_hash_node n;
506};
507
508void __test__cstl_hash_fill(struct cstl_hash * const h, const size_t n)
509{
510 unsigned int i;
511
512 for (i = 0; i < n; i++) {
513 struct integer * in = malloc(sizeof(*in));
514
515 in->v = i;
516 cstl_hash_insert(h, in->v, in);
517 }
518
519 ck_assert_uint_eq(n, cstl_hash_size(h));
520}
521
522static void __test_cstl_hash_free(void * const p, void * const x)
523{
524 (void)x;
525 free(p);
526}
527
528static int fill_visit(const void * const e, void * const p)
529{
530 (void)e;
531 *(int *)p += 1;
532 return 0;
533}
534
535START_TEST(fill)
536{
537 static const size_t n = 10;
538 int c;
539
540 DECLARE_CSTL_HASH(h, struct integer, n);
541 cstl_hash_resize(&h, 32, NULL);
542
543 __test__cstl_hash_fill(&h, n);
544
545 c = 0;
546 cstl_hash_foreach_const(&h, fill_visit, &c);
547 ck_assert_uint_eq(c, n);
548
549 cstl_hash_clear(&h, __test_cstl_hash_free);
550}
551
552static int manual_clear_visit(void * const e, void * const p)
553{
554 cstl_hash_erase((struct cstl_hash *)p, e);
555 free(e);
556 return 0;
557}
558
559START_TEST(manual_clear)
560{
561 static const size_t n = 10;
562
563 DECLARE_CSTL_HASH(h, struct integer, n);
564 cstl_hash_resize(&h, 32, NULL);
565 __test__cstl_hash_fill(&h, n);
566
567 cstl_hash_foreach(&h, manual_clear_visit, &h);
568 cstl_hash_clear(&h, NULL);
569}
570
571static size_t bad_hash_func(const size_t k, const size_t m)
572{
573 (void)k;
574 /*
575 * this function is supposed to return a
576 * value from [0, m), so returning m should
577 * cause an abort()
578 */
579 return m;
580}
581
582START_TEST(bad_hash)
583{
584 DECLARE_CSTL_HASH(h, struct integer, n);
585 cstl_hash_resize(&h, 32, bad_hash_func);
586 ck_assert_signal(SIGABRT, cstl_hash_find(&h, 0, NULL, NULL));
587 cstl_hash_clear(&h, __test_cstl_hash_free);
588}
589
590static void test_rehash(struct cstl_hash * const h,
591 const size_t maxk, const size_t count)
592{
593 unsigned int i;
594
595 /* all buckets should be dirty following a resize */
596 for (i = 0; i < h->bucket.count; i++) {
597 ck_assert_uint_ne(h->bucket.at[i].cst, h->bucket.cst);
598 }
599
600 /* perform operations on the table until the rehash is complete */
601 while (h->bucket.rh.hash != NULL) {
602 cstl_hash_find(h, rand() % maxk, NULL, NULL);
603 }
604
605 /* check that the table has the correct number of buckets */
606 ck_assert_uint_eq(h->bucket.count, count);
607
608 /* all valid buckets should be clean */
609 for (i = 0; i < h->bucket.count; i++) {
610 ck_assert_uint_eq(h->bucket.at[i].cst, h->bucket.cst);
611 }
612
613 /* excess buckets should be empty */
614 for (; i < h->bucket.capacity; i++) {
615 ck_assert_ptr_null(h->bucket.at[i].n);
616 }
617}
618
619START_TEST(resize)
620{
621 static const size_t n = 100;
622 const int i = rand() % n;
623 bool cst;
624
625 DECLARE_CSTL_HASH(h, struct integer, n);
626 void * e;
627
629 __test__cstl_hash_fill(&h, n);
630
631 e = cstl_hash_find(&h, i, NULL, NULL);
632 ck_assert_ptr_ne(e, NULL);
633
634 cst = h.bucket.cst;
636 ck_assert_uint_eq(cst, h.bucket.cst);
637 cstl_hash_resize(&h, 16, NULL);
638 ck_assert_uint_eq(cst, h.bucket.cst);
639 ck_assert_float_eq_tol(cstl_hash_load(&h), (float)n / 16, .01f);
640
641 cstl_hash_resize(&h, 20, NULL);
642 /* should use the new size even though rehash isn't complete */
643 ck_assert_float_eq_tol(cstl_hash_load(&h), (float)n / 20, .01f);
644 ck_assert_uint_ne(cst, h.bucket.cst);
646 ck_assert_uint_eq(cstl_hash_size(&h), n);
647 ck_assert_ptr_eq(e, cstl_hash_find(&h, i, NULL, NULL));
648
650 test_rehash(&h, n, h.bucket.rh.count);
651 ck_assert_uint_eq(cstl_hash_size(&h), n);
652 ck_assert_ptr_eq(e, cstl_hash_find(&h, i, NULL, NULL));
653
655 test_rehash(&h, n, h.bucket.rh.count);
656 ck_assert_uint_eq(cstl_hash_size(&h), n);
657 ck_assert_ptr_eq(e, cstl_hash_find(&h, i, NULL, NULL));
658
661 ck_assert_ptr_null((void *)(uintptr_t)h.bucket.rh.hash);
662 ck_assert_uint_eq(h.bucket.count, 12);
663 ck_assert_uint_eq(h.bucket.count, h.bucket.capacity);
664 ck_assert_uint_eq(cstl_hash_size(&h), n);
665 ck_assert_ptr_eq(e, cstl_hash_find(&h, i, NULL, NULL));
666
667 cstl_hash_erase(&h, e);
668 free(e);
669 ck_assert_ptr_eq(NULL, cstl_hash_find(&h, i, NULL, NULL));
670 ck_assert_uint_eq(cstl_hash_size(&h), n - 1);
671
672 cstl_hash_clear(&h, __test_cstl_hash_free);
673}
674
675Suite * hash_suite(void)
676{
677 Suite * const s = suite_create("hash");
678
679 TCase * tc;
680
681 tc = tcase_create("hash");
682 tcase_add_test(tc, fill);
683 tcase_add_test(tc, manual_clear);
684 tcase_add_test(tc, bad_hash);
685 tcase_add_test(tc, resize);
686 suite_add_tcase(s, tc);
687
688 return s;
689}
690
691// GCOV_EXCL_STOP
692#endif
void cstl_xtor_func_t(void *obj, void *priv)
Type for functions called to construct, clear, or destroy an object.
Definition common.h:97
int cstl_visit_func_t(void *obj, void *priv)
Type for visit callbacks from objects supporting foreach.
Definition common.h:65
int cstl_const_visit_func_t(const void *obj, void *priv)
Type for const visit callbacks from objects supporting foreach.
Definition common.h:80
void cstl_hash_resize(struct cstl_hash *const h, const size_t count, cstl_hash_func_t *const hash)
Resize the hash table.
Definition hash.c:255
size_t cstl_hash_func_t(size_t k, size_t m)
Function type for hashing a key into a bucket.
Definition hash.h:35
float cstl_hash_load(const struct cstl_hash *const h)
Get the average number of nodes per bucket.
Definition hash.h:216
void cstl_hash_erase(struct cstl_hash *const h, void *const e)
Remove an object from the hash.
Definition hash.c:434
int cstl_hash_foreach_const(const struct cstl_hash *const h, cstl_const_visit_func_t *const visit, void *const p)
Visit each object within a hash table.
Definition hash.c:225
void * cstl_hash_find(struct cstl_hash *const h, const size_t k, cstl_const_visit_func_t *const visit, void *const p)
Lookup/find a previously inserted object in the hash.
Definition hash.c:386
int cstl_hash_foreach(struct cstl_hash *const h, cstl_visit_func_t *const visit, void *const p)
Visit each object within a hash table.
Definition hash.c:205
size_t cstl_hash_div(const size_t k, const size_t m)
Hash by division.
Definition hash.c:10
void cstl_hash_insert(struct cstl_hash *const h, const size_t k, void *const e)
Insert an item into the hash.
Definition hash.c:330
void cstl_hash_shrink_to_fit(struct cstl_hash *const h)
Free memory associated with excess buckets.
Definition hash.c:315
size_t cstl_hash_mul(const size_t k, const size_t m)
Hash by multiplication.
Definition hash.c:15
size_t cstl_hash_size(const struct cstl_hash *const h)
Get the number of objects in the hash.
Definition hash.h:205
#define DECLARE_CSTL_HASH(NAME, TYPE, MEMB)
(Statically) declare and initialize a hash
Definition hash.h:170
void cstl_hash_clear(struct cstl_hash *const h, cstl_xtor_func_t *const clr)
Remove all elements from the hash.
Definition hash.c:474
void cstl_hash_rehash(struct cstl_hash *const h)
Rehash the hash table.
Definition hash.c:120
Node to anchor an element within a hash.
Definition hash.h:57
Hash object.
Definition hash.h:85