|
| 1 | +#include <stdio.h> |
| 2 | +#include <stdlib.h> |
| 3 | +#include <assert.h> |
| 4 | +#include "hash_table.h" |
| 5 | + |
| 6 | +typedef unsigned char small_int; |
| 7 | + |
| 8 | +// bucket |
| 9 | +typedef struct n |
| 10 | +{ |
| 11 | + Pointer data; // pointer to the data we are storing |
| 12 | + uint hash_value; // hash value of the data |
| 13 | + struct n* next; // next element in the bucket (NULL if it's the last) |
| 14 | +} |
| 15 | +n; |
| 16 | +typedef struct n* node; |
| 17 | + |
| 18 | +typedef struct hash_table |
| 19 | +{ |
| 20 | + uint curr_size; // used number of buckets |
| 21 | + uint max_capacity; // max number of buckets |
| 22 | + uint elements; // number of elements in the hash table |
| 23 | + uint exponent; // current explonent (in collaboration with find_func, it is used to find the hash value) |
| 24 | + uint next_split; // next bucket to be splited |
| 25 | + small_int* curr_num_of_elements; // number of elements in each bucket |
| 26 | + node* buckets; // buckets (lists) storing the data |
| 27 | + HashFunc hash; // function that hashes an element into a positive integer |
| 28 | + CompareFunc compare; // function that compares the elements |
| 29 | + DestroyFunc destroy; // function that destroys the elements, NULL if not |
| 30 | +} |
| 31 | +hash_table; |
| 32 | + |
| 33 | + |
| 34 | +#define STARTING_HASH_CAPACITY 50 // starting maximum number of buckets |
| 35 | +#define MAX_BUCKET_ELEMENTS 4 // maximum number of elements in the bucket |
| 36 | +#define INCREASE_SIZE 1.8 // increase percentage of maximum number of buckets |
| 37 | + |
| 38 | + |
| 39 | +// Function Prototypes |
| 40 | +static void hash_resize(HashTable ht); |
| 41 | +static uint get_bucket(HashTable ht, uint hash_value); |
| 42 | +static uint find_func(uint i); |
| 43 | +static uint hash_search(HashTable ht, Pointer value); |
| 44 | + |
| 45 | +void hash_init(HashTable* ht, HashFunc hash, CompareFunc compare, DestroyFunc destroy) |
| 46 | +{ |
| 47 | + assert(hash != NULL && compare != NULL); // a hash and compare function needs to be given |
| 48 | + |
| 49 | + *ht = malloc(sizeof(hash_table)); |
| 50 | + assert(*ht != NULL); // allocation failure |
| 51 | + |
| 52 | + (*ht)->buckets = calloc(sizeof(n), STARTING_HASH_CAPACITY); // allocate memory for the buckets |
| 53 | + assert((*ht)->buckets != NULL); // allocation failure |
| 54 | + |
| 55 | + (*ht)->curr_num_of_elements = calloc(sizeof(small_int), STARTING_HASH_CAPACITY); |
| 56 | + assert((*ht)->curr_num_of_elements != NULL); // allocation failure |
| 57 | + |
| 58 | + (*ht)->max_capacity = STARTING_HASH_CAPACITY; |
| 59 | + (*ht)->elements = 0; |
| 60 | + (*ht)->exponent = 0; |
| 61 | + |
| 62 | + (*ht)->curr_size = 1; |
| 63 | + (*ht)->next_split = 0; |
| 64 | + |
| 65 | + // initialize functions |
| 66 | + (*ht)->hash = hash; |
| 67 | + (*ht)->compare = compare; |
| 68 | + (*ht)->destroy = destroy; |
| 69 | +} |
| 70 | + |
| 71 | +bool hash_insert(HashTable ht, Pointer value) |
| 72 | +{ |
| 73 | + // check to see if value already exists |
| 74 | + uint bucket = hash_search(ht, value); |
| 75 | + if (bucket == -1) // value already exists |
| 76 | + return false; |
| 77 | + |
| 78 | + // create new node |
| 79 | + node new_node = malloc(sizeof(n)); |
| 80 | + assert(new_node != NULL); // allocation failure |
| 81 | + |
| 82 | + // fill node's contents |
| 83 | + new_node->data = value; |
| 84 | + |
| 85 | + uint hash_value = ht->hash(value); |
| 86 | + new_node->hash_value = hash_value; |
| 87 | + |
| 88 | + // insert the value at the start of the bucket |
| 89 | + new_node->next = ht->buckets[bucket]; |
| 90 | + ht->buckets[bucket] = new_node; |
| 91 | + |
| 92 | + ht->curr_num_of_elements[bucket]++; |
| 93 | + |
| 94 | + // split if an overflow occurs |
| 95 | + if (ht->curr_num_of_elements[bucket] > MAX_BUCKET_ELEMENTS) // max number of elements in the bucket exceeded - overflow |
| 96 | + { |
| 97 | + // get new bucket |
| 98 | + ht->curr_size++; |
| 99 | + |
| 100 | + // maximum capacity of buckets reached, increase size |
| 101 | + if (ht->curr_size == ht->max_capacity) |
| 102 | + hash_resize(ht); |
| 103 | + |
| 104 | + node* new_bucket = &(ht->buckets[ht->curr_size-1]); |
| 105 | + |
| 106 | + // bucket to be splitted |
| 107 | + node* old_bucket = &(ht->buckets[ht->next_split]); |
| 108 | + |
| 109 | + ht->next_split++; |
| 110 | + |
| 111 | + // split operation |
| 112 | + while ((*old_bucket) != NULL) |
| 113 | + { |
| 114 | + if (get_bucket(ht, (*old_bucket)->hash_value) != ht->next_split-1) |
| 115 | + { |
| 116 | + ht->curr_num_of_elements[ht->next_split-1]--; |
| 117 | + ht->curr_num_of_elements[ht->curr_size-1]++; |
| 118 | + |
| 119 | + *new_bucket = *old_bucket; |
| 120 | + *old_bucket = (*old_bucket)->next; |
| 121 | + new_bucket = &((*new_bucket)->next); |
| 122 | + *new_bucket = NULL; |
| 123 | + } |
| 124 | + else |
| 125 | + old_bucket = &((*old_bucket)->next); |
| 126 | + } |
| 127 | + |
| 128 | + if (find_func(ht->exponent+1) <= ht->curr_size) |
| 129 | + { |
| 130 | + // start next round of splitting |
| 131 | + ht->exponent++; |
| 132 | + ht->next_split = 0; |
| 133 | + } |
| 134 | + } |
| 135 | + |
| 136 | + ht->elements++; // value inserted, increment the number of elements in the hash table |
| 137 | + return true; |
| 138 | +} |
| 139 | + |
| 140 | +bool hash_remove(HashTable ht, Pointer value) |
| 141 | +{ |
| 142 | + // find the potential bucket the value belongs to |
| 143 | + uint h = get_bucket(ht, ht->hash(value)); |
| 144 | + |
| 145 | + node* bkt = &(ht->buckets[h]); |
| 146 | + |
| 147 | + // search for the value in the bucket |
| 148 | + while (*bkt != NULL) |
| 149 | + { |
| 150 | + Pointer bkt_value = (*bkt)->data; |
| 151 | + |
| 152 | + if (ht->compare(value, bkt_value) == 0) // value found |
| 153 | + { |
| 154 | + node tmp = *bkt; |
| 155 | + (*bkt) = (*bkt)->next; |
| 156 | + |
| 157 | + // if a destroy function exists, destroy the value |
| 158 | + if (ht->destroy != NULL) |
| 159 | + ht->destroy(tmp->data); |
| 160 | + |
| 161 | + free(tmp); |
| 162 | + |
| 163 | + ht->curr_num_of_elements[h]--; |
| 164 | + ht->elements--; // value removed, decrement the number of elements in the hash table |
| 165 | + return true; |
| 166 | + } |
| 167 | + else // search next value |
| 168 | + bkt = &((*bkt)->next); |
| 169 | + } |
| 170 | + return false; |
| 171 | +} |
| 172 | + |
| 173 | +bool hash_exists(HashTable ht, Pointer value) |
| 174 | +{ |
| 175 | + if (hash_search(ht, value) == -1) // value exists |
| 176 | + return true; |
| 177 | + |
| 178 | + // value does not exist |
| 179 | + return false; |
| 180 | +} |
| 181 | + |
| 182 | +// returns -1 if the value exists |
| 183 | +// if it does not exist, returns the bucket in which it should exist |
| 184 | +static uint hash_search(HashTable ht, Pointer value) |
| 185 | +{ |
| 186 | + // find the potential bucket the value belongs to |
| 187 | + uint h = get_bucket(ht, ht->hash(value)); |
| 188 | + |
| 189 | + // search for the value in the bucket |
| 190 | + node bkt = ht->buckets[h]; |
| 191 | + while (bkt != NULL) |
| 192 | + { |
| 193 | + Pointer bkt_value = bkt->data; |
| 194 | + if (ht->compare(value, bkt_value) == 0) // value found |
| 195 | + return -1; |
| 196 | + |
| 197 | + bkt = bkt->next; |
| 198 | + } |
| 199 | + return h; |
| 200 | +} |
| 201 | + |
| 202 | +static uint get_bucket(HashTable ht, uint hash_value) |
| 203 | +{ |
| 204 | + // use hash function i |
| 205 | + uint pos = hash_value % find_func(ht->exponent); |
| 206 | + |
| 207 | + if (pos < ht->next_split) |
| 208 | + // use hash function i+1 |
| 209 | + pos = hash_value % find_func(ht->exponent+1); |
| 210 | + |
| 211 | + return pos; |
| 212 | +} |
| 213 | + |
| 214 | +// resize by INCREASE_SIZE |
| 215 | +static void hash_resize(HashTable ht) |
| 216 | +{ |
| 217 | + uint old_cap = ht->max_capacity; |
| 218 | + ht->max_capacity *= INCREASE_SIZE; // increase capacity |
| 219 | + |
| 220 | + ht->buckets = realloc(ht->buckets, sizeof(*(ht->buckets)) * ht->max_capacity); |
| 221 | + assert(ht->buckets != NULL); // allocation failure |
| 222 | + |
| 223 | + ht->curr_num_of_elements = realloc(ht->curr_num_of_elements, sizeof(small_int) * ht->max_capacity); |
| 224 | + assert(ht->curr_num_of_elements != NULL); // allocation failure |
| 225 | + |
| 226 | + // initialize arrays to avoid annoying errors |
| 227 | + for (uint i = old_cap; i < ht->max_capacity; i++) |
| 228 | + { |
| 229 | + ht->buckets[i] = NULL; |
| 230 | + ht->curr_num_of_elements[i] = 0; |
| 231 | + } |
| 232 | +} |
| 233 | + |
| 234 | +void print_table(HashTable ht, VisitFunc visit) |
| 235 | +{ |
| 236 | + for (uint i = 0; i < ht->curr_size; i++) |
| 237 | + { |
| 238 | + node tmp = ht->buckets[i]; |
| 239 | + printf("(%d): ", i); |
| 240 | + while (tmp != NULL) |
| 241 | + { |
| 242 | + visit(tmp->data); |
| 243 | + tmp = tmp->next; |
| 244 | + } |
| 245 | + |
| 246 | + // print the number of elements in the bucket |
| 247 | + printf("|%d|", ht->curr_num_of_elements[i]); |
| 248 | + printf("\n"); |
| 249 | + } |
| 250 | +} |
| 251 | + |
| 252 | +uint hash_size(HashTable ht) |
| 253 | +{ |
| 254 | + return ht->elements; |
| 255 | +} |
| 256 | + |
| 257 | +// returns 2^i |
| 258 | +static uint find_func(uint i) |
| 259 | +{ |
| 260 | + return 1 << i; |
| 261 | +} |
| 262 | + |
| 263 | +DestroyFunc hash_set_destroy(HashTable ht, DestroyFunc new_destroy_func) |
| 264 | +{ |
| 265 | + DestroyFunc old_destroy_func = ht->destroy; |
| 266 | + ht->destroy = new_destroy_func; |
| 267 | + return old_destroy_func; |
| 268 | +} |
| 269 | + |
| 270 | +void hash_destroy(HashTable ht) |
| 271 | +{ |
| 272 | + // destroy buckets |
| 273 | + for (uint i = 0; i < ht->max_capacity; i++) |
| 274 | + { |
| 275 | + node bkt = ht->buckets[i]; |
| 276 | + while (bkt != NULL) |
| 277 | + { |
| 278 | + node tmp = bkt; |
| 279 | + bkt = bkt->next; |
| 280 | + |
| 281 | + // if a destroy function is given, destroy the elements |
| 282 | + if (ht->destroy != NULL) |
| 283 | + ht->destroy(tmp->data); |
| 284 | + free(tmp); |
| 285 | + } |
| 286 | + } |
| 287 | + free(ht->curr_num_of_elements); |
| 288 | + free(ht->buckets); |
| 289 | + free(ht); |
| 290 | +} |
0 commit comments