1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copied from https://github.com/git/git.git
4 * Generic implementation of hash-based key value mappings.
5 */
6 #include "erofs/hashmap.h"
7
8 #define FNV32_BASE ((unsigned int)0x811c9dc5)
9 #define FNV32_PRIME ((unsigned int)0x01000193)
10
strhash(const char * str)11 unsigned int strhash(const char *str)
12 {
13 unsigned int c, hash = FNV32_BASE;
14
15 while ((c = (unsigned char)*str++))
16 hash = (hash * FNV32_PRIME) ^ c;
17 return hash;
18 }
19
strihash(const char * str)20 unsigned int strihash(const char *str)
21 {
22 unsigned int c, hash = FNV32_BASE;
23
24 while ((c = (unsigned char)*str++)) {
25 if (c >= 'a' && c <= 'z')
26 c -= 'a' - 'A';
27 hash = (hash * FNV32_PRIME) ^ c;
28 }
29 return hash;
30 }
31
memhash(const void * buf,size_t len)32 unsigned int memhash(const void *buf, size_t len)
33 {
34 unsigned int hash = FNV32_BASE;
35 unsigned char *ucbuf = (unsigned char *)buf;
36
37 while (len--) {
38 unsigned int c = *ucbuf++;
39
40 hash = (hash * FNV32_PRIME) ^ c;
41 }
42 return hash;
43 }
44
memihash(const void * buf,size_t len)45 unsigned int memihash(const void *buf, size_t len)
46 {
47 unsigned int hash = FNV32_BASE;
48 unsigned char *ucbuf = (unsigned char *)buf;
49
50 while (len--) {
51 unsigned int c = *ucbuf++;
52
53 if (c >= 'a' && c <= 'z')
54 c -= 'a' - 'A';
55 hash = (hash * FNV32_PRIME) ^ c;
56 }
57 return hash;
58 }
59
60 #define HASHMAP_INITIAL_SIZE 64
61 /* grow / shrink by 2^2 */
62 #define HASHMAP_RESIZE_BITS 2
63 /* load factor in percent */
64 #define HASHMAP_LOAD_FACTOR 80
65
alloc_table(struct hashmap * map,unsigned int size)66 static void alloc_table(struct hashmap *map, unsigned int size)
67 {
68 map->tablesize = size;
69 map->table = calloc(size, sizeof(struct hashmap_entry *));
70 BUG_ON(!map->table);
71
72 /* calculate resize thresholds for new size */
73 map->grow_at = (unsigned int)((uint64_t)size * HASHMAP_LOAD_FACTOR / 100);
74 if (size <= HASHMAP_INITIAL_SIZE)
75 map->shrink_at = 0;
76 else
77 /*
78 * The shrink-threshold must be slightly smaller than
79 * (grow-threshold / resize-factor) to prevent erratic resizing,
80 * thus we divide by (resize-factor + 1).
81 */
82 map->shrink_at = map->grow_at / ((1 << HASHMAP_RESIZE_BITS) + 1);
83 }
84
entry_equals(const struct hashmap * map,const struct hashmap_entry * e1,const struct hashmap_entry * e2,const void * keydata)85 static inline int entry_equals(const struct hashmap *map,
86 const struct hashmap_entry *e1,
87 const struct hashmap_entry *e2,
88 const void *keydata)
89 {
90 return (e1 == e2) || (e1->hash == e2->hash && !map->cmpfn(e1, e2, keydata));
91 }
92
bucket(const struct hashmap * map,const struct hashmap_entry * key)93 static inline unsigned int bucket(const struct hashmap *map,
94 const struct hashmap_entry *key)
95 {
96 return key->hash & (map->tablesize - 1);
97 }
98
rehash(struct hashmap * map,unsigned int newsize)99 static void rehash(struct hashmap *map, unsigned int newsize)
100 {
101 unsigned int i, oldsize = map->tablesize;
102 struct hashmap_entry **oldtable = map->table;
103
104 alloc_table(map, newsize);
105 for (i = 0; i < oldsize; i++) {
106 struct hashmap_entry *e = oldtable[i];
107
108 while (e) {
109 struct hashmap_entry *next = e->next;
110 unsigned int b = bucket(map, e);
111
112 e->next = map->table[b];
113 map->table[b] = e;
114 e = next;
115 }
116 }
117 free(oldtable);
118 }
119
find_entry_ptr(const struct hashmap * map,const struct hashmap_entry * key,const void * keydata)120 static inline struct hashmap_entry **find_entry_ptr(const struct hashmap *map,
121 const struct hashmap_entry *key,
122 const void *keydata)
123 {
124 struct hashmap_entry **e = &map->table[bucket(map, key)];
125
126 while (*e && !entry_equals(map, *e, key, keydata))
127 e = &(*e)->next;
128 return e;
129 }
130
always_equal(const void * unused1,const void * unused2,const void * unused3)131 static int always_equal(const void *unused1, const void *unused2, const void *unused3)
132 {
133 return 0;
134 }
135
hashmap_init(struct hashmap * map,hashmap_cmp_fn equals_function,size_t initial_size)136 void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
137 size_t initial_size)
138 {
139 unsigned int size = HASHMAP_INITIAL_SIZE;
140
141 map->size = 0;
142 map->cmpfn = equals_function ? equals_function : always_equal;
143
144 /* calculate initial table size and allocate the table */
145 initial_size = (unsigned int)((uint64_t)initial_size * 100
146 / HASHMAP_LOAD_FACTOR);
147 while (initial_size > size)
148 size <<= HASHMAP_RESIZE_BITS;
149 alloc_table(map, size);
150 }
151
hashmap_free(struct hashmap * map)152 int hashmap_free(struct hashmap *map)
153 {
154 if (map && map->table) {
155 struct hashmap_iter iter;
156 struct hashmap_entry *e;
157
158 hashmap_iter_init(map, &iter);
159 e = hashmap_iter_next(&iter);
160 if (e)
161 return -EBUSY;
162
163 free(map->table);
164 memset(map, 0, sizeof(*map));
165 }
166 return 0;
167 }
168
hashmap_get(const struct hashmap * map,const void * key,const void * keydata)169 void *hashmap_get(const struct hashmap *map, const void *key, const void *keydata)
170 {
171 return *find_entry_ptr(map, key, keydata);
172 }
173
hashmap_get_next(const struct hashmap * map,const void * entry)174 void *hashmap_get_next(const struct hashmap *map, const void *entry)
175 {
176 struct hashmap_entry *e = ((struct hashmap_entry *)entry)->next;
177
178 for (; e; e = e->next)
179 if (entry_equals(map, entry, e, NULL))
180 return e;
181 return NULL;
182 }
183
hashmap_add(struct hashmap * map,void * entry)184 void hashmap_add(struct hashmap *map, void *entry)
185 {
186 unsigned int b = bucket(map, entry);
187
188 /* add entry */
189 ((struct hashmap_entry *)entry)->next = map->table[b];
190 map->table[b] = entry;
191
192 /* fix size and rehash if appropriate */
193 map->size++;
194 if (map->size > map->grow_at)
195 rehash(map, map->tablesize << HASHMAP_RESIZE_BITS);
196 }
197
hashmap_remove(struct hashmap * map,const void * entry)198 void *hashmap_remove(struct hashmap *map, const void *entry)
199 {
200 struct hashmap_entry *old;
201 struct hashmap_entry **e = &map->table[bucket(map, entry)];
202
203 while (*e && *e != entry)
204 e = &(*e)->next;
205
206 if (!*e)
207 return NULL;
208
209 /* remove existing entry */
210 old = *e;
211 *e = old->next;
212 old->next = NULL;
213
214 /* fix size and rehash if appropriate */
215 map->size--;
216 if (map->size < map->shrink_at)
217 rehash(map, map->tablesize >> HASHMAP_RESIZE_BITS);
218 return old;
219 }
220
hashmap_iter_init(struct hashmap * map,struct hashmap_iter * iter)221 void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter)
222 {
223 iter->map = map;
224 iter->tablepos = 0;
225 iter->next = NULL;
226 }
227
hashmap_iter_next(struct hashmap_iter * iter)228 void *hashmap_iter_next(struct hashmap_iter *iter)
229 {
230 struct hashmap_entry *current = iter->next;
231
232 for (;;) {
233 if (current) {
234 iter->next = current->next;
235 return current;
236 }
237
238 if (iter->tablepos >= iter->map->tablesize)
239 return NULL;
240
241 current = iter->map->table[iter->tablepos++];
242 }
243 }
244
245 struct pool_entry {
246 struct hashmap_entry ent;
247 size_t len;
248 unsigned char data[FLEX_ARRAY];
249 };
250
pool_entry_cmp(const struct pool_entry * e1,const struct pool_entry * e2,const unsigned char * keydata)251 static int pool_entry_cmp(const struct pool_entry *e1,
252 const struct pool_entry *e2,
253 const unsigned char *keydata)
254 {
255 return e1->data != keydata &&
256 (e1->len != e2->len || memcmp(e1->data, keydata, e1->len));
257 }
258
memintern(const void * data,size_t len)259 const void *memintern(const void *data, size_t len)
260 {
261 static struct hashmap map;
262 struct pool_entry key, *e;
263
264 /* initialize string pool hashmap */
265 if (!map.tablesize)
266 hashmap_init(&map, (hashmap_cmp_fn)pool_entry_cmp, 0);
267
268 /* lookup interned string in pool */
269 hashmap_entry_init(&key, memhash(data, len));
270 key.len = len;
271 e = hashmap_get(&map, &key, data);
272 if (!e) {
273 /* not found: create it */
274 FLEX_ALLOC_MEM(e, data, data, len);
275 hashmap_entry_init(e, key.ent.hash);
276 e->len = len;
277 hashmap_add(&map, e);
278 }
279 return e->data;
280 }
281