xref: /aosp_15_r20/external/starlark-go/starlark/hashtable.go (revision 4947cdc739c985f6d86941e22894f5cefe7c9e9a)
1// Copyright 2017 The Bazel Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package starlark
6
7import (
8	"fmt"
9	_ "unsafe" // for go:linkname hack
10)
11
12// hashtable is used to represent Starlark dict and set values.
13// It is a hash table whose key/value entries form a doubly-linked list
14// in the order the entries were inserted.
15type hashtable struct {
16	table     []bucket  // len is zero or a power of two
17	bucket0   [1]bucket // inline allocation for small maps.
18	len       uint32
19	itercount uint32  // number of active iterators (ignored if frozen)
20	head      *entry  // insertion order doubly-linked list; may be nil
21	tailLink  **entry // address of nil link at end of list (perhaps &head)
22	frozen    bool
23}
24
25const bucketSize = 8
26
27type bucket struct {
28	entries [bucketSize]entry
29	next    *bucket // linked list of buckets
30}
31
32type entry struct {
33	hash       uint32 // nonzero => in use
34	key, value Value
35	next       *entry  // insertion order doubly-linked list; may be nil
36	prevLink   **entry // address of link to this entry (perhaps &head)
37}
38
39func (ht *hashtable) init(size int) {
40	if size < 0 {
41		panic("size < 0")
42	}
43	nb := 1
44	for overloaded(size, nb) {
45		nb = nb << 1
46	}
47	if nb < 2 {
48		ht.table = ht.bucket0[:1]
49	} else {
50		ht.table = make([]bucket, nb)
51	}
52	ht.tailLink = &ht.head
53}
54
55func (ht *hashtable) freeze() {
56	if !ht.frozen {
57		ht.frozen = true
58		for i := range ht.table {
59			for p := &ht.table[i]; p != nil; p = p.next {
60				for i := range p.entries {
61					e := &p.entries[i]
62					if e.hash != 0 {
63						e.key.Freeze()
64						e.value.Freeze()
65					}
66				}
67			}
68		}
69	}
70}
71
72func (ht *hashtable) insert(k, v Value) error {
73	if ht.frozen {
74		return fmt.Errorf("cannot insert into frozen hash table")
75	}
76	if ht.itercount > 0 {
77		return fmt.Errorf("cannot insert into hash table during iteration")
78	}
79	if ht.table == nil {
80		ht.init(1)
81	}
82	h, err := k.Hash()
83	if err != nil {
84		return err
85	}
86	if h == 0 {
87		h = 1 // zero is reserved
88	}
89
90retry:
91	var insert *entry
92
93	// Inspect each bucket in the bucket list.
94	p := &ht.table[h&(uint32(len(ht.table)-1))]
95	for {
96		for i := range p.entries {
97			e := &p.entries[i]
98			if e.hash != h {
99				if e.hash == 0 {
100					// Found empty entry; make a note.
101					insert = e
102				}
103				continue
104			}
105			if eq, err := Equal(k, e.key); err != nil {
106				return err // e.g. excessively recursive tuple
107			} else if !eq {
108				continue
109			}
110			// Key already present; update value.
111			e.value = v
112			return nil
113		}
114		if p.next == nil {
115			break
116		}
117		p = p.next
118	}
119
120	// Key not found.  p points to the last bucket.
121
122	// Does the number of elements exceed the buckets' load factor?
123	if overloaded(int(ht.len), len(ht.table)) {
124		ht.grow()
125		goto retry
126	}
127
128	if insert == nil {
129		// No space in existing buckets.  Add a new one to the bucket list.
130		b := new(bucket)
131		p.next = b
132		insert = &b.entries[0]
133	}
134
135	// Insert key/value pair.
136	insert.hash = h
137	insert.key = k
138	insert.value = v
139
140	// Append entry to doubly-linked list.
141	insert.prevLink = ht.tailLink
142	*ht.tailLink = insert
143	ht.tailLink = &insert.next
144
145	ht.len++
146
147	return nil
148}
149
150func overloaded(elems, buckets int) bool {
151	const loadFactor = 6.5 // just a guess
152	return elems >= bucketSize && float64(elems) >= loadFactor*float64(buckets)
153}
154
155func (ht *hashtable) grow() {
156	// Double the number of buckets and rehash.
157	// TODO(adonovan): opt:
158	// - avoid reentrant calls to ht.insert, and specialize it.
159	//   e.g. we know the calls to Equals will return false since
160	//   there are no duplicates among the old keys.
161	// - saving the entire hash in the bucket would avoid the need to
162	//   recompute the hash.
163	// - save the old buckets on a free list.
164	ht.table = make([]bucket, len(ht.table)<<1)
165	oldhead := ht.head
166	ht.head = nil
167	ht.tailLink = &ht.head
168	ht.len = 0
169	for e := oldhead; e != nil; e = e.next {
170		ht.insert(e.key, e.value)
171	}
172	ht.bucket0[0] = bucket{} // clear out unused initial bucket
173}
174
175func (ht *hashtable) lookup(k Value) (v Value, found bool, err error) {
176	h, err := k.Hash()
177	if err != nil {
178		return nil, false, err // unhashable
179	}
180	if h == 0 {
181		h = 1 // zero is reserved
182	}
183	if ht.table == nil {
184		return None, false, nil // empty
185	}
186
187	// Inspect each bucket in the bucket list.
188	for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next {
189		for i := range p.entries {
190			e := &p.entries[i]
191			if e.hash == h {
192				if eq, err := Equal(k, e.key); err != nil {
193					return nil, false, err // e.g. excessively recursive tuple
194				} else if eq {
195					return e.value, true, nil // found
196				}
197			}
198		}
199	}
200	return None, false, nil // not found
201}
202
203// Items returns all the items in the map (as key/value pairs) in insertion order.
204func (ht *hashtable) items() []Tuple {
205	items := make([]Tuple, 0, ht.len)
206	array := make([]Value, ht.len*2) // allocate a single backing array
207	for e := ht.head; e != nil; e = e.next {
208		pair := Tuple(array[:2:2])
209		array = array[2:]
210		pair[0] = e.key
211		pair[1] = e.value
212		items = append(items, pair)
213	}
214	return items
215}
216
217func (ht *hashtable) first() (Value, bool) {
218	if ht.head != nil {
219		return ht.head.key, true
220	}
221	return None, false
222}
223
224func (ht *hashtable) keys() []Value {
225	keys := make([]Value, 0, ht.len)
226	for e := ht.head; e != nil; e = e.next {
227		keys = append(keys, e.key)
228	}
229	return keys
230}
231
232func (ht *hashtable) delete(k Value) (v Value, found bool, err error) {
233	if ht.frozen {
234		return nil, false, fmt.Errorf("cannot delete from frozen hash table")
235	}
236	if ht.itercount > 0 {
237		return nil, false, fmt.Errorf("cannot delete from hash table during iteration")
238	}
239	if ht.table == nil {
240		return None, false, nil // empty
241	}
242	h, err := k.Hash()
243	if err != nil {
244		return nil, false, err // unhashable
245	}
246	if h == 0 {
247		h = 1 // zero is reserved
248	}
249
250	// Inspect each bucket in the bucket list.
251	for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next {
252		for i := range p.entries {
253			e := &p.entries[i]
254			if e.hash == h {
255				if eq, err := Equal(k, e.key); err != nil {
256					return nil, false, err
257				} else if eq {
258					// Remove e from doubly-linked list.
259					*e.prevLink = e.next
260					if e.next == nil {
261						ht.tailLink = e.prevLink // deletion of last entry
262					} else {
263						e.next.prevLink = e.prevLink
264					}
265
266					v := e.value
267					*e = entry{}
268					ht.len--
269					return v, true, nil // found
270				}
271			}
272		}
273	}
274
275	// TODO(adonovan): opt: remove completely empty bucket from bucket list.
276
277	return None, false, nil // not found
278}
279
280func (ht *hashtable) clear() error {
281	if ht.frozen {
282		return fmt.Errorf("cannot clear frozen hash table")
283	}
284	if ht.itercount > 0 {
285		return fmt.Errorf("cannot clear hash table during iteration")
286	}
287	if ht.table != nil {
288		for i := range ht.table {
289			ht.table[i] = bucket{}
290		}
291	}
292	ht.head = nil
293	ht.tailLink = &ht.head
294	ht.len = 0
295	return nil
296}
297
298// dump is provided as an aid to debugging.
299func (ht *hashtable) dump() {
300	fmt.Printf("hashtable %p len=%d head=%p tailLink=%p",
301		ht, ht.len, ht.head, ht.tailLink)
302	if ht.tailLink != nil {
303		fmt.Printf(" *tailLink=%p", *ht.tailLink)
304	}
305	fmt.Println()
306	for j := range ht.table {
307		fmt.Printf("bucket chain %d\n", j)
308		for p := &ht.table[j]; p != nil; p = p.next {
309			fmt.Printf("bucket %p\n", p)
310			for i := range p.entries {
311				e := &p.entries[i]
312				fmt.Printf("\tentry %d @ %p hash=%d key=%v value=%v\n",
313					i, e, e.hash, e.key, e.value)
314				fmt.Printf("\t\tnext=%p &next=%p prev=%p",
315					e.next, &e.next, e.prevLink)
316				if e.prevLink != nil {
317					fmt.Printf(" *prev=%p", *e.prevLink)
318				}
319				fmt.Println()
320			}
321		}
322	}
323}
324
325func (ht *hashtable) iterate() *keyIterator {
326	if !ht.frozen {
327		ht.itercount++
328	}
329	return &keyIterator{ht: ht, e: ht.head}
330}
331
332type keyIterator struct {
333	ht *hashtable
334	e  *entry
335}
336
337func (it *keyIterator) Next(k *Value) bool {
338	if it.e != nil {
339		*k = it.e.key
340		it.e = it.e.next
341		return true
342	}
343	return false
344}
345
346func (it *keyIterator) Done() {
347	if !it.ht.frozen {
348		it.ht.itercount--
349	}
350}
351
352// hashString computes the hash of s.
353func hashString(s string) uint32 {
354	if len(s) >= 12 {
355		// Call the Go runtime's optimized hash implementation,
356		// which uses the AESENC instruction on amd64 machines.
357		return uint32(goStringHash(s, 0))
358	}
359	return softHashString(s)
360}
361
362//go:linkname goStringHash runtime.stringHash
363func goStringHash(s string, seed uintptr) uintptr
364
365// softHashString computes the 32-bit FNV-1a hash of s in software.
366func softHashString(s string) uint32 {
367	var h uint32 = 2166136261
368	for i := 0; i < len(s); i++ {
369		h ^= uint32(s[i])
370		h *= 16777619
371	}
372	return h
373}
374