xref: /aosp_15_r20/external/starlark-go/starlark/hashtable.go (revision 4947cdc739c985f6d86941e22894f5cefe7c9e9a)
1*4947cdc7SCole Faust// Copyright 2017 The Bazel Authors. All rights reserved.
2*4947cdc7SCole Faust// Use of this source code is governed by a BSD-style
3*4947cdc7SCole Faust// license that can be found in the LICENSE file.
4*4947cdc7SCole Faust
5*4947cdc7SCole Faustpackage starlark
6*4947cdc7SCole Faust
7*4947cdc7SCole Faustimport (
8*4947cdc7SCole Faust	"fmt"
9*4947cdc7SCole Faust	_ "unsafe" // for go:linkname hack
10*4947cdc7SCole Faust)
11*4947cdc7SCole Faust
12*4947cdc7SCole Faust// hashtable is used to represent Starlark dict and set values.
13*4947cdc7SCole Faust// It is a hash table whose key/value entries form a doubly-linked list
14*4947cdc7SCole Faust// in the order the entries were inserted.
15*4947cdc7SCole Fausttype hashtable struct {
16*4947cdc7SCole Faust	table     []bucket  // len is zero or a power of two
17*4947cdc7SCole Faust	bucket0   [1]bucket // inline allocation for small maps.
18*4947cdc7SCole Faust	len       uint32
19*4947cdc7SCole Faust	itercount uint32  // number of active iterators (ignored if frozen)
20*4947cdc7SCole Faust	head      *entry  // insertion order doubly-linked list; may be nil
21*4947cdc7SCole Faust	tailLink  **entry // address of nil link at end of list (perhaps &head)
22*4947cdc7SCole Faust	frozen    bool
23*4947cdc7SCole Faust}
24*4947cdc7SCole Faust
25*4947cdc7SCole Faustconst bucketSize = 8
26*4947cdc7SCole Faust
27*4947cdc7SCole Fausttype bucket struct {
28*4947cdc7SCole Faust	entries [bucketSize]entry
29*4947cdc7SCole Faust	next    *bucket // linked list of buckets
30*4947cdc7SCole Faust}
31*4947cdc7SCole Faust
32*4947cdc7SCole Fausttype entry struct {
33*4947cdc7SCole Faust	hash       uint32 // nonzero => in use
34*4947cdc7SCole Faust	key, value Value
35*4947cdc7SCole Faust	next       *entry  // insertion order doubly-linked list; may be nil
36*4947cdc7SCole Faust	prevLink   **entry // address of link to this entry (perhaps &head)
37*4947cdc7SCole Faust}
38*4947cdc7SCole Faust
39*4947cdc7SCole Faustfunc (ht *hashtable) init(size int) {
40*4947cdc7SCole Faust	if size < 0 {
41*4947cdc7SCole Faust		panic("size < 0")
42*4947cdc7SCole Faust	}
43*4947cdc7SCole Faust	nb := 1
44*4947cdc7SCole Faust	for overloaded(size, nb) {
45*4947cdc7SCole Faust		nb = nb << 1
46*4947cdc7SCole Faust	}
47*4947cdc7SCole Faust	if nb < 2 {
48*4947cdc7SCole Faust		ht.table = ht.bucket0[:1]
49*4947cdc7SCole Faust	} else {
50*4947cdc7SCole Faust		ht.table = make([]bucket, nb)
51*4947cdc7SCole Faust	}
52*4947cdc7SCole Faust	ht.tailLink = &ht.head
53*4947cdc7SCole Faust}
54*4947cdc7SCole Faust
55*4947cdc7SCole Faustfunc (ht *hashtable) freeze() {
56*4947cdc7SCole Faust	if !ht.frozen {
57*4947cdc7SCole Faust		ht.frozen = true
58*4947cdc7SCole Faust		for i := range ht.table {
59*4947cdc7SCole Faust			for p := &ht.table[i]; p != nil; p = p.next {
60*4947cdc7SCole Faust				for i := range p.entries {
61*4947cdc7SCole Faust					e := &p.entries[i]
62*4947cdc7SCole Faust					if e.hash != 0 {
63*4947cdc7SCole Faust						e.key.Freeze()
64*4947cdc7SCole Faust						e.value.Freeze()
65*4947cdc7SCole Faust					}
66*4947cdc7SCole Faust				}
67*4947cdc7SCole Faust			}
68*4947cdc7SCole Faust		}
69*4947cdc7SCole Faust	}
70*4947cdc7SCole Faust}
71*4947cdc7SCole Faust
72*4947cdc7SCole Faustfunc (ht *hashtable) insert(k, v Value) error {
73*4947cdc7SCole Faust	if ht.frozen {
74*4947cdc7SCole Faust		return fmt.Errorf("cannot insert into frozen hash table")
75*4947cdc7SCole Faust	}
76*4947cdc7SCole Faust	if ht.itercount > 0 {
77*4947cdc7SCole Faust		return fmt.Errorf("cannot insert into hash table during iteration")
78*4947cdc7SCole Faust	}
79*4947cdc7SCole Faust	if ht.table == nil {
80*4947cdc7SCole Faust		ht.init(1)
81*4947cdc7SCole Faust	}
82*4947cdc7SCole Faust	h, err := k.Hash()
83*4947cdc7SCole Faust	if err != nil {
84*4947cdc7SCole Faust		return err
85*4947cdc7SCole Faust	}
86*4947cdc7SCole Faust	if h == 0 {
87*4947cdc7SCole Faust		h = 1 // zero is reserved
88*4947cdc7SCole Faust	}
89*4947cdc7SCole Faust
90*4947cdc7SCole Faustretry:
91*4947cdc7SCole Faust	var insert *entry
92*4947cdc7SCole Faust
93*4947cdc7SCole Faust	// Inspect each bucket in the bucket list.
94*4947cdc7SCole Faust	p := &ht.table[h&(uint32(len(ht.table)-1))]
95*4947cdc7SCole Faust	for {
96*4947cdc7SCole Faust		for i := range p.entries {
97*4947cdc7SCole Faust			e := &p.entries[i]
98*4947cdc7SCole Faust			if e.hash != h {
99*4947cdc7SCole Faust				if e.hash == 0 {
100*4947cdc7SCole Faust					// Found empty entry; make a note.
101*4947cdc7SCole Faust					insert = e
102*4947cdc7SCole Faust				}
103*4947cdc7SCole Faust				continue
104*4947cdc7SCole Faust			}
105*4947cdc7SCole Faust			if eq, err := Equal(k, e.key); err != nil {
106*4947cdc7SCole Faust				return err // e.g. excessively recursive tuple
107*4947cdc7SCole Faust			} else if !eq {
108*4947cdc7SCole Faust				continue
109*4947cdc7SCole Faust			}
110*4947cdc7SCole Faust			// Key already present; update value.
111*4947cdc7SCole Faust			e.value = v
112*4947cdc7SCole Faust			return nil
113*4947cdc7SCole Faust		}
114*4947cdc7SCole Faust		if p.next == nil {
115*4947cdc7SCole Faust			break
116*4947cdc7SCole Faust		}
117*4947cdc7SCole Faust		p = p.next
118*4947cdc7SCole Faust	}
119*4947cdc7SCole Faust
120*4947cdc7SCole Faust	// Key not found.  p points to the last bucket.
121*4947cdc7SCole Faust
122*4947cdc7SCole Faust	// Does the number of elements exceed the buckets' load factor?
123*4947cdc7SCole Faust	if overloaded(int(ht.len), len(ht.table)) {
124*4947cdc7SCole Faust		ht.grow()
125*4947cdc7SCole Faust		goto retry
126*4947cdc7SCole Faust	}
127*4947cdc7SCole Faust
128*4947cdc7SCole Faust	if insert == nil {
129*4947cdc7SCole Faust		// No space in existing buckets.  Add a new one to the bucket list.
130*4947cdc7SCole Faust		b := new(bucket)
131*4947cdc7SCole Faust		p.next = b
132*4947cdc7SCole Faust		insert = &b.entries[0]
133*4947cdc7SCole Faust	}
134*4947cdc7SCole Faust
135*4947cdc7SCole Faust	// Insert key/value pair.
136*4947cdc7SCole Faust	insert.hash = h
137*4947cdc7SCole Faust	insert.key = k
138*4947cdc7SCole Faust	insert.value = v
139*4947cdc7SCole Faust
140*4947cdc7SCole Faust	// Append entry to doubly-linked list.
141*4947cdc7SCole Faust	insert.prevLink = ht.tailLink
142*4947cdc7SCole Faust	*ht.tailLink = insert
143*4947cdc7SCole Faust	ht.tailLink = &insert.next
144*4947cdc7SCole Faust
145*4947cdc7SCole Faust	ht.len++
146*4947cdc7SCole Faust
147*4947cdc7SCole Faust	return nil
148*4947cdc7SCole Faust}
149*4947cdc7SCole Faust
150*4947cdc7SCole Faustfunc overloaded(elems, buckets int) bool {
151*4947cdc7SCole Faust	const loadFactor = 6.5 // just a guess
152*4947cdc7SCole Faust	return elems >= bucketSize && float64(elems) >= loadFactor*float64(buckets)
153*4947cdc7SCole Faust}
154*4947cdc7SCole Faust
155*4947cdc7SCole Faustfunc (ht *hashtable) grow() {
156*4947cdc7SCole Faust	// Double the number of buckets and rehash.
157*4947cdc7SCole Faust	// TODO(adonovan): opt:
158*4947cdc7SCole Faust	// - avoid reentrant calls to ht.insert, and specialize it.
159*4947cdc7SCole Faust	//   e.g. we know the calls to Equals will return false since
160*4947cdc7SCole Faust	//   there are no duplicates among the old keys.
161*4947cdc7SCole Faust	// - saving the entire hash in the bucket would avoid the need to
162*4947cdc7SCole Faust	//   recompute the hash.
163*4947cdc7SCole Faust	// - save the old buckets on a free list.
164*4947cdc7SCole Faust	ht.table = make([]bucket, len(ht.table)<<1)
165*4947cdc7SCole Faust	oldhead := ht.head
166*4947cdc7SCole Faust	ht.head = nil
167*4947cdc7SCole Faust	ht.tailLink = &ht.head
168*4947cdc7SCole Faust	ht.len = 0
169*4947cdc7SCole Faust	for e := oldhead; e != nil; e = e.next {
170*4947cdc7SCole Faust		ht.insert(e.key, e.value)
171*4947cdc7SCole Faust	}
172*4947cdc7SCole Faust	ht.bucket0[0] = bucket{} // clear out unused initial bucket
173*4947cdc7SCole Faust}
174*4947cdc7SCole Faust
175*4947cdc7SCole Faustfunc (ht *hashtable) lookup(k Value) (v Value, found bool, err error) {
176*4947cdc7SCole Faust	h, err := k.Hash()
177*4947cdc7SCole Faust	if err != nil {
178*4947cdc7SCole Faust		return nil, false, err // unhashable
179*4947cdc7SCole Faust	}
180*4947cdc7SCole Faust	if h == 0 {
181*4947cdc7SCole Faust		h = 1 // zero is reserved
182*4947cdc7SCole Faust	}
183*4947cdc7SCole Faust	if ht.table == nil {
184*4947cdc7SCole Faust		return None, false, nil // empty
185*4947cdc7SCole Faust	}
186*4947cdc7SCole Faust
187*4947cdc7SCole Faust	// Inspect each bucket in the bucket list.
188*4947cdc7SCole Faust	for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next {
189*4947cdc7SCole Faust		for i := range p.entries {
190*4947cdc7SCole Faust			e := &p.entries[i]
191*4947cdc7SCole Faust			if e.hash == h {
192*4947cdc7SCole Faust				if eq, err := Equal(k, e.key); err != nil {
193*4947cdc7SCole Faust					return nil, false, err // e.g. excessively recursive tuple
194*4947cdc7SCole Faust				} else if eq {
195*4947cdc7SCole Faust					return e.value, true, nil // found
196*4947cdc7SCole Faust				}
197*4947cdc7SCole Faust			}
198*4947cdc7SCole Faust		}
199*4947cdc7SCole Faust	}
200*4947cdc7SCole Faust	return None, false, nil // not found
201*4947cdc7SCole Faust}
202*4947cdc7SCole Faust
203*4947cdc7SCole Faust// Items returns all the items in the map (as key/value pairs) in insertion order.
204*4947cdc7SCole Faustfunc (ht *hashtable) items() []Tuple {
205*4947cdc7SCole Faust	items := make([]Tuple, 0, ht.len)
206*4947cdc7SCole Faust	array := make([]Value, ht.len*2) // allocate a single backing array
207*4947cdc7SCole Faust	for e := ht.head; e != nil; e = e.next {
208*4947cdc7SCole Faust		pair := Tuple(array[:2:2])
209*4947cdc7SCole Faust		array = array[2:]
210*4947cdc7SCole Faust		pair[0] = e.key
211*4947cdc7SCole Faust		pair[1] = e.value
212*4947cdc7SCole Faust		items = append(items, pair)
213*4947cdc7SCole Faust	}
214*4947cdc7SCole Faust	return items
215*4947cdc7SCole Faust}
216*4947cdc7SCole Faust
217*4947cdc7SCole Faustfunc (ht *hashtable) first() (Value, bool) {
218*4947cdc7SCole Faust	if ht.head != nil {
219*4947cdc7SCole Faust		return ht.head.key, true
220*4947cdc7SCole Faust	}
221*4947cdc7SCole Faust	return None, false
222*4947cdc7SCole Faust}
223*4947cdc7SCole Faust
224*4947cdc7SCole Faustfunc (ht *hashtable) keys() []Value {
225*4947cdc7SCole Faust	keys := make([]Value, 0, ht.len)
226*4947cdc7SCole Faust	for e := ht.head; e != nil; e = e.next {
227*4947cdc7SCole Faust		keys = append(keys, e.key)
228*4947cdc7SCole Faust	}
229*4947cdc7SCole Faust	return keys
230*4947cdc7SCole Faust}
231*4947cdc7SCole Faust
232*4947cdc7SCole Faustfunc (ht *hashtable) delete(k Value) (v Value, found bool, err error) {
233*4947cdc7SCole Faust	if ht.frozen {
234*4947cdc7SCole Faust		return nil, false, fmt.Errorf("cannot delete from frozen hash table")
235*4947cdc7SCole Faust	}
236*4947cdc7SCole Faust	if ht.itercount > 0 {
237*4947cdc7SCole Faust		return nil, false, fmt.Errorf("cannot delete from hash table during iteration")
238*4947cdc7SCole Faust	}
239*4947cdc7SCole Faust	if ht.table == nil {
240*4947cdc7SCole Faust		return None, false, nil // empty
241*4947cdc7SCole Faust	}
242*4947cdc7SCole Faust	h, err := k.Hash()
243*4947cdc7SCole Faust	if err != nil {
244*4947cdc7SCole Faust		return nil, false, err // unhashable
245*4947cdc7SCole Faust	}
246*4947cdc7SCole Faust	if h == 0 {
247*4947cdc7SCole Faust		h = 1 // zero is reserved
248*4947cdc7SCole Faust	}
249*4947cdc7SCole Faust
250*4947cdc7SCole Faust	// Inspect each bucket in the bucket list.
251*4947cdc7SCole Faust	for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next {
252*4947cdc7SCole Faust		for i := range p.entries {
253*4947cdc7SCole Faust			e := &p.entries[i]
254*4947cdc7SCole Faust			if e.hash == h {
255*4947cdc7SCole Faust				if eq, err := Equal(k, e.key); err != nil {
256*4947cdc7SCole Faust					return nil, false, err
257*4947cdc7SCole Faust				} else if eq {
258*4947cdc7SCole Faust					// Remove e from doubly-linked list.
259*4947cdc7SCole Faust					*e.prevLink = e.next
260*4947cdc7SCole Faust					if e.next == nil {
261*4947cdc7SCole Faust						ht.tailLink = e.prevLink // deletion of last entry
262*4947cdc7SCole Faust					} else {
263*4947cdc7SCole Faust						e.next.prevLink = e.prevLink
264*4947cdc7SCole Faust					}
265*4947cdc7SCole Faust
266*4947cdc7SCole Faust					v := e.value
267*4947cdc7SCole Faust					*e = entry{}
268*4947cdc7SCole Faust					ht.len--
269*4947cdc7SCole Faust					return v, true, nil // found
270*4947cdc7SCole Faust				}
271*4947cdc7SCole Faust			}
272*4947cdc7SCole Faust		}
273*4947cdc7SCole Faust	}
274*4947cdc7SCole Faust
275*4947cdc7SCole Faust	// TODO(adonovan): opt: remove completely empty bucket from bucket list.
276*4947cdc7SCole Faust
277*4947cdc7SCole Faust	return None, false, nil // not found
278*4947cdc7SCole Faust}
279*4947cdc7SCole Faust
280*4947cdc7SCole Faustfunc (ht *hashtable) clear() error {
281*4947cdc7SCole Faust	if ht.frozen {
282*4947cdc7SCole Faust		return fmt.Errorf("cannot clear frozen hash table")
283*4947cdc7SCole Faust	}
284*4947cdc7SCole Faust	if ht.itercount > 0 {
285*4947cdc7SCole Faust		return fmt.Errorf("cannot clear hash table during iteration")
286*4947cdc7SCole Faust	}
287*4947cdc7SCole Faust	if ht.table != nil {
288*4947cdc7SCole Faust		for i := range ht.table {
289*4947cdc7SCole Faust			ht.table[i] = bucket{}
290*4947cdc7SCole Faust		}
291*4947cdc7SCole Faust	}
292*4947cdc7SCole Faust	ht.head = nil
293*4947cdc7SCole Faust	ht.tailLink = &ht.head
294*4947cdc7SCole Faust	ht.len = 0
295*4947cdc7SCole Faust	return nil
296*4947cdc7SCole Faust}
297*4947cdc7SCole Faust
298*4947cdc7SCole Faust// dump is provided as an aid to debugging.
299*4947cdc7SCole Faustfunc (ht *hashtable) dump() {
300*4947cdc7SCole Faust	fmt.Printf("hashtable %p len=%d head=%p tailLink=%p",
301*4947cdc7SCole Faust		ht, ht.len, ht.head, ht.tailLink)
302*4947cdc7SCole Faust	if ht.tailLink != nil {
303*4947cdc7SCole Faust		fmt.Printf(" *tailLink=%p", *ht.tailLink)
304*4947cdc7SCole Faust	}
305*4947cdc7SCole Faust	fmt.Println()
306*4947cdc7SCole Faust	for j := range ht.table {
307*4947cdc7SCole Faust		fmt.Printf("bucket chain %d\n", j)
308*4947cdc7SCole Faust		for p := &ht.table[j]; p != nil; p = p.next {
309*4947cdc7SCole Faust			fmt.Printf("bucket %p\n", p)
310*4947cdc7SCole Faust			for i := range p.entries {
311*4947cdc7SCole Faust				e := &p.entries[i]
312*4947cdc7SCole Faust				fmt.Printf("\tentry %d @ %p hash=%d key=%v value=%v\n",
313*4947cdc7SCole Faust					i, e, e.hash, e.key, e.value)
314*4947cdc7SCole Faust				fmt.Printf("\t\tnext=%p &next=%p prev=%p",
315*4947cdc7SCole Faust					e.next, &e.next, e.prevLink)
316*4947cdc7SCole Faust				if e.prevLink != nil {
317*4947cdc7SCole Faust					fmt.Printf(" *prev=%p", *e.prevLink)
318*4947cdc7SCole Faust				}
319*4947cdc7SCole Faust				fmt.Println()
320*4947cdc7SCole Faust			}
321*4947cdc7SCole Faust		}
322*4947cdc7SCole Faust	}
323*4947cdc7SCole Faust}
324*4947cdc7SCole Faust
325*4947cdc7SCole Faustfunc (ht *hashtable) iterate() *keyIterator {
326*4947cdc7SCole Faust	if !ht.frozen {
327*4947cdc7SCole Faust		ht.itercount++
328*4947cdc7SCole Faust	}
329*4947cdc7SCole Faust	return &keyIterator{ht: ht, e: ht.head}
330*4947cdc7SCole Faust}
331*4947cdc7SCole Faust
332*4947cdc7SCole Fausttype keyIterator struct {
333*4947cdc7SCole Faust	ht *hashtable
334*4947cdc7SCole Faust	e  *entry
335*4947cdc7SCole Faust}
336*4947cdc7SCole Faust
337*4947cdc7SCole Faustfunc (it *keyIterator) Next(k *Value) bool {
338*4947cdc7SCole Faust	if it.e != nil {
339*4947cdc7SCole Faust		*k = it.e.key
340*4947cdc7SCole Faust		it.e = it.e.next
341*4947cdc7SCole Faust		return true
342*4947cdc7SCole Faust	}
343*4947cdc7SCole Faust	return false
344*4947cdc7SCole Faust}
345*4947cdc7SCole Faust
346*4947cdc7SCole Faustfunc (it *keyIterator) Done() {
347*4947cdc7SCole Faust	if !it.ht.frozen {
348*4947cdc7SCole Faust		it.ht.itercount--
349*4947cdc7SCole Faust	}
350*4947cdc7SCole Faust}
351*4947cdc7SCole Faust
352*4947cdc7SCole Faust// hashString computes the hash of s.
353*4947cdc7SCole Faustfunc hashString(s string) uint32 {
354*4947cdc7SCole Faust	if len(s) >= 12 {
355*4947cdc7SCole Faust		// Call the Go runtime's optimized hash implementation,
356*4947cdc7SCole Faust		// which uses the AESENC instruction on amd64 machines.
357*4947cdc7SCole Faust		return uint32(goStringHash(s, 0))
358*4947cdc7SCole Faust	}
359*4947cdc7SCole Faust	return softHashString(s)
360*4947cdc7SCole Faust}
361*4947cdc7SCole Faust
362*4947cdc7SCole Faust//go:linkname goStringHash runtime.stringHash
363*4947cdc7SCole Faustfunc goStringHash(s string, seed uintptr) uintptr
364*4947cdc7SCole Faust
365*4947cdc7SCole Faust// softHashString computes the 32-bit FNV-1a hash of s in software.
366*4947cdc7SCole Faustfunc softHashString(s string) uint32 {
367*4947cdc7SCole Faust	var h uint32 = 2166136261
368*4947cdc7SCole Faust	for i := 0; i < len(s); i++ {
369*4947cdc7SCole Faust		h ^= uint32(s[i])
370*4947cdc7SCole Faust		h *= 16777619
371*4947cdc7SCole Faust	}
372*4947cdc7SCole Faust	return h
373*4947cdc7SCole Faust}
374