1*4947cdc7SCole Faust// Copyright 2017 The Bazel Authors. All rights reserved. 2*4947cdc7SCole Faust// Use of this source code is governed by a BSD-style 3*4947cdc7SCole Faust// license that can be found in the LICENSE file. 4*4947cdc7SCole Faust 5*4947cdc7SCole Faustpackage starlark 6*4947cdc7SCole Faust 7*4947cdc7SCole Faustimport ( 8*4947cdc7SCole Faust "fmt" 9*4947cdc7SCole Faust _ "unsafe" // for go:linkname hack 10*4947cdc7SCole Faust) 11*4947cdc7SCole Faust 12*4947cdc7SCole Faust// hashtable is used to represent Starlark dict and set values. 13*4947cdc7SCole Faust// It is a hash table whose key/value entries form a doubly-linked list 14*4947cdc7SCole Faust// in the order the entries were inserted. 15*4947cdc7SCole Fausttype hashtable struct { 16*4947cdc7SCole Faust table []bucket // len is zero or a power of two 17*4947cdc7SCole Faust bucket0 [1]bucket // inline allocation for small maps. 18*4947cdc7SCole Faust len uint32 19*4947cdc7SCole Faust itercount uint32 // number of active iterators (ignored if frozen) 20*4947cdc7SCole Faust head *entry // insertion order doubly-linked list; may be nil 21*4947cdc7SCole Faust tailLink **entry // address of nil link at end of list (perhaps &head) 22*4947cdc7SCole Faust frozen bool 23*4947cdc7SCole Faust} 24*4947cdc7SCole Faust 25*4947cdc7SCole Faustconst bucketSize = 8 26*4947cdc7SCole Faust 27*4947cdc7SCole Fausttype bucket struct { 28*4947cdc7SCole Faust entries [bucketSize]entry 29*4947cdc7SCole Faust next *bucket // linked list of buckets 30*4947cdc7SCole Faust} 31*4947cdc7SCole Faust 32*4947cdc7SCole Fausttype entry struct { 33*4947cdc7SCole Faust hash uint32 // nonzero => in use 34*4947cdc7SCole Faust key, value Value 35*4947cdc7SCole Faust next *entry // insertion order doubly-linked list; may be nil 36*4947cdc7SCole Faust prevLink **entry // address of link to this entry (perhaps &head) 37*4947cdc7SCole Faust} 38*4947cdc7SCole Faust 39*4947cdc7SCole Faustfunc (ht *hashtable) init(size int) { 40*4947cdc7SCole Faust if size < 0 { 41*4947cdc7SCole Faust panic("size < 0") 42*4947cdc7SCole Faust } 43*4947cdc7SCole Faust nb := 1 44*4947cdc7SCole Faust for overloaded(size, nb) { 45*4947cdc7SCole Faust nb = nb << 1 46*4947cdc7SCole Faust } 47*4947cdc7SCole Faust if nb < 2 { 48*4947cdc7SCole Faust ht.table = ht.bucket0[:1] 49*4947cdc7SCole Faust } else { 50*4947cdc7SCole Faust ht.table = make([]bucket, nb) 51*4947cdc7SCole Faust } 52*4947cdc7SCole Faust ht.tailLink = &ht.head 53*4947cdc7SCole Faust} 54*4947cdc7SCole Faust 55*4947cdc7SCole Faustfunc (ht *hashtable) freeze() { 56*4947cdc7SCole Faust if !ht.frozen { 57*4947cdc7SCole Faust ht.frozen = true 58*4947cdc7SCole Faust for i := range ht.table { 59*4947cdc7SCole Faust for p := &ht.table[i]; p != nil; p = p.next { 60*4947cdc7SCole Faust for i := range p.entries { 61*4947cdc7SCole Faust e := &p.entries[i] 62*4947cdc7SCole Faust if e.hash != 0 { 63*4947cdc7SCole Faust e.key.Freeze() 64*4947cdc7SCole Faust e.value.Freeze() 65*4947cdc7SCole Faust } 66*4947cdc7SCole Faust } 67*4947cdc7SCole Faust } 68*4947cdc7SCole Faust } 69*4947cdc7SCole Faust } 70*4947cdc7SCole Faust} 71*4947cdc7SCole Faust 72*4947cdc7SCole Faustfunc (ht *hashtable) insert(k, v Value) error { 73*4947cdc7SCole Faust if ht.frozen { 74*4947cdc7SCole Faust return fmt.Errorf("cannot insert into frozen hash table") 75*4947cdc7SCole Faust } 76*4947cdc7SCole Faust if ht.itercount > 0 { 77*4947cdc7SCole Faust return fmt.Errorf("cannot insert into hash table during iteration") 78*4947cdc7SCole Faust } 79*4947cdc7SCole Faust if ht.table == nil { 80*4947cdc7SCole Faust ht.init(1) 81*4947cdc7SCole Faust } 82*4947cdc7SCole Faust h, err := k.Hash() 83*4947cdc7SCole Faust if err != nil { 84*4947cdc7SCole Faust return err 85*4947cdc7SCole Faust } 86*4947cdc7SCole Faust if h == 0 { 87*4947cdc7SCole Faust h = 1 // zero is reserved 88*4947cdc7SCole Faust } 89*4947cdc7SCole Faust 90*4947cdc7SCole Faustretry: 91*4947cdc7SCole Faust var insert *entry 92*4947cdc7SCole Faust 93*4947cdc7SCole Faust // Inspect each bucket in the bucket list. 94*4947cdc7SCole Faust p := &ht.table[h&(uint32(len(ht.table)-1))] 95*4947cdc7SCole Faust for { 96*4947cdc7SCole Faust for i := range p.entries { 97*4947cdc7SCole Faust e := &p.entries[i] 98*4947cdc7SCole Faust if e.hash != h { 99*4947cdc7SCole Faust if e.hash == 0 { 100*4947cdc7SCole Faust // Found empty entry; make a note. 101*4947cdc7SCole Faust insert = e 102*4947cdc7SCole Faust } 103*4947cdc7SCole Faust continue 104*4947cdc7SCole Faust } 105*4947cdc7SCole Faust if eq, err := Equal(k, e.key); err != nil { 106*4947cdc7SCole Faust return err // e.g. excessively recursive tuple 107*4947cdc7SCole Faust } else if !eq { 108*4947cdc7SCole Faust continue 109*4947cdc7SCole Faust } 110*4947cdc7SCole Faust // Key already present; update value. 111*4947cdc7SCole Faust e.value = v 112*4947cdc7SCole Faust return nil 113*4947cdc7SCole Faust } 114*4947cdc7SCole Faust if p.next == nil { 115*4947cdc7SCole Faust break 116*4947cdc7SCole Faust } 117*4947cdc7SCole Faust p = p.next 118*4947cdc7SCole Faust } 119*4947cdc7SCole Faust 120*4947cdc7SCole Faust // Key not found. p points to the last bucket. 121*4947cdc7SCole Faust 122*4947cdc7SCole Faust // Does the number of elements exceed the buckets' load factor? 123*4947cdc7SCole Faust if overloaded(int(ht.len), len(ht.table)) { 124*4947cdc7SCole Faust ht.grow() 125*4947cdc7SCole Faust goto retry 126*4947cdc7SCole Faust } 127*4947cdc7SCole Faust 128*4947cdc7SCole Faust if insert == nil { 129*4947cdc7SCole Faust // No space in existing buckets. Add a new one to the bucket list. 130*4947cdc7SCole Faust b := new(bucket) 131*4947cdc7SCole Faust p.next = b 132*4947cdc7SCole Faust insert = &b.entries[0] 133*4947cdc7SCole Faust } 134*4947cdc7SCole Faust 135*4947cdc7SCole Faust // Insert key/value pair. 136*4947cdc7SCole Faust insert.hash = h 137*4947cdc7SCole Faust insert.key = k 138*4947cdc7SCole Faust insert.value = v 139*4947cdc7SCole Faust 140*4947cdc7SCole Faust // Append entry to doubly-linked list. 141*4947cdc7SCole Faust insert.prevLink = ht.tailLink 142*4947cdc7SCole Faust *ht.tailLink = insert 143*4947cdc7SCole Faust ht.tailLink = &insert.next 144*4947cdc7SCole Faust 145*4947cdc7SCole Faust ht.len++ 146*4947cdc7SCole Faust 147*4947cdc7SCole Faust return nil 148*4947cdc7SCole Faust} 149*4947cdc7SCole Faust 150*4947cdc7SCole Faustfunc overloaded(elems, buckets int) bool { 151*4947cdc7SCole Faust const loadFactor = 6.5 // just a guess 152*4947cdc7SCole Faust return elems >= bucketSize && float64(elems) >= loadFactor*float64(buckets) 153*4947cdc7SCole Faust} 154*4947cdc7SCole Faust 155*4947cdc7SCole Faustfunc (ht *hashtable) grow() { 156*4947cdc7SCole Faust // Double the number of buckets and rehash. 157*4947cdc7SCole Faust // TODO(adonovan): opt: 158*4947cdc7SCole Faust // - avoid reentrant calls to ht.insert, and specialize it. 159*4947cdc7SCole Faust // e.g. we know the calls to Equals will return false since 160*4947cdc7SCole Faust // there are no duplicates among the old keys. 161*4947cdc7SCole Faust // - saving the entire hash in the bucket would avoid the need to 162*4947cdc7SCole Faust // recompute the hash. 163*4947cdc7SCole Faust // - save the old buckets on a free list. 164*4947cdc7SCole Faust ht.table = make([]bucket, len(ht.table)<<1) 165*4947cdc7SCole Faust oldhead := ht.head 166*4947cdc7SCole Faust ht.head = nil 167*4947cdc7SCole Faust ht.tailLink = &ht.head 168*4947cdc7SCole Faust ht.len = 0 169*4947cdc7SCole Faust for e := oldhead; e != nil; e = e.next { 170*4947cdc7SCole Faust ht.insert(e.key, e.value) 171*4947cdc7SCole Faust } 172*4947cdc7SCole Faust ht.bucket0[0] = bucket{} // clear out unused initial bucket 173*4947cdc7SCole Faust} 174*4947cdc7SCole Faust 175*4947cdc7SCole Faustfunc (ht *hashtable) lookup(k Value) (v Value, found bool, err error) { 176*4947cdc7SCole Faust h, err := k.Hash() 177*4947cdc7SCole Faust if err != nil { 178*4947cdc7SCole Faust return nil, false, err // unhashable 179*4947cdc7SCole Faust } 180*4947cdc7SCole Faust if h == 0 { 181*4947cdc7SCole Faust h = 1 // zero is reserved 182*4947cdc7SCole Faust } 183*4947cdc7SCole Faust if ht.table == nil { 184*4947cdc7SCole Faust return None, false, nil // empty 185*4947cdc7SCole Faust } 186*4947cdc7SCole Faust 187*4947cdc7SCole Faust // Inspect each bucket in the bucket list. 188*4947cdc7SCole Faust for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next { 189*4947cdc7SCole Faust for i := range p.entries { 190*4947cdc7SCole Faust e := &p.entries[i] 191*4947cdc7SCole Faust if e.hash == h { 192*4947cdc7SCole Faust if eq, err := Equal(k, e.key); err != nil { 193*4947cdc7SCole Faust return nil, false, err // e.g. excessively recursive tuple 194*4947cdc7SCole Faust } else if eq { 195*4947cdc7SCole Faust return e.value, true, nil // found 196*4947cdc7SCole Faust } 197*4947cdc7SCole Faust } 198*4947cdc7SCole Faust } 199*4947cdc7SCole Faust } 200*4947cdc7SCole Faust return None, false, nil // not found 201*4947cdc7SCole Faust} 202*4947cdc7SCole Faust 203*4947cdc7SCole Faust// Items returns all the items in the map (as key/value pairs) in insertion order. 204*4947cdc7SCole Faustfunc (ht *hashtable) items() []Tuple { 205*4947cdc7SCole Faust items := make([]Tuple, 0, ht.len) 206*4947cdc7SCole Faust array := make([]Value, ht.len*2) // allocate a single backing array 207*4947cdc7SCole Faust for e := ht.head; e != nil; e = e.next { 208*4947cdc7SCole Faust pair := Tuple(array[:2:2]) 209*4947cdc7SCole Faust array = array[2:] 210*4947cdc7SCole Faust pair[0] = e.key 211*4947cdc7SCole Faust pair[1] = e.value 212*4947cdc7SCole Faust items = append(items, pair) 213*4947cdc7SCole Faust } 214*4947cdc7SCole Faust return items 215*4947cdc7SCole Faust} 216*4947cdc7SCole Faust 217*4947cdc7SCole Faustfunc (ht *hashtable) first() (Value, bool) { 218*4947cdc7SCole Faust if ht.head != nil { 219*4947cdc7SCole Faust return ht.head.key, true 220*4947cdc7SCole Faust } 221*4947cdc7SCole Faust return None, false 222*4947cdc7SCole Faust} 223*4947cdc7SCole Faust 224*4947cdc7SCole Faustfunc (ht *hashtable) keys() []Value { 225*4947cdc7SCole Faust keys := make([]Value, 0, ht.len) 226*4947cdc7SCole Faust for e := ht.head; e != nil; e = e.next { 227*4947cdc7SCole Faust keys = append(keys, e.key) 228*4947cdc7SCole Faust } 229*4947cdc7SCole Faust return keys 230*4947cdc7SCole Faust} 231*4947cdc7SCole Faust 232*4947cdc7SCole Faustfunc (ht *hashtable) delete(k Value) (v Value, found bool, err error) { 233*4947cdc7SCole Faust if ht.frozen { 234*4947cdc7SCole Faust return nil, false, fmt.Errorf("cannot delete from frozen hash table") 235*4947cdc7SCole Faust } 236*4947cdc7SCole Faust if ht.itercount > 0 { 237*4947cdc7SCole Faust return nil, false, fmt.Errorf("cannot delete from hash table during iteration") 238*4947cdc7SCole Faust } 239*4947cdc7SCole Faust if ht.table == nil { 240*4947cdc7SCole Faust return None, false, nil // empty 241*4947cdc7SCole Faust } 242*4947cdc7SCole Faust h, err := k.Hash() 243*4947cdc7SCole Faust if err != nil { 244*4947cdc7SCole Faust return nil, false, err // unhashable 245*4947cdc7SCole Faust } 246*4947cdc7SCole Faust if h == 0 { 247*4947cdc7SCole Faust h = 1 // zero is reserved 248*4947cdc7SCole Faust } 249*4947cdc7SCole Faust 250*4947cdc7SCole Faust // Inspect each bucket in the bucket list. 251*4947cdc7SCole Faust for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next { 252*4947cdc7SCole Faust for i := range p.entries { 253*4947cdc7SCole Faust e := &p.entries[i] 254*4947cdc7SCole Faust if e.hash == h { 255*4947cdc7SCole Faust if eq, err := Equal(k, e.key); err != nil { 256*4947cdc7SCole Faust return nil, false, err 257*4947cdc7SCole Faust } else if eq { 258*4947cdc7SCole Faust // Remove e from doubly-linked list. 259*4947cdc7SCole Faust *e.prevLink = e.next 260*4947cdc7SCole Faust if e.next == nil { 261*4947cdc7SCole Faust ht.tailLink = e.prevLink // deletion of last entry 262*4947cdc7SCole Faust } else { 263*4947cdc7SCole Faust e.next.prevLink = e.prevLink 264*4947cdc7SCole Faust } 265*4947cdc7SCole Faust 266*4947cdc7SCole Faust v := e.value 267*4947cdc7SCole Faust *e = entry{} 268*4947cdc7SCole Faust ht.len-- 269*4947cdc7SCole Faust return v, true, nil // found 270*4947cdc7SCole Faust } 271*4947cdc7SCole Faust } 272*4947cdc7SCole Faust } 273*4947cdc7SCole Faust } 274*4947cdc7SCole Faust 275*4947cdc7SCole Faust // TODO(adonovan): opt: remove completely empty bucket from bucket list. 276*4947cdc7SCole Faust 277*4947cdc7SCole Faust return None, false, nil // not found 278*4947cdc7SCole Faust} 279*4947cdc7SCole Faust 280*4947cdc7SCole Faustfunc (ht *hashtable) clear() error { 281*4947cdc7SCole Faust if ht.frozen { 282*4947cdc7SCole Faust return fmt.Errorf("cannot clear frozen hash table") 283*4947cdc7SCole Faust } 284*4947cdc7SCole Faust if ht.itercount > 0 { 285*4947cdc7SCole Faust return fmt.Errorf("cannot clear hash table during iteration") 286*4947cdc7SCole Faust } 287*4947cdc7SCole Faust if ht.table != nil { 288*4947cdc7SCole Faust for i := range ht.table { 289*4947cdc7SCole Faust ht.table[i] = bucket{} 290*4947cdc7SCole Faust } 291*4947cdc7SCole Faust } 292*4947cdc7SCole Faust ht.head = nil 293*4947cdc7SCole Faust ht.tailLink = &ht.head 294*4947cdc7SCole Faust ht.len = 0 295*4947cdc7SCole Faust return nil 296*4947cdc7SCole Faust} 297*4947cdc7SCole Faust 298*4947cdc7SCole Faust// dump is provided as an aid to debugging. 299*4947cdc7SCole Faustfunc (ht *hashtable) dump() { 300*4947cdc7SCole Faust fmt.Printf("hashtable %p len=%d head=%p tailLink=%p", 301*4947cdc7SCole Faust ht, ht.len, ht.head, ht.tailLink) 302*4947cdc7SCole Faust if ht.tailLink != nil { 303*4947cdc7SCole Faust fmt.Printf(" *tailLink=%p", *ht.tailLink) 304*4947cdc7SCole Faust } 305*4947cdc7SCole Faust fmt.Println() 306*4947cdc7SCole Faust for j := range ht.table { 307*4947cdc7SCole Faust fmt.Printf("bucket chain %d\n", j) 308*4947cdc7SCole Faust for p := &ht.table[j]; p != nil; p = p.next { 309*4947cdc7SCole Faust fmt.Printf("bucket %p\n", p) 310*4947cdc7SCole Faust for i := range p.entries { 311*4947cdc7SCole Faust e := &p.entries[i] 312*4947cdc7SCole Faust fmt.Printf("\tentry %d @ %p hash=%d key=%v value=%v\n", 313*4947cdc7SCole Faust i, e, e.hash, e.key, e.value) 314*4947cdc7SCole Faust fmt.Printf("\t\tnext=%p &next=%p prev=%p", 315*4947cdc7SCole Faust e.next, &e.next, e.prevLink) 316*4947cdc7SCole Faust if e.prevLink != nil { 317*4947cdc7SCole Faust fmt.Printf(" *prev=%p", *e.prevLink) 318*4947cdc7SCole Faust } 319*4947cdc7SCole Faust fmt.Println() 320*4947cdc7SCole Faust } 321*4947cdc7SCole Faust } 322*4947cdc7SCole Faust } 323*4947cdc7SCole Faust} 324*4947cdc7SCole Faust 325*4947cdc7SCole Faustfunc (ht *hashtable) iterate() *keyIterator { 326*4947cdc7SCole Faust if !ht.frozen { 327*4947cdc7SCole Faust ht.itercount++ 328*4947cdc7SCole Faust } 329*4947cdc7SCole Faust return &keyIterator{ht: ht, e: ht.head} 330*4947cdc7SCole Faust} 331*4947cdc7SCole Faust 332*4947cdc7SCole Fausttype keyIterator struct { 333*4947cdc7SCole Faust ht *hashtable 334*4947cdc7SCole Faust e *entry 335*4947cdc7SCole Faust} 336*4947cdc7SCole Faust 337*4947cdc7SCole Faustfunc (it *keyIterator) Next(k *Value) bool { 338*4947cdc7SCole Faust if it.e != nil { 339*4947cdc7SCole Faust *k = it.e.key 340*4947cdc7SCole Faust it.e = it.e.next 341*4947cdc7SCole Faust return true 342*4947cdc7SCole Faust } 343*4947cdc7SCole Faust return false 344*4947cdc7SCole Faust} 345*4947cdc7SCole Faust 346*4947cdc7SCole Faustfunc (it *keyIterator) Done() { 347*4947cdc7SCole Faust if !it.ht.frozen { 348*4947cdc7SCole Faust it.ht.itercount-- 349*4947cdc7SCole Faust } 350*4947cdc7SCole Faust} 351*4947cdc7SCole Faust 352*4947cdc7SCole Faust// hashString computes the hash of s. 353*4947cdc7SCole Faustfunc hashString(s string) uint32 { 354*4947cdc7SCole Faust if len(s) >= 12 { 355*4947cdc7SCole Faust // Call the Go runtime's optimized hash implementation, 356*4947cdc7SCole Faust // which uses the AESENC instruction on amd64 machines. 357*4947cdc7SCole Faust return uint32(goStringHash(s, 0)) 358*4947cdc7SCole Faust } 359*4947cdc7SCole Faust return softHashString(s) 360*4947cdc7SCole Faust} 361*4947cdc7SCole Faust 362*4947cdc7SCole Faust//go:linkname goStringHash runtime.stringHash 363*4947cdc7SCole Faustfunc goStringHash(s string, seed uintptr) uintptr 364*4947cdc7SCole Faust 365*4947cdc7SCole Faust// softHashString computes the 32-bit FNV-1a hash of s in software. 366*4947cdc7SCole Faustfunc softHashString(s string) uint32 { 367*4947cdc7SCole Faust var h uint32 = 2166136261 368*4947cdc7SCole Faust for i := 0; i < len(s); i++ { 369*4947cdc7SCole Faust h ^= uint32(s[i]) 370*4947cdc7SCole Faust h *= 16777619 371*4947cdc7SCole Faust } 372*4947cdc7SCole Faust return h 373*4947cdc7SCole Faust} 374