1// Copyright 2024 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package liveness 6 7import ( 8 "cmd/compile/internal/base" 9 "cmd/compile/internal/bitvec" 10 "cmd/compile/internal/ir" 11 "cmd/compile/internal/ssa" 12 "cmd/internal/src" 13 "fmt" 14 "os" 15 "path/filepath" 16 "sort" 17 "strings" 18) 19 20// MergeLocalsState encapsulates information about which AUTO 21// (stack-allocated) variables within a function can be safely 22// merged/overlapped, e.g. share a stack slot with some other auto). 23// An instance of MergeLocalsState is produced by MergeLocals() below 24// and then consumed in ssagen.AllocFrame. The map 'partition' 25// contains entries of the form <N,SL> where N is an *ir.Name and SL 26// is a slice holding the indices (within 'vars') of other variables 27// that share the same slot, specifically the slot of the first 28// element in the partition, which we'll call the "leader". For 29// example, if a function contains five variables where v1/v2/v3 are 30// safe to overlap and v4/v5 are safe to overlap, the MergeLocalsState 31// content might look like 32// 33// vars: [v1, v2, v3, v4, v5] 34// partition: v1 -> [1, 0, 2], v2 -> [1, 0, 2], v3 -> [1, 0, 2] 35// v4 -> [3, 4], v5 -> [3, 4] 36// 37// A nil MergeLocalsState indicates that no local variables meet the 38// necessary criteria for overlap. 39type MergeLocalsState struct { 40 // contains auto vars that participate in overlapping 41 vars []*ir.Name 42 // maps auto variable to overlap partition 43 partition map[*ir.Name][]int 44} 45 46// candRegion is a sub-range (start, end) corresponding to an interval 47// [st,en] within the list of candidate variables. 48type candRegion struct { 49 st, en int 50} 51 52// cstate holds state information we'll need during the analysis 53// phase of stack slot merging but can be discarded when the analysis 54// is done. 55type cstate struct { 56 fn *ir.Func 57 f *ssa.Func 58 lv *liveness 59 cands []*ir.Name 60 nameToSlot map[*ir.Name]int32 61 regions []candRegion 62 indirectUE map[ssa.ID][]*ir.Name 63 ivs []Intervals 64 hashDeselected map[*ir.Name]bool 65 trace int // debug trace level 66} 67 68// MergeLocals analyzes the specified ssa function f to determine which 69// of its auto variables can safely share the same stack slot, returning 70// a state object that describes how the overlap should be done. 71func MergeLocals(fn *ir.Func, f *ssa.Func) *MergeLocalsState { 72 73 // Create a container object for useful state info and then 74 // call collectMergeCandidates to see if there are vars suitable 75 // for stack slot merging. 76 cs := &cstate{ 77 fn: fn, 78 f: f, 79 trace: base.Debug.MergeLocalsTrace, 80 } 81 cs.collectMergeCandidates() 82 if len(cs.regions) == 0 { 83 return nil 84 } 85 86 // Kick off liveness analysis. 87 // 88 // If we have a local variable such as "r2" below that's written 89 // but then not read, something like: 90 // 91 // vardef r1 92 // r1.x = ... 93 // vardef r2 94 // r2.x = 0 95 // r2.y = ... 96 // <call foo> 97 // // no subsequent use of r2 98 // ... = r1.x 99 // 100 // then for the purpose of calculating stack maps at the call, we 101 // can ignore "r2" completely during liveness analysis for stack 102 // maps, however for stack slock merging we most definitely want 103 // to treat the writes as "uses". 104 cs.lv = newliveness(fn, f, cs.cands, cs.nameToSlot, 0) 105 cs.lv.conservativeWrites = true 106 cs.lv.prologue() 107 cs.lv.solve() 108 109 // Compute intervals for each candidate based on the liveness and 110 // on block effects. 111 cs.computeIntervals() 112 113 // Perform merging within each region of the candidates list. 114 rv := cs.performMerging() 115 if err := rv.check(); err != nil { 116 base.FatalfAt(fn.Pos(), "invalid mergelocals state: %v", err) 117 } 118 return rv 119} 120 121// Subsumed returns whether variable n is subsumed, e.g. appears 122// in an overlap position but is not the leader in that partition. 123func (mls *MergeLocalsState) Subsumed(n *ir.Name) bool { 124 if sl, ok := mls.partition[n]; ok && mls.vars[sl[0]] != n { 125 return true 126 } 127 return false 128} 129 130// IsLeader returns whether a variable n is the leader (first element) 131// in a sharing partition. 132func (mls *MergeLocalsState) IsLeader(n *ir.Name) bool { 133 if sl, ok := mls.partition[n]; ok && mls.vars[sl[0]] == n { 134 return true 135 } 136 return false 137} 138 139// Leader returns the leader variable for subsumed var n. 140func (mls *MergeLocalsState) Leader(n *ir.Name) *ir.Name { 141 if sl, ok := mls.partition[n]; ok { 142 if mls.vars[sl[0]] == n { 143 panic("variable is not subsumed") 144 } 145 return mls.vars[sl[0]] 146 } 147 panic("not a merge candidate") 148} 149 150// Followers writes a list of the followers for leader n into the slice tmp. 151func (mls *MergeLocalsState) Followers(n *ir.Name, tmp []*ir.Name) []*ir.Name { 152 tmp = tmp[:0] 153 sl, ok := mls.partition[n] 154 if !ok { 155 panic("no entry for leader") 156 } 157 if mls.vars[sl[0]] != n { 158 panic("followers invoked on subsumed var") 159 } 160 for _, k := range sl[1:] { 161 tmp = append(tmp, mls.vars[k]) 162 } 163 sort.SliceStable(tmp, func(i, j int) bool { 164 return tmp[i].Sym().Name < tmp[j].Sym().Name 165 }) 166 return tmp 167} 168 169// EstSavings returns the estimated reduction in stack size (number of bytes) for 170// the given merge locals state via a pair of ints, the first for non-pointer types and the second for pointer types. 171func (mls *MergeLocalsState) EstSavings() (int, int) { 172 totnp := 0 173 totp := 0 174 for n := range mls.partition { 175 if mls.Subsumed(n) { 176 sz := int(n.Type().Size()) 177 if n.Type().HasPointers() { 178 totp += sz 179 } else { 180 totnp += sz 181 } 182 } 183 } 184 return totnp, totp 185} 186 187// check tests for various inconsistencies and problems in mls, 188// returning an error if any problems are found. 189func (mls *MergeLocalsState) check() error { 190 if mls == nil { 191 return nil 192 } 193 used := make(map[int]bool) 194 seenv := make(map[*ir.Name]int) 195 for ii, v := range mls.vars { 196 if prev, ok := seenv[v]; ok { 197 return fmt.Errorf("duplicate var %q in vslots: %d and %d\n", 198 v.Sym().Name, ii, prev) 199 } 200 seenv[v] = ii 201 } 202 for k, sl := range mls.partition { 203 // length of slice value needs to be more than 1 204 if len(sl) < 2 { 205 return fmt.Errorf("k=%q v=%+v slice len %d invalid", 206 k.Sym().Name, sl, len(sl)) 207 } 208 // values in the slice need to be var indices 209 for i, v := range sl { 210 if v < 0 || v > len(mls.vars)-1 { 211 return fmt.Errorf("k=%q v=+%v slpos %d vslot %d out of range of m.v", k.Sym().Name, sl, i, v) 212 } 213 } 214 } 215 for k, sl := range mls.partition { 216 foundk := false 217 for i, v := range sl { 218 vv := mls.vars[v] 219 if i == 0 { 220 if !mls.IsLeader(vv) { 221 return fmt.Errorf("k=%s v=+%v slpos 0 vslot %d IsLeader(%q) is false should be true", k.Sym().Name, sl, v, vv.Sym().Name) 222 } 223 } else { 224 if !mls.Subsumed(vv) { 225 return fmt.Errorf("k=%s v=+%v slpos %d vslot %d Subsumed(%q) is false should be true", k.Sym().Name, sl, i, v, vv.Sym().Name) 226 } 227 if mls.Leader(vv) != mls.vars[sl[0]] { 228 return fmt.Errorf("k=%s v=+%v slpos %d vslot %d Leader(%q) got %v want %v", k.Sym().Name, sl, i, v, vv.Sym().Name, mls.Leader(vv), mls.vars[sl[0]]) 229 } 230 } 231 if vv == k { 232 foundk = true 233 if used[v] { 234 return fmt.Errorf("k=%s v=+%v val slice used violation at slpos %d vslot %d", k.Sym().Name, sl, i, v) 235 } 236 used[v] = true 237 } 238 } 239 if !foundk { 240 return fmt.Errorf("k=%s v=+%v slice value missing k", k.Sym().Name, sl) 241 } 242 vl := mls.vars[sl[0]] 243 for _, v := range sl[1:] { 244 vv := mls.vars[v] 245 if vv.Type().Size() > vl.Type().Size() { 246 return fmt.Errorf("k=%s v=+%v follower %s size %d larger than leader %s size %d", k.Sym().Name, sl, vv.Sym().Name, vv.Type().Size(), vl.Sym().Name, vl.Type().Size()) 247 } 248 if vv.Type().HasPointers() && !vl.Type().HasPointers() { 249 return fmt.Errorf("k=%s v=+%v follower %s hasptr=true but leader %s hasptr=false", k.Sym().Name, sl, vv.Sym().Name, vl.Sym().Name) 250 } 251 if vv.Type().Alignment() > vl.Type().Alignment() { 252 return fmt.Errorf("k=%s v=+%v follower %s align %d greater than leader %s align %d", k.Sym().Name, sl, vv.Sym().Name, vv.Type().Alignment(), vl.Sym().Name, vl.Type().Alignment()) 253 } 254 } 255 } 256 for i := range used { 257 if !used[i] { 258 return fmt.Errorf("pos %d var %q unused", i, mls.vars[i]) 259 } 260 } 261 return nil 262} 263 264func (mls *MergeLocalsState) String() string { 265 var leaders []*ir.Name 266 for n, sl := range mls.partition { 267 if n == mls.vars[sl[0]] { 268 leaders = append(leaders, n) 269 } 270 } 271 sort.Slice(leaders, func(i, j int) bool { 272 return leaders[i].Sym().Name < leaders[j].Sym().Name 273 }) 274 var sb strings.Builder 275 for _, n := range leaders { 276 sb.WriteString(n.Sym().Name + ":") 277 sl := mls.partition[n] 278 for _, k := range sl[1:] { 279 n := mls.vars[k] 280 sb.WriteString(" " + n.Sym().Name) 281 } 282 sb.WriteString("\n") 283 } 284 return sb.String() 285} 286 287// collectMergeCandidates visits all of the AUTO vars declared in 288// function fn and identifies a list of candidate variables for 289// merging / overlapping. On return the "cands" field of cs will be 290// filled in with our set of potentially overlappable candidate 291// variables, the "regions" field will hold regions/sequence of 292// compatible vars within the candidates list, "nameToSlot" field will 293// be populated, and the "indirectUE" field will be filled in with 294// information about indirect upwards-exposed uses in the func. 295func (cs *cstate) collectMergeCandidates() { 296 var cands []*ir.Name 297 298 // Collect up the available set of appropriate AUTOs in the 299 // function as a first step, and bail if we have fewer than 300 // two candidates. 301 for _, n := range cs.fn.Dcl { 302 if !n.Used() { 303 continue 304 } 305 if !ssa.IsMergeCandidate(n) { 306 continue 307 } 308 cands = append(cands, n) 309 } 310 if len(cands) < 2 { 311 return 312 } 313 314 // Sort by pointerness, size, and then name. 315 sort.SliceStable(cands, func(i, j int) bool { 316 return nameLess(cands[i], cands[j]) 317 }) 318 319 if cs.trace > 1 { 320 fmt.Fprintf(os.Stderr, "=-= raw cand list for func %v:\n", cs.fn) 321 for i := range cands { 322 dumpCand(cands[i], i) 323 } 324 } 325 326 // Now generate an initial pruned candidate list and regions list. 327 // This may be empty if we don't have enough compatible candidates. 328 initial, _ := cs.genRegions(cands) 329 if len(initial) < 2 { 330 return 331 } 332 333 // Set up for hash bisection if enabled. 334 cs.setupHashBisection(initial) 335 336 // Create and populate an indirect use table that we'll use 337 // during interval construction. As part of this process we may 338 // wind up tossing out additional candidates, so check to make 339 // sure we still have something to work with. 340 cs.cands, cs.regions = cs.populateIndirectUseTable(initial) 341 if len(cs.cands) < 2 { 342 return 343 } 344 345 // At this point we have a final pruned set of candidates and a 346 // corresponding set of regions for the candidates. Build a 347 // name-to-slot map for the candidates. 348 cs.nameToSlot = make(map[*ir.Name]int32) 349 for i, n := range cs.cands { 350 cs.nameToSlot[n] = int32(i) 351 } 352 353 if cs.trace > 1 { 354 fmt.Fprintf(os.Stderr, "=-= pruned candidate list for fn %v:\n", cs.fn) 355 for i := range cs.cands { 356 dumpCand(cs.cands[i], i) 357 } 358 } 359} 360 361// genRegions generates a set of regions within cands corresponding 362// to potentially overlappable/mergeable variables. 363func (cs *cstate) genRegions(cands []*ir.Name) ([]*ir.Name, []candRegion) { 364 var pruned []*ir.Name 365 var regions []candRegion 366 st := 0 367 for { 368 en := nextRegion(cands, st) 369 if en == -1 { 370 break 371 } 372 if st == en { 373 // region has just one element, we can skip it 374 st++ 375 continue 376 } 377 pst := len(pruned) 378 pen := pst + (en - st) 379 if cs.trace > 1 { 380 fmt.Fprintf(os.Stderr, "=-= addregion st=%d en=%d: add part %d -> %d\n", st, en, pst, pen) 381 } 382 383 // non-empty region, add to pruned 384 pruned = append(pruned, cands[st:en+1]...) 385 regions = append(regions, candRegion{st: pst, en: pen}) 386 st = en + 1 387 } 388 if len(pruned) < 2 { 389 return nil, nil 390 } 391 return pruned, regions 392} 393 394func (cs *cstate) dumpFunc() { 395 fmt.Fprintf(os.Stderr, "=-= mergelocalsdumpfunc %v:\n", cs.fn) 396 ii := 0 397 for k, b := range cs.f.Blocks { 398 fmt.Fprintf(os.Stderr, "b%d:\n", k) 399 for _, v := range b.Values { 400 pos := base.Ctxt.PosTable.Pos(v.Pos) 401 fmt.Fprintf(os.Stderr, "=-= %d L%d|C%d %s\n", ii, pos.RelLine(), pos.RelCol(), v.LongString()) 402 ii++ 403 } 404 } 405} 406 407func (cs *cstate) dumpFuncIfSelected() { 408 if base.Debug.MergeLocalsDumpFunc == "" { 409 return 410 } 411 if !strings.HasSuffix(fmt.Sprintf("%v", cs.fn), 412 base.Debug.MergeLocalsDumpFunc) { 413 return 414 } 415 cs.dumpFunc() 416} 417 418// setupHashBisection checks to see if any of the candidate 419// variables have been de-selected by our hash debug. Here 420// we also implement the -d=mergelocalshtrace flag, which turns 421// on debug tracing only if we have at least two candidates 422// selected by the hash debug for this function. 423func (cs *cstate) setupHashBisection(cands []*ir.Name) { 424 if base.Debug.MergeLocalsHash == "" { 425 return 426 } 427 deselected := make(map[*ir.Name]bool) 428 selCount := 0 429 for _, cand := range cands { 430 if !base.MergeLocalsHash.MatchPosWithInfo(cand.Pos(), "mergelocals", nil) { 431 deselected[cand] = true 432 } else { 433 deselected[cand] = false 434 selCount++ 435 } 436 } 437 if selCount < len(cands) { 438 cs.hashDeselected = deselected 439 } 440 if base.Debug.MergeLocalsHTrace != 0 && selCount >= 2 { 441 cs.trace = base.Debug.MergeLocalsHTrace 442 } 443} 444 445// populateIndirectUseTable creates and populates the "indirectUE" table 446// within cs by doing some additional analysis of how the vars in 447// cands are accessed in the function. 448// 449// It is possible to have situations where a given ir.Name is 450// non-address-taken at the source level, but whose address is 451// materialized in order to accommodate the needs of 452// architecture-dependent operations or one sort or another (examples 453// include things like LoweredZero/DuffZero, etc). The issue here is 454// that the SymAddr op will show up as touching a variable of 455// interest, but the subsequent memory op will not. This is generally 456// not an issue for computing whether something is live across a call, 457// but it is problematic for collecting the more fine-grained live 458// interval info that drives stack slot merging. 459// 460// To handle this problem, make a forward pass over each basic block 461// looking for instructions of the form vK := SymAddr(N) where N is a 462// raw candidate. Create an entry in a map at that point from vK to 463// its use count. Continue the walk, looking for uses of vK: when we 464// see one, record it in a side table as an upwards exposed use of N. 465// Each time we see a use, decrement the use count in the map, and if 466// we hit zero, remove the map entry. If we hit the end of the basic 467// block and we still have map entries, then evict the name in 468// question from the candidate set. 469func (cs *cstate) populateIndirectUseTable(cands []*ir.Name) ([]*ir.Name, []candRegion) { 470 471 // main indirect UE table, this is what we're producing in this func 472 indirectUE := make(map[ssa.ID][]*ir.Name) 473 474 // this map holds the current set of candidates; the set may 475 // shrink if we have to evict any candidates. 476 rawcands := make(map[*ir.Name]struct{}) 477 478 // maps ssa value V to the ir.Name it is taking the addr of, 479 // plus a count of the uses we've seen of V during a block walk. 480 pendingUses := make(map[ssa.ID]nameCount) 481 482 // A temporary indirect UE tab just for the current block 483 // being processed; used to help with evictions. 484 blockIndirectUE := make(map[ssa.ID][]*ir.Name) 485 486 // temporary map used to record evictions in a given block. 487 evicted := make(map[*ir.Name]bool) 488 for _, n := range cands { 489 rawcands[n] = struct{}{} 490 } 491 for k := 0; k < len(cs.f.Blocks); k++ { 492 genmapclear(pendingUses) 493 genmapclear(blockIndirectUE) 494 b := cs.f.Blocks[k] 495 for _, v := range b.Values { 496 if n, e := affectedVar(v); n != nil { 497 if _, ok := rawcands[n]; ok { 498 if e&ssa.SymAddr != 0 && v.Uses != 0 { 499 // we're taking the address of candidate var n 500 if _, ok := pendingUses[v.ID]; ok { 501 // should never happen 502 base.FatalfAt(v.Pos, "internal error: apparent multiple defs for SSA value %d", v.ID) 503 } 504 // Stash an entry in pendingUses recording 505 // that we took the address of "n" via this 506 // val. 507 pendingUses[v.ID] = nameCount{n: n, count: v.Uses} 508 if cs.trace > 2 { 509 fmt.Fprintf(os.Stderr, "=-= SymAddr(%s) on %s\n", 510 n.Sym().Name, v.LongString()) 511 } 512 } 513 } 514 } 515 for _, arg := range v.Args { 516 if nc, ok := pendingUses[arg.ID]; ok { 517 // We found a use of some value that took the 518 // address of nc.n. Record this inst as a 519 // potential indirect use. 520 if cs.trace > 2 { 521 fmt.Fprintf(os.Stderr, "=-= add indirectUE(%s) count=%d on %s\n", nc.n.Sym().Name, nc.count, v.LongString()) 522 } 523 blockIndirectUE[v.ID] = append(blockIndirectUE[v.ID], nc.n) 524 nc.count-- 525 if nc.count == 0 { 526 // That was the last use of the value. Clean 527 // up the entry in pendingUses. 528 if cs.trace > 2 { 529 fmt.Fprintf(os.Stderr, "=-= last use of v%d\n", 530 arg.ID) 531 } 532 delete(pendingUses, arg.ID) 533 } else { 534 // Not the last use; record the decremented 535 // use count and move on. 536 pendingUses[arg.ID] = nc 537 } 538 } 539 } 540 } 541 542 // We've reached the end of this basic block: if we have any 543 // leftover entries in pendingUses, then evict the 544 // corresponding names from the candidate set. The idea here 545 // is that if we materialized the address of some local and 546 // that value is flowing out of the block off somewhere else, 547 // we're going to treat that local as truly address-taken and 548 // not have it be a merge candidate. 549 genmapclear(evicted) 550 if len(pendingUses) != 0 { 551 for id, nc := range pendingUses { 552 if cs.trace > 2 { 553 fmt.Fprintf(os.Stderr, "=-= evicting %q due to pendingUse %d count %d\n", nc.n.Sym().Name, id, nc.count) 554 } 555 delete(rawcands, nc.n) 556 evicted[nc.n] = true 557 } 558 } 559 // Copy entries from blockIndirectUE into final indirectUE. Skip 560 // anything that we evicted in the loop above. 561 for id, sl := range blockIndirectUE { 562 for _, n := range sl { 563 if evicted[n] { 564 continue 565 } 566 indirectUE[id] = append(indirectUE[id], n) 567 if cs.trace > 2 { 568 fmt.Fprintf(os.Stderr, "=-= add final indUE v%d name %s\n", id, n.Sym().Name) 569 } 570 } 571 } 572 } 573 if len(rawcands) < 2 { 574 return nil, nil 575 } 576 cs.indirectUE = indirectUE 577 if cs.trace > 2 { 578 fmt.Fprintf(os.Stderr, "=-= iuetab:\n") 579 ids := make([]ssa.ID, 0, len(indirectUE)) 580 for k := range indirectUE { 581 ids = append(ids, k) 582 } 583 sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] }) 584 for _, id := range ids { 585 fmt.Fprintf(os.Stderr, " v%d:", id) 586 for _, n := range indirectUE[id] { 587 fmt.Fprintf(os.Stderr, " %s", n.Sym().Name) 588 } 589 fmt.Fprintf(os.Stderr, "\n") 590 } 591 } 592 593 pruned := cands[:0] 594 for k := range rawcands { 595 pruned = append(pruned, k) 596 } 597 sort.Slice(pruned, func(i, j int) bool { 598 return nameLess(pruned[i], pruned[j]) 599 }) 600 var regions []candRegion 601 pruned, regions = cs.genRegions(pruned) 602 if len(pruned) < 2 { 603 return nil, nil 604 } 605 return pruned, regions 606} 607 608// FIXME: bootstrap tool compiler is build with a "go 1.20" go.mod, so 609// we are not allowed to use map clear yet. Use this helper instead. 610func genmapclear[KT comparable, VT any](m map[KT]VT) { 611 for k := range m { 612 delete(m, k) 613 } 614} 615 616type nameCount struct { 617 n *ir.Name 618 count int32 619} 620 621// nameLess compares ci with cj to see if ci should be less than cj in 622// a relative ordering of candidate variables. This is used to sort 623// vars by pointerness (variables with pointers first), then in order 624// of decreasing alignment, then by decreasing size. We are assuming a 625// merging algorithm that merges later entries in the list into 626// earlier entries. An example ordered candidate list produced by 627// nameLess: 628// 629// idx name type align size 630// 0: abc [10]*int 8 80 631// 1: xyz [9]*int 8 72 632// 2: qrs [2]*int 8 16 633// 3: tuv [9]int 8 72 634// 4: wxy [9]int32 4 36 635// 5: jkl [8]int32 4 32 636func nameLess(ci, cj *ir.Name) bool { 637 if ci.Type().HasPointers() != cj.Type().HasPointers() { 638 return ci.Type().HasPointers() 639 } 640 if ci.Type().Alignment() != cj.Type().Alignment() { 641 return cj.Type().Alignment() < ci.Type().Alignment() 642 } 643 if ci.Type().Size() != cj.Type().Size() { 644 return cj.Type().Size() < ci.Type().Size() 645 } 646 if ci.Sym().Name != cj.Sym().Name { 647 return ci.Sym().Name < cj.Sym().Name 648 } 649 return fmt.Sprintf("%v", ci.Pos()) < fmt.Sprintf("%v", cj.Pos()) 650} 651 652// nextRegion starts at location idx and walks forward in the cands 653// slice looking for variables that are "compatible" (potentially 654// overlappable, in the sense that they could potentially share the 655// stack slot of cands[idx]); it returns the end of the new region 656// (range of compatible variables starting at idx). 657func nextRegion(cands []*ir.Name, idx int) int { 658 n := len(cands) 659 if idx >= n { 660 return -1 661 } 662 c0 := cands[idx] 663 szprev := c0.Type().Size() 664 alnprev := c0.Type().Alignment() 665 for j := idx + 1; j < n; j++ { 666 cj := cands[j] 667 szj := cj.Type().Size() 668 if szj > szprev { 669 return j - 1 670 } 671 alnj := cj.Type().Alignment() 672 if alnj > alnprev { 673 return j - 1 674 } 675 szprev = szj 676 alnprev = alnj 677 } 678 return n - 1 679} 680 681// mergeVisitRegion tries to perform overlapping of variables with a 682// given subrange of cands described by st and en (indices into our 683// candidate var list), where the variables within this range have 684// already been determined to be compatible with respect to type, 685// size, etc. Overlapping is done in a a greedy fashion: we select the 686// first element in the st->en range, then walk the rest of the 687// elements adding in vars whose lifetimes don't overlap with the 688// first element, then repeat the process until we run out of work. 689// Ordering of the candidates within the region [st,en] is important; 690// within the list the assumption is that if we overlap two variables 691// X and Y where X precedes Y in the list, we need to make X the 692// "leader" (keep X's slot and set Y's frame offset to X's) as opposed 693// to the other way around, since it's possible that Y is smaller in 694// size than X. 695func (cs *cstate) mergeVisitRegion(mls *MergeLocalsState, st, en int) { 696 if cs.trace > 1 { 697 fmt.Fprintf(os.Stderr, "=-= mergeVisitRegion(st=%d, en=%d)\n", st, en) 698 } 699 n := en - st + 1 700 used := bitvec.New(int32(n)) 701 702 nxt := func(slot int) int { 703 for c := slot - st; c < n; c++ { 704 if used.Get(int32(c)) { 705 continue 706 } 707 return c + st 708 } 709 return -1 710 } 711 712 navail := n 713 cands := cs.cands 714 ivs := cs.ivs 715 if cs.trace > 1 { 716 fmt.Fprintf(os.Stderr, " =-= navail = %d\n", navail) 717 } 718 for navail >= 2 { 719 leader := nxt(st) 720 used.Set(int32(leader - st)) 721 navail-- 722 723 if cs.trace > 1 { 724 fmt.Fprintf(os.Stderr, " =-= begin leader %d used=%s\n", leader, 725 used.String()) 726 } 727 elems := []int{leader} 728 lints := ivs[leader] 729 730 for succ := nxt(leader + 1); succ != -1; succ = nxt(succ + 1) { 731 732 // Skip if de-selected by merge locals hash. 733 if cs.hashDeselected != nil && cs.hashDeselected[cands[succ]] { 734 continue 735 } 736 // Skip if already used. 737 if used.Get(int32(succ - st)) { 738 continue 739 } 740 if cs.trace > 1 { 741 fmt.Fprintf(os.Stderr, " =-= overlap of %d[%v] {%s} with %d[%v] {%s} is: %v\n", leader, cands[leader], lints.String(), succ, cands[succ], ivs[succ].String(), lints.Overlaps(ivs[succ])) 742 } 743 744 // Can we overlap leader with this var? 745 if lints.Overlaps(ivs[succ]) { 746 continue 747 } else { 748 // Add to overlap set. 749 elems = append(elems, succ) 750 lints = lints.Merge(ivs[succ]) 751 } 752 } 753 if len(elems) > 1 { 754 // We found some things to overlap with leader. Add the 755 // candidate elements to "vars" and update "partition". 756 off := len(mls.vars) 757 sl := make([]int, len(elems)) 758 for i, candslot := range elems { 759 sl[i] = off + i 760 mls.vars = append(mls.vars, cands[candslot]) 761 mls.partition[cands[candslot]] = sl 762 } 763 navail -= (len(elems) - 1) 764 for i := range elems { 765 used.Set(int32(elems[i] - st)) 766 } 767 if cs.trace > 1 { 768 fmt.Fprintf(os.Stderr, "=-= overlapping %+v:\n", sl) 769 for i := range sl { 770 dumpCand(mls.vars[sl[i]], sl[i]) 771 } 772 for i, v := range elems { 773 fmt.Fprintf(os.Stderr, "=-= %d: sl=%d %s\n", i, v, ivs[v]) 774 } 775 } 776 } 777 } 778} 779 780// performMerging carries out variable merging within each of the 781// candidate ranges in regions, returning a state object 782// that describes the variable overlaps. 783func (cs *cstate) performMerging() *MergeLocalsState { 784 cands := cs.cands 785 786 mls := &MergeLocalsState{ 787 partition: make(map[*ir.Name][]int), 788 } 789 790 // Dump state before attempting overlap. 791 if cs.trace > 1 { 792 fmt.Fprintf(os.Stderr, "=-= cands live before overlap:\n") 793 for i := range cands { 794 c := cands[i] 795 fmt.Fprintf(os.Stderr, "%d: %v sz=%d ivs=%s\n", 796 i, c.Sym().Name, c.Type().Size(), cs.ivs[i].String()) 797 } 798 fmt.Fprintf(os.Stderr, "=-= regions (%d): ", len(cs.regions)) 799 for _, cr := range cs.regions { 800 fmt.Fprintf(os.Stderr, " [%d,%d]", cr.st, cr.en) 801 } 802 fmt.Fprintf(os.Stderr, "\n") 803 } 804 805 // Apply a greedy merge/overlap strategy within each region 806 // of compatible variables. 807 for _, cr := range cs.regions { 808 cs.mergeVisitRegion(mls, cr.st, cr.en) 809 } 810 if len(mls.vars) == 0 { 811 return nil 812 } 813 return mls 814} 815 816// computeIntervals performs a backwards sweep over the instructions 817// of the function we're compiling, building up an Intervals object 818// for each candidate variable by looking for upwards exposed uses 819// and kills. 820func (cs *cstate) computeIntervals() { 821 lv := cs.lv 822 ibuilders := make([]IntervalsBuilder, len(cs.cands)) 823 nvars := int32(len(lv.vars)) 824 liveout := bitvec.New(nvars) 825 826 cs.dumpFuncIfSelected() 827 828 // Count instructions. 829 ninstr := 0 830 for _, b := range lv.f.Blocks { 831 ninstr += len(b.Values) 832 } 833 // current instruction index during backwards walk 834 iidx := ninstr - 1 835 836 // Make a backwards pass over all blocks 837 for k := len(lv.f.Blocks) - 1; k >= 0; k-- { 838 b := lv.f.Blocks[k] 839 be := lv.blockEffects(b) 840 841 if cs.trace > 2 { 842 fmt.Fprintf(os.Stderr, "=-= liveout from tail of b%d: ", k) 843 for j := range lv.vars { 844 if be.liveout.Get(int32(j)) { 845 fmt.Fprintf(os.Stderr, " %q", lv.vars[j].Sym().Name) 846 } 847 } 848 fmt.Fprintf(os.Stderr, "\n") 849 } 850 851 // Take into account effects taking place at end of this basic 852 // block by comparing our current live set with liveout for 853 // the block. If a given var was not live before and is now 854 // becoming live we need to mark this transition with a 855 // builder "Live" call; similarly if a var was live before and 856 // is now no longer live, we need a "Kill" call. 857 for j := range lv.vars { 858 isLive := liveout.Get(int32(j)) 859 blockLiveOut := be.liveout.Get(int32(j)) 860 if isLive { 861 if !blockLiveOut { 862 if cs.trace > 2 { 863 fmt.Fprintf(os.Stderr, "=+= at instr %d block boundary kill of %v\n", iidx, lv.vars[j]) 864 } 865 ibuilders[j].Kill(iidx) 866 } 867 } else if blockLiveOut { 868 if cs.trace > 2 { 869 fmt.Fprintf(os.Stderr, "=+= at block-end instr %d %v becomes live\n", 870 iidx, lv.vars[j]) 871 } 872 ibuilders[j].Live(iidx) 873 } 874 } 875 876 // Set our working "currently live" set to the previously 877 // computed live out set for the block. 878 liveout.Copy(be.liveout) 879 880 // Now walk backwards through this block. 881 for i := len(b.Values) - 1; i >= 0; i-- { 882 v := b.Values[i] 883 884 if cs.trace > 2 { 885 fmt.Fprintf(os.Stderr, "=-= b%d instr %d: %s\n", k, iidx, v.LongString()) 886 } 887 888 // Update liveness based on what we see happening in this 889 // instruction. 890 pos, e := lv.valueEffects(v) 891 becomeslive := e&uevar != 0 892 iskilled := e&varkill != 0 893 if becomeslive && iskilled { 894 // we do not ever expect to see both a kill and an 895 // upwards exposed use given our size constraints. 896 panic("should never happen") 897 } 898 if iskilled && liveout.Get(pos) { 899 ibuilders[pos].Kill(iidx) 900 liveout.Unset(pos) 901 if cs.trace > 2 { 902 fmt.Fprintf(os.Stderr, "=+= at instr %d kill of %v\n", 903 iidx, lv.vars[pos]) 904 } 905 } else if becomeslive && !liveout.Get(pos) { 906 ibuilders[pos].Live(iidx) 907 liveout.Set(pos) 908 if cs.trace > 2 { 909 fmt.Fprintf(os.Stderr, "=+= at instr %d upwards-exposed use of %v\n", 910 iidx, lv.vars[pos]) 911 } 912 } 913 914 if cs.indirectUE != nil { 915 // Now handle "indirect" upwards-exposed uses. 916 ues := cs.indirectUE[v.ID] 917 for _, n := range ues { 918 if pos, ok := lv.idx[n]; ok { 919 if !liveout.Get(pos) { 920 ibuilders[pos].Live(iidx) 921 liveout.Set(pos) 922 if cs.trace > 2 { 923 fmt.Fprintf(os.Stderr, "=+= at instr %d v%d indirect upwards-exposed use of %v\n", iidx, v.ID, lv.vars[pos]) 924 } 925 } 926 } 927 } 928 } 929 iidx-- 930 } 931 932 // This check disabled for now due to the way scheduling works 933 // for ops that materialize values of local variables. For 934 // many architecture we have rewrite rules of this form: 935 // 936 // (LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVDaddr {sym} (SPanchored base mem)) 937 // (LocalAddr <t> {sym} base _) && !t.Elem().HasPointers() => (MOVDaddr {sym} base) 938 // 939 // which are designed to ensure that if you have a pointerful 940 // variable "abc" sequence 941 // 942 // v30 = VarDef <mem> {abc} v21 943 // v31 = LocalAddr <*SB> {abc} v2 v30 944 // v32 = Zero <mem> {SB} [2056] v31 v30 945 // 946 // this will be lowered into 947 // 948 // v30 = VarDef <mem> {sb} v21 949 // v106 = SPanchored <uintptr> v2 v30 950 // v31 = MOVDaddr <*SB> {sb} v106 951 // v3 = DUFFZERO <mem> [2056] v31 v30 952 // 953 // Note the SPanchored: this ensures that the scheduler won't 954 // move the MOVDaddr earlier than the vardef. With a variable 955 // "xyz" that has no pointers, howver, if we start with 956 // 957 // v66 = VarDef <mem> {t2} v65 958 // v67 = LocalAddr <*T> {t2} v2 v66 959 // v68 = Zero <mem> {T} [2056] v67 v66 960 // 961 // we might lower to 962 // 963 // v66 = VarDef <mem> {t2} v65 964 // v29 = MOVDaddr <*T> {t2} [2032] v2 965 // v43 = LoweredZero <mem> v67 v29 v66 966 // v68 = Zero [2056] v2 v43 967 // 968 // where that MOVDaddr can float around arbitrarily, meaning 969 // that we may see an upwards-exposed use to it before the 970 // VarDef. 971 // 972 // One avenue to restoring the check below would be to change 973 // the rewrite rules to something like 974 // 975 // (LocalAddr <t> {sym} base mem) && (t.Elem().HasPointers() || isMergeCandidate(t) => (MOVDaddr {sym} (SPanchored base mem)) 976 // 977 // however that change will have to be carefully evaluated, 978 // since it would constrain the scheduler for _all_ LocalAddr 979 // ops for potential merge candidates, even if we don't 980 // actually succeed in any overlaps. This will be revisitged in 981 // a later CL if possible. 982 // 983 const checkLiveOnEntry = false 984 if checkLiveOnEntry && b == lv.f.Entry { 985 for j, v := range lv.vars { 986 if liveout.Get(int32(j)) { 987 lv.f.Fatalf("%v %L recorded as live on entry", 988 lv.fn.Nname, v) 989 } 990 } 991 } 992 } 993 if iidx != -1 { 994 panic("iidx underflow") 995 } 996 997 // Finish intervals construction. 998 ivs := make([]Intervals, len(cs.cands)) 999 for i := range cs.cands { 1000 var err error 1001 ivs[i], err = ibuilders[i].Finish() 1002 if err != nil { 1003 cs.dumpFunc() 1004 base.FatalfAt(cs.cands[i].Pos(), "interval construct error for var %q in func %q (%d instrs): %v", cs.cands[i].Sym().Name, ir.FuncName(cs.fn), ninstr, err) 1005 } 1006 } 1007 cs.ivs = ivs 1008} 1009 1010func fmtFullPos(p src.XPos) string { 1011 var sb strings.Builder 1012 sep := "" 1013 base.Ctxt.AllPos(p, func(pos src.Pos) { 1014 fmt.Fprintf(&sb, sep) 1015 sep = "|" 1016 file := filepath.Base(pos.Filename()) 1017 fmt.Fprintf(&sb, "%s:%d:%d", file, pos.Line(), pos.Col()) 1018 }) 1019 return sb.String() 1020} 1021 1022func dumpCand(c *ir.Name, i int) { 1023 fmt.Fprintf(os.Stderr, " %d: %s %q sz=%d hp=%v align=%d t=%v\n", 1024 i, fmtFullPos(c.Pos()), c.Sym().Name, c.Type().Size(), 1025 c.Type().HasPointers(), c.Type().Alignment(), c.Type()) 1026} 1027 1028// for unit testing only. 1029func MakeMergeLocalsState(partition map[*ir.Name][]int, vars []*ir.Name) (*MergeLocalsState, error) { 1030 mls := &MergeLocalsState{partition: partition, vars: vars} 1031 if err := mls.check(); err != nil { 1032 return nil, err 1033 } 1034 return mls, nil 1035} 1036