1// Copyright 2023 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package inlheur
6
7import (
8	"cmd/compile/internal/base"
9	"cmd/compile/internal/ir"
10	"cmd/compile/internal/pgoir"
11	"cmd/compile/internal/types"
12	"fmt"
13	"os"
14	"sort"
15	"strconv"
16	"strings"
17)
18
19// These constants enumerate the set of possible ways/scenarios
20// in which we'll adjust the score of a given callsite.
21type scoreAdjustTyp uint
22
23// These constants capture the various ways in which the inliner's
24// scoring phase can adjust a callsite score based on heuristics. They
25// fall broadly into three categories:
26//
27// 1) adjustments based solely on the callsite context (ex: call
28// appears on panic path)
29//
30// 2) adjustments that take into account specific interesting values
31// passed at a call site (ex: passing a constant that could result in
32// cprop/deadcode in the caller)
33//
34// 3) adjustments that take into account values returned from the call
35// at a callsite (ex: call always returns the same inlinable function,
36// and return value flows unmodified into an indirect call)
37//
38// For categories 2 and 3 above, each adjustment can have either a
39// "must" version and a "may" version (but not both). Here the idea is
40// that in the "must" version the value flow is unconditional: if the
41// callsite executes, then the condition we're interested in (ex:
42// param feeding call) is guaranteed to happen. For the "may" version,
43// there may be control flow that could cause the benefit to be
44// bypassed.
45const (
46	// Category 1 adjustments (see above)
47	panicPathAdj scoreAdjustTyp = (1 << iota)
48	initFuncAdj
49	inLoopAdj
50
51	// Category 2 adjustments (see above).
52	passConstToIfAdj
53	passConstToNestedIfAdj
54	passConcreteToItfCallAdj
55	passConcreteToNestedItfCallAdj
56	passFuncToIndCallAdj
57	passFuncToNestedIndCallAdj
58	passInlinableFuncToIndCallAdj
59	passInlinableFuncToNestedIndCallAdj
60
61	// Category 3 adjustments.
62	returnFeedsConstToIfAdj
63	returnFeedsFuncToIndCallAdj
64	returnFeedsInlinableFuncToIndCallAdj
65	returnFeedsConcreteToInterfaceCallAdj
66
67	sentinelScoreAdj // sentinel; not a real adjustment
68)
69
70// This table records the specific values we use to adjust call
71// site scores in a given scenario.
72// NOTE: these numbers are chosen very arbitrarily; ideally
73// we will go through some sort of turning process to decide
74// what value for each one produces the best performance.
75
76var adjValues = map[scoreAdjustTyp]int{
77	panicPathAdj:                          40,
78	initFuncAdj:                           20,
79	inLoopAdj:                             -5,
80	passConstToIfAdj:                      -20,
81	passConstToNestedIfAdj:                -15,
82	passConcreteToItfCallAdj:              -30,
83	passConcreteToNestedItfCallAdj:        -25,
84	passFuncToIndCallAdj:                  -25,
85	passFuncToNestedIndCallAdj:            -20,
86	passInlinableFuncToIndCallAdj:         -45,
87	passInlinableFuncToNestedIndCallAdj:   -40,
88	returnFeedsConstToIfAdj:               -15,
89	returnFeedsFuncToIndCallAdj:           -25,
90	returnFeedsInlinableFuncToIndCallAdj:  -40,
91	returnFeedsConcreteToInterfaceCallAdj: -25,
92}
93
94// SetupScoreAdjustments interprets the value of the -d=inlscoreadj
95// debugging option, if set. The value of this flag is expected to be
96// a series of "/"-separated clauses of the form adj1:value1. Example:
97// -d=inlscoreadj=inLoopAdj=0/passConstToIfAdj=-99
98func SetupScoreAdjustments() {
99	if base.Debug.InlScoreAdj == "" {
100		return
101	}
102	if err := parseScoreAdj(base.Debug.InlScoreAdj); err != nil {
103		base.Fatalf("malformed -d=inlscoreadj argument %q: %v",
104			base.Debug.InlScoreAdj, err)
105	}
106}
107
108func adjStringToVal(s string) (scoreAdjustTyp, bool) {
109	for adj := scoreAdjustTyp(1); adj < sentinelScoreAdj; adj <<= 1 {
110		if adj.String() == s {
111			return adj, true
112		}
113	}
114	return 0, false
115}
116
117func parseScoreAdj(val string) error {
118	clauses := strings.Split(val, "/")
119	if len(clauses) == 0 {
120		return fmt.Errorf("no clauses")
121	}
122	for _, clause := range clauses {
123		elems := strings.Split(clause, ":")
124		if len(elems) < 2 {
125			return fmt.Errorf("clause %q: expected colon", clause)
126		}
127		if len(elems) != 2 {
128			return fmt.Errorf("clause %q has %d elements, wanted 2", clause,
129				len(elems))
130		}
131		adj, ok := adjStringToVal(elems[0])
132		if !ok {
133			return fmt.Errorf("clause %q: unknown adjustment", clause)
134		}
135		val, err := strconv.Atoi(elems[1])
136		if err != nil {
137			return fmt.Errorf("clause %q: malformed value: %v", clause, err)
138		}
139		adjValues[adj] = val
140	}
141	return nil
142}
143
144func adjValue(x scoreAdjustTyp) int {
145	if val, ok := adjValues[x]; ok {
146		return val
147	} else {
148		panic("internal error unregistered adjustment type")
149	}
150}
151
152var mayMustAdj = [...]struct{ may, must scoreAdjustTyp }{
153	{may: passConstToNestedIfAdj, must: passConstToIfAdj},
154	{may: passConcreteToNestedItfCallAdj, must: passConcreteToItfCallAdj},
155	{may: passFuncToNestedIndCallAdj, must: passFuncToNestedIndCallAdj},
156	{may: passInlinableFuncToNestedIndCallAdj, must: passInlinableFuncToIndCallAdj},
157}
158
159func isMay(x scoreAdjustTyp) bool {
160	return mayToMust(x) != 0
161}
162
163func isMust(x scoreAdjustTyp) bool {
164	return mustToMay(x) != 0
165}
166
167func mayToMust(x scoreAdjustTyp) scoreAdjustTyp {
168	for _, v := range mayMustAdj {
169		if x == v.may {
170			return v.must
171		}
172	}
173	return 0
174}
175
176func mustToMay(x scoreAdjustTyp) scoreAdjustTyp {
177	for _, v := range mayMustAdj {
178		if x == v.must {
179			return v.may
180		}
181	}
182	return 0
183}
184
185// computeCallSiteScore takes a given call site whose ir node is
186// 'call' and callee function is 'callee' and with previously computed
187// call site properties 'csflags', then computes a score for the
188// callsite that combines the size cost of the callee with heuristics
189// based on previously computed argument and function properties,
190// then stores the score and the adjustment mask in the appropriate
191// fields in 'cs'
192func (cs *CallSite) computeCallSiteScore(csa *callSiteAnalyzer, calleeProps *FuncProps) {
193	callee := cs.Callee
194	csflags := cs.Flags
195	call := cs.Call
196
197	// Start with the size-based score for the callee.
198	score := int(callee.Inl.Cost)
199	var tmask scoreAdjustTyp
200
201	if debugTrace&debugTraceScoring != 0 {
202		fmt.Fprintf(os.Stderr, "=-= scoring call to %s at %s , initial=%d\n",
203			callee.Sym().Name, fmtFullPos(call.Pos()), score)
204	}
205
206	// First some score adjustments to discourage inlining in selected cases.
207	if csflags&CallSiteOnPanicPath != 0 {
208		score, tmask = adjustScore(panicPathAdj, score, tmask)
209	}
210	if csflags&CallSiteInInitFunc != 0 {
211		score, tmask = adjustScore(initFuncAdj, score, tmask)
212	}
213
214	// Then adjustments to encourage inlining in selected cases.
215	if csflags&CallSiteInLoop != 0 {
216		score, tmask = adjustScore(inLoopAdj, score, tmask)
217	}
218
219	// Stop here if no callee props.
220	if calleeProps == nil {
221		cs.Score, cs.ScoreMask = score, tmask
222		return
223	}
224
225	// Walk through the actual expressions being passed at the call.
226	calleeRecvrParms := callee.Type().RecvParams()
227	for idx := range call.Args {
228		// ignore blanks
229		if calleeRecvrParms[idx].Sym == nil ||
230			calleeRecvrParms[idx].Sym.IsBlank() {
231			continue
232		}
233		arg := call.Args[idx]
234		pflag := calleeProps.ParamFlags[idx]
235		if debugTrace&debugTraceScoring != 0 {
236			fmt.Fprintf(os.Stderr, "=-= arg %d of %d: val %v flags=%s\n",
237				idx, len(call.Args), arg, pflag.String())
238		}
239
240		if len(cs.ArgProps) == 0 {
241			continue
242		}
243		argProps := cs.ArgProps[idx]
244
245		if debugTrace&debugTraceScoring != 0 {
246			fmt.Fprintf(os.Stderr, "=-= arg %d props %s value %v\n",
247				idx, argProps.String(), arg)
248		}
249
250		if argProps&ActualExprConstant != 0 {
251			if pflag&ParamMayFeedIfOrSwitch != 0 {
252				score, tmask = adjustScore(passConstToNestedIfAdj, score, tmask)
253			}
254			if pflag&ParamFeedsIfOrSwitch != 0 {
255				score, tmask = adjustScore(passConstToIfAdj, score, tmask)
256			}
257		}
258
259		if argProps&ActualExprIsConcreteConvIface != 0 {
260			// FIXME: ideally here it would be nice to make a
261			// distinction between the inlinable case and the
262			// non-inlinable case, but this is hard to do. Example:
263			//
264			//    type I interface { Tiny() int; Giant() }
265			//    type Conc struct { x int }
266			//    func (c *Conc) Tiny() int { return 42 }
267			//    func (c *Conc) Giant() { <huge amounts of code> }
268			//
269			//    func passConcToItf(c *Conc) {
270			//        makesItfMethodCall(c)
271			//    }
272			//
273			// In the code above, function properties will only tell
274			// us that 'makesItfMethodCall' invokes a method on its
275			// interface parameter, but we don't know whether it calls
276			// "Tiny" or "Giant". If we knew if called "Tiny", then in
277			// theory in addition to converting the interface call to
278			// a direct call, we could also inline (in which case
279			// we'd want to decrease the score even more).
280			//
281			// One thing we could do (not yet implemented) is iterate
282			// through all of the methods of "*Conc" that allow it to
283			// satisfy I, and if all are inlinable, then exploit that.
284			if pflag&ParamMayFeedInterfaceMethodCall != 0 {
285				score, tmask = adjustScore(passConcreteToNestedItfCallAdj, score, tmask)
286			}
287			if pflag&ParamFeedsInterfaceMethodCall != 0 {
288				score, tmask = adjustScore(passConcreteToItfCallAdj, score, tmask)
289			}
290		}
291
292		if argProps&(ActualExprIsFunc|ActualExprIsInlinableFunc) != 0 {
293			mayadj := passFuncToNestedIndCallAdj
294			mustadj := passFuncToIndCallAdj
295			if argProps&ActualExprIsInlinableFunc != 0 {
296				mayadj = passInlinableFuncToNestedIndCallAdj
297				mustadj = passInlinableFuncToIndCallAdj
298			}
299			if pflag&ParamMayFeedIndirectCall != 0 {
300				score, tmask = adjustScore(mayadj, score, tmask)
301			}
302			if pflag&ParamFeedsIndirectCall != 0 {
303				score, tmask = adjustScore(mustadj, score, tmask)
304			}
305		}
306	}
307
308	cs.Score, cs.ScoreMask = score, tmask
309}
310
311func adjustScore(typ scoreAdjustTyp, score int, mask scoreAdjustTyp) (int, scoreAdjustTyp) {
312
313	if isMust(typ) {
314		if mask&typ != 0 {
315			return score, mask
316		}
317		may := mustToMay(typ)
318		if mask&may != 0 {
319			// promote may to must, so undo may
320			score -= adjValue(may)
321			mask &^= may
322		}
323	} else if isMay(typ) {
324		must := mayToMust(typ)
325		if mask&(must|typ) != 0 {
326			return score, mask
327		}
328	}
329	if mask&typ == 0 {
330		if debugTrace&debugTraceScoring != 0 {
331			fmt.Fprintf(os.Stderr, "=-= applying adj %d for %s\n",
332				adjValue(typ), typ.String())
333		}
334		score += adjValue(typ)
335		mask |= typ
336	}
337	return score, mask
338}
339
340var resultFlagToPositiveAdj map[ResultPropBits]scoreAdjustTyp
341var paramFlagToPositiveAdj map[ParamPropBits]scoreAdjustTyp
342
343func setupFlagToAdjMaps() {
344	resultFlagToPositiveAdj = map[ResultPropBits]scoreAdjustTyp{
345		ResultIsAllocatedMem:     returnFeedsConcreteToInterfaceCallAdj,
346		ResultAlwaysSameFunc:     returnFeedsFuncToIndCallAdj,
347		ResultAlwaysSameConstant: returnFeedsConstToIfAdj,
348	}
349	paramFlagToPositiveAdj = map[ParamPropBits]scoreAdjustTyp{
350		ParamMayFeedInterfaceMethodCall: passConcreteToNestedItfCallAdj,
351		ParamFeedsInterfaceMethodCall:   passConcreteToItfCallAdj,
352		ParamMayFeedIndirectCall:        passInlinableFuncToNestedIndCallAdj,
353		ParamFeedsIndirectCall:          passInlinableFuncToIndCallAdj,
354	}
355}
356
357// LargestNegativeScoreAdjustment tries to estimate the largest possible
358// negative score adjustment that could be applied to a call of the
359// function with the specified props. Example:
360//
361//	func foo() {                  func bar(x int, p *int) int {
362//	   ...                          if x < 0 { *p = x }
363//	}                               return 99
364//	                              }
365//
366// Function 'foo' above on the left has no interesting properties,
367// thus as a result the most we'll adjust any call to is the value for
368// "call in loop". If the calculated cost of the function is 150, and
369// the in-loop adjustment is 5 (for example), then there is not much
370// point treating it as inlinable. On the other hand "bar" has a param
371// property (parameter "x" feeds unmodified to an "if" statement") and
372// a return property (always returns same constant) meaning that a
373// given call _could_ be rescored down as much as -35 points-- thus if
374// the size of "bar" is 100 (for example) then there is at least a
375// chance that scoring will enable inlining.
376func LargestNegativeScoreAdjustment(fn *ir.Func, props *FuncProps) int {
377	if resultFlagToPositiveAdj == nil {
378		setupFlagToAdjMaps()
379	}
380	var tmask scoreAdjustTyp
381	score := adjValues[inLoopAdj] // any call can be in a loop
382	for _, pf := range props.ParamFlags {
383		if adj, ok := paramFlagToPositiveAdj[pf]; ok {
384			score, tmask = adjustScore(adj, score, tmask)
385		}
386	}
387	for _, rf := range props.ResultFlags {
388		if adj, ok := resultFlagToPositiveAdj[rf]; ok {
389			score, tmask = adjustScore(adj, score, tmask)
390		}
391	}
392
393	if debugTrace&debugTraceScoring != 0 {
394		fmt.Fprintf(os.Stderr, "=-= largestScore(%v) is %d\n",
395			fn, score)
396	}
397
398	return score
399}
400
401// LargestPositiveScoreAdjustment tries to estimate the largest possible
402// positive score adjustment that could be applied to a given callsite.
403// At the moment we don't have very many positive score adjustments, so
404// this is just hard-coded, not table-driven.
405func LargestPositiveScoreAdjustment(fn *ir.Func) int {
406	return adjValues[panicPathAdj] + adjValues[initFuncAdj]
407}
408
409// callSiteTab contains entries for each call in the function
410// currently being processed by InlineCalls; this variable will either
411// be set to 'cstabCache' below (for non-inlinable routines) or to the
412// local 'cstab' entry in the fnInlHeur object for inlinable routines.
413//
414// NOTE: this assumes that inlining operations are happening in a serial,
415// single-threaded fashion,f which is true today but probably won't hold
416// in the future (for example, we might want to score the callsites
417// in multiple functions in parallel); if the inliner evolves in this
418// direction we'll need to come up with a different approach here.
419var callSiteTab CallSiteTab
420
421// scoreCallsCache caches a call site table and call site list between
422// invocations of ScoreCalls so that we can reuse previously allocated
423// storage.
424var scoreCallsCache scoreCallsCacheType
425
426type scoreCallsCacheType struct {
427	tab CallSiteTab
428	csl []*CallSite
429}
430
431// ScoreCalls assigns numeric scores to each of the callsites in
432// function 'fn'; the lower the score, the more helpful we think it
433// will be to inline.
434//
435// Unlike a lot of the other inline heuristics machinery, callsite
436// scoring can't be done as part of the CanInline call for a function,
437// due to fact that we may be working on a non-trivial SCC. So for
438// example with this SCC:
439//
440//	func foo(x int) {           func bar(x int, f func()) {
441//	  if x != 0 {                  f()
442//	    bar(x, func(){})           foo(x-1)
443//	  }                         }
444//	}
445//
446// We don't want to perform scoring for the 'foo' call in "bar" until
447// after foo has been analyzed, but it's conceivable that CanInline
448// might visit bar before foo for this SCC.
449func ScoreCalls(fn *ir.Func) {
450	if len(fn.Body) == 0 {
451		return
452	}
453	enableDebugTraceIfEnv()
454
455	nameFinder := newNameFinder(fn)
456
457	if debugTrace&debugTraceScoring != 0 {
458		fmt.Fprintf(os.Stderr, "=-= ScoreCalls(%v)\n", ir.FuncName(fn))
459	}
460
461	// If this is an inlinable function, use the precomputed
462	// call site table for it. If the function wasn't an inline
463	// candidate, collect a callsite table for it now.
464	var cstab CallSiteTab
465	if funcInlHeur, ok := fpmap[fn]; ok {
466		cstab = funcInlHeur.cstab
467	} else {
468		if len(scoreCallsCache.tab) != 0 {
469			panic("missing call to ScoreCallsCleanup")
470		}
471		if scoreCallsCache.tab == nil {
472			scoreCallsCache.tab = make(CallSiteTab)
473		}
474		if debugTrace&debugTraceScoring != 0 {
475			fmt.Fprintf(os.Stderr, "=-= building cstab for non-inl func %s\n",
476				ir.FuncName(fn))
477		}
478		cstab = computeCallSiteTable(fn, fn.Body, scoreCallsCache.tab, nil, 0,
479			nameFinder)
480	}
481
482	csa := makeCallSiteAnalyzer(fn)
483	const doCallResults = true
484	csa.scoreCallsRegion(fn, fn.Body, cstab, doCallResults, nil)
485
486	disableDebugTrace()
487}
488
489// scoreCallsRegion assigns numeric scores to each of the callsites in
490// region 'region' within function 'fn'. This can be called on
491// an entire function, or with 'region' set to a chunk of
492// code corresponding to an inlined call.
493func (csa *callSiteAnalyzer) scoreCallsRegion(fn *ir.Func, region ir.Nodes, cstab CallSiteTab, doCallResults bool, ic *ir.InlinedCallExpr) {
494	if debugTrace&debugTraceScoring != 0 {
495		fmt.Fprintf(os.Stderr, "=-= scoreCallsRegion(%v, %s) len(cstab)=%d\n",
496			ir.FuncName(fn), region[0].Op().String(), len(cstab))
497	}
498
499	// Sort callsites to avoid any surprises with non deterministic
500	// map iteration order (this is probably not needed, but here just
501	// in case).
502	csl := scoreCallsCache.csl[:0]
503	for _, cs := range cstab {
504		csl = append(csl, cs)
505	}
506	scoreCallsCache.csl = csl[:0]
507	sort.Slice(csl, func(i, j int) bool {
508		return csl[i].ID < csl[j].ID
509	})
510
511	// Score each call site.
512	var resultNameTab map[*ir.Name]resultPropAndCS
513	for _, cs := range csl {
514		var cprops *FuncProps
515		fihcprops := false
516		desercprops := false
517		if funcInlHeur, ok := fpmap[cs.Callee]; ok {
518			cprops = funcInlHeur.props
519			fihcprops = true
520		} else if cs.Callee.Inl != nil {
521			cprops = DeserializeFromString(cs.Callee.Inl.Properties)
522			desercprops = true
523		} else {
524			if base.Debug.DumpInlFuncProps != "" {
525				fmt.Fprintf(os.Stderr, "=-= *** unable to score call to %s from %s\n", cs.Callee.Sym().Name, fmtFullPos(cs.Call.Pos()))
526				panic("should never happen")
527			} else {
528				continue
529			}
530		}
531		cs.computeCallSiteScore(csa, cprops)
532
533		if doCallResults {
534			if debugTrace&debugTraceScoring != 0 {
535				fmt.Fprintf(os.Stderr, "=-= examineCallResults at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops)
536			}
537			resultNameTab = csa.examineCallResults(cs, resultNameTab)
538		}
539
540		if debugTrace&debugTraceScoring != 0 {
541			fmt.Fprintf(os.Stderr, "=-= scoring call at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops)
542		}
543	}
544
545	if resultNameTab != nil {
546		csa.rescoreBasedOnCallResultUses(fn, resultNameTab, cstab)
547	}
548
549	disableDebugTrace()
550
551	if ic != nil && callSiteTab != nil {
552		// Integrate the calls from this cstab into the table for the caller.
553		if err := callSiteTab.merge(cstab); err != nil {
554			base.FatalfAt(ic.Pos(), "%v", err)
555		}
556	} else {
557		callSiteTab = cstab
558	}
559}
560
561// ScoreCallsCleanup resets the state of the callsite cache
562// once ScoreCalls is done with a function.
563func ScoreCallsCleanup() {
564	if base.Debug.DumpInlCallSiteScores != 0 {
565		if allCallSites == nil {
566			allCallSites = make(CallSiteTab)
567		}
568		for call, cs := range callSiteTab {
569			allCallSites[call] = cs
570		}
571	}
572	for k := range scoreCallsCache.tab {
573		delete(scoreCallsCache.tab, k)
574	}
575}
576
577// GetCallSiteScore returns the previously calculated score for call
578// within fn.
579func GetCallSiteScore(fn *ir.Func, call *ir.CallExpr) (int, bool) {
580	if funcInlHeur, ok := fpmap[fn]; ok {
581		if cs, ok := funcInlHeur.cstab[call]; ok {
582			return cs.Score, true
583		}
584	}
585	if cs, ok := callSiteTab[call]; ok {
586		return cs.Score, true
587	}
588	return 0, false
589}
590
591// BudgetExpansion returns the amount to relax/expand the base
592// inlining budget when the new inliner is turned on; the inliner
593// will add the returned value to the hairiness budget.
594//
595// Background: with the new inliner, the score for a given callsite
596// can be adjusted down by some amount due to heuristics, however we
597// won't know whether this is going to happen until much later after
598// the CanInline call. This function returns the amount to relax the
599// budget initially (to allow for a large score adjustment); later on
600// in RevisitInlinability we'll look at each individual function to
601// demote it if needed.
602func BudgetExpansion(maxBudget int32) int32 {
603	if base.Debug.InlBudgetSlack != 0 {
604		return int32(base.Debug.InlBudgetSlack)
605	}
606	// In the default case, return maxBudget, which will effectively
607	// double the budget from 80 to 160; this should be good enough
608	// for most cases.
609	return maxBudget
610}
611
612var allCallSites CallSiteTab
613
614// DumpInlCallSiteScores is invoked by the inliner if the debug flag
615// "-d=dumpinlcallsitescores" is set; it dumps out a human-readable
616// summary of all (potentially) inlinable callsites in the package,
617// along with info on call site scoring and the adjustments made to a
618// given score. Here profile is the PGO profile in use (may be
619// nil), budgetCallback is a callback that can be invoked to find out
620// the original pre-adjustment hairiness limit for the function, and
621// inlineHotMaxBudget is the constant of the same name used in the
622// inliner. Sample output lines:
623//
624// Score  Adjustment  Status  Callee  CallerPos ScoreFlags
625// 115    40          DEMOTED cmd/compile/internal/abi.(*ABIParamAssignment).Offset     expand_calls.go:1679:14|6       panicPathAdj
626// 76     -5n         PROMOTED runtime.persistentalloc   mcheckmark.go:48:45|3   inLoopAdj
627// 201    0           --- PGO  unicode.DecodeRuneInString        utf8.go:312:30|1
628// 7      -5          --- PGO  internal/abi.Name.DataChecked     type.go:625:22|0        inLoopAdj
629//
630// In the dump above, "Score" is the final score calculated for the
631// callsite, "Adjustment" is the amount added to or subtracted from
632// the original hairiness estimate to form the score. "Status" shows
633// whether anything changed with the site -- did the adjustment bump
634// it down just below the threshold ("PROMOTED") or instead bump it
635// above the threshold ("DEMOTED"); this will be blank ("---") if no
636// threshold was crossed as a result of the heuristics. Note that
637// "Status" also shows whether PGO was involved. "Callee" is the name
638// of the function called, "CallerPos" is the position of the
639// callsite, and "ScoreFlags" is a digest of the specific properties
640// we used to make adjustments to callsite score via heuristics.
641func DumpInlCallSiteScores(profile *pgoir.Profile, budgetCallback func(fn *ir.Func, profile *pgoir.Profile) (int32, bool)) {
642
643	var indirectlyDueToPromotion func(cs *CallSite) bool
644	indirectlyDueToPromotion = func(cs *CallSite) bool {
645		bud, _ := budgetCallback(cs.Callee, profile)
646		hairyval := cs.Callee.Inl.Cost
647		score := int32(cs.Score)
648		if hairyval > bud && score <= bud {
649			return true
650		}
651		if cs.parent != nil {
652			return indirectlyDueToPromotion(cs.parent)
653		}
654		return false
655	}
656
657	genstatus := func(cs *CallSite) string {
658		hairyval := cs.Callee.Inl.Cost
659		bud, isPGO := budgetCallback(cs.Callee, profile)
660		score := int32(cs.Score)
661		st := "---"
662		expinl := false
663		switch {
664		case hairyval <= bud && score <= bud:
665			// "Normal" inlined case: hairy val sufficiently low that
666			// it would have been inlined anyway without heuristics.
667			expinl = true
668		case hairyval > bud && score > bud:
669			// "Normal" not inlined case: hairy val sufficiently high
670			// and scoring didn't lower it.
671		case hairyval > bud && score <= bud:
672			// Promoted: we would not have inlined it before, but
673			// after score adjustment we decided to inline.
674			st = "PROMOTED"
675			expinl = true
676		case hairyval <= bud && score > bud:
677			// Demoted: we would have inlined it before, but after
678			// score adjustment we decided not to inline.
679			st = "DEMOTED"
680		}
681		inlined := cs.aux&csAuxInlined != 0
682		indprom := false
683		if cs.parent != nil {
684			indprom = indirectlyDueToPromotion(cs.parent)
685		}
686		if inlined && indprom {
687			st += "|INDPROM"
688		}
689		if inlined && !expinl {
690			st += "|[NI?]"
691		} else if !inlined && expinl {
692			st += "|[IN?]"
693		}
694		if isPGO {
695			st += "|PGO"
696		}
697		return st
698	}
699
700	if base.Debug.DumpInlCallSiteScores != 0 {
701		var sl []*CallSite
702		for _, cs := range allCallSites {
703			sl = append(sl, cs)
704		}
705		sort.Slice(sl, func(i, j int) bool {
706			if sl[i].Score != sl[j].Score {
707				return sl[i].Score < sl[j].Score
708			}
709			fni := ir.PkgFuncName(sl[i].Callee)
710			fnj := ir.PkgFuncName(sl[j].Callee)
711			if fni != fnj {
712				return fni < fnj
713			}
714			ecsi := EncodeCallSiteKey(sl[i])
715			ecsj := EncodeCallSiteKey(sl[j])
716			return ecsi < ecsj
717		})
718
719		mkname := func(fn *ir.Func) string {
720			var n string
721			if fn == nil || fn.Nname == nil {
722				return "<nil>"
723			}
724			if fn.Sym().Pkg == types.LocalPkg {
725				n = "·" + fn.Sym().Name
726			} else {
727				n = ir.PkgFuncName(fn)
728			}
729			// don't try to print super-long names
730			if len(n) <= 64 {
731				return n
732			}
733			return n[:32] + "..." + n[len(n)-32:len(n)]
734		}
735
736		if len(sl) != 0 {
737			fmt.Fprintf(os.Stdout, "# scores for package %s\n", types.LocalPkg.Path)
738			fmt.Fprintf(os.Stdout, "# Score  Adjustment  Status  Callee  CallerPos Flags ScoreFlags\n")
739		}
740		for _, cs := range sl {
741			hairyval := cs.Callee.Inl.Cost
742			adj := int32(cs.Score) - hairyval
743			nm := mkname(cs.Callee)
744			ecc := EncodeCallSiteKey(cs)
745			fmt.Fprintf(os.Stdout, "%d  %d\t%s\t%s\t%s\t%s\n",
746				cs.Score, adj, genstatus(cs),
747				nm, ecc,
748				cs.ScoreMask.String())
749		}
750	}
751}
752