1// Copyright 2022 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package base
6
7import (
8	"bytes"
9	"cmd/internal/obj"
10	"cmd/internal/src"
11	"fmt"
12	"internal/bisect"
13	"io"
14	"os"
15	"path/filepath"
16	"strconv"
17	"strings"
18	"sync"
19)
20
21type hashAndMask struct {
22	// a hash h matches if (h^hash)&mask == 0
23	hash uint64
24	mask uint64
25	name string // base name, or base name + "0", "1", etc.
26}
27
28type HashDebug struct {
29	mu   sync.Mutex // for logfile, posTmp, bytesTmp
30	name string     // base name of the flag/variable.
31	// what file (if any) receives the yes/no logging?
32	// default is os.Stdout
33	logfile          io.Writer
34	posTmp           []src.Pos
35	bytesTmp         bytes.Buffer
36	matches          []hashAndMask // A hash matches if one of these matches.
37	excludes         []hashAndMask // explicitly excluded hash suffixes
38	bisect           *bisect.Matcher
39	fileSuffixOnly   bool // for Pos hashes, remove the directory prefix.
40	inlineSuffixOnly bool // for Pos hashes, remove all but the most inline position.
41}
42
43// SetInlineSuffixOnly controls whether hashing and reporting use the entire
44// inline position, or just the most-inline suffix.  Compiler debugging tends
45// to want the whole inlining, debugging user problems (loopvarhash, e.g.)
46// typically does not need to see the entire inline tree, there is just one
47// copy of the source code.
48func (d *HashDebug) SetInlineSuffixOnly(b bool) *HashDebug {
49	d.inlineSuffixOnly = b
50	return d
51}
52
53// The default compiler-debugging HashDebug, for "-d=gossahash=..."
54var hashDebug *HashDebug
55
56var FmaHash *HashDebug         // for debugging fused-multiply-add floating point changes
57var LoopVarHash *HashDebug     // for debugging shared/private loop variable changes
58var PGOHash *HashDebug         // for debugging PGO optimization decisions
59var MergeLocalsHash *HashDebug // for debugging local stack slot merging changes
60
61// DebugHashMatchPkgFunc reports whether debug variable Gossahash
62//
63//  1. is empty (returns true; this is a special more-quickly implemented case of 4 below)
64//
65//  2. is "y" or "Y" (returns true)
66//
67//  3. is "n" or "N" (returns false)
68//
69//  4. does not explicitly exclude the sha1 hash of pkgAndName (see step 6)
70//
71//  5. is a suffix of the sha1 hash of pkgAndName (returns true)
72//
73//  6. OR
74//     if the (non-empty) value is in the regular language
75//     "(-[01]+/)+?([01]+(/[01]+)+?"
76//     (exclude..)(....include...)
77//     test the [01]+ exclude substrings, if any suffix-match, return false (4 above)
78//     test the [01]+ include substrings, if any suffix-match, return true
79//     The include substrings AFTER the first slash are numbered 0,1, etc and
80//     are named fmt.Sprintf("%s%d", varname, number)
81//     As an extra-special case for multiple failure search,
82//     an excludes-only string ending in a slash (terminated, not separated)
83//     implicitly specifies the include string "0/1", that is, match everything.
84//     (Exclude strings are used for automated search for multiple failures.)
85//     Clause 6 is not really intended for human use and only
86//     matters for failures that require multiple triggers.
87//
88// Otherwise it returns false.
89//
90// Unless Flags.Gossahash is empty, when DebugHashMatchPkgFunc returns true the message
91//
92//	"%s triggered %s\n", varname, pkgAndName
93//
94// is printed on the file named in environment variable GSHS_LOGFILE,
95// or standard out if that is empty.  "Varname" is either the name of
96// the variable or the name of the substring, depending on which matched.
97//
98// Typical use:
99//
100//  1. you make a change to the compiler, say, adding a new phase
101//
102//  2. it is broken in some mystifying way, for example, make.bash builds a broken
103//     compiler that almost works, but crashes compiling a test in run.bash.
104//
105//  3. add this guard to the code, which by default leaves it broken, but does not
106//     run the broken new code if Flags.Gossahash is non-empty and non-matching:
107//
108//     if !base.DebugHashMatch(ir.PkgFuncName(fn)) {
109//     return nil // early exit, do nothing
110//     }
111//
112//  4. rebuild w/o the bad code,
113//     GOCOMPILEDEBUG=gossahash=n ./all.bash
114//     to verify that you put the guard in the right place with the right sense of the test.
115//
116//  5. use github.com/dr2chase/gossahash to search for the error:
117//
118//     go install github.com/dr2chase/gossahash@latest
119//
120//     gossahash -- <the thing that fails>
121//
122//     for example: GOMAXPROCS=1 gossahash -- ./all.bash
123//
124//  6. gossahash should return a single function whose miscompilation
125//     causes the problem, and you can focus on that.
126func DebugHashMatchPkgFunc(pkg, fn string) bool {
127	return hashDebug.MatchPkgFunc(pkg, fn, nil)
128}
129
130func DebugHashMatchPos(pos src.XPos) bool {
131	return hashDebug.MatchPos(pos, nil)
132}
133
134// HasDebugHash returns true if Flags.Gossahash is non-empty, which
135// results in hashDebug being not-nil.  I.e., if !HasDebugHash(),
136// there is no need to create the string for hashing and testing.
137func HasDebugHash() bool {
138	return hashDebug != nil
139}
140
141// TODO: Delete when we switch to bisect-only.
142func toHashAndMask(s, varname string) hashAndMask {
143	l := len(s)
144	if l > 64 {
145		s = s[l-64:]
146		l = 64
147	}
148	m := ^(^uint64(0) << l)
149	h, err := strconv.ParseUint(s, 2, 64)
150	if err != nil {
151		Fatalf("Could not parse %s (=%s) as a binary number", varname, s)
152	}
153
154	return hashAndMask{name: varname, hash: h, mask: m}
155}
156
157// NewHashDebug returns a new hash-debug tester for the
158// environment variable ev.  If ev is not set, it returns
159// nil, allowing a lightweight check for normal-case behavior.
160func NewHashDebug(ev, s string, file io.Writer) *HashDebug {
161	if s == "" {
162		return nil
163	}
164
165	hd := &HashDebug{name: ev, logfile: file}
166	if !strings.Contains(s, "/") {
167		m, err := bisect.New(s)
168		if err != nil {
169			Fatalf("%s: %v", ev, err)
170		}
171		hd.bisect = m
172		return hd
173	}
174
175	// TODO: Delete remainder of function when we switch to bisect-only.
176	ss := strings.Split(s, "/")
177	// first remove any leading exclusions; these are preceded with "-"
178	i := 0
179	for len(ss) > 0 {
180		s := ss[0]
181		if len(s) == 0 || len(s) > 0 && s[0] != '-' {
182			break
183		}
184		ss = ss[1:]
185		hd.excludes = append(hd.excludes, toHashAndMask(s[1:], fmt.Sprintf("%s%d", "HASH_EXCLUDE", i)))
186		i++
187	}
188	// hash searches may use additional EVs with 0, 1, 2, ... suffixes.
189	i = 0
190	for _, s := range ss {
191		if s == "" {
192			if i != 0 || len(ss) > 1 && ss[1] != "" || len(ss) > 2 {
193				Fatalf("Empty hash match string for %s should be first (and only) one", ev)
194			}
195			// Special case of should match everything.
196			hd.matches = append(hd.matches, toHashAndMask("0", fmt.Sprintf("%s0", ev)))
197			hd.matches = append(hd.matches, toHashAndMask("1", fmt.Sprintf("%s1", ev)))
198			break
199		}
200		if i == 0 {
201			hd.matches = append(hd.matches, toHashAndMask(s, ev))
202		} else {
203			hd.matches = append(hd.matches, toHashAndMask(s, fmt.Sprintf("%s%d", ev, i-1)))
204		}
205		i++
206	}
207	return hd
208}
209
210// TODO: Delete when we switch to bisect-only.
211func (d *HashDebug) excluded(hash uint64) bool {
212	for _, m := range d.excludes {
213		if (m.hash^hash)&m.mask == 0 {
214			return true
215		}
216	}
217	return false
218}
219
220// TODO: Delete when we switch to bisect-only.
221func hashString(hash uint64) string {
222	hstr := ""
223	if hash == 0 {
224		hstr = "0"
225	} else {
226		for ; hash != 0; hash = hash >> 1 {
227			hstr = string('0'+byte(hash&1)) + hstr
228		}
229	}
230	if len(hstr) > 24 {
231		hstr = hstr[len(hstr)-24:]
232	}
233	return hstr
234}
235
236// TODO: Delete when we switch to bisect-only.
237func (d *HashDebug) match(hash uint64) *hashAndMask {
238	for i, m := range d.matches {
239		if (m.hash^hash)&m.mask == 0 {
240			return &d.matches[i]
241		}
242	}
243	return nil
244}
245
246// MatchPkgFunc returns true if either the variable used to create d is
247// unset, or if its value is y, or if it is a suffix of the base-two
248// representation of the hash of pkg and fn.  If the variable is not nil,
249// then a true result is accompanied by stylized output to d.logfile, which
250// is used for automated bug search.
251func (d *HashDebug) MatchPkgFunc(pkg, fn string, note func() string) bool {
252	if d == nil {
253		return true
254	}
255	// Written this way to make inlining likely.
256	return d.matchPkgFunc(pkg, fn, note)
257}
258
259func (d *HashDebug) matchPkgFunc(pkg, fn string, note func() string) bool {
260	hash := bisect.Hash(pkg, fn)
261	return d.matchAndLog(hash, func() string { return pkg + "." + fn }, note)
262}
263
264// MatchPos is similar to MatchPkgFunc, but for hash computation
265// it uses the source position including all inlining information instead of
266// package name and path.
267// Note that the default answer for no environment variable (d == nil)
268// is "yes", do the thing.
269func (d *HashDebug) MatchPos(pos src.XPos, desc func() string) bool {
270	if d == nil {
271		return true
272	}
273	// Written this way to make inlining likely.
274	return d.matchPos(Ctxt, pos, desc)
275}
276
277func (d *HashDebug) matchPos(ctxt *obj.Link, pos src.XPos, note func() string) bool {
278	return d.matchPosWithInfo(ctxt, pos, nil, note)
279}
280
281func (d *HashDebug) matchPosWithInfo(ctxt *obj.Link, pos src.XPos, info any, note func() string) bool {
282	hash := d.hashPos(ctxt, pos)
283	if info != nil {
284		hash = bisect.Hash(hash, info)
285	}
286	return d.matchAndLog(hash,
287		func() string {
288			r := d.fmtPos(ctxt, pos)
289			if info != nil {
290				r += fmt.Sprintf(" (%v)", info)
291			}
292			return r
293		},
294		note)
295}
296
297// MatchPosWithInfo is similar to MatchPos, but with additional information
298// that is included for hash computation, so it can distinguish multiple
299// matches on the same source location.
300// Note that the default answer for no environment variable (d == nil)
301// is "yes", do the thing.
302func (d *HashDebug) MatchPosWithInfo(pos src.XPos, info any, desc func() string) bool {
303	if d == nil {
304		return true
305	}
306	// Written this way to make inlining likely.
307	return d.matchPosWithInfo(Ctxt, pos, info, desc)
308}
309
310// matchAndLog is the core matcher. It reports whether the hash matches the pattern.
311// If a report needs to be printed, match prints that report to the log file.
312// The text func must be non-nil and should return a user-readable
313// representation of what was hashed. The note func may be nil; if non-nil,
314// it should return additional information to display to the user when this
315// change is selected.
316func (d *HashDebug) matchAndLog(hash uint64, text, note func() string) bool {
317	if d.bisect != nil {
318		enabled := d.bisect.ShouldEnable(hash)
319		if d.bisect.ShouldPrint(hash) {
320			disabled := ""
321			if !enabled {
322				disabled = " [DISABLED]"
323			}
324			var t string
325			if !d.bisect.MarkerOnly() {
326				t = text()
327				if note != nil {
328					if n := note(); n != "" {
329						t += ": " + n + disabled
330						disabled = ""
331					}
332				}
333			}
334			d.log(d.name, hash, strings.TrimSpace(t+disabled))
335		}
336		return enabled
337	}
338
339	// TODO: Delete rest of function body when we switch to bisect-only.
340	if d.excluded(hash) {
341		return false
342	}
343	if m := d.match(hash); m != nil {
344		d.log(m.name, hash, text())
345		return true
346	}
347	return false
348}
349
350// short returns the form of file name to use for d.
351// The default is the full path, but fileSuffixOnly selects
352// just the final path element.
353func (d *HashDebug) short(name string) string {
354	if d.fileSuffixOnly {
355		return filepath.Base(name)
356	}
357	return name
358}
359
360// hashPos returns a hash of the position pos, including its entire inline stack.
361// If d.inlineSuffixOnly is true, hashPos only considers the innermost (leaf) position on the inline stack.
362func (d *HashDebug) hashPos(ctxt *obj.Link, pos src.XPos) uint64 {
363	if d.inlineSuffixOnly {
364		p := ctxt.InnermostPos(pos)
365		return bisect.Hash(d.short(p.Filename()), p.Line(), p.Col())
366	}
367	h := bisect.Hash()
368	ctxt.AllPos(pos, func(p src.Pos) {
369		h = bisect.Hash(h, d.short(p.Filename()), p.Line(), p.Col())
370	})
371	return h
372}
373
374// fmtPos returns a textual formatting of the position pos, including its entire inline stack.
375// If d.inlineSuffixOnly is true, fmtPos only considers the innermost (leaf) position on the inline stack.
376func (d *HashDebug) fmtPos(ctxt *obj.Link, pos src.XPos) string {
377	format := func(p src.Pos) string {
378		return fmt.Sprintf("%s:%d:%d", d.short(p.Filename()), p.Line(), p.Col())
379	}
380	if d.inlineSuffixOnly {
381		return format(ctxt.InnermostPos(pos))
382	}
383	var stk []string
384	ctxt.AllPos(pos, func(p src.Pos) {
385		stk = append(stk, format(p))
386	})
387	return strings.Join(stk, "; ")
388}
389
390// log prints a match with the given hash and textual formatting.
391// TODO: Delete varname parameter when we switch to bisect-only.
392func (d *HashDebug) log(varname string, hash uint64, text string) {
393	d.mu.Lock()
394	defer d.mu.Unlock()
395
396	file := d.logfile
397	if file == nil {
398		if tmpfile := os.Getenv("GSHS_LOGFILE"); tmpfile != "" {
399			var err error
400			file, err = os.OpenFile(tmpfile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
401			if err != nil {
402				Fatalf("could not open hash-testing logfile %s", tmpfile)
403				return
404			}
405		}
406		if file == nil {
407			file = os.Stdout
408		}
409		d.logfile = file
410	}
411
412	// Bisect output.
413	fmt.Fprintf(file, "%s %s\n", text, bisect.Marker(hash))
414
415	// Gossahash output.
416	// TODO: Delete rest of function when we switch to bisect-only.
417	fmt.Fprintf(file, "%s triggered %s %s\n", varname, text, hashString(hash))
418}
419