1// Copyright 2021 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package fuzz
6
7import (
8	"bytes"
9	"fmt"
10	"go/ast"
11	"go/parser"
12	"go/token"
13	"math"
14	"strconv"
15	"strings"
16	"unicode/utf8"
17)
18
// encVersion1 is the header line that opens every corpus file written with
// version 1 of the encoding.
var encVersion1 = "go test fuzz v1"

// marshalCorpusFile renders vals in the corpus file format: the version
// header on the first line, then one line per value. Each value is written as
// a Go conversion or call expression, for example int(-3), []byte("abc") or
// math.Float64frombits(0x7ff8000000000002).
//
// It panics if vals is empty or if any value has an unsupported type.
func marshalCorpusFile(vals ...any) []byte {
	if len(vals) == 0 {
		panic("must have at least one value to marshal")
	}

	var out bytes.Buffer
	out.WriteString(encVersion1)
	out.WriteByte('\n')

	// TODO(katiehockman): keep uint8 and int32 encoding where applicable,
	// instead of changing to byte and rune respectively.
	for _, arg := range vals {
		switch v := arg.(type) {
		case int, int8, int16, int64, uint, uint16, uint32, uint64, bool:
			fmt.Fprintf(&out, "%T(%v)\n", v, v)

		case float32:
			// A NaN whose bits differ from float32(math.NaN()) is written as a
			// hex bit pattern, the form users are most likely to meet in
			// floating-point literature. That lets a failure that depends on one
			// specific NaN payload (float32 has roughly 2^24 of them) be
			// reproduced exactly, rather than merely as "some NaN".
			//
			// The bits of float32(math.NaN()) can differ between architectures,
			// depending on whether signaling NaNs are marked with a low bit (the
			// common choice) or a high bit (as on much MIPS hardware before
			// about 2012). Equating plain "NaN" with math.NaN() is judged worth
			// that small platform-dependent ambiguity.
			bits := math.Float32bits(v)
			if math.IsNaN(float64(v)) && bits != math.Float32bits(float32(math.NaN())) {
				fmt.Fprintf(&out, "math.Float32frombits(0x%x)\n", bits)
				continue
			}
			// Everything else, including the NaN that is bit-identical to
			// float32(math.NaN()), uses the default formatting. That matches
			// strconv.FormatFloat with format 'g' and is readable by
			// strconv.ParseFloat: ordinary numbers carry enough digits to be
			// reconstructed exactly, infinities print as "+Inf" / "-Inf", zeros
			// as "0" / "-0", and NaN as "NaN".
			fmt.Fprintf(&out, "%T(%v)\n", v, v)

		case float64:
			// Same scheme as float32: only non-canonical NaNs need the bit form.
			bits := math.Float64bits(v)
			if math.IsNaN(v) && bits != math.Float64bits(math.NaN()) {
				fmt.Fprintf(&out, "math.Float64frombits(0x%x)\n", bits)
				continue
			}
			fmt.Fprintf(&out, "%T(%v)\n", v, v)

		case string:
			fmt.Fprintf(&out, "string(%q)\n", v)

		case rune: // int32
			// rune and int32 are one type, but not every int32 can be spelled as
			// a rune literal: negative values, surrogate halves and values above
			// unicode.MaxRune have no quoted form. Formatting those with "%q"
			// (or the strconv equivalents) would emit the Unicode replacement
			// character rather than the original value (see
			// https://go.dev/issue/51526), so they are written as int32.
			//
			// The cut-off is UTF-8 validity, an arbitrary choice that favors the
			// "rune" spelling. Either spelling is accepted when reading.
			if !utf8.ValidRune(v) {
				fmt.Fprintf(&out, "int32(%v)\n", v)
				continue
			}
			fmt.Fprintf(&out, "rune(%q)\n", v)

		case byte: // uint8
			// Bytes are arbitrarily written as characters; every byte value has
			// a valid character encoding.
			fmt.Fprintf(&out, "byte(%q)\n", v)

		case []byte: // []uint8
			fmt.Fprintf(&out, "[]byte(%q)\n", v)

		default:
			panic(fmt.Sprintf("unsupported type: %T", v))
		}
	}
	return out.Bytes()
}
100
101// unmarshalCorpusFile decodes corpus bytes into their respective values.
102func unmarshalCorpusFile(b []byte) ([]any, error) {
103	if len(b) == 0 {
104		return nil, fmt.Errorf("cannot unmarshal empty string")
105	}
106	lines := bytes.Split(b, []byte("\n"))
107	if len(lines) < 2 {
108		return nil, fmt.Errorf("must include version and at least one value")
109	}
110	version := strings.TrimSuffix(string(lines[0]), "\r")
111	if version != encVersion1 {
112		return nil, fmt.Errorf("unknown encoding version: %s", version)
113	}
114	var vals []any
115	for _, line := range lines[1:] {
116		line = bytes.TrimSpace(line)
117		if len(line) == 0 {
118			continue
119		}
120		v, err := parseCorpusValue(line)
121		if err != nil {
122			return nil, fmt.Errorf("malformed line %q: %v", line, err)
123		}
124		vals = append(vals, v)
125	}
126	return vals, nil
127}
128
129func parseCorpusValue(line []byte) (any, error) {
130	fs := token.NewFileSet()
131	expr, err := parser.ParseExprFrom(fs, "(test)", line, 0)
132	if err != nil {
133		return nil, err
134	}
135	call, ok := expr.(*ast.CallExpr)
136	if !ok {
137		return nil, fmt.Errorf("expected call expression")
138	}
139	if len(call.Args) != 1 {
140		return nil, fmt.Errorf("expected call expression with 1 argument; got %d", len(call.Args))
141	}
142	arg := call.Args[0]
143
144	if arrayType, ok := call.Fun.(*ast.ArrayType); ok {
145		if arrayType.Len != nil {
146			return nil, fmt.Errorf("expected []byte or primitive type")
147		}
148		elt, ok := arrayType.Elt.(*ast.Ident)
149		if !ok || elt.Name != "byte" {
150			return nil, fmt.Errorf("expected []byte")
151		}
152		lit, ok := arg.(*ast.BasicLit)
153		if !ok || lit.Kind != token.STRING {
154			return nil, fmt.Errorf("string literal required for type []byte")
155		}
156		s, err := strconv.Unquote(lit.Value)
157		if err != nil {
158			return nil, err
159		}
160		return []byte(s), nil
161	}
162
163	var idType *ast.Ident
164	if selector, ok := call.Fun.(*ast.SelectorExpr); ok {
165		xIdent, ok := selector.X.(*ast.Ident)
166		if !ok || xIdent.Name != "math" {
167			return nil, fmt.Errorf("invalid selector type")
168		}
169		switch selector.Sel.Name {
170		case "Float64frombits":
171			idType = &ast.Ident{Name: "float64-bits"}
172		case "Float32frombits":
173			idType = &ast.Ident{Name: "float32-bits"}
174		default:
175			return nil, fmt.Errorf("invalid selector type")
176		}
177	} else {
178		idType, ok = call.Fun.(*ast.Ident)
179		if !ok {
180			return nil, fmt.Errorf("expected []byte or primitive type")
181		}
182		if idType.Name == "bool" {
183			id, ok := arg.(*ast.Ident)
184			if !ok {
185				return nil, fmt.Errorf("malformed bool")
186			}
187			if id.Name == "true" {
188				return true, nil
189			} else if id.Name == "false" {
190				return false, nil
191			} else {
192				return nil, fmt.Errorf("true or false required for type bool")
193			}
194		}
195	}
196
197	var (
198		val  string
199		kind token.Token
200	)
201	if op, ok := arg.(*ast.UnaryExpr); ok {
202		switch lit := op.X.(type) {
203		case *ast.BasicLit:
204			if op.Op != token.SUB {
205				return nil, fmt.Errorf("unsupported operation on int/float: %v", op.Op)
206			}
207			// Special case for negative numbers.
208			val = op.Op.String() + lit.Value // e.g. "-" + "124"
209			kind = lit.Kind
210		case *ast.Ident:
211			if lit.Name != "Inf" {
212				return nil, fmt.Errorf("expected operation on int or float type")
213			}
214			if op.Op == token.SUB {
215				val = "-Inf"
216			} else {
217				val = "+Inf"
218			}
219			kind = token.FLOAT
220		default:
221			return nil, fmt.Errorf("expected operation on int or float type")
222		}
223	} else {
224		switch lit := arg.(type) {
225		case *ast.BasicLit:
226			val, kind = lit.Value, lit.Kind
227		case *ast.Ident:
228			if lit.Name != "NaN" {
229				return nil, fmt.Errorf("literal value required for primitive type")
230			}
231			val, kind = "NaN", token.FLOAT
232		default:
233			return nil, fmt.Errorf("literal value required for primitive type")
234		}
235	}
236
237	switch typ := idType.Name; typ {
238	case "string":
239		if kind != token.STRING {
240			return nil, fmt.Errorf("string literal value required for type string")
241		}
242		return strconv.Unquote(val)
243	case "byte", "rune":
244		if kind == token.INT {
245			switch typ {
246			case "rune":
247				return parseInt(val, typ)
248			case "byte":
249				return parseUint(val, typ)
250			}
251		}
252		if kind != token.CHAR {
253			return nil, fmt.Errorf("character literal required for byte/rune types")
254		}
255		n := len(val)
256		if n < 2 {
257			return nil, fmt.Errorf("malformed character literal, missing single quotes")
258		}
259		code, _, _, err := strconv.UnquoteChar(val[1:n-1], '\'')
260		if err != nil {
261			return nil, err
262		}
263		if typ == "rune" {
264			return code, nil
265		}
266		if code >= 256 {
267			return nil, fmt.Errorf("can only encode single byte to a byte type")
268		}
269		return byte(code), nil
270	case "int", "int8", "int16", "int32", "int64":
271		if kind != token.INT {
272			return nil, fmt.Errorf("integer literal required for int types")
273		}
274		return parseInt(val, typ)
275	case "uint", "uint8", "uint16", "uint32", "uint64":
276		if kind != token.INT {
277			return nil, fmt.Errorf("integer literal required for uint types")
278		}
279		return parseUint(val, typ)
280	case "float32":
281		if kind != token.FLOAT && kind != token.INT {
282			return nil, fmt.Errorf("float or integer literal required for float32 type")
283		}
284		v, err := strconv.ParseFloat(val, 32)
285		return float32(v), err
286	case "float64":
287		if kind != token.FLOAT && kind != token.INT {
288			return nil, fmt.Errorf("float or integer literal required for float64 type")
289		}
290		return strconv.ParseFloat(val, 64)
291	case "float32-bits":
292		if kind != token.INT {
293			return nil, fmt.Errorf("integer literal required for math.Float32frombits type")
294		}
295		bits, err := parseUint(val, "uint32")
296		if err != nil {
297			return nil, err
298		}
299		return math.Float32frombits(bits.(uint32)), nil
300	case "float64-bits":
301		if kind != token.FLOAT && kind != token.INT {
302			return nil, fmt.Errorf("integer literal required for math.Float64frombits type")
303		}
304		bits, err := parseUint(val, "uint64")
305		if err != nil {
306			return nil, err
307		}
308		return math.Float64frombits(bits.(uint64)), nil
309	default:
310		return nil, fmt.Errorf("expected []byte or primitive type")
311	}
312}
313
// parseInt parses val as a signed integer literal and returns it as a value
// of the type named by typ, which must be one of "int", "int8", "int16",
// "int32", "rune" or "int64"; any other typ panics.
//
// val is parsed with base 0, so base prefixes and underscores are accepted as
// in Go integer literals. If val is malformed or out of range for typ, the
// returned value is nil and the error is the one reported by strconv.ParseInt.
func parseInt(val, typ string) (any, error) {
	var bitSize int
	switch typ {
	case "int", "int64":
		// The int type may be either 32 or 64 bits. If 32, the corpus may still
		// hold 64-bit values produced by fuzzing runs on 64-bit architectures.
		// Those are parsed at 64 bits and then implicitly wrapped to fit a
		// regular int: the test case remains interesting even though the
		// specific input value is platform-dependent.
		bitSize = 64
	case "int8":
		bitSize = 8
	case "int16":
		bitSize = 16
	case "int32", "rune":
		bitSize = 32
	default:
		panic("unreachable")
	}

	i, err := strconv.ParseInt(val, 0, bitSize)
	if err != nil {
		// Do not hand back a clamped or zero value alongside the error.
		return nil, err
	}

	switch typ {
	case "int":
		return int(i), nil
	case "int8":
		return int8(i), nil
	case "int16":
		return int16(i), nil
	case "int32", "rune":
		return int32(i), nil
	default: // "int64"; every other name was rejected above.
		return i, nil
	}
}
340
// parseUint parses val as an unsigned integer literal and returns it as a
// value of the type named by typ, which must be one of "uint", "uint8",
// "byte", "uint16", "uint32" or "uint64"; any other typ panics.
//
// val is parsed with base 0, so base prefixes and underscores are accepted as
// in Go integer literals. If val is malformed or out of range for typ, the
// returned value is nil and the error is the one reported by
// strconv.ParseUint.
func parseUint(val, typ string) (any, error) {
	var bitSize int
	switch typ {
	case "uint", "uint64":
		// uint is parsed at 64 bits and then converted, mirroring how parseInt
		// treats int.
		bitSize = 64
	case "uint8", "byte":
		bitSize = 8
	case "uint16":
		bitSize = 16
	case "uint32":
		bitSize = 32
	default:
		panic("unreachable")
	}

	u, err := strconv.ParseUint(val, 0, bitSize)
	if err != nil {
		// Do not hand back a clamped or zero value alongside the error.
		return nil, err
	}

	switch typ {
	case "uint":
		return uint(u), nil
	case "uint8", "byte":
		return uint8(u), nil
	case "uint16":
		return uint16(u), nil
	case "uint32":
		return uint32(u), nil
	default: // "uint64"; every other name was rejected above.
		return u, nil
	}
}
362