1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package scanner
6
7import (
8	"bytes"
9	"fmt"
10	"io"
11	"strings"
12	"testing"
13	"unicode/utf8"
14)
15
// A StringReader delivers its data one string segment at a time via Read.
// Each Read call serves bytes from the current segment only, so segment
// boundaries show up as short reads. A segment that does not fit into the
// destination buffer is continued by subsequent calls instead of being
// truncated, and an empty segment produces a (0, nil) read.
type StringReader struct {
	data []string // segments to deliver, in order
	step int      // index of the segment currently being delivered
	off  int      // number of bytes of data[step] already delivered
}

// Read copies as much of the current segment as fits into p and reports the
// number of bytes copied. It returns (0, io.EOF) once all segments have been
// consumed.
func (r *StringReader) Read(p []byte) (n int, err error) {
	if r.step >= len(r.data) {
		return 0, io.EOF
	}
	rest := r.data[r.step][r.off:]
	n = copy(p, rest)
	if n < len(rest) {
		// p was too small: remember how far we got so the remainder of
		// the segment is delivered by the next call rather than dropped.
		r.off += n
		return n, nil
	}
	// Segment fully delivered (this includes empty segments).
	r.step++
	r.off = 0
	return n, nil
}
32
33func readRuneSegments(t *testing.T, segments []string) {
34	got := ""
35	want := strings.Join(segments, "")
36	s := new(Scanner).Init(&StringReader{data: segments})
37	for {
38		ch := s.Next()
39		if ch == EOF {
40			break
41		}
42		got += string(ch)
43	}
44	if got != want {
45		t.Errorf("segments=%v got=%s want=%s", segments, got, want)
46	}
47}
48
// segmentList holds the inputs for TestNext. Each entry is a sequence of
// segments that readRuneSegments joins to form the expected text.
var segmentList = [][]string{
	// no segments at all, and a single empty segment
	{},
	{""},
	// the same three-character text spelled as literals and as escapes
	{"日", "本語"},
	{"\u65e5", "\u672c", "\u8a9e"},
	{"\U000065e5", " ", "\U0000672c", "\U00008a9e"},
	// segment boundaries fall inside multi-byte UTF-8 sequences
	{"\xe6", "\x97\xa5\xe6", "\x9c\xac\xe8\xaa\x9e"},
	// ASCII text, without and with an empty segment in the middle
	{"Hello", ", ", "World", "!"},
	{"Hello", ", ", "", "World", "!"},
}
59
60func TestNext(t *testing.T) {
61	for _, s := range segmentList {
62		readRuneSegments(t, s)
63	}
64}
65
// A token pairs the token value a scan is expected to return with the source
// text that produces it.
type token struct {
	// tok is the expected token class or literal character.
	tok rune
	// text is the source text of the token.
	text string
}
70
// f100 is a run of 100 'f' characters. It is used to build long identifiers,
// literals and comments for the token tests. Generating it avoids a literal
// whose length cannot be verified by eye.
var f100 = strings.Repeat("f", 100)
72
// tokenList enumerates tokens together with their source text. makeSource
// writes every text into a generated source, so the list order is also the
// order in which the tokens appear in that source. The Comment entries such
// as "// identifiers" are themselves tokens and double as section headings.
var tokenList = []token{
	{Comment, "// line comments"},
	{Comment, "//"},
	{Comment, "////"},
	{Comment, "// comment"},
	{Comment, "// /* comment */"},
	{Comment, "// // comment //"},
	{Comment, "//" + f100},

	{Comment, "// general comments"},
	{Comment, "/**/"},
	{Comment, "/***/"},
	{Comment, "/* comment */"},
	{Comment, "/* // comment */"},
	{Comment, "/* /* comment */"},
	{Comment, "/*\n comment\n*/"},
	{Comment, "/*" + f100 + "*/"},

	{Comment, "// identifiers"},
	{Ident, "a"},
	{Ident, "a0"},
	{Ident, "foobar"},
	{Ident, "abc123"},
	{Ident, "LGTM"},
	{Ident, "_"},
	{Ident, "_abc123"},
	{Ident, "abc123_"},
	{Ident, "_abc_123_"},
	{Ident, "_äöü"},
	{Ident, "_本"},
	{Ident, "äöü"},
	{Ident, "本"},
	{Ident, "a۰۱۸"},
	{Ident, "foo६४"},
	{Ident, "bar9876"},
	{Ident, f100},

	{Comment, "// decimal ints"},
	{Int, "0"},
	{Int, "1"},
	{Int, "9"},
	{Int, "42"},
	{Int, "1234567890"},

	{Comment, "// octal ints"},
	{Int, "00"},
	{Int, "01"},
	{Int, "07"},
	{Int, "042"},
	{Int, "01234567"},

	{Comment, "// hexadecimal ints"},
	{Int, "0x0"},
	{Int, "0x1"},
	{Int, "0xf"},
	{Int, "0x42"},
	{Int, "0x123456789abcDEF"},
	{Int, "0x" + f100},
	{Int, "0X0"},
	{Int, "0X1"},
	{Int, "0XF"},
	{Int, "0X42"},
	{Int, "0X123456789abcDEF"},
	{Int, "0X" + f100},

	{Comment, "// floats"},
	{Float, "0."},
	{Float, "1."},
	{Float, "42."},
	{Float, "01234567890."},
	{Float, ".0"},
	{Float, ".1"},
	{Float, ".42"},
	{Float, ".0123456789"},
	{Float, "0.0"},
	{Float, "1.0"},
	{Float, "42.0"},
	{Float, "01234567890.0"},
	{Float, "0e0"},
	{Float, "1e0"},
	{Float, "42e0"},
	{Float, "01234567890e0"},
	{Float, "0E0"},
	{Float, "1E0"},
	{Float, "42E0"},
	{Float, "01234567890E0"},
	{Float, "0e+10"},
	{Float, "1e-10"},
	{Float, "42e+10"},
	{Float, "01234567890e-10"},
	{Float, "0E+10"},
	{Float, "1E-10"},
	{Float, "42E+10"},
	{Float, "01234567890E-10"},

	{Comment, "// chars"},
	{Char, `' '`},
	{Char, `'a'`},
	{Char, `'本'`},
	{Char, `'\a'`},
	{Char, `'\b'`},
	{Char, `'\f'`},
	{Char, `'\n'`},
	{Char, `'\r'`},
	{Char, `'\t'`},
	{Char, `'\v'`},
	{Char, `'\''`},
	{Char, `'\000'`},
	{Char, `'\777'`},
	{Char, `'\x00'`},
	{Char, `'\xff'`},
	{Char, `'\u0000'`},
	{Char, `'\ufA16'`},
	{Char, `'\U00000000'`},
	{Char, `'\U0000ffAB'`},

	{Comment, "// strings"},
	{String, `" "`},
	{String, `"a"`},
	{String, `"本"`},
	{String, `"\a"`},
	{String, `"\b"`},
	{String, `"\f"`},
	{String, `"\n"`},
	{String, `"\r"`},
	{String, `"\t"`},
	{String, `"\v"`},
	{String, `"\""`},
	{String, `"\000"`},
	{String, `"\777"`},
	{String, `"\x00"`},
	{String, `"\xff"`},
	{String, `"\u0000"`},
	{String, `"\ufA16"`},
	{String, `"\U00000000"`},
	{String, `"\U0000ffAB"`},
	{String, `"` + f100 + `"`},

	{Comment, "// raw strings"},
	{RawString, "``"},
	{RawString, "`\\`"},
	{RawString, "`" + "\n\n/* foobar */\n\n" + "`"},
	{RawString, "`" + f100 + "`"},

	{Comment, "// individual characters"},
	// NUL character is not allowed
	{'\x01', "\x01"},
	{' ' - 1, string(' ' - 1)},
	{'+', "+"},
	{'/', "/"},
	{'.', "."},
	{'~', "~"},
	{'(', "("},
}
227
228func makeSource(pattern string) *bytes.Buffer {
229	var buf bytes.Buffer
230	for _, k := range tokenList {
231		fmt.Fprintf(&buf, pattern, k.text)
232	}
233	return &buf
234}
235
// checkTok verifies the result of a scan step: got must equal want, the
// scanner's Line must equal line, and TokenText must return text. A token
// mismatch is fatal; line and text mismatches are reported as errors.
func checkTok(t *testing.T, s *Scanner, line int, got, want rune, text string) {
	t.Helper() // report failures at the caller's line, not inside this helper
	if got != want {
		t.Fatalf("tok = %s, want %s for %q", TokenString(got), TokenString(want), text)
	}
	if s.Line != line {
		t.Errorf("line = %d, want %d for %q", s.Line, line, text)
	}
	if stext := s.TokenText(); stext != text {
		t.Errorf("text = %q, want %q", stext, text)
		return
	}
	// check idempotency of TokenText() call
	if stext := s.TokenText(); stext != text {
		t.Errorf("text = %q, want %q (idempotency check)", stext, text)
	}
}
254
255func checkTokErr(t *testing.T, s *Scanner, line int, want rune, text string) {
256	prevCount := s.ErrorCount
257	checkTok(t, s, line, s.Scan(), want, text)
258	if s.ErrorCount != prevCount+1 {
259		t.Fatalf("want error for %q", text)
260	}
261}
262
// countNewlines returns the number of '\n' characters in s.
func countNewlines(s string) int {
	return strings.Count(s, "\n")
}
272
273func testScan(t *testing.T, mode uint) {
274	s := new(Scanner).Init(makeSource(" \t%s\n"))
275	s.Mode = mode
276	tok := s.Scan()
277	line := 1
278	for _, k := range tokenList {
279		if mode&SkipComments == 0 || k.tok != Comment {
280			checkTok(t, s, line, tok, k.tok, k.text)
281			tok = s.Scan()
282		}
283		line += countNewlines(k.text) + 1 // each token is on a new line
284	}
285	checkTok(t, s, line, tok, EOF, "")
286}
287
288func TestScan(t *testing.T) {
289	testScan(t, GoTokens)
290	testScan(t, GoTokens&^SkipComments)
291}
292
293func TestInvalidExponent(t *testing.T) {
294	const src = "1.5e 1.5E 1e+ 1e- 1.5z"
295	s := new(Scanner).Init(strings.NewReader(src))
296	s.Error = func(s *Scanner, msg string) {
297		const want = "exponent has no digits"
298		if msg != want {
299			t.Errorf("%s: got error %q; want %q", s.TokenText(), msg, want)
300		}
301	}
302	checkTokErr(t, s, 1, Float, "1.5e")
303	checkTokErr(t, s, 1, Float, "1.5E")
304	checkTokErr(t, s, 1, Float, "1e+")
305	checkTokErr(t, s, 1, Float, "1e-")
306	checkTok(t, s, 1, s.Scan(), Float, "1.5")
307	checkTok(t, s, 1, s.Scan(), Ident, "z")
308	checkTok(t, s, 1, s.Scan(), EOF, "")
309	if s.ErrorCount != 4 {
310		t.Errorf("%d errors, want 4", s.ErrorCount)
311	}
312}
313
314func TestPosition(t *testing.T) {
315	src := makeSource("\t\t\t\t%s\n")
316	s := new(Scanner).Init(src)
317	s.Mode = GoTokens &^ SkipComments
318	s.Scan()
319	pos := Position{"", 4, 1, 5}
320	for _, k := range tokenList {
321		if s.Offset != pos.Offset {
322			t.Errorf("offset = %d, want %d for %q", s.Offset, pos.Offset, k.text)
323		}
324		if s.Line != pos.Line {
325			t.Errorf("line = %d, want %d for %q", s.Line, pos.Line, k.text)
326		}
327		if s.Column != pos.Column {
328			t.Errorf("column = %d, want %d for %q", s.Column, pos.Column, k.text)
329		}
330		pos.Offset += 4 + len(k.text) + 1     // 4 tabs + token bytes + newline
331		pos.Line += countNewlines(k.text) + 1 // each token is on a new line
332		s.Scan()
333	}
334	// make sure there were no token-internal errors reported by scanner
335	if s.ErrorCount != 0 {
336		t.Errorf("%d errors", s.ErrorCount)
337	}
338}
339
340func TestScanZeroMode(t *testing.T) {
341	src := makeSource("%s\n")
342	str := src.String()
343	s := new(Scanner).Init(src)
344	s.Mode = 0       // don't recognize any token classes
345	s.Whitespace = 0 // don't skip any whitespace
346	tok := s.Scan()
347	for i, ch := range str {
348		if tok != ch {
349			t.Fatalf("%d. tok = %s, want %s", i, TokenString(tok), TokenString(ch))
350		}
351		tok = s.Scan()
352	}
353	if tok != EOF {
354		t.Fatalf("tok = %s, want EOF", TokenString(tok))
355	}
356	if s.ErrorCount != 0 {
357		t.Errorf("%d errors", s.ErrorCount)
358	}
359}
360
361func testScanSelectedMode(t *testing.T, mode uint, class rune) {
362	src := makeSource("%s\n")
363	s := new(Scanner).Init(src)
364	s.Mode = mode
365	tok := s.Scan()
366	for tok != EOF {
367		if tok < 0 && tok != class {
368			t.Fatalf("tok = %s, want %s", TokenString(tok), TokenString(class))
369		}
370		tok = s.Scan()
371	}
372	if s.ErrorCount != 0 {
373		t.Errorf("%d errors", s.ErrorCount)
374	}
375}
376
377func TestScanSelectedMask(t *testing.T) {
378	testScanSelectedMode(t, 0, 0)
379	testScanSelectedMode(t, ScanIdents, Ident)
380	// Don't test ScanInts and ScanNumbers since some parts of
381	// the floats in the source look like (invalid) octal ints
382	// and ScanNumbers may return either Int or Float.
383	testScanSelectedMode(t, ScanChars, Char)
384	testScanSelectedMode(t, ScanStrings, String)
385	testScanSelectedMode(t, SkipComments, 0)
386	testScanSelectedMode(t, ScanComments, Comment)
387}
388
389func TestScanCustomIdent(t *testing.T) {
390	const src = "faab12345 a12b123 a12 3b"
391	s := new(Scanner).Init(strings.NewReader(src))
392	// ident = ( 'a' | 'b' ) { digit } .
393	// digit = '0' .. '3' .
394	// with a maximum length of 4
395	s.IsIdentRune = func(ch rune, i int) bool {
396		return i == 0 && (ch == 'a' || ch == 'b') || 0 < i && i < 4 && '0' <= ch && ch <= '3'
397	}
398	checkTok(t, s, 1, s.Scan(), 'f', "f")
399	checkTok(t, s, 1, s.Scan(), Ident, "a")
400	checkTok(t, s, 1, s.Scan(), Ident, "a")
401	checkTok(t, s, 1, s.Scan(), Ident, "b123")
402	checkTok(t, s, 1, s.Scan(), Int, "45")
403	checkTok(t, s, 1, s.Scan(), Ident, "a12")
404	checkTok(t, s, 1, s.Scan(), Ident, "b123")
405	checkTok(t, s, 1, s.Scan(), Ident, "a12")
406	checkTok(t, s, 1, s.Scan(), Int, "3")
407	checkTok(t, s, 1, s.Scan(), Ident, "b")
408	checkTok(t, s, 1, s.Scan(), EOF, "")
409}
410
411func TestScanNext(t *testing.T) {
412	const BOM = '\uFEFF'
413	BOMs := string(BOM)
414	s := new(Scanner).Init(strings.NewReader(BOMs + "if a == bcd /* com" + BOMs + "ment */ {\n\ta += c\n}" + BOMs + "// line comment ending in eof"))
415	checkTok(t, s, 1, s.Scan(), Ident, "if") // the first BOM is ignored
416	checkTok(t, s, 1, s.Scan(), Ident, "a")
417	checkTok(t, s, 1, s.Scan(), '=', "=")
418	checkTok(t, s, 0, s.Next(), '=', "")
419	checkTok(t, s, 0, s.Next(), ' ', "")
420	checkTok(t, s, 0, s.Next(), 'b', "")
421	checkTok(t, s, 1, s.Scan(), Ident, "cd")
422	checkTok(t, s, 1, s.Scan(), '{', "{")
423	checkTok(t, s, 2, s.Scan(), Ident, "a")
424	checkTok(t, s, 2, s.Scan(), '+', "+")
425	checkTok(t, s, 0, s.Next(), '=', "")
426	checkTok(t, s, 2, s.Scan(), Ident, "c")
427	checkTok(t, s, 3, s.Scan(), '}', "}")
428	checkTok(t, s, 3, s.Scan(), BOM, BOMs)
429	checkTok(t, s, 3, s.Scan(), -1, "")
430	if s.ErrorCount != 0 {
431		t.Errorf("%d errors", s.ErrorCount)
432	}
433}
434
// TestScanWhitespace declares every character from 1 up to but excluding
// ' ' as whitespace and checks that Scan skips them all and returns the
// character that follows.
func TestScanWhitespace(t *testing.T) {
	const orig = 'x'

	var (
		buf bytes.Buffer
		ws  uint64
	)
	// start at 1, NUL character is not allowed
	for ch := 1; ch < ' '; ch++ {
		buf.WriteByte(byte(ch))
		ws |= uint64(1) << uint(ch)
	}
	buf.WriteByte(orig)

	s := new(Scanner).Init(&buf)
	s.Mode = 0
	s.Whitespace = ws
	if tok := s.Scan(); tok != orig {
		t.Errorf("tok = %s, want %s", TokenString(tok), TokenString(orig))
	}
}
454
// testError scans the first token of src and checks that
//   - the first error reported to the scanner's Error handler carries the
//     message msg at position pos (formatted with Position.String),
//   - the token returned by Scan is tok, and
//   - the handler was called and ErrorCount is non-zero.
func testError(t *testing.T, src, pos, msg string, tok rune) {
	// Report the failures raised directly in this function at the caller's
	// line. This matters because TestError invokes this helper many times.
	t.Helper()
	s := new(Scanner).Init(strings.NewReader(src))
	errorCalled := false
	s.Error = func(s *Scanner, m string) {
		if errorCalled {
			// only look at first error
			return
		}
		errorCalled = true
		if p := s.Pos().String(); p != pos {
			t.Errorf("pos = %q, want %q for %q", p, pos, src)
		}
		if m != msg {
			t.Errorf("msg = %q, want %q for %q", m, msg, src)
		}
	}
	if tk := s.Scan(); tk != tok {
		t.Errorf("tok = %s, want %s for %q", TokenString(tk), TokenString(tok), src)
	}
	if !errorCalled {
		t.Errorf("error handler not called for %q", src)
	}
	if s.ErrorCount == 0 {
		t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src)
	}
}
481
482func TestError(t *testing.T) {
483	testError(t, "\x00", "<input>:1:1", "invalid character NUL", 0)
484	testError(t, "\x80", "<input>:1:1", "invalid UTF-8 encoding", utf8.RuneError)
485	testError(t, "\xff", "<input>:1:1", "invalid UTF-8 encoding", utf8.RuneError)
486
487	testError(t, "a\x00", "<input>:1:2", "invalid character NUL", Ident)
488	testError(t, "ab\x80", "<input>:1:3", "invalid UTF-8 encoding", Ident)
489	testError(t, "abc\xff", "<input>:1:4", "invalid UTF-8 encoding", Ident)
490
491	testError(t, `"a`+"\x00", "<input>:1:3", "invalid character NUL", String)
492	testError(t, `"ab`+"\x80", "<input>:1:4", "invalid UTF-8 encoding", String)
493	testError(t, `"abc`+"\xff", "<input>:1:5", "invalid UTF-8 encoding", String)
494
495	testError(t, "`a"+"\x00", "<input>:1:3", "invalid character NUL", RawString)
496	testError(t, "`ab"+"\x80", "<input>:1:4", "invalid UTF-8 encoding", RawString)
497	testError(t, "`abc"+"\xff", "<input>:1:5", "invalid UTF-8 encoding", RawString)
498
499	testError(t, `'\"'`, "<input>:1:3", "invalid char escape", Char)
500	testError(t, `"\'"`, "<input>:1:3", "invalid char escape", String)
501
502	testError(t, `01238`, "<input>:1:6", "invalid digit '8' in octal literal", Int)
503	testError(t, `01238123`, "<input>:1:9", "invalid digit '8' in octal literal", Int)
504	testError(t, `0x`, "<input>:1:3", "hexadecimal literal has no digits", Int)
505	testError(t, `0xg`, "<input>:1:3", "hexadecimal literal has no digits", Int)
506	testError(t, `'aa'`, "<input>:1:4", "invalid char literal", Char)
507	testError(t, `1.5e`, "<input>:1:5", "exponent has no digits", Float)
508	testError(t, `1.5E`, "<input>:1:5", "exponent has no digits", Float)
509	testError(t, `1.5e+`, "<input>:1:6", "exponent has no digits", Float)
510	testError(t, `1.5e-`, "<input>:1:6", "exponent has no digits", Float)
511
512	testError(t, `'`, "<input>:1:2", "literal not terminated", Char)
513	testError(t, `'`+"\n", "<input>:1:2", "literal not terminated", Char)
514	testError(t, `"abc`, "<input>:1:5", "literal not terminated", String)
515	testError(t, `"abc`+"\n", "<input>:1:5", "literal not terminated", String)
516	testError(t, "`abc\n", "<input>:2:1", "literal not terminated", RawString)
517	testError(t, `/*/`, "<input>:1:4", "comment not terminated", EOF)
518}
519
// An errReader is a reader whose Read always fails: it returns (0, err)
// where err is not io.EOF.
type errReader struct{}

func (errReader) Read(_ []byte) (n int, err error) {
	// some error that is not io.EOF
	err = io.ErrNoProgress
	return 0, err
}
526
527func TestIOError(t *testing.T) {
528	s := new(Scanner).Init(errReader{})
529	errorCalled := false
530	s.Error = func(s *Scanner, msg string) {
531		if !errorCalled {
532			if want := io.ErrNoProgress.Error(); msg != want {
533				t.Errorf("msg = %q, want %q", msg, want)
534			}
535			errorCalled = true
536		}
537	}
538	tok := s.Scan()
539	if tok != EOF {
540		t.Errorf("tok = %s, want EOF", TokenString(tok))
541	}
542	if !errorCalled {
543		t.Errorf("error handler not called")
544	}
545}
546
// checkPos reports an error on t if got and want differ in Offset, Line or
// Column. The Filename field is not compared.
func checkPos(t *testing.T, got, want Position) {
	t.Helper() // report failures at the caller's line, not inside this helper
	if got.Offset != want.Offset || got.Line != want.Line || got.Column != want.Column {
		t.Errorf("got offset, line, column = %d, %d, %d; want %d, %d, %d",
			got.Offset, got.Line, got.Column, want.Offset, want.Line, want.Column)
	}
}
553
554func checkNextPos(t *testing.T, s *Scanner, offset, line, column int, char rune) {
555	if ch := s.Next(); ch != char {
556		t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
557	}
558	want := Position{Offset: offset, Line: line, Column: column}
559	checkPos(t, s.Pos(), want)
560}
561
562func checkScanPos(t *testing.T, s *Scanner, offset, line, column int, char rune) {
563	want := Position{Offset: offset, Line: line, Column: column}
564	checkPos(t, s.Pos(), want)
565	if ch := s.Scan(); ch != char {
566		t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
567		if string(ch) != s.TokenText() {
568			t.Errorf("tok = %q, want %q", s.TokenText(), string(ch))
569		}
570	}
571	checkPos(t, s.Position, want)
572}
573
574func TestPos(t *testing.T) {
575	// corner case: empty source
576	s := new(Scanner).Init(strings.NewReader(""))
577	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
578	s.Peek() // peek doesn't affect the position
579	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
580
581	// corner case: source with only a newline
582	s = new(Scanner).Init(strings.NewReader("\n"))
583	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
584	checkNextPos(t, s, 1, 2, 1, '\n')
585	// after EOF position doesn't change
586	for i := 10; i > 0; i-- {
587		checkScanPos(t, s, 1, 2, 1, EOF)
588	}
589	if s.ErrorCount != 0 {
590		t.Errorf("%d errors", s.ErrorCount)
591	}
592
593	// corner case: source with only a single character
594	s = new(Scanner).Init(strings.NewReader("本"))
595	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
596	checkNextPos(t, s, 3, 1, 2, '本')
597	// after EOF position doesn't change
598	for i := 10; i > 0; i-- {
599		checkScanPos(t, s, 3, 1, 2, EOF)
600	}
601	if s.ErrorCount != 0 {
602		t.Errorf("%d errors", s.ErrorCount)
603	}
604
605	// positions after calling Next
606	s = new(Scanner).Init(strings.NewReader("  foo६४  \n\n本語\n"))
607	checkNextPos(t, s, 1, 1, 2, ' ')
608	s.Peek() // peek doesn't affect the position
609	checkNextPos(t, s, 2, 1, 3, ' ')
610	checkNextPos(t, s, 3, 1, 4, 'f')
611	checkNextPos(t, s, 4, 1, 5, 'o')
612	checkNextPos(t, s, 5, 1, 6, 'o')
613	checkNextPos(t, s, 8, 1, 7, '६')
614	checkNextPos(t, s, 11, 1, 8, '४')
615	checkNextPos(t, s, 12, 1, 9, ' ')
616	checkNextPos(t, s, 13, 1, 10, ' ')
617	checkNextPos(t, s, 14, 2, 1, '\n')
618	checkNextPos(t, s, 15, 3, 1, '\n')
619	checkNextPos(t, s, 18, 3, 2, '本')
620	checkNextPos(t, s, 21, 3, 3, '語')
621	checkNextPos(t, s, 22, 4, 1, '\n')
622	// after EOF position doesn't change
623	for i := 10; i > 0; i-- {
624		checkScanPos(t, s, 22, 4, 1, EOF)
625	}
626	if s.ErrorCount != 0 {
627		t.Errorf("%d errors", s.ErrorCount)
628	}
629
630	// positions after calling Scan
631	s = new(Scanner).Init(strings.NewReader("abc\n本語\n\nx"))
632	s.Mode = 0
633	s.Whitespace = 0
634	checkScanPos(t, s, 0, 1, 1, 'a')
635	s.Peek() // peek doesn't affect the position
636	checkScanPos(t, s, 1, 1, 2, 'b')
637	checkScanPos(t, s, 2, 1, 3, 'c')
638	checkScanPos(t, s, 3, 1, 4, '\n')
639	checkScanPos(t, s, 4, 2, 1, '本')
640	checkScanPos(t, s, 7, 2, 2, '語')
641	checkScanPos(t, s, 10, 2, 3, '\n')
642	checkScanPos(t, s, 11, 3, 1, '\n')
643	checkScanPos(t, s, 12, 4, 1, 'x')
644	// after EOF position doesn't change
645	for i := 10; i > 0; i-- {
646		checkScanPos(t, s, 13, 4, 2, EOF)
647	}
648	if s.ErrorCount != 0 {
649		t.Errorf("%d errors", s.ErrorCount)
650	}
651}
652
// A countReader is an always-empty reader that counts how many times Read
// has been called; the count is the value of the countReader itself.
type countReader int

// Read records the call and reports end of input without touching the
// buffer.
func (c *countReader) Read(_ []byte) (n int, err error) {
	*c = *c + 1
	return 0, io.EOF
}
659
660func TestNextEOFHandling(t *testing.T) {
661	var r countReader
662
663	// corner case: empty source
664	s := new(Scanner).Init(&r)
665
666	tok := s.Next()
667	if tok != EOF {
668		t.Error("1) EOF not reported")
669	}
670
671	tok = s.Peek()
672	if tok != EOF {
673		t.Error("2) EOF not reported")
674	}
675
676	if r != 1 {
677		t.Errorf("scanner called Read %d times, not once", r)
678	}
679}
680
681func TestScanEOFHandling(t *testing.T) {
682	var r countReader
683
684	// corner case: empty source
685	s := new(Scanner).Init(&r)
686
687	tok := s.Scan()
688	if tok != EOF {
689		t.Error("1) EOF not reported")
690	}
691
692	tok = s.Peek()
693	if tok != EOF {
694		t.Error("2) EOF not reported")
695	}
696
697	if r != 1 {
698		t.Errorf("scanner called Read %d times, not once", r)
699	}
700}
701
// TestIssue29723 checks that TokenText can be called from within the Error
// handler while an unterminated string is being scanned, and that it
// returns the text scanned so far.
func TestIssue29723(t *testing.T) {
	const want = `"`
	s := new(Scanner).Init(strings.NewReader(`x "`))
	s.Error = func(sc *Scanner, _ string) {
		// this call shouldn't panic
		if got := sc.TokenText(); got != want {
			t.Errorf("got %q; want %q", got, want)
		}
	}
	// drain the source
	for s.Scan() != EOF {
	}
}
714
// TestNumbers scans number literals in all supported bases. For each case,
// src is the source, tokens lists the expected token texts separated by
// single spaces, and tok and err describe the first token: its class and
// the first error reported while scanning it ("" for none).
func TestNumbers(t *testing.T) {
	tests := []struct {
		tok              rune
		src, tokens, err string
	}{
		// binaries
		{Int, "0b0", "0b0", ""},
		{Int, "0b1010", "0b1010", ""},
		{Int, "0B1110", "0B1110", ""},

		{Int, "0b", "0b", "binary literal has no digits"},
		{Int, "0b0190", "0b0190", "invalid digit '9' in binary literal"},
		{Int, "0b01a0", "0b01 a0", ""}, // only accept 0-9

		// binary floats (invalid)
		{Float, "0b.", "0b.", "invalid radix point in binary literal"},
		{Float, "0b.1", "0b.1", "invalid radix point in binary literal"},
		{Float, "0b1.0", "0b1.0", "invalid radix point in binary literal"},
		{Float, "0b1e10", "0b1e10", "'e' exponent requires decimal mantissa"},
		{Float, "0b1P-1", "0b1P-1", "'P' exponent requires hexadecimal mantissa"},

		// octals
		{Int, "0o0", "0o0", ""},
		{Int, "0o1234", "0o1234", ""},
		{Int, "0O1234", "0O1234", ""},

		{Int, "0o", "0o", "octal literal has no digits"},
		{Int, "0o8123", "0o8123", "invalid digit '8' in octal literal"},
		{Int, "0o1293", "0o1293", "invalid digit '9' in octal literal"},
		{Int, "0o12a3", "0o12 a3", ""}, // only accept 0-9

		// octal floats (invalid)
		{Float, "0o.", "0o.", "invalid radix point in octal literal"},
		{Float, "0o.2", "0o.2", "invalid radix point in octal literal"},
		{Float, "0o1.2", "0o1.2", "invalid radix point in octal literal"},
		{Float, "0o1E+2", "0o1E+2", "'E' exponent requires decimal mantissa"},
		{Float, "0o1p10", "0o1p10", "'p' exponent requires hexadecimal mantissa"},

		// 0-octals
		{Int, "0", "0", ""},
		{Int, "0123", "0123", ""},

		{Int, "08123", "08123", "invalid digit '8' in octal literal"},
		{Int, "01293", "01293", "invalid digit '9' in octal literal"},
		{Int, "0F.", "0 F .", ""}, // only accept 0-9
		{Int, "0123F.", "0123 F .", ""},
		{Int, "0123456x", "0123456 x", ""},

		// decimals
		{Int, "1", "1", ""},
		{Int, "1234", "1234", ""},

		{Int, "1f", "1 f", ""}, // only accept 0-9

		// decimal floats
		{Float, "0.", "0.", ""},
		{Float, "123.", "123.", ""},
		{Float, "0123.", "0123.", ""},

		{Float, ".0", ".0", ""},
		{Float, ".123", ".123", ""},
		{Float, ".0123", ".0123", ""},

		{Float, "0.0", "0.0", ""},
		{Float, "123.123", "123.123", ""},
		{Float, "0123.0123", "0123.0123", ""},

		{Float, "0e0", "0e0", ""},
		{Float, "123e+0", "123e+0", ""},
		{Float, "0123E-1", "0123E-1", ""},

		{Float, "0.e+1", "0.e+1", ""},
		{Float, "123.E-10", "123.E-10", ""},
		{Float, "0123.e123", "0123.e123", ""},

		{Float, ".0e-1", ".0e-1", ""},
		{Float, ".123E+10", ".123E+10", ""},
		{Float, ".0123E123", ".0123E123", ""},

		{Float, "0.0e1", "0.0e1", ""},
		{Float, "123.123E-10", "123.123E-10", ""},
		{Float, "0123.0123e+456", "0123.0123e+456", ""},

		{Float, "0e", "0e", "exponent has no digits"},
		{Float, "0E+", "0E+", "exponent has no digits"},
		{Float, "1e+f", "1e+ f", "exponent has no digits"},
		{Float, "0p0", "0p0", "'p' exponent requires hexadecimal mantissa"},
		{Float, "1.0P-1", "1.0P-1", "'P' exponent requires hexadecimal mantissa"},

		// hexadecimals
		{Int, "0x0", "0x0", ""},
		{Int, "0x1234", "0x1234", ""},
		{Int, "0xcafef00d", "0xcafef00d", ""},
		{Int, "0XCAFEF00D", "0XCAFEF00D", ""},

		{Int, "0x", "0x", "hexadecimal literal has no digits"},
		{Int, "0x1g", "0x1 g", ""},

		// hexadecimal floats
		{Float, "0x0p0", "0x0p0", ""},
		{Float, "0x12efp-123", "0x12efp-123", ""},
		{Float, "0xABCD.p+0", "0xABCD.p+0", ""},
		{Float, "0x.0189P-0", "0x.0189P-0", ""},
		{Float, "0x1.ffffp+1023", "0x1.ffffp+1023", ""},

		{Float, "0x.", "0x.", "hexadecimal literal has no digits"},
		{Float, "0x0.", "0x0.", "hexadecimal mantissa requires a 'p' exponent"},
		{Float, "0x.0", "0x.0", "hexadecimal mantissa requires a 'p' exponent"},
		{Float, "0x1.1", "0x1.1", "hexadecimal mantissa requires a 'p' exponent"},
		{Float, "0x1.1e0", "0x1.1e0", "hexadecimal mantissa requires a 'p' exponent"},
		{Float, "0x1.2gp1a", "0x1.2 gp1a", "hexadecimal mantissa requires a 'p' exponent"},
		{Float, "0x0p", "0x0p", "exponent has no digits"},
		{Float, "0xeP-", "0xeP-", "exponent has no digits"},
		{Float, "0x1234PAB", "0x1234P AB", "exponent has no digits"},
		{Float, "0x1.2p1a", "0x1.2p1 a", ""},

		// separators
		{Int, "0b_1000_0001", "0b_1000_0001", ""},
		{Int, "0o_600", "0o_600", ""},
		{Int, "0_466", "0_466", ""},
		{Int, "1_000", "1_000", ""},
		{Float, "1_000.000_1", "1_000.000_1", ""},
		{Int, "0x_f00d", "0x_f00d", ""},
		{Float, "0x_f00d.0p1_2", "0x_f00d.0p1_2", ""},

		{Int, "0b__1000", "0b__1000", "'_' must separate successive digits"},
		{Int, "0o60___0", "0o60___0", "'_' must separate successive digits"},
		{Int, "0466_", "0466_", "'_' must separate successive digits"},
		{Float, "1_.", "1_.", "'_' must separate successive digits"},
		{Float, "0._1", "0._1", "'_' must separate successive digits"},
		{Float, "2.7_e0", "2.7_e0", "'_' must separate successive digits"},
		{Int, "0x___0", "0x___0", "'_' must separate successive digits"},
		{Float, "0x1.0_p0", "0x1.0_p0", "'_' must separate successive digits"},
	}

	for _, tc := range tests {
		s := new(Scanner).Init(strings.NewReader(tc.src))
		// firstErr holds the first error reported since it was last reset
		var firstErr string
		s.Error = func(_ *Scanner, msg string) {
			if firstErr == "" {
				firstErr = msg
			}
		}

		for i, want := range strings.Split(tc.tokens, " ") {
			firstErr = ""
			tok := s.Scan()
			lit := s.TokenText()
			// token class and error are only specified for the first token
			if i == 0 {
				if tok != tc.tok {
					t.Errorf("%q: got token %s; want %s", tc.src, TokenString(tok), TokenString(tc.tok))
				}
				if firstErr != tc.err {
					t.Errorf("%q: got error %q; want %q", tc.src, firstErr, tc.err)
				}
			}
			if lit != want {
				t.Errorf("%q: got literal %q (%s); want %s", tc.src, lit, TokenString(tok), want)
			}
		}

		// make sure we read all
		if tok := s.Scan(); tok != EOF {
			t.Errorf("%q: got %s; want EOF", tc.src, TokenString(tok))
		}
	}
}
880
881func TestIssue30320(t *testing.T) {
882	for _, test := range []struct {
883		in, want string
884		mode     uint
885	}{
886		{"foo01.bar31.xx-0-1-1-0", "01 31 0 1 1 0", ScanInts},
887		{"foo0/12/0/5.67", "0 12 0 5 67", ScanInts},
888		{"xxx1e0yyy", "1 0", ScanInts},
889		{"1_2", "1_2", ScanInts},
890		{"xxx1.0yyy2e3ee", "1 0 2 3", ScanInts},
891		{"xxx1.0yyy2e3ee", "1.0 2e3", ScanFloats},
892	} {
893		got := extractInts(test.in, test.mode)
894		if got != test.want {
895			t.Errorf("%q: got %q; want %q", test.in, got, test.want)
896		}
897	}
898}
899
// extractInts scans t with the given scanner mode and returns the texts of
// all Int and Float tokens found, in order, separated by single spaces.
func extractInts(t string, mode uint) (res string) {
	s := new(Scanner).Init(strings.NewReader(t))
	s.Mode = mode

	var lits []string
	for tok := s.Scan(); tok != EOF; tok = s.Scan() {
		if tok == Int || tok == Float {
			lits = append(lits, s.TokenText())
		}
	}
	return strings.Join(lits, " ")
}
916
// TestIssue50909 scans a multi-line source with an IsIdentRune predicate
// that accepts every character except newline, and checks the number of
// tokens and their concatenated text. The loop stops after at most 10
// tokens.
func TestIssue50909(t *testing.T) {
	const (
		wantText  = "hello world!"
		wantCount = 3
	)

	s := new(Scanner).Init(strings.NewReader("hello \n\nworld\n!\n"))
	s.IsIdentRune = func(ch rune, _ int) bool { return ch != '\n' }

	var text strings.Builder
	count := 0
	for {
		// scan first, then check the bound, as a single loop condition would
		if s.Scan() == EOF || count >= 10 {
			break
		}
		text.WriteString(s.TokenText())
		count++
	}

	if got := text.String(); got != wantText || count != wantCount {
		t.Errorf("got %q (n = %d); want %q (n = %d)", got, count, wantText, wantCount)
	}
}
935