// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package token defines constants representing the lexical tokens of the Go
// programming language and basic operations on tokens (printing, predicates).
package token

import (
	"strconv"
	"unicode"
	"unicode/utf8"
)

// Token is the set of lexical tokens of the Go programming language.
// Each token is identified by one of the integer constants declared below.
type Token int

// The list of tokens.
//
// The constants are numbered consecutively with iota, so their declaration
// order is significant. The unexported *_beg and *_end markers bracket each
// class of tokens; the predicates IsLiteral, IsOperator and IsKeyword, as well
// as the keyword table built in init, rely on these ranges.
const (
	// Special tokens
	ILLEGAL Token = iota
	EOF
	COMMENT

	literal_beg
	// Identifiers and basic type literals
	// (these tokens stand for classes of literals)
	IDENT  // main
	INT    // 12345
	FLOAT  // 123.45
	IMAG   // 123.45i
	CHAR   // 'a'
	STRING // "abc"
	literal_end

	operator_beg
	// Operators and delimiters
	ADD // +
	SUB // -
	MUL // *
	QUO // /
	REM // %

	AND     // &
	OR      // |
	XOR     // ^
	SHL     // <<
	SHR     // >>
	AND_NOT // &^

	ADD_ASSIGN // +=
	SUB_ASSIGN // -=
	MUL_ASSIGN // *=
	QUO_ASSIGN // /=
	REM_ASSIGN // %=

	AND_ASSIGN     // &=
	OR_ASSIGN      // |=
	XOR_ASSIGN     // ^=
	SHL_ASSIGN     // <<=
	SHR_ASSIGN     // >>=
	AND_NOT_ASSIGN // &^=

	LAND  // &&
	LOR   // ||
	ARROW // <-
	INC   // ++
	DEC   // --

	EQL    // ==
	LSS    // <
	GTR    // >
	ASSIGN // =
	NOT    // !

	NEQ      // !=
	LEQ      // <=
	GEQ      // >=
	DEFINE   // :=
	ELLIPSIS // ...

	LPAREN // (
	LBRACK // [
	LBRACE // {
	COMMA  // ,
	PERIOD // .

	RPAREN    // )
	RBRACK    // ]
	RBRACE    // }
	SEMICOLON // ;
	COLON     // :
	operator_end

	keyword_beg
	// Keywords
	BREAK
	CASE
	CHAN
	CONST
	CONTINUE

	DEFAULT
	DEFER
	ELSE
	FALLTHROUGH
	FOR

	FUNC
	GO
	GOTO
	IF
	IMPORT

	INTERFACE
	MAP
	PACKAGE
	RANGE
	RETURN

	SELECT
	STRUCT
	SWITCH
	TYPE
	VAR
	keyword_end

	additional_beg
	// additional tokens, handled in an ad-hoc manner
	TILDE
	additional_end
)

134var tokens = [...]string{
135	ILLEGAL: "ILLEGAL",
136
137	EOF:     "EOF",
138	COMMENT: "COMMENT",
139
140	IDENT:  "IDENT",
141	INT:    "INT",
142	FLOAT:  "FLOAT",
143	IMAG:   "IMAG",
144	CHAR:   "CHAR",
145	STRING: "STRING",
146
147	ADD: "+",
148	SUB: "-",
149	MUL: "*",
150	QUO: "/",
151	REM: "%",
152
153	AND:     "&",
154	OR:      "|",
155	XOR:     "^",
156	SHL:     "<<",
157	SHR:     ">>",
158	AND_NOT: "&^",
159
160	ADD_ASSIGN: "+=",
161	SUB_ASSIGN: "-=",
162	MUL_ASSIGN: "*=",
163	QUO_ASSIGN: "/=",
164	REM_ASSIGN: "%=",
165
166	AND_ASSIGN:     "&=",
167	OR_ASSIGN:      "|=",
168	XOR_ASSIGN:     "^=",
169	SHL_ASSIGN:     "<<=",
170	SHR_ASSIGN:     ">>=",
171	AND_NOT_ASSIGN: "&^=",
172
173	LAND:  "&&",
174	LOR:   "||",
175	ARROW: "<-",
176	INC:   "++",
177	DEC:   "--",
178
179	EQL:    "==",
180	LSS:    "<",
181	GTR:    ">",
182	ASSIGN: "=",
183	NOT:    "!",
184
185	NEQ:      "!=",
186	LEQ:      "<=",
187	GEQ:      ">=",
188	DEFINE:   ":=",
189	ELLIPSIS: "...",
190
191	LPAREN: "(",
192	LBRACK: "[",
193	LBRACE: "{",
194	COMMA:  ",",
195	PERIOD: ".",
196
197	RPAREN:    ")",
198	RBRACK:    "]",
199	RBRACE:    "}",
200	SEMICOLON: ";",
201	COLON:     ":",
202
203	BREAK:    "break",
204	CASE:     "case",
205	CHAN:     "chan",
206	CONST:    "const",
207	CONTINUE: "continue",
208
209	DEFAULT:     "default",
210	DEFER:       "defer",
211	ELSE:        "else",
212	FALLTHROUGH: "fallthrough",
213	FOR:         "for",
214
215	FUNC:   "func",
216	GO:     "go",
217	GOTO:   "goto",
218	IF:     "if",
219	IMPORT: "import",
220
221	INTERFACE: "interface",
222	MAP:       "map",
223	PACKAGE:   "package",
224	RANGE:     "range",
225	RETURN:    "return",
226
227	SELECT: "select",
228	STRUCT: "struct",
229	SWITCH: "switch",
230	TYPE:   "type",
231	VAR:    "var",
232
233	TILDE: "~",
234}
235
236// String returns the string corresponding to the token tok.
237// For operators, delimiters, and keywords the string is the actual
238// token character sequence (e.g., for the token [ADD], the string is
239// "+"). For all other tokens the string corresponds to the token
240// constant name (e.g. for the token [IDENT], the string is "IDENT").
241func (tok Token) String() string {
242	s := ""
243	if 0 <= tok && tok < Token(len(tokens)) {
244		s = tokens[tok]
245	}
246	if s == "" {
247		s = "token(" + strconv.Itoa(int(tok)) + ")"
248	}
249	return s
250}
251
// A set of constants for precedence-based expression parsing.
// Non-operators have lowest precedence, followed by operators
// starting with precedence 1 up to unary operators. The highest
// precedence serves as "catch-all" precedence for selector,
// indexing, and other operator and delimiter tokens.
const (
	LowestPrec  = 0 // non-operators
	UnaryPrec   = 6
	HighestPrec = 7
)

263// Precedence returns the operator precedence of the binary
264// operator op. If op is not a binary operator, the result
265// is LowestPrecedence.
266func (op Token) Precedence() int {
267	switch op {
268	case LOR:
269		return 1
270	case LAND:
271		return 2
272	case EQL, NEQ, LSS, LEQ, GTR, GEQ:
273		return 3
274	case ADD, SUB, OR, XOR:
275		return 4
276	case MUL, QUO, REM, SHL, SHR, AND, AND_NOT:
277		return 5
278	}
279	return LowestPrec
280}
281
282var keywords map[string]Token
283
284func init() {
285	keywords = make(map[string]Token, keyword_end-(keyword_beg+1))
286	for i := keyword_beg + 1; i < keyword_end; i++ {
287		keywords[tokens[i]] = i
288	}
289}
290
291// Lookup maps an identifier to its keyword token or [IDENT] (if not a keyword).
292func Lookup(ident string) Token {
293	if tok, is_keyword := keywords[ident]; is_keyword {
294		return tok
295	}
296	return IDENT
297}
298
// Predicates

301// IsLiteral returns true for tokens corresponding to identifiers
302// and basic type literals; it returns false otherwise.
303func (tok Token) IsLiteral() bool { return literal_beg < tok && tok < literal_end }
304
305// IsOperator returns true for tokens corresponding to operators and
306// delimiters; it returns false otherwise.
307func (tok Token) IsOperator() bool {
308	return (operator_beg < tok && tok < operator_end) || tok == TILDE
309}
310
311// IsKeyword returns true for tokens corresponding to keywords;
312// it returns false otherwise.
313func (tok Token) IsKeyword() bool { return keyword_beg < tok && tok < keyword_end }
314
// IsExported reports whether name starts with an upper-case letter.
//
// Only the first rune of name is examined. The result is false for the
// empty string and for a name that begins with invalid UTF-8, because
// decoding then yields utf8.RuneError, which is not an upper-case letter.
func IsExported(name string) bool {
	first, _ := utf8.DecodeRuneInString(name)
	return unicode.IsUpper(first)
}

321// IsKeyword reports whether name is a Go keyword, such as "func" or "return".
322func IsKeyword(name string) bool {
323	// TODO: opt: use a perfect hash function instead of a global map.
324	_, ok := keywords[name]
325	return ok
326}
327
328// IsIdentifier reports whether name is a Go identifier, that is, a non-empty
329// string made up of letters, digits, and underscores, where the first character
330// is not a digit. Keywords are not identifiers.
331func IsIdentifier(name string) bool {
332	if name == "" || IsKeyword(name) {
333		return false
334	}
335	for i, c := range name {
336		if !unicode.IsLetter(c) && c != '_' && (i == 0 || !unicode.IsDigit(c)) {
337			return false
338		}
339	}
340	return true
341}
342