1 #ifndef PEGEN_H
2 #define PEGEN_H
3
4 #define PY_SSIZE_T_CLEAN
5 #include <Python.h>
6 #include <token.h>
7 #include <pycore_ast.h>
8
/* Parser flag values.
 *
 * The definitions inside the `#if 0` regions are compiled out: those names are
 * NOT available to code that includes this header.  They are kept only so the
 * values they once occupied stay visible next to the live flags. */
#if 0
#define PyPARSE_YIELD_IS_KEYWORD        0x0001
#endif

#define PyPARSE_DONT_IMPLY_DEDENT       0x0002

#if 0
#define PyPARSE_WITH_IS_KEYWORD         0x0003
#define PyPARSE_PRINT_IS_FUNCTION       0x0004
#define PyPARSE_UNICODE_LITERALS        0x0008
#endif

#define PyPARSE_IGNORE_COOKIE           0x0010
#define PyPARSE_BARRY_AS_BDFL           0x0020
#define PyPARSE_TYPE_COMMENTS           0x0040
#define PyPARSE_ASYNC_HACKS             0x0080
#define PyPARSE_ALLOW_INCOMPLETE_INPUT  0x0100
26
/* Sentinel position value.  RAISE_ERROR_KNOWN_LOCATION compares its column
 * arguments against this value and forwards it unchanged instead of adding 1. */
#define CURRENT_POS (-5)
28
/* One memoization record.  Records chain through `next`, forming a singly
 * linked list (Token.memo points at the head of such a list). */
typedef struct _memo {
    int type;            // same kind of value as the `type` argument of _PyPegen_insert_memo
    void *node;          // untyped payload stored with the record
    int mark;            // same kind of value as the `mark` argument of _PyPegen_insert_memo
    struct _memo *next;  // next record in the chain, or NULL
} Memo;
35
36 typedef struct {
37 int type;
38 PyObject *bytes;
39 int level;
40 int lineno, col_offset, end_lineno, end_col_offset;
41 Memo *memo;
42 } Token;
43
/* Pairs a keyword's text with an integer token type. */
typedef struct {
    char *str;
    int type;
} KeywordToken;
48
49
/* Array of (line number, tag) entries for "# type: ignore" comments. */
typedef struct {
    struct {
        int lineno;
        char *comment;  // The " <tag>" in "# type: ignore <tag>"
    } *items;
    size_t size;       // presumably the allocated capacity of `items` -- confirm in pegen.c
    size_t num_items;  // presumably the number of entries in use -- confirm in pegen.c
} growable_comment_array;
58
59 typedef struct {
60 struct tok_state *tok;
61 Token **tokens;
62 int mark;
63 int fill, size;
64 PyArena *arena;
65 KeywordToken **keywords;
66 char **soft_keywords;
67 int n_keyword_lists;
68 int start_rule;
69 int *errcode;
70 int parsing_started;
71 PyObject* normalize;
72 int starting_lineno;
73 int starting_col_offset;
74 int error_indicator;
75 int flags;
76 int feature_version;
77 growable_comment_array type_ignore_comments;
78 Token *known_err_token;
79 int level;
80 int call_invalid_rules;
81 } Parser;
82
83 typedef struct {
84 cmpop_ty cmpop;
85 expr_ty expr;
86 } CmpopExprPair;
87
88 typedef struct {
89 expr_ty key;
90 expr_ty value;
91 } KeyValuePair;
92
93 typedef struct {
94 expr_ty key;
95 pattern_ty pattern;
96 } KeyPatternPair;
97
98 typedef struct {
99 arg_ty arg;
100 expr_ty value;
101 } NameDefaultPair;
102
103 typedef struct {
104 asdl_arg_seq *plain_names;
105 asdl_seq *names_with_defaults; // asdl_seq* of NameDefaultsPair's
106 } SlashWithDefault;
107
108 typedef struct {
109 arg_ty vararg;
110 asdl_seq *kwonlyargs; // asdl_seq* of NameDefaultsPair's
111 arg_ty kwarg;
112 } StarEtc;
113
114 typedef struct { operator_ty kind; } AugOperator;
/* An untyped element tagged with a flag saying whether it is a keyword. */
typedef struct {
    void *element;
    int is_keyword;  // non-zero when `element` is a keyword -- TODO confirm the other case in pegen.c
} KeywordOrStarred;
119
120 // Internal parser functions
121 #if defined(Py_DEBUG)
122 void _PyPegen_clear_memo_statistics(void);
123 PyObject *_PyPegen_get_memo_statistics(void);
124 #endif
125
126 int _PyPegen_insert_memo(Parser *p, int mark, int type, void *node);
127 int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
128 int _PyPegen_is_memoized(Parser *p, int type, void *pres);
129
130 int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
131 int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
132 int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*);
133 int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
134
135 Token *_PyPegen_expect_token(Parser *p, int type);
136 void* _PyPegen_expect_forced_result(Parser *p, void* result, const char* expected);
137 Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
138 expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
139 expr_ty _PyPegen_soft_keyword_token(Parser *p);
140 Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
141 int _PyPegen_fill_token(Parser *p);
142 expr_ty _PyPegen_name_token(Parser *p);
143 expr_ty _PyPegen_number_token(Parser *p);
144 void *_PyPegen_string_token(Parser *p);
145 Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset);
146
// Error handling functions and APIs

/* Selects which kind of target is being validated.
 * _RAISE_SYNTAX_ERROR_INVALID_TARGET reports "cannot assign to ..." for
 * STAR_TARGETS and FOR_TARGETS, and "cannot delete ..." for DEL_TARGETS. */
typedef enum {
    STAR_TARGETS,
    DEL_TARGETS,
    FOR_TARGETS
} TARGETS_TYPE;
153
154 int _Pypegen_raise_decode_error(Parser *p);
155 void _PyPegen_raise_tokenizer_init_error(PyObject *filename);
156 int _Pypegen_tokenizer_error(Parser *p);
157 void *_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...);
158 void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
159 Py_ssize_t lineno, Py_ssize_t col_offset,
160 Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
161 const char *errmsg, va_list va);
162 void _Pypegen_set_syntax_error(Parser* p, Token* last_token);
163 Py_LOCAL_INLINE(void *)
RAISE_ERROR_KNOWN_LOCATION(Parser * p,PyObject * errtype,Py_ssize_t lineno,Py_ssize_t col_offset,Py_ssize_t end_lineno,Py_ssize_t end_col_offset,const char * errmsg,...)164 RAISE_ERROR_KNOWN_LOCATION(Parser *p, PyObject *errtype,
165 Py_ssize_t lineno, Py_ssize_t col_offset,
166 Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
167 const char *errmsg, ...)
168 {
169 va_list va;
170 va_start(va, errmsg);
171 Py_ssize_t _col_offset = (col_offset == CURRENT_POS ? CURRENT_POS : col_offset + 1);
172 Py_ssize_t _end_col_offset = (end_col_offset == CURRENT_POS ? CURRENT_POS : end_col_offset + 1);
173 _PyPegen_raise_error_known_location(p, errtype, lineno, _col_offset, end_lineno, _end_col_offset, errmsg, va);
174 va_end(va);
175 return NULL;
176 }
/* Convenience wrappers around the error-raising functions.
 *
 * Every macro expands to an expression that uses a variable named `p`
 * (the Parser *) which must be in scope at the expansion site.  `a` and `b`
 * must point to objects with lineno / col_offset / end_lineno /
 * end_col_offset members.  `##__VA_ARGS__` (a GNU extension) lets the format
 * arguments be omitted. */
#define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, msg, ##__VA_ARGS__)
#define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, msg, ##__VA_ARGS__)
// Span runs from the start of `a` to the end of `b`.
#define RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, msg, ...) \
    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, (b)->end_lineno, (b)->end_col_offset, msg, ##__VA_ARGS__)
// Span is exactly that of `a`.
#define RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, msg, ...) \
    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, (a)->end_lineno, (a)->end_col_offset, msg, ##__VA_ARGS__)
// Span starts at `a`; the end is passed as the CURRENT_POS sentinel.
#define RAISE_SYNTAX_ERROR_STARTING_FROM(a, msg, ...) \
    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, CURRENT_POS, CURRENT_POS, msg, ##__VA_ARGS__)
#define RAISE_SYNTAX_ERROR_INVALID_TARGET(type, e) _RAISE_SYNTAX_ERROR_INVALID_TARGET(p, type, e)
186
187 Py_LOCAL_INLINE(void *)
CHECK_CALL(Parser * p,void * result)188 CHECK_CALL(Parser *p, void *result)
189 {
190 if (result == NULL) {
191 assert(PyErr_Occurred());
192 p->error_indicator = 1;
193 }
194 return result;
195 }
196
197 /* This is needed for helper functions that are allowed to
198 return NULL without an error. Example: _PyPegen_seq_extract_starred_exprs */
199 Py_LOCAL_INLINE(void *)
CHECK_CALL_NULL_ALLOWED(Parser * p,void * result)200 CHECK_CALL_NULL_ALLOWED(Parser *p, void *result)
201 {
202 if (result == NULL && PyErr_Occurred()) {
203 p->error_indicator = 1;
204 }
205 return result;
206 }
207
/* Run `result` through CHECK_CALL / CHECK_CALL_NULL_ALLOWED and cast it back
 * to `type`.  Both macros use a variable named `p` (the Parser *) that must
 * be in scope at the expansion site. */
#define CHECK(type, result) ((type) CHECK_CALL(p, result))
#define CHECK_NULL_ALLOWED(type, result) ((type) CHECK_CALL_NULL_ALLOWED(p, result))
210
211 expr_ty _PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type);
212 const char *_PyPegen_get_expr_name(expr_ty);
213 Py_LOCAL_INLINE(void *)
_RAISE_SYNTAX_ERROR_INVALID_TARGET(Parser * p,TARGETS_TYPE type,void * e)214 _RAISE_SYNTAX_ERROR_INVALID_TARGET(Parser *p, TARGETS_TYPE type, void *e)
215 {
216 expr_ty invalid_target = CHECK_NULL_ALLOWED(expr_ty, _PyPegen_get_invalid_target(e, type));
217 if (invalid_target != NULL) {
218 const char *msg;
219 if (type == STAR_TARGETS || type == FOR_TARGETS) {
220 msg = "cannot assign to %s";
221 }
222 else {
223 msg = "cannot delete %s";
224 }
225 return RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
226 invalid_target,
227 msg,
228 _PyPegen_get_expr_name(invalid_target)
229 );
230 return RAISE_SYNTAX_ERROR_KNOWN_LOCATION(invalid_target, "invalid syntax");
231 }
232 return NULL;
233 }
234
235 // Action utility functions
236
237 void *_PyPegen_dummy_name(Parser *p, ...);
238 void * _PyPegen_seq_last_item(asdl_seq *seq);
239 #define PyPegen_last_item(seq, type) ((type)_PyPegen_seq_last_item((asdl_seq*)seq))
240 void * _PyPegen_seq_first_item(asdl_seq *seq);
241 #define PyPegen_first_item(seq, type) ((type)_PyPegen_seq_first_item((asdl_seq*)seq))
242 #define UNUSED(expr) do { (void)(expr); } while (0)
/* Expand to the five trailing "location + arena" arguments of a call.
 *
 * EXTRA_EXPR takes the start position from `head` and the end position from
 * `tail`; both must point to objects with lineno / col_offset / end_lineno /
 * end_col_offset members.  EXTRA instead uses the local variables
 * _start_lineno, _start_col_offset, _end_lineno and _end_col_offset.
 * Both use a variable named `p` (the Parser *) for the arena.
 *
 * Every use of `head` and `tail` is parenthesized so that arguments such as
 * `a + 1` expand correctly. */
#define EXTRA_EXPR(head, tail) (head)->lineno, (head)->col_offset, (tail)->end_lineno, (tail)->end_col_offset, p->arena
#define EXTRA _start_lineno, _start_col_offset, _end_lineno, _end_col_offset, p->arena
245 PyObject *_PyPegen_new_type_comment(Parser *, const char *);
246
247 Py_LOCAL_INLINE(PyObject *)
NEW_TYPE_COMMENT(Parser * p,Token * tc)248 NEW_TYPE_COMMENT(Parser *p, Token *tc)
249 {
250 if (tc == NULL) {
251 return NULL;
252 }
253 const char *bytes = PyBytes_AsString(tc->bytes);
254 if (bytes == NULL) {
255 goto error;
256 }
257 PyObject *tco = _PyPegen_new_type_comment(p, bytes);
258 if (tco == NULL) {
259 goto error;
260 }
261 return tco;
262 error:
263 p->error_indicator = 1; // Inline CHECK_CALL
264 return NULL;
265 }
266
267 Py_LOCAL_INLINE(void *)
INVALID_VERSION_CHECK(Parser * p,int version,char * msg,void * node)268 INVALID_VERSION_CHECK(Parser *p, int version, char *msg, void *node)
269 {
270 if (node == NULL) {
271 p->error_indicator = 1; // Inline CHECK_CALL
272 return NULL;
273 }
274 if (p->feature_version < version) {
275 p->error_indicator = 1;
276 return RAISE_SYNTAX_ERROR("%s only supported in Python 3.%i and greater",
277 msg, version);
278 }
279 return node;
280 }
281
/* Run `node` through INVALID_VERSION_CHECK and cast the result to `type`.
 * Uses a variable named `p` (the Parser *) from the expansion site. */
#define CHECK_VERSION(type, version, msg, node) ((type) INVALID_VERSION_CHECK(p, version, msg, node))
283
284 arg_ty _PyPegen_add_type_comment_to_arg(Parser *, arg_ty, Token *);
285 PyObject *_PyPegen_new_identifier(Parser *, const char *);
286 asdl_seq *_PyPegen_singleton_seq(Parser *, void *);
287 asdl_seq *_PyPegen_seq_insert_in_front(Parser *, void *, asdl_seq *);
288 asdl_seq *_PyPegen_seq_append_to_end(Parser *, asdl_seq *, void *);
289 asdl_seq *_PyPegen_seq_flatten(Parser *, asdl_seq *);
290 expr_ty _PyPegen_join_names_with_dot(Parser *, expr_ty, expr_ty);
291 int _PyPegen_seq_count_dots(asdl_seq *);
292 alias_ty _PyPegen_alias_for_star(Parser *, int, int, int, int, PyArena *);
293 asdl_identifier_seq *_PyPegen_map_names_to_ids(Parser *, asdl_expr_seq *);
294 CmpopExprPair *_PyPegen_cmpop_expr_pair(Parser *, cmpop_ty, expr_ty);
295 asdl_int_seq *_PyPegen_get_cmpops(Parser *p, asdl_seq *);
296 asdl_expr_seq *_PyPegen_get_exprs(Parser *, asdl_seq *);
297 expr_ty _PyPegen_set_expr_context(Parser *, expr_ty, expr_context_ty);
298 KeyValuePair *_PyPegen_key_value_pair(Parser *, expr_ty, expr_ty);
299 asdl_expr_seq *_PyPegen_get_keys(Parser *, asdl_seq *);
300 asdl_expr_seq *_PyPegen_get_values(Parser *, asdl_seq *);
301 KeyPatternPair *_PyPegen_key_pattern_pair(Parser *, expr_ty, pattern_ty);
302 asdl_expr_seq *_PyPegen_get_pattern_keys(Parser *, asdl_seq *);
303 asdl_pattern_seq *_PyPegen_get_patterns(Parser *, asdl_seq *);
304 NameDefaultPair *_PyPegen_name_default_pair(Parser *, arg_ty, expr_ty, Token *);
305 SlashWithDefault *_PyPegen_slash_with_default(Parser *, asdl_arg_seq *, asdl_seq *);
306 StarEtc *_PyPegen_star_etc(Parser *, arg_ty, asdl_seq *, arg_ty);
307 arguments_ty _PyPegen_make_arguments(Parser *, asdl_arg_seq *, SlashWithDefault *,
308 asdl_arg_seq *, asdl_seq *, StarEtc *);
309 arguments_ty _PyPegen_empty_arguments(Parser *);
310 AugOperator *_PyPegen_augoperator(Parser*, operator_ty type);
311 stmt_ty _PyPegen_function_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
312 stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
313 KeywordOrStarred *_PyPegen_keyword_or_starred(Parser *, void *, int);
314 asdl_expr_seq *_PyPegen_seq_extract_starred_exprs(Parser *, asdl_seq *);
315 asdl_keyword_seq *_PyPegen_seq_delete_starred_exprs(Parser *, asdl_seq *);
316 expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_expr_seq *, asdl_seq *,
317 int lineno, int col_offset, int end_lineno,
318 int end_col_offset, PyArena *arena);
319 expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *);
320 expr_ty _PyPegen_ensure_imaginary(Parser *p, expr_ty);
321 expr_ty _PyPegen_ensure_real(Parser *p, expr_ty);
322 asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
323 int _PyPegen_check_barry_as_flufl(Parser *, Token *);
324 int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
325 mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *);
326 void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
327 expr_ty _PyPegen_get_last_comprehension_item(comprehension_ty comprehension);
328 void *_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq *comprehensions);
329
330 // Parser API
331
332 Parser *_PyPegen_Parser_New(struct tok_state *, int, int, int, int *, PyArena *);
333 void _PyPegen_Parser_Free(Parser *);
334 mod_ty _PyPegen_run_parser_from_file_pointer(FILE *, int, PyObject *, const char *,
335 const char *, const char *, PyCompilerFlags *, int *, PyArena *);
336 void *_PyPegen_run_parser(Parser *);
337 mod_ty _PyPegen_run_parser_from_string(const char *, int, PyObject *, PyCompilerFlags *, PyArena *);
338 asdl_stmt_seq *_PyPegen_interactive_exit(Parser *);
339
340 // Generated function in parse.c - function definition in python.gram
341 void *_PyPegen_parse(Parser *);
342
343 #endif
344