/*
 * This file compiles an abstract syntax tree (AST) into Python bytecode.
 *
 * The primary entry point is _PyAST_Compile(), which returns a
 * PyCodeObject. The compiler makes several passes to build the code
 * object:
 *   1. Checks for future statements. See future.c.
 *   2. Builds a symbol table. See symtable.c.
 *   3. Generates code for basic blocks. See compiler_mod() in this file.
 *   4. Assembles the basic blocks into final code. See assemble() in
 *      this file.
 *   5. Optimizes the bytecode (peephole optimizations).
 *
 * Note that compiler_mod() suggests a module, but the module AST type
 * (mod_ty) has cases for expressions and interactive statements.
 *
 * CAUTION: The VISIT_* macros abort the current function when they
 * encounter a problem. So don't invoke them when there is memory
 * which needs to be released. Code blocks are OK, as the compiler
 * structure takes care of releasing those. Use the arena to manage
 * objects.
 */
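
/* Illustrative only (an assumption about typical usage, not part of this
 * file): from C, this pipeline is usually reached through a call such as
 *
 *     PyObject *code = Py_CompileString("x = 1", "<example>", Py_file_input);
 *
 * which parses the source to an AST and then hands it to _PyAST_Compile().
 */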

#include <stdbool.h>

// Need _PyOpcode_RelativeJump from pycore_opcode.h
#define NEED_OPCODE_TABLES

#include "Python.h"
#include "pycore_ast.h"       // _PyAST_GetDocString()
#include "pycore_code.h"      // _PyCode_New()
#include "pycore_compile.h"   // _PyFuture_FromAST()
#include "pycore_long.h"      // _PyLong_GetZero()
#include "pycore_opcode.h"    // _PyOpcode_Caches
#include "pycore_pymem.h"     // _PyMem_IsPtrFreed()
#include "pycore_symtable.h"  // PySTEntryObject

#define DEFAULT_BLOCK_SIZE 16
#define DEFAULT_CODE_SIZE 128
#define DEFAULT_LNOTAB_SIZE 16
#define DEFAULT_CNOTAB_SIZE 32

#define COMP_GENEXP   0
#define COMP_LISTCOMP 1
#define COMP_SETCOMP  2
#define COMP_DICTCOMP 3

/* A soft limit for stack use, to avoid excessive
 * memory use for large constants, etc.
 *
 * The value 30 is plucked out of thin air.
 * Code that could use more stack than this is
 * rare, so the exact value is unimportant.
 */
#define STACK_USE_GUIDELINE 30

/* If we exceed this limit, it should
 * be considered a compiler bug.
 * Currently it should be impossible
 * to exceed STACK_USE_GUIDELINE * 100,
 * as 100 is the maximum parse depth.
 * For performance reasons we will
 * want to reduce this to a
 * few hundred in the future.
 *
 * NOTE: Whatever MAX_ALLOWED_STACK_USE is
 * set to, it should never restrict what Python
 * we can write, just how we compile it.
 */
#define MAX_ALLOWED_STACK_USE (STACK_USE_GUIDELINE * 100)


/* Pseudo-instructions used in the compiler,
 * but turned into NOPs or other instructions
 * by the assembler. */
#define SETUP_FINALLY -1
#define SETUP_CLEANUP -2
#define SETUP_WITH -3
#define POP_BLOCK -4
#define JUMP -5
#define JUMP_NO_INTERRUPT -6
#define POP_JUMP_IF_FALSE -7
#define POP_JUMP_IF_TRUE -8
#define POP_JUMP_IF_NONE -9
#define POP_JUMP_IF_NOT_NONE -10

#define MIN_VIRTUAL_OPCODE -10
#define MAX_ALLOWED_OPCODE 254

#define IS_WITHIN_OPCODE_RANGE(opcode) \
        ((opcode) >= MIN_VIRTUAL_OPCODE && (opcode) <= MAX_ALLOWED_OPCODE)

#define IS_VIRTUAL_OPCODE(opcode) ((opcode) < 0)

#define IS_VIRTUAL_JUMP_OPCODE(opcode) \
        ((opcode) == JUMP || \
         (opcode) == JUMP_NO_INTERRUPT || \
         (opcode) == POP_JUMP_IF_NONE || \
         (opcode) == POP_JUMP_IF_NOT_NONE || \
         (opcode) == POP_JUMP_IF_FALSE || \
         (opcode) == POP_JUMP_IF_TRUE)

/* Opcodes that are not emitted during the codegen stage, only by the assembler. */
#define IS_ASSEMBLER_OPCODE(opcode) \
        ((opcode) == JUMP_FORWARD || \
         (opcode) == JUMP_BACKWARD || \
         (opcode) == JUMP_BACKWARD_NO_INTERRUPT || \
         (opcode) == POP_JUMP_FORWARD_IF_NONE || \
         (opcode) == POP_JUMP_BACKWARD_IF_NONE || \
         (opcode) == POP_JUMP_FORWARD_IF_NOT_NONE || \
         (opcode) == POP_JUMP_BACKWARD_IF_NOT_NONE || \
         (opcode) == POP_JUMP_FORWARD_IF_TRUE || \
         (opcode) == POP_JUMP_BACKWARD_IF_TRUE || \
         (opcode) == POP_JUMP_FORWARD_IF_FALSE || \
         (opcode) == POP_JUMP_BACKWARD_IF_FALSE)


#define IS_BACKWARDS_JUMP_OPCODE(opcode) \
        ((opcode) == JUMP_BACKWARD || \
         (opcode) == JUMP_BACKWARD_NO_INTERRUPT || \
         (opcode) == POP_JUMP_BACKWARD_IF_NONE || \
         (opcode) == POP_JUMP_BACKWARD_IF_NOT_NONE || \
         (opcode) == POP_JUMP_BACKWARD_IF_TRUE || \
         (opcode) == POP_JUMP_BACKWARD_IF_FALSE)


#define IS_TOP_LEVEL_AWAIT(c) ( \
        (c->c_flags->cf_flags & PyCF_ALLOW_TOP_LEVEL_AWAIT) \
        && (c->u->u_ste->ste_type == ModuleBlock))

struct instr {
    int i_opcode;
    int i_oparg;
    /* target block (if jump instruction) */
    struct basicblock_ *i_target;
    /* target block when exception is raised, should not be set by front-end. */
    struct basicblock_ *i_except;
    int i_lineno;
    int i_end_lineno;
    int i_col_offset;
    int i_end_col_offset;
};

typedef struct excepthandler {
    struct instr *setup;
    int offset;
} ExceptHandler;

typedef struct exceptstack {
    struct basicblock_ *handlers[CO_MAXBLOCKS+1];
    int depth;
} ExceptStack;

#define LOG_BITS_PER_INT 5
#define MASK_LOW_LOG_BITS 31

static inline int
is_bit_set_in_table(const uint32_t *table, int bitindex) {
    /* Is the relevant bit set in the relevant word? */
    /* 256 bits fit into 8 32-bit words.
     * The word is indexed by (bitindex >> log2(bits per word)).
     * The bit within the word is selected by the low bits of bitindex.
     */
    if (bitindex >= 0 && bitindex < 256) {
        uint32_t word = table[bitindex >> LOG_BITS_PER_INT];
        return (word >> (bitindex & MASK_LOW_LOG_BITS)) & 1;
    }
    else {
        return 0;
    }
}
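
/* Worked example (illustrative): for bitindex == 37, the word index is
 * 37 >> 5 == 1 and the bit within that word is 37 & 31 == 5, so the call
 * tests bit 5 of table[1]. */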

static inline int
is_relative_jump(struct instr *i)
{
    return is_bit_set_in_table(_PyOpcode_RelativeJump, i->i_opcode);
}

static inline int
is_block_push(struct instr *instr)
{
    int opcode = instr->i_opcode;
    return opcode == SETUP_FINALLY || opcode == SETUP_WITH || opcode == SETUP_CLEANUP;
}

static inline int
is_jump(struct instr *i)
{
    return IS_VIRTUAL_JUMP_OPCODE(i->i_opcode) ||
           is_bit_set_in_table(_PyOpcode_Jump, i->i_opcode);
}

static int
instr_size(struct instr *instruction)
{
    int opcode = instruction->i_opcode;
    assert(!IS_VIRTUAL_OPCODE(opcode));
    int oparg = HAS_ARG(opcode) ? instruction->i_oparg : 0;
    int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg);
    int caches = _PyOpcode_Caches[opcode];
    return extended_args + 1 + caches;
}

static void
write_instr(_Py_CODEUNIT *codestr, struct instr *instruction, int ilen)
{
    int opcode = instruction->i_opcode;
    assert(!IS_VIRTUAL_OPCODE(opcode));
    int oparg = HAS_ARG(opcode) ? instruction->i_oparg : 0;
    int caches = _PyOpcode_Caches[opcode];
    switch (ilen - caches) {
        case 4:
            *codestr++ = _Py_MAKECODEUNIT(EXTENDED_ARG, (oparg >> 24) & 0xFF);
            /* fall through */
        case 3:
            *codestr++ = _Py_MAKECODEUNIT(EXTENDED_ARG, (oparg >> 16) & 0xFF);
            /* fall through */
        case 2:
            *codestr++ = _Py_MAKECODEUNIT(EXTENDED_ARG, (oparg >> 8) & 0xFF);
            /* fall through */
        case 1:
            *codestr++ = _Py_MAKECODEUNIT(opcode, oparg & 0xFF);
            break;
        default:
            Py_UNREACHABLE();
    }
    while (caches--) {
        *codestr++ = _Py_MAKECODEUNIT(CACHE, 0);
    }
}
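
/* Encoding example (illustrative): an instruction whose oparg is 0x1234 does
 * not fit in one byte, so instr_size() reports two code units plus caches,
 * and write_instr() emits
 *
 *     EXTENDED_ARG  0x12
 *     <opcode>      0x34
 *
 * followed by _PyOpcode_Caches[opcode] CACHE units. */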

typedef struct basicblock_ {
    /* Each basicblock in a compilation unit is linked via b_list in the
       reverse order that the blocks are allocated. b_list points to the next
       block, not to be confused with b_next, which is next by control flow. */
    struct basicblock_ *b_list;
    /* Exception stack at start of block, used by assembler to create the exception handling table */
    ExceptStack *b_exceptstack;
    /* pointer to an array of instructions, initially NULL */
    struct instr *b_instr;
    /* If b_next is non-NULL, it is a pointer to the next
       block reached by normal control flow. */
    struct basicblock_ *b_next;
    /* number of instructions used */
    int b_iused;
    /* length of instruction array (b_instr) */
    int b_ialloc;
    /* Number of predecessors that a block has. */
    int b_predecessors;
    /* depth of stack upon entry of block, computed by stackdepth() */
    int b_startdepth;
    /* instruction offset for block, computed by assemble_jump_offsets() */
    int b_offset;
    /* Basic block has no fall through (it ends with a return, raise or jump) */
    unsigned b_nofallthrough : 1;
    /* Basic block is an exception handler that preserves lasti */
    unsigned b_preserve_lasti : 1;
    /* Used by compiler passes to mark whether they have visited a basic block. */
    unsigned b_visited : 1;
    /* Basic block exits scope (it ends with a return or raise) */
    unsigned b_exit : 1;
    /* b_return is true if a RETURN_VALUE opcode is inserted. */
    unsigned b_return : 1;
} basicblock;

/* fblockinfo tracks the current frame block.

   A frame block is used to handle loops, try/except, and try/finally.
   It's called a frame block to distinguish it from a basic block in the
   compiler IR.
*/

enum fblocktype { WHILE_LOOP, FOR_LOOP, TRY_EXCEPT, FINALLY_TRY, FINALLY_END,
                  WITH, ASYNC_WITH, HANDLER_CLEANUP, POP_VALUE, EXCEPTION_HANDLER,
                  EXCEPTION_GROUP_HANDLER, ASYNC_COMPREHENSION_GENERATOR };

struct fblockinfo {
    enum fblocktype fb_type;
    basicblock *fb_block;
    /* (optional) type-specific exit or cleanup block */
    basicblock *fb_exit;
    /* (optional) additional information required for unwinding */
    void *fb_datum;
};

enum {
    COMPILER_SCOPE_MODULE,
    COMPILER_SCOPE_CLASS,
    COMPILER_SCOPE_FUNCTION,
    COMPILER_SCOPE_ASYNC_FUNCTION,
    COMPILER_SCOPE_LAMBDA,
    COMPILER_SCOPE_COMPREHENSION,
};

/* The following items change on entry and exit of code blocks.
   They must be saved and restored when returning to a block.
*/
struct compiler_unit {
    PySTEntryObject *u_ste;

    PyObject *u_name;
    PyObject *u_qualname;  /* dot-separated qualified name (lazy) */
    int u_scope_type;

    /* The following fields are dicts that map objects to their index
       in co_XXX. The index is used as the argument for opcodes that
       refer to those collections.
    */
    PyObject *u_consts;    /* all constants */
    PyObject *u_names;     /* all names */
    PyObject *u_varnames;  /* local variables */
    PyObject *u_cellvars;  /* cell variables */
    PyObject *u_freevars;  /* free variables */

    PyObject *u_private;   /* for private name mangling */

    Py_ssize_t u_argcount;        /* number of arguments for block */
    Py_ssize_t u_posonlyargcount; /* number of positional only arguments for block */
    Py_ssize_t u_kwonlyargcount;  /* number of keyword only arguments for block */
    /* Pointer to the most recently allocated block. By following b_list
       members, you can reach all earlier allocated blocks. */
    basicblock *u_blocks;
    basicblock *u_curblock;  /* pointer to current block */

    int u_nfblocks;
    struct fblockinfo u_fblock[CO_MAXBLOCKS];

    int u_firstlineno;     /* the first lineno of the block */
    int u_lineno;          /* the lineno for the current stmt */
    int u_col_offset;      /* the offset of the current stmt */
    int u_end_lineno;      /* the end line of the current stmt */
    int u_end_col_offset;  /* the end offset of the current stmt */

    /* true if we need to create an implicit basicblock before the next instr */
    int u_need_new_implicit_block;
};

/* This struct captures the global state of a compilation.

   The u pointer points to the current compilation unit, while units
   for enclosing blocks are stored in c_stack. The u and c_stack are
   managed by compiler_enter_scope() and compiler_exit_scope().

   Note that we don't track recursion levels during compilation - the
   task of detecting and rejecting excessive levels of nesting is
   handled by the symbol analysis pass.

*/

struct compiler {
    PyObject *c_filename;
    struct symtable *c_st;
    PyFutureFeatures *c_future;  /* pointer to module's __future__ */
    PyCompilerFlags *c_flags;

    int c_optimize;              /* optimization level */
    int c_interactive;           /* true if in interactive mode */
    int c_nestlevel;
    PyObject *c_const_cache;     /* Python dict holding all constants,
                                    including names tuple */
    struct compiler_unit *u;     /* compiler state for current block */
    PyObject *c_stack;           /* Python list holding compiler_unit ptrs */
    PyArena *c_arena;            /* pointer to memory allocation arena */
};

typedef struct {
    // A list of strings corresponding to name captures. It is used to track:
    // - Repeated name assignments in the same pattern.
    // - Different name assignments in alternatives.
    // - The order of name assignments in alternatives.
    PyObject *stores;
    // If 0, any name captures against our subject will raise.
    int allow_irrefutable;
    // An array of blocks to jump to on failure. Jumping to fail_pop[i] will pop
    // i items off of the stack. The end result looks like this (with each block
    // falling through to the next):
    // fail_pop[4]: POP_TOP
    // fail_pop[3]: POP_TOP
    // fail_pop[2]: POP_TOP
    // fail_pop[1]: POP_TOP
    // fail_pop[0]: NOP
    basicblock **fail_pop;
    // The current length of fail_pop.
    Py_ssize_t fail_pop_size;
    // The number of items on top of the stack that need to *stay* on top of the
    // stack. Variable captures go beneath these. All of them will be popped on
    // failure.
    Py_ssize_t on_top;
} pattern_context;

static int compiler_enter_scope(struct compiler *, identifier, int, void *, int);
static void compiler_free(struct compiler *);
static basicblock *compiler_new_block(struct compiler *);
static int compiler_next_instr(basicblock *);
static int compiler_addop(struct compiler *, int);
static int compiler_addop_i(struct compiler *, int, Py_ssize_t);
static int compiler_addop_j(struct compiler *, int, basicblock *);
static int compiler_addop_j_noline(struct compiler *, int, basicblock *);
static int compiler_error(struct compiler *, const char *, ...);
static int compiler_warn(struct compiler *, const char *, ...);
static int compiler_nameop(struct compiler *, identifier, expr_context_ty);

static PyCodeObject *compiler_mod(struct compiler *, mod_ty);
static int compiler_visit_stmt(struct compiler *, stmt_ty);
static int compiler_visit_keyword(struct compiler *, keyword_ty);
static int compiler_visit_expr(struct compiler *, expr_ty);
static int compiler_augassign(struct compiler *, stmt_ty);
static int compiler_annassign(struct compiler *, stmt_ty);
static int compiler_subscript(struct compiler *, expr_ty);
static int compiler_slice(struct compiler *, expr_ty);

static int are_all_items_const(asdl_expr_seq *, Py_ssize_t, Py_ssize_t);


static int compiler_with(struct compiler *, stmt_ty, int);
static int compiler_async_with(struct compiler *, stmt_ty, int);
static int compiler_async_for(struct compiler *, stmt_ty);
static int validate_keywords(struct compiler *c, asdl_keyword_seq *keywords);
static int compiler_call_simple_kw_helper(struct compiler *c,
                                          asdl_keyword_seq *keywords,
                                          Py_ssize_t nkwelts);
static int compiler_call_helper(struct compiler *c, int n,
                                asdl_expr_seq *args,
                                asdl_keyword_seq *keywords);
static int compiler_try_except(struct compiler *, stmt_ty);
static int compiler_try_star_except(struct compiler *, stmt_ty);
static int compiler_set_qualname(struct compiler *);

static int compiler_sync_comprehension_generator(
    struct compiler *c,
    asdl_comprehension_seq *generators, int gen_index,
    int depth,
    expr_ty elt, expr_ty val, int type);

static int compiler_async_comprehension_generator(
    struct compiler *c,
    asdl_comprehension_seq *generators, int gen_index,
    int depth,
    expr_ty elt, expr_ty val, int type);

static int compiler_pattern(struct compiler *, pattern_ty, pattern_context *);
static int compiler_match(struct compiler *, stmt_ty);
static int compiler_pattern_subpattern(struct compiler *, pattern_ty,
                                       pattern_context *);

static void clean_basic_block(basicblock *bb);

static PyCodeObject *assemble(struct compiler *, int addNone);

#define CAPSULE_NAME "compile.c compiler unit"

PyObject *
_Py_Mangle(PyObject *privateobj, PyObject *ident)
{
    /* Name mangling: __private becomes _classname__private.
       This is independent of how the name is used. */
    PyObject *result;
    size_t nlen, plen, ipriv;
    Py_UCS4 maxchar;
    if (privateobj == NULL || !PyUnicode_Check(privateobj) ||
        PyUnicode_READ_CHAR(ident, 0) != '_' ||
        PyUnicode_READ_CHAR(ident, 1) != '_') {
        Py_INCREF(ident);
        return ident;
    }
    nlen = PyUnicode_GET_LENGTH(ident);
    plen = PyUnicode_GET_LENGTH(privateobj);
    /* Don't mangle __id__ or names with dots.

       The only time a name with a dot can occur is when
       we are compiling an import statement that has a
       package name.

       TODO(jhylton): Decide whether we want to support
       mangling of the module name, e.g. __M.X.
    */
    if ((PyUnicode_READ_CHAR(ident, nlen-1) == '_' &&
         PyUnicode_READ_CHAR(ident, nlen-2) == '_') ||
        PyUnicode_FindChar(ident, '.', 0, nlen, 1) != -1) {
        Py_INCREF(ident);
        return ident; /* Don't mangle __whatever__ */
    }
    /* Strip leading underscores from class name */
    ipriv = 0;
    while (PyUnicode_READ_CHAR(privateobj, ipriv) == '_')
        ipriv++;
    if (ipriv == plen) {
        Py_INCREF(ident);
        return ident; /* Don't mangle if class is just underscores */
    }
    plen -= ipriv;

    if (plen + nlen >= PY_SSIZE_T_MAX - 1) {
        PyErr_SetString(PyExc_OverflowError,
                        "private identifier too large to be mangled");
        return NULL;
    }

    maxchar = PyUnicode_MAX_CHAR_VALUE(ident);
    if (PyUnicode_MAX_CHAR_VALUE(privateobj) > maxchar)
        maxchar = PyUnicode_MAX_CHAR_VALUE(privateobj);

    result = PyUnicode_New(1 + nlen + plen, maxchar);
    if (!result)
        return NULL;
    /* ident = "_" + priv[ipriv:] + ident # i.e. 1+plen+nlen bytes */
    PyUnicode_WRITE(PyUnicode_KIND(result), PyUnicode_DATA(result), 0, '_');
    if (PyUnicode_CopyCharacters(result, 1, privateobj, ipriv, plen) < 0) {
        Py_DECREF(result);
        return NULL;
    }
    if (PyUnicode_CopyCharacters(result, plen+1, ident, 0, nlen) < 0) {
        Py_DECREF(result);
        return NULL;
    }
    assert(_PyUnicode_CheckConsistency(result, 1));
    return result;
}
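
/* Mangling examples (illustrative): inside a class named Spam,
 *
 *     __ham     -> _Spam__ham
 *     __ham__   -> __ham__     (dunder names are left alone)
 *     __ham.x   -> __ham.x     (dotted names are left alone)
 *
 * and inside a class named _Spam, __ham also becomes _Spam__ham, since
 * the leading underscores of the class name are stripped first. */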

static int
compiler_init(struct compiler *c)
{
    memset(c, 0, sizeof(struct compiler));

    c->c_const_cache = PyDict_New();
    if (!c->c_const_cache) {
        return 0;
    }

    c->c_stack = PyList_New(0);
    if (!c->c_stack) {
        Py_CLEAR(c->c_const_cache);
        return 0;
    }

    return 1;
}

PyCodeObject *
_PyAST_Compile(mod_ty mod, PyObject *filename, PyCompilerFlags *flags,
               int optimize, PyArena *arena)
{
    struct compiler c;
    PyCodeObject *co = NULL;
    PyCompilerFlags local_flags = _PyCompilerFlags_INIT;
    int merged;
    if (!compiler_init(&c))
        return NULL;
    Py_INCREF(filename);
    c.c_filename = filename;
    c.c_arena = arena;
    c.c_future = _PyFuture_FromAST(mod, filename);
    if (c.c_future == NULL)
        goto finally;
    if (!flags) {
        flags = &local_flags;
    }
    merged = c.c_future->ff_features | flags->cf_flags;
    c.c_future->ff_features = merged;
    flags->cf_flags = merged;
    c.c_flags = flags;
    c.c_optimize = (optimize == -1) ? _Py_GetConfig()->optimization_level : optimize;
    c.c_nestlevel = 0;

    _PyASTOptimizeState state;
    state.optimize = c.c_optimize;
    state.ff_features = merged;

    if (!_PyAST_Optimize(mod, arena, &state)) {
        goto finally;
    }

    c.c_st = _PySymtable_Build(mod, filename, c.c_future);
    if (c.c_st == NULL) {
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_SystemError, "no symtable");
        goto finally;
    }

    co = compiler_mod(&c, mod);

 finally:
    compiler_free(&c);
    assert(co || PyErr_Occurred());
    return co;
}

static void
compiler_free(struct compiler *c)
{
    if (c->c_st)
        _PySymtable_Free(c->c_st);
    if (c->c_future)
        PyObject_Free(c->c_future);
    Py_XDECREF(c->c_filename);
    Py_DECREF(c->c_const_cache);
    Py_DECREF(c->c_stack);
}

static PyObject *
list2dict(PyObject *list)
{
    Py_ssize_t i, n;
    PyObject *v, *k;
    PyObject *dict = PyDict_New();
    if (!dict) return NULL;

    n = PyList_Size(list);
    for (i = 0; i < n; i++) {
        v = PyLong_FromSsize_t(i);
        if (!v) {
            Py_DECREF(dict);
            return NULL;
        }
        k = PyList_GET_ITEM(list, i);
        if (PyDict_SetItem(dict, k, v) < 0) {
            Py_DECREF(v);
            Py_DECREF(dict);
            return NULL;
        }
        Py_DECREF(v);
    }
    return dict;
}
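
/* Example (illustrative): list2dict applied to the list ["x", "y", "z"]
 * produces the dict {"x": 0, "y": 1, "z": 2}, i.e. each name is mapped to
 * its index in the original list. */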

/* Return new dict containing names from src that match scope(s).

   src is a symbol table dictionary. If the scope of a name matches
   scope_type, or if its flag bit is set, insert it into the new dict.
   The values are integers, starting at offset and increasing by one for
   each key.
*/

static PyObject *
dictbytype(PyObject *src, int scope_type, int flag, Py_ssize_t offset)
{
    Py_ssize_t i = offset, scope, num_keys, key_i;
    PyObject *k, *v, *dest = PyDict_New();
    PyObject *sorted_keys;

    assert(offset >= 0);
    if (dest == NULL)
        return NULL;

    /* Sort the keys so that we have a deterministic order on the indexes
       saved in the returned dictionary. These indexes are used as indexes
       into the free and cell var storage. Therefore if they aren't
       deterministic, then the generated bytecode is not deterministic.
    */
    sorted_keys = PyDict_Keys(src);
    if (sorted_keys == NULL) {
        Py_DECREF(dest);
        return NULL;
    }
    if (PyList_Sort(sorted_keys) != 0) {
        Py_DECREF(sorted_keys);
        Py_DECREF(dest);
        return NULL;
    }
    num_keys = PyList_GET_SIZE(sorted_keys);

    for (key_i = 0; key_i < num_keys; key_i++) {
        /* XXX this should probably be a macro in symtable.h */
        long vi;
        k = PyList_GET_ITEM(sorted_keys, key_i);
        v = PyDict_GetItemWithError(src, k);
        assert(v && PyLong_Check(v));
        vi = PyLong_AS_LONG(v);
        scope = (vi >> SCOPE_OFFSET) & SCOPE_MASK;

        if (scope == scope_type || vi & flag) {
            PyObject *item = PyLong_FromSsize_t(i);
            if (item == NULL) {
                Py_DECREF(sorted_keys);
                Py_DECREF(dest);
                return NULL;
            }
            i++;
            if (PyDict_SetItem(dest, k, item) < 0) {
                Py_DECREF(sorted_keys);
                Py_DECREF(item);
                Py_DECREF(dest);
                return NULL;
            }
            Py_DECREF(item);
        }
    }
    Py_DECREF(sorted_keys);
    return dest;
}

static void
compiler_unit_check(struct compiler_unit *u)
{
    basicblock *block;
    for (block = u->u_blocks; block != NULL; block = block->b_list) {
        assert(!_PyMem_IsPtrFreed(block));
        if (block->b_instr != NULL) {
            assert(block->b_ialloc > 0);
            assert(block->b_iused >= 0);
            assert(block->b_ialloc >= block->b_iused);
        }
        else {
            assert (block->b_iused == 0);
            assert (block->b_ialloc == 0);
        }
    }
}

static void
compiler_unit_free(struct compiler_unit *u)
{
    basicblock *b, *next;

    compiler_unit_check(u);
    b = u->u_blocks;
    while (b != NULL) {
        if (b->b_instr)
            PyObject_Free((void *)b->b_instr);
        next = b->b_list;
        PyObject_Free((void *)b);
        b = next;
    }
    Py_CLEAR(u->u_ste);
    Py_CLEAR(u->u_name);
    Py_CLEAR(u->u_qualname);
    Py_CLEAR(u->u_consts);
    Py_CLEAR(u->u_names);
    Py_CLEAR(u->u_varnames);
    Py_CLEAR(u->u_freevars);
    Py_CLEAR(u->u_cellvars);
    Py_CLEAR(u->u_private);
    PyObject_Free(u);
}

static int
compiler_set_qualname(struct compiler *c)
{
    Py_ssize_t stack_size;
    struct compiler_unit *u = c->u;
    PyObject *name, *base;

    base = NULL;
    stack_size = PyList_GET_SIZE(c->c_stack);
    assert(stack_size >= 1);
    if (stack_size > 1) {
        int scope, force_global = 0;
        struct compiler_unit *parent;
        PyObject *mangled, *capsule;

        capsule = PyList_GET_ITEM(c->c_stack, stack_size - 1);
        parent = (struct compiler_unit *)PyCapsule_GetPointer(capsule, CAPSULE_NAME);
        assert(parent);

        if (u->u_scope_type == COMPILER_SCOPE_FUNCTION
            || u->u_scope_type == COMPILER_SCOPE_ASYNC_FUNCTION
            || u->u_scope_type == COMPILER_SCOPE_CLASS) {
            assert(u->u_name);
            mangled = _Py_Mangle(parent->u_private, u->u_name);
            if (!mangled)
                return 0;
            scope = _PyST_GetScope(parent->u_ste, mangled);
            Py_DECREF(mangled);
            assert(scope != GLOBAL_IMPLICIT);
            if (scope == GLOBAL_EXPLICIT)
                force_global = 1;
        }

        if (!force_global) {
            if (parent->u_scope_type == COMPILER_SCOPE_FUNCTION
                || parent->u_scope_type == COMPILER_SCOPE_ASYNC_FUNCTION
                || parent->u_scope_type == COMPILER_SCOPE_LAMBDA)
            {
                _Py_DECLARE_STR(dot_locals, ".<locals>");
                base = PyUnicode_Concat(parent->u_qualname,
                                        &_Py_STR(dot_locals));
                if (base == NULL)
                    return 0;
            }
            else {
                Py_INCREF(parent->u_qualname);
                base = parent->u_qualname;
            }
        }
    }

    if (base != NULL) {
        _Py_DECLARE_STR(dot, ".");
        name = PyUnicode_Concat(base, &_Py_STR(dot));
        Py_DECREF(base);
        if (name == NULL)
            return 0;
        PyUnicode_Append(&name, u->u_name);
        if (name == NULL)
            return 0;
    }
    else {
        Py_INCREF(u->u_name);
        name = u->u_name;
    }
    u->u_qualname = name;

    return 1;
}
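
/* Qualname examples (illustrative Python source):
 *
 *     def f():
 *         def g(): ...      # g.__qualname__ == "f.<locals>.g"
 *     class C:
 *         def m(self): ...  # C.m.__qualname__ == "C.m"
 */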


/* Allocate a new block and return a pointer to it.
   Returns NULL on error.
*/

static basicblock *
compiler_new_block(struct compiler *c)
{
    basicblock *b;
    struct compiler_unit *u;

    u = c->u;
    b = (basicblock *)PyObject_Calloc(1, sizeof(basicblock));
    if (b == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    /* Extend the singly linked list of blocks with new block. */
    b->b_list = u->u_blocks;
    u->u_blocks = b;
    return b;
}

static basicblock *
compiler_use_next_block(struct compiler *c, basicblock *block)
{
    assert(block != NULL);
    c->u->u_curblock->b_next = block;
    c->u->u_curblock = block;
    c->u->u_need_new_implicit_block = 0;
    return block;
}

static basicblock *
compiler_copy_block(struct compiler *c, basicblock *block)
{
    /* Cannot copy a block if it has a fallthrough, since
     * a block can only have one fallthrough predecessor.
     */
    assert(block->b_nofallthrough);
    basicblock *result = compiler_new_block(c);
    if (result == NULL) {
        return NULL;
    }
    for (int i = 0; i < block->b_iused; i++) {
        int n = compiler_next_instr(result);
        if (n < 0) {
            return NULL;
        }
        result->b_instr[n] = block->b_instr[i];
    }
    result->b_exit = block->b_exit;
    result->b_nofallthrough = 1;
    return result;
}

/* Returns the offset of the next instruction in the current block's
   b_instr array. Resizes the b_instr as necessary.
   Returns -1 on failure.
*/

static int
compiler_next_instr(basicblock *b)
{
    assert(b != NULL);
    if (b->b_instr == NULL) {
        b->b_instr = (struct instr *)PyObject_Calloc(
                         DEFAULT_BLOCK_SIZE, sizeof(struct instr));
        if (b->b_instr == NULL) {
            PyErr_NoMemory();
            return -1;
        }
        b->b_ialloc = DEFAULT_BLOCK_SIZE;
    }
    else if (b->b_iused == b->b_ialloc) {
        struct instr *tmp;
        size_t oldsize, newsize;
        oldsize = b->b_ialloc * sizeof(struct instr);
        newsize = oldsize << 1;

        if (oldsize > (SIZE_MAX >> 1)) {
            PyErr_NoMemory();
            return -1;
        }

        if (newsize == 0) {
            PyErr_NoMemory();
            return -1;
        }
        b->b_ialloc <<= 1;
        tmp = (struct instr *)PyObject_Realloc(
                                  (void *)b->b_instr, newsize);
        if (tmp == NULL) {
            PyErr_NoMemory();
            return -1;
        }
        b->b_instr = tmp;
        memset((char *)b->b_instr + oldsize, 0, newsize - oldsize);
    }
    return b->b_iused++;
}
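
/* Growth example (illustrative): a block's instruction array starts at
 * DEFAULT_BLOCK_SIZE (16) entries and doubles on each overflow, so its
 * capacity follows 16, 32, 64, ... while b_iused counts the entries
 * actually filled. */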

/* Set the line number and column offset for the following instructions.

   The line number is reset in the following cases:
   - when entering a new scope
   - on each statement
   - on each expression and sub-expression
   - before the "except" and "finally" clauses
*/

#define SET_LOC(c, x)                                   \
    (c)->u->u_lineno = (x)->lineno;                     \
    (c)->u->u_col_offset = (x)->col_offset;             \
    (c)->u->u_end_lineno = (x)->end_lineno;             \
    (c)->u->u_end_col_offset = (x)->end_col_offset;

// Artificial instructions
#define UNSET_LOC(c)                                    \
    (c)->u->u_lineno = -1;                              \
    (c)->u->u_col_offset = -1;                          \
    (c)->u->u_end_lineno = -1;                          \
    (c)->u->u_end_col_offset = -1;

#define COPY_INSTR_LOC(old, new)                        \
    (new).i_lineno = (old).i_lineno;                    \
    (new).i_col_offset = (old).i_col_offset;            \
    (new).i_end_lineno = (old).i_end_lineno;            \
    (new).i_end_col_offset = (old).i_end_col_offset;

/* Return the stack effect of opcode with argument oparg.

   Some opcodes have a different stack effect when they jump to their
   target than when they don't. The 'jump' parameter specifies the case:

   * 0 -- when not jumping
   * 1 -- when jumping
   * -1 -- maximal
*/
static int
stack_effect(int opcode, int oparg, int jump)
{
    switch (opcode) {
        case NOP:
        case EXTENDED_ARG:
        case RESUME:
        case CACHE:
            return 0;

        /* Stack manipulation */
        case POP_TOP:
            return -1;
        case SWAP:
            return 0;

        /* Unary operators */
        case UNARY_POSITIVE:
        case UNARY_NEGATIVE:
        case UNARY_NOT:
        case UNARY_INVERT:
            return 0;

        case SET_ADD:
        case LIST_APPEND:
            return -1;
        case MAP_ADD:
            return -2;

        case BINARY_SUBSCR:
            return -1;
        case STORE_SUBSCR:
            return -3;
        case DELETE_SUBSCR:
            return -2;

        case GET_ITER:
            return 0;

        case PRINT_EXPR:
            return -1;
        case LOAD_BUILD_CLASS:
            return 1;

        case RETURN_VALUE:
            return -1;
        case IMPORT_STAR:
            return -1;
        case SETUP_ANNOTATIONS:
            return 0;
        case ASYNC_GEN_WRAP:
        case YIELD_VALUE:
            return 0;
        case POP_BLOCK:
            return 0;
        case POP_EXCEPT:
            return -1;

        case STORE_NAME:
            return -1;
        case DELETE_NAME:
            return 0;
        case UNPACK_SEQUENCE:
            return oparg-1;
        case UNPACK_EX:
            return (oparg&0xFF) + (oparg>>8);
        case FOR_ITER:
            /* -1 at end of iterator, 1 if we continue iterating. */
            return jump > 0 ? -1 : 1;
        case SEND:
            return jump > 0 ? -1 : 0;
        case STORE_ATTR:
            return -2;
        case DELETE_ATTR:
            return -1;
        case STORE_GLOBAL:
            return -1;
        case DELETE_GLOBAL:
            return 0;
        case LOAD_CONST:
            return 1;
        case LOAD_NAME:
            return 1;
        case BUILD_TUPLE:
        case BUILD_LIST:
        case BUILD_SET:
        case BUILD_STRING:
            return 1-oparg;
        case BUILD_MAP:
            return 1 - 2*oparg;
        case BUILD_CONST_KEY_MAP:
            return -oparg;
        case LOAD_ATTR:
            return 0;
        case COMPARE_OP:
        case IS_OP:
        case CONTAINS_OP:
            return -1;
        case CHECK_EXC_MATCH:
            return 0;
        case CHECK_EG_MATCH:
            return 0;
        case IMPORT_NAME:
            return -1;
        case IMPORT_FROM:
            return 1;

        /* Jumps */
        case JUMP_FORWARD:
        case JUMP_BACKWARD:
        case JUMP:
        case JUMP_BACKWARD_NO_INTERRUPT:
        case JUMP_NO_INTERRUPT:
            return 0;

        case JUMP_IF_TRUE_OR_POP:
        case JUMP_IF_FALSE_OR_POP:
            return jump ? 0 : -1;

        case POP_JUMP_BACKWARD_IF_NONE:
        case POP_JUMP_FORWARD_IF_NONE:
        case POP_JUMP_IF_NONE:
        case POP_JUMP_BACKWARD_IF_NOT_NONE:
        case POP_JUMP_FORWARD_IF_NOT_NONE:
        case POP_JUMP_IF_NOT_NONE:
        case POP_JUMP_FORWARD_IF_FALSE:
        case POP_JUMP_BACKWARD_IF_FALSE:
        case POP_JUMP_IF_FALSE:
        case POP_JUMP_FORWARD_IF_TRUE:
        case POP_JUMP_BACKWARD_IF_TRUE:
        case POP_JUMP_IF_TRUE:
            return -1;

        case LOAD_GLOBAL:
            return (oparg & 1) + 1;

        /* Exception handling pseudo-instructions */
        case SETUP_FINALLY:
            /* 0 in the normal flow.
             * Restore the stack position and push 1 value before jumping to
             * the handler if an exception is raised. */
            return jump ? 1 : 0;
        case SETUP_CLEANUP:
            /* As SETUP_FINALLY, but pushes lasti as well */
            return jump ? 2 : 0;
        case SETUP_WITH:
            /* 0 in the normal flow.
             * Restore the stack position to the position before the result
             * of __(a)enter__ and push 2 values before jumping to the handler
             * if an exception is raised. */
            return jump ? 1 : 0;

        case PREP_RERAISE_STAR:
            return -1;
        case RERAISE:
            return -1;
        case PUSH_EXC_INFO:
            return 1;

        case WITH_EXCEPT_START:
            return 1;

        case LOAD_FAST:
            return 1;
        case STORE_FAST:
            return -1;
        case DELETE_FAST:
            return 0;

        case RETURN_GENERATOR:
            return 0;

        case RAISE_VARARGS:
            return -oparg;

        /* Functions and calls */
        case PRECALL:
            return -oparg;
        case KW_NAMES:
            return 0;
        case CALL:
            return -1;

        case CALL_FUNCTION_EX:
            return -2 - ((oparg & 0x01) != 0);
        case MAKE_FUNCTION:
            return 0 - ((oparg & 0x01) != 0) - ((oparg & 0x02) != 0) -
                ((oparg & 0x04) != 0) - ((oparg & 0x08) != 0);
        case BUILD_SLICE:
            if (oparg == 3)
                return -2;
            else
                return -1;

        /* Closures */
        case MAKE_CELL:
        case COPY_FREE_VARS:
            return 0;
        case LOAD_CLOSURE:
            return 1;
        case LOAD_DEREF:
        case LOAD_CLASSDEREF:
            return 1;
        case STORE_DEREF:
            return -1;
        case DELETE_DEREF:
            return 0;

        /* Iterators and generators */
        case GET_AWAITABLE:
            return 0;

        case BEFORE_ASYNC_WITH:
        case BEFORE_WITH:
            return 1;
        case GET_AITER:
            return 0;
        case GET_ANEXT:
            return 1;
        case GET_YIELD_FROM_ITER:
            return 0;
        case END_ASYNC_FOR:
            return -2;
        case FORMAT_VALUE:
            /* If there's a fmt_spec on the stack, we go from 2->1,
               else 1->1. */
            return (oparg & FVS_MASK) == FVS_HAVE_SPEC ? -1 : 0;
        case LOAD_METHOD:
            return 1;
        case LOAD_ASSERTION_ERROR:
            return 1;
        case LIST_TO_TUPLE:
            return 0;
        case LIST_EXTEND:
        case SET_UPDATE:
        case DICT_MERGE:
        case DICT_UPDATE:
            return -1;
        case MATCH_CLASS:
            return -2;
        case GET_LEN:
        case MATCH_MAPPING:
        case MATCH_SEQUENCE:
        case MATCH_KEYS:
            return 1;
        case COPY:
        case PUSH_NULL:
            return 1;
        case BINARY_OP:
            return -1;
        default:
            return PY_INVALID_STACK_EFFECT;
    }
    return PY_INVALID_STACK_EFFECT; /* not reachable */
}

int
PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
{
    return stack_effect(opcode, oparg, jump);
}

int
PyCompile_OpcodeStackEffect(int opcode, int oparg)
{
    return stack_effect(opcode, oparg, -1);
}
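
/* Stack-effect examples (illustrative): BUILD_LIST with oparg 3 pops three
 * items and pushes one list, so stack_effect(BUILD_LIST, 3, -1) == 1 - 3
 * == -2. For a conditional jump like JUMP_IF_TRUE_OR_POP, the effect
 * depends on the 'jump' argument: 0 when jumping, -1 when falling
 * through. */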

static int
is_end_of_basic_block(struct instr *instr)
{
    int opcode = instr->i_opcode;

    return is_jump(instr) ||
           opcode == RETURN_VALUE ||
           opcode == RAISE_VARARGS ||
           opcode == RERAISE;
}

static int
compiler_use_new_implicit_block_if_needed(struct compiler *c)
{
    if (c->u->u_need_new_implicit_block) {
        basicblock *b = compiler_new_block(c);
        if (b == NULL) {
            return -1;
        }
        compiler_use_next_block(c, b);
    }
    return 0;
}

static void
compiler_check_if_end_of_block(struct compiler *c, struct instr *instr)
{
    if (is_end_of_basic_block(instr)) {
        c->u->u_need_new_implicit_block = 1;
    }
}

/* Add an opcode with no argument.
   Returns 0 on failure, 1 on success.
*/

static int
compiler_addop_line(struct compiler *c, int opcode, int line,
                    int end_line, int col_offset, int end_col_offset)
{
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
    assert(!IS_ASSEMBLER_OPCODE(opcode));
    assert(!HAS_ARG(opcode) || IS_ARTIFICIAL(opcode));

    if (compiler_use_new_implicit_block_if_needed(c) < 0) {
        return 0;
    }

    basicblock *b = c->u->u_curblock;
    int off = compiler_next_instr(b);
    if (off < 0) {
        return 0;
    }
    struct instr *i = &b->b_instr[off];
    i->i_opcode = opcode;
    i->i_oparg = 0;
    if (opcode == RETURN_VALUE) {
        b->b_return = 1;
    }
    i->i_lineno = line;
    i->i_end_lineno = end_line;
    i->i_col_offset = col_offset;
    i->i_end_col_offset = end_col_offset;

    compiler_check_if_end_of_block(c, i);
    return 1;
}

static int
compiler_addop(struct compiler *c, int opcode)
{
    return compiler_addop_line(c, opcode, c->u->u_lineno, c->u->u_end_lineno,
                               c->u->u_col_offset, c->u->u_end_col_offset);
}

static int
compiler_addop_noline(struct compiler *c, int opcode)
{
    return compiler_addop_line(c, opcode, -1, 0, 0, 0);
}


static Py_ssize_t
compiler_add_o(PyObject *dict, PyObject *o)
{
    PyObject *v;
    Py_ssize_t arg;

    v = PyDict_GetItemWithError(dict, o);
    if (!v) {
        if (PyErr_Occurred()) {
            return -1;
        }
        arg = PyDict_GET_SIZE(dict);
        v = PyLong_FromSsize_t(arg);
        if (!v) {
            return -1;
        }
        if (PyDict_SetItem(dict, o, v) < 0) {
            Py_DECREF(v);
            return -1;
        }
        Py_DECREF(v);
    }
    else
        arg = PyLong_AsLong(v);
    return arg;
}

// Merge const *o* recursively and return constant key object.
static PyObject*
merge_consts_recursive(struct compiler *c, PyObject *o)
{
    // None and Ellipsis are singletons, and the key is the singleton itself.
    // No need to merge the object and key.
    if (o == Py_None || o == Py_Ellipsis) {
        Py_INCREF(o);
        return o;
    }

    PyObject *key = _PyCode_ConstantKey(o);
    if (key == NULL) {
        return NULL;
    }

    // t is a borrowed reference
    PyObject *t = PyDict_SetDefault(c->c_const_cache, key, key);
    if (t != key) {
        // o is already registered in c_const_cache. Just use it.
        Py_XINCREF(t);
        Py_DECREF(key);
        return t;
    }

    // We registered o in c_const_cache.
    // When o is a tuple or frozenset, we want to merge its
    // items too.
    if (PyTuple_CheckExact(o)) {
        Py_ssize_t len = PyTuple_GET_SIZE(o);
        for (Py_ssize_t i = 0; i < len; i++) {
            PyObject *item = PyTuple_GET_ITEM(o, i);
            PyObject *u = merge_consts_recursive(c, item);
            if (u == NULL) {
                Py_DECREF(key);
                return NULL;
            }

            // See _PyCode_ConstantKey()
            PyObject *v;  // borrowed
            if (PyTuple_CheckExact(u)) {
                v = PyTuple_GET_ITEM(u, 1);
            }
            else {
                v = u;
            }
            if (v != item) {
                Py_INCREF(v);
                PyTuple_SET_ITEM(o, i, v);
                Py_DECREF(item);
            }

            Py_DECREF(u);
        }
    }
    else if (PyFrozenSet_CheckExact(o)) {
        // *key* is a tuple, and its first item is a frozenset of
        // constant keys.
        // See _PyCode_ConstantKey() for details.
        assert(PyTuple_CheckExact(key));
        assert(PyTuple_GET_SIZE(key) == 2);

        Py_ssize_t len = PySet_GET_SIZE(o);
        if (len == 0) {  // empty frozenset should not be re-created.
            return key;
        }
        PyObject *tuple = PyTuple_New(len);
        if (tuple == NULL) {
            Py_DECREF(key);
            return NULL;
        }
        Py_ssize_t i = 0, pos = 0;
        PyObject *item;
        Py_hash_t hash;
        while (_PySet_NextEntry(o, &pos, &item, &hash)) {
            PyObject *k = merge_consts_recursive(c, item);
            if (k == NULL) {
                Py_DECREF(tuple);
                Py_DECREF(key);
                return NULL;
            }
            PyObject *u;
            if (PyTuple_CheckExact(k)) {
                u = PyTuple_GET_ITEM(k, 1);
                Py_INCREF(u);
                Py_DECREF(k);
            }
            else {
                u = k;
            }
            PyTuple_SET_ITEM(tuple, i, u);  // Steals the reference to u.
            i++;
        }

        // Instead of rewriting o, we create a new frozenset and embed it in
        // the key tuple. The caller should get the merged frozenset from the
        // key tuple.
        PyObject *new = PyFrozenSet_New(tuple);
        Py_DECREF(tuple);
        if (new == NULL) {
            Py_DECREF(key);
            return NULL;
        }
        assert(PyTuple_GET_ITEM(key, 1) == o);
        Py_DECREF(o);
        PyTuple_SET_ITEM(key, 1, new);
    }

    return key;
}
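
/* Merging example (illustrative): when two functions in one module both use
 * the constant (1, 2), merging lets their code objects share a single tuple
 * object instead of keeping two equal copies. The cache key produced by
 * _PyCode_ConstantKey() also encodes the type, so 1 and 1.0 remain distinct
 * constants. */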

static Py_ssize_t
compiler_add_const(struct compiler *c, PyObject *o)
{
    PyObject *key = merge_consts_recursive(c, o);
    if (key == NULL) {
        return -1;
    }

    Py_ssize_t arg = compiler_add_o(c->u->u_consts, key);
    Py_DECREF(key);
    return arg;
}

static int
compiler_addop_load_const(struct compiler *c, PyObject *o)
{
    Py_ssize_t arg = compiler_add_const(c, o);
    if (arg < 0)
        return 0;
    return compiler_addop_i(c, LOAD_CONST, arg);
}

static int
compiler_addop_o(struct compiler *c, int opcode, PyObject *dict,
                 PyObject *o)
{
    Py_ssize_t arg = compiler_add_o(dict, o);
    if (arg < 0)
        return 0;
    return compiler_addop_i(c, opcode, arg);
}

static int
compiler_addop_name(struct compiler *c, int opcode, PyObject *dict,
                    PyObject *o)
{
    Py_ssize_t arg;

    PyObject *mangled = _Py_Mangle(c->u->u_private, o);
    if (!mangled)
        return 0;
    arg = compiler_add_o(dict, mangled);
    Py_DECREF(mangled);
    if (arg < 0)
        return 0;
    return compiler_addop_i(c, opcode, arg);
}

/* Add an opcode with an integer argument.
   Returns 0 on failure, 1 on success.
*/

static int
compiler_addop_i_line(struct compiler *c, int opcode, Py_ssize_t oparg,
                      int lineno, int end_lineno,
                      int col_offset, int end_col_offset)
{
    /* oparg value is unsigned, but a signed C int is usually used to store
       it in the C code (like Python/ceval.c).

       Limit to 32-bit signed C int (rather than INT_MAX) for portability.

       The argument of a concrete bytecode instruction is limited to 8 bits.
       EXTENDED_ARG is used for 16, 24, and 32-bit arguments. */

    assert(IS_WITHIN_OPCODE_RANGE(opcode));
    assert(!IS_ASSEMBLER_OPCODE(opcode));
    assert(HAS_ARG(opcode));
    assert(0 <= oparg && oparg <= 2147483647);

    if (compiler_use_new_implicit_block_if_needed(c) < 0) {
        return 0;
    }

    basicblock *b = c->u->u_curblock;
    int off = compiler_next_instr(b);
    if (off < 0) {
        return 0;
    }
    struct instr *i = &b->b_instr[off];
    i->i_opcode = opcode;
    i->i_oparg = Py_SAFE_DOWNCAST(oparg, Py_ssize_t, int);
    i->i_lineno = lineno;
    i->i_end_lineno = end_lineno;
    i->i_col_offset = col_offset;
    i->i_end_col_offset = end_col_offset;

    compiler_check_if_end_of_block(c, i);
    return 1;
}

static int
compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg)
{
    return compiler_addop_i_line(c, opcode, oparg,
                                 c->u->u_lineno, c->u->u_end_lineno,
                                 c->u->u_col_offset, c->u->u_end_col_offset);
}

static int
compiler_addop_i_noline(struct compiler *c, int opcode, Py_ssize_t oparg)
{
    return compiler_addop_i_line(c, opcode, oparg, -1, 0, 0, 0);
}

static int
add_jump_to_block(struct compiler *c, int opcode,
                  int lineno, int end_lineno,
                  int col_offset, int end_col_offset,
                  basicblock *target)
{
    assert(IS_WITHIN_OPCODE_RANGE(opcode));
    assert(!IS_ASSEMBLER_OPCODE(opcode));
    assert(HAS_ARG(opcode) || IS_VIRTUAL_OPCODE(opcode));
    assert(target != NULL);

    if (compiler_use_new_implicit_block_if_needed(c) < 0) {
        return 0;
    }

    basicblock *b = c->u->u_curblock;
    int off = compiler_next_instr(b);
    if (off < 0) {
        return 0;
    }
    struct instr *i = &b->b_instr[off];
    i->i_opcode = opcode;
    i->i_target = target;
    i->i_lineno = lineno;
    i->i_end_lineno = end_lineno;
    i->i_col_offset = col_offset;
    i->i_end_col_offset = end_col_offset;

    compiler_check_if_end_of_block(c, i);
    return 1;
}

static int
compiler_addop_j(struct compiler *c, int opcode, basicblock *b)
{
    return add_jump_to_block(c, opcode, c->u->u_lineno,
                             c->u->u_end_lineno, c->u->u_col_offset,
                             c->u->u_end_col_offset, b);
}

static int
compiler_addop_j_noline(struct compiler *c, int opcode, basicblock *b)
{
    return add_jump_to_block(c, opcode, -1, 0, 0, 0, b);
}

#define ADDOP(C, OP) { \
    if (!compiler_addop((C), (OP))) \
        return 0; \
}

#define ADDOP_NOLINE(C, OP) { \
    if (!compiler_addop_noline((C), (OP))) \
        return 0; \
}

#define ADDOP_IN_SCOPE(C, OP) { \
    if (!compiler_addop((C), (OP))) { \
        compiler_exit_scope(c); \
        return 0; \
    } \
}

#define ADDOP_LOAD_CONST(C, O) { \
    if (!compiler_addop_load_const((C), (O))) \
        return 0; \
}

/* Same as ADDOP_LOAD_CONST, but steals a reference. */
#define ADDOP_LOAD_CONST_NEW(C, O) { \
    PyObject *__new_const = (O); \
    if (__new_const == NULL) { \
        return 0; \
    } \
    if (!compiler_addop_load_const((C), __new_const)) { \
        Py_DECREF(__new_const); \
        return 0; \
    } \
    Py_DECREF(__new_const); \
}

#define ADDOP_N(C, OP, O, TYPE) { \
    assert(!HAS_CONST(OP)); /* use ADDOP_LOAD_CONST_NEW */ \
    if (!compiler_addop_o((C), (OP), (C)->u->u_ ## TYPE, (O))) { \
        Py_DECREF((O)); \
        return 0; \
    } \
    Py_DECREF((O)); \
}

#define ADDOP_NAME(C, OP, O, TYPE) { \
    if (!compiler_addop_name((C), (OP), (C)->u->u_ ## TYPE, (O))) \
        return 0; \
}

#define ADDOP_I(C, OP, O) { \
    if (!compiler_addop_i((C), (OP), (O))) \
        return 0; \
}

#define ADDOP_I_NOLINE(C, OP, O) { \
    if (!compiler_addop_i_noline((C), (OP), (O))) \
        return 0; \
}

#define ADDOP_JUMP(C, OP, O) { \
    if (!compiler_addop_j((C), (OP), (O))) \
        return 0; \
}

/* Add a jump with no line number.
 * Used for artificial jumps that have no corresponding
 * token in the source code. */
#define ADDOP_JUMP_NOLINE(C, OP, O) { \
    if (!compiler_addop_j_noline((C), (OP), (O))) \
        return 0; \
}

#define ADDOP_COMPARE(C, CMP) { \
    if (!compiler_addcompare((C), (cmpop_ty)(CMP))) \
        return 0; \
}

#define ADDOP_BINARY(C, BINOP) \
    RETURN_IF_FALSE(addop_binary((C), (BINOP), false))

#define ADDOP_INPLACE(C, BINOP) \
    RETURN_IF_FALSE(addop_binary((C), (BINOP), true))

/* VISIT and VISIT_SEQ take an ASDL type as their second argument. They use
   the ASDL name to synthesize the name of the C type and the visit function.
*/

#define ADD_YIELD_FROM(C, await) \
    RETURN_IF_FALSE(compiler_add_yield_from((C), (await)))

#define POP_EXCEPT_AND_RERAISE(C) \
    RETURN_IF_FALSE(compiler_pop_except_and_reraise((C)))

#define ADDOP_YIELD(C) \
    RETURN_IF_FALSE(addop_yield(C))

#define VISIT(C, TYPE, V) { \
    if (!compiler_visit_ ## TYPE((C), (V))) \
        return 0; \
}

#define VISIT_IN_SCOPE(C, TYPE, V) { \
    if (!compiler_visit_ ## TYPE((C), (V))) { \
        compiler_exit_scope(c); \
        return 0; \
    } \
}

#define VISIT_SEQ(C, TYPE, SEQ) { \
    int _i; \
    asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \
    for (_i = 0; _i < asdl_seq_LEN(seq); _i++) { \
        TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, _i); \
        if (!compiler_visit_ ## TYPE((C), elt)) \
            return 0; \
    } \
}

#define VISIT_SEQ_IN_SCOPE(C, TYPE, SEQ) { \
    int _i; \
    asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \
    for (_i = 0; _i < asdl_seq_LEN(seq); _i++) { \
        TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, _i); \
        if (!compiler_visit_ ## TYPE((C), elt)) { \
            compiler_exit_scope(c); \
            return 0; \
        } \
    } \
}

#define RETURN_IF_FALSE(X) \
    if (!(X)) {            \
        return 0;          \
    }

static int
compiler_enter_scope(struct compiler *c, identifier name,
                     int scope_type, void *key, int lineno)
{
    struct compiler_unit *u;
    basicblock *block;

    u = (struct compiler_unit *)PyObject_Calloc(1, sizeof(
                                                struct compiler_unit));
    if (!u) {
        PyErr_NoMemory();
        return 0;
    }
    u->u_scope_type = scope_type;
    u->u_argcount = 0;
    u->u_posonlyargcount = 0;
    u->u_kwonlyargcount = 0;
    u->u_ste = PySymtable_Lookup(c->c_st, key);
    if (!u->u_ste) {
        compiler_unit_free(u);
        return 0;
    }
    Py_INCREF(name);
    u->u_name = name;
    u->u_varnames = list2dict(u->u_ste->ste_varnames);
    u->u_cellvars = dictbytype(u->u_ste->ste_symbols, CELL, 0, 0);
    if (!u->u_varnames || !u->u_cellvars) {
        compiler_unit_free(u);
        return 0;
    }
    if (u->u_ste->ste_needs_class_closure) {
        /* Cook up an implicit __class__ cell. */
        int res;
        assert(u->u_scope_type == COMPILER_SCOPE_CLASS);
        assert(PyDict_GET_SIZE(u->u_cellvars) == 0);
        res = PyDict_SetItem(u->u_cellvars, &_Py_ID(__class__),
                             _PyLong_GetZero());
        if (res < 0) {
            compiler_unit_free(u);
            return 0;
        }
    }

    u->u_freevars = dictbytype(u->u_ste->ste_symbols, FREE, DEF_FREE_CLASS,
                               PyDict_GET_SIZE(u->u_cellvars));
    if (!u->u_freevars) {
        compiler_unit_free(u);
        return 0;
    }

    u->u_blocks = NULL;
    u->u_nfblocks = 0;
    u->u_firstlineno = lineno;
    u->u_lineno = lineno;
    u->u_col_offset = 0;
    u->u_end_lineno = lineno;
    u->u_end_col_offset = 0;
    u->u_consts = PyDict_New();
    if (!u->u_consts) {
        compiler_unit_free(u);
        return 0;
    }
    u->u_names = PyDict_New();
    if (!u->u_names) {
        compiler_unit_free(u);
        return 0;
    }

    u->u_private = NULL;

    /* Push the old compiler_unit on the stack. */
    if (c->u) {
        PyObject *capsule = PyCapsule_New(c->u, CAPSULE_NAME, NULL);
        if (!capsule || PyList_Append(c->c_stack, capsule) < 0) {
            Py_XDECREF(capsule);
            compiler_unit_free(u);
            return 0;
        }
        Py_DECREF(capsule);
        u->u_private = c->u->u_private;
        Py_XINCREF(u->u_private);
    }
    c->u = u;

    c->c_nestlevel++;

    block = compiler_new_block(c);
    if (block == NULL)
        return 0;
    c->u->u_curblock = block;

    if (u->u_scope_type == COMPILER_SCOPE_MODULE) {
        c->u->u_lineno = 0;
    }
    else {
        if (!compiler_set_qualname(c))
            return 0;
    }
    ADDOP_I(c, RESUME, 0);

    if (u->u_scope_type == COMPILER_SCOPE_MODULE) {
        c->u->u_lineno = -1;
    }
    return 1;
}

static void
compiler_exit_scope(struct compiler *c)
{
    // Don't call PySequence_DelItem() with an exception raised
    PyObject *exc_type, *exc_val, *exc_tb;
    PyErr_Fetch(&exc_type, &exc_val, &exc_tb);

    c->c_nestlevel--;
    compiler_unit_free(c->u);
    /* Restore c->u to the parent unit. */
    Py_ssize_t n = PyList_GET_SIZE(c->c_stack) - 1;
    if (n >= 0) {
        PyObject *capsule = PyList_GET_ITEM(c->c_stack, n);
        c->u = (struct compiler_unit *)PyCapsule_GetPointer(capsule, CAPSULE_NAME);
        assert(c->u);
        /* we are deleting from a list so this really shouldn't fail */
        if (PySequence_DelItem(c->c_stack, n) < 0) {
            _PyErr_WriteUnraisableMsg("on removing the last compiler "
                                      "stack item", NULL);
        }
        compiler_unit_check(c->u);
    }
    else {
        c->u = NULL;
    }

    PyErr_Restore(exc_type, exc_val, exc_tb);
}

/* Check statically whether a block contains variable annotations. */

static int
find_ann(asdl_stmt_seq *stmts)
{
    int i, j, res = 0;
    stmt_ty st;

    for (i = 0; i < asdl_seq_LEN(stmts); i++) {
        st = (stmt_ty)asdl_seq_GET(stmts, i);
        switch (st->kind) {
        case AnnAssign_kind:
            return 1;
        case For_kind:
            res = find_ann(st->v.For.body) ||
                  find_ann(st->v.For.orelse);
            break;
        case AsyncFor_kind:
            res = find_ann(st->v.AsyncFor.body) ||
                  find_ann(st->v.AsyncFor.orelse);
            break;
        case While_kind:
            res = find_ann(st->v.While.body) ||
                  find_ann(st->v.While.orelse);
            break;
        case If_kind:
            res = find_ann(st->v.If.body) ||
                  find_ann(st->v.If.orelse);
            break;
        case With_kind:
            res = find_ann(st->v.With.body);
            break;
        case AsyncWith_kind:
            res = find_ann(st->v.AsyncWith.body);
            break;
        case Try_kind:
            for (j = 0; j < asdl_seq_LEN(st->v.Try.handlers); j++) {
                excepthandler_ty handler = (excepthandler_ty)asdl_seq_GET(
                    st->v.Try.handlers, j);
                if (find_ann(handler->v.ExceptHandler.body)) {
                    return 1;
                }
            }
            res = find_ann(st->v.Try.body) ||
                  find_ann(st->v.Try.finalbody) ||
                  find_ann(st->v.Try.orelse);
            break;
        case TryStar_kind:
            for (j = 0; j < asdl_seq_LEN(st->v.TryStar.handlers); j++) {
                excepthandler_ty handler = (excepthandler_ty)asdl_seq_GET(
                    st->v.TryStar.handlers, j);
                if (find_ann(handler->v.ExceptHandler.body)) {
                    return 1;
                }
            }
            res = find_ann(st->v.TryStar.body) ||
                  find_ann(st->v.TryStar.finalbody) ||
                  find_ann(st->v.TryStar.orelse);
            break;
        case Match_kind:
            for (j = 0; j < asdl_seq_LEN(st->v.Match.cases); j++) {
                match_case_ty match_case = (match_case_ty)asdl_seq_GET(
                    st->v.Match.cases, j);
                if (find_ann(match_case->body)) {
                    return 1;
                }
            }
            break;
        default:
            res = 0;
        }
        if (res) {
            break;
        }
    }
    return res;
}
1926
1927 /*
1928 * Frame block handling functions
1929 */
1930
1931 static int
1932 compiler_push_fblock(struct compiler *c, enum fblocktype t, basicblock *b,
1933 basicblock *exit, void *datum)
1934 {
1935 struct fblockinfo *f;
1936 if (c->u->u_nfblocks >= CO_MAXBLOCKS) {
1937 return compiler_error(c, "too many statically nested blocks");
1938 }
1939 f = &c->u->u_fblock[c->u->u_nfblocks++];
1940 f->fb_type = t;
1941 f->fb_block = b;
1942 f->fb_exit = exit;
1943 f->fb_datum = datum;
1944 return 1;
1945 }
1946
1947 static void
1948 compiler_pop_fblock(struct compiler *c, enum fblocktype t, basicblock *b)
1949 {
1950 struct compiler_unit *u = c->u;
1951 assert(u->u_nfblocks > 0);
1952 u->u_nfblocks--;
1953 assert(u->u_fblock[u->u_nfblocks].fb_type == t);
1954 assert(u->u_fblock[u->u_nfblocks].fb_block == b);
1955 }
1956
1957 static int
1958 compiler_call_exit_with_nones(struct compiler *c) {
1959 ADDOP_LOAD_CONST(c, Py_None);
1960 ADDOP_LOAD_CONST(c, Py_None);
1961 ADDOP_LOAD_CONST(c, Py_None);
1962 ADDOP_I(c, PRECALL, 2);
1963 ADDOP_I(c, CALL, 2);
1964 return 1;
1965 }
1966
1967 static int
1968 compiler_add_yield_from(struct compiler *c, int await)
1969 {
1970 basicblock *start, *resume, *exit;
1971 start = compiler_new_block(c);
1972 resume = compiler_new_block(c);
1973 exit = compiler_new_block(c);
1974 if (start == NULL || resume == NULL || exit == NULL) {
1975 return 0;
1976 }
1977 compiler_use_next_block(c, start);
1978 ADDOP_JUMP(c, SEND, exit);
1979 compiler_use_next_block(c, resume);
1980 ADDOP(c, YIELD_VALUE);
1981 ADDOP_I(c, RESUME, await ? 3 : 2);
1982 ADDOP_JUMP(c, JUMP_NO_INTERRUPT, start);
1983 compiler_use_next_block(c, exit);
1984 return 1;
1985 }
1986
1987 static int
1988 compiler_pop_except_and_reraise(struct compiler *c)
1989 {
1990 /* Stack contents
1991 * [exc_info, lasti, exc] COPY 3
1992 * [exc_info, lasti, exc, exc_info] POP_EXCEPT
1993 * [exc_info, lasti, exc] RERAISE 1
1994 * (exception_unwind clears the stack)
1995 */
1996
1997 ADDOP_I(c, COPY, 3);
1998 ADDOP(c, POP_EXCEPT);
1999 ADDOP_I(c, RERAISE, 1);
2000 return 1;
2001 }
2002
2003 /* Unwind a frame block. If preserve_tos is true, the TOS before
2004 * popping the blocks will be restored afterwards, unless another
2005 * return, break or continue is found, in which case the TOS will
2006 * be popped.
2007 */
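/* For example (an illustrative sketch, not generated verbatim), compiling
 *
 *     for x in it:
 *         return f()
 *
 * unwinds the FOR_LOOP block with preserve_tos=1: the value of f() is
 * swapped above the iterator (SWAP 2) before the iterator is popped, so
 * the return value survives the unwind and reaches RETURN_VALUE.
 */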
2008 static int
2009 compiler_unwind_fblock(struct compiler *c, struct fblockinfo *info,
2010 int preserve_tos)
2011 {
2012 switch (info->fb_type) {
2013 case WHILE_LOOP:
2014 case EXCEPTION_HANDLER:
2015 case EXCEPTION_GROUP_HANDLER:
2016 case ASYNC_COMPREHENSION_GENERATOR:
2017 return 1;
2018
2019 case FOR_LOOP:
2020 /* Pop the iterator */
2021 if (preserve_tos) {
2022 ADDOP_I(c, SWAP, 2);
2023 }
2024 ADDOP(c, POP_TOP);
2025 return 1;
2026
2027 case TRY_EXCEPT:
2028 ADDOP(c, POP_BLOCK);
2029 return 1;
2030
2031 case FINALLY_TRY:
2032 /* This POP_BLOCK gets the line number of the unwinding statement */
2033 ADDOP(c, POP_BLOCK);
2034 if (preserve_tos) {
2035 if (!compiler_push_fblock(c, POP_VALUE, NULL, NULL, NULL)) {
2036 return 0;
2037 }
2038 }
2039 /* Emit the finally block */
2040 VISIT_SEQ(c, stmt, info->fb_datum);
2041 if (preserve_tos) {
2042 compiler_pop_fblock(c, POP_VALUE, NULL);
2043 }
2044 /* The finally block should appear to execute after the
2045 * statement causing the unwinding, so make the unwinding
2046 * instruction artificial */
2047 UNSET_LOC(c);
2048 return 1;
2049
2050 case FINALLY_END:
2051 if (preserve_tos) {
2052 ADDOP_I(c, SWAP, 2);
2053 }
2054 ADDOP(c, POP_TOP); /* exc_value */
2055 if (preserve_tos) {
2056 ADDOP_I(c, SWAP, 2);
2057 }
2058 ADDOP(c, POP_BLOCK);
2059 ADDOP(c, POP_EXCEPT);
2060 return 1;
2061
2062 case WITH:
2063 case ASYNC_WITH:
2064 SET_LOC(c, (stmt_ty)info->fb_datum);
2065 ADDOP(c, POP_BLOCK);
2066 if (preserve_tos) {
2067 ADDOP_I(c, SWAP, 2);
2068 }
2069 if (!compiler_call_exit_with_nones(c)) {
2070 return 0;
2071 }
2072 if (info->fb_type == ASYNC_WITH) {
2073 ADDOP_I(c, GET_AWAITABLE, 2);
2074 ADDOP_LOAD_CONST(c, Py_None);
2075 ADD_YIELD_FROM(c, 1);
2076 }
2077 ADDOP(c, POP_TOP);
2078 /* The exit block should appear to execute after the
2079 * statement causing the unwinding, so make the unwinding
2080 * instruction artificial */
2081 UNSET_LOC(c);
2082 return 1;
2083
2084 case HANDLER_CLEANUP:
2085 if (info->fb_datum) {
2086 ADDOP(c, POP_BLOCK);
2087 }
2088 if (preserve_tos) {
2089 ADDOP_I(c, SWAP, 2);
2090 }
2091 ADDOP(c, POP_BLOCK);
2092 ADDOP(c, POP_EXCEPT);
2093 if (info->fb_datum) {
2094 ADDOP_LOAD_CONST(c, Py_None);
2095 compiler_nameop(c, info->fb_datum, Store);
2096 compiler_nameop(c, info->fb_datum, Del);
2097 }
2098 return 1;
2099
2100 case POP_VALUE:
2101 if (preserve_tos) {
2102 ADDOP_I(c, SWAP, 2);
2103 }
2104 ADDOP(c, POP_TOP);
2105 return 1;
2106 }
2107 Py_UNREACHABLE();
2108 }
2109
2110 /* Unwind the block stack. If loop is not NULL, stop when the first loop is encountered. */
2111 static int
2112 compiler_unwind_fblock_stack(struct compiler *c, int preserve_tos, struct fblockinfo **loop) {
2113 if (c->u->u_nfblocks == 0) {
2114 return 1;
2115 }
2116 struct fblockinfo *top = &c->u->u_fblock[c->u->u_nfblocks-1];
2117 if (top->fb_type == EXCEPTION_GROUP_HANDLER) {
2118 return compiler_error(
2119 c, "'break', 'continue' and 'return' cannot appear in an except* block");
2120 }
2121 if (loop != NULL && (top->fb_type == WHILE_LOOP || top->fb_type == FOR_LOOP)) {
2122 *loop = top;
2123 return 1;
2124 }
2125 struct fblockinfo copy = *top;
2126 c->u->u_nfblocks--;
2127 if (!compiler_unwind_fblock(c, &copy, preserve_tos)) {
2128 return 0;
2129 }
2130 if (!compiler_unwind_fblock_stack(c, preserve_tos, loop)) {
2131 return 0;
2132 }
2133 c->u->u_fblock[c->u->u_nfblocks] = copy;
2134 c->u->u_nfblocks++;
2135 return 1;
2136 }
2137
2138 /* Compile a sequence of statements, checking for a docstring
2139 and for annotations. */
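/* For example (illustrative), a module whose first statement is a string
 * literal stores that constant as __doc__ (unless compiled with -OO), and
 * an AnnAssign anywhere in the block triggers one SETUP_ANNOTATIONS up
 * front. */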
2140
2141 static int
2142 compiler_body(struct compiler *c, asdl_stmt_seq *stmts)
2143 {
2144 int i = 0;
2145 stmt_ty st;
2146 PyObject *docstring;
2147
2148 /* Set current line number to the line number of first statement.
2149 This way the line number for SETUP_ANNOTATIONS will always
2150 coincide with the line number of the first "real" statement in the module.
2151 If body is empty, then lineno will be set later in assemble. */
2152 if (c->u->u_scope_type == COMPILER_SCOPE_MODULE && asdl_seq_LEN(stmts)) {
2153 st = (stmt_ty)asdl_seq_GET(stmts, 0);
2154 SET_LOC(c, st);
2155 }
2156 /* Every annotated class and module should have __annotations__. */
2157 if (find_ann(stmts)) {
2158 ADDOP(c, SETUP_ANNOTATIONS);
2159 }
2160 if (!asdl_seq_LEN(stmts))
2161 return 1;
2162 /* if not -OO mode, set docstring */
2163 if (c->c_optimize < 2) {
2164 docstring = _PyAST_GetDocString(stmts);
2165 if (docstring) {
2166 i = 1;
2167 st = (stmt_ty)asdl_seq_GET(stmts, 0);
2168 assert(st->kind == Expr_kind);
2169 VISIT(c, expr, st->v.Expr.value);
2170 UNSET_LOC(c);
2171 if (!compiler_nameop(c, &_Py_ID(__doc__), Store))
2172 return 0;
2173 }
2174 }
2175 for (; i < asdl_seq_LEN(stmts); i++)
2176 VISIT(c, stmt, (stmt_ty)asdl_seq_GET(stmts, i));
2177 return 1;
2178 }
2179
2180 static PyCodeObject *
2181 compiler_mod(struct compiler *c, mod_ty mod)
2182 {
2183 PyCodeObject *co;
2184 int addNone = 1;
2185 _Py_DECLARE_STR(anon_module, "<module>");
2186 if (!compiler_enter_scope(c, &_Py_STR(anon_module), COMPILER_SCOPE_MODULE,
2187 mod, 1)) {
2188 return NULL;
2189 }
2190 c->u->u_lineno = 1;
2191 switch (mod->kind) {
2192 case Module_kind:
2193 if (!compiler_body(c, mod->v.Module.body)) {
2194 compiler_exit_scope(c);
2195 return NULL;
2196 }
2197 break;
2198 case Interactive_kind:
2199 if (find_ann(mod->v.Interactive.body)) {
2200 ADDOP(c, SETUP_ANNOTATIONS);
2201 }
2202 c->c_interactive = 1;
2203 VISIT_SEQ_IN_SCOPE(c, stmt, mod->v.Interactive.body);
2204 break;
2205 case Expression_kind:
2206 VISIT_IN_SCOPE(c, expr, mod->v.Expression.body);
2207 addNone = 0;
2208 break;
2209 default:
2210 PyErr_Format(PyExc_SystemError,
2211 "module kind %d should not be possible",
2212 mod->kind);
2213 return NULL;
2214 }
2215 co = assemble(c, addNone);
2216 compiler_exit_scope(c);
2217 return co;
2218 }
2219
2220 /* The test for LOCAL must come before the test for FREE in order to
2221 handle classes where name is both local and free. The local var is
2222 a method and the free var is a free var referenced within a method.
2223 */
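/* A hypothetical example of such a class:
 *
 *     def outer():
 *         def method(self): ...
 *         class C:
 *             method = method    # local: a class attribute;
 *                                # free: refers to outer's 'method'
 *         return C
 */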
2224
2225 static int
2226 get_ref_type(struct compiler *c, PyObject *name)
2227 {
2228 int scope;
2229 if (c->u->u_scope_type == COMPILER_SCOPE_CLASS &&
2230 _PyUnicode_EqualToASCIIString(name, "__class__"))
2231 return CELL;
2232 scope = _PyST_GetScope(c->u->u_ste, name);
2233 if (scope == 0) {
2234 PyErr_Format(PyExc_SystemError,
2235 "_PyST_GetScope(name=%R) failed: "
2236 "unknown scope in unit %S (%R); "
2237 "symbols: %R; locals: %R; globals: %R",
2238 name,
2239 c->u->u_name, c->u->u_ste->ste_id,
2240 c->u->u_ste->ste_symbols, c->u->u_varnames, c->u->u_names);
2241 return -1;
2242 }
2243 return scope;
2244 }
2245
2246 static int
2247 compiler_lookup_arg(PyObject *dict, PyObject *name)
2248 {
2249 PyObject *v;
2250 v = PyDict_GetItemWithError(dict, name);
2251 if (v == NULL)
2252 return -1;
2253 return PyLong_AS_LONG(v);
2254 }
2255
2256 static int
2257 compiler_make_closure(struct compiler *c, PyCodeObject *co, Py_ssize_t flags,
2258 PyObject *qualname)
2259 {
2260 if (qualname == NULL)
2261 qualname = co->co_name;
2262
2263 if (co->co_nfreevars) {
2264 int i = co->co_nlocals + co->co_nplaincellvars;
2265 for (; i < co->co_nlocalsplus; ++i) {
2266 /* Bypass com_addop_varname because it will generate
2267 LOAD_DEREF but LOAD_CLOSURE is needed.
2268 */
2269 PyObject *name = PyTuple_GET_ITEM(co->co_localsplusnames, i);
2270
2271 /* Special case: If a class contains a method with a
2272 free variable that has the same name as a method,
2273 the name will be considered free *and* local in the
2274 class. It should be handled by the closure, as
2275 well as by the normal name lookup logic.
2276 */
2277 int reftype = get_ref_type(c, name);
2278 if (reftype == -1) {
2279 return 0;
2280 }
2281 int arg;
2282 if (reftype == CELL) {
2283 arg = compiler_lookup_arg(c->u->u_cellvars, name);
2284 }
2285 else {
2286 arg = compiler_lookup_arg(c->u->u_freevars, name);
2287 }
2288 if (arg == -1) {
2289 PyObject *freevars = _PyCode_GetFreevars(co);
2290 if (freevars == NULL) {
2291 PyErr_Clear();
2292 }
2293 PyErr_Format(PyExc_SystemError,
2294 "compiler_lookup_arg(name=%R) with reftype=%d failed in %S; "
2295 "freevars of code %S: %R",
2296 name,
2297 reftype,
2298 c->u->u_name,
2299 co->co_name,
2300 freevars);
2301 Py_XDECREF(freevars);  /* freevars may be NULL if _PyCode_GetFreevars() failed */
2302 return 0;
2303 }
2304 ADDOP_I(c, LOAD_CLOSURE, arg);
2305 }
2306 flags |= 0x08;
2307 ADDOP_I(c, BUILD_TUPLE, co->co_nfreevars);
2308 }
2309 ADDOP_LOAD_CONST(c, (PyObject*)co);
2310 ADDOP_I(c, MAKE_FUNCTION, flags);
2311 return 1;
2312 }
2313
2314 static int
2315 compiler_decorators(struct compiler *c, asdl_expr_seq* decos)
2316 {
2317 int i;
2318
2319 if (!decos)
2320 return 1;
2321
2322 for (i = 0; i < asdl_seq_LEN(decos); i++) {
2323 VISIT(c, expr, (expr_ty)asdl_seq_GET(decos, i));
2324 }
2325 return 1;
2326 }
2327
2328 static int
2329 compiler_apply_decorators(struct compiler *c, asdl_expr_seq* decos)
2330 {
2331 if (!decos)
2332 return 1;
2333
2334 int old_lineno = c->u->u_lineno;
2335 int old_end_lineno = c->u->u_end_lineno;
2336 int old_col_offset = c->u->u_col_offset;
2337 int old_end_col_offset = c->u->u_end_col_offset;
2338 for (Py_ssize_t i = asdl_seq_LEN(decos) - 1; i > -1; i--) {
2339 SET_LOC(c, (expr_ty)asdl_seq_GET(decos, i));
2340 ADDOP_I(c, PRECALL, 0);
2341 ADDOP_I(c, CALL, 0);
2342 }
2343 c->u->u_lineno = old_lineno;
2344 c->u->u_end_lineno = old_end_lineno;
2345 c->u->u_col_offset = old_col_offset;
2346 c->u->u_end_col_offset = old_end_col_offset;
2347 return 1;
2348 }
2349
2350 static int
2351 compiler_visit_kwonlydefaults(struct compiler *c, asdl_arg_seq *kwonlyargs,
2352 asdl_expr_seq *kw_defaults)
2353 {
2354 /* Push a dict of keyword-only default values.
2355
2356 Return 0 on error, -1 if no dict pushed, 1 if a dict is pushed.
2357 */
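/* For example (illustrative), "def f(*, a=1, b=2): ..." pushes 1 and 2,
 * then the keys tuple ('a', 'b'), and emits BUILD_CONST_KEY_MAP 2 to
 * build {'a': 1, 'b': 2}. */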
2358 int i;
2359 PyObject *keys = NULL;
2360
2361 for (i = 0; i < asdl_seq_LEN(kwonlyargs); i++) {
2362 arg_ty arg = asdl_seq_GET(kwonlyargs, i);
2363 expr_ty default_ = asdl_seq_GET(kw_defaults, i);
2364 if (default_) {
2365 PyObject *mangled = _Py_Mangle(c->u->u_private, arg->arg);
2366 if (!mangled) {
2367 goto error;
2368 }
2369 if (keys == NULL) {
2370 keys = PyList_New(1);
2371 if (keys == NULL) {
2372 Py_DECREF(mangled);
2373 return 0;
2374 }
2375 PyList_SET_ITEM(keys, 0, mangled);
2376 }
2377 else {
2378 int res = PyList_Append(keys, mangled);
2379 Py_DECREF(mangled);
2380 if (res == -1) {
2381 goto error;
2382 }
2383 }
2384 if (!compiler_visit_expr(c, default_)) {
2385 goto error;
2386 }
2387 }
2388 }
2389 if (keys != NULL) {
2390 Py_ssize_t default_count = PyList_GET_SIZE(keys);
2391 PyObject *keys_tuple = PyList_AsTuple(keys);
2392 Py_DECREF(keys);
2393 ADDOP_LOAD_CONST_NEW(c, keys_tuple);
2394 ADDOP_I(c, BUILD_CONST_KEY_MAP, default_count);
2395 assert(default_count > 0);
2396 return 1;
2397 }
2398 else {
2399 return -1;
2400 }
2401
2402 error:
2403 Py_XDECREF(keys);
2404 return 0;
2405 }
2406
2407 static int
2408 compiler_visit_annexpr(struct compiler *c, expr_ty annotation)
2409 {
2410 ADDOP_LOAD_CONST_NEW(c, _PyAST_ExprAsUnicode(annotation));
2411 return 1;
2412 }
2413
2414 static int
2415 compiler_visit_argannotation(struct compiler *c, identifier id,
2416 expr_ty annotation, Py_ssize_t *annotations_len)
2417 {
2418 if (!annotation) {
2419 return 1;
2420 }
2421
2422 PyObject *mangled = _Py_Mangle(c->u->u_private, id);
2423 if (!mangled) {
2424 return 0;
2425 }
2426 ADDOP_LOAD_CONST(c, mangled);
2427 Py_DECREF(mangled);
2428
2429 if (c->c_future->ff_features & CO_FUTURE_ANNOTATIONS) {
2430 VISIT(c, annexpr, annotation);
2431 }
2432 else {
2433 if (annotation->kind == Starred_kind) {
2434 // *args: *Ts (where Ts is a TypeVarTuple).
2435 // Do [annotation_value] = [*Ts].
2436 // (Note that in theory we could end up here even for an argument
2437 // other than *args, but in practice the grammar doesn't allow it.)
2438 VISIT(c, expr, annotation->v.Starred.value);
2439 ADDOP_I(c, UNPACK_SEQUENCE, (Py_ssize_t) 1);
2440 }
2441 else {
2442 VISIT(c, expr, annotation);
2443 }
2444 }
2445 *annotations_len += 2;
2446 return 1;
2447 }
2448
2449 static int
2450 compiler_visit_argannotations(struct compiler *c, asdl_arg_seq* args,
2451 Py_ssize_t *annotations_len)
2452 {
2453 int i;
2454 for (i = 0; i < asdl_seq_LEN(args); i++) {
2455 arg_ty arg = (arg_ty)asdl_seq_GET(args, i);
2456 if (!compiler_visit_argannotation(
2457 c,
2458 arg->arg,
2459 arg->annotation,
2460 annotations_len))
2461 return 0;
2462 }
2463 return 1;
2464 }
2465
2466 static int
2467 compiler_visit_annotations(struct compiler *c, arguments_ty args,
2468 expr_ty returns)
2469 {
2470 /* Push arg annotation names and values.
2471 The expressions are evaluated out-of-order wrt the source code.
2472
2473 Return 0 on error, -1 if no annotations pushed, 1 if annotations are pushed.
2474 */
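/* For example (illustrative), "def f(x: int) -> str: ..." pushes
 * 'x', int, 'return', str and then emits BUILD_TUPLE 4; the 0x04 flag
 * makes MAKE_FUNCTION attach that tuple as the function's annotations. */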
2475 Py_ssize_t annotations_len = 0;
2476
2477 if (!compiler_visit_argannotations(c, args->args, &annotations_len))
2478 return 0;
2479 if (!compiler_visit_argannotations(c, args->posonlyargs, &annotations_len))
2480 return 0;
2481 if (args->vararg && args->vararg->annotation &&
2482 !compiler_visit_argannotation(c, args->vararg->arg,
2483 args->vararg->annotation, &annotations_len))
2484 return 0;
2485 if (!compiler_visit_argannotations(c, args->kwonlyargs, &annotations_len))
2486 return 0;
2487 if (args->kwarg && args->kwarg->annotation &&
2488 !compiler_visit_argannotation(c, args->kwarg->arg,
2489 args->kwarg->annotation, &annotations_len))
2490 return 0;
2491
2492 if (!compiler_visit_argannotation(c, &_Py_ID(return), returns,
2493 &annotations_len)) {
2494 return 0;
2495 }
2496
2497 if (annotations_len) {
2498 ADDOP_I(c, BUILD_TUPLE, annotations_len);
2499 return 1;
2500 }
2501
2502 return -1;
2503 }
2504
2505 static int
2506 compiler_visit_defaults(struct compiler *c, arguments_ty args)
2507 {
2508 VISIT_SEQ(c, expr, args->defaults);
2509 ADDOP_I(c, BUILD_TUPLE, asdl_seq_LEN(args->defaults));
2510 return 1;
2511 }
2512
2513 static Py_ssize_t
2514 compiler_default_arguments(struct compiler *c, arguments_ty args)
2515 {
2516 Py_ssize_t funcflags = 0;
2517 if (args->defaults && asdl_seq_LEN(args->defaults) > 0) {
2518 if (!compiler_visit_defaults(c, args))
2519 return -1;
2520 funcflags |= 0x01;
2521 }
2522 if (args->kwonlyargs) {
2523 int res = compiler_visit_kwonlydefaults(c, args->kwonlyargs,
2524 args->kw_defaults);
2525 if (res == 0) {
2526 return -1;
2527 }
2528 else if (res > 0) {
2529 funcflags |= 0x02;
2530 }
2531 }
2532 return funcflags;
2533 }
2534
2535 static int
2536 forbidden_name(struct compiler *c, identifier name, expr_context_ty ctx)
2537 {
2538
2539 if (ctx == Store && _PyUnicode_EqualToASCIIString(name, "__debug__")) {
2540 compiler_error(c, "cannot assign to __debug__");
2541 return 1;
2542 }
2543 if (ctx == Del && _PyUnicode_EqualToASCIIString(name, "__debug__")) {
2544 compiler_error(c, "cannot delete __debug__");
2545 return 1;
2546 }
2547 return 0;
2548 }
2549
2550 static int
2551 compiler_check_debug_one_arg(struct compiler *c, arg_ty arg)
2552 {
2553 if (arg != NULL) {
2554 if (forbidden_name(c, arg->arg, Store))
2555 return 0;
2556 }
2557 return 1;
2558 }
2559
2560 static int
2561 compiler_check_debug_args_seq(struct compiler *c, asdl_arg_seq *args)
2562 {
2563 if (args != NULL) {
2564 for (Py_ssize_t i = 0, n = asdl_seq_LEN(args); i < n; i++) {
2565 if (!compiler_check_debug_one_arg(c, asdl_seq_GET(args, i)))
2566 return 0;
2567 }
2568 }
2569 return 1;
2570 }
2571
2572 static int
2573 compiler_check_debug_args(struct compiler *c, arguments_ty args)
2574 {
2575 if (!compiler_check_debug_args_seq(c, args->posonlyargs))
2576 return 0;
2577 if (!compiler_check_debug_args_seq(c, args->args))
2578 return 0;
2579 if (!compiler_check_debug_one_arg(c, args->vararg))
2580 return 0;
2581 if (!compiler_check_debug_args_seq(c, args->kwonlyargs))
2582 return 0;
2583 if (!compiler_check_debug_one_arg(c, args->kwarg))
2584 return 0;
2585 return 1;
2586 }
2587
2588 static int
2589 compiler_function(struct compiler *c, stmt_ty s, int is_async)
2590 {
2591 PyCodeObject *co;
2592 PyObject *qualname, *docstring = NULL;
2593 arguments_ty args;
2594 expr_ty returns;
2595 identifier name;
2596 asdl_expr_seq* decos;
2597 asdl_stmt_seq *body;
2598 Py_ssize_t i, funcflags;
2599 int annotations;
2600 int scope_type;
2601 int firstlineno;
2602
2603 if (is_async) {
2604 assert(s->kind == AsyncFunctionDef_kind);
2605
2606 args = s->v.AsyncFunctionDef.args;
2607 returns = s->v.AsyncFunctionDef.returns;
2608 decos = s->v.AsyncFunctionDef.decorator_list;
2609 name = s->v.AsyncFunctionDef.name;
2610 body = s->v.AsyncFunctionDef.body;
2611
2612 scope_type = COMPILER_SCOPE_ASYNC_FUNCTION;
2613 } else {
2614 assert(s->kind == FunctionDef_kind);
2615
2616 args = s->v.FunctionDef.args;
2617 returns = s->v.FunctionDef.returns;
2618 decos = s->v.FunctionDef.decorator_list;
2619 name = s->v.FunctionDef.name;
2620 body = s->v.FunctionDef.body;
2621
2622 scope_type = COMPILER_SCOPE_FUNCTION;
2623 }
2624
2625 if (!compiler_check_debug_args(c, args))
2626 return 0;
2627
2628 if (!compiler_decorators(c, decos))
2629 return 0;
2630
2631 firstlineno = s->lineno;
2632 if (asdl_seq_LEN(decos)) {
2633 firstlineno = ((expr_ty)asdl_seq_GET(decos, 0))->lineno;
2634 }
2635
2636 funcflags = compiler_default_arguments(c, args);
2637 if (funcflags == -1) {
2638 return 0;
2639 }
2640
2641 annotations = compiler_visit_annotations(c, args, returns);
2642 if (annotations == 0) {
2643 return 0;
2644 }
2645 else if (annotations > 0) {
2646 funcflags |= 0x04;
2647 }
2648
2649 if (!compiler_enter_scope(c, name, scope_type, (void *)s, firstlineno)) {
2650 return 0;
2651 }
2652
2653 /* if not -OO mode, add docstring */
2654 if (c->c_optimize < 2) {
2655 docstring = _PyAST_GetDocString(body);
2656 }
2657 if (compiler_add_const(c, docstring ? docstring : Py_None) < 0) {
2658 compiler_exit_scope(c);
2659 return 0;
2660 }
2661
2662 c->u->u_argcount = asdl_seq_LEN(args->args);
2663 c->u->u_posonlyargcount = asdl_seq_LEN(args->posonlyargs);
2664 c->u->u_kwonlyargcount = asdl_seq_LEN(args->kwonlyargs);
2665 for (i = docstring ? 1 : 0; i < asdl_seq_LEN(body); i++) {
2666 VISIT_IN_SCOPE(c, stmt, (stmt_ty)asdl_seq_GET(body, i));
2667 }
2668 co = assemble(c, 1);
2669 qualname = c->u->u_qualname;
2670 Py_INCREF(qualname);
2671 compiler_exit_scope(c);
2672 if (co == NULL) {
2673 Py_XDECREF(qualname);
2674 Py_XDECREF(co);
2675 return 0;
2676 }
2677
2678 if (!compiler_make_closure(c, co, funcflags, qualname)) {
2679 Py_DECREF(qualname);
2680 Py_DECREF(co);
2681 return 0;
2682 }
2683 Py_DECREF(qualname);
2684 Py_DECREF(co);
2685
2686 if (!compiler_apply_decorators(c, decos))
2687 return 0;
2688 return compiler_nameop(c, name, Store);
2689 }
2690
2691 static int
2692 compiler_class(struct compiler *c, stmt_ty s)
2693 {
2694 PyCodeObject *co;
2695 int i, firstlineno;
2696 asdl_expr_seq *decos = s->v.ClassDef.decorator_list;
2697
2698 if (!compiler_decorators(c, decos))
2699 return 0;
2700
2701 firstlineno = s->lineno;
2702 if (asdl_seq_LEN(decos)) {
2703 firstlineno = ((expr_ty)asdl_seq_GET(decos, 0))->lineno;
2704 }
2705
2706 /* ultimately generate code for:
2707 <name> = __build_class__(<func>, <name>, *<bases>, **<keywords>)
2708 where:
2709 <func> is a zero arg function/closure created from the class body.
2710 It mutates its locals to build the class namespace.
2711 <name> is the class name
2712 <bases> is the positional arguments and *varargs argument
2713 <keywords> is the keyword arguments and **kwds argument
2714 This borrows from compiler_call.
2715 */
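/* For example (illustrative),
 *
 *     class C(Base, metaclass=M):
 *         ...
 *
 * compiles roughly to C = __build_class__(<func>, 'C', Base, metaclass=M).
 */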
2716
2717 /* 1. compile the class body into a code object */
2718 if (!compiler_enter_scope(c, s->v.ClassDef.name,
2719 COMPILER_SCOPE_CLASS, (void *)s, firstlineno)) {
2720 return 0;
2721 }
2722 /* this block represents what we do in the new scope */
2723 {
2724 /* use the class name for name mangling */
2725 Py_INCREF(s->v.ClassDef.name);
2726 Py_XSETREF(c->u->u_private, s->v.ClassDef.name);
2727 /* load (global) __name__ ... */
2728 if (!compiler_nameop(c, &_Py_ID(__name__), Load)) {
2729 compiler_exit_scope(c);
2730 return 0;
2731 }
2732 /* ... and store it as __module__ */
2733 if (!compiler_nameop(c, &_Py_ID(__module__), Store)) {
2734 compiler_exit_scope(c);
2735 return 0;
2736 }
2737 assert(c->u->u_qualname);
2738 ADDOP_LOAD_CONST(c, c->u->u_qualname);
2739 if (!compiler_nameop(c, &_Py_ID(__qualname__), Store)) {
2740 compiler_exit_scope(c);
2741 return 0;
2742 }
2743 /* compile the body proper */
2744 if (!compiler_body(c, s->v.ClassDef.body)) {
2745 compiler_exit_scope(c);
2746 return 0;
2747 }
2748 /* The following code is artificial */
2749 UNSET_LOC(c);
2750 /* Return __classcell__ if it is referenced, otherwise return None */
2751 if (c->u->u_ste->ste_needs_class_closure) {
2752 /* Store __classcell__ into class namespace & return it */
2753 i = compiler_lookup_arg(c->u->u_cellvars, &_Py_ID(__class__));
2754 if (i < 0) {
2755 compiler_exit_scope(c);
2756 return 0;
2757 }
2758 assert(i == 0);
2759
2760 ADDOP_I(c, LOAD_CLOSURE, i);
2761 ADDOP_I(c, COPY, 1);
2762 if (!compiler_nameop(c, &_Py_ID(__classcell__), Store)) {
2763 compiler_exit_scope(c);
2764 return 0;
2765 }
2766 }
2767 else {
2768 /* No methods referenced __class__, so just return None */
2769 assert(PyDict_GET_SIZE(c->u->u_cellvars) == 0);
2770 ADDOP_LOAD_CONST(c, Py_None);
2771 }
2772 ADDOP_IN_SCOPE(c, RETURN_VALUE);
2773 /* create the code object */
2774 co = assemble(c, 1);
2775 }
2776 /* leave the new scope */
2777 compiler_exit_scope(c);
2778 if (co == NULL)
2779 return 0;
2780
2781 /* 2. load the 'build_class' function */
2782 ADDOP(c, PUSH_NULL);
2783 ADDOP(c, LOAD_BUILD_CLASS);
2784
2785 /* 3. load a function (or closure) made from the code object */
2786 if (!compiler_make_closure(c, co, 0, NULL)) {
2787 Py_DECREF(co);
2788 return 0;
2789 }
2790 Py_DECREF(co);
2791
2792 /* 4. load class name */
2793 ADDOP_LOAD_CONST(c, s->v.ClassDef.name);
2794
2795 /* 5. generate the rest of the code for the call */
2796 if (!compiler_call_helper(c, 2, s->v.ClassDef.bases, s->v.ClassDef.keywords))
2797 return 0;
2798 /* 6. apply decorators */
2799 if (!compiler_apply_decorators(c, decos))
2800 return 0;
2801
2802 /* 7. store into <name> */
2803 if (!compiler_nameop(c, s->v.ClassDef.name, Store))
2804 return 0;
2805 return 1;
2806 }
2807
2808 /* Return 0 if the expression is a constant other than the named singletons
2809 (None, True, False, Ellipsis); return 1 otherwise. */
2810 static int
2811 check_is_arg(expr_ty e)
2812 {
2813 if (e->kind != Constant_kind) {
2814 return 1;
2815 }
2816 PyObject *value = e->v.Constant.value;
2817 return (value == Py_None
2818 || value == Py_False
2819 || value == Py_True
2820 || value == Py_Ellipsis);
2821 }
2822
2823 /* Check operands of identity checks ("is" and "is not").
2824 Emit a warning if any operand is a constant except named singletons.
2825 Return 0 on error.
2826 */
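/* For example, "x is 1" and "y is not 'a'" draw the warning (literal
 * constants), while "x is None" and "x is True" do not (named singletons). */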
2827 static int
2828 check_compare(struct compiler *c, expr_ty e)
2829 {
2830 Py_ssize_t i, n;
2831 int left = check_is_arg(e->v.Compare.left);
2832 n = asdl_seq_LEN(e->v.Compare.ops);
2833 for (i = 0; i < n; i++) {
2834 cmpop_ty op = (cmpop_ty)asdl_seq_GET(e->v.Compare.ops, i);
2835 int right = check_is_arg((expr_ty)asdl_seq_GET(e->v.Compare.comparators, i));
2836 if (op == Is || op == IsNot) {
2837 if (!right || !left) {
2838 const char *msg = (op == Is)
2839 ? "\"is\" with a literal. Did you mean \"==\"?"
2840 : "\"is not\" with a literal. Did you mean \"!=\"?";
2841 return compiler_warn(c, msg);
2842 }
2843 }
2844 left = right;
2845 }
2846 return 1;
2847 }
2848
2849 static int compiler_addcompare(struct compiler *c, cmpop_ty op)
2850 {
2851 int cmp;
2852 switch (op) {
2853 case Eq:
2854 cmp = Py_EQ;
2855 break;
2856 case NotEq:
2857 cmp = Py_NE;
2858 break;
2859 case Lt:
2860 cmp = Py_LT;
2861 break;
2862 case LtE:
2863 cmp = Py_LE;
2864 break;
2865 case Gt:
2866 cmp = Py_GT;
2867 break;
2868 case GtE:
2869 cmp = Py_GE;
2870 break;
2871 case Is:
2872 ADDOP_I(c, IS_OP, 0);
2873 return 1;
2874 case IsNot:
2875 ADDOP_I(c, IS_OP, 1);
2876 return 1;
2877 case In:
2878 ADDOP_I(c, CONTAINS_OP, 0);
2879 return 1;
2880 case NotIn:
2881 ADDOP_I(c, CONTAINS_OP, 1);
2882 return 1;
2883 default:
2884 Py_UNREACHABLE();
2885 }
2886 ADDOP_I(c, COMPARE_OP, cmp);
2887 return 1;
2888 }
2889
2890
2891
2892 static int
2893 compiler_jump_if(struct compiler *c, expr_ty e, basicblock *next, int cond)
2894 {
2895 switch (e->kind) {
2896 case UnaryOp_kind:
2897 if (e->v.UnaryOp.op == Not)
2898 return compiler_jump_if(c, e->v.UnaryOp.operand, next, !cond);
2899 /* fallback to general implementation */
2900 break;
2901 case BoolOp_kind: {
2902 asdl_expr_seq *s = e->v.BoolOp.values;
2903 Py_ssize_t i, n = asdl_seq_LEN(s) - 1;
2904 assert(n >= 0);
2905 int cond2 = e->v.BoolOp.op == Or;
2906 basicblock *next2 = next;
2907 if (!cond2 != !cond) {
2908 next2 = compiler_new_block(c);
2909 if (next2 == NULL)
2910 return 0;
2911 }
2912 for (i = 0; i < n; ++i) {
2913 if (!compiler_jump_if(c, (expr_ty)asdl_seq_GET(s, i), next2, cond2))
2914 return 0;
2915 }
2916 if (!compiler_jump_if(c, (expr_ty)asdl_seq_GET(s, n), next, cond))
2917 return 0;
2918 if (next2 != next)
2919 compiler_use_next_block(c, next2);
2920 return 1;
2921 }
2922 case IfExp_kind: {
2923 basicblock *end, *next2;
2924 end = compiler_new_block(c);
2925 if (end == NULL)
2926 return 0;
2927 next2 = compiler_new_block(c);
2928 if (next2 == NULL)
2929 return 0;
2930 if (!compiler_jump_if(c, e->v.IfExp.test, next2, 0))
2931 return 0;
2932 if (!compiler_jump_if(c, e->v.IfExp.body, next, cond))
2933 return 0;
2934 ADDOP_JUMP_NOLINE(c, JUMP, end);
2935 compiler_use_next_block(c, next2);
2936 if (!compiler_jump_if(c, e->v.IfExp.orelse, next, cond))
2937 return 0;
2938 compiler_use_next_block(c, end);
2939 return 1;
2940 }
2941 case Compare_kind: {
2942 SET_LOC(c, e);
2943 Py_ssize_t i, n = asdl_seq_LEN(e->v.Compare.ops) - 1;
2944 if (n > 0) {
2945 if (!check_compare(c, e)) {
2946 return 0;
2947 }
2948 basicblock *cleanup = compiler_new_block(c);
2949 if (cleanup == NULL)
2950 return 0;
2951 VISIT(c, expr, e->v.Compare.left);
2952 for (i = 0; i < n; i++) {
2953 VISIT(c, expr,
2954 (expr_ty)asdl_seq_GET(e->v.Compare.comparators, i));
2955 ADDOP_I(c, SWAP, 2);
2956 ADDOP_I(c, COPY, 2);
2957 ADDOP_COMPARE(c, asdl_seq_GET(e->v.Compare.ops, i));
2958 ADDOP_JUMP(c, POP_JUMP_IF_FALSE, cleanup);
2959 }
2960 VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Compare.comparators, n));
2961 ADDOP_COMPARE(c, asdl_seq_GET(e->v.Compare.ops, n));
2962 ADDOP_JUMP(c, cond ? POP_JUMP_IF_TRUE : POP_JUMP_IF_FALSE, next);
2963 basicblock *end = compiler_new_block(c);
2964 if (end == NULL)
2965 return 0;
2966 ADDOP_JUMP_NOLINE(c, JUMP, end);
2967 compiler_use_next_block(c, cleanup);
2968 ADDOP(c, POP_TOP);
2969 if (!cond) {
2970 ADDOP_JUMP_NOLINE(c, JUMP, next);
2971 }
2972 compiler_use_next_block(c, end);
2973 return 1;
2974 }
2975 /* fallback to general implementation */
2976 break;
2977 }
2978 default:
2979 /* fallback to general implementation */
2980 break;
2981 }
2982
2983 /* general implementation */
2984 VISIT(c, expr, e);
2985 ADDOP_JUMP(c, cond ? POP_JUMP_IF_TRUE : POP_JUMP_IF_FALSE, next);
2986 return 1;
2987 }
2988
2989 static int
2990 compiler_ifexp(struct compiler *c, expr_ty e)
2991 {
2992 basicblock *end, *next;
2993
2994 assert(e->kind == IfExp_kind);
2995 end = compiler_new_block(c);
2996 if (end == NULL)
2997 return 0;
2998 next = compiler_new_block(c);
2999 if (next == NULL)
3000 return 0;
3001 if (!compiler_jump_if(c, e->v.IfExp.test, next, 0))
3002 return 0;
3003 VISIT(c, expr, e->v.IfExp.body);
3004 ADDOP_JUMP_NOLINE(c, JUMP, end);
3005 compiler_use_next_block(c, next);
3006 VISIT(c, expr, e->v.IfExp.orelse);
3007 compiler_use_next_block(c, end);
3008 return 1;
3009 }
3010
3011 static int
3012 compiler_lambda(struct compiler *c, expr_ty e)
3013 {
3014 PyCodeObject *co;
3015 PyObject *qualname;
3016 Py_ssize_t funcflags;
3017 arguments_ty args = e->v.Lambda.args;
3018 assert(e->kind == Lambda_kind);
3019
3020 if (!compiler_check_debug_args(c, args))
3021 return 0;
3022
3023 funcflags = compiler_default_arguments(c, args);
3024 if (funcflags == -1) {
3025 return 0;
3026 }
3027
3028 _Py_DECLARE_STR(anon_lambda, "<lambda>");
3029 if (!compiler_enter_scope(c, &_Py_STR(anon_lambda), COMPILER_SCOPE_LAMBDA,
3030 (void *)e, e->lineno)) {
3031 return 0;
3032 }
3033 /* Make None the first constant, so the lambda can't have a
3034 docstring. */
3035 if (compiler_add_const(c, Py_None) < 0) {
3036 compiler_exit_scope(c); return 0; }
3037
3038 c->u->u_argcount = asdl_seq_LEN(args->args);
3039 c->u->u_posonlyargcount = asdl_seq_LEN(args->posonlyargs);
3040 c->u->u_kwonlyargcount = asdl_seq_LEN(args->kwonlyargs);
3041 VISIT_IN_SCOPE(c, expr, e->v.Lambda.body);
3042 if (c->u->u_ste->ste_generator) {
3043 co = assemble(c, 0);
3044 }
3045 else {
3046 ADDOP_IN_SCOPE(c, RETURN_VALUE);
3047 co = assemble(c, 1);
3048 }
3049 qualname = c->u->u_qualname;
3050 Py_INCREF(qualname);
3051 compiler_exit_scope(c);
3052 if (co == NULL) {
3053 Py_DECREF(qualname);
3054 return 0;
3055 }
3056
3057 if (!compiler_make_closure(c, co, funcflags, qualname)) {
3058 Py_DECREF(qualname);
3059 Py_DECREF(co);
3060 return 0;
3061 }
3062 Py_DECREF(qualname);
3063 Py_DECREF(co);
3064
3065 return 1;
3066 }
3067
3068 static int
3069 compiler_if(struct compiler *c, stmt_ty s)
3070 {
3071 basicblock *end, *next;
3072 assert(s->kind == If_kind);
3073 end = compiler_new_block(c);
3074 if (end == NULL) {
3075 return 0;
3076 }
3077 if (asdl_seq_LEN(s->v.If.orelse)) {
3078 next = compiler_new_block(c);
3079 if (next == NULL) {
3080 return 0;
3081 }
3082 }
3083 else {
3084 next = end;
3085 }
3086 if (!compiler_jump_if(c, s->v.If.test, next, 0)) {
3087 return 0;
3088 }
3089 VISIT_SEQ(c, stmt, s->v.If.body);
3090 if (asdl_seq_LEN(s->v.If.orelse)) {
3091 ADDOP_JUMP_NOLINE(c, JUMP, end);
3092 compiler_use_next_block(c, next);
3093 VISIT_SEQ(c, stmt, s->v.If.orelse);
3094 }
3095 compiler_use_next_block(c, end);
3096 return 1;
3097 }
3098
3099 static int
3100 compiler_for(struct compiler *c, stmt_ty s)
3101 {
3102 basicblock *start, *body, *cleanup, *end;
3103
3104 start = compiler_new_block(c);
3105 body = compiler_new_block(c);
3106 cleanup = compiler_new_block(c);
3107 end = compiler_new_block(c);
3108 if (start == NULL || body == NULL || end == NULL || cleanup == NULL) {
3109 return 0;
3110 }
3111 if (!compiler_push_fblock(c, FOR_LOOP, start, end, NULL)) {
3112 return 0;
3113 }
3114 VISIT(c, expr, s->v.For.iter);
3115 ADDOP(c, GET_ITER);
3116 compiler_use_next_block(c, start);
3117 ADDOP_JUMP(c, FOR_ITER, cleanup);
3118 compiler_use_next_block(c, body);
3119 VISIT(c, expr, s->v.For.target);
3120 VISIT_SEQ(c, stmt, s->v.For.body);
3121 /* Mark jump as artificial */
3122 UNSET_LOC(c);
3123 ADDOP_JUMP(c, JUMP, start);
3124 compiler_use_next_block(c, cleanup);
3125
3126 compiler_pop_fblock(c, FOR_LOOP, start);
3127
3128 VISIT_SEQ(c, stmt, s->v.For.orelse);
3129 compiler_use_next_block(c, end);
3130 return 1;
3131 }
3132
3133
3134 static int
3135 compiler_async_for(struct compiler *c, stmt_ty s)
3136 {
3137 basicblock *start, *except, *end;
3138 if (IS_TOP_LEVEL_AWAIT(c)){
3139 c->u->u_ste->ste_coroutine = 1;
3140 } else if (c->u->u_scope_type != COMPILER_SCOPE_ASYNC_FUNCTION) {
3141 return compiler_error(c, "'async for' outside async function");
3142 }
3143
3144 start = compiler_new_block(c);
3145 except = compiler_new_block(c);
3146 end = compiler_new_block(c);
3147
3148 if (start == NULL || except == NULL || end == NULL) {
3149 return 0;
3150 }
3151 VISIT(c, expr, s->v.AsyncFor.iter);
3152 ADDOP(c, GET_AITER);
3153
3154 compiler_use_next_block(c, start);
3155 if (!compiler_push_fblock(c, FOR_LOOP, start, end, NULL)) {
3156 return 0;
3157 }
3158 /* SETUP_FINALLY to guard the __anext__ call */
3159 ADDOP_JUMP(c, SETUP_FINALLY, except);
3160 ADDOP(c, GET_ANEXT);
3161 ADDOP_LOAD_CONST(c, Py_None);
3162 ADD_YIELD_FROM(c, 1);
3163 ADDOP(c, POP_BLOCK); /* for SETUP_FINALLY */
3164
3165 /* Success block for __anext__ */
3166 VISIT(c, expr, s->v.AsyncFor.target);
3167 VISIT_SEQ(c, stmt, s->v.AsyncFor.body);
3168 /* Mark jump as artificial */
3169 UNSET_LOC(c);
3170 ADDOP_JUMP(c, JUMP, start);
3171
3172 compiler_pop_fblock(c, FOR_LOOP, start);
3173
3174 /* Except block for __anext__ */
3175 compiler_use_next_block(c, except);
3176
3177 /* Use same line number as the iterator,
3178 * as the END_ASYNC_FOR succeeds the `for`, not the body. */
3179 SET_LOC(c, s->v.AsyncFor.iter);
3180 ADDOP(c, END_ASYNC_FOR);
3181
3182 /* `else` block */
3183 VISIT_SEQ(c, stmt, s->v.AsyncFor.orelse);
3184
3185 compiler_use_next_block(c, end);
3186
3187 return 1;
3188 }
3189
3190 static int
3191 compiler_while(struct compiler *c, stmt_ty s)
3192 {
3193 basicblock *loop, *body, *end, *anchor = NULL;
3194 loop = compiler_new_block(c);
3195 body = compiler_new_block(c);
3196 anchor = compiler_new_block(c);
3197 end = compiler_new_block(c);
3198 if (loop == NULL || body == NULL || anchor == NULL || end == NULL) {
3199 return 0;
3200 }
3201 compiler_use_next_block(c, loop);
3202 if (!compiler_push_fblock(c, WHILE_LOOP, loop, end, NULL)) {
3203 return 0;
3204 }
3205 if (!compiler_jump_if(c, s->v.While.test, anchor, 0)) {
3206 return 0;
3207 }
3208
3209 compiler_use_next_block(c, body);
3210 VISIT_SEQ(c, stmt, s->v.While.body);
3211 SET_LOC(c, s);
3212 if (!compiler_jump_if(c, s->v.While.test, body, 1)) {
3213 return 0;
3214 }
3215
3216 compiler_pop_fblock(c, WHILE_LOOP, loop);
3217
3218 compiler_use_next_block(c, anchor);
3219 if (s->v.While.orelse) {
3220 VISIT_SEQ(c, stmt, s->v.While.orelse);
3221 }
3222 compiler_use_next_block(c, end);
3223
3224 return 1;
3225 }
3226
3227 static int
3228 compiler_return(struct compiler *c, stmt_ty s)
3229 {
3230 int preserve_tos = ((s->v.Return.value != NULL) &&
3231 (s->v.Return.value->kind != Constant_kind));
3232 if (c->u->u_ste->ste_type != FunctionBlock)
3233 return compiler_error(c, "'return' outside function");
3234 if (s->v.Return.value != NULL &&
3235 c->u->u_ste->ste_coroutine && c->u->u_ste->ste_generator)
3236 {
3237 return compiler_error(
3238 c, "'return' with value in async generator");
3239 }
3240 if (preserve_tos) {
3241 VISIT(c, expr, s->v.Return.value);
3242 } else {
3243 /* Emit instruction with line number for return value */
3244 if (s->v.Return.value != NULL) {
3245 SET_LOC(c, s->v.Return.value);
3246 ADDOP(c, NOP);
3247 }
3248 }
3249 if (s->v.Return.value == NULL || s->v.Return.value->lineno != s->lineno) {
3250 SET_LOC(c, s);
3251 ADDOP(c, NOP);
3252 }
3253
3254 if (!compiler_unwind_fblock_stack(c, preserve_tos, NULL))
3255 return 0;
3256 if (s->v.Return.value == NULL) {
3257 ADDOP_LOAD_CONST(c, Py_None);
3258 }
3259 else if (!preserve_tos) {
3260 ADDOP_LOAD_CONST(c, s->v.Return.value->v.Constant.value);
3261 }
3262 ADDOP(c, RETURN_VALUE);
3263
3264 return 1;
3265 }
3266
3267 static int
3268 compiler_break(struct compiler *c)
3269 {
3270 struct fblockinfo *loop = NULL;
3271 int u_lineno = c->u->u_lineno;
3272 int u_col_offset = c->u->u_col_offset;
3273 int u_end_lineno = c->u->u_end_lineno;
3274 int u_end_col_offset = c->u->u_end_col_offset;
3275 /* Emit instruction with line number */
3276 ADDOP(c, NOP);
3277 if (!compiler_unwind_fblock_stack(c, 0, &loop)) {
3278 return 0;
3279 }
3280 if (loop == NULL) {
3281 c->u->u_lineno = u_lineno;
3282 c->u->u_col_offset = u_col_offset;
3283 c->u->u_end_lineno = u_end_lineno;
3284 c->u->u_end_col_offset = u_end_col_offset;
3285 return compiler_error(c, "'break' outside loop");
3286 }
3287 if (!compiler_unwind_fblock(c, loop, 0)) {
3288 return 0;
3289 }
3290 ADDOP_JUMP(c, JUMP, loop->fb_exit);
3291 return 1;
3292 }
3293
3294 static int
3295 compiler_continue(struct compiler *c)
3296 {
3297 struct fblockinfo *loop = NULL;
3298 int u_lineno = c->u->u_lineno;
3299 int u_col_offset = c->u->u_col_offset;
3300 int u_end_lineno = c->u->u_end_lineno;
3301 int u_end_col_offset = c->u->u_end_col_offset;
3302 /* Emit instruction with line number */
3303 ADDOP(c, NOP);
3304 if (!compiler_unwind_fblock_stack(c, 0, &loop)) {
3305 return 0;
3306 }
3307 if (loop == NULL) {
3308 c->u->u_lineno = u_lineno;
3309 c->u->u_col_offset = u_col_offset;
3310 c->u->u_end_lineno = u_end_lineno;
3311 c->u->u_end_col_offset = u_end_col_offset;
3312 return compiler_error(c, "'continue' not properly in loop");
3313 }
3314 ADDOP_JUMP(c, JUMP, loop->fb_block);
3315 return 1;
3316 }
3317
3318
3319 /* Code generated for "try: <body> finally: <finalbody>" is as follows:
3320
3321 SETUP_FINALLY L
3322 <code for body>
3323 POP_BLOCK
3324 <code for finalbody>
3325 JUMP E
3326 L:
3327 <code for finalbody>
3328 E:
3329
3330 The special instructions use the block stack. Each block
3331 stack entry contains the instruction that created it (here
3332 SETUP_FINALLY), the level of the value stack at the time the
3333 block stack entry was created, and a label (here L).
3334
3335 SETUP_FINALLY:
3336 Pushes the current value stack level and the label
3337 onto the block stack.
3338 POP_BLOCK:
3339 Pops an entry from the block stack.
3340
3341 The block stack is unwound when an exception is raised:
3342 when a SETUP_FINALLY entry is found, the raised and the caught
3343 exceptions are pushed onto the value stack (and the exception
3344 condition is cleared), and the interpreter jumps to the label
3345 gotten from the block stack.
3346 */
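/* Note that <code for finalbody> is emitted twice: once inline for the
 * normal exit path (followed by JUMP E), and once at L for the path taken
 * when the body raises. */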
3347
3348 static int
3349 compiler_try_finally(struct compiler *c, stmt_ty s)
3350 {
3351 basicblock *body, *end, *exit, *cleanup;
3352
3353 body = compiler_new_block(c);
3354 end = compiler_new_block(c);
3355 exit = compiler_new_block(c);
3356 cleanup = compiler_new_block(c);
3357 if (body == NULL || end == NULL || exit == NULL || cleanup == NULL) {
3358 return 0;
3359 }
3360 /* `try` block */
3361 ADDOP_JUMP(c, SETUP_FINALLY, end);
3362 compiler_use_next_block(c, body);
3363 if (!compiler_push_fblock(c, FINALLY_TRY, body, end, s->v.Try.finalbody))
3364 return 0;
3365 if (s->v.Try.handlers && asdl_seq_LEN(s->v.Try.handlers)) {
3366 if (!compiler_try_except(c, s))
3367 return 0;
3368 }
3369 else {
3370 VISIT_SEQ(c, stmt, s->v.Try.body);
3371 }
3372 ADDOP_NOLINE(c, POP_BLOCK);
3373 compiler_pop_fblock(c, FINALLY_TRY, body);
3374 VISIT_SEQ(c, stmt, s->v.Try.finalbody);
3375 ADDOP_JUMP_NOLINE(c, JUMP, exit);
3376 /* `finally` block */
3377 compiler_use_next_block(c, end);
3378
3379 UNSET_LOC(c);
3380 ADDOP_JUMP(c, SETUP_CLEANUP, cleanup);
3381 ADDOP(c, PUSH_EXC_INFO);
3382 if (!compiler_push_fblock(c, FINALLY_END, end, NULL, NULL))
3383 return 0;
3384 VISIT_SEQ(c, stmt, s->v.Try.finalbody);
3385 compiler_pop_fblock(c, FINALLY_END, end);
3386 ADDOP_I(c, RERAISE, 0);
3387 compiler_use_next_block(c, cleanup);
3388 POP_EXCEPT_AND_RERAISE(c);
3389 compiler_use_next_block(c, exit);
3390 return 1;
3391 }
3392
3393 static int
3394 compiler_try_star_finally(struct compiler *c, stmt_ty s)
3395 {
3396 basicblock *body = compiler_new_block(c);
3397 if (body == NULL) {
3398 return 0;
3399 }
3400 basicblock *end = compiler_new_block(c);
3401 if (!end) {
3402 return 0;
3403 }
3404 basicblock *exit = compiler_new_block(c);
3405 if (!exit) {
3406 return 0;
3407 }
3408 basicblock *cleanup = compiler_new_block(c);
3409 if (!cleanup) {
3410 return 0;
3411 }
3412 /* `try` block */
3413 ADDOP_JUMP(c, SETUP_FINALLY, end);
3414 compiler_use_next_block(c, body);
3415 if (!compiler_push_fblock(c, FINALLY_TRY, body, end, s->v.TryStar.finalbody)) {
3416 return 0;
3417 }
3418 if (s->v.TryStar.handlers && asdl_seq_LEN(s->v.TryStar.handlers)) {
3419 if (!compiler_try_star_except(c, s)) {
3420 return 0;
3421 }
3422 }
3423 else {
3424 VISIT_SEQ(c, stmt, s->v.TryStar.body);
3425 }
3426 ADDOP_NOLINE(c, POP_BLOCK);
3427 compiler_pop_fblock(c, FINALLY_TRY, body);
3428 VISIT_SEQ(c, stmt, s->v.TryStar.finalbody);
3429 ADDOP_JUMP_NOLINE(c, JUMP, exit);
3430 /* `finally` block */
3431 compiler_use_next_block(c, end);
3432
3433 UNSET_LOC(c);
3434 ADDOP_JUMP(c, SETUP_CLEANUP, cleanup);
3435 ADDOP(c, PUSH_EXC_INFO);
3436 if (!compiler_push_fblock(c, FINALLY_END, end, NULL, NULL)) {
3437 return 0;
3438 }
3439 VISIT_SEQ(c, stmt, s->v.TryStar.finalbody);
3440 compiler_pop_fblock(c, FINALLY_END, end);
3441 ADDOP_I(c, RERAISE, 0);
3442 compiler_use_next_block(c, cleanup);
3443 POP_EXCEPT_AND_RERAISE(c);
3444 compiler_use_next_block(c, exit);
3445 return 1;
3446 }
3447
3448
3449 /*
3450 Code generated for "try: S except E1 as V1: S1 except E2 as V2: S2 ...":
3451 (The contents of the value stack are shown in [], with the top
3452 at the right; 'tb' is trace-back info, 'val' the exception's
3453 associated value, and 'exc' the exception.)
3454
3455 Value stack Label Instruction Argument
3456 [] SETUP_FINALLY L1
3457 [] <code for S>
3458 [] POP_BLOCK
3459 [] JUMP L0
3460
3461 [exc] L1: <evaluate E1> )
3462 [exc, E1] CHECK_EXC_MATCH )
3463 [exc, bool] POP_JUMP_IF_FALSE L2 ) only if E1
3464 [exc] <assign to V1> (or POP if no V1)
3465 [] <code for S1>
3466 JUMP L0
3467
3468 [exc] L2: <evaluate E2>
3469 .............................etc.......................
3470
3471 [exc] Ln+1: RERAISE # re-raise exception
3472
3473 [] L0: <next statement>
3474
3475 Of course, parts are not generated if Vi or Ei is not present.
3476 */
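/* For example (illustrative), in
 *
 *     try: s()
 *     except ValueError as v: h()
 *
 * ValueError is evaluated only after s() raises; CHECK_EXC_MATCH then
 * decides whether to bind v and run h(), or to fall through and RERAISE. */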
3477 static int
3478 compiler_try_except(struct compiler *c, stmt_ty s)
3479 {
3480 basicblock *body, *except, *end, *cleanup;
3481 Py_ssize_t i, n;
3482
3483 body = compiler_new_block(c);
3484 except = compiler_new_block(c);
3485 end = compiler_new_block(c);
3486 cleanup = compiler_new_block(c);
3487 if (body == NULL || except == NULL || end == NULL || cleanup == NULL)
3488 return 0;
3489 ADDOP_JUMP(c, SETUP_FINALLY, except);
3490 compiler_use_next_block(c, body);
3491 if (!compiler_push_fblock(c, TRY_EXCEPT, body, NULL, NULL))
3492 return 0;
3493 VISIT_SEQ(c, stmt, s->v.Try.body);
3494 compiler_pop_fblock(c, TRY_EXCEPT, body);
3495 ADDOP_NOLINE(c, POP_BLOCK);
3496 if (s->v.Try.orelse && asdl_seq_LEN(s->v.Try.orelse)) {
3497 VISIT_SEQ(c, stmt, s->v.Try.orelse);
3498 }
3499 ADDOP_JUMP_NOLINE(c, JUMP, end);
3500 n = asdl_seq_LEN(s->v.Try.handlers);
3501 compiler_use_next_block(c, except);
3502
3503 UNSET_LOC(c);
3504 ADDOP_JUMP(c, SETUP_CLEANUP, cleanup);
3505 ADDOP(c, PUSH_EXC_INFO);
3506 /* Runtime will push a block here, so we need to account for that */
3507 if (!compiler_push_fblock(c, EXCEPTION_HANDLER, NULL, NULL, NULL))
3508 return 0;
3509 for (i = 0; i < n; i++) {
3510 excepthandler_ty handler = (excepthandler_ty)asdl_seq_GET(
3511 s->v.Try.handlers, i);
3512 SET_LOC(c, handler);
3513 if (!handler->v.ExceptHandler.type && i < n-1) {
3514 return compiler_error(c, "default 'except:' must be last");
3515 }
3516 except = compiler_new_block(c);
3517 if (except == NULL)
3518 return 0;
3519 if (handler->v.ExceptHandler.type) {
3520 VISIT(c, expr, handler->v.ExceptHandler.type);
3521 ADDOP(c, CHECK_EXC_MATCH);
3522 ADDOP_JUMP(c, POP_JUMP_IF_FALSE, except);
3523 }
3524 if (handler->v.ExceptHandler.name) {
3525 basicblock *cleanup_end, *cleanup_body;
3526
3527 cleanup_end = compiler_new_block(c);
3528 cleanup_body = compiler_new_block(c);
3529 if (cleanup_end == NULL || cleanup_body == NULL) {
3530 return 0;
3531 }
3532
3533 compiler_nameop(c, handler->v.ExceptHandler.name, Store);
3534
3535 /*
3536 try:
3537 # body
3538 except type as name:
3539 try:
3540 # body
3541 finally:
3542 name = None # in case body contains "del name"
3543 del name
3544 */
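/* Binding the name to None before deleting it keeps the del from raising
 * if the handler body already unbound the name, and the deletion itself
 * avoids keeping the exception (and its traceback) alive via the local. */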
3545
3546 /* second try: */
3547 ADDOP_JUMP(c, SETUP_CLEANUP, cleanup_end);
3548 compiler_use_next_block(c, cleanup_body);
3549 if (!compiler_push_fblock(c, HANDLER_CLEANUP, cleanup_body, NULL, handler->v.ExceptHandler.name))
3550 return 0;
3551
3552 /* second # body */
3553 VISIT_SEQ(c, stmt, handler->v.ExceptHandler.body);
3554 compiler_pop_fblock(c, HANDLER_CLEANUP, cleanup_body);
3555 /* name = None; del name; # Mark as artificial */
3556 UNSET_LOC(c);
3557 ADDOP(c, POP_BLOCK);
3558 ADDOP(c, POP_BLOCK);
3559 ADDOP(c, POP_EXCEPT);
3560 ADDOP_LOAD_CONST(c, Py_None);
3561 compiler_nameop(c, handler->v.ExceptHandler.name, Store);
3562 compiler_nameop(c, handler->v.ExceptHandler.name, Del);
3563 ADDOP_JUMP(c, JUMP, end);
3564
3565 /* except: */
3566 compiler_use_next_block(c, cleanup_end);
3567
3568 /* name = None; del name; # Mark as artificial */
3569 UNSET_LOC(c);
3570
3571 ADDOP_LOAD_CONST(c, Py_None);
3572 compiler_nameop(c, handler->v.ExceptHandler.name, Store);
3573 compiler_nameop(c, handler->v.ExceptHandler.name, Del);
3574
3575 ADDOP_I(c, RERAISE, 1);
3576 }
3577 else {
3578 basicblock *cleanup_body;
3579
3580 cleanup_body = compiler_new_block(c);
3581 if (!cleanup_body)
3582 return 0;
3583
3584 ADDOP(c, POP_TOP); /* exc_value */
3585 compiler_use_next_block(c, cleanup_body);
3586 if (!compiler_push_fblock(c, HANDLER_CLEANUP, cleanup_body, NULL, NULL))
3587 return 0;
3588 VISIT_SEQ(c, stmt, handler->v.ExceptHandler.body);
3589 compiler_pop_fblock(c, HANDLER_CLEANUP, cleanup_body);
3590 UNSET_LOC(c);
3591 ADDOP(c, POP_BLOCK);
3592 ADDOP(c, POP_EXCEPT);
3593 ADDOP_JUMP(c, JUMP, end);
3594 }
3595 compiler_use_next_block(c, except);
3596 }
3597 /* Mark as artificial */
3598 UNSET_LOC(c);
3599 compiler_pop_fblock(c, EXCEPTION_HANDLER, NULL);
3600 ADDOP_I(c, RERAISE, 0);
3601 compiler_use_next_block(c, cleanup);
3602 POP_EXCEPT_AND_RERAISE(c);
3603 compiler_use_next_block(c, end);
3604 return 1;
3605 }
3606
3607 /*
3608 Code generated for "try: S except* E1 as V1: S1 except* E2 as V2: S2 ...":
3609 (The contents of the value stack are shown in [], with the top
3610 at the right; 'tb' is trace-back info, 'val' the exception instance,
3611 and 'typ' the exception's type.)
3612
3613 Value stack Label Instruction Argument
3614 [] SETUP_FINALLY L1
3615 [] <code for S>
3616 [] POP_BLOCK
3617 [] JUMP L0
3618
3619 [exc] L1: COPY 1 ) save copy of the original exception
3620 [orig, exc] BUILD_LIST ) list for raised/reraised excs ("result")
3621 [orig, exc, res] SWAP 2
3622
3623 [orig, res, exc] <evaluate E1>
3624 [orig, res, exc, E1] CHECK_EG_MATCH
3625 [orig, res, rest/exc, match?] COPY 1
3626 [orig, res, rest/exc, match?, match?] POP_JUMP_IF_NONE C1
3627
3628 [orig, res, rest, match] <assign to V1> (or POP if no V1)
3629
3630 [orig, res, rest] SETUP_FINALLY R1
3631 [orig, res, rest] <code for S1>
3632 [orig, res, rest] JUMP L2
3633
3634 [orig, res, rest, i, v] R1: LIST_APPEND 3 ) exc raised in except* body - add to res
3635 [orig, res, rest, i] POP
3636 [orig, res, rest] JUMP LE2
3637
3638 [orig, res, rest] L2: NOP ) for lineno
3639 [orig, res, rest] JUMP LE2
3640
3641 [orig, res, rest/exc, None] C1: POP
3642
3643 [orig, res, rest] LE2: <evaluate E2>
3644 .............................etc.......................
3645
3646 [orig, res, rest] Ln+1: LIST_APPEND 1 ) add unhandled exc to res (could be None)
3647
3648 [orig, res] PREP_RERAISE_STAR
3649 [exc] COPY 1
3650 [exc, exc] POP_JUMP_IF_NOT_NONE RER
3651 [exc] POP_TOP
3652 [] JUMP L0
3653
3654 [exc] RER: SWAP 2
3655 [exc, prev_exc_info] POP_EXCEPT
3656 [exc] RERAISE 0
3657
3658 [] L0: <next statement>
3659 */
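/* For example (illustrative), with
 *
 *     try: raise ExceptionGroup("eg", [TypeError(1), ValueError(2)])
 *     except* TypeError: ...
 *
 * CHECK_EG_MATCH splits the group into the matching TypeError subgroup
 * (handled by the block) and the ValueError rest; PREP_RERAISE_STAR later
 * merges the rest with anything raised inside the except* bodies. */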
3660 static int
3661 compiler_try_star_except(struct compiler *c, stmt_ty s)
3662 {
3663 basicblock *body = compiler_new_block(c);
3664 if (body == NULL) {
3665 return 0;
3666 }
3667 basicblock *except = compiler_new_block(c);
3668 if (except == NULL) {
3669 return 0;
3670 }
3671 basicblock *orelse = compiler_new_block(c);
3672 if (orelse == NULL) {
3673 return 0;
3674 }
3675 basicblock *end = compiler_new_block(c);
3676 if (end == NULL) {
3677 return 0;
3678 }
3679 basicblock *cleanup = compiler_new_block(c);
3680 if (cleanup == NULL) {
3681 return 0;
3682 }
3683 basicblock *reraise_star = compiler_new_block(c);
3684 if (reraise_star == NULL) {
3685 return 0;
3686 }
3687
3688 ADDOP_JUMP(c, SETUP_FINALLY, except);
3689 compiler_use_next_block(c, body);
3690 if (!compiler_push_fblock(c, TRY_EXCEPT, body, NULL, NULL)) {
3691 return 0;
3692 }
3693 VISIT_SEQ(c, stmt, s->v.TryStar.body);
3694 compiler_pop_fblock(c, TRY_EXCEPT, body);
3695 ADDOP_NOLINE(c, POP_BLOCK);
3696 ADDOP_JUMP_NOLINE(c, JUMP, orelse);
3697 Py_ssize_t n = asdl_seq_LEN(s->v.TryStar.handlers);
3698 compiler_use_next_block(c, except);
3699
3700 UNSET_LOC(c);
3701 ADDOP_JUMP(c, SETUP_CLEANUP, cleanup);
3702 ADDOP(c, PUSH_EXC_INFO);
3703 /* Runtime will push a block here, so we need to account for that */
3704 if (!compiler_push_fblock(c, EXCEPTION_GROUP_HANDLER,
3705 NULL, NULL, "except handler")) {
3706 return 0;
3707 }
3708 for (Py_ssize_t i = 0; i < n; i++) {
3709 excepthandler_ty handler = (excepthandler_ty)asdl_seq_GET(
3710 s->v.TryStar.handlers, i);
3711 SET_LOC(c, handler);
3712 except = compiler_new_block(c);
3713 if (except == NULL) {
3714 return 0;
3715 }
3716 basicblock *except_with_error = compiler_new_block(c);
3717 if (except_with_error == NULL) {
3718 return 0;
3719 }
3720 basicblock *no_match = compiler_new_block(c);
3721 if (no_match == NULL) {
3722 return 0;
3723 }
3724 if (i == 0) {
3725 /* Push the original EG into the stack */
3726 /*
3727 [exc] COPY 1
3728 [orig, exc]
3729 */
3730 ADDOP_I(c, COPY, 1);
3731
3732 /* create empty list for exceptions raised/reraise in the except* blocks */
3733 /*
3734 [orig, exc] BUILD_LIST
3735 [orig, exc, []] SWAP 2
3736 [orig, [], exc]
3737 */
3738 ADDOP_I(c, BUILD_LIST, 0);
3739 ADDOP_I(c, SWAP, 2);
3740 }
3741 if (handler->v.ExceptHandler.type) {
3742 VISIT(c, expr, handler->v.ExceptHandler.type);
3743 ADDOP(c, CHECK_EG_MATCH);
3744 ADDOP_I(c, COPY, 1);
3745 ADDOP_JUMP(c, POP_JUMP_IF_NONE, no_match);
3746 }
3747
3748 basicblock *cleanup_end = compiler_new_block(c);
3749 if (cleanup_end == NULL) {
3750 return 0;
3751 }
3752 basicblock *cleanup_body = compiler_new_block(c);
3753 if (cleanup_body == NULL) {
3754 return 0;
3755 }
3756
3757 if (handler->v.ExceptHandler.name) {
3758 compiler_nameop(c, handler->v.ExceptHandler.name, Store);
3759 }
3760 else {
3761 ADDOP(c, POP_TOP); // match
3762 }
3763
3764 /*
3765 try:
3766 # body
3767 except type as name:
3768 try:
3769 # body
3770 finally:
3771 name = None # in case body contains "del name"
3772 del name
3773 */
3774 /* second try: */
3775 ADDOP_JUMP(c, SETUP_CLEANUP, cleanup_end);
3776 compiler_use_next_block(c, cleanup_body);
3777 if (!compiler_push_fblock(c, HANDLER_CLEANUP, cleanup_body, NULL, handler->v.ExceptHandler.name))
3778 return 0;
3779
3780 /* second # body */
3781 VISIT_SEQ(c, stmt, handler->v.ExceptHandler.body);
3782 compiler_pop_fblock(c, HANDLER_CLEANUP, cleanup_body);
3783 /* name = None; del name; # Mark as artificial */
3784 UNSET_LOC(c);
3785 ADDOP(c, POP_BLOCK);
3786 if (handler->v.ExceptHandler.name) {
3787 ADDOP_LOAD_CONST(c, Py_None);
3788 compiler_nameop(c, handler->v.ExceptHandler.name, Store);
3789 compiler_nameop(c, handler->v.ExceptHandler.name, Del);
3790 }
3791 ADDOP_JUMP(c, JUMP, except);
3792
3793 /* except: */
3794 compiler_use_next_block(c, cleanup_end);
3795
3796 /* name = None; del name; # Mark as artificial */
3797 UNSET_LOC(c);
3798
3799 if (handler->v.ExceptHandler.name) {
3800 ADDOP_LOAD_CONST(c, Py_None);
3801 compiler_nameop(c, handler->v.ExceptHandler.name, Store);
3802 compiler_nameop(c, handler->v.ExceptHandler.name, Del);
3803 }
3804
3805 /* add exception raised to the res list */
3806 ADDOP_I(c, LIST_APPEND, 3); // exc
3807 ADDOP(c, POP_TOP); // lasti
3808
3809 ADDOP_JUMP(c, JUMP, except_with_error);
3810 compiler_use_next_block(c, except);
3811 ADDOP(c, NOP); // to hold a propagated location info
3812 ADDOP_JUMP(c, JUMP, except_with_error);
3813 compiler_use_next_block(c, no_match);
3814 ADDOP(c, POP_TOP); // match (None)
3815
3816 compiler_use_next_block(c, except_with_error);
3817
3818 if (i == n - 1) {
3819 /* Add exc to the list (if not None it's the unhandled part of the EG) */
3820 ADDOP_I(c, LIST_APPEND, 1);
3821 ADDOP_JUMP(c, JUMP, reraise_star);
3822 }
3823 }
3824 /* Mark as artificial */
3825 UNSET_LOC(c);
3826 compiler_pop_fblock(c, EXCEPTION_GROUP_HANDLER, NULL);
3827 basicblock *reraise = compiler_new_block(c);
3828 if (!reraise) {
3829 return 0;
3830 }
3831
3832 compiler_use_next_block(c, reraise_star);
3833 ADDOP(c, PREP_RERAISE_STAR);
3834 ADDOP_I(c, COPY, 1);
3835 ADDOP_JUMP(c, POP_JUMP_IF_NOT_NONE, reraise);
3836
3837 /* Nothing to reraise */
3838 ADDOP(c, POP_TOP);
3839 ADDOP(c, POP_BLOCK);
3840 ADDOP(c, POP_EXCEPT);
3841 ADDOP_JUMP(c, JUMP, end);
3842 compiler_use_next_block(c, reraise);
3843 ADDOP(c, POP_BLOCK);
3844 ADDOP_I(c, SWAP, 2);
3845 ADDOP(c, POP_EXCEPT);
3846 ADDOP_I(c, RERAISE, 0);
3847 compiler_use_next_block(c, cleanup);
3848 POP_EXCEPT_AND_RERAISE(c);
3849 compiler_use_next_block(c, orelse);
3850 VISIT_SEQ(c, stmt, s->v.TryStar.orelse);
3851 compiler_use_next_block(c, end);
3852 return 1;
3853 }
3854
3855 static int
3856 compiler_try(struct compiler *c, stmt_ty s) {
3857 if (s->v.Try.finalbody && asdl_seq_LEN(s->v.Try.finalbody))
3858 return compiler_try_finally(c, s);
3859 else
3860 return compiler_try_except(c, s);
3861 }
3862
3863 static int
3864 compiler_try_star(struct compiler *c, stmt_ty s)
3865 {
3866 if (s->v.TryStar.finalbody && asdl_seq_LEN(s->v.TryStar.finalbody)) {
3867 return compiler_try_star_finally(c, s);
3868 }
3869 else {
3870 return compiler_try_star_except(c, s);
3871 }
3872 }
3873
3874 static int
3875 compiler_import_as(struct compiler *c, identifier name, identifier asname)
3876 {
3877 /* The IMPORT_NAME opcode was already generated. This function
3878 merely needs to bind the result to a name.
3879
3880 If there is a dot in name, we need to split it and emit an
3881 IMPORT_FROM for each attribute after the first.
3882 */
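/* For example, "import a.b.c as d" is compiled roughly as:

       IMPORT_NAME a.b.c      (already emitted by the caller)
       IMPORT_FROM b
       SWAP 2; POP_TOP        (drop module a, keep a.b)
       IMPORT_FROM c
       <store d>              (binds a.b.c)
       POP_TOP                (drop a.b)
*/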
3883 Py_ssize_t len = PyUnicode_GET_LENGTH(name);
3884 Py_ssize_t dot = PyUnicode_FindChar(name, '.', 0, len, 1);
3885 if (dot == -2)
3886 return 0;
3887 if (dot != -1) {
3888 /* Consume the base module name to get the first attribute */
3889 while (1) {
3890 Py_ssize_t pos = dot + 1;
3891 PyObject *attr;
3892 dot = PyUnicode_FindChar(name, '.', pos, len, 1);
3893 if (dot == -2)
3894 return 0;
3895 attr = PyUnicode_Substring(name, pos, (dot != -1) ? dot : len);
3896 if (!attr)
3897 return 0;
3898 ADDOP_N(c, IMPORT_FROM, attr, names);
3899 if (dot == -1) {
3900 break;
3901 }
3902 ADDOP_I(c, SWAP, 2);
3903 ADDOP(c, POP_TOP);
3904 }
3905 if (!compiler_nameop(c, asname, Store)) {
3906 return 0;
3907 }
3908 ADDOP(c, POP_TOP);
3909 return 1;
3910 }
3911 return compiler_nameop(c, asname, Store);
3912 }
3913
3914 static int
3915 compiler_import(struct compiler *c, stmt_ty s)
3916 {
3917 /* The Import node stores a module name like a.b.c as a single
3918 string. This is convenient for all cases except
3919 import a.b.c as d
3920 where we need to parse that string to extract the individual
3921 module names.
3922 XXX Perhaps change the representation to make this case simpler?
3923 */
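/* For example, plain "import a.b.c" compiles roughly to:

       LOAD_CONST 0; LOAD_CONST None
       IMPORT_NAME a.b.c
       <store a>              (only the base module is bound)
*/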
3924 Py_ssize_t i, n = asdl_seq_LEN(s->v.Import.names);
3925
3926 PyObject *zero = _PyLong_GetZero(); // borrowed reference
3927 for (i = 0; i < n; i++) {
3928 alias_ty alias = (alias_ty)asdl_seq_GET(s->v.Import.names, i);
3929 int r;
3930
3931 ADDOP_LOAD_CONST(c, zero);
3932 ADDOP_LOAD_CONST(c, Py_None);
3933 ADDOP_NAME(c, IMPORT_NAME, alias->name, names);
3934
3935 if (alias->asname) {
3936 r = compiler_import_as(c, alias->name, alias->asname);
3937 if (!r)
3938 return r;
3939 }
3940 else {
3941 identifier tmp = alias->name;
3942 Py_ssize_t dot = PyUnicode_FindChar(
3943 alias->name, '.', 0, PyUnicode_GET_LENGTH(alias->name), 1);
3944 if (dot != -1) {
3945 tmp = PyUnicode_Substring(alias->name, 0, dot);
3946 if (tmp == NULL)
3947 return 0;
3948 }
3949 r = compiler_nameop(c, tmp, Store);
3950 if (dot != -1) {
3951 Py_DECREF(tmp);
3952 }
3953 if (!r)
3954 return r;
3955 }
3956 }
3957 return 1;
3958 }
3959
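/* Compile "from M import ...". For example, "from M import a, b as c"
   is compiled roughly as:

       LOAD_CONST 0             (level)
       LOAD_CONST ('a', 'b')    (fromlist)
       IMPORT_NAME M
       IMPORT_FROM a; <store a>
       IMPORT_FROM b; <store c>
       POP_TOP                  (discard module M)
*/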
3960 static int
3961 compiler_from_import(struct compiler *c, stmt_ty s)
3962 {
3963 Py_ssize_t i, n = asdl_seq_LEN(s->v.ImportFrom.names);
3964 PyObject *names;
3965
3966 ADDOP_LOAD_CONST_NEW(c, PyLong_FromLong(s->v.ImportFrom.level));
3967
3968 names = PyTuple_New(n);
3969 if (!names)
3970 return 0;
3971
3972 /* build up the names */
3973 for (i = 0; i < n; i++) {
3974 alias_ty alias = (alias_ty)asdl_seq_GET(s->v.ImportFrom.names, i);
3975 Py_INCREF(alias->name);
3976 PyTuple_SET_ITEM(names, i, alias->name);
3977 }
3978
3979 if (s->lineno > c->c_future->ff_lineno && s->v.ImportFrom.module &&
3980 _PyUnicode_EqualToASCIIString(s->v.ImportFrom.module, "__future__")) {
3981 Py_DECREF(names);
3982 return compiler_error(c, "from __future__ imports must occur "
3983 "at the beginning of the file");
3984 }
3985 ADDOP_LOAD_CONST_NEW(c, names);
3986
3987 if (s->v.ImportFrom.module) {
3988 ADDOP_NAME(c, IMPORT_NAME, s->v.ImportFrom.module, names);
3989 }
3990 else {
3991 _Py_DECLARE_STR(empty, "");
3992 ADDOP_NAME(c, IMPORT_NAME, &_Py_STR(empty), names);
3993 }
3994 for (i = 0; i < n; i++) {
3995 alias_ty alias = (alias_ty)asdl_seq_GET(s->v.ImportFrom.names, i);
3996 identifier store_name;
3997
3998 if (i == 0 && PyUnicode_READ_CHAR(alias->name, 0) == '*') {
3999 assert(n == 1);
4000 ADDOP(c, IMPORT_STAR);
4001 return 1;
4002 }
4003
4004 ADDOP_NAME(c, IMPORT_FROM, alias->name, names);
4005 store_name = alias->name;
4006 if (alias->asname)
4007 store_name = alias->asname;
4008
4009 if (!compiler_nameop(c, store_name, Store)) {
4010 return 0;
4011 }
4012 }
4013 /* remove imported module */
4014 ADDOP(c, POP_TOP);
4015 return 1;
4016 }
4017
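/* Compile "assert test, msg" roughly as:

       if not test:
           raise AssertionError(msg)

   Under -O (c_optimize) the statement compiles to nothing, although
   the "assertion is always true" warning below is still emitted. */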
4018 static int
4019 compiler_assert(struct compiler *c, stmt_ty s)
4020 {
4021 basicblock *end;
4022
4023 /* Always emit a warning if the test is a non-zero length tuple */
4024 if ((s->v.Assert.test->kind == Tuple_kind &&
4025 asdl_seq_LEN(s->v.Assert.test->v.Tuple.elts) > 0) ||
4026 (s->v.Assert.test->kind == Constant_kind &&
4027 PyTuple_Check(s->v.Assert.test->v.Constant.value) &&
4028 PyTuple_Size(s->v.Assert.test->v.Constant.value) > 0))
4029 {
4030 if (!compiler_warn(c, "assertion is always true, "
4031 "perhaps remove parentheses?"))
4032 {
4033 return 0;
4034 }
4035 }
4036 if (c->c_optimize)
4037 return 1;
4038 end = compiler_new_block(c);
4039 if (end == NULL)
4040 return 0;
4041 if (!compiler_jump_if(c, s->v.Assert.test, end, 1))
4042 return 0;
4043 ADDOP(c, LOAD_ASSERTION_ERROR);
4044 if (s->v.Assert.msg) {
4045 VISIT(c, expr, s->v.Assert.msg);
4046 ADDOP_I(c, PRECALL, 0);
4047 ADDOP_I(c, CALL, 0);
4048 }
4049 ADDOP_I(c, RAISE_VARARGS, 1);
4050 compiler_use_next_block(c, end);
4051 return 1;
4052 }
4053
4054 static int
4055 compiler_visit_stmt_expr(struct compiler *c, expr_ty value)
4056 {
4057 if (c->c_interactive && c->c_nestlevel <= 1) {
4058 VISIT(c, expr, value);
4059 ADDOP(c, PRINT_EXPR);
4060 return 1;
4061 }
4062
4063 if (value->kind == Constant_kind) {
4064 /* ignore constant statement */
4065 ADDOP(c, NOP);
4066 return 1;
4067 }
4068
4069 VISIT(c, expr, value);
4070 /* Mark POP_TOP as artificial */
4071 UNSET_LOC(c);
4072 ADDOP(c, POP_TOP);
4073 return 1;
4074 }
4075
4076 static int
4077 compiler_visit_stmt(struct compiler *c, stmt_ty s)
4078 {
4079 Py_ssize_t i, n;
4080
4081 /* Always assign a lineno to the next instruction for a stmt. */
4082 SET_LOC(c, s);
4083
4084 switch (s->kind) {
4085 case FunctionDef_kind:
4086 return compiler_function(c, s, 0);
4087 case ClassDef_kind:
4088 return compiler_class(c, s);
4089 case Return_kind:
4090 return compiler_return(c, s);
4091 case Delete_kind:
4092 VISIT_SEQ(c, expr, s->v.Delete.targets)
4093 break;
4094 case Assign_kind:
4095 n = asdl_seq_LEN(s->v.Assign.targets);
4096 VISIT(c, expr, s->v.Assign.value);
4097 for (i = 0; i < n; i++) {
4098 if (i < n - 1) {
4099 ADDOP_I(c, COPY, 1);
4100 }
4101 VISIT(c, expr,
4102 (expr_ty)asdl_seq_GET(s->v.Assign.targets, i));
4103 }
4104 break;
4105 case AugAssign_kind:
4106 return compiler_augassign(c, s);
4107 case AnnAssign_kind:
4108 return compiler_annassign(c, s);
4109 case For_kind:
4110 return compiler_for(c, s);
4111 case While_kind:
4112 return compiler_while(c, s);
4113 case If_kind:
4114 return compiler_if(c, s);
4115 case Match_kind:
4116 return compiler_match(c, s);
4117 case Raise_kind:
4118 n = 0;
4119 if (s->v.Raise.exc) {
4120 VISIT(c, expr, s->v.Raise.exc);
4121 n++;
4122 if (s->v.Raise.cause) {
4123 VISIT(c, expr, s->v.Raise.cause);
4124 n++;
4125 }
4126 }
4127 ADDOP_I(c, RAISE_VARARGS, (int)n);
4128 break;
4129 case Try_kind:
4130 return compiler_try(c, s);
4131 case TryStar_kind:
4132 return compiler_try_star(c, s);
4133 case Assert_kind:
4134 return compiler_assert(c, s);
4135 case Import_kind:
4136 return compiler_import(c, s);
4137 case ImportFrom_kind:
4138 return compiler_from_import(c, s);
4139 case Global_kind:
4140 case Nonlocal_kind:
4141 break;
4142 case Expr_kind:
4143 return compiler_visit_stmt_expr(c, s->v.Expr.value);
4144 case Pass_kind:
4145 ADDOP(c, NOP);
4146 break;
4147 case Break_kind:
4148 return compiler_break(c);
4149 case Continue_kind:
4150 return compiler_continue(c);
4151 case With_kind:
4152 return compiler_with(c, s, 0);
4153 case AsyncFunctionDef_kind:
4154 return compiler_function(c, s, 1);
4155 case AsyncWith_kind:
4156 return compiler_async_with(c, s, 0);
4157 case AsyncFor_kind:
4158 return compiler_async_for(c, s);
4159 }
4160
4161 return 1;
4162 }
4163
4164 static int
4165 unaryop(unaryop_ty op)
4166 {
4167 switch (op) {
4168 case Invert:
4169 return UNARY_INVERT;
4170 case Not:
4171 return UNARY_NOT;
4172 case UAdd:
4173 return UNARY_POSITIVE;
4174 case USub:
4175 return UNARY_NEGATIVE;
4176 default:
4177 PyErr_Format(PyExc_SystemError,
4178 "unary op %d should not be possible", op);
4179 return 0;
4180 }
4181 }
4182
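/* Map an AST operator to the BINARY_OP oparg; e.g. "x + y" uses NB_ADD
   and "x += y" uses NB_INPLACE_ADD, both through the single BINARY_OP
   instruction. */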
4183 static int
4184 addop_binary(struct compiler *c, operator_ty binop, bool inplace)
4185 {
4186 int oparg;
4187 switch (binop) {
4188 case Add:
4189 oparg = inplace ? NB_INPLACE_ADD : NB_ADD;
4190 break;
4191 case Sub:
4192 oparg = inplace ? NB_INPLACE_SUBTRACT : NB_SUBTRACT;
4193 break;
4194 case Mult:
4195 oparg = inplace ? NB_INPLACE_MULTIPLY : NB_MULTIPLY;
4196 break;
4197 case MatMult:
4198 oparg = inplace ? NB_INPLACE_MATRIX_MULTIPLY : NB_MATRIX_MULTIPLY;
4199 break;
4200 case Div:
4201 oparg = inplace ? NB_INPLACE_TRUE_DIVIDE : NB_TRUE_DIVIDE;
4202 break;
4203 case Mod:
4204 oparg = inplace ? NB_INPLACE_REMAINDER : NB_REMAINDER;
4205 break;
4206 case Pow:
4207 oparg = inplace ? NB_INPLACE_POWER : NB_POWER;
4208 break;
4209 case LShift:
4210 oparg = inplace ? NB_INPLACE_LSHIFT : NB_LSHIFT;
4211 break;
4212 case RShift:
4213 oparg = inplace ? NB_INPLACE_RSHIFT : NB_RSHIFT;
4214 break;
4215 case BitOr:
4216 oparg = inplace ? NB_INPLACE_OR : NB_OR;
4217 break;
4218 case BitXor:
4219 oparg = inplace ? NB_INPLACE_XOR : NB_XOR;
4220 break;
4221 case BitAnd:
4222 oparg = inplace ? NB_INPLACE_AND : NB_AND;
4223 break;
4224 case FloorDiv:
4225 oparg = inplace ? NB_INPLACE_FLOOR_DIVIDE : NB_FLOOR_DIVIDE;
4226 break;
4227 default:
4228 PyErr_Format(PyExc_SystemError, "%s op %d should not be possible",
4229 inplace ? "inplace" : "binary", binop);
4230 return 0;
4231 }
4232 ADDOP_I(c, BINARY_OP, oparg);
4233 return 1;
4234 }
4235
4236
4237 static int
4238 addop_yield(struct compiler *c) {
4239 if (c->u->u_ste->ste_generator && c->u->u_ste->ste_coroutine) {
4240 ADDOP(c, ASYNC_GEN_WRAP);
4241 }
4242 ADDOP(c, YIELD_VALUE);
4243 ADDOP_I(c, RESUME, 1);
4244 return 1;
4245 }
4246
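/* Resolve a name to the appropriate {LOAD,STORE,DELETE}_* opcode for
   its scope. For example, inside a function a local becomes LOAD_FAST,
   a name declared "global" becomes LOAD_GLOBAL, a closure variable
   becomes LOAD_DEREF, and at module level plain LOAD_NAME is used. */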
4247 static int
4248 compiler_nameop(struct compiler *c, identifier name, expr_context_ty ctx)
4249 {
4250 int op, scope;
4251 Py_ssize_t arg;
4252 enum { OP_FAST, OP_GLOBAL, OP_DEREF, OP_NAME } optype;
4253
4254 PyObject *dict = c->u->u_names;
4255 PyObject *mangled;
4256
4257 assert(!_PyUnicode_EqualToASCIIString(name, "None") &&
4258 !_PyUnicode_EqualToASCIIString(name, "True") &&
4259 !_PyUnicode_EqualToASCIIString(name, "False"));
4260
4261 if (forbidden_name(c, name, ctx))
4262 return 0;
4263
4264 mangled = _Py_Mangle(c->u->u_private, name);
4265 if (!mangled)
4266 return 0;
4267
4268 op = 0;
4269 optype = OP_NAME;
4270 scope = _PyST_GetScope(c->u->u_ste, mangled);
4271 switch (scope) {
4272 case FREE:
4273 dict = c->u->u_freevars;
4274 optype = OP_DEREF;
4275 break;
4276 case CELL:
4277 dict = c->u->u_cellvars;
4278 optype = OP_DEREF;
4279 break;
4280 case LOCAL:
4281 if (c->u->u_ste->ste_type == FunctionBlock)
4282 optype = OP_FAST;
4283 break;
4284 case GLOBAL_IMPLICIT:
4285 if (c->u->u_ste->ste_type == FunctionBlock)
4286 optype = OP_GLOBAL;
4287 break;
4288 case GLOBAL_EXPLICIT:
4289 optype = OP_GLOBAL;
4290 break;
4291 default:
4292 /* scope can be 0 */
4293 break;
4294 }
4295
4296 /* XXX Leave assert here, but handle __doc__ and the like better */
4297 assert(scope || PyUnicode_READ_CHAR(name, 0) == '_');
4298
4299 switch (optype) {
4300 case OP_DEREF:
4301 switch (ctx) {
4302 case Load:
4303 op = (c->u->u_ste->ste_type == ClassBlock) ? LOAD_CLASSDEREF : LOAD_DEREF;
4304 break;
4305 case Store: op = STORE_DEREF; break;
4306 case Del: op = DELETE_DEREF; break;
4307 }
4308 break;
4309 case OP_FAST:
4310 switch (ctx) {
4311 case Load: op = LOAD_FAST; break;
4312 case Store: op = STORE_FAST; break;
4313 case Del: op = DELETE_FAST; break;
4314 }
4315 ADDOP_N(c, op, mangled, varnames);
4316 return 1;
4317 case OP_GLOBAL:
4318 switch (ctx) {
4319 case Load: op = LOAD_GLOBAL; break;
4320 case Store: op = STORE_GLOBAL; break;
4321 case Del: op = DELETE_GLOBAL; break;
4322 }
4323 break;
4324 case OP_NAME:
4325 switch (ctx) {
4326 case Load: op = LOAD_NAME; break;
4327 case Store: op = STORE_NAME; break;
4328 case Del: op = DELETE_NAME; break;
4329 }
4330 break;
4331 }
4332
4333 assert(op);
4334 arg = compiler_add_o(dict, mangled);
4335 Py_DECREF(mangled);
4336 if (arg < 0) {
4337 return 0;
4338 }
4339 if (op == LOAD_GLOBAL) {
4340 arg <<= 1;
4341 }
4342 return compiler_addop_i(c, op, arg);
4343 }
4344
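/* Short-circuit evaluation: for "a and b and c", each value except the
   last is followed by JUMP_IF_FALSE_OR_POP to the end block, so a
   falsey value is returned as-is and truthy values are popped before
   evaluating the next operand. */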
4345 static int
4346 compiler_boolop(struct compiler *c, expr_ty e)
4347 {
4348 basicblock *end;
4349 int jumpi;
4350 Py_ssize_t i, n;
4351 asdl_expr_seq *s;
4352
4353 assert(e->kind == BoolOp_kind);
4354 if (e->v.BoolOp.op == And)
4355 jumpi = JUMP_IF_FALSE_OR_POP;
4356 else
4357 jumpi = JUMP_IF_TRUE_OR_POP;
4358 end = compiler_new_block(c);
4359 if (end == NULL)
4360 return 0;
4361 s = e->v.BoolOp.values;
4362 n = asdl_seq_LEN(s) - 1;
4363 assert(n >= 0);
4364 for (i = 0; i < n; ++i) {
4365 VISIT(c, expr, (expr_ty)asdl_seq_GET(s, i));
4366 ADDOP_JUMP(c, jumpi, end);
4367 basicblock *next = compiler_new_block(c);
4368 if (next == NULL) {
4369 return 0;
4370 }
4371 compiler_use_next_block(c, next);
4372 }
4373 VISIT(c, expr, (expr_ty)asdl_seq_GET(s, n));
4374 compiler_use_next_block(c, end);
4375 return 1;
4376 }
4377
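/* Build a list/set/tuple, handling starred sub-expressions. For
   example, "(1, 2, *x, 3)" is compiled roughly as:

       LOAD_CONST 1; LOAD_CONST 2
       BUILD_LIST 2
       LOAD x; LIST_EXTEND 1
       LOAD_CONST 3; LIST_APPEND 1
       LIST_TO_TUPLE

   An all-constant sequence of more than two items is instead folded
   into a tuple constant (loaded directly for a tuple, or added in one
   LIST_EXTEND/SET_UPDATE for a list or set). */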
4378 static int
4379 starunpack_helper(struct compiler *c, asdl_expr_seq *elts, int pushed,
4380 int build, int add, int extend, int tuple)
4381 {
4382 Py_ssize_t n = asdl_seq_LEN(elts);
4383 if (n > 2 && are_all_items_const(elts, 0, n)) {
4384 PyObject *folded = PyTuple_New(n);
4385 if (folded == NULL) {
4386 return 0;
4387 }
4388 PyObject *val;
4389 for (Py_ssize_t i = 0; i < n; i++) {
4390 val = ((expr_ty)asdl_seq_GET(elts, i))->v.Constant.value;
4391 Py_INCREF(val);
4392 PyTuple_SET_ITEM(folded, i, val);
4393 }
4394 if (tuple && !pushed) {
4395 ADDOP_LOAD_CONST_NEW(c, folded);
4396 } else {
4397 if (add == SET_ADD) {
4398 Py_SETREF(folded, PyFrozenSet_New(folded));
4399 if (folded == NULL) {
4400 return 0;
4401 }
4402 }
4403 ADDOP_I(c, build, pushed);
4404 ADDOP_LOAD_CONST_NEW(c, folded);
4405 ADDOP_I(c, extend, 1);
4406 if (tuple) {
4407 ADDOP(c, LIST_TO_TUPLE);
4408 }
4409 }
4410 return 1;
4411 }
4412
4413 int big = n+pushed > STACK_USE_GUIDELINE;
4414 int seen_star = 0;
4415 for (Py_ssize_t i = 0; i < n; i++) {
4416 expr_ty elt = asdl_seq_GET(elts, i);
4417 if (elt->kind == Starred_kind) {
4418 seen_star = 1;
4419 }
4420 }
4421 if (!seen_star && !big) {
4422 for (Py_ssize_t i = 0; i < n; i++) {
4423 expr_ty elt = asdl_seq_GET(elts, i);
4424 VISIT(c, expr, elt);
4425 }
4426 if (tuple) {
4427 ADDOP_I(c, BUILD_TUPLE, n+pushed);
4428 } else {
4429 ADDOP_I(c, build, n+pushed);
4430 }
4431 return 1;
4432 }
4433 int sequence_built = 0;
4434 if (big) {
4435 ADDOP_I(c, build, pushed);
4436 sequence_built = 1;
4437 }
4438 for (Py_ssize_t i = 0; i < n; i++) {
4439 expr_ty elt = asdl_seq_GET(elts, i);
4440 if (elt->kind == Starred_kind) {
4441 if (sequence_built == 0) {
4442 ADDOP_I(c, build, i+pushed);
4443 sequence_built = 1;
4444 }
4445 VISIT(c, expr, elt->v.Starred.value);
4446 ADDOP_I(c, extend, 1);
4447 }
4448 else {
4449 VISIT(c, expr, elt);
4450 if (sequence_built) {
4451 ADDOP_I(c, add, 1);
4452 }
4453 }
4454 }
4455 assert(sequence_built);
4456 if (tuple) {
4457 ADDOP(c, LIST_TO_TUPLE);
4458 }
4459 return 1;
4460 }
4461
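/* Emit UNPACK_SEQUENCE or UNPACK_EX for an unpacking target. The
   UNPACK_EX oparg packs the counts before and after the starred name:
   e.g. "a, *b, c = v" emits UNPACK_EX 257 (1 before + (1 << 8) after). */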
4462 static int
4463 unpack_helper(struct compiler *c, asdl_expr_seq *elts)
4464 {
4465 Py_ssize_t n = asdl_seq_LEN(elts);
4466 int seen_star = 0;
4467 for (Py_ssize_t i = 0; i < n; i++) {
4468 expr_ty elt = asdl_seq_GET(elts, i);
4469 if (elt->kind == Starred_kind && !seen_star) {
4470 if ((i >= (1 << 8)) ||
4471 (n-i-1 >= (INT_MAX >> 8)))
4472 return compiler_error(c,
4473 "too many expressions in "
4474 "star-unpacking assignment");
4475 ADDOP_I(c, UNPACK_EX, (i + ((n-i-1) << 8)));
4476 seen_star = 1;
4477 }
4478 else if (elt->kind == Starred_kind) {
4479 return compiler_error(c,
4480 "multiple starred expressions in assignment");
4481 }
4482 }
4483 if (!seen_star) {
4484 ADDOP_I(c, UNPACK_SEQUENCE, n);
4485 }
4486 return 1;
4487 }
4488
4489 static int
4490 assignment_helper(struct compiler *c, asdl_expr_seq *elts)
4491 {
4492 Py_ssize_t n = asdl_seq_LEN(elts);
4493 RETURN_IF_FALSE(unpack_helper(c, elts));
4494 for (Py_ssize_t i = 0; i < n; i++) {
4495 expr_ty elt = asdl_seq_GET(elts, i);
4496 VISIT(c, expr, elt->kind != Starred_kind ? elt : elt->v.Starred.value);
4497 }
4498 return 1;
4499 }
4500
4501 static int
4502 compiler_list(struct compiler *c, expr_ty e)
4503 {
4504 asdl_expr_seq *elts = e->v.List.elts;
4505 if (e->v.List.ctx == Store) {
4506 return assignment_helper(c, elts);
4507 }
4508 else if (e->v.List.ctx == Load) {
4509 return starunpack_helper(c, elts, 0, BUILD_LIST,
4510 LIST_APPEND, LIST_EXTEND, 0);
4511 }
4512 else
4513 VISIT_SEQ(c, expr, elts);
4514 return 1;
4515 }
4516
4517 static int
4518 compiler_tuple(struct compiler *c, expr_ty e)
4519 {
4520 asdl_expr_seq *elts = e->v.Tuple.elts;
4521 if (e->v.Tuple.ctx == Store) {
4522 return assignment_helper(c, elts);
4523 }
4524 else if (e->v.Tuple.ctx == Load) {
4525 return starunpack_helper(c, elts, 0, BUILD_LIST,
4526 LIST_APPEND, LIST_EXTEND, 1);
4527 }
4528 else
4529 VISIT_SEQ(c, expr, elts);
4530 return 1;
4531 }
4532
4533 static int
4534 compiler_set(struct compiler *c, expr_ty e)
4535 {
4536 return starunpack_helper(c, e->v.Set.elts, 0, BUILD_SET,
4537 SET_ADD, SET_UPDATE, 0);
4538 }
4539
4540 static int
4541 are_all_items_const(asdl_expr_seq *seq, Py_ssize_t begin, Py_ssize_t end)
4542 {
4543 Py_ssize_t i;
4544 for (i = begin; i < end; i++) {
4545 expr_ty key = (expr_ty)asdl_seq_GET(seq, i);
4546 if (key == NULL || key->kind != Constant_kind)
4547 return 0;
4548 }
4549 return 1;
4550 }
4551
4552 static int
4553 compiler_subdict(struct compiler *c, expr_ty e, Py_ssize_t begin, Py_ssize_t end)
4554 {
4555 Py_ssize_t i, n = end - begin;
4556 PyObject *keys, *key;
4557 int big = n*2 > STACK_USE_GUIDELINE;
4558 if (n > 1 && !big && are_all_items_const(e->v.Dict.keys, begin, end)) {
4559 for (i = begin; i < end; i++) {
4560 VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.values, i));
4561 }
4562 keys = PyTuple_New(n);
4563 if (keys == NULL) {
4564 return 0;
4565 }
4566 for (i = begin; i < end; i++) {
4567 key = ((expr_ty)asdl_seq_GET(e->v.Dict.keys, i))->v.Constant.value;
4568 Py_INCREF(key);
4569 PyTuple_SET_ITEM(keys, i - begin, key);
4570 }
4571 ADDOP_LOAD_CONST_NEW(c, keys);
4572 ADDOP_I(c, BUILD_CONST_KEY_MAP, n);
4573 return 1;
4574 }
4575 if (big) {
4576 ADDOP_I(c, BUILD_MAP, 0);
4577 }
4578 for (i = begin; i < end; i++) {
4579 VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.keys, i));
4580 VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.values, i));
4581 if (big) {
4582 ADDOP_I(c, MAP_ADD, 1);
4583 }
4584 }
4585 if (!big) {
4586 ADDOP_I(c, BUILD_MAP, n);
4587 }
4588 return 1;
4589 }
4590
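/* Build a dict display, splitting around **-unpackings. For example,
   "{'a': 1, **m, 'b': 2}" is compiled roughly as:

       <build {'a': 1}>               (compiler_subdict)
       LOAD m; DICT_UPDATE 1
       <build {'b': 2}>; DICT_UPDATE 1
*/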
4591 static int
4592 compiler_dict(struct compiler *c, expr_ty e)
4593 {
4594 Py_ssize_t i, n, elements;
4595 int have_dict;
4596 int is_unpacking = 0;
4597 n = asdl_seq_LEN(e->v.Dict.values);
4598 have_dict = 0;
4599 elements = 0;
4600 for (i = 0; i < n; i++) {
4601 is_unpacking = (expr_ty)asdl_seq_GET(e->v.Dict.keys, i) == NULL;
4602 if (is_unpacking) {
4603 if (elements) {
4604 if (!compiler_subdict(c, e, i - elements, i)) {
4605 return 0;
4606 }
4607 if (have_dict) {
4608 ADDOP_I(c, DICT_UPDATE, 1);
4609 }
4610 have_dict = 1;
4611 elements = 0;
4612 }
4613 if (have_dict == 0) {
4614 ADDOP_I(c, BUILD_MAP, 0);
4615 have_dict = 1;
4616 }
4617 VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.values, i));
4618 ADDOP_I(c, DICT_UPDATE, 1);
4619 }
4620 else {
4621 if (elements*2 > STACK_USE_GUIDELINE) {
4622 if (!compiler_subdict(c, e, i - elements, i + 1)) {
4623 return 0;
4624 }
4625 if (have_dict) {
4626 ADDOP_I(c, DICT_UPDATE, 1);
4627 }
4628 have_dict = 1;
4629 elements = 0;
4630 }
4631 else {
4632 elements++;
4633 }
4634 }
4635 }
4636 if (elements) {
4637 if (!compiler_subdict(c, e, n - elements, n)) {
4638 return 0;
4639 }
4640 if (have_dict) {
4641 ADDOP_I(c, DICT_UPDATE, 1);
4642 }
4643 have_dict = 1;
4644 }
4645 if (!have_dict) {
4646 ADDOP_I(c, BUILD_MAP, 0);
4647 }
4648 return 1;
4649 }
4650
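/* Chained comparisons evaluate each operand once, keeping the shared
   operand on the stack with SWAP/COPY: "a < b < c" behaves like
   "a < b and b < c", jumping to a cleanup block (which swaps and pops
   the leftover operand) as soon as one test fails. */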
4651 static int
4652 compiler_compare(struct compiler *c, expr_ty e)
4653 {
4654 Py_ssize_t i, n;
4655
4656 if (!check_compare(c, e)) {
4657 return 0;
4658 }
4659 VISIT(c, expr, e->v.Compare.left);
4660 assert(asdl_seq_LEN(e->v.Compare.ops) > 0);
4661 n = asdl_seq_LEN(e->v.Compare.ops) - 1;
4662 if (n == 0) {
4663 VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Compare.comparators, 0));
4664 ADDOP_COMPARE(c, asdl_seq_GET(e->v.Compare.ops, 0));
4665 }
4666 else {
4667 basicblock *cleanup = compiler_new_block(c);
4668 if (cleanup == NULL)
4669 return 0;
4670 for (i = 0; i < n; i++) {
4671 VISIT(c, expr,
4672 (expr_ty)asdl_seq_GET(e->v.Compare.comparators, i));
4673 ADDOP_I(c, SWAP, 2);
4674 ADDOP_I(c, COPY, 2);
4675 ADDOP_COMPARE(c, asdl_seq_GET(e->v.Compare.ops, i));
4676 ADDOP_JUMP(c, JUMP_IF_FALSE_OR_POP, cleanup);
4677 }
4678 VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Compare.comparators, n));
4679 ADDOP_COMPARE(c, asdl_seq_GET(e->v.Compare.ops, n));
4680 basicblock *end = compiler_new_block(c);
4681 if (end == NULL)
4682 return 0;
4683 ADDOP_JUMP_NOLINE(c, JUMP, end);
4684 compiler_use_next_block(c, cleanup);
4685 ADDOP_I(c, SWAP, 2);
4686 ADDOP(c, POP_TOP);
4687 compiler_use_next_block(c, end);
4688 }
4689 return 1;
4690 }
4691
4692 static PyTypeObject *
4693 infer_type(expr_ty e)
4694 {
4695 switch (e->kind) {
4696 case Tuple_kind:
4697 return &PyTuple_Type;
4698 case List_kind:
4699 case ListComp_kind:
4700 return &PyList_Type;
4701 case Dict_kind:
4702 case DictComp_kind:
4703 return &PyDict_Type;
4704 case Set_kind:
4705 case SetComp_kind:
4706 return &PySet_Type;
4707 case GeneratorExp_kind:
4708 return &PyGen_Type;
4709 case Lambda_kind:
4710 return &PyFunction_Type;
4711 case JoinedStr_kind:
4712 case FormattedValue_kind:
4713 return &PyUnicode_Type;
4714 case Constant_kind:
4715 return Py_TYPE(e->v.Constant.value);
4716 default:
4717 return NULL;
4718 }
4719 }
4720
4721 static int
4722 check_caller(struct compiler *c, expr_ty e)
4723 {
4724 switch (e->kind) {
4725 case Constant_kind:
4726 case Tuple_kind:
4727 case List_kind:
4728 case ListComp_kind:
4729 case Dict_kind:
4730 case DictComp_kind:
4731 case Set_kind:
4732 case SetComp_kind:
4733 case GeneratorExp_kind:
4734 case JoinedStr_kind:
4735 case FormattedValue_kind:
4736 return compiler_warn(c, "'%.200s' object is not callable; "
4737 "perhaps you missed a comma?",
4738 infer_type(e)->tp_name);
4739 default:
4740 return 1;
4741 }
4742 }
4743
4744 static int
4745 check_subscripter(struct compiler *c, expr_ty e)
4746 {
4747 PyObject *v;
4748
4749 switch (e->kind) {
4750 case Constant_kind:
4751 v = e->v.Constant.value;
4752 if (!(v == Py_None || v == Py_Ellipsis ||
4753 PyLong_Check(v) || PyFloat_Check(v) || PyComplex_Check(v) ||
4754 PyAnySet_Check(v)))
4755 {
4756 return 1;
4757 }
4758 /* fall through */
4759 case Set_kind:
4760 case SetComp_kind:
4761 case GeneratorExp_kind:
4762 case Lambda_kind:
4763 return compiler_warn(c, "'%.200s' object is not subscriptable; "
4764 "perhaps you missed a comma?",
4765 infer_type(e)->tp_name);
4766 default:
4767 return 1;
4768 }
4769 }
4770
4771 static int
4772 check_index(struct compiler *c, expr_ty e, expr_ty s)
4773 {
4774 PyObject *v;
4775
4776 PyTypeObject *index_type = infer_type(s);
4777 if (index_type == NULL
4778 || PyType_FastSubclass(index_type, Py_TPFLAGS_LONG_SUBCLASS)
4779 || index_type == &PySlice_Type) {
4780 return 1;
4781 }
4782
4783 switch (e->kind) {
4784 case Constant_kind:
4785 v = e->v.Constant.value;
4786 if (!(PyUnicode_Check(v) || PyBytes_Check(v) || PyTuple_Check(v))) {
4787 return 1;
4788 }
4789 /* fall through */
4790 case Tuple_kind:
4791 case List_kind:
4792 case ListComp_kind:
4793 case JoinedStr_kind:
4794 case FormattedValue_kind:
4795 return compiler_warn(c, "%.200s indices must be integers or slices, "
4796 "not %.200s; "
4797 "perhaps you missed a comma?",
4798 infer_type(e)->tp_name,
4799 index_type->tp_name);
4800 default:
4801 return 1;
4802 }
4803 }
4804
4805 static int
4806 is_import_originated(struct compiler *c, expr_ty e)
4807 {
4808 /* If e is a Name node, check whether the global scope has an
4809 import of that name. To avoid traversing the whole scope
4810 stack every time this function is called, only the global
4811 scope is consulted when deciding whether something is
4812 imported. */
4813
4814 if (e->kind != Name_kind) {
4815 return 0;
4816 }
4817
4818 long flags = _PyST_GetSymbol(c->c_st->st_top, e->v.Name.id);
4819 return flags & DEF_IMPORT;
4820 }
4821
4822 // If an attribute access spans multiple lines, update the current start
4823 // location to point to the attribute name.
4824 static void
4825 update_start_location_to_match_attr(struct compiler *c, expr_ty attr)
4826 {
4827 assert(attr->kind == Attribute_kind);
4828 if (c->u->u_lineno != attr->end_lineno) {
4829 c->u->u_lineno = attr->end_lineno;
4830 int len = (int)PyUnicode_GET_LENGTH(attr->v.Attribute.attr);
4831 if (len <= attr->end_col_offset) {
4832 c->u->u_col_offset = attr->end_col_offset - len;
4833 }
4834 else {
4835 // GH-94694: Somebody's compiling weird ASTs. Just drop the columns:
4836 c->u->u_col_offset = -1;
4837 c->u->u_end_col_offset = -1;
4838 }
4839 // Make sure the end position still follows the start position, even for
4840 // weird ASTs:
4841 c->u->u_end_lineno = Py_MAX(c->u->u_lineno, c->u->u_end_lineno);
4842 if (c->u->u_lineno == c->u->u_end_lineno) {
4843 c->u->u_end_col_offset = Py_MAX(c->u->u_col_offset,
4844 c->u->u_end_col_offset);
4845 }
4846 }
4847 }
4848
4849 // Return 1 if the method call was optimized, -1 if not, and 0 on error.
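// For example, "obj.meth(x)" compiles to LOAD_METHOD meth plus
// PRECALL/CALL 1 instead of LOAD_ATTR and a regular call, which
// avoids creating a bound method object in the common case.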
4850 static int
4851 maybe_optimize_method_call(struct compiler *c, expr_ty e)
4852 {
4853 Py_ssize_t argsl, i, kwdsl;
4854 expr_ty meth = e->v.Call.func;
4855 asdl_expr_seq *args = e->v.Call.args;
4856 asdl_keyword_seq *kwds = e->v.Call.keywords;
4857
4858 /* Check that the call node is an attribute access */
4859 if (meth->kind != Attribute_kind || meth->v.Attribute.ctx != Load) {
4860 return -1;
4861 }
4862
4863 /* Check that the base object is not something that is imported */
4864 if (is_import_originated(c, meth->v.Attribute.value)) {
4865 return -1;
4866 }
4867
4868 /* Check that there aren't too many arguments */
4869 argsl = asdl_seq_LEN(args);
4870 kwdsl = asdl_seq_LEN(kwds);
4871 if (argsl + kwdsl + (kwdsl != 0) >= STACK_USE_GUIDELINE) {
4872 return -1;
4873 }
4874 /* Check that there are no *varargs types of arguments. */
4875 for (i = 0; i < argsl; i++) {
4876 expr_ty elt = asdl_seq_GET(args, i);
4877 if (elt->kind == Starred_kind) {
4878 return -1;
4879 }
4880 }
4881
4882 for (i = 0; i < kwdsl; i++) {
4883 keyword_ty kw = asdl_seq_GET(kwds, i);
4884 if (kw->arg == NULL) {
4885 return -1;
4886 }
4887 }
4888 /* Alright, we can optimize the code. */
4889 VISIT(c, expr, meth->v.Attribute.value);
4890 SET_LOC(c, meth);
4891 update_start_location_to_match_attr(c, meth);
4892 ADDOP_NAME(c, LOAD_METHOD, meth->v.Attribute.attr, names);
4893 VISIT_SEQ(c, expr, e->v.Call.args);
4894
4895 if (kwdsl) {
4896 VISIT_SEQ(c, keyword, kwds);
4897 if (!compiler_call_simple_kw_helper(c, kwds, kwdsl)) {
4898 return 0;
4899 }
4900 }
4901 SET_LOC(c, e);
4902 update_start_location_to_match_attr(c, meth);
4903 ADDOP_I(c, PRECALL, argsl + kwdsl);
4904 ADDOP_I(c, CALL, argsl + kwdsl);
4905 return 1;
4906 }
4907
4908 static int
4909 validate_keywords(struct compiler *c, asdl_keyword_seq *keywords)
4910 {
4911 Py_ssize_t nkeywords = asdl_seq_LEN(keywords);
4912 for (Py_ssize_t i = 0; i < nkeywords; i++) {
4913 keyword_ty key = ((keyword_ty)asdl_seq_GET(keywords, i));
4914 if (key->arg == NULL) {
4915 continue;
4916 }
4917 if (forbidden_name(c, key->arg, Store)) {
4918 return -1;
4919 }
4920 for (Py_ssize_t j = i + 1; j < nkeywords; j++) {
4921 keyword_ty other = ((keyword_ty)asdl_seq_GET(keywords, j));
4922 if (other->arg && !PyUnicode_Compare(key->arg, other->arg)) {
4923 SET_LOC(c, other);
4924 compiler_error(c, "keyword argument repeated: %U", key->arg);
4925 return -1;
4926 }
4927 }
4928 }
4929 return 0;
4930 }
4931
4932 static int
4933 compiler_call(struct compiler *c, expr_ty e)
4934 {
4935 if (validate_keywords(c, e->v.Call.keywords) == -1) {
4936 return 0;
4937 }
4938 int ret = maybe_optimize_method_call(c, e);
4939 if (ret >= 0) {
4940 return ret;
4941 }
4942 if (!check_caller(c, e->v.Call.func)) {
4943 return 0;
4944 }
4945 SET_LOC(c, e->v.Call.func);
4946 ADDOP(c, PUSH_NULL);
4947 SET_LOC(c, e);
4948 VISIT(c, expr, e->v.Call.func);
4949 return compiler_call_helper(c, 0,
4950 e->v.Call.args,
4951 e->v.Call.keywords);
4952 }
4953
4954 static int
4955 compiler_joined_str(struct compiler *c, expr_ty e)
4956 {
4957
4958 Py_ssize_t value_count = asdl_seq_LEN(e->v.JoinedStr.values);
4959 if (value_count > STACK_USE_GUIDELINE) {
4960 _Py_DECLARE_STR(empty, "");
4961 ADDOP_LOAD_CONST_NEW(c, Py_NewRef(&_Py_STR(empty)));
4962 ADDOP_NAME(c, LOAD_METHOD, &_Py_ID(join), names);
4963 ADDOP_I(c, BUILD_LIST, 0);
4964 for (Py_ssize_t i = 0; i < asdl_seq_LEN(e->v.JoinedStr.values); i++) {
4965 VISIT(c, expr, asdl_seq_GET(e->v.JoinedStr.values, i));
4966 ADDOP_I(c, LIST_APPEND, 1);
4967 }
4968 ADDOP_I(c, PRECALL, 1);
4969 ADDOP_I(c, CALL, 1);
4970 }
4971 else {
4972 VISIT_SEQ(c, expr, e->v.JoinedStr.values);
4973 if (asdl_seq_LEN(e->v.JoinedStr.values) != 1) {
4974 ADDOP_I(c, BUILD_STRING, asdl_seq_LEN(e->v.JoinedStr.values));
4975 }
4976 }
4977 return 1;
4978 }
4979
4980 /* Used to implement f-strings. Format a single value. */
4981 static int
4982 compiler_formatted_value(struct compiler *c, expr_ty e)
4983 {
4984 /* Our oparg encodes 2 pieces of information: the conversion
4985 character, and whether or not a format_spec was provided.
4986
4987 Convert the conversion char to 3 bits:
4988 : 000 0x0 FVC_NONE The default if nothing specified.
4989 !s : 001 0x1 FVC_STR
4990 !r : 010 0x2 FVC_REPR
4991 !a : 011 0x3 FVC_ASCII
4992
4993 next bit is whether or not we have a format spec:
4994 yes : 100 0x4
4995 no : 000 0x0
4996 */
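/* For example, f"{x!r:>10}" compiles x, then the format spec ">10",
   then FORMAT_VALUE with oparg FVC_REPR | FVS_HAVE_SPEC (0x2 | 0x4). */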
4997
4998 int conversion = e->v.FormattedValue.conversion;
4999 int oparg;
5000
5001 /* The expression to be formatted. */
5002 VISIT(c, expr, e->v.FormattedValue.value);
5003
5004 switch (conversion) {
5005 case 's': oparg = FVC_STR; break;
5006 case 'r': oparg = FVC_REPR; break;
5007 case 'a': oparg = FVC_ASCII; break;
5008 case -1: oparg = FVC_NONE; break;
5009 default:
5010 PyErr_Format(PyExc_SystemError,
5011 "Unrecognized conversion character %d", conversion);
5012 return 0;
5013 }
5014 if (e->v.FormattedValue.format_spec) {
5015 /* Evaluate the format spec, and update our opcode arg. */
5016 VISIT(c, expr, e->v.FormattedValue.format_spec);
5017 oparg |= FVS_HAVE_SPEC;
5018 }
5019
5020 /* And push our opcode and oparg */
5021 ADDOP_I(c, FORMAT_VALUE, oparg);
5022
5023 return 1;
5024 }
5025
5026 static int
5027 compiler_subkwargs(struct compiler *c, asdl_keyword_seq *keywords, Py_ssize_t begin, Py_ssize_t end)
5028 {
5029 Py_ssize_t i, n = end - begin;
5030 keyword_ty kw;
5031 PyObject *keys, *key;
5032 assert(n > 0);
5033 int big = n*2 > STACK_USE_GUIDELINE;
5034 if (n > 1 && !big) {
5035 for (i = begin; i < end; i++) {
5036 kw = asdl_seq_GET(keywords, i);
5037 VISIT(c, expr, kw->value);
5038 }
5039 keys = PyTuple_New(n);
5040 if (keys == NULL) {
5041 return 0;
5042 }
5043 for (i = begin; i < end; i++) {
5044 key = ((keyword_ty) asdl_seq_GET(keywords, i))->arg;
5045 Py_INCREF(key);
5046 PyTuple_SET_ITEM(keys, i - begin, key);
5047 }
5048 ADDOP_LOAD_CONST_NEW(c, keys);
5049 ADDOP_I(c, BUILD_CONST_KEY_MAP, n);
5050 return 1;
5051 }
5052 if (big) {
5053 ADDOP_I_NOLINE(c, BUILD_MAP, 0);
5054 }
5055 for (i = begin; i < end; i++) {
5056 kw = asdl_seq_GET(keywords, i);
5057 ADDOP_LOAD_CONST(c, kw->arg);
5058 VISIT(c, expr, kw->value);
5059 if (big) {
5060 ADDOP_I_NOLINE(c, MAP_ADD, 1);
5061 }
5062 }
5063 if (!big) {
5064 ADDOP_I(c, BUILD_MAP, n);
5065 }
5066 return 1;
5067 }
5068
5069 /* Used by compiler_call_helper and maybe_optimize_method_call to emit
5070 * KW_NAMES before CALL.
5071 * Returns 1 on success, 0 on error.
5072 */
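/* For example, "f(a, b=1, c=2)" pushes a, 1 and 2, then emits KW_NAMES
 * with the constant ('b', 'c') before PRECALL 3 / CALL 3. */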
5073 static int
5074 compiler_call_simple_kw_helper(struct compiler *c,
5075 asdl_keyword_seq *keywords,
5076 Py_ssize_t nkwelts)
5077 {
5078 PyObject *names;
5079 names = PyTuple_New(nkwelts);
5080 if (names == NULL) {
5081 return 0;
5082 }
5083 for (int i = 0; i < nkwelts; i++) {
5084 keyword_ty kw = asdl_seq_GET(keywords, i);
5085 Py_INCREF(kw->arg);
5086 PyTuple_SET_ITEM(names, i, kw->arg);
5087 }
5088 Py_ssize_t arg = compiler_add_const(c, names);
5089 if (arg < 0) {
5090 return 0;
5091 }
5092 Py_DECREF(names);
5093 ADDOP_I(c, KW_NAMES, arg);
5094 return 1;
5095 }
5096
5097
5098 /* shared code between compiler_call and compiler_class */
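/* For example, "f(x, y)" takes the fast path (PRECALL 2; CALL 2),
   while "f(*args, **kw)" falls back to ex_call below: the positional
   arguments are collected into a tuple, the keyword arguments are
   merged into a dict, and CALL_FUNCTION_EX 1 is emitted. */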
5099 static int
5100 compiler_call_helper(struct compiler *c,
5101 int n, /* Args already pushed */
5102 asdl_expr_seq *args,
5103 asdl_keyword_seq *keywords)
5104 {
5105 Py_ssize_t i, nseen, nelts, nkwelts;
5106
5107 if (validate_keywords(c, keywords) == -1) {
5108 return 0;
5109 }
5110
5111 nelts = asdl_seq_LEN(args);
5112 nkwelts = asdl_seq_LEN(keywords);
5113
5114 if (nelts + nkwelts*2 > STACK_USE_GUIDELINE) {
5115 goto ex_call;
5116 }
5117 for (i = 0; i < nelts; i++) {
5118 expr_ty elt = asdl_seq_GET(args, i);
5119 if (elt->kind == Starred_kind) {
5120 goto ex_call;
5121 }
5122 }
5123 for (i = 0; i < nkwelts; i++) {
5124 keyword_ty kw = asdl_seq_GET(keywords, i);
5125 if (kw->arg == NULL) {
5126 goto ex_call;
5127 }
5128 }
5129
5130 /* No * or ** args, so can use faster calling sequence */
5131 for (i = 0; i < nelts; i++) {
5132 expr_ty elt = asdl_seq_GET(args, i);
5133 assert(elt->kind != Starred_kind);
5134 VISIT(c, expr, elt);
5135 }
5136 if (nkwelts) {
5137 VISIT_SEQ(c, keyword, keywords);
5138 if (!compiler_call_simple_kw_helper(c, keywords, nkwelts)) {
5139 return 0;
5140 }
5141 }
5142 ADDOP_I(c, PRECALL, n + nelts + nkwelts);
5143 ADDOP_I(c, CALL, n + nelts + nkwelts);
5144 return 1;
5145
5146 ex_call:
5147
5148 /* Do positional arguments. */
5149 if (n == 0 && nelts == 1 && ((expr_ty)asdl_seq_GET(args, 0))->kind == Starred_kind) {
5150 VISIT(c, expr, ((expr_ty)asdl_seq_GET(args, 0))->v.Starred.value);
5151 }
5152 else if (starunpack_helper(c, args, n, BUILD_LIST,
5153 LIST_APPEND, LIST_EXTEND, 1) == 0) {
5154 return 0;
5155 }
5156 /* Then keyword arguments */
5157 if (nkwelts) {
5158 /* Has a new dict been pushed? */
5159 int have_dict = 0;
5160
5161 nseen = 0; /* the number of keyword arguments on the stack following */
5162 for (i = 0; i < nkwelts; i++) {
5163 keyword_ty kw = asdl_seq_GET(keywords, i);
5164 if (kw->arg == NULL) {
5165 /* A keyword argument unpacking. */
5166 if (nseen) {
5167 if (!compiler_subkwargs(c, keywords, i - nseen, i)) {
5168 return 0;
5169 }
5170 if (have_dict) {
5171 ADDOP_I(c, DICT_MERGE, 1);
5172 }
5173 have_dict = 1;
5174 nseen = 0;
5175 }
5176 if (!have_dict) {
5177 ADDOP_I(c, BUILD_MAP, 0);
5178 have_dict = 1;
5179 }
5180 VISIT(c, expr, kw->value);
5181 ADDOP_I(c, DICT_MERGE, 1);
5182 }
5183 else {
5184 nseen++;
5185 }
5186 }
5187 if (nseen) {
5188 /* Pack up any trailing keyword arguments. */
5189 if (!compiler_subkwargs(c, keywords, nkwelts - nseen, nkwelts)) {
5190 return 0;
5191 }
5192 if (have_dict) {
5193 ADDOP_I(c, DICT_MERGE, 1);
5194 }
5195 have_dict = 1;
5196 }
5197 assert(have_dict);
5198 }
5199 ADDOP_I(c, CALL_FUNCTION_EX, nkwelts > 0);
5200 return 1;
5201 }
5202
5203
5204 /* List and set comprehensions and generator expressions work by creating a
5205 nested function to perform the actual iteration. This means that the
5206 iteration variables don't leak into the current scope.
5207 The defined function is called immediately following its definition, with the
5208 result of that call being the result of the expression.
5209 The LC/SC version returns the populated container, while the GE version is
5210 flagged in symtable.c as a generator, so it returns the generator object
5211 when the function is called.
5212
5213 Possible cleanups:
5214 - iterate over the generator sequence instead of using recursion
5215 */
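/* For example, "[x*x for x in it]" creates a hidden <listcomp>
   function whose body does BUILD_LIST 0 and then a FOR_ITER loop with
   LIST_APPEND; the enclosing code evaluates "it", applies GET_ITER and
   calls the function immediately (PRECALL 0; CALL 0). */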
5216
5217
5218 static int
5219 compiler_comprehension_generator(struct compiler *c,
5220 asdl_comprehension_seq *generators, int gen_index,
5221 int depth,
5222 expr_ty elt, expr_ty val, int type)
5223 {
5224 comprehension_ty gen;
5225 gen = (comprehension_ty)asdl_seq_GET(generators, gen_index);
5226 if (gen->is_async) {
5227 return compiler_async_comprehension_generator(
5228 c, generators, gen_index, depth, elt, val, type);
5229 } else {
5230 return compiler_sync_comprehension_generator(
5231 c, generators, gen_index, depth, elt, val, type);
5232 }
5233 }
5234
5235 static int
5236 compiler_sync_comprehension_generator(struct compiler *c,
5237 asdl_comprehension_seq *generators, int gen_index,
5238 int depth,
5239 expr_ty elt, expr_ty val, int type)
5240 {
5241 /* generate code for the iterator, then each of the ifs,
5242 and then write to the element */
5243
5244 comprehension_ty gen;
5245 basicblock *start, *anchor, *if_cleanup;
5246 Py_ssize_t i, n;
5247
5248 start = compiler_new_block(c);
5249 if_cleanup = compiler_new_block(c);
5250 anchor = compiler_new_block(c);
5251
5252 if (start == NULL || if_cleanup == NULL || anchor == NULL) {
5253 return 0;
5254 }
5255
5256 gen = (comprehension_ty)asdl_seq_GET(generators, gen_index);
5257
5258 if (gen_index == 0) {
5259 /* Receive outermost iter as an implicit argument */
5260 c->u->u_argcount = 1;
5261 ADDOP_I(c, LOAD_FAST, 0);
5262 }
5263 else {
5264 /* Sub-iter - calculate on the fly */
5265 /* Fast path for the temporary variable assignment idiom:
5266 for y in [f(x)]
5267 the single value is pushed directly; no loop is emitted. */
5268 asdl_expr_seq *elts;
5269 switch (gen->iter->kind) {
5270 case List_kind:
5271 elts = gen->iter->v.List.elts;
5272 break;
5273 case Tuple_kind:
5274 elts = gen->iter->v.Tuple.elts;
5275 break;
5276 default:
5277 elts = NULL;
5278 }
5279 if (asdl_seq_LEN(elts) == 1) {
5280 expr_ty elt = asdl_seq_GET(elts, 0);
5281 if (elt->kind != Starred_kind) {
5282 VISIT(c, expr, elt);
5283 start = NULL;
5284 }
5285 }
5286 if (start) {
5287 VISIT(c, expr, gen->iter);
5288 ADDOP(c, GET_ITER);
5289 }
5290 }
5291 if (start) {
5292 depth++;
5293 compiler_use_next_block(c, start);
5294 ADDOP_JUMP(c, FOR_ITER, anchor);
5295 }
5296 VISIT(c, expr, gen->target);
5297
5298 /* XXX this needs to be cleaned up...a lot! */
5299 n = asdl_seq_LEN(gen->ifs);
5300 for (i = 0; i < n; i++) {
5301 expr_ty e = (expr_ty)asdl_seq_GET(gen->ifs, i);
5302 if (!compiler_jump_if(c, e, if_cleanup, 0))
5303 return 0;
5304 }
5305
5306 if (++gen_index < asdl_seq_LEN(generators))
5307 if (!compiler_comprehension_generator(c,
5308 generators, gen_index, depth,
5309 elt, val, type))
5310 return 0;
5311
5312 /* only append after the last for generator */
5313 if (gen_index >= asdl_seq_LEN(generators)) {
5314 /* comprehension specific code */
5315 switch (type) {
5316 case COMP_GENEXP:
5317 VISIT(c, expr, elt);
5318 ADDOP_YIELD(c);
5319 ADDOP(c, POP_TOP);
5320 break;
5321 case COMP_LISTCOMP:
5322 VISIT(c, expr, elt);
5323 ADDOP_I(c, LIST_APPEND, depth + 1);
5324 break;
5325 case COMP_SETCOMP:
5326 VISIT(c, expr, elt);
5327 ADDOP_I(c, SET_ADD, depth + 1);
5328 break;
5329 case COMP_DICTCOMP:
5330 /* With '{k: v}', k is evaluated before v, so we do
5331 the same. */
5332 VISIT(c, expr, elt);
5333 VISIT(c, expr, val);
5334 ADDOP_I(c, MAP_ADD, depth + 1);
5335 break;
5336 default:
5337 return 0;
5338 }
5339 }
5340 compiler_use_next_block(c, if_cleanup);
5341 if (start) {
5342 ADDOP_JUMP(c, JUMP, start);
5343 compiler_use_next_block(c, anchor);
5344 }
5345
5346 return 1;
5347 }
5348
5349 static int
5350 compiler_async_comprehension_generator(struct compiler *c,
5351 asdl_comprehension_seq *generators, int gen_index,
5352 int depth,
5353 expr_ty elt, expr_ty val, int type)
5354 {
5355 comprehension_ty gen;
5356 basicblock *start, *if_cleanup, *except;
5357 Py_ssize_t i, n;
5358 start = compiler_new_block(c);
5359 except = compiler_new_block(c);
5360 if_cleanup = compiler_new_block(c);
5361
5362 if (start == NULL || if_cleanup == NULL || except == NULL) {
5363 return 0;
5364 }
5365
5366 gen = (comprehension_ty)asdl_seq_GET(generators, gen_index);
5367
5368 if (gen_index == 0) {
5369 /* Receive outermost iter as an implicit argument */
5370 c->u->u_argcount = 1;
5371 ADDOP_I(c, LOAD_FAST, 0);
5372 }
5373 else {
5374 /* Sub-iter - calculate on the fly */
5375 VISIT(c, expr, gen->iter);
5376 ADDOP(c, GET_AITER);
5377 }
5378
5379 compiler_use_next_block(c, start);
5380 /* Runtime will push a block here, so we need to account for that */
5381 if (!compiler_push_fblock(c, ASYNC_COMPREHENSION_GENERATOR, start,
5382 NULL, NULL)) {
5383 return 0;
5384 }
5385
5386 ADDOP_JUMP(c, SETUP_FINALLY, except);
5387 ADDOP(c, GET_ANEXT);
5388 ADDOP_LOAD_CONST(c, Py_None);
5389 ADD_YIELD_FROM(c, 1);
5390 ADDOP(c, POP_BLOCK);
5391 VISIT(c, expr, gen->target);
5392
5393 n = asdl_seq_LEN(gen->ifs);
5394 for (i = 0; i < n; i++) {
5395 expr_ty e = (expr_ty)asdl_seq_GET(gen->ifs, i);
5396 if (!compiler_jump_if(c, e, if_cleanup, 0))
5397 return 0;
5398 }
5399
5400 depth++;
5401 if (++gen_index < asdl_seq_LEN(generators))
5402 if (!compiler_comprehension_generator(c,
5403 generators, gen_index, depth,
5404 elt, val, type))
5405 return 0;
5406
5407 /* only append after the last for generator */
5408 if (gen_index >= asdl_seq_LEN(generators)) {
5409 /* comprehension specific code */
5410 switch (type) {
5411 case COMP_GENEXP:
5412 VISIT(c, expr, elt);
5413 ADDOP_YIELD(c);
5414 ADDOP(c, POP_TOP);
5415 break;
5416 case COMP_LISTCOMP:
5417 VISIT(c, expr, elt);
5418 ADDOP_I(c, LIST_APPEND, depth + 1);
5419 break;
5420 case COMP_SETCOMP:
5421 VISIT(c, expr, elt);
5422 ADDOP_I(c, SET_ADD, depth + 1);
5423 break;
5424 case COMP_DICTCOMP:
5425 /* With '{k: v}', k is evaluated before v, so we do
5426 the same. */
5427 VISIT(c, expr, elt);
5428 VISIT(c, expr, val);
5429 ADDOP_I(c, MAP_ADD, depth + 1);
5430 break;
5431 default:
5432 return 0;
5433 }
5434 }
5435 compiler_use_next_block(c, if_cleanup);
5436 ADDOP_JUMP(c, JUMP, start);
5437
5438 compiler_pop_fblock(c, ASYNC_COMPREHENSION_GENERATOR, start);
5439
5440 compiler_use_next_block(c, except);
5442
5443 ADDOP(c, END_ASYNC_FOR);
5444
5445 return 1;
5446 }
5447
5448 static int
5449 compiler_comprehension(struct compiler *c, expr_ty e, int type,
5450 identifier name, asdl_comprehension_seq *generators, expr_ty elt,
5451 expr_ty val)
5452 {
5453 PyCodeObject *co = NULL;
5454 comprehension_ty outermost;
5455 PyObject *qualname = NULL;
5456 int scope_type = c->u->u_scope_type;
5457 int is_async_generator = 0;
5458 int is_top_level_await = IS_TOP_LEVEL_AWAIT(c);
5459
5460 outermost = (comprehension_ty) asdl_seq_GET(generators, 0);
5461 if (!compiler_enter_scope(c, name, COMPILER_SCOPE_COMPREHENSION,
5462 (void *)e, e->lineno))
5463 {
5464 goto error;
5465 }
5466 SET_LOC(c, e);
5467
5468 is_async_generator = c->u->u_ste->ste_coroutine;
5469
5470 if (is_async_generator && type != COMP_GENEXP &&
5471 scope_type != COMPILER_SCOPE_ASYNC_FUNCTION &&
5472 scope_type != COMPILER_SCOPE_COMPREHENSION &&
5473 !is_top_level_await)
5474 {
5475 compiler_error(c, "asynchronous comprehension outside of "
5476 "an asynchronous function");
5477 goto error_in_scope;
5478 }
5479
5480 if (type != COMP_GENEXP) {
5481 int op;
5482 switch (type) {
5483 case COMP_LISTCOMP:
5484 op = BUILD_LIST;
5485 break;
5486 case COMP_SETCOMP:
5487 op = BUILD_SET;
5488 break;
5489 case COMP_DICTCOMP:
5490 op = BUILD_MAP;
5491 break;
5492 default:
5493 PyErr_Format(PyExc_SystemError,
5494 "unknown comprehension type %d", type);
5495 goto error_in_scope;
5496 }
5497
5498 ADDOP_I(c, op, 0);
5499 }
5500
5501 if (!compiler_comprehension_generator(c, generators, 0, 0, elt,
5502 val, type))
5503 goto error_in_scope;
5504
5505 if (type != COMP_GENEXP) {
5506 ADDOP(c, RETURN_VALUE);
5507 }
5508
5509 co = assemble(c, 1);
5510 qualname = c->u->u_qualname;
5511 Py_INCREF(qualname);
5512 compiler_exit_scope(c);
5513 if (is_top_level_await && is_async_generator){
5514 c->u->u_ste->ste_coroutine = 1;
5515 }
5516 if (co == NULL)
5517 goto error;
5518
5519 if (!compiler_make_closure(c, co, 0, qualname)) {
5520 goto error;
5521 }
5522 Py_DECREF(qualname);
5523 Py_DECREF(co);
5524
5525 VISIT(c, expr, outermost->iter);
5526
5527 if (outermost->is_async) {
5528 ADDOP(c, GET_AITER);
5529 } else {
5530 ADDOP(c, GET_ITER);
5531 }
5532
5533 ADDOP_I(c, PRECALL, 0);
5534 ADDOP_I(c, CALL, 0);
5535
5536 if (is_async_generator && type != COMP_GENEXP) {
5537 ADDOP_I(c, GET_AWAITABLE, 0);
5538 ADDOP_LOAD_CONST(c, Py_None);
5539 ADD_YIELD_FROM(c, 1);
5540 }
5541
5542 return 1;
5543 error_in_scope:
5544 compiler_exit_scope(c);
5545 error:
5546 Py_XDECREF(qualname);
5547 Py_XDECREF(co);
5548 return 0;
5549 }
5550
5551 static int
5552 compiler_genexp(struct compiler *c, expr_ty e)
5553 {
5554 assert(e->kind == GeneratorExp_kind);
5555 _Py_DECLARE_STR(anon_genexpr, "<genexpr>");
5556 return compiler_comprehension(c, e, COMP_GENEXP, &_Py_STR(anon_genexpr),
5557 e->v.GeneratorExp.generators,
5558 e->v.GeneratorExp.elt, NULL);
5559 }
5560
5561 static int
5562 compiler_listcomp(struct compiler *c, expr_ty e)
5563 {
5564 assert(e->kind == ListComp_kind);
5565 _Py_DECLARE_STR(anon_listcomp, "<listcomp>");
5566 return compiler_comprehension(c, e, COMP_LISTCOMP, &_Py_STR(anon_listcomp),
5567 e->v.ListComp.generators,
5568 e->v.ListComp.elt, NULL);
5569 }
5570
5571 static int
5572 compiler_setcomp(struct compiler *c, expr_ty e)
5573 {
5574 assert(e->kind == SetComp_kind);
5575 _Py_DECLARE_STR(anon_setcomp, "<setcomp>");
5576 return compiler_comprehension(c, e, COMP_SETCOMP, &_Py_STR(anon_setcomp),
5577 e->v.SetComp.generators,
5578 e->v.SetComp.elt, NULL);
5579 }
5580
5581
5582 static int
5583 compiler_dictcomp(struct compiler *c, expr_ty e)
5584 {
5585 assert(e->kind == DictComp_kind);
5586 _Py_DECLARE_STR(anon_dictcomp, "<dictcomp>");
5587 return compiler_comprehension(c, e, COMP_DICTCOMP, &_Py_STR(anon_dictcomp),
5588 e->v.DictComp.generators,
5589 e->v.DictComp.key, e->v.DictComp.value);
5590 }
5591
5592
5593 static int
5594 compiler_visit_keyword(struct compiler *c, keyword_ty k)
5595 {
5596 VISIT(c, expr, k->value);
5597 return 1;
5598 }
5599
5600
5601 static int
5602 compiler_with_except_finish(struct compiler *c, basicblock *cleanup) {
5603 UNSET_LOC(c);
5604 basicblock *exit;
5605 exit = compiler_new_block(c);
5606 if (exit == NULL)
5607 return 0;
5608 ADDOP_JUMP(c, POP_JUMP_IF_TRUE, exit);
5609 ADDOP_I(c, RERAISE, 2);
5610 compiler_use_next_block(c, cleanup);
5611 POP_EXCEPT_AND_RERAISE(c);
5612 compiler_use_next_block(c, exit);
5613 ADDOP(c, POP_TOP); /* exc_value */
5614 ADDOP(c, POP_BLOCK);
5615 ADDOP(c, POP_EXCEPT);
5616 ADDOP(c, POP_TOP);
5617 ADDOP(c, POP_TOP);
5618 return 1;
5619 }
5620
5621 /*
5622 Implements the async with statement.
5623
5624 The semantics, as outlined in PEP 492, are as follows:
5625
5626 async with EXPR as VAR:
5627 BLOCK
5628
5629 It is implemented roughly as:
5630
5631 context = EXPR
5632 exit = context.__aexit__ # not calling it
5633 value = await context.__aenter__()
5634 try:
5635 VAR = value # if VAR present in the syntax
5636 BLOCK
5637 finally:
5638 if an exception was raised:
5639 exc = copy of (exception, instance, traceback)
5640 else:
5641 exc = (None, None, None)
5642 if not (await exit(*exc)):
5643 raise
5644 */
5645 static int
5646 compiler_async_with(struct compiler *c, stmt_ty s, int pos)
5647 {
5648 basicblock *block, *final, *exit, *cleanup;
5649 withitem_ty item = asdl_seq_GET(s->v.AsyncWith.items, pos);
5650
5651 assert(s->kind == AsyncWith_kind);
5652 if (IS_TOP_LEVEL_AWAIT(c)){
5653 c->u->u_ste->ste_coroutine = 1;
5654 } else if (c->u->u_scope_type != COMPILER_SCOPE_ASYNC_FUNCTION){
5655 return compiler_error(c, "'async with' outside async function");
5656 }
5657
5658 block = compiler_new_block(c);
5659 final = compiler_new_block(c);
5660 exit = compiler_new_block(c);
5661 cleanup = compiler_new_block(c);
5662 if (!block || !final || !exit || !cleanup)
5663 return 0;
5664
5665 /* Evaluate EXPR */
5666 VISIT(c, expr, item->context_expr);
5667
5668 ADDOP(c, BEFORE_ASYNC_WITH);
5669 ADDOP_I(c, GET_AWAITABLE, 1);
5670 ADDOP_LOAD_CONST(c, Py_None);
5671 ADD_YIELD_FROM(c, 1);
5672
5673 ADDOP_JUMP(c, SETUP_WITH, final);
5674
5675 /* SETUP_WITH pushes a finally block. */
5676 compiler_use_next_block(c, block);
5677 if (!compiler_push_fblock(c, ASYNC_WITH, block, final, s)) {
5678 return 0;
5679 }
5680
5681 if (item->optional_vars) {
5682 VISIT(c, expr, item->optional_vars);
5683 }
5684 else {
5685 /* Discard result from context.__aenter__() */
5686 ADDOP(c, POP_TOP);
5687 }
5688
5689 pos++;
5690 if (pos == asdl_seq_LEN(s->v.AsyncWith.items))
5691 /* BLOCK code */
5692 VISIT_SEQ(c, stmt, s->v.AsyncWith.body)
5693 else if (!compiler_async_with(c, s, pos))
5694 return 0;
5695
5696 compiler_pop_fblock(c, ASYNC_WITH, block);
5697 ADDOP(c, POP_BLOCK);
5698 /* End of body; start the cleanup */
5699
5700 /* For successful outcome:
5701 * await __aexit__(None, None, None)
5702 */
5703 SET_LOC(c, s);
5704 if (!compiler_call_exit_with_nones(c))
5705 return 0;
5706 ADDOP_I(c, GET_AWAITABLE, 2);
5707 ADDOP_LOAD_CONST(c, Py_None);
5708 ADD_YIELD_FROM(c, 1);
5709
5710 ADDOP(c, POP_TOP);
5711
5712 ADDOP_JUMP(c, JUMP, exit);
5713
5714 /* For exceptional outcome: */
5715 compiler_use_next_block(c, final);
5716
5717 ADDOP_JUMP(c, SETUP_CLEANUP, cleanup);
5718 ADDOP(c, PUSH_EXC_INFO);
5719 ADDOP(c, WITH_EXCEPT_START);
5720 ADDOP_I(c, GET_AWAITABLE, 2);
5721 ADDOP_LOAD_CONST(c, Py_None);
5722 ADD_YIELD_FROM(c, 1);
5723 compiler_with_except_finish(c, cleanup);
5724
5725 compiler_use_next_block(c, exit);
5726 return 1;
5727 }
5728
5729
5730 /*
5731 Implements the with statement from PEP 343.
5732 with EXPR as VAR:
5733 BLOCK
5734 is implemented as:
5735 <code for EXPR>
5736 SETUP_WITH E
5737 <code to store to VAR> or POP_TOP
5738 <code for BLOCK>
5739 LOAD_CONST (None, None, None)
5740 CALL_FUNCTION_EX 0
5741 JUMP EXIT
5742 E: WITH_EXCEPT_START (calls EXPR.__exit__)
5743 POP_JUMP_IF_TRUE T:
5744 RERAISE
5745 T: POP_TOP (remove exception from stack)
5746 POP_EXCEPT
5747 POP_TOP
5748 EXIT:
5749 */
5750
5751 static int
5752 compiler_with(struct compiler *c, stmt_ty s, int pos)
5753 {
5754 basicblock *block, *final, *exit, *cleanup;
5755 withitem_ty item = asdl_seq_GET(s->v.With.items, pos);
5756
5757 assert(s->kind == With_kind);
5758
5759 block = compiler_new_block(c);
5760 final = compiler_new_block(c);
5761 exit = compiler_new_block(c);
5762 cleanup = compiler_new_block(c);
5763 if (!block || !final || !exit || !cleanup)
5764 return 0;
5765
5766 /* Evaluate EXPR */
5767 VISIT(c, expr, item->context_expr);
5768 /* Will push bound __exit__ */
5769 ADDOP(c, BEFORE_WITH);
5770 ADDOP_JUMP(c, SETUP_WITH, final);
5771
5772 /* SETUP_WITH pushes a finally block. */
5773 compiler_use_next_block(c, block);
5774 if (!compiler_push_fblock(c, WITH, block, final, s)) {
5775 return 0;
5776 }
5777
5778 if (item->optional_vars) {
5779 VISIT(c, expr, item->optional_vars);
5780 }
5781 else {
5782 /* Discard result from context.__enter__() */
5783 ADDOP(c, POP_TOP);
5784 }
5785
5786 pos++;
5787 if (pos == asdl_seq_LEN(s->v.With.items))
5788 /* BLOCK code */
5789 VISIT_SEQ(c, stmt, s->v.With.body)
5790 else if (!compiler_with(c, s, pos))
5791 return 0;
5792
5793
5794 /* Mark all following code as artificial */
5795 UNSET_LOC(c);
5796 ADDOP(c, POP_BLOCK);
5797 compiler_pop_fblock(c, WITH, block);
5798
5799 /* End of body; start the cleanup. */
5800
5801 /* For successful outcome:
5802 * call __exit__(None, None, None)
5803 */
5804 SET_LOC(c, s);
5805 if (!compiler_call_exit_with_nones(c))
5806 return 0;
5807 ADDOP(c, POP_TOP);
5808 ADDOP_JUMP(c, JUMP, exit);
5809
5810 /* For exceptional outcome: */
5811 compiler_use_next_block(c, final);
5812
5813 ADDOP_JUMP(c, SETUP_CLEANUP, cleanup);
5814 ADDOP(c, PUSH_EXC_INFO);
5815 ADDOP(c, WITH_EXCEPT_START);
5816 compiler_with_except_finish(c, cleanup);
5817
5818 compiler_use_next_block(c, exit);
5819 return 1;
5820 }
5821
5822 static int
5823 compiler_visit_expr1(struct compiler *c, expr_ty e)
5824 {
5825 switch (e->kind) {
5826 case NamedExpr_kind:
5827 VISIT(c, expr, e->v.NamedExpr.value);
5828 ADDOP_I(c, COPY, 1);
5829 VISIT(c, expr, e->v.NamedExpr.target);
5830 break;
5831 case BoolOp_kind:
5832 return compiler_boolop(c, e);
5833 case BinOp_kind:
5834 VISIT(c, expr, e->v.BinOp.left);
5835 VISIT(c, expr, e->v.BinOp.right);
5836 ADDOP_BINARY(c, e->v.BinOp.op);
5837 break;
5838 case UnaryOp_kind:
5839 VISIT(c, expr, e->v.UnaryOp.operand);
5840 ADDOP(c, unaryop(e->v.UnaryOp.op));
5841 break;
5842 case Lambda_kind:
5843 return compiler_lambda(c, e);
5844 case IfExp_kind:
5845 return compiler_ifexp(c, e);
5846 case Dict_kind:
5847 return compiler_dict(c, e);
5848 case Set_kind:
5849 return compiler_set(c, e);
5850 case GeneratorExp_kind:
5851 return compiler_genexp(c, e);
5852 case ListComp_kind:
5853 return compiler_listcomp(c, e);
5854 case SetComp_kind:
5855 return compiler_setcomp(c, e);
5856 case DictComp_kind:
5857 return compiler_dictcomp(c, e);
5858 case Yield_kind:
5859 if (c->u->u_ste->ste_type != FunctionBlock)
5860 return compiler_error(c, "'yield' outside function");
5861 if (e->v.Yield.value) {
5862 VISIT(c, expr, e->v.Yield.value);
5863 }
5864 else {
5865 ADDOP_LOAD_CONST(c, Py_None);
5866 }
5867 ADDOP_YIELD(c);
5868 break;
5869 case YieldFrom_kind:
5870 if (c->u->u_ste->ste_type != FunctionBlock)
5871 return compiler_error(c, "'yield' outside function");
5872
5873 if (c->u->u_scope_type == COMPILER_SCOPE_ASYNC_FUNCTION)
5874 return compiler_error(c, "'yield from' inside async function");
5875
5876 VISIT(c, expr, e->v.YieldFrom.value);
5877 ADDOP(c, GET_YIELD_FROM_ITER);
5878 ADDOP_LOAD_CONST(c, Py_None);
5879 ADD_YIELD_FROM(c, 0);
5880 break;
5881 case Await_kind:
5882 if (!IS_TOP_LEVEL_AWAIT(c)){
5883 if (c->u->u_ste->ste_type != FunctionBlock){
5884 return compiler_error(c, "'await' outside function");
5885 }
5886
5887 if (c->u->u_scope_type != COMPILER_SCOPE_ASYNC_FUNCTION &&
5888 c->u->u_scope_type != COMPILER_SCOPE_COMPREHENSION){
5889 return compiler_error(c, "'await' outside async function");
5890 }
5891 }
5892
5893 VISIT(c, expr, e->v.Await.value);
5894 ADDOP_I(c, GET_AWAITABLE, 0);
5895 ADDOP_LOAD_CONST(c, Py_None);
5896 ADD_YIELD_FROM(c, 1);
5897 break;
5898 case Compare_kind:
5899 return compiler_compare(c, e);
5900 case Call_kind:
5901 return compiler_call(c, e);
5902 case Constant_kind:
5903 ADDOP_LOAD_CONST(c, e->v.Constant.value);
5904 break;
5905 case JoinedStr_kind:
5906 return compiler_joined_str(c, e);
5907 case FormattedValue_kind:
5908 return compiler_formatted_value(c, e);
5909 /* The following exprs can be assignment targets. */
5910 case Attribute_kind:
5911 VISIT(c, expr, e->v.Attribute.value);
5912 update_start_location_to_match_attr(c, e);
5913 switch (e->v.Attribute.ctx) {
5914 case Load:
5915 {
5916 ADDOP_NAME(c, LOAD_ATTR, e->v.Attribute.attr, names);
5917 break;
5918 }
5919 case Store:
5920 if (forbidden_name(c, e->v.Attribute.attr, e->v.Attribute.ctx)) {
5921 return 0;
5922 }
5923 ADDOP_NAME(c, STORE_ATTR, e->v.Attribute.attr, names);
5924 break;
5925 case Del:
5926 ADDOP_NAME(c, DELETE_ATTR, e->v.Attribute.attr, names);
5927 break;
5928 }
5929 break;
5930 case Subscript_kind:
5931 return compiler_subscript(c, e);
5932 case Starred_kind:
5933 switch (e->v.Starred.ctx) {
5934 case Store:
5935 /* In all legitimate cases, the Starred node was already replaced
5936 * by compiler_list/compiler_tuple. XXX: is that okay? */
5937 return compiler_error(c,
5938 "starred assignment target must be in a list or tuple");
5939 default:
5940 return compiler_error(c,
5941 "can't use starred expression here");
5942 }
5943 break;
5944 case Slice_kind:
5945 return compiler_slice(c, e);
5946 case Name_kind:
5947 return compiler_nameop(c, e->v.Name.id, e->v.Name.ctx);
5948 /* child nodes of List and Tuple will have expr_context set */
5949 case List_kind:
5950 return compiler_list(c, e);
5951 case Tuple_kind:
5952 return compiler_tuple(c, e);
5953 }
5954 return 1;
5955 }
5956
5957 static int
5958 compiler_visit_expr(struct compiler *c, expr_ty e)
5959 {
5960 int old_lineno = c->u->u_lineno;
5961 int old_end_lineno = c->u->u_end_lineno;
5962 int old_col_offset = c->u->u_col_offset;
5963 int old_end_col_offset = c->u->u_end_col_offset;
5964 SET_LOC(c, e);
5965 int res = compiler_visit_expr1(c, e);
5966 c->u->u_lineno = old_lineno;
5967 c->u->u_end_lineno = old_end_lineno;
5968 c->u->u_col_offset = old_col_offset;
5969 c->u->u_end_col_offset = old_end_col_offset;
5970 return res;
5971 }
5972
5973 static int
5974 compiler_augassign(struct compiler *c, stmt_ty s)
5975 {
5976 assert(s->kind == AugAssign_kind);
5977 expr_ty e = s->v.AugAssign.target;
5978
5979 int old_lineno = c->u->u_lineno;
5980 int old_end_lineno = c->u->u_end_lineno;
5981 int old_col_offset = c->u->u_col_offset;
5982 int old_end_col_offset = c->u->u_end_col_offset;
5983 SET_LOC(c, e);
5984
5985 switch (e->kind) {
5986 case Attribute_kind:
5987 VISIT(c, expr, e->v.Attribute.value);
5988 ADDOP_I(c, COPY, 1);
5989 update_start_location_to_match_attr(c, e);
5990 ADDOP_NAME(c, LOAD_ATTR, e->v.Attribute.attr, names);
5991 break;
5992 case Subscript_kind:
5993 VISIT(c, expr, e->v.Subscript.value);
5994 VISIT(c, expr, e->v.Subscript.slice);
5995 ADDOP_I(c, COPY, 2);
5996 ADDOP_I(c, COPY, 2);
5997 ADDOP(c, BINARY_SUBSCR);
5998 break;
5999 case Name_kind:
6000 if (!compiler_nameop(c, e->v.Name.id, Load))
6001 return 0;
6002 break;
6003 default:
6004 PyErr_Format(PyExc_SystemError,
6005 "invalid node type (%d) for augmented assignment",
6006 e->kind);
6007 return 0;
6008 }
6009
6010 c->u->u_lineno = old_lineno;
6011 c->u->u_end_lineno = old_end_lineno;
6012 c->u->u_col_offset = old_col_offset;
6013 c->u->u_end_col_offset = old_end_col_offset;
6014
6015 VISIT(c, expr, s->v.AugAssign.value);
6016 ADDOP_INPLACE(c, s->v.AugAssign.op);
6017
6018 SET_LOC(c, e);
6019
6020 switch (e->kind) {
6021 case Attribute_kind:
6022 update_start_location_to_match_attr(c, e);
6023 ADDOP_I(c, SWAP, 2);
6024 ADDOP_NAME(c, STORE_ATTR, e->v.Attribute.attr, names);
6025 break;
6026 case Subscript_kind:
6027 ADDOP_I(c, SWAP, 3);
6028 ADDOP_I(c, SWAP, 2);
6029 ADDOP(c, STORE_SUBSCR);
6030 break;
6031 case Name_kind:
6032 return compiler_nameop(c, e->v.Name.id, Store);
6033 default:
6034 Py_UNREACHABLE();
6035 }
6036 return 1;
6037 }
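/* An illustrative (slightly simplified) trace for `a.b += 1`, showing why
 * the COPY/SWAP dance is needed so that `a` is evaluated only once:
 *     LOAD_NAME    a        stack: [a]
 *     COPY         1        stack: [a, a]
 *     LOAD_ATTR    b        stack: [a, a.b]
 *     LOAD_CONST   1        stack: [a, a.b, 1]
 *     BINARY_OP    (+=)     stack: [a, result]
 *     SWAP         2        stack: [result, a]
 *     STORE_ATTR   b        stack: []
 */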
6038
6039 static int
6040 check_ann_expr(struct compiler *c, expr_ty e)
6041 {
6042 VISIT(c, expr, e);
6043 ADDOP(c, POP_TOP);
6044 return 1;
6045 }
6046
6047 static int
6048 check_annotation(struct compiler *c, stmt_ty s)
6049 {
6050 /* Annotations of complex targets do not produce anything
6051 under the annotations future. */
6052 if (c->c_future->ff_features & CO_FUTURE_ANNOTATIONS) {
6053 return 1;
6054 }
6055
6056 /* Annotations are only evaluated in a module or class. */
6057 if (c->u->u_scope_type == COMPILER_SCOPE_MODULE ||
6058 c->u->u_scope_type == COMPILER_SCOPE_CLASS) {
6059 return check_ann_expr(c, s->v.AnnAssign.annotation);
6060 }
6061 return 1;
6062 }
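/* For example, given `(x): int` under `from __future__ import annotations`,
 * the annotation produces no code at all; without the future import, `int`
 * is still evaluated (and popped) at module or class scope so that a bad
 * annotation raises instead of being silently skipped.
 */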
6063
6064 static int
6065 check_ann_subscr(struct compiler *c, expr_ty e)
6066 {
6067 /* We check that everything in a subscript is defined at runtime. */
6068 switch (e->kind) {
6069 case Slice_kind:
6070 if (e->v.Slice.lower && !check_ann_expr(c, e->v.Slice.lower)) {
6071 return 0;
6072 }
6073 if (e->v.Slice.upper && !check_ann_expr(c, e->v.Slice.upper)) {
6074 return 0;
6075 }
6076 if (e->v.Slice.step && !check_ann_expr(c, e->v.Slice.step)) {
6077 return 0;
6078 }
6079 return 1;
6080 case Tuple_kind: {
6081 /* extended slice */
6082 asdl_expr_seq *elts = e->v.Tuple.elts;
6083 Py_ssize_t i, n = asdl_seq_LEN(elts);
6084 for (i = 0; i < n; i++) {
6085 if (!check_ann_subscr(c, asdl_seq_GET(elts, i))) {
6086 return 0;
6087 }
6088 }
6089 return 1;
6090 }
6091 default:
6092 return check_ann_expr(c, e);
6093 }
6094 }
6095
6096 static int
6097 compiler_annassign(struct compiler *c, stmt_ty s)
6098 {
6099 expr_ty targ = s->v.AnnAssign.target;
6100 PyObject* mangled;
6101
6102 assert(s->kind == AnnAssign_kind);
6103
6104 /* We perform the actual assignment first. */
6105 if (s->v.AnnAssign.value) {
6106 VISIT(c, expr, s->v.AnnAssign.value);
6107 VISIT(c, expr, targ);
6108 }
6109 switch (targ->kind) {
6110 case Name_kind:
6111 if (forbidden_name(c, targ->v.Name.id, Store))
6112 return 0;
6113 /* If we have a simple name in a module or class, store annotation. */
6114 if (s->v.AnnAssign.simple &&
6115 (c->u->u_scope_type == COMPILER_SCOPE_MODULE ||
6116 c->u->u_scope_type == COMPILER_SCOPE_CLASS)) {
6117 if (c->c_future->ff_features & CO_FUTURE_ANNOTATIONS) {
6118 VISIT(c, annexpr, s->v.AnnAssign.annotation)
6119 }
6120 else {
6121 VISIT(c, expr, s->v.AnnAssign.annotation);
6122 }
6123 ADDOP_NAME(c, LOAD_NAME, &_Py_ID(__annotations__), names);
6124 mangled = _Py_Mangle(c->u->u_private, targ->v.Name.id);
6125 ADDOP_LOAD_CONST_NEW(c, mangled);
6126 ADDOP(c, STORE_SUBSCR);
6127 }
6128 break;
6129 case Attribute_kind:
6130 if (forbidden_name(c, targ->v.Attribute.attr, Store))
6131 return 0;
6132 if (!s->v.AnnAssign.value &&
6133 !check_ann_expr(c, targ->v.Attribute.value)) {
6134 return 0;
6135 }
6136 break;
6137 case Subscript_kind:
6138 if (!s->v.AnnAssign.value &&
6139 (!check_ann_expr(c, targ->v.Subscript.value) ||
6140 !check_ann_subscr(c, targ->v.Subscript.slice))) {
6141 return 0;
6142 }
6143 break;
6144 default:
6145 PyErr_Format(PyExc_SystemError,
6146 "invalid node type (%d) for annotated assignment",
6147 targ->kind);
6148 return 0;
6149 }
6150 /* Annotation is evaluated last. */
6151 if (!s->v.AnnAssign.simple && !check_annotation(c, s)) {
6152 return 0;
6153 }
6154 return 1;
6155 }
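/* A sketch of the code emitted for `x: int = 5` at module scope, without
 * the annotations future import:
 *     LOAD_CONST   5
 *     STORE_NAME   x                 (the assignment happens first)
 *     LOAD_NAME    int
 *     LOAD_NAME    __annotations__
 *     LOAD_CONST   'x'
 *     STORE_SUBSCR                   (__annotations__['x'] = int)
 */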
6156
6157 /* Raises a SyntaxError and returns 0.
6158 If something goes wrong, a different exception may be raised.
6159 */
6160
6161 static int
6162 compiler_error(struct compiler *c, const char *format, ...)
6163 {
6164 va_list vargs;
6165 #ifdef HAVE_STDARG_PROTOTYPES
6166 va_start(vargs, format);
6167 #else
6168 va_start(vargs);
6169 #endif
6170 PyObject *msg = PyUnicode_FromFormatV(format, vargs);
6171 va_end(vargs);
6172 if (msg == NULL) {
6173 return 0;
6174 }
6175 PyObject *loc = PyErr_ProgramTextObject(c->c_filename, c->u->u_lineno);
6176 if (loc == NULL) {
6177 Py_INCREF(Py_None);
6178 loc = Py_None;
6179 }
6180 PyObject *args = Py_BuildValue("O(OiiOii)", msg, c->c_filename,
6181 c->u->u_lineno, c->u->u_col_offset + 1, loc,
6182 c->u->u_end_lineno, c->u->u_end_col_offset + 1);
6183 Py_DECREF(msg);
6184 if (args == NULL) {
6185 goto exit;
6186 }
6187 PyErr_SetObject(PyExc_SyntaxError, args);
6188 exit:
6189 Py_DECREF(loc);
6190 Py_XDECREF(args);
6191 return 0;
6192 }
6193
6194 /* Emits a SyntaxWarning and returns 1 on success.
6195    If a SyntaxWarning is raised as an error, replaces it with a SyntaxError
6196    and returns 0.
6197 */
6198 static int
6199 compiler_warn(struct compiler *c, const char *format, ...)
6200 {
6201 va_list vargs;
6202 #ifdef HAVE_STDARG_PROTOTYPES
6203 va_start(vargs, format);
6204 #else
6205 va_start(vargs);
6206 #endif
6207 PyObject *msg = PyUnicode_FromFormatV(format, vargs);
6208 va_end(vargs);
6209 if (msg == NULL) {
6210 return 0;
6211 }
6212 if (PyErr_WarnExplicitObject(PyExc_SyntaxWarning, msg, c->c_filename,
6213 c->u->u_lineno, NULL, NULL) < 0)
6214 {
6215 if (PyErr_ExceptionMatches(PyExc_SyntaxWarning)) {
6216 /* Replace the SyntaxWarning exception with a SyntaxError
6217 to get a more accurate error report */
6218 PyErr_Clear();
6219 assert(PyUnicode_AsUTF8(msg) != NULL);
6220 compiler_error(c, PyUnicode_AsUTF8(msg));
6221 }
6222 Py_DECREF(msg);
6223 return 0;
6224 }
6225 Py_DECREF(msg);
6226 return 1;
6227 }
6228
6229 static int
6230 compiler_subscript(struct compiler *c, expr_ty e)
6231 {
6232 expr_context_ty ctx = e->v.Subscript.ctx;
6233 int op = 0;
6234
6235 if (ctx == Load) {
6236 if (!check_subscripter(c, e->v.Subscript.value)) {
6237 return 0;
6238 }
6239 if (!check_index(c, e->v.Subscript.value, e->v.Subscript.slice)) {
6240 return 0;
6241 }
6242 }
6243
6244 switch (ctx) {
6245 case Load: op = BINARY_SUBSCR; break;
6246 case Store: op = STORE_SUBSCR; break;
6247 case Del: op = DELETE_SUBSCR; break;
6248 }
6249 assert(op);
6250 VISIT(c, expr, e->v.Subscript.value);
6251 VISIT(c, expr, e->v.Subscript.slice);
6252 ADDOP(c, op);
6253 return 1;
6254 }
6255
6256 static int
6257 compiler_slice(struct compiler *c, expr_ty s)
6258 {
6259 int n = 2;
6260 assert(s->kind == Slice_kind);
6261
6262 /* only handles the cases where BUILD_SLICE is emitted */
6263 if (s->v.Slice.lower) {
6264 VISIT(c, expr, s->v.Slice.lower);
6265 }
6266 else {
6267 ADDOP_LOAD_CONST(c, Py_None);
6268 }
6269
6270 if (s->v.Slice.upper) {
6271 VISIT(c, expr, s->v.Slice.upper);
6272 }
6273 else {
6274 ADDOP_LOAD_CONST(c, Py_None);
6275 }
6276
6277 if (s->v.Slice.step) {
6278 n++;
6279 VISIT(c, expr, s->v.Slice.step);
6280 }
6281 ADDOP_I(c, BUILD_SLICE, n);
6282 return 1;
6283 }
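/* For example, `x[a:b]` loads a and b and emits BUILD_SLICE 2, while
 * `x[a:b:c]` emits BUILD_SLICE 3. Omitted bounds become None, so `x[::2]`
 * loads None, None and 2 before BUILD_SLICE 3.
 */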
6284
6285
6286 // PEP 634: Structural Pattern Matching
6287
6288 // To keep things simple, all compiler_pattern_* and pattern_helper_* routines
6289 // follow the convention of consuming TOS (the subject for the given pattern)
6290 // and calling jump_to_fail_pop on failure (no match).
6291
6292 // When calling into these routines, it's important that pc->on_top be kept
6293 // updated to reflect the current number of items that we are using on the top
6294 // of the stack: they will be popped on failure, and any name captures will be
6295 // stored *underneath* them on success. This lets us defer all names stores
6296 // until the *entire* pattern matches.
6297
6298 #define WILDCARD_CHECK(N) \
6299 ((N)->kind == MatchAs_kind && !(N)->v.MatchAs.name)
6300
6301 #define WILDCARD_STAR_CHECK(N) \
6302 ((N)->kind == MatchStar_kind && !(N)->v.MatchStar.name)
6303
6304 // Limit permitted subexpressions, even if the parser & AST validator let them through
6305 #define MATCH_VALUE_EXPR(N) \
6306 ((N)->kind == Constant_kind || (N)->kind == Attribute_kind)
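/* Examples: `case _:` satisfies WILDCARD_CHECK, the `*_` in `case [*_]:`
 * satisfies WILDCARD_STAR_CHECK, and `case 404:` / `case colors.RED:` are
 * the two shapes accepted by MATCH_VALUE_EXPR (constants and dotted names).
 */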
6307
6308 // Allocate or resize pc->fail_pop to allow for n items to be popped on failure.
6309 static int
6310 ensure_fail_pop(struct compiler *c, pattern_context *pc, Py_ssize_t n)
6311 {
6312 Py_ssize_t size = n + 1;
6313 if (size <= pc->fail_pop_size) {
6314 return 1;
6315 }
6316 Py_ssize_t needed = sizeof(basicblock*) * size;
6317 basicblock **resized = PyObject_Realloc(pc->fail_pop, needed);
6318 if (resized == NULL) {
6319 PyErr_NoMemory();
6320 return 0;
6321 }
6322 pc->fail_pop = resized;
6323 while (pc->fail_pop_size < size) {
6324 basicblock *new_block;
6325 RETURN_IF_FALSE(new_block = compiler_new_block(c));
6326 pc->fail_pop[pc->fail_pop_size++] = new_block;
6327 }
6328 return 1;
6329 }
6330
6331 // Use op to jump to the correct fail_pop block.
6332 static int
6333 jump_to_fail_pop(struct compiler *c, pattern_context *pc, int op)
6334 {
6335 // Pop any items on the top of the stack, plus any objects we were going to
6336 // capture on success:
6337 Py_ssize_t pops = pc->on_top + PyList_GET_SIZE(pc->stores);
6338 RETURN_IF_FALSE(ensure_fail_pop(c, pc, pops));
6339 ADDOP_JUMP(c, op, pc->fail_pop[pops]);
6340 return 1;
6341 }
6342
6343 // Build all of the fail_pop blocks and reset fail_pop.
6344 static int
6345 emit_and_reset_fail_pop(struct compiler *c, pattern_context *pc)
6346 {
6347 if (!pc->fail_pop_size) {
6348 assert(pc->fail_pop == NULL);
6349 return 1;
6350 }
6351 while (--pc->fail_pop_size) {
6352 compiler_use_next_block(c, pc->fail_pop[pc->fail_pop_size]);
6353 if (!compiler_addop(c, POP_TOP)) {
6354 pc->fail_pop_size = 0;
6355 PyObject_Free(pc->fail_pop);
6356 pc->fail_pop = NULL;
6357 return 0;
6358 }
6359 }
6360 compiler_use_next_block(c, pc->fail_pop[0]);
6361 PyObject_Free(pc->fail_pop);
6362 pc->fail_pop = NULL;
6363 return 1;
6364 }
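/* The blocks are emitted back-to-back in decreasing order, each a single
 * POP_TOP falling through to the next, so a jump to fail_pop[n] pops
 * exactly n items before control reaches fail_pop[0].
 */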
6365
6366 static int
6367 compiler_error_duplicate_store(struct compiler *c, identifier n)
6368 {
6369 return compiler_error(c, "multiple assignments to name %R in pattern", n);
6370 }
6371
6372 // Duplicate the effect of 3.10's ROT_* instructions using SWAPs.
6373 static int
6374 pattern_helper_rotate(struct compiler *c, Py_ssize_t count)
6375 {
6376 while (1 < count) {
6377 ADDOP_I(c, SWAP, count--);
6378 }
6379 return 1;
6380 }
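/* For example, a count of 4 emits SWAP 4; SWAP 3; SWAP 2, which moves TOS
 * down three slots and shifts the items above it up by one slot, the same
 * net effect as 3.10's ROT_FOUR.
 */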
6381
6382 static int
6383 pattern_helper_store_name(struct compiler *c, identifier n, pattern_context *pc)
6384 {
6385 if (n == NULL) {
6386 ADDOP(c, POP_TOP);
6387 return 1;
6388 }
6389 if (forbidden_name(c, n, Store)) {
6390 return 0;
6391 }
6392 // Can't assign to the same name twice:
6393 int duplicate = PySequence_Contains(pc->stores, n);
6394 if (duplicate < 0) {
6395 return 0;
6396 }
6397 if (duplicate) {
6398 return compiler_error_duplicate_store(c, n);
6399 }
6400 // Rotate this object underneath any items we need to preserve:
6401 Py_ssize_t rotations = pc->on_top + PyList_GET_SIZE(pc->stores) + 1;
6402 RETURN_IF_FALSE(pattern_helper_rotate(c, rotations));
6403 return !PyList_Append(pc->stores, n);
6404 }
6405
6406
6407 static int
6408 pattern_unpack_helper(struct compiler *c, asdl_pattern_seq *elts)
6409 {
6410 Py_ssize_t n = asdl_seq_LEN(elts);
6411 int seen_star = 0;
6412 for (Py_ssize_t i = 0; i < n; i++) {
6413 pattern_ty elt = asdl_seq_GET(elts, i);
6414 if (elt->kind == MatchStar_kind && !seen_star) {
6415 if ((i >= (1 << 8)) ||
6416 (n-i-1 >= (INT_MAX >> 8)))
6417 return compiler_error(c,
6418 "too many expressions in "
6419 "star-unpacking sequence pattern");
6420 ADDOP_I(c, UNPACK_EX, (i + ((n-i-1) << 8)));
6421 seen_star = 1;
6422 }
6423 else if (elt->kind == MatchStar_kind) {
6424 return compiler_error(c,
6425 "multiple starred expressions in sequence pattern");
6426 }
6427 }
6428 if (!seen_star) {
6429 ADDOP_I(c, UNPACK_SEQUENCE, n);
6430 }
6431 return 1;
6432 }
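/* UNPACK_EX packs two counts into its oparg: the low byte is the number of
 * values before the star, the higher bytes the number after it. A pattern
 * like [a, *b, c] therefore emits UNPACK_EX 257, i.e. 1 + (1 << 8).
 */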
6433
6434 static int
6435 pattern_helper_sequence_unpack(struct compiler *c, asdl_pattern_seq *patterns,
6436 Py_ssize_t star, pattern_context *pc)
6437 {
6438 RETURN_IF_FALSE(pattern_unpack_helper(c, patterns));
6439 Py_ssize_t size = asdl_seq_LEN(patterns);
6440 // We've now got a bunch of new subjects on the stack. They need to remain
6441 // there after each subpattern match:
6442 pc->on_top += size;
6443 for (Py_ssize_t i = 0; i < size; i++) {
6444 // One less item to keep track of each time we loop through:
6445 pc->on_top--;
6446 pattern_ty pattern = asdl_seq_GET(patterns, i);
6447 RETURN_IF_FALSE(compiler_pattern_subpattern(c, pattern, pc));
6448 }
6449 return 1;
6450 }
6451
6452 // Like pattern_helper_sequence_unpack, but uses BINARY_SUBSCR instead of
6453 // UNPACK_SEQUENCE / UNPACK_EX. This is more efficient for patterns with a
6454 // starred wildcard like [first, *_] / [first, *_, last] / [*_, last] / etc.
6455 static int
6456 pattern_helper_sequence_subscr(struct compiler *c, asdl_pattern_seq *patterns,
6457 Py_ssize_t star, pattern_context *pc)
6458 {
6459 // We need to keep the subject around for extracting elements:
6460 pc->on_top++;
6461 Py_ssize_t size = asdl_seq_LEN(patterns);
6462 for (Py_ssize_t i = 0; i < size; i++) {
6463 pattern_ty pattern = asdl_seq_GET(patterns, i);
6464 if (WILDCARD_CHECK(pattern)) {
6465 continue;
6466 }
6467 if (i == star) {
6468 assert(WILDCARD_STAR_CHECK(pattern));
6469 continue;
6470 }
6471 ADDOP_I(c, COPY, 1);
6472 if (i < star) {
6473 ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(i));
6474 }
6475 else {
6476 // The subject may not support negative indexing! Compute a
6477 // nonnegative index:
6478 ADDOP(c, GET_LEN);
6479 ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(size - i));
6480 ADDOP_BINARY(c, Sub);
6481 }
6482 ADDOP(c, BINARY_SUBSCR);
6483 RETURN_IF_FALSE(compiler_pattern_subpattern(c, pattern, pc));
6484 }
6485 // Pop the subject, we're done with it:
6486 pc->on_top--;
6487 ADDOP(c, POP_TOP);
6488 return 1;
6489 }
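/* Example: for [first, *_, last] only two lookups are emitted, subject[0]
 * for `first` and subject[len(subject) - 1] for `last`; the length is
 * computed with GET_LEN and a subtraction because the subject might not
 * support negative indexing.
 */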
6490
6491 // Like compiler_pattern, but turn off checks for irrefutability.
6492 static int
6493 compiler_pattern_subpattern(struct compiler *c, pattern_ty p, pattern_context *pc)
6494 {
6495 int allow_irrefutable = pc->allow_irrefutable;
6496 pc->allow_irrefutable = 1;
6497 RETURN_IF_FALSE(compiler_pattern(c, p, pc));
6498 pc->allow_irrefutable = allow_irrefutable;
6499 return 1;
6500 }
6501
6502 static int
6503 compiler_pattern_as(struct compiler *c, pattern_ty p, pattern_context *pc)
6504 {
6505 assert(p->kind == MatchAs_kind);
6506 if (p->v.MatchAs.pattern == NULL) {
6507 // An irrefutable match:
6508 if (!pc->allow_irrefutable) {
6509 if (p->v.MatchAs.name) {
6510 const char *e = "name capture %R makes remaining patterns unreachable";
6511 return compiler_error(c, e, p->v.MatchAs.name);
6512 }
6513 const char *e = "wildcard makes remaining patterns unreachable";
6514 return compiler_error(c, e);
6515 }
6516 return pattern_helper_store_name(c, p->v.MatchAs.name, pc);
6517 }
6518 // Need to make a copy for (possibly) storing later:
6519 pc->on_top++;
6520 ADDOP_I(c, COPY, 1);
6521 RETURN_IF_FALSE(compiler_pattern(c, p->v.MatchAs.pattern, pc));
6522 // Success! Store it:
6523 pc->on_top--;
6524 RETURN_IF_FALSE(pattern_helper_store_name(c, p->v.MatchAs.name, pc));
6525 return 1;
6526 }
6527
6528 static int
6529 compiler_pattern_star(struct compiler *c, pattern_ty p, pattern_context *pc)
6530 {
6531 assert(p->kind == MatchStar_kind);
6532 RETURN_IF_FALSE(pattern_helper_store_name(c, p->v.MatchStar.name, pc));
6533 return 1;
6534 }
6535
6536 static int
6537 validate_kwd_attrs(struct compiler *c, asdl_identifier_seq *attrs, asdl_pattern_seq* patterns)
6538 {
6539 // Any errors will point to the pattern rather than the arg name as the
6540 // parser is only supplying identifiers rather than Name or keyword nodes
6541 Py_ssize_t nattrs = asdl_seq_LEN(attrs);
6542 for (Py_ssize_t i = 0; i < nattrs; i++) {
6543 identifier attr = ((identifier)asdl_seq_GET(attrs, i));
6544 SET_LOC(c, ((pattern_ty) asdl_seq_GET(patterns, i)));
6545 if (forbidden_name(c, attr, Store)) {
6546 return -1;
6547 }
6548 for (Py_ssize_t j = i + 1; j < nattrs; j++) {
6549 identifier other = ((identifier)asdl_seq_GET(attrs, j));
6550 if (!PyUnicode_Compare(attr, other)) {
6551 SET_LOC(c, ((pattern_ty) asdl_seq_GET(patterns, j)));
6552 compiler_error(c, "attribute name repeated in class pattern: %U", attr);
6553 return -1;
6554 }
6555 }
6556 }
6557 return 0;
6558 }
6559
6560 static int
6561 compiler_pattern_class(struct compiler *c, pattern_ty p, pattern_context *pc)
6562 {
6563 assert(p->kind == MatchClass_kind);
6564 asdl_pattern_seq *patterns = p->v.MatchClass.patterns;
6565 asdl_identifier_seq *kwd_attrs = p->v.MatchClass.kwd_attrs;
6566 asdl_pattern_seq *kwd_patterns = p->v.MatchClass.kwd_patterns;
6567 Py_ssize_t nargs = asdl_seq_LEN(patterns);
6568 Py_ssize_t nattrs = asdl_seq_LEN(kwd_attrs);
6569 Py_ssize_t nkwd_patterns = asdl_seq_LEN(kwd_patterns);
6570 if (nattrs != nkwd_patterns) {
6571 // AST validator shouldn't let this happen, but if it does,
6572 // just fail, don't crash out of the interpreter
6573 const char *e = "kwd_attrs (%d) / kwd_patterns (%d) length mismatch in class pattern";
6574 return compiler_error(c, e, nattrs, nkwd_patterns);
6575 }
6576 if (INT_MAX < nargs || INT_MAX < nargs + nattrs - 1) {
6577 const char *e = "too many sub-patterns in class pattern %R";
6578 return compiler_error(c, e, p->v.MatchClass.cls);
6579 }
6580 if (nattrs) {
6581 RETURN_IF_FALSE(!validate_kwd_attrs(c, kwd_attrs, kwd_patterns));
6582 SET_LOC(c, p);
6583 }
6584 VISIT(c, expr, p->v.MatchClass.cls);
6585 PyObject *attr_names;
6586 RETURN_IF_FALSE(attr_names = PyTuple_New(nattrs));
6587 Py_ssize_t i;
6588 for (i = 0; i < nattrs; i++) {
6589 PyObject *name = asdl_seq_GET(kwd_attrs, i);
6590 Py_INCREF(name);
6591 PyTuple_SET_ITEM(attr_names, i, name);
6592 }
6593 ADDOP_LOAD_CONST_NEW(c, attr_names);
6594 ADDOP_I(c, MATCH_CLASS, nargs);
6595 ADDOP_I(c, COPY, 1);
6596 ADDOP_LOAD_CONST(c, Py_None);
6597 ADDOP_I(c, IS_OP, 1);
6598 // TOS is now a tuple of (nargs + nattrs) attributes (or None):
6599 pc->on_top++;
6600 RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
6601 ADDOP_I(c, UNPACK_SEQUENCE, nargs + nattrs);
6602 pc->on_top += nargs + nattrs - 1;
6603 for (i = 0; i < nargs + nattrs; i++) {
6604 pc->on_top--;
6605 pattern_ty pattern;
6606 if (i < nargs) {
6607 // Positional:
6608 pattern = asdl_seq_GET(patterns, i);
6609 }
6610 else {
6611 // Keyword:
6612 pattern = asdl_seq_GET(kwd_patterns, i - nargs);
6613 }
6614 if (WILDCARD_CHECK(pattern)) {
6615 ADDOP(c, POP_TOP);
6616 continue;
6617 }
6618 RETURN_IF_FALSE(compiler_pattern_subpattern(c, pattern, pc));
6619 }
6620 // Success! Every unpacked attribute has been consumed by a sub-pattern:
6621 return 1;
6622 }
6623
6624 static int
6625 compiler_pattern_mapping(struct compiler *c, pattern_ty p, pattern_context *pc)
6626 {
6627 assert(p->kind == MatchMapping_kind);
6628 asdl_expr_seq *keys = p->v.MatchMapping.keys;
6629 asdl_pattern_seq *patterns = p->v.MatchMapping.patterns;
6630 Py_ssize_t size = asdl_seq_LEN(keys);
6631 Py_ssize_t npatterns = asdl_seq_LEN(patterns);
6632 if (size != npatterns) {
6633 // AST validator shouldn't let this happen, but if it does,
6634 // just fail, don't crash out of the interpreter
6635 const char *e = "keys (%d) / patterns (%d) length mismatch in mapping pattern";
6636 return compiler_error(c, e, size, npatterns);
6637 }
6638 // We have a double-star target if "rest" is set
6639 PyObject *star_target = p->v.MatchMapping.rest;
6640 // We need to keep the subject on top during the mapping and length checks:
6641 pc->on_top++;
6642 ADDOP(c, MATCH_MAPPING);
6643 RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
6644 if (!size && !star_target) {
6645 // If the pattern is just "{}", we're done! Pop the subject:
6646 pc->on_top--;
6647 ADDOP(c, POP_TOP);
6648 return 1;
6649 }
6650 if (size) {
6651 // If the pattern has any keys in it, perform a length check:
6652 ADDOP(c, GET_LEN);
6653 ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(size));
6654 ADDOP_COMPARE(c, GtE);
6655 RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
6656 }
6657 if (INT_MAX < size - 1) {
6658 return compiler_error(c, "too many sub-patterns in mapping pattern");
6659 }
6660 // Collect all of the keys into a tuple for MATCH_KEYS and
6661 // **rest. They can either be dotted names or literals:
6662
6663 // Maintaining a set of Constant_kind keys allows us to raise a
6664 // SyntaxError in the case of duplicates.
6665 PyObject *seen = PySet_New(NULL);
6666 if (seen == NULL) {
6667 return 0;
6668 }
6669
6670 // NOTE: goto error on failure in the loop below to avoid leaking `seen`
6671 for (Py_ssize_t i = 0; i < size; i++) {
6672 expr_ty key = asdl_seq_GET(keys, i);
6673 if (key == NULL) {
6674 const char *e = "can't use NULL keys in MatchMapping "
6675 "(set 'rest' parameter instead)";
6676 SET_LOC(c, ((pattern_ty) asdl_seq_GET(patterns, i)));
6677 compiler_error(c, e);
6678 goto error;
6679 }
6680
6681 if (key->kind == Constant_kind) {
6682 int in_seen = PySet_Contains(seen, key->v.Constant.value);
6683 if (in_seen < 0) {
6684 goto error;
6685 }
6686 if (in_seen) {
6687 const char *e = "mapping pattern checks duplicate key (%R)";
6688 compiler_error(c, e, key->v.Constant.value);
6689 goto error;
6690 }
6691 if (PySet_Add(seen, key->v.Constant.value)) {
6692 goto error;
6693 }
6694 }
6695
6696 else if (key->kind != Attribute_kind) {
6697 const char *e = "mapping pattern keys may only match literals and attribute lookups";
6698 compiler_error(c, e);
6699 goto error;
6700 }
6701 if (!compiler_visit_expr(c, key)) {
6702 goto error;
6703 }
6704 }
6705
6706 // all keys have been checked; there are no duplicates
6707 Py_DECREF(seen);
6708
6709 ADDOP_I(c, BUILD_TUPLE, size);
6710 ADDOP(c, MATCH_KEYS);
6711 // There's now a tuple of keys and a tuple of values on top of the subject:
6712 pc->on_top += 2;
6713 ADDOP_I(c, COPY, 1);
6714 ADDOP_LOAD_CONST(c, Py_None);
6715 ADDOP_I(c, IS_OP, 1);
6716 RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
6717 // So far so good. Use that tuple of values on the stack to match
6718 // sub-patterns against:
6719 ADDOP_I(c, UNPACK_SEQUENCE, size);
6720 pc->on_top += size - 1;
6721 for (Py_ssize_t i = 0; i < size; i++) {
6722 pc->on_top--;
6723 pattern_ty pattern = asdl_seq_GET(patterns, i);
6724 RETURN_IF_FALSE(compiler_pattern_subpattern(c, pattern, pc));
6725 }
6726 // If we get this far, it's a match! Whatever happens next should consume
6727 // the tuple of keys and the subject:
6728 pc->on_top -= 2;
6729 if (star_target) {
6730 // If we have a starred name, bind a dict of remaining items to it (this may
6731 // seem a bit inefficient, but keys is rarely big enough to actually impact
6732 // runtime):
6733 // rest = dict(TOS1)
6734 // for key in TOS:
6735 // del rest[key]
6736 ADDOP_I(c, BUILD_MAP, 0); // [subject, keys, empty]
6737 ADDOP_I(c, SWAP, 3); // [empty, keys, subject]
6738 ADDOP_I(c, DICT_UPDATE, 2); // [copy, keys]
6739 ADDOP_I(c, UNPACK_SEQUENCE, size); // [copy, keys...]
6740 while (size) {
6741 ADDOP_I(c, COPY, 1 + size--); // [copy, keys..., copy]
6742 ADDOP_I(c, SWAP, 2); // [copy, keys..., copy, key]
6743 ADDOP(c, DELETE_SUBSCR); // [copy, keys...]
6744 }
6745 RETURN_IF_FALSE(pattern_helper_store_name(c, star_target, pc));
6746 }
6747 else {
6748 ADDOP(c, POP_TOP); // Tuple of keys.
6749 ADDOP(c, POP_TOP); // Subject.
6750 }
6751 return 1;
6752
6753 error:
6754 Py_DECREF(seen);
6755 return 0;
6756 }
6757
6758 static int
6759 compiler_pattern_or(struct compiler *c, pattern_ty p, pattern_context *pc)
6760 {
6761 assert(p->kind == MatchOr_kind);
6762 basicblock *end;
6763 RETURN_IF_FALSE(end = compiler_new_block(c));
6764 Py_ssize_t size = asdl_seq_LEN(p->v.MatchOr.patterns);
6765 assert(size > 1);
6766 // We're going to be messing with pc. Keep the original info handy:
6767 pattern_context old_pc = *pc;
6768 Py_INCREF(pc->stores);
6769 // control is the list of names bound by the first alternative. It is used
6770 // to check that the other alternatives bind the same names, and to correct
6771 // the order in which extracted elements are placed on the stack.
6772 PyObject *control = NULL;
6773 // NOTE: We can't use returning macros anymore! goto error on error.
6774 for (Py_ssize_t i = 0; i < size; i++) {
6775 pattern_ty alt = asdl_seq_GET(p->v.MatchOr.patterns, i);
6776 SET_LOC(c, alt);
6777 PyObject *pc_stores = PyList_New(0);
6778 if (pc_stores == NULL) {
6779 goto error;
6780 }
6781 Py_SETREF(pc->stores, pc_stores);
6782 // An irrefutable sub-pattern must be last, if it is allowed at all:
6783 pc->allow_irrefutable = (i == size - 1) && old_pc.allow_irrefutable;
6784 pc->fail_pop = NULL;
6785 pc->fail_pop_size = 0;
6786 pc->on_top = 0;
6787 if (!compiler_addop_i(c, COPY, 1) || !compiler_pattern(c, alt, pc)) {
6788 goto error;
6789 }
6790 // Success!
6791 Py_ssize_t nstores = PyList_GET_SIZE(pc->stores);
6792 if (!i) {
6793 // This is the first alternative, so save its stores as a "control"
6794 // for the others (they can't bind a different set of names, and
6795 // might need to be reordered):
6796 assert(control == NULL);
6797 control = pc->stores;
6798 Py_INCREF(control);
6799 }
6800 else if (nstores != PyList_GET_SIZE(control)) {
6801 goto diff;
6802 }
6803 else if (nstores) {
6804 // There were captures. Check to see if we differ from control:
6805 Py_ssize_t icontrol = nstores;
6806 while (icontrol--) {
6807 PyObject *name = PyList_GET_ITEM(control, icontrol);
6808 Py_ssize_t istores = PySequence_Index(pc->stores, name);
6809 if (istores < 0) {
6810 PyErr_Clear();
6811 goto diff;
6812 }
6813 if (icontrol != istores) {
6814 // Reorder the names on the stack to match the order of the
6815 // names in control. There's probably a better way of doing
6816 // this; the current solution is potentially very
6817 // inefficient when each alternative subpattern binds lots
6818 // of names in different orders. It's fine for reasonable
6819 // cases, though, and the peephole optimizer will ensure
6820 // that the final code is as efficient as possible.
6821 assert(istores < icontrol);
6822 Py_ssize_t rotations = istores + 1;
6823 // Perform the same rotation on pc->stores:
6824 PyObject *rotated = PyList_GetSlice(pc->stores, 0,
6825 rotations);
6826 if (rotated == NULL ||
6827 PyList_SetSlice(pc->stores, 0, rotations, NULL) ||
6828 PyList_SetSlice(pc->stores, icontrol - istores,
6829 icontrol - istores, rotated))
6830 {
6831 Py_XDECREF(rotated);
6832 goto error;
6833 }
6834 Py_DECREF(rotated);
6835 // That just did:
6836 // rotated = pc_stores[:rotations]
6837 // del pc_stores[:rotations]
6838 // pc_stores[icontrol-istores:icontrol-istores] = rotated
6839 // Do the same thing to the stack, using several
6840 // rotations:
6841 while (rotations--) {
6842 if (!pattern_helper_rotate(c, icontrol + 1)){
6843 goto error;
6844 }
6845 }
6846 }
6847 }
6848 }
6849 assert(control);
6850 if (!compiler_addop_j(c, JUMP, end) ||
6851 !emit_and_reset_fail_pop(c, pc))
6852 {
6853 goto error;
6854 }
6855 }
6856 Py_DECREF(pc->stores);
6857 *pc = old_pc;
6858 Py_INCREF(pc->stores);
6859 // Need to NULL this for the PyObject_Free call in the error block.
6860 old_pc.fail_pop = NULL;
6861 // No match. Pop the remaining copy of the subject and fail:
6862 if (!compiler_addop(c, POP_TOP) || !jump_to_fail_pop(c, pc, JUMP)) {
6863 goto error;
6864 }
6865 compiler_use_next_block(c, end);
6866 Py_ssize_t nstores = PyList_GET_SIZE(control);
6867 // There's a bunch of stuff on the stack between where the new stores
6868 // are and where they need to be:
6869 // - The other stores.
6870 // - A copy of the subject.
6871 // - Anything else that may be on top of the stack.
6872 // - Any previous stores we've already stashed away on the stack.
6873 Py_ssize_t nrots = nstores + 1 + pc->on_top + PyList_GET_SIZE(pc->stores);
6874 for (Py_ssize_t i = 0; i < nstores; i++) {
6875 // Rotate this capture to its proper place on the stack:
6876 if (!pattern_helper_rotate(c, nrots)) {
6877 goto error;
6878 }
6879 // Update the list of previous stores with this new name, checking for
6880 // duplicates:
6881 PyObject *name = PyList_GET_ITEM(control, i);
6882 int dupe = PySequence_Contains(pc->stores, name);
6883 if (dupe < 0) {
6884 goto error;
6885 }
6886 if (dupe) {
6887 compiler_error_duplicate_store(c, name);
6888 goto error;
6889 }
6890 if (PyList_Append(pc->stores, name)) {
6891 goto error;
6892 }
6893 }
6894 Py_DECREF(old_pc.stores);
6895 Py_DECREF(control);
6896 // NOTE: Returning macros are safe again.
6897 // Pop the copy of the subject:
6898 ADDOP(c, POP_TOP);
6899 return 1;
6900 diff:
6901 compiler_error(c, "alternative patterns bind different names");
6902 error:
6903 PyObject_Free(old_pc.fail_pop);
6904 Py_DECREF(old_pc.stores);
6905 Py_XDECREF(control);
6906 return 0;
6907 }
6908
6909
6910 static int
6911 compiler_pattern_sequence(struct compiler *c, pattern_ty p, pattern_context *pc)
6912 {
6913 assert(p->kind == MatchSequence_kind);
6914 asdl_pattern_seq *patterns = p->v.MatchSequence.patterns;
6915 Py_ssize_t size = asdl_seq_LEN(patterns);
6916 Py_ssize_t star = -1;
6917 int only_wildcard = 1;
6918 int star_wildcard = 0;
6919 // Find a starred name, if it exists. There may be at most one:
6920 for (Py_ssize_t i = 0; i < size; i++) {
6921 pattern_ty pattern = asdl_seq_GET(patterns, i);
6922 if (pattern->kind == MatchStar_kind) {
6923 if (star >= 0) {
6924 const char *e = "multiple starred names in sequence pattern";
6925 return compiler_error(c, e);
6926 }
6927 star_wildcard = WILDCARD_STAR_CHECK(pattern);
6928 only_wildcard &= star_wildcard;
6929 star = i;
6930 continue;
6931 }
6932 only_wildcard &= WILDCARD_CHECK(pattern);
6933 }
6934 // We need to keep the subject on top during the sequence and length checks:
6935 pc->on_top++;
6936 ADDOP(c, MATCH_SEQUENCE);
6937 RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
6938 if (star < 0) {
6939 // No star: len(subject) == size
6940 ADDOP(c, GET_LEN);
6941 ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(size));
6942 ADDOP_COMPARE(c, Eq);
6943 RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
6944 }
6945 else if (size > 1) {
6946 // Star: len(subject) >= size - 1
6947 ADDOP(c, GET_LEN);
6948 ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(size - 1));
6949 ADDOP_COMPARE(c, GtE);
6950 RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
6951 }
6952 // Whatever comes next should consume the subject:
6953 pc->on_top--;
6954 if (only_wildcard) {
6955 // Patterns like: [] / [_] / [_, _] / [*_] / [_, *_] / [_, _, *_] / etc.
6956 ADDOP(c, POP_TOP);
6957 }
6958 else if (star_wildcard) {
6959 RETURN_IF_FALSE(pattern_helper_sequence_subscr(c, patterns, star, pc));
6960 }
6961 else {
6962 RETURN_IF_FALSE(pattern_helper_sequence_unpack(c, patterns, star, pc));
6963 }
6964 return 1;
6965 }
6966
6967 static int
6968 compiler_pattern_value(struct compiler *c, pattern_ty p, pattern_context *pc)
6969 {
6970 assert(p->kind == MatchValue_kind);
6971 expr_ty value = p->v.MatchValue.value;
6972 if (!MATCH_VALUE_EXPR(value)) {
6973 const char *e = "patterns may only match literals and attribute lookups";
6974 return compiler_error(c, e);
6975 }
6976 VISIT(c, expr, value);
6977 ADDOP_COMPARE(c, Eq);
6978 RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
6979 return 1;
6980 }
6981
6982 static int
6983 compiler_pattern_singleton(struct compiler *c, pattern_ty p, pattern_context *pc)
6984 {
6985 assert(p->kind == MatchSingleton_kind);
6986 ADDOP_LOAD_CONST(c, p->v.MatchSingleton.value);
6987 ADDOP_COMPARE(c, Is);
6988 RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE));
6989 return 1;
6990 }
6991
6992 static int
6993 compiler_pattern(struct compiler *c, pattern_ty p, pattern_context *pc)
6994 {
6995 SET_LOC(c, p);
6996 switch (p->kind) {
6997 case MatchValue_kind:
6998 return compiler_pattern_value(c, p, pc);
6999 case MatchSingleton_kind:
7000 return compiler_pattern_singleton(c, p, pc);
7001 case MatchSequence_kind:
7002 return compiler_pattern_sequence(c, p, pc);
7003 case MatchMapping_kind:
7004 return compiler_pattern_mapping(c, p, pc);
7005 case MatchClass_kind:
7006 return compiler_pattern_class(c, p, pc);
7007 case MatchStar_kind:
7008 return compiler_pattern_star(c, p, pc);
7009 case MatchAs_kind:
7010 return compiler_pattern_as(c, p, pc);
7011 case MatchOr_kind:
7012 return compiler_pattern_or(c, p, pc);
7013 }
7014 // AST validator shouldn't let this happen, but if it does,
7015 // just fail, don't crash out of the interpreter
7016 const char *e = "invalid match pattern node in AST (kind=%d)";
7017 return compiler_error(c, e, p->kind);
7018 }
7019
7020 static int
7021 compiler_match_inner(struct compiler *c, stmt_ty s, pattern_context *pc)
7022 {
7023 VISIT(c, expr, s->v.Match.subject);
7024 basicblock *end;
7025 RETURN_IF_FALSE(end = compiler_new_block(c));
7026 Py_ssize_t cases = asdl_seq_LEN(s->v.Match.cases);
7027 assert(cases > 0);
7028 match_case_ty m = asdl_seq_GET(s->v.Match.cases, cases - 1);
7029 int has_default = WILDCARD_CHECK(m->pattern) && 1 < cases;
7030 for (Py_ssize_t i = 0; i < cases - has_default; i++) {
7031 m = asdl_seq_GET(s->v.Match.cases, i);
7032 SET_LOC(c, m->pattern);
7033 // Only copy the subject if we're *not* on the last case:
7034 if (i != cases - has_default - 1) {
7035 ADDOP_I(c, COPY, 1);
7036 }
7037 RETURN_IF_FALSE(pc->stores = PyList_New(0));
7038 // Irrefutable cases must be either guarded, last, or both:
7039 pc->allow_irrefutable = m->guard != NULL || i == cases - 1;
7040 pc->fail_pop = NULL;
7041 pc->fail_pop_size = 0;
7042 pc->on_top = 0;
7043 // NOTE: Can't use returning macros here (they'll leak pc->stores)!
7044 if (!compiler_pattern(c, m->pattern, pc)) {
7045 Py_DECREF(pc->stores);
7046 return 0;
7047 }
7048 assert(!pc->on_top);
7049 // It's a match! Store all of the captured names (they're on the stack).
7050 Py_ssize_t nstores = PyList_GET_SIZE(pc->stores);
7051 for (Py_ssize_t n = 0; n < nstores; n++) {
7052 PyObject *name = PyList_GET_ITEM(pc->stores, n);
7053 if (!compiler_nameop(c, name, Store)) {
7054 Py_DECREF(pc->stores);
7055 return 0;
7056 }
7057 }
7058 Py_DECREF(pc->stores);
7059 // NOTE: Returning macros are safe again.
7060 if (m->guard) {
7061 RETURN_IF_FALSE(ensure_fail_pop(c, pc, 0));
7062 RETURN_IF_FALSE(compiler_jump_if(c, m->guard, pc->fail_pop[0], 0));
7063 }
7064 // Success! Pop the subject off, we're done with it:
7065 if (i != cases - has_default - 1) {
7066 ADDOP(c, POP_TOP);
7067 }
7068 VISIT_SEQ(c, stmt, m->body);
7069 UNSET_LOC(c);
7070 ADDOP_JUMP(c, JUMP, end);
7071 // If the pattern fails to match, we want the line number of the
7072 // cleanup to be associated with the failed pattern, not the last line
7073 // of the body
7074 SET_LOC(c, m->pattern);
7075 RETURN_IF_FALSE(emit_and_reset_fail_pop(c, pc));
7076 }
7077 if (has_default) {
7078 // A trailing "case _" is common, and lets us save a bit of redundant
7079 // pushing and popping in the loop above:
7080 m = asdl_seq_GET(s->v.Match.cases, cases - 1);
7081 SET_LOC(c, m->pattern);
7082 if (cases == 1) {
7083 // No matches. Done with the subject:
7084 ADDOP(c, POP_TOP);
7085 }
7086 else {
7087 // Show line coverage for default case (it doesn't create bytecode)
7088 ADDOP(c, NOP);
7089 }
7090 if (m->guard) {
7091 RETURN_IF_FALSE(compiler_jump_if(c, m->guard, end, 0));
7092 }
7093 VISIT_SEQ(c, stmt, m->body);
7094 UNSET_LOC(c);
7095 }
7096 compiler_use_next_block(c, end);
7097 return 1;
7098 }
7099
7100 static int
7101 compiler_match(struct compiler *c, stmt_ty s)
7102 {
7103 pattern_context pc;
7104 pc.fail_pop = NULL;
7105 int result = compiler_match_inner(c, s, &pc);
7106 PyObject_Free(pc.fail_pop);
7107 return result;
7108 }
7109
7110 #undef WILDCARD_CHECK
7111 #undef WILDCARD_STAR_CHECK
7112
7113 /* End of the compiler section, beginning of the assembler section */
7114
7115 /* Do a depth-first search of the basic block graph, starting with block.
7116    post records the block indices in post-order.
7117 
7118    XXX must handle implicit jumps from one block to the next
7119 */
7120
7121
7122 struct assembler {
7123 PyObject *a_bytecode; /* bytes containing bytecode */
7124 PyObject *a_except_table; /* bytes containing exception table */
7125 basicblock *a_entry;
7126 int a_offset; /* offset into bytecode */
7127 int a_nblocks; /* number of reachable blocks */
7128 int a_except_table_off; /* offset into exception table */
7129 int a_prevlineno; /* lineno of last emitted line in line table */
7130 int a_prev_end_lineno; /* end_lineno of last emitted line in line table */
7131 int a_lineno; /* lineno of last emitted instruction */
7132 int a_end_lineno; /* end_lineno of last emitted instruction */
7133 int a_lineno_start; /* bytecode start offset of current lineno */
7134 int a_end_lineno_start; /* bytecode start offset of current end_lineno */
7135 /* Location Info */
7136 PyObject* a_linetable; /* bytes containing location info */
7137 int a_location_off; /* offset of last written location info frame */
7138 };
7139
7140 Py_LOCAL_INLINE(void)
7141 stackdepth_push(basicblock ***sp, basicblock *b, int depth)
7142 {
7143 assert(b->b_startdepth < 0 || b->b_startdepth == depth);
7144 if (b->b_startdepth < depth && b->b_startdepth < 100) {
7145 assert(b->b_startdepth < 0);
7146 b->b_startdepth = depth;
7147 *(*sp)++ = b;
7148 }
7149 }
7150
7151 /* Find the flow path that needs the largest stack. We assume that
7152 * cycles in the flow graph have no net effect on the stack depth.
7153 */
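/* For example, the code for `a + b` pushes a, then b, then replaces both
 * with the sum, so that expression alone needs a depth of 2; the maximum
 * over all paths becomes the code object's stack size.
 */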
7154 static int
7155 stackdepth(struct compiler *c)
7156 {
7157 basicblock *b, *entryblock = NULL;
7158 basicblock **stack, **sp;
7159 int nblocks = 0, maxdepth = 0;
7160 for (b = c->u->u_blocks; b != NULL; b = b->b_list) {
7161 b->b_startdepth = INT_MIN;
7162 entryblock = b;
7163 nblocks++;
7164 }
7165 assert(entryblock != NULL);
7166 stack = (basicblock **)PyObject_Malloc(sizeof(basicblock *) * nblocks);
7167 if (!stack) {
7168 PyErr_NoMemory();
7169 return -1;
7170 }
7171
7172 sp = stack;
7173 if (c->u->u_ste->ste_generator || c->u->u_ste->ste_coroutine) {
7174 stackdepth_push(&sp, entryblock, 1);
7175 } else {
7176 stackdepth_push(&sp, entryblock, 0);
7177 }
7178 while (sp != stack) {
7179 b = *--sp;
7180 int depth = b->b_startdepth;
7181 assert(depth >= 0);
7182 basicblock *next = b->b_next;
7183 for (int i = 0; i < b->b_iused; i++) {
7184 struct instr *instr = &b->b_instr[i];
7185 int effect = stack_effect(instr->i_opcode, instr->i_oparg, 0);
7186 if (effect == PY_INVALID_STACK_EFFECT) {
7187 PyErr_Format(PyExc_SystemError,
7188 "compiler stack_effect(opcode=%d, arg=%i) failed",
7189 instr->i_opcode, instr->i_oparg);
7190 PyObject_Free(stack); return -1;  /* free the work stack; it would otherwise leak */
7191 }
7192 int new_depth = depth + effect;
7193 if (new_depth > maxdepth) {
7194 maxdepth = new_depth;
7195 }
7196 assert(depth >= 0); /* invalid code or bug in stackdepth() */
7197 if (is_jump(instr) || is_block_push(instr)) {
7198 effect = stack_effect(instr->i_opcode, instr->i_oparg, 1);
7199 assert(effect != PY_INVALID_STACK_EFFECT);
7200 int target_depth = depth + effect;
7201 if (target_depth > maxdepth) {
7202 maxdepth = target_depth;
7203 }
7204 assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
7205 stackdepth_push(&sp, instr->i_target, target_depth);
7206 }
7207 depth = new_depth;
7208 assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
7209 if (instr->i_opcode == JUMP_NO_INTERRUPT ||
7210 instr->i_opcode == JUMP ||
7211 instr->i_opcode == RETURN_VALUE ||
7212 instr->i_opcode == RAISE_VARARGS ||
7213 instr->i_opcode == RERAISE)
7214 {
7215 /* remaining code is dead */
7216 next = NULL;
7217 break;
7218 }
7219 }
7220 if (next != NULL) {
7221 assert(b->b_nofallthrough == 0);
7222 stackdepth_push(&sp, next, depth);
7223 }
7224 }
7225 PyObject_Free(stack);
7226 return maxdepth;
7227 }
7228
7229 static int
7230 assemble_init(struct assembler *a, int nblocks, int firstlineno)
7231 {
7232 memset(a, 0, sizeof(struct assembler));
7233 a->a_prevlineno = a->a_lineno = firstlineno;
7234 a->a_prev_end_lineno = a->a_end_lineno = firstlineno;
7235 a->a_linetable = NULL;
7236 a->a_location_off = 0;
7237 a->a_except_table = NULL;
7238 a->a_bytecode = PyBytes_FromStringAndSize(NULL, DEFAULT_CODE_SIZE);
7239 if (a->a_bytecode == NULL) {
7240 goto error;
7241 }
7242 a->a_linetable = PyBytes_FromStringAndSize(NULL, DEFAULT_CNOTAB_SIZE);
7243 if (a->a_linetable == NULL) {
7244 goto error;
7245 }
7246 a->a_except_table = PyBytes_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE);
7247 if (a->a_except_table == NULL) {
7248 goto error;
7249 }
7250 if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) {
7251 PyErr_NoMemory();
7252 goto error;
7253 }
7254 return 1;
7255 error:
7256 Py_XDECREF(a->a_bytecode);
7257 Py_XDECREF(a->a_linetable);
7258 Py_XDECREF(a->a_except_table);
7259 return 0;
7260 }
7261
7262 static void
7263 assemble_free(struct assembler *a)
7264 {
7265 Py_XDECREF(a->a_bytecode);
7266 Py_XDECREF(a->a_linetable);
7267 Py_XDECREF(a->a_except_table);
7268 }
7269
7270 static int
7271 blocksize(basicblock *b)
7272 {
7273 int i;
7274 int size = 0;
7275
7276 for (i = 0; i < b->b_iused; i++) {
7277 size += instr_size(&b->b_instr[i]);
7278 }
7279 return size;
7280 }
7281
7282 static basicblock *
7283 push_except_block(ExceptStack *stack, struct instr *setup) {
7284 assert(is_block_push(setup));
7285 int opcode = setup->i_opcode;
7286 basicblock * target = setup->i_target;
7287 if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
7288 target->b_preserve_lasti = 1;
7289 }
7290 stack->handlers[++stack->depth] = target;
7291 return target;
7292 }
7293
7294 static basicblock *
7295 pop_except_block(ExceptStack *stack) {
7296 assert(stack->depth > 0);
7297 return stack->handlers[--stack->depth];
7298 }
7299
7300 static basicblock *
7301 except_stack_top(ExceptStack *stack) {
7302 return stack->handlers[stack->depth];
7303 }
7304
7305 static ExceptStack *
7306 make_except_stack(void) {
7307 ExceptStack *new = PyMem_Malloc(sizeof(ExceptStack));
7308 if (new == NULL) {
7309 PyErr_NoMemory();
7310 return NULL;
7311 }
7312 new->depth = 0;
7313 new->handlers[0] = NULL;
7314 return new;
7315 }
7316
7317 static ExceptStack *
7318 copy_except_stack(ExceptStack *stack) {
7319 ExceptStack *copy = PyMem_Malloc(sizeof(ExceptStack));
7320 if (copy == NULL) {
7321 PyErr_NoMemory();
7322 return NULL;
7323 }
7324 memcpy(copy, stack, sizeof(ExceptStack));
7325 return copy;
7326 }
7327
7328 static int
7329 label_exception_targets(basicblock *entry) {
7330 int nblocks = 0;
7331 for (basicblock *b = entry; b != NULL; b = b->b_next) {
7332 b->b_visited = 0;
7333 nblocks++;
7334 }
7335 basicblock **todo_stack = PyMem_Malloc(sizeof(basicblock *)*nblocks);
7336 if (todo_stack == NULL) {
7337 PyErr_NoMemory();
7338 return -1;
7339 }
7340 ExceptStack *except_stack = make_except_stack();
7341 if (except_stack == NULL) {
7342 PyMem_Free(todo_stack);
7343 PyErr_NoMemory();
7344 return -1;
7345 }
7346 except_stack->depth = 0;
7347 todo_stack[0] = entry;
7348 entry->b_visited = 1;
7349 entry->b_exceptstack = except_stack;
7350 basicblock **todo = &todo_stack[1];
7351 basicblock *handler = NULL;
7352 while (todo > todo_stack) {
7353 todo--;
7354 basicblock *b = todo[0];
7355 assert(b->b_visited == 1);
7356 except_stack = b->b_exceptstack;
7357 assert(except_stack != NULL);
7358 b->b_exceptstack = NULL;
7359 handler = except_stack_top(except_stack);
7360 for (int i = 0; i < b->b_iused; i++) {
7361 struct instr *instr = &b->b_instr[i];
7362 if (is_block_push(instr)) {
7363 if (!instr->i_target->b_visited) {
7364 ExceptStack *copy = copy_except_stack(except_stack);
7365 if (copy == NULL) {
7366 goto error;
7367 }
7368 instr->i_target->b_exceptstack = copy;
7369 todo[0] = instr->i_target;
7370 instr->i_target->b_visited = 1;
7371 todo++;
7372 }
7373 handler = push_except_block(except_stack, instr);
7374 }
7375 else if (instr->i_opcode == POP_BLOCK) {
7376 handler = pop_except_block(except_stack);
7377 }
7378 else if (is_jump(instr)) {
7379 instr->i_except = handler;
7380 assert(i == b->b_iused - 1);
7381 if (!instr->i_target->b_visited) {
7382 if (b->b_nofallthrough == 0) {
7383 ExceptStack *copy = copy_except_stack(except_stack);
7384 if (copy == NULL) {
7385 goto error;
7386 }
7387 instr->i_target->b_exceptstack = copy;
7388 }
7389 else {
7390 instr->i_target->b_exceptstack = except_stack;
7391 except_stack = NULL;
7392 }
7393 todo[0] = instr->i_target;
7394 instr->i_target->b_visited = 1;
7395 todo++;
7396 }
7397 }
7398 else {
7399 instr->i_except = handler;
7400 }
7401 }
7402 if (b->b_nofallthrough == 0 && !b->b_next->b_visited) {
7403 assert(except_stack != NULL);
7404 b->b_next->b_exceptstack = except_stack;
7405 todo[0] = b->b_next;
7406 b->b_next->b_visited = 1;
7407 todo++;
7408 }
7409 else if (except_stack != NULL) {
7410 PyMem_Free(except_stack);
7411 }
7412 }
7413 #ifdef Py_DEBUG
7414 for (basicblock *b = entry; b != NULL; b = b->b_next) {
7415 assert(b->b_exceptstack == NULL);
7416 }
7417 #endif
7418 PyMem_Free(todo_stack);
7419 return 0;
7420 error:
7421 PyMem_Free(todo_stack);
7422 PyMem_Free(except_stack);
7423 return -1;
7424 }
7425
7426
7427 static void
7428 convert_exception_handlers_to_nops(basicblock *entry) {
7429 for (basicblock *b = entry; b != NULL; b = b->b_next) {
7430 for (int i = 0; i < b->b_iused; i++) {
7431 struct instr *instr = &b->b_instr[i];
7432 if (is_block_push(instr) || instr->i_opcode == POP_BLOCK) {
7433 instr->i_opcode = NOP;
7434 }
7435 }
7436 }
7437 }
7438
7439 static inline void
7440 write_except_byte(struct assembler *a, int byte) {
7441 unsigned char *p = (unsigned char *) PyBytes_AS_STRING(a->a_except_table);
7442 p[a->a_except_table_off++] = byte;
7443 }
7444
7445 #define CONTINUATION_BIT 64
7446
7447 static void
7448 assemble_emit_exception_table_item(struct assembler *a, int value, int msb)
7449 {
7450 assert((msb | 128) == 128);
7451 assert(value >= 0 && value < (1 << 30));
7452 if (value >= 1 << 24) {
7453 write_except_byte(a, (value >> 24) | CONTINUATION_BIT | msb);
7454 msb = 0;
7455 }
7456 if (value >= 1 << 18) {
7457 write_except_byte(a, ((value >> 18)&0x3f) | CONTINUATION_BIT | msb);
7458 msb = 0;
7459 }
7460 if (value >= 1 << 12) {
7461 write_except_byte(a, ((value >> 12)&0x3f) | CONTINUATION_BIT | msb);
7462 msb = 0;
7463 }
7464 if (value >= 1 << 6) {
7465 write_except_byte(a, ((value >> 6)&0x3f) | CONTINUATION_BIT | msb);
7466 msb = 0;
7467 }
7468 write_except_byte(a, (value&0x3f) | msb);
7469 }
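/* Values are written big-endian in 6-bit groups: CONTINUATION_BIT (bit 6)
 * is set on every byte except the last, and `msb` (bit 7) marks only the
 * first byte of a table entry. For instance, value 300 with msb == 0 is
 * written as the two bytes (300 >> 6) | 64 == 68 and 300 & 0x3f == 44.
 */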
7470
7471 /* See Objects/exception_handling_notes.txt for details of layout */
7472 #define MAX_SIZE_OF_ENTRY 20
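/* (4 varints of at most 5 six-bit bytes each, since each value < 1 << 30) */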
7473
7474 static int
7475 assemble_emit_exception_table_entry(struct assembler *a, int start, int end, basicblock *handler)
7476 {
7477 Py_ssize_t len = PyBytes_GET_SIZE(a->a_except_table);
7478 if (a->a_except_table_off + MAX_SIZE_OF_ENTRY >= len) {
7479 if (_PyBytes_Resize(&a->a_except_table, len * 2) < 0)
7480 return 0;
7481 }
7482 int size = end - start;
7483 assert(end > start);
7484 int target = handler->b_offset;
7485 int depth = handler->b_startdepth - 1;
7486 if (handler->b_preserve_lasti) {
7487 depth -= 1;
7488 }
7489 assert(depth >= 0);
7490 int depth_lasti = (depth<<1) | handler->b_preserve_lasti;
7491 assemble_emit_exception_table_item(a, start, (1<<7));
7492 assemble_emit_exception_table_item(a, size, 0);
7493 assemble_emit_exception_table_item(a, target, 0);
7494 assemble_emit_exception_table_item(a, depth_lasti, 0);
7495 return 1;
7496 }
7497
7498 static int
7499 assemble_exception_table(struct assembler *a)
7500 {
7501 basicblock *b;
7502 int ioffset = 0;
7503 basicblock *handler = NULL;
7504 int start = -1;
7505 for (b = a->a_entry; b != NULL; b = b->b_next) {
7506 ioffset = b->b_offset;
7507 for (int i = 0; i < b->b_iused; i++) {
7508 struct instr *instr = &b->b_instr[i];
7509 if (instr->i_except != handler) {
7510 if (handler != NULL) {
7511 RETURN_IF_FALSE(assemble_emit_exception_table_entry(a, start, ioffset, handler));
7512 }
7513 start = ioffset;
7514 handler = instr->i_except;
7515 }
7516 ioffset += instr_size(instr);
7517 }
7518 }
7519 if (handler != NULL) {
7520 RETURN_IF_FALSE(assemble_emit_exception_table_entry(a, start, ioffset, handler));
7521 }
7522 return 1;
7523 }
7524
7525 /* Code location emitting code. See locations.md for a description of the format. */
7526
7527 #define MSB 0x80
7528
7529 static void
7530 write_location_byte(struct assembler* a, int val)
7531 {
7532 PyBytes_AS_STRING(a->a_linetable)[a->a_location_off] = val&255;
7533 a->a_location_off++;
7534 }
7535
7536
7537 static uint8_t *
location_pointer(struct assembler * a)7538 location_pointer(struct assembler* a)
7539 {
7540 return (uint8_t *)PyBytes_AS_STRING(a->a_linetable) +
7541 a->a_location_off;
7542 }
7543
7544 static void
write_location_first_byte(struct assembler * a,int code,int length)7545 write_location_first_byte(struct assembler* a, int code, int length)
7546 {
7547 a->a_location_off += write_location_entry_start(
7548 location_pointer(a), code, length);
7549 }
7550
7551 static void
write_location_varint(struct assembler * a,unsigned int val)7552 write_location_varint(struct assembler* a, unsigned int val)
7553 {
7554 uint8_t *ptr = location_pointer(a);
7555 a->a_location_off += write_varint(ptr, val);
7556 }
7557
7558
7559 static void
write_location_signed_varint(struct assembler * a,int val)7560 write_location_signed_varint(struct assembler* a, int val)
7561 {
7562 uint8_t *ptr = location_pointer(a);
7563 a->a_location_off += write_signed_varint(ptr, val);
7564 }
7565
7566 static void
write_location_info_short_form(struct assembler * a,int length,int column,int end_column)7567 write_location_info_short_form(struct assembler* a, int length, int column, int end_column)
7568 {
7569 assert(length > 0 && length <= 8);
7570 int column_low_bits = column & 7;
7571 int column_group = column >> 3;
7572 assert(column < 80);
7573 assert(end_column >= column);
7574 assert(end_column - column < 16);
7575 write_location_first_byte(a, PY_CODE_LOCATION_INFO_SHORT0 + column_group, length);
7576 write_location_byte(a, (column_low_bits << 4) | (end_column - column));
7577 }
7578
7579 static void
write_location_info_oneline_form(struct assembler * a,int length,int line_delta,int column,int end_column)7580 write_location_info_oneline_form(struct assembler* a, int length, int line_delta, int column, int end_column)
7581 {
7582 assert(length > 0 && length <= 8);
7583 assert(line_delta >= 0 && line_delta < 3);
7584 assert(column < 128);
7585 assert(end_column < 128);
7586 write_location_first_byte(a, PY_CODE_LOCATION_INFO_ONE_LINE0 + line_delta, length);
7587 write_location_byte(a, column);
7588 write_location_byte(a, end_column);
7589 }
7590
7591 static void
write_location_info_long_form(struct assembler * a,struct instr * i,int length)7592 write_location_info_long_form(struct assembler* a, struct instr* i, int length)
7593 {
7594 assert(length > 0 && length <= 8);
7595 write_location_first_byte(a, PY_CODE_LOCATION_INFO_LONG, length);
7596 write_location_signed_varint(a, i->i_lineno - a->a_lineno);
7597 assert(i->i_end_lineno >= i->i_lineno);
7598 write_location_varint(a, i->i_end_lineno - i->i_lineno);
7599 write_location_varint(a, i->i_col_offset+1);
7600 write_location_varint(a, i->i_end_col_offset+1);
7601 }
7602
7603 static void
write_location_info_none(struct assembler * a,int length)7604 write_location_info_none(struct assembler* a, int length)
7605 {
7606 write_location_first_byte(a, PY_CODE_LOCATION_INFO_NONE, length);
7607 }
7608
7609 static void
write_location_info_no_column(struct assembler * a,int length,int line_delta)7610 write_location_info_no_column(struct assembler* a, int length, int line_delta)
7611 {
7612 write_location_first_byte(a, PY_CODE_LOCATION_INFO_NO_COLUMNS, length);
7613 write_location_signed_varint(a, line_delta);
7614 }
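
/* Summary of the entry forms emitted above (locations.md has the
 * authoritative layout):
 *   short form:     same start/end line, column < 80, end_column - column < 16
 *   one-line form:  same start/end line, line delta 0-2, both columns < 128
 *   no-column form: signed varint line delta, no column info
 *   long form:      signed varint line delta, then end-line delta and both
 *                   columns (offset by 1) as varints
 *   none form:      no location at all (i_lineno < 0)
 */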

#define THEORETICAL_MAX_ENTRY_SIZE 25 /* 1 + 6 + 6 + 6 + 6 */

static int
write_location_info_entry(struct assembler* a, struct instr* i, int isize)
{
    Py_ssize_t len = PyBytes_GET_SIZE(a->a_linetable);
    if (a->a_location_off + THEORETICAL_MAX_ENTRY_SIZE >= len) {
        assert(len > THEORETICAL_MAX_ENTRY_SIZE);
        if (_PyBytes_Resize(&a->a_linetable, len*2) < 0) {
            return 0;
        }
    }
    if (i->i_lineno < 0) {
        write_location_info_none(a, isize);
        return 1;
    }
    int line_delta = i->i_lineno - a->a_lineno;
    int column = i->i_col_offset;
    int end_column = i->i_end_col_offset;
    assert(column >= -1);
    assert(end_column >= -1);
    if (column < 0 || end_column < 0) {
        if (i->i_end_lineno == i->i_lineno || i->i_end_lineno == -1) {
            write_location_info_no_column(a, isize, line_delta);
            a->a_lineno = i->i_lineno;
            return 1;
        }
    }
    else if (i->i_end_lineno == i->i_lineno) {
        if (line_delta == 0 && column < 80 && end_column - column < 16 && end_column >= column) {
            write_location_info_short_form(a, isize, column, end_column);
            return 1;
        }
        if (line_delta >= 0 && line_delta < 3 && column < 128 && end_column < 128) {
            write_location_info_oneline_form(a, isize, line_delta, column, end_column);
            a->a_lineno = i->i_lineno;
            return 1;
        }
    }
    write_location_info_long_form(a, i, isize);
    a->a_lineno = i->i_lineno;
    return 1;
}

static int
assemble_emit_location(struct assembler* a, struct instr* i)
{
    int isize = instr_size(i);
    while (isize > 8) {
        if (!write_location_info_entry(a, i, 8)) {
            return 0;
        }
        isize -= 8;
    }
    return write_location_info_entry(a, i, isize);
}

/* assemble_emit()
   Extend the bytecode with a new instruction.
   Resize the bytecode buffer if necessary.
*/

static int
assemble_emit(struct assembler *a, struct instr *i)
{
    Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode);
    _Py_CODEUNIT *code;

    int size = instr_size(i);
    if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) {
        if (len > PY_SSIZE_T_MAX / 2)
            return 0;
        if (_PyBytes_Resize(&a->a_bytecode, len * 2) < 0)
            return 0;
    }
    code = (_Py_CODEUNIT *)PyBytes_AS_STRING(a->a_bytecode) + a->a_offset;
    a->a_offset += size;
    write_instr(code, i, size);
    return 1;
}

static void
normalize_jumps(struct assembler *a)
{
    for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
        b->b_visited = 0;
    }
    for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
        b->b_visited = 1;
        if (b->b_iused == 0) {
            continue;
        }
        struct instr *last = &b->b_instr[b->b_iused-1];
        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
        if (is_jump(last)) {
            bool is_forward = last->i_target->b_visited == 0;
            switch(last->i_opcode) {
                case JUMP:
                    last->i_opcode = is_forward ? JUMP_FORWARD : JUMP_BACKWARD;
                    break;
                case JUMP_NO_INTERRUPT:
                    last->i_opcode = is_forward ?
                        JUMP_FORWARD : JUMP_BACKWARD_NO_INTERRUPT;
                    break;
                case POP_JUMP_IF_NOT_NONE:
                    last->i_opcode = is_forward ?
                        POP_JUMP_FORWARD_IF_NOT_NONE : POP_JUMP_BACKWARD_IF_NOT_NONE;
                    break;
                case POP_JUMP_IF_NONE:
                    last->i_opcode = is_forward ?
                        POP_JUMP_FORWARD_IF_NONE : POP_JUMP_BACKWARD_IF_NONE;
                    break;
                case POP_JUMP_IF_FALSE:
                    last->i_opcode = is_forward ?
                        POP_JUMP_FORWARD_IF_FALSE : POP_JUMP_BACKWARD_IF_FALSE;
                    break;
                case POP_JUMP_IF_TRUE:
                    last->i_opcode = is_forward ?
                        POP_JUMP_FORWARD_IF_TRUE : POP_JUMP_BACKWARD_IF_TRUE;
                    break;
                case JUMP_IF_TRUE_OR_POP:
                case JUMP_IF_FALSE_OR_POP:
                    if (!is_forward) {
                        /* As far as we can tell, the compiler never emits
                         * these jumps with a backwards target. If/when this
                         * error is raised, we have found a use case for
                         * a backwards version of this jump (or for replacing
                         * it with the sequence COPY 1, POP_JUMP_IF_T/F, POP).
                         */
                        PyErr_Format(PyExc_SystemError,
                            "unexpected %s jumping backwards",
                            last->i_opcode == JUMP_IF_TRUE_OR_POP ?
                                "JUMP_IF_TRUE_OR_POP" : "JUMP_IF_FALSE_OR_POP");
                    }
                    break;
            }
        }
    }
}

static void
assemble_jump_offsets(struct assembler *a, struct compiler *c)
{
    basicblock *b;
    int bsize, totsize, extended_arg_recompile;
    int i;

    /* Compute the size of each block and fixup jump args.
       Replace block pointer with position in bytecode. */
    do {
        totsize = 0;
        for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
            bsize = blocksize(b);
            b->b_offset = totsize;
            totsize += bsize;
        }
        extended_arg_recompile = 0;
        for (b = c->u->u_blocks; b != NULL; b = b->b_list) {
            bsize = b->b_offset;
            for (i = 0; i < b->b_iused; i++) {
                struct instr *instr = &b->b_instr[i];
                int isize = instr_size(instr);
                /* Relative jumps are computed relative to
                   the instruction pointer after fetching
                   the jump instruction.
                */
                bsize += isize;
                if (is_jump(instr)) {
                    instr->i_oparg = instr->i_target->b_offset;
                    if (is_relative_jump(instr)) {
                        if (instr->i_oparg < bsize) {
                            assert(IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
                            instr->i_oparg = bsize - instr->i_oparg;
                        }
                        else {
                            assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
                            instr->i_oparg -= bsize;
                        }
                    }
                    else {
                        assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
                    }
                    if (instr_size(instr) != isize) {
                        extended_arg_recompile = 1;
                    }
                }
            }
        }

        /* XXX: This is an awful hack that could hurt performance, but
           on the bright side it should work until we come up
           with a better solution.

           The issue is that in the first loop blocksize() is called
           which calls instr_size() which requires i_oparg be set
           appropriately. There is a bootstrap problem because
           i_oparg is calculated in the second loop above.

           So we loop until we stop seeing new EXTENDED_ARGs.
           The only EXTENDED_ARGs that could be popping up are
           ones in jump instructions. So this should converge
           fairly quickly.
        */
    } while (extended_arg_recompile);
}

static PyObject *
dict_keys_inorder(PyObject *dict, Py_ssize_t offset)
{
    PyObject *tuple, *k, *v;
    Py_ssize_t i, pos = 0, size = PyDict_GET_SIZE(dict);

    tuple = PyTuple_New(size);
    if (tuple == NULL)
        return NULL;
    while (PyDict_Next(dict, &pos, &k, &v)) {
        i = PyLong_AS_LONG(v);
        Py_INCREF(k);
        assert((i - offset) < size);
        assert((i - offset) >= 0);
        PyTuple_SET_ITEM(tuple, i - offset, k);
    }
    return tuple;
}
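
/* For example, with offset 0 a names dict mapping "x" -> 0, "y" -> 1
 * yields the tuple ("x", "y"). */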

static PyObject *
consts_dict_keys_inorder(PyObject *dict)
{
    PyObject *consts, *k, *v;
    Py_ssize_t i, pos = 0, size = PyDict_GET_SIZE(dict);

    consts = PyList_New(size);   /* PyCode_Optimize() requires a list */
    if (consts == NULL)
        return NULL;
    while (PyDict_Next(dict, &pos, &k, &v)) {
        i = PyLong_AS_LONG(v);
        /* The keys of the dictionary can be tuples wrapping a constant.
         * (see compiler_add_o and _PyCode_ConstantKey). In that case
         * the object we want is always second. */
        if (PyTuple_CheckExact(k)) {
            k = PyTuple_GET_ITEM(k, 1);
        }
        Py_INCREF(k);
        assert(i < size);
        assert(i >= 0);
        PyList_SET_ITEM(consts, i, k);
    }
    return consts;
}

static int
compute_code_flags(struct compiler *c)
{
    PySTEntryObject *ste = c->u->u_ste;
    int flags = 0;
    if (ste->ste_type == FunctionBlock) {
        flags |= CO_NEWLOCALS | CO_OPTIMIZED;
        if (ste->ste_nested)
            flags |= CO_NESTED;
        if (ste->ste_generator && !ste->ste_coroutine)
            flags |= CO_GENERATOR;
        if (!ste->ste_generator && ste->ste_coroutine)
            flags |= CO_COROUTINE;
        if (ste->ste_generator && ste->ste_coroutine)
            flags |= CO_ASYNC_GENERATOR;
        if (ste->ste_varargs)
            flags |= CO_VARARGS;
        if (ste->ste_varkeywords)
            flags |= CO_VARKEYWORDS;
    }

    /* (Only) inherit compilerflags in PyCF_MASK */
    flags |= (c->c_flags->cf_flags & PyCF_MASK);

    if ((IS_TOP_LEVEL_AWAIT(c)) &&
         ste->ste_coroutine &&
         !ste->ste_generator) {
        flags |= CO_COROUTINE;
    }

    return flags;
}

// Merge *obj* with constant cache.
// Unlike merge_consts_recursive(), this function doesn't work recursively.
static int
merge_const_one(struct compiler *c, PyObject **obj)
{
    PyObject *key = _PyCode_ConstantKey(*obj);
    if (key == NULL) {
        return 0;
    }

    // t is borrowed reference
    PyObject *t = PyDict_SetDefault(c->c_const_cache, key, key);
    Py_DECREF(key);
    if (t == NULL) {
        return 0;
    }
    if (t == key) {  // obj is new constant.
        return 1;
    }

    if (PyTuple_CheckExact(t)) {
        // t is still borrowed reference
        t = PyTuple_GET_ITEM(t, 1);
    }

    Py_INCREF(t);
    Py_DECREF(*obj);
    *obj = t;
    return 1;
}
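
/* For example, equal tuple constants built by different code objects
 * collapse into one shared object here. The cache is keyed by
 * _PyCode_ConstantKey(), so values that compare equal but differ in type
 * or sign (such as 1, 1.0 and True, or 0.0 and -0.0) stay distinct. */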

// This is in codeobject.c.
extern void _Py_set_localsplus_info(int, PyObject *, unsigned char,
                                    PyObject *, PyObject *);

static void
compute_localsplus_info(struct compiler *c, int nlocalsplus,
                        PyObject *names, PyObject *kinds)
{
    PyObject *k, *v;
    Py_ssize_t pos = 0;
    while (PyDict_Next(c->u->u_varnames, &pos, &k, &v)) {
        int offset = (int)PyLong_AS_LONG(v);
        assert(offset >= 0);
        assert(offset < nlocalsplus);
        // For now we do not distinguish arg kinds.
        _PyLocals_Kind kind = CO_FAST_LOCAL;
        if (PyDict_GetItem(c->u->u_cellvars, k) != NULL) {
            kind |= CO_FAST_CELL;
        }
        _Py_set_localsplus_info(offset, k, kind, names, kinds);
    }
    int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames);

    // This counter mirrors the fix done in fix_cell_offsets().
    int numdropped = 0;
    pos = 0;
    while (PyDict_Next(c->u->u_cellvars, &pos, &k, &v)) {
        if (PyDict_GetItem(c->u->u_varnames, k) != NULL) {
            // Skip cells that are already covered by locals.
            numdropped += 1;
            continue;
        }
        int offset = (int)PyLong_AS_LONG(v);
        assert(offset >= 0);
        offset += nlocals - numdropped;
        assert(offset < nlocalsplus);
        _Py_set_localsplus_info(offset, k, CO_FAST_CELL, names, kinds);
    }

    pos = 0;
    while (PyDict_Next(c->u->u_freevars, &pos, &k, &v)) {
        int offset = (int)PyLong_AS_LONG(v);
        assert(offset >= 0);
        offset += nlocals - numdropped;
        assert(offset < nlocalsplus);
        _Py_set_localsplus_info(offset, k, CO_FAST_FREE, names, kinds);
    }
}
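
/* Worked example (note that u_freevars indices already start after the
 * cell vars): with u_varnames {a: 0, b: 1}, u_cellvars {b: 0, c: 1} and
 * u_freevars {f: 2}, b's cell is a duplicate of the local (numdropped == 1),
 * so the resulting layout is
 *     names: (a, b, c, f)
 *     kinds: (CO_FAST_LOCAL, CO_FAST_LOCAL | CO_FAST_CELL,
 *             CO_FAST_CELL, CO_FAST_FREE)
 */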

static PyCodeObject *
makecode(struct compiler *c, struct assembler *a, PyObject *constslist,
         int maxdepth, int nlocalsplus)
{
    PyCodeObject *co = NULL;
    PyObject *names = NULL;
    PyObject *consts = NULL;
    PyObject *localsplusnames = NULL;
    PyObject *localspluskinds = NULL;

    names = dict_keys_inorder(c->u->u_names, 0);
    if (!names) {
        goto error;
    }
    if (!merge_const_one(c, &names)) {
        goto error;
    }

    int flags = compute_code_flags(c);
    if (flags < 0) {
        goto error;
    }

    consts = PyList_AsTuple(constslist); /* PyCode_New requires a tuple */
    if (consts == NULL) {
        goto error;
    }
    if (!merge_const_one(c, &consts)) {
        goto error;
    }

    assert(c->u->u_posonlyargcount < INT_MAX);
    assert(c->u->u_argcount < INT_MAX);
    assert(c->u->u_kwonlyargcount < INT_MAX);
    int posonlyargcount = (int)c->u->u_posonlyargcount;
    int posorkwargcount = (int)c->u->u_argcount;
    assert(INT_MAX - posonlyargcount - posorkwargcount > 0);
    int kwonlyargcount = (int)c->u->u_kwonlyargcount;

    localsplusnames = PyTuple_New(nlocalsplus);
    if (localsplusnames == NULL) {
        goto error;
    }
    localspluskinds = PyBytes_FromStringAndSize(NULL, nlocalsplus);
    if (localspluskinds == NULL) {
        goto error;
    }
    compute_localsplus_info(c, nlocalsplus, localsplusnames, localspluskinds);

    struct _PyCodeConstructor con = {
        .filename = c->c_filename,
        .name = c->u->u_name,
        .qualname = c->u->u_qualname ? c->u->u_qualname : c->u->u_name,
        .flags = flags,

        .code = a->a_bytecode,
        .firstlineno = c->u->u_firstlineno,
        .linetable = a->a_linetable,

        .consts = consts,
        .names = names,

        .localsplusnames = localsplusnames,
        .localspluskinds = localspluskinds,

        .argcount = posonlyargcount + posorkwargcount,
        .posonlyargcount = posonlyargcount,
        .kwonlyargcount = kwonlyargcount,

        .stacksize = maxdepth,

        .exceptiontable = a->a_except_table,
    };

    if (_PyCode_Validate(&con) < 0) {
        goto error;
    }

    if (!merge_const_one(c, &localsplusnames)) {
        goto error;
    }
    con.localsplusnames = localsplusnames;

    co = _PyCode_New(&con);
    if (co == NULL) {
        goto error;
    }

 error:
    Py_XDECREF(names);
    Py_XDECREF(consts);
    Py_XDECREF(localsplusnames);
    Py_XDECREF(localspluskinds);
    return co;
}


/* For debugging purposes only */
#if 0
static void
dump_instr(struct instr *i)
{
    const char *jrel = (is_relative_jump(i)) ? "jrel " : "";
    const char *jabs = (is_jump(i) && !is_relative_jump(i))? "jabs " : "";

    char arg[128];

    *arg = '\0';
    if (HAS_ARG(i->i_opcode)) {
        sprintf(arg, "arg: %d ", i->i_oparg);
    }
    fprintf(stderr, "line: %d, opcode: %d %s%s%s\n",
                    i->i_lineno, i->i_opcode, arg, jabs, jrel);
}

static void
dump_basicblock(const basicblock *b)
{
    const char *b_return = b->b_return ? "return " : "";
    fprintf(stderr, "used: %d, depth: %d, offset: %d %s\n",
            b->b_iused, b->b_startdepth, b->b_offset, b_return);
    if (b->b_instr) {
        int i;
        for (i = 0; i < b->b_iused; i++) {
            fprintf(stderr, "  [%02d] ", i);
            dump_instr(b->b_instr + i);
        }
    }
}
#endif


static int
normalize_basic_block(basicblock *bb);

static int
optimize_cfg(struct compiler *c, struct assembler *a, PyObject *consts);

static int
trim_unused_consts(struct compiler *c, struct assembler *a, PyObject *consts);

/* Duplicates exit BBs, so that line numbers can be propagated to them */
static int
duplicate_exits_without_lineno(struct compiler *c);

static int
extend_block(basicblock *bb);

static int *
build_cellfixedoffsets(struct compiler *c)
{
    int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames);
    int ncellvars = (int)PyDict_GET_SIZE(c->u->u_cellvars);
    int nfreevars = (int)PyDict_GET_SIZE(c->u->u_freevars);

    int noffsets = ncellvars + nfreevars;
    int *fixed = PyMem_New(int, noffsets);
    if (fixed == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    for (int i = 0; i < noffsets; i++) {
        fixed[i] = nlocals + i;
    }

    PyObject *varname, *cellindex;
    Py_ssize_t pos = 0;
    while (PyDict_Next(c->u->u_cellvars, &pos, &varname, &cellindex)) {
        PyObject *varindex = PyDict_GetItem(c->u->u_varnames, varname);
        if (varindex != NULL) {
            assert(PyLong_AS_LONG(cellindex) < INT_MAX);
            assert(PyLong_AS_LONG(varindex) < INT_MAX);
            int oldindex = (int)PyLong_AS_LONG(cellindex);
            int argoffset = (int)PyLong_AS_LONG(varindex);
            fixed[oldindex] = argoffset;
        }
    }

    return fixed;
}

static inline int
insert_instruction(basicblock *block, int pos, struct instr *instr) {
    if (compiler_next_instr(block) < 0) {
        return -1;
    }
    for (int i = block->b_iused-1; i > pos; i--) {
        block->b_instr[i] = block->b_instr[i-1];
    }
    block->b_instr[pos] = *instr;
    return 0;
}

static int
insert_prefix_instructions(struct compiler *c, basicblock *entryblock,
                           int *fixed, int nfreevars)
{

    int flags = compute_code_flags(c);
    if (flags < 0) {
        return -1;
    }
    assert(c->u->u_firstlineno > 0);

    /* Add the generator prefix instructions. */
    if (flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) {
        struct instr make_gen = {
            .i_opcode = RETURN_GENERATOR,
            .i_oparg = 0,
            .i_lineno = c->u->u_firstlineno,
            .i_col_offset = -1,
            .i_end_lineno = c->u->u_firstlineno,
            .i_end_col_offset = -1,
            .i_target = NULL,
        };
        if (insert_instruction(entryblock, 0, &make_gen) < 0) {
            return -1;
        }
        struct instr pop_top = {
            .i_opcode = POP_TOP,
            .i_oparg = 0,
            .i_lineno = -1,
            .i_col_offset = -1,
            .i_end_lineno = -1,
            .i_end_col_offset = -1,
            .i_target = NULL,
        };
        if (insert_instruction(entryblock, 1, &pop_top) < 0) {
            return -1;
        }
    }

    /* Set up cells for any variable that escapes, to be put in a closure. */
    const int ncellvars = (int)PyDict_GET_SIZE(c->u->u_cellvars);
    if (ncellvars) {
        // c->u->u_cellvars has the cells out of order so we sort them
        // before adding the MAKE_CELL instructions.  Note that we
        // adjust for arg cells, which come first.
        const int nvars = ncellvars + (int)PyDict_GET_SIZE(c->u->u_varnames);
        int *sorted = PyMem_RawCalloc(nvars, sizeof(int));
        if (sorted == NULL) {
            PyErr_NoMemory();
            return -1;
        }
        for (int i = 0; i < ncellvars; i++) {
            sorted[fixed[i]] = i + 1;
        }
        for (int i = 0, ncellsused = 0; ncellsused < ncellvars; i++) {
            int oldindex = sorted[i] - 1;
            if (oldindex == -1) {
                continue;
            }
            struct instr make_cell = {
                .i_opcode = MAKE_CELL,
                // This will get fixed in offset_derefs().
                .i_oparg = oldindex,
                .i_lineno = -1,
                .i_col_offset = -1,
                .i_end_lineno = -1,
                .i_end_col_offset = -1,
                .i_target = NULL,
            };
            if (insert_instruction(entryblock, ncellsused, &make_cell) < 0) {
                PyMem_RawFree(sorted);
                return -1;
            }
            ncellsused += 1;
        }
        PyMem_RawFree(sorted);
    }

    if (nfreevars) {
        struct instr copy_frees = {
            .i_opcode = COPY_FREE_VARS,
            .i_oparg = nfreevars,
            .i_lineno = -1,
            .i_col_offset = -1,
            .i_end_lineno = -1,
            .i_end_col_offset = -1,
            .i_target = NULL,
        };
        if (insert_instruction(entryblock, 0, &copy_frees) < 0) {
            return -1;
        }
    }

    return 0;
}
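
/* For example, an async generator that is also a closure with one cell
 * ends up with the prefix
 *     COPY_FREE_VARS, MAKE_CELL, RETURN_GENERATOR, POP_TOP
 * since each insertion above lands in front of the previously inserted
 * instructions. */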

/* Make sure that all returns have a line number, even if early passes
 * have failed to propagate a correct line number.
 * The resulting line number may not be correct according to PEP 626,
 * but should be "good enough", and no worse than in older versions. */
static void
guarantee_lineno_for_exits(struct assembler *a, int firstlineno) {
    int lineno = firstlineno;
    assert(lineno > 0);
    for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
        if (b->b_iused == 0) {
            continue;
        }
        struct instr *last = &b->b_instr[b->b_iused-1];
        if (last->i_lineno < 0) {
            if (last->i_opcode == RETURN_VALUE) {
                for (int i = 0; i < b->b_iused; i++) {
                    assert(b->b_instr[i].i_lineno < 0);
                    b->b_instr[i].i_lineno = lineno;
                }
            }
        }
        else {
            lineno = last->i_lineno;
        }
    }
}

static int
fix_cell_offsets(struct compiler *c, basicblock *entryblock, int *fixedmap)
{
    int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames);
    int ncellvars = (int)PyDict_GET_SIZE(c->u->u_cellvars);
    int nfreevars = (int)PyDict_GET_SIZE(c->u->u_freevars);
    int noffsets = ncellvars + nfreevars;

    // First deal with duplicates (arg cells).
    int numdropped = 0;
    for (int i = 0; i < noffsets ; i++) {
        if (fixedmap[i] == i + nlocals) {
            fixedmap[i] -= numdropped;
        }
        else {
            // It was a duplicate (cell/arg).
            numdropped += 1;
        }
    }

    // Then update offsets, either relative to locals or by cell2arg.
    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
        for (int i = 0; i < b->b_iused; i++) {
            struct instr *inst = &b->b_instr[i];
            // This is called before extended args are generated.
            assert(inst->i_opcode != EXTENDED_ARG);
            assert(inst->i_opcode != EXTENDED_ARG_QUICK);
            int oldoffset = inst->i_oparg;
            switch(inst->i_opcode) {
                case MAKE_CELL:
                case LOAD_CLOSURE:
                case LOAD_DEREF:
                case STORE_DEREF:
                case DELETE_DEREF:
                case LOAD_CLASSDEREF:
                    assert(oldoffset >= 0);
                    assert(oldoffset < noffsets);
                    assert(fixedmap[oldoffset] >= 0);
                    inst->i_oparg = fixedmap[oldoffset];
            }
        }
    }

    return numdropped;
}

static void
propagate_line_numbers(struct assembler *a);

static PyCodeObject *
assemble(struct compiler *c, int addNone)
{
    basicblock *b, *entryblock;
    struct assembler a;
    int j, nblocks;
    PyCodeObject *co = NULL;
    PyObject *consts = NULL;
    memset(&a, 0, sizeof(struct assembler));

    /* Make sure every block that falls off the end returns None. */
    if (!c->u->u_curblock->b_return) {
        UNSET_LOC(c);
        if (addNone)
            ADDOP_LOAD_CONST(c, Py_None);
        ADDOP(c, RETURN_VALUE);
    }

    for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) {
        if (normalize_basic_block(b)) {
            return NULL;
        }
    }

    for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) {
        if (extend_block(b)) {
            return NULL;
        }
    }

    nblocks = 0;
    entryblock = NULL;
    for (b = c->u->u_blocks; b != NULL; b = b->b_list) {
        nblocks++;
        entryblock = b;
    }
    assert(entryblock != NULL);

    assert(PyDict_GET_SIZE(c->u->u_varnames) < INT_MAX);
    assert(PyDict_GET_SIZE(c->u->u_cellvars) < INT_MAX);
    assert(PyDict_GET_SIZE(c->u->u_freevars) < INT_MAX);
    int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames);
    int ncellvars = (int)PyDict_GET_SIZE(c->u->u_cellvars);
    int nfreevars = (int)PyDict_GET_SIZE(c->u->u_freevars);
    assert(INT_MAX - nlocals - ncellvars > 0);
    assert(INT_MAX - nlocals - ncellvars - nfreevars > 0);
    int nlocalsplus = nlocals + ncellvars + nfreevars;
    int *cellfixedoffsets = build_cellfixedoffsets(c);
    if (cellfixedoffsets == NULL) {
        goto error;
    }

    /* Set firstlineno if it wasn't explicitly set. */
    if (!c->u->u_firstlineno) {
        if (entryblock->b_instr && entryblock->b_instr->i_lineno) {
            c->u->u_firstlineno = entryblock->b_instr->i_lineno;
        }
        else {
            c->u->u_firstlineno = 1;
        }
    }

    // This must be called before fix_cell_offsets().
    if (insert_prefix_instructions(c, entryblock, cellfixedoffsets, nfreevars)) {
        goto error;
    }

    if (!assemble_init(&a, nblocks, c->u->u_firstlineno))
        goto error;
    a.a_entry = entryblock;
    a.a_nblocks = nblocks;

    int numdropped = fix_cell_offsets(c, entryblock, cellfixedoffsets);
    PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
    cellfixedoffsets = NULL;
    if (numdropped < 0) {
        goto error;
    }
    nlocalsplus -= numdropped;

    consts = consts_dict_keys_inorder(c->u->u_consts);
    if (consts == NULL) {
        goto error;
    }

    if (optimize_cfg(c, &a, consts)) {
        goto error;
    }
    if (duplicate_exits_without_lineno(c)) {
        goto error;
    }
    if (trim_unused_consts(c, &a, consts)) {
        goto error;
    }
    propagate_line_numbers(&a);
    guarantee_lineno_for_exits(&a, c->u->u_firstlineno);
    int maxdepth = stackdepth(c);
    if (maxdepth < 0) {
        goto error;
    }
    /* TO DO -- For 3.12, make sure that `maxdepth <= MAX_ALLOWED_STACK_USE` */

    if (label_exception_targets(entryblock)) {
        goto error;
    }
    convert_exception_handlers_to_nops(entryblock);
    for (basicblock *b = a.a_entry; b != NULL; b = b->b_next) {
        clean_basic_block(b);
    }

    /* Order of basic blocks must have been determined by now */
    normalize_jumps(&a);

    /* Can't modify the bytecode after computing jump offsets. */
    assemble_jump_offsets(&a, c);

    /* Emit code. */
    for (b = entryblock; b != NULL; b = b->b_next) {
        for (j = 0; j < b->b_iused; j++)
            if (!assemble_emit(&a, &b->b_instr[j]))
                goto error;
    }

    /* Emit location info */
    a.a_lineno = c->u->u_firstlineno;
    for (b = entryblock; b != NULL; b = b->b_next) {
        for (j = 0; j < b->b_iused; j++)
            if (!assemble_emit_location(&a, &b->b_instr[j]))
                goto error;
    }

    if (!assemble_exception_table(&a)) {
        goto error;
    }
    if (_PyBytes_Resize(&a.a_except_table, a.a_except_table_off) < 0) {
        goto error;
    }
    if (!merge_const_one(c, &a.a_except_table)) {
        goto error;
    }

    if (_PyBytes_Resize(&a.a_linetable, a.a_location_off) < 0) {
        goto error;
    }
    if (!merge_const_one(c, &a.a_linetable)) {
        goto error;
    }

    if (_PyBytes_Resize(&a.a_bytecode, a.a_offset * sizeof(_Py_CODEUNIT)) < 0) {
        goto error;
    }
    if (!merge_const_one(c, &a.a_bytecode)) {
        goto error;
    }

    co = makecode(c, &a, consts, maxdepth, nlocalsplus);
 error:
    Py_XDECREF(consts);
    assemble_free(&a);
    if (cellfixedoffsets != NULL) {
        PyMem_Free(cellfixedoffsets);
    }
    return co;
}

static PyObject*
get_const_value(int opcode, int oparg, PyObject *co_consts)
{
    PyObject *constant = NULL;
    assert(HAS_CONST(opcode));
    if (opcode == LOAD_CONST) {
        constant = PyList_GET_ITEM(co_consts, oparg);
    }

    if (constant == NULL) {
        PyErr_SetString(PyExc_SystemError,
                        "Internal error: failed to get value of a constant");
        return NULL;
    }
    Py_INCREF(constant);
    return constant;
}

/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
   with    LOAD_CONST (c1, c2, ... cn).
   The consts table must still be in list form so that the
   new constant (c1, c2, ... cn) can be appended.
   Called with codestr pointing to the first LOAD_CONST.
*/
static int
fold_tuple_on_constants(struct compiler *c,
                        struct instr *inst,
                        int n, PyObject *consts)
{
    /* Pre-conditions */
    assert(PyList_CheckExact(consts));
    assert(inst[n].i_opcode == BUILD_TUPLE);
    assert(inst[n].i_oparg == n);

    for (int i = 0; i < n; i++) {
        if (!HAS_CONST(inst[i].i_opcode)) {
            return 0;
        }
    }

    /* Buildup new tuple of constants */
    PyObject *newconst = PyTuple_New(n);
    if (newconst == NULL) {
        return -1;
    }
    for (int i = 0; i < n; i++) {
        int op = inst[i].i_opcode;
        int arg = inst[i].i_oparg;
        PyObject *constant = get_const_value(op, arg, consts);
        if (constant == NULL) {
            Py_DECREF(newconst);
            return -1;
        }
        PyTuple_SET_ITEM(newconst, i, constant);
    }
    if (merge_const_one(c, &newconst) == 0) {
        Py_DECREF(newconst);
        return -1;
    }

    Py_ssize_t index;
    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
        if (PyList_GET_ITEM(consts, index) == newconst) {
            break;
        }
    }
    if (index == PyList_GET_SIZE(consts)) {
        if ((size_t)index >= (size_t)INT_MAX - 1) {
            Py_DECREF(newconst);
            PyErr_SetString(PyExc_OverflowError, "too many constants");
            return -1;
        }
        if (PyList_Append(consts, newconst)) {
            Py_DECREF(newconst);
            return -1;
        }
    }
    Py_DECREF(newconst);
    for (int i = 0; i < n; i++) {
        inst[i].i_opcode = NOP;
    }
    inst[n].i_opcode = LOAD_CONST;
    inst[n].i_oparg = (int)index;
    return 0;
}

#define VISITED (-1)

// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
// same effect.
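// For example, the pair SWAP(2), SWAP(2) has no net effect and is replaced
// by NOP, NOP.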
static int
swaptimize(basicblock *block, int *ix)
{
    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
    // for this function. Make sure to blow away cached *.pyc files first!
    assert(*ix < block->b_iused);
    struct instr *instructions = &block->b_instr[*ix];
    // Find the length of the current sequence of SWAPs and NOPs, and record the
    // maximum depth of the stack manipulations:
    assert(instructions[0].i_opcode == SWAP);
    int depth = instructions[0].i_oparg;
    int len = 0;
    int more = false;
    int limit = block->b_iused - *ix;
    while (++len < limit) {
        int opcode = instructions[len].i_opcode;
        if (opcode == SWAP) {
            depth = Py_MAX(depth, instructions[len].i_oparg);
            more = true;
        }
        else if (opcode != NOP) {
            break;
        }
    }
    // It's already optimal if there's only one SWAP:
    if (!more) {
        return 0;
    }
    // Create an array with elements {0, 1, 2, ..., depth - 1}:
    int *stack = PyMem_Malloc(depth * sizeof(int));
    if (stack == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    for (int i = 0; i < depth; i++) {
        stack[i] = i;
    }
    // Simulate the combined effect of these instructions by "running" them on
    // our "stack":
    for (int i = 0; i < len; i++) {
        if (instructions[i].i_opcode == SWAP) {
            int oparg = instructions[i].i_oparg;
            int top = stack[0];
            // SWAPs are 1-indexed:
            stack[0] = stack[oparg - 1];
            stack[oparg - 1] = top;
        }
    }
    // Now we can begin! Our approach here is based on a solution to a closely
    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
    // think of this algorithm as determining the steps needed to efficiently
    // "un-shuffle" our stack. By performing the moves in *reverse* order,
    // though, we can efficiently *shuffle* it! For this reason, we will be
    // replacing instructions starting from the *end* of the run. Since the
    // solution is optimal, we don't need to worry about running out of space:
    int current = len - 1;
    for (int i = 0; i < depth; i++) {
        // Skip items that have already been visited, or just happen to be in
        // the correct location:
        if (stack[i] == VISITED || stack[i] == i) {
            continue;
        }
        // Okay, we've found an item that hasn't been visited. It forms a cycle
        // with other items; traversing the cycle and swapping each item with
        // the next will put them all in the correct place. The weird
        // loop-and-a-half is necessary to insert 0 into every cycle, since we
        // can only swap from that position:
        int j = i;
        while (true) {
            // Skip the actual swap if our item is zero, since swapping the top
            // item with itself is pointless:
            if (j) {
                assert(0 <= current);
                // SWAPs are 1-indexed:
                instructions[current].i_opcode = SWAP;
                instructions[current--].i_oparg = j + 1;
            }
            if (stack[j] == VISITED) {
                // Completed the cycle:
                assert(j == i);
                break;
            }
            int next_j = stack[j];
            stack[j] = VISITED;
            j = next_j;
        }
    }
    // NOP out any unused instructions:
    while (0 <= current) {
        instructions[current--].i_opcode = NOP;
    }
    PyMem_Free(stack);
    *ix += len - 1;
    return 0;
}

// This list is pretty small, since it's only okay to reorder opcodes that:
// - can't affect control flow (like jumping or raising exceptions)
// - can't invoke arbitrary code (besides finalizers)
// - only touch the TOS (and pop it when finished)
#define SWAPPABLE(opcode) \
    ((opcode) == STORE_FAST || (opcode) == POP_TOP)

#define STORES_TO(instr) \
    (((instr).i_opcode == STORE_FAST) ? (instr).i_oparg : -1)

static int
next_swappable_instruction(basicblock *block, int i, int lineno)
{
    while (++i < block->b_iused) {
        struct instr *instruction = &block->b_instr[i];
        if (0 <= lineno && instruction->i_lineno != lineno) {
            // Optimizing across this instruction could cause user-visible
            // changes in the names bound between line tracing events!
            return -1;
        }
        if (instruction->i_opcode == NOP) {
            continue;
        }
        if (SWAPPABLE(instruction->i_opcode)) {
            return i;
        }
        return -1;
    }
    return -1;
}

// Attempt to apply SWAPs statically by swapping *instructions* rather than
// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42)
// with the more efficient NOP, STORE_FAST(42), POP_TOP.
static void
apply_static_swaps(basicblock *block, int i)
{
    // SWAPs are to our left, and potential swaperands are to our right:
    for (; 0 <= i; i--) {
        assert(i < block->b_iused);
        struct instr *swap = &block->b_instr[i];
        if (swap->i_opcode != SWAP) {
            if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) {
                // Nope, but we know how to handle these. Keep looking:
                continue;
            }
            // We can't reason about what this instruction does. Bail:
            return;
        }
        int j = next_swappable_instruction(block, i, -1);
        if (j < 0) {
            return;
        }
        int k = j;
        int lineno = block->b_instr[j].i_lineno;
        for (int count = swap->i_oparg - 1; 0 < count; count--) {
            k = next_swappable_instruction(block, k, lineno);
            if (k < 0) {
                return;
            }
        }
        // The reordering is not safe if the two instructions to be swapped
        // store to the same location, or if any intervening instruction stores
        // to the same location as either of them.
        int store_j = STORES_TO(block->b_instr[j]);
        int store_k = STORES_TO(block->b_instr[k]);
        if (store_j >= 0 || store_k >= 0) {
            if (store_j == store_k) {
                return;
            }
            for (int idx = j + 1; idx < k; idx++) {
                int store_idx = STORES_TO(block->b_instr[idx]);
                if (store_idx >= 0 && (store_idx == store_j || store_idx == store_k)) {
                    return;
                }
            }
        }

        // Success!
        swap->i_opcode = NOP;
        struct instr temp = block->b_instr[j];
        block->b_instr[j] = block->b_instr[k];
        block->b_instr[k] = temp;
    }
}

// Attempt to eliminate jumps to jumps by updating inst to jump to
// target->i_target using the provided opcode. Return whether or not the
// optimization was successful.
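// For example, a JUMP whose target block starts with JUMP L2 is retargeted
// straight to L2 (provided the line numbers match).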
static bool
jump_thread(struct instr *inst, struct instr *target, int opcode)
{
    assert(!IS_VIRTUAL_OPCODE(opcode) || IS_VIRTUAL_JUMP_OPCODE(opcode));
    assert(is_jump(inst));
    assert(is_jump(target));
    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
    // changes (and we fall into an infinite loop):
    if (inst->i_lineno == target->i_lineno &&
        inst->i_target != target->i_target)
    {
        inst->i_target = target->i_target;
        inst->i_opcode = opcode;
        return true;
    }
    return false;
}

/* Maximum size of basic block that should be copied in optimizer */
#define MAX_COPY_SIZE 4

/* Optimization */
static int
optimize_basic_block(struct compiler *c, basicblock *bb, PyObject *consts)
{
    assert(PyList_CheckExact(consts));
    struct instr nop;
    nop.i_opcode = NOP;
    struct instr *target;
    for (int i = 0; i < bb->b_iused; i++) {
        struct instr *inst = &bb->b_instr[i];
        int oparg = inst->i_oparg;
        int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0;
        if (is_jump(inst) || is_block_push(inst)) {
            /* Skip over empty basic blocks. */
            while (inst->i_target->b_iused == 0) {
                inst->i_target = inst->i_target->b_next;
            }
            target = &inst->i_target->b_instr[0];
            assert(!IS_ASSEMBLER_OPCODE(target->i_opcode));
        }
        else {
            target = &nop;
        }
        assert(!IS_ASSEMBLER_OPCODE(inst->i_opcode));
        switch (inst->i_opcode) {
            /* Remove LOAD_CONST const; conditional jump */
            case LOAD_CONST:
            {
                PyObject* cnt;
                int is_true;
                int jump_if_true;
                switch(nextop) {
                    case POP_JUMP_IF_FALSE:
                    case POP_JUMP_IF_TRUE:
                        cnt = get_const_value(inst->i_opcode, oparg, consts);
                        if (cnt == NULL) {
                            goto error;
                        }
                        is_true = PyObject_IsTrue(cnt);
                        Py_DECREF(cnt);
                        if (is_true == -1) {
                            goto error;
                        }
                        inst->i_opcode = NOP;
                        jump_if_true = nextop == POP_JUMP_IF_TRUE;
                        if (is_true == jump_if_true) {
                            bb->b_instr[i+1].i_opcode = JUMP;
                            bb->b_nofallthrough = 1;
                        }
                        else {
                            bb->b_instr[i+1].i_opcode = NOP;
                        }
                        break;
                    case JUMP_IF_FALSE_OR_POP:
                    case JUMP_IF_TRUE_OR_POP:
                        cnt = get_const_value(inst->i_opcode, oparg, consts);
                        if (cnt == NULL) {
                            goto error;
                        }
                        is_true = PyObject_IsTrue(cnt);
                        Py_DECREF(cnt);
                        if (is_true == -1) {
                            goto error;
                        }
                        jump_if_true = nextop == JUMP_IF_TRUE_OR_POP;
                        if (is_true == jump_if_true) {
                            bb->b_instr[i+1].i_opcode = JUMP;
                            bb->b_nofallthrough = 1;
                        }
                        else {
                            inst->i_opcode = NOP;
                            bb->b_instr[i+1].i_opcode = NOP;
                        }
                        break;
                    case IS_OP:
                        cnt = get_const_value(inst->i_opcode, oparg, consts);
                        if (cnt == NULL) {
                            goto error;
                        }
                        int jump_op = i+2 < bb->b_iused ? bb->b_instr[i+2].i_opcode : 0;
                        if (Py_IsNone(cnt) && (jump_op == POP_JUMP_IF_FALSE || jump_op == POP_JUMP_IF_TRUE)) {
                            unsigned char nextarg = bb->b_instr[i+1].i_oparg;
                            inst->i_opcode = NOP;
                            bb->b_instr[i+1].i_opcode = NOP;
                            bb->b_instr[i+2].i_opcode = (nextarg ^ (jump_op == POP_JUMP_IF_FALSE)) ?
                                    POP_JUMP_IF_NOT_NONE : POP_JUMP_IF_NONE;
                        }
                        Py_DECREF(cnt);
                        break;
                }
                break;
            }

            /* Try to fold tuples of constants.
               Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1).
               Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2).
               Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). */
            case BUILD_TUPLE:
                if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
                    switch(oparg) {
                        case 1:
                            inst->i_opcode = NOP;
                            bb->b_instr[i+1].i_opcode = NOP;
                            continue;
                        case 2:
                        case 3:
                            inst->i_opcode = NOP;
                            bb->b_instr[i+1].i_opcode = SWAP;
                            continue;
                    }
                }
                if (i >= oparg) {
                    if (fold_tuple_on_constants(c, inst-oparg, oparg, consts)) {
                        goto error;
                    }
                }
                break;

            /* Simplify conditional jump to conditional jump where the
               result of the first test implies the success of a similar
               test or the failure of the opposite test.
               Arises in code like:
               "a and b or c"
               "(a and b) and c"
               "(a or b) or c"
               "(a or b) and c"
               x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_FALSE_OR_POP z
                  -->  x:JUMP_IF_FALSE_OR_POP z
               x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_TRUE_OR_POP z
                  -->  x:POP_JUMP_IF_FALSE y+1
               where y+1 is the instruction following the second test.
            */
            case JUMP_IF_FALSE_OR_POP:
                switch (target->i_opcode) {
                    case POP_JUMP_IF_FALSE:
                        i -= jump_thread(inst, target, POP_JUMP_IF_FALSE);
                        break;
                    case JUMP:
                    case JUMP_IF_FALSE_OR_POP:
                        i -= jump_thread(inst, target, JUMP_IF_FALSE_OR_POP);
                        break;
                    case JUMP_IF_TRUE_OR_POP:
                    case POP_JUMP_IF_TRUE:
                        if (inst->i_lineno == target->i_lineno) {
                            // We don't need to bother checking for loops here,
                            // since a block's b_next cannot point to itself:
                            assert(inst->i_target != inst->i_target->b_next);
                            inst->i_opcode = POP_JUMP_IF_FALSE;
                            inst->i_target = inst->i_target->b_next;
                            --i;
                        }
                        break;
                }
                break;
            case JUMP_IF_TRUE_OR_POP:
                switch (target->i_opcode) {
                    case POP_JUMP_IF_TRUE:
                        i -= jump_thread(inst, target, POP_JUMP_IF_TRUE);
                        break;
                    case JUMP:
                    case JUMP_IF_TRUE_OR_POP:
                        i -= jump_thread(inst, target, JUMP_IF_TRUE_OR_POP);
                        break;
                    case JUMP_IF_FALSE_OR_POP:
                    case POP_JUMP_IF_FALSE:
                        if (inst->i_lineno == target->i_lineno) {
                            // We don't need to bother checking for loops here,
                            // since a block's b_next cannot point to itself:
                            assert(inst->i_target != inst->i_target->b_next);
                            inst->i_opcode = POP_JUMP_IF_TRUE;
                            inst->i_target = inst->i_target->b_next;
                            --i;
                        }
                        break;
                }
                break;
            case POP_JUMP_IF_NOT_NONE:
            case POP_JUMP_IF_NONE:
                switch (target->i_opcode) {
                    case JUMP:
                        i -= jump_thread(inst, target, inst->i_opcode);
                }
                break;
            case POP_JUMP_IF_FALSE:
                switch (target->i_opcode) {
                    case JUMP:
                        i -= jump_thread(inst, target, POP_JUMP_IF_FALSE);
                }
                break;
            case POP_JUMP_IF_TRUE:
                switch (target->i_opcode) {
                    case JUMP:
                        i -= jump_thread(inst, target, POP_JUMP_IF_TRUE);
                }
                break;
            case JUMP:
                switch (target->i_opcode) {
                    case JUMP:
                        i -= jump_thread(inst, target, JUMP);
                }
                break;
            case FOR_ITER:
                if (target->i_opcode == JUMP) {
                    /* This will not work now because the jump (at target) could
                     * be forward or backward and FOR_ITER only jumps forward. We
                     * can re-enable this if ever we implement a backward version
                     * of FOR_ITER.
                     */
                    /*
                    i -= jump_thread(inst, target, FOR_ITER);
                    */
                }
                break;
            case SWAP:
                if (oparg == 1) {
                    inst->i_opcode = NOP;
                    break;
                }
                if (swaptimize(bb, &i)) {
                    goto error;
                }
                apply_static_swaps(bb, i);
                break;
            case KW_NAMES:
                break;
            case PUSH_NULL:
                if (nextop == LOAD_GLOBAL && (inst[1].i_oparg & 1) == 0) {
                    inst->i_opcode = NOP;
                    inst->i_oparg = 0;
                    inst[1].i_oparg |= 1;
                }
                break;
            default:
                /* All HAS_CONST opcodes should be handled with LOAD_CONST */
                assert (!HAS_CONST(inst->i_opcode));
        }
    }
    return 0;
error:
    return -1;
}

static bool
basicblock_has_lineno(const basicblock *bb) {
    for (int i = 0; i < bb->b_iused; i++) {
        if (bb->b_instr[i].i_lineno > 0) {
            return true;
        }
    }
    return false;
}

/* If this block ends with an unconditional jump to an exit block,
 * then remove the jump and extend this block with the target.
 */
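// For example, a block ending in "JUMP exit", where exit is a small exit
// block with no line numbers (such as an implicit "LOAD_CONST None;
// RETURN_VALUE"), absorbs a copy of exit's instructions and becomes an
// exit block itself.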
static int
extend_block(basicblock *bb) {
    if (bb->b_iused == 0) {
        return 0;
    }
    struct instr *last = &bb->b_instr[bb->b_iused-1];
    if (last->i_opcode != JUMP &&
        last->i_opcode != JUMP_FORWARD &&
        last->i_opcode != JUMP_BACKWARD) {
        return 0;
    }
    if (last->i_target->b_exit && last->i_target->b_iused <= MAX_COPY_SIZE) {
        basicblock *to_copy = last->i_target;
        if (basicblock_has_lineno(to_copy)) {
            /* copy only blocks without line number (like implicit 'return None's) */
            return 0;
        }
        last->i_opcode = NOP;
        for (int i = 0; i < to_copy->b_iused; i++) {
            int index = compiler_next_instr(bb);
            if (index < 0) {
                return -1;
            }
            bb->b_instr[index] = to_copy->b_instr[i];
        }
        bb->b_exit = 1;
    }
    return 0;
}

static void
clean_basic_block(basicblock *bb) {
    /* Remove NOPs when legal to do so. */
    int dest = 0;
    int prev_lineno = -1;
    for (int src = 0; src < bb->b_iused; src++) {
        int lineno = bb->b_instr[src].i_lineno;
        if (bb->b_instr[src].i_opcode == NOP) {
            /* Eliminate no-op if it doesn't have a line number */
            if (lineno < 0) {
                continue;
            }
            /* or, if the previous instruction had the same line number. */
            if (prev_lineno == lineno) {
                continue;
            }
            /* or, if the next instruction has same line number or no line number */
            if (src < bb->b_iused - 1) {
                int next_lineno = bb->b_instr[src+1].i_lineno;
                if (next_lineno == lineno) {
                    continue;
                }
                if (next_lineno < 0) {
                    COPY_INSTR_LOC(bb->b_instr[src], bb->b_instr[src+1]);
                    continue;
                }
            }
            else {
                basicblock *next = bb->b_next;
                while (next && next->b_iused == 0) {
                    next = next->b_next;
                }
                /* or if last instruction in BB and next BB has same line number */
                if (next) {
                    if (lineno == next->b_instr[0].i_lineno) {
                        continue;
                    }
                }
            }
        }
        if (dest != src) {
            bb->b_instr[dest] = bb->b_instr[src];
        }
        dest++;
        prev_lineno = lineno;
    }
    assert(dest <= bb->b_iused);
    bb->b_iused = dest;
}

static int
normalize_basic_block(basicblock *bb) {
    /* Mark blocks as exit and/or nofallthrough.
       Raise SystemError if CFG is malformed. */
    for (int i = 0; i < bb->b_iused; i++) {
        assert(!IS_ASSEMBLER_OPCODE(bb->b_instr[i].i_opcode));
        switch(bb->b_instr[i].i_opcode) {
            case RETURN_VALUE:
            case RAISE_VARARGS:
            case RERAISE:
                bb->b_exit = 1;
                bb->b_nofallthrough = 1;
                break;
            case JUMP:
            case JUMP_NO_INTERRUPT:
                bb->b_nofallthrough = 1;
                /* fall through */
            case POP_JUMP_IF_NOT_NONE:
            case POP_JUMP_IF_NONE:
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
            case FOR_ITER:
                if (i != bb->b_iused-1) {
                    PyErr_SetString(PyExc_SystemError, "malformed control flow graph.");
                    return -1;
                }
                /* Skip over empty basic blocks. */
                while (bb->b_instr[i].i_target->b_iused == 0) {
                    bb->b_instr[i].i_target = bb->b_instr[i].i_target->b_next;
                }
        }
    }
    return 0;
}

static int
mark_reachable(struct assembler *a) {
    basicblock **stack, **sp;
    sp = stack = (basicblock **)PyObject_Malloc(sizeof(basicblock *) * a->a_nblocks);
    if (stack == NULL) {
        return -1;
    }
    a->a_entry->b_predecessors = 1;
    *sp++ = a->a_entry;
    while (sp > stack) {
        basicblock *b = *(--sp);
        if (b->b_next && !b->b_nofallthrough) {
            if (b->b_next->b_predecessors == 0) {
                *sp++ = b->b_next;
            }
            b->b_next->b_predecessors++;
        }
        for (int i = 0; i < b->b_iused; i++) {
            basicblock *target;
            struct instr *instr = &b->b_instr[i];
            if (is_jump(instr) || is_block_push(instr)) {
                target = instr->i_target;
                if (target->b_predecessors == 0) {
                    *sp++ = target;
                }
                target->b_predecessors++;
            }
        }
    }
    PyObject_Free(stack);
    return 0;
}

static void
eliminate_empty_basic_blocks(basicblock *entry) {
    /* Eliminate empty blocks */
    for (basicblock *b = entry; b != NULL; b = b->b_next) {
        basicblock *next = b->b_next;
        if (next) {
            while (next->b_iused == 0 && next->b_next) {
                next = next->b_next;
            }
            b->b_next = next;
        }
    }
    for (basicblock *b = entry; b != NULL; b = b->b_next) {
        if (b->b_iused == 0) {
            continue;
        }
        if (is_jump(&b->b_instr[b->b_iused-1])) {
            basicblock *target = b->b_instr[b->b_iused-1].i_target;
            while (target->b_iused == 0) {
                target = target->b_next;
            }
            b->b_instr[b->b_iused-1].i_target = target;
        }
    }
}


/* If an instruction has no line number, but its predecessor in the BB does,
 * then copy the line number. If a successor block has no line number, and only
 * one predecessor, then inherit the line number.
 * This ensures that all exit blocks (with one predecessor) receive a line number.
 * Also reduces the size of the line number table,
 * but has no impact on the generated line number events.
 */
static void
propagate_line_numbers(struct assembler *a) {
    for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
        if (b->b_iused == 0) {
            continue;
        }

        // Not a real instruction, only to store positions
        // from previous instructions and propagate them.
        struct instr prev_instr = {
            .i_lineno = -1,
            .i_col_offset = -1,
            .i_end_lineno = -1,
            .i_end_col_offset = -1,
        };
        for (int i = 0; i < b->b_iused; i++) {
            if (b->b_instr[i].i_lineno < 0) {
                COPY_INSTR_LOC(prev_instr, b->b_instr[i]);
            }
            else {
                COPY_INSTR_LOC(b->b_instr[i], prev_instr);
            }
        }
        if (!b->b_nofallthrough && b->b_next->b_predecessors == 1) {
            assert(b->b_next->b_iused);
            if (b->b_next->b_instr[0].i_lineno < 0) {
                COPY_INSTR_LOC(prev_instr, b->b_next->b_instr[0]);
            }
        }
        if (is_jump(&b->b_instr[b->b_iused-1])) {
            basicblock *target = b->b_instr[b->b_iused-1].i_target;
            if (target->b_predecessors == 1) {
                if (target->b_instr[0].i_lineno < 0) {
                    COPY_INSTR_LOC(prev_instr, target->b_instr[0]);
                }
            }
        }
    }
}

/* Perform optimizations on a control flow graph.
   The consts object should still be in list form to allow new constants
   to be appended.

   All transformations keep the code size the same or smaller.
   For those that reduce size, the gaps are initially filled with
   NOPs.  Later those NOPs are removed.
*/

static int
optimize_cfg(struct compiler *c, struct assembler *a, PyObject *consts)
{
    for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
        if (optimize_basic_block(c, b, consts)) {
            return -1;
        }
        clean_basic_block(b);
        assert(b->b_predecessors == 0);
    }
    for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) {
        if (extend_block(b)) {
            return -1;
        }
    }
    if (mark_reachable(a)) {
        return -1;
    }
    /* Delete unreachable instructions */
    for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
        if (b->b_predecessors == 0) {
            b->b_iused = 0;
            b->b_nofallthrough = 0;
        }
    }
    eliminate_empty_basic_blocks(a->a_entry);
    for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
        clean_basic_block(b);
    }
    /* Delete jump instructions made redundant by previous step. If a non-empty
       block ends with a jump instruction, check if the next non-empty block
       reached through normal flow control is the target of that jump. If it
       is, then the jump instruction is redundant and can be deleted.
    */
    int maybe_empty_blocks = 0;
    for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
        if (b->b_iused > 0) {
            struct instr *b_last_instr = &b->b_instr[b->b_iused - 1];
            assert(!IS_ASSEMBLER_OPCODE(b_last_instr->i_opcode));
            if (b_last_instr->i_opcode == JUMP ||
                b_last_instr->i_opcode == JUMP_NO_INTERRUPT) {
                if (b_last_instr->i_target == b->b_next) {
                    assert(b->b_next->b_iused);
                    b->b_nofallthrough = 0;
                    b_last_instr->i_opcode = NOP;
                    maybe_empty_blocks = 1;
                }
            }
        }
    }
    if (maybe_empty_blocks) {
        eliminate_empty_basic_blocks(a->a_entry);
    }
    return 0;
}

// Remove trailing unused constants.
static int
trim_unused_consts(struct compiler *c, struct assembler *a, PyObject *consts)
{
    assert(PyList_CheckExact(consts));

    // The first constant may be a docstring; keep it always.
    int max_const_index = 0;
    for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) {
        for (int i = 0; i < b->b_iused; i++) {
            if ((b->b_instr[i].i_opcode == LOAD_CONST ||
                 b->b_instr[i].i_opcode == KW_NAMES) &&
                    b->b_instr[i].i_oparg > max_const_index) {
                max_const_index = b->b_instr[i].i_oparg;
            }
        }
    }
    if (max_const_index+1 < PyList_GET_SIZE(consts)) {
        //fprintf(stderr, "removing trailing consts: max=%d, size=%d\n",
        //        max_const_index, (int)PyList_GET_SIZE(consts));
        if (PyList_SetSlice(consts, max_const_index+1,
                            PyList_GET_SIZE(consts), NULL) < 0) {
            return 1;
        }
    }
    return 0;
}

static inline int
is_exit_without_lineno(basicblock *b) {
    if (!b->b_exit) {
        return 0;
    }
    for (int i = 0; i < b->b_iused; i++) {
        if (b->b_instr[i].i_lineno >= 0) {
            return 0;
        }
    }
    return 1;
}

/* PEP 626 mandates that the f_lineno of a frame is correct
 * after a frame terminates. It would be prohibitively expensive
 * to continuously update the f_lineno field at runtime,
 * so we make sure that all exiting instructions (raises and returns)
 * have a valid line number, allowing us to compute f_lineno lazily.
 * We can do this by duplicating the exit blocks without line number
 * so that none have more than one predecessor. We can then safely
 * copy the line number from the sole predecessor block.
 */
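// For example, if several jumps share a single bare RETURN_VALUE exit
// block, each jump gets its own copy, stamped with the jump's location by
// COPY_INSTR_LOC below.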
static int
duplicate_exits_without_lineno(struct compiler *c)
{
    /* Copy all exit blocks without line number that are targets of a jump.
     */
    for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) {
        if (b->b_iused > 0 && is_jump(&b->b_instr[b->b_iused-1])) {
            basicblock *target = b->b_instr[b->b_iused-1].i_target;
            if (is_exit_without_lineno(target) && target->b_predecessors > 1) {
                basicblock *new_target = compiler_copy_block(c, target);
                if (new_target == NULL) {
                    return -1;
                }
                COPY_INSTR_LOC(b->b_instr[b->b_iused-1], new_target->b_instr[0]);
                b->b_instr[b->b_iused-1].i_target = new_target;
                target->b_predecessors--;
                new_target->b_predecessors = 1;
                new_target->b_next = target->b_next;
                target->b_next = new_target;
            }
        }
    }
    /* Eliminate empty blocks */
    for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) {
        while (b->b_next && b->b_next->b_iused == 0) {
            b->b_next = b->b_next->b_next;
        }
    }
    /* Any remaining reachable exit blocks without line number can only be reached by
     * fall through, and thus can only have a single predecessor */
    for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) {
        if (!b->b_nofallthrough && b->b_next && b->b_iused > 0) {
            if (is_exit_without_lineno(b->b_next)) {
                assert(b->b_next->b_iused > 0);
                COPY_INSTR_LOC(b->b_instr[b->b_iused-1], b->b_next->b_instr[0]);
            }
        }
    }
    return 0;
}


/* Retained for API compatibility.
 * Optimization is now done in optimize_cfg */

PyObject *
PyCode_Optimize(PyObject *code, PyObject* Py_UNUSED(consts),
                PyObject *Py_UNUSED(names), PyObject *Py_UNUSED(lnotab_obj))
{
    Py_INCREF(code);
    return code;
}