Skip to content

Commit 18c5f9d

Browse files
pablogsal authored and miss-islington committed
bpo-37500: Make sure dead code does not generate bytecode but also detect syntax errors (GH-14612)
https://bugs.python.org/issue37500 Add a new field to the compiler structure that can be set so that no bytecode is emitted. This makes it possible to detect errors by walking the nodes while preserving optimizations. https://bugs.python.org/issue37500
1 parent cd6e83b commit 18c5f9d

File tree

4 files changed

+174
-18
lines changed

4 files changed

+174
-18
lines changed

Lib/test/test_compile.py

+34
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,40 @@ def test_stack_overflow(self):
697697
# complex statements.
698698
compile("if a: b\n" * 200000, "<dummy>", "exec")
699699

700+
# Multiple users rely on the fact that CPython does not generate
701+
# bytecode for dead code blocks. See bpo-37500 for more context.
702+
@support.cpython_only
703+
def test_dead_blocks_do_not_generate_bytecode(self):
704+
def unused_block_if():
705+
if 0:
706+
return 42
707+
708+
def unused_block_while():
709+
while 0:
710+
return 42
711+
712+
def unused_block_if_else():
713+
if 1:
714+
return None
715+
else:
716+
return 42
717+
718+
def unused_block_while_else():
719+
while 1:
720+
return None
721+
else:
722+
return 42
723+
724+
funcs = [unused_block_if, unused_block_while,
725+
unused_block_if_else, unused_block_while_else]
726+
727+
for func in funcs:
728+
opcodes = list(dis.get_instructions(func))
729+
self.assertEqual(2, len(opcodes))
730+
self.assertEqual('LOAD_CONST', opcodes[0].opname)
731+
self.assertEqual(None, opcodes[0].argval)
732+
self.assertEqual('RETURN_VALUE', opcodes[1].opname)
733+
700734

701735
class TestExpressionStackSize(unittest.TestCase):
702736
# These tests check that the computed stack size for a code object

Lib/test/test_syntax.py

+35-6
Original file line numberDiff line numberDiff line change
@@ -697,18 +697,47 @@ def test_break_outside_loop(self):
697697
self._check_error("break", "outside loop")
698698

699699
def test_yield_outside_function(self):
700-
self._check_error("if 0: yield", "outside function")
701-
self._check_error("class C:\n if 0: yield", "outside function")
700+
self._check_error("if 0: yield", "outside function")
701+
self._check_error("if 0: yield\nelse: x=1", "outside function")
702+
self._check_error("if 1: pass\nelse: yield", "outside function")
703+
self._check_error("while 0: yield", "outside function")
704+
self._check_error("while 0: yield\nelse: x=1", "outside function")
705+
self._check_error("class C:\n if 0: yield", "outside function")
706+
self._check_error("class C:\n if 1: pass\n else: yield",
707+
"outside function")
708+
self._check_error("class C:\n while 0: yield", "outside function")
709+
self._check_error("class C:\n while 0: yield\n else: x = 1",
710+
"outside function")
702711

703712
def test_return_outside_function(self):
704-
self._check_error("if 0: return", "outside function")
705-
self._check_error("class C:\n if 0: return", "outside function")
713+
self._check_error("if 0: return", "outside function")
714+
self._check_error("if 0: return\nelse: x=1", "outside function")
715+
self._check_error("if 1: pass\nelse: return", "outside function")
716+
self._check_error("while 0: return", "outside function")
717+
self._check_error("class C:\n if 0: return", "outside function")
718+
self._check_error("class C:\n while 0: return", "outside function")
719+
self._check_error("class C:\n while 0: return\n else: x=1",
720+
"outside function")
721+
self._check_error("class C:\n if 0: return\n else: x= 1",
722+
"outside function")
723+
self._check_error("class C:\n if 1: pass\n else: return",
724+
"outside function")
706725

707726
def test_break_outside_loop(self):
708-
self._check_error("if 0: break", "outside loop")
727+
self._check_error("if 0: break", "outside loop")
728+
self._check_error("if 0: break\nelse: x=1", "outside loop")
729+
self._check_error("if 1: pass\nelse: break", "outside loop")
730+
self._check_error("class C:\n if 0: break", "outside loop")
731+
self._check_error("class C:\n if 1: pass\n else: break",
732+
"outside loop")
709733

710734
def test_continue_outside_loop(self):
711-
self._check_error("if 0: continue", "not properly in loop")
735+
self._check_error("if 0: continue", "not properly in loop")
736+
self._check_error("if 0: continue\nelse: x=1", "not properly in loop")
737+
self._check_error("if 1: pass\nelse: continue", "not properly in loop")
738+
self._check_error("class C:\n if 0: continue", "not properly in loop")
739+
self._check_error("class C:\n if 1: pass\n else: continue",
740+
"not properly in loop")
712741

713742
def test_unexpected_indent(self):
714743
self._check_error("foo()\n bar()\n", "unexpected indent",

Lib/test/test_sys_settrace.py

+41-7
Original file line numberDiff line numberDiff line change
@@ -53,22 +53,52 @@ def basic():
5353
# following that clause?
5454

5555

56-
# The entire "while 0:" statement is optimized away. No code
57-
# exists for it, so the line numbers skip directly from "del x"
58-
# to "x = 1".
59-
def arigo_example():
56+
# Some constructs like "while 0:", "if 0:" or "if 1:...else:..." are optimized
57+
# away. No code exists for them, so the line numbers skip directly from
58+
# "del x" to "x = 1".
59+
def arigo_example0():
6060
x = 1
6161
del x
6262
while 0:
6363
pass
6464
x = 1
6565

66-
arigo_example.events = [(0, 'call'),
66+
arigo_example0.events = [(0, 'call'),
6767
(1, 'line'),
6868
(2, 'line'),
6969
(5, 'line'),
7070
(5, 'return')]
7171

72+
def arigo_example1():
73+
x = 1
74+
del x
75+
if 0:
76+
pass
77+
x = 1
78+
79+
arigo_example1.events = [(0, 'call'),
80+
(1, 'line'),
81+
(2, 'line'),
82+
(5, 'line'),
83+
(5, 'return')]
84+
85+
def arigo_example2():
86+
x = 1
87+
del x
88+
if 1:
89+
x = 1
90+
else:
91+
pass
92+
return None
93+
94+
arigo_example2.events = [(0, 'call'),
95+
(1, 'line'),
96+
(2, 'line'),
97+
(4, 'line'),
98+
(7, 'line'),
99+
(7, 'return')]
100+
101+
72102
# check that lines consisting of just one instruction get traced:
73103
def one_instr_line():
74104
x = 1
@@ -349,8 +379,12 @@ def fn(*args):
349379

350380
def test_01_basic(self):
351381
self.run_test(basic)
352-
def test_02_arigo(self):
353-
self.run_test(arigo_example)
382+
def test_02_arigo0(self):
383+
self.run_test(arigo_example0)
384+
def test_02_arigo1(self):
385+
self.run_test(arigo_example1)
386+
def test_02_arigo2(self):
387+
self.run_test(arigo_example2)
354388
def test_03_one_instr(self):
355389
self.run_test(one_instr_line)
356390
def test_04_no_pop_blocks(self):

Python/compile.c

+64-5
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,11 @@ struct compiler {
161161
int c_optimize; /* optimization level */
162162
int c_interactive; /* true if in interactive mode */
163163
int c_nestlevel;
164+
int c_do_not_emit_bytecode; /* The compiler won't emit any bytecode
165+
if this value is different from zero.
166+
This can be used to temporarily visit
167+
nodes without emitting bytecode to
168+
check only errors. */
164169

165170
PyObject *c_const_cache; /* Python dict holding all constants,
166171
including names tuple */
@@ -340,6 +345,7 @@ PyAST_CompileObject(mod_ty mod, PyObject *filename, PyCompilerFlags *flags,
340345
c.c_flags = flags;
341346
c.c_optimize = (optimize == -1) ? config->optimization_level : optimize;
342347
c.c_nestlevel = 0;
348+
c.c_do_not_emit_bytecode = 0;
343349

344350
if (!_PyAST_Optimize(mod, arena, c.c_optimize)) {
345351
goto finally;
@@ -1152,6 +1158,9 @@ compiler_addop(struct compiler *c, int opcode)
11521158
struct instr *i;
11531159
int off;
11541160
assert(!HAS_ARG(opcode));
1161+
if (c->c_do_not_emit_bytecode) {
1162+
return 1;
1163+
}
11551164
off = compiler_next_instr(c, c->u->u_curblock);
11561165
if (off < 0)
11571166
return 0;
@@ -1305,6 +1314,10 @@ merge_consts_recursive(struct compiler *c, PyObject *o)
13051314
static Py_ssize_t
13061315
compiler_add_const(struct compiler *c, PyObject *o)
13071316
{
1317+
if (c->c_do_not_emit_bytecode) {
1318+
return 0;
1319+
}
1320+
13081321
PyObject *key = merge_consts_recursive(c, o);
13091322
if (key == NULL) {
13101323
return -1;
@@ -1318,6 +1331,10 @@ compiler_add_const(struct compiler *c, PyObject *o)
13181331
static int
13191332
compiler_addop_load_const(struct compiler *c, PyObject *o)
13201333
{
1334+
if (c->c_do_not_emit_bytecode) {
1335+
return 1;
1336+
}
1337+
13211338
Py_ssize_t arg = compiler_add_const(c, o);
13221339
if (arg < 0)
13231340
return 0;
@@ -1328,6 +1345,10 @@ static int
13281345
compiler_addop_o(struct compiler *c, int opcode, PyObject *dict,
13291346
PyObject *o)
13301347
{
1348+
if (c->c_do_not_emit_bytecode) {
1349+
return 1;
1350+
}
1351+
13311352
Py_ssize_t arg = compiler_add_o(c, dict, o);
13321353
if (arg < 0)
13331354
return 0;
@@ -1339,6 +1360,11 @@ compiler_addop_name(struct compiler *c, int opcode, PyObject *dict,
13391360
PyObject *o)
13401361
{
13411362
Py_ssize_t arg;
1363+
1364+
if (c->c_do_not_emit_bytecode) {
1365+
return 1;
1366+
}
1367+
13421368
PyObject *mangled = _Py_Mangle(c->u->u_private, o);
13431369
if (!mangled)
13441370
return 0;
@@ -1359,6 +1385,10 @@ compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg)
13591385
struct instr *i;
13601386
int off;
13611387

1388+
if (c->c_do_not_emit_bytecode) {
1389+
return 1;
1390+
}
1391+
13621392
/* oparg value is unsigned, but a signed C int is usually used to store
13631393
it in the C code (like Python/ceval.c).
13641394
@@ -1385,6 +1415,10 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute)
13851415
struct instr *i;
13861416
int off;
13871417

1418+
if (c->c_do_not_emit_bytecode) {
1419+
return 1;
1420+
}
1421+
13881422
assert(HAS_ARG(opcode));
13891423
assert(b != NULL);
13901424
off = compiler_next_instr(c, c->u->u_curblock);
@@ -1519,6 +1553,17 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute)
15191553
} \
15201554
}
15211555

1556+
/* These macros allow checking only for errors without emitting bytecode
1557+
* while visiting nodes.
1558+
*/
1559+
1560+
#define BEGIN_DO_NOT_EMIT_BYTECODE { \
1561+
c->c_do_not_emit_bytecode++;
1562+
1563+
#define END_DO_NOT_EMIT_BYTECODE \
1564+
c->c_do_not_emit_bytecode--; \
1565+
}
1566+
15221567
/* Search if variable annotations are present statically in a block. */
15231568

15241569
static int
@@ -2546,13 +2591,23 @@ compiler_if(struct compiler *c, stmt_ty s)
25462591
return 0;
25472592

25482593
constant = expr_constant(s->v.If.test);
2549-
/* constant = 0: "if 0" Leave the optimizations to
2550-
* the pephole optimizer to check for syntax errors
2551-
* in the block.
2594+
/* constant = 0: "if 0"
25522595
* constant = 1: "if 1", "if 2", ...
25532596
* constant = -1: rest */
2554-
if (constant == 1) {
2597+
if (constant == 0) {
2598+
BEGIN_DO_NOT_EMIT_BYTECODE
2599+
VISIT_SEQ(c, stmt, s->v.If.body);
2600+
END_DO_NOT_EMIT_BYTECODE
2601+
if (s->v.If.orelse) {
2602+
VISIT_SEQ(c, stmt, s->v.If.orelse);
2603+
}
2604+
} else if (constant == 1) {
25552605
VISIT_SEQ(c, stmt, s->v.If.body);
2606+
if (s->v.If.orelse) {
2607+
BEGIN_DO_NOT_EMIT_BYTECODE
2608+
VISIT_SEQ(c, stmt, s->v.If.orelse);
2609+
END_DO_NOT_EMIT_BYTECODE
2610+
}
25562611
} else {
25572612
if (asdl_seq_LEN(s->v.If.orelse)) {
25582613
next = compiler_new_block(c);
@@ -2662,8 +2717,12 @@ compiler_while(struct compiler *c, stmt_ty s)
26622717
int constant = expr_constant(s->v.While.test);
26632718

26642719
if (constant == 0) {
2665-
if (s->v.While.orelse)
2720+
BEGIN_DO_NOT_EMIT_BYTECODE
2721+
VISIT_SEQ(c, stmt, s->v.While.body);
2722+
END_DO_NOT_EMIT_BYTECODE
2723+
if (s->v.While.orelse) {
26662724
VISIT_SEQ(c, stmt, s->v.While.orelse);
2725+
}
26672726
return 1;
26682727
}
26692728
loop = compiler_new_block(c);

0 commit comments

Comments
 (0)