Skip to content

Commit 18d16e9

Browse files
authored
gh-102676: Add more convenience properties to dis.Instruction (#103969)
Adds start_offset, cache_offset, end_offset, baseopcode, baseopname, jump_target and oparg to dis.Instruction. Also slightly improves the disassembly output by allowing opnames to overflow into the space reserved for opargs.
1 parent 845e593 commit 18d16e9

File tree

4 files changed

+435
-196
lines changed

4 files changed

+435
-196
lines changed

Doc/library/dis.rst

+42
Original file line numberDiff line numberDiff line change
@@ -342,10 +342,25 @@ details of bytecode instructions as :class:`Instruction` instances:
342342
human readable name for operation
343343

344344

345+
.. data:: baseopcode
346+
347+
numeric code for the base operation if operation is specialized;
348+
otherwise equal to :data:`opcode`
349+
350+
351+
.. data:: baseopname
352+
353+
human readable name for the base operation if operation is specialized;
354+
otherwise equal to :data:`opname`
355+
356+
345357
.. data:: arg
346358

347359
numeric argument to operation (if any), otherwise ``None``
348360

361+
.. data:: oparg
362+
363+
alias for :data:`arg`
349364

350365
.. data:: argval
351366

@@ -363,6 +378,22 @@ details of bytecode instructions as :class:`Instruction` instances:
363378
start index of operation within bytecode sequence
364379

365380

381+
.. data:: start_offset
382+
383+
start index of operation within bytecode sequence, including prefixed
384+
``EXTENDED_ARG`` operations if present; otherwise equal to :data:`offset`
385+
386+
387+
.. data:: cache_offset
388+
389+
start index of the cache entries following the operation
390+
391+
392+
.. data:: end_offset
393+
394+
end index of the cache entries following the operation
395+
396+
366397
.. data:: starts_line
367398

368399
line started by this opcode (if any), otherwise ``None``
@@ -373,6 +404,12 @@ details of bytecode instructions as :class:`Instruction` instances:
373404
``True`` if other code jumps to here, otherwise ``False``
374405

375406

407+
.. data:: jump_target
408+
409+
bytecode index of the jump target if this is a jump operation,
410+
otherwise ``None``
411+
412+
376413
.. data:: positions
377414

378415
:class:`dis.Positions` object holding the
@@ -384,6 +421,11 @@ details of bytecode instructions as :class:`Instruction` instances:
384421

385422
Field ``positions`` is added.
386423

424+
.. versionchanged:: 3.13
425+
426+
Added fields ``start_offset``, ``cache_offset``, ``end_offset``,
427+
``baseopname``, ``baseopcode``, ``jump_target`` and ``oparg``.
428+
387429

388430
.. class:: Positions
389431

Lib/dis.py

+90-20
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ def show_code(co, *, file=None):
265265
'argval',
266266
'argrepr',
267267
'offset',
268+
'start_offset',
268269
'starts_line',
269270
'is_jump_target',
270271
'positions'
@@ -278,6 +279,10 @@ def show_code(co, *, file=None):
278279
_Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg"
279280
_Instruction.argrepr.__doc__ = "Human readable description of operation argument"
280281
_Instruction.offset.__doc__ = "Start index of operation within bytecode sequence"
282+
_Instruction.start_offset.__doc__ = (
283+
"Start index of operation within bytecode sequence, including extended args if present; "
284+
"otherwise equal to Instruction.offset"
285+
)
281286
_Instruction.starts_line.__doc__ = "Line started by this opcode (if any), otherwise None"
282287
_Instruction.is_jump_target.__doc__ = "True if other code jumps to here, otherwise False"
283288
_Instruction.positions.__doc__ = "dis.Positions object holding the span of source code covered by this instruction"
@@ -288,8 +293,26 @@ def show_code(co, *, file=None):
288293
_OPNAME_WIDTH = 20
289294
_OPARG_WIDTH = 5
290295

296+
def _get_jump_target(op, arg, offset):
    """Return the bytecode offset a jump instruction transfers control to.

    *op* is the (possibly specialized) opcode, *arg* its numeric argument and
    *offset* the index of the instruction within the bytecode sequence.

    Return None when *op* is not a jump instruction.
    """
    base_op = _deoptop(op)
    cache_count = _inline_cache_entries[base_op]
    if base_op in hasjrel:
        # Relative jumps count 2-byte code units starting after the
        # instruction itself and after its inline cache entries.
        delta = -arg if _is_backward_jump(base_op) else arg
        return offset + 2 + delta*2 + 2 * cache_count
    if base_op in hasjabs:
        # Absolute jumps store the target as a code-unit index.
        return arg*2
    return None
313+
291314
class Instruction(_Instruction):
292-
"""Details for a bytecode operation
315+
"""Details for a bytecode operation.
293316
294317
Defined fields:
295318
opname - human readable name for operation
@@ -298,14 +321,55 @@ class Instruction(_Instruction):
298321
argval - resolved arg value (if known), otherwise same as arg
299322
argrepr - human readable description of operation argument
300323
offset - start index of operation within bytecode sequence
324+
start_offset - start index of operation within bytecode sequence including extended args if present;
325+
otherwise equal to Instruction.offset
301326
starts_line - line started by this opcode (if any), otherwise None
302327
is_jump_target - True if other code jumps to here, otherwise False
303328
positions - Optional dis.Positions object holding the span of source code
304329
covered by this instruction
305330
"""
306331

332+
@property
def oparg(self):
    """Numeric argument to the operation; same value as Instruction.arg."""
    return self.arg
336+
337+
@property
def baseopcode(self):
    """Numeric code of the base (non-specialized) form of this operation.

    For an operation that is not specialized this equals Instruction.opcode.
    """
    opcode = self.opcode
    return _deoptop(opcode)
344+
345+
@property
def baseopname(self):
    """Human readable name of the base (non-specialized) operation.

    For an operation that is not specialized this equals Instruction.opname.
    """
    base = self.baseopcode
    return opname[base]
352+
353+
@property
def cache_offset(self):
    """Index at which the cache entries following the operation begin.

    This is the position immediately after the operation's own 2-byte
    code unit.
    """
    return self.offset + 2
357+
358+
@property
def end_offset(self):
    """End index of the cache entries following the operation.

    Computed as cache_offset plus two bytes per inline cache entry.
    """
    # Look the cache count up by the base (de-specialized) opcode, as the
    # rest of this module does when indexing _inline_cache_entries, so that
    # a specialized instruction reports the same end offset as its base form.
    return self.cache_offset + _inline_cache_entries[self.baseopcode]*2
362+
363+
@property
def jump_target(self):
    """Bytecode index this operation jumps to.

    None is returned when the operation is not a jump.
    """
    return _get_jump_target(op=self.opcode, arg=self.arg, offset=self.offset)
370+
307371
def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
308-
"""Format instruction details for inclusion in disassembly output
372+
"""Format instruction details for inclusion in disassembly output.
309373
310374
*lineno_width* sets the width of the line number field (0 omits it)
311375
*mark_as_current* inserts a '-->' marker arrow as part of the line
@@ -335,12 +399,19 @@ def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
335399
fields.append(self.opname.ljust(_OPNAME_WIDTH))
336400
# Column: Opcode argument
337401
if self.arg is not None:
338-
fields.append(repr(self.arg).rjust(_OPARG_WIDTH))
402+
arg = repr(self.arg)
403+
# If opname is longer than _OPNAME_WIDTH, we allow it to overflow into
404+
# the space reserved for oparg. This results in fewer misaligned opargs
405+
# in the disassembly output.
406+
opname_excess = max(0, len(self.opname) - _OPNAME_WIDTH)
407+
fields.append(repr(self.arg).rjust(_OPARG_WIDTH - opname_excess))
339408
# Column: Opcode argument details
340409
if self.argrepr:
341410
fields.append('(' + self.argrepr + ')')
342411
return ' '.join(fields).rstrip()
343412

413+
def __str__(self):
    """Return this instruction's disassembly line using default formatting."""
    return self._disassemble()
344415

345416
def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False):
346417
"""Iterator for the opcodes in methods, functions or code
@@ -454,7 +525,7 @@ def _get_instructions_bytes(code, varname_from_oparg=None,
454525
for i in range(start, end):
455526
labels.add(target)
456527
starts_line = None
457-
for offset, op, arg in _unpack_opargs(code):
528+
for offset, start_offset, op, arg in _unpack_opargs(code):
458529
if linestarts is not None:
459530
starts_line = linestarts.get(offset, None)
460531
if starts_line is not None:
@@ -526,7 +597,7 @@ def _get_instructions_bytes(code, varname_from_oparg=None,
526597
argrepr = _intrinsic_2_descs[arg]
527598
yield Instruction(_all_opname[op], op,
528599
arg, argval, argrepr,
529-
offset, starts_line, is_jump_target, positions)
600+
offset, start_offset, starts_line, is_jump_target, positions)
530601
caches = _inline_cache_entries[deop]
531602
if not caches:
532603
continue
@@ -546,7 +617,7 @@ def _get_instructions_bytes(code, varname_from_oparg=None,
546617
else:
547618
argrepr = ""
548619
yield Instruction(
549-
"CACHE", CACHE, 0, None, argrepr, offset, None, False,
620+
"CACHE", CACHE, 0, None, argrepr, offset, offset, None, False,
550621
Positions(*next(co_positions, ()))
551622
)
552623

@@ -632,6 +703,7 @@ def _disassemble_str(source, **kwargs):
632703

633704
def _unpack_opargs(code):
634705
extended_arg = 0
706+
extended_args_offset = 0 # Number of EXTENDED_ARG instructions preceding the current instruction
635707
caches = 0
636708
for i in range(0, len(code), 2):
637709
# Skip inline CACHE entries:
@@ -652,7 +724,13 @@ def _unpack_opargs(code):
652724
else:
653725
arg = None
654726
extended_arg = 0
655-
yield (i, op, arg)
727+
if deop == EXTENDED_ARG:
728+
extended_args_offset += 1
729+
yield (i, i, op, arg)
730+
else:
731+
start_offset = i - extended_args_offset*2
732+
yield (i, start_offset, op, arg)
733+
extended_args_offset = 0
656734

657735
def findlabels(code):
658736
"""Detect all offsets in a byte code which are jump targets.
@@ -661,18 +739,10 @@ def findlabels(code):
661739
662740
"""
663741
labels = []
664-
for offset, op, arg in _unpack_opargs(code):
742+
for offset, _, op, arg in _unpack_opargs(code):
665743
if arg is not None:
666-
deop = _deoptop(op)
667-
caches = _inline_cache_entries[deop]
668-
if deop in hasjrel:
669-
if _is_backward_jump(deop):
670-
arg = -arg
671-
label = offset + 2 + arg*2
672-
label += 2 * caches
673-
elif deop in hasjabs:
674-
label = arg*2
675-
else:
744+
label = _get_jump_target(op, arg, offset)
745+
if label is None:
676746
continue
677747
if label not in labels:
678748
labels.append(label)
@@ -701,7 +771,7 @@ def _find_imports(co):
701771

702772
consts = co.co_consts
703773
names = co.co_names
704-
opargs = [(op, arg) for _, op, arg in _unpack_opargs(co.co_code)
774+
opargs = [(op, arg) for _, _, op, arg in _unpack_opargs(co.co_code)
705775
if op != EXTENDED_ARG]
706776
for i, (op, oparg) in enumerate(opargs):
707777
if op == IMPORT_NAME and i >= 2:
@@ -723,7 +793,7 @@ def _find_store_names(co):
723793
}
724794

725795
names = co.co_names
726-
for _, op, arg in _unpack_opargs(co.co_code):
796+
for _, _, op, arg in _unpack_opargs(co.co_code):
727797
if op in STORE_OPS:
728798
yield names[arg]
729799

0 commit comments

Comments
 (0)