Skip to content

Commit 4926dd0

Browse files
committed
Add Inquisitor to explain mode
1 parent bccd56d commit 4926dd0

File tree

13 files changed

+357
-16
lines changed

13 files changed

+357
-16
lines changed

hypothesis-python/RELEASE.rst

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
RELEASE_TYPE: minor
2+
3+
This release upgrades the :ref:`explain phase <phases>` (:issue:`3411`).
4+
5+
* Following the first failure, Hypothesis will (:ref:`usually <phases>`) track which
6+
lines of code were executed by passing and failing examples, and report where they
7+
diverged -- with some heuristics to drop unhelpful reports. This is an existing
8+
feature, now upgraded and newly enabled by default.
9+
10+
* After shrinking to a minimal failing example, Hypothesis will try to find parts of
11+
the example -- e.g. separate args to :func:`@given() <hypothesis.given>` -- which
12+
can vary freely without changing the result of that minimal failing example.
13+
If the automated experiments run without finding a passing variation, we leave a
14+
comment in the final report:
15+
16+
.. code-block:: python
17+
18+
test_x_divided_by_y(
19+
x=0, # or any other generated value
20+
y=0,
21+
)
22+
23+
Remember that the *lack* of an explanation sometimes just means that Hypothesis
24+
couldn't efficiently find one, not that no explanation (or simpler failing example)
25+
exists.

hypothesis-python/docs/settings.rst

+27-2
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,37 @@ Hypothesis divides tests into logically distinct phases:
6060
4. Mutating examples for :ref:`targeted property-based testing <targeted-search>` (requires generate phase).
6161
5. Attempting to shrink an example found in previous phases (other than phase 1 - explicit examples cannot be shrunk).
6262
This turns potentially large and complicated examples which may be hard to read into smaller and simpler ones.
63-
6. Attempting to explain the cause of the failure, by identifying suspicious lines of code
64-
(e.g. the earliest lines which are never run on passing inputs, and always run on failures).
63+
6. Attempting to explain why your test failed (requires shrink phase).
64+
65+
.. note::
66+
67+
The explain phase has two parts, each of which is best-effort -- if Hypothesis can't
68+
find a useful explanation, we'll just print the minimal failing example.
69+
70+
Following the first failure, Hypothesis will (:ref:`usually <phases>`) track which
71+
lines of code are always run on failing but never on passing inputs.
6572
This relies on :func:`python:sys.settrace`, and is therefore automatically disabled on
6673
PyPy or if you are using :pypi:`coverage` or a debugger. If there are no clearly
6774
suspicious lines of code, :pep:`we refuse the temptation to guess <20>`.
6875

76+
After shrinking to a minimal failing example, Hypothesis will try to find parts of
77+
the example -- e.g. separate args to :func:`@given() <hypothesis.given>` -- which
78+
can vary freely without changing the result of that minimal failing example.
79+
If the automated experiments run without finding a passing variation, we leave a
80+
comment in the final report:
81+
82+
.. code-block:: python
83+
84+
test_x_divided_by_y(
85+
x=0, # or any other generated value
86+
y=0,
87+
)
88+
89+
Remember that the *lack* of an explanation sometimes just means that Hypothesis
90+
couldn't efficiently find one, not that no explanation (or simpler failing example)
91+
exists.
92+
93+
6994
The phases setting provides you with fine grained control over which of these run,
7095
with each phase corresponding to a value on the :class:`~hypothesis.Phase` enum:
7196

hypothesis-python/src/hypothesis/control.py

+20-3
Original file line numberDiff line numberDiff line change
@@ -74,21 +74,38 @@ def __init__(self, data, is_final=False, close_on_capture=True):
7474
# The printer will discard duplicates which return different representations.
7575
self.known_object_printers = defaultdict(list)
7676

77-
def record_call(self, obj, func, a, kw):
77+
def record_call(self, obj, func, args, kwargs, arg_slices=None):
7878
name = get_pretty_function_description(func)
7979
self.known_object_printers[IDKey(obj)].append(
80-
lambda obj, p, cycle: p.text("<...>") if cycle else p.repr_call(name, a, kw)
80+
lambda obj, p, cycle: (
81+
p.text("<...>")
82+
if cycle
83+
else p.repr_call(name, args, kwargs, arg_slices=arg_slices)
84+
)
8185
)
8286

8387
def prep_args_kwargs_from_strategies(self, arg_strategies, kwarg_strategies):
8488
arg_labels = {}
8589
all_s = [(None, s) for s in arg_strategies] + list(kwarg_strategies.items())
8690
args = []
8791
kwargs = {}
88-
for k, s in all_s:
92+
for i, (k, s) in enumerate(all_s):
93+
start_idx = self.data.index
8994
obj = self.data.draw(s)
95+
end_idx = self.data.index
9096
assert k is not None
9197
kwargs[k] = obj
98+
99+
# This high up the stack, we can't see or really do much with the conjecture
100+
# Example objects - not least because they're only materialized after the
101+
# test case is completed. Instead, we'll stash the (start_idx, end_idx)
102+
# pair on our data object for the ConjectureRunner engine to deal with, and
103+
# pass a dict of such out so that the pretty-printer knows where to place
104+
# the which-parts-matter comments later.
105+
if start_idx != end_idx:
106+
arg_labels[k or i] = (start_idx, end_idx)
107+
self.data.arg_slices.add((start_idx, end_idx))
108+
92109
return args, kwargs, arg_labels
93110

94111
def __enter__(self):

hypothesis-python/src/hypothesis/core.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -760,12 +760,13 @@ def run(data):
760760
args = self.stuff.args
761761
kwargs = dict(self.stuff.kwargs)
762762
if example_kwargs is None:
763-
a, kw, _ = context.prep_args_kwargs_from_strategies(
763+
a, kw, argslices = context.prep_args_kwargs_from_strategies(
764764
(), self.stuff.given_kwargs
765765
)
766766
assert not a, "strategies all moved to kwargs by now"
767767
else:
768768
kw = example_kwargs
769+
argslices = {}
769770
kwargs.update(kw)
770771
if expected_failure is not None:
771772
nonlocal text_repr
@@ -785,7 +786,11 @@ def run(data):
785786
args,
786787
kwargs,
787788
force_split=True,
789+
arg_slices=argslices,
788790
)
791+
if (0, 0) in context.data.slice_comments:
792+
printer.break_()
793+
printer.text("# " + context.data.slice_comments[(0, 0)])
789794
report(printer.getvalue())
790795
return test(*args, **kwargs)
791796

@@ -966,6 +971,7 @@ def run_engine(self):
966971
fragments = []
967972

968973
ran_example = ConjectureData.for_buffer(falsifying_example.buffer)
974+
ran_example.slice_comments = falsifying_example.slice_comments
969975
assert info.__expected_exception is not None
970976
try:
971977
with with_reporter(fragments.append):

hypothesis-python/src/hypothesis/internal/conjecture/data.py

+9
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,8 @@ class ConjectureResult:
776776
tags: FrozenSet[StructuralCoverageTag] = attr.ib()
777777
forced_indices: FrozenSet[int] = attr.ib(repr=False)
778778
examples: Examples = attr.ib(repr=False)
779+
arg_slices: Set[Tuple[int, int]] = attr.ib(repr=False)
780+
slice_comments: Dict[Tuple[int, int], str] = attr.ib(repr=False)
779781

780782
index: int = attr.ib(init=False)
781783

@@ -860,6 +862,11 @@ def __init__(
860862
self.depth = -1
861863
self.__example_record = ExampleRecord()
862864

865+
# Slice indices for discrete reportable parts that which-parts-matter can
866+
# try varying, to report if the minimal example always fails anyway.
867+
self.arg_slices: Set[Tuple[int, int]] = set()
868+
self.slice_comments: Dict[Tuple[int, int], str] = {}
869+
863870
self.extra_information = ExtraInformation()
864871

865872
self.start_example(TOP_LABEL)
@@ -893,6 +900,8 @@ def as_result(self) -> Union[ConjectureResult, _Overrun]:
893900
target_observations=self.target_observations,
894901
tags=frozenset(self.tags),
895902
forced_indices=frozenset(self.forced_indices),
903+
arg_slices=self.arg_slices,
904+
slice_comments=self.slice_comments,
896905
)
897906
assert self.__result is not None
898907
self.blocks.transfer_ownership(self.__result)

hypothesis-python/src/hypothesis/internal/conjecture/engine.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -985,7 +985,13 @@ def shrink(self, example, predicate=None, allow_transition=None):
985985
return s.shrink_target
986986

987987
def new_shrinker(self, example, predicate=None, allow_transition=None):
988-
return Shrinker(self, example, predicate, allow_transition)
988+
return Shrinker(
989+
self,
990+
example,
991+
predicate,
992+
allow_transition,
993+
explain=Phase.explain in self.settings.phases,
994+
)
989995

990996
def cached_test_function(self, buffer, error_on_discard=False, extend=0):
991997
"""Checks the tree to see if we've tested this buffer, and returns the
@@ -1076,6 +1082,17 @@ def event_to_string(self, event):
10761082
pass
10771083
return result
10781084

1085+
def passing_buffers(self, prefix=b""):
1086+
"""Return a collection of bytestrings which cause the test to pass.
1087+
1088+
Optionally restrict this by a certain prefix, which is useful for explain mode.
1089+
"""
1090+
return frozenset(
1091+
buf
1092+
for buf in self.__data_cache
1093+
if buf.startswith(prefix) and self.__data_cache[buf].status == Status.VALID
1094+
)
1095+
10791096

10801097
class ContainsDiscard(Exception):
10811098
pass

hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py

+139-2
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ def accept(self):
261261
accept.__name__ = fn.__name__
262262
return property(accept)
263263

264-
def __init__(self, engine, initial, predicate, allow_transition):
264+
def __init__(self, engine, initial, predicate, allow_transition, explain):
265265
"""Create a shrinker for a particular engine, with a given starting
266266
point and predicate. When shrink() is called it will attempt to find an
267267
example for which predicate is True and which is strictly smaller than
@@ -300,6 +300,8 @@ def __init__(self, engine, initial, predicate, allow_transition):
300300
# testing and learning purposes.
301301
self.extra_dfas = {}
302302

303+
self.should_explain = explain
304+
303305
@derived_value # type: ignore
304306
def cached_calculations(self):
305307
return {}
@@ -437,12 +439,15 @@ def shrink(self):
437439
if not any(self.shrink_target.buffer) or self.incorporate_new_buffer(
438440
bytes(len(self.shrink_target.buffer))
439441
):
442+
self.explain()
440443
return
441444

442445
try:
443446
self.greedy_shrink()
444447
except StopShrinking:
445-
pass
448+
# If we stopped shrinking because we're making slow progress (instead of
449+
# reaching a local optimum), don't run the explain-phase logic.
450+
self.should_explain = False
446451
finally:
447452
if self.engine.report_debug_info:
448453

@@ -488,6 +493,138 @@ def s(n):
488493
)
489494
)
490495
self.debug("")
496+
self.explain()
497+
498+
def explain(self):
499+
if not self.should_explain or not self.shrink_target.arg_slices:
500+
return
501+
from hypothesis.internal.conjecture.engine import BUFFER_SIZE
502+
503+
self.max_stall = 1e999
504+
shrink_target = self.shrink_target
505+
buffer = shrink_target.buffer
506+
chunks = defaultdict(list)
507+
508+
# Before we start running experiments, let's check for known inputs which would
509+
# make them redundant. The shrinking process means that we've already tried many
510+
# variations on the minimal example, so this can save a lot of time.
511+
seen_passing_buffers = self.engine.passing_buffers(
512+
prefix=buffer[: min(self.shrink_target.arg_slices)[0]]
513+
)
514+
515+
# Now that we've shrunk to a minimal failing example, it's time to try
516+
# varying each part that we've noted will go in the final report. Consider
517+
# slices in largest-first order
518+
for start, end in sorted(
519+
self.shrink_target.arg_slices, key=lambda x: (-(x[1] - x[0]), x)
520+
):
521+
# Check for any previous examples that match the prefix and suffix,
522+
# so we can skip if we found a passing example while shrinking.
523+
if any(
524+
seen.startswith(buffer[:start]) and seen.endswith(buffer[end:])
525+
for seen in seen_passing_buffers
526+
):
527+
continue
528+
529+
# Run our experiments
530+
n_same_failures = 0
531+
note = "or any other generated value"
532+
# TODO: is 100 same-failures out of 500 attempts a good heuristic?
533+
for n_attempt in range(500): # pragma: no branch
534+
# no-branch here because we don't coverage-test the abort-at-500 logic.
535+
536+
if n_attempt - 10 > n_same_failures * 5:
537+
# stop early if we're seeing mostly invalid examples
538+
break # pragma: no cover
539+
540+
buf_attempt_fixed = bytearray(buffer)
541+
buf_attempt_fixed[start:end] = [
542+
self.random.randint(0, 255) for _ in range(end - start)
543+
]
544+
result = self.engine.cached_test_function(
545+
buf_attempt_fixed, extend=BUFFER_SIZE - len(buf_attempt_fixed)
546+
)
547+
548+
# Turns out this was a variable-length part, so grab the infix...
549+
if (
550+
result.status == Status.OVERRUN
551+
or len(buf_attempt_fixed) != len(result.buffer)
552+
or not result.buffer.endswith(buffer[end:])
553+
):
554+
for ex, res in zip(shrink_target.examples, result.examples):
555+
assert ex.start == res.start
556+
assert ex.start <= start
557+
assert ex.label == res.label
558+
if start == ex.start and end == ex.end:
559+
res_end = res.end
560+
break
561+
else:
562+
raise NotImplementedError("Expected matching prefixes")
563+
564+
buf_attempt_fixed = (
565+
buffer[:start] + result.buffer[start:res_end] + buffer[end:]
566+
)
567+
chunks[(start, end)].append(result.buffer[start:res_end])
568+
result = self.engine.cached_test_function(buf_attempt_fixed)
569+
570+
if (
571+
result.status == Status.OVERRUN
572+
or len(buf_attempt_fixed) != len(result.buffer)
573+
or not result.buffer.endswith(buffer[end:])
574+
):
575+
raise NotImplementedError("This should never happen")
576+
else:
577+
chunks[(start, end)].append(result.buffer[start:end])
578+
579+
if shrink_target is not self.shrink_target: # pragma: no cover
580+
# If we've shrunk further without meaning to, bail out.
581+
self.shrink_target.slice_comments.clear()
582+
return
583+
if result.status == Status.VALID:
584+
# The test passed, indicating that this param can't vary freely.
585+
# However, it's really hard to write a simple and reliable covering
586+
# test, because of our `seen_passing_buffers` check above.
587+
break # pragma: no cover
588+
elif self.__predicate(result): # pragma: no branch
589+
n_same_failures += 1
590+
if n_same_failures >= 100:
591+
self.shrink_target.slice_comments[(start, end)] = note
592+
break
593+
594+
# Finally, if we've found multiple independently-variable parts, check whether
595+
# they can all be varied together.
596+
if len(self.shrink_target.slice_comments) <= 1:
597+
return
598+
n_same_failures_together = 0
599+
chunks_by_start_index = sorted(chunks.items())
600+
for _ in range(500): # pragma: no branch
601+
# no-branch here because we don't coverage-test the abort-at-500 logic.
602+
new_buf = bytearray()
603+
prev_end = 0
604+
for (start, end), ls in chunks_by_start_index:
605+
assert prev_end <= start < end, "these chunks must be nonoverlapping"
606+
new_buf.extend(buffer[prev_end:start])
607+
new_buf.extend(self.random.choice(ls))
608+
prev_end = end
609+
610+
result = self.engine.cached_test_function(new_buf)
611+
612+
# This *can't* be a shrink because none of the components were.
613+
assert shrink_target is self.shrink_target
614+
if result.status == Status.VALID:
615+
# TODO: cover this branch.
616+
# I might need to save or retrieve passing chunks too???
617+
self.shrink_target.slice_comments[
618+
(0, 0)
619+
] = "The test sometimes passed when commented parts were varied together."
620+
break # Test passed, this param can't vary freely.
621+
elif self.__predicate(result): # pragma: no branch
622+
n_same_failures_together += 1
623+
if n_same_failures_together >= 100:
624+
self.shrink_target.slice_comments[
625+
(0, 0)
626+
] = "The test always failed when commented parts were varied together."
627+
break
491628

492629
def greedy_shrink(self):
493630
"""Run a full set of greedy shrinks (that is, ones that will only ever

0 commit comments

Comments
 (0)