studioph
diff --git a/hypothesis-python/RELEASE.rst
+25 b/hypothesis-python/RELEASE.rst
+25
diff --git a/hypothesis-python/docs/settings.rst
+27-2 b/hypothesis-python/docs/settings.rst
+27-2
diff --git a/hypothesis-python/src/hypothesis/control.py
+20-3 b/hypothesis-python/src/hypothesis/control.py
+20-3
diff --git a/hypothesis-python/src/hypothesis/core.py
+7-1 b/hypothesis-python/src/hypothesis/core.py
+7-1
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py
+9 b/hypothesis-python/src/hypothesis/internal/conjecture/data.py
+9
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py
+18-1 b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py
+18-1
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py
+139-2 b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py
+139-2
@@ -0,0 +1,25 @@
+RELEASE_TYPE: minor
+
+This release upgrades the :ref:`explain phase <phases>` (:issue:`3411`).
+
+* Following the first failure, Hypothesis will (:ref:`usually <phases>`) track which
+  lines of code were executed by passing and failing examples, and report where they
+  diverged - with some heuristics to drop unhelpful reports.  This is an existing
+  feature, now upgraded and newly enabled by default.
+
+* After shrinking to a minimal failing example, Hypothesis will try to find parts of
+  the example -- e.g. separate args to :func:`@given() <hypothesis.given>` -- which
+  can vary freely without changing the result of that minimal failing example.
+  If the automated experiments run without finding a passing variation, we leave a
+  comment in the final report:
+
+  .. code-block:: python
+
+      test_x_divided_by_y(
+          x=0,  # or any other generated value
+          y=0,
+      )
+
+Remember that the *lack* of an explanation may simply mean that Hypothesis
+couldn't efficiently find one, not that no explanation (or simpler failing example)
+exists.
@@ -60,12 +60,37 @@ Hypothesis divides tests into logically distinct phases:
 4. Mutating examples for :ref:`targeted property-based testing <targeted-search>` (requires generate phase).
 5. Attempting to shrink an example found in previous phases (other than phase 1 - explicit examples cannot be shrunk).
    This turns potentially large and complicated examples which may be hard to read into smaller and simpler ones.
-6. Attempting to explain the cause of the failure, by identifying suspicious lines of code
-   (e.g. the earliest lines which are never run on passing inputs, and always run on failures).
+6. Attempting to explain why your test failed (requires shrink phase).
+
+.. note::
+
+   The explain phase has two parts, each of which is best-effort - if Hypothesis can't
+   find a useful explanation, we'll just print the minimal failing example.
+
+   Following the first failure, Hypothesis will (:ref:`usually <phases>`) track which
+   lines of code are always run on failing but never on passing inputs.
    This relies on :func:`python:sys.settrace`, and is therefore automatically disabled on
    PyPy or if you are using :pypi:`coverage` or a debugger.  If there are no clearly
    suspicious lines of code, :pep:`we refuse the temptation to guess <20>`.
 
+   After shrinking to a minimal failing example, Hypothesis will try to find parts of
+   the example -- e.g. separate args to :func:`@given() <hypothesis.given>` -- which
+   can vary freely without changing the result of that minimal failing example.
+   If the automated experiments run without finding a passing variation, we leave a
+   comment in the final report:
+
+   .. code-block:: python
+
+       test_x_divided_by_y(
+           x=0,  # or any other generated value
+           y=0,
+       )
+
+   Remember that the *lack* of an explanation may simply mean that Hypothesis
+   couldn't efficiently find one, not that no explanation (or simpler failing example)
+   exists.
+
+
 The phases setting provides you with fine grained control over which of these run,
 with each phase corresponding to a value on the :class:`~hypothesis.Phase` enum:
 
 
@@ -74,21 +74,38 @@ def __init__(self, data, is_final=False, close_on_capture=True):
         # The printer will discard duplicates which return different representations.
         self.known_object_printers = defaultdict(list)
 
-    def record_call(self, obj, func, a, kw):
+    def record_call(self, obj, func, args, kwargs, arg_slices=None):
         name = get_pretty_function_description(func)
         self.known_object_printers[IDKey(obj)].append(
-            lambda obj, p, cycle: p.text("<...>") if cycle else p.repr_call(name, a, kw)
+            lambda obj, p, cycle: (
+                p.text("<...>")
+                if cycle
+                else p.repr_call(name, args, kwargs, arg_slices=arg_slices)
+            )
         )
 
     def prep_args_kwargs_from_strategies(self, arg_strategies, kwarg_strategies):
         arg_labels = {}
         all_s = [(None, s) for s in arg_strategies] + list(kwarg_strategies.items())
         args = []
         kwargs = {}
-        for k, s in all_s:
+        for i, (k, s) in enumerate(all_s):
+            start_idx = self.data.index
             obj = self.data.draw(s)
+            end_idx = self.data.index
             assert k is not None
             kwargs[k] = obj
+
+            # This high up the stack, we can't see or really do much with the conjecture
+            # Example objects - not least because they're only materialized after the
+            # test case is completed.  Instead, we'll stash the (start_idx, end_idx)
+            # pair on our data object for the ConjectureRunner engine to deal with, and
+            # pass a dict of such out so that the pretty-printer knows where to place
+            # the which-parts-matter comments later.
+            if start_idx != end_idx:
+                arg_labels[k or i] = (start_idx, end_idx)
+                self.data.arg_slices.add((start_idx, end_idx))
+
         return args, kwargs, arg_labels
 
     def __enter__(self):
 
@@ -760,12 +760,13 @@ def run(data):
                         args = self.stuff.args
                         kwargs = dict(self.stuff.kwargs)
                         if example_kwargs is None:
-                            a, kw, _ = context.prep_args_kwargs_from_strategies(
+                            a, kw, argslices = context.prep_args_kwargs_from_strategies(
                                 (), self.stuff.given_kwargs
                             )
                             assert not a, "strategies all moved to kwargs by now"
                         else:
                             kw = example_kwargs
+                            argslices = {}
                         kwargs.update(kw)
                         if expected_failure is not None:
                             nonlocal text_repr
@@ -785,7 +786,11 @@ def run(data):
                                     args,
                                     kwargs,
                                     force_split=True,
+                                    arg_slices=argslices,
                                 )
+                            if (0, 0) in context.data.slice_comments:
+                                printer.break_()
+                                printer.text("# " + context.data.slice_comments[(0, 0)])
                             report(printer.getvalue())
                         return test(*args, **kwargs)
 
@@ -966,6 +971,7 @@ def run_engine(self):
             fragments = []
 
             ran_example = ConjectureData.for_buffer(falsifying_example.buffer)
+            ran_example.slice_comments = falsifying_example.slice_comments
             assert info.__expected_exception is not None
             try:
                 with with_reporter(fragments.append):
 
@@ -776,6 +776,8 @@ class ConjectureResult:
     tags: FrozenSet[StructuralCoverageTag] = attr.ib()
     forced_indices: FrozenSet[int] = attr.ib(repr=False)
     examples: Examples = attr.ib(repr=False)
+    arg_slices: Set[Tuple[int, int]] = attr.ib(repr=False)
+    slice_comments: Dict[Tuple[int, int], str] = attr.ib(repr=False)
 
     index: int = attr.ib(init=False)
 
@@ -860,6 +862,11 @@ def __init__(
         self.depth = -1
         self.__example_record = ExampleRecord()
 
+        # Slice indices for discrete reportable parts that which-parts-matter can
+        # try varying, to report if the minimal example always fails anyway.
+        self.arg_slices: Set[Tuple[int, int]] = set()
+        self.slice_comments: Dict[Tuple[int, int], str] = {}
+
         self.extra_information = ExtraInformation()
 
         self.start_example(TOP_LABEL)
@@ -893,6 +900,8 @@ def as_result(self) -> Union[ConjectureResult, _Overrun]:
                 target_observations=self.target_observations,
                 tags=frozenset(self.tags),
                 forced_indices=frozenset(self.forced_indices),
+                arg_slices=self.arg_slices,
+                slice_comments=self.slice_comments,
             )
             assert self.__result is not None
             self.blocks.transfer_ownership(self.__result)
 
@@ -985,7 +985,13 @@ def shrink(self, example, predicate=None, allow_transition=None):
         return s.shrink_target
 
     def new_shrinker(self, example, predicate=None, allow_transition=None):
-        return Shrinker(self, example, predicate, allow_transition)
+        return Shrinker(
+            self,
+            example,
+            predicate,
+            allow_transition,
+            explain=Phase.explain in self.settings.phases,
+        )
 
     def cached_test_function(self, buffer, error_on_discard=False, extend=0):
         """Checks the tree to see if we've tested this buffer, and returns the
@@ -1076,6 +1082,17 @@ def event_to_string(self, event):
             pass
         return result
 
+    def passing_buffers(self, prefix=b""):
+        """Return a collection of bytestrings which cause the test to pass.
+
+        Optionally restrict this by a certain prefix, which is useful for explain mode.
+        """
+        return frozenset(
+            buf
+            for buf in self.__data_cache
+            if buf.startswith(prefix) and self.__data_cache[buf].status == Status.VALID
+        )
+
 
 class ContainsDiscard(Exception):
     pass
@@ -261,7 +261,7 @@ def accept(self):
         accept.__name__ = fn.__name__
         return property(accept)
 
-    def __init__(self, engine, initial, predicate, allow_transition):
+    def __init__(self, engine, initial, predicate, allow_transition, explain):
         """Create a shrinker for a particular engine, with a given starting
         point and predicate. When shrink() is called it will attempt to find an
         example for which predicate is True and which is strictly smaller than
@@ -300,6 +300,8 @@ def __init__(self, engine, initial, predicate, allow_transition):
         # testing and learning purposes.
         self.extra_dfas = {}
 
+        self.should_explain = explain
+
     @derived_value  # type: ignore
     def cached_calculations(self):
         return {}
@@ -437,12 +439,15 @@ def shrink(self):
         if not any(self.shrink_target.buffer) or self.incorporate_new_buffer(
             bytes(len(self.shrink_target.buffer))
         ):
+            self.explain()
             return
 
         try:
             self.greedy_shrink()
         except StopShrinking:
-            pass
+            # If we stopped shrinking because we're making slow progress (instead of
+            # reaching a local optimum), don't run the explain-phase logic.
+            self.should_explain = False
         finally:
             if self.engine.report_debug_info:
 
@@ -488,6 +493,138 @@ def s(n):
                             )
                         )
                 self.debug("")
+        self.explain()
+
+    def explain(self):
+        if not self.should_explain or not self.shrink_target.arg_slices:
+            return
+        from hypothesis.internal.conjecture.engine import BUFFER_SIZE
+
+        self.max_stall = 1e999
+        shrink_target = self.shrink_target
+        buffer = shrink_target.buffer
+        chunks = defaultdict(list)
+
+        # Before we start running experiments, let's check for known inputs which would
+        # make them redundant.  The shrinking process means that we've already tried many
+        # variations on the minimal example, so this can save a lot of time.
+        seen_passing_buffers = self.engine.passing_buffers(
+            prefix=buffer[: min(self.shrink_target.arg_slices)[0]]
+        )
+
+        # Now that we've shrunk to a minimal failing example, it's time to try
+        # varying each part that we've noted will go in the final report.  Consider
+        # slices in largest-first order
+        for start, end in sorted(
+            self.shrink_target.arg_slices, key=lambda x: (-(x[1] - x[0]), x)
+        ):
+            # Check for any previous examples that match the prefix and suffix,
+            # so we can skip if we found a passing example while shrinking.
+            if any(
+                seen.startswith(buffer[:start]) and seen.endswith(buffer[end:])
+                for seen in seen_passing_buffers
+            ):
+                continue
+
+            # Run our experiments
+            n_same_failures = 0
+            note = "or any other generated value"
+            # TODO: is 100 same-failures out of 500 attempts a good heuristic?
+            for n_attempt in range(500):  # pragma: no branch
+                # no-branch here because we don't coverage-test the abort-at-500 logic.
+
+                if n_attempt - 10 > n_same_failures * 5:
+                    # stop early if we're seeing mostly invalid examples
+                    break  # pragma: no cover
+
+                buf_attempt_fixed = bytearray(buffer)
+                buf_attempt_fixed[start:end] = [
+                    self.random.randint(0, 255) for _ in range(end - start)
+                ]
+                result = self.engine.cached_test_function(
+                    buf_attempt_fixed, extend=BUFFER_SIZE - len(buf_attempt_fixed)
+                )
+
+                # Turns out this was a variable-length part, so grab the infix...
+                if (
+                    result.status == Status.OVERRUN
+                    or len(buf_attempt_fixed) != len(result.buffer)
+                    or not result.buffer.endswith(buffer[end:])
+                ):
+                    for ex, res in zip(shrink_target.examples, result.examples):
+                        assert ex.start == res.start
+                        assert ex.start <= start
+                        assert ex.label == res.label
+                        if start == ex.start and end == ex.end:
+                            res_end = res.end
+                            break
+                    else:
+                        raise NotImplementedError("Expected matching prefixes")
+
+                    buf_attempt_fixed = (
+                        buffer[:start] + result.buffer[start:res_end] + buffer[end:]
+                    )
+                    chunks[(start, end)].append(result.buffer[start:res_end])
+                    result = self.engine.cached_test_function(buf_attempt_fixed)
+
+                    if (
+                        result.status == Status.OVERRUN
+                        or len(buf_attempt_fixed) != len(result.buffer)
+                        or not result.buffer.endswith(buffer[end:])
+                    ):
+                        raise NotImplementedError("This should never happen")
+                else:
+                    chunks[(start, end)].append(result.buffer[start:end])
+
+                if shrink_target is not self.shrink_target:  # pragma: no cover
+                    # If we've shrunk further without meaning to, bail out.
+                    self.shrink_target.slice_comments.clear()
+                    return
+                if result.status == Status.VALID:
+                    # The test passed, indicating that this param can't vary freely.
+                    # However, it's really hard to write a simple and reliable covering
+                    # test, because of our `seen_passing_buffers` check above.
+                    break  # pragma: no cover
+                elif self.__predicate(result):  # pragma: no branch
+                    n_same_failures += 1
+                    if n_same_failures >= 100:
+                        self.shrink_target.slice_comments[(start, end)] = note
+                        break
+
+        # Finally, if we've found multiple independently-variable parts, check whether
+        # they can all be varied together.
+        if len(self.shrink_target.slice_comments) <= 1:
+            return
+        n_same_failures_together = 0
+        chunks_by_start_index = sorted(chunks.items())
+        for _ in range(500):  # pragma: no branch
+            # no-branch here because we don't coverage-test the abort-at-500 logic.
+            new_buf = bytearray()
+            prev_end = 0
+            for (start, end), ls in chunks_by_start_index:
+                assert prev_end <= start < end, "these chunks must be nonoverlapping"
+                new_buf.extend(buffer[prev_end:start])
+                new_buf.extend(self.random.choice(ls))
+                prev_end = end
+
+            result = self.engine.cached_test_function(new_buf)
+
+            # This *can't* be a shrink because none of the components were.
+            assert shrink_target is self.shrink_target
+            if result.status == Status.VALID:
+                # TODO: cover this branch.
+                #       I might need to save or retrieve passing chunks too???
+                self.shrink_target.slice_comments[
+                    (0, 0)
+                ] = "The test sometimes passed when commented parts were varied together."
+                break  # Test passed, this param can't vary freely.
+            elif self.__predicate(result):  # pragma: no branch
+                n_same_failures_together += 1
+                if n_same_failures_together >= 100:
+                    self.shrink_target.slice_comments[
+                        (0, 0)
+                    ] = "The test always failed when commented parts were varied together."
+                    break
 
     def greedy_shrink(self):
         """Run a full set of greedy shrinks (that is, ones that will only ever