Start a new examples directory

DRMacIver · DRMacIver · commit 33deddd2fa60 · 2016-01-15T13:24:19.000Z
This example demonstrates testing a binary search.
diff --git a/examples/test_binary_search.py b/examples/test_binary_search.py
@@ -0,0 +1,114 @@
+from hypothesis import given
+import hypothesis.strategies as st
+
+
+def binary_search(ls, v):
+    """
+    Take a list ls and a value v such that ls is sorted and v is comparable
+    with the elements of ls.
+
+    Return an index i such that 0 <= i <= len(v) with the properties:
+
+    1. ls.insert(i, v) is sorted
+    2. ls.insert(j, v) is not sorted for j < i
+    """
+    # Without this check we will get an index error on the next line when the
+    # list is empty.
+    if not ls:
+        return 0
+
+    # Without this check we will miss the case where the insertion point should
+    # be zero: The invariant we maintain in the next section is that lo is
+    # always strictly lower than the insertion point.
+    if v <= ls[0]:
+        return 0
+
+    # Invariant: There is no insertion point i with i <= lo
+    lo = 0
+
+    # Invariant: There is an insertion point i with i <= hi
+    hi = len(ls)
+    while lo + 1 < hi:
+        mid = (lo + hi) // 2
+        if v > ls[mid]:
+            # Inserting v anywhere below mid would result in an unsorted list
+            # because it's > the value at mid. Therefore mid is a valid new lo
+            lo = mid
+        # Uncommenting the following lines will cause this to return a valid
+        # insertion point which is not always minimal.
+        # elif v == ls[mid]:
+        #   return mid
+        else:
+            # Either v == ls[mid] in which case mid is a valid insertion point
+            # or v < ls[mid], in which case all valid insertion points must be
+            # < hi. Either way, mid is a valid new hi.
+            hi = mid
+    assert lo + 1 == hi
+    # We now know that there is a valid insertion point <= hi and there is no
+    # valid insertion point < hi because hi - 1 is lo. Therefore hi is the
+    # answer we were seeking
+    return hi
+
+
+def is_sorted(ls):
+    """Is this list sorted?"""
+    for i in range(len(ls) - 1):
+        if ls[i] > ls[i + 1]:
+            return False
+    return True
+
+
+Values = st.integers()
+
+# We generate arbitrary lists and turn this into generating sorting lists
+# by just sorting them.
+SortedLists = st.lists(Values).map(sorted)
+
+# We could also do it this way, but that would be a bad idea:
+# SortedLists = st.lists(Values).filter(is_sorted)
+# The problem is that Hypothesis will only generate long sorted lists with very
+# low probability, so we are much better off post-processing values into the
+# form we want than filtering them out.
+
+
+@given(ls=SortedLists, v=Values)
+def test_insert_is_sorted(ls, v):
+    """
+    We test the first invariant: binary_search should return an index such that
+    inserting the value provided at that index would result in a sorted set.
+    """
+    ls.insert(binary_search(ls, v), v)
+    assert is_sorted(ls)
+
+
+@given(ls=SortedLists, v=Values)
+def test_is_minimal(ls, v):
+    """
+    We test the second invariant: binary_search should return an index such
+    that no smaller index is a valid insertion point for v
+    """
+    for i in range(binary_search(ls, v)):
+        ls2 = list(ls)
+        ls2.insert(i, v)
+        assert not is_sorted(ls2)
+
+
+@given(ls=SortedLists, v=Values)
+def test_inserts_into_same_place_twice(ls, v):
+    """
+    In this we test a *consequence* of the second invariant: When we insert a
+    value into a list twice, the insertion point should be the same both times.
+    This is because we know that v is > the previous element and == the next
+    element.
+
+    In theory if the former passes, this should always pass. In practice,
+    failures are detected by this test with much higher probability because it
+    deliberately puts the data into a shape that is likely to trigger a
+    failure.
+
+    This is an instance of a good general category of test: Testing how the
+    function moves in responses to changes in the underlying data.
+    """
+    i = binary_search(ls, v)
+    ls.insert(i, v)
+    assert binary_search(ls, v) == i