|
| 1 | +from hypothesis import given |
| 2 | +import hypothesis.strategies as st |
| 3 | + |
| 4 | + |
| 5 | +def binary_search(ls, v): |
| 6 | + """ |
| 7 | + Take a list ls and a value v such that ls is sorted and v is comparable |
| 8 | + with the elements of ls. |
| 9 | +
|
| 10 | + Return an index i such that 0 <= i <= len(v) with the properties: |
| 11 | +
|
| 12 | + 1. ls.insert(i, v) is sorted |
| 13 | + 2. ls.insert(j, v) is not sorted for j < i |
| 14 | + """ |
| 15 | + # Without this check we will get an index error on the next line when the |
| 16 | + # list is empty. |
| 17 | + if not ls: |
| 18 | + return 0 |
| 19 | + |
| 20 | + # Without this check we will miss the case where the insertion point should |
| 21 | + # be zero: The invariant we maintain in the next section is that lo is |
| 22 | + # always strictly lower than the insertion point. |
| 23 | + if v <= ls[0]: |
| 24 | + return 0 |
| 25 | + |
| 26 | + # Invariant: There is no insertion point i with i <= lo |
| 27 | + lo = 0 |
| 28 | + |
| 29 | + # Invariant: There is an insertion point i with i <= hi |
| 30 | + hi = len(ls) |
| 31 | + while lo + 1 < hi: |
| 32 | + mid = (lo + hi) // 2 |
| 33 | + if v > ls[mid]: |
| 34 | + # Inserting v anywhere below mid would result in an unsorted list |
| 35 | + # because it's > the value at mid. Therefore mid is a valid new lo |
| 36 | + lo = mid |
| 37 | + # Uncommenting the following lines will cause this to return a valid |
| 38 | + # insertion point which is not always minimal. |
| 39 | + # elif v == ls[mid]: |
| 40 | + # return mid |
| 41 | + else: |
| 42 | + # Either v == ls[mid] in which case mid is a valid insertion point |
| 43 | + # or v < ls[mid], in which case all valid insertion points must be |
| 44 | + # < hi. Either way, mid is a valid new hi. |
| 45 | + hi = mid |
| 46 | + assert lo + 1 == hi |
| 47 | + # We now know that there is a valid insertion point <= hi and there is no |
| 48 | + # valid insertion point < hi because hi - 1 is lo. Therefore hi is the |
| 49 | + # answer we were seeking |
| 50 | + return hi |
| 51 | + |
| 52 | + |
| 53 | +def is_sorted(ls): |
| 54 | + """Is this list sorted?""" |
| 55 | + for i in range(len(ls) - 1): |
| 56 | + if ls[i] > ls[i + 1]: |
| 57 | + return False |
| 58 | + return True |
| 59 | + |
| 60 | + |
| 61 | +Values = st.integers() |
| 62 | + |
| 63 | +# We generate arbitrary lists and turn this into generating sorting lists |
| 64 | +# by just sorting them. |
| 65 | +SortedLists = st.lists(Values).map(sorted) |
| 66 | + |
| 67 | +# We could also do it this way, but that would be a bad idea: |
| 68 | +# SortedLists = st.lists(Values).filter(is_sorted) |
| 69 | +# The problem is that Hypothesis will only generate long sorted lists with very |
| 70 | +# low probability, so we are much better off post-processing values into the |
| 71 | +# form we want than filtering them out. |
| 72 | + |
| 73 | + |
| 74 | +@given(ls=SortedLists, v=Values) |
| 75 | +def test_insert_is_sorted(ls, v): |
| 76 | + """ |
| 77 | + We test the first invariant: binary_search should return an index such that |
| 78 | + inserting the value provided at that index would result in a sorted set. |
| 79 | + """ |
| 80 | + ls.insert(binary_search(ls, v), v) |
| 81 | + assert is_sorted(ls) |
| 82 | + |
| 83 | + |
| 84 | +@given(ls=SortedLists, v=Values) |
| 85 | +def test_is_minimal(ls, v): |
| 86 | + """ |
| 87 | + We test the second invariant: binary_search should return an index such |
| 88 | + that no smaller index is a valid insertion point for v |
| 89 | + """ |
| 90 | + for i in range(binary_search(ls, v)): |
| 91 | + ls2 = list(ls) |
| 92 | + ls2.insert(i, v) |
| 93 | + assert not is_sorted(ls2) |
| 94 | + |
| 95 | + |
| 96 | +@given(ls=SortedLists, v=Values) |
| 97 | +def test_inserts_into_same_place_twice(ls, v): |
| 98 | + """ |
| 99 | + In this we test a *consequence* of the second invariant: When we insert a |
| 100 | + value into a list twice, the insertion point should be the same both times. |
| 101 | + This is because we know that v is > the previous element and == the next |
| 102 | + element. |
| 103 | +
|
| 104 | + In theory if the former passes, this should always pass. In practice, |
| 105 | + failures are detected by this test with much higher probability because it |
| 106 | + deliberately puts the data into a shape that is likely to trigger a |
| 107 | + failure. |
| 108 | +
|
| 109 | + This is an instance of a good general category of test: Testing how the |
| 110 | + function moves in responses to changes in the underlying data. |
| 111 | + """ |
| 112 | + i = binary_search(ls, v) |
| 113 | + ls.insert(i, v) |
| 114 | + assert binary_search(ls, v) == i |
0 commit comments