|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +""" |
| 3 | +Helper functions to generate range-like data for DatetimeArray |
| 4 | +(and possibly TimedeltaArray/PeriodArray) |
| 5 | +""" |
| 6 | + |
| 7 | +import numpy as np |
| 8 | + |
| 9 | +from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp |
| 10 | + |
| 11 | +from pandas.tseries.offsets import Tick, generate_range |
| 12 | + |
| 13 | + |
def generate_regular_range(start, end, periods, freq):
    """
    Generate a range of dates with the spans between dates described by
    the given `freq` DateOffset.

    Parameters
    ----------
    start : Timestamp or None
        first point of produced date range
    end : Timestamp or None
        last point of produced date range
    periods : int or None
        number of periods in produced date range; when None, both
        `start` and `end` are used to delimit the range
    freq : DateOffset
        describes space between dates in produced date range

    Returns
    -------
    values : ndarray[np.int64]
        nanosecond unix timestamps
    tz : tzinfo or None
        timezone taken from `start` if given, otherwise from `end`

    Raises
    ------
    ValueError
        if `freq` is a Tick, `periods` is given, and both `start` and
        `end` are None
    """
    if isinstance(freq, Tick):
        stride = freq.nanos
        if periods is None:
            b = Timestamp(start).value
            # cannot just use e = Timestamp(end) + 1 because arange breaks when
            # stride is too large, see GH10887
            e = (b + (Timestamp(end).value - b) // stride * stride +
                 stride // 2 + 1)
            # callers are expected to have aligned end.tz with start.tz
            tz = start.tz
        elif start is not None:
            b = Timestamp(start).value
            e = _generate_range_overflow_safe(b, periods, stride, side='start')
            tz = start.tz
        elif end is not None:
            # np.arange excludes its stop value, so step one stride past `end`
            e = Timestamp(end).value + stride
            b = _generate_range_overflow_safe(e, periods, stride, side='end')
            tz = end.tz
        else:
            raise ValueError("at least 'start' or 'end' should be specified "
                             "if a 'period' is given.")

        with np.errstate(over="raise"):
            # If the range is sufficiently large, np.arange may overflow
            # and incorrectly return an empty array if not caught.
            try:
                values = np.arange(b, e, stride, dtype=np.int64)
            except FloatingPointError:
                # Fall back to arbitrary-precision Python ints.  `range` has
                # the same half-open [b, e) semantics as np.arange and always
                # terminates, whereas stepping until the running value
                # *equals* `e` never stops when `e` is not an exact multiple
                # of `stride` away from `b` (e.g. the half-stride padding
                # applied above when `periods` is None).
                values = np.array(list(range(int(b), int(e), int(stride))),
                                  dtype=np.int64)

    else:
        tz = None
        # start and end should have the same timezone by this point
        if start is not None:
            tz = start.tz
        elif end is not None:
            tz = end.tz

        xdr = generate_range(start=start, end=end,
                             periods=periods, offset=freq)

        values = np.array([x.value for x in xdr], dtype=np.int64)

    return values, tz
| 81 | + |
| 82 | + |
def _generate_range_overflow_safe(endpoint, periods, stride, side='start'):
    """
    Compute the far endpoint to hand to np.arange, given one known
    endpoint, a number of periods and a stride, without letting the
    arithmetic silently wrap around.

    Parameters
    ----------
    endpoint : int
        nanosecond timestamp of the known endpoint of the desired range
    periods : int
        number of periods in the desired range
    stride : int
        nanoseconds between periods in the desired range
    side : {'start', 'end'}
        which end of the range `endpoint` refers to

    Returns
    -------
    other_end : int

    Raises
    ------
    OutOfBoundsDatetime
        if the other endpoint cannot be represented
    """
    # GH#14187 raise instead of incorrectly wrapping around
    assert side in ['start', 'end']

    int64_ceiling = np.uint64(np.iinfo(np.int64).max)
    err_text = ('Cannot generate range with {side}={endpoint} and '
                'periods={periods}').format(
        side=side, endpoint=endpoint, periods=periods)

    with np.errstate(over="raise"):
        # When periods * |stride| does not even fit in uint64 there is
        # nothing to salvage by splitting the range, so give up right away.
        try:
            span = np.uint64(periods) * np.uint64(np.abs(stride))
        except FloatingPointError:
            raise OutOfBoundsDatetime(err_text)

    if np.abs(span) <= int64_ceiling:
        # The total span fits in int64: delegate to the signed helper.
        return _generate_range_overflow_safe_signed(
            endpoint, periods, stride, side)

    if stride > 0 and ((side == 'start' and endpoint > 0) or
                       (side == 'end' and endpoint < 0)):
        # The span alone exceeds int64 and moves further away from zero,
        # so the result cannot possibly fit.
        raise OutOfBoundsDatetime(err_text)

    if side == 'end':
        if endpoint > int64_ceiling and endpoint - stride <= int64_ceiling:
            # generate_regular_range adds `stride` to the end timestamp
            # before calling, which may have pushed it past the int64
            # bound; undo that step and ask for one period fewer.
            return _generate_range_overflow_safe(endpoint - stride,
                                                 periods - 1, stride, side)

    # Otherwise halve the problem and walk it in two hops.
    first_half = periods // 2
    second_half = periods - first_half
    assert 0 < second_half < periods, (second_half, periods, endpoint, stride)

    pivot = _generate_range_overflow_safe(endpoint, first_half, stride, side)
    return _generate_range_overflow_safe(pivot, second_half, stride, side)
| 148 | + |
| 149 | + |
| 150 | +def _generate_range_overflow_safe_signed(endpoint, periods, stride, side): |
| 151 | + """ |
| 152 | + A special case for _generate_range_overflow_safe where `periods * stride` |
| 153 | + can be calculated without overflowing int64 bounds. |
| 154 | + """ |
| 155 | + assert side in ['start', 'end'] |
| 156 | + if side == 'end': |
| 157 | + stride *= -1 |
| 158 | + |
| 159 | + with np.errstate(over="raise"): |
| 160 | + addend = np.int64(periods) * np.int64(stride) |
| 161 | + try: |
| 162 | + # easy case with no overflows |
| 163 | + return np.int64(endpoint) + addend |
| 164 | + except (FloatingPointError, OverflowError): |
| 165 | + # with endpoint negative and addend positive we risk |
| 166 | + # FloatingPointError; with reversed signed we risk OverflowError |
| 167 | + pass |
| 168 | + |
| 169 | + # if stride and endpoint had opposite signs, then endpoint + addend |
| 170 | + # should never overflow. so they must have the same signs |
| 171 | + assert (stride > 0 and endpoint >= 0) or (stride < 0 and endpoint <= 0) |
| 172 | + |
| 173 | + if stride > 0: |
| 174 | + # watch out for very special case in which we just slightly |
| 175 | + # exceed implementation bounds, but when passing the result to |
| 176 | + # np.arange will get a result slightly within the bounds |
| 177 | + assert endpoint >= 0 |
| 178 | + result = np.uint64(endpoint) + np.uint64(addend) |
| 179 | + i64max = np.uint64(np.iinfo(np.int64).max) |
| 180 | + assert result > i64max |
| 181 | + if result <= i64max + np.uint64(stride): |
| 182 | + return result |
| 183 | + |
| 184 | + raise OutOfBoundsDatetime('Cannot generate range with ' |
| 185 | + '{side}={endpoint} and ' |
| 186 | + 'periods={periods}' |
| 187 | + .format(side=side, endpoint=endpoint, |
| 188 | + periods=periods)) |
0 commit comments