Skip to content

Commit 8bb0b9f

Browse files
committed
MAINT: Split test_tile into test_cut and test_qcut
1 parent 825e8c3 commit 8bb0b9f

File tree

2 files changed

+205
-196
lines changed

2 files changed

+205
-196
lines changed

pandas/tests/reshape/test_tile.py pandas/tests/reshape/test_cut.py

+6-196
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,14 @@
1-
import os
2-
import pytest
3-
41
import numpy as np
5-
from pandas.compat import zip
2+
import pytest
63

74
import pandas as pd
8-
from pandas import (DataFrame, Series, isna, to_datetime, DatetimeIndex, Index,
9-
Timestamp, Interval, IntervalIndex, Categorical,
10-
cut, qcut, date_range, timedelta_range, NaT,
11-
TimedeltaIndex)
12-
from pandas.tseries.offsets import Nano, Day
13-
import pandas.util.testing as tm
5+
from pandas import (
6+
Categorical, DataFrame, DatetimeIndex, Index, Interval, IntervalIndex,
7+
Series, TimedeltaIndex, Timestamp, cut, date_range, isna, qcut,
8+
timedelta_range, to_datetime)
149
from pandas.api.types import CategoricalDtype as CDT
15-
16-
from pandas.core.algorithms import quantile
1710
import pandas.core.reshape.tile as tmod
11+
import pandas.util.testing as tm
1812

1913

2014
def test_simple():
@@ -211,44 +205,6 @@ def test_inf_handling():
211205
assert result_ser[0] == Interval(-np.inf, 2)
212206

213207

214-
def test_qcut():
215-
arr = np.random.randn(1000)
216-
217-
# We store the bins as Index that have been
218-
# rounded, so comparisons are a bit tricky.
219-
labels, bins = qcut(arr, 4, retbins=True)
220-
ex_bins = quantile(arr, [0, .25, .5, .75, 1.])
221-
222-
result = labels.categories.left.values
223-
assert np.allclose(result, ex_bins[:-1], atol=1e-2)
224-
225-
result = labels.categories.right.values
226-
assert np.allclose(result, ex_bins[1:], atol=1e-2)
227-
228-
ex_levels = cut(arr, ex_bins, include_lowest=True)
229-
tm.assert_categorical_equal(labels, ex_levels)
230-
231-
232-
def test_qcut_bounds():
233-
arr = np.random.randn(1000)
234-
235-
factor = qcut(arr, 10, labels=False)
236-
assert len(np.unique(factor)) == 10
237-
238-
239-
def test_qcut_specify_quantiles():
240-
arr = np.random.randn(100)
241-
factor = qcut(arr, [0, .25, .5, .75, 1.])
242-
243-
expected = qcut(arr, 4)
244-
tm.assert_categorical_equal(factor, expected)
245-
246-
247-
def test_qcut_all_bins_same():
248-
with pytest.raises(ValueError, match="edges.*unique"):
249-
qcut([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3)
250-
251-
252208
def test_cut_out_of_bounds():
253209
arr = np.random.randn(100)
254210
result = cut(arr, [-1, 0, 1])
@@ -286,31 +242,6 @@ def test_cut_pass_labels_compat():
286242
tm.assert_categorical_equal(result, exp)
287243

288244

289-
def test_qcut_include_lowest():
290-
values = np.arange(10)
291-
ii = qcut(values, 4)
292-
293-
ex_levels = IntervalIndex([Interval(-0.001, 2.25), Interval(2.25, 4.5),
294-
Interval(4.5, 6.75), Interval(6.75, 9)])
295-
tm.assert_index_equal(ii.categories, ex_levels)
296-
297-
298-
def test_qcut_nas():
299-
arr = np.random.randn(100)
300-
arr[:20] = np.nan
301-
302-
result = qcut(arr, 4)
303-
assert isna(result[:20]).all()
304-
305-
306-
def test_qcut_index():
307-
result = qcut([0, 2], 2)
308-
intervals = [Interval(-0.001, 1), Interval(1, 2)]
309-
310-
expected = Categorical(intervals, ordered=True)
311-
tm.assert_categorical_equal(result, expected)
312-
313-
314245
@pytest.mark.parametrize("x", [np.arange(11.), np.arange(11.) / 1e10])
315246
def test_round_frac_just_works(x):
316247
# It works.
@@ -329,30 +260,6 @@ def test_round_frac(val, precision, expected):
329260
assert result == expected
330261

331262

332-
def test_qcut_binning_issues(datapath):
333-
# see gh-1978, gh-1979
334-
cut_file = datapath(os.path.join("reshape", "data", "cut_data.csv"))
335-
arr = np.loadtxt(cut_file)
336-
result = qcut(arr, 20)
337-
338-
starts = []
339-
ends = []
340-
341-
for lev in np.unique(result):
342-
s = lev.left
343-
e = lev.right
344-
assert s != e
345-
346-
starts.append(float(s))
347-
ends.append(float(e))
348-
349-
for (sp, sn), (ep, en) in zip(zip(starts[:-1], starts[1:]),
350-
zip(ends[:-1], ends[1:])):
351-
assert sp < sn
352-
assert ep < en
353-
assert ep <= sn
354-
355-
356263
def test_cut_return_intervals():
357264
ser = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
358265
result = cut(ser, 3)
@@ -365,17 +272,6 @@ def test_cut_return_intervals():
365272
tm.assert_series_equal(result, expected)
366273

367274

368-
def test_qcut_return_intervals():
369-
ser = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
370-
res = qcut(ser, [0, 0.333, 0.666, 1])
371-
372-
exp_levels = np.array([Interval(-0.001, 2.664),
373-
Interval(2.664, 5.328), Interval(5.328, 8)])
374-
exp = Series(exp_levels.take([0, 0, 0, 1, 1, 1, 2, 2, 2])).astype(
375-
CDT(ordered=True))
376-
tm.assert_series_equal(res, exp)
377-
378-
379275
def test_series_ret_bins():
380276
# see gh-8589
381277
ser = Series(np.arange(4))
@@ -406,47 +302,6 @@ def test_cut_duplicates_bin(kwargs, msg):
406302
tm.assert_series_equal(result, expected)
407303

408304

409-
@pytest.mark.parametrize("kwargs,msg", [
410-
(dict(duplicates="drop"), None),
411-
(dict(), "Bin edges must be unique"),
412-
(dict(duplicates="raise"), "Bin edges must be unique"),
413-
(dict(duplicates="foo"), "invalid value for 'duplicates' parameter")
414-
])
415-
def test_qcut_duplicates_bin(kwargs, msg):
416-
# see gh-7751
417-
values = [0, 0, 0, 0, 1, 2, 3]
418-
419-
if msg is not None:
420-
with pytest.raises(ValueError, match=msg):
421-
qcut(values, 3, **kwargs)
422-
else:
423-
result = qcut(values, 3, **kwargs)
424-
expected = IntervalIndex([Interval(-0.001, 1), Interval(1, 3)])
425-
tm.assert_index_equal(result.categories, expected)
426-
427-
428-
@pytest.mark.parametrize("data,start,end", [
429-
(9.0, 8.999, 9.0),
430-
(0.0, -0.001, 0.0),
431-
(-9.0, -9.001, -9.0),
432-
])
433-
@pytest.mark.parametrize("length", [1, 2])
434-
@pytest.mark.parametrize("labels", [None, False])
435-
def test_single_quantile(data, start, end, length, labels):
436-
# see gh-15431
437-
ser = Series([data] * length)
438-
result = qcut(ser, 1, labels=labels)
439-
440-
if labels is None:
441-
intervals = IntervalIndex([Interval(start, end)] *
442-
length, closed="right")
443-
expected = Series(intervals).astype(CDT(ordered=True))
444-
else:
445-
expected = Series([0] * length)
446-
447-
tm.assert_series_equal(result, expected)
448-
449-
450305
@pytest.mark.parametrize("data", [9.0, -9.0, 0.0])
451306
@pytest.mark.parametrize("length", [1, 2])
452307
def test_single_bin(data, length):
@@ -474,21 +329,6 @@ def test_cut_read_only(array_1_writeable, array_2_writeable):
474329
cut(hundred_elements, array_2))
475330

476331

477-
@pytest.mark.parametrize("ser", [
478-
Series(DatetimeIndex(["20180101", NaT, "20180103"])),
479-
Series(TimedeltaIndex(["0 days", NaT, "2 days"]))],
480-
ids=lambda x: str(x.dtype))
481-
def test_qcut_nat(ser):
482-
# see gh-19768
483-
intervals = IntervalIndex.from_tuples([
484-
(ser[0] - Nano(), ser[2] - Day()),
485-
np.nan, (ser[2] - Day(), ser[2])])
486-
expected = Series(Categorical(intervals, ordered=True))
487-
488-
result = qcut(ser, 2)
489-
tm.assert_series_equal(result, expected)
490-
491-
492332
@pytest.mark.parametrize("conv", [
493333
lambda v: Timestamp(v),
494334
lambda v: to_datetime(v),
@@ -558,24 +398,6 @@ def test_datetime_tz_cut(bins, box):
558398
tm.assert_series_equal(result, expected)
559399

560400

561-
@pytest.mark.parametrize("bins", [3, np.linspace(0, 1, 4)])
562-
def test_datetime_tz_qcut(bins):
563-
# see gh-19872
564-
tz = "US/Eastern"
565-
ser = Series(date_range("20130101", periods=3, tz=tz))
566-
567-
result = qcut(ser, bins)
568-
expected = Series(IntervalIndex([
569-
Interval(Timestamp("2012-12-31 23:59:59.999999999", tz=tz),
570-
Timestamp("2013-01-01 16:00:00", tz=tz)),
571-
Interval(Timestamp("2013-01-01 16:00:00", tz=tz),
572-
Timestamp("2013-01-02 08:00:00", tz=tz)),
573-
Interval(Timestamp("2013-01-02 08:00:00", tz=tz),
574-
Timestamp("2013-01-03 00:00:00", tz=tz))])).astype(
575-
CDT(ordered=True))
576-
tm.assert_series_equal(result, expected)
577-
578-
579401
def test_datetime_nan_error():
580402
msg = "bins must be of datetime64 dtype"
581403

@@ -623,15 +445,3 @@ def test_timedelta_cut_roundtrip():
623445
"2 days 00:00:00",
624446
"3 days 00:00:00"])
625447
tm.assert_index_equal(result_bins, expected_bins)
626-
627-
628-
@pytest.mark.parametrize("arg,expected_bins", [
629-
[timedelta_range("1day", periods=3),
630-
TimedeltaIndex(["1 days", "2 days", "3 days"])],
631-
[date_range("20180101", periods=3),
632-
DatetimeIndex(["2018-01-01", "2018-01-02", "2018-01-03"])]])
633-
def test_date_like_qcut_bins(arg, expected_bins):
634-
# see gh-19891
635-
ser = Series(arg)
636-
result, result_bins = qcut(ser, 2, retbins=True)
637-
tm.assert_index_equal(result_bins, expected_bins)

0 commit comments

Comments
 (0)