Skip to content

Commit ddc84d9

Browse files
author
Krzysztof Chomski
committed
Merge branch 'master' of github.com:pandas-dev/pandas
* 'master' of github.com:pandas-dev/pandas: (188 commits) Separate out _convert_datetime_to_tsobject (pandas-dev#17715) DOC: remove whatsnew note for xref pandas-dev#17131 BUG: Regression in .loc accepting a boolean Index as an indexer (pandas-dev#17738) DEPR: Deprecate cdate_range and merge into bdate_range (pandas-dev#17691) CLN: replace %s syntax with .format in pandas.core: categorical, common, config, config_init (pandas-dev#17735) Fixed the memory usage explanation of categorical in gotchas from O(nm) to O(n+m) (pandas-dev#17736) TST: add backward compat for offset testing for pickles (pandas-dev#17733) remove unused time conversion funcs (pandas-dev#17711) DEPR: Deprecate convert parameter in take (pandas-dev#17352) BUG:Time Grouper bug fix when applied for list groupers (pandas-dev#17587) BUG: Fix some PeriodIndex resampling issues (pandas-dev#16153) BUG: Fix unexpected sort in groupby (pandas-dev#17621) DOC: Fixed typo in documentation for 'pandas.DataFrame.replace' (pandas-dev#17731) BUG: Fix series rename called with str altering name rather index (GH17407) (pandas-dev#17654) DOC: Add examples for MultiIndex.get_locs + cleanups (pandas-dev#17675) Doc improvements for IntervalIndex and Interval (pandas-dev#17714) BUG: DataFrame sort_values and multiple "by" columns fails to order NaT correctly Last of the timezones funcs (pandas-dev#17669) Add missing file to _pyxfiles, delete commented-out (pandas-dev#17712) update imports of DateParseError, remove unused imports from tslib (pandas-dev#17713) ...
2 parents 7818486 + a3d538a commit ddc84d9

File tree

305 files changed

+12203
-7264
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

305 files changed

+12203
-7264
lines changed

.github/ISSUE_TEMPLATE.md

+6
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@
1212

1313
**Note**: Many problems can be resolved by simply upgrading `pandas` to the latest version. Before submitting, please check if that solution works for you. If possible, you may want to check if `master` addresses this issue, but that is not necessary.
1414

15+
For documentation-related issues, you can check the latest versions of the docs on `master` here:
16+
17+
https://pandas-docs.github.io/pandas-docs-travis/
18+
19+
If the issue has not been resolved there, go ahead and file it in the issue tracker.
20+
1521
#### Expected Output
1622

1723
#### Output of ``pd.show_versions()``

.travis.yml

+3-3
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ matrix:
3737
- JOB="3.5_OSX" TEST_ARGS="--skip-slow --skip-network"
3838
- dist: trusty
3939
env:
40-
- JOB="2.7_LOCALE" TEST_ARGS="--only-slow --skip-network" LOCALE_OVERRIDE="zh_CN.UTF-8"
40+
- JOB="2.7_LOCALE" LOCALE_OVERRIDE="zh_CN.UTF-8" SLOW=true
4141
addons:
4242
apt:
4343
packages:
@@ -62,7 +62,7 @@ matrix:
6262
# In allow_failures
6363
- dist: trusty
6464
env:
65-
- JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network"
65+
- JOB="2.7_SLOW" SLOW=true
6666
# In allow_failures
6767
- dist: trusty
6868
env:
@@ -82,7 +82,7 @@ matrix:
8282
allow_failures:
8383
- dist: trusty
8484
env:
85-
- JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network"
85+
- JOB="2.7_SLOW" SLOW=true
8686
- dist: trusty
8787
env:
8888
- JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true

MANIFEST.in

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ include LICENSE
33
include RELEASE.md
44
include README.rst
55
include setup.py
6+
include pyproject.toml
67

78
graft doc
89
prune doc/build

appveyor.yml

+7-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ install:
5959

6060
# install our build environment
6161
- cmd: conda config --set show_channel_urls true --set always_yes true --set changeps1 false
62-
- cmd: conda update -q conda
62+
# - cmd: conda update -q conda
6363
- cmd: conda config --set ssl_verify false
6464

6565
# add the pandas channel *before* defaults to have defaults take priority
@@ -74,12 +74,18 @@ install:
7474
# create our env
7575
- cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest>=3.1.0 pytest-xdist
7676
- cmd: activate pandas
77+
- cmd: pip install moto
7778
- SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run
7879
- cmd: echo "installing requirements from %REQ%"
7980
- cmd: conda install -n pandas --file=%REQ%
8081
- cmd: conda list -n pandas
8182
- cmd: echo "installing requirements from %REQ% - done"
8283

84+
# add some pip only reqs to the env
85+
- SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.pip
86+
- cmd: echo "installing requirements from %REQ%"
87+
- cmd: pip install -Ur %REQ%
88+
8389
# build em using the local source checkout in the correct windows env
8490
- cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace'
8591

asv_bench/asv.conf.json

+6-4
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,10 @@
117117
// with results. If the commit is `null`, regression detection is
118118
// skipped for the matching benchmark.
119119
//
120-
// "regressions_first_commits": {
121-
// "some_benchmark": "352cdf", // Consider regressions only after this commit
122-
// "another_benchmark": null, // Skip regression detection altogether
123-
// }
120+
"regressions_first_commits": {
121+
".*": "v0.20.0"
122+
},
123+
"regression_thresholds": {
124+
".*": 0.05
125+
}
124126
}

asv_bench/benchmarks/categoricals.py

+3
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ def time_value_counts_dropna(self):
6767
def time_rendering(self):
6868
str(self.sel)
6969

70+
def time_set_categories(self):
71+
self.ts.cat.set_categories(self.ts.cat.categories[::2])
72+
7073

7174
class Categoricals3(object):
7275
goal_time = 0.2

asv_bench/benchmarks/index_object.py

+20
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,23 @@ def time_datetime_level_values_full(self):
199199

200200
def time_datetime_level_values_sliced(self):
201201
self.mi[:10].values
202+
203+
204+
class Range(object):
205+
goal_time = 0.2
206+
207+
def setup(self):
208+
self.idx_inc = RangeIndex(start=0, stop=10**7, step=3)
209+
self.idx_dec = RangeIndex(start=10**7, stop=-1, step=-3)
210+
211+
def time_max(self):
212+
self.idx_inc.max()
213+
214+
def time_max_trivial(self):
215+
self.idx_dec.max()
216+
217+
def time_min(self):
218+
self.idx_dec.min()
219+
220+
def time_min_trivial(self):
221+
self.idx_inc.min()

asv_bench/benchmarks/io_bench.py

+30
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os
12
from .pandas_vb_common import *
23
from pandas import concat, Timestamp, compat
34
try:
@@ -192,3 +193,32 @@ def time_read_nrows(self, compression, engine):
192193
ext = ".bz2"
193194
pd.read_csv(self.big_fname + ext, nrows=10,
194195
compression=compression, engine=engine)
196+
197+
198+
class read_json_lines(object):
199+
goal_time = 0.2
200+
fname = "__test__.json"
201+
202+
def setup(self):
203+
self.N = 100000
204+
self.C = 5
205+
self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]))
206+
self.df.to_json(self.fname,orient="records",lines=True)
207+
208+
def teardown(self):
209+
try:
210+
os.remove(self.fname)
211+
except:
212+
pass
213+
214+
def time_read_json_lines(self):
215+
pd.read_json(self.fname, lines=True)
216+
217+
def time_read_json_lines_chunk(self):
218+
pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N//4))
219+
220+
def peakmem_read_json_lines(self):
221+
pd.read_json(self.fname, lines=True)
222+
223+
def peakmem_read_json_lines_chunk(self):
224+
pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N//4))

asv_bench/benchmarks/period.py

+88
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,35 @@
22
from pandas import Series, Period, PeriodIndex, date_range
33

44

5+
class PeriodProperties(object):
6+
def setup(self):
7+
self.per = Period('2012-06-01', freq='M')
8+
9+
def time_year(self):
10+
self.per.year
11+
12+
def time_month(self):
13+
self.per.month
14+
15+
def time_quarter(self):
16+
self.per.quarter
17+
18+
def time_day(self):
19+
self.per.day
20+
21+
def time_hour(self):
22+
self.per.hour
23+
24+
def time_minute(self):
25+
self.per.second
26+
27+
def time_second(self):
28+
self.per.second
29+
30+
def time_leap_year(self):
31+
self.per.is_leapyear
32+
33+
534
class Constructor(object):
635
goal_time = 0.2
736

@@ -49,6 +78,65 @@ def time_value_counts_pindex(self):
4978
self.i.value_counts()
5079

5180

81+
class Properties(object):
82+
def setup(self):
83+
self.per = Period('2017-09-06 08:28', freq='min')
84+
85+
def time_year(self):
86+
self.per.year
87+
88+
def time_month(self):
89+
self.per.month
90+
91+
def time_day(self):
92+
self.per.day
93+
94+
def time_hour(self):
95+
self.per.hour
96+
97+
def time_minute(self):
98+
self.per.minute
99+
100+
def time_second(self):
101+
self.per.second
102+
103+
def time_is_leap_year(self):
104+
self.per.is_leap_year
105+
106+
def time_quarter(self):
107+
self.per.quarter
108+
109+
def time_qyear(self):
110+
self.per.qyear
111+
112+
def time_week(self):
113+
self.per.week
114+
115+
def time_daysinmonth(self):
116+
self.per.daysinmonth
117+
118+
def time_dayofweek(self):
119+
self.per.dayofweek
120+
121+
def time_dayofyear(self):
122+
self.per.dayofyear
123+
124+
def time_start_time(self):
125+
self.per.start_time
126+
127+
def time_end_time(self):
128+
self.per.end_time
129+
130+
def time_to_timestamp():
131+
self.per.to_timestamp()
132+
133+
def time_now():
134+
self.per.now()
135+
136+
def time_asfreq():
137+
self.per.asfreq('A')
138+
139+
52140
class period_standard_indexing(object):
53141
goal_time = 0.2
54142

asv_bench/benchmarks/sparse.py

+66-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
from itertools import repeat
1+
import itertools
22

33
from .pandas_vb_common import *
44
import scipy.sparse
5-
from pandas import SparseSeries, SparseDataFrame
5+
from pandas import SparseSeries, SparseDataFrame, SparseArray
66

77

88
class sparse_series_to_frame(object):
@@ -23,6 +23,69 @@ def time_sparse_series_to_frame(self):
2323
SparseDataFrame(self.series)
2424

2525

26+
class sparse_array_constructor(object):
27+
goal_time = 0.2
28+
29+
def setup(self):
30+
np.random.seed(1)
31+
self.int64_10percent = self.make_numeric_array(length=1000000, dense_size=100000, fill_value=0, dtype=np.int64)
32+
self.int64_1percent = self.make_numeric_array(length=1000000, dense_size=10000, fill_value=0, dtype=np.int64)
33+
34+
self.float64_10percent = self.make_numeric_array(length=1000000, dense_size=100000, fill_value=np.nan, dtype=np.float64)
35+
self.float64_1percent = self.make_numeric_array(length=1000000, dense_size=10000, fill_value=np.nan, dtype=np.float64)
36+
37+
self.object_nan_fill_value_10percent = self.make_object_array(length=1000000, dense_size=100000, fill_value=np.nan)
38+
self.object_nan_fill_value_1percent = self.make_object_array(length=1000000, dense_size=10000, fill_value=np.nan)
39+
40+
self.object_non_nan_fill_value_10percent = self.make_object_array(length=1000000, dense_size=100000, fill_value=0)
41+
self.object_non_nan_fill_value_1percent = self.make_object_array(length=1000000, dense_size=10000, fill_value=0)
42+
43+
def make_numeric_array(self, length, dense_size, fill_value, dtype):
44+
arr = np.array([fill_value] * length, dtype=dtype)
45+
indexer = np.unique(np.random.randint(0, length, dense_size))
46+
arr[indexer] = np.random.randint(0, 100, len(indexer))
47+
return (arr, fill_value, dtype)
48+
49+
def make_object_array(self, length, dense_size, fill_value):
50+
elems = np.array(['a', 0.0, False, 1, 2], dtype=np.object)
51+
arr = np.array([fill_value] * length, dtype=np.object)
52+
indexer = np.unique(np.random.randint(0, length, dense_size))
53+
arr[indexer] = np.random.choice(elems, len(indexer))
54+
return (arr, fill_value, np.object)
55+
56+
def time_sparse_array_constructor_int64_10percent(self):
57+
arr, fill_value, dtype = self.int64_10percent
58+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
59+
60+
def time_sparse_array_constructor_int64_1percent(self):
61+
arr, fill_value, dtype = self.int64_1percent
62+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
63+
64+
def time_sparse_array_constructor_float64_10percent(self):
65+
arr, fill_value, dtype = self.float64_10percent
66+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
67+
68+
def time_sparse_array_constructor_float64_1percent(self):
69+
arr, fill_value, dtype = self.float64_1percent
70+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
71+
72+
def time_sparse_array_constructor_object_nan_fill_value_10percent(self):
73+
arr, fill_value, dtype = self.object_nan_fill_value_10percent
74+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
75+
76+
def time_sparse_array_constructor_object_nan_fill_value_1percent(self):
77+
arr, fill_value, dtype = self.object_nan_fill_value_1percent
78+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
79+
80+
def time_sparse_array_constructor_object_non_nan_fill_value_10percent(self):
81+
arr, fill_value, dtype = self.object_non_nan_fill_value_10percent
82+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
83+
84+
def time_sparse_array_constructor_object_non_nan_fill_value_1percent(self):
85+
arr, fill_value, dtype = self.object_non_nan_fill_value_1percent
86+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
87+
88+
2689
class sparse_frame_constructor(object):
2790
goal_time = 0.2
2891

@@ -33,7 +96,7 @@ def time_sparse_from_scipy(self):
3396
SparseDataFrame(scipy.sparse.rand(1000, 1000, 0.005))
3497

3598
def time_sparse_from_dict(self):
36-
SparseDataFrame(dict(zip(range(1000), repeat([0]))))
99+
SparseDataFrame(dict(zip(range(1000), itertools.repeat([0]))))
37100

38101

39102
class sparse_series_from_coo(object):

asv_bench/benchmarks/timeseries.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def setup(self):
5656
self.no_freq = self.rng7[:50000].append(self.rng7[50002:])
5757
self.d_freq = self.rng7[:50000].append(self.rng7[50000:])
5858

59-
self.rng8 = date_range(start='1/1/1700', freq='B', periods=100000)
59+
self.rng8 = date_range(start='1/1/1700', freq='B', periods=75000)
6060
self.b_freq = self.rng8[:50000].append(self.rng8[50000:])
6161

6262
def time_add_timedelta(self):

0 commit comments

Comments
 (0)