Skip to content

Commit 39e7b69

Browse files
hexgnu authored and jreback committed Feb 14, 2018
Performance increase rolling min max (pandas-dev#19549)
1 parent 76f175b commit 39e7b69

File tree

6 files changed: +70 -32 lines changed
 

‎asv_bench/benchmarks/rolling.py

+15 -1
Original file line number | Diff line number | Diff line change
@@ -16,12 +16,26 @@ class Methods(object):
1616

1717
def setup(self, constructor, window, dtype, method):
1818
N = 10**5
19-
arr = np.random.random(N).astype(dtype)
19+
arr = (100 * np.random.random(N)).astype(dtype)
2020
self.roll = getattr(pd, constructor)(arr).rolling(window)
2121

2222
def time_rolling(self, constructor, window, dtype, method):
2323
getattr(self.roll, method)()
2424

25+
class VariableWindowMethods(Methods):
26+
sample_time = 0.2
27+
params = (['DataFrame', 'Series'],
28+
['50s', '1h', '1d'],
29+
['int', 'float'],
30+
['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt',
31+
'sum'])
32+
param_names = ['contructor', 'window', 'dtype', 'method']
33+
34+
def setup(self, constructor, window, dtype, method):
35+
N = 10**5
36+
arr = (100 * np.random.random(N)).astype(dtype)
37+
index = pd.date_range('2017-01-01', periods=N, freq='5s')
38+
self.roll = getattr(pd, constructor)(arr, index=index).rolling(window)
2539

2640
class Pairwise(object):
2741

‎doc/source/whatsnew/v0.23.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -645,6 +645,7 @@ Performance Improvements
645645
- Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`)
646646
- Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`)
647647
- Improved performance of :func:`DataFrameGroupBy.rank` (:issue:`15779`)
648+
- Improved performance of variable ``.rolling()`` on ``.min()`` and ``.max()`` (:issue:`19521`)
648649

649650
.. _whatsnew_0230.docs:
650651

‎pandas/_libs/src/headers/cmath

+15
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,15 @@
1+
#ifndef _PANDAS_MATH_H_
2+
#define _PANDAS_MATH_H_
3+
4+
// In older versions of Visual Studio there wasn't a std::signbit defined
5+
// This defines it using _copysign
6+
#if defined(_MSC_VER) && (_MSC_VER < 1800)
7+
#include <cmath>
8+
namespace std {
9+
__inline int signbit(double num) { return _copysign(1.0, num) < 0; }
10+
}
11+
#else
12+
#include <cmath>
13+
#endif
14+
15+
#endif

‎pandas/_libs/src/headers/math.h

-11
This file was deleted.

‎pandas/_libs/window.pyx

+36 -18
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33

44
cimport cython
55
from cython cimport Py_ssize_t
6+
from libcpp.deque cimport deque
67

78
from libc.stdlib cimport malloc, free
89

@@ -12,7 +13,7 @@ from numpy cimport ndarray, double_t, int64_t, float64_t
1213
cnp.import_array()
1314

1415

15-
cdef extern from "../src/headers/math.h":
16+
cdef extern from "../src/headers/cmath" namespace "std":
1617
int signbit(double) nogil
1718
double sqrt(double x) nogil
1819

@@ -1222,8 +1223,9 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp,
12221223
cdef:
12231224
numeric ai
12241225
bint is_variable, should_replace
1225-
int64_t s, e, N, i, j, removed
1226+
int64_t N, i, removed, window_i
12261227
Py_ssize_t nobs = 0
1228+
deque Q[int64_t]
12271229
ndarray[int64_t] starti, endi
12281230
ndarray[numeric, ndim=1] output
12291231
cdef:
@@ -1242,32 +1244,48 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp,
12421244

12431245
output = np.empty(N, dtype=input.dtype)
12441246

1247+
Q = deque[int64_t]()
1248+
12451249
if is_variable:
12461250

12471251
with nogil:
12481252

1249-
for i in range(N):
1250-
s = starti[i]
1251-
e = endi[i]
1253+
# This is using a modified version of the C++ code in this
1254+
# SO post: http://bit.ly/2nOoHlY
1255+
# The original impl didn't deal with variable window sizes
1256+
# So the code was optimized for that
12521257

1253-
r = input[s]
1254-
nobs = 0
1255-
for j in range(s, e):
1258+
for i from starti[0] <= i < endi[0]:
1259+
ai = init_mm(input[i], &nobs, is_max)
12561260

1257-
# adds, death at the i offset
1258-
ai = init_mm(input[j], &nobs, is_max)
1261+
if is_max:
1262+
while not Q.empty() and ai >= input[Q.back()]:
1263+
Q.pop_back()
1264+
else:
1265+
while not Q.empty() and ai <= input[Q.back()]:
1266+
Q.pop_back()
1267+
Q.push_back(i)
12591268

1260-
if is_max:
1261-
if ai > r:
1262-
r = ai
1263-
else:
1264-
if ai < r:
1265-
r = ai
1269+
for i from endi[0] <= i < N:
1270+
output[i-1] = calc_mm(minp, nobs, input[Q.front()])
12661271

1267-
output[i] = calc_mm(minp, nobs, r)
1272+
ai = init_mm(input[i], &nobs, is_max)
12681273

1269-
else:
1274+
if is_max:
1275+
while not Q.empty() and ai >= input[Q.back()]:
1276+
Q.pop_back()
1277+
else:
1278+
while not Q.empty() and ai <= input[Q.back()]:
1279+
Q.pop_back()
12701280

1281+
while not Q.empty() and Q.front() <= i - (endi[i] - starti[i]):
1282+
Q.pop_front()
1283+
1284+
Q.push_back(i)
1285+
1286+
output[N-1] = calc_mm(minp, nobs, input[Q.front()])
1287+
1288+
else:
12711289
# setup the rings of death!
12721290
ring = <numeric *>malloc(win * sizeof(numeric))
12731291
death = <int64_t *>malloc(win * sizeof(int64_t))

‎setup.py

+3 -2
Original file line number | Diff line number | Diff line change
@@ -617,7 +617,8 @@ def pxd(name):
617617
'pyxfile': '_libs/testing'},
618618
'_libs.window': {
619619
'pyxfile': '_libs/window',
620-
'pxdfiles': ['_libs/skiplist', '_libs/src/util']},
620+
'pxdfiles': ['_libs/skiplist', '_libs/src/util'],
621+
'language': 'c++'},
621622
'_libs.writers': {
622623
'pyxfile': '_libs/writers',
623624
'pxdfiles': ['_libs/src/util']},
@@ -640,11 +641,11 @@ def pxd(name):
640641
sources=sources,
641642
depends=data.get('depends', []),
642643
include_dirs=include,
644+
language=data.get('language', 'c'),
643645
extra_compile_args=extra_compile_args)
644646

645647
extensions.append(obj)
646648

647-
648649
# ----------------------------------------------------------------------
649650
# msgpack
650651

0 commit comments

Comments (0)