Skip to content

Commit 221c8a7

Browse files
jbrockmendel authored and jreback committed
CLN: assorted cleanups (#29314)
1 parent 1f4fcb5 commit 221c8a7

16 files changed

+135 -173 lines changed

pandas/_libs/algos.pyx

+26-61
Original file line number | Diff line number | Diff line change
@@ -379,28 +379,34 @@ ctypedef fused algos_t:
379379
uint8_t
380380

381381

382+
def _validate_limit(nobs: int, limit=None) -> int:
383+
if limit is None:
384+
lim = nobs
385+
else:
386+
if not util.is_integer_object(limit):
387+
raise ValueError('Limit must be an integer')
388+
if limit < 1:
389+
raise ValueError('Limit must be greater than 0')
390+
lim = limit
391+
392+
return lim
393+
394+
382395
@cython.boundscheck(False)
383396
@cython.wraparound(False)
384397
def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
385398
cdef:
386399
Py_ssize_t i, j, nleft, nright
387400
ndarray[int64_t, ndim=1] indexer
388-
algos_t cur, next
401+
algos_t cur, next_val
389402
int lim, fill_count = 0
390403

391404
nleft = len(old)
392405
nright = len(new)
393406
indexer = np.empty(nright, dtype=np.int64)
394407
indexer[:] = -1
395408

396-
if limit is None:
397-
lim = nright
398-
else:
399-
if not util.is_integer_object(limit):
400-
raise ValueError('Limit must be an integer')
401-
if limit < 1:
402-
raise ValueError('Limit must be greater than 0')
403-
lim = limit
409+
lim = _validate_limit(nright, limit)
404410

405411
if nleft == 0 or nright == 0 or new[nright - 1] < old[0]:
406412
return indexer
@@ -426,9 +432,9 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
426432
j += 1
427433
break
428434

429-
next = old[i + 1]
435+
next_val = old[i + 1]
430436

431-
while j < nright and cur <= new[j] < next:
437+
while j < nright and cur <= new[j] < next_val:
432438
if new[j] == cur:
433439
indexer[j] = i
434440
elif fill_count < lim:
@@ -438,16 +444,14 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
438444

439445
fill_count = 0
440446
i += 1
441-
cur = next
447+
cur = next_val
442448

443449
return indexer
444450

445451

446452
@cython.boundscheck(False)
447453
@cython.wraparound(False)
448-
def pad_inplace(algos_t[:] values,
449-
const uint8_t[:] mask,
450-
limit=None):
454+
def pad_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
451455
cdef:
452456
Py_ssize_t i, N
453457
algos_t val
@@ -459,14 +463,7 @@ def pad_inplace(algos_t[:] values,
459463
if N == 0:
460464
return
461465

462-
if limit is None:
463-
lim = N
464-
else:
465-
if not util.is_integer_object(limit):
466-
raise ValueError('Limit must be an integer')
467-
if limit < 1:
468-
raise ValueError('Limit must be greater than 0')
469-
lim = limit
466+
lim = _validate_limit(N, limit)
470467

471468
val = values[0]
472469
for i in range(N):
@@ -482,9 +479,7 @@ def pad_inplace(algos_t[:] values,
482479

483480
@cython.boundscheck(False)
484481
@cython.wraparound(False)
485-
def pad_2d_inplace(algos_t[:, :] values,
486-
const uint8_t[:, :] mask,
487-
limit=None):
482+
def pad_2d_inplace(algos_t[:, :] values, const uint8_t[:, :] mask, limit=None):
488483
cdef:
489484
Py_ssize_t i, j, N, K
490485
algos_t val
@@ -496,14 +491,7 @@ def pad_2d_inplace(algos_t[:, :] values,
496491
if N == 0:
497492
return
498493

499-
if limit is None:
500-
lim = N
501-
else:
502-
if not util.is_integer_object(limit):
503-
raise ValueError('Limit must be an integer')
504-
if limit < 1:
505-
raise ValueError('Limit must be greater than 0')
506-
lim = limit
494+
lim = _validate_limit(N, limit)
507495

508496
for j in range(K):
509497
fill_count = 0
@@ -559,14 +547,7 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
559547
indexer = np.empty(nright, dtype=np.int64)
560548
indexer[:] = -1
561549

562-
if limit is None:
563-
lim = nright
564-
else:
565-
if not util.is_integer_object(limit):
566-
raise ValueError('Limit must be an integer')
567-
if limit < 1:
568-
raise ValueError('Limit must be greater than 0')
569-
lim = limit
550+
lim = _validate_limit(nright, limit)
570551

571552
if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]:
572553
return indexer
@@ -612,9 +593,7 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
612593

613594
@cython.boundscheck(False)
614595
@cython.wraparound(False)
615-
def backfill_inplace(algos_t[:] values,
616-
const uint8_t[:] mask,
617-
limit=None):
596+
def backfill_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
618597
cdef:
619598
Py_ssize_t i, N
620599
algos_t val
@@ -626,14 +605,7 @@ def backfill_inplace(algos_t[:] values,
626605
if N == 0:
627606
return
628607

629-
if limit is None:
630-
lim = N
631-
else:
632-
if not util.is_integer_object(limit):
633-
raise ValueError('Limit must be an integer')
634-
if limit < 1:
635-
raise ValueError('Limit must be greater than 0')
636-
lim = limit
608+
lim = _validate_limit(N, limit)
637609

638610
val = values[N - 1]
639611
for i in range(N - 1, -1, -1):
@@ -663,14 +635,7 @@ def backfill_2d_inplace(algos_t[:, :] values,
663635
if N == 0:
664636
return
665637

666-
if limit is None:
667-
lim = N
668-
else:
669-
if not util.is_integer_object(limit):
670-
raise ValueError('Limit must be an integer')
671-
if limit < 1:
672-
raise ValueError('Limit must be greater than 0')
673-
lim = limit
638+
lim = _validate_limit(N, limit)
674639

675640
for j in range(K):
676641
fill_count = 0

pandas/_libs/algos_common_helper.pxi.in

+1-1
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ def diff_2d(ndarray[diff_t, ndim=2] arr,
2323
ndarray[out_t, ndim=2] out,
2424
Py_ssize_t periods, int axis):
2525
cdef:
26-
Py_ssize_t i, j, sx, sy
26+
Py_ssize_t i, j, sx, sy, start, stop
2727

2828
# Disable for unsupported dtype combinations,
2929
# see https://github.com/cython/cython/issues/2646

pandas/_libs/groupby.pxd

-6
This file was deleted.

pandas/_libs/groupby.pyx

+7
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,13 @@ _int64_max = np.iinfo(np.int64).max
2727

2828
cdef float64_t NaN = <float64_t>np.NaN
2929

30+
cdef enum InterpolationEnumType:
31+
INTERPOLATION_LINEAR,
32+
INTERPOLATION_LOWER,
33+
INTERPOLATION_HIGHER,
34+
INTERPOLATION_NEAREST,
35+
INTERPOLATION_MIDPOINT
36+
3037

3138
cdef inline float64_t median_linear(float64_t* a, int n) nogil:
3239
cdef:

pandas/_libs/lib.pyx

+3-2
Original file line number | Diff line number | Diff line change
@@ -698,8 +698,7 @@ def generate_bins_dt64(ndarray[int64_t] values, const int64_t[:] binner,
698698

699699
@cython.boundscheck(False)
700700
@cython.wraparound(False)
701-
def get_level_sorter(const int64_t[:] label,
702-
const int64_t[:] starts):
701+
def get_level_sorter(const int64_t[:] label, const int64_t[:] starts):
703702
"""
704703
argsort for a single level of a multi-index, keeping the order of higher
705704
levels unchanged. `starts` points to starts of same-key indices w.r.t
@@ -1677,6 +1676,7 @@ cpdef bint is_datetime64_array(ndarray values):
16771676
return validator.validate(values)
16781677

16791678

1679+
# TODO: only non-here use is in test
16801680
def is_datetime_with_singletz_array(values: ndarray) -> bool:
16811681
"""
16821682
Check values have the same tzinfo attribute.
@@ -1720,6 +1720,7 @@ cdef class AnyTimedeltaValidator(TimedeltaValidator):
17201720
return is_timedelta(value)
17211721

17221722

1723+
# TODO: only non-here use is in test
17231724
cpdef bint is_timedelta_or_timedelta64_array(ndarray values):
17241725
""" infer with timedeltas and/or nat/none """
17251726
cdef:

pandas/_libs/parsers.pyx

+6-4
Original file line number | Diff line number | Diff line change
@@ -278,7 +278,7 @@ cdef class TextReader:
278278
object true_values, false_values
279279
object handle
280280
bint na_filter, keep_default_na, verbose, has_usecols, has_mi_columns
281-
int64_t parser_start
281+
uint64_t parser_start
282282
list clocks
283283
char *c_encoding
284284
kh_str_starts_t *false_set
@@ -710,11 +710,11 @@ cdef class TextReader:
710710
# header is now a list of lists, so field_count should use header[0]
711711

712712
cdef:
713-
Py_ssize_t i, start, field_count, passed_count, unnamed_count # noqa
713+
Py_ssize_t i, start, field_count, passed_count, unnamed_count
714714
char *word
715715
object name, old_name
716716
int status
717-
int64_t hr, data_line
717+
uint64_t hr, data_line
718718
char *errors = "strict"
719719
StringPath path = _string_path(self.c_encoding)
720720

@@ -1015,12 +1015,14 @@ cdef class TextReader:
10151015
else:
10161016
end = min(start + rows, self.parser.lines)
10171017

1018+
# FIXME: dont leave commented-out
10181019
# # skip footer
10191020
# if footer > 0:
10201021
# end -= footer
10211022

10221023
num_cols = -1
1023-
for i in range(self.parser.lines):
1024+
# Py_ssize_t cast prevents build warning
1025+
for i in range(<Py_ssize_t>self.parser.lines):
10241026
num_cols = (num_cols < self.parser.line_fields[i]) * \
10251027
self.parser.line_fields[i] + \
10261028
(num_cols >= self.parser.line_fields[i]) * num_cols

0 commit comments

Comments
 (0)