Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix na_position type in IndexEngine #61062

Merged
merged 9 commits into from
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,7 @@ Indexing
- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
- Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`)
- Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`)
- Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`)
- Bug in :meth:`MultiIndex.insert` when a new value inserted into a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`)
- Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` where single quotes were not escaped (:issue:`60190`)

Expand Down
24 changes: 12 additions & 12 deletions pandas/_libs/hashtable.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ cdef class HashTable:

cdef class UInt64HashTable(HashTable):
cdef kh_uint64_t *table
cdef int64_t na_position
cdef Py_ssize_t na_position
cdef bint uses_mask

cpdef get_item(self, uint64_t val)
Expand All @@ -51,7 +51,7 @@ cdef class UInt64HashTable(HashTable):

cdef class Int64HashTable(HashTable):
cdef kh_int64_t *table
cdef int64_t na_position
cdef Py_ssize_t na_position
cdef bint uses_mask

cpdef get_item(self, int64_t val)
Expand All @@ -61,7 +61,7 @@ cdef class Int64HashTable(HashTable):

cdef class UInt32HashTable(HashTable):
cdef kh_uint32_t *table
cdef int64_t na_position
cdef Py_ssize_t na_position
cdef bint uses_mask

cpdef get_item(self, uint32_t val)
Expand All @@ -71,7 +71,7 @@ cdef class UInt32HashTable(HashTable):

cdef class Int32HashTable(HashTable):
cdef kh_int32_t *table
cdef int64_t na_position
cdef Py_ssize_t na_position
cdef bint uses_mask

cpdef get_item(self, int32_t val)
Expand All @@ -81,7 +81,7 @@ cdef class Int32HashTable(HashTable):

cdef class UInt16HashTable(HashTable):
cdef kh_uint16_t *table
cdef int64_t na_position
cdef Py_ssize_t na_position
cdef bint uses_mask

cpdef get_item(self, uint16_t val)
Expand All @@ -91,7 +91,7 @@ cdef class UInt16HashTable(HashTable):

cdef class Int16HashTable(HashTable):
cdef kh_int16_t *table
cdef int64_t na_position
cdef Py_ssize_t na_position
cdef bint uses_mask

cpdef get_item(self, int16_t val)
Expand All @@ -101,7 +101,7 @@ cdef class Int16HashTable(HashTable):

cdef class UInt8HashTable(HashTable):
cdef kh_uint8_t *table
cdef int64_t na_position
cdef Py_ssize_t na_position
cdef bint uses_mask

cpdef get_item(self, uint8_t val)
Expand All @@ -111,7 +111,7 @@ cdef class UInt8HashTable(HashTable):

cdef class Int8HashTable(HashTable):
cdef kh_int8_t *table
cdef int64_t na_position
cdef Py_ssize_t na_position
cdef bint uses_mask

cpdef get_item(self, int8_t val)
Expand All @@ -121,7 +121,7 @@ cdef class Int8HashTable(HashTable):

cdef class Float64HashTable(HashTable):
cdef kh_float64_t *table
cdef int64_t na_position
cdef Py_ssize_t na_position
cdef bint uses_mask

cpdef get_item(self, float64_t val)
Expand All @@ -131,7 +131,7 @@ cdef class Float64HashTable(HashTable):

cdef class Float32HashTable(HashTable):
cdef kh_float32_t *table
cdef int64_t na_position
cdef Py_ssize_t na_position
cdef bint uses_mask

cpdef get_item(self, float32_t val)
Expand All @@ -141,7 +141,7 @@ cdef class Float32HashTable(HashTable):

cdef class Complex64HashTable(HashTable):
cdef kh_complex64_t *table
cdef int64_t na_position
cdef Py_ssize_t na_position
cdef bint uses_mask

cpdef get_item(self, complex64_t val)
Expand All @@ -151,7 +151,7 @@ cdef class Complex64HashTable(HashTable):

cdef class Complex128HashTable(HashTable):
cdef kh_complex128_t *table
cdef int64_t na_position
cdef Py_ssize_t na_position
cdef bint uses_mask

cpdef get_item(self, complex128_t val)
Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ cdef class {{name}}HashTable(HashTable):
int ret = 0
{{c_type}} val
khiter_t k
int8_t na_position = self.na_position
Py_ssize_t na_position = self.na_position

if self.uses_mask and mask is None:
raise NotImplementedError # pragma: no cover
Expand Down Expand Up @@ -567,7 +567,7 @@ cdef class {{name}}HashTable(HashTable):
Int64Vector self_locs = Int64Vector()
Int64VectorData *l
Int64VectorData *sl
int8_t na_position = self.na_position
Py_ssize_t na_position = self.na_position

l = &locs.data
sl = &self_locs.data
Expand Down Expand Up @@ -609,7 +609,7 @@ cdef class {{name}}HashTable(HashTable):
{{c_type}} val
khiter_t k
intp_t[::1] locs = np.empty(n, dtype=np.intp)
int8_t na_position = self.na_position
Py_ssize_t na_position = self.na_position

if self.uses_mask and mask is None:
raise NotImplementedError # pragma: no cover
Expand Down
7 changes: 4 additions & 3 deletions pandas/tests/libs/test_hashtable.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,18 +149,19 @@ def test_map_locations(self, table_type, dtype, writable):
def test_map_locations_mask(self, table_type, dtype, writable):
if table_type == ht.PyObjectHashTable:
pytest.skip("Mask not supported for object")
N = 3
N = 129 # must be > 128 to test GH#58924
table = table_type(uses_mask=True)
keys = (np.arange(N) + N).astype(dtype)
keys.flags.writeable = writable
table.map_locations(keys, np.array([False, False, True]))
mask = np.concatenate([np.repeat(False, N - 1), [True]], axis=0)
table.map_locations(keys, mask)
for i in range(N - 1):
assert table.get_item(keys[i]) == i

with pytest.raises(KeyError, match=re.escape(str(keys[N - 1]))):
table.get_item(keys[N - 1])

assert table.get_na() == 2
assert table.get_na() == N - 1

def test_lookup(self, table_type, dtype, writable):
N = 3
Expand Down
Loading