Skip to content

Commit c61aa36

Browse files
committed
BUG: don't mangle NaN-float-values and pd.NaT (GH 22295)
This is more or less the clean-up after PR pandas-dev#21904 and PR pandas-dev#22207: the underlying hash map handles all cases correctly out of the box, so no special handling is needed.
1 parent 68273a7 commit c61aa36

File tree

1 file changed

+4
-30
lines changed

1 file changed

+4
-30
lines changed

pandas/_libs/hashtable_class_helper.pxi.in

+4-30
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,6 @@ cdef class {{name}}HashTable(HashTable):
470470
int ret = 0
471471
{{dtype}}_t val
472472
khiter_t k
473-
bint seen_na = 0
474473
{{name}}Vector uniques = {{name}}Vector()
475474
{{name}}VectorData *ud
476475

@@ -479,30 +478,13 @@ cdef class {{name}}HashTable(HashTable):
479478
with nogil:
480479
for i in range(n):
481480
val = values[i]
482-
{{if float_group}}
483-
if val == val:
484-
k = kh_get_{{dtype}}(self.table, val)
485-
if k == self.table.n_buckets:
486-
kh_put_{{dtype}}(self.table, val, &ret)
487-
if needs_resize(ud):
488-
with gil:
489-
uniques.resize()
490-
append_data_{{dtype}}(ud, val)
491-
elif not seen_na:
492-
seen_na = 1
493-
if needs_resize(ud):
494-
with gil:
495-
uniques.resize()
496-
append_data_{{dtype}}(ud, NAN)
497-
{{else}}
498481
k = kh_get_{{dtype}}(self.table, val)
499482
if k == self.table.n_buckets:
500483
kh_put_{{dtype}}(self.table, val, &ret)
501484
if needs_resize(ud):
502485
with gil:
503486
uniques.resize()
504487
append_data_{{dtype}}(ud, val)
505-
{{endif}}
506488
return uniques.to_array()
507489

508490
{{endfor}}
@@ -854,19 +836,11 @@ cdef class PyObjectHashTable(HashTable):
854836
for i in range(n):
855837
val = values[i]
856838
hash(val)
839+
k = kh_get_pymap(self.table, <PyObject*>val)
840+
if k == self.table.n_buckets:
841+
kh_put_pymap(self.table, <PyObject*>val, &ret)
842+
uniques.append(val)
857843

858-
# `val is None` below is exception to prevent mangling of None and
859-
# other NA values; note however that other NA values (ex: pd.NaT
860-
# and np.nan) will still get mangled, so many not be a permanent
861-
# solution; see GH 20866
862-
if not checknull(val) or val is None:
863-
k = kh_get_pymap(self.table, <PyObject*>val)
864-
if k == self.table.n_buckets:
865-
kh_put_pymap(self.table, <PyObject*>val, &ret)
866-
uniques.append(val)
867-
elif not seen_na:
868-
seen_na = 1
869-
uniques.append(nan)
870844

871845
return uniques.to_array()
872846

0 commit comments

Comments
 (0)