@@ -13,9 +13,6 @@ from util cimport (UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX,
13
13
14
14
# core.common import for fast inference checks
15
15
16
- npy_int64_max = np.iinfo(np.int64).max
17
-
18
-
19
16
cpdef bint is_float(object obj):
    """
    Report whether `obj` is a float object.

    The check is delegated entirely to ``util.is_float_object``.
    """
    return util.is_float_object(obj)
21
18
@@ -629,21 +626,100 @@ cdef extern from "parse_helper.h":
629
626
630
627
# Typed C copies of the 64-bit integer limits; the numeric-conversion code
# compares parsed Python integers against these to pick int64 vs. uint64.
cdef int64_t iINT64_MAX = < int64_t> INT64_MAX
631
628
cdef int64_t iINT64_MIN = < int64_t> INT64_MIN
629
+ cdef uint64_t iUINT64_MAX = < uint64_t> UINT64_MAX
630
+
631
+
632
cdef inline bint _check_uint64_nan(bint seen_uint, bint seen_null,
                                   bint coerce_numeric) except -1:
    """
    Decide how to proceed when NaN and uint64 elements may coexist.

    A numeric array that contains a NaN element has to be returned as
    float64, and casting uint64 data to float64 would truncate it, so
    the combination of the two cannot be converted safely.

    Parameters
    ----------
    seen_uint : bool
        Whether a uint64 element has been encountered.
    seen_null : bool
        Whether a NaN element has been encountered.
    coerce_numeric : bool
        Whether conversion to numeric is being forced.

    Returns
    -------
    return_values : bool
        True when the original input array should be returned to avoid
        data truncation, False when there is nothing to reconcile.

    Raises
    ------
    ValueError
        If both uint64 and NaN were seen and 'coerce_numeric' is True.
    """
    # Only the combination of both flags is a problem.
    if not (seen_uint and seen_null):
        return False

    # Forced coercion cannot silently truncate, so it fails loudly.
    if coerce_numeric:
        raise ValueError(" uint64 array detected, and such an "
                         " array cannot contain NaN.")

    return True
632
655
633
656
634
- def maybe_convert_numeric (object[:] values , set na_values ,
657
cdef inline bint _check_uint64_int64_conflict(bint seen_sint, bint seen_uint,
                                              bint coerce_numeric) except -1:
    """
    Decide how to proceed when int64 and uint64 elements may coexist.

    No integer dtype is large enough to hold both kinds of element, so
    an array containing both cannot be cast to an integer dtype safely.

    Parameters
    ----------
    seen_sint : bool
        Whether a negative (signed) integer element has been encountered.
    seen_uint : bool
        Whether a uint64 element has been encountered.
    coerce_numeric : bool
        Whether conversion to numeric is being forced.

    Returns
    -------
    return_values : bool
        True when the original input array should be returned to avoid
        data truncation, False when there is no conflict.

    Raises
    ------
    ValueError
        If both kinds of element were seen and 'coerce_numeric' is True.
    """
    # A conflict exists only when both kinds of integer were seen.
    if not (seen_uint and seen_sint):
        return False

    # Forced coercion cannot silently truncate, so it fails loudly.
    if coerce_numeric:
        raise ValueError(" uint64 and negative values detected. "
                         " Cannot safely return a numeric array "
                         " without truncating data.")

    return True
680
+
681
+
682
+ def maybe_convert_numeric (ndarray[object] values , set na_values ,
635
683
bint convert_empty = True , bint coerce_numeric = False ):
636
684
"""
637
- Type inference function-- convert strings to numeric (potentially) and
638
- convert to proper dtype array
685
+ Convert object array to a numeric array if possible.
686
+
687
+ Parameters
688
+ ----------
689
+ values : ndarray
690
+ Array of object elements to convert.
691
+ na_values : set
692
+ Set of values that should be interpreted as NaN.
693
+ convert_empty : bool, default True
694
+ If an empty array-like object is encountered, whether to interpret
695
+ that element as NaN or not. If set to False, a ValueError will be
696
+ raised if such an element is encountered and 'coerce_numeric' is False.
697
+ coerce_numeric : bool, default False
698
+ If initial attempts to convert to numeric have failed, whether to
699
+ force conversion to numeric via alternative methods or by setting the
700
+ element to NaN. Otherwise, an Exception will be raised when such an
701
+ element is encountered.
702
+
703
+ This boolean also has an impact on how conversion behaves when a
704
+ numeric array has no suitable numerical dtype to return (i.e. uint64,
705
+ int32, uint8). If set to False, the original object array will be
706
+ returned. Otherwise, a ValueError will be raised.
707
+
708
+ Returns
709
+ -------
710
+ numeric_array : array of converted object values to numerical ones
639
711
"""
640
712
cdef:
641
713
int status, maybe_int
642
714
Py_ssize_t i, n = values.size
643
715
ndarray[float64_t] floats = np.empty(n, dtype = ' f8' )
644
716
ndarray[complex128_t] complexes = np.empty(n, dtype = ' c16' )
645
717
ndarray[int64_t] ints = np.empty(n, dtype = ' i8' )
718
+ ndarray[uint64_t] uints = np.empty(n, dtype = ' u8' )
646
719
ndarray[uint8_t] bools = np.empty(n, dtype = ' u1' )
720
+ bint seen_null = False
721
+ bint seen_uint = False
722
+ bint seen_sint = False
647
723
bint seen_float = False
648
724
bint seen_complex = False
649
725
bint seen_int = False
@@ -655,22 +731,60 @@ def maybe_convert_numeric(object[:] values, set na_values,
655
731
val = values[i]
656
732
657
733
if val.__hash__ is not None and val in na_values:
734
+ seen_null = True
735
+ if _check_uint64_nan(seen_uint, seen_null,
736
+ coerce_numeric):
737
+ return values
738
+
658
739
floats[i] = complexes[i] = nan
659
740
seen_float = True
660
741
elif util.is_float_object(val):
742
+ if val != val:
743
+ seen_null = True
744
+ if _check_uint64_nan(seen_uint, seen_null,
745
+ coerce_numeric):
746
+ return values
747
+
661
748
floats[i] = complexes[i] = val
662
749
seen_float = True
663
750
elif util.is_integer_object(val):
664
- floats[i] = ints[i] = val
751
+ floats[i] = complexes[i] = val
752
+ as_int = int (val)
665
753
seen_int = True
754
+
755
+ seen_uint = seen_uint or (as_int > iINT64_MAX)
756
+ seen_sint = seen_sint or (as_int < 0 )
757
+
758
+ if (_check_uint64_nan(seen_uint, seen_null, coerce_numeric) or
759
+ _check_uint64_int64_conflict(seen_sint, seen_uint,
760
+ coerce_numeric)):
761
+ return values
762
+
763
+ if seen_uint:
764
+ uints[i] = as_int
765
+ elif seen_sint:
766
+ ints[i] = as_int
767
+ else :
768
+ uints[i] = as_int
769
+ ints[i] = as_int
666
770
elif util.is_bool_object(val):
667
- floats[i] = ints[i] = bools[i] = val
771
+ floats[i] = uints[i] = ints[i] = bools[i] = val
668
772
seen_bool = True
669
773
elif val is None :
774
+ seen_null = True
775
+ if _check_uint64_nan(seen_uint, seen_null,
776
+ coerce_numeric):
777
+ return values
778
+
670
779
floats[i] = complexes[i] = nan
671
780
seen_float = True
672
781
elif hasattr (val, ' __len__' ) and len (val) == 0 :
673
782
if convert_empty or coerce_numeric:
783
+ seen_null = True
784
+ if _check_uint64_nan(seen_uint, seen_null,
785
+ coerce_numeric):
786
+ return values
787
+
674
788
floats[i] = complexes[i] = nan
675
789
seen_float = True
676
790
else :
@@ -686,24 +800,61 @@ def maybe_convert_numeric(object[:] values, set na_values,
686
800
status = floatify(val, & fval, & maybe_int)
687
801
688
802
if fval in na_values:
803
+ seen_null = True
804
+ if _check_uint64_nan(seen_uint, seen_null,
805
+ coerce_numeric):
806
+ return values
807
+
689
808
floats[i] = complexes[i] = nan
690
809
seen_float = True
691
810
else :
811
+ if fval != fval:
812
+ seen_null = True
813
+ if _check_uint64_nan(seen_uint, seen_null,
814
+ coerce_numeric):
815
+ return values
816
+
692
817
floats[i] = fval
693
818
694
- if not seen_float:
695
- if maybe_int:
696
- as_int = int (val)
819
+ if maybe_int:
820
+ as_int = int (val)
697
821
698
- if as_int <= iINT64_MAX and as_int >= iINT64_MIN:
822
+ if as_int in na_values:
823
+ seen_float = True
824
+ seen_null = True
825
+ else :
826
+ seen_uint = seen_uint or (as_int > iINT64_MAX)
827
+ seen_sint = seen_sint or (as_int < 0 )
828
+ seen_int = True
829
+
830
+ if (_check_uint64_nan(seen_uint, seen_null,
831
+ coerce_numeric) or
832
+ _check_uint64_int64_conflict(seen_sint, seen_uint,
833
+ coerce_numeric)):
834
+ return values
835
+
836
+ if not (seen_float or as_int in na_values):
837
+ if as_int < iINT64_MIN or as_int > iUINT64_MAX:
838
+ raise ValueError (' Integer out of range.' )
839
+
840
+ if seen_uint:
841
+ uints[i] = as_int
842
+ elif seen_sint:
699
843
ints[i] = as_int
700
844
else :
701
- raise ValueError (' integer out of range' )
702
- else :
703
- seen_float = True
845
+ uints[i] = as_int
846
+ ints[i] = as_int
847
+ else :
848
+ seen_float = True
704
849
except (TypeError , ValueError ) as e:
705
850
if not coerce_numeric:
706
851
raise type (e)(str (e) + ' at position {}' .format(i))
852
+ elif " uint64" in str (e): # Exception from check functions.
853
+ raise
854
+ seen_null = True
855
+ if _check_uint64_nan(seen_uint, seen_null,
856
+ coerce_numeric):
857
+ return values
707
858
708
859
floats[i] = nan
709
860
seen_float = True
@@ -713,9 +864,14 @@ def maybe_convert_numeric(object[:] values, set na_values,
713
864
elif seen_float:
714
865
return floats
715
866
elif seen_int:
716
- return ints
867
+ if seen_uint:
868
+ return uints
869
+ else :
870
+ return ints
717
871
elif seen_bool:
718
872
return bools.view(np.bool_)
873
+ elif seen_uint:
874
+ return uints
719
875
return ints
720
876
721
877
@@ -810,7 +966,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
810
966
floats[i] = < float64_t> val
811
967
complexes[i] = < double complex > val
812
968
if not seen_null:
813
- seen_uint = seen_uint or (int (val) > npy_int64_max )
969
+ seen_uint = seen_uint or (int (val) > iINT64_MAX )
814
970
seen_sint = seen_sint or (val < 0 )
815
971
816
972
if seen_uint and seen_sint:
0 commit comments