@@ -21,14 +21,7 @@ from cpython cimport (Py_INCREF, PyTuple_SET_ITEM,
21
21
PyBytes_Check,
22
22
PyUnicode_Check,
23
23
PyTuple_New,
24
- PyObject_RichCompareBool,
25
- PyBytes_GET_SIZE,
26
- PyUnicode_GET_SIZE)
27
-
28
- try :
29
- from cpython cimport PyString_GET_SIZE
30
- except ImportError :
31
- from cpython cimport PyUnicode_GET_SIZE as PyString_GET_SIZE
24
+ PyObject_RichCompareBool)
32
25
33
26
cimport cpython
34
27
@@ -38,7 +31,7 @@ from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
38
31
PyDateTime_IMPORT)
39
32
PyDateTime_IMPORT
40
33
41
- from tslib import NaT, Timestamp, Timedelta, array_to_datetime
34
+ from tslib import NaT, array_to_datetime
42
35
from missing cimport checknull
43
36
44
37
@@ -127,28 +120,6 @@ def item_from_zerodim(object val):
127
120
return util.unbox_if_zerodim(val)
128
121
129
122
130
- @ cython.wraparound (False )
131
- @ cython.boundscheck (False )
132
- def fast_unique (ndarray[object] values ):
133
- cdef:
134
- Py_ssize_t i, n = len (values)
135
- list uniques = []
136
- dict table = {}
137
- object val, stub = 0
138
-
139
- for i from 0 <= i < n:
140
- val = values[i]
141
- if val not in table:
142
- table[val] = stub
143
- uniques.append(val)
144
- try :
145
- uniques.sort()
146
- except Exception :
147
- pass
148
-
149
- return uniques
150
-
151
-
152
123
@ cython.wraparound (False )
153
124
@ cython.boundscheck (False )
154
125
def fast_unique_multiple (list arrays ):
@@ -368,30 +339,6 @@ def has_infs_f8(ndarray[float64_t] arr):
368
339
return False
369
340
370
341
371
- def convert_timestamps (ndarray values ):
372
- cdef:
373
- object val, f, result
374
- dict cache = {}
375
- Py_ssize_t i, n = len (values)
376
- ndarray[object ] out
377
-
378
- # for HDFStore, a bit temporary but...
379
-
380
- from datetime import datetime
381
- f = datetime.fromtimestamp
382
-
383
- out = np.empty(n, dtype = ' O' )
384
-
385
- for i in range (n):
386
- val = util.get_value_1d(values, i)
387
- if val in cache:
388
- out[i] = cache[val]
389
- else :
390
- cache[val] = out[i] = f(val)
391
-
392
- return out
393
-
394
-
395
342
def maybe_indices_to_slice (ndarray[int64_t] indices , int max_len ):
396
343
cdef:
397
344
Py_ssize_t i, n = len (indices)
@@ -731,145 +678,6 @@ def clean_index_list(list obj):
731
678
return np.asarray(obj), 0
732
679
733
680
734
- ctypedef fused pandas_string:
735
- str
736
- unicode
737
- bytes
738
-
739
-
740
- @ cython.boundscheck (False )
741
- @ cython.wraparound (False )
742
- cpdef Py_ssize_t max_len_string_array(pandas_string[:] arr):
743
- """ return the maximum size of elements in a 1-dim string array """
744
- cdef:
745
- Py_ssize_t i, m = 0 , l = 0 , length = arr.shape[0 ]
746
- pandas_string v
747
-
748
- for i in range (length):
749
- v = arr[i]
750
- if PyString_Check(v):
751
- l = PyString_GET_SIZE(v)
752
- elif PyBytes_Check(v):
753
- l = PyBytes_GET_SIZE(v)
754
- elif PyUnicode_Check(v):
755
- l = PyUnicode_GET_SIZE(v)
756
-
757
- if l > m:
758
- m = l
759
-
760
- return m
761
-
762
-
763
- @ cython.boundscheck (False )
764
- @ cython.wraparound (False )
765
- def string_array_replace_from_nan_rep (
766
- ndarray[object , ndim = 1 ] arr, object nan_rep ,
767
- object replace = None ):
768
- """
769
- Replace the values in the array with 'replacement' if
770
- they are 'nan_rep'. Return the same array.
771
- """
772
-
773
- cdef int length = arr.shape[0 ], i = 0
774
- if replace is None :
775
- replace = np.nan
776
-
777
- for i from 0 <= i < length:
778
- if arr[i] == nan_rep:
779
- arr[i] = replace
780
-
781
- return arr
782
-
783
-
784
- @ cython.boundscheck (False )
785
- @ cython.wraparound (False )
786
- def convert_json_to_lines (object arr ):
787
- """
788
- replace comma separated json with line feeds, paying special attention
789
- to quotes & brackets
790
- """
791
- cdef:
792
- Py_ssize_t i = 0 , num_open_brackets_seen = 0 , length
793
- bint in_quotes = 0 , is_escaping = 0
794
- ndarray[uint8_t] narr
795
- unsigned char v, comma, left_bracket, right_brack, newline
796
-
797
- newline = ord (' \n ' )
798
- comma = ord (' ,' )
799
- left_bracket = ord (' {' )
800
- right_bracket = ord (' }' )
801
- quote = ord (' "' )
802
- backslash = ord (' \\ ' )
803
-
804
- narr = np.frombuffer(arr.encode(' utf-8' ), dtype = ' u1' ).copy()
805
- length = narr.shape[0 ]
806
- for i in range (length):
807
- v = narr[i]
808
- if v == quote and i > 0 and not is_escaping:
809
- in_quotes = ~ in_quotes
810
- if v == backslash or is_escaping:
811
- is_escaping = ~ is_escaping
812
- if v == comma: # commas that should be \n
813
- if num_open_brackets_seen == 0 and not in_quotes:
814
- narr[i] = newline
815
- elif v == left_bracket:
816
- if not in_quotes:
817
- num_open_brackets_seen += 1
818
- elif v == right_bracket:
819
- if not in_quotes:
820
- num_open_brackets_seen -= 1
821
-
822
- return narr.tostring().decode(' utf-8' )
823
-
824
-
825
- @ cython.boundscheck (False )
826
- @ cython.wraparound (False )
827
- def write_csv_rows (list data , ndarray data_index ,
828
- int nlevels , ndarray cols , object writer ):
829
-
830
- cdef int N, j, i, ncols
831
- cdef list rows
832
- cdef object val
833
-
834
- # In crude testing, N>100 yields little marginal improvement
835
- N= 100
836
-
837
- # pre-allocate rows
838
- ncols = len (cols)
839
- rows = [[None ] * (nlevels + ncols) for x in range (N)]
840
-
841
- j = - 1
842
- if nlevels == 1 :
843
- for j in range (len (data_index)):
844
- row = rows[j % N]
845
- row[0 ] = data_index[j]
846
- for i in range (ncols):
847
- row[1 + i] = data[i][j]
848
-
849
- if j >= N - 1 and j % N == N - 1 :
850
- writer.writerows(rows)
851
- elif nlevels > 1 :
852
- for j in range (len (data_index)):
853
- row = rows[j % N]
854
- row[:nlevels] = list (data_index[j])
855
- for i in range (ncols):
856
- row[nlevels + i] = data[i][j]
857
-
858
- if j >= N - 1 and j % N == N - 1 :
859
- writer.writerows(rows)
860
- else :
861
- for j in range (len (data_index)):
862
- row = rows[j % N]
863
- for i in range (ncols):
864
- row[i] = data[i][j]
865
-
866
- if j >= N - 1 and j % N == N - 1 :
867
- writer.writerows(rows)
868
-
869
- if j >= 0 and (j < N - 1 or (j % N) != N - 1 ):
870
- writer.writerows(rows[:((j + 1 ) % N)])
871
-
872
-
873
681
# ------------------------------------------------------------------------------
874
682
# Groupby-related functions
875
683
0 commit comments