Skip to content

Commit 04db779

Browse files
toobaz authored and jreback committed Dec 19, 2017
Construct 1d array from listlike (#18769)
1 parent 856d9e5 commit 04db779

File tree

11 files changed

+108
-54
lines changed

11 files changed

+108
-54
lines changed
 

‎asv_bench/benchmarks/ctors.py

+28-8
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,36 @@
11
import numpy as np
2-
from pandas import DataFrame, Series, Index, DatetimeIndex, Timestamp
2+
from pandas import Series, Index, DatetimeIndex, Timestamp
33

44
from .pandas_vb_common import setup # noqa
55

66

7-
class Constructors(object):
7+
class SeriesConstructors(object):
8+
9+
goal_time = 0.2
10+
11+
param_names = ["data_fmt", "with_index"]
12+
params = [[lambda x: x,
13+
list,
14+
lambda arr: list(arr.astype(str)),
15+
lambda arr: dict(zip(range(len(arr)), arr)),
16+
lambda arr: [(i, -i) for i in arr],
17+
lambda arr: [[i, -i] for i in arr],
18+
lambda arr: ([(i, -i) for i in arr][:-1] + [None]),
19+
lambda arr: ([[i, -i] for i in arr][:-1] + [None])],
20+
[False, True]]
21+
22+
def setup(self, data_fmt, with_index):
23+
N = 10**4
24+
np.random.seed(1234)
25+
arr = np.random.randn(N)
26+
self.data = data_fmt(arr)
27+
self.index = np.arange(N) if with_index else None
28+
29+
def time_series_constructor(self, data_fmt, with_index):
30+
Series(self.data, index=self.index)
31+
32+
33+
class SeriesDtypesConstructors(object):
834

935
goal_time = 0.2
1036

@@ -19,12 +45,6 @@ def setup(self):
1945
self.s = Series([Timestamp('20110101'), Timestamp('20120101'),
2046
Timestamp('20130101')] * N * 10)
2147

22-
def time_frame_from_ndarray(self):
23-
DataFrame(self.arr)
24-
25-
def time_series_from_ndarray(self):
26-
Series(self.data, index=self.index)
27-
2848
def time_index_from_array_string(self):
2949
Index(self.arr_str)
3050

‎asv_bench/benchmarks/frame_ctor.py

+12
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,15 @@ def setup(self, nrows):
8181
def time_frame_from_records_generator(self, nrows):
8282
# issue-6700
8383
self.df = DataFrame.from_records(self.gen, nrows=nrows)
84+
85+
86+
class FromNDArray(object):
87+
88+
goal_time = 0.2
89+
90+
def setup(self):
91+
N = 100000
92+
self.data = np.random.randn(N)
93+
94+
def time_frame_from_ndarray(self):
95+
self.df = DataFrame(self.data)

‎pandas/_libs/lib.pyx

-17
Original file line numberDiff line numberDiff line change
@@ -148,23 +148,6 @@ def item_from_zerodim(object val):
148148
return util.unbox_if_zerodim(val)
149149

150150

151-
@cython.wraparound(False)
152-
@cython.boundscheck(False)
153-
cpdef ndarray[object] list_to_object_array(list obj):
154-
"""
155-
Convert list to object ndarray. Seriously can\'t believe
156-
I had to write this function.
157-
"""
158-
cdef:
159-
Py_ssize_t i, n = len(obj)
160-
ndarray[object] arr = np.empty(n, dtype=object)
161-
162-
for i in range(n):
163-
arr[i] = obj[i]
164-
165-
return arr
166-
167-
168151
@cython.wraparound(False)
169152
@cython.boundscheck(False)
170153
def fast_unique(ndarray[object] values):

‎pandas/_libs/src/inference.pyx

+3-1
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,9 @@ def infer_dtype(object value, bint skipna=False):
349349
else:
350350
if not isinstance(value, list):
351351
value = list(value)
352-
values = list_to_object_array(value)
352+
from pandas.core.dtypes.cast import (
353+
construct_1d_object_array_from_listlike)
354+
values = construct_1d_object_array_from_listlike(value)
353355

354356
values = getattr(values, 'values', values)
355357
val = _try_infer_map(values)

‎pandas/core/algorithms.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
from warnings import warn, catch_warnings
77
import numpy as np
88

9-
from pandas.core.dtypes.cast import maybe_promote
9+
from pandas.core.dtypes.cast import (
10+
maybe_promote, construct_1d_object_array_from_listlike)
1011
from pandas.core.dtypes.generic import (
1112
ABCSeries, ABCIndex,
1213
ABCIndexClass, ABCCategorical)
@@ -171,7 +172,7 @@ def _ensure_arraylike(values):
171172
if inferred in ['mixed', 'string', 'unicode']:
172173
if isinstance(values, tuple):
173174
values = list(values)
174-
values = lib.list_to_object_array(values)
175+
values = construct_1d_object_array_from_listlike(values)
175176
else:
176177
values = np.asarray(values)
177178
return values
@@ -401,7 +402,7 @@ def isin(comps, values):
401402
.format(values_type=type(values).__name__))
402403

403404
if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)):
404-
values = lib.list_to_object_array(list(values))
405+
values = construct_1d_object_array_from_listlike(list(values))
405406

406407
comps, dtype, _ = _ensure_data(comps)
407408
values, _, _ = _ensure_data(values, dtype=dtype)

‎pandas/core/common.py

+6-12
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from pandas.core.dtypes.missing import isna, isnull, notnull # noqa
2222
from pandas.api import types
2323
from pandas.core.dtypes import common
24+
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
2425

2526
# compat
2627
from pandas.errors import ( # noqa
@@ -381,25 +382,18 @@ def _asarray_tuplesafe(values, dtype=None):
381382
return values.values
382383

383384
if isinstance(values, list) and dtype in [np.object_, object]:
384-
return lib.list_to_object_array(values)
385+
return construct_1d_object_array_from_listlike(values)
385386

386387
result = np.asarray(values, dtype=dtype)
387388

388389
if issubclass(result.dtype.type, compat.string_types):
389390
result = np.asarray(values, dtype=object)
390391

391392
if result.ndim == 2:
392-
if isinstance(values, list):
393-
return lib.list_to_object_array(values)
394-
else:
395-
# Making a 1D array that safely contains tuples is a bit tricky
396-
# in numpy, leading to the following
397-
try:
398-
result = np.empty(len(values), dtype=object)
399-
result[:] = values
400-
except ValueError:
401-
# we have a list-of-list
402-
result[:] = [tuple(x) for x in values]
393+
# Avoid building an array of arrays:
394+
# TODO: verify whether any path hits this except #18819 (invalid)
395+
values = [tuple(x) for x in values]
396+
result = construct_1d_object_array_from_listlike(values)
403397

404398
return result
405399

‎pandas/core/dtypes/cast.py

+26-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def maybe_convert_platform(values):
4242
""" try to do platform conversion, allow ndarray or list here """
4343

4444
if isinstance(values, (list, tuple)):
45-
values = lib.list_to_object_array(list(values))
45+
values = construct_1d_object_array_from_listlike(list(values))
4646
if getattr(values, 'dtype', None) == np.object_:
4747
if hasattr(values, '_values'):
4848
values = values._values
@@ -1162,3 +1162,28 @@ def construct_1d_arraylike_from_scalar(value, length, dtype):
11621162
subarr.fill(value)
11631163

11641164
return subarr
1165+
1166+
1167+
def construct_1d_object_array_from_listlike(values):
1168+
"""
1169+
Transform any list-like object into a 1-dimensional numpy array of object
1170+
dtype.
1171+
1172+
Parameters
1173+
----------
1174+
values : any iterable which has a len()
1175+
1176+
Raises
1177+
------
1178+
TypeError
1179+
* If `values` does not have a len()
1180+
1181+
Returns
1182+
-------
1183+
1-dimensional numpy array of dtype object
1184+
"""
1185+
# numpy will try to interpret nested lists as further dimensions, hence
1186+
# making a 1D array that contains list-likes is a bit tricky:
1187+
result = np.empty(len(values), dtype='object')
1188+
result[:] = values
1189+
return result

‎pandas/core/ops.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@
3333
is_list_like, is_offsetlike,
3434
is_scalar,
3535
_ensure_object)
36-
from pandas.core.dtypes.cast import maybe_upcast_putmask, find_common_type
36+
from pandas.core.dtypes.cast import (
37+
maybe_upcast_putmask, find_common_type,
38+
construct_1d_object_array_from_listlike)
3739
from pandas.core.dtypes.generic import (
3840
ABCSeries,
3941
ABCDataFrame,
@@ -740,7 +742,7 @@ def wrapper(left, right, name=name, na_op=na_op):
740742

741743
def _comp_method_OBJECT_ARRAY(op, x, y):
742744
if isinstance(y, list):
743-
y = lib.list_to_object_array(y)
745+
y = construct_1d_object_array_from_listlike(y)
744746
if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)):
745747
if not is_object_dtype(y.dtype):
746748
y = y.astype(np.object_)
@@ -891,7 +893,7 @@ def na_op(x, y):
891893
result = op(x, y)
892894
except TypeError:
893895
if isinstance(y, list):
894-
y = lib.list_to_object_array(y)
896+
y = construct_1d_object_array_from_listlike(y)
895897

896898
if isinstance(y, (np.ndarray, ABCSeries)):
897899
if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)):

‎pandas/tests/dtypes/test_cast.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
infer_dtype_from_array,
2222
maybe_convert_string_to_object,
2323
maybe_convert_scalar,
24-
find_common_type)
24+
find_common_type,
25+
construct_1d_object_array_from_listlike)
2526
from pandas.core.dtypes.dtypes import (
2627
CategoricalDtype,
2728
DatetimeTZDtype,
@@ -407,3 +408,17 @@ def test_period_dtype(self):
407408
np.dtype('datetime64[ns]'), np.object, np.int64]:
408409
assert find_common_type([dtype, dtype2]) == np.object
409410
assert find_common_type([dtype2, dtype]) == np.object
411+
412+
@pytest.mark.parametrize('datum1', [1, 2., "3", (4, 5), [6, 7], None])
413+
@pytest.mark.parametrize('datum2', [8, 9., "10", (11, 12), [13, 14], None])
414+
def test_cast_1d_array(self, datum1, datum2):
415+
data = [datum1, datum2]
416+
result = construct_1d_object_array_from_listlike(data)
417+
418+
# Direct comparison fails: https://github.com/numpy/numpy/issues/10218
419+
assert result.dtype == 'object'
420+
assert list(result) == data
421+
422+
@pytest.mark.parametrize('val', [1, 2., None])
423+
def test_cast_1d_array_invalid_scalar(self, val):
424+
pytest.raises(TypeError, construct_1d_object_array_from_listlike, val)

‎pandas/tests/frame/test_constructors.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121
MultiIndex, Timedelta, Timestamp,
2222
date_range, Categorical)
2323
import pandas as pd
24-
import pandas._libs.lib as lib
2524
import pandas.util.testing as tm
25+
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
2626

2727
from pandas.tests.frame.common import TestData
2828

@@ -1199,7 +1199,7 @@ def test_constructor_from_items(self):
11991199
DataFrame.from_items(row_items, orient='index')
12001200

12011201
# orient='index', but thar be tuples
1202-
arr = lib.list_to_object_array(
1202+
arr = construct_1d_object_array_from_listlike(
12031203
[('bar', 'baz')] * len(self.mixed_frame))
12041204
self.mixed_frame['foo'] = arr
12051205
row_items = [(idx, list(self.mixed_frame.xs(idx)))

‎pandas/tests/indexes/test_multi.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from pandas.errors import PerformanceWarning, UnsortedIndexError
1919
from pandas.core.dtypes.dtypes import CategoricalDtype
2020
from pandas.core.indexes.base import InvalidIndexError
21-
from pandas._libs import lib
21+
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
2222
from pandas._libs.lib import Timestamp
2323

2424
import pandas.util.testing as tm
@@ -913,7 +913,7 @@ def test_from_product_invalid_input(self):
913913
def test_from_product_datetimeindex(self):
914914
dt_index = date_range('2000-01-01', periods=2)
915915
mi = pd.MultiIndex.from_product([[1, 2], dt_index])
916-
etalon = lib.list_to_object_array([(1, pd.Timestamp(
916+
etalon = construct_1d_object_array_from_listlike([(1, pd.Timestamp(
917917
'2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp(
918918
'2000-01-01')), (2, pd.Timestamp('2000-01-02'))])
919919
tm.assert_numpy_array_equal(mi.values, etalon)
@@ -938,11 +938,11 @@ def test_values_boxed(self):
938938
(1, pd.Timestamp('2000-01-04')),
939939
(2, pd.Timestamp('2000-01-02')),
940940
(3, pd.Timestamp('2000-01-03'))]
941-
mi = pd.MultiIndex.from_tuples(tuples)
942-
tm.assert_numpy_array_equal(mi.values,
943-
lib.list_to_object_array(tuples))
941+
result = pd.MultiIndex.from_tuples(tuples)
942+
expected = construct_1d_object_array_from_listlike(tuples)
943+
tm.assert_numpy_array_equal(result.values, expected)
944944
# Check that code branches for boxed values produce identical results
945-
tm.assert_numpy_array_equal(mi.values[:4], mi[:4].values)
945+
tm.assert_numpy_array_equal(result.values[:4], result[:4].values)
946946

947947
def test_append(self):
948948
result = self.index[:3].append(self.index[3:])

0 commit comments

Comments
 (0)