Skip to content

Commit b831699

Browse files
committed
Refactor read_csv bug fix with PR comments (#18186)
1 parent 009311a commit b831699

File tree

3 files changed

+5
-5
lines changed

3 files changed

+5
-5
lines changed

doc/source/whatsnew/v0.21.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ Bug Fixes
6464
- Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178` :issue:`18187`)
6565
- Bug in :class:`IntervalIndex` constructor when a list of intervals is passed with non-default ``closed`` (:issue:`18334`)
6666
- Bug in :meth:`IntervalIndex.copy` when copying an ``IntervalIndex`` with non-default ``closed`` (:issue:`18339`)
67-
- Bug in ``pd.read_csv`` when reading numeric category fields with high cardinality (:issue `18186`)
67+
- Bug in :func:`read_csv` when reading numeric category fields with high cardinality (:issue:`18186`)
6868

6969
Conversion
7070
^^^^^^^^^^

pandas/_libs/parsers.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -2227,7 +2227,7 @@ def _concatenate_chunks(list chunks):
22272227
for name in names:
22282228
arrs = [chunk.pop(name) for chunk in chunks]
22292229
# Check each arr for consistent types.
2230-
dtypes = set([a.dtype for a in arrs])
2230+
dtypes = {a.dtype for a in arrs}
22312231
numpy_dtypes = {x for x in dtypes if not is_categorical_dtype(x)}
22322232
if len(numpy_dtypes) > 1:
22332233
common_type = np.find_common_type(numpy_dtypes, [])

pandas/tests/io/parser/dtypes.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,11 @@ def test_categorical_dtype(self):
118118
def test_categorical_dtype_high_cardinality_numeric(self):
119119
# GH 18186
120120
data = sorted([str(i) for i in range(10**6)])
121-
expected = pd.DataFrame({'a': Categorical(data, ordered=True)})
121+
expected = DataFrame({'a': Categorical(data, ordered=True)})
122122
actual = self.read_csv(StringIO('a\n' + '\n'.join(data)),
123123
dtype='category')
124-
actual.a.cat.reorder_categories(sorted(actual.a.cat.categories),
125-
ordered=True, inplace=True)
124+
actual["a"] = actual["a"].cat.reorder_categories(
125+
sorted(actual.a.cat.categories), ordered=True)
126126
tm.assert_frame_equal(actual, expected)
127127

128128
def test_categorical_dtype_encoding(self):

0 commit comments

Comments
 (0)