diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 170e7f14da397..642093db3ded6 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -194,6 +194,8 @@ I/O - Fixed bug in missing text when using :meth:`to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`) - Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`) +- Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`) +- Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to Timestamp, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`) - - - diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 725e2d28ffd67..4bae067ee5196 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -227,7 +227,7 @@ def _write(self, obj, orient, double_precision, ensure_ascii, def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None, - convert_axes=True, convert_dates=True, keep_default_dates=True, + convert_axes=None, convert_dates=True, keep_default_dates=True, numpy=False, precise_float=False, date_unit=None, encoding=None, lines=False, chunksize=None, compression='infer'): """ @@ -277,18 +277,25 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None, 'table' as an allowed value for the ``orient`` argument typ : type of object to recover (series or frame), default 'frame' - dtype : boolean or dict, default True + dtype : boolean or dict, default None If True, infer dtypes; if a dict of column to dtype, then use those; if False, then don't infer dtypes at all, applies only to the data. - Not applicable with ``orient='table'``. + For all ``orient`` values except ``'table'``, default is True. - .. versionchanged:: 0.25 + .. versionchanged:: 0.25.0 - Not applicable with ``orient='table'``. + Not applicable for ``orient='table'``. - convert_axes : boolean, default True + convert_axes : boolean, default None Try to convert the axes to the proper dtypes. + + For all ``orient`` values except ``'table'``, default is True. + + .. versionchanged:: 0.25.0 + + Not applicable for ``orient='table'``. + convert_dates : boolean, default True List of columns to parse for dates; If True, then try to parse datelike columns default is True; a column label is datelike if @@ -417,8 +424,13 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None, if orient == 'table' and dtype: raise ValueError("cannot pass both dtype and orient='table'") + if orient == 'table' and convert_axes: + raise ValueError("cannot pass both convert_axes and orient='table'") - dtype = orient != 'table' if dtype is None else dtype + if dtype is None and orient != 'table': + dtype = True + if convert_axes is None and orient != 'table': + convert_axes = True compression = _infer_compression(path_or_buf, compression) filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( @@ -692,7 +704,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, # don't try to coerce, unless a force conversion if use_dtypes: - if self.dtype is False: + if not self.dtype: return data, False elif self.dtype is True: pass diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 3002d1dfb5f8a..351b495e5d8fc 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -564,17 +564,10 @@ def test_multiindex(self, index_names): result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) - @pytest.mark.parametrize("strict_check", [ - pytest.param(True, marks=pytest.mark.xfail), - False - ]) - def test_empty_frame_roundtrip(self, strict_check): + def test_empty_frame_roundtrip(self): # GH 21287 df = pd.DataFrame([], columns=['a', 'b', 'c']) expected = df.copy() out = df.to_json(orient='table') result = pd.read_json(out, orient='table') - # TODO: When DF coercion issue (#21345) is resolved tighten type checks - tm.assert_frame_equal(expected, result, - check_dtype=strict_check, - check_index_type=strict_check) + tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index fecd0f0572757..ed598b730d960 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -194,7 +194,7 @@ def _check_orient(df, orient, dtype=None, numpy=False, else: unser = unser.sort_index() - if dtype is False: + if not dtype: check_dtype = False if not convert_axes and df.index.dtype.type == np.datetime64: @@ -1202,6 +1202,16 @@ def test_data_frame_size_after_to_json(self): assert size_before == size_after + @pytest.mark.parametrize('index', [None, [1, 2], [1., 2.], ['a', 'b'], + ['1', '2'], ['1.', '2.']]) + @pytest.mark.parametrize('columns', [['a', 'b'], ['1', '2'], ['1.', '2.']]) + def test_from_json_to_json_table_index_and_columns(self, index, columns): + # GH25433 GH25435 + expected = DataFrame([[1, 2], [3, 4]], index=index, columns=columns) + dfjson = expected.to_json(orient='table') + result = pd.read_json(dfjson, orient='table') + assert_frame_equal(result, expected) + def test_from_json_to_json_table_dtypes(self): # GH21345 expected = pd.DataFrame({'a': [1, 2], 'b': [3., 4.], 'c': ['5', '6']}) @@ -1214,9 +1224,18 @@ def test_read_json_table_dtype_raises(self, dtype): # GH21345 df = pd.DataFrame({'a': [1, 2], 'b': [3., 4.], 'c': ['5', '6']}) dfjson = df.to_json(orient='table') - with pytest.raises(ValueError): + msg = "cannot pass both dtype and orient='table'" + with pytest.raises(ValueError, match=msg): pd.read_json(dfjson, orient='table', dtype=dtype) + def test_read_json_table_convert_axes_raises(self): + # GH25433 GH25435 + df = DataFrame([[1, 2], [3, 4]], index=[1., 2.], columns=['1.', '2.']) + dfjson = df.to_json(orient='table') + msg = "cannot pass both convert_axes and orient='table'" + with pytest.raises(ValueError, match=msg): + pd.read_json(dfjson, orient='table', convert_axes=True) + @pytest.mark.parametrize('data, expected', [ (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']), {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),