Skip to content

Commit 6704590

Browse files
janosh and mroeschke authored
Raise FileNotFoundError in read_json if input looks like file path but file is missing (#46718)
* raise FileNotFoundError in _get_data_from_filepath()
* update tests test_read_non_existent + test_read_expands_user_home_dir
* add changelog entry in doc/source/whatsnew/v1.5.0.rst
* use pandas.io.common._compression_to_extension instead of hard-coded extensions
* move changelog entry from IO to other API changes
* fix ImportError from _compression_to_extension -> _extension_to_compression rename
* add test read_json very long file path
* remove extra period in extension checking

Co-authored-by: Matthew Roeschke <emailformattr@gmail.com>
1 parent 46eac3a commit 6704590

File tree

4 files changed

+29
-2
lines changed

4 files changed

+29
-2
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,7 @@ Other API changes
435435
<https://developers.googleblog.com/2022/02/making-oauth-flows-safer.html?m=1#disallowed-oob>`_.
436436
The ``auth_local_webserver = False`` option is planned to stop working in
437437
October 2022. (:issue:`46312`)
438+
- :func:`read_json` now raises ``FileNotFoundError`` (previously ``ValueError``) when input is a string ending in ``.json``, ``.json.gz``, ``.json.bz2``, etc. but no such file exists. (:issue:`29102`)
438439
-
439440

440441
.. ---------------------------------------------------------------------------

pandas/io/json/_json.py

+12
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252

5353
from pandas.io.common import (
5454
IOHandles,
55+
_extension_to_compression,
5556
file_exists,
5657
get_handle,
5758
is_fsspec_url,
@@ -698,6 +699,9 @@ def _get_data_from_filepath(self, filepath_or_buffer):
698699
699700
This method turns (1) into (2) to simplify the rest of the processing.
700701
It returns input types (2) and (3) unchanged.
702+
703+
It raises FileNotFoundError if the input is a string ending in
704+
one of .json, .json.gz, .json.bz2, etc. but no such file exists.
701705
"""
702706
# if it is a string but the file does not exist, it might be a JSON string
703707
filepath_or_buffer = stringify_path(filepath_or_buffer)
@@ -716,6 +720,14 @@ def _get_data_from_filepath(self, filepath_or_buffer):
716720
errors=self.encoding_errors,
717721
)
718722
filepath_or_buffer = self.handles.handle
723+
elif (
724+
isinstance(filepath_or_buffer, str)
725+
and filepath_or_buffer.lower().endswith(
726+
(".json",) + tuple(f".json{c}" for c in _extension_to_compression)
727+
)
728+
and not file_exists(filepath_or_buffer)
729+
):
730+
raise FileNotFoundError(f"File {filepath_or_buffer} does not exist")
719731

720732
return filepath_or_buffer
721733

pandas/tests/io/json/test_pandas.py

+14
Original file line numberDiff line numberDiff line change
@@ -1566,6 +1566,20 @@ def test_read_json_with_url_value(self, url):
15661566
expected = DataFrame({"url": [url]})
15671567
tm.assert_frame_equal(result, expected)
15681568

1569+
@pytest.mark.parametrize(
1570+
"compression",
1571+
["", ".gz", ".bz2", ".tar"],
1572+
)
1573+
def test_read_json_with_very_long_file_path(self, compression):
1574+
# GH 46718
1575+
long_json_path = f'{"a" * 1000}.json{compression}'
1576+
with pytest.raises(
1577+
FileNotFoundError, match=f"File {long_json_path} does not exist"
1578+
):
1579+
# path too long for Windows is handled in file_exists() but raises in
1580+
# _get_data_from_filepath()
1581+
read_json(long_json_path)
1582+
15691583
@pytest.mark.parametrize(
15701584
"date_format,key", [("epoch", 86400000), ("iso", "P1DT0H0M0S")]
15711585
)

pandas/tests/io/test_common.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ def test_iterator(self):
187187
(pd.read_hdf, "tables", FileNotFoundError, "h5"),
188188
(pd.read_stata, "os", FileNotFoundError, "dta"),
189189
(pd.read_sas, "os", FileNotFoundError, "sas7bdat"),
190-
(pd.read_json, "os", ValueError, "json"),
190+
(pd.read_json, "os", FileNotFoundError, "json"),
191191
(pd.read_pickle, "os", FileNotFoundError, "pickle"),
192192
],
193193
)
@@ -253,7 +253,7 @@ def test_write_missing_parent_directory(self, method, module, error_class, fn_ex
253253
(pd.read_hdf, "tables", FileNotFoundError, "h5"),
254254
(pd.read_stata, "os", FileNotFoundError, "dta"),
255255
(pd.read_sas, "os", FileNotFoundError, "sas7bdat"),
256-
(pd.read_json, "os", ValueError, "json"),
256+
(pd.read_json, "os", FileNotFoundError, "json"),
257257
(pd.read_pickle, "os", FileNotFoundError, "pickle"),
258258
],
259259
)

0 commit comments

Comments
 (0)