Skip to content

Commit 8e1708d

Browse files
forbdonut authored and TomAugspurger committed
BUG: Fix read of py3 PeriodIndex DataFrame HDF made in py2 (#16781) (#16790)
In Python3, reading a DataFrame with a PeriodIndex from an HDF file created in Python2 would incorrectly return a DataFrame with an Int64Index. (cherry picked from commit 794e060)
1 parent 2069768 commit 8e1708d

File tree

4 files changed

+23
-2
lines changed

4 files changed

+23
-2
lines changed

doc/source/whatsnew/v0.20.3.txt

+1
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@ Bug Fixes
3838
~~~~~~~~~
3939
- Fixed issue with dataframe scatter plot for categorical data that reports incorrect column key not found when categorical data is used for plotting (:issue:`16199`)
4040
- Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`)
41+
- Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`)
4142

4243

4344
Conversion

pandas/io/pytables.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -2584,8 +2584,8 @@ def read_index_node(self, node, start=None, stop=None):
25842584
if 'name' in node._v_attrs:
25852585
name = _ensure_str(node._v_attrs.name)
25862586

2587-
index_class = self._alias_to_class(getattr(node._v_attrs,
2588-
'index_class', ''))
2587+
index_class = self._alias_to_class(_ensure_decoded(
2588+
getattr(node._v_attrs, 'index_class', '')))
25892589
factory = self._get_index_factory(index_class)
25902590

25912591
kwargs = {}
Binary file not shown.

pandas/tests/io/test_pytables.py

+20
Original file line number | Diff line number | Diff line change
@@ -5207,6 +5207,26 @@ def test_fspath(self):
52075207
with pd.HDFStore(path) as store:
52085208
assert os.fspath(store) == str(path)
52095209

5210+
def test_read_py2_hdf_file_in_py3(self):
5211+
# GH 16781
5212+
5213+
# tests reading a PeriodIndex DataFrame written in Python2 in Python3
5214+
5215+
# the file was generated in Python 2.7 like so:
5216+
#
5217+
# df = pd.DataFrame([1.,2,3], index=pd.PeriodIndex(
5218+
# ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))
5219+
# df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p')
5220+
5221+
expected = pd.DataFrame([1., 2, 3], index=pd.PeriodIndex(
5222+
['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))
5223+
5224+
with ensure_clean_store(
5225+
tm.get_data_path('periodindex_0.20.1_x86_64_darwin_2.7.13.h5'),
5226+
mode='r') as store:
5227+
result = store['p']
5228+
assert_frame_equal(result, expected)
5229+
52105230

52115231
class TestHDFComplexValues(Base):
52125232
# GH10447

0 commit comments

Comments
 (0)