Skip to content

Commit 57afd5f

Browse files
committed
Handle more date/datetime/time formats
1 parent 9e57d91 commit 57afd5f

File tree

7 files changed

+1494
-1447
lines changed

7 files changed

+1494
-1447
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ New features
2727
Other Enhancements
2828
^^^^^^^^^^^^^^^^^^
2929

30+
The `read_sas` function now recognizes most of the frequently used date (datetime) formats in SAS7BDAT files.
3031

3132

3233
.. _whatsnew_0210.api_breaking:

pandas/io/sas/sas7bdat.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ class SAS7BDATReader(BaseIterator):
4444
index : column identifier, defaults to None
4545
Column to use as index.
4646
convert_dates : boolean, defaults to True
47-
Attempt to convert dates to Pandas datetime values. Note all
48-
SAS date formats are supported.
47+
Attempt to convert dates to Pandas datetime values. Note that
48+
some rarely used SAS date formats may be unsupported.
4949
blank_missing : boolean, defaults to True
5050
Convert empty strings to missing values (SAS uses blanks to
5151
indicate missing character variables).
@@ -655,9 +655,15 @@ def _chunk_to_dataframe(self):
655655
rslt[name] = self._byte_chunk[jb, :].view(
656656
dtype=self.byte_order + 'd')
657657
rslt[name] = np.asarray(rslt[name], dtype=np.float64)
658-
if self.convert_dates and (self.column_formats[j] == "MMDDYY"):
659-
epoch = pd.datetime(1960, 1, 1)
660-
rslt[name] = epoch + pd.to_timedelta(rslt[name], unit='d')
658+
if self.convert_dates:
659+
unit = None
660+
if self.column_formats[j] in const.sas_date_formats:
661+
unit = 'd'
662+
elif self.column_formats[j] in const.sas_datetime_formats:
663+
unit = 's'
664+
if unit:
665+
rslt[name] = pd.to_datetime(rslt[name], unit=unit,
666+
origin="1960-01-01")
661667
jb += 1
662668
elif self.column_types[j] == b's':
663669
rslt[name] = self._string_chunk[js, :]

pandas/io/sas/sas_constants.py

+24
Original file line numberDiff line numberDiff line change
@@ -145,3 +145,27 @@ class index:
145145
b"\xFF\xFF\xFF\xFE": index.columnListIndex,
146146
b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF": index.columnListIndex,
147147
b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE": index.columnListIndex}
148+
149+
150+
# Frequently used SAS date and datetime formats.
#
# The SAS7BDAT reader tests a numeric column's format name for membership
# in these tuples when ``convert_dates`` is enabled: a column whose format
# is in ``sas_date_formats`` is converted with a unit of days, one whose
# format is in ``sas_datetime_formats`` with a unit of seconds, both
# relative to the origin 1960-01-01.
#
# Sources:
# http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm
# https://github.com/epam/parso/blob/master/src/main/java/com/epam/parso/impl/SasFileConstants.java
sas_date_formats = (
    "DATE", "DAY", "DDMMYY", "DOWNAME", "JULDAY", "JULIAN",
    "MMDDYY", "MMYY", "MMYYC", "MMYYD", "MMYYP", "MMYYS",
    "MMYYN", "MONNAME", "MONTH", "MONYY", "QTR", "QTRR",
    "NENGO", "WEEKDATE", "WEEKDATX", "WEEKDAY", "WEEKV",
    "WORDDATE", "WORDDATX", "YEAR", "YYMM", "YYMMC", "YYMMD",
    "YYMMP", "YYMMS", "YYMMN", "YYMON", "YYMMDD", "YYQ",
    "YYQC", "YYQD", "YYQP", "YYQS", "YYQN", "YYQR", "YYQRC",
    "YYQRD", "YYQRP", "YYQRS", "YYQRN",
    "YYMMDDP", "YYMMDDC", "E8601DA", "YYMMDDN", "MMDDYYC",
    "MMDDYYS", "MMDDYYD", "YYMMDDS", "B8601DA", "DDMMYYN",
    "YYMMDDD", "DDMMYYB", "DDMMYYP", "MMDDYYP", "YYMMDDB",
    "MMDDYYN", "DDMMYYC", "DDMMYYD", "DDMMYYS",
    "MINGUO",
)

# Each format name is listed exactly once: the tuple is only used for
# membership tests, so repeated entries ("DTMONYY", "DTWKDATX") added
# nothing and have been dropped.
sas_datetime_formats = (
    "DATETIME", "DTWKDATX",
    "B8601DN", "B8601DT", "B8601DX", "B8601DZ", "B8601LX",
    "E8601DN", "E8601DT", "E8601DX", "E8601DZ", "E8601LX",
    "DATEAMPM", "DTDATE", "DTMONYY",
    "DTYEAR", "TOD", "MDYAMPM",
)

pandas/tests/io/sas/data/datetime.csv

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Date1,Date2,DateTime,DateTimeHi,Taiw
2+
1677-09-22,1677-09-22,1677-09-21 00:12:44,1677-09-21 00:12:43.145226,1912-01-01
3+
1960-01-01,1960-01-01,1960-01-01 00:00:00,1960-01-01 00:00:00.000000,1960-01-01
4+
2016-02-29,2016-02-29,2016-02-29 23:59:59,2016-02-29 23:59:59.123456,2016-02-29
5+
2262-04-11,2262-04-11,2262-04-11 23:47:16,2262-04-11 23:47:16.854774,2262-04-11
128 KB
Binary file not shown.

0 commit comments

Comments
 (0)