2
2
3
3
import bz2
4
4
import codecs
5
- from contextlib import closing , contextmanager
6
5
import csv
7
6
import gzip
8
7
import lzma
8
+ from http .client import HTTPException # noqa
9
9
import mmap
10
10
import os
11
+ from urllib .error import URLError # noqa
12
+ from urllib .parse import ( # noqa
13
+ urlencode , urljoin , urlparse as parse_url , uses_netloc , uses_params ,
14
+ uses_relative )
15
+ from urllib .request import pathname2url , urlopen
11
16
import zipfile
12
17
13
18
import pandas .compat as compat
14
- from pandas .compat import BytesIO , StringIO , string_types , text_type
19
+ from pandas .compat import BytesIO , string_types , text_type
15
20
from pandas .errors import ( # noqa
16
21
AbstractMethodError , DtypeWarning , EmptyDataError , ParserError ,
17
22
ParserWarning )
18
23
19
- from pandas .core .dtypes .common import is_file_like , is_number
20
-
21
- from pandas .io .formats .printing import pprint_thing
24
+ from pandas .core .dtypes .common import is_file_like
22
25
23
26
# gh-12665: Alias for now and remove later.
24
27
CParserError = ParserError
31
34
'-nan' , '' }
32
35
33
36
34
- if compat .PY3 :
35
- from urllib .request import urlopen , pathname2url
36
- _urlopen = urlopen
37
- from urllib .parse import urlparse as parse_url
38
- from urllib .parse import (uses_relative , uses_netloc , uses_params ,
39
- urlencode , urljoin )
40
- from urllib .error import URLError
41
- from http .client import HTTPException # noqa
42
- else :
43
- from urllib2 import urlopen as _urlopen
44
- from urllib import urlencode , pathname2url # noqa
45
- from urlparse import urlparse as parse_url
46
- from urlparse import uses_relative , uses_netloc , uses_params , urljoin
47
- from urllib2 import URLError # noqa
48
- from httplib import HTTPException # noqa
49
- from contextlib import contextmanager , closing # noqa
50
- from functools import wraps # noqa
51
-
52
- # @wraps(_urlopen)
53
- @contextmanager
54
- def urlopen (* args , ** kwargs ):
55
- with closing (_urlopen (* args , ** kwargs )) as f :
56
- yield f
57
-
58
-
59
37
_VALID_URLS = set (uses_relative + uses_netloc + uses_params )
60
38
_VALID_URLS .discard ('' )
61
39
@@ -72,10 +50,6 @@ def __next__(self):
72
50
raise AbstractMethodError (self )
73
51
74
52
75
- if not compat .PY3 :
76
- BaseIterator .next = lambda self : self .__next__ ()
77
-
78
-
79
53
def _is_url (url ):
80
54
"""Check to see if a URL has a valid protocol.
81
55
@@ -189,7 +163,8 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
189
163
----------
190
164
filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
191
165
or buffer
192
- encoding : the encoding to use to decode py3 bytes, default is 'utf-8'
166
+ compression : {{'gzip', 'bz2', 'zip', 'xz', None}}, optional
167
+ encoding : the encoding to use to decode bytes, default is 'utf-8'
193
168
mode : str, optional
194
169
195
170
Returns
@@ -202,7 +177,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
202
177
filepath_or_buffer = _stringify_path (filepath_or_buffer )
203
178
204
179
if _is_url (filepath_or_buffer ):
205
- req = _urlopen (filepath_or_buffer )
180
+ req = urlopen (filepath_or_buffer )
206
181
content_encoding = req .headers .get ('Content-Encoding' , None )
207
182
if content_encoding == 'gzip' :
208
183
# Override compression based on Content-Encoding header
@@ -361,10 +336,6 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
361
336
362
337
if compression :
363
338
364
- if compat .PY2 and not is_path and encoding :
365
- msg = 'compression with encoding is not yet supported in Python 2'
366
- raise ValueError (msg )
367
-
368
339
# GZ Compression
369
340
if compression == 'gzip' :
370
341
if is_path :
@@ -376,11 +347,6 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
376
347
elif compression == 'bz2' :
377
348
if is_path :
378
349
f = bz2 .BZ2File (path_or_buf , mode )
379
- elif compat .PY2 :
380
- # Python 2's bz2 module can't take file objects, so have to
381
- # run through decompress manually
382
- f = StringIO (bz2 .decompress (path_or_buf .read ()))
383
- path_or_buf .close ()
384
350
else :
385
351
f = bz2 .BZ2File (path_or_buf )
386
352
@@ -415,24 +381,19 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
415
381
handles .append (f )
416
382
417
383
elif is_path :
418
- if compat .PY2 :
419
- # Python 2
420
- mode = "wb" if mode == "w" else mode
421
- f = open (path_or_buf , mode )
422
- elif encoding :
423
- # Python 3 and encoding
384
+ if encoding :
385
+ # Encoding
424
386
f = open (path_or_buf , mode , encoding = encoding , newline = "" )
425
387
elif is_text :
426
- # Python 3 and no explicit encoding
388
+ # No explicit encoding
427
389
f = open (path_or_buf , mode , errors = 'replace' , newline = "" )
428
390
else :
429
- # Python 3 and binary mode
391
+ # Binary mode
430
392
f = open (path_or_buf , mode )
431
393
handles .append (f )
432
394
433
- # in Python 3, convert BytesIO or fileobjects passed with an encoding
434
- if (compat .PY3 and is_text and
435
- (compression or isinstance (f , need_text_wrapping ))):
395
+ # Convert BytesIO or file objects passed with an encoding
396
+ if is_text and (compression or isinstance (f , need_text_wrapping )):
436
397
from io import TextIOWrapper
437
398
f = TextIOWrapper (f , encoding = encoding , newline = '' )
438
399
handles .append (f )
@@ -499,11 +460,9 @@ def __iter__(self):
499
460
def __next__ (self ):
500
461
newline = self .mmap .readline ()
501
462
502
- # readline returns bytes, not str, in Python 3,
503
- # but Python's CSV reader expects str, so convert
504
- # the output to str before continuing
505
- if compat .PY3 :
506
- newline = compat .bytes_to_str (newline )
463
+ # readline returns bytes, not str, but Python's CSV reader
464
+ # expects str, so convert the output to str before continuing
465
+ newline = compat .bytes_to_str (newline )
507
466
508
467
# mmap doesn't raise if reading past the allocated
509
468
# data but instead returns an empty string, so raise
@@ -513,14 +472,10 @@ def __next__(self):
513
472
return newline
514
473
515
474
516
- if not compat .PY3 :
517
- MMapWrapper .next = lambda self : self .__next__ ()
518
-
519
-
520
475
class UTF8Recoder (BaseIterator ):
521
476
522
477
"""
523
- Iterator that reads an encoded stream and reencodes the input to UTF-8
478
+ Iterator that reads an encoded stream and re-encodes the input to UTF-8
524
479
"""
525
480
526
481
def __init__ (self , f , encoding ):
@@ -536,82 +491,10 @@ def next(self):
536
491
return next (self .reader ).encode ("utf-8" )
537
492
538
493
539
- if compat .PY3 : # pragma: no cover
540
- def UnicodeReader (f , dialect = csv .excel , encoding = "utf-8" , ** kwds ):
541
- # ignore encoding
542
- return csv .reader (f , dialect = dialect , ** kwds )
543
-
544
- def UnicodeWriter (f , dialect = csv .excel , encoding = "utf-8" , ** kwds ):
545
- return csv .writer (f , dialect = dialect , ** kwds )
546
- else :
547
- class UnicodeReader (BaseIterator ):
548
-
549
- """
550
- A CSV reader which will iterate over lines in the CSV file "f",
551
- which is encoded in the given encoding.
552
-
553
- On Python 3, this is replaced (below) by csv.reader, which handles
554
- unicode.
555
- """
556
-
557
- def __init__ (self , f , dialect = csv .excel , encoding = "utf-8" , ** kwds ):
558
- f = UTF8Recoder (f , encoding )
559
- self .reader = csv .reader (f , dialect = dialect , ** kwds )
560
-
561
- def __next__ (self ):
562
- row = next (self .reader )
563
- return [compat .text_type (s , "utf-8" ) for s in row ]
564
-
565
- class UnicodeWriter (object ):
566
-
567
- """
568
- A CSV writer which will write rows to CSV file "f",
569
- which is encoded in the given encoding.
570
- """
571
-
572
- def __init__ (self , f , dialect = csv .excel , encoding = "utf-8" , ** kwds ):
573
- # Redirect output to a queue
574
- self .queue = StringIO ()
575
- self .writer = csv .writer (self .queue , dialect = dialect , ** kwds )
576
- self .stream = f
577
- self .encoder = codecs .getincrementalencoder (encoding )()
578
- self .quoting = kwds .get ("quoting" , None )
579
-
580
- def writerow (self , row ):
581
- def _check_as_is (x ):
582
- return (self .quoting == csv .QUOTE_NONNUMERIC and
583
- is_number (x )) or isinstance (x , str )
584
-
585
- row = [x if _check_as_is (x )
586
- else pprint_thing (x ).encode ("utf-8" ) for x in row ]
587
-
588
- self .writer .writerow ([s for s in row ])
589
- # Fetch UTF-8 output from the queue ...
590
- data = self .queue .getvalue ()
591
- data = data .decode ("utf-8" )
592
- # ... and re-encode it into the target encoding
593
- data = self .encoder .encode (data )
594
- # write to the target stream
595
- self .stream .write (data )
596
- # empty queue
597
- self .queue .truncate (0 )
598
-
599
- def writerows (self , rows ):
600
- def _check_as_is (x ):
601
- return (self .quoting == csv .QUOTE_NONNUMERIC and
602
- is_number (x )) or isinstance (x , str )
603
-
604
- for i , row in enumerate (rows ):
605
- rows [i ] = [x if _check_as_is (x )
606
- else pprint_thing (x ).encode ("utf-8" ) for x in row ]
607
-
608
- self .writer .writerows ([[s for s in row ] for row in rows ])
609
- # Fetch UTF-8 output from the queue ...
610
- data = self .queue .getvalue ()
611
- data = data .decode ("utf-8" )
612
- # ... and re-encode it into the target encoding
613
- data = self .encoder .encode (data )
614
- # write to the target stream
615
- self .stream .write (data )
616
- # empty queue
617
- self .queue .truncate (0 )
494
def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
    """
    Return a ``csv.reader`` that iterates over the rows of ``f``.

    Parameters
    ----------
    f : iterable of str
        Source of CSV lines, passed straight to ``csv.reader``.
    dialect : csv.Dialect subclass or str, default csv.excel
        CSV dialect handed to ``csv.reader``.
    encoding : str, default "utf-8"
        Accepted but ignored; no decoding is performed by this function.
    **kwds
        Additional formatting options forwarded unchanged to ``csv.reader``
        (for example ``delimiter`` or ``quotechar``).

    Returns
    -------
    csv reader object
    """
    # ``encoding`` is deliberately not used: the rows come back exactly as
    # ``csv.reader`` produces them from ``f``.
    return csv.reader(f, dialect=dialect, **kwds)
497
+
498
+
499
def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds):
    """
    Return a ``csv.writer`` that writes rows to ``f``.

    Parameters
    ----------
    f : object with a ``write`` method
        Destination stream, passed straight to ``csv.writer``.
    dialect : csv.Dialect subclass or str, default csv.excel
        CSV dialect handed to ``csv.writer``.
    encoding : str, default "utf-8"
        Accepted but not used by this function; no encoding is performed
        here.
    **kwds
        Additional formatting options forwarded unchanged to ``csv.writer``
        (for example ``quoting`` or ``delimiter``).

    Returns
    -------
    csv writer object
    """
    # ``encoding`` is not referenced: the writer emits whatever
    # ``csv.writer`` produces directly into ``f``.
    return csv.writer(f, dialect=dialect, **kwds)
0 commit comments