"""Common IO api utilities"""

import codecs
- from contextlib import closing, contextmanager
import csv
+ from http.client import HTTPException  # noqa
import mmap
import os
+ from urllib.error import URLError  # noqa
+ from urllib.parse import (  # noqa
+     urlencode, urljoin, urlparse as parse_url, uses_netloc, uses_params,
+     uses_relative)
+ from urllib.request import pathname2url, urlopen
import zipfile

import pandas.compat as compat
- from pandas.compat import BytesIO, StringIO, string_types, text_type
+ from pandas.compat import BytesIO, string_types, text_type
from pandas.errors import (  # noqa
    AbstractMethodError, DtypeWarning, EmptyDataError, ParserError,
    ParserWarning)

- from pandas.core.dtypes.common import is_file_like, is_number
-
- from pandas.io.formats.printing import pprint_thing
+ from pandas.core.dtypes.common import is_file_like

# gh-12665: Alias for now and remove later.
CParserError = ParserError

              '-nan', ''}


- if compat.PY3:
-     from urllib.request import urlopen, pathname2url
-     _urlopen = urlopen
-     from urllib.parse import urlparse as parse_url
-     from urllib.parse import (uses_relative, uses_netloc, uses_params,
-                               urlencode, urljoin)
-     from urllib.error import URLError
-     from http.client import HTTPException  # noqa
- else:
-     from urllib2 import urlopen as _urlopen
-     from urllib import urlencode, pathname2url  # noqa
-     from urlparse import urlparse as parse_url
-     from urlparse import uses_relative, uses_netloc, uses_params, urljoin
-     from urllib2 import URLError  # noqa
-     from httplib import HTTPException  # noqa
-     from contextlib import contextmanager, closing  # noqa
-     from functools import wraps  # noqa
-
-     # @wraps(_urlopen)
-     @contextmanager
-     def urlopen(*args, **kwargs):
-         with closing(_urlopen(*args, **kwargs)) as f:
-             yield f
+ _urlopen = urlopen


_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
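
Aside (not part of the diff): the removed `contextmanager` wrapper existed only because the object returned by Python 2's `urlopen` could not be used in a `with` statement. On Python 3 the response returned by `urllib.request.urlopen` is itself a context manager, which is what lets `_urlopen = urlopen` stand in directly. A minimal sketch of the behaviour being relied on (the URL is a placeholder):

    from urllib.request import urlopen

    # The HTTPResponse supports the context-manager protocol,
    # so no closing() wrapper is needed.
    with urlopen("https://example.com") as resp:  # placeholder URL
        payload = resp.read()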
@@ -69,10 +50,6 @@ def __next__(self):
        raise AbstractMethodError(self)


- if not compat.PY3:
-     BaseIterator.next = lambda self: self.__next__()
-
-
def _is_url(url):
    """Check to see if a URL has a valid protocol.

@@ -186,7 +163,8 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
    ----------
    filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
                         or buffer
-     encoding : the encoding to use to decode py3 bytes, default is 'utf-8'
+     compression : str, optional
+     encoding : the encoding to use to decode bytes, default is 'utf-8'
    mode : str, optional

    Returns
@@ -358,10 +336,6 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,

    if compression:

-         if compat.PY2 and not is_path and encoding:
-             msg = 'compression with encoding is not yet supported in Python 2'
-             raise ValueError(msg)
-
        # GZ Compression
        if compression == 'gzip':
            import gzip
@@ -375,11 +349,6 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
            import bz2
            if is_path:
                f = bz2.BZ2File(path_or_buf, mode)
-             elif compat.PY2:
-                 # Python 2's bz2 module can't take file objects, so have to
-                 # run through decompress manually
-                 f = StringIO(bz2.decompress(path_or_buf.read()))
-                 path_or_buf.close()
            else:
                f = bz2.BZ2File(path_or_buf)

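Aside (not part of the diff): the deleted `compat.PY2` branch worked around Python 2's `bz2` module, which only accepted filenames. On Python 3, `bz2.BZ2File` also accepts an open binary file object, which is what the surviving `else` branch relies on. A small self-contained sketch:

    import bz2
    from io import BytesIO

    # A compressed payload held in memory rather than on disk
    buf = BytesIO(bz2.compress(b"a,b\n1,2\n"))

    # BZ2File wraps the file object directly -- no manual decompress step
    with bz2.BZ2File(buf) as fh:
        assert fh.read() == b"a,b\n1,2\n"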
@@ -415,24 +384,19 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
        handles.append(f)

    elif is_path:
-         if compat.PY2:
-             # Python 2
-             mode = "wb" if mode == "w" else mode
-             f = open(path_or_buf, mode)
-         elif encoding:
-             # Python 3 and encoding
+         if encoding:
+             # Encoding
            f = open(path_or_buf, mode, encoding=encoding, newline="")
        elif is_text:
-             # Python 3 and no explicit encoding
+             # No explicit encoding
            f = open(path_or_buf, mode, errors='replace', newline="")
        else:
-             # Python 3 and binary mode
+             # Binary mode
            f = open(path_or_buf, mode)
        handles.append(f)

-     # in Python 3, convert BytesIO or fileobjects passed with an encoding
-     if (compat.PY3 and is_text and
-             (compression or isinstance(f, need_text_wrapping))):
+     # Convert BytesIO or file objects passed with an encoding
+     if is_text and (compression or isinstance(f, need_text_wrapping)):
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding, newline='')
        handles.append(f)
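
Aside (not part of the diff): the simplified check above wraps any binary handle (a compressed file or a `BytesIO`) in `io.TextIOWrapper` whenever text output is requested, so downstream parsers always see `str`. A minimal sketch of that conversion:

    from io import BytesIO, TextIOWrapper

    raw = BytesIO("col1,col2\n1,2\n".encode("utf-8"))

    # newline="" mirrors the call in the diff and leaves line endings untouched
    text = TextIOWrapper(raw, encoding="utf-8", newline="")
    assert text.readline() == "col1,col2\n"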
@@ -499,11 +463,9 @@ def __iter__(self):
    def __next__(self):
        newline = self.mmap.readline()

-         # readline returns bytes, not str, in Python 3,
-         # but Python's CSV reader expects str, so convert
-         # the output to str before continuing
-         if compat.PY3:
-             newline = compat.bytes_to_str(newline)
+         # readline returns bytes, not str, but Python's CSV reader
+         # expects str, so convert the output to str before continuing
+         newline = compat.bytes_to_str(newline)

        # mmap doesn't raise if reading past the allocated
        # data but instead returns an empty string, so raise
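
Aside (not part of the diff): `mmap.mmap.readline` always returns `bytes` on Python 3, while `csv.reader` only accepts `str`, hence the now-unconditional `compat.bytes_to_str` call (roughly a `.decode`). A tiny sketch of the mismatch being bridged:

    import csv
    from io import StringIO

    raw = b"a,b,c\n"            # what mmap.readline() hands back
    line = raw.decode("utf-8")  # roughly what compat.bytes_to_str does
    assert next(csv.reader(StringIO(line))) == ["a", "b", "c"]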
@@ -513,14 +475,10 @@ def __next__(self):
        return newline


- if not compat.PY3:
-     MMapWrapper.next = lambda self: self.__next__()
-
-
class UTF8Recoder(BaseIterator):

    """
-     Iterator that reads an encoded stream and reencodes the input to UTF-8
+     Iterator that reads an encoded stream and re-encodes the input to UTF-8
    """

    def __init__(self, f, encoding):
@@ -536,82 +494,10 @@ def next(self):
        return next(self.reader).encode("utf-8")


- if compat.PY3:  # pragma: no cover
-     def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
-         # ignore encoding
-         return csv.reader(f, dialect=dialect, **kwds)
-
-     def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds):
-         return csv.writer(f, dialect=dialect, **kwds)
- else:
-     class UnicodeReader(BaseIterator):
-
-         """
-         A CSV reader which will iterate over lines in the CSV file "f",
-         which is encoded in the given encoding.
-
-         On Python 3, this is replaced (below) by csv.reader, which handles
-         unicode.
-         """
-
-         def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
-             f = UTF8Recoder(f, encoding)
-             self.reader = csv.reader(f, dialect=dialect, **kwds)
-
-         def __next__(self):
-             row = next(self.reader)
-             return [compat.text_type(s, "utf-8") for s in row]
-
-     class UnicodeWriter(object):
-
-         """
-         A CSV writer which will write rows to CSV file "f",
-         which is encoded in the given encoding.
-         """
-
-         def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
-             # Redirect output to a queue
-             self.queue = StringIO()
-             self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
-             self.stream = f
-             self.encoder = codecs.getincrementalencoder(encoding)()
-             self.quoting = kwds.get("quoting", None)
-
-         def writerow(self, row):
-             def _check_as_is(x):
-                 return (self.quoting == csv.QUOTE_NONNUMERIC and
-                         is_number(x)) or isinstance(x, str)
-
-             row = [x if _check_as_is(x)
-                    else pprint_thing(x).encode("utf-8") for x in row]
-
-             self.writer.writerow([s for s in row])
-             # Fetch UTF-8 output from the queue ...
-             data = self.queue.getvalue()
-             data = data.decode("utf-8")
-             # ... and re-encode it into the target encoding
-             data = self.encoder.encode(data)
-             # write to the target stream
-             self.stream.write(data)
-             # empty queue
-             self.queue.truncate(0)
-
-         def writerows(self, rows):
-             def _check_as_is(x):
-                 return (self.quoting == csv.QUOTE_NONNUMERIC and
-                         is_number(x)) or isinstance(x, str)
-
-             for i, row in enumerate(rows):
-                 rows[i] = [x if _check_as_is(x)
-                            else pprint_thing(x).encode("utf-8") for x in row]
-
-             self.writer.writerows([[s for s in row] for row in rows])
-             # Fetch UTF-8 output from the queue ...
-             data = self.queue.getvalue()
-             data = data.decode("utf-8")
-             # ... and re-encode it into the target encoding
-             data = self.encoder.encode(data)
-             # write to the target stream
-             self.stream.write(data)
-             # empty queue
-             self.queue.truncate(0)
+ def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
+     # ignore encoding
+     return csv.reader(f, dialect=dialect, **kwds)
+
+
+ def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds):
+     return csv.writer(f, dialect=dialect, **kwds)
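
Aside (not part of the diff): with the Python 2 code paths gone, `UnicodeReader` and `UnicodeWriter` are thin pass-throughs, so callers get exactly the stdlib `csv` behaviour on text-mode streams and the `encoding` argument is simply ignored. A short usage sketch of that equivalence:

    import csv
    from io import StringIO

    out = StringIO()
    csv.writer(out).writerow(["café", "naïve"])   # str rows, no manual encoding

    rows = list(csv.reader(StringIO(out.getvalue())))
    assert rows == [["café", "naïve"]]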