feat(io): Retrofit streams as context-managers.

ankostis · ankostis · commit 2ec8c99e5229 · 2016-10-24T15:17:00.000+02:00
+ feat(util): add logger.
+ feat(util): add suppress-ex context-handler (from PY3 sources).
diff --git a/doc/source/tutorial.rst b/doc/source/tutorial.rst
@@ -35,29 +35,28 @@ Databases support query and/or addition of objects using simple interfaces. They
 Both have two sets of methods, one of which allows interacting with single objects, the other one allowing to handle a stream of objects simultaneously and asynchronously.
 
 Acquiring information about an object from a database is easy if you have a SHA1 to refer to the object::
-    
-    
+
+
     ldb = LooseObjectDB(fixture_path("../../../.git/objects"))
-    
+
     for sha1 in ldb.sha_iter():
         oinfo = ldb.info(sha1)
-        ostream = ldb.stream(sha1)
-        assert oinfo[:3] == ostream[:3]
-        
-        assert len(ostream.read()) == ostream.size
-    # END for each sha in database
-    
+        with =ldb.stream(sha1) as ostream:
+            assert oinfo[:3] == ostream[:3]
+
+            assert len(ostream.read()) == ostream.size
+
 To store information, you prepare an *IStream* object with the required information. The provided stream will be read and converted into an object, and the respective 20 byte SHA1 identifier is stored in the IStream object::
-    
+
     data = "my data"
-    istream = IStream("blob", len(data), StringIO(data))
-    
-    # the object does not yet have a sha
-    assert istream.binsha is None
-    ldb.store(istream)
-    # now the sha is set
-    assert len(istream.binsha) == 20
-    assert ldb.has_object(istream.binsha)
+    with IStream("blob", len(data), StringIO(data)) as istream:
+
+        # the object does not yet have a sha
+        assert istream.binsha is None
+        ldb.store(istream)
+        # now the sha is set
+        assert len(istream.binsha) == 20
+        assert ldb.has_object(istream.binsha)
 
 **********************
 Asynchronous Operation
@@ -67,33 +66,33 @@ For each read or write method that allows a single-object to be handled, an *_as
 Using asynchronous operations is easy, but chaining multiple operations together to form a complex one would require you to read the docs of the *async* package. At the current time, due to the *GIL*, the *GitDB* can only achieve true concurrency during zlib compression and decompression if big objects, if the respective c modules where compiled in *async*.
 
 Asynchronous operations are scheduled by a *ThreadPool* which resides in the *gitdb.util* module::
-    
+
     from gitdb.util import pool
-    
+
     # set the pool to use two threads
     pool.set_size(2)
-    
+
     # synchronize the mode of operation
     pool.set_size(0)
-    
-    
+
+
 Use async methods with readers, which supply items to be processed. The result is given through readers as well::
-    
+
     from async import IteratorReader
-    
+
     # Create a reader from an iterator
     reader = IteratorReader(ldb.sha_iter())
-    
+
     # get reader for object streams
     info_reader = ldb.stream_async(reader)
-    
+
     # read one
     info = info_reader.read(1)[0]
-    
+
     # read all the rest until depletion
     ostreams = info_reader.read()
-    
-    
+
+
 
 *********
 Databases
diff --git a/gitdb/base.py b/gitdb/base.py
@@ -3,7 +3,7 @@
 # This module is part of GitDB and is released under
 # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
 """Module with basic data structures - they are designed to be lightweight and fast"""
-from gitdb.util import bin_to_hex
+from gitdb.util import bin_to_hex, suppress
 
 from gitdb.fun import (
     type_id_to_type_map,
@@ -134,8 +134,15 @@ def __init__(self, *args, **kwargs):
 
     #{ Stream Reader Interface
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        with suppress():
+            self.stream.close()
+
     def read(self, size=-1):
-        return self[3].read(size)
+        return self.stream.read(size)
 
     @property
     def stream(self):
@@ -171,9 +178,16 @@ def __new__(cls, packoffset, type, size, stream, *args):
         """Helps with the initialization of subclasses"""
         return tuple.__new__(cls, (packoffset, type, size, stream))
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        with suppress():
+            self.stream.close()
+
     #{ Stream Reader Interface
     def read(self, size=-1):
-        return self[3].read(size)
+        return self.stream.read(size)
 
     @property
     def stream(self):
@@ -189,9 +203,16 @@ class ODeltaPackStream(ODeltaPackInfo):
     def __new__(cls, packoffset, type, size, delta_info, stream):
         return tuple.__new__(cls, (packoffset, type, size, delta_info, stream))
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        with suppress():
+            self.stream.close()
+
     #{ Stream Reader Interface
     def read(self, size=-1):
-        return self[4].read(size)
+        return self.stream.read(size)
 
     @property
     def stream(self):
@@ -216,6 +237,13 @@ def __new__(cls, type, size, stream, sha=None):
     def __init__(self, type, size, stream, sha=None):
         list.__init__(self, (sha, type, size, stream, None))
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        with suppress():
+            self._stream().close()
+
     #{ Interface
     @property
     def hexsha(self):
@@ -239,7 +267,7 @@ def _set_error(self, exc):
     def read(self, size=-1):
         """Implements a simple stream reader interface, passing the read call on
             to our internal stream"""
-        return self[3].read(size)
+        return self._stream().read(size)
 
     #} END stream reader interface
 
@@ -312,4 +340,10 @@ class InvalidOStream(InvalidOInfo):
     """Carries information about an invalid ODB stream"""
     __slots__ = tuple()
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        pass
+
 #} END ODB Bases
diff --git a/gitdb/db/mem.py b/gitdb/db/mem.py
@@ -100,13 +100,12 @@ def stream_copy(self, sha_iter, odb):
                 continue
             # END check object existence
 
-            ostream = self.stream(sha)
-            # compressed data including header
-            sio = BytesIO(ostream.stream.data())
-            istream = IStream(ostream.type, ostream.size, sio, sha)
-
-            odb.store(istream)
-            count += 1
+            with self.stream(sha) as ostream:
+                # compressed data including header
+                sio = BytesIO(ostream.stream.data())
+                with IStream(ostream.type, ostream.size, sio, sha) as istream:
+                    odb.store(istream)
+                    count += 1
         # END for each sha
         return count
     #} END interface
diff --git a/gitdb/pack.py b/gitdb/pack.py
@@ -63,8 +63,8 @@
 
 from gitdb.const import NULL_BYTE
 from gitdb.utils.compat import (
-    izip, 
-    buffer, 
+    izip,
+    buffer,
     xrange,
     to_bytes
 )
@@ -184,7 +184,7 @@ class IndexWriter(object):
     __slots__ = '_objs'
 
     def __init__(self):
-        self._objs = list()
+        self._objs = []
 
     def append(self, binsha, crc, offset):
         """Append one piece of object information"""
@@ -223,7 +223,7 @@ def write(self, pack_sha, write):
             sha_write(pack('>L', t[1] & 0xffffffff))
         # END for each crc
 
-        tmplist = list()
+        tmplist = []
         # offset 32
         for t in self._objs:
             ofs = t[2]
@@ -370,7 +370,7 @@ def _initialize(self):
     def _read_fanout(self, byte_offset):
         """Generate a fanout table from our data"""
         d = self._cursor.map()
-        out = list()
+        out = []
         append = out.append
         for i in xrange(256):
             append(unpack_from('>L', d, byte_offset + i * 4)[0])
@@ -601,7 +601,7 @@ def collect_streams(self, offset):
             delta chain.
             If the object at offset is no delta, the size of the list is 1.
         :param offset: specifies the first byte of the object within this pack"""
-        out = list()
+        out = []
         c = self._cursor
         while True:
             ostream = pack_object_at(c, offset, True)[1]
diff --git a/gitdb/stream.py b/gitdb/stream.py
@@ -24,6 +24,7 @@
     make_sha,
     write,
     close,
+    suppress,
 )
 
 from gitdb.const import NULL_BYTE, BYTE_SPACE
@@ -142,7 +143,7 @@ def data(self):
     def close(self):
         """Close our underlying stream of compressed bytes if this was allowed during initialization
         :return: True if we closed the underlying stream
-        :note: can be called safely 
+        :note: can be called safely
         """
         if self._close:
             if hasattr(self._m, 'close'):
@@ -289,11 +290,11 @@ def read(self, size=-1):
         # if we hit the end of the stream
         # NOTE: Behavior changed in PY2.7 onward, which requires special handling to make the tests work properly.
         # They are thorough, and I assume it is truly working.
-        # Why is this logic as convoluted as it is ? Please look at the table in 
+        # Why is this logic as convoluted as it is ? Please look at the table in
         # https://github.com/gitpython-developers/gitdb/issues/19 to learn about the test-results.
         # Bascially, on py2.6, you want to use branch 1, whereas on all other python version, the second branch
-        # will be the one that works. 
-        # However, the zlib VERSIONs as well as the platform check is used to further match the entries in the 
+        # will be the one that works.
+        # However, the zlib VERSIONs as well as the platform check is used to further match the entries in the
         # table in the github issue. This is it ... it was the only way I could make this work everywhere.
         # IT's CERTAINLY GOING TO BITE US IN THE FUTURE ... .
         if PY26 or ((zlib.ZLIB_VERSION == '1.2.7' or zlib.ZLIB_VERSION == '1.2.5') and not sys.platform == 'darwin'):
@@ -566,6 +567,12 @@ def __init__(self):
 
     #{ Stream Interface
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        pass
+
     def write(self, data):
         """:raise IOError: If not all bytes could be written
         :param data: byte object
@@ -593,11 +600,20 @@ class FlexibleSha1Writer(Sha1Writer):
 
     """Writer producing a sha1 while passing on the written bytes to the given
     write function"""
-    __slots__ = 'writer'
+    __slots__ = ('writer', '_no_close_writer')
 
-    def __init__(self, writer):
+    def __init__(self, writer, no_close_writer=False):
         Sha1Writer.__init__(self)
         self.writer = writer
+        self._no_close_writer = no_close_writer
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        if not self._no_close_writer:
+            with suppress():
+                self.writer.close()
 
     def write(self, data):
         Sha1Writer.write(self, data)
@@ -614,6 +630,13 @@ def __init__(self):
         self.buf = BytesIO()
         self.zip = zlib.compressobj(zlib.Z_BEST_SPEED)
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        with suppress():
+            self.close()
+
     def __getattr__(self, attr):
         return getattr(self.buf, attr)
 
@@ -658,6 +681,13 @@ def __init__(self, fd):
 
     #{ Stream Interface
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        with suppress():
+            self.close()
+
     def write(self, data):
         """:raise IOError: If not all bytes could be written
         :return: length of incoming data"""
@@ -690,6 +720,13 @@ def __init__(self, fd):
         self._fd = fd
         self._pos = 0
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        with suppress():
+            self.close()
+
     def write(self, data):
         self._pos += len(data)
         os.write(self._fd, data)
@@ -719,6 +756,12 @@ class NullStream(object):
     Use it like /dev/null"""
     __slots__ = tuple()
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        pass
+
     def read(self, size=0):
         return ''
 
diff --git a/gitdb/test/lib.py b/gitdb/test/lib.py
@@ -24,6 +24,9 @@
 
 #{ Bases
 
+log = logging.getLogger(__name__)
+
+
 class TestBase(unittest.TestCase):
     """Base class for all tests
 
@@ -47,8 +50,8 @@ def setUpClass(cls):
 
         cls.gitrepopath = os.environ.get(cls.k_env_git_repo)
         if not cls.gitrepopath:
-            logging.info(
-                "You can set the %s environment variable to a .git repository of your choice - defaulting to the gitdb repository", cls.k_env_git_repo)
+            log.info("You can set the %s environment variable to a .git repository of your choice"
+                     " - defaulting to the gitdb repository", cls.k_env_git_repo)
             ospd = os.path.dirname
             cls.gitrepopath = os.path.join(ospd(ospd(ospd(__file__))), '.git')
         # end assure gitrepo is set
diff --git a/gitdb/test/test_stream.py b/gitdb/test/test_stream.py
diff --git a/gitdb/util.py b/gitdb/util.py