Skip to content

Commit 2ec8c99

Browse files
committed
feat(io): Retrofit streams as context-managers.
+ feat(util): add logger. + feat(util): add suppress-ex context-handler (from PY3 sources).
1 parent 941b6c7 commit 2ec8c99

File tree

8 files changed

+176
-59
lines changed

8 files changed

+176
-59
lines changed

doc/source/tutorial.rst

Lines changed: 29 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -35,29 +35,28 @@ Databases support query and/or addition of objects using simple interfaces. They
3535
Both have two sets of methods: one allows interacting with single objects, the other allows handling a stream of objects simultaneously and asynchronously.
3636

3737
Acquiring information about an object from a database is easy if you have a SHA1 to refer to the object::
38-
39-
38+
39+
4040
ldb = LooseObjectDB(fixture_path("../../../.git/objects"))
41-
41+
4242
for sha1 in ldb.sha_iter():
4343
oinfo = ldb.info(sha1)
44-
ostream = ldb.stream(sha1)
45-
assert oinfo[:3] == ostream[:3]
46-
47-
assert len(ostream.read()) == ostream.size
48-
# END for each sha in database
49-
44+
with ldb.stream(sha1) as ostream:
45+
assert oinfo[:3] == ostream[:3]
46+
47+
assert len(ostream.read()) == ostream.size
48+
5049
To store information, you prepare an *IStream* object with the required information. The provided stream will be read and converted into an object, and the respective 20 byte SHA1 identifier is stored in the IStream object::
51-
50+
5251
data = "my data"
53-
istream = IStream("blob", len(data), StringIO(data))
54-
55-
# the object does not yet have a sha
56-
assert istream.binsha is None
57-
ldb.store(istream)
58-
# now the sha is set
59-
assert len(istream.binsha) == 20
60-
assert ldb.has_object(istream.binsha)
52+
with IStream("blob", len(data), StringIO(data)) as istream:
53+
54+
# the object does not yet have a sha
55+
assert istream.binsha is None
56+
ldb.store(istream)
57+
# now the sha is set
58+
assert len(istream.binsha) == 20
59+
assert ldb.has_object(istream.binsha)
6160

6261
**********************
6362
Asynchronous Operation
@@ -67,33 +66,33 @@ For each read or write method that allows a single-object to be handled, an *_as
6766
Using asynchronous operations is easy, but chaining multiple operations together to form a complex one would require you to read the docs of the *async* package. At the current time, due to the *GIL*, the *GitDB* can only achieve true concurrency during zlib compression and decompression of big objects, and only if the respective C modules were compiled in *async*.
6867

6968
Asynchronous operations are scheduled by a *ThreadPool* which resides in the *gitdb.util* module::
70-
69+
7170
from gitdb.util import pool
72-
71+
7372
# set the pool to use two threads
7473
pool.set_size(2)
75-
74+
7675
# synchronize the mode of operation
7776
pool.set_size(0)
78-
79-
77+
78+
8079
Use async methods with readers, which supply items to be processed. The result is given through readers as well::
81-
80+
8281
from async import IteratorReader
83-
82+
8483
# Create a reader from an iterator
8584
reader = IteratorReader(ldb.sha_iter())
86-
85+
8786
# get reader for object streams
8887
info_reader = ldb.stream_async(reader)
89-
88+
9089
# read one
9190
info = info_reader.read(1)[0]
92-
91+
9392
# read all the rest until depletion
9493
ostreams = info_reader.read()
95-
96-
94+
95+
9796

9897
*********
9998
Databases

gitdb/base.py

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# This module is part of GitDB and is released under
44
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
55
"""Module with basic data structures - they are designed to be lightweight and fast"""
6-
from gitdb.util import bin_to_hex
6+
from gitdb.util import bin_to_hex, suppress
77

88
from gitdb.fun import (
99
type_id_to_type_map,
@@ -134,8 +134,15 @@ def __init__(self, *args, **kwargs):
134134

135135
#{ Stream Reader Interface
136136

137+
def __enter__(self):
138+
return self
139+
140+
def __exit__(self, exc_type, exc_value, traceback):
141+
with suppress():
142+
self.stream.close()
143+
137144
def read(self, size=-1):
138-
return self[3].read(size)
145+
return self.stream.read(size)
139146

140147
@property
141148
def stream(self):
@@ -171,9 +178,16 @@ def __new__(cls, packoffset, type, size, stream, *args):
171178
"""Helps with the initialization of subclasses"""
172179
return tuple.__new__(cls, (packoffset, type, size, stream))
173180

181+
def __enter__(self):
182+
return self
183+
184+
def __exit__(self, exc_type, exc_value, traceback):
185+
with suppress():
186+
self.stream.close()
187+
174188
#{ Stream Reader Interface
175189
def read(self, size=-1):
176-
return self[3].read(size)
190+
return self.stream.read(size)
177191

178192
@property
179193
def stream(self):
@@ -189,9 +203,16 @@ class ODeltaPackStream(ODeltaPackInfo):
189203
def __new__(cls, packoffset, type, size, delta_info, stream):
190204
return tuple.__new__(cls, (packoffset, type, size, delta_info, stream))
191205

206+
def __enter__(self):
207+
return self
208+
209+
def __exit__(self, exc_type, exc_value, traceback):
210+
with suppress():
211+
self.stream.close()
212+
192213
#{ Stream Reader Interface
193214
def read(self, size=-1):
194-
return self[4].read(size)
215+
return self.stream.read(size)
195216

196217
@property
197218
def stream(self):
@@ -216,6 +237,13 @@ def __new__(cls, type, size, stream, sha=None):
216237
def __init__(self, type, size, stream, sha=None):
217238
list.__init__(self, (sha, type, size, stream, None))
218239

240+
def __enter__(self):
241+
return self
242+
243+
def __exit__(self, exc_type, exc_value, traceback):
244+
with suppress():
245+
self._stream().close()
246+
219247
#{ Interface
220248
@property
221249
def hexsha(self):
@@ -239,7 +267,7 @@ def _set_error(self, exc):
239267
def read(self, size=-1):
240268
"""Implements a simple stream reader interface, passing the read call on
241269
to our internal stream"""
242-
return self[3].read(size)
270+
return self._stream().read(size)
243271

244272
#} END stream reader interface
245273

@@ -312,4 +340,10 @@ class InvalidOStream(InvalidOInfo):
312340
"""Carries information about an invalid ODB stream"""
313341
__slots__ = tuple()
314342

343+
def __enter__(self):
344+
return self
345+
346+
def __exit__(self, exc_type, exc_value, traceback):
347+
pass
348+
315349
#} END ODB Bases

gitdb/db/mem.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -100,13 +100,12 @@ def stream_copy(self, sha_iter, odb):
100100
continue
101101
# END check object existence
102102

103-
ostream = self.stream(sha)
104-
# compressed data including header
105-
sio = BytesIO(ostream.stream.data())
106-
istream = IStream(ostream.type, ostream.size, sio, sha)
107-
108-
odb.store(istream)
109-
count += 1
103+
with self.stream(sha) as ostream:
104+
# compressed data including header
105+
sio = BytesIO(ostream.stream.data())
106+
with IStream(ostream.type, ostream.size, sio, sha) as istream:
107+
odb.store(istream)
108+
count += 1
110109
# END for each sha
111110
return count
112111
#} END interface

gitdb/pack.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@
6363

6464
from gitdb.const import NULL_BYTE
6565
from gitdb.utils.compat import (
66-
izip,
67-
buffer,
66+
izip,
67+
buffer,
6868
xrange,
6969
to_bytes
7070
)
@@ -184,7 +184,7 @@ class IndexWriter(object):
184184
__slots__ = '_objs'
185185

186186
def __init__(self):
187-
self._objs = list()
187+
self._objs = []
188188

189189
def append(self, binsha, crc, offset):
190190
"""Append one piece of object information"""
@@ -223,7 +223,7 @@ def write(self, pack_sha, write):
223223
sha_write(pack('>L', t[1] & 0xffffffff))
224224
# END for each crc
225225

226-
tmplist = list()
226+
tmplist = []
227227
# offset 32
228228
for t in self._objs:
229229
ofs = t[2]
@@ -370,7 +370,7 @@ def _initialize(self):
370370
def _read_fanout(self, byte_offset):
371371
"""Generate a fanout table from our data"""
372372
d = self._cursor.map()
373-
out = list()
373+
out = []
374374
append = out.append
375375
for i in xrange(256):
376376
append(unpack_from('>L', d, byte_offset + i * 4)[0])
@@ -601,7 +601,7 @@ def collect_streams(self, offset):
601601
delta chain.
602602
If the object at offset is no delta, the size of the list is 1.
603603
:param offset: specifies the first byte of the object within this pack"""
604-
out = list()
604+
out = []
605605
c = self._cursor
606606
while True:
607607
ostream = pack_object_at(c, offset, True)[1]

gitdb/stream.py

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
make_sha,
2525
write,
2626
close,
27+
suppress,
2728
)
2829

2930
from gitdb.const import NULL_BYTE, BYTE_SPACE
@@ -142,7 +143,7 @@ def data(self):
142143
def close(self):
143144
"""Close our underlying stream of compressed bytes if this was allowed during initialization
144145
:return: True if we closed the underlying stream
145-
:note: can be called safely
146+
:note: can be called safely
146147
"""
147148
if self._close:
148149
if hasattr(self._m, 'close'):
@@ -289,11 +290,11 @@ def read(self, size=-1):
289290
# if we hit the end of the stream
290291
# NOTE: Behavior changed in PY2.7 onward, which requires special handling to make the tests work properly.
291292
# They are thorough, and I assume it is truly working.
292-
# Why is this logic as convoluted as it is ? Please look at the table in
293+
# Why is this logic as convoluted as it is ? Please look at the table in
293294
# https://github.com/gitpython-developers/gitdb/issues/19 to learn about the test-results.
294295
# Basically, on py2.6, you want to use branch 1, whereas on all other python versions, the second branch
295-
# will be the one that works.
296-
# However, the zlib VERSIONs as well as the platform check is used to further match the entries in the
296+
# will be the one that works.
297+
# However, the zlib VERSIONs as well as the platform check is used to further match the entries in the
297298
# table in the github issue. This is it ... it was the only way I could make this work everywhere.
298299
# IT's CERTAINLY GOING TO BITE US IN THE FUTURE ... .
299300
if PY26 or ((zlib.ZLIB_VERSION == '1.2.7' or zlib.ZLIB_VERSION == '1.2.5') and not sys.platform == 'darwin'):
@@ -566,6 +567,12 @@ def __init__(self):
566567

567568
#{ Stream Interface
568569

570+
def __enter__(self):
571+
return self
572+
573+
def __exit__(self, exc_type, exc_value, traceback):
574+
pass
575+
569576
def write(self, data):
570577
""":raise IOError: If not all bytes could be written
571578
:param data: byte object
@@ -593,11 +600,20 @@ class FlexibleSha1Writer(Sha1Writer):
593600

594601
"""Writer producing a sha1 while passing on the written bytes to the given
595602
write function"""
596-
__slots__ = 'writer'
603+
__slots__ = ('writer', '_no_close_writer')
597604

598-
def __init__(self, writer):
605+
def __init__(self, writer, no_close_writer=False):
599606
Sha1Writer.__init__(self)
600607
self.writer = writer
608+
self._no_close_writer = no_close_writer
609+
610+
def __enter__(self):
611+
return self
612+
613+
def __exit__(self, exc_type, exc_value, traceback):
614+
if not self._no_close_writer:
615+
with suppress():
616+
self.writer.close()
601617

602618
def write(self, data):
603619
Sha1Writer.write(self, data)
@@ -614,6 +630,13 @@ def __init__(self):
614630
self.buf = BytesIO()
615631
self.zip = zlib.compressobj(zlib.Z_BEST_SPEED)
616632

633+
def __enter__(self):
634+
return self
635+
636+
def __exit__(self, exc_type, exc_value, traceback):
637+
with suppress():
638+
self.close()
639+
617640
def __getattr__(self, attr):
618641
return getattr(self.buf, attr)
619642

@@ -658,6 +681,13 @@ def __init__(self, fd):
658681

659682
#{ Stream Interface
660683

684+
def __enter__(self):
685+
return self
686+
687+
def __exit__(self, exc_type, exc_value, traceback):
688+
with suppress():
689+
self.close()
690+
661691
def write(self, data):
662692
""":raise IOError: If not all bytes could be written
663693
:return: length of incoming data"""
@@ -690,6 +720,13 @@ def __init__(self, fd):
690720
self._fd = fd
691721
self._pos = 0
692722

723+
def __enter__(self):
724+
return self
725+
726+
def __exit__(self, exc_type, exc_value, traceback):
727+
with suppress():
728+
self.close()
729+
693730
def write(self, data):
694731
self._pos += len(data)
695732
os.write(self._fd, data)
@@ -719,6 +756,12 @@ class NullStream(object):
719756
Use it like /dev/null"""
720757
__slots__ = tuple()
721758

759+
def __enter__(self):
760+
return self
761+
762+
def __exit__(self, exc_type, exc_value, traceback):
763+
pass
764+
722765
def read(self, size=0):
723766
return ''
724767

gitdb/test/lib.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424

2525
#{ Bases
2626

27+
log = logging.getLogger(__name__)
28+
29+
2730
class TestBase(unittest.TestCase):
2831
"""Base class for all tests
2932
@@ -47,8 +50,8 @@ def setUpClass(cls):
4750

4851
cls.gitrepopath = os.environ.get(cls.k_env_git_repo)
4952
if not cls.gitrepopath:
50-
logging.info(
51-
"You can set the %s environment variable to a .git repository of your choice - defaulting to the gitdb repository", cls.k_env_git_repo)
53+
log.info("You can set the %s environment variable to a .git repository of your choice"
54+
" - defaulting to the gitdb repository", cls.k_env_git_repo)
5255
ospd = os.path.dirname
5356
cls.gitrepopath = os.path.join(ospd(ospd(ospd(__file__))), '.git')
5457
# end assure gitrepo is set

0 commit comments

Comments
 (0)