Skip to content

Commit 47e8884

Browse files
committed
fix(pack): restore packers as LazyMixins
Speed-up for non-error commits (benchmark timings, seconds):
- no-leak contextlib-ing (v2.0.0, 941b6c7): 5.20
- previous commit, no LazyMixin (v2.1.0.dev1): 7.70
- this commit, with LazyMixin: 5.50
1 parent 534c9bb commit 47e8884

File tree

1 file changed

+75
-88
lines changed

1 file changed

+75
-88
lines changed

gitdb/pack.py

Lines changed: 75 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ def write(self, pack_sha, write):
247247
return sha
248248

249249

250-
class PackIndexFile(object):
250+
class PackIndexFile(LazyMixin):
251251

252252
"""A pack index provides offsets into the corresponding pack, allowing to find
253253
locations for offsets faster."""
@@ -261,15 +261,19 @@ class PackIndexFile(object):
261261
_sha_list_offset = 8 + 1024
262262
index_v2_signature = b'\xfftOc'
263263
index_version_default = 2
264-
_entered = 0
265264

266265
def __init__(self, indexpath):
267266
self._indexpath = indexpath
267+
self._entered = 0
268+
self._cursor = None
268269

269270
def __enter__(self):
270-
if not hasattr(self, '_cursor'):
271-
assert self._entered == 0, (self, self._indexpath)
272-
self._prepare_enter()
271+
if self._entered == 0:
272+
# Note: We don't lock the file when reading as we cannot be sure
273+
# that we can actually write to the location - it could be a read-only
274+
# alternate for instance
275+
assert self._cursor is None, self._cursor
276+
self._cursor = self._make_cursor()
273277
self._entered += 1
274278

275279
return self
@@ -279,20 +283,19 @@ def __exit__(self, exc_type, exc_value, traceback):
279283
assert self._entered >= 0, (self, self._indexpath)
280284
if self._entered == 0:
281285
self._cursor._destroy()
282-
del self._cursor
283-
del self._fanout_table
284-
285-
def _prepare_enter(self):
286-
# Note: We don't lock the file when reading as we cannot be sure
287-
# that we can actually write to the location - it could be a read-only
288-
# alternate for instance
289-
self._cursor = mman.make_cursor(self._indexpath).use_region()
286+
self._cursor = None
287+
288+
def _make_cursor(self):
289+
cursor = mman.make_cursor(self._indexpath).use_region()
290+
290291
# We will assume that the index will always fully fit into memory !
291-
if mman.window_size() > 0 and self._cursor.file_size() > mman.window_size():
292+
if mman.window_size() > 0 and cursor.file_size() > mman.window_size():
292293
raise AssertionError("The index file at %s is too large to fit into a mapped window (%i > %i). This is a limitation of the implementation" % (
293-
self._indexpath, self._cursor.file_size(), mman.window_size()))
294-
# END assert window size
294+
self._indexpath, cursor.file_size(), mman.window_size()))
295295

296+
return cursor
297+
298+
def _set_cache_(self, attr):
296299
# now its time to initialize everything - if we are here, someone wants
297300
# to access the fanout table or related properties
298301

@@ -312,7 +315,24 @@ def _prepare_enter(self):
312315

313316
# INITIALIZE DATA
314317
# byte offset is 8 if version is 2, 0 otherwise
315-
self._initialize()
318+
self._fanout_table = self._read_fanout((self._version == 2) * 8)
319+
320+
if self._version == 2:
321+
self._crc_list_offset = self._sha_list_offset + self.size() * 20
322+
self._pack_offset = self._crc_list_offset + self.size() * 4
323+
self._pack_64_offset = self._pack_offset + self.size() * 4
324+
# END setup base
325+
326+
def _read_fanout(self, byte_offset):
327+
"""Generate a fanout table from our data"""
328+
d = self._cursor.map()
329+
out = []
330+
append = out.append
331+
for i in xrange(256):
332+
append(unpack_from('>L', d, byte_offset + i * 4)[0])
333+
# END for each entry
334+
return out
335+
316336
# END handle attributes
317337

318338
#{ Access V1
@@ -366,30 +386,6 @@ def _crc_v2(self, i):
366386

367387
#} END access V2
368388

369-
#{ Initialization
370-
371-
def _initialize(self):
372-
"""initialize base data"""
373-
self._fanout_table = self._read_fanout((self._version == 2) * 8)
374-
375-
if self._version == 2:
376-
self._crc_list_offset = self._sha_list_offset + self.size() * 20
377-
self._pack_offset = self._crc_list_offset + self.size() * 4
378-
self._pack_64_offset = self._pack_offset + self.size() * 4
379-
# END setup base
380-
381-
def _read_fanout(self, byte_offset):
382-
"""Generate a fanout table from our data"""
383-
d = self._cursor.map()
384-
out = []
385-
append = out.append
386-
for i in xrange(256):
387-
append(unpack_from('>L', d, byte_offset + i * 4)[0])
388-
# END for each entry
389-
return out
390-
391-
#} END initialization
392-
393389
#{ Properties
394390
def version(self):
395391
return self._version
@@ -434,7 +430,7 @@ def sha_to_index(self, sha):
434430
:param sha: 20 byte sha to lookup"""
435431
first_byte = byte_ord(sha[0])
436432
get_sha = self.sha
437-
lo = 0
433+
lo = 0 # lower index, the left bound of the bisection
438434
if first_byte != 0:
439435
lo = self._fanout_table[first_byte - 1]
440436
hi = self._fanout_table[first_byte] # the upper, right bound of the bisection
@@ -514,7 +510,7 @@ def sha_to_index(self, sha):
514510
#} END properties
515511

516512

517-
class PackFile(object):
513+
class PackFile(LazyMixin):
518514

519515
"""A pack is a file written according to the Version 2 for git packs
520516
@@ -543,32 +539,30 @@ class PackFile(object):
543539
def __init__(self, packpath):
544540
self._packpath = packpath
545541
self._entered = 0
542+
self._cursor = None
546543

547544
def __enter__(self):
548-
if not hasattr(self, '_cursor'):
549-
assert self._entered == 0, (self, self._packpath)
550-
self._prepare_enter()
545+
if self._entered == 0:
546+
assert self._cursor is None, self._cursor
547+
self._cursor = mman.make_cursor(self._packpath).use_region()
551548
self._entered += 1
552549

553550
return self
554551

555552
def __exit__(self, exc_type, exc_value, traceback):
556553
self._entered -= 1
557-
assert self._entered >= 0, (self, self._indexpath)
554+
assert self._entered >= 0, (self, self._packpath)
558555
if self._entered == 0:
559556
self._cursor._destroy()
560-
del self._cursor
561-
562-
def _prepare_enter(self):
563-
# we fill the whole cache, whichever attribute gets queried first
564-
self._cursor = mman.make_cursor(self._packpath).use_region()
557+
self._cursor = None
565558

566-
# read the header information
559+
def _set_cache_(self, attr):
560+
# Fill cache by reading the header information.
567561
type_id, self._version, self._size = unpack_from(">LLL", self._cursor.map(), 0)
568562

569563
# TODO: figure out whether we should better keep the lock, or maybe
570564
# add a .keep file instead ?
571-
if type_id != self.pack_signature:
565+
if type_id != PackFile.pack_signature:
572566
raise ParseError("Invalid pack signature: %i" % type_id)
573567

574568
def _iter_objects(self, start_offset, as_stream=True):
@@ -681,12 +675,12 @@ def stream_iter(self, start_offset=0):
681675
#} END Read-Database like Interface
682676

683677

684-
class PackEntity(object):
678+
class PackEntity(LazyMixin):
685679

686680
"""Combines the PackIndexFile and the PackFile into one, allowing the
687681
actual objects to be resolved and iterated"""
688682

689-
__slots__ = ('_basename',
683+
__slots__ = ('_basename', # Could have been int, but better limit scrupulous nesting.
690684
'_index', # our index file
691685
'_pack', # our pack file
692686
'_offset_map', # on demand dict mapping one offset to the next consecutive one
@@ -713,41 +707,34 @@ def __enter__(self):
713707
return self
714708

715709
def __exit__(self, exc_type, exc_value, traceback):
716-
if self._index:
717-
self._index.__exit__(exc_type, exc_value, traceback)
718-
if self._pack:
719-
self._pack.__exit__(exc_type, exc_value, traceback)
710+
self._index.__exit__(exc_type, exc_value, traceback)
711+
self._pack.__exit__(exc_type, exc_value, traceback)
720712
self._entered = False
721713

722-
@property
723-
def offset_map(self):
724-
if not hasattr(self, '_offset_map'):
725-
# currently this can only be _offset_map
726-
# TODO: make this a simple sorted offset array which can be bisected
727-
# to find the respective entry, from which we can take a +1 easily
728-
# This might be slower, but should also be much lighter in memory !
729-
offsets_sorted = sorted(self._index.offsets())
730-
last_offset = len(self._pack.data()) - self._pack.footer_size
731-
assert offsets_sorted, "Cannot handle empty indices"
732-
733-
offset_map = None
734-
if len(offsets_sorted) == 1:
735-
offset_map = {offsets_sorted[0]: last_offset}
736-
else:
737-
iter_offsets = iter(offsets_sorted)
738-
iter_offsets_plus_one = iter(offsets_sorted)
739-
next(iter_offsets_plus_one)
740-
consecutive = izip(iter_offsets, iter_offsets_plus_one)
741-
742-
offset_map = dict(consecutive)
743-
744-
# the last offset is not yet set
745-
offset_map[offsets_sorted[-1]] = last_offset
746-
# END handle offset amount
714+
def _set_cache_(self, attr):
715+
# currently this can only be _offset_map
716+
# TODO: make this a simple sorted offset array which can be bisected
717+
# to find the respective entry, from which we can take a +1 easily
718+
# This might be slower, but should also be much lighter in memory !
719+
offsets_sorted = sorted(self._index.offsets())
720+
last_offset = len(self._pack.data()) - self._pack.footer_size
721+
assert offsets_sorted, "Cannot handle empty indices"
722+
723+
offset_map = None
724+
if len(offsets_sorted) == 1:
725+
offset_map = {offsets_sorted[0]: last_offset}
726+
else:
727+
iter_offsets = iter(offsets_sorted)
728+
iter_offsets_plus_one = iter(offsets_sorted)
729+
next(iter_offsets_plus_one)
730+
consecutive = izip(iter_offsets, iter_offsets_plus_one)
747731

748-
self._offset_map = offset_map
732+
offset_map = dict(consecutive)
749733

750-
return self._offset_map
734+
# the last offset is not yet set
735+
offset_map[offsets_sorted[-1]] = last_offset
736+
# END handle offset amount
737+
self._offset_map = offset_map
751738

752739
def _sha_to_index(self, sha):
753740
""":return: index for the given sha, or raise"""
@@ -870,7 +857,7 @@ def is_valid_stream(self, sha, use_crc=False):
870857

871858
index = self._sha_to_index(sha)
872859
offset = self._index.offset(index)
873-
next_offset = self.offset_map[offset]
860+
next_offset = self._offset_map[offset]
874861
crc_value = self._index.crc(index)
875862

876863
# create the current crc value, on the compressed object data

0 commit comments

Comments (0)