Skip to content

Commit 47e8884

Browse files
committed
fix(pack): restore packers as LazyMixins
Speed-up for non-error commits (benchmark timings, seconds):
- no-leak contextlib-ing (v2.0.0, 941b6c7): 5.20
- previous commit, no LazyMixin (v2.1.0.dev1): 7.70
- this commit, with LazyMixin: 5.50
1 parent 534c9bb commit 47e8884

File tree

1 file changed

+75
-88
lines changed

1 file changed

+75
-88
lines changed

gitdb/pack.py

Lines changed: 75 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ def write(self, pack_sha, write):
247247
return sha
248248

249249

250-
class PackIndexFile(object):
250+
class PackIndexFile(LazyMixin):
251251

252252
"""A pack index provides offsets into the corresponding pack, allowing to find
253253
locations for offsets faster."""
@@ -261,15 +261,19 @@ class PackIndexFile(object):
261261
_sha_list_offset = 8 + 1024
262262
index_v2_signature = b'\xfftOc'
263263
index_version_default = 2
264-
_entered = 0
265264

266265
def __init__(self, indexpath):
267266
self._indexpath = indexpath
267+
self._entered = 0
268+
self._cursor = None
268269

269270
def __enter__(self):
270-
if not hasattr(self, '_cursor'):
271-
assert self._entered == 0, (self, self._indexpath)
272-
self._prepare_enter()
271+
if self._entered == 0:
272+
# Note: We don't lock the file when reading as we cannot be sure
273+
# that we can actually write to the location - it could be a read-only
274+
# alternate for instance
275+
assert self._cursor is None, self._cursor
276+
self._cursor = self._make_cursor()
273277
self._entered += 1
274278

275279
return self
@@ -279,20 +283,19 @@ def __exit__(self, exc_type, exc_value, traceback):
279283
assert self._entered >= 0, (self, self._indexpath)
280284
if self._entered == 0:
281285
self._cursor._destroy()
282-
del self._cursor
283-
del self._fanout_table
284-
285-
def _prepare_enter(self):
286-
# Note: We don't lock the file when reading as we cannot be sure
287-
# that we can actually write to the location - it could be a read-only
288-
# alternate for instance
289-
self._cursor = mman.make_cursor(self._indexpath).use_region()
286+
self._cursor = None
287+
288+
def _make_cursor(self):
289+
cursor = mman.make_cursor(self._indexpath).use_region()
290+
290291
# We will assume that the index will always fully fit into memory !
291-
if mman.window_size() > 0 and self._cursor.file_size() > mman.window_size():
292+
if mman.window_size() > 0 and cursor.file_size() > mman.window_size():
292293
raise AssertionError("The index file at %s is too large to fit into a mapped window (%i > %i). This is a limitation of the implementation" % (
293-
self._indexpath, self._cursor.file_size(), mman.window_size()))
294-
# END assert window size
294+
self._indexpath, cursor.file_size(), mman.window_size()))
295295

296+
return cursor
297+
298+
def _set_cache_(self, attr):
296299
# now its time to initialize everything - if we are here, someone wants
297300
# to access the fanout table or related properties
298301

@@ -312,7 +315,24 @@ def _prepare_enter(self):
312315

313316
# INITIALIZE DATA
314317
# byte offset is 8 if version is 2, 0 otherwise
315-
self._initialize()
318+
self._fanout_table = self._read_fanout((self._version == 2) * 8)
319+
320+
if self._version == 2:
321+
self._crc_list_offset = self._sha_list_offset + self.size() * 20
322+
self._pack_offset = self._crc_list_offset + self.size() * 4
323+
self._pack_64_offset = self._pack_offset + self.size() * 4
324+
# END setup base
325+
326+
def _read_fanout(self, byte_offset):
327+
"""Generate a fanout table from our data"""
328+
d = self._cursor.map()
329+
out = []
330+
append = out.append
331+
for i in xrange(256):
332+
append(unpack_from('>L', d, byte_offset + i * 4)[0])
333+
# END for each entry
334+
return out
335+
316336
# END handle attributes
317337

318338
#{ Access V1
@@ -366,30 +386,6 @@ def _crc_v2(self, i):
366386

367387
#} END access V2
368388

369-
#{ Initialization
370-
371-
def _initialize(self):
372-
"""initialize base data"""
373-
self._fanout_table = self._read_fanout((self._version == 2) * 8)
374-
375-
if self._version == 2:
376-
self._crc_list_offset = self._sha_list_offset + self.size() * 20
377-
self._pack_offset = self._crc_list_offset + self.size() * 4
378-
self._pack_64_offset = self._pack_offset + self.size() * 4
379-
# END setup base
380-
381-
def _read_fanout(self, byte_offset):
382-
"""Generate a fanout table from our data"""
383-
d = self._cursor.map()
384-
out = []
385-
append = out.append
386-
for i in xrange(256):
387-
append(unpack_from('>L', d, byte_offset + i * 4)[0])
388-
# END for each entry
389-
return out
390-
391-
#} END initialization
392-
393389
#{ Properties
394390
def version(self):
395391
return self._version
@@ -434,7 +430,7 @@ def sha_to_index(self, sha):
434430
:param sha: 20 byte sha to lookup"""
435431
first_byte = byte_ord(sha[0])
436432
get_sha = self.sha
437-
lo = 0
433+
lo = 0 # lower index, the left bound of the bisection
438434
if first_byte != 0:
439435
lo = self._fanout_table[first_byte - 1]
440436
hi = self._fanout_table[first_byte] # the upper, right bound of the bisection
@@ -514,7 +510,7 @@ def sha_to_index(self, sha):
514510
#} END properties
515511

516512

517-
class PackFile(object):
513+
class PackFile(LazyMixin):
518514

519515
"""A pack is a file written according to the Version 2 for git packs
520516
@@ -543,32 +539,30 @@ class PackFile(object):
543539
def __init__(self, packpath):
544540
self._packpath = packpath
545541
self._entered = 0
542+
self._cursor = None
546543

547544
def __enter__(self):
548-
if not hasattr(self, '_cursor'):
549-
assert self._entered == 0, (self, self._packpath)
550-
self._prepare_enter()
545+
if self._entered == 0:
546+
assert self._cursor is None, self._cursor
547+
self._cursor = mman.make_cursor(self._packpath).use_region()
551548
self._entered += 1
552549

553550
return self
554551

555552
def __exit__(self, exc_type, exc_value, traceback):
556553
self._entered -= 1
557-
assert self._entered >= 0, (self, self._indexpath)
554+
assert self._entered >= 0, (self, self._packpath)
558555
if self._entered == 0:
559556
self._cursor._destroy()
560-
del self._cursor
561-
562-
def _prepare_enter(self):
563-
# we fill the whole cache, whichever attribute gets queried first
564-
self._cursor = mman.make_cursor(self._packpath).use_region()
557+
self._cursor = None
565558

566-
# read the header information
559+
def _set_cache_(self, attr):
560+
# Fill cache by reading the header information.
567561
type_id, self._version, self._size = unpack_from(">LLL", self._cursor.map(), 0)
568562

569563
# TODO: figure out whether we should better keep the lock, or maybe
570564
# add a .keep file instead ?
571-
if type_id != self.pack_signature:
565+
if type_id != PackFile.pack_signature:
572566
raise ParseError("Invalid pack signature: %i" % type_id)
573567

574568
def _iter_objects(self, start_offset, as_stream=True):
@@ -681,12 +675,12 @@ def stream_iter(self, start_offset=0):
681675
#} END Read-Database like Interface
682676

683677

684-
class PackEntity(object):
678+
class PackEntity(LazyMixin):
685679

686680
"""Combines the PackIndexFile and the PackFile into one, allowing the
687681
actual objects to be resolved and iterated"""
688682

689-
__slots__ = ('_basename',
683+
__slots__ = ('_basename', # Could have been int, but better limit scrupulous nesting.
690684
'_index', # our index file
691685
'_pack', # our pack file
692686
'_offset_map', # on demand dict mapping one offset to the next consecutive one
@@ -713,41 +707,34 @@ def __enter__(self):
713707
return self
714708

715709
def __exit__(self, exc_type, exc_value, traceback):
716-
if self._index:
717-
self._index.__exit__(exc_type, exc_value, traceback)
718-
if self._pack:
719-
self._pack.__exit__(exc_type, exc_value, traceback)
710+
self._index.__exit__(exc_type, exc_value, traceback)
711+
self._pack.__exit__(exc_type, exc_value, traceback)
720712
self._entered = False
721713

722-
@property
723-
def offset_map(self):
724-
if not hasattr(self, '_offset_map'):
725-
# currently this can only be _offset_map
726-
# TODO: make this a simple sorted offset array which can be bisected
727-
# to find the respective entry, from which we can take a +1 easily
728-
# This might be slower, but should also be much lighter in memory !
729-
offsets_sorted = sorted(self._index.offsets())
730-
last_offset = len(self._pack.data()) - self._pack.footer_size
731-
assert offsets_sorted, "Cannot handle empty indices"
732-
733-
offset_map = None
734-
if len(offsets_sorted) == 1:
735-
offset_map = {offsets_sorted[0]: last_offset}
736-
else:
737-
iter_offsets = iter(offsets_sorted)
738-
iter_offsets_plus_one = iter(offsets_sorted)
739-
next(iter_offsets_plus_one)
740-
consecutive = izip(iter_offsets, iter_offsets_plus_one)
741-
742-
offset_map = dict(consecutive)
743-
744-
# the last offset is not yet set
745-
offset_map[offsets_sorted[-1]] = last_offset
746-
# END handle offset amount
714+
def _set_cache_(self, attr):
715+
# currently this can only be _offset_map
716+
# TODO: make this a simple sorted offset array which can be bisected
717+
# to find the respective entry, from which we can take a +1 easily
718+
# This might be slower, but should also be much lighter in memory !
719+
offsets_sorted = sorted(self._index.offsets())
720+
last_offset = len(self._pack.data()) - self._pack.footer_size
721+
assert offsets_sorted, "Cannot handle empty indices"
722+
723+
offset_map = None
724+
if len(offsets_sorted) == 1:
725+
offset_map = {offsets_sorted[0]: last_offset}
726+
else:
727+
iter_offsets = iter(offsets_sorted)
728+
iter_offsets_plus_one = iter(offsets_sorted)
729+
next(iter_offsets_plus_one)
730+
consecutive = izip(iter_offsets, iter_offsets_plus_one)
747731

748-
self._offset_map = offset_map
732+
offset_map = dict(consecutive)
749733

750-
return self._offset_map
734+
# the last offset is not yet set
735+
offset_map[offsets_sorted[-1]] = last_offset
736+
# END handle offset amount
737+
self._offset_map = offset_map
751738

752739
def _sha_to_index(self, sha):
753740
""":return: index for the given sha, or raise"""
@@ -870,7 +857,7 @@ def is_valid_stream(self, sha, use_crc=False):
870857

871858
index = self._sha_to_index(sha)
872859
offset = self._index.offset(index)
873-
next_offset = self.offset_map[offset]
860+
next_offset = self._offset_map[offset]
874861
crc_value = self._index.crc(index)
875862

876863
# create the current crc value, on the compressed object data

0 commit comments

Comments (0)