Skip to content

Commit 9e313a4

Browse files
committed
gitdb now uses 20 byte shas internally only, reducing the need to convert shas around all the time, saving precious function calls, and memory after all
1 parent e3d5ad1 commit 9e313a4

13 files changed

+85
-93
lines changed

base.py

Lines changed: 18 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""Module with basic data structures - they are designed to be lightweight and fast"""
22
from util import (
3-
to_hex_sha,
4-
to_bin_sha,
3+
bin_to_hex,
54
zlib
65
)
76

@@ -17,13 +16,13 @@
1716
#{ ODB Bases
1817

1918
class OInfo(tuple):
20-
"""Carries information about an object in an ODB, provdiing information
21-
about the sha of the object, the type_string as well as the uncompressed size
19+
"""Carries information about an object in an ODB, providing information
20+
about the binary sha of the object, the type_string as well as the uncompressed size
2221
in bytes.
2322
2423
It can be accessed using tuple notation and using attribute access notation::
2524
26-
assert dbi[0] == dbi.sha
25+
assert dbi[0] == dbi.binsha
2726
assert dbi[1] == dbi.type
2827
assert dbi[2] == dbi.size
2928
@@ -38,18 +37,14 @@ def __init__(self, *args):
3837

3938
#{ Interface
4039
@property
41-
def sha(self):
40+
def binsha(self):
41+
""":return: our sha as binary, 20 bytes"""
4242
return self[0]
43-
43+
4444
@property
4545
def hexsha(self):
4646
""":return: our sha, hex encoded, 40 bytes"""
47-
return to_hex_sha(self[0])
48-
49-
@property
50-
def binsha(self):
51-
""":return: our sha as binary, 20 bytes"""
52-
return to_bin_sha(self[0])
47+
return bin_to_hex(self[0])
5348

5449
@property
5550
def type(self):
@@ -197,16 +192,10 @@ def __init__(self, type, size, stream, sha=None):
197192
list.__init__(self, (sha, type, size, stream, None))
198193

199194
#{ Interface
200-
201195
@property
202196
def hexsha(self):
203197
""":return: our sha, hex encoded, 40 bytes"""
204-
return to_hex_sha(self[0])
205-
206-
@property
207-
def binsha(self):
208-
""":return: our sha as binary, 20 bytes"""
209-
return to_bin_sha(self[0])
198+
return bin_to_hex(self[0])
210199

211200
def _error(self):
212201
""":return: the error that occurred when processing the stream, or None"""
@@ -231,13 +220,13 @@ def read(self, size=-1):
231220

232221
#{ interface
233222

234-
def _set_sha(self, sha):
235-
self[0] = sha
223+
def _set_binsha(self, binsha):
224+
self[0] = binsha
236225

237-
def _sha(self):
226+
def _binsha(self):
238227
return self[0]
239228

240-
sha = property(_sha, _set_sha)
229+
binsha = property(_binsha, _set_binsha)
241230

242231

243232
def _type(self):
@@ -280,9 +269,13 @@ def __init__(self, sha, exc):
280269
tuple.__init__(self, (sha, exc))
281270

282271
@property
283-
def sha(self):
272+
def binsha(self):
284273
return self[0]
285274

275+
@property
276+
def hexsha(self):
277+
return bin_to_hex(self[0])
278+
286279
@property
287280
def error(self):
288281
""":return: exception instance explaining the failure"""

db/base.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22
from gitdb.util import (
33
pool,
44
join,
5-
LazyMixin,
6-
to_bin_sha
5+
LazyMixin
76
)
87

98
from gitdb.exc import BadObject
@@ -20,31 +19,30 @@
2019

2120
class ObjectDBR(object):
2221
"""Defines an interface for object database lookup.
23-
Objects are identified either by hex-sha (40 bytes) or
24-
by sha (20 bytes)"""
22+
Objects are identified by their 20 byte binary sha"""
2523

2624
def __contains__(self, sha):
2725
return self.has_obj
2826

2927
#{ Query Interface
3028
def has_object(self, sha):
3129
"""
32-
:return: True if the object identified by the given 40 byte hexsha or 20 bytes
30+
:return: True if the object identified by the given 20 bytes
3331
binary sha is contained in the database"""
3432
raise NotImplementedError("To be implemented in subclass")
3533

3634
def has_object_async(self, reader):
3735
"""Return a reader yielding information about the membership of objects
3836
as identified by shas
39-
:param reader: Reader yielding 20 byte or 40 byte shas.
37+
:param reader: Reader yielding 20 byte shas.
4038
:return: async.Reader yielding tuples of (sha, bool) pairs which indicate
4139
whether the given sha exists in the database or not"""
4240
task = ChannelThreadTask(reader, str(self.has_object_async), lambda sha: (sha, self.has_object(sha)))
4341
return pool.add_task(task)
4442

4543
def info(self, sha):
4644
""" :return: OInfo instance
47-
:param sha: 40 bytes hexsha or 20 bytes binary sha
45+
:param sha: 20 bytes binary sha
4846
:raise BadObject:"""
4947
raise NotImplementedError("To be implemented in subclass")
5048

@@ -57,7 +55,7 @@ def info_async(self, reader):
5755

5856
def stream(self, sha):
5957
""":return: OStream instance
60-
:param sha: 40 bytes hexsha or 20 bytes binary sha
58+
:param sha: 20 bytes binary sha
6159
:raise BadObject:"""
6260
raise NotImplementedError("To be implemented in subclass")
6361

@@ -192,11 +190,10 @@ def _set_cache_(self, attr):
192190
super(CompoundDB, self)._set_cache_(attr)
193191

194192
def _db_query(self, sha):
195-
""":return: database containing the given 20 or 40 byte sha
193+
""":return: database containing the given 20 byte sha
196194
:raise BadObject:"""
197195
# most databases use binary representations, prevent converting
198196
# it everytime a database is being queried
199-
sha = to_bin_sha(sha)
200197
try:
201198
return self._db_cache[sha]
202199
except KeyError:

db/loose.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@
2525
from gitdb.util import (
2626
file_contents_ro_filepath,
2727
ENOENT,
28-
to_hex_sha,
2928
hex_to_bin,
29+
bin_to_hex,
3030
exists,
3131
chmod,
3232
isdir,
@@ -100,7 +100,7 @@ def _map_loose_object(self, sha):
100100
"""
101101
:return: memory map of that file to allow random read access
102102
:raise BadObject: if object could not be located"""
103-
db_path = self.db_path(self.object_path(to_hex_sha(sha)))
103+
db_path = self.db_path(self.object_path(bin_to_hex(sha)))
104104
try:
105105
return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
106106
except OSError,e:
@@ -109,11 +109,11 @@ def _map_loose_object(self, sha):
109109
try:
110110
return file_contents_ro_filepath(db_path)
111111
except OSError:
112-
raise BadObject(to_hex_sha(sha))
112+
raise BadObject(sha)
113113
# didn't work because of our flag, don't try it again
114114
self._fd_open_flags = 0
115115
else:
116-
raise BadObject(to_hex_sha(sha))
116+
raise BadObject(sha)
117117
# END handle error
118118
# END exception handling
119119
try:
@@ -144,7 +144,7 @@ def stream(self, sha):
144144

145145
def has_object(self, sha):
146146
try:
147-
self.readable_db_object_path(to_hex_sha(sha))
147+
self.readable_db_object_path(bin_to_hex(sha))
148148
return True
149149
except BadObject:
150150
return False
@@ -158,7 +158,7 @@ def store(self, istream):
158158
# open a tmp file to write the data to
159159
fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
160160

161-
if istream.sha is None:
161+
if istream.binsha is None:
162162
writer = FDCompressedSha1Writer(fd)
163163
else:
164164
writer = FDStream(fd)
@@ -167,7 +167,7 @@ def store(self, istream):
167167

168168
try:
169169
try:
170-
if istream.sha is not None:
170+
if istream.binsha is not None:
171171
# copy as much as possible, the actual uncompressed item size might
172172
# be smaller than the compressed version
173173
stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size)
@@ -187,7 +187,7 @@ def store(self, istream):
187187
# END assure tmpfile removal on error
188188

189189
hexsha = None
190-
if istream.sha:
190+
if istream.binsha:
191191
hexsha = istream.hexsha
192192
else:
193193
hexsha = writer.sha(as_hex=True)
@@ -206,7 +206,7 @@ def store(self, istream):
206206
chmod(obj_path, 0444)
207207
# END handle dry_run
208208

209-
istream.sha = hexsha
209+
istream.binsha = hex_to_bin(hexsha)
210210
return istream
211211

212212
def sha_iter(self):

db/mem.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
IStream,
1111
)
1212

13-
from gitdb.util import to_bin_sha
1413
from gitdb.exc import (
1514
BadObject,
1615
UnsupportedOperation
@@ -54,22 +53,21 @@ def store(self, istream):
5453
# don't provide a size, the stream is written in object format, hence the
5554
# header needs decompression
5655
decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False)
57-
self._cache[istream.binsha] = OStream(istream.sha, istream.type, istream.size, decomp_stream)
56+
self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream)
5857

5958
return istream
6059

6160
def store_async(self, reader):
6261
raise UnsupportedOperation("MemoryDBs cannot currently be used for async write access")
6362

6463
def has_object(self, sha):
65-
return to_bin_sha(sha) in self._cache
64+
return sha in self._cache
6665

6766
def info(self, sha):
6867
# we always return streams, which are infos as well
6968
return self.stream(sha)
7069

7170
def stream(self, sha):
72-
sha = to_bin_sha(sha)
7371
try:
7472
ostream = self._cache[sha]
7573
# rewind stream for the next one to read

db/pack.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,7 @@
55
CachingDB
66
)
77

8-
from gitdb.util import (
9-
to_bin_sha,
10-
LazyMixin
11-
)
8+
from gitdb.util import LazyMixin
129

1310
from gitdb.exc import (
1411
BadObject,
@@ -65,7 +62,6 @@ def _pack_info(self, sha):
6562
self._sort_entities()
6663
# END update sorting
6764

68-
sha = to_bin_sha(sha)
6965
for item in self._entities:
7066
index = item[2](sha)
7167
if index is not None:

test/db/lib.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,15 @@ def _assert_object_writing_simple(self, db):
4141
istream = IStream(str_blob_type, len(data), StringIO(data))
4242
new_istream = db.store(istream)
4343
assert new_istream is istream
44-
assert db.has_object(istream.sha)
44+
assert db.has_object(istream.binsha)
4545

46-
info = db.info(istream.sha)
46+
info = db.info(istream.binsha)
4747
assert isinstance(info, OInfo)
4848
assert info.type == istream.type and info.size == istream.size
4949

50-
stream = db.stream(istream.sha)
50+
stream = db.stream(istream.binsha)
5151
assert isinstance(stream, OStream)
52-
assert stream.sha == info.sha and stream.type == info.type
52+
assert stream.binsha == info.binsha and stream.type == info.type
5353
assert stream.read() == data
5454
# END for each item
5555

@@ -80,10 +80,10 @@ def _assert_object_writing(self, db):
8080

8181
# store returns same istream instance, with new sha set
8282
my_istream = db.store(istream)
83-
sha = istream.sha
83+
sha = istream.binsha
8484
assert my_istream is istream
8585
assert db.has_object(sha) != dry_run
86-
assert len(sha) == 40 # for now we require 40 byte shas as default
86+
assert len(sha) == 20
8787

8888
# verify data - the slow way, we want to run code
8989
if not dry_run:
@@ -107,12 +107,12 @@ def _assert_object_writing(self, db):
107107
# identical to what we fed in
108108
ostream.seek(0)
109109
istream.stream = ostream
110-
assert istream.sha is not None
111-
prev_sha = istream.sha
110+
assert istream.binsha is not None
111+
prev_sha = istream.binsha
112112

113113
db.set_ostream(ZippedStoreShaWriter())
114114
db.store(istream)
115-
assert istream.sha == prev_sha
115+
assert istream.binsha == prev_sha
116116
new_ostream = db.ostream()
117117

118118
# note: only works as long our store write uses the same compression
@@ -143,12 +143,12 @@ def istream_generator(offset=0, ni=ni):
143143

144144
for stream in istreams:
145145
assert stream.error is None
146-
assert len(stream.sha) == 40
146+
assert len(stream.binsha) == 20
147147
assert isinstance(stream, IStream)
148148
# END assert each stream
149149

150150
# test has-object-async - we must have all previously added ones
151-
reader = IteratorReader( istream.sha for istream in istreams )
151+
reader = IteratorReader( istream.binsha for istream in istreams )
152152
hasobject_reader = db.has_object_async(reader)
153153
count = 0
154154
for sha, has_object in hasobject_reader:
@@ -158,7 +158,7 @@ def istream_generator(offset=0, ni=ni):
158158
assert count == ni
159159

160160
# read the objects we have just written
161-
reader = IteratorReader( istream.sha for istream in istreams )
161+
reader = IteratorReader( istream.binsha for istream in istreams )
162162
ostream_reader = db.stream_async(reader)
163163

164164
# read items individually to prevent hitting possible sys-limits
@@ -171,7 +171,7 @@ def istream_generator(offset=0, ni=ni):
171171
assert count == ni
172172

173173
# get info about our items
174-
reader = IteratorReader( istream.sha for istream in istreams )
174+
reader = IteratorReader( istream.binsha for istream in istreams )
175175
info_reader = db.info_async(reader)
176176

177177
count = 0
@@ -186,7 +186,7 @@ def istream_generator(offset=0, ni=ni):
186186
# add 2500 items, and obtain their output streams
187187
nni = 2500
188188
reader = IteratorReader(istream_generator(offset=ni, ni=nni))
189-
istream_to_sha = lambda istreams: [ istream.sha for istream in istreams ]
189+
istream_to_sha = lambda istreams: [ istream.binsha for istream in istreams ]
190190

191191
istream_reader = db.store_async(reader)
192192
istream_reader.set_post_cb(istream_to_sha)

0 commit comments

Comments
 (0)