Skip to content
This repository was archived by the owner on Apr 14, 2024. It is now read-only.

Commit ac7d475

Browse files
committed
Fixed bug caused by incorrect comparison of 'filled-up' binary shas, as it would compare even the filling, instead of taking the canonical length into account and hence ignore the last 4 bytes if the original hex sha had an odd length
1 parent e9e0496 commit ac7d475

File tree

6 files changed

+49
-21
lines changed

6 files changed

+49
-21
lines changed

db/base.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,8 @@ def partial_to_complete_sha_hex(self, partial_hexsha):
287287
databases = list()
288288
_databases_recursive(self, databases)
289289

290-
if len(partial_hexsha) % 2 != 0:
290+
len_partial_hexsha = len(partial_hexsha)
291+
if len_partial_hexsha % 2 != 0:
291292
partial_binsha = hex_to_bin(partial_hexsha + "0")
292293
else:
293294
partial_binsha = hex_to_bin(partial_hexsha)
@@ -300,7 +301,7 @@ def partial_to_complete_sha_hex(self, partial_hexsha):
300301
if hasattr(db, 'partial_to_complete_sha_hex'):
301302
full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
302303
else:
303-
full_bin_sha = db.partial_to_complete_sha(partial_binsha)
304+
full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
304305
# END handle database type
305306
except BadObject:
306307
continue

db/pack.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -177,13 +177,17 @@ def entities(self):
177177
""":return: list of pack entities operated upon by this database"""
178178
return [ item[1] for item in self._entities ]
179179

180-
def partial_to_complete_sha(self, partial_binsha):
180+
def partial_to_complete_sha(self, partial_binsha, canonical_length):
181181
""":return: 20 byte sha as inferred by the given partial binary sha
182+
:param partial_binsha: binary sha with less than 20 bytes
183+
:param canonical_length: length of the corresponding canonical representation.
184+
It is required as binary sha's cannot display whether the original hex sha
185+
had an odd or even number of characters
182186
:raise AmbiguousObjectName:
183187
:raise BadObject: """
184188
candidate = None
185189
for item in self._entities:
186-
item_index = item[1].index().partial_sha_to_index(partial_binsha)
190+
item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
187191
if item_index is not None:
188192
sha = item[1].index().sha(item_index)
189193
if candidate and candidate != sha:

fun.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@
4040
chunk_size = 1000*mmap.PAGESIZE
4141

4242
__all__ = ('is_loose_object', 'loose_object_header_info', 'msb_size', 'pack_object_header_info',
43-
'write_object', 'loose_object_header', 'stream_copy', 'apply_delta_data' )
43+
'write_object', 'loose_object_header', 'stream_copy', 'apply_delta_data',
44+
'is_equal_canonical_sha' )
4445

4546
#{ Routines
4647

@@ -223,5 +224,23 @@ def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, target_fi
223224
# yes, lets use the exact same error message that git uses :)
224225
assert i == delta_buf_size, "delta replay has gone wild"
225226

227+
228+
def is_equal_canonical_sha(canonical_length, match, sha1):
229+
"""
230+
:return: True if the given lhs and rhs 20 byte binary shas
231+
The comparison will take the canonical_length of the match sha into account,
232+
hence the comparison will only use the last 4 bytes for uneven canonical representations
233+
:param match: less than 20 byte sha
234+
:param sha1: 20 byte sha"""
235+
binary_length = canonical_length/2
236+
if match[:binary_length] != sha1[:binary_length]:
237+
return False
238+
239+
if canonical_length - binary_length and \
240+
(ord(match[-1]) ^ ord(sha1[len(match)-1])) & 0xf0:
241+
return False
242+
# END handle uneven canonnical length
243+
return True
244+
226245
#} END routines
227246

pack.py

+16-12
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
from fun import (
1414
pack_object_header_info,
15+
is_equal_canonical_sha,
1516
type_id_to_type_map,
1617
write_object,
1718
stream_copy,
@@ -303,24 +304,26 @@ def sha_to_index(self, sha):
303304
# END bisect
304305
return None
305306

306-
def partial_sha_to_index(self, partial_sha):
307-
""":return: index as in `sha_to_index` or None if the sha was not found in this
308-
index file
309-
:param partial_sha: an at least two bytes of a partial sha
307+
def partial_sha_to_index(self, partial_bin_sha, canonical_length):
308+
"""
309+
:return: index as in `sha_to_index` or None if the sha was not found in this
310+
index file
311+
:param partial_bin_sha: an at least two bytes of a partial binary sha
312+
:param canonical_length: lenght of the original hexadecimal representation of the
313+
given partial binary sha
310314
:raise AmbiguousObjectName:"""
311-
if len(partial_sha) < 2:
315+
if len(partial_bin_sha) < 2:
312316
raise ValueError("Require at least 2 bytes of partial sha")
313317

314-
first_byte = ord(partial_sha[0])
318+
first_byte = ord(partial_bin_sha[0])
315319
get_sha = self.sha
316320
lo = 0 # lower index, the left bound of the bisection
317321
if first_byte != 0:
318322
lo = self._fanout_table[first_byte-1]
319323
hi = self._fanout_table[first_byte] # the upper, right bound of the bisection
320324

321-
len_partial = len(partial_sha)
322325
# fill the partial to full 20 bytes
323-
filled_sha = partial_sha + '\0'*(20 - len_partial)
326+
filled_sha = partial_bin_sha + '\0'*(20 - len(partial_bin_sha))
324327

325328
# find lowest
326329
while lo < hi:
@@ -336,14 +339,15 @@ def partial_sha_to_index(self, partial_sha):
336339
lo = mid + 1
337340
# END handle midpoint
338341
# END bisect
339-
if lo < self.size:
342+
343+
if lo < self.size():
340344
cur_sha = get_sha(lo)
341-
if cur_sha[:len_partial] == partial_sha:
345+
if is_equal_canonical_sha(canonical_length, partial_bin_sha, cur_sha):
342346
next_sha = None
343-
if lo+1 < self.size:
347+
if lo+1 < self.size():
344348
next_sha = get_sha(lo+1)
345349
if next_sha and next_sha == cur_sha:
346-
raise AmbiguousObjectName(partial_sha)
350+
raise AmbiguousObjectName(partial_bin_sha)
347351
return lo
348352
# END if we have a match
349353
# END if we found something

test/db/test_pack.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def test_writing(self, path):
5555
for i, sha in enumerate(sha_list):
5656
short_sha = sha[:max((i % max_bytes), min_bytes)]
5757
try:
58-
assert pdb.partial_to_complete_sha(short_sha) == sha
58+
assert pdb.partial_to_complete_sha(short_sha, len(short_sha)*2) == sha
5959
except AmbiguousObjectName:
6060
num_ambiguous += 1
6161
pass # valid, we can have short objects
@@ -67,4 +67,4 @@ def test_writing(self, path):
6767
# assert num_ambiguous
6868

6969
# non-existing
70-
self.failUnlessRaises(BadObject, pdb.partial_to_complete_sha, "\0\0")
70+
self.failUnlessRaises(BadObject, pdb.partial_to_complete_sha, "\0\0", 4)

test/test_pack.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,10 @@ def _assert_index_file(self, index, version, size):
6060

6161
# verify partial sha
6262
for l in (4,8,11,17,20):
63-
assert index.partial_sha_to_index(sha[:l]) == oidx
63+
assert index.partial_sha_to_index(sha[:l], l*2) == oidx
6464

6565
# END for each object index in indexfile
66-
self.failUnlessRaises(ValueError, index.partial_sha_to_index, "\0")
66+
self.failUnlessRaises(ValueError, index.partial_sha_to_index, "\0", 2)
6767

6868

6969
def _assert_pack_file(self, pack, version, size):

0 commit comments

Comments
 (0)