Skip to content
This repository was archived by the owner on Apr 14, 2024. It is now read-only.

Commit e750ce0

Browse files
committed
Added method to obtain a full sha from a partial sha, for each of the databases. The IndexFile has a new method to retrieve an index from a partial sha, instead of from a full sha, to provide the required functionality
1 parent 46bf471 commit e750ce0

File tree

9 files changed

+211
-7
lines changed

9 files changed

+211
-7
lines changed

db/git.py

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,33 @@
99
from ref import ReferenceDB
1010

1111
from gitdb.util import LazyMixin
12-
from gitdb.exc import InvalidDBRoot
12+
from gitdb.exc import (
13+
InvalidDBRoot,
14+
BadObject,
15+
AmbiguousObjectName
16+
)
1317
import os
1418

19+
from gitdb.util import hex_to_bin
20+
1521
__all__ = ('GitDB', )
1622

23+
24+
def _databases_recursive(database, output):
    """Flatten ``database`` into ``output``, a list of non-compound databases.

    A compound database contributes its direct non-compound children first,
    in the order its ``databases()`` call reports them; compound children are
    expanded afterwards, one after another. Any other database is appended
    as it is.

    :param database: the database to flatten, compound or not
    :param output: list which is extended in place with the databases found"""
    if not isinstance(database, CompoundDB):
        output.append(database)
        return
    # END handle non-compound database

    dbs = database.databases()
    # direct leaves first, nested compounds afterwards - keeps the original order
    output.extend(db for db in dbs if not isinstance(db, CompoundDB))
    for cdb in dbs:
        if isinstance(cdb, CompoundDB):
            _databases_recursive(cdb, output)
        # END expand nested compound database
    # END for each child database
36+
37+
38+
1739
class GitDB(FileDBBase, ObjectDBW, CompoundDB):
1840
"""A git-style object database, which contains all objects in the 'objects'
1941
subdirectory"""
@@ -71,4 +93,45 @@ def ostream(self):
7193

7294
def set_ostream(self, ostream):
7395
return self._loose_db.set_ostream(ostream)
96+
7497
#} END objectdbw interface
98+
99+
#{ Interface
100+
101+
def partial_to_complete_sha_hex(self, partial_hexsha):
    """
    :return: 20 byte binary sha1 of the one object matching the given
        less-than-40 byte hexsha
    :param partial_hexsha: hexsha with less than 40 byte
    :raise AmbiguousObjectName: if the databases yield different objects for
        the partial sha
    :raise BadObject: if none of the databases yields a match"""
    flat_dbs = list()
    _databases_recursive(self, flat_dbs)

    # the binary conversion gets an even number of hex digits, odd input is
    # padded with a zero digit
    # NOTE(review): the padding digit becomes part of the binary prefix handed
    # to the non-loose databases - confirm they are meant to match on it
    even_hexsha = partial_hexsha
    if len(partial_hexsha) % 2 != 0:
        even_hexsha = partial_hexsha + "0"
    # END assure successful binary conversion
    partial_binsha = hex_to_bin(even_hexsha)

    candidate = None
    for db in flat_dbs:
        try:
            # loose databases are queried by hex, all others by binary prefix
            if isinstance(db, LooseObjectDB):
                match = db.partial_to_complete_sha_hex(partial_hexsha)
            else:
                match = db.partial_to_complete_sha(partial_binsha)
            # END handle database type
        except BadObject:
            # this database has no match, the next one still might
            continue
        # END ignore bad objects
        if not match:
            continue
        # END skip empty results
        if candidate and candidate != match:
            raise AmbiguousObjectName(partial_hexsha)
        candidate = match
    # END for each db
    if not candidate:
        raise BadObject(partial_binsha)
    return candidate
136+
137+
#} END interface

db/loose.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77

88
from gitdb.exc import (
99
InvalidDBRoot,
10-
BadObject,
10+
BadObject,
11+
AmbiguousObjectName
1112
)
1213

1314
from gitdb.stream import (
@@ -102,6 +103,23 @@ def readable_db_object_path(self, hexsha):
102103
# END handle cache
103104
raise BadObject(hexsha)
104105

106+
def partial_to_complete_sha_hex(self, partial_hexsha):
    """:return: 20 byte binary sha1 string which matches the given name uniquely
    :param partial_hexsha: hexadecimal partial name
    :raise AmbiguousObjectName: if more than one object matches the name
    :raise BadObject: if no object matches the name"""
    match = None
    for binsha in self.sha_iter():
        if not bin_to_hex(binsha).startswith(partial_hexsha):
            continue
        # END skip objects with a different prefix
        # the same object is never found twice, so a second hit is another object
        if match is not None:
            raise AmbiguousObjectName(partial_hexsha)
        match = binsha
    # END for each object
    if match is None:
        raise BadObject(partial_hexsha)
    return match
122+
105123
#} END interface
106124

107125
def _map_loose_object(self, sha):

db/pack.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from gitdb.exc import (
1111
BadObject,
1212
UnsupportedOperation,
13+
AmbiguousObjectName
1314
)
1415

1516
from gitdb.pack import PackEntity
@@ -175,5 +176,26 @@ def update_cache(self, force=False):
175176
def entities(self):
176177
""":return: list of pack entities operated upon by this database"""
177178
return [ item[1] for item in self._entities ]
179+
180+
def partial_to_complete_sha(self, partial_binsha):
    """:return: 20 byte sha as inferred by the given partial binary sha
    :param partial_binsha: leading bytes of the binary sha to look up
    :raise AmbiguousObjectName: if the entities hold different objects which
        match the partial sha
    :raise BadObject: if no entity holds a matching object"""
    candidate = None
    for item in self._entities:
        # fetch the index once, it serves the lookup and the sha query alike
        index = item[1].index()
        item_index = index.partial_sha_to_index(partial_binsha)
        if item_index is None:
            continue
        # END skip entities without a match

        sha = index.sha(item_index)
        # finding the very same sha in another entity is fine, only a
        # different sha makes the name ambiguous
        if candidate and candidate != sha:
            raise AmbiguousObjectName(partial_binsha)
        candidate = sha
    # END for each entity

    if candidate:
        return candidate

    # still not found ?
    raise BadObject(partial_binsha)
178200

179201
#} END interface

exc.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,11 @@ class BadObject(ODBError):
1313

1414
def __str__(self):
1515
return "BadObject: %s" % to_hex_sha(self.args[0])
16-
16+
17+
class AmbiguousObjectName(ODBError):
    """Raised when a name, possibly an abbreviated one, fails to identify
    exactly one object in the database"""
20+
1721
class BadObjectType(ODBError):
1822
"""The object had an unsupported type"""
1923

pack.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,53 @@ def sha_to_index(self, sha):
302302
# END handle midpoint
303303
# END bisect
304304
return None
305-
305+
306+
def partial_sha_to_index(self, partial_sha):
    """:return: index as in `sha_to_index` or None if the sha was not found in this
        index file
    :param partial_sha: at least the first two bytes of a binary sha
    :raise ValueError: if less than two bytes are given
    :raise AmbiguousObjectName: if more than one sha in this index file starts
        with the given bytes"""
    len_partial = len(partial_sha)
    if len_partial < 2:
        raise ValueError("Require at least 2 bytes of partial sha")

    # a one-byte slice works with ord() no matter how indexing behaves
    first_byte = ord(partial_sha[0:1])
    get_sha = self.sha
    lo = 0                  # lower index, the left bound of the bisection
    if first_byte != 0:
        lo = self._fanout_table[first_byte - 1]
    hi = self._fanout_table[first_byte]     # the upper, right bound of the bisection

    # fill the partial to full 20 bytes - no sha with this prefix sorts lower
    filled_sha = partial_sha + b'\0' * (20 - len_partial)

    # find the lowest sha which is not smaller than the filled one
    while lo < hi:
        mid = (lo + hi) // 2
        mid_sha = get_sha(mid)
        if filled_sha < mid_sha:
            hi = mid
        elif filled_sha == mid_sha:
            # perfect match
            lo = mid
            break
        else:
            lo = mid + 1
        # END handle midpoint
    # END bisect

    # size is not defined in this block: accept a plain attribute as well as
    # a method, so the bounds checks below always compare numbers
    size = self.size
    if callable(size):
        size = size()
    # END resolve size

    if lo >= size:
        return None
    if get_sha(lo)[:len_partial] != partial_sha:
        return None
    # END handle no match

    # the bisection relies on sorted shas, hence another sha with the same
    # prefix would directly follow the one we found
    if lo + 1 < size and get_sha(lo + 1)[:len_partial] == partial_sha:
        raise AmbiguousObjectName(partial_sha)
    return lo
351+
306352
if 'PackIndexFile_sha_to_index' in globals():
307353
# NOTE: It's just about 25% faster, the major bottleneck might be the attr
308354
# accesses

test/db/test_git.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from lib import *
2+
from gitdb.exc import BadObject
23
from gitdb.db import GitDB
34
from gitdb.base import OStream, OInfo
4-
from gitdb.util import hex_to_bin
5+
from gitdb.util import hex_to_bin, bin_to_hex
56

67
class TestGitDB(TestDBBase):
78

@@ -16,7 +17,15 @@ def test_reading(self):
1617
assert isinstance(gdb.info(gitdb_sha), OInfo)
1718
assert isinstance(gdb.stream(gitdb_sha), OStream)
1819
assert gdb.size() > 200
19-
assert len(list(gdb.sha_iter())) == gdb.size()
20+
sha_list = list(gdb.sha_iter())
21+
assert len(sha_list) == gdb.size()
22+
23+
# test partial shas
24+
for binsha in sha_list:
25+
assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8]) == binsha
26+
# END for each sha
27+
28+
self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000")
2029

2130
@with_rw_directory
2231
def test_writing(self, path):

test/db/test_loose.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
from lib import *
22
from gitdb.db import LooseObjectDB
3+
from gitdb.exc import BadObject
4+
from gitdb.util import bin_to_hex
35

46
class TestLooseDB(TestDBBase):
57

68
@with_rw_directory
7-
def test_writing(self, path):
9+
def test_basics(self, path):
810
ldb = LooseObjectDB(path)
911

1012
# write data
@@ -16,3 +18,13 @@ def test_writing(self, path):
1618
assert shas and len(shas[0]) == 20
1719

1820
assert len(shas) == ldb.size()
21+
22+
# verify find short object
23+
long_sha = bin_to_hex(shas[-1])
24+
for short_sha in (long_sha[:20], long_sha[:5]):
25+
assert bin_to_hex(ldb.partial_to_complete_sha_hex(short_sha)) == long_sha
26+
# END for each sha
27+
28+
self.failUnlessRaises(BadObject, ldb.partial_to_complete_sha_hex, '0000')
29+
# raises if no object could be found
30+

test/db/test_pack.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
from gitdb.db import PackedDB
33
from gitdb.test.lib import fixture_path
44

5+
from gitdb.exc import BadObject, AmbiguousObjectName
6+
57
import os
68
import random
79

@@ -44,3 +46,25 @@ def test_writing(self, path):
4446
info = pdb.info(sha)
4547
stream = pdb.stream(sha)
4648
# END for each sha to query
49+
50+
51+
# test short finding - be a bit more brutal here
52+
max_bytes = 19
53+
min_bytes = 2
54+
num_ambiguous = 0
55+
for i, sha in enumerate(sha_list):
56+
short_sha = sha[:max((i % max_bytes), min_bytes)]
57+
try:
58+
assert pdb.partial_to_complete_sha(short_sha) == sha
59+
except AmbiguousObjectName:
60+
num_ambiguous += 1
61+
pass # valid, we can have short objects
62+
# END exception handling
63+
# END for each sha to find
64+
65+
# we should have at least one ambiguous, considering the small sizes
66+
# but in our pack, there is no ambiguous ...
67+
# assert num_ambiguous
68+
69+
# non-existing
70+
self.failUnlessRaises(BadObject, pdb.partial_to_complete_sha, "\0\0")

test/test_pack.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,13 @@ def _assert_index_file(self, index, version, size):
5757
assert entry[0] == index.offset(oidx)
5858
assert entry[1] == sha
5959
assert entry[2] == index.crc(oidx)
60+
61+
# verify partial sha
62+
for l in (4,8,11,17,20):
63+
assert index.partial_sha_to_index(sha[:l]) == oidx
64+
6065
# END for each object index in indexfile
66+
self.failUnlessRaises(ValueError, index.partial_sha_to_index, "\0")
6167

6268

6369
def _assert_pack_file(self, pack, version, size):

0 commit comments

Comments
 (0)