Skip to content

Commit 17f5d13

Browse files
committed
Added test to assure blame can deal with binary patches.
Fixes gitpython-developers#74
1 parent 1531d78 commit 17f5d13

File tree

4 files changed

+44
-15
lines changed

4 files changed

+44
-15
lines changed

git/cmd.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
)
3232

3333
execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output',
34-
'with_exceptions', 'as_process',
34+
'with_exceptions', 'as_process', 'stdout_as_string',
3535
'output_stream')
3636

3737
log = logging.getLogger('git.cmd')
@@ -411,6 +411,7 @@ def execute(self, command,
411411
with_exceptions=True,
412412
as_process=False,
413413
output_stream=None,
414+
stdout_as_string=True,
414415
**subprocess_kwargs
415416
):
416417
"""Handles executing the command on the shell and consumes and returns
@@ -454,6 +455,11 @@ def execute(self, command,
454455
output pipe to the given output stream directly.
455456
Judging from the implementation, you shouldn't use this flag !
456457
458+
:param stdout_as_string:
459+
if False, the commands standard output will be bytes. Otherwise, it will be
460+
decoded into a string using the default encoding (usually utf-8).
461+
The latter can fail, if the output contains binary data.
462+
457463
:param subprocess_kwargs:
458464
Keyword arguments to be passed to subprocess.Popen. Please note that
459465
some of the valid kwargs are already set by this method, the ones you
@@ -545,7 +551,7 @@ def execute(self, command,
545551
else:
546552
raise GitCommandError(command, status, stderr_value)
547553

548-
if isinstance(stdout_value, bytes): # could also be output_stream
554+
if isinstance(stdout_value, bytes) and stdout_as_string: # could also be output_stream
549555
stdout_value = stdout_value.decode(defenc)
550556

551557
# Allow access to the command's status code

git/repo/base.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -587,14 +587,28 @@ def blame(self, rev, file):
587587
A list of tuples associating a Commit object with a list of lines that
588588
changed within the given commit. The Commit objects will be given in order
589589
of appearance."""
590-
data = self.git.blame(rev, '--', file, p=True)
590+
data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False)
591591
commits = dict()
592592
blames = list()
593593
info = None
594594

595-
for line in data.splitlines(False):
596-
parts = self.re_whitespace.split(line, 1)
597-
firstpart = parts[0]
595+
keepends = True
596+
for line in data.splitlines(keepends):
597+
try:
598+
line = line.rstrip().decode(defenc)
599+
except UnicodeDecodeError:
600+
firstpart = ''
601+
is_binary = True
602+
else:
603+
# As we don't have an idea when the binary data ends, as it could contain multiple newlines
604+
# in the process. So we rely on being able to decode to tell us what is is.
605+
# This can absolutely fail even on text files, but even if it does, we should be fine treating it
606+
# as binary instead
607+
parts = self.re_whitespace.split(line, 1)
608+
firstpart = parts[0]
609+
is_binary = False
610+
# end handle decode of line
611+
598612
if self.re_hexsha_only.search(firstpart):
599613
# handles
600614
# 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start
@@ -651,10 +665,18 @@ def blame(self, rev, file):
651665
message=info['summary'])
652666
commits[sha] = c
653667
# END if commit objects needs initial creation
654-
m = self.re_tab_full_line.search(line)
655-
text, = m.groups()
668+
if not is_binary:
669+
if line and line[0] == '\t':
670+
line = line[1:]
671+
else:
672+
# NOTE: We are actually parsing lines out of binary data, which can lead to the
673+
# binary being split up along the newline separator. We will append this to the blame
674+
# we are currently looking at, even though it should be concatenated with the last line
675+
# we have seen.
676+
pass
677+
# end handle line contents
656678
blames[-1][0] = c
657-
blames[-1][1].append(text)
679+
blames[-1][1].append(line)
658680
info = {'id': sha}
659681
# END if we collected commit info
660682
# END distinguish filename,summary,rest

git/test/fixtures/blame_binary

14.5 KB
Binary file not shown.

git/test/test_repo.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,7 @@
3333
from git.util import join_path_native
3434
from git.exc import BadObject
3535
from gitdb.util import bin_to_hex
36-
from git.compat import (
37-
string_types,
38-
defenc
39-
)
36+
from git.compat import string_types
4037
from gitdb.test.lib import with_rw_directory
4138

4239
import os
@@ -275,15 +272,14 @@ def test_archive(self):
275272

276273
@patch.object(Git, '_call_process')
277274
def test_should_display_blame_information(self, git):
278-
git.return_value = fixture('blame').decode(defenc)
275+
git.return_value = fixture('blame')
279276
b = self.rorepo.blame('master', 'lib/git.py')
280277
assert_equal(13, len(b))
281278
assert_equal(2, len(b[0]))
282279
# assert_equal(25, reduce(lambda acc, x: acc + len(x[-1]), b))
283280
assert_equal(hash(b[0][0]), hash(b[9][0]))
284281
c = b[0][0]
285282
assert_true(git.called)
286-
assert_equal(git.call_args, (('blame', 'master', '--', 'lib/git.py'), {'p': True}))
287283

288284
assert_equal('634396b2f541a9f2d58b00be1a07f0c358b999b3', c.hexsha)
289285
assert_equal('Tom Preston-Werner', c.author.name)
@@ -300,6 +296,11 @@ def test_should_display_blame_information(self, git):
300296
assert_true(isinstance(tlist[0], string_types))
301297
assert_true(len(tlist) < sum(len(t) for t in tlist)) # test for single-char bug
302298

299+
# BINARY BLAME
300+
git.return_value = fixture('blame_binary')
301+
blames = self.rorepo.blame('master', 'rps')
302+
assert len(blames) == 2
303+
303304
def test_blame_real(self):
304305
c = 0
305306
nml = 0 # amount of multi-lines per blame

0 commit comments

Comments
 (0)