Skip to content

Replace password in URI by stars if present to avoid leaking secrets in logs #1198

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Prev Previous commit
Next Next commit
Use copy and not inplace remove password + working case test
  • Loading branch information
mickours committed Mar 16, 2021
commit ffddedf5467df993b7a42fbd15afacb901bca6d7
16 changes: 8 additions & 8 deletions git/cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ def read_all_from_possibly_closed_stream(stream):
if status != 0:
errstr = read_all_from_possibly_closed_stream(self.proc.stderr)
log.debug('AutoInterrupt wait stderr: %r' % (errstr,))
raise GitCommandError(self.args, status, errstr)
raise GitCommandError(remove_password_if_present(self.args), status, errstr)
# END status handling
return status
# END auto interrupt
Expand Down Expand Up @@ -638,7 +638,7 @@ def execute(self, command,

:param env:
A dictionary of environment variables to be passed to `subprocess.Popen`.

:param max_chunk_size:
Maximum number of bytes in one chunk of data passed to the output_stream in
one invocation of write() method. If the given number is not positive then
Expand Down Expand Up @@ -706,7 +706,7 @@ def execute(self, command,
if is_win:
cmd_not_found_exception = OSError
if kill_after_timeout:
raise GitCommandError(command, '"kill_after_timeout" feature is not supported on Windows.')
raise GitCommandError(redacted_command, '"kill_after_timeout" feature is not supported on Windows.')
else:
if sys.version_info[0] > 2:
cmd_not_found_exception = FileNotFoundError # NOQA # exists, flake8 unknown @UndefinedVariable
Expand All @@ -721,7 +721,7 @@ def execute(self, command,
if istream:
istream_ok = "<valid stream>"
log.debug("Popen(%s, cwd=%s, universal_newlines=%s, shell=%s, istream=%s)",
command, cwd, universal_newlines, shell, istream_ok)
redacted_command, cwd, universal_newlines, shell, istream_ok)
try:
proc = Popen(command,
env=env,
Expand All @@ -737,7 +737,7 @@ def execute(self, command,
**subprocess_kwargs
)
except cmd_not_found_exception as err:
raise GitCommandNotFound(command, err) from err
raise GitCommandNotFound(redacted_command, err) from err

if as_process:
return self.AutoInterrupt(proc, command)
Expand Down Expand Up @@ -787,7 +787,7 @@ def _kill_process(pid):
watchdog.cancel()
if kill_check.isSet():
stderr_value = ('Timeout: the command "%s" did not complete in %d '
'secs.' % (" ".join(command), kill_after_timeout))
'secs.' % (" ".join(redacted_command), kill_after_timeout))
if not universal_newlines:
stderr_value = stderr_value.encode(defenc)
# strip trailing "\n"
Expand All @@ -811,7 +811,7 @@ def _kill_process(pid):
proc.stderr.close()

if self.GIT_PYTHON_TRACE == 'full':
cmdstr = " ".join(command)
cmdstr = " ".join(redacted_command)

def as_text(stdout_value):
return not output_stream and safe_decode(stdout_value) or '<OUTPUT_STREAM>'
Expand All @@ -827,7 +827,7 @@ def as_text(stdout_value):
# END handle debug printing

if with_exceptions and status != 0:
raise GitCommandError(command, status, stderr_value, stdout_value)
raise GitCommandError(redacted_command, status, stderr_value, stdout_value)

if isinstance(stdout_value, bytes) and stdout_as_string: # could also be output_stream
stdout_value = safe_decode(stdout_value)
Expand Down
6 changes: 4 additions & 2 deletions git/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,9 @@ def remove_password_if_present(cmdline):

This should be used for every log line that print a command line.
"""
new_cmdline = []
for index, to_parse in enumerate(cmdline):
new_cmdline.append(to_parse)
try:
url = urlsplit(to_parse)
# Remove password from the URL if present
Expand All @@ -358,11 +360,11 @@ def remove_password_if_present(cmdline):

edited_url = url._replace(
netloc=url.netloc.replace(url.password, "*****"))
cmdline[index] = urlunsplit(edited_url)
new_cmdline[index] = urlunsplit(edited_url)
except ValueError:
# This is not a valid URL
pass
return cmdline
return new_cmdline


#} END utilities
Expand Down
5 changes: 4 additions & 1 deletion test/test_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,6 @@ def test_clone_from_with_path_contains_unicode(self):

@with_rw_directory
def test_leaking_password_in_clone_logs(self, rw_dir):
"""Check that the password is not printed on the logs"""
password = "fakepassword1234"
try:
Repo.clone_from(
Expand All @@ -249,6 +248,10 @@ def test_leaking_password_in_clone_logs(self, rw_dir):
to_path=rw_dir)
except GitCommandError as err:
assert password not in str(err), "The error message '%s' should not contain the password" % err
# Working example from a blank private project
Repo.clone_from(
url="https://gitlab+deploy-token-392045:mLWhVus7bjLsy8xj8q2V@gitlab.com/mercierm/test_git_python",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might be a danger as it breaks the self-isolation even more (after all, GitPythons test rely on its own repository already), but I think it's OK to go with it and fix it when it breaks.
Maybe it never does.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree! I simply did not find another way to test the working use case. Maybe we can use a deploy token with the GitPython repository on Github but I think it is not available for public repo. Maybe you can create a private repo inside the gitpython-developers account in order to keep the responsibility in the same place?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about that too and will do that when it breaks. It's certainly the lazy way of doing things and it's basically a mine that is primed to blow sometime in the future, but… it's going to be okay ;).

While writing this I thought to myself that I don't want to be that lazy and took a look at the github ways of doing this. It turns out it only supports repository deploy keys, which indeed are full blown ssh keys that I don't want to deal with right now. This makes your account one of the pillars of the happiness of GitPython's CI, something not everyone can say about themselves :D.

to_path=rw_dir)

@with_rw_repo('HEAD')
def test_max_chunk_size(self, repo):
Expand Down
7 changes: 5 additions & 2 deletions test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,6 @@ def test_pickle_tzoffset(self):
self.assertEqual(t1._name, t2._name)

def test_remove_password_from_command_line(self):
"""Check that the password is not printed on the logs"""
password = "fakepassword1234"
url_with_pass = "https://fakeuser:{}@fakerepo.example.com/testrepo".format(password)
url_without_pass = "https://fakerepo.example.com/testrepo"
Expand All @@ -334,6 +333,10 @@ def test_remove_password_from_command_line(self):
cmd_2 = ["git", "clone", "-v", url_without_pass]
cmd_3 = ["no", "url", "in", "this", "one"]

assert password not in remove_password_if_present(cmd_1)
redacted_cmd_1 = remove_password_if_present(cmd_1)
assert password not in " ".join(redacted_cmd_1)
# Check that we use a copy
assert cmd_1 is not redacted_cmd_1
assert password in " ".join(cmd_1)
assert cmd_2 == remove_password_if_present(cmd_2)
assert cmd_3 == remove_password_if_present(cmd_3)