From 930b3ffc6ab4b3ea651a4aa34aaf6c3ed4dc623c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 8 Mar 2019 14:31:55 -0800 Subject: [PATCH 1/3] commit.go: support multi-line header continuations When Git wishes to continue one or more of a commit's extra headers on more than a single line, it writes out the following: parent: tree: gpgsig: -----BEGIN PGP SIGNATURE----- -----END PGP SIGNATURE----- Our current parsing implementation does not handle this correctly, based on a misunderstanding that one line is equivalent to one extra header, and vice versa. In fact, the situation presently is even more dire than merely parsing the 'gpgsig' header incorrectly: we'll split the signature body and ending lines into their own "headers" and in doing so trim off the leading whitespace. In practice, this means that we can corrupt commits when round-tripping them in many interesting ways [1]. To address the situation, we do two things: 1. Teach gitobj that when we are parsing extra headers for a commit, _and_ a header line begins with a single whitespace character, we are in fact continuing the last known header. 2. Likewise, teach gitobj, when encoding a commit which has an extra header whose value contains an LF character, to follow each LF with a leading space, so that commits of this form round-trip successfully. Together, (1) and (2) mean that we parse the 'gpgsig' header in the above example as a _single_ entry in the commit's 'ExtraHeaders' field, as expected. 
[1]: https://github.com/git-lfs/git-lfs/issues/3530 --- commit.go | 24 +++++++++++++++++++----- commit_test.go | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/commit.go b/commit.go index 8a21631..48ea696 100644 --- a/commit.go +++ b/commit.go @@ -132,10 +132,23 @@ func (c *Commit) Decode(from io.Reader, size int64) (n int, err error) { c.Committer = "" } default: - c.ExtraHeaders = append(c.ExtraHeaders, &ExtraHeader{ - K: fields[0], - V: strings.Join(fields[1:], " "), - }) + if strings.HasPrefix(s.Text(), " ") { + idx := len(c.ExtraHeaders) - 1 + hdr := c.ExtraHeaders[idx] + + // Append the line of text (removing the + // leading space) to the last header + // that we parsed, adding a newline + // between the two. + hdr.V = strings.Join(append( + []string{hdr.V}, s.Text()[1:], + ), "\n") + } else { + c.ExtraHeaders = append(c.ExtraHeaders, &ExtraHeader{ + K: fields[0], + V: strings.Join(fields[1:], " "), + }) + } } } else { messageParts = append(messageParts, s.Text()) @@ -177,7 +190,8 @@ func (c *Commit) Encode(to io.Writer) (n int, err error) { n = n + n2 for _, hdr := range c.ExtraHeaders { - n3, err := fmt.Fprintf(to, "%s %s\n", hdr.K, hdr.V) + n3, err := fmt.Fprintf(to, "%s %s\n", + hdr.K, strings.Replace(hdr.V, "\n", "\n ", -1)) if err != nil { return n, err } diff --git a/commit_test.go b/commit_test.go index e35457c..882a09e 100644 --- a/commit_test.go +++ b/commit_test.go @@ -10,6 +10,7 @@ import ( "time" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestCommitReturnsCorrectObjectType(t *testing.T) { @@ -20,6 +21,8 @@ func TestCommitEncoding(t *testing.T) { author := &Signature{Name: "John Doe", Email: "john@example.com", When: time.Now()} committer := &Signature{Name: "Jane Doe", Email: "jane@example.com", When: time.Now()} + sig := "-----BEGIN PGP SIGNATURE-----\n\n-----END PGP SIGNATURE-----" + c := &Commit{ Author: author.String(), Committer: 
committer.String(), @@ -29,6 +32,7 @@ func TestCommitEncoding(t *testing.T) { TreeID: []byte("cccccccccccccccccccc"), ExtraHeaders: []*ExtraHeader{ {"foo", "bar"}, + {"gpgsig", sig}, }, Message: "initial commit", } @@ -44,6 +48,9 @@ func TestCommitEncoding(t *testing.T) { assertLine(t, buf, "author %s", author.String()) assertLine(t, buf, "committer %s", committer.String()) assertLine(t, buf, "foo bar") + assertLine(t, buf, "gpgsig -----BEGIN PGP SIGNATURE-----") + assertLine(t, buf, " ") + assertLine(t, buf, " -----END PGP SIGNATURE-----") assertLine(t, buf, "") assertLine(t, buf, "initial commit") @@ -164,6 +171,41 @@ func TestCommitDecodingWithWhitespace(t *testing.T) { assert.Equal(t, "tree <- initial commit", commit.Message) } +func TestCommitDecodingMultilineHeader(t *testing.T) { + author := &Signature{Name: "", Email: "john@example.com", When: time.Now()} + committer := &Signature{Name: "", Email: "jane@example.com", When: time.Now()} + + treeId := []byte("cccccccccccccccccccc") + + from := new(bytes.Buffer) + + fmt.Fprintf(from, "author %s\n", author) + fmt.Fprintf(from, "committer %s\n", committer) + fmt.Fprintf(from, "tree %s\n", hex.EncodeToString(treeId)) + fmt.Fprintf(from, "gpgsig -----BEGIN PGP SIGNATURE-----\n") + fmt.Fprintf(from, " \n") + fmt.Fprintf(from, " -----END PGP SIGNATURE-----\n") + fmt.Fprintf(from, "\ninitial commit\n") + + flen := from.Len() + + commit := new(Commit) + n, err := commit.Decode(from, int64(flen)) + + require.Nil(t, err) + require.Equal(t, flen, n) + require.Len(t, commit.ExtraHeaders, 1) + + hdr := commit.ExtraHeaders[0] + + assert.Equal(t, "gpgsig", hdr.K) + assert.EqualValues(t, []string{ + "-----BEGIN PGP SIGNATURE-----", + "", + "-----END PGP SIGNATURE-----"}, + strings.Split(hdr.V, "\n")) +} + func assertLine(t *testing.T, buf *bytes.Buffer, wanted string, args ...interface{}) { got, err := buf.ReadString('\n') if err == io.EOF { From c2e7ae162081c247fb11d6594870b6c6001b3080 Mon Sep 17 00:00:00 2001 From: Taylor 
Blau Date: Fri, 8 Mar 2019 21:09:37 -0800 Subject: [PATCH 2/3] commit.go: parse whitespace-only lines during continuations When parsing an extra header that is continued over multiple lines, an earlier check on the length of whitespace-separated fields caused the loop to terminate early, dropping continuation lines that consist only of whitespace. Tweak the logic slightly in order to capture these, and allow us to successfully round-trip commit parsing. --- commit.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/commit.go b/commit.go index 48ea696..c8556f0 100644 --- a/commit.go +++ b/commit.go @@ -105,7 +105,18 @@ func (c *Commit) Decode(from io.Reader, size int64) (n int, err error) { continue } - if fields := strings.Fields(text); len(fields) > 0 && !finishedHeaders { + if fields := strings.Fields(text); !finishedHeaders { + if len(fields) == 0 { + // Executing in this block means that we got a + // whitespace-only line, while parsing a header. + // + // Append it to the last-parsed header, and + // continue. + c.ExtraHeaders[len(c.ExtraHeaders)-1].V += + fmt.Sprintf("\n%s", text[1:]) + continue + } + switch fields[0] { case "tree": id, err := hex.DecodeString(fields[1]) From 69666fc4cf8e159f206021f8750160af633953fc Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 8 Mar 2019 21:10:40 -0800 Subject: [PATCH 3/3] object_db_test.go: add a round-trip commit parsing test --- object_db_test.go | 60 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/object_db_test.go b/object_db_test.go index 6f09a9d..eb63619 100644 --- a/object_db_test.go +++ b/object_db_test.go @@ -15,6 +15,44 @@ import ( "github.com/stretchr/testify/require" ) +const roundTripCommitSha string = `561ed224a6bd39232d902ad8023c0ebe44fbf6c5` +const roundTripCommit string = `tree f2ebdf9c967f69d57b370901f9344596ec47e51c +parent fe8fbf7de1cd9f08ae642e502bf5de94e523cc08 +author brian m. 
carlson 1543506816 +0000 +committer brian m. carlson 1543506816 +0000 +gpgsig -----BEGIN PGP SIGNATURE----- + Version: GnuPG/MacGPG2 v2.2.9 (Darwin) + + iQIGBAABCgAwFiEETbktHYzuflTwZxNFLQybwS+Cs6EFAlwAC4cSHGJrMjIwNEBn + aXRodWIuY29tAAoJEC0Mm8EvgrOhiRMN/2rTxkBb5BeQQeq7rPiIW8+29FzuvPeD + /DhxlRKwKut9h4qhtxNQszTezxhP4PLOkuMvUax2pGXCQ8cjkSswagmycev+AB4d + s0loG4SrEwvH8nAdr6qfNx4ZproRJ8QaEJqyN9SqF7PCWrUAoJKehdgA38WtYFws + ON+nIwzDIvgpoNI+DzgWrx16SOTp87xt8RaJOVK9JNZQk8zBh7rR2viS9CWLysmz + wOh3j4XI1TZ5IFJfpCxZzUDFgb6K3wpAX6Vux5F1f3cN5MsJn6WUJCmYCvwofeeZ + 6LMqKgry7EA12l7Tv/JtmMeh+rbT5WLdMIsjascUaHRhpJDNqqHCKMEj1zh3QZNY + Hycdcs24JouVAtPwg07f1ncPU3aE624LnNRA9A6Ih6SkkKE4tgMVA5qkObDfwzLE + lWyBj2QKySaIdSlU2EcoH3UK33v/ofrRr3+bUkDgxdqeV/RkBVvfpeMwFVSFWseE + bCcotryLCZF7vBQU+pKC+EaZxQV9L5+McGzcDYxUmqrhwtR+azRBYFOw+lOT4sYD + FxdLFWCtmDhKPX5Ajci2gmyfgCwdIeDhSuOf2iQQGRpE6y7aka4AlaE= + =UyqL + -----END PGP SIGNATURE----- + +pack/set: ignore packs without indices + +When we look for packs to read, we look for a pack file, and then an +index, and fail if either one is missing. When Git looks for packs to +read, it looks only for indices and then checks if the pack is present. + +The Git approach handles the case when there is an extra pack that lacks +an index, while our approach does not. Consequently, we can get various +errors (showing up so far only on Windows) when an index is missing. + +If the index file cannot be read for any reason, simply skip the entire +pack altogether and continue on. This leaves us no more or less +functional than Git in terms of discovering objects and makes our error +handling more robust. 
+` + func TestDecodeObject(t *testing.T) { sha := "af5626b4a114abcb82d63db7c8082c3c4756e51b" contents := "Hello, world!\n" @@ -223,6 +261,28 @@ func TestWriteCommit(t *testing.T) { assert.NotNil(t, s.(*memoryStorer).fs[hex.EncodeToString(sha)]) } +func TestWriteCommitWithGPGSignature(t *testing.T) { + b, err := NewMemoryBackend(nil) + require.NoError(t, err) + + odb, err := FromBackend(b) + require.NoError(t, err) + + commit := new(Commit) + _, err = commit.Decode( + strings.NewReader(roundTripCommit), int64(len(roundTripCommit))) + require.NoError(t, err) + + buf := new(bytes.Buffer) + commit.Encode(buf) + assert.Equal(t, roundTripCommit, buf.String()) + + sha, err := odb.WriteCommit(commit) + + assert.Nil(t, err) + assert.Equal(t, roundTripCommitSha, hex.EncodeToString(sha)) +} + func TestDecodeTag(t *testing.T) { const sha = "7639ba293cd2c457070e8446ecdea56682af0f48" tagShaHex, err := hex.DecodeString(sha)