fix: Invalid UTF-8 commit messages in JSON API responses (#37542)

This commit is contained in:
Nicolas
2026-05-07 16:19:45 +02:00
committed by GitHub
parent 2200ed7499
commit c9b9e376fb
54 changed files with 221 additions and 215 deletions

View File

@@ -11,19 +11,28 @@ import (
"os/exec"
"strings"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git/gitcmd"
"code.gitea.io/gitea/modules/util"
)
type CommitMessage struct {
MessageRaw string
messageUTF8 *string
messageTitle *string
messageBody *string
}
// Commit represents a git commit.
type Commit struct {
Tree // FIXME: bad design, this field can be nil if the commit is from "last commit cache"
ID ObjectID
Author *Signature // never nil
Committer *Signature // never nil
CommitMessage string
Signature *CommitSignature
CommitMessage
ID ObjectID
Author *Signature // never nil
Committer *Signature // never nil
Signature *CommitSignature
Parents []ObjectID // ID strings
submoduleCache *ObjectCache[*SubModule]
@@ -35,19 +44,28 @@ type CommitSignature struct {
Payload string
}
// Message returns the commit message. Same as retrieving CommitMessage directly.
func (c *Commit) Message() string {
// FIXME: GIT-COMMIT-MESSAGE-ENCODING: this logic is not right
// * When need to use commit message in templates/database, it should be valid UTF-8
// * When need to get the original commit message, it should just use "c.CommitMessage"
// It's not easy to refactor at the moment, many templates need to be updated and tested
return c.CommitMessage
func (c *CommitMessage) MessageUTF8() string {
if c.messageUTF8 == nil {
bs := charset.ToUTF8(util.UnsafeStringToBytes(c.MessageRaw), charset.ConvertOpts{ErrorReplacement: []byte{'?'}})
c.messageUTF8 = new(util.UnsafeBytesToString(bs))
}
return *c.messageUTF8
}
// Summary returns first line of commit message.
// The string is forced to be valid UTF8
func (c *Commit) Summary() string {
return strings.ToValidUTF8(strings.Split(strings.TrimSpace(c.CommitMessage), "\n")[0], "?")
func (c *CommitMessage) MessageTitle() string {
if c.messageTitle == nil {
s, _, _ := strings.Cut(strings.TrimSpace(c.MessageUTF8()), "\n")
c.messageTitle = new(strings.TrimSpace(s))
}
return *c.messageTitle
}
func (c *CommitMessage) MessageBody() string {
if c.messageBody == nil {
_, s, _ := strings.Cut(strings.TrimSpace(c.MessageUTF8()), "\n")
c.messageBody = new(strings.TrimSpace(s))
}
return *c.messageBody
}
// ParentID returns oid of n-th parent (0-based index).

View File

@@ -66,7 +66,7 @@ func convertPGPSignature(c *object.Commit) *CommitSignature {
func convertCommit(c *object.Commit) *Commit {
return &Commit{
ID: ParseGogitHash(c.Hash),
CommitMessage: c.Message,
CommitMessage: CommitMessage{MessageRaw: c.Message},
Committer: &c.Committer,
Author: &c.Author,
Signature: convertPGPSignature(c),

View File

@@ -92,7 +92,7 @@ func CommitFromReader(gitRepo *Repository, objectID ObjectID, reader io.Reader)
}
}
commit.CommitMessage = messageSB.String()
commit.MessageRaw = messageSB.String()
if commit.Signature != nil {
commit.Signature.Payload = payloadSB.String()
}

View File

@@ -95,7 +95,7 @@ signed commit`, commitFromReader.Signature.Payload)
commitFromReader2, err := CommitFromReader(gitRepo, sha, strings.NewReader(commitString+"\n\n"))
assert.NoError(t, err)
commitFromReader.CommitMessage += "\n\n"
commitFromReader.CommitMessage.MessageRaw += "\n\n"
commitFromReader.Signature.Payload += "\n\n"
assert.Equal(t, commitFromReader, commitFromReader2)
}

View File

@@ -91,7 +91,7 @@ empty commit`, commitFromReader.Signature.Payload)
commitFromReader2, err := CommitFromReader(gitRepo, sha, strings.NewReader(commitString+"\n\n"))
assert.NoError(t, err)
commitFromReader.CommitMessage += "\n\n"
commitFromReader.CommitMessage.MessageRaw += "\n\n"
commitFromReader.Signature.Payload += "\n\n"
assert.Equal(t, commitFromReader, commitFromReader2)
}
@@ -154,11 +154,20 @@ ISO-8859-1`, commitFromReader.Signature.Payload)
commitFromReader2, err := CommitFromReader(gitRepo, sha, strings.NewReader(commitString+"\n\n"))
assert.NoError(t, err)
commitFromReader.CommitMessage += "\n\n"
commitFromReader.CommitMessage.MessageRaw += "\n\n"
commitFromReader.Signature.Payload += "\n\n"
assert.Equal(t, commitFromReader, commitFromReader2)
}
func TestCommitMessageSanitizesInvalidUTF8(t *testing.T) {
commit := &Commit{
CommitMessage: CommitMessage{MessageRaw: "title \xff\n\n\n\nbody \xff\n\n\n"},
}
assert.Equal(t, "title ÿ", commit.MessageTitle())
assert.Equal(t, "body ÿ", commit.MessageBody())
assert.Equal(t, "title ÿ\n\n\n\nbody ÿ\n\n\n", commit.MessageUTF8())
}
func TestHasPreviousCommit(t *testing.T) {
bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")

View File

@@ -111,8 +111,6 @@ func (p *Parser) parseRef(refBlock string) (map[string]string, error) {
len(fields), len(p.format.fieldNames))
}
for i, field := range fields {
field = strings.TrimSpace(field)
var fieldKey string
var fieldVal string
before, after, ok := strings.Cut(field, " ")

View File

@@ -116,12 +116,12 @@ func TestParser(t *testing.T) {
},
{
"refname:short": "v0.0.2",
"contents": "Update CI config (#651)",
"contents": "Update CI config (#651)\n\n",
"author": "John Doe <john.doe@foo.com> 1521643174 +0000",
},
{
"refname:short": "v0.0.3",
"contents": "Fixed code sample for bash completion (#687)",
"contents": "Fixed code sample for bash completion (#687)\n\n",
"author": "Foo Baz <foo@baz.com> 1524836750 +0200",
},
},

View File

@@ -10,7 +10,6 @@ import (
"bytes"
"io"
"sort"
"strings"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/gitcmd"
@@ -97,7 +96,7 @@ func findLFSFileFunc(repo *git.Repository, objectID git.ObjectID, revListReader
result := LFSResult{
Name: curPath + fname,
SHA: curCommit.ID.String(),
Summary: strings.Split(strings.TrimSpace(curCommit.CommitMessage), "\n")[0],
Summary: curCommit.MessageTitle(),
When: curCommit.Author.When,
ParentHashes: curCommit.Parents,
}

View File

@@ -54,7 +54,7 @@ func TestGetTagCommitWithSignature(t *testing.T) {
assert.NotNil(t, commit)
assert.NotNil(t, commit.Signature)
// test that signature is not in message
assert.Equal(t, "signed-commit\n", commit.CommitMessage)
assert.Equal(t, "signed-commit\n", commit.CommitMessage.MessageRaw)
}
func TestGetCommitWithBadCommitID(t *testing.T) {

View File

@@ -176,15 +176,14 @@ func parseTagRef(ref map[string]string) (tag *Tag, err error) {
}
tag.Tagger = parseSignatureFromCommitLine(ref["creator"])
tag.Message = ref["contents"]
tag.MessageRaw = ref["contents"]
// strip any signature if present in contents field
_, tag.Message, _ = parsePayloadSignature(util.UnsafeStringToBytes(tag.Message), 0)
_, tag.MessageRaw, _ = parsePayloadSignature(util.UnsafeStringToBytes(tag.MessageRaw), 0)
// annotated tag with GPG signature
if tag.Type == "tag" && ref["contents:signature"] != "" {
payload := fmt.Sprintf("object %s\ntype commit\ntag %s\ntagger %s\n\n%s\n",
tag.Object, tag.Name, ref["creator"], strings.TrimSpace(tag.Message))
payload := fmt.Sprintf("object %s\ntype commit\ntag %s\ntagger %s\n\n%s", tag.Object, tag.Name, ref["creator"], tag.MessageRaw)
tag.Signature = &CommitSignature{
Signature: ref["contents:signature"],
Payload: payload,

View File

@@ -64,12 +64,12 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) {
return nil, err
}
tag := &Tag{
Name: name,
ID: tagID,
Object: commitID,
Type: tp,
Tagger: commit.Committer,
Message: commit.Message(),
Name: name,
ID: tagID,
Object: commitID,
Type: tp,
Tagger: commit.Committer,
CommitMessage: CommitMessage{MessageRaw: commit.CommitMessage.MessageRaw},
}
repo.tagCache.Set(tagID.String(), tag)
@@ -86,12 +86,12 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) {
}
tag := &Tag{
Name: name,
ID: tagID,
Object: commitID.Type().MustID(gogitTag.Target[:]),
Type: tp,
Tagger: &gogitTag.Tagger,
Message: gogitTag.Message,
Name: name,
ID: tagID,
Object: commitID.Type().MustID(gogitTag.Target[:]),
Type: tp,
Tagger: &gogitTag.Tagger,
CommitMessage: CommitMessage{MessageRaw: gogitTag.Message},
}
repo.tagCache.Set(tagID.String(), tag)

View File

@@ -71,12 +71,12 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) {
return nil, err
}
tag := &Tag{
Name: name,
ID: tagID,
Object: commitID,
Type: tp,
Tagger: commit.Committer,
Message: commit.Message(),
Name: name,
ID: tagID,
Object: commitID,
Type: tp,
Tagger: commit.Committer,
CommitMessage: commit.CommitMessage,
}
repo.tagCache.Set(tagID.String(), tag)

View File

@@ -211,13 +211,13 @@ func TestRepository_parseTagRef(t *testing.T) {
},
want: &Tag{
Name: "v1.9.1",
ID: MustIDFromString("ab23e4b7f4cd0caafe0174c0e7ef6d651ba72889"),
Object: MustIDFromString("ab23e4b7f4cd0caafe0174c0e7ef6d651ba72889"),
Type: "commit",
Tagger: parseSignatureFromCommitLine("Foo Bar <foo@bar.com> 1565789218 +0300"),
Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md\n",
Signature: nil,
Name: "v1.9.1",
ID: MustIDFromString("ab23e4b7f4cd0caafe0174c0e7ef6d651ba72889"),
Object: MustIDFromString("ab23e4b7f4cd0caafe0174c0e7ef6d651ba72889"),
Type: "commit",
Tagger: parseSignatureFromCommitLine("Foo Bar <foo@bar.com> 1565789218 +0300"),
CommitMessage: CommitMessage{MessageRaw: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md\n"},
Signature: nil,
},
},
@@ -240,13 +240,13 @@ func TestRepository_parseTagRef(t *testing.T) {
},
want: &Tag{
Name: "v0.0.1",
ID: MustIDFromString("8c68a1f06fc59c655b7e3905b159d761e91c53c9"),
Object: MustIDFromString("3325fd8a973321fd59455492976c042dde3fd1ca"),
Type: "tag",
Tagger: parseSignatureFromCommitLine("Foo Bar <foo@bar.com> 1565789218 +0300"),
Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md\n",
Signature: nil,
Name: "v0.0.1",
ID: MustIDFromString("8c68a1f06fc59c655b7e3905b159d761e91c53c9"),
Object: MustIDFromString("3325fd8a973321fd59455492976c042dde3fd1ca"),
Type: "tag",
Tagger: parseSignatureFromCommitLine("Foo Bar <foo@bar.com> 1565789218 +0300"),
CommitMessage: CommitMessage{MessageRaw: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md\n"},
Signature: nil,
},
},
@@ -263,6 +263,7 @@ func TestRepository_parseTagRef(t *testing.T) {
* add changelog of v1.9.1
* Update CHANGELOG.md
-----BEGIN PGP SIGNATURE-----
aBCGzBAABCgAdFiEEyWRwv/q1Q6IjSv+D4IPOwzt33PoFAmI8jbIACgkQ4IPOwzt3
@@ -298,12 +299,12 @@ qbHDASXl
},
want: &Tag{
Name: "v0.0.1",
ID: MustIDFromString("8c68a1f06fc59c655b7e3905b159d761e91c53c9"),
Object: MustIDFromString("3325fd8a973321fd59455492976c042dde3fd1ca"),
Type: "tag",
Tagger: parseSignatureFromCommitLine("Foo Bar <foo@bar.com> 1565789218 +0300"),
Message: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md",
Name: "v0.0.1",
ID: MustIDFromString("8c68a1f06fc59c655b7e3905b159d761e91c53c9"),
Object: MustIDFromString("3325fd8a973321fd59455492976c042dde3fd1ca"),
Type: "tag",
Tagger: parseSignatureFromCommitLine("Foo Bar <foo@bar.com> 1565789218 +0300"),
CommitMessage: CommitMessage{MessageRaw: "Add changelog of v1.9.1 (#7859)\n\n* add changelog of v1.9.1\n* Update CHANGELOG.md\n"},
Signature: &CommitSignature{
Signature: `-----BEGIN PGP SIGNATURE-----

View File

@@ -12,12 +12,13 @@ import (
// Tag represents a Git tag.
type Tag struct {
CommitMessage
Name string
ID ObjectID
Object ObjectID // The id of this commit object
Type string
Tagger *Signature
Message string
Signature *CommitSignature
}
@@ -87,7 +88,7 @@ func parseTagData(objectFormat ObjectFormat, data []byte) (*Tag, error) {
pos += eol + 1
}
payload, msg, sign := parsePayloadSignature(data, pos)
tag.Message = msg
tag.MessageRaw = msg
if len(sign) > 0 {
tag.Signature = &CommitSignature{Signature: sign, Payload: payload}
}

View File

@@ -28,7 +28,6 @@ tagger Lucas Michot <lucas@semalead.com> 1484491741 +0100
Object: MustIDFromString("3b114ab800c6432ad42387ccf6bc8d4388a2885a"),
Type: "commit",
Tagger: &Signature{Name: "Lucas Michot", Email: "lucas@semalead.com", When: time.Unix(1484491741, 0).In(time.FixedZone("", 3600))},
Message: "",
Signature: nil,
},
},
@@ -43,13 +42,13 @@ o
ono`,
expected: Tag{
Name: "",
ID: Sha1ObjectFormat.EmptyObjectID(),
Object: MustIDFromString("7cdf42c0b1cc763ab7e4c33c47a24e27c66bfccc"),
Type: "commit",
Tagger: &Signature{Name: "Lucas Michot", Email: "lucas@semalead.com", When: time.Unix(1484553735, 0).In(time.FixedZone("", 3600))},
Message: "test message\no\n\nono",
Signature: nil,
Name: "",
ID: Sha1ObjectFormat.EmptyObjectID(),
Object: MustIDFromString("7cdf42c0b1cc763ab7e4c33c47a24e27c66bfccc"),
Type: "commit",
Tagger: &Signature{Name: "Lucas Michot", Email: "lucas@semalead.com", When: time.Unix(1484553735, 0).In(time.FixedZone("", 3600))},
CommitMessage: CommitMessage{MessageRaw: "test message\no\n\nono"},
Signature: nil,
},
},
{
@@ -64,12 +63,12 @@ dummy signature
-----END SSH SIGNATURE-----
`,
expected: Tag{
Name: "",
ID: Sha1ObjectFormat.EmptyObjectID(),
Object: MustIDFromString("7cdf42c0b1cc763ab7e4c33c47a24e27c66bfaaa"),
Type: "commit",
Tagger: &Signature{Name: "dummy user", Email: "dummy-email@example.com", When: time.Unix(1484491741, 0).In(time.FixedZone("", 3600))},
Message: "dummy message",
Name: "",
ID: Sha1ObjectFormat.EmptyObjectID(),
Object: MustIDFromString("7cdf42c0b1cc763ab7e4c33c47a24e27c66bfaaa"),
Type: "commit",
Tagger: &Signature{Name: "dummy user", Email: "dummy-email@example.com", When: time.Unix(1484491741, 0).In(time.FixedZone("", 3600))},
CommitMessage: CommitMessage{MessageRaw: "dummy message"},
Signature: &CommitSignature{
Signature: `-----BEGIN SSH SIGNATURE-----
dummy signature
@@ -93,5 +92,5 @@ dummy message`,
tag, err := parseTagData(Sha1ObjectFormat, []byte("type commit\n\nfoo\n-----BEGIN SSH SIGNATURE-----\ncorrupted..."))
assert.NoError(t, err)
assert.Equal(t, "foo\n-----BEGIN SSH SIGNATURE-----\ncorrupted...", tag.Message)
assert.Equal(t, "foo\n-----BEGIN SSH SIGNATURE-----\ncorrupted...", tag.CommitMessage.MessageRaw)
}