Refactor markup rendering to accept general "protocol:" prefix (#29276)

Follow #29024. Major changes: * refactor validLinksPattern to fullURLPattern and add comments, now it accepts "protocol:" prefix * rename `IsLink*` to `IsFullURL*`, and remove unnecessary "mailto:" check * fix some comments (by the way) * rename EmojiShortCodeRegex -> emojiShortCodeRegex (by the way)
2025-12-19 15:37:49 +09:00 · 2024-02-21 18:08:08 +08:00
parent 4e536edaea
commit 6130522aa8
4 changed files with 38 additions and 32 deletions
--- a/modules/markup/html.go
+++ b/modules/markup/html.go
@@ -53,38 +53,38 @@ var (
 	// shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax
 	shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
-	// anySHA1Pattern splits url containing SHA into parts
+	// anyHashPattern splits url containing SHA into parts
 	anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`)
 	// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash"
 	comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`)
-	validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)
+	// fullURLPattern matches full URL like "mailto:...", "https://..." and "ssh+git://..."
 	fullURLPattern = regexp.MustCompile(`^[a-z][-+\w]+:`)
-	// While this email regex is definitely not perfect and I'm sure you can come up
+	// emailRegex is definitely not perfect with edge cases,
-	// with edge cases, it is still accepted by the CommonMark specification, as
+	// it is still accepted by the CommonMark specification, as well as the HTML5 spec:
 	// well as the HTML5 spec:
 	//   http://spec.commonmark.org/0.28/#email-address
 	//   https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
 	emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|;|,|\\?|!|\\.(\\s|$))")
-	// blackfriday extensions create IDs like fn:user-content-footnote
+	// blackfridayExtRegex is for blackfriday extensions create IDs like fn:user-content-footnote
 	blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)
-	// EmojiShortCodeRegex find emoji by alias like :smile:
+	// emojiShortCodeRegex find emoji by alias like :smile:
-	EmojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)
+	emojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)
 )
 // CSS class for action keywords (e.g. "closes: #1")
 const keywordClass = "issue-keyword"
-// IsLink reports whether link fits valid format.
+// IsFullURLBytes reports whether link fits valid format.
-func IsLink(link []byte) bool {
+func IsFullURLBytes(link []byte) bool {
-	return validLinksPattern.Match(link)
+	return fullURLPattern.Match(link)
 }
-func IsLinkStr(link string) bool {
+func IsFullURLString(link string) bool {
-	return validLinksPattern.MatchString(link)
+	return fullURLPattern.MatchString(link)
 }
 // regexp for full links to issues/pulls
@@ -399,7 +399,7 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) {
 				if attr.Key != "src" {
 					continue
 				}
-				if len(attr.Val) > 0 && !IsLinkStr(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") {
+				if len(attr.Val) > 0 && !IsFullURLString(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") {
 					attr.Val = util.URLJoin(ctx.Links.ResolveMediaLink(ctx.IsWiki), attr.Val)
 				}
 				attr.Val = camoHandleLink(attr.Val)
@@ -650,7 +650,7 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
 			if equalPos := strings.IndexByte(v, '='); equalPos == -1 {
 				// There is no equal in this argument; this is a mandatory arg
 				if props["name"] == "" {
-					if IsLinkStr(v) {
+					if IsFullURLString(v) {
 						// If we clearly see it is a link, we save it so
 						// But first we need to ensure, that if both mandatory args provided
@@ -725,7 +725,7 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
 			DataAtom:   atom.A,
 		}
 		childNode.Parent = linkNode
-		absoluteLink := IsLinkStr(link)
+		absoluteLink := IsFullURLString(link)
 		if !absoluteLink {
 			if image {
 				link = strings.ReplaceAll(link, " ", "+")
@@ -1059,7 +1059,7 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
 	start := 0
 	next := node.NextSibling
 	for node != nil && node != next && start < len(node.Data) {
-		m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:])
+		m := emojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:])
 		if m == nil {
 			return
 		}
--- a/modules/markup/html_test.go
+++ b/modules/markup/html_test.go
@@ -204,6 +204,15 @@ func TestRender_links(t *testing.T) {
 	test(
 		"magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download",
 		`<p><a href="magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&amp;dn=download" rel="nofollow">magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&amp;dn=download</a></p>`)
 	test(
 		`[link](https://example.com)`,
 		`<p><a href="https://example.com" rel="nofollow">link</a></p>`)
 	test(
 		`[link](mailto:test@example.com)`,
 		`<p><a href="mailto:test@example.com" rel="nofollow">link</a></p>`)
 	test(
 		`[link](javascript:xss)`,
 		`<p>link</p>`)
 	// Test that should *not* be turned into URL
 	test(
@@ -673,3 +682,9 @@ func TestIssue18471(t *testing.T) {
 	assert.NoError(t, err)
 	assert.Equal(t, "<a href=\"http://domain/org/repo/compare/783b039...da951ce\" class=\"compare\"><code class=\"nohighlight\">783b039...da951ce</code></a>", res.String())
 }
 func TestIsFullURL(t *testing.T) {
 	assert.True(t, markup.IsFullURLString("https://example.com"))
 	assert.True(t, markup.IsFullURLString("mailto:test@example.com"))
 	assert.False(t, markup.IsFullURLString("/foo:bar"))
 }
--- a/modules/markup/markdown/goldmark.go
+++ b/modules/markup/markdown/goldmark.go
@@ -26,8 +26,6 @@ import (
 	"github.com/yuin/goldmark/util"
 )
 var byteMailto = []byte("mailto:")
 // ASTTransformer is a default transformer of the goldmark tree.
 type ASTTransformer struct{}
@@ -84,7 +82,7 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
 			// 2. If they're not wrapped with a link they need a link wrapper
 			// Check if the destination is a real link
-			if len(v.Destination) > 0 && !markup.IsLink(v.Destination) {
+			if len(v.Destination) > 0 && !markup.IsFullURLBytes(v.Destination) {
 				v.Destination = []byte(giteautil.URLJoin(
 					ctx.Links.ResolveMediaLink(ctx.IsWiki),
 					strings.TrimLeft(string(v.Destination), "/"),
@@ -130,23 +128,17 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
 		case *ast.Link:
 			// Links need their href to munged to be a real value
 			link := v.Destination
-			if len(link) > 0 && !markup.IsLink(link) &&
+			isAnchorFragment := len(link) > 0 && link[0] == '#'
-				link[0] != '#' && !bytes.HasPrefix(link, byteMailto) {
+			if !isAnchorFragment && !markup.IsFullURLBytes(link) {
-				// special case: this is not a link, a hash link or a mailto:, so it's a
+				base := ctx.Links.Base
 				// relative URL
 				var base string
 				if ctx.IsWiki {
 					base = ctx.Links.WikiLink()
 				} else if ctx.Links.HasBranchInfo() {
 					base = ctx.Links.SrcLink()
 				} else {
 					base = ctx.Links.Base
 				}
 				link = []byte(giteautil.URLJoin(base, string(link)))
 			}
-			if len(link) > 0 && link[0] == '#' {
+			if isAnchorFragment {
 				link = []byte("#user-content-" + string(link)[1:])
 			}
 			v.Destination = link
--- a/modules/markup/orgmode/orgmode.go
+++ b/modules/markup/orgmode/orgmode.go
@@ -136,8 +136,7 @@ type Writer struct {
 func (r *Writer) resolveLink(kind, link string) string {
 	link = strings.TrimPrefix(link, "file:")
 	if !strings.HasPrefix(link, "#") && // not a URL fragment
-		!markup.IsLinkStr(link) && // not an absolute URL
+		!markup.IsFullURLString(link) {
 		!strings.HasPrefix(link, "mailto:") {
 		if kind == "regular" {
 			// orgmode reports the link kind as "regular" for "[[ImageLink.svg][The Image Desc]]"
 			// so we need to try to guess the link kind again here