mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-31 21:28:11 +09:00 
			
		
		
		
	Refactor markup rendering to accept general "protocol:" prefix (#29276)
Follow #29024 Major changes: * refactor validLinksPattern to fullURLPattern and add comments, now it accepts "protocol:" prefix * rename `IsLink*` to `IsFullURL*`, and remove unnecessary "mailto:" check * fix some comments (by the way) * rename EmojiShortCodeRegex -> emojiShortCodeRegex (by the way)
This commit is contained in:
		| @@ -53,38 +53,38 @@ var ( | |||||||
| 	// shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax | 	// shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax | ||||||
| 	shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`) | 	shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`) | ||||||
|  |  | ||||||
| 	// anySHA1Pattern splits url containing SHA into parts | 	// anyHashPattern splits url containing SHA into parts | ||||||
| 	anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`) | 	anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`) | ||||||
|  |  | ||||||
| 	// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash" | 	// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash" | ||||||
| 	comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`) | 	comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`) | ||||||
|  |  | ||||||
| 	validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`) | 	// fullURLPattern matches full URL like "mailto:...", "https://..." and "ssh+git://..." | ||||||
|  | 	fullURLPattern = regexp.MustCompile(`^[a-z][-+\w]+:`) | ||||||
|  |  | ||||||
| 	// While this email regex is definitely not perfect and I'm sure you can come up | 	// emailRegex is definitely not perfect with edge cases, | ||||||
| 	// with edge cases, it is still accepted by the CommonMark specification, as | 	// it is still accepted by the CommonMark specification, as well as the HTML5 spec: | ||||||
| 	// well as the HTML5 spec: |  | ||||||
| 	//   http://spec.commonmark.org/0.28/#email-address | 	//   http://spec.commonmark.org/0.28/#email-address | ||||||
| 	//   https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail) | 	//   https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail) | ||||||
| 	emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|;|,|\\?|!|\\.(\\s|$))") | 	emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|;|,|\\?|!|\\.(\\s|$))") | ||||||
|  |  | ||||||
| 	// blackfriday extensions create IDs like fn:user-content-footnote | 	// blackfridayExtRegex is for blackfriday extensions create IDs like fn:user-content-footnote | ||||||
| 	blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`) | 	blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`) | ||||||
|  |  | ||||||
| 	// EmojiShortCodeRegex find emoji by alias like :smile: | 	// emojiShortCodeRegex find emoji by alias like :smile: | ||||||
| 	EmojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`) | 	emojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`) | ||||||
| ) | ) | ||||||
|  |  | ||||||
| // CSS class for action keywords (e.g. "closes: #1") | // CSS class for action keywords (e.g. "closes: #1") | ||||||
| const keywordClass = "issue-keyword" | const keywordClass = "issue-keyword" | ||||||
|  |  | ||||||
| // IsLink reports whether link fits valid format. | // IsFullURLBytes reports whether link fits valid format. | ||||||
| func IsLink(link []byte) bool { | func IsFullURLBytes(link []byte) bool { | ||||||
| 	return validLinksPattern.Match(link) | 	return fullURLPattern.Match(link) | ||||||
| } | } | ||||||
|  |  | ||||||
| func IsLinkStr(link string) bool { | func IsFullURLString(link string) bool { | ||||||
| 	return validLinksPattern.MatchString(link) | 	return fullURLPattern.MatchString(link) | ||||||
| } | } | ||||||
|  |  | ||||||
| // regexp for full links to issues/pulls | // regexp for full links to issues/pulls | ||||||
| @@ -399,7 +399,7 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) { | |||||||
| 				if attr.Key != "src" { | 				if attr.Key != "src" { | ||||||
| 					continue | 					continue | ||||||
| 				} | 				} | ||||||
| 				if len(attr.Val) > 0 && !IsLinkStr(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") { | 				if len(attr.Val) > 0 && !IsFullURLString(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") { | ||||||
| 					attr.Val = util.URLJoin(ctx.Links.ResolveMediaLink(ctx.IsWiki), attr.Val) | 					attr.Val = util.URLJoin(ctx.Links.ResolveMediaLink(ctx.IsWiki), attr.Val) | ||||||
| 				} | 				} | ||||||
| 				attr.Val = camoHandleLink(attr.Val) | 				attr.Val = camoHandleLink(attr.Val) | ||||||
| @@ -650,7 +650,7 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) { | |||||||
| 			if equalPos := strings.IndexByte(v, '='); equalPos == -1 { | 			if equalPos := strings.IndexByte(v, '='); equalPos == -1 { | ||||||
| 				// There is no equal in this argument; this is a mandatory arg | 				// There is no equal in this argument; this is a mandatory arg | ||||||
| 				if props["name"] == "" { | 				if props["name"] == "" { | ||||||
| 					if IsLinkStr(v) { | 					if IsFullURLString(v) { | ||||||
| 						// If we clearly see it is a link, we save it so | 						// If we clearly see it is a link, we save it so | ||||||
|  |  | ||||||
| 						// But first we need to ensure, that if both mandatory args provided | 						// But first we need to ensure, that if both mandatory args provided | ||||||
| @@ -725,7 +725,7 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) { | |||||||
| 			DataAtom:   atom.A, | 			DataAtom:   atom.A, | ||||||
| 		} | 		} | ||||||
| 		childNode.Parent = linkNode | 		childNode.Parent = linkNode | ||||||
| 		absoluteLink := IsLinkStr(link) | 		absoluteLink := IsFullURLString(link) | ||||||
| 		if !absoluteLink { | 		if !absoluteLink { | ||||||
| 			if image { | 			if image { | ||||||
| 				link = strings.ReplaceAll(link, " ", "+") | 				link = strings.ReplaceAll(link, " ", "+") | ||||||
| @@ -1059,7 +1059,7 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) { | |||||||
| 	start := 0 | 	start := 0 | ||||||
| 	next := node.NextSibling | 	next := node.NextSibling | ||||||
| 	for node != nil && node != next && start < len(node.Data) { | 	for node != nil && node != next && start < len(node.Data) { | ||||||
| 		m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:]) | 		m := emojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:]) | ||||||
| 		if m == nil { | 		if m == nil { | ||||||
| 			return | 			return | ||||||
| 		} | 		} | ||||||
|   | |||||||
| @@ -204,6 +204,15 @@ func TestRender_links(t *testing.T) { | |||||||
| 	test( | 	test( | ||||||
| 		"magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download", | 		"magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download", | ||||||
| 		`<p><a href="magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download" rel="nofollow">magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download</a></p>`) | 		`<p><a href="magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download" rel="nofollow">magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download</a></p>`) | ||||||
|  | 	test( | ||||||
|  | 		`[link](https://example.com)`, | ||||||
|  | 		`<p><a href="https://example.com" rel="nofollow">link</a></p>`) | ||||||
|  | 	test( | ||||||
|  | 		`[link](mailto:test@example.com)`, | ||||||
|  | 		`<p><a href="mailto:test@example.com" rel="nofollow">link</a></p>`) | ||||||
|  | 	test( | ||||||
|  | 		`[link](javascript:xss)`, | ||||||
|  | 		`<p>link</p>`) | ||||||
|  |  | ||||||
| 	// Test that should *not* be turned into URL | 	// Test that should *not* be turned into URL | ||||||
| 	test( | 	test( | ||||||
| @@ -673,3 +682,9 @@ func TestIssue18471(t *testing.T) { | |||||||
| 	assert.NoError(t, err) | 	assert.NoError(t, err) | ||||||
| 	assert.Equal(t, "<a href=\"http://domain/org/repo/compare/783b039...da951ce\" class=\"compare\"><code class=\"nohighlight\">783b039...da951ce</code></a>", res.String()) | 	assert.Equal(t, "<a href=\"http://domain/org/repo/compare/783b039...da951ce\" class=\"compare\"><code class=\"nohighlight\">783b039...da951ce</code></a>", res.String()) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func TestIsFullURL(t *testing.T) { | ||||||
|  | 	assert.True(t, markup.IsFullURLString("https://example.com")) | ||||||
|  | 	assert.True(t, markup.IsFullURLString("mailto:test@example.com")) | ||||||
|  | 	assert.False(t, markup.IsFullURLString("/foo:bar")) | ||||||
|  | } | ||||||
|   | |||||||
| @@ -26,8 +26,6 @@ import ( | |||||||
| 	"github.com/yuin/goldmark/util" | 	"github.com/yuin/goldmark/util" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| var byteMailto = []byte("mailto:") |  | ||||||
|  |  | ||||||
| // ASTTransformer is a default transformer of the goldmark tree. | // ASTTransformer is a default transformer of the goldmark tree. | ||||||
| type ASTTransformer struct{} | type ASTTransformer struct{} | ||||||
|  |  | ||||||
| @@ -84,7 +82,7 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa | |||||||
| 			// 2. If they're not wrapped with a link they need a link wrapper | 			// 2. If they're not wrapped with a link they need a link wrapper | ||||||
|  |  | ||||||
| 			// Check if the destination is a real link | 			// Check if the destination is a real link | ||||||
| 			if len(v.Destination) > 0 && !markup.IsLink(v.Destination) { | 			if len(v.Destination) > 0 && !markup.IsFullURLBytes(v.Destination) { | ||||||
| 				v.Destination = []byte(giteautil.URLJoin( | 				v.Destination = []byte(giteautil.URLJoin( | ||||||
| 					ctx.Links.ResolveMediaLink(ctx.IsWiki), | 					ctx.Links.ResolveMediaLink(ctx.IsWiki), | ||||||
| 					strings.TrimLeft(string(v.Destination), "/"), | 					strings.TrimLeft(string(v.Destination), "/"), | ||||||
| @@ -130,23 +128,17 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa | |||||||
| 		case *ast.Link: | 		case *ast.Link: | ||||||
| 			// Links need their href to munged to be a real value | 			// Links need their href to munged to be a real value | ||||||
| 			link := v.Destination | 			link := v.Destination | ||||||
| 			if len(link) > 0 && !markup.IsLink(link) && | 			isAnchorFragment := len(link) > 0 && link[0] == '#' | ||||||
| 				link[0] != '#' && !bytes.HasPrefix(link, byteMailto) { | 			if !isAnchorFragment && !markup.IsFullURLBytes(link) { | ||||||
| 				// special case: this is not a link, a hash link or a mailto:, so it's a | 				base := ctx.Links.Base | ||||||
| 				// relative URL |  | ||||||
|  |  | ||||||
| 				var base string |  | ||||||
| 				if ctx.IsWiki { | 				if ctx.IsWiki { | ||||||
| 					base = ctx.Links.WikiLink() | 					base = ctx.Links.WikiLink() | ||||||
| 				} else if ctx.Links.HasBranchInfo() { | 				} else if ctx.Links.HasBranchInfo() { | ||||||
| 					base = ctx.Links.SrcLink() | 					base = ctx.Links.SrcLink() | ||||||
| 				} else { |  | ||||||
| 					base = ctx.Links.Base |  | ||||||
| 				} | 				} | ||||||
|  |  | ||||||
| 				link = []byte(giteautil.URLJoin(base, string(link))) | 				link = []byte(giteautil.URLJoin(base, string(link))) | ||||||
| 			} | 			} | ||||||
| 			if len(link) > 0 && link[0] == '#' { | 			if isAnchorFragment { | ||||||
| 				link = []byte("#user-content-" + string(link)[1:]) | 				link = []byte("#user-content-" + string(link)[1:]) | ||||||
| 			} | 			} | ||||||
| 			v.Destination = link | 			v.Destination = link | ||||||
|   | |||||||
| @@ -136,8 +136,7 @@ type Writer struct { | |||||||
| func (r *Writer) resolveLink(kind, link string) string { | func (r *Writer) resolveLink(kind, link string) string { | ||||||
| 	link = strings.TrimPrefix(link, "file:") | 	link = strings.TrimPrefix(link, "file:") | ||||||
| 	if !strings.HasPrefix(link, "#") && // not a URL fragment | 	if !strings.HasPrefix(link, "#") && // not a URL fragment | ||||||
| 		!markup.IsLinkStr(link) && // not an absolute URL | 		!markup.IsFullURLString(link) { | ||||||
| 		!strings.HasPrefix(link, "mailto:") { |  | ||||||
| 		if kind == "regular" { | 		if kind == "regular" { | ||||||
| 			// orgmode reports the link kind as "regular" for "[[ImageLink.svg][The Image Desc]]" | 			// orgmode reports the link kind as "regular" for "[[ImageLink.svg][The Image Desc]]" | ||||||
| 			// so we need to try to guess the link kind again here | 			// so we need to try to guess the link kind again here | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user