mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-27 00:23:41 +09:00 
			
		
		
		
	Restructure markup & markdown to prepare for multiple markup language… (#2411)
* restructure markup & markdown to prepare for multiple markup languages support * adjust some functions between markdown and markup * fix tests * improve the comments
This commit is contained in:
		
							
								
								
									
										517
									
								
								modules/markup/html.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										517
									
								
								modules/markup/html.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,517 @@ | ||||
| // Copyright 2017 The Gitea Authors. All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package markup | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"net/url" | ||||
| 	"path" | ||||
| 	"path/filepath" | ||||
| 	"regexp" | ||||
| 	"strings" | ||||
|  | ||||
| 	"code.gitea.io/gitea/modules/base" | ||||
| 	"code.gitea.io/gitea/modules/log" | ||||
| 	"code.gitea.io/gitea/modules/setting" | ||||
|  | ||||
| 	"github.com/Unknwon/com" | ||||
| 	"golang.org/x/net/html" | ||||
| ) | ||||
|  | ||||
| // Issue name styles | ||||
| const ( | ||||
| 	IssueNameStyleNumeric      = "numeric" | ||||
| 	IssueNameStyleAlphanumeric = "alphanumeric" | ||||
| ) | ||||
|  | ||||
| var ( | ||||
| 	// NOTE: All below regex matching do not perform any extra validation. | ||||
| 	// Thus a link is produced even if the linked entity does not exist. | ||||
| 	// While fast, this is also incorrect and leads to false positives. | ||||
| 	// TODO: fix invalid linking issue | ||||
|  | ||||
| 	// MentionPattern matches string that mentions someone, e.g. @Unknwon | ||||
| 	MentionPattern = regexp.MustCompile(`(\s|^|\W)@[0-9a-zA-Z-_\.]+`) | ||||
|  | ||||
| 	// IssueNumericPattern matches string that references to a numeric issue, e.g. #1287 | ||||
| 	IssueNumericPattern = regexp.MustCompile(`( |^|\()#[0-9]+\b`) | ||||
| 	// IssueAlphanumericPattern matches string that references to an alphanumeric issue, e.g. ABC-1234 | ||||
| 	IssueAlphanumericPattern = regexp.MustCompile(`( |^|\()[A-Z]{1,10}-[1-9][0-9]*\b`) | ||||
| 	// CrossReferenceIssueNumericPattern matches string that references a numeric issue in a different repository | ||||
| 	// e.g. gogits/gogs#12345 | ||||
| 	CrossReferenceIssueNumericPattern = regexp.MustCompile(`( |^)[0-9a-zA-Z]+/[0-9a-zA-Z]+#[0-9]+\b`) | ||||
|  | ||||
| 	// Sha1CurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae | ||||
| 	// Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length | ||||
| 	// so that abbreviated hash links can be used as well. This matches git and github usability. | ||||
| 	Sha1CurrentPattern = regexp.MustCompile(`(?:^|\s|\()([0-9a-f]{7,40})\b`) | ||||
|  | ||||
| 	// ShortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax | ||||
| 	ShortLinkPattern = regexp.MustCompile(`(\[\[.*?\]\]\w*)`) | ||||
|  | ||||
| 	// AnySHA1Pattern allows to split url containing SHA into parts | ||||
| 	AnySHA1Pattern = regexp.MustCompile(`(http\S*)://(\S+)/(\S+)/(\S+)/(\S+)/([0-9a-f]{40})(?:/?([^#\s]+)?(?:#(\S+))?)?`) | ||||
|  | ||||
| 	validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`) | ||||
| ) | ||||
|  | ||||
| // regexp for full links to issues/pulls | ||||
| var issueFullPattern *regexp.Regexp | ||||
|  | ||||
| // IsLink reports whether link fits valid format. | ||||
| func IsLink(link []byte) bool { | ||||
| 	return isLink(link) | ||||
| } | ||||
|  | ||||
| // isLink reports whether link fits valid format. | ||||
| func isLink(link []byte) bool { | ||||
| 	return validLinksPattern.Match(link) | ||||
| } | ||||
|  | ||||
| func getIssueFullPattern() *regexp.Regexp { | ||||
| 	if issueFullPattern == nil { | ||||
| 		appURL := setting.AppURL | ||||
| 		if len(appURL) > 0 && appURL[len(appURL)-1] != '/' { | ||||
| 			appURL += "/" | ||||
| 		} | ||||
| 		issueFullPattern = regexp.MustCompile(appURL + | ||||
| 			`\w+/\w+/(?:issues|pulls)/((?:\w{1,10}-)?[1-9][0-9]*)([\?|#]\S+.(\S+)?)?\b`) | ||||
| 	} | ||||
| 	return issueFullPattern | ||||
| } | ||||
|  | ||||
| // FindAllMentions matches mention patterns in given content | ||||
| // and returns a list of found user names without @ prefix. | ||||
| func FindAllMentions(content string) []string { | ||||
| 	mentions := MentionPattern.FindAllString(content, -1) | ||||
| 	for i := range mentions { | ||||
| 		mentions[i] = mentions[i][strings.Index(mentions[i], "@")+1:] // Strip @ character | ||||
| 	} | ||||
| 	return mentions | ||||
| } | ||||
|  | ||||
| // cutoutVerbosePrefix cutouts URL prefix including sub-path to | ||||
| // return a clean unified string of request URL path. | ||||
| func cutoutVerbosePrefix(prefix string) string { | ||||
| 	if len(prefix) == 0 || prefix[0] != '/' { | ||||
| 		return prefix | ||||
| 	} | ||||
| 	count := 0 | ||||
| 	for i := 0; i < len(prefix); i++ { | ||||
| 		if prefix[i] == '/' { | ||||
| 			count++ | ||||
| 		} | ||||
| 		if count >= 3+setting.AppSubURLDepth { | ||||
| 			return prefix[:i] | ||||
| 		} | ||||
| 	} | ||||
| 	return prefix | ||||
| } | ||||
|  | ||||
| // URLJoin joins url components, like path.Join, but preserving contents | ||||
| func URLJoin(base string, elems ...string) string { | ||||
| 	u, err := url.Parse(base) | ||||
| 	if err != nil { | ||||
| 		log.Error(4, "URLJoin: Invalid base URL %s", base) | ||||
| 		return "" | ||||
| 	} | ||||
| 	joinArgs := make([]string, 0, len(elems)+1) | ||||
| 	joinArgs = append(joinArgs, u.Path) | ||||
| 	joinArgs = append(joinArgs, elems...) | ||||
| 	u.Path = path.Join(joinArgs...) | ||||
| 	return u.String() | ||||
| } | ||||
|  | ||||
| // RenderIssueIndexPattern renders issue indexes to corresponding links. | ||||
| func RenderIssueIndexPattern(rawBytes []byte, urlPrefix string, metas map[string]string) []byte { | ||||
| 	urlPrefix = cutoutVerbosePrefix(urlPrefix) | ||||
|  | ||||
| 	pattern := IssueNumericPattern | ||||
| 	if metas["style"] == IssueNameStyleAlphanumeric { | ||||
| 		pattern = IssueAlphanumericPattern | ||||
| 	} | ||||
|  | ||||
| 	ms := pattern.FindAll(rawBytes, -1) | ||||
| 	for _, m := range ms { | ||||
| 		if m[0] == ' ' || m[0] == '(' { | ||||
| 			m = m[1:] // ignore leading space or opening parentheses | ||||
| 		} | ||||
| 		var link string | ||||
| 		if metas == nil { | ||||
| 			link = fmt.Sprintf(`<a href="%s">%s</a>`, URLJoin(urlPrefix, "issues", string(m[1:])), m) | ||||
| 		} else { | ||||
| 			// Support for external issue tracker | ||||
| 			if metas["style"] == IssueNameStyleAlphanumeric { | ||||
| 				metas["index"] = string(m) | ||||
| 			} else { | ||||
| 				metas["index"] = string(m[1:]) | ||||
| 			} | ||||
| 			link = fmt.Sprintf(`<a href="%s">%s</a>`, com.Expand(metas["format"], metas), m) | ||||
| 		} | ||||
| 		rawBytes = bytes.Replace(rawBytes, m, []byte(link), 1) | ||||
| 	} | ||||
| 	return rawBytes | ||||
| } | ||||
|  | ||||
| // IsSameDomain checks if given url string has the same hostname as current Gitea instance | ||||
| func IsSameDomain(s string) bool { | ||||
| 	if strings.HasPrefix(s, "/") { | ||||
| 		return true | ||||
| 	} | ||||
| 	if uapp, err := url.Parse(setting.AppURL); err == nil { | ||||
| 		if u, err := url.Parse(s); err == nil { | ||||
| 			return u.Host == uapp.Host | ||||
| 		} | ||||
| 		return false | ||||
| 	} | ||||
| 	return false | ||||
| } | ||||
|  | ||||
| // renderFullSha1Pattern renders SHA containing URLs | ||||
| func renderFullSha1Pattern(rawBytes []byte, urlPrefix string) []byte { | ||||
| 	ms := AnySHA1Pattern.FindAllSubmatch(rawBytes, -1) | ||||
| 	for _, m := range ms { | ||||
| 		all := m[0] | ||||
| 		protocol := string(m[1]) | ||||
| 		paths := string(m[2]) | ||||
| 		path := protocol + "://" + paths | ||||
| 		author := string(m[3]) | ||||
| 		repoName := string(m[4]) | ||||
| 		path = URLJoin(path, author, repoName) | ||||
| 		ltype := "src" | ||||
| 		itemType := m[5] | ||||
| 		if IsSameDomain(paths) { | ||||
| 			ltype = string(itemType) | ||||
| 		} else if string(itemType) == "commit" { | ||||
| 			ltype = "commit" | ||||
| 		} | ||||
| 		sha := m[6] | ||||
| 		var subtree string | ||||
| 		if len(m) > 7 && len(m[7]) > 0 { | ||||
| 			subtree = string(m[7]) | ||||
| 		} | ||||
| 		var line []byte | ||||
| 		if len(m) > 8 && len(m[8]) > 0 { | ||||
| 			line = m[8] | ||||
| 		} | ||||
| 		urlSuffix := "" | ||||
| 		text := base.ShortSha(string(sha)) | ||||
| 		if subtree != "" { | ||||
| 			urlSuffix = "/" + subtree | ||||
| 			text += urlSuffix | ||||
| 		} | ||||
| 		if line != nil { | ||||
| 			value := string(line) | ||||
| 			urlSuffix += "#" | ||||
| 			urlSuffix += value | ||||
| 			text += " (" | ||||
| 			text += value | ||||
| 			text += ")" | ||||
| 		} | ||||
| 		rawBytes = bytes.Replace(rawBytes, all, []byte(fmt.Sprintf( | ||||
| 			`<a href="%s">%s</a>`, URLJoin(path, ltype, string(sha))+urlSuffix, text)), -1) | ||||
| 	} | ||||
| 	return rawBytes | ||||
| } | ||||
|  | ||||
| // RenderFullIssuePattern renders issues-like URLs | ||||
| func RenderFullIssuePattern(rawBytes []byte) []byte { | ||||
| 	ms := getIssueFullPattern().FindAllSubmatch(rawBytes, -1) | ||||
| 	for _, m := range ms { | ||||
| 		all := m[0] | ||||
| 		id := string(m[1]) | ||||
| 		text := "#" + id | ||||
| 		// TODO if m[2] is not nil, then link is to a comment, | ||||
| 		// and we should indicate that in the text somehow | ||||
| 		rawBytes = bytes.Replace(rawBytes, all, []byte(fmt.Sprintf( | ||||
| 			`<a href="%s">%s</a>`, string(all), text)), -1) | ||||
| 	} | ||||
| 	return rawBytes | ||||
| } | ||||
|  | ||||
// firstIndexOfByte returns the index of the first occurrence of target in sl,
// or -1 if target is not present.
func firstIndexOfByte(sl []byte, target byte) int {
	return bytes.IndexByte(sl, target)
}
|  | ||||
// lastIndexOfByte returns the index of the last occurrence of target in sl,
// or -1 if target is not present.
func lastIndexOfByte(sl []byte, target byte) int {
	return bytes.LastIndexByte(sl, target)
}
|  | ||||
| // RenderShortLinks processes [[syntax]] | ||||
| // | ||||
| // noLink flag disables making link tags when set to true | ||||
| // so this function just replaces the whole [[...]] with the content text | ||||
| // | ||||
| // isWikiMarkdown is a flag to choose linking url prefix | ||||
| func RenderShortLinks(rawBytes []byte, urlPrefix string, noLink bool, isWikiMarkdown bool) []byte { | ||||
| 	ms := ShortLinkPattern.FindAll(rawBytes, -1) | ||||
| 	for _, m := range ms { | ||||
| 		orig := bytes.TrimSpace(m) | ||||
| 		m = orig[2:] | ||||
| 		tailPos := lastIndexOfByte(m, ']') + 1 | ||||
| 		tail := []byte{} | ||||
| 		if tailPos < len(m) { | ||||
| 			tail = m[tailPos:] | ||||
| 			m = m[:tailPos-1] | ||||
| 		} | ||||
| 		m = m[:len(m)-2] | ||||
| 		props := map[string]string{} | ||||
|  | ||||
| 		// MediaWiki uses [[link|text]], while GitHub uses [[text|link]] | ||||
| 		// It makes page handling terrible, but we prefer GitHub syntax | ||||
| 		// And fall back to MediaWiki only when it is obvious from the look | ||||
| 		// Of text and link contents | ||||
| 		sl := bytes.Split(m, []byte("|")) | ||||
| 		for _, v := range sl { | ||||
| 			switch bytes.Count(v, []byte("=")) { | ||||
|  | ||||
| 			// Piped args without = sign, these are mandatory arguments | ||||
| 			case 0: | ||||
| 				{ | ||||
| 					sv := string(v) | ||||
| 					if props["name"] == "" { | ||||
| 						if isLink(v) { | ||||
| 							// If we clearly see it is a link, we save it so | ||||
|  | ||||
| 							// But first we need to ensure, that if both mandatory args provided | ||||
| 							// look like links, we stick to GitHub syntax | ||||
| 							if props["link"] != "" { | ||||
| 								props["name"] = props["link"] | ||||
| 							} | ||||
|  | ||||
| 							props["link"] = strings.TrimSpace(sv) | ||||
| 						} else { | ||||
| 							props["name"] = sv | ||||
| 						} | ||||
| 					} else { | ||||
| 						props["link"] = strings.TrimSpace(sv) | ||||
| 					} | ||||
| 				} | ||||
|  | ||||
| 			// Piped args with = sign, these are optional arguments | ||||
| 			case 1: | ||||
| 				{ | ||||
| 					sep := firstIndexOfByte(v, '=') | ||||
| 					key, val := string(v[:sep]), html.UnescapeString(string(v[sep+1:])) | ||||
| 					lastCharIndex := len(val) - 1 | ||||
| 					if (val[0] == '"' || val[0] == '\'') && (val[lastCharIndex] == '"' || val[lastCharIndex] == '\'') { | ||||
| 						val = val[1:lastCharIndex] | ||||
| 					} | ||||
| 					props[key] = val | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		var name string | ||||
| 		var link string | ||||
| 		if props["link"] != "" { | ||||
| 			link = props["link"] | ||||
| 		} else if props["name"] != "" { | ||||
| 			link = props["name"] | ||||
| 		} | ||||
| 		if props["title"] != "" { | ||||
| 			name = props["title"] | ||||
| 		} else if props["name"] != "" { | ||||
| 			name = props["name"] | ||||
| 		} else { | ||||
| 			name = link | ||||
| 		} | ||||
|  | ||||
| 		name += string(tail) | ||||
| 		image := false | ||||
| 		ext := filepath.Ext(string(link)) | ||||
| 		if ext != "" { | ||||
| 			switch ext { | ||||
| 			case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg": | ||||
| 				{ | ||||
| 					image = true | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		absoluteLink := isLink([]byte(link)) | ||||
| 		if !absoluteLink { | ||||
| 			link = strings.Replace(link, " ", "+", -1) | ||||
| 		} | ||||
| 		if image { | ||||
| 			if !absoluteLink { | ||||
| 				if IsSameDomain(urlPrefix) { | ||||
| 					urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1) | ||||
| 				} | ||||
| 				if isWikiMarkdown { | ||||
| 					link = URLJoin("wiki", "raw", link) | ||||
| 				} | ||||
| 				link = URLJoin(urlPrefix, link) | ||||
| 			} | ||||
| 			title := props["title"] | ||||
| 			if title == "" { | ||||
| 				title = props["alt"] | ||||
| 			} | ||||
| 			if title == "" { | ||||
| 				title = path.Base(string(name)) | ||||
| 			} | ||||
| 			alt := props["alt"] | ||||
| 			if alt == "" { | ||||
| 				alt = name | ||||
| 			} | ||||
| 			if alt != "" { | ||||
| 				alt = `alt="` + alt + `"` | ||||
| 			} | ||||
| 			name = fmt.Sprintf(`<img src="%s" %s title="%s" />`, link, alt, title) | ||||
| 		} else if !absoluteLink { | ||||
| 			if isWikiMarkdown { | ||||
| 				link = URLJoin("wiki", link) | ||||
| 			} | ||||
| 			link = URLJoin(urlPrefix, link) | ||||
| 		} | ||||
| 		if noLink { | ||||
| 			rawBytes = bytes.Replace(rawBytes, orig, []byte(name), -1) | ||||
| 		} else { | ||||
| 			rawBytes = bytes.Replace(rawBytes, orig, | ||||
| 				[]byte(fmt.Sprintf(`<a href="%s">%s</a>`, link, name)), -1) | ||||
| 		} | ||||
| 	} | ||||
| 	return rawBytes | ||||
| } | ||||
|  | ||||
| // RenderCrossReferenceIssueIndexPattern renders issue indexes from other repositories to corresponding links. | ||||
| func RenderCrossReferenceIssueIndexPattern(rawBytes []byte, urlPrefix string, metas map[string]string) []byte { | ||||
| 	ms := CrossReferenceIssueNumericPattern.FindAll(rawBytes, -1) | ||||
| 	for _, m := range ms { | ||||
| 		if m[0] == ' ' || m[0] == '(' { | ||||
| 			m = m[1:] // ignore leading space or opening parentheses | ||||
| 		} | ||||
|  | ||||
| 		repo := string(bytes.Split(m, []byte("#"))[0]) | ||||
| 		issue := string(bytes.Split(m, []byte("#"))[1]) | ||||
|  | ||||
| 		link := fmt.Sprintf(`<a href="%s">%s</a>`, URLJoin(setting.AppURL, repo, "issues", issue), m) | ||||
| 		rawBytes = bytes.Replace(rawBytes, m, []byte(link), 1) | ||||
| 	} | ||||
| 	return rawBytes | ||||
| } | ||||
|  | ||||
| // renderSha1CurrentPattern renders SHA1 strings to corresponding links that assumes in the same repository. | ||||
| func renderSha1CurrentPattern(rawBytes []byte, urlPrefix string) []byte { | ||||
| 	ms := Sha1CurrentPattern.FindAllSubmatch(rawBytes, -1) | ||||
| 	for _, m := range ms { | ||||
| 		hash := m[1] | ||||
| 		// The regex does not lie, it matches the hash pattern. | ||||
| 		// However, a regex cannot know if a hash actually exists or not. | ||||
| 		// We could assume that a SHA1 hash should probably contain alphas AND numerics | ||||
| 		// but that is not always the case. | ||||
| 		// Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash | ||||
| 		// as used by git and github for linking and thus we have to do similar. | ||||
| 		rawBytes = bytes.Replace(rawBytes, hash, []byte(fmt.Sprintf( | ||||
| 			`<a href="%s">%s</a>`, URLJoin(urlPrefix, "commit", string(hash)), base.ShortSha(string(hash)))), -1) | ||||
| 	} | ||||
| 	return rawBytes | ||||
| } | ||||
|  | ||||
| // RenderSpecialLink renders mentions, indexes and SHA1 strings to corresponding links. | ||||
| func RenderSpecialLink(rawBytes []byte, urlPrefix string, metas map[string]string, isWikiMarkdown bool) []byte { | ||||
| 	ms := MentionPattern.FindAll(rawBytes, -1) | ||||
| 	for _, m := range ms { | ||||
| 		m = m[bytes.Index(m, []byte("@")):] | ||||
| 		rawBytes = bytes.Replace(rawBytes, m, | ||||
| 			[]byte(fmt.Sprintf(`<a href="%s">%s</a>`, URLJoin(setting.AppURL, string(m[1:])), m)), -1) | ||||
| 	} | ||||
|  | ||||
| 	rawBytes = RenderFullIssuePattern(rawBytes) | ||||
| 	rawBytes = RenderShortLinks(rawBytes, urlPrefix, false, isWikiMarkdown) | ||||
| 	rawBytes = RenderIssueIndexPattern(rawBytes, urlPrefix, metas) | ||||
| 	rawBytes = RenderCrossReferenceIssueIndexPattern(rawBytes, urlPrefix, metas) | ||||
| 	rawBytes = renderFullSha1Pattern(rawBytes, urlPrefix) | ||||
| 	rawBytes = renderSha1CurrentPattern(rawBytes, urlPrefix) | ||||
| 	return rawBytes | ||||
| } | ||||
|  | ||||
| var ( | ||||
| 	leftAngleBracket  = []byte("</") | ||||
| 	rightAngleBracket = []byte(">") | ||||
| ) | ||||
|  | ||||
| var noEndTags = []string{"img", "input", "br", "hr"} | ||||
|  | ||||
| // PostProcess treats different types of HTML differently, | ||||
| // and only renders special links for plain text blocks. | ||||
| func PostProcess(rawHTML []byte, urlPrefix string, metas map[string]string, isWikiMarkdown bool) []byte { | ||||
| 	startTags := make([]string, 0, 5) | ||||
| 	var buf bytes.Buffer | ||||
| 	tokenizer := html.NewTokenizer(bytes.NewReader(rawHTML)) | ||||
|  | ||||
| OUTER_LOOP: | ||||
| 	for html.ErrorToken != tokenizer.Next() { | ||||
| 		token := tokenizer.Token() | ||||
| 		switch token.Type { | ||||
| 		case html.TextToken: | ||||
| 			buf.Write(RenderSpecialLink([]byte(token.String()), urlPrefix, metas, isWikiMarkdown)) | ||||
|  | ||||
| 		case html.StartTagToken: | ||||
| 			buf.WriteString(token.String()) | ||||
| 			tagName := token.Data | ||||
| 			// If this is an excluded tag, we skip processing all output until a close tag is encountered. | ||||
| 			if strings.EqualFold("a", tagName) || strings.EqualFold("code", tagName) || strings.EqualFold("pre", tagName) { | ||||
| 				stackNum := 1 | ||||
| 				for html.ErrorToken != tokenizer.Next() { | ||||
| 					token = tokenizer.Token() | ||||
|  | ||||
| 					// Copy the token to the output verbatim | ||||
| 					buf.Write(RenderShortLinks([]byte(token.String()), urlPrefix, true, isWikiMarkdown)) | ||||
|  | ||||
| 					if token.Type == html.StartTagToken && !com.IsSliceContainsStr(noEndTags, token.Data) { | ||||
| 						stackNum++ | ||||
| 					} | ||||
|  | ||||
| 					// If this is the close tag to the outer-most, we are done | ||||
| 					if token.Type == html.EndTagToken { | ||||
| 						stackNum-- | ||||
|  | ||||
| 						if stackNum <= 0 && strings.EqualFold(tagName, token.Data) { | ||||
| 							break | ||||
| 						} | ||||
| 					} | ||||
| 				} | ||||
| 				continue OUTER_LOOP | ||||
| 			} | ||||
|  | ||||
| 			if !com.IsSliceContainsStr(noEndTags, tagName) { | ||||
| 				startTags = append(startTags, tagName) | ||||
| 			} | ||||
|  | ||||
| 		case html.EndTagToken: | ||||
| 			if len(startTags) == 0 { | ||||
| 				buf.WriteString(token.String()) | ||||
| 				break | ||||
| 			} | ||||
|  | ||||
| 			buf.Write(leftAngleBracket) | ||||
| 			buf.WriteString(startTags[len(startTags)-1]) | ||||
| 			buf.Write(rightAngleBracket) | ||||
| 			startTags = startTags[:len(startTags)-1] | ||||
| 		default: | ||||
| 			buf.WriteString(token.String()) | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if io.EOF == tokenizer.Err() { | ||||
| 		return buf.Bytes() | ||||
| 	} | ||||
|  | ||||
| 	// If we are not at the end of the input, then some other parsing error has occurred, | ||||
| 	// so return the input verbatim. | ||||
| 	return rawHTML | ||||
| } | ||||
							
								
								
									
										460
									
								
								modules/markup/html_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										460
									
								
								modules/markup/html_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,460 @@ | ||||
| // Copyright 2017 The Gitea Authors. All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package markup_test | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 	"testing" | ||||
|  | ||||
| 	_ "code.gitea.io/gitea/modules/markdown" | ||||
| 	. "code.gitea.io/gitea/modules/markup" | ||||
| 	"code.gitea.io/gitea/modules/setting" | ||||
|  | ||||
| 	"github.com/stretchr/testify/assert" | ||||
| ) | ||||
|  | ||||
| const AppURL = "http://localhost:3000/" | ||||
| const Repo = "gogits/gogs" | ||||
| const AppSubURL = AppURL + Repo + "/" | ||||
|  | ||||
| var numericMetas = map[string]string{ | ||||
| 	"format": "https://someurl.com/{user}/{repo}/{index}", | ||||
| 	"user":   "someUser", | ||||
| 	"repo":   "someRepo", | ||||
| 	"style":  IssueNameStyleNumeric, | ||||
| } | ||||
|  | ||||
| var alphanumericMetas = map[string]string{ | ||||
| 	"format": "https://someurl.com/{user}/{repo}/{index}", | ||||
| 	"user":   "someUser", | ||||
| 	"repo":   "someRepo", | ||||
| 	"style":  IssueNameStyleAlphanumeric, | ||||
| } | ||||
|  | ||||
| // numericLink an HTML to a numeric-style issue | ||||
| func numericIssueLink(baseURL string, index int) string { | ||||
| 	return link(URLJoin(baseURL, strconv.Itoa(index)), fmt.Sprintf("#%d", index)) | ||||
| } | ||||
|  | ||||
| // alphanumLink an HTML link to an alphanumeric-style issue | ||||
| func alphanumIssueLink(baseURL string, name string) string { | ||||
| 	return link(URLJoin(baseURL, name), name) | ||||
| } | ||||
|  | ||||
| // urlContentsLink an HTML link whose contents is the target URL | ||||
| func urlContentsLink(href string) string { | ||||
| 	return link(href, href) | ||||
| } | ||||
|  | ||||
// link returns an HTML anchor pointing to href with contents as its body.
// Neither argument is escaped.
func link(href, contents string) string {
	const anchorFormat = `<a href="%s">%s</a>`
	return fmt.Sprintf(anchorFormat, href, contents)
}
|  | ||||
| func testRenderIssueIndexPattern(t *testing.T, input, expected string, metas map[string]string) { | ||||
| 	assert.Equal(t, expected, | ||||
| 		string(RenderIssueIndexPattern([]byte(input), AppSubURL, metas))) | ||||
| } | ||||
|  | ||||
| func TestURLJoin(t *testing.T) { | ||||
| 	type test struct { | ||||
| 		Expected string | ||||
| 		Base     string | ||||
| 		Elements []string | ||||
| 	} | ||||
| 	newTest := func(expected, base string, elements ...string) test { | ||||
| 		return test{Expected: expected, Base: base, Elements: elements} | ||||
| 	} | ||||
| 	for _, test := range []test{ | ||||
| 		newTest("https://try.gitea.io/a/b/c", | ||||
| 			"https://try.gitea.io", "a/b", "c"), | ||||
| 		newTest("https://try.gitea.io/a/b/c", | ||||
| 			"https://try.gitea.io/", "/a/b/", "/c/"), | ||||
| 		newTest("https://try.gitea.io/a/c", | ||||
| 			"https://try.gitea.io/", "/a/./b/", "../c/"), | ||||
| 		newTest("a/b/c", | ||||
| 			"a", "b/c/"), | ||||
| 		newTest("a/b/d", | ||||
| 			"a/", "b/c/", "/../d/"), | ||||
| 	} { | ||||
| 		assert.Equal(t, test.Expected, URLJoin(test.Base, test.Elements...)) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestRender_IssueIndexPattern(t *testing.T) { | ||||
| 	// numeric: render inputs without valid mentions | ||||
| 	test := func(s string) { | ||||
| 		testRenderIssueIndexPattern(t, s, s, nil) | ||||
| 		testRenderIssueIndexPattern(t, s, s, numericMetas) | ||||
| 	} | ||||
|  | ||||
| 	// should not render anything when there are no mentions | ||||
| 	test("") | ||||
| 	test("this is a test") | ||||
| 	test("test 123 123 1234") | ||||
| 	test("#") | ||||
| 	test("# # #") | ||||
| 	test("# 123") | ||||
| 	test("#abcd") | ||||
| 	test("##1234") | ||||
| 	test("test#1234") | ||||
| 	test("#1234test") | ||||
| 	test(" test #1234test") | ||||
|  | ||||
| 	// should not render issue mention without leading space | ||||
| 	test("test#54321 issue") | ||||
|  | ||||
| 	// should not render issue mention without trailing space | ||||
| 	test("test #54321issue") | ||||
| } | ||||
|  | ||||
| func TestRender_IssueIndexPattern2(t *testing.T) { | ||||
| 	setting.AppURL = AppURL | ||||
| 	setting.AppSubURL = AppSubURL | ||||
|  | ||||
| 	// numeric: render inputs with valid mentions | ||||
| 	test := func(s, expectedFmt string, indices ...int) { | ||||
| 		links := make([]interface{}, len(indices)) | ||||
| 		for i, index := range indices { | ||||
| 			links[i] = numericIssueLink(URLJoin(setting.AppSubURL, "issues"), index) | ||||
| 		} | ||||
| 		expectedNil := fmt.Sprintf(expectedFmt, links...) | ||||
| 		testRenderIssueIndexPattern(t, s, expectedNil, nil) | ||||
|  | ||||
| 		for i, index := range indices { | ||||
| 			links[i] = numericIssueLink("https://someurl.com/someUser/someRepo/", index) | ||||
| 		} | ||||
| 		expectedNum := fmt.Sprintf(expectedFmt, links...) | ||||
| 		testRenderIssueIndexPattern(t, s, expectedNum, numericMetas) | ||||
| 	} | ||||
|  | ||||
| 	// should render freestanding mentions | ||||
| 	test("#1234 test", "%s test", 1234) | ||||
| 	test("test #8 issue", "test %s issue", 8) | ||||
| 	test("test issue #1234", "test issue %s", 1234) | ||||
|  | ||||
| 	// should render mentions in parentheses | ||||
| 	test("(#54321 issue)", "(%s issue)", 54321) | ||||
| 	test("test (#9801 extra) issue", "test (%s extra) issue", 9801) | ||||
| 	test("test (#1)", "test (%s)", 1) | ||||
|  | ||||
| 	// should render multiple issue mentions in the same line | ||||
| 	test("#54321 #1243", "%s %s", 54321, 1243) | ||||
| 	test("wow (#54321 #1243)", "wow (%s %s)", 54321, 1243) | ||||
| 	test("(#4)(#5)", "(%s)(%s)", 4, 5) | ||||
| 	test("#1 (#4321) test", "%s (%s) test", 1, 4321) | ||||
| } | ||||
|  | ||||
| func TestRender_IssueIndexPattern3(t *testing.T) { | ||||
| 	setting.AppURL = AppURL | ||||
| 	setting.AppSubURL = AppSubURL | ||||
|  | ||||
| 	// alphanumeric: render inputs without valid mentions | ||||
| 	test := func(s string) { | ||||
| 		testRenderIssueIndexPattern(t, s, s, alphanumericMetas) | ||||
| 	} | ||||
| 	test("") | ||||
| 	test("this is a test") | ||||
| 	test("test 123 123 1234") | ||||
| 	test("#") | ||||
| 	test("##1234") | ||||
| 	test("# 123") | ||||
| 	test("#abcd") | ||||
| 	test("test #123") | ||||
| 	test("abc-1234")         // issue prefix must be capital | ||||
| 	test("ABc-1234")         // issue prefix must be _all_ capital | ||||
| 	test("ABCDEFGHIJK-1234") // the limit is 10 characters in the prefix | ||||
| 	test("ABC1234")          // dash is required | ||||
| 	test("test ABC- test")   // number is required | ||||
| 	test("test -1234 test")  // prefix is required | ||||
| 	test("testABC-123 test") // leading space is required | ||||
| 	test("test ABC-123test") // trailing space is required | ||||
| 	test("ABC-0123")         // no leading zero | ||||
| } | ||||
|  | ||||
| func TestRender_IssueIndexPattern4(t *testing.T) { | ||||
| 	setting.AppURL = AppURL | ||||
| 	setting.AppSubURL = AppSubURL | ||||
|  | ||||
| 	// alphanumeric: render inputs with valid mentions | ||||
| 	test := func(s, expectedFmt string, names ...string) { | ||||
| 		links := make([]interface{}, len(names)) | ||||
| 		for i, name := range names { | ||||
| 			links[i] = alphanumIssueLink("https://someurl.com/someUser/someRepo/", name) | ||||
| 		} | ||||
| 		expected := fmt.Sprintf(expectedFmt, links...) | ||||
| 		testRenderIssueIndexPattern(t, s, expected, alphanumericMetas) | ||||
| 	} | ||||
| 	test("OTT-1234 test", "%s test", "OTT-1234") | ||||
| 	test("test T-12 issue", "test %s issue", "T-12") | ||||
| 	test("test issue ABCDEFGHIJ-1234567890", "test issue %s", "ABCDEFGHIJ-1234567890") | ||||
| } | ||||
|  | ||||
| func TestRender_AutoLink(t *testing.T) { | ||||
| 	setting.AppURL = AppURL | ||||
| 	setting.AppSubURL = AppSubURL | ||||
|  | ||||
| 	test := func(input, expected string) { | ||||
| 		buffer := RenderSpecialLink([]byte(input), setting.AppSubURL, nil, false) | ||||
| 		assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer))) | ||||
| 		buffer = RenderSpecialLink([]byte(input), setting.AppSubURL, nil, true) | ||||
| 		assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer))) | ||||
| 	} | ||||
|  | ||||
| 	// render valid issue URLs | ||||
| 	test(URLJoin(setting.AppSubURL, "issues", "3333"), | ||||
| 		numericIssueLink(URLJoin(setting.AppSubURL, "issues"), 3333)) | ||||
|  | ||||
| 	// render external issue URLs | ||||
| 	for _, externalURL := range []string{ | ||||
| 		"http://1111/2222/ssss-issues/3333?param=blah&blahh=333", | ||||
| 		"http://test.com/issues/33333", | ||||
| 		"https://issues/333"} { | ||||
| 		test(externalURL, externalURL) | ||||
| 	} | ||||
|  | ||||
| 	// render valid commit URLs | ||||
| 	tmp := URLJoin(AppSubURL, "commit", "d8a994ef243349f321568f9e36d5c3f444b99cae") | ||||
| 	test(tmp, "<a href=\""+tmp+"\">d8a994ef24</a>") | ||||
| 	tmp += "#diff-2" | ||||
| 	test(tmp, "<a href=\""+tmp+"\">d8a994ef24 (diff-2)</a>") | ||||
|  | ||||
| 	// render other commit URLs | ||||
| 	tmp = "https://external-link.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2" | ||||
| 	test(tmp, "<a href=\""+tmp+"\">d8a994ef24 (diff-2)</a>") | ||||
| } | ||||
|  | ||||
| func TestRender_Commits(t *testing.T) { | ||||
| 	setting.AppURL = AppURL | ||||
| 	setting.AppSubURL = AppSubURL | ||||
|  | ||||
| 	test := func(input, expected string) { | ||||
| 		buffer := RenderString(".md", input, setting.AppSubURL, nil) | ||||
| 		assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer))) | ||||
| 	} | ||||
|  | ||||
| 	var sha = "b6dd6210eaebc915fd5be5579c58cce4da2e2579" | ||||
| 	var commit = URLJoin(AppSubURL, "commit", sha) | ||||
| 	var subtree = URLJoin(commit, "src") | ||||
| 	var tree = strings.Replace(subtree, "/commit/", "/tree/", -1) | ||||
| 	var src = strings.Replace(subtree, "/commit/", "/src/", -1) | ||||
|  | ||||
| 	test(sha, `<p><a href="`+commit+`" rel="nofollow">b6dd6210ea</a></p>`) | ||||
| 	test(sha[:7], `<p><a href="`+commit[:len(commit)-(40-7)]+`" rel="nofollow">b6dd621</a></p>`) | ||||
| 	test(sha[:39], `<p><a href="`+commit[:len(commit)-(40-39)]+`" rel="nofollow">b6dd6210ea</a></p>`) | ||||
| 	test(commit, `<p><a href="`+commit+`" rel="nofollow">b6dd6210ea</a></p>`) | ||||
| 	test(tree, `<p><a href="`+src+`" rel="nofollow">b6dd6210ea/src</a></p>`) | ||||
| 	test("commit "+sha, `<p>commit <a href="`+commit+`" rel="nofollow">b6dd6210ea</a></p>`) | ||||
| } | ||||
|  | ||||
| func TestRender_CrossReferences(t *testing.T) { | ||||
| 	setting.AppURL = AppURL | ||||
| 	setting.AppSubURL = AppSubURL | ||||
|  | ||||
| 	test := func(input, expected string) { | ||||
| 		buffer := RenderString("a.md", input, setting.AppSubURL, nil) | ||||
| 		assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer))) | ||||
| 	} | ||||
|  | ||||
| 	test( | ||||
| 		"gogits/gogs#12345", | ||||
| 		`<p><a href="`+URLJoin(AppURL, "gogits", "gogs", "issues", "12345")+`" rel="nofollow">gogits/gogs#12345</a></p>`) | ||||
| } | ||||
|  | ||||
| func TestRender_FullIssueURLs(t *testing.T) { | ||||
| 	setting.AppURL = AppURL | ||||
| 	setting.AppSubURL = AppSubURL | ||||
|  | ||||
| 	test := func(input, expected string) { | ||||
| 		result := RenderFullIssuePattern([]byte(input)) | ||||
| 		assert.Equal(t, expected, string(result)) | ||||
| 	} | ||||
| 	test("Here is a link https://git.osgeo.org/gogs/postgis/postgis/pulls/6", | ||||
| 		"Here is a link https://git.osgeo.org/gogs/postgis/postgis/pulls/6") | ||||
| 	test("Look here http://localhost:3000/person/repo/issues/4", | ||||
| 		`Look here <a href="http://localhost:3000/person/repo/issues/4">#4</a>`) | ||||
| 	test("http://localhost:3000/person/repo/issues/4#issuecomment-1234", | ||||
| 		`<a href="http://localhost:3000/person/repo/issues/4#issuecomment-1234">#4</a>`) | ||||
| } | ||||
|  | ||||
| func TestRegExp_MentionPattern(t *testing.T) { | ||||
| 	trueTestCases := []string{ | ||||
| 		"@Unknwon", | ||||
| 		"@ANT_123", | ||||
| 		"@xxx-DiN0-z-A..uru..s-xxx", | ||||
| 		"   @lol   ", | ||||
| 		" @Te/st", | ||||
| 	} | ||||
| 	falseTestCases := []string{ | ||||
| 		"@ 0", | ||||
| 		"@ ", | ||||
| 		"@", | ||||
| 		"", | ||||
| 		"ABC", | ||||
| 	} | ||||
|  | ||||
| 	for _, testCase := range trueTestCases { | ||||
| 		res := MentionPattern.MatchString(testCase) | ||||
| 		if !res { | ||||
| 			println() | ||||
| 			println(testCase) | ||||
| 		} | ||||
| 		assert.True(t, res) | ||||
| 	} | ||||
| 	for _, testCase := range falseTestCases { | ||||
| 		res := MentionPattern.MatchString(testCase) | ||||
| 		if res { | ||||
| 			println() | ||||
| 			println(testCase) | ||||
| 		} | ||||
| 		assert.False(t, res) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestRegExp_IssueNumericPattern(t *testing.T) { | ||||
| 	trueTestCases := []string{ | ||||
| 		"#1234", | ||||
| 		"#0", | ||||
| 		"#1234567890987654321", | ||||
| 	} | ||||
| 	falseTestCases := []string{ | ||||
| 		"# 1234", | ||||
| 		"# 0", | ||||
| 		"# ", | ||||
| 		"#", | ||||
| 		"#ABC", | ||||
| 		"#1A2B", | ||||
| 		"", | ||||
| 		"ABC", | ||||
| 	} | ||||
|  | ||||
| 	for _, testCase := range trueTestCases { | ||||
| 		assert.True(t, IssueNumericPattern.MatchString(testCase)) | ||||
| 	} | ||||
| 	for _, testCase := range falseTestCases { | ||||
| 		assert.False(t, IssueNumericPattern.MatchString(testCase)) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestRegExp_IssueAlphanumericPattern(t *testing.T) { | ||||
| 	trueTestCases := []string{ | ||||
| 		"ABC-1234", | ||||
| 		"A-1", | ||||
| 		"RC-80", | ||||
| 		"ABCDEFGHIJ-1234567890987654321234567890", | ||||
| 	} | ||||
| 	falseTestCases := []string{ | ||||
| 		"RC-08", | ||||
| 		"PR-0", | ||||
| 		"ABCDEFGHIJK-1", | ||||
| 		"PR_1", | ||||
| 		"", | ||||
| 		"#ABC", | ||||
| 		"", | ||||
| 		"ABC", | ||||
| 		"GG-", | ||||
| 		"rm-1", | ||||
| 	} | ||||
|  | ||||
| 	for _, testCase := range trueTestCases { | ||||
| 		assert.True(t, IssueAlphanumericPattern.MatchString(testCase)) | ||||
| 	} | ||||
| 	for _, testCase := range falseTestCases { | ||||
| 		assert.False(t, IssueAlphanumericPattern.MatchString(testCase)) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestRegExp_Sha1CurrentPattern(t *testing.T) { | ||||
| 	trueTestCases := []string{ | ||||
| 		"d8a994ef243349f321568f9e36d5c3f444b99cae", | ||||
| 		"abcdefabcdefabcdefabcdefabcdefabcdefabcd", | ||||
| 	} | ||||
| 	falseTestCases := []string{ | ||||
| 		"test", | ||||
| 		"abcdefg", | ||||
| 		"abcdefghijklmnopqrstuvwxyzabcdefghijklmn", | ||||
| 		"abcdefghijklmnopqrstuvwxyzabcdefghijklmO", | ||||
| 	} | ||||
|  | ||||
| 	for _, testCase := range trueTestCases { | ||||
| 		assert.True(t, Sha1CurrentPattern.MatchString(testCase)) | ||||
| 	} | ||||
| 	for _, testCase := range falseTestCases { | ||||
| 		assert.False(t, Sha1CurrentPattern.MatchString(testCase)) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestRegExp_AnySHA1Pattern(t *testing.T) { | ||||
| 	testCases := map[string][]string{ | ||||
| 		"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": { | ||||
| 			"https", | ||||
| 			"github.com", | ||||
| 			"jquery", | ||||
| 			"jquery", | ||||
| 			"blob", | ||||
| 			"a644101ed04d0beacea864ce805e0c4f86ba1cd1", | ||||
| 			"test/unit/event.js", | ||||
| 			"L2703", | ||||
| 		}, | ||||
| 		"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": { | ||||
| 			"https", | ||||
| 			"github.com", | ||||
| 			"jquery", | ||||
| 			"jquery", | ||||
| 			"blob", | ||||
| 			"a644101ed04d0beacea864ce805e0c4f86ba1cd1", | ||||
| 			"test/unit/event.js", | ||||
| 			"", | ||||
| 		}, | ||||
| 		"https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": { | ||||
| 			"https", | ||||
| 			"github.com", | ||||
| 			"jquery", | ||||
| 			"jquery", | ||||
| 			"commit", | ||||
| 			"0705be475092aede1eddae01319ec931fb9c65fc", | ||||
| 			"", | ||||
| 			"", | ||||
| 		}, | ||||
| 		"https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": { | ||||
| 			"https", | ||||
| 			"github.com", | ||||
| 			"jquery", | ||||
| 			"jquery", | ||||
| 			"tree", | ||||
| 			"0705be475092aede1eddae01319ec931fb9c65fc", | ||||
| 			"src", | ||||
| 			"", | ||||
| 		}, | ||||
| 		"https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": { | ||||
| 			"https", | ||||
| 			"try.gogs.io", | ||||
| 			"gogs", | ||||
| 			"gogs", | ||||
| 			"commit", | ||||
| 			"d8a994ef243349f321568f9e36d5c3f444b99cae", | ||||
| 			"", | ||||
| 			"diff-2", | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	for k, v := range testCases { | ||||
| 		assert.Equal(t, AnySHA1Pattern.FindStringSubmatch(k)[1:], v) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestMisc_IsSameDomain(t *testing.T) { | ||||
| 	setting.AppURL = AppURL | ||||
| 	setting.AppSubURL = AppSubURL | ||||
|  | ||||
| 	var sha = "b6dd6210eaebc915fd5be5579c58cce4da2e2579" | ||||
| 	var commit = URLJoin(AppSubURL, "commit", sha) | ||||
|  | ||||
| 	assert.True(t, IsSameDomain(commit)) | ||||
| 	assert.False(t, IsSameDomain("http://google.com/ncr")) | ||||
| 	assert.False(t, IsSameDomain("favicon.ico")) | ||||
| } | ||||
| @@ -9,6 +9,12 @@ import ( | ||||
| 	"strings" | ||||
| ) | ||||
|  | ||||
| // Init initialize regexps for markdown parsing | ||||
| func Init() { | ||||
| 	getIssueFullPattern() | ||||
| 	NewSanitizer() | ||||
| } | ||||
|  | ||||
| // Parser defines an interface for parsing markup file to HTML | ||||
| type Parser interface { | ||||
| 	Name() string // markup format name | ||||
| @@ -17,66 +23,94 @@ type Parser interface { | ||||
| } | ||||
|  | ||||
| var ( | ||||
| 	parsers = make(map[string]Parser) | ||||
| 	extParsers = make(map[string]Parser) | ||||
| 	parsers    = make(map[string]Parser) | ||||
| ) | ||||
|  | ||||
| // RegisterParser registers a new markup file parser | ||||
| func RegisterParser(parser Parser) { | ||||
| 	parsers[parser.Name()] = parser | ||||
| 	for _, ext := range parser.Extensions() { | ||||
| 		parsers[strings.ToLower(ext)] = parser | ||||
| 		extParsers[strings.ToLower(ext)] = parser | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // GetParserByFileName get parser by filename | ||||
| func GetParserByFileName(filename string) Parser { | ||||
| 	extension := strings.ToLower(filepath.Ext(filename)) | ||||
| 	return extParsers[extension] | ||||
| } | ||||
|  | ||||
| // GetParserByType returns a parser according type | ||||
| func GetParserByType(tp string) Parser { | ||||
| 	return parsers[tp] | ||||
| } | ||||
|  | ||||
| // Render renders markup file to HTML with all specific handling stuff. | ||||
| func Render(filename string, rawBytes []byte, urlPrefix string, metas map[string]string) []byte { | ||||
| 	return render(filename, rawBytes, urlPrefix, metas, false) | ||||
| 	return renderFile(filename, rawBytes, urlPrefix, metas, false) | ||||
| } | ||||
|  | ||||
| func render(filename string, rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { | ||||
| 	extension := strings.ToLower(filepath.Ext(filename)) | ||||
| 	if parser, ok := parsers[extension]; ok { | ||||
| 		return parser.Render(rawBytes, urlPrefix, metas, isWiki) | ||||
| 	} | ||||
| 	return nil | ||||
| // RenderByType renders markup to HTML with special links and returns string type. | ||||
| func RenderByType(tp string, rawBytes []byte, urlPrefix string, metas map[string]string) []byte { | ||||
| 	return renderByType(tp, rawBytes, urlPrefix, metas, false) | ||||
| } | ||||
|  | ||||
| // RenderString renders Markdown to HTML with special links and returns string type. | ||||
| func RenderString(filename string, raw, urlPrefix string, metas map[string]string) string { | ||||
| 	return string(render(filename, []byte(raw), urlPrefix, metas, false)) | ||||
| 	return string(renderFile(filename, []byte(raw), urlPrefix, metas, false)) | ||||
| } | ||||
|  | ||||
| // RenderWiki renders markdown wiki page to HTML and return HTML string | ||||
| func RenderWiki(filename string, rawBytes []byte, urlPrefix string, metas map[string]string) string { | ||||
| 	return string(render(filename, rawBytes, urlPrefix, metas, true)) | ||||
| 	return string(renderFile(filename, rawBytes, urlPrefix, metas, true)) | ||||
| } | ||||
|  | ||||
| func render(parser Parser, rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { | ||||
| 	urlPrefix = strings.Replace(urlPrefix, " ", "+", -1) | ||||
| 	result := parser.Render(rawBytes, urlPrefix, metas, isWiki) | ||||
| 	result = PostProcess(result, urlPrefix, metas, isWiki) | ||||
| 	return SanitizeBytes(result) | ||||
| } | ||||
|  | ||||
| func renderByType(tp string, rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { | ||||
| 	if parser, ok := parsers[tp]; ok { | ||||
| 		return render(parser, rawBytes, urlPrefix, metas, isWiki) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func renderFile(filename string, rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { | ||||
| 	extension := strings.ToLower(filepath.Ext(filename)) | ||||
| 	if parser, ok := extParsers[extension]; ok { | ||||
| 		return render(parser, rawBytes, urlPrefix, metas, isWiki) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // Type returns the markup format name for the given filename, or "" if no parser matches | ||||
| func Type(filename string) string { | ||||
| 	extension := strings.ToLower(filepath.Ext(filename)) | ||||
| 	if parser, ok := parsers[extension]; ok { | ||||
| 	if parser := GetParserByFileName(filename); parser != nil { | ||||
| 		return parser.Name() | ||||
| 	} | ||||
| 	return "" | ||||
| } | ||||
|  | ||||
| // ReadmeFileType reports whether name looks like a README file | ||||
| // based on its name and find the parser via its ext name | ||||
| func ReadmeFileType(name string) (string, bool) { | ||||
| 	if IsReadmeFile(name) { | ||||
| 		return Type(name), true | ||||
| // IsMarkupFile reports whether file is a markup type file | ||||
| func IsMarkupFile(name, markup string) bool { | ||||
| 	if parser := GetParserByFileName(name); parser != nil { | ||||
| 		return parser.Name() == markup | ||||
| 	} | ||||
| 	return "", false | ||||
| 	return false | ||||
| } | ||||
|  | ||||
| // IsReadmeFile reports whether name looks like a README file | ||||
| // based on its name. | ||||
| func IsReadmeFile(name string) bool { | ||||
| 	name = strings.ToLower(name) | ||||
| 	if len(name) < 6 { | ||||
| 		return false | ||||
| 	} | ||||
|  | ||||
| 	name = strings.ToLower(name) | ||||
| 	if len(name) == 6 { | ||||
| 	} else if len(name) == 6 { | ||||
| 		return name == "readme" | ||||
| 	} | ||||
| 	return name[:7] == "readme." | ||||
|   | ||||
| @@ -2,11 +2,14 @@ | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package markup | ||||
| package markup_test | ||||
|  | ||||
| import ( | ||||
| 	"testing" | ||||
|  | ||||
| 	_ "code.gitea.io/gitea/modules/markdown" | ||||
| 	. "code.gitea.io/gitea/modules/markup" | ||||
|  | ||||
| 	"github.com/stretchr/testify/assert" | ||||
| ) | ||||
|  | ||||
|   | ||||
							
								
								
									
										58
									
								
								modules/markup/sanitizer.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								modules/markup/sanitizer.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,58 @@ | ||||
| // Copyright 2017 The Gitea Authors. All rights reserved. | ||||
| // Copyright 2017 The Gogs Authors. All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package markup | ||||
|  | ||||
| import ( | ||||
| 	"regexp" | ||||
| 	"sync" | ||||
|  | ||||
| 	"code.gitea.io/gitea/modules/setting" | ||||
|  | ||||
| 	"github.com/microcosm-cc/bluemonday" | ||||
| ) | ||||
|  | ||||
| // Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow | ||||
| // any modification to the underlying policies once it's been created. | ||||
| type Sanitizer struct { | ||||
| 	policy *bluemonday.Policy | ||||
| 	init   sync.Once | ||||
| } | ||||
|  | ||||
| var sanitizer = &Sanitizer{} | ||||
|  | ||||
| // NewSanitizer initializes sanitizer with allowed attributes based on settings. | ||||
| // Multiple calls to this function will only create one instance of Sanitizer during | ||||
| // entire application lifecycle. | ||||
| func NewSanitizer() { | ||||
| 	sanitizer.init.Do(func() { | ||||
| 		sanitizer.policy = bluemonday.UGCPolicy() | ||||
| 		// We only want to allow HighlightJS specific classes for code blocks | ||||
| 		sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^language-\w+$`)).OnElements("code") | ||||
|  | ||||
| 		// Checkboxes | ||||
| 		sanitizer.policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") | ||||
| 		sanitizer.policy.AllowAttrs("checked", "disabled").OnElements("input") | ||||
|  | ||||
| 		// Custom URL-Schemes | ||||
| 		sanitizer.policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...) | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. | ||||
| func Sanitize(s string) string { | ||||
| 	NewSanitizer() | ||||
| 	return sanitizer.policy.Sanitize(s) | ||||
| } | ||||
|  | ||||
| // SanitizeBytes takes a []byte slice that contains a HTML fragment or document and applies policy whitelist. | ||||
| func SanitizeBytes(b []byte) []byte { | ||||
| 	if len(b) == 0 { | ||||
| 		// nothing to sanitize | ||||
| 		return b | ||||
| 	} | ||||
| 	NewSanitizer() | ||||
| 	return sanitizer.policy.SanitizeBytes(b) | ||||
| } | ||||
							
								
								
									
										44
									
								
								modules/markup/sanitizer_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								modules/markup/sanitizer_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,44 @@ | ||||
| // Copyright 2017 The Gitea Authors. All rights reserved. | ||||
| // Copyright 2017 The Gogs Authors. All rights reserved. | ||||
| // Use of this source code is governed by a MIT-style | ||||
| // license that can be found in the LICENSE file. | ||||
|  | ||||
| package markup | ||||
|  | ||||
| import ( | ||||
| 	"testing" | ||||
|  | ||||
| 	"github.com/stretchr/testify/assert" | ||||
| ) | ||||
|  | ||||
| func Test_Sanitizer(t *testing.T) { | ||||
| 	NewSanitizer() | ||||
| 	testCases := []string{ | ||||
| 		// Regular | ||||
| 		`<a onblur="alert(secret)" href="http://www.google.com">Google</a>`, `<a href="http://www.google.com" rel="nofollow">Google</a>`, | ||||
|  | ||||
| 		// Code highlighting class | ||||
| 		`<code class="random string"></code>`, `<code></code>`, | ||||
| 		`<code class="language-random ui tab active menu attached animating sidebar following bar center"></code>`, `<code></code>`, | ||||
| 		`<code class="language-go"></code>`, `<code class="language-go"></code>`, | ||||
|  | ||||
| 		// Input checkbox | ||||
| 		`<input type="hidden">`, ``, | ||||
| 		`<input type="checkbox">`, `<input type="checkbox">`, | ||||
| 		`<input checked disabled autofocus>`, `<input checked="" disabled="">`, | ||||
|  | ||||
| 		// Code highlight injection | ||||
| 		`<code class="language-random ui tab active menu attached animating sidebar following bar center"></code>`, `<code></code>`, | ||||
| 		`<code class="language-lol ui tab active menu attached animating sidebar following bar center"> | ||||
| <code class="language-lol ui container input huge basic segment center"> </code> | ||||
| <img src="https://try.gogs.io/img/favicon.png" width="200" height="200"> | ||||
| <code class="language-lol ui container input massive basic segment">Hello there! Something has gone wrong, we are working on it.</code> | ||||
| <code class="language-lol ui container input huge basic segment">In the meantime, play a game with us at <a href="http://example.com/">example.com</a>.</code> | ||||
| </code>`, "<code>\n<code>\u00a0</code>\n<img src=\"https://try.gogs.io/img/favicon.png\" width=\"200\" height=\"200\">\n<code>Hello there! Something has gone wrong, we are working on it.</code>\n<code>In the meantime, play a game with us at\u00a0<a href=\"http://example.com/\" rel=\"nofollow\">example.com</a>.</code>\n</code>", | ||||
| 	} | ||||
|  | ||||
| 	for i := 0; i < len(testCases); i += 2 { | ||||
| 		assert.Equal(t, testCases[i+1], Sanitize(testCases[i])) | ||||
| 		assert.Equal(t, testCases[i+1], string(SanitizeBytes([]byte(testCases[i])))) | ||||
| 	} | ||||
| } | ||||
		Reference in New Issue
	
	Block a user