mirror of
https://github.com/go-gitea/gitea.git
synced 2026-05-06 04:01:05 +09:00
| go | from | to | | --- | --- | --- | | connectrpc.com/connect | `1.19.1 ` | `1.19.2` | | github.com/Azure/go-ntlmssp | `0.1.0` | `0.1.1` | | github.com/alecthomas/chroma/v2 | `2.23.1` | `2.24.1` | | github.com/aws/aws-sdk-go-v2/credentials | `1.19.15` | `1.19.16` | | github.com/aws/aws-sdk-go-v2/service/codecommit | `1.33.13` | `1.33.14` | | github.com/blevesearch/bleve/v2 | `2.5.7` | `2.6.0` | | github.com/caddyserver/certmagic | `0.25.2` | `0.25.3` | | github.com/fsnotify/fsnotify | `1.9.0` | `1.10.1` | | github.com/getkin/kin-openapi | `0.134.0` | `0.137.0` | | github.com/go-co-op/gocron/v2 | `2.21.0` | `2.21.1` | | github.com/go-sql-driver/mysql | `1.9.3` | `1.10.0` | | github.com/go-webauthn/webauthn | `0.16.5` | `0.17.2` | | github.com/klauspost/compress | `1.18.5` | `1.18.6` | | github.com/mattn/go-isatty | `0.0.21` | `0.0.22` | | github.com/mattn/go-sqlite3 | `1.14.42` | `1.14.44` | | github.com/minio/minio-go/v7 | `7.0.100` | `7.1.0` | | github.com/redis/go-redis/v9 | `9.18.0` | `9.19.0` | | google.golang.org/grpc | `1.80.0` | `1.81.0` | | gopkg.in/ini.v1 | `1.67.1` | `1.67.2` | | js | from | to | | --- | --- | --- | | @codemirror/search | `6.6.0` | `6.7.0` | | @primer/octicons | `19.24.1` | `19.25.0` | | clippie | `4.1.14` | `4.1.15` | | easymde | `2.20.0` | `2.21.0` | | postcss | `8.5.10` | `8.5.13` | | rolldown-license-plugin | `3.0.1` | `3.0.4` | | swagger-ui-dist | `5.32.4` | `5.32.5` | | vite | `8.0.9` | `8.0.10` | | vite-string-plugin | `2.0.2` | `2.0.4` | | vue | `3.5.32` | `3.5.33` | | @typescript-eslint/parser | `8.59.0` | `8.59.1` | | eslint | `10.2.1` | `10.3.0` | | eslint-plugin-vue | `10.8.0` | `10.9.0` | | globals | `17.5.0` | `17.6.0` | | material-icon-theme | `5.33.1` | `5.34.0` | | spectral-cli-bundle | `1.0.7` | `1.0.8` | | stylelint | `17.8.0` | `17.10.0` | | typescript-eslint | `8.59.0` | `8.59.1` | | updates | `17.16.3` | `17.16.8` | | vitest | `4.1.4` | `4.1.5` | | vue-tsc | `3.2.7` | `3.2.8` | | pnpm | `10.33.0` | `10.33.2` | | py 
| from | to | | --- | --- | --- | | click | `8.3.2` | `8.3.3` | | pathspec | `1.0.4` | `1.1.1` | --------- Co-authored-by: silverwind <me@silverwind.io> Co-authored-by: Claude (Opus 4.7) <noreply@anthropic.com>
313 lines
10 KiB
Go
313 lines
10 KiB
Go
// Copyright 2026 The Gitea Authors. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
package highlight
|
|
|
|
import (
|
|
"path"
|
|
"strings"
|
|
"sync"
|
|
|
|
"code.gitea.io/gitea/modules/analyze"
|
|
"code.gitea.io/gitea/modules/log"
|
|
|
|
"github.com/alecthomas/chroma/v2"
|
|
"github.com/alecthomas/chroma/v2/lexers"
|
|
"github.com/go-enry/go-enry/v2"
|
|
)
|
|
|
|
const (
	// mapKeyLowerPrefix marks the lower-cased duplicate keys stored in the lexer
	// lookup maps, so an exact (case-sensitive) match can be tried before a
	// case-insensitive one within the same map.
	mapKeyLowerPrefix = "lower/"

	// LanguagePlaintext is the language name used for plain text content.
	LanguagePlaintext = "plaintext"

	// chromaLexerFallback names the chroma fallback lexer; not referenced in this
	// part of the file — presumably used by rendering code elsewhere (verify).
	chromaLexerFallback = "fallback"
)
|
|
|
|
// chromaLexers is fully managed by us to do fast lookup for chroma lexers by file name or language name
// Don't use lexers.Get because it is very slow in many cases (iterate all rules, filepath glob match, etc.)
//
// The struct is built exactly once on first use (sync.OnceValue) from
// lexers.GlobalLexerRegistry and provides:
//   - lowerNameMap: lower-cased language name or alias -> lexer
//   - fileBaseMap: full base file name -> lexer (plus mapKeyLowerPrefix + lower-cased key)
//   - fileExtMap: file extension (".js") -> lexer (plus mapKeyLowerPrefix + lower-cased key)
//   - fileParts: substring patterns for partial file name matching
//
// Unsupported patterns or unresolved conflicts panic at init time, so a chroma
// upgrade that introduces new conflicts is detected immediately.
var chromaLexers = sync.OnceValue(func() (ret struct {
	conflictingExtLangMap   map[string]string // ext -> language chosen as the default for that conflicting ext
	conflictingAliasLangMap map[string]string // lower-cased alias -> language chosen as the default for that alias

	lowerNameMap map[string]chroma.Lexer // lexer name (lang name) in lower-case
	fileBaseMap  map[string]chroma.Lexer // full base file name (e.g. "Makefile") -> lexer
	fileExtMap   map[string]chroma.Lexer // file extension (e.g. ".js") -> lexer
	fileParts    []struct {
		part  string
		lexer chroma.Lexer
	}
},
) {
	ret.lowerNameMap = make(map[string]chroma.Lexer)
	ret.fileBaseMap = make(map[string]chroma.Lexer)
	ret.fileExtMap = make(map[string]chroma.Lexer)

	// Chroma has conflicts in file extension for different languages,
	// When we need to do fast render, there is no way to detect the language by content,
	// So we can only choose some default languages for the conflicted file extensions.
	// The trailing comment on each entry lists the other languages claiming the same extension.
	ret.conflictingExtLangMap = map[string]string{
		".as":  "ActionScript 3", // ActionScript
		".asm": "NASM",           // TASM, NASM, RGBDS Assembly, Z80 Assembly
		".ASM": "NASM",
		".bas":     "VB.net",       // QBasic
		".bf":      "Beef",         // Brainfuck
		".fs":      "FSharp",       // Forth
		".gd":      "GDScript",     // GDScript3
		".h":       "C",            // Objective-C
		".hcl":     "Terraform",    // HCL
		".hh":      "C++",          // HolyC
		".inc":     "PHP",          // ObjectPascal, POVRay, SourcePawn, PHTML
		".m":       "Objective-C",  // Matlab, Mathematica, Mason
		".mc":      "Mason",        // MonkeyC
		".mod":     "AMPL",         // Modula-2
		".network": "SYSTEMD",      // INI
		".php":     "PHP",          // PHTML
		".php3":    "PHP",          // PHTML
		".php4":    "PHP",          // PHTML
		".php5":    "PHP",          // PHTML
		".pl":      "Perl",         // Prolog, Raku
		".pm":      "Perl",         // Promela, Raku
		".pp":      "ObjectPascal", // Puppet
		".s":       "ArmAsm",       // GAS
		".S":       "ArmAsm",       // R, GAS
		".service": "SYSTEMD",      // INI
		".socket":  "SYSTEMD",      // INI
		".sql":     "SQL",          // MySQL
		".t":       "Perl",         // Raku
		".ts":      "TypeScript",   // TypoScript
		".v":       "V",            // verilog
		".xslt":    "HTML",         // XML
	}
	// use widely used language names as the default mapping to resolve name alias conflict
	ret.conflictingAliasLangMap = map[string]string{
		"hcl": "HCL", // Terraform
		"v":   "V",   // verilog
	}

	// isPlainPattern reports whether key contains no glob metacharacters.
	isPlainPattern := func(key string) bool {
		return !strings.ContainsAny(key, "*?[]") // only support simple patterns
	}

	// setFileNameMapWithLowerKey stores the key twice: verbatim, and lower-cased
	// under mapKeyLowerPrefix, so lookups can do an exact match before a
	// case-insensitive one. A verbatim duplicate means two lexers claim the same
	// name/ext and the conflict maps above must be extended.
	setFileNameMapWithLowerKey := func(m map[string]chroma.Lexer, key string, lexer chroma.Lexer) {
		if _, conflict := m[key]; conflict {
			panic("duplicate key in lexer map: " + key + ", need to add it to conflictingExtLangMap")
		}
		m[key] = lexer
		m[mapKeyLowerPrefix+strings.ToLower(key)] = lexer
	}

	// processFileName registers a single (already "[]"-expanded) filename pattern
	// for the lexer; it returns false for pattern shapes it cannot handle.
	processFileName := func(fileName string, lexer chroma.Lexer) bool {
		if isPlainPattern(fileName) {
			// full base name match
			setFileNameMapWithLowerKey(ret.fileBaseMap, fileName, lexer)
			return true
		}
		if strings.HasPrefix(fileName, "*") {
			// ext name match: "*.js"
			fileExt := strings.Trim(fileName, "*")
			if isPlainPattern(fileExt) {
				// for a conflicting extension, only the preset default language gets the map slot
				presetName := ret.conflictingExtLangMap[fileExt]
				if presetName == "" || lexer.Config().Name == presetName {
					setFileNameMapWithLowerKey(ret.fileExtMap, fileExt, lexer)
				}
				return true
			}
		}
		if strings.HasSuffix(fileName, "*") {
			// part match: "*.env.*"
			filePart := strings.Trim(fileName, "*")
			if isPlainPattern(filePart) {
				ret.fileParts = append(ret.fileParts, struct {
					part  string
					lexer chroma.Lexer
				}{
					part:  filePart,
					lexer: lexer,
				})
				return true
			}
		}
		return false
	}

	expandGlobPatterns := func(patterns []string) []string {
		// expand patterns like "file.[ch]" to "file.c" and "file.h", only one pair of "[]" is supported, enough for current Chroma lexers
		for idx, s := range patterns {
			idx1 := strings.IndexByte(s, '[')
			idx2 := strings.IndexByte(s, ']')
			if idx1 != -1 && idx2 != -1 && idx2 > idx1+1 {
				left, mid, right := s[:idx1], s[idx1+1:idx2], s[idx2+1:]
				// the first expanded character reuses the original slot; the rest are appended
				patterns[idx] = left + mid[0:1] + right
				for i := 1; i < len(mid); i++ {
					patterns = append(patterns, left+mid[i:i+1]+right)
				}
			}
		}
		return patterns
	}

	// processLexerNameAliases registers the lexer under its lower-cased name and
	// all of its aliases, skipping aliases that conflictingAliasLangMap reserves
	// for another language.
	processLexerNameAliases := func(lexer chroma.Lexer) {
		cfg := lexer.Config()
		lowerName := strings.ToLower(cfg.Name)
		if _, conflicted := ret.lowerNameMap[lowerName]; conflicted {
			panic("duplicate language name in lexer map: " + lowerName)
		}
		ret.lowerNameMap[lowerName] = lexer

		for _, name := range cfg.Aliases {
			lowerName := strings.ToLower(name)
			// skip an alias that is reserved for a different language
			if overriddenName, overridden := ret.conflictingAliasLangMap[lowerName]; overridden && overriddenName != cfg.Name {
				continue
			}
			if existingLexer, conflict := ret.lowerNameMap[lowerName]; conflict && existingLexer.Config().Name != cfg.Name {
				panic("duplicate alias in lexer map: " + name + ", conflict between " + existingLexer.Config().Name + " and " + cfg.Name)
			}
			ret.lowerNameMap[lowerName] = lexer
		}
	}

	// the main loop: build our lookup maps for lexers
	for _, lexer := range lexers.GlobalLexerRegistry.Lexers {
		cfg := lexer.Config()
		processLexerNameAliases(lexer)
		for _, s := range expandGlobPatterns(cfg.Filenames) {
			if !processFileName(s, lexer) {
				panic("unsupported file name pattern in lexer: " + s)
			}
		}
		for _, s := range expandGlobPatterns(cfg.AliasFilenames) {
			if !processFileName(s, lexer) {
				panic("unsupported alias file name pattern in lexer: " + s)
			}
		}
	}

	// final check: make sure the default overriding mapping is correct, nothing is missing
	for lowerName, lexerName := range ret.conflictingAliasLangMap {
		if lexer, ok := ret.lowerNameMap[lowerName]; !ok || lexer.Config().Name != lexerName {
			panic("missing default name-lang mapping for: " + lowerName)
		}
	}
	for ext, lexerName := range ret.conflictingExtLangMap {
		if lexer, ok := ret.fileExtMap[ext]; !ok || lexer.Config().Name != lexerName {
			panic("missing default ext-lang mapping for: " + ext)
		}
	}
	return ret
})
|
|
|
|
// normalizeFileNameLang reduces fileName to its base name and translates
// language names coming from enry or gitattributes into the names chroma uses.
func normalizeFileNameLang(fileName, fileLang string) (string, string) {
	fileName = path.Base(fileName)
	// values from gitattributes may carry "?" parameters; keep only the part before it
	if pos := strings.IndexByte(fileLang, '?'); pos != -1 {
		fileLang = fileLang[:pos]
	}
	// enry and gitattributes name some languages differently from chroma
	switch {
	case fileLang == "F#":
		fileLang = "FSharp"
	case fileLang == "Pascal":
		fileLang = "ObjectPascal"
	case fileLang == "C" && (path.Ext(fileName) == ".C" || path.Ext(fileName) == ".H"):
		// upper-case ".C"/".H" extensions conventionally indicate C++ sources
		fileLang = "C++"
	}
	return fileName, fileLang
}
|
|
|
|
func DetectChromaLexerByFileName(fileName, fileLang string) chroma.Lexer {
|
|
lexer, _ := detectChromaLexerByFileName(fileName, fileLang)
|
|
return lexer
|
|
}
|
|
|
|
func detectChromaLexerByFileName(fileName, fileLang string) (_ chroma.Lexer, byLang bool) {
|
|
fileName, fileLang = normalizeFileNameLang(fileName, fileLang)
|
|
fileExt := path.Ext(fileName)
|
|
|
|
// apply custom mapping for file extension, highest priority, for example:
|
|
// * ".my-js" -> ".js"
|
|
// * ".my-html" -> "HTML"
|
|
if fileExt != "" {
|
|
if val, ok := globalVars().highlightMapping[fileExt]; ok {
|
|
if strings.HasPrefix(val, ".") {
|
|
fileName = "dummy" + val
|
|
fileLang = ""
|
|
} else {
|
|
fileLang = val
|
|
}
|
|
}
|
|
}
|
|
|
|
// try to use language for lexer name
|
|
if fileLang != "" {
|
|
lexer := chromaLexers().lowerNameMap[strings.ToLower(fileLang)]
|
|
if lexer != nil {
|
|
return lexer, true
|
|
}
|
|
}
|
|
|
|
if fileName == "" {
|
|
return lexers.Fallback, false
|
|
}
|
|
|
|
// try base name
|
|
{
|
|
baseName := path.Base(fileName)
|
|
if lexer, ok := chromaLexers().fileBaseMap[baseName]; ok {
|
|
return lexer, false
|
|
} else if lexer, ok = chromaLexers().fileBaseMap[mapKeyLowerPrefix+strings.ToLower(baseName)]; ok {
|
|
return lexer, false
|
|
}
|
|
}
|
|
|
|
if fileExt == "" {
|
|
return lexers.Fallback, false
|
|
}
|
|
|
|
// try ext name
|
|
{
|
|
if lexer, ok := chromaLexers().fileExtMap[fileExt]; ok {
|
|
return lexer, false
|
|
} else if lexer, ok = chromaLexers().fileExtMap[mapKeyLowerPrefix+strings.ToLower(fileExt)]; ok {
|
|
return lexer, false
|
|
}
|
|
}
|
|
|
|
// try file part match, for example: ".env.local" for "*.env.*"
|
|
// it assumes that there must be a dot in filename (fileExt isn't empty)
|
|
for _, item := range chromaLexers().fileParts {
|
|
if strings.Contains(fileName, item.part) {
|
|
return item.lexer, false
|
|
}
|
|
}
|
|
return lexers.Fallback, false
|
|
}
|
|
|
|
// detectChromaLexerWithAnalyze returns a chroma lexer by given file name, language and code content. All parameters can be optional.
|
|
// When code content is provided, it will be slow if no lexer is found by file name or language.
|
|
// If no lexer is found, it will return the fallback lexer.
|
|
func detectChromaLexerWithAnalyze(fileName, lang string, code []byte) chroma.Lexer {
|
|
lexer, byLang := detectChromaLexerByFileName(fileName, lang)
|
|
|
|
// if lang is provided, and it matches a lexer, use it directly
|
|
if byLang {
|
|
return lexer
|
|
}
|
|
|
|
// if a lexer is detected and there is no conflict for the file extension, use it directly
|
|
fileExt := path.Ext(fileName)
|
|
_, hasConflicts := chromaLexers().conflictingExtLangMap[fileExt]
|
|
if !hasConflicts && lexer != lexers.Fallback {
|
|
return lexer
|
|
}
|
|
|
|
// try to detect language by content, for best guessing for the language
|
|
// when using "code" to detect, analyze.GetCodeLanguage is slow, it iterates many rules to detect language from content
|
|
analyzedLanguage := analyze.GetCodeLanguage(fileName, code)
|
|
lexer = DetectChromaLexerByFileName(fileName, analyzedLanguage)
|
|
if lexer == lexers.Fallback {
|
|
if analyzedLanguage != enry.OtherLanguage {
|
|
log.Warn("No chroma lexer found for enry detected language: %s (file: %s), need to fix the language mapping between enry and chroma.", analyzedLanguage, fileName)
|
|
}
|
|
}
|
|
return lexer
|
|
}
|