mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-11-03 08:02:36 +09:00 
			
		
		
		
	
		
			
				
	
	
		
			103 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
			
		
		
	
	
			103 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
package chardet
 | 
						|
 | 
						|
import (
 | 
						|
	"bytes"
 | 
						|
)
 | 
						|
 | 
						|
// recognizer2022 detects one escape-sequence-based charset by looking for
// that charset's known escape sequences in the input.
type recognizer2022 struct {
	// charset is the name reported in the match result.
	charset string
	// escapes lists the byte sequences that may follow an ESC (0x1B) byte
	// in this charset. The ESC byte itself is not part of an entry.
	escapes [][]byte
}
 | 
						|
 | 
						|
func (r *recognizer2022) Match(input *recognizerInput) (output recognizerOutput) {
 | 
						|
	return recognizerOutput{
 | 
						|
		Charset:    r.charset,
 | 
						|
		Confidence: r.matchConfidence(input.input),
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (r *recognizer2022) matchConfidence(input []byte) int {
 | 
						|
	var hits, misses, shifts int
 | 
						|
input:
 | 
						|
	for i := 0; i < len(input); i++ {
 | 
						|
		c := input[i]
 | 
						|
		if c == 0x1B {
 | 
						|
			for _, esc := range r.escapes {
 | 
						|
				if bytes.HasPrefix(input[i+1:], esc) {
 | 
						|
					hits++
 | 
						|
					i += len(esc)
 | 
						|
					continue input
 | 
						|
				}
 | 
						|
			}
 | 
						|
			misses++
 | 
						|
		} else if c == 0x0E || c == 0x0F {
 | 
						|
			shifts++
 | 
						|
		}
 | 
						|
	}
 | 
						|
	if hits == 0 {
 | 
						|
		return 0
 | 
						|
	}
 | 
						|
	quality := (100*hits - 100*misses) / (hits + misses)
 | 
						|
	if hits+shifts < 5 {
 | 
						|
		quality -= (5 - (hits + shifts)) * 10
 | 
						|
	}
 | 
						|
	if quality < 0 {
 | 
						|
		quality = 0
 | 
						|
	}
 | 
						|
	return quality
 | 
						|
}
 | 
						|
 | 
						|
// escapeSequences_2022JP lists the escape sequences looked for when
// detecting ISO-2022-JP. Each entry holds the bytes that follow the ESC
// (0x1B) byte, so {'$', 'B'} stands for the sequence ESC $ B.
var escapeSequences_2022JP = [][]byte{
	{'$', '(', 'C'}, // ESC $ ( C: KS X 1001:1992
	{'$', '(', 'D'}, // ESC $ ( D: JIS X 0212-1990
	{'$', '@'},      // ESC $ @: JIS C 6226-1978
	{'$', 'A'},      // ESC $ A: GB 2312-80
	{'$', 'B'},      // ESC $ B: JIS X 0208-1983
	{'&', '@'},      // ESC & @: JIS X 0208 1990, 1997
	{'(', 'B'},      // ESC ( B: ASCII
	{'(', 'H'},      // ESC ( H: JIS-Roman
	{'(', 'I'},      // ESC ( I: Half-width katakana
	{'(', 'J'},      // ESC ( J: JIS-Roman
	{'.', 'A'},      // ESC . A: ISO 8859-1
	{'.', 'F'},      // ESC . F: ISO 8859-7
}
 | 
						|
 | 
						|
// escapeSequences_2022KR lists the escape sequences looked for when
// detecting ISO-2022-KR. Each entry holds the bytes that follow the ESC
// (0x1B) byte.
var escapeSequences_2022KR = [][]byte{
	{'$', ')', 'C'}, // ESC $ ) C
}
 | 
						|
 | 
						|
// escapeSequences_2022CN lists the escape sequences looked for when
// detecting ISO-2022-CN. Each entry holds the bytes that follow the ESC
// (0x1B) byte; the single-byte entries are the SS2 and SS3 single shifts.
var escapeSequences_2022CN = [][]byte{
	{'$', ')', 'A'}, // ESC $ ) A: GB 2312-80
	{'$', ')', 'G'}, // ESC $ ) G: CNS 11643-1992 Plane 1
	{'$', '*', 'H'}, // ESC $ * H: CNS 11643-1992 Plane 2
	{'$', ')', 'E'}, // ESC $ ) E: ISO-IR-165
	{'$', '+', 'I'}, // ESC $ + I: CNS 11643-1992 Plane 3
	{'$', '+', 'J'}, // ESC $ + J: CNS 11643-1992 Plane 4
	{'$', '+', 'K'}, // ESC $ + K: CNS 11643-1992 Plane 5
	{'$', '+', 'L'}, // ESC $ + L: CNS 11643-1992 Plane 6
	{'$', '+', 'M'}, // ESC $ + M: CNS 11643-1992 Plane 7
	{'N'},           // ESC N: SS2
	{'O'},           // ESC O: SS3
}
 | 
						|
 | 
						|
func newRecognizer_2022JP() *recognizer2022 {
 | 
						|
	return &recognizer2022{
 | 
						|
		"ISO-2022-JP",
 | 
						|
		escapeSequences_2022JP,
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func newRecognizer_2022KR() *recognizer2022 {
 | 
						|
	return &recognizer2022{
 | 
						|
		"ISO-2022-KR",
 | 
						|
		escapeSequences_2022KR,
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func newRecognizer_2022CN() *recognizer2022 {
 | 
						|
	return &recognizer2022{
 | 
						|
		"ISO-2022-CN",
 | 
						|
		escapeSequences_2022CN,
 | 
						|
	}
 | 
						|
}
 |