mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-11-03 08:02:36 +09:00 
			
		
		
		
	
		
			
				
	
	
		
			72 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
			
		
		
	
	
			72 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
package chardet
 | 
						|
 | 
						|
import (
 | 
						|
	"bytes"
 | 
						|
)
 | 
						|
 | 
						|
// utf8Bom is the three-byte UTF-8 byte order mark (EF BB BF).
var utf8Bom = []byte("\xEF\xBB\xBF")
 | 
						|
 | 
						|
// recognizerUtf8 is a stateless recognizer for UTF-8 encoded input; all of
// its work happens in Match.
type recognizerUtf8 struct{}
 | 
						|
 | 
						|
func newRecognizer_utf8() *recognizerUtf8 {
 | 
						|
	return &recognizerUtf8{}
 | 
						|
}
 | 
						|
 | 
						|
func (*recognizerUtf8) Match(input *recognizerInput) (output recognizerOutput) {
 | 
						|
	output = recognizerOutput{
 | 
						|
		Charset: "UTF-8",
 | 
						|
	}
 | 
						|
	hasBom := bytes.HasPrefix(input.raw, utf8Bom)
 | 
						|
	inputLen := len(input.raw)
 | 
						|
	var numValid, numInvalid uint32
 | 
						|
	var trailBytes uint8
 | 
						|
	for i := 0; i < inputLen; i++ {
 | 
						|
		c := input.raw[i]
 | 
						|
		if c&0x80 == 0 {
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		if c&0xE0 == 0xC0 {
 | 
						|
			trailBytes = 1
 | 
						|
		} else if c&0xF0 == 0xE0 {
 | 
						|
			trailBytes = 2
 | 
						|
		} else if c&0xF8 == 0xF0 {
 | 
						|
			trailBytes = 3
 | 
						|
		} else {
 | 
						|
			numInvalid++
 | 
						|
			if numInvalid > 5 {
 | 
						|
				break
 | 
						|
			}
 | 
						|
			trailBytes = 0
 | 
						|
		}
 | 
						|
 | 
						|
		for i++; i < inputLen; i++ {
 | 
						|
			c = input.raw[i]
 | 
						|
			if c&0xC0 != 0x80 {
 | 
						|
				numInvalid++
 | 
						|
				break
 | 
						|
			}
 | 
						|
			if trailBytes--; trailBytes == 0 {
 | 
						|
				numValid++
 | 
						|
				break
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	if hasBom && numInvalid == 0 {
 | 
						|
		output.Confidence = 100
 | 
						|
	} else if hasBom && numValid > numInvalid*10 {
 | 
						|
		output.Confidence = 80
 | 
						|
	} else if numValid > 3 && numInvalid == 0 {
 | 
						|
		output.Confidence = 100
 | 
						|
	} else if numValid > 0 && numInvalid == 0 {
 | 
						|
		output.Confidence = 80
 | 
						|
	} else if numValid == 0 && numInvalid == 0 {
 | 
						|
		// Plain ASCII
 | 
						|
		output.Confidence = 10
 | 
						|
	} else if numValid > numInvalid*10 {
 | 
						|
		output.Confidence = 25
 | 
						|
	}
 | 
						|
	return
 | 
						|
}
 |