mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-31 21:28:11 +09:00 
			
		
		
		
	Added all required dependencies
This commit is contained in:
		
							
								
								
									
										136
									
								
								vendor/github.com/gogits/chardet/detector.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										136
									
								
								vendor/github.com/gogits/chardet/detector.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,136 @@ | ||||
| // Package chardet ports character set detection from ICU. | ||||
| package chardet | ||||
|  | ||||
| import ( | ||||
| 	"errors" | ||||
| 	"sort" | ||||
| ) | ||||
|  | ||||
// Result describes a single charset guess produced by a Detector.
//
// The field order is significant: recognizer outputs are converted to Result
// with a plain type conversion, so the layout must stay as declared here.
type Result struct {
	// Charset is the IANA name of the charset that was detected.
	Charset string
	// Language is the IANA name of the detected language; some charsets
	// leave it empty.
	Language string
	// Confidence rates the guess on a scale from 1 to 100, where a larger
	// value means a more confident match.
	Confidence int
}
|  | ||||
| // Detector implements charset detection. | ||||
| type Detector struct { | ||||
| 	recognizers []recognizer | ||||
| 	stripTag    bool | ||||
| } | ||||
|  | ||||
| // List of charset recognizers | ||||
| var recognizers = []recognizer{ | ||||
| 	newRecognizer_utf8(), | ||||
| 	newRecognizer_utf16be(), | ||||
| 	newRecognizer_utf16le(), | ||||
| 	newRecognizer_utf32be(), | ||||
| 	newRecognizer_utf32le(), | ||||
| 	newRecognizer_8859_1_en(), | ||||
| 	newRecognizer_8859_1_da(), | ||||
| 	newRecognizer_8859_1_de(), | ||||
| 	newRecognizer_8859_1_es(), | ||||
| 	newRecognizer_8859_1_fr(), | ||||
| 	newRecognizer_8859_1_it(), | ||||
| 	newRecognizer_8859_1_nl(), | ||||
| 	newRecognizer_8859_1_no(), | ||||
| 	newRecognizer_8859_1_pt(), | ||||
| 	newRecognizer_8859_1_sv(), | ||||
| 	newRecognizer_8859_2_cs(), | ||||
| 	newRecognizer_8859_2_hu(), | ||||
| 	newRecognizer_8859_2_pl(), | ||||
| 	newRecognizer_8859_2_ro(), | ||||
| 	newRecognizer_8859_5_ru(), | ||||
| 	newRecognizer_8859_6_ar(), | ||||
| 	newRecognizer_8859_7_el(), | ||||
| 	newRecognizer_8859_8_I_he(), | ||||
| 	newRecognizer_8859_8_he(), | ||||
| 	newRecognizer_windows_1251(), | ||||
| 	newRecognizer_windows_1256(), | ||||
| 	newRecognizer_KOI8_R(), | ||||
| 	newRecognizer_8859_9_tr(), | ||||
|  | ||||
| 	newRecognizer_sjis(), | ||||
| 	newRecognizer_gb_18030(), | ||||
| 	newRecognizer_euc_jp(), | ||||
| 	newRecognizer_euc_kr(), | ||||
| 	newRecognizer_big5(), | ||||
|  | ||||
| 	newRecognizer_2022JP(), | ||||
| 	newRecognizer_2022KR(), | ||||
| 	newRecognizer_2022CN(), | ||||
|  | ||||
| 	newRecognizer_IBM424_he_rtl(), | ||||
| 	newRecognizer_IBM424_he_ltr(), | ||||
| 	newRecognizer_IBM420_ar_rtl(), | ||||
| 	newRecognizer_IBM420_ar_ltr(), | ||||
| } | ||||
|  | ||||
| // NewTextDetector creates a Detector for plain text. | ||||
| func NewTextDetector() *Detector { | ||||
| 	return &Detector{recognizers, false} | ||||
| } | ||||
|  | ||||
| // NewHtmlDetector creates a Detector for Html. | ||||
| func NewHtmlDetector() *Detector { | ||||
| 	return &Detector{recognizers, true} | ||||
| } | ||||
|  | ||||
// NotDetectedError is the error DetectAll returns (and DetectBest passes on)
// when no recognizer reports a Confidence above zero for the input.
var NotDetectedError = errors.New("Charset not detected.")
|  | ||||
| // DetectBest returns the Result with highest Confidence. | ||||
| func (d *Detector) DetectBest(b []byte) (r *Result, err error) { | ||||
| 	var all []Result | ||||
| 	if all, err = d.DetectAll(b); err == nil { | ||||
| 		r = &all[0] | ||||
| 	} | ||||
| 	return | ||||
| } | ||||
|  | ||||
| // DetectAll returns all Results which have non-zero Confidence. The Results are sorted by Confidence in descending order. | ||||
| func (d *Detector) DetectAll(b []byte) ([]Result, error) { | ||||
| 	input := newRecognizerInput(b, d.stripTag) | ||||
| 	outputChan := make(chan recognizerOutput) | ||||
| 	for _, r := range d.recognizers { | ||||
| 		go matchHelper(r, input, outputChan) | ||||
| 	} | ||||
| 	outputs := make([]recognizerOutput, 0, len(d.recognizers)) | ||||
| 	for i := 0; i < len(d.recognizers); i++ { | ||||
| 		o := <-outputChan | ||||
| 		if o.Confidence > 0 { | ||||
| 			outputs = append(outputs, o) | ||||
| 		} | ||||
| 	} | ||||
| 	if len(outputs) == 0 { | ||||
| 		return nil, NotDetectedError | ||||
| 	} | ||||
|  | ||||
| 	sort.Sort(recognizerOutputs(outputs)) | ||||
| 	dedupOutputs := make([]Result, 0, len(outputs)) | ||||
| 	foundCharsets := make(map[string]struct{}, len(outputs)) | ||||
| 	for _, o := range outputs { | ||||
| 		if _, found := foundCharsets[o.Charset]; !found { | ||||
| 			dedupOutputs = append(dedupOutputs, Result(o)) | ||||
| 			foundCharsets[o.Charset] = struct{}{} | ||||
| 		} | ||||
| 	} | ||||
| 	if len(dedupOutputs) == 0 { | ||||
| 		return nil, NotDetectedError | ||||
| 	} | ||||
| 	return dedupOutputs, nil | ||||
| } | ||||
|  | ||||
| func matchHelper(r recognizer, input *recognizerInput, outputChan chan<- recognizerOutput) { | ||||
| 	outputChan <- r.Match(input) | ||||
| } | ||||
|  | ||||
| type recognizerOutputs []recognizerOutput | ||||
|  | ||||
| func (r recognizerOutputs) Len() int           { return len(r) } | ||||
| func (r recognizerOutputs) Less(i, j int) bool { return r[i].Confidence > r[j].Confidence } | ||||
| func (r recognizerOutputs) Swap(i, j int)      { r[i], r[j] = r[j], r[i] } | ||||
		Reference in New Issue
	
	Block a user