Search bar for issues/pulls (#530)
vendor/github.com/blevesearch/bleve/analysis/tokenizer/unicode/unicode.go (new file, generated, vendored, 131 lines added)
@@ -0,0 +1,131 @@
//  Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package unicode

import (
	"github.com/blevesearch/segment"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/registry"
)

const Name = "unicode"

type UnicodeTokenizer struct {
}

func NewUnicodeTokenizer() *UnicodeTokenizer {
	return &UnicodeTokenizer{}
}

func (rt *UnicodeTokenizer) Tokenize(input []byte) analysis.TokenStream {
	rvx := make([]analysis.TokenStream, 0, 10) // When rv gets full, append to rvx.
	rv := make(analysis.TokenStream, 0, 1)

	ta := []analysis.Token(nil)
	taNext := 0

	segmenter := segment.NewWordSegmenterDirect(input)
	start := 0
	pos := 1

	// guessRemaining estimates how many segments are left in the input,
	// using the average length of the segments tokenized so far.
	guessRemaining := func(end int) int {
		avgSegmentLen := end / (len(rv) + 1)
		if avgSegmentLen < 1 {
			avgSegmentLen = 1
		}

		remainingLen := len(input) - end

		return remainingLen / avgSegmentLen
	}
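	// Illustrative arithmetic (numbers not from the original commit): for
	// a 1,000-byte input whose first segment ends at byte 10 before any
	// token has been emitted, avgSegmentLen = 10/(0+1) = 10, so
	// guessRemaining returns (1000-10)/10 = 99 as the next batch size.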

	for segmenter.Segment() {
		segmentBytes := segmenter.Bytes()
		end := start + len(segmentBytes)
		if segmenter.Type() != segment.None { // Skip separator segments (whitespace, punctuation).
			if taNext >= len(ta) {
				// The current token batch is exhausted; allocate a new one
				// sized by the remaining-segment estimate, clamped to [1, 1000].
				remainingSegments := guessRemaining(end)
				if remainingSegments > 1000 {
					remainingSegments = 1000
				}
				if remainingSegments < 1 {
					remainingSegments = 1
				}

				ta = make([]analysis.Token, remainingSegments)
				taNext = 0
			}

			token := &ta[taNext]
			taNext++

			token.Term = segmentBytes
			token.Start = start
			token.End = end
			token.Position = pos
			token.Type = convertType(segmenter.Type())

			if len(rv) >= cap(rv) { // When rv is full, save it into rvx.
				rvx = append(rvx, rv)

				rvCap := cap(rv) * 2
				if rvCap > 256 {
					rvCap = 256
				}

				rv = make(analysis.TokenStream, 0, rvCap) // Next rv cap is bigger.
			}

			rv = append(rv, token)
			pos++
		}
		start = end
	}

	if len(rvx) > 0 {
		// Flatten the saved chunks plus the final chunk into one stream.
		n := len(rv)
		for _, r := range rvx {
			n += len(r)
		}
		rall := make(analysis.TokenStream, 0, n)
		for _, r := range rvx {
			rall = append(rall, r...)
		}
		return append(rall, rv...)
	}

	return rv
}

func UnicodeTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
	return NewUnicodeTokenizer(), nil
}

func init() {
	registry.RegisterTokenizer(Name, UnicodeTokenizerConstructor)
}

func convertType(segmentWordType int) analysis.TokenType {
	switch segmentWordType {
	case segment.Ideo:
		return analysis.Ideographic
	case segment.Kana:
		return analysis.Ideographic
	case segment.Number:
		return analysis.Numeric
	}
	return analysis.AlphaNumeric
}
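
As context for how this vendored tokenizer behaves, here is a minimal sketch of driving it directly (the sample input is illustrative; within bleve the tokenizer is normally obtained through the registry under the name "unicode" registered in init above):

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
)

func main() {
	tokenizer := unicode.NewUnicodeTokenizer()
	// Each token carries its byte offsets, 1-based position, and a
	// coarse type (alphanumeric, ideographic, or numeric).
	for _, tok := range tokenizer.Tokenize([]byte("Search bar for issues/pulls")) {
		fmt.Printf("pos=%d term=%q span=[%d,%d) type=%d\n",
			tok.Position, tok.Term, tok.Start, tok.End, tok.Type)
	}
}

On this sample input the slash should act as a separator under Unicode word segmentation, so "issues/pulls" yields two tokens and the program prints five alphanumeric tokens in total.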