mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-27 00:23:41 +09:00 
			
		
		
		
	Improve issue & code search (#33860)
Each "indexer" should provide the "search modes" they support by themselves. And we need to remove the "fuzzy" search for code.
This commit is contained in:
		| @@ -23,11 +23,19 @@ type GrepResult struct { | ||||
| 	LineCodes   []string | ||||
| } | ||||
|  | ||||
| type GrepModeType string | ||||
|  | ||||
| const ( | ||||
| 	GrepModeExact  GrepModeType = "exact" | ||||
| 	GrepModeWords  GrepModeType = "words" | ||||
| 	GrepModeRegexp GrepModeType = "regexp" | ||||
| ) | ||||
|  | ||||
| type GrepOptions struct { | ||||
| 	RefName           string | ||||
| 	MaxResultLimit    int | ||||
| 	ContextLineNumber int | ||||
| 	IsFuzzy           bool | ||||
| 	GrepMode          GrepModeType | ||||
| 	MaxLineLength     int // the maximum length of a line to parse, exceeding chars will be truncated | ||||
| 	PathspecList      []string | ||||
| } | ||||
| @@ -52,15 +60,23 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO | ||||
| 	 2^@repo: go-gitea/gitea | ||||
| 	*/ | ||||
| 	var results []*GrepResult | ||||
| 	cmd := NewCommand("grep", "--null", "--break", "--heading", "--fixed-strings", "--line-number", "--ignore-case", "--full-name") | ||||
| 	cmd := NewCommand("grep", "--null", "--break", "--heading", "--line-number", "--full-name") | ||||
| 	cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber)) | ||||
| 	if opts.IsFuzzy { | ||||
| 		words := strings.Fields(search) | ||||
| 		for _, word := range words { | ||||
| 			cmd.AddOptionValues("-e", strings.TrimLeft(word, "-")) | ||||
| 		} | ||||
| 	} else { | ||||
| 	if opts.GrepMode == GrepModeExact { | ||||
| 		cmd.AddArguments("--fixed-strings") | ||||
| 		cmd.AddOptionValues("-e", strings.TrimLeft(search, "-")) | ||||
| 	} else if opts.GrepMode == GrepModeRegexp { | ||||
| 		cmd.AddArguments("--perl-regexp") | ||||
| 		cmd.AddOptionValues("-e", strings.TrimLeft(search, "-")) | ||||
| 	} else /* words */ { | ||||
| 		words := strings.Fields(search) | ||||
| 		cmd.AddArguments("--fixed-strings", "--ignore-case") | ||||
| 		for i, word := range words { | ||||
| 			cmd.AddOptionValues("-e", strings.TrimLeft(word, "-")) | ||||
| 			if i < len(words)-1 { | ||||
| 				cmd.AddOptionValues("--and") | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	cmd.AddDynamicArguments(util.IfZero(opts.RefName, "HEAD")) | ||||
| 	cmd.AddDashesAndList(opts.PathspecList...) | ||||
|   | ||||
| @@ -17,6 +17,7 @@ import ( | ||||
| 	"code.gitea.io/gitea/modules/charset" | ||||
| 	"code.gitea.io/gitea/modules/git" | ||||
| 	"code.gitea.io/gitea/modules/gitrepo" | ||||
| 	"code.gitea.io/gitea/modules/indexer" | ||||
| 	path_filter "code.gitea.io/gitea/modules/indexer/code/bleve/token/path" | ||||
| 	"code.gitea.io/gitea/modules/indexer/code/internal" | ||||
| 	indexer_internal "code.gitea.io/gitea/modules/indexer/internal" | ||||
| @@ -136,6 +137,10 @@ type Indexer struct { | ||||
| 	indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much | ||||
| } | ||||
|  | ||||
| func (b *Indexer) SupportedSearchModes() []indexer.SearchMode { | ||||
| 	return indexer.SearchModesExactWords() | ||||
| } | ||||
|  | ||||
| // NewIndexer creates a new bleve local indexer | ||||
| func NewIndexer(indexDir string) *Indexer { | ||||
| 	inner := inner_bleve.NewIndexer(indexDir, repoIndexerLatestVersion, generateBleveIndexMapping) | ||||
| @@ -267,19 +272,18 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int | ||||
| 	pathQuery.FieldVal = "Filename" | ||||
| 	pathQuery.SetBoost(10) | ||||
|  | ||||
| 	keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword) | ||||
| 	if isPhrase { | ||||
| 		q := bleve.NewMatchPhraseQuery(keywordAsPhrase) | ||||
| 	if opts.SearchMode == indexer.SearchModeExact { | ||||
| 		q := bleve.NewMatchPhraseQuery(opts.Keyword) | ||||
| 		q.FieldVal = "Content" | ||||
| 		if opts.IsKeywordFuzzy { | ||||
| 			q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(keywordAsPhrase) | ||||
| 		} | ||||
| 		contentQuery = q | ||||
| 	} else { | ||||
| 	} else /* words */ { | ||||
| 		q := bleve.NewMatchQuery(opts.Keyword) | ||||
| 		q.FieldVal = "Content" | ||||
| 		if opts.IsKeywordFuzzy { | ||||
| 		if opts.SearchMode == indexer.SearchModeFuzzy { | ||||
| 			// This logic doesn't seem right: it only exists to pass the test case `Keyword:    "dESCRIPTION"`, which doesn't look like a real-life use case. | ||||
| 			q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword) | ||||
| 		} else { | ||||
| 			q.Operator = query.MatchQueryOperatorAnd | ||||
| 		} | ||||
| 		contentQuery = q | ||||
| 	} | ||||
|   | ||||
| @@ -16,6 +16,7 @@ import ( | ||||
| 	"code.gitea.io/gitea/modules/charset" | ||||
| 	"code.gitea.io/gitea/modules/git" | ||||
| 	"code.gitea.io/gitea/modules/gitrepo" | ||||
| 	"code.gitea.io/gitea/modules/indexer" | ||||
| 	"code.gitea.io/gitea/modules/indexer/code/internal" | ||||
| 	indexer_internal "code.gitea.io/gitea/modules/indexer/internal" | ||||
| 	inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" | ||||
| @@ -24,7 +25,6 @@ import ( | ||||
| 	"code.gitea.io/gitea/modules/setting" | ||||
| 	"code.gitea.io/gitea/modules/timeutil" | ||||
| 	"code.gitea.io/gitea/modules/typesniffer" | ||||
| 	"code.gitea.io/gitea/modules/util" | ||||
|  | ||||
| 	"github.com/go-enry/go-enry/v2" | ||||
| 	"github.com/olivere/elastic/v7" | ||||
| @@ -46,6 +46,10 @@ type Indexer struct { | ||||
| 	indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much | ||||
| } | ||||
|  | ||||
| func (b *Indexer) SupportedSearchModes() []indexer.SearchMode { | ||||
| 	return indexer.SearchModesExactWords() | ||||
| } | ||||
|  | ||||
| // NewIndexer creates a new elasticsearch indexer | ||||
| func NewIndexer(url, indexerName string) *Indexer { | ||||
| 	inner := inner_elasticsearch.NewIndexer(url, indexerName, esRepoIndexerLatestVersion, defaultMapping) | ||||
| @@ -361,15 +365,10 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan | ||||
| // Search searches for codes and language stats by given conditions. | ||||
| func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { | ||||
| 	var contentQuery elastic.Query | ||||
| 	keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword) | ||||
| 	if isPhrase { | ||||
| 		contentQuery = elastic.NewMatchPhraseQuery("content", keywordAsPhrase) | ||||
| 	} else { | ||||
| 		// TODO: this is the old logic, but not really using "fuzziness" | ||||
| 		// * IsKeywordFuzzy=true: "best_fields" | ||||
| 		// * IsKeywordFuzzy=false: "phrase_prefix" | ||||
| 		contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword). | ||||
| 			Type(util.Iif(opts.IsKeywordFuzzy, esMultiMatchTypeBestFields, esMultiMatchTypePhrasePrefix)) | ||||
| 	if opts.SearchMode == indexer.SearchModeExact { | ||||
| 		contentQuery = elastic.NewMatchPhraseQuery("content", opts.Keyword) | ||||
| 	} else /* words */ { | ||||
| 		contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).Type(esMultiMatchTypeBestFields).Operator("and") | ||||
| 	} | ||||
| 	kwQuery := elastic.NewBoolQuery().Should( | ||||
| 		contentQuery, | ||||
|   | ||||
| @@ -9,6 +9,7 @@ import ( | ||||
| 	"strings" | ||||
|  | ||||
| 	"code.gitea.io/gitea/modules/git" | ||||
| 	"code.gitea.io/gitea/modules/indexer" | ||||
| 	code_indexer "code.gitea.io/gitea/modules/indexer/code" | ||||
| 	"code.gitea.io/gitea/modules/setting" | ||||
| ) | ||||
| @@ -23,11 +24,16 @@ func indexSettingToGitGrepPathspecList() (list []string) { | ||||
| 	return list | ||||
| } | ||||
|  | ||||
| func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, isFuzzy bool) (searchResults []*code_indexer.Result, total int, err error) { | ||||
| 	// TODO: it should also respect ParseKeywordAsPhrase and clarify the "fuzzy" behavior | ||||
| func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, searchMode indexer.SearchModeType) (searchResults []*code_indexer.Result, total int, err error) { | ||||
| 	grepMode := git.GrepModeWords | ||||
| 	if searchMode == indexer.SearchModeExact { | ||||
| 		grepMode = git.GrepModeExact | ||||
| 	} else if searchMode == indexer.SearchModeRegexp { | ||||
| 		grepMode = git.GrepModeRegexp | ||||
| 	} | ||||
| 	res, err := git.GrepSearch(ctx, gitRepo, keyword, git.GrepOptions{ | ||||
| 		ContextLineNumber: 1, | ||||
| 		IsFuzzy:           isFuzzy, | ||||
| 		GrepMode:          grepMode, | ||||
| 		RefName:           ref.String(), | ||||
| 		PathspecList:      indexSettingToGitGrepPathspecList(), | ||||
| 	}) | ||||
|   | ||||
| @@ -14,6 +14,7 @@ import ( | ||||
| 	"code.gitea.io/gitea/models/db" | ||||
| 	repo_model "code.gitea.io/gitea/models/repo" | ||||
| 	"code.gitea.io/gitea/modules/graceful" | ||||
| 	"code.gitea.io/gitea/modules/indexer" | ||||
| 	"code.gitea.io/gitea/modules/indexer/code/bleve" | ||||
| 	"code.gitea.io/gitea/modules/indexer/code/elasticsearch" | ||||
| 	"code.gitea.io/gitea/modules/indexer/code/internal" | ||||
| @@ -302,3 +303,11 @@ func populateRepoIndexer(ctx context.Context) { | ||||
| 	} | ||||
| 	log.Info("Done (re)populating the repo indexer with existing repositories") | ||||
| } | ||||
|  | ||||
| func SupportedSearchModes() []indexer.SearchMode { | ||||
| 	gi := globalIndexer.Load() | ||||
| 	if gi == nil { | ||||
| 		return nil | ||||
| 	} | ||||
| 	return (*gi).SupportedSearchModes() | ||||
| } | ||||
|   | ||||
| @@ -11,6 +11,7 @@ import ( | ||||
|  | ||||
| 	"code.gitea.io/gitea/models/db" | ||||
| 	"code.gitea.io/gitea/models/unittest" | ||||
| 	indexer_module "code.gitea.io/gitea/modules/indexer" | ||||
| 	"code.gitea.io/gitea/modules/indexer/code/bleve" | ||||
| 	"code.gitea.io/gitea/modules/indexer/code/elasticsearch" | ||||
| 	"code.gitea.io/gitea/modules/indexer/code/internal" | ||||
| @@ -39,10 +40,11 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { | ||||
| 		assert.NoError(t, setupRepositoryIndexes(t.Context(), indexer)) | ||||
|  | ||||
| 		keywords := []struct { | ||||
| 			RepoIDs []int64 | ||||
| 			Keyword string | ||||
| 			Langs   int | ||||
| 			Results []codeSearchResult | ||||
| 			RepoIDs    []int64 | ||||
| 			Keyword    string | ||||
| 			Langs      int | ||||
| 			SearchMode indexer_module.SearchModeType | ||||
| 			Results    []codeSearchResult | ||||
| 		}{ | ||||
| 			// Search for an exact match on the contents of a file | ||||
| 			// This scenario yields a single result (the file README.md on the repo '1') | ||||
| @@ -183,9 +185,10 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { | ||||
| 			}, | ||||
| 			// Search for matches on the contents of files regardless of case. | ||||
| 			{ | ||||
| 				RepoIDs: nil, | ||||
| 				Keyword: "dESCRIPTION", | ||||
| 				Langs:   1, | ||||
| 				RepoIDs:    nil, | ||||
| 				Keyword:    "dESCRIPTION", | ||||
| 				Langs:      1, | ||||
| 				SearchMode: indexer_module.SearchModeFuzzy, | ||||
| 				Results: []codeSearchResult{ | ||||
| 					{ | ||||
| 						Filename: "README.md", | ||||
| @@ -193,7 +196,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { | ||||
| 					}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			// Search for an exact match on the filename within the repo '62' (case insenstive). | ||||
| 			// Search for an exact match on the filename within the repo '62' (case-insensitive). | ||||
| 			// This scenario yields a single result (the file avocado.md on the repo '62') | ||||
| 			{ | ||||
| 				RepoIDs: []int64{62}, | ||||
| @@ -206,7 +209,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { | ||||
| 					}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			// Search for matches on the contents of files when the criteria is a expression. | ||||
| 			// Search for matches on the contents of files when the criteria are an expression. | ||||
| 			{ | ||||
| 				RepoIDs: []int64{62}, | ||||
| 				Keyword: "console.log", | ||||
| @@ -218,7 +221,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { | ||||
| 					}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			// Search for matches on the contents of files when the criteria is part of a expression. | ||||
| 			// Search for matches on the contents of files when the criteria are parts of an expression. | ||||
| 			{ | ||||
| 				RepoIDs: []int64{62}, | ||||
| 				Keyword: "log", | ||||
| @@ -235,16 +238,16 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { | ||||
| 		for _, kw := range keywords { | ||||
| 			t.Run(kw.Keyword, func(t *testing.T) { | ||||
| 				total, res, langs, err := indexer.Search(t.Context(), &internal.SearchOptions{ | ||||
| 					RepoIDs: kw.RepoIDs, | ||||
| 					Keyword: kw.Keyword, | ||||
| 					RepoIDs:    kw.RepoIDs, | ||||
| 					Keyword:    kw.Keyword, | ||||
| 					SearchMode: kw.SearchMode, | ||||
| 					Paginator: &db.ListOptions{ | ||||
| 						Page:     1, | ||||
| 						PageSize: 10, | ||||
| 					}, | ||||
| 					IsKeywordFuzzy: true, | ||||
| 				}) | ||||
| 				assert.NoError(t, err) | ||||
| 				assert.Len(t, langs, kw.Langs) | ||||
| 				require.NoError(t, err) | ||||
| 				require.Len(t, langs, kw.Langs) | ||||
|  | ||||
| 				hits := make([]codeSearchResult, 0, len(res)) | ||||
|  | ||||
| @@ -289,7 +292,7 @@ func TestBleveIndexAndSearch(t *testing.T) { | ||||
| 	_, err := idx.Init(t.Context()) | ||||
| 	require.NoError(t, err) | ||||
|  | ||||
| 	testIndexer("beleve", t, idx) | ||||
| 	testIndexer("bleve", t, idx) | ||||
| } | ||||
|  | ||||
| func TestESIndexAndSearch(t *testing.T) { | ||||
|   | ||||
| @@ -9,6 +9,7 @@ import ( | ||||
|  | ||||
| 	"code.gitea.io/gitea/models/db" | ||||
| 	repo_model "code.gitea.io/gitea/models/repo" | ||||
| 	"code.gitea.io/gitea/modules/indexer" | ||||
| 	"code.gitea.io/gitea/modules/indexer/internal" | ||||
| ) | ||||
|  | ||||
| @@ -18,6 +19,7 @@ type Indexer interface { | ||||
| 	Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error | ||||
| 	Delete(ctx context.Context, repoID int64) error | ||||
| 	Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) | ||||
| 	SupportedSearchModes() []indexer.SearchMode | ||||
| } | ||||
|  | ||||
| type SearchOptions struct { | ||||
| @@ -25,7 +27,7 @@ type SearchOptions struct { | ||||
| 	Keyword  string | ||||
| 	Language string | ||||
|  | ||||
| 	IsKeywordFuzzy bool | ||||
| 	SearchMode indexer.SearchModeType | ||||
|  | ||||
| 	db.Paginator | ||||
| } | ||||
| @@ -41,6 +43,10 @@ type dummyIndexer struct { | ||||
| 	internal.Indexer | ||||
| } | ||||
|  | ||||
| func (d *dummyIndexer) SupportedSearchModes() []indexer.SearchMode { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (d *dummyIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error { | ||||
| 	return fmt.Errorf("indexer is not ready") | ||||
| } | ||||
|   | ||||
| @@ -10,9 +10,7 @@ import ( | ||||
| 	"code.gitea.io/gitea/modules/log" | ||||
| ) | ||||
|  | ||||
| const ( | ||||
| 	filenameMatchNumberOfLines = 7 // Copied from github search | ||||
| ) | ||||
| const filenameMatchNumberOfLines = 7 // Copied from GitHub search | ||||
|  | ||||
| func FilenameIndexerID(repoID int64, filename string) string { | ||||
| 	return internal.Base36(repoID) + "_" + filename | ||||
| @@ -48,11 +46,3 @@ func FilenameMatchIndexPos(content string) (int, int) { | ||||
| 	} | ||||
| 	return 0, len(content) | ||||
| } | ||||
|  | ||||
| func ParseKeywordAsPhrase(keyword string) (string, bool) { | ||||
| 	if strings.HasPrefix(keyword, `"`) && strings.HasSuffix(keyword, `"`) && len(keyword) > 1 { | ||||
| 		// only remove the prefix and suffix quotes, no need to decode the content at the moment | ||||
| 		return keyword[1 : len(keyword)-1], true | ||||
| 	} | ||||
| 	return "", false | ||||
| } | ||||
|   | ||||
| @@ -1,30 +0,0 @@ | ||||
| // Copyright 2025 The Gitea Authors. All rights reserved. | ||||
| // SPDX-License-Identifier: MIT | ||||
|  | ||||
| package internal | ||||
|  | ||||
| import ( | ||||
| 	"testing" | ||||
|  | ||||
| 	"github.com/stretchr/testify/assert" | ||||
| ) | ||||
|  | ||||
| func TestParseKeywordAsPhrase(t *testing.T) { | ||||
| 	cases := []struct { | ||||
| 		keyword  string | ||||
| 		phrase   string | ||||
| 		isPhrase bool | ||||
| 	}{ | ||||
| 		{``, "", false}, | ||||
| 		{`a`, "", false}, | ||||
| 		{`"`, "", false}, | ||||
| 		{`"a`, "", false}, | ||||
| 		{`"a"`, "a", true}, | ||||
| 		{`""\"""`, `"\""`, true}, | ||||
| 	} | ||||
| 	for _, c := range cases { | ||||
| 		phrase, isPhrase := ParseKeywordAsPhrase(c.keyword) | ||||
| 		assert.Equal(t, c.phrase, phrase, "keyword=%q", c.keyword) | ||||
| 		assert.Equal(t, c.isPhrase, isPhrase, "keyword=%q", c.keyword) | ||||
| 	} | ||||
| } | ||||
| @@ -129,7 +129,6 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res | ||||
| } | ||||
|  | ||||
| // PerformSearch perform a search on a repository | ||||
| // if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2 | ||||
| func PerformSearch(ctx context.Context, opts *SearchOptions) (int, []*Result, []*SearchResultLanguages, error) { | ||||
| 	if opts == nil || len(opts.Keyword) == 0 { | ||||
| 		return 0, nil, nil, nil | ||||
|   | ||||
							
								
								
									
										54
									
								
								modules/indexer/indexer.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								modules/indexer/indexer.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,54 @@ | ||||
| // Copyright 2025 The Gitea Authors. All rights reserved. | ||||
| // SPDX-License-Identifier: MIT | ||||
|  | ||||
| package indexer | ||||
|  | ||||
| type SearchModeType string | ||||
|  | ||||
| const ( | ||||
| 	SearchModeExact  SearchModeType = "exact" | ||||
| 	SearchModeWords  SearchModeType = "words" | ||||
| 	SearchModeFuzzy  SearchModeType = "fuzzy" | ||||
| 	SearchModeRegexp SearchModeType = "regexp" | ||||
| ) | ||||
|  | ||||
| type SearchMode struct { | ||||
| 	ModeValue    SearchModeType | ||||
| 	TooltipTrKey string | ||||
| 	TitleTrKey   string | ||||
| } | ||||
|  | ||||
| func SearchModesExactWords() []SearchMode { | ||||
| 	return []SearchMode{ | ||||
| 		{ | ||||
| 			ModeValue:    SearchModeExact, | ||||
| 			TooltipTrKey: "search.exact_tooltip", | ||||
| 			TitleTrKey:   "search.exact", | ||||
| 		}, | ||||
| 		{ | ||||
| 			ModeValue:    SearchModeWords, | ||||
| 			TooltipTrKey: "search.words_tooltip", | ||||
| 			TitleTrKey:   "search.words", | ||||
| 		}, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func SearchModesExactWordsFuzzy() []SearchMode { | ||||
| 	return append(SearchModesExactWords(), []SearchMode{ | ||||
| 		{ | ||||
| 			ModeValue:    SearchModeFuzzy, | ||||
| 			TooltipTrKey: "search.fuzzy_tooltip", | ||||
| 			TitleTrKey:   "search.fuzzy", | ||||
| 		}, | ||||
| 	}...) | ||||
| } | ||||
|  | ||||
| func GitGrepSupportedSearchModes() []SearchMode { | ||||
| 	return append(SearchModesExactWords(), []SearchMode{ | ||||
| 		{ | ||||
| 			ModeValue:    SearchModeRegexp, | ||||
| 			TooltipTrKey: "search.regexp_tooltip", | ||||
| 			TitleTrKey:   "search.regexp", | ||||
| 		}, | ||||
| 	}...) | ||||
| } | ||||
| @@ -28,6 +28,16 @@ func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query | ||||
| 	return q | ||||
| } | ||||
|  | ||||
| // MatchAndQuery generates a match query for the given keyword, field, analyzer and fuzziness, using the AND operator so that every term is required to match | ||||
| func MatchAndQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchQuery { | ||||
| 	q := bleve.NewMatchQuery(matchPhrase) | ||||
| 	q.FieldVal = field | ||||
| 	q.Analyzer = analyzer | ||||
| 	q.Fuzziness = fuzziness | ||||
| 	q.Operator = query.MatchQueryOperatorAnd | ||||
| 	return q | ||||
| } | ||||
|  | ||||
| // BoolFieldQuery generates a bool field query for the given value and field | ||||
| func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery { | ||||
| 	q := bleve.NewBoolFieldQuery(value) | ||||
|   | ||||
| @@ -6,6 +6,7 @@ package bleve | ||||
| import ( | ||||
| 	"context" | ||||
|  | ||||
| 	"code.gitea.io/gitea/modules/indexer" | ||||
| 	indexer_internal "code.gitea.io/gitea/modules/indexer/internal" | ||||
| 	inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" | ||||
| 	"code.gitea.io/gitea/modules/indexer/issues/internal" | ||||
| @@ -120,6 +121,10 @@ type Indexer struct { | ||||
| 	indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much | ||||
| } | ||||
|  | ||||
| func (b *Indexer) SupportedSearchModes() []indexer.SearchMode { | ||||
| 	return indexer.SearchModesExactWordsFuzzy() | ||||
| } | ||||
|  | ||||
| // NewIndexer creates a new bleve local indexer | ||||
| func NewIndexer(indexDir string) *Indexer { | ||||
| 	inner := inner_bleve.NewIndexer(indexDir, issueIndexerLatestVersion, generateIssueIndexMapping) | ||||
| @@ -157,16 +162,23 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( | ||||
| 	var queries []query.Query | ||||
|  | ||||
| 	if options.Keyword != "" { | ||||
| 		fuzziness := 0 | ||||
| 		if options.IsFuzzyKeyword { | ||||
| 			fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword) | ||||
| 		if options.SearchMode == indexer.SearchModeWords || options.SearchMode == indexer.SearchModeFuzzy { | ||||
| 			fuzziness := 0 | ||||
| 			if options.SearchMode == indexer.SearchModeFuzzy { | ||||
| 				fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword) | ||||
| 			} | ||||
| 			queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ | ||||
| 				inner_bleve.MatchAndQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness), | ||||
| 				inner_bleve.MatchAndQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness), | ||||
| 				inner_bleve.MatchAndQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness), | ||||
| 			}...)) | ||||
| 		} else /* exact */ { | ||||
| 			queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ | ||||
| 				inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, 0), | ||||
| 				inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, 0), | ||||
| 				inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, 0), | ||||
| 			}...)) | ||||
| 		} | ||||
|  | ||||
| 		queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ | ||||
| 			inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness), | ||||
| 			inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness), | ||||
| 			inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness), | ||||
| 		}...)) | ||||
| 	} | ||||
|  | ||||
| 	if len(options.RepoIDs) > 0 || options.AllPublic { | ||||
|   | ||||
| @@ -5,9 +5,11 @@ package db | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
| 	"strings" | ||||
|  | ||||
| 	"code.gitea.io/gitea/models/db" | ||||
| 	issue_model "code.gitea.io/gitea/models/issues" | ||||
| 	"code.gitea.io/gitea/modules/indexer" | ||||
| 	indexer_internal "code.gitea.io/gitea/modules/indexer/internal" | ||||
| 	inner_db "code.gitea.io/gitea/modules/indexer/internal/db" | ||||
| 	"code.gitea.io/gitea/modules/indexer/issues/internal" | ||||
| @@ -22,6 +24,10 @@ type Indexer struct { | ||||
| 	indexer_internal.Indexer | ||||
| } | ||||
|  | ||||
| func (i *Indexer) SupportedSearchModes() []indexer.SearchMode { | ||||
| 	return indexer.SearchModesExactWords() | ||||
| } | ||||
|  | ||||
| func NewIndexer() *Indexer { | ||||
| 	return &Indexer{ | ||||
| 		Indexer: &inner_db.Indexer{}, | ||||
| @@ -38,6 +44,26 @@ func (i *Indexer) Delete(_ context.Context, _ ...int64) error { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // buildMatchQuery builds the condition that matches keyword against the column colName. | ||||
| // In exact mode the whole keyword is matched with a single db.BuildCaseInsensitiveLike condition. | ||||
| // In any other mode the keyword is split on whitespace and every word must match (the per-word | ||||
| // conditions are ANDed); a keyword that contains no words yields the always-true condition "1=1". | ||||
| func buildMatchQuery(mode indexer.SearchModeType, colName, keyword string) builder.Cond { | ||||
| 	if mode == indexer.SearchModeExact { | ||||
| 		// match against the requested column, not a hard-coded one | ||||
| 		return db.BuildCaseInsensitiveLike(colName, keyword) | ||||
| 	} | ||||
|  | ||||
| 	// match words: strings.Fields never returns empty strings, so no per-word emptiness check is needed | ||||
| 	fields := strings.Fields(keyword) | ||||
| 	if len(fields) == 0 { | ||||
| 		return builder.Expr("1=1") | ||||
| 	} | ||||
| 	cond := builder.NewCond() | ||||
| 	for _, field := range fields { | ||||
| 		cond = cond.And(db.BuildCaseInsensitiveLike(colName, field)) | ||||
| 	} | ||||
| 	return cond | ||||
| } | ||||
|  | ||||
| // Search searches for issues | ||||
| func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) { | ||||
| 	// FIXME: I tried to avoid importing models here, but it seems to be impossible. | ||||
| @@ -60,14 +86,14 @@ func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( | ||||
| 		subQuery := builder.Select("id").From("issue").Where(repoCond) | ||||
|  | ||||
| 		cond = builder.Or( | ||||
| 			db.BuildCaseInsensitiveLike("issue.name", options.Keyword), | ||||
| 			db.BuildCaseInsensitiveLike("issue.content", options.Keyword), | ||||
| 			buildMatchQuery(options.SearchMode, "issue.name", options.Keyword), | ||||
| 			buildMatchQuery(options.SearchMode, "issue.content", options.Keyword), | ||||
| 			builder.In("issue.id", builder.Select("issue_id"). | ||||
| 				From("comment"). | ||||
| 				Where(builder.And( | ||||
| 					builder.Eq{"type": issue_model.CommentTypeComment}, | ||||
| 					builder.In("issue_id", subQuery), | ||||
| 					db.BuildCaseInsensitiveLike("content", options.Keyword), | ||||
| 					buildMatchQuery(options.SearchMode, "content", options.Keyword), | ||||
| 				)), | ||||
| 			), | ||||
| 		) | ||||
|   | ||||
| @@ -10,6 +10,7 @@ import ( | ||||
| 	"strings" | ||||
|  | ||||
| 	"code.gitea.io/gitea/modules/graceful" | ||||
| 	"code.gitea.io/gitea/modules/indexer" | ||||
| 	indexer_internal "code.gitea.io/gitea/modules/indexer/internal" | ||||
| 	inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" | ||||
| 	"code.gitea.io/gitea/modules/indexer/issues/internal" | ||||
| @@ -33,6 +34,11 @@ type Indexer struct { | ||||
| 	indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much | ||||
| } | ||||
|  | ||||
| func (b *Indexer) SupportedSearchModes() []indexer.SearchMode { | ||||
| 	// TODO: es supports fuzzy search, but our code doesn't at the moment, and actually the default fuzziness is already "AUTO" | ||||
| 	return indexer.SearchModesExactWords() | ||||
| } | ||||
|  | ||||
| // NewIndexer creates a new elasticsearch indexer | ||||
| func NewIndexer(url, indexerName string) *Indexer { | ||||
| 	inner := inner_elasticsearch.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping) | ||||
| @@ -146,12 +152,11 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( | ||||
| 	query := elastic.NewBoolQuery() | ||||
|  | ||||
| 	if options.Keyword != "" { | ||||
| 		searchType := esMultiMatchTypePhrasePrefix | ||||
| 		if options.IsFuzzyKeyword { | ||||
| 			searchType = esMultiMatchTypeBestFields | ||||
| 		if options.SearchMode == indexer.SearchModeExact { | ||||
| 			query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypePhrasePrefix)) | ||||
| 		} else /* words */ { | ||||
| 			query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypeBestFields).Operator("and")) | ||||
| 		} | ||||
|  | ||||
| 		query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(searchType)) | ||||
| 	} | ||||
|  | ||||
| 	if len(options.RepoIDs) > 0 { | ||||
|   | ||||
| @@ -14,6 +14,7 @@ import ( | ||||
| 	db_model "code.gitea.io/gitea/models/db" | ||||
| 	repo_model "code.gitea.io/gitea/models/repo" | ||||
| 	"code.gitea.io/gitea/modules/graceful" | ||||
| 	"code.gitea.io/gitea/modules/indexer" | ||||
| 	"code.gitea.io/gitea/modules/indexer/issues/bleve" | ||||
| 	"code.gitea.io/gitea/modules/indexer/issues/db" | ||||
| 	"code.gitea.io/gitea/modules/indexer/issues/elasticsearch" | ||||
| @@ -313,3 +314,11 @@ func CountIssues(ctx context.Context, opts *SearchOptions) (int64, error) { | ||||
| 	_, total, err := SearchIssues(ctx, opts) | ||||
| 	return total, err | ||||
| } | ||||
|  | ||||
| func SupportedSearchModes() []indexer.SearchMode { | ||||
| 	gi := globalIndexer.Load() | ||||
| 	if gi == nil { | ||||
| 		return nil | ||||
| 	} | ||||
| 	return (*gi).SupportedSearchModes() | ||||
| } | ||||
|   | ||||
| @@ -7,6 +7,7 @@ import ( | ||||
| 	"context" | ||||
| 	"fmt" | ||||
|  | ||||
| 	"code.gitea.io/gitea/modules/indexer" | ||||
| 	"code.gitea.io/gitea/modules/indexer/internal" | ||||
| ) | ||||
|  | ||||
| @@ -16,6 +17,7 @@ type Indexer interface { | ||||
| 	Index(ctx context.Context, issue ...*IndexerData) error | ||||
| 	Delete(ctx context.Context, ids ...int64) error | ||||
| 	Search(ctx context.Context, options *SearchOptions) (*SearchResult, error) | ||||
| 	SupportedSearchModes() []indexer.SearchMode | ||||
| } | ||||
|  | ||||
| // NewDummyIndexer returns a dummy indexer | ||||
| @@ -29,6 +31,10 @@ type dummyIndexer struct { | ||||
| 	internal.Indexer | ||||
| } | ||||
|  | ||||
| func (d *dummyIndexer) SupportedSearchModes() []indexer.SearchMode { | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (d *dummyIndexer) Index(_ context.Context, _ ...*IndexerData) error { | ||||
| 	return fmt.Errorf("indexer is not ready") | ||||
| } | ||||
|   | ||||
| @@ -7,6 +7,7 @@ import ( | ||||
| 	"strconv" | ||||
|  | ||||
| 	"code.gitea.io/gitea/models/db" | ||||
| 	"code.gitea.io/gitea/modules/indexer" | ||||
| 	"code.gitea.io/gitea/modules/optional" | ||||
| 	"code.gitea.io/gitea/modules/timeutil" | ||||
| ) | ||||
| @@ -77,7 +78,7 @@ type SearchResult struct { | ||||
| type SearchOptions struct { | ||||
| 	Keyword string // keyword to search | ||||
|  | ||||
| 	IsFuzzyKeyword bool // if false the levenshtein distance is 0 | ||||
| 	SearchMode indexer.SearchModeType | ||||
|  | ||||
| 	RepoIDs   []int64 // repository IDs which the issues belong to | ||||
| 	AllPublic bool    // if include all public repositories | ||||
|   | ||||
| @@ -10,6 +10,7 @@ import ( | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
|  | ||||
| 	"code.gitea.io/gitea/modules/indexer" | ||||
| 	indexer_internal "code.gitea.io/gitea/modules/indexer/internal" | ||||
| 	inner_meilisearch "code.gitea.io/gitea/modules/indexer/internal/meilisearch" | ||||
| 	"code.gitea.io/gitea/modules/indexer/issues/internal" | ||||
| @@ -35,6 +36,10 @@ type Indexer struct { | ||||
| 	indexer_internal.Indexer // do not composite inner_meilisearch.Indexer directly to avoid exposing too much | ||||
| } | ||||
|  | ||||
| func (b *Indexer) SupportedSearchModes() []indexer.SearchMode { | ||||
| 	return indexer.SearchModesExactWords() | ||||
| } | ||||
|  | ||||
| // NewIndexer creates a new meilisearch indexer | ||||
| func NewIndexer(url, apiKey, indexerName string) *Indexer { | ||||
| 	settings := &meilisearch.Settings{ | ||||
| @@ -230,9 +235,8 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( | ||||
| 		limit = 1 | ||||
| 	} | ||||
|  | ||||
| 	keyword := options.Keyword | ||||
| 	if !options.IsFuzzyKeyword { | ||||
| 		// to make it non fuzzy ("typo tolerance" in meilisearch terms), we have to quote the keyword(s) | ||||
| 	keyword := options.Keyword // default to match "words" | ||||
| 	if options.SearchMode == indexer.SearchModeExact { | ||||
| 		// https://www.meilisearch.com/docs/reference/api/search#phrase-search | ||||
| 		keyword = doubleQuoteKeyword(keyword) | ||||
| 	} | ||||
|   | ||||
		Reference in New Issue
	
	Block a user