mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-27 00:23:41 +09:00 
			
		
		
		
	meilisearch does not have a search option to control fuzziness per query right now: - https://github.com/meilisearch/meilisearch/issues/1192 - https://github.com/orgs/meilisearch/discussions/377 - https://github.com/meilisearch/meilisearch/discussions/1096 so we have to create a workaround by post-filtering the search results in gitea until this is addressed. For future work I added an option (backend only atm) to enable fuzziness for the issue indexer too. I also refactored the code so the fuzzy option is equal in logic to the code indexer. --- *Sponsored by Kithara Software GmbH*
		
			
				
	
	
		
			323 lines
		
	
	
		
			8.9 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			323 lines
		
	
	
		
			8.9 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright 2023 The Gitea Authors. All rights reserved.
 | |
| // SPDX-License-Identifier: MIT
 | |
| 
 | |
| package meilisearch
 | |
| 
 | |
| import (
 | |
| 	"context"
 | |
| 	"errors"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| 
 | |
| 	indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
 | |
| 	inner_meilisearch "code.gitea.io/gitea/modules/indexer/internal/meilisearch"
 | |
| 	"code.gitea.io/gitea/modules/indexer/issues/internal"
 | |
| 
 | |
| 	"github.com/meilisearch/meilisearch-go"
 | |
| )
 | |
| 
 | |
const (
	// issueIndexerLatestVersion is the schema version handed to the versioned
	// index in NewIndexer; presumably bumped whenever the index settings
	// change so a fresh index is created — confirm against inner_meilisearch.
	issueIndexerLatestVersion = 3

	// TODO: make this configurable if necessary
	// maxTotalHits bounds both meilisearch pagination (Settings.Pagination)
	// and the paginator limit used in Search.
	maxTotalHits = 10000
)
 | |
| 
 | |
// ErrMalformedResponse is never expected as we initialize the indexer ourself and so define the types.
// It is returned when a search hit cannot be type-asserted into the expected
// shape (see nonFuzzyWorkaround).
var ErrMalformedResponse = errors.New("meilisearch returned unexpected malformed content")
 | |
| 
 | |
// Compile-time check that Indexer satisfies the internal.Indexer interface.
var _ internal.Indexer = &Indexer{}
 | |
| 
 | |
// Indexer implements Indexer interface
type Indexer struct {
	// inner gives the meilisearch-specific methods (Client, VersionedIndexName)
	// used by Index/Delete/Search.
	inner                    *inner_meilisearch.Indexer
	indexer_internal.Indexer // do not composite inner_meilisearch.Indexer directly to avoid exposing too much
}
 | |
| 
 | |
| // NewIndexer creates a new meilisearch indexer
 | |
| func NewIndexer(url, apiKey, indexerName string) *Indexer {
 | |
| 	settings := &meilisearch.Settings{
 | |
| 		// The default ranking rules of meilisearch are: ["words", "typo", "proximity", "attribute", "sort", "exactness"]
 | |
| 		// So even if we specify the sort order, it could not be respected because the priority of "sort" is so low.
 | |
| 		// So we need to specify the ranking rules to make sure the sort order is respected.
 | |
| 		// See https://www.meilisearch.com/docs/learn/core_concepts/relevancy
 | |
| 		RankingRules: []string{"sort", // make sure "sort" has the highest priority
 | |
| 			"words", "typo", "proximity", "attribute", "exactness"},
 | |
| 
 | |
| 		SearchableAttributes: []string{
 | |
| 			"title",
 | |
| 			"content",
 | |
| 			"comments",
 | |
| 		},
 | |
| 		DisplayedAttributes: []string{
 | |
| 			"id",
 | |
| 			"title",
 | |
| 			"content",
 | |
| 			"comments",
 | |
| 		},
 | |
| 		FilterableAttributes: []string{
 | |
| 			"repo_id",
 | |
| 			"is_public",
 | |
| 			"is_pull",
 | |
| 			"is_closed",
 | |
| 			"label_ids",
 | |
| 			"no_label",
 | |
| 			"milestone_id",
 | |
| 			"project_id",
 | |
| 			"project_board_id",
 | |
| 			"poster_id",
 | |
| 			"assignee_id",
 | |
| 			"mention_ids",
 | |
| 			"reviewed_ids",
 | |
| 			"review_requested_ids",
 | |
| 			"subscriber_ids",
 | |
| 			"updated_unix",
 | |
| 		},
 | |
| 		SortableAttributes: []string{
 | |
| 			"updated_unix",
 | |
| 			"created_unix",
 | |
| 			"deadline_unix",
 | |
| 			"comment_count",
 | |
| 			"id",
 | |
| 		},
 | |
| 		Pagination: &meilisearch.Pagination{
 | |
| 			MaxTotalHits: maxTotalHits,
 | |
| 		},
 | |
| 	}
 | |
| 
 | |
| 	inner := inner_meilisearch.NewIndexer(url, apiKey, indexerName, issueIndexerLatestVersion, settings)
 | |
| 	indexer := &Indexer{
 | |
| 		inner:   inner,
 | |
| 		Indexer: inner,
 | |
| 	}
 | |
| 	return indexer
 | |
| }
 | |
| 
 | |
| // Index will save the index data
 | |
| func (b *Indexer) Index(_ context.Context, issues ...*internal.IndexerData) error {
 | |
| 	if len(issues) == 0 {
 | |
| 		return nil
 | |
| 	}
 | |
| 	for _, issue := range issues {
 | |
| 		_, err := b.inner.Client.Index(b.inner.VersionedIndexName()).AddDocuments(issue)
 | |
| 		if err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 	// TODO: bulk send index data
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // Delete deletes indexes by ids
 | |
| func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
 | |
| 	if len(ids) == 0 {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	for _, id := range ids {
 | |
| 		_, err := b.inner.Client.Index(b.inner.VersionedIndexName()).DeleteDocument(strconv.FormatInt(id, 10))
 | |
| 		if err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 	// TODO: bulk send deletes
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // Search searches for issues by given conditions.
 | |
| // Returns the matching issue IDs
 | |
| func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
 | |
| 	query := inner_meilisearch.FilterAnd{}
 | |
| 
 | |
| 	if len(options.RepoIDs) > 0 {
 | |
| 		q := &inner_meilisearch.FilterOr{}
 | |
| 		q.Or(inner_meilisearch.NewFilterIn("repo_id", options.RepoIDs...))
 | |
| 		if options.AllPublic {
 | |
| 			q.Or(inner_meilisearch.NewFilterEq("is_public", true))
 | |
| 		}
 | |
| 		query.And(q)
 | |
| 	}
 | |
| 
 | |
| 	if options.IsPull.Has() {
 | |
| 		query.And(inner_meilisearch.NewFilterEq("is_pull", options.IsPull.Value()))
 | |
| 	}
 | |
| 	if options.IsClosed.Has() {
 | |
| 		query.And(inner_meilisearch.NewFilterEq("is_closed", options.IsClosed.Value()))
 | |
| 	}
 | |
| 
 | |
| 	if options.NoLabelOnly {
 | |
| 		query.And(inner_meilisearch.NewFilterEq("no_label", true))
 | |
| 	} else {
 | |
| 		if len(options.IncludedLabelIDs) > 0 {
 | |
| 			q := &inner_meilisearch.FilterAnd{}
 | |
| 			for _, labelID := range options.IncludedLabelIDs {
 | |
| 				q.And(inner_meilisearch.NewFilterEq("label_ids", labelID))
 | |
| 			}
 | |
| 			query.And(q)
 | |
| 		} else if len(options.IncludedAnyLabelIDs) > 0 {
 | |
| 			query.And(inner_meilisearch.NewFilterIn("label_ids", options.IncludedAnyLabelIDs...))
 | |
| 		}
 | |
| 		if len(options.ExcludedLabelIDs) > 0 {
 | |
| 			q := &inner_meilisearch.FilterAnd{}
 | |
| 			for _, labelID := range options.ExcludedLabelIDs {
 | |
| 				q.And(inner_meilisearch.NewFilterNot(inner_meilisearch.NewFilterEq("label_ids", labelID)))
 | |
| 			}
 | |
| 			query.And(q)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if len(options.MilestoneIDs) > 0 {
 | |
| 		query.And(inner_meilisearch.NewFilterIn("milestone_id", options.MilestoneIDs...))
 | |
| 	}
 | |
| 
 | |
| 	if options.ProjectID != nil {
 | |
| 		query.And(inner_meilisearch.NewFilterEq("project_id", *options.ProjectID))
 | |
| 	}
 | |
| 	if options.ProjectBoardID != nil {
 | |
| 		query.And(inner_meilisearch.NewFilterEq("project_board_id", *options.ProjectBoardID))
 | |
| 	}
 | |
| 
 | |
| 	if options.PosterID != nil {
 | |
| 		query.And(inner_meilisearch.NewFilterEq("poster_id", *options.PosterID))
 | |
| 	}
 | |
| 
 | |
| 	if options.AssigneeID != nil {
 | |
| 		query.And(inner_meilisearch.NewFilterEq("assignee_id", *options.AssigneeID))
 | |
| 	}
 | |
| 
 | |
| 	if options.MentionID != nil {
 | |
| 		query.And(inner_meilisearch.NewFilterEq("mention_ids", *options.MentionID))
 | |
| 	}
 | |
| 
 | |
| 	if options.ReviewedID != nil {
 | |
| 		query.And(inner_meilisearch.NewFilterEq("reviewed_ids", *options.ReviewedID))
 | |
| 	}
 | |
| 	if options.ReviewRequestedID != nil {
 | |
| 		query.And(inner_meilisearch.NewFilterEq("review_requested_ids", *options.ReviewRequestedID))
 | |
| 	}
 | |
| 
 | |
| 	if options.SubscriberID != nil {
 | |
| 		query.And(inner_meilisearch.NewFilterEq("subscriber_ids", *options.SubscriberID))
 | |
| 	}
 | |
| 
 | |
| 	if options.UpdatedAfterUnix != nil {
 | |
| 		query.And(inner_meilisearch.NewFilterGte("updated_unix", *options.UpdatedAfterUnix))
 | |
| 	}
 | |
| 	if options.UpdatedBeforeUnix != nil {
 | |
| 		query.And(inner_meilisearch.NewFilterLte("updated_unix", *options.UpdatedBeforeUnix))
 | |
| 	}
 | |
| 
 | |
| 	if options.SortBy == "" {
 | |
| 		options.SortBy = internal.SortByCreatedAsc
 | |
| 	}
 | |
| 	sortBy := []string{
 | |
| 		parseSortBy(options.SortBy),
 | |
| 		"id:desc",
 | |
| 	}
 | |
| 
 | |
| 	skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxTotalHits)
 | |
| 
 | |
| 	searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(options.Keyword, &meilisearch.SearchRequest{
 | |
| 		Filter:           query.Statement(),
 | |
| 		Limit:            int64(limit),
 | |
| 		Offset:           int64(skip),
 | |
| 		Sort:             sortBy,
 | |
| 		MatchingStrategy: "all",
 | |
| 	})
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	hits, err := nonFuzzyWorkaround(searchRes, options.Keyword, options.IsFuzzyKeyword)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	return &internal.SearchResult{
 | |
| 		Total: searchRes.EstimatedTotalHits,
 | |
| 		Hits:  hits,
 | |
| 	}, nil
 | |
| }
 | |
| 
 | |
| func parseSortBy(sortBy internal.SortBy) string {
 | |
| 	field := strings.TrimPrefix(string(sortBy), "-")
 | |
| 	if strings.HasPrefix(string(sortBy), "-") {
 | |
| 		return field + ":desc"
 | |
| 	}
 | |
| 	return field + ":asc"
 | |
| }
 | |
| 
 | |
| // nonFuzzyWorkaround is needed as meilisearch does not have an exact search
 | |
| // and you can only change "typo tolerance" per index. So we have to post-filter the results
 | |
| // https://www.meilisearch.com/docs/learn/configuration/typo_tolerance#configuring-typo-tolerance
 | |
| // TODO: remove once https://github.com/orgs/meilisearch/discussions/377 is addressed
 | |
| func nonFuzzyWorkaround(searchRes *meilisearch.SearchResponse, keyword string, isFuzzy bool) ([]internal.Match, error) {
 | |
| 	hits := make([]internal.Match, 0, len(searchRes.Hits))
 | |
| 	for _, hit := range searchRes.Hits {
 | |
| 		hit, ok := hit.(map[string]any)
 | |
| 		if !ok {
 | |
| 			return nil, ErrMalformedResponse
 | |
| 		}
 | |
| 
 | |
| 		if !isFuzzy {
 | |
| 			keyword = strings.ToLower(keyword)
 | |
| 
 | |
| 			// declare a anon func to check if the title, content or at least one comment contains the keyword
 | |
| 			found, err := func() (bool, error) {
 | |
| 				// check if title match first
 | |
| 				title, ok := hit["title"].(string)
 | |
| 				if !ok {
 | |
| 					return false, ErrMalformedResponse
 | |
| 				} else if strings.Contains(strings.ToLower(title), keyword) {
 | |
| 					return true, nil
 | |
| 				}
 | |
| 
 | |
| 				// check if content has a match
 | |
| 				content, ok := hit["content"].(string)
 | |
| 				if !ok {
 | |
| 					return false, ErrMalformedResponse
 | |
| 				} else if strings.Contains(strings.ToLower(content), keyword) {
 | |
| 					return true, nil
 | |
| 				}
 | |
| 
 | |
| 				// now check for each comment if one has a match
 | |
| 				// so we first try to cast and skip if there are no comments
 | |
| 				comments, ok := hit["comments"].([]any)
 | |
| 				if !ok {
 | |
| 					return false, ErrMalformedResponse
 | |
| 				} else if len(comments) == 0 {
 | |
| 					return false, nil
 | |
| 				}
 | |
| 
 | |
| 				// now we iterate over all and report as soon as we detect one match
 | |
| 				for i := range comments {
 | |
| 					comment, ok := comments[i].(string)
 | |
| 					if !ok {
 | |
| 						return false, ErrMalformedResponse
 | |
| 					}
 | |
| 					if strings.Contains(strings.ToLower(comment), keyword) {
 | |
| 						return true, nil
 | |
| 					}
 | |
| 				}
 | |
| 
 | |
| 				// we got no match
 | |
| 				return false, nil
 | |
| 			}()
 | |
| 
 | |
| 			if err != nil {
 | |
| 				return nil, err
 | |
| 			} else if !found {
 | |
| 				continue
 | |
| 			}
 | |
| 		}
 | |
| 		issueID, ok := hit["id"].(float64)
 | |
| 		if !ok {
 | |
| 			return nil, ErrMalformedResponse
 | |
| 		}
 | |
| 		hits = append(hits, internal.Match{
 | |
| 			ID: int64(issueID),
 | |
| 		})
 | |
| 	}
 | |
| 	return hits, nil
 | |
| }
 |