mirror of
https://github.com/go-gitea/gitea.git
synced 2026-05-23 05:42:33 +09:00
Drops `github.com/olivere/elastic/v7` (unmaintained) and replaces it
with a small in-house wrapper that speaks the Elasticsearch REST API
directly via `net/http`. The subset used by Gitea (`_cluster/health`,
`_bulk`, `_doc`, `_delete_by_query`, `_refresh`, `_search`, `HEAD`/`PUT`
index) is stable across the targeted servers, so no client library is
needed.
**Targets tested**
- Elasticsearch 7, 8, 9
- OpenSearch 1, 2, 3
**Why not `go-elasticsearch`?**
The official client enforces an `X-Elastic-Product` server-identity
check that OpenSearch deliberately fails, which would force shipping a
transport shim to defeat it. Going direct over `net/http` removes that
fight along with several MB of transitive deps (`elastic-transport-go`,
`go.opentelemetry.io/otel{,/metric,/trace}`, `auto/sdk`, `easyjson`,
`intern`, `logr`, `stdr`).
Replaces: #30755
Fixes: https://github.com/go-gitea/gitea/issues/30752
---
This PR was written with the help of Claude Opus 4.7
---------
Co-authored-by: Claude (Opus 4.7) <noreply@anthropic.com>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
256 lines
7.7 KiB
Go
256 lines
7.7 KiB
Go
// Copyright 2019 The Gitea Authors. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
package elasticsearch
|
|
|
|
import (
|
|
"context"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"code.gitea.io/gitea/modules/graceful"
|
|
"code.gitea.io/gitea/modules/indexer"
|
|
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
|
|
es "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
|
|
"code.gitea.io/gitea/modules/indexer/issues/internal"
|
|
"code.gitea.io/gitea/modules/util"
|
|
)
|
|
|
|
const issueIndexerLatestVersion = 3
|
|
|
|
var _ internal.Indexer = &Indexer{}
|
|
|
|
// Indexer implements Indexer interface
|
|
type Indexer struct {
|
|
*es.Indexer
|
|
}
|
|
|
|
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
|
|
// TODO: es supports fuzzy search, but our code doesn't at the moment, and actually the default fuzziness is already "AUTO"
|
|
return indexer.SearchModesExactWords()
|
|
}
|
|
|
|
// NewIndexer creates a new elasticsearch indexer
|
|
func NewIndexer(url, indexerName string) *Indexer {
|
|
return &Indexer{Indexer: es.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping)}
|
|
}
|
|
|
|
const (
|
|
defaultMapping = `
|
|
{
|
|
"mappings": {
|
|
"properties": {
|
|
"id": { "type": "integer", "index": true },
|
|
"repo_id": { "type": "integer", "index": true },
|
|
"is_public": { "type": "boolean", "index": true },
|
|
|
|
"title": { "type": "text", "index": true },
|
|
"content": { "type": "text", "index": true },
|
|
"comments": { "type" : "text", "index": true },
|
|
|
|
"is_pull": { "type": "boolean", "index": true },
|
|
"is_closed": { "type": "boolean", "index": true },
|
|
"is_archived": { "type": "boolean", "index": true },
|
|
"label_ids": { "type": "integer", "index": true },
|
|
"no_label": { "type": "boolean", "index": true },
|
|
"milestone_id": { "type": "integer", "index": true },
|
|
"project_ids": { "type": "integer", "index": true },
|
|
"no_project": { "type": "boolean", "index": true },
|
|
"poster_id": { "type": "integer", "index": true },
|
|
"assignee_id": { "type": "integer", "index": true },
|
|
"mention_ids": { "type": "integer", "index": true },
|
|
"reviewed_ids": { "type": "integer", "index": true },
|
|
"review_requested_ids": { "type": "integer", "index": true },
|
|
"subscriber_ids": { "type": "integer", "index": true },
|
|
"updated_unix": { "type": "integer", "index": true },
|
|
|
|
"created_unix": { "type": "integer", "index": true },
|
|
"deadline_unix": { "type": "integer", "index": true },
|
|
"comment_count": { "type": "integer", "index": true }
|
|
}
|
|
}
|
|
}
|
|
`
|
|
)
|
|
|
|
// Index will save the index data
|
|
func (b *Indexer) Index(ctx context.Context, issues ...*internal.IndexerData) error {
|
|
if len(issues) == 0 {
|
|
return nil
|
|
} else if len(issues) == 1 {
|
|
issue := issues[0]
|
|
return b.Indexer.Index(ctx, strconv.FormatInt(issue.ID, 10), issue)
|
|
}
|
|
|
|
ops := make([]es.BulkOp, 0, len(issues))
|
|
for _, issue := range issues {
|
|
ops = append(ops, es.IndexOp(strconv.FormatInt(issue.ID, 10), issue))
|
|
}
|
|
return b.Bulk(graceful.GetManager().HammerContext(), ops)
|
|
}
|
|
|
|
// Delete deletes indexes by ids
|
|
func (b *Indexer) Delete(ctx context.Context, ids ...int64) error {
|
|
if len(ids) == 0 {
|
|
return nil
|
|
} else if len(ids) == 1 {
|
|
return b.Indexer.Delete(ctx, strconv.FormatInt(ids[0], 10))
|
|
}
|
|
|
|
ops := make([]es.BulkOp, 0, len(ids))
|
|
for _, id := range ids {
|
|
ops = append(ops, es.DeleteOp(strconv.FormatInt(id, 10)))
|
|
}
|
|
return b.Bulk(graceful.GetManager().HammerContext(), ops)
|
|
}
|
|
|
|
// Search searches for issues by given conditions.
|
|
// Returns the matching issue IDs
|
|
func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
|
|
query := es.NewBoolQuery()
|
|
|
|
if options.Keyword != "" {
|
|
searchMode := util.IfZero(options.SearchMode, b.SupportedSearchModes()[0].ModeValue)
|
|
mm := es.NewMultiMatchQuery(options.Keyword, "title", "content", "comments")
|
|
if searchMode == indexer.SearchModeExact {
|
|
mm = mm.Type(es.MultiMatchTypePhrasePrefix)
|
|
} else {
|
|
mm = mm.Type(es.MultiMatchTypeBestFields).Operator("and")
|
|
}
|
|
query.Must(mm)
|
|
}
|
|
|
|
if len(options.RepoIDs) > 0 {
|
|
q := es.NewBoolQuery()
|
|
q.Should(es.TermsQuery("repo_id", es.ToAnySlice(options.RepoIDs)...))
|
|
if options.AllPublic {
|
|
q.Should(es.TermQuery("is_public", true))
|
|
}
|
|
query.Must(q)
|
|
}
|
|
|
|
if options.IsPull.Has() {
|
|
query.Must(es.TermQuery("is_pull", options.IsPull.Value()))
|
|
}
|
|
if options.IsClosed.Has() {
|
|
query.Must(es.TermQuery("is_closed", options.IsClosed.Value()))
|
|
}
|
|
if options.IsArchived.Has() {
|
|
query.Must(es.TermQuery("is_archived", options.IsArchived.Value()))
|
|
}
|
|
|
|
if options.NoLabelOnly {
|
|
query.Must(es.TermQuery("no_label", true))
|
|
} else {
|
|
if len(options.IncludedLabelIDs) > 0 {
|
|
q := es.NewBoolQuery()
|
|
for _, labelID := range options.IncludedLabelIDs {
|
|
q.Must(es.TermQuery("label_ids", labelID))
|
|
}
|
|
query.Must(q)
|
|
} else if len(options.IncludedAnyLabelIDs) > 0 {
|
|
query.Must(es.TermsQuery("label_ids", es.ToAnySlice(options.IncludedAnyLabelIDs)...))
|
|
}
|
|
if len(options.ExcludedLabelIDs) > 0 {
|
|
q := es.NewBoolQuery()
|
|
for _, labelID := range options.ExcludedLabelIDs {
|
|
q.MustNot(es.TermQuery("label_ids", labelID))
|
|
}
|
|
query.Must(q)
|
|
}
|
|
}
|
|
|
|
if len(options.MilestoneIDs) > 0 {
|
|
query.Must(es.TermsQuery("milestone_id", es.ToAnySlice(options.MilestoneIDs)...))
|
|
}
|
|
|
|
if options.NoProjectOnly {
|
|
query.Must(es.TermQuery("no_project", true))
|
|
} else if len(options.ProjectIDs) > 0 {
|
|
// FIXME: ISSUE-MULTIPLE-PROJECTS-FILTER: this logic is not right, it should use "AND" but not "OR"
|
|
query.Must(es.TermsQuery("project_ids", es.ToAnySlice(options.ProjectIDs)...))
|
|
}
|
|
|
|
if options.PosterID != "" {
|
|
// "(none)" becomes 0, it means no poster
|
|
posterIDInt64, _ := strconv.ParseInt(options.PosterID, 10, 64)
|
|
query.Must(es.TermQuery("poster_id", posterIDInt64))
|
|
}
|
|
|
|
if options.AssigneeID != "" {
|
|
if options.AssigneeID == "(any)" {
|
|
query.Must(es.NewRangeQuery("assignee_id").Gte(1))
|
|
} else {
|
|
// "(none)" becomes 0, it means no assignee
|
|
assigneeIDInt64, _ := strconv.ParseInt(options.AssigneeID, 10, 64)
|
|
query.Must(es.TermQuery("assignee_id", assigneeIDInt64))
|
|
}
|
|
}
|
|
|
|
if options.MentionID.Has() {
|
|
query.Must(es.TermQuery("mention_ids", options.MentionID.Value()))
|
|
}
|
|
|
|
if options.ReviewedID.Has() {
|
|
query.Must(es.TermQuery("reviewed_ids", options.ReviewedID.Value()))
|
|
}
|
|
if options.ReviewRequestedID.Has() {
|
|
query.Must(es.TermQuery("review_requested_ids", options.ReviewRequestedID.Value()))
|
|
}
|
|
|
|
if options.SubscriberID.Has() {
|
|
query.Must(es.TermQuery("subscriber_ids", options.SubscriberID.Value()))
|
|
}
|
|
|
|
if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() {
|
|
q := es.NewRangeQuery("updated_unix")
|
|
if options.UpdatedAfterUnix.Has() {
|
|
q.Gte(options.UpdatedAfterUnix.Value())
|
|
}
|
|
if options.UpdatedBeforeUnix.Has() {
|
|
q.Lte(options.UpdatedBeforeUnix.Value())
|
|
}
|
|
query.Must(q)
|
|
}
|
|
|
|
if options.SortBy == "" {
|
|
options.SortBy = internal.SortByCreatedAsc
|
|
}
|
|
sortBy := []es.SortField{
|
|
parseSortBy(options.SortBy),
|
|
{Field: "id", Desc: true},
|
|
}
|
|
|
|
// See https://stackoverflow.com/questions/35206409/elasticsearch-2-1-result-window-is-too-large-index-max-result-window/35221900
|
|
// TODO: make it configurable since it's configurable in elasticsearch
|
|
const maxPageSize = 10000
|
|
|
|
skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxPageSize)
|
|
resp, err := b.Indexer.Search(ctx, es.SearchRequest{
|
|
Query: query,
|
|
Sort: sortBy,
|
|
From: skip,
|
|
Size: limit,
|
|
TrackTotal: true,
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
hits := make([]internal.Match, 0, len(resp.Hits))
|
|
for _, hit := range resp.Hits {
|
|
id, _ := strconv.ParseInt(hit.ID, 10, 64)
|
|
hits = append(hits, internal.Match{ID: id})
|
|
}
|
|
|
|
return &internal.SearchResult{
|
|
Total: resp.Total,
|
|
Hits: hits,
|
|
}, nil
|
|
}
|
|
|
|
func parseSortBy(sortBy internal.SortBy) es.SortField {
|
|
field, desc := strings.CutPrefix(string(sortBy), "-")
|
|
return es.SortField{Field: field, Desc: desc}
|
|
}
|