Files
gitea/modules/indexer/issues/elasticsearch/elasticsearch.go
silverwind abcfa53040 Replace olivere/elastic with REST API client, add OpenSearch support (#37411)
Drops `github.com/olivere/elastic/v7` (unmaintained) and replaces it
with a small in-house wrapper that speaks the Elasticsearch REST API
directly via `net/http`. The subset used by Gitea (`_cluster/health`,
`_bulk`, `_doc`, `_delete_by_query`, `_refresh`, `_search`, `HEAD`/`PUT`
index) is stable across the targeted servers, so no client library is
needed.

**Targets tested**
- Elasticsearch 7, 8, 9
- OpenSearch 1, 2, 3

**Why not `go-elasticsearch`?**
The official client enforces an `X-Elastic-Product` server-identity
check that OpenSearch deliberately fails, which would force shipping a
transport shim to defeat it. Going direct over `net/http` removes that
fight along with several MB of transitive deps (`elastic-transport-go`,
`go.opentelemetry.io/otel{,/metric,/trace}`, `auto/sdk`, `easyjson`,
`intern`, `logr`, `stdr`).

Replaces: #30755
Fixes: https://github.com/go-gitea/gitea/issues/30752

---
This PR was written with the help of Claude Opus 4.7

---------

Co-authored-by: Claude (Opus 4.7) <noreply@anthropic.com>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2026-05-02 00:12:54 +02:00

256 lines
7.7 KiB
Go

// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package elasticsearch
import (
"context"
"strconv"
"strings"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/indexer"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
es "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
"code.gitea.io/gitea/modules/indexer/issues/internal"
"code.gitea.io/gitea/modules/util"
)
// issueIndexerLatestVersion is the index version that NewIndexer hands to
// es.NewIndexer together with defaultMapping.
// NOTE(review): presumably this has to be bumped whenever defaultMapping
// changes — confirm against es.NewIndexer.
const issueIndexerLatestVersion = 3

// Compile-time check that *Indexer satisfies internal.Indexer.
var _ internal.Indexer = (*Indexer)(nil)
// Indexer is the Elasticsearch-backed issue indexer. It is declared to satisfy
// internal.Indexer and embeds the shared *es.Indexer, whose Bulk, Index,
// Delete and Search methods the issue-specific methods of this type call.
type Indexer struct {
// Embedded shared indexer; NewIndexer fills it in with es.NewIndexer.
*es.Indexer
}
// SupportedSearchModes reports the search modes this indexer offers, which is
// the set returned by indexer.SearchModesExactWords. Search falls back to the
// first entry of this list when the caller does not pick a mode.
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
	// TODO: es supports fuzzy search, but our code doesn't at the moment, and actually the default fuzziness is already "AUTO"
	modes := indexer.SearchModesExactWords()
	return modes
}
// NewIndexer builds an Elasticsearch issue indexer for the given server url
// and index name. The underlying shared indexer is created with
// issueIndexerLatestVersion and defaultMapping.
func NewIndexer(url, indexerName string) *Indexer {
	inner := es.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping)
	return &Indexer{Indexer: inner}
}
const (
// defaultMapping is the JSON index definition that NewIndexer passes to
// es.NewIndexer. It declares one property per field of an indexed issue
// document. "title", "content" and "comments" are the text fields that
// Search runs its keyword query against; the remaining properties are
// integer or boolean fields used by Search for filtering and sorting.
defaultMapping = `
{
"mappings": {
"properties": {
"id": { "type": "integer", "index": true },
"repo_id": { "type": "integer", "index": true },
"is_public": { "type": "boolean", "index": true },
"title": { "type": "text", "index": true },
"content": { "type": "text", "index": true },
"comments": { "type" : "text", "index": true },
"is_pull": { "type": "boolean", "index": true },
"is_closed": { "type": "boolean", "index": true },
"is_archived": { "type": "boolean", "index": true },
"label_ids": { "type": "integer", "index": true },
"no_label": { "type": "boolean", "index": true },
"milestone_id": { "type": "integer", "index": true },
"project_ids": { "type": "integer", "index": true },
"no_project": { "type": "boolean", "index": true },
"poster_id": { "type": "integer", "index": true },
"assignee_id": { "type": "integer", "index": true },
"mention_ids": { "type": "integer", "index": true },
"reviewed_ids": { "type": "integer", "index": true },
"review_requested_ids": { "type": "integer", "index": true },
"subscriber_ids": { "type": "integer", "index": true },
"updated_unix": { "type": "integer", "index": true },
"created_unix": { "type": "integer", "index": true },
"deadline_unix": { "type": "integer", "index": true },
"comment_count": { "type": "integer", "index": true }
}
}
}
`
)
// Index stores the given issues in the index, using each issue's ID (in
// decimal) as the document ID.
//
// No issues is a no-op. A single issue is written with one document request
// under ctx. Two or more issues are sent as one bulk request; that request is
// issued with the graceful manager's hammer context rather than ctx.
// NOTE(review): presumably so a batch is not cut short when ctx is cancelled —
// confirm.
func (b *Indexer) Index(ctx context.Context, issues ...*internal.IndexerData) error {
	switch len(issues) {
	case 0:
		return nil
	case 1:
		doc := issues[0]
		return b.Indexer.Index(ctx, strconv.FormatInt(doc.ID, 10), doc)
	}

	batch := make([]es.BulkOp, len(issues))
	for i, doc := range issues {
		batch[i] = es.IndexOp(strconv.FormatInt(doc.ID, 10), doc)
	}
	return b.Bulk(graceful.GetManager().HammerContext(), batch)
}
// Delete removes the documents with the given issue IDs from the index.
//
// No IDs is a no-op. A single ID is removed with one delete request under
// ctx. Two or more IDs are sent as one bulk request; that request is issued
// with the graceful manager's hammer context rather than ctx.
// NOTE(review): presumably so a batch is not cut short when ctx is cancelled —
// confirm.
func (b *Indexer) Delete(ctx context.Context, ids ...int64) error {
	switch len(ids) {
	case 0:
		return nil
	case 1:
		return b.Indexer.Delete(ctx, strconv.FormatInt(ids[0], 10))
	}

	batch := make([]es.BulkOp, len(ids))
	for i, issueID := range ids {
		batch[i] = es.DeleteOp(strconv.FormatInt(issueID, 10))
	}
	return b.Bulk(graceful.GetManager().HammerContext(), batch)
}
// Search looks up issues matching options and returns their IDs together with
// the total reported by the search response.
//
// Each populated field of options adds one clause to a top-level bool query
// through query.Must. Results are ordered by options.SortBy with a descending
// "id" as secondary sort key, and paged according to options.Paginator (capped
// at maxPageSize).
//
// Side effect: an empty options.SortBy is replaced in place with
// internal.SortByCreatedAsc.
func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
	query := es.NewBoolQuery()

	// Keyword: multi-match over the text fields; the mode defaults to the
	// first supported search mode when the caller left it unset.
	if options.Keyword != "" {
		mode := util.IfZero(options.SearchMode, b.SupportedSearchModes()[0].ModeValue)
		keywordQuery := es.NewMultiMatchQuery(options.Keyword, "title", "content", "comments")
		if mode != indexer.SearchModeExact {
			keywordQuery = keywordQuery.Type(es.MultiMatchTypeBestFields).Operator("and")
		} else {
			keywordQuery = keywordQuery.Type(es.MultiMatchTypePhrasePrefix)
		}
		query.Must(keywordQuery)
	}

	// Repository scope: a nested bool query with should-clauses for the listed
	// repositories and, if requested, for public issues.
	if len(options.RepoIDs) != 0 {
		repoScope := es.NewBoolQuery()
		repoScope.Should(es.TermsQuery("repo_id", es.ToAnySlice(options.RepoIDs)...))
		if options.AllPublic {
			repoScope.Should(es.TermQuery("is_public", true))
		}
		query.Must(repoScope)
	}

	// Optional boolean flags: only filter when the caller set a value.
	if isPull := options.IsPull; isPull.Has() {
		query.Must(es.TermQuery("is_pull", isPull.Value()))
	}
	if isClosed := options.IsClosed; isClosed.Has() {
		query.Must(es.TermQuery("is_closed", isClosed.Value()))
	}
	if isArchived := options.IsArchived; isArchived.Has() {
		query.Must(es.TermQuery("is_archived", isArchived.Value()))
	}

	// Labels: NoLabelOnly overrides every other label filter.
	if !options.NoLabelOnly {
		switch {
		case len(options.IncludedLabelIDs) != 0:
			// One must-clause per label.
			allLabels := es.NewBoolQuery()
			for i := range options.IncludedLabelIDs {
				allLabels.Must(es.TermQuery("label_ids", options.IncludedLabelIDs[i]))
			}
			query.Must(allLabels)
		case len(options.IncludedAnyLabelIDs) != 0:
			query.Must(es.TermsQuery("label_ids", es.ToAnySlice(options.IncludedAnyLabelIDs)...))
		}

		if len(options.ExcludedLabelIDs) != 0 {
			// One must-not-clause per excluded label.
			noLabels := es.NewBoolQuery()
			for i := range options.ExcludedLabelIDs {
				noLabels.MustNot(es.TermQuery("label_ids", options.ExcludedLabelIDs[i]))
			}
			query.Must(noLabels)
		}
	} else {
		query.Must(es.TermQuery("no_label", true))
	}

	if len(options.MilestoneIDs) != 0 {
		query.Must(es.TermsQuery("milestone_id", es.ToAnySlice(options.MilestoneIDs)...))
	}

	// Projects: NoProjectOnly takes precedence over an explicit project list.
	switch {
	case options.NoProjectOnly:
		query.Must(es.TermQuery("no_project", true))
	case len(options.ProjectIDs) != 0:
		// FIXME: ISSUE-MULTIPLE-PROJECTS-FILTER: this logic is not right, it should use "AND" but not "OR"
		query.Must(es.TermsQuery("project_ids", es.ToAnySlice(options.ProjectIDs)...))
	}

	if options.PosterID != "" {
		// The parse error is ignored on purpose: "(none)" becomes 0, which
		// means no poster.
		posterID, _ := strconv.ParseInt(options.PosterID, 10, 64)
		query.Must(es.TermQuery("poster_id", posterID))
	}

	switch options.AssigneeID {
	case "":
		// No assignee filter requested.
	case "(any)":
		query.Must(es.NewRangeQuery("assignee_id").Gte(1))
	default:
		// The parse error is ignored on purpose: "(none)" becomes 0, which
		// means no assignee.
		assigneeID, _ := strconv.ParseInt(options.AssigneeID, 10, 64)
		query.Must(es.TermQuery("assignee_id", assigneeID))
	}

	// Optional user-related filters.
	if mention := options.MentionID; mention.Has() {
		query.Must(es.TermQuery("mention_ids", mention.Value()))
	}
	if reviewed := options.ReviewedID; reviewed.Has() {
		query.Must(es.TermQuery("reviewed_ids", reviewed.Value()))
	}
	if requested := options.ReviewRequestedID; requested.Has() {
		query.Must(es.TermQuery("review_requested_ids", requested.Value()))
	}
	if subscriber := options.SubscriberID; subscriber.Has() {
		query.Must(es.TermQuery("subscriber_ids", subscriber.Value()))
	}

	// Update-time window; either bound may be absent.
	if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() {
		updatedRange := es.NewRangeQuery("updated_unix")
		if options.UpdatedAfterUnix.Has() {
			updatedRange.Gte(options.UpdatedAfterUnix.Value())
		}
		if options.UpdatedBeforeUnix.Has() {
			updatedRange.Lte(options.UpdatedBeforeUnix.Value())
		}
		query.Must(updatedRange)
	}

	// Sorting: the requested order first, then descending "id".
	if options.SortBy == "" {
		options.SortBy = internal.SortByCreatedAsc
	}
	primarySort := parseSortBy(options.SortBy)
	secondarySort := es.SortField{Field: "id", Desc: true}

	// See https://stackoverflow.com/questions/35206409/elasticsearch-2-1-result-window-is-too-large-index-max-result-window/35221900
	// TODO: make it configurable since it's configurable in elasticsearch
	const maxPageSize = 10000
	from, size := indexer_internal.ParsePaginator(options.Paginator, maxPageSize)

	request := es.SearchRequest{
		Query:      query,
		Sort:       []es.SortField{primarySort, secondarySort},
		From:       from,
		Size:       size,
		TrackTotal: true,
	}
	resp, err := b.Indexer.Search(ctx, request)
	if err != nil {
		return nil, err
	}

	matches := make([]internal.Match, 0, len(resp.Hits))
	for _, hit := range resp.Hits {
		// Document IDs are written as decimal issue IDs by Index; a parse
		// failure is ignored and yields ID 0.
		issueID, _ := strconv.ParseInt(hit.ID, 10, 64)
		matches = append(matches, internal.Match{ID: issueID})
	}

	result := &internal.SearchResult{
		Total: resp.Total,
		Hits:  matches,
	}
	return result, nil
}
// parseSortBy converts a SortBy value into a sort field. A leading "-" is
// stripped from the field name and selects descending order; without it the
// name is used as-is and the order is ascending.
func parseSortBy(sortBy internal.SortBy) es.SortField {
	name := string(sortBy)
	descending := strings.HasPrefix(name, "-")
	if descending {
		name = name[len("-"):]
	}
	return es.SortField{Field: name, Desc: descending}
}