Refactor git command context & pipeline (#36406)

Less and simpler code, fewer bugs
This commit is contained in:
wxiaoguang
2026-01-21 09:35:14 +08:00
committed by GitHub
parent f6db180a80
commit 9ea91e036f
36 changed files with 286 additions and 434 deletions

View File

@@ -82,7 +82,7 @@ func NewBatchChecker(repo *git.Repository, treeish string, attributes []string)
WithStdout(lw).
RunWithStderr(ctx)
if err != nil && !git.IsErrCanceledOrKilled(err) {
if err != nil && !gitcmd.IsErrorCanceledOrKilled(err) {
log.Error("Attribute checker for commit %s exits with error: %v", treeish, err)
}
checker.cancel()

View File

@@ -44,8 +44,7 @@ func newCatFileBatch(ctx context.Context, repoPath string, cmdCatFile *gitcmd.Co
cmdCatFile = cmdCatFile.
WithDir(repoPath).
WithStdinWriter(&batchStdinWriter).
WithStdoutReader(&batchStdoutReader).
WithUseContextTimeout(true)
WithStdoutReader(&batchStdoutReader)
err := cmdCatFile.StartWithStderr(ctx)
if err != nil {

View File

@@ -5,10 +5,8 @@ package git
import (
"bufio"
"context"
"fmt"
"io"
"os"
"regexp"
"strconv"
"strings"
@@ -284,30 +282,16 @@ func GetAffectedFiles(repo *Repository, branchName, oldCommitID, newCommitID str
}
oldCommitID = startCommitID
}
stdoutReader, stdoutWriter, err := os.Pipe()
if err != nil {
log.Error("Unable to create os.Pipe for %s", repo.Path)
return nil, err
}
defer func() {
_ = stdoutReader.Close()
_ = stdoutWriter.Close()
}()
affectedFiles := make([]string, 0, 32)
// Run `git diff --name-only` to get the names of the changed files
err = gitcmd.NewCommand("diff", "--name-only").AddDynamicArguments(oldCommitID, newCommitID).
var stdoutReader io.ReadCloser
err := gitcmd.NewCommand("diff", "--name-only").AddDynamicArguments(oldCommitID, newCommitID).
WithEnv(env).
WithDir(repo.Path).
WithStdout(stdoutWriter).
WithPipelineFunc(func(ctx context.Context, cancel context.CancelFunc) error {
// Close the writer end of the pipe to begin processing
_ = stdoutWriter.Close()
defer func() {
// Close the reader on return to terminate the git command if necessary
_ = stdoutReader.Close()
}()
WithStdoutReader(&stdoutReader).
WithPipelineFunc(func(ctx gitcmd.Context) error {
// Now scan the output from the command
scanner := bufio.NewScanner(stdoutReader)
for scanner.Scan() {

View File

@@ -4,8 +4,6 @@
package git
import (
"context"
"errors"
"fmt"
"strings"
@@ -143,10 +141,3 @@ func IsErrMoreThanOne(err error) bool {
func (err *ErrMoreThanOne) Error() string {
return fmt.Sprintf("ErrMoreThanOne Error: %v: %s\n%s", err.Err, err.StdErr, err.StdOut)
}
func IsErrCanceledOrKilled(err error) bool {
// When "cancel()" a git command's context, the returned error of "Run()" could be one of them:
// - context.Canceled
// - *exec.ExitError: "signal: killed"
return err != nil && (errors.Is(err, context.Canceled) || err.Error() == "signal: killed")
}

View File

@@ -12,7 +12,6 @@ import (
"path/filepath"
"runtime"
"strings"
"time"
"code.gitea.io/gitea/modules/git/gitcmd"
"code.gitea.io/gitea/modules/log"
@@ -140,10 +139,6 @@ func InitSimple() error {
log.Warn("git module has been initialized already, duplicate init may work but it's better to fix it")
}
if setting.Git.Timeout.Default > 0 {
gitcmd.SetDefaultCommandExecutionTimeout(time.Duration(setting.Git.Timeout.Default) * time.Second)
}
if err := gitcmd.SetExecutablePath(setting.Git.Path); err != nil {
return err
}

View File

@@ -28,13 +28,6 @@ import (
// In most cases, it shouldn't be used. Use AddXxx function instead
type TrustedCmdArgs []internal.CmdArg
// defaultCommandExecutionTimeout default command execution timeout duration
var defaultCommandExecutionTimeout = 360 * time.Second
func SetDefaultCommandExecutionTimeout(timeout time.Duration) {
defaultCommandExecutionTimeout = timeout
}
// DefaultLocale is the default LC_ALL to run git commands in.
const DefaultLocale = "C"
@@ -44,13 +37,14 @@ type Command struct {
prog string
args []string
preErrors []error
cmd *exec.Cmd // for debug purpose only
configArgs []string
opts runOpts
cmd *exec.Cmd
cmdCtx context.Context
cmdCancel context.CancelFunc
cmdFinished context.CancelFunc
cmdCancel process.CancelCauseFunc
cmdFinished process.FinishedFunc
cmdStartTime time.Time
cmdStdinWriter *io.WriteCloser
@@ -209,11 +203,9 @@ func ToTrustedCmdArgs(args []string) TrustedCmdArgs {
return ret
}
// runOpts represents parameters to run the command. If UseContextTimeout is specified, then Timeout is ignored.
type runOpts struct {
Env []string
Timeout time.Duration
UseContextTimeout bool
Env []string
Timeout time.Duration
// Dir is the working dir for the git command, however:
// FIXME: this could be incorrect in many cases, for example:
@@ -236,7 +228,7 @@ type runOpts struct {
// Use new functions like WithStdinWriter to avoid such problems.
Stdin io.Reader
PipelineFunc func(context.Context, context.CancelFunc) error
PipelineFunc func(Context) error
}
func commonBaseEnvs() []string {
@@ -321,16 +313,11 @@ func (c *Command) WithStdin(stdin io.Reader) *Command {
return c
}
func (c *Command) WithPipelineFunc(f func(context.Context, context.CancelFunc) error) *Command {
func (c *Command) WithPipelineFunc(f func(Context) error) *Command {
c.opts.PipelineFunc = f
return c
}
func (c *Command) WithUseContextTimeout(useContextTimeout bool) *Command {
c.opts.UseContextTimeout = useContextTimeout
return c
}
// WithParentCallerInfo can be used to set the caller info (usually function name) of the parent function of the caller.
// For most cases, "Run" family functions can get its caller info automatically
// But if you need to call "Run" family functions in a wrapper function: "FeatureFunc -> GeneralWrapperFunc -> RunXxx",
@@ -363,9 +350,7 @@ func (c *Command) Start(ctx context.Context) (retErr error) {
defer func() {
if retErr != nil {
// release the pipes to avoid resource leak
safeClosePtrCloser(c.cmdStdoutReader)
safeClosePtrCloser(c.cmdStderrReader)
safeClosePtrCloser(c.cmdStdinWriter)
c.closeStdioPipes()
// if error occurs, we must also finish the task, otherwise, cmdFinished will be called in "Wait" function
if c.cmdFinished != nil {
c.cmdFinished()
@@ -380,12 +365,6 @@ func (c *Command) Start(ctx context.Context) (retErr error) {
return err
}
// We must not change the provided options
timeout := c.opts.Timeout
if timeout <= 0 {
timeout = defaultCommandExecutionTimeout
}
cmdLogString := c.LogString()
if c.callerInfo == "" {
c.WithParentCallerInfo()
@@ -399,83 +378,85 @@ func (c *Command) Start(ctx context.Context) (retErr error) {
span.SetAttributeString(gtprof.TraceAttrFuncCaller, c.callerInfo)
span.SetAttributeString(gtprof.TraceAttrGitCommand, cmdLogString)
if c.opts.UseContextTimeout {
if c.opts.Timeout <= 0 {
c.cmdCtx, c.cmdCancel, c.cmdFinished = process.GetManager().AddContext(ctx, desc)
} else {
c.cmdCtx, c.cmdCancel, c.cmdFinished = process.GetManager().AddContextTimeout(ctx, timeout, desc)
c.cmdCtx, c.cmdCancel, c.cmdFinished = process.GetManager().AddContextTimeout(ctx, c.opts.Timeout, desc)
}
c.cmdStartTime = time.Now()
cmd := exec.CommandContext(ctx, c.prog, append(c.configArgs, c.args...)...)
c.cmd = cmd // for debug purpose only
c.cmd = exec.CommandContext(ctx, c.prog, append(c.configArgs, c.args...)...)
if c.opts.Env == nil {
cmd.Env = os.Environ()
c.cmd.Env = os.Environ()
} else {
cmd.Env = c.opts.Env
c.cmd.Env = c.opts.Env
}
process.SetSysProcAttribute(cmd)
cmd.Env = append(cmd.Env, CommonGitCmdEnvs()...)
cmd.Dir = c.opts.Dir
cmd.Stdout = c.opts.Stdout
cmd.Stdin = c.opts.Stdin
process.SetSysProcAttribute(c.cmd)
c.cmd.Env = append(c.cmd.Env, CommonGitCmdEnvs()...)
c.cmd.Dir = c.opts.Dir
c.cmd.Stdout = c.opts.Stdout
c.cmd.Stdin = c.opts.Stdin
if _, err := safeAssignPipe(c.cmdStdinWriter, cmd.StdinPipe); err != nil {
if _, err := safeAssignPipe(c.cmdStdinWriter, c.cmd.StdinPipe); err != nil {
return err
}
if _, err := safeAssignPipe(c.cmdStdoutReader, cmd.StdoutPipe); err != nil {
if _, err := safeAssignPipe(c.cmdStdoutReader, c.cmd.StdoutPipe); err != nil {
return err
}
if _, err := safeAssignPipe(c.cmdStderrReader, cmd.StderrPipe); err != nil {
if _, err := safeAssignPipe(c.cmdStderrReader, c.cmd.StderrPipe); err != nil {
return err
}
if c.cmdManagedStderr != nil {
if cmd.Stderr != nil {
if c.cmd.Stderr != nil {
panic("CombineStderr needs managed (but not caller-provided) stderr pipe")
}
cmd.Stderr = c.cmdManagedStderr
c.cmd.Stderr = c.cmdManagedStderr
}
return cmd.Start()
return c.cmd.Start()
}
func (c *Command) closeStdioPipes() {
safeClosePtrCloser(c.cmdStdoutReader)
safeClosePtrCloser(c.cmdStderrReader)
safeClosePtrCloser(c.cmdStdinWriter)
}
func (c *Command) Wait() error {
defer func() {
safeClosePtrCloser(c.cmdStdoutReader)
safeClosePtrCloser(c.cmdStderrReader)
safeClosePtrCloser(c.cmdStdinWriter)
c.closeStdioPipes()
c.cmdFinished()
}()
cmd, ctx, cancel := c.cmd, c.cmdCtx, c.cmdCancel
if c.opts.PipelineFunc != nil {
err := c.opts.PipelineFunc(ctx, cancel)
if err != nil {
cancel()
errWait := cmd.Wait()
return errors.Join(err, errWait)
errCallback := c.opts.PipelineFunc(&cmdContext{Context: c.cmdCtx, cmd: c})
// after the pipeline function returns, we can safely cancel the command context and close the stdio pipes
c.cmdCancel(errCallback)
c.closeStdioPipes()
errWait := c.cmd.Wait()
errCause := context.Cause(c.cmdCtx)
// the pipeline function should be able to know whether it succeeds or fails
if errCallback == nil && (errCause == nil || errors.Is(errCause, context.Canceled)) {
return nil
}
return errors.Join(errCallback, errCause, errWait)
}
errWait := cmd.Wait()
// there might be other goroutines using the context or pipes, so we just wait for the command to finish
errWait := c.cmd.Wait()
elapsed := time.Since(c.cmdStartTime)
if elapsed > time.Second {
log.Debug("slow git.Command.Run: %s (%s)", c, elapsed)
log.Debug("slow git.Command.Run: %s (%s)", c, elapsed) // TODO: no need to log this for long-running commands
}
// Here the logic is different from "PipelineFunc" case,
// because PipelineFunc can return error if it fails, it knows whether it succeeds or fails.
// But in normal case, the caller just runs the git command, the command's exit code is the source of truth.
// If the caller need to know whether the command error is caused by cancellation, it should check the "err" by itself.
errCause := context.Cause(c.cmdCtx)
if errors.Is(errCause, context.Canceled) {
// if the ctx is canceled without other error, it must be caused by normal cancellation
return errCause
}
if errWait != nil {
// no matter whether there is other cause error, if "Wait" also has error,
// it's likely the error is caused by Wait error (from git command)
return errWait
}
return errCause
return errors.Join(errCause, errWait)
}
func (c *Command) StartWithStderr(ctx context.Context) RunStdError {
@@ -513,59 +494,6 @@ func (c *Command) Run(ctx context.Context) (err error) {
return c.Wait()
}
type RunStdError interface {
error
Unwrap() error
Stderr() string
}
type runStdError struct {
err error // usually the low-level error like `*exec.ExitError`
stderr string // git command's stderr output
errMsg string // the cached error message for Error() method
}
func (r *runStdError) Error() string {
// FIXME: GIT-CMD-STDERR: it is a bad design, the stderr should not be put in the error message
// But a lot of code only checks `strings.Contains(err.Error(), "git error")`
if r.errMsg == "" {
r.errMsg = fmt.Sprintf("%s - %s", r.err.Error(), strings.TrimSpace(r.stderr))
}
return r.errMsg
}
func (r *runStdError) Unwrap() error {
return r.err
}
func (r *runStdError) Stderr() string {
return r.stderr
}
func ErrorAsStderr(err error) (string, bool) {
var runErr RunStdError
if errors.As(err, &runErr) {
return runErr.Stderr(), true
}
return "", false
}
func StderrHasPrefix(err error, prefix string) bool {
stderr, ok := ErrorAsStderr(err)
if !ok {
return false
}
return strings.HasPrefix(stderr, prefix)
}
func IsErrorExitCode(err error, code int) bool {
var exitError *exec.ExitError
if errors.As(err, &exitError) {
return exitError.ExitCode() == code
}
return false
}
// RunStdString runs the command and returns stdout/stderr as string. and store stderr to returned error (err combined with stderr).
func (c *Command) RunStdString(ctx context.Context) (stdout, stderr string, runErr RunStdError) {
stdoutBytes, stderrBytes, runErr := c.WithParentCallerInfo().runStdBytes(ctx)

View File

@@ -1,38 +0,0 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
//go:build race
package gitcmd
import (
"context"
"testing"
"time"
)
func TestRunWithContextNoTimeout(t *testing.T) {
maxLoops := 10
// 'git --version' does not block so it must be finished before the timeout triggered.
for i := 0; i < maxLoops; i++ {
cmd := NewCommand("--version")
if err := cmd.Run(t.Context()); err != nil {
t.Fatal(err)
}
}
}
func TestRunWithContextTimeout(t *testing.T) {
maxLoops := 10
// 'git hash-object --stdin' blocks on stdin so we can have the timeout triggered.
for i := 0; i < maxLoops; i++ {
cmd := NewCommand("hash-object", "--stdin")
if err := cmd.WithTimeout(1 * time.Millisecond).Run(t.Context()); err != nil {
if err != context.DeadlineExceeded {
t.Fatalf("Testing %d/%d: %v", i, maxLoops, err)
}
}
}
}

View File

@@ -4,9 +4,11 @@
package gitcmd
import (
"context"
"fmt"
"os"
"testing"
"time"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/tempdir"
@@ -111,3 +113,15 @@ func TestRunStdError(t *testing.T) {
require.ErrorAs(t, fmt.Errorf("wrapped %w", err), &asErr)
}
func TestRunWithContextTimeout(t *testing.T) {
t.Run("NoTimeout", func(t *testing.T) {
// 'git --version' does not block so it must be finished before the timeout triggered.
err := NewCommand("--version").Run(t.Context())
require.NoError(t, err)
})
t.Run("WithTimeout", func(t *testing.T) {
err := NewCommand("hash-object", "--stdin").WithTimeout(1 * time.Millisecond).Run(t.Context())
require.ErrorIs(t, err, context.DeadlineExceeded)
})
}

View File

@@ -0,0 +1,28 @@
// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gitcmd
import (
"context"
)
type Context interface {
context.Context
// CancelWithCause is a helper function to cancel the context with a specific error cause
// And it returns the same error for convenience, to break the PipelineFunc easily
CancelWithCause(err error) error
// In the future, this interface will be extended to support stdio pipe readers/writers
}
type cmdContext struct {
context.Context
cmd *Command
}
func (c *cmdContext) CancelWithCause(err error) error {
c.cmd.cmdCancel(err)
return err
}

View File

@@ -0,0 +1,78 @@
// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gitcmd
import (
"context"
"errors"
"fmt"
"os/exec"
"strings"
)
type RunStdError interface {
error
Unwrap() error
Stderr() string
}
type runStdError struct {
err error // usually the low-level error like `*exec.ExitError`
stderr string // git command's stderr output
errMsg string // the cached error message for Error() method
}
func (r *runStdError) Error() string {
// FIXME: GIT-CMD-STDERR: it is a bad design, the stderr should not be put in the error message
// But a lot of code only checks `strings.Contains(err.Error(), "git error")`
if r.errMsg == "" {
r.errMsg = fmt.Sprintf("%s - %s", r.err.Error(), strings.TrimSpace(r.stderr))
}
return r.errMsg
}
func (r *runStdError) Unwrap() error {
return r.err
}
func (r *runStdError) Stderr() string {
return r.stderr
}
func ErrorAsStderr(err error) (string, bool) {
var runErr RunStdError
if errors.As(err, &runErr) {
return runErr.Stderr(), true
}
return "", false
}
func StderrHasPrefix(err error, prefix string) bool {
stderr, ok := ErrorAsStderr(err)
if !ok {
return false
}
return strings.HasPrefix(stderr, prefix)
}
func IsErrorExitCode(err error, code int) bool {
var exitError *exec.ExitError
if errors.As(err, &exitError) {
return exitError.ExitCode() == code
}
return false
}
func IsErrorSignalKilled(err error) bool {
var exitError *exec.ExitError
return errors.As(err, &exitError) && exitError.String() == "signal: killed"
}
func IsErrorCanceledOrKilled(err error) bool {
// When "cancel()" a git command's context, the returned error of "Run()" could be one of them:
// - context.Canceled
// - *exec.ExitError: "signal: killed"
// TODO: in the future, we need to use unified error type from gitcmd.Run to check whether it is manually canceled
return errors.Is(err, context.Canceled) || IsErrorSignalKilled(err)
}

View File

@@ -77,9 +77,7 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
var stdoutReader io.ReadCloser
err := cmd.WithDir(repo.Path).
WithStdoutReader(&stdoutReader).
WithPipelineFunc(func(ctx context.Context, cancel context.CancelFunc) error {
defer stdoutReader.Close()
WithPipelineFunc(func(ctx gitcmd.Context) error {
isInBlock := false
rd := bufio.NewReaderSize(stdoutReader, util.IfZero(opts.MaxLineLength, 16*1024))
var res *GrepResult
@@ -105,8 +103,7 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
}
if line == "" {
if len(results) >= opts.MaxResultLimit {
cancel()
break
return ctx.CancelWithCause(nil)
}
isInBlock = false
continue

View File

@@ -74,9 +74,9 @@ func (err *ErrInvalidCloneAddr) Unwrap() error {
func IsRemoteNotExistError(err error) bool {
// see: https://github.com/go-gitea/gitea/issues/32889#issuecomment-2571848216
// Should not add space in the end, sometimes git will add a `:`
prefix1 := "exit status 128 - fatal: No such remote" // git < 2.30
prefix2 := "exit status 2 - error: No such remote" // git >= 2.30
return strings.HasPrefix(err.Error(), prefix1) || strings.HasPrefix(err.Error(), prefix2)
prefix1 := "fatal: No such remote" // git < 2.30, exit status 128
prefix2 := "error: No such remote" // git >= 2.30. exit status 2
return gitcmd.StderrHasPrefix(err, prefix1) || gitcmd.StderrHasPrefix(err, prefix2)
}
// ParseRemoteAddr checks if given remote address is valid,

View File

@@ -5,9 +5,8 @@ package git
import (
"bufio"
"context"
"fmt"
"os"
"io"
"sort"
"strconv"
"strings"
@@ -55,15 +54,6 @@ func (repo *Repository) GetCodeActivityStats(fromTime time.Time, branch string)
}
stats.CommitCountInAllBranches = c
stdoutReader, stdoutWriter, err := os.Pipe()
if err != nil {
return nil, err
}
defer func() {
_ = stdoutReader.Close()
_ = stdoutWriter.Close()
}()
gitCmd := gitcmd.NewCommand("log", "--numstat", "--no-merges", "--pretty=format:---%n%h%n%aN%n%aE%n", "--date=iso").
AddOptionFormat("--since=%s", since)
if len(branch) == 0 {
@@ -72,11 +62,11 @@ func (repo *Repository) GetCodeActivityStats(fromTime time.Time, branch string)
gitCmd.AddArguments("--first-parent").AddDynamicArguments(branch)
}
var stdoutReader io.ReadCloser
err = gitCmd.
WithDir(repo.Path).
WithStdout(stdoutWriter).
WithPipelineFunc(func(ctx context.Context, cancel context.CancelFunc) error {
_ = stdoutWriter.Close()
WithStdoutReader(&stdoutReader).
WithPipelineFunc(func(ctx gitcmd.Context) error {
scanner := bufio.NewScanner(stdoutReader)
scanner.Split(bufio.ScanLines)
stats.CommitCount = 0

View File

@@ -7,7 +7,7 @@ import (
"bufio"
"context"
"fmt"
"os"
"io"
"code.gitea.io/gitea/modules/git/gitcmd"
"code.gitea.io/gitea/modules/log"
@@ -21,23 +21,15 @@ type TemplateSubmoduleCommit struct {
// GetTemplateSubmoduleCommits returns a list of submodules paths and their commits from a repository
// This function is only for generating new repos based on existing template, the template couldn't be too large.
func GetTemplateSubmoduleCommits(ctx context.Context, repoPath string) (submoduleCommits []TemplateSubmoduleCommit, _ error) {
stdoutReader, stdoutWriter, err := os.Pipe()
if err != nil {
return nil, err
}
err = gitcmd.NewCommand("ls-tree", "-r", "--", "HEAD").
var stdoutReader io.ReadCloser
err := gitcmd.NewCommand("ls-tree", "-r", "--", "HEAD").
WithDir(repoPath).
WithStdout(stdoutWriter).
WithPipelineFunc(func(ctx context.Context, cancel context.CancelFunc) error {
_ = stdoutWriter.Close()
defer stdoutReader.Close()
WithStdoutReader(&stdoutReader).
WithPipelineFunc(func(ctx gitcmd.Context) error {
scanner := bufio.NewScanner(stdoutReader)
for scanner.Scan() {
entry, err := parseLsTreeLine(scanner.Bytes())
if err != nil {
cancel()
return err
}
if entry.EntryMode == EntryModeCommit {