Files
gitea/services/actions/clear_tasks.go
Zettat123 899ede1d55 Introduce ActionRunAttempt to represent each execution of a run (#37119)
This PR introduces a new `ActionRunAttempt` model and makes Actions
execution attempt-scoped.

**Main Changes**

- Each workflow run trigger generates a new `ActionRunAttempt`. The
triggered jobs are then associated with this new `ActionRunAttempt`
record.
- Each rerun now creates:
  - a new `ActionRunAttempt` record for the workflow run
- a full new set of `ActionRunJob` records for the new
`ActionRunAttempt`
- For jobs that need to be rerun, the new job records are created as
runnable jobs in the new attempt.
- For jobs that do not need to be rerun, new job records are still
created in the new attempt, but they reuse the result of the previous
attempt instead of executing again.
- Introduce `rerunPlan` to manage each rerun and refactored rerun flow
into a two-phase plan-based model:
  - `buildRerunPlan`
  - `execRerunPlan`
- `RerunFailedWorkflowRun` and `RerunFailed` no longer directly derives
all jobs that need to be rerun; this step is now handled by
`buildRerunPlan`.
- Converted artifacts from run-scoped to attempt-scoped:
  - uploads are now associated with `RunAttemptID`
  - listing, download, and deletion resolve against the current attempt
- Added attempt-aware web Actions views:
- the default run page shows the latest attempt
(`/actions/runs/{run_id}`)
- previous attempt pages show jobs and artifacts for that attempt
(`/actions/runs/{run_id}/attempts/{attempt_num}`)
- New APIs:
  - `/repos/{owner}/{repo}/actions/runs/{run}/attempts/{attempt}`
  - `/repos/{owner}/{repo}/actions/runs/{run}/attempts/{attempt}/jobs`
- New configuration `MAX_RERUN_ATTEMPTS`
  - https://gitea.com/gitea/docs/pulls/383

**Compatibility**

- Existing legacy runs use `LatestAttemptID = 0` and legacy jobs use
`RunAttemptID = 0`. Therefore, these fields can be used to identify
legacy runs and jobs and provide backward compatibility.
- If a legacy run is rerun, an `ActionRunAttempt` with `attempt=1` will
be created to represent the original execution. Then a new
`ActionRunAttempt` with `attempt=2` will be created for the real rerun.
- Existing artifact records are not backfilled; legacy artifacts
continue to use `RunAttemptID = 0`.

**Improvements**

- It is now easier to inspect and download logs from previous attempts.
-
[`run_attempt`](https://docs.github.com/en/actions/reference/workflows-and-actions/contexts#github-context)
semantics are now aligned with GitHub.
- > A unique number for each attempt of a particular workflow run in a
repository. This number begins at 1 for the workflow run's first
attempt, and increments with each re-run.
- Rerun behavior is now clearer and more explicit.
- Instead of mutating the status of previous jobs in place, each rerun
creates a new attempt with a full new set of job records.
- Artifacts produced by different reruns can now be listed separately.

Signed-off-by: Zettat123 <zettat123@gmail.com>
Co-authored-by: silverwind <me@silverwind.io>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
Co-authored-by: Giteabot <teabot@gitea.io>
2026-04-23 23:33:41 +00:00

205 lines
7.5 KiB
Go

// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package actions
import (
"context"
"fmt"
"time"
actions_model "code.gitea.io/gitea/models/actions"
"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/actions"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"
webhook_module "code.gitea.io/gitea/modules/webhook"
)
// StopZombieTasks stops the task which have running status, but haven't been updated for a long time
func StopZombieTasks(ctx context.Context) error {
return stopTasks(ctx, actions_model.FindTaskOptions{
Status: actions_model.StatusRunning,
UpdatedBefore: timeutil.TimeStamp(time.Now().Add(-setting.Actions.ZombieTaskTimeout).Unix()),
})
}
// StopEndlessTasks stops the tasks which have running status and continuous updates, but don't end for a long time
func StopEndlessTasks(ctx context.Context) error {
return stopTasks(ctx, actions_model.FindTaskOptions{
Status: actions_model.StatusRunning,
StartedBefore: timeutil.TimeStamp(time.Now().Add(-setting.Actions.EndlessTaskTimeout).Unix()),
})
}
func CancelPreviousJobs(ctx context.Context, repoID int64, ref, workflowID string, event webhook_module.HookEventType) error {
jobs, err := actions_model.CancelPreviousJobs(ctx, repoID, ref, workflowID, event)
NotifyWorkflowJobsAndRunsStatusUpdate(ctx, jobs)
EmitJobsIfReadyByJobs(jobs)
return err
}
func CleanRepoScheduleTasks(ctx context.Context, repo *repo_model.Repository) error {
jobs, err := actions_model.CleanRepoScheduleTasks(ctx, repo)
NotifyWorkflowJobsAndRunsStatusUpdate(ctx, jobs)
EmitJobsIfReadyByJobs(jobs)
return err
}
func shouldBlockJobByConcurrency(ctx context.Context, job *actions_model.ActionRunJob) (bool, error) {
if job.RawConcurrency != "" && !job.IsConcurrencyEvaluated {
// when the job depends on other jobs, we cannot evaluate its concurrency, so it should be blocked and will be evaluated again when its dependencies are done
return true, nil
}
if job.ConcurrencyGroup == "" || job.ConcurrencyCancel {
return false, nil
}
attempts, jobs, err := actions_model.GetConcurrentRunAttemptsAndJobs(ctx, job.RepoID, job.ConcurrencyGroup, []actions_model.Status{actions_model.StatusRunning})
if err != nil {
return false, fmt.Errorf("GetConcurrentRunAttemptsAndJobs: %w", err)
}
return len(attempts) > 0 || len(jobs) > 0, nil
}
// PrepareToStartJobWithConcurrency prepares a job to start by its evaluated concurrency group and cancelling previous jobs if necessary.
// It returns the new status of the job (either StatusBlocked or StatusWaiting), any cancelled jobs, and any error encountered during the process.
func PrepareToStartJobWithConcurrency(ctx context.Context, job *actions_model.ActionRunJob) (actions_model.Status, []*actions_model.ActionRunJob, error) {
shouldBlock, err := shouldBlockJobByConcurrency(ctx, job)
if err != nil {
return actions_model.StatusBlocked, nil, err
}
// even if the current job is blocked, we still need to cancel previous "waiting/blocked" jobs in the same concurrency group
jobs, err := actions_model.CancelPreviousJobsByJobConcurrency(ctx, job)
if err != nil {
return actions_model.StatusBlocked, nil, fmt.Errorf("CancelPreviousJobsByJobConcurrency: %w", err)
}
return util.Iif(shouldBlock, actions_model.StatusBlocked, actions_model.StatusWaiting), jobs, nil
}
func shouldBlockRunByConcurrency(ctx context.Context, attempt *actions_model.ActionRunAttempt) (bool, error) {
if attempt.ConcurrencyGroup == "" || attempt.ConcurrencyCancel {
return false, nil
}
attempts, jobs, err := actions_model.GetConcurrentRunAttemptsAndJobs(ctx, attempt.RepoID, attempt.ConcurrencyGroup, []actions_model.Status{actions_model.StatusRunning})
if err != nil {
return false, fmt.Errorf("find concurrent runs and jobs: %w", err)
}
return len(attempts) > 0 || len(jobs) > 0, nil
}
// PrepareToStartRunWithConcurrency prepares a run attempt to start by its evaluated concurrency group and cancelling previous jobs if necessary.
// It returns the new status of the run attempt (either StatusBlocked or StatusWaiting), any cancelled jobs, and any error encountered during the process.
func PrepareToStartRunWithConcurrency(ctx context.Context, attempt *actions_model.ActionRunAttempt) (actions_model.Status, []*actions_model.ActionRunJob, error) {
shouldBlock, err := shouldBlockRunByConcurrency(ctx, attempt)
if err != nil {
return actions_model.StatusBlocked, nil, err
}
// even if the current run is blocked, we still need to cancel previous "waiting/blocked" jobs in the same concurrency group
jobs, err := actions_model.CancelPreviousJobsByRunConcurrency(ctx, attempt)
if err != nil {
return actions_model.StatusBlocked, nil, fmt.Errorf("CancelPreviousJobsByRunConcurrency: %w", err)
}
return util.Iif(shouldBlock, actions_model.StatusBlocked, actions_model.StatusWaiting), jobs, nil
}
func stopTasks(ctx context.Context, opts actions_model.FindTaskOptions) error {
tasks, err := db.Find[actions_model.ActionTask](ctx, opts)
if err != nil {
return fmt.Errorf("find tasks: %w", err)
}
jobs := make([]*actions_model.ActionRunJob, 0, len(tasks))
for _, task := range tasks {
if err := db.WithTx(ctx, func(ctx context.Context) error {
if err := actions_model.StopTask(ctx, task.ID, actions_model.StatusFailure); err != nil {
return err
}
if err := task.LoadJob(ctx); err != nil {
return err
}
jobs = append(jobs, task.Job)
return nil
}); err != nil {
log.Warn("Cannot stop task %v: %v", task.ID, err)
continue
}
remove, err := actions.TransferLogs(ctx, task.LogFilename)
if err != nil {
log.Warn("Cannot transfer logs of task %v: %v", task.ID, err)
continue
}
task.LogInStorage = true
if err := actions_model.UpdateTask(ctx, task, "log_in_storage"); err != nil {
log.Warn("Cannot update task %v: %v", task.ID, err)
continue
}
remove()
}
NotifyWorkflowJobsAndRunsStatusUpdate(ctx, jobs)
EmitJobsIfReadyByJobs(jobs)
return nil
}
// CancelAbandonedJobs cancels jobs that have not been picked by any runner for a long time
func CancelAbandonedJobs(ctx context.Context) error {
jobs, err := db.Find[actions_model.ActionRunJob](ctx, actions_model.FindRunJobOptions{
Statuses: []actions_model.Status{actions_model.StatusWaiting, actions_model.StatusBlocked},
UpdatedBefore: timeutil.TimeStampNow().AddDuration(-setting.Actions.AbandonedJobTimeout),
})
if err != nil {
log.Warn("find abandoned tasks: %v", err)
return err
}
now := timeutil.TimeStampNow()
updatedJobs := []*actions_model.ActionRunJob{}
for _, job := range jobs {
job.Status = actions_model.StatusCancelled
job.Stopped = now
updated := false
if err := db.WithTx(ctx, func(ctx context.Context) error {
n, err := actions_model.UpdateRunJob(ctx, job, nil, "status", "stopped")
if err != nil {
return err
}
if err := job.LoadAttributes(ctx); err != nil {
return err
}
updated = n > 0
return nil
}); err != nil {
log.Warn("cancel abandoned job %v: %v", job.ID, err)
// go on
}
if job.Run == nil || job.Run.Repo == nil {
continue // error occurs during loading attributes, the following code that depends on "Run.Repo" will fail, so ignore and skip
}
if updated {
CreateCommitStatusForRunJobs(ctx, job.Run, job)
updatedJobs = append(updatedJobs, job)
}
}
NotifyWorkflowJobsAndRunsStatusUpdate(ctx, updatedJobs)
EmitJobsIfReadyByJobs(updatedJobs)
return nil
}