mirror of
https://github.com/go-gitea/gitea.git
synced 2026-05-06 04:01:05 +09:00
This PR introduces a new `ActionRunAttempt` model and makes Actions
execution attempt-scoped.
**Main Changes**
- Each workflow run trigger generates a new `ActionRunAttempt`. The
triggered jobs are then associated with this new `ActionRunAttempt`
record.
- Each rerun now creates:
- a new `ActionRunAttempt` record for the workflow run
- a full new set of `ActionRunJob` records for the new
`ActionRunAttempt`
- For jobs that need to be rerun, the new job records are created as
runnable jobs in the new attempt.
- For jobs that do not need to be rerun, new job records are still
created in the new attempt, but they reuse the result of the previous
attempt instead of executing again.
- Introduce `rerunPlan` to manage each rerun and refactored rerun flow
into a two-phase plan-based model:
- `buildRerunPlan`
- `execRerunPlan`
- `RerunFailedWorkflowRun` and `RerunFailed` no longer directly derives
all jobs that need to be rerun; this step is now handled by
`buildRerunPlan`.
- Converted artifacts from run-scoped to attempt-scoped:
- uploads are now associated with `RunAttemptID`
- listing, download, and deletion resolve against the current attempt
- Added attempt-aware web Actions views:
- the default run page shows the latest attempt
(`/actions/runs/{run_id}`)
- previous attempt pages show jobs and artifacts for that attempt
(`/actions/runs/{run_id}/attempts/{attempt_num}`)
- New APIs:
- `/repos/{owner}/{repo}/actions/runs/{run}/attempts/{attempt}`
- `/repos/{owner}/{repo}/actions/runs/{run}/attempts/{attempt}/jobs`
- New configuration `MAX_RERUN_ATTEMPTS`
- https://gitea.com/gitea/docs/pulls/383
**Compatibility**
- Existing legacy runs use `LatestAttemptID = 0` and legacy jobs use
`RunAttemptID = 0`. Therefore, these fields can be used to identify
legacy runs and jobs and provide backward compatibility.
- If a legacy run is rerun, an `ActionRunAttempt` with `attempt=1` will
be created to represent the original execution. Then a new
`ActionRunAttempt` with `attempt=2` will be created for the real rerun.
- Existing artifact records are not backfilled; legacy artifacts
continue to use `RunAttemptID = 0`.
**Improvements**
- It is now easier to inspect and download logs from previous attempts.
-
[`run_attempt`](https://docs.github.com/en/actions/reference/workflows-and-actions/contexts#github-context)
semantics are now aligned with GitHub.
- > A unique number for each attempt of a particular workflow run in a
repository. This number begins at 1 for the workflow run's first
attempt, and increments with each re-run.
- Rerun behavior is now clearer and more explicit.
- Instead of mutating the status of previous jobs in place, each rerun
creates a new attempt with a full new set of job records.
- Artifacts produced by different reruns can now be listed separately.
Signed-off-by: Zettat123 <zettat123@gmail.com>
Co-authored-by: silverwind <me@silverwind.io>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
Co-authored-by: Giteabot <teabot@gitea.io>
421 lines
16 KiB
Go
421 lines
16 KiB
Go
// Copyright 2024 The Gitea Authors. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
package actions
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"slices"
|
|
|
|
actions_model "code.gitea.io/gitea/models/actions"
|
|
"code.gitea.io/gitea/models/db"
|
|
repo_model "code.gitea.io/gitea/models/repo"
|
|
"code.gitea.io/gitea/models/unit"
|
|
user_model "code.gitea.io/gitea/models/user"
|
|
"code.gitea.io/gitea/modules/container"
|
|
"code.gitea.io/gitea/modules/setting"
|
|
"code.gitea.io/gitea/modules/util"
|
|
|
|
"github.com/nektos/act/pkg/model"
|
|
"go.yaml.in/yaml/v4"
|
|
)
|
|
|
|
// GetFailedJobsForRerun returns the failed or cancelled jobs in a run.
|
|
func GetFailedJobsForRerun(allJobs []*actions_model.ActionRunJob) []*actions_model.ActionRunJob {
|
|
var jobsToRerun []*actions_model.ActionRunJob
|
|
|
|
for _, job := range allJobs {
|
|
if job.Status == actions_model.StatusFailure || job.Status == actions_model.StatusCancelled {
|
|
jobsToRerun = append(jobsToRerun, job)
|
|
}
|
|
}
|
|
|
|
return jobsToRerun
|
|
}
|
|
|
|
// RerunWorkflowRunJobs reruns the given jobs of a workflow run.
|
|
// An empty jobsToRerun means rerunning the whole run. Otherwise jobsToRerun contains only the user-requested target jobs;
|
|
// downstream dependent jobs are expanded internally while building the rerun plan.
|
|
//
|
|
// The three stages below (legacy backfill, plan build, plan exec) deliberately run in separate DB transactions
|
|
// rather than one big outer transaction:
|
|
// - execRerunPlan performs slow work (loading variables, YAML unmarshal, concurrency expression evaluation)
|
|
// before opening its own transaction, so the tx stays focused on inserts/updates.
|
|
// - The legacy backfill is idempotent-friendly: if it succeeds but a later stage fails, a subsequent rerun
|
|
// will observe run.LatestAttemptID != 0 and skip the backfill, continuing naturally. No data corruption
|
|
// or stuck state results from partial progress.
|
|
//
|
|
// Fast validations that can catch failures early (workflow disabled, run not done, etc.) are therefore
|
|
// pushed into validateRerun so we rarely enter createOriginalAttemptForLegacyRun only to fail afterwards.
|
|
func RerunWorkflowRunJobs(ctx context.Context, repo *repo_model.Repository, run *actions_model.ActionRun, triggerUser *user_model.User, jobsToRerun []*actions_model.ActionRunJob) (*actions_model.ActionRunAttempt, error) {
|
|
if err := validateRerun(ctx, run, repo, triggerUser, jobsToRerun); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if run.LatestAttemptID == 0 {
|
|
if err := createOriginalAttemptForLegacyRun(ctx, run); err != nil {
|
|
return nil, fmt.Errorf("create attempt for legacy run: %w", err)
|
|
}
|
|
}
|
|
|
|
plan, err := buildRerunPlan(ctx, run, triggerUser, jobsToRerun)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return execRerunPlan(ctx, plan)
|
|
}
|
|
|
|
func validateRerun(ctx context.Context, run *actions_model.ActionRun, repo *repo_model.Repository, triggerUser *user_model.User, jobsToRerun []*actions_model.ActionRunJob) error {
|
|
if !run.Status.IsDone() {
|
|
return util.NewInvalidArgumentErrorf("this workflow run is not done")
|
|
}
|
|
if repo == nil {
|
|
return util.NewInvalidArgumentErrorf("repo is required")
|
|
}
|
|
if run.RepoID != repo.ID {
|
|
return util.NewInvalidArgumentErrorf("run %d does not belong to repo %d", run.ID, repo.ID)
|
|
}
|
|
for _, job := range jobsToRerun {
|
|
if job.RunID != run.ID {
|
|
return util.NewInvalidArgumentErrorf("job %d does not belong to workflow run %d", job.ID, run.ID)
|
|
}
|
|
}
|
|
if triggerUser == nil {
|
|
return util.NewInvalidArgumentErrorf("trigger user is required")
|
|
}
|
|
cfgUnit := repo.MustGetUnit(ctx, unit.TypeActions)
|
|
cfg := cfgUnit.ActionsConfig()
|
|
if cfg.IsWorkflowDisabled(run.WorkflowID) {
|
|
return util.NewInvalidArgumentErrorf("workflow %s is disabled", run.WorkflowID)
|
|
}
|
|
|
|
// Legacy runs (LatestAttemptID == 0) conceptually have only attempt 1, so they can never be at the cap.
|
|
// For non-legacy runs, look up the latest attempt and reject when its number is already at the configured cap.
|
|
if run.LatestAttemptID > 0 {
|
|
latestAttempt, has, err := run.GetLatestAttempt(ctx)
|
|
if err != nil {
|
|
return fmt.Errorf("GetLatestAttempt: %w", err)
|
|
}
|
|
if has && latestAttempt.Attempt >= setting.Actions.MaxRerunAttempts {
|
|
return util.NewInvalidArgumentErrorf("workflow run has reached the maximum of %d attempts", setting.Actions.MaxRerunAttempts)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// rerunPlan is a read-only snapshot of the inputs needed to execute a rerun.
|
|
// It holds no to-be-persisted entities and no intermediate evaluation results;
|
|
// execRerunPlan constructs and evaluates the new ActionRunAttempt itself.
|
|
type rerunPlan struct {
|
|
run *actions_model.ActionRun
|
|
templateAttempt *actions_model.ActionRunAttempt
|
|
templateJobs actions_model.ActionJobList
|
|
rerunJobIDs container.Set[string]
|
|
triggerUser *user_model.User
|
|
}
|
|
|
|
// buildRerunPlan constructs a rerunPlan for the given workflow run without writing to the database.
|
|
// jobsToRerun contains only the user-requested target jobs. An empty jobsToRerun means the entire run should be rerun.
|
|
// It loads the latest attempt as a template and expands jobsToRerun to include all transitive downstream dependents.
|
|
// The construction of new-attempt and concurrency evaluation are deferred to execRerunPlan so that the plan remains a pure input snapshot.
|
|
func buildRerunPlan(ctx context.Context, run *actions_model.ActionRun, triggerUser *user_model.User, jobsToRerun []*actions_model.ActionRunJob) (*rerunPlan, error) {
|
|
if err := run.LoadAttributes(ctx); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
templateAttempt, hasTemplateAttempt, err := run.GetLatestAttempt(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if !hasTemplateAttempt {
|
|
return nil, util.NewNotExistErrorf("latest attempt not found")
|
|
}
|
|
|
|
templateJobs, err := actions_model.GetRunJobsByRunAndAttemptID(ctx, run.ID, templateAttempt.ID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("load template jobs: %w", err)
|
|
}
|
|
if len(templateJobs) == 0 {
|
|
return nil, util.NewNotExistErrorf("no template jobs")
|
|
}
|
|
|
|
plan := &rerunPlan{
|
|
run: run,
|
|
templateAttempt: templateAttempt,
|
|
templateJobs: templateJobs,
|
|
triggerUser: triggerUser,
|
|
}
|
|
|
|
if err := plan.expandRerunJobIDs(jobsToRerun); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return plan, nil
|
|
}
|
|
|
|
// execRerunPlan executes the rerun plan built by buildRerunPlan.
|
|
// It loads run variables, constructs the new ActionRunAttempt and evaluates run-level concurrency (all outside the transaction to keep the tx short).
|
|
// Inside a single database transaction it then inserts the new attempt, clones all template jobs, evaluates job-level concurrency for rerun jobs,
|
|
// and updates the run's latest_attempt_id.
|
|
// Jobs not in the rerun set are cloned as pass-through: their status is preserved and SourceTaskID points to the original task so the UI can still display their results.
|
|
// The attempt's final status is derived only from the rerun jobs, not the pass-through jobs.
|
|
// Notifications and commit statuses are sent after the transaction commits.
|
|
func execRerunPlan(ctx context.Context, plan *rerunPlan) (*actions_model.ActionRunAttempt, error) {
|
|
vars, err := actions_model.GetVariablesOfRun(ctx, plan.run)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("get run %d variables: %w", plan.run.ID, err)
|
|
}
|
|
|
|
newAttempt := &actions_model.ActionRunAttempt{
|
|
RepoID: plan.run.RepoID,
|
|
RunID: plan.run.ID,
|
|
Attempt: plan.templateAttempt.Attempt + 1,
|
|
TriggerUserID: plan.triggerUser.ID,
|
|
Status: actions_model.StatusWaiting,
|
|
}
|
|
|
|
if plan.run.RawConcurrency != "" {
|
|
var rawConcurrency model.RawConcurrency
|
|
if err := yaml.Unmarshal([]byte(plan.run.RawConcurrency), &rawConcurrency); err != nil {
|
|
return nil, fmt.Errorf("unmarshal raw concurrency: %w", err)
|
|
}
|
|
if err := EvaluateRunConcurrencyFillModel(ctx, plan.run, newAttempt, &rawConcurrency, vars, nil); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
var newJobs, newJobsToRerun actions_model.ActionJobList
|
|
var cancelledConcurrencyJobs []*actions_model.ActionRunJob
|
|
|
|
err = db.WithTx(ctx, func(ctx context.Context) error {
|
|
newAttemptStatus, jobsToCancel, err := PrepareToStartRunWithConcurrency(ctx, newAttempt)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
cancelledConcurrencyJobs = append(cancelledConcurrencyJobs, jobsToCancel...)
|
|
newAttempt.Status = newAttemptStatus
|
|
shouldBlock := newAttemptStatus == actions_model.StatusBlocked
|
|
|
|
if err := db.Insert(ctx, newAttempt); err != nil {
|
|
if _, getErr := actions_model.GetRunAttemptByRunIDAndAttemptNum(ctx, plan.run.ID, newAttempt.Attempt); getErr == nil {
|
|
return util.NewAlreadyExistErrorf("workflow run attempt %d for run %d already exists", newAttempt.Attempt, plan.run.ID)
|
|
}
|
|
return err
|
|
}
|
|
|
|
plan.run.LatestAttemptID = newAttempt.ID
|
|
if err := actions_model.UpdateRun(ctx, plan.run, "latest_attempt_id"); err != nil {
|
|
return err
|
|
}
|
|
|
|
hasWaitingJobs := false
|
|
newJobs = make(actions_model.ActionJobList, 0, len(plan.templateJobs))
|
|
newJobsToRerun = make(actions_model.ActionJobList, 0, len(plan.rerunJobIDs))
|
|
for _, templateJob := range plan.templateJobs {
|
|
newJob := cloneRunJobForAttempt(templateJob, newAttempt)
|
|
if plan.rerunJobIDs.Contains(templateJob.JobID) {
|
|
shouldBlockJob := shouldBlock || plan.hasRerunDependency(templateJob)
|
|
|
|
newJob.Status = util.Iif(shouldBlockJob, actions_model.StatusBlocked, actions_model.StatusWaiting)
|
|
newJob.TaskID = 0
|
|
newJob.SourceTaskID = 0
|
|
newJob.Started = 0
|
|
newJob.Stopped = 0
|
|
newJob.ConcurrencyGroup = ""
|
|
newJob.ConcurrencyCancel = false
|
|
newJob.IsConcurrencyEvaluated = false
|
|
|
|
if newJob.RawConcurrency != "" && !shouldBlockJob {
|
|
if err := EvaluateJobConcurrencyFillModel(ctx, plan.run, newAttempt, newJob, vars, nil); err != nil {
|
|
return fmt.Errorf("evaluate job concurrency: %w", err)
|
|
}
|
|
newJob.Status, jobsToCancel, err = PrepareToStartJobWithConcurrency(ctx, newJob)
|
|
if err != nil {
|
|
return fmt.Errorf("prepare to start job with concurrency: %w", err)
|
|
}
|
|
cancelledConcurrencyJobs = append(cancelledConcurrencyJobs, jobsToCancel...)
|
|
}
|
|
|
|
newJobsToRerun = append(newJobsToRerun, newJob)
|
|
} else {
|
|
newJob.TaskID = 0
|
|
newJob.SourceTaskID = templateJob.EffectiveTaskID()
|
|
newJob.Started = templateJob.Started
|
|
newJob.Stopped = templateJob.Stopped
|
|
}
|
|
|
|
if err := db.Insert(ctx, newJob); err != nil {
|
|
return err
|
|
}
|
|
hasWaitingJobs = hasWaitingJobs || newJob.Status == actions_model.StatusWaiting
|
|
newJobs = append(newJobs, newJob)
|
|
}
|
|
|
|
newAttempt.Status = actions_model.AggregateJobStatus(newJobsToRerun)
|
|
if err := actions_model.UpdateRunAttempt(ctx, newAttempt, "status"); err != nil {
|
|
return err
|
|
}
|
|
|
|
if hasWaitingJobs {
|
|
if err := actions_model.IncreaseTaskVersion(ctx, plan.run.OwnerID, plan.run.RepoID); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := plan.run.LoadAttributes(ctx); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
NotifyWorkflowJobsAndRunsStatusUpdate(ctx, cancelledConcurrencyJobs)
|
|
EmitJobsIfReadyByJobs(cancelledConcurrencyJobs)
|
|
|
|
CreateCommitStatusForRunJobs(ctx, plan.run, newJobs...)
|
|
NotifyWorkflowJobsAndRunsStatusUpdate(ctx, newJobsToRerun)
|
|
|
|
return newAttempt, nil
|
|
}
|
|
|
|
func (p *rerunPlan) expandRerunJobIDs(jobsToRerun []*actions_model.ActionRunJob) error {
|
|
templateJobIDs := make(container.Set[string])
|
|
for _, job := range p.templateJobs {
|
|
templateJobIDs.Add(job.JobID)
|
|
}
|
|
|
|
if len(jobsToRerun) == 0 {
|
|
p.rerunJobIDs = templateJobIDs
|
|
return nil
|
|
}
|
|
|
|
rerunJobIDs := make(container.Set[string])
|
|
for _, job := range jobsToRerun {
|
|
if !templateJobIDs.Contains(job.JobID) {
|
|
return util.NewInvalidArgumentErrorf("job %q does not exist in the latest attempt", job.JobID)
|
|
}
|
|
rerunJobIDs.Add(job.JobID)
|
|
}
|
|
|
|
for {
|
|
found := false
|
|
for _, job := range p.templateJobs {
|
|
if rerunJobIDs.Contains(job.JobID) {
|
|
continue
|
|
}
|
|
for _, need := range job.Needs {
|
|
if rerunJobIDs.Contains(need) {
|
|
found = true
|
|
rerunJobIDs.Add(job.JobID)
|
|
break
|
|
}
|
|
}
|
|
}
|
|
if !found {
|
|
break
|
|
}
|
|
}
|
|
|
|
p.rerunJobIDs = rerunJobIDs
|
|
return nil
|
|
}
|
|
|
|
func (p *rerunPlan) hasRerunDependency(job *actions_model.ActionRunJob) bool {
|
|
for _, need := range job.Needs {
|
|
if p.rerunJobIDs.Contains(need) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func cloneRunJobForAttempt(templateJob *actions_model.ActionRunJob, attempt *actions_model.ActionRunAttempt) *actions_model.ActionRunJob {
|
|
return &actions_model.ActionRunJob{
|
|
RunID: templateJob.RunID,
|
|
RunAttemptID: attempt.ID,
|
|
RepoID: templateJob.RepoID,
|
|
OwnerID: templateJob.OwnerID,
|
|
CommitSHA: templateJob.CommitSHA,
|
|
IsForkPullRequest: templateJob.IsForkPullRequest,
|
|
Name: templateJob.Name,
|
|
Attempt: attempt.Attempt,
|
|
WorkflowPayload: slices.Clone(templateJob.WorkflowPayload),
|
|
JobID: templateJob.JobID,
|
|
AttemptJobID: templateJob.AttemptJobID,
|
|
Needs: slices.Clone(templateJob.Needs),
|
|
RunsOn: slices.Clone(templateJob.RunsOn),
|
|
Status: templateJob.Status,
|
|
RawConcurrency: templateJob.RawConcurrency,
|
|
IsConcurrencyEvaluated: templateJob.IsConcurrencyEvaluated,
|
|
ConcurrencyGroup: templateJob.ConcurrencyGroup,
|
|
ConcurrencyCancel: templateJob.ConcurrencyCancel,
|
|
TokenPermissions: templateJob.TokenPermissions,
|
|
}
|
|
}
|
|
|
|
// createOriginalAttemptForLegacyRun creates a real attempt=1 for a legacy run and updates the existing legacy jobs and artifacts in place
|
|
// so the original execution becomes attempt-aware before the rerun plan is built and all subsequent logic can use real attempts.
|
|
// Tasks are not modified: they reference jobs by JobID, so updating jobs implicitly carries the new attempt linkage.
|
|
func createOriginalAttemptForLegacyRun(ctx context.Context, run *actions_model.ActionRun) error {
|
|
return db.WithTx(ctx, func(ctx context.Context) error {
|
|
jobs, err := actions_model.GetRunJobsByRunAndAttemptID(ctx, run.ID, 0)
|
|
if err != nil {
|
|
return fmt.Errorf("load legacy run jobs: %w", err)
|
|
}
|
|
if len(jobs) == 0 {
|
|
return fmt.Errorf("run %d has no jobs", run.ID)
|
|
}
|
|
|
|
originalAttempt := &actions_model.ActionRunAttempt{
|
|
RepoID: run.RepoID,
|
|
RunID: run.ID,
|
|
Attempt: 1,
|
|
TriggerUserID: run.TriggerUserID,
|
|
|
|
// Legacy concurrency fields on ActionRun are intentionally NOT backfilled onto this original attempt.
|
|
// They only matter while a run is actively being scheduled, and backfilling them for completed legacy runs
|
|
// would add migration/runtime cost without changing any future concurrency behavior.
|
|
|
|
Status: run.Status,
|
|
Created: run.Created,
|
|
Started: run.Started,
|
|
Stopped: run.Stopped,
|
|
}
|
|
|
|
// Use NoAutoTime so xorm does not overwrite Created with the current time on insert.
|
|
if _, err := db.GetEngine(ctx).NoAutoTime().Insert(originalAttempt); err != nil {
|
|
if _, getErr := actions_model.GetRunAttemptByRunIDAndAttemptNum(ctx, run.ID, originalAttempt.Attempt); getErr == nil {
|
|
return util.NewAlreadyExistErrorf("workflow run attempt %d for run %d already exists", originalAttempt.Attempt, run.ID)
|
|
}
|
|
return err
|
|
}
|
|
|
|
// backfill attempt related fields for jobs
|
|
for i, job := range jobs {
|
|
job.RunAttemptID = originalAttempt.ID
|
|
job.Attempt = originalAttempt.Attempt
|
|
job.AttemptJobID = int64(i + 1)
|
|
if _, err := db.GetEngine(ctx).ID(job.ID).Cols("run_attempt_id", "attempt", "attempt_job_id").Update(job); err != nil {
|
|
return fmt.Errorf("backfill legacy run jobs: %w", err)
|
|
}
|
|
}
|
|
|
|
// backfill "run_attempt_id" field for artifacts
|
|
if _, err := db.GetEngine(ctx).
|
|
Where("run_id=? AND run_attempt_id=0", run.ID).
|
|
Cols("run_attempt_id").
|
|
Update(&actions_model.ActionArtifact{RunAttemptID: originalAttempt.ID}); err != nil {
|
|
return fmt.Errorf("backfill legacy artifacts: %w", err)
|
|
}
|
|
|
|
// update "latest_attempt_id" for the run
|
|
run.LatestAttemptID = originalAttempt.ID
|
|
return actions_model.UpdateRun(ctx, run, "latest_attempt_id")
|
|
})
|
|
}
|