Add ability to download subpath archive (#36371)

closes: https://github.com/go-gitea/gitea/issues/4478

---------

Signed-off-by: wxiaoguang <wxiaoguang@gmail.com>
Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
TheFox0x7
2026-01-16 10:31:12 +01:00
committed by GitHub
parent 67e75f30a8
commit 69c5921d71
18 changed files with 230 additions and 134 deletions

View File

@@ -6,6 +6,8 @@ package pull
import (
"context"
"errors"
"fmt"
"code.gitea.io/gitea/models/db"
git_model "code.gitea.io/gitea/models/git"
@@ -14,8 +16,6 @@ import (
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/glob"
"code.gitea.io/gitea/modules/log"
"github.com/pkg/errors"
)
// MergeRequiredContextsCommitStatus returns a commit status state for given required contexts
@@ -69,7 +69,7 @@ func MergeRequiredContextsCommitStatus(commitStatuses []*git_model.CommitStatus,
func IsPullCommitStatusPass(ctx context.Context, pr *issues_model.PullRequest) (bool, error) {
pb, err := git_model.GetFirstMatchProtectedBranchRule(ctx, pr.BaseRepoID, pr.BaseBranch)
if err != nil {
return false, errors.Wrap(err, "GetLatestCommitStatus")
return false, fmt.Errorf("GetLatestCommitStatus: %w", err)
}
if pb == nil || !pb.EnableStatusCheck {
return true, nil
@@ -86,19 +86,19 @@ func IsPullCommitStatusPass(ctx context.Context, pr *issues_model.PullRequest) (
func GetPullRequestCommitStatusState(ctx context.Context, pr *issues_model.PullRequest) (commitstatus.CommitStatusState, error) {
// Ensure HeadRepo is loaded
if err := pr.LoadHeadRepo(ctx); err != nil {
return "", errors.Wrap(err, "LoadHeadRepo")
return "", fmt.Errorf("LoadHeadRepo: %w", err)
}
// check if all required status checks are successful
headGitRepo, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, pr.HeadRepo)
if err != nil {
return "", errors.Wrap(err, "OpenRepository")
return "", fmt.Errorf("OpenRepository: %w", err)
}
defer closer.Close()
if pr.Flow == issues_model.PullRequestFlowGithub {
if exist, err := git_model.IsBranchExist(ctx, pr.HeadRepo.ID, pr.HeadBranch); err != nil {
return "", errors.Wrap(err, "IsBranchExist")
return "", fmt.Errorf("IsBranchExist: %w", err)
} else if !exist {
return "", errors.New("Head branch does not exist, can not merge")
}
@@ -118,17 +118,17 @@ func GetPullRequestCommitStatusState(ctx context.Context, pr *issues_model.PullR
}
if err := pr.LoadBaseRepo(ctx); err != nil {
return "", errors.Wrap(err, "LoadBaseRepo")
return "", fmt.Errorf("LoadBaseRepo: %w", err)
}
commitStatuses, err := git_model.GetLatestCommitStatus(ctx, pr.BaseRepo.ID, sha, db.ListOptionsAll)
if err != nil {
return "", errors.Wrap(err, "GetLatestCommitStatus")
return "", fmt.Errorf("GetLatestCommitStatus: %w", err)
}
pb, err := git_model.GetFirstMatchProtectedBranchRule(ctx, pr.BaseRepoID, pr.BaseBranch)
if err != nil {
return "", errors.Wrap(err, "LoadProtectedBranch")
return "", fmt.Errorf("LoadProtectedBranch: %w", err)
}
var requiredContexts []string
if pb != nil {

View File

@@ -8,7 +8,6 @@ import (
"errors"
"fmt"
"io"
"net/http"
"os"
"strings"
"time"
@@ -16,6 +15,7 @@ import (
"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/gitcmd"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/httplib"
@@ -24,6 +24,7 @@ import (
"code.gitea.io/gitea/modules/queue"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/modules/util"
gitea_context "code.gitea.io/gitea/services/context"
)
@@ -36,58 +37,31 @@ type ArchiveRequest struct {
Repo *repo_model.Repository
Type repo_model.ArchiveType
CommitID string
Paths []string
archiveRefShortName string // the ref short name to download the archive, for example: "master", "v1.0.0", "commit id"
}
// ErrUnknownArchiveFormat request archive format is not supported
type ErrUnknownArchiveFormat struct {
RequestNameType string
}
// Error implements error
func (err ErrUnknownArchiveFormat) Error() string {
return "unknown format: " + err.RequestNameType
}
// Is implements error
func (ErrUnknownArchiveFormat) Is(err error) bool {
_, ok := err.(ErrUnknownArchiveFormat)
return ok
}
// RepoRefNotFoundError is returned when a requested reference (commit, tag) was not found.
type RepoRefNotFoundError struct {
RefShortName string
}
// Error implements error.
func (e RepoRefNotFoundError) Error() string {
return "unrecognized repository reference: " + e.RefShortName
}
func (e RepoRefNotFoundError) Is(err error) bool {
_, ok := err.(RepoRefNotFoundError)
return ok
}
// NewRequest creates an archival request, based on the URI. The
// resulting ArchiveRequest is suitable for being passed to Await()
// if it's determined that the request still needs to be satisfied.
func NewRequest(repo *repo_model.Repository, gitRepo *git.Repository, archiveRefExt string) (*ArchiveRequest, error) {
func NewRequest(repo *repo_model.Repository, gitRepo *git.Repository, archiveRefExt string, paths []string) (*ArchiveRequest, error) {
// here the archiveRefShortName is not a clear ref, it could be a tag, branch or commit id
archiveRefShortName, archiveType := repo_model.SplitArchiveNameType(archiveRefExt)
if archiveType == repo_model.ArchiveUnknown {
return nil, ErrUnknownArchiveFormat{archiveRefExt}
return nil, util.NewInvalidArgumentErrorf("unknown format: %s", archiveRefExt)
}
if archiveType == repo_model.ArchiveBundle && len(paths) != 0 {
return nil, util.NewInvalidArgumentErrorf("cannot specify paths when requesting a bundle")
}
// Get corresponding commit.
commitID, err := gitRepo.ConvertToGitID(archiveRefShortName)
if err != nil {
return nil, RepoRefNotFoundError{RefShortName: archiveRefShortName}
return nil, util.NewNotExistErrorf("unrecognized repository reference: %s", archiveRefShortName)
}
r := &ArchiveRequest{Repo: repo, archiveRefShortName: archiveRefShortName, Type: archiveType}
r := &ArchiveRequest{Repo: repo, archiveRefShortName: archiveRefShortName, Type: archiveType, Paths: paths}
r.CommitID = commitID.String()
return r, nil
}
@@ -159,6 +133,7 @@ func (aReq *ArchiveRequest) Stream(ctx context.Context, w io.Writer) error {
w,
setting.Repository.PrefixArchiveFiles,
aReq.CommitID,
aReq.Paths,
)
}
@@ -339,7 +314,7 @@ func DeleteRepositoryArchives(ctx context.Context) error {
return storage.Clean(storage.RepoArchives)
}
func ServeRepoArchive(ctx *gitea_context.Base, archiveReq *ArchiveRequest) {
func ServeRepoArchive(ctx *gitea_context.Base, archiveReq *ArchiveRequest) error {
// Add nix format link header so tarballs lock correctly:
// https://github.com/nixos/nix/blob/56763ff918eb308db23080e560ed2ea3e00c80a7/doc/manual/src/protocols/tarball-fetcher.md
ctx.Resp.Header().Add("Link", fmt.Sprintf(`<%s/archive/%s.%s?rev=%s>; rel="immutable"`,
@@ -350,20 +325,22 @@ func ServeRepoArchive(ctx *gitea_context.Base, archiveReq *ArchiveRequest) {
))
downloadName := archiveReq.Repo.Name + "-" + archiveReq.GetArchiveName()
if setting.Repository.StreamArchives {
if setting.Repository.StreamArchives || len(archiveReq.Paths) > 0 {
// the header must be set before starting streaming even an error would occur,
// because errors may happen in git command and such cases aren't in our control.
httplib.ServeSetHeaders(ctx.Resp, &httplib.ServeHeaderOptions{Filename: downloadName})
if err := archiveReq.Stream(ctx, ctx.Resp); err != nil && !ctx.Written() {
log.Error("Archive %v streaming failed: %v", archiveReq, err)
ctx.HTTPError(http.StatusInternalServerError)
if gitcmd.StderrHasPrefix(err, "fatal: pathspec") {
return util.NewInvalidArgumentErrorf("path doesn't exist or is invalid")
}
return fmt.Errorf("archive repo %s: failed to stream: %w", archiveReq.Repo.FullName(), err)
}
return
return nil
}
archiver, err := archiveReq.Await(ctx)
if err != nil {
log.Error("Archive %v await failed: %v", archiveReq, err)
ctx.HTTPError(http.StatusInternalServerError)
return
return fmt.Errorf("archive repo %s: failed to await: %w", archiveReq.Repo.FullName(), err)
}
rPath := archiver.RelativePath()
@@ -372,15 +349,13 @@ func ServeRepoArchive(ctx *gitea_context.Base, archiveReq *ArchiveRequest) {
u, err := storage.RepoArchives.URL(rPath, downloadName, ctx.Req.Method, nil)
if u != nil && err == nil {
ctx.Redirect(u.String())
return
return nil
}
}
fr, err := storage.RepoArchives.Open(rPath)
if err != nil {
log.Error("Archive %v open file failed: %v", archiveReq, err)
ctx.HTTPError(http.StatusInternalServerError)
return
return fmt.Errorf("archive repo %s: failed to open archive file: %w", archiveReq.Repo.FullName(), err)
}
defer fr.Close()
@@ -388,4 +363,5 @@ func ServeRepoArchive(ctx *gitea_context.Base, archiveReq *ArchiveRequest) {
Filename: downloadName,
LastModified: archiver.CreatedUnix.AsLocalTime(),
})
return nil
}

View File

@@ -8,11 +8,13 @@ import (
"time"
"code.gitea.io/gitea/models/unittest"
"code.gitea.io/gitea/modules/util"
"code.gitea.io/gitea/services/contexttest"
_ "code.gitea.io/gitea/models/actions"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestMain(m *testing.M) {
@@ -29,47 +31,47 @@ func TestArchive_Basic(t *testing.T) {
contexttest.LoadGitRepo(t, ctx)
defer ctx.Repo.GitRepo.Close()
bogusReq, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".zip")
bogusReq, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".zip", nil)
assert.NoError(t, err)
assert.NotNil(t, bogusReq)
assert.Equal(t, firstCommit+".zip", bogusReq.GetArchiveName())
// Check a series of bogus requests.
// Step 1, valid commit with a bad extension.
bogusReq, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".unknown")
bogusReq, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".unknown", nil)
assert.Error(t, err)
assert.Nil(t, bogusReq)
// Step 2, missing commit.
bogusReq, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, "dbffff.zip")
bogusReq, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, "dbffff.zip", nil)
assert.Error(t, err)
assert.Nil(t, bogusReq)
// Step 3, doesn't look like branch/tag/commit.
bogusReq, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, "db.zip")
bogusReq, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, "db.zip", nil)
assert.Error(t, err)
assert.Nil(t, bogusReq)
bogusReq, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, "master.zip")
bogusReq, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, "master.zip", nil)
assert.NoError(t, err)
assert.NotNil(t, bogusReq)
assert.Equal(t, "master.zip", bogusReq.GetArchiveName())
bogusReq, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, "test/archive.zip")
bogusReq, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, "test/archive.zip", nil)
assert.NoError(t, err)
assert.NotNil(t, bogusReq)
assert.Equal(t, "test-archive.zip", bogusReq.GetArchiveName())
// Now two valid requests, firstCommit with valid extensions.
zipReq, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".zip")
zipReq, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".zip", nil)
assert.NoError(t, err)
assert.NotNil(t, zipReq)
tgzReq, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".tar.gz")
tgzReq, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".tar.gz", nil)
assert.NoError(t, err)
assert.NotNil(t, tgzReq)
secondReq, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, secondCommit+".bundle")
secondReq, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, secondCommit+".bundle", nil)
assert.NoError(t, err)
assert.NotNil(t, secondReq)
@@ -89,7 +91,7 @@ func TestArchive_Basic(t *testing.T) {
// Sleep two seconds to make sure the queue doesn't change.
time.Sleep(2 * time.Second)
zipReq2, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".zip")
zipReq2, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".zip", nil)
assert.NoError(t, err)
// This zipReq should match what's sitting in the queue, as we haven't
// let it release yet. From the consumer's point of view, this looks like
@@ -104,12 +106,12 @@ func TestArchive_Basic(t *testing.T) {
// Now we'll submit a request and TimedWaitForCompletion twice, before and
// after we release it. We should trigger both the timeout and non-timeout
// cases.
timedReq, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, secondCommit+".tar.gz")
timedReq, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, secondCommit+".tar.gz", nil)
assert.NoError(t, err)
assert.NotNil(t, timedReq)
doArchive(t.Context(), timedReq)
zipReq2, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".zip")
zipReq2, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".zip", nil)
assert.NoError(t, err)
// Now, we're guaranteed to have released the original zipReq from the queue.
// Ensure that we don't get handed back the released entry somehow, but they
@@ -124,9 +126,13 @@ func TestArchive_Basic(t *testing.T) {
// Ideally, the extension would match what we originally requested.
assert.NotEqual(t, zipReq.GetArchiveName(), tgzReq.GetArchiveName())
assert.NotEqual(t, zipReq.GetArchiveName(), secondReq.GetArchiveName())
}
func TestErrUnknownArchiveFormat(t *testing.T) {
err := ErrUnknownArchiveFormat{RequestNameType: "xxx"}
assert.ErrorIs(t, err, ErrUnknownArchiveFormat{})
t.Run("BadPath", func(t *testing.T) {
badRequest, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".tar.gz", []string{"not-a-path"})
require.NoError(t, err)
err = ServeRepoArchive(ctx.Base, badRequest)
require.Error(t, err)
assert.ErrorIs(t, err, util.ErrInvalidArgument)
assert.ErrorContains(t, err, "path doesn't exist or is invalid")
})
}