mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-29 10:57:44 +09:00 
			
		
		
		
	Improve checkIfPRContentChanged (#22611)
The code for checking if a commit has caused a change in a PR is extremely inefficient and affects the head repository instead of using a temporary repository. This PR therefore makes several significant improvements: * A temporary repo like that used in merging. * The diff code is then significant improved to use a three-way diff instead of comparing diffs (possibly binary) line-by-line - in memory... Ref #22578 Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
		| @@ -4,6 +4,7 @@ | |||||||
| package util | package util | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
|  | 	"errors" | ||||||
| 	"io" | 	"io" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| @@ -17,3 +18,24 @@ func ReadAtMost(r io.Reader, buf []byte) (n int, err error) { | |||||||
| 	} | 	} | ||||||
| 	return n, err | 	return n, err | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // ErrNotEmpty is an error reported when there is a non-empty reader | ||||||
|  | var ErrNotEmpty = errors.New("not-empty") | ||||||
|  |  | ||||||
|  | // IsEmptyReader reads a reader and ensures it is empty | ||||||
|  | func IsEmptyReader(r io.Reader) (err error) { | ||||||
|  | 	var buf [1]byte | ||||||
|  |  | ||||||
|  | 	for { | ||||||
|  | 		n, err := r.Read(buf[:]) | ||||||
|  | 		if err != nil { | ||||||
|  | 			if err == io.EOF { | ||||||
|  | 				return nil | ||||||
|  | 			} | ||||||
|  | 			return err | ||||||
|  | 		} | ||||||
|  | 		if n > 0 { | ||||||
|  | 			return ErrNotEmpty | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|   | |||||||
| @@ -4,14 +4,12 @@ | |||||||
| package pull | package pull | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"bufio" |  | ||||||
| 	"bytes" |  | ||||||
| 	"context" | 	"context" | ||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"io" | 	"io" | ||||||
|  | 	"os" | ||||||
| 	"regexp" | 	"regexp" | ||||||
| 	"strings" | 	"strings" | ||||||
| 	"time" |  | ||||||
|  |  | ||||||
| 	"code.gitea.io/gitea/models" | 	"code.gitea.io/gitea/models" | ||||||
| 	"code.gitea.io/gitea/models/db" | 	"code.gitea.io/gitea/models/db" | ||||||
| @@ -29,6 +27,7 @@ import ( | |||||||
| 	repo_module "code.gitea.io/gitea/modules/repository" | 	repo_module "code.gitea.io/gitea/modules/repository" | ||||||
| 	"code.gitea.io/gitea/modules/setting" | 	"code.gitea.io/gitea/modules/setting" | ||||||
| 	"code.gitea.io/gitea/modules/sync" | 	"code.gitea.io/gitea/modules/sync" | ||||||
|  | 	"code.gitea.io/gitea/modules/util" | ||||||
| 	issue_service "code.gitea.io/gitea/services/issue" | 	issue_service "code.gitea.io/gitea/services/issue" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| @@ -351,69 +350,56 @@ func AddTestPullRequestTask(doer *user_model.User, repoID int64, branch string, | |||||||
| // checkIfPRContentChanged checks if diff to target branch has changed by push | // checkIfPRContentChanged checks if diff to target branch has changed by push | ||||||
| // A commit can be considered to leave the PR untouched if the patch/diff with its merge base is unchanged | // A commit can be considered to leave the PR untouched if the patch/diff with its merge base is unchanged | ||||||
| func checkIfPRContentChanged(ctx context.Context, pr *issues_model.PullRequest, oldCommitID, newCommitID string) (hasChanged bool, err error) { | func checkIfPRContentChanged(ctx context.Context, pr *issues_model.PullRequest, oldCommitID, newCommitID string) (hasChanged bool, err error) { | ||||||
| 	if err = pr.LoadHeadRepo(ctx); err != nil { | 	tmpBasePath, err := createTemporaryRepo(ctx, pr) | ||||||
| 		return false, fmt.Errorf("LoadHeadRepo: %w", err) | 	if err != nil { | ||||||
| 	} else if pr.HeadRepo == nil { | 		log.Error("CreateTemporaryRepo: %v", err) | ||||||
| 		// corrupt data assumed changed | 		return false, err | ||||||
| 		return true, nil |  | ||||||
| 	} | 	} | ||||||
|  | 	defer func() { | ||||||
|  | 		if err := repo_module.RemoveTemporaryPath(tmpBasePath); err != nil { | ||||||
|  | 			log.Error("checkIfPRContentChanged: RemoveTemporaryPath: %s", err) | ||||||
|  | 		} | ||||||
|  | 	}() | ||||||
|  |  | ||||||
| 	if err = pr.LoadBaseRepo(ctx); err != nil { | 	tmpRepo, err := git.OpenRepository(ctx, tmpBasePath) | ||||||
| 		return false, fmt.Errorf("LoadBaseRepo: %w", err) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	headGitRepo, err := git.OpenRepository(ctx, pr.HeadRepo.RepoPath()) |  | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return false, fmt.Errorf("OpenRepository: %w", err) | 		return false, fmt.Errorf("OpenRepository: %w", err) | ||||||
| 	} | 	} | ||||||
| 	defer headGitRepo.Close() | 	defer tmpRepo.Close() | ||||||
|  |  | ||||||
| 	// Add a temporary remote. | 	// Find the merge-base | ||||||
| 	tmpRemote := "checkIfPRContentChanged-" + fmt.Sprint(time.Now().UnixNano()) | 	_, base, err := tmpRepo.GetMergeBase("", "base", "tracking") | ||||||
| 	if err = headGitRepo.AddRemote(tmpRemote, pr.BaseRepo.RepoPath(), true); err != nil { |  | ||||||
| 		return false, fmt.Errorf("AddRemote: %s/%s-%s: %w", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err) |  | ||||||
| 	} |  | ||||||
| 	defer func() { |  | ||||||
| 		if err := headGitRepo.RemoveRemote(tmpRemote); err != nil { |  | ||||||
| 			log.Error("checkIfPRContentChanged: RemoveRemote: %s/%s-%s: %v", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err) |  | ||||||
| 		} |  | ||||||
| 	}() |  | ||||||
| 	// To synchronize repo and get a base ref |  | ||||||
| 	_, base, err := headGitRepo.GetMergeBase(tmpRemote, pr.BaseBranch, pr.HeadBranch) |  | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return false, fmt.Errorf("GetMergeBase: %w", err) | 		return false, fmt.Errorf("GetMergeBase: %w", err) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	diffBefore := &bytes.Buffer{} | 	cmd := git.NewCommand(ctx, "diff", "--name-only", "-z").AddDynamicArguments(newCommitID, oldCommitID, base) | ||||||
| 	diffAfter := &bytes.Buffer{} | 	stdoutReader, stdoutWriter, err := os.Pipe() | ||||||
| 	if err := headGitRepo.GetDiffFromMergeBase(base, oldCommitID, diffBefore); err != nil { | 	if err != nil { | ||||||
| 		// If old commit not found, assume changed. | 		return false, fmt.Errorf("unable to open pipe for to run diff: %w", err) | ||||||
| 		log.Debug("GetDiffFromMergeBase: %v", err) |  | ||||||
| 		return true, nil |  | ||||||
| 	} |  | ||||||
| 	if err := headGitRepo.GetDiffFromMergeBase(base, newCommitID, diffAfter); err != nil { |  | ||||||
| 		// New commit should be found |  | ||||||
| 		return false, fmt.Errorf("GetDiffFromMergeBase: %w", err) |  | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	diffBeforeLines := bufio.NewScanner(diffBefore) | 	if err := cmd.Run(&git.RunOpts{ | ||||||
| 	diffAfterLines := bufio.NewScanner(diffAfter) | 		Dir:    tmpBasePath, | ||||||
|  | 		Stdout: stdoutWriter, | ||||||
| 	for diffBeforeLines.Scan() && diffAfterLines.Scan() { | 		PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error { | ||||||
| 		if strings.HasPrefix(diffBeforeLines.Text(), "index") && strings.HasPrefix(diffAfterLines.Text(), "index") { | 			_ = stdoutWriter.Close() | ||||||
| 			// file hashes can change without the diff changing | 			defer func() { | ||||||
| 			continue | 				_ = stdoutReader.Close() | ||||||
| 		} else if strings.HasPrefix(diffBeforeLines.Text(), "@@") && strings.HasPrefix(diffAfterLines.Text(), "@@") { | 			}() | ||||||
| 			// the location of the difference may change | 			return util.IsEmptyReader(stdoutReader) | ||||||
| 			continue | 		}, | ||||||
| 		} else if !bytes.Equal(diffBeforeLines.Bytes(), diffAfterLines.Bytes()) { | 	}); err != nil { | ||||||
|  | 		if err == util.ErrNotEmpty { | ||||||
| 			return true, nil | 			return true, nil | ||||||
| 		} | 		} | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	if diffBeforeLines.Scan() || diffAfterLines.Scan() { | 		log.Error("Unable to run diff on %s %s %s in tempRepo for PR[%d]%s/%s...%s/%s: Error: %v", | ||||||
| 		// Diffs not of equal length | 			newCommitID, oldCommitID, base, | ||||||
| 		return true, nil | 			pr.ID, pr.BaseRepo.FullName(), pr.BaseBranch, pr.HeadRepo.FullName(), pr.HeadBranch, | ||||||
|  | 			err) | ||||||
|  |  | ||||||
|  | 		return false, fmt.Errorf("Unable to run git diff --name-only -z %s %s %s: %w", newCommitID, oldCommitID, base, err) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	return false, nil | 	return false, nil | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user