mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-29 10:57:44 +09:00 
			
		
		
		
	Improve checkIfPRContentChanged (#22611)
The code for checking if a commit has caused a change in a PR is extremely inefficient and affects the head repository instead of using a temporary repository. This PR therefore makes several significant improvements: * A temporary repo like that used in merging. * The diff code is then significant improved to use a three-way diff instead of comparing diffs (possibly binary) line-by-line - in memory... Ref #22578 Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
		| @@ -4,14 +4,12 @@ | ||||
| package pull | ||||
|  | ||||
| import ( | ||||
| 	"bufio" | ||||
| 	"bytes" | ||||
| 	"context" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"os" | ||||
| 	"regexp" | ||||
| 	"strings" | ||||
| 	"time" | ||||
|  | ||||
| 	"code.gitea.io/gitea/models" | ||||
| 	"code.gitea.io/gitea/models/db" | ||||
| @@ -29,6 +27,7 @@ import ( | ||||
| 	repo_module "code.gitea.io/gitea/modules/repository" | ||||
| 	"code.gitea.io/gitea/modules/setting" | ||||
| 	"code.gitea.io/gitea/modules/sync" | ||||
| 	"code.gitea.io/gitea/modules/util" | ||||
| 	issue_service "code.gitea.io/gitea/services/issue" | ||||
| ) | ||||
|  | ||||
| @@ -351,69 +350,56 @@ func AddTestPullRequestTask(doer *user_model.User, repoID int64, branch string, | ||||
| // checkIfPRContentChanged checks if diff to target branch has changed by push | ||||
| // A commit can be considered to leave the PR untouched if the patch/diff with its merge base is unchanged | ||||
| func checkIfPRContentChanged(ctx context.Context, pr *issues_model.PullRequest, oldCommitID, newCommitID string) (hasChanged bool, err error) { | ||||
| 	if err = pr.LoadHeadRepo(ctx); err != nil { | ||||
| 		return false, fmt.Errorf("LoadHeadRepo: %w", err) | ||||
| 	} else if pr.HeadRepo == nil { | ||||
| 		// corrupt data assumed changed | ||||
| 		return true, nil | ||||
| 	tmpBasePath, err := createTemporaryRepo(ctx, pr) | ||||
| 	if err != nil { | ||||
| 		log.Error("CreateTemporaryRepo: %v", err) | ||||
| 		return false, err | ||||
| 	} | ||||
| 	defer func() { | ||||
| 		if err := repo_module.RemoveTemporaryPath(tmpBasePath); err != nil { | ||||
| 			log.Error("checkIfPRContentChanged: RemoveTemporaryPath: %s", err) | ||||
| 		} | ||||
| 	}() | ||||
|  | ||||
| 	if err = pr.LoadBaseRepo(ctx); err != nil { | ||||
| 		return false, fmt.Errorf("LoadBaseRepo: %w", err) | ||||
| 	} | ||||
|  | ||||
| 	headGitRepo, err := git.OpenRepository(ctx, pr.HeadRepo.RepoPath()) | ||||
| 	tmpRepo, err := git.OpenRepository(ctx, tmpBasePath) | ||||
| 	if err != nil { | ||||
| 		return false, fmt.Errorf("OpenRepository: %w", err) | ||||
| 	} | ||||
| 	defer headGitRepo.Close() | ||||
| 	defer tmpRepo.Close() | ||||
|  | ||||
| 	// Add a temporary remote. | ||||
| 	tmpRemote := "checkIfPRContentChanged-" + fmt.Sprint(time.Now().UnixNano()) | ||||
| 	if err = headGitRepo.AddRemote(tmpRemote, pr.BaseRepo.RepoPath(), true); err != nil { | ||||
| 		return false, fmt.Errorf("AddRemote: %s/%s-%s: %w", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err) | ||||
| 	} | ||||
| 	defer func() { | ||||
| 		if err := headGitRepo.RemoveRemote(tmpRemote); err != nil { | ||||
| 			log.Error("checkIfPRContentChanged: RemoveRemote: %s/%s-%s: %v", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err) | ||||
| 		} | ||||
| 	}() | ||||
| 	// To synchronize repo and get a base ref | ||||
| 	_, base, err := headGitRepo.GetMergeBase(tmpRemote, pr.BaseBranch, pr.HeadBranch) | ||||
| 	// Find the merge-base | ||||
| 	_, base, err := tmpRepo.GetMergeBase("", "base", "tracking") | ||||
| 	if err != nil { | ||||
| 		return false, fmt.Errorf("GetMergeBase: %w", err) | ||||
| 	} | ||||
|  | ||||
| 	diffBefore := &bytes.Buffer{} | ||||
| 	diffAfter := &bytes.Buffer{} | ||||
| 	if err := headGitRepo.GetDiffFromMergeBase(base, oldCommitID, diffBefore); err != nil { | ||||
| 		// If old commit not found, assume changed. | ||||
| 		log.Debug("GetDiffFromMergeBase: %v", err) | ||||
| 		return true, nil | ||||
| 	} | ||||
| 	if err := headGitRepo.GetDiffFromMergeBase(base, newCommitID, diffAfter); err != nil { | ||||
| 		// New commit should be found | ||||
| 		return false, fmt.Errorf("GetDiffFromMergeBase: %w", err) | ||||
| 	cmd := git.NewCommand(ctx, "diff", "--name-only", "-z").AddDynamicArguments(newCommitID, oldCommitID, base) | ||||
| 	stdoutReader, stdoutWriter, err := os.Pipe() | ||||
| 	if err != nil { | ||||
| 		return false, fmt.Errorf("unable to open pipe for to run diff: %w", err) | ||||
| 	} | ||||
|  | ||||
| 	diffBeforeLines := bufio.NewScanner(diffBefore) | ||||
| 	diffAfterLines := bufio.NewScanner(diffAfter) | ||||
|  | ||||
| 	for diffBeforeLines.Scan() && diffAfterLines.Scan() { | ||||
| 		if strings.HasPrefix(diffBeforeLines.Text(), "index") && strings.HasPrefix(diffAfterLines.Text(), "index") { | ||||
| 			// file hashes can change without the diff changing | ||||
| 			continue | ||||
| 		} else if strings.HasPrefix(diffBeforeLines.Text(), "@@") && strings.HasPrefix(diffAfterLines.Text(), "@@") { | ||||
| 			// the location of the difference may change | ||||
| 			continue | ||||
| 		} else if !bytes.Equal(diffBeforeLines.Bytes(), diffAfterLines.Bytes()) { | ||||
| 	if err := cmd.Run(&git.RunOpts{ | ||||
| 		Dir:    tmpBasePath, | ||||
| 		Stdout: stdoutWriter, | ||||
| 		PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error { | ||||
| 			_ = stdoutWriter.Close() | ||||
| 			defer func() { | ||||
| 				_ = stdoutReader.Close() | ||||
| 			}() | ||||
| 			return util.IsEmptyReader(stdoutReader) | ||||
| 		}, | ||||
| 	}); err != nil { | ||||
| 		if err == util.ErrNotEmpty { | ||||
| 			return true, nil | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if diffBeforeLines.Scan() || diffAfterLines.Scan() { | ||||
| 		// Diffs not of equal length | ||||
| 		return true, nil | ||||
| 		log.Error("Unable to run diff on %s %s %s in tempRepo for PR[%d]%s/%s...%s/%s: Error: %v", | ||||
| 			newCommitID, oldCommitID, base, | ||||
| 			pr.ID, pr.BaseRepo.FullName(), pr.BaseBranch, pr.HeadRepo.FullName(), pr.HeadBranch, | ||||
| 			err) | ||||
|  | ||||
| 		return false, fmt.Errorf("Unable to run git diff --name-only -z %s %s %s: %w", newCommitID, oldCommitID, base, err) | ||||
| 	} | ||||
|  | ||||
| 	return false, nil | ||||
|   | ||||
		Reference in New Issue
	
	Block a user