mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-29 10:57:44 +09:00 
			
		
		
		
	Use cat-file --batch in GetLanguageStats (#14685)
* Use cat-file --batch in GetLanguageStats This PR moves to using a single cat-file --batch in GetLanguageStats significantly reducing the number of processes spawned during language stat processing. Signed-off-by: Andrew Thornton <art27@cantab.net> * placate lint Signed-off-by: Andrew Thornton <art27@cantab.net> * Update modules/git/repo_language_stats_nogogit.go Co-authored-by: a1012112796 <1012112796@qq.com> Co-authored-by: Lauris BH <lauris@nix.lv> Co-authored-by: 6543 <6543@obermui.de> Co-authored-by: a1012112796 <1012112796@qq.com> Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
This commit is contained in:
		| @@ -7,9 +7,11 @@ | |||||||
| package git | package git | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
|  | 	"bufio" | ||||||
| 	"bytes" | 	"bytes" | ||||||
| 	"io" | 	"io" | ||||||
| 	"io/ioutil" | 	"math" | ||||||
|  | 	"strings" | ||||||
|  |  | ||||||
| 	"code.gitea.io/gitea/modules/analyze" | 	"code.gitea.io/gitea/modules/analyze" | ||||||
|  |  | ||||||
| @@ -18,16 +20,60 @@ import ( | |||||||
|  |  | ||||||
| // GetLanguageStats calculates language stats for git repository at specified commit | // GetLanguageStats calculates language stats for git repository at specified commit | ||||||
| func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) { | func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) { | ||||||
| 	// FIXME: We can be more efficient here... | 	// We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary. | ||||||
| 	// | 	// so let's create a batch stdin and stdout | ||||||
| 	// We're expecting that we will be reading a lot of blobs and the trees |  | ||||||
| 	// Thus we should use a shared `cat-file --batch` to get all of this data | 	batchStdinReader, batchStdinWriter := io.Pipe() | ||||||
| 	// And keep the buffers around with resets as necessary. | 	batchStdoutReader, batchStdoutWriter := io.Pipe() | ||||||
| 	// | 	defer func() { | ||||||
| 	// It's more complicated so... | 		_ = batchStdinReader.Close() | ||||||
| 	commit, err := repo.GetCommit(commitID) | 		_ = batchStdinWriter.Close() | ||||||
|  | 		_ = batchStdoutReader.Close() | ||||||
|  | 		_ = batchStdoutWriter.Close() | ||||||
|  | 	}() | ||||||
|  |  | ||||||
|  | 	go func() { | ||||||
|  | 		stderr := strings.Builder{} | ||||||
|  | 		err := NewCommand("cat-file", "--batch").RunInDirFullPipeline(repo.Path, batchStdoutWriter, &stderr, batchStdinReader) | ||||||
|  | 		if err != nil { | ||||||
|  | 			_ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String())) | ||||||
|  | 			_ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String())) | ||||||
|  | 		} else { | ||||||
|  | 			_ = batchStdoutWriter.Close() | ||||||
|  | 			_ = batchStdinReader.Close() | ||||||
|  | 		} | ||||||
|  | 	}() | ||||||
|  |  | ||||||
|  | 	// For simplicity's sake we'll use a buffered reader | ||||||
|  | 	batchReader := bufio.NewReader(batchStdoutReader) | ||||||
|  |  | ||||||
|  | 	writeID := func(id string) error { | ||||||
|  | 		_, err := batchStdinWriter.Write([]byte(id)) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return err | ||||||
|  | 		} | ||||||
|  | 		_, err = batchStdinWriter.Write([]byte{'\n'}) | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if err := writeID(commitID); err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  | 	shaBytes, typ, size, err := ReadBatchLine(batchReader) | ||||||
|  | 	if typ != "commit" { | ||||||
|  | 		log("Unable to get commit for: %s. Err: %v", commitID, err) | ||||||
|  | 		return nil, ErrNotExist{commitID, ""} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	sha, err := NewIDFromString(string(shaBytes)) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		log("Unable to get commit for: %s", commitID) | 		log("Unable to get commit for: %s. Err: %v", commitID, err) | ||||||
|  | 		return nil, ErrNotExist{commitID, ""} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	commit, err := CommitFromReader(repo, sha, io.LimitReader(batchReader, size)) | ||||||
|  | 	if err != nil { | ||||||
|  | 		log("Unable to get commit for: %s. Err: %v", commitID, err) | ||||||
| 		return nil, err | 		return nil, err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| @@ -38,17 +84,45 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err | |||||||
| 		return nil, err | 		return nil, err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	contentBuf := bytes.Buffer{} | ||||||
|  | 	var content []byte | ||||||
| 	sizes := make(map[string]int64) | 	sizes := make(map[string]int64) | ||||||
| 	for _, f := range entries { | 	for _, f := range entries { | ||||||
|  | 		contentBuf.Reset() | ||||||
|  | 		content = contentBuf.Bytes() | ||||||
| 		if f.Size() == 0 || enry.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) || | 		if f.Size() == 0 || enry.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) || | ||||||
| 			enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) { | 			enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) { | ||||||
| 			continue | 			continue | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		// If content can not be read or file is too big just do detection by filename | 		// If content can not be read or file is too big just do detection by filename | ||||||
| 		var content []byte |  | ||||||
| 		if f.Size() <= bigFileSize { | 		if f.Size() <= bigFileSize { | ||||||
| 			content, _ = readFile(f, fileSizeLimit) | 			if err := writeID(f.ID.String()); err != nil { | ||||||
|  | 				return nil, err | ||||||
|  | 			} | ||||||
|  | 			_, _, size, err := ReadBatchLine(batchReader) | ||||||
|  | 			if err != nil { | ||||||
|  | 				log("Error reading blob: %s Err: %v", f.ID.String(), err) | ||||||
|  | 				return nil, err | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			sizeToRead := size | ||||||
|  | 			discard := int64(0) | ||||||
|  | 			if size > fileSizeLimit { | ||||||
|  | 				sizeToRead = fileSizeLimit | ||||||
|  | 				discard = size - fileSizeLimit | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			_, err = contentBuf.ReadFrom(io.LimitReader(batchReader, sizeToRead)) | ||||||
|  | 			if err != nil { | ||||||
|  | 				return nil, err | ||||||
|  | 			} | ||||||
|  | 			content = contentBuf.Bytes() | ||||||
|  | 			err = discardFull(batchReader, discard) | ||||||
|  | 			if err != nil { | ||||||
|  | 				return nil, err | ||||||
|  | 			} | ||||||
| 		} | 		} | ||||||
| 		if enry.IsGenerated(f.Name(), content) { | 		if enry.IsGenerated(f.Name(), content) { | ||||||
| 			continue | 			continue | ||||||
| @@ -86,24 +160,20 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err | |||||||
| 	return sizes, nil | 	return sizes, nil | ||||||
| } | } | ||||||
|  |  | ||||||
| func readFile(entry *TreeEntry, limit int64) ([]byte, error) { | func discardFull(rd *bufio.Reader, discard int64) error { | ||||||
| 	// FIXME: We can probably be a little more efficient here... see above | 	if discard > math.MaxInt32 { | ||||||
| 	r, err := entry.Blob().DataAsync() | 		n, err := rd.Discard(math.MaxInt32) | ||||||
| 	if err != nil { | 		discard -= int64(n) | ||||||
| 		return nil, err | 		if err != nil { | ||||||
|  | 			return err | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| 	defer r.Close() | 	for discard > 0 { | ||||||
|  | 		n, err := rd.Discard(int(discard)) | ||||||
| 	if limit <= 0 { | 		discard -= int64(n) | ||||||
| 		return ioutil.ReadAll(r) | 		if err != nil { | ||||||
|  | 			return err | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
|  | 	return nil | ||||||
| 	size := entry.Size() |  | ||||||
| 	if limit > 0 && size > limit { |  | ||||||
| 		size = limit |  | ||||||
| 	} |  | ||||||
| 	buf := bytes.NewBuffer(nil) |  | ||||||
| 	buf.Grow(int(size)) |  | ||||||
| 	_, err = io.Copy(buf, io.LimitReader(r, limit)) |  | ||||||
| 	return buf.Bytes(), err |  | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user