refactor: simplify ParseCatFileTreeLine and catBatchParseTreeEntries (#37210)

Simplify ParseCatFileTreeLine: it is faster without the preset buffers,
and easier to read and maintain.

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: wxiaoguang <2114189+wxiaoguang@users.noreply.github.com>
Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
Copilot
2026-04-14 12:03:26 +00:00
committed by GitHub
parent b55528b1a2
commit 84d5c99e64
6 changed files with 84 additions and 107 deletions

View File

@@ -10,6 +10,7 @@ import (
"errors"
"io"
"math"
"slices"
"strconv"
"strings"
"sync/atomic"
@@ -172,77 +173,46 @@ headerLoop:
return id, DiscardFull(rd, size-n+1)
}
// git tree files are a list:
// <mode-in-ascii> SP <fname> NUL <binary Hash>
//
// Unfortunately this 20-byte notation is somewhat in conflict to all other git tools
// Therefore we need some method to convert these binary hashes to hex hashes
// ParseCatFileTreeLine reads an entry from a tree in a cat-file --batch stream
// This carefully avoids allocations - except where fnameBuf is too small.
// It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
//
// Each line is composed of:
// <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <binary HASH>
//
// We don't attempt to convert the raw HASH to save a lot of time
func ParseCatFileTreeLine(objectFormat ObjectFormat, rd BufferedReader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
var readBytes []byte
// Read the Mode & fname
readBytes, err = rd.ReadSlice('\x00')
if err != nil {
return mode, fname, sha, n, err
}
idx := bytes.IndexByte(readBytes, ' ')
if idx < 0 {
log.Debug("missing space in readBytes ParseCatFileTreeLine: %s", readBytes)
return mode, fname, sha, n, &ErrNotExist{}
}
n += idx + 1
copy(modeBuf, readBytes[:idx])
if len(modeBuf) >= idx {
modeBuf = modeBuf[:idx]
} else {
modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...)
}
mode = modeBuf
readBytes = readBytes[idx+1:]
// Deal with the fname
copy(fnameBuf, readBytes)
if len(fnameBuf) > len(readBytes) {
fnameBuf = fnameBuf[:len(readBytes)]
} else {
fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...)
}
for err == bufio.ErrBufferFull {
readBytes, err = rd.ReadSlice('\x00')
fnameBuf = append(fnameBuf, readBytes...)
}
n += len(fnameBuf)
if err != nil {
return mode, fname, sha, n, err
}
fnameBuf = fnameBuf[:len(fnameBuf)-1]
fname = fnameBuf
// Deal with the binary hash
idx = 0
length := objectFormat.FullLength() / 2
for idx < length {
var read int
read, err = rd.Read(shaBuf[idx:length])
n += read
if err != nil {
return mode, fname, sha, n, err
// Each entry is composed of:
// <mode-in-ascii-dropping-initial-zeros> SP <name> NUL <binary-hash>
func ParseCatFileTreeLine(objectFormat ObjectFormat, rd BufferedReader) (mode EntryMode, name string, objID ObjectID, n int, err error) {
// use the in-buffer memory as much as possible to avoid extra allocations
bufBytes, err := rd.ReadSlice('\x00')
const maxEntryInfoBytes = 1024 * 1024
if errors.Is(err, bufio.ErrBufferFull) {
bufBytes = slices.Clone(bufBytes)
for len(bufBytes) < maxEntryInfoBytes && errors.Is(err, bufio.ErrBufferFull) {
var tmp []byte
tmp, err = rd.ReadSlice('\x00')
bufBytes = append(bufBytes, tmp...)
}
idx += read
}
sha = shaBuf
return mode, fname, sha, n, err
if err != nil {
return mode, name, objID, len(bufBytes), err
}
idx := bytes.IndexByte(bufBytes, ' ')
if idx < 0 {
return mode, name, objID, len(bufBytes), errors.New("invalid CatFileTreeLine output")
}
mode = ParseEntryMode(util.UnsafeBytesToString(bufBytes[:idx]))
name = string(bufBytes[idx+1 : len(bufBytes)-1]) // trim the NUL terminator, it needs a copy because the bufBytes will be reused by the reader
if mode == EntryModeNoEntry {
return mode, name, objID, len(bufBytes), errors.New("invalid entry mode: " + string(bufBytes[:idx]))
}
switch objectFormat {
case Sha1ObjectFormat:
objID = &Sha1Hash{}
case Sha256ObjectFormat:
objID = &Sha256Hash{}
default:
panic("unsupported object format: " + objectFormat.Name())
}
readIDLen, err := io.ReadFull(rd, objID.RawValue())
return mode, name, objID, len(bufBytes) + readIDLen, err
}
func DiscardFull(rd BufferedReader, discard int64) error {