git-lfs/lfs/gitscanner_catfilebatchcheck.go
brian m. carlson 95b83c5c1e
lfs: parse git cat-file --batch-check output for SHA-256
Currently, we have several hard-coded 40-based values here for the
length of an object ID.  Let's switch to finding the length of the
object ID, which will be the offset of the first space, and using that
as the length to compute other data from.  That will work for both SHA-1
and SHA-256, as well as any future hash algorithm that may be
implemented.
2020-07-29 20:53:43 +00:00

118 lines
2.6 KiB
Go

package lfs
import (
"bufio"
"fmt"
"io/ioutil"
"strconv"
"strings"
"github.com/git-lfs/git-lfs/git"
)
// runCatFileBatchCheck uses 'git cat-file --batch-check' to get the type and
// size of every git object named on revs.Results. Objects that are not blobs
// are ignored. The object ID of each blob smaller than blobSizeCutoff is sent
// to smallRevCh. The object ID of each blob at or above the cutoff is passed
// to lockableSet.Check and, when that reports a match, the name it returns is
// sent to lockableCh.
//
// The work runs in a background goroutine. The returned error only reports a
// failure from git.CatFile(); errors hit while scanning, while waiting on
// revs, or when the git process exits are sent to errCh. When the goroutine
// finishes it closes smallRevCh, errCh and lockableCh, so receivers ranging
// over any of them terminate. Callers must not close these channels
// themselves.
func runCatFileBatchCheck(smallRevCh chan string, lockableCh chan string, lockableSet *lockableNameSet, revs *StringChannelWrapper, errCh chan error) error {
	cmd, err := git.CatFile()
	if err != nil {
		return err
	}

	go func() {
		scanner := &catFileBatchCheckScanner{s: bufio.NewScanner(cmd.Stdout), limit: blobSizeCutoff}
		for r := range revs.Results {
			// Feed one object name and read back the single line describing
			// it. The write error is not inspected here; presumably a dead
			// git process shows up as Scan reporting end of output and as a
			// non-nil error from cmd.Wait below.
			cmd.Stdin.Write([]byte(r + "\n"))
			hasNext := scanner.Scan()

			if err := scanner.Err(); err != nil {
				errCh <- err
			} else if b := scanner.LFSBlobOID(); len(b) > 0 {
				smallRevCh <- b
			} else if b := scanner.GitBlobOID(); len(b) > 0 {
				if name, ok := lockableSet.Check(b); ok {
					lockableCh <- name
				}
			}

			if !hasNext {
				break
			}
		}

		if err := revs.Wait(); err != nil {
			errCh <- err
		}
		cmd.Stdin.Close()

		// Collect stderr before Wait so it can be attached to the exit error.
		stderr, _ := ioutil.ReadAll(cmd.Stderr)
		if err := cmd.Wait(); err != nil {
			errCh <- fmt.Errorf("error in git cat-file --batch-check: %v %v", err, string(stderr))
		}

		// This goroutine is the only sender on all three channels, so it is
		// responsible for closing every one of them, lockableCh included.
		close(smallRevCh)
		close(errCh)
		close(lockableCh)
	}()

	return nil
}
// catFileBatchCheckScanner reads lines of the form "<hash> <type> <size>"
// from s and classifies each blob by comparing its size against limit.
type catFileBatchCheckScanner struct {
	// s supplies the lines to parse, one object per line.
	s *bufio.Scanner
	// limit is the size at or above which a blob is reported through
	// GitBlobOID instead of LFSBlobOID.
	limit int

	// lfsBlobOID and gitBlobOID hold the result of the most recent Scan. At
	// most one of them is non-empty.
	lfsBlobOID string
	gitBlobOID string
}

// LFSBlobOID returns the object ID from the most recently scanned line if it
// described a blob smaller than the limit, and "" otherwise.
func (s *catFileBatchCheckScanner) LFSBlobOID() string {
	return s.lfsBlobOID
}

// GitBlobOID returns the object ID from the most recently scanned line if it
// described a blob whose size is at or above the limit, and "" otherwise.
func (s *catFileBatchCheckScanner) GitBlobOID() string {
	return s.gitBlobOID
}

// Err returns the error, if any, reported by the underlying bufio.Scanner.
func (s *catFileBatchCheckScanner) Err() error {
	return s.s.Err()
}

// Scan reads the next line, updates the values returned by LFSBlobOID and
// GitBlobOID, and reports whether a line was read. Both values are reset to
// "" when the line is not a well-formed blob entry or no line was available.
func (s *catFileBatchCheckScanner) Scan() bool {
	lfsBlobSha, gitBlobSha, hasNext := s.next()
	s.lfsBlobOID = lfsBlobSha
	s.gitBlobOID = gitBlobSha

	return hasNext
}

// next advances the underlying scanner by one line and parses it. It returns
// the object ID as the first value when the line describes a blob smaller
// than the limit, as the second value when the blob is at or above the limit,
// and two empty strings for anything else. The final value is the result of
// the underlying Scan call.
func (s *catFileBatchCheckScanner) next() (string, string, bool) {
	hasNext := s.s.Scan()
	line := s.s.Text()

	// Format is:
	// <hash> <type> <size>
	// The hash length is not assumed (SHA-1 and SHA-256 differ); it is the
	// offset of the first space. The type sits right after it, so a direct
	// comparison against "blob " avoids splitting the line. The trailing
	// space is part of the comparison so that only the exact type token
	// "blob" followed by a separator is accepted.
	const blobType = "blob "

	oidLen := strings.IndexByte(line, ' ')
	if oidLen == -1 {
		return "", "", hasNext
	}

	sizeStart := oidLen + 1 + len(blobType)
	if len(line) < sizeStart || line[oidLen+1:sizeStart] != blobType {
		return "", "", hasNext
	}

	// Parse as 64-bit so that blobs of 2 GiB and more are still classified
	// on platforms where int is 32 bits wide.
	size, err := strconv.ParseInt(line[sizeStart:], 10, 64)
	if err != nil {
		return "", "", hasNext
	}

	blobSha := line[:oidLen]
	if size >= int64(s.limit) {
		return "", blobSha, hasNext
	}

	return blobSha, "", hasNext
}