git-lfs/lfs/gitscanner_catfilebatch.go
Chris Darroch 7f9b3d7f76 lfs: make all blob size checks consistent
The blobSizeCutoff limit is defined such that "any file
with a size below this cutoff will be scanned" when looking
for LFS pointer blobs.  In general this is true, but there
are a few instances where the check is applied such that blobs
matching the cutoff limit are also scanned.

We therefore adjust these to match the same comparison logic
used elsewhere with this limit.

Two of these checks are in the PointerScanner.next() method
used by the runCatFileBatch() function; they were introduced
in commit 844c0b0db2bf3d39f52773e5f63ee10683a28342 in
PR #2070 and scan Git objects for eligible LFS pointers
when reading the output of "git cat-file --batch".  Note
that the corresponding runCatFileBatchCheck() function's
catFileBatchCheckScanner.next() method excludes blobs whose
size is above or equal to the limit (with a default limit of
blobSizeCutoff); this aligns with other usage and checks
of the blobSizeCutoff maximum as well.

The other check is in the DecodePointerFromFile()
function where it is used to exclude from consideration as
Git LFS pointers any files larger than blobSizeCutoff.
This function is ultimately called by the "git lfs checkout"
command; the original implementation was added in commit
001ddcd7cfae528a6594be9bab4562c6659a00e7 in PR #527.
2021-03-11 13:08:19 -08:00

169 lines
3.6 KiB
Go

package lfs
import (
"bytes"
"crypto/sha256"
"fmt"
"io"
"github.com/git-lfs/git-lfs/config"
"github.com/git-lfs/git-lfs/git"
)
// runCatFileBatch uses 'git cat-file --batch' to get the object contents of
// Git objects and decode them as Git LFS pointers. Object names are received
// from revs.Results and handed to a PointerScanner one at a time. For each
// object, exactly one of the following happens:
//
//   - a scan error is sent to errCh;
//   - a decoded LFS pointer is sent to pointerCh;
//   - otherwise, if the blob SHA has a valid object ID length and
//     lockableSet.Check reports a match for it, the matching name is sent to
//     lockableCh.
//
// The work runs in a goroutine that stops as soon as the scanner reports it
// cannot continue, or once revs.Results is closed. When the input was fully
// consumed, any error from revs.Wait is forwarded to errCh. The scanner is
// then closed and pointerCh, errCh and lockableCh are closed, in that order.
//
// An error is returned synchronously only if the PointerScanner cannot be
// created.
func runCatFileBatch(pointerCh chan *WrappedPointer, lockableCh chan string, lockableSet *lockableNameSet, revs *StringChannelWrapper, errCh chan error, gitEnv, osEnv config.Environment) error {
	scanner, err := NewPointerScanner(gitEnv, osEnv)
	if err != nil {
		return err
	}

	go func() {
		// keepGoing stays true unless the scanner refuses further input.
		keepGoing := true

		for rev := range revs.Results {
			keepGoing = scanner.Scan(rev)

			switch scanErr, ptr := scanner.Err(), scanner.Pointer(); {
			case scanErr != nil:
				errCh <- scanErr
			case ptr != nil:
				pointerCh <- ptr
			default:
				if sha := scanner.BlobSHA(); git.HasValidObjectIDLength(sha) {
					if name, matched := lockableSet.Check(sha); matched {
						lockableCh <- name
					}
				}
			}

			if !keepGoing {
				break
			}
		}

		// Only wait on the producer when its output was consumed to the end.
		if keepGoing {
			if waitErr := revs.Wait(); waitErr != nil {
				errCh <- waitErr
			}
		}

		if closeErr := scanner.Close(); closeErr != nil {
			errCh <- closeErr
		}

		close(pointerCh)
		close(errCh)
		close(lockableCh)
	}()

	return nil
}
// PointerScanner wraps a git.ObjectScanner and keeps the outcome of the most
// recent Scan call so it can be read back through the accessor methods.
type PointerScanner struct {
	// scanner is the underlying object scanner that objects are read from.
	scanner *git.ObjectScanner

	// blobSha and contentsSha hold the SHAs produced by the last Scan.
	blobSha, contentsSha string

	// pointer is the LFS pointer decoded by the last Scan, or nil.
	pointer *WrappedPointer

	// err is the error recorded by the last Scan, or nil.
	err error
}
// NewPointerScanner creates a PointerScanner backed by a new
// git.ObjectScanner built from the given Git and OS environments. It returns
// the error from git.NewObjectScanner unchanged if that call fails.
func NewPointerScanner(gitEnv, osEnv config.Environment) (*PointerScanner, error) {
	objects, err := git.NewObjectScanner(gitEnv, osEnv)
	if err != nil {
		return nil, err
	}

	ps := new(PointerScanner)
	ps.scanner = objects
	return ps, nil
}
// BlobSHA returns the blob SHA recorded by the most recent call to Scan. It
// is the empty string if Scan has not been called.
func (s *PointerScanner) BlobSHA() string {
return s.blobSha
}
// ContentsSha returns the contents SHA recorded by the most recent call to
// Scan. It is the empty string if Scan has not been called.
func (s *PointerScanner) ContentsSha() string {
return s.contentsSha
}
// Pointer returns the pointer recorded by the most recent call to Scan, or
// nil if that call did not produce one.
func (s *PointerScanner) Pointer() *WrappedPointer {
return s.pointer
}
// Err returns the error recorded by the most recent call to Scan, or nil.
// Scan never records io.EOF, so Err does not return it.
func (s *PointerScanner) Err() error {
return s.err
}
// Scan processes the object named by sha and stores the outcome on the
// scanner, replacing whatever the previous call left behind. The results are
// available through BlobSHA, ContentsSha, Pointer and Err.
//
// It returns true when the object was processed without error. It returns
// false on any error; the error is recorded for Err unless it is io.EOF,
// which is treated as a normal end of input.
func (s *PointerScanner) Scan(sha string) bool {
	// Clear the previous call's results before doing any work.
	s.blobSha, s.contentsSha = "", ""
	s.pointer, s.err = nil, nil

	var err error
	s.blobSha, s.contentsSha, s.pointer, err = s.next(sha)

	switch err {
	case nil:
		return true
	case io.EOF:
		return false
	default:
		s.err = err
		return false
	}
}
// Close closes the underlying object scanner and returns the error, if any,
// that its Close method reports.
func (s *PointerScanner) Close() error {
return s.scanner.Close()
}
// next reads the object named by blob from the underlying object scanner and
// tries to decode it as a Git LFS pointer.
//
// It returns, in order:
//
//   - the object's SHA as reported by the underlying scanner;
//   - the contents SHA: the pointer's Oid when the object decodes as an LFS
//     pointer, otherwise the hex-encoded SHA-256 of the object's bytes;
//   - the decoded pointer, or nil when the object is not an LFS pointer;
//   - an error.
//
// Only objects whose size is below blobSizeCutoff are buffered and offered to
// DecodePointer; larger objects are only hashed.
//
// io.EOF is returned only when the underlying scanner stops without reporting
// an error. An object whose contents end before the announced size is
// reported as an "expected N bytes, read M bytes" error rather than io.EOF,
// so callers that treat io.EOF as a clean end of input do not silently drop a
// truncated object.
func (s *PointerScanner) next(blob string) (string, string, *WrappedPointer, error) {
	if !s.scanner.Scan(blob) {
		if err := s.scanner.Err(); err != nil {
			return "", "", nil, err
		}
		return "", "", nil, io.EOF
	}

	blobSha := s.scanner.Sha1()
	size := s.scanner.Size()

	sha := sha256.New()

	var buf *bytes.Buffer
	var to io.Writer = sha

	// Keep a copy of the bytes only for objects small enough to be pointers.
	small := size < blobSizeCutoff
	if small {
		buf = bytes.NewBuffer(make([]byte, 0, size))
		to = io.MultiWriter(to, buf)
	}

	read, err := io.CopyN(to, s.scanner.Contents(), int64(size))
	if err == io.EOF {
		// io.CopyN signals a source that ended early with io.EOF; a separate
		// length comparison after a nil error can never fire. Turn the short
		// read into a descriptive error here instead.
		return blobSha, "", nil, fmt.Errorf("expected %d bytes, read %d bytes", size, read)
	}
	if err != nil {
		return blobSha, "", nil, err
	}

	if small {
		if p, err := DecodePointer(bytes.NewReader(buf.Bytes())); err == nil {
			pointer := &WrappedPointer{
				Sha1:    blobSha,
				Pointer: p,
			}
			return blobSha, p.Oid, pointer, nil
		}
	}

	// Not a pointer (too large, or it failed to decode): identify the object
	// by the SHA-256 of its raw contents.
	return blobSha, fmt.Sprintf("%x", sha.Sum(nil)), nil, nil
}