2016-11-17 22:54:05 +00:00
|
|
|
package lfs
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"bytes"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"io/ioutil"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
2016-12-06 23:52:28 +00:00
|
|
|
|
2019-08-09 15:25:23 +00:00
|
|
|
"github.com/git-lfs/git-lfs/config"
|
2016-12-06 23:52:28 +00:00
|
|
|
"github.com/git-lfs/git-lfs/filepathfilter"
|
2017-08-21 10:16:10 +00:00
|
|
|
"github.com/git-lfs/git-lfs/git"
|
2016-11-17 22:54:05 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// TreeBlob is a single blob entry taken from ls-tree or rev-list output,
// pairing the blob's sha with the path it occupies in the tree.
type TreeBlob struct {
	// Sha1 is the object id of the blob.
	Sha1 string
	// Filename is the path of the blob within the tree.
	Filename string
}
|
|
|
|
|
2019-08-09 15:25:23 +00:00
|
|
|
func runScanTree(cb GitScannerFoundPointer, ref string, filter *filepathfilter.Filter, osEnv config.Environment) error {
|
2016-11-17 22:54:05 +00:00
|
|
|
// We don't use the nameMap approach here since that's imprecise when >1 file
|
|
|
|
// can be using the same content
|
2016-12-06 23:52:28 +00:00
|
|
|
treeShas, err := lsTreeBlobs(ref, filter)
|
2016-11-17 22:54:05 +00:00
|
|
|
if err != nil {
|
2016-11-29 17:56:03 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2019-08-09 15:25:23 +00:00
|
|
|
pcw, err := catFileBatchTree(treeShas, osEnv)
|
2016-11-29 17:56:03 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
2016-11-17 22:54:05 +00:00
|
|
|
}
|
|
|
|
|
2016-11-29 17:56:03 +00:00
|
|
|
for p := range pcw.Results {
|
|
|
|
cb(p, nil)
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := pcw.Wait(); err != nil {
|
|
|
|
cb(nil, err)
|
|
|
|
}
|
|
|
|
return nil
|
2016-11-17 22:54:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// catFileBatchTree uses git cat-file --batch to get the object contents
|
|
|
|
// of a git object, given its sha1. The contents will be decoded into
|
|
|
|
// a Git LFS pointer. treeblobs is a channel over which blob entries
|
|
|
|
// will be sent. It returns a channel from which point.Pointers can be read.
|
2019-08-09 15:25:23 +00:00
|
|
|
func catFileBatchTree(treeblobs *TreeBlobChannelWrapper, osEnv config.Environment) (*PointerChannelWrapper, error) {
|
|
|
|
scanner, err := NewPointerScanner(osEnv)
|
2016-11-17 22:54:05 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
pointers := make(chan *WrappedPointer, chanBufSize)
|
|
|
|
errchan := make(chan error, 10) // Multiple errors possible
|
|
|
|
|
|
|
|
go func() {
|
2018-11-08 19:12:03 +00:00
|
|
|
hasNext := true
|
2016-11-17 22:54:05 +00:00
|
|
|
for t := range treeblobs.Results {
|
2018-11-08 19:12:03 +00:00
|
|
|
hasNext = scanner.Scan(t.Sha1)
|
|
|
|
|
2016-11-19 00:00:57 +00:00
|
|
|
if p := scanner.Pointer(); p != nil {
|
2016-11-18 21:20:51 +00:00
|
|
|
p.Name = t.Filename
|
|
|
|
pointers <- p
|
2016-11-17 22:54:05 +00:00
|
|
|
}
|
2016-11-18 22:26:29 +00:00
|
|
|
|
2016-11-19 00:00:57 +00:00
|
|
|
if err := scanner.Err(); err != nil {
|
2016-11-18 22:26:29 +00:00
|
|
|
errchan <- err
|
|
|
|
}
|
|
|
|
|
|
|
|
if !hasNext {
|
|
|
|
break
|
|
|
|
}
|
2016-11-17 22:54:05 +00:00
|
|
|
}
|
2016-11-18 21:20:51 +00:00
|
|
|
|
2018-11-08 19:12:03 +00:00
|
|
|
// If the scanner quit early, we may still have treeblobs to
|
|
|
|
// read, so waiting for it to close will cause a deadlock.
|
|
|
|
if hasNext {
|
|
|
|
// Deal with nested error from incoming treeblobs
|
|
|
|
err := treeblobs.Wait()
|
|
|
|
if err != nil {
|
|
|
|
errchan <- err
|
|
|
|
}
|
2016-11-17 22:54:05 +00:00
|
|
|
}
|
|
|
|
|
2017-03-22 21:15:02 +00:00
|
|
|
if err = scanner.Close(); err != nil {
|
|
|
|
errchan <- err
|
2016-11-17 22:54:05 +00:00
|
|
|
}
|
2017-03-22 21:15:02 +00:00
|
|
|
|
2016-11-17 22:54:05 +00:00
|
|
|
close(pointers)
|
|
|
|
close(errchan)
|
|
|
|
}()
|
|
|
|
|
|
|
|
return NewPointerChannelWrapper(pointers, errchan), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Use ls-tree at ref to find a list of candidate tree blobs which might be lfs files
|
|
|
|
// The returned channel will be sent these blobs which should be sent to catFileBatchTree
|
|
|
|
// for final check & conversion to Pointer
|
2016-12-06 23:52:28 +00:00
|
|
|
func lsTreeBlobs(ref string, filter *filepathfilter.Filter) (*TreeBlobChannelWrapper, error) {
|
2017-08-21 10:16:10 +00:00
|
|
|
cmd, err := git.LsTree(ref)
|
2016-11-17 22:54:05 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
cmd.Stdin.Close()
|
|
|
|
|
|
|
|
blobs := make(chan TreeBlob, chanBufSize)
|
|
|
|
errchan := make(chan error, 1)
|
|
|
|
|
|
|
|
go func() {
|
2016-11-19 00:19:45 +00:00
|
|
|
scanner := newLsTreeScanner(cmd.Stdout)
|
|
|
|
for scanner.Scan() {
|
2016-12-06 23:52:28 +00:00
|
|
|
if t := scanner.TreeBlob(); t != nil && filter.Allows(t.Filename) {
|
2016-11-19 00:19:45 +00:00
|
|
|
blobs <- *t
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-11-17 22:54:05 +00:00
|
|
|
stderr, _ := ioutil.ReadAll(cmd.Stderr)
|
|
|
|
err := cmd.Wait()
|
|
|
|
if err != nil {
|
|
|
|
errchan <- fmt.Errorf("Error in git ls-tree: %v %v", err, string(stderr))
|
|
|
|
}
|
|
|
|
close(blobs)
|
|
|
|
close(errchan)
|
|
|
|
}()
|
|
|
|
|
|
|
|
return NewTreeBlobChannelWrapper(blobs, errchan), nil
|
|
|
|
}
|
|
|
|
|
2016-11-19 00:19:45 +00:00
|
|
|
type lsTreeScanner struct {
|
|
|
|
s *bufio.Scanner
|
|
|
|
tree *TreeBlob
|
|
|
|
}
|
2016-11-17 22:54:05 +00:00
|
|
|
|
2016-11-19 00:19:45 +00:00
|
|
|
func newLsTreeScanner(r io.Reader) *lsTreeScanner {
|
|
|
|
s := bufio.NewScanner(r)
|
|
|
|
s.Split(scanNullLines)
|
|
|
|
return &lsTreeScanner{s: s}
|
|
|
|
}
|
2016-11-17 22:54:05 +00:00
|
|
|
|
2016-11-19 00:19:45 +00:00
|
|
|
func (s *lsTreeScanner) TreeBlob() *TreeBlob {
|
|
|
|
return s.tree
|
|
|
|
}
|
2016-11-17 22:54:05 +00:00
|
|
|
|
2016-11-19 00:19:45 +00:00
|
|
|
func (s *lsTreeScanner) Err() error {
|
|
|
|
return nil
|
|
|
|
}
|
2016-11-17 22:54:05 +00:00
|
|
|
|
2016-11-19 00:19:45 +00:00
|
|
|
func (s *lsTreeScanner) Scan() bool {
|
|
|
|
t, hasNext := s.next()
|
|
|
|
s.tree = t
|
|
|
|
return hasNext
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *lsTreeScanner) next() (*TreeBlob, bool) {
|
|
|
|
hasNext := s.s.Scan()
|
|
|
|
line := s.s.Text()
|
|
|
|
parts := strings.SplitN(line, "\t", 2)
|
|
|
|
if len(parts) < 2 {
|
|
|
|
return nil, hasNext
|
|
|
|
}
|
|
|
|
|
|
|
|
attrs := strings.SplitN(parts[0], " ", 4)
|
|
|
|
if len(attrs) < 4 {
|
|
|
|
return nil, hasNext
|
|
|
|
}
|
|
|
|
|
|
|
|
if attrs[1] != "blob" {
|
|
|
|
return nil, hasNext
|
|
|
|
}
|
|
|
|
|
|
|
|
sz, err := strconv.ParseInt(strings.TrimSpace(attrs[3]), 10, 64)
|
|
|
|
if err != nil {
|
|
|
|
return nil, hasNext
|
|
|
|
}
|
|
|
|
|
|
|
|
if sz < blobSizeCutoff {
|
|
|
|
sha1 := attrs[2]
|
|
|
|
filename := parts[1]
|
|
|
|
return &TreeBlob{Sha1: sha1, Filename: filename}, hasNext
|
2016-11-17 22:54:05 +00:00
|
|
|
}
|
2016-11-19 00:19:45 +00:00
|
|
|
return nil, hasNext
|
2016-11-17 22:54:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// scanNullLines is a bufio.SplitFunc that yields NUL-terminated tokens. The
// terminating NUL is consumed but not included in the token. At EOF, any
// remaining unterminated data is returned as a final token.
func scanNullLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
	nul := bytes.IndexByte(data, 0)

	switch {
	case atEOF && len(data) == 0:
		// Nothing left to deliver.
		return 0, nil, nil
	case nul >= 0:
		// A complete NUL-terminated token is available.
		return nul + 1, data[:nul], nil
	case atEOF:
		// Final token without a terminator: hand it over as is.
		return len(data), data, nil
	default:
		// Ask for more data.
		return 0, nil, nil
	}
}
|