2015-04-23 16:20:36 +00:00
|
|
|
package lfs
|
2014-10-03 16:08:00 +00:00
|
|
|
|
|
|
|
import (
|
2014-10-07 15:16:54 +00:00
|
|
|
"bufio"
|
2014-10-03 16:08:00 +00:00
|
|
|
"bytes"
|
2015-08-11 14:54:22 +00:00
|
|
|
"errors"
|
2015-08-21 14:19:16 +00:00
|
|
|
"fmt"
|
2014-10-07 15:16:54 +00:00
|
|
|
"io"
|
2016-03-31 11:50:00 +00:00
|
|
|
"io/ioutil"
|
2014-10-07 15:16:54 +00:00
|
|
|
"os/exec"
|
2014-10-20 18:49:15 +00:00
|
|
|
"regexp"
|
2014-10-03 16:08:00 +00:00
|
|
|
"strconv"
|
2014-10-07 17:22:16 +00:00
|
|
|
"strings"
|
2015-10-28 16:06:36 +00:00
|
|
|
"sync"
|
2014-10-07 17:22:16 +00:00
|
|
|
"time"
|
2015-05-13 19:43:41 +00:00
|
|
|
|
2015-08-21 14:19:16 +00:00
|
|
|
"github.com/github/git-lfs/git"
|
2016-07-07 16:16:13 +00:00
|
|
|
"github.com/github/git-lfs/tools"
|
2016-05-23 18:02:27 +00:00
|
|
|
"github.com/rubyist/tracerx"
|
2014-10-03 16:08:00 +00:00
|
|
|
)
|
|
|
|
|
2014-10-11 14:28:46 +00:00
|
|
|
// Tunables shared by the stages of the scanner pipeline.
const (
	// blobSizeCutoff is the size threshold used when looking for Git LFS
	// pointers: only blobs whose size is below this value are scanned.
	blobSizeCutoff = 1024

	// stdoutBufSize is the size of the buffered readers that startCommand
	// wraps around a sub-process's stdout and stderr pipes.
	stdoutBufSize = 16384

	// chanBufSize is the capacity of the channels used to hand data from
	// one sub-process stage to the next.
	chanBufSize = 100
)
|
2014-10-07 15:59:59 +00:00
|
|
|
|
2015-08-03 15:58:34 +00:00
|
|
|
// logLfsSearchArgs holds the arguments appended to a `git log` call so that
// its output is limited to LFS changes and formatted in the way the
// parseLogOutput family of functions expects.
var logLfsSearchArgs = []string{
	"-G", "oid sha256:", // only diffs which include an LFS file SHA change
	"-p",   // include the diff so the SHA can be read from it
	"-U12", // context large enough for a whole pointer with up to 10 extension lines
	`--format=lfs-commit-sha: %H %P`, // a predictable commit header that can be detected
}
|
|
|
|
|
2015-05-27 19:45:18 +00:00
|
|
|
// WrappedPointer wraps a pointer.Pointer and provides the git sha1
|
2014-10-11 14:28:46 +00:00
|
|
|
// and the file name associated with the object, taken from the
|
|
|
|
// rev-list output.
|
2015-05-27 19:45:18 +00:00
|
|
|
type WrappedPointer struct {
|
2014-10-27 20:40:21 +00:00
|
|
|
Sha1 string
|
|
|
|
Name string
|
|
|
|
SrcName string
|
|
|
|
Size int64
|
|
|
|
Status string
|
2015-04-23 16:20:36 +00:00
|
|
|
*Pointer
|
2014-10-07 17:05:09 +00:00
|
|
|
}
|
|
|
|
|
2014-10-28 16:11:20 +00:00
|
|
|
// indexFile describes one entry seen while scanning the index: the file's
// name, its status in the index and, for a moved or copied file, the name
// the file originally had.
//
// The field order (Name, SrcName, Status) is relied on by unkeyed literals.
type indexFile struct {
	Name, SrcName, Status string
}
|
|
|
|
|
2014-10-20 18:49:15 +00:00
|
|
|
// z40 matches a run of forty zeros, optionally preceded by "^". revListShas
// uses it to drop an all-zero right-hand ref from the rev-list arguments.
// The pattern is unanchored, so it matches anywhere in the string.
var z40 = regexp.MustCompile(`\^?0{40}`)
|
|
|
|
|
2015-08-11 14:54:22 +00:00
|
|
|
// ScanningMode selects how revListShas builds its `git rev-list` arguments.
type ScanningMode int

const (
	// ScanRefsMode scans the left (and optionally right) ref passed to the
	// scan. It is the zero value and therefore the default mode.
	ScanRefsMode ScanningMode = iota
	// ScanAllMode scans all refs (`rev-list --all`).
	ScanAllMode
	// ScanLeftToRemoteMode scans the left ref while excluding what is
	// already reachable from the refs of the remote named by RemoteName.
	ScanLeftToRemoteMode
)

// ScanRefsOptions configures a ref scan and carries the sha1 -> file name
// table that the scan fills in while reading rev-list output.
//
// Values must be created with NewScanRefsOptions: GetName and SetName
// dereference the mutex and write to the map unconditionally.
type ScanRefsOptions struct {
	ScanMode   ScanningMode
	RemoteName string
	// SkipDeletedBlobs makes ScanRefsMode pass --no-walk instead of
	// --do-walk to rev-list.
	SkipDeletedBlobs bool

	nameMap map[string]string // blob sha1 -> file name
	mutex   *sync.Mutex       // guards nameMap
}

// GetName returns the file name recorded for sha and whether one was found.
func (o *ScanRefsOptions) GetName(sha string) (string, bool) {
	o.mutex.Lock()
	defer o.mutex.Unlock()

	name, found := o.nameMap[sha]
	return name, found
}

// SetName records name as the file name for sha, replacing any earlier entry.
func (o *ScanRefsOptions) SetName(sha, name string) {
	o.mutex.Lock()
	defer o.mutex.Unlock()

	o.nameMap[sha] = name
}

// NewScanRefsOptions returns options in the default ScanRefsMode with an
// empty name table ready for use.
func NewScanRefsOptions() *ScanRefsOptions {
	opts := new(ScanRefsOptions)
	opts.nameMap = make(map[string]string)
	opts.mutex = new(sync.Mutex)
	return opts
}
|
|
|
|
|
2015-05-27 19:45:18 +00:00
|
|
|
// ScanRefs takes a ref and returns a slice of WrappedPointer objects
|
2015-03-19 19:30:55 +00:00
|
|
|
// for all Git LFS pointers it finds for that ref.
|
2015-07-27 16:26:45 +00:00
|
|
|
// Reports unique oids once only, not multiple times if >1 file uses the same content
|
2015-07-05 18:18:03 +00:00
|
|
|
func ScanRefs(refLeft, refRight string, opt *ScanRefsOptions) ([]*WrappedPointer, error) {
|
2016-03-31 11:00:44 +00:00
|
|
|
s, err := ScanRefsToChan(refLeft, refRight, opt)
|
2015-09-03 15:15:42 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
pointers := make([]*WrappedPointer, 0)
|
2016-03-31 11:00:44 +00:00
|
|
|
for p := range s.Results {
|
2015-09-03 15:15:42 +00:00
|
|
|
pointers = append(pointers, p)
|
|
|
|
}
|
2016-03-31 11:00:44 +00:00
|
|
|
err = s.Wait()
|
2015-09-03 15:15:42 +00:00
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
return pointers, err
|
2015-09-03 15:15:42 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
// ScanRefsToChan takes a ref and returns a channel of WrappedPointer objects
|
|
|
|
// for all Git LFS pointers it finds for that ref.
|
|
|
|
// Reports unique oids once only, not multiple times if >1 file uses the same content
|
2016-03-31 11:00:44 +00:00
|
|
|
func ScanRefsToChan(refLeft, refRight string, opt *ScanRefsOptions) (*PointerChannelWrapper, error) {
|
2015-07-05 18:18:03 +00:00
|
|
|
if opt == nil {
|
2015-10-28 16:06:36 +00:00
|
|
|
opt = NewScanRefsOptions()
|
2015-07-05 18:18:03 +00:00
|
|
|
}
|
2015-08-11 14:54:22 +00:00
|
|
|
if refLeft == "" {
|
|
|
|
opt.ScanMode = ScanAllMode
|
|
|
|
}
|
2015-05-14 10:44:43 +00:00
|
|
|
|
2014-10-07 17:22:16 +00:00
|
|
|
start := time.Now()
|
2015-05-14 10:44:43 +00:00
|
|
|
defer func() {
|
|
|
|
tracerx.PerformanceSince("scan", start)
|
|
|
|
}()
|
2014-10-07 17:22:16 +00:00
|
|
|
|
2015-10-28 16:06:36 +00:00
|
|
|
revs, err := revListShas(refLeft, refRight, opt)
|
2014-10-07 20:57:18 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
smallShas, err := catFileBatchCheck(revs)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
pointers, err := catFileBatch(smallShas)
|
2014-10-07 20:57:18 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2014-10-07 15:16:54 +00:00
|
|
|
|
2015-09-03 15:15:42 +00:00
|
|
|
retchan := make(chan *WrappedPointer, chanBufSize)
|
2016-03-31 11:00:44 +00:00
|
|
|
errchan := make(chan error, 1)
|
2015-09-03 15:15:42 +00:00
|
|
|
go func() {
|
2016-03-31 11:00:44 +00:00
|
|
|
for p := range pointers.Results {
|
2015-10-28 16:06:36 +00:00
|
|
|
if name, ok := opt.GetName(p.Sha1); ok {
|
2015-09-03 15:15:42 +00:00
|
|
|
p.Name = name
|
|
|
|
}
|
|
|
|
retchan <- p
|
2014-10-08 13:04:07 +00:00
|
|
|
}
|
2016-03-31 11:00:44 +00:00
|
|
|
err := pointers.Wait()
|
|
|
|
if err != nil {
|
|
|
|
errchan <- err
|
|
|
|
}
|
2015-09-03 15:15:42 +00:00
|
|
|
close(retchan)
|
2016-03-31 11:00:44 +00:00
|
|
|
close(errchan)
|
2015-09-03 15:15:42 +00:00
|
|
|
}()
|
2014-10-07 15:16:54 +00:00
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
return NewPointerChannelWrapper(retchan, errchan), nil
|
2014-10-07 15:16:54 +00:00
|
|
|
}
|
|
|
|
|
2015-10-28 16:06:36 +00:00
|
|
|
type indexFileMap struct {
|
|
|
|
nameMap map[string]*indexFile
|
|
|
|
mutex *sync.Mutex
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m *indexFileMap) Get(sha string) (*indexFile, bool) {
|
|
|
|
m.mutex.Lock()
|
|
|
|
index, ok := m.nameMap[sha]
|
|
|
|
m.mutex.Unlock()
|
|
|
|
return index, ok
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m *indexFileMap) Set(sha string, index *indexFile) {
|
|
|
|
m.mutex.Lock()
|
|
|
|
m.nameMap[sha] = index
|
|
|
|
m.mutex.Unlock()
|
|
|
|
}
|
|
|
|
|
2015-05-27 19:45:18 +00:00
|
|
|
// ScanIndex returns a slice of WrappedPointer objects for all
|
2015-03-19 19:30:55 +00:00
|
|
|
// Git LFS pointers it finds in the index.
|
2015-07-27 16:26:45 +00:00
|
|
|
// Reports unique oids once only, not multiple times if >1 file uses the same content
|
2015-05-27 19:45:18 +00:00
|
|
|
func ScanIndex() ([]*WrappedPointer, error) {
|
2015-10-28 16:06:36 +00:00
|
|
|
indexMap := &indexFileMap{
|
|
|
|
nameMap: make(map[string]*indexFile, 0),
|
|
|
|
mutex: &sync.Mutex{},
|
|
|
|
}
|
2015-05-14 10:44:43 +00:00
|
|
|
|
2014-10-27 16:42:38 +00:00
|
|
|
start := time.Now()
|
2015-05-14 10:44:43 +00:00
|
|
|
defer func() {
|
|
|
|
tracerx.PerformanceSince("scan-staging", start)
|
|
|
|
}()
|
2014-10-27 16:42:38 +00:00
|
|
|
|
2015-10-28 16:06:36 +00:00
|
|
|
revs, err := revListIndex(false, indexMap)
|
2014-10-27 16:42:38 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2015-10-28 16:06:36 +00:00
|
|
|
cachedRevs, err := revListIndex(true, indexMap)
|
2014-10-28 21:35:13 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
allRevsErr := make(chan error, 5) // can be multiple errors below
|
|
|
|
allRevsChan := make(chan string, 1)
|
|
|
|
allRevs := NewStringChannelWrapper(allRevsChan, allRevsErr)
|
2014-10-28 21:35:13 +00:00
|
|
|
go func() {
|
|
|
|
seenRevs := make(map[string]bool, 0)
|
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
for rev := range revs.Results {
|
2014-10-28 21:35:13 +00:00
|
|
|
seenRevs[rev] = true
|
2016-03-31 11:00:44 +00:00
|
|
|
allRevsChan <- rev
|
|
|
|
}
|
|
|
|
err := revs.Wait()
|
|
|
|
if err != nil {
|
|
|
|
allRevsErr <- err
|
2014-10-28 21:35:13 +00:00
|
|
|
}
|
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
for rev := range cachedRevs.Results {
|
2014-10-28 21:35:13 +00:00
|
|
|
if _, ok := seenRevs[rev]; !ok {
|
2016-03-31 11:00:44 +00:00
|
|
|
allRevsChan <- rev
|
2014-10-28 21:35:13 +00:00
|
|
|
}
|
|
|
|
}
|
2016-03-31 11:00:44 +00:00
|
|
|
err = cachedRevs.Wait()
|
|
|
|
if err != nil {
|
|
|
|
allRevsErr <- err
|
|
|
|
}
|
|
|
|
close(allRevsChan)
|
|
|
|
close(allRevsErr)
|
2014-10-28 21:35:13 +00:00
|
|
|
}()
|
|
|
|
|
|
|
|
smallShas, err := catFileBatchCheck(allRevs)
|
2014-10-27 16:42:38 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
pointerc, err := catFileBatch(smallShas)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2015-05-27 19:45:18 +00:00
|
|
|
pointers := make([]*WrappedPointer, 0)
|
2016-03-31 11:00:44 +00:00
|
|
|
for p := range pointerc.Results {
|
2015-10-28 16:06:36 +00:00
|
|
|
if e, ok := indexMap.Get(p.Sha1); ok {
|
2014-10-27 19:47:07 +00:00
|
|
|
p.Name = e.Name
|
|
|
|
p.Status = e.Status
|
2014-10-27 20:40:21 +00:00
|
|
|
p.SrcName = e.SrcName
|
2014-10-27 16:42:38 +00:00
|
|
|
}
|
|
|
|
pointers = append(pointers, p)
|
|
|
|
}
|
2016-03-31 11:00:44 +00:00
|
|
|
err = pointerc.Wait()
|
2014-10-27 16:42:38 +00:00
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
return pointers, err
|
2014-10-27 16:42:38 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2016-07-13 21:24:43 +00:00
|
|
|
// Get additional arguments needed to limit 'git rev-list' to just the changes
|
|
|
|
// in revTo that are also not on remoteName.
|
2016-07-08 22:42:03 +00:00
|
|
|
//
|
2016-07-13 21:24:43 +00:00
|
|
|
// Returns a slice of string command arguments, and a slice of string git
|
|
|
|
// commits to pass to `git rev-list` via STDIN.
|
2016-07-13 17:00:00 +00:00
|
|
|
func revListArgsRefVsRemote(refTo, remoteName string) ([]string, []string) {
|
2016-02-25 16:49:52 +00:00
|
|
|
// We need to check that the locally cached versions of remote refs are still
|
|
|
|
// present on the remote before we use them as a 'from' point. If the
|
|
|
|
// server implements garbage collection and a remote branch had been deleted
|
|
|
|
// since we last did 'git fetch --prune', then the objects in that branch may
|
|
|
|
// have also been deleted on the server if unreferenced.
|
|
|
|
// If some refs are missing on the remote, use a more explicit diff
|
|
|
|
|
2016-03-01 10:01:24 +00:00
|
|
|
cachedRemoteRefs, _ := git.CachedRemoteRefs(remoteName)
|
|
|
|
actualRemoteRefs, _ := git.RemoteRefs(remoteName)
|
2016-02-25 16:49:52 +00:00
|
|
|
|
|
|
|
// Only check for missing refs on remote; if the ref is different it has moved
|
|
|
|
// forward probably, and if not and the ref has changed to a non-descendant
|
|
|
|
// (force push) then that will cause a re-evaluation in a subsequent command anyway
|
2016-07-07 16:16:13 +00:00
|
|
|
missingRefs := tools.NewStringSet()
|
2016-02-25 16:49:52 +00:00
|
|
|
for _, cachedRef := range cachedRemoteRefs {
|
|
|
|
found := false
|
|
|
|
for _, realRemoteRef := range actualRemoteRefs {
|
|
|
|
if cachedRef.Type == realRemoteRef.Type && cachedRef.Name == realRemoteRef.Name {
|
|
|
|
found = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if !found {
|
|
|
|
missingRefs.Add(cachedRef.Name)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(missingRefs) > 0 {
|
|
|
|
// Use only the non-missing refs as 'from' points
|
2016-07-08 22:42:03 +00:00
|
|
|
commits := make([]string, 1, len(cachedRemoteRefs)+1)
|
2016-07-08 22:29:49 +00:00
|
|
|
commits[0] = refTo
|
2016-02-25 16:49:52 +00:00
|
|
|
for _, cachedRef := range cachedRemoteRefs {
|
|
|
|
if !missingRefs.Contains(cachedRef.Name) {
|
2016-07-08 22:29:49 +00:00
|
|
|
commits = append(commits, "^"+cachedRef.Sha)
|
2016-02-25 16:49:52 +00:00
|
|
|
}
|
|
|
|
}
|
2016-07-13 17:00:00 +00:00
|
|
|
return []string{"--stdin"}, commits
|
2016-02-25 16:49:52 +00:00
|
|
|
} else {
|
|
|
|
// Safe to use cached
|
2016-07-13 17:00:00 +00:00
|
|
|
return []string{refTo, "--not", "--remotes=" + remoteName}, nil
|
2016-02-25 16:49:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-10-07 16:44:28 +00:00
|
|
|
// revListShas uses git rev-list to return the list of object sha1s
|
|
|
|
// for the given ref. If all is true, ref is ignored. It returns a
|
|
|
|
// channel from which sha1 strings can be read.
|
2016-03-31 11:00:44 +00:00
|
|
|
func revListShas(refLeft, refRight string, opt *ScanRefsOptions) (*StringChannelWrapper, error) {
|
2014-10-07 15:16:54 +00:00
|
|
|
refArgs := []string{"rev-list", "--objects"}
|
2016-07-08 22:29:49 +00:00
|
|
|
var stdin []string
|
2015-08-11 14:54:22 +00:00
|
|
|
switch opt.ScanMode {
|
|
|
|
case ScanRefsMode:
|
2015-07-05 18:18:03 +00:00
|
|
|
if opt.SkipDeletedBlobs {
|
|
|
|
refArgs = append(refArgs, "--no-walk")
|
|
|
|
} else {
|
|
|
|
refArgs = append(refArgs, "--do-walk")
|
|
|
|
}
|
|
|
|
|
2014-10-20 18:49:15 +00:00
|
|
|
refArgs = append(refArgs, refLeft)
|
|
|
|
if refRight != "" && !z40.MatchString(refRight) {
|
|
|
|
refArgs = append(refArgs, refRight)
|
|
|
|
}
|
2015-08-11 14:54:22 +00:00
|
|
|
case ScanAllMode:
|
|
|
|
refArgs = append(refArgs, "--all")
|
|
|
|
case ScanLeftToRemoteMode:
|
2016-07-13 17:00:00 +00:00
|
|
|
args, commits := revListArgsRefVsRemote(refLeft, opt.RemoteName)
|
|
|
|
refArgs = append(refArgs, args...)
|
|
|
|
if len(commits) > 0 {
|
2016-07-08 22:29:49 +00:00
|
|
|
stdin = commits
|
|
|
|
}
|
2015-08-11 14:54:22 +00:00
|
|
|
default:
|
|
|
|
return nil, errors.New("scanner: unknown scan type: " + strconv.Itoa(int(opt.ScanMode)))
|
2014-10-07 15:16:54 +00:00
|
|
|
}
|
2014-10-03 16:17:26 +00:00
|
|
|
|
2016-03-19 18:40:30 +00:00
|
|
|
// Use "--" at the end of the command to disambiguate arguments as refs,
|
|
|
|
// so Git doesn't complain about ambiguity if you happen to also have a
|
|
|
|
// file named "master".
|
|
|
|
refArgs = append(refArgs, "--")
|
|
|
|
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd, err := startCommand("git", refArgs...)
|
2014-10-07 15:16:54 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2014-10-05 13:35:26 +00:00
|
|
|
|
2016-07-08 22:29:49 +00:00
|
|
|
if len(stdin) > 0 {
|
|
|
|
cmd.Stdin.Write([]byte(strings.Join(stdin, "\n")))
|
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd.Stdin.Close()
|
2014-10-03 16:08:00 +00:00
|
|
|
|
2014-10-07 20:57:18 +00:00
|
|
|
revs := make(chan string, chanBufSize)
|
2016-03-31 15:38:55 +00:00
|
|
|
errchan := make(chan error, 5) // may be multiple errors
|
2014-10-07 15:16:54 +00:00
|
|
|
|
|
|
|
go func() {
|
2014-10-07 15:59:59 +00:00
|
|
|
scanner := bufio.NewScanner(cmd.Stdout)
|
2014-10-07 15:16:54 +00:00
|
|
|
for scanner.Scan() {
|
2014-10-08 13:04:07 +00:00
|
|
|
line := strings.TrimSpace(scanner.Text())
|
2014-10-11 14:28:46 +00:00
|
|
|
if len(line) < 40 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2014-10-08 13:04:07 +00:00
|
|
|
sha1 := line[0:40]
|
|
|
|
if len(line) > 40 {
|
2015-10-28 16:06:36 +00:00
|
|
|
opt.SetName(sha1, line[41:len(line)])
|
2014-10-08 13:04:07 +00:00
|
|
|
}
|
|
|
|
revs <- sha1
|
2014-10-03 16:08:00 +00:00
|
|
|
}
|
2016-02-17 18:57:45 +00:00
|
|
|
|
2016-03-31 11:50:00 +00:00
|
|
|
stderr, _ := ioutil.ReadAll(cmd.Stderr)
|
2016-03-31 11:00:44 +00:00
|
|
|
err := cmd.Wait()
|
|
|
|
if err != nil {
|
2016-03-31 11:50:00 +00:00
|
|
|
errchan <- fmt.Errorf("Error in git rev-list --objects: %v %v", err, string(stderr))
|
2016-03-31 15:38:55 +00:00
|
|
|
} else {
|
|
|
|
// Special case detection of ambiguous refs; lower level commands like
|
|
|
|
// git rev-list do not return non-zero exit codes in this case, just warn
|
|
|
|
ambiguousRegex := regexp.MustCompile(`warning: refname (.*) is ambiguous`)
|
|
|
|
if match := ambiguousRegex.FindStringSubmatch(string(stderr)); match != nil {
|
|
|
|
// Promote to fatal & exit
|
2016-04-01 08:43:11 +00:00
|
|
|
errchan <- fmt.Errorf("Error: ref %s is ambiguous", match[1])
|
2016-03-31 15:38:55 +00:00
|
|
|
}
|
2016-03-31 11:00:44 +00:00
|
|
|
}
|
2014-10-07 15:16:54 +00:00
|
|
|
close(revs)
|
2016-03-31 11:00:44 +00:00
|
|
|
close(errchan)
|
2014-10-07 15:16:54 +00:00
|
|
|
}()
|
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
return NewStringChannelWrapper(revs, errchan), nil
|
2014-10-07 15:16:54 +00:00
|
|
|
}
|
|
|
|
|
2014-10-28 16:11:20 +00:00
|
|
|
// revListIndex uses git diff-index to return the list of object sha1s
|
|
|
|
// for in the indexf. It returns a channel from which sha1 strings can be read.
|
|
|
|
// The namMap will be filled indexFile pointers mapping sha1s to indexFiles.
|
2016-03-31 11:00:44 +00:00
|
|
|
func revListIndex(cache bool, indexMap *indexFileMap) (*StringChannelWrapper, error) {
|
2014-10-28 21:35:13 +00:00
|
|
|
cmdArgs := []string{"diff-index", "-M"}
|
|
|
|
if cache {
|
|
|
|
cmdArgs = append(cmdArgs, "--cached")
|
|
|
|
}
|
|
|
|
cmdArgs = append(cmdArgs, "HEAD")
|
|
|
|
|
|
|
|
cmd, err := startCommand("git", cmdArgs...)
|
2014-10-27 16:42:38 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
cmd.Stdin.Close()
|
|
|
|
|
|
|
|
revs := make(chan string, chanBufSize)
|
2016-03-31 11:00:44 +00:00
|
|
|
errchan := make(chan error, 1)
|
2014-10-27 16:42:38 +00:00
|
|
|
|
|
|
|
go func() {
|
|
|
|
scanner := bufio.NewScanner(cmd.Stdout)
|
|
|
|
for scanner.Scan() {
|
|
|
|
// Format is:
|
|
|
|
// :100644 100644 c5b3d83a7542255ec7856487baa5e83d65b1624c 9e82ac1b514be060945392291b5b3108c22f6fe3 M foo.gif
|
2014-10-27 19:47:07 +00:00
|
|
|
// :<old mode> <new mode> <old sha1> <new sha1> <status>\t<file name>[\t<file name>]
|
2014-10-27 16:42:38 +00:00
|
|
|
line := scanner.Text()
|
|
|
|
parts := strings.Split(line, "\t")
|
|
|
|
if len(parts) < 2 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
description := strings.Split(parts[0], " ")
|
|
|
|
files := parts[1:len(parts)]
|
|
|
|
|
2014-10-27 19:47:07 +00:00
|
|
|
if len(description) >= 5 {
|
2014-10-27 20:40:21 +00:00
|
|
|
status := description[4][0:1]
|
2014-10-27 16:42:38 +00:00
|
|
|
sha1 := description[3]
|
2014-10-27 19:47:07 +00:00
|
|
|
if status == "M" {
|
|
|
|
sha1 = description[2] // This one is modified but not added
|
|
|
|
}
|
2015-10-28 16:06:36 +00:00
|
|
|
indexMap.Set(sha1, &indexFile{files[len(files)-1], files[0], status})
|
2014-10-27 16:42:38 +00:00
|
|
|
revs <- sha1
|
|
|
|
}
|
|
|
|
}
|
2016-02-17 18:57:45 +00:00
|
|
|
|
2016-03-31 11:52:01 +00:00
|
|
|
// Note: deliberately not checking result code here, because doing that
|
|
|
|
// can fail fsck process too early since clean filter will detect errors
|
|
|
|
// and set this to non-zero. How to cope with this better?
|
|
|
|
// stderr, _ := ioutil.ReadAll(cmd.Stderr)
|
|
|
|
// err := cmd.Wait()
|
|
|
|
// if err != nil {
|
|
|
|
// errchan <- fmt.Errorf("Error in git diff-index: %v %v", err, string(stderr))
|
|
|
|
// }
|
2016-03-31 11:50:00 +00:00
|
|
|
cmd.Wait()
|
2014-10-27 16:42:38 +00:00
|
|
|
close(revs)
|
2016-03-31 11:00:44 +00:00
|
|
|
close(errchan)
|
2014-10-27 16:42:38 +00:00
|
|
|
}()
|
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
return NewStringChannelWrapper(revs, errchan), nil
|
2014-10-27 16:42:38 +00:00
|
|
|
}
|
|
|
|
|
2014-10-07 16:44:28 +00:00
|
|
|
// catFileBatchCheck uses git cat-file --batch-check to get the type
|
|
|
|
// and size of a git object. Any object that isn't of type blob and
|
|
|
|
// under the blobSizeCutoff will be ignored. revs is a channel over
|
|
|
|
// which strings containing git sha1s will be sent. It returns a channel
|
|
|
|
// from which sha1 strings can be read.
|
2016-03-31 11:00:44 +00:00
|
|
|
func catFileBatchCheck(revs *StringChannelWrapper) (*StringChannelWrapper, error) {
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd, err := startCommand("git", "cat-file", "--batch-check")
|
2014-10-07 15:16:54 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2014-10-03 16:08:00 +00:00
|
|
|
}
|
|
|
|
|
2014-10-07 20:57:18 +00:00
|
|
|
smallRevs := make(chan string, chanBufSize)
|
2016-03-31 11:00:44 +00:00
|
|
|
errchan := make(chan error, 2) // up to 2 errors, one from each goroutine
|
2014-10-03 16:08:00 +00:00
|
|
|
|
2014-10-07 15:16:54 +00:00
|
|
|
go func() {
|
2014-10-07 15:59:59 +00:00
|
|
|
scanner := bufio.NewScanner(cmd.Stdout)
|
2014-10-07 15:16:54 +00:00
|
|
|
for scanner.Scan() {
|
|
|
|
line := scanner.Text()
|
2016-02-15 20:02:50 +00:00
|
|
|
lineLen := len(line)
|
|
|
|
|
2014-10-07 20:57:18 +00:00
|
|
|
// Format is:
|
|
|
|
// <sha1> <type> <size>
|
|
|
|
// type is at a fixed spot, if we see that it's "blob", we can avoid
|
|
|
|
// splitting the line just to get the size.
|
2016-02-15 20:02:50 +00:00
|
|
|
if lineLen < 46 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if line[41:45] != "blob" {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
size, err := strconv.Atoi(line[46:lineLen])
|
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if size < blobSizeCutoff {
|
|
|
|
smallRevs <- line[0:40]
|
2014-10-07 15:16:54 +00:00
|
|
|
}
|
2014-10-03 16:08:00 +00:00
|
|
|
}
|
2016-02-15 20:02:50 +00:00
|
|
|
|
2016-03-31 11:50:00 +00:00
|
|
|
stderr, _ := ioutil.ReadAll(cmd.Stderr)
|
2016-03-31 11:00:44 +00:00
|
|
|
err := cmd.Wait()
|
|
|
|
if err != nil {
|
2016-03-31 11:50:00 +00:00
|
|
|
errchan <- fmt.Errorf("Error in git cat-file --batch-check: %v %v", err, string(stderr))
|
2016-03-31 11:00:44 +00:00
|
|
|
}
|
2014-10-07 15:16:54 +00:00
|
|
|
close(smallRevs)
|
2016-03-31 11:00:44 +00:00
|
|
|
close(errchan)
|
2014-10-07 15:16:54 +00:00
|
|
|
}()
|
2014-10-03 16:08:00 +00:00
|
|
|
|
2014-10-07 15:16:54 +00:00
|
|
|
go func() {
|
2016-03-31 11:00:44 +00:00
|
|
|
for r := range revs.Results {
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd.Stdin.Write([]byte(r + "\n"))
|
2014-10-07 15:16:54 +00:00
|
|
|
}
|
2016-03-31 11:00:44 +00:00
|
|
|
err := revs.Wait()
|
|
|
|
if err != nil {
|
|
|
|
// We can share errchan with other goroutine since that won't close it
|
|
|
|
// until we close the stdin below
|
|
|
|
errchan <- err
|
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd.Stdin.Close()
|
2014-10-07 15:16:54 +00:00
|
|
|
}()
|
2014-10-03 16:17:26 +00:00
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
return NewStringChannelWrapper(smallRevs, errchan), nil
|
2014-10-07 15:16:54 +00:00
|
|
|
}
|
|
|
|
|
2014-10-07 16:44:28 +00:00
|
|
|
// catFileBatch uses git cat-file --batch to get the object contents
|
|
|
|
// of a git object, given its sha1. The contents will be decoded into
|
2015-03-19 19:30:55 +00:00
|
|
|
// a Git LFS pointer. revs is a channel over which strings containing Git SHA1s
|
|
|
|
// will be sent. It returns a channel from which point.Pointers can be read.
|
2016-03-31 11:00:44 +00:00
|
|
|
func catFileBatch(revs *StringChannelWrapper) (*PointerChannelWrapper, error) {
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd, err := startCommand("git", "cat-file", "--batch")
|
2014-10-07 15:16:54 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2015-05-27 19:45:18 +00:00
|
|
|
pointers := make(chan *WrappedPointer, chanBufSize)
|
2016-03-31 11:00:44 +00:00
|
|
|
errchan := make(chan error, 5) // shared by 2 goroutines & may add more detail errors?
|
2014-10-07 15:16:54 +00:00
|
|
|
|
|
|
|
go func() {
|
|
|
|
for {
|
2014-10-07 16:33:00 +00:00
|
|
|
l, err := cmd.Stdout.ReadBytes('\n')
|
2014-10-07 20:57:18 +00:00
|
|
|
if err != nil {
|
2014-10-07 15:16:54 +00:00
|
|
|
break
|
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:59 +00:00
|
|
|
// Line is formatted:
|
|
|
|
// <sha1> <type> <size>
|
2014-10-07 15:16:54 +00:00
|
|
|
fields := bytes.Fields(l)
|
|
|
|
s, _ := strconv.Atoi(string(fields[2]))
|
|
|
|
|
|
|
|
nbuf := make([]byte, s)
|
2014-10-07 16:33:00 +00:00
|
|
|
_, err = io.ReadFull(cmd.Stdout, nbuf)
|
2014-10-07 15:16:54 +00:00
|
|
|
if err != nil {
|
|
|
|
break // Legit errors
|
|
|
|
}
|
|
|
|
|
2015-04-23 16:20:36 +00:00
|
|
|
p, err := DecodePointer(bytes.NewBuffer(nbuf))
|
2014-10-07 15:16:54 +00:00
|
|
|
if err == nil {
|
2015-05-27 19:45:18 +00:00
|
|
|
pointers <- &WrappedPointer{
|
2014-10-27 20:40:21 +00:00
|
|
|
Sha1: string(fields[0]),
|
|
|
|
Size: p.Size,
|
|
|
|
Pointer: p,
|
|
|
|
}
|
2014-10-07 15:16:54 +00:00
|
|
|
}
|
|
|
|
|
2014-10-07 16:33:00 +00:00
|
|
|
_, err = cmd.Stdout.ReadBytes('\n') // Extra \n inserted by cat-file
|
2014-10-07 20:57:18 +00:00
|
|
|
if err != nil {
|
2014-10-07 15:16:54 +00:00
|
|
|
break
|
|
|
|
}
|
2014-10-03 16:08:00 +00:00
|
|
|
}
|
2016-02-17 18:57:45 +00:00
|
|
|
|
2016-03-31 11:50:00 +00:00
|
|
|
stderr, _ := ioutil.ReadAll(cmd.Stderr)
|
2016-03-31 11:00:44 +00:00
|
|
|
err = cmd.Wait()
|
|
|
|
if err != nil {
|
2016-03-31 11:50:00 +00:00
|
|
|
errchan <- fmt.Errorf("Error in git cat-file --batch: %v %v", err, string(stderr))
|
2016-03-31 11:00:44 +00:00
|
|
|
}
|
2014-10-07 15:16:54 +00:00
|
|
|
close(pointers)
|
2016-03-31 11:00:44 +00:00
|
|
|
close(errchan)
|
2014-10-07 15:16:54 +00:00
|
|
|
}()
|
2014-10-03 16:08:00 +00:00
|
|
|
|
2014-10-07 15:16:54 +00:00
|
|
|
go func() {
|
2016-03-31 11:00:44 +00:00
|
|
|
for r := range revs.Results {
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd.Stdin.Write([]byte(r + "\n"))
|
2014-10-03 16:08:00 +00:00
|
|
|
}
|
2016-03-31 11:00:44 +00:00
|
|
|
err := revs.Wait()
|
|
|
|
if err != nil {
|
|
|
|
// We can share errchan with other goroutine since that won't close it
|
|
|
|
// until we close the stdin below
|
|
|
|
errchan <- err
|
|
|
|
}
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd.Stdin.Close()
|
2014-10-07 15:16:54 +00:00
|
|
|
}()
|
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
return NewPointerChannelWrapper(pointers, errchan), nil
|
2014-10-03 16:08:00 +00:00
|
|
|
}
|
2014-10-07 15:59:59 +00:00
|
|
|
|
|
|
|
// wrappedCmd bundles an exec.Cmd with its three pipes: the raw stdin writer
// and buffered readers over stdout and stderr. The exec.Cmd is embedded, so
// its methods (such as Wait) are available directly on the wrapper.
type wrappedCmd struct {
	Stdin  io.WriteCloser // write end of the command's stdin pipe
	Stdout *bufio.Reader  // buffered reader over the command's stdout pipe
	Stderr *bufio.Reader  // buffered reader over the command's stderr pipe
	*exec.Cmd
}
|
|
|
|
|
2014-10-07 16:44:28 +00:00
|
|
|
// startCommand starts up a command and creates a stdin pipe and a buffered
|
2016-03-31 11:50:00 +00:00
|
|
|
// stdout & stderr pipes, wrapped in a wrappedCmd. The stdout buffer will be of stdoutBufSize
|
2014-10-07 16:44:28 +00:00
|
|
|
// bytes.
|
2014-10-07 15:59:59 +00:00
|
|
|
func startCommand(command string, args ...string) (*wrappedCmd, error) {
|
|
|
|
cmd := exec.Command(command, args...)
|
|
|
|
stdout, err := cmd.StdoutPipe()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2016-03-31 11:50:00 +00:00
|
|
|
stderr, err := cmd.StderrPipe()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2014-10-07 15:59:59 +00:00
|
|
|
|
|
|
|
stdin, err := cmd.StdinPipe()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2014-10-07 17:22:16 +00:00
|
|
|
tracerx.Printf("run_command: %s %s", command, strings.Join(args, " "))
|
2014-10-07 15:59:59 +00:00
|
|
|
if err := cmd.Start(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2016-03-31 11:50:00 +00:00
|
|
|
return &wrappedCmd{
|
|
|
|
stdin,
|
|
|
|
bufio.NewReaderSize(stdout, stdoutBufSize),
|
|
|
|
bufio.NewReaderSize(stderr, stdoutBufSize),
|
|
|
|
cmd,
|
|
|
|
}, nil
|
2014-10-07 15:59:59 +00:00
|
|
|
}
|
2015-07-27 16:26:45 +00:00
|
|
|
|
|
|
|
// TreeBlob is an entry from ls-tree or rev-list: a blob's sha1 together with
// the path at which the blob sits in the tree.
type TreeBlob struct {
	Sha1, Filename string
}
|
|
|
|
|
|
|
|
// ScanTree takes a ref and returns a slice of WrappedPointer objects in the tree at that ref
|
|
|
|
// Differs from ScanRefs in that multiple files in the tree with the same content are all reported
|
|
|
|
func ScanTree(ref string) ([]*WrappedPointer, error) {
|
|
|
|
start := time.Now()
|
|
|
|
defer func() {
|
|
|
|
tracerx.PerformanceSince("scan", start)
|
|
|
|
}()
|
|
|
|
|
|
|
|
// We don't use the nameMap approach here since that's imprecise when >1 file
|
|
|
|
// can be using the same content
|
|
|
|
treeShas, err := lsTreeBlobs(ref)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
pointerc, err := catFileBatchTree(treeShas)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
pointers := make([]*WrappedPointer, 0)
|
2016-03-31 11:00:44 +00:00
|
|
|
for p := range pointerc.Results {
|
2015-07-27 16:26:45 +00:00
|
|
|
pointers = append(pointers, p)
|
|
|
|
}
|
2016-03-31 11:00:44 +00:00
|
|
|
err = pointerc.Wait()
|
2015-07-27 16:26:45 +00:00
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
return pointers, err
|
2015-07-27 16:26:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// catFileBatchTree uses git cat-file --batch to get the object contents
|
|
|
|
// of a git object, given its sha1. The contents will be decoded into
|
|
|
|
// a Git LFS pointer. treeblobs is a channel over which blob entries
|
|
|
|
// will be sent. It returns a channel from which point.Pointers can be read.
|
2016-03-31 11:00:44 +00:00
|
|
|
func catFileBatchTree(treeblobs *TreeBlobChannelWrapper) (*PointerChannelWrapper, error) {
|
2015-07-27 16:26:45 +00:00
|
|
|
cmd, err := startCommand("git", "cat-file", "--batch")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
pointers := make(chan *WrappedPointer, chanBufSize)
|
2016-03-31 11:00:44 +00:00
|
|
|
errchan := make(chan error, 10) // Multiple errors possible
|
2015-07-27 16:26:45 +00:00
|
|
|
|
|
|
|
go func() {
|
2016-03-31 11:00:44 +00:00
|
|
|
for t := range treeblobs.Results {
|
2015-07-27 16:26:45 +00:00
|
|
|
cmd.Stdin.Write([]byte(t.Sha1 + "\n"))
|
|
|
|
l, err := cmd.Stdout.ReadBytes('\n')
|
|
|
|
if err != nil {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
// Line is formatted:
|
|
|
|
// <sha1> <type> <size>
|
|
|
|
fields := bytes.Fields(l)
|
|
|
|
s, _ := strconv.Atoi(string(fields[2]))
|
|
|
|
|
|
|
|
nbuf := make([]byte, s)
|
|
|
|
_, err = io.ReadFull(cmd.Stdout, nbuf)
|
|
|
|
if err != nil {
|
|
|
|
break // Legit errors
|
|
|
|
}
|
|
|
|
|
|
|
|
p, err := DecodePointer(bytes.NewBuffer(nbuf))
|
|
|
|
if err == nil {
|
|
|
|
pointers <- &WrappedPointer{
|
|
|
|
Sha1: string(fields[0]),
|
|
|
|
Size: p.Size,
|
|
|
|
Pointer: p,
|
|
|
|
Name: t.Filename,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
_, err = cmd.Stdout.ReadBytes('\n') // Extra \n inserted by cat-file
|
|
|
|
if err != nil {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
2016-03-31 11:00:44 +00:00
|
|
|
// Deal with nested error from incoming treeblobs
|
|
|
|
err := treeblobs.Wait()
|
|
|
|
if err != nil {
|
|
|
|
errchan <- err
|
|
|
|
}
|
2016-02-19 23:35:18 +00:00
|
|
|
|
2015-07-27 16:26:45 +00:00
|
|
|
cmd.Stdin.Close()
|
2016-03-31 11:00:44 +00:00
|
|
|
|
|
|
|
// also errors from our command
|
2016-03-31 11:50:00 +00:00
|
|
|
stderr, _ := ioutil.ReadAll(cmd.Stderr)
|
2016-03-31 11:00:44 +00:00
|
|
|
err = cmd.Wait()
|
|
|
|
if err != nil {
|
2016-03-31 11:50:00 +00:00
|
|
|
errchan <- fmt.Errorf("Error in git cat-file: %v %v", err, string(stderr))
|
2016-03-31 11:00:44 +00:00
|
|
|
}
|
2016-02-19 23:35:18 +00:00
|
|
|
close(pointers)
|
2016-03-31 11:00:44 +00:00
|
|
|
close(errchan)
|
2015-07-27 16:26:45 +00:00
|
|
|
}()
|
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
return NewPointerChannelWrapper(pointers, errchan), nil
|
2015-07-27 16:26:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Use ls-tree at ref to find a list of candidate tree blobs which might be lfs files
|
|
|
|
// The returned channel will be sent these blobs which should be sent to catFileBatchTree
|
|
|
|
// for final check & conversion to Pointer
|
2016-03-31 11:00:44 +00:00
|
|
|
func lsTreeBlobs(ref string) (*TreeBlobChannelWrapper, error) {
|
2015-07-27 16:26:45 +00:00
|
|
|
// Snapshot using ls-tree
|
|
|
|
lsArgs := []string{"ls-tree",
|
|
|
|
"-r", // recurse
|
|
|
|
"-l", // report object size (we'll need this)
|
2016-02-19 23:09:19 +00:00
|
|
|
"-z", // null line termination
|
2015-07-27 16:26:45 +00:00
|
|
|
"--full-tree", // start at the root regardless of where we are in it
|
|
|
|
ref}
|
|
|
|
|
|
|
|
cmd, err := startCommand("git", lsArgs...)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
cmd.Stdin.Close()
|
|
|
|
|
|
|
|
blobs := make(chan TreeBlob, chanBufSize)
|
2016-03-31 11:00:44 +00:00
|
|
|
errchan := make(chan error, 1)
|
2015-07-27 16:26:45 +00:00
|
|
|
|
|
|
|
go func() {
|
2016-02-19 22:48:33 +00:00
|
|
|
parseLsTree(cmd.Stdout, blobs)
|
2016-03-31 11:50:00 +00:00
|
|
|
stderr, _ := ioutil.ReadAll(cmd.Stderr)
|
2016-03-31 11:00:44 +00:00
|
|
|
err := cmd.Wait()
|
|
|
|
if err != nil {
|
2016-03-31 11:50:00 +00:00
|
|
|
errchan <- fmt.Errorf("Error in git ls-tree: %v %v", err, string(stderr))
|
2016-03-31 11:00:44 +00:00
|
|
|
}
|
2015-07-27 16:26:45 +00:00
|
|
|
close(blobs)
|
2016-03-31 11:00:44 +00:00
|
|
|
close(errchan)
|
2015-07-27 16:26:45 +00:00
|
|
|
}()
|
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
return NewTreeBlobChannelWrapper(blobs, errchan), nil
|
2015-07-27 16:26:45 +00:00
|
|
|
}
|
2015-08-03 15:58:34 +00:00
|
|
|
|
2016-02-19 22:48:33 +00:00
|
|
|
func parseLsTree(reader io.Reader, output chan TreeBlob) {
|
|
|
|
scanner := bufio.NewScanner(reader)
|
2016-02-19 23:32:18 +00:00
|
|
|
scanner.Split(scanNullLines)
|
2016-02-19 22:48:33 +00:00
|
|
|
for scanner.Scan() {
|
2016-02-19 23:24:26 +00:00
|
|
|
line := scanner.Text()
|
|
|
|
parts := strings.SplitN(line, "\t", 2)
|
|
|
|
if len(parts) < 2 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
attrs := strings.SplitN(parts[0], " ", 4)
|
|
|
|
if len(attrs) < 4 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if attrs[1] != "blob" {
|
2016-02-19 22:48:33 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2016-02-19 23:24:26 +00:00
|
|
|
sz, err := strconv.ParseInt(strings.TrimSpace(attrs[3]), 10, 64)
|
2016-02-19 22:48:33 +00:00
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if sz < blobSizeCutoff {
|
2016-02-19 23:24:26 +00:00
|
|
|
sha1 := attrs[2]
|
|
|
|
filename := parts[1]
|
2016-02-19 22:48:33 +00:00
|
|
|
output <- TreeBlob{sha1, filename}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-02-19 23:32:18 +00:00
|
|
|
// scanNullLines is a bufio.SplitFunc which splits input into tokens that are
// terminated by a NUL byte. The terminator is consumed but not included in
// the token. At EOF any remaining unterminated data is returned as a final
// token; empty input at EOF produces no token.
func scanNullLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if len(data) == 0 && atEOF {
		return 0, nil, nil
	}

	end := bytes.IndexByte(data, '\000')
	switch {
	case end >= 0:
		// We have a full null-terminated line.
		return end + 1, data[:end], nil
	case atEOF:
		// A final, non-terminated line. Return it.
		return len(data), data, nil
	default:
		// Request more data.
		return 0, nil, nil
	}
}
|
|
|
|
|
2015-09-11 16:48:44 +00:00
|
|
|
// ScanUnpushed scans history for all LFS pointers which have been added but not
|
|
|
|
// pushed to the named remote. remoteName can be left blank to mean 'any remote'
|
|
|
|
func ScanUnpushed(remoteName string) ([]*WrappedPointer, error) {
|
2015-08-03 15:58:34 +00:00
|
|
|
|
|
|
|
start := time.Now()
|
|
|
|
defer func() {
|
|
|
|
tracerx.PerformanceSince("scan", start)
|
|
|
|
}()
|
|
|
|
|
2015-09-11 17:01:28 +00:00
|
|
|
pointerchan, err := ScanUnpushedToChan(remoteName)
|
2015-08-03 15:58:34 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
pointers := make([]*WrappedPointer, 0, 10)
|
2016-03-31 11:00:44 +00:00
|
|
|
for p := range pointerchan.Results {
|
2015-08-03 15:58:34 +00:00
|
|
|
pointers = append(pointers, p)
|
|
|
|
}
|
2016-03-31 11:00:44 +00:00
|
|
|
err = pointerchan.Wait()
|
|
|
|
return pointers, err
|
2015-08-03 15:58:34 +00:00
|
|
|
}
|
|
|
|
|
2015-08-21 14:19:16 +00:00
|
|
|
// ScanPreviousVersions scans changes reachable from ref (commit) back to since.
|
|
|
|
// Returns pointers for *previous* versions that overlap that time. Does not
|
|
|
|
// return pointers which were still in use at ref (use ScanRef for that)
|
|
|
|
func ScanPreviousVersions(ref string, since time.Time) ([]*WrappedPointer, error) {
|
|
|
|
start := time.Now()
|
|
|
|
defer func() {
|
|
|
|
tracerx.PerformanceSince("scan", start)
|
|
|
|
}()
|
|
|
|
|
2015-09-29 22:32:50 +00:00
|
|
|
pointerchan, err := ScanPreviousVersionsToChan(ref, since)
|
2015-08-21 14:19:16 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
pointers := make([]*WrappedPointer, 0, 10)
|
2016-03-31 11:00:44 +00:00
|
|
|
for p := range pointerchan.Results {
|
2015-08-21 14:19:16 +00:00
|
|
|
pointers = append(pointers, p)
|
|
|
|
}
|
2016-03-31 11:00:44 +00:00
|
|
|
err = pointerchan.Wait()
|
|
|
|
return pointers, err
|
2015-08-21 14:19:16 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2015-09-29 22:32:50 +00:00
|
|
|
// ScanPreviousVersionsToChan scans changes reachable from ref (commit) back to since.
|
|
|
|
// Returns channel of pointers for *previous* versions that overlap that time. Does not
|
|
|
|
// include pointers which were still in use at ref (use ScanRefsToChan for that)
|
2016-03-31 11:00:44 +00:00
|
|
|
func ScanPreviousVersionsToChan(ref string, since time.Time) (*PointerChannelWrapper, error) {
|
2015-09-29 22:32:50 +00:00
|
|
|
return logPreviousSHAs(ref, since)
|
|
|
|
}
|
|
|
|
|
2015-09-11 17:01:28 +00:00
|
|
|
// ScanUnpushedToChan scans history for all LFS pointers which have been added but
|
2015-09-11 16:48:44 +00:00
|
|
|
// not pushed to the named remote. remoteName can be left blank to mean 'any remote'
|
2015-08-03 15:58:34 +00:00
|
|
|
// return progressively in a channel
|
2016-03-31 11:00:44 +00:00
|
|
|
func ScanUnpushedToChan(remoteName string) (*PointerChannelWrapper, error) {
|
2015-08-03 15:58:34 +00:00
|
|
|
logArgs := []string{"log",
|
|
|
|
"--branches", "--tags", // include all locally referenced commits
|
2015-09-11 16:48:44 +00:00
|
|
|
"--not"} // but exclude everything that comes after
|
|
|
|
|
|
|
|
if len(remoteName) == 0 {
|
|
|
|
logArgs = append(logArgs, "--remotes")
|
|
|
|
} else {
|
|
|
|
logArgs = append(logArgs, fmt.Sprintf("--remotes=%v", remoteName))
|
2015-08-03 15:58:34 +00:00
|
|
|
}
|
|
|
|
// Add standard search args to find lfs references
|
|
|
|
logArgs = append(logArgs, logLfsSearchArgs...)
|
|
|
|
|
|
|
|
cmd, err := startCommand("git", logArgs...)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
cmd.Stdin.Close()
|
|
|
|
|
|
|
|
pchan := make(chan *WrappedPointer, chanBufSize)
|
2016-03-31 11:00:44 +00:00
|
|
|
errchan := make(chan error, 1)
|
2015-08-03 15:58:34 +00:00
|
|
|
|
2016-02-19 23:35:18 +00:00
|
|
|
go func() {
|
|
|
|
parseLogOutputToPointers(cmd.Stdout, LogDiffAdditions, nil, nil, pchan)
|
2016-03-31 11:50:00 +00:00
|
|
|
stderr, _ := ioutil.ReadAll(cmd.Stderr)
|
2016-03-31 11:00:44 +00:00
|
|
|
err := cmd.Wait()
|
|
|
|
if err != nil {
|
2016-03-31 11:50:00 +00:00
|
|
|
errchan <- fmt.Errorf("Error in git log: %v %v", err, string(stderr))
|
2016-03-31 11:00:44 +00:00
|
|
|
}
|
|
|
|
close(pchan)
|
|
|
|
close(errchan)
|
2016-02-19 23:35:18 +00:00
|
|
|
}()
|
2015-08-03 15:58:34 +00:00
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
return NewPointerChannelWrapper(pchan, errchan), nil
|
2015-08-03 15:58:34 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2015-08-21 14:19:16 +00:00
|
|
|
// logPreviousVersions scans history for all previous versions of LFS pointers
|
|
|
|
// from 'since' up to (but not including) the final state at ref
|
2016-03-31 11:00:44 +00:00
|
|
|
func logPreviousSHAs(ref string, since time.Time) (*PointerChannelWrapper, error) {
|
2015-08-21 14:19:16 +00:00
|
|
|
logArgs := []string{"log",
|
|
|
|
fmt.Sprintf("--since=%v", git.FormatGitDate(since)),
|
|
|
|
}
|
|
|
|
// Add standard search args to find lfs references
|
|
|
|
logArgs = append(logArgs, logLfsSearchArgs...)
|
|
|
|
// ending at ref
|
|
|
|
logArgs = append(logArgs, ref)
|
|
|
|
|
|
|
|
cmd, err := startCommand("git", logArgs...)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
cmd.Stdin.Close()
|
|
|
|
|
|
|
|
pchan := make(chan *WrappedPointer, chanBufSize)
|
2016-03-31 11:00:44 +00:00
|
|
|
errchan := make(chan error, 1)
|
2015-08-21 14:19:16 +00:00
|
|
|
|
|
|
|
// we pull out deletions, since we want the previous SHAs at commits in the range
|
|
|
|
// this means we pick up all previous versions that could have been checked
|
|
|
|
// out in the date range, not just if the commit which *introduced* them is in the range
|
2016-02-19 23:35:18 +00:00
|
|
|
go func() {
|
|
|
|
parseLogOutputToPointers(cmd.Stdout, LogDiffDeletions, nil, nil, pchan)
|
2016-03-31 11:50:00 +00:00
|
|
|
stderr, _ := ioutil.ReadAll(cmd.Stderr)
|
2016-03-31 11:00:44 +00:00
|
|
|
err := cmd.Wait()
|
|
|
|
if err != nil {
|
2016-03-31 11:50:00 +00:00
|
|
|
errchan <- fmt.Errorf("Error in git log: %v %v", err, string(stderr))
|
2016-03-31 11:00:44 +00:00
|
|
|
}
|
|
|
|
close(pchan)
|
|
|
|
close(errchan)
|
2016-02-19 23:35:18 +00:00
|
|
|
}()
|
2015-08-21 14:19:16 +00:00
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
return NewPointerChannelWrapper(pchan, errchan), nil
|
2015-08-21 14:19:16 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2015-08-03 15:58:34 +00:00
|
|
|
// LogDiffDirection says which side of a diff to take data from when scanning
// diffs (e.g. in parseLogOutputToPointers), i.e. '+' or '-'. Depending on what
// you're scanning for either might be useful.
type LogDiffDirection byte

const (
	// LogDiffAdditions includes '+' diffs
	LogDiffAdditions LogDiffDirection = '+'
	// LogDiffDeletions includes '-' diffs
	LogDiffDeletions LogDiffDirection = '-'
)
|
|
|
|
|
|
|
|
// parseLogOutputToPointers parses log output formatted as per logLfsSearchArgs & return pointers
|
|
|
|
// log: a stream of output from git log with at least logLfsSearchArgs specified
|
|
|
|
// dir: whether to include results from + or - diffs
|
|
|
|
// includePaths, excludePaths: filter the results by filename
|
2016-03-31 11:00:44 +00:00
|
|
|
// results: a channel which will receive the pointers (caller must close)
|
2015-08-03 15:58:34 +00:00
|
|
|
func parseLogOutputToPointers(log io.Reader, dir LogDiffDirection,
|
|
|
|
includePaths, excludePaths []string, results chan *WrappedPointer) {
|
|
|
|
|
|
|
|
// For each commit we'll get something like this:
|
|
|
|
/*
|
|
|
|
lfs-commit-sha: 60fde3d23553e10a55e2a32ed18c20f65edd91e7 e2eaf1c10b57da7b98eb5d722ec5912ddeb53ea1
|
|
|
|
|
|
|
|
diff --git a/1D_Noise.png b/1D_Noise.png
|
|
|
|
new file mode 100644
|
|
|
|
index 0000000..2622b4a
|
|
|
|
--- /dev/null
|
|
|
|
+++ b/1D_Noise.png
|
|
|
|
@@ -0,0 +1,3 @@
|
|
|
|
+version https://git-lfs.github.com/spec/v1
|
|
|
|
+oid sha256:f5d84da40ab1f6aa28df2b2bf1ade2cdcd4397133f903c12b4106641b10e1ed6
|
|
|
|
+size 1289
|
|
|
|
*/
|
|
|
|
// There can be multiple diffs per commit (multiple binaries)
|
|
|
|
// Also when a binary is changed the diff will include a '-' line for the old SHA
|
|
|
|
|
|
|
|
// Define regexes to capture commit & diff headers
|
|
|
|
commitHeaderRegex := regexp.MustCompile(`^lfs-commit-sha: ([A-Fa-f0-9]{40})(?: ([A-Fa-f0-9]{40}))*`)
|
|
|
|
fileHeaderRegex := regexp.MustCompile(`diff --git a\/(.+?)\s+b\/(.+)`)
|
|
|
|
fileMergeHeaderRegex := regexp.MustCompile(`diff --cc (.+)`)
|
2015-08-05 13:48:12 +00:00
|
|
|
pointerDataRegex := regexp.MustCompile(`^([\+\- ])(version https://git-lfs|oid sha256|size|ext-).*$`)
|
2015-08-03 15:58:34 +00:00
|
|
|
var pointerData bytes.Buffer
|
|
|
|
var currentFilename string
|
|
|
|
currentFileIncluded := true
|
|
|
|
|
|
|
|
// Utility func used at several points below (keep in narrow scope)
|
|
|
|
finishLastPointer := func() {
|
|
|
|
if pointerData.Len() > 0 {
|
|
|
|
if currentFileIncluded {
|
|
|
|
p, err := DecodePointer(&pointerData)
|
|
|
|
if err == nil {
|
|
|
|
results <- &WrappedPointer{Name: currentFilename, Size: p.Size, Pointer: p}
|
2015-08-07 14:36:46 +00:00
|
|
|
} else {
|
|
|
|
tracerx.Printf("Unable to parse pointer from log: %v", err)
|
2015-08-03 15:58:34 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
pointerData.Reset()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
scanner := bufio.NewScanner(log)
|
|
|
|
for scanner.Scan() {
|
|
|
|
line := scanner.Text()
|
|
|
|
if match := commitHeaderRegex.FindStringSubmatch(line); match != nil {
|
|
|
|
// Currently we're not pulling out commit groupings, but could if we wanted
|
|
|
|
// This just acts as a delimiter for finishing a multiline pointer
|
|
|
|
finishLastPointer()
|
|
|
|
|
|
|
|
} else if match := fileHeaderRegex.FindStringSubmatch(line); match != nil {
|
|
|
|
// Finding a regular file header
|
|
|
|
finishLastPointer()
|
|
|
|
// Pertinent file name depends on whether we're listening to additions or removals
|
|
|
|
if dir == LogDiffAdditions {
|
|
|
|
currentFilename = match[2]
|
|
|
|
} else {
|
|
|
|
currentFilename = match[1]
|
|
|
|
}
|
|
|
|
currentFileIncluded = FilenamePassesIncludeExcludeFilter(currentFilename, includePaths, excludePaths)
|
|
|
|
} else if match := fileMergeHeaderRegex.FindStringSubmatch(line); match != nil {
|
|
|
|
// Git merge file header is a little different, only one file
|
|
|
|
finishLastPointer()
|
|
|
|
currentFilename = match[1]
|
|
|
|
currentFileIncluded = FilenamePassesIncludeExcludeFilter(currentFilename, includePaths, excludePaths)
|
|
|
|
} else if currentFileIncluded {
|
|
|
|
if match := pointerDataRegex.FindStringSubmatch(line); match != nil {
|
|
|
|
// An LFS pointer data line
|
|
|
|
// Include only the entirety of one side of the diff
|
|
|
|
// -U3 will ensure we always get all of it, even if only
|
|
|
|
// the SHA changed (version & size the same)
|
|
|
|
changeType := match[1][0]
|
|
|
|
// Always include unchanged context lines (normally just the version line)
|
|
|
|
if LogDiffDirection(changeType) == dir || changeType == ' ' {
|
|
|
|
// Must skip diff +/- marker
|
|
|
|
pointerData.WriteString(line[1:])
|
|
|
|
pointerData.WriteString("\n") // newline was stripped off by scanner
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Final pointer if in progress
|
|
|
|
finishLastPointer()
|
2016-03-31 11:00:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// ChannelWrapper is the interface for all types of wrapper around a channel
// of results and an error channel.
// Implementors will expose a type-specific channel for results.
// Call the Wait() function after processing the results channel to catch any
// errors that occurred during the async processing.
type ChannelWrapper interface {
	// Wait should be called after processing the results channel to check
	// for async errors. It returns nil when no error was reported.
	Wait() error
}
|
2015-08-03 15:58:34 +00:00
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
// BaseChannelWrapper is the base implementation of a channel wrapper; it
// deals only with the error channel and is embedded by the typed wrappers.
type BaseChannelWrapper struct {
	errorChan <-chan error
}

// Wait reads the error channel until it is closed and returns what it
// received: nil if nothing was sent, the error itself if there was exactly
// one, or a new error joining all of them with newlines if there were several.
func (w *BaseChannelWrapper) Wait() error {
	var combined error
	for next := range w.errorChan {
		if combined == nil {
			combined = next
			continue
		}
		// Combine in case multiple errors
		combined = fmt.Errorf("%v\n%v", combined, next)
	}
	return combined
}
|
|
|
|
|
|
|
|
// ChannelWrapper for pointer Scan* functions to more easily return async error data via Wait()
|
|
|
|
// See NewPointerChannelWrapper for construction / use
|
|
|
|
type PointerChannelWrapper struct {
|
|
|
|
*BaseChannelWrapper
|
|
|
|
Results <-chan *WrappedPointer
|
|
|
|
}
|
|
|
|
|
|
|
|
// Construct a new channel wrapper for WrappedPointer
|
2016-03-31 11:04:59 +00:00
|
|
|
// Caller can use s.Results directly for normal processing then call Wait() to finish & check for errors
|
2016-03-31 11:00:44 +00:00
|
|
|
// Scan function is required to create error channel large enough not to block (usually 1 is ok)
|
|
|
|
func NewPointerChannelWrapper(pointerChan <-chan *WrappedPointer, errorChan <-chan error) *PointerChannelWrapper {
|
|
|
|
return &PointerChannelWrapper{&BaseChannelWrapper{errorChan}, pointerChan}
|
|
|
|
}
|
|
|
|
|
|
|
|
// ChannelWrapper for string channel functions to more easily return async error data via Wait()
|
2016-03-31 11:04:59 +00:00
|
|
|
// Caller can use s.Results directly for normal processing then call Wait() to finish & check for errors
|
2016-03-31 11:00:44 +00:00
|
|
|
// See NewStringChannelWrapper for construction / use
|
|
|
|
type StringChannelWrapper struct {
|
|
|
|
*BaseChannelWrapper
|
|
|
|
Results <-chan string
|
|
|
|
}
|
|
|
|
|
|
|
|
// Construct a new channel wrapper for string
|
2016-03-31 11:04:59 +00:00
|
|
|
// Caller can use s.Results directly for normal processing then call Wait() to finish & check for errors
|
2016-03-31 11:00:44 +00:00
|
|
|
func NewStringChannelWrapper(stringChan <-chan string, errorChan <-chan error) *StringChannelWrapper {
|
|
|
|
return &StringChannelWrapper{&BaseChannelWrapper{errorChan}, stringChan}
|
|
|
|
}
|
|
|
|
|
|
|
|
// ChannelWrapper for TreeBlob channel functions to more easily return async error data via Wait()
|
|
|
|
// See NewTreeBlobChannelWrapper for construction / use
|
|
|
|
type TreeBlobChannelWrapper struct {
|
|
|
|
*BaseChannelWrapper
|
|
|
|
Results <-chan TreeBlob
|
|
|
|
}
|
2015-08-03 15:58:34 +00:00
|
|
|
|
2016-03-31 11:00:44 +00:00
|
|
|
// Construct a new channel wrapper for TreeBlob
|
2016-03-31 11:04:59 +00:00
|
|
|
// Caller can use s.Results directly for normal processing then call Wait() to finish & check for errors
|
2016-03-31 11:00:44 +00:00
|
|
|
func NewTreeBlobChannelWrapper(treeBlobChan <-chan TreeBlob, errorChan <-chan error) *TreeBlobChannelWrapper {
|
|
|
|
return &TreeBlobChannelWrapper{&BaseChannelWrapper{errorChan}, treeBlobChan}
|
2015-08-03 15:58:34 +00:00
|
|
|
}
|