2015-04-23 16:20:36 +00:00
|
|
|
package lfs
|
2014-10-03 16:08:00 +00:00
|
|
|
|
|
|
|
import (
|
2014-10-07 15:16:54 +00:00
|
|
|
"bufio"
|
2014-10-03 16:08:00 +00:00
|
|
|
"bytes"
|
2015-08-11 14:54:22 +00:00
|
|
|
"errors"
|
2014-10-07 15:16:54 +00:00
|
|
|
"io"
|
|
|
|
"os/exec"
|
2014-10-20 18:49:15 +00:00
|
|
|
"regexp"
|
2014-10-03 16:08:00 +00:00
|
|
|
"strconv"
|
2014-10-07 17:22:16 +00:00
|
|
|
"strings"
|
|
|
|
"time"
|
2015-05-13 19:43:41 +00:00
|
|
|
|
2015-05-25 18:20:50 +00:00
|
|
|
"github.com/github/git-lfs/vendor/_nuts/github.com/rubyist/tracerx"
|
2014-10-03 16:08:00 +00:00
|
|
|
)
|
|
|
|
|
2014-10-11 14:28:46 +00:00
|
|
|
// Tunables shared by the scanner pipeline stages.
const (
	// blobSizeCutoff decides which blobs are inspected for Git LFS pointers:
	// only blobs whose size is strictly below this many bytes are scanned.
	blobSizeCutoff = 1 << 10 // 1024

	// stdoutBufSize is the size, in bytes, of the buffered reader placed in
	// front of a sub-process's stdout.
	stdoutBufSize = 16 << 10 // 16384

	// chanBufSize is the capacity of the channels that hand data from one
	// sub-process stage to the next.
	chanBufSize = 100
)
|
2014-10-07 15:59:59 +00:00
|
|
|
|
2015-08-03 15:58:34 +00:00
|
|
|
// logLfsSearchArgs holds the arguments appended to a `git log` call so that
// its output is limited to LFS changes and formatted the way the log-output
// parser in this file expects.
var logLfsSearchArgs = []string{
	// Only diffs which include an lfs file SHA change.
	"-G", "oid sha256:",
	// Include the diff so the SHA can be read from it.
	"-p",
	// Keep the diff context big enough to hold 10 extension lines, so the
	// whole pointer is always present.
	"-U12",
	// A predictable commit header that is easy to detect.
	`--format=lfs-commit-sha: %H %P`,
}
|
|
|
|
|
2015-05-27 19:45:18 +00:00
|
|
|
// WrappedPointer wraps a pointer.Pointer and provides the git sha1
|
2014-10-11 14:28:46 +00:00
|
|
|
// and the file name associated with the object, taken from the
|
|
|
|
// rev-list output.
|
2015-05-27 19:45:18 +00:00
|
|
|
type WrappedPointer struct {
|
2014-10-27 20:40:21 +00:00
|
|
|
Sha1 string
|
|
|
|
Name string
|
|
|
|
SrcName string
|
|
|
|
Size int64
|
|
|
|
Status string
|
2015-04-23 16:20:36 +00:00
|
|
|
*Pointer
|
2014-10-07 17:05:09 +00:00
|
|
|
}
|
|
|
|
|
2014-10-28 16:11:20 +00:00
|
|
|
// indexFile describes one entry found while scanning the index.
//
// The field order is relied upon by positional struct literals, so it must
// not be changed.
type indexFile struct {
	// Name is the name of the file.
	Name string
	// SrcName is the original name of the file when it was moved or copied.
	SrcName string
	// Status is the status of the file in the index.
	Status string
}
|
|
|
|
|
2014-10-20 18:49:15 +00:00
|
|
|
// z40 matches a run of forty zeros, optionally preceded by "^". The pattern
// is unanchored, so it matches any string that contains such a run.
// revListShas uses it to leave an all-zero right-hand ref out of the
// rev-list arguments.
var z40 = regexp.MustCompile(`\^?0{40}`)
|
|
|
|
|
2015-08-11 14:54:22 +00:00
|
|
|
// ScanningMode selects which set of objects a ref scan asks git rev-list for.
type ScanningMode int

const (
	// ScanRefsMode (0, the zero value and therefore the default) scans the
	// given left ref and, when supplied, the right ref.
	ScanRefsMode ScanningMode = iota
	// ScanAllMode scans everything reachable from all refs (rev-list --all).
	ScanAllMode
	// ScanLeftToRemoteMode scans the left ref, excluding whatever is
	// reachable from the refs of the configured remote.
	ScanLeftToRemoteMode
)
|
|
|
|
|
2015-07-05 18:18:03 +00:00
|
|
|
type ScanRefsOptions struct {
|
2015-08-11 14:54:22 +00:00
|
|
|
ScanMode ScanningMode
|
|
|
|
RemoteName string
|
2015-07-05 18:18:03 +00:00
|
|
|
SkipDeletedBlobs bool
|
|
|
|
nameMap map[string]string
|
|
|
|
}
|
|
|
|
|
2015-05-27 19:45:18 +00:00
|
|
|
// ScanRefs takes a ref and returns a slice of WrappedPointer objects
|
2015-03-19 19:30:55 +00:00
|
|
|
// for all Git LFS pointers it finds for that ref.
|
2015-07-27 16:26:45 +00:00
|
|
|
// Reports unique oids once only, not multiple times if >1 file uses the same content
|
2015-07-05 18:18:03 +00:00
|
|
|
func ScanRefs(refLeft, refRight string, opt *ScanRefsOptions) ([]*WrappedPointer, error) {
|
|
|
|
if opt == nil {
|
|
|
|
opt = &ScanRefsOptions{}
|
|
|
|
}
|
2015-08-11 14:54:22 +00:00
|
|
|
if refLeft == "" {
|
|
|
|
opt.ScanMode = ScanAllMode
|
|
|
|
}
|
2015-07-05 18:18:03 +00:00
|
|
|
opt.nameMap = make(map[string]string, 0)
|
2015-05-14 10:44:43 +00:00
|
|
|
|
2014-10-07 17:22:16 +00:00
|
|
|
start := time.Now()
|
2015-05-14 10:44:43 +00:00
|
|
|
defer func() {
|
|
|
|
tracerx.PerformanceSince("scan", start)
|
|
|
|
}()
|
2014-10-07 17:22:16 +00:00
|
|
|
|
2015-07-05 18:18:03 +00:00
|
|
|
revs, err := revListShas(refLeft, refRight, *opt)
|
2014-10-07 20:57:18 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
smallShas, err := catFileBatchCheck(revs)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
pointerc, err := catFileBatch(smallShas)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2014-10-07 15:16:54 +00:00
|
|
|
|
2015-05-27 19:45:18 +00:00
|
|
|
pointers := make([]*WrappedPointer, 0)
|
2014-10-07 15:16:54 +00:00
|
|
|
for p := range pointerc {
|
2015-07-05 18:18:03 +00:00
|
|
|
if name, ok := opt.nameMap[p.Sha1]; ok {
|
2014-10-08 13:04:07 +00:00
|
|
|
p.Name = name
|
|
|
|
}
|
2014-10-07 15:16:54 +00:00
|
|
|
pointers = append(pointers, p)
|
|
|
|
}
|
|
|
|
|
|
|
|
return pointers, nil
|
|
|
|
}
|
|
|
|
|
2015-05-27 19:45:18 +00:00
|
|
|
// ScanIndex returns a slice of WrappedPointer objects for all
|
2015-03-19 19:30:55 +00:00
|
|
|
// Git LFS pointers it finds in the index.
|
2015-07-27 16:26:45 +00:00
|
|
|
// Reports unique oids once only, not multiple times if >1 file uses the same content
|
2015-05-27 19:45:18 +00:00
|
|
|
func ScanIndex() ([]*WrappedPointer, error) {
|
2014-10-27 19:47:07 +00:00
|
|
|
nameMap := make(map[string]*indexFile, 0)
|
2015-05-14 10:44:43 +00:00
|
|
|
|
2014-10-27 16:42:38 +00:00
|
|
|
start := time.Now()
|
2015-05-14 10:44:43 +00:00
|
|
|
defer func() {
|
|
|
|
tracerx.PerformanceSince("scan-staging", start)
|
|
|
|
}()
|
2014-10-27 16:42:38 +00:00
|
|
|
|
2014-10-28 21:35:13 +00:00
|
|
|
revs, err := revListIndex(false, nameMap)
|
2014-10-27 16:42:38 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2014-10-28 21:35:13 +00:00
|
|
|
cachedRevs, err := revListIndex(true, nameMap)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
allRevs := make(chan string)
|
|
|
|
go func() {
|
|
|
|
seenRevs := make(map[string]bool, 0)
|
|
|
|
|
|
|
|
for rev := range revs {
|
|
|
|
seenRevs[rev] = true
|
|
|
|
allRevs <- rev
|
|
|
|
}
|
|
|
|
|
|
|
|
for rev := range cachedRevs {
|
|
|
|
if _, ok := seenRevs[rev]; !ok {
|
|
|
|
allRevs <- rev
|
|
|
|
}
|
|
|
|
}
|
|
|
|
close(allRevs)
|
|
|
|
}()
|
|
|
|
|
|
|
|
smallShas, err := catFileBatchCheck(allRevs)
|
2014-10-27 16:42:38 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
pointerc, err := catFileBatch(smallShas)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2015-05-27 19:45:18 +00:00
|
|
|
pointers := make([]*WrappedPointer, 0)
|
2014-10-27 16:42:38 +00:00
|
|
|
for p := range pointerc {
|
2014-10-27 19:47:07 +00:00
|
|
|
if e, ok := nameMap[p.Sha1]; ok {
|
|
|
|
p.Name = e.Name
|
|
|
|
p.Status = e.Status
|
2014-10-27 20:40:21 +00:00
|
|
|
p.SrcName = e.SrcName
|
2014-10-27 16:42:38 +00:00
|
|
|
}
|
|
|
|
pointers = append(pointers, p)
|
|
|
|
}
|
|
|
|
|
|
|
|
return pointers, nil
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2014-10-07 16:44:28 +00:00
|
|
|
// revListShas uses git rev-list to return the list of object sha1s
|
|
|
|
// for the given ref. If all is true, ref is ignored. It returns a
|
|
|
|
// channel from which sha1 strings can be read.
|
2015-07-05 18:18:03 +00:00
|
|
|
func revListShas(refLeft, refRight string, opt ScanRefsOptions) (chan string, error) {
|
2014-10-07 15:16:54 +00:00
|
|
|
refArgs := []string{"rev-list", "--objects"}
|
2015-08-11 14:54:22 +00:00
|
|
|
switch opt.ScanMode {
|
|
|
|
case ScanRefsMode:
|
2015-07-05 18:18:03 +00:00
|
|
|
if opt.SkipDeletedBlobs {
|
|
|
|
refArgs = append(refArgs, "--no-walk")
|
|
|
|
} else {
|
|
|
|
refArgs = append(refArgs, "--do-walk")
|
|
|
|
}
|
|
|
|
|
2014-10-20 18:49:15 +00:00
|
|
|
refArgs = append(refArgs, refLeft)
|
|
|
|
if refRight != "" && !z40.MatchString(refRight) {
|
|
|
|
refArgs = append(refArgs, refRight)
|
|
|
|
}
|
2015-08-11 14:54:22 +00:00
|
|
|
case ScanAllMode:
|
|
|
|
refArgs = append(refArgs, "--all")
|
|
|
|
case ScanLeftToRemoteMode:
|
|
|
|
refArgs = append(refArgs, refLeft, "--not", "--remotes="+opt.RemoteName)
|
|
|
|
default:
|
|
|
|
return nil, errors.New("scanner: unknown scan type: " + strconv.Itoa(int(opt.ScanMode)))
|
2014-10-07 15:16:54 +00:00
|
|
|
}
|
2014-10-03 16:17:26 +00:00
|
|
|
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd, err := startCommand("git", refArgs...)
|
2014-10-07 15:16:54 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2014-10-05 13:35:26 +00:00
|
|
|
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd.Stdin.Close()
|
2014-10-03 16:08:00 +00:00
|
|
|
|
2014-10-07 20:57:18 +00:00
|
|
|
revs := make(chan string, chanBufSize)
|
2014-10-07 15:16:54 +00:00
|
|
|
|
|
|
|
go func() {
|
2014-10-07 15:59:59 +00:00
|
|
|
scanner := bufio.NewScanner(cmd.Stdout)
|
2014-10-07 15:16:54 +00:00
|
|
|
for scanner.Scan() {
|
2014-10-08 13:04:07 +00:00
|
|
|
line := strings.TrimSpace(scanner.Text())
|
2014-10-11 14:28:46 +00:00
|
|
|
if len(line) < 40 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2014-10-08 13:04:07 +00:00
|
|
|
sha1 := line[0:40]
|
|
|
|
if len(line) > 40 {
|
2015-07-05 18:18:03 +00:00
|
|
|
opt.nameMap[sha1] = line[41:len(line)]
|
2014-10-08 13:04:07 +00:00
|
|
|
}
|
|
|
|
revs <- sha1
|
2014-10-03 16:08:00 +00:00
|
|
|
}
|
2014-10-07 15:16:54 +00:00
|
|
|
close(revs)
|
|
|
|
}()
|
|
|
|
|
|
|
|
return revs, nil
|
|
|
|
}
|
|
|
|
|
2014-10-28 16:11:20 +00:00
|
|
|
// revListIndex uses git diff-index to return the list of object sha1s
|
|
|
|
// for in the indexf. It returns a channel from which sha1 strings can be read.
|
|
|
|
// The namMap will be filled indexFile pointers mapping sha1s to indexFiles.
|
2014-10-28 21:35:13 +00:00
|
|
|
func revListIndex(cache bool, nameMap map[string]*indexFile) (chan string, error) {
|
|
|
|
cmdArgs := []string{"diff-index", "-M"}
|
|
|
|
if cache {
|
|
|
|
cmdArgs = append(cmdArgs, "--cached")
|
|
|
|
}
|
|
|
|
cmdArgs = append(cmdArgs, "HEAD")
|
|
|
|
|
|
|
|
cmd, err := startCommand("git", cmdArgs...)
|
2014-10-27 16:42:38 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
cmd.Stdin.Close()
|
|
|
|
|
|
|
|
revs := make(chan string, chanBufSize)
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
scanner := bufio.NewScanner(cmd.Stdout)
|
|
|
|
for scanner.Scan() {
|
|
|
|
// Format is:
|
|
|
|
// :100644 100644 c5b3d83a7542255ec7856487baa5e83d65b1624c 9e82ac1b514be060945392291b5b3108c22f6fe3 M foo.gif
|
2014-10-27 19:47:07 +00:00
|
|
|
// :<old mode> <new mode> <old sha1> <new sha1> <status>\t<file name>[\t<file name>]
|
2014-10-27 16:42:38 +00:00
|
|
|
line := scanner.Text()
|
|
|
|
parts := strings.Split(line, "\t")
|
|
|
|
if len(parts) < 2 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
description := strings.Split(parts[0], " ")
|
|
|
|
files := parts[1:len(parts)]
|
|
|
|
|
2014-10-27 19:47:07 +00:00
|
|
|
if len(description) >= 5 {
|
2014-10-27 20:40:21 +00:00
|
|
|
status := description[4][0:1]
|
2014-10-27 16:42:38 +00:00
|
|
|
sha1 := description[3]
|
2014-10-27 19:47:07 +00:00
|
|
|
if status == "M" {
|
|
|
|
sha1 = description[2] // This one is modified but not added
|
|
|
|
}
|
2014-10-27 20:40:21 +00:00
|
|
|
nameMap[sha1] = &indexFile{files[len(files)-1], files[0], status}
|
2014-10-27 16:42:38 +00:00
|
|
|
revs <- sha1
|
|
|
|
}
|
|
|
|
}
|
|
|
|
close(revs)
|
|
|
|
}()
|
|
|
|
|
|
|
|
return revs, nil
|
|
|
|
}
|
|
|
|
|
2014-10-07 16:44:28 +00:00
|
|
|
// catFileBatchCheck uses git cat-file --batch-check to get the type
|
|
|
|
// and size of a git object. Any object that isn't of type blob and
|
|
|
|
// under the blobSizeCutoff will be ignored. revs is a channel over
|
|
|
|
// which strings containing git sha1s will be sent. It returns a channel
|
|
|
|
// from which sha1 strings can be read.
|
2014-10-07 15:16:54 +00:00
|
|
|
func catFileBatchCheck(revs chan string) (chan string, error) {
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd, err := startCommand("git", "cat-file", "--batch-check")
|
2014-10-07 15:16:54 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2014-10-03 16:08:00 +00:00
|
|
|
}
|
|
|
|
|
2014-10-07 20:57:18 +00:00
|
|
|
smallRevs := make(chan string, chanBufSize)
|
2014-10-03 16:08:00 +00:00
|
|
|
|
2014-10-07 15:16:54 +00:00
|
|
|
go func() {
|
2014-10-07 15:59:59 +00:00
|
|
|
scanner := bufio.NewScanner(cmd.Stdout)
|
2014-10-07 15:16:54 +00:00
|
|
|
for scanner.Scan() {
|
|
|
|
line := scanner.Text()
|
2014-10-07 20:57:18 +00:00
|
|
|
// Format is:
|
|
|
|
// <sha1> <type> <size>
|
|
|
|
// type is at a fixed spot, if we see that it's "blob", we can avoid
|
|
|
|
// splitting the line just to get the size.
|
2014-10-07 15:16:54 +00:00
|
|
|
if line[41:45] == "blob" {
|
|
|
|
size, err := strconv.Atoi(line[46:len(line)])
|
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
2014-10-07 15:59:59 +00:00
|
|
|
if size < blobSizeCutoff {
|
2014-10-07 15:16:54 +00:00
|
|
|
smallRevs <- line[0:40]
|
|
|
|
}
|
|
|
|
}
|
2014-10-03 16:08:00 +00:00
|
|
|
}
|
2014-10-07 15:16:54 +00:00
|
|
|
close(smallRevs)
|
|
|
|
}()
|
2014-10-03 16:08:00 +00:00
|
|
|
|
2014-10-07 15:16:54 +00:00
|
|
|
go func() {
|
|
|
|
for r := range revs {
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd.Stdin.Write([]byte(r + "\n"))
|
2014-10-07 15:16:54 +00:00
|
|
|
}
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd.Stdin.Close()
|
2014-10-07 15:16:54 +00:00
|
|
|
}()
|
2014-10-03 16:17:26 +00:00
|
|
|
|
2014-10-07 15:16:54 +00:00
|
|
|
return smallRevs, nil
|
|
|
|
}
|
|
|
|
|
2014-10-07 16:44:28 +00:00
|
|
|
// catFileBatch uses git cat-file --batch to get the object contents
|
|
|
|
// of a git object, given its sha1. The contents will be decoded into
|
2015-03-19 19:30:55 +00:00
|
|
|
// a Git LFS pointer. revs is a channel over which strings containing Git SHA1s
|
|
|
|
// will be sent. It returns a channel from which point.Pointers can be read.
|
2015-05-27 19:45:18 +00:00
|
|
|
func catFileBatch(revs chan string) (chan *WrappedPointer, error) {
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd, err := startCommand("git", "cat-file", "--batch")
|
2014-10-07 15:16:54 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2015-05-27 19:45:18 +00:00
|
|
|
pointers := make(chan *WrappedPointer, chanBufSize)
|
2014-10-07 15:16:54 +00:00
|
|
|
|
|
|
|
go func() {
|
|
|
|
for {
|
2014-10-07 16:33:00 +00:00
|
|
|
l, err := cmd.Stdout.ReadBytes('\n')
|
2014-10-07 20:57:18 +00:00
|
|
|
if err != nil {
|
2014-10-07 15:16:54 +00:00
|
|
|
break
|
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:59 +00:00
|
|
|
// Line is formatted:
|
|
|
|
// <sha1> <type> <size>
|
2014-10-07 15:16:54 +00:00
|
|
|
fields := bytes.Fields(l)
|
|
|
|
s, _ := strconv.Atoi(string(fields[2]))
|
|
|
|
|
|
|
|
nbuf := make([]byte, s)
|
2014-10-07 16:33:00 +00:00
|
|
|
_, err = io.ReadFull(cmd.Stdout, nbuf)
|
2014-10-07 15:16:54 +00:00
|
|
|
if err != nil {
|
|
|
|
break // Legit errors
|
|
|
|
}
|
|
|
|
|
2015-04-23 16:20:36 +00:00
|
|
|
p, err := DecodePointer(bytes.NewBuffer(nbuf))
|
2014-10-07 15:16:54 +00:00
|
|
|
if err == nil {
|
2015-05-27 19:45:18 +00:00
|
|
|
pointers <- &WrappedPointer{
|
2014-10-27 20:40:21 +00:00
|
|
|
Sha1: string(fields[0]),
|
|
|
|
Size: p.Size,
|
|
|
|
Pointer: p,
|
|
|
|
}
|
2014-10-07 15:16:54 +00:00
|
|
|
}
|
|
|
|
|
2014-10-07 16:33:00 +00:00
|
|
|
_, err = cmd.Stdout.ReadBytes('\n') // Extra \n inserted by cat-file
|
2014-10-07 20:57:18 +00:00
|
|
|
if err != nil {
|
2014-10-07 15:16:54 +00:00
|
|
|
break
|
|
|
|
}
|
2014-10-03 16:08:00 +00:00
|
|
|
}
|
2014-10-07 15:16:54 +00:00
|
|
|
close(pointers)
|
|
|
|
}()
|
2014-10-03 16:08:00 +00:00
|
|
|
|
2014-10-07 15:16:54 +00:00
|
|
|
go func() {
|
|
|
|
for r := range revs {
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd.Stdin.Write([]byte(r + "\n"))
|
2014-10-03 16:08:00 +00:00
|
|
|
}
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd.Stdin.Close()
|
2014-10-07 15:16:54 +00:00
|
|
|
}()
|
|
|
|
|
2014-10-03 16:08:00 +00:00
|
|
|
return pointers, nil
|
|
|
|
}
|
2014-10-07 15:59:59 +00:00
|
|
|
|
|
|
|
// wrappedCmd bundles a started command with its stdin pipe and a buffered
// reader over its stdout pipe.
//
// The Stdin and Stdout fields shadow the fields of the same name on the
// embedded exec.Cmd. The field order is relied upon by positional struct
// literals, so it must not be changed.
type wrappedCmd struct {
	// Stdin is the write end of the command's stdin pipe.
	Stdin io.WriteCloser
	// Stdout is a buffered reader over the command's stdout pipe.
	Stdout *bufio.Reader

	*exec.Cmd
}
|
|
|
|
|
2014-10-07 16:44:28 +00:00
|
|
|
// startCommand starts up a command and creates a stdin pipe and a buffered
|
2015-04-08 23:37:07 +00:00
|
|
|
// stdout pipe, wrapped in a wrappedCmd. The stdout buffer will be of stdoutBufSize
|
2014-10-07 16:44:28 +00:00
|
|
|
// bytes.
|
2014-10-07 15:59:59 +00:00
|
|
|
func startCommand(command string, args ...string) (*wrappedCmd, error) {
|
|
|
|
cmd := exec.Command(command, args...)
|
|
|
|
stdout, err := cmd.StdoutPipe()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
stdin, err := cmd.StdinPipe()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2014-10-07 17:22:16 +00:00
|
|
|
tracerx.Printf("run_command: %s %s", command, strings.Join(args, " "))
|
2014-10-07 15:59:59 +00:00
|
|
|
if err := cmd.Start(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2014-10-07 16:33:00 +00:00
|
|
|
return &wrappedCmd{stdin, bufio.NewReaderSize(stdout, stdoutBufSize), cmd}, nil
|
2014-10-07 15:59:59 +00:00
|
|
|
}
|
2015-07-27 16:26:45 +00:00
|
|
|
|
|
|
|
// TreeBlob is an entry from ls-tree or rev-list: a blob sha together with
// its path in the tree.
//
// The field order is relied upon by positional struct literals, so it must
// not be changed.
type TreeBlob struct {
	// Sha1 is the git object id of the blob.
	Sha1 string
	// Filename is the path of the blob within the tree.
	Filename string
}
|
|
|
|
|
|
|
|
// ScanTree takes a ref and returns a slice of WrappedPointer objects in the tree at that ref
|
|
|
|
// Differs from ScanRefs in that multiple files in the tree with the same content are all reported
|
|
|
|
func ScanTree(ref string) ([]*WrappedPointer, error) {
|
|
|
|
start := time.Now()
|
|
|
|
defer func() {
|
|
|
|
tracerx.PerformanceSince("scan", start)
|
|
|
|
}()
|
|
|
|
|
|
|
|
// We don't use the nameMap approach here since that's imprecise when >1 file
|
|
|
|
// can be using the same content
|
|
|
|
treeShas, err := lsTreeBlobs(ref)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
pointerc, err := catFileBatchTree(treeShas)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
pointers := make([]*WrappedPointer, 0)
|
|
|
|
for p := range pointerc {
|
|
|
|
pointers = append(pointers, p)
|
|
|
|
}
|
|
|
|
|
|
|
|
return pointers, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// catFileBatchTree uses git cat-file --batch to get the object contents
|
|
|
|
// of a git object, given its sha1. The contents will be decoded into
|
|
|
|
// a Git LFS pointer. treeblobs is a channel over which blob entries
|
|
|
|
// will be sent. It returns a channel from which point.Pointers can be read.
|
|
|
|
func catFileBatchTree(treeblobs chan TreeBlob) (chan *WrappedPointer, error) {
|
|
|
|
cmd, err := startCommand("git", "cat-file", "--batch")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
pointers := make(chan *WrappedPointer, chanBufSize)
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
for t := range treeblobs {
|
|
|
|
cmd.Stdin.Write([]byte(t.Sha1 + "\n"))
|
|
|
|
l, err := cmd.Stdout.ReadBytes('\n')
|
|
|
|
if err != nil {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
// Line is formatted:
|
|
|
|
// <sha1> <type> <size>
|
|
|
|
fields := bytes.Fields(l)
|
|
|
|
s, _ := strconv.Atoi(string(fields[2]))
|
|
|
|
|
|
|
|
nbuf := make([]byte, s)
|
|
|
|
_, err = io.ReadFull(cmd.Stdout, nbuf)
|
|
|
|
if err != nil {
|
|
|
|
break // Legit errors
|
|
|
|
}
|
|
|
|
|
|
|
|
p, err := DecodePointer(bytes.NewBuffer(nbuf))
|
|
|
|
if err == nil {
|
|
|
|
pointers <- &WrappedPointer{
|
|
|
|
Sha1: string(fields[0]),
|
|
|
|
Size: p.Size,
|
|
|
|
Pointer: p,
|
|
|
|
Name: t.Filename,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
_, err = cmd.Stdout.ReadBytes('\n') // Extra \n inserted by cat-file
|
|
|
|
if err != nil {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
close(pointers)
|
|
|
|
cmd.Stdin.Close()
|
|
|
|
}()
|
|
|
|
|
|
|
|
return pointers, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Use ls-tree at ref to find a list of candidate tree blobs which might be lfs files
|
|
|
|
// The returned channel will be sent these blobs which should be sent to catFileBatchTree
|
|
|
|
// for final check & conversion to Pointer
|
|
|
|
func lsTreeBlobs(ref string) (chan TreeBlob, error) {
|
|
|
|
// Snapshot using ls-tree
|
|
|
|
lsArgs := []string{"ls-tree",
|
|
|
|
"-r", // recurse
|
|
|
|
"-l", // report object size (we'll need this)
|
|
|
|
"--full-tree", // start at the root regardless of where we are in it
|
|
|
|
ref}
|
|
|
|
|
|
|
|
cmd, err := startCommand("git", lsArgs...)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
cmd.Stdin.Close()
|
|
|
|
|
|
|
|
blobs := make(chan TreeBlob, chanBufSize)
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
scanner := bufio.NewScanner(cmd.Stdout)
|
|
|
|
regex := regexp.MustCompile(`^\d+\s+blob\s+([0-9a-zA-Z]{40})\s+(\d+)\s+(.*)$`)
|
|
|
|
for scanner.Scan() {
|
|
|
|
line := strings.TrimSpace(scanner.Text())
|
|
|
|
if match := regex.FindStringSubmatch(line); match != nil {
|
|
|
|
sz, err := strconv.ParseInt(match[2], 10, 64)
|
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
sha1 := match[1]
|
|
|
|
filename := match[3]
|
|
|
|
if sz < blobSizeCutoff {
|
|
|
|
blobs <- TreeBlob{sha1, filename}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
close(blobs)
|
|
|
|
}()
|
|
|
|
|
|
|
|
return blobs, nil
|
|
|
|
}
|
2015-08-03 15:58:34 +00:00
|
|
|
|
|
|
|
// ScanUnpushed scans history for all LFS pointers which have been added but not pushed to any remote
|
|
|
|
func ScanUnpushed() ([]*WrappedPointer, error) {
|
|
|
|
|
|
|
|
start := time.Now()
|
|
|
|
defer func() {
|
|
|
|
tracerx.PerformanceSince("scan", start)
|
|
|
|
}()
|
|
|
|
|
|
|
|
pointerchan, err := logUnpushedSHAs()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
pointers := make([]*WrappedPointer, 0, 10)
|
|
|
|
for p := range pointerchan {
|
|
|
|
pointers = append(pointers, p)
|
|
|
|
}
|
|
|
|
return pointers, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// logUnpushedSHAs scans history for all LFS pointers which have been added but not pushed to any remote,
|
|
|
|
// return progressively in a channel
|
|
|
|
func logUnpushedSHAs() (chan *WrappedPointer, error) {
|
|
|
|
logArgs := []string{"log",
|
|
|
|
"--branches", "--tags", // include all locally referenced commits
|
|
|
|
"--not", "--remotes", // but exclude everything reachable from any remote
|
|
|
|
}
|
|
|
|
// Add standard search args to find lfs references
|
|
|
|
logArgs = append(logArgs, logLfsSearchArgs...)
|
|
|
|
|
|
|
|
cmd, err := startCommand("git", logArgs...)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
cmd.Stdin.Close()
|
|
|
|
|
|
|
|
pchan := make(chan *WrappedPointer, chanBufSize)
|
|
|
|
|
|
|
|
go parseLogOutputToPointers(cmd.Stdout, LogDiffAdditions, nil, nil, pchan)
|
|
|
|
|
|
|
|
return pchan, nil
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
// LogDiffDirection selects which side of a diff is read when scanning diff
// output (e.g. in parseLogOutputToPointers): the '+' lines or the '-' lines.
// Depending on what is being scanned for, either can be useful. The value is
// the diff marker character itself.
type LogDiffDirection byte

const (
	// LogDiffAdditions includes '+' diffs.
	LogDiffAdditions LogDiffDirection = '+'
	// LogDiffDeletions includes '-' diffs.
	LogDiffDeletions LogDiffDirection = '-'
)
|
|
|
|
|
|
|
|
// parseLogOutputToPointers parses log output formatted as per logLfsSearchArgs & return pointers
|
|
|
|
// log: a stream of output from git log with at least logLfsSearchArgs specified
|
|
|
|
// dir: whether to include results from + or - diffs
|
|
|
|
// includePaths, excludePaths: filter the results by filename
|
|
|
|
// results: a channel which will receive the pointers
|
|
|
|
func parseLogOutputToPointers(log io.Reader, dir LogDiffDirection,
|
|
|
|
includePaths, excludePaths []string, results chan *WrappedPointer) {
|
|
|
|
|
|
|
|
// For each commit we'll get something like this:
|
|
|
|
/*
|
|
|
|
lfs-commit-sha: 60fde3d23553e10a55e2a32ed18c20f65edd91e7 e2eaf1c10b57da7b98eb5d722ec5912ddeb53ea1
|
|
|
|
|
|
|
|
diff --git a/1D_Noise.png b/1D_Noise.png
|
|
|
|
new file mode 100644
|
|
|
|
index 0000000..2622b4a
|
|
|
|
--- /dev/null
|
|
|
|
+++ b/1D_Noise.png
|
|
|
|
@@ -0,0 +1,3 @@
|
|
|
|
+version https://git-lfs.github.com/spec/v1
|
|
|
|
+oid sha256:f5d84da40ab1f6aa28df2b2bf1ade2cdcd4397133f903c12b4106641b10e1ed6
|
|
|
|
+size 1289
|
|
|
|
*/
|
|
|
|
// There can be multiple diffs per commit (multiple binaries)
|
|
|
|
// Also when a binary is changed the diff will include a '-' line for the old SHA
|
|
|
|
|
|
|
|
// Define regexes to capture commit & diff headers
|
|
|
|
commitHeaderRegex := regexp.MustCompile(`^lfs-commit-sha: ([A-Fa-f0-9]{40})(?: ([A-Fa-f0-9]{40}))*`)
|
|
|
|
fileHeaderRegex := regexp.MustCompile(`diff --git a\/(.+?)\s+b\/(.+)`)
|
|
|
|
fileMergeHeaderRegex := regexp.MustCompile(`diff --cc (.+)`)
|
2015-08-05 13:48:12 +00:00
|
|
|
pointerDataRegex := regexp.MustCompile(`^([\+\- ])(version https://git-lfs|oid sha256|size|ext-).*$`)
|
2015-08-03 15:58:34 +00:00
|
|
|
var pointerData bytes.Buffer
|
|
|
|
var currentFilename string
|
|
|
|
currentFileIncluded := true
|
|
|
|
|
|
|
|
// Utility func used at several points below (keep in narrow scope)
|
|
|
|
finishLastPointer := func() {
|
|
|
|
if pointerData.Len() > 0 {
|
|
|
|
if currentFileIncluded {
|
|
|
|
p, err := DecodePointer(&pointerData)
|
|
|
|
if err == nil {
|
|
|
|
results <- &WrappedPointer{Name: currentFilename, Size: p.Size, Pointer: p}
|
2015-08-07 14:36:46 +00:00
|
|
|
} else {
|
|
|
|
tracerx.Printf("Unable to parse pointer from log: %v", err)
|
2015-08-03 15:58:34 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
pointerData.Reset()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
scanner := bufio.NewScanner(log)
|
|
|
|
for scanner.Scan() {
|
|
|
|
line := scanner.Text()
|
|
|
|
if match := commitHeaderRegex.FindStringSubmatch(line); match != nil {
|
|
|
|
// Currently we're not pulling out commit groupings, but could if we wanted
|
|
|
|
// This just acts as a delimiter for finishing a multiline pointer
|
|
|
|
finishLastPointer()
|
|
|
|
|
|
|
|
} else if match := fileHeaderRegex.FindStringSubmatch(line); match != nil {
|
|
|
|
// Finding a regular file header
|
|
|
|
finishLastPointer()
|
|
|
|
// Pertinent file name depends on whether we're listening to additions or removals
|
|
|
|
if dir == LogDiffAdditions {
|
|
|
|
currentFilename = match[2]
|
|
|
|
} else {
|
|
|
|
currentFilename = match[1]
|
|
|
|
}
|
|
|
|
currentFileIncluded = FilenamePassesIncludeExcludeFilter(currentFilename, includePaths, excludePaths)
|
|
|
|
} else if match := fileMergeHeaderRegex.FindStringSubmatch(line); match != nil {
|
|
|
|
// Git merge file header is a little different, only one file
|
|
|
|
finishLastPointer()
|
|
|
|
currentFilename = match[1]
|
|
|
|
currentFileIncluded = FilenamePassesIncludeExcludeFilter(currentFilename, includePaths, excludePaths)
|
|
|
|
} else if currentFileIncluded {
|
|
|
|
if match := pointerDataRegex.FindStringSubmatch(line); match != nil {
|
|
|
|
// An LFS pointer data line
|
|
|
|
// Include only the entirety of one side of the diff
|
|
|
|
// -U3 will ensure we always get all of it, even if only
|
|
|
|
// the SHA changed (version & size the same)
|
|
|
|
changeType := match[1][0]
|
|
|
|
// Always include unchanged context lines (normally just the version line)
|
|
|
|
if LogDiffDirection(changeType) == dir || changeType == ' ' {
|
|
|
|
// Must skip diff +/- marker
|
|
|
|
pointerData.WriteString(line[1:])
|
|
|
|
pointerData.WriteString("\n") // newline was stripped off by scanner
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Final pointer if in progress
|
|
|
|
finishLastPointer()
|
|
|
|
|
|
|
|
close(results)
|
|
|
|
|
|
|
|
}
|