2014-10-03 16:08:00 +00:00
|
|
|
package scanner
|
|
|
|
|
|
|
|
import (
|
2014-10-07 15:16:54 +00:00
|
|
|
"bufio"
|
2014-10-03 16:08:00 +00:00
|
|
|
"bytes"
|
|
|
|
"github.com/github/git-media/pointer"
|
2014-10-07 17:22:16 +00:00
|
|
|
"github.com/rubyist/tracerx"
|
2014-10-07 15:16:54 +00:00
|
|
|
"io"
|
|
|
|
"os/exec"
|
2014-10-03 16:08:00 +00:00
|
|
|
"strconv"
|
2014-10-07 17:22:16 +00:00
|
|
|
"strings"
|
|
|
|
"time"
|
2014-10-03 16:08:00 +00:00
|
|
|
)
|
|
|
|
|
2014-10-11 14:28:46 +00:00
|
|
|
const (
	// blobSizeCutoff is the exclusive upper bound, in bytes, on blob size
	// used by catFileBatchCheck: only blobs strictly smaller than this are
	// passed on for decoding.
	blobSizeCutoff = 130

	// stdoutBufSize is the size, in bytes, of the buffered reader that
	// startCommand places around a command's stdout pipe.
	stdoutBufSize = 16 * 1024

	// chanBufSize is the capacity of the buffered channels that connect the
	// stages of the scan pipeline.
	chanBufSize = 100
)
|
2014-10-07 15:59:59 +00:00
|
|
|
|
2014-10-11 14:28:46 +00:00
|
|
|
// wrappedPointer wraps a pointer.Pointer and provides the git sha1
|
|
|
|
// and the file name associated with the object, taken from the
|
|
|
|
// rev-list output.
|
2014-10-07 17:05:09 +00:00
|
|
|
type wrappedPointer struct {
|
|
|
|
Sha1 string
|
2014-10-08 13:04:07 +00:00
|
|
|
Name string
|
2014-10-07 17:05:09 +00:00
|
|
|
*pointer.Pointer
|
|
|
|
}
|
|
|
|
|
2014-10-07 16:44:28 +00:00
|
|
|
// Scan takes a ref and returns a slice of pointer.Pointer objects
|
|
|
|
// for all git media pointers it finds for that ref.
|
2014-10-07 17:05:09 +00:00
|
|
|
func Scan(ref string) ([]*wrappedPointer, error) {
|
2014-10-08 13:04:07 +00:00
|
|
|
nameMap := make(map[string]string, 0)
|
2014-10-07 17:22:16 +00:00
|
|
|
start := time.Now()
|
|
|
|
|
2014-10-08 13:04:07 +00:00
|
|
|
revs, err := revListShas(ref, ref == "", nameMap)
|
2014-10-07 20:57:18 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
smallShas, err := catFileBatchCheck(revs)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
pointerc, err := catFileBatch(smallShas)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2014-10-07 15:16:54 +00:00
|
|
|
|
2014-10-07 17:05:09 +00:00
|
|
|
pointers := make([]*wrappedPointer, 0)
|
2014-10-07 15:16:54 +00:00
|
|
|
for p := range pointerc {
|
2014-10-08 13:04:07 +00:00
|
|
|
if name, ok := nameMap[p.Sha1]; ok {
|
|
|
|
p.Name = name
|
|
|
|
}
|
2014-10-07 15:16:54 +00:00
|
|
|
pointers = append(pointers, p)
|
|
|
|
}
|
|
|
|
|
2014-10-07 17:22:16 +00:00
|
|
|
tracerx.PerformanceSince("scan", start)
|
|
|
|
|
2014-10-07 15:16:54 +00:00
|
|
|
return pointers, nil
|
|
|
|
}
|
|
|
|
|
2014-10-07 16:44:28 +00:00
|
|
|
// revListShas uses git rev-list to return the list of object sha1s
|
|
|
|
// for the given ref. If all is true, ref is ignored. It returns a
|
|
|
|
// channel from which sha1 strings can be read.
|
2014-10-08 13:04:07 +00:00
|
|
|
func revListShas(ref string, all bool, nameMap map[string]string) (chan string, error) {
|
2014-10-07 15:16:54 +00:00
|
|
|
refArgs := []string{"rev-list", "--objects"}
|
|
|
|
if all {
|
|
|
|
refArgs = append(refArgs, "--all")
|
|
|
|
} else {
|
2014-10-07 17:11:26 +00:00
|
|
|
refArgs = append(refArgs, "--no-walk")
|
2014-10-07 15:16:54 +00:00
|
|
|
refArgs = append(refArgs, ref)
|
|
|
|
}
|
2014-10-03 16:17:26 +00:00
|
|
|
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd, err := startCommand("git", refArgs...)
|
2014-10-07 15:16:54 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2014-10-05 13:35:26 +00:00
|
|
|
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd.Stdin.Close()
|
2014-10-03 16:08:00 +00:00
|
|
|
|
2014-10-07 20:57:18 +00:00
|
|
|
revs := make(chan string, chanBufSize)
|
2014-10-07 15:16:54 +00:00
|
|
|
|
|
|
|
go func() {
|
2014-10-07 15:59:59 +00:00
|
|
|
scanner := bufio.NewScanner(cmd.Stdout)
|
2014-10-07 15:16:54 +00:00
|
|
|
for scanner.Scan() {
|
2014-10-08 13:04:07 +00:00
|
|
|
line := strings.TrimSpace(scanner.Text())
|
2014-10-11 14:28:46 +00:00
|
|
|
if len(line) < 40 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2014-10-08 13:04:07 +00:00
|
|
|
sha1 := line[0:40]
|
|
|
|
if len(line) > 40 {
|
|
|
|
nameMap[sha1] = line[41:len(line)]
|
|
|
|
}
|
|
|
|
revs <- sha1
|
2014-10-03 16:08:00 +00:00
|
|
|
}
|
2014-10-07 15:16:54 +00:00
|
|
|
close(revs)
|
|
|
|
}()
|
|
|
|
|
|
|
|
return revs, nil
|
|
|
|
}
|
|
|
|
|
2014-10-07 16:44:28 +00:00
|
|
|
// catFileBatchCheck uses git cat-file --batch-check to get the type
|
|
|
|
// and size of a git object. Any object that isn't of type blob and
|
|
|
|
// under the blobSizeCutoff will be ignored. revs is a channel over
|
|
|
|
// which strings containing git sha1s will be sent. It returns a channel
|
|
|
|
// from which sha1 strings can be read.
|
2014-10-07 15:16:54 +00:00
|
|
|
func catFileBatchCheck(revs chan string) (chan string, error) {
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd, err := startCommand("git", "cat-file", "--batch-check")
|
2014-10-07 15:16:54 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2014-10-03 16:08:00 +00:00
|
|
|
}
|
|
|
|
|
2014-10-07 20:57:18 +00:00
|
|
|
smallRevs := make(chan string, chanBufSize)
|
2014-10-03 16:08:00 +00:00
|
|
|
|
2014-10-07 15:16:54 +00:00
|
|
|
go func() {
|
2014-10-07 15:59:59 +00:00
|
|
|
scanner := bufio.NewScanner(cmd.Stdout)
|
2014-10-07 15:16:54 +00:00
|
|
|
for scanner.Scan() {
|
|
|
|
line := scanner.Text()
|
2014-10-07 20:57:18 +00:00
|
|
|
// Format is:
|
|
|
|
// <sha1> <type> <size>
|
|
|
|
// type is at a fixed spot, if we see that it's "blob", we can avoid
|
|
|
|
// splitting the line just to get the size.
|
2014-10-07 15:16:54 +00:00
|
|
|
if line[41:45] == "blob" {
|
|
|
|
size, err := strconv.Atoi(line[46:len(line)])
|
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
2014-10-07 15:59:59 +00:00
|
|
|
if size < blobSizeCutoff {
|
2014-10-07 15:16:54 +00:00
|
|
|
smallRevs <- line[0:40]
|
|
|
|
}
|
|
|
|
}
|
2014-10-03 16:08:00 +00:00
|
|
|
}
|
2014-10-07 15:16:54 +00:00
|
|
|
close(smallRevs)
|
|
|
|
}()
|
2014-10-03 16:08:00 +00:00
|
|
|
|
2014-10-07 15:16:54 +00:00
|
|
|
go func() {
|
|
|
|
for r := range revs {
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd.Stdin.Write([]byte(r + "\n"))
|
2014-10-07 15:16:54 +00:00
|
|
|
}
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd.Stdin.Close()
|
2014-10-07 15:16:54 +00:00
|
|
|
}()
|
2014-10-03 16:17:26 +00:00
|
|
|
|
2014-10-07 15:16:54 +00:00
|
|
|
return smallRevs, nil
|
|
|
|
}
|
|
|
|
|
2014-10-07 16:44:28 +00:00
|
|
|
// catFileBatch uses git cat-file --batch to get the object contents
|
|
|
|
// of a git object, given its sha1. The contents will be decoded into
|
|
|
|
// a git media pointer. revs is a channel over which strings containing
|
|
|
|
// git sha1s will be sent. It returns a channel from which point.Pointers
|
|
|
|
// can be read.
|
2014-10-07 17:05:09 +00:00
|
|
|
func catFileBatch(revs chan string) (chan *wrappedPointer, error) {
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd, err := startCommand("git", "cat-file", "--batch")
|
2014-10-07 15:16:54 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2014-10-07 20:57:18 +00:00
|
|
|
pointers := make(chan *wrappedPointer, chanBufSize)
|
2014-10-07 15:16:54 +00:00
|
|
|
|
|
|
|
go func() {
|
|
|
|
for {
|
2014-10-07 16:33:00 +00:00
|
|
|
l, err := cmd.Stdout.ReadBytes('\n')
|
2014-10-07 20:57:18 +00:00
|
|
|
if err != nil {
|
2014-10-07 15:16:54 +00:00
|
|
|
break
|
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:59 +00:00
|
|
|
// Line is formatted:
|
|
|
|
// <sha1> <type> <size>
|
2014-10-07 15:16:54 +00:00
|
|
|
fields := bytes.Fields(l)
|
|
|
|
s, _ := strconv.Atoi(string(fields[2]))
|
|
|
|
|
|
|
|
nbuf := make([]byte, s)
|
2014-10-07 16:33:00 +00:00
|
|
|
_, err = io.ReadFull(cmd.Stdout, nbuf)
|
2014-10-07 15:16:54 +00:00
|
|
|
if err != nil {
|
|
|
|
break // Legit errors
|
|
|
|
}
|
|
|
|
|
|
|
|
p, err := pointer.Decode(bytes.NewBuffer(nbuf))
|
|
|
|
if err == nil {
|
2014-10-08 13:04:07 +00:00
|
|
|
pointers <- &wrappedPointer{string(fields[0]), "", p}
|
2014-10-07 15:16:54 +00:00
|
|
|
}
|
|
|
|
|
2014-10-07 16:33:00 +00:00
|
|
|
_, err = cmd.Stdout.ReadBytes('\n') // Extra \n inserted by cat-file
|
2014-10-07 20:57:18 +00:00
|
|
|
if err != nil {
|
2014-10-07 15:16:54 +00:00
|
|
|
break
|
|
|
|
}
|
2014-10-03 16:08:00 +00:00
|
|
|
}
|
2014-10-07 15:16:54 +00:00
|
|
|
close(pointers)
|
|
|
|
}()
|
2014-10-03 16:08:00 +00:00
|
|
|
|
2014-10-07 15:16:54 +00:00
|
|
|
go func() {
|
|
|
|
for r := range revs {
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd.Stdin.Write([]byte(r + "\n"))
|
2014-10-03 16:08:00 +00:00
|
|
|
}
|
2014-10-07 15:59:59 +00:00
|
|
|
cmd.Stdin.Close()
|
2014-10-07 15:16:54 +00:00
|
|
|
}()
|
|
|
|
|
2014-10-03 16:08:00 +00:00
|
|
|
return pointers, nil
|
|
|
|
}
|
2014-10-07 15:59:59 +00:00
|
|
|
|
|
|
|
// wrappedCmd bundles an exec.Cmd with the pipes used to talk to it.
//
// The Stdin and Stdout fields declared here shadow the fields of the same
// name promoted from the embedded *exec.Cmd, so cmd.Stdin and cmd.Stdout
// refer to the pipe ends below.
type wrappedCmd struct {
	// Stdin is the write end of the command's standard input.
	Stdin io.WriteCloser
	// Stdout is a buffered reader over the command's standard output.
	Stdout *bufio.Reader
	*exec.Cmd
}
|
|
|
|
|
2014-10-07 16:44:28 +00:00
|
|
|
// startCommand starts up a command and creates a stdin pipe and a buffered
|
|
|
|
// stdout pipe, wrapped in a wrappedCmd. The stdout buffer wille be of stdoutBufSize
|
|
|
|
// bytes.
|
2014-10-07 15:59:59 +00:00
|
|
|
func startCommand(command string, args ...string) (*wrappedCmd, error) {
|
|
|
|
cmd := exec.Command(command, args...)
|
|
|
|
stdout, err := cmd.StdoutPipe()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
stdin, err := cmd.StdinPipe()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2014-10-07 17:22:16 +00:00
|
|
|
tracerx.Printf("run_command: %s %s", command, strings.Join(args, " "))
|
2014-10-07 15:59:59 +00:00
|
|
|
if err := cmd.Start(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2014-10-07 16:33:00 +00:00
|
|
|
return &wrappedCmd{stdin, bufio.NewReaderSize(stdout, stdoutBufSize), cmd}, nil
|
2014-10-07 15:59:59 +00:00
|
|
|
}
|