git-lfs/scanner/scanner.go

168 lines
3.1 KiB
Go
Raw Normal View History

2014-10-03 16:08:00 +00:00
package scanner
import (
"bufio"
2014-10-03 16:08:00 +00:00
"bytes"
"github.com/github/git-media/pointer"
2014-10-07 15:59:59 +00:00
// "github.com/rubyist/tracerx"
"io"
"os/exec"
2014-10-03 16:08:00 +00:00
"strconv"
)
2014-10-07 16:33:00 +00:00
// Tunables for the scanning pipeline.
var (
// blobSizeCutoff is the exclusive upper bound on the size reported by
// `git cat-file --batch-check` for a blob to be forwarded by
// catFileBatchCheck; blobs of this size or larger are skipped.
blobSizeCutoff = 125
// stdoutBufSize is the buffer size passed to bufio.NewReaderSize when
// startCommand wraps a command's stdout.
stdoutBufSize = 16384
)
2014-10-07 15:59:59 +00:00
// Scan collects the pointers reachable from ref.
//
// It chains three stages: revListStream produces object names,
// catFileBatchCheck keeps only the small blobs among them, and catFileBatch
// reads those blobs and yields the ones that decode as pointers. When ref is
// the empty string all refs are scanned.
//
// The returned slice is non-nil (possibly empty) on success. An error is
// returned if any of the underlying git commands cannot be started.
func Scan(ref string) ([]*pointer.Pointer, error) {
	revs, err := revListStream(ref, ref == "")
	if err != nil {
		return nil, err
	}

	smallShas, err := catFileBatchCheck(revs)
	if err != nil {
		// Nobody will consume revs any more; drain it so the rev-list
		// goroutine is not blocked on its send forever.
		go func() {
			for range revs {
			}
		}()
		return nil, err
	}

	pointerc, err := catFileBatch(smallShas)
	if err != nil {
		// Same reasoning as above for the batch-check stage.
		go func() {
			for range smallShas {
			}
		}()
		return nil, err
	}

	pointers := make([]*pointer.Pointer, 0)
	for p := range pointerc {
		pointers = append(pointers, p)
	}
	return pointers, nil
}
func revListStream(ref string, all bool) (chan string, error) {
refArgs := []string{"rev-list", "--objects"}
if all {
refArgs = append(refArgs, "--all")
} else {
refArgs = append(refArgs, ref)
}
2014-10-07 15:59:59 +00:00
cmd, err := startCommand("git", refArgs...)
if err != nil {
return nil, err
}
2014-10-07 15:59:59 +00:00
cmd.Stdin.Close()
2014-10-03 16:08:00 +00:00
revs := make(chan string)
go func() {
2014-10-07 15:59:59 +00:00
scanner := bufio.NewScanner(cmd.Stdout)
for scanner.Scan() {
revs <- scanner.Text()[0:40]
2014-10-03 16:08:00 +00:00
}
close(revs)
}()
return revs, nil
}
func catFileBatchCheck(revs chan string) (chan string, error) {
2014-10-07 15:59:59 +00:00
cmd, err := startCommand("git", "cat-file", "--batch-check")
if err != nil {
return nil, err
2014-10-03 16:08:00 +00:00
}
smallRevs := make(chan string)
2014-10-03 16:08:00 +00:00
go func() {
2014-10-07 15:59:59 +00:00
scanner := bufio.NewScanner(cmd.Stdout)
for scanner.Scan() {
line := scanner.Text()
if line[41:45] == "blob" {
size, err := strconv.Atoi(line[46:len(line)])
if err != nil {
continue
}
2014-10-07 15:59:59 +00:00
if size < blobSizeCutoff {
smallRevs <- line[0:40]
}
}
2014-10-03 16:08:00 +00:00
}
close(smallRevs)
}()
2014-10-03 16:08:00 +00:00
go func() {
for r := range revs {
2014-10-07 15:59:59 +00:00
cmd.Stdin.Write([]byte(r + "\n"))
}
2014-10-07 15:59:59 +00:00
cmd.Stdin.Close()
}()
return smallRevs, nil
}
func catFileBatch(revs chan string) (chan *pointer.Pointer, error) {
2014-10-07 15:59:59 +00:00
cmd, err := startCommand("git", "cat-file", "--batch")
if err != nil {
return nil, err
}
pointers := make(chan *pointer.Pointer)
go func() {
for {
2014-10-07 16:33:00 +00:00
l, err := cmd.Stdout.ReadBytes('\n')
if err != nil { // Probably check for EOF
break
}
2014-10-07 15:59:59 +00:00
// Line is formatted:
// <sha1> <type> <size>
fields := bytes.Fields(l)
s, _ := strconv.Atoi(string(fields[2]))
nbuf := make([]byte, s)
2014-10-07 16:33:00 +00:00
_, err = io.ReadFull(cmd.Stdout, nbuf)
if err != nil {
break // Legit errors
}
p, err := pointer.Decode(bytes.NewBuffer(nbuf))
if err == nil {
pointers <- p
}
2014-10-07 16:33:00 +00:00
_, err = cmd.Stdout.ReadBytes('\n') // Extra \n inserted by cat-file
if err != nil { // Probably check for EOF
break
}
2014-10-03 16:08:00 +00:00
}
close(pointers)
}()
2014-10-03 16:08:00 +00:00
// writes shas to cat-file stdin
go func() {
for r := range revs {
2014-10-07 15:59:59 +00:00
cmd.Stdin.Write([]byte(r + "\n"))
2014-10-03 16:08:00 +00:00
}
2014-10-07 15:59:59 +00:00
cmd.Stdin.Close()
}()
2014-10-03 16:08:00 +00:00
return pointers, nil
}
2014-10-07 15:59:59 +00:00
// wrappedCmd bundles a command with the two ends of its standard streams
// that this package talks to.
//
// The Stdin and Stdout fields shadow the identically named fields of the
// embedded *exec.Cmd, so cmd.Stdin and cmd.Stdout refer to the pipe ends
// below. The field order is relied upon by positional composite literals
// and must not change.
type wrappedCmd struct {
	// Stdin is the write end of the command's standard input.
	Stdin io.WriteCloser
	// Stdout is a buffered reader over the command's standard output.
	Stdout *bufio.Reader
	*exec.Cmd
}
func startCommand(command string, args ...string) (*wrappedCmd, error) {
cmd := exec.Command(command, args...)
stdout, err := cmd.StdoutPipe()
if err != nil {
return nil, err
}
stdin, err := cmd.StdinPipe()
if err != nil {
return nil, err
}
if err := cmd.Start(); err != nil {
return nil, err
}
2014-10-07 16:33:00 +00:00
return &wrappedCmd{stdin, bufio.NewReaderSize(stdout, stdoutBufSize), cmd}, nil
2014-10-07 15:59:59 +00:00
}