Merge pull request #1688 from git-lfs/catfilebatch-unittests

Catfilebatch unittests
This commit is contained in:
risk danger olson 2016-11-21 08:32:03 -09:00 committed by GitHub
commit cf908f61ae
6 changed files with 372 additions and 215 deletions

@ -23,44 +23,43 @@ func runCatFileBatch(pointerCh chan *WrappedPointer, revs *StringChannelWrapper,
return err
}
go catFileBatchOutput(pointerCh, cmd, errCh)
go catFileBatchInput(cmd, revs, errCh)
go func() {
scanner := &catFileBatchScanner{r: cmd.Stdout}
for r := range revs.Results {
cmd.Stdin.Write([]byte(r + "\n"))
canScan := scanner.Scan()
if p := scanner.Pointer(); p != nil {
pointerCh <- p
}
if err := scanner.Err(); err != nil {
errCh <- err
}
if !canScan {
break
}
}
if err := revs.Wait(); err != nil {
errCh <- err
}
cmd.Stdin.Close()
stderr, _ := ioutil.ReadAll(cmd.Stderr)
err := cmd.Wait()
if err != nil {
errCh <- fmt.Errorf("Error in git cat-file --batch: %v %v", err, string(stderr))
}
close(pointerCh)
close(errCh)
}()
return nil
}
// catFileBatchOutput reads decoded pointers from the stdout of the given
// `git cat-file --batch` command and sends them on pointerCh. Scanner
// errors and a non-zero exit status (with captured stderr) are reported
// on errCh. It closes both pointerCh and errCh when the command exits,
// so it must be the only goroutine that closes those channels.
func catFileBatchOutput(pointerCh chan *WrappedPointer, cmd *wrappedCmd, errCh chan error) {
	scanner := &catFileBatchScanner{r: cmd.Stdout}
	for scanner.Scan() {
		pointerCh <- scanner.Pointer()
	}
	if err := scanner.Err(); err != nil {
		errCh <- err
	}
	// Drain stderr before Wait so it can be included in the error message.
	stderr, _ := ioutil.ReadAll(cmd.Stderr)
	err := cmd.Wait()
	if err != nil {
		errCh <- fmt.Errorf("Error in git cat-file --batch: %v %v", err, string(stderr))
	}
	close(pointerCh)
	close(errCh)
}
// catFileBatchInput writes each rev received on revs to the stdin of the
// given `git cat-file --batch` command, one rev per line. An error from
// the upstream rev producer is forwarded on errCh. Stdin is closed once
// the revs channel is exhausted so the command can terminate.
func catFileBatchInput(cmd *wrappedCmd, revs *StringChannelWrapper, errCh chan error) {
	for r := range revs.Results {
		cmd.Stdin.Write([]byte(r + "\n"))
	}
	err := revs.Wait()
	if err != nil {
		// We can share errchan with other goroutine since that won't close it
		// until we close the stdin below
		errCh <- err
	}
	cmd.Stdin.Close()
}
type catFileBatchScanner struct {
r *bufio.Reader
pointer *WrappedPointer
@ -77,59 +76,52 @@ func (s *catFileBatchScanner) Err() error {
func (s *catFileBatchScanner) Scan() bool {
s.pointer, s.err = nil, nil
p, err := scanPointer(s.r)
p, err := s.next()
s.pointer = p
if err != nil {
// EOF halts scanning, but isn't a reportable error
if err != io.EOF {
s.err = err
}
return false
}
s.pointer = p
return true
}
func scanPointer(r *bufio.Reader) (*WrappedPointer, error) {
func (s *catFileBatchScanner) next() (*WrappedPointer, error) {
l, err := s.r.ReadBytes('\n')
if err != nil {
return nil, err
}
// Line is formatted:
// <sha1> <type> <size>
fields := bytes.Fields(l)
if len(fields) < 3 {
return nil, errors.Wrap(fmt.Errorf("Invalid: %q", string(l)), "git cat-file --batch")
}
size, _ := strconv.Atoi(string(fields[2]))
buf := make([]byte, size)
read, err := io.ReadFull(s.r, buf)
if err != nil {
return nil, err
}
if size != read {
return nil, fmt.Errorf("expected %d bytes, read %d bytes", size, read)
}
p, err := DecodePointer(bytes.NewBuffer(buf[:read]))
var pointer *WrappedPointer
for pointer == nil {
l, err := r.ReadBytes('\n')
if err != nil {
return nil, err
}
// Line is formatted:
// <sha1> <type> <size>
fields := bytes.Fields(l)
if len(fields) < 3 {
return nil, errors.Wrap(fmt.Errorf("Invalid: %q", string(l)), "git cat-file --batch:")
}
size, _ := strconv.Atoi(string(fields[2]))
buf := make([]byte, size)
read, err := io.ReadFull(r, buf)
if err != nil {
return nil, err
}
if size != read {
return nil, fmt.Errorf("expected %d bytes, read %d bytes", size, read)
}
p, err := DecodePointer(bytes.NewBuffer(buf[0:read]))
if err == nil {
pointer = &WrappedPointer{
Sha1: string(fields[0]),
Pointer: p,
}
}
_, err = r.ReadBytes('\n') // Extra \n inserted by cat-file
if err != nil {
return nil, err
if err == nil {
pointer = &WrappedPointer{
Sha1: string(fields[0]),
Pointer: p,
}
}
return pointer, nil
_, err = s.r.ReadBytes('\n') // Extra \n inserted by cat-file
return pointer, err
}

@ -3,7 +3,6 @@ package lfs
import (
"bufio"
"fmt"
"io"
"io/ioutil"
"strconv"
)
@ -19,47 +18,45 @@ func runCatFileBatchCheck(smallRevCh chan string, revs *StringChannelWrapper, er
return err
}
go catFileBatchCheckOutput(smallRevCh, cmd, errCh)
go catFileBatchCheckInput(cmd, revs, errCh)
go func() {
scanner := &catFileBatchCheckScanner{s: bufio.NewScanner(cmd.Stdout), limit: blobSizeCutoff}
for r := range revs.Results {
cmd.Stdin.Write([]byte(r + "\n"))
hasNext := scanner.Scan()
if b := scanner.BlobOID(); len(b) > 0 {
smallRevCh <- b
}
if err := scanner.Err(); err != nil {
errCh <- err
}
if !hasNext {
break
}
}
if err := revs.Wait(); err != nil {
errCh <- err
}
cmd.Stdin.Close()
stderr, _ := ioutil.ReadAll(cmd.Stderr)
err := cmd.Wait()
if err != nil {
errCh <- fmt.Errorf("Error in git cat-file --batch-check: %v %v", err, string(stderr))
}
close(smallRevCh)
close(errCh)
}()
return nil
}
// catFileBatchCheckOutput reads blob OIDs from the stdout of the given
// `git cat-file --batch-check` command and sends them on smallRevCh.
// Scanner errors and a non-zero exit status (with captured stderr) are
// reported on errCh. It closes both smallRevCh and errCh when the
// command exits, so it must be the only goroutine closing those channels.
func catFileBatchCheckOutput(smallRevCh chan string, cmd *wrappedCmd, errCh chan error) {
	scanner := &catFileBatchCheckScanner{s: bufio.NewScanner(cmd.Stdout)}
	for scanner.Scan() {
		smallRevCh <- scanner.BlobOID()
	}
	if err := scanner.Err(); err != nil {
		errCh <- err
	}
	// Drain stderr before Wait so it can be included in the error message.
	stderr, _ := ioutil.ReadAll(cmd.Stderr)
	err := cmd.Wait()
	if err != nil {
		errCh <- fmt.Errorf("Error in git cat-file --batch-check: %v %v", err, string(stderr))
	}
	close(smallRevCh)
	close(errCh)
}
// catFileBatchCheckInput writes each rev received on revs to the stdin
// of the given `git cat-file --batch-check` command, one rev per line.
// An error from the upstream rev producer is forwarded on errCh. Stdin
// is closed once the revs channel is exhausted so the command can exit.
func catFileBatchCheckInput(cmd *wrappedCmd, revs *StringChannelWrapper, errCh chan error) {
	for r := range revs.Results {
		cmd.Stdin.Write([]byte(r + "\n"))
	}
	err := revs.Wait()
	if err != nil {
		// We can share errchan with other goroutine since that won't close it
		// until we close the stdin below
		errCh <- err
	}
	cmd.Stdin.Close()
}
// catFileBatchCheckScanner parses `git cat-file --batch-check` output
// lines of the form `<sha1> <type> <size>`, surfacing the OIDs of blobs
// smaller than limit.
type catFileBatchCheckScanner struct {
	s       *bufio.Scanner // line-oriented reader over the command's stdout
	limit   int            // size cutoff; only blobs below this are yielded
	blobOID string         // OID from the most recent Scan; "" if the line was skipped
	err     error          // NOTE(review): appears unused by the visible Scan path — confirm before keeping
}
func (s *catFileBatchCheckScanner) BlobOID() string {
@ -67,51 +64,41 @@ func (s *catFileBatchCheckScanner) BlobOID() string {
}
func (s *catFileBatchCheckScanner) Err() error {
return s.err
return s.s.Err()
}
func (s *catFileBatchCheckScanner) Scan() bool {
s.blobOID, s.err = "", nil
b, err := scanBlobOID(s.s)
if err != nil {
// EOF halts scanning, but isn't a reportable error
if err != io.EOF {
s.err = err
}
return false
}
s.blobOID = ""
b, hasNext := s.next()
s.blobOID = b
return true
return hasNext
}
func scanBlobOID(s *bufio.Scanner) (string, error) {
objType := "blob"
for s.Scan() {
line := s.Text()
lineLen := len(line)
func (s *catFileBatchCheckScanner) next() (string, bool) {
hasNext := s.s.Scan()
line := s.s.Text()
lineLen := len(line)
// Format is:
// <sha1> <type> <size>
// type is at a fixed spot, if we see that it's "blob", we can avoid
// splitting the line just to get the size.
if lineLen < 46 {
continue
}
if line[41:45] != objType {
continue
}
size, err := strconv.Atoi(line[46:lineLen])
if err != nil {
continue
}
if size < blobSizeCutoff {
return line[0:40], nil
}
// Format is:
// <sha1> <type> <size>
// type is at a fixed spot, if we see that it's "blob", we can avoid
// splitting the line just to get the size.
if lineLen < 46 {
return "", hasNext
}
return "", io.EOF
if line[41:45] != "blob" {
return "", hasNext
}
size, err := strconv.Atoi(line[46:lineLen])
if err != nil {
return "", hasNext
}
if size >= s.limit {
return "", hasNext
}
return line[0:40], hasNext
}

@ -0,0 +1,60 @@
package lfs
import (
"bufio"
"strings"
"testing"
"github.com/stretchr/testify/assert"
)
// TestCatFileBatchCheckScannerWithValidOutput feeds a mix of malformed,
// wrongly-typed, oversized, and valid `cat-file --batch-check` lines
// through the scanner and verifies that only the one small blob's OID is
// surfaced; every other line yields an empty OID.
func TestCatFileBatchCheckScannerWithValidOutput(t *testing.T) {
	output := []string{
		"short line",
		"0000000000000000000000000000000000000000 BLOB capitalized",
		"0000000000000000000000000000000000000001 blob not-a-size",
		"0000000000000000000000000000000000000002 blob 123",
		"0000000000000000000000000000000000000003 blob 1 0",
		"0000000000000000000000000000000000000004 blob 123456789",
	}
	scanner := &catFileBatchCheckScanner{
		s:     bufio.NewScanner(strings.NewReader(strings.Join(output, "\n"))),
		limit: 1024,
	}
	expected := []string{
		"", // line too short to parse
		"", // type field is not lowercase "blob"
		"", // unparseable size
		"0000000000000000000000000000000000000002", // small blob: yielded
		"", // trailing field corrupts the size
		"", // size exceeds the 1024-byte limit
	}
	for _, oid := range expected {
		assertNextOID(t, scanner, oid)
	}
	assertScannerDone(t, scanner)
	assert.Equal(t, "", scanner.BlobOID())
}
// stringScanner describes scanners that yield one string per step via
// Next, alongside the generic Scan/Err protocol.
// NOTE(review): no implementation or caller is visible in this chunk —
// confirm it is actually used before keeping it.
type stringScanner interface {
	Next() (string, bool, error)
	Err() error
	Scan() bool
}
// genericScanner is the minimal scanning protocol shared by the scanner
// types in this package: Scan advances one step, Err reports any error.
type genericScanner interface {
	Err() error
	Scan() bool
}
// assertNextScan asserts that the scanner advances one step (Scan
// returns true) without reporting an error.
func assertNextScan(t *testing.T, scanner genericScanner) {
	assert.True(t, scanner.Scan())
	assert.Nil(t, scanner.Err())
}
// assertNextOID asserts that the scanner advances cleanly and that the
// resulting BlobOID equals oid ("" meaning the line was skipped).
func assertNextOID(t *testing.T, scanner *catFileBatchCheckScanner, oid string) {
	assertNextScan(t, scanner)
	assert.Equal(t, oid, scanner.BlobOID())
}
// assertScannerDone asserts that the scanner is exhausted (Scan returns
// false) and finished without error.
func assertScannerDone(t *testing.T, scanner genericScanner) {
	assert.False(t, scanner.Scan())
	assert.Nil(t, scanner.Err())
}

@ -0,0 +1,103 @@
package lfs
import (
"bufio"
"bytes"
"fmt"
"io"
"math/rand"
"testing"
"github.com/stretchr/testify/assert"
)
// TestCatFileBatchScannerWithValidOutput streams five random (non-pointer)
// blobs, a valid pointer, five more random blobs, a second pointer, and a
// final five random blobs through the batch scanner, checking that only
// the two pointers are decoded and everything else yields a nil pointer.
func TestCatFileBatchScannerWithValidOutput(t *testing.T) {
	pointers := []*Pointer{
		&Pointer{
			Version: "https://git-lfs.github.com/spec/v1",
			Oid:     "e71eefd918ea175b8f362611f981f648dbf9888ff74865077cb4c9077728f350",
			Size:    123,
			OidType: "sha256",
		},
		&Pointer{
			Version: "https://git-lfs.github.com/spec/v1",
			Oid:     "0eb69b651be65d5a61d6bebf2c53c811a5bf8031951111000e2077f4d7fe43b1",
			Size:    132,
			OidType: "sha256",
		},
	}
	reader := fakeReaderWithRandoData(t, pointers)
	if reader == nil {
		return
	}
	scanner := &catFileBatchScanner{r: bufio.NewReader(reader)}
	// Each batch of fake data contains five non-pointer blobs.
	skipRandomBlobs := func() {
		for i := 0; i < 5; i++ {
			assertNextEmptyPointer(t, scanner)
		}
	}
	skipRandomBlobs()
	assertNextPointer(t, scanner, pointers[0].Oid)
	skipRandomBlobs()
	assertNextPointer(t, scanner, pointers[1].Oid)
	skipRandomBlobs()
	assertScannerDone(t, scanner)
	assert.Nil(t, scanner.Pointer())
}
// assertNextPointer asserts that the scanner advances cleanly and that
// the decoded pointer is non-nil with the expected oid.
func assertNextPointer(t *testing.T, scanner *catFileBatchScanner, oid string) {
	assertNextScan(t, scanner)
	p := scanner.Pointer()
	assert.NotNil(t, p)
	assert.Equal(t, oid, p.Oid)
}
// assertNextEmptyPointer asserts that the scanner advances cleanly but
// decoded no pointer from the current blob (non-pointer content).
func assertNextEmptyPointer(t *testing.T, scanner *catFileBatchScanner) {
	assertNextScan(t, scanner)
	assert.Nil(t, scanner.Pointer())
}
// fakeReaderWithRandoData builds a fake `git cat-file --batch` output
// stream: five random (non-pointer) blobs, then, for each given pointer,
// its encoded text followed by five more random blobs. Every blob is
// framed with a `<sha1> blob <size>` header line and a trailing newline,
// as cat-file does. Failures are reported through t.Fatalf, so the
// returned reader is never nil.
func fakeReaderWithRandoData(t *testing.T, blobs []*Pointer) io.Reader {
	buf := &bytes.Buffer{}
	// Seeded deterministically so tests see a stable byte stream.
	rng := rand.New(rand.NewSource(0))
	writeRandomBlobs(t, buf, rng)
	for _, b := range blobs {
		ptrtext := b.Encoded()
		writeFakeBuffer(t, buf, []byte(ptrtext), len(ptrtext))
		writeRandomBlobs(t, buf, rng)
	}
	// buf already implements io.Reader; returning it directly avoids the
	// needless full copy that bytes.NewBuffer(buf.Bytes()) performed.
	return buf
}

// writeRandomBlobs appends five blobs of random data (each just under the
// blob size cutoff) to buf, framed like cat-file output.
func writeRandomBlobs(t *testing.T, buf *bytes.Buffer, rng *rand.Rand) {
	rngbuf := make([]byte, 1000) // just under blob size cutoff
	for i := 0; i < 5; i++ {
		n, err := io.ReadFull(rng, rngbuf)
		if err != nil {
			t.Fatalf("error reading from rng: %+v", err)
		}
		writeFakeBuffer(t, buf, rngbuf, n)
	}
}
func writeFakeBuffer(t *testing.T, buf *bytes.Buffer, by []byte, size int) {
header := fmt.Sprintf("0000000000000000000000000000000000000000 blob %d", size)
t.Log(header)
buf.WriteString(header + "\n")
buf.Write(by)
buf.Write([]byte("\n"))
}

@ -41,38 +41,25 @@ func catFileBatchTree(treeblobs *TreeBlobChannelWrapper) (*PointerChannelWrapper
errchan := make(chan error, 10) // Multiple errors possible
go func() {
scanner := &catFileBatchScanner{r: cmd.Stdout}
for t := range treeblobs.Results {
cmd.Stdin.Write([]byte(t.Sha1 + "\n"))
l, err := cmd.Stdout.ReadBytes('\n')
if err != nil {
break
hasNext := scanner.Scan()
if p := scanner.Pointer(); p != nil {
p.Name = t.Filename
pointers <- p
}
// Line is formatted:
// <sha1> <type> <size>
fields := bytes.Fields(l)
s, _ := strconv.Atoi(string(fields[2]))
nbuf := make([]byte, s)
_, err = io.ReadFull(cmd.Stdout, nbuf)
if err != nil {
break // Legit errors
if err := scanner.Err(); err != nil {
errchan <- err
}
p, err := DecodePointer(bytes.NewBuffer(nbuf))
if err == nil {
pointers <- &WrappedPointer{
Sha1: string(fields[0]),
Pointer: p,
Name: t.Filename,
}
}
_, err = cmd.Stdout.ReadBytes('\n') // Extra \n inserted by cat-file
if err != nil {
if !hasNext {
break
}
}
// Deal with nested error from incoming treeblobs
err := treeblobs.Wait()
if err != nil {
@ -116,7 +103,13 @@ func lsTreeBlobs(ref string) (*TreeBlobChannelWrapper, error) {
errchan := make(chan error, 1)
go func() {
parseLsTree(cmd.Stdout, blobs)
scanner := newLsTreeScanner(cmd.Stdout)
for scanner.Scan() {
if t := scanner.TreeBlob(); t != nil {
blobs <- *t
}
}
stderr, _ := ioutil.ReadAll(cmd.Stderr)
err := cmd.Wait()
if err != nil {
@ -129,36 +122,59 @@ func lsTreeBlobs(ref string) (*TreeBlobChannelWrapper, error) {
return NewTreeBlobChannelWrapper(blobs, errchan), nil
}
func parseLsTree(reader io.Reader, output chan TreeBlob) {
scanner := bufio.NewScanner(reader)
scanner.Split(scanNullLines)
for scanner.Scan() {
line := scanner.Text()
parts := strings.SplitN(line, "\t", 2)
if len(parts) < 2 {
continue
}
// lsTreeScanner parses NUL-delimited `git ls-tree` output into TreeBlobs.
type lsTreeScanner struct {
	s    *bufio.Scanner // NUL-split (scanNullLines) lines of ls-tree output
	tree *TreeBlob      // result of the most recent Scan; nil if the line was skipped
}
attrs := strings.SplitN(parts[0], " ", 4)
if len(attrs) < 4 {
continue
}
// newLsTreeScanner returns a scanner over r that splits input on NUL
// bytes via scanNullLines, matching `git ls-tree -z` style output.
func newLsTreeScanner(r io.Reader) *lsTreeScanner {
	s := bufio.NewScanner(r)
	s.Split(scanNullLines)
	return &lsTreeScanner{s: s}
}
if attrs[1] != "blob" {
continue
}
// TreeBlob returns the blob parsed by the most recent call to Scan, or
// nil if that entry was not a small blob.
func (s *lsTreeScanner) TreeBlob() *TreeBlob {
	return s.tree
}
sz, err := strconv.ParseInt(strings.TrimSpace(attrs[3]), 10, 64)
if err != nil {
continue
}
// Err reports any error encountered by the underlying bufio.Scanner.
// Previously this always returned nil, silently swallowing read errors
// (e.g. a token exceeding the scanner's buffer); surface them instead,
// matching the other scanner types in this package.
func (s *lsTreeScanner) Err() error {
	return s.s.Err()
}
if sz < blobSizeCutoff {
sha1 := attrs[2]
filename := parts[1]
output <- TreeBlob{sha1, filename}
}
// Scan advances to the next ls-tree entry, returning true while more
// input remains. Check TreeBlob for the parsed entry (nil when the line
// was not a small blob).
func (s *lsTreeScanner) Scan() bool {
	t, hasNext := s.next()
	s.tree = t
	return hasNext
}
// next parses one NUL-delimited ls-tree line of the form:
//
//	<mode> SP <type> SP <sha1> SP <size> TAB <filename>
//
// It returns a TreeBlob only for entries of type "blob" whose size is
// below blobSizeCutoff; any other line yields nil. The boolean mirrors
// the underlying Scan result, i.e. whether more input remains.
func (s *lsTreeScanner) next() (*TreeBlob, bool) {
	hasNext := s.s.Scan()
	line := s.s.Text()
	parts := strings.SplitN(line, "\t", 2)
	if len(parts) < 2 {
		// No tab separator: not a parseable entry line.
		return nil, hasNext
	}
	attrs := strings.SplitN(parts[0], " ", 4)
	if len(attrs) < 4 {
		return nil, hasNext
	}
	if attrs[1] != "blob" {
		// Skip trees, commits (submodules), etc.
		return nil, hasNext
	}
	// The size column may carry padding from ls-tree's alignment.
	sz, err := strconv.ParseInt(strings.TrimSpace(attrs[3]), 10, 64)
	if err != nil {
		return nil, hasNext
	}
	if sz < blobSizeCutoff {
		sha1 := attrs[2]
		filename := parts[1]
		return &TreeBlob{Sha1: sha1, Filename: filename}, hasNext
	}
	return nil, hasNext
}
func scanNullLines(data []byte, atEOF bool) (advance int, token []byte, err error) {

@ -244,33 +244,32 @@ func TestParseLogOutputToPointersDeletion(t *testing.T) {
assert.Equal(t, "radial_2.png", pointers[2].Name)
assert.Equal(t, "334c8a0a520cf9f58189dba5a9a26c7bff2769b4a3cc199650c00618bde5b9dd", pointers[2].Oid)
assert.Equal(t, int64(16849), pointers[2].Size)
}
func TestLsTreeParser(t *testing.T) {
stdout := "100644 blob d899f6551a51cf19763c5955c7a06a2726f018e9 42 .gitattributes\000100644 blob 4d343e022e11a8618db494dc3c501e80c7e18197 126 PB SCN 16 Odhrán.wav"
scanner := newLsTreeScanner(strings.NewReader(stdout))
blobs := make(chan TreeBlob, 2)
parseLsTree(strings.NewReader(stdout), blobs)
close(blobs)
assertNextTreeBlob(t, scanner, "d899f6551a51cf19763c5955c7a06a2726f018e9", ".gitattributes")
assertNextTreeBlob(t, scanner, "4d343e022e11a8618db494dc3c501e80c7e18197", "PB SCN 16 Odhrán.wav")
assertScannerDone(t, scanner)
}
<-blobs // gitattributes
blob := <-blobs
if blob.Sha1 != "4d343e022e11a8618db494dc3c501e80c7e18197" {
t.Errorf("Bad sha1: %q", blob.Sha1)
}
if blob.Filename != "PB SCN 16 Odhrán.wav" {
t.Errorf("Bad name: %q", blob.Filename)
}
// assertNextTreeBlob asserts that the scanner advances cleanly and that
// the parsed TreeBlob is non-nil with the expected sha1 and filename.
func assertNextTreeBlob(t *testing.T, scanner *lsTreeScanner, oid, filename string) {
	assertNextScan(t, scanner)
	b := scanner.TreeBlob()
	assert.NotNil(t, b)
	assert.Equal(t, oid, b.Sha1)
	assert.Equal(t, filename, b.Filename)
}
func BenchmarkLsTreeParser(b *testing.B) {
stdout := "100644 blob d899f6551a51cf19763c5955c7a06a2726f018e9 42 .gitattributes\000100644 blob 4d343e022e11a8618db494dc3c501e80c7e18197 126 PB SCN 16 Odhrán.wav"
blobs := make(chan TreeBlob, b.N*2)
// run the Fib function b.N times
for n := 0; n < b.N; n++ {
parseLsTree(strings.NewReader(stdout), blobs)
scanner := newLsTreeScanner(strings.NewReader(stdout))
for scanner.Scan() {
}
}
close(blobs)
}