git: move LsTreeScanner to the git package
We're going to need to scan trees with ls-tree in the git package in the future, and we can't call into the lfs package because of import loops, so let's move the scanner to the git package. While we're at it, let's make two important changes. First, let's remove the blob size check from the scanner, since we're going to want this functionality in order to read all blobs, not just small ones. As part of that, move the check to the place where we use the output of the scanner so we don't lose it. The other change is to rename Sha1 to Oid, since we now support SHA-256 repos as well as SHA-1 repos. Move the tests and some of the helper functions to the new package as well.
This commit is contained in:
parent
4b28e2e821
commit
1e41bbffbb
87
git/ls_tree_scanner.go
Normal file
87
git/ls_tree_scanner.go
Normal file
@ -0,0 +1,87 @@
|
||||
package git
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// TreeBlob describes a single blob entry reported by ls-tree or rev-list:
// its object ID, its size, and its path within the tree.
type TreeBlob struct {
	// Oid is the blob's object ID exactly as it appeared in the scanned
	// output.
	Oid string
	// Size is the value parsed from the size column of the scanned output.
	Size int64
	// Filename is the path that followed the tab in the scanned entry.
	Filename string
}
|
||||
|
||||
type LsTreeScanner struct {
|
||||
s *bufio.Scanner
|
||||
tree *TreeBlob
|
||||
}
|
||||
|
||||
func NewLsTreeScanner(r io.Reader) *LsTreeScanner {
|
||||
s := bufio.NewScanner(r)
|
||||
s.Split(scanNullLines)
|
||||
return &LsTreeScanner{s: s}
|
||||
}
|
||||
|
||||
func (s *LsTreeScanner) TreeBlob() *TreeBlob {
|
||||
return s.tree
|
||||
}
|
||||
|
||||
func (s *LsTreeScanner) Err() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *LsTreeScanner) Scan() bool {
|
||||
t, hasNext := s.next()
|
||||
s.tree = t
|
||||
return hasNext
|
||||
}
|
||||
|
||||
func (s *LsTreeScanner) next() (*TreeBlob, bool) {
|
||||
hasNext := s.s.Scan()
|
||||
line := s.s.Text()
|
||||
parts := strings.SplitN(line, "\t", 2)
|
||||
if len(parts) < 2 {
|
||||
return nil, hasNext
|
||||
}
|
||||
|
||||
attrs := strings.SplitN(parts[0], " ", 4)
|
||||
if len(attrs) < 4 {
|
||||
return nil, hasNext
|
||||
}
|
||||
|
||||
if attrs[1] != "blob" {
|
||||
return nil, hasNext
|
||||
}
|
||||
|
||||
sz, err := strconv.ParseInt(strings.TrimSpace(attrs[3]), 10, 64)
|
||||
if err != nil {
|
||||
return nil, hasNext
|
||||
}
|
||||
|
||||
oid := attrs[2]
|
||||
filename := parts[1]
|
||||
return &TreeBlob{Oid: oid, Size: sz, Filename: filename}, hasNext
|
||||
}
|
||||
|
||||
// scanNullLines is a bufio.SplitFunc that yields tokens terminated by a NUL
// byte. The terminator is consumed but not included in the token. At end of
// input any remaining unterminated bytes are returned as a final token.
func scanNullLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Nothing left to hand out.
	if len(data) == 0 && atEOF {
		return 0, nil, nil
	}

	end := bytes.IndexByte(data, 0)
	switch {
	case end >= 0:
		// A complete NUL-terminated entry is available.
		return end + 1, data[:end], nil
	case atEOF:
		// Final entry without a trailing NUL.
		return len(data), data, nil
	default:
		// No terminator yet: ask the scanner for more data.
		return 0, nil, nil
	}
}
|
51
git/scanner_test.go
Normal file
51
git/scanner_test.go
Normal file
@ -0,0 +1,51 @@
|
||||
package git
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// genericScanner is the minimal scanner surface the assertion helpers in
// this file rely on: a way to advance and a way to read back the error state.
type genericScanner interface {
	// Scan advances the scanner and reports whether it produced an item.
	Scan() bool
	// Err returns the error the scanner has recorded, if any.
	Err() error
}
|
||||
|
||||
func assertNextScan(t *testing.T, scanner genericScanner) {
|
||||
assert.True(t, scanner.Scan())
|
||||
assert.Nil(t, scanner.Err())
|
||||
}
|
||||
|
||||
func assertScannerDone(t *testing.T, scanner genericScanner) {
|
||||
assert.False(t, scanner.Scan())
|
||||
assert.Nil(t, scanner.Err())
|
||||
}
|
||||
|
||||
func TestLsTreeParser(t *testing.T) {
|
||||
stdout := "100644 blob d899f6551a51cf19763c5955c7a06a2726f018e9 42 .gitattributes\000100644 blob 4d343e022e11a8618db494dc3c501e80c7e18197 126 PB SCN 16 Odhrán.wav"
|
||||
scanner := NewLsTreeScanner(strings.NewReader(stdout))
|
||||
|
||||
assertNextTreeBlob(t, scanner, "d899f6551a51cf19763c5955c7a06a2726f018e9", ".gitattributes")
|
||||
assertNextTreeBlob(t, scanner, "4d343e022e11a8618db494dc3c501e80c7e18197", "PB SCN 16 Odhrán.wav")
|
||||
assertScannerDone(t, scanner)
|
||||
}
|
||||
|
||||
func assertNextTreeBlob(t *testing.T, scanner *LsTreeScanner, oid, filename string) {
|
||||
assertNextScan(t, scanner)
|
||||
b := scanner.TreeBlob()
|
||||
assert.NotNil(t, b)
|
||||
assert.Equal(t, oid, b.Oid)
|
||||
assert.Equal(t, filename, b.Filename)
|
||||
}
|
||||
|
||||
func BenchmarkLsTreeParser(b *testing.B) {
|
||||
stdout := "100644 blob d899f6551a51cf19763c5955c7a06a2726f018e9 42 .gitattributes\000100644 blob 4d343e022e11a8618db494dc3c501e80c7e18197 126 PB SCN 16 Odhrán.wav"
|
||||
|
||||
// run the Fib function b.N times
|
||||
for n := 0; n < b.N; n++ {
|
||||
scanner := NewLsTreeScanner(strings.NewReader(stdout))
|
||||
for scanner.Scan() {
|
||||
}
|
||||
}
|
||||
}
|
@ -1,25 +1,14 @@
|
||||
package lfs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/git-lfs/git-lfs/config"
|
||||
"github.com/git-lfs/git-lfs/filepathfilter"
|
||||
"github.com/git-lfs/git-lfs/git"
|
||||
)
|
||||
|
||||
// TreeBlob is one blob entry reported by ls-tree or rev-list: the blob's
// object ID together with its path in the tree.
type TreeBlob struct {
	// Sha1 is the blob's object ID as it appeared in the scanned output.
	Sha1 string
	// Filename is the path of the blob within the tree.
	Filename string
}
|
||||
|
||||
func runScanTree(cb GitScannerFoundPointer, ref string, filter *filepathfilter.Filter, gitEnv, osEnv config.Environment) error {
|
||||
// We don't use the nameMap approach here since that's imprecise when >1 file
|
||||
// can be using the same content
|
||||
@ -59,7 +48,7 @@ func catFileBatchTree(treeblobs *TreeBlobChannelWrapper, gitEnv, osEnv config.En
|
||||
go func() {
|
||||
hasNext := true
|
||||
for t := range treeblobs.Results {
|
||||
hasNext = scanner.Scan(t.Sha1)
|
||||
hasNext = scanner.Scan(t.Oid)
|
||||
|
||||
if p := scanner.Pointer(); p != nil {
|
||||
p.Name = t.Filename
|
||||
@ -107,13 +96,13 @@ func lsTreeBlobs(ref string, filter *filepathfilter.Filter) (*TreeBlobChannelWra
|
||||
|
||||
cmd.Stdin.Close()
|
||||
|
||||
blobs := make(chan TreeBlob, chanBufSize)
|
||||
blobs := make(chan git.TreeBlob, chanBufSize)
|
||||
errchan := make(chan error, 1)
|
||||
|
||||
go func() {
|
||||
scanner := newLsTreeScanner(cmd.Stdout)
|
||||
scanner := git.NewLsTreeScanner(cmd.Stdout)
|
||||
for scanner.Scan() {
|
||||
if t := scanner.TreeBlob(); t != nil && filter.Allows(t.Filename) {
|
||||
if t := scanner.TreeBlob(); t != nil && t.Size < blobSizeCutoff && filter.Allows(t.Filename) {
|
||||
blobs <- *t
|
||||
}
|
||||
}
|
||||
@ -129,77 +118,3 @@ func lsTreeBlobs(ref string, filter *filepathfilter.Filter) (*TreeBlobChannelWra
|
||||
|
||||
return NewTreeBlobChannelWrapper(blobs, errchan), nil
|
||||
}
|
||||
|
||||
type lsTreeScanner struct {
|
||||
s *bufio.Scanner
|
||||
tree *TreeBlob
|
||||
}
|
||||
|
||||
func newLsTreeScanner(r io.Reader) *lsTreeScanner {
|
||||
s := bufio.NewScanner(r)
|
||||
s.Split(scanNullLines)
|
||||
return &lsTreeScanner{s: s}
|
||||
}
|
||||
|
||||
func (s *lsTreeScanner) TreeBlob() *TreeBlob {
|
||||
return s.tree
|
||||
}
|
||||
|
||||
func (s *lsTreeScanner) Err() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *lsTreeScanner) Scan() bool {
|
||||
t, hasNext := s.next()
|
||||
s.tree = t
|
||||
return hasNext
|
||||
}
|
||||
|
||||
func (s *lsTreeScanner) next() (*TreeBlob, bool) {
|
||||
hasNext := s.s.Scan()
|
||||
line := s.s.Text()
|
||||
parts := strings.SplitN(line, "\t", 2)
|
||||
if len(parts) < 2 {
|
||||
return nil, hasNext
|
||||
}
|
||||
|
||||
attrs := strings.SplitN(parts[0], " ", 4)
|
||||
if len(attrs) < 4 {
|
||||
return nil, hasNext
|
||||
}
|
||||
|
||||
if attrs[1] != "blob" {
|
||||
return nil, hasNext
|
||||
}
|
||||
|
||||
sz, err := strconv.ParseInt(strings.TrimSpace(attrs[3]), 10, 64)
|
||||
if err != nil {
|
||||
return nil, hasNext
|
||||
}
|
||||
|
||||
if sz < blobSizeCutoff {
|
||||
sha1 := attrs[2]
|
||||
filename := parts[1]
|
||||
return &TreeBlob{Sha1: sha1, Filename: filename}, hasNext
|
||||
}
|
||||
return nil, hasNext
|
||||
}
|
||||
|
||||
// scanNullLines is a split function for bufio.Scanner that cuts the input at
// NUL bytes. The NUL is consumed but left out of the returned token; trailing
// bytes with no terminator are returned as a last token at end of input.
func scanNullLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
	nul := bytes.IndexByte(data, '\000')
	switch {
	case atEOF && len(data) == 0:
		// Input exhausted.
		return 0, nil, nil
	case nul >= 0:
		// Complete NUL-terminated entry.
		return nul + 1, data[:nul], nil
	case atEOF:
		// Last entry, missing its terminator.
		return len(data), data, nil
	}

	// Terminator not seen yet: request more data.
	return 0, nil, nil
}
|
||||
|
@ -2,6 +2,7 @@ package lfs
|
||||
|
||||
import (
|
||||
"github.com/git-lfs/git-lfs/config"
|
||||
"github.com/git-lfs/git-lfs/git"
|
||||
"github.com/git-lfs/git-lfs/tools"
|
||||
)
|
||||
|
||||
@ -90,11 +91,11 @@ func NewStringChannelWrapper(stringChan <-chan string, errorChan <-chan error) *
|
||||
// See NewTreeBlobChannelWrapper for construction / use
|
||||
type TreeBlobChannelWrapper struct {
|
||||
*tools.BaseChannelWrapper
|
||||
Results <-chan TreeBlob
|
||||
Results <-chan git.TreeBlob
|
||||
}
|
||||
|
||||
// Construct a new channel wrapper for TreeBlob
|
||||
// Caller can use s.Results directly for normal processing then call Wait() to finish & check for errors
|
||||
func NewTreeBlobChannelWrapper(treeBlobChan <-chan TreeBlob, errorChan <-chan error) *TreeBlobChannelWrapper {
|
||||
func NewTreeBlobChannelWrapper(treeBlobChan <-chan git.TreeBlob, errorChan <-chan error) *TreeBlobChannelWrapper {
|
||||
return &TreeBlobChannelWrapper{tools.NewBaseChannelWrapper(errorChan), treeBlobChan}
|
||||
}
|
||||
|
@ -305,31 +305,3 @@ func TestLogScannerDeletionsFilterExclude(t *testing.T) {
|
||||
|
||||
assertScannerDone(t, scanner)
|
||||
}
|
||||
|
||||
func TestLsTreeParser(t *testing.T) {
|
||||
stdout := "100644 blob d899f6551a51cf19763c5955c7a06a2726f018e9 42 .gitattributes\000100644 blob 4d343e022e11a8618db494dc3c501e80c7e18197 126 PB SCN 16 Odhrán.wav"
|
||||
scanner := newLsTreeScanner(strings.NewReader(stdout))
|
||||
|
||||
assertNextTreeBlob(t, scanner, "d899f6551a51cf19763c5955c7a06a2726f018e9", ".gitattributes")
|
||||
assertNextTreeBlob(t, scanner, "4d343e022e11a8618db494dc3c501e80c7e18197", "PB SCN 16 Odhrán.wav")
|
||||
assertScannerDone(t, scanner)
|
||||
}
|
||||
|
||||
func assertNextTreeBlob(t *testing.T, scanner *lsTreeScanner, oid, filename string) {
|
||||
assertNextScan(t, scanner)
|
||||
b := scanner.TreeBlob()
|
||||
assert.NotNil(t, b)
|
||||
assert.Equal(t, oid, b.Sha1)
|
||||
assert.Equal(t, filename, b.Filename)
|
||||
}
|
||||
|
||||
func BenchmarkLsTreeParser(b *testing.B) {
|
||||
stdout := "100644 blob d899f6551a51cf19763c5955c7a06a2726f018e9 42 .gitattributes\000100644 blob 4d343e022e11a8618db494dc3c501e80c7e18197 126 PB SCN 16 Odhrán.wav"
|
||||
|
||||
// run the Fib function b.N times
|
||||
for n := 0; n < b.N; n++ {
|
||||
scanner := newLsTreeScanner(strings.NewReader(stdout))
|
||||
for scanner.Scan() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user