commands: use gitscanner for export

Use the GitScanner to scan through revisions and download pointers
with the transfer queue rather than the rewriter.
This commit is contained in:
Preben Ingvaldsen 2018-06-25 16:21:55 -07:00
parent 57923cc693
commit 833a306089
4 changed files with 44 additions and 106 deletions

@ -108,9 +108,21 @@ func migrateExportCommand(cmd *cobra.Command, args []string) {
// If we have a valid remote, pre-download all objects using the Transfer Queue
if remoteURL := getAPIClient().Endpoints.RemoteEndpoint("download", cfg.Remote()).Url; remoteURL != "" {
q := newDownloadQueue(getTransferManifestOperationRemote("Download", cfg.Remote()), cfg.Remote())
if err := rewriter.ScanForPointers(q, opts, gitfilter); err != nil {
ExitWithError(err)
}
gs := lfs.NewGitScanner(func(p *lfs.WrappedPointer, err error) {
if err != nil {
return
}
downloadPath, err := gitfilter.ObjectPath(p.Oid)
if err != nil {
return
}
if _, err := os.Stat(downloadPath); os.IsNotExist(err) {
q.Add(p.Name, downloadPath, p.Oid, p.Size)
}
})
gs.ScanRefs(opts.Include, opts.Exclude, nil)
q.Wait()

@ -8,14 +8,11 @@ import (
"strings"
"sync"
"github.com/git-lfs/git-lfs/lfs"
"github.com/git-lfs/git-lfs/errors"
"github.com/git-lfs/git-lfs/filepathfilter"
"github.com/git-lfs/git-lfs/git"
"github.com/git-lfs/git-lfs/git/odb"
"github.com/git-lfs/git-lfs/tasklog"
"github.com/git-lfs/git-lfs/tq"
)
// Rewriter allows rewriting topologically equivalent Git histories
@ -175,33 +172,6 @@ func NewRewriter(db *odb.ObjectDatabase, opts ...rewriterOption) *Rewriter {
return rewriter
}
// ScanForPointers walks every commit selected by *RewriteOptions.{Left,Right}
// and scans the tree of each one with scanTree, so that any pointers matching
// the rewrite filter are added to the transfer queue "q" for download.
//
// It returns the first error encountered while listing commits, loading a
// commit, or scanning a tree, and nil otherwise.
func (r *Rewriter) ScanForPointers(q *tq.TransferQueue, opt *RewriteOptions, gf *lfs.GitFilter) error {
	// Resolve the set of commits that need to be inspected.
	oids, err := r.commitsToMigrate(opt)
	if err != nil {
		return err
	}

	progress := r.l.Waiter("migrate: Scanning commits")
	defer progress.Complete()

	for _, id := range oids {
		c, loadErr := r.db.Commit(id)
		if loadErr != nil {
			return loadErr
		}

		// Start at the root of the commit's tree, i.e. with an empty path
		// prefix.
		scanErr := r.scanTree(q, gf, c.TreeID, "")
		if scanErr != nil {
			return scanErr
		}
	}

	return nil
}
// Rewrite rewrites the range of commits given by *RewriteOptions.{Left,Right}
// using the BlobRewriteFn to rewrite the individual blobs.
func (r *Rewriter) Rewrite(opt *RewriteOptions) ([]byte, error) {
@ -340,71 +310,6 @@ func (r *Rewriter) Rewrite(opt *RewriteOptions) ([]byte, error) {
return tip, err
}
// scanTree walks the tree identified by "treeOID" (located at "path" relative
// to the root tree) and queues on "q" every Git LFS pointer blob whose object
// is not yet present at the location reported by gf.ObjectPath. Subtrees are
// visited recursively.
//
// Entries rejected by the rewriter's filter and symlinks are skipped, as are
// blobs that do not decode as pointers. Any other error aborts the walk and is
// returned to the caller.
func (r *Rewriter) scanTree(q *tq.TransferQueue, gf *lfs.GitFilter, treeOID []byte, path string) error {
	tree, err := r.db.Tree(treeOID)
	if err != nil {
		return err
	}

	for _, ent := range tree.Entries {
		// Path of this entry, prefixed by the path of the enclosing tree
		// when there is one.
		entPath := ent.Name
		if len(path) > 0 {
			entPath = strings.Join([]string{path, ent.Name}, "/")
		}

		// Ignore anything the filter does not allow, and symlinks
		// (file mode 0120000).
		if !r.allows(ent.Type(), entPath) || ent.Filemode == 0120000 {
			continue
		}

		kind := ent.Type()

		// Descend into subtrees.
		if kind == odb.TreeObjectType {
			if walkErr := r.scanTree(q, gf, ent.Oid, entPath); walkErr != nil {
				return walkErr
			}
			continue
		}

		// Only blobs are of interest beyond this point.
		if kind != odb.BlobObjectType {
			continue
		}

		blob, blobErr := r.db.Blob(ent.Oid)
		if blobErr != nil {
			return blobErr
		}

		// A blob that is not a pointer is simply not a candidate for
		// download; any other decoding failure is fatal.
		ptr, decodeErr := lfs.DecodePointer(blob.Contents)
		if errors.IsNotAPointerError(decodeErr) {
			continue
		}
		if decodeErr != nil {
			return decodeErr
		}

		objPath, pathErr := gf.ObjectPath(ptr.Oid)
		if pathErr != nil {
			return pathErr
		}

		// Queue the object only when it is not already cached on disk.
		if _, statErr := os.Stat(objPath); os.IsNotExist(statErr) {
			q.Add(ent.Name, objPath, ptr.Oid, ptr.Size)
		}
	}

	return nil
}
// rewriteTree is a recursive function which rewrites a tree given by the ID
// "sha" and path "path". It uses the given BlobRewriteFn to rewrite all blobs
// within the tree, either calling that function or recurring down into subtrees

@ -89,7 +89,20 @@ func (s *GitScanner) ScanLeftToRemote(left string, cb GitScannerFoundPointer) er
}
s.mu.Unlock()
return scanRefsToChan(s, callback, left, "", s.opts(ScanLeftToRemoteMode))
return scanLeftRightToChan(s, callback, left, "", s.opts(ScanLeftToRemoteMode))
}
// ScanRefs scans through all commits reachable by the refs contained in
// "include" and not reachable by any of the refs contained in "exclude".
//
// The callback is selected by firstGitScannerCallback from "cb" and
// s.FoundPointer; if that selection fails, its error is returned. The scan
// runs in ScanRefsMode with SkipDeletedBlobs disabled.
func (s *GitScanner) ScanRefs(include, exclude []string, cb GitScannerFoundPointer) error {
	found, err := firstGitScannerCallback(cb, s.FoundPointer)
	if err != nil {
		return err
	}

	scanOpts := s.opts(ScanRefsMode)
	scanOpts.SkipDeletedBlobs = false

	return scanRefsToChan(s, found, include, exclude, scanOpts)
}
// ScanRefRange scans through all commits from the given left and right refs,
@ -102,7 +115,7 @@ func (s *GitScanner) ScanRefRange(left, right string, cb GitScannerFoundPointer)
opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = false
return scanRefsToChan(s, callback, left, right, opts)
return scanLeftRightToChan(s, callback, left, right, opts)
}
// ScanRefWithDeleted scans through all objects in the given ref, including
@ -121,7 +134,7 @@ func (s *GitScanner) ScanRef(ref string, cb GitScannerFoundPointer) error {
opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = true
return scanRefsToChan(s, callback, ref, "", opts)
return scanLeftRightToChan(s, callback, ref, "", opts)
}
// ScanAll scans through all objects in the git repository.
@ -133,7 +146,7 @@ func (s *GitScanner) ScanAll(cb GitScannerFoundPointer) error {
opts := s.opts(ScanAllMode)
opts.SkipDeletedBlobs = false
return scanRefsToChan(s, callback, "", "", opts)
return scanLeftRightToChan(s, callback, "", "", opts)
}
// ScanTree takes a ref and returns WrappedPointer objects in the tree at that

@ -33,15 +33,16 @@ func (s *lockableNameSet) Check(blobSha string) (string, bool) {
func noopFoundLockable(name string) {}
// scanRefsToChan takes a ref and returns a channel of WrappedPointer objects
// for all Git LFS pointers it finds for that ref.
// scanRefsToChan scans through all commits reachable by refs contained in
// "include" and not reachable by any refs contained in "exclude" and returns
// a channel of WrappedPointer objects for all Git LFS pointers it finds.
// Reports unique oids once only, not multiple times if >1 file uses the same content
func scanRefsToChan(scanner *GitScanner, pointerCb GitScannerFoundPointer, refLeft, refRight string, opt *ScanRefsOptions) error {
func scanRefsToChan(scanner *GitScanner, pointerCb GitScannerFoundPointer, include, exclude []string, opt *ScanRefsOptions) error {
if opt == nil {
panic("no scan ref options")
}
revs, err := revListShas([]string{refLeft, refRight}, nil, opt)
revs, err := revListShas(include, exclude, opt)
if err != nil {
return err
}
@ -91,6 +92,13 @@ func scanRefsToChan(scanner *GitScanner, pointerCb GitScannerFoundPointer, refLe
return nil
}
// scanLeftRightToChan is the two-ref form of scanRefsToChan: it passes
// "refLeft" and "refRight" together as the include list, with no exclude
// list, and forwards "scanner", "pointerCb" and "opt" unchanged.
// Reports unique oids once only, not multiple times if >1 file uses the same content
func scanLeftRightToChan(scanner *GitScanner, pointerCb GitScannerFoundPointer, refLeft, refRight string, opt *ScanRefsOptions) error {
	include := []string{refLeft, refRight}

	return scanRefsToChan(scanner, pointerCb, include, nil, opt)
}
// revListShas uses git rev-list to return the list of object sha1s
// for the given ref. If all is true, ref is ignored. It returns a
// channel from which sha1 strings can be read.