From 833a30608956e258f159ee512c42e58994b0a0bc Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Mon, 25 Jun 2018 16:21:55 -0700 Subject: [PATCH] commands: use gitscanner for export Use the GitScanner to scan through revisions and download pointers with the transfer queue rather than the rewriter --- commands/command_migrate_export.go | 18 +++++- git/githistory/rewriter.go | 95 ------------------------------ lfs/gitscanner.go | 21 +++++-- lfs/gitscanner_refs.go | 16 +++-- 4 files changed, 44 insertions(+), 106 deletions(-) diff --git a/commands/command_migrate_export.go b/commands/command_migrate_export.go index 8e9d7604..cee37792 100644 --- a/commands/command_migrate_export.go +++ b/commands/command_migrate_export.go @@ -108,9 +108,21 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { // If we have a valid remote, pre-download all objects using the Transfer Queue if remoteURL := getAPIClient().Endpoints.RemoteEndpoint("download", cfg.Remote()).Url; remoteURL != "" { q := newDownloadQueue(getTransferManifestOperationRemote("Download", cfg.Remote()), cfg.Remote()) - if err := rewriter.ScanForPointers(q, opts, gitfilter); err != nil { - ExitWithError(err) - } + gs := lfs.NewGitScanner(func(p *lfs.WrappedPointer, err error) { + if err != nil { + return + } + + downloadPath, err := gitfilter.ObjectPath(p.Oid) + if err != nil { + return + } + + if _, err := os.Stat(downloadPath); os.IsNotExist(err) { + q.Add(p.Name, downloadPath, p.Oid, p.Size) + } + }) + gs.ScanRefs(opts.Include, opts.Exclude, nil) q.Wait() diff --git a/git/githistory/rewriter.go b/git/githistory/rewriter.go index 71c1eb3b..dd545f08 100644 --- a/git/githistory/rewriter.go +++ b/git/githistory/rewriter.go @@ -8,14 +8,11 @@ import ( "strings" "sync" - "github.com/git-lfs/git-lfs/lfs" - "github.com/git-lfs/git-lfs/errors" "github.com/git-lfs/git-lfs/filepathfilter" "github.com/git-lfs/git-lfs/git" "github.com/git-lfs/git-lfs/git/odb" "github.com/git-lfs/git-lfs/tasklog" - 
"github.com/git-lfs/git-lfs/tq" ) // Rewriter allows rewriting topologically equivalent Git histories @@ -175,33 +172,6 @@ func NewRewriter(db *odb.ObjectDatabase, opts ...rewriterOption) *Rewriter { return rewriter } -// ScanForPointers scans through the range of commits given by -// *RewriteOptions.{Left,Right} and adds any pointers matching the rewrite -// filter to the transfer queue to be downloaded -func (r *Rewriter) ScanForPointers(q *tq.TransferQueue, opt *RewriteOptions, gf *lfs.GitFilter) error { - // Obtain a list of commits to scan - commits, err := r.commitsToMigrate(opt) - if err != nil { - return err - } - - waiter := r.l.Waiter("migrate: Scanning commits") - defer waiter.Complete() - - for _, oid := range commits { - commit, err := r.db.Commit(oid) - if err != nil { - return err - } - - if err := r.scanTree(q, gf, commit.TreeID, ""); err != nil { - return err - } - } - - return nil -} - // Rewrite rewrites the range of commits given by *RewriteOptions.{Left,Right} // using the BlobRewriteFn to rewrite the individual blobs. 
func (r *Rewriter) Rewrite(opt *RewriteOptions) ([]byte, error) { @@ -340,71 +310,6 @@ func (r *Rewriter) Rewrite(opt *RewriteOptions) ([]byte, error) { return tip, err } -// scanTree recursively scans through a tree and adds any pointers matching the -// rewrite filter to the transfer queue to be downloaded -func (r *Rewriter) scanTree(q *tq.TransferQueue, gf *lfs.GitFilter, treeOID []byte, path string) error { - tree, err := r.db.Tree(treeOID) - if err != nil { - return err - } - - for _, entry := range tree.Entries { - var fullpath string - if len(path) > 0 { - fullpath = strings.Join([]string{path, entry.Name}, "/") - } else { - fullpath = entry.Name - } - - if !r.allows(entry.Type(), fullpath) { - continue - } - - // If this is a symlink, skip it - if entry.Filemode == 0120000 { - continue - } - - switch entry.Type() { - case odb.BlobObjectType: - // Check if the blob is a pointer, and if so, - // add it to the transfer queue - blob, err := r.db.Blob(entry.Oid) - if err != nil { - return err - } - - ptr, err := lfs.DecodePointer(blob.Contents) - if errors.IsNotAPointerError(err) { - continue - } - if err != nil { - return err - } - - downloadPath, err := gf.ObjectPath(ptr.Oid) - if err != nil { - return err - } - - // Only add files to the transfer queue that aren't already cached - if _, err := os.Stat(downloadPath); os.IsNotExist(err) { - q.Add(entry.Name, downloadPath, ptr.Oid, ptr.Size) - } - - case odb.TreeObjectType: - // Scan all subtrees - err = r.scanTree(q, gf, entry.Oid, fullpath) - - } - if err != nil { - return err - } - } - - return nil -} - // rewriteTree is a recursive function which rewrites a tree given by the ID // "sha" and path "path". 
It uses the given BlobRewriteFn to rewrite all blobs // within the tree, either calling that function or recurring down into subtrees diff --git a/lfs/gitscanner.go b/lfs/gitscanner.go index 15a6d58c..3d0763a8 100644 --- a/lfs/gitscanner.go +++ b/lfs/gitscanner.go @@ -89,7 +89,20 @@ func (s *GitScanner) ScanLeftToRemote(left string, cb GitScannerFoundPointer) er } s.mu.Unlock() - return scanRefsToChan(s, callback, left, "", s.opts(ScanLeftToRemoteMode)) + return scanLeftRightToChan(s, callback, left, "", s.opts(ScanLeftToRemoteMode)) +} + +// ScanRefs scans through all commits reachable by refs contained in "include" and +// not reachable by any refs contained in "exclude" +func (s *GitScanner) ScanRefs(include, exclude []string, cb GitScannerFoundPointer) error { + callback, err := firstGitScannerCallback(cb, s.FoundPointer) + if err != nil { + return err + } + + opts := s.opts(ScanRefsMode) + opts.SkipDeletedBlobs = false + return scanRefsToChan(s, callback, include, exclude, opts) } // ScanRefRange scans through all commits from the given left and right refs, @@ -102,7 +115,7 @@ func (s *GitScanner) ScanRefRange(left, right string, cb GitScannerFoundPointer) opts := s.opts(ScanRefsMode) opts.SkipDeletedBlobs = false - return scanRefsToChan(s, callback, left, right, opts) + return scanLeftRightToChan(s, callback, left, right, opts) } // ScanRefWithDeleted scans through all objects in the given ref, including @@ -121,7 +134,7 @@ func (s *GitScanner) ScanRef(ref string, cb GitScannerFoundPointer) error { opts := s.opts(ScanRefsMode) opts.SkipDeletedBlobs = true - return scanRefsToChan(s, callback, ref, "", opts) + return scanLeftRightToChan(s, callback, ref, "", opts) } // ScanAll scans through all objects in the git repository.
@@ -133,7 +146,7 @@ func (s *GitScanner) ScanAll(cb GitScannerFoundPointer) error { opts := s.opts(ScanAllMode) opts.SkipDeletedBlobs = false - return scanRefsToChan(s, callback, "", "", opts) + return scanLeftRightToChan(s, callback, "", "", opts) } // ScanTree takes a ref and returns WrappedPointer objects in the tree at that diff --git a/lfs/gitscanner_refs.go b/lfs/gitscanner_refs.go index e66daee2..f4619e06 100644 --- a/lfs/gitscanner_refs.go +++ b/lfs/gitscanner_refs.go @@ -33,15 +33,16 @@ func (s *lockableNameSet) Check(blobSha string) (string, bool) { func noopFoundLockable(name string) {} -// scanRefsToChan takes a ref and returns a channel of WrappedPointer objects -// for all Git LFS pointers it finds for that ref. +// scanRefsToChan scans through all commits reachable by refs contained in +// "include" and not reachable by any refs contained in "exclude" and returns +// a channel of WrappedPointer objects for all Git LFS pointers it finds. // Reports unique oids once only, not multiple times if >1 file uses the same content -func scanRefsToChan(scanner *GitScanner, pointerCb GitScannerFoundPointer, refLeft, refRight string, opt *ScanRefsOptions) error { +func scanRefsToChan(scanner *GitScanner, pointerCb GitScannerFoundPointer, include, exclude []string, opt *ScanRefsOptions) error { if opt == nil { panic("no scan ref options") } - revs, err := revListShas([]string{refLeft, refRight}, nil, opt) + revs, err := revListShas(include, exclude, opt) if err != nil { return err } @@ -91,6 +92,13 @@ func scanRefsToChan(scanner *GitScanner, pointerCb GitScannerFoundPointer, refLe return nil } +// scanLeftRightToChan takes a left and a right ref and returns a channel of WrappedPointer +// objects for all Git LFS pointers it finds for those refs.
+// Reports unique oids once only, not multiple times if >1 file uses the same content +func scanLeftRightToChan(scanner *GitScanner, pointerCb GitScannerFoundPointer, refLeft, refRight string, opt *ScanRefsOptions) error { + return scanRefsToChan(scanner, pointerCb, []string{refLeft, refRight}, nil, opt) +} + // revListShas uses git rev-list to return the list of object sha1s // for the given ref. If all is true, ref is ignored. It returns a // channel from which sha1 strings can be read.