From c29c68cce0f70fe0913d0a4c6b2d6e63fa7f8210 Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Wed, 13 Jun 2018 14:10:32 -0700 Subject: [PATCH 01/18] commands: implement `migrate export` command Implement a basic version of the `migrate export` subcommand, which simply traverses git history and smudges any LFS pointers matching given patterns. --- commands/command_migrate.go | 4 +- commands/command_migrate_export.go | 127 +++++++++++++++++++++++++++++ test/test-migrate-export.sh | 31 +++++++ 3 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 commands/command_migrate_export.go create mode 100755 test/test-migrate-export.sh diff --git a/commands/command_migrate.go b/commands/command_migrate.go index fe35c2f8..9c5276fc 100644 --- a/commands/command_migrate.go +++ b/commands/command_migrate.go @@ -297,6 +297,8 @@ func init() { importCmd.Flags().BoolVar(&migrateNoRewrite, "no-rewrite", false, "Add new history without rewriting previous") importCmd.Flags().StringVarP(&migrateCommitMessage, "message", "m", "", "With --no-rewrite, an optional commit message") + exportCmd := NewCommand("export", migrateExportCommand) + RegisterCommand("migrate", nil, func(cmd *cobra.Command) { cmd.PersistentFlags().StringVarP(&includeArg, "include", "I", "", "Include a list of paths") cmd.PersistentFlags().StringVarP(&excludeArg, "exclude", "X", "", "Exclude a list of paths") @@ -306,6 +308,6 @@ func init() { cmd.PersistentFlags().BoolVar(&migrateEverything, "everything", false, "Migrate all local references") cmd.PersistentFlags().BoolVar(&migrateSkipFetch, "skip-fetch", false, "Assume up-to-date remote references.") - cmd.AddCommand(importCmd, info) + cmd.AddCommand(importCmd, info, exportCmd) }) } diff --git a/commands/command_migrate_export.go b/commands/command_migrate_export.go new file mode 100644 index 00000000..43ded6fc --- /dev/null +++ b/commands/command_migrate_export.go @@ -0,0 +1,127 @@ +package commands + +import ( + "bytes" + "fmt" + "os" + 
"path/filepath" + + "github.com/git-lfs/git-lfs/errors" + "github.com/git-lfs/git-lfs/filepathfilter" + "github.com/git-lfs/git-lfs/git" + "github.com/git-lfs/git-lfs/git/githistory" + "github.com/git-lfs/git-lfs/git/odb" + "github.com/git-lfs/git-lfs/lfs" + "github.com/git-lfs/git-lfs/tasklog" + "github.com/git-lfs/git-lfs/tools" + "github.com/spf13/cobra" +) + +func migrateExportCommand(cmd *cobra.Command, args []string) { + l := tasklog.NewLogger(os.Stderr) + defer l.Close() + + db, err := getObjectDatabase() + if err != nil { + ExitWithError(err) + } + defer db.Close() + + rewriter := getHistoryRewriter(cmd, db, l) + + filter := rewriter.Filter() + if len(filter.Include()) <= 0 { + ExitWithError(errors.Errorf("fatal: one or more files must be specified with --include")) + } + + tracked := trackedFromExportFilter(filter) + gitfilter := lfs.NewGitFilter(cfg) + + migrate(args, rewriter, l, &githistory.RewriteOptions{ + Verbose: migrateVerbose, + ObjectMapFilePath: objectMapFilePath, + BlobFn: func(path string, b *odb.Blob) (*odb.Blob, error) { + if filepath.Base(path) == ".gitattributes" { + return b, nil + } + + var buf bytes.Buffer + + if _, err := smudge(gitfilter, &buf, b.Contents, path, false, rewriter.Filter()); err != nil { + return nil, err + } + + return &odb.Blob{ + Contents: &buf, Size: int64(buf.Len()), + }, nil + }, + + TreeCallbackFn: func(path string, t *odb.Tree) (*odb.Tree, error) { + if path != "/" { + // Ignore non-root trees. + return t, nil + } + + ours := tracked + theirs, err := trackedFromAttrs(db, t) + if err != nil { + return nil, err + } + + // Create a blob of the attributes that are optionally + // present in the "t" tree's .gitattributes blob, and + // union in the patterns that we've tracked. + // + // Perform this Union() operation each time we visit a + // root tree such that if the underlying .gitattributes + // is present and has a diff between commits in the + // range of commits to migrate, those changes are + // preserved. 
+ blob, err := trackedToBlob(db, theirs.Clone().Union(ours)) + if err != nil { + return nil, err + } + + // Finally, return a copy of the tree "t" that has the + // new .gitattributes file included/replaced. + return t.Merge(&odb.TreeEntry{ + Name: ".gitattributes", + Filemode: 0100644, + Oid: blob, + }), nil + }, + + UpdateRefs: true, + }) + + // Only perform `git-checkout(1) -f` if the repository is + // non-bare. + if bare, _ := git.IsBare(); !bare { + t := l.Waiter("migrate: checkout") + err := git.Checkout("", nil, true) + t.Complete() + + if err != nil { + ExitWithError(err) + } + } +} + +// trackedFromExportFilter returns an ordered set of strings where each entry is a +// line in the .gitattributes file. It adds/removes the fiter/diff/merge=lfs +// attributes based on patterns included/excluded in the given filter. Since +// `migrate export` removes files from Git LFS, it will remove attributes for included +// files, and add attributes for excluded files +func trackedFromExportFilter(filter *filepathfilter.Filter) *tools.OrderedSet { + tracked := tools.NewOrderedSet() + + for _, include := range filter.Include() { + tracked.Add(fmt.Sprintf("%s text -filter -merge -diff", escapeAttrPattern(include))) + } + + for _, exclude := range filter.Exclude() { + tracked.Add(fmt.Sprintf("%s filter=lfs diff=lfs merge=lfs -text", escapeAttrPattern(exclude))) + } + + return tracked +} diff --git a/test/test-migrate-export.sh b/test/test-migrate-export.sh new file mode 100755 index 00000000..af21defe --- /dev/null +++ b/test/test-migrate-export.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +. "test/test-migrate-fixtures.sh" +. 
"test/testlib.sh" + +begin_test "migrate export (default branch)" +( + set -e + + setup_multiple_local_branches + + md_oid="$(calc_oid "$(git cat-file -p :a.md)")" + txt_oid="$(calc_oid "$(git cat-file -p :a.txt)")" + + git lfs migrate import + + assert_pointer "refs/heads/master" "a.md" "$md_oid" "140" + assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120" + + git lfs migrate export --include="*.md, *.txt" + + [ ! $(assert_pointer "refs/heads/master" "a.md" "$md_oid" "140") ] + [ ! $(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120") ] + + master="$(git rev-parse refs/heads/master)" + master_attrs="$(git cat-file -p "$master:.gitattributes")" + + echo "$master_attrs" | grep -q "*.md text -filter -merge -diff" + echo "$master_attrs" | grep -q "*.txt text -filter -merge -diff" +) +end_test From 433d1851f958727656728a3feecdc9ef3d210125 Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Thu, 21 Jun 2018 14:03:54 -0700 Subject: [PATCH 02/18] commands: pre-download LFS objects on export If a valid remote is present, pre-download any LFS objects prior to a history rewrite using the transfer queue. 
--- commands/command_migrate_export.go | 32 ++++++++++- git/githistory/rewriter.go | 86 ++++++++++++++++++++++++++++++ test/test-migrate-export.sh | 31 +++++++++++ test/test-migrate-fixtures.sh | 30 +++++++++++ 4 files changed, 177 insertions(+), 2 deletions(-) diff --git a/commands/command_migrate_export.go b/commands/command_migrate_export.go index 43ded6fc..ba271557 100644 --- a/commands/command_migrate_export.go +++ b/commands/command_migrate_export.go @@ -37,7 +37,7 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { tracked := trackedFromExportFilter(filter) gitfilter := lfs.NewGitFilter(cfg) - migrate(args, rewriter, l, &githistory.RewriteOptions{ + opts := &githistory.RewriteOptions{ Verbose: migrateVerbose, ObjectMapFilePath: objectMapFilePath, BlobFn: func(path string, b *odb.Blob) (*odb.Blob, error) { @@ -92,7 +92,35 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { }, UpdateRefs: true, - }) + } + + requireInRepo() + + opts, err = rewriteOptions(args, opts, l) + if err != nil { + ExitWithError(err) + } + + // If we have a valid remote, pre-download all objects using the Transfer Queue + if remoteURL := getAPIClient().Endpoints.RemoteEndpoint("download", cfg.Remote()).Url; remoteURL != "" { + q := newDownloadQueue(getTransferManifestOperationRemote("Download", cfg.Remote()), cfg.Remote()) + if err := rewriter.ScanForPointers(q, opts); err != nil { + ExitWithError(err) + } + + q.Wait() + + for _, err := range q.Errors() { + if err != nil { + ExitWithError(err) + } + } + } + + // Perform the rewrite + if _, err := rewriter.Rewrite(opts); err != nil { + ExitWithError(err) + } // Only perform `git-checkout(1) -f` if the repository is // non-bare. 
diff --git a/git/githistory/rewriter.go b/git/githistory/rewriter.go index dd545f08..90516679 100644 --- a/git/githistory/rewriter.go +++ b/git/githistory/rewriter.go @@ -8,11 +8,14 @@ import ( "strings" "sync" + "github.com/git-lfs/git-lfs/lfs" + "github.com/git-lfs/git-lfs/errors" "github.com/git-lfs/git-lfs/filepathfilter" "github.com/git-lfs/git-lfs/git" "github.com/git-lfs/git-lfs/git/odb" "github.com/git-lfs/git-lfs/tasklog" + "github.com/git-lfs/git-lfs/tq" ) // Rewriter allows rewriting topologically equivalent Git histories @@ -172,6 +175,33 @@ func NewRewriter(db *odb.ObjectDatabase, opts ...rewriterOption) *Rewriter { return rewriter } +// ScanForPointers scans through the range of commits given by +// *RewriteOptions.{Left,Right} and adds any pointers matching the rewrite +// filter to the transfer queue to be downloaded +func (r *Rewriter) ScanForPointers(q *tq.TransferQueue, opt *RewriteOptions) error { + // Obtain a list of commits to scan + commits, err := r.commitsToMigrate(opt) + if err != nil { + return err + } + + waiter := r.l.Waiter("migrate: Scanning commits") + defer waiter.Complete() + + for _, oid := range commits { + commit, err := r.db.Commit(oid) + if err != nil { + return err + } + + if err := r.scanTree(q, commit.TreeID, ""); err != nil { + return err + } + } + + return nil +} + // Rewrite rewrites the range of commits given by *RewriteOptions.{Left,Right} // using the BlobRewriteFn to rewrite the individual blobs. 
func (r *Rewriter) Rewrite(opt *RewriteOptions) ([]byte, error) { @@ -310,6 +340,62 @@ func (r *Rewriter) Rewrite(opt *RewriteOptions) ([]byte, error) { return tip, err } +// scanTree recursively scans through a tree and adds any pointers matching the +// rewrite filter to the transfer queue to be downloaded +func (r *Rewriter) scanTree(q *tq.TransferQueue, treeOID []byte, path string) error { + tree, err := r.db.Tree(treeOID) + if err != nil { + return err + } + + for _, entry := range tree.Entries { + var fullpath string + if len(path) > 0 { + fullpath = strings.Join([]string{path, entry.Name}, "/") + } else { + fullpath = entry.Name + } + + if !r.allows(entry.Type(), fullpath) { + continue + } + + // If this is a symlink, skip it + if entry.Filemode == 0120000 { + continue + } + + switch entry.Type() { + case odb.BlobObjectType: + // Check if the blob is a pointer, and if so, + // add it to the transfer queue + blob, err := r.db.Blob(entry.Oid) + if err != nil { + return err + } + + ptr, err := lfs.DecodePointer(blob.Contents) + if errors.IsNotAPointerError(err) { + continue + } + if err != nil { + return err + } + + q.Add(entry.Name, fullpath, ptr.Oid, ptr.Size) + case odb.TreeObjectType: + // Scan all subtrees + err = r.scanTree(q, entry.Oid, fullpath) + + } + if err != nil { + return err + } + } + + return nil +} + // rewriteTree is a recursive function which rewrites a tree given by the ID // "sha" and path "path". 
It uses the given BlobRewriteFn to rewrite all blobs // within the tree, either calling that function or recurring down into subtrees diff --git a/test/test-migrate-export.sh b/test/test-migrate-export.sh index af21defe..7ee4ce10 100755 --- a/test/test-migrate-export.sh +++ b/test/test-migrate-export.sh @@ -29,3 +29,34 @@ begin_test "migrate export (default branch)" echo "$master_attrs" | grep -q "*.txt text -filter -merge -diff" ) end_test + +begin_test "migrate export (with remote)" +( + set -e + + setup_single_remote_branch_tracked + + md_oid="$(calc_oid "$(cat a.md)")" + txt_oid="$(calc_oid "$(cat a.txt)")" + + assert_pointer "refs/heads/master" "a.md" "$md_oid" "50" + assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "30" + + assert_pointer "refs/remotes/origin/master" "a.md" "$md_oid" "50" + assert_pointer "refs/remotes/origin/master" "a.txt" "$txt_oid" "30" + + git lfs migrate export --everything --include="*.md, *.txt" + + [ ! $(assert_pointer "refs/heads/master" "a.md" "$md_oid" "50") ] + [ ! $(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "30") ] + + [ ! $(assert_pointer "refs/remotes/origin/master" "a.md" "$md_oid" "50") ] + [ ! 
$(assert_pointer "refs/remotes/origin/master" "a.txt" "$txt_oid" "30") ] + + master="$(git rev-parse refs/heads/master)" + master_attrs="$(git cat-file -p "$master:.gitattributes")" + + echo "$master_attrs" | grep -q "*.md text -filter -merge -diff" + echo "$master_attrs" | grep -q "*.txt text -filter -merge -diff" +) +end_test diff --git a/test/test-migrate-fixtures.sh b/test/test-migrate-fixtures.sh index 5beee7d9..0dea5c25 100755 --- a/test/test-migrate-fixtures.sh +++ b/test/test-migrate-fixtures.sh @@ -201,6 +201,36 @@ setup_single_remote_branch_with_gitattrs() { git commit -m "add .gitattributes" } +# Creates a repo identical to setup_single_remote_branch, except with *.md and +# *.txt files tracked by Git LFS, and all commits pushed to master +setup_single_remote_branch_tracked() { + set -e + + reponame="migrate-info-single-remote-branch" + + remove_and_create_remote_repo "$reponame" + + git lfs track "*.md" "*.txt" + git add .gitattributes + git commit -m "initial commit" + + base64 < /dev/urandom | head -c 120 > a.txt + base64 < /dev/urandom | head -c 140 > a.md + + git add a.txt a.md + git commit -m "add a.{txt,md}" + + git push origin master + + base64 < /dev/urandom | head -c 30 > a.txt + base64 < /dev/urandom | head -c 50 > a.md + + git add a.md a.txt + git commit -m "add an additional 30, 50 bytes to a.{txt,md}" + + git push origin master +} + # setup_multiple_remote_branches creates a repository as follows: # # C From 159c5fa2e3aa9778057b7bb1271d2aa3088c0f7b Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Thu, 21 Jun 2018 15:43:34 -0700 Subject: [PATCH 03/18] commands: download export files to proper place When using the transfer queue to download files on export, ensure they are downloaded to the cache directory --- commands/command_migrate_export.go | 2 +- git/githistory/rewriter.go | 15 ++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/commands/command_migrate_export.go b/commands/command_migrate_export.go index 
ba271557..b234eac9 100644 --- a/commands/command_migrate_export.go +++ b/commands/command_migrate_export.go @@ -104,7 +104,7 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { // If we have a valid remote, pre-download all objects using the Transfer Queue if remoteURL := getAPIClient().Endpoints.RemoteEndpoint("download", cfg.Remote()).Url; remoteURL != "" { q := newDownloadQueue(getTransferManifestOperationRemote("Download", cfg.Remote()), cfg.Remote()) - if err := rewriter.ScanForPointers(q, opts); err != nil { + if err := rewriter.ScanForPointers(q, opts, gitfilter); err != nil { ExitWithError(err) } diff --git a/git/githistory/rewriter.go b/git/githistory/rewriter.go index 90516679..78a6ba7a 100644 --- a/git/githistory/rewriter.go +++ b/git/githistory/rewriter.go @@ -178,7 +178,7 @@ func NewRewriter(db *odb.ObjectDatabase, opts ...rewriterOption) *Rewriter { // ScanForPointers scans through the range of commits given by // *RewriteOptions.{Left,Right} and adds any pointers matching the rewrite // filter to the transfer queue to be downloaded -func (r *Rewriter) ScanForPointers(q *tq.TransferQueue, opt *RewriteOptions) error { +func (r *Rewriter) ScanForPointers(q *tq.TransferQueue, opt *RewriteOptions, gf *lfs.GitFilter) error { // Obtain a list of commits to scan commits, err := r.commitsToMigrate(opt) if err != nil { @@ -194,7 +194,7 @@ func (r *Rewriter) ScanForPointers(q *tq.TransferQueue, opt *RewriteOptions) err return err } - if err := r.scanTree(q, commit.TreeID, ""); err != nil { + if err := r.scanTree(q, gf, commit.TreeID, ""); err != nil { return err } } @@ -342,7 +342,7 @@ func (r *Rewriter) Rewrite(opt *RewriteOptions) ([]byte, error) { // scanTree recursively scans through a tree and adds any pointers matching the // rewrite filter to the transfer queue to be downloaded -func (r *Rewriter) scanTree(q *tq.TransferQueue, treeOID []byte, path string) error { +func (r *Rewriter) scanTree(q *tq.TransferQueue, gf *lfs.GitFilter, treeOID 
[]byte, path string) error { tree, err := r.db.Tree(treeOID) if err != nil { return err @@ -382,10 +382,15 @@ func (r *Rewriter) scanTree(q *tq.TransferQueue, treeOID []byte, path string) er return err } - q.Add(entry.Name, fullpath, ptr.Oid, ptr.Size) + downloadPath, err := gf.ObjectPath(ptr.Oid) + if err != nil { + return err + } + + q.Add(entry.Name, downloadPath, ptr.Oid, ptr.Size) case odb.TreeObjectType: // Scan all subtrees - err = r.scanTree(q, entry.Oid, fullpath) + err = r.scanTree(q, gf, entry.Oid, fullpath) } if err != nil { From 3ab80db213be1357f8bb28b576175c916023de99 Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Fri, 22 Jun 2018 11:48:27 -0700 Subject: [PATCH 04/18] githistory: only download non-cached files When scanning a tree for pointers, don't download files that already exist in the cache --- git/githistory/rewriter.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/git/githistory/rewriter.go b/git/githistory/rewriter.go index 78a6ba7a..71c1eb3b 100644 --- a/git/githistory/rewriter.go +++ b/git/githistory/rewriter.go @@ -387,7 +387,11 @@ func (r *Rewriter) scanTree(q *tq.TransferQueue, gf *lfs.GitFilter, treeOID []by return err } - q.Add(entry.Name, downloadPath, ptr.Oid, ptr.Size) + // Only add files to the transfer queue that aren't already cached + if _, err := os.Stat(downloadPath); os.IsNotExist(err) { + q.Add(entry.Name, downloadPath, ptr.Oid, ptr.Size) + } + case odb.TreeObjectType: // Scan all subtrees err = r.scanTree(q, gf, entry.Oid, fullpath) From 966762b8abcba44322ed1f0161ad32b8cadfd9bf Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Fri, 22 Jun 2018 17:11:44 -0700 Subject: [PATCH 05/18] commands: add additional export test coverage Add additional integration tests to test various functionality of the `migrate export` command --- test/test-migrate-export.sh | 245 +++++++++++++++++++++++++++++++++- test/test-migrate-fixtures.sh | 69 +++++++++- 2 files changed, 307 insertions(+), 7 deletions(-) 
diff --git a/test/test-migrate-export.sh b/test/test-migrate-export.sh index 7ee4ce10..39ddbf75 100755 --- a/test/test-migrate-export.sh +++ b/test/test-migrate-export.sh @@ -7,26 +7,36 @@ begin_test "migrate export (default branch)" ( set -e - setup_multiple_local_branches + setup_multiple_local_branches_tracked - md_oid="$(calc_oid "$(git cat-file -p :a.md)")" - txt_oid="$(calc_oid "$(git cat-file -p :a.txt)")" + md_oid="$(calc_oid "$(cat a.md)")" + txt_oid="$(calc_oid "$(cat a.txt)")" - git lfs migrate import + git checkout my-feature + md_feature_oid="$(calc_oid "$(cat a.md)")" + git checkout master assert_pointer "refs/heads/master" "a.md" "$md_oid" "140" assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120" + assert_pointer "refs/heads/my-feature" "a.md" "$md_feature_oid" "30" git lfs migrate export --include="*.md, *.txt" [ ! $(assert_pointer "refs/heads/master" "a.md" "$md_oid" "140") ] [ ! $(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120") ] + assert_pointer "refs/heads/my-feature" "a.md" "$md_feature_oid" "30" master="$(git rev-parse refs/heads/master)" + feature="$(git rev-parse refs/heads/my-feature)" + master_attrs="$(git cat-file -p "$master:.gitattributes")" + feature_attrs="$(git cat-file -p "$feature:.gitattributes")" echo "$master_attrs" | grep -q "*.md text -filter -merge -diff" echo "$master_attrs" | grep -q "*.txt text -filter -merge -diff" + + [ ! $(echo "$feature_attrs" | grep -q "*.md text -filter -merge -diff") ] + [ ! 
$(echo "$feature_attrs" | grep -q "*.txt text -filter -merge -diff") ] ) end_test @@ -36,6 +46,8 @@ begin_test "migrate export (with remote)" setup_single_remote_branch_tracked + git push origin master + md_oid="$(calc_oid "$(cat a.md)")" txt_oid="$(calc_oid "$(cat a.txt)")" @@ -60,3 +72,228 @@ begin_test "migrate export (with remote)" echo "$master_attrs" | grep -q "*.txt text -filter -merge -diff" ) end_test + +begin_test "migrate export (include/exclude args)" +( + set -e + + setup_multiple_local_branches_tracked + + md_oid="$(calc_oid "$(cat a.md)")" + txt_oid="$(calc_oid "$(cat a.txt)")" + + assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120" + + git lfs migrate export --include="*" --exclude="a.md" + + [ ! $(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120") ] + assert_pointer "refs/heads/master" "a.md" "$md_oid" "140" + + master="$(git rev-parse refs/heads/master)" + + master_attrs="$(git cat-file -p "$master:.gitattributes")" + + echo "$master_attrs" | grep -q "* text -filter -merge -diff" + echo "$master_attrs" | grep -q "a.md filter=lfs diff=lfs merge=lfs" + +) +end_test + +begin_test "migrate export (bare repository)" +( + set -e + + setup_single_remote_branch_tracked + git push origin master + make_bare + + git lfs migrate export --everything --include="*" +) +end_test + +begin_test "migrate export (given branch)" +( + set -e + + setup_multiple_local_branches_tracked + + md_oid="$(calc_oid "$(cat a.md)")" + txt_oid="$(calc_oid "$(cat a.txt)")" + + git checkout my-feature + md_feature_oid="$(calc_oid "$(cat a.md)")" + git checkout master + + assert_pointer "refs/heads/my-feature" "a.md" "$md_feature_oid" "30" + assert_pointer "refs/heads/my-feature" "a.txt" "$txt_oid" "120" + assert_pointer "refs/heads/master" "a.md" "$md_oid" "140" + assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120" + + git lfs migrate export --include="*.md,*.txt" my-feature + + [ ! 
$(assert_pointer "refs/heads/my-feature" "a.md" "$md_feature_oid" "30") ] + [ ! $(assert_pointer "refs/heads/my-feature" "a.txt" "$txt_oid" "120") ] + [ ! $(assert_pointer "refs/heads/master" "a.md" "$md_oid" "140") ] + [ ! $(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120") ] + + master="$(git rev-parse refs/heads/master)" + feature="$(git rev-parse refs/heads/my-feature)" + + master_attrs="$(git cat-file -p "$master:.gitattributes")" + feature_attrs="$(git cat-file -p "$feature:.gitattributes")" + + echo "$master_attrs" | grep -q "*.md text -filter -merge -diff" + echo "$master_attrs" | grep -q "*.txt text -filter -merge -diff" + echo "$feature_attrs" | grep -q "*.md text -filter -merge -diff" + echo "$feature_attrs" | grep -q "*.txt text -filter -merge -diff" +) +end_test + +begin_test "migrate export (no filter)" +( + set -e + + setup_multiple_local_branches_tracked + + git lfs migrate export 2>&1 | tee migrate.log + if [ ${PIPESTATUS[0]} -eq 0 ]; then + echo >&2 "fatal: expected git lfs migrate export to fail, didn't" + exit 1 + fi + + grep "fatal: one or more files must be specified with --include" migrate.log +) +end_test + +begin_test "migrate export (exclude remote refs)" +( + set -e + + setup_single_remote_branch_tracked + + md_oid="$(calc_oid "$(cat a.md)")" + txt_oid="$(calc_oid "$(cat a.txt)")" + + git checkout refs/remotes/origin/master + md_remote_oid="$(calc_oid "$(cat a.md)")" + txt_remote_oid="$(calc_oid "$(cat a.txt)")" + git checkout master + + assert_pointer "refs/heads/master" "a.md" "$md_oid" "50" + assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "30" + + assert_pointer "refs/remotes/origin/master" "a.md" "$md_remote_oid" "140" + assert_pointer "refs/remotes/origin/master" "a.txt" "$txt_remote_oid" "120" + + git lfs migrate export --include="*.md,*.txt" + + [ ! $(assert_pointer "refs/heads/master" "a.md" "$md_oid" "50") ] + [ ! 
$(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "30") ] + + assert_pointer "refs/remotes/origin/master" "a.md" "$md_remote_oid" "140" + assert_pointer "refs/remotes/origin/master" "a.txt" "$txt_remote_oid" "120" + + master="$(git rev-parse refs/heads/master)" + remote="$(git rev-parse refs/remotes/origin/master)" + + master_attrs="$(git cat-file -p "$master:.gitattributes")" + remote_attrs="$(git cat-file -p "$remote:.gitattributes")" + + echo "$master_attrs" | grep -q "*.md text -filter -merge -diff" + echo "$master_attrs" | grep -q "*.txt text -filter -merge -diff" + + [ ! $(echo "$remote_attrs" | grep -q "*.md text -filter -merge -diff") ] + [ ! $(echo "$remote_attrs" | grep -q "*.txt text -filter -merge -diff") ] +) +end_test + +begin_test "migrate export (--skip-fetch)" +( + set -e + + setup_single_remote_branch_tracked + + md_master_oid="$(calc_oid "$(cat a.md)")" + txt_master_oid="$(calc_oid "$(cat a.txt)")" + + git checkout refs/remotes/origin/master + md_remote_oid="$(calc_oid "$(cat a.md)")" + txt_remote_oid="$(calc_oid "$(cat a.txt)")" + git checkout master + + git tag pseudo-remote "$(git rev-parse refs/remotes/origin/master)" + # Remove the refs/remotes/origin/master ref, and instruct 'git lfs migrate' to + # not fetch it. + git update-ref -d refs/remotes/origin/master + + assert_pointer "refs/heads/master" "a.md" "$md_master_oid" "50" + assert_pointer "pseudo-remote" "a.md" "$md_remote_oid" "140" + assert_pointer "refs/heads/master" "a.txt" "$txt_master_oid" "30" + assert_pointer "pseudo-remote" "a.txt" "$txt_remote_oid" "120" + + git lfs migrate export --skip-fetch --include="*.md,*.txt" + + [ ! $(assert_pointer "refs/heads/master" "a.md" "$md_master_oid" "50") ] + [ ! $(assert_pointer "pseudo-remote" "a.md" "$md_remote_oid" "140") ] + [ ! $(assert_pointer "refs/heads/master" "a.txt" "$txt_master_oid" "30") ] + [ ! 
$(assert_pointer "pseudo-remote" "a.txt" "$txt_remote_oid" "120") ] + + master="$(git rev-parse refs/heads/master)" + remote="$(git rev-parse pseudo-remote)" + + master_attrs="$(git cat-file -p "$master:.gitattributes")" + remote_attrs="$(git cat-file -p "$remote:.gitattributes")" + + echo "$master_attrs" | grep -q "*.md text -filter -merge -diff" + echo "$master_attrs" | grep -q "*.txt text -filter -merge -diff" + echo "$remote_attrs" | grep -q "*.md text -filter -merge -diff" + echo "$remote_attrs" | grep -q "*.txt text -filter -merge -diff" +) +end_test + +begin_test "migrate export (include/exclude ref)" +( + set -e + + setup_multiple_remote_branches_gitattrs + + md_master_oid="$(calc_oid "$(cat a.md)")" + txt_master_oid="$(calc_oid "$(cat a.txt)")" + + git checkout refs/remotes/origin/master + md_remote_oid="$(calc_oid "$(cat a.md)")" + txt_remote_oid="$(calc_oid "$(cat a.txt)")" + + git checkout my-feature + md_feature_oid="$(calc_oid "$(cat a.md)")" + txt_feature_oid="$(calc_oid "$(cat a.txt)")" + + git checkout master + + git lfs migrate export \ + --include="*.txt" \ + --include-ref=refs/heads/my-feature \ + --exclude-ref=refs/heads/master + + assert_pointer "refs/heads/master" "a.md" "$md_master_oid" "21" + assert_pointer "refs/heads/master" "a.txt" "$txt_master_oid" "20" + + assert_pointer "refs/remotes/origin/master" "a.md" "$md_remote_oid" "11" + assert_pointer "refs/remotes/origin/master" "a.txt" "$txt_remote_oid" "10" + + assert_pointer "refs/heads/my-feature" "a.md" "$md_feature_oid" "31" + [ ! $(assert_pointer "refs/heads/my-feature" "a.txt" "$txt_feature_oid" "30") ] + + master="$(git rev-parse refs/heads/master)" + feature="$(git rev-parse refs/heads/my-feature)" + remote="$(git rev-parse refs/remotes/origin/master)" + + master_attrs="$(git cat-file -p "$master:.gitattributes")" + remote_attrs="$(git cat-file -p "$remote:.gitattributes")" + feature_attrs="$(git cat-file -p "$feature:.gitattributes")" + + [ ! 
$(echo "$master_attrs" | grep -q "*.txt text -filter -merge -diff") ] + [ ! $(echo "$remote_attrs" | grep -q "*.txt text -filter -merge -diff") ] + echo "$feature_attrs" | grep -q "*.txt text -filter -merge -diff" +) +end_test diff --git a/test/test-migrate-fixtures.sh b/test/test-migrate-fixtures.sh index 0dea5c25..e1d2a62b 100755 --- a/test/test-migrate-fixtures.sh +++ b/test/test-migrate-fixtures.sh @@ -134,6 +134,36 @@ setup_multiple_local_branches_with_gitattrs() { git commit -m "add .gitattributes" } +# setup_multiple_local_branches_tracked creates a repo with exactly the same +# structure as in setup_multiple_local_branches, but with all files tracked by +# Git LFS +setup_multiple_local_branches_tracked() { + set -e + + reponame="migrate-info-multiple-local-branches" + + remove_and_create_local_repo "$reponame" + + git lfs track "*.txt" "*.md" + git add .gitattributes + git commit -m "initial commit" + + base64 < /dev/urandom | head -c 120 > a.txt + base64 < /dev/urandom | head -c 140 > a.md + + git add a.txt a.md + git commit -m "add a.{txt,md}" + + git checkout -b my-feature + + base64 < /dev/urandom | head -c 30 > a.md + + git add a.md + git commit -m "add an additional 30 bytes to a.md" + + git checkout master +} + # setup_local_branch_with_space creates a repository as follows: # # A @@ -202,7 +232,7 @@ setup_single_remote_branch_with_gitattrs() { } # Creates a repo identical to setup_single_remote_branch, except with *.md and -# *.txt files tracked by Git LFS, and all commits pushed to master +# *.txt files tracked by Git LFS setup_single_remote_branch_tracked() { set -e @@ -227,8 +257,6 @@ setup_single_remote_branch_tracked() { git add a.md a.txt git commit -m "add an additional 30, 50 bytes to a.{txt,md}" - - git push origin master } # setup_multiple_remote_branches creates a repository as follows: @@ -277,6 +305,41 @@ setup_multiple_remote_branches() { git checkout master } +# Creates a repo identical to that in setup_multiple_remote_branches(), but 
+# with all files tracked by Git LFS +setup_multiple_remote_branches_gitattrs() { + set -e + + reponame="migrate-info-exclude-remote-refs-given-branch" + + remove_and_create_remote_repo "$reponame" + + git lfs track "*.txt" "*.md" + git add .gitattributes + git commit -m "initial commit" + + base64 < /dev/urandom | head -c 10 > a.txt + base64 < /dev/urandom | head -c 11 > a.md + git add a.txt a.md + git commit -m "add 10, 11 bytes, a.{txt,md}" + + git push origin master + + base64 < /dev/urandom | head -c 20 > a.txt + base64 < /dev/urandom | head -c 21 > a.md + git add a.txt a.md + git commit -m "add 20, 21 bytes, a.{txt,md}" + + git checkout -b my-feature + + base64 < /dev/urandom | head -c 30 > a.txt + base64 < /dev/urandom | head -c 31 > a.md + git add a.txt a.md + git commit -m "add 30, 31 bytes, a.{txt,md}" + + git checkout master +} + # setup_single_local_branch_with_tags creates a repository as follows: # # A---B From 7632a205d378709bcf1d3733c31a0f43668c4821 Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Mon, 25 Jun 2018 13:06:43 -0700 Subject: [PATCH 06/18] commands: add additional flags to export Add support and tests for the `--verbose` and `--object-map` flags to the `migrate export` command --- commands/command_migrate.go | 4 +++- test/test-migrate-export.sh | 27 +++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/commands/command_migrate.go b/commands/command_migrate.go index 9c5276fc..0df4e2d3 100644 --- a/commands/command_migrate.go +++ b/commands/command_migrate.go @@ -298,6 +298,8 @@ func init() { importCmd.Flags().StringVarP(&migrateCommitMessage, "message", "m", "", "With --no-rewrite, an optional commit message") exportCmd := NewCommand("export", migrateExportCommand) + exportCmd.Flags().BoolVar(&migrateVerbose, "verbose", false, "Verbose logging") + exportCmd.Flags().StringVar(&objectMapFilePath, "object-map", "", "Object map file") RegisterCommand("migrate", nil, func(cmd *cobra.Command) { 
cmd.PersistentFlags().StringVarP(&includeArg, "include", "I", "", "Include a list of paths") @@ -308,6 +310,6 @@ func init() { cmd.PersistentFlags().BoolVar(&migrateEverything, "everything", false, "Migrate all local references") cmd.PersistentFlags().BoolVar(&migrateSkipFetch, "skip-fetch", false, "Assume up-to-date remote references.") - cmd.AddCommand(importCmd, info, exportCmd) + cmd.AddCommand(exportCmd, importCmd, info) }) } diff --git a/test/test-migrate-export.sh b/test/test-migrate-export.sh index 39ddbf75..24b49760 100755 --- a/test/test-migrate-export.sh +++ b/test/test-migrate-export.sh @@ -297,3 +297,30 @@ begin_test "migrate export (include/exclude ref)" echo "$feature_attrs" | grep -q "*.txt text -filter -merge -diff" ) end_test + +begin_test "migrate export (--object-map)" +( + set -e + + setup_multiple_local_branches_tracked + + output_dir=$(mktemp -d) + + git log --all --pretty='format:%H' > "${output_dir}/old_sha.txt" + git lfs migrate export --everything --include="*" --object-map "${output_dir}/object-map.txt" + git log --all --pretty='format:%H' > "${output_dir}/new_sha.txt" + paste -d',' "${output_dir}/old_sha.txt" "${output_dir}/new_sha.txt" > "${output_dir}/expected-map.txt" + + diff -u <(sort "${output_dir}/expected-map.txt") <(sort "${output_dir}/object-map.txt") +) +end_test + +begin_test "migrate export (--verbose)" +( + set -e + + setup_multiple_local_branches_tracked + + git lfs migrate export --everything --include="*" --verbose 2>&1 | grep -q "migrate: commit " +) +end_test From 785200e85d831bc2977dc70c9213938b006aa29e Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Mon, 25 Jun 2018 14:10:31 -0700 Subject: [PATCH 07/18] commands: fix comment strings --- commands/command_migrate_export.go | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/commands/command_migrate_export.go b/commands/command_migrate_export.go index b234eac9..3ba1f9be 100644 --- a/commands/command_migrate_export.go +++ 
b/commands/command_migrate_export.go @@ -122,8 +122,7 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { ExitWithError(err) } - // Only perform `git-checkout(1) -f` if the repository is - // non-bare. + // Only perform `git-checkout(1) -f` if the repository is non-bare. if bare, _ := git.IsBare(); !bare { t := l.Waiter("migrate: checkout") err := git.Checkout("", nil, true) @@ -135,11 +134,11 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { } } -// trackedFromExportFilter returns an ordered set of strings where each entry is a -// line in the .gitattributes file. It adds/removes the fiter/diff/merge=lfs -// attributes based on patterns included/excluded in the given filter. Since -// `migrate export` removes files from Git LFS, it will remove attributes for included -// files, and add attributes for excluded files +// trackedFromExportFilter returns an ordered set of strings where each entry +// is a line we intend to place in the .gitattributes file. It adds/removes the +// filter/diff/merge=lfs attributes based on patterns included/excluded in the +// given filter. Since `migrate export` removes files from Git LFS, it will +// remove attributes for included files, and add attributes for excluded files func trackedFromExportFilter(filter *filepathfilter.Filter) *tools.OrderedSet { tracked := tools.NewOrderedSet() From 3a07215cc3399a76c383012c05c4ba4e38e2cc94 Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Mon, 25 Jun 2018 14:36:57 -0700 Subject: [PATCH 08/18] commands: directly convert ptr to object When performing an export, directly convert an LFS pointer to an object using the downloaded object file, skipping over the smudge. 
Thanks to @ttaylorr for the implementation of the NewBlobFromFile() function --- commands/command_migrate_export.go | 18 ++++++++------ git/odb/blob.go | 39 ++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/commands/command_migrate_export.go b/commands/command_migrate_export.go index 3ba1f9be..d910c070 100644 --- a/commands/command_migrate_export.go +++ b/commands/command_migrate_export.go @@ -1,7 +1,6 @@ package commands import ( - "bytes" "fmt" "os" "path/filepath" @@ -45,15 +44,20 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { return b, nil } - var buf bytes.Buffer - - if _, err := smudge(gitfilter, &buf, b.Contents, path, false, rewriter.Filter()); err != nil { + ptr, err := lfs.DecodePointer(b.Contents) + if errors.IsNotAPointerError(err) { + return b, nil + } + if err != nil { return nil, err } - return &odb.Blob{ - Contents: &buf, Size: int64(buf.Len()), - }, nil + downloadPath, err := gitfilter.ObjectPath(ptr.Oid) + if err != nil { + return nil, err + } + + return odb.NewBlobFromFile(downloadPath) }, TreeCallbackFn: func(path string, t *odb.Tree) (*odb.Tree, error) { diff --git a/git/odb/blob.go b/git/odb/blob.go index 7f9d66ae..0862b693 100644 --- a/git/odb/blob.go +++ b/git/odb/blob.go @@ -3,6 +3,9 @@ package odb import ( "bytes" "io" + "os" + + "github.com/git-lfs/git-lfs/errors" ) // Blob represents a Git object of type "blob". @@ -27,6 +30,42 @@ func NewBlobFromBytes(contents []byte) *Blob { } } +// NewBlobFromFile returns a new *Blob that contains the contents of the file +// at location "path" on disk. NewBlobFromFile does not read the file ahead of +// time, and instead defers this task until encoding the blob to the object +// database. +// +// If the file cannot be opened or stat(1)-ed, an error will be returned. +// +// When the blob receives a function call Close(), the file will also be closed, +// and any error encountered in doing so will be returned from Close(). 
+func NewBlobFromFile(path string) (*Blob, error) { + f, err := os.Open(path) + if err != nil { + return nil, errors.Wrapf(err, "git/odb: could not open: %s", + path) + } + + stat, err := f.Stat() + if err != nil { + return nil, errors.Wrapf(err, "git/odb: could not stat %s", + path) + } + + return &Blob{ + Contents: f, + Size: stat.Size(), + + closeFn: func() error { + if err := f.Close(); err != nil { + return errors.Wrapf(err, + "git/odb: could not close %s", path) + } + return nil + }, + }, nil +} + // Type implements Object.ObjectType by returning the correct object type for // Blobs, BlobObjectType. func (b *Blob) Type() ObjectType { return BlobObjectType } From 57923cc6938dfbf2872e0874ec2466f2558948ae Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Mon, 25 Jun 2018 14:46:24 -0700 Subject: [PATCH 09/18] commands: use `!` instead of `-` Use `!` to set an attribute to Unspecified instead of `-` to set an attribute to False --- commands/command_migrate_export.go | 2 +- test/test-migrate-export.sh | 44 +++++++++++++++--------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/commands/command_migrate_export.go b/commands/command_migrate_export.go index d910c070..8e9d7604 100644 --- a/commands/command_migrate_export.go +++ b/commands/command_migrate_export.go @@ -147,7 +147,7 @@ func trackedFromExportFilter(filter *filepathfilter.Filter) *tools.OrderedSet { tracked := tools.NewOrderedSet() for _, include := range filter.Include() { - tracked.Add(fmt.Sprintf("%s text -filter -merge -diff", escapeAttrPattern(include))) + tracked.Add(fmt.Sprintf("%s text !filter !merge !diff", escapeAttrPattern(include))) } for _, exclude := range filter.Exclude() { diff --git a/test/test-migrate-export.sh b/test/test-migrate-export.sh index 24b49760..2831d301 100755 --- a/test/test-migrate-export.sh +++ b/test/test-migrate-export.sh @@ -32,11 +32,11 @@ begin_test "migrate export (default branch)" master_attrs="$(git cat-file -p "$master:.gitattributes")" 
feature_attrs="$(git cat-file -p "$feature:.gitattributes")" - echo "$master_attrs" | grep -q "*.md text -filter -merge -diff" - echo "$master_attrs" | grep -q "*.txt text -filter -merge -diff" + echo "$master_attrs" | grep -q "*.md text !filter !merge !diff" + echo "$master_attrs" | grep -q "*.txt text !filter !merge !diff" - [ ! $(echo "$feature_attrs" | grep -q "*.md text -filter -merge -diff") ] - [ ! $(echo "$feature_attrs" | grep -q "*.txt text -filter -merge -diff") ] + [ ! $(echo "$feature_attrs" | grep -q "*.md text !filter !merge !diff") ] + [ ! $(echo "$feature_attrs" | grep -q "*.txt text !filter !merge !diff") ] ) end_test @@ -68,8 +68,8 @@ begin_test "migrate export (with remote)" master="$(git rev-parse refs/heads/master)" master_attrs="$(git cat-file -p "$master:.gitattributes")" - echo "$master_attrs" | grep -q "*.md text -filter -merge -diff" - echo "$master_attrs" | grep -q "*.txt text -filter -merge -diff" + echo "$master_attrs" | grep -q "*.md text !filter !merge !diff" + echo "$master_attrs" | grep -q "*.txt text !filter !merge !diff" ) end_test @@ -93,7 +93,7 @@ begin_test "migrate export (include/exclude args)" master_attrs="$(git cat-file -p "$master:.gitattributes")" - echo "$master_attrs" | grep -q "* text -filter -merge -diff" + echo "$master_attrs" | grep -q "* text !filter !merge !diff" echo "$master_attrs" | grep -q "a.md filter=lfs diff=lfs merge=lfs" ) @@ -142,10 +142,10 @@ begin_test "migrate export (given branch)" master_attrs="$(git cat-file -p "$master:.gitattributes")" feature_attrs="$(git cat-file -p "$feature:.gitattributes")" - echo "$master_attrs" | grep -q "*.md text -filter -merge -diff" - echo "$master_attrs" | grep -q "*.txt text -filter -merge -diff" - echo "$feature_attrs" | grep -q "*.md text -filter -merge -diff" - echo "$feature_attrs" | grep -q "*.txt text -filter -merge -diff" + echo "$master_attrs" | grep -q "*.md text !filter !merge !diff" + echo "$master_attrs" | grep -q "*.txt text !filter !merge !diff" + 
echo "$feature_attrs" | grep -q "*.md text !filter !merge !diff" + echo "$feature_attrs" | grep -q "*.txt text !filter !merge !diff" ) end_test @@ -199,11 +199,11 @@ begin_test "migrate export (exclude remote refs)" master_attrs="$(git cat-file -p "$master:.gitattributes")" remote_attrs="$(git cat-file -p "$remote:.gitattributes")" - echo "$master_attrs" | grep -q "*.md text -filter -merge -diff" - echo "$master_attrs" | grep -q "*.txt text -filter -merge -diff" + echo "$master_attrs" | grep -q "*.md text !filter !merge !diff" + echo "$master_attrs" | grep -q "*.txt text !filter !merge !diff" - [ ! $(echo "$remote_attrs" | grep -q "*.md text -filter -merge -diff") ] - [ ! $(echo "$remote_attrs" | grep -q "*.txt text -filter -merge -diff") ] + [ ! $(echo "$remote_attrs" | grep -q "*.md text !filter !merge !diff") ] + [ ! $(echo "$remote_attrs" | grep -q "*.txt text !filter !merge !diff") ] ) end_test @@ -244,10 +244,10 @@ begin_test "migrate export (--skip-fetch)" master_attrs="$(git cat-file -p "$master:.gitattributes")" remote_attrs="$(git cat-file -p "$remote:.gitattributes")" - echo "$master_attrs" | grep -q "*.md text -filter -merge -diff" - echo "$master_attrs" | grep -q "*.txt text -filter -merge -diff" - echo "$remote_attrs" | grep -q "*.md text -filter -merge -diff" - echo "$remote_attrs" | grep -q "*.txt text -filter -merge -diff" + echo "$master_attrs" | grep -q "*.md text !filter !merge !diff" + echo "$master_attrs" | grep -q "*.txt text !filter !merge !diff" + echo "$remote_attrs" | grep -q "*.md text !filter !merge !diff" + echo "$remote_attrs" | grep -q "*.txt text !filter !merge !diff" ) end_test @@ -292,9 +292,9 @@ begin_test "migrate export (include/exclude ref)" remote_attrs="$(git cat-file -p "$remote:.gitattributes")" feature_attrs="$(git cat-file -p "$feature:.gitattributes")" - [ ! $(echo "$master_attrs" | grep -q "*.txt text -filter -merge -diff") ] - [ ! 
$(echo "$remote_attrs" | grep -q "*.txt text -filter -merge -diff") ] - echo "$feature_attrs" | grep -q "*.txt text -filter -merge -diff" + [ ! $(echo "$master_attrs" | grep -q "*.txt text !filter !merge !diff") ] + [ ! $(echo "$remote_attrs" | grep -q "*.txt text !filter !merge !diff") ] + echo "$feature_attrs" | grep -q "*.txt text !filter !merge !diff" ) end_test From 833a30608956e258f159ee512c42e58994b0a0bc Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Mon, 25 Jun 2018 16:21:55 -0700 Subject: [PATCH 10/18] commands: use gitscanner for export Use the GitScanner to scan through revisions and download pointers with the transfer queue rather than the rewriter --- commands/command_migrate_export.go | 18 +++++- git/githistory/rewriter.go | 95 ------------------------------ lfs/gitscanner.go | 21 +++++-- lfs/gitscanner_refs.go | 16 +++-- 4 files changed, 44 insertions(+), 106 deletions(-) diff --git a/commands/command_migrate_export.go b/commands/command_migrate_export.go index 8e9d7604..cee37792 100644 --- a/commands/command_migrate_export.go +++ b/commands/command_migrate_export.go @@ -108,9 +108,21 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { // If we have a valid remote, pre-download all objects using the Transfer Queue if remoteURL := getAPIClient().Endpoints.RemoteEndpoint("download", cfg.Remote()).Url; remoteURL != "" { q := newDownloadQueue(getTransferManifestOperationRemote("Download", cfg.Remote()), cfg.Remote()) - if err := rewriter.ScanForPointers(q, opts, gitfilter); err != nil { - ExitWithError(err) - } + gs := lfs.NewGitScanner(func(p *lfs.WrappedPointer, err error) { + if err != nil { + return + } + + downloadPath, err := gitfilter.ObjectPath(p.Oid) + if err != nil { + return + } + + if _, err := os.Stat(downloadPath); os.IsNotExist(err) { + q.Add(p.Name, downloadPath, p.Oid, p.Size) + } + }) + gs.ScanRefs(opts.Include, opts.Exclude, nil) q.Wait() diff --git a/git/githistory/rewriter.go b/git/githistory/rewriter.go index 
71c1eb3b..dd545f08 100644 --- a/git/githistory/rewriter.go +++ b/git/githistory/rewriter.go @@ -8,14 +8,11 @@ import ( "strings" "sync" - "github.com/git-lfs/git-lfs/lfs" - "github.com/git-lfs/git-lfs/errors" "github.com/git-lfs/git-lfs/filepathfilter" "github.com/git-lfs/git-lfs/git" "github.com/git-lfs/git-lfs/git/odb" "github.com/git-lfs/git-lfs/tasklog" - "github.com/git-lfs/git-lfs/tq" ) // Rewriter allows rewriting topologically equivalent Git histories @@ -175,33 +172,6 @@ func NewRewriter(db *odb.ObjectDatabase, opts ...rewriterOption) *Rewriter { return rewriter } -// ScanForPointers scans through the range of commits given by -// *RewriteOptions.{Left,Right} and adds any pointers matching the rewrite -// filter to the transfer queue to be downloaded -func (r *Rewriter) ScanForPointers(q *tq.TransferQueue, opt *RewriteOptions, gf *lfs.GitFilter) error { - // Obtain a list of commits to scan - commits, err := r.commitsToMigrate(opt) - if err != nil { - return err - } - - waiter := r.l.Waiter("migrate: Scanning commits") - defer waiter.Complete() - - for _, oid := range commits { - commit, err := r.db.Commit(oid) - if err != nil { - return err - } - - if err := r.scanTree(q, gf, commit.TreeID, ""); err != nil { - return err - } - } - - return nil -} - // Rewrite rewrites the range of commits given by *RewriteOptions.{Left,Right} // using the BlobRewriteFn to rewrite the individual blobs. 
func (r *Rewriter) Rewrite(opt *RewriteOptions) ([]byte, error) { @@ -340,71 +310,6 @@ func (r *Rewriter) Rewrite(opt *RewriteOptions) ([]byte, error) { return tip, err } -// scanTree recursively scans through a tree and adds any pointers matching the -// rewrite filter to the transfer queue to be downloaded -func (r *Rewriter) scanTree(q *tq.TransferQueue, gf *lfs.GitFilter, treeOID []byte, path string) error { - tree, err := r.db.Tree(treeOID) - if err != nil { - return err - } - - for _, entry := range tree.Entries { - var fullpath string - if len(path) > 0 { - fullpath = strings.Join([]string{path, entry.Name}, "/") - } else { - fullpath = entry.Name - } - - if !r.allows(entry.Type(), fullpath) { - continue - } - - // If this is a symlink, skip it - if entry.Filemode == 0120000 { - continue - } - - switch entry.Type() { - case odb.BlobObjectType: - // Check if the blob is a pointer, and if so, - // add it to the transfer queue - blob, err := r.db.Blob(entry.Oid) - if err != nil { - return err - } - - ptr, err := lfs.DecodePointer(blob.Contents) - if errors.IsNotAPointerError(err) { - continue - } - if err != nil { - return err - } - - downloadPath, err := gf.ObjectPath(ptr.Oid) - if err != nil { - return err - } - - // Only add files to the transfer queue that aren't already cached - if _, err := os.Stat(downloadPath); os.IsNotExist(err) { - q.Add(entry.Name, downloadPath, ptr.Oid, ptr.Size) - } - - case odb.TreeObjectType: - // Scan all subtrees - err = r.scanTree(q, gf, entry.Oid, fullpath) - - } - if err != nil { - return err - } - } - - return nil -} - // rewriteTree is a recursive function which rewrites a tree given by the ID // "sha" and path "path". 
It uses the given BlobRewriteFn to rewrite all blobs // within the tree, either calling that function or recurring down into subtrees diff --git a/lfs/gitscanner.go b/lfs/gitscanner.go index 15a6d58c..3d0763a8 100644 --- a/lfs/gitscanner.go +++ b/lfs/gitscanner.go @@ -89,7 +89,20 @@ func (s *GitScanner) ScanLeftToRemote(left string, cb GitScannerFoundPointer) er } s.mu.Unlock() - return scanRefsToChan(s, callback, left, "", s.opts(ScanLeftToRemoteMode)) + return scanLeftRightToChan(s, callback, left, "", s.opts(ScanLeftToRemoteMode)) +} + +// ScanRefs through all commits reachable by refs contained in "include" and +// not reachable by any refs included in "excluded" +func (s *GitScanner) ScanRefs(include, exclude []string, cb GitScannerFoundPointer) error { + callback, err := firstGitScannerCallback(cb, s.FoundPointer) + if err != nil { + return err + } + + opts := s.opts(ScanRefsMode) + opts.SkipDeletedBlobs = false + return scanRefsToChan(s, callback, include, exclude, opts) } // ScanRefRange scans through all commits from the given left and right refs, @@ -102,7 +115,7 @@ func (s *GitScanner) ScanRefRange(left, right string, cb GitScannerFoundPointer) opts := s.opts(ScanRefsMode) opts.SkipDeletedBlobs = false - return scanRefsToChan(s, callback, left, right, opts) + return scanLeftRightToChan(s, callback, left, right, opts) } // ScanRefWithDeleted scans through all objects in the given ref, including @@ -121,7 +134,7 @@ func (s *GitScanner) ScanRef(ref string, cb GitScannerFoundPointer) error { opts := s.opts(ScanRefsMode) opts.SkipDeletedBlobs = true - return scanRefsToChan(s, callback, ref, "", opts) + return scanLeftRightToChan(s, callback, ref, "", opts) } // ScanAll scans through all objects in the git repository. 
@@ -133,7 +146,7 @@ func (s *GitScanner) ScanAll(cb GitScannerFoundPointer) error { opts := s.opts(ScanAllMode) opts.SkipDeletedBlobs = false - return scanRefsToChan(s, callback, "", "", opts) + return scanLeftRightToChan(s, callback, "", "", opts) } // ScanTree takes a ref and returns WrappedPointer objects in the tree at that diff --git a/lfs/gitscanner_refs.go b/lfs/gitscanner_refs.go index e66daee2..f4619e06 100644 --- a/lfs/gitscanner_refs.go +++ b/lfs/gitscanner_refs.go @@ -33,15 +33,16 @@ func (s *lockableNameSet) Check(blobSha string) (string, bool) { func noopFoundLockable(name string) {} -// scanRefsToChan takes a ref and returns a channel of WrappedPointer objects -// for all Git LFS pointers it finds for that ref. +// scanRefsToChan scans through all commits reachable by refs contained in +// "include" and not reachable by any refs included in "excluded" and returns +// a channel of WrappedPointer objects for all Git LFS pointers it finds. // Reports unique oids once only, not multiple times if >1 file uses the same content -func scanRefsToChan(scanner *GitScanner, pointerCb GitScannerFoundPointer, refLeft, refRight string, opt *ScanRefsOptions) error { +func scanRefsToChan(scanner *GitScanner, pointerCb GitScannerFoundPointer, include, exclude []string, opt *ScanRefsOptions) error { if opt == nil { panic("no scan ref options") } - revs, err := revListShas([]string{refLeft, refRight}, nil, opt) + revs, err := revListShas(include, exclude, opt) if err != nil { return err } @@ -91,6 +92,13 @@ func scanRefsToChan(scanner *GitScanner, pointerCb GitScannerFoundPointer, refLe return nil } +// scanLeftRightToChan takes a ref and returns a channel of WrappedPointer objects +// for all Git LFS pointers it finds for that ref. 
+// Reports unique oids once only, not multiple times if >1 file uses the same content +func scanLeftRightToChan(scanner *GitScanner, pointerCb GitScannerFoundPointer, refLeft, refRight string, opt *ScanRefsOptions) error { + return scanRefsToChan(scanner, pointerCb, []string{refLeft, refRight}, nil, opt) +} + // revListShas uses git rev-list to return the list of object sha1s // for the given ref. If all is true, ref is ignored. It returns a // channel from which sha1 strings can be read. From 000a12225ecc7809495c445e670291780987b9a0 Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Mon, 25 Jun 2018 17:08:58 -0700 Subject: [PATCH 11/18] commands: only download objects matching filter --- commands/command_migrate_export.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/commands/command_migrate_export.go b/commands/command_migrate_export.go index cee37792..c46e7c6d 100644 --- a/commands/command_migrate_export.go +++ b/commands/command_migrate_export.go @@ -113,6 +113,10 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { return } + if !filter.Allows(p.Name) { + return + } + downloadPath, err := gitfilter.ObjectPath(p.Oid) if err != nil { return From a1b5770329a6b6fa30ce23adeadcad31401970bf Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Tue, 26 Jun 2018 11:18:35 -0700 Subject: [PATCH 12/18] commands: prune cache on export After performing an export, prune object files for exported files from the cache. 
--- commands/command_migrate_export.go | 22 ++++++++++++++++++- test/test-migrate-export.sh | 34 ++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/commands/command_migrate_export.go b/commands/command_migrate_export.go index c46e7c6d..3db49445 100644 --- a/commands/command_migrate_export.go +++ b/commands/command_migrate_export.go @@ -36,6 +36,8 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { tracked := trackedFromExportFilter(filter) gitfilter := lfs.NewGitFilter(cfg) + var exported []string + opts := &githistory.RewriteOptions{ Verbose: migrateVerbose, ObjectMapFilePath: objectMapFilePath, @@ -57,7 +59,13 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { return nil, err } - return odb.NewBlobFromFile(downloadPath) + newBlob, err := odb.NewBlobFromFile(downloadPath) + if err != nil { + return nil, err + } + + exported = append(exported, downloadPath) + return newBlob, nil }, TreeCallbackFn: func(path string, t *odb.Tree) (*odb.Tree, error) { @@ -142,6 +150,18 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { ExitWithError(err) } + // Prune the cache + for _, object := range exported { + err := os.Remove(object) + if os.IsNotExist(err) { + continue + } + + if err != nil { + ExitWithError(err) + } + } + // Only perform `git-checkout(1) -f` if the repository is non-bare. if bare, _ := git.IsBare(); !bare { t := l.Waiter("migrate: checkout") diff --git a/test/test-migrate-export.sh b/test/test-migrate-export.sh index 2831d301..b5db9cb4 100755 --- a/test/test-migrate-export.sh +++ b/test/test-migrate-export.sh @@ -26,6 +26,10 @@ begin_test "migrate export (default branch)" [ ! 
$(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120") ] assert_pointer "refs/heads/my-feature" "a.md" "$md_feature_oid" "30" + refute_local_object "$md_oid" "140" + refute_local_object "$txt_oid" "120" + assert_local_object "$md_feature_oid" "30" + master="$(git rev-parse refs/heads/master)" feature="$(git rev-parse refs/heads/my-feature)" @@ -65,6 +69,9 @@ begin_test "migrate export (with remote)" [ ! $(assert_pointer "refs/remotes/origin/master" "a.md" "$md_oid" "50") ] [ ! $(assert_pointer "refs/remotes/origin/master" "a.txt" "$txt_oid" "30") ] + refute_local_object "$md_oid" "50" + refute_local_object "$txt_oid" "30" + master="$(git rev-parse refs/heads/master)" master_attrs="$(git cat-file -p "$master:.gitattributes")" @@ -89,6 +96,9 @@ begin_test "migrate export (include/exclude args)" [ ! $(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120") ] assert_pointer "refs/heads/master" "a.md" "$md_oid" "140" + refute_local_object "$txt_oid" "120" + assert_local_object "$md_oid" "140" + master="$(git rev-parse refs/heads/master)" master_attrs="$(git cat-file -p "$master:.gitattributes")" @@ -136,6 +146,10 @@ begin_test "migrate export (given branch)" [ ! $(assert_pointer "refs/heads/master" "a.md" "$md_oid" "140") ] [ ! $(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120") ] + refute_local_object "$md_feature_oid" "30" + refute_local_object "$txt_oid" "120" + refute_local_object "$md_oid" "140" + master="$(git rev-parse refs/heads/master)" feature="$(git rev-parse refs/heads/my-feature)" @@ -190,9 +204,15 @@ begin_test "migrate export (exclude remote refs)" [ ! $(assert_pointer "refs/heads/master" "a.md" "$md_oid" "50") ] [ ! 
$(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "30") ] + refute_local_object "$md_oid" "50" + refute_local_object "$txt_oid" "30" + assert_pointer "refs/remotes/origin/master" "a.md" "$md_remote_oid" "140" assert_pointer "refs/remotes/origin/master" "a.txt" "$txt_remote_oid" "120" + assert_local_object "$md_remote_oid" "140" + assert_local_object "$txt_remote_oid" "120" + master="$(git rev-parse refs/heads/master)" remote="$(git rev-parse refs/remotes/origin/master)" @@ -238,6 +258,11 @@ begin_test "migrate export (--skip-fetch)" [ ! $(assert_pointer "refs/heads/master" "a.txt" "$txt_master_oid" "30") ] [ ! $(assert_pointer "pseudo-remote" "a.txt" "$txt_remote_oid" "120") ] + refute_local_object "$md_master_oid" "50" + refute_local_object "$md_remote_oid" "140" + refute_local_object "$txt_master_oid" "30" + refute_local_object "$txt_remote_oid" "120" + master="$(git rev-parse refs/heads/master)" remote="$(git rev-parse pseudo-remote)" @@ -284,6 +309,15 @@ begin_test "migrate export (include/exclude ref)" assert_pointer "refs/heads/my-feature" "a.md" "$md_feature_oid" "31" [ ! 
$(assert_pointer "refs/heads/my-feature" "a.txt" "$txt_feature_oid" "30") ] + assert_local_object "$md_master_oid" "21" + assert_local_object "$txt_master_oid" "20" + + assert_local_object "$md_remote_oid" "11" + assert_local_object "$txt_remote_oid" "10" + + assert_local_object "$md_feature_oid" "31" + refute_local_object "$txt_feature_oid" "30" + master="$(git rev-parse refs/heads/master)" feature="$(git rev-parse refs/heads/my-feature)" remote="$(git rev-parse refs/remotes/origin/master)" From 84396ab13ee95a963f96e0649352be67c94f938c Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Tue, 26 Jun 2018 15:34:19 -0700 Subject: [PATCH 13/18] test: flush cache in export remote test Flush the object cache in the export remote test before the export is performed to ensure object downloading is tested --- test/test-migrate-export.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/test-migrate-export.sh b/test/test-migrate-export.sh index b5db9cb4..131bbb37 100755 --- a/test/test-migrate-export.sh +++ b/test/test-migrate-export.sh @@ -61,6 +61,9 @@ begin_test "migrate export (with remote)" assert_pointer "refs/remotes/origin/master" "a.md" "$md_oid" "50" assert_pointer "refs/remotes/origin/master" "a.txt" "$txt_oid" "30" + # Flush the cache to ensure all objects have to be downloaded + rm -rf .git/lfs/objects + git lfs migrate export --everything --include="*.md, *.txt" [ ! 
$(assert_pointer "refs/heads/master" "a.md" "$md_oid" "50") ] From c2ca84e4da68cecd35738db4701f642104a7f572 Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Wed, 27 Jun 2018 11:03:23 -0700 Subject: [PATCH 14/18] commands: add --remote flag to export Add a --remote flag to the `migrate export` command allowing specification of a remote from which to download objects --- commands/command_migrate.go | 5 ++++ commands/command_migrate_export.go | 13 ++++++-- test/test-migrate-export.sh | 48 ++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/commands/command_migrate.go b/commands/command_migrate.go index 0df4e2d3..87139afc 100644 --- a/commands/command_migrate.go +++ b/commands/command_migrate.go @@ -43,6 +43,10 @@ var ( // migrateCommitMessage is the message to use with the commit generated // by the migrate command migrateCommitMessage string + + // exportRemote is the remote from which to download objects when + // performing an export + exportRemote string ) // migrate takes the given command and arguments, *odb.ObjectDatabase, as well @@ -300,6 +304,7 @@ func init() { exportCmd := NewCommand("export", migrateExportCommand) exportCmd.Flags().BoolVar(&migrateVerbose, "verbose", false, "Verbose logging") exportCmd.Flags().StringVar(&objectMapFilePath, "object-map", "", "Object map file") + exportCmd.Flags().StringVar(&exportRemote, "remote", "", "Remote from which to download objects") RegisterCommand("migrate", nil, func(cmd *cobra.Command) { cmd.PersistentFlags().StringVarP(&includeArg, "include", "I", "", "Include a list of paths") diff --git a/commands/command_migrate_export.go b/commands/command_migrate_export.go index 3db49445..a5c263de 100644 --- a/commands/command_migrate_export.go +++ b/commands/command_migrate_export.go @@ -113,9 +113,18 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { ExitWithError(err) } + remote := cfg.Remote() + if cmd.Flag("remote").Changed { + remote = exportRemote + } + 
remoteURL := getAPIClient().Endpoints.RemoteEndpoint("download", remote).Url + if remoteURL == "" && cmd.Flag("remote").Changed { + ExitWithError(errors.Errorf("fatal: invalid remote %s provided", remote)) + } + // If we have a valid remote, pre-download all objects using the Transfer Queue - if remoteURL := getAPIClient().Endpoints.RemoteEndpoint("download", cfg.Remote()).Url; remoteURL != "" { - q := newDownloadQueue(getTransferManifestOperationRemote("Download", cfg.Remote()), cfg.Remote()) + if remoteURL != "" { + q := newDownloadQueue(getTransferManifestOperationRemote("Download", remote), remote) gs := lfs.NewGitScanner(func(p *lfs.WrappedPointer, err error) { if err != nil { return diff --git a/test/test-migrate-export.sh b/test/test-migrate-export.sh index 131bbb37..fd79a215 100755 --- a/test/test-migrate-export.sh +++ b/test/test-migrate-export.sh @@ -361,3 +361,51 @@ begin_test "migrate export (--verbose)" git lfs migrate export --everything --include="*" --verbose 2>&1 | grep -q "migrate: commit " ) end_test + +begin_test "migrate export (--remote)" +( + set -e + + setup_single_remote_branch_tracked + + git push origin master + + md_oid="$(calc_oid "$(cat a.md)")" + txt_oid="$(calc_oid "$(cat a.txt)")" + + assert_pointer "refs/heads/master" "a.md" "$md_oid" "50" + assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "30" + + # Flush the cache to ensure all objects have to be downloaded + rm -rf .git/lfs/objects + + # Setup a new remote and invalidate the default + remote_url="$(git config --get remote.origin.url)" + git remote add zeta "$remote_url" + git remote set-url origin "" + + git lfs migrate export --everything --remote="zeta" --include="*.md, *.txt" + + [ ! $(assert_pointer "refs/heads/master" "a.md" "$md_oid" "50") ] + [ ! 
$(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "30") ] + + refute_local_object "$md_oid" "50" + refute_local_object "$txt_oid" "30" +) +end_test + +begin_test "migrate export (invalid --remote)" +( + set -e + + setup_single_remote_branch_tracked + + git lfs migrate export --include="*" --remote="zz" 2>&1 | tee migrate.log + if [ ${PIPESTATUS[0]} -eq 0 ]; then + echo >&2 "fatal: expected git lfs migrate export to fail, didn't" + exit 1 + fi + + grep "fatal: invalid remote zz provided" migrate.log +) +end_test \ No newline at end of file From 5ab5a69765caaac5ddb0f127b43c7e4c789e31e9 Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Wed, 27 Jun 2018 11:55:38 -0700 Subject: [PATCH 15/18] docs: document migrate export --- docs/man/git-lfs-migrate.1.ronn | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/man/git-lfs-migrate.1.ronn b/docs/man/git-lfs-migrate.1.ronn index 9628156d..81371f01 100644 --- a/docs/man/git-lfs-migrate.1.ronn +++ b/docs/man/git-lfs-migrate.1.ronn @@ -123,6 +123,27 @@ If `--message` is given, the new commit will be created with the provided message. If no message is given, a commit message will be generated based on the file arguments. +### EXPORT + +The 'export' mode migrates Git LFS pointer files present in the Git history out +of Git LFS, converting them into their corresponding object files. It supports +all the core 'migrate' options and these additional ones: + +* `--verbose` + Print the commit oid and filename of migrated files to STDOUT. + +* `--object-map=<path>` + Write to 'path' a file with the mapping of each rewritten commit. The file + format is CSV with this pattern: `OLD-SHA`,`NEW-SHA` + +* `--remote=<git-remote>` + Download LFS objects from the provided 'git-remote' during the export. If + not provided, defaults to 'origin'. + +The 'export' mode requires at minimum a pattern provided with the `--include` +argument to specify which files to export.
The export command will modify the +.gitattributes to unset any filepath patterns as given by those flags. + ## INCLUDE AND EXCLUDE You can configure Git LFS to only migrate tree entries whose pathspec matches From 2e096c1d12402c88d16d968647a5b41415a4923a Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Thu, 28 Jun 2018 14:58:41 -0700 Subject: [PATCH 16/18] commands: clean cache with prune() After performing an export, clean the cache with `prune()` instead of deleting all exported objects --- commands/command_migrate_export.go | 26 ++++++++++----------- test/test-migrate-export.sh | 37 +++++++++++++++++++++++------- test/test-migrate-fixtures.sh | 27 ++++++++++++++++++++++ 3 files changed, 69 insertions(+), 21 deletions(-) diff --git a/commands/command_migrate_export.go b/commands/command_migrate_export.go index a5c263de..859ac58c 100644 --- a/commands/command_migrate_export.go +++ b/commands/command_migrate_export.go @@ -47,10 +47,10 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { } ptr, err := lfs.DecodePointer(b.Contents) - if errors.IsNotAPointerError(err) { - return b, nil - } if err != nil { + if errors.IsNotAPointerError(err) { + return b, nil + } return nil, err } @@ -159,17 +159,17 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { ExitWithError(err) } - // Prune the cache - for _, object := range exported { - err := os.Remove(object) - if os.IsNotExist(err) { - continue - } + fetchPruneCfg := lfs.NewFetchPruneConfig(cfg.Git) - if err != nil { - ExitWithError(err) - } - } + // Set our preservation time-window for objects existing on the remote to + // 0. Because the newly rewritten commits have not yet been pushed, some + // exported objects can still exist on the remote within the time window + // and thus will not be pruned from the cache. 
+ fetchPruneCfg.PruneOffsetDays = 0 + fetchPruneCfg.FetchRecentRefsDays = 0 + + // Prune our cache + prune(fetchPruneCfg, false, false, true) // Only perform `git-checkout(1) -f` if the repository is non-bare. if bare, _ := git.IsBare(); !bare { diff --git a/test/test-migrate-export.sh b/test/test-migrate-export.sh index fd79a215..0eb5755d 100755 --- a/test/test-migrate-export.sh +++ b/test/test-migrate-export.sh @@ -9,8 +9,14 @@ begin_test "migrate export (default branch)" setup_multiple_local_branches_tracked + # Add b.md, a pointer existing only on master + base64 < /dev/urandom | head -c 160 > b.md + git add b.md + git commit -m "add b.md" + md_oid="$(calc_oid "$(cat a.md)")" txt_oid="$(calc_oid "$(cat a.txt)")" + b_md_oid="$(calc_oid "$(cat b.md)")" git checkout my-feature md_feature_oid="$(calc_oid "$(cat a.md)")" @@ -18,16 +24,23 @@ begin_test "migrate export (default branch)" assert_pointer "refs/heads/master" "a.md" "$md_oid" "140" assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120" + assert_pointer "refs/heads/master" "b.md" "$b_md_oid" "160" assert_pointer "refs/heads/my-feature" "a.md" "$md_feature_oid" "30" git lfs migrate export --include="*.md, *.txt" [ ! $(assert_pointer "refs/heads/master" "a.md" "$md_oid" "140") ] [ ! $(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120") ] + [ ! $(assert_pointer "refs/heads/master" "b.md" "$b_md_oid" "160")] assert_pointer "refs/heads/my-feature" "a.md" "$md_feature_oid" "30" - refute_local_object "$md_oid" "140" - refute_local_object "$txt_oid" "120" + # b.md should be pruned as no pointer exists to reference it + refute_local_object "$b_md_oid" "160" + + # Other objects should not be pruned as they're still referenced in `feature` + # by pointers + assert_local_object "$md_oid" "140" + assert_local_object "$txt_oid" "120" assert_local_object "$md_feature_oid" "30" master="$(git rev-parse refs/heads/master)" @@ -72,6 +85,7 @@ begin_test "migrate export (with remote)" [ ! 
$(assert_pointer "refs/remotes/origin/master" "a.md" "$md_oid" "50") ] [ ! $(assert_pointer "refs/remotes/origin/master" "a.txt" "$txt_oid" "30") ] + # All pointers have been exported, so all objects should be pruned refute_local_object "$md_oid" "50" refute_local_object "$txt_oid" "30" @@ -87,12 +101,13 @@ begin_test "migrate export (include/exclude args)" ( set -e - setup_multiple_local_branches_tracked + setup_single_local_branch_tracked md_oid="$(calc_oid "$(cat a.md)")" txt_oid="$(calc_oid "$(cat a.txt)")" assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120" + assert_pointer "refs/heads/master" "a.md" "$md_oid" "140" git lfs migrate export --include="*" --exclude="a.md" @@ -108,7 +123,6 @@ begin_test "migrate export (include/exclude args)" echo "$master_attrs" | grep -q "* text !filter !merge !diff" echo "$master_attrs" | grep -q "a.md filter=lfs diff=lfs merge=lfs" - ) end_test @@ -149,6 +163,7 @@ begin_test "migrate export (given branch)" [ ! $(assert_pointer "refs/heads/master" "a.md" "$md_oid" "140") ] [ ! 
$(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120") ] + # No pointers left, so all objects should be pruned refute_local_object "$md_feature_oid" "30" refute_local_object "$txt_oid" "120" refute_local_object "$md_oid" "140" @@ -213,8 +228,10 @@ begin_test "migrate export (exclude remote refs)" assert_pointer "refs/remotes/origin/master" "a.md" "$md_remote_oid" "140" assert_pointer "refs/remotes/origin/master" "a.txt" "$txt_remote_oid" "120" - assert_local_object "$md_remote_oid" "140" - assert_local_object "$txt_remote_oid" "120" + # Since these two objects exist on the remote, they should be removed with + # our prune operation + refute_local_object "$md_remote_oid" "140" + refute_local_object "$txt_remote_oid" "120" master="$(git rev-parse refs/heads/master)" remote="$(git rev-parse refs/remotes/origin/master)" @@ -312,12 +329,16 @@ begin_test "migrate export (include/exclude ref)" assert_pointer "refs/heads/my-feature" "a.md" "$md_feature_oid" "31" [ ! $(assert_pointer "refs/heads/my-feature" "a.txt" "$txt_feature_oid" "30") ] + # Master objects should not be pruned as they exist in unpushed commits assert_local_object "$md_master_oid" "21" assert_local_object "$txt_master_oid" "20" - assert_local_object "$md_remote_oid" "11" - assert_local_object "$txt_remote_oid" "10" + # Remote master objects should be pruned as they exist in the remote + refute_local_object "$md_remote_oid" "11" + refute_local_object "$txt_remote_oid" "10" + # txt_feature_oid should be pruned as it's no longer a pointer, but + # md_feature_oid should remain as it's still a pointer in unpushed commits assert_local_object "$md_feature_oid" "31" refute_local_object "$txt_feature_oid" "30" diff --git a/test/test-migrate-fixtures.sh b/test/test-migrate-fixtures.sh index e1d2a62b..c5784100 100755 --- a/test/test-migrate-fixtures.sh +++ b/test/test-migrate-fixtures.sh @@ -84,6 +84,33 @@ setup_local_branch_with_nested_gitattrs() { git commit -m "add nested .gitattributes" } +# 
setup_single_local_branch_tracked creates a repository as follows: +# +# A---B +# \ +# refs/heads/master +# +# - Commit 'A' has 120, in a.txt and 140 in a.md, with both files tracked as +# pointers in Git LFS +setup_single_local_branch_tracked() { + set -e + + reponame="migrate-single-remote-branch-with-attrs" + + remove_and_create_local_repo "$reponame" + + git lfs track "*.txt" "*.md" + + git add .gitattributes + git commit -m "initial commit" + + base64 < /dev/urandom | head -c 120 > a.txt + base64 < /dev/urandom | head -c 140 > a.md + + git add a.txt a.md + git commit -m "add a.{txt,md}" +} + # setup_multiple_local_branches creates a repository as follows: # # B From 42a65cbcdd9433daa79f5deebff8419d87c4236b Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Mon, 2 Jul 2018 15:02:44 -0700 Subject: [PATCH 17/18] test: add refute_pointer test helper --- test/test-migrate-export.sh | 41 +++++++++++++++++-------------------- test/testhelpers.sh | 25 ++++++++++++++++++++++ 2 files changed, 44 insertions(+), 22 deletions(-) diff --git a/test/test-migrate-export.sh b/test/test-migrate-export.sh index 0eb5755d..480f7a31 100755 --- a/test/test-migrate-export.sh +++ b/test/test-migrate-export.sh @@ -29,9 +29,9 @@ begin_test "migrate export (default branch)" git lfs migrate export --include="*.md, *.txt" - [ ! $(assert_pointer "refs/heads/master" "a.md" "$md_oid" "140") ] - [ ! $(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120") ] - [ ! $(assert_pointer "refs/heads/master" "b.md" "$b_md_oid" "160")] + refute_pointer "refs/heads/master" "a.md" + refute_pointer "refs/heads/master" "a.txt" + refute_pointer "refs/heads/master" "b.md" assert_pointer "refs/heads/my-feature" "a.md" "$md_feature_oid" "30" # b.md should be pruned as no pointer exists to reference it @@ -79,11 +79,8 @@ begin_test "migrate export (with remote)" git lfs migrate export --everything --include="*.md, *.txt" - [ ! $(assert_pointer "refs/heads/master" "a.md" "$md_oid" "50") ] - [ ! 
$(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "30") ] - - [ ! $(assert_pointer "refs/remotes/origin/master" "a.md" "$md_oid" "50") ] - [ ! $(assert_pointer "refs/remotes/origin/master" "a.txt" "$txt_oid" "30") ] + refute_pointer "refs/heads/master" "a.md" + refute_pointer "refs/heads/master" "a.txt" # All pointers have been exported, so all objects should be pruned refute_local_object "$md_oid" "50" @@ -111,7 +108,7 @@ begin_test "migrate export (include/exclude args)" git lfs migrate export --include="*" --exclude="a.md" - [ ! $(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120") ] + refute_pointer "refs/heads/master" "a.txt" assert_pointer "refs/heads/master" "a.md" "$md_oid" "140" refute_local_object "$txt_oid" "120" @@ -158,10 +155,10 @@ begin_test "migrate export (given branch)" git lfs migrate export --include="*.md,*.txt" my-feature - [ ! $(assert_pointer "refs/heads/my-feature" "a.md" "$md_feature_oid" "30") ] - [ ! $(assert_pointer "refs/heads/my-feature" "a.txt" "$txt_oid" "120") ] - [ ! $(assert_pointer "refs/heads/master" "a.md" "$md_oid" "140") ] - [ ! $(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "120") ] + refute_pointer "refs/heads/my-feature" "a.md" + refute_pointer "refs/heads/my-feature" "a.txt" + refute_pointer "refs/heads/master" "a.md" + refute_pointer "refs/heads/master" "a.txt" # No pointers left, so all objects should be pruned refute_local_object "$md_feature_oid" "30" @@ -219,8 +216,8 @@ begin_test "migrate export (exclude remote refs)" git lfs migrate export --include="*.md,*.txt" - [ ! $(assert_pointer "refs/heads/master" "a.md" "$md_oid" "50") ] - [ ! $(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "30") ] + refute_pointer "refs/heads/master" "a.md" + refute_pointer "refs/heads/master" "a.txt" refute_local_object "$md_oid" "50" refute_local_object "$txt_oid" "30" @@ -273,10 +270,10 @@ begin_test "migrate export (--skip-fetch)" git lfs migrate export --skip-fetch --include="*.md,*.txt" - [ ! 
$(assert_pointer "refs/heads/master" "a.md" "$md_master_oid" "50") ] - [ ! $(assert_pointer "pseudo-remote" "a.md" "$md_remote_oid" "140") ] - [ ! $(assert_pointer "refs/heads/master" "a.txt" "$txt_master_oid" "30") ] - [ ! $(assert_pointer "pseudo-remote" "a.txt" "$txt_remote_oid" "120") ] + refute_pointer "refs/heads/master" "a.md" + refute_pointer "pseudo-remote" "a.md" + refute_pointer "refs/heads/master" "a.txt" + refute_pointer "pseudo-remote" "a.txt" refute_local_object "$md_master_oid" "50" refute_local_object "$md_remote_oid" "140" @@ -327,7 +324,7 @@ begin_test "migrate export (include/exclude ref)" assert_pointer "refs/remotes/origin/master" "a.txt" "$txt_remote_oid" "10" assert_pointer "refs/heads/my-feature" "a.md" "$md_feature_oid" "31" - [ ! $(assert_pointer "refs/heads/my-feature" "a.txt" "$txt_feature_oid" "30") ] + refute_pointer "refs/heads/my-feature" "a.txt" # Master objects should not be pruned as they exist in unpushed commits assert_local_object "$md_master_oid" "21" @@ -407,8 +404,8 @@ begin_test "migrate export (--remote)" git lfs migrate export --everything --remote="zeta" --include="*.md, *.txt" - [ ! $(assert_pointer "refs/heads/master" "a.md" "$md_oid" "50") ] - [ ! $(assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "30") ] + refute_pointer "refs/heads/master" "a.md" + refute_pointer "refs/heads/master" "a.txt" refute_local_object "$md_oid" "50" refute_local_object "$txt_oid" "30" diff --git a/test/testhelpers.sh b/test/testhelpers.sh index 40180933..b8cdb1e8 100644 --- a/test/testhelpers.sh +++ b/test/testhelpers.sh @@ -24,6 +24,31 @@ assert_pointer() { fi } +# refute_pointer confirms that the file in the repository for $path in the +# given $ref is _not_ a pointer. 
+# +# $ refute_pointer "master" "path/to/file" +refute_pointer() { + local ref="$1" + local path="$2" + + gitblob=$(git ls-tree -lrz "$ref" | + while read -r -d $'\0' x; do + echo $x + done | + grep "$path" | cut -f 3 -d " ") + + file=$(git cat-file -p $gitblob) + version="version https://git-lfs.github.com/spec/v[0-9]" + oid="oid sha256:[0-9a-f]\{32\}" + size="size [0-9]*" + regex="$version.*$oid.*$size" + + if echo $file | grep -q "$regex"; then + exit 1 + fi +} + # assert_local_object confirms that an object file is stored for the given oid & # has the correct size # $ assert_local_object "some-oid" size From 17e793a76bb49068068ebeab7373fe7525f10c5e Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Mon, 2 Jul 2018 16:13:29 -0700 Subject: [PATCH 18/18] commands: address export PR feedback * Remove unused `exported` variable * Perform prune after checkout * Clarify use of --include and --exclude in docs * Add assertions to bare repository test --- commands/command_migrate_export.go | 33 +++++++++++------------------- docs/man/git-lfs-migrate.1.ronn | 6 ++++-- test/test-migrate-export.sh | 16 ++++++++++++++- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/commands/command_migrate_export.go b/commands/command_migrate_export.go index 859ac58c..278bed7a 100644 --- a/commands/command_migrate_export.go +++ b/commands/command_migrate_export.go @@ -36,8 +36,6 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { tracked := trackedFromExportFilter(filter) gitfilter := lfs.NewGitFilter(cfg) - var exported []string - opts := &githistory.RewriteOptions{ Verbose: migrateVerbose, ObjectMapFilePath: objectMapFilePath, @@ -59,13 +57,7 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { return nil, err } - newBlob, err := odb.NewBlobFromFile(downloadPath) - if err != nil { - return nil, err - } - - exported = append(exported, downloadPath) - return newBlob, nil + return odb.NewBlobFromFile(downloadPath) }, TreeCallbackFn: func(path 
string, t *odb.Tree) (*odb.Tree, error) { @@ -159,18 +151,6 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { ExitWithError(err) } - fetchPruneCfg := lfs.NewFetchPruneConfig(cfg.Git) - - // Set our preservation time-window for objects existing on the remote to - // 0. Because the newly rewritten commits have not yet been pushed, some - // exported objects can still exist on the remote within the time window - // and thus will not be pruned from the cache. - fetchPruneCfg.PruneOffsetDays = 0 - fetchPruneCfg.FetchRecentRefsDays = 0 - - // Prune our cache - prune(fetchPruneCfg, false, false, true) - // Only perform `git-checkout(1) -f` if the repository is non-bare. if bare, _ := git.IsBare(); !bare { t := l.Waiter("migrate: checkout") @@ -181,6 +161,17 @@ func migrateExportCommand(cmd *cobra.Command, args []string) { ExitWithError(err) } } + + fetchPruneCfg := lfs.NewFetchPruneConfig(cfg.Git) + + // Set our preservation time-window for objects existing on the remote to + // 0. Because the newly rewritten commits have not yet been pushed, some + // exported objects can still exist on the remote within the time window + // and thus will not be pruned from the cache. + fetchPruneCfg.FetchRecentRefsDays = 0 + + // Prune our cache + prune(fetchPruneCfg, false, false, true) } // trackedFromExportFilter returns an ordered set of strings where each entry diff --git a/docs/man/git-lfs-migrate.1.ronn b/docs/man/git-lfs-migrate.1.ronn index 81371f01..aff6cd4c 100644 --- a/docs/man/git-lfs-migrate.1.ronn +++ b/docs/man/git-lfs-migrate.1.ronn @@ -141,8 +141,10 @@ all the core 'migrate' options and these additional ones: not provided, defaults to 'origin'. The 'export' mode requires at minimum a pattern provided with the `--include` -argument to specify which files to export. The export command will modify the -.gitattributes to unset any filepath patterns as given by those flags. +argument to specify which files to export. 
Files matching the `--include` +patterns will be removed from Git LFS, while files matching the `--exclude` +patterns will retain their Git LFS status. The export command will modify the +.gitattributes to set/unset any filepath patterns as given by those flags. ## INCLUDE AND EXCLUDE diff --git a/test/test-migrate-export.sh b/test/test-migrate-export.sh index 480f7a31..37431bbf 100755 --- a/test/test-migrate-export.sh +++ b/test/test-migrate-export.sh @@ -129,9 +129,23 @@ begin_test "migrate export (bare repository)" setup_single_remote_branch_tracked git push origin master + + md_oid="$(calc_oid "$(cat a.md)")" + txt_oid="$(calc_oid "$(cat a.txt)")" + make_bare + assert_pointer "refs/heads/master" "a.txt" "$txt_oid" "30" + assert_pointer "refs/heads/master" "a.md" "$md_oid" "50" + git lfs migrate export --everything --include="*" + + refute_pointer "refs/heads/master" "a.md" + refute_pointer "refs/heads/master" "a.txt" + + # All pointers have been exported, so all objects should be pruned + refute_local_object "$md_oid" "50" + refute_local_object "$txt_oid" "30" ) end_test @@ -426,4 +440,4 @@ begin_test "migrate export (invalid --remote)" grep "fatal: invalid remote zz provided" migrate.log ) -end_test \ No newline at end of file +end_test