git-lfs/commands/command_migrate.go
Taylor Blau 5346b02747 commands/command_migrate.go: introduce '--fixup' flag on 'import'
A common invocation of the 'git lfs migrate import' command is with
'--include' and/or '--exclude' flag(s), which specify wildmatch
pattern(s) for which paths to migrate and/or not migrate.

This is useful for retroactively importing a set of files into Git LFS's
care, or fixing up a file that should have been tracked by Git LFS but
was accidentally committed as a large object instead.

In the later case, it is often the reality that a user will run 'git lfs
migrate --import' with an '--include' path that they believe will gather
the file (and the file alone). This approach is brittle because it
requires the user to infer not only the applicable pattern but the
meaning of that pattern. It also requires the user to run more than one
migration when fixing multiple types of files.

The .gitattributes file(s) contained within a repository provide an
authoritative source on what file(s) are considered by Git to be tracked
in Git LFS. We can use this information to infer the correct patterns to
``fix up'' a broken repository.

In the simplest case, if a repository's .gitattributes file contains the
following:

    *.txt filter=lfs merge=lfs diff=lfs -text

But a .txt file matched by that pattern is not parse-able as an LFS
pointer, it will appear as unable to checkout.

Running 'git lfs migrate import --fixup --everything' will correctly
traverse history and find the affected .txt file, read it, create an
object file for it, and store it as an LFS pointer in history.

Thus, a user can run one command which will recognize arbitrarily
complex problems where a file should be tracked by Git LFS, but isn't.

Later, this feature could be combined with the new 'git lfs migrate
export' functionality to also clean files _out_ of Git LFS to object
files when they are not supposed to be tracked as Git LFS objects.
2018-07-06 14:42:48 -05:00

332 lines
10 KiB
Go

package commands
import (
"fmt"
"path/filepath"
"strings"
"github.com/git-lfs/git-lfs/errors"
"github.com/git-lfs/git-lfs/git"
"github.com/git-lfs/git-lfs/git/githistory"
"github.com/git-lfs/git-lfs/tasklog"
"github.com/git-lfs/gitobj"
"github.com/spf13/cobra"
)
var (
// migrateIncludeRefs is a set of Git references to explicitly include
// in the migration.
migrateIncludeRefs []string
// migrateExcludeRefs is a set of Git references to explicitly exclude
// in the migration.
migrateExcludeRefs []string
// migrateSkipFetch assumes that the client has the latest copy of
// remote references, and thus should not contact the remote for a set
// of updated references.
migrateSkipFetch bool
// migrateEverything indicates the presence of the --everything flag,
// and instructs 'git lfs migrate' to migrate all local references.
migrateEverything bool
// migrateVerbose enables verbose logging
migrateVerbose bool
// objectMapFile is the path to the map of old sha1 to new sha1
// commits
objectMapFilePath string
// migrateNoRewrite is the flag indicating whether or not the
// command should rewrite git history
migrateNoRewrite bool
// migrateCommitMessage is the message to use with the commit generated
// by the migrate command
migrateCommitMessage string
// exportRemote is the remote from which to download objects when
// performing an export
exportRemote string
// migrateFixup is the flag indicating whether or not to infer the
// included and excluded filepath patterns.
migrateFixup bool
)
// migrate takes the given command and arguments, *gitobj.ObjectDatabase, as well
// as a BlobRewriteFn to apply, and performs a migration.
func migrate(args []string, r *githistory.Rewriter, l *tasklog.Logger, opts *githistory.RewriteOptions) {
requireInRepo()
opts, err := rewriteOptions(args, opts, l)
if err != nil {
ExitWithError(err)
}
_, err = r.Rewrite(opts)
if err != nil {
ExitWithError(err)
}
}
// getObjectDatabase creates a *git.ObjectDatabase from the filesystem pointed
// at the .git directory of the currently checked-out repository.
func getObjectDatabase() (*gitobj.ObjectDatabase, error) {
dir, err := git.GitDir()
if err != nil {
return nil, errors.Wrap(err, "cannot open root")
}
return gitobj.FromFilesystem(filepath.Join(dir, "objects"), cfg.TempDir())
}
// rewriteOptions returns *githistory.RewriteOptions able to be passed to a
// *githistory.Rewriter that reflect the current arguments and flags passed to
// an invocation of git-lfs-migrate(1).
//
// It is merged with the given "opts". In other words, an identical "opts" is
// returned, where the Include and Exclude fields have been filled based on the
// following rules:
//
// The included and excluded references are determined based on the output of
// includeExcludeRefs (see below for documentation and detail).
//
// If any of the above could not be determined without error, that error will be
// returned immediately.
func rewriteOptions(args []string, opts *githistory.RewriteOptions, l *tasklog.Logger) (*githistory.RewriteOptions, error) {
include, exclude, err := includeExcludeRefs(l, args)
if err != nil {
return nil, err
}
return &githistory.RewriteOptions{
Include: include,
Exclude: exclude,
UpdateRefs: opts.UpdateRefs,
Verbose: opts.Verbose,
ObjectMapFilePath: opts.ObjectMapFilePath,
BlobFn: opts.BlobFn,
TreePreCallbackFn: opts.TreePreCallbackFn,
TreeCallbackFn: opts.TreeCallbackFn,
}, nil
}
// includeExcludeRefs returns fully-qualified sets of references to include, and
// exclude, or an error if those could not be determined.
//
// They are determined based on the following rules:
//
// - Include all local refs/heads/<branch> references for each branch
// specified as an argument.
// - Include the currently checked out branch if no branches are given as
// arguments and the --include-ref= or --exclude-ref= flag(s) aren't given.
// - Include all references given in --include-ref=<ref>.
// - Exclude all references given in --exclude-ref=<ref>.
func includeExcludeRefs(l *tasklog.Logger, args []string) (include, exclude []string, err error) {
hardcore := len(migrateIncludeRefs) > 0 || len(migrateExcludeRefs) > 0
if len(args) == 0 && !hardcore && !migrateEverything {
// If no branches were given explicitly AND neither
// --include-ref or --exclude-ref flags were given, then add the
// currently checked out reference.
current, err := currentRefToMigrate()
if err != nil {
return nil, nil, err
}
args = append(args, current.Name)
}
if migrateEverything && len(args) > 0 {
return nil, nil, errors.New("fatal: cannot use --everything with explicit reference arguments")
}
for _, name := range args {
var excluded bool
if strings.HasPrefix("^", name) {
name = name[1:]
excluded = true
}
// Then, loop through each branch given, resolve that reference,
// and include it.
ref, err := git.ResolveRef(name)
if err != nil {
return nil, nil, err
}
if excluded {
exclude = append(exclude, ref.Refspec())
} else {
include = append(include, ref.Refspec())
}
}
if hardcore {
if migrateEverything {
return nil, nil, errors.New("fatal: cannot use --everything with --include-ref or --exclude-ref")
}
// If either --include-ref=<ref> or --exclude-ref=<ref> were
// given, append those to the include and excluded reference
// set, respectively.
include = append(include, migrateIncludeRefs...)
exclude = append(exclude, migrateExcludeRefs...)
} else if migrateEverything {
localRefs, err := git.LocalRefs()
if err != nil {
return nil, nil, err
}
for _, ref := range localRefs {
include = append(include, ref.Refspec())
}
} else {
bare, err := git.IsBare()
if err != nil {
return nil, nil, errors.Wrap(err, "fatal: unable to determine bareness")
}
if !bare {
// Otherwise, if neither --include-ref=<ref> or
// --exclude-ref=<ref> were given, include no additional
// references, and exclude all remote references that
// are remote branches or remote tags.
remoteRefs, err := getRemoteRefs(l)
if err != nil {
return nil, nil, err
}
for _, rr := range remoteRefs {
exclude = append(exclude, rr.Refspec())
}
}
}
return include, exclude, nil
}
// getRemoteRefs returns a fully qualified set of references belonging to all
// remotes known by the currently checked-out repository, or an error if those
// references could not be determined.
func getRemoteRefs(l *tasklog.Logger) ([]*git.Ref, error) {
var refs []*git.Ref
remotes, err := git.RemoteList()
if err != nil {
return nil, err
}
if !migrateSkipFetch {
w := l.Waiter("migrate: Fetching remote refs")
if err := git.Fetch(remotes...); err != nil {
return nil, err
}
w.Complete()
}
for _, remote := range remotes {
var refsForRemote []*git.Ref
if migrateSkipFetch {
refsForRemote, err = git.CachedRemoteRefs(remote)
} else {
refsForRemote, err = git.RemoteRefs(remote)
}
if err != nil {
return nil, err
}
for _, rr := range refsForRemote {
// HACK(@ttaylorr): add remote name to fully-qualify
// references:
rr.Name = fmt.Sprintf("%s/%s", remote, rr.Name)
refs = append(refs, rr)
}
}
return refs, nil
}
// formatRefName returns the fully-qualified name for the given Git reference
// "ref".
func formatRefName(ref *git.Ref, remote string) string {
var name []string
switch ref.Type {
case git.RefTypeRemoteBranch:
name = []string{"refs", "remotes", remote, ref.Name}
case git.RefTypeRemoteTag:
name = []string{"refs", "tags", ref.Name}
default:
return ref.Name
}
return strings.Join(name, "/")
}
// currentRefToMigrate returns the fully-qualified name of the currently
// checked-out reference, or an error if the reference's type was not a local
// branch.
func currentRefToMigrate() (*git.Ref, error) {
current, err := git.CurrentRef()
if err != nil {
return nil, err
}
if current.Type == git.RefTypeOther ||
current.Type == git.RefTypeRemoteBranch ||
current.Type == git.RefTypeRemoteTag {
return nil, errors.Errorf("fatal: cannot migrate non-local ref: %s", current.Name)
}
return current, nil
}
// getHistoryRewriter returns a history rewriter that includes the filepath
// filter given by the --include and --exclude arguments, or no filter if
// --fixup was given.
func getHistoryRewriter(cmd *cobra.Command, db *gitobj.ObjectDatabase, l *tasklog.Logger) *githistory.Rewriter {
if migrateFixup {
return githistory.NewRewriter(db, githistory.WithLogger(l))
}
include, exclude := getIncludeExcludeArgs(cmd)
filter := buildFilepathFilter(cfg, include, exclude)
return githistory.NewRewriter(db,
githistory.WithFilter(filter), githistory.WithLogger(l))
}
func init() {
info := NewCommand("info", migrateInfoCommand)
info.Flags().IntVar(&migrateInfoTopN, "top", 5, "--top=<n>")
info.Flags().StringVar(&migrateInfoAboveFmt, "above", "", "--above=<n>")
info.Flags().StringVar(&migrateInfoUnitFmt, "unit", "", "--unit=<unit>")
importCmd := NewCommand("import", migrateImportCommand)
importCmd.Flags().BoolVar(&migrateVerbose, "verbose", false, "Verbose logging")
importCmd.Flags().StringVar(&objectMapFilePath, "object-map", "", "Object map file")
importCmd.Flags().BoolVar(&migrateNoRewrite, "no-rewrite", false, "Add new history without rewriting previous")
importCmd.Flags().StringVarP(&migrateCommitMessage, "message", "m", "", "With --no-rewrite, an optional commit message")
importCmd.Flags().BoolVar(&migrateFixup, "fixup", false, "Infer filepaths based on .gitattributes")
exportCmd := NewCommand("export", migrateExportCommand)
exportCmd.Flags().BoolVar(&migrateVerbose, "verbose", false, "Verbose logging")
exportCmd.Flags().StringVar(&objectMapFilePath, "object-map", "", "Object map file")
exportCmd.Flags().StringVar(&exportRemote, "remote", "", "Remote from which to download objects")
RegisterCommand("migrate", nil, func(cmd *cobra.Command) {
cmd.PersistentFlags().StringVarP(&includeArg, "include", "I", "", "Include a list of paths")
cmd.PersistentFlags().StringVarP(&excludeArg, "exclude", "X", "", "Exclude a list of paths")
cmd.PersistentFlags().StringSliceVar(&migrateIncludeRefs, "include-ref", nil, "An explicit list of refs to include")
cmd.PersistentFlags().StringSliceVar(&migrateExcludeRefs, "exclude-ref", nil, "An explicit list of refs to exclude")
cmd.PersistentFlags().BoolVar(&migrateEverything, "everything", false, "Migrate all local references")
cmd.PersistentFlags().BoolVar(&migrateSkipFetch, "skip-fetch", false, "Assume up-to-date remote references.")
cmd.AddCommand(exportCmd, importCmd, info)
})
}