git/githistory: introduce TreePreCallbackFn

To determine the paths to migrate from a repository's .gitattributes, a
caller must do the following two things in order:

  1. Read the .gitattributes file(s) in a given tree contained within
  the repository.

  2. Rewrite blobs according to the attributes applied to their paths
  via the .gitattributes file(s) read in (1).

The framework for accomplishing the task necessary in (1) was written in
the previous commit. This commit introduces the rest of that mechanism
for (1).

Because a Git object's SHA-1 signature depends on its children, we must
visit the object graph in a topological ordering. This is not sufficient
for our purposes, since the patterns in a .gitattributes file cascade
downwards.

In other words, while we have to migrate from the leaves of the tree to
its root, we have to read the .gitattributes file(s) from root to
leaves.

To accomplish this, we introduce a new callback function in the
*githistory.RewriteOptions structure, TreePreCallbackFn, which is called
once as soon as a tree is opened for the first time, and before any
blobs or sub-trees are rewritten.

This provides the optimal time to inspect the repository's contents for
interesting .gitattributes files before migrating the blobs within.

We will use this new callback function in the following commit in order
to do precisely the task as described above.
This commit is contained in:
Taylor Blau 2018-07-06 14:14:31 -05:00
parent 58ae7f7f8e
commit 92d8cf18a5
3 changed files with 113 additions and 7 deletions

@ -103,6 +103,7 @@ func rewriteOptions(args []string, opts *githistory.RewriteOptions, l *tasklog.L
ObjectMapFilePath: opts.ObjectMapFilePath,
BlobFn: opts.BlobFn,
TreePreCallbackFn: opts.TreePreCallbackFn,
TreeCallbackFn: opts.TreeCallbackFn,
}, nil
}

@ -67,6 +67,10 @@ type RewriteOptions struct {
// each blob for subsequent revisions, so long as each entry remains
// unchanged.
BlobFn BlobRewriteFn
// TreePreCallbackFn specifies a function to be called when a tree is
// opened for rewriting, before any of its blobs or sub-trees are rewritten.
// It will be called on all trees throughout history, visiting each tree
// from the root downwards.
TreePreCallbackFn TreePreCallbackFn
// TreeCallbackFn specifies a function to rewrite trees after they have
// been reassembled by calling the above BlobFn on all existing tree
// entries.
@ -82,6 +86,15 @@ func (r *RewriteOptions) blobFn() BlobRewriteFn {
return r.BlobFn
}
// treePreFn yields the TreePreCallbackFn that should be invoked while
// rewriting: the caller-supplied one from the *RewriteOptions when present,
// and the no-op noopTreePreFn otherwise, so the result is always usable.
func (r *RewriteOptions) treePreFn() TreePreCallbackFn {
	if fn := r.TreePreCallbackFn; fn != nil {
		return fn
	}
	return noopTreePreFn
}
// treeFn returns a useable TreeRewriteFn, either the one that was given in the
// *RewriteOptions, or a noopTreeFn.
func (r *RewriteOptions) treeFn() TreeCallbackFn {
@ -107,13 +120,24 @@ func (r *RewriteOptions) treeFn() TreeCallbackFn {
// of filepath.Join(...) or os.PathSeparator.
type BlobRewriteFn func(path string, b *gitobj.Blob) (*gitobj.Blob, error)
// TreePreCallbackFn specifies a function to call upon opening a new tree for
// rewriting, before any of the blobs or sub-trees within it are rewritten.
//
// The callback is given the tree that was opened along with its path. The
// path is always prefixed with "/", and the root tree of a commit is given
// as "/".
//
// Unlike its sibling TreeCallbackFn, TreePreCallbackFn may not modify the given
// tree.
//
// TreePreCallbackFn can be nil, in which case a no-op is used in its place and
// nothing extra happens when a tree is opened.
//
// If the TreePreCallbackFn returns an error, it will be returned from the
// Rewrite() invocation.
type TreePreCallbackFn func(path string, t *gitobj.Tree) error
// TreeCallbackFn specifies a function to call before writing a re-written tree
// to the object database. The TreeCallbackFn can return a modified tree to be
// written to the object database instead of one generated from calling BlobFn
// on all of the tree entries.
//
// Trees returned from a TreeCallbackFn MUST have all objects referenced in the
// entryset already written to the object database.
//
// TreeCallbackFn can be nil, and will therefore exhibit behavior equivalent to
// only calling the BlobFn on existing tree entries.
@ -151,6 +175,9 @@ var (
// noopBlobFn is a no-op implementation of the BlobRewriteFn. It returns
// the blob that it was given, and returns no error.
noopBlobFn = func(path string, b *gitobj.Blob) (*gitobj.Blob, error) { return b, nil }
// noopTreePreFn is a no-op implementation of the TreePreCallbackFn. It
// ignores the path and tree that it was given, and returns no error.
noopTreePreFn = func(path string, t *gitobj.Tree) error { return nil }
// noopTreeFn is a no-op implementation of the TreeRewriteFn. It returns
// the tree that it was given, and returns no error.
noopTreeFn = func(path string, t *gitobj.Tree) (*gitobj.Tree, error) { return t, nil }
@ -214,7 +241,7 @@ func (r *Rewriter) Rewrite(opt *RewriteOptions) ([]byte, error) {
}
// Rewrite the tree given at that commit.
rewrittenTree, err := r.rewriteTree(oid, original.TreeID, "", opt.blobFn(), opt.treeFn(), vPerc)
rewrittenTree, err := r.rewriteTree(oid, original.TreeID, "", opt.blobFn(), opt.treePreFn(), opt.treeFn(), vPerc)
if err != nil {
return nil, err
}
@ -321,12 +348,19 @@ func (r *Rewriter) Rewrite(opt *RewriteOptions) ([]byte, error) {
//
// It returns the new SHA of the rewritten tree, or an error if the tree was
// unable to be rewritten.
func (r *Rewriter) rewriteTree(commitOID []byte, treeOID []byte, path string, fn BlobRewriteFn, tfn TreeCallbackFn, perc *tasklog.PercentageTask) ([]byte, error) {
func (r *Rewriter) rewriteTree(commitOID []byte, treeOID []byte, path string,
fn BlobRewriteFn, tpfn TreePreCallbackFn, tfn TreeCallbackFn,
perc *tasklog.PercentageTask) ([]byte, error) {
tree, err := r.db.Tree(treeOID)
if err != nil {
return nil, err
}
if err := tpfn("/"+path, tree); err != nil {
return nil, err
}
entries := make([]*gitobj.TreeEntry, 0, len(tree.Entries))
for _, entry := range tree.Entries {
var fullpath string
@ -358,7 +392,7 @@ func (r *Rewriter) rewriteTree(commitOID []byte, treeOID []byte, path string, fn
case gitobj.BlobObjectType:
oid, err = r.rewriteBlob(commitOID, entry.Oid, fullpath, fn, perc)
case gitobj.TreeObjectType:
oid, err = r.rewriteTree(commitOID, entry.Oid, fullpath, fn, tfn, perc)
oid, err = r.rewriteTree(commitOID, entry.Oid, fullpath, fn, tpfn, tfn, perc)
default:
oid = entry.Oid

@ -10,6 +10,7 @@ import (
"strings"
"testing"
"github.com/git-lfs/git-lfs/errors"
"github.com/git-lfs/git-lfs/filepathfilter"
"github.com/git-lfs/gitobj"
"github.com/stretchr/testify/assert"
@ -292,6 +293,76 @@ func TestRewriterAllowsAdditionalTreeEntries(t *testing.T) {
AssertBlobContents(t, db, tree3, "extra.txt", "extra\n")
}
// TestHistoryRewriterCallbacks verifies the order in which Rewrite() invokes
// its callbacks while walking the "linear-history.git" fixture: for each tree
// visited, the TreePreCallbackFn must fire first, then the BlobFn for the
// blob(s) inside, and finally the TreeCallbackFn.
func TestHistoryRewriterCallbacks(t *testing.T) {
	// Call records a single callback invocation: which kind of callback
	// fired, and the path it was given.
	type Call struct {
		Type string
		Path string
	}

	var calls []*Call

	db := DatabaseFromFixture(t, "linear-history.git")
	r := NewRewriter(db)

	_, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
			calls = append(calls, &Call{Type: "blob", Path: path})
			return b, nil
		},

		// The tree parameters are named "tree" rather than "t" so that
		// they do not shadow the *testing.T of the enclosing test.
		TreePreCallbackFn: func(path string, tree *gitobj.Tree) error {
			calls = append(calls, &Call{Type: "tree-pre", Path: path})
			return nil
		},

		TreeCallbackFn: func(path string, tree *gitobj.Tree) (*gitobj.Tree, error) {
			calls = append(calls, &Call{Type: "tree-post", Path: path})
			return tree, nil
		},
	})

	assert.NoError(t, err)

	// Compare the whole sequence in one assertion. Indexing into calls
	// after a (non-fatal) length assertion would panic whenever the number
	// of recorded calls is wrong, hiding the real failure. Comparing the
	// slices reports both length and ordering mismatches cleanly.
	expected := []*Call{
		{Type: "tree-pre", Path: "/"},
		{Type: "blob", Path: "hello.txt"},
		{Type: "tree-post", Path: "/"},

		{Type: "tree-pre", Path: "/"},
		{Type: "blob", Path: "hello.txt"},
		{Type: "tree-post", Path: "/"},

		{Type: "tree-pre", Path: "/"},
		{Type: "blob", Path: "hello.txt"},
		{Type: "tree-post", Path: "/"},
	}
	assert.Equal(t, expected, calls)
}
// TestHistoryRewriterTreePreCallbackPropagatesErrors verifies that an error
// returned from the TreePreCallbackFn is returned, unchanged, from the
// Rewrite() invocation.
func TestHistoryRewriterTreePreCallbackPropagatesErrors(t *testing.T) {
	expected := errors.Errorf("my error")

	db := DatabaseFromFixture(t, "linear-history.git")
	r := NewRewriter(db)

	_, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
			return b, nil
		},

		// Fail on the first tree that is opened. The parameter is named
		// "tree" so that it does not shadow the test's *testing.T.
		TreePreCallbackFn: func(path string, tree *gitobj.Tree) error {
			return expected
		},
	})

	// testify's Equal takes (expected, actual); keeping that order makes a
	// failure message label the two values correctly.
	assert.Equal(t, expected, err)
}
func TestHistoryRewriterUseOriginalParentsForPartialMigration(t *testing.T) {
db := DatabaseFromFixture(t, "linear-history-with-tags.git")
r := NewRewriter(db)