git-lfs/git/githistory/rewriter_test.go
Taylor Blau 92d8cf18a5 git/githistory: introduce TreePreCallbackFn
To determine the paths to migrate from a repository's .gitattributes, a
caller must do the following two things in order:

  1. Read the .gitattributes file(s) in a given tree contained within
  the repository.

  2. Rewrite blobs according to the attributes applied to their paths
  via the .gitattributes file(s) read in (1).

The framework for accomplishing the task necessary in (1) was written in
the previous commit. This commit introduces the rest of that mechanism
for (1).

Because a Git object's SHA-1 signature depends on its children, we must
visit the object graph in a topological ordering. This is not sufficient
for our purposes, since the patterns in a .gitattributes file cascade
downwards.

In other words, while we have to migrate from the leaves of the tree to
its root, we have to read the .gitattributes file(s) from root to
leaves.

To accomplish this, we introduce a new callback function in the
*githistory.RewriteOptions structure, TreePreCallbackFn, which is called
once as soon as a tree is opened for the first time, and before any
blobs or sub-trees are rewritten.

This provides the optimal time to inspect the repository's contents for
interesting .gitattributes files before migrating the blobs within.

We will use this new callback function in the following commit in order
to do precisely the task as described above.
2018-07-06 14:42:48 -05:00

458 lines
13 KiB
Go

package githistory
import (
"bytes"
"encoding/hex"
"io"
"io/ioutil"
"reflect"
"strconv"
"strings"
"testing"
"github.com/git-lfs/git-lfs/errors"
"github.com/git-lfs/git-lfs/filepathfilter"
"github.com/git-lfs/gitobj"
"github.com/stretchr/testify/assert"
)
// TestRewriterRewritesHistory rewrites every blob reachable from
// refs/heads/master in the "linear-history.git" fixture by parsing its
// contents as a decimal integer and adding one, and then verifies the tree,
// parent and "hello.txt" contents of HEAD, HEAD~1 and HEAD~2.
func TestRewriterRewritesHistory(t *testing.T) {
	db := DatabaseFromFixture(t, "linear-history.git")
	rewriter := NewRewriter(db)

	// increment swaps a blob holding the integer n for one holding n+1. Any
	// read or parse failure is handed back to the rewriter unchanged.
	increment := func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
		raw, err := ioutil.ReadAll(b.Contents)
		if err != nil {
			return nil, err
		}

		value, err := strconv.Atoi(string(raw))
		if err != nil {
			return nil, err
		}

		next := strconv.Itoa(value + 1)

		return &gitobj.Blob{
			Contents: strings.NewReader(next),
			Size:     int64(len(next)),
		}, nil
	}

	tip, err := rewriter.Rewrite(&RewriteOptions{
		Include: []string{"refs/heads/master"},
		BlobFn:  increment,
	})
	assert.Nil(t, err)

	const (
		headTree        = "ad0aebd16e34cf047820994ea7538a6d4a111082"
		parentTree      = "6e07bd31cb70c4add2c973481ad4fa38b235ca69"
		grandparentTree = "c5decfe1fcf39b8c489f4a0bf3b3823676339f80"

		parent      = "4aaa3f49ffeabbb874250fe13ffeb8c683aba650"
		grandparent = "24a341e1ff75addc22e336a8d87f82ba56b86fcf"
	)
	head := hex.EncodeToString(tip)

	// HEAD is expected to point at a tree equivalent to:
	//
	//   100644 blob bf0d87ab1b2b0ec1a11a3973d2845b42413d9767   hello.txt
	AssertCommitTree(t, db, head, headTree)
	AssertBlobContents(t, db, headTree, "hello.txt", "4")

	// HEAD~1 is expected to point at a tree equivalent to:
	//
	//   100644 blob e440e5c842586965a7fb77deda2eca68612b1f53   hello.txt
	AssertCommitParent(t, db, head, parent)
	AssertCommitTree(t, db, parent, parentTree)
	AssertBlobContents(t, db, parentTree, "hello.txt", "3")

	// HEAD~2 is expected to point at a tree equivalent to:
	//
	//   100644 blob d8263ee9860594d2806b0dfd1bfd17528b0ba2a4   hello.txt
	AssertCommitParent(t, db, parent, grandparent)
	AssertCommitTree(t, db, grandparent, grandparentTree)
	AssertBlobContents(t, db, grandparentTree, "hello.txt", "2")
}
// TestRewriterRewritesOctopusMerges appends "_new" to every blob reachable
// from refs/heads/master in the "octopus-merge.git" fixture, then checks the
// rewritten tip's tree and blobs, its two parents, and that both parents
// share the same root commit as their own parent.
func TestRewriterRewritesOctopusMerges(t *testing.T) {
	const suffix = "_new"

	db := DatabaseFromFixture(t, "octopus-merge.git")
	rewriter := NewRewriter(db)

	opts := &RewriteOptions{
		Include: []string{"refs/heads/master"},
		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
			return &gitobj.Blob{
				Contents: io.MultiReader(b.Contents, strings.NewReader(suffix)),
				Size:     b.Size + int64(len(suffix)),
			}, nil
		},
	}

	tip, err := rewriter.Rewrite(opts)
	assert.Nil(t, err)

	const (
		mergedTree = "8a56716daa78325c3d0433cc163890969810b0da"
		root       = "9237567f379b3c83ddf53ad9a2ae3755afb62a09"
	)
	head := hex.EncodeToString(tip)

	// The rewritten HEAD is expected to point at a tree equivalent to:
	//
	//   100644 blob 309f7fc2bfd9ae77b4131cf9cbcc3b548c42ca57    a.txt
	//   100644 blob 70470dc26cb3eef54fe3dcba53066f7ca7c495c0    b.txt
	//   100644 blob f2557f74fd5b60f959baf77091782089761e2dc3    hello.txt
	AssertCommitTree(t, db, head, mergedTree)

	blobs := []struct{ name, contents string }{
		{"a.txt", "a_new"},
		{"b.txt", "b_new"},
		{"hello.txt", "hello_new"},
	}
	for _, blob := range blobs {
		AssertBlobContents(t, db, mergedTree, blob.name, blob.contents)
	}

	// The rewritten HEAD is expected to list these parents:
	//
	//   parent 1fe2b9577d5610e8d8fb2c3030534036fb648393
	//   parent ca447959bdcd20253d69b227bcc7c2e1d3126d5c
	parents := []string{
		"1fe2b9577d5610e8d8fb2c3030534036fb648393",
		"ca447959bdcd20253d69b227bcc7c2e1d3126d5c",
	}
	for _, parent := range parents {
		AssertCommitParent(t, db, head, parent)
	}

	// Each of those parents, in turn, descends from the root commit.
	for _, parent := range parents {
		AssertCommitParent(t, db, parent, root)
	}
}
// TestRewriterVisitsPackedObjects runs the rewriter over refs/heads/master of
// the "packed-objects.git" fixture with a BlobFn that records the contents of
// the blob it was handed, and checks that the recorded contents are
// "Hello, world!\n".
func TestRewriterVisitsPackedObjects(t *testing.T) {
	db := DatabaseFromFixture(t, "packed-objects.git")
	r := NewRewriter(db)

	// contents holds the data of the blob most recently passed to BlobFn.
	var contents []byte

	_, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
			var err error

			contents, err = ioutil.ReadAll(b.Contents)
			if err != nil {
				return nil, err
			}

			// b.Contents has been consumed by the read above, so hand
			// back a fresh blob over the buffered bytes.
			return &gitobj.Blob{
				Contents: bytes.NewReader(contents),
				Size:     int64(len(contents)),
			}, nil
		},
	})

	assert.NoError(t, err)
	// testify's Equal takes (expected, actual); keeping that order makes
	// the failure output label the two values correctly.
	assert.Equal(t, "Hello, world!\n", string(contents))
}
// TestRewriterDoesntVisitUnchangedSubtrees counts how often BlobFn is invoked
// for each path while rewriting refs/heads/master of the
// "repeated-subtrees.git" fixture. It expects "a.txt" to be visited twice and
// "subdir/b.txt" only once.
func TestRewriterDoesntVisitUnchangedSubtrees(t *testing.T) {
	db := DatabaseFromFixture(t, "repeated-subtrees.git")
	rewriter := NewRewriter(db)

	// visits maps a blob's path to the number of BlobFn calls made for it.
	visits := map[string]int{}
	countVisit := func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
		visits[path]++
		return b, nil
	}

	_, err := rewriter.Rewrite(&RewriteOptions{
		Include: []string{"refs/heads/master"},
		BlobFn:  countVisit,
	})

	assert.Nil(t, err)
	assert.Equal(t, 2, visits["a.txt"])
	assert.Equal(t, 1, visits["subdir/b.txt"])
}
// TestRewriterVisitsUniqueEntriesWithIdenticalContents rewrites
// refs/heads/master of the "identical-blobs.git" fixture, leaving "b.txt"
// untouched and replacing every other blob with "changed". It then checks
// that the two entries of the resulting tree hold different contents.
func TestRewriterVisitsUniqueEntriesWithIdenticalContents(t *testing.T) {
	const replacement = "changed"

	db := DatabaseFromFixture(t, "identical-blobs.git")
	rewriter := NewRewriter(db)

	rewriteAllButB := func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
		switch path {
		case "b.txt":
			// Pass this entry through unmodified.
			return b, nil
		default:
			return &gitobj.Blob{
				Contents: strings.NewReader(replacement),
				Size:     int64(len(replacement)),
			}, nil
		}
	}

	tip, err := rewriter.Rewrite(&RewriteOptions{
		Include: []string{"refs/heads/master"},
		BlobFn:  rewriteAllButB,
	})
	assert.Nil(t, err)

	const rewrittenTree = "bbbe0a7676523ae02234bfe874784ca2380c2d4b"

	AssertCommitTree(t, db, hex.EncodeToString(tip), rewrittenTree)

	// The rewritten HEAD is expected to point at a tree equivalent to:
	//
	//   100644 blob 21fb1eca31e64cd3914025058b21992ab76edcf9    a.txt
	//   100644 blob 94f3610c08588440112ed977376f26a8fba169b0    b.txt
	AssertBlobContents(t, db, rewrittenTree, "a.txt", "changed")
	AssertBlobContents(t, db, rewrittenTree, "b.txt", "original")
}
// TestRewriterIgnoresPathsThatDontMatchFilter builds a rewriter with a filter
// that includes "*.txt" and excludes "subdir/*.txt", rewrites
// refs/heads/master of the "non-repeated-subtrees.git" fixture, and checks
// that BlobFn was called once for "a.txt" and never for "subdir/b.txt".
func TestRewriterIgnoresPathsThatDontMatchFilter(t *testing.T) {
	filter := filepathfilter.New(
		[]string{"*.txt"},        // include
		[]string{"subdir/*.txt"}, // exclude
	)

	db := DatabaseFromFixture(t, "non-repeated-subtrees.git")
	rewriter := NewRewriter(db, WithFilter(filter))

	// visits maps a blob's path to the number of BlobFn calls made for it.
	visits := map[string]int{}
	countVisit := func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
		visits[path]++
		return b, nil
	}

	_, err := rewriter.Rewrite(&RewriteOptions{
		Include: []string{"refs/heads/master"},
		BlobFn:  countVisit,
	})

	assert.Nil(t, err)
	assert.Equal(t, 1, visits["a.txt"])
	assert.Equal(t, 0, visits["subdir/b.txt"])
}
// TestRewriterAllowsAdditionalTreeEntries writes an extra blob to the
// "linear-history.git" fixture and uses TreeCallbackFn to append it to every
// tree as "extra.txt" while leaving existing blobs untouched. It then checks
// the tree, parent, "hello.txt" and "extra.txt" of HEAD, HEAD~1 and HEAD~2.
func TestRewriterAllowsAdditionalTreeEntries(t *testing.T) {
	const extraContents = "extra\n"

	db := DatabaseFromFixture(t, "linear-history.git")
	rewriter := NewRewriter(db)

	extraOid, err := db.WriteBlob(&gitobj.Blob{
		Contents: strings.NewReader(extraContents),
		Size:     int64(len(extraContents)),
	})
	assert.Nil(t, err)

	keepBlob := func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
		return b, nil
	}
	// addExtra returns a new tree made of the given tree's entries followed
	// by an "extra.txt" entry pointing at the blob written above.
	addExtra := func(path string, tr *gitobj.Tree) (*gitobj.Tree, error) {
		entry := &gitobj.TreeEntry{
			Name:     "extra.txt",
			Filemode: 0100644,
			Oid:      extraOid,
		}
		return &gitobj.Tree{Entries: append(tr.Entries, entry)}, nil
	}

	tip, err := rewriter.Rewrite(&RewriteOptions{
		Include:        []string{"refs/heads/master"},
		BlobFn:         keepBlob,
		TreeCallbackFn: addExtra,
	})
	assert.Nil(t, err)

	// commits lists the rewritten history from HEAD backwards. Each tree is
	// expected to hold two entries:
	//
	//   HEAD:
	//     100644 blob e440e5c842586965a7fb77deda2eca68612b1f53    hello.txt
	//     100644 blob 0f2287157f7cb0dd40498c7a92f74b6975fa2d57    extra.txt
	//   HEAD~1:
	//     100644 blob d8263ee9860594d2806b0dfd1bfd17528b0ba2a4    hello.txt
	//     100644 blob 0f2287157f7cb0dd40498c7a92f74b6975fa2d57    extra.txt
	//   HEAD~2:
	//     100644 blob 56a6051ca2b02b04ef92d5150c9ef600403cb1de    hello.txt
	//     100644 blob 0f2287157f7cb0dd40498c7a92f74b6975fa2d57    extra.txt
	commits := []struct {
		oid   string
		tree  string
		hello string
	}{
		{
			oid:   hex.EncodeToString(tip),
			tree:  "40c2eb627a3b8e84b82a47a973d32960f3898b6a",
			hello: "3",
		},
		{
			oid:   "45af5deb9a25bc4069b15c1f5bdccb0340978707",
			tree:  "d7a5bcb69f2cd2652a014663a948952ea603c2c0",
			hello: "2",
		},
		{
			oid:   "99f6bd7cd69b45494afed95b026f3e450de8304f",
			tree:  "45b752554d128f85bf23d7c3ddf48c47cbc345c8",
			hello: "1",
		},
	}

	for i, commit := range commits {
		if i > 0 {
			// Every commit after HEAD must be the parent of the one
			// listed before it.
			AssertCommitParent(t, db, commits[i-1].oid, commit.oid)
		}

		AssertCommitTree(t, db, commit.oid, commit.tree)
		AssertBlobContents(t, db, commit.tree, "hello.txt", commit.hello)
		AssertBlobContents(t, db, commit.tree, "extra.txt", extraContents)
	}
}
// TestHistoryRewriterCallbacks records every invocation of BlobFn,
// TreePreCallbackFn and TreeCallbackFn made while rewriting refs/heads/master
// of the "linear-history.git" fixture, and checks that the callbacks fire in
// the order tree-pre, blob, tree-post, three times over.
func TestHistoryRewriterCallbacks(t *testing.T) {
	type Call struct {
		Type string
		Path string
	}

	// calls accumulates one entry per callback invocation, in call order.
	var calls []*Call

	db := DatabaseFromFixture(t, "linear-history.git")
	r := NewRewriter(db)

	// The tree parameters below are named "tr" rather than "t" so that they
	// do not shadow the *testing.T of the enclosing test.
	_, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
			calls = append(calls, &Call{
				Type: "blob",
				Path: path,
			})
			return b, nil
		},

		TreePreCallbackFn: func(path string, tr *gitobj.Tree) error {
			calls = append(calls, &Call{
				Type: "tree-pre",
				Path: path,
			})
			return nil
		},

		TreeCallbackFn: func(path string, tr *gitobj.Tree) (*gitobj.Tree, error) {
			calls = append(calls, &Call{
				Type: "tree-post",
				Path: path,
			})
			return tr, nil
		},
	})

	assert.Nil(t, err)

	expected := []*Call{
		{Type: "tree-pre", Path: "/"},
		{Type: "blob", Path: "hello.txt"},
		{Type: "tree-post", Path: "/"},

		{Type: "tree-pre", Path: "/"},
		{Type: "blob", Path: "hello.txt"},
		{Type: "tree-post", Path: "/"},

		{Type: "tree-pre", Path: "/"},
		{Type: "blob", Path: "hello.txt"},
		{Type: "tree-post", Path: "/"},
	}

	// assert.Len only marks the test as failed; bail out explicitly so a
	// wrong call count is reported as a failure rather than as an
	// index-out-of-range panic in the loop below.
	if !assert.Len(t, calls, len(expected)) {
		return
	}

	for i, want := range expected {
		// testify's Equal takes (expected, actual).
		assert.Equal(t, want, calls[i], "unexpected callback at index %d", i)
	}
}
// TestHistoryRewriterTreePreCallbackPropagatesErrors checks that an error
// returned from TreePreCallbackFn is the error returned by Rewrite.
func TestHistoryRewriterTreePreCallbackPropagatesErrors(t *testing.T) {
	expected := errors.Errorf("my error")

	db := DatabaseFromFixture(t, "linear-history.git")
	r := NewRewriter(db)

	_, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
			return b, nil
		},

		// The tree parameter is named "tr" rather than "t" so that it does
		// not shadow the *testing.T of the enclosing test.
		TreePreCallbackFn: func(path string, tr *gitobj.Tree) error {
			return expected
		},
	})

	// testify's Equal takes (expected, actual); keeping that order makes
	// the failure output label the two values correctly.
	assert.Equal(t, expected, err)
}
// TestHistoryRewriterUseOriginalParentsForPartialMigration rewrites
// refs/heads/master of the "linear-history-with-tags.git" fixture while
// excluding refs/tags/middle, and checks the parent of the rewritten tip.
func TestHistoryRewriterUseOriginalParentsForPartialMigration(t *testing.T) {
	// Only the latest commit (HEAD) should be touched by the rewrite; the
	// first two commits are both reachable from refs/tags/middle and are
	// therefore excluded.
	//
	// The single rewritten commit should be linked to its parent like so:
	//
	//   tree 20ecedad3e74a113695fe5f00ab003694e2e1e9c
	//   parent 228afe30855933151f7a88e70d9d88314fd2f191
	//   author Taylor Blau <me@ttaylorr.com> 1496954214 -0600
	//   committer Taylor Blau <me@ttaylorr.com> 1496954214 -0600
	//
	//   some.txt: c
	const wantParent = "228afe30855933151f7a88e70d9d88314fd2f191"

	db := DatabaseFromFixture(t, "linear-history-with-tags.git")
	rewriter := NewRewriter(db)

	keepBlob := func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
		return b, nil
	}

	tip, err := rewriter.Rewrite(&RewriteOptions{
		Include: []string{"refs/heads/master"},
		Exclude: []string{"refs/tags/middle"},
		BlobFn:  keepBlob,
	})

	assert.NoError(t, err)
	AssertCommitParent(t, db, hex.EncodeToString(tip), wantParent)
}
// TestHistoryRewriterUpdatesRefs rewrites refs/heads/master of the
// "linear-history.git" fixture with UpdateRefs enabled, appending "_suffix"
// to every blob. It checks the ref's value before the rewrite, that the ref
// equals the returned tip afterwards, and the parent chain of the new tip.
func TestHistoryRewriterUpdatesRefs(t *testing.T) {
	const (
		ref    = "refs/heads/master"
		suffix = "_suffix"
	)

	db := DatabaseFromFixture(t, "linear-history.git")
	rewriter := NewRewriter(db)

	// Before rewriting, the ref must still hold its original value.
	original := HexDecode(t, "e669b63f829bfb0b91fc52a5bcea53dd7977a0ee")
	AssertRef(t, db, ref, original)

	appendSuffix := func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
		return &gitobj.Blob{
			Contents: io.MultiReader(b.Contents, strings.NewReader(suffix)),
			Size:     b.Size + int64(len(suffix)),
		}, nil
	}

	tip, err := rewriter.Rewrite(&RewriteOptions{
		Include:    []string{ref},
		UpdateRefs: true,
		BlobFn:     appendSuffix,
	})
	assert.Nil(t, err)

	const (
		parent      = "66561fe3ae68651658e18e48053dcfe66a2e9da1"
		grandparent = "8268d8486c48024a871fa42fc487dbeabd6e3d86"
	)
	head := hex.EncodeToString(tip)

	AssertRef(t, db, ref, tip)

	AssertCommitParent(t, db, head, parent)
	AssertCommitParent(t, db, parent, grandparent)
}
// TestHistoryRewriterReturnsFilter checks that Rewriter.Filter() hands back
// the very same filter that was supplied through WithFilter, by comparing
// the addresses of the pointed-to values.
func TestHistoryRewriterReturnsFilter(t *testing.T) {
	// addressOf reports the address of the value that ptr points to.
	addressOf := func(ptr interface{}) uintptr {
		return reflect.ValueOf(ptr).Elem().Addr().Pointer()
	}

	filter := filepathfilter.New([]string{"a"}, []string{"b"})
	rewriter := NewRewriter(nil, WithFilter(filter))

	want := addressOf(filter)
	have := addressOf(rewriter.Filter())

	assert.Equal(t, want, have,
		"git/githistory: expected Rewriter.Filter() to return same *filepathfilter.Filter instance")
}
// debug is meant to be called from a defer statement to aid in debugging a
// test failure among any in this file.
//
// Callers are expected to call it immediately after calling the Rewrite()
// function.
//
// It logs, between two rows of asterisks, the result of db.Root() along with
// the given tip (hex-encoded) and err.
func debug(t *testing.T, db *gitobj.ObjectDatabase, tip []byte, err error) {
	root, ok := db.Root()

	t.Log(strings.Repeat("*", 80))
	t.Logf("* root=%s, ok=%t\n", root, ok)
	t.Logf("* tip=%x\n", tip)
	// Use %v rather than %s: err is frequently nil here, and formatting a
	// nil error with %s yields "%!s(<nil>)" instead of "<nil>".
	t.Logf("* err=%v\n", err)
	t.Log(strings.Repeat("*", 80))
}