git-lfs/git/githistory/rewriter_test.go
brian m. carlson da4fdef00b
Support SHA-256 repositories
Git will start to support SHA-256 as a hash for repositories in the near
future.  Let's update gitobj to version 2 to support SHA-256
repositories properly.  We initialize the repository based on the
extensions.objectFormat value, if one is provided, since this is the
configuration key that represents the hash algorithm.

Vendor the proper dependencies in place.
2020-07-29 20:53:44 +00:00

491 lines
15 KiB
Go

package githistory
import (
"bytes"
"encoding/hex"
"io"
"io/ioutil"
"reflect"
"strconv"
"strings"
"testing"
"github.com/git-lfs/git-lfs/errors"
"github.com/git-lfs/git-lfs/filepathfilter"
"github.com/git-lfs/gitobj/v2"
"github.com/stretchr/testify/assert"
)
// TestRewriterRewritesHistory rewrites refs/heads/master of the
// "linear-history.git" fixture with a BlobFn that parses every blob as a
// decimal integer and stores that integer plus one, then verifies the tree,
// parent, and "hello.txt" contents of the tip and of its two ancestors.
func TestRewriterRewritesHistory(t *testing.T) {
	db := DatabaseFromFixture(t, "linear-history.git")
	rewriter := NewRewriter(db)

	// increment turns a blob holding the decimal number N into a blob
	// holding N+1, failing if the blob cannot be read or parsed.
	increment := func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
		raw, err := ioutil.ReadAll(b.Contents)
		if err != nil {
			return nil, err
		}

		value, err := strconv.Atoi(string(raw))
		if err != nil {
			return nil, err
		}

		next := strconv.Itoa(value + 1)
		blob := &gitobj.Blob{
			Contents: strings.NewReader(next),
			Size:     int64(len(next)),
		}
		return blob, nil
	}

	tip, err := rewriter.Rewrite(&RewriteOptions{
		Include: []string{"refs/heads/master"},
		BlobFn:  increment,
	})
	assert.Nil(t, err)

	const (
		// Trees expected at HEAD, HEAD~1 and HEAD~2 after the rewrite.
		headTree        = "ad0aebd16e34cf047820994ea7538a6d4a111082"
		parentTree      = "6e07bd31cb70c4add2c973481ad4fa38b235ca69"
		grandparentTree = "c5decfe1fcf39b8c489f4a0bf3b3823676339f80"

		// Rewritten commits expected at HEAD~1 and HEAD~2.
		parent      = "4aaa3f49ffeabbb874250fe13ffeb8c683aba650"
		grandparent = "24a341e1ff75addc22e336a8d87f82ba56b86fcf"
	)
	head := hex.EncodeToString(tip)

	// HEAD is expected to point at a tree equivalent to:
	//
	//   100644 blob bf0d87ab1b2b0ec1a11a3973d2845b42413d9767   hello.txt
	AssertCommitTree(t, db, head, headTree)
	AssertBlobContents(t, db, headTree, "hello.txt", "4")

	// HEAD~1 is expected to point at a tree equivalent to:
	//
	//   100644 blob e440e5c842586965a7fb77deda2eca68612b1f53   hello.txt
	AssertCommitParent(t, db, head, parent)
	AssertCommitTree(t, db, parent, parentTree)
	AssertBlobContents(t, db, parentTree, "hello.txt", "3")

	// HEAD~2 is expected to point at a tree equivalent to:
	//
	//   100644 blob d8263ee9860594d2806b0dfd1bfd17528b0ba2a4   hello.txt
	AssertCommitParent(t, db, parent, grandparent)
	AssertCommitTree(t, db, grandparent, grandparentTree)
	AssertBlobContents(t, db, grandparentTree, "hello.txt", "2")
}
// TestRewriterRewritesOctopusMerges rewrites refs/heads/master of the
// "octopus-merge.git" fixture with a BlobFn that appends "_new" to every
// blob, then verifies the rewritten tip's tree, its two parents, and that
// both parents share the same parent commit.
func TestRewriterRewritesOctopusMerges(t *testing.T) {
	const suffix = "_new"

	db := DatabaseFromFixture(t, "octopus-merge.git")
	rewriter := NewRewriter(db)

	// appendSuffix streams the original contents followed by the suffix,
	// growing the recorded size to match.
	appendSuffix := func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
		blob := &gitobj.Blob{
			Contents: io.MultiReader(b.Contents, strings.NewReader(suffix)),
			Size:     b.Size + int64(len(suffix)),
		}
		return blob, nil
	}

	tip, err := rewriter.Rewrite(&RewriteOptions{
		Include: []string{"refs/heads/master"},
		BlobFn:  appendSuffix,
	})
	assert.Nil(t, err)

	const (
		tree = "8a56716daa78325c3d0433cc163890969810b0da"
		root = "9237567f379b3c83ddf53ad9a2ae3755afb62a09"
	)
	head := hex.EncodeToString(tip)

	// HEAD is expected to point at a tree equivalent to:
	//
	//   100644 blob 309f7fc2bfd9ae77b4131cf9cbcc3b548c42ca57   a.txt
	//   100644 blob 70470dc26cb3eef54fe3dcba53066f7ca7c495c0   b.txt
	//   100644 blob f2557f74fd5b60f959baf77091782089761e2dc3   hello.txt
	AssertCommitTree(t, db, head, tree)
	AssertBlobContents(t, db, tree, "a.txt", "a_new")
	AssertBlobContents(t, db, tree, "b.txt", "b_new")
	AssertBlobContents(t, db, tree, "hello.txt", "hello_new")

	// HEAD is expected to list these commits as its parents:
	//
	//   parent 1fe2b9577d5610e8d8fb2c3030534036fb648393
	//   parent ca447959bdcd20253d69b227bcc7c2e1d3126d5c
	parents := []string{
		"1fe2b9577d5610e8d8fb2c3030534036fb648393",
		"ca447959bdcd20253d69b227bcc7c2e1d3126d5c",
	}
	for _, parent := range parents {
		AssertCommitParent(t, db, head, parent)
	}

	// Each of those parents is in turn expected to descend from the root
	// commit.
	for _, parent := range parents {
		AssertCommitParent(t, db, parent, root)
	}
}
// TestRewriterVisitsPackedObjects rewrites refs/heads/master of the
// "packed-objects.git" fixture with a BlobFn that records the contents of the
// blob it was most recently handed (returning an equivalent blob), and
// verifies that the recorded contents are "Hello, world!\n".
func TestRewriterVisitsPackedObjects(t *testing.T) {
	db := DatabaseFromFixture(t, "packed-objects.git")
	r := NewRewriter(db)

	// contents holds the bytes of the last blob passed to BlobFn.
	var contents []byte

	_, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
			var err error

			contents, err = ioutil.ReadAll(b.Contents)
			if err != nil {
				return nil, err
			}

			// b.Contents has been consumed, so hand back a fresh
			// reader over the same bytes.
			return &gitobj.Blob{
				Contents: bytes.NewReader(contents),
				Size:     int64(len(contents)),
			}, nil
		},
	})

	assert.NoError(t, err)
	// assert.Equal takes (expected, actual); keeping that order makes the
	// failure output label the two values correctly.
	assert.Equal(t, "Hello, world!\n", string(contents))
}
// TestRewriterDoesntVisitUnchangedSubtrees rewrites refs/heads/master of the
// "repeated-subtrees.git" fixture with a pass-through BlobFn that counts how
// often each path is visited, and expects "a.txt" to be seen twice but
// "subdir/b.txt" only once.
func TestRewriterDoesntVisitUnchangedSubtrees(t *testing.T) {
	db := DatabaseFromFixture(t, "repeated-subtrees.git")
	rewriter := NewRewriter(db)

	// visits maps a blob path to the number of BlobFn invocations for it.
	visits := make(map[string]int)
	countVisit := func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
		visits[path]++
		return b, nil
	}

	_, err := rewriter.Rewrite(&RewriteOptions{
		Include: []string{"refs/heads/master"},
		BlobFn:  countVisit,
	})

	assert.Nil(t, err)
	assert.Equal(t, 2, visits["a.txt"])
	assert.Equal(t, 1, visits["subdir/b.txt"])
}
// TestRewriterVisitsUniqueEntriesWithIdenticalContents rewrites
// refs/heads/master of the "identical-blobs.git" fixture with a BlobFn that
// leaves "b.txt" alone and replaces every other blob with "changed", then
// verifies that the two entries of the rewritten tree ended up with different
// contents.
func TestRewriterVisitsUniqueEntriesWithIdenticalContents(t *testing.T) {
	const replacement = "changed"

	db := DatabaseFromFixture(t, "identical-blobs.git")
	rewriter := NewRewriter(db)

	// replaceAllButB rewrites every blob except the one at "b.txt".
	replaceAllButB := func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
		if path != "b.txt" {
			return &gitobj.Blob{
				Contents: strings.NewReader(replacement),
				Size:     int64(len(replacement)),
			}, nil
		}
		return b, nil
	}

	tip, err := rewriter.Rewrite(&RewriteOptions{
		Include: []string{"refs/heads/master"},
		BlobFn:  replaceAllButB,
	})
	assert.Nil(t, err)

	const tree = "bbbe0a7676523ae02234bfe874784ca2380c2d4b"

	AssertCommitTree(t, db, hex.EncodeToString(tip), tree)

	// HEAD is expected to point at a tree equivalent to:
	//
	//   100644 blob 21fb1eca31e64cd3914025058b21992ab76edcf9   a.txt
	//   100644 blob 94f3610c08588440112ed977376f26a8fba169b0   b.txt
	AssertBlobContents(t, db, tree, "a.txt", "changed")
	AssertBlobContents(t, db, tree, "b.txt", "original")
}
// TestRewriterIgnoresPathsThatDontMatchFilter builds a Rewriter with a filter
// that includes "*.txt" and excludes "subdir/*.txt", rewrites
// refs/heads/master of the "non-repeated-subtrees.git" fixture with a
// counting pass-through BlobFn, and expects "a.txt" to be visited once and
// "subdir/b.txt" never.
func TestRewriterIgnoresPathsThatDontMatchFilter(t *testing.T) {
	filter := filepathfilter.New(
		[]string{"*.txt"},        // include
		[]string{"subdir/*.txt"}, // exclude
	)

	db := DatabaseFromFixture(t, "non-repeated-subtrees.git")
	rewriter := NewRewriter(db, WithFilter(filter))

	// visits maps a blob path to the number of BlobFn invocations for it.
	visits := make(map[string]int)
	countVisit := func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
		visits[path]++
		return b, nil
	}

	_, err := rewriter.Rewrite(&RewriteOptions{
		Include: []string{"refs/heads/master"},
		BlobFn:  countVisit,
	})

	assert.Nil(t, err)
	assert.Equal(t, 1, visits["a.txt"])
	assert.Equal(t, 0, visits["subdir/b.txt"])
}
// TestRewriterAllowsAdditionalTreeEntries writes an extra blob to the
// "linear-history.git" fixture, rewrites refs/heads/master with a
// TreeCallbackFn that appends an "extra.txt" entry for that blob to every
// tree, and verifies the tree, parent, and blob contents of the tip and of
// its two ancestors.
func TestRewriterAllowsAdditionalTreeEntries(t *testing.T) {
	const extraContents = "extra\n"

	db := DatabaseFromFixture(t, "linear-history.git")
	rewriter := NewRewriter(db)

	extra, err := db.WriteBlob(&gitobj.Blob{
		Contents: strings.NewReader(extraContents),
		Size:     int64(len(extraContents)),
	})
	assert.Nil(t, err)

	// keepBlob leaves every blob untouched.
	keepBlob := func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
		return b, nil
	}
	// addExtra returns a tree holding the visited tree's entries followed
	// by an "extra.txt" entry pointing at the blob written above.
	addExtra := func(path string, tr *gitobj.Tree) (*gitobj.Tree, error) {
		entry := &gitobj.TreeEntry{
			Name:     "extra.txt",
			Filemode: 0100644,
			Oid:      extra,
		}
		return &gitobj.Tree{Entries: append(tr.Entries, entry)}, nil
	}

	tip, err := rewriter.Rewrite(&RewriteOptions{
		Include:        []string{"refs/heads/master"},
		BlobFn:         keepBlob,
		TreeCallbackFn: addExtra,
	})
	assert.Nil(t, err)

	// history lists the rewritten commits from HEAD back to HEAD~2 together
	// with the tree and "hello.txt" contents expected at each of them.
	history := []struct {
		commit, tree, hello string
	}{
		// HEAD:
		//
		//   100644 blob e440e5c842586965a7fb77deda2eca68612b1f53   hello.txt
		//   100644 blob 0f2287157f7cb0dd40498c7a92f74b6975fa2d57   extra.txt
		{hex.EncodeToString(tip), "40c2eb627a3b8e84b82a47a973d32960f3898b6a", "3"},
		// HEAD~1:
		//
		//   100644 blob d8263ee9860594d2806b0dfd1bfd17528b0ba2a4   hello.txt
		//   100644 blob 0f2287157f7cb0dd40498c7a92f74b6975fa2d57   extra.txt
		{"45af5deb9a25bc4069b15c1f5bdccb0340978707", "d7a5bcb69f2cd2652a014663a948952ea603c2c0", "2"},
		// HEAD~2:
		//
		//   100644 blob 56a6051ca2b02b04ef92d5150c9ef600403cb1de   hello.txt
		//   100644 blob 0f2287157f7cb0dd40498c7a92f74b6975fa2d57   extra.txt
		{"99f6bd7cd69b45494afed95b026f3e450de8304f", "45b752554d128f85bf23d7c3ddf48c47cbc345c8", "1"},
	}

	for i, want := range history {
		// Every commit after the first must be the parent of the one
		// listed before it.
		if i > 0 {
			AssertCommitParent(t, db, history[i-1].commit, want.commit)
		}

		AssertCommitTree(t, db, want.commit, want.tree)
		AssertBlobContents(t, db, want.tree, "hello.txt", want.hello)
		AssertBlobContents(t, db, want.tree, "extra.txt", "extra\n")
	}
}
// CallbackCall is a structure recording information pertinent to when a
// *githistory.Rewriter called either BlobFn, TreePreCallbackFn, or
// TreeCallbackFn.
type CallbackCall struct {
	// Type names the callback that was invoked; the recorder in this file
	// uses "blob", "tree-pre", and "tree-post".
	Type string
	// Path is the path argument the callback was invoked with.
	Path string
}
var (
	// collectCalls builds a *RewriteOptions targeting "refs/heads/master"
	// whose BlobFn, TreePreCallbackFn, and TreeCallbackFn each append a
	// *CallbackCall describing the invocation to the slice that calls
	// points to, and otherwise hand back the visited object unchanged.
	collectCalls = func(calls *[]*CallbackCall) *RewriteOptions {
		// record appends one entry of the given kind for path.
		record := func(kind, path string) {
			*calls = append(*calls, &CallbackCall{Type: kind, Path: path})
		}

		opts := &RewriteOptions{Include: []string{"refs/heads/master"}}

		opts.BlobFn = func(path string, blob *gitobj.Blob) (*gitobj.Blob, error) {
			record("blob", path)
			return blob, nil
		}
		opts.TreePreCallbackFn = func(path string, tree *gitobj.Tree) error {
			record("tree-pre", path)
			return nil
		}
		opts.TreeCallbackFn = func(path string, tree *gitobj.Tree) (*gitobj.Tree, error) {
			record("tree-post", path)
			return tree, nil
		}

		return opts
	}
)
// TestHistoryRewriterCallbacks rewrites the "linear-history.git" fixture with
// the recording callbacks produced by collectCalls and verifies the exact
// sequence of callback invocations: three rounds of tree-pre on "/", blob on
// "hello.txt", tree-post on "/".
func TestHistoryRewriterCallbacks(t *testing.T) {
	var calls []*CallbackCall

	db := DatabaseFromFixture(t, "linear-history.git")
	r := NewRewriter(db)

	_, err := r.Rewrite(collectCalls(&calls))
	assert.Nil(t, err)

	expected := []*CallbackCall{
		{Type: "tree-pre", Path: "/"},
		{Type: "blob", Path: "hello.txt"},
		{Type: "tree-post", Path: "/"},

		{Type: "tree-pre", Path: "/"},
		{Type: "blob", Path: "hello.txt"},
		{Type: "tree-post", Path: "/"},

		{Type: "tree-pre", Path: "/"},
		{Type: "blob", Path: "hello.txt"},
		{Type: "tree-post", Path: "/"},
	}

	// assert.Len only records a failure and keeps going, so stop here on a
	// length mismatch instead of panicking on an out-of-range index below.
	if !assert.Len(t, calls, len(expected)) {
		return
	}

	for i, want := range expected {
		// assert.Equal takes (expected, actual).
		assert.Equal(t, want, calls[i], "callback call #%d", i)
	}
}
// TestHistoryRewriterCallbacksSubtrees rewrites the
// "non-repeated-subtrees.git" fixture with the recording callbacks produced
// by collectCalls and verifies the exact sequence of callback invocations,
// including the nested tree-pre/tree-post pair for "/subdir".
func TestHistoryRewriterCallbacksSubtrees(t *testing.T) {
	var calls []*CallbackCall

	db := DatabaseFromFixture(t, "non-repeated-subtrees.git")
	r := NewRewriter(db)

	_, err := r.Rewrite(collectCalls(&calls))
	assert.Nil(t, err)

	expected := []*CallbackCall{
		{Type: "tree-pre", Path: "/"},
		{Type: "blob", Path: "a.txt"},
		{Type: "tree-post", Path: "/"},

		{Type: "tree-pre", Path: "/"},
		{Type: "tree-pre", Path: "/subdir"},
		{Type: "blob", Path: "subdir/b.txt"},
		{Type: "tree-post", Path: "/subdir"},
		{Type: "tree-post", Path: "/"},
	}

	// assert.Len only records a failure and keeps going, so stop here on a
	// length mismatch instead of panicking on an out-of-range index below.
	if !assert.Len(t, calls, len(expected)) {
		return
	}

	for i, want := range expected {
		// assert.Equal takes (expected, actual).
		assert.Equal(t, want, calls[i], "callback call #%d", i)
	}
}
// TestHistoryRewriterTreePreCallbackPropagatesErrors rewrites the
// "linear-history.git" fixture with a TreePreCallbackFn that always fails and
// verifies that Rewrite() returns that very error.
func TestHistoryRewriterTreePreCallbackPropagatesErrors(t *testing.T) {
	expected := errors.Errorf("my error")

	db := DatabaseFromFixture(t, "linear-history.git")
	r := NewRewriter(db)

	_, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
		BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
			return b, nil
		},

		// The tree parameter is deliberately not named "t" so that it
		// does not shadow the *testing.T of the enclosing test.
		TreePreCallbackFn: func(path string, tree *gitobj.Tree) error {
			return expected
		},
	})

	// assert.Equal takes (expected, actual); keeping that order makes the
	// failure output label the two values correctly.
	assert.Equal(t, expected, err)
}
// TestHistoryRewriterUseOriginalParentsForPartialMigration rewrites
// refs/heads/master of the "linear-history-with-tags.git" fixture while
// excluding refs/tags/middle, using a pass-through BlobFn, and verifies that
// the rewritten tip has the expected parent.
func TestHistoryRewriterUseOriginalParentsForPartialMigration(t *testing.T) {
	db := DatabaseFromFixture(t, "linear-history-with-tags.git")
	rewriter := NewRewriter(db)

	// keepBlob leaves every blob untouched.
	keepBlob := func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
		return b, nil
	}

	opts := &RewriteOptions{
		Include: []string{"refs/heads/master"},
		Exclude: []string{"refs/tags/middle"},
		BlobFn:  keepBlob,
	}
	tip, err := rewriter.Rewrite(opts)

	// Only the latest commit (HEAD) should have been rewritten; the first
	// two commits are both reachable from refs/tags/middle and are therefore
	// excluded.
	//
	// The single rewritten commit should link to its parent like so:
	//
	//   tree 20ecedad3e74a113695fe5f00ab003694e2e1e9c
	//   parent 228afe30855933151f7a88e70d9d88314fd2f191
	//   author Taylor Blau <me@ttaylorr.com> 1496954214 -0600
	//   committer Taylor Blau <me@ttaylorr.com> 1496954214 -0600
	//
	//   some.txt: c
	const wantParent = "228afe30855933151f7a88e70d9d88314fd2f191"

	assert.NoError(t, err)
	AssertCommitParent(t, db, hex.EncodeToString(tip), wantParent)
}
// TestHistoryRewriterUpdatesRefs checks the initial value of
// refs/heads/master in the "linear-history.git" fixture, rewrites it with
// UpdateRefs set and a BlobFn that appends "_suffix" to every blob, and then
// verifies that the ref points at the returned tip and that the rewritten
// commits are chained as expected.
func TestHistoryRewriterUpdatesRefs(t *testing.T) {
	const branch = "refs/heads/master"

	db := DatabaseFromFixture(t, "linear-history.git")
	rewriter := NewRewriter(db)

	original := HexDecode(t, "e669b63f829bfb0b91fc52a5bcea53dd7977a0ee")
	AssertRef(t, db, branch, original)

	// appendSuffix streams the original contents followed by "_suffix",
	// growing the recorded size by the length of the suffix reader.
	appendSuffix := func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
		tail := strings.NewReader("_suffix")

		blob := &gitobj.Blob{
			Contents: io.MultiReader(b.Contents, tail),
			Size:     b.Size + int64(tail.Len()),
		}
		return blob, nil
	}

	tip, err := rewriter.Rewrite(&RewriteOptions{
		Include:    []string{branch},
		UpdateRefs: true,
		BlobFn:     appendSuffix,
	})
	assert.Nil(t, err)

	// Rewritten commits, newest first.
	head := hex.EncodeToString(tip)
	const (
		parent      = "66561fe3ae68651658e18e48053dcfe66a2e9da1"
		grandparent = "8268d8486c48024a871fa42fc487dbeabd6e3d86"
	)

	AssertRef(t, db, branch, tip)

	AssertCommitParent(t, db, head, parent)
	AssertCommitParent(t, db, parent, grandparent)
}
// TestHistoryRewriterReturnsFilter builds a Rewriter with a filter option and
// verifies, by comparing addresses obtained through reflection, that
// Rewriter.Filter() hands back the same filter instance.
func TestHistoryRewriterReturnsFilter(t *testing.T) {
	filter := filepathfilter.New([]string{"a"}, []string{"b"})
	rewriter := NewRewriter(nil, WithFilter(filter))

	// addressOf dereferences the pointer it is given and reports the
	// address of the pointed-to value.
	addressOf := func(ptr interface{}) uintptr {
		return reflect.ValueOf(ptr).Elem().Addr().Pointer()
	}

	want := addressOf(filter)
	have := addressOf(rewriter.Filter())

	assert.Equal(t, want, have,
		"git/githistory: expected Rewriter.Filter() to return same *filepathfilter.Filter instance")
}
// debug is meant to be called from a defer statement to aid in debugging a
// test failure among any in this file. It logs the object database's root
// (and whether one was reported), the given tip in hexadecimal, and the given
// error, framed by two rows of asterisks.
//
// Callers are expected to call it immediately after calling the Rewrite()
// function.
func debug(t *testing.T, db *gitobj.ObjectDatabase, tip []byte, err error) {
	root, ok := db.Root()

	t.Log(strings.Repeat("*", 80))
	t.Logf("* root=%s, ok=%t\n", root, ok)
	t.Logf("* tip=%x\n", tip)
	// Use %v rather than %s so that a nil error is logged as "<nil>"
	// instead of the fmt diagnostic "%!s(<nil>)".
	t.Logf("* err=%v\n", err)
	t.Log(strings.Repeat("*", 80))
}