Merge pull request #2295 from git-lfs/githistory-filter-culling

git/githistory: cull subtrees, blobs based on filepathfilter
This commit is contained in:
Taylor Blau 2017-06-05 13:31:18 -06:00 committed by GitHub
commit e377d7cc50
15 changed files with 74 additions and 4 deletions

@ -0,0 +1 @@
ref: refs/heads/master

@ -0,0 +1,7 @@
[core]
repositoryformatversion = 0
filemode = true
bare = false
logallrefupdates = true
ignorecase = true
precomposeunicode = true

@ -0,0 +1,2 @@
0000000000000000000000000000000000000000 37f99c7f2706d317b3bf7ff13d574eef33d8788a Taylor Blau <me@ttaylorr.com> 1496686519 -0600 commit (initial): a.txt: initial commit
37f99c7f2706d317b3bf7ff13d574eef33d8788a bc63077ac5e575ccc9dbbd93dc882f1e10600ea7 Taylor Blau <me@ttaylorr.com> 1496686541 -0600 commit: subdir/b.txt: initial commit

@ -0,0 +1,2 @@
0000000000000000000000000000000000000000 37f99c7f2706d317b3bf7ff13d574eef33d8788a Taylor Blau <me@ttaylorr.com> 1496686519 -0600 commit (initial): a.txt: initial commit
37f99c7f2706d317b3bf7ff13d574eef33d8788a bc63077ac5e575ccc9dbbd93dc882f1e10600ea7 Taylor Blau <me@ttaylorr.com> 1496686541 -0600 commit: subdir/b.txt: initial commit

@ -0,0 +1,2 @@
x•ŽQjÃ0ó­J^K+—BÏÐ ¬¤ØqQÖ<51>Ü>¦=A¿æñòº,Ma@Ð.X\bæ,môÙâ‰Gbò)R¢à|±RÍw¹+ ÕiÊT²¡ £„©R­§Q¤"H1²áMok‡o~Í;¾fÞà¼ÈUõWôS^— ¸q
!?:8Ú`­Ùí~Oåß¡yl©´þNúÔOh÷¦<C3B7>gøÛ3o.ÇKï

@ -0,0 +1 @@
bc63077ac5e575ccc9dbbd93dc882f1e10600ea7

@ -6,6 +6,7 @@ import (
"path/filepath" "path/filepath"
"sync" "sync"
"github.com/git-lfs/git-lfs/filepathfilter"
"github.com/git-lfs/git-lfs/git" "github.com/git-lfs/git-lfs/git"
"github.com/git-lfs/git-lfs/git/odb" "github.com/git-lfs/git-lfs/git/odb"
) )
@ -22,6 +23,10 @@ type Rewriter struct {
// commits is a mapping of old commit SHAs to new ones, where the ASCII // commits is a mapping of old commit SHAs to new ones, where the ASCII
// hex encoding of the SHA1 values are used as map keys. // hex encoding of the SHA1 values are used as map keys.
commits map[string][]byte commits map[string][]byte
// filter is an optional value used to specify which tree entries
// (blobs, subtrees) are modifiable given a BlobFn. If non-nil, this
// filter will cull out any unmodifiable subtrees and blobs.
filter *filepathfilter.Filter
// db is the *ObjectDatabase from which blobs, commits, and trees are // db is the *ObjectDatabase from which blobs, commits, and trees are
// loaded from. // loaded from.
db *odb.ObjectDatabase db *odb.ObjectDatabase
@ -59,15 +64,33 @@ type RewriteOptions struct {
// of filepath.Join(...) or os.PathSeparator. // of filepath.Join(...) or os.PathSeparator.
type BlobRewriteFn func(path string, b *odb.Blob) (*odb.Blob, error) type BlobRewriteFn func(path string, b *odb.Blob) (*odb.Blob, error)
// rewriterOption is a function that receives a *Rewriter and may modify it.
// Values of this type are accepted as the variadic opts argument of
// NewRewriter, which calls each one on the *Rewriter it is constructing.
type rewriterOption func(*Rewriter)
// WithFilter is an optional argument given to the NewRewriter constructor
// function to limit invocations of the BlobRewriteFn to only pathspecs that
// match the given *filepathfilter.Filter.
//
// The returned rewriterOption stores the given filter on the *Rewriter it is
// applied to, replacing whatever filter was set before.
var WithFilter = func(filter *filepathfilter.Filter) rewriterOption {
	apply := func(rw *Rewriter) {
		rw.filter = filter
	}
	return apply
}
// NewRewriter constructs a *Rewriter from the given *ObjectDatabase instance. // NewRewriter constructs a *Rewriter from the given *ObjectDatabase instance.
// In the updated version, every rewriterOption passed in opts is applied, in
// the order given, to the newly constructed *Rewriter before it is returned.
func NewRewriter(db *odb.ObjectDatabase) *Rewriter { func NewRewriter(db *odb.ObjectDatabase, opts ...rewriterOption) *Rewriter {
return &Rewriter{ rewriter := &Rewriter{
mu: new(sync.Mutex), mu: new(sync.Mutex),
entries: make(map[string]*odb.TreeEntry), entries: make(map[string]*odb.TreeEntry),
commits: make(map[string][]byte), commits: make(map[string][]byte),
db: db, db: db,
} }
// Apply the caller-supplied options after the defaults above are in place,
// so that an option can override any field set by the literal.
for _, opt := range opts {
opt(rewriter)
}
return rewriter
} }
// Rewrite rewrites the range of commits given by *RewriteOptions.{Left,Right} // Rewrite rewrites the range of commits given by *RewriteOptions.{Left,Right}
@ -154,6 +177,13 @@ func (r *Rewriter) rewriteTree(sha []byte, path string, fn BlobRewriteFn) ([]byt
entries := make([]*odb.TreeEntry, 0, len(tree.Entries)) entries := make([]*odb.TreeEntry, 0, len(tree.Entries))
for _, entry := range tree.Entries { for _, entry := range tree.Entries {
path := filepath.Join(path, entry.Name)
if !r.filter.Allows(path) {
entries = append(entries, entry)
continue
}
if cached := r.uncacheEntry(entry); cached != nil { if cached := r.uncacheEntry(entry); cached != nil {
entries = append(entries, cached) entries = append(entries, cached)
continue continue
@ -163,9 +193,9 @@ func (r *Rewriter) rewriteTree(sha []byte, path string, fn BlobRewriteFn) ([]byt
switch entry.Type { switch entry.Type {
case odb.BlobObjectType: case odb.BlobObjectType:
oid, err = r.rewriteBlob(entry.Oid, filepath.Join(path, entry.Name), fn) oid, err = r.rewriteBlob(entry.Oid, path, fn)
case odb.TreeObjectType: case odb.TreeObjectType:
oid, err = r.rewriteTree(entry.Oid, filepath.Join(path, entry.Name), fn) oid, err = r.rewriteTree(entry.Oid, path, fn)
default: default:
oid = entry.Oid oid = entry.Oid

@ -9,6 +9,7 @@ import (
"strings" "strings"
"testing" "testing"
"github.com/git-lfs/git-lfs/filepathfilter"
"github.com/git-lfs/git-lfs/git/odb" "github.com/git-lfs/git-lfs/git/odb"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
@ -171,3 +172,27 @@ func TestRewriterVisitsUniqueEntriesWithIdenticalContents(t *testing.T) {
AssertBlobContents(t, db, tree, "a.txt", "changed") AssertBlobContents(t, db, tree, "a.txt", "changed")
AssertBlobContents(t, db, tree, "b.txt", "original") AssertBlobContents(t, db, tree, "b.txt", "original")
} }
// TestRewriterIgnoresPathsThatDontMatchFilter builds a Rewriter with a filter
// that includes "*.txt" and excludes "subdir/**/*.txt", rewrites "master" in
// the "non-repeated-subtrees.git" fixture, and asserts that the BlobFn was
// invoked exactly once for "a.txt" and never for "subdir/b.txt".
func TestRewriterIgnoresPathsThatDontMatchFilter(t *testing.T) {
	filter := filepathfilter.New(
		[]string{"*.txt"},
		[]string{"subdir/**/*.txt"},
	)

	db := DatabaseFromFixture(t, "non-repeated-subtrees.git")
	rewriter := NewRewriter(db, WithFilter(filter))

	// visits counts how many times the BlobFn was called for each path.
	visits := make(map[string]int)
	countVisit := func(path string, b *odb.Blob) (*odb.Blob, error) {
		visits[path]++
		return b, nil
	}

	_, err := rewriter.Rewrite(&RewriteOptions{
		Left:   "master",
		BlobFn: countVisit,
	})

	assert.Nil(t, err)
	assert.Equal(t, 1, visits["a.txt"])
	assert.Equal(t, 0, visits["subdir/b.txt"])
}