Call migrate() BlobFn on every blob
Caching is now done inside the BlobFn function instead of in the caller. To do this, the OID of the original blob is given as an argument to BlobFn. Adapt migrate() calls to this : * For "migrate info", a set of "seen" OID is cached to avoid counting them twice. * For "migrate import", the cleaned blob is cached and reused. Fixes git-lfs/git-lfs#4628
This commit is contained in:
parent
c459782315
commit
3acbec20a6
@ -43,7 +43,7 @@ func migrateExportCommand(cmd *cobra.Command, args []string) {
|
||||
opts := &githistory.RewriteOptions{
|
||||
Verbose: migrateVerbose,
|
||||
ObjectMapFilePath: objectMapFilePath,
|
||||
BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
BlobFn: func(path string, oid []byte, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
if filepath.Base(path) == ".gitattributes" {
|
||||
return b, nil
|
||||
}
|
||||
|
@ -139,10 +139,12 @@ func migrateImportCommand(cmd *cobra.Command, args []string) {
|
||||
ExitWithError(errors.Wrap(err, "fatal: cannot parse --above=<n>"))
|
||||
}
|
||||
|
||||
blobCache := make(map[string]bytes.Buffer)
|
||||
|
||||
migrate(args, rewriter, l, &githistory.RewriteOptions{
|
||||
Verbose: migrateVerbose,
|
||||
ObjectMapFilePath: objectMapFilePath,
|
||||
BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
BlobFn: func(path string, origOid []byte, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
if filepath.Base(path) == ".gitattributes" {
|
||||
return b, nil
|
||||
}
|
||||
@ -166,9 +168,13 @@ func migrateImportCommand(cmd *cobra.Command, args []string) {
|
||||
|
||||
var buf bytes.Buffer
|
||||
|
||||
buf, cached := blobCache[hex.EncodeToString(origOid)]
|
||||
if !cached {
|
||||
if _, err := clean(gitfilter, &buf, b.Contents, path, b.Size); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
blobCache[hex.EncodeToString(origOid)] = buf
|
||||
}
|
||||
|
||||
if ext := filepath.Ext(path); len(ext) > 0 && above == 0 {
|
||||
exts.Add(fmt.Sprintf("*%s filter=lfs diff=lfs merge=lfs -text", ext))
|
||||
|
@ -1,6 +1,7 @@
|
||||
package commands
|
||||
|
||||
import (
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
@ -114,8 +115,10 @@ func migrateInfoCommand(cmd *cobra.Command, args []string) {
|
||||
pointersInfoEntry := &MigrateInfoEntry{Qualifier: "LFS Objects", Separate: true}
|
||||
var fixups *gitattr.Tree
|
||||
|
||||
blobSeenSet := make(map[string]struct{})
|
||||
|
||||
migrate(args, rewriter, l, &githistory.RewriteOptions{
|
||||
BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
BlobFn: func(path string, origOid []byte, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
var entry *MigrateInfoEntry
|
||||
var size int64
|
||||
var p *lfs.Pointer
|
||||
@ -138,6 +141,10 @@ func migrateInfoCommand(cmd *cobra.Command, args []string) {
|
||||
}
|
||||
}
|
||||
|
||||
_, seen := blobSeenSet[hex.EncodeToString(origOid)]
|
||||
if !seen {
|
||||
blobSeenSet[hex.EncodeToString(origOid)] = struct{}{}
|
||||
|
||||
if migrateInfoPointersMode != migrateInfoPointersNoFollow {
|
||||
p, err = lfs.DecodePointerFromBlob(b)
|
||||
}
|
||||
@ -159,6 +166,8 @@ func migrateInfoCommand(cmd *cobra.Command, args []string) {
|
||||
entry.BytesAbove += size
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return b, nil
|
||||
},
|
||||
|
||||
|
@ -116,9 +116,11 @@ func (r *RewriteOptions) treeFn() TreeCallbackFn {
|
||||
// instance, a file "b.txt" in directory "dir" would be given as "/dir/b.txt",
|
||||
// where as a file "a.txt" in the root would be given as "/a.txt".
|
||||
//
|
||||
// The origOid argument is the OID (i.e. the SHA) of the blob to be rewritten.
|
||||
//
|
||||
// As above, the path separators are OS specific, and equivalent to the result
|
||||
// of filepath.Join(...) or os.PathSeparator.
|
||||
type BlobRewriteFn func(path string, b *gitobj.Blob) (*gitobj.Blob, error)
|
||||
type BlobRewriteFn func(path string, origOid []byte, b *gitobj.Blob) (*gitobj.Blob, error)
|
||||
|
||||
// TreePreCallbackFn specifies a function to call upon opening a new tree for
|
||||
// rewriting.
|
||||
@ -176,7 +178,7 @@ var (
|
||||
|
||||
// noopBlobFn is a no-op implementation of the BlobRewriteFn. It returns
|
||||
// the blob that it was given, and returns no error.
|
||||
noopBlobFn = func(path string, b *gitobj.Blob) (*gitobj.Blob, error) { return b, nil }
|
||||
noopBlobFn = func(path string, origOid []byte, b *gitobj.Blob) (*gitobj.Blob, error) { return b, nil }
|
||||
// noopTreePreFn is a no-op implementation of the TreePreRewriteFn. It
|
||||
// returns the tree that it was given, and returns no error.
|
||||
noopTreePreFn = func(path string, t *gitobj.Tree) error { return nil }
|
||||
@ -383,12 +385,6 @@ func (r *Rewriter) rewriteTree(commitOID []byte, treeOID []byte, path string,
|
||||
continue
|
||||
}
|
||||
|
||||
if cached := r.uncacheEntry(entry); cached != nil {
|
||||
entries = append(entries, copyEntryMode(cached,
|
||||
entry.Filemode))
|
||||
continue
|
||||
}
|
||||
|
||||
var oid []byte
|
||||
|
||||
switch entry.Type() {
|
||||
@ -465,7 +461,7 @@ func (r *Rewriter) rewriteBlob(commitOID, from []byte, path string, fn BlobRewri
|
||||
return nil, err
|
||||
}
|
||||
|
||||
b, err := fn(path, blob)
|
||||
b, err := fn(path, from, blob)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ func TestRewriterRewritesHistory(t *testing.T) {
|
||||
r := NewRewriter(db)
|
||||
|
||||
tip, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
|
||||
BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
BlobFn: func(path string, origOid []byte, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
contents, err := ioutil.ReadAll(b.Contents)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -82,7 +82,7 @@ func TestRewriterRewritesOctopusMerges(t *testing.T) {
|
||||
r := NewRewriter(db)
|
||||
|
||||
tip, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
|
||||
BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
BlobFn: func(path string, origOid []byte, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
return &gitobj.Blob{
|
||||
Contents: io.MultiReader(b.Contents, strings.NewReader("_new")),
|
||||
Size: b.Size + int64(len("_new")),
|
||||
@ -129,7 +129,7 @@ func TestRewriterVisitsPackedObjects(t *testing.T) {
|
||||
var contents []byte
|
||||
|
||||
_, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
|
||||
BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
BlobFn: func(path string, origOid []byte, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
var err error
|
||||
|
||||
contents, err = ioutil.ReadAll(b.Contents)
|
||||
@ -155,7 +155,7 @@ func TestRewriterDoesntVisitUnchangedSubtrees(t *testing.T) {
|
||||
seen := make(map[string]int)
|
||||
|
||||
_, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
|
||||
BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
BlobFn: func(path string, origOid []byte, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
seen[path] = seen[path] + 1
|
||||
|
||||
return b, nil
|
||||
@ -173,7 +173,7 @@ func TestRewriterVisitsUniqueEntriesWithIdenticalContents(t *testing.T) {
|
||||
r := NewRewriter(db)
|
||||
|
||||
tip, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
|
||||
BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
BlobFn: func(path string, origOid []byte, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
if path == "b.txt" {
|
||||
return b, nil
|
||||
}
|
||||
@ -213,7 +213,7 @@ func TestRewriterIgnoresPathsThatDontMatchFilter(t *testing.T) {
|
||||
seen := make(map[string]int)
|
||||
|
||||
_, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
|
||||
BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
BlobFn: func(path string, origOid []byte, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
seen[path] = seen[path] + 1
|
||||
|
||||
return b, nil
|
||||
@ -236,7 +236,7 @@ func TestRewriterAllowsAdditionalTreeEntries(t *testing.T) {
|
||||
assert.Nil(t, err)
|
||||
|
||||
tip, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
|
||||
BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
BlobFn: func(path string, origOid []byte, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
return b, nil
|
||||
},
|
||||
|
||||
@ -307,7 +307,7 @@ var (
|
||||
// is received.
|
||||
collectCalls = func(calls *[]*CallbackCall) *RewriteOptions {
|
||||
return &RewriteOptions{Include: []string{"refs/heads/master"},
|
||||
BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
BlobFn: func(path string, origOid []byte, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
*calls = append(*calls, &CallbackCall{
|
||||
Type: "blob",
|
||||
Path: path,
|
||||
@ -384,7 +384,7 @@ func TestHistoryRewriterTreePreCallbackPropagatesErrors(t *testing.T) {
|
||||
r := NewRewriter(db)
|
||||
|
||||
_, err := r.Rewrite(&RewriteOptions{Include: []string{"refs/heads/master"},
|
||||
BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
BlobFn: func(path string, origOid []byte, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
return b, nil
|
||||
},
|
||||
|
||||
@ -404,7 +404,7 @@ func TestHistoryRewriterUseOriginalParentsForPartialMigration(t *testing.T) {
|
||||
Include: []string{"refs/heads/master"},
|
||||
Exclude: []string{"refs/tags/middle"},
|
||||
|
||||
BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
BlobFn: func(path string, origOid []byte, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
return b, nil
|
||||
},
|
||||
})
|
||||
@ -441,7 +441,7 @@ func TestHistoryRewriterUpdatesRefs(t *testing.T) {
|
||||
|
||||
UpdateRefs: true,
|
||||
|
||||
BlobFn: func(path string, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
BlobFn: func(path string, origOid []byte, b *gitobj.Blob) (*gitobj.Blob, error) {
|
||||
suffix := strings.NewReader("_suffix")
|
||||
|
||||
return &gitobj.Blob{
|
||||
|
Loading…
Reference in New Issue
Block a user