From 0d31bce1c1e0d23661c9efebc1d34c432b9bf976 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Wed, 12 Jan 2022 18:18:33 +0000 Subject: [PATCH] git/githistory: cache files based on full path When we cache files, do so on the full path instead of just the directory entry. This means that when we have an identical file with the same name in two different direectories, we distinguish between the two paths and ensure both are added to .gitattributes. This is an alternate solution to #4671 which should perform better. For compmarison, with a clone of Git's main repository with the following command, we get: git lfs migrate import --everything --include="*.h": * v3.0.1 (broken): 608s user, 53s system, 5:34 total * v3.0.2 (fixed): 13435s user, 1255s system, 1:43:17 total * this commit (fixed): 716s user, 67s system, 6:59 total This is a much better performance characteristic for equivalent results. Preserve the integration from the earlier attempt at fixing this plus add an additional one. Avoid using assert_pointer in the new test because that helper doesn't always work correctly when there are two files with the same file name. --- git/githistory/rewriter.go | 16 ++++++++-------- t/t-migrate-import.sh | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/git/githistory/rewriter.go b/git/githistory/rewriter.go index 55640b9e..3852a983 100644 --- a/git/githistory/rewriter.go +++ b/git/githistory/rewriter.go @@ -383,7 +383,7 @@ func (r *Rewriter) rewriteTree(commitOID []byte, treeOID []byte, path string, continue } - if cached := r.uncacheEntry(entry); cached != nil { + if cached := r.uncacheEntry(fullpath, entry); cached != nil { entries = append(entries, copyEntryMode(cached, entry.Filemode)) continue @@ -404,7 +404,7 @@ func (r *Rewriter) rewriteTree(commitOID []byte, treeOID []byte, path string, return nil, err } - entries = append(entries, r.cacheEntry(entry, &gitobj.TreeEntry{ + entries = append(entries, r.cacheEntry(fullpath, entry, &gitobj.TreeEntry{ Filemode: entry.Filemode, Name: entry.Name, Oid: oid, @@ -590,11 +590,11 @@ func (r *Rewriter) Filter() *filepathfilter.Filter { // cacheEntry caches then given "from" entry so that it is always rewritten as // a *TreeEntry equivalent to "to". -func (r *Rewriter) cacheEntry(from, to *gitobj.TreeEntry) *gitobj.TreeEntry { +func (r *Rewriter) cacheEntry(path string, from, to *gitobj.TreeEntry) *gitobj.TreeEntry { r.mu.Lock() defer r.mu.Unlock() - r.entries[r.entryKey(from)] = to + r.entries[r.entryKey(path, from)] = to return to } @@ -602,16 +602,16 @@ func (r *Rewriter) cacheEntry(from, to *gitobj.TreeEntry) *gitobj.TreeEntry { // uncacheEntry returns a *TreeEntry that is cached from the given *TreeEntry // "from". That is to say, it returns the *TreeEntry that "from" should be // rewritten to, or nil if none could be found. -func (r *Rewriter) uncacheEntry(from *gitobj.TreeEntry) *gitobj.TreeEntry { +func (r *Rewriter) uncacheEntry(path string, from *gitobj.TreeEntry) *gitobj.TreeEntry { r.mu.Lock() defer r.mu.Unlock() - return r.entries[r.entryKey(from)] + return r.entries[r.entryKey(path, from)] } // entryKey returns a unique key for a given *TreeEntry "e". -func (r *Rewriter) entryKey(e *gitobj.TreeEntry) string { - return fmt.Sprintf("%s:%x", e.Name, e.Oid) +func (r *Rewriter) entryKey(path string, e *gitobj.TreeEntry) string { + return fmt.Sprintf("%s:%x", path, e.Oid) } // cacheEntry caches then given "from" commit so that it is always rewritten as diff --git a/t/t-migrate-import.sh b/t/t-migrate-import.sh index 8a175c9e..de4689aa 100755 --- a/t/t-migrate-import.sh +++ b/t/t-migrate-import.sh @@ -1053,6 +1053,25 @@ begin_test "migrate import (copied file)" ) end_test +begin_test "migrate import (copied file with only a single path)" +( + set -e + + setup_local_branch_with_copied_file + + oid="$(calc_oid "$(git cat-file -p :a.txt)")" + + # Prevent MSYS from rewriting /a.txt into a Windows path. + MSYS_NO_PATHCONV=1 git lfs migrate import --include="/a.txt" --everything + + # Expect attribute for only "/a.txt". + if grep -q "^/dir/a.txt" ./.gitattributes || ! grep -q "^/a.txt" ./.gitattributes; then + exit 1 + fi + refute_pointer "refs/heads/main" "dir/a.txt" "$oid" 5 +) +end_test + begin_test "migrate import (filename special characters)" ( set -e