git/githistory: cache files based on full path
When we cache files, do so on the full path instead of just the directory entry. This means that when we have an identical file with the same name in two different direectories, we distinguish between the two paths and ensure both are added to .gitattributes. This is an alternate solution to #4671 which should perform better. For compmarison, with a clone of Git's main repository with the following command, we get: git lfs migrate import --everything --include="*.h": * v3.0.1 (broken): 608s user, 53s system, 5:34 total * v3.0.2 (fixed): 13435s user, 1255s system, 1:43:17 total * this commit (fixed): 716s user, 67s system, 6:59 total This is a much better performance characteristic for equivalent results. Preserve the integration from the earlier attempt at fixing this plus add an additional one. Avoid using assert_pointer in the new test because that helper doesn't always work correctly when there are two files with the same file name.
This commit is contained in:
parent
0e543b8a1e
commit
0d31bce1c1
@ -383,7 +383,7 @@ func (r *Rewriter) rewriteTree(commitOID []byte, treeOID []byte, path string,
|
||||
continue
|
||||
}
|
||||
|
||||
if cached := r.uncacheEntry(entry); cached != nil {
|
||||
if cached := r.uncacheEntry(fullpath, entry); cached != nil {
|
||||
entries = append(entries, copyEntryMode(cached,
|
||||
entry.Filemode))
|
||||
continue
|
||||
@ -404,7 +404,7 @@ func (r *Rewriter) rewriteTree(commitOID []byte, treeOID []byte, path string,
|
||||
return nil, err
|
||||
}
|
||||
|
||||
entries = append(entries, r.cacheEntry(entry, &gitobj.TreeEntry{
|
||||
entries = append(entries, r.cacheEntry(fullpath, entry, &gitobj.TreeEntry{
|
||||
Filemode: entry.Filemode,
|
||||
Name: entry.Name,
|
||||
Oid: oid,
|
||||
@ -590,11 +590,11 @@ func (r *Rewriter) Filter() *filepathfilter.Filter {
|
||||
|
||||
// cacheEntry caches then given "from" entry so that it is always rewritten as
|
||||
// a *TreeEntry equivalent to "to".
|
||||
func (r *Rewriter) cacheEntry(from, to *gitobj.TreeEntry) *gitobj.TreeEntry {
|
||||
func (r *Rewriter) cacheEntry(path string, from, to *gitobj.TreeEntry) *gitobj.TreeEntry {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
|
||||
r.entries[r.entryKey(from)] = to
|
||||
r.entries[r.entryKey(path, from)] = to
|
||||
|
||||
return to
|
||||
}
|
||||
@ -602,16 +602,16 @@ func (r *Rewriter) cacheEntry(from, to *gitobj.TreeEntry) *gitobj.TreeEntry {
|
||||
// uncacheEntry returns a *TreeEntry that is cached from the given *TreeEntry
|
||||
// "from". That is to say, it returns the *TreeEntry that "from" should be
|
||||
// rewritten to, or nil if none could be found.
|
||||
func (r *Rewriter) uncacheEntry(from *gitobj.TreeEntry) *gitobj.TreeEntry {
|
||||
func (r *Rewriter) uncacheEntry(path string, from *gitobj.TreeEntry) *gitobj.TreeEntry {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
|
||||
return r.entries[r.entryKey(from)]
|
||||
return r.entries[r.entryKey(path, from)]
|
||||
}
|
||||
|
||||
// entryKey returns a unique key for a given *TreeEntry "e".
|
||||
func (r *Rewriter) entryKey(e *gitobj.TreeEntry) string {
|
||||
return fmt.Sprintf("%s:%x", e.Name, e.Oid)
|
||||
func (r *Rewriter) entryKey(path string, e *gitobj.TreeEntry) string {
|
||||
return fmt.Sprintf("%s:%x", path, e.Oid)
|
||||
}
|
||||
|
||||
// cacheEntry caches then given "from" commit so that it is always rewritten as
|
||||
|
@ -1053,6 +1053,25 @@ begin_test "migrate import (copied file)"
|
||||
)
|
||||
end_test
|
||||
|
||||
begin_test "migrate import (copied file with only a single path)"
|
||||
(
|
||||
set -e
|
||||
|
||||
setup_local_branch_with_copied_file
|
||||
|
||||
oid="$(calc_oid "$(git cat-file -p :a.txt)")"
|
||||
|
||||
# Prevent MSYS from rewriting /a.txt into a Windows path.
|
||||
MSYS_NO_PATHCONV=1 git lfs migrate import --include="/a.txt" --everything
|
||||
|
||||
# Expect attribute for only "/a.txt".
|
||||
if grep -q "^/dir/a.txt" ./.gitattributes || ! grep -q "^/a.txt" ./.gitattributes; then
|
||||
exit 1
|
||||
fi
|
||||
refute_pointer "refs/heads/main" "dir/a.txt" "$oid" 5
|
||||
)
|
||||
end_test
|
||||
|
||||
begin_test "migrate import (filename special characters)"
|
||||
(
|
||||
set -e
|
||||
|
Loading…
Reference in New Issue
Block a user