From 4ff03089e0095bb970beeba67206e002f10e2dfd Mon Sep 17 00:00:00 2001 From: Chris Darroch Date: Fri, 22 Apr 2022 00:14:28 -0700 Subject: [PATCH] commands,lfs,t: scan refs by tree when pruning In commit d2221dcecacc6a2ad38ffd2e429fca18805cb4ea of PR #2851 the "git lfs prune" command was changed to respect the "lfs.fetchexclude" configuration option such that objects would always be pruned if they were referenced by files whose paths matched one of the patterns in the configuration option (unless they were referenced by an unpushed commit). However, this filter is applied using the GitScanner.ScanRef() method, which indirectly utilizes the internal scanRefsToChan() function, and that function only visits unique OIDs a single time each, even if they are referenced by multiple tree entries (i.e., if there are multiple files with the same content). This means that if an LFS object appears in both a file that matches a pattern from "lfs.fetchexclude" and in a file that does not match, the object may be pruned if the file path seen during the scan is the matching one regardless of whether the non-matching file would otherwise have its object retained. To resolve this we change the pruneTaskGetRetainedAtRef() function to use the GitScanner.ScanTree() method instead of ScanRef(), because ScanTree() visits all file paths in each commit. We need to pass our callback to the ScanTree() method so that we can save all non-matching files' OIDs into our list of OIDs to be retained; therefore we need to add a callback argument to ScanTree() in the same manner as is done for ScanRef() and various other GitScanner methods. We also introduce additional checks in our "prune all excluded paths" test to ensure that we always retain objects when they appear in a commit to be retained and at least one of the files referencing that object ID does not match the "lfs.fetchexclude" filter. --- commands/command_checkout.go | 2 +- commands/command_dedup.go | 2 +- commands/command_fetch.go | 2 +- commands/command_ls_files.go | 2 +- commands/command_prune.go | 2 +- commands/command_pull.go | 2 +- lfs/gitscanner.go | 4 ++-- t/t-prune.sh | 26 ++++++++++++++++++++++++-- 8 files changed, 32 insertions(+), 10 deletions(-) diff --git a/commands/command_checkout.go b/commands/command_checkout.go index cdc4da95..c46d5f69 100644 --- a/commands/command_checkout.go +++ b/commands/command_checkout.go @@ -73,7 +73,7 @@ func checkoutCommand(cmd *cobra.Command, args []string) { chgitscanner.Filter = filepathfilter.New(rootedPaths(args), nil, filepathfilter.GitIgnore) - if err := chgitscanner.ScanTree(ref.Sha); err != nil { + if err := chgitscanner.ScanTree(ref.Sha, nil); err != nil { ExitWithError(err) } chgitscanner.Close() diff --git a/commands/command_dedup.go b/commands/command_dedup.go index 35d595c4..00fc56b3 100644 --- a/commands/command_dedup.go +++ b/commands/command_dedup.go @@ -87,7 +87,7 @@ func dedupCommand(cmd *cobra.Command, args []string) { }) defer gitScanner.Close() - if err := gitScanner.ScanTree("HEAD"); err != nil { + if err := gitScanner.ScanTree("HEAD", nil); err != nil { ExitWithError(err) } diff --git a/commands/command_fetch.go b/commands/command_fetch.go index afc3a301..2c821a0b 100644 --- a/commands/command_fetch.go +++ b/commands/command_fetch.go @@ -135,7 +135,7 @@ func pointersToFetchForRef(ref string, filter *filepathfilter.Filter) ([]*lfs.Wr tempgitscanner.Filter = filter - if err := tempgitscanner.ScanTree(ref); err != nil { + if err := tempgitscanner.ScanTree(ref, nil); err != nil { return nil, err } diff --git a/commands/command_ls_files.go b/commands/command_ls_files.go index 0cccd9be..1aa91129 100644 --- a/commands/command_ls_files.go +++ b/commands/command_ls_files.go @@ -137,7 +137,7 @@ func lsFilesCommand(cmd *cobra.Command, args []string) { } else if scanRange { err = gitscanner.ScanRefRange(includeRef, ref, nil) } else { - err = gitscanner.ScanTree(ref) + err = gitscanner.ScanTree(ref, nil) } if err != nil { diff --git a/commands/command_prune.go b/commands/command_prune.go index bc830fd4..a7a3671e 100644 --- a/commands/command_prune.go +++ b/commands/command_prune.go @@ -352,7 +352,7 @@ func pruneTaskGetRetainedAtRef(gitscanner *lfs.GitScanner, ref string, retainCha defer sem.Release(1) defer waitg.Done() - err := gitscanner.ScanRef(ref, func(p *lfs.WrappedPointer, err error) { + err := gitscanner.ScanTree(ref, func(p *lfs.WrappedPointer, err error) { if err != nil { errorChan <- err return diff --git a/commands/command_pull.go b/commands/command_pull.go index f8b1f3a0..01ac9bd1 100644 --- a/commands/command_pull.go +++ b/commands/command_pull.go @@ -87,7 +87,7 @@ func pull(filter *filepathfilter.Filter) { }() processQueue := time.Now() - if err := gitscanner.ScanTree(ref.Sha); err != nil { + if err := gitscanner.ScanTree(ref.Sha, nil); err != nil { singleCheckout.Close() ExitWithError(err) } diff --git a/lfs/gitscanner.go b/lfs/gitscanner.go index 559a4351..4555d885 100644 --- a/lfs/gitscanner.go +++ b/lfs/gitscanner.go @@ -208,8 +208,8 @@ func (s *GitScanner) ScanAll(cb GitScannerFoundPointer) error { // ScanTree takes a ref and returns WrappedPointer objects in the tree at that // ref. Differs from ScanRefs in that multiple files in the tree with the same // content are all reported. -func (s *GitScanner) ScanTree(ref string) error { - callback, err := firstGitScannerCallback(s.FoundPointer) +func (s *GitScanner) ScanTree(ref string, cb GitScannerFoundPointer) error { + callback, err := firstGitScannerCallback(cb, s.FoundPointer) if err != nil { return err } diff --git a/t/t-prune.sh b/t/t-prune.sh index e88ded08..22dbb46e 100755 --- a/t/t-prune.sh +++ b/t/t-prune.sh @@ -112,10 +112,14 @@ begin_test "prune all excluded paths" content_oldandexcluded="To delete: pushed and too old and excluded by filter" content_oldandunchanged="Keep: pushed and created a while ago, but still current" content_prevandexcluded="To delete: pushed and in previous commit to HEAD but excluded by filter" + content_unreferencedandexcluded="To delete: unreferenced in deleted branch and pushed and excluded by filter" + content_includedandexcluded="Keep: pushed and both excluded by filter and included by another path" content_excluded="To delete: pushed and in HEAD but excluded by filter" oid_oldandexcluded=$(calc_oid "$content_oldandexcluded") oid_oldandunchanged=$(calc_oid "$content_oldandunchanged") oid_prevandexcluded=$(calc_oid "$content_prevandexcluded") + oid_unreferencedandexcluded=$(calc_oid "$content_unreferencedandexcluded") + oid_includedandexcluded=$(calc_oid "$content_includedandexcluded") oid_excluded=$(calc_oid "$content_excluded") echo "[ @@ -131,12 +135,23 @@ begin_test "prune all excluded paths" {\"Filename\":\"foo/oldandexcluded.dat\",\"Size\":${#content_prevandexcluded}, \"Data\":\"$content_prevandexcluded\"}] }, { + \"CommitDate\":\"$(get_date -4d)\", + \"NewBranch\":\"branch_to_delete\", \"Files\":[ + {\"Filename\":\"unreferenced.dat\",\"Size\":${#content_unreferencedandexcluded}, \"Data\":\"$content_unreferencedandexcluded\"}] + }, + { + \"ParentBranches\":[\"main\"], + \"Files\":[ + {\"Filename\":\"foo/unreferencedandexcluded.dat\",\"Size\":${#content_unreferencedandexcluded}, \"Data\":\"$content_unreferencedandexcluded\"}, + {\"Filename\":\"foo/includedandexcluded.dat\",\"Size\":${#content_includedandexcluded}, \"Data\":\"$content_includedandexcluded\"}, + {\"Filename\":\"included.dat\",\"Size\":${#content_includedandexcluded}, \"Data\":\"$content_includedandexcluded\"}, {\"Filename\":\"foo/oldandexcluded.dat\",\"Size\":${#content_excluded}, \"Data\":\"$content_excluded\"}] } ]" | lfstest-testutils addcommits git push origin main + git branch -D branch_to_delete git config lfs.fetchrecentrefsdays 5 git config lfs.fetchrecentremoterefs true @@ -148,16 +163,23 @@ begin_test "prune all excluded paths" git lfs prune --dry-run --verbose 2>&1 | tee prune.log - grep "prune: 4 local objects, 1 retained" prune.log - grep "prune: 3 files would be pruned" prune.log + grep "prune: 6 local objects, 2 retained" prune.log + grep "prune: 4 files would be pruned" prune.log grep "$oid_oldandexcluded" prune.log grep "$oid_prevandexcluded" prune.log + grep "$oid_unreferencedandexcluded" prune.log grep "$oid_excluded" prune.log + assert_local_object "$oid_oldandexcluded" "${#content_oldandexcluded}" + assert_local_object "$oid_prevandexcluded" "${#content_prevandexcluded}" + assert_local_object "$oid_unreferencedandexcluded" "${#content_unreferencedandexcluded}" + assert_local_object "$oid_excluded" "${#content_excluded}" git lfs prune refute_local_object "$oid_oldandexcluded" "${#content_oldandexcluded}" assert_local_object "$oid_oldandunchanged" "${#content_oldandunchanged}" refute_local_object "$oid_prevandexcluded" "${#content_prevandexcluded}" + refute_local_object "$oid_unreferencedandexcluded" "${#content_unreferencedandexcluded}" + assert_local_object "$oid_includedandexcluded" "${#content_includedandexcluded}" refute_local_object "$oid_excluded" "${#content_excluded}" ) end_test