From 2dc718b38c41f7525c5b7b12e6b23ed558a81cc5 Mon Sep 17 00:00:00 2001 From: Steve Streeting Date: Fri, 7 Aug 2020 16:48:59 +0100 Subject: [PATCH] prune: fix deleting objects referred to by stashes It was impossible to pop a stash with LFS data successfully after running any lfs prune command before this fix, because prune would consider stashed data unreferenced. This fixes #4206 --- commands/command_prune.go | 22 +++++++++++++++++++- git/git.go | 5 +++++ lfs/gitscanner.go | 10 +++++++++ lfs/gitscanner_log.go | 27 ++++++++++++++++++++++++ t/t-prune.sh | 43 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+), 1 deletion(-) diff --git a/commands/command_prune.go b/commands/command_prune.go index bf9b396c..f611478b 100644 --- a/commands/command_prune.go +++ b/commands/command_prune.go @@ -71,7 +71,7 @@ func prune(fetchPruneConfig lfs.FetchPruneConfig, verifyRemote, dryRun, verbose // Add all the base funcs to the waitgroup before starting them, in case // one completes really fast & hits 0 unexpectedly // each main process can Add() to the wg itself if it subdivides the task - taskwait.Add(4) // 1..4: localObjects, current & recent refs, unpushed, worktree + taskwait.Add(5) // 1..5: localObjects, current & recent refs, unpushed, worktree, stashes if verifyRemote { taskwait.Add(1) // 5 } @@ -99,6 +99,7 @@ func prune(fetchPruneConfig lfs.FetchPruneConfig, verifyRemote, dryRun, verbose go pruneTaskGetRetainedCurrentAndRecentRefs(gitscanner, fetchPruneConfig, retainChan, errorChan, &taskwait, sem) go pruneTaskGetRetainedUnpushed(gitscanner, fetchPruneConfig, retainChan, errorChan, &taskwait, sem) go pruneTaskGetRetainedWorktree(gitscanner, retainChan, errorChan, &taskwait, sem) + go pruneTaskGetRetainedStashed(gitscanner, retainChan, errorChan, &taskwait, sem) if verifyRemote { reachableObjects = tools.NewStringSetWithCapacity(100) go pruneTaskGetReachableObjects(gitscanner, &reachableObjects, errorChan, &taskwait, sem) @@ -476,6 +477,25 @@ func 
pruneTaskGetRetainedWorktree(gitscanner *lfs.GitScanner, retainChan chan st
 	}
 }
 
+// Background task: retains the OID of every pointer ScanStashed reports.
+// Must call waitg.Done() once at end.
+func pruneTaskGetRetainedStashed(gitscanner *lfs.GitScanner, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup, sem *semaphore.Weighted) {
+	defer waitg.Done()
+
+	onPointer := func(p *lfs.WrappedPointer, err error) {
+		if err != nil {
+			errorChan <- err
+			return
+		}
+		oid := p.Pointer.Oid
+		retainChan <- oid
+		tracerx.Printf("RETAIN: %v stashed", oid)
+	}
+	if err := gitscanner.ScanStashed(onPointer); err != nil {
+		errorChan <- err
+	}
+}
+
 // Background task, must call waitg.Done() once at end
 func pruneTaskGetReachableObjects(gitscanner *lfs.GitScanner, outObjectSet *tools.StringSet, errorChan chan error, waitg *sync.WaitGroup, sem *semaphore.Weighted) {
 	defer waitg.Done()
diff --git a/git/git.go b/git/git.go
index 5b8d9b7f..663ef6ba 100644
--- a/git/git.go
+++ b/git/git.go
@@ -269,6 +269,11 @@ func Log(args ...string) (*subprocess.BufferedCmd, error) {
 	return gitNoLFSBuffered(logArgs...)
 }
 
+// RefLog runs "git reflog" followed by args through gitNoLFSBuffered.
+func RefLog(args ...string) (*subprocess.BufferedCmd, error) {
+	return gitNoLFSBuffered(append([]string{"reflog"}, args...)...)
+}
+
 func LsRemote(remote, remoteRef string) (string, error) {
 	if remote == "" {
 		return "", errors.New("remote required")
diff --git a/lfs/gitscanner.go b/lfs/gitscanner.go
index 6ea07caf..a2b05c0d 100644
--- a/lfs/gitscanner.go
+++ b/lfs/gitscanner.go
@@ -192,6 +192,16 @@ func (s *GitScanner) ScanUnpushed(remote string, cb GitScannerFoundPointer) erro
 	return scanUnpushed(callback, remote)
 }
 
+// ScanStashed picks its callback with firstGitScannerCallback(cb,
+// s.FoundPointer) and passes it to scanStashed, which scans the stashes.
+func (s *GitScanner) ScanStashed(cb GitScannerFoundPointer) error {
+	callback, err := firstGitScannerCallback(cb, s.FoundPointer)
+	if err == nil {
+		err = scanStashed(callback, s)
+	}
+	return err
+}
+
 // ScanPreviousVersions scans changes reachable from ref (commit) back to since.
// Returns channel of pointers for *previous* versions that overlap that time.
 // Does not include pointers which were still in use at ref (use ScanRefsToChan
diff --git a/lfs/gitscanner_log.go b/lfs/gitscanner_log.go
index 30c0258a..398468b1 100644
--- a/lfs/gitscanner_log.go
+++ b/lfs/gitscanner_log.go
@@ -7,6 +7,7 @@ import (
 	"io"
 	"io/ioutil"
 	"regexp"
+	"strings"
 	"time"
 
 	"github.com/git-lfs/git-lfs/filepathfilter"
@@ -63,6 +64,32 @@ func scanUnpushed(cb GitScannerFoundPointer, remote string) error {
 	return nil
 }
 
+// scanStashed runs s.ScanRef, reporting to cb, on every commit SHA listed by
+// "git reflog show --format=%H stash".
+func scanStashed(cb GitScannerFoundPointer, s *GitScanner) error {
+	// git.RefLog returns a command that is already running, so it must not
+	// be started a second time here.
+	cmd, err := git.RefLog("show", "--format=%H", "stash")
+	if err != nil {
+		return err
+	}
+
+	// The exit status is deliberately ignored: the reflog command fails when
+	// no stash exists, which just means there is nothing to retain.
+	defer cmd.Wait()
+
+	scanner := bufio.NewScanner(cmd.Stdout)
+	for scanner.Scan() {
+		sha := strings.TrimSpace(scanner.Text())
+		if err := s.ScanRef(sha, cb); err != nil {
+			return err
+		}
+	}
+
+	// Surface read failures instead of treating them as the end of the list.
+	return scanner.Err()
+}
+
 func parseScannerLogOutput(cb GitScannerFoundPointer, direction LogDiffDirection, cmd *subprocess.BufferedCmd) {
 	ch := make(chan gitscannerResult, chanBufSize)
 
diff --git a/t/t-prune.sh b/t/t-prune.sh
index e8b8e2ac..f598e92a 100755
--- a/t/t-prune.sh
+++ b/t/t-prune.sh
@@ -594,3 +594,46 @@ begin_test "prune verify large numbers of refs"
 
 )
 end_test
+
+begin_test "prune keep stashed changes"
+(
+  set -e
+
+  reponame="prune_keep_stashed"
+  setup_remote_repo "remote_$reponame"
+
+  clone_repo "remote_$reponame" "clone_$reponame"
+
+  git lfs track "*.dat" 2>&1 | tee track.log
+  grep "Tracking \"\*.dat\"" track.log
+
+  # generate content we'll use
+  content_inrepo="This is the original committed data"
+  oid_inrepo=$(calc_oid "$content_inrepo")
+  content_stashed="This data will be stashed and should not be deleted"
+  oid_stashed=$(calc_oid "$content_stashed")
+
+  # We just need one commit of base data, makes it easier to test stash
+  echo "[
+  {
+    \"CommitDate\":\"$(get_date -1d)\",
+    \"Files\":[
+      {\"Filename\":\"stashedfile.dat\",\"Size\":${#content_inrepo}, \"Data\":\"$content_inrepo\"}]
+  }
+  ]" | lfstest-testutils addcommits
+
+  # now modify the file, and stash it
+  echo -n "$content_stashed" > stashedfile.dat
+
+  git stash
+
+  # Prove that the stashed data was stored in LFS (should call clean filter)
+  assert_local_object "$oid_stashed" "${#content_stashed}"
+
+  # Prune data, should NOT delete stashed file
+  git lfs prune
+
+  assert_local_object "$oid_stashed" "${#content_stashed}"
+
+)
+end_test
\ No newline at end of file