prune: fix deleting objects referred to by stashes

Before this fix, popping a stash that contained LFS data failed once any
lfs prune command had been run, because prune treated data referenced
by a stash as unreferenced and deleted it.

This fixes #4206
This commit is contained in:
Steve Streeting 2020-08-07 16:48:59 +01:00
parent c2722a3d8c
commit 2dc718b38c
5 changed files with 106 additions and 1 deletions

@ -71,7 +71,7 @@ func prune(fetchPruneConfig lfs.FetchPruneConfig, verifyRemote, dryRun, verbose
// Add all the base funcs to the waitgroup before starting them, in case // Add all the base funcs to the waitgroup before starting them, in case
// one completes really fast & hits 0 unexpectedly // one completes really fast & hits 0 unexpectedly
// each main process can Add() to the wg itself if it subdivides the task // each main process can Add() to the wg itself if it subdivides the task
taskwait.Add(4) // 1..4: localObjects, current & recent refs, unpushed, worktree taskwait.Add(5) // 1..5: localObjects, current & recent refs, unpushed, worktree, stashes
if verifyRemote { if verifyRemote {
taskwait.Add(1) // 5 taskwait.Add(1) // 6
} }
@ -99,6 +99,7 @@ func prune(fetchPruneConfig lfs.FetchPruneConfig, verifyRemote, dryRun, verbose
go pruneTaskGetRetainedCurrentAndRecentRefs(gitscanner, fetchPruneConfig, retainChan, errorChan, &taskwait, sem) go pruneTaskGetRetainedCurrentAndRecentRefs(gitscanner, fetchPruneConfig, retainChan, errorChan, &taskwait, sem)
go pruneTaskGetRetainedUnpushed(gitscanner, fetchPruneConfig, retainChan, errorChan, &taskwait, sem) go pruneTaskGetRetainedUnpushed(gitscanner, fetchPruneConfig, retainChan, errorChan, &taskwait, sem)
go pruneTaskGetRetainedWorktree(gitscanner, retainChan, errorChan, &taskwait, sem) go pruneTaskGetRetainedWorktree(gitscanner, retainChan, errorChan, &taskwait, sem)
go pruneTaskGetRetainedStashed(gitscanner, retainChan, errorChan, &taskwait, sem)
if verifyRemote { if verifyRemote {
reachableObjects = tools.NewStringSetWithCapacity(100) reachableObjects = tools.NewStringSetWithCapacity(100)
go pruneTaskGetReachableObjects(gitscanner, &reachableObjects, errorChan, &taskwait, sem) go pruneTaskGetReachableObjects(gitscanner, &reachableObjects, errorChan, &taskwait, sem)
@ -476,6 +477,25 @@ func pruneTaskGetRetainedWorktree(gitscanner *lfs.GitScanner, retainChan chan st
} }
} }
// pruneTaskGetRetainedStashed is a background task: it must call
// waitg.Done() exactly once, when it finishes.
//
// It runs gitscanner.ScanStashed and sends the OID of every pointer reported
// by that scan to retainChan. Any error, whether reported per pointer or
// returned by the scan itself, is sent to errorChan. The sem parameter is
// accepted but not used by this task.
func pruneTaskGetRetainedStashed(gitscanner *lfs.GitScanner, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup, sem *semaphore.Weighted) {
	defer waitg.Done()

	retainStashed := func(ptr *lfs.WrappedPointer, scanErr error) {
		if scanErr != nil {
			errorChan <- scanErr
			return
		}
		oid := ptr.Pointer.Oid
		retainChan <- oid
		tracerx.Printf("RETAIN: %v stashed", oid)
	}

	if err := gitscanner.ScanStashed(retainStashed); err != nil {
		errorChan <- err
	}
}
// Background task, must call waitg.Done() once at end // Background task, must call waitg.Done() once at end
func pruneTaskGetReachableObjects(gitscanner *lfs.GitScanner, outObjectSet *tools.StringSet, errorChan chan error, waitg *sync.WaitGroup, sem *semaphore.Weighted) { func pruneTaskGetReachableObjects(gitscanner *lfs.GitScanner, outObjectSet *tools.StringSet, errorChan chan error, waitg *sync.WaitGroup, sem *semaphore.Weighted) {
defer waitg.Done() defer waitg.Done()

@ -269,6 +269,11 @@ func Log(args ...string) (*subprocess.BufferedCmd, error) {
return gitNoLFSBuffered(logArgs...) return gitNoLFSBuffered(logArgs...)
} }
// RefLog runs "git reflog" followed by the given arguments through
// gitNoLFSBuffered and returns the resulting buffered command.
func RefLog(args ...string) (*subprocess.BufferedCmd, error) {
	reflogArgs := make([]string, 0, len(args)+1)
	reflogArgs = append(reflogArgs, "reflog")
	reflogArgs = append(reflogArgs, args...)
	return gitNoLFSBuffered(reflogArgs...)
}
func LsRemote(remote, remoteRef string) (string, error) { func LsRemote(remote, remoteRef string) (string, error) {
if remote == "" { if remote == "" {
return "", errors.New("remote required") return "", errors.New("remote required")

@ -192,6 +192,16 @@ func (s *GitScanner) ScanUnpushed(remote string, cb GitScannerFoundPointer) erro
return scanUnpushed(callback, remote) return scanUnpushed(callback, remote)
} }
// ScanStashed scans the stash entries for LFS pointers and reports each one
// through the callback chosen by firstGitScannerCallback from cb and the
// scanner's FoundPointer. If no callback can be chosen, that error is
// returned and nothing is scanned.
func (s *GitScanner) ScanStashed(cb GitScannerFoundPointer) error {
	found, err := firstGitScannerCallback(cb, s.FoundPointer)
	if err == nil {
		err = scanStashed(found, s)
	}
	return err
}
// ScanPreviousVersions scans changes reachable from ref (commit) back to since. // ScanPreviousVersions scans changes reachable from ref (commit) back to since.
// Returns channel of pointers for *previous* versions that overlap that time. // Returns channel of pointers for *previous* versions that overlap that time.
// Does not include pointers which were still in use at ref (use ScanRefsToChan // Does not include pointers which were still in use at ref (use ScanRefsToChan

@ -7,6 +7,7 @@ import (
"io" "io"
"io/ioutil" "io/ioutil"
"regexp" "regexp"
"strings"
"time" "time"
"github.com/git-lfs/git-lfs/filepathfilter" "github.com/git-lfs/git-lfs/filepathfilter"
@ -63,6 +64,32 @@ func scanUnpushed(cb GitScannerFoundPointer, remote string) error {
return nil return nil
} }
// scanStashed invokes cb for the LFS pointers found in every stash entry.
//
// It lists the commit SHAs recorded in the stash reflog, one per line, and
// runs s.ScanRef on each of them. The first ScanRef failure, or a failure to
// read the reflog output, aborts the scan and is returned.
func scanStashed(cb GitScannerFoundPointer, s *GitScanner) error {
	// First get the SHAs of all stashes:
	// git reflog show --format="%H" stash
	reflogArgs := []string{"show", "--format=%H", "stash"}
	cmd, err := git.RefLog(reflogArgs...)
	if err != nil {
		return err
	}

	// NOTE(review): the results of Start and Wait are deliberately left
	// unchecked, as before. Presumably git exits non-zero when no stash
	// exists, which must not fail the scan — confirm before tightening.
	cmd.Start()
	defer cmd.Wait()

	scanner := bufio.NewScanner(cmd.Stdout)
	for scanner.Scan() {
		sha := strings.TrimSpace(scanner.Text())
		if sha == "" {
			// A blank line names no commit; there is nothing to scan.
			continue
		}
		if err := s.ScanRef(sha, cb); err != nil {
			return err
		}
	}

	// Scan() also stops on a read error. Surface it so that a truncated
	// stash list is never mistaken for a complete one.
	return scanner.Err()
}
func parseScannerLogOutput(cb GitScannerFoundPointer, direction LogDiffDirection, cmd *subprocess.BufferedCmd) { func parseScannerLogOutput(cb GitScannerFoundPointer, direction LogDiffDirection, cmd *subprocess.BufferedCmd) {
ch := make(chan gitscannerResult, chanBufSize) ch := make(chan gitscannerResult, chanBufSize)

@ -594,3 +594,46 @@ begin_test "prune verify large numbers of refs"
) )
end_test end_test
# Checks that "git lfs prune" leaves the LFS object of a stashed change in
# local storage.
begin_test "prune keep stashed changes"
(
set -e
reponame="prune_keep_stashed"
setup_remote_repo "remote_$reponame"
clone_repo "remote_$reponame" "clone_$reponame"
# track *.dat files and check that the tracking message was printed
git lfs track "*.dat" 2>&1 | tee track.log
grep "Tracking \"\*.dat\"" track.log
# generate content we'll use
content_inrepo="This is the original committed data"
# NOTE(review): oid_inrepo is computed but not used by any later line of this test
oid_inrepo=$(calc_oid "$content_inrepo")
content_stashed="This data will be stashed and should not be deleted"
oid_stashed=$(calc_oid "$content_stashed")
# We just need one commit of base data, makes it easier to test stash
echo "[
{
\"CommitDate\":\"$(get_date -1d)\",
\"Files\":[
{\"Filename\":\"stashedfile.dat\",\"Size\":${#content_inrepo}, \"Data\":\"$content_inrepo\"}]
}
]" | lfstest-testutils addcommits
# now modify the file, and stash it
# (echo -n writes no trailing newline, so the file holds exactly $content_stashed)
echo -n "$content_stashed" > stashedfile.dat
git stash
# Prove that the stashed data was stored in LFS (should call clean filter)
assert_local_object "$oid_stashed" "${#content_stashed}"
# Prune data, should NOT delete stashed file
git lfs prune
# the same object must still be present after the prune
assert_local_object "$oid_stashed" "${#content_stashed}"
)
end_test