prune: add options to be more aggressive about pruning

Some users really want to minimize the amount of data they store on the
local system, especially for large checkouts.  Let's add two options,
--recent and --force: the former ignores the recency settings, and the
latter additionally prunes objects which are currently checked out.  We
will never prune objects that are unpushed (including stashed objects),
even with these options.

Add tests and documentation for both of these options.
This commit is contained in:
brian m. carlson 2021-01-12 21:24:44 +00:00
parent ca2f09be48
commit 1481e9776d
No known key found for this signature in database
GPG Key ID: 2D0C9BC12F82B3A1
4 changed files with 138 additions and 6 deletions

@ -26,6 +26,8 @@ var (
pruneDryRunArg bool
pruneVerboseArg bool
pruneVerifyArg bool
pruneRecentArg bool
pruneForceArg bool
pruneDoNotVerifyArg bool
)
@ -38,6 +40,8 @@ func pruneCommand(cmd *cobra.Command, args []string) {
fetchPruneConfig := lfs.NewFetchPruneConfig(cfg.Git)
verify := !pruneDoNotVerifyArg &&
(fetchPruneConfig.PruneVerifyRemoteAlways || pruneVerifyArg)
fetchPruneConfig.PruneRecent = pruneRecentArg || pruneForceArg
fetchPruneConfig.PruneForce = pruneForceArg
prune(fetchPruneConfig, verify, pruneDryRunArg, pruneVerboseArg)
}
@ -98,7 +102,7 @@ func prune(fetchPruneConfig lfs.FetchPruneConfig, verifyRemote, dryRun, verbose
go pruneTaskGetRetainedCurrentAndRecentRefs(gitscanner, fetchPruneConfig, retainChan, errorChan, &taskwait, sem)
go pruneTaskGetRetainedUnpushed(gitscanner, fetchPruneConfig, retainChan, errorChan, &taskwait, sem)
go pruneTaskGetRetainedWorktree(gitscanner, retainChan, errorChan, &taskwait, sem)
go pruneTaskGetRetainedWorktree(gitscanner, fetchPruneConfig, retainChan, errorChan, &taskwait, sem)
go pruneTaskGetRetainedStashed(gitscanner, retainChan, errorChan, &taskwait, sem)
if verifyRemote {
reachableObjects = tools.NewStringSetWithCapacity(100)
@ -387,11 +391,13 @@ func pruneTaskGetRetainedCurrentAndRecentRefs(gitscanner *lfs.GitScanner, fetchc
return
}
commits.Add(ref.Sha)
waitg.Add(1)
go pruneTaskGetRetainedAtRef(gitscanner, ref.Sha, retainChan, errorChan, waitg, sem)
if !fetchconf.PruneForce {
waitg.Add(1)
go pruneTaskGetRetainedAtRef(gitscanner, ref.Sha, retainChan, errorChan, waitg, sem)
}
// Now recent
if fetchconf.FetchRecentRefsDays > 0 {
if !fetchconf.PruneRecent && fetchconf.FetchRecentRefsDays > 0 {
pruneRefDays := fetchconf.FetchRecentRefsDays + fetchconf.PruneOffsetDays
tracerx.Printf("PRUNE: Retaining non-HEAD refs within %d (%d+%d) days", pruneRefDays, fetchconf.FetchRecentRefsDays, fetchconf.PruneOffsetDays)
refsSince := time.Now().AddDate(0, 0, -pruneRefDays)
@ -411,7 +417,7 @@ func pruneTaskGetRetainedCurrentAndRecentRefs(gitscanner *lfs.GitScanner, fetchc
// For every unique commit we've fetched, check recent commits too
// Only if we're fetching recent commits, otherwise only keep at refs
if fetchconf.FetchRecentCommitsDays > 0 {
if !fetchconf.PruneRecent && fetchconf.FetchRecentCommitsDays > 0 {
pruneCommitDays := fetchconf.FetchRecentCommitsDays + fetchconf.PruneOffsetDays
for commit := range commits.Iter() {
// We measure from the last commit at the ref
@ -447,9 +453,13 @@ func pruneTaskGetRetainedUnpushed(gitscanner *lfs.GitScanner, fetchconf lfs.Fetc
}
// Background task, must call waitg.Done() once at end
func pruneTaskGetRetainedWorktree(gitscanner *lfs.GitScanner, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup, sem *semaphore.Weighted) {
func pruneTaskGetRetainedWorktree(gitscanner *lfs.GitScanner, fetchconf lfs.FetchPruneConfig, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup, sem *semaphore.Weighted) {
defer waitg.Done()
if fetchconf.PruneForce {
return
}
// Retain other worktree HEADs too
// Working copy, branch & maybe commit is different but repo is shared
allWorktreeRefs, err := git.GetAllWorkTreeHEADs(cfg.LocalGitStorageDir())
@ -520,6 +530,8 @@ func init() {
RegisterCommand("prune", pruneCommand, func(cmd *cobra.Command) {
cmd.Flags().BoolVarP(&pruneDryRunArg, "dry-run", "d", false, "Don't delete anything, just report")
cmd.Flags().BoolVarP(&pruneVerboseArg, "verbose", "v", false, "Print full details of what is/would be deleted")
cmd.Flags().BoolVarP(&pruneRecentArg, "recent", "", false, "Prune even recent objects")
cmd.Flags().BoolVarP(&pruneForceArg, "force", "f", false, "Prune everything that has been pushed")
cmd.Flags().BoolVarP(&pruneVerifyArg, "verify-remote", "c", false, "Verify that remote has LFS files before deleting")
cmd.Flags().BoolVar(&pruneDoNotVerifyArg, "no-verify-remote", false, "Override lfs.pruneverifyremotealways and don't verify")
})

@ -34,6 +34,14 @@ details about `lfs.storage` option.
* `--dry-run` `-d`
Don't actually delete anything, just report on what would have been done
* `--force` `-f`
Prune all objects except unpushed objects. Unlike a normal prune, this also
removes objects required for currently checked out refs. Implies `--recent`.
* `--recent`
Prune even objects that would normally be preserved by the configuration
options specified below in [RECENT FILES].
* `--verify-remote` `-c`
Contact the remote and check that copies of the files we would delete
definitely exist before deleting. See [VERIFY REMOTE].

@ -21,6 +21,10 @@ type FetchPruneConfig struct {
PruneVerifyRemoteAlways bool
// Name of remote to check for unpushed and verify checks
PruneRemoteName string
// Whether to ignore all recent options.
PruneRecent bool
// Whether to delete everything pushed.
PruneForce bool
}
func NewFetchPruneConfig(git config.Environment) FetchPruneConfig {
@ -37,5 +41,7 @@ func NewFetchPruneConfig(git config.Environment) FetchPruneConfig {
PruneOffsetDays: git.Int("lfs.pruneoffsetdays", 3),
PruneVerifyRemoteAlways: git.Bool("lfs.pruneverifyremotealways", false),
PruneRemoteName: pruneRemote,
PruneRecent: false,
PruneForce: false,
}
}

@ -752,3 +752,109 @@ begin_test "prune keep stashed untracked files"
)
end_test
begin_test "prune recent changes with --recent"
(
# Checks that `git lfs prune --recent` disregards the lfs.fetchrecent*
# retention settings configured below, while still keeping unpushed objects
# and the object used by the current checkout.
set -e
reponame="prune_recent_arg"
setup_remote_repo "remote_$reponame"
clone_repo "remote_$reponame" "clone_$reponame"
git lfs track "*.dat"
# generate content we'll use
content_inrepo="this is the original committed data"
oid_inrepo=$(calc_oid "$content_inrepo")
content_new="this data will be recent"
oid_new=$(calc_oid "$content_new")
# create the initial commit holding the original content, dated via get_date -1d
echo "[
{
\"CommitDate\":\"$(get_date -1d)\",
\"Files\":[
{\"Filename\":\"file.dat\",\"Size\":${#content_inrepo}, \"Data\":\"$content_inrepo\"}]
}
]" | lfstest-testutils addcommits
# now modify the file, and commit it
printf '%s' "$content_new" > file.dat
git add .
git commit -m 'Update file.dat'
# enable the recent-refs/recent-commits settings that --recent is meant to ignore
git config lfs.fetchrecentrefsdays 5
git config lfs.fetchrecentremoterefs true
git config lfs.fetchrecentcommitsdays 3
# both objects must be present locally before any pruning happens
assert_local_object "$oid_new" "${#content_new}"
assert_local_object "$oid_inrepo" "${#content_inrepo}"
# prune data, should not delete: nothing has been pushed yet.
git lfs prune --recent
assert_local_object "$oid_new" "${#content_new}"
assert_local_object "$oid_inrepo" "${#content_inrepo}"
git push origin HEAD
# prune data: now that everything is pushed, the older object is expected to
# be removed, while the object for the current checkout is still kept.
git lfs prune --recent
assert_local_object "$oid_new" "${#content_new}"
refute_local_object "$oid_inrepo" "${#content_inrepo}"
)
end_test
begin_test "prune --force"
(
set -e
reponame="prune_force"
setup_remote_repo "remote_$reponame"
clone_repo "remote_$reponame" "clone_$reponame"
git lfs track "*.dat"
# generate content we'll use
content_inrepo="this is the original committed data"
oid_inrepo=$(calc_oid "$content_inrepo")
content_new="this data will be recent"
oid_new=$(calc_oid "$content_new")
echo "[
{
\"CommitDate\":\"$(get_date -1d)\",
\"Files\":[
{\"Filename\":\"file.dat\",\"Size\":${#content_inrepo}, \"Data\":\"$content_inrepo\"}]
}
]" | lfstest-testutils addcommits
# now modify the file, and stash it
printf '%s' "$content_new" > file.dat
git add .
git commit -m 'Update file.dat'
git config lfs.fetchrecentrefsdays 5
git config lfs.fetchrecentremoterefs true
git config lfs.fetchrecentcommitsdays 3
assert_local_object "$oid_new" "${#content_new}"
assert_local_object "$oid_inrepo" "${#content_inrepo}"
# prune data, should not delete.
git lfs prune --force
assert_local_object "$oid_new" "${#content_new}"
assert_local_object "$oid_inrepo" "${#content_inrepo}"
git push origin HEAD
# prune data.
git lfs prune --force
refute_local_object "$oid_new" "${#content_new}"
refute_local_object "$oid_inrepo" "${#content_inrepo}"
)
end_test