Adam Majer cbf923e87b Abstract hash function usage (#28138)
Refactor Hash interfaces and centralize hash function. This will allow
easier introduction of different hash function later on.

This forms the "no-op" part of the SHA256 enablement patch.
2023-12-13 21:02:00 +00:00

139 lines
4.7 KiB
Go

// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package repository
import (
"context"
"errors"
"fmt"
"time"
git_model "code.gitea.io/gitea/models/git"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
)
// GarbageCollectLFSMetaObjectsOptions provides options for GarbageCollectLFSMetaObjects function
type GarbageCollectLFSMetaObjectsOptions struct {
LogDetail func(format string, v ...any)
AutoFix bool
OlderThan time.Time
UpdatedLessRecentlyThan time.Time
NumberToCheckPerRepo int64
ProportionToCheckPerRepo float64
}
// GarbageCollectLFSMetaObjects garbage collects LFS objects for all repositories
func GarbageCollectLFSMetaObjects(ctx context.Context, opts GarbageCollectLFSMetaObjectsOptions) error {
log.Trace("Doing: GarbageCollectLFSMetaObjects")
defer log.Trace("Finished: GarbageCollectLFSMetaObjects")
if opts.LogDetail == nil {
opts.LogDetail = log.Debug
}
if !setting.LFS.StartServer {
opts.LogDetail("LFS support is disabled")
return nil
}
return git_model.IterateRepositoryIDsWithLFSMetaObjects(ctx, func(ctx context.Context, repoID, count int64) error {
repo, err := repo_model.GetRepositoryByID(ctx, repoID)
if err != nil {
return err
}
if newMinimum := int64(float64(count) * opts.ProportionToCheckPerRepo); newMinimum > opts.NumberToCheckPerRepo && opts.NumberToCheckPerRepo != 0 {
opts.NumberToCheckPerRepo = newMinimum
}
return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts)
})
}
// GarbageCollectLFSMetaObjectsForRepo garbage collects LFS objects for a specific repository
func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, opts GarbageCollectLFSMetaObjectsOptions) error {
opts.LogDetail("Checking %-v", repo)
total, orphaned, collected, deleted := int64(0), 0, 0, 0
defer func() {
if orphaned == 0 {
opts.LogDetail("Found %d total LFSMetaObjects in %-v", total, repo)
} else if !opts.AutoFix {
opts.LogDetail("Found %d/%d orphaned LFSMetaObjects in %-v", orphaned, total, repo)
} else {
opts.LogDetail("Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. %d removed from storage.", collected, orphaned, total, repo, deleted)
}
}()
gitRepo, err := git.OpenRepository(ctx, repo.RepoPath())
if err != nil {
log.Error("Unable to open git repository %-v: %v", repo, err)
return err
}
defer gitRepo.Close()
store := lfs.NewContentStore()
errStop := errors.New("STOPERR")
objectFormat, _ := gitRepo.GetObjectFormat()
err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
if opts.NumberToCheckPerRepo > 0 && total > opts.NumberToCheckPerRepo {
return errStop
}
total++
pointerSha := git.ComputeBlobHash(objectFormat, []byte(metaObject.Pointer.StringContent()))
if gitRepo.IsObjectExist(pointerSha.String()) {
return git_model.MarkLFSMetaObject(ctx, metaObject.ID)
}
orphaned++
if !opts.AutoFix {
return nil
}
// Non-existent pointer file
_, err = git_model.RemoveLFSMetaObjectByOidFn(ctx, repo.ID, metaObject.Oid, func(count int64) error {
if count > 0 {
return nil
}
if err := store.Delete(metaObject.RelativePath()); err != nil {
log.Error("Unable to remove lfs metaobject %s from store: %v", metaObject.Oid, err)
}
deleted++
return nil
})
if err != nil {
return fmt.Errorf("unable to remove meta-object %s in %s: %w", metaObject.Oid, repo.FullName(), err)
}
collected++
return nil
}, &git_model.IterateLFSMetaObjectsForRepoOptions{
// Only attempt to garbage collect lfs meta objects older than a week as the order of git lfs upload
// and git object upload is not necessarily guaranteed. It's possible to imagine a situation whereby
// an LFS object is uploaded but the git branch is not uploaded immediately, or there are some rapid
// changes in new branches that might lead to lfs objects becoming temporarily unassociated with git
// objects.
//
// It is likely that a week is potentially excessive but it should definitely be enough that any
// unassociated LFS object is genuinely unassociated.
OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()),
UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()),
OrderByUpdated: true,
LoopFunctionAlwaysUpdates: true,
})
if err == errStop {
opts.LogDetail("Processing stopped at %d total LFSMetaObjects in %-v", total, repo)
return nil
} else if err != nil {
return err
}
return nil
}