commands,lfs: drop GitScanner Close() method

The GitScanner structure and its methods were introduced in PR #1670,
and in commit bdbca399c46f0447f08066ce53185009b3db90ec of that PR
the structure's Close() method was introduced.  Unlike other similar
structures whose Close() methods should be called to release underlying
resources such as channels or I/O streams, the (*GitScanner).Close()
method serves only to output an optional performance timing trace metric.

This Close() method is not called consistently; for instance, it is never
called by the migrateExportCommand() function of the "git lfs migrate"
command, and will be skipped by the checkoutCommand() function of the
"git lfs checkout" command if an error is returned by the
(*GitScanner).ScanTree() method.

The utility of the performance timing metric is also undercut by the
fact that some commands perform other tasks before and after calling
the specific (*GitScanner).Scan*() method they invoke.  And in the
particular case of the "git lfs prune" command, multiple goroutines
are started, each of which runs a different Scan*() method simultaneously
with the others, so the final timing metric does not account for
their different execution times, just the overall final timing.

We can improve the value of the timing metric while also simplifying
the calling convention for the GitScanner structure's methods by
removing the Close() method, and tracing the performance of each
Scan*() method individually.

Removing the Close() method clarifies that no underlying resources
must be released for the GitScanner structure, and so callers need
not try to register a deferred call to the method.  This parallels
some other conventional Go structures, such as the Scanner structure
of the "bufio" package.

As well, running a "git lfs prune" command with the GIT_TRACE_PERFORMANCE=1
environment variable set now results in more detailed and useful output,
for example:

  12:36:51.221526 performance ScanStashed: 0.013632533 s
  12:36:51.224494 performance ScanUnpushed: 0.016570280 s
  12:36:51.240670 performance ScanTree: 0.017171717 s
This commit is contained in:
Chris Darroch 2023-06-05 12:48:47 -07:00
parent 2dd8934ff0
commit a0986c786c
11 changed files with 73 additions and 45 deletions

@ -76,7 +76,6 @@ func checkoutCommand(cmd *cobra.Command, args []string) {
if err := chgitscanner.ScanTree(ref.Sha, nil); err != nil {
ExitWithError(err)
}
chgitscanner.Close()
meter.Start()
for _, p := range pointers {

@ -85,7 +85,6 @@ func dedupCommand(cmd *cobra.Command, args []string) {
atomic.AddInt64(&dedupStats.totalProcessedSize, p.Size)
}
})
defer gitScanner.Close()
if err := gitScanner.ScanTree("HEAD", nil); err != nil {
ExitWithError(err)

@ -136,7 +136,6 @@ func pointersToFetchForRef(ref string, filter *filepathfilter.Filter) ([]*lfs.Wr
return nil, err
}
tempgitscanner.Close()
return pointers, multiErr
}
@ -180,7 +179,6 @@ func pointersToFetchForRefs(refs []string) ([]*lfs.WrappedPointer, error) {
return nil, err
}
tempgitscanner.Close()
return pointers, multiErr
}
@ -212,7 +210,6 @@ func fetchPreviousVersions(ref string, since time.Time, filter *filepathfilter.F
ExitWithError(err)
}
tempgitscanner.Close()
return fetchAndReportToChan(pointers, filter, nil)
}
@ -317,8 +314,6 @@ func scanAll() []*lfs.WrappedPointer {
Panic(err, tr.Tr.Get("Could not scan for Git LFS files"))
}
tempgitscanner.Close()
if multiErr != nil {
Panic(multiErr, tr.Tr.Get("Could not scan for Git LFS files"))
}

@ -162,7 +162,6 @@ func doFsckObjects(include, exclude string, useIndex bool) []string {
}
}
gitscanner.Close()
return corruptOids
}
@ -209,7 +208,6 @@ func doFsckPointers(include, exclude string) []corruptPointer {
}
}
gitscanner.Close()
return corruptPointers
}

@ -134,7 +134,6 @@ func lsFilesCommand(cmd *cobra.Command, args []string) {
seen[p.Name] = struct{}{}
})
defer gitscanner.Close()
includeArg, excludeArg := getIncludeExcludeArgs(cmd)
gitscanner.Filter = buildFilepathFilter(cfg, includeArg, excludeArg, false)

@ -121,8 +121,7 @@ func prune(fetchPruneConfig lfs.FetchPruneConfig, verifyRemote, dryRun, verbose
progresswait.Add(1)
go pruneTaskDisplayProgress(progressChan, &progresswait, logger)
taskwait.Wait() // wait for subtasks
gitscanner.Close()
taskwait.Wait() // wait for subtasks
close(retainChan) // triggers retain collector to end now all tasks have
retainwait.Wait() // make sure all retained objects added

@ -93,7 +93,6 @@ func pull(filter *filepathfilter.Filter) {
}
meter.Start()
gitscanner.Close()
q.Wait()
wg.Wait()
tracerx.PerformanceSince("process queue", processQueue)

@ -245,7 +245,6 @@ func statusScanRefRange(ref *git.Ref) {
Print("\t%s (%s)", p.Name, p.Oid)
})
defer gitscanner.Close()
Print("%s\n", tr.Tr.Get("Objects to be pushed to %s:", remoteRef.Name))
if err := gitscanner.ScanRefRange(ref.Sha, remoteRef.Sha, nil); err != nil {

@ -22,10 +22,7 @@ import (
func uploadForRefUpdates(ctx *uploadContext, updates []*git.RefUpdate, pushAll bool) error {
gitscanner := ctx.buildGitScanner()
defer func() {
gitscanner.Close()
ctx.ReportErrors()
}()
defer ctx.ReportErrors()
verifyLocksForUpdates(ctx.lockVerifier, updates)
exclude := make([]string, 0, len(updates))

@ -28,8 +28,6 @@ type GitScanner struct {
remote string
skippedRefs []string
closed bool
started time.Time
cfg *config.Configuration
}
@ -43,18 +41,7 @@ type GitScannerSet interface {
// NewGitScanner initializes a *GitScanner for a Git repository in the current
// working directory.
func NewGitScanner(cfg *config.Configuration, cb GitScannerFoundPointer) *GitScanner {
return &GitScanner{started: time.Now(), FoundPointer: cb, cfg: cfg}
}
// Close stops exits once all processing has stopped, and all resources are
// tracked and cleaned up.
func (s *GitScanner) Close() {
if s.closed {
return
}
s.closed = true
tracerx.PerformanceSince("scan", s.started)
return &GitScanner{FoundPointer: cb, cfg: cfg}
}
// RemoteForPush sets up this *GitScanner to scan for objects to push to the
@ -81,7 +68,11 @@ func (s *GitScanner) ScanMultiRangeToRemote(include string, exclude []string, cb
return errors.New(tr.Tr.Get("unable to scan starting at %q: no remote set", include))
}
return scanRefsToChanSingleIncludeMultiExclude(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), s.opts(ScanRangeToRemoteMode))
start := time.Now()
err = scanRefsToChanSingleIncludeMultiExclude(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), s.opts(ScanRangeToRemoteMode))
tracerx.PerformanceSince("ScanMultiRangeToRemote", start)
return err
}
// ScanRefs scans through all unique objects reachable from the "include" refs
@ -95,7 +86,12 @@ func (s *GitScanner) ScanRefs(include, exclude []string, cb GitScannerFoundPoint
opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = false
return scanRefsToChan(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
start := time.Now()
err = scanRefsToChan(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanRefs", start)
return err
}
// ScanRefRange scans through all unique objects reachable from the "include"
@ -109,7 +105,12 @@ func (s *GitScanner) ScanRefRange(include, exclude string, cb GitScannerFoundPoi
opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = false
return scanRefsToChanSingleIncludeExclude(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
start := time.Now()
err = scanRefsToChanSingleIncludeExclude(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanRefRange", start)
return err
}
// ScanRefRangeByTree scans through all objects reachable from the "include"
@ -125,7 +126,12 @@ func (s *GitScanner) ScanRefRangeByTree(include, exclude string, cb GitScannerFo
opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = false
opts.CommitsOnly = true
return scanRefsByTree(s, callback, []string{include}, []string{exclude}, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
start := time.Now()
err = scanRefsByTree(s, callback, []string{include}, []string{exclude}, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanRefRangeByTree", start)
return err
}
// ScanRefWithDeleted scans through all unique objects in the given ref,
@ -144,7 +150,12 @@ func (s *GitScanner) ScanRef(ref string, cb GitScannerFoundPointer) error {
opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = true
return scanRefsToChanSingleIncludeExclude(s, callback, ref, "", s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
start := time.Now()
err = scanRefsToChanSingleIncludeExclude(s, callback, ref, "", s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanRef", start)
return err
}
// ScanRefByTree scans through all objects in the current ref, excluding
@ -159,7 +170,12 @@ func (s *GitScanner) ScanRefByTree(ref string, cb GitScannerFoundPointer) error
opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = true
opts.CommitsOnly = true
return scanRefsByTree(s, callback, []string{ref}, []string{}, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
start := time.Now()
err = scanRefsByTree(s, callback, []string{ref}, []string{}, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanRefByTree", start)
return err
}
// ScanAll scans through all unique objects in the repository, including
@ -172,7 +188,12 @@ func (s *GitScanner) ScanAll(cb GitScannerFoundPointer) error {
opts := s.opts(ScanAllMode)
opts.SkipDeletedBlobs = false
return scanRefsToChanSingleIncludeExclude(s, callback, "", "", s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
start := time.Now()
err = scanRefsToChanSingleIncludeExclude(s, callback, "", "", s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanAll", start)
return err
}
// ScanTree takes a ref and returns WrappedPointer objects in the tree at that
@ -183,7 +204,12 @@ func (s *GitScanner) ScanTree(ref string, cb GitScannerFoundPointer) error {
if err != nil {
return err
}
return runScanTree(callback, ref, s.Filter, s.cfg.GitEnv(), s.cfg.OSEnv())
start := time.Now()
err = runScanTree(callback, ref, s.Filter, s.cfg.GitEnv(), s.cfg.OSEnv())
tracerx.PerformanceSince("ScanTree", start)
return err
}
// ScanUnpushed scans history for all LFS pointers which have been added but not
@ -193,7 +219,12 @@ func (s *GitScanner) ScanUnpushed(remote string, cb GitScannerFoundPointer) erro
if err != nil {
return err
}
return scanUnpushed(callback, remote)
start := time.Now()
err = scanUnpushed(callback, remote)
tracerx.PerformanceSince("ScanUnpushed", start)
return err
}
// ScanStashed scans for all LFS pointers referenced solely by a stash
@ -203,7 +234,11 @@ func (s *GitScanner) ScanStashed(cb GitScannerFoundPointer) error {
return err
}
return scanStashed(callback)
start := time.Now()
err = scanStashed(callback)
tracerx.PerformanceSince("ScanStashed", start)
return err
}
// ScanPreviousVersions scans changes reachable from ref (commit) back to since.
@ -215,7 +250,12 @@ func (s *GitScanner) ScanPreviousVersions(ref string, since time.Time, cb GitSca
if err != nil {
return err
}
return logPreviousSHAs(callback, ref, s.Filter, since)
start := time.Now()
err = logPreviousSHAs(callback, ref, s.Filter, since)
tracerx.PerformanceSince("ScanPreviousVersions", start)
return err
}
// ScanIndex scans the git index for modified LFS objects.
@ -224,7 +264,12 @@ func (s *GitScanner) ScanIndex(ref string, cb GitScannerFoundPointer) error {
if err != nil {
return err
}
return scanIndex(callback, ref, s.Filter, s.cfg.GitEnv(), s.cfg.OSEnv())
start := time.Now()
err = scanIndex(callback, ref, s.Filter, s.cfg.GitEnv(), s.cfg.OSEnv())
tracerx.PerformanceSince("ScanIndex", start)
return err
}
func (s *GitScanner) opts(mode ScanningMode) *ScanRefsOptions {

@ -109,7 +109,6 @@ func scanUnpushed(remoteName string) ([]*WrappedPointer, error) {
return nil, err
}
gitscanner.Close()
return pointers, multiErr
}