commands,lfs: drop GitScanner Close() method

The GitScanner structure and its methods were introduced in PR #1670,
and the structure's Close() method was added in commit
bdbca399c46f0447f08066ce53185009b3db90ec of that PR.  Unlike other similar
structures whose Close() methods should be called to release underlying
resources such as channels or I/O streams, the (*GitScanner).Close()
method serves only to output an optional performance timing trace metric.

This Close() method is not called consistently; for instance, it is never
called by the migrateExportCommand() function of the "git lfs migrate"
command, and will be skipped by the checkoutCommand() function of the
"git lfs checkout" command if an error is returned by the
(*GitScanner).ScanTree() method.

The utility of the performance timing metric is also undercut by the
fact that some commands perform other tasks before and after calling
the specific (*GitScanner).Scan*() method they invoke.  And in the
particular case of the "git lfs prune" command, multiple goroutines
are started, each of which runs a different Scan*() method simultaneously
with the others, so the final timing metric reports only the overall
elapsed time and does not distinguish their individual execution times.

We can improve the value of the timing metric while also simplifying
the calling convention for the GitScanner structure's methods by
removing the Close() method, and tracing the performance of each
Scan*() method individually.

Removing the Close() method clarifies that the GitScanner structure
holds no underlying resources which need to be released, and so callers
need not try to register a deferred call to the method.  This parallels
some other conventional Go structures, such as the Scanner structure
of the "bufio" package.

As well, running a "git lfs prune" command with the GIT_TRACE_PERFORMANCE=1
environment variable set now results in more detailed and useful output,
for example:

  12:36:51.221526 performance ScanStashed: 0.013632533 s
  12:36:51.224494 performance ScanUnpushed: 0.016570280 s
  12:36:51.240670 performance ScanTree: 0.017171717 s
This commit is contained in:
Chris Darroch 2023-06-05 12:48:47 -07:00
parent 2dd8934ff0
commit a0986c786c
11 changed files with 73 additions and 45 deletions

@ -76,7 +76,6 @@ func checkoutCommand(cmd *cobra.Command, args []string) {
if err := chgitscanner.ScanTree(ref.Sha, nil); err != nil { if err := chgitscanner.ScanTree(ref.Sha, nil); err != nil {
ExitWithError(err) ExitWithError(err)
} }
chgitscanner.Close()
meter.Start() meter.Start()
for _, p := range pointers { for _, p := range pointers {

@ -85,7 +85,6 @@ func dedupCommand(cmd *cobra.Command, args []string) {
atomic.AddInt64(&dedupStats.totalProcessedSize, p.Size) atomic.AddInt64(&dedupStats.totalProcessedSize, p.Size)
} }
}) })
defer gitScanner.Close()
if err := gitScanner.ScanTree("HEAD", nil); err != nil { if err := gitScanner.ScanTree("HEAD", nil); err != nil {
ExitWithError(err) ExitWithError(err)

@ -136,7 +136,6 @@ func pointersToFetchForRef(ref string, filter *filepathfilter.Filter) ([]*lfs.Wr
return nil, err return nil, err
} }
tempgitscanner.Close()
return pointers, multiErr return pointers, multiErr
} }
@ -180,7 +179,6 @@ func pointersToFetchForRefs(refs []string) ([]*lfs.WrappedPointer, error) {
return nil, err return nil, err
} }
tempgitscanner.Close()
return pointers, multiErr return pointers, multiErr
} }
@ -212,7 +210,6 @@ func fetchPreviousVersions(ref string, since time.Time, filter *filepathfilter.F
ExitWithError(err) ExitWithError(err)
} }
tempgitscanner.Close()
return fetchAndReportToChan(pointers, filter, nil) return fetchAndReportToChan(pointers, filter, nil)
} }
@ -317,8 +314,6 @@ func scanAll() []*lfs.WrappedPointer {
Panic(err, tr.Tr.Get("Could not scan for Git LFS files")) Panic(err, tr.Tr.Get("Could not scan for Git LFS files"))
} }
tempgitscanner.Close()
if multiErr != nil { if multiErr != nil {
Panic(multiErr, tr.Tr.Get("Could not scan for Git LFS files")) Panic(multiErr, tr.Tr.Get("Could not scan for Git LFS files"))
} }

@ -162,7 +162,6 @@ func doFsckObjects(include, exclude string, useIndex bool) []string {
} }
} }
gitscanner.Close()
return corruptOids return corruptOids
} }
@ -209,7 +208,6 @@ func doFsckPointers(include, exclude string) []corruptPointer {
} }
} }
gitscanner.Close()
return corruptPointers return corruptPointers
} }

@ -134,7 +134,6 @@ func lsFilesCommand(cmd *cobra.Command, args []string) {
seen[p.Name] = struct{}{} seen[p.Name] = struct{}{}
}) })
defer gitscanner.Close()
includeArg, excludeArg := getIncludeExcludeArgs(cmd) includeArg, excludeArg := getIncludeExcludeArgs(cmd)
gitscanner.Filter = buildFilepathFilter(cfg, includeArg, excludeArg, false) gitscanner.Filter = buildFilepathFilter(cfg, includeArg, excludeArg, false)

@ -121,8 +121,7 @@ func prune(fetchPruneConfig lfs.FetchPruneConfig, verifyRemote, dryRun, verbose
progresswait.Add(1) progresswait.Add(1)
go pruneTaskDisplayProgress(progressChan, &progresswait, logger) go pruneTaskDisplayProgress(progressChan, &progresswait, logger)
taskwait.Wait() // wait for subtasks taskwait.Wait() // wait for subtasks
gitscanner.Close()
close(retainChan) // triggers retain collector to end now all tasks have close(retainChan) // triggers retain collector to end now all tasks have
retainwait.Wait() // make sure all retained objects added retainwait.Wait() // make sure all retained objects added

@ -93,7 +93,6 @@ func pull(filter *filepathfilter.Filter) {
} }
meter.Start() meter.Start()
gitscanner.Close()
q.Wait() q.Wait()
wg.Wait() wg.Wait()
tracerx.PerformanceSince("process queue", processQueue) tracerx.PerformanceSince("process queue", processQueue)

@ -245,7 +245,6 @@ func statusScanRefRange(ref *git.Ref) {
Print("\t%s (%s)", p.Name, p.Oid) Print("\t%s (%s)", p.Name, p.Oid)
}) })
defer gitscanner.Close()
Print("%s\n", tr.Tr.Get("Objects to be pushed to %s:", remoteRef.Name)) Print("%s\n", tr.Tr.Get("Objects to be pushed to %s:", remoteRef.Name))
if err := gitscanner.ScanRefRange(ref.Sha, remoteRef.Sha, nil); err != nil { if err := gitscanner.ScanRefRange(ref.Sha, remoteRef.Sha, nil); err != nil {

@ -22,10 +22,7 @@ import (
func uploadForRefUpdates(ctx *uploadContext, updates []*git.RefUpdate, pushAll bool) error { func uploadForRefUpdates(ctx *uploadContext, updates []*git.RefUpdate, pushAll bool) error {
gitscanner := ctx.buildGitScanner() gitscanner := ctx.buildGitScanner()
defer func() { defer ctx.ReportErrors()
gitscanner.Close()
ctx.ReportErrors()
}()
verifyLocksForUpdates(ctx.lockVerifier, updates) verifyLocksForUpdates(ctx.lockVerifier, updates)
exclude := make([]string, 0, len(updates)) exclude := make([]string, 0, len(updates))

@ -28,8 +28,6 @@ type GitScanner struct {
remote string remote string
skippedRefs []string skippedRefs []string
closed bool
started time.Time
cfg *config.Configuration cfg *config.Configuration
} }
@ -43,18 +41,7 @@ type GitScannerSet interface {
// NewGitScanner initializes a *GitScanner for a Git repository in the current // NewGitScanner initializes a *GitScanner for a Git repository in the current
// working directory. // working directory.
func NewGitScanner(cfg *config.Configuration, cb GitScannerFoundPointer) *GitScanner { func NewGitScanner(cfg *config.Configuration, cb GitScannerFoundPointer) *GitScanner {
return &GitScanner{started: time.Now(), FoundPointer: cb, cfg: cfg} return &GitScanner{FoundPointer: cb, cfg: cfg}
}
// Close stops exits once all processing has stopped, and all resources are
// tracked and cleaned up.
func (s *GitScanner) Close() {
if s.closed {
return
}
s.closed = true
tracerx.PerformanceSince("scan", s.started)
} }
// RemoteForPush sets up this *GitScanner to scan for objects to push to the // RemoteForPush sets up this *GitScanner to scan for objects to push to the
@ -81,7 +68,11 @@ func (s *GitScanner) ScanMultiRangeToRemote(include string, exclude []string, cb
return errors.New(tr.Tr.Get("unable to scan starting at %q: no remote set", include)) return errors.New(tr.Tr.Get("unable to scan starting at %q: no remote set", include))
} }
return scanRefsToChanSingleIncludeMultiExclude(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), s.opts(ScanRangeToRemoteMode)) start := time.Now()
err = scanRefsToChanSingleIncludeMultiExclude(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), s.opts(ScanRangeToRemoteMode))
tracerx.PerformanceSince("ScanMultiRangeToRemote", start)
return err
} }
// ScanRefs scans through all unique objects reachable from the "include" refs // ScanRefs scans through all unique objects reachable from the "include" refs
@ -95,7 +86,12 @@ func (s *GitScanner) ScanRefs(include, exclude []string, cb GitScannerFoundPoint
opts := s.opts(ScanRefsMode) opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = false opts.SkipDeletedBlobs = false
return scanRefsToChan(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
start := time.Now()
err = scanRefsToChan(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanRefs", start)
return err
} }
// ScanRefRange scans through all unique objects reachable from the "include" // ScanRefRange scans through all unique objects reachable from the "include"
@ -109,7 +105,12 @@ func (s *GitScanner) ScanRefRange(include, exclude string, cb GitScannerFoundPoi
opts := s.opts(ScanRefsMode) opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = false opts.SkipDeletedBlobs = false
return scanRefsToChanSingleIncludeExclude(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
start := time.Now()
err = scanRefsToChanSingleIncludeExclude(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanRefRange", start)
return err
} }
// ScanRefRangeByTree scans through all objects reachable from the "include" // ScanRefRangeByTree scans through all objects reachable from the "include"
@ -125,7 +126,12 @@ func (s *GitScanner) ScanRefRangeByTree(include, exclude string, cb GitScannerFo
opts := s.opts(ScanRefsMode) opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = false opts.SkipDeletedBlobs = false
opts.CommitsOnly = true opts.CommitsOnly = true
return scanRefsByTree(s, callback, []string{include}, []string{exclude}, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
start := time.Now()
err = scanRefsByTree(s, callback, []string{include}, []string{exclude}, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanRefRangeByTree", start)
return err
} }
// ScanRefWithDeleted scans through all unique objects in the given ref, // ScanRefWithDeleted scans through all unique objects in the given ref,
@ -144,7 +150,12 @@ func (s *GitScanner) ScanRef(ref string, cb GitScannerFoundPointer) error {
opts := s.opts(ScanRefsMode) opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = true opts.SkipDeletedBlobs = true
return scanRefsToChanSingleIncludeExclude(s, callback, ref, "", s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
start := time.Now()
err = scanRefsToChanSingleIncludeExclude(s, callback, ref, "", s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanRef", start)
return err
} }
// ScanRefByTree scans through all objects in the current ref, excluding // ScanRefByTree scans through all objects in the current ref, excluding
@ -159,7 +170,12 @@ func (s *GitScanner) ScanRefByTree(ref string, cb GitScannerFoundPointer) error
opts := s.opts(ScanRefsMode) opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = true opts.SkipDeletedBlobs = true
opts.CommitsOnly = true opts.CommitsOnly = true
return scanRefsByTree(s, callback, []string{ref}, []string{}, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
start := time.Now()
err = scanRefsByTree(s, callback, []string{ref}, []string{}, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanRefByTree", start)
return err
} }
// ScanAll scans through all unique objects in the repository, including // ScanAll scans through all unique objects in the repository, including
@ -172,7 +188,12 @@ func (s *GitScanner) ScanAll(cb GitScannerFoundPointer) error {
opts := s.opts(ScanAllMode) opts := s.opts(ScanAllMode)
opts.SkipDeletedBlobs = false opts.SkipDeletedBlobs = false
return scanRefsToChanSingleIncludeExclude(s, callback, "", "", s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
start := time.Now()
err = scanRefsToChanSingleIncludeExclude(s, callback, "", "", s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanAll", start)
return err
} }
// ScanTree takes a ref and returns WrappedPointer objects in the tree at that // ScanTree takes a ref and returns WrappedPointer objects in the tree at that
@ -183,7 +204,12 @@ func (s *GitScanner) ScanTree(ref string, cb GitScannerFoundPointer) error {
if err != nil { if err != nil {
return err return err
} }
return runScanTree(callback, ref, s.Filter, s.cfg.GitEnv(), s.cfg.OSEnv())
start := time.Now()
err = runScanTree(callback, ref, s.Filter, s.cfg.GitEnv(), s.cfg.OSEnv())
tracerx.PerformanceSince("ScanTree", start)
return err
} }
// ScanUnpushed scans history for all LFS pointers which have been added but not // ScanUnpushed scans history for all LFS pointers which have been added but not
@ -193,7 +219,12 @@ func (s *GitScanner) ScanUnpushed(remote string, cb GitScannerFoundPointer) erro
if err != nil { if err != nil {
return err return err
} }
return scanUnpushed(callback, remote)
start := time.Now()
err = scanUnpushed(callback, remote)
tracerx.PerformanceSince("ScanUnpushed", start)
return err
} }
// ScanStashed scans for all LFS pointers referenced solely by a stash // ScanStashed scans for all LFS pointers referenced solely by a stash
@ -203,7 +234,11 @@ func (s *GitScanner) ScanStashed(cb GitScannerFoundPointer) error {
return err return err
} }
return scanStashed(callback) start := time.Now()
err = scanStashed(callback)
tracerx.PerformanceSince("ScanStashed", start)
return err
} }
// ScanPreviousVersions scans changes reachable from ref (commit) back to since. // ScanPreviousVersions scans changes reachable from ref (commit) back to since.
@ -215,7 +250,12 @@ func (s *GitScanner) ScanPreviousVersions(ref string, since time.Time, cb GitSca
if err != nil { if err != nil {
return err return err
} }
return logPreviousSHAs(callback, ref, s.Filter, since)
start := time.Now()
err = logPreviousSHAs(callback, ref, s.Filter, since)
tracerx.PerformanceSince("ScanPreviousVersions", start)
return err
} }
// ScanIndex scans the git index for modified LFS objects. // ScanIndex scans the git index for modified LFS objects.
@ -224,7 +264,12 @@ func (s *GitScanner) ScanIndex(ref string, cb GitScannerFoundPointer) error {
if err != nil { if err != nil {
return err return err
} }
return scanIndex(callback, ref, s.Filter, s.cfg.GitEnv(), s.cfg.OSEnv())
start := time.Now()
err = scanIndex(callback, ref, s.Filter, s.cfg.GitEnv(), s.cfg.OSEnv())
tracerx.PerformanceSince("ScanIndex", start)
return err
} }
func (s *GitScanner) opts(mode ScanningMode) *ScanRefsOptions { func (s *GitScanner) opts(mode ScanningMode) *ScanRefsOptions {

@ -109,7 +109,6 @@ func scanUnpushed(remoteName string) ([]*WrappedPointer, error) {
return nil, err return nil, err
} }
gitscanner.Close()
return pointers, multiErr return pointers, multiErr
} }