git-lfs/commands/command_pull.go
Chris Darroch a0986c786c commands,lfs: drop GitScanner Close() method
The GitScanner structure and its methods were introduced in PR #1670,
and in commit bdbca399c46f0447f08066ce53185009b3db90ec of that PR
the structure's Close() method was introduced.  Unlike other similar
structures whose Close() methods should be called to release underlying
resources such as channels or I/O streams, the (*GitScanner).Close()
method serves only to output an optional performance timing trace metric.

This Close() method is not called consistently; for instance, it is never
called by the migrateExportCommand() function of the "git lfs migrate"
command, and will be skipped by the checkoutCommand() function of the
"git lfs checkout" command if an error is returned by the
(*GitScanner).ScanTree() method.

The utility of the performance timing metric is also undercut by the
fact that some commands perform other tasks before and after calling
the specific (*GitScanner).Scan*() method they invoke.  And in the
particular case of the "git lfs prune" command, multiple goroutines
are started, each of which runs a different Scan*() method simultaneously
with the others, so the final timing metric does not account for
their different execution times, just the overall final timing.

We can improve the value of the timing metric while also simplifying
the calling convention for the GitScanner structure's methods by
removing the Close() method, and tracing the performance of each
Scan*() method individually.

Removing the Close() method clarifies that no underlying resources
must be released for the GitScanner structure, and so callers need
not try to register a deferred call to the method.  This parallels
some other conventional Go structures, such as the Scanner structure
of the "bufio" package.

As well, running a "git lfs prune" command with the GIT_TRACE_PERFORMANCE=1
environment variable set now results in more detailed and useful output,
for example:

  12:36:51.221526 performance ScanStashed: 0.013632533 s
  12:36:51.224494 performance ScanUnpushed: 0.016570280 s
  12:36:51.240670 performance ScanTree: 0.017171717 s
2023-06-07 15:57:35 -07:00

159 lines
3.6 KiB
Go

package commands
import (
"fmt"
"os"
"sync"
"time"
"github.com/git-lfs/git-lfs/v3/filepathfilter"
"github.com/git-lfs/git-lfs/v3/git"
"github.com/git-lfs/git-lfs/v3/lfs"
"github.com/git-lfs/git-lfs/v3/tasklog"
"github.com/git-lfs/git-lfs/v3/tq"
"github.com/git-lfs/git-lfs/v3/tr"
"github.com/rubyist/tracerx"
"github.com/spf13/cobra"
)
// pullCommand is the handler registered for the "pull" command. It runs the
// package's Git version and repository setup steps, applies an optional
// remote name taken from the first positional argument, and then calls pull
// with a path filter built from the command's include/exclude arguments.
func pullCommand(cmd *cobra.Command, args []string) {
	requireGitVersion()
	setupRepository()

	// The remote, when supplied, is the first positional argument.
	if len(args) != 0 {
		remoteName := args[0]
		err := cfg.SetValidRemote(remoteName)
		if err != nil {
			Exit(tr.Tr.Get("Invalid remote name %q: %s", remoteName, err))
		}
	}

	include, exclude := getIncludeExcludeArgs(cmd)
	pull(buildFilepathFilter(cfg, include, exclude, true))
}
// pull downloads and checks out the LFS objects referenced by the tree of the
// current ref.
//
// A GitScanner is run over the current ref's tree with filter assigned to its
// Filter field. For every pointer it reports, the callback either hands the
// pointer straight to the single-checkout helper (when the object already
// exists locally) or adds it to the download queue. A background goroutine
// checks out every pointer recorded for an OID as that OID arrives on the
// queue's watch channel. Errors collected by the queue are printed with
// FullError and followed by a call to Exit.
func pull(filter *filepathfilter.Filter) {
ref, err := git.CurrentRef()
if err != nil {
Panic(err, tr.Tr.Get("Could not pull"))
}
// pointers records, per OID, the pointers waiting on a queued download.
pointers := newPointerMap()
// Progress output goes to stdout; the meter is enqueued on this logger below.
logger := tasklog.NewLogger(os.Stdout,
tasklog.ForceProgress(cfg.ForceProgress()),
)
meter := tq.NewMeter(cfg)
meter.Logger = meter.LoggerFromEnv(cfg.Os)
logger.Enqueue(meter)
remote := cfg.Remote()
singleCheckout := newSingleCheckout(cfg.Git, remote)
q := newDownloadQueue(singleCheckout.Manifest(), remote, tq.WithProgress(meter))
// The callback below receives each pointer (or error) the scanner reports.
gitscanner := lfs.NewGitScanner(cfg, func(p *lfs.WrappedPointer, err error) {
if err != nil {
LoggedError(err, tr.Tr.Get("Scanner error: %s", err))
return
}
// When this OID is already tracked, Seen has appended p to that OID's
// list, so p is checked out together with the others when the OID shows
// up on the watch channel; nothing further to queue here.
if pointers.Seen(p) {
return
}
// No need to download objects that exist locally already.
// NOTE(review): LinkOrCopyFromReference runs before the existence check,
// presumably so an object obtainable from a reference repository counts
// as local — confirm against the lfs package.
lfs.LinkOrCopyFromReference(cfg, p.Oid, p.Size)
if cfg.LFSObjectExists(p.Oid, p.Size) {
singleCheckout.Run(p)
return
}
// The object is missing locally: account for its size in the meter,
// record the pointer under its OID, and queue the download.
meter.Add(p.Size)
tracerx.Printf("fetch %v [%v]", p.Name, p.Oid)
pointers.Add(p)
q.Add(downloadTransfer(p))
})
gitscanner.Filter = filter
// The watch channel is obtained, and its consumer started, before ScanTree
// is called.
dlwatch := q.Watch()
var wg sync.WaitGroup
wg.Add(1)
go func() {
// For each transfer received, check out every pointer recorded for its
// OID; All also removes that OID from the map.
for t := range dlwatch {
for _, p := range pointers.All(t.Oid) {
singleCheckout.Run(p)
}
}
// Reached only after the range loop ends, i.e. once dlwatch is closed
// (presumably as part of q.Wait() — confirm against the tq package).
wg.Done()
}()
// processQueue marks the start of the span reported by the "process queue"
// performance trace below; the span covers the scan, q.Wait and wg.Wait.
processQueue := time.Now()
if err := gitscanner.ScanTree(ref.Sha, nil); err != nil {
// Close the checkout helper before reporting the scan failure.
singleCheckout.Close()
ExitWithError(err)
}
// The meter is started only after ScanTree has returned.
meter.Start()
q.Wait()
wg.Wait()
tracerx.PerformanceSince("process queue", processQueue)
singleCheckout.Close()
// Print every error the queue collected; any error at all means failure.
success := true
for _, err := range q.Errors() {
success = false
FullError(err)
}
if !success {
// Include the download endpoint URL for the remote in the failure message.
c := getAPIClient()
e := c.Endpoints.Endpoint("download", remote)
Exit(tr.Tr.Get("Failed to fetch some objects from '%s'", e.Url))
}
// Tell the user when the checkout helper reports that checkout was skipped.
if singleCheckout.Skip() {
fmt.Println(tr.Tr.Get("Skipping object checkout, Git LFS is not installed for this repository.\nConsider installing it with 'git lfs install'."))
}
}
// pointerMap tracks LFS objects being downloaded, according to their unique
// OIDs. Several pointers may share one OID, so each OID maps to a list.
type pointerMap struct {
// pointers maps an object's OID to every pointer recorded for that OID.
pointers map[string][]*lfs.WrappedPointer
// mu is held by the Seen, Add, and All methods while they access pointers.
mu sync.Mutex
}
// newPointerMap returns an empty pointerMap whose OID-to-pointers map has
// been allocated; the mutex is left at its zero value.
func newPointerMap() *pointerMap {
	byOid := make(map[string][]*lfs.WrappedPointer)
	return &pointerMap{pointers: byOid}
}
// Seen reports whether an entry for p's OID is already present in the map.
// When one is, p is appended to that OID's list before returning true, so the
// call both answers the question and records the additional pointer. When no
// entry exists the map is left unchanged and false is returned.
func (m *pointerMap) Seen(p *lfs.WrappedPointer) bool {
	m.mu.Lock()
	defer m.mu.Unlock()

	tracked, found := m.pointers[p.Oid]
	if !found {
		return false
	}
	m.pointers[p.Oid] = append(tracked, p)
	return true
}
// Add appends p to the list stored under its OID, creating the entry when
// the OID is not yet present.
func (m *pointerMap) Add(p *lfs.WrappedPointer) {
	m.mu.Lock()
	defer m.mu.Unlock()

	oid := p.Oid
	m.pointers[oid] = append(m.pointers[oid], p)
}
// All returns every pointer recorded under oid and removes that entry from
// the map, so a later call with the same oid yields nil unless pointers have
// been added again in between. An unknown oid yields nil.
func (m *pointerMap) All(oid string) []*lfs.WrappedPointer {
	m.mu.Lock()
	defer m.mu.Unlock()

	if claimed, ok := m.pointers[oid]; ok {
		delete(m.pointers, oid)
		return claimed
	}
	return nil
}
func init() {
RegisterCommand("pull", pullCommand, func(cmd *cobra.Command) {
cmd.Flags().StringVarP(&includeArg, "include", "I", "", "Include a list of paths")
cmd.Flags().StringVarP(&excludeArg, "exclude", "X", "", "Exclude a list of paths")
})
}