each command should open and close a gitscanner just once

This commit is contained in:
risk danger olson 2016-11-17 17:22:21 -07:00
parent bbe552ac83
commit bdbca399c4
10 changed files with 116 additions and 50 deletions

@ -6,6 +6,7 @@ import (
"path/filepath"
"strings"
"github.com/git-lfs/git-lfs/lfs"
"github.com/git-lfs/git-lfs/localstorage"
"github.com/git-lfs/git-lfs/subprocess"
@ -70,18 +71,18 @@ func cloneCommand(cmd *cobra.Command, args []string) {
includeArg, excludeArg := getIncludeExcludeArgs(cmd)
include, exclude := determineIncludeExcludePaths(cfg, includeArg, excludeArg)
gitscanner := lfs.NewGitScanner()
defer gitscanner.Close()
if cloneFlags.NoCheckout || cloneFlags.Bare {
// If --no-checkout or --bare then we shouldn't check out, just fetch instead
fetchRef("HEAD", include, exclude)
fetchRef(gitscanner, "HEAD", include, exclude)
} else {
pull(include, exclude)
pull(gitscanner, include, exclude)
err := postCloneSubmodules(args)
if err != nil {
Exit("Error performing 'git lfs pull' for submodules: %v", err)
}
}
}
func postCloneSubmodules(args []string) error {

@ -61,6 +61,9 @@ func fetchCommand(cmd *cobra.Command, args []string) {
}
success := true
gitscanner := lfs.NewGitScanner()
defer gitscanner.Close()
include, exclude := getIncludeExcludeArgs(cmd)
if fetchAllArg {
@ -73,7 +76,7 @@ func fetchCommand(cmd *cobra.Command, args []string) {
if len(cfg.FetchIncludePaths()) > 0 || len(cfg.FetchExcludePaths()) > 0 {
Print("Ignoring global include / exclude paths to fulfil --all")
}
success = fetchAll()
success = fetchAll(gitscanner)
} else { // !all
includePaths, excludePaths := determineIncludeExcludePaths(cfg, include, exclude)
@ -81,12 +84,12 @@ func fetchCommand(cmd *cobra.Command, args []string) {
// Fetch refs sequentially per arg order; duplicates in later refs will be ignored
for _, ref := range refs {
Print("Fetching %v", ref.Name)
s := fetchRef(ref.Sha, includePaths, excludePaths)
s := fetchRef(gitscanner, ref.Sha, includePaths, excludePaths)
success = success && s
}
if fetchRecentArg || cfg.FetchPruneConfig().FetchRecentAlways {
s := fetchRecent(refs, includePaths, excludePaths)
s := fetchRecent(gitscanner, refs, includePaths, excludePaths)
success = success && s
}
}
@ -103,13 +106,17 @@ func fetchCommand(cmd *cobra.Command, args []string) {
}
}
func pointersToFetchForRef(ref string) ([]*lfs.WrappedPointer, error) {
return lfs.ScanTree(ref)
func pointersToFetchForRef(gitscanner *lfs.GitScanner, ref string) ([]*lfs.WrappedPointer, error) {
pointerCh, err := gitscanner.ScanTree(ref)
if err != nil {
return nil, err
}
return collectPointers(pointerCh)
}
func fetchRefToChan(ref string, include, exclude []string) chan *lfs.WrappedPointer {
func fetchRefToChan(gitscanner *lfs.GitScanner, ref string, include, exclude []string) chan *lfs.WrappedPointer {
c := make(chan *lfs.WrappedPointer)
pointers, err := pointersToFetchForRef(ref)
pointers, err := pointersToFetchForRef(gitscanner, ref)
if err != nil {
Panic(err, "Could not scan for Git LFS files")
}
@ -120,8 +127,8 @@ func fetchRefToChan(ref string, include, exclude []string) chan *lfs.WrappedPoin
}
// Fetch all binaries for a given ref (that we don't have already)
func fetchRef(ref string, include, exclude []string) bool {
pointers, err := pointersToFetchForRef(ref)
func fetchRef(gitscanner *lfs.GitScanner, ref string, include, exclude []string) bool {
pointers, err := pointersToFetchForRef(gitscanner, ref)
if err != nil {
Panic(err, "Could not scan for Git LFS files")
}
@ -130,8 +137,12 @@ func fetchRef(ref string, include, exclude []string) bool {
// Fetch all previous versions of objects from since to ref (not including final state at ref)
// So this will fetch all the '-' sides of the diff from since to ref
func fetchPreviousVersions(ref string, since time.Time, include, exclude []string) bool {
pointers, err := lfs.ScanPreviousVersions(ref, since)
func fetchPreviousVersions(gitscanner *lfs.GitScanner, ref string, since time.Time, include, exclude []string) bool {
pointerCh, err := gitscanner.ScanPreviousVersions(ref, since)
if err != nil {
ExitWithError(err)
}
pointers, err := collectPointers(pointerCh)
if err != nil {
Panic(err, "Could not scan for Git LFS previous versions")
}
@ -139,7 +150,7 @@ func fetchPreviousVersions(ref string, since time.Time, include, exclude []strin
}
// Fetch recent objects based on config
func fetchRecent(alreadyFetchedRefs []*git.Ref, include, exclude []string) bool {
func fetchRecent(gitscanner *lfs.GitScanner, alreadyFetchedRefs []*git.Ref, include, exclude []string) bool {
fetchconf := cfg.FetchPruneConfig()
if fetchconf.FetchRecentRefsDays == 0 && fetchconf.FetchRecentCommitsDays == 0 {
@ -169,7 +180,7 @@ func fetchRecent(alreadyFetchedRefs []*git.Ref, include, exclude []string) bool
} else {
uniqueRefShas[ref.Sha] = ref.Name
Print("Fetching %v", ref.Name)
k := fetchRef(ref.Sha, include, exclude)
k := fetchRef(gitscanner, ref.Sha, include, exclude)
ok = ok && k
}
}
@ -185,7 +196,7 @@ func fetchRecent(alreadyFetchedRefs []*git.Ref, include, exclude []string) bool
}
Print("Fetching changes within %v days of %v", fetchconf.FetchRecentCommitsDays, refName)
commitsSince := summ.CommitDate.AddDate(0, 0, -fetchconf.FetchRecentCommitsDays)
k := fetchPreviousVersions(commit, commitsSince, include, exclude)
k := fetchPreviousVersions(gitscanner, commit, commitsSince, include, exclude)
ok = ok && k
}
@ -193,19 +204,18 @@ func fetchRecent(alreadyFetchedRefs []*git.Ref, include, exclude []string) bool
return ok
}
func fetchAll() bool {
pointers := scanAll()
func fetchAll(gitscanner *lfs.GitScanner) bool {
pointers := scanAll(gitscanner)
Print("Fetching objects...")
return fetchPointers(pointers, nil, nil)
}
func scanAll() []*lfs.WrappedPointer {
func scanAll(gitscanner *lfs.GitScanner) []*lfs.WrappedPointer {
// This could be a long process so use the chan version & report progress
Print("Scanning for all objects ever referenced...")
spinner := progress.NewSpinner()
var numObjs int64
gitscanner := lfs.NewGitScanner()
pointerCh, err := gitscanner.ScanAll()
if err != nil {
Panic(err, "Could not scan for Git LFS files")

@ -30,6 +30,7 @@ func doFsck() (bool, error) {
pointerIndex := make(map[string]string)
gitscanner := lfs.NewGitScanner()
defer gitscanner.Close()
pointerCh, err := gitscanner.ScanRefWithDeleted(ref.Sha)
if err != nil {
return false, err

@ -55,7 +55,11 @@ func prePushCommand(cmd *cobra.Command, args []string) {
ctx := newUploadContext(prePushDryRun)
gitscanner := lfs.NewGitScanner()
gitscanner.RemoteForPush(cfg.CurrentRemote)
if err := gitscanner.RemoteForPush(cfg.CurrentRemote); err != nil {
ExitWithError(err)
}
defer gitscanner.Close()
// We can be passed multiple lines of refs
scanner := bufio.NewScanner(os.Stdin)

@ -80,12 +80,14 @@ func prune(fetchPruneConfig config.FetchPruneConfig, verifyRemote, dryRun, verbo
// Now find files to be retained from many sources
retainChan := make(chan string, 100)
go pruneTaskGetRetainedCurrentAndRecentRefs(fetchPruneConfig, retainChan, errorChan, &taskwait)
go pruneTaskGetRetainedUnpushed(fetchPruneConfig, retainChan, errorChan, &taskwait)
go pruneTaskGetRetainedWorktree(retainChan, errorChan, &taskwait)
gitscanner := lfs.NewGitScanner()
defer gitscanner.Close()
go pruneTaskGetRetainedCurrentAndRecentRefs(gitscanner, fetchPruneConfig, retainChan, errorChan, &taskwait)
go pruneTaskGetRetainedUnpushed(gitscanner, fetchPruneConfig, retainChan, errorChan, &taskwait)
go pruneTaskGetRetainedWorktree(gitscanner, retainChan, errorChan, &taskwait)
if verifyRemote {
reachableObjects = tools.NewStringSetWithCapacity(100)
go pruneTaskGetReachableObjects(&reachableObjects, errorChan, &taskwait)
go pruneTaskGetReachableObjects(gitscanner, &reachableObjects, errorChan, &taskwait)
}
// Now collect all the retained objects, on separate wait
@ -299,10 +301,9 @@ func pruneTaskGetLocalObjects(outLocalObjects *[]localstorage.Object, progChan P
}
// Background task, must call waitg.Done() once at end
func pruneTaskGetRetainedAtRef(ref string, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) {
func pruneTaskGetRetainedAtRef(gitscanner *lfs.GitScanner, ref string, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) {
defer waitg.Done()
gitscanner := lfs.NewGitScanner()
refchan, err := gitscanner.ScanRef(ref)
if err != nil {
errorChan <- err
@ -319,10 +320,9 @@ func pruneTaskGetRetainedAtRef(ref string, retainChan chan string, errorChan cha
}
// Background task, must call waitg.Done() once at end
func pruneTaskGetPreviousVersionsOfRef(ref string, since time.Time, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) {
func pruneTaskGetPreviousVersionsOfRef(gitscanner *lfs.GitScanner, ref string, since time.Time, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) {
defer waitg.Done()
gitscanner := lfs.NewGitScanner()
refchan, err := gitscanner.ScanPreviousVersions(ref, since)
if err != nil {
errorChan <- err
@ -339,7 +339,7 @@ func pruneTaskGetPreviousVersionsOfRef(ref string, since time.Time, retainChan c
}
// Background task, must call waitg.Done() once at end
func pruneTaskGetRetainedCurrentAndRecentRefs(fetchconf config.FetchPruneConfig, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) {
func pruneTaskGetRetainedCurrentAndRecentRefs(gitscanner *lfs.GitScanner, fetchconf config.FetchPruneConfig, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) {
defer waitg.Done()
// We actually increment the waitg in this func since we kick off sub-goroutines
@ -353,7 +353,7 @@ func pruneTaskGetRetainedCurrentAndRecentRefs(fetchconf config.FetchPruneConfig,
}
commits.Add(ref.Sha)
waitg.Add(1)
go pruneTaskGetRetainedAtRef(ref.Sha, retainChan, errorChan, waitg)
go pruneTaskGetRetainedAtRef(gitscanner, ref.Sha, retainChan, errorChan, waitg)
// Now recent
if fetchconf.FetchRecentRefsDays > 0 {
@ -369,7 +369,7 @@ func pruneTaskGetRetainedCurrentAndRecentRefs(fetchconf config.FetchPruneConfig,
if commits.Add(ref.Sha) {
// A new commit
waitg.Add(1)
go pruneTaskGetRetainedAtRef(ref.Sha, retainChan, errorChan, waitg)
go pruneTaskGetRetainedAtRef(gitscanner, ref.Sha, retainChan, errorChan, waitg)
}
}
}
@ -387,16 +387,15 @@ func pruneTaskGetRetainedCurrentAndRecentRefs(fetchconf config.FetchPruneConfig,
}
commitsSince := summ.CommitDate.AddDate(0, 0, -pruneCommitDays)
waitg.Add(1)
go pruneTaskGetPreviousVersionsOfRef(commit, commitsSince, retainChan, errorChan, waitg)
go pruneTaskGetPreviousVersionsOfRef(gitscanner, commit, commitsSince, retainChan, errorChan, waitg)
}
}
}
// Background task, must call waitg.Done() once at end
func pruneTaskGetRetainedUnpushed(fetchconf config.FetchPruneConfig, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) {
func pruneTaskGetRetainedUnpushed(gitscanner *lfs.GitScanner, fetchconf config.FetchPruneConfig, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) {
defer waitg.Done()
gitscanner := lfs.NewGitScanner()
refchan, err := gitscanner.ScanUnpushed(fetchconf.PruneRemoteName)
if err != nil {
errorChan <- err
@ -413,7 +412,7 @@ func pruneTaskGetRetainedUnpushed(fetchconf config.FetchPruneConfig, retainChan
}
// Background task, must call waitg.Done() once at end
func pruneTaskGetRetainedWorktree(retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) {
func pruneTaskGetRetainedWorktree(gitscanner *lfs.GitScanner, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup) {
defer waitg.Done()
// Retain other worktree HEADs too
@ -438,17 +437,16 @@ func pruneTaskGetRetainedWorktree(retainChan chan string, errorChan chan error,
// Worktree is on a different commit
waitg.Add(1)
// Don't need to 'cd' to worktree since we share same repo
go pruneTaskGetRetainedAtRef(ref.Sha, retainChan, errorChan, waitg)
go pruneTaskGetRetainedAtRef(gitscanner, ref.Sha, retainChan, errorChan, waitg)
}
}
}
// Background task, must call waitg.Done() once at end
func pruneTaskGetReachableObjects(outObjectSet *tools.StringSet, errorChan chan error, waitg *sync.WaitGroup) {
func pruneTaskGetReachableObjects(gitscanner *lfs.GitScanner, outObjectSet *tools.StringSet, errorChan chan error, waitg *sync.WaitGroup) {
defer waitg.Done()
gitscanner := lfs.NewGitScanner()
pointerchan, err := gitscanner.ScanAll()
if err != nil {
errorChan <- fmt.Errorf("Error scanning for reachable objects: %v", err)

@ -4,6 +4,7 @@ import (
"fmt"
"github.com/git-lfs/git-lfs/git"
"github.com/git-lfs/git-lfs/lfs"
"github.com/spf13/cobra"
)
@ -27,20 +28,20 @@ func pullCommand(cmd *cobra.Command, args []string) {
}
includeArg, excludeArg := getIncludeExcludeArgs(cmd)
pull(determineIncludeExcludePaths(cfg, includeArg, excludeArg))
include, exclude := determineIncludeExcludePaths(cfg, includeArg, excludeArg)
gitscanner := lfs.NewGitScanner()
defer gitscanner.Close()
pull(gitscanner, include, exclude)
}
func pull(includePaths, excludePaths []string) {
func pull(gitscanner *lfs.GitScanner, includePaths, excludePaths []string) {
ref, err := git.CurrentRef()
if err != nil {
Panic(err, "Could not pull")
}
c := fetchRefToChan(ref.Sha, includePaths, excludePaths)
c := fetchRefToChan(gitscanner, ref.Sha, includePaths, excludePaths)
checkoutFromFetchChan(includePaths, excludePaths, c)
}
func init() {

@ -22,7 +22,10 @@ func uploadsBetweenRefAndRemote(ctx *uploadContext, refnames []string) {
tracerx.Printf("Upload refs %v to remote %v", refnames, cfg.CurrentRemote)
gitscanner := lfs.NewGitScanner()
gitscanner.RemoteForPush(cfg.CurrentRemote)
if err := gitscanner.RemoteForPush(cfg.CurrentRemote); err != nil {
ExitWithError(err)
}
defer gitscanner.Close()
refs, err := refsByNames(refnames)
if err != nil {

@ -19,6 +19,7 @@ func statusCommand(cmd *cobra.Command, args []string) {
ref, _ := git.CurrentRef()
gitscanner := lfs.NewGitScanner()
defer gitscanner.Close()
scanIndexAt := "HEAD"
if ref == nil {

11
commands/pointers.go Normal file

@ -0,0 +1,11 @@
package commands
import "github.com/git-lfs/git-lfs/lfs"
// collectPointers ranges over pointerCh.Results, gathering every pointer it
// yields into a slice, and then calls pointerCh.Wait(). The gathered slice is
// returned together with whatever Wait() reports, so on error the slice may
// still hold the pointers seen before the failure.
func collectPointers(pointerCh *lfs.PointerChannelWrapper) ([]*lfs.WrappedPointer, error) {
	var collected []*lfs.WrappedPointer
	for ptr := range pointerCh.Results {
		collected = append(collected, ptr)
	}

	// Wait is only consulted once Results has been fully consumed.
	err := pointerCh.Wait()
	return collected, err
}

@ -2,34 +2,67 @@ package lfs
import (
"fmt"
"sync"
"time"
"github.com/rubyist/tracerx"
)
// GitScanner scans objects in a Git repository for LFS pointers.
type GitScanner struct {
	// remote is the remote name stored by RemoteForPush; it is empty until
	// RemoteForPush has been called, and ScanLeftToRemote refuses to run
	// while it is empty.
	remote string
	// skippedRefs is the result of calcSkippedRefs for remote, refreshed by
	// RemoteForPush.
	skippedRefs []string
	// closed is set by Close so that later Close calls return immediately.
	closed bool
	// started is the timestamp Close hands to tracerx.PerformanceSince.
	started time.Time
	// mu is held by the methods of this type while they read or write the
	// fields above.
	mu sync.Mutex
}
// NewGitScanner initializes a *GitScanner for a Git repository in the current
// working directory. The creation time is stored in the scanner's started
// field so that Close can report how long the scanner was in use.
func NewGitScanner() *GitScanner {
	return &GitScanner{started: time.Now()}
}
// Close marks the scanner as closed and records, via
// tracerx.PerformanceSince, the time elapsed since the scanner's started
// timestamp under the "scan" key. Only the first call has any effect; later
// calls return without doing anything.
func (s *GitScanner) Close() {
	s.mu.Lock()
	defer s.mu.Unlock()

	if !s.closed {
		s.closed = true
		tracerx.PerformanceSince("scan", s.started)
	}
}
// RemoteForPush sets up this *GitScanner to scan for objects to push to the
// given remote. Needed for ScanLeftToRemote().
func (s *GitScanner) RemoteForPush(r string) {
func (s *GitScanner) RemoteForPush(r string) error {
s.mu.Lock()
defer s.mu.Unlock()
if len(s.remote) > 0 && s.remote != r {
return fmt.Errorf("Trying to set remote to %q, already set to %q", r, s.remote)
}
s.remote = r
s.skippedRefs = calcSkippedRefs(r)
return nil
}
// ScanLeftToRemote scans, in ScanLeftToRemoteMode, starting at the given ref.
// A remote must have been set beforehand with RemoteForPush(); otherwise an
// error is returned and no scan is started.
func (s *GitScanner) ScanLeftToRemote(left string) (*PointerChannelWrapper, error) {
	// Check the remote under the lock, but release it before calling s.opts,
	// which acquires the same mutex itself.
	s.mu.Lock()
	remoteMissing := s.remote == ""
	s.mu.Unlock()

	if remoteMissing {
		return nil, fmt.Errorf("Unable to scan starting at %q: no remote set.", left)
	}
	return scanRefsToChan(left, "", s.opts(ScanLeftToRemoteMode))
}
@ -84,6 +117,9 @@ func (s *GitScanner) ScanPreviousVersions(ref string, since time.Time) (*Pointer
}
func (s *GitScanner) opts(mode ScanningMode) *ScanRefsOptions {
s.mu.Lock()
defer s.mu.Unlock()
opts := newScanRefsOptions()
opts.ScanMode = mode
opts.RemoteName = s.remote