Make pull parallelise fetch/checkout again

Turns out you CAN do this while maintaining support for multiple files
with the same content and still get a performance boost. Thanks @rubyist
This commit is contained in:
Steve Streeting 2015-07-29 10:26:14 +01:00
parent a44c59554a
commit d3d5999d83
3 changed files with 64 additions and 7 deletions

@@ -8,6 +8,7 @@ import (
"github.com/github/git-lfs/git"
"github.com/github/git-lfs/lfs"
"github.com/github/git-lfs/vendor/_nuts/github.com/spf13/cobra"
"github.com/rubyist/tracerx"
)
var (
@@ -40,6 +41,52 @@ func init() {
RootCmd.AddCommand(checkoutCmd)
}
// checkoutAllFromFetchChan consumes pointers reported by the fetch process
// and checks them all out, applying no include/exclude filtering.
func checkoutAllFromFetchChan(in chan *lfs.WrappedPointer) {
	tracerx.Printf("starting fetch/parallel checkout")
	checkoutFromFetchChan(nil, nil, in)
}
// checkoutFromFetchChan checks out working-tree files for each pointer that
// arrives on in (as reported by the fetch process), honouring the given
// include/exclude filters. Fetch reports each OID only once, so the current
// tree is re-scanned to find every path that shares the same content.
func checkoutFromFetchChan(include []string, exclude []string, in chan *lfs.WrappedPointer) {
	ref, err := git.CurrentRef()
	if err != nil {
		Panic(err, "Could not checkout")
	}

	// ScanTree yields every path, so multiple files with identical
	// content (the same OID) can each be checked out.
	pointers, err := lfs.ScanTree(ref)
	if err != nil {
		Panic(err, "Could not scan for Git LFS files")
	}

	// Group the filtered pointers by OID so that a single fetched object
	// fans out to every path referencing it.
	byOid := make(map[string][]*lfs.WrappedPointer)
	for _, p := range pointers {
		if !lfs.FilenamePassesIncludeExcludeFilter(p.Name, include, exclude) {
			continue
		}
		byOid[p.Oid] = append(byOid[p.Oid], p)
	}

	// Run the git update-index consumer in the background.
	out := make(chan *lfs.WrappedPointer)
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		checkoutWithChan(out)
	}()

	// Forward every path belonging to each OID the fetch process reports.
	for p := range in {
		for _, match := range byOid[p.Oid] {
			out <- match
		}
	}
	close(out)
	wg.Wait()
}
func checkoutWithIncludeExclude(include []string, exclude []string) {
ref, err := git.CurrentRef()
if err != nil {

@@ -37,6 +37,18 @@ func init() {
RootCmd.AddCommand(fetchCmd)
}
// fetchRefToChan scans ref for Git LFS pointers, starts fetching their
// content in the background, and returns the channel on which each pointer
// is reported as the fetch process handles it.
func fetchRefToChan(ref string) chan *lfs.WrappedPointer {
	pointers, err := lfs.ScanRefs(ref, "", nil)
	if err != nil {
		Panic(err, "Could not scan for Git LFS files")
	}

	out := make(chan *lfs.WrappedPointer)
	go fetchAndReportToChan(pointers, out)
	return out
}
// Fetch all binaries for a given ref (that we don't have already)
func fetchRef(ref string) {
pointers, err := lfs.ScanRefs(ref, "", nil)
@@ -60,6 +72,9 @@ func fetchAndReportToChan(pointers []*lfs.WrappedPointer, out chan<- *lfs.Wrappe
// which would only be skipped by PointerSmudgeObject later
if !lfs.ObjectExistsOfSize(p.Oid, p.Size) {
q.Add(lfs.NewDownloadable(p))
} else {
// If we already have it, report it to chan immediately to support pull/checkout
out <- p
}
}

@@ -20,13 +20,8 @@ func pullCommand(cmd *cobra.Command, args []string) {
Panic(err, "Could not pull")
}
// Previously we would only checkout files that were downloaded, as they
// were downloaded. However this would ignore files where the content was
// already present locally (since these are no longer included in transfer Q for
// better reporting purposes).
// So now we do exactly what we say on the tin, fetch then a separate checkout
fetchRef(ref)
checkoutAll()
c := fetchRefToChan(ref)
checkoutAllFromFetchChan(c)
}
func init() {