Merge pull request #1616 from github/fast-directory-walk
Fast directory walk
This commit is contained in:
commit
7b3bc24daf
@ -11,6 +11,7 @@ import (
|
|||||||
"github.com/github/git-lfs/git"
|
"github.com/github/git-lfs/git"
|
||||||
"github.com/github/git-lfs/lfs"
|
"github.com/github/git-lfs/lfs"
|
||||||
"github.com/github/git-lfs/progress"
|
"github.com/github/git-lfs/progress"
|
||||||
|
"github.com/github/git-lfs/tools"
|
||||||
"github.com/rubyist/tracerx"
|
"github.com/rubyist/tracerx"
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
)
|
)
|
||||||
@ -55,7 +56,7 @@ func checkoutFromFetchChan(include []string, exclude []string, in chan *lfs.Wrap
|
|||||||
// Map oid to multiple pointers
|
// Map oid to multiple pointers
|
||||||
mapping := make(map[string][]*lfs.WrappedPointer)
|
mapping := make(map[string][]*lfs.WrappedPointer)
|
||||||
for _, pointer := range pointers {
|
for _, pointer := range pointers {
|
||||||
if lfs.FilenamePassesIncludeExcludeFilter(pointer.Name, include, exclude) {
|
if tools.FilenamePassesIncludeExcludeFilter(pointer.Name, include, exclude) {
|
||||||
mapping[pointer.Oid] = append(mapping[pointer.Oid], pointer)
|
mapping[pointer.Oid] = append(mapping[pointer.Oid], pointer)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -115,7 +116,7 @@ func checkoutWithIncludeExclude(include []string, exclude []string) {
|
|||||||
totalBytes = 0
|
totalBytes = 0
|
||||||
for _, pointer := range pointers {
|
for _, pointer := range pointers {
|
||||||
totalBytes += pointer.Size
|
totalBytes += pointer.Size
|
||||||
if lfs.FilenamePassesIncludeExcludeFilter(pointer.Name, include, exclude) {
|
if tools.FilenamePassesIncludeExcludeFilter(pointer.Name, include, exclude) {
|
||||||
progress.Add(pointer.Name)
|
progress.Add(pointer.Name)
|
||||||
c <- pointer
|
c <- pointer
|
||||||
// not strictly correct (parallel) but we don't have a callback & it's just local
|
// not strictly correct (parallel) but we don't have a callback & it's just local
|
||||||
|
@ -7,6 +7,7 @@ import (
|
|||||||
"github.com/github/git-lfs/git"
|
"github.com/github/git-lfs/git"
|
||||||
"github.com/github/git-lfs/lfs"
|
"github.com/github/git-lfs/lfs"
|
||||||
"github.com/github/git-lfs/progress"
|
"github.com/github/git-lfs/progress"
|
||||||
|
"github.com/github/git-lfs/tools"
|
||||||
"github.com/rubyist/tracerx"
|
"github.com/rubyist/tracerx"
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
)
|
)
|
||||||
@ -310,7 +311,7 @@ func readyAndMissingPointers(allpointers []*lfs.WrappedPointer, include, exclude
|
|||||||
|
|
||||||
for _, p := range allpointers {
|
for _, p := range allpointers {
|
||||||
// Filtered out by --include or --exclude
|
// Filtered out by --include or --exclude
|
||||||
if !lfs.FilenamePassesIncludeExcludeFilter(p.Name, include, exclude) {
|
if !tools.FilenamePassesIncludeExcludeFilter(p.Name, include, exclude) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@ import (
|
|||||||
|
|
||||||
"github.com/github/git-lfs/errors"
|
"github.com/github/git-lfs/errors"
|
||||||
"github.com/github/git-lfs/lfs"
|
"github.com/github/git-lfs/lfs"
|
||||||
|
"github.com/github/git-lfs/tools"
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -57,7 +58,7 @@ func smudgeCommand(cmd *cobra.Command, args []string) {
|
|||||||
Error(err.Error())
|
Error(err.Error())
|
||||||
}
|
}
|
||||||
|
|
||||||
download := lfs.FilenamePassesIncludeExcludeFilter(filename, cfg.FetchIncludePaths(), cfg.FetchExcludePaths())
|
download := tools.FilenamePassesIncludeExcludeFilter(filename, cfg.FetchIncludePaths(), cfg.FetchExcludePaths())
|
||||||
|
|
||||||
if smudgeSkip || cfg.Os.Bool("GIT_LFS_SKIP_SMUDGE", false) {
|
if smudgeSkip || cfg.Os.Bool("GIT_LFS_SKIP_SMUDGE", false) {
|
||||||
download = false
|
download = false
|
||||||
|
@ -7,12 +7,15 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/rubyist/tracerx"
|
||||||
|
|
||||||
"github.com/github/git-lfs/config"
|
"github.com/github/git-lfs/config"
|
||||||
"github.com/github/git-lfs/git"
|
"github.com/github/git-lfs/git"
|
||||||
|
|
||||||
"github.com/github/git-lfs/lfs"
|
"github.com/github/git-lfs/lfs"
|
||||||
|
"github.com/github/git-lfs/tools"
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -181,16 +184,24 @@ func findAttributeFiles() []string {
|
|||||||
paths = append(paths, repoAttributes)
|
paths = append(paths, repoAttributes)
|
||||||
}
|
}
|
||||||
|
|
||||||
filepath.Walk(config.LocalWorkingDir, func(path string, info os.FileInfo, err error) error {
|
fchan, errchan := tools.FastWalkGitRepo(config.LocalWorkingDir)
|
||||||
if err != nil {
|
var waitg sync.WaitGroup
|
||||||
return err
|
waitg.Add(2)
|
||||||
|
go func() {
|
||||||
|
for o := range fchan {
|
||||||
|
if !o.Info.IsDir() && (o.Info.Name() == ".gitattributes") {
|
||||||
|
paths = append(paths, filepath.Join(o.ParentDir, o.Info.Name()))
|
||||||
}
|
}
|
||||||
|
|
||||||
if !info.IsDir() && (filepath.Base(path) == ".gitattributes") {
|
|
||||||
paths = append(paths, path)
|
|
||||||
}
|
}
|
||||||
return nil
|
waitg.Done()
|
||||||
})
|
}()
|
||||||
|
go func() {
|
||||||
|
for err := range errchan {
|
||||||
|
tracerx.Printf("Error finding .gitattributes: %v", err)
|
||||||
|
}
|
||||||
|
waitg.Done()
|
||||||
|
}()
|
||||||
|
waitg.Wait()
|
||||||
|
|
||||||
return paths
|
return paths
|
||||||
}
|
}
|
||||||
|
@ -1066,12 +1066,12 @@ func parseLogOutputToPointers(log io.Reader, dir LogDiffDirection,
|
|||||||
} else {
|
} else {
|
||||||
currentFilename = match[1]
|
currentFilename = match[1]
|
||||||
}
|
}
|
||||||
currentFileIncluded = FilenamePassesIncludeExcludeFilter(currentFilename, includePaths, excludePaths)
|
currentFileIncluded = tools.FilenamePassesIncludeExcludeFilter(currentFilename, includePaths, excludePaths)
|
||||||
} else if match := fileMergeHeaderRegex.FindStringSubmatch(line); match != nil {
|
} else if match := fileMergeHeaderRegex.FindStringSubmatch(line); match != nil {
|
||||||
// Git merge file header is a little different, only one file
|
// Git merge file header is a little different, only one file
|
||||||
finishLastPointer()
|
finishLastPointer()
|
||||||
currentFilename = match[1]
|
currentFilename = match[1]
|
||||||
currentFileIncluded = FilenamePassesIncludeExcludeFilter(currentFilename, includePaths, excludePaths)
|
currentFileIncluded = tools.FilenamePassesIncludeExcludeFilter(currentFilename, includePaths, excludePaths)
|
||||||
} else if currentFileIncluded {
|
} else if currentFileIncluded {
|
||||||
if match := pointerDataRegex.FindStringSubmatch(line); match != nil {
|
if match := pointerDataRegex.FindStringSubmatch(line); match != nil {
|
||||||
// An LFS pointer data line
|
// An LFS pointer data line
|
||||||
|
63
lfs/util.go
63
lfs/util.go
@ -7,7 +7,6 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/github/git-lfs/config"
|
"github.com/github/git-lfs/config"
|
||||||
"github.com/github/git-lfs/progress"
|
"github.com/github/git-lfs/progress"
|
||||||
@ -72,68 +71,6 @@ func wrapProgressError(err error, event, filename string) error {
|
|||||||
|
|
||||||
var localDirSet = tools.NewStringSetFromSlice([]string{".", "./", ".\\"})
|
var localDirSet = tools.NewStringSetFromSlice([]string{".", "./", ".\\"})
|
||||||
|
|
||||||
// Return whether a given filename passes the include / exclude path filters
|
|
||||||
// Only paths that are in includePaths and outside excludePaths are passed
|
|
||||||
// If includePaths is empty that filter always passes and the same with excludePaths
|
|
||||||
// Both path lists support wildcard matches
|
|
||||||
func FilenamePassesIncludeExcludeFilter(filename string, includePaths, excludePaths []string) bool {
|
|
||||||
if len(includePaths) == 0 && len(excludePaths) == 0 {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
filename = filepath.Clean(filename)
|
|
||||||
if len(includePaths) > 0 {
|
|
||||||
matched := false
|
|
||||||
for _, inc := range includePaths {
|
|
||||||
inc = filepath.Clean(inc)
|
|
||||||
|
|
||||||
// Special case local dir, matches all (inc subpaths)
|
|
||||||
if _, local := localDirSet[inc]; local {
|
|
||||||
matched = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
matched, _ = filepath.Match(inc, filename)
|
|
||||||
if !matched {
|
|
||||||
// Also support matching a parent directory without a wildcard
|
|
||||||
if strings.HasPrefix(filename, inc+string(filepath.Separator)) {
|
|
||||||
matched = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if matched {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
if !matched {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(excludePaths) > 0 {
|
|
||||||
for _, ex := range excludePaths {
|
|
||||||
ex = filepath.Clean(ex)
|
|
||||||
|
|
||||||
// Special case local dir, matches all (inc subpaths)
|
|
||||||
if _, local := localDirSet[ex]; local {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
if matched, _ := filepath.Match(ex, filename); matched {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Also support matching a parent directory without a wildcard
|
|
||||||
if strings.HasPrefix(filename, ex+string(filepath.Separator)) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetPlatform() Platform {
|
func GetPlatform() Platform {
|
||||||
if currentPlatform == PlatformUndetermined {
|
if currentPlatform == PlatformUndetermined {
|
||||||
switch runtime.GOOS {
|
switch runtime.GOOS {
|
||||||
|
@ -2,7 +2,6 @@ package lfs
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"strings"
|
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/github/git-lfs/progress"
|
"github.com/github/git-lfs/progress"
|
||||||
@ -38,62 +37,3 @@ func TestWriterWithCallback(t *testing.T) {
|
|||||||
assert.Equal(t, 3, int(calledRead[0]))
|
assert.Equal(t, 3, int(calledRead[0]))
|
||||||
assert.Equal(t, 5, int(calledRead[1]))
|
assert.Equal(t, 5, int(calledRead[1]))
|
||||||
}
|
}
|
||||||
|
|
||||||
type TestIncludeExcludeCase struct {
|
|
||||||
expectedResult bool
|
|
||||||
includes []string
|
|
||||||
excludes []string
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestFilterIncludeExclude(t *testing.T) {
|
|
||||||
|
|
||||||
cases := []TestIncludeExcludeCase{
|
|
||||||
// Null case
|
|
||||||
TestIncludeExcludeCase{true, nil, nil},
|
|
||||||
// Inclusion
|
|
||||||
TestIncludeExcludeCase{true, []string{"test/filename.dat"}, nil},
|
|
||||||
TestIncludeExcludeCase{true, []string{"test/filename.dat"}, nil},
|
|
||||||
TestIncludeExcludeCase{false, []string{"blank", "something", "foo"}, nil},
|
|
||||||
TestIncludeExcludeCase{false, []string{"test/notfilename.dat"}, nil},
|
|
||||||
TestIncludeExcludeCase{true, []string{"test"}, nil},
|
|
||||||
TestIncludeExcludeCase{true, []string{"test/*"}, nil},
|
|
||||||
TestIncludeExcludeCase{false, []string{"nottest"}, nil},
|
|
||||||
TestIncludeExcludeCase{false, []string{"nottest/*"}, nil},
|
|
||||||
TestIncludeExcludeCase{true, []string{"test/fil*"}, nil},
|
|
||||||
TestIncludeExcludeCase{false, []string{"test/g*"}, nil},
|
|
||||||
TestIncludeExcludeCase{true, []string{"tes*/*"}, nil},
|
|
||||||
// Exclusion
|
|
||||||
TestIncludeExcludeCase{false, nil, []string{"test/filename.dat"}},
|
|
||||||
TestIncludeExcludeCase{false, nil, []string{"blank", "something", "test/filename.dat", "foo"}},
|
|
||||||
TestIncludeExcludeCase{true, nil, []string{"blank", "something", "foo"}},
|
|
||||||
TestIncludeExcludeCase{true, nil, []string{"test/notfilename.dat"}},
|
|
||||||
TestIncludeExcludeCase{false, nil, []string{"test"}},
|
|
||||||
TestIncludeExcludeCase{false, nil, []string{"test/*"}},
|
|
||||||
TestIncludeExcludeCase{true, nil, []string{"nottest"}},
|
|
||||||
TestIncludeExcludeCase{true, nil, []string{"nottest/*"}},
|
|
||||||
TestIncludeExcludeCase{false, nil, []string{"test/fil*"}},
|
|
||||||
TestIncludeExcludeCase{true, nil, []string{"test/g*"}},
|
|
||||||
TestIncludeExcludeCase{false, nil, []string{"tes*/*"}},
|
|
||||||
|
|
||||||
// Both
|
|
||||||
TestIncludeExcludeCase{true, []string{"test/filename.dat"}, []string{"test/notfilename.dat"}},
|
|
||||||
TestIncludeExcludeCase{false, []string{"test"}, []string{"test/filename.dat"}},
|
|
||||||
TestIncludeExcludeCase{true, []string{"test/*"}, []string{"test/notfile*"}},
|
|
||||||
TestIncludeExcludeCase{false, []string{"test/*"}, []string{"test/file*"}},
|
|
||||||
TestIncludeExcludeCase{false, []string{"another/*", "test/*"}, []string{"test/notfilename.dat", "test/filename.dat"}},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, c := range cases {
|
|
||||||
assert.Equal(t, c.expectedResult, FilenamePassesIncludeExcludeFilter("test/filename.dat", c.includes, c.excludes), c)
|
|
||||||
if IsWindows() {
|
|
||||||
// also test with \ path separators, tolerate mixed separators
|
|
||||||
for i, inc := range c.includes {
|
|
||||||
c.includes[i] = strings.Replace(inc, "/", "\\", -1)
|
|
||||||
}
|
|
||||||
for i, ex := range c.excludes {
|
|
||||||
c.excludes[i] = strings.Replace(ex, "/", "\\", -1)
|
|
||||||
}
|
|
||||||
assert.Equal(t, c.expectedResult, FilenamePassesIncludeExcludeFilter("test/filename.dat", c.includes, c.excludes), c)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -3,15 +3,20 @@
|
|||||||
package tools
|
package tools
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
"encoding/hex"
|
"encoding/hex"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var localDirSet = NewStringSetFromSlice([]string{".", "./", ".\\"})
|
||||||
|
|
||||||
// FileOrDirExists determines if a file/dir exists, returns IsDir() results too.
|
// FileOrDirExists determines if a file/dir exists, returns IsDir() results too.
|
||||||
func FileOrDirExists(path string) (exists bool, isDir bool) {
|
func FileOrDirExists(path string) (exists bool, isDir bool) {
|
||||||
fi, err := os.Stat(path)
|
fi, err := os.Stat(path)
|
||||||
@ -121,3 +126,247 @@ func VerifyFileHash(oid, path string) error {
|
|||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FilenamePassesIncludeExcludeFilter returns whether a given filename passes the include / exclude path filters
|
||||||
|
// Only paths that are in includePaths and outside excludePaths are passed
|
||||||
|
// If includePaths is empty that filter always passes and the same with excludePaths
|
||||||
|
// Both path lists support wildcard matches
|
||||||
|
func FilenamePassesIncludeExcludeFilter(filename string, includePaths, excludePaths []string) bool {
|
||||||
|
if len(includePaths) == 0 && len(excludePaths) == 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(includePaths) > 0 {
|
||||||
|
matched := false
|
||||||
|
for _, inc := range includePaths {
|
||||||
|
matched = FileMatch(inc, filename)
|
||||||
|
if matched {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !matched {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(excludePaths) > 0 {
|
||||||
|
for _, ex := range excludePaths {
|
||||||
|
if FileMatch(ex, filename) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// FileMatch is a revised version of filepath.Match which makes it behave more
// like gitignore. Both pattern and name are cleaned with filepath.Clean first.
//
// It returns true when any of the following holds:
//   - pattern is the local directory (".", "./" or ".\\"), which matches all
//     names including sub-paths;
//   - filepath.Match(pattern, name) succeeds;
//   - pattern has no path separator but contains "*", and it matches the base
//     name of name with "*" standing for any run of characters (so "*.txt"
//     matches in any sub-directory, while "sub/*.txt" only matches directly
//     inside sub);
//   - pattern has a path separator and contains "**", and it matches the whole
//     of name with "**" standing for any run of characters, separators
//     included;
//   - pattern names a parent directory of name.
//
// In the two wildcard fallbacks every character other than the wildcard being
// expanded is matched literally.
func FileMatch(pattern, name string) bool {
	pattern = filepath.Clean(pattern)
	name = filepath.Clean(name)

	// Special case local dir, matches all (inc subpaths).
	switch pattern {
	case ".", "./", ".\\":
		return true
	}

	// A malformed pattern counts as a non-match here; the fallbacks below may
	// still accept it.
	if matched, _ := filepath.Match(pattern, name); matched {
		return true
	}

	sep := string(filepath.Separator)
	hasSep := strings.Contains(pattern, sep)

	// The escaped form is only used to build regular expressions. pattern
	// itself must stay untouched, because the parent directory check at the
	// end needs the real text rather than the escaped one.
	quoted := regexp.QuoteMeta(pattern)

	// filepath.Match never allows * to match a path separator, which is
	// correct for gitignore IF the pattern includes a path separator, but not
	// otherwise. Separators need no cross-platform handling as both sides were
	// cleaned above.
	if !hasSep && strings.Contains(pattern, "*") {
		expr := "^" + strings.Replace(quoted, `\*`, ".*", -1) + "$"
		// The expression is built from quoted text, so a compile error is not
		// expected; should one occur it is treated as a non-match.
		if ok, _ := regexp.MatchString(expr, filepath.Base(name)); ok {
			return true
		}
	}

	// Also support ** with path separators.
	if hasSep && strings.Contains(pattern, "**") {
		expr := "^" + strings.Replace(quoted, `\*\*`, ".*", -1) + "$"
		if ok, _ := regexp.MatchString(expr, name); ok {
			return true
		}
	}

	// Also support matching a parent directory without a wildcard.
	return strings.HasPrefix(name, pattern+sep)
}
|
||||||
|
|
||||||
|
// FastWalkInfo is a single result of a fast walk: a file or directory entry
// together with the directory that contains it.
//
// The parent directory is carried explicitly because a fast walk can deliver
// paths out of order, so a consumer cannot infer it from the previous entries.
type FastWalkInfo struct {
	// ParentDir is the directory containing the entry; joining it with
	// Info.Name() gives the entry's path.
	ParentDir string
	// Info describes the entry itself.
	Info os.FileInfo
}
|
||||||
|
|
||||||
|
// fastWalkWithExcludeFiles walks the contents of a dir, respecting
|
||||||
|
// include/exclude patterns and also loading new exlude patterns from files
|
||||||
|
// named excludeFilename in directories walked
|
||||||
|
func fastWalkWithExcludeFiles(dir, excludeFilename string,
|
||||||
|
includePaths, excludePaths []string) (<-chan FastWalkInfo, <-chan error) {
|
||||||
|
fiChan := make(chan FastWalkInfo, 256)
|
||||||
|
errChan := make(chan error, 10)
|
||||||
|
|
||||||
|
go fastWalkFromRoot(dir, excludeFilename, includePaths, excludePaths, fiChan, errChan)
|
||||||
|
|
||||||
|
return fiChan, errChan
|
||||||
|
}
|
||||||
|
|
||||||
|
// FastWalkGitRepo is a more optimal implementation of filepath.Walk for a Git repo
|
||||||
|
// It differs in the following ways:
|
||||||
|
// * Provides a channel of information instead of using a callback func
|
||||||
|
// * Uses goroutines to parallelise large dirs and descent into subdirs
|
||||||
|
// * Does not provide sorted output; parents will always be before children but
|
||||||
|
// there are no other guarantees. Use parentDir in the FastWalkInfo struct to
|
||||||
|
// determine absolute path rather than tracking it yourself like filepath.Walk
|
||||||
|
// * Automatically ignores any .git directories
|
||||||
|
// * Respects .gitignore contents and skips ignored files/dirs
|
||||||
|
func FastWalkGitRepo(dir string) (<-chan FastWalkInfo, <-chan error) {
|
||||||
|
// Ignore all git metadata including subrepos
|
||||||
|
excludePaths := []string{".git", filepath.Join("**", ".git")}
|
||||||
|
return fastWalkWithExcludeFiles(dir, ".gitignore", nil, excludePaths)
|
||||||
|
}
|
||||||
|
|
||||||
|
func fastWalkFromRoot(dir string, excludeFilename string,
|
||||||
|
includePaths, excludePaths []string, fiChan chan<- FastWalkInfo, errChan chan<- error) {
|
||||||
|
|
||||||
|
dirFi, err := os.Stat(dir)
|
||||||
|
if err != nil {
|
||||||
|
errChan <- err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// This waitgroup will be incremented for each nested goroutine
|
||||||
|
var waitg sync.WaitGroup
|
||||||
|
|
||||||
|
fastWalkFileOrDir(filepath.Dir(dir), dirFi, excludeFilename, includePaths, excludePaths, fiChan, errChan, &waitg)
|
||||||
|
|
||||||
|
waitg.Wait()
|
||||||
|
close(fiChan)
|
||||||
|
close(errChan)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// fastWalkFileOrDir is the main recursive implementation of fast walk
|
||||||
|
// Sends the file/dir and any contents to the channel so long as it passes the
|
||||||
|
// include/exclude filter. If a dir, parses any excludeFilename found and updates
|
||||||
|
// the excludePaths with its content before (parallel) recursing into contents
|
||||||
|
// Also splits large directories into multiple goroutines.
|
||||||
|
// Increments waitg.Add(1) for each new goroutine launched internally
|
||||||
|
func fastWalkFileOrDir(parentDir string, itemFi os.FileInfo, excludeFilename string,
|
||||||
|
includePaths, excludePaths []string, fiChan chan<- FastWalkInfo, errChan chan<- error,
|
||||||
|
waitg *sync.WaitGroup) {
|
||||||
|
|
||||||
|
fullPath := filepath.Join(parentDir, itemFi.Name())
|
||||||
|
|
||||||
|
if !FilenamePassesIncludeExcludeFilter(fullPath, includePaths, excludePaths) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fiChan <- FastWalkInfo{ParentDir: parentDir, Info: itemFi}
|
||||||
|
|
||||||
|
if !itemFi.IsDir() {
|
||||||
|
// Nothing more to do if this is not a dir
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(excludeFilename) > 0 {
|
||||||
|
possibleExcludeFile := filepath.Join(fullPath, excludeFilename)
|
||||||
|
if FileExists(possibleExcludeFile) {
|
||||||
|
var err error
|
||||||
|
excludePaths, err = loadExcludeFilename(possibleExcludeFile, fullPath, excludePaths)
|
||||||
|
if err != nil {
|
||||||
|
errChan <- err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The absolute optimal way to scan would be File.Readdirnames but we
|
||||||
|
// still need the Stat() to know whether something is a dir, so use
|
||||||
|
// File.Readdir instead. Means we can provide os.FileInfo to callers like
|
||||||
|
// filepath.Walk as a bonus.
|
||||||
|
df, err := os.Open(fullPath)
|
||||||
|
if err != nil {
|
||||||
|
errChan <- err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer df.Close()
|
||||||
|
// The number of items in a dir we process in each goroutine
|
||||||
|
jobSize := 100
|
||||||
|
for children, err := df.Readdir(jobSize); err == nil; children, err = df.Readdir(jobSize) {
|
||||||
|
// Parallelise all dirs, and chop large dirs into batches
|
||||||
|
waitg.Add(1)
|
||||||
|
go func(subitems []os.FileInfo) {
|
||||||
|
for _, childFi := range subitems {
|
||||||
|
fastWalkFileOrDir(fullPath, childFi, excludeFilename, includePaths, excludePaths, fiChan, errChan, waitg)
|
||||||
|
}
|
||||||
|
waitg.Done()
|
||||||
|
}(children)
|
||||||
|
|
||||||
|
}
|
||||||
|
if err != nil && err != io.EOF {
|
||||||
|
errChan <- err
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// loadExcludeFilename reads the given file in gitignore format and returns a
// revised slice of exclude paths.
//
// Blank lines, comments ("#") and negations ("!", not supported right now) are
// skipped. A pattern that contains a path separator, or no wildcard at all, is
// anchored by joining it to parentDir; a bare wildcard pattern such as "*.txt"
// is added unchanged.
//
// If any pattern is added, a copy of excludePaths is taken first so the
// caller's slice is not modified; if none is added, excludePaths itself is
// returned. On failure to open the file excludePaths is returned with the
// error. If reading fails part-way (for example on an over-long line) the
// patterns collected so far are returned together with the error.
func loadExcludeFilename(filename, parentDir string, excludePaths []string) ([]string, error) {
	f, err := os.Open(filename)
	if err != nil {
		return excludePaths, err
	}
	defer f.Close()

	retPaths := excludePaths
	modified := false

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		// Skip blanks, comments and negations (not supported right now)
		if len(line) == 0 || strings.HasPrefix(line, "#") || strings.HasPrefix(line, "!") {
			continue
		}

		if !modified {
			// copy on write
			retPaths = make([]string, len(excludePaths))
			copy(retPaths, excludePaths)
			modified = true
		}

		path := line
		// Add pattern in context if exclude has separator, or no wildcard
		// Allow for both styles of separator at this point
		if strings.ContainsAny(path, "/\\") ||
			!strings.Contains(path, "*") {
			path = filepath.Join(parentDir, line)
		}
		retPaths = append(retPaths, path)
	}

	// Scan returns false both at end of file and on failure; only Err tells
	// the two apart.
	return retPaths, scanner.Err()
}
|
||||||
|
@ -1,20 +1,287 @@
|
|||||||
package tools_test
|
package tools
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/github/git-lfs/tools"
|
"github.com/github/git-lfs/subprocess"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestCleanPathsCleansPaths(t *testing.T) {
|
func TestCleanPathsCleansPaths(t *testing.T) {
|
||||||
cleaned := tools.CleanPaths("/foo/bar/,/foo/bar/baz", ",")
|
cleaned := CleanPaths("/foo/bar/,/foo/bar/baz", ",")
|
||||||
|
|
||||||
assert.Equal(t, []string{"/foo/bar", "/foo/bar/baz"}, cleaned)
|
assert.Equal(t, []string{"/foo/bar", "/foo/bar/baz"}, cleaned)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCleanPathsReturnsNoResultsWhenGivenNoPaths(t *testing.T) {
|
func TestCleanPathsReturnsNoResultsWhenGivenNoPaths(t *testing.T) {
|
||||||
cleaned := tools.CleanPaths("", ",")
|
cleaned := CleanPaths("", ",")
|
||||||
|
|
||||||
assert.Empty(t, cleaned)
|
assert.Empty(t, cleaned)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestFileMatch(t *testing.T) {
|
||||||
|
assert.True(t, FileMatch("filename.txt", "filename.txt"))
|
||||||
|
assert.True(t, FileMatch("*.txt", "filename.txt"))
|
||||||
|
assert.False(t, FileMatch("*.tx", "filename.txt"))
|
||||||
|
assert.True(t, FileMatch("f*.txt", "filename.txt"))
|
||||||
|
assert.False(t, FileMatch("g*.txt", "filename.txt"))
|
||||||
|
assert.True(t, FileMatch("file*", "filename.txt"))
|
||||||
|
assert.False(t, FileMatch("file", "filename.txt"))
|
||||||
|
|
||||||
|
// With no path separators, should match in subfolders
|
||||||
|
assert.True(t, FileMatch("*.txt", "sub/filename.txt"))
|
||||||
|
assert.False(t, FileMatch("*.tx", "sub/filename.txt"))
|
||||||
|
assert.True(t, FileMatch("f*.txt", "sub/filename.txt"))
|
||||||
|
assert.False(t, FileMatch("g*.txt", "sub/filename.txt"))
|
||||||
|
assert.True(t, FileMatch("file*", "sub/filename.txt"))
|
||||||
|
assert.False(t, FileMatch("file", "sub/filename.txt"))
|
||||||
|
// Needs wildcard for exact filename
|
||||||
|
assert.True(t, FileMatch("**/filename.txt", "sub/sub/sub/filename.txt"))
|
||||||
|
|
||||||
|
// Should not match dots to subparts
|
||||||
|
assert.False(t, FileMatch("*.ign", "sub/shouldignoreme.txt"))
|
||||||
|
|
||||||
|
// Path specific
|
||||||
|
assert.True(t, FileMatch("sub", "sub/filename.txt"))
|
||||||
|
assert.False(t, FileMatch("sub", "subfilename.txt"))
|
||||||
|
|
||||||
|
// Absolute
|
||||||
|
assert.True(t, FileMatch("*.dat", "/path/to/sub/.git/test.dat"))
|
||||||
|
assert.True(t, FileMatch("**/.git", "/path/to/sub/.git"))
|
||||||
|
|
||||||
|
// Match anything
|
||||||
|
assert.True(t, FileMatch(".", "path.txt"))
|
||||||
|
assert.True(t, FileMatch("./", "path.txt"))
|
||||||
|
assert.True(t, FileMatch(".\\", "path.txt"))
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
type TestIncludeExcludeCase struct {
|
||||||
|
expectedResult bool
|
||||||
|
includes []string
|
||||||
|
excludes []string
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFilterIncludeExclude(t *testing.T) {
|
||||||
|
|
||||||
|
cases := []TestIncludeExcludeCase{
|
||||||
|
// Null case
|
||||||
|
TestIncludeExcludeCase{true, nil, nil},
|
||||||
|
// Inclusion
|
||||||
|
TestIncludeExcludeCase{true, []string{"*.dat"}, nil},
|
||||||
|
TestIncludeExcludeCase{true, []string{"file*.dat"}, nil},
|
||||||
|
TestIncludeExcludeCase{true, []string{"file*"}, nil},
|
||||||
|
TestIncludeExcludeCase{true, []string{"*name.dat"}, nil},
|
||||||
|
TestIncludeExcludeCase{false, []string{"/*.dat"}, nil},
|
||||||
|
TestIncludeExcludeCase{false, []string{"otherfolder/*.dat"}, nil},
|
||||||
|
TestIncludeExcludeCase{false, []string{"*.nam"}, nil},
|
||||||
|
TestIncludeExcludeCase{true, []string{"test/filename.dat"}, nil},
|
||||||
|
TestIncludeExcludeCase{true, []string{"test/filename.dat"}, nil},
|
||||||
|
TestIncludeExcludeCase{false, []string{"blank", "something", "foo"}, nil},
|
||||||
|
TestIncludeExcludeCase{false, []string{"test/notfilename.dat"}, nil},
|
||||||
|
TestIncludeExcludeCase{true, []string{"test"}, nil},
|
||||||
|
TestIncludeExcludeCase{true, []string{"test/*"}, nil},
|
||||||
|
TestIncludeExcludeCase{false, []string{"nottest"}, nil},
|
||||||
|
TestIncludeExcludeCase{false, []string{"nottest/*"}, nil},
|
||||||
|
TestIncludeExcludeCase{true, []string{"test/fil*"}, nil},
|
||||||
|
TestIncludeExcludeCase{false, []string{"test/g*"}, nil},
|
||||||
|
TestIncludeExcludeCase{true, []string{"tes*/*"}, nil},
|
||||||
|
// Exclusion
|
||||||
|
TestIncludeExcludeCase{false, nil, []string{"*.dat"}},
|
||||||
|
TestIncludeExcludeCase{false, nil, []string{"file*.dat"}},
|
||||||
|
TestIncludeExcludeCase{false, nil, []string{"file*"}},
|
||||||
|
TestIncludeExcludeCase{false, nil, []string{"*name.dat"}},
|
||||||
|
TestIncludeExcludeCase{true, nil, []string{"/*.dat"}},
|
||||||
|
TestIncludeExcludeCase{true, nil, []string{"otherfolder/*.dat"}},
|
||||||
|
TestIncludeExcludeCase{false, nil, []string{"test/filename.dat"}},
|
||||||
|
TestIncludeExcludeCase{false, nil, []string{"blank", "something", "test/filename.dat", "foo"}},
|
||||||
|
TestIncludeExcludeCase{true, nil, []string{"blank", "something", "foo"}},
|
||||||
|
TestIncludeExcludeCase{true, nil, []string{"test/notfilename.dat"}},
|
||||||
|
TestIncludeExcludeCase{false, nil, []string{"test"}},
|
||||||
|
TestIncludeExcludeCase{false, nil, []string{"test/*"}},
|
||||||
|
TestIncludeExcludeCase{true, nil, []string{"nottest"}},
|
||||||
|
TestIncludeExcludeCase{true, nil, []string{"nottest/*"}},
|
||||||
|
TestIncludeExcludeCase{false, nil, []string{"test/fil*"}},
|
||||||
|
TestIncludeExcludeCase{true, nil, []string{"test/g*"}},
|
||||||
|
TestIncludeExcludeCase{false, nil, []string{"tes*/*"}},
|
||||||
|
|
||||||
|
// Both
|
||||||
|
TestIncludeExcludeCase{true, []string{"test/filename.dat"}, []string{"test/notfilename.dat"}},
|
||||||
|
TestIncludeExcludeCase{false, []string{"test"}, []string{"test/filename.dat"}},
|
||||||
|
TestIncludeExcludeCase{true, []string{"test/*"}, []string{"test/notfile*"}},
|
||||||
|
TestIncludeExcludeCase{false, []string{"test/*"}, []string{"test/file*"}},
|
||||||
|
TestIncludeExcludeCase{false, []string{"another/*", "test/*"}, []string{"test/notfilename.dat", "test/filename.dat"}},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
result := FilenamePassesIncludeExcludeFilter("test/filename.dat", c.includes, c.excludes)
|
||||||
|
assert.Equal(t, c.expectedResult, result, "includes: %v excludes: %v", c.includes, c.excludes)
|
||||||
|
if runtime.GOOS == "windows" {
|
||||||
|
// also test with \ path separators, tolerate mixed separators
|
||||||
|
for i, inc := range c.includes {
|
||||||
|
c.includes[i] = strings.Replace(inc, "/", "\\", -1)
|
||||||
|
}
|
||||||
|
for i, ex := range c.excludes {
|
||||||
|
c.excludes[i] = strings.Replace(ex, "/", "\\", -1)
|
||||||
|
}
|
||||||
|
assert.Equal(t, c.expectedResult, FilenamePassesIncludeExcludeFilter("test/filename.dat", c.includes, c.excludes), c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFastWalkBasic(t *testing.T) {
|
||||||
|
rootDir, err := ioutil.TempDir(os.TempDir(), "GitLfsTestFastWalkBasic")
|
||||||
|
if err != nil {
|
||||||
|
assert.FailNow(t, "Unable to get temp dir: %v", err)
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(rootDir)
|
||||||
|
os.Chdir(rootDir)
|
||||||
|
|
||||||
|
expectedEntries := createFastWalkInputData(10, 160)
|
||||||
|
|
||||||
|
fchan, errchan := fastWalkWithExcludeFiles(expectedEntries[0], "", nil, nil)
|
||||||
|
gotEntries, gotErrors := collectFastWalkResults(fchan, errchan)
|
||||||
|
|
||||||
|
assert.Empty(t, gotErrors)
|
||||||
|
|
||||||
|
sort.Strings(expectedEntries)
|
||||||
|
sort.Strings(gotEntries)
|
||||||
|
assert.Equal(t, expectedEntries, gotEntries)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFastWalkGitRepo(t *testing.T) {
|
||||||
|
rootDir, err := ioutil.TempDir(os.TempDir(), "GitLfsTestFastWalkGitRepo")
|
||||||
|
if err != nil {
|
||||||
|
assert.FailNow(t, "Unable to get temp dir: %v", err)
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(rootDir)
|
||||||
|
os.Chdir(rootDir)
|
||||||
|
|
||||||
|
expectedEntries := createFastWalkInputData(3, 3)
|
||||||
|
|
||||||
|
mainDir := expectedEntries[0]
|
||||||
|
|
||||||
|
// Set up a git repo and add some ignored files / dirs
|
||||||
|
subprocess.SimpleExec("git", "init", mainDir)
|
||||||
|
ignored := []string{
|
||||||
|
"filethatweignore.ign",
|
||||||
|
"foldercontainingignored",
|
||||||
|
"foldercontainingignored/notthisone.ign",
|
||||||
|
"ignoredfolder",
|
||||||
|
"ignoredfolder/file1.txt",
|
||||||
|
"ignoredfolder/file2.txt",
|
||||||
|
"ignoredfrominside",
|
||||||
|
"ignoredfrominside/thisisok.txt",
|
||||||
|
"ignoredfrominside/thisisnot.txt",
|
||||||
|
"ignoredfrominside/thisone",
|
||||||
|
"ignoredfrominside/thisone/file1.txt",
|
||||||
|
}
|
||||||
|
for _, f := range ignored {
|
||||||
|
fullPath := filepath.Join(mainDir, f)
|
||||||
|
if len(filepath.Ext(f)) > 0 {
|
||||||
|
ioutil.WriteFile(fullPath, []byte("TEST"), 0644)
|
||||||
|
} else {
|
||||||
|
os.MkdirAll(fullPath, 0755)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// write root .gitignore
|
||||||
|
rootGitIgnore := `
|
||||||
|
# ignore *.ign everywhere
|
||||||
|
*.ign
|
||||||
|
# ignore folder
|
||||||
|
ignoredfolder
|
||||||
|
`
|
||||||
|
ioutil.WriteFile(filepath.Join(mainDir, ".gitignore"), []byte(rootGitIgnore), 0644)
|
||||||
|
// Subfolder ignore; folder will show up but but subfolder 'thisone' won't
|
||||||
|
subFolderIgnore := `
|
||||||
|
thisone
|
||||||
|
thisisnot.txt
|
||||||
|
`
|
||||||
|
ioutil.WriteFile(filepath.Join(mainDir, "ignoredfrominside", ".gitignore"), []byte(subFolderIgnore), 0644)
|
||||||
|
|
||||||
|
// This dir will be walked but content won't be
|
||||||
|
expectedEntries = append(expectedEntries, filepath.Join(mainDir, "foldercontainingignored"))
|
||||||
|
// This dir will be walked and some of its content but has its own gitignore
|
||||||
|
expectedEntries = append(expectedEntries, filepath.Join(mainDir, "ignoredfrominside"))
|
||||||
|
expectedEntries = append(expectedEntries, filepath.Join(mainDir, "ignoredfrominside", "thisisok.txt"))
|
||||||
|
// Also gitignores
|
||||||
|
expectedEntries = append(expectedEntries, filepath.Join(mainDir, ".gitignore"))
|
||||||
|
expectedEntries = append(expectedEntries, filepath.Join(mainDir, "ignoredfrominside", ".gitignore"))
|
||||||
|
// nothing else should be there
|
||||||
|
|
||||||
|
fchan, errchan := FastWalkGitRepo(mainDir)
|
||||||
|
gotEntries, gotErrors := collectFastWalkResults(fchan, errchan)
|
||||||
|
|
||||||
|
assert.Empty(t, gotErrors)
|
||||||
|
|
||||||
|
sort.Strings(expectedEntries)
|
||||||
|
sort.Strings(gotEntries)
|
||||||
|
assert.Equal(t, expectedEntries, gotEntries)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// createFastWalkInputData creates a fixed directory tree rooted at "testroot"
// in the current working directory, so Chdir into a temp dir before calling it.
//
// Each directory is filled with files named file<N>.txt containing "TEST":
// smallFolder files per directory, except for subfolder1..subfolder3 which
// get largeFolder files each.
//
// It returns the cleaned relative paths of everything created, directories
// and files alike. The first entry is "testroot", the parent of all others.
//
// It panics if a directory or file cannot be created: the returned list is
// only meaningful when every listed path really exists, and there is no
// *testing.T here to report the failure through.
func createFastWalkInputData(smallFolder, largeFolder int) []string {
	dirs := []string{
		"testroot",
		"testroot/folder1",
		"testroot/folder2",
		"testroot/folder2/subfolder1",
		"testroot/folder2/subfolder2",
		"testroot/folder2/subfolder3",
		"testroot/folder2/subfolder4",
		"testroot/folder2/subfolder4/subsub",
	}
	expectedEntries := make([]string, 0, 250)

	for i, dir := range dirs {
		if err := os.MkdirAll(dir, 0755); err != nil {
			panic(fmt.Sprintf("creating test dir %q: %v", dir, err))
		}
		expectedEntries = append(expectedEntries, filepath.Clean(dir))

		numFiles := smallFolder
		if i >= 3 && i <= 5 {
			// Bulk test to ensure works with > 1 batch
			numFiles = largeFolder
		}
		for f := 0; f < numFiles; f++ {
			filename := filepath.Join(dir, fmt.Sprintf("file%d.txt", f))
			if err := ioutil.WriteFile(filename, []byte("TEST"), 0644); err != nil {
				panic(fmt.Sprintf("creating test file %q: %v", filename, err))
			}
			expectedEntries = append(expectedEntries, filepath.Clean(filename))
		}
	}

	return expectedEntries
}
|
||||||
|
|
||||||
|
func collectFastWalkResults(fchan <-chan FastWalkInfo, errchan <-chan error) ([]string, []error) {
|
||||||
|
gotEntries := make([]string, 0, 1000)
|
||||||
|
gotErrors := make([]error, 0, 5)
|
||||||
|
var waitg sync.WaitGroup
|
||||||
|
waitg.Add(2)
|
||||||
|
go func() {
|
||||||
|
for o := range fchan {
|
||||||
|
gotEntries = append(gotEntries, filepath.Join(o.ParentDir, o.Info.Name()))
|
||||||
|
}
|
||||||
|
waitg.Done()
|
||||||
|
}()
|
||||||
|
go func() {
|
||||||
|
for err := range errchan {
|
||||||
|
gotErrors = append(gotErrors, err)
|
||||||
|
}
|
||||||
|
waitg.Done()
|
||||||
|
}()
|
||||||
|
waitg.Wait()
|
||||||
|
|
||||||
|
return gotEntries, gotErrors
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user