diff --git a/commands/command_checkout.go b/commands/command_checkout.go index 81563bee..72ac7d1c 100644 --- a/commands/command_checkout.go +++ b/commands/command_checkout.go @@ -11,6 +11,7 @@ import ( "github.com/github/git-lfs/git" "github.com/github/git-lfs/lfs" "github.com/github/git-lfs/progress" + "github.com/github/git-lfs/tools" "github.com/rubyist/tracerx" "github.com/spf13/cobra" ) @@ -55,7 +56,7 @@ func checkoutFromFetchChan(include []string, exclude []string, in chan *lfs.Wrap // Map oid to multiple pointers mapping := make(map[string][]*lfs.WrappedPointer) for _, pointer := range pointers { - if lfs.FilenamePassesIncludeExcludeFilter(pointer.Name, include, exclude) { + if tools.FilenamePassesIncludeExcludeFilter(pointer.Name, include, exclude) { mapping[pointer.Oid] = append(mapping[pointer.Oid], pointer) } } @@ -115,7 +116,7 @@ func checkoutWithIncludeExclude(include []string, exclude []string) { totalBytes = 0 for _, pointer := range pointers { totalBytes += pointer.Size - if lfs.FilenamePassesIncludeExcludeFilter(pointer.Name, include, exclude) { + if tools.FilenamePassesIncludeExcludeFilter(pointer.Name, include, exclude) { progress.Add(pointer.Name) c <- pointer // not strictly correct (parallel) but we don't have a callback & it's just local diff --git a/commands/command_fetch.go b/commands/command_fetch.go index c0be986d..40098fa1 100644 --- a/commands/command_fetch.go +++ b/commands/command_fetch.go @@ -7,6 +7,7 @@ import ( "github.com/github/git-lfs/git" "github.com/github/git-lfs/lfs" "github.com/github/git-lfs/progress" + "github.com/github/git-lfs/tools" "github.com/rubyist/tracerx" "github.com/spf13/cobra" ) @@ -310,7 +311,7 @@ func readyAndMissingPointers(allpointers []*lfs.WrappedPointer, include, exclude for _, p := range allpointers { // Filtered out by --include or --exclude - if !lfs.FilenamePassesIncludeExcludeFilter(p.Name, include, exclude) { + if !tools.FilenamePassesIncludeExcludeFilter(p.Name, include, exclude) { continue } diff --git a/commands/command_smudge.go b/commands/command_smudge.go index 4d60ce40..25251f46 100644 --- a/commands/command_smudge.go +++ b/commands/command_smudge.go @@ -8,6 +8,7 @@ import ( "github.com/github/git-lfs/errors" "github.com/github/git-lfs/lfs" + "github.com/github/git-lfs/tools" "github.com/spf13/cobra" ) @@ -57,7 +58,7 @@ func smudgeCommand(cmd *cobra.Command, args []string) { Error(err.Error()) } - download := lfs.FilenamePassesIncludeExcludeFilter(filename, cfg.FetchIncludePaths(), cfg.FetchExcludePaths()) + download := tools.FilenamePassesIncludeExcludeFilter(filename, cfg.FetchIncludePaths(), cfg.FetchExcludePaths()) if smudgeSkip || cfg.Os.Bool("GIT_LFS_SKIP_SMUDGE", false) { download = false diff --git a/commands/command_track.go b/commands/command_track.go index 92ddbbab..013aa8de 100644 --- a/commands/command_track.go +++ b/commands/command_track.go @@ -7,12 +7,15 @@ import ( "os" "path/filepath" "strings" + "sync" "time" + "github.com/rubyist/tracerx" + "github.com/github/git-lfs/config" "github.com/github/git-lfs/git" - "github.com/github/git-lfs/lfs" + "github.com/github/git-lfs/tools" "github.com/spf13/cobra" ) @@ -181,16 +184,24 @@ func findAttributeFiles() []string { paths = append(paths, repoAttributes) } - filepath.Walk(config.LocalWorkingDir, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err + fchan, errchan := tools.FastWalkGitRepo(config.LocalWorkingDir) + var waitg sync.WaitGroup + waitg.Add(2) + go func() { + for o := range fchan { + if !o.Info.IsDir() && (o.Info.Name() == ".gitattributes") { + paths = append(paths, filepath.Join(o.ParentDir, o.Info.Name())) + } } - - if !info.IsDir() && (filepath.Base(path) == ".gitattributes") { - paths = append(paths, path) + waitg.Done() + }() + go func() { + for err := range errchan { + tracerx.Printf("Error finding .gitattributes: %v", err) } - return nil - }) + waitg.Done() + }() + waitg.Wait() return paths } diff --git a/lfs/scanner.go b/lfs/scanner.go index 517a06ee..c7ffc391 100644 --- a/lfs/scanner.go +++ b/lfs/scanner.go @@ -1066,12 +1066,12 @@ func parseLogOutputToPointers(log io.Reader, dir LogDiffDirection, } else { currentFilename = match[1] } - currentFileIncluded = FilenamePassesIncludeExcludeFilter(currentFilename, includePaths, excludePaths) + currentFileIncluded = tools.FilenamePassesIncludeExcludeFilter(currentFilename, includePaths, excludePaths) } else if match := fileMergeHeaderRegex.FindStringSubmatch(line); match != nil { // Git merge file header is a little different, only one file finishLastPointer() currentFilename = match[1] - currentFileIncluded = FilenamePassesIncludeExcludeFilter(currentFilename, includePaths, excludePaths) + currentFileIncluded = tools.FilenamePassesIncludeExcludeFilter(currentFilename, includePaths, excludePaths) } else if currentFileIncluded { if match := pointerDataRegex.FindStringSubmatch(line); match != nil { // An LFS pointer data line diff --git a/lfs/util.go b/lfs/util.go index 95f2fe83..f1a0dd67 100644 --- a/lfs/util.go +++ b/lfs/util.go @@ -7,7 +7,6 @@ import ( "os" "path/filepath" "runtime" - "strings" "github.com/github/git-lfs/config" "github.com/github/git-lfs/progress" @@ -72,68 +71,6 @@ func wrapProgressError(err error, event, filename string) error { var localDirSet = tools.NewStringSetFromSlice([]string{".", "./", ".\\"}) -// Return whether a given filename passes the include / exclude path filters -// Only paths that are in includePaths and outside excludePaths are passed -// If includePaths is empty that filter always passes and the same with excludePaths -// Both path lists support wildcard matches -func FilenamePassesIncludeExcludeFilter(filename string, includePaths, excludePaths []string) bool { - if len(includePaths) == 0 && len(excludePaths) == 0 { - return true - } - - filename = filepath.Clean(filename) - if len(includePaths) > 0 { - matched := false - for _, inc := range includePaths { - inc = filepath.Clean(inc) - - // Special case local dir, matches all (inc subpaths) - if _, local := localDirSet[inc]; local { - matched = true - break - } - - matched, _ = filepath.Match(inc, filename) - if !matched { - // Also support matching a parent directory without a wildcard - if strings.HasPrefix(filename, inc+string(filepath.Separator)) { - matched = true - } - } - - if matched { - break - } - - } - if !matched { - return false - } - } - - if len(excludePaths) > 0 { - for _, ex := range excludePaths { - ex = filepath.Clean(ex) - - // Special case local dir, matches all (inc subpaths) - if _, local := localDirSet[ex]; local { - return false - } - - if matched, _ := filepath.Match(ex, filename); matched { - return false - } - - // Also support matching a parent directory without a wildcard - if strings.HasPrefix(filename, ex+string(filepath.Separator)) { - return false - } - } - } - - return true -} - func GetPlatform() Platform { if currentPlatform == PlatformUndetermined { switch runtime.GOOS { diff --git a/lfs/util_test.go b/lfs/util_test.go index b2e776e4..2e389028 100644 --- a/lfs/util_test.go +++ b/lfs/util_test.go @@ -2,7 +2,6 @@ package lfs import ( "bytes" - "strings" "testing" "github.com/github/git-lfs/progress" @@ -38,62 +37,3 @@ func TestWriterWithCallback(t *testing.T) { assert.Equal(t, 3, int(calledRead[0])) assert.Equal(t, 5, int(calledRead[1])) } - -type TestIncludeExcludeCase struct { - expectedResult bool - includes []string - excludes []string -} - -func TestFilterIncludeExclude(t *testing.T) { - - cases := []TestIncludeExcludeCase{ - // Null case - TestIncludeExcludeCase{true, nil, nil}, - // Inclusion - TestIncludeExcludeCase{true, []string{"test/filename.dat"}, nil}, - TestIncludeExcludeCase{true, []string{"test/filename.dat"}, nil}, - TestIncludeExcludeCase{false, []string{"blank", "something", "foo"}, nil}, - TestIncludeExcludeCase{false, []string{"test/notfilename.dat"}, nil}, - TestIncludeExcludeCase{true, []string{"test"}, nil}, - TestIncludeExcludeCase{true, []string{"test/*"}, nil}, - TestIncludeExcludeCase{false, []string{"nottest"}, nil}, - TestIncludeExcludeCase{false, []string{"nottest/*"}, nil}, - TestIncludeExcludeCase{true, []string{"test/fil*"}, nil}, - TestIncludeExcludeCase{false, []string{"test/g*"}, nil}, - TestIncludeExcludeCase{true, []string{"tes*/*"}, nil}, - // Exclusion - TestIncludeExcludeCase{false, nil, []string{"test/filename.dat"}}, - TestIncludeExcludeCase{false, nil, []string{"blank", "something", "test/filename.dat", "foo"}}, - TestIncludeExcludeCase{true, nil, []string{"blank", "something", "foo"}}, - TestIncludeExcludeCase{true, nil, []string{"test/notfilename.dat"}}, - TestIncludeExcludeCase{false, nil, []string{"test"}}, - TestIncludeExcludeCase{false, nil, []string{"test/*"}}, - TestIncludeExcludeCase{true, nil, []string{"nottest"}}, - TestIncludeExcludeCase{true, nil, []string{"nottest/*"}}, - TestIncludeExcludeCase{false, nil, []string{"test/fil*"}}, - TestIncludeExcludeCase{true, nil, []string{"test/g*"}}, - TestIncludeExcludeCase{false, nil, []string{"tes*/*"}}, - - // Both - TestIncludeExcludeCase{true, []string{"test/filename.dat"}, []string{"test/notfilename.dat"}}, - TestIncludeExcludeCase{false, []string{"test"}, []string{"test/filename.dat"}}, - TestIncludeExcludeCase{true, []string{"test/*"}, []string{"test/notfile*"}}, - TestIncludeExcludeCase{false, []string{"test/*"}, []string{"test/file*"}}, - TestIncludeExcludeCase{false, []string{"another/*", "test/*"}, []string{"test/notfilename.dat", "test/filename.dat"}}, - } - - for _, c := range cases { - assert.Equal(t, c.expectedResult, FilenamePassesIncludeExcludeFilter("test/filename.dat", c.includes, c.excludes), c) - if IsWindows() { - // also test with \ path separators, tolerate mixed separators - for i, inc := range c.includes { - c.includes[i] = strings.Replace(inc, "/", "\\", -1) - } - for i, ex := range c.excludes { - c.excludes[i] = strings.Replace(ex, "/", "\\", -1) - } - assert.Equal(t, c.expectedResult, FilenamePassesIncludeExcludeFilter("test/filename.dat", c.includes, c.excludes), c) - } - } -} diff --git a/tools/filetools.go b/tools/filetools.go index 729afb07..4c8d9704 100644 --- a/tools/filetools.go +++ b/tools/filetools.go @@ -3,15 +3,20 @@ package tools import ( + "bufio" "encoding/hex" "fmt" "io" "os" "path" "path/filepath" + "regexp" "strings" + "sync" ) +var localDirSet = NewStringSetFromSlice([]string{".", "./", ".\\"}) + // FileOrDirExists determines if a file/dir exists, returns IsDir() results too. func FileOrDirExists(path string) (exists bool, isDir bool) { fi, err := os.Stat(path) @@ -121,3 +126,247 @@ func VerifyFileHash(oid, path string) error { return nil } + +// FilenamePassesIncludeExcludeFilter returns whether a given filename passes the include / exclude path filters +// Only paths that are in includePaths and outside excludePaths are passed +// If includePaths is empty that filter always passes and the same with excludePaths +// Both path lists support wildcard matches +func FilenamePassesIncludeExcludeFilter(filename string, includePaths, excludePaths []string) bool { + if len(includePaths) == 0 && len(excludePaths) == 0 { + return true + } + + if len(includePaths) > 0 { + matched := false + for _, inc := range includePaths { + matched = FileMatch(inc, filename) + if matched { + break + } + } + if !matched { + return false + } + } + + if len(excludePaths) > 0 { + for _, ex := range excludePaths { + if FileMatch(ex, filename) { + return false + } + } + } + + return true +} + +// FileMatch is a revised version of filepath.Match which makes it behave more +// like gitignore +func FileMatch(pattern, name string) bool { + pattern = filepath.Clean(pattern) + name = filepath.Clean(name) + + // Special case local dir, matches all (inc subpaths) + if _, local := localDirSet[pattern]; local { + return true + } + + if matched, _ := filepath.Match(pattern, name); matched { + return true + } + + // special case * when there are no path separators + // filepath.Match never allows * to match a path separator, which is correct + // for gitignore IF the pattern includes a path separator, but not otherwise + // So *.txt should match in any subdir, as should test*, but sub/*.txt would + // only match directly in the sub dir + // Don't need to test cross-platform separators as both cleaned above + if !strings.Contains(pattern, string(filepath.Separator)) && + strings.Contains(pattern, "*") { + pattern = regexp.QuoteMeta(pattern) + // Match the whole of the base name but allow matching in folders if no path + basename := filepath.Base(name) + regpattern := fmt.Sprintf("^%s$", strings.Replace(pattern, "\\*", ".*", -1)) + if regexp.MustCompile(regpattern).MatchString(basename) { + return true + } + } + // Also support ** with path separators + if strings.Contains(pattern, string(filepath.Separator)) && strings.Contains(pattern, "**") { + pattern = regexp.QuoteMeta(pattern) + regpattern := fmt.Sprintf("^%s$", strings.Replace(pattern, "\\*\\*", ".*", -1)) + if regexp.MustCompile(regpattern).MatchString(name) { + return true + } + + } + // Also support matching a parent directory without a wildcard + if strings.HasPrefix(name, pattern+string(filepath.Separator)) { + return true + } + + return false + +} + +// Returned from FastWalk with parent directory context +// This is needed because FastWalk can provide paths out of order so the +// parent dir cannot be implied +type FastWalkInfo struct { + ParentDir string + Info os.FileInfo +} + +// fastWalkWithExcludeFiles walks the contents of a dir, respecting +// include/exclude patterns and also loading new exlude patterns from files +// named excludeFilename in directories walked +func fastWalkWithExcludeFiles(dir, excludeFilename string, + includePaths, excludePaths []string) (<-chan FastWalkInfo, <-chan error) { + fiChan := make(chan FastWalkInfo, 256) + errChan := make(chan error, 10) + + go fastWalkFromRoot(dir, excludeFilename, includePaths, excludePaths, fiChan, errChan) + + return fiChan, errChan +} + +// FastWalkGitRepo is a more optimal implementation of filepath.Walk for a Git repo +// It differs in the following ways: +// * Provides a channel of information instead of using a callback func +// * Uses goroutines to parallelise large dirs and descent into subdirs +// * Does not provide sorted output; parents will always be before children but +// there are no other guarantees. Use parentDir in the FastWalkInfo struct to +// determine absolute path rather than tracking it yourself like filepath.Walk +// * Automatically ignores any .git directories +// * Respects .gitignore contents and skips ignored files/dirs +func FastWalkGitRepo(dir string) (<-chan FastWalkInfo, <-chan error) { + // Ignore all git metadata including subrepos + excludePaths := []string{".git", filepath.Join("**", ".git")} + return fastWalkWithExcludeFiles(dir, ".gitignore", nil, excludePaths) +} + +func fastWalkFromRoot(dir string, excludeFilename string, + includePaths, excludePaths []string, fiChan chan<- FastWalkInfo, errChan chan<- error) { + + dirFi, err := os.Stat(dir) + if err != nil { + errChan <- err + return + } + + // This waitgroup will be incremented for each nested goroutine + var waitg sync.WaitGroup + + fastWalkFileOrDir(filepath.Dir(dir), dirFi, excludeFilename, includePaths, excludePaths, fiChan, errChan, &waitg) + + waitg.Wait() + close(fiChan) + close(errChan) + +} + +// fastWalkFileOrDir is the main recursive implementation of fast walk +// Sends the file/dir and any contents to the channel so long as it passes the +// include/exclude filter. If a dir, parses any excludeFilename found and updates +// the excludePaths with its content before (parallel) recursing into contents +// Also splits large directories into multiple goroutines. +// Increments waitg.Add(1) for each new goroutine launched internally +func fastWalkFileOrDir(parentDir string, itemFi os.FileInfo, excludeFilename string, + includePaths, excludePaths []string, fiChan chan<- FastWalkInfo, errChan chan<- error, + waitg *sync.WaitGroup) { + + fullPath := filepath.Join(parentDir, itemFi.Name()) + + if !FilenamePassesIncludeExcludeFilter(fullPath, includePaths, excludePaths) { + return + } + + fiChan <- FastWalkInfo{ParentDir: parentDir, Info: itemFi} + + if !itemFi.IsDir() { + // Nothing more to do if this is not a dir + return + } + + if len(excludeFilename) > 0 { + possibleExcludeFile := filepath.Join(fullPath, excludeFilename) + if FileExists(possibleExcludeFile) { + var err error + excludePaths, err = loadExcludeFilename(possibleExcludeFile, fullPath, excludePaths) + if err != nil { + errChan <- err + } + } + } + + // The absolute optimal way to scan would be File.Readdirnames but we + // still need the Stat() to know whether something is a dir, so use + // File.Readdir instead. Means we can provide os.FileInfo to callers like + // filepath.Walk as a bonus. + df, err := os.Open(fullPath) + if err != nil { + errChan <- err + return + } + defer df.Close() + // The number of items in a dir we process in each goroutine + jobSize := 100 + for children, err := df.Readdir(jobSize); err == nil; children, err = df.Readdir(jobSize) { + // Parallelise all dirs, and chop large dirs into batches + waitg.Add(1) + go func(subitems []os.FileInfo) { + for _, childFi := range subitems { + fastWalkFileOrDir(fullPath, childFi, excludeFilename, includePaths, excludePaths, fiChan, errChan, waitg) + } + waitg.Done() + }(children) + + } + if err != nil && err != io.EOF { + errChan <- err + } + +} + +// loadExcludeFilename reads the given file in gitignore format and returns a +// revised array of exclude paths if there are any changes. +// If any changes are made a copy of the array is taken so the original is not +// modified +func loadExcludeFilename(filename, parentDir string, excludePaths []string) ([]string, error) { + f, err := os.OpenFile(filename, os.O_RDONLY, 0644) + if err != nil { + return excludePaths, err + } + defer f.Close() + + retPaths := excludePaths + modified := false + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + // Skip blanks, comments and negations (not supported right now) + if len(line) == 0 || strings.HasPrefix(line, "#") || strings.HasPrefix(line, "!") { + continue + } + + if !modified { + // copy on write + retPaths = make([]string, len(excludePaths)) + copy(retPaths, excludePaths) + modified = true + } + + path := line + // Add pattern in context if exclude has separator, or no wildcard + // Allow for both styles of separator at this point + if strings.ContainsAny(path, "/\\") || + !strings.Contains(path, "*") { + path = filepath.Join(parentDir, line) + } + retPaths = append(retPaths, path) + } + + return retPaths, nil + +} diff --git a/tools/filetools_test.go b/tools/filetools_test.go index de705b21..b2417950 100644 --- a/tools/filetools_test.go +++ b/tools/filetools_test.go @@ -1,20 +1,287 @@ -package tools_test +package tools import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "runtime" + "sort" + "strings" + "sync" "testing" - "github.com/github/git-lfs/tools" + "github.com/github/git-lfs/subprocess" + "github.com/stretchr/testify/assert" ) func TestCleanPathsCleansPaths(t *testing.T) { - cleaned := tools.CleanPaths("/foo/bar/,/foo/bar/baz", ",") + cleaned := CleanPaths("/foo/bar/,/foo/bar/baz", ",") assert.Equal(t, []string{"/foo/bar", "/foo/bar/baz"}, cleaned) } func TestCleanPathsReturnsNoResultsWhenGivenNoPaths(t *testing.T) { - cleaned := tools.CleanPaths("", ",") + cleaned := CleanPaths("", ",") assert.Empty(t, cleaned) } + +func TestFileMatch(t *testing.T) { + assert.True(t, FileMatch("filename.txt", "filename.txt")) + assert.True(t, FileMatch("*.txt", "filename.txt")) + assert.False(t, FileMatch("*.tx", "filename.txt")) + assert.True(t, FileMatch("f*.txt", "filename.txt")) + assert.False(t, FileMatch("g*.txt", "filename.txt")) + assert.True(t, FileMatch("file*", "filename.txt")) + assert.False(t, FileMatch("file", "filename.txt")) + + // With no path separators, should match in subfolders + assert.True(t, FileMatch("*.txt", "sub/filename.txt")) + assert.False(t, FileMatch("*.tx", "sub/filename.txt")) + assert.True(t, FileMatch("f*.txt", "sub/filename.txt")) + assert.False(t, FileMatch("g*.txt", "sub/filename.txt")) + assert.True(t, FileMatch("file*", "sub/filename.txt")) + assert.False(t, FileMatch("file", "sub/filename.txt")) + // Needs wildcard for exact filename + assert.True(t, FileMatch("**/filename.txt", "sub/sub/sub/filename.txt")) + + // Should not match dots to subparts + assert.False(t, FileMatch("*.ign", "sub/shouldignoreme.txt")) + + // Path specific + assert.True(t, FileMatch("sub", "sub/filename.txt")) + assert.False(t, FileMatch("sub", "subfilename.txt")) + + // Absolute + assert.True(t, FileMatch("*.dat", "/path/to/sub/.git/test.dat")) + assert.True(t, FileMatch("**/.git", "/path/to/sub/.git")) + + // Match anything + assert.True(t, FileMatch(".", "path.txt")) + assert.True(t, FileMatch("./", "path.txt")) + assert.True(t, FileMatch(".\\", "path.txt")) + +} + +type TestIncludeExcludeCase struct { + expectedResult bool + includes []string + excludes []string +} + +func TestFilterIncludeExclude(t *testing.T) { + + cases := []TestIncludeExcludeCase{ + // Null case + TestIncludeExcludeCase{true, nil, nil}, + // Inclusion + TestIncludeExcludeCase{true, []string{"*.dat"}, nil}, + TestIncludeExcludeCase{true, []string{"file*.dat"}, nil}, + TestIncludeExcludeCase{true, []string{"file*"}, nil}, + TestIncludeExcludeCase{true, []string{"*name.dat"}, nil}, + TestIncludeExcludeCase{false, []string{"/*.dat"}, nil}, + TestIncludeExcludeCase{false, []string{"otherfolder/*.dat"}, nil}, + TestIncludeExcludeCase{false, []string{"*.nam"}, nil}, + TestIncludeExcludeCase{true, []string{"test/filename.dat"}, nil}, + TestIncludeExcludeCase{true, []string{"test/filename.dat"}, nil}, + TestIncludeExcludeCase{false, []string{"blank", "something", "foo"}, nil}, + TestIncludeExcludeCase{false, []string{"test/notfilename.dat"}, nil}, + TestIncludeExcludeCase{true, []string{"test"}, nil}, + TestIncludeExcludeCase{true, []string{"test/*"}, nil}, + TestIncludeExcludeCase{false, []string{"nottest"}, nil}, + TestIncludeExcludeCase{false, []string{"nottest/*"}, nil}, + TestIncludeExcludeCase{true, []string{"test/fil*"}, nil}, + TestIncludeExcludeCase{false, []string{"test/g*"}, nil}, + TestIncludeExcludeCase{true, []string{"tes*/*"}, nil}, + // Exclusion + TestIncludeExcludeCase{false, nil, []string{"*.dat"}}, + TestIncludeExcludeCase{false, nil, []string{"file*.dat"}}, + TestIncludeExcludeCase{false, nil, []string{"file*"}}, + TestIncludeExcludeCase{false, nil, []string{"*name.dat"}}, + TestIncludeExcludeCase{true, nil, []string{"/*.dat"}}, + TestIncludeExcludeCase{true, nil, []string{"otherfolder/*.dat"}}, + TestIncludeExcludeCase{false, nil, []string{"test/filename.dat"}}, + TestIncludeExcludeCase{false, nil, []string{"blank", "something", "test/filename.dat", "foo"}}, + TestIncludeExcludeCase{true, nil, []string{"blank", "something", "foo"}}, + TestIncludeExcludeCase{true, nil, []string{"test/notfilename.dat"}}, + TestIncludeExcludeCase{false, nil, []string{"test"}}, + TestIncludeExcludeCase{false, nil, []string{"test/*"}}, + TestIncludeExcludeCase{true, nil, []string{"nottest"}}, + TestIncludeExcludeCase{true, nil, []string{"nottest/*"}}, + TestIncludeExcludeCase{false, nil, []string{"test/fil*"}}, + TestIncludeExcludeCase{true, nil, []string{"test/g*"}}, + TestIncludeExcludeCase{false, nil, []string{"tes*/*"}}, + + // Both + TestIncludeExcludeCase{true, []string{"test/filename.dat"}, []string{"test/notfilename.dat"}}, + TestIncludeExcludeCase{false, []string{"test"}, []string{"test/filename.dat"}}, + TestIncludeExcludeCase{true, []string{"test/*"}, []string{"test/notfile*"}}, + TestIncludeExcludeCase{false, []string{"test/*"}, []string{"test/file*"}}, + TestIncludeExcludeCase{false, []string{"another/*", "test/*"}, []string{"test/notfilename.dat", "test/filename.dat"}}, + } + + for _, c := range cases { + result := FilenamePassesIncludeExcludeFilter("test/filename.dat", c.includes, c.excludes) + assert.Equal(t, c.expectedResult, result, "includes: %v excludes: %v", c.includes, c.excludes) + if runtime.GOOS == "windows" { + // also test with \ path separators, tolerate mixed separators + for i, inc := range c.includes { + c.includes[i] = strings.Replace(inc, "/", "\\", -1) + } + for i, ex := range c.excludes { + c.excludes[i] = strings.Replace(ex, "/", "\\", -1) + } + assert.Equal(t, c.expectedResult, FilenamePassesIncludeExcludeFilter("test/filename.dat", c.includes, c.excludes), c) + } + } +} + +func TestFastWalkBasic(t *testing.T) { + rootDir, err := ioutil.TempDir(os.TempDir(), "GitLfsTestFastWalkBasic") + if err != nil { + assert.FailNow(t, "Unable to get temp dir: %v", err) + } + defer os.RemoveAll(rootDir) + os.Chdir(rootDir) + + expectedEntries := createFastWalkInputData(10, 160) + + fchan, errchan := fastWalkWithExcludeFiles(expectedEntries[0], "", nil, nil) + gotEntries, gotErrors := collectFastWalkResults(fchan, errchan) + + assert.Empty(t, gotErrors) + + sort.Strings(expectedEntries) + sort.Strings(gotEntries) + assert.Equal(t, expectedEntries, gotEntries) + +} + +func TestFastWalkGitRepo(t *testing.T) { + rootDir, err := ioutil.TempDir(os.TempDir(), "GitLfsTestFastWalkGitRepo") + if err != nil { + assert.FailNow(t, "Unable to get temp dir: %v", err) + } + defer os.RemoveAll(rootDir) + os.Chdir(rootDir) + + expectedEntries := createFastWalkInputData(3, 3) + + mainDir := expectedEntries[0] + + // Set up a git repo and add some ignored files / dirs + subprocess.SimpleExec("git", "init", mainDir) + ignored := []string{ + "filethatweignore.ign", + "foldercontainingignored", + "foldercontainingignored/notthisone.ign", + "ignoredfolder", + "ignoredfolder/file1.txt", + "ignoredfolder/file2.txt", + "ignoredfrominside", + "ignoredfrominside/thisisok.txt", + "ignoredfrominside/thisisnot.txt", + "ignoredfrominside/thisone", + "ignoredfrominside/thisone/file1.txt", + } + for _, f := range ignored { + fullPath := filepath.Join(mainDir, f) + if len(filepath.Ext(f)) > 0 { + ioutil.WriteFile(fullPath, []byte("TEST"), 0644) + } else { + os.MkdirAll(fullPath, 0755) + } + } + // write root .gitignore + rootGitIgnore := ` +# ignore *.ign everywhere +*.ign +# ignore folder +ignoredfolder +` + ioutil.WriteFile(filepath.Join(mainDir, ".gitignore"), []byte(rootGitIgnore), 0644) + // Subfolder ignore; folder will show up but but subfolder 'thisone' won't + subFolderIgnore := ` +thisone +thisisnot.txt +` + ioutil.WriteFile(filepath.Join(mainDir, "ignoredfrominside", ".gitignore"), []byte(subFolderIgnore), 0644) + + // This dir will be walked but content won't be + expectedEntries = append(expectedEntries, filepath.Join(mainDir, "foldercontainingignored")) + // This dir will be walked and some of its content but has its own gitignore + expectedEntries = append(expectedEntries, filepath.Join(mainDir, "ignoredfrominside")) + expectedEntries = append(expectedEntries, filepath.Join(mainDir, "ignoredfrominside", "thisisok.txt")) + // Also gitignores + expectedEntries = append(expectedEntries, filepath.Join(mainDir, ".gitignore")) + expectedEntries = append(expectedEntries, filepath.Join(mainDir, "ignoredfrominside", ".gitignore")) + // nothing else should be there + + fchan, errchan := FastWalkGitRepo(mainDir) + gotEntries, gotErrors := collectFastWalkResults(fchan, errchan) + + assert.Empty(t, gotErrors) + + sort.Strings(expectedEntries) + sort.Strings(gotEntries) + assert.Equal(t, expectedEntries, gotEntries) + +} + +// Make test data - ensure you've Chdir'ed into a temp dir first +// Returns list of files/dirs that are created +// First entry is the parent dir of all others +func createFastWalkInputData(smallFolder, largeFolder int) []string { + dirs := []string{ + "testroot", + "testroot/folder1", + "testroot/folder2", + "testroot/folder2/subfolder1", + "testroot/folder2/subfolder2", + "testroot/folder2/subfolder3", + "testroot/folder2/subfolder4", + "testroot/folder2/subfolder4/subsub", + } + expectedEntries := make([]string, 0, 250) + + for i, dir := range dirs { + os.MkdirAll(dir, 0755) + numFiles := smallFolder + expectedEntries = append(expectedEntries, filepath.Clean(dir)) + if i >= 3 && i <= 5 { + // Bulk test to ensure works with > 1 batch + numFiles = largeFolder + } + for f := 0; f < numFiles; f++ { + filename := filepath.Join(dir, fmt.Sprintf("file%d.txt", f)) + ioutil.WriteFile(filename, []byte("TEST"), 0644) + expectedEntries = append(expectedEntries, filepath.Clean(filename)) + } + } + + return expectedEntries +} + +func collectFastWalkResults(fchan <-chan FastWalkInfo, errchan <-chan error) ([]string, []error) { + gotEntries := make([]string, 0, 1000) + gotErrors := make([]error, 0, 5) + var waitg sync.WaitGroup + waitg.Add(2) + go func() { + for o := range fchan { + gotEntries = append(gotEntries, filepath.Join(o.ParentDir, o.Info.Name())) + } + waitg.Done() + }() + go func() { + for err := range errchan { + gotErrors = append(gotErrors, err) + } + waitg.Done() + }() + waitg.Wait() + + return gotEntries, gotErrors +}