diff --git a/tools/filetools.go b/tools/filetools.go
index 8e0bc28a..41ef8b5e 100644
--- a/tools/filetools.go
+++ b/tools/filetools.go
@@ -142,14 +142,8 @@ type FastWalkCallback func(parentDir string, info os.FileInfo, err error)
 //
 // rootDir - Absolute path to the top of the repository working directory
 func FastWalkGitRepo(rootDir string, cb FastWalkCallback) {
-	// Ignore all git metadata including subrepos
-	excludePaths := []filepathfilter.Pattern{
-		filepathfilter.NewPattern(".git"),
-		filepathfilter.NewPattern(filepath.Join("**", ".git")),
-	}
-
-	fileCh := fastWalkWithExcludeFiles(rootDir, ".gitignore", excludePaths)
-	for file := range fileCh {
+	walker := fastWalkWithExcludeFiles(rootDir, ".gitignore")
+	for file := range walker.ch {
 		cb(file.ParentDir, file.Info, file.Err)
 	}
 }
@@ -163,53 +157,66 @@ type fastWalkInfo struct {
 	Err       error
 }
 
+// fastWalker holds the state shared by all goroutines of a single walk.
+type fastWalker struct {
+	rootDir         string
+	excludeFilename string
+	ch              chan fastWalkInfo
+	limit           int32
+	cur             *int32
+	wg              *sync.WaitGroup
+}
+
 // fastWalkWithExcludeFiles walks the contents of a dir, respecting
-// include/exclude patterns and also loading new exlude patterns from files
+// include/exclude patterns and also loading new exclude patterns from files
 // named excludeFilename in directories walked
 //
 // rootDir - Absolute path to the top of the repository working directory
-func fastWalkWithExcludeFiles(rootDir, excludeFilename string,
-	excludePaths []filepathfilter.Pattern) <-chan fastWalkInfo {
-	fiChan := make(chan fastWalkInfo, 256)
-	go fastWalkFromRoot(rootDir, excludeFilename, excludePaths, fiChan)
-	return fiChan
-}
-
-// rootDir - Absolute path to the top of the repository working directory
-func fastWalkFromRoot(rootDir string, excludeFilename string,
-	excludePaths []filepathfilter.Pattern, fiChan chan<- fastWalkInfo) {
-
-	dirFi, err := os.Stat(rootDir)
-	if err != nil {
-		fiChan <- fastWalkInfo{Err: err}
-		return
+func fastWalkWithExcludeFiles(rootDir, excludeFilename string) *fastWalker {
+	excludePaths := []filepathfilter.Pattern{
+		filepathfilter.NewPattern(".git"),
+		filepathfilter.NewPattern(filepath.Join("**", ".git")),
 	}
 
-	// This waitgroup will be incremented for each nested goroutine
-	var waitg sync.WaitGroup
-	fastWalkFileOrDir(true, rootDir, "", dirFi, excludeFilename, excludePaths, fiChan, &waitg)
-	waitg.Wait()
-	close(fiChan)
+	w := &fastWalker{
+		rootDir:         rootDir,
+		excludeFilename: excludeFilename,
+		ch:              make(chan fastWalkInfo, 256),
+		wg:              &sync.WaitGroup{},
+	}
+
+	go func() {
+		dirFi, err := os.Stat(w.rootDir)
+		if err != nil {
+			w.ch <- fastWalkInfo{Err: err}
+			// Wait() is never reached on this path; close so receivers ranging over w.ch terminate.
+			close(w.ch)
+			return
+		}
+
+		w.Walk(true, "", dirFi, excludePaths)
+		w.Wait()
+	}()
+	return w
 }
 
-// fastWalkFileOrDir is the main recursive implementation of fast walk
+// Walk is the main recursive implementation of fast walk.
 // Sends the file/dir and any contents to the channel so long as it passes the
 // include/exclude filter. If a dir, parses any excludeFilename found and updates
 // the excludePaths with its content before (parallel) recursing into contents
 // Also splits large directories into multiple goroutines.
-// Increments waitg.Add(1) for each new goroutine launched internally
+// Increments w.wg for each new goroutine launched internally
 //
-// rootDir - Absolute path to the top of the repository working directory
 // workDir - Relative path inside the repository
-func fastWalkFileOrDir(isRoot bool, rootDir, workDir string, itemFi os.FileInfo, excludeFilename string,
-	excludePaths []filepathfilter.Pattern, fiChan chan<- fastWalkInfo, waitg *sync.WaitGroup) {
+func (w *fastWalker) Walk(isRoot bool, workDir string, itemFi os.FileInfo,
+	excludePaths []filepathfilter.Pattern) {
 
 	var fullPath string      // Absolute path to the current file or dir
 	var parentWorkDir string // Absolute path to the workDir inside the repository
 	if isRoot {
-		fullPath = rootDir
+		fullPath = w.rootDir
 	} else {
-		parentWorkDir = filepath.Join(rootDir, workDir)
+		parentWorkDir = filepath.Join(w.rootDir, workDir)
 		fullPath = filepath.Join(parentWorkDir, itemFi.Name())
 	}
 
@@ -218,7 +225,7 @@ func fastWalkFileOrDir(isRoot bool, rootDir, workDir string, itemFi os.FileInfo,
 		return
 	}
 
-	fiChan <- fastWalkInfo{ParentDir: parentWorkDir, Info: itemFi}
+	w.ch <- fastWalkInfo{ParentDir: parentWorkDir, Info: itemFi}
 
 	if !itemFi.IsDir() {
 		// Nothing more to do if this is not a dir
@@ -230,12 +237,12 @@ func fastWalkFileOrDir(isRoot bool, rootDir, workDir string, itemFi os.FileInfo,
 		childWorkDir = filepath.Join(workDir, itemFi.Name())
 	}
 
-	if len(excludeFilename) > 0 {
-		possibleExcludeFile := filepath.Join(fullPath, excludeFilename)
+	if len(w.excludeFilename) > 0 {
+		possibleExcludeFile := filepath.Join(fullPath, w.excludeFilename)
 		var err error
 		excludePaths, err = loadExcludeFilename(possibleExcludeFile, childWorkDir, excludePaths)
 		if err != nil {
-			fiChan <- fastWalkInfo{Err: err}
+			w.ch <- fastWalkInfo{Err: err}
 		}
 	}
 
@@ -245,30 +252,42 @@ func fastWalkFileOrDir(isRoot bool, rootDir, workDir string, itemFi os.FileInfo,
 	// filepath.Walk as a bonus.
 	df, err := os.Open(fullPath)
 	if err != nil {
-		fiChan <- fastWalkInfo{Err: err}
+		w.ch <- fastWalkInfo{Err: err}
 		return
 	}
-	defer df.Close()
 
 	// The number of items in a dir we process in each goroutine
 	jobSize := 100
-
 	for children, err := df.Readdir(jobSize); err == nil; children, err = df.Readdir(jobSize) {
 		// Parallelise all dirs, and chop large dirs into batches
-		waitg.Add(1)
-		go func(subitems []os.FileInfo) {
+		w.walk(children, func(subitems []os.FileInfo) {
 			for _, childFi := range subitems {
-				fastWalkFileOrDir(false, rootDir, childWorkDir, childFi, excludeFilename, excludePaths, fiChan, waitg)
+				w.Walk(false, childWorkDir, childFi, excludePaths)
 			}
-			waitg.Done()
-		}(children)
+		})
+	}
 
-	}
+	df.Close()
 	if err != nil && err != io.EOF {
-		fiChan <- fastWalkInfo{Err: err}
+		w.ch <- fastWalkInfo{Err: err}
 	}
 }
 
+// walk runs fn(children) in a new goroutine that is tracked by w.wg.
+func (w *fastWalker) walk(children []os.FileInfo, fn func([]os.FileInfo)) {
+	w.wg.Add(1)
+	go func() {
+		fn(children)
+		w.wg.Done()
+	}()
+}
+
+// Wait blocks until every goroutine started by walk has finished, then closes w.ch.
+func (w *fastWalker) Wait() {
+	w.wg.Wait()
+	close(w.ch)
+}
+
 // loadExcludeFilename reads the given file in gitignore format and returns a
 // revised array of exclude paths if there are any changes.
 // If any changes are made a copy of the array is taken so the original is not
diff --git a/tools/filetools_test.go b/tools/filetools_test.go
index ade9a492..6e3500e2 100644
--- a/tools/filetools_test.go
+++ b/tools/filetools_test.go
@@ -15,13 +15,11 @@ import (
 
 func TestCleanPathsCleansPaths(t *testing.T) {
 	cleaned := CleanPaths("/foo/bar/,/foo/bar/baz", ",")
-
 	assert.Equal(t, []string{"/foo/bar", "/foo/bar/baz"}, cleaned)
 }
 
 func TestCleanPathsReturnsNoResultsWhenGivenNoPaths(t *testing.T) {
 	cleaned := CleanPaths("", ",")
-
 	assert.Empty(t, cleaned)
 }
 
@@ -35,15 +33,14 @@ func TestFastWalkBasic(t *testing.T) {
 
 	expectedEntries := createFastWalkInputData(10, 160)
 
-	fchan := fastWalkWithExcludeFiles(expectedEntries[0], "", nil)
-	gotEntries, gotErrors := collectFastWalkResults(fchan)
+	walker := fastWalkWithExcludeFiles(expectedEntries[0], "")
+	gotEntries, gotErrors := collectFastWalkResults(walker.ch)
 
 	assert.Empty(t, gotErrors)
 
 	sort.Strings(expectedEntries)
 	sort.Strings(gotEntries)
 	assert.Equal(t, expectedEntries, gotEntries)
-
 }
 
 func BenchmarkFastWalkGitRepoChannels(b *testing.B) {
@@ -229,7 +226,6 @@ func getFileMode(filename string) os.FileMode {
 }
 
 func TestSetWriteFlag(t *testing.T) {
-
 	f, err := ioutil.TempFile("", "lfstestwriteflag")
 	assert.Nil(t, err)
 	filename := f.Name()
@@ -272,5 +268,4 @@ func TestSetWriteFlag(t *testing.T) {
 		// should only add back user write
 		assert.EqualValues(t, 0640, getFileMode(filename))
 	}
-
 }