tools: extract internal fastWalker type

This commit is contained in:
rick olson 2017-10-17 09:06:30 -06:00
parent 150edd19ea
commit fd2d9df43b
2 changed files with 64 additions and 55 deletions

@ -142,14 +142,8 @@ type FastWalkCallback func(parentDir string, info os.FileInfo, err error)
//
// rootDir - Absolute path to the top of the repository working directory
func FastWalkGitRepo(rootDir string, cb FastWalkCallback) {
// Ignore all git metadata including subrepos
excludePaths := []filepathfilter.Pattern{
filepathfilter.NewPattern(".git"),
filepathfilter.NewPattern(filepath.Join("**", ".git")),
}
fileCh := fastWalkWithExcludeFiles(rootDir, ".gitignore", excludePaths)
for file := range fileCh {
walker := fastWalkWithExcludeFiles(rootDir, ".gitignore")
for file := range walker.ch {
cb(file.ParentDir, file.Info, file.Err)
}
}
@ -163,53 +157,63 @@ type fastWalkInfo struct {
Err error
}
type fastWalker struct {
rootDir string
excludeFilename string
ch chan fastWalkInfo
limit int32
cur *int32
wg *sync.WaitGroup
}
// fastWalkWithExcludeFiles walks the contents of a dir, respecting
// include/exclude patterns and also loading new exlude patterns from files
// named excludeFilename in directories walked
//
// rootDir - Absolute path to the top of the repository working directory
func fastWalkWithExcludeFiles(rootDir, excludeFilename string,
excludePaths []filepathfilter.Pattern) <-chan fastWalkInfo {
fiChan := make(chan fastWalkInfo, 256)
go fastWalkFromRoot(rootDir, excludeFilename, excludePaths, fiChan)
return fiChan
}
// rootDir - Absolute path to the top of the repository working directory
func fastWalkFromRoot(rootDir string, excludeFilename string,
excludePaths []filepathfilter.Pattern, fiChan chan<- fastWalkInfo) {
dirFi, err := os.Stat(rootDir)
if err != nil {
fiChan <- fastWalkInfo{Err: err}
return
func fastWalkWithExcludeFiles(rootDir, excludeFilename string) *fastWalker {
excludePaths := []filepathfilter.Pattern{
filepathfilter.NewPattern(".git"),
filepathfilter.NewPattern(filepath.Join("**", ".git")),
}
// This waitgroup will be incremented for each nested goroutine
var waitg sync.WaitGroup
fastWalkFileOrDir(true, rootDir, "", dirFi, excludeFilename, excludePaths, fiChan, &waitg)
waitg.Wait()
close(fiChan)
w := &fastWalker{
rootDir: rootDir,
excludeFilename: excludeFilename,
ch: make(chan fastWalkInfo, 256),
wg: &sync.WaitGroup{},
}
go func() {
dirFi, err := os.Stat(w.rootDir)
if err != nil {
w.ch <- fastWalkInfo{Err: err}
return
}
w.Walk(true, "", dirFi, excludePaths)
w.Wait()
}()
return w
}
// fastWalkFileOrDir is the main recursive implementation of fast walk
// Walk is the main recursive implementation of fast walk.
// Sends the file/dir and any contents to the channel so long as it passes the
// include/exclude filter. If a dir, parses any excludeFilename found and updates
// the excludePaths with its content before (parallel) recursing into contents
// Also splits large directories into multiple goroutines.
// Increments waitg.Add(1) for each new goroutine launched internally
//
// rootDir - Absolute path to the top of the repository working directory
// workDir - Relative path inside the repository
func fastWalkFileOrDir(isRoot bool, rootDir, workDir string, itemFi os.FileInfo, excludeFilename string,
excludePaths []filepathfilter.Pattern, fiChan chan<- fastWalkInfo, waitg *sync.WaitGroup) {
func (w *fastWalker) Walk(isRoot bool, workDir string, itemFi os.FileInfo,
excludePaths []filepathfilter.Pattern) {
var fullPath string // Absolute path to the current file or dir
var parentWorkDir string // Absolute path to the workDir inside the repository
if isRoot {
fullPath = rootDir
fullPath = w.rootDir
} else {
parentWorkDir = filepath.Join(rootDir, workDir)
parentWorkDir = filepath.Join(w.rootDir, workDir)
fullPath = filepath.Join(parentWorkDir, itemFi.Name())
}
@ -218,7 +222,7 @@ func fastWalkFileOrDir(isRoot bool, rootDir, workDir string, itemFi os.FileInfo,
return
}
fiChan <- fastWalkInfo{ParentDir: parentWorkDir, Info: itemFi}
w.ch <- fastWalkInfo{ParentDir: parentWorkDir, Info: itemFi}
if !itemFi.IsDir() {
// Nothing more to do if this is not a dir
@ -230,12 +234,12 @@ func fastWalkFileOrDir(isRoot bool, rootDir, workDir string, itemFi os.FileInfo,
childWorkDir = filepath.Join(workDir, itemFi.Name())
}
if len(excludeFilename) > 0 {
possibleExcludeFile := filepath.Join(fullPath, excludeFilename)
if len(w.excludeFilename) > 0 {
possibleExcludeFile := filepath.Join(fullPath, w.excludeFilename)
var err error
excludePaths, err = loadExcludeFilename(possibleExcludeFile, childWorkDir, excludePaths)
if err != nil {
fiChan <- fastWalkInfo{Err: err}
w.ch <- fastWalkInfo{Err: err}
}
}
@ -245,30 +249,40 @@ func fastWalkFileOrDir(isRoot bool, rootDir, workDir string, itemFi os.FileInfo,
// filepath.Walk as a bonus.
df, err := os.Open(fullPath)
if err != nil {
fiChan <- fastWalkInfo{Err: err}
w.ch <- fastWalkInfo{Err: err}
return
}
defer df.Close()
// The number of items in a dir we process in each goroutine
jobSize := 100
for children, err := df.Readdir(jobSize); err == nil; children, err = df.Readdir(jobSize) {
// Parallelise all dirs, and chop large dirs into batches
waitg.Add(1)
go func(subitems []os.FileInfo) {
w.walk(children, func(subitems []os.FileInfo) {
for _, childFi := range subitems {
fastWalkFileOrDir(false, rootDir, childWorkDir, childFi, excludeFilename, excludePaths, fiChan, waitg)
w.Walk(false, childWorkDir, childFi, excludePaths)
}
waitg.Done()
}(children)
})
}
}
df.Close()
if err != nil && err != io.EOF {
fiChan <- fastWalkInfo{Err: err}
w.ch <- fastWalkInfo{Err: err}
}
}
func (w *fastWalker) walk(children []os.FileInfo, fn func([]os.FileInfo)) {
w.wg.Add(1)
go func() {
fn(children)
w.wg.Done()
}()
}
func (w *fastWalker) Wait() {
w.wg.Wait()
close(w.ch)
}
// loadExcludeFilename reads the given file in gitignore format and returns a
// revised array of exclude paths if there are any changes.
// If any changes are made a copy of the array is taken so the original is not

@ -15,13 +15,11 @@ import (
func TestCleanPathsCleansPaths(t *testing.T) {
cleaned := CleanPaths("/foo/bar/,/foo/bar/baz", ",")
assert.Equal(t, []string{"/foo/bar", "/foo/bar/baz"}, cleaned)
}
func TestCleanPathsReturnsNoResultsWhenGivenNoPaths(t *testing.T) {
cleaned := CleanPaths("", ",")
assert.Empty(t, cleaned)
}
@ -35,15 +33,14 @@ func TestFastWalkBasic(t *testing.T) {
expectedEntries := createFastWalkInputData(10, 160)
fchan := fastWalkWithExcludeFiles(expectedEntries[0], "", nil)
gotEntries, gotErrors := collectFastWalkResults(fchan)
walker := fastWalkWithExcludeFiles(expectedEntries[0], "")
gotEntries, gotErrors := collectFastWalkResults(walker.ch)
assert.Empty(t, gotErrors)
sort.Strings(expectedEntries)
sort.Strings(gotEntries)
assert.Equal(t, expectedEntries, gotEntries)
}
func BenchmarkFastWalkGitRepoChannels(b *testing.B) {
@ -229,7 +226,6 @@ func getFileMode(filename string) os.FileMode {
}
func TestSetWriteFlag(t *testing.T) {
f, err := ioutil.TempFile("", "lfstestwriteflag")
assert.Nil(t, err)
filename := f.Name()
@ -272,5 +268,4 @@ func TestSetWriteFlag(t *testing.T) {
// should only add back user write
assert.EqualValues(t, 0640, getFileMode(filename))
}
}