tools: extract internal fastWalker type
This commit is contained in:
parent
150edd19ea
commit
fd2d9df43b
@ -142,14 +142,8 @@ type FastWalkCallback func(parentDir string, info os.FileInfo, err error)
|
||||
//
|
||||
// rootDir - Absolute path to the top of the repository working directory
|
||||
func FastWalkGitRepo(rootDir string, cb FastWalkCallback) {
|
||||
// Ignore all git metadata including subrepos
|
||||
excludePaths := []filepathfilter.Pattern{
|
||||
filepathfilter.NewPattern(".git"),
|
||||
filepathfilter.NewPattern(filepath.Join("**", ".git")),
|
||||
}
|
||||
|
||||
fileCh := fastWalkWithExcludeFiles(rootDir, ".gitignore", excludePaths)
|
||||
for file := range fileCh {
|
||||
walker := fastWalkWithExcludeFiles(rootDir, ".gitignore")
|
||||
for file := range walker.ch {
|
||||
cb(file.ParentDir, file.Info, file.Err)
|
||||
}
|
||||
}
|
||||
@ -163,53 +157,63 @@ type fastWalkInfo struct {
|
||||
Err error
|
||||
}
|
||||
|
||||
// fastWalker bundles the state shared by every goroutine of a single walk:
// the root being walked, the per-directory exclude file name, the results
// channel and the WaitGroup that tracks in-flight goroutines.
type fastWalker struct {
|
||||
rootDir string // path passed to os.Stat and joined with workDir to build full paths
|
||||
excludeFilename string // exclude file looked up in each directory; skipped when empty
|
||||
ch chan fastWalkInfo // results and errors are sent here; closed by Wait
|
||||
limit int32 // NOTE(review): not read or written in the visible code; purpose unconfirmed
|
||||
cur *int32 // NOTE(review): not read or written in the visible code; purpose unconfirmed
|
||||
wg *sync.WaitGroup // incremented by walk for each goroutine it launches
|
||||
}
|
||||
|
||||
// fastWalkWithExcludeFiles walks the contents of a dir, respecting
|
||||
// include/exclude patterns and also loading new exclude patterns from files
|
||||
// named excludeFilename in directories walked
|
||||
//
|
||||
// rootDir - Absolute path to the top of the repository working directory
|
||||
func fastWalkWithExcludeFiles(rootDir, excludeFilename string,
|
||||
excludePaths []filepathfilter.Pattern) <-chan fastWalkInfo {
|
||||
fiChan := make(chan fastWalkInfo, 256)
|
||||
go fastWalkFromRoot(rootDir, excludeFilename, excludePaths, fiChan)
|
||||
return fiChan
|
||||
}
|
||||
|
||||
// rootDir - Absolute path to the top of the repository working directory
|
||||
func fastWalkFromRoot(rootDir string, excludeFilename string,
|
||||
excludePaths []filepathfilter.Pattern, fiChan chan<- fastWalkInfo) {
|
||||
|
||||
dirFi, err := os.Stat(rootDir)
|
||||
if err != nil {
|
||||
fiChan <- fastWalkInfo{Err: err}
|
||||
return
|
||||
func fastWalkWithExcludeFiles(rootDir, excludeFilename string) *fastWalker {
|
||||
excludePaths := []filepathfilter.Pattern{
|
||||
filepathfilter.NewPattern(".git"),
|
||||
filepathfilter.NewPattern(filepath.Join("**", ".git")),
|
||||
}
|
||||
|
||||
// This waitgroup will be incremented for each nested goroutine
|
||||
var waitg sync.WaitGroup
|
||||
fastWalkFileOrDir(true, rootDir, "", dirFi, excludeFilename, excludePaths, fiChan, &waitg)
|
||||
waitg.Wait()
|
||||
close(fiChan)
|
||||
w := &fastWalker{
|
||||
rootDir: rootDir,
|
||||
excludeFilename: excludeFilename,
|
||||
ch: make(chan fastWalkInfo, 256),
|
||||
wg: &sync.WaitGroup{},
|
||||
}
|
||||
|
||||
go func() {
|
||||
dirFi, err := os.Stat(w.rootDir)
|
||||
if err != nil {
|
||||
w.ch <- fastWalkInfo{Err: err}
|
||||
return
|
||||
}
|
||||
|
||||
w.Walk(true, "", dirFi, excludePaths)
|
||||
w.Wait()
|
||||
}()
|
||||
return w
|
||||
}
|
||||
|
||||
// fastWalkFileOrDir is the main recursive implementation of fast walk
|
||||
// Walk is the main recursive implementation of fast walk.
|
||||
// Sends the file/dir and any contents to the channel so long as it passes the
|
||||
// include/exclude filter. If a dir, parses any excludeFilename found and updates
|
||||
// the excludePaths with its content before (parallel) recursing into contents
|
||||
// Also splits large directories into multiple goroutines.
|
||||
// Increments the wait group once for each new goroutine launched internally.
|
||||
//
|
||||
// rootDir - Absolute path to the top of the repository working directory
|
||||
// workDir - Relative path inside the repository
|
||||
func fastWalkFileOrDir(isRoot bool, rootDir, workDir string, itemFi os.FileInfo, excludeFilename string,
|
||||
excludePaths []filepathfilter.Pattern, fiChan chan<- fastWalkInfo, waitg *sync.WaitGroup) {
|
||||
func (w *fastWalker) Walk(isRoot bool, workDir string, itemFi os.FileInfo,
|
||||
excludePaths []filepathfilter.Pattern) {
|
||||
|
||||
var fullPath string // Absolute path to the current file or dir
|
||||
var parentWorkDir string // Absolute path to the workDir inside the repository
|
||||
if isRoot {
|
||||
fullPath = rootDir
|
||||
fullPath = w.rootDir
|
||||
} else {
|
||||
parentWorkDir = filepath.Join(rootDir, workDir)
|
||||
parentWorkDir = filepath.Join(w.rootDir, workDir)
|
||||
fullPath = filepath.Join(parentWorkDir, itemFi.Name())
|
||||
}
|
||||
|
||||
@ -218,7 +222,7 @@ func fastWalkFileOrDir(isRoot bool, rootDir, workDir string, itemFi os.FileInfo,
|
||||
return
|
||||
}
|
||||
|
||||
fiChan <- fastWalkInfo{ParentDir: parentWorkDir, Info: itemFi}
|
||||
w.ch <- fastWalkInfo{ParentDir: parentWorkDir, Info: itemFi}
|
||||
|
||||
if !itemFi.IsDir() {
|
||||
// Nothing more to do if this is not a dir
|
||||
@ -230,12 +234,12 @@ func fastWalkFileOrDir(isRoot bool, rootDir, workDir string, itemFi os.FileInfo,
|
||||
childWorkDir = filepath.Join(workDir, itemFi.Name())
|
||||
}
|
||||
|
||||
if len(excludeFilename) > 0 {
|
||||
possibleExcludeFile := filepath.Join(fullPath, excludeFilename)
|
||||
if len(w.excludeFilename) > 0 {
|
||||
possibleExcludeFile := filepath.Join(fullPath, w.excludeFilename)
|
||||
var err error
|
||||
excludePaths, err = loadExcludeFilename(possibleExcludeFile, childWorkDir, excludePaths)
|
||||
if err != nil {
|
||||
fiChan <- fastWalkInfo{Err: err}
|
||||
w.ch <- fastWalkInfo{Err: err}
|
||||
}
|
||||
}
|
||||
|
||||
@ -245,30 +249,40 @@ func fastWalkFileOrDir(isRoot bool, rootDir, workDir string, itemFi os.FileInfo,
|
||||
// filepath.Walk as a bonus.
|
||||
df, err := os.Open(fullPath)
|
||||
if err != nil {
|
||||
fiChan <- fastWalkInfo{Err: err}
|
||||
w.ch <- fastWalkInfo{Err: err}
|
||||
return
|
||||
}
|
||||
defer df.Close()
|
||||
|
||||
// The number of items in a dir we process in each goroutine
|
||||
jobSize := 100
|
||||
|
||||
for children, err := df.Readdir(jobSize); err == nil; children, err = df.Readdir(jobSize) {
|
||||
// Parallelise all dirs, and chop large dirs into batches
|
||||
waitg.Add(1)
|
||||
go func(subitems []os.FileInfo) {
|
||||
w.walk(children, func(subitems []os.FileInfo) {
|
||||
for _, childFi := range subitems {
|
||||
fastWalkFileOrDir(false, rootDir, childWorkDir, childFi, excludeFilename, excludePaths, fiChan, waitg)
|
||||
w.Walk(false, childWorkDir, childFi, excludePaths)
|
||||
}
|
||||
waitg.Done()
|
||||
}(children)
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
df.Close()
|
||||
if err != nil && err != io.EOF {
|
||||
fiChan <- fastWalkInfo{Err: err}
|
||||
w.ch <- fastWalkInfo{Err: err}
|
||||
}
|
||||
}
|
||||
|
||||
// walk runs fn(children) on a new goroutine that is tracked by w.wg.
// The WaitGroup is incremented before the goroutine is started and
// decremented once fn has returned.
func (w *fastWalker) walk(children []os.FileInfo, fn func([]os.FileInfo)) {
|
||||
w.wg.Add(1)
|
||||
go func() {
|
||||
fn(children)
|
||||
// Done is not deferred, so it only runs when fn returns normally.
|
||||
w.wg.Done()
|
||||
}()
|
||||
}
|
||||
|
||||
// Wait blocks until every goroutine registered on w.wg has finished and then
// closes w.ch, which terminates any range loop reading from the channel.
//
// NOTE(review): the channel is only closed here, so a path that never calls
// Wait leaves readers of w.ch blocked. The os.Stat error path in
// fastWalkWithExcludeFiles returns without calling it.
func (w *fastWalker) Wait() {
|
||||
w.wg.Wait()
|
||||
close(w.ch)
|
||||
}
|
||||
|
||||
// loadExcludeFilename reads the given file in gitignore format and returns a
|
||||
// revised array of exclude paths if there are any changes.
|
||||
// If any changes are made a copy of the array is taken so the original is not
|
||||
|
@ -15,13 +15,11 @@ import (
|
||||
|
||||
// TestCleanPathsCleansPaths checks that CleanPaths splits its input on the
// given separator and returns each entry without a trailing slash.
func TestCleanPathsCleansPaths(t *testing.T) {
|
||||
cleaned := CleanPaths("/foo/bar/,/foo/bar/baz", ",")
|
||||

assert.Equal(t, []string{"/foo/bar", "/foo/bar/baz"}, cleaned)
|
||||
}
|
||||
|
||||
// TestCleanPathsReturnsNoResultsWhenGivenNoPaths checks that CleanPaths
// returns an empty result when the input string is empty.
func TestCleanPathsReturnsNoResultsWhenGivenNoPaths(t *testing.T) {
|
||||
cleaned := CleanPaths("", ",")
|
||||

assert.Empty(t, cleaned)
|
||||
}
|
||||
|
||||
@ -35,15 +33,14 @@ func TestFastWalkBasic(t *testing.T) {
|
||||
|
||||
expectedEntries := createFastWalkInputData(10, 160)
|
||||
|
||||
fchan := fastWalkWithExcludeFiles(expectedEntries[0], "", nil)
|
||||
gotEntries, gotErrors := collectFastWalkResults(fchan)
|
||||
walker := fastWalkWithExcludeFiles(expectedEntries[0], "")
|
||||
gotEntries, gotErrors := collectFastWalkResults(walker.ch)
|
||||
|
||||
assert.Empty(t, gotErrors)
|
||||
|
||||
sort.Strings(expectedEntries)
|
||||
sort.Strings(gotEntries)
|
||||
assert.Equal(t, expectedEntries, gotEntries)
|
||||
|
||||
}
|
||||
|
||||
func BenchmarkFastWalkGitRepoChannels(b *testing.B) {
|
||||
@ -229,7 +226,6 @@ func getFileMode(filename string) os.FileMode {
|
||||
}
|
||||
|
||||
func TestSetWriteFlag(t *testing.T) {
|
||||
|
||||
f, err := ioutil.TempFile("", "lfstestwriteflag")
|
||||
assert.Nil(t, err)
|
||||
filename := f.Name()
|
||||
@ -272,5 +268,4 @@ func TestSetWriteFlag(t *testing.T) {
|
||||
// should only add back user write
|
||||
assert.EqualValues(t, 0640, getFileMode(filename))
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user