Draft version of FastWalk; optimised replacement for filepath.Walk
This commit is contained in:
parent
3a7ef57fc3
commit
13a8af6dfe
@ -185,3 +185,107 @@ func FilenamePassesIncludeExcludeFilter(filename string, includePaths, excludePa
|
|||||||
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FastWalkInfo is the value delivered on the channel returned by FastWalk.
// Each entry carries the directory it was found in, because FastWalk can
// provide paths out of order and the parent therefore cannot be implied from
// the previous entry.
type FastWalkInfo struct {
	// ParentDir is the directory containing the entry; join it with
	// Info.Name() to obtain the entry's path.
	ParentDir string
	// Info is the os.FileInfo describing the entry itself.
	Info os.FileInfo
}
|
||||||
|
|
||||||
|
// FastWalk is a more optimal implementation of filepath.Walk
|
||||||
|
// It differs in the following ways:
|
||||||
|
// * Provides a channel of information instead of using a callback func
|
||||||
|
// * Uses goroutines to parallelise large dirs and descent into subdirs
|
||||||
|
// * Does not provide sorted output; parents will always be before children but
|
||||||
|
// there are no other guarantees. Use parentDir in the FastWalkInfo struct to
|
||||||
|
// determine absolute path rather than tracking it yourself like filepath.Walk
|
||||||
|
// * Supports include / exclude filters
|
||||||
|
// Both dir and include/exclude paths can be relative or absolute, but they must
|
||||||
|
// all be of the same type. includePaths/excludePaths can be nil.
|
||||||
|
func FastWalk(dir string, includePaths, excludePaths []string) (<-chan FastWalkInfo, <-chan error) {
|
||||||
|
return FastWalkWithExcludeFiles(dir, "", includePaths, excludePaths)
|
||||||
|
}
|
||||||
|
|
||||||
|
// FastWalkWithExcludeFiles is like FastWalk but with the additional option to
|
||||||
|
// load any file named excludeFilename in any directory, and add its contents
|
||||||
|
// to the excludePaths list for that directory and children.
|
||||||
|
func FastWalkWithExcludeFiles(dir, excludeFilename string,
|
||||||
|
includePaths, excludePaths []string) (<-chan FastWalkInfo, <-chan error) {
|
||||||
|
fiChan := make(chan FastWalkInfo, 256)
|
||||||
|
errChan := make(chan error, 10)
|
||||||
|
|
||||||
|
dirFi, err := os.Stat(dir)
|
||||||
|
if err != nil {
|
||||||
|
errChan <- err
|
||||||
|
return fiChan, errChan
|
||||||
|
}
|
||||||
|
|
||||||
|
go fastWalkItem("", dirFi, excludeFilename, includePaths, excludePaths, fiChan, errChan)
|
||||||
|
|
||||||
|
return fiChan, errChan
|
||||||
|
}
|
||||||
|
|
||||||
|
// FastWalkGitRepo behaves like FastWalkWithExcludeFiles, preconfigured to ignore
|
||||||
|
// the git repo itself (.git) and to load exclude patterns from .gitignore
|
||||||
|
func FastWalkGitRepo(dir string) (<-chan FastWalkInfo, <-chan error) {
|
||||||
|
excludePaths := []string{".git"}
|
||||||
|
return FastWalkWithExcludeFiles(dir, ".gitignore", nil, excludePaths)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Main recursive implementation of fast walk
|
||||||
|
func fastWalkItem(parentDir string, itemFi os.FileInfo, excludeFilename string,
|
||||||
|
includePaths, excludePaths []string, fiChan chan<- FastWalkInfo, errChan chan<- error) {
|
||||||
|
|
||||||
|
fullPath := filepath.Join(parentDir, itemFi.Name())
|
||||||
|
if !FilenamePassesIncludeExcludeFilter(fullPath, includePaths, excludePaths) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fiChan <- FastWalkInfo{ParentDir: parentDir, Info: itemFi}
|
||||||
|
|
||||||
|
if !itemFi.IsDir() {
|
||||||
|
// Nothing more to do if this is not a dir
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(excludeFilename) > 0 {
|
||||||
|
possibleExcludeFile := filepath.Join(fullPath, excludeFilename)
|
||||||
|
if FileExists(possibleExcludeFile) {
|
||||||
|
excludePaths = loadExcludeFilename(possibleExcludeFile, excludePaths)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The absolute optimal way to scan would be File.Readdirnames but we
|
||||||
|
// still need the Stat() to know whether something is a dir, so use
|
||||||
|
// File.Readdir instead. Means we can provide os.FileInfo to callers like
|
||||||
|
// filepath.Walk as a bonus.
|
||||||
|
df, err := os.Open(fullPath)
|
||||||
|
if err != nil {
|
||||||
|
errChan <- err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
jobSize := 256
|
||||||
|
for children, err := df.Readdir(jobSize); err == nil; children, err = df.Readdir(jobSize) {
|
||||||
|
// Parallelise all dirs, and chop large dirs into batches of 256
|
||||||
|
go func() {
|
||||||
|
for _, childFi := range children {
|
||||||
|
fastWalkItem(fullPath, childFi, excludeFilename, includePaths, excludePaths, fiChan, errChan)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
}
|
||||||
|
if err != io.EOF {
|
||||||
|
errChan <- err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// loadExcludeFilename is the hook for merging the patterns of an exclude file
// (gitignore format) into the exclude paths used for a directory and its
// children.
//
// It is currently a placeholder: filename is not read and excludePaths is
// handed back unchanged. The intended contract is that, when the file adds
// patterns, a revised copy of the slice is returned so that the caller's
// original slice is never modified.
func loadExcludeFilename(filename string, excludePaths []string) []string {
	// TODO: read filename and append its patterns to a copy of excludePaths.
	revised := excludePaths
	return revised
}
|
||||||
|
Loading…
Reference in New Issue
Block a user