Draft version of FastWalk; optimised replacement for filepath.Walk
This commit is contained in:
parent
3a7ef57fc3
commit
13a8af6dfe
@ -185,3 +185,107 @@ func FilenamePassesIncludeExcludeFilter(filename string, includePaths, excludePa
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// FastWalkInfo is one entry reported by FastWalk, paired with the directory
// that contains it.
//
// FastWalk can deliver entries out of order, so a consumer cannot derive the
// containing directory from the previously received entry; it is carried
// explicitly instead.
type FastWalkInfo struct {
	// ParentDir is the directory containing the entry described by Info.
	ParentDir string
	// Info is the file information for the entry itself.
	Info os.FileInfo
}
|
||||
|
||||
// FastWalk is a more optimal implementation of filepath.Walk
|
||||
// It differs in the following ways:
|
||||
// * Provides a channel of information instead of using a callback func
|
||||
// * Uses goroutines to parallelise large dirs and descent into subdirs
|
||||
// * Does not provide sorted output; parents will always be before children but
|
||||
// there are no other guarantees. Use parentDir in the FastWalkInfo struct to
|
||||
// determine absolute path rather than tracking it yourself like filepath.Walk
|
||||
// * Supports include / exclude filters
|
||||
// Both dir and include/exclude paths can be relative or absolute, but they must
|
||||
// all be of the same type. includePaths/excludePaths can be nil.
|
||||
func FastWalk(dir string, includePaths, excludePaths []string) (<-chan FastWalkInfo, <-chan error) {
|
||||
return FastWalkWithExcludeFiles(dir, "", includePaths, excludePaths)
|
||||
}
|
||||
|
||||
// FastWalkWithExcludeFiles is like FastWalk but with the additional option to
|
||||
// load any file named excludeFilename in any directory, and add its contents
|
||||
// to the excludePaths list for that directory and children.
|
||||
func FastWalkWithExcludeFiles(dir, excludeFilename string,
|
||||
includePaths, excludePaths []string) (<-chan FastWalkInfo, <-chan error) {
|
||||
fiChan := make(chan FastWalkInfo, 256)
|
||||
errChan := make(chan error, 10)
|
||||
|
||||
dirFi, err := os.Stat(dir)
|
||||
if err != nil {
|
||||
errChan <- err
|
||||
return fiChan, errChan
|
||||
}
|
||||
|
||||
go fastWalkItem("", dirFi, excludeFilename, includePaths, excludePaths, fiChan, errChan)
|
||||
|
||||
return fiChan, errChan
|
||||
}
|
||||
|
||||
// FastWalkGitRepo behaves like FastWalkWithExcludeFiles, preconfigured to ignore
|
||||
// the git repo itself (.git) and to load exclude patterns from .gitignore
|
||||
func FastWalkGitRepo(dir string) (<-chan FastWalkInfo, <-chan error) {
|
||||
excludePaths := []string{".git"}
|
||||
return FastWalkWithExcludeFiles(dir, ".gitignore", nil, excludePaths)
|
||||
}
|
||||
|
||||
// Main recursive implementation of fast walk
|
||||
func fastWalkItem(parentDir string, itemFi os.FileInfo, excludeFilename string,
|
||||
includePaths, excludePaths []string, fiChan chan<- FastWalkInfo, errChan chan<- error) {
|
||||
|
||||
fullPath := filepath.Join(parentDir, itemFi.Name())
|
||||
if !FilenamePassesIncludeExcludeFilter(fullPath, includePaths, excludePaths) {
|
||||
return
|
||||
}
|
||||
|
||||
fiChan <- FastWalkInfo{ParentDir: parentDir, Info: itemFi}
|
||||
|
||||
if !itemFi.IsDir() {
|
||||
// Nothing more to do if this is not a dir
|
||||
return
|
||||
}
|
||||
|
||||
if len(excludeFilename) > 0 {
|
||||
possibleExcludeFile := filepath.Join(fullPath, excludeFilename)
|
||||
if FileExists(possibleExcludeFile) {
|
||||
excludePaths = loadExcludeFilename(possibleExcludeFile, excludePaths)
|
||||
}
|
||||
}
|
||||
|
||||
// The absolute optimal way to scan would be File.Readdirnames but we
|
||||
// still need the Stat() to know whether something is a dir, so use
|
||||
// File.Readdir instead. Means we can provide os.FileInfo to callers like
|
||||
// filepath.Walk as a bonus.
|
||||
df, err := os.Open(fullPath)
|
||||
if err != nil {
|
||||
errChan <- err
|
||||
return
|
||||
}
|
||||
jobSize := 256
|
||||
for children, err := df.Readdir(jobSize); err == nil; children, err = df.Readdir(jobSize) {
|
||||
// Parallelise all dirs, and chop large dirs into batches of 256
|
||||
go func() {
|
||||
for _, childFi := range children {
|
||||
fastWalkItem(fullPath, childFi, excludeFilename, includePaths, excludePaths, fiChan, errChan)
|
||||
}
|
||||
}()
|
||||
|
||||
}
|
||||
if err != io.EOF {
|
||||
errChan <- err
|
||||
}
|
||||
}
|
||||
|
||||
// loadExcludeFilename is intended to read filename, a file in gitignore
// format, and return the exclude paths extended with its patterns. The
// planned contract is that any change is made on a copy, so the slice passed
// in is never modified.
//
// Parsing is not implemented yet: filename is ignored and excludePaths is
// returned unchanged.
func loadExcludeFilename(filename string, excludePaths []string) []string {
	// TODO: parse filename and append its patterns to a copy of excludePaths.
	revised := excludePaths
	return revised
}
|
||||
|
Loading…
Reference in New Issue
Block a user