git-lfs/tools/filetools.go

380 lines
11 KiB
Go
Raw Normal View History

// Package tools contains other helper functions too small to justify their own package
// NOTE: Subject to change, do not rely on this package from outside git-lfs source
package tools
import (
2016-10-31 15:37:21 +00:00
"bufio"
"encoding/hex"
"fmt"
"io"
"os"
"path"
"path/filepath"
"regexp"
"strings"
2016-10-31 15:39:52 +00:00
"sync"
)
var localDirSet = NewStringSetFromSlice([]string{".", "./", ".\\"})
// FileOrDirExists determines if a file/dir exists, returns IsDir() results too.
func FileOrDirExists(path string) (exists bool, isDir bool) {
fi, err := os.Stat(path)
if err != nil {
return false, false
} else {
return true, fi.IsDir()
}
}
// FileExists determines if a file (NOT dir) exists.
func FileExists(path string) bool {
ret, isDir := FileOrDirExists(path)
return ret && !isDir
}
// DirExists determines if a dir (NOT file) exists.
func DirExists(path string) bool {
ret, isDir := FileOrDirExists(path)
return ret && isDir
}
// FileExistsOfSize determines if a file exists and is of a specific size.
func FileExistsOfSize(path string, sz int64) bool {
fi, err := os.Stat(path)
if err != nil {
return false
}
return !fi.IsDir() && fi.Size() == sz
}
// ResolveSymlinks ensures that if the path supplied is a symlink, it is
// resolved to the actual concrete path
func ResolveSymlinks(path string) string {
if len(path) == 0 {
return path
}
if resolved, err := filepath.EvalSymlinks(path); err == nil {
return resolved
}
return path
}
// RenameFileCopyPermissions moves srcfile to destfile, replacing destfile if
// necessary and also copying the permissions of destfile if it already exists
func RenameFileCopyPermissions(srcfile, destfile string) error {
info, err := os.Stat(destfile)
if os.IsNotExist(err) {
// no original file
} else if err != nil {
return err
} else {
if err := os.Chmod(srcfile, info.Mode()); err != nil {
return fmt.Errorf("can't set filemode on file %q: %v", srcfile, err)
}
}
if err := os.Rename(srcfile, destfile); err != nil {
return fmt.Errorf("cannot replace %q with %q: %v", destfile, srcfile, err)
}
return nil
}
// CleanPaths splits the given `paths` argument by the delimiter argument, and
// then "cleans" that path according to the path.Clean function (see
// https://golang.org/pkg/path#Clean).
// Note always cleans to '/' path separators regardless of platform (git friendly)
func CleanPaths(paths, delim string) (cleaned []string) {
// If paths is an empty string, splitting it will yield [""], which will
// become the path ".". To avoid this, bail out if trimmed paths
// argument is empty.
if paths = strings.TrimSpace(paths); len(paths) == 0 {
return
}
for _, part := range strings.Split(paths, delim) {
part = strings.TrimSpace(part)
cleaned = append(cleaned, path.Clean(part))
}
return cleaned
}
// VerifyFileHash reads a file and verifies whether the SHA is correct
// Returns an error if there is a problem
func VerifyFileHash(oid, path string) error {
f, err := os.Open(path)
if err != nil {
return err
}
defer f.Close()
h := NewLfsContentHash()
_, err = io.Copy(h, f)
if err != nil {
return err
}
calcOid := hex.EncodeToString(h.Sum(nil))
if calcOid != oid {
return fmt.Errorf("File %q has an invalid hash %s, expected %s", path, calcOid, oid)
}
return nil
}
// FilenamePassesIncludeExcludeFilter returns whether a given filename passes the include / exclude path filters
// Only paths that are in includePaths and outside excludePaths are passed
// If includePaths is empty that filter always passes and the same with excludePaths
// Both path lists support wildcard matches
func FilenamePassesIncludeExcludeFilter(filename string, includePaths, excludePaths []string) bool {
if len(includePaths) == 0 && len(excludePaths) == 0 {
return true
}
if len(includePaths) > 0 {
matched := false
for _, inc := range includePaths {
matched = FileMatch(inc, filename)
if matched {
break
}
}
if !matched {
return false
}
}
if len(excludePaths) > 0 {
for _, ex := range excludePaths {
if FileMatch(ex, filename) {
return false
}
}
}
return true
}
// FileMatch is a revised version of filepath.Match which makes it behave more
// like gitignore
func FileMatch(pattern, name string) bool {
pattern = filepath.Clean(pattern)
name = filepath.Clean(name)
// Special case local dir, matches all (inc subpaths)
if _, local := localDirSet[pattern]; local {
return true
}
if matched, _ := filepath.Match(pattern, name); matched {
return true
}
// special case * when there are no path separators
// filepath.Match never allows * to match a path separator, which is correct
// for gitignore IF the pattern includes a path separator, but not otherwise
// So *.txt should match in any subdir, as should test*, but sub/*.txt would
// only match directly in the sub dir
// Don't need to test cross-platform separators as both cleaned above
if !strings.Contains(pattern, string(filepath.Separator)) &&
strings.Contains(pattern, "*") {
pattern = regexp.QuoteMeta(pattern)
// Match the whole of the base name but allow matching in folders if no path
basename := filepath.Base(name)
regpattern := fmt.Sprintf("^%s$", strings.Replace(pattern, "\\*", ".*", -1))
if regexp.MustCompile(regpattern).MatchString(basename) {
return true
}
}
// Also support ** with path separators
if strings.Contains(pattern, string(filepath.Separator)) && strings.Contains(pattern, "**") {
pattern = regexp.QuoteMeta(pattern)
regpattern := fmt.Sprintf("^%s$", strings.Replace(pattern, "\\*\\*", ".*", -1))
if regexp.MustCompile(regpattern).MatchString(name) {
return true
}
}
// Also support matching a parent directory without a wildcard
if strings.HasPrefix(name, pattern+string(filepath.Separator)) {
return true
}
return false
}
// Returned from FastWalk with parent directory context
// This is needed because FastWalk can provide paths out of order so the
// parent dir cannot be implied
type FastWalkInfo struct {
ParentDir string
Info os.FileInfo
}
// FastWalk is a more optimal implementation of filepath.Walk
// It differs in the following ways:
// * Provides a channel of information instead of using a callback func
// * Uses goroutines to parallelise large dirs and descent into subdirs
// * Does not provide sorted output; parents will always be before children but
// there are no other guarantees. Use parentDir in the FastWalkInfo struct to
// determine absolute path rather than tracking it yourself like filepath.Walk
// * Supports include / exclude filters
// Both dir and include/exclude paths can be relative or absolute, but they must
// all be of the same type. includePaths/excludePaths can be nil.
func FastWalk(dir string, includePaths, excludePaths []string) (<-chan FastWalkInfo, <-chan error) {
return FastWalkWithExcludeFiles(dir, "", includePaths, excludePaths)
}
// FastWalkWithExcludeFiles is like FastWalk but with the additional option to
// load any file named excludeFilename in any directory, and add its contents
// to the excludePaths list for that directory and children.
func FastWalkWithExcludeFiles(dir, excludeFilename string,
includePaths, excludePaths []string) (<-chan FastWalkInfo, <-chan error) {
fiChan := make(chan FastWalkInfo, 256)
errChan := make(chan error, 10)
2016-10-31 15:39:52 +00:00
go fastWalkFromRoot(dir, excludeFilename, includePaths, excludePaths, fiChan, errChan)
return fiChan, errChan
}
// FastWalkGitRepo behaves like FastWalkWithExcludeFiles, preconfigured to ignore
// the git repo itself (.git) and to load exclude patterns from .gitignore
func FastWalkGitRepo(dir string) (<-chan FastWalkInfo, <-chan error) {
2016-11-01 10:23:55 +00:00
// Ignore all git metadata including subrepos
excludePaths := []string{".git", filepath.Join("**", ".git")}
return FastWalkWithExcludeFiles(dir, ".gitignore", nil, excludePaths)
}
2016-10-31 15:39:52 +00:00
func fastWalkFromRoot(dir string, excludeFilename string,
includePaths, excludePaths []string, fiChan chan<- FastWalkInfo, errChan chan<- error) {
dirFi, err := os.Stat(dir)
if err != nil {
errChan <- err
return
}
// This waitgroup will be incremented for each nested goroutine
var waitg sync.WaitGroup
fastWalkFileOrDir(filepath.Dir(dir), dirFi, excludeFilename, includePaths, excludePaths, fiChan, errChan, &waitg)
2016-10-31 15:39:52 +00:00
waitg.Wait()
close(fiChan)
close(errChan)
}
// fastWalkFileOrDir is the main recursive implementation of fast walk
// Sends the file/dir and any contents to the channel so long as it passes the
// include/exclude filter. If a dir, parses any excludeFilename found and updates
// the excludePaths with its content before (parallel) recursing into contents
// Also splits large directories into multiple goroutines.
// Increments waitg.Add(1) for each new goroutine launched internally
func fastWalkFileOrDir(parentDir string, itemFi os.FileInfo, excludeFilename string,
2016-10-31 15:39:52 +00:00
includePaths, excludePaths []string, fiChan chan<- FastWalkInfo, errChan chan<- error,
waitg *sync.WaitGroup) {
fullPath := filepath.Join(parentDir, itemFi.Name())
2016-10-31 15:39:52 +00:00
if !FilenamePassesIncludeExcludeFilter(fullPath, includePaths, excludePaths) {
return
}
fiChan <- FastWalkInfo{ParentDir: parentDir, Info: itemFi}
if !itemFi.IsDir() {
// Nothing more to do if this is not a dir
return
}
if len(excludeFilename) > 0 {
possibleExcludeFile := filepath.Join(fullPath, excludeFilename)
if FileExists(possibleExcludeFile) {
var err error
excludePaths, err = loadExcludeFilename(possibleExcludeFile, fullPath, excludePaths)
if err != nil {
errChan <- err
}
}
}
// The absolute optimal way to scan would be File.Readdirnames but we
// still need the Stat() to know whether something is a dir, so use
// File.Readdir instead. Means we can provide os.FileInfo to callers like
// filepath.Walk as a bonus.
df, err := os.Open(fullPath)
if err != nil {
errChan <- err
return
}
2016-10-31 14:22:12 +00:00
defer df.Close()
2016-10-31 15:39:52 +00:00
jobSize := 100
for children, err := df.Readdir(jobSize); err == nil; children, err = df.Readdir(jobSize) {
2016-10-31 15:39:52 +00:00
// Parallelise all dirs, and chop large dirs into batches
waitg.Add(1)
go func(subitems []os.FileInfo) {
for _, childFi := range subitems {
fastWalkFileOrDir(fullPath, childFi, excludeFilename, includePaths, excludePaths, fiChan, errChan, waitg)
}
2016-10-31 15:39:52 +00:00
waitg.Done()
}(children)
}
2016-10-31 15:39:52 +00:00
if err != nil && err != io.EOF {
errChan <- err
}
2016-10-31 15:39:52 +00:00
}
// loadExcludeFilename reads the given file in gitignore format and returns a
// revised array of exclude paths if there are any changes.
// If any changes are made a copy of the array is taken so the original is not
// modified
func loadExcludeFilename(filename, parentDir string, excludePaths []string) ([]string, error) {
2016-10-31 15:37:21 +00:00
f, err := os.OpenFile(filename, os.O_RDONLY, 0644)
if err != nil {
return excludePaths, err
2016-10-31 15:37:21 +00:00
}
defer f.Close()
retPaths := excludePaths
modified := false
scanner := bufio.NewScanner(f)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
// Skip blanks, comments and negations (not supported right now)
if len(line) == 0 || strings.HasPrefix(line, "#") || strings.HasPrefix(line, "!") {
continue
}
if !modified {
// copy on write
retPaths = make([]string, len(excludePaths))
copy(retPaths, excludePaths)
modified = true
}
path := line
// Add pattern in context if exclude has separator, or no wildcard
// Allow for both styles of separator at this point
if strings.ContainsAny(path, "/\\") ||
!strings.Contains(path, "*") {
path = filepath.Join(parentDir, line)
}
2016-10-31 15:37:21 +00:00
retPaths = append(retPaths, path)
}
return retPaths, nil
2016-10-31 15:37:21 +00:00
}