2016-05-18 10:43:42 +00:00
|
|
|
// Package tools contains other helper functions too small to justify their own package
|
|
|
|
// NOTE: Subject to change, do not rely on this package from outside git-lfs source
|
2016-05-13 16:38:06 +00:00
|
|
|
package tools
|
|
|
|
|
|
|
|
import (
|
2016-10-31 15:37:21 +00:00
|
|
|
"bufio"
|
2016-07-12 10:43:54 +00:00
|
|
|
"encoding/hex"
|
2016-05-25 10:28:24 +00:00
|
|
|
"fmt"
|
2016-07-12 10:43:54 +00:00
|
|
|
"io"
|
2016-05-13 16:38:06 +00:00
|
|
|
"os"
|
2016-07-15 13:20:53 +00:00
|
|
|
"path"
|
2016-05-13 16:38:06 +00:00
|
|
|
"path/filepath"
|
2016-05-31 15:48:09 +00:00
|
|
|
"strings"
|
2016-10-31 15:39:52 +00:00
|
|
|
"sync"
|
2016-05-13 16:38:06 +00:00
|
|
|
|
2016-11-21 23:53:19 +00:00
|
|
|
"github.com/git-lfs/git-lfs/filepathfilter"
|
|
|
|
)
|
2016-10-31 12:45:12 +00:00
|
|
|
|
2016-05-13 16:38:06 +00:00
|
|
|
// FileOrDirExists reports whether path can be stat'ed and, if so, whether it
// refers to a directory. Any error from os.Stat (not only "does not exist")
// is reported as exists == false, isDir == false.
func FileOrDirExists(path string) (exists bool, isDir bool) {
	info, statErr := os.Stat(path)
	if statErr != nil {
		return false, false
	}
	return true, info.IsDir()
}
|
|
|
|
|
|
|
|
// FileExists determines if a file (NOT dir) exists.
|
|
|
|
func FileExists(path string) bool {
|
|
|
|
ret, isDir := FileOrDirExists(path)
|
|
|
|
return ret && !isDir
|
|
|
|
}
|
|
|
|
|
|
|
|
// DirExists determines if a dir (NOT file) exists.
|
|
|
|
func DirExists(path string) bool {
|
|
|
|
ret, isDir := FileOrDirExists(path)
|
|
|
|
return ret && isDir
|
|
|
|
}
|
|
|
|
|
|
|
|
// FileExistsOfSize reports whether path can be stat'ed, is not a directory,
// and has a size of exactly sz bytes. Any os.Stat error yields false.
func FileExistsOfSize(path string, sz int64) bool {
	info, statErr := os.Stat(path)
	if statErr != nil || info.IsDir() {
		return false
	}
	return info.Size() == sz
}
|
|
|
|
|
|
|
|
// ResolveSymlinks returns path with symlinks evaluated by
// filepath.EvalSymlinks. An empty path, or a path that cannot be evaluated
// (EvalSymlinks returns an error), is returned unchanged.
func ResolveSymlinks(path string) string {
	if path == "" {
		return path
	}

	target, evalErr := filepath.EvalSymlinks(path)
	if evalErr != nil {
		// Best effort: fall back to what the caller gave us.
		return path
	}
	return target
}
|
2016-05-25 10:28:24 +00:00
|
|
|
|
|
|
|
// RenameFileCopyPermissions renames srcfile to destfile. If destfile already
// exists, its file mode is first applied to srcfile so the replacement keeps
// the mode of the file it replaces. A stat failure on destfile other than
// "does not exist" is returned as-is; chmod and rename failures are returned
// as formatted errors.
func RenameFileCopyPermissions(srcfile, destfile string) error {
	destInfo, statErr := os.Stat(destfile)
	switch {
	case statErr == nil:
		// Destination exists: carry its mode over to the replacement.
		if chmodErr := os.Chmod(srcfile, destInfo.Mode()); chmodErr != nil {
			return fmt.Errorf("can't set filemode on file %q: %v", srcfile, chmodErr)
		}
	case !os.IsNotExist(statErr):
		return statErr
	}
	// Remaining case: destfile does not exist, so there is no mode to copy.

	if renameErr := os.Rename(srcfile, destfile); renameErr != nil {
		return fmt.Errorf("cannot replace %q with %q: %v", destfile, srcfile, renameErr)
	}
	return nil
}
|
2016-06-02 09:04:40 +00:00
|
|
|
|
2016-05-31 15:48:09 +00:00
|
|
|
// CleanPaths splits the given `paths` argument by the delimiter argument,
// trims surrounding whitespace from each segment, and then "cleans" each
// segment with the path.Clean function (see
// https://golang.org/pkg/path#Clean).
//
// path.Clean (rather than filepath.Clean) is used, so '/' is treated as the
// separator on every platform.
//
// Segments that are empty after trimming (an empty `paths`, doubled
// delimiters, or a leading/trailing delimiter) are skipped, because
// path.Clean would otherwise turn each of them into the path ".". If nothing
// remains, a nil slice is returned.
func CleanPaths(paths, delim string) (cleaned []string) {
	// Splitting an empty string yields [""]; bail out early in that case.
	if paths = strings.TrimSpace(paths); len(paths) == 0 {
		return nil
	}

	for _, part := range strings.Split(paths, delim) {
		part = strings.TrimSpace(part)
		if len(part) == 0 {
			// path.Clean("") == ".", which is never what the caller meant.
			continue
		}

		cleaned = append(cleaned, path.Clean(part))
	}

	return cleaned
}
|
|
|
|
|
2016-07-12 10:43:54 +00:00
|
|
|
// VerifyFileHash reads a file and verifies whether the SHA is correct
|
|
|
|
// Returns an error if there is a problem
|
|
|
|
func VerifyFileHash(oid, path string) error {
|
|
|
|
f, err := os.Open(path)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
|
|
|
|
h := NewLfsContentHash()
|
|
|
|
_, err = io.Copy(h, f)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
calcOid := hex.EncodeToString(h.Sum(nil))
|
|
|
|
if calcOid != oid {
|
|
|
|
return fmt.Errorf("File %q has an invalid hash %s, expected %s", path, calcOid, oid)
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
2016-10-31 12:45:12 +00:00
|
|
|
|
2016-10-31 12:46:49 +00:00
|
|
|
// FastWalkInfo is the value delivered on the channel returned by the fast
// walk functions. It carries the parent directory alongside the file info
// because the walk can deliver entries out of order, so a receiver cannot
// infer the parent from the sequence of results.
type FastWalkInfo struct {
	// ParentDir is the directory containing the entry described by Info.
	ParentDir string
	// Info is the os.FileInfo of the walked file or directory.
	Info os.FileInfo
}
|
|
|
|
|
2016-11-08 09:58:07 +00:00
|
|
|
// fastWalkWithExcludeFiles walks the contents of a dir, respecting
|
|
|
|
// include/exclude patterns and also loading new exlude patterns from files
|
|
|
|
// named excludeFilename in directories walked
|
|
|
|
func fastWalkWithExcludeFiles(dir, excludeFilename string,
|
2016-11-21 23:53:19 +00:00
|
|
|
excludePaths []filepathfilter.Pattern) (<-chan FastWalkInfo, <-chan error) {
|
2016-10-31 12:46:49 +00:00
|
|
|
fiChan := make(chan FastWalkInfo, 256)
|
|
|
|
errChan := make(chan error, 10)
|
|
|
|
|
2016-11-21 23:53:19 +00:00
|
|
|
go fastWalkFromRoot(dir, excludeFilename, excludePaths, fiChan, errChan)
|
2016-10-31 12:46:49 +00:00
|
|
|
|
|
|
|
return fiChan, errChan
|
|
|
|
}
|
|
|
|
|
2016-11-08 09:58:07 +00:00
|
|
|
// FastWalkGitRepo is a more optimal implementation of filepath.Walk for a Git repo
|
|
|
|
// It differs in the following ways:
|
|
|
|
// * Provides a channel of information instead of using a callback func
|
|
|
|
// * Uses goroutines to parallelise large dirs and descent into subdirs
|
|
|
|
// * Does not provide sorted output; parents will always be before children but
|
|
|
|
// there are no other guarantees. Use parentDir in the FastWalkInfo struct to
|
|
|
|
// determine absolute path rather than tracking it yourself like filepath.Walk
|
|
|
|
// * Automatically ignores any .git directories
|
|
|
|
// * Respects .gitignore contents and skips ignored files/dirs
|
2016-10-31 12:46:49 +00:00
|
|
|
func FastWalkGitRepo(dir string) (<-chan FastWalkInfo, <-chan error) {
|
2016-11-01 10:23:55 +00:00
|
|
|
// Ignore all git metadata including subrepos
|
2016-11-21 23:53:19 +00:00
|
|
|
excludePaths := []filepathfilter.Pattern{
|
|
|
|
filepathfilter.NewPattern(".git"),
|
|
|
|
filepathfilter.NewPattern(filepath.Join("**", ".git")),
|
|
|
|
}
|
|
|
|
return fastWalkWithExcludeFiles(dir, ".gitignore", excludePaths)
|
2016-10-31 12:46:49 +00:00
|
|
|
}
|
|
|
|
|
2016-10-31 15:39:52 +00:00
|
|
|
func fastWalkFromRoot(dir string, excludeFilename string,
|
2016-11-21 23:53:19 +00:00
|
|
|
excludePaths []filepathfilter.Pattern, fiChan chan<- FastWalkInfo, errChan chan<- error) {
|
2016-10-31 15:39:52 +00:00
|
|
|
|
|
|
|
dirFi, err := os.Stat(dir)
|
|
|
|
if err != nil {
|
|
|
|
errChan <- err
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// This waitgroup will be incremented for each nested goroutine
|
|
|
|
var waitg sync.WaitGroup
|
|
|
|
|
2016-11-21 23:53:19 +00:00
|
|
|
fastWalkFileOrDir(filepath.Dir(dir), dirFi, excludeFilename, excludePaths, fiChan, errChan, &waitg)
|
2016-10-31 15:39:52 +00:00
|
|
|
|
|
|
|
waitg.Wait()
|
|
|
|
close(fiChan)
|
|
|
|
close(errChan)
|
|
|
|
}
|
|
|
|
|
2016-11-08 09:48:29 +00:00
|
|
|
// fastWalkFileOrDir is the main recursive implementation of fast walk
|
|
|
|
// Sends the file/dir and any contents to the channel so long as it passes the
|
|
|
|
// include/exclude filter. If a dir, parses any excludeFilename found and updates
|
|
|
|
// the excludePaths with its content before (parallel) recursing into contents
|
|
|
|
// Also splits large directories into multiple goroutines.
|
|
|
|
// Increments waitg.Add(1) for each new goroutine launched internally
|
|
|
|
func fastWalkFileOrDir(parentDir string, itemFi os.FileInfo, excludeFilename string,
|
2016-11-21 23:53:19 +00:00
|
|
|
excludePaths []filepathfilter.Pattern, fiChan chan<- FastWalkInfo, errChan chan<- error,
|
2016-10-31 15:39:52 +00:00
|
|
|
waitg *sync.WaitGroup) {
|
2016-10-31 12:46:49 +00:00
|
|
|
|
|
|
|
fullPath := filepath.Join(parentDir, itemFi.Name())
|
2016-10-31 15:39:52 +00:00
|
|
|
|
2016-11-21 23:53:19 +00:00
|
|
|
if !filepathfilter.NewFromPatterns(nil, excludePaths).Allows(fullPath) {
|
2016-10-31 12:46:49 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
fiChan <- FastWalkInfo{ParentDir: parentDir, Info: itemFi}
|
|
|
|
|
|
|
|
if !itemFi.IsDir() {
|
|
|
|
// Nothing more to do if this is not a dir
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(excludeFilename) > 0 {
|
|
|
|
possibleExcludeFile := filepath.Join(fullPath, excludeFilename)
|
|
|
|
if FileExists(possibleExcludeFile) {
|
2016-11-01 10:24:55 +00:00
|
|
|
var err error
|
|
|
|
excludePaths, err = loadExcludeFilename(possibleExcludeFile, fullPath, excludePaths)
|
|
|
|
if err != nil {
|
|
|
|
errChan <- err
|
|
|
|
}
|
2016-10-31 12:46:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// The absolute optimal way to scan would be File.Readdirnames but we
|
|
|
|
// still need the Stat() to know whether something is a dir, so use
|
|
|
|
// File.Readdir instead. Means we can provide os.FileInfo to callers like
|
|
|
|
// filepath.Walk as a bonus.
|
|
|
|
df, err := os.Open(fullPath)
|
|
|
|
if err != nil {
|
|
|
|
errChan <- err
|
|
|
|
return
|
|
|
|
}
|
2016-10-31 14:22:12 +00:00
|
|
|
defer df.Close()
|
2016-11-21 23:53:19 +00:00
|
|
|
|
2016-11-08 09:57:39 +00:00
|
|
|
// The number of items in a dir we process in each goroutine
|
2016-10-31 15:39:52 +00:00
|
|
|
jobSize := 100
|
2016-10-31 12:46:49 +00:00
|
|
|
for children, err := df.Readdir(jobSize); err == nil; children, err = df.Readdir(jobSize) {
|
2016-10-31 15:39:52 +00:00
|
|
|
// Parallelise all dirs, and chop large dirs into batches
|
|
|
|
waitg.Add(1)
|
|
|
|
go func(subitems []os.FileInfo) {
|
|
|
|
for _, childFi := range subitems {
|
2016-11-21 23:53:19 +00:00
|
|
|
fastWalkFileOrDir(fullPath, childFi, excludeFilename, excludePaths, fiChan, errChan, waitg)
|
2016-10-31 12:46:49 +00:00
|
|
|
}
|
2016-10-31 15:39:52 +00:00
|
|
|
waitg.Done()
|
|
|
|
}(children)
|
2016-10-31 12:46:49 +00:00
|
|
|
|
|
|
|
}
|
2016-10-31 15:39:52 +00:00
|
|
|
if err != nil && err != io.EOF {
|
2016-10-31 12:46:49 +00:00
|
|
|
errChan <- err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// loadExcludeFilename reads the given file in gitignore format and returns a
|
|
|
|
// revised array of exclude paths if there are any changes.
|
|
|
|
// If any changes are made a copy of the array is taken so the original is not
|
|
|
|
// modified
|
2016-11-21 23:53:19 +00:00
|
|
|
func loadExcludeFilename(filename, parentDir string, excludePaths []filepathfilter.Pattern) ([]filepathfilter.Pattern, error) {
|
2016-10-31 15:37:21 +00:00
|
|
|
f, err := os.OpenFile(filename, os.O_RDONLY, 0644)
|
|
|
|
if err != nil {
|
2016-11-01 10:24:55 +00:00
|
|
|
return excludePaths, err
|
2016-10-31 15:37:21 +00:00
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
|
|
|
|
retPaths := excludePaths
|
|
|
|
modified := false
|
|
|
|
|
|
|
|
scanner := bufio.NewScanner(f)
|
|
|
|
for scanner.Scan() {
|
|
|
|
line := strings.TrimSpace(scanner.Text())
|
|
|
|
// Skip blanks, comments and negations (not supported right now)
|
|
|
|
if len(line) == 0 || strings.HasPrefix(line, "#") || strings.HasPrefix(line, "!") {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if !modified {
|
|
|
|
// copy on write
|
2016-11-21 23:53:19 +00:00
|
|
|
retPaths = make([]filepathfilter.Pattern, len(excludePaths))
|
2016-10-31 15:37:21 +00:00
|
|
|
copy(retPaths, excludePaths)
|
|
|
|
modified = true
|
|
|
|
}
|
|
|
|
|
2016-11-01 12:18:38 +00:00
|
|
|
path := line
|
|
|
|
// Add pattern in context if exclude has separator, or no wildcard
|
|
|
|
// Allow for both styles of separator at this point
|
|
|
|
if strings.ContainsAny(path, "/\\") ||
|
|
|
|
!strings.Contains(path, "*") {
|
|
|
|
path = filepath.Join(parentDir, line)
|
|
|
|
}
|
2016-11-21 23:53:19 +00:00
|
|
|
retPaths = append(retPaths, filepathfilter.NewPattern(path))
|
2016-10-31 15:37:21 +00:00
|
|
|
}
|
|
|
|
|
2016-11-01 10:24:55 +00:00
|
|
|
return retPaths, nil
|
2016-10-31 12:46:49 +00:00
|
|
|
}
|