lfs: find invalid pointers
In the future, we'll want to support detecting various problems with pointers. These fall into two types: pointers which are non-canonical and files which should be pointers but are not. Our existing scanning functions are not well suited to this, unfortunately, so we add some additional functions. We first scan all of the commits in the range we want and then, having found their object IDs, call git ls-tree to enumerate each item in each commit's corresponding root tree. We accumulate the patterns in every .gitattributes file we find, and we keep track of every other file we process, checking small files for being a pointer. Once we've processed the entire tree, we compute the set of patterns from the accumulated .gitattributes files and check each file against it. If a file matches the patterns and is a pointer, we emit the pointer to our callback, and if it matches the patterns but is not a pointer, then we emit an error indicating that it should have been a pointer.
This commit is contained in:
parent
6bfbde868a
commit
608bc8d53e
@ -159,6 +159,19 @@ func (s *GitScanner) ScanRef(ref string, cb GitScannerFoundPointer) error {
|
|||||||
return scanLeftRightToChan(s, callback, ref, "", s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
|
return scanLeftRightToChan(s, callback, ref, "", s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ScanRefByTree scans through all trees in the current ref.
|
||||||
|
func (s *GitScanner) ScanRefByTree(ref string, cb GitScannerFoundPointer) error {
|
||||||
|
callback, err := firstGitScannerCallback(cb, s.FoundPointer)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
opts := s.opts(ScanRefsMode)
|
||||||
|
opts.SkipDeletedBlobs = true
|
||||||
|
opts.CommitsOnly = true
|
||||||
|
return scanRefsByTree(s, callback, []string{ref}, []string{}, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
|
||||||
|
}
|
||||||
|
|
||||||
// ScanAll scans through all objects in the git repository.
|
// ScanAll scans through all objects in the git repository.
|
||||||
func (s *GitScanner) ScanAll(cb GitScannerFoundPointer) error {
|
func (s *GitScanner) ScanAll(cb GitScannerFoundPointer) error {
|
||||||
callback, err := firstGitScannerCallback(cb, s.FoundPointer)
|
callback, err := firstGitScannerCallback(cb, s.FoundPointer)
|
||||||
@ -257,6 +270,7 @@ type ScanRefsOptions struct {
|
|||||||
ScanMode ScanningMode
|
ScanMode ScanningMode
|
||||||
RemoteName string
|
RemoteName string
|
||||||
SkipDeletedBlobs bool
|
SkipDeletedBlobs bool
|
||||||
|
CommitsOnly bool
|
||||||
skippedRefs []string
|
skippedRefs []string
|
||||||
nameMap map[string]string
|
nameMap map[string]string
|
||||||
mutex *sync.Mutex
|
mutex *sync.Mutex
|
||||||
|
@ -2,6 +2,7 @@ package lfs
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/hex"
|
"encoding/hex"
|
||||||
|
"sync"
|
||||||
|
|
||||||
"github.com/git-lfs/git-lfs/config"
|
"github.com/git-lfs/git-lfs/config"
|
||||||
"github.com/git-lfs/git-lfs/git"
|
"github.com/git-lfs/git-lfs/git"
|
||||||
@ -105,6 +106,45 @@ func scanMultiLeftRightToChan(scanner *GitScanner, pointerCb GitScannerFoundPoin
|
|||||||
return scanRefsToChan(scanner, pointerCb, []string{refLeft}, bases, gitEnv, osEnv, opt)
|
return scanRefsToChan(scanner, pointerCb, []string{refLeft}, bases, gitEnv, osEnv, opt)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// scanRefsByTree scans through all commits reachable by refs contained in
|
||||||
|
// "include" and not reachable by any refs included in "exclude" and invokes
|
||||||
|
// the provided callback for each pointer file, valid or invalid, that it finds.
|
||||||
|
// Reports unique oids once only, not multiple times if >1 file uses the same content
|
||||||
|
func scanRefsByTree(scanner *GitScanner, pointerCb GitScannerFoundPointer, include, exclude []string, gitEnv, osEnv config.Environment, opt *ScanRefsOptions) error {
|
||||||
|
if opt == nil {
|
||||||
|
panic("no scan ref options")
|
||||||
|
}
|
||||||
|
|
||||||
|
revs, err := revListShas(include, exclude, opt)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
errchan := make(chan error, 20) // multiple errors possible
|
||||||
|
wg := &sync.WaitGroup{}
|
||||||
|
|
||||||
|
for r := range revs.Results {
|
||||||
|
wg.Add(1)
|
||||||
|
go func(rev string) {
|
||||||
|
defer wg.Done()
|
||||||
|
err := runScanTreeForPointers(pointerCb, rev, gitEnv, osEnv)
|
||||||
|
if err != nil {
|
||||||
|
errchan <- err
|
||||||
|
}
|
||||||
|
}(r)
|
||||||
|
}
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
|
close(errchan)
|
||||||
|
for err := range errchan {
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return revs.Wait()
|
||||||
|
}
|
||||||
|
|
||||||
// revListShas uses git rev-list to return the list of object sha1s
|
// revListShas uses git rev-list to return the list of object sha1s
|
||||||
// for the given ref. If all is true, ref is ignored. It returns a
|
// for the given ref. If all is true, ref is ignored. It returns a
|
||||||
// channel from which sha1 strings can be read.
|
// channel from which sha1 strings can be read.
|
||||||
@ -116,6 +156,7 @@ func revListShas(include, exclude []string, opt *ScanRefsOptions) (*StringChanne
|
|||||||
SkippedRefs: opt.skippedRefs,
|
SkippedRefs: opt.skippedRefs,
|
||||||
Mutex: opt.mutex,
|
Mutex: opt.mutex,
|
||||||
Names: opt.nameMap,
|
Names: opt.nameMap,
|
||||||
|
CommitsOnly: opt.CommitsOnly,
|
||||||
})
|
})
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -3,10 +3,14 @@ package lfs
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
"path"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
"github.com/git-lfs/git-lfs/config"
|
"github.com/git-lfs/git-lfs/config"
|
||||||
|
"github.com/git-lfs/git-lfs/errors"
|
||||||
"github.com/git-lfs/git-lfs/filepathfilter"
|
"github.com/git-lfs/git-lfs/filepathfilter"
|
||||||
"github.com/git-lfs/git-lfs/git"
|
"github.com/git-lfs/git-lfs/git"
|
||||||
|
"github.com/git-lfs/git-lfs/git/gitattr"
|
||||||
)
|
)
|
||||||
|
|
||||||
func runScanTree(cb GitScannerFoundPointer, ref string, filter *filepathfilter.Filter, gitEnv, osEnv config.Environment) error {
|
func runScanTree(cb GitScannerFoundPointer, ref string, filter *filepathfilter.Filter, gitEnv, osEnv config.Environment) error {
|
||||||
@ -120,3 +124,115 @@ func lsTreeBlobs(ref string, predicate func(*git.TreeBlob) bool) (*TreeBlobChann
|
|||||||
|
|
||||||
return NewTreeBlobChannelWrapper(blobs, errchan), nil
|
return NewTreeBlobChannelWrapper(blobs, errchan), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func catFileBatchTreeForPointers(treeblobs *TreeBlobChannelWrapper, gitEnv, osEnv config.Environment) (map[string]*WrappedPointer, *filepathfilter.Filter, error) {
|
||||||
|
pscanner, err := NewPointerScanner(gitEnv, osEnv)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
oscanner, err := git.NewObjectScanner(gitEnv, osEnv)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
pointers := make(map[string]*WrappedPointer)
|
||||||
|
|
||||||
|
paths := make([]git.AttributePath, 0)
|
||||||
|
processor := gitattr.NewMacroProcessor()
|
||||||
|
|
||||||
|
hasNext := true
|
||||||
|
for t := range treeblobs.Results {
|
||||||
|
if path.Base(t.Filename) == ".gitattributes" {
|
||||||
|
hasNext = oscanner.Scan(t.Oid)
|
||||||
|
|
||||||
|
if rdr := oscanner.Contents(); rdr != nil {
|
||||||
|
paths = append(paths, git.AttrPathsFromReader(
|
||||||
|
processor,
|
||||||
|
t.Filename,
|
||||||
|
"",
|
||||||
|
rdr,
|
||||||
|
t.Filename == ".gitattributes", // Read macros from the top-level attributes
|
||||||
|
)...)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := oscanner.Err(); err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
} else if t.Size < blobSizeCutoff {
|
||||||
|
hasNext = pscanner.Scan(t.Oid)
|
||||||
|
|
||||||
|
// It's intentional that we insert nil for
|
||||||
|
// non-pointers; we want to keep track of them
|
||||||
|
// as well as pointers.
|
||||||
|
p := pscanner.Pointer()
|
||||||
|
if p != nil {
|
||||||
|
p.Name = t.Filename
|
||||||
|
}
|
||||||
|
pointers[t.Filename] = p
|
||||||
|
|
||||||
|
if err := pscanner.Err(); err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pointers[t.Filename] = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if !hasNext {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the scanner quit early, we may still have treeblobs to
|
||||||
|
// read, so waiting for it to close will cause a deadlock.
|
||||||
|
if hasNext {
|
||||||
|
// Deal with nested error from incoming treeblobs
|
||||||
|
err := treeblobs.Wait()
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = pscanner.Close(); err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
if err = oscanner.Close(); err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
patterns := make([]filepathfilter.Pattern, 0, len(paths))
|
||||||
|
for _, path := range paths {
|
||||||
|
// Convert all separators to `/` before creating a pattern to
|
||||||
|
// avoid characters being escaped in situations like `subtree\*.md`
|
||||||
|
patterns = append(patterns, filepathfilter.NewPattern(filepath.ToSlash(path.Path), filepathfilter.Strict(true)))
|
||||||
|
}
|
||||||
|
|
||||||
|
return pointers, filepathfilter.NewFromPatterns(patterns, nil), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func runScanTreeForPointers(cb GitScannerFoundPointer, tree string, gitEnv, osEnv config.Environment) error {
|
||||||
|
treeShas, err := lsTreeBlobs(tree, func(t *git.TreeBlob) bool {
|
||||||
|
return t != nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
pointers, filter, err := catFileBatchTreeForPointers(treeShas, gitEnv, osEnv)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for name, p := range pointers {
|
||||||
|
// This file matches the patterns in .gitattributes, so it
|
||||||
|
// should be a pointer. If it is not, then it is a plain Git
|
||||||
|
// blob, which we report as an error.
|
||||||
|
if filter.Allows(name) {
|
||||||
|
if p == nil {
|
||||||
|
cb(nil, errors.NewPointerScanError(errors.NewNotAPointerError(nil), tree, name))
|
||||||
|
} else {
|
||||||
|
cb(p, nil)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user