2016-11-17 22:47:01 +00:00
|
|
|
package lfs
|
|
|
|
|
|
|
|
import (
|
2017-05-24 18:54:32 +00:00
|
|
|
"encoding/hex"
|
lfs: find invalid pointers
In the future, we'll want to support detecting various problems with
pointers. These fall into two types: pointers which are non-canonical
and files which should be pointers but are not.
Our existing scanning functions are not well suited to this,
unfortunately, so we add some additional functions. We first scan all
of the commits in the range we want and then, having found their object
IDs, call git ls-tree to enumerate each item in its corresponding root
tree. We accumulate the patterns in every found .gitattributes file,
and we keep track of every other file we process, checking small files
for being a pointer.
Once we've processed the entire tree, we compute the set of patterns for
the .gitattributes file and check each file against it. If the file is
a pointer, we emit the pointer to our callback, and if it is not a
pointer but matches the patterns, then we emit an error indicating that
it should have been a pointer.
2021-06-09 20:31:01 +00:00
|
|
|
"sync"
|
2017-05-24 18:54:32 +00:00
|
|
|
|
2021-09-01 19:41:10 +00:00
|
|
|
"github.com/git-lfs/git-lfs/v3/config"
|
|
|
|
"github.com/git-lfs/git-lfs/v3/git"
|
2016-11-17 22:47:01 +00:00
|
|
|
)
|
|
|
|
|
2017-02-16 23:52:40 +00:00
|
|
|
type lockableNameSet struct {
|
|
|
|
opt *ScanRefsOptions
|
|
|
|
set GitScannerSet
|
|
|
|
}
|
|
|
|
|
|
|
|
// Determines if the given blob sha matches a locked file.
|
|
|
|
func (s *lockableNameSet) Check(blobSha string) (string, bool) {
|
|
|
|
if s == nil || s.opt == nil || s.set == nil {
|
|
|
|
return "", false
|
|
|
|
}
|
|
|
|
|
|
|
|
name, ok := s.opt.GetName(blobSha)
|
|
|
|
if !ok {
|
|
|
|
return name, ok
|
|
|
|
}
|
|
|
|
|
|
|
|
if s.set.Contains(name) {
|
|
|
|
return name, true
|
|
|
|
}
|
|
|
|
return name, false
|
|
|
|
}
|
|
|
|
|
|
|
|
// noopFoundLockable ignores its argument and does nothing. It stands in for
// the lockable callback when the scanner has none configured.
func noopFoundLockable(_ string) {}
|
|
|
|
|
2018-06-25 23:21:55 +00:00
|
|
|
// scanRefsToChan scans through all commits reachable by refs contained in
|
2021-07-21 03:19:25 +00:00
|
|
|
// "include" and not reachable by any refs included in "exclude" and invokes
|
|
|
|
// the provided callback for each pointer file, valid or invalid, that it finds.
|
2016-11-17 22:47:01 +00:00
|
|
|
// Reports unique oids once only, not multiple times if >1 file uses the same content
|
2020-05-15 18:58:37 +00:00
|
|
|
func scanRefsToChan(scanner *GitScanner, pointerCb GitScannerFoundPointer, include, exclude []string, gitEnv, osEnv config.Environment, opt *ScanRefsOptions) error {
|
2016-11-17 22:47:01 +00:00
|
|
|
if opt == nil {
|
|
|
|
panic("no scan ref options")
|
|
|
|
}
|
|
|
|
|
2018-06-25 23:21:55 +00:00
|
|
|
revs, err := revListShas(include, exclude, opt)
|
2016-11-17 22:47:01 +00:00
|
|
|
if err != nil {
|
2016-11-29 20:04:05 +00:00
|
|
|
return err
|
2016-11-17 22:47:01 +00:00
|
|
|
}
|
|
|
|
|
2017-02-16 23:52:40 +00:00
|
|
|
lockableSet := &lockableNameSet{opt: opt, set: scanner.PotentialLockables}
|
|
|
|
smallShas, batchLockableCh, err := catFileBatchCheck(revs, lockableSet)
|
2016-11-17 22:47:01 +00:00
|
|
|
if err != nil {
|
2016-11-29 20:04:05 +00:00
|
|
|
return err
|
2016-11-17 22:47:01 +00:00
|
|
|
}
|
|
|
|
|
2017-02-16 23:52:40 +00:00
|
|
|
lockableCb := scanner.FoundLockable
|
|
|
|
if lockableCb == nil {
|
|
|
|
lockableCb = noopFoundLockable
|
|
|
|
}
|
|
|
|
|
|
|
|
go func(cb GitScannerFoundLockable, ch chan string) {
|
|
|
|
for name := range ch {
|
|
|
|
cb(name)
|
|
|
|
}
|
|
|
|
}(lockableCb, batchLockableCh)
|
|
|
|
|
2020-05-15 18:58:37 +00:00
|
|
|
pointers, checkLockableCh, err := catFileBatch(smallShas, lockableSet, gitEnv, osEnv)
|
2016-11-17 22:47:01 +00:00
|
|
|
if err != nil {
|
2016-11-29 20:04:05 +00:00
|
|
|
return err
|
2016-11-17 22:47:01 +00:00
|
|
|
}
|
|
|
|
|
2016-11-29 20:04:05 +00:00
|
|
|
for p := range pointers.Results {
|
|
|
|
if name, ok := opt.GetName(p.Sha1); ok {
|
|
|
|
p.Name = name
|
2016-11-17 22:47:01 +00:00
|
|
|
}
|
2018-02-01 02:16:28 +00:00
|
|
|
|
|
|
|
if scanner.Filter.Allows(p.Name) {
|
|
|
|
pointerCb(p, nil)
|
|
|
|
}
|
2017-02-16 23:52:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for lockableName := range checkLockableCh {
|
2018-02-01 02:16:28 +00:00
|
|
|
if scanner.Filter.Allows(lockableName) {
|
|
|
|
lockableCb(lockableName)
|
|
|
|
}
|
2016-11-29 20:04:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if err := pointers.Wait(); err != nil {
|
2017-02-16 23:52:40 +00:00
|
|
|
pointerCb(nil, err)
|
2016-11-29 20:04:05 +00:00
|
|
|
}
|
2016-11-17 22:47:01 +00:00
|
|
|
|
2016-11-29 20:04:05 +00:00
|
|
|
return nil
|
2016-11-17 22:47:01 +00:00
|
|
|
}
|
|
|
|
|
2018-06-25 23:21:55 +00:00
|
|
|
// scanLeftRightToChan takes a ref and returns a channel of WrappedPointer objects
|
|
|
|
// for all Git LFS pointers it finds for that ref.
|
|
|
|
// Reports unique oids once only, not multiple times if >1 file uses the same content
|
2020-05-15 18:58:37 +00:00
|
|
|
func scanLeftRightToChan(scanner *GitScanner, pointerCb GitScannerFoundPointer, refLeft, refRight string, gitEnv, osEnv config.Environment, opt *ScanRefsOptions) error {
|
|
|
|
return scanRefsToChan(scanner, pointerCb, []string{refLeft}, []string{refRight}, gitEnv, osEnv, opt)
|
2018-06-25 23:21:55 +00:00
|
|
|
}
|
|
|
|
|
Optimize pushes of multiple refs
When pushing multiple refs, we know any Git objects on the remote side
can be excluded from the objects that refer to LFS objects we need to
push, since if the remote side already has the Git objects, it should
have the corresponding LFS objects as well.
However, when traversing Git objects, we traverse them on a per-ref
basis, which is required since any LFS objects which spawn a batch
request will need the ref to be placed in the batch request as part of
the protocol.
Let's find a list of all the remote sides that exist before traversing
any Git objects, and exclude traversing any of those objects in any
traversal. As a result, we can traverse far, far fewer objects,
especially when pushing new refs in a large repository.
Note that we exclude the case when the left and right sides are the same
because our code sets them to the same thing in some cases even though
Git does not, so we cannot reason about the values in that case.
2020-01-14 19:43:35 +00:00
|
|
|
// scanMultiLeftRightToChan takes a ref and a set of bases and returns a channel
|
|
|
|
// of WrappedPointer objects for all Git LFS pointers it finds for that ref.
|
|
|
|
// Reports unique oids once only, not multiple times if >1 file uses the same
|
|
|
|
// content
|
2020-05-15 18:58:37 +00:00
|
|
|
func scanMultiLeftRightToChan(scanner *GitScanner, pointerCb GitScannerFoundPointer, refLeft string, bases []string, gitEnv, osEnv config.Environment, opt *ScanRefsOptions) error {
|
|
|
|
return scanRefsToChan(scanner, pointerCb, []string{refLeft}, bases, gitEnv, osEnv, opt)
|
Optimize pushes of multiple refs
When pushing multiple refs, we know any Git objects on the remote side
can be excluded from the objects that refer to LFS objects we need to
push, since if the remote side already has the Git objects, it should
have the corresponding LFS objects as well.
However, when traversing Git objects, we traverse them on a per-ref
basis, which is required since any LFS objects which spawn a batch
request will need the ref to be placed in the batch request as part of
the protocol.
Let's find a list of all the remote sides that exist before traversing
any Git objects, and exclude traversing any of those objects in any
traversal. As a result, we can traverse far, far fewer objects,
especially when pushing new refs in a large repository.
Note that we exclude the case when the left and right sides are the same
because our code sets them to the same thing in some cases even though
Git does not, so we cannot reason about the values in that case.
2020-01-14 19:43:35 +00:00
|
|
|
}
|
|
|
|
|
lfs: find invalid pointers
In the future, we'll want to support detecting various problems with
pointers. These fall into two types: pointers which are non-canonical
and files which should be pointers but are not.
Our existing scanning functions are not well suited to this,
unfortunately, so we add some additional functions. We first scan all
of the commits in the range we want and then, having found their object
IDs, call git ls-tree to enumerate each item in its corresponding root
tree. We accumulate the patterns in every found .gitattributes file,
and we keep track of every other file we process, checking small files
for being a pointer.
Once we've processed the entire tree, we compute the set of patterns for
the .gitattributes file and check each file against it. If the file is
a pointer, we emit the pointer to our callback, and if it is not a
pointer but matches the patterns, then we emit an error indicating that
it should have been a pointer.
2021-06-09 20:31:01 +00:00
|
|
|
// scanRefsByTree scans through all commits reachable by refs contained in
|
|
|
|
// "include" and not reachable by any refs included in "exclude" and invokes
|
|
|
|
// the provided callback for each pointer file, valid or invalid, that it finds.
|
|
|
|
// Reports unique oids once only, not multiple times if >1 file uses the same content
|
|
|
|
func scanRefsByTree(scanner *GitScanner, pointerCb GitScannerFoundPointer, include, exclude []string, gitEnv, osEnv config.Environment, opt *ScanRefsOptions) error {
|
|
|
|
if opt == nil {
|
|
|
|
panic("no scan ref options")
|
|
|
|
}
|
|
|
|
|
|
|
|
revs, err := revListShas(include, exclude, opt)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
errchan := make(chan error, 20) // multiple errors possible
|
|
|
|
wg := &sync.WaitGroup{}
|
|
|
|
|
|
|
|
for r := range revs.Results {
|
|
|
|
wg.Add(1)
|
|
|
|
go func(rev string) {
|
|
|
|
defer wg.Done()
|
|
|
|
err := runScanTreeForPointers(pointerCb, rev, gitEnv, osEnv)
|
|
|
|
if err != nil {
|
|
|
|
errchan <- err
|
|
|
|
}
|
|
|
|
}(r)
|
|
|
|
}
|
|
|
|
|
|
|
|
wg.Wait()
|
|
|
|
close(errchan)
|
|
|
|
for err := range errchan {
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return revs.Wait()
|
|
|
|
}
|
|
|
|
|
2016-11-17 22:47:01 +00:00
|
|
|
// revListShas uses git rev-list to return the list of object sha1s
|
|
|
|
// for the given ref. If all is true, ref is ignored. It returns a
|
|
|
|
// channel from which sha1 strings can be read.
|
2017-06-08 17:22:30 +00:00
|
|
|
func revListShas(include, exclude []string, opt *ScanRefsOptions) (*StringChannelWrapper, error) {
|
|
|
|
scanner, err := git.NewRevListScanner(include, exclude, &git.ScanRefsOptions{
|
2017-05-24 18:54:32 +00:00
|
|
|
Mode: git.ScanningMode(opt.ScanMode),
|
|
|
|
Remote: opt.RemoteName,
|
|
|
|
SkipDeletedBlobs: opt.SkipDeletedBlobs,
|
|
|
|
SkippedRefs: opt.skippedRefs,
|
|
|
|
Mutex: opt.mutex,
|
|
|
|
Names: opt.nameMap,
|
lfs: find invalid pointers
In the future, we'll want to support detecting various problems with
pointers. These fall into two types: pointers which are non-canonical
and files which should be pointers but are not.
Our existing scanning functions are not well suited to this,
unfortunately, so we add some additional functions. We first scan all
of the commits in the range we want and then, having found their object
IDs, call git ls-tree to enumerate each item in its corresponding root
tree. We accumulate the patterns in every found .gitattributes file,
and we keep track of every other file we process, checking small files
for being a pointer.
Once we've processed the entire tree, we compute the set of patterns for
the .gitattributes file and check each file against it. If the file is
a pointer, we emit the pointer to our callback, and if it is not a
pointer but matches the patterns, then we emit an error indicating that
it should have been a pointer.
2021-06-09 20:31:01 +00:00
|
|
|
CommitsOnly: opt.CommitsOnly,
|
2017-05-24 18:54:32 +00:00
|
|
|
})
|
2016-11-17 22:47:01 +00:00
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
revs := make(chan string, chanBufSize)
|
2017-05-24 18:54:32 +00:00
|
|
|
errs := make(chan error, 5) // may be multiple errors
|
2016-11-17 22:47:01 +00:00
|
|
|
|
|
|
|
go func() {
|
|
|
|
for scanner.Scan() {
|
2017-05-24 19:52:01 +00:00
|
|
|
sha := hex.EncodeToString(scanner.OID())
|
|
|
|
if name := scanner.Name(); len(name) > 0 {
|
|
|
|
opt.SetName(sha, name)
|
2016-11-17 22:47:01 +00:00
|
|
|
}
|
2017-05-24 18:54:32 +00:00
|
|
|
revs <- sha
|
2016-11-17 22:47:01 +00:00
|
|
|
}
|
|
|
|
|
2017-05-24 18:54:32 +00:00
|
|
|
if err = scanner.Err(); err != nil {
|
|
|
|
errs <- err
|
2016-11-17 22:47:01 +00:00
|
|
|
}
|
2017-05-24 18:54:32 +00:00
|
|
|
|
|
|
|
if err = scanner.Close(); err != nil {
|
|
|
|
errs <- err
|
|
|
|
}
|
|
|
|
|
2016-11-17 22:47:01 +00:00
|
|
|
close(revs)
|
2017-05-24 18:54:32 +00:00
|
|
|
close(errs)
|
2016-11-17 22:47:01 +00:00
|
|
|
}()
|
|
|
|
|
2017-05-24 18:54:32 +00:00
|
|
|
return NewStringChannelWrapper(revs, errs), nil
|
2016-11-17 22:47:01 +00:00
|
|
|
}
|