git-lfs/git/rev_list_scanner.go

363 lines
10 KiB
Go
Raw Normal View History

2017-05-24 18:54:32 +00:00
package git
import (
"bufio"
"encoding/hex"
"fmt"
2017-05-24 18:54:32 +00:00
"io"
"io/ioutil"
"os/exec"
"regexp"
"strings"
"sync"
"github.com/git-lfs/git-lfs/errors"
"github.com/rubyist/tracerx"
)
// ScanningMode is a constant type that allows for variation in the range of
// commits to scan when given to the `*git.RevListScanner` type.
type ScanningMode int
const (
// ScanRefsMode will scan between two refspecs.
ScanRefsMode ScanningMode = iota
// ScanAllMode will scan all history.
ScanAllMode
// ScanLeftToRemoteMode will scan the difference between any included
// SHA1s and a remote tracking ref.
2017-05-24 18:54:32 +00:00
ScanLeftToRemoteMode
)
// RevListOrder is a constant type that allows for variation in the ordering of
// revisions given by the *RevListScanner below.
type RevListOrder int
const (
// DefaultRevListOrder is the zero-value for this type and yields the
// results as given by git-rev-list(1) without any `--<t>-order`
// argument given. By default: reverse chronological order.
DefaultRevListOrder RevListOrder = iota
2017-05-24 22:38:08 +00:00
// DateRevListOrder gives the revisions such that no parents are shown
// before children, and otherwise in commit timestamp order.
DateRevListOrder
// AuthorDateRevListOrder gives the revisions such that no parents are
// shown before children, and otherwise in author date timestamp order.
AuthorDateRevListOrder
2017-05-24 22:38:50 +00:00
// TopoRevListOrder gives the revisions such that they appear in
// topological order.
TopoRevListOrder
)
// Flag returns the command-line flag to be passed to git-rev-list(1) in order
// to order the output according to the given RevListOrder. It returns both the
// flag ("--date-order", "--topo-order", etc) and a bool, whether or not to
// append the flag (for instance, DefaultRevListOrder requires no flag).
//
// Given a type other than those defined above, Flag() will panic().
func (o RevListOrder) Flag() (string, bool) {
switch o {
case DefaultRevListOrder:
return "", false
case DateRevListOrder:
return "--date-order", true
case AuthorDateRevListOrder:
return "--author-date-order", true
case TopoRevListOrder:
return "--topo-order", true
default:
panic(fmt.Sprintf("git/rev_list_scanner: unknown RevListOrder %d", o))
}
}
2017-05-24 18:54:32 +00:00
// ScanRefsOptions is an "options" type that is used to configure a scan
// operation on the `*git.RevListScanner` instance when given to the function
// `NewRevListScanner()`.
type ScanRefsOptions struct {
// Mode is the scan mode to apply, see above.
Mode ScanningMode
// Remote is the current remote to scan against, if using
// ScanLeftToRemoveMode.
Remote string
// SkipDeletedBlobs specifies whether or not to traverse into commit
// ancestry (revealing potentially deleted (unreferenced) blobs, trees,
// or commits.
SkipDeletedBlobs bool
// Order specifies the order in which revisions are yielded from the
// output of `git-rev-list(1)`. For more information, see the above
// documentation on the RevListOrder type.
Order RevListOrder
// CommitsOnly specifies whether or not the *RevListScanner should
// return only commits, or all objects in range by performing a
// traversal of the graph. By default, false: show all objects.
CommitsOnly bool
// WorkingDir specifies the working directory in which to run
// git-rev-list(1). If this is an empty string, (has len(WorkingDir) ==
// 0), it is equivalent to running in os.Getwd().
WorkingDir string
// Reverse specifies whether or not to give the revisions in reverse
// order.
Reverse bool
2017-05-24 18:54:32 +00:00
// SkippedRefs provides a list of refs to ignore.
SkippedRefs []string
// Mutex guards names.
Mutex *sync.Mutex
// Names maps Git object IDs (encoded as hex using
// hex.EncodeString()) to their names, i.e., a directory name
// (fully-qualified) for trees, or a pathspec for blob tree entries.
Names map[string]string
}
// GetName returns the name associated with a given blob/tree sha and "true" if
// it exists, or ("", false) if it doesn't.
//
// GetName is guarded by a use of o.Mutex, and is goroutine safe.
func (o *ScanRefsOptions) GetName(sha string) (string, bool) {
o.Mutex.Lock()
defer o.Mutex.Unlock()
name, ok := o.Names[sha]
return name, ok
}
// SetName sets the name associated with a given blob/tree sha.
//
// SetName is guarded by a use of o.Mutex, and is therefore goroutine safe.
func (o *ScanRefsOptions) SetName(sha, name string) {
o.Mutex.Lock()
defer o.Mutex.Unlock()
o.Names[sha] = name
}
// RevListScanner is a Scanner type that parses through results of the `git
// rev-list` command.
type RevListScanner struct {
// s is a buffered scanner feeding from the output (stdout) of
// git-rev-list(1) invocation.
s *bufio.Scanner
// closeFn is an optional type returning an error yielded by closing any
// resources held by an open (running) instance of the *RevListScanner
// type.
closeFn func() error
// name is the name of the most recently read object.
name string
// oid is the oid of the most recently read object.
oid []byte
2017-05-24 18:54:32 +00:00
// err is the most recently encountered error.
err error
}
var (
// ambiguousRegex is a regular expression matching the output of stderr
// when ambiguous refnames are encountered.
ambiguousRegex = regexp.MustCompile(`warning: refname (.*) is ambiguous`)
// z40 is a regular expression matching the empty blob/commit/tree
// SHA: "0000000000000000000000000000000000000000".
z40 = regexp.MustCompile(`\^?0{40}`)
)
// NewRevListScanner instantiates a new RevListScanner instance scanning all
// revisions reachable by refs contained in "include" and not reachable by any
// refs included in "excluded", using the *ScanRefsOptions "opt" configuration.
2017-05-24 18:54:32 +00:00
//
// It returns a new *RevListScanner instance, or an error if one was
// encountered. Upon returning, the `git-rev-list(1)` instance is already
// running, and Scan() may be called immediately.
func NewRevListScanner(include, excluded []string, opt *ScanRefsOptions) (*RevListScanner, error) {
stdin, args, err := revListArgs(include, excluded, opt)
2017-05-24 18:54:32 +00:00
if err != nil {
return nil, err
}
cmd := exec.Command("git", args...)
if len(opt.WorkingDir) > 0 {
cmd.Dir = opt.WorkingDir
}
2017-05-24 18:54:32 +00:00
cmd.Stdin = stdin
stdout, err := cmd.StdoutPipe()
if err != nil {
return nil, err
}
stderr, err := cmd.StderrPipe()
if err != nil {
return nil, err
}
tracerx.Printf("run_command: git %s", strings.Join(args, " "))
if err := cmd.Start(); err != nil {
return nil, err
}
return &RevListScanner{
s: bufio.NewScanner(stdout),
closeFn: func() error {
msg, _ := ioutil.ReadAll(stderr)
// First check if there was a non-zero exit code given
// when Wait()-ing on the command execution.
if err := cmd.Wait(); err != nil {
return errors.Errorf("Error in git %s: %v %s",
strings.Join(args, " "), err, msg)
}
// If the command exited cleanly, but found an ambiguous
// refname, promote that to an error and return it.
//
// `git-rev-list(1)` does not treat ambiguous refnames
// as fatal (non-zero exit status), but we do.
if am := ambiguousRegex.FindSubmatch(msg); len(am) > 1 {
return errors.Errorf("ref %s is ambiguous", am[1])
}
return nil
},
}, nil
}
// revListArgs returns the arguments for a given included and excluded set of
// SHA1s, and ScanRefsOptions instance.
2017-05-24 18:54:32 +00:00
//
// In order, it returns the contents of stdin as an io.Reader, the args passed
// to git as a []string, and any error encountered in generating those if one
// occurred.
func revListArgs(include, exclude []string, opt *ScanRefsOptions) (io.Reader, []string, error) {
2017-05-24 18:54:32 +00:00
var stdin io.Reader
args := []string{"rev-list"}
if !opt.CommitsOnly {
args = append(args, "--objects")
}
2017-05-24 18:54:32 +00:00
if opt.Reverse {
args = append(args, "--reverse")
}
if orderFlag, ok := opt.Order.Flag(); ok {
args = append(args, orderFlag)
2017-05-24 22:38:08 +00:00
}
2017-05-24 18:54:32 +00:00
switch opt.Mode {
case ScanRefsMode:
if opt.SkipDeletedBlobs {
args = append(args, "--no-walk")
} else {
args = append(args, "--do-walk")
}
args = append(args, includeExcludeShas(include, exclude)...)
2017-05-24 18:54:32 +00:00
case ScanAllMode:
args = append(args, "--all")
case ScanLeftToRemoteMode:
if len(opt.SkippedRefs) == 0 {
args = append(args, includeExcludeShas(include, exclude)...)
args = append(args, "--not", "--remotes="+opt.Remote)
2017-05-24 18:54:32 +00:00
} else {
args = append(args, "--stdin")
stdin = strings.NewReader(strings.Join(
append(includeExcludeShas(include, exclude), opt.SkippedRefs...), "\n"),
2017-05-24 18:54:32 +00:00
)
}
default:
return nil, nil, errors.Errorf("unknown scan type: %d", opt.Mode)
}
return stdin, append(args, "--"), nil
}
func includeExcludeShas(include, exclude []string) []string {
include = nonZeroShas(include)
exclude = nonZeroShas(exclude)
args := make([]string, 0, len(include)+len(exclude))
for _, i := range include {
args = append(args, i)
}
for _, x := range exclude {
args = append(args, fmt.Sprintf("^%s", x))
}
return args
}
func nonZeroShas(all []string) []string {
nz := make([]string, 0, len(all))
for _, sha := range all {
if len(sha) > 0 && !z40.MatchString(sha) {
nz = append(nz, sha)
}
}
return nz
}
// Name is an optional field that gives the name of the object (if the object is
// a tree, blob).
2017-05-24 18:54:32 +00:00
//
// It can be called before or after Scan(), but will return "" if called
2017-05-24 18:54:32 +00:00
// before.
func (s *RevListScanner) Name() string { return s.name }
// OID is the hex-decoded bytes of the object's ID.
//
// It can be called before or after Scan(), but will return "" if called
// before.
func (s *RevListScanner) OID() []byte { return s.oid }
2017-05-24 18:54:32 +00:00
// Err returns the last encountered error (or nil) after a call to Scan().
//
// It SHOULD be called, checked and handled after a call to Scan().
func (s *RevListScanner) Err() error { return s.err }
// Scan scans the next entry given by git-rev-list(1), and returns true/false
// indicating if there are more results to scan.
func (s *RevListScanner) Scan() bool {
var err error
s.oid, s.name, err = s.scan()
2017-05-24 18:54:32 +00:00
if err != nil {
if err != io.EOF {
s.err = err
}
return false
}
return len(s.oid) > 0
2017-05-24 18:54:32 +00:00
}
// Close closes the RevListScanner by freeing any resources held by the
// instance while running, and returns any error encountered while doing so.
func (s *RevListScanner) Close() error {
if s.closeFn == nil {
return nil
}
return s.closeFn()
}
// scan provides the internal implementation of scanning a line of text from the
// output of `git-rev-list(1)`.
func (s *RevListScanner) scan() ([]byte, string, error) {
2017-05-24 18:54:32 +00:00
if !s.s.Scan() {
return nil, "", s.s.Err()
2017-05-24 18:54:32 +00:00
}
line := strings.TrimSpace(s.s.Text())
if len(line) < 40 {
return nil, "", nil
2017-05-24 18:54:32 +00:00
}
sha1, err := hex.DecodeString(line[:40])
if err != nil {
return nil, "", err
2017-05-24 18:54:32 +00:00
}
var name string
if len(line) > 40 {
name = line[41:]
}
return sha1, name, nil
2017-05-24 18:54:32 +00:00
}