2016-11-17 17:23:29 +00:00
|
|
|
package lfs
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"bytes"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"io/ioutil"
|
|
|
|
"regexp"
|
2020-08-07 15:48:59 +00:00
|
|
|
"strings"
|
2016-11-17 22:41:19 +00:00
|
|
|
"time"
|
2016-11-17 17:23:29 +00:00
|
|
|
|
2016-11-21 18:38:39 +00:00
|
|
|
"github.com/git-lfs/git-lfs/filepathfilter"
|
2016-11-17 22:41:19 +00:00
|
|
|
"github.com/git-lfs/git-lfs/git"
|
2017-08-21 10:15:48 +00:00
|
|
|
"github.com/git-lfs/git-lfs/subprocess"
|
2016-11-17 17:23:29 +00:00
|
|
|
"github.com/rubyist/tracerx"
|
|
|
|
)
|
|
|
|
|
|
|
|
// LogDiffDirection selects which side of a diff a log scan takes pointer data
// from: the added ('+') lines or the removed ('-') lines. Depending on what is
// being scanned for, either side may be the useful one.
type LogDiffDirection byte

const (
	// LogDiffAdditions includes '+' diff lines.
	LogDiffAdditions LogDiffDirection = '+'
	// LogDiffDeletions includes '-' diff lines.
	LogDiffDeletions LogDiffDirection = '-'
)
|
|
|
|
|
|
|
|
// logLfsSearchArgs holds the arguments to append to a git log call so that
// the output is limited to lfs changes and formatted suitably for the
// parseLogOutput.. method(s).
var logLfsSearchArgs = []string{
	"--no-ext-diff",
	"--no-textconv",
	// Only diffs which include an lfs file SHA change.
	"-G", "oid sha256:",
	// Include the diff so the SHA can be read.
	"-p",
	// Keep the diff context big enough to hold 10 extension lines, so the
	// whole pointer is always present.
	"-U12",
	// A predictable commit header that the scanner can detect.
	`--format=lfs-commit-sha: %H %P`,
}
|
|
|
|
|
2016-11-29 16:40:50 +00:00
|
|
|
type gitscannerResult struct {
|
|
|
|
Pointer *WrappedPointer
|
|
|
|
Err error
|
|
|
|
}
|
|
|
|
|
2017-02-15 23:48:47 +00:00
|
|
|
func scanUnpushed(cb GitScannerFoundPointer, remote string) error {
|
2017-08-21 10:15:48 +00:00
|
|
|
logArgs := []string{
|
2016-11-17 17:23:29 +00:00
|
|
|
"--branches", "--tags", // include all locally referenced commits
|
|
|
|
"--not"} // but exclude everything that comes after
|
|
|
|
|
|
|
|
if len(remote) == 0 {
|
|
|
|
logArgs = append(logArgs, "--remotes")
|
|
|
|
} else {
|
|
|
|
logArgs = append(logArgs, fmt.Sprintf("--remotes=%v", remote))
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add standard search args to find lfs references
|
|
|
|
logArgs = append(logArgs, logLfsSearchArgs...)
|
|
|
|
|
2017-08-21 10:15:48 +00:00
|
|
|
cmd, err := git.Log(logArgs...)
|
2016-11-17 17:23:29 +00:00
|
|
|
if err != nil {
|
2016-11-29 16:40:50 +00:00
|
|
|
return err
|
2016-11-17 17:23:29 +00:00
|
|
|
}
|
|
|
|
|
2016-11-29 17:27:42 +00:00
|
|
|
parseScannerLogOutput(cb, LogDiffAdditions, cmd)
|
|
|
|
return nil
|
2020-08-07 15:48:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func scanStashed(cb GitScannerFoundPointer, s *GitScanner) error {
|
2020-08-13 11:52:30 +00:00
|
|
|
// Stashes are actually 2-3 commits, each containing one of:
|
2020-08-27 02:53:11 +00:00
|
|
|
// 1. Working copy (WIP) modified files
|
2020-08-13 11:52:30 +00:00
|
|
|
// 2. Index changes
|
2020-08-27 02:53:11 +00:00
|
|
|
// 3. Untracked files (but only if "git stash -u" was used)
|
|
|
|
// The first of these, the WIP commit, is a merge whose first parent
|
|
|
|
// is HEAD and whose other parent(s) are commits 2 and 3 above.
|
|
|
|
|
|
|
|
// We need to get the individual diff of each of these commits to
|
|
|
|
// ensure we have all of the LFS objects referenced by the stash,
|
|
|
|
// so a future "git stash pop" can restore them all.
|
|
|
|
|
|
|
|
// First we get the list of SHAs of the WIP merge commits from the
|
|
|
|
// reflog using "git log -g --format=%h refs/stash --". Because
|
|
|
|
// older Git versions (at least <=2.7) don't report merge parents in
|
|
|
|
// the reflog, we can't extract the parent SHAs from "Merge:" lines
|
|
|
|
// in the log; we can, however, use the "git log -m" option to force
|
|
|
|
// individual diffs of all the merge parents in a second step.
|
2020-08-13 11:52:30 +00:00
|
|
|
logArgs := []string{"-g", "--format=%h", "refs/stash", "--"}
|
2020-08-07 15:48:59 +00:00
|
|
|
|
2020-08-12 11:11:39 +00:00
|
|
|
cmd, err := git.Log(logArgs...)
|
2020-08-07 15:48:59 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
scanner := bufio.NewScanner(cmd.Stdout)
|
|
|
|
|
2020-08-27 02:53:11 +00:00
|
|
|
var stashMergeShas []string
|
2020-08-07 15:48:59 +00:00
|
|
|
for scanner.Scan() {
|
2020-08-27 02:53:11 +00:00
|
|
|
stashMergeShas = append(stashMergeShas, strings.TrimSpace(scanner.Text()))
|
2020-08-07 15:48:59 +00:00
|
|
|
}
|
2020-08-13 11:52:30 +00:00
|
|
|
err = cmd.Wait()
|
|
|
|
if err != nil {
|
|
|
|
// Ignore this error, it really only happens when there's no refs/stash
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-08-17 19:52:40 +00:00
|
|
|
// We can use the log parser if we provide the -m option to get
|
2020-08-27 02:53:11 +00:00
|
|
|
// merge diffs shown individually
|
|
|
|
logArgs = []string{"-m"}
|
2020-08-07 15:48:59 +00:00
|
|
|
|
2020-08-27 02:53:11 +00:00
|
|
|
// Add standard search args to find lfs references
|
|
|
|
logArgs = append(logArgs, logLfsSearchArgs...)
|
2020-08-13 11:52:30 +00:00
|
|
|
|
2020-08-27 02:53:11 +00:00
|
|
|
logArgs = append(logArgs, stashMergeShas...)
|
|
|
|
|
|
|
|
cmd, err = git.Log(logArgs...)
|
2020-08-13 11:52:30 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
parseScannerLogOutput(cb, LogDiffAdditions, cmd)
|
2020-08-07 15:48:59 +00:00
|
|
|
return nil
|
2016-11-29 17:27:42 +00:00
|
|
|
}
|
2016-11-17 17:23:29 +00:00
|
|
|
|
2017-08-21 10:15:48 +00:00
|
|
|
func parseScannerLogOutput(cb GitScannerFoundPointer, direction LogDiffDirection, cmd *subprocess.BufferedCmd) {
|
2016-11-29 16:40:50 +00:00
|
|
|
ch := make(chan gitscannerResult, chanBufSize)
|
2016-11-17 17:23:29 +00:00
|
|
|
|
|
|
|
go func() {
|
2016-11-29 17:27:42 +00:00
|
|
|
scanner := newLogScanner(direction, cmd.Stdout)
|
2016-11-29 16:40:50 +00:00
|
|
|
for scanner.Scan() {
|
|
|
|
if p := scanner.Pointer(); p != nil {
|
|
|
|
ch <- gitscannerResult{Pointer: p}
|
|
|
|
}
|
|
|
|
}
|
2016-11-17 17:23:29 +00:00
|
|
|
stderr, _ := ioutil.ReadAll(cmd.Stderr)
|
|
|
|
err := cmd.Wait()
|
|
|
|
if err != nil {
|
2019-10-22 07:15:05 +00:00
|
|
|
ch <- gitscannerResult{Err: fmt.Errorf("error in git log: %v %v", err, string(stderr))}
|
2016-11-17 17:23:29 +00:00
|
|
|
}
|
2016-11-29 16:40:50 +00:00
|
|
|
close(ch)
|
2016-11-17 17:23:29 +00:00
|
|
|
}()
|
|
|
|
|
2016-11-29 17:27:42 +00:00
|
|
|
cmd.Stdin.Close()
|
2016-11-29 16:40:50 +00:00
|
|
|
for result := range ch {
|
|
|
|
cb(result.Pointer, result.Err)
|
|
|
|
}
|
2016-11-17 17:23:29 +00:00
|
|
|
}
|
|
|
|
|
2016-11-17 22:41:19 +00:00
|
|
|
// logPreviousVersions scans history for all previous versions of LFS pointers
|
|
|
|
// from 'since' up to (but not including) the final state at ref
|
2017-02-15 23:48:47 +00:00
|
|
|
func logPreviousSHAs(cb GitScannerFoundPointer, ref string, since time.Time) error {
|
2017-08-21 10:15:48 +00:00
|
|
|
logArgs := []string{
|
2016-11-17 22:41:19 +00:00
|
|
|
fmt.Sprintf("--since=%v", git.FormatGitDate(since)),
|
|
|
|
}
|
|
|
|
// Add standard search args to find lfs references
|
|
|
|
logArgs = append(logArgs, logLfsSearchArgs...)
|
|
|
|
// ending at ref
|
|
|
|
logArgs = append(logArgs, ref)
|
|
|
|
|
2017-08-21 10:15:48 +00:00
|
|
|
cmd, err := git.Log(logArgs...)
|
2016-11-17 22:41:19 +00:00
|
|
|
if err != nil {
|
2016-11-29 17:27:42 +00:00
|
|
|
return err
|
2016-11-17 22:41:19 +00:00
|
|
|
}
|
|
|
|
|
2016-11-29 17:27:42 +00:00
|
|
|
parseScannerLogOutput(cb, LogDiffDeletions, cmd)
|
|
|
|
return nil
|
2016-11-17 22:41:19 +00:00
|
|
|
}
|
|
|
|
|
2016-11-21 17:48:36 +00:00
|
|
|
func parseLogOutputToPointers(log io.Reader, dir LogDiffDirection,
|
|
|
|
includePaths, excludePaths []string, results chan *WrappedPointer) {
|
2016-11-21 18:38:39 +00:00
|
|
|
scanner := newLogScanner(dir, log)
|
|
|
|
if len(includePaths)+len(excludePaths) > 0 {
|
|
|
|
scanner.Filter = filepathfilter.New(includePaths, excludePaths)
|
|
|
|
}
|
2016-11-21 17:48:36 +00:00
|
|
|
for scanner.Scan() {
|
|
|
|
if p := scanner.Pointer(); p != nil {
|
|
|
|
results <- p
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// logScanner parses log output formatted as per logLfsSearchArgs & returns
|
|
|
|
// pointers.
|
2016-11-21 16:50:40 +00:00
|
|
|
type logScanner struct {
|
2016-11-21 18:38:39 +00:00
|
|
|
// Filter will ensure file paths matching the include patterns, or not matchin
|
|
|
|
// the exclude patterns are skipped.
|
|
|
|
Filter *filepathfilter.Filter
|
|
|
|
|
|
|
|
s *bufio.Scanner
|
|
|
|
dir LogDiffDirection
|
|
|
|
pointer *WrappedPointer
|
2016-11-21 16:50:40 +00:00
|
|
|
|
|
|
|
pointerData *bytes.Buffer
|
|
|
|
currentFilename string
|
|
|
|
currentFileIncluded bool
|
|
|
|
|
|
|
|
commitHeaderRegex *regexp.Regexp
|
|
|
|
fileHeaderRegex *regexp.Regexp
|
|
|
|
fileMergeHeaderRegex *regexp.Regexp
|
|
|
|
pointerDataRegex *regexp.Regexp
|
|
|
|
}
|
|
|
|
|
2016-11-21 17:48:36 +00:00
|
|
|
// dir: whether to include results from + or - diffs
|
2016-11-21 18:38:39 +00:00
|
|
|
// r: a stream of output from git log with at least logLfsSearchArgs specified
|
|
|
|
func newLogScanner(dir LogDiffDirection, r io.Reader) *logScanner {
|
2016-11-21 16:50:40 +00:00
|
|
|
return &logScanner{
|
2016-11-21 17:48:36 +00:00
|
|
|
s: bufio.NewScanner(r),
|
|
|
|
dir: dir,
|
|
|
|
pointerData: &bytes.Buffer{},
|
|
|
|
currentFileIncluded: true,
|
|
|
|
|
|
|
|
// no need to compile these regexes on every `git-lfs` call, just ones that
|
|
|
|
// use the scanner.
|
2020-06-10 21:29:49 +00:00
|
|
|
commitHeaderRegex: regexp.MustCompile(fmt.Sprintf(`^lfs-commit-sha: (%s)(?: (%s))*`, git.ObjectIDRegex, git.ObjectIDRegex)),
|
2016-11-21 16:50:40 +00:00
|
|
|
fileHeaderRegex: regexp.MustCompile(`diff --git a\/(.+?)\s+b\/(.+)`),
|
|
|
|
fileMergeHeaderRegex: regexp.MustCompile(`diff --cc (.+)`),
|
2020-08-17 19:52:40 +00:00
|
|
|
pointerDataRegex: regexp.MustCompile(`^([\+\- ])(version https://git-lfs|oid sha256|size|ext-).*$`),
|
2016-11-21 16:50:40 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *logScanner) Pointer() *WrappedPointer {
|
|
|
|
return s.pointer
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *logScanner) Err() error {
|
|
|
|
return s.s.Err()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *logScanner) Scan() bool {
|
|
|
|
s.pointer = nil
|
|
|
|
p, canScan := s.scan()
|
|
|
|
s.pointer = p
|
|
|
|
return canScan
|
|
|
|
}
|
|
|
|
|
|
|
|
// Utility func used at several points below (keep in narrow scope)
|
|
|
|
func (s *logScanner) finishLastPointer() *WrappedPointer {
|
2016-11-21 18:38:39 +00:00
|
|
|
if s.pointerData.Len() == 0 || !s.currentFileIncluded {
|
2016-11-21 17:51:49 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
p, err := DecodePointer(s.pointerData)
|
|
|
|
s.pointerData.Reset()
|
|
|
|
|
|
|
|
if err == nil {
|
|
|
|
return &WrappedPointer{Name: s.currentFilename, Pointer: p}
|
|
|
|
} else {
|
|
|
|
tracerx.Printf("Unable to parse pointer from log: %v", err)
|
|
|
|
return nil
|
2016-11-21 16:50:40 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-11-21 17:48:36 +00:00
|
|
|
// For each commit we'll get something like this:
|
|
|
|
/*
|
|
|
|
lfs-commit-sha: 60fde3d23553e10a55e2a32ed18c20f65edd91e7 e2eaf1c10b57da7b98eb5d722ec5912ddeb53ea1
|
|
|
|
|
|
|
|
diff --git a/1D_Noise.png b/1D_Noise.png
|
|
|
|
new file mode 100644
|
|
|
|
index 0000000..2622b4a
|
|
|
|
--- /dev/null
|
|
|
|
+++ b/1D_Noise.png
|
|
|
|
@@ -0,0 +1,3 @@
|
|
|
|
+version https://git-lfs.github.com/spec/v1
|
|
|
|
+oid sha256:f5d84da40ab1f6aa28df2b2bf1ade2cdcd4397133f903c12b4106641b10e1ed6
|
|
|
|
+size 1289
|
|
|
|
*/
|
|
|
|
// There can be multiple diffs per commit (multiple binaries)
|
|
|
|
// Also when a binary is changed the diff will include a '-' line for the old SHA
|
2016-11-21 16:50:40 +00:00
|
|
|
func (s *logScanner) scan() (*WrappedPointer, bool) {
|
|
|
|
for s.s.Scan() {
|
|
|
|
line := s.s.Text()
|
|
|
|
|
|
|
|
if match := s.commitHeaderRegex.FindStringSubmatch(line); match != nil {
|
|
|
|
// Currently we're not pulling out commit groupings, but could if we wanted
|
|
|
|
// This just acts as a delimiter for finishing a multiline pointer
|
|
|
|
if p := s.finishLastPointer(); p != nil {
|
|
|
|
return p, true
|
|
|
|
}
|
|
|
|
} else if match := s.fileHeaderRegex.FindStringSubmatch(line); match != nil {
|
|
|
|
// Finding a regular file header
|
|
|
|
p := s.finishLastPointer()
|
2016-11-21 18:38:39 +00:00
|
|
|
|
2016-11-21 16:50:40 +00:00
|
|
|
// Pertinent file name depends on whether we're listening to additions or removals
|
|
|
|
if s.dir == LogDiffAdditions {
|
2016-11-21 18:38:39 +00:00
|
|
|
s.setFilename(match[2])
|
2016-11-21 16:50:40 +00:00
|
|
|
} else {
|
2016-11-21 18:38:39 +00:00
|
|
|
s.setFilename(match[1])
|
2016-11-21 16:50:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if p != nil {
|
|
|
|
return p, true
|
|
|
|
}
|
|
|
|
} else if match := s.fileMergeHeaderRegex.FindStringSubmatch(line); match != nil {
|
|
|
|
// Git merge file header is a little different, only one file
|
|
|
|
p := s.finishLastPointer()
|
2016-11-21 18:38:39 +00:00
|
|
|
|
|
|
|
s.setFilename(match[1])
|
2016-11-21 16:50:40 +00:00
|
|
|
|
|
|
|
if p != nil {
|
|
|
|
return p, true
|
|
|
|
}
|
|
|
|
} else if s.currentFileIncluded {
|
|
|
|
if match := s.pointerDataRegex.FindStringSubmatch(line); match != nil {
|
|
|
|
// An LFS pointer data line
|
|
|
|
// Include only the entirety of one side of the diff
|
|
|
|
// -U3 will ensure we always get all of it, even if only
|
|
|
|
// the SHA changed (version & size the same)
|
|
|
|
changeType := match[1][0]
|
2020-08-13 13:32:26 +00:00
|
|
|
|
2016-11-21 16:50:40 +00:00
|
|
|
// Always include unchanged context lines (normally just the version line)
|
2020-08-17 19:52:40 +00:00
|
|
|
if LogDiffDirection(changeType) == s.dir || changeType == ' ' {
|
2016-11-21 16:50:40 +00:00
|
|
|
// Must skip diff +/- marker
|
2020-08-17 19:52:40 +00:00
|
|
|
s.pointerData.WriteString(line[1:])
|
2016-11-21 16:50:40 +00:00
|
|
|
s.pointerData.WriteString("\n") // newline was stripped off by scanner
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-11-21 17:36:35 +00:00
|
|
|
if p := s.finishLastPointer(); p != nil {
|
|
|
|
return p, true
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, false
|
2016-11-21 16:50:40 +00:00
|
|
|
}
|
2016-11-21 18:38:39 +00:00
|
|
|
|
|
|
|
func (s *logScanner) setFilename(name string) {
|
|
|
|
s.currentFilename = name
|
|
|
|
s.currentFileIncluded = s.Filter.Allows(name)
|
|
|
|
}
|