From 404992801e194f50d5f3ea8dda56d5fc14671795 Mon Sep 17 00:00:00 2001 From: Steve Streeting Date: Thu, 13 Aug 2020 12:52:30 +0100 Subject: [PATCH] Enhanced prune stash scanner to handle index, working copy, untracked --- git/git.go | 5 +++ lfs/gitscanner_log.go | 98 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 94 insertions(+), 9 deletions(-) diff --git a/git/git.go b/git/git.go index 5b8d9b7f..5b49991d 100644 --- a/git/git.go +++ b/git/git.go @@ -269,6 +269,11 @@ func Log(args ...string) (*subprocess.BufferedCmd, error) { return gitNoLFSBuffered(logArgs...) } +func Show(args ...string) (*subprocess.BufferedCmd, error) { + logArgs := append([]string{"show"}, args...) + return gitNoLFSBuffered(logArgs...) +} + func LsRemote(remote, remoteRef string) (string, error) { if remote == "" { return "", errors.New("remote required") diff --git a/lfs/gitscanner_log.go b/lfs/gitscanner_log.go index ad8a1a1b..5b7927a9 100644 --- a/lfs/gitscanner_log.go +++ b/lfs/gitscanner_log.go @@ -65,26 +65,102 @@ func scanUnpushed(cb GitScannerFoundPointer, remote string) error { } func scanStashed(cb GitScannerFoundPointer, s *GitScanner) error { - // First get the SHAs of all stashes - logArgs := []string{"-g", "--format=%H", "refs/stash", "--"} + + // Stashes are actually 2-3 commits, each containing one of: + // 1. Working copy modified files + // 2. Index changes + // 3. Untracked files, if -u was used + // We need to get the diff of all 3 of these commits to ensure we have all + // of the LFS objects necessary to pop the stash + + // First get the list of stashes + // In recent versions of git you can get parents directly from + // this command, avoiding the intermediate "git show" + // However older gits (at least <=2.7) don't report merge parents in the reflog + // So we need to do it in 2 stages + logArgs := []string{"-g", "--format=%h", "refs/stash", "--"} cmd, err := git.Log(logArgs...) 
if err != nil { return err } - cmd.Start() - defer cmd.Wait() - scanner := bufio.NewScanner(cmd.Stdout) + var allStashShas []string for scanner.Scan() { - err = s.ScanRef(strings.TrimSpace(scanner.Text()), cb) + leafSha := strings.TrimSpace(scanner.Text()) + + allStashShas = append(allStashShas, leafSha) + + // For each leaf, use "git show" to expand parents & thus get + // all 2-3 shas involved in the stash + // As mentioned newer gits could do this in the reflog output but that is not guaranteed + showArgs := []string{"--quiet", "--format=%p", leafSha} + showCmd, err := git.Show(showArgs...) if err != nil { return err } + + // gets the abbreviated parent hashes as : + // A B [C] + // A = Parent commit of the stash (ignore, not part of the stash) + // B = Index changes for the stash + // C = Untracked files (optional, only present if -u) + // So we need to scan refs for the leaf itself, B and optionally C + showScanner := bufio.NewScanner(showCmd.Stdout) + + for showScanner.Scan() { + line := strings.TrimSpace(showScanner.Text()) + refs := strings.Split(line, " ") + for i, ref := range refs { + if i > 0 { // Extra merge parents + allStashShas = append(allStashShas, ref) + } + } + } + err = showCmd.Wait() + if err != nil { + return err + } + + } + err = cmd.Wait() + if err != nil { + // Ignore this error, it really only happens when there's no refs/stash + return nil } + // Now we need to specifically use "git show" to parse results + // We can't use "git log" because weirdly that omits the index changes + // in the diff display, it collapses both into one diff and only shows the + // final change (not a 3-way like show). 
Only "show" on all the shas + // above displays them separately + + // The "leaf" stash actually shows both the index and working copy, like this: + + // - oid sha256:8e1c163c2a04e25158962537cbff2540ded60d4612506a27bc04d059c7ae16dd + // - oid sha256:f2f84832183a0fca648c1ef49cfd32632b16b47ef5f17ac07dcfcb0ae00b86e5 + // -- size 16 + // +++oid sha256:b23f7e7314c5921e3e1cd87456d7867a51ccbe0c2c19ee4df64525c468d775df + // +++size 30 + + // The second "-" entry has a space prefix which shows this as a 3-way diff + // However since we include all 2-3 commits explicitly in the git show, + // We get this line as a "+" entry in the other commit + // So we only need to care about the "+" entries + // We can use the log parser, which can now handle 3-char +/- prefixes as well + + showArgs := logLfsSearchArgs + showArgs = append(showArgs, allStashShas...) + showArgs = append(showArgs, "--") + + cmd, err = git.Show(showArgs...) + if err != nil { + return err + } + + parseScannerLogOutput(cb, LogDiffAdditions, cmd) return nil } @@ -181,7 +257,8 @@ func newLogScanner(dir LogDiffDirection, r io.Reader) *logScanner { commitHeaderRegex: regexp.MustCompile(fmt.Sprintf(`^lfs-commit-sha: (%s)(?: (%s))*`, git.ObjectIDRegex, git.ObjectIDRegex)), fileHeaderRegex: regexp.MustCompile(`diff --git a\/(.+?)\s+b\/(.+)`), fileMergeHeaderRegex: regexp.MustCompile(`diff --cc (.+)`), - pointerDataRegex: regexp.MustCompile(`^([\+\- ])(version https://git-lfs|oid sha256|size|ext-).*$`), + // stash diff can have up to 3 +/- characters. 
We capture the whole prefix; only its first character selects the change type + pointerDataRegex: regexp.MustCompile(`^([\+\- ]{1,3})(version https://git-lfs|oid sha256|size|ext-).*$`), } } @@ -273,11 +350,14 @@ func (s *logScanner) scan() (*WrappedPointer, bool) { // -U3 will ensure we always get all of it, even if only // the SHA changed (version & size the same) changeType := match[1][0] + // merge lines can have 2-3 chars so can't just use changeType==' ' for blank + changeIsBlank := len(strings.TrimSpace(match[1])) == 0 // Always include unchanged context lines (normally just the version line) - if LogDiffDirection(changeType) == s.dir || changeType == ' ' { + if LogDiffDirection(changeType) == s.dir || changeIsBlank { // Must skip diff +/- marker - s.pointerData.WriteString(line[1:]) + // can be 1-3 chars (3 for merge) + s.pointerData.WriteString(line[len(match[1]):]) s.pointerData.WriteString("\n") // newline was stripped off by scanner } }