From b5f276e13a92fe5ff1e822d4cc9f88f2b3f31118 Mon Sep 17 00:00:00 2001 From: Chris Darroch Date: Tue, 19 Apr 2022 23:48:51 -0700 Subject: [PATCH 1/6] lfs/gitscanner_log.go: remove unused function The internal parseLogOutputToPointers() function was used only by logPreviousSHAs() and only until commit d8ca6109396e37a927fe6c4ad296120f8bde6c66 of PR #1743 in 2016, so we can remove it now. --- lfs/gitscanner_log.go | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/lfs/gitscanner_log.go b/lfs/gitscanner_log.go index 1bb35562..230367aa 100644 --- a/lfs/gitscanner_log.go +++ b/lfs/gitscanner_log.go @@ -18,8 +18,9 @@ import ( "github.com/rubyist/tracerx" ) -// When scanning diffs e.g. parseLogOutputToPointers, which direction of diff to include -// data from, i.e. '+' or '-'. Depending on what you're scanning for either might be useful +// When scanning diffs with parseScannerLogOutput(), the direction of diff +// to include data from, i.e., '+' or '-'. Depending on what you're scanning +// for either might be useful. type LogDiffDirection byte const ( @@ -181,19 +182,6 @@ func logPreviousSHAs(cb GitScannerFoundPointer, ref string, since time.Time) err return nil } -func parseLogOutputToPointers(log io.Reader, dir LogDiffDirection, - includePaths, excludePaths []string, results chan *WrappedPointer) { - scanner := newLogScanner(dir, log) - if len(includePaths)+len(excludePaths) > 0 { - scanner.Filter = filepathfilter.New(includePaths, excludePaths, filepathfilter.GitAttributes) - } - for scanner.Scan() { - if p := scanner.Pointer(); p != nil { - results <- p - } - } -} - // logScanner parses log output formatted as per logLfsSearchArgs & returns // pointers. type logScanner struct { From b4e70fffa91c1a9ef654aad94609c55a6e7e4092 Mon Sep 17 00:00:00 2001 From: Chris Darroch Date: Sun, 24 Apr 2022 15:11:40 -0700 Subject: [PATCH 2/6] commands/command_prune.go: remove extra whitespace Prior to making changes in the "git lfs prune" command's implementation in subsequent PRs, we first remove a bit of spurious whitespace, just to improve consistency between similar functions. --- commands/command_prune.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/commands/command_prune.go b/commands/command_prune.go index 135ddaa2..bc830fd4 100644 --- a/commands/command_prune.go +++ b/commands/command_prune.go @@ -353,7 +353,6 @@ func pruneTaskGetRetainedAtRef(gitscanner *lfs.GitScanner, ref string, retainCha defer waitg.Done() err := gitscanner.ScanRef(ref, func(p *lfs.WrappedPointer, err error) { - if err != nil { errorChan <- err return @@ -375,7 +374,6 @@ func pruneTaskGetPreviousVersionsOfRef(gitscanner *lfs.GitScanner, ref string, s defer waitg.Done() err := gitscanner.ScanPreviousVersions(ref, since, func(p *lfs.WrappedPointer, err error) { - if err != nil { errorChan <- err return From 739fbbcef29a9a6fc5a4c6089b1fa95f67947933 Mon Sep 17 00:00:00 2001 From: Chris Darroch Date: Mon, 18 Apr 2022 23:52:46 -0700 Subject: [PATCH 3/6] t: revise ls-tree path matching in pointer helpers The assert_pointer() and refute_pointer() test helper functions use grep to match specific lines of output from "git ls-tree". However, while assert_pointer() matches with fixed patterns by using grep's -F option, refute_pointer() does not. This difference was introduced in commit fc421da1c176a9d0844a38e6eb2cba154d864069 of PR #3756, so we update refute_pointer() here to match. In a subsequent PR we will also need to ensure that the file paths we provide as arguments to these functions (specifically, to assert_pointer()) do not match just any part of the paths output by "git ls-tree", but only at the start of the path. That is, "a.bin" should match "a.bin" but not "foo/a.bin". Because the output from "git ls-tree" is space-separated, we prepend a space to our fixed patterns so we match against the start of the file path. This has the limitation that if a test has a file named "foo a.bin", and calls one of these functions with just the filename "a.bin", it will incorrectly match the filename with a space. However, at present only one of our tests that calls either of these functions, specifically the "track: escaped glob pattern with spaces in .gitattributes" test in t/t-track.sh, has a filename with a space in it, and does not create any additional files which might be confused by that filename. --- t/t-migrate-fixup.sh | 2 +- t/testhelpers.sh | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/t/t-migrate-fixup.sh b/t/t-migrate-fixup.sh index 8f027fce..32d56fa2 100755 --- a/t/t-migrate-fixup.sh +++ b/t/t-migrate-fixup.sh @@ -34,7 +34,7 @@ begin_test "migrate import (--fixup, complex nested)" git lfs migrate import --everything --fixup --yes assert_pointer "refs/heads/main" "a.txt" "$a_oid" "1" - refute_pointer "refs/heads/main" "b.txt" + refute_pointer "refs/heads/main" "dir/b.txt" assert_local_object "$a_oid" "1" refute_local_object "$b_oid" "1" diff --git a/t/testhelpers.sh b/t/testhelpers.sh index e32e32a8..7c921f43 100644 --- a/t/testhelpers.sh +++ b/t/testhelpers.sh @@ -2,6 +2,9 @@ # assert_pointer confirms that the pointer in the repository for $path in the # given $ref matches the given $oid and $size. +# Note that $path is prepended with a space to match the against the start +# of path field in the ls-tree output, so be careful if your test involves +# files with spaces in their paths. # # $ assert_pointer "main" "path/to/file" "some-oid" 123 assert_pointer() { @@ -14,7 +17,7 @@ assert_pointer() { while read -r -d $'\0' x; do echo $x done | - grep -F "$path" | cut -f 3 -d " ") + grep -F " $path" | cut -f 3 -d " ") actual=$(git cat-file -p $gitblob) expected=$(pointer $oid $size) @@ -26,6 +29,9 @@ assert_pointer() { # refute_pointer confirms that the file in the repository for $path in the # given $ref is _not_ a pointer. +# Note that $path is prepended with a space to match the against the start +# of path field in the ls-tree output, so be careful if your test involves +# files with spaces in their paths. # # $ refute_pointer "main" "path/to/file" refute_pointer() { @@ -36,7 +42,7 @@ refute_pointer() { while read -r -d $'\0' x; do echo $x done | - grep "$path" | cut -f 3 -d " ") + grep -F " $path" | cut -f 3 -d " ") file=$(git cat-file -p $gitblob) version="version https://git-lfs.github.com/spec/v[0-9]" From d957cfe2b5ccd2400a75f2072780d2949c829359 Mon Sep 17 00:00:00 2001 From: Chris Darroch Date: Mon, 18 Apr 2022 23:56:59 -0700 Subject: [PATCH 4/6] t/t-fsck.sh: add files in directory to object test In our "fsck detects invalid objects" test of the "git lfs fsck" command we currently only use files in the top-level directory for which we then corrupt or remove the corresponding Git LFS object files. In a subsequent PR we will add a parallel test which checks that the "lfs.fetchexclude" option is supported for this command, specifically by excluding files in a subdirectory. For the primary test, then, we want to demonstrate normal behaviour without path-based exclusions, so we add files in a subdirectory to our existing test and ensure their corruption is detected. --- t/t-fsck.sh | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/t/t-fsck.sh b/t/t-fsck.sh index 48d4ecaa..79444da1 100755 --- a/t/t-fsck.sh +++ b/t/t-fsck.sh @@ -273,13 +273,20 @@ setup_invalid_objects () { git lfs track *.dat echo "test data" > a.dat echo "test data 2" > b.dat - git add .gitattributes *.dat + mkdir foo + echo "test test 3" > foo/a.dat + echo "test data 4" > foo/b.dat + git add .gitattributes *.dat foo git commit -m "first commit" oid1=$(calc_oid_file a.dat) oid2=$(calc_oid_file b.dat) + oid3=$(calc_oid_file foo/a.dat) + oid4=$(calc_oid_file foo/b.dat) echo "CORRUPTION" >>".git/lfs/objects/${oid1:0:2}/${oid1:2:2}/$oid1" rm ".git/lfs/objects/${oid2:0:2}/${oid2:2:2}/$oid2" + echo "CORRUPTION" >>".git/lfs/objects/${oid3:0:2}/${oid3:2:2}/$oid3" + rm ".git/lfs/objects/${oid4:0:2}/${oid4:2:2}/$oid4" } begin_test "fsck detects invalid objects" @@ -297,6 +304,8 @@ begin_test "fsck detects invalid objects" [ "$RET" -eq 1 ] [ $(grep -c 'objects: corruptObject: a.dat (.*) is corrupt' test.log) -eq 1 ] [ $(grep -c 'objects: openError: b.dat (.*) could not be checked: .*' test.log) -eq 1 ] + [ $(grep -c 'objects: corruptObject: foo/a.dat (.*) is corrupt' test.log) -eq 1 ] + [ $(grep -c 'objects: openError: foo/b.dat (.*) could not be checked: .*' test.log) -eq 1 ] [ $(grep -c 'objects: repair: moving corrupt objects to .*' test.log) -eq 1 ] cd .. @@ -311,6 +320,8 @@ begin_test "fsck detects invalid objects" [ "$RET" -eq 1 ] [ $(grep -c 'objects: corruptObject: a.dat (.*) is corrupt' test.log) -eq 1 ] [ $(grep -c 'objects: openError: b.dat (.*) could not be checked: .*' test.log) -eq 1 ] + [ $(grep -c 'objects: corruptObject: foo/a.dat (.*) is corrupt' test.log) -eq 1 ] + [ $(grep -c 'objects: openError: foo/b.dat (.*) could not be checked: .*' test.log) -eq 1 ] [ $(grep -c 'objects: repair: moving corrupt objects to .*' test.log) -eq 1 ] ) end_test From eee89fc5a9dd3c0a43fef9968c493ddc733b8349 Mon Sep 17 00:00:00 2001 From: Chris Darroch Date: Thu, 21 Apr 2022 23:31:36 -0700 Subject: [PATCH 5/6] t/t-prune*.sh: remove extra whitespace In subsequent PRs we expect to expand and revise our tests of the "git lfs prune" command, so we first remove some spurious whitespace, just to improve consistency between the tests. --- t/t-prune-worktree.sh | 4 ---- t/t-prune.sh | 10 ---------- 2 files changed, 14 deletions(-) diff --git a/t/t-prune-worktree.sh b/t/t-prune-worktree.sh index 209c4362..71248201 100755 --- a/t/t-prune-worktree.sh +++ b/t/t-prune-worktree.sh @@ -6,7 +6,6 @@ ensure_git_version_isnt $VERSION_LOWER "2.5.0" begin_test "prune worktree" ( - set -e reponame="prune_worktree" @@ -102,8 +101,5 @@ begin_test "prune worktree" git lfs prune --dry-run 2>&1 | tee prune.log grep "prune: 6 local objects, 2 retained, done." prune.log grep "prune: 4 files would be pruned" prune.log - - ) end_test - diff --git a/t/t-prune.sh b/t/t-prune.sh index 42c5a9b9..b5bf1338 100755 --- a/t/t-prune.sh +++ b/t/t-prune.sh @@ -26,7 +26,6 @@ begin_test "prune unreferenced and old" oid_retain1=$(calc_oid "$content_retain1") oid_retain2=$(calc_oid "$content_retain2") - # Remember for something to be 'too old' it has to appear on the MINUS side # of the diff outside the prune window, i.e. it's not when it was introduced # but when it disappeared from relevance. That's why changes to file1.dat on main @@ -110,7 +109,6 @@ begin_test "prune keep unpushed" git lfs track "*.dat" 2>&1 | tee track.log grep "Tracking \"\*.dat\"" track.log - content_keepunpushedhead1="Keep: unpushed HEAD 1" content_keepunpushedhead2="Keep: unpushed HEAD 2" content_keepunpushedhead3="Keep: unpushed HEAD 3" @@ -204,7 +202,6 @@ begin_test "prune keep unpushed" assert_server_object "remote_$reponame" "$oid_keepunpushedbranch1" assert_server_object "remote_$reponame" "$oid_keepunpushedbranch2" assert_server_object "remote_$reponame" "$oid_keepunpushedhead3" - ) end_test @@ -243,7 +240,6 @@ begin_test "prune keep recent" oid_prunecommitbranch2=$(calc_oid "$content_prunecommitbranch2") oid_prunecommithead=$(calc_oid "$content_prunecommithead") - # use a single file so each commit supersedes the last, if different files # then history becomes harder to track # Also note that when considering 'recent' when editing a single file, it means @@ -325,7 +321,6 @@ begin_test "prune keep recent" # push everything so that's not a reason to retain git push origin main:main branch_old:branch_old branch1:branch1 branch2:branch2 - git lfs prune --verbose 2>&1 | tee prune.log grep "prune: 11 local objects, 6 retained, done." prune.log grep "prune: Deleting objects: 100% (5/5), done." prune.log @@ -368,7 +363,6 @@ begin_test "prune keep recent" assert_local_object "$oid_keephead" "${#content_keephead}" refute_local_object "$oid_keeprecentbranch1tip" refute_local_object "$oid_keeprecentbranch2tip" - ) end_test @@ -435,9 +429,6 @@ begin_test "prune remote tests" git lfs prune --verbose --dry-run 2>&1 | tee prune.log grep "prune: 4 local objects, 1 retained, done." prune.log grep "prune: 3 files would be pruned" prune.log - - - ) end_test @@ -534,7 +525,6 @@ begin_test "prune verify" refute_local_object "$oid_commit1" refute_local_object "$oid_commit2_failverify" refute_local_object "$oid_commit3" - ) end_test From d85d28d193bb88e97daf4f43be0538819d3a21f4 Mon Sep 17 00:00:00 2001 From: Chris Darroch Date: Sun, 24 Apr 2022 16:03:39 -0700 Subject: [PATCH 6/6] t/t-prune.sh: edit comments and drop unused prune We revise some of the comments in the "prune keep unpushed" test to clarify their meaning before we add additional checks to this test. We also drop an unused "git lfs prune --dry-run" invocation since we are not testing its output and therefore it serves no purpose in the test. --- t/t-prune.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/t/t-prune.sh b/t/t-prune.sh index b5bf1338..92abc0c4 100755 --- a/t/t-prune.sh +++ b/t/t-prune.sh @@ -170,7 +170,7 @@ begin_test "prune keep unpushed" git lfs prune - # Now push main and show that older versions on main will be removed + # Now push main and show that only older versions on main will be removed. git push origin main git lfs prune --verbose 2>&1 | tee prune.log @@ -181,14 +181,14 @@ begin_test "prune keep unpushed" refute_local_object "$oid_keepunpushedhead1" refute_local_object "$oid_keepunpushedhead2" - # MERGE the secondary branch, delete the branch then push main, then make sure - # we delete the intermediate commits but also make sure they're on server - # resolve conflicts by taking other branch + # Merge the unpushed branch, delete it, and then push main. + # Resolve conflicts by taking other branch. git merge -Xtheirs branch_unpushed git branch -D branch_unpushed - git lfs prune --dry-run git push origin main + # Now make sure we purged all the intermediate commits but also make sure + # they are on the remote. git lfs prune --verbose 2>&1 | tee prune.log grep "prune: 4 local objects, 1 retained" prune.log grep "prune: Deleting objects: 100% (3/3), done." prune.log @@ -197,7 +197,7 @@ begin_test "prune keep unpushed" grep "$oid_keepunpushedhead3" prune.log refute_local_object "$oid_keepunpushedbranch1" refute_local_object "$oid_keepunpushedbranch2" - # we used -Xtheirs so old head state is now obsolete, is the last state on branch + # We used -Xtheirs when merging the branch so the old HEAD is now obsolete. refute_local_object "$oid_keepunpushedhead3" assert_server_object "remote_$reponame" "$oid_keepunpushedbranch1" assert_server_object "remote_$reponame" "$oid_keepunpushedbranch2"