From fecaf2da30b27f5b9a19870320b78a60f16fc249 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Tue, 10 Nov 2020 22:52:52 +0000 Subject: [PATCH 1/2] filepathfilter: add an option for stricter matching For backwards compatibility reasons, we transform some wildmatch patterns. However, when we're reading a pattern from .gitattributes, we do this as well, and these transformations can cause us to fail to match some things that Git does match. While we want to retain the behavior of the command-line options for compatibility, at least until 3.0, there's no reason for us to mismatch patterns in .gitattributes: that's clearly a bug. Let's add an option to our pattern constructor to match patterns strictly so we can enable it when reading from .gitattributes, and disable our pattern manipulation when it's enabled. To help us improve debugging, let's also add the value of the strictness flag when printing the transformed pattern. --- filepathfilter/filepathfilter.go | 91 ++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 33 deletions(-) diff --git a/filepathfilter/filepathfilter.go b/filepathfilter/filepathfilter.go index 72103af7..c3fe4bc3 100644 --- a/filepathfilter/filepathfilter.go +++ b/filepathfilter/filepathfilter.go @@ -128,46 +128,71 @@ const ( sep byte = '/' ) -func NewPattern(p string) Pattern { - pp := p +type patternOptions struct { + strict bool +} - // Special case: the below patterns match anything according to existing - // behavior. - switch pp { - case `*`, `.`, `./`, `.\`: - pp = join("**", "*") +type patternOption func(*patternOptions) + +// Strict is an option representing whether to strictly match wildmatch patterns +// the way Git does. If disabled, additional modifications are made to patterns +// for backwards compatibility. +func Strict(val bool) patternOption { + return func(args *patternOptions) { + args.strict = val + } +} + +func NewPattern(p string, setters ...patternOption) Pattern { + args := &patternOptions{strict: false} + for _, setter := range setters { + setter(args) } - dirs := strings.Contains(pp, string(sep)) - rooted := strings.HasPrefix(pp, string(sep)) - wild := strings.Contains(pp, "*") + pp := p - if !dirs && !wild { - // Special case: if pp is a literal string (optionally including - // a character class), rewrite it is a substring match. - pp = join("**", pp, "**") - } else { - if dirs && !rooted { - // Special case: if there are any directory separators, - // rewrite "pp" as a substring match. - if !wild { - pp = join("**", pp, "**") - } + dirs := strings.Contains(pp, string(sep)) + + if !args.strict { + + // Special case: the below patterns match anything according to existing + // behavior. + switch pp { + case `*`, `.`, `./`, `.\`: + pp = join("**", "*") + } + + dirs = strings.Contains(pp, string(sep)) + rooted := strings.HasPrefix(pp, string(sep)) + wild := strings.Contains(pp, "*") + + if !dirs && !wild { + // Special case: if pp is a literal string (optionally including + // a character class), rewrite it is a substring match. + pp = join("**", pp, "**") } else { - if rooted { - // Special case: if there are not any directory - // separators, rewrite "pp" as a substring - // match. - pp = join(pp, "**") + if dirs && !rooted { + // Special case: if there are any directory separators, + // rewrite "pp" as a substring match. + if !wild { + pp = join("**", pp, "**") + } } else { - // Special case: if there are not any directory - // separators, rewrite "pp" as a substring - // match. - pp = join("**", pp) + if rooted { + // Special case: if there are not any directory + // separators, rewrite "pp" as a substring + // match. + pp = join(pp, "**") + } else { + // Special case: if there are not any directory + // separators, rewrite "pp" as a substring + // match. + pp = join("**", pp) + } } } } - tracerx.Printf("filepathfilter: rewrite %q as %q", p, pp) + tracerx.Printf("filepathfilter: rewrite %q as %q (strict: %v)", p, pp, args.strict) return &wm{ p: p, @@ -195,10 +220,10 @@ func join(paths ...string) string { return joined } -func convertToWildmatch(rawpatterns []string) []Pattern { +func convertToWildmatch(rawpatterns []string, setters ...patternOption) []Pattern { patterns := make([]Pattern, len(rawpatterns)) for i, raw := range rawpatterns { - patterns[i] = NewPattern(raw) + patterns[i] = NewPattern(raw, setters...) } return patterns } From 6e8b16667af8f1a7866dc3700469ecceabae8a66 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Thu, 12 Nov 2020 18:41:32 +0000 Subject: [PATCH 2/2] git: match gitattributes patterns strictly When we look for patterns in the .gitattributes file, we want to match these patterns strictly: that is, exactly the way that Git does. We don't want to use a different algorithm because that will lead to confusing behavior when our code behaves differently from Git does. Let's fix this by using our new strict matching when reading patterns from the gitattributes files and add a test to make sure we don't regress anything. --- git/attribs.go | 2 +- t/t-migrate-import-no-rewrite.sh | 38 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/git/attribs.go b/git/attribs.go index eef982d8..5687f5c9 100644 --- a/git/attribs.go +++ b/git/attribs.go @@ -159,7 +159,7 @@ func GetAttributeFilter(workingDir, gitDir string) *filepathfilter.Filter { for _, path := range paths { // Convert all separators to `/` before creating a pattern to // avoid characters being escaped in situations like `subtree\*.md` - patterns = append(patterns, filepathfilter.NewPattern(filepath.ToSlash(path.Path))) + patterns = append(patterns, filepathfilter.NewPattern(filepath.ToSlash(path.Path), filepathfilter.Strict(true))) } return filepathfilter.NewFromPatterns(patterns, nil) diff --git a/t/t-migrate-import-no-rewrite.sh b/t/t-migrate-import-no-rewrite.sh index a876d663..c350a9eb 100755 --- a/t/t-migrate-import-no-rewrite.sh +++ b/t/t-migrate-import-no-rewrite.sh @@ -225,3 +225,41 @@ begin_test "migrate import --no-rewrite (with empty commit message)" fi ) end_test + +begin_test "migrate import --no-rewrite (strict .gitattributes)" +( + set -e + + reponame="$(basename "$0" ".sh")-strict-match" + clone_repo "$reponame" repo-strict-match + + mkdir -p major-oak/mainst/.yarn-offline-mirror/ + mkdir -p major-oak/major-oak/frontend/.yarn-offline-mirror/ + foo_contents="foo" + foo_oid=$(calc_oid "$foo_contents") + bar_contents="bar" + bar_oid=$(calc_oid "$bar_contents") + printf "$foo_contents" > major-oak/mainst/.yarn-offline-mirror/typescript-3.4.3.tgz + printf "$bar_contents" > major-oak/major-oak/frontend/.yarn-offline-mirror/typescript-2.9.2.tgz + git add . + git commit -m 'Initial import' + + cat >.gitattributes <