git-lfs/t/t-filter-process.sh

#!/usr/bin/env bash

. "$(dirname "$0")/testlib.sh"

# The tests in this file exercise Git's long-running filter process protocol
# (`filter.<driver>.process`), so they are gated on the installed Git version
# not being lower than 2.11.0.
#
# Historical note, HACK(taylor): this guard originally looked for ".g<hash>"
# in the version name, which Git uses to signal a build from its "next"
# branch -- at the time the only version of Git with filter protocol support.
#
# NOTE(review): ensure_git_version_isnt and VERSION_LOWER come from
# testlib.sh; presumably the whole file is skipped when the check fails --
# confirm against the helper.
ensure_git_version_isnt $VERSION_LOWER "2.11.0"

# Verifies that a clone which can only use `filter.lfs.process` ends up with
# real file contents in the working tree, both right after the clone and after
# switching to another branch.
begin_test "filter process: checking out a branch"
(
  set -e

  reponame="filter_process_checkout"
  setup_remote_repo "$reponame"
  clone_repo "$reponame" repo

  git lfs track "*.dat"
  git add .gitattributes
  git commit -m "initial commit"

  # a.dat lives on the initial branch.
  contents_a="contents_a"
  contents_a_oid="$(calc_oid "$contents_a")"
  printf "%s" "$contents_a" > a.dat

  git add a.dat
  git commit -m "add a.dat"

  # b.dat only exists on branch "b".
  git checkout -b b

  contents_b="contents_b"
  contents_b_oid="$(calc_oid "$contents_b")"
  printf "%s" "$contents_b" > b.dat

  git add b.dat
  git commit -m "add b.dat"

  git push origin --all

  pushd ..
    # Git will choose filter.lfs.process over `filter.lfs.clean` and
    # `filter.lfs.smudge`. Those two are set to `false`, a command that always
    # fails, and the filter is marked as required, so the clone below can only
    # succeed when the process filter is the one actually being used.
    GIT_TRACE_PACKET=1 git \
      -c "filter.lfs.process=git-lfs filter-process" \
      -c "filter.lfs.clean=false" \
      -c "filter.lfs.smudge=false" \
      -c "filter.lfs.required=true" \
      clone "$GITSERVER/$reponame" "$reponame-assert"

    cd "$reponame-assert"

    # Assert that we are on the "main" branch, and have a.dat with its real
    # contents in the working tree. The size handed to assert_pointer is the
    # length of the contents written above.
    [ "main" = "$(git rev-parse --abbrev-ref HEAD)" ]
    [ "$contents_a" = "$(cat a.dat)" ]
    assert_pointer "main" "a.dat" "$contents_a_oid" "${#contents_a}"

    git checkout b

    # Assert that we are on the "b" branch, and have b.dat
    [ "b" = "$(git rev-parse --abbrev-ref HEAD)" ]
    [ "$contents_b" = "$(cat b.dat)" ]
    assert_pointer "b" "b.dat" "$contents_b_oid" "${#contents_b}"
  popd
)
end_test

# Verifies that staging a tracked file through the filter process stores an
# LFS pointer, not the raw contents, in the index.
begin_test "filter process: adding a file"
(
  set -e

  reponame="filter_process_add"
  setup_remote_repo "$reponame"
  clone_repo "$reponame" "$reponame"

  git lfs track "*.dat"
  git add .gitattributes
  git commit -m "initial commit"

  # Write the payload and stage it so the clean side of the filter runs.
  payload="contents"
  printf "%s" "$payload" > a.dat
  git add a.dat

  # Build the pointer we expect from the payload's OID and byte length, then
  # read back what was actually staged for a.dat.
  payload_oid="$(calc_oid "$payload")"
  want="$(pointer "$payload_oid" "${#payload}")"
  have="$(git cat-file -p :a.dat)"

  # diff exits non-zero (failing the test under `set -e`) on any mismatch and
  # prints a unified diff for debugging.
  diff -u <(echo "$want") <(echo "$have")
)
end_test

# Regression test for https://github.com/git-lfs/git-lfs/issues/1697:
# staging a tracked file of exactly 1024 bytes alongside another tracked file
# must complete successfully.
begin_test "filter process: add a file with 1024 bytes"
(
  set -e

  workdir="repo-issue-1697"
  mkdir "$workdir"
  cd "$workdir"
  git init
  git lfs track "*.dat"

  # second.dat is a short file; first.dat is exactly 1024 zero bytes.
  printf "any contents" > second.dat
  dd if=/dev/zero of=first.dat bs=1024 count=1

  # The test passes as long as staging everything exits successfully.
  git add .
)
end_test

# Verifies that `git hash-object --stdin --path <file>` on an LFS-tracked path
# completes and yields the hash of the LFS pointer for the data read from
# standard input, whatever currently exists (or does not exist) at that path.
begin_test "filter process: hash-object --stdin --path does not hang"
(
  set -e

  workdir="repo-hash-object"
  mkdir "$workdir"
  cd "$workdir"
  git init
  git lfs track "*.dat"

  # Hash of the pointer that the small input should be converted into.
  small="test"
  small_oid="$(calc_oid "$small")"
  want=$(pointer "$small_oid" "${#small}" | git hash-object --stdin)

  dd if=/dev/zero of=first.dat bs=1000 count=1
  echo a > second.dat

  # The same piped input must hash identically for all three targets:
  #   first.dat  - existing file longer than the input
  #   second.dat - existing file shorter than the input
  #   third.dat  - absent file
  for target in first.dat second.dat third.dat; do
    got=$(printf "%s" "$small" | git hash-object --path "$target" --stdin)
    [ "$want" = "$got" ]
  done

  # Repeat with a 65537-byte input (one byte more than 64 KiB), again against
  # the absent path.
  large_size=65537
  dd if=/dev/zero of=large.dat bs="$large_size" count=1
  large_oid=$(calc_oid_file large.dat)
  want=$(pointer "$large_oid" "$large_size" | git hash-object --stdin)
  got=$(git hash-object --path third.dat --stdin <large.dat)
  [ "$want" = "$got" ]

  git add .
)
end_test

# Verifies that, once smudging has been disabled with
# `git lfs install --skip-smudge`, files recreated by `git checkout-index`
# are LFS pointers and not the real file contents.
begin_test "filter process: checking out a branch with --skip-smudge and checkout-index"
(
  set -e

  reponame="filter-process-skip-smudge-checkout-index"
  setup_remote_repo "$reponame"
  clone_repo "$reponame" "$reponame"

  git lfs track "*.dat"
  git add .gitattributes
  git commit -m "initial commit"

  # a.dat lives on the initial branch.
  contents_a="contents_a"
  contents_a_oid="$(calc_oid "$contents_a")"
  printf "%s" "$contents_a" > a.dat

  git add a.dat
  git commit -m "add a.dat"

  # b.dat only exists on branch "b".
  git checkout -b b

  contents_b="contents_b"
  contents_b_oid="$(calc_oid "$contents_b")"
  printf "%s" "$contents_b" > b.dat

  git add b.dat
  git commit -m "add b.dat"

  # From here on the repository-local configuration asks LFS to skip smudging.
  git lfs install --local --skip-smudge

  git checkout main

  # Remove the file and have Git rewrite it from the index; `--check` fails
  # the test unless what comes back is a valid LFS pointer.
  rm a.dat
  git checkout-index -af
  git lfs pointer --check --file a.dat

  # The size handed to assert_pointer is the length of the contents written
  # above.
  assert_pointer "main" "a.dat" "$contents_a_oid" "${#contents_a}"

  git checkout b

  # Same again on branch "b", where both a.dat and b.dat are present.
  rm -- *.dat
  git checkout-index -af
  git lfs pointer --check --file a.dat
  git lfs pointer --check --file b.dat

  # Assert that ref "b" has the expected pointer for b.dat
  assert_pointer "b" "b.dat" "$contents_b_oid" "${#contents_b}"
)
end_test
test: add an integration test for checkouts using filter protocol 2016-10-26 17:19:16 +00:00			`#!/usr/bin/env bash`

t: load shell scripts from $(dirname) instead of 't' 2018-07-10 18:48:02 +00:00			`. "$(dirname "$0")/testlib.sh"`
test: add an integration test for checkouts using filter protocol 2016-10-26 17:19:16 +00:00
			`# HACK(taylor): git uses ".g<hash>" in the version name to signal that it is`
			`# from the "next" branch, which is the only (current) version of Git that has`
			`# support for the filter protocol.`
			`#`
filter-process: enable test for Git 2.11 and up 2016-11-22 09:01:14 +00:00			`ensure_git_version_isnt $VERSION_LOWER "2.11.0"`
test: add an integration test for checkouts using filter protocol 2016-10-26 17:19:16 +00:00
			`begin_test "filter process: checking out a branch"`
			`(`
			`set -e`

			`reponame="filter_process_checkout"`
			`setup_remote_repo "$reponame"`
			`clone_repo "$reponame" repo`

			`git lfs track "*.dat"`
			`git add .gitattributes`
			`git commit -m "initial commit"`

			`contents_a="contents_a"`
			`contents_a_oid="$(calc_oid $contents_a)"`
t: avoid using shell variables in printf's first argument The printf(1) command, like its C cousin, takes a format string as its first argument. If a shell variable is passed as the first argument, it will be interpreted as a format string; this can lead to surprising behavior and can cause the test suite to fail if we accidentally insert a format string character into the variable. Modify all the places in the individual tests that we use a plain quoted variable as the format string by running the following Ruby one-liner: ruby -i -pe '$_.gsub!(/printf "\$/, %q(printf "%s" "$))' t/t-*.sh Avoid modifying the test helpers, as there are places (such as calc_oid) where we want to pass text containing escapes (such as "\n") and have those be properly interpreted by printf(1). 2018-09-10 14:57:10 +00:00			`printf "%s" "$contents_a" > a.dat`
test: add an integration test for checkouts using filter protocol 2016-10-26 17:19:16 +00:00
			`git add a.dat`
			`git commit -m "add a.dat"`

			`git checkout -b b`

			`contents_b="contents_b"`
			`contents_b_oid="$(calc_oid $contents_b)"`
t: avoid using shell variables in printf's first argument The printf(1) command, like its C cousin, takes a format string as its first argument. If a shell variable is passed as the first argument, it will be interpreted as a format string; this can lead to surprising behavior and can cause the test suite to fail if we accidentally insert a format string character into the variable. Modify all the places in the individual tests that we use a plain quoted variable as the format string by running the following Ruby one-liner: ruby -i -pe '$_.gsub!(/printf "\$/, %q(printf "%s" "$))' t/t-*.sh Avoid modifying the test helpers, as there are places (such as calc_oid) where we want to pass text containing escapes (such as "\n") and have those be properly interpreted by printf(1). 2018-09-10 14:57:10 +00:00			`printf "%s" "$contents_b" > b.dat`
test: add an integration test for checkouts using filter protocol 2016-10-26 17:19:16 +00:00
			`git add b.dat`
			`git commit -m "add b.dat"`

			`git push origin --all`

			`pushd ..`
test/fp: explicitly set `filter.lfs.{clean,smudge}` to commands that would fail 2016-11-10 18:26:07 +00:00			# Git will choose filter.lfs.process over `filter.lfs.clean` and
			# `filter.lfs.smudge`
commands/filter_process: implement 'delay' capability 2017-08-12 04:18:56 +00:00			`GIT_TRACE_PACKET=1 git \`
commands,lfs,test: rename command to `filter-process` 2016-11-08 20:05:12 +00:00			`-c "filter.lfs.process=git-lfs filter-process" \`
test/fp: explicitly set `filter.lfs.{clean,smudge}` to commands that would fail 2016-11-10 18:26:07 +00:00			`-c "filter.lfs.clean=false"\`
			`-c "filter.lfs.smudge=false" \`
test: add an integration test for checkouts using filter protocol 2016-10-26 17:19:16 +00:00			`-c "filter.lfs.required=true" \`
			`clone "$GITSERVER/$reponame" "$reponame-assert"`

			`cd "$reponame-assert"`

Rename default branch in tests to "main" Currently, our default branch in tests is "master". This is the Git default, but the Git default will likely change in the future, so it makes sense to update our testsuite to be explicit about the branch name. We'll ensure this continues by building against older versions of Git as well as newer versions. We use "main" for the new branch name, since that's the proposed value upstream. This commit was made entirely by automated means using the following command: git grep -l master t \| xargs sed -i -e 's/master/main/g' 2020-06-29 16:18:57 +00:00			`# Assert that we are on the "main" branch, and have a.dat`
			`[ "main" = "$(git rev-parse --abbrev-ref HEAD)" ]`
test: add an integration test for checkouts using filter protocol 2016-10-26 17:19:16 +00:00			`[ "$contents_a" = "$(cat a.dat)" ]`
Rename default branch in tests to "main" Currently, our default branch in tests is "master". This is the Git default, but the Git default will likely change in the future, so it makes sense to update our testsuite to be explicit about the branch name. We'll ensure this continues by building against older versions of Git as well as newer versions. We use "main" for the new branch name, since that's the proposed value upstream. This commit was made entirely by automated means using the following command: git grep -l master t \| xargs sed -i -e 's/master/main/g' 2020-06-29 16:18:57 +00:00			`assert_pointer "main" "a.dat" "$contents_a_oid" 10`
test: add an integration test for checkouts using filter protocol 2016-10-26 17:19:16 +00:00
			`git checkout b`

			`# Assert that we are on the "b" branch, and have b.dat`
			`[ "b" = "$(git rev-parse --abbrev-ref HEAD)" ]`
			`[ "$contents_b" = "$(cat b.dat)" ]`
			`assert_pointer "b" "b.dat" "$contents_b_oid" 10`
			`popd`
			`)`
			`end_test`
test: test staging a file using the filter process 2016-11-02 23:07:35 +00:00
			`begin_test "filter process: adding a file"`
			`(`
			`set -e`

			`reponame="filter_process_add"`
			`setup_remote_repo "$reponame"`
			`clone_repo "$reponame" "$reponame"`

			`git lfs track "*.dat"`
			`git add .gitattributes`
			`git commit -m "initial commit"`

			`contents="contents"`
			`contents_oid="$(calc_oid "$contents")"`
t: avoid using shell variables in printf's first argument The printf(1) command, like its C cousin, takes a format string as its first argument. If a shell variable is passed as the first argument, it will be interpreted as a format string; this can lead to surprising behavior and can cause the test suite to fail if we accidentally insert a format string character into the variable. Modify all the places in the individual tests that we use a plain quoted variable as the format string by running the following Ruby one-liner: ruby -i -pe '$_.gsub!(/printf "\$/, %q(printf "%s" "$))' t/t-*.sh Avoid modifying the test helpers, as there are places (such as calc_oid) where we want to pass text containing escapes (such as "\n") and have those be properly interpreted by printf(1). 2018-09-10 14:57:10 +00:00			`printf "%s" "$contents" > a.dat`
test: test staging a file using the filter process 2016-11-02 23:07:35 +00:00
			`git add a.dat`

			`expected="$(pointer "$contents_oid" "${#contents}")"`
			`got="$(git cat-file -p :a.dat)"`

			`diff -u <(echo "$expected") <(echo "$got")`
			`)`
			`end_test`
filter-process: add test to demonstrate problem with 1024 byte files c.f. https://github.com/git-lfs/git-lfs/issues/1697 2016-11-22 08:51:55 +00:00
			`# https://github.com/git-lfs/git-lfs/issues/1697`
			`begin_test "filter process: add a file with 1024 bytes"`
			`(`
			`set -e`

Travis-CI complains that it cannot create directory `repo'. Rename it! 2016-11-22 11:32:39 +00:00			`mkdir repo-issue-1697`
			`cd repo-issue-1697`
filter-process: add test to demonstrate problem with 1024 byte files c.f. https://github.com/git-lfs/git-lfs/issues/1697 2016-11-22 08:51:55 +00:00			`git init`
			`git lfs track "*.dat"`
			`dd if=/dev/zero of=first.dat bs=1024 count=1`
			`printf "any contents" > second.dat`
			`git add .`
			`)`
			`end_test`

filter-process: avoid hang when using git hash-object --stdin When we use git hash-object --stdin with the --path option, Git applies filters to the object, so Git LFS is invoked. However, if the object provided is less than 1024 bytes in size, we would hang. This occurred because our packet reader didn't quite implement the io.Reader interface completely: if it returned a non-zero value and io.EOF, the next call to Read would not return 0 and io.EOF. Instead, it would try to read from stdin, which would not be sending us more data until we provided a response, so we would hang. To solve this, keep track of the EOF and always return it on subsequent Read calls. In addition, don't process the callback to write the file in this case, since we don't actually want to write into the working tree. 2019-11-04 19:20:45 +00:00			`begin_test "filter process: hash-object --stdin --path does not hang"`
			`(`
			`set -e`
filter-process: add test to demonstrate problem with 1024 byte files c.f. https://github.com/git-lfs/git-lfs/issues/1697 2016-11-22 08:51:55 +00:00
filter-process: avoid hang when using git hash-object --stdin When we use git hash-object --stdin with the --path option, Git applies filters to the object, so Git LFS is invoked. However, if the object provided is less than 1024 bytes in size, we would hang. This occurred because our packet reader didn't quite implement the io.Reader interface completely: if it returned a non-zero value and io.EOF, the next call to Read would not return 0 and io.EOF. Instead, it would try to read from stdin, which would not be sending us more data until we provided a response, so we would hang. To solve this, keep track of the EOF and always return it on subsequent Read calls. In addition, don't process the callback to write the file in this case, since we don't actually want to write into the working tree. 2019-11-04 19:20:45 +00:00			`mkdir repo-hash-object`
			`cd repo-hash-object`
			`git init`
			`git lfs track "*.dat"`
			`contents="test"`
			`contents_oid="$(calc_oid "$contents")"`
			`expected=$(pointer "$contents_oid" 4 \| git hash-object --stdin)`

			`dd if=/dev/zero of=first.dat bs=1000 count=1`
			`echo a > second.dat`
			`# Works for existing file longer than this one.`
			`output=$(printf test \| git hash-object --path first.dat --stdin)`
			`[ "$expected" = "$output" ]`
			`# Works for existing file shorter than this one.`
			`output=$(printf test \| git hash-object --path second.dat --stdin)`
			`[ "$expected" = "$output" ]`
			`# Works for absent file.`
			`output=$(printf test \| git hash-object --path third.dat --stdin)`
			`[ "$expected" = "$output" ]`

			`dd if=/dev/zero of=large.dat bs=65537 count=1`
			`oid=$(calc_oid_file large.dat)`
			`expected=$(pointer "$oid" 65537 \| git hash-object --stdin)`
			`output=$(git hash-object --path third.dat --stdin <large.dat)`
			`[ "$expected" = "$output" ]`
			`git add .`
			`)`
			`end_test`
smudge: honor GIT_LFS_SKIP_SMUDGE with checkout-index In most cases, users use a relatively modern version of Git which supports the filter-process code and delayed smudging. This is valuable because it makes things much faster. However, it also uses a different code path from the non-delayed path. This non-delayed path is also used by git checkout-index. The non-delayed path doesn't work properly, however, if the data is already on disk. In such a case, we simply ask the smudge filter not to download the data, but since it's already on disk, we feed it out anyway. Let's solve this by simply not invoking the filter and emitting the pointer if we've asked to skip. We do the same thing if the file doesn't match the filter, since the comment at the top of the function states that's what's supposed to happen here as well. Once we've done that, the variable download is always true, so remove all the code which considers that it might be false to simplify the code somewhat. 2022-02-07 16:00:01 +00:00
			`begin_test "filter process: checking out a branch with --skip-smudge and checkout-index"`
			`(`
			`set -e`

			`reponame="filter-process-skip-smudge-checkout-index"`
			`setup_remote_repo "$reponame"`
			`clone_repo "$reponame" "$reponame"`

			`git lfs track "*.dat"`
			`git add .gitattributes`
			`git commit -m "initial commit"`

			`contents_a="contents_a"`
			`contents_a_oid="$(calc_oid $contents_a)"`
			`printf "%s" "$contents_a" > a.dat`

			`git add a.dat`
			`git commit -m "add a.dat"`

			`git checkout -b b`

			`contents_b="contents_b"`
			`contents_b_oid="$(calc_oid $contents_b)"`
			`printf "%s" "$contents_b" > b.dat`

			`git add b.dat`
			`git commit -m "add b.dat"`

			`git lfs install --local --skip-smudge`

			`git checkout main`

			`rm a.dat`
			`git checkout-index -af`
			`git lfs pointer --check --file a.dat`

			`assert_pointer "main" "a.dat" "$contents_a_oid" 10`

			`git checkout b`

			`rm *.dat`
			`git checkout-index -af`
			`git lfs pointer --check --file a.dat`
			`git lfs pointer --check --file b.dat`

			`# Assert that we are on the "b" branch, and have b.dat`
			`assert_pointer "b" "b.dat" "$contents_b_oid" 10`
			`)`
			`end_test`