git-lfs/lfs/gitfilter_clean.go
Chris Darroch 67e9d3ad1f lfs: use blobSizeCutoff in clean pointer buf test
When the "clean" filter processes a pre-existing LFS pointer
blob, the copyToTemp() function returns a CleanPointerError;
it does this when DecodeFrom() successfully reads and parses
the pointer blob and when the entirety of the blob's contents
have been read (i.e., there is no additional blob data to be
read).
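
In outline, this "parse, then check for trailing data"
pattern can be sketched in a self-contained form (the names
below are hypothetical, and a simple prefix check stands in
for the real DecodeFrom() parsing; cutoff stands in for
blobSizeCutoff):

    package main

    import (
        "bytes"
        "fmt"
        "io"
        "strings"
    )

    const cutoff = 1024 // stands in for blobSizeCutoff

    // looksLikeCleanPointer reports whether the blob read from r parses
    // as a pointer (here, it merely starts with the LFS version line)
    // and was read in its entirety, i.e. fewer than cutoff bytes were
    // available.
    func looksLikeCleanPointer(r io.Reader) (data []byte, clean bool) {
        buf := make([]byte, cutoff)
        n, _ := io.ReadFull(r, buf)
        data = buf[:n]
        parsed := bytes.HasPrefix(data, []byte("version https://git-lfs.github.com/spec/v1"))
        return data, parsed && n < cutoff
    }

    func main() {
        blob := "version https://git-lfs.github.com/spec/v1\noid sha256:abc\nsize 12345\n"
        _, clean := looksLikeCleanPointer(strings.NewReader(blob))
        fmt.Println(clean) // true: the blob is already a pointer and fits below the cutoff
    }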

This latter check was originally introduced in commit
e09e5e1000bdc890ff8b27f6f330bbdb980f232b in PR #271, when
the relevant code was in the Clean() method in the
pointer/clean.go file; it used a 512-byte maximum to
determine whether all the blob content had been read.  This
matched the size of the byte array used in DecodeFrom() in
the pointer/pointer.go file.  The 512-byte buffer created in
DecodeFrom() was trimmed to the number of bytes read and
returned to Clean(), which then checked its length: if it
had been populated to the full 512 bytes, there might still
be additional data to be read.

In commit f58db7f7935fe612f455e2939bbf4617dda9e615 in
PR #684 the size of the read buffer in DecodeFrom() was
changed from 512 bytes to the value of blobSizeCutoff, which
was 1024 (and has remained so since).  However, the check in
Clean()'s copyToTemp() function was not changed at the same
time.  (Note that Clean() had by then been refactored and
the check had moved into copyToTemp(), where it remains.)
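
To illustrate the mismatch with a hypothetical size (the
600-byte figure below is invented purely for illustration):
a well-formed pointer blob larger than 512 bytes but smaller
than blobSizeCutoff fits entirely within the enlarged
buffer, yet the stale 512-byte check concludes that more
data might remain, so the blob is not recognized as an
existing clean pointer.

    package main

    import "fmt"

    const blobSizeCutoff = 1024 // buffer size since PR #684, per the history above

    func main() {
        // Hypothetical pointer blob size, between the old and new limits.
        blobLen := 600

        fmt.Println(blobLen < 512)            // false: the stale check thinks more data may remain
        fmt.Println(blobLen < blobSizeCutoff) // true: the whole blob was in fact read
    }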

In commit 6f54232890174a119f7dbfd562d5c1ab57174d0b in
PR #1796 the DecodeFrom() method was changed to return an
io.Reader instead of a byte array, and copyToTemp() then
read from that reader into a byte array using
ioutil.ReadAll().  That introduced a bug in the handling of
large malformed pointers, which was fixed in commit
dcc05817c350fc7f629271c7bed5fa4f4e36bde9 in PR #1852 by
reading into a byte array of blobSizeCutoff length in
copyToTemp().  However, the check for a resultant data
length of less than 512 bytes remained in place.

In order to keep this blob size check in sync with the byte
array allocated in copyToTemp(), we therefore change
copyToTemp() so it checks whether a pointer was successfully
read and whether the blob size is below blobSizeCutoff.  If
both hold, there is no more data to be read, and we can
return a CleanPointerError (to signal that a clean pointer
was found) containing the byte array, which will then be
written out, leaving the blob's contents unchanged.
2021-03-10 20:58:17 -08:00

package lfs

import (
	"bytes"
	"crypto/sha256"
	"encoding/hex"
	"io"
	"os"

	"github.com/git-lfs/git-lfs/errors"
	"github.com/git-lfs/git-lfs/tools"
)

type cleanedAsset struct {
	Filename string
	*Pointer
}

func (f *GitFilter) Clean(reader io.Reader, fileName string, fileSize int64, cb tools.CopyCallback) (*cleanedAsset, error) {
	extensions, err := f.cfg.SortedExtensions()
	if err != nil {
		return nil, err
	}

	var oid string
	var size int64
	var tmp *os.File
	var exts []*PointerExtension
	if len(extensions) > 0 {
		request := &pipeRequest{"clean", reader, fileName, extensions}

		var response pipeResponse
		if response, err = pipeExtensions(f.cfg, request); err != nil {
			return nil, err
		}

		oid = response.results[len(response.results)-1].oidOut
		tmp = response.file
		var stat os.FileInfo
		if stat, err = os.Stat(tmp.Name()); err != nil {
			return nil, err
		}
		size = stat.Size()

		for _, result := range response.results {
			if result.oidIn != result.oidOut {
				ext := NewPointerExtension(result.name, len(exts), result.oidIn)
				exts = append(exts, ext)
			}
		}
	} else {
		oid, size, tmp, err = f.copyToTemp(reader, fileSize, cb)
		if err != nil {
			return nil, err
		}
	}

	pointer := NewPointer(oid, size, exts)
	return &cleanedAsset{tmp.Name(), pointer}, err
}

func (f *GitFilter) copyToTemp(reader io.Reader, fileSize int64, cb tools.CopyCallback) (oid string, size int64, tmp *os.File, err error) {
	tmp, err = TempFile(f.cfg, "")
	if err != nil {
		return
	}

	defer tmp.Close()

	oidHash := sha256.New()
	writer := io.MultiWriter(oidHash, tmp)

	if fileSize <= 0 {
		cb = nil
	}

	// Try to decode the blob as an existing LFS pointer; buf is a reader
	// over the data DecodeFrom consumed from the blob.
	ptr, buf, err := DecodeFrom(reader)

	// Read back up to blobSizeCutoff bytes of that data.
	by := make([]byte, blobSizeCutoff)
	n, rerr := buf.Read(by)
	by = by[:n]

	// A successfully decoded pointer combined with a short read (below
	// blobSizeCutoff) means the whole blob was a pointer already; report
	// that as a CleanPointerError carrying the original bytes.
	if rerr != nil || (err == nil && len(by) < blobSizeCutoff) {
		err = errors.NewCleanPointerError(ptr, by)
		return
	}

	var from io.Reader = bytes.NewReader(by)
	if fileSize < 0 || int64(len(by)) < fileSize {
		// If there is still more data to be read from the file, tack on
		// the original reader and continue the read from there.
		from = io.MultiReader(from, reader)
	}

	size, err = tools.CopyWithCallback(writer, from, fileSize, cb)
	if err != nil {
		return
	}

	oid = hex.EncodeToString(oidHash.Sum(nil))

	return
}

func (a *cleanedAsset) Teardown() error {
	return os.Remove(a.Filename)
}