git-lfs/lfs/gitfilter_clean.go

package lfs

import (
	"bytes"
	"crypto/sha256"
	"encoding/hex"
	"io"
	"os"

	"github.com/git-lfs/git-lfs/errors"
	"github.com/git-lfs/git-lfs/tools"
)
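
// cleanedAsset pairs the name of the temporary file holding an asset's
// cleaned contents with the LFS pointer computed for it.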
type cleanedAsset struct {
	Filename string
	*Pointer
}
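
// Clean implements the Git "clean" filter for LFS: it streams the content in
// reader through any configured extensions, or copies it directly into a
// temporary file, and returns a cleanedAsset pairing that temporary file with
// the pointer (sha256 OID, size, extensions) describing the content. cb, if
// non-nil, reports copy progress.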
func (f *GitFilter) Clean(reader io.Reader, fileName string, fileSize int64, cb tools.CopyCallback) (*cleanedAsset, error) {
	extensions, err := f.cfg.SortedExtensions()
	if err != nil {
		return nil, err
	}

	var oid string
	var size int64
	var tmp *os.File
	var exts []*PointerExtension
	if len(extensions) > 0 {
		// Pipe the content through the configured extensions; the last
		// result's output OID identifies the fully cleaned content.
		request := &pipeRequest{"clean", reader, fileName, extensions}

		var response pipeResponse
		if response, err = pipeExtensions(f.cfg, request); err != nil {
			return nil, err
		}

		oid = response.results[len(response.results)-1].oidOut
		tmp = response.file
		var stat os.FileInfo
		if stat, err = os.Stat(tmp.Name()); err != nil {
			return nil, err
		}
		size = stat.Size()

		// Record a pointer extension for each stage that actually
		// transformed the content (its input and output OIDs differ).
		for _, result := range response.results {
			if result.oidIn != result.oidOut {
				ext := NewPointerExtension(result.name, len(exts), result.oidIn)
				exts = append(exts, ext)
			}
		}
	} else {
		oid, size, tmp, err = f.copyToTemp(reader, fileSize, cb)
		if err != nil {
			return nil, err
		}
	}

	pointer := NewPointer(oid, size, exts)
	return &cleanedAsset{tmp.Name(), pointer}, err
}
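
// A minimal usage sketch for Clean (illustrative only: the filter value and
// the file handling below are assumptions, not code from this package):
//
//	file, err := os.Open("video.mp4")
//	if err != nil {
//		return err
//	}
//	defer file.Close()
//
//	stat, err := file.Stat()
//	if err != nil {
//		return err
//	}
//
//	cleaned, err := filter.Clean(file, file.Name(), stat.Size(), nil)
//	if err != nil {
//		return err
//	}
//	defer cleaned.Teardown()
//	// cleaned.Pointer now holds the sha256 OID and size to encode into
//	// the pointer blob stored in Git.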

// copyToTemp streams reader into a temporary file while hashing it, returning
// the content's sha256 OID, its size, and the open temporary file. If the
// input turns out to already be an LFS pointer, it returns a
// CleanPointerError instead so the caller can pass the pointer through
// unchanged.
func (f *GitFilter) copyToTemp(reader io.Reader, fileSize int64, cb tools.CopyCallback) (oid string, size int64, tmp *os.File, err error) {
	tmp, err = TempFile(f.cfg, "")
	if err != nil {
		return
	}

	defer tmp.Close()

	oidHash := sha256.New()
	writer := io.MultiWriter(oidHash, tmp)

	// Progress callbacks are meaningless without a known file size.
	if fileSize <= 0 {
		cb = nil
	}

	// Try to decode the head of the input as an existing LFS pointer.
	ptr, buf, err := DecodeFrom(reader)

	by := make([]byte, blobSizeCutoff)
	n, rerr := buf.Read(by)
	by = by[:n]
	// If a pointer decoded successfully and the blob fits entirely within
	// blobSizeCutoff, there is no more data to read: the input is already
	// a clean pointer. Return a CleanPointerError carrying the original
	// bytes so they can be written out unchanged. This size check must
	// stay in sync with the blobSizeCutoff-sized buffer allocated above.
	if rerr != nil || (err == nil && len(by) < blobSizeCutoff) {
		err = errors.NewCleanPointerError(ptr, by)
		return
	}
	var from io.Reader = bytes.NewReader(by)
	if fileSize < 0 || int64(len(by)) < fileSize {
		// If there is still more data to be read from the file, tack on
		// the original reader and continue the read from there.
		from = io.MultiReader(from, reader)
	}

	size, err = tools.CopyWithCallback(writer, from, fileSize, cb)
	if err != nil {
		return
	}

	oid = hex.EncodeToString(oidHash.Sum(nil))
	return
}
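
// A sketch of how a caller might react when copyToTemp reports that the input
// was already a pointer. IsCleanPointerError is an assumption about the
// errors package API here, not verified code from this revision:
//
//	cleaned, err := filter.Clean(input, name, size, nil)
//	if err != nil && errors.IsCleanPointerError(err) {
//		// The input was already a valid LFS pointer; write its original
//		// bytes through unchanged rather than cleaning it again.
//	}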

// Teardown removes the temporary file created while cleaning the asset.
func (a *cleanedAsset) Teardown() error {
	return os.Remove(a.Filename)
}