lfs/catfilebatch: don't read large blobs into memory

This commit is contained in:
Taylor Blau 2017-03-23 11:55:40 -06:00
parent 3a858da316
commit 844c0b0db2
2 changed files with 44 additions and 9 deletions

@ -162,24 +162,35 @@ func (s *CatFileBatchScanner) next() (string, string, *WrappedPointer, error) {
blobSha := string(fields[0]) blobSha := string(fields[0])
size, _ := strconv.Atoi(string(fields[2])) size, _ := strconv.Atoi(string(fields[2]))
sha := sha256.New() sha := sha256.New()
buf := make([]byte, size)
read, err := io.ReadFull(io.TeeReader(s.r, sha), buf) var buf *bytes.Buffer
var to io.Writer = sha
if size <= blobSizeCutoff {
buf = bytes.NewBuffer(make([]byte, 0, size))
to = io.MultiWriter(to, buf)
}
read, err := io.CopyN(to, s.r, int64(size))
if err != nil { if err != nil {
return blobSha, "", nil, err return blobSha, "", nil, err
} }
if size != read { if int64(size) != read {
return blobSha, "", nil, fmt.Errorf("expected %d bytes, read %d bytes", size, read) return blobSha, "", nil, fmt.Errorf("expected %d bytes, read %d bytes", size, read)
} }
p, err := DecodePointer(bytes.NewBuffer(buf[:read]))
var pointer *WrappedPointer var pointer *WrappedPointer
var contentsSha string var contentsSha string
if err == nil {
contentsSha = p.Oid if size <= blobSizeCutoff {
pointer = &WrappedPointer{ if p, err := DecodePointer(bytes.NewReader(buf.Bytes())); err != nil {
Sha1: blobSha, contentsSha = fmt.Sprintf("%x", sha.Sum(nil))
Pointer: p, } else {
pointer = &WrappedPointer{
Sha1: blobSha,
Pointer: p,
}
contentsSha = p.Oid
} }
} else { } else {
contentsSha = fmt.Sprintf("%x", sha.Sum(nil)) contentsSha = fmt.Sprintf("%x", sha.Sum(nil))

@ -3,12 +3,14 @@ package lfs
import ( import (
"bufio" "bufio"
"bytes" "bytes"
"crypto/sha256"
"fmt" "fmt"
"io" "io"
"math/rand" "math/rand"
"testing" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
) )
func TestCatFileBatchScannerWithValidOutput(t *testing.T) { func TestCatFileBatchScannerWithValidOutput(t *testing.T) {
@ -55,6 +57,28 @@ func TestCatFileBatchScannerWithValidOutput(t *testing.T) {
assert.Nil(t, scanner.Pointer()) assert.Nil(t, scanner.Pointer())
} }
// TestCatFileBatchScannerWithLargeBlobs feeds the scanner a single blob of
// pseudo-random bytes and checks that it is reported without a pointer, with
// a contents SHA equal to the SHA-256 of the raw bytes, and that the scanner
// then stops cleanly.
func TestCatFileBatchScannerWithLargeBlobs(t *testing.T) {
	// NOTE(review): presumably chosen to sit just above blobSizeCutoff so the
	// non-buffering path is exercised — confirm against the constant.
	const blobLen = 1025

	hasher := sha256.New()
	payload := bytes.NewBuffer(make([]byte, 0, blobLen))

	// A fixed seed keeps the generated contents identical across runs.
	source := rand.New(rand.NewSource(0))

	// Fill the payload and hash the same bytes in a single pass.
	sink := io.MultiWriter(hasher, payload)
	_, err := io.CopyN(sink, source, blobLen)
	require.Nil(t, err)

	// Sum does not alter the hash state, so the expected digest can be taken
	// up front.
	want := fmt.Sprintf("%x", hasher.Sum(nil))

	input := new(bytes.Buffer)
	writeFakeBuffer(t, input, payload.Bytes(), payload.Len())

	s := &CatFileBatchScanner{r: bufio.NewReader(input)}

	// First scan: the blob is consumed, yields no pointer, and reports the
	// digest of its contents.
	require.True(t, s.Scan(nil))
	assert.Nil(t, s.Pointer())
	assert.Equal(t, want, s.ContentsSha())

	// Second scan: input is exhausted, with neither an error nor a pointer.
	assert.False(t, s.Scan(nil))
	assert.Nil(t, s.Err())
	assert.Nil(t, s.Pointer())
}
func assertNextPointer(t *testing.T, scanner *CatFileBatchScanner, oid string) { func assertNextPointer(t *testing.T, scanner *CatFileBatchScanner, oid string) {
assert.True(t, scanner.Scan(nil)) assert.True(t, scanner.Scan(nil))
assert.Nil(t, scanner.Err()) assert.Nil(t, scanner.Err())