From 5159055278a457e121d4b7efb59180b33fa2fbad Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Mon, 25 Oct 2021 18:46:56 +0200 Subject: [PATCH] Read expected buffer size (#17409) (#17430) Backport of #17409 * Read expected buffer size. * Changed name. --- modules/charset/charset.go | 5 +++-- modules/csv/csv.go | 5 +---- modules/git/blob.go | 3 ++- modules/repofiles/update.go | 5 +++-- modules/typesniffer/typesniffer.go | 6 ++++-- modules/util/io.go | 20 ++++++++++++++++++++ routers/common/repo.go | 5 +++-- routers/web/repo/attachment.go | 7 +++---- routers/web/repo/editor.go | 4 ++-- routers/web/repo/lfs.go | 7 ++++--- routers/web/repo/view.go | 13 ++++++------- 11 files changed, 51 insertions(+), 29 deletions(-) create mode 100644 modules/util/io.go diff --git a/modules/charset/charset.go b/modules/charset/charset.go index 3000864c2e..55e183ebfc 100644 --- a/modules/charset/charset.go +++ b/modules/charset/charset.go @@ -14,6 +14,7 @@ import ( "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/util" "github.com/gogs/chardet" "golang.org/x/net/html/charset" @@ -26,9 +27,9 @@ var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'} // ToUTF8WithFallbackReader detects the encoding of content and coverts to UTF-8 reader if possible func ToUTF8WithFallbackReader(rd io.Reader) io.Reader { var buf = make([]byte, 2048) - n, err := rd.Read(buf) + n, err := util.ReadAtMost(rd, buf) if err != nil { - return rd + return io.MultiReader(bytes.NewReader(RemoveBOMIfPresent(buf[:n])), rd) } charsetLabel, err := DetectEncoding(buf[:n]) diff --git a/modules/csv/csv.go b/modules/csv/csv.go index ee54452891..83b1a5d37b 100644 --- a/modules/csv/csv.go +++ b/modules/csv/csv.go @@ -30,11 +30,8 @@ func CreateReader(input io.Reader, delimiter rune) *stdcsv.Reader { // CreateReaderAndGuessDelimiter tries to guess the field delimiter from the content and creates a csv.Reader. 
func CreateReaderAndGuessDelimiter(rd io.Reader) (*stdcsv.Reader, error) { var data = make([]byte, 1e4) - size, err := rd.Read(data) + size, err := util.ReadAtMost(rd, data) if err != nil { - if err == io.EOF { - return CreateReader(bytes.NewReader([]byte{}), rune(',')), nil - } return nil, err } diff --git a/modules/git/blob.go b/modules/git/blob.go index 5831bc3735..39b28cbe26 100644 --- a/modules/git/blob.go +++ b/modules/git/blob.go @@ -12,6 +12,7 @@ import ( "io/ioutil" "code.gitea.io/gitea/modules/typesniffer" + "code.gitea.io/gitea/modules/util" ) // This file contains common functions between the gogit and !gogit variants for git Blobs @@ -29,7 +30,7 @@ func (b *Blob) GetBlobContent() (string, error) { } defer dataRc.Close() buf := make([]byte, 1024) - n, _ := dataRc.Read(buf) + n, _ := util.ReadAtMost(dataRc, buf) buf = buf[:n] return string(buf), nil } diff --git a/modules/repofiles/update.go b/modules/repofiles/update.go index ad984c465a..5b45479f3f 100644 --- a/modules/repofiles/update.go +++ b/modules/repofiles/update.go @@ -19,6 +19,7 @@ import ( repo_module "code.gitea.io/gitea/modules/repository" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/structs" + "code.gitea.io/gitea/modules/util" stdcharset "golang.org/x/net/html/charset" "golang.org/x/text/transform" @@ -61,7 +62,7 @@ func detectEncodingAndBOM(entry *git.TreeEntry, repo *models.Repository) (string } defer reader.Close() buf := make([]byte, 1024) - n, err := reader.Read(buf) + n, err := util.ReadAtMost(reader, buf) if err != nil { // return default return "UTF-8", false @@ -84,7 +85,7 @@ func detectEncodingAndBOM(entry *git.TreeEntry, repo *models.Repository) (string } defer dataRc.Close() buf = make([]byte, 1024) - n, err = dataRc.Read(buf) + n, err = util.ReadAtMost(dataRc, buf) if err != nil { // return default return "UTF-8", false diff --git a/modules/typesniffer/typesniffer.go b/modules/typesniffer/typesniffer.go index d257b8179b..9e29b3557c 100644 --- 
a/modules/typesniffer/typesniffer.go +++ b/modules/typesniffer/typesniffer.go @@ -10,6 +10,8 @@ import ( "net/http" "regexp" "strings" + + "code.gitea.io/gitea/modules/util" ) // Use at most this many bytes to determine Content Type. @@ -86,8 +88,8 @@ func DetectContentType(data []byte) SniffedType { // DetectContentTypeFromReader guesses the content type contained in the reader. func DetectContentTypeFromReader(r io.Reader) (SniffedType, error) { buf := make([]byte, sniffLen) - n, err := r.Read(buf) - if err != nil && err != io.EOF { + n, err := util.ReadAtMost(r, buf) + if err != nil { return SniffedType{}, fmt.Errorf("DetectContentTypeFromReader io error: %w", err) } buf = buf[:n] diff --git a/modules/util/io.go b/modules/util/io.go new file mode 100644 index 0000000000..b467c0ac8a --- /dev/null +++ b/modules/util/io.go @@ -0,0 +1,20 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package util + +import ( + "io" +) + +// ReadAtMost reads at most len(buf) bytes from r into buf. +// It returns the number of bytes copied. n is only less than len(buf) if r provides fewer bytes. +// If EOF occurs while reading, err will be nil. 
+func ReadAtMost(r io.Reader, buf []byte) (n int, err error) { + n, err = io.ReadFull(r, buf) + if err == io.EOF || err == io.ErrUnexpectedEOF { + err = nil + } + return +} diff --git a/routers/common/repo.go b/routers/common/repo.go index 8d33fb07fb..e5b1a0493a 100644 --- a/routers/common/repo.go +++ b/routers/common/repo.go @@ -18,6 +18,7 @@ import ( "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/typesniffer" + "code.gitea.io/gitea/modules/util" ) // ServeBlob download a git.Blob @@ -42,8 +43,8 @@ func ServeBlob(ctx *context.Context, blob *git.Blob) error { // ServeData download file from io.Reader func ServeData(ctx *context.Context, name string, size int64, reader io.Reader) error { buf := make([]byte, 1024) - n, err := reader.Read(buf) - if err != nil && err != io.EOF { + n, err := util.ReadAtMost(reader, buf) + if err != nil { return err } if n >= 0 { diff --git a/routers/web/repo/attachment.go b/routers/web/repo/attachment.go index 5becbea271..a32bc3e140 100644 --- a/routers/web/repo/attachment.go +++ b/routers/web/repo/attachment.go @@ -15,6 +15,7 @@ import ( "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/storage" "code.gitea.io/gitea/modules/upload" + "code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/routers/common" ) @@ -43,10 +44,8 @@ func uploadAttachment(ctx *context.Context, allowedTypes string) { defer file.Close() buf := make([]byte, 1024) - n, _ := file.Read(buf) - if n > 0 { - buf = buf[:n] - } + n, _ := util.ReadAtMost(file, buf) + buf = buf[:n] err = upload.Verify(buf, header.Filename, allowedTypes) if err != nil { diff --git a/routers/web/repo/editor.go b/routers/web/repo/editor.go index 0f978c7b01..f7a8778344 100644 --- a/routers/web/repo/editor.go +++ b/routers/web/repo/editor.go @@ -114,7 +114,7 @@ func editFile(ctx *context.Context, isNewFile bool) { ctx.Data["FileName"] = blob.Name() buf := make([]byte, 1024) - n, _ := dataRc.Read(buf) + n, _ := 
util.ReadAtMost(dataRc, buf) buf = buf[:n] // Only some file types are editable online as text. @@ -747,7 +747,7 @@ func UploadFileToServer(ctx *context.Context) { defer file.Close() buf := make([]byte, 1024) - n, _ := file.Read(buf) + n, _ := util.ReadAtMost(file, buf) if n > 0 { buf = buf[:n] } diff --git a/routers/web/repo/lfs.go b/routers/web/repo/lfs.go index 173ffb773f..937d623e5a 100644 --- a/routers/web/repo/lfs.go +++ b/routers/web/repo/lfs.go @@ -26,6 +26,7 @@ import ( "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/storage" "code.gitea.io/gitea/modules/typesniffer" + "code.gitea.io/gitea/modules/util" ) const ( @@ -272,7 +273,7 @@ func LFSFileGet(ctx *context.Context) { } defer dataRc.Close() buf := make([]byte, 1024) - n, err := dataRc.Read(buf) + n, err := util.ReadAtMost(dataRc, buf) if err != nil { ctx.ServerError("Data", err) return @@ -297,10 +298,10 @@ func LFSFileGet(ctx *context.Context) { break } - buf := charset.ToUTF8WithFallbackReader(io.MultiReader(bytes.NewReader(buf), dataRc)) + rd := charset.ToUTF8WithFallbackReader(io.MultiReader(bytes.NewReader(buf), dataRc)) // Building code view blocks with line number on server side. 
- fileContent, _ := ioutil.ReadAll(buf) + fileContent, _ := ioutil.ReadAll(rd) var output bytes.Buffer lines := strings.Split(string(fileContent), "\n") diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index 2be81d7485..64b97a91ab 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -31,6 +31,7 @@ import ( "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/structs" "code.gitea.io/gitea/modules/typesniffer" + "code.gitea.io/gitea/modules/util" ) const ( @@ -264,7 +265,7 @@ func renderDirectory(ctx *context.Context, treeLink string) { defer dataRc.Close() buf := make([]byte, 1024) - n, _ := dataRc.Read(buf) + n, _ := util.ReadAtMost(dataRc, buf) buf = buf[:n] st := typesniffer.DetectContentType(buf) @@ -299,7 +300,7 @@ func renderDirectory(ctx *context.Context, treeLink string) { defer dataRc.Close() buf = make([]byte, 1024) - n, err = dataRc.Read(buf) + n, err = util.ReadAtMost(dataRc, buf) if err != nil { ctx.ServerError("Data", err) return @@ -413,7 +414,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st ctx.Data["RawFileLink"] = rawLink + "/" + ctx.Repo.TreePath buf := make([]byte, 1024) - n, _ := dataRc.Read(buf) + n, _ := util.ReadAtMost(dataRc, buf) buf = buf[:n] st := typesniffer.DetectContentType(buf) @@ -445,10 +446,8 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st defer dataRc.Close() buf = make([]byte, 1024) - n, err = dataRc.Read(buf) - // Error EOF don't mean there is an error, it just means we read to - // the end - if err != nil && err != io.EOF { + n, err = util.ReadAtMost(dataRc, buf) + if err != nil { ctx.ServerError("Data", err) return }