Fix #732: Add LFS objects to base repository on merging (#7082)

On merge we walk the merge history and ensure that all lfs objects pointed to in
the history are added to the base repository. This switches from relying on having git-lfs installed on the server, (and in fact .gitattributes being correctly installed.)
This commit is contained in:
zeripath
2019-06-22 18:35:34 +01:00
committed by GitHub
parent d14595514e
commit baefea311f
7 changed files with 648 additions and 292 deletions

View File

@ -66,6 +66,44 @@ func doAPICreateRepository(ctx APITestContext, empty bool, callback ...func(*tes
}
}
func doAPIAddCollaborator(ctx APITestContext, username string, mode models.AccessMode) func(*testing.T) {
return func(t *testing.T) {
permission := "read"
if mode == models.AccessModeAdmin {
permission = "admin"
} else if mode > models.AccessModeRead {
permission = "write"
}
addCollaboratorOption := &api.AddCollaboratorOption{
Permission: &permission,
}
req := NewRequestWithJSON(t, "PUT", fmt.Sprintf("/api/v1/repos/%s/%s/collaborators/%s?token=%s", ctx.Username, ctx.Reponame, username, ctx.Token), addCollaboratorOption)
if ctx.ExpectedCode != 0 {
ctx.Session.MakeRequest(t, req, ctx.ExpectedCode)
return
}
ctx.Session.MakeRequest(t, req, http.StatusNoContent)
}
}
func doAPIForkRepository(ctx APITestContext, username string, callback ...func(*testing.T, api.Repository)) func(*testing.T) {
return func(t *testing.T) {
createForkOption := &api.CreateForkOption{}
req := NewRequestWithJSON(t, "POST", fmt.Sprintf("/api/v1/repos/%s/%s/forks?token=%s", username, ctx.Reponame, ctx.Token), createForkOption)
if ctx.ExpectedCode != 0 {
ctx.Session.MakeRequest(t, req, ctx.ExpectedCode)
return
}
resp := ctx.Session.MakeRequest(t, req, http.StatusAccepted)
var repository api.Repository
DecodeJSON(t, resp, &repository)
if len(callback) > 0 {
callback[0](t, repository)
}
}
}
func doAPIGetRepository(ctx APITestContext, callback ...func(*testing.T, api.Repository)) func(*testing.T) {
return func(t *testing.T) {
urlStr := fmt.Sprintf("/api/v1/repos/%s/%s?token=%s", ctx.Username, ctx.Reponame, ctx.Token)

View File

@ -39,17 +39,23 @@ func testGit(t *testing.T, u *url.URL) {
u.Path = baseAPITestContext.GitPath()
forkedUserCtx := NewAPITestContext(t, "user4", "repo1")
t.Run("HTTP", func(t *testing.T) {
PrintCurrentTest(t)
ensureAnonymousClone(t, u)
httpContext := baseAPITestContext
httpContext.Reponame = "repo-tmp-17"
forkedUserCtx.Reponame = httpContext.Reponame
dstPath, err := ioutil.TempDir("", httpContext.Reponame)
assert.NoError(t, err)
defer os.RemoveAll(dstPath)
t.Run("CreateRepo", doAPICreateRepository(httpContext, false))
ensureAnonymousClone(t, u)
t.Run("CreateRepoInDifferentUser", doAPICreateRepository(forkedUserCtx, false))
t.Run("AddUserAsCollaborator", doAPIAddCollaborator(forkedUserCtx, httpContext.Username, models.AccessModeRead))
t.Run("ForkFromDifferentUser", doAPIForkRepository(httpContext, forkedUserCtx.Username))
u.Path = httpContext.GitPath()
u.User = url.UserPassword(username, userPassword)
@ -62,12 +68,23 @@ func testGit(t *testing.T, u *url.URL) {
mediaTest(t, &httpContext, little, big, littleLFS, bigLFS)
t.Run("BranchProtectMerge", doBranchProtectPRMerge(&httpContext, dstPath))
t.Run("MergeFork", func(t *testing.T) {
t.Run("CreatePRAndMerge", doMergeFork(httpContext, forkedUserCtx, "master", httpContext.Username+":master"))
t.Run("DeleteRepository", doAPIDeleteRepository(httpContext))
rawTest(t, &forkedUserCtx, little, big, littleLFS, bigLFS)
mediaTest(t, &forkedUserCtx, little, big, littleLFS, bigLFS)
})
})
t.Run("SSH", func(t *testing.T) {
PrintCurrentTest(t)
sshContext := baseAPITestContext
sshContext.Reponame = "repo-tmp-18"
keyname := "my-testing-key"
forkedUserCtx.Reponame = sshContext.Reponame
t.Run("CreateRepoInDifferentUser", doAPICreateRepository(forkedUserCtx, false))
t.Run("AddUserAsCollaborator", doAPIAddCollaborator(forkedUserCtx, sshContext.Username, models.AccessModeRead))
t.Run("ForkFromDifferentUser", doAPIForkRepository(sshContext, forkedUserCtx.Username))
//Setup key the user ssh key
withKeyFile(t, keyname, func(keyFile string) {
t.Run("CreateUserKey", doAPICreateUserKey(sshContext, "test-key", keyFile))
@ -81,8 +98,6 @@ func testGit(t *testing.T, u *url.URL) {
assert.NoError(t, err)
defer os.RemoveAll(dstPath)
t.Run("CreateRepo", doAPICreateRepository(sshContext, false))
t.Run("Clone", doGitClone(dstPath, sshURL))
little, big := standardCommitAndPushTest(t, dstPath)
@ -91,8 +106,13 @@ func testGit(t *testing.T, u *url.URL) {
mediaTest(t, &sshContext, little, big, littleLFS, bigLFS)
t.Run("BranchProtectMerge", doBranchProtectPRMerge(&sshContext, dstPath))
t.Run("MergeFork", func(t *testing.T) {
t.Run("CreatePRAndMerge", doMergeFork(sshContext, forkedUserCtx, "master", sshContext.Username+":master"))
t.Run("DeleteRepository", doAPIDeleteRepository(sshContext))
rawTest(t, &forkedUserCtx, little, big, littleLFS, bigLFS)
mediaTest(t, &forkedUserCtx, little, big, littleLFS, bigLFS)
})
})
})
}
@ -341,3 +361,16 @@ func doProtectBranch(ctx APITestContext, branch string, userToWhitelist string)
assert.EqualValues(t, "success%3DBranch%2Bprotection%2Bfor%2Bbranch%2B%2527"+url.QueryEscape(branch)+"%2527%2Bhas%2Bbeen%2Bupdated.", flashCookie.Value)
}
}
func doMergeFork(ctx, baseCtx APITestContext, baseBranch, headBranch string) func(t *testing.T) {
return func(t *testing.T) {
var pr api.PullRequest
var err error
t.Run("CreatePullRequest", func(t *testing.T) {
pr, err = doAPICreatePullRequest(ctx, baseCtx.Username, baseCtx.Reponame, baseBranch, headBranch)(t)
assert.NoError(t, err)
})
t.Run("MergePR", doAPIMergePullRequest(baseCtx, baseCtx.Username, baseCtx.Reponame, pr.Index))
}
}

File diff suppressed because it is too large Load Diff

226
modules/pull/lfs.go Normal file
View File

@ -0,0 +1,226 @@
// Copyright 2019 The Gitea Authors.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package pull
import (
"bufio"
"bytes"
"fmt"
"io"
"strconv"
"strings"
"sync"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
)
// LFSPush pushes lfs objects referred to in new commits in the head repository from the base repository
func LFSPush(tmpBasePath, mergeHeadSHA, mergeBaseSHA string, pr *models.PullRequest) error {
// Now we have to implement git lfs push
// git rev-list --objects --filter=blob:limit=1k HEAD --not base
// pass blob shas in to git cat-file --batch-check (possibly unnecessary)
// ensure only blobs and <=1k size then pass in to git cat-file --batch
// to read each sha and check each as a pointer
// Then if they are lfs -> add them to the baseRepo
revListReader, revListWriter := io.Pipe()
shasToCheckReader, shasToCheckWriter := io.Pipe()
catFileCheckReader, catFileCheckWriter := io.Pipe()
shasToBatchReader, shasToBatchWriter := io.Pipe()
catFileBatchReader, catFileBatchWriter := io.Pipe()
errChan := make(chan error, 1)
wg := sync.WaitGroup{}
wg.Add(6)
// Create the go-routines in reverse order.
// 6. Take the output of cat-file --batch and check if each file in turn
// to see if they're pointers to files in the LFS store associated with
// the head repo and add them to the base repo if so
go readCatFileBatch(catFileBatchReader, &wg, pr)
// 5. Take the shas of the blobs and batch read them
go doCatFileBatch(shasToBatchReader, catFileBatchWriter, &wg, tmpBasePath)
// 4. From the provided objects restrict to blobs <=1k
go readCatFileBatchCheck(catFileCheckReader, shasToBatchWriter, &wg)
// 3. Run batch-check on the objects retrieved from rev-list
go doCatFileBatchCheck(shasToCheckReader, catFileCheckWriter, &wg, tmpBasePath)
// 2. Check each object retrieved rejecting those without names as they will be commits or trees
go readRevListObjects(revListReader, shasToCheckWriter, &wg)
// 1. Run rev-list objects from mergeHead to mergeBase
go doRevListObjects(revListWriter, &wg, tmpBasePath, mergeHeadSHA, mergeBaseSHA, errChan)
wg.Wait()
select {
case err, has := <-errChan:
if has {
return err
}
default:
}
return nil
}
func doRevListObjects(revListWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath, headSHA, baseSHA string, errChan chan<- error) {
defer wg.Done()
defer revListWriter.Close()
stderr := new(bytes.Buffer)
var errbuf strings.Builder
cmd := git.NewCommand("rev-list", "--objects", headSHA, "--not", baseSHA)
if err := cmd.RunInDirPipeline(tmpBasePath, revListWriter, stderr); err != nil {
log.Error("git rev-list [%s]: %v - %s", tmpBasePath, err, errbuf.String())
errChan <- fmt.Errorf("git rev-list [%s]: %v - %s", tmpBasePath, err, errbuf.String())
}
}
func readRevListObjects(revListReader *io.PipeReader, shasToCheckWriter *io.PipeWriter, wg *sync.WaitGroup) {
defer wg.Done()
defer revListReader.Close()
defer shasToCheckWriter.Close()
scanner := bufio.NewScanner(revListReader)
for scanner.Scan() {
line := scanner.Text()
if len(line) == 0 {
continue
}
fields := strings.Split(line, " ")
if len(fields) < 2 || len(fields[1]) == 0 {
continue
}
toWrite := []byte(fields[0] + "\n")
for len(toWrite) > 0 {
n, err := shasToCheckWriter.Write(toWrite)
if err != nil {
_ = revListReader.CloseWithError(err)
break
}
toWrite = toWrite[n:]
}
}
_ = shasToCheckWriter.CloseWithError(scanner.Err())
}
func doCatFileBatchCheck(shasToCheckReader *io.PipeReader, catFileCheckWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath string) {
defer wg.Done()
defer shasToCheckReader.Close()
defer catFileCheckWriter.Close()
stderr := new(bytes.Buffer)
var errbuf strings.Builder
cmd := git.NewCommand("cat-file", "--batch-check")
if err := cmd.RunInDirFullPipeline(tmpBasePath, catFileCheckWriter, stderr, shasToCheckReader); err != nil {
_ = catFileCheckWriter.CloseWithError(fmt.Errorf("git cat-file --batch-check [%s]: %v - %s", tmpBasePath, err, errbuf.String()))
}
}
func readCatFileBatchCheck(catFileCheckReader *io.PipeReader, shasToBatchWriter *io.PipeWriter, wg *sync.WaitGroup) {
defer wg.Done()
defer catFileCheckReader.Close()
scanner := bufio.NewScanner(catFileCheckReader)
defer func() {
_ = shasToBatchWriter.CloseWithError(scanner.Err())
}()
for scanner.Scan() {
line := scanner.Text()
if len(line) == 0 {
continue
}
fields := strings.Split(line, " ")
if len(fields) < 3 || fields[1] != "blob" {
continue
}
size, _ := strconv.Atoi(string(fields[2]))
if size > 1024 {
continue
}
toWrite := []byte(fields[0] + "\n")
for len(toWrite) > 0 {
n, err := shasToBatchWriter.Write(toWrite)
if err != nil {
_ = catFileCheckReader.CloseWithError(err)
break
}
toWrite = toWrite[n:]
}
}
}
func doCatFileBatch(shasToBatchReader *io.PipeReader, catFileBatchWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath string) {
defer wg.Done()
defer shasToBatchReader.Close()
defer catFileBatchWriter.Close()
stderr := new(bytes.Buffer)
var errbuf strings.Builder
if err := git.NewCommand("cat-file", "--batch").RunInDirFullPipeline(tmpBasePath, catFileBatchWriter, stderr, shasToBatchReader); err != nil {
_ = shasToBatchReader.CloseWithError(fmt.Errorf("git rev-list [%s]: %v - %s", tmpBasePath, err, errbuf.String()))
}
}
func readCatFileBatch(catFileBatchReader *io.PipeReader, wg *sync.WaitGroup, pr *models.PullRequest) {
defer wg.Done()
defer catFileBatchReader.Close()
bufferedReader := bufio.NewReader(catFileBatchReader)
buf := make([]byte, 1025)
for {
// File descriptor line: sha
_, err := bufferedReader.ReadString(' ')
if err != nil {
_ = catFileBatchReader.CloseWithError(err)
break
}
// Throw away the blob
if _, err := bufferedReader.ReadString(' '); err != nil {
_ = catFileBatchReader.CloseWithError(err)
break
}
sizeStr, err := bufferedReader.ReadString('\n')
if err != nil {
_ = catFileBatchReader.CloseWithError(err)
break
}
size, err := strconv.Atoi(sizeStr[:len(sizeStr)-1])
if err != nil {
_ = catFileBatchReader.CloseWithError(err)
break
}
pointerBuf := buf[:size+1]
if _, err := io.ReadFull(bufferedReader, pointerBuf); err != nil {
_ = catFileBatchReader.CloseWithError(err)
break
}
pointerBuf = pointerBuf[:size]
// Now we need to check if the pointerBuf is an LFS pointer
pointer := lfs.IsPointerFile(&pointerBuf)
if pointer == nil {
continue
}
// Then we need to check that this pointer is in the db
if _, err := pr.HeadRepo.GetLFSMetaObjectByOid(pointer.Oid); err != nil {
if err == models.ErrLFSObjectNotExist {
log.Warn("During merge of: %d in %-v, there is a pointer to LFS Oid: %s which although present in the LFS store is not associated with the head repo %-v", pr.Index, pr.BaseRepo, pointer.Oid, pr.HeadRepo)
continue
}
_ = catFileBatchReader.CloseWithError(err)
break
}
// OK we have a pointer that is associated with the head repo
// and is actually a file in the LFS
// Therefore it should be associated with the base repo
pointer.RepositoryID = pr.BaseRepoID
if _, err := models.NewLFSMetaObject(pointer); err != nil {
_ = catFileBatchReader.CloseWithError(err)
break
}
}
}

339
modules/pull/merge.go Normal file

File diff suppressed because it is too large Load Diff

View File

@ -15,6 +15,7 @@ import (
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/pull"
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/util"
)
@ -595,7 +596,7 @@ func MergePullRequest(ctx *context.APIContext, form auth.MergePullRequestForm) {
message += "\n\n" + form.MergeMessageField
}
if err := pr.Merge(ctx.User, ctx.Repo.GitRepo, models.MergeStyle(form.Do), message); err != nil {
if err := pull.Merge(pr, ctx.User, ctx.Repo.GitRepo, models.MergeStyle(form.Do), message); err != nil {
if models.IsErrInvalidMergeStyle(err) {
ctx.Status(405)
return

View File

@ -20,6 +20,7 @@ import (
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/notification"
"code.gitea.io/gitea/modules/pull"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
@ -596,7 +597,7 @@ func MergePullRequest(ctx *context.Context, form auth.MergePullRequestForm) {
return
}
if err = pr.Merge(ctx.User, ctx.Repo.GitRepo, models.MergeStyle(form.Do), message); err != nil {
if err = pull.Merge(pr, ctx.User, ctx.Repo.GitRepo, models.MergeStyle(form.Do), message); err != nil {
if models.IsErrInvalidMergeStyle(err) {
ctx.Flash.Error(ctx.Tr("repo.pulls.invalid_merge_option"))
ctx.Redirect(ctx.Repo.RepoLink + "/pulls/" + com.ToStr(pr.Index))