2017-01-04 16:33:40 +00:00
|
|
|
package tq
|
|
|
|
|
|
|
|
import (
|
2017-04-05 22:43:00 +00:00
|
|
|
"time"
|
|
|
|
|
2021-09-01 19:41:10 +00:00
|
|
|
"github.com/git-lfs/git-lfs/v3/errors"
|
|
|
|
"github.com/git-lfs/git-lfs/v3/git"
|
|
|
|
"github.com/git-lfs/git-lfs/v3/lfsapi"
|
|
|
|
"github.com/git-lfs/git-lfs/v3/lfshttp"
|
2021-12-14 17:28:16 +00:00
|
|
|
"github.com/git-lfs/git-lfs/v3/tr"
|
2017-01-04 17:11:16 +00:00
|
|
|
"github.com/rubyist/tracerx"
|
2017-01-04 16:33:40 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// tqClient issues Git LFS Batch API requests. It embeds an
// *lfsapi.Client for HTTP transport and authentication, and adds a
// configurable retry count that is applied to each batch request.
type tqClient struct {
	// maxRetries is the number of times a single batch request may be
	// retried; it is passed to lfshttp.WithRetries in Batch.
	maxRetries int

	*lfsapi.Client
}
|
|
|
|
|
2018-01-05 18:12:57 +00:00
|
|
|
// batchRef identifies the Git ref on whose behalf a batch request is
// made; it is serialized as the "ref" property of the request body.
type batchRef struct {
	// Name is the refspec of the ref being pushed or fetched; the
	// field is omitted from the JSON payload when empty.
	Name string `json:"name,omitempty"`
}
|
|
|
|
|
2017-01-04 16:33:40 +00:00
|
|
|
// batchRequest is the JSON body sent to the Git LFS Batch API
// ("objects/batch") endpoint.
type batchRequest struct {
	// Operation is the transfer direction as produced by
	// Direction.String() (per the Batch API, "upload" or "download").
	Operation string `json:"operation"`
	// Objects lists the pointers to be transferred.
	Objects []*Transfer `json:"objects"`
	// TransferAdapterNames advertises the transfer adapters this
	// client supports; omitted when empty (tqClient.Batch also drops
	// it when it contains only "basic", the implicit default).
	TransferAdapterNames []string `json:"transfers,omitempty"`
	// Ref names the ref this batch is performed on behalf of.
	Ref *batchRef `json:"ref"`
	// HashAlgorithm is the object-ID hash algorithm; this client
	// always sends "sha256" (see Batch).
	HashAlgorithm string `json:"hash_algo"`
}
|
|
|
|
|
2017-01-09 20:00:59 +00:00
|
|
|
// BatchResponse is the decoded JSON body returned by the Git LFS
// Batch API endpoint.
type BatchResponse struct {
	// Objects echoes the requested pointers, annotated by the server
	// with the actions to perform for each.
	Objects []*Transfer `json:"objects"`
	// TransferAdapterName is the transfer adapter the server selected.
	TransferAdapterName string `json:"transfer"`
	// HashAlgorithm is the object-ID hash algorithm the server used;
	// tqClient.Batch rejects anything other than "sha256" or empty.
	HashAlgorithm string `json:"hash_algo"`
	// endpoint is the LFS endpoint the request was sent to, recorded
	// by tqClient.Batch before issuing the request.
	endpoint lfshttp.Endpoint
}
|
|
|
|
|
tq: make Manifest an interface
Right now, any time we instantiate a Manifest object, we create an API
client, and when we create the API client, if we're using SSH, we try to
make a connection to the server. However, we often instantiate a
Manifest object when performing various functionality such as smudging
data, which means that when a user creates an archive locally, they can
be prompted for an SSH password, which is undesirable.
Let's take a first step to fixing this by making Manifest an interface.
Right now, it has one concrete version, a concreteManifest, which can be
used to access the internals, and we provide methods to upgrade it from
the interface to the concrete type and determine whether it's upgraded
or not. We attempt to upgrade it any time we need to access its
internals. In the future, we'll also offer a lazyManifest, which is
lazy and will only instantiate the concreteManifest inside when we
attempt to upgrade it to the latter. But for now, only implement the
concreteManifest to make it clearer what's changing.
Similarly, we make our TransferQueue upgradable so that we don't
upgrade its Manifest right away.
In both cases, we'll want to use the lazyManifest to delay the
instantiation of the API client (and hence the starting of the SSH
connection) in a future commit.
2023-03-10 15:24:21 +00:00
|
|
|
func Batch(m Manifest, dir Direction, remote string, remoteRef *git.Ref, objects []*Transfer) (*BatchResponse, error) {
|
2017-01-07 03:13:31 +00:00
|
|
|
if len(objects) == 0 {
|
2017-01-09 20:50:19 +00:00
|
|
|
return &BatchResponse{}, nil
|
2017-01-07 03:13:31 +00:00
|
|
|
}
|
|
|
|
|
tq: make Manifest an interface
Right now, any time we instantiate a Manifest object, we create an API
client, and when we create the API client, if we're using SSH, we try to
make a connection to the server. However, we often instantiate a
Manifest object when performing various functionality such as smudging
data, which means that when a user creates an archive locally, they can
be prompted for an SSH password, which is undesirable.
Let's take a first step to fixing this by making Manifest an interface.
Right now, it has one concrete version, a concreteManifest, which can be
used to access the internals, and we provide methods to upgrade it from
the interface to the concrete type and determine whether it's upgraded
or not. We attempt to upgrade it any time we need to access its
internals. In the future, we'll also offer a lazyManifest, which is
lazy and will only instantiate the concreteManifest inside when we
attempt to upgrade it to the latter. But for now, only implement the
concreteManifest to make it clearer what's changing.
Similarly, we make our TransferQueue upgradable so that we don't
upgrade its Manifest right away.
In both cases, we'll want to use the lazyManifest to delay the
instantiation of the API client (and hence the starting of the SSH
connection) in a future commit.
2023-03-10 15:24:21 +00:00
|
|
|
cm := m.Upgrade()
|
|
|
|
|
|
|
|
return cm.batchClient().Batch(remote, &batchRequest{
|
2017-01-09 19:52:36 +00:00
|
|
|
Operation: dir.String(),
|
2017-01-07 03:13:31 +00:00
|
|
|
Objects: objects,
|
|
|
|
TransferAdapterNames: m.GetAdapterNames(dir),
|
2018-01-05 18:12:57 +00:00
|
|
|
Ref: &batchRef{Name: remoteRef.Refspec()},
|
2021-06-02 20:13:49 +00:00
|
|
|
HashAlgorithm: "sha256",
|
2017-01-09 20:50:19 +00:00
|
|
|
})
|
2017-01-07 03:13:31 +00:00
|
|
|
}
|
|
|
|
|
2021-02-10 19:52:37 +00:00
|
|
|
// BatchClient issues Git LFS Batch API requests with a configurable
// retry count; tqClient is the concrete implementation in this
// package.
type BatchClient interface {
	// Batch sends a batch request to the endpoint configured for the
	// given remote and returns the decoded response.
	Batch(remote string, bReq *batchRequest) (*BatchResponse, error)
	// MaxRetries reports how many times a single request may be
	// retried.
	MaxRetries() int
	// SetMaxRetries sets how many times a single request may be
	// retried.
	SetMaxRetries(n int)
}
|
|
|
|
|
|
|
|
// MaxRetries returns the number of times a single batch request may
// be retried.
func (c *tqClient) MaxRetries() int {
	return c.maxRetries
}
|
|
|
|
|
|
|
|
// SetMaxRetries sets the number of times a single batch request may
// be retried.
func (c *tqClient) SetMaxRetries(n int) {
	c.maxRetries = n
}
|
|
|
|
|
2017-01-09 20:50:19 +00:00
|
|
|
// Batch sends one Git LFS Batch API request to the endpoint
// configured for remote and returns the decoded response.
//
// The client-side Missing flag on each requested object is restored
// onto the corresponding response object (the server does not echo
// it), and every returned action is stamped with the time the request
// was made (presumably the reference point for action expiry —
// see where createdAt is consumed to confirm).
func (c *tqClient) Batch(remote string, bReq *batchRequest) (*BatchResponse, error) {
	bRes := &BatchResponse{}
	if len(bReq.Objects) == 0 {
		// Nothing to request; return an empty response without a
		// network round-trip.
		return bRes, nil
	}

	// Advertising only "basic" is equivalent to advertising nothing,
	// since it is the protocol default; drop the field to keep the
	// request payload minimal.
	if len(bReq.TransferAdapterNames) == 1 && bReq.TransferAdapterNames[0] == "basic" {
		bReq.TransferAdapterNames = nil
	}

	// Capture each object's client-side Missing flag so it can be
	// restored on the response objects below; the server's reply does
	// not carry this field.
	missing := make(map[string]bool)
	for _, obj := range bReq.Objects {
		missing[obj.Oid] = obj.Missing
	}

	// Resolve the endpoint for this operation/remote and record it on
	// the response for later use.
	bRes.endpoint = c.Endpoints.Endpoint(bReq.Operation, remote)
	requestedAt := time.Now()

	req, err := c.NewRequest("POST", bRes.endpoint, "objects/batch", bReq)
	if err != nil {
		return nil, errors.Wrap(err, tr.Tr.Get("batch request"))
	}

	tracerx.Printf("api: batch %d files", len(bReq.Objects))

	req = c.Client.LogRequest(req, "lfs.batch")
	res, err := c.DoAPIRequestWithAuth(remote, lfshttp.WithRetries(req, c.MaxRetries()))
	if err != nil {
		tracerx.Printf("api error: %s", err)
		return nil, errors.Wrap(err, tr.Tr.Get("batch response"))
	}

	// NOTE(review): the body is decoded before the status code is
	// checked below; callers therefore see a decode error rather than
	// a status error for malformed non-200 bodies — confirm this
	// ordering is intentional before changing it.
	if err := lfshttp.DecodeJSON(res, bRes); err != nil {
		return bRes, errors.Wrap(err, tr.Tr.Get("batch response"))
	}

	// Only SHA-256 object IDs are supported; an empty value (server
	// did not specify an algorithm) is accepted.
	if bRes.HashAlgorithm != "" && bRes.HashAlgorithm != "sha256" {
		return bRes, errors.Wrap(errors.New(tr.Tr.Get("unsupported hash algorithm")), tr.Tr.Get("batch response"))
	}

	if res.StatusCode != 200 {
		return nil, lfshttp.NewStatusCodeError(res)
	}

	for _, obj := range bRes.Objects {
		// Restore the client-side Missing flag captured above.
		obj.Missing = missing[obj.Oid]
		// Stamp each action with the request time.
		for _, a := range obj.Actions {
			a.createdAt = requestedAt
		}
	}

	return bRes, nil
}
|