From 0556a995b69e9a8cd090c1793a96dd293beafe8e Mon Sep 17 00:00:00 2001 From: rubyist Date: Wed, 2 Sep 2015 15:25:40 -0400 Subject: [PATCH] Limit the number of batch endpoint retries --- lfs/transfer_queue.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/lfs/transfer_queue.go b/lfs/transfer_queue.go index 46aef9cb..b5e1631b 100644 --- a/lfs/transfer_queue.go +++ b/lfs/transfer_queue.go @@ -9,7 +9,8 @@ import ( ) const ( - batchSize = 100 + batchSize = 100 + maxBatchRetries = 3 ) type Transferable interface { @@ -161,11 +162,9 @@ func (q *TransferQueue) legacyFallback(failedBatch []Transferable) { // off to the transfer workers. func (q *TransferQueue) batchApiRoutine() { var startProgress sync.Once - batchNumber := 0 + batchRetries := 0 for { - batchNumber++ - batch := q.batcher.Next() if batch == nil { break @@ -188,14 +187,17 @@ func (q *TransferQueue) batchApiRoutine() { return } - // TODO technically, this could go forever. Maybe we just limit it to n batch retries total. - if q.canRetry(err, "batch") { + // Batch operation retries should be caused by network issues. We want to + // retry these failures, but limit it to maxBatchRetries total retries, + // otherwise a serious network issue could cause an infinite loop of + // retried calls. + if IsRetriableError(err) && batchRetries <= maxBatchRetries { + batchRetries++ tracerx.Printf("tq: resubmitting batch: %s", err) for _, t := range batch { q.Add(t) } } else { - tracerx.Printf("Too many batch failures, erroring") q.errorc <- err }