Worker Timeout: subject all but offline/error workers to timeout checks

Workers that are in `starting`, `asleep`, or `testing` state should also
be subject to the timeout check, not just workers in `awake` state.
This commit is contained in:
Sybren A. Stüvel 2022-07-18 11:30:39 +02:00
parent a6ca3f7bdc
commit 658a3d7a85
2 changed files with 63 additions and 1 deletions

@ -11,6 +11,14 @@ import (
// This file contains functions for dealing with task/worker timeouts. Not database timeouts.
// workerStatusNoTimeout contains the worker statuses that are exempt from
// timeout checking. A worker in any other status will be subject to the timeout
// check.
var workerStatusNoTimeout = []api.WorkerStatus{
api.WorkerStatusError,
api.WorkerStatusOffline,
}
// FetchTimedOutTasks returns a slice of tasks that have timed out.
//
// In order to time out, a task must be in status `active` and not touched by a
@ -36,7 +44,7 @@ func (db *DB) FetchTimedOutWorkers(ctx context.Context, lastSeenBefore time.Time
result := []*Worker{}
tx := db.gormDB.WithContext(ctx).
Model(&Worker{}).
Where("workers.status = ?", api.WorkerStatusAwake).
Where("workers.status not in ?", workerStatusNoTimeout).
Where("workers.last_seen_at <= ?", lastSeenBefore).
Scan(&result)
if tx.Error != nil {

@ -50,3 +50,57 @@ func TestFetchTimedOutTasks(t *testing.T) {
assert.Equal(t, w, timedout[0].Worker, "the worker should be included in the result as well")
}
}
func TestFetchTimedOutWorkers(t *testing.T) {
ctx, cancel, db := persistenceTestFixtures(t, 1*time.Second)
defer cancel()
timeoutDeadline := mustParseTime("2022-06-07T11:14:47+02:00")
beforeDeadline := timeoutDeadline.Add(-10 * time.Second)
afterDeadline := timeoutDeadline.Add(10 * time.Second)
worker0 := Worker{ // Offline, so should not time out.
UUID: "c7b4d1d5-0a96-4e19-993f-028786d3d2c1",
Name: "дрон 0",
Status: api.WorkerStatusOffline,
LastSeenAt: beforeDeadline,
}
worker1 := Worker{ // Awake and timed out.
UUID: "bafc098f-2760-40c6-9a45-a4f980389a9a",
Name: "дрон 1",
Status: api.WorkerStatusAwake,
LastSeenAt: beforeDeadline,
}
worker2 := Worker{ // Starting and timed out.
UUID: "67afa6e6-406d-4224-87d9-82abde7f9d6a",
Name: "дрон 2",
Status: api.WorkerStatusStarting,
LastSeenAt: beforeDeadline,
}
worker3 := Worker{ // Asleep and timed out.
UUID: "12a0bb9a-515b-440a-922a-fd6765fd89a4",
Name: "дрон 3",
Status: api.WorkerStatusAsleep,
LastSeenAt: beforeDeadline,
}
worker4 := Worker{ // Awake and not timed out.
UUID: "aecfc9c8-ebf5-4be3-9091-99b6961a8b6e",
Name: "дрон 4",
Status: api.WorkerStatusAwake,
LastSeenAt: afterDeadline,
}
workers := []*Worker{&worker0, &worker1, &worker2, &worker3, &worker4}
for _, worker := range workers {
err := db.CreateWorker(ctx, worker)
if !assert.NoError(t, err) {
t.FailNow()
}
}
timedout, err := db.FetchTimedOutWorkers(ctx, timeoutDeadline)
if assert.NoError(t, err) && assert.Len(t, timedout, 3) {
assert.Equal(t, worker1.UUID, timedout[0].UUID)
assert.Equal(t, worker2.UUID, timedout[1].UUID)
assert.Equal(t, worker3.UUID, timedout[2].UUID)
}
}