package api_impl

// SPDX-License-Identifier: GPL-3.0-or-later

import (
	"context"
	"testing"

	"github.com/golang/mock/gomock"
	"github.com/stretchr/testify/assert"

	"projects.blender.org/studio/flamenco/internal/manager/config"
	"projects.blender.org/studio/flamenco/internal/manager/persistence"
	"projects.blender.org/studio/flamenco/pkg/api"
)
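
// TestTaskUpdate covers the happy flow of a task update: the worker sends a
// new activity, a log chunk, and a 'completed' status, all of which should be
// persisted, and the task should be 'touched'.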
func TestTaskUpdate(t *testing.T) {
	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()

	mf := newMockedFlamenco(mockCtrl)
	worker := testWorker()

	// Construct the JSON request object.
	taskUpdate := api.TaskUpdateJSONRequestBody{
		Activity:   ptr("testing"),
		Log:        ptr("line1\nline2\n"),
		TaskStatus: ptr(api.TaskStatusCompleted),
	}

	// Construct the task that's supposed to be updated.
	taskID := "181eab68-1123-4790-93b1-94309a899411"
	jobID := "e4719398-7cfa-4877-9bab-97c2d6c158b5"
	mockJob := persistence.Job{UUID: jobID}
	mockTask := persistence.Task{
		UUID:     taskID,
		Worker:   &worker,
		WorkerID: &worker.ID,
		Job:      &mockJob,
		Activity: "pre-update activity",
	}

	// Expect the task to be fetched.
	mf.persistence.EXPECT().FetchTask(gomock.Any(), taskID).Return(&mockTask, nil)

	// Expect the task status change to be handed to the state machine.
	var statusChangedTask persistence.Task
	mf.stateMachine.EXPECT().TaskStatusChange(gomock.Any(), gomock.AssignableToTypeOf(&persistence.Task{}), api.TaskStatusCompleted).
		DoAndReturn(func(ctx context.Context, task *persistence.Task, newStatus api.TaskStatus) error {
			statusChangedTask = *task
			return nil
		})

	// Expect the activity to be updated.
	var actUpdatedTask persistence.Task
	mf.persistence.EXPECT().SaveTaskActivity(gomock.Any(), gomock.Any()).DoAndReturn(
		func(ctx context.Context, task *persistence.Task) error {
			actUpdatedTask = *task
			return nil
		})

	// Expect the log to be written and broadcast over SocketIO.
	mf.logStorage.EXPECT().Write(gomock.Any(), jobID, taskID, "line1\nline2\n")

	// Expect a 'touch' of the task.
	var touchedTask persistence.Task
	mf.persistence.EXPECT().TaskTouchedByWorker(gomock.Any(), gomock.Any()).DoAndReturn(
		func(ctx context.Context, task *persistence.Task) error {
			touchedTask = *task
			return nil
		})
	mf.persistence.EXPECT().WorkerSeen(gomock.Any(), &worker)

	// Do the call.
	echoCtx := mf.prepareMockedJSONRequest(taskUpdate)
	requestWorkerStore(echoCtx, &worker)
	err := mf.flamenco.TaskUpdate(echoCtx, taskID)

	// Check the saved task.
	assert.NoError(t, err)
	assert.Equal(t, mockTask.UUID, statusChangedTask.UUID)
	assert.Equal(t, mockTask.UUID, actUpdatedTask.UUID)
	assert.Equal(t, mockTask.UUID, touchedTask.UUID)
	assert.Equal(t, "testing", statusChangedTask.Activity)
	assert.Equal(t, "testing", actUpdatedTask.Activity)
}
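
// TestTaskUpdateFailed covers a worker reporting a task as failed, both below
// the soft-failure threshold (soft failure expected) and at the threshold
// (hard failure expected).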
func TestTaskUpdateFailed(t *testing.T) {
	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()

	mf := newMockedFlamenco(mockCtrl)
	worker := testWorker()

	// Construct the JSON request object.
	taskUpdate := api.TaskUpdateJSONRequestBody{
		TaskStatus: ptr(api.TaskStatusFailed),
	}

	// Construct the task that's supposed to be updated.
	taskID := "181eab68-1123-4790-93b1-94309a899411"
	jobID := "e4719398-7cfa-4877-9bab-97c2d6c158b5"
	mockJob := persistence.Job{UUID: jobID}
	mockTask := persistence.Task{
		UUID:     taskID,
		Worker:   &worker,
		WorkerID: &worker.ID,
		Job:      &mockJob,
		Activity: "pre-update activity",
		Type:     "misc",
	}

	conf := config.Conf{
		Base: config.Base{
			TaskFailAfterSoftFailCount: 3,
			BlocklistThreshold:         65535, // This test doesn't cover blocklisting.
		},
	}
	mf.config.EXPECT().Get().Return(&conf).AnyTimes()

	const numSubTests = 2

	// Expect the task to be fetched for each sub-test:
	mf.persistence.EXPECT().FetchTask(gomock.Any(), taskID).Return(&mockTask, nil).Times(numSubTests)

	// Expect a 'touch' of the task for each sub-test:
	mf.persistence.EXPECT().TaskTouchedByWorker(gomock.Any(), &mockTask).Times(numSubTests)
	mf.persistence.EXPECT().WorkerSeen(gomock.Any(), &worker).Times(numSubTests)

	// Mimic that this is always the first failure of this worker/job/tasktype combo:
	mf.persistence.EXPECT().CountTaskFailuresOfWorker(gomock.Any(), &mockJob, &worker, "misc").Return(0, nil).Times(numSubTests)

	{
		// Expect the Worker to be added to the list of failed workers.
		// This returns 1, which is less than the failure threshold -> soft failure expected.
		mf.persistence.EXPECT().AddWorkerToTaskFailedList(gomock.Any(), &mockTask, &worker).Return(1, nil)
		mf.persistence.EXPECT().WorkersLeftToRun(gomock.Any(), &mockJob, "misc").
			Return(map[string]bool{"60453eec-5a26-43e9-9da2-d00506d492cc": true, "ce312357-29cd-4389-81ab-4d43e30945f8": true}, nil)
		mf.persistence.EXPECT().FetchTaskFailureList(gomock.Any(), &mockTask).
			Return([]*persistence.Worker{ /* It shouldn't matter whether the failing worker is here or not. */ }, nil)

		// Expect soft failure.
		mf.stateMachine.EXPECT().TaskStatusChange(gomock.Any(), &mockTask, api.TaskStatusSoftFailed)
		mf.logStorage.EXPECT().WriteTimestamped(gomock.Any(), jobID, taskID,
			"Task failed by 1 worker, Manager will mark it as soft failure. 2 more failures will cause hard failure.")

		// Do the call.
		echoCtx := mf.prepareMockedJSONRequest(taskUpdate)
		requestWorkerStore(echoCtx, &worker)
		err := mf.flamenco.TaskUpdate(echoCtx, taskID)
		assert.NoError(t, err)
		assertResponseNoContent(t, echoCtx)
	}

	{
		// Test with more (mocked) failures in the past, pushing the task over the threshold.
		mf.persistence.EXPECT().AddWorkerToTaskFailedList(gomock.Any(), &mockTask, &worker).
			Return(conf.TaskFailAfterSoftFailCount, nil)
		mf.stateMachine.EXPECT().TaskStatusChange(gomock.Any(), &mockTask, api.TaskStatusFailed)
		mf.logStorage.EXPECT().WriteTimestamped(gomock.Any(), jobID, taskID,
			"Task failed by 3 workers, Manager will mark it as hard failure")

		// Do the call.
		echoCtx := mf.prepareMockedJSONRequest(taskUpdate)
		requestWorkerStore(echoCtx, &worker)
		err := mf.flamenco.TaskUpdate(echoCtx, taskID)
		assert.NoError(t, err)
		assertResponseNoContent(t, echoCtx)
	}
}
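
// TestBlockingAfterFailure covers task failures that push the worker over the
// blocklist threshold, both with and without other workers left to run the
// job's tasks.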
func TestBlockingAfterFailure(t *testing.T) {
	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()

	mf := newMockedFlamenco(mockCtrl)
	worker := testWorker()

	// Construct the JSON request object.
	taskUpdate := api.TaskUpdateJSONRequestBody{
		TaskStatus: ptr(api.TaskStatusFailed),
	}

	// Construct the task that's supposed to be updated.
	taskID := "181eab68-1123-4790-93b1-94309a899411"
	jobID := "e4719398-7cfa-4877-9bab-97c2d6c158b5"
	mockJob := persistence.Job{UUID: jobID}
	mockTask := persistence.Task{
		UUID:     taskID,
		Worker:   &worker,
		WorkerID: &worker.ID,
		Job:      &mockJob,
		Activity: "pre-update activity",
		Type:     "misc",
	}

	conf := config.Conf{
		Base: config.Base{
			TaskFailAfterSoftFailCount: 3,
			BlocklistThreshold:         3,
		},
	}
	mf.config.EXPECT().Get().Return(&conf).AnyTimes()

	const numSubTests = 3

	// Expect the task to be fetched for each sub-test:
	mf.persistence.EXPECT().FetchTask(gomock.Any(), taskID).Return(&mockTask, nil).Times(numSubTests)

	// Expect a 'touch' of the task for each sub-test:
	mf.persistence.EXPECT().TaskTouchedByWorker(gomock.Any(), &mockTask).Times(numSubTests)
	mf.persistence.EXPECT().WorkerSeen(gomock.Any(), &worker).Times(numSubTests)

	// Mimic that this is the 3rd failure of this worker/job/tasktype combo,
	// which should thus trigger a block. Returns 2 because there have been 2
	// previous failures.
	mf.persistence.EXPECT().
		CountTaskFailuresOfWorker(gomock.Any(), &mockJob, &worker, "misc").
		Return(2, nil).
		Times(numSubTests)

	// Expect the worker to be blocked.
	mf.persistence.EXPECT().
		AddWorkerToJobBlocklist(gomock.Any(), &mockJob, &worker, "misc").
		Times(numSubTests)

	{
		// Mimic that there is another worker to work on this task, so the job should continue happily.
		mf.persistence.EXPECT().WorkersLeftToRun(gomock.Any(), &mockJob, "misc").
			Return(map[string]bool{"60453eec-5a26-43e9-9da2-d00506d492cc": true, "ce312357-29cd-4389-81ab-4d43e30945f8": true}, nil).
			Times(2)
		mf.persistence.EXPECT().FetchTaskFailureList(gomock.Any(), &mockTask).
			Return([]*persistence.Worker{ /* It shouldn't matter whether the failing worker is here or not. */ }, nil).
			Times(2)

		// Expect the Worker to be added to the list of failed workers for this task.
		// This returns 1, which is less than the failure threshold -> soft failure.
		mf.persistence.EXPECT().AddWorkerToTaskFailedList(gomock.Any(), &mockTask, &worker).Return(1, nil)

		// Expect soft failure of the task.
		mf.stateMachine.EXPECT().TaskStatusChange(gomock.Any(), &mockTask, api.TaskStatusSoftFailed)
		mf.logStorage.EXPECT().WriteTimestamped(gomock.Any(), jobID, taskID,
			"Task failed by 1 worker, Manager will mark it as soft failure. 2 more failures will cause hard failure.")

		// Because the job didn't fail in its entirety, the tasks previously failed
		// by the Worker should be requeued so they can be picked up by another worker.
		mf.stateMachine.EXPECT().RequeueFailedTasksOfWorkerOfJob(
			gomock.Any(), &worker, &mockJob,
			"worker дрон was blocked from tasks of type \"misc\"")

		// Do the call.
		echoCtx := mf.prepareMockedJSONRequest(taskUpdate)
		requestWorkerStore(echoCtx, &worker)
		err := mf.flamenco.TaskUpdate(echoCtx, taskID)
		assert.NoError(t, err)
		assertResponseNoContent(t, echoCtx)
	}

	{
		// Test without any workers left to run these tasks on this job, due to
		// blocklisting. This should fail the entire job.
		mf.persistence.EXPECT().WorkersLeftToRun(gomock.Any(), &mockJob, "misc").
			Return(map[string]bool{}, nil)
		mf.persistence.EXPECT().FetchTaskFailureList(gomock.Any(), &mockTask).
			Return([]*persistence.Worker{ /* It shouldn't matter whether the failing worker is here or not. */ }, nil)

		// Expect the Worker to be added to the list of failed workers for this task.
		// This returns 1, which is less than the failure threshold -> soft failure if it were only based on this metric.
		mf.persistence.EXPECT().AddWorkerToTaskFailedList(gomock.Any(), &mockTask, &worker).Return(1, nil)

		// Expect hard failure of the task, because there are no workers left to perform it.
		mf.stateMachine.EXPECT().TaskStatusChange(gomock.Any(), &mockTask, api.TaskStatusFailed)
		mf.logStorage.EXPECT().WriteTimestamped(gomock.Any(), jobID, taskID,
			"Task failed by worker дрон (e7632d62-c3b8-4af0-9e78-01752928952c), Manager will fail the entire job "+
				"as there are no more workers left for tasks of type \"misc\".")

		// Expect failure of the job.
		mf.stateMachine.EXPECT().
			JobStatusChange(gomock.Any(), &mockJob, api.JobStatusFailed, "no more workers left to run tasks of type \"misc\"")

		// Because the job failed, there is no need to re-queue any tasks previously failed by this worker.

		// Do the call.
		echoCtx := mf.prepareMockedJSONRequest(taskUpdate)
		requestWorkerStore(echoCtx, &worker)
		err := mf.flamenco.TaskUpdate(echoCtx, taskID)
		assert.NoError(t, err)
		assertResponseNoContent(t, echoCtx)
	}

	{
		// Test the case where no worker has been blocklisted, but the only
		// available worker did already fail this task. This also makes the
		// task impossible to run, and should fail the entire job.
		theOtherFailingWorker := persistence.Worker{
			UUID: "ce312357-29cd-4389-81ab-4d43e30945f8",
		}
		mf.persistence.EXPECT().WorkersLeftToRun(gomock.Any(), &mockJob, "misc").
			Return(map[string]bool{theOtherFailingWorker.UUID: true}, nil)
		mf.persistence.EXPECT().FetchTaskFailureList(gomock.Any(), &mockTask).
			Return([]*persistence.Worker{&theOtherFailingWorker}, nil)

		// Expect the Worker to be added to the list of failed workers for this task.
		// This returns 1, which is less than the failure threshold -> soft failure if it were only based on this metric.
		mf.persistence.EXPECT().AddWorkerToTaskFailedList(gomock.Any(), &mockTask, &worker).Return(1, nil)

		// Expect hard failure of the task, because there are no workers left to perform it.
		mf.stateMachine.EXPECT().TaskStatusChange(gomock.Any(), &mockTask, api.TaskStatusFailed)
		mf.logStorage.EXPECT().WriteTimestamped(gomock.Any(), jobID, taskID,
			"Task failed by worker дрон (e7632d62-c3b8-4af0-9e78-01752928952c), Manager will fail the entire job "+
				"as there are no more workers left for tasks of type \"misc\".")

		// Expect failure of the job.
		mf.stateMachine.EXPECT().
			JobStatusChange(gomock.Any(), &mockJob, api.JobStatusFailed, "no more workers left to run tasks of type \"misc\"")

		// Because the job failed, there is no need to re-queue any tasks previously failed by this worker.

		// Do the call.
		echoCtx := mf.prepareMockedJSONRequest(taskUpdate)
		requestWorkerStore(echoCtx, &worker)
		err := mf.flamenco.TaskUpdate(echoCtx, taskID)
		assert.NoError(t, err)
		assertResponseNoContent(t, echoCtx)
	}
}
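
// TestJobFailureAfterWorkerTaskFailure covers a task failure when no other
// worker is left to run tasks of that type, which should fail the entire job.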
func TestJobFailureAfterWorkerTaskFailure(t *testing.T) {
	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()

	mf := newMockedFlamenco(mockCtrl)
	worker := testWorker()

	// Construct the JSON request object.
	taskUpdate := api.TaskUpdateJSONRequestBody{
		TaskStatus: ptr(api.TaskStatusFailed),
	}

	// Construct the task that's supposed to be updated.
	taskID := "181eab68-1123-4790-93b1-94309a899411"
	jobID := "e4719398-7cfa-4877-9bab-97c2d6c158b5"
	mockJob := persistence.Job{UUID: jobID}
	mockTask := persistence.Task{
		UUID:     taskID,
		Worker:   &worker,
		WorkerID: &worker.ID,
		Job:      &mockJob,
		Activity: "pre-update activity",
		Type:     "misc",
	}

	conf := config.Conf{
		Base: config.Base{
			TaskFailAfterSoftFailCount: 3,
			BlocklistThreshold:         65535, // This test doesn't cover blocklisting.
		},
	}
	mf.config.EXPECT().Get().Return(&conf).Times(2)
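
	// Mimic the failure handling: this is the worker's first failure on this
	// job/task type, and the failing worker is the only one left that could
	// run 'misc' tasks, so the entire job is expected to fail.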
	mf.persistence.EXPECT().FetchTask(gomock.Any(), taskID).Return(&mockTask, nil)
	mf.persistence.EXPECT().TaskTouchedByWorker(gomock.Any(), &mockTask)
	mf.persistence.EXPECT().WorkerSeen(gomock.Any(), &worker)
	mf.persistence.EXPECT().CountTaskFailuresOfWorker(gomock.Any(), &mockJob, &worker, "misc").Return(0, nil)
	mf.persistence.EXPECT().AddWorkerToTaskFailedList(gomock.Any(), &mockTask, &worker).Return(1, nil)
	mf.persistence.EXPECT().WorkersLeftToRun(gomock.Any(), &mockJob, "misc").
		Return(map[string]bool{"e7632d62-c3b8-4af0-9e78-01752928952c": true}, nil)
	mf.persistence.EXPECT().FetchTaskFailureList(gomock.Any(), &mockTask).
		Return([]*persistence.Worker{ /* It shouldn't matter whether the failing worker is here or not. */ }, nil)

	// Expect hard failure of the task, because there are no workers left to perform it.
	mf.stateMachine.EXPECT().TaskStatusChange(gomock.Any(), &mockTask, api.TaskStatusFailed)
	mf.logStorage.EXPECT().WriteTimestamped(gomock.Any(), jobID, taskID,
		"Task failed by worker дрон (e7632d62-c3b8-4af0-9e78-01752928952c), Manager will fail the entire job "+
			"as there are no more workers left for tasks of type \"misc\".")

	// Expect failure of the job.
	mf.stateMachine.EXPECT().
		JobStatusChange(gomock.Any(), &mockJob, api.JobStatusFailed, "no more workers left to run tasks of type \"misc\"")

	// Do the call.
	echoCtx := mf.prepareMockedJSONRequest(taskUpdate)
	requestWorkerStore(echoCtx, &worker)
	err := mf.flamenco.TaskUpdate(echoCtx, taskID)

	assert.NoError(t, err)
	assertResponseNoContent(t, echoCtx)
}