VTK-m Timer now handles when devices fail at runtime

A device can fail during startup of VTK-m or mid-execution
due to issues such as running out of memory (OOM). Timer needs to be
able to handle these situations gracefully.
This commit is contained in:
Robert Maynard 2019-12-11 17:59:32 -05:00
parent a28d2a3a06
commit 64e3e8f344
3 changed files with 148 additions and 177 deletions

@ -19,32 +19,11 @@ template <typename Device>
using DeviceInvalid = std::integral_constant<bool, !Device::IsEnabled>;
using EnabledDeviceList = vtkm::ListRemoveIf<vtkm::cont::DeviceAdapterListCommon, DeviceInvalid>;
using EnabledTimerImpls =
vtkm::ListTransform<EnabledDeviceList, vtkm::cont::DeviceAdapterTimerImplementation>;
template <typename Device>
using DeviceTimerPtr = std::unique_ptr<vtkm::cont::DeviceAdapterTimerImplementation<Device>>;
using EnabledTimerImpls = vtkm::ListTransform<EnabledDeviceList, DeviceTimerPtr>;
using EnabledTimerImplTuple = vtkm::ListApply<EnabledTimerImpls, std::tuple>;
} // anonymous namespace
namespace vtkm
{
namespace cont
{
namespace detail
{
class EnabledDeviceTimerImpls
{
public:
EnabledDeviceTimerImpls() {}
~EnabledDeviceTimerImpls() {}
// A tuple of enabled timer implementations
EnabledTimerImplTuple timerImplTuple;
};
}
}
} // namespace vtkm::cont::detail
namespace
{
// C++11 does not support get tuple element by type. C++14 does support that.
// Get the index of a type in tuple elements
@ -66,29 +45,51 @@ struct Index<T, Container<U, Types...>>
template <typename Device>
VTKM_CONT inline
typename std::tuple_element<Index<Device, EnabledDeviceList>::value, EnabledTimerImplTuple>::type&
GetTimerImpl(Device, vtkm::cont::detail::EnabledDeviceTimerImpls* timerImpls)
GetUniqueTimerPtr(Device, EnabledTimerImplTuple& enabledTimers)
{
return std::get<Index<Device, EnabledDeviceList>::value>(timerImpls->timerImplTuple);
return std::get<Index<Device, EnabledDeviceList>::value>(enabledTimers);
}
template <typename Device>
VTKM_CONT inline const typename std::tuple_element<Index<Device, EnabledDeviceList>::value,
EnabledTimerImplTuple>::type&
GetTimerImpl(Device, const vtkm::cont::detail::EnabledDeviceTimerImpls* timerImpls)
struct InitFunctor
{
return std::get<Index<Device, EnabledDeviceList>::value>(timerImpls->timerImplTuple);
}
template <typename Device>
VTKM_CONT void operator()(Device, EnabledTimerImplTuple& timerImpls)
{
//We don't use the runtime device tracker to verify initialization support
//so that the following use case is supported:
//
// GetRuntimeDeviceTracker().Disable( openMP );
// vtkm::cont::Timer timer; //tracks all active devices
// GetRuntimeDeviceTracker().Enable( openMP );
// timer.Start() //want to test openmp
//
// timer.GetElapsedTime()
//
// When `GetElapsedTime` is called we need to make sure that the OpenMP
// device timer is safe to call. At the same time we still need to make
// sure that we have the required runtime and not just compile time support;
// this is why we use `DeviceAdapterRuntimeDetector`.
bool haveRequiredRuntimeSupport = vtkm::cont::DeviceAdapterRuntimeDetector<Device>{}.Exists();
if (haveRequiredRuntimeSupport)
{
std::get<Index<Device, EnabledDeviceList>::value>(timerImpls)
.reset(new vtkm::cont::DeviceAdapterTimerImplementation<Device>());
}
}
};
struct ResetFunctor
{
template <typename Device>
VTKM_CONT void operator()(Device device,
vtkm::cont::Timer* timer,
vtkm::cont::detail::EnabledDeviceTimerImpls* timerImpls)
vtkm::cont::DeviceAdapterId deviceToRunOn,
const vtkm::cont::RuntimeDeviceTracker& tracker,
EnabledTimerImplTuple& timerImpls)
{
if ((timer->GetDevice() == device) || (timer->GetDevice() == vtkm::cont::DeviceAdapterTagAny()))
if ((deviceToRunOn == device || deviceToRunOn == vtkm::cont::DeviceAdapterTagAny()) &&
tracker.CanRunOn(device))
{
GetTimerImpl(device, timerImpls).Reset();
GetUniqueTimerPtr(device, timerImpls)->Reset();
}
}
};
@ -97,12 +98,14 @@ struct StartFunctor
{
template <typename Device>
VTKM_CONT void operator()(Device device,
vtkm::cont::Timer* timer,
vtkm::cont::detail::EnabledDeviceTimerImpls* timerImpls)
vtkm::cont::DeviceAdapterId deviceToRunOn,
const vtkm::cont::RuntimeDeviceTracker& tracker,
EnabledTimerImplTuple& timerImpls)
{
if ((timer->GetDevice() == device) || (timer->GetDevice() == vtkm::cont::DeviceAdapterTagAny()))
if ((deviceToRunOn == device || deviceToRunOn == vtkm::cont::DeviceAdapterTagAny()) &&
tracker.CanRunOn(device))
{
GetTimerImpl(device, timerImpls).Start();
GetUniqueTimerPtr(device, timerImpls)->Start();
}
}
};
@ -111,12 +114,14 @@ struct StopFunctor
{
template <typename Device>
VTKM_CONT void operator()(Device device,
vtkm::cont::Timer* timer,
vtkm::cont::detail::EnabledDeviceTimerImpls* timerImpls)
vtkm::cont::DeviceAdapterId deviceToRunOn,
const vtkm::cont::RuntimeDeviceTracker& tracker,
EnabledTimerImplTuple& timerImpls)
{
if ((timer->GetDevice() == device) || (timer->GetDevice() == vtkm::cont::DeviceAdapterTagAny()))
if ((deviceToRunOn == device || deviceToRunOn == vtkm::cont::DeviceAdapterTagAny()) &&
tracker.CanRunOn(device))
{
GetTimerImpl(device, timerImpls).Stop();
GetUniqueTimerPtr(device, timerImpls)->Stop();
}
}
};
@ -127,12 +132,14 @@ struct StartedFunctor
template <typename Device>
VTKM_CONT void operator()(Device device,
const vtkm::cont::Timer* timer,
const vtkm::cont::detail::EnabledDeviceTimerImpls* timerImpls)
vtkm::cont::DeviceAdapterId deviceToRunOn,
const vtkm::cont::RuntimeDeviceTracker& tracker,
EnabledTimerImplTuple& timerImpls)
{
if ((timer->GetDevice() == device) || (timer->GetDevice() == vtkm::cont::DeviceAdapterTagAny()))
if ((deviceToRunOn == device || deviceToRunOn == vtkm::cont::DeviceAdapterTagAny()) &&
tracker.CanRunOn(device))
{
this->Value &= GetTimerImpl(device, timerImpls).Started();
this->Value &= GetUniqueTimerPtr(device, timerImpls)->Started();
}
}
};
@ -143,12 +150,14 @@ struct StoppedFunctor
template <typename Device>
VTKM_CONT void operator()(Device device,
const vtkm::cont::Timer* timer,
const vtkm::cont::detail::EnabledDeviceTimerImpls* timerImpls)
vtkm::cont::DeviceAdapterId deviceToRunOn,
const vtkm::cont::RuntimeDeviceTracker& tracker,
EnabledTimerImplTuple& timerImpls)
{
if ((timer->GetDevice() == device) || (timer->GetDevice() == vtkm::cont::DeviceAdapterTagAny()))
if ((deviceToRunOn == device || deviceToRunOn == vtkm::cont::DeviceAdapterTagAny()) &&
tracker.CanRunOn(device))
{
this->Value &= GetTimerImpl(device, timerImpls).Stopped();
this->Value &= GetUniqueTimerPtr(device, timerImpls)->Stopped();
}
}
};
@ -159,12 +168,14 @@ struct ReadyFunctor
template <typename Device>
VTKM_CONT void operator()(Device device,
const vtkm::cont::Timer* timer,
const vtkm::cont::detail::EnabledDeviceTimerImpls* timerImpls)
vtkm::cont::DeviceAdapterId deviceToRunOn,
const vtkm::cont::RuntimeDeviceTracker& tracker,
EnabledTimerImplTuple& timerImpls)
{
if ((timer->GetDevice() == device) || (timer->GetDevice() == vtkm::cont::DeviceAdapterTagAny()))
if ((deviceToRunOn == device || deviceToRunOn == vtkm::cont::DeviceAdapterTagAny()) &&
tracker.CanRunOn(device))
{
this->Value &= GetTimerImpl(device, timerImpls).Ready();
this->Value &= GetUniqueTimerPtr(device, timerImpls)->Ready();
}
}
};
@ -174,19 +185,44 @@ struct ElapsedTimeFunctor
vtkm::Float64 ElapsedTime = 0.0;
template <typename Device>
VTKM_CONT void operator()(Device deviceToTry,
VTKM_CONT void operator()(Device device,
vtkm::cont::DeviceAdapterId deviceToRunOn,
const vtkm::cont::detail::EnabledDeviceTimerImpls* timerImpls)
const vtkm::cont::RuntimeDeviceTracker& tracker,
EnabledTimerImplTuple& timerImpls)
{
if ((deviceToRunOn == deviceToTry) || (deviceToRunOn == vtkm::cont::DeviceAdapterTagAny()))
if ((deviceToRunOn == device || deviceToRunOn == vtkm::cont::DeviceAdapterTagAny()) &&
tracker.CanRunOn(device))
{
this->ElapsedTime =
vtkm::Max(this->ElapsedTime, GetTimerImpl(deviceToTry, timerImpls).GetElapsedTime());
vtkm::Max(this->ElapsedTime, GetUniqueTimerPtr(device, timerImpls)->GetElapsedTime());
}
}
};
} // anonymous namespace
namespace vtkm
{
namespace cont
{
namespace detail
{
struct EnabledDeviceTimerImpls
{
EnabledDeviceTimerImpls()
{
vtkm::ListForEach(InitFunctor(), EnabledDeviceList(), this->EnabledTimers);
}
~EnabledDeviceTimerImpls() {}
// A tuple of enabled timer implementations
EnabledTimerImplTuple EnabledTimers;
};
}
}
} // namespace vtkm::cont::detail
namespace vtkm
{
namespace cont
@ -194,45 +230,35 @@ namespace cont
Timer::Timer()
: Device(vtkm::cont::DeviceAdapterTagAny())
, Internal(nullptr)
, Internal(new detail::EnabledDeviceTimerImpls)
{
this->Init();
}
Timer::Timer(vtkm::cont::DeviceAdapterId device)
: Device(device)
, Internal(nullptr)
, Internal(new detail::EnabledDeviceTimerImpls)
{
const vtkm::cont::RuntimeDeviceTracker& tracker = vtkm::cont::GetRuntimeDeviceTracker();
const auto& tracker = vtkm::cont::GetRuntimeDeviceTracker();
if (!tracker.CanRunOn(device))
{
VTKM_LOG_S(vtkm::cont::LogLevel::Error,
"Device '" << device.GetName() << "' can not run on current Device."
"Thus timer is not usable");
}
this->Init();
}
Timer::~Timer() = default;
void Timer::Init()
{
if (!this->Internal)
{
this->Internal.reset(new detail::EnabledDeviceTimerImpls);
}
}
void Timer::Reset()
{
vtkm::ListForEach(ResetFunctor(), EnabledDeviceList(), this, this->Internal.get());
const auto& tracker = vtkm::cont::GetRuntimeDeviceTracker();
vtkm::ListForEach(
ResetFunctor(), EnabledDeviceList(), this->Device, tracker, this->Internal->EnabledTimers);
}
void Timer::Reset(vtkm::cont::DeviceAdapterId device)
{
const vtkm::cont::RuntimeDeviceTracker& tracker = vtkm::cont::GetRuntimeDeviceTracker();
const auto& tracker = vtkm::cont::GetRuntimeDeviceTracker();
if (!tracker.CanRunOn(device))
{
VTKM_LOG_S(vtkm::cont::LogLevel::Error,
@ -246,76 +272,53 @@ void Timer::Reset(vtkm::cont::DeviceAdapterId device)
void Timer::Start()
{
vtkm::ListForEach(StartFunctor(), EnabledDeviceList(), this, this->Internal.get());
const auto& tracker = vtkm::cont::GetRuntimeDeviceTracker();
vtkm::ListForEach(
StartFunctor(), EnabledDeviceList(), this->Device, tracker, this->Internal->EnabledTimers);
}
void Timer::Stop()
{
vtkm::ListForEach(StopFunctor(), EnabledDeviceList(), this, this->Internal.get());
const auto& tracker = vtkm::cont::GetRuntimeDeviceTracker();
vtkm::ListForEach(
StopFunctor(), EnabledDeviceList(), this->Device, tracker, this->Internal->EnabledTimers);
}
bool Timer::Started() const
{
const auto& tracker = vtkm::cont::GetRuntimeDeviceTracker();
StartedFunctor functor;
vtkm::ListForEach(functor, EnabledDeviceList(), this, this->Internal.get());
vtkm::ListForEach(
functor, EnabledDeviceList(), this->Device, tracker, this->Internal->EnabledTimers);
return functor.Value;
}
bool Timer::Stopped() const
{
const auto& tracker = vtkm::cont::GetRuntimeDeviceTracker();
StoppedFunctor functor;
vtkm::ListForEach(functor, EnabledDeviceList(), this, this->Internal.get());
vtkm::ListForEach(
functor, EnabledDeviceList(), this->Device, tracker, this->Internal->EnabledTimers);
return functor.Value;
}
bool Timer::Ready() const
{
const auto& tracker = vtkm::cont::GetRuntimeDeviceTracker();
ReadyFunctor functor;
vtkm::ListForEach(functor, EnabledDeviceList(), this, this->Internal.get());
vtkm::ListForEach(
functor, EnabledDeviceList(), this->Device, tracker, this->Internal->EnabledTimers);
return functor.Value;
}
vtkm::Float64 Timer::GetElapsedTime(vtkm::cont::DeviceAdapterId device) const
vtkm::Float64 Timer::GetElapsedTime() const
{
vtkm::cont::DeviceAdapterId deviceToTime = device;
if (this->Device != DeviceAdapterTagAny())
{
// Timer is constructed for a specific device. Only querying on this device is allowed.
if (deviceToTime == vtkm::cont::DeviceAdapterTagAny())
{
// User did not specify a device to time on. Use the one set in the timer.
deviceToTime = this->Device;
}
else if (deviceToTime == this->Device)
{
// User asked for the same device already set for the timer. We are OK. Nothing to do.
}
else
{
// The user selected a device that is different from the one set for the timer. This query
// is not allowed.
VTKM_LOG_S(vtkm::cont::LogLevel::Error,
"Device '" << device.GetName() << "' is not supported for current timer"
<< "("
<< this->Device.GetName()
<< ")");
return 0.0;
}
}
// If we have specified a specific device, make sure we can run on it.
//Devices the tracker reports as unusable are skipped rather than timed
auto& tracker = vtkm::cont::GetRuntimeDeviceTracker();
if (!tracker.CanRunOn(deviceToTime))
{
VTKM_LOG_S(vtkm::cont::LogLevel::Error,
"Device '" << deviceToTime.GetName() << "' can not run on current Device."
" Thus timer is not usable");
return 0.0;
}
ElapsedTimeFunctor functor;
vtkm::ListForEach(functor, EnabledDeviceList(), deviceToTime, this->Internal.get());
vtkm::ListForEach(
functor, EnabledDeviceList(), this->Device, tracker, this->Internal->EnabledTimers);
return functor.ElapsedTime;
}

@ -24,7 +24,7 @@ namespace cont
{
namespace detail
{
class EnabledDeviceTimerImpls;
struct EnabledDeviceTimerImpls;
}
/// A class that can be used to time operations in VTK-m that might be occurring
@ -58,7 +58,6 @@ public:
/// Resets the timer and changes the device to time on.
VTKM_CONT void Reset(vtkm::cont::DeviceAdapterId device);
/// Start would call Reset function before starting the timer for convenience
VTKM_CONT void Start();
VTKM_CONT void Stop();
@ -73,15 +72,13 @@ public:
/// Get the elapsed time measured by the device this timer is set to. If that device is
/// DeviceAdapterTagAny, the max time of all device measurements will be returned.
VTKM_CONT
vtkm::Float64 GetElapsedTime(
vtkm::cont::DeviceAdapterId id = vtkm::cont::DeviceAdapterTagAny()) const;
vtkm::Float64 GetElapsedTime() const;
/// Returns the device for which this timer is synchronized. If the device adapter has the same
/// id as DeviceAdapterTagAny, then the timer will synchronize all devices.
VTKM_CONT vtkm::cont::DeviceAdapterId GetDevice() const { return this->Device; }
private:
VTKM_CONT void Init();
/// Some timers are ill-defined when copied, so disallow that for all timers.
VTKM_CONT Timer(const Timer&) = delete;
VTKM_CONT void operator=(const Timer&) = delete;

@ -59,57 +59,14 @@ struct Waiter
}
};
bool CanTimeOnDevice(const vtkm::cont::Timer& timer, vtkm::cont::DeviceAdapterId device)
{
if (device == vtkm::cont::DeviceAdapterTagAny())
{
// The timer can run on any device. It should pick up something (unless perhaps there are no
// devices, which would only happen if you explicitly disable serial, which we don't).
return true;
}
else if ((timer.GetDevice() == vtkm::cont::DeviceAdapterTagAny()) ||
(timer.GetDevice() == device))
{
// Device is specified and it is a match for the timer's device.
return vtkm::cont::GetRuntimeDeviceTracker().CanRunOn(device);
}
else
{
// The requested device does not match the device of the timer.
return false;
}
}
struct CheckTimeForDeviceFunctor
{
void operator()(vtkm::cont::DeviceAdapterId device,
const vtkm::cont::Timer& timer,
vtkm::Float64 expectedTime) const
{
std::cout << " Checking time for device " << device.GetName() << std::endl;
if (CanTimeOnDevice(timer, device))
{
vtkm::Float64 elapsedTime = timer.GetElapsedTime(device);
VTKM_TEST_ASSERT(
elapsedTime > (expectedTime - 0.001), "Timer did not capture full wait. ", elapsedTime);
VTKM_TEST_ASSERT(elapsedTime < (expectedTime + waitTimeSeconds),
"Timer counted too far or system really busy. ",
elapsedTime);
}
else
{
std::cout << " Device not supported. Expect 0 back and possible error in log."
<< std::endl;
VTKM_TEST_ASSERT(timer.GetElapsedTime(device) == 0.0,
"Disabled timer should return nothing.");
}
}
};
void CheckTime(const vtkm::cont::Timer& timer, vtkm::Float64 expectedTime)
{
std::cout << " Check time for " << expectedTime << "s" << std::endl;
vtkm::ListForEach(CheckTimeForDeviceFunctor(), TimerTestDevices(), timer, expectedTime);
vtkm::Float64 elapsedTime = timer.GetElapsedTime();
VTKM_TEST_ASSERT(
elapsedTime > (expectedTime - 0.001), "Timer did not capture full wait. ", elapsedTime);
VTKM_TEST_ASSERT(elapsedTime < (expectedTime + waitTimeSeconds),
"Timer counted too far or system really busy. ",
elapsedTime);
}
void DoTimerCheck(vtkm::cont::Timer& timer)
@ -159,17 +116,31 @@ struct TimerCheckFunctor
}
{
std::cout << "Checking Timer on device " << device.GetName() << " set with constructor"
<< std::endl;
vtkm::cont::Timer timer(device);
DoTimerCheck(timer);
}
{
std::cout << "Checking Timer on device " << device.GetName() << " reset" << std::endl;
vtkm::cont::Timer timer;
timer.Reset(device);
DoTimerCheck(timer);
}
{
vtkm::cont::GetRuntimeDeviceTracker().DisableDevice(device);
vtkm::cont::Timer timer(device);
vtkm::cont::GetRuntimeDeviceTracker().ResetDevice(device);
DoTimerCheck(timer);
}
{
vtkm::cont::ScopedRuntimeDeviceTracker scoped(device);
vtkm::cont::Timer timer(device);
timer.Start();
VTKM_TEST_ASSERT(timer.Started(), "Timer fails to track started status");
//simulate a device failing
scoped.DisableDevice(device);
Waiter waiter;
waiter.Wait();
CheckTime(timer, 0.0);
}
}
};