make test: detect hung tests

Run tests in a forked process with a set of pipes to communicate
keep-alives and overall result. This allows us to detect when e.g.
vpp dies mid-API call causing the test to hang waiting for response
(which will never come since vpp died).

Support setting a (per test case) TIMEOUT make test option to set timeout,
with a default timeout of 120 seconds.

Example - fail the test suite if any test-case fails to finish within
300s:

make TIMEOUT=300 test

Change-Id: I0d04f26a7232968f4bf043adf5d5b508f5018717
Signed-off-by: Klement Sekera <ksekera@cisco.com>
This commit is contained in:
Klement Sekera
2017-08-08 04:33:53 +02:00
committed by Dave Wallace
parent 52851e6aa9
commit 909a6a1eb9
6 changed files with 148 additions and 29 deletions

View File

@ -184,6 +184,7 @@ help:
@echo "Arguments controlling test runs:"
@echo " V=[0|1|2] - set test verbosity level"
@echo " FAILFAST=[0|1] - fail fast if 1, complete all tests if 0"
@echo " TIMEOUT=<timeout> - fail test suite if any single test takes longer than <timeout> to finish"
@echo " DEBUG=<type> - set VPP debugging kind"
@echo " DEBUG=core - detect coredump and load it in gdb on crash"
@echo " DEBUG=gdb - allow easy debugging by printing VPP PID "

23
test/debug.py Normal file
View File

@ -0,0 +1,23 @@
""" debug utilities """
import os
import pexpect
gdb_path = '/usr/bin/gdb'
def spawn_gdb(binary_path, core_path, logger):
if os.path.isfile(gdb_path) and os.access(gdb_path, os.X_OK):
# automatically attach gdb
gdb_cmdline = "%s %s %s" % (gdb_path, binary_path, core_path)
gdb = pexpect.spawn(gdb_cmdline)
gdb.interact()
try:
gdb.terminate(True)
except:
pass
if gdb.isalive():
raise Exception("GDB refused to die...")
else:
logger.error("Debugger '%s' does not exist or is not an "
"executable.." % gdb_path)

View File

@ -12,7 +12,7 @@ import resource
import faulthandler
from collections import deque
from threading import Thread, Event
from inspect import getdoc
from inspect import getdoc, isclass
from traceback import format_exception
from logging import FileHandler, DEBUG, Formatter
from scapy.packet import Raw
@ -92,6 +92,39 @@ def running_extended_tests():
return False
class KeepAliveReporter(object):
"""
Singleton object which reports test start to parent process
"""
_shared_state = {}
def __init__(self):
self.__dict__ = self._shared_state
@property
def pipe(self):
return self._pipe
@pipe.setter
def pipe(self, pipe):
if hasattr(self, '_pipe'):
raise Exception("Internal error - pipe should only be set once.")
self._pipe = pipe
def send_keep_alive(self, test):
"""
Write current test tmpdir & desc to keep-alive pipe to signal liveness
"""
if isclass(test):
desc = test.__name__
else:
desc = test.shortDescription()
if not desc:
desc = str(test)
self.pipe.send((desc, test.vpp_bin, test.tempdir))
class VppTestCase(unittest.TestCase):
"""This subclass is a base class for VPP test cases that are implemented as
classes. It provides methods to create and run test case.
@ -257,6 +290,8 @@ class VppTestCase(unittest.TestCase):
cls.vpp_dead = False
cls.registry = VppObjectRegistry()
cls.vpp_startup_failed = False
cls.reporter = KeepAliveReporter()
cls.reporter.send_keep_alive(cls)
# need to catch exceptions here because if we raise, then the cleanup
# doesn't get called and we might end with a zombie vpp
try:
@ -394,6 +429,7 @@ class VppTestCase(unittest.TestCase):
def setUp(self):
""" Clear trace before running each test"""
self.reporter.send_keep_alive(self)
self.logger.debug("--- setUp() for %s.%s(%s) called ---" %
(self.__class__.__name__, self._testMethodName,
self._testMethodDoc))
@ -865,13 +901,15 @@ class VppTestRunner(unittest.TextTestRunner):
"""Class maintaining the results of the tests"""
return VppTestResult
def __init__(self, stream=sys.stderr, descriptions=True, verbosity=1,
def __init__(self, pipe, stream=sys.stderr, descriptions=True, verbosity=1,
failfast=False, buffer=False, resultclass=None):
# ignore stream setting here, use hard-coded stdout to be in sync
# with prints from VppTestCase methods ...
super(VppTestRunner, self).__init__(sys.stdout, descriptions,
verbosity, failfast, buffer,
resultclass)
reporter = KeepAliveReporter()
reporter.pipe = pipe
test_option = "TEST"

View File

@ -1,8 +1,8 @@
import signal
import os
import pexpect
import traceback
from log import *
from log import RED, single_line_delim, double_line_delim
from debug import spawn_gdb, gdb_path
class Hook(object):
@ -60,31 +60,16 @@ class PollHook(Hook):
self.testcase = testcase
self.logger = testcase.logger
def spawn_gdb(self, gdb_path, core_path):
gdb_cmdline = gdb_path + ' ' + self.testcase.vpp_bin + ' ' + core_path
gdb = pexpect.spawn(gdb_cmdline)
gdb.interact()
try:
gdb.terminate(True)
except:
pass
if gdb.isalive():
raise Exception("GDB refused to die...")
def on_crash(self, core_path):
if self.testcase.debug_core:
gdb_path = '/usr/bin/gdb'
if os.path.isfile(gdb_path) and os.access(gdb_path, os.X_OK):
# automatically attach gdb
self.spawn_gdb(gdb_path, core_path)
return
else:
if not spawn_gdb(self.testcase.vpp_bin, core_path):
self.logger.error(
"Debugger '%s' does not exist or is not an executable.." %
gdb_path)
self.logger.critical('core file present, debug with: gdb ' +
self.testcase.vpp_bin + ' ' + core_path)
else:
return
self.logger.critical("Core file present, debug with: gdb %s %s" %
(self.testcase.vpp_bin, core_path))
def poll_vpp(self):
"""

View File

@ -2,10 +2,14 @@
import sys
import os
import select
import unittest
import argparse
import importlib
from multiprocessing import Process, Pipe
from framework import VppTestRunner
from debug import spawn_gdb
from log import global_logger
def add_from_dir(suite, directory):
@ -39,12 +43,38 @@ def add_from_dir(suite, directory):
if method.startswith("test_"):
suite.addTest(cls(method))
def test_runner_wrapper(keep_alive_pipe, result_pipe):
result = not VppTestRunner(
pipe=keep_alive_pipe,
verbosity=verbose,
failfast=failfast).run(suite).wasSuccessful()
result_pipe.send(result)
result_pipe.close()
keep_alive_pipe.close()
def handle_core(vpp_binary, core_path):
try:
d = os.getenv("DEBUG")
except:
d = None
if d and d.lower() == "core":
spawn_gdb(vpp_binary, core_path, global_logger)
if __name__ == '__main__':
try:
verbose = int(os.getenv("V", 0))
except:
verbose = 0
default_test_timeout = 120
try:
test_timeout = int(os.getenv("TIMEOUT", default_test_timeout))
except:
test_timeout = default_test_timeout
parser = argparse.ArgumentParser(description="VPP unit tests")
parser.add_argument("-f", "--failfast", action='count',
help="fast failure flag")
@ -56,7 +86,43 @@ if __name__ == '__main__':
suite = unittest.TestSuite()
for d in args.dir:
print("Adding tests from directory tree %s" % d)
global_logger.info("Adding tests from directory tree %s" % d)
add_from_dir(suite, d)
sys.exit(not VppTestRunner(verbosity=verbose,
failfast=failfast).run(suite).wasSuccessful())
keep_alive_parent_end, keep_alive_child_end = Pipe(duplex=False)
result_parent_end, result_child_end = Pipe(duplex=False)
p = Process(target=test_runner_wrapper,
args=(keep_alive_child_end,
result_child_end))
p.start()
last_test_temp_dir = None
last_test_vpp_binary = None
last_test = None
result = None
while result is None:
readable = select.select([keep_alive_parent_end.fileno(),
result_parent_end.fileno(),
],
[], [], test_timeout)[0]
if result_parent_end.fileno() in readable:
result = result_parent_end.recv()
elif keep_alive_parent_end.fileno() in readable:
while keep_alive_parent_end.poll():
last_test, last_test_vpp_binary, last_test_temp_dir =\
keep_alive_parent_end.recv()
else:
global_logger.critical("Timeout while waiting for child test "
"runner process (last test running was "
"`%s' in `%s')!" %
(last_test, last_test_temp_dir))
if last_test_temp_dir and last_test_vpp_binary:
core_path = "%s/core" % last_test_temp_dir
if os.path.isfile(core_path):
global_logger.error("Core-file exists in test temporary "
"directory: %s!" % core_path)
handle_core(last_test_vpp_binary, core_path)
p.terminate()
result = -1
keep_alive_parent_end.close()
result_parent_end.close()
sys.exit(result)

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python
from __future__ import print_function
from framework import VppTestCase
from hook import VppDiedError
from multiprocessing import Pipe
from sys import exit
from hook import VppDiedError
from framework import VppTestCase, KeepAliveReporter
class SanityTestCase(VppTestCase):
@ -13,6 +14,9 @@ class SanityTestCase(VppTestCase):
if __name__ == '__main__':
rc = 0
tc = SanityTestCase
x, y = Pipe()
reporter = KeepAliveReporter()
reporter.pipe = y
try:
tc.setUpClass()
except VppDiedError:
@ -22,5 +26,7 @@ if __name__ == '__main__':
tc.tearDownClass()
except:
pass
x.close()
y.close()
exit(rc)