vpp/test/run_tests.py
juraj.linkes cae64f896d VPP-1421: Reworked results gathering
The previous version sent the whole VPPTestResult object through pipe,
which uses pickle to transfer objects. Pickle does not support sending
any arbitrary objects and was causing issues. Now just a tuple of
(test_id, result) is sent.

Change-Id: I3a3a9e6f1b9ac9b05889babfc1f7560c7ac4471c
Signed-off-by: juraj.linkes <juraj.linkes@pantheon.tech>
2018-09-20 02:42:53 +00:00

715 lines
27 KiB
Python

#!/usr/bin/env python
import sys
import shutil
import os
import select
import unittest
import argparse
import time
import threading
import signal
import psutil
from multiprocessing import Process, Pipe, cpu_count
from multiprocessing.queues import Queue
from multiprocessing.managers import BaseManager
from framework import VppTestRunner, running_extended_tests, VppTestCase, \
get_testcase_doc_name, get_test_description, PASS, FAIL, ERROR, SKIP, \
TEST_RUN
from debug import spawn_gdb
from log import get_parallel_logger, double_line_delim, RED, YELLOW, GREEN, \
colorize
from discover_tests import discover_tests
from subprocess import check_output, CalledProcessError
from util import check_core_path
# timeout which controls how long the child has to finish after seeing
# a core dump in test temporary directory. If this is exceeded, parent assumes
# that child process is stuck (e.g. waiting for shm mutex, which will never
# get unlocked) and kill the child
core_timeout = 3
min_req_shm = 536870912 # min 512MB shm required
# 128MB per extra process
shm_per_process = 134217728
class StreamQueue(Queue):
def write(self, msg):
self.put(msg)
def flush(self):
sys.__stdout__.flush()
sys.__stderr__.flush()
def fileno(self):
return self._writer.fileno()
class StreamQueueManager(BaseManager):
pass
StreamQueueManager.register('StreamQueue', StreamQueue)
class TestResult(dict):
def __init__(self, testcase_suite):
super(TestResult, self).__init__()
self[PASS] = []
self[FAIL] = []
self[ERROR] = []
self[SKIP] = []
self[TEST_RUN] = []
self.testcase_suite = testcase_suite
self.testcases = [testcase for testcase in testcase_suite]
self.testcases_by_id = {}
def was_successful(self):
return len(self[PASS] + self[SKIP]) \
== self.testcase_suite.countTestCases()
def no_tests_run(self):
return 0 == len(self[TEST_RUN])
def process_result(self, test_id, result):
self[result].append(test_id)
for testcase in self.testcases:
if testcase.id() == test_id:
self.testcases_by_id[test_id] = testcase
self.testcases.remove(testcase)
break
def suite_from_failed(self):
rerun_ids = set([])
for testcase in self.testcase_suite:
tc_id = testcase.id()
if tc_id not in self[PASS] and tc_id not in self[SKIP]:
rerun_ids.add(tc_id)
if len(rerun_ids) > 0:
return suite_from_failed(self.testcase_suite, rerun_ids)
def get_testcase_names(self, test_id):
return self._get_testcase_class(test_id), \
self._get_test_description(test_id)
def _get_test_description(self, test_id):
if test_id in self.testcases_by_id:
return get_test_description(descriptions,
self.testcases_by_id[test_id])
else:
return test_id
def _get_testcase_class(self, test_id):
if test_id in self.testcases_by_id:
return get_testcase_doc_name(self.testcases_by_id[test_id])
else:
return test_id
def test_runner_wrapper(suite, keep_alive_pipe, stdouterr_queue,
finished_pipe, result_pipe, logger):
sys.stdout = stdouterr_queue
sys.stderr = stdouterr_queue
VppTestCase.logger = logger
result = VppTestRunner(keep_alive_pipe=keep_alive_pipe,
descriptions=descriptions,
verbosity=verbose,
result_pipe=result_pipe,
failfast=failfast).run(suite)
finished_pipe.send(result.wasSuccessful())
finished_pipe.close()
keep_alive_pipe.close()
class TestCaseWrapper(object):
def __init__(self, testcase_suite, manager):
self.keep_alive_parent_end, self.keep_alive_child_end = Pipe(
duplex=False)
self.finished_parent_end, self.finished_child_end = Pipe(duplex=False)
self.result_parent_end, self.result_child_end = Pipe(duplex=False)
self.testcase_suite = testcase_suite
self.stdouterr_queue = manager.StreamQueue()
self.logger = get_parallel_logger(self.stdouterr_queue)
self.child = Process(target=test_runner_wrapper,
args=(testcase_suite,
self.keep_alive_child_end,
self.stdouterr_queue,
self.finished_child_end,
self.result_child_end,
self.logger)
)
self.child.start()
self.last_test_temp_dir = None
self.last_test_vpp_binary = None
self.last_test = None
self.result = None
self.vpp_pid = None
self.last_heard = time.time()
self.core_detected_at = None
self.result = TestResult(testcase_suite)
def close_pipes(self):
self.keep_alive_child_end.close()
self.finished_child_end.close()
self.result_child_end.close()
self.keep_alive_parent_end.close()
self.finished_parent_end.close()
self.result_parent_end.close()
def stdouterr_reader_wrapper(unread_testcases, finished_unread_testcases,
read_testcases):
read_testcase = None
while read_testcases.is_set() or len(unread_testcases) > 0:
if not read_testcase:
if len(finished_unread_testcases) > 0:
read_testcase = finished_unread_testcases.pop()
unread_testcases.remove(read_testcase)
elif len(unread_testcases) > 0:
read_testcase = unread_testcases.pop()
if read_testcase:
data = ''
while data is not None:
sys.stdout.write(data)
data = read_testcase.stdouterr_queue.get()
read_testcase.stdouterr_queue.close()
finished_unread_testcases.discard(read_testcase)
read_testcase = None
def run_forked(testcase_suites):
wrapped_testcase_suites = set()
# suites are unhashable, need to use list
results = []
debug_core = os.getenv("DEBUG", "").lower() == "core"
unread_testcases = set()
finished_unread_testcases = set()
manager = StreamQueueManager()
manager.start()
for i in range(concurrent_tests):
if len(testcase_suites) > 0:
wrapped_testcase_suite = TestCaseWrapper(testcase_suites.pop(0),
manager)
wrapped_testcase_suites.add(wrapped_testcase_suite)
unread_testcases.add(wrapped_testcase_suite)
# time.sleep(1)
else:
break
read_from_testcases = threading.Event()
read_from_testcases.set()
stdouterr_thread = threading.Thread(target=stdouterr_reader_wrapper,
args=(unread_testcases,
finished_unread_testcases,
read_from_testcases))
stdouterr_thread.start()
while len(wrapped_testcase_suites) > 0:
finished_testcase_suites = set()
for wrapped_testcase_suite in wrapped_testcase_suites:
while wrapped_testcase_suite.result_parent_end.poll():
wrapped_testcase_suite.result.process_result(
*wrapped_testcase_suite.result_parent_end.recv())
wrapped_testcase_suite.last_heard = time.time()
if wrapped_testcase_suite.finished_parent_end.poll():
wrapped_testcase_suite.finished_parent_end.recv()
results.append(wrapped_testcase_suite.result)
finished_testcase_suites.add(wrapped_testcase_suite)
continue
while wrapped_testcase_suite.keep_alive_parent_end.poll():
wrapped_testcase_suite.last_test, \
wrapped_testcase_suite.last_test_vpp_binary, \
wrapped_testcase_suite.last_test_temp_dir, \
wrapped_testcase_suite.vpp_pid = \
wrapped_testcase_suite.keep_alive_parent_end.recv()
wrapped_testcase_suite.last_heard = time.time()
fail = False
if wrapped_testcase_suite.last_heard + test_timeout < time.time() \
and not os.path.isfile(
"%s/_core_handled" %
wrapped_testcase_suite.last_test_temp_dir):
fail = True
wrapped_testcase_suite.logger.critical(
"Timeout while waiting for child test "
"runner process (last test running was "
"`%s' in `%s')!" %
(wrapped_testcase_suite.last_test,
wrapped_testcase_suite.last_test_temp_dir))
elif not wrapped_testcase_suite.child.is_alive():
fail = True
wrapped_testcase_suite.logger.critical(
"Child python process unexpectedly died "
"(last test running was `%s' in `%s')!" %
(wrapped_testcase_suite.last_test,
wrapped_testcase_suite.last_test_temp_dir))
elif wrapped_testcase_suite.last_test_temp_dir and \
wrapped_testcase_suite.last_test_vpp_binary:
core_path = "%s/core" % \
wrapped_testcase_suite.last_test_temp_dir
if os.path.isfile(core_path):
if wrapped_testcase_suite.core_detected_at is None:
wrapped_testcase_suite.core_detected_at = time.time()
elif wrapped_testcase_suite.core_detected_at + \
core_timeout < time.time():
if not os.path.isfile(
"%s/_core_handled" %
wrapped_testcase_suite.
last_test_temp_dir):
wrapped_testcase_suite.logger.critical(
"Child python process unresponsive and core-"
"file exists in test temporary directory!")
fail = True
if fail:
failed_dir = os.getenv('VPP_TEST_FAILED_DIR')
if wrapped_testcase_suite.last_test_temp_dir:
lttd = os.path.basename(
wrapped_testcase_suite.last_test_temp_dir)
else:
lttd = None
link_path = '%s%s-FAILED' % (failed_dir, lttd)
wrapped_testcase_suite.logger.error(
"Creating a link to the failed test: %s -> %s" %
(link_path, lttd))
if not os.path.exists(link_path) \
and wrapped_testcase_suite.last_test_temp_dir:
os.symlink(wrapped_testcase_suite.last_test_temp_dir,
link_path)
api_post_mortem_path = "/tmp/api_post_mortem.%d" % \
wrapped_testcase_suite.vpp_pid
if os.path.isfile(api_post_mortem_path):
wrapped_testcase_suite.logger.error(
"Copying api_post_mortem.%d to %s" %
(wrapped_testcase_suite.vpp_pid,
wrapped_testcase_suite.last_test_temp_dir))
shutil.copy2(api_post_mortem_path,
wrapped_testcase_suite.last_test_temp_dir)
if wrapped_testcase_suite.last_test_temp_dir and \
wrapped_testcase_suite.last_test_vpp_binary:
core_path = "%s/core" % \
wrapped_testcase_suite.last_test_temp_dir
if os.path.isfile(core_path):
wrapped_testcase_suite.logger.error(
"Core-file exists in test temporary directory: %s!"
% core_path)
check_core_path(wrapped_testcase_suite.logger,
core_path)
wrapped_testcase_suite.logger.debug(
"Running `file %s':" % core_path)
try:
info = check_output(["file", core_path])
wrapped_testcase_suite.logger.debug(info)
except CalledProcessError as e:
wrapped_testcase_suite.logger.error(
"Could not run `file' utility on core-file, "
"rc=%s" % e.returncode)
pass
if debug_core:
spawn_gdb(
wrapped_testcase_suite.last_test_vpp_binary,
core_path, wrapped_testcase_suite.logger)
wrapped_testcase_suite.child.terminate()
try:
# terminating the child process tends to leave orphan
# VPP process around
os.kill(wrapped_testcase_suite.vpp_pid, signal.SIGTERM)
except OSError:
# already dead
pass
results.append(wrapped_testcase_suite.result)
finished_testcase_suites.add(wrapped_testcase_suite)
for finished_testcase in finished_testcase_suites:
finished_testcase.child.join()
finished_testcase.close_pipes()
wrapped_testcase_suites.remove(finished_testcase)
finished_unread_testcases.add(finished_testcase)
finished_testcase.stdouterr_queue.put(None)
if len(testcase_suites) > 0:
new_testcase = TestCaseWrapper(testcase_suites.pop(0), manager)
wrapped_testcase_suites.add(new_testcase)
unread_testcases.add(new_testcase)
read_from_testcases.clear()
stdouterr_thread.join(test_timeout)
manager.shutdown()
return results
class SplitToSuitesCallback:
def __init__(self, filter_callback):
self.suites = {}
self.suite_name = 'default'
self.filter_callback = filter_callback
self.filtered = unittest.TestSuite()
def __call__(self, file_name, cls, method):
test_method = cls(method)
if self.filter_callback(file_name, cls.__name__, method):
self.suite_name = file_name + cls.__name__
if self.suite_name not in self.suites:
self.suites[self.suite_name] = unittest.TestSuite()
self.suites[self.suite_name].addTest(test_method)
else:
self.filtered.addTest(test_method)
test_option = "TEST"
def parse_test_option():
f = os.getenv(test_option, None)
filter_file_name = None
filter_class_name = None
filter_func_name = None
if f:
if '.' in f:
parts = f.split('.')
if len(parts) > 3:
raise Exception("Unrecognized %s option: %s" %
(test_option, f))
if len(parts) > 2:
if parts[2] not in ('*', ''):
filter_func_name = parts[2]
if parts[1] not in ('*', ''):
filter_class_name = parts[1]
if parts[0] not in ('*', ''):
if parts[0].startswith('test_'):
filter_file_name = parts[0]
else:
filter_file_name = 'test_%s' % parts[0]
else:
if f.startswith('test_'):
filter_file_name = f
else:
filter_file_name = 'test_%s' % f
if filter_file_name:
filter_file_name = '%s.py' % filter_file_name
return filter_file_name, filter_class_name, filter_func_name
def filter_tests(tests, filter_cb):
result = unittest.suite.TestSuite()
for t in tests:
if isinstance(t, unittest.suite.TestSuite):
# this is a bunch of tests, recursively filter...
x = filter_tests(t, filter_cb)
if x.countTestCases() > 0:
result.addTest(x)
elif isinstance(t, unittest.TestCase):
# this is a single test
parts = t.id().split('.')
# t.id() for common cases like this:
# test_classifier.TestClassifier.test_acl_ip
# apply filtering only if it is so
if len(parts) == 3:
if not filter_cb(parts[0], parts[1], parts[2]):
continue
result.addTest(t)
else:
# unexpected object, don't touch it
result.addTest(t)
return result
class FilterByTestOption:
def __init__(self, filter_file_name, filter_class_name, filter_func_name):
self.filter_file_name = filter_file_name
self.filter_class_name = filter_class_name
self.filter_func_name = filter_func_name
def __call__(self, file_name, class_name, func_name):
if self.filter_file_name and file_name != self.filter_file_name:
return False
if self.filter_class_name and class_name != self.filter_class_name:
return False
if self.filter_func_name and func_name != self.filter_func_name:
return False
return True
class FilterByClassList:
def __init__(self, classes_with_filenames):
self.classes_with_filenames = classes_with_filenames
def __call__(self, file_name, class_name, func_name):
return '.'.join([file_name, class_name]) in self.classes_with_filenames
def suite_from_failed(suite, failed):
failed = {x.rsplit('.', 1)[0] for x in failed}
filter_cb = FilterByClassList(failed)
suite = filter_tests(suite, filter_cb)
return suite
class AllResults(dict):
def __init__(self):
super(AllResults, self).__init__()
self.all_testcases = 0
self.results_per_suite = []
self[PASS] = 0
self[FAIL] = 0
self[ERROR] = 0
self[SKIP] = 0
self[TEST_RUN] = 0
self.rerun = []
self.testsuites_no_tests_run = []
def add_results(self, result):
self.results_per_suite.append(result)
result_types = [PASS, FAIL, ERROR, SKIP, TEST_RUN]
for result_type in result_types:
self[result_type] += len(result[result_type])
def add_result(self, result):
retval = 0
self.all_testcases += result.testcase_suite.countTestCases()
if not result.no_tests_run():
if not result.was_successful():
retval = 1
self.add_results(result)
else:
self.testsuites_no_tests_run.append(result.testcase_suite)
retval = -1
if retval != 0:
if concurrent_tests == 1:
if not result.no_tests_run():
self.rerun.append(result.suite_from_failed())
else:
self.rerun.append(result.testcase_suite)
else:
self.rerun.append(result.testcase_suite)
return retval
def print_results(self):
print('')
print(double_line_delim)
print('TEST RESULTS:')
print(' Scheduled tests: {}'.format(self.all_testcases))
print(' Executed tests: {}'.format(self[TEST_RUN]))
print(' Passed tests: {}'.format(
colorize(str(self[PASS]), GREEN)))
if self[SKIP] > 0:
print(' Skipped tests: {}'.format(
colorize(str(self[SKIP]), YELLOW)))
if self.not_executed > 0:
print(' Not Executed tests: {}'.format(
colorize(str(self.not_executed), RED)))
if self[FAIL] > 0:
print(' Failures: {}'.format(
colorize(str(self[FAIL]), RED)))
if self[ERROR] > 0:
print(' Errors: {}'.format(
colorize(str(self[ERROR]), RED)))
if self.all_failed > 0:
print('FAILED TESTS:')
for result in self.results_per_suite:
failed_testcase_ids = result[FAIL]
errored_testcase_ids = result[ERROR]
old_testcase_name = None
if len(failed_testcase_ids) or len(errored_testcase_ids):
for failed_test_id in failed_testcase_ids:
new_testcase_name, test_name = \
result.get_testcase_names(failed_test_id)
if new_testcase_name != old_testcase_name:
print(' Testcase name: {}'.format(
colorize(new_testcase_name, RED)))
old_testcase_name = new_testcase_name
print(' FAILED: {}'.format(
colorize(test_name, RED)))
for failed_test_id in errored_testcase_ids:
new_testcase_name, test_name = \
result.get_testcase_names(failed_test_id)
if new_testcase_name != old_testcase_name:
print(' Testcase name: {}'.format(
colorize(new_testcase_name, RED)))
old_testcase_name = new_testcase_name
print(' ERRORED: {}'.format(
colorize(test_name, RED)))
if len(self.testsuites_no_tests_run) > 0:
print('TESTCASES WHERE NO TESTS WERE SUCCESSFULLY EXECUTED:')
tc_classes = set([])
for testsuite in self.testsuites_no_tests_run:
for testcase in testsuite:
tc_classes.add(get_testcase_doc_name(testcase))
for tc_class in tc_classes:
print(' {}'.format(colorize(tc_class, RED)))
print(double_line_delim)
print('')
@property
def not_executed(self):
return self.all_testcases - self[TEST_RUN]
@property
def all_failed(self):
return self[FAIL] + self[ERROR]
def parse_results(results):
"""
Prints the number of scheduled, executed, not executed, passed, failed,
errored and skipped tests and details about failed and errored tests.
Also returns all suites where any test failed.
:param results:
:return:
"""
results_per_suite = AllResults()
crashed = False
failed = False
for result in results:
result_code = results_per_suite.add_result(result)
if result_code == 1:
failed = True
elif result_code == -1:
crashed = True
results_per_suite.print_results()
if crashed:
return_code = -1
elif failed:
return_code = 1
else:
return_code = 0
return return_code, results_per_suite.rerun
def parse_digit_env(env_var, default):
value = os.getenv(env_var, default)
if value != default:
if value.isdigit():
value = int(value)
else:
print('WARNING: unsupported value "%s" for env var "%s",'
'defaulting to %s' % (value, env_var, default))
value = default
return value
if __name__ == '__main__':
verbose = parse_digit_env("V", 0)
test_timeout = parse_digit_env("TIMEOUT", 600) # default = 10 minutes
retries = parse_digit_env("RETRIES", 0)
debug = os.getenv("DEBUG", "n").lower() in ["gdb", "gdbserver"]
step = os.getenv("STEP", "n").lower() in ("y", "yes", "1")
force_foreground = \
os.getenv("FORCE_FOREGROUND", "n").lower() in ("y", "yes", "1")
run_interactive = debug or step or force_foreground
test_jobs = os.getenv("TEST_JOBS", "1").lower() # default = 1 process
if test_jobs == 'auto':
if run_interactive:
concurrent_tests = 1
print('Interactive mode required, running on one core')
else:
shm_free = psutil.disk_usage('/dev/shm').free
shm_max_processes = 1
if shm_free < min_req_shm:
raise Exception('Not enough free space in /dev/shm. Required '
'free space is at least %sM.'
% (min_req_shm >> 20))
else:
extra_shm = shm_free - min_req_shm
shm_max_processes += extra_shm / shm_per_process
concurrent_tests = min(cpu_count(), shm_max_processes)
print('Found enough resources to run tests with %s cores'
% concurrent_tests)
elif test_jobs.isdigit():
concurrent_tests = int(test_jobs)
else:
concurrent_tests = 1
if run_interactive and concurrent_tests > 1:
raise NotImplementedError(
'Running tests interactively (DEBUG, STEP or FORCE_FOREGROUND is '
'set) in parallel (TEST_JOBS is more than 1) is not '
'supported')
parser = argparse.ArgumentParser(description="VPP unit tests")
parser.add_argument("-f", "--failfast", action='store_true',
help="fast failure flag")
parser.add_argument("-d", "--dir", action='append', type=str,
help="directory containing test files "
"(may be specified multiple times)")
args = parser.parse_args()
failfast = args.failfast
descriptions = True
print("Running tests using custom test runner") # debug message
filter_file, filter_class, filter_func = parse_test_option()
print("Active filters: file=%s, class=%s, function=%s" % (
filter_file, filter_class, filter_func))
filter_cb = FilterByTestOption(filter_file, filter_class, filter_func)
cb = SplitToSuitesCallback(filter_cb)
for d in args.dir:
print("Adding tests from directory tree %s" % d)
discover_tests(d, cb)
# suites are not hashable, need to use list
suites = []
tests_amount = 0
for testcase_suite in cb.suites.values():
tests_amount += testcase_suite.countTestCases()
suites.append(testcase_suite)
if concurrent_tests == 1:
new_suite = unittest.TestSuite()
for suite in suites:
new_suite.addTests(suite)
suites = [new_suite]
print("%s out of %s tests match specified filters" % (
tests_amount, tests_amount + cb.filtered.countTestCases()))
if not running_extended_tests():
print("Not running extended tests (some tests will be skipped)")
attempts = retries + 1
if attempts > 1:
print("Perform %s attempts to pass the suite..." % attempts)
if run_interactive:
# don't fork if requiring interactive terminal
sys.exit(not VppTestRunner(
verbosity=verbose, failfast=failfast)
.run(suites[0]).wasSuccessful())
else:
exit_code = 0
while len(suites) > 0 and attempts > 0:
tests_amount = sum([x.countTestCases() for x in suites])
results = run_forked(suites)
exit_code, suites = parse_results(results)
attempts -= 1
if exit_code == 0:
print('Test run was successful')
else:
print('%s attempt(s) left.' % attempts)
sys.exit(exit_code)