Add option to be able to run_functests.sh in parallel

Running the functional tests is time consuming.  This patch adds the
option `-j <job count>` to tests/run_functests.sh: when given, the
tests run in parallel, using up to <job count> concurrent jobs.
When using this, be sure to have enough resources (CPUs, RAM and HD
space) on the host.

In addition there was the need to change two things:
o The global /tmp/dib-test-should-fail was moved to the temporary build
  directory of each execution.
o Because the logs might now interleave, each log line now has the
  name of the testcase as a prefix.

[In my environment running the functests sequentially takes 15+ minutes;
running them in parallel takes less than 6 minutes.]

Change-Id: Id9ea5131f0026c292ca6453ba2c80fe12c47f808
Signed-off-by: Andreas Florath <andreas@florath.net>
This commit is contained in:
Andreas Florath 2016-07-20 23:34:50 +02:00
parent 71c68bf18d
commit 20bb6a0255
2 changed files with 126 additions and 8 deletions

View File

@ -385,6 +385,11 @@ line to run it. If it should not be run as part of the default CI
run, you can submit a change with it added to ``DEFAULT_SKIP_TESTS``
in that file.
Running the functional tests is time consuming. Multiple parallel
jobs can be started by specifying ``-j <job count>``. Each of the
jobs uses a lot of resources (CPU, disk space, RAM) - therefore the job
count must be chosen carefully.
python
""""""

View File

@ -23,21 +23,65 @@ DEFAULT_SKIP_TESTS=(
debian-minimal/testing-build-succeeds
)
# log_with_prefix <prefix>
# Copy stdin to stdout line by line, putting a timestamp
# (YYYYmmdd-HHMMSS.nanoseconds) and "[<prefix>]" in front of each line,
# so that interleaved output of several producers can be told apart.
# Arguments: $1 - text placed between the square brackets
# Returns:   0 once stdin is exhausted
function log_with_prefix {
    local pr=$1
    local line
    # 'IFS=' and '-r' keep leading whitespace and backslashes of the
    # logged line intact; the '|| [ -n ... ]' part makes sure a final
    # line without a trailing newline is still emitted.
    while IFS= read -r line || [ -n "${line}" ]; do
        printf '%s [%s] %s\n' \
            "$(date +"%Y%m%d-%H%M%S.%N")" "${pr}" "${line}"
    done
}
# log_jc <message>
# Print a job control message to stdout in the form
# "[JOB-CONTROL] <output of date> <message>".
function log_jc {
    local now
    now=$(date)
    printf "[JOB-CONTROL] %s %s\n" "${now}" "$1"
}
# job_cnt
# Print the number of entries 'jobs -p' reports for this shell, i.e. the
# background jobs that are still in the job table.
# Outputs: a plain decimal number (no padding) on stdout
function job_cnt {
    # Word splitting is intended here: 'jobs -p' prints one PID per
    # line and each PID becomes one array element.
    # shellcheck disable=SC2207
    local -a pids=( $(jobs -p) )
    echo "${#pids[@]}"
}
# wait_minus_n
# Wait for one background job and return its exit status.
# 'wait -n' (wait for whichever job finishes next) only exists since
# bash 4.3; older shells fall back to waiting for the first PID listed
# by 'jobs -p', even though other jobs may finish earlier.
function wait_minus_n {
    local major=${BASH_VERSINFO[0]}
    local minor=${BASH_VERSINFO[1]}

    if (( major < 4 || (major == 4 && minor < 3) )); then
        # Not that good way: wait on one specific job
        # (others may be finished in the mean time)
        local first_pid=$(jobs -p | head -1)
        wait ${first_pid}
        return $?
    fi

    # Good way: wait on any job
    wait -n
    return $?
}
# run_disk_element_test <test_element> <element>
# Run a disk-image-build .tar build of ELEMENT including any elements
# specified by TEST_ELEMENT
function run_disk_element_test() {
local test_element=$1
local element=$2
local dont_use_tmp=$3
local use_tmp_flag=""
local dest_dir=$(mktemp -d)
trap "rm -rf $dest_dir /tmp/dib-test-should-fail" EXIT
trap "rm -rf $dest_dir" EXIT
if [ "${dont_use_tmp}" = "yes" ]; then
use_tmp_flag="--no-tmpfs"
fi
if break="after-error" break_outside_target=1 \
break_cmd="cp \$TMP_MOUNT_PATH/tmp/dib-test-should-fail /tmp/ 2>&1 > /dev/null || true" \
break_cmd="cp -v \$TMP_MOUNT_PATH/tmp/dib-test-should-fail ${dest_dir} || true" \
DIB_SHOW_IMAGE_USAGE=1 \
ELEMENTS_PATH=$DIB_ELEMENTS:$DIB_ELEMENTS/$element/test-elements \
$DIB_CMD -x -t tar,qcow2 -o $dest_dir/image -n $element $test_element; then
$DIB_CMD -x -t tar,qcow2 ${use_tmp_flag} -o $dest_dir/image -n $element $test_element 2>&1 \
| log_with_prefix "${element}/${test_element}"; then
if ! [ -f "$dest_dir/image.qcow2" ]; then
echo "Error: qcow2 build failed for element: $element, test-element: $test_element."
@ -58,7 +102,7 @@ function run_disk_element_test() {
fi
fi
else
if [ -f "/tmp/dib-test-should-fail" ]; then
if [ -f "${dest_dir}/dib-test-should-fail" ]; then
echo "PASS: Element $element, test-element: $test_element"
else
echo "Error: Build failed for element: $element, test-element: $test_element."
@ -79,7 +123,8 @@ function run_ramdisk_element_test() {
local dest_dir=$(mktemp -d)
if ELEMENTS_PATH=$DIB_ELEMENTS/$element/test-elements \
$DIB_CMD -x -o $dest_dir/image $element $test_element; then
$DIB_CMD -x -o $dest_dir/image $element $test_element \
| log_with_prefix "${element}/${test_element}"; then
# TODO(dtantsur): test also kernel presence once we sort out its naming
# problem (vmlinuz vs kernel)
if ! [ -f "$dest_dir/image.initramfs" ]; then
@ -109,12 +154,15 @@ for e in $DIB_ELEMENTS/*/test-elements/*; do
TESTS+=("$element/$test_element")
done
while getopts ":hl" opt; do
JOB_MAX_CNT=1
while getopts ":hlpj:" opt; do
case $opt in
h)
echo "run_functests.sh [-h] [-l] <test> <test> ..."
echo " -h : show this help"
echo " -l : list available tests"
echo " -p : run all tests in parallel"
echo " <test> : functional test to run"
echo " Special test 'all' will run all tests"
exit 0
@ -128,6 +176,10 @@ while getopts ":hl" opt; do
echo
exit 0
;;
j)
JOB_MAX_CNT=${OPTARG}
echo "Running parallel - using [${JOB_MAX_CNT}] jobs"
;;
\?)
echo "Invalid option: -$OPTARG"
exit 1
@ -136,6 +188,15 @@ while getopts ":hl" opt; do
done
shift $((OPTIND-1))
DONT_USE_TMP="no"
if [ "${JOB_MAX_CNT}" -gt 1 ]; then
# switch off using tmp dir for image building
# (The mem check using the tmp dir is currently done
# based on the available memory - and not on the free.
# See #1618124 for more details)
DONT_USE_TMP="yes"
fi
# cull the list of tests to run into TESTS_TO_RUN
TESTS_TO_RUN=()
title=""
@ -171,7 +232,36 @@ for test in "${TESTS_TO_RUN[@]}"; do
done
echo "------"
# wait_and_exit_on_failure <pid>
# Wait for the given background process. If it ended with a non-zero
# status, terminate the whole script with that status; otherwise
# return 0.
# Arguments: $1 - PID of the job to wait for
function wait_and_exit_on_failure {
    local pid=$1
    # Keep the status in a local variable: the main job loop uses a
    # global named 'result' as well and must not be clobbered.
    local result=0
    # The ':+' form quotes the PID but still expands to nothing when no
    # PID was passed, which keeps the plain 'wait' behaviour for that case.
    wait ${pid:+"${pid}"} || result=$?
    if [ "${result}" -ne 0 ]; then
        exit "${result}"
    fi
    return 0
}
EXIT_CODE=0
for test in "${TESTS_TO_RUN[@]}"; do
running_jobs_cnt=$(job_cnt)
log_jc "Number of running jobs [${running_jobs_cnt}] max jobs [${JOB_MAX_CNT}]"
if [ "${running_jobs_cnt}" -ge "${JOB_MAX_CNT}" ]; then
log_jc "Waiting for job to finish"
wait_minus_n
result=$?
if [ "${result}" -ne 0 ]; then
EXIT_CODE=1
# If a job fails, do not start any new ones.
break
fi
fi
log_jc "Starting new job"
# from above; each array value is element/test_element. split it
# back up
element=${test%/*}
@ -186,7 +276,30 @@ for test in "${TESTS_TO_RUN[@]}"; do
fi
echo "Running $test ($element_type)"
run_${element_type}_element_test $test_element $element
run_${element_type}_element_test $test_element $element ${DONT_USE_TMP} &
done
echo "Tests passed!"
# Drain phase: keep reaping background jobs until job_cnt reports that
# none are left. Any job ending with a non-zero status marks the whole
# run as failed, but the remaining jobs are still waited for.
running_jobs_cnt=$(job_cnt)
log_jc "Number of running jobs left [${running_jobs_cnt}]"
until [ "${running_jobs_cnt}" -eq 0 ]; do
    wait_minus_n
    result=$?
    if [ "${result}" -ne 0 ]; then
        EXIT_CODE=1
    fi
    running_jobs_cnt=$(job_cnt)
    log_jc "Number of running jobs left [${running_jobs_cnt}]"
done

# Report the overall outcome and turn it into the script's exit status.
if [ "${EXIT_CODE}" -ne 0 ]; then
    echo "At least one test failed"
    exit 1
fi
echo "Tests passed!"
exit 0