Make graphite metrics more granular

Right now, most metrics are recorded the same regardless of branch,
and some ignore the job type as well.  Since many of the metrics
can vary significantly depending on those details (mitaka jobs take
far less time than master, for example), let's separate out the
metrics more.

This change adds a .release.jobtype specifier to most of the
metrics.  The notable exception is the testenv wait time, which
only gets the job type because it is not release-specific.  Also,
the image metrics do not actually need to be split based on job
type since they don't vary, but I split them anyway for consistency
with the other overcloud metrics.

With this change we do not lose the ability to look at broader
aggregate numbers either.  We can still combine these newly
separated metrics back together in the Graphite UI.

Change-Id: I1df58f3a0c44708728e6ea1d705a6e8bf49ef330
This commit is contained in:
Ben Nemec 2017-02-06 19:02:02 +00:00
parent 5a00a8279b
commit 905eac0eee
5 changed files with 24 additions and 24 deletions

View File

@ -173,7 +173,7 @@ function postci(){
local exit_val=${1:-0}
set -x
set +e
stop_metric "tripleo.ci.total.seconds"
stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.ci.total.seconds"
if [ -e $TRIPLEO_ROOT/delorean/data/repos/ ] ; then
# I'd like to tar up repos/current but tar'ed its about 8M it may be a
# bit much for the log server, maybe when we are building less

View File

@ -133,9 +133,9 @@ fi
echo "INFO: Check /var/log/undercloud_install.txt for undercloud install output"
echo "INFO: This file can be found in logs/undercloud.tar.xz in the directory containing console.log"
start_metric "tripleo.undercloud.install.seconds"
start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.undercloud.install.seconds"
$TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --undercloud 2>&1 | ts '%Y-%m-%d %H:%M:%S.000 |' | sudo dd of=/var/log/undercloud_install.txt || (tail -n 50 /var/log/undercloud_install.txt && false)
stop_metric "tripleo.undercloud.install.seconds"
stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.undercloud.install.seconds"
if [ "$OVB" = 1 ]; then
@ -203,26 +203,26 @@ if [ "$OSINFRA" = "0" ]; then
# Directing the output of this command to a file as it's extremely verbose
echo "INFO: Check /var/log/image_build.txt for image build output"
echo "INFO: This file can be found in logs/undercloud.tar.xz in the directory containing console.log"
start_metric "tripleo.overcloud.${TOCI_JOBTYPE}.images.seconds"
start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.images.seconds"
$TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --overcloud-images 2>&1 | ts '%Y-%m-%d %H:%M:%S.000 |' | sudo dd of=/var/log/image_build.txt || (tail -n 50 /var/log/image_build.txt && false)
stop_metric "tripleo.overcloud.${TOCI_JOBTYPE}.images.seconds"
stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.images.seconds"
OVERCLOUD_IMAGE_MB=$(du -ms overcloud-full.qcow2 | cut -f 1)
record_metric "tripleo.overcloud.${TOCI_JOBTYPE}.image.size_mb" "$OVERCLOUD_IMAGE_MB"
record_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.image.size_mb" "$OVERCLOUD_IMAGE_MB"
start_metric "tripleo.register.nodes.seconds"
start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.register.nodes.seconds"
if [ $INTROSPECT == 1 ]; then
export INTROSPECT_NODES=1
fi
$TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --register-nodes
# We don't want to keep this set for further calls to tripleo.sh
unset INTROSPECT_NODES
stop_metric "tripleo.register.nodes.seconds"
stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.register.nodes.seconds"
if [ $INTROSPECT == 1 ] ; then
start_metric "tripleo.introspect.seconds"
start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.introspect.seconds"
$TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --introspect-nodes
stop_metric "tripleo.introspect.seconds"
stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.introspect.seconds"
fi
if [ $PREDICTABLE_PLACEMENT == 1 ]; then
@ -290,9 +290,9 @@ fi
if [ $OVERCLOUD == 1 ] ; then
source ~/stackrc
start_metric "tripleo.overcloud.${TOCI_JOBTYPE}.deploy.seconds"
start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.deploy.seconds"
http_proxy= $TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --overcloud-deploy ${TRIPLEO_SH_ARGS:-}
stop_metric "tripleo.overcloud.${TOCI_JOBTYPE}.deploy.seconds"
stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.deploy.seconds"
# Add hosts to /etc/hosts
openstack stack output show overcloud HostsEntry -f value -c output_value | sudo tee -a /etc/hosts
fi
@ -311,9 +311,9 @@ if [ -n "${OVERCLOUD_UPDATE_ARGS:-}" ] ; then
sudo rpm -ev --nodeps openstack-tripleo-heat-templates
sudo yum -y install openstack-tripleo-heat-templates
start_metric "tripleo.overcloud.${TOCI_JOBTYPE}.update.seconds"
start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.update.seconds"
http_proxy= $TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --overcloud-update ${TRIPLEO_SH_ARGS:-}
stop_metric "tripleo.overcloud.${TOCI_JOBTYPE}.update.seconds"
stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.update.seconds"
fi
if [ "$MULTINODE" == 0 ] && [ "$OVERCLOUD" == 1 ] ; then
@ -342,7 +342,7 @@ if [ "$MULTINODE" == 0 ] && [ "$OVERCLOUD" == 1 ] ; then
# available. heat-{api,engine} are the best candidates since due to the
# constraint ordering they are typically started last. We'll wait up to
# 180s.
start_metric "tripleo.overcloud.${TOCI_JOBTYPE}.settle.seconds"
start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.settle.seconds"
timeout -k 10 240 ssh $SSH_OPTIONS heat-admin@$(nova list | grep controller-0 | awk '{print $12}' | cut -d'=' -f2) sudo crm_resource -r openstack-heat-api --wait || {
exitcode=$?
echo "crm_resource for openstack-heat-api has failed!"
@ -353,7 +353,7 @@ if [ "$MULTINODE" == 0 ] && [ "$OVERCLOUD" == 1 ] ; then
echo "crm_resource for openstack-heat-engine has failed!"
exit $exitcode
}
stop_metric "tripleo.overcloud.${TOCI_JOBTYPE}.settle.seconds"
stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.settle.seconds"
fi
fi
@ -362,12 +362,12 @@ if [ -f ~/overcloudrc ]; then
fi
if [ $RUN_PING_TEST == 1 ] ; then
start_metric "tripleo.overcloud.${TOCI_JOBTYPE}.ping_test.seconds"
start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.ping_test.seconds"
OVERCLOUD_PINGTEST_OLD_HEATCLIENT=0 $TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --overcloud-pingtest $OVERCLOUD_PINGTEST_ARGS
stop_metric "tripleo.overcloud.${TOCI_JOBTYPE}.ping_test.seconds"
stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.ping_test.seconds"
fi
if [ $RUN_TEMPEST_TESTS == 1 ] ; then
start_metric "tripleo.overcloud.${TOCI_JOBTYPE}.tempest.seconds"
start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.tempest.seconds"
export TEMPEST_REGEX='^(?=(.*smoke))(?!('
export TEMPEST_REGEX="${TEMPEST_REGEX}tempest.scenario.test_volume_boot_pattern" # http://bugzilla.redhat.com/1272289
export TEMPEST_REGEX="${TEMPEST_REGEX}|tempest.api.identity.*v3" # https://bugzilla.redhat.com/1266947
@ -375,7 +375,7 @@ if [ $RUN_TEMPEST_TESTS == 1 ] ; then
export TEMPEST_REGEX="${TEMPEST_REGEX}|tempest.api.data_processing" # Sahara is not enabled by default and has problem with performance
export TEMPEST_REGEX="${TEMPEST_REGEX}))"
bash $TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --run-tempest
stop_metric "tripleo.overcloud.${TOCI_JOBTYPE}.tempest.seconds"
stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.tempest.seconds"
fi
if [ $TEST_OVERCLOUD_DELETE -eq 1 ] ; then
source ~/stackrc

View File

@ -351,7 +351,7 @@ else
fi
source $TRIPLEO_ROOT/tripleo-ci/scripts/metrics.bash
start_metric "tripleo.testenv.wait.seconds"
start_metric "tripleo.testenv.${TOCI_JOBTYPE}.wait.seconds"
if [ -z "${TE_DATAFILE:-}" -a "$OSINFRA" = "0" ] ; then
# NOTE(pabelanger): We need gear for testenv, but this really should be
# handled by tox.

View File

@ -11,7 +11,7 @@ export IP_DEVICE=${IP_DEVICE:-"eth0"}
source $TRIPLEO_CI_DIR/tripleo-ci/scripts/common_vars.bash
source $TRIPLEO_CI_DIR/tripleo-ci/scripts/common_functions.sh
source $TRIPLEO_CI_DIR/tripleo-ci/scripts/metrics.bash
start_metric "tripleo.ci.total.seconds"
start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.ci.total.seconds"
mkdir -p $WORKSPACE/logs

View File

@ -13,8 +13,8 @@ export ZUUL_PROJECT=${ZUUL_PROJECT:-""}
source $TRIPLEO_CI_DIR/tripleo-ci/scripts/common_vars.bash
source $TRIPLEO_CI_DIR/tripleo-ci/scripts/common_functions.sh
source $TRIPLEO_CI_DIR/tripleo-ci/scripts/metrics.bash
stop_metric "tripleo.testenv.wait.seconds" # start_metric in toci_gate_test.sh
start_metric "tripleo.ci.total.seconds"
stop_metric "tripleo.testenv.${TOCI_JOBTYPE}.wait.seconds" # start_metric in toci_gate_test.sh
start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.ci.total.seconds"
mkdir -p $WORKSPACE/logs