Make graphite metrics more granular

Right now, most metrics are recorded under the same name regardless
of branch, and some ignore the job type as well.  Since many of the
metrics can vary significantly depending on those details (mitaka
jobs take far less time than master, for example), let's separate
the metrics out further.

This change adds a .release.jobtype specifier to most of the
metrics.  The notable exception is the testenv wait time, which
gets only the jobtype specifier because it is not release-specific.
Also, the image metrics do not actually need to be split based on
job type since they don't vary, but I split them anyway for
consistency with the other overcloud metrics.

With this change we do not lose the ability to look at broader
aggregate numbers either.  We can still combine these newly
separated metrics back together in the Graphite UI.

Change-Id: I1df58f3a0c44708728e6ea1d705a6e8bf49ef330
This commit is contained in:
Ben Nemec 2017-02-06 19:02:02 +00:00
parent 5a00a8279b
commit 905eac0eee
5 changed files with 24 additions and 24 deletions

View File

@ -173,7 +173,7 @@ function postci(){
local exit_val=${1:-0} local exit_val=${1:-0}
set -x set -x
set +e set +e
stop_metric "tripleo.ci.total.seconds" stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.ci.total.seconds"
if [ -e $TRIPLEO_ROOT/delorean/data/repos/ ] ; then if [ -e $TRIPLEO_ROOT/delorean/data/repos/ ] ; then
# I'd like to tar up repos/current but tar'ed its about 8M it may be a # I'd like to tar up repos/current but tar'ed its about 8M it may be a
# bit much for the log server, maybe when we are building less # bit much for the log server, maybe when we are building less

View File

@ -133,9 +133,9 @@ fi
echo "INFO: Check /var/log/undercloud_install.txt for undercloud install output" echo "INFO: Check /var/log/undercloud_install.txt for undercloud install output"
echo "INFO: This file can be found in logs/undercloud.tar.xz in the directory containing console.log" echo "INFO: This file can be found in logs/undercloud.tar.xz in the directory containing console.log"
start_metric "tripleo.undercloud.install.seconds" start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.undercloud.install.seconds"
$TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --undercloud 2>&1 | ts '%Y-%m-%d %H:%M:%S.000 |' | sudo dd of=/var/log/undercloud_install.txt || (tail -n 50 /var/log/undercloud_install.txt && false) $TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --undercloud 2>&1 | ts '%Y-%m-%d %H:%M:%S.000 |' | sudo dd of=/var/log/undercloud_install.txt || (tail -n 50 /var/log/undercloud_install.txt && false)
stop_metric "tripleo.undercloud.install.seconds" stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.undercloud.install.seconds"
if [ "$OVB" = 1 ]; then if [ "$OVB" = 1 ]; then
@ -203,26 +203,26 @@ if [ "$OSINFRA" = "0" ]; then
# Directing the output of this command to a file as its extreemly verbose # Directing the output of this command to a file as its extreemly verbose
echo "INFO: Check /var/log/image_build.txt for image build output" echo "INFO: Check /var/log/image_build.txt for image build output"
echo "INFO: This file can be found in logs/undercloud.tar.xz in the directory containing console.log" echo "INFO: This file can be found in logs/undercloud.tar.xz in the directory containing console.log"
start_metric "tripleo.overcloud.${TOCI_JOBTYPE}.images.seconds" start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.images.seconds"
$TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --overcloud-images 2>&1 | ts '%Y-%m-%d %H:%M:%S.000 |' | sudo dd of=/var/log/image_build.txt || (tail -n 50 /var/log/image_build.txt && false) $TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --overcloud-images 2>&1 | ts '%Y-%m-%d %H:%M:%S.000 |' | sudo dd of=/var/log/image_build.txt || (tail -n 50 /var/log/image_build.txt && false)
stop_metric "tripleo.overcloud.${TOCI_JOBTYPE}.images.seconds" stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.images.seconds"
OVERCLOUD_IMAGE_MB=$(du -ms overcloud-full.qcow2 | cut -f 1) OVERCLOUD_IMAGE_MB=$(du -ms overcloud-full.qcow2 | cut -f 1)
record_metric "tripleo.overcloud.${TOCI_JOBTYPE}.image.size_mb" "$OVERCLOUD_IMAGE_MB" record_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.image.size_mb" "$OVERCLOUD_IMAGE_MB"
start_metric "tripleo.register.nodes.seconds" start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.register.nodes.seconds"
if [ $INTROSPECT == 1 ]; then if [ $INTROSPECT == 1 ]; then
export INTROSPECT_NODES=1 export INTROSPECT_NODES=1
fi fi
$TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --register-nodes $TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --register-nodes
# We don't want to keep this set for further calls to tripleo.sh # We don't want to keep this set for further calls to tripleo.sh
unset INTROSPECT_NODES unset INTROSPECT_NODES
stop_metric "tripleo.register.nodes.seconds" stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.register.nodes.seconds"
if [ $INTROSPECT == 1 ] ; then if [ $INTROSPECT == 1 ] ; then
start_metric "tripleo.introspect.seconds" start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.introspect.seconds"
$TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --introspect-nodes $TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --introspect-nodes
stop_metric "tripleo.introspect.seconds" stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.introspect.seconds"
fi fi
if [ $PREDICTABLE_PLACEMENT == 1 ]; then if [ $PREDICTABLE_PLACEMENT == 1 ]; then
@ -290,9 +290,9 @@ fi
if [ $OVERCLOUD == 1 ] ; then if [ $OVERCLOUD == 1 ] ; then
source ~/stackrc source ~/stackrc
start_metric "tripleo.overcloud.${TOCI_JOBTYPE}.deploy.seconds" start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.deploy.seconds"
http_proxy= $TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --overcloud-deploy ${TRIPLEO_SH_ARGS:-} http_proxy= $TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --overcloud-deploy ${TRIPLEO_SH_ARGS:-}
stop_metric "tripleo.overcloud.${TOCI_JOBTYPE}.deploy.seconds" stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.deploy.seconds"
# Add hosts to /etc/hosts # Add hosts to /etc/hosts
openstack stack output show overcloud HostsEntry -f value -c output_value | sudo tee -a /etc/hosts openstack stack output show overcloud HostsEntry -f value -c output_value | sudo tee -a /etc/hosts
fi fi
@ -311,9 +311,9 @@ if [ -n "${OVERCLOUD_UPDATE_ARGS:-}" ] ; then
sudo rpm -ev --nodeps openstack-tripleo-heat-templates sudo rpm -ev --nodeps openstack-tripleo-heat-templates
sudo yum -y install openstack-tripleo-heat-templates sudo yum -y install openstack-tripleo-heat-templates
start_metric "tripleo.overcloud.${TOCI_JOBTYPE}.update.seconds" start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.update.seconds"
http_proxy= $TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --overcloud-update ${TRIPLEO_SH_ARGS:-} http_proxy= $TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --overcloud-update ${TRIPLEO_SH_ARGS:-}
stop_metric "tripleo.overcloud.${TOCI_JOBTYPE}.update.seconds" stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.update.seconds"
fi fi
if [ "$MULTINODE" == 0 ] && [ "$OVERCLOUD" == 1 ] ; then if [ "$MULTINODE" == 0 ] && [ "$OVERCLOUD" == 1 ] ; then
@ -342,7 +342,7 @@ if [ "$MULTINODE" == 0 ] && [ "$OVERCLOUD" == 1 ] ; then
# available. heat-{api,engine} are the best candidates since due to the # available. heat-{api,engine} are the best candidates since due to the
# constraint ordering they are typically started last. We'll wait up to # constraint ordering they are typically started last. We'll wait up to
# 180s. # 180s.
start_metric "tripleo.overcloud.${TOCI_JOBTYPE}.settle.seconds" start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.settle.seconds"
timeout -k 10 240 ssh $SSH_OPTIONS heat-admin@$(nova list | grep controller-0 | awk '{print $12}' | cut -d'=' -f2) sudo crm_resource -r openstack-heat-api --wait || { timeout -k 10 240 ssh $SSH_OPTIONS heat-admin@$(nova list | grep controller-0 | awk '{print $12}' | cut -d'=' -f2) sudo crm_resource -r openstack-heat-api --wait || {
exitcode=$? exitcode=$?
echo "crm_resource for openstack-heat-api has failed!" echo "crm_resource for openstack-heat-api has failed!"
@ -353,7 +353,7 @@ if [ "$MULTINODE" == 0 ] && [ "$OVERCLOUD" == 1 ] ; then
echo "crm_resource for openstack-heat-engine has failed!" echo "crm_resource for openstack-heat-engine has failed!"
exit $exitcode exit $exitcode
} }
stop_metric "tripleo.overcloud.${TOCI_JOBTYPE}.settle.seconds" stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.settle.seconds"
fi fi
fi fi
@ -362,12 +362,12 @@ if [ -f ~/overcloudrc ]; then
fi fi
if [ $RUN_PING_TEST == 1 ] ; then if [ $RUN_PING_TEST == 1 ] ; then
start_metric "tripleo.overcloud.${TOCI_JOBTYPE}.ping_test.seconds" start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.ping_test.seconds"
OVERCLOUD_PINGTEST_OLD_HEATCLIENT=0 $TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --overcloud-pingtest $OVERCLOUD_PINGTEST_ARGS OVERCLOUD_PINGTEST_OLD_HEATCLIENT=0 $TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --overcloud-pingtest $OVERCLOUD_PINGTEST_ARGS
stop_metric "tripleo.overcloud.${TOCI_JOBTYPE}.ping_test.seconds" stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.ping_test.seconds"
fi fi
if [ $RUN_TEMPEST_TESTS == 1 ] ; then if [ $RUN_TEMPEST_TESTS == 1 ] ; then
start_metric "tripleo.overcloud.${TOCI_JOBTYPE}.tempest.seconds" start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.tempest.seconds"
export TEMPEST_REGEX='^(?=(.*smoke))(?!(' export TEMPEST_REGEX='^(?=(.*smoke))(?!('
export TEMPEST_REGEX="${TEMPEST_REGEX}tempest.scenario.test_volume_boot_pattern" # http://bugzilla.redhat.com/1272289 export TEMPEST_REGEX="${TEMPEST_REGEX}tempest.scenario.test_volume_boot_pattern" # http://bugzilla.redhat.com/1272289
export TEMPEST_REGEX="${TEMPEST_REGEX}|tempest.api.identity.*v3" # https://bugzilla.redhat.com/1266947 export TEMPEST_REGEX="${TEMPEST_REGEX}|tempest.api.identity.*v3" # https://bugzilla.redhat.com/1266947
@ -375,7 +375,7 @@ if [ $RUN_TEMPEST_TESTS == 1 ] ; then
export TEMPEST_REGEX="${TEMPEST_REGEX}|tempest.api.data_processing" # Sahara is not enabled by default and has problem with performance export TEMPEST_REGEX="${TEMPEST_REGEX}|tempest.api.data_processing" # Sahara is not enabled by default and has problem with performance
export TEMPEST_REGEX="${TEMPEST_REGEX}))" export TEMPEST_REGEX="${TEMPEST_REGEX}))"
bash $TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --run-tempest bash $TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --run-tempest
stop_metric "tripleo.overcloud.${TOCI_JOBTYPE}.tempest.seconds" stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.tempest.seconds"
fi fi
if [ $TEST_OVERCLOUD_DELETE -eq 1 ] ; then if [ $TEST_OVERCLOUD_DELETE -eq 1 ] ; then
source ~/stackrc source ~/stackrc

View File

@ -351,7 +351,7 @@ else
fi fi
source $TRIPLEO_ROOT/tripleo-ci/scripts/metrics.bash source $TRIPLEO_ROOT/tripleo-ci/scripts/metrics.bash
start_metric "tripleo.testenv.wait.seconds" start_metric "tripleo.testenv.${TOCI_JOBTYPE}.wait.seconds"
if [ -z "${TE_DATAFILE:-}" -a "$OSINFRA" = "0" ] ; then if [ -z "${TE_DATAFILE:-}" -a "$OSINFRA" = "0" ] ; then
# NOTE(pabelanger): We need gear for testenv, but this really should be # NOTE(pabelanger): We need gear for testenv, but this really should be
# handled by tox. # handled by tox.

View File

@ -11,7 +11,7 @@ export IP_DEVICE=${IP_DEVICE:-"eth0"}
source $TRIPLEO_CI_DIR/tripleo-ci/scripts/common_vars.bash source $TRIPLEO_CI_DIR/tripleo-ci/scripts/common_vars.bash
source $TRIPLEO_CI_DIR/tripleo-ci/scripts/common_functions.sh source $TRIPLEO_CI_DIR/tripleo-ci/scripts/common_functions.sh
source $TRIPLEO_CI_DIR/tripleo-ci/scripts/metrics.bash source $TRIPLEO_CI_DIR/tripleo-ci/scripts/metrics.bash
start_metric "tripleo.ci.total.seconds" start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.ci.total.seconds"
mkdir -p $WORKSPACE/logs mkdir -p $WORKSPACE/logs

View File

@ -13,8 +13,8 @@ export ZUUL_PROJECT=${ZUUL_PROJECT:-""}
source $TRIPLEO_CI_DIR/tripleo-ci/scripts/common_vars.bash source $TRIPLEO_CI_DIR/tripleo-ci/scripts/common_vars.bash
source $TRIPLEO_CI_DIR/tripleo-ci/scripts/common_functions.sh source $TRIPLEO_CI_DIR/tripleo-ci/scripts/common_functions.sh
source $TRIPLEO_CI_DIR/tripleo-ci/scripts/metrics.bash source $TRIPLEO_CI_DIR/tripleo-ci/scripts/metrics.bash
stop_metric "tripleo.testenv.wait.seconds" # start_metric in toci_gate_test.sh stop_metric "tripleo.testenv.${TOCI_JOBTYPE}.wait.seconds" # start_metric in toci_gate_test.sh
start_metric "tripleo.ci.total.seconds" start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.ci.total.seconds"
mkdir -p $WORKSPACE/logs mkdir -p $WORKSPACE/logs