From 169f2c3ca31e2e576c4f76e7adbaef00135bfedb Mon Sep 17 00:00:00 2001 From: James Gu Date: Wed, 18 Oct 2017 15:24:27 -0700 Subject: [PATCH] Add performance testing in Monasca persister Leverages the JMeter and KafkaMeter to inject into persister metric messages with permutation of random metric names and dimensions. Monitors the metrics flushed to the database in all persister instances and calculates the throughput. The number of metrics and the number of unique metic definitions are configurable in the test plan. The script currently supports only the Java implementation of the Monasca persister. Monasca python does not provide the same internal metric api as the Dropwizard in the Java implemation. Future work is required for the Python implemation. Change-Id: Id8e6a5b62aa434d9943c7eee4be8991536b1c45f Depends-On: https://review.openstack.org/543399 story: 2001292 task: 5841 --- .gitignore | 4 +- perf/README.md | 56 ++++++++ perf/jmeter_test_plan.jmx | 170 ++++++++++++++++++++++++ perf/jmeter_test_plan_mix.jmx | 238 ++++++++++++++++++++++++++++++++++ perf/persister_perf.sh | 164 +++++++++++++++++++++++ 5 files changed, 631 insertions(+), 1 deletion(-) create mode 100644 perf/README.md create mode 100644 perf/jmeter_test_plan.jmx create mode 100644 perf/jmeter_test_plan_mix.jmx create mode 100755 perf/persister_perf.sh diff --git a/.gitignore b/.gitignore index 3773aeac..9db6a18a 100644 --- a/.gitignore +++ b/.gitignore @@ -13,4 +13,6 @@ cover/ *egg-info/ .testrepository/ .stestr/ -*.sample \ No newline at end of file +*.sample +perf/persister_perf_*/*.* +perf/jmeter.log diff --git a/perf/README.md b/perf/README.md new file mode 100644 index 00000000..2051912c --- /dev/null +++ b/perf/README.md @@ -0,0 +1,56 @@ + + +monasca-persister performance benchmarking +============= + +This tool benchmarkes the Monasca Persister performance and throughput. 
+It uses JMeter and Kafka plugin to initiate Kafka metric messages and +query the Dropwizard rest api to retrieve perister processing metrics. +Becasue the Monasca persister python implementation does not have +similar internal processing metric rest api availalbe, the support +for python implementation performance benchmark will be added in a +future release. + +# Install + +1. Download and install the latest Apache JMeter from +http://jmeter.apache.org/download_jmeter.cgi. +2. Add JMeter bin directory to the path, for example, +PATH=$PATH:/opt/apache-jmeter/bin. +3. Clone KafkaMeter repository: https://github.com/BrightTag/kafkameter. +4. Run Maven package and install Kafkameter jar to $JMETER_HOME/lib/ext +folder under the Apache JMeter installation home. + +# Configure + +1. Make a copy of the jmeter_test_plan.jmx file and modify the test plan +to fit your goal. The available options are: +- The number of threads(users) +- The number of loops, i.e., the number of metric messages each thread/user +will create) +- The range of the random values in the metric name and dimension values. +This controls the number of unique metric definitions the test plan will +create. + +The total number of metrics = the number of threads(users) * loop count. + +# Run test + +1. Execute persister_perf.sh, for example, +``` + ./persister_perf.sh -t jmeter_test_plan.jmx -n 1000000 -s 192.168.1.5,192.168.1.6 -p 8091 -w 10 + + -n the expected number of metric messages (equals to the number of threads(users) * loop count. + -s a comma separated list of the Monasca Persister server hosts in the cluster + -p Monasca Persister server port number + -w the time to wait for before checking the processing status from the Monasca persister next time +``` + +2. For each test run, an output folder (postfixed by the timestamp) is +created. 
You can monitor the log file to watch the progress of jmeter +sending messages, Monasca persister reading messages as well as +Persister flushing the metrics into the time series database. It +includes the accumulated number of metrics created, read and flushed +and snapshots of throughput in each check interval. + +3. The output file contains the summary of the performance testing result. diff --git a/perf/jmeter_test_plan.jmx b/perf/jmeter_test_plan.jmx new file mode 100644 index 00000000..23627a47 --- /dev/null +++ b/perf/jmeter_test_plan.jmx @@ -0,0 +1,170 @@ + + + + + + false + false + + + + tenant + t1 + = + + + + + + + + stoptest + + false + 80000000 + + 2 + 1 + 1507913213000 + 1507913213000 + false + + + + + + 1 + 1000000 + 1 + metric_counter + + false + + + + 10 + 1 + tenant_00 + true + ${__Random(1, 1000000)} + tenant_id + + + + 600 + 1 + metric_000 + true + ${__Random(1000001, 2000000)} + metric_name + + + + 1500 + 1 + host_0000 + true + ${__Random(3000001, 4000000)} + host_dim_value + + + + 20 + 1 + process_000 + true + ${__Random(5000000, 10000000)} + process_dim_value + + + + + + + kafka_brokers + 192.168.1.2:9092,192.168.1.16:9092 + = + + + kafka_topic + metrics + = + + + kafka_key + ${__time()} + = + + + kafka_message + {"metric":{"timestamp":${__time()},"name":"${metric_name}","dimensions":{"hostname":"${host_dim_value}","service":"monitoring", "process":"${__Random(1,22)}"},"value":${metric_counter},"value_meta":null},"meta":{"region":"RegionX","tenantId":"${tenant_id}"},"creation_time":${__time(/1000,)}} + = + + + kafka_message_serializer + org.apache.kafka.common.serialization.StringSerializer + = + + + kafka_key_serializer + org.apache.kafka.common.serialization.StringSerializer + = + + + kafka_ssl_keystore + ${PARAMETER_KAFKA_SSL_KEYSTORE} + = + + + kafka_ssl_keystore_password + ${PARAMETER_KAFKA_SSL_KEYSTORE_PASSWORD} + = + + + kafka_ssl_truststore + ${PARAMETER_KAFKA_SSL_TRUSTSTORE} + = + + + kafka_ssl_truststore_password + 
${PARAMETER_KAFKA_SSL_TRUSTSTORE_PASSWORD} + = + + + kafka_use_ssl + false + = + + + kafka_compression_type + + = + + + kafka_partition + + = + + + + co.signal.kafkameter.KafkaProducerSampler + + + + + + process_dim_value + ${__javaScript(Math.floor(Math.random()*22),MYRESULT)} + = + + + + + + + + true + + + + diff --git a/perf/jmeter_test_plan_mix.jmx b/perf/jmeter_test_plan_mix.jmx new file mode 100644 index 00000000..8c6af804 --- /dev/null +++ b/perf/jmeter_test_plan_mix.jmx @@ -0,0 +1,238 @@ + + + + + + false + false + + + + tenant + t1 + = + + + + + + + + stoptest + + false + 1080 + + 4 + 1 + 1510384829000 + 1510384829000 + false + + + + + + 1 + 1000000 + 1 + metric_counter + + false + + + + 1 + + 50 + guestHostStartNum + + true + + + + true + 10400000 + + + + 1 + false + 1 + + ThroughputController.percentThroughput + 40.0 + 0.0 + + 1 mil unique metrics for admin tenants + + + + + + + kafka_brokers + 192.168.1.2:9092,192.168.1.16:9092 + = + + + kafka_topic + metrics + = + + + kafka_key + ${__time()} + = + + + kafka_message + {"metric":{"timestamp":${__time()},"name":"metric_${__Random(1,500)}","dimensions":{"hostname":"controller_${__Random(1,3)}","service":"service_${__Random(1,20)}", "process":"process_${__Random(1,33)}"},"value":${metric_counter},"value_meta":null},"meta":{"region":"RegionX","tenantId":"admin"},"creation_time":${__time(/1000,)}} + = + + + kafka_message_serializer + org.apache.kafka.common.serialization.StringSerializer + = + + + kafka_key_serializer + org.apache.kafka.common.serialization.StringSerializer + = + + + kafka_ssl_keystore + ${PARAMETER_KAFKA_SSL_KEYSTORE} + = + + + kafka_ssl_keystore_password + ${PARAMETER_KAFKA_SSL_KEYSTORE_PASSWORD} + = + + + kafka_ssl_truststore + ${PARAMETER_KAFKA_SSL_TRUSTSTORE} + = + + + kafka_ssl_truststore_password + ${PARAMETER_KAFKA_SSL_TRUSTSTORE_PASSWORD} + = + + + kafka_use_ssl + false + = + + + kafka_compression_type + + = + + + kafka_partition + + = + + + + 
co.signal.kafkameter.KafkaProducerSampler + + + + + 1 + true + 1 + + ThroughputController.percentThroughput + 60.0 + 0.0 + + 3 mil unique metric definitions + + + + + + + kafka_brokers + 192.168.1.2:9092,192.168.1.16:9092 + = + + + kafka_topic + metrics + = + + + kafka_key + ${__time()} + = + + + kafka_message + {"metric":{"timestamp":${__time()},"name":"metric_${__Random(501,600)}","dimensions":{"hostname":"host_${__intSum(${guestHostStartNum},${__Random(0,499)})}","service":"service_${__Random(1,2)}", "process":"process_${__Random(1,3)}"},"value":${metric_counter},"value_meta":null},"meta":{"region":"RegionX","tenantId":"tenant_${__Random(0,10)}"},"creation_time":${__time(/1000,)}} + = + + + kafka_message_serializer + org.apache.kafka.common.serialization.StringSerializer + = + + + kafka_key_serializer + org.apache.kafka.common.serialization.StringSerializer + = + + + kafka_ssl_keystore + ${PARAMETER_KAFKA_SSL_KEYSTORE} + = + + + kafka_ssl_keystore_password + ${PARAMETER_KAFKA_SSL_KEYSTORE_PASSWORD} + = + + + kafka_ssl_truststore + ${PARAMETER_KAFKA_SSL_TRUSTSTORE} + = + + + kafka_ssl_truststore_password + ${PARAMETER_KAFKA_SSL_TRUSTSTORE_PASSWORD} + = + + + kafka_use_ssl + false + = + + + kafka_compression_type + + = + + + kafka_partition + + = + + + + co.signal.kafkameter.KafkaProducerSampler + + + + + + + + true + + + + diff --git a/perf/persister_perf.sh b/perf/persister_perf.sh new file mode 100755 index 00000000..483cfccc --- /dev/null +++ b/perf/persister_perf.sh @@ -0,0 +1,164 @@ +#!/bin/bash + +# Copyright 2017 SUSE LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
# You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

# Defaults; -p and -w override these on the command line.
PORT=8091
CHECK_INTERVAL=30
TIME_STAMP=$(date +%s)

# Every run gets its own timestamped directory for its log and summary.
mkdir persister_perf_${TIME_STAMP}

# Print usage and exit.
usage()
{
    echo "usage: persister_perf.sh [[-s servers (required)] [-p port] \
[-t JMeter test plan file] [-n number of metrics to insert (required)] \
[-w wait time (seconds) between checking persister status] | [-h help ]]"
    exit
}

# Append a progress line to the run log.
log()
{
    echo "$1" >> persister_perf_${TIME_STAMP}/persister_perf.log
}

# Append a summary line to the final result file.
output()
{
    echo "$1" >> persister_perf_${TIME_STAMP}/persister_perf_output.txt
}

# Sum the events-processed meters of all MetricHandler pipeline threads on
# one persister host ($1) via the Dropwizard admin api. Prints 0 when the
# host is unreachable or no meter matches (plain "add" would print null,
# which breaks the arithmetic in the callers).
get_received_metric_count_per_host()
{
    local msg_count=$(curl -m 60 -s http://$1:$PORT/metrics?pretty=true | \
jq '[.meters | with_entries(select(.key|match("monasca.persister.pipeline.event.MetricHandler\\[metric-[0-9]*\\].events-processed-meter";"i")))[] | .count] | add // 0')
    echo "${msg_count:-0}"
}

# Total metrics read from Kafka, summed across all persister hosts.
get_total_received_metric_count()
{
    local count=0
    for server in $SERVERS; do
        local count_per_host=$(get_received_metric_count_per_host $server)
        log "$(date) Persister host: $server; Received metrics: $count_per_host"
        count=$((count + count_per_host))
    done
    log "$(date) Total received metrics: $count"
    echo "$count"
}

# Sum the flush meters of all MetricHandler pipeline threads on one
# persister host ($1). Prints 0 when the host is unreachable or no meter
# matches.
get_flushed_metric_count_per_host()
{
    local msg_count=$(curl -m 60 -s http://$1:$PORT/metrics?pretty=true \
 | jq '[.meters | with_entries(select(.key|match("monasca.persister.pipeline.event.MetricHandler\\[metric-[0-9]*\\].flush-meter";"i")))[] | .count] | add // 0')
    echo "${msg_count:-0}"
}

# Total metrics flushed to the database, summed across all persister hosts.
get_total_flushed_metric_count()
{
    local count=0
    for server in $SERVERS; do
        local count_per_host=$(get_flushed_metric_count_per_host $server)
        log "$(date) Persister host: $server; Flushed metrics: $count_per_host"
        count=$((count + count_per_host))
    done
    log "$(date) Total flushed metrics: $count"
    echo "$count"
}


# Leading ":" puts getopts in silent mode so the ":" (missing argument)
# case is actually reported here; it must also appear before the "*"
# catch-all or it can never match.
while getopts ":hs:p:t:n:w:" option; do
    case "${option}" in
        h) usage;;
        s) SERVERS=$(echo "${OPTARG}" | sed "s/,/ /g");;
        p) PORT=${OPTARG};;
        t) TEST_PLAN=${OPTARG};;
        n) NUM_METRICS=${OPTARG};;
        w) CHECK_INTERVAL=${OPTARG};;
        :) echo "Missing option argument for -$OPTARG" >&2; exit 1;;
        *) exit 1;;
    esac
done

if [ ! "$SERVERS" ] || [ ! "$NUM_METRICS" ]; then
    usage
    exit 1
fi

log "starting JMeter run $TEST_PLAN"

# Run JMeter in non-GUI mode in the background; the test plan injects the
# metric messages into Kafka while this script polls the persisters.
jmeter -n -t $TEST_PLAN -l persister_perf_${TIME_STAMP}/jmeter.jnl -e \
-o persister_perf_${TIME_STAMP}/jmeter \
>> persister_perf_${TIME_STAMP}/persister_perf.log 2>&1 &

START_TIME=$(date +%s)

# Snapshot both counters before the load lands so throughput is computed
# from deltas rather than absolute meter values.
received_metric_count_start=$(get_total_received_metric_count)
output "Total received metric count at start: $received_metric_count_start"

flushed_metric_count_start=$(get_total_flushed_metric_count)
output "Total flushed metric count at start: $flushed_metric_count_start"

received_metric_count_orig=$((received_metric_count_start))

flushed_metric_count_orig=$((flushed_metric_count_start))

target_received_metric_count=$((received_metric_count_start + NUM_METRICS))

target_flushed_metric_count=$((flushed_metric_count_start + NUM_METRICS))

sleep $CHECK_INTERVAL

INTERVAL_END_TIME=$(date +%s)
received_metric_count=$(get_total_received_metric_count)
flushed_metric_count=$(get_total_flushed_metric_count)

# Poll until the persisters have both read and flushed all NUM_METRICS
# injected metrics, logging per-interval and cumulative throughput.
while [ "$received_metric_count" -lt "$target_received_metric_count" ] \
    || [ "$flushed_metric_count" -lt "$target_flushed_metric_count" ]
do
    INTERVAL_START_TIME=$((INTERVAL_END_TIME))
    received_metric_count_start=$((received_metric_count))
    flushed_metric_count_start=$((flushed_metric_count))

    sleep $CHECK_INTERVAL
    INTERVAL_END_TIME=$(date +%s)
    received_metric_count=$(get_total_received_metric_count)
    flushed_metric_count=$(get_total_flushed_metric_count)

    # Clamp the denominators to 1 to avoid division by zero when -w 0 is
    # given or the clock did not advance.
    interval=$((INTERVAL_END_TIME - INTERVAL_START_TIME))
    [ "$interval" -lt 1 ] && interval=1
    total_elapsed=$((INTERVAL_END_TIME - START_TIME))
    [ "$total_elapsed" -lt 1 ] && total_elapsed=1

    log "Current received metric throughput: \
$(((received_metric_count - received_metric_count_start) / interval))"
    log "Current flushed metric throughput: \
$(((flushed_metric_count - flushed_metric_count_start) / interval))"
    log "Average received metric throughput: \
$(((received_metric_count - received_metric_count_orig) / total_elapsed))"
    log "Average flushed metric throughput: \
$(((flushed_metric_count - flushed_metric_count_orig) / total_elapsed))"
    log "Expect $((target_flushed_metric_count - flushed_metric_count)) \
more metrics to be flushed"
done

END_TIME=$(date +%s)
ELAPSED=$(($END_TIME - $START_TIME))
# Clamp to 1 second in case the whole run finished within the clock tick.
[ "$ELAPSED" -lt 1 ] && ELAPSED=1
output "Total received metric count at end: $received_metric_count"
output "Total flushed metric count at end: $flushed_metric_count"
output "Total elapsed time: $ELAPSED"
output "Average received metrics/second: \
$(((received_metric_count - received_metric_count_orig) / ELAPSED))"
output "Average persisted metrics/second: \
$(((flushed_metric_count - flushed_metric_count_orig) / ELAPSED))"