diff --git a/.gitignore b/.gitignore index 3773aeac..9db6a18a 100644 --- a/.gitignore +++ b/.gitignore @@ -13,4 +13,6 @@ cover/ *egg-info/ .testrepository/ .stestr/ -*.sample \ No newline at end of file +*.sample +perf/persister_perf_*/*.* +perf/jmeter.log diff --git a/perf/README.md b/perf/README.md new file mode 100644 index 00000000..2051912c --- /dev/null +++ b/perf/README.md @@ -0,0 +1,56 @@ + + +monasca-persister performance benchmarking +============= + +This tool benchmarks the Monasca Persister performance and throughput. +It uses JMeter and Kafka plugin to initiate Kafka metric messages and +query the Dropwizard REST API to retrieve persister processing metrics. +Because the Monasca persister python implementation does not have +similar internal processing metric REST API available, the support +for python implementation performance benchmark will be added in a +future release. + +# Install + +1. Download and install the latest Apache JMeter from +http://jmeter.apache.org/download_jmeter.cgi. +2. Add JMeter bin directory to the path, for example, +PATH=$PATH:/opt/apache-jmeter/bin. +3. Clone KafkaMeter repository: https://github.com/BrightTag/kafkameter. +4. Run Maven package and install Kafkameter jar to $JMETER_HOME/lib/ext +folder under the Apache JMeter installation home. + +# Configure + +1. Make a copy of the jmeter_test_plan.jmx file and modify the test plan +to fit your goal. The available options are: +- The number of threads(users) +- The number of loops, i.e., the number of metric messages each thread/user +will create +- The range of the random values in the metric name and dimension values. +This controls the number of unique metric definitions the test plan will +create. + +The total number of metrics = the number of threads(users) * loop count. + +# Run test + +1. 
Execute persister_perf.sh, for example, +``` + ./persister_perf.sh -t jmeter_test_plan.jmx -n 1000000 -s 192.168.1.5,192.168.1.6 -p 8091 -w 10 + + -n the expected number of metric messages (equals to the number of threads(users) * loop count. + -s a comma separated list of the Monasca Persister server hosts in the cluster + -p Monasca Persister server port number + -w the time to wait for before checking the processing status from the Monasca persister next time +``` + +2. For each test run, an output folder (postfixed by the timestamp) is +created. You can monitor the log file to watch the progress of jmeter +sending messages, Monasca persister reading messages as well as +Persister flushing the metrics into the time series database. It +includes the accumulated number of metrics created, read and flushed +and snapshots of throughput in each check interval. + +3. The output file contains the summary of the performance testing result. diff --git a/perf/jmeter_test_plan.jmx b/perf/jmeter_test_plan.jmx new file mode 100644 index 00000000..23627a47 --- /dev/null +++ b/perf/jmeter_test_plan.jmx @@ -0,0 +1,170 @@ + + + + + + false + false + + + + tenant + t1 + = + + + + + + + + stoptest + + false + 80000000 + + 2 + 1 + 1507913213000 + 1507913213000 + false + + + + + + 1 + 1000000 + 1 + metric_counter + + false + + + + 10 + 1 + tenant_00 + true + ${__Random(1, 1000000)} + tenant_id + + + + 600 + 1 + metric_000 + true + ${__Random(1000001, 2000000)} + metric_name + + + + 1500 + 1 + host_0000 + true + ${__Random(3000001, 4000000)} + host_dim_value + + + + 20 + 1 + process_000 + true + ${__Random(5000000, 10000000)} + process_dim_value + + + + + + + kafka_brokers + 192.168.1.2:9092,192.168.1.16:9092 + = + + + kafka_topic + metrics + = + + + kafka_key + ${__time()} + = + + + kafka_message + {"metric":{"timestamp":${__time()},"name":"${metric_name}","dimensions":{"hostname":"${host_dim_value}","service":"monitoring", 
"process":"${__Random(1,22)}"},"value":${metric_counter},"value_meta":null},"meta":{"region":"RegionX","tenantId":"${tenant_id}"},"creation_time":${__time(/1000,)}} + = + + + kafka_message_serializer + org.apache.kafka.common.serialization.StringSerializer + = + + + kafka_key_serializer + org.apache.kafka.common.serialization.StringSerializer + = + + + kafka_ssl_keystore + ${PARAMETER_KAFKA_SSL_KEYSTORE} + = + + + kafka_ssl_keystore_password + ${PARAMETER_KAFKA_SSL_KEYSTORE_PASSWORD} + = + + + kafka_ssl_truststore + ${PARAMETER_KAFKA_SSL_TRUSTSTORE} + = + + + kafka_ssl_truststore_password + ${PARAMETER_KAFKA_SSL_TRUSTSTORE_PASSWORD} + = + + + kafka_use_ssl + false + = + + + kafka_compression_type + + = + + + kafka_partition + + = + + + + co.signal.kafkameter.KafkaProducerSampler + + + + + + process_dim_value + ${__javaScript(Math.floor(Math.random()*22),MYRESULT)} + = + + + + + + + + true + + + + diff --git a/perf/jmeter_test_plan_mix.jmx b/perf/jmeter_test_plan_mix.jmx new file mode 100644 index 00000000..8c6af804 --- /dev/null +++ b/perf/jmeter_test_plan_mix.jmx @@ -0,0 +1,238 @@ + + + + + + false + false + + + + tenant + t1 + = + + + + + + + + stoptest + + false + 1080 + + 4 + 1 + 1510384829000 + 1510384829000 + false + + + + + + 1 + 1000000 + 1 + metric_counter + + false + + + + 1 + + 50 + guestHostStartNum + + true + + + + true + 10400000 + + + + 1 + false + 1 + + ThroughputController.percentThroughput + 40.0 + 0.0 + + 1 mil unique metrics for admin tenants + + + + + + + kafka_brokers + 192.168.1.2:9092,192.168.1.16:9092 + = + + + kafka_topic + metrics + = + + + kafka_key + ${__time()} + = + + + kafka_message + {"metric":{"timestamp":${__time()},"name":"metric_${__Random(1,500)}","dimensions":{"hostname":"controller_${__Random(1,3)}","service":"service_${__Random(1,20)}", "process":"process_${__Random(1,33)}"},"value":${metric_counter},"value_meta":null},"meta":{"region":"RegionX","tenantId":"admin"},"creation_time":${__time(/1000,)}} + = + + + 
kafka_message_serializer + org.apache.kafka.common.serialization.StringSerializer + = + + + kafka_key_serializer + org.apache.kafka.common.serialization.StringSerializer + = + + + kafka_ssl_keystore + ${PARAMETER_KAFKA_SSL_KEYSTORE} + = + + + kafka_ssl_keystore_password + ${PARAMETER_KAFKA_SSL_KEYSTORE_PASSWORD} + = + + + kafka_ssl_truststore + ${PARAMETER_KAFKA_SSL_TRUSTSTORE} + = + + + kafka_ssl_truststore_password + ${PARAMETER_KAFKA_SSL_TRUSTSTORE_PASSWORD} + = + + + kafka_use_ssl + false + = + + + kafka_compression_type + + = + + + kafka_partition + + = + + + + co.signal.kafkameter.KafkaProducerSampler + + + + + 1 + true + 1 + + ThroughputController.percentThroughput + 60.0 + 0.0 + + 3 mil unique metric definitions + + + + + + + kafka_brokers + 192.168.1.2:9092,192.168.1.16:9092 + = + + + kafka_topic + metrics + = + + + kafka_key + ${__time()} + = + + + kafka_message + {"metric":{"timestamp":${__time()},"name":"metric_${__Random(501,600)}","dimensions":{"hostname":"host_${__intSum(${guestHostStartNum},${__Random(0,499)})}","service":"service_${__Random(1,2)}", "process":"process_${__Random(1,3)}"},"value":${metric_counter},"value_meta":null},"meta":{"region":"RegionX","tenantId":"tenant_${__Random(0,10)}"},"creation_time":${__time(/1000,)}} + = + + + kafka_message_serializer + org.apache.kafka.common.serialization.StringSerializer + = + + + kafka_key_serializer + org.apache.kafka.common.serialization.StringSerializer + = + + + kafka_ssl_keystore + ${PARAMETER_KAFKA_SSL_KEYSTORE} + = + + + kafka_ssl_keystore_password + ${PARAMETER_KAFKA_SSL_KEYSTORE_PASSWORD} + = + + + kafka_ssl_truststore + ${PARAMETER_KAFKA_SSL_TRUSTSTORE} + = + + + kafka_ssl_truststore_password + ${PARAMETER_KAFKA_SSL_TRUSTSTORE_PASSWORD} + = + + + kafka_use_ssl + false + = + + + kafka_compression_type + + = + + + kafka_partition + + = + + + + co.signal.kafkameter.KafkaProducerSampler + + + + + + + + true + + + + diff --git a/perf/persister_perf.sh b/perf/persister_perf.sh new file mode 
100755 index 00000000..483cfccc --- /dev/null +++ b/perf/persister_perf.sh @@ -0,0 +1,164 @@ +#!/bin/bash + +# Copyright 2017 SUSE LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +PORT=8091 +CHECK_INTERVAL=30 +TIME_STAMP=$(date +%s) + +mkdir persister_perf_${TIME_STAMP} + +usage() +{ + echo "usage: persister_perf.sh [[-s servers (required)] [-p port] \ +[-t JMeter test plan file] [-n number of metrics to insert (required)] \ +[-w wait time (seconds) between checking persister status] | [-h help ]]" + exit +} + +log() +{ +# echo "$1" + echo "$1" >> persister_perf_${TIME_STAMP}/persister_perf.log +} + +output() +{ +# echo "$1" + echo "$1" >> persister_perf_${TIME_STAMP}/persister_perf_output.txt +} + +get_received_metric_count_per_host() +{ + local msg_count=$(curl -m 60 -s http://$1:$PORT/metrics?pretty=true | \ +jq '[.meters | with_entries(select(.key|match("monasca.persister.pipeline.event.MetricHandler\\[metric-[0-9]*\\].events-processed-meter";"i")))[] | .count] | add') + echo "$msg_count" +} + +get_total_received_metric_count() +{ + local count=0 + for server in $SERVERS; do + local count_per_host=$(get_received_metric_count_per_host $server) + log "$(date) Persister host: $server; Received metrics: $count_per_host" + count=$((count + count_per_host)) + done + log "$(date) Total received metrics: $count" + echo "$count" +} + +get_flushed_metric_count_per_host() +{ + local msg_count=$(curl -m 60 -s http://$1:$PORT/metrics?pretty=true \ + | jq '[.meters | 
with_entries(select(.key|match("monasca.persister.pipeline.event.MetricHandler\\[metric-[0-9]*\\].flush-meter";"i")))[] | .count] | add') + echo "$msg_count" +} + +get_total_flushed_metric_count() +{ + local count=0 + for server in $SERVERS; do + local count_per_host=$(get_flushed_metric_count_per_host $server) + log "$(date) Persister host: $server; Flushed metrics: $count_per_host" + count=$((count + count_per_host)) + done + log "$(date) Total flushed metrics: $count" + echo "$count" +} + + +while getopts "hs:p:t:n:w:" option; do + case "${option}" in + h) usage;; + s) declare -a SERVERS=$(echo "${OPTARG}" | sed "s/,/\n/g");; + p) PORT=${OPTARG};; + t) TEST_PLAN=${OPTARG};; + n) NUM_METRICS=${OPTARG};; + w) CHECK_INTERVAL=${OPTARG};; + *) exit 1;; + :) echo "Missing option argument for -$OPTARG" >&2; exit 1;; + esac +done + +if [ ! "$SERVERS" ] || [ ! "$NUM_METRICS" ]; then + usage + exit 1 +fi + +log "starting JMeter run $TEST_PLAN" + +jmeter -n -t $TEST_PLAN -l persister_perf_${TIME_STAMP}/jmeter.jnl -e \ +-o persister_perf_${TIME_STAMP}/jmeter \ +>> persister_perf_${TIME_STAMP}/persister_perf.log 2>&1 & + +START_TIME=$(date +%s) + +received_metric_count_start=$(get_total_received_metric_count) +output "Total received metric count at start: $received_metric_count_start" + +flushed_metric_count_start=$(get_total_flushed_metric_count) +output "Total flushed metric count at start: $flushed_metric_count_start" + +received_metric_count_orig=$((received_metric_count_start)) + +flushed_metric_count_orig=$((flushed_metric_count_start)) + +target_received_metric_count=$((received_metric_count_start + NUM_METRICS)) + +target_flushed_metric_count=$((flushed_metric_count_start + NUM_METRICS)) + +sleep $CHECK_INTERVAL + +INTERVAL_END_TIME=$(date +%s) +received_metric_count=$(get_total_received_metric_count) +flushed_metric_count=$(get_total_flushed_metric_count) + +while [ "$received_metric_count" -lt "$target_received_metric_count" \ + -o "$flushed_metric_count" -lt 
"$target_flushed_metric_count" ] +do + INTERVAL_START_TIME=$((INTERVAL_END_TIME)) + received_metric_count_start=$((received_metric_count)) + flushed_metric_count_start=$((flushed_metric_count)) + + sleep $CHECK_INTERVAL + INTERVAL_END_TIME=$(date +%s) + received_metric_count=$(get_total_received_metric_count) + flushed_metric_count=$(get_total_flushed_metric_count) + + log "Current received metric throughput: \ + $((($received_metric_count - $received_metric_count_start) \ + / $(($INTERVAL_END_TIME - $INTERVAL_START_TIME))))" + log "Current flushed metric throughput: \ + $((($flushed_metric_count - $flushed_metric_count_start) \ + / $(($INTERVAL_END_TIME - $INTERVAL_START_TIME))))" + log "Average received metric throughput: \ + $((($received_metric_count - $received_metric_count_orig) \ + / $(($INTERVAL_END_TIME - $START_TIME))))" + log "Average flushed metric throughput: \ + $((($flushed_metric_count - $flushed_metric_count_orig) \ + / $(($INTERVAL_END_TIME - $START_TIME))))" + log "Expect $((target_flushed_metric_count - flushed_metric_count)) \ + more metrics to be flushed" +done + +END_TIME=$(date +%s) +ELAPSED=$(($END_TIME - $START_TIME)) +output "Total received metric count at end: $received_metric_count" +output "Total flushed metric count at end: $flushed_metric_count" +output "Total elapsed time: $ELAPSED" +output "Average received metrics/second: \ +$((($received_metric_count - $received_metric_count_orig) / $ELAPSED))" +output "Average persisted metrics/second: \ +$((($flushed_metric_count - $flushed_metric_count_orig) / $ELAPSED))"