Fix a couple of bugs in the stack management script

There are two use cases for a stack restart:
one where you want to take a single node down for maintenance,
and another where you really want to stop the whole monasca
stack.  monasca-thresh is a cluster-wide process and can
be stopped from any node in the cluster, but stopping it
when taking a single node out of service isn't appropriate.

Also update the lag checks to use the new, correct way to
get Kafka consumer lag.

Change-Id: I333b791c903a45e8a0fc101ade229991a03da2cc
This commit is contained in:
Brad Klein 2016-12-09 07:18:42 -07:00
parent 43937f00ea
commit 0c64cfad13
1 changed files with 27 additions and 15 deletions

View File

@ -3,6 +3,7 @@
MIRROR_FILE="/etc/monasca/monasca-persister-mirror.yml" MIRROR_FILE="/etc/monasca/monasca-persister-mirror.yml"
STORM_FILE="/opt/storm/current/conf/storm.yaml" STORM_FILE="/opt/storm/current/conf/storm.yaml"
INFLUXDB_FILE="/etc/opt/influxdb/influxdb.conf" INFLUXDB_FILE="/etc/opt/influxdb/influxdb.conf"
INCLUDE_THRESH="include_thresh_flag"
# #
# Get the list of monasca services in the order they should be # Get the list of monasca services in the order they should be
@ -12,16 +13,17 @@ INFLUXDB_FILE="/etc/opt/influxdb/influxdb.conf"
# #
get_up_list() { get_up_list() {
if [ -e $INFLUXDB_FILE ] if [ -e $INFLUXDB_FILE ]
then then
echo "influxdb" echo "influxdb"
fi fi
echo "zookeeper kafka storm-supervisor" echo "zookeeper kafka storm-supervisor storm-nimbus storm-ui"
if grep nimbus.seeds $STORM_FILE | grep -e $(hostname) -e localhost > /dev/null if [ "$1" = "$INCLUDE_THRESH" ]
then then
echo "storm-nimbus storm-ui monasca-thresh" echo "monasca-thresh"
fi fi
if [ -e $MIRROR_FILE ] if [ -e $MIRROR_FILE ]
@ -45,12 +47,12 @@ get_down_list() {
echo "monasca-persister-mirror" echo "monasca-persister-mirror"
fi fi
if grep nimbus.host $STORM_FILE | grep -e $(hostname) -e localhost > /dev/null if [ "$1" = "$INCLUDE_THRESH" ]
then then
echo "monasca-thresh storm-ui storm-nimbus" echo "monasca-thresh"
fi fi
echo "storm-supervisor kafka zookeeper" echo "storm-ui storm-nimbus storm-supervisor kafka zookeeper"
if [ -e $INFLUXDB_FILE ] if [ -e $INFLUXDB_FILE ]
then then
@ -59,14 +61,14 @@ get_down_list() {
} }
status() { status() {
for x in $(get_up_list) for x in $(get_up_list $INCLUDE_THRESH)
do do
service $x status service $x status
done done
} }
start() { start() {
for x in $(get_up_list) for x in $(get_up_list $1)
do do
STATUS=$(is_service_running $x) STATUS=$(is_service_running $x)
# #
@ -102,7 +104,7 @@ is_service_running() {
} }
stop() { stop() {
for x in $(get_down_list) for x in $(get_down_list $1)
do do
service $x stop service $x stop
# #
@ -134,12 +136,11 @@ tail_metrics() {
lag() { lag() {
# #
# Print the consumer lag -- ignore java log warnings # Print the consumer lag
# #
/opt/kafka/bin/kafka-run-class.sh kafka.tools.ConsumerOffsetChecker \ /opt/kafka/bin/kafka-run-class.sh kafka.admin.ConsumerGroupCommand \
--zkconnect localhost:2181 \ --zookeeper localhost:2181 \
--topic metrics --group $1 2>&1 \ --group $1 --describe 2>&1
| grep -v SLF4J
} }
case "$1" in case "$1" in
@ -149,14 +150,25 @@ case "$1" in
start) start)
start start
;; ;;
start-cluster)
start $INCLUDE_THRESH
;;
stop) stop)
stop stop
;; ;;
stop-cluster)
stop $INCLUDE_THRESH
;;
restart) restart)
stop stop
sleep 2 sleep 2
start start
;; ;;
restart-cluster)
stop $INCLUDE_THRESH
sleep 2
start $INCLUDE_THRESH
;;
tail-logs) tail-logs)
tail_logs tail_logs
;; ;;
@ -170,6 +182,6 @@ case "$1" in
lag '2_metrics' lag '2_metrics'
;; ;;
*) *)
echo "Usage: "$1" {status|start|stop|restart|tail-logs|tail-metrics|local-lag|mirror-lag}" echo "Usage: "$1" {status|start|start-cluster|stop|stop-cluster|restart|restart-cluster|tail-logs|tail-metrics|local-lag|mirror-lag}"
exit 1 exit 1
esac esac