Fix a couple bugs in the stack management script

There are two use cases for a stack restart,
one where you want to take a node down for maintentance,
and two where you really want to stop the whole monasca
stack.  monasca-thresh is a cluster-wide process and can
be stopped from any node in the cluster.  But stopping it
when taking a single node out of service isn't appropriate.

Also update the lag checks for the new/correct way to
get kafka consumer lag.

Change-Id: I333b791c903a45e8a0fc101ade229991a03da2cc
This commit is contained in:
Brad Klein 2016-12-09 07:18:42 -07:00
parent 43937f00ea
commit 0c64cfad13
1 changed files with 27 additions and 15 deletions

View File

@ -3,6 +3,7 @@
MIRROR_FILE="/etc/monasca/monasca-persister-mirror.yml"
STORM_FILE="/opt/storm/current/conf/storm.yaml"
INFLUXDB_FILE="/etc/opt/influxdb/influxdb.conf"
INCLUDE_THRESH="include_thresh_flag"
#
# Get the list of monasca services in the order they should be
@ -12,16 +13,17 @@ INFLUXDB_FILE="/etc/opt/influxdb/influxdb.conf"
#
get_up_list() {
if [ -e $INFLUXDB_FILE ]
then
echo "influxdb"
fi
echo "zookeeper kafka storm-supervisor"
echo "zookeeper kafka storm-supervisor storm-nimbus storm-ui"
if grep nimbus.seeds $STORM_FILE | grep -e $(hostname) -e localhost > /dev/null
if [ "$1" = "$INCLUDE_THRESH" ]
then
echo "storm-nimbus storm-ui monasca-thresh"
echo "monasca-thresh"
fi
if [ -e $MIRROR_FILE ]
@ -45,12 +47,12 @@ get_down_list() {
echo "monasca-persister-mirror"
fi
if grep nimbus.host $STORM_FILE | grep -e $(hostname) -e localhost > /dev/null
if [ "$1" = "$INCLUDE_THRESH" ]
then
echo "monasca-thresh storm-ui storm-nimbus"
echo "monasca-thresh"
fi
echo "storm-supervisor kafka zookeeper"
echo "storm-ui storm-nimbus storm-supervisor kafka zookeeper"
if [ -e $INFLUXDB_FILE ]
then
@ -59,14 +61,14 @@ get_down_list() {
}
status() {
for x in $(get_up_list)
for x in $(get_up_list $INCLUDE_THRESH)
do
service $x status
done
}
start() {
for x in $(get_up_list)
for x in $(get_up_list $1)
do
STATUS=$(is_service_running $x)
#
@ -102,7 +104,7 @@ is_service_running() {
}
stop() {
for x in $(get_down_list)
for x in $(get_down_list $1)
do
service $x stop
#
@ -134,12 +136,11 @@ tail_metrics() {
lag() {
#
# Print the consumer lag -- ignore java log warnings
# Print the consumer lag
#
/opt/kafka/bin/kafka-run-class.sh kafka.tools.ConsumerOffsetChecker \
--zkconnect localhost:2181 \
--topic metrics --group $1 2>&1 \
| grep -v SLF4J
/opt/kafka/bin/kafka-run-class.sh kafka.admin.ConsumerGroupCommand \
--zookeeper localhost:2181 \
--group $1 --describe 2>&1
}
case "$1" in
@ -149,14 +150,25 @@ case "$1" in
start)
start
;;
start-cluster)
start $INCLUDE_THRESH
;;
stop)
stop
;;
stop-cluster)
stop $INCLUDE_THRESH
;;
restart)
stop
sleep 2
start
;;
restart-cluster)
stop $INCLUDE_THRESH
sleep 2
start $INCLUDE_THRESH
;;
tail-logs)
tail_logs
;;
@ -170,6 +182,6 @@ case "$1" in
lag '2_metrics'
;;
*)
echo "Usage: "$1" {status|start|stop|restart|tail-logs|tail-metrics|local-lag|mirror-lag}"
echo "Usage: "$1" {status|start|start-cluster|stop|stop-cluster|restart|restart-cluster|tail-logs|tail-metrics|local-lag|mirror-lag}"
exit 1
esac