Fix a couple bugs in the stack management script
There are two use cases for a stack restart: one where you want to take a single node down for maintenance, and another where you really want to stop the whole monasca stack. monasca-thresh is a cluster-wide process and can be stopped from any node in the cluster, but stopping it when taking a single node out of service isn't appropriate. Also update the lag checks to use the new, correct way to get Kafka consumer lag. Change-Id: I333b791c903a45e8a0fc101ade229991a03da2cc
This commit is contained in:
parent
43937f00ea
commit
0c64cfad13
|
@ -3,6 +3,7 @@
|
||||||
MIRROR_FILE="/etc/monasca/monasca-persister-mirror.yml"
|
MIRROR_FILE="/etc/monasca/monasca-persister-mirror.yml"
|
||||||
STORM_FILE="/opt/storm/current/conf/storm.yaml"
|
STORM_FILE="/opt/storm/current/conf/storm.yaml"
|
||||||
INFLUXDB_FILE="/etc/opt/influxdb/influxdb.conf"
|
INFLUXDB_FILE="/etc/opt/influxdb/influxdb.conf"
|
||||||
|
INCLUDE_THRESH="include_thresh_flag"
|
||||||
|
|
||||||
#
|
#
|
||||||
# Get the list of monasca services in the order they should be
|
# Get the list of monasca services in the order they should be
|
||||||
|
@ -12,16 +13,17 @@ INFLUXDB_FILE="/etc/opt/influxdb/influxdb.conf"
|
||||||
#
|
#
|
||||||
get_up_list() {
|
get_up_list() {
|
||||||
|
|
||||||
|
|
||||||
if [ -e $INFLUXDB_FILE ]
|
if [ -e $INFLUXDB_FILE ]
|
||||||
then
|
then
|
||||||
echo "influxdb"
|
echo "influxdb"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "zookeeper kafka storm-supervisor"
|
echo "zookeeper kafka storm-supervisor storm-nimbus storm-ui"
|
||||||
|
|
||||||
if grep nimbus.seeds $STORM_FILE | grep -e $(hostname) -e localhost > /dev/null
|
if [ "$1" = "$INCLUDE_THRESH" ]
|
||||||
then
|
then
|
||||||
echo "storm-nimbus storm-ui monasca-thresh"
|
echo "monasca-thresh"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -e $MIRROR_FILE ]
|
if [ -e $MIRROR_FILE ]
|
||||||
|
@ -45,12 +47,12 @@ get_down_list() {
|
||||||
echo "monasca-persister-mirror"
|
echo "monasca-persister-mirror"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if grep nimbus.host $STORM_FILE | grep -e $(hostname) -e localhost > /dev/null
|
if [ "$1" = "$INCLUDE_THRESH" ]
|
||||||
then
|
then
|
||||||
echo "monasca-thresh storm-ui storm-nimbus"
|
echo "monasca-thresh"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "storm-supervisor kafka zookeeper"
|
echo "storm-ui storm-nimbus storm-supervisor kafka zookeeper"
|
||||||
|
|
||||||
if [ -e $INFLUXDB_FILE ]
|
if [ -e $INFLUXDB_FILE ]
|
||||||
then
|
then
|
||||||
|
@ -59,14 +61,14 @@ get_down_list() {
|
||||||
}
|
}
|
||||||
|
|
||||||
status() {
|
status() {
|
||||||
for x in $(get_up_list)
|
for x in $(get_up_list $INCLUDE_THRESH)
|
||||||
do
|
do
|
||||||
service $x status
|
service $x status
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
start() {
|
start() {
|
||||||
for x in $(get_up_list)
|
for x in $(get_up_list $1)
|
||||||
do
|
do
|
||||||
STATUS=$(is_service_running $x)
|
STATUS=$(is_service_running $x)
|
||||||
#
|
#
|
||||||
|
@ -102,7 +104,7 @@ is_service_running() {
|
||||||
}
|
}
|
||||||
|
|
||||||
stop() {
|
stop() {
|
||||||
for x in $(get_down_list)
|
for x in $(get_down_list $1)
|
||||||
do
|
do
|
||||||
service $x stop
|
service $x stop
|
||||||
#
|
#
|
||||||
|
@ -134,12 +136,11 @@ tail_metrics() {
|
||||||
|
|
||||||
lag() {
|
lag() {
|
||||||
#
|
#
|
||||||
# Print the consumer lag -- ignore java log warnings
|
# Print the consumer lag
|
||||||
#
|
#
|
||||||
/opt/kafka/bin/kafka-run-class.sh kafka.tools.ConsumerOffsetChecker \
|
/opt/kafka/bin/kafka-run-class.sh kafka.admin.ConsumerGroupCommand \
|
||||||
--zkconnect localhost:2181 \
|
--zookeeper localhost:2181 \
|
||||||
--topic metrics --group $1 2>&1 \
|
--group $1 --describe 2>&1
|
||||||
| grep -v SLF4J
|
|
||||||
}
|
}
|
||||||
|
|
||||||
case "$1" in
|
case "$1" in
|
||||||
|
@ -149,14 +150,25 @@ case "$1" in
|
||||||
start)
|
start)
|
||||||
start
|
start
|
||||||
;;
|
;;
|
||||||
|
start-cluster)
|
||||||
|
start $INCLUDE_THRESH
|
||||||
|
;;
|
||||||
stop)
|
stop)
|
||||||
stop
|
stop
|
||||||
;;
|
;;
|
||||||
|
stop-cluster)
|
||||||
|
stop $INCLUDE_THRESH
|
||||||
|
;;
|
||||||
restart)
|
restart)
|
||||||
stop
|
stop
|
||||||
sleep 2
|
sleep 2
|
||||||
start
|
start
|
||||||
;;
|
;;
|
||||||
|
restart-cluster)
|
||||||
|
stop $INCLUDE_THRESH
|
||||||
|
sleep 2
|
||||||
|
start $INCLUDE_THRESH
|
||||||
|
;;
|
||||||
tail-logs)
|
tail-logs)
|
||||||
tail_logs
|
tail_logs
|
||||||
;;
|
;;
|
||||||
|
@ -170,6 +182,6 @@ case "$1" in
|
||||||
lag '2_metrics'
|
lag '2_metrics'
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
echo "Usage: "$1" {status|start|stop|restart|tail-logs|tail-metrics|local-lag|mirror-lag}"
|
echo "Usage: "$1" {status|start|start-cluster|stop|stop-cluster|restart|restart-cluster|tail-logs|tail-metrics|local-lag|mirror-lag}"
|
||||||
exit 1
|
exit 1
|
||||||
esac
|
esac
|
||||||
|
|
Loading…
Reference in New Issue