Merge "Add processmonitor"

This commit is contained in:
Jenkins 2016-11-08 08:29:09 +00:00 committed by Gerrit Code Review
commit 0f003bc26d
7 changed files with 781 additions and 0 deletions

View File

@ -0,0 +1,2 @@
01,/usr/sbin/libvirtd,sudo service libvirt-bin start,sudo service libvirt-bin start,,,,
02,/usr/bin/python /usr/local/bin/masakari-instancemonitor,sudo service masakari-instancemonitor start,sudo service masakari-instancemonitor start,,,,

View File

@ -0,0 +1,13 @@
# Settings for masakari-processmonitor, written as plain shell assignments.
# processmonitor.sh loads its configuration file with `source`
# (presumably this file, passed as its first argument -- verify against the deployment).
#
# Numeric items must consist of digits only. An empty value makes the monitor
# fall back to its built-in DEFAULT_* value.

# Seconds the monitor sleeps between two process checks.
PROCESS_CHECK_INTERVAL=5
# Number of restart retries before a notification is sent.
PROCESS_REBOOT_RETRY=3
# Seconds the monitor sleeps before each restart retry.
REBOOT_INTERVAL=5
# NOTE(review): the three MASAKARI_API_SEND_* items are validated when the
# configuration is loaded; no consumer is visible in the scripts reviewed here -- confirm.
MASAKARI_API_SEND_TIMEOUT=10
MASAKARI_API_SEND_RETRY=12
MASAKARI_API_SEND_DELAY=10
# "debug" turns on the debug-level log lines; any other value leaves them off.
LOG_LEVEL="debug"
# Required items (must not be empty): they are handed to the `openstack`
# command line as --os-domain-name, --os-project-name, --os-username,
# --os-password, --os-auth-url and --os-region-name when a notification is sent.
DOMAIN="Default"
PROJECT="demo"
ADMIN_USER="admin"
ADMIN_PASS="admin"
AUTH_URL="http://127.0.0.1:5000/"
REGION="RegionOne"

View File

@ -0,0 +1,196 @@
# Copyright(c) 2016 Nippon Telegraph and Telephone Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Where the log lines go.
LOGDIR="/var/log/masakari"
LOGFILE="${LOGDIR}/masakari-processmonitor.log"
# Values stamped on every log line: name of the running script and host name.
LOGTAG=$(basename $0)
HOST_NAME=$(hostname)
# Debug-level logging: the message is written only while LOG_LEVEL is "debug".
# The log directory is created first when it does not exist yet.
# Argument
#   $1 : Message
log_debug () {
    if [ ! -e ${LOGDIR} ]; then
        mkdir -p ${LOGDIR}
    fi
    # Any other level: nothing to write.
    [ "${LOG_LEVEL}" == "debug" ] || return 0
    log_output "$1"
}
# Info-level logging: the message is always written.
# The log directory is created first when it does not exist yet.
# Argument
#   $1 : Message
log_info () {
    if [ ! -e ${LOGDIR} ]; then
        mkdir -p ${LOGDIR}
    fi

    log_output "$1"
}
# Append one line to LOGFILE in the form
#   "<YYYY-mm-dd HH:MM:SS> <HOST_NAME> <LOGTAG>: <message>"
# Argument
#   $1 : Message
log_output () {
    local stamp
    stamp=$(date +'%Y-%m-%d %H:%M:%S')
    printf '%s\n' "${stamp} ${HOST_NAME} ${LOGTAG}: $1" >> $LOGFILE
}
# Sanity checks on the list of monitored processes (proc.list).
# Format of proc.list (columns are separated by commas):
# Column 1 : Process ID (two digits with leading zeros). Cannot be omitted.
# Column 2 : Keyword searched for in the process table. Cannot be omitted.
# Column 3 : Initial startup command (must contain the word "start").
# Column 4 : Restart command (must contain the word "start").
# Column 5 : Script for special processing at the initial startup (before the startup)
# Column 6 : Script for special processing at the initial startup (after the startup)
# Column 7 : Script for special processing at a restart (before the restart)
# Column 8 : Script for special processing at a restart (after the restart)
#
# When an abnormal condition is detected in proc.list, the shell exits with "exit 2".

# Number of comma-separated columns every proc.list line must contain.
column_num=8

# Validate one optional special-processing script column.
# An empty value means "not configured" and is accepted.
# Arguments
#   $1 : Column label used in the log message
#   $2 : Script path taken from proc.list
#   $3 : proc.list line number used in the log message
# Exits the shell with status 2 when the path does not exist or is not executable.
_check_sp_cmdfile () {
    if [ -z "$2" ]; then
        return 0
    fi
    if [ ! -e "$2" ]; then
        log_info "$PROC_LIST format error ($1) not exists. line $3"
        exit 2
    fi
    if [ ! -x "$2" ]; then
        log_info "$PROC_LIST format error ($1) not exeutable. line $3"
        exit 2
    fi
    return 0
}

# Validate the file named by PROC_LIST and load its non-empty lines into the
# global array proc_list (one element per line, raw text).
# Globals read    : PROC_LIST, column_num
# Globals written : proc_list, LINE_NO, PROC_ID, KEY_WORD, START_CMD, RESTART_CMD,
#                   START_SP_CMDFILE_BEFORE/AFTER, RESTART_SP_CMDFILE_BEFORE/AFTER
# Returns 0 when every line is valid; exits the shell with status 2 otherwise.
check_proc_file_common () {
    # Work with the default separators; the caller's IFS is restored on return.
    local IFS=$' \t\n'
    local entry flat commas
    local -a words cols

    # Check the existence and validity of the proc.list.
    if [ ! -e "$PROC_LIST" ]; then
        log_info "$PROC_LIST(proc_list) is not exists."
        exit 2
    fi
    if [ ! -s "$PROC_LIST" ]; then
        log_info "$PROC_LIST(proc_list) is empty file."
        exit 2
    fi
    if [ ! -r "$PROC_LIST" ]; then
        log_info "$PROC_LIST(proc_list) is not readable."
        exit 2
    fi

    # Load the file line by line. Empty lines are skipped; no pathname
    # expansion is applied to the content.
    proc_list=()
    while IFS= read -r entry || [ -n "$entry" ]; do
        if [ -n "$entry" ]; then
            proc_list+=("$entry")
        fi
    done < "$PROC_LIST"

    LINE_NO=1
    for line in "${proc_list[@]}"; do
        # Every line needs exactly column_num - 1 separators.
        commas=${line//[^,]/}
        if [ "${#commas}" -ne "$((column_num - 1))" ]; then
            log_info "$PROC_LIST format error (column_num) line $LINE_NO"
            exit 2
        fi

        # Trim the line and collapse runs of blanks (what an unquoted
        # expansion would do, but without globbing), then split on commas.
        read -r -a words <<< "$line"
        flat="${words[*]}"
        IFS=',' read -r -a cols <<< "$flat"

        PROC_ID=${cols[0]}
        if [ -z "$PROC_ID" ]; then
            log_info "$PROC_LIST format error (PROC_ID) empty. line $LINE_NO"
            exit 2
        fi
        if ! [[ "$PROC_ID" =~ ^-?[0-9]+$ ]]; then
            log_info "$PROC_LIST format error (PROC_ID) not number. line $LINE_NO"
            exit 2
        fi

        KEY_WORD=${cols[1]}
        if [ -z "$KEY_WORD" ]; then
            log_info "$PROC_LIST format error (KEY_WORD) empty. line $LINE_NO"
            exit 2
        fi

        # Start and restart commands are optional, but when present they
        # must contain the word "start".
        START_CMD=${cols[2]}
        if [[ -n "$START_CMD" && "$START_CMD" != *start* ]]; then
            log_info "$PROC_LIST format error (START_CMD) line $LINE_NO"
            exit 2
        fi
        RESTART_CMD=${cols[3]}
        if [[ -n "$RESTART_CMD" && "$RESTART_CMD" != *start* ]]; then
            log_info "$PROC_LIST format error (RESTART_CMD) line $LINE_NO"
            exit 2
        fi

        # Special-processing scripts run before/after start and restart.
        START_SP_CMDFILE_BEFORE=${cols[4]}
        _check_sp_cmdfile "START_SP_CMDFILE_BEFORE" "$START_SP_CMDFILE_BEFORE" "$LINE_NO"
        START_SP_CMDFILE_AFTER=${cols[5]}
        _check_sp_cmdfile "START_SP_CMDFILE_AFTER" "$START_SP_CMDFILE_AFTER" "$LINE_NO"
        RESTART_SP_CMDFILE_BEFORE=${cols[6]}
        _check_sp_cmdfile "RESTART_SP_CMDFILE_BEFORE" "$RESTART_SP_CMDFILE_BEFORE" "$LINE_NO"
        RESTART_SP_CMDFILE_AFTER=${cols[7]}
        _check_sp_cmdfile "RESTART_SP_CMDFILE_AFTER" "$RESTART_SP_CMDFILE_AFTER" "$LINE_NO"

        LINE_NO=$((LINE_NO + 1))
    done
}

View File

@ -0,0 +1,24 @@
#!/bin/bash
# Copyright(c) 2016 Nippon Telegraph and Telephone Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Preparation step for restarting nova-compute: take the PID column of every
# `ps -ef` line that mentions nova-compute (the grep helper's own line
# excluded) and send SIGKILL to each of them through sudo.
KILL_PS_LIST=($(ps -ef | grep nova-compute | grep -v grep | awk '{ print $2; }'))

for PS_ID in ${KILL_PS_LIST[@]}; do
    sudo kill -9 ${PS_ID}
done

View File

@ -0,0 +1,53 @@
#!/bin/bash
# Copyright(c) 2016 Nippon Telegraph and Telephone Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Checks that every process listed in proc.list shows up in the process table.
# Usage: process_status_checker.sh [proc.list path]
# Exit status:
#   0 : every keyword was found
#   1 : at least one process is down (its ID is written to badproc.list)
#   2 : proc.list is unusable (raised by check_proc_file_common)

# Define constants
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
SCRIPT_COMMON_SH="$SCRIPT_DIR/common.sh"
TMP_DIR="/var/tmp"
# List to check: first argument, else an inherited PROC_LIST, else the
# historical fixed location. This lets the caller check the list it was started with.
PROC_LIST=${1:-${PROC_LIST:-/etc/masakari/proc.list}}
BAD_CODE_LIST_FILE="$TMP_DIR/badproc.list"

# Common processing (check of proc.list)
. "$SCRIPT_COMMON_SH"
check_proc_file_common

# Get the process list, flattened to one blank-separated line.
# Keywords are matched against this single line; flattening with tr avoids
# the pathname expansion an unquoted echo would apply to entries such as "[kthreadd]".
ps_result=$(ps -ef | tr -s '[:space:]' ' ')

# Initialize abnormal condition list
: > "${BAD_CODE_LIST_FILE}"

# Process check main processing
while read -r -a words || [ "${#words[@]}" -gt 0 ]; do
    # Trim and collapse blanks, then take the first two columns.
    flat="${words[*]}"
    IFS=',' read -r PROC_NO PROC_NAME rest <<< "$flat"

    # The keyword is a grep basic regular expression; "--" keeps a keyword
    # that starts with "-" from being read as an option.
    PROC_CHECK=$(grep -c -- "${PROC_NAME}" <<< "$ps_result")

    # If the process was not detected, register its ID as abnormal.
    if [ "${PROC_CHECK}" = "0" ]; then
        log_info "down process id_no : ${PROC_NO}"
        echo "${PROC_NO}" >> "${BAD_CODE_LIST_FILE}"
    fi
done < "${PROC_LIST}"

# If a failing process ID was detected, report abnormal termination (exit code 1).
if [ -s "${BAD_CODE_LIST_FILE}" ]; then
    exit 1
fi
exit 0

View File

@ -0,0 +1,493 @@
#!/bin/bash
# Copyright(c) 2016 Nippon Telegraph and Telephone Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Identity of this run: script name (also used as the log tag) and node name.
BASE_NAME=$(basename $0)
LOGTAG=${BASE_NAME}
P_HOST=$(uname -n)

# Scratch files.
TMP_DIR="/var/tmp"
TMP_CRM_MON_FILE="$TMP_DIR/crm_mon.tmp"
STATUS_FILE="$TMP_DIR/node_status.tmp"
TMP_CRMADM_FILE="$TMP_DIR/crmadmin.tmp"
NOTICE_OUTPUT="$TMP_DIR/${BASE_NAME}_resp.out"
DOWN_PROCESS_LIST="$TMP_DIR/badproc.list"

# Sibling scripts, looked up next to this script.
SCRIPT_DIR=$(cd $(dirname $0);pwd)
SCRIPT_CHECK_PROCESS="$SCRIPT_DIR/process_status_checker.sh"
SCRIPT_COMMON_SH="$SCRIPT_DIR/common.sh"

# Notification state.
MASAKARI_API_SEND_PROGRAM=curl
MASAKARI_API_SEND_FAIL_FLG="off"
ALREADY_SEND_ID_LIST=()

# Default values used when the configuration file leaves an item empty.
DEFAULT_PROCESS_CHECK_INTERVAL=5
DEFAULT_PROCESS_REBOOT_RETRY=3
DEFAULT_REBOOT_INTERVAL=10
DEFAULT_MASAKARI_API_SEND_TIMEOUT=10
DEFAULT_MASAKARI_API_SEND_RETRY=12
DEFAULT_MASAKARI_API_SEND_DELAY=10
# Take an exclusive lock on a file.
# The file is opened for appending on descriptor 9 (created when missing) and
# the lock is taken on that descriptor; it is held until descriptor 9 is closed.
# Argument:
#   $1 : File name
# Returns:
#   1 when the file cannot be opened, otherwise the status of flock.
file_lock () {
    # Quoted so that a path containing blanks is opened as one file.
    exec 9>>"$1" || return 1
    flock -x 9
}
# Close file descriptor 9 of the current shell (the descriptor file_lock opens).
file_unlock () {
    exec 9<&-
}
# Validate one numeric configuration item.
# An empty item is replaced by the given default; anything but digits is an error.
# Arguments:
#   $1 : Name of the configuration variable
#   $2 : Default value used when the variable is empty
# Returns:
#   0 : Value accepted (or defaulted)
#   1 : Value contains something other than digits
_set_numeric_conf () {
    local key=$1 fallback=$2 leftover
    if [ -z "${!key}" ]; then
        printf -v "$key" '%s' "$fallback"
    else
        # Left unquoted on purpose: surrounding blanks are ignored by the check.
        leftover=$(echo -n ${!key} | sed 's/[0-9]//g')
        if [ -n "$leftover" ]; then
            log_info "config file parameter error. [$SCRIPT_CONF_FILE:$key]"
            return 1
        fi
    fi
    log_debug "config file parameter : $key=${!key}"
    return 0
}

# This function reads the configuration file (SCRIPT_CONF_FILE) and checks
# the setting values.
# Numeric items fall back to a default when empty and must contain digits only:
#   PROCESS_CHECK_INTERVAL    (default : DEFAULT_PROCESS_CHECK_INTERVAL)
#   PROCESS_REBOOT_RETRY      (default : DEFAULT_PROCESS_REBOOT_RETRY)
#   REBOOT_INTERVAL           (default : DEFAULT_REBOOT_INTERVAL)
#   MASAKARI_API_SEND_TIMEOUT (default : DEFAULT_MASAKARI_API_SEND_TIMEOUT)
#   MASAKARI_API_SEND_RETRY   (default : DEFAULT_MASAKARI_API_SEND_RETRY)
#   MASAKARI_API_SEND_DELAY   (default : DEFAULT_MASAKARI_API_SEND_DELAY)
# Required items (must not be empty):
#   DOMAIN, PROJECT, ADMIN_USER, ADMIN_PASS, AUTH_URL, REGION
#
# Return value:
#   0 : Setting completion
#   1 : The configuration could not be read, a numeric item is invalid,
#       or a required item is missing
set_conf_value () {
    local item default_ref

    # Initialize setting so that items removed from the file do not survive a reload.
    unset PROCESS_CHECK_INTERVAL PROCESS_REBOOT_RETRY REBOOT_INTERVAL
    unset MASAKARI_API_SEND_TIMEOUT MASAKARI_API_SEND_RETRY MASAKARI_API_SEND_DELAY
    unset DOMAIN PROJECT ADMIN_USER ADMIN_PASS AUTH_URL REGION

    # Read configuration file
    if ! source "$SCRIPT_CONF_FILE" > /dev/null 2>&1; then
        log_info "config file read error. [$SCRIPT_CONF_FILE]"
        return 1
    fi

    # Numeric items: the default of item X is held in DEFAULT_X.
    for item in PROCESS_CHECK_INTERVAL PROCESS_REBOOT_RETRY REBOOT_INTERVAL \
                MASAKARI_API_SEND_TIMEOUT MASAKARI_API_SEND_RETRY MASAKARI_API_SEND_DELAY; do
        default_ref="DEFAULT_${item}"
        _set_numeric_conf "$item" "${!default_ref}" || return 1
    done

    # Required items: an empty or missing value is an error.
    for item in DOMAIN PROJECT ADMIN_USER ADMIN_PASS AUTH_URL REGION; do
        if [ -z "${!item}" ]; then
            log_info "config file parameter error. [$SCRIPT_CONF_FILE:$item]"
            return 1
        fi
        if [ "$item" = "ADMIN_PASS" ]; then
            # Never write the password itself to the log file.
            log_debug "config file parameter : ADMIN_PASS=********"
        else
            log_debug "config file parameter : $item=${!item}"
        fi
    done
    return 0
}
# Initial startup: run the startup command (column 3) of every proc_list entry.
# A startup command string that was already executed during this call is not
# executed again; the special-processing scripts of the entry (columns 5 and 6)
# are still run around it.
# Globals read    : proc_list
# Globals written : CMD_LIST, CMD, SPECIAL_BEFORE, SPECIAL_AFTER, ALREADY_FLG,
#                   CMD_SPLIT_LIST
init_boot() {
    log_debug "init_boot start"
    CMD_LIST=()
    for line in "${proc_list[@]}"; do
        ALREADY_FLG="off"
        # ${line} is left unquoted on purpose: word splitting trims the line
        # and collapses blanks, as the other readers of proc.list here do.
        CMD=$(echo ${line} | cut -d"," -f 3)
        SPECIAL_BEFORE=$(echo $line | cut -d"," -f 5)
        SPECIAL_AFTER=$(echo $line | cut -d"," -f 6)

        # If there is no startup command, proceed to the next entry.
        if [ -z "$CMD" ]; then
            continue
        fi

        # Check whether the command was already executed.
        for CHECK_CMD in "${CMD_LIST[@]}"; do
            if [ "$CHECK_CMD" = "$CMD" ]; then
                ALREADY_FLG="on"
                break
            fi
        done

        # Execute special processing before the initial startup.
        if [ -n "$SPECIAL_BEFORE" ]; then
            $SPECIAL_BEFORE
        fi

        # If not executed yet, execute the start command.
        if [ "$ALREADY_FLG" = "off" ]; then
            # Several commands may be joined by ";"; split without globbing.
            IFS=';' read -r -a CMD_SPLIT_LIST <<< "$CMD"
            for SPLIT_CMD in "${CMD_SPLIT_LIST[@]}"; do
                # Unquoted on purpose: the text is split into command and arguments.
                $SPLIT_CMD > /dev/null 2>&1
            done
            # Append; every earlier element must stay in the list.
            CMD_LIST+=("$CMD")
        fi

        # Execute special processing after the initial startup.
        if [ -n "$SPECIAL_AFTER" ]; then
            $SPECIAL_AFTER
        fi
    done
    log_debug "init_boot end"
}
# This function creates the data that is notified to the masakari api.
# Globals read    : PROCESS_NAME
# Globals written : TIME    (current UTC time, "YYYY-mm-dd HH:MM:SS")
#                   PAYLOAD (JSON object with the event and the process name)
make_notice_data () {
    local escaped_name
    TIME=$(date -u +'%Y-%m-%d %H:%M:%S')
    # Escape backslashes first, then double quotes, so the name cannot break
    # out of the JSON string.
    escaped_name=${PROCESS_NAME//\\/\\\\}
    escaped_name=${escaped_name//\"/\\\"}
    PAYLOAD="{\"event\": \"STOPPED\", \"process_name\": \"${escaped_name}\"}"
}
# This function notifies the masakari api by running
# `openstack ... notification create`.
# Globals read    : DOMAIN, PROJECT, REGION, AUTH_URL, ADMIN_USER, ADMIN_PASS,
#                   P_HOST, TIME, PAYLOAD
# Globals written : TYPE, TARGET, AUTH_INFO, RESP, result,
#                   MASAKARI_API_SEND_FAIL_FLG ("on" after a failed send)
# NOTE(review): the password is still passed on the command line of the
# openstack client; only the log output is masked here.
send_notification () {
    local -a auth_args
    local shown_auth

    TYPE="PROCESS"
    TARGET="post_event"

    # One array element per argument, so values containing blanks stay intact.
    auth_args=(
        --os-domain-name "${DOMAIN}"
        --os-project-name "${PROJECT}"
        --os-region-name "${REGION}"
        --os-auth-url "${AUTH_URL}"
        --os-username "${ADMIN_USER}"
        --os-password "${ADMIN_PASS}"
    )
    # Kept for backward compatibility with code that may read this global.
    AUTH_INFO="--os-domain-name ${DOMAIN} --os-project-name ${PROJECT} --os-region-name ${REGION} --os-auth-url ${AUTH_URL} --os-username ${ADMIN_USER} --os-password ${ADMIN_PASS}"
    # Same text for the log, with the password masked.
    shown_auth="--os-domain-name ${DOMAIN} --os-project-name ${PROJECT} --os-region-name ${REGION} --os-auth-url ${AUTH_URL} --os-username ${ADMIN_USER} --os-password ********"

    log_info "info : Send a notification."
    log_info "info : openstack ${shown_auth} notification create ${TYPE} ${P_HOST} \"${TIME}\" \"${PAYLOAD}\""
    RESP=$(openstack "${auth_args[@]}" notification create "${TYPE}" "${P_HOST}" "${TIME}" "${PAYLOAD}")
    result=$?
    # Any non-zero status is a failure (usage errors, command not found, ...).
    if [ $result -ne 0 ]; then
        log_info "info : Failed to send a notification. [exit-code: $result]"
        log_info "info : $RESP"
        MASAKARI_API_SEND_FAIL_FLG="on"
    else
        log_info "info : Succeeded in sending a notification."
        log_info "info : $RESP"
    fi
    return
}
# Run the parts of a restart command that did not succeed yet in this pass.
# Arguments:
#   $@ : The parts of one restart command (already split at ";")
# Globals read/written : ALREADY_REBOOT_CMD_LIST (parts that succeeded so far)
# Returns:
#   0 : Every part succeeded or had succeeded before
#   1 : A part failed; the remaining parts were not run
_run_reboot_cmds () {
    local part done_cmd skip
    for part in "$@"; do
        skip="off"
        for done_cmd in "${ALREADY_REBOOT_CMD_LIST[@]}"; do
            if [ "$done_cmd" = "$part" ]; then
                skip="on"
                break
            fi
        done
        if [ "$skip" = "on" ]; then
            continue
        fi
        log_debug "reboot cmd:$part"
        # Unquoted on purpose: the text is split into command and arguments.
        $part > /dev/null 2>&1 || return 1
        # Append; every earlier element must stay in the list.
        ALREADY_REBOOT_CMD_LIST+=("$part")
    done
    return 0
}

# Attempt to restart the failed processes listed (by ID) in DOWN_PROCESS_LIST.
# For each ID the restart command (column 4 of the matching proc_list entry)
# is run; on failure it is retried PROCESS_REBOOT_RETRY times with
# REBOOT_INTERVAL seconds in between. When the retries are used up, a
# notification is sent to the masakari api and the ID is remembered in
# ALREADY_SEND_ID_LIST so that it is not handled again.
# Globals read    : DOWN_PROCESS_LIST, proc_list, PROCESS_REBOOT_RETRY,
#                   REBOOT_INTERVAL, MASAKARI_API_SEND_FAIL_FLG
# Globals written : ALREADY_REBOOT_CMD_LIST, ALREADY_SEND_ID_LIST, CMD,
#                   PROCESS_NAME, SPECIAL_BEFORE, SPECIAL_AFTER, CMD_SPLIT_LIST
down_process_reboot(){
    local attempt
    ALREADY_REBOOT_CMD_LIST=()
    # The ID list is read on descriptor 3 so that the commands started below
    # cannot consume it through their standard input.
    while read -r line <&3; do
        ALREADY_FLG="off"
        # IDs that were already notified are not processed again.
        for already_id in "${ALREADY_SEND_ID_LIST[@]}"; do
            if [ "$line" = "$already_id" ]; then
                ALREADY_FLG="on"
                break
            fi
        done
        if [ "$ALREADY_FLG" = "on" ]; then
            continue
        fi

        # Start from a clean slate: an ID without a matching proc_list entry
        # must not reuse the values of a previous entry.
        CMD=""
        PROCESS_NAME=""
        SPECIAL_BEFORE=""
        SPECIAL_AFTER=""
        for proc in "${proc_list[@]}"; do
            # $proc is left unquoted on purpose: word splitting trims the line
            # and collapses blanks, as the other readers of proc.list here do.
            PROC_ID=$(echo $proc | cut -d"," -f 1)
            if [ "$line" = "$PROC_ID" ]; then
                CMD=$(echo $proc | cut -d"," -f 4)
                PROCESS_NAME=$(echo $proc | cut -d"," -f 2)
                SPECIAL_BEFORE=$(echo $proc | cut -d"," -f 7)
                SPECIAL_AFTER=$(echo $proc | cut -d"," -f 8)
                break
            fi
        done

        # Special processing before restart.
        if [ -n "$SPECIAL_BEFORE" ]; then
            $SPECIAL_BEFORE
        fi

        # If there is no restart command, proceed to the next ID.
        if [ -z "$CMD" ]; then
            continue
        fi

        # Several commands may be joined by ";"; split without globbing.
        IFS=';' read -r -a CMD_SPLIT_LIST <<< "$CMD"

        # First attempt, then up to PROCESS_REBOOT_RETRY retries.
        attempt=0
        until _run_reboot_cmds "${CMD_SPLIT_LIST[@]}"; do
            if [ "$attempt" -ge "${PROCESS_REBOOT_RETRY:-0}" ]; then
                # Retries are used up: notify the masakari api.
                make_notice_data
                if [ "$MASAKARI_API_SEND_FAIL_FLG" = "off" ]; then
                    send_notification
                fi
                # Add to the sent list.
                ALREADY_SEND_ID_LIST+=("${line}")
                break
            fi
            attempt=$((attempt + 1))
            sleep $REBOOT_INTERVAL
        done

        # Special processing after restart.
        if [ -n "$SPECIAL_AFTER" ]; then
            $SPECIAL_AFTER
        fi
    done 3< "$DOWN_PROCESS_LIST"
}
# Argument check
if [ $# -ne 2 ]; then
    echo "Usage: $0 <configuration file path> <proc.list file path>" >&2
    exit 1
fi
SCRIPT_CONF_FILE=$1
PROC_LIST=$2

# Initial processing (check proc.list and read conf file)
# Without the common functions nothing below can work, so stop right away.
if ! . "$SCRIPT_COMMON_SH"; then
    echo "$0: cannot read $SCRIPT_COMMON_SH" >&2
    exit 1
fi
check_proc_file_common
set_conf_value || exit 1
# Logged after the configuration is loaded: LOG_LEVEL comes from that file.
log_debug "processmonitor start!!"

if [ -e "$NOTICE_OUTPUT" ]; then
    sudo rm -rf -- "$NOTICE_OUTPUT"
fi

# Initial startup
init_boot

while true; do
    # Recheck and reload of the proc.list.
    check_proc_file_common

    # Reload the configuration; an invalid file stops the monitor.
    set_conf_value || exit 1

    # Execute process check processing on the same proc.list this monitor uses.
    "${SCRIPT_CHECK_PROCESS}" "$PROC_LIST"
    RESULT_CODE=$?

    # Return code 2 means the checker cannot work at all: stop.
    if [ $RESULT_CODE -eq 2 ]; then
        # Info level, so the reason for stopping is always in the log.
        log_info "process_status_checker down!"
        exit 1
    fi

    # If a failing process was detected, try to restart it.
    if [ $RESULT_CODE -ne 0 ]; then
        down_process_reboot
    fi

    sleep ${PROCESS_CHECK_INTERVAL}
done