#!/bin/bash # Copyright(c) 2016 Nippon Telegraph and Telephone Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Define constants. BASE_NAME=`basename $0` TMP_DIR="/var/tmp" TMP_CRM_MON_FILE="$TMP_DIR/crm_mon.tmp" STATUS_FILE="$TMP_DIR/node_status.tmp" TMP_CRMADM_FILE="$TMP_DIR/crmadmin.tmp" NOTICE_OUTPUT="$TMP_DIR/${BASE_NAME}_resp.out" SCRIPT_DIR=$(cd $(dirname $0);pwd) SCRIPT_CHECK_PROCESS="$SCRIPT_DIR/process_status_checker.sh" SCRIPT_COMMON_SH="$SCRIPT_DIR/common.sh" DOWN_PROCESS_LIST="$TMP_DIR/badproc.list" MASAKARI_API_SEND_PROGRAM=curl MASAKARI_API_SEND_FAIL_FLG="off" ALREADY_SEND_ID_LIST=() LOGTAG=`basename $0` P_HOST=`uname -n` # Define the default setting. DEFAULT_PROCESS_CHECK_INTERVAL=5 DEFAULT_PROCESS_REBOOT_RETRY=3 DEFAULT_REBOOT_INTERVAL=10 DEFAULT_MASAKARI_API_SEND_TIMEOUT=10 DEFAULT_MASAKARI_API_SEND_RETRY=12 DEFAULT_MASAKARI_API_SEND_DELAY=10 # This function locks a file # Argument: # $1 : File name file_lock () { exec 9>>$1 flock -x 9 } # This function unlocks a file file_unlock () { exec 9>&- } # This function reads the configuration file and setting value. # If the value is omitted, set the default value. # If invalid value is set, return "1". # Note) The default value for each item are as follows. # PROCESS_CHECK_INTERVAL (defualt : 60) # PROCESS_REBOOT_RETRY (default : 10) # REBOOT_INTERVAL (default : 3) # MASAKARI_API_SEND_TIMEOUT (defualt : 10) # MASAKARI_API_SEND_RETRY (default : 3) # MASAKARI_API_SEND_DELAY (default : 1) # # Return value: # 0 : Setting completion # 1 : Reading failure of the configuration or invalid setting value # 2 : Omission of the required item set_conf_value () { # Initialize setting unset PROCESS_CHECK_INTERVAL unset PROCESS_REBOOT_RETRY unset REBOOT_INTERVAL unset MASAKARI_API_SEND_TIMEOUT unset MASAKARI_API_SEND_RETRY unset MASAKARI_API_SEND_DELAY unset DOMAIN unset PROJECT unset ADMIN_USER unset ADMIN_PASS unset AUTH_URL unset REGION # Read configuration file source $SCRIPT_CONF_FILE > /dev/null 2>&1 if [ $? -ne 0 ]; then log_info "config file read error. [$SCRIPT_CONF_FILE]" return 1 fi # Empty string is permitted. If there is no key itself, consider it as an error. # If the PROCESS_CHECK_INTERVAL is omitted, set the default value. # If invalid is set, return 1. expect_empty=`echo -n $PROCESS_CHECK_INTERVAL | sed 's/[0-9]//g'` if [ "x" = "x${PROCESS_CHECK_INTERVAL}" ]; then PROCESS_CHECK_INTERVAL=$DEFAULT_PROCESS_CHECK_INTERVAL elif [ "x" != "x${expect_empty}" ]; then log_info "config file parameter error. [$SCRIPT_CONF_FILE:PROCESS_CHECK_INTERVAL]" return 1 fi log_debug "config file parameter : PROCESS_CHECK_INTERVAL=$PROCESS_CHECK_INTERVAL" # If the PROCESS_REBOOT_RETRY is omitted, set the default value. # If invalid is set, return 1. expect_empty=`echo -n $PROCESS_REBOOT_RETRY | sed 's/[0-9]//g'` if [ "x" = "x${PROCESS_REBOOT_RETRY}" ]; then PROCESS_REBOOT_RETRY=$DEFAULT_PROCESS_REBOOT_RETRY elif [ "x" != "x${expect_empty}" ]; then log_info "config file parameter error. [$SCRIPT_CONF_FILE:PROCESS_REBOOT_RETRY]" return 1 fi log_debug "config file parameter : PROCESS_REBOOT_RETRY=$PROCESS_REBOOT_RETRY" # If the REBOOT_INTERVAL is omitted, set the default value. # If invalid is set, return 1. expect_empty=`echo -n $REBOOT_INTERVAL | sed 's/[0-9]//g'` if [ "x" = "x${REBOOT_INTERVAL}" ]; then REBOOT_INTERVAL=$DEFAULT_REBOOT_INTERVAL elif [ "x" != "x${expect_empty}" ]; then log_info "config file parameter error. [$SCRIPT_CONF_FILE:REBOOT_INTERVAL]" return 1 fi log_debug "config file parameter : REBOOT_INTERVAL=$REBOOT_INTERVAL" # If the MASAKARI_API_SEND_TIMEOUT is omitted, set the default value. # If invalid is set, return 1. expect_empty=`echo -n $MASAKARI_API_SEND_TIMEOUT | sed 's/[0-9]//g'` if [ "x" = "x${MASAKARI_API_SEND_TIMEOUT}" ]; then MASAKARI_API_SEND_TIMEOUT=$DEFAULT_MASAKARI_API_SEND_TIMEOUT elif [ "x" != "x${expect_empty}" ]; then log_info "config file parameter error. [$SCRIPT_CONF_FILE:MASAKARI_API_SEND_TIMEOUT]" return 1 fi log_debug "config file parameter : MASAKARI_API_SEND_TIMEOUT=$MASAKARI_API_SEND_TIMEOUT" # If the MASAKARI_API_SEND_RETRY is omitted, set the default value. # If invalid is set, return 1. expect_empty=`echo -n $MASAKARI_API_SEND_RETRY | sed 's/[0-9]//g'` if [ "x" = "x${MASAKARI_API_SEND_RETRY}" ]; then MASAKARI_API_SEND_RETRY=$DEFAULT_MASAKARI_API_SEND_RETRY elif [ "x" != "x${expect_empty}" ]; then log_info "config file parameter error. [$SCRIPT_CONF_FILE:MASAKARI_API_SEND_RETRY]" return 1 fi log_debug "config file parameter : MASAKARI_API_SEND_RETRY=$MASAKARI_API_SEND_RETRY" # If the MASAKARI_API_SEND_DELAY is omitted, set the default value. # If invalid is set, return 1. expect_empty=`echo -n $MASAKARI_API_SEND_DELAY | sed 's/[0-9]//g'` if [ "x" = "x${MASAKARI_API_SEND_DELAY}" ]; then MASAKARI_API_SEND_DELAY=$DEFAULT_MASAKARI_API_SEND_DELAY elif [ "x" != "x${expect_empty}" ]; then log_info "config file parameter error. [$SCRIPT_CONF_FILE:MASAKARI_API_SEND_DELAY]" return 1 fi log_debug "config file parameter : MASAKARI_API_SEND_DELAY=$MASAKARI_API_SEND_DELAY" # If the DOMAIN is omitted, return 1. if [ "x" = "x${DOMAIN}" ]; then log_info "config file parameter error. [$DOMAIN:DOMAIN]" return 1 else log_debug "config file parameter : DOMAIN=$DOMAIN" fi # If the PROJECT is omitted, return 1. if [ "x" = "x${PROJECT}" ]; then log_info "config file parameter error. [$PROJECT:PROJECT]" return 1 else log_debug "config file parameter : PROJECT=$PROJECT" fi # If the ADMIN_USER is omitted, return 1. if [ "x" = "x${ADMIN_USER}" ]; then log_info "config file parameter error. [$ADMIN_USER:ADMIN_USER]" return 1 else log_debug "config file parameter : ADMIN_USER=$ADMIN_USER" fi # If the ADMIN_PASS is omitted, return 1. if [ "x" = "x${ADMIN_PASS}" ]; then log_info "config file parameter error. [$ADMIN_PASS:ADMIN_PASS]" return 1 else log_debug "config file parameter : ADMIN_PASS=$ADMIN_PASS" fi # If the AUTH_URL is omitted, return 1. if [ "x" = "x${AUTH_URL}" ]; then log_info "config file parameter error. [$AUTH_URL:AUTH_URL]" return 1 else log_debug "config file parameter : AUTH_URL=$AUTH_URL" fi # If the REGION is omitted, return 1. if [ "x" = "x${REGION}" ]; then log_info "config file parameter error. [$REGION:REGION]" return 1 else log_debug "config file parameter : REGION=$REGION" fi return 0 } # Initial startup command execution method: # This method does not execute same command as startup command that executed once. init_boot() { log_debug "init_boot start" CMD_LIST=() for line in "${proc_list[@]}" do ALREADY_FLG="off" CMD=`echo ${line} | cut -d"," -f 3` SPECIAL_BEFORE=`echo $line | cut -d"," -f 5` SPECIAL_AFTER=`echo $line | cut -d"," -f 6` # If there is no startup command, can proceed to the next command. if [ -z "$CMD" ]; then continue fi # Check whether already is executed. for CHECK_CMD in "${CMD_LIST[@]}" do if [ "$CHECK_CMD" = "$CMD" ]; then ALREADY_FLG="on" break fi done # Execute special processing before the initial startup. if [ ! -z "$SPECIAL_BEFORE" ]; then $SPECIAL_BEFORE fi # If not be executed, execute start command. if [ "$ALREADY_FLG" = "off" ]; then OLD_IFS=$IFS IFS=';' set -- $CMD CMD_SPLIT_LIST=("$@") IFS=$OLD_IFS for SPLIT_CMD in "${CMD_SPLIT_LIST[@]}" do $SPLIT_CMD > /dev/null 2>&1 done CMD_LIST=("$CMD_LIST" "$CMD") fi # Execute special processing after the initial startup. if [ ! -z "$SPECIAL_AFTER" ]; then $SPECIAL_AFTER fi done log_debug "init_boot end" } # This function creates data that is notified to the masakari api. # It is called from the child process. # make_notice_data () { TIME=`date -u +'%Y-%m-%d %H:%M:%S'` PAYLOAD="{\"event\": \"STOPPED\", \"process_name\": \"${PROCESS_NAME}\"}" } # This function notifies to the masakari api. # It is called masakari_cli post_event method. send_notification () { TYPE="PROCESS" TARGET="post_event" AUTH_INFO="--os-domain-name ${DOMAIN} --os-project-name ${PROJECT} --os-region-name ${REGION} --os-auth-url ${AUTH_URL} --os-username ${ADMIN_USER} --os-password ${ADMIN_PASS}" log_info "info : Send a notification." log_info "info : openstack ${AUTH_INFO} notification create ${TYPE} ${P_HOST} \"${TIME}\" \"${PAYLOAD}\"" RESP=`openstack ${AUTH_INFO} notification create ${TYPE} ${P_HOST} "${TIME}" "${PAYLOAD}"` result=$? if [ $result -eq 1 ]; then log_info "info : Failed to send a notification. [exit-code: $result]" log_info "info : $RESP" MASAKARI_API_SEND_FAIL_FLG="on" else log_info "info : Succeeded in sending a notification." log_info "info : $RESP" fi return } # Attempt to restart the failer process. # If failure to number of retries, notify to the masakari api. down_process_reboot(){ ALREADY_REBOOT_CMD_LIST=() while read line do ALREADY_FLG="off" # No processing is executed about process id included in the send list. for already_id in "${ALREADY_SEND_ID_LIST[@]}" do if [ "$line" = "$already_id" ]; then ALREADY_FLG="on" break fi done if [ "$ALREADY_FLG" = "on" ]; then continue fi for proc in "${proc_list[@]}" do PROC_ID=`echo $proc | cut -d"," -f 1` if [ "$line" = "$PROC_ID" ] ; then CMD=`echo $proc | cut -d"," -f 4` PROCESS_NAME=`echo $proc | cut -d"," -f 2` SPECIAL_BEFORE=`echo $proc | cut -d"," -f 7` SPECIAL_AFTER=`echo $proc | cut -d"," -f 8` break fi done if [ ! -z "$SPECIAL_BEFORE" ]; then $SPECIAL_BEFORE fi # If there is not restart command, can proceed to the next command. if [ -z "$CMD" ]; then continue fi RESULT_FLG=1 # Decomposes multiple processing be joined by ";" and execute them. (restart execution part) OLD_IFS=$IFS IFS=';' set -- $CMD CMD_SPLIT_LIST=("$@") IFS=$OLD_IFS for SPLIT_CMD in "${CMD_SPLIT_LIST[@]}" do ALREADY_FLG="off" # Check whether already is executed. for CHECK_CMD in "${ALREADY_REBOOT_CMD_LIST[@]}" do if [ "$CHECK_CMD" = "$SPLIT_CMD" ]; then ALREADY_FLG="on" break fi done # If is already executed, skip. if [ "$ALREADY_FLG" = "on" ]; then continue fi log_debug "reboot cmd:$SPLIT_CMD" $SPLIT_CMD > /dev/null 2>&1 if [ $? -ne 0 ]; then RESULT_FLG=0 break else ALREADY_REBOOT_CMD_LIST=("$ALREADY_REBOOT_CMD_LIST" "$SPLIT_CMD") fi done # If fail to restart, executes retry restart. if [ $RESULT_FLG -ne 1 ]; then result=0 for retry in `seq $PROCESS_REBOOT_RETRY` do sleep $REBOOT_INTERVAL # Retry the restart processing. RESULT_FLG=1 for SPLIT_CMD in "${CMD_SPLIT_LIST[@]}" do ALREADY_FLG="off" # Check whether already is executed. for CHECK_CMD in "${ALREADY_REBOOT_CMD_LIST[@]}" do if [ "$CHECK_CMD" = "$SPLIT_CMD" ]; then ALREADY_FLG="on" break fi done # If is already executed, skip. if [ "$ALREADY_FLG" = "on" ]; then continue fi log_debug "reboot cmd:$SPLIT_CMD" $SPLIT_CMD > /dev/null 2>&1 if [ $? -ne 0 ]; then RESULT_FLG=0 break else ALREADY_REBOOT_CMD_LIST=("$ALREADY_REBOOT_CMD_LIST" "$SPLIT_CMD") fi done if [ $RESULT_FLG -eq 1 ]; then break elif [ $retry -eq $PROCESS_REBOOT_RETRY ]; then # If number of retries is exceeded, notify to the masakari api. make_notice_data if [ $result -eq 0 ]&& [ "$MASAKARI_API_SEND_FAIL_FLG" = "off" ]; then send_notification fi # Add the sent list. ALREADY_SEND_ID_LIST=("${ALREADY_SEND_ID_LIST[@]}" "${line}") fi done fi # Special processes after restart. if [ ! -z "$SPECIAL_AFTER" ]; then $SPECIAL_AFTER fi done < $DOWN_PROCESS_LIST } # Argument check if [ $# -ne 2 ]; then echo "Usage: $0 " exit 1 else SCRIPT_CONF_FILE=$1 PROC_LIST=$2 fi # Initial processing (check proc.list and read conf file) . $SCRIPT_COMMON_SH # Output warning message. log_info "WARNING : $0 is deprecated as of the Ocata release and will be removed in the Queens release. Use masakari-processmonitor implemented in python instead of $0." log_debug "processmonitor start!!" check_proc_file_common set_conf_value if [ $? -ne 0 ]; then exit 1 fi if [ -e $NOTICE_OUTPUT ]; then sudo rm -rf $NOTICE_OUTPUT fi # Initial startup init_boot while true do # Recheck and reload of the proc.list. check_proc_file_common # If invalid value is set to configuration file, set default value. set_conf_value if [ $? -ne 0 ]; then exit 1 fi # Execute process check processing. ${SCRIPT_CHECK_PROCESS} ${PROC_LIST} RESULT_CODE=$? # If the return code is 2, because can't continue functionally, stop. if [ $RESULT_CODE -eq 2 ]; then log_debug "process_status_checker down!" exit 1 fi # If the failing process is detected by shell check, retry restart. if [ $RESULT_CODE -ne 0 ]; then down_process_reboot fi sleep ${PROCESS_CHECK_INTERVAL} done