update cluster state to error when deployment fails

Change-Id: I54fb25d6151679118cf648b5c2192d56f7d95957
This commit is contained in:
xiaodongwang 2015-04-28 14:33:30 -07:00
parent 15c976fb36
commit a18bdbf92b
13 changed files with 216 additions and 59 deletions

View File

@ -1,15 +1,31 @@
#!/bin/bash
# Restart the services compass depends on, one after another, and clear
# state left behind by earlier runs (database, installers, logs, ansible
# run directory, redis contents).
#
# Pattern used for most services below: restart, wait 10 seconds, then query
# the status; if the status command fails the script exits with that
# command's exit code.

# Abort on the first command that returns a non-zero exit status.
set -e

# Database first: the manage_db.py call below runs right after it.
# Unlike the other services there is no sleep before this status check.
service mysqld restart
service mysqld status || exit $?

# Run the compass helper scripts: create the database, then clean installers
# (with --noasync) and installation logs.
/opt/compass/bin/manage_db.py createdb
/opt/compass/bin/clean_installers.py --noasync
/opt/compass/bin/clean_installation_logs.py

# Remove everything under the ansible run directory.
rm -rf /var/ansible/run/*

# Web server.
service httpd restart
sleep 10
service httpd status || exit $?

# Syslog daemon.
service rsyslog restart
sleep 10
service rsyslog status || exit $?

# Redis: restart, verify, then issue flushall so no keys from earlier runs
# remain.
service redis restart
sleep 10
service redis status || exit $?
redis-cli flushall

# Cobbler daemon.
service cobblerd restart
sleep 10
service cobblerd status || exit $?

# Chef server is managed through chef-server-ctl rather than `service`.
chef-server-ctl restart
sleep 10
chef-server-ctl status || exit $?

# Compass background daemons are restarted last, after everything above has
# been restarted and checked.
service compass-celeryd restart
sleep 10
service compass-celeryd status || exit $?
service compass-progress-updated restart
sleep 10
service compass-progress-updated status || exit $?

View File

@ -46,14 +46,28 @@ def deploy(cluster_id, hosts_id_list, username=None):
hosts_info = util.ActionHelper.get_hosts_info(
cluster_id, hosts_id_list, user)
deploy_manager = DeployManager(adapter_info, cluster_info, hosts_info)
# deploy_manager.prepare_for_deploy()
logging.debug('Created deploy manager with %s %s %s'
% (adapter_info, cluster_info, hosts_info))
deploy_successful = True
try:
deploy_manager = DeployManager(
adapter_info, cluster_info, hosts_info)
# deploy_manager.prepare_for_deploy()
logging.debug('Created deploy manager with %s %s %s'
% (adapter_info, cluster_info, hosts_info))
deployed_config = deploy_manager.deploy()
except Exception as error:
logging.exception(error)
deploy_successful = False
deployed_config = deploy_manager.deploy()
util.ActionHelper.save_deployed_config(deployed_config, user)
util.ActionHelper.update_state(cluster_id, hosts_id_list, user)
if deploy_successful:
util.ActionHelper.save_deployed_config(deployed_config, user)
util.ActionHelper.update_state(
cluster_id, hosts_id_list, user, state='INSTALLING'
)
else:
util.ActionHelper.update_state(
cluster_id, hosts_id_list, user, state='ERROR',
message='failed to start deployment', severity='ERROR'
)
def redeploy(cluster_id, hosts_id_list, username=None):
@ -75,10 +89,24 @@ def redeploy(cluster_id, hosts_id_list, username=None):
hosts_info = util.ActionHelper.get_hosts_info(
cluster_id, hosts_id_list, user)
deploy_manager = DeployManager(adapter_info, cluster_info, hosts_info)
# deploy_manager.prepare_for_deploy()
deploy_manager.redeploy()
util.ActionHelper.update_state(cluster_id, hosts_id_list, user)
deploy_successful = True
try:
deploy_manager = DeployManager(
adapter_info, cluster_info, hosts_info)
# deploy_manager.prepare_for_deploy()
deploy_manager.redeploy()
except Exception as error:
logging.exception(error)
deploy_successful = False
if deploy_successful:
util.ActionHelper.update_state(
cluster_id, hosts_id_list, user, state='INSTALLING',
)
else:
util.ActionHelper.update_state(
cluster_id, hosts_id_list, user, state='ERROR',
message='failed to start redeployment', severity='ERROR'
)
def health_check(cluster_id, report_uri, username):

View File

@ -232,21 +232,23 @@ class ActionHelper(object):
**config)
@staticmethod
def update_state(cluster_id, host_id_list, user):
def update_state(
cluster_id, host_id_list, user, **kwargs
):
# update all clusterhosts state
for host_id in host_id_list:
cluster_db.update_cluster_host_state(
cluster_id,
host_id,
user=user,
state='INSTALLING'
**kwargs
)
# update cluster state
cluster_db.update_cluster_state(
cluster_id,
user=user,
state='INSTALLING'
**kwargs
)
@staticmethod

View File

@ -134,6 +134,7 @@ UPDATED_CLUSTERHOST_STATE_INTERNAL_FIELDS = [
'ready'
]
UPDATED_CLUSTER_STATE_FIELDS = ['state']
IGNORE_UPDATED_CLUSTER_STATE_FIELDS = ['percentage', 'message', 'severity']
UPDATED_CLUSTER_STATE_INTERNAL_FIELDS = ['ready']
RESP_CLUSTERHOST_LOG_FIELDS = [
'clusterhost_id', 'id', 'host_id', 'cluster_id',
@ -1916,7 +1917,7 @@ def update_clusterhost_state_internal(
@utils.supported_filters(
optional_support_keys=UPDATED_CLUSTER_STATE_FIELDS,
ignore_support_keys=IGNORE_FIELDS
ignore_support_keys=(IGNORE_FIELDS + IGNORE_UPDATED_CLUSTER_STATE_FIELDS)
)
@database.run_in_session()
@user_api.check_user_permission_in_session(

View File

@ -1152,42 +1152,43 @@ class ClusterState(BASE, StateMixin):
self.completed_hosts = 0
if self.state == 'INSTALLING':
cluster.reinstall_distributed_system = False
if not cluster.distributed_system:
for clusterhost in clusterhosts:
host = clusterhost.host
host_state = host.state.state
if host_state == 'INSTALLING':
self.installing_hosts += 1
elif host_state == 'ERROR':
self.failed_hosts += 1
elif host_state == 'SUCCESSFUL':
self.completed_hosts += 1
if not cluster.distributed_system:
for clusterhost in clusterhosts:
host = clusterhost.host
host_state = host.state.state
if host_state == 'INSTALLING':
self.installing_hosts += 1
elif host_state == 'ERROR':
self.failed_hosts += 1
elif host_state == 'SUCCESSFUL':
self.completed_hosts += 1
else:
for clusterhost in clusterhosts:
clusterhost_state = clusterhost.state.state
if clusterhost_state == 'INSTALLING':
self.installing_hosts += 1
elif clusterhost_state == 'ERROR':
self.failed_hosts += 1
elif clusterhost_state == 'SUCCESSFUL':
self.completed_hosts += 1
if self.total_hosts:
if self.completed_hosts == self.total_hosts:
self.percentage = 1.0
else:
for clusterhost in clusterhosts:
clusterhost_state = clusterhost.state.state
if clusterhost_state == 'INSTALLING':
self.installing_hosts += 1
elif clusterhost_state == 'ERROR':
self.failed_hosts += 1
elif clusterhost_state == 'SUCCESSFUL':
self.completed_hosts += 1
if self.total_hosts:
if self.completed_hosts == self.total_hosts:
self.percentage = 1.0
else:
self.percentage = (
float(self.completed_hosts)
/
float(self.total_hosts)
)
self.message = (
'total %s, installing %s, completed: %s, error %s'
) % (
self.total_hosts, self.installing_hosts,
self.completed_hosts, self.failed_hosts
)
if self.failed_hosts:
self.severity = 'ERROR'
self.percentage = (
float(self.completed_hosts)
/
float(self.total_hosts)
)
self.message = (
'total %s, installing %s, completed: %s, error %s'
) % (
self.total_hosts, self.installing_hosts,
self.completed_hosts, self.failed_hosts
)
if self.failed_hosts:
self.severity = 'ERROR'
super(ClusterState, self).update()

View File

@ -27,7 +27,13 @@
#if $val.is_mgmt
#set $rekeyed[$dns_key] = $dns
#else
#set $rekeyed[$dns_key] = '-'.join(($dns, $nic))
#if '.' in $dns
#set $dns_name, $dns_domain = $dns.split('.', 1)
#set $dns_nic = '%s-%s.%s' % ($dns_name, $nic, $dns_domain)
#else
#set $dns_nic = '%s-%s' % ($dns, $nic)
#end if
#set $rekeyed[$dns_key] = $dns_nic
#end if
#if $val.is_promiscuous:

View File

@ -27,7 +27,13 @@
#if $val.is_mgmt
#set $rekeyed[$dns_key] = $dns
#else
#set $rekeyed[$dns_key] = '-'.join(($dns, $nic))
#if '.' in $dns
#set $dns_name, $dns_domain = $dns.split('.', 1)
#set $dns_nic = '%s-%s.%s' % ($dns_name, $nic, $dns_domain)
#else
#set $dns_nic = '%s-%s' % ($dns, $nic)
#end if
#set $rekeyed[$dns_key] = $dns_nic
#end if
#if $val.is_promiscuous:

View File

@ -27,7 +27,13 @@
#if $val.is_mgmt
#set $rekeyed[$dns_key] = $dns
#else
#set $rekeyed[$dns_key] = '-'.join(($dns, $nic))
#if '.' in $dns
#set $dns_name, $dns_domain = $dns.split('.', 1)
#set $dns_nic = '%s-%s.%s' % ($dns_name, $nic, $dns_domain)
#else
#set $dns_nic = '%s-%s' % ($dns, $nic)
#end if
#set $rekeyed[$dns_key] = $dns_nic
#end if
#if $val.is_promiscuous:

View File

@ -26,7 +26,13 @@
#if $val.is_mgmt
#set $rekeyed[$dns_key] = $dns
#else
#set $rekeyed[$dns_key] = '-'.join(($dns, $nic))
#if '.' in $dns
#set $dns_name, $dns_domain = $dns.split('.', 1)
#set $dns_nic = '%s-%s.%s' % ($dns_name, $nic, $dns_domain)
#else
#set $dns_nic = '%s-%s' % ($dns, $nic)
#end if
#set $rekeyed[$dns_key] = $dns_nic
#end if
#if $val.is_promiscuous:

View File

@ -26,7 +26,13 @@
#if $val.is_mgmt
#set $rekeyed[$dns_key] = $dns
#else
#set $rekeyed[$dns_key] = '-'.join(($dns, $nic))
#if '.' in $dns
#set $dns_name, $dns_domain = $dns.split('.', 1)
#set $dns_nic = '%s-%s.%s' % ($dns_name, $nic, $dns_domain)
#else
#set $dns_nic = '%s-%s' % ($dns, $nic)
#end if
#set $rekeyed[$dns_key] = $dns_nic
#end if
#if $val.is_promiscuous:

View File

@ -27,7 +27,13 @@
#if $val.is_mgmt
#set $rekeyed[$dns_key] = $dns
#else
#set $rekeyed[$dns_key] = '-'.join(($dns, $nic))
#if '.' in $dns
#set $dns_name, $dns_domain = $dns.split('.', 1)
#set $dns_nic = '%s-%s.%s' % ($dns_name, $nic, $dns_domain)
#else
#set $dns_nic = '%s-%s' % ($dns, $nic)
#end if
#set $rekeyed[$dns_key] = $dns_nic
#end if
#if $val.is_promiscuous:

View File

@ -0,0 +1,76 @@
{
## Cheetah template that renders a JSON object describing one system:
## identity (name/hostname/profile), network settings, per-NIC data under
## "modify_interface", and install-time metadata under "ksmeta".
## NOTE(review): the key names look like cobbler system fields -- confirm
## against the consumer of this template.
"name": "$hostname",
"hostname": "$hostname",
"profile": "$profile",
"gateway": "$gateway",
#import simplejson as json
## Serialize the nameserver list with json.dumps so it can be emitted below
## without surrounding quotes.
#set nameservers = json.dumps($nameservers, encoding='utf-8')
"name_servers": $nameservers,
## The search path entries are joined into one space-separated string.
#set search_path = ' '.join($search_path)
"name_servers_search": "$search_path",
"proxy": "$getVar('http_proxy', '')",
"modify_interface":
## Flatten $networks into a single dict ($rekeyed) whose keys have the form
## '<field>-<nic>' (ipaddress, netmask, management, static, dnsname and,
## for the management NIC only, macaddress).
#set networks = $networks
#set rekeyed = {}
#set promic_nic = ""
#for $nic, $val in $networks.iteritems():
#set ip_key = '-'.join(('ipaddress', $nic))
#set netmask_key = '-'.join(('netmask', $nic))
#set mgmt_key = '-'.join(('management', $nic))
#set static_key = '-'.join(('static', $nic))
#set $rekeyed[$ip_key] = $val.ip
#set $rekeyed[$netmask_key] = $val.netmask
#set $rekeyed[$mgmt_key] = $val.is_mgmt
## Every interface is marked static.
#set $rekeyed[$static_key] = True
#set dns_key = '-'.join(('dnsname', $nic))
## The management NIC keeps $dns unchanged. Any other NIC gets the NIC name
## appended to the first label: 'name-nic.domain' when $dns contains a dot,
## otherwise 'dns-nic'.
#if $val.is_mgmt
#set $rekeyed[$dns_key] = $dns
#else
#if '.' in $dns
#set $dns_name, $dns_domain = $dns.split('.', 1)
#set $dns_nic = '%s-%s.%s' % ($dns_name, $nic, $dns_domain)
#else
#set $dns_nic = '%s-%s' % ($dns, $nic)
#end if
#set $rekeyed[$dns_key] = $dns_nic
#end if
## Only one promiscuous NIC is remembered: each match overwrites the
## previous value, so the last one iterated wins.
#if $val.is_promiscuous:
#set promic_nic = $nic
#end if
## The MAC address ($mac) is recorded for the management NIC only.
#if $val.is_mgmt:
#set mac_key = '-'.join(('macaddress', $nic))
#set $rekeyed[$mac_key] = $mac
#end if
#end for
## Emit the flattened NIC dict as the JSON value of "modify_interface".
#set nic_info = json.dumps($rekeyed, encoding='utf-8')
$nic_info
,
"ksmeta":{
## Build the partition string: one '<key> <percentage>%' or '<key> <size>'
## entry per partition, joined with ';'.
#set partition_config = ''
#for k, v in $partition.iteritems():
#set path = ''
#if v['vol_percentage']:
#set $path = k + ' ' + str(v['vol_percentage']) + '%'
#else:
#set $path = k + ' ' + str(v['vol_size'])
#end if
#set partition_config = ';'.join((partition_config, $path))
#end for
## The join above starts from an empty string, so the result begins with a
## ';' whenever there is at least one partition; [1:] drops it.
#set partition_config = partition_config[1:]
#import crypt
## NOTE(review): the password is hashed with the fixed salt "az", so equal
## passwords always produce equal hashes. Confirm which hash format the
## consumer accepts before changing the salt or method.
#set $password = crypt.crypt($server_credentials.password, "az")
## no_proxy entries are joined into one comma-separated string.
#set no_proxy = ','.join($getVar('no_proxy', []))
"username": "$server_credentials.username",
"password": "$password",
"promisc_nics": "$promic_nic",
"partition": "$partition_config",
"https_proxy": "$getVar('https_proxy', '')",
"ntp_server": "$ntp_server",
"timezone": "$timezone",
"ignore_proxy": "$no_proxy",
"local_repo": "$getVar('local_repo', '')",
"disk_num": "1"
}
}

View File

@ -163,9 +163,6 @@ if [[ "$?" != "0" ]]; then
exit 1
fi
sudo killall -9 celery
sudo service compass-celeryd restart
sudo sleep 10
sudo service compass-celeryd status |grep running
if [[ "$?" != "0" ]]; then
echo "compass-celeryd is not started"