update cluster state to error when deployment fails

Change-Id: I54fb25d6151679118cf648b5c2192d56f7d95957
This commit is contained in:
xiaodongwang 2015-04-28 14:33:30 -07:00
parent 15c976fb36
commit a18bdbf92b
13 changed files with 216 additions and 59 deletions

View File

@ -1,15 +1,31 @@
#!/bin/bash #!/bin/bash
set -e set -e
service mysqld restart service mysqld restart
service mysqld status || exit $?
/opt/compass/bin/manage_db.py createdb /opt/compass/bin/manage_db.py createdb
/opt/compass/bin/clean_installers.py --noasync /opt/compass/bin/clean_installers.py --noasync
/opt/compass/bin/clean_installation_logs.py /opt/compass/bin/clean_installation_logs.py
rm -rf /var/ansible/run/* rm -rf /var/ansible/run/*
service httpd restart service httpd restart
sleep 10
service httpd status || exit $?
service rsyslog restart service rsyslog restart
sleep 10
service rsyslog status || exit $?
service redis restart service redis restart
sleep 10 sleep 10
service redis status || exit $?
redis-cli flushall redis-cli flushall
service cobblerd restart
sleep 10
service cobblerd status || exit $?
chef-server-ctl restart
sleep 10
chef-server-ctl status || exit $?
service compass-celeryd restart service compass-celeryd restart
sleep 10
service compass-celeryd status || exit $?
service compass-progress-updated restart service compass-progress-updated restart
sleep 10
service compass-progress-updated status || exit $?

View File

@ -46,14 +46,28 @@ def deploy(cluster_id, hosts_id_list, username=None):
hosts_info = util.ActionHelper.get_hosts_info( hosts_info = util.ActionHelper.get_hosts_info(
cluster_id, hosts_id_list, user) cluster_id, hosts_id_list, user)
deploy_manager = DeployManager(adapter_info, cluster_info, hosts_info) deploy_successful = True
# deploy_manager.prepare_for_deploy() try:
logging.debug('Created deploy manager with %s %s %s' deploy_manager = DeployManager(
% (adapter_info, cluster_info, hosts_info)) adapter_info, cluster_info, hosts_info)
# deploy_manager.prepare_for_deploy()
logging.debug('Created deploy manager with %s %s %s'
% (adapter_info, cluster_info, hosts_info))
deployed_config = deploy_manager.deploy()
except Exception as error:
logging.exception(error)
deploy_successful = False
deployed_config = deploy_manager.deploy() if deploy_successful:
util.ActionHelper.save_deployed_config(deployed_config, user) util.ActionHelper.save_deployed_config(deployed_config, user)
util.ActionHelper.update_state(cluster_id, hosts_id_list, user) util.ActionHelper.update_state(
cluster_id, hosts_id_list, user, state='INSTALLING'
)
else:
util.ActionHelper.update_state(
cluster_id, hosts_id_list, user, state='ERROR',
message='failed to start deployment', severity='ERROR'
)
def redeploy(cluster_id, hosts_id_list, username=None): def redeploy(cluster_id, hosts_id_list, username=None):
@ -75,10 +89,24 @@ def redeploy(cluster_id, hosts_id_list, username=None):
hosts_info = util.ActionHelper.get_hosts_info( hosts_info = util.ActionHelper.get_hosts_info(
cluster_id, hosts_id_list, user) cluster_id, hosts_id_list, user)
deploy_manager = DeployManager(adapter_info, cluster_info, hosts_info) deploy_successful = True
# deploy_manager.prepare_for_deploy() try:
deploy_manager.redeploy() deploy_manager = DeployManager(
util.ActionHelper.update_state(cluster_id, hosts_id_list, user) adapter_info, cluster_info, hosts_info)
# deploy_manager.prepare_for_deploy()
deploy_manager.redeploy()
except Exception as error:
logging.exception(error)
deploy_successful = False
if deploy_successful:
util.ActionHelper.update_state(
cluster_id, hosts_id_list, user, state='INSTALLING',
)
else:
util.ActionHelper.update_state(
cluster_id, hosts_id_list, user, state='ERROR',
message='failed to start redeployment', severity='ERROR'
)
def health_check(cluster_id, report_uri, username): def health_check(cluster_id, report_uri, username):

View File

@ -232,21 +232,23 @@ class ActionHelper(object):
**config) **config)
@staticmethod @staticmethod
def update_state(cluster_id, host_id_list, user): def update_state(
cluster_id, host_id_list, user, **kwargs
):
# update all clusterhosts state # update all clusterhosts state
for host_id in host_id_list: for host_id in host_id_list:
cluster_db.update_cluster_host_state( cluster_db.update_cluster_host_state(
cluster_id, cluster_id,
host_id, host_id,
user=user, user=user,
state='INSTALLING' **kwargs
) )
# update cluster state # update cluster state
cluster_db.update_cluster_state( cluster_db.update_cluster_state(
cluster_id, cluster_id,
user=user, user=user,
state='INSTALLING' **kwargs
) )
@staticmethod @staticmethod

View File

@ -134,6 +134,7 @@ UPDATED_CLUSTERHOST_STATE_INTERNAL_FIELDS = [
'ready' 'ready'
] ]
UPDATED_CLUSTER_STATE_FIELDS = ['state'] UPDATED_CLUSTER_STATE_FIELDS = ['state']
IGNORE_UPDATED_CLUSTER_STATE_FIELDS = ['percentage', 'message', 'severity']
UPDATED_CLUSTER_STATE_INTERNAL_FIELDS = ['ready'] UPDATED_CLUSTER_STATE_INTERNAL_FIELDS = ['ready']
RESP_CLUSTERHOST_LOG_FIELDS = [ RESP_CLUSTERHOST_LOG_FIELDS = [
'clusterhost_id', 'id', 'host_id', 'cluster_id', 'clusterhost_id', 'id', 'host_id', 'cluster_id',
@ -1916,7 +1917,7 @@ def update_clusterhost_state_internal(
@utils.supported_filters( @utils.supported_filters(
optional_support_keys=UPDATED_CLUSTER_STATE_FIELDS, optional_support_keys=UPDATED_CLUSTER_STATE_FIELDS,
ignore_support_keys=IGNORE_FIELDS ignore_support_keys=(IGNORE_FIELDS + IGNORE_UPDATED_CLUSTER_STATE_FIELDS)
) )
@database.run_in_session() @database.run_in_session()
@user_api.check_user_permission_in_session( @user_api.check_user_permission_in_session(

View File

@ -1152,42 +1152,43 @@ class ClusterState(BASE, StateMixin):
self.completed_hosts = 0 self.completed_hosts = 0
if self.state == 'INSTALLING': if self.state == 'INSTALLING':
cluster.reinstall_distributed_system = False cluster.reinstall_distributed_system = False
if not cluster.distributed_system:
for clusterhost in clusterhosts: if not cluster.distributed_system:
host = clusterhost.host for clusterhost in clusterhosts:
host_state = host.state.state host = clusterhost.host
if host_state == 'INSTALLING': host_state = host.state.state
self.installing_hosts += 1 if host_state == 'INSTALLING':
elif host_state == 'ERROR': self.installing_hosts += 1
self.failed_hosts += 1 elif host_state == 'ERROR':
elif host_state == 'SUCCESSFUL': self.failed_hosts += 1
self.completed_hosts += 1 elif host_state == 'SUCCESSFUL':
self.completed_hosts += 1
else:
for clusterhost in clusterhosts:
clusterhost_state = clusterhost.state.state
if clusterhost_state == 'INSTALLING':
self.installing_hosts += 1
elif clusterhost_state == 'ERROR':
self.failed_hosts += 1
elif clusterhost_state == 'SUCCESSFUL':
self.completed_hosts += 1
if self.total_hosts:
if self.completed_hosts == self.total_hosts:
self.percentage = 1.0
else: else:
for clusterhost in clusterhosts: self.percentage = (
clusterhost_state = clusterhost.state.state float(self.completed_hosts)
if clusterhost_state == 'INSTALLING': /
self.installing_hosts += 1 float(self.total_hosts)
elif clusterhost_state == 'ERROR': )
self.failed_hosts += 1 self.message = (
elif clusterhost_state == 'SUCCESSFUL': 'total %s, installing %s, completed: %s, error %s'
self.completed_hosts += 1 ) % (
if self.total_hosts: self.total_hosts, self.installing_hosts,
if self.completed_hosts == self.total_hosts: self.completed_hosts, self.failed_hosts
self.percentage = 1.0 )
else: if self.failed_hosts:
self.percentage = ( self.severity = 'ERROR'
float(self.completed_hosts)
/
float(self.total_hosts)
)
self.message = (
'total %s, installing %s, completed: %s, error %s'
) % (
self.total_hosts, self.installing_hosts,
self.completed_hosts, self.failed_hosts
)
if self.failed_hosts:
self.severity = 'ERROR'
super(ClusterState, self).update() super(ClusterState, self).update()

View File

@ -27,7 +27,13 @@
#if $val.is_mgmt #if $val.is_mgmt
#set $rekeyed[$dns_key] = $dns #set $rekeyed[$dns_key] = $dns
#else #else
#set $rekeyed[$dns_key] = '-'.join(($dns, $nic)) #if '.' in $dns
#set $dns_name, $dns_domain = $dns.split('.', 1)
#set $dns_nic = '%s-%s.%s' % ($dns_name, $nic, $dns_domain)
#else
#set $dns_nic = '%s-%s' % ($dns, $nic)
#end if
#set $rekeyed[$dns_key] = $dns_nic
#end if #end if
#if $val.is_promiscuous: #if $val.is_promiscuous:

View File

@ -27,7 +27,13 @@
#if $val.is_mgmt #if $val.is_mgmt
#set $rekeyed[$dns_key] = $dns #set $rekeyed[$dns_key] = $dns
#else #else
#set $rekeyed[$dns_key] = '-'.join(($dns, $nic)) #if '.' in $dns
#set $dns_name, $dns_domain = $dns.split('.', 1)
#set $dns_nic = '%s-%s.%s' % ($dns_name, $nic, $dns_domain)
#else
#set $dns_nic = '%s-%s' % ($dns, $nic)
#end if
#set $rekeyed[$dns_key] = $dns_nic
#end if #end if
#if $val.is_promiscuous: #if $val.is_promiscuous:

View File

@ -27,7 +27,13 @@
#if $val.is_mgmt #if $val.is_mgmt
#set $rekeyed[$dns_key] = $dns #set $rekeyed[$dns_key] = $dns
#else #else
#set $rekeyed[$dns_key] = '-'.join(($dns, $nic)) #if '.' in $dns
#set $dns_name, $dns_domain = $dns.split('.', 1)
#set $dns_nic = '%s-%s.%s' % ($dns_name, $nic, $dns_domain)
#else
#set $dns_nic = '%s-%s' % ($dns, $nic)
#end if
#set $rekeyed[$dns_key] = $dns_nic
#end if #end if
#if $val.is_promiscuous: #if $val.is_promiscuous:

View File

@ -26,7 +26,13 @@
#if $val.is_mgmt #if $val.is_mgmt
#set $rekeyed[$dns_key] = $dns #set $rekeyed[$dns_key] = $dns
#else #else
#set $rekeyed[$dns_key] = '-'.join(($dns, $nic)) #if '.' in $dns
#set $dns_name, $dns_domain = $dns.split('.', 1)
#set $dns_nic = '%s-%s.%s' % ($dns_name, $nic, $dns_domain)
#else
#set $dns_nic = '%s-%s' % ($dns, $nic)
#end if
#set $rekeyed[$dns_key] = $dns_nic
#end if #end if
#if $val.is_promiscuous: #if $val.is_promiscuous:

View File

@ -26,7 +26,13 @@
#if $val.is_mgmt #if $val.is_mgmt
#set $rekeyed[$dns_key] = $dns #set $rekeyed[$dns_key] = $dns
#else #else
#set $rekeyed[$dns_key] = '-'.join(($dns, $nic)) #if '.' in $dns
#set $dns_name, $dns_domain = $dns.split('.', 1)
#set $dns_nic = '%s-%s.%s' % ($dns_name, $nic, $dns_domain)
#else
#set $dns_nic = '%s-%s' % ($dns, $nic)
#end if
#set $rekeyed[$dns_key] = $dns_nic
#end if #end if
#if $val.is_promiscuous: #if $val.is_promiscuous:

View File

@ -27,7 +27,13 @@
#if $val.is_mgmt #if $val.is_mgmt
#set $rekeyed[$dns_key] = $dns #set $rekeyed[$dns_key] = $dns
#else #else
#set $rekeyed[$dns_key] = '-'.join(($dns, $nic)) #if '.' in $dns
#set $dns_name, $dns_domain = $dns.split('.', 1)
#set $dns_nic = '%s-%s.%s' % ($dns_name, $nic, $dns_domain)
#else
#set $dns_nic = '%s-%s' % ($dns, $nic)
#end if
#set $rekeyed[$dns_key] = $dns_nic
#end if #end if
#if $val.is_promiscuous: #if $val.is_promiscuous:

View File

@ -0,0 +1,76 @@
## Cheetah template that renders a single JSON object describing one host:
## identity (name/hostname/profile), gateway and DNS settings, a flattened
## per-NIC "modify_interface" map, and a "ksmeta" map of install-time values.
## NOTE(review): the key names look like a Cobbler system definition -- confirm
## against the code that loads this template.
{
"name": "$hostname",
"hostname": "$hostname",
"profile": "$profile",
"gateway": "$gateway",
#import simplejson as json
## Serialize the name server list to JSON text so it can be emitted below as
## an unquoted JSON value rather than a quoted string.
#set nameservers = json.dumps($nameservers, encoding='utf-8')
"name_servers": $nameservers,
## The search domains are emitted as one space-separated string.
#set search_path = ' '.join($search_path)
"name_servers_search": "$search_path",
"proxy": "$getVar('http_proxy', '')",
"modify_interface":
#set networks = $networks
#set rekeyed = {}
#set promic_nic = ""
## Flatten every NIC's settings into "<field>-<nic>" keys of one dict.
#for $nic, $val in $networks.iteritems():
#set ip_key = '-'.join(('ipaddress', $nic))
#set netmask_key = '-'.join(('netmask', $nic))
#set mgmt_key = '-'.join(('management', $nic))
#set static_key = '-'.join(('static', $nic))
#set $rekeyed[$ip_key] = $val.ip
#set $rekeyed[$netmask_key] = $val.netmask
#set $rekeyed[$mgmt_key] = $val.is_mgmt
## Every interface is marked static unconditionally.
#set $rekeyed[$static_key] = True
#set dns_key = '-'.join(('dnsname', $nic))
## The management NIC keeps $dns unchanged. Any other NIC gets "-<nic>"
## appended to the part before the first dot (or to the whole name when
## $dns contains no dot), so the domain suffix stays intact.
#if $val.is_mgmt
#set $rekeyed[$dns_key] = $dns
#else
#if '.' in $dns
#set $dns_name, $dns_domain = $dns.split('.', 1)
#set $dns_nic = '%s-%s.%s' % ($dns_name, $nic, $dns_domain)
#else
#set $dns_nic = '%s-%s' % ($dns, $nic)
#end if
#set $rekeyed[$dns_key] = $dns_nic
#end if
## Remember the promiscuous NIC; if several are flagged, the last one
## visited by the loop wins.
#if $val.is_promiscuous:
#set promic_nic = $nic
#end if
## Only the management NIC gets a "macaddress-<nic>" entry.
#if $val.is_mgmt:
#set mac_key = '-'.join(('macaddress', $nic))
#set $rekeyed[$mac_key] = $mac
#end if
#end for
## Emit the flattened NIC dict as the JSON value of "modify_interface".
#set nic_info = json.dumps($rekeyed, encoding='utf-8')
$nic_info
,
"ksmeta":{
## Build the partition string: one "<key> <percentage>%" or "<key> <size>"
## entry per partition, joined with ';'.
#set partition_config = ''
#for k, v in $partition.iteritems():
#set path = ''
#if v['vol_percentage']:
#set $path = k + ' ' + str(v['vol_percentage']) + '%'
#else:
#set $path = k + ' ' + str(v['vol_size'])
#end if
#set partition_config = ';'.join((partition_config, $path))
#end for
## Joining onto the initially empty string leaves a leading ';' -- drop it.
#set partition_config = partition_config[1:]
#import crypt
## Hash the server password with crypt() using the fixed salt "az".
## NOTE(review): a fixed two-character salt presumably selects traditional
## DES crypt, which is weak -- confirm whether the consumer accepts a
## stronger scheme.
#set $password = crypt.crypt($server_credentials.password, "az")
## no_proxy is joined with ',' and defaults to an empty list when unset.
#set no_proxy = ','.join($getVar('no_proxy', []))
"username": "$server_credentials.username",
"password": "$password",
"promisc_nics": "$promic_nic",
"partition": "$partition_config",
"https_proxy": "$getVar('https_proxy', '')",
"ntp_server": "$ntp_server",
"timezone": "$timezone",
"ignore_proxy": "$no_proxy",
"local_repo": "$getVar('local_repo', '')",
"disk_num": "1"
}
}

View File

@ -163,9 +163,6 @@ if [[ "$?" != "0" ]]; then
exit 1 exit 1
fi fi
sudo killall -9 celery
sudo service compass-celeryd restart
sudo sleep 10
sudo service compass-celeryd status |grep running sudo service compass-celeryd status |grep running
if [[ "$?" != "0" ]]; then if [[ "$?" != "0" ]]; then
echo "compass-celeryd is not started" echo "compass-celeryd is not started"