Patching of OpenStack

Implements: blueprint openstack-patching-nailgun-part

Change-Id: Iaed70ee002916858a52e34f05c8485841ae05737
Aleksey Kasatkin 2014-04-08 16:01:47 +03:00 committed by Igor Kalnitsky
parent 00c45c63ca
commit 88aad90989
24 changed files with 479 additions and 36 deletions
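Taken together, the change adds a patching (update) flow for an existing environment: the cluster gets a pending_release_id, a new /update/ URL is wired to ClusterUpdateHandler, and UpdateEnvironmentTaskManager re-deploys the nodes against the pending release. Below is a rough client-side sketch of that flow; the master node address, the use of PUT, and the payload shape are assumptions inferred from the handlers and validator in this diff, not something the commit spells out.

# Hypothetical client-side sketch of the patching flow added by this commit.
# Endpoint paths come from the urls.py hunk below; the HTTP verb (PUT) and the
# payload keys are assumptions based on Nailgun handler conventions.
import requests

NAILGUN = "http://10.20.0.2:8000/api"   # hypothetical Fuel master address
cluster_id = 1
pending_release_id = 2                  # hypothetical target release id

# 1. Set the pending release; ClusterValidator checks that it can update the
#    current release (same OS, current version in can_update_from_versions).
requests.put(
    "{0}/clusters/{1}/".format(NAILGUN, cluster_id),
    json={"pending_release_id": pending_release_id},
)

# 2. Trigger the update; ClusterUpdateHandler defers to
#    UpdateEnvironmentTaskManager, which casts a 'deploy' message to Astute.
task = requests.put("{0}/clusters/{1}/update/".format(NAILGUN, cluster_id)).json()
print(task)   # expected: a task with name='update' in status 'running'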

View File

@@ -39,6 +39,7 @@ from nailgun.task.manager import ApplyChangesTaskManager
from nailgun.task.manager import ClusterDeletionManager
from nailgun.task.manager import ResetEnvironmentTaskManager
from nailgun.task.manager import StopDeploymentTaskManager
from nailgun.task.manager import UpdateEnvironmentTaskManager
from nailgun import utils
@@ -102,6 +103,14 @@ class ClusterResetHandler(DeferredTaskHandler):

    task_manager = ResetEnvironmentTaskManager


class ClusterUpdateHandler(DeferredTaskHandler):

    log_message = u"Trying to update environment '{env_id}'"
    log_error = u"Error during execution of update task " \
                u"on environment '{env_id}': {error}"
    task_manager = UpdateEnvironmentTaskManager


class ClusterAttributesHandler(BaseHandler):
    """Cluster attributes handler
    """

View File

@@ -30,6 +30,7 @@ from nailgun.api.v1.handlers.cluster import ClusterGeneratedData
from nailgun.api.v1.handlers.cluster import ClusterHandler
from nailgun.api.v1.handlers.cluster import ClusterResetHandler
from nailgun.api.v1.handlers.cluster import ClusterStopDeploymentHandler
from nailgun.api.v1.handlers.cluster import ClusterUpdateHandler
from nailgun.api.v1.handlers.disks import NodeDefaultsDisksHandler
from nailgun.api.v1.handlers.disks import NodeDisksHandler
@@ -129,6 +130,8 @@ urls = (
    ClusterStopDeploymentHandler,
    r'/clusters/(?P<cluster_id>\d+)/reset/?$',
    ClusterResetHandler,
    r'/clusters/(?P<cluster_id>\d+)/update/?$',
    ClusterUpdateHandler,
    r'/clusters/(?P<cluster_id>\d+)/assignment/?$',
    NodeAssignmentHandler,

View File

@@ -22,15 +22,41 @@ from nailgun.objects import Release

 class ClusterValidator(BasicValidator):

     @classmethod
-    def _validate_common(cls, data):
+    def _validate_common(cls, data, instance=None):
         d = cls.validate_json(data)
-        release_id = d.get("release", d.get("release_id", None))
+        release_id = d.get("release", d.get("release_id"))
         if release_id:
-            release = Release.get_by_uid(release_id)
-            if not release:
+            if not Release.get_by_uid(release_id):
                 raise errors.InvalidData(
                     "Invalid release ID", log_message=True)
+        pend_release_id = d.get("pending_release_id")
+        if pend_release_id:
+            pend_release = Release.get_by_uid(pend_release_id,
+                                              fail_if_not_found=True)
+            if not release_id:
+                if not instance:
+                    raise errors.InvalidData(
+                        "Cannot set pending release when "
+                        "there is no current release",
+                        log_message=True
+                    )
+                release_id = instance.release_id
+            curr_release = Release.get_by_uid(release_id)
+
+            def curr_release_can_be_updated_with_pend_release():
+                return release_id == pend_release_id or (
+                    curr_release.operating_system ==
+                    pend_release.operating_system
+                    and curr_release.version in
+                    pend_release.can_update_from_versions)
+
+            if not curr_release_can_be_updated_with_pend_release():
+                raise errors.InvalidData(
+                    "Cannot set pending release as "
+                    "it cannot update current release",
+                    log_message=True
+                )
         return d

     @classmethod
@@ -55,7 +81,7 @@ class ClusterValidator(BasicValidator):

     @classmethod
     def validate_update(cls, data, instance):
-        d = cls._validate_common(data)
+        d = cls._validate_common(data, instance=instance)
         if "name" in d:
             query = ClusterCollection.filter_by_not(None, id=instance.id)
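
Stated plainly, the new check accepts a pending release only when it is the current release itself, or when it shares the operating system and lists the current version in its can_update_from_versions field. A minimal standalone restatement of that rule, using hypothetical release data rather than Nailgun Release objects:

# Illustrative restatement of the validator's compatibility rule
# (plain dicts and hypothetical versions; not Nailgun code).
def can_update(current, pending):
    return current is pending or (
        current["operating_system"] == pending["operating_system"]
        and current["version"] in pending["can_update_from_versions"]
    )

icehouse_centos = {"operating_system": "CentOS", "version": "2014.1",
                   "can_update_from_versions": []}
patched_centos = {"operating_system": "CentOS", "version": "2014.1.1",
                  "can_update_from_versions": ["2014.1"]}
patched_ubuntu = {"operating_system": "Ubuntu", "version": "2014.1.1",
                  "can_update_from_versions": ["2014.1"]}

assert can_update(icehouse_centos, patched_centos)      # same OS, version listed
assert not can_update(icehouse_centos, patched_ubuntu)  # OS mismatch
assert not can_update(patched_centos, icehouse_centos)  # current version not listed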

View File

@@ -21,9 +21,18 @@ from nailgun.errors import errors

class NotificationValidator(BasicValidator):

    @classmethod
    def validate(cls, data):
        d = cls.validate_json(data)
        if d.get('topic') not in consts.NOTIFICATION_TOPICS:
            raise errors.InvalidData(
                "Notification topic is not found or invalid"
            )
        return d

    @classmethod
    def validate_update(cls, data, instance):
        valid = {}
        d = cls.validate_json(data)

View File

@@ -51,7 +51,9 @@ CLUSTER_STATUSES = Enum(
     'stopped',
     'operational',
     'error',
-    'remove'
+    'remove',
+    'update',
+    'update_error'
 )

 NOVA_NET_MANAGERS = Enum(
@@ -128,6 +130,7 @@ TASK_NAMES = Enum(
     'provision',
     'stop_deployment',
     'reset_environment',
+    'update',
     'node_deletion',
     'cluster_deletion',

View File

@@ -0,0 +1,122 @@
"""fuel_5_0_1

Revision ID: 1398619bdf8c
Revises: 1a1504d469f8
Create Date: 2014-05-30 14:46:55.496697

"""

# revision identifiers, used by Alembic.
revision = '1398619bdf8c'
down_revision = '1a1504d469f8'

from alembic import op
import sqlalchemy as sa

from nailgun import consts
from nailgun.db.sqlalchemy.models.fields import JSON
from nailgun.utils.migration import upgrade_enum


task_names_old = (
    'super',
    'deploy',
    'deployment',
    'provision',
    'stop_deployment',
    'reset_environment',
    'node_deletion',
    'cluster_deletion',
    'check_before_deployment',
    'check_networks',
    'verify_networks',
    'check_dhcp',
    'verify_network_connectivity',
    'redhat_setup',
    'redhat_check_credentials',
    'redhat_check_licenses',
    'redhat_download_release',
    'redhat_update_cobbler_profile',
    'dump',
    'capacity_log'
)
task_names_new = consts.TASK_NAMES

cluster_statuses_old = (
    'new',
    'deployment',
    'stopped',
    'operational',
    'error',
    'remove'
)
cluster_statuses_new = consts.CLUSTER_STATUSES


def upgrade():
    ### commands auto generated by Alembic - please adjust! ###
    op.add_column(
        'releases',
        sa.Column(
            'can_update_from_versions',
            JSON(),
            nullable=False
        )
    )
    op.add_column(
        'clusters',
        sa.Column(
            'pending_release_id',
            sa.Integer(),
            nullable=True
        )
    )
    op.create_foreign_key(
        'fk_pending_release_id',
        'clusters',
        'releases',
        ['pending_release_id'],
        ['id'],
    )
    upgrade_enum(
        "clusters",              # table
        "status",                # column
        "cluster_status",        # ENUM name
        cluster_statuses_old,    # old options
        cluster_statuses_new     # new options
    )
    upgrade_enum(
        "tasks",                 # table
        "name",                  # column
        "task_name",             # ENUM name
        task_names_old,          # old options
        task_names_new           # new options
    )
    ### end Alembic commands ###


def downgrade():
    ### commands auto generated by Alembic - please adjust! ###
    upgrade_enum(
        "tasks",                 # table
        "name",                  # column
        "task_name",             # ENUM name
        task_names_new,          # old options
        task_names_old           # new options
    )
    upgrade_enum(
        "clusters",              # table
        "status",                # column
        "cluster_status",        # ENUM name
        cluster_statuses_new,    # old options
        cluster_statuses_old     # new options
    )
    op.drop_constraint(
        'pending_release_id',
        'clusters',
        type_='foreignkey'
    )
    op.drop_column('clusters', 'pending_release_id')
    op.drop_column('releases', 'can_update_from_versions')
    ### end Alembic commands ###

View File

@@ -78,6 +78,7 @@ class Cluster(Base):
    )
    name = Column(Unicode(50), unique=True, nullable=False)
    release_id = Column(Integer, ForeignKey('releases.id'), nullable=False)
    pending_release_id = Column(Integer, ForeignKey('releases.id'))
    nodes = relationship(
        "Node", backref="cluster", cascade="delete", order_by='Node.id')
    tasks = relationship("Task", backref="cluster", cascade="delete")

View File

@@ -50,6 +50,7 @@ class Release(Base):
    id = Column(Integer, primary_key=True)
    name = Column(Unicode(100), nullable=False)
    version = Column(String(30), nullable=False)
    can_update_from_versions = Column(JSON, default=[], nullable=False)
    description = Column(Unicode)
    operating_system = Column(String(50), nullable=False)
    state = Column(
@@ -73,6 +74,7 @@ class Release(Base):
    )
    clusters = relationship(
        "Cluster",
        primaryjoin="Release.id==Cluster.release_id",
        backref="release",
        cascade="all,delete"
    )

View File

@@ -45,6 +45,8 @@ default_messages = {
    "NodeOffline": "Node is offline",
    "NotEnoughControllers": "Not enough controllers",
    "RedHatSetupError": "Red Hat setup error",
    "TaskAlreadyRunning": "A task is already running",
    "InvalidReleaseId": "Release Id is invalid",
    # disk errors
    "NotEnoughFreeSpace": "Not enough free space",

View File

@@ -667,6 +667,7 @@
  fields:
    name: "Icehouse on CentOS 6.5"
    version: "2014.1"
    can_update_from_versions: []
    operating_system: "CentOS"
    description: "This option will install the OpenStack Icehouse packages using a CentOS based operating system. With high availability features built in, you are getting a robust, enterprise-grade OpenStack deployment."
    attributes_metadata:
@@ -704,6 +705,7 @@
  fields:
    name: "Icehouse on Ubuntu 12.04.4"
    version: "2014.1"
    can_update_from_versions: []
    operating_system: "Ubuntu"
    description: "This option will install the OpenStack Icehouse packages using Ubuntu as a base operating system. With high availability features built in, you are getting a robust, enterprise-grade OpenStack deployment."
    attributes_metadata:

View File

@@ -136,6 +136,7 @@ class Cluster(NailgunObject):
                "enum": list(consts.CLUSTER_GROUPING)
            },
            "release_id": {"type": "number"},
            "pending_release_id": {"type": "number"},
            "replaced_deployment_info": {"type": "object"},
            "replaced_provisioning_info": {"type": "object"},
            "is_customized": {"type": "boolean"},

View File

@@ -602,6 +602,13 @@ class Node(NailgunObject):
        )
        return node_dict

    @classmethod
    def can_be_updated(cls, instance):
        return (instance.status in (consts.NODE_STATUSES.ready,
                                    consts.NODE_STATUSES.provisioned)) or \
               (instance.status == consts.NODE_STATUSES.error
                and instance.error_type == consts.NODE_ERRORS.deploy)


class NodeCollection(NailgunCollection):
    """Node collection
View File

@@ -74,17 +74,18 @@ class Notification(NailgunObject):
         if "datetime" not in data:
             data["datetime"] = datetime.now()

         task = None
         exist = None
         if task_uuid:
             task = Task.get_by_uuid(task_uuid)
         if task and node_id:
-            exist = NotificationCollection.filter_by(
-                None,
-                node_id=node_id,
-                message=message,
-                task_id=task.id
-            ).first()
+            exist = NotificationCollection.count(
+                NotificationCollection.filter_by(
+                    None,
+                    node_id=node_id,
+                    message=message,
+                    task_id=task.id
+                )
+            )

         if not exist:
             super(Notification, cls).create(data)
View File

@@ -82,6 +82,7 @@ class Release(NailgunObject):
            "id": {"type": "number"},
            "name": {"type": "string"},
            "version": {"type": "string"},
            "can_update_from_versions": {"type": "array"},
            "description": {"type": "string"},
            "operating_system": {"type": "string"},
            "state": {

View File

@@ -29,7 +29,8 @@ class ClusterSerializer(BasicSerializer):
         "is_customized",
         "net_provider",
         "release_id",
-        "fuel_version"
+        "fuel_version",
+        "pending_release_id"
     )

@@ -45,5 +46,6 @@ class AttributesSerializer(BasicSerializer):
         "is_customized",
         "net_provider",
         "release_id",
-        "fuel_version"
+        "fuel_version",
+        "pending_release_id"
     )

View File

@@ -23,6 +23,7 @@ class ReleaseSerializer(BasicSerializer):
        "id",
        "name",
        "version",
        "can_update_from_versions",
        "description",
        "operating_system",
        "modes_metadata",

View File

@@ -87,12 +87,15 @@ class DeploymentMultinodeSerializer(object):
         attrs = objects.Attributes.merged_attrs_values(
             cluster.attributes
         )
+        release = objects.Release.get_by_uid(cluster.pending_release_id) \
+            if cluster.status == consts.CLUSTER_STATUSES.update \
+            else cluster.release
         attrs['deployment_mode'] = cluster.mode
         attrs['deployment_id'] = cluster.id
-        attrs['openstack_version'] = cluster.release.version
+        attrs['openstack_version'] = release.version
         attrs['fuel_version'] = cluster.fuel_version
         attrs.update(
-            objects.Release.get_orchestrator_data_dict(cluster.release)
+            objects.Release.get_orchestrator_data_dict(release)
         )
         attrs['nodes'] = cls.node_list(get_nodes_not_for_deletion(cluster))
View File

@@ -325,10 +325,12 @@ class NailgunReceiver(object):

     @classmethod
     def _error_action(cls, task, status, progress, message=None):
+        task_name = task.name.title()
         if message:
-            message = u"Deployment has failed. {0}".format(message)
+            message = u"{0} has failed. {1}".format(task_name, message)
         else:
-            message = u"Deployment has failed. Check these nodes:\n{0}".format(
+            message = u"{0} has failed. Check these nodes:\n{1}".format(
+                task_name,
                 cls._generate_error_message(
                     task,
                     error_types=('deploy', 'provision'),
@@ -350,6 +352,7 @@ class NailgunReceiver(object):
             cls._error_action(task, 'error', 100)
             return

+        task_name = task.name.title()
         if task.cluster.mode in ('singlenode', 'multinode'):
             # determining horizon url - it's an IP
             # of a first cluster controller
@@ -371,27 +374,29 @@
                 if public_net:
                     horizon_ip = public_net[0]['ip'].split('/')[0]
                     message = (
-                        u"Deployment of environment '{0}' is done. "
+                        u"{0} of environment '{1}' is done. "
                         "Access the OpenStack dashboard (Horizon) at "
-                        "http://{1}/ or via internal network at http://{2}/"
+                        "http://{2}/ or via internal network at http://{3}/"
                     ).format(
+                        task_name,
                         task.cluster.name,
                         horizon_ip,
                         controller.ip
                     )
                 else:
-                    message = (
-                        u"Deployment of environment '{0}' is done"
-                    ).format(task.cluster.name)
+                    message = u"{0} of environment '{1}' is done".format(
+                        task_name,
+                        task.cluster.name
+                    )
                     logger.warning(
                         u"Public ip for controller node "
                         "not found in '{0}'".format(task.cluster.name)
                     )
             else:
-                message = (
-                    u"Deployment of environment"
-                    " '{0}' is done"
-                ).format(task.cluster.name)
+                message = u"{0} of environment '{1}' is done".format(
+                    task_name,
+                    task.cluster.name
+                )
                 logger.warning(u"Controller node not found in '{0}'".format(
                     task.cluster.name
                 ))
@@ -400,9 +405,10 @@
             # from a public network saved in task cache
             try:
                 message = (
-                    u"Deployment of environment '{0}' is done. "
-                    "Access the OpenStack dashboard (Horizon) at {1}"
+                    u"{0} of environment '{1}' is done. "
+                    "Access the OpenStack dashboard (Horizon) at {2}"
                 ).format(
+                    task_name,
                     task.cluster.name,
                     objects.Cluster.get_network_manager(
                         task.cluster
@@ -413,10 +419,10 @@
                     str(exc),
                     traceback.format_exc()
                 ]))
-                message = (
-                    u"Deployment of environment"
-                    " '{0}' is done"
-                ).format(task.cluster.name)
+                message = u"{0} of environment '{1}' is done".format(
+                    task_name,
+                    task.cluster.name
+                )
                 logger.warning(
                     u"Cannot find virtual IP for '{0}'".format(
                         task.cluster.name

View File

@@ -19,6 +19,8 @@ import shutil

from sqlalchemy import or_

from nailgun import consts
from nailgun import objects
from nailgun.db import db
@@ -278,6 +280,15 @@ class TaskHelper(object):
                cls.__set_cluster_status(cluster, 'error')
            else:
                cls.__set_cluster_status(cluster, 'stopped')
        elif task.name == consts.TASK_NAMES.update:
            if task.status == consts.TASK_STATUSES.error:
                cls.__set_cluster_status(cluster,
                                         consts.CLUSTER_STATUSES.update_error)
            elif task.status == consts.TASK_STATUSES.ready:
                cls.__set_cluster_status(cluster,
                                         consts.CLUSTER_STATUSES.operational)
                cluster.release_id = cluster.pending_release_id
                cluster.pending_release_id = None

        db().commit()
@@ -408,6 +419,18 @@ class TaskHelper(object):
            cluster.nodes
        ), key=lambda n: n.id)

    @classmethod
    def nodes_to_upgrade(cls, cluster):
        nodes_to_upgrade = filter(
            lambda n: objects.Node.can_be_updated(n),
            cluster.nodes
        )

        if cluster.is_ha_mode:
            return cls.__nodes_to_deploy_ha(cluster, nodes_to_upgrade)

        return sorted(nodes_to_upgrade, key=lambda n: n.id)

    @classmethod
    def __nodes_to_deploy_ha(cls, cluster, nodes):
        """Get nodes for deployment for ha mode

View File

@@ -510,6 +510,62 @@ class ResetEnvironmentTaskManager(TaskManager):
        return task


class UpdateEnvironmentTaskManager(TaskManager):

    def execute(self):
        if not self.cluster.pending_release_id:
            raise errors.InvalidReleaseId(
                u"Can't update environment '{0}' when "
                u"new release Id is invalid".format(self.cluster.name))

        running_tasks = db().query(Task).filter_by(
            cluster_id=self.cluster.id,
            status='running'
        ).filter(
            Task.name.in_([
                'deploy',
                'deployment',
                'reset_environment',
                'stop_deployment'
            ])
        )
        if running_tasks.first():
            raise errors.TaskAlreadyRunning(
                u"Can't update environment '{0}' when "
                u"other task is running".format(
                    self.cluster.id
                )
            )

        nodes_to_change = TaskHelper.nodes_to_upgrade(self.cluster)
        TaskHelper.update_slave_nodes_fqdn(nodes_to_change)
        logger.debug('Nodes to update: {0}'.format(
            ' '.join([n.fqdn for n in nodes_to_change])))
        task_update = Task(name='update', cluster=self.cluster)
        db().add(task_update)
        self.cluster.status = 'update'
        db().flush()

        deployment_message = self._call_silently(
            task_update,
            tasks.UpdateTask,
            nodes_to_change,
            method_name='message')

        db().refresh(task_update)
        task_update.cache = deployment_message

        for node in nodes_to_change:
            node.status = 'deploying'
            node.progress = 0

        db().commit()
        rpc.cast('naily', deployment_message)

        return task_update


class CheckNetworksTaskManager(TaskManager):

    def execute(self, data, check_admin_untagged=False):
View File

@@ -153,6 +153,38 @@ class DeploymentTask(object):
        )


class UpdateTask(object):

    @classmethod
    def message(cls, task, nodes):
        logger.debug("%s.message(task=%s)", cls.__class__.__name__, task.uuid)

        for n in nodes:
            if n.pending_roles:
                n.roles += n.pending_roles
                n.pending_roles = []
            n.status = 'provisioned'
            n.progress = 0

        # here we replace deployment data if user redefined them
        serialized_cluster = task.cluster.replaced_deployment_info or \
            deployment_serializers.serialize(task.cluster, nodes)

        # After serialization set pending_addition to False
        for node in nodes:
            node.pending_addition = False

        db().commit()

        return make_astute_message(
            'deploy',
            'deploy_resp',
            {
                'task_uuid': task.uuid,
                'deployment_info': serialized_cluster
            }
        )


class ProvisionTask(object):

    @classmethod
View File

@@ -11,6 +11,9 @@
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 # License for the specific language governing permissions and limitations
 # under the License.

+from mock import patch
+
 from nailgun.openstack.common import jsonutils as json
 from nailgun.test import base

@@ -83,7 +86,8 @@ class TestPutSameJson(base.BaseIntegrationTest):
             cluster, 200
         )

-    def test_cluster_changes(self):
+    @patch('nailgun.rpc.cast')
+    def test_cluster_changes(self, mock_rpc):
         cluster = self.http_get(
             'ClusterHandler', {
                 'obj_id': self.cluster.id

View File

@@ -665,6 +665,83 @@ class TestDhcpCheckTask(BaseIntegrationTest):
        self.assertEqual(self.task.result, {})


class TestClusterUpdate(BaseIntegrationTest):

    def setUp(self):
        super(TestClusterUpdate, self).setUp()
        self.receiver = rcvr.NailgunReceiver()
        cluster_id = self.env.create(
            cluster_kwargs={},
            nodes_kwargs=[
                {"api": False},
                {"api": False}]
        )['id']
        self.cluster = self.db.query(Cluster).get(cluster_id)
        self.cluster.pending_release_id = self.cluster.release_id
        self.cluster.status = 'update'
        self.db.commit()
        self.task = Task(
            uuid=str(uuid.uuid4()),
            name="update",
            cluster_id=self.cluster.id
        )
        self.db.add(self.task)
        self.db.commit()

    def test_node_deploy_resp_ready(self):
        node1, node2 = self.env.nodes
        kwargs = {'task_uuid': self.task.uuid,
                  'status': 'ready',
                  'nodes': [{'uid': node1.id, 'status': 'ready'},
                            {'uid': node2.id, 'status': 'ready'}]}

        self.receiver.deploy_resp(**kwargs)
        self.db.refresh(node1)
        self.db.refresh(node2)
        self.db.refresh(self.task)
        self.db.refresh(self.cluster)
        self.assertEqual((node1.status, node2.status),
                         ("ready", "ready"))
        self.assertEqual(self.task.status, "ready")
        self.assertEqual(self.cluster.status, "operational")
        self.assertEqual(self.cluster.pending_release_id, None)

    def test_node_deploy_resp_node_error(self):
        node1, node2 = self.env.nodes
        kwargs = {'task_uuid': self.task.uuid,
                  'nodes': [{'uid': node1.id, 'status': 'ready'},
                            {'uid': node2.id, 'status': 'error'}]}

        self.receiver.deploy_resp(**kwargs)
        self.db.refresh(node1)
        self.db.refresh(node2)
        self.db.refresh(self.task)
        self.db.refresh(self.cluster)
        self.assertEqual((node1.status, node2.status),
                         ("ready", "error"))
        self.assertEqual(self.task.status, "running")
        self.assertEqual(self.cluster.status, "update")
        self.assertEqual(self.cluster.pending_release_id,
                         self.cluster.release_id)

    def test_node_deploy_resp_update_error(self):
        node1, node2 = self.env.nodes
        kwargs = {'task_uuid': self.task.uuid,
                  'status': 'error',
                  'nodes': [{'uid': node1.id, 'status': 'ready'},
                            {'uid': node2.id, 'status': 'error'}]}

        self.receiver.deploy_resp(**kwargs)
        self.db.refresh(node1)
        self.db.refresh(node2)
        self.db.refresh(self.task)
        self.db.refresh(self.cluster)
        self.assertEqual((node1.status, node2.status),
                         ("ready", "error"))
        self.assertEqual(self.task.status, "error")
        self.assertEqual(self.cluster.status, "update_error")
        self.assertEqual(self.cluster.pending_release_id,
                         self.cluster.release_id)


class TestConsumer(BaseIntegrationTest):

    def setUp(self):
View File

@@ -0,0 +1,50 @@
# Copyright 2014 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from alembic import op
import sqlalchemy as sa


def upgrade_enum(table, column_name, enum_name, old_options, new_options):
    old_type = sa.Enum(*old_options, name=enum_name)
    new_type = sa.Enum(*new_options, name=enum_name)
    tmp_type = sa.Enum(*new_options, name="_" + enum_name)
    # Create a temporary type, convert and drop the "old" type
    tmp_type.create(op.get_bind(), checkfirst=False)
    op.execute(
        u'ALTER TABLE {0} ALTER COLUMN {1} TYPE _{2}'
        u' USING {1}::text::_{2}'.format(
            table,
            column_name,
            enum_name
        )
    )
    old_type.drop(op.get_bind(), checkfirst=False)
    # Create and convert to the "new" type
    new_type.create(op.get_bind(), checkfirst=False)
    op.execute(
        u'ALTER TABLE {0} ALTER COLUMN {1} TYPE {2}'
        u' USING {1}::text::{2}'.format(
            table,
            column_name,
            enum_name
        )
    )
    tmp_type.drop(op.get_bind(), checkfirst=False)


def drop_enum(name):
    op.execute(
        u'DROP TYPE {0}'.format(name)
    )
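
drop_enum has no caller in this commit; a hypothetical companion example of how it would typically be used when a downgrade removes a column together with its ENUM type:

# Illustrative only (hypothetical table and type names, not part of this commit).
def downgrade():
    op.drop_column('nodes', 'hypothetical_state')
    drop_enum('hypothetical_state')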