summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJenkins <jenkins@review.openstack.org>2017-08-14 05:19:24 +0000
committerGerrit Code Review <review@openstack.org>2017-08-14 05:19:24 +0000
commit778cac30cd807ed30253f8695c269f159a509058 (patch)
tree7d5adb60d876199d001b2cedc345efd7fdeb1fa9
parent540887159b110756cd02c4399ddbcdb382804b19 (diff)
parent4173aaf0396890ba47ce9c34aa38bf87945e9144 (diff)
Merge "Make provision to evacuate all instances"
-rw-r--r--masakari/engine/drivers/taskflow/host_failure.py171
-rw-r--r--masakari/tests/unit/engine/drivers/taskflow/test_host_failure_flow.py109
-rw-r--r--masakari/tests/unit/fakes.py13
3 files changed, 158 insertions, 135 deletions
diff --git a/masakari/engine/drivers/taskflow/host_failure.py b/masakari/engine/drivers/taskflow/host_failure.py
index 3b1e003..9f440cc 100644
--- a/masakari/engine/drivers/taskflow/host_failure.py
+++ b/masakari/engine/drivers/taskflow/host_failure.py
@@ -19,6 +19,7 @@ from eventlet import timeout as etimeout
19 19
20from oslo_log import log as logging 20from oslo_log import log as logging
21from oslo_service import loopingcall 21from oslo_service import loopingcall
22from oslo_utils import excutils
22from oslo_utils import strutils 23from oslo_utils import strutils
23import taskflow.engines 24import taskflow.engines
24from taskflow.patterns import linear_flow 25from taskflow.patterns import linear_flow
@@ -37,6 +38,9 @@ LOG = logging.getLogger(__name__)
37 38
38ACTION = 'instance:evacuate' 39ACTION = 'instance:evacuate'
39 40
41# Instance power_state
42SHUTDOWN = 4
43
40 44
41class DisableComputeServiceTask(base.MasakariTask): 45class DisableComputeServiceTask(base.MasakariTask):
42 def __init__(self, novaclient): 46 def __init__(self, novaclient):
@@ -95,72 +99,119 @@ class EvacuateInstancesTask(base.MasakariTask):
95 requires=requires) 99 requires=requires)
96 self.novaclient = novaclient 100 self.novaclient = novaclient
97 101
102 def _get_state_and_host_of_instance(self, context, instance):
103 new_instance = self.novaclient.get_server(context, instance.id)
104 instance_host = getattr(new_instance,
105 "OS-EXT-SRV-ATTR:hypervisor_hostname")
106 old_vm_state = getattr(instance, "OS-EXT-STS:vm_state")
107 new_vm_state = getattr(new_instance, "OS-EXT-STS:vm_state")
108
109 return (old_vm_state, new_vm_state, instance_host)
110
111 def _stop_after_evacuation(self, context, instance):
112 def _wait_for_stop_confirmation():
113 old_vm_state, new_vm_state, _ = (
114 self._get_state_and_host_of_instance(context, instance))
115
116 if new_vm_state == 'stopped':
117 raise loopingcall.LoopingCallDone()
118
119 periodic_call_stopped = loopingcall.FixedIntervalLoopingCall(
120 _wait_for_stop_confirmation)
121
122 try:
123 self.novaclient.stop_server(context, instance.id)
124 # confirm instance is stopped after recovery
125 periodic_call_stopped.start(interval=CONF.verify_interval)
126 etimeout.with_timeout(
127 CONF.wait_period_after_power_off,
128 periodic_call_stopped.wait)
129 except etimeout.Timeout:
130 with excutils.save_and_reraise_exception():
131 msg = ("Instance '%s' is successfully evacuated but "
132 "failed to stop.")
133 LOG.warning(msg, instance.id)
134 finally:
135 periodic_call_stopped.stop()
136
98 def _evacuate_and_confirm(self, context, instance, host_name, 137 def _evacuate_and_confirm(self, context, instance, host_name,
99 failed_evacuation_instances, reserved_host=None): 138 failed_evacuation_instances, reserved_host=None):
100 vm_state = getattr(instance, "OS-EXT-STS:vm_state") 139 # Before locking the instance check whether it is already locked
101 if vm_state in ['active', 'error', 'resized', 'stopped']: 140 # by user, if yes don't lock the instance
102 141 instance_already_locked = self.novaclient.get_server(
103 # Before locking the instance check whether it is already locked 142 context, instance.id).locked
104 # by user, if yes don't lock the instance 143
105 instance_already_locked = self.novaclient.get_server( 144 if not instance_already_locked:
106 context, instance.id).locked 145 # lock the instance so that until evacuation and confirmation
107 146 # is not complete, user won't be able to perform any actions
108 if not instance_already_locked: 147 # on the instance.
109 # lock the instance so that until evacuation and confirmation 148 self.novaclient.lock_server(context, instance.id)
110 # is not complete, user won't be able to perform any actions 149
111 # on the instance. 150 def _wait_for_evacuation_confirmation():
112 self.novaclient.lock_server(context, instance.id) 151 old_vm_state, new_vm_state, instance_host = (
113 152 self._get_state_and_host_of_instance(context, instance))
114 def _wait_for_evacuation(): 153
115 new_instance = self.novaclient.get_server(context, instance.id) 154 if instance_host != host_name:
116 instance_host = getattr(new_instance, 155 if ((old_vm_state == 'error' and
117 "OS-EXT-SRV-ATTR:hypervisor_hostname") 156 new_vm_state == 'active') or
118 old_vm_state = getattr(instance, "OS-EXT-STS:vm_state") 157 old_vm_state == new_vm_state):
119 new_vm_state = getattr(new_instance, "OS-EXT-STS:vm_state") 158 raise loopingcall.LoopingCallDone()
120 159
121 if instance_host != host_name: 160 try:
122 if ((old_vm_state == 'error' and 161 vm_state = getattr(instance, "OS-EXT-STS:vm_state")
123 new_vm_state == 'active') or 162
124 old_vm_state == new_vm_state): 163 # Nova evacuates an instance only when vm_state is in active,
125 raise loopingcall.LoopingCallDone() 164 # stopped or error state. If an instance is in other than active,
165 # error and stopped vm_state, masakari resets the instance state
166 # to *error* so that the instance can be evacuated.
167 stop_instance = True
168 if vm_state not in ['active', 'error', 'stopped']:
169 self.novaclient.reset_instance_state(context, instance.id)
170 instance = self.novaclient.get_server(context, instance.id)
171 power_state = getattr(instance, "OS-EXT-STS:power_state")
172 if vm_state == 'resized' and power_state != SHUTDOWN:
173 stop_instance = False
174
175 vm_state = getattr(instance, "OS-EXT-STS:vm_state")
176
177 # evacuate the instance
178 self.novaclient.evacuate_instance(
179 context, instance.id,
180 target=reserved_host.name if reserved_host else None)
181
182 periodic_call = loopingcall.FixedIntervalLoopingCall(
183 _wait_for_evacuation_confirmation)
126 184
127 try: 185 try:
128 # Nova evacuates an instance only when vm_state is in active, 186 # add a timeout to the periodic call.
129 # stopped or error state. If an instance is in resized 187 periodic_call.start(interval=CONF.verify_interval)
130 # vm_state, masakari resets the instance state to *error* so 188 etimeout.with_timeout(
131 # that the instance can be evacuated. 189 CONF.wait_period_after_evacuation,
132 if vm_state == 'resized': 190 periodic_call.wait)
133 self.novaclient.reset_instance_state(context, instance.id) 191
134 192 if vm_state not in ['active', 'stopped']:
135 # evacuate the instance 193 if stop_instance:
136 self.novaclient.evacuate_instance( 194 self._stop_after_evacuation(context, instance)
137 context, instance.id, 195 # If the instance was in 'error' state before failure
138 target=reserved_host.name if reserved_host else None) 196 # it should be set to 'error' after recovery.
139 197 if vm_state == 'error':
140 periodic_call = loopingcall.FixedIntervalLoopingCall( 198 self.novaclient.reset_instance_state(
141 _wait_for_evacuation) 199 context, instance.id)
142 200 except etimeout.Timeout:
143 try: 201 # Instance is not evacuated in the expected time_limit.
144 # add a timeout to the periodic call.
145 periodic_call.start(interval=CONF.verify_interval)
146 etimeout.with_timeout(
147 CONF.wait_period_after_evacuation,
148 periodic_call.wait)
149 except etimeout.Timeout:
150 # Instance is not evacuated in the expected time_limit.
151 failed_evacuation_instances.append(instance.id)
152 finally:
153 # stop the periodic call, in case of exceptions or
154 # Timeout.
155 periodic_call.stop()
156 except Exception:
157 # Exception is raised while resetting instance state or
158 # evacuating the instance itself.
159 failed_evacuation_instances.append(instance.id) 202 failed_evacuation_instances.append(instance.id)
160 finally: 203 finally:
161 if not instance_already_locked: 204 # stop the periodic call, in case of exceptions or
162 # Unlock the server after evacuation and confirmation 205 # Timeout.
163 self.novaclient.unlock_server(context, instance.id) 206 periodic_call.stop()
207 except Exception:
208 # Exception is raised while resetting instance state or
209 # evacuating the instance itself.
210 failed_evacuation_instances.append(instance.id)
211 finally:
212 if not instance_already_locked:
213 # Unlock the server after evacuation and confirmation
214 self.novaclient.unlock_server(context, instance.id)
164 215
165 def execute(self, context, host_name, instance_list, reserved_host=None): 216 def execute(self, context, host_name, instance_list, reserved_host=None):
166 def _do_evacuate(context, host_name, instance_list, 217 def _do_evacuate(context, host_name, instance_list,
diff --git a/masakari/tests/unit/engine/drivers/taskflow/test_host_failure_flow.py b/masakari/tests/unit/engine/drivers/taskflow/test_host_failure_flow.py
index d17e243..45172bc 100644
--- a/masakari/tests/unit/engine/drivers/taskflow/test_host_failure_flow.py
+++ b/masakari/tests/unit/engine/drivers/taskflow/test_host_failure_flow.py
@@ -18,6 +18,7 @@ Unit Tests for host failure TaskFlow
18""" 18"""
19import copy 19import copy
20 20
21import ddt
21import mock 22import mock
22 23
23from masakari.compute import nova 24from masakari.compute import nova
@@ -32,6 +33,7 @@ from masakari.tests.unit import fakes
32CONF = conf.CONF 33CONF = conf.CONF
33 34
34 35
36@ddt.ddt
35@mock.patch.object(nova.API, "enable_disable_service") 37@mock.patch.object(nova.API, "enable_disable_service")
36@mock.patch.object(nova.API, "lock_server") 38@mock.patch.object(nova.API, "lock_server")
37@mock.patch.object(nova.API, "unlock_server") 39@mock.patch.object(nova.API, "unlock_server")
@@ -55,8 +57,8 @@ class HostFailureTestCase(test.TestCase):
55 for server in self.novaclient.get_servers(self.ctxt, 57 for server in self.novaclient.get_servers(self.ctxt,
56 self.instance_host): 58 self.instance_host):
57 instance = self.novaclient.get_server(self.ctxt, server.id) 59 instance = self.novaclient.get_server(self.ctxt, server.id)
58 self.assertEqual('active', 60 self.assertIn(getattr(instance, 'OS-EXT-STS:vm_state'),
59 getattr(instance, 'OS-EXT-STS:vm_state')) 61 ['active', 'stopped', 'error'])
60 self.assertNotEqual(self.instance_host, 62 self.assertNotEqual(self.instance_host,
61 getattr(instance, 63 getattr(instance,
62 'OS-EXT-SRV-ATTR:hypervisor_hostname')) 64 'OS-EXT-SRV-ATTR:hypervisor_hostname'))
@@ -155,22 +157,47 @@ class HostFailureTestCase(test.TestCase):
155 self.assertIn(reserved_host.name, 157 self.assertIn(reserved_host.name,
156 self.fake_client.aggregates.get('1').hosts) 158 self.fake_client.aggregates.get('1').hosts)
157 159
160 @ddt.data('active', 'rescued', 'paused', 'shelved', 'suspended',
161 'error', 'stopped', 'resized')
158 @mock.patch('masakari.compute.nova.novaclient') 162 @mock.patch('masakari.compute.nova.novaclient')
159 def test_evacuate_instances_task(self, _mock_novaclient, mock_unlock, 163 def test_host_failure_flow_all_instances(
160 mock_lock, mock_enable_disable): 164 self, vm_state, _mock_novaclient, mock_unlock, mock_lock,
165 mock_enable_disable):
161 _mock_novaclient.return_value = self.fake_client 166 _mock_novaclient.return_value = self.fake_client
162 167
163 # create test data 168 # create ha_enabled test data
169 power_state = 4 if vm_state == 'resized' else None
164 self.fake_client.servers.create(id="1", host=self.instance_host, 170 self.fake_client.servers.create(id="1", host=self.instance_host,
165 vm_state="error", ha_enabled=True) 171 vm_state=vm_state,
172 power_state=power_state,
173 ha_enabled=True)
166 self.fake_client.servers.create(id="2", host=self.instance_host, 174 self.fake_client.servers.create(id="2", host=self.instance_host,
167 vm_state="error", ha_enabled=True) 175 vm_state=vm_state,
176 power_state=power_state,
177 ha_enabled=True)
178 instance_list = {
179 "instance_list": self.fake_client.servers.list()
180 }
168 181
169 # execute DisableComputeServiceTask 182 # execute EvacuateInstancesTask
170 self._test_disable_compute_service(mock_enable_disable) 183 self._evacuate_instances(instance_list, mock_enable_disable)
171 184
172 # execute PrepareHAEnabledInstancesTask 185 @mock.patch('masakari.compute.nova.novaclient')
173 instance_list = self._test_instance_list() 186 def test_host_failure_flow_all_instances_active_resized_instance(
187 self, _mock_novaclient, mock_unlock, mock_lock,
188 mock_enable_disable):
189 _mock_novaclient.return_value = self.fake_client
190
191 # create ha_enabled test data
192 self.fake_client.servers.create(id="1", host=self.instance_host,
193 vm_state='resized',
194 ha_enabled=True)
195 self.fake_client.servers.create(id="2", host=self.instance_host,
196 vm_state='resized',
197 ha_enabled=True)
198 instance_list = {
199 "instance_list": self.fake_client.servers.list()
200 }
174 201
175 # execute EvacuateInstancesTask 202 # execute EvacuateInstancesTask
176 self._evacuate_instances(instance_list, mock_enable_disable) 203 self._evacuate_instances(instance_list, mock_enable_disable)
@@ -220,66 +247,6 @@ class HostFailureTestCase(test.TestCase):
220 self._evacuate_instances, instance_list, mock_enable_disable) 247 self._evacuate_instances, instance_list, mock_enable_disable)
221 248
222 @mock.patch('masakari.compute.nova.novaclient') 249 @mock.patch('masakari.compute.nova.novaclient')
223 def test_host_failure_flow_resized_instance(
224 self, _mock_novaclient, mock_unlock, mock_lock,
225 mock_enable_disable):
226 _mock_novaclient.return_value = self.fake_client
227
228 # create ha_enabled test data
229 self.fake_client.servers.create(id="1", host=self.instance_host,
230 vm_state="resized",
231 ha_enabled=True)
232 self.fake_client.servers.create(id="2", host=self.instance_host,
233 vm_state="resized",
234 ha_enabled=True)
235 instance_list = {
236 "instance_list": self.fake_client.servers.list()
237 }
238
239 # execute EvacuateInstancesTask
240 self._evacuate_instances(instance_list, mock_enable_disable)
241
242 @mock.patch('masakari.compute.nova.novaclient')
243 def test_host_failure_flow_shutdown_instance(
244 self, _mock_novaclient, mock_unlock, mock_lock,
245 mock_enable_disable):
246 _mock_novaclient.return_value = self.fake_client
247
248 # create ha_enabled test data
249 self.fake_client.servers.create(id="1", host=self.instance_host,
250 vm_state="stopped",
251 ha_enabled=True)
252 self.fake_client.servers.create(id="2", host=self.instance_host,
253 vm_state="stopped",
254 ha_enabled=True)
255 instance_list = {
256 "instance_list": self.fake_client.servers.list()
257 }
258
259 # execute EvacuateInstancesTask
260 self._evacuate_instances(instance_list, mock_enable_disable)
261
262 @mock.patch('masakari.compute.nova.novaclient')
263 def test_host_failure_flow_instance_in_error(
264 self, _mock_novaclient, mock_unlock, mock_lock,
265 mock_enable_disable):
266 _mock_novaclient.return_value = self.fake_client
267
268 # create ha_enabled test data
269 self.fake_client.servers.create(id="1", host=self.instance_host,
270 vm_state="error",
271 ha_enabled=True)
272 self.fake_client.servers.create(id="2", host=self.instance_host,
273 vm_state="error",
274 ha_enabled=True)
275 instance_list = {
276 "instance_list": self.fake_client.servers.list()
277 }
278
279 # execute EvacuateInstancesTask
280 self._evacuate_instances(instance_list, mock_enable_disable)
281
282 @mock.patch('masakari.compute.nova.novaclient')
283 def test_host_failure_flow_no_instances_on_host( 250 def test_host_failure_flow_no_instances_on_host(
284 self, _mock_novaclient, mock_unlock, mock_lock, 251 self, _mock_novaclient, mock_unlock, mock_lock,
285 mock_enable_disable): 252 mock_enable_disable):
diff --git a/masakari/tests/unit/fakes.py b/masakari/tests/unit/fakes.py
index 4cc7897..7fd4907 100644
--- a/masakari/tests/unit/fakes.py
+++ b/masakari/tests/unit/fakes.py
@@ -24,12 +24,13 @@ NOW = timeutils.utcnow().replace(microsecond=0)
24class FakeNovaClient(object): 24class FakeNovaClient(object):
25 class Server(object): 25 class Server(object):
26 def __init__(self, id=None, uuid=None, host=None, vm_state=None, 26 def __init__(self, id=None, uuid=None, host=None, vm_state=None,
27 ha_enabled=None, locked=False): 27 power_state=1, ha_enabled=None, locked=False):
28 self.id = id 28 self.id = id
29 self.uuid = uuid or uuidutils.generate_uuid() 29 self.uuid = uuid or uuidutils.generate_uuid()
30 self.host = host 30 self.host = host
31 setattr(self, 'OS-EXT-SRV-ATTR:hypervisor_hostname', host) 31 setattr(self, 'OS-EXT-SRV-ATTR:hypervisor_hostname', host)
32 setattr(self, 'OS-EXT-STS:vm_state', vm_state) 32 setattr(self, 'OS-EXT-STS:vm_state', vm_state)
33 setattr(self, 'OS-EXT-STS:power_state', power_state)
33 self.metadata = {"HA_Enabled": ha_enabled} 34 self.metadata = {"HA_Enabled": ha_enabled}
34 self.locked = locked 35 self.locked = locked
35 36
@@ -38,9 +39,10 @@ class FakeNovaClient(object):
38 self._servers = [] 39 self._servers = []
39 40
40 def create(self, id, uuid=None, host=None, vm_state='active', 41 def create(self, id, uuid=None, host=None, vm_state='active',
41 ha_enabled=False): 42 power_state=1, ha_enabled=False):
42 server = FakeNovaClient.Server(id=id, uuid=uuid, host=host, 43 server = FakeNovaClient.Server(id=id, uuid=uuid, host=host,
43 vm_state=vm_state, 44 vm_state=vm_state,
45 power_state=power_state,
44 ha_enabled=ha_enabled) 46 ha_enabled=ha_enabled)
45 self._servers.append(server) 47 self._servers.append(server)
46 return server 48 return server
@@ -69,9 +71,12 @@ class FakeNovaClient(object):
69 if not host: 71 if not host:
70 host = 'fake-host-1' 72 host = 'fake-host-1'
71 server = self.get(uuid) 73 server = self.get(uuid)
72 # pretending that instance is evacuated successfully on given host
73 setattr(server, 'OS-EXT-SRV-ATTR:hypervisor_hostname', host) 74 setattr(server, 'OS-EXT-SRV-ATTR:hypervisor_hostname', host)
74 setattr(server, 'OS-EXT-STS:vm_state', 'active') 75 # pretending that instance is evacuated successfully on given host
76 if getattr(server, "OS-EXT-STS:vm_state") == 'active':
77 setattr(server, 'OS-EXT-STS:vm_state', 'active')
78 else:
79 setattr(server, 'OS-EXT-STS:vm_state', 'stopped')
75 80
76 def stop(self, id): 81 def stop(self, id):
77 server = self.get(id) 82 server = self.get(id)