Set instance host field after resource claim

Set the 'host' field on the instance after the resource tracker
on the compute node has accepted the build.

The field is set after resources are confirmed to be available, while the
COMPUTE_RESOURCES_SEMAPHORE is held.  The semaphore ensures the
resource usage values stay consistent even if the
update_available_resource periodic task audit runs.

bug 1060255

(cherry picked from commit 5fd7a9dba1)
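
As a rough illustration of the ordering described above, here is a minimal,
self-contained Python sketch; the ToyResourceTracker class, its attributes,
and the lock object are invented for illustration and are not part of the
patch:

import threading

# Stand-in for the lock behind nova's COMPUTE_RESOURCES_SEMAPHORE.
COMPUTE_RESOURCES_SEMAPHORE = threading.Lock()


class ToyResourceTracker(object):
    """Claims memory for an instance and tags it with the host while the
    same lock that a periodic audit would take is held."""

    def __init__(self, host, free_ram_mb):
        self.host = host
        self.free_ram_mb = free_ram_mb

    def begin_resource_claim(self, instance):
        with COMPUTE_RESOURCES_SEMAPHORE:
            if instance['memory_mb'] > self.free_ram_mb:
                return None  # claim rejected; the host field stays unset
            self.free_ram_mb -= instance['memory_mb']
            # Resources are confirmed available, so mark the instance as
            # belonging to this host before the lock is released; an audit
            # acquiring the lock later sees usage and ownership together.
            instance['host'] = self.host
            return instance


tracker = ToyResourceTracker('compute-1', free_ram_mb=2048)
print(tracker.begin_resource_claim({'memory_mb': 512, 'host': None}))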

Also includes the following bugfix to the original patch:

Set host prior to allocating network information.

Make sure the 'host' field on the instance is set before allocating
network information.

bug 1065004

(cherry picked from commit 2649f14673)
Change-Id: I92105ec14924960ac8ef7ca8c810783085314e10
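
Likewise, a small self-contained sketch of the ordering the follow-up fix
enforces: the host is recorded on the instance before any network allocation
is attempted. ToyNetworkAPI and build() are illustrative stand-ins, not nova
code, and only mimic the rough failure mode behind bug 1065004:

class ToyNetworkAPI(object):
    """Toy network layer: allocation needs to know which host owns the
    instance, so it refuses to proceed if the host is still unset."""

    def allocate_for_instance(self, context, instance):
        if not instance.get('host'):
            raise ValueError("instance host must be set before allocation")
        return [{'network': 'fake-net', 'host': instance['host']}]


def build(context, instance, host, network_api):
    # Ordering enforced by the fix: tag the instance with its host first,
    # then ask for network information.
    instance['host'] = host
    return network_api.allocate_for_instance(context, instance)


print(build(None, {'uuid': 'fake-uuid', 'host': None}, 'compute-1',
            ToyNetworkAPI()))
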
Brian Elliott 2012-10-08 18:35:14 +00:00 committed by Vishvananda Ishaya
parent 197398fdf6
commit 284f6ea64b
8 changed files with 60 additions and 34 deletions

@@ -479,17 +479,14 @@ class ComputeManager(manager.SchedulerDependentManager):
self._notify_about_instance_usage(
context, instance, "create.start",
extra_usage_info=extra_usage_info)
network_info = self._allocate_network(context, instance,
requested_networks)
network_info = None
try:
limits = filter_properties.get('limits', {})
with self.resource_tracker.resource_claim(context, instance,
limits):
# Resources are available to build this instance here,
# mark it as belonging to this host:
self._instance_update(context, instance['uuid'],
host=self.host, launched_on=self.host)
network_info = self._allocate_network(context, instance,
requested_networks)
block_device_info = self._prep_block_device(context,
instance)
instance = self._spawn(context, instance, image_meta,
@@ -534,7 +531,7 @@ class ComputeManager(manager.SchedulerDependentManager):
try:
self._deallocate_network(context, instance)
except Exception:
# do not attempt retry if network de-allocation occurs:
# do not attempt retry if network de-allocation failed:
_log_original_error()
raise

@@ -23,6 +23,7 @@ from nova.compute import vm_states
from nova import db
from nova import exception
from nova import flags
from nova import notifications
from nova.openstack.common import cfg
from nova.openstack.common import importutils
from nova.openstack.common import jsonutils
@@ -152,8 +153,18 @@ class ResourceTracker(object):
failed.
"""
if self.disabled:
# compute_driver doesn't support resource tracking, just
# set the 'host' field and continue the build:
instance_ref = self._set_instance_host(context,
instance_ref['uuid'])
return
# sanity check:
if instance_ref['host']:
LOG.warning(_("Host field should be not be set on the instance "
"until resources have been claimed."),
instance=instance_ref)
if not limits:
limits = {}
@@ -184,6 +195,8 @@ class ResourceTracker(object):
if not self._can_claim_cpu(vcpus, vcpu_limit):
return
instance_ref = self._set_instance_host(context, instance_ref['uuid'])
# keep track of this claim until we know whether the compute operation
# was successful/completed:
claim = Claim(instance_ref, timeout)
@@ -196,6 +209,17 @@ class ResourceTracker(object):
self._update(context, self.compute_node)
return claim
def _set_instance_host(self, context, instance_uuid):
"""Tag the instance as belonging to this host. This should be done
while the COMPUTE_RESOURCES_SEMAPHORE is being held so the resource
claim will not be lost if the audit process starts.
"""
values = {'host': self.host, 'launched_on': self.host}
(old_ref, instance_ref) = db.instance_update_and_get_original(context,
instance_uuid, values)
notifications.send_update(context, old_ref, instance_ref)
return instance_ref
def _can_claim_memory(self, memory_mb, memory_mb_limit):
"""Test if memory needed for a claim can be safely allocated"""
# Installed memory and usage info:

@@ -68,7 +68,7 @@ class ChanceScheduler(driver.Scheduler):
host = self._schedule(context, 'compute', request_spec,
filter_properties)
updated_instance = driver.instance_update_db(context,
instance_uuid, host)
instance_uuid)
self.compute_rpcapi.run_instance(context,
instance=updated_instance, host=host,
requested_networks=requested_networks,

@@ -101,13 +101,13 @@ def cast_to_volume_host(context, host, method, **kwargs):
LOG.debug(_("Casted '%(method)s' to volume '%(host)s'") % locals())
def instance_update_db(context, instance_uuid, host):
'''Set the host and scheduled_at fields of an Instance.
def instance_update_db(context, instance_uuid):
'''Clear the host and set the scheduled_at field of an Instance.
:returns: An Instance with the updated fields set properly.
'''
now = timeutils.utcnow()
values = {'host': host, 'scheduled_at': now}
values = {'host': None, 'scheduled_at': now}
return db.instance_update(context, instance_uuid, values)
@@ -116,7 +116,7 @@ def cast_to_compute_host(context, host, method, **kwargs):
instance_uuid = kwargs.get('instance_uuid', None)
if instance_uuid:
instance_update_db(context, instance_uuid, host)
instance_update_db(context, instance_uuid)
rpc.cast(context,
rpc.queue_get_for(context, 'compute', host),

@@ -142,8 +142,7 @@ class FilterScheduler(driver.Scheduler):
'scheduler.run_instance.scheduled', notifier.INFO,
payload)
updated_instance = driver.instance_update_db(context,
instance_uuid, weighted_host.host_state.host)
updated_instance = driver.instance_update_db(context, instance_uuid)
self.compute_rpcapi.run_instance(context, instance=updated_instance,
host=weighted_host.host_state.host,

@@ -22,6 +22,7 @@ import uuid
from nova.compute import resource_tracker
from nova.compute import task_states
from nova.compute import vm_states
from nova import context
from nova import db
from nova import exception
from nova.openstack.common import log as logging
@@ -32,14 +33,6 @@ from nova.virt import driver
LOG = logging.getLogger(__name__)
class FakeContext(object):
def __init__(self, is_admin=False):
self.is_admin = is_admin
def elevated(self):
return FakeContext(is_admin=True)
class UnsupportedVirtDriver(driver.ComputeDriver):
"""Pretend version of a lame virt driver"""
def get_available_resource(self):
@@ -81,11 +74,13 @@ class BaseTestCase(test.TestCase):
self.flags(reserved_host_disk_mb=0,
reserved_host_memory_mb=0)
self.context = FakeContext()
self.context = context.RequestContext('fake', 'fake')
self._instances = []
self._instances = {}
self.stubs.Set(db, 'instance_get_all_by_host',
lambda c, h: self._instances)
lambda c, h: self._instances.values())
self.stubs.Set(db, 'instance_update_and_get_original',
self._fake_instance_update_and_get_original)
def _create_compute_node(self, values=None):
compute = {
@@ -122,8 +117,10 @@ class BaseTestCase(test.TestCase):
return service
def _fake_instance(self, *args, **kwargs):
instance_uuid = str(uuid.uuid1())
instance = {
'uuid': str(uuid.uuid1()),
'uuid': instance_uuid,
'vm_state': vm_states.BUILDING,
'task_state': None,
'memory_mb': 2,
@@ -136,9 +133,17 @@ class BaseTestCase(test.TestCase):
}
instance.update(kwargs)
self._instances.append(instance)
self._instances[instance_uuid] = instance
return instance
def _fake_instance_update_and_get_original(self, context, instance_uuid,
values):
instance = self._instances[instance_uuid]
instance.update(values)
# the test doesn't care what the original instance values are, it's
# only used in the subsequent notification:
return (instance, instance)
def _tracker(self, unsupported=False):
host = "fakehost"
@@ -168,7 +173,8 @@ class UnsupportedDriverTestCase(BaseTestCase):
def testDisabledClaim(self):
# basic claim:
claim = self.tracker.begin_resource_claim(self.context, 1, 1)
instance = self._fake_instance()
claim = self.tracker.begin_resource_claim(self.context, instance)
self.assertEqual(None, claim)
def testDisabledInstanceClaim(self):
@@ -200,7 +206,7 @@ class UnsupportedDriverTestCase(BaseTestCase):
class MissingServiceTestCase(BaseTestCase):
def setUp(self):
super(MissingServiceTestCase, self).setUp()
self.context = FakeContext(is_admin=True)
self.context = context.get_admin_context()
self.tracker = self._tracker()
def testMissingService(self):

@@ -90,8 +90,8 @@ class ChanceSchedulerTestCase(test_scheduler.SchedulerTestCase):
self.driver.hosts_up(ctxt_elevated, 'compute').AndReturn(
['host1', 'host2', 'host3', 'host4'])
random.random().AndReturn(.5)
driver.instance_update_db(ctxt, instance1['uuid'],
'host3').WithSideEffects(inc_launch_index).AndReturn(instance1)
driver.instance_update_db(ctxt, instance1['uuid']).WithSideEffects(
inc_launch_index).AndReturn(instance1)
compute_rpcapi.ComputeAPI.run_instance(ctxt, host='host3',
instance=instance1, requested_networks=None,
injected_files=None, admin_password=None, is_first_time=None,
@@ -102,8 +102,8 @@ class ChanceSchedulerTestCase(test_scheduler.SchedulerTestCase):
self.driver.hosts_up(ctxt_elevated, 'compute').AndReturn(
['host1', 'host2', 'host3', 'host4'])
random.random().AndReturn(.2)
driver.instance_update_db(ctxt, instance2['uuid'],
'host1').WithSideEffects(inc_launch_index).AndReturn(instance2)
driver.instance_update_db(ctxt, instance2['uuid']).WithSideEffects(
inc_launch_index).AndReturn(instance2)
compute_rpcapi.ComputeAPI.run_instance(ctxt, host='host1',
instance=instance2, requested_networks=None,
injected_files=None, admin_password=None, is_first_time=None,

@@ -712,7 +712,7 @@ class SchedulerDriverModuleTestCase(test.TestCase):
timeutils.utcnow().AndReturn('fake-now')
db.instance_update(self.context, 'fake_uuid',
{'host': host, 'scheduled_at': 'fake-now'})
{'host': None, 'scheduled_at': 'fake-now'})
rpc.queue_get_for(self.context, 'compute', host).AndReturn(queue)
rpc.cast(self.context, queue,
{'method': method,