Make virt drivers use a consistent hostname

Right now, the virt drivers will happily return the current hostname
of a system, even if it has changed since the compute service
started. The problem with this is that DNS issues can cause the
service to constantly delete and re-create itself in the database,
leading to flaky scheduling without the operator knowing.

This patch makes them cache the hostname at the first call, and
log a visible error message if the hostname changes during the
lifetime of the process so that the admin can see what is
going on.

NOTE: This excludes hyperv and vmwareapi until further study
determines whether this is appropriate for those drivers as well,
as they can manage multiple systems behind a single compute host.

Closes-bug: 1224982
Change-Id: I4ef64f9715ff117f50120846d8b43ee7183a0b42
This commit is contained in:
Dan Smith 2013-09-13 14:21:14 -07:00
parent 8e0d9dacfd
commit 277c07b605
8 changed files with 108 additions and 6 deletions

View File

@ -15,6 +15,8 @@
# License for the specific language governing permissions and limitations
# under the License.
import socket
from nova import test
from nova.tests import utils
import nova.tests.virt.docker.mock_client
@ -59,3 +61,13 @@ class DockerDriverTestCase(_VirtDriverTestCase, test.TestCase):
self.connection.spawn(self.ctxt, instance_ref, image_info,
[], 'herp', network_info=network_info)
return instance_ref, network_info
def test_get_host_stats(self):
    """Host stats keep reporting the first hostname that was seen.

    socket.gethostname() is stubbed to answer 'foo' and then 'bar';
    both get_host_stats() calls must still report 'foo' as
    'host_hostname'.
    """
    self.mox.StubOutWithMock(socket, 'gethostname')
    # Record the two successive answers of the stubbed lookup.
    for reported in ('foo', 'bar'):
        socket.gethostname().AndReturn(reported)
    self.mox.ReplayAll()

    for _attempt in range(2):
        stats = self.connection.get_host_stats()
        self.assertEqual('foo', stats['host_hostname'])

View File

@ -5122,6 +5122,24 @@ class LibvirtConnTestCase(test.TestCase):
ephemerals, swap,
block_device_mapping)
def test_hypervisor_hostname_caching(self):
    """get_hypervisor_hostname() always returns the first hostname."""

    class FakeConn(object):
        # Minimal stand-in for the wrapped connection; getHostname is
        # replaced by a mox stub below.
        def getLibVersion(self):
            return 99999

        def getHostname(self):
            pass

    drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
    drvr._wrapped_conn = FakeConn()
    self.mox.StubOutWithMock(drvr._wrapped_conn, 'getHostname')
    # Record two different answers from the connection.
    for reported in ('foo', 'bar'):
        drvr._conn.getHostname().AndReturn(reported)
    self.mox.ReplayAll()

    # Both lookups must yield the hostname from the first answer.
    for _attempt in range(2):
        self.assertEqual('foo', drvr.get_hypervisor_hostname())
class HostStateTestCase(test.TestCase):

View File

@ -1212,3 +1212,13 @@ class IVMOperatorTestCase(test.TestCase):
self.assertRaises(n_exc.InvalidParameterValue,
self.ivm_operator._poll_for_lpar_status,
'fake', 'bad-lpar-state-value', 'test')
def test_get_hostname_returns_cached(self):
    """get_hostname() keeps returning the first hostname reported.

    run_vios_command is stubbed to report 'foo' and then 'bar' for the
    hostname command; both get_hostname() calls must return 'foo'.
    """
    operator = self.ivm_operator
    self.mox.StubOutWithMock(operator, 'run_vios_command')
    for reported in ('foo', 'bar'):
        operator.run_vios_command(
            operator.command.hostname()).AndReturn((reported, None))
    self.mox.ReplayAll()

    for _attempt in range(2):
        self.assertEqual('foo', operator.get_hostname())

View File

@ -2087,6 +2087,38 @@ class XenAPIHostTestCase(stubs.XenAPITestBase):
stats = self.conn.get_host_stats()
self.assertEquals("SOMERETURNVALUE", stats['supported_instances'])
def test_update_stats_caches_hostname(self):
    """Host stats keep the first hostname even if the host reports a new one.

    Three 'host_data' round trips are recorded for the two
    get_host_stats(refresh=True) calls below. The first two report
    'foo' and the third reports 'bar'; both assertions must still see
    'foo' as 'hypervisor_hostname'.
    """
    self.mox.StubOutWithMock(host, 'call_xenhost')
    self.mox.StubOutWithMock(vm_utils, 'safe_find_sr')
    self.mox.StubOutWithMock(self.conn._session, 'call_xenapi')
    data = {'disk_total': 0,
            'disk_used': 0,
            'disk_available': 0,
            'supported_instances': 0,
            'host_capabilities': [],
            'host_hostname': 'foo',
            }
    sr_rec = {
        'physical_size': 0,
        'physical_utilisation': 0,
    }

    for i in range(3):
        host.call_xenhost(mox.IgnoreArg(), 'host_data', {}).AndReturn(data)
        vm_utils.safe_find_sr(self.conn._session).AndReturn(None)
        self.conn._session.call_xenapi('SR.scan', None)
        self.conn._session.call_xenapi('SR.get_record', None).AndReturn(
            sr_rec)
        if i == 1:
            # Switch the hostname before the third expectation is
            # recorded, so the third call (the second below) reports
            # 'bar'. Switching after the last iteration would leave
            # every recorded call returning 'foo' and the caching
            # logic untested.
            data = dict(data, host_hostname='bar')

    self.mox.ReplayAll()
    stats = self.conn.get_host_stats(refresh=True)
    self.assertEqual('foo', stats['hypervisor_hostname'])
    stats = self.conn.get_host_stats(refresh=True)
    self.assertEqual('foo', stats['hypervisor_hostname'])
class ToSupportedInstancesTestCase(test.TestCase):
def test_default_return_value(self):

View File

@ -131,12 +131,20 @@ class DockerDriver(driver.ComputeDriver):
memory = hostinfo.get_memory_usage()
disk = hostinfo.get_disk_usage()
stats = self.get_available_resource(hostname)
stats['hypervisor_hostname'] = hostname
stats['host_hostname'] = hostname
stats['host_name_label'] = hostname
stats['hypervisor_hostname'] = stats['hypervisor_hostname']
stats['host_hostname'] = stats['hypervisor_hostname']
stats['host_name_label'] = stats['hypervisor_hostname']
return stats
def get_available_resource(self, nodename):
if not hasattr(self, '_nodename'):
self._nodename = nodename
if nodename != self._nodename:
LOG.error(_('Hostname has changed from %(old)s to %(new)s. '
'A restart is required to take effect.'
) % {'old': self._nodename,
'new': nodename})
memory = hostinfo.get_memory_usage()
disk = hostinfo.get_disk_usage()
stats = {
@ -149,7 +157,7 @@ class DockerDriver(driver.ComputeDriver):
'disk_available_least': disk['available'] / (1024 ** 3),
'hypervisor_type': 'docker',
'hypervisor_version': '1.0',
'hypervisor_hostname': nodename,
'hypervisor_hostname': self._nodename,
'cpu_info': '?',
'supported_instances': jsonutils.dumps([
('i686', 'docker', 'lxc'),

View File

@ -3471,7 +3471,15 @@ class LibvirtDriver(driver.ComputeDriver):
def get_hypervisor_hostname(self):
    """Returns the hostname of the hypervisor.

    The hostname reported by the connection on the first call is
    cached on the instance and returned on every later call. If the
    connection later reports a different hostname, an error is logged
    and the cached value is still returned.
    """
    hostname = self._conn.getHostname()
    if not hasattr(self, '_hypervisor_hostname'):
        # First lookup: remember this name for the life of the object.
        self._hypervisor_hostname = hostname
    elif hostname != self._hypervisor_hostname:
        LOG.error(_('Hostname has changed from %(old)s '
                    'to %(new)s. A restart is required to take effect.'
                    ) % {'old': self._hypervisor_hostname,
                         'new': hostname})
    return self._hypervisor_hostname
def get_instance_capabilities(self):
"""Get hypervisor instance capabilities

View File

@ -632,7 +632,14 @@ class BaseOperator(object):
:returns: string -- hostname
"""
output = self.run_vios_command(self.command.hostname())
return output[0]
hostname = output[0]
if not hasattr(self, '_hostname'):
self._hostname = hostname
elif hostname != self._hostname:
LOG.error(_('Hostname has changed from %(old)s to %(new)s. '
'A restart is required to take effect.'
) % {'old': self._hostname, 'new': hostname})
return self._hostname
def get_disk_name_by_vhost(self, vhost):
"""Returns the disk name attached to a vhost.

View File

@ -185,6 +185,13 @@ class HostState(object):
data["host_memory_free_computed"] = host_memory.get(
'free-computed', 0)
del data['host_memory']
if (data['host_hostname'] !=
self._stats.get('host_hostname', data['host_hostname'])):
LOG.error(_('Hostname has changed from %(old)s '
'to %(new)s. A restart is required to take effect.'
) % {'old': self._stats['host_hostname'],
'new': data['host_hostname']})
data['host_hostname'] = self._stats['host_hostname']
data['hypervisor_hostname'] = data['host_hostname']
self._stats = data