Fix race condition bug during live_snapshot

During live_snapshot creation, when nova starts block_rebase
operation in libvirt there is possibility that block_job is
not yet started and libvirt blockJobInfo method will return
status.end = 0 and status.cur = 0. In such case libvirt driver
does not wait to finish block rebase operation and that causes
a problem because created snapshot is corrupted.

This patch adds check if status.end != 0 to return information
that job is already finished.

Change-Id: I45ac06eae0b1949f746dae305469718649bfcf23
Closes-Bug: #1530275
This commit is contained in:
Sławek Kapłoński 2016-08-31 20:28:36 +00:00
parent dd44096a04
commit 6b20239a5d
3 changed files with 22 additions and 5 deletions

View File

@ -14284,7 +14284,7 @@ class LibvirtConnTestCase(test.NoDBTestCase):
mock_dom.XMLDesc.return_value = xmldoc
mock_dom.isPersistent.return_value = True
mock_dom.blockJobInfo.return_value = {}
mock_dom.blockJobInfo.return_value = {'cur': 100, 'end': 100}
drvr._swap_volume(guest, srcfile, dstfile, 1)
@ -17557,7 +17557,8 @@ class LibvirtVolumeSnapshotTestCase(test.NoDBTestCase):
flags=fakelibvirt.VIR_DOMAIN_BLOCK_COMMIT_RELATIVE)
domain.blockJobInfo('vda', flags=0).AndReturn({'cur': 1, 'end': 1000})
domain.blockJobInfo('vda', flags=0).AndReturn({})
domain.blockJobInfo('vda', flags=0).AndReturn({'cur': 1000,
'end': 1000})
self.mox.ReplayAll()

View File

@ -622,7 +622,16 @@ class GuestBlockTestCase(test.NoDBTestCase):
'vda', "foo", "top", 0,
flags=fakelibvirt.VIR_DOMAIN_BLOCK_COMMIT_RELATIVE)
def test_wait_for_job(self):
def test_wait_for_job_cur_end_zeros(self):
self.domain.blockJobInfo.return_value = {
"type": 4,
"bandwidth": 18,
"cur": 0,
"end": 0}
in_progress = self.gblock.wait_for_job()
self.assertTrue(in_progress)
def test_wait_for_job_current_lower_than_end(self):
self.domain.blockJobInfo.return_value = {
"type": 4,
"bandwidth": 18,
@ -631,6 +640,7 @@ class GuestBlockTestCase(test.NoDBTestCase):
in_progress = self.gblock.wait_for_job()
self.assertTrue(in_progress)
def test_wait_for_job_finished(self):
self.domain.blockJobInfo.return_value = {
"type": 4,
"bandwidth": 18,
@ -639,6 +649,7 @@ class GuestBlockTestCase(test.NoDBTestCase):
in_progress = self.gblock.wait_for_job()
self.assertFalse(in_progress)
def test_wait_for_job_clean(self):
self.domain.blockJobInfo.return_value = {"type": 0}
in_progress = self.gblock.wait_for_job(wait_for_job_clean=True)
self.assertFalse(in_progress)

View File

@ -695,11 +695,13 @@ class BlockDevice(object):
Libvirt may return either cur==end or an empty dict when
the job is complete, depending on whether the job has been
cleaned up by libvirt yet, or not.
It can also return end=0 if qemu has not yet started the block
operation.
:param abort_on_error: Whether to stop process and raise NovaException
on error (default: False)
:param wait_for_job_clean: Whether to force wait to ensure job is
finished (see bug: LP#1119173)
finished (see bug: RH Bugzilla#1119173)
:returns: True if still in progress
False if completed
@ -714,7 +716,10 @@ class BlockDevice(object):
if wait_for_job_clean:
job_ended = status.job == 0
else:
job_ended = status.cur == status.end
# NOTE(slaweq): because of bug in libvirt, which is described in
# http://www.redhat.com/archives/libvir-list/2016-September/msg00017.html
# if status.end == 0 job is not started yet so it is not finished
job_ended = status.end != 0 and status.cur == status.end
return not job_ended