Fix race condition on multinode
On a multinode environment, it is possible that multiple nodes try to create a docker network at the same time. In before, we didn't handle the race condition correctly so that there are still concurrent requests on creating the docker network. This commit refines the locking logic so that it can handle the race condition correctly. In particular, we let the first request to go through to create the network and let other nodes to retry later. Change-Id: Iaacc8285d2ea5170bcb57dfb449058359cd92c6b
This commit is contained in:
parent
6780d79574
commit
008a96f828
|
@ -114,6 +114,19 @@ class KuryrNetwork(network.Network):
|
|||
network_dict['name'] = name
|
||||
network_dict['neutron_net_id'] = neutron_net_id
|
||||
network = objects.Network(self.context, **network_dict)
|
||||
|
||||
for attempt in (1, 2, 3):
|
||||
LOG.debug("Attempt (%s) to create network: %s", attempt, network)
|
||||
created_network = self._create_network_attempt(
|
||||
network, options, ipam_options)
|
||||
if created_network:
|
||||
return created_network
|
||||
time.sleep(1)
|
||||
|
||||
raise exception.ZunException(_(
|
||||
"Cannot create docker network after several attempts %s"))
|
||||
|
||||
def _create_network_attempt(self, network, options, ipam_options):
|
||||
# The DB model has unique constraint on 'neutron_net_id' field
|
||||
# which will guarantee only one request can create the network in here
|
||||
# (and call docker.create_network later) if there are concurrent
|
||||
|
@ -121,20 +134,34 @@ class KuryrNetwork(network.Network):
|
|||
try:
|
||||
network.create(self.context)
|
||||
except exception.NetworkAlreadyExists as e:
|
||||
if e.field == 'neutron_net_id':
|
||||
network = objects.Network.list(
|
||||
self.context,
|
||||
filters={'neutron_net_id': network.neutron_net_id})[0]
|
||||
else:
|
||||
if e.field != 'neutron_net_id':
|
||||
raise
|
||||
|
||||
networks = objects.Network.list(
|
||||
self.context,
|
||||
filters={'neutron_net_id': network.neutron_net_id})
|
||||
docker_networks = self.list_networks(names=[network.name])
|
||||
if (networks and networks[0].network_id and
|
||||
docker_networks and
|
||||
networks[0].network_id == docker_networks[0]['Id']):
|
||||
LOG.debug("Network (%s) has already been created in docker",
|
||||
network.name)
|
||||
return networks[0]
|
||||
else:
|
||||
# Probably, there are concurrent requests on creating the
|
||||
# network but the network is yet created in Docker.
|
||||
# We return False and let the caller retry.
|
||||
return False
|
||||
|
||||
LOG.debug("Calling docker.create_network to create network %s, "
|
||||
"ipam_options %s, options %s", name, ipam_options, options)
|
||||
"ipam_options %s, options %s",
|
||||
network.name, ipam_options, options)
|
||||
enable_ipv6 = bool(options.get('neutron.subnet.v6.uuid'))
|
||||
try:
|
||||
docker_network = self.docker.create_network(
|
||||
name=name,
|
||||
name=network.name,
|
||||
driver=CONF.network.driver_name,
|
||||
enable_ipv6=True if v6_subnet else False,
|
||||
enable_ipv6=enable_ipv6,
|
||||
options=options,
|
||||
ipam=ipam_options)
|
||||
except Exception:
|
||||
|
|
|
@ -203,26 +203,20 @@ class KuryrNetworkTestCase(base.TestCase):
|
|||
mock_neutron_api_cls.return_value = self.network_api.neutron_api
|
||||
name = 'test_kuryr_network'
|
||||
neutron_net_id = 'fake-net-id'
|
||||
docker_net_id = 'docker-net'
|
||||
fake_network = mock.Mock()
|
||||
fake_network.network_id = docker_net_id
|
||||
mock_list.return_value = [fake_network]
|
||||
mock_create.side_effect = exception.NetworkAlreadyExists(
|
||||
field='neutron_net_id', value=neutron_net_id)
|
||||
with mock.patch.object(self.network_api.docker, 'create_network',
|
||||
return_value={'Id': 'docker-net'}
|
||||
) as mock_create_network:
|
||||
with mock.patch.object(self.network_api.docker, 'networks',
|
||||
return_value=[{'Id': docker_net_id}]
|
||||
) as mock_list_network:
|
||||
network = self.network_api.create_network(name, neutron_net_id)
|
||||
self.assertEqual('docker-net', network.network_id)
|
||||
self.assertEqual(docker_net_id, network.network_id)
|
||||
mock_list.assert_called_once_with(
|
||||
self.context, filters={'neutron_net_id': neutron_net_id})
|
||||
mock_create_network.assert_called_once_with(
|
||||
name=name,
|
||||
driver='kuryr',
|
||||
enable_ipv6=False,
|
||||
ipam={'Config': [{'Subnet': '10.5.0.0/16', 'Gateway': '10.5.0.1'}],
|
||||
'Driver': 'kuryr',
|
||||
'Options': {'neutron.net.shared': 'False',
|
||||
'neutron.subnet.uuid': 'fake-subnet-id'}},
|
||||
options={'neutron.net.uuid': 'fake-net-id',
|
||||
'neutron.net.shared': 'False',
|
||||
'neutron.subnet.uuid': 'fake-subnet-id'})
|
||||
mock_list_network.assert_called_once_with(names=[name])
|
||||
|
||||
def test_remove_network(self):
|
||||
network_name = 'c02afe4e-8350-4263-8078'
|
||||
|
|
Loading…
Reference in New Issue