Fix race condition on multinode

On a multinode environment, it is possible that multiple nodes
try to create a docker network at the same time. In before,
we didn't handle the race condition correctly so that
there are still concurrent requests on creating the docker
network. This commit refines the locking logic so that
it can handle the race condition correctly. In particular,
we let the first request to go through to create
the network and let other nodes to retry later.

Change-Id: Iaacc8285d2ea5170bcb57dfb449058359cd92c6b
This commit is contained in:
Hongbin Lu 2018-11-04 17:46:53 +00:00
parent 6780d79574
commit 008a96f828
2 changed files with 44 additions and 23 deletions

View File

@ -114,6 +114,19 @@ class KuryrNetwork(network.Network):
network_dict['name'] = name
network_dict['neutron_net_id'] = neutron_net_id
network = objects.Network(self.context, **network_dict)
for attempt in (1, 2, 3):
LOG.debug("Attempt (%s) to create network: %s", attempt, network)
created_network = self._create_network_attempt(
network, options, ipam_options)
if created_network:
return created_network
time.sleep(1)
raise exception.ZunException(_(
"Cannot create docker network after several attempts %s"))
def _create_network_attempt(self, network, options, ipam_options):
# The DB model has unique constraint on 'neutron_net_id' field
# which will guarantee only one request can create the network in here
# (and call docker.create_network later) if there are concurrent
@ -121,20 +134,34 @@ class KuryrNetwork(network.Network):
try:
network.create(self.context)
except exception.NetworkAlreadyExists as e:
if e.field == 'neutron_net_id':
network = objects.Network.list(
self.context,
filters={'neutron_net_id': network.neutron_net_id})[0]
else:
if e.field != 'neutron_net_id':
raise
networks = objects.Network.list(
self.context,
filters={'neutron_net_id': network.neutron_net_id})
docker_networks = self.list_networks(names=[network.name])
if (networks and networks[0].network_id and
docker_networks and
networks[0].network_id == docker_networks[0]['Id']):
LOG.debug("Network (%s) has already been created in docker",
network.name)
return networks[0]
else:
# Probably, there are concurrent requests on creating the
# network but the network is yet created in Docker.
# We return False and let the caller retry.
return False
LOG.debug("Calling docker.create_network to create network %s, "
"ipam_options %s, options %s", name, ipam_options, options)
"ipam_options %s, options %s",
network.name, ipam_options, options)
enable_ipv6 = bool(options.get('neutron.subnet.v6.uuid'))
try:
docker_network = self.docker.create_network(
name=name,
name=network.name,
driver=CONF.network.driver_name,
enable_ipv6=True if v6_subnet else False,
enable_ipv6=enable_ipv6,
options=options,
ipam=ipam_options)
except Exception:

View File

@ -203,26 +203,20 @@ class KuryrNetworkTestCase(base.TestCase):
mock_neutron_api_cls.return_value = self.network_api.neutron_api
name = 'test_kuryr_network'
neutron_net_id = 'fake-net-id'
docker_net_id = 'docker-net'
fake_network = mock.Mock()
fake_network.network_id = docker_net_id
mock_list.return_value = [fake_network]
mock_create.side_effect = exception.NetworkAlreadyExists(
field='neutron_net_id', value=neutron_net_id)
with mock.patch.object(self.network_api.docker, 'create_network',
return_value={'Id': 'docker-net'}
) as mock_create_network:
with mock.patch.object(self.network_api.docker, 'networks',
return_value=[{'Id': docker_net_id}]
) as mock_list_network:
network = self.network_api.create_network(name, neutron_net_id)
self.assertEqual('docker-net', network.network_id)
self.assertEqual(docker_net_id, network.network_id)
mock_list.assert_called_once_with(
self.context, filters={'neutron_net_id': neutron_net_id})
mock_create_network.assert_called_once_with(
name=name,
driver='kuryr',
enable_ipv6=False,
ipam={'Config': [{'Subnet': '10.5.0.0/16', 'Gateway': '10.5.0.1'}],
'Driver': 'kuryr',
'Options': {'neutron.net.shared': 'False',
'neutron.subnet.uuid': 'fake-subnet-id'}},
options={'neutron.net.uuid': 'fake-net-id',
'neutron.net.shared': 'False',
'neutron.subnet.uuid': 'fake-subnet-id'})
mock_list_network.assert_called_once_with(names=[name])
def test_remove_network(self):
network_name = 'c02afe4e-8350-4263-8078'