Set node to ERROR status if IP doesn't match Admin networks

When a node group or cluster is deleted, some nodes in bootstrap may still have
IPs corresponding to the deleted node groups. They cannot be provisioned
because the dnsmasq configuration no longer contains info about those networks.
Such nodes should be marked so that the user can understand they are not usable.
The best thing the user can do in this situation is to reboot the nodes.

Co-Authored-By: Aleksei Kasatkin <akasatkin@mirantis.com>

Partial-Bug: #1495593

Change-Id: I402225fac73d254826479139cbf91ed03f037f95
This commit is contained in:
=Nikita Koshikov 2015-09-18 18:09:07 -05:00 committed by Aleksey Kasatkin
parent 0d84bb56d2
commit 477f52f2e5
10 changed files with 328 additions and 53 deletions

View File

@ -24,6 +24,7 @@ from nailgun.api.v1.validators.json_schema import networks
from nailgun import consts
from nailgun.db import db
from nailgun.db.sqlalchemy.models import Cluster
from nailgun.db.sqlalchemy.models import IPAddrRange
from nailgun.db.sqlalchemy.models import NetworkGroup
from nailgun.db.sqlalchemy.models import Node
from nailgun.db.sqlalchemy.models import NodeGroup
@ -51,7 +52,22 @@ class NetworkConfigurationValidator(BasicValidator):
return data
@classmethod
def validate_network_group(cls, ng_data, ng_db, cluster):
def validate_network_group(cls, ng_data, ng_db, cluster,
admin_ranges=None):
"""Validate new settings of network group.
Validate new settings of network group for consistency and check
that nodes' IPs will not be out of new IP ranges (for all nodes vs
Admin networks and for deployed nodes vs all networks).
:param ng_data: new parameters of network group (dict)
:param ng_db: network group instance
:param cluster: cluster instance
:param admin_ranges: IP ranges for Admin networks are collected here
if admin_ranges is not None (to use by other code)
:return: ng_data
:raises: errors.InvalidData
"""
cidr = ng_data.get('cidr', ng_db.cidr)
ip_ranges = ng_data.get(
'ip_ranges',
@ -86,22 +102,50 @@ class NetworkConfigurationValidator(BasicValidator):
"No CIDR was specified for network "
"{0}".format(ng_db.id))
nm = objects.Cluster.get_network_manager(cluster)
if ng_db.name == consts.NETWORKS.fuelweb_admin and \
cls._check_for_ip_conflicts(
ng_data, ng_db, nm, notation, True):
ranges = cls._get_network_ip_ranges(ng_data, ng_db, notation,
gateway is not None)
is_admin_network = ng_db.name == consts.NETWORKS.fuelweb_admin
if is_admin_network:
if admin_ranges is not None:
admin_ranges[ng_db.id] = ranges
if (is_admin_network or cluster.is_locked) and \
cls._check_ips_out_of_ip_ranges(ng_db, nm, ranges):
raise errors.InvalidData(
"New IP ranges for network '{0}'({1}) conflict "
"with nodes' IPs.".format(
ng_data['name'], ng_data['id']))
elif cluster.is_locked and cls._check_for_ip_conflicts(
ng_data, ng_db, nm, notation, use_gateway):
raise errors.InvalidData(
"New IP ranges for network '{0}'({1}) conflict "
"with already allocated IPs.".format(
"New IP ranges for network '{0}'({1}) do not cover "
"already allocated IPs.".format(
ng_data['name'], ng_data['id']))
return ng_data
@classmethod
def validate_admin_networks_update(cls, cluster, admin_ranges):
    """Validate new Admin IP ranges against nodes that are in no cluster.

    The set of ranges to check is built from the ranges stored in the DB
    for every Admin network whose id is not a key of ``admin_ranges``,
    plus all the new ranges passed in ``admin_ranges``.

    :param cluster: cluster instance
    :param admin_ranges: new IP ranges for Admin networks
    :return: None
    :raises: errors.InvalidData
    """
    # Admin networks being updated are excluded here: their new ranges
    # are appended below instead of the stored ones.
    untouched_admin_nets = (
        NetworkGroup.name == consts.NETWORKS.fuelweb_admin,
        NetworkGroup.id.notin_(admin_ranges.keys()),
    )
    stored_ranges = db().query(
        IPAddrRange.first, IPAddrRange.last
    ).join(NetworkGroup).filter(*untouched_admin_nets).all()

    ranges_to_check = list(stored_ranges)
    for new_ranges in admin_ranges.values():
        ranges_to_check.extend(new_ranges)

    unassigned_nodes = db().query(Node.ip).filter(Node.cluster_id.is_(None))
    unassigned_ips = [row[0] for row in unassigned_nodes]

    network_manager = objects.Cluster.get_network_manager(cluster)
    if network_manager.check_ips_belong_to_ranges(
            unassigned_ips, ranges_to_check):
        return
    raise errors.InvalidData(
        "New IP ranges for Admin networks conflict with bootstrap "
        "nodes' IPs.")
@classmethod
def validate_networks_update(cls, data, cluster):
cls.validate_schema(data, networks.NETWORK_GROUPS)
@ -121,20 +165,18 @@ class NetworkConfigurationValidator(BasicValidator):
)
)
admin_ranges = {}
for network in data['networks']:
# admin_ranges are collected using validate_network_group()
# to avoid duplicated DB queries.
cls.validate_network_group(
network, ng_db_by_id[network['id']], cluster)
network, ng_db_by_id[network['id']], cluster, admin_ranges)
cls.validate_admin_networks_update(cluster, admin_ranges)
return data
@classmethod
def _check_for_ip_conflicts(cls, network, ng_db, nm, notation,
use_gateway):
"""This method checks if any of already allocated IPs
will be out of all ip-ranges after networks update.
"""
ips = nm.get_assigned_ips_by_network_id(network['id'])
def _get_network_ip_ranges(cls, network, ng_db, notation, use_gateway):
ranges = []
if notation == consts.NETWORK_NOTATION.ip_ranges:
ranges = network.get('ip_ranges',
@ -144,19 +186,34 @@ class NetworkConfigurationValidator(BasicValidator):
ip_network = IPNetwork(cidr)
first_index = 2 if use_gateway else 1
ranges = [(ip_network[first_index], ip_network[-2])]
return ranges
@classmethod
def _check_ips_out_of_ip_ranges(cls, ng_db, nm, ranges):
"""Check if any of IPs in the network is out of provided IP ranges.
This checks if any of already allocated IPs in 'ng_db' network
will be out of all provided IP ranges 'ranges'.
:param ng_db: network group instance
:param nm: NetworkManager
:param ranges: new IP ranges for the network
:return: True if any of already allocated IPs in the network
is outside of all provided IP ranges
"""
ips = nm.get_assigned_ips_by_network_id(ng_db.id)
# check IPs of bootstrap nodes in Admin network
if ng_db.name == consts.NETWORKS.fuelweb_admin:
nodes = db().query(Node.ip).filter_by(group_id=ng_db.group_id)
node_ips = [x[0] for x in nodes]
if ng_db.group_id is None:
# shared admin network. get nodes from all default groups
nodes = db().query(Node.ip).join(NodeGroup).filter(
NodeGroup.name == consts.NODE_GROUPS.default
)
node_ips.extend(x[0] for x in nodes)
if not nm.check_ips_belong_to_ranges(node_ips, ranges):
return True
else:
nodes = db().query(Node.ip).filter(
Node.group_id == ng_db.group_id,
Node.cluster_id.isnot(None)
)
ips.extend(x[0] for x in nodes)
return not nm.check_ips_belong_to_ranges(ips, ranges)

View File

@ -127,7 +127,8 @@ NODE_STATUSES = Enum(
NODE_ERRORS = Enum(
'deploy',
'provision',
'deletion'
'deletion',
'discover',
)
NODE_GROUPS = Enum(

View File

@ -84,6 +84,19 @@ task_names_new = task_names_old + (
)
# Values of the node error type enum before and after this migration;
# the new set only appends 'discover' to the old one.
node_errors_old = ('deploy', 'provision', 'deletion')
node_errors_new = node_errors_old + ('discover',)
def upgrade():
create_components_table()
create_release_components_table()
@ -91,9 +104,11 @@ def upgrade():
upgrade_release_state()
task_statuses_upgrade()
task_names_upgrade()
add_node_discover_error_upgrade()
def downgrade():
add_node_discover_error_downgrade()
task_names_downgrade()
task_statuses_downgrade()
downgrade_release_state()
@ -233,3 +248,23 @@ def task_names_downgrade():
task_names_new,
task_names_old
)
def add_node_discover_error_upgrade():
    """Switch the ``node_error_type`` enum to the new set of values.

    Calls ``upgrade_enum`` for ``nodes.error_type``, going from
    ``node_errors_old`` to ``node_errors_new``.
    """
    table, column, enum_name = "nodes", "error_type", "node_error_type"
    upgrade_enum(table, column, enum_name, node_errors_old, node_errors_new)
def add_node_discover_error_downgrade():
    """Switch the ``node_error_type`` enum back to the old set of values.

    Calls ``upgrade_enum`` for ``nodes.error_type``, going from
    ``node_errors_new`` to ``node_errors_old``.
    """
    table, column, enum_name = "nodes", "error_type", "node_error_type"
    upgrade_enum(table, column, enum_name, node_errors_new, node_errors_old)

View File

@ -480,6 +480,23 @@ class NetworkManager(object):
if ip_str not in ips_in_use:
yield ip_str
@classmethod
def check_ips_belong_to_admin_ranges(cls, ips):
    """Tell whether all given IPs are covered by Admin networks' IP ranges.

    The (first, last) pairs of every IP range joined to a network group
    named as the Admin network are selected and handed, together with
    ``ips``, to ``check_ips_belong_to_ranges``.

    :param ips: list of IPs (e.g. ['192.168.1.1', '127.0.0.1'], ...)
    :return: *True* if all IPs belong to Admin ranges or *False* otherwise
    """
    is_admin_net = NetworkGroup.name == consts.NETWORKS.fuelweb_admin
    range_bounds = db().query(IPAddrRange.first, IPAddrRange.last)
    admin_ranges_query = range_bounds.join(NetworkGroup).filter(is_admin_net)
    return cls.check_ips_belong_to_ranges(ips, admin_ranges_query)
@classmethod
def get_free_ips_from_ranges(cls, net_name, ip_ranges, ips_in_use, count):
"""Returns list of free IP addresses for given IP ranges. Required

View File

@ -248,10 +248,72 @@ class Node(NailgunObject):
cls.create_attributes(new_node)
cls.create_discover_notification(new_node)
if new_node.ip:
cls.check_ip_belongs_to_any_admin_network(new_node)
fire_callback_on_node_create(new_node)
return new_node
@classmethod
def set_error_status_and_file_notification(cls, instance, etype, emessage):
    """Put a node into error status and create an error notification.

    :param instance: node instance
    :param etype: value stored as the node's error type
    :param emessage: value stored as the node's error message; it is also
                     used as the notification message
    """
    instance.status = consts.NODE_STATUSES.error
    instance.error_type, instance.error_msg = etype, emessage
    # the node changes are flushed before the notification is created
    db().flush()
    notification = {
        "topic": consts.NOTIFICATION_TOPICS.error,
        "message": instance.error_msg,
        "node_id": instance.id,
    }
    Notification.create(notification)
@classmethod
def check_ip_belongs_to_any_admin_network(cls, instance, new_ip=None):
    """Check that node's IP is within the IP ranges of any Admin network.

    The node may be inside or outside of a cluster. When the IP matches
    no Admin network, the node is set to error status with the 'discover'
    error type and a notification is filed.

    :param instance: node instance
    :param new_ip: new IP for a node (got from Nailgun agent); the node's
                   current IP is used when it is not provided
    :return: True if IP belongs to any of Admin networks
    """
    ip = new_ip if new_ip else instance.ip
    network_manager = Cluster.get_network_manager(instance.cluster)
    in_admin_ranges = network_manager.check_ips_belong_to_admin_ranges([ip])
    if in_admin_ranges:
        return in_admin_ranges

    message = ("Node '{0}' has IP '{1}' that does not match any Admin "
               "network".format(instance.hostname, ip))
    cls.set_error_status_and_file_notification(
        instance, consts.NODE_ERRORS.discover, message)
    return in_admin_ranges
@classmethod
def check_ip_belongs_to_own_admin_network(cls, instance, new_ip=None):
    """Check node's IP against the CIDR of the node's own Admin network.

    The node should be inside a cluster. The IP and the CIDR of the
    node's Admin network group are compared with ``is_same_network``.
    When they do not match, the node is set to error status with the
    'discover' error type and a notification is filed.

    :param instance: node instance
    :param new_ip: new IP for a node (got from Nailgun agent); the node's
                   current IP is used when it is not provided
    :return: True if IP belongs to node's Admin network
    """
    ip = new_ip if new_ip else instance.ip
    network_manager = Cluster.get_network_manager(instance.cluster)
    own_admin_net = network_manager.get_admin_network_group(instance.id)
    in_own_network = network_manager.is_same_network(ip, own_admin_net.cidr)
    if in_own_network:
        return in_own_network

    message = (
        "Node '{0}' has IP '{1}' that does not match its own Admin "
        "network '{2}'".format(instance.hostname, ip, own_admin_net.cidr))
    cls.set_error_status_and_file_notification(
        instance, consts.NODE_ERRORS.discover, message)
    return in_own_network
@classmethod
def assign_group(cls, instance):
if instance.group_id is None and instance.ip:
@ -565,6 +627,7 @@ class Node(NailgunObject):
:returns: Node instance
"""
# don't update provisioning and error back to discover
data_status = data.get('status')
if instance.status in ('provisioning', 'error'):
if data.get('status', 'discover') == 'discover':
logger.debug(
@ -574,7 +637,7 @@ class Node(NailgunObject):
)
)
data['status'] = instance.status
data.pop('status', None)
meta = data.get('meta', {})
# don't update volume information, if agent has sent an empty array
@ -596,14 +659,24 @@ class Node(NailgunObject):
# (dshulyak) change this verification to NODE_STATUSES.deploying
# after we will reuse ips from dhcp range
netmanager = Cluster.get_network_manager()
admin_ng = netmanager.get_admin_network_group(instance.id)
if data.get('ip') and not netmanager.is_same_network(data['ip'],
admin_ng.cidr):
logger.debug(
'Corrupted network data %s, skipping update',
instance.id)
return instance
if data.get('ip'):
if instance.cluster_id:
update_status = cls.check_ip_belongs_to_own_admin_network(
instance, data['ip'])
else:
update_status = cls.check_ip_belongs_to_any_admin_network(
instance, data['ip'])
if update_status:
if instance.status == consts.NODE_STATUSES.error and \
instance.error_type == consts.NODE_ERRORS.discover:
# accept the status from agent if the node had wrong IP
# previously
if data_status:
instance.status = data_status
else:
instance.status = consts.NODE_STATUSES.discover
else:
data.pop('status', None)
return cls.update(instance, data)
@classmethod

View File

@ -122,9 +122,8 @@ class TestNetworkModels(BaseIntegrationTest):
self.assertEqual(400, resp_neutron_net.status_code)
self.assertEqual(
"New IP ranges for network '{0}'({1}) conflict "
"with already allocated IPs.".format(test_network_name,
mgmt_net['id']),
"New IP ranges for network '{0}'({1}) do not cover already "
"allocated IPs.".format(test_network_name, mgmt_net['id']),
resp_neutron_net.json_body['message'])
mgmt_net['cidr'] = u'192.168.0.0/30'
@ -164,8 +163,8 @@ class TestNetworkModels(BaseIntegrationTest):
self.env.clusters[0].id, test_nets, expect_errors=True)
self.assertEqual(400, resp_neutron_net.status_code)
self.assertEqual(
"New IP ranges for network '{0}'({1}) conflict "
"with nodes' IPs.".format(admin_net['name'], admin_net['id']),
"New IP ranges for network '{0}'({1}) do not cover already "
"allocated IPs.".format(admin_net['name'], admin_net['id']),
resp_neutron_net.json_body['message'])
for node in self.env.nodes:

View File

@ -14,12 +14,14 @@
# License for the specific language governing permissions and limitations
# under the License.
import netaddr
from oslo_serialization import jsonutils
from nailgun import consts
from nailgun import objects
from nailgun.db.sqlalchemy.models import Node
from nailgun.db.sqlalchemy.models import Notification
from nailgun.test.base import BaseIntegrationTest
from nailgun.test.base import fake_tasks
from nailgun.utils import reverse
@ -319,12 +321,23 @@ class TestHandlers(BaseIntegrationTest):
self.app.put(
reverse('NodeAgentHandler'),
jsonutils.dumps({'id': node.id,
'ip': ipaddress}),
'ip': ipaddress,
'status': consts.NODE_STATUSES.discover}),
headers=self.default_headers)
self.assertNotEqual(node.ip, ipaddress)
self.assertEqual(node.ip, ipaddress)
self.assertEqual(node.status, consts.NODE_STATUSES.error)
notif = self.db.query(Notification).filter_by(
node_id=node.id,
topic='error'
).first()
self.assertRegexpMatches(notif.message,
"that does not match any Admin network")
ipaddress = '10.20.0.25'
nm = objects.Cluster.get_network_manager(node.cluster)
admin_ng = nm.get_admin_network_group(node.id)
ipaddress = str(netaddr.IPRange(admin_ng.ip_ranges[0].first,
admin_ng.ip_ranges[0].last)[1])
self.app.put(
reverse('NodeAgentHandler'),
jsonutils.dumps({'id': node.id,
@ -332,6 +345,7 @@ class TestHandlers(BaseIntegrationTest):
headers=self.default_headers)
self.assertEqual(node.ip, ipaddress)
self.assertEqual(node.status, consts.NODE_STATUSES.discover)
def test_update_node_with_none_ip(self):
node = self.env.create_node(api=False, ip='10.20.0.2')

View File

@ -116,7 +116,7 @@ class TestLogs(BaseIntegrationTest):
self.assertEqual(resp.json_body, settings.LOGS)
def test_log_source_by_node_collection_handler(self):
node_ip = '40.30.20.10'
node_ip = '10.20.0.130'
node = self.env.create_node(api=False, ip=node_ip)
resp = self.app.get(
@ -139,7 +139,7 @@ class TestLogs(BaseIntegrationTest):
self.assertEqual(resp.json_body, [settings.LOGS[1]])
def test_log_entry_collection_handler(self):
node_ip = '10.20.30.40'
node_ip = '10.20.0.130'
log_entries = [
[
time.strftime(settings.UI_LOG_DATE_FORMAT),

View File

@ -13,6 +13,7 @@
# under the License.
import alembic
from datetime import datetime
import six
import sqlalchemy as sa
from sqlalchemy.exc import DataError
@ -325,3 +326,33 @@ class TestTaskNameMigration(base.BaseAlembicMigrationTest):
{'name': 'wrong_task_name',
'uuid': str(uuid.uuid4()),
'status': 'running'})
class TestNodeErrorTypeMigration(base.BaseAlembicMigrationTest):
    """Check the values accepted by the ``error_type`` column of nodes."""

    def test_node_error_type_enum(self):
        """Accept the added 'discover' error type, reject an unknown one."""
        nodes_table = self.meta.tables['nodes']

        def node_row(error_type):
            # a fresh uuid and timestamp are generated for every row
            return {'name': 'node1',
                    'status': 'error',
                    'error_type': error_type,
                    'uuid': str(uuid.uuid4()),
                    'mac': '00:00:00:00:00:00',
                    'timestamp': datetime.now()}

        for error_type in ('discover',):
            insert_table_row(nodes_table, node_row(error_type))
            matching_rows = sa.select(
                [sa.func.count(nodes_table.c.error_type)]
            ).where(sa.and_(nodes_table.c.error_type == error_type))
            inserted_count = db.execute(matching_rows).fetchone()[0]
            self.assertEqual(inserted_count, 1)

        with self.assertRaisesRegexp(DataError, 'invalid input value for '
                                                'enum node_error_type'):
            insert_table_row(nodes_table, node_row('wrong_error_type'))

View File

@ -13,6 +13,8 @@
# under the License.
import mock
import netaddr
from oslo_serialization import jsonutils
from nailgun.api.v1.validators.network import NetworkConfigurationValidator
@ -329,7 +331,49 @@ class TestNetworkConfigurationValidator(base.BaseIntegrationTest):
self.assertRaisesInvalidData(
"'use_gateway' cannot be provided without gateway")
@mock.patch('nailgun.rpc.cast')
def test_validate_admin_ips(self, _):
    """Admin IP ranges must keep covering cluster and bootstrap nodes."""
    node_ip = self.env.nodes[0].ip
    admin = self.find_net_by_name('fuelweb_admin')
    base_ip = netaddr.IPAddress(node_ip)

    def ip_at(offset):
        # textual form of the address `offset` positions after node's IP
        return str(base_ip + offset)

    def put_networks(**kwargs):
        return self.env.neutron_networks_put(
            self.env.clusters[0].id, self.config, **kwargs)

    # the new range starts right after the node's IP
    admin['ip_ranges'] = [[ip_at(1), ip_at(10)]]
    resp_neutron_net = put_networks(expect_errors=True)
    self.assertEqual(400, resp_neutron_net.status_code)
    # check for node in cluster failed
    self.assertEqual(
        "New IP ranges for network '{0}'({1}) do not cover already "
        "allocated IPs.".format(admin['name'], admin['id']),
        resp_neutron_net.json_body['message'])

    # the new range starts at the node's IP
    admin['ip_ranges'] = [[ip_at(0), ip_at(10)]]
    resp_neutron_net = put_networks()
    self.assertEqual(200, resp_neutron_net.status_code)

    # a node is created at the last IP of the range, then the range is
    # shortened by one address
    self.env.create_node(api=True, ip=ip_at(10))
    admin['ip_ranges'] = [[ip_at(0), ip_at(9)]]
    resp_neutron_net = put_networks(expect_errors=True)
    self.assertEqual(400, resp_neutron_net.status_code)
    # check for node outside cluster failed
    self.assertEqual("New IP ranges for Admin networks conflict with "
                     "bootstrap nodes' IPs.",
                     resp_neutron_net.json_body['message'])
def test_check_ip_conflicts(self):
validator = NetworkConfigurationValidator
nm = objects.Cluster.get_network_manager(self.cluster)
mgmt = self.find_net_by_name(consts.NETWORKS.management)
mgmt_db = self.db.query(NetworkGroup).get(mgmt['id'])
@ -340,23 +384,27 @@ class TestNetworkConfigurationValidator(base.BaseIntegrationTest):
"Default IPs were changed for some reason.")
mgmt['cidr'] = '10.101.0.0/24'
result = NetworkConfigurationValidator._check_for_ip_conflicts(
mgmt, mgmt_db, nm, 'cidr', False)
ranges = validator._get_network_ip_ranges(
mgmt, mgmt_db, 'cidr', False)
result = validator._check_ips_out_of_ip_ranges(mgmt_db, nm, ranges)
self.assertTrue(result)
mgmt['cidr'] = '192.168.0.0/28'
result = NetworkConfigurationValidator._check_for_ip_conflicts(
mgmt, mgmt_db, nm, 'cidr', False)
ranges = validator._get_network_ip_ranges(
mgmt, mgmt_db, 'cidr', False)
result = validator._check_ips_out_of_ip_ranges(mgmt_db, nm, ranges)
self.assertFalse(result)
mgmt['ip_ranges'] = [['192.168.0.1', '192.168.0.15']]
result = NetworkConfigurationValidator._check_for_ip_conflicts(
mgmt, mgmt_db, nm, 'ip_ranges', False)
ranges = validator._get_network_ip_ranges(
mgmt, mgmt_db, 'ip_ranges', False)
result = validator._check_ips_out_of_ip_ranges(mgmt_db, nm, ranges)
self.assertFalse(result)
mgmt['ip_ranges'] = [['10.101.0.1', '10.101.0.255']]
result = NetworkConfigurationValidator._check_for_ip_conflicts(
mgmt, mgmt_db, nm, 'ip_ranges', False)
ranges = validator._get_network_ip_ranges(
mgmt, mgmt_db, 'ip_ranges', False)
result = validator._check_ips_out_of_ip_ranges(mgmt_db, nm, ranges)
self.assertTrue(result)