Add pre-flight check for device pristinity

Add `non-pristine` key to `list-disks` action.

No longer attempt to do initializtion of `osd-journal` devices.

Make py27 test noop

Flip pep8 test to py3

Partial-Bug: #1698154
Change-Id: I0ca574fa7f0683b4e8a693b9f62fbf6b39689789
Depends-On: I90a866aa138d18e4242783c42d4c7c587f696d7d
This commit is contained in:
Frode Nordahl 2018-06-01 12:15:01 +02:00
parent bebf8601c9
commit 352d699387
No known key found for this signature in database
GPG Key ID: 6A5D59A3BA48373F
9 changed files with 252 additions and 115 deletions

View File

@ -25,7 +25,18 @@ resume:
Set the local osd units in the charm to 'in'. Note that the pause option
does NOT stop the osd processes.
list-disks:
description: List the unmounted disk on the specified unit
description: |
List disks
.
The 'disks' key is populated with block devices that are known by udev,
are not mounted and not mentioned in 'osd-journal' configuration option.
.
The 'blacklist' key is populated with osd-devices in the blacklist stored
in the local kv store of this specific unit.
.
The 'non-pristine' key is populated with block devices that are known by
udev, are not mounted, not mentioned in 'osd-journal' configuration option
and are currently not eligible for use because of presence of foreign data.
add-disk:
description: Add disk(s) to Ceph
params:

View File

@ -15,10 +15,17 @@
# limitations under the License.
"""
List unmounted devices.
List disks
This script will get all block devices known by udev and check if they
are mounted so that we can give unmounted devices to the administrator.
The 'disks' key is populated with block devices that are known by udev,
are not mounted and not mentioned in 'osd-journal' configuration option.
The 'blacklist' key is populated with osd-devices in the blacklist stored
in the local kv store of this specific unit.
The 'non-pristine' key is populated with block devices that are known by
udev, are not mounted, not mentioned in 'osd-journal' configuration option
and are currently not eligible for use because of presence of foreign data.
"""
import sys
@ -32,7 +39,15 @@ import ceph.utils
import utils
if __name__ == '__main__':
non_pristine = []
osd_journal = utils.get_journal_devices()
for dev in list(set(ceph.utils.unmounted_disks()) - set(osd_journal)):
if (not ceph.utils.is_active_bluestore_device(dev) and
not ceph.utils.is_pristine_disk(dev)):
non_pristine.append(dev)
hookenv.action_set({
'disks': ceph.utils.unmounted_disks(),
'disks': list(set(ceph.utils.unmounted_disks()) - set(osd_journal)),
'blacklist': utils.get_blacklist(),
'non-pristine': non_pristine,
})

View File

@ -20,7 +20,6 @@ import glob
import os
import shutil
import sys
import tempfile
import socket
import subprocess
import netifaces
@ -41,6 +40,7 @@ from charmhelpers.core.hookenv import (
Hooks,
UnregisteredHookError,
service_name,
status_get,
status_set,
storage_get,
storage_list,
@ -76,6 +76,7 @@ from utils import (
get_public_addr,
get_cluster_addr,
get_blacklist,
get_journal_devices,
)
from charmhelpers.contrib.openstack.alternatives import install_alternative
from charmhelpers.contrib.network.ip import (
@ -85,6 +86,9 @@ from charmhelpers.contrib.network.ip import (
)
from charmhelpers.contrib.storage.linux.ceph import (
CephConfContext)
from charmhelpers.contrib.storage.linux.utils import (
is_device_mounted,
)
from charmhelpers.contrib.charmsupport import nrpe
from charmhelpers.contrib.hardening.harden import harden
@ -357,38 +361,6 @@ def emit_cephconf(upgrading=False):
charm_ceph_conf, 90)
JOURNAL_ZAPPED = '/var/lib/ceph/journal_zapped'
def read_zapped_journals():
if os.path.exists(JOURNAL_ZAPPED):
with open(JOURNAL_ZAPPED, 'rt', encoding='UTF-8') as zapfile:
zapped = set(
filter(None,
[l.strip() for l in zapfile.readlines()]))
log("read zapped: {}".format(zapped), level=DEBUG)
return zapped
return set()
def write_zapped_journals(journal_devs):
tmpfh, tmpfile = tempfile.mkstemp()
with os.fdopen(tmpfh, 'wb') as zapfile:
log("write zapped: {}".format(journal_devs),
level=DEBUG)
zapfile.write('\n'.join(sorted(list(journal_devs))).encode('UTF-8'))
shutil.move(tmpfile, JOURNAL_ZAPPED)
def check_overlap(journaldevs, datadevs):
if not journaldevs.isdisjoint(datadevs):
msg = ("Journal/data devices mustn't"
" overlap; journal: {0}, data: {1}".format(journaldevs,
datadevs))
log(msg, level=ERROR)
raise ValueError(msg)
@hooks.hook('config-changed')
@harden()
def config_changed():
@ -438,13 +410,28 @@ def prepare_disks_and_activate():
vaultlocker.write_vaultlocker_conf(context)
osd_journal = get_journal_devices()
check_overlap(osd_journal, set(get_devices()))
if not osd_journal.isdisjoint(set(get_devices())):
raise ValueError('`osd-journal` and `osd-devices` options must not'
'overlap.')
log("got journal devs: {}".format(osd_journal), level=DEBUG)
already_zapped = read_zapped_journals()
non_zapped = osd_journal - already_zapped
for journ in non_zapped:
ceph.maybe_zap_journal(journ)
write_zapped_journals(osd_journal)
# pre-flight check of eligible device pristinity
devices = get_devices()
# filter osd-devices that are file system paths
devices = [dev for dev in devices if dev.startswith('/dev')]
# filter osd-devices that does not exist on this unit
devices = [dev for dev in devices if os.path.exists(dev)]
# filter osd-devices that are already mounted
devices = [dev for dev in devices if not is_device_mounted(dev)]
# filter osd-devices that are active bluestore devices
devices = [dev for dev in devices
if not ceph.is_active_bluestore_device(dev)]
log('Checking for pristine devices: "{}"'.format(devices), level=DEBUG)
if not all(ceph.is_pristine_disk(dev) for dev in devices):
status_set('blocked',
'Non-pristine devices detected, consult '
'`list-disks`, `zap-disk` and `blacklist-*` actions.')
return
if ceph.is_bootstrapped():
log('ceph bootstrapped, rescanning disks')
@ -521,20 +508,6 @@ def get_devices():
return [device for device in devices if device not in _blacklist]
def get_journal_devices():
if config('osd-journal'):
devices = [l.strip() for l in config('osd-journal').split(' ')]
else:
devices = []
storage_ids = storage_list('osd-journals')
devices.extend((storage_get('location', s) for s in storage_ids))
# Filter out any devices in the action managed unit-local device blacklist
_blacklist = get_blacklist()
return set(device for device in devices
if device not in _blacklist and os.path.exists(device))
@hooks.hook('mon-relation-changed',
'mon-relation-departed')
def mon_relation():
@ -631,13 +604,15 @@ def assess_status():
# Check for OSD device creation parity i.e. at least some devices
# must have been presented and used for this charm to be operational
(prev_status, prev_message) = status_get()
running_osds = ceph.get_running_osds()
if not running_osds:
status_set('blocked',
'No block devices detected using current configuration')
else:
status_set('active',
'Unit is ready ({} OSD)'.format(len(running_osds)))
if prev_status != 'blocked':
if not running_osds:
status_set('blocked',
'No block devices detected using current configuration')
else:
status_set('active',
'Unit is ready ({} OSD)'.format(len(running_osds)))
@hooks.hook('update-status')

View File

@ -12,8 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import socket
import re
import os
import socket
from charmhelpers.core.hookenv import (
unit_get,
cached,
@ -22,6 +24,8 @@ from charmhelpers.core.hookenv import (
log,
DEBUG,
status_set,
storage_get,
storage_list,
)
from charmhelpers.core import unitdata
from charmhelpers.fetch import (
@ -39,6 +43,7 @@ from charmhelpers.contrib.network.ip import (
get_ipv6_addr
)
TEMPLATES_DIR = 'templates'
try:
@ -213,3 +218,17 @@ def get_blacklist():
"""Get blacklist stored in the local kv() store"""
db = unitdata.kv()
return db.get('osd-blacklist', [])
def get_journal_devices():
if config('osd-journal'):
devices = [l.strip() for l in config('osd-journal').split(' ')]
else:
devices = []
storage_ids = storage_list('osd-journals')
devices.extend((storage_get('location', s) for s in storage_ids))
# Filter out any devices in the action managed unit-local device blacklist
_blacklist = get_blacklist()
return set(device for device in devices
if device not in _blacklist and os.path.exists(device))

View File

@ -66,7 +66,6 @@ from charmhelpers.contrib.storage.linux.ceph import (
from charmhelpers.contrib.storage.linux.utils import (
is_block_device,
is_device_mounted,
zap_disk,
)
from charmhelpers.contrib.openstack.utils import (
get_os_codename_install_source,
@ -870,7 +869,42 @@ def get_partition_list(dev):
raise
def is_pristine_disk(dev):
"""
Read first 2048 bytes (LBA 0 - 3) of block device to determine whether it
is actually all zeros and safe for us to use.
Existing partitioning tools does not discern between a failure to read from
block device, failure to understand a partition table and the fact that a
block device has no partition table. Since we need to be positive about
which is which we need to read the device directly and confirm ourselves.
:param dev: Path to block device
:type dev: str
:returns: True all 2048 bytes == 0x0, False if not
:rtype: bool
"""
want_bytes = 2048
f = open(dev, 'rb')
data = f.read(want_bytes)
read_bytes = len(data)
if read_bytes != want_bytes:
log('{}: short read, got {} bytes expected {}.'
.format(dev, read_bytes, want_bytes), level=WARNING)
return False
return all(byte == 0x0 for byte in data)
def is_osd_disk(dev):
db = kv()
osd_devices = db.get('osd-devices', [])
if dev in osd_devices:
log('Device {} already processed by charm,'
' skipping'.format(dev))
return True
partitions = get_partition_list(dev)
for partition in partitions:
try:
@ -1296,15 +1330,6 @@ def update_monfs():
pass
def maybe_zap_journal(journal_dev):
if is_osd_disk(journal_dev):
log('Looks like {} is already an OSD data'
' or journal, skipping.'.format(journal_dev))
return
zap_disk(journal_dev)
log("Zapped journal device {}".format(journal_dev))
def get_partitions(dev):
cmd = ['partx', '--raw', '--noheadings', dev]
try:

View File

@ -15,6 +15,7 @@
# limitations under the License.
import amulet
import re
import time
import keystoneclient
@ -712,6 +713,69 @@ class CephOsdBasicDeployment(OpenStackAmuletDeployment):
mtime, unit_name))
amulet.raise_status('Folder mtime is older than provided mtime')
def test_901_blocked_when_non_pristine_disk_appears(self):
"""
Validate that charm goes into blocked state when it is presented with
new block devices that have foreign data on them.
Instances used in UOSCI has a flavour with ephemeral storage in
addition to the bootable instance storage. The ephemeral storage
device is partitioned, formatted and mounted early in the boot process
by cloud-init.
As long as the device is mounted the charm will not attempt to use it.
If we unmount it and trigger the config-changed hook the block device
will appear as a new and previously untouched device for the charm.
One of the first steps of device eligibility checks should be to make
sure we are seeing a pristine and empty device before doing any
further processing.
As the ephemeral device will have data on it we can use it to validate
that these checks work as intended.
"""
u.log.debug('Checking behaviour when non-pristine disks appear...')
u.log.debug('Configuring ephemeral-unmount...')
self.d.configure('ceph-osd', {'ephemeral-unmount': '/mnt',
'osd-devices': '/dev/vdb'})
self._auto_wait_for_status(message=re.compile('Non-pristine.*'),
include_only=['ceph-osd'])
u.log.debug('Units now in blocked state, running zap-disk action...')
action_ids = []
self.ceph_osd_sentry = self.d.sentry['ceph-osd'][0]
for unit in range(0, 3):
zap_disk_params = {
'devices': '/dev/vdb',
'i-really-mean-it': True,
}
action_id = u.run_action(self.d.sentry['ceph-osd'][unit],
'zap-disk', params=zap_disk_params)
action_ids.append(action_id)
for unit in range(0, 3):
assert u.wait_on_action(action_ids[unit]), (
'zap-disk action failed.')
u.log.debug('Running add-disk action...')
action_ids = []
for unit in range(0, 3):
add_disk_params = {
'osd-devices': '/dev/vdb',
}
action_id = u.run_action(self.d.sentry['ceph-osd'][unit],
'add-disk', params=add_disk_params)
action_ids.append(action_id)
# NOTE(fnordahl): LP: #1774694
# for unit in range(0, 3):
# assert u.wait_on_action(action_ids[unit]), (
# 'add-disk action failed.')
u.log.debug('Wait for idle/ready status...')
self._auto_wait_for_status(include_only=['ceph-osd'])
u.log.debug('OK')
def test_910_pause_and_resume(self):
"""The services can be paused and resumed. """
u.log.debug('Checking pause and resume actions...')

10
tox.ini
View File

@ -18,11 +18,11 @@ whitelist_externals = juju
passenv = HOME TERM AMULET_* CS_API_*
[testenv:py27]
basepython = python2.7
deps = -r{toxinidir}/requirements.txt
-r{toxinidir}/test-requirements.txt
# temporarily disable py27
commands = /bin/true
# ceph charms are Python3-only, but py27 unit test target
# is required by OpenStack Governance. Remove this shim as soon as
# permitted. http://governance.openstack.org/reference/cti/python_cti.html
whitelist_externals = true
commands = true
[testenv:py35]
basepython = python3.5

View File

@ -405,21 +405,6 @@ class CephHooksTestCase(unittest.TestCase):
devices = ceph_hooks.get_devices()
self.assertEqual(devices, ['/dev/vda', '/dev/vdb'])
@patch('os.path.exists')
@patch.object(ceph_hooks, 'storage_list')
@patch.object(ceph_hooks, 'config')
def test_get_journal_devices(self, mock_config, mock_storage_list,
mock_os_path_exists):
'''Devices returned as expected'''
config = {'osd-journal': '/dev/vda /dev/vdb'}
mock_config.side_effect = lambda key: config[key]
mock_storage_list.return_value = []
mock_os_path_exists.return_value = True
devices = ceph_hooks.get_journal_devices()
mock_storage_list.assert_called()
mock_os_path_exists.assert_called()
self.assertEqual(devices, set(['/dev/vda', '/dev/vdb']))
@patch.object(ceph_hooks, 'get_blacklist')
@patch.object(ceph_hooks, 'storage_list')
@patch.object(ceph_hooks, 'config')
@ -435,26 +420,6 @@ class CephHooksTestCase(unittest.TestCase):
mock_get_blacklist.assert_called()
self.assertEqual(devices, ['/dev/vdb'])
@patch('os.path.exists')
@patch.object(ceph_hooks, 'get_blacklist')
@patch.object(ceph_hooks, 'storage_list')
@patch.object(ceph_hooks, 'config')
def test_get_journal_devices_blacklist(self, mock_config,
mock_storage_list,
mock_get_blacklist,
mock_os_path_exists):
'''Devices returned as expected when blacklist in effect'''
config = {'osd-journal': '/dev/vda /dev/vdb'}
mock_config.side_effect = lambda key: config[key]
mock_storage_list.return_value = []
mock_get_blacklist.return_value = ['/dev/vda']
mock_os_path_exists.return_value = True
devices = ceph_hooks.get_journal_devices()
mock_storage_list.assert_called()
mock_os_path_exists.assert_called()
mock_get_blacklist.assert_called()
self.assertEqual(devices, set(['/dev/vdb']))
@patch.object(ceph_hooks, 'log')
@patch.object(ceph_hooks, 'config')
@patch('os.environ')

View File

@ -0,0 +1,63 @@
# Copyright 2016 Canonical Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from mock import patch
with patch('charmhelpers.contrib.hardening.harden.harden') as mock_dec:
mock_dec.side_effect = (lambda *dargs, **dkwargs: lambda f:
lambda *args, **kwargs: f(*args, **kwargs))
import utils
class CephUtilsTestCase(unittest.TestCase):
def setUp(self):
super(CephUtilsTestCase, self).setUp()
@patch('os.path.exists')
@patch.object(utils, 'storage_list')
@patch.object(utils, 'config')
def test_get_journal_devices(self, mock_config, mock_storage_list,
mock_os_path_exists):
'''Devices returned as expected'''
config = {'osd-journal': '/dev/vda /dev/vdb'}
mock_config.side_effect = lambda key: config[key]
mock_storage_list.return_value = []
mock_os_path_exists.return_value = True
devices = utils.get_journal_devices()
mock_storage_list.assert_called()
mock_os_path_exists.assert_called()
self.assertEqual(devices, set(['/dev/vda', '/dev/vdb']))
@patch('os.path.exists')
@patch.object(utils, 'get_blacklist')
@patch.object(utils, 'storage_list')
@patch.object(utils, 'config')
def test_get_journal_devices_blacklist(self, mock_config,
mock_storage_list,
mock_get_blacklist,
mock_os_path_exists):
'''Devices returned as expected when blacklist in effect'''
config = {'osd-journal': '/dev/vda /dev/vdb'}
mock_config.side_effect = lambda key: config[key]
mock_storage_list.return_value = []
mock_get_blacklist.return_value = ['/dev/vda']
mock_os_path_exists.return_value = True
devices = utils.get_journal_devices()
mock_storage_list.assert_called()
mock_os_path_exists.assert_called()
mock_get_blacklist.assert_called()
self.assertEqual(devices, set(['/dev/vdb']))