diff --git a/actions.yaml b/actions.yaml index 7858d51f..731de13e 100644 --- a/actions.yaml +++ b/actions.yaml @@ -24,17 +24,6 @@ resume: description: | Set the local osd units in the charm to 'in'. Note that the pause option does NOT stop the osd processes. -replace-osd: - description: Replace a failed osd with a fresh disk - params: - osd-number: - type: integer - description: The osd number to operate on. Example 99. Hint you can get this information from `ceph osd tree`. - replacement-device: - type: string - description: The replacement device to use. Example /dev/sdb. - required: [osd-number, replacement-device] - additionalProperties: false list-disks: description: List the unmounted disk on the specified unit add-disk: diff --git a/actions/replace-osd b/actions/replace-osd deleted file mode 120000 index d9f1a694..00000000 --- a/actions/replace-osd +++ /dev/null @@ -1 +0,0 @@ -replace_osd.py \ No newline at end of file diff --git a/actions/replace_osd.py b/actions/replace_osd.py deleted file mode 100755 index 297ec177..00000000 --- a/actions/replace_osd.py +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright 2016 Canonical Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys - -sys.path.append('hooks/') -sys.path.append('lib/') - -import charmhelpers.core.hookenv as hookenv - -import ceph.utils - -""" -Given a OSD number this script will attempt to turn that back into a mount -point and then replace the OSD with a new one. -""" - - -def get_disk_stats(): - try: - # https://www.kernel.org/doc/Documentation/iostats.txt - with open('/proc/diskstats', 'rt', encoding='UTF-8') as diskstats: - return diskstats.readlines() - except IOError as err: - hookenv.log('Could not open /proc/diskstats. Error: {}' - .format(str(err))) - hookenv.action_fail( - 'replace-osd failed because /proc/diskstats could not ' - 'be opened {}'.format(str(err))) - return None - - -def lookup_device_name(major_number, minor_number): - """ - - :param major_number: int. The major device number - :param minor_number: int. The minor device number - :return: string. The name of the device. Example: /dev/sda. - Returns None on error. - """ - diskstats = get_disk_stats() - for line in diskstats: - parts = line.split() - if not len(parts) > 3: - # Skip bogus lines - continue - try: - if int(parts[0]) is major_number and int(parts[1]) is \ - minor_number: - # Found our device. Return its name - return parts[2] - except ValueError as value_err: - hookenv.log('Could not convert {} or {} into an integer. Error: {}' - .format(parts[0], parts[1], str(value_err))) - continue - return None - - -def get_device_number(osd_number): - """ - This function will return a tuple of (major_number, minor_number) - device number for the given osd. - :param osd_number: int - :rtype : (major_number,minor_number) - """ - path = "/var/lib/ceph/osd/ceph-{}".format(osd_number) - info = os.lstat(path) - major_number = os.major(info.st_dev) - minor_number = os.minor(info.st_dev) - return major_number, minor_number - - -if __name__ == '__main__': - dead_osd_number = hookenv.action_get("osd-number") - replacement_device = hookenv.action_get("replacement-device") - major, minor = get_device_number(dead_osd_number) - device_name = lookup_device_name(major, minor) - osd_format = hookenv.config('osd-format') - osd_journal = hookenv.config('osd-journal') - - ceph.utils.replace_osd(dead_osd_number=dead_osd_number, - dead_osd_device="/dev/{}".format(device_name), - new_osd_device=replacement_device, - osd_format=osd_format, - osd_journal=osd_journal) diff --git a/lib/ceph/utils.py b/lib/ceph/utils.py index 329d69d5..d281a3b6 100644 --- a/lib/ceph/utils.py +++ b/lib/ceph/utils.py @@ -13,8 +13,6 @@ # limitations under the License. import collections -import ctypes -import errno import glob import json import os @@ -25,7 +23,6 @@ import socket import subprocess import sys import time -import shutil import uuid from datetime import datetime @@ -38,7 +35,6 @@ from charmhelpers.core.host import ( cmp_pkgrevno, lsb_release, mkdir, - mounts, owner, service_restart, service_start, @@ -835,114 +831,6 @@ CEPH_PARTITIONS = [ ] -def umount(mount_point): - """This function unmounts a mounted directory forcibly. This will - be used for unmounting broken hard drive mounts which may hang. - - If umount returns EBUSY this will lazy unmount. - - :param mount_point: str. A String representing the filesystem mount point - :returns: int. Returns 0 on success. errno otherwise. - """ - libc_path = ctypes.util.find_library("c") - libc = ctypes.CDLL(libc_path, use_errno=True) - - # First try to umount with MNT_FORCE - ret = libc.umount(mount_point, 1) - if ret < 0: - err = ctypes.get_errno() - if err == errno.EBUSY: - # Detach from try. IE lazy umount - ret = libc.umount(mount_point, 2) - if ret < 0: - err = ctypes.get_errno() - return err - return 0 - else: - return err - return 0 - - -def replace_osd(dead_osd_number, - dead_osd_device, - new_osd_device, - osd_format, - osd_journal, - reformat_osd=False, - ignore_errors=False): - """This function will automate the replacement of a failed osd disk as much - as possible. It will revoke the keys for the old osd, remove it from the - crush map and then add a new osd into the cluster. - - :param dead_osd_number: The osd number found in ceph osd tree. Example: 99 - :param dead_osd_device: The physical device. Example: /dev/sda - :param osd_format: - :param osd_journal: - :param reformat_osd: - :param ignore_errors: - """ - host_mounts = mounts() - mount_point = None - for mount in host_mounts: - if mount[1] == dead_osd_device: - mount_point = mount[0] - # need to convert dev to osd number - # also need to get the mounted drive so we can tell the admin to - # replace it - try: - # Drop this osd out of the cluster. This will begin a - # rebalance operation - status_set('maintenance', 'Removing osd {}'.format(dead_osd_number)) - subprocess.check_output([ - 'ceph', - '--id', - 'osd-upgrade', - 'osd', 'out', - 'osd.{}'.format(dead_osd_number)]) - - # Kill the osd process if it's not already dead - if systemd(): - service_stop('ceph-osd@{}'.format(dead_osd_number)) - else: - subprocess.check_output(['stop', 'ceph-osd', 'id={}'.format( - dead_osd_number)]) - # umount if still mounted - ret = umount(mount_point) - if ret < 0: - raise RuntimeError('umount {} failed with error: {}'.format( - mount_point, os.strerror(ret))) - # Clean up the old mount point - shutil.rmtree(mount_point) - subprocess.check_output([ - 'ceph', - '--id', - 'osd-upgrade', - 'osd', 'crush', 'remove', - 'osd.{}'.format(dead_osd_number)]) - # Revoke the OSDs access keys - subprocess.check_output([ - 'ceph', - '--id', - 'osd-upgrade', - 'auth', 'del', - 'osd.{}'.format(dead_osd_number)]) - subprocess.check_output([ - 'ceph', - '--id', - 'osd-upgrade', - 'osd', 'rm', - 'osd.{}'.format(dead_osd_number)]) - status_set('maintenance', 'Setting up replacement osd {}'.format( - new_osd_device)) - osdize(new_osd_device, - osd_format, - osd_journal, - reformat_osd, - ignore_errors) - except subprocess.CalledProcessError as e: - log('replace_osd failed with error: ' + e.output) - - def get_partition_list(dev): """Lists the partitions of a block device. @@ -2248,19 +2136,14 @@ def wait_on_previous_node(upgrade_key, service, previous_node, version): def get_upgrade_position(osd_sorted_list, match_name): """Return the upgrade position for the given osd. - :param osd_sorted_list: Osds sorted - :type osd_sorted_list: [str] - :param match_name: The osd name to match - :type match_name: str - :returns: The position of the name - :rtype: int - :raises: ValueError if name is not found + :param osd_sorted_list: list. Osds sorted + :param match_name: str. The osd name to match + :returns: int. The position or None if not found """ for index, item in enumerate(osd_sorted_list): if item.name == match_name: return index - raise ValueError("osd name '{}' not found in get_upgrade_position list" - .format(match_name)) + return None # Edge cases: diff --git a/unit_tests/test_replace_osd.py b/unit_tests/test_replace_osd.py deleted file mode 100644 index 141ffbd2..00000000 --- a/unit_tests/test_replace_osd.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright 2016 Canonical Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import errno -import posix - -from mock import call, Mock, patch - -import test_utils -import ceph.utils as ceph -import replace_osd - -TO_PATCH = [ - 'ctypes', - 'status_set', -] - -proc_data = [ - ' 8 0 sda 2291336 263100 108136080 1186276 28844343 28798167 ' - '2145908072 49433216 0 7550032 50630100\n', - ' 8 1 sda1 1379 1636 8314 692 75 17 1656 0 0 496 692\n', - ' 8 2 sda2 1 0 2 0 0 0 0 0 0 0 0\n', -] - - -def umount_busy(*args): - # MNT_FORCE - if args[1] == 1: - return -1 - # MNT_DETACH - if args[1] == 2: - return 0 - - -class ReplaceOsdTestCase(test_utils.CharmTestCase): - def setUp(self): - super(ReplaceOsdTestCase, self).setUp(ceph, TO_PATCH) - - def test_umount_ebusy(self): - self.ctypes.util.find_library.return_value = 'libc.so.6' - umount_mock = Mock() - self.ctypes.CDLL.return_value = umount_mock - umount_mock.umount.side_effect = umount_busy - self.ctypes.get_errno.return_value = errno.EBUSY - - ret = ceph.umount('/some/osd/mount') - umount_mock.assert_has_calls([ - call.umount('/some/osd/mount', 1), - call.umount('/some/osd/mount', 2), - ]) - assert ret == 0 - - def test_umount(self): - self.ctypes.util.find_library.return_value = 'libc.so.6' - umount_mock = Mock() - self.ctypes.CDLL.return_value = umount_mock - umount_mock.umount.return_value = 0 - - ret = ceph.umount('/some/osd/mount') - umount_mock.assert_has_calls([ - call.umount('/some/osd/mount', 1), - ]) - assert ret == 0 - - @patch.object(ceph, 'mounts') - @patch.object(ceph.subprocess, 'check_output') - @patch.object(ceph, 'umount') - @patch.object(ceph, 'osdize') - @patch.object(ceph, 'shutil') - @patch.object(ceph, 'systemd') - @patch.object(ceph, 'ceph_user') - def test_replace_osd(self, ceph_user, systemd, shutil, osdize, umount, - check_output, mounts): - ceph_user.return_value = "ceph" - mounts.return_value = [['/var/lib/ceph/osd/ceph-a', '/dev/sda']] - check_output.return_value = True - self.status_set.return_value = None - systemd.return_value = False - umount.return_value = 0 - osdize.return_value = None - shutil.rmtree.return_value = None - ceph.replace_osd(dead_osd_number=0, - dead_osd_device='/dev/sda', - new_osd_device='/dev/sdb', - osd_format=True, - osd_journal=None, - reformat_osd=False, - ignore_errors=False) - check_output.assert_has_calls( - [ - call(['ceph', '--id', 'osd-upgrade', - 'osd', 'out', 'osd.0']), - call(['stop', 'ceph-osd', 'id=0']), - call(['ceph', '--id', - 'osd-upgrade', 'osd', 'crush', 'remove', 'osd.0']), - call(['ceph', '--id', - 'osd-upgrade', 'auth', 'del', 'osd.0']), - call(['ceph', '--id', - 'osd-upgrade', 'osd', 'rm', 'osd.0']) - ] - ) - - @patch('replace_osd.get_disk_stats') - def test_lookup_device_name(self, disk_stats): - disk_stats.return_value = proc_data - dev_name = replace_osd.lookup_device_name(major_number=8, - minor_number=0) - assert dev_name == 'sda', "dev_name: {}".format(dev_name) - - @patch('replace_osd.os.lstat') - def test_get_device_number(self, lstat): - lstat.return_value = posix.stat_result([ - 16877, 16, 51729, 3, 0, 0, 217, 0, 1458086872, 1458086872 - ]) - major, minor = replace_osd.get_device_number(1) - assert major == 202 - assert minor == 17