Remove replace_osd and supporting functions
At present the action using this function does not work. While looking to repair the functionality I found a number of issues with the current implementation. For now I suggest we remove this functionality, and at some point we may consider replacing it with a `remove-disk` action. Change-Id: Id61b87927c43d807aacc93cf05ec8f88d91b7a39
This commit is contained in:
parent
71c1b138f3
commit
20ba3c876a
112
ceph/utils.py
112
ceph/utils.py
|
@ -13,8 +13,6 @@
|
|||
# limitations under the License.
|
||||
|
||||
import collections
|
||||
import ctypes
|
||||
import errno
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
|
@ -25,7 +23,6 @@ import socket
|
|||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import shutil
|
||||
import uuid
|
||||
|
||||
from datetime import datetime
|
||||
|
@ -38,7 +35,6 @@ from charmhelpers.core.host import (
|
|||
cmp_pkgrevno,
|
||||
lsb_release,
|
||||
mkdir,
|
||||
mounts,
|
||||
owner,
|
||||
service_restart,
|
||||
service_start,
|
||||
|
@ -835,114 +831,6 @@ CEPH_PARTITIONS = [
|
|||
]
|
||||
|
||||
|
||||
def umount(mount_point):
|
||||
"""This function unmounts a mounted directory forcibly. This will
|
||||
be used for unmounting broken hard drive mounts which may hang.
|
||||
|
||||
If umount returns EBUSY this will lazy unmount.
|
||||
|
||||
:param mount_point: str. A String representing the filesystem mount point
|
||||
:returns: int. Returns 0 on success. errno otherwise.
|
||||
"""
|
||||
libc_path = ctypes.util.find_library("c")
|
||||
libc = ctypes.CDLL(libc_path, use_errno=True)
|
||||
|
||||
# First try to umount with MNT_FORCE
|
||||
ret = libc.umount(mount_point, 1)
|
||||
if ret < 0:
|
||||
err = ctypes.get_errno()
|
||||
if err == errno.EBUSY:
|
||||
# Detach from try. IE lazy umount
|
||||
ret = libc.umount(mount_point, 2)
|
||||
if ret < 0:
|
||||
err = ctypes.get_errno()
|
||||
return err
|
||||
return 0
|
||||
else:
|
||||
return err
|
||||
return 0
|
||||
|
||||
|
||||
def replace_osd(dead_osd_number,
|
||||
dead_osd_device,
|
||||
new_osd_device,
|
||||
osd_format,
|
||||
osd_journal,
|
||||
reformat_osd=False,
|
||||
ignore_errors=False):
|
||||
"""This function will automate the replacement of a failed osd disk as much
|
||||
as possible. It will revoke the keys for the old osd, remove it from the
|
||||
crush map and then add a new osd into the cluster.
|
||||
|
||||
:param dead_osd_number: The osd number found in ceph osd tree. Example: 99
|
||||
:param dead_osd_device: The physical device. Example: /dev/sda
|
||||
:param osd_format:
|
||||
:param osd_journal:
|
||||
:param reformat_osd:
|
||||
:param ignore_errors:
|
||||
"""
|
||||
host_mounts = mounts()
|
||||
mount_point = None
|
||||
for mount in host_mounts:
|
||||
if mount[1] == dead_osd_device:
|
||||
mount_point = mount[0]
|
||||
# need to convert dev to osd number
|
||||
# also need to get the mounted drive so we can tell the admin to
|
||||
# replace it
|
||||
try:
|
||||
# Drop this osd out of the cluster. This will begin a
|
||||
# rebalance operation
|
||||
status_set('maintenance', 'Removing osd {}'.format(dead_osd_number))
|
||||
subprocess.check_output([
|
||||
'ceph',
|
||||
'--id',
|
||||
'osd-upgrade',
|
||||
'osd', 'out',
|
||||
'osd.{}'.format(dead_osd_number)])
|
||||
|
||||
# Kill the osd process if it's not already dead
|
||||
if systemd():
|
||||
service_stop('ceph-osd@{}'.format(dead_osd_number))
|
||||
else:
|
||||
subprocess.check_output(['stop', 'ceph-osd', 'id={}'.format(
|
||||
dead_osd_number)])
|
||||
# umount if still mounted
|
||||
ret = umount(mount_point)
|
||||
if ret < 0:
|
||||
raise RuntimeError('umount {} failed with error: {}'.format(
|
||||
mount_point, os.strerror(ret)))
|
||||
# Clean up the old mount point
|
||||
shutil.rmtree(mount_point)
|
||||
subprocess.check_output([
|
||||
'ceph',
|
||||
'--id',
|
||||
'osd-upgrade',
|
||||
'osd', 'crush', 'remove',
|
||||
'osd.{}'.format(dead_osd_number)])
|
||||
# Revoke the OSDs access keys
|
||||
subprocess.check_output([
|
||||
'ceph',
|
||||
'--id',
|
||||
'osd-upgrade',
|
||||
'auth', 'del',
|
||||
'osd.{}'.format(dead_osd_number)])
|
||||
subprocess.check_output([
|
||||
'ceph',
|
||||
'--id',
|
||||
'osd-upgrade',
|
||||
'osd', 'rm',
|
||||
'osd.{}'.format(dead_osd_number)])
|
||||
status_set('maintenance', 'Setting up replacement osd {}'.format(
|
||||
new_osd_device))
|
||||
osdize(new_osd_device,
|
||||
osd_format,
|
||||
osd_journal,
|
||||
reformat_osd,
|
||||
ignore_errors)
|
||||
except subprocess.CalledProcessError as e:
|
||||
log('replace_osd failed with error: ' + e.output)
|
||||
|
||||
|
||||
def get_partition_list(dev):
|
||||
"""Lists the partitions of a block device.
|
||||
|
||||
|
|
Loading…
Reference in New Issue