Allow Kolla Ceph to deploy bluestore OSDs

Support deploying bluestore OSDs with Kolla Ceph. With this patch,
Kolla Ceph can deploy bluestore OSDs across ONE, TWO or THREE storage
devices: the data block alone, or with the WAL and/or DB placed on
separate devices.

Before deploying a bluestore OSD, please prepare the storage devices.
Refer to [1] for the detailed device initialization steps.
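
For illustration only (the device paths and exact label names here
are assumptions; [1] and find_disk.py's label matching are
authoritative), a three-device layout could be prepared roughly as:

  # Label the data (block) partition, plus optional WAL and DB
  # partitions on separate devices, so they can be discovered by GPT
  # partition name.
  parted /dev/sdb -s -- mklabel gpt mkpart KOLLA_CEPH_OSD_BOOTSTRAP_BS 1 -1
  parted /dev/sdc -s -- mklabel gpt mkpart KOLLA_CEPH_OSD_BOOTSTRAP_BS_W 1 -1
  parted /dev/sdd -s -- mklabel gpt mkpart KOLLA_CEPH_OSD_BOOTSTRAP_BS_D 1 -1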

extend_start.sh: initialize and start bluestore OSD

find_disk.py: search the devices for bluestore OSD partitions by
GPT partition label (a verification sketch follows below)

[1]: specs/kolla-ceph-bluestore.rst
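
Before bootstrapping, the labels can be sanity-checked with an
illustrative command such as the one below (not part of this change);
block, WAL and DB partitions are identified purely by the _BS, _BS_W
and _BS_D suffixes on the GPT partition name:

  # List partition labels so the bootstrap partitions can be verified
  # before deployment.
  lsblk -o NAME,PARTLABEL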

Partially-Implements: blueprint kolla-ceph-bluestore

Change-Id: I832f490de63e1aeb68814697cda610a51b622c1f
Signed-off-by: Tone Zhang <tone.zhang@arm.com>
Authored by Tone Zhang on 2018-05-08 08:12:28 +00:00, committed by tone.zhang
parent b9b872b0b5
commit 291ba252a5
3 changed files with 234 additions and 22 deletions


@@ -13,38 +13,97 @@ if [[ "${!KOLLA_BOOTSTRAP[@]}" ]]; then
# NOTE(SamYaple): Static gpt partcodes
CEPH_JOURNAL_TYPE_CODE="45B0969E-9B03-4F30-B4C6-B4B80CEFF106"
CEPH_OSD_TYPE_CODE="4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D"
CEPH_OSD_BS_WAL_TYPE_CODE="0FC63DAF-8483-4772-8E79-3D69D8477DE4"
CEPH_OSD_BS_DB_TYPE_CODE="CE8DF73C-B89D-45B0-AD98-D45332906d90"
# Wait for ceph quorum before proceeding
ceph quorum_status
if [[ "${USE_EXTERNAL_JOURNAL}" == "False" ]]; then
# Formatting disk for ceph
sgdisk --zap-all -- "${OSD_DEV}"
sgdisk --new=2:1M:5G -- "${JOURNAL_DEV}"
sgdisk --largest-new=1 -- "${OSD_DEV}"
if [[ "${OSD_STORETYPE}" == "bluestore" ]]; then
if [ -n "${OSD_BS_BLOCK_DEV}" ]; then
sgdisk --zap-all -- "${OSD_BS_BLOCK_DEV}"
sgdisk --new=1:0:+100M --mbrtogpt -- "${OSD_BS_BLOCK_DEV}"
sgdisk --largest-new=2 --mbrtogpt -- "${OSD_BS_BLOCK_DEV}"
sgdisk --zap-all -- "${OSD_BS_BLOCK_DEV}"2
fi
if [ -n "${OSD_BS_WAL_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_WAL_DEV}" ] && [ -n "${OSD_BS_WAL_PARTNUM}" ]; then
sgdisk --zap-all -- "${OSD_BS_WAL_DEV}""${OSD_BS_WAL_PARTNUM}"
fi
if [ -n "${OSD_BS_DB_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_DB_DEV}" ] && [ -n "${OSD_BS_DB_PARTNUM}" ]; then
sgdisk --zap-all -- "${OSD_BS_DB_DEV}""${OSD_BS_DB_PARTNUM}"
fi
else
sgdisk --zap-all -- "${OSD_DEV}"
sgdisk --new=2:1M:5G -- "${JOURNAL_DEV}"
sgdisk --largest-new=1 -- "${OSD_DEV}"
fi
# NOTE(SamYaple): This command may throw errors that we can safely ignore
partprobe || true
fi
OSD_ID=$(ceph osd create)
OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}"
mkdir -p "${OSD_DIR}"
if [[ "${OSD_STORETYPE}" == "bluestore" ]]; then
OSD_UUID=$(uuidgen)
OSD_ID=$(ceph osd new "${OSD_UUID}")
OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}"
mkdir -p "${OSD_DIR}"
if [[ "${OSD_FILESYSTEM}" == "btrfs" ]]; then
mkfs.btrfs -f "${OSD_PARTITION}"
elif [[ "${OSD_FILESYSTEM}" == "ext4" ]]; then
mkfs.ext4 "${OSD_PARTITION}"
mkfs.xfs -f "${OSD_BS_BLOCK_DEV}"1
mount "${OSD_BS_BLOCK_DEV}"1 "${OSD_DIR}"
# This will throw an error about no key existing. That is normal. It then
# creates the key in the next step.
ceph-osd -i "${OSD_ID}" --mkkey
echo "bluestore" > "${OSD_DIR}"/type
sgdisk "--change-name=2:KOLLA_CEPH_DATA_BS_B_${OSD_ID}" "--typecode=2:${CEPH_OSD_TYPE_CODE}" -- "${OSD_BS_BLOCK_DEV}"
if [ -n "${OSD_BS_WAL_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_WAL_DEV}" ] && [ -n "${OSD_BS_WAL_PARTNUM}" ]; then
sgdisk "--change-name="${OSD_BS_WAL_PARTNUM}":KOLLA_CEPH_DATA_BS_W_${OSD_ID}" "--typecode=1:${CEPH_OSD_BS_WAL_TYPE_CODE}" -- "${OSD_BS_WAL_DEV}"
fi
if [ -n "${OSD_BS_DB_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_DB_DEV}" ] && [ -n "${OSD_BS_DB_PARTNUM}" ]; then
sgdisk "--change-name="${OSD_BS_DB_PARTNUM}":KOLLA_CEPH_DATA_BS_D_${OSD_ID}" "--typecode=1:${CEPH_OSD_BS_DB_TYPE_CODE}" -- "${OSD_BS_DB_DEV}"
fi
partprobe || true
ln -sf /dev/disk/by-partlabel/KOLLA_CEPH_DATA_BS_B_"${OSD_ID}" "${OSD_DIR}"/block
if [ -n "${OSD_BS_WAL_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_WAL_DEV}" ] && [ -n "${OSD_BS_WAL_PARTNUM}" ]; then
ln -sf /dev/disk/by-partlabel/KOLLA_CEPH_DATA_BS_W_"${OSD_ID}" "${OSD_DIR}"/block.wal
fi
if [ -n "${OSD_BS_DB_DEV}" ] && [ "${OSD_BS_BLOCK_DEV}" != "${OSD_BS_DB_DEV}" ]i && [ -n "${OSD_BS_DB_PARTNUM}" ]; then
ln -sf /dev/disk/by-partlabel/KOLLA_CEPH_DATA_BS_D_"${OSD_ID}" "${OSD_DIR}"/block.db
fi
ceph-osd -i "${OSD_ID}" --mkfs -k "${OSD_DIR}"/keyring --osd-uuid "${OSD_UUID}"
ceph auth add "osd.${OSD_ID}" osd 'allow *' mon 'allow profile osd' -i "${OSD_DIR}/keyring"
umount "${OSD_BS_BLOCK_DEV}"1
else
mkfs.xfs -f "${OSD_PARTITION}"
fi
mount "${OSD_PARTITION}" "${OSD_DIR}"
OSD_ID=$(ceph osd create)
OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}"
mkdir -p "${OSD_DIR}"
# This will throw an error about no key existing. That is normal. It then
# creates the key in the next step.
ceph-osd -i "${OSD_ID}" --mkfs --osd-journal="${JOURNAL_PARTITION}" --mkkey
ceph auth add "osd.${OSD_ID}" osd 'allow *' mon 'allow profile osd' -i "${OSD_DIR}/keyring"
umount "${OSD_PARTITION}"
if [[ "${OSD_FILESYSTEM}" == "btrfs" ]]; then
mkfs.btrfs -f "${OSD_PARTITION}"
elif [[ "${OSD_FILESYSTEM}" == "ext4" ]]; then
mkfs.ext4 "${OSD_PARTITION}"
else
mkfs.xfs -f "${OSD_PARTITION}"
fi
mount "${OSD_PARTITION}" "${OSD_DIR}"
# This will throw an error about no key existing. That is normal. It then
# creates the key in the next step.
ceph-osd -i "${OSD_ID}" --mkfs --osd-journal="${JOURNAL_PARTITION}" --mkkey
ceph auth add "osd.${OSD_ID}" osd 'allow *' mon 'allow profile osd' -i "${OSD_DIR}/keyring"
umount "${OSD_PARTITION}"
fi
if [[ "${!CEPH_CACHE[@]}" ]]; then
CEPH_ROOT_NAME=cache
@@ -63,11 +122,20 @@ if [[ "${!KOLLA_BOOTSTRAP[@]}" ]]; then
ceph osd crush add "${OSD_ID}" "${OSD_INITIAL_WEIGHT}" host="${HOSTNAME}${CEPH_ROOT_NAME:+-${CEPH_ROOT_NAME}}"
# Setting partition name based on ${OSD_ID}
sgdisk "--change-name=${OSD_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}" "--typecode=${OSD_PARTITION_NUM}:${CEPH_OSD_TYPE_CODE}" -- "${OSD_DEV}"
sgdisk "--change-name=${JOURNAL_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}_J" "--typecode=${JOURNAL_PARTITION_NUM}:${CEPH_JOURNAL_TYPE_CODE}" -- "${JOURNAL_DEV}"
if [[ "${OSD_STORETYPE}" == "bluestore" ]]; then
sgdisk "--change-name=1:KOLLA_CEPH_BSDATA_${OSD_ID}" -- "${OSD_BS_BLOCK_DEV}"
else
sgdisk "--change-name=${OSD_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}" "--typecode=${OSD_PARTITION_NUM}:${CEPH_OSD_TYPE_CODE}" -- "${OSD_DEV}"
sgdisk "--change-name=${JOURNAL_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}_J" "--typecode=${JOURNAL_PARTITION_NUM}:${CEPH_JOURNAL_TYPE_CODE}" -- "${JOURNAL_DEV}"
fi
partprobe || true
exit 0
fi
OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}"
ARGS="-i ${OSD_ID} --osd-journal ${JOURNAL_PARTITION} -k ${OSD_DIR}/keyring"
if [[ "${OSD_STORETYPE}" == "bluestore" ]]; then
ARGS="-i ${OSD_ID}"
else
ARGS="-i ${OSD_ID} --osd-journal ${JOURNAL_PARTITION} -k ${OSD_DIR}/keyring"
fi


@@ -157,6 +157,7 @@ def extract_disk_info(ct, dev, name, use_udev):
kwargs['fs_uuid'] = get_id_fs_uuid(dev, use_udev)
kwargs['fs_label'] = dev.get('ID_FS_LABEL', '')
if dev.get('DEVTYPE', '') == 'partition':
kwargs['partition_label'] = name
kwargs['device'] = dev.find_parent('block').device_node
kwargs['partition'] = dev.device_node
kwargs['partition_num'] = \
@@ -187,6 +188,121 @@ def extract_disk_info(ct, dev, name, use_udev):
yield kwargs
def extract_disk_info_bs(ct, dev, name, use_udev):
if not dev:
return
kwargs = dict(bs_db_partition='', bs_db_label='', bs_db_device='',
bs_wal_partition='', bs_wal_label='', bs_wal_device='',
bs_wal_partition_num='', bs_db_partition_num='',
partition='', partition_label='', partition_num='',
device='', partition_usage='')
kwargs['fs_uuid'] = get_id_fs_uuid(dev, use_udev)
kwargs['fs_label'] = dev.get('ID_FS_LABEL', '')
if dev.get('DEVTYPE', '') == 'partition':
actual_name = get_id_part_entry_name(dev, use_udev)
if (('BOOTSTRAP_BS' in name and name in actual_name)
or ('BSDATA' in name and name in actual_name)):
if '_BS_D' in actual_name:
kwargs['partition_usage'] = 'block.db'
kwargs['bs_db_partition_num'] = \
re.sub(r'.*[^\d]', '', dev.device_node)
kwargs['bs_db_device'] = dev.device_node[:-1]
kwargs['bs_db_label'] = actual_name
return kwargs
if '_BS_W' in actual_name:
kwargs['partition_usage'] = 'block.wal'
kwargs['bs_wal_partition_num'] = \
re.sub(r'.*[^\d]', '', dev.device_node)
kwargs['bs_wal_device'] = dev.device_node[:-1]
kwargs['bs_wal_label'] = actual_name
return kwargs
if '_BS' in actual_name:
kwargs['partition_usage'] = 'block'
kwargs['partition'] = dev.device_node[:-1]
kwargs['partition_label'] = actual_name
kwargs['partition_num'] = \
re.sub(r'.*[^\d]', '', dev.device_node)
kwargs['device'] = dev.device_node[:-1]
return kwargs
return 0
def nb_of_block_device(disks):
block_info = dict()
block_info['block_label'] = list()
nb_of_blocks = 0
for item in disks:
if item['partition_usage'] == 'block':
block_info['block_label'].append(item['partition_label'])
nb_of_blocks += 1
block_info['nb_of_block'] = nb_of_blocks
return block_info
def combine_info(disks):
info = list()
blocks = nb_of_block_device(disks)
block_id = 0
while block_id < blocks['nb_of_block']:
final = dict()
idx = 0
idx_osd = idx_wal = idx_db = -1
for item in disks:
if (item['partition_usage'] == 'block' and
item['partition_label'] ==
blocks['block_label'][block_id]):
idx_osd = idx
elif (item['partition_usage'] == 'block.wal' and
item['bs_wal_label'] ==
blocks['block_label'][block_id].replace('_BS', '_BS_W')):
idx_wal = idx
elif (item['partition_usage'] == 'block.db' and
item['bs_db_label'] ==
blocks['block_label'][block_id].replace('_BS', '_BS_D')):
idx_db = idx
idx = idx + 1
# Write the block.db and block.wal information into the block item
# when block.db and block.wal partitions are found
if idx_wal != -1:
disks[idx_osd]['bs_wal_device'] = disks[idx_wal]['bs_wal_device']
disks[idx_osd]['bs_wal_partition_num'] = \
disks[idx_wal]['bs_wal_partition_num']
disks[idx_osd]['bs_wal_label'] = disks[idx_wal]['bs_wal_label']
disks[idx_wal]['partition_usage'] = ''
if idx_db != -1:
disks[idx_osd]['bs_db_device'] = disks[idx_db]['bs_db_device']
disks[idx_osd]['bs_db_partition_num'] = \
disks[idx_db]['bs_db_partition_num']
disks[idx_osd]['bs_db_label'] = disks[idx_db]['bs_db_label']
disks[idx_db]['partition_usage'] = ''
final['fs_uuid'] = disks[idx_osd]['fs_uuid']
final['fs_label'] = disks[idx_osd]['fs_label']
final['bs_db_device'] = disks[idx_osd]['bs_db_device']
final['bs_db_partition_num'] = disks[idx_osd]['bs_db_partition_num']
final['bs_db_label'] = disks[idx_osd]['bs_db_label']
final['bs_wal_device'] = disks[idx_osd]['bs_wal_device']
final['bs_wal_partition_num'] = disks[idx_osd]['bs_wal_partition_num']
final['bs_wal_label'] = disks[idx_osd]['bs_wal_label']
final['device'] = disks[idx_osd]['device']
final['partition'] = disks[idx_osd]['partition']
final['partition_label'] = disks[idx_osd]['partition_label']
final['partition_num'] = disks[idx_osd]['partition_num']
final['external_journal'] = False
final['journal'] = ''
final['journal_device'] = ''
final['journal_num'] = 0
info.append(final)
disks[idx_osd]['partition_usage'] = ''
block_id += 1
return info
def main():
argument_spec = dict(
match_mode=dict(required=False, choices=['strict', 'prefix'],
@@ -203,9 +319,33 @@ def main():
ret = list()
ct = pyudev.Context()
for dev in find_disk(ct, name, match_mode, use_udev):
for info in extract_disk_info(ct, dev, name, use_udev):
if '_BSDATA' in name:
info = extract_disk_info_bs(ct, dev, name, use_udev)
if info:
ret.append(info)
elif '_BS' in name:
info = extract_disk_info_bs(ct, dev, name, use_udev)
if info:
ret.append(info)
info = extract_disk_info_bs(ct, dev,
name.replace('_BS', '_BS_W'),
use_udev)
if info:
ret.append(info)
info = extract_disk_info_bs(ct, dev,
name.replace('_BS', '_BS_D'),
use_udev)
if info:
ret.append(info)
else:
for info in extract_disk_info(ct, dev, name, use_udev):
if info:
ret.append(info)
if '_BS' in name and len(ret) > 0:
ret = combine_info(ret)
module.exit_json(disks=json.dumps(ret))
except Exception as e:


@@ -0,0 +1,4 @@
---
features:
- |
Support deploying bluestore OSDs with Kolla Ceph in the Rocky release.