Allow external ceph journals and fix bootstrap

This allows us to specify external journals for OSDs, which can greatly
improve performance when the external journals are on solid-state
drives.

The new lookup and startup methods fix the races that previously
prevented OSDs from being created properly.

This retains the same functionality as before and is completely
compatible with the previous method and labels; however, it does set
new labels for all new bootstrap OSDs. This is due to a limitation
on the length of a GPT partition name.

Closes-Bug: #1558853
DocImpact
Partially-Implements: blueprint ceph-improvements
Change-Id: I61fd10cb35c67dabc53bd82270f26909ef51fc38
This commit is contained in:
SamYaple 2016-03-18 13:52:32 +00:00
parent 6d0cd535d3
commit 5250a00781
4 changed files with 93 additions and 48 deletions

View File

@ -2,7 +2,7 @@
- name: Looking up disks to bootstrap for Ceph
command: docker exec -t kolla_toolbox /usr/bin/ansible localhost
-m find_disks
-a "partition_name='KOLLA_CEPH_OSD_BOOTSTRAP'"
-a "partition_name='KOLLA_CEPH_OSD_BOOTSTRAP' match_mode='prefix'"
register: osd_lookup
changed_when: "{{ osd_lookup.stdout.find('localhost | SUCCESS => ') != -1 and (osd_lookup.stdout.split('localhost | SUCCESS => ')[1]|from_json).changed }}"
failed_when: osd_lookup.stdout.split()[2] != 'SUCCESS'
@ -14,7 +14,7 @@
- name: Looking up disks to bootstrap for Ceph
command: docker exec -t kolla_toolbox /usr/bin/ansible localhost
-m find_disks
-a "partition_name='KOLLA_CEPH_OSD_CACHE_BOOTSTRAP'"
-a "partition_name='KOLLA_CEPH_OSD_CACHE_BOOTSTRAP' match_mode='prefix'"
register: osd_cache_lookup
changed_when: "{{ osd_cache_lookup.stdout.find('localhost | SUCCESS => ') != -1 and (osd_cache_lookup.stdout.split('localhost | SUCCESS => ')[1]|from_json).changed }}"
failed_when: osd_cache_lookup.stdout.split()[2] != 'SUCCESS'
@ -32,6 +32,12 @@
KOLLA_BOOTSTRAP:
KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
OSD_DEV: "{{ item.1.device }}"
OSD_PARTITION: "{{ item.1.partition }}"
OSD_PARTITION_NUM: "{{ item.1.partition_num }}"
JOURNAL_DEV: "{{ item.1.journal_device }}"
JOURNAL_PARTITION: "{{ item.1.journal }}"
JOURNAL_PARTITION_NUM: "{{ item.1.journal_num }}"
USE_EXTERNAL_JOURNAL: "{{ item.1.external_journal | bool }}"
OSD_FILESYSTEM: "{{ ceph_osd_filesystem }}"
OSD_INITIAL_WEIGHT: "{{ osd_initial_weight }}"
HOSTNAME: "{{ hostvars[inventory_hostname]['ansible_' + storage_interface]['ipv4']['address'] }}"
@ -56,6 +62,12 @@
KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
CEPH_CACHE:
OSD_DEV: "{{ item.1.device }}"
OSD_PARTITION: "{{ item.1.partition }}"
OSD_PARTITION_NUM: "{{ item.1.partition_num }}"
JOURNAL_DEV: "{{ item.1.journal_device }}"
JOURNAL_PARTITION: "{{ item.1.journal }}"
JOURNAL_PARTITION_NUM: "{{ item.1.journal_num }}"
USE_EXTERNAL_JOURNAL: "{{ item.1.external_journal | bool }}"
OSD_FILESYSTEM: "{{ ceph_osd_filesystem }}"
OSD_INITIAL_WEIGHT: "{{ osd_initial_weight }}"
HOSTNAME: "{{ hostvars[inventory_hostname]['ansible_' + storage_interface]['ipv4']['address'] }}"

View File

@ -2,7 +2,7 @@
- name: Looking up OSDs for Ceph
command: docker exec -t kolla_toolbox /usr/bin/ansible localhost
-m find_disks
-a "partition_name='KOLLA_CEPH_DATA'"
-a "partition_name='KOLLA_CEPH_DATA' match_mode='prefix'"
register: osd_lookup
changed_when: "{{ osd_lookup.stdout.find('localhost | SUCCESS => ') != -1 and (osd_lookup.stdout.split('localhost | SUCCESS => ')[1]|from_json).changed }}"
failed_when: osd_lookup.stdout.split()[2] != 'SUCCESS'
@ -34,7 +34,7 @@
environment:
KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
OSD_ID: "{{ item.0.stdout }}"
OSD_DEV: "{{ item.1['device'] }}"
JOURNAL_PARTITION: "{{ item.1.journal }}"
image: "{{ ceph_osd_image_full }}"
name: "ceph_osd_{{ item.0.stdout }}"
pid_mode: "host"

View File

@ -3,28 +3,22 @@
# Bootstrap and exit if KOLLA_BOOTSTRAP variable is set. This catches all cases
# of the KOLLA_BOOTSTRAP variable being set, including empty.
if [[ "${!KOLLA_BOOTSTRAP[@]}" ]]; then
# NOTE(SamYaple): Static gpt partcodes
CEPH_JOURNAL_TYPE_CODE="45B0969E-9B03-4F30-B4C6-B4B80CEFF106"
CEPH_OSD_TYPE_CODE="4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D"
# Wait for ceph quorum before proceeding
ceph quorum_status
# Formatting disk for ceph
sgdisk --zap-all -- "${OSD_DEV}"
sgdisk --new=2:1M:5G --change-name=2:KOLLA_CEPH_JOURNAL --typecode=2:45B0969E-9B03-4F30-B4C6-B4B80CEFF106 --mbrtogpt -- "${OSD_DEV}"
sgdisk --largest-new=1 --change-name=1:KOLLA_CEPH_DATA --typecode=1:4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D -- "${OSD_DEV}"
# This command may throw errors that we can safely ignore
partprobe || true
if [[ "${USE_EXTERNAL_JOURNAL}" == "False" ]]; then
# Formatting disk for ceph
sgdisk --zap-all -- "${OSD_DEV}"
sgdisk --new=2:1M:5G -- "${JOURNAL_DEV}"
sgdisk --largest-new=1 -- "${OSD_DEV}"
# NOTE(SamYaple): This command may throw errors that we can safely ignore
partprobe || true
count=0
while [[ "${OSD_PARTITION}x" == "x" ]]; do
if [[ "${count}" -gt 5 ]]; then
echo "Could not find OSD Partition"
exit 1
fi
sleep 1
# We look up the appropriate device path with partition.
OSD_PARTITION=$(ls "${OSD_DEV}"* | egrep "${OSD_DEV}p?1")
count=$(( count + 1 ))
done
JOURNAL_PARTITION="${OSD_PARTITION%?}2"
fi
OSD_ID=$(ceph osd create)
OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}"
@ -56,10 +50,13 @@ if [[ "${!KOLLA_BOOTSTRAP[@]}" ]]; then
# Adding osd to crush map
ceph osd crush add "${OSD_ID}" "${OSD_INITIAL_WEIGHT}" host="${HOSTNAME}${CEPH_ROOT_NAME:+-${CEPH_ROOT_NAME}}"
# Setting partition name based on ${OSD_ID}
sgdisk "--change-name=${OSD_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}" "--typecode=${OSD_PARTITION_NUM}:${CEPH_OSD_TYPE_CODE}" -- "${OSD_DEV}"
sgdisk "--change-name=${JOURNAL_PARTITION_NUM}:KOLLA_CEPH_DATA_${OSD_ID}_J" "--typecode=${JOURNAL_PARTITION_NUM}:${CEPH_JOURNAL_TYPE_CODE}" -- "${JOURNAL_DEV}"
exit 0
fi
# We look up the appropriate journal since we cannot rely on symlinks
JOURNAL_PARTITION=$(ls "${OSD_DEV}"* | egrep "${OSD_DEV}p?2")
OSD_DIR="/var/lib/ceph/osd/ceph-${OSD_ID}"
ARGS="-i ${OSD_ID} --osd-journal ${JOURNAL_PARTITION} -k ${OSD_DIR}/keyring"

View File

@ -66,6 +66,61 @@ EXAMPLES = '''
import json
import pyudev
import re
def is_dev_matched_by_name(dev, name, mode):
    """Tell whether a block device's name matches ``name``.

    The name compared is the ``ID_PART_ENTRY_NAME`` property when the
    device's ``DEVTYPE`` is ``partition`` and the ``ID_FS_LABEL`` property
    otherwise; a missing property is treated as an empty string.

    :param dev: mapping-like device exposing ``get(key, default)``
    :param name: name (or name prefix) to look for
    :param mode: ``'strict'`` for an exact match, ``'prefix'`` for a
                 ``startswith`` match
    :returns: True on a match; False otherwise, including for any
              unrecognised ``mode``
    """
    is_partition = dev.get('DEVTYPE', '') == 'partition'
    name_key = 'ID_PART_ENTRY_NAME' if is_partition else 'ID_FS_LABEL'
    found_name = dev.get(name_key, '')

    if mode == 'prefix':
        return found_name.startswith(name)
    # Anything other than 'strict' falls through to "no match".
    return mode == 'strict' and found_name == name
def find_disk(ct, name, match_mode):
    """Lazily yield every block device whose name matches ``name``.

    :param ct: context object providing ``list_devices(subsystem=...)``
    :param name: name (or name prefix) to look for
    :param match_mode: matching mode handed to ``is_dev_matched_by_name``
    :returns: generator over the matching devices, in enumeration order
    """
    for candidate in ct.list_devices(subsystem='block'):
        if not is_dev_matched_by_name(candidate, name, match_mode):
            continue
        yield candidate
def _partition_number(device_node):
    """Return the trailing run of digits of ``device_node`` ('' if none)."""
    found = re.search(r'\d+$', device_node)
    return found.group(0) if found else ''


def extract_disk_info(ct, dev, name):
    """Yield at most one dict describing ``dev`` and, for partitions, its journal.

    Every result carries ``fs_uuid``, ``fs_label`` and ``device``.  For a
    partition it additionally carries ``partition``, ``partition_num``,
    ``external_journal``, ``journal``, ``journal_device`` and
    ``journal_num``.

    :param ct: context object, used to search for an external journal
    :param dev: device to describe; nothing is yielded when it is falsy
    :param name: name the lookup was made with; a partition whose name
                 equals it exactly is treated as having a co-located
                 journal, any other partition as having an external one
    :returns: generator yielding zero or one info dict; nothing is
              yielded when an external journal is expected but no
              partition named ``<partition name>_J`` exists
    """
    if not dev:
        return

    info = dict()
    info['fs_uuid'] = dev.get('ID_FS_UUID', '')
    info['fs_label'] = dev.get('ID_FS_LABEL', '')

    if dev.get('DEVTYPE', '') == 'partition':
        node = dev.device_node
        info['device'] = dev.find_parent('block').device_node
        info['partition'] = node
        info['partition_num'] = _partition_number(node)

        if is_dev_matched_by_name(dev, name, 'strict'):
            # Co-located journal: partition 2 on the same disk.  Strip the
            # whole partition number, not just the last character, so that
            # e.g. /dev/sda10 maps to /dev/sda2 rather than /dev/sda12.
            info['external_journal'] = False
            info['journal'] = re.sub(r'\d+$', '', node) + '2'
            info['journal_device'] = info['device']
            info['journal_num'] = 2
        else:
            info['external_journal'] = True
            journal_name = dev.get('ID_PART_ENTRY_NAME', '') + '_J'
            # Only the first partition carrying the journal name is used.
            journal = next(
                iter(find_disk(ct, journal_name, 'strict')), None)
            if journal is None:
                # NOTE(SamYaple): Journal not found, not returning info
                return
            info['journal'] = journal.device_node
            info['journal_device'] = \
                journal.find_parent('block').device_node
            info['journal_num'] = _partition_number(journal.device_node)
    else:
        info['device'] = dev.device_node

    yield info
def main():
@ -78,33 +133,14 @@ def main():
match_mode = module.params.get('match_mode')
name = module.params.get('name')
def is_dev_matched_by_name(dev, name):
if dev.get('DEVTYPE', '') == 'partition':
dev_name = dev.get('ID_PART_ENTRY_NAME', '')
else:
dev_name = dev.get('ID_FS_LABEL', '')
if match_mode == 'strict':
return dev_name == name
elif match_mode == 'prefix':
return dev_name.startswith(name)
else:
return False
try:
ret = list()
ct = pyudev.Context()
for dev in ct.list_devices(subsystem='block'):
if is_dev_matched_by_name(dev, name):
fs_uuid = dev.get('ID_FS_UUID', '')
fs_label = dev.get('ID_FS_LABEL', '')
if dev.get('DEVTYPE', '') == 'partition':
device_node = dev.find_parent('block').device_node
else:
device_node = dev.device_node
ret.append({'device': device_node,
'fs_uuid': fs_uuid,
'fs_label': fs_label})
for dev in find_disk(ct, name, match_mode):
for info in extract_disk_info(ct, dev, name):
if info:
ret.append(info)
module.exit_json(disks=json.dumps(ret))
except Exception as e:
module.exit_json(failed=True, msg=repr(e))