Initial ceph support.

Adam Gandelman 2013-02-27 18:07:08 -08:00
parent e53c196e70
commit 82d784b79e
8 changed files with 792 additions and 18 deletions

config.yaml

@ -41,4 +41,16 @@ options:
    description: |
      Default multicast port number that will be used to communicate between
      HA Cluster nodes.
  rbd-size:
    type: string
    default: 5G
    description: |
      Default rbd storage size to create when setting up block storage.
      This value should be specified in GB (e.g. 100G).
  rbd-name:
    type: string
    default: mysql1
    description: |
      The name that will be used to create the Ceph's RBD image with. If the
      image name exists in Ceph, it will be re-used and the data will be
      overwritten.
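The rbd-size value is consumed later by the ceph-relation-changed hook, which splits on 'G' and multiplies by 1024 to get the megabyte count passed to 'rbd create'. A minimal sketch of that conversion, assuming the value always carries the G suffix the description asks for (the helper name is illustrative, not part of the charm):

    def rbd_size_to_mb(rbd_size):
        # '5G' -> 5120, '100G' -> 102400; mirrors the parsing in ceph_changed() below.
        return int(rbd_size.split('G')[0]) * 1024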

hooks/ceph-relation-changed Symbolic link (1 line)

@ -0,0 +1 @@
rabbitmq-server-relations.py

hooks/ceph-relation-joined Symbolic link (1 line)

@ -0,0 +1 @@
rabbitmq-server-relations.py

hooks/ceph.py Normal file (240 lines)

@ -0,0 +1,240 @@
import utils
import commands
import re
import subprocess
import sys
import os
import shutil
KEYRING = '/etc/ceph/ceph.client.%s.keyring'
KEYFILE = '/etc/ceph/ceph.client.%s.key'
CEPH_CONF = """[global]
auth supported = %(auth)s
keyring = %(keyring)s
mon host = %(mon_hosts)s
"""
def execute(cmd):
    subprocess.check_call(cmd)


def execute_shell(cmd):
    subprocess.check_call(cmd, shell=True)


def install():
    ceph_dir = "/etc/ceph"
    if not os.path.isdir(ceph_dir):
        os.mkdir(ceph_dir)
    utils.install('ceph-common')


def rbd_exists(service, pool, rbd_img):
    (rc, out) = commands.getstatusoutput('rbd list --id %s --pool %s' %\
                                         (service, pool))
    return rbd_img in out


def create_rbd_image(service, pool, image, sizemb):
    cmd = [
        'rbd',
        'create',
        image,
        '--size',
        str(sizemb),
        '--id',
        service,
        '--pool',
        pool
    ]
    execute(cmd)


def pool_exists(service, name):
    (rc, out) = commands.getstatusoutput("rados --id %s lspools" % service)
    return name in out


def create_pool(service, name):
    cmd = [
        'rados',
        '--id',
        service,
        'mkpool',
        name
    ]
    execute(cmd)
def keyfile_path(service):
    return KEYFILE % service


def keyring_path(service):
    return KEYRING % service


def create_keyring(service, key):
    keyring = keyring_path(service)
    if os.path.exists(keyring):
        utils.juju_log('INFO', 'ceph: Keyring exists at %s.' % keyring)
    cmd = [
        'ceph-authtool',
        keyring,
        '--create-keyring',
        '--name=client.%s' % service,
        '--add-key=%s' % key
    ]
    execute(cmd)
    utils.juju_log('INFO', 'ceph: Created new ring at %s.' % keyring)


def create_key_file(service, key):
    # create a file containing the key
    keyfile = keyfile_path(service)
    if os.path.exists(keyfile):
        utils.juju_log('INFO', 'ceph: Keyfile exists at %s.' % keyfile)
    fd = open(keyfile, 'w')
    fd.write(key)
    fd.close()
    utils.juju_log('INFO', 'ceph: Created new keyfile at %s.' % keyfile)


def get_ceph_nodes():
    hosts = []
    for r_id in utils.relation_ids('ceph'):
        for unit in utils.relation_list(r_id):
            hosts.append(utils.relation_get('private-address',
                                            unit=unit, rid=r_id))
    return hosts
def configure(service, key, auth):
    create_keyring(service, key)
    create_key_file(service, key)
    hosts = get_ceph_nodes()
    mon_hosts = ",".join(map(str, hosts))
    keyring = keyring_path(service)
    with open('/etc/ceph/ceph.conf', 'w') as ceph_conf:
        ceph_conf.write(CEPH_CONF % locals())
    modprobe_kernel_module('rbd')


def image_mapped(image_name):
    (rc, out) = commands.getstatusoutput('rbd showmapped')
    return image_name in out


def map_block_storage(service, pool, image):
    cmd = [
        'rbd',
        'map',
        '%s/%s' % (pool, image),
        '--user',
        service,
        '--secret',
        keyfile_path(service),
    ]
    execute(cmd)


def filesystem_mounted(fs):
    return subprocess.call(['grep', '-wqs', fs, '/proc/mounts']) == 0


def make_filesystem(blk_device, fstype='ext4'):
    utils.juju_log('INFO',
                   'ceph: Formatting block device %s as filesystem %s.' %\
                   (blk_device, fstype))
    cmd = ['mkfs', '-t', fstype, blk_device]
    execute(cmd)
def place_data_on_ceph(service, blk_device, data_src_dst, fstype='ext4'):
    # mount block device into /mnt
    cmd = ['mount', '-t', fstype, blk_device, '/mnt']
    execute(cmd)

    # copy data to /mnt
    try:
        copy_files(data_src_dst, '/mnt')
    except:
        pass

    # umount block device
    cmd = ['umount', '/mnt']
    execute(cmd)

    _dir = os.stat(data_src_dst)
    uid = _dir.st_uid
    gid = _dir.st_gid

    # re-mount where the data should originally be
    cmd = ['mount', '-t', fstype, blk_device, data_src_dst]
    execute(cmd)

    # ensure original ownership of new mount.
    cmd = ['chown', '-R', '%s:%s' % (uid, gid), data_src_dst]
    execute(cmd)


# TODO: re-use
def modprobe_kernel_module(module):
    utils.juju_log('INFO', 'Loading kernel module')
    cmd = ['modprobe', module]
    execute(cmd)
    cmd = 'echo %s >> /etc/modules' % module
    execute_shell(cmd)


def copy_files(src, dst, symlinks=False, ignore=None):
    for item in os.listdir(src):
        s = os.path.join(src, item)
        d = os.path.join(dst, item)
        if os.path.isdir(s):
            shutil.copytree(s, d, symlinks, ignore)
        else:
            shutil.copy2(s, d)
def ensure_ceph_storage(service, pool, rbd_img, sizemb, mount_point,
                        blk_device, fstype, system_services=[]):
    """
    To be called from the current cluster leader.
    Ensures given pool and RBD image exists, is mapped to a block device,
    and the device is formatted and mounted at the given mount_point.

    If formatting a device for the first time, data existing at mount_point
    will be migrated to the RBD device before being remounted.

    All services listed in system_services will be stopped prior to data
    migration and restarted when complete.
    """
    # Ensure pool, RBD image, RBD mappings are in place.
    if not pool_exists(service, pool):
        utils.juju_log('INFO', 'ceph: Creating new pool %s.' % pool)
        create_pool(service, pool)

    if not rbd_exists(service, pool, rbd_img):
        utils.juju_log('INFO', 'ceph: Creating RBD image (%s).' % rbd_img)
        create_rbd_image(service, pool, rbd_img, sizemb)

    if not image_mapped(rbd_img):
        utils.juju_log('INFO', 'ceph: Mapping RBD Image as a Block Device.')
        map_block_storage(service, pool, rbd_img)

    # make file system
    # TODO: What happens if for whatever reason this is run again and
    # the data is already in the rbd device and/or is mounted??
    # When it is mounted already, it will fail to make the fs
    # XXX: This is really sketchy! Need to at least add an fstab entry
    # otherwise this hook will blow away existing data if its executed
    # after a reboot.
    if not filesystem_mounted(mount_point):
        make_filesystem(blk_device, fstype)
        for svc in system_services:
            if utils.running(svc):
                utils.juju_log('INFO',
                               'Stopping services %s prior to migrating '\
                               'data' % svc)
                utils.stop(svc)
        place_data_on_ceph(service, blk_device, mount_point, fstype)
        for svc in system_services:
            utils.start(svc)
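The XXX note in ensure_ceph_storage() points out that nothing persists the mount across reboots. A hypothetical helper along these lines (not part of this commit; the name and the 'noauto' option are assumptions) could record the mount in fstab:

    def add_fstab_entry(blk_device, mount_point, fstype='ext4'):
        # 'noauto' is assumed here because the RBD map must be re-established
        # ('rbd map') before the device node exists again after a reboot.
        entry = '%s %s %s noauto,defaults 0 0\n' % (blk_device, mount_point, fstype)
        with open('/etc/fstab', 'a') as fstab:
            fstab.write(entry)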

hooks/rabbitmq-server-relations.py

@ -1,5 +1,6 @@
#!/usr/bin/python
import ceph
import rabbit_utils as rabbit
import utils
@ -7,6 +8,10 @@ import os
import sys
import subprocess


SERVICE_NAME = utils.get_unit_name().replace('-','/').split('/')[0]
POOL_NAME = SERVICE_NAME
RABBIT_DIR='/var/lib/rabbitmq'


def install():
    utils.install(*rabbit.PACKAGES)
    utils.expose(5672)
@ -104,17 +109,41 @@ def ha_joined():
            'configure hacluster.')
        sys.exit(1)
    if not utils.is_relation_made('ceph'):
        utils.juju_log('INFO',
                       'ha_joined: No ceph relation yet, deferring.')
        return

    relation_settings = {}
    relation_settings['corosync_bindiface'] = corosync_bindiface
    relation_settings['corosync_mcastport'] = corosync_mcastport

    relation_settings['resources'] = {
-        'res_rabbitmq_vip': 'ocf:heartbeat:IPaddr2'
        'res_rabbitmq_rbd':'ocf:ceph:rbd',
        'res_rabbitmq_fs':'ocf:heartbeat:Filesystem',
        'res_rabbitmq_vip':'ocf:heartbeat:IPaddr2',
        'res_rabbitmq-server':'lsb:rabbitmq-server',
    }

    relation_settings['resource_params'] = {
-        'res_rabbitmq_vip': ('params ip="%s" cider_netmask="%s" nic="%s"' %\
-                             (vip, vip_cidr, vip_iface))
        'res_rabbitmq_rbd':'params name="%s" pool="%s" user="%s" secret="%s"' %\
            (config['rbd-name'], POOL_NAME, SERVICE_NAME, ceph.keyfile_path(SERVICE_NAME)),
        'res_rabbitmq_fs':'params device="/dev/rbd/%s/%s" directory="%s" '\
                          'fstype="ext4" op start start-delay="10s"' %\
            (POOL_NAME, config['rbd-name'], RABBIT_DIR),
        'res_rabbitmq_vip':'params ip="%s" cidr_netmask="%s" nic="%s"' %\
            (config['vip'], config['vip_cidr'], config['vip_iface']),
        'res_rabbitmqd':'op start start-delay="5s" op monitor interval="5s"',
    }

-    utils.relation_set(**relation_settings)
    relation_settings['groups'] = {
        'grp_rabbitmq':'res_rabbitmq_rbd res_rabbitmq_fs res_rabbitmq_vip '\
                       'res_rabbitmq-server',
    }

    for rel_id in utils.relation_ids('ha'):
        utils.relation_set(rid=rel_id, **relation_settings)
def ha_changed():
@ -130,6 +159,49 @@ def ha_changed():
    utils.relation_set(**relation_settings)
def ceph_joined():
    utils.juju_log('INFO', 'Start Ceph Relation Joined')
    ceph.install()
    utils.juju_log('INFO', 'Finish Ceph Relation Joined')


def ceph_changed():
    utils.juju_log('INFO', 'Start Ceph Relation Changed')
    auth = utils.relation_get('auth')
    key = utils.relation_get('key')
    if None in [auth, key]:
        utils.juju_log('INFO', 'Missing key or auth in relation')
        sys.exit(0)

    ceph.configure(service=SERVICE_NAME, key=key, auth=auth)

    if utils.eligible_leader():
        sizemb = int(config['rbd-size'].split('G')[0]) * 1024
        rbd_img = config['rbd-name']
        blk_device = '/dev/rbd/%s/%s' % (POOL_NAME, rbd_img)
        ceph.ensure_ceph_storage(service=SERVICE_NAME, pool=POOL_NAME,
                                 rbd_img=rbd_img, sizemb=sizemb,
                                 fstype='ext4', mount_point=RABBIT_DIR,
                                 blk_device=blk_device,
                                 system_services=['rabbitmq-server'])
    else:
        utils.juju_log('INFO',
                       'This is not the peer leader. Not configuring RBD.')
        # Stopping rabbitmq-server
        if utils.running('rabbitmq-server'):
            utils.juju_log('INFO', 'Stopping rabbitmq-server.')
            utils.stop('rabbitmq-server')

    # If 'ha' relation has been made before the 'ceph' relation
    # it is important to make sure the ha-relation data is being
    # sent.
    if utils.is_relation_made('ha'):
        utils.juju_log('INFO', '*ha* relation exists. Triggering ha_joined()')
        ha_joined()
    else:
        utils.juju_log('INFO', '*ha* relation does not exist.')

    utils.juju_log('INFO', 'Finish Ceph Relation Changed')
hooks = {
    'install': install,
    'amqp-relation-changed': amqp_changed,
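The two ceph-relation-* symlinks added above point at this file, and do_hooks() in utils.py dispatches on the basename of the invoked script. The hunk is truncated before the new dictionary entries, but the wiring presumably looks like this (a sketch, not the verbatim commit):

    hooks = {
        'install': install,
        'amqp-relation-changed': amqp_changed,
        'ceph-relation-joined': ceph_joined,
        'ceph-relation-changed': ceph_changed,
    }

    # Juju runs hooks/ceph-relation-changed (a symlink to this script), so
    # os.path.basename(sys.argv[0]) == 'ceph-relation-changed' and do_hooks()
    # invokes ceph_changed().
    utils.do_hooks(hooks)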

hooks/utils.back Normal file (147 lines)

@ -0,0 +1,147 @@
#
# Copyright 2012 Canonical Ltd.
#
# Authors:
# James Page <james.page@ubuntu.com>
# Paul Collins <paul.collins@canonical.com>
#
import os
import subprocess
import socket
import sys
def do_hooks(hooks):
    hook = os.path.basename(sys.argv[0])

    try:
        hook_func = hooks[hook]
    except KeyError:
        juju_log('INFO',
                 "This charm doesn't know how to handle '{}'.".format(hook))
    else:
        hook_func()


def install(*pkgs):
    cmd = [
        'apt-get',
        '-y',
        'install'
    ]
    for pkg in pkgs:
        cmd.append(pkg)
    subprocess.check_call(cmd)

TEMPLATES_DIR = 'templates'


def expose(port, protocol='TCP'):
    cmd = [
        'open-port',
        '{}/{}'.format(port, protocol)
    ]
    subprocess.check_call(cmd)


def juju_log(severity, message):
    cmd = [
        'juju-log',
        '--log-level', severity,
        message
    ]
    subprocess.check_call(cmd)
def relation_ids(relation):
    cmd = [
        'relation-ids',
        relation
    ]
    return subprocess.check_output(cmd).split()  # IGNORE:E1103


def relation_list(rid):
    cmd = [
        'relation-list',
        '-r', rid,
    ]
    return subprocess.check_output(cmd).split()  # IGNORE:E1103


def relation_get(attribute, unit=None, rid=None):
    cmd = [
        'relation-get',
    ]
    if rid:
        cmd.append('-r')
        cmd.append(rid)
    cmd.append(attribute)
    if unit:
        cmd.append(unit)
    value = subprocess.check_output(cmd).strip()  # IGNORE:E1103
    if value == "":
        return None
    else:
        return value


def relation_set(**kwargs):
    cmd = [
        'relation-set'
    ]
    args = []
    for k, v in kwargs.items():
        if k == 'rid':
            cmd.append('-r')
            cmd.append(v)
        else:
            args.append('{}={}'.format(k, v))
    cmd += args
    subprocess.check_call(cmd)


def unit_get(attribute):
    cmd = [
        'unit-get',
        attribute
    ]
    value = subprocess.check_output(cmd).strip()  # IGNORE:E1103
    if value == "":
        return None
    else:
        return value
def config_get(attribute):
    cmd = [
        'config-get',
        attribute
    ]
    value = subprocess.check_output(cmd).strip()  # IGNORE:E1103
    if value == "":
        return None
    else:
        return value


def is_clustered():
    for r_id in (relation_ids('ha') or []):
        for unit in (relation_list(r_id) or []):
            relation_data = \
                relation_get_dict(relation_id=r_id,
                                  remote_unit=unit)
            if 'clustered' in relation_data:
                return True
    return False


def is_leader():
    status = execute('crm resource show res_ks_vip', echo=True)[0].strip()
    hostname = execute('hostname', echo=True)[0].strip()
    if hostname in status:
        return True
    else:
        return False

hooks/utils.py

@ -11,18 +11,33 @@ import os
import subprocess
import socket
import sys
import fcntl
import struct
import json
import time
def do_hooks(hooks):
    hook = os.path.basename(sys.argv[0])

    try:
-        hook_func = hooks[hook]
        hooks[hook]()
    except KeyError:
        juju_log('INFO',
                 "This charm doesn't know how to handle '{}'.".format(hook))


def can_install():
    try:
        fd = os.open("/var/lib/dpkg/lock", os.O_RDWR|os.O_CREAT|os.O_NOFOLLOW, 0640)
        fcntl.lockf(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError, message:
        os.close(fd)
        return False
    else:
-        hook_func()
        fcntl.lockf(fd, fcntl.LOCK_UN)
        os.close(fd)
        return True
def install(*pkgs):
@ -33,10 +48,131 @@ def install(*pkgs):
    ]
    for pkg in pkgs:
        cmd.append(pkg)
    while not can_install():
        juju_log('INFO',
                 "dpkg is busy, can't install %s yet, waiting..." % pkgs)
        time.sleep(1)
    subprocess.check_call(cmd)

TEMPLATES_DIR = 'templates'

try:
    import jinja2
except ImportError:
    install('python-jinja2')
    import jinja2

try:
    from netaddr import IPNetwork
except ImportError:
    install('python-netaddr')
    from netaddr import IPNetwork

try:
    import dns.resolver
except ImportError:
    install('python-dnspython')
    import dns.resolver


def render_template(template_name, context, template_dir=TEMPLATES_DIR):
    templates = jinja2.Environment(
        loader=jinja2.FileSystemLoader(template_dir)
    )
    template = templates.get_template(template_name)
    return template.render(context)

CLOUD_ARCHIVE = \
""" # Ubuntu Cloud Archive
deb http://ubuntu-cloud.archive.canonical.com/ubuntu {} main
"""

CLOUD_ARCHIVE_POCKETS = {
    'precise-folsom': 'precise-updates/folsom',
    'precise-folsom/updates': 'precise-updates/folsom',
    'precise-folsom/proposed': 'precise-proposed/folsom',
    'precise-grizzly': 'precise-updates/grizzly',
    'precise-grizzly/updates': 'precise-updates/grizzly',
    'precise-grizzly/proposed': 'precise-proposed/grizzly'
}
def execute(cmd, die=False, echo=False):
    """ Executes a command

    if die=True, script will exit(1) if command does not return 0
    if echo=True, output of command will be printed to stdout

    returns a tuple: (stdout, stderr, return code)
    """
    p = subprocess.Popen(cmd.split(" "),
                         stdout=subprocess.PIPE,
                         stdin=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    stdout = ""
    stderr = ""

    def print_line(l):
        if echo:
            print l.strip('\n')
            sys.stdout.flush()

    for l in iter(p.stdout.readline, ''):
        print_line(l)
        stdout += l
    for l in iter(p.stderr.readline, ''):
        print_line(l)
        stderr += l

    p.communicate()
    rc = p.returncode

    if die and rc != 0:
        error_out("ERROR: command %s return non-zero.\n" % cmd)
    return (stdout, stderr, rc)
def configure_source():
    source = str(config_get('openstack-origin'))
    if not source:
        return
    if source.startswith('ppa:'):
        cmd = [
            'add-apt-repository',
            source
        ]
        subprocess.check_call(cmd)
    if source.startswith('cloud:'):
        install('ubuntu-cloud-keyring')
        pocket = source.split(':')[1]
        with open('/etc/apt/sources.list.d/cloud-archive.list', 'w') as apt:
            apt.write(CLOUD_ARCHIVE.format(CLOUD_ARCHIVE_POCKETS[pocket]))
    if source.startswith('deb'):
        l = len(source.split('|'))
        if l == 2:
            (apt_line, key) = source.split('|')
            cmd = [
                'apt-key',
                'adv', '--keyserver keyserver.ubuntu.com',
                '--recv-keys', key
            ]
            subprocess.check_call(cmd)
        elif l == 1:
            apt_line = source

        with open('/etc/apt/sources.list.d/quantum.list', 'w') as apt:
            apt.write(apt_line + "\n")
    cmd = [
        'apt-get',
        'update'
    ]
    subprocess.check_call(cmd)
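# Worked example: openstack-origin='cloud:precise-folsom' maps through
# CLOUD_ARCHIVE_POCKETS to 'precise-updates/folsom', so cloud-archive.list ends
# up with the line:
#   deb http://ubuntu-cloud.archive.canonical.com/ubuntu precise-updates/folsom main
# followed by an apt-get update.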
# Protocols
TCP = 'TCP'
UDP = 'UDP'
def expose(port, protocol='TCP'):
    cmd = [
@ -88,6 +224,25 @@ def relation_get(attribute, unit=None, rid=None):
        return value


def relation_get_dict(relation_id=None, remote_unit=None):
    """Obtain all relation data as dict by way of JSON"""
    cmd = 'relation-get --format=json'
    if relation_id:
        cmd += ' -r %s' % relation_id
    if remote_unit:
        remote_unit_orig = os.getenv('JUJU_REMOTE_UNIT', None)
        os.environ['JUJU_REMOTE_UNIT'] = remote_unit
    j = execute(cmd, die=True)[0]
    if remote_unit and remote_unit_orig:
        os.environ['JUJU_REMOTE_UNIT'] = remote_unit_orig
    d = json.loads(j)
    settings = {}
    # convert unicode to strings
    for k, v in d.iteritems():
        settings[str(k)] = str(v)
    return settings
def relation_set(**kwargs):
    cmd = [
        'relation-set'
@ -108,11 +263,7 @@ def unit_get(attribute):
        'unit-get',
        attribute
    ]
-    value = subprocess.check_output(cmd).strip() # IGNORE:E1103
-    if value == "":
-        return None
-    else:
-        return value
    return subprocess.check_output(cmd).strip()  # IGNORE:E1103
def config_get(attribute):
@ -120,11 +271,118 @@ def config_get(attribute):
        'config-get',
        attribute
    ]
-    value = subprocess.check_output(cmd).strip() # IGNORE:E1103
-    if value == "":
-        return None
    return subprocess.check_output(cmd).strip()  # IGNORE:E1103
def get_unit_hostname():
    return socket.gethostname()


def get_unit_name():
    return os.environ.get('JUJU_UNIT_NAME').replace('/','-')


def get_host_ip(hostname=unit_get('private-address')):
    try:
        # Test to see if already an IPv4 address
        socket.inet_aton(hostname)
        return hostname
    except socket.error:
        pass
    try:
        answers = dns.resolver.query(hostname, 'A')
        if answers:
            return answers[0].address
    except dns.resolver.NXDOMAIN:
        pass
    return None
def restart(*services):
    for service in services:
        subprocess.check_call(['service', service, 'restart'])


def stop(*services):
    for service in services:
        subprocess.check_call(['service', service, 'stop'])


def start(*services):
    for service in services:
        subprocess.check_call(['service', service, 'start'])


def running(service):
    try:
        output = subprocess.check_output(['service', service, 'status'])
    except subprocess.CalledProcessError:
        return False
    else:
-        return value
        if ("start/running" in output or
                "is running" in output):
            return True
        else:
            return False


def disable_upstart_services(*services):
    for service in services:
        with open("/etc/init/{}.override".format(service), "w") as override:
            override.write("manual")


def enable_upstart_services(*services):
    for service in services:
        path = '/etc/init/{}.override'.format(service)
        if os.path.exists(path):
            os.remove(path)


def disable_lsb_services(*services):
    for service in services:
        subprocess.check_call(['update-rc.d', '-f', service, 'remove'])


def enable_lsb_services(*services):
    for service in services:
        subprocess.check_call(['update-rc.d', '-f', service, 'defaults'])
def get_iface_ipaddr(iface):
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    return socket.inet_ntoa(fcntl.ioctl(
        s.fileno(),
        0x8919,  # SIOCGIFADDR
        struct.pack('256s', iface[:15])
    )[20:24])


def get_iface_netmask(iface):
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    return socket.inet_ntoa(fcntl.ioctl(
        s.fileno(),
        0x891b,  # SIOCGIFNETMASK
        struct.pack('256s', iface[:15])
    )[20:24])


def get_netmask_cidr(netmask):
    netmask = netmask.split('.')
    binary_str = ''
    for octet in netmask:
        binary_str += bin(int(octet))[2:].zfill(8)
    return str(len(binary_str.rstrip('0')))
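# Worked example: '255.255.252.0' expands to 22 ones followed by 10 zeros, so
# get_netmask_cidr('255.255.252.0') returns '22', and get_network_address()
# below can hand IPNetwork a string such as '10.5.0.12/22'.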
def get_network_address(iface):
    if iface:
        network = "{}/{}".format(get_iface_ipaddr(iface),
                                 get_netmask_cidr(get_iface_netmask(iface)))
        ip = IPNetwork(network)
        return str(ip.network)
    else:
        return None
def is_clustered():
@ -139,9 +397,50 @@ def is_clustered():
def is_leader():
-    status = execute('crm resource show res_ks_vip', echo=True)[0].strip()
    status = execute('crm resource show res_rabbitmq_vip', echo=True)[0].strip()
    hostname = execute('hostname', echo=True)[0].strip()
    if hostname in status:
        return True
    else:
        return False
def peer_units():
    peers = []
    for r_id in (relation_ids('cluster') or []):
        for unit in (relation_list(r_id) or []):
            peers.append(unit)
    return peers


def oldest_peer(peers):
    local_unit_no = os.getenv('JUJU_UNIT_NAME').split('/')[1]
    for peer in peers:
        remote_unit_no = peer.split('/')[1]
        if remote_unit_no < local_unit_no:
            return False
    return True


def eligible_leader():
    if is_clustered():
        if not is_leader():
            juju_log('INFO', 'Deferring action to CRM leader.')
            return False
    else:
        peers = peer_units()
        if peers and not oldest_peer(peers):
            juju_log('INFO', 'Deferring action to oldest service unit.')
            return False
    return True
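# Example of the fallback: with two units and no 'clustered' flag on the ha
# relation yet, rabbitmq-server/0 is the oldest peer, so eligible_leader() is
# True there and False on rabbitmq-server/1; once clustered, whichever host
# the CRM reports as owning res_rabbitmq_vip wins instead.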
def is_relation_made(relation=None):
    relation_data = []
    for r_id in (relation_ids(relation) or []):
        for unit in (relation_list(r_id) or []):
            relation_data.append(relation_get_dict(relation_id=r_id,
                                                   remote_unit=unit))
    if not relation_data:
        return False
    return True

metadata.yaml

@ -12,6 +12,8 @@ requires:
  ha:
    interface: hacluster
    scope: container
  ceph:
    interface: ceph-client
peers:
  cluster:
-    interface: rabbitmq
    interface: rabbitmq-ha