Add livemigration helper for hypervisor maint
Adds a very basic one-by-one livemigration tool to remove instances from a host, but not quite as aggressively as the openstack CLI tool. Change-Id: I54b03e7f7eb6eb0f4efc10de97dc208498f2a56b
This commit is contained in:
parent
f945a3ecd2
commit
e4eb5582bb
|
@ -0,0 +1,292 @@
|
|||
#! /usr/bin/env python
|
||||
# Copyright 2013 AT&T Services, Inc.
|
||||
# 2015 Catalyst IT Ltd.
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import argparse
|
||||
import datetime
|
||||
import json
|
||||
from keystoneclient.v2_0 import client as ksclient
|
||||
import logging
|
||||
from novaclient import client
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
LOG = logging.getLogger("livemigrate_instances")
|
||||
LOG_FORMAT = '%(asctime)s %(levelname)-8s %(message)s'
|
||||
DESCRIPTION = "Live migration tool to clear out a hypervisor"
|
||||
|
||||
|
||||
def parse_args():
|
||||
# ensure environment has necessary items to authenticate
|
||||
for key in ['OS_TENANT_NAME', 'OS_USERNAME', 'OS_PASSWORD',
|
||||
'OS_AUTH_URL', 'OS_REGION_NAME']:
|
||||
if key not in os.environ.keys():
|
||||
LOG.exception("Your environment is missing '%s'")
|
||||
ap = argparse.ArgumentParser(description=DESCRIPTION)
|
||||
ap.add_argument('-d', '--debug', action='store_true',
|
||||
default=False, help='Show debugging output')
|
||||
ap.add_argument('-q', '--quiet', action='store_true', default=False,
|
||||
help='Only show error and warning messages')
|
||||
ap.add_argument('-n', '--noop', action='store_true', default=False,
|
||||
help='Do not do any modifying operations (dry-run)')
|
||||
ap.add_argument('-m', '--migrate', action='store_true', default=False,
|
||||
help='Migrate from one host to another')
|
||||
ap.add_argument('-r', '--recover', action='store_true', default=False,
|
||||
help='Move hosts previously migrated back home')
|
||||
ap.add_argument('--source',
|
||||
help='the FQDN of a hypervisor to move instances \
|
||||
away from')
|
||||
ap.add_argument('--dest',
|
||||
default=False,
|
||||
help='the FQDN of a hypervisor to move instances \
|
||||
to')
|
||||
ap.add_argument('--file', default='./results.json',
|
||||
help='The file in which to store/retrieve the server list')
|
||||
ap.add_argument('--timeout', default=240,
|
||||
help='How long to wait for migration to complete')
|
||||
ap.add_argument('--insecure', action='store_true', default=False,
|
||||
help='Explicitly allow tool to perform '
|
||||
'"insecure" SSL (https) requests. The server\'s '
|
||||
'certificate will not be verified against any '
|
||||
'certificate authorities. This option should be used '
|
||||
'with caution.')
|
||||
return ap.parse_args()
|
||||
|
||||
|
||||
def setup_logging(args):
|
||||
level = logging.INFO
|
||||
if args.quiet:
|
||||
level = logging.WARN
|
||||
if args.debug:
|
||||
level = logging.DEBUG
|
||||
logfilename = os.environ['HOME'] + '/livemigrations.log'
|
||||
logging.basicConfig(level=logging.DEBUG,
|
||||
format=LOG_FORMAT,
|
||||
filename=logfilename,
|
||||
filemode='a')
|
||||
console = logging.StreamHandler()
|
||||
console.setLevel(level)
|
||||
LOG.addHandler(console)
|
||||
|
||||
|
||||
def get_hypervisor_instances(args, nova):
|
||||
instance_list = []
|
||||
# check if the hypervisor exists and is unique
|
||||
hypervisor_id = nova.hypervisors.search(args.source)
|
||||
if len(hypervisor_id) != 1:
|
||||
LOG.exception("The hypervisor %s was either not found, "
|
||||
"or found more than once",
|
||||
args.source)
|
||||
raise SystemExit
|
||||
hyp_obj = nova.hypervisors.get(hypervisor_id[0])
|
||||
for instance in nova.servers.list(search_opts={'all_tenants': True}):
|
||||
inst_hyp = getattr(instance, 'OS-EXT-SRV-ATTR:hypervisor_hostname')
|
||||
if hyp_obj.hypervisor_hostname == inst_hyp:
|
||||
instance_list.append(instance)
|
||||
return instance_list
|
||||
|
||||
|
||||
def migrate_instance(args, nova, keystone_client, instance, dest, timeout):
|
||||
result = {}
|
||||
start_hypervisor = getattr(instance, 'OS-EXT-SRV-ATTR:hypervisor_hostname')
|
||||
datestamp_start = datetime.datetime.now()
|
||||
try:
|
||||
tenant = keystone_client.tenants.find(id=instance.tenant_id).name
|
||||
except:
|
||||
tenant = 'unknown tenant'
|
||||
message = "Migrating %s (%s) - %s, from %s" % (instance.name,
|
||||
instance.id,
|
||||
tenant,
|
||||
start_hypervisor)
|
||||
LOG.warn(message)
|
||||
if args.noop:
|
||||
check_result = {
|
||||
'message': 'noop',
|
||||
'new_hypervisor': start_hypervisor,
|
||||
'error': False
|
||||
}
|
||||
else:
|
||||
if instance.status == 'ACTIVE':
|
||||
if dest:
|
||||
instance.live_migrate(host=dest)
|
||||
else:
|
||||
instance.live_migrate()
|
||||
check_result = check_migration(instance, start_hypervisor, timeout)
|
||||
elif instance.status == 'SHUTOFF':
|
||||
instance.migrate()
|
||||
check_result = check_migration(instance, start_hypervisor, timeout)
|
||||
else:
|
||||
check_result = {
|
||||
'message': 'not moved',
|
||||
'new_hypervisor': start_hypervisor,
|
||||
'error': False
|
||||
}
|
||||
|
||||
datestamp_end = datetime.datetime.now()
|
||||
duration = datestamp_end - datestamp_start
|
||||
result = {'instance': instance.id,
|
||||
'name': instance.name,
|
||||
'tenant': tenant,
|
||||
'state': instance.status,
|
||||
'message': check_result['message'],
|
||||
'start_hypervisor': start_hypervisor,
|
||||
'end_hypervisor': check_result['new_hypervisor'],
|
||||
'duration': "%.0f" % duration.total_seconds(),
|
||||
'error': check_result['error']}
|
||||
return result
|
||||
|
||||
|
||||
def check_migration(instance, start_hypervisor, timeout):
|
||||
time.sleep(5)
|
||||
wait_time = 5
|
||||
error = False
|
||||
while wait_time < timeout:
|
||||
message = ''
|
||||
instance.get()
|
||||
new_hypervisor = getattr(instance,
|
||||
'OS-EXT-SRV-ATTR:hypervisor_hostname')
|
||||
if instance.status in ['ACTIVE', 'VERIFY_RESIZE']:
|
||||
if new_hypervisor == start_hypervisor:
|
||||
message = 'Instance did not move'
|
||||
error = True
|
||||
if new_hypervisor != start_hypervisor:
|
||||
message = 'Instance moved'
|
||||
break
|
||||
if instance.status == 'ERROR':
|
||||
message = 'Instance now in ERRROR state!'
|
||||
error = True
|
||||
break
|
||||
#TODO more checks here for status of instance after migration
|
||||
time.sleep(5)
|
||||
wait_time += 5
|
||||
sys.stdout.write("#")
|
||||
sys.stdout.flush()
|
||||
print
|
||||
check_result = {'new_hypervisor': new_hypervisor,
|
||||
'message': message,
|
||||
'error': error}
|
||||
return check_result
|
||||
|
||||
|
||||
def migrate_away(args, nova, keystone_client, timeout):
|
||||
instances_to_migrate = get_hypervisor_instances(args, nova)
|
||||
if args.dest:
|
||||
dest_id = nova.hypervisors.search(args.dest)
|
||||
if len(dest_id) != 1:
|
||||
LOG.error("The hypervisor %s was either not found, or found "
|
||||
"more than once",
|
||||
args.source)
|
||||
raise SystemExit
|
||||
dest = nova.hypervisors.get(dest_id[0]).service['host']
|
||||
else:
|
||||
dest = False
|
||||
final_results = []
|
||||
for instance in instances_to_migrate:
|
||||
instance.get()
|
||||
result = migrate_instance(args, nova, keystone_client,
|
||||
instance, dest, timeout)
|
||||
LOG.warn("Instance %s moved from %s to %s, %s, "
|
||||
"status is %s, took %ss",
|
||||
result['name'],
|
||||
result['start_hypervisor'],
|
||||
result['end_hypervisor'],
|
||||
result['message'],
|
||||
result['state'],
|
||||
result['duration'])
|
||||
if result['state'] == 'VERIFY_RESIZE':
|
||||
instance.confirm_resize()
|
||||
time.sleep(5)
|
||||
instance.get()
|
||||
result['state'] = instance.status
|
||||
final_results.append(result)
|
||||
if result['state'] not in ['ACTIVE', 'SHUTOFF', 'SUSPENDED']:
|
||||
break
|
||||
if result['error']:
|
||||
break
|
||||
if not args.noop:
|
||||
# ugh, a magic sleep to let things settle down
|
||||
time.sleep(30)
|
||||
# TODO(XP) this needs exception handling
|
||||
with open(args.file, 'w') as fp:
|
||||
json.dump(final_results, fp)
|
||||
fp.close()
|
||||
|
||||
|
||||
def recover(args, nova, keystone_client, timeout):
|
||||
# TODO(XP) this needs exception handling
|
||||
with open(args.file, 'r') as fp:
|
||||
temp_locations = json.load(fp)
|
||||
fp.close()
|
||||
|
||||
final_results = []
|
||||
for entry in temp_locations:
|
||||
if entry['end_hypervisor'] == entry['start_hypervisor']:
|
||||
print("Instance %s left alone" % entry['instance'])
|
||||
else:
|
||||
# set up instance, dest list
|
||||
instance = nova.servers.get(entry['instance'])
|
||||
dest = entry['start_hypervisor']
|
||||
result = migrate_instance(args,
|
||||
nova,
|
||||
keystone_client,
|
||||
instance,
|
||||
dest,
|
||||
timeout)
|
||||
final_results.append(result)
|
||||
# ugh, a magic sleep to let things settle down
|
||||
time.sleep(5)
|
||||
#pprint.pprint(final_results)
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
setup_logging(args)
|
||||
try:
|
||||
nova = client.Client(
|
||||
'2.0',
|
||||
os.environ['OS_USERNAME'],
|
||||
os.environ['OS_PASSWORD'],
|
||||
os.environ['OS_TENANT_NAME'],
|
||||
os.environ['OS_AUTH_URL'],
|
||||
region_name=os.environ['OS_REGION_NAME'])
|
||||
except Exception:
|
||||
raise
|
||||
try:
|
||||
keystone_client = ksclient.Client(
|
||||
username=os.environ['OS_USERNAME'],
|
||||
password=os.environ['OS_PASSWORD'],
|
||||
tenant_name=os.environ['OS_TENANT_NAME'],
|
||||
auth_url=os.environ['OS_AUTH_URL'],
|
||||
region_name=os.environ['OS_REGION_NAME'])
|
||||
except Exception:
|
||||
raise
|
||||
timeout = args.timeout
|
||||
if ((args.migrate and args.recover) or (args.migrate is False
|
||||
and args.recover is False)):
|
||||
LOG.exception("Please either migrate, or recover, but not both")
|
||||
raise SystemExit
|
||||
if args.migrate:
|
||||
if not args.source:
|
||||
LOG.exception("Must supply both source hypervisors")
|
||||
raise SystemExit
|
||||
migrate_away(args, nova, keystone_client, timeout)
|
||||
|
||||
if args.recover:
|
||||
recover(args, nova, keystone_client, timeout)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
Loading…
Reference in New Issue