diff --git a/contrib/devstack/extras.d/70-cue.sh b/contrib/devstack/extras.d/70-cue.sh index c6e3f07a..3d2a38d1 100644 --- a/contrib/devstack/extras.d/70-cue.sh +++ b/contrib/devstack/extras.d/70-cue.sh @@ -13,10 +13,8 @@ if is_service_enabled cue; then echo_summary "Installing Cue Client" install_cueclient - #echo_summary "Installing Cue Dashboard" - # TODO Steve. Disabling the installation of dashboard - # until the cue gate is fixed. - #install_cuedashboard + echo_summary "Installing Cue Dashboard" + install_cuedashboard elif [[ "$1" == "stack" && "$2" == "post-config" ]]; then echo_summary "Configuring Cue" diff --git a/contrib/devstack/lib/cue b/contrib/devstack/lib/cue index d4fc7975..ccce6f1e 100644 --- a/contrib/devstack/lib/cue +++ b/contrib/devstack/lib/cue @@ -46,6 +46,7 @@ CUE_AUTH_CACHE_DIR=${CUE_AUTH_CACHE_DIR:-/var/cache/cue} CUE_TF_DB=${CUE_TF_DB:-cue_taskflow} CUE_TF_PERSISTENCE=${CUE_TF_PERSISTENCE:-} +CUE_TF_CREATE_CLUSTER_NODE_VM_ACTIVE_RETRY_COUNT=${CUE_TF_CREATE_CLUSTER_NODE_VM_ACTIVE_RETRY_COUNT:-12} # Public IP/Port Settings CUE_SERVICE_PROTOCOL=${CUE_SERVICE_PROTOCOL:-$SERVICE_PROTOCOL} @@ -101,6 +102,9 @@ function configure_cue { iniset $CUE_CONF taskflow cluster_node_check_timeout 10 iniset $CUE_CONF taskflow cluster_node_check_max_count 30 + # Set flow create cluster node vm active retry count + iniset $CUE_CONF flow_options create_cluster_node_vm_active_retry_count $CUE_TF_CREATE_CLUSTER_NODE_VM_ACTIVE_RETRY_COUNT + iniset $CUE_CONF openstack os_auth_url $KEYSTONE_AUTH_PROTOCOL://$KEYSTONE_AUTH_HOST:$KEYSTONE_AUTH_PORT/v2.0 iniset $CUE_CONF openstack os_tenant_name admin iniset $CUE_CONF openstack os_username admin diff --git a/contrib/devstack/local.sh b/contrib/devstack/local.sh index 6fe37697..3195c072 100755 --- a/contrib/devstack/local.sh +++ b/contrib/devstack/local.sh @@ -28,6 +28,9 @@ fi # Generate an ssh keypair to add to devstack if [[ ! 
-f ~/.ssh/id_rsa ]]; then ssh-keygen -q -t rsa -N "" -f ~/.ssh/id_rsa + # copying key to /tmp so that tests can access it + cp ~/.ssh/id_rsa /tmp/cue-mgmt-key + chmod 644 /tmp/cue-mgmt-key fi if [[ -z $CUE_MANAGEMENT_KEY ]]; then diff --git a/cue/taskflow/flow/__init__.py b/cue/taskflow/flow/__init__.py index e6599b3a..05fab708 100644 --- a/cue/taskflow/flow/__init__.py +++ b/cue/taskflow/flow/__init__.py @@ -17,3 +17,12 @@ from create_cluster import create_cluster # noqa from create_cluster_node import create_cluster_node # noqa from delete_cluster import delete_cluster # noqa from delete_cluster_node import delete_cluster_node # noqa + +from oslo_config import cfg + +opt_group = cfg.OptGroup( + name='flow_options', + title='Options for taskflow flows.' +) + +cfg.CONF.register_group(opt_group) diff --git a/cue/taskflow/flow/create_cluster_node.py b/cue/taskflow/flow/create_cluster_node.py index 5f27f63d..3407d6e4 100644 --- a/cue/taskflow/flow/create_cluster_node.py +++ b/cue/taskflow/flow/create_cluster_node.py @@ -13,6 +13,7 @@ # License for the specific language governing permissions and limitations # under the License. 
+from oslo_config import cfg import taskflow.patterns.linear_flow as linear_flow import taskflow.retry as retry @@ -24,6 +25,16 @@ import os_tasklib.neutron as neutron import os_tasklib.nova as nova +CONF = cfg.CONF + +FLOW_OPTS = [ + cfg.IntOpt('create_cluster_node_vm_active_retry_count', + default=12), +] + +CONF.register_opts(FLOW_OPTS, group='flow_options') + + def create_cluster_node(cluster_id, node_number, node_id, graph_flow, generate_userdata, start_task, end_task, node_check_timeout, node_check_max_count, @@ -127,10 +138,11 @@ def create_cluster_node(cluster_id, node_number, node_id, graph_flow, graph_flow.add(get_vm_id) graph_flow.link(create_vm, get_vm_id) + retry_count = CONF.flow_options.create_cluster_node_vm_active_retry_count #todo(dagnello): make retry times configurable check_vm_active = linear_flow.Flow( name="wait for VM active state %s" % node_name, - retry=retry.Times(12)) + retry=retry.Times(retry_count)) check_vm_active.add( nova.GetVmStatus( os_client=client.nova_client(), diff --git a/tests/gate_hook.sh b/tests/gate_hook.sh index e394cf80..1478fd9d 100755 --- a/tests/gate_hook.sh +++ b/tests/gate_hook.sh @@ -10,6 +10,7 @@ export PROJECTS="openstack/diskimage-builder stackforge/cue-dashboard $PROJECTS" export DEVSTACK_GATE_NEUTRON=1 echo "CUE_MANAGEMENT_KEY=cue-mgmt-key" >> $BASE/new/devstack/localrc +echo "CUE_TF_CREATE_CLUSTER_NODE_VM_ACTIVE_RETRY_COUNT=60" >> $BASE/new/devstack/localrc popd diff --git a/tests/integration/api/v1/clusters/test_clusters.py b/tests/integration/api/v1/clusters/test_clusters.py index 20809207..0fee002d 100644 --- a/tests/integration/api/v1/clusters/test_clusters.py +++ b/tests/integration/api/v1/clusters/test_clusters.py @@ -18,14 +18,18 @@ Tests for the API /cluster/ controller methods. 
@staticmethod
def get_logs(cluster_id=None):
    """Print the console log and RabbitMQ logs of every cluster node.

    Best-effort diagnostics helper: a failure on one node is printed
    together with its traceback and the remaining nodes are still
    processed.

    :param cluster_id: Cluster whose nodes are inspected; passed unchanged
                       to ServerClient.get_cluster_nodes.
    """
    admin_client = client.ServerClient()
    nodes = admin_client.get_cluster_nodes(cluster_id)
    for node in nodes['servers']:
        # Reset per node: the except clause below formats `ip`, so it must
        # be bound (and must not leak from the previous node) even when
        # the failure happens before an address was resolved.
        ip = None
        ssh = None
        try:
            # Print server console log
            data = admin_client.get_console_log(node['id']).data
            print("Console log for node %s" % node['name'])
            print(data)

            # SSH to get the rabbitmq logs
            # .get() so an absent network entry takes the "missing" branch
            # below instead of raising KeyError.
            networks = node['addresses'].get('cue_management_net')
            if not networks:
                # Devstack bug where network information is missing.
                # This issue is intermittent.
                print("Could not SSH to %s. Network information is missing"
                      % (node['name']))
                continue
            ip = [n['addr'] for n in networks if n['version'] == 4][0]
            print("SSHing to %s, IP: %s" % (node['name'], ip))
            ssh = paramiko.SSHClient()
            ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            ssh.connect(ip, username='ubuntu',
                        key_filename='/tmp/cue-mgmt-key')
            stdin, stdout, stderr = ssh.exec_command(
                "tail -n +1 /var/log/rabbitmq/*")
            print("Printing all logs in /var/log/rabbitmq/")
            result = stdout.readlines()
            print(''.join(result))
        except Exception:
            # Deliberately broad: this only gathers debugging output and
            # must never mask the test failure that triggered it.
            print("Could not SSH to %s, IP: %s" % (node['name'], ip))
            traceback.print_exc(file=sys.stdout)
        finally:
            # Close the SSH client on every path, including failures
            # between connect() and the end of the log dump.
            if ssh is not None:
                ssh.close()
class ServerClient(rest_client.RestClient):
    """REST client used for querying Nova servers.

    Built on the OpenStack RestClient class, which wraps outgoing http
    requests in keystone auth and provides response code checking and error
    handling. It authenticates through _get_keystone_auth_provider(), i.e.
    as the 'admin' user of the 'admin' tenant, with the password and
    identity URI taken from the configuration.
    """

    def __init__(self):
        super(ServerClient, self).__init__(
            auth_provider=_get_keystone_auth_provider(),
            service='compute',
            region='RegionOne',
        )

    def get_cluster_nodes(self, cluster_id=None):
        """Get the detailed server listing, optionally for one cluster.

        :param cluster_id: When truthy, sent as the ``name`` query filter
                           of the request; otherwise no filter is sent
        :returns: rest_client.ResponseBody wrapping the decoded JSON body
        """
        name_filter = ('?name=%s' % cluster_id) if cluster_id else ''
        resp, raw_body = self.get('servers/detail' + name_filter)
        return rest_client.ResponseBody(resp, json.loads(raw_body))

    def get_console_log(self, server_id):
        """Get the console log for a server.

        :param server_id: The server to get the console log from
        :returns: rest_client.ResponseBodyData wrapping the ``output``
                  field of the decoded JSON response
        """
        action_path = 'servers/%s/action' % str(server_id)
        payload = json.dumps({"os-getConsoleOutput": {}})
        resp, raw_body = self.post(action_path, payload)
        console_output = json.loads(raw_body)['output']
        return rest_client.ResponseBodyData(resp, console_output)


def _get_keystone_auth_provider():
    """Build a Keystone v2 auth provider with its credentials filled in.

    The username and tenant name are fixed to 'admin'; the password and
    the identity URI are read from CONF.identity.
    """
    provider = auth.KeystoneV2AuthProvider(
        auth.KeystoneV2Credentials(
            username='admin',
            password=CONF.identity.password,
            tenant_name='admin',
        ),
        CONF.identity.uri,
    )
    provider.fill_credentials()
    return provider