diff --git a/elements/testenv-worker/README.md b/elements/testenv-worker/README.md deleted file mode 100644 index fcb1a6114..000000000 --- a/elements/testenv-worker/README.md +++ /dev/null @@ -1,26 +0,0 @@ -Install and configure a tripleo testenv worker - -Carves up this host into a number of test environments and registers each one with gearman. - -Typically built with "-a amd64 -o testenv-worker ubuntu testenv-worker dhcp-all-interfaces hosts baremetal" - -See deploy-testenv in the tripleo-cd element for example deployments. - -Configuration -------------- - - gearman-worker: - host: 127.0.0.1 # gearman broker host - port: - mem-per-env: 16 # Indicates each testenv should have 16G of Mem - cpu-per-env: 4 # Indicates each testenv should have 4 cpu cores - disk-per-env: 80 # Indicates each testenv should have 80G of disk space - auth_user: admin - auth_tenant: admin - auth_url: http://127.0.0.1:5000 - auth_passwd: password - neutron: - ovs: - physical_bridge: # A bridge name for the public_interface and seed interfaces - public_interface: # The interface that should be moved onto physical_bridge - # in order to communicate with seed VMs diff --git a/elements/testenv-worker/bin/ci_commands b/elements/testenv-worker/bin/ci_commands deleted file mode 100755 index f6a20279c..000000000 --- a/elements/testenv-worker/bin/ci_commands +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/python -# -# Script to use as a forced command in a roots authorized_keys (by setting -# command="/usr/local/bin/ci_commands". Defines a number of commands that can -# be run over ssh as root. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# - -from __future__ import print_function - -import os -import re -import subprocess -import sys - - -def virsh_start(params): - return subprocess.call(["virsh", "start", params[1]]) - - -def virsh_destroy(params): - return subprocess.call(["virsh", "destroy", params[1]]) - - -def virsh_list(params): - command = ["virsh", "list"] - if len(params) > 1 and params[1] == "--all": - command.append("--all") - return subprocess.call(command) - - -def copyseed(params): - seed_file = "/var/lib/libvirt/images/seed_%s.qcow2" % params[0] - - rv = subprocess.call(["dd", "of=%s" % seed_file]) - rv = rv or subprocess.call(["chattr", "+C", seed_file]) - - return rv - -def purge_env(params): - rv1 = subprocess.call(["find", "/var/lib/libvirt/images", "-name", "baremetal%s*" % (params[0]), "-exec", "qemu-img", "create", "-f", "qcow2", "{}", "41G", ";"]) - rv2 = subprocess.call(["qemu-img", "create", "-f", "qcow2", "/var/lib/libvirt/images/seed_%s.qcow2" % (params[0]), "41G"]) - return (rv1 | rv2) - -COMMANDS = { - 'virsh start': virsh_start, - 'virsh destroy': virsh_destroy, - 'virsh list': virsh_list, - 'copyseed': copyseed, - 'purge_env': purge_env, -} - - -def run_command(orig_command): - return subprocess.call(orig_command, shell=True) - -# NOTE(NobodyCam): First dumpxml command below (with grep) is for Nova Baremetal and should be -# removed when Nova's Baremetal driver is removed from the repo. -RE_COMMANDS = { - r"""^(LC_ALL=C )?/usr/bin/virsh( --connect qemu:///system)? list --name$""": run_command, - r"""^(LC_ALL=C )?/usr/bin/virsh( --connect qemu:///system)? dumpxml [a-z0-9_]+ \| grep "mac address" \| awk -F"'" '{print \$2}' \| tr -d ':'$""": run_command, - r"""^(LC_ALL=C )?/usr/bin/virsh( --connect qemu:///system)? dumpxml [a-z0-9_]+ \| awk -F "'" '/mac address/{print \$2}'\| tr -d ':'$""": run_command, - r"""^(LC_ALL=C )?/usr/bin/virsh( --connect qemu:///system)? list --all --name$""": run_command, - r"""^(LC_ALL=C )?/usr/bin/virsh( --connect qemu:///system)? (start|destroy|reset) "?[a-z0-9_]+"?$""": run_command, - r"""^(LC_ALL=C )?/usr/bin/virsh( --connect qemu:///system)? dumpxml [a-z0-9_]+ \| awk '[\w\\\/<>\(\){},.|=*;" ]+' Q="'" RS="\[<>\]" \| head -1$""": run_command, - r"""^EDITOR="sed -i '[\w\\\/<>\(\)|=*;" ]+'"( LC_ALL=C)? /usr/bin/virsh( --connect qemu:///system)? edit "?[a-z0-9_]+"?$""": run_command, - # Ironic Pre Mitaka - r"""^(LC_ALL=C )?/usr/bin/virsh( --connect qemu:///system)? list --all \| tail -n \+2 \| awk -F" " '{print \$2}'$""": run_command, - r"""^(LC_ALL=C )?/usr/bin/virsh( --connect qemu:///system)? list --all\|grep running( )?\|( )?awk -v qc='"' -F" " '{print qc\$2qc}'$""": run_command, -} - - -def is_arg_safe(arg): - unsafe_characters = r"[^a-zA-Z0-9_-]" - search = re.compile(unsafe_characters).search - if not bool(search(arg)): - return True - raise ValueError("Unsafe argument detected: %s" % arg) - - -def check_args(args): - for arg in args: - is_arg_safe(arg) - - -def main(): - orig_command = os.environ["SSH_ORIGINAL_COMMAND"] - user_args = os.environ["SSH_ORIGINAL_COMMAND"].split()[1:] - - for command in COMMANDS: - if orig_command.find(command + " ") == 0 or orig_command == command: - check_args(user_args) - handler = COMMANDS[command] - print("Calling %s with: %s" % (handler, user_args), - file=sys.stderr) - return handler(user_args) - else: - for expr, handler in RE_COMMANDS.items(): - if re.match(expr, orig_command): - print("Calling %s " % (orig_command), file=sys.stderr) - return handler(orig_command) - else: - print("Unknown command: %s" % os.environ["SSH_ORIGINAL_COMMAND"], - file=sys.stderr) - return 1 - - -if __name__ == "__main__": - exit(main()) diff --git a/elements/testenv-worker/bin/ensure-test-env b/elements/testenv-worker/bin/ensure-test-env deleted file mode 100755 index 8b7fbe9b2..000000000 --- a/elements/testenv-worker/bin/ensure-test-env +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash -# -# Copyright 2013 Red Hat, Inc. -# Copyright 2014 Hewlett-Packard Development Company, L.P. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# - -set -eux - -HOSTIP=$1 -OVSBRIDGE=$2 -NUM=$3 - -DATAFILE=/etc/test_env_${NUM}.json -trap 'rm -f $DATAFILE' ERR -LOGFILE=/var/log/testenv-worker_${NUM}.log - -export NODE_ARCH=amd64 -export NODE_MEM=4096 -export OVERCLOUD_COMPUTESCALE=2 # Creates this + 2 nodes in testenv. -devtest_testenv.sh $DATAFILE -n $NUM -b $OVSBRIDGE -s /root/.ssh/test_env_rsa - -SEEDMAC=$(virsh dumpxml seed_$NUM | grep 'mac address' | awk -F \' 'NR==1,/mac address/ {print $2}') - -NETWORKNAME=$(os-apply-config --key gearman-worker.network_name --type netdevice) -PORTOUTPUT=$(neutron port-create --name te_$(hostname)_$NUM --mac-address $SEEDMAC --format shell --column fixed_ips $NETWORKNAME) -echo "$PORTOUTPUT" -SEEDIP=$(echo "$PORTOUTPUT" | grep -E -o "([0-9]+\.){3}[0-9]+") - -NEW_JSON=$(mktemp) -jq -s add $DATAFILE - < $NEW_JSON -{ - "env-num":"$NUM", - "remote-operations":"1", - "remote-host":"$(hostname)", - "seed-ip":"$SEEDIP", - "seed-route-dev":"eth1" -} -EOF -mv $NEW_JSON $DATAFILE - -BROKER=$(os-apply-config --key gearman-worker.host --type netaddress):$(os-apply-config --key gearman-worker.port --key-default 4730 --type int) -# FIXME : not idempotent, just starts more workers, we need a way cleanly kill then -testenv-worker -b $BROKER $DATAFILE >> $LOGFILE 2>&1 & diff --git a/elements/testenv-worker/bin/reset-ovs b/elements/testenv-worker/bin/reset-ovs deleted file mode 100755 index f1889fa69..000000000 --- a/elements/testenv-worker/bin/reset-ovs +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# Not including set -e here as we always want to finish up by starting -# ovs and the testenvs regardless of what happens. -set -x - -# Gracfully stop all envs -/usr/local/bin/testenv-ctl stop_jobs - -# Wait for upto 3 hours for the envs to finish processing -for x in $(seq 180) ; do - if [ $(virsh list --name | grep seed_ | wc -l) == 0 ] ; then - break - fi - sleep 60 -done - -# Do the ovs reset -systemctl stop openvswitch -cp /etc/openvswitch/conf.db_testenv /etc/openvswitch/conf.db -systemctl start openvswitch - -/usr/local/bin/testenv-ctl start_jobs diff --git a/elements/testenv-worker/bin/testenv-ctl b/elements/testenv-worker/bin/testenv-ctl deleted file mode 100755 index 6c81402d7..000000000 --- a/elements/testenv-worker/bin/testenv-ctl +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash -# -# Copyright 2014 Red Hat, Inc. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -set -eu - -CMD=${1:-''} -export WORKER_PIDS='' - -usage() { - echo "Usage: testenv-ctl start_jobs|stop_jobs" - exit 1 -} - -function start_workers() { - set_worker_pids - sudo kill -USR1 $WORKER_PIDS - echo "Job processing enabled for workers: $WORKER_PIDS" -} - -function stop_workers() { - set_worker_pids - sudo kill -USR2 $WORKER_PIDS - echo "Job processing disabled for workers: $WORKER_PIDS" -} - -function set_worker_pids() { - WORKER_PIDS=$(ps -e -o pid,command | grep testenv-worker | grep -v grep | awk '{print $1}') - if [ -z "$WORKER_PIDS" ]; then - echo "No worker pids found." - exit 1 - fi -} - -case $CMD in - stop_jobs) stop_workers;; - start_jobs) start_workers;; - *) usage;; -esac diff --git a/elements/testenv-worker/element-deps b/elements/testenv-worker/element-deps deleted file mode 100644 index 1c592acb8..000000000 --- a/elements/testenv-worker/element-deps +++ /dev/null @@ -1,5 +0,0 @@ -network-utils -openstack-clients -os-collect-config -source-repositories -ntp diff --git a/elements/testenv-worker/install.d/79-te-worker b/elements/testenv-worker/install.d/79-te-worker deleted file mode 100755 index 5dcf73552..000000000 --- a/elements/testenv-worker/install.d/79-te-worker +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -set -eux - -install-packages python-pip libvirt-bin qemu-utils python-libvirt qemu-kvm qemu-system jq psmisc - -pip install gear - -install -m 0755 -o root -g root $(dirname $0)/../testenv-worker /usr/local/bin diff --git a/elements/testenv-worker/os-refresh-config/configure.d/60-setup-testenvs b/elements/testenv-worker/os-refresh-config/configure.d/60-setup-testenvs deleted file mode 100755 index 82b827b3e..000000000 --- a/elements/testenv-worker/os-refresh-config/configure.d/60-setup-testenvs +++ /dev/null @@ -1,103 +0,0 @@ -#!/bin/bash -# -# Copyright 2013 Red Hat, Inc. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# - -set -ex - -# We don't want this running twice, even if it fails -# We'll end up with lots of stuff duplicated -[ -e /opt/stack/setup-testenvs.done ] && exit 0 -touch /opt/stack/setup-testenvs.done - -# Calculate number of envs to setup based on lowest of Host Mem/CPU and Disk -MEMTOTAL=$(free -g | awk '/Mem/ {print $2}') -NUMENVS=$(expr $MEMTOTAL / $(os-apply-config --key gearman-worker.mem-per-env --key-default 20)) - -CPUTOTAL=$(grep "^processor" /proc/cpuinfo | wc -l) -ENVS=$(expr $CPUTOTAL / $(os-apply-config --key gearman-worker.cpu-per-env --key-default 5)) -NUMENVS=$(($ENVS < $NUMENVS ? $ENVS : $NUMENVS)) - -DISKTOTAL=$(df /var/lib/libvirt/images/ | awk '/^\// {print $2}') -ENVS=$(expr $DISKTOTAL / 1024 / 1024 / $(os-apply-config --key gearman-worker.disk-per-env --key-default 100)) - -# Hardcoding to 3 -NUMENVS=3 - -# Deploy 5 hosts per testenv -export NODE_CNT=5 - -# Mount the rh1 ssd's to the libvirt images dir -# assume the SSD is the last drive on the list -SSD=$(ls /dev/sd[a-z] | sort | tail -n 1) -mkfs.ext4 -F $SSD -mount -t ext4 $SSD /var/lib/libvirt/images - -echo "Setting up $NUMENVS test environments" - -export PATH=/opt/stack/tripleo-incubator/scripts/:$PATH - -# Some of the incubator scripts are expecting TRIPLEO_OS_FAMILY to be set -source /opt/stack/tripleo-incubator/scripts/set-os-type - -OVSBRIDGE=$(os-apply-config --key neutron.ovs.physical_bridge --type netdevice) - -# Place the physical interface onto the seed bridge -ensure-bridge $OVSBRIDGE \ - $(os-apply-config --key neutron.ovs.public_interface --type netdevice) - -# Generate a ssh keypair -if [ ! -e /root/.ssh/test_env_rsa ] ; then - ssh-keygen -N '' -f /root/.ssh/test_env_rsa - echo no-port-forwarding,no-agent-forwarding,no-X11-forwarding,command=\"/usr/local/bin/ci_commands\" $(cat /root/.ssh/test_env_rsa.pub) >> ~/.ssh/authorized_keys -fi - -# Define OS Auth env variables, needed for neutron command -export OS_PASSWORD=$(os-apply-config --key gearman-worker.auth_passwd) -export OS_AUTH_URL=$(os-apply-config --key gearman-worker.auth_url --type netaddress) -# TODO : add a type to os-apply-config for username -export OS_USERNAME=$(os-apply-config --key gearman-worker.auth_user --type raw) -export OS_TENANT_NAME=$(os-apply-config --key gearman-worker.auth_tenant --type raw) - -# Give this host an IP on tripleo-bm-test -# Create a internel port on the public bridge, this will have an IP on the overcloud -DUMMYDEVICE=tedev -ovs-vsctl add-port $OVSBRIDGE $DUMMYDEVICE -- set Interface $DUMMYDEVICE type=internal -MAC=$(ip link show ${DUMMYDEVICE} | awk 'NR==2 {print $2}') -NETWORKNAME=$(os-apply-config --key gearman-worker.network_name --type netdevice) -PORTOUTPUT=$(neutron port-create --name te_$(hostname) --mac-address $MAC --format shell --column fixed_ips $NETWORKNAME) -echo "$PORTOUTPUT" -export HOSTIP=$(echo "$PORTOUTPUT" | grep -E -o "([0-9]+\.){3}[0-9]+") - -# TODO : make this persist across reboots -# TODO : Get prefix length -ip link set up dev $DUMMYDEVICE -ip addr add $HOSTIP/24 dev $DUMMYDEVICE - -killall -9 testenv-worker || true -for port in $(neutron port-list | awk "\$4~\"te_$(hostname)_.*\" {print \$2}") ; do - neutron port-delete $port -done - -for NUM in $(seq 1 $NUMENVS) ; do - ensure-test-env $HOSTIP $OVSBRIDGE $NUM -done - -# Take a copy of the ovs config db, this will be refreshed nightly to prevent ovs loosing track of itself -cp /etc/openvswitch/conf.db /etc/openvswitch/conf.db_testenv - -# Reset the ovs config nightly -echo "49 $(( ( $RANDOM % 6 ) + 2 )) * * * root /usr/local/bin/reset-ovs" > /etc/cron.d/reset-ovs diff --git a/elements/testenv-worker/source-repository-testenv-worker b/elements/testenv-worker/source-repository-testenv-worker deleted file mode 100644 index 61a1ae707..000000000 --- a/elements/testenv-worker/source-repository-testenv-worker +++ /dev/null @@ -1,2 +0,0 @@ -tripleo-incubator git /opt/stack/tripleo-incubator https://git.openstack.org/openstack/tripleo-incubator.git -tripleo-image-elements git /opt/stack/tripleo-image-elements https://git.openstack.org/openstack/tripleo-image-elements.git diff --git a/elements/testenv-worker/testenv-worker b/elements/testenv-worker/testenv-worker deleted file mode 100755 index 8bd5e9d4a..000000000 --- a/elements/testenv-worker/testenv-worker +++ /dev/null @@ -1,224 +0,0 @@ -#!/usr/bin/python -# -# Runs a tripleo-ci test-worker -# -# Copyright 2013 Red Hat, Inc. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# - -import argparse -import json -import logging -import signal -import sys -import threading -import time -import uuid - -import gear - -logging.basicConfig( - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') -logger = logging.getLogger('testenv-worker') -logger.setLevel(logging.INFO) - - -class CallbackClient(gear.Client): - def __init__(self): - super(CallbackClient, self).__init__() - self.event = threading.Event() - - def handleWorkComplete(self, packet): - super(CallbackClient, self).handleWorkComplete(packet) - self.event.set() - - def handleWorkException(self, packet): - super(CallbackClient, self).handleWorkException(packet) - self.event.set() - - def handleWorkFail(self, packet): - super(CallbackClient, self).handleWorkFail(packet) - self.event.set() - - def wait(self, timeout=None): - """Wait for notification of completion, error or failure. - - :param timeout: a timeout for the operation in seconds - :type timeout: float - :returns: True if a notification was received, False on timeout - """ - self.event.wait(timeout) - return self.event.is_set() - - -class TEWorkerThread(threading.Thread): - def __init__(self, geard, num, timeout, te_data): - super(TEWorkerThread, self).__init__() - self.geard = geard - self.timeout = timeout - self.te_data = te_data - self.running = True - self.process_new_jobs = True - self.num = num - self.worker = None - - def stop(self): - self.running = False - self.stopJobProcessing() - - def startJobProcessing(self): - self.process_new_jobs = True - - def stopJobProcessing(self): - self.process_new_jobs = False - if self.worker: - self.worker.stopWaitingForJobs() - - def run(self): - while self.running: - try: - if self.process_new_jobs: - logger.info('running TE worker') - self.runJob() - except gear.InterruptedError: - logger.info('getJob interrupted...') - except: - logger.exception('Error while run_te_worker worker') - time.sleep(2) - - def runJob(self): - self.worker = gear.Worker('testenv-worker-%s' % self.num) - try: - self._add_servers(self.worker, self.geard) - self.worker.waitForServer() - - self.worker.registerFunction('lockenv') - - logger.info('Getting new job...') - job = self.worker.getJob() - logger.info('Received job : %s', job.arguments) - - arguments = json.loads(job.arguments) - call_back = arguments["callback_name"] - job_timeout = int(arguments.get("timeout", self.timeout)) - - # Once this Job is called we call back to the client to run its - # commands while this environment is locked - self._run_callback(job_timeout, call_back) - - job.sendWorkComplete("") - finally: - self.worker.shutdown() - - def _add_servers(self, client, servers): - for server in servers.split(','): - server = server.rsplit(':', 1) - if len(server) == 1: - server.append('4730') - client.addServer(server[0], int(server[1])) - - def _run_callback(self, timeout, callback_name): - client = CallbackClient() - self._add_servers(client, self.geard) - client.waitForServer() - - cb_job = gear.Job(callback_name, self.te_data) - client.submitJob(cb_job) - - # Wait for 30 seconds, then test the status of the job - if not client.wait(30): - # Request the job status from the broker - cb_job.connection.sendPacket(gear.Packet(gear.constants.REQ, - gear.constants.GET_STATUS, - cb_job.handle)) - # Let a little time pass for the STATUS_RES to return, If we're in - # here we've already waited 30 seconds so another 10 wont make much - # difference - time.sleep(10) - if not cb_job.running: - logger.error("No sign of the Callback job starting," - "assuming its no longer present") - client.shutdown() - return - - # We timeout after the configured timeout - the 40 second sleep that we - # perform during initial handshaking. Note that after this timeout we - # offer the environment for other test clients, but the prior client's - # credentials are still valid, so very confusing errors can occur if we - # were ever to timeout without the client timing out first. - client.wait(timeout - 40) - if cb_job.failure: - logger.error("The Job appears to have failed.") - elif not cb_job.complete: - logger.error("No sign of Job completing, Freeing environment.") - else: - logger.info('Returned from Job : %s', cb_job.data) - client.shutdown() - - -def main(args=sys.argv[1:]): - parser = argparse.ArgumentParser( - description='Registers a test environment with a gearman broker, the ' - 'registered job "lockenv" then holds the environment in a ' - '"locked" state while it calls back to the client. The ' - 'clients job is provided with data (contents of datafile)' - ) - parser.add_argument('datafile', - help='Path to the data to provided to the client') - parser.add_argument('--timeout', '-t', type=int, default=10800, - help='The maximum number of seconds to hold the ' - 'testenv for, can be overridden by the client.') - parser.add_argument('--tenum', '-n', default=uuid.uuid4().hex, - help='A unique identifier identifing this env on ' - 'this host.') - parser.add_argument('--geard', '-b', default='127.0.0.1:4730', - help='A comma separated list of gearman brokers to ' - 'connect to.') - parser.add_argument('--debug', '-d', action='store_true', - help='Set to debug mode.') - opts = parser.parse_args(args) - if opts.debug: - logger.setLevel(logging.DEBUG) - - with open(opts.datafile, "r") as fp: - te_data = fp.read() - - logger.info('Starting test-env worker with data %r', te_data.strip()) - # run worker in thread so signal handling is responsive - te_worker = TEWorkerThread(opts.geard, opts.tenum, opts.timeout, te_data) - - def usr2_signal_handler(signal, frame): - te_worker.stopJobProcessing() - logger.info('SIGUSR2 recieved: Processing of new jobs is disabled.') - signal.signal(signal.SIGUSR2, usr2_signal_handler) - - def usr1_signal_handler(signal, frame): - te_worker.startJobProcessing() - logger.info('SIGUSR1 recieved: Processing of new jobs is enabled.') - signal.signal(signal.SIGUSR1, usr1_signal_handler) - - def int_signal_handler(signal, frame): - te_worker.stop() - logger.info('SIGINT recieved: Exiting...') - sys.exit(0) - signal.signal(signal.SIGINT, int_signal_handler) - - te_worker.start() - while te_worker.running: - time.sleep(1) - - -if __name__ == '__main__': - main() diff --git a/scripts/common_functions.sh b/scripts/common_functions.sh index ffbbcd2dc..394fb7d28 100755 --- a/scripts/common_functions.sh +++ b/scripts/common_functions.sh @@ -173,28 +173,26 @@ function postci(){ find $TRIPLEO_ROOT/delorean/data/repos -name "*.log" | XZ_OPT=-3 xargs tar -cJf $WORKSPACE/logs/delorean_repos.tar.xz extract_logs delorean_repos fi - if [ "${SEED_IP:-}" != "" ] ; then - SANITIZED_SEED_ADDRESS=$(sanitize_ip_address ${SEED_IP}) - # Generate extra state information from the running undercloud - ssh root@${SEED_IP} 'export TRIPLEO_ROOT='"$TRIPLEO_ROOT""; $TRIPLEO_ROOT/tripleo-ci/scripts/get_host_info.sh" + # Generate extra state information from the running undercloud + sudo --preserve-env $TRIPLEO_ROOT/tripleo-ci/scripts/get_host_info.sh + + if [ "$OVB" == "1" ] ; then # Get logs from the undercloud - ssh root@${SEED_IP} $TARCMD > $WORKSPACE/logs/undercloud.tar.xz + $TARCMD > $WORKSPACE/logs/undercloud.tar.xz extract_logs undercloud # when we ran get_host_info.sh on the undercloud it left the output of nova list in /tmp for us - for INSTANCE in $(ssh root@${SEED_IP} cat /tmp/nova-list.txt | grep ACTIVE | awk '{printf"%s=%s\n", $4, $12}') ; do + for INSTANCE in $(cat /tmp/nova-list.txt | grep ACTIVE | awk '{printf"%s=%s\n", $4, $12}') ; do IP=${INSTANCE//*=} SANITIZED_ADDRESS=$(sanitize_ip_address ${IP}) NAME=${INSTANCE//=*} - ssh $SSH_OPTIONS root@${SEED_IP} su jenkins -c \"scp $SSH_OPTIONS $TRIPLEO_ROOT/tripleo-ci/scripts/get_host_info.sh heat-admin@${SANITIZED_ADDRESS}:/tmp\" - timeout -s 15 -k 600 300 ssh $SSH_OPTIONS root@${SEED_IP} su jenkins -c \"ssh $SSH_OPTIONS heat-admin@$IP sudo /tmp/get_host_info.sh\" - ssh $SSH_OPTIONS root@${SEED_IP} su jenkins -c \"ssh $SSH_OPTIONS heat-admin@$IP $TARCMD\" > $WORKSPACE/logs/${NAME}.tar.xz + scp $SSH_OPTIONS $TRIPLEO_ROOT/tripleo-ci/scripts/get_host_info.sh heat-admin@${SANITIZED_ADDRESS}:/tmp + timeout -s 15 -k 600 300 ssh $SSH_OPTIONS heat-admin@$IP sudo /tmp/get_host_info.sh + ssh $SSH_OPTIONS heat-admin@$IP $TARCMD > $WORKSPACE/logs/${NAME}.tar.xz extract_logs $NAME done # post metrics - scp $SSH_OPTIONS root@${SANITIZED_SEED_ADDRESS}:${METRICS_DATA_FILE} /tmp/seed-metrics - cat /tmp/seed-metrics >> ${METRICS_DATA_FILE} # This spams the postci output with largely uninteresting trace output set +x echo -n 'Recording Heat deployment times...' @@ -217,7 +215,6 @@ function postci(){ fi elif [ "$OSINFRA" = "1" ] ; then local i=2 - sudo $TRIPLEO_ROOT/tripleo-ci/scripts/get_host_info.sh $TARCMD > $WORKSPACE/logs/primary_node.tar.xz # Extract /var/log for easy viewing tar xf $WORKSPACE/logs/primary_node.tar.xz -C $WORKSPACE/logs/ var/log diff --git a/toci_gate_test.sh b/toci_gate_test.sh index 5ea919c00..7d0187522 100755 --- a/toci_gate_test.sh +++ b/toci_gate_test.sh @@ -74,7 +74,7 @@ export RUN_PING_TEST=1 export RUN_TEMPEST_TESTS=0 export OVB=0 export UCINSTANCEID=NULL -export TOCIRUNNER="./toci_instack.sh" +export TOCIRUNNER="./toci_instack_ovb.sh" export MULTINODE=0 # Whether or not we run TripleO using OpenStack Infra nodes export OSINFRA=0 @@ -153,7 +153,6 @@ for JOB_TYPE_PART in $(sed 's/-/ /g' <<< "${TOCI_JOBTYPE:-}") ; do ;; ovb) OVB=1 - TOCIRUNNER="./toci_instack_ovb.sh" # The test env broker needs to know the instanceid of the this node so it can attach it to the provisioning network UCINSTANCEID=$(http_proxy= curl http://169.254.169.254/openstack/2015-10-15/meta_data.json | python -c 'import json, sys; print json.load(sys.stdin)["uuid"]') @@ -199,15 +198,6 @@ for JOB_TYPE_PART in $(sed 's/-/ /g' <<< "${TOCI_JOBTYPE:-}") ; do esac done -# print the final values of control variables to console -env | grep -E "(TOCI_JOBTYPE)=" - -# Allow the instack node to have traffic forwards through here -sudo iptables -A FORWARD -i eth0 -o eth1 -m state --state RELATED,ESTABLISHED -j ACCEPT -sudo iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE -sudo iptables -A FORWARD -i eth1 -o eth0 -j ACCEPT -echo 1 | sudo dd of=/proc/sys/net/ipv4/ip_forward - TIMEOUT_SECS=$((DEVSTACK_GATE_TIMEOUT*60)) # ./testenv-client kill everything in its own process group it it hits a timeout # run it in a separate group to avoid getting killed along with it diff --git a/toci_instack.sh b/toci_instack.sh deleted file mode 100755 index 9e011db6b..000000000 --- a/toci_instack.sh +++ /dev/null @@ -1,210 +0,0 @@ -#!/usr/bin/env bash -set -eux - -## Signal to toci_gate_test.sh we've started -touch /tmp/toci.started - -if [ ! -e "$TE_DATAFILE" ] ; then - echo "Couldn't find data file" - exit 1 -fi - -export PATH=/sbin:/usr/sbin:$PATH - -export CURRENT_DIR=$(dirname ${BASH_SOURCE[0]:-$0}) -export TRIPLEO_CI_DIR=$CURRENT_DIR/../ - -source $TRIPLEO_CI_DIR/tripleo-ci/scripts/common_vars.bash -source $TRIPLEO_CI_DIR/tripleo-ci/scripts/common_functions.sh -source $TRIPLEO_CI_DIR/tripleo-ci/scripts/metrics.bash - -stop_metric "tripleo.testenv.wait.seconds" # start_metric in toci_gate_test.sh -start_metric "tripleo.ci.total.seconds" - -mkdir -p $WORKSPACE/logs - -MY_IP=$(ip addr show dev eth1 | awk '/inet / {gsub("/.*", "") ; print $2}') - -undercloud_net_range="192.0.2." -undercloud_services_ip=$undercloud_net_range"1" -undercloud_haproxy_public_ip=$undercloud_net_range"2" -undercloud_haproxy_admin_ip=$undercloud_net_range"3" -export no_proxy=$undercloud_services_ip,$undercloud_haproxy_public_ip,$undercloud_haproxy_admin_ip,$MY_IP,$MIRRORSERVER - -# Setup delorean -$TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --delorean-setup - -dummy_ci_repo - -trap "[ \$? != 0 ] && echo ERROR DURING PREVIOUS COMMAND ^^^ && echo 'See postci.txt in the logs directory for debugging details'; postci 2>&1 | ts '%Y-%m-%d %H:%M:%S.000 |' > $WORKSPACE/logs/postci.log 2>&1" EXIT - -delorean_build_and_serve - -# Install all of the repositories we need -$TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --repo-setup - -layer_ci_repo - -# Remove everything installed from a delorean repository (only requred if ci nodes are being reused) -TOBEREMOVED=$(yumdb search from_repo delorean delorean-current delorean-ci | grep -v -e from_repo -e "Loaded plugins" || true) -[ "$TOBEREMOVED" != "" ] && sudo yum remove -y $TOBEREMOVED -sudo yum clean all - -# ===== End : Yum repository setup ==== - -cd $TRIPLEO_ROOT -sudo yum install -y diskimage-builder instack-undercloud os-apply-config qemu-kvm - -PRIV_SSH_KEY=$(OS_CONFIG_FILES=$TE_DATAFILE os-apply-config --key ssh-key --type raw) -SSH_USER=$(OS_CONFIG_FILES=$TE_DATAFILE os-apply-config --key ssh-user --type username) -HOST_IP=$(OS_CONFIG_FILES=$TE_DATAFILE os-apply-config --key host-ip --type netaddress) -ENV_NUM=$(OS_CONFIG_FILES=$TE_DATAFILE os-apply-config --key env-num --type int) - -mkdir -p ~/.ssh -echo "$PRIV_SSH_KEY" > ~/.ssh/id_rsa -chmod 600 ~/.ssh/id_rsa -# Generate the public key from the private one -ssh-keygen -y -f ~/.ssh/id_rsa > ~/.ssh/id_rsa.pub -# Ensure there is a newline after the last key -echo >> ~/.ssh/authorized_keys -cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys - -# Kill any VM's in the test env that we may have started, freeing up RAM -# for other tests running on the TE host. -function destroy_vms(){ - ssh $SSH_OPTIONS $SSH_USER@$HOST_IP virsh destroy seed_${ENV_NUM} || true - for i in $(seq 0 14) ; do - ssh $SSH_OPTIONS $SSH_USER@$HOST_IP virsh destroy baremetal${ENV_NUM}brbm_one${ENV_NUM}_${i} || true - done - ssh $SSH_OPTIONS $SSH_USER@$HOST_IP purge_env ${ENV_NUM} || true -} - -# TODO : Remove the need for this from instack-undercloud -ls /home/jenkins/.ssh/id_rsa_virt_power || ssh-keygen -f /home/jenkins/.ssh/id_rsa_virt_power -P "" - -export ANSWERSFILE=/usr/share/instack-undercloud/undercloud.conf.sample -export ELEMENTS_PATH=/usr/share/instack-undercloud -export DIB_DISTRIBUTION_MIRROR=$CENTOS_MIRROR -export DIB_CLOUD_IMAGES=http://$MIRRORSERVER/cloud.centos.org/centos/7/images - -source $TRIPLEO_ROOT/tripleo-ci/deploy.env - -# Build and deploy our undercloud instance -destroy_vms - -# If this 404's it wont error just continue without a file created -if canusecache $UNDERCLOUD_VM_NAME.qcow2 ; then - wget --progress=dot:mega http://$MIRRORSERVER/builds/current-tripleo/$UNDERCLOUD_VM_NAME.qcow2 || true - [ -f $PWD/$UNDERCLOUD_VM_NAME.qcow2 ] && update_image $PWD/$UNDERCLOUD_VM_NAME.qcow2 -fi - -# We're adding some packages to the image build here so when using a cached image -# less has to be installed during the undercloud install -if [ ! -e $UNDERCLOUD_VM_NAME.qcow2 ] ; then - echo "INFO: Check logs/instack-build.txt for instack image build output" - DIB_YUM_REPO_CONF=$(ls /etc/yum.repos.d/delorean*) \ - # Pre install packages on the instack image for the master jobs, We don't currently - # cache images for the stabole jobs so this isn't need and causes complications bug #1585937 - PREINSTALLPACKAGES= - if [ -z "$STABLE_RELEASE" ] ; then - PREINSTALLPACKAGES="-p automake,docker-registry,dstat,gcc-c++,ipxe-bootimgs,libxslt-devel,mariadb-devel,mariadb-server,memcached,mod_wsgi,openstack-aodh-api,openstack-aodh-evaluator,openstack-aodh-listener,openstack-aodh-notifier,openstack-ceilometer-api,openstack-ceilometer-central,openstack-ceilometer-collector,openstack-glance,openstack-heat-api,openstack-heat-api-cfn,openstack-heat-engine,openstack-ironic-api,openstack-ironic-conductor,openstack-ironic-inspector,openstack-keystone,openstack-neutron,openstack-neutron-ml2,openstack-neutron-openvswitch,openstack-nova-api,openstack-nova-cert,openstack-nova-compute,openstack-nova-conductor,openstack-nova-scheduler,openstack-selinux,openstack-swift-account,openstack-swift-object,openstack-swift-proxy,openstack-tempest,openwsman-python,os-apply-config,os-cloud-config,os-collect-config,os-net-config,os-refresh-config,puppet,python-pip,python-virtualenv,rabbitmq-server,tftp-server,xinetd,yum-plugin-priorities" - fi - # NOTE(pabelanger): Create both qcow2 and raw formats, but once we removed - # Fedora 22 support, we can stop building qcow2 images. - disk-image-create --image-size 30 -t qcow2,raw -a amd64 centos7 instack-vm -o $UNDERCLOUD_VM_NAME $PREINSTALLPACKAGES 2>&1 | sudo dd of=$WORKSPACE/logs/instack-build.txt || (tail -n 50 $WORKSPACE/logs/instack-build.txt && false) -fi -dd if=$UNDERCLOUD_VM_NAME.qcow2 | ssh $SSH_OPTIONS root@${HOST_IP} copyseed $ENV_NUM -ssh $SSH_OPTIONS root@${HOST_IP} virsh start seed_$ENV_NUM - -# Set SEED_IP here to prevent postci ssh'ing to the undercloud before its up and running -SEED_IP=$(OS_CONFIG_FILES=$TE_DATAFILE os-apply-config --key seed-ip --type netaddress --key-default '') -SANITIZED_SEED_ADDRESS=$(sanitize_ip_address ${SEED_IP}) - -# The very first thing we should do is put a valid dns server in /etc/resolv.conf, without it -# all ssh connections hit a 20 second delay until a reverse dns lookup hits a timeout -echo -e "nameserver 10.1.8.10\nnameserver 8.8.8.8" > /tmp/resolv.conf -tripleo wait_for -d 5 -l 20 -- scp $SSH_OPTIONS /tmp/resolv.conf root@${SANITIZED_SEED_ADDRESS}:/etc/resolv.conf - -echo_vars_to_deploy_env -cp $TRIPLEO_ROOT/tripleo-ci/deploy.env $WORKSPACE/logs/deploy.env.log - -# Copy the required CI resources to the undercloud were we use them -tar -czf - $TRIPLEO_ROOT/tripleo-ci /etc/yum.repos.d/delorean* | ssh $SSH_OPTIONS root@$SEED_IP tar -C / -xzf - - -# Don't get a file from cache if CACHEUPLOAD=1 (periodic job) -# If this 404's it wont error just continue without a file created -if canusecache ipa_images.tar ; then - wget --progress=dot:mega http://$MIRRORSERVER/builds/current-tripleo/ipa_images.tar || true - if [ -f ipa_images.tar ] ; then - tar -xf ipa_images.tar - update_image $PWD/ironic-python-agent.initramfs - scp $SSH_OPTIONS ironic-python-agent.* root@${SANITIZED_SEED_ADDRESS}:/home/stack - rm ipa_images.tar ironic-python-agent.* - fi -fi - -# Same thing for the overcloud image -if canusecache overcloud-full.tar ; then - wget --progress=dot:mega http://$MIRRORSERVER/builds/current-tripleo/overcloud-full.tar || true - if [ -f overcloud-full.tar ] ; then - tar -xf overcloud-full.tar - update_image $PWD/overcloud-full.qcow2 - scp $SSH_OPTIONS overcloud-full.qcow2 overcloud-full.initrd overcloud-full.vmlinuz root@${SANITIZED_SEED_ADDRESS}:/home/stack - rm overcloud-full.* - fi -fi - -ssh $SSH_OPTIONS root@${SEED_IP} <<-EOF - -set -eux - -source $TRIPLEO_ROOT/tripleo-ci/deploy.env - -ip route add 0.0.0.0/0 dev eth0 via $MY_IP - -# installing basic utils -yum install -y python-simplejson dstat yum-plugin-priorities - -# Add a simple system utilisation logger process -dstat -tcmndrylpg --output /var/log/dstat-csv.log >/dev/null & -disown - -# https://bugs.launchpad.net/tripleo/+bug/1536136 -# Add some swap to the undercloud, this is only a temp solution -# to see if it improves CI fail rates, we need to come to a concensus -# on how much RAM is acceptable as a minimum and stick to it -dd if=/dev/zero of=/swapfile count=2k bs=1M -mkswap /swapfile -swapon /swapfile - -# Install our test cert so SSL tests work -cp $TRIPLEO_ROOT/tripleo-ci/test-environments/overcloud-cacert.pem /etc/pki/ca-trust/source/anchors/ -update-ca-trust extract - -# Run the deployment as the stack user -su -l -c "bash $TRIPLEO_ROOT/tripleo-ci/scripts/deploy.sh" stack -EOF - -# If we got this far and its a periodic job, declare success and upload build artifacts -if [ $CACHEUPLOAD == 1 ] ; then - # Get the IPA and overcloud images for caching - ssh root@$SEED_IP tar -C /home/stack -cf - ironic-python-agent.initramfs ironic-python-agent.vmlinuz ironic-python-agent.kernel > ipa_images.tar - ssh root@$SEED_IP tar -C /home/stack -cf - overcloud-full.qcow2 overcloud-full.initrd overcloud-full.vmlinuz > overcloud-full.tar - - md5sum overcloud-full.tar > overcloud-full.tar.md5 - md5sum ipa_images.tar > ipa_images.tar.md5 - md5sum $TRIPLEO_ROOT/$UNDERCLOUD_VM_NAME.qcow2 > $UNDERCLOUD_VM_NAME.qcow2.md5 - - curl http://$MIRRORSERVER/cgi-bin/upload.cgi -F "repohash=$TRUNKREPOUSED" -F "upload=@ipa_images.tar;filename=ipa_images.tar" - curl http://$MIRRORSERVER/cgi-bin/upload.cgi -F "repohash=$TRUNKREPOUSED" -F "upload=@overcloud-full.tar;filename=overcloud-full.tar" - # TODO(pabelanger): Remove qcow2 format, since centos-7 cannot mount nbd with the default kernel. - curl http://$MIRRORSERVER/cgi-bin/upload.cgi -F "repohash=$TRUNKREPOUSED" -F "upload=@$TRIPLEO_ROOT/$UNDERCLOUD_VM_NAME.qcow2;filename=$UNDERCLOUD_VM_NAME.qcow2" - curl http://$MIRRORSERVER/cgi-bin/upload.cgi -F "repohash=$TRUNKREPOUSED" -F "upload=@$TRIPLEO_ROOT/$UNDERCLOUD_VM_NAME.raw;filename=$UNDERCLOUD_VM_NAME.raw" - curl http://$MIRRORSERVER/cgi-bin/upload.cgi -F "repohash=$TRUNKREPOUSED" -F "upload=@ipa_images.tar.md5;filename=ipa_images.tar.md5" - curl http://$MIRRORSERVER/cgi-bin/upload.cgi -F "repohash=$TRUNKREPOUSED" -F "upload=@overcloud-full.tar.md5;filename=overcloud-full.tar.md5" - curl http://$MIRRORSERVER/cgi-bin/upload.cgi -F "repohash=$TRUNKREPOUSED" -F "upload=@$UNDERCLOUD_VM_NAME.qcow2.md5;filename=$UNDERCLOUD_VM_NAME.qcow2.md5" - curl http://$MIRRORSERVER/cgi-bin/upload.cgi -F "repohash=$TRUNKREPOUSED" -F "$JOB_NAME=SUCCESS" -fi - -exit 0 -echo 'Run completed.' diff --git a/toci_instack_ovb.sh b/toci_instack_ovb.sh index 60d076ae0..ea8bd1271 100755 --- a/toci_instack_ovb.sh +++ b/toci_instack_ovb.sh @@ -26,17 +26,6 @@ echo "127.0.0.1 $(hostname) $(hostname).openstacklocal" | sudo tee -a /etc/hosts # TODO: xfsprogs should be a dep of DIB? sudo yum install -y xfsprogs qemu-img - -# Setting up localhost so that postci will ssh to it to retrieve logs -# once the legacy TE support is removed from tripleo-ci we won't need to do -# this any longer -export SEED_IP=127.0.0.1 -echo | sudo tee -a ~root/.ssh/authorized_keys | sudo tee -a ~/.ssh/authorized_keys -if [ ! -e /home/$USER/.ssh/id_rsa.pub ] ; then - ssh-keygen -N "" -f /home/$USER/.ssh/id_rsa -fi -cat ~/.ssh/id_rsa.pub | sudo tee -a ~root/.ssh/authorized_keys | sudo tee -a ~/.ssh/authorized_keys - # Remove the anything on the infra image template that might interfere with CI sudo yum remove -y puppet hiera puppetlabs-release rdo-release sudo rm -rf /etc/puppet /etc/hiera.yaml