Add support for OVB based CI

Add support for running TripleO CI on rh2 (an OVB-based cloud).

Change-Id: Ib5af081412ac4e0c398bd819a99c6ae894e4684f
This commit is contained in:
Derek Higgins 2016-06-29 17:45:24 +01:00 committed by James Slagle
parent 2d22ced260
commit f49c7bbb8d
9 changed files with 200 additions and 102 deletions

View File

@ -16,139 +16,68 @@
set -ex
[ -n "$1" ] || ( echo "Usage : $0 <baseimageid> <num-runs> <sim-runs>" && exit 1 )
[ -n "$1" ] || ( echo "Usage : $0 <num-runs> <sim-runs>" && exit 1 )
# Creates a template image (if it doesn't exist), then runs an
# overcloud ci job <num-runs> times, <sim-runs> simultaneously.
IMAGEID=$1
USER=fedora
if nova image-list | grep $IMAGEID | grep -i ubuntu ; then
USER=ubuntu
fi
IMAGE=CentOS-7-x86_64-GenericCloud
USER=centos
# makes some assumptions but good enough for now
nova keypair-add --pub-key ~/.ssh/id_rsa.pub bighammer || true
NETLIST=$(neutron net-list)
DEFAULTNET=$(echo "$NETLIST" | grep default-net | awk '{print $2}')
TESTNET=$(echo "$NETLIST" | grep tripleo-bm-test | awk '{print $2}')
EXTNET=$(echo "$NETLIST" | grep ext-net | awk '{print $2}')
TEMPLATENAME="$IMAGEID-template"
if ! nova image-list | grep $TEMPLATENAME ; then
nova boot --image $IMAGEID --flavor m1.large --nic net-id=$DEFAULTNET --key-name bighammer template-builder
sleep 60
IP=$(nova show template-builder | awk '/default-net/ {print $5}')
PORTID=$(neutron port-list | grep "$IP\>" | awk '{print $2}')
FLOATINGIPOUT=$(neutron floatingip-create $EXTNET)
FLOATINGIP=$(echo "$FLOATINGIPOUT" | grep floating_ip_address | awk '{print $4}')
FLOATINGIPID=$(echo "$FLOATINGIPOUT" | grep " id " | awk '{print $4}')
neutron floatingip-associate $FLOATINGIPID $PORTID
sleep 90
ssh -t -t $USER@$FLOATINGIP <<EOF
set -ex
sudo mkdir /etc/nodepool
sudo chmod 0777 /etc/nodepool
mkdir tmp
sudo yum install -y git || sudo apt-get install -y git
git clone https://git.openstack.org/openstack-infra/project-config tmp/config
sudo cp -r tmp/config/nodepool/scripts /opt/nodepool-scripts
sudo chmod -R a+rx /opt/nodepool-scripts
cd /opt/nodepool-scripts
sudo yum install -y libxml2-devel libxslt-devel
sudo ./prepare_node_tripleo.sh
exit 0
EOF
nova image-create --poll template-builder $TEMPLATENAME
nova delete template-builder
sleep 20
neutron floatingip-delete $FLOATINGIPID
fi
function tapper(){
set -x
NODENAME=test-node-$1
nova boot --image $TEMPLATENAME --flavor m1.large --nic net-id=$DEFAULTNET --nic net-id=$TESTNET --key-name bighammer $NODENAME
trap "nova delete $NODENAME" RETURN ERR
sleep 180
nova boot --image $IMAGE --flavor undercloud --key-name bighammer $NODENAME
#trap "nova delete $NODENAME" RETURN ERR
sleep 60
if [ "$(nova show $NODENAME | awk '/status/ {print $4}')" != "ACTIVE" ] ; then
nova show $NODENAME
return 1
fi
IP=$(nova show $NODENAME | awk '/default-net/ {print $5}')
IP=$(nova show $NODENAME | awk '/private network/ {print $5}')
PORTID=$(neutron port-list | grep "$IP\>" | awk '{print $2}')
FLOATINGIPOUT=$(neutron floatingip-create $EXTNET)
FLOATINGIP=$(echo "$FLOATINGIPOUT" | grep floating_ip_address | awk '{print $4}')
FLOATINGIPID=$(echo "$FLOATINGIPOUT" | grep " id " | awk '{print $4}')
FLOATINGIP=$(nova floating-ip-create $EXTNET | grep public | awk '{print $2}')
[ -z "$FLOATINGIP" ] && echo "No Floating IP..." && exit 1
trap "nova delete $NODENAME || true ; sleep 20 ; neutron floatingip-delete $FLOATINGIPID" RETURN ERR
#trap "nova delete $NODENAME || true ; sleep 20 ; nova floatingip-delete $FLOATINGIP" RETURN ERR
neutron floatingip-associate $FLOATINGIPID $PORTID
sleep 120
ssh fedora@$FLOATINGIP sudo cp ~fedora/.ssh/authorized_keys ~jenkins/.ssh/authorized_keys
date
ssh -t jenkins@$FLOATINGIP <<EOF
nova floating-ip-associate $NODENAME $FLOATINGIP
sleep 20
ssh -tt $USER@$FLOATINGIP <<EOF
set -xe
export PYTHONUNBUFFERED=true
export DEVSTACK_GATE_TIMEOUT=240
export DEVSTACK_GATE_TEMPEST=0
export DEVSTACK_GATE_EXERCISES=0
export GEARDSERVER=172.16.3.254
export DIB_COMMON_ELEMENTS="common-venv stackuser pypi-openstack"
export TRIPLEO_TEST=overcloud
sudo chown -hR jenkins /opt/git
function gate_hook {
bash -xe /opt/stack/new/tripleo-ci/toci_gate_test.sh
}
export -f gate_hook
export ZUUL_BRANCH=master
export WORKSPACE=~/workspace
export GIT_ORIGIN=git://git.openstack.org
export ZUUL_PROJECT=openstack-infra/devstack-gate
export BRANCH=master
export ZUUL_URL=http://zuul.openstack.org/p
mkdir -p ~/workspace
cd ~/workspace
git clone git://git.openstack.org/openstack-infra/devstack-gate
cp devstack-gate/devstack-vm-gate-wrap.sh ./safe-devstack-vm-gate-wrap.sh
./safe-devstack-vm-gate-wrap.sh
sudo yum install -y git screen
sudo mkdir -p /opt/stack/new
sudo chown centos /opt/stack/new
git clone https://git.openstack.org/openstack-infra/tripleo-ci /opt/stack/new/tripleo-ci
cd /opt/stack/new/tripleo-ci
OVERRIDE_ZUUL_BRANCH= ZUUL_BRANCH=master WORKSPACE=/tmp TOCI_JOBTYPE=nonha DEVSTACK_GATE_TIMEOUT=180 ./toci_gate_test.sh
exit 0
EOF
set +x
date
echo "JOB DONE"
}
TODO=$2
SIM=$3
TODO=$1
SIM=$2
DONE=0
[ -e logs ] && mv logs logs-$(date +%s)
mkdir -p logs
while true; do
[ $DONE -ge $TODO ] && echo "Done" && break
sleep 60 # Lets not hammer the API all in one go
jobs
if [ $(jobs | wc -l) -lt $SIM ] ; then
DONE=$((DONE+1))
echo "Starting job $DONE"
tapper $DONE &> logs/job-$DONE.log &
fi
sleep 10 # Lets not hammer the API all in one go
done
# Wait for the last process to finish

View File

@ -311,7 +311,7 @@ function layer_ci_repo {
function echo_vars_to_deploy_env {
for VAR in CENTOS_MIRROR EPEL_MIRROR http_proxy INTROSPECT MY_IP no_proxy NODECOUNT OVERCLOUD_DEPLOY_ARGS OVERCLOUD_UPDATE_ARGS PACEMAKER SSH_OPTIONS STABLE_RELEASE TRIPLEO_ROOT TRIPLEO_SH_ARGS NETISO_V4 NETISO_V6 TOCI_JOBTYPE UNDERCLOUD_SSL RUN_TEMPEST_TESTS RUN_PING_TEST JOB_NAME; do
for VAR in CENTOS_MIRROR EPEL_MIRROR http_proxy INTROSPECT MY_IP no_proxy NODECOUNT OVERCLOUD_DEPLOY_ARGS OVERCLOUD_UPDATE_ARGS PACEMAKER SSH_OPTIONS STABLE_RELEASE TRIPLEO_ROOT TRIPLEO_SH_ARGS NETISO_V4 NETISO_V6 TOCI_JOBTYPE UNDERCLOUD_SSL RUN_TEMPEST_TESTS RUN_PING_TEST JOB_NAME OVB; do
echo "export $VAR=\"${!VAR}\"" >> $TRIPLEO_ROOT/tripleo-ci/deploy.env
done
}

View File

@ -15,6 +15,8 @@ fi
export TRIPLEO_ROOT=/opt/stack/new
export PATH=/sbin:/usr/sbin:$PATH
export UNDERCLOUD_VM_NAME=instack
# post ci chores to run at the end of ci
SSH_OPTIONS='-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=Verbose -o PasswordAuthentication=no -o ConnectionAttempts=32'
TARCMD="sudo XZ_OPT=-3 tar -cJf - --exclude=udev/hwdb.bin --exclude=etc/services --exclude=selinux/targeted --exclude=etc/services --exclude=etc/pki /var/log /etc"

View File

@ -27,6 +27,25 @@ start_metric "tripleo.undercloud.install.seconds"
$TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --undercloud 2>&1 | ts '%Y-%m-%d %H:%M:%S.000 |' | sudo dd of=/var/log/undercloud_install.txt || (tail -n 50 /var/log/undercloud_install.txt && false)
stop_metric "tripleo.undercloud.install.seconds"
if [ "$OVB" = 1 ]; then
# eth1 is on the provisioning network and doesn't have DHCP, so we need to set its MTU manually.
sudo ip link set dev eth1 up
sudo ip link set dev eth1 mtu 1400
echo -e "\ndhcp-option-force=26,1400" | sudo tee -a /etc/dnsmasq-ironic.conf
sudo systemctl restart 'neutron-*'
# The undercloud install creates files in ~/.cache as root;
# change their ownership back so we can build overcloud images
sudo chown -R $USER ~/.cache || true
# Check the power status of the last IPMI device we have details for;
# this ensures the BMC is ready and sanity-tests that it's working
PMADDR=$(jq '.nodes[length-1].pm_addr' < ~/instackenv.json | tr '"' ' ')
tripleo wait_for -d 10 -l 40 -- ipmitool -I lanplus -H $PMADDR -U admin -P password power status
fi
if [ $INTROSPECT == 1 ] ; then
# I'm removing most of the nodes in the env to speed up discovery
# This could be in jq but I don't know how

View File

@ -13,4 +13,4 @@ export QUOTA_PORTS=200
export TEBROKERIP=192.168.103.254
export MIRRORIP=192.168.103.253
export PROXYIP=192.168.103.252
export http_proxy="http://192.168.103.252:3128/"

View File

@ -62,7 +62,7 @@ class TestCallback(object):
logger.info('Received job : %s', job.arguments.strip())
time_waiting = time.time() - self.created
if time_waiting > 10:
if time_waiting > 90:
logger.warn('%.1f seconds waiting for a worker.' % (time_waiting))
logger.info('Running command "%s"', ' '.join(self.command))
@ -141,6 +141,11 @@ def main(args=sys.argv[1:]):
parser.add_argument('--timeout', '-t', default='10800',
help='Set a timeout, after which the command will '
'be killed.')
parser.add_argument('--envsize', default="2",
help='Number of baremetal nodes to request')
parser.add_argument('--ucinstance',
help='uuid for the undercloud instance (where an '
'interface on the provisioning net is attached')
parser.add_argument('--debug', '-d', action='store_true',
help='Set to debug mode.')
opts = parser.parse_args(args)
@ -155,7 +160,12 @@ def main(args=sys.argv[1:]):
add_servers(client, opts.geard)
client.waitForServer()
job_params = {"callback_name": callback_name, "timeout": opts.timeout}
job_params = {
"callback_name": callback_name,
"timeout": opts.timeout,
"envsize":opts.envsize,
"ucinstance":opts.ucinstance,
}
job = gear.Job('lockenv', json.dumps(job_params))
client.submitJob(job)

View File

@ -6,6 +6,13 @@ sudo yum -y install redhat-lsb-core
LSBRELEASE=`lsb_release -i -s`
# I'd like to use a variable from ZUUL to decide which cloud I'm running on,
# but that would then break if running toci_* manually outside of CI, so for
# the moment use an IP unique to rh1
if ping -c 1 192.168.100.1 ; then
source $(dirname $0)/scripts/rh2.env
fi
# Clean any cached yum metadata, it may be stale
sudo yum clean all
@ -53,8 +60,8 @@ export EPEL_MIRROR=http://dl.fedoraproject.org/pub/epel
# proxy by setting export http_proxy=""
export http_proxy=${http_proxy-"http://192.168.1.100:3128/"}
export GEARDSERVER=192.168.1.1
export MIRRORSERVER=192.168.1.101
export GEARDSERVER=${TEBROKERIP-192.168.1.1}
export MIRRORSERVER=${MIRRORIP-192.168.1.101}
export CACHEUPLOAD=0
export INTROSPECT=0
@ -75,6 +82,9 @@ export NETISO_V4=0
export NETISO_V6=0
export RUN_PING_TEST=1
export RUN_TEMPEST_TESTS=0
export OVB=0
export UCINSTANCEID=NULL
export TOCIRUNNER="./toci_instack.sh"
# Set the fedora mirror, this is more reliable than relying on the repolist returned by metalink
# NOTE(pabelanger): Once we bring AFS mirrors online, we no longer need to do this.
@ -91,6 +101,10 @@ mkdir -p "$WORKSPACE/logs"
dstat -tcmndrylpg --output "$WORKSPACE/logs/dstat-csv.log" >/dev/null &
disown
# TODO: Submit a patch to change this in infra once all the reshuffling is done
[ $TOCI_JOBTYPE == "ovb-ha" ] && TOCI_JOBTYPE=ovb-ha2
# Switch defaults based on the job name
for JOB_TYPE_PART in $(sed 's/-/ /g' <<< "${TOCI_JOBTYPE:-}") ; do
case $JOB_TYPE_PART in
@ -112,6 +126,14 @@ for JOB_TYPE_PART in $(sed 's/-/ /g' <<< "${TOCI_JOBTYPE:-}") ; do
NETISO_V4=1
PACEMAKER=1
;;
ha2)
NODECOUNT=4
# In CI our overcloud nodes don't have access to an external network;
# --ntp-server is here to make the deploy command happy, the CI env
# is on virt so the clocks should be in sync without it.
OVERCLOUD_DEPLOY_ARGS="$OVERCLOUD_DEPLOY_ARGS --control-scale 3 --ntp-server 0.centos.pool.ntp.org -e /usr/share/openstack-tripleo-heat-templates/environments/puppet-pacemaker.yaml"
PACEMAKER=1
;;
nonha)
OVERCLOUD_DEPLOY_ARGS="$OVERCLOUD_DEPLOY_ARGS -e /opt/stack/new/tripleo-ci/test-environments/enable-tls.yaml -e /opt/stack/new/tripleo-ci/test-environments/inject-trust-anchor.yaml --ceph-storage-scale 1 -e /usr/share/openstack-tripleo-heat-templates/environments/puppet-ceph-devel.yaml"
INTROSPECT=1
@ -123,6 +145,13 @@ for JOB_TYPE_PART in $(sed 's/-/ /g' <<< "${TOCI_JOBTYPE:-}") ; do
exit 1
TRIPLEO_SH_ARGS="--use-containers"
;;
ovb)
OVB=1
TOCIRUNNER="./toci_instack_ovb.sh"
# The test env broker needs to know the instance id of this node so it can attach it to the provisioning network
UCINSTANCEID=$(http_proxy= curl http://169.254.169.254/openstack/2015-10-15/meta_data.json | python -c 'import json, sys; print json.load(sys.stdin)["uuid"]')
;;
periodic)
export DELOREAN_REPO_URL=http://trunk.rdoproject.org/centos7/consistent
CACHEUPLOAD=1
@ -163,7 +192,8 @@ if [ -z ${TE_DATAFILE:-} ] ; then
fi
# Kill the whole job if it doesn't get a testenv in 20 minutes, as it will likely time out in zuul
( sleep 1200 ; [ ! -e /tmp/toci.started ] && sudo kill -9 $$ ) &
./testenv-client -b $GEARDSERVER:4730 -t $TIMEOUT_SECS -- ./toci_instack.sh
./testenv-client -b $GEARDSERVER:4730 -t $TIMEOUT_SECS --envsize $NODECOUNT --ucinstance $UCINSTANCEID -- $TOCIRUNNER
else
LEAVE_RUNNING=1 ./toci_instack.sh
LEAVE_RUNNING=1 $TOCIRUNNER
fi

View File

@ -83,7 +83,6 @@ function destroy_vms(){
ls /home/jenkins/.ssh/id_rsa_virt_power || ssh-keygen -f /home/jenkins/.ssh/id_rsa_virt_power -P ""
export ANSWERSFILE=/usr/share/instack-undercloud/undercloud.conf.sample
export UNDERCLOUD_VM_NAME=instack
export ELEMENTS_PATH=/usr/share/instack-undercloud
export DIB_DISTRIBUTION_MIRROR=$CENTOS_MIRROR
export DIB_EPEL_MIRROR=$EPEL_MIRROR

109
toci_instack_ovb.sh Executable file
View File

@ -0,0 +1,109 @@
#!/usr/bin/env bash
#
# Run a TripleO CI job on an OVB based cloud: prepare this node, set up the
# package repositories, fetch cached images where allowed and then hand over
# to deploy.sh.
#
# WORKSPACE, MIRRORSERVER and TE_DATAFILE are read without defaults, so with
# "set -u" they must already be present in the environment.  TRIPLEO_ROOT and
# the helper functions called below (start_metric, dummy_ci_repo,
# delorean_build_and_serve, layer_ci_repo, create_dib_vars_for_puppet,
# echo_vars_to_deploy_env, canusecache, update_image, postci) are not defined
# in this script; presumably they come from the files sourced below
# -- TODO confirm.
set -eux

## Signal to toci_gate_test.sh we've started
touch /tmp/toci.started

export CURRENT_DIR=$(dirname "${BASH_SOURCE[0]:-$0}")
export TRIPLEO_CI_DIR=$CURRENT_DIR/../
export IP_DEVICE=${IP_DEVICE:-"eth0"}

source "$TRIPLEO_CI_DIR/tripleo-ci/scripts/common_vars.bash"
source "$TRIPLEO_CI_DIR/tripleo-ci/scripts/common_functions.sh"
source "$TRIPLEO_CI_DIR/tripleo-ci/scripts/metrics.bash"
start_metric "tripleo.ci.total.seconds"

mkdir -p "$WORKSPACE/logs"

# First IPv4 address on IP_DEVICE, with the /prefix stripped
MY_IP=$(ip addr show dev "$IP_DEVICE" | awk '/inet / {gsub("/.*", "") ; print $2}')

# TODO: Set undercloud_hostname in undercloud.conf
hostname | sudo dd of=/etc/hostname
echo "127.0.0.1 $(hostname) $(hostname).openstacklocal" | sudo tee -a /etc/hosts

# Kill the zuul console stream, its TCP port clashes with the port we're using to serve out /httpboot.
# "|| true" keeps the job going when nothing is listening on the port.
sudo netstat -lpn | grep tcp | grep :8088 | awk '{print $7}' | cut -d / -f 1 | head -n 1 | sudo xargs -t kill -9 || true

# TODO: xfsprogs should be a dep of DIB?
sudo yum install -y xfsprogs qemu-img

# Setting up localhost so that postci will ssh to it to retrieve logs
# once the legacy TE support is removed from tripleo-ci we won't need to do
# this any longer
export SEED_IP=127.0.0.1

# Append an empty line first so the key added below always starts on a
# fresh line, even if authorized_keys lacks a trailing newline.
echo | sudo tee -a ~root/.ssh/authorized_keys | sudo tee -a ~/.ssh/authorized_keys

# Use the invoking user's own key pair.  The key is read from ~/.ssh below,
# so the existence check and ssh-keygen must look at the same location
# rather than a hard-coded /home/jenkins (identical when run as jenkins).
if [ ! -e ~/.ssh/id_rsa.pub ] ; then
    ssh-keygen -N "" -f ~/.ssh/id_rsa
fi
sudo tee -a ~root/.ssh/authorized_keys < ~/.ssh/id_rsa.pub | sudo tee -a ~/.ssh/authorized_keys

# Remove the puppet related stuff that was used by ZUUL to set up this node
sudo yum remove -y puppet hiera puppetlabs-release
sudo rm -rf /etc/puppet /etc/hiera.yaml

export no_proxy="192.0.2.1,$MY_IP,$MIRRORSERVER"

# Setup delorean
"$TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh" --delorean-setup

dummy_ci_repo

# Install all of the repositories we need
"$TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh" --repo-setup

# Install wget and moreutils for timestamping postci.log with ts
sudo yum -y install wget moreutils python-simplejson dstat yum-plugin-priorities

# On every exit: report a failure if there was one, then always run postci
# with timestamped output.  $WORKSPACE is expanded now (double quotes); \$? is
# deliberately escaped so it is evaluated when the trap fires.
trap "[ \$? != 0 ] && echo ERROR DURING PREVIOUS COMMAND ^^^ && echo 'See postci.txt in the logs directory for debugging details'; postci 2>&1 | ts '%Y-%m-%d %H:%M:%S.000 |' > $WORKSPACE/logs/postci.log 2>&1" EXIT

delorean_build_and_serve

# Since we've moved a few commands from this spot before the wget, we need to
# sleep a few seconds in order for the SimpleHTTPServer to get setup.
sleep 3

layer_ci_repo

create_dib_vars_for_puppet

# From here on nothing goes through the proxy
export http_proxy=""

echo_vars_to_deploy_env
source "$TRIPLEO_ROOT/tripleo-ci/deploy.env"

# Add a simple system utilisation logger process
sudo dstat -tcmndrylpg --output /var/log/dstat-csv.log >/dev/null &

# Install our test cert so SSL tests work
sudo cp "$TRIPLEO_ROOT/tripleo-ci/test-environments/overcloud-cacert.pem" /etc/pki/ca-trust/source/anchors/
sudo update-ca-trust extract

# Don't get a file from cache if CACHEUPLOAD=1 (periodic job)
# If this 404's it won't error, just continue without a file created
if canusecache ipa_images.tar ; then
    wget --progress=dot:mega "http://$MIRRORSERVER/builds/current-tripleo/ipa_images.tar" || true
    if [ -f ipa_images.tar ] ; then
        tar -xf ipa_images.tar
        update_image "$PWD/ironic-python-agent.initramfs"
        mv ironic-python-agent.* ~
        rm ipa_images.tar
    fi
fi

# Same thing for the overcloud image
if canusecache overcloud-full.tar ; then
    wget --progress=dot:mega "http://$MIRRORSERVER/builds/current-tripleo/overcloud-full.tar" || true
    if [ -f overcloud-full.tar ] ; then
        tar -xf overcloud-full.tar
        update_image "$PWD/overcloud-full.qcow2"
        mv overcloud-full.qcow2 overcloud-full.initrd overcloud-full.vmlinuz ~
        rm overcloud-full.tar
    fi
fi

# The test environment description becomes the node inventory in ~/instackenv.json
cp -f "$TE_DATAFILE" ~/instackenv.json

"$TRIPLEO_ROOT/tripleo-ci/scripts/deploy.sh"

echo 'Run completed.'