diff --git a/scripts/common_functions.sh b/scripts/common_functions.sh
index 4670e77c2..519ccf5aa 100755
--- a/scripts/common_functions.sh
+++ b/scripts/common_functions.sh
@@ -73,275 +73,6 @@ function filterref {
     echo $PROJ
 }
 
-# Mount a qcow image, copy in the delorean repositories and update the packages
-function update_image {
-    IMAGE=$1
-    MOUNTDIR=$(mktemp -d)
-    case ${IMAGE##*.} in
-        qcow2)
-            # NOTE(pabelanger): Sadly, nbd module is missing from CentOS 7,
-            # so we need to convert the image to raw format. A fix for this
-            # would be support raw instack images in our nightly builds.
-            qemu-img convert -f qcow2 -O raw ${IMAGE} ${IMAGE/qcow2/raw}
-            rm -rf ${IMAGE}
-            sudo kpartx -avs ${IMAGE/qcow2/raw}
-            # The qcow2 images may be a whole disk or single partition
-            sudo mount /dev/mapper/loop0p1 $MOUNTDIR || sudo mount /dev/loop0 $MOUNTDIR
-            ;;
-        initramfs)
-            pushd $MOUNTDIR
-            gunzip -c $IMAGE | sudo cpio -i
-            ;;
-    esac
-
-    # Overwrite resources specific to the environment running this test
-    # instack-undercloud does this, but for cached images it wont be correct
-    sudo test -f $MOUNTDIR/root/.ssh/authorized_keys && sudo cp ~/.ssh/authorized_keys $MOUNTDIR/root/.ssh/authorized_keys
-    sudo test -f $MOUNTDIR/home/stack/instackenv.json && sudo cp $TE_DATAFILE $MOUNTDIR/home/stack/instackenv.json
-
-    # Update the installed packages on the image
-    sudo mv $MOUNTDIR/etc/resolv.conf{,_}
-    if [ "$CA_SERVER" == "1" ] ; then
-        # NOTE(jaosorior): This IP is hardcoded for the FreeIPA server (the CA).
-        echo -e "nameserver 192.168.24.250\nnameserver 8.8.8.8" | sudo dd of=$MOUNTDIR/etc/resolv.conf
-    else
-        echo -e "nameserver 10.1.8.10\nnameserver 8.8.8.8" | sudo dd of=$MOUNTDIR/etc/resolv.conf
-    fi
-    sudo cp /etc/yum.repos.d/delorean* $MOUNTDIR/etc/yum.repos.d
-    sudo rm -f $MOUNTDIR/etc/yum.repos.d/epel*
-    sudo chroot $MOUNTDIR /bin/yum clean all
-    sudo chroot $MOUNTDIR /bin/yum update -y
-    sudo rm -f $MOUNTDIR/etc/yum.repos.d/delorean*
-    sudo mv -f $MOUNTDIR/etc/resolv.conf{_,}
-
-    case ${IMAGE##*.} in
-        qcow2)
-            # The yum update inside a chroot breaks selinux file contexts, fix them
-            sudo chroot $MOUNTDIR setfiles /etc/selinux/targeted/contexts/files/file_contexts /
-            sudo umount $MOUNTDIR
-            sudo kpartx -dv ${IMAGE/qcow2/raw}
-            qemu-img convert -c -f raw -O qcow2 ${IMAGE/qcow2/raw} ${IMAGE}
-            sudo rm -rf ${IMAGE/qcow2/raw}
-            sudo losetup -d /dev/loop0
-            ;;
-        initramfs)
-            sudo find . -print | sudo cpio -o -H newc | gzip > $IMAGE
-            popd
-            ;;
-    esac
-    sudo rm -rf $MOUNTDIR
-}
-
-# Decide if a particular cached artifact can be used in this CI test
-# Takes a single argument representing the name of the artifact being checked.
-function canusecache {
-
-    # If we are uploading to the cache then we shouldn't use it
-    [ "$CACHEUPLOAD" == 1 ] && return 1
-
-    # The updates job already takes a long time, always use cache for it
-    [ -n "$OVERCLOUD_UPDATE_ARGS" ] && return 0
-
-    CACHEDOBJECT=$1
-
-    for PROJFULLREF in $ZUUL_CHANGES ; do
-        PROJ=$(filterref $PROJFULLREF)
-
-        case $CACHEDOBJECT in
-            ${UNDERCLOUD_VM_NAME}.qcow2)
-                [[ "$PROJ" =~ instack-undercloud|diskimage-builder|tripleo-image-elements|tripleo-puppet-elements ]] && return 1
-                ;;
-            ironic-python-agent.tar)
-                [[ "$PROJ" =~ diskimage-builder|python-tripleoclient|tripleo-common|tripleo-image-elements ]] && return 1
-                ;;
-            overcloud-full.tar)
-                [[ "$PROJ" =~ diskimage-builder|tripleo-image-elements|tripleo-puppet-elements|instack-undercloud|python-tripleoclient|tripleo-common ]] && return 1
-                ;;
-            *)
-                return 1
-                ;;
-        esac
-
-    done
-    return 0
-}
-
-function extract_logs {
-    local name=$1
-    mkdir -p $WORKSPACE/logs/$name
-    local logs_tar="$WORKSPACE/logs/$name.tar.xz"
-
-    if [[ -f $logs_tar ]]; then
-        # Exclude journal files because they're large and not useful in a browser
-        tar -C $WORKSPACE/logs/$name -xf $logs_tar var --exclude=journal
-    else
-        echo "$logs_tar doesn't exist. Nothing to untar"
-    fi
-}
-
-function postci {
-    local exit_val=${1:-0}
-    set -x
-    set +e
-    stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.ci.total.seconds"
-    if [[ "$POSTCI" == "0" ]]; then
-        sudo chown -R $USER $WORKSPACE
-        sudo iptables -I INPUT -p tcp -j ACCEPT
-        return 0
-    fi
-    start_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.postci.seconds"
-    if [ -e $TRIPLEO_ROOT/delorean/data/repos/ ] ; then
-        # I'd like to tar up repos/current but tar'ed its about 8M it may be a
-        # bit much for the log server, maybe when we are building less
-        find $TRIPLEO_ROOT/delorean/data/repos -name "*.log" | XZ_OPT=-3 xargs tar -cJf $WORKSPACE/logs/delorean_repos.tar.xz
-        extract_logs delorean_repos
-    fi
-
-    # Persist the deploy.env, as it can help with debugging and local testing
-    cp $TRIPLEO_ROOT/tripleo-ci/deploy.env $WORKSPACE/logs/
-
-    # Generate extra state information from the running undercloud
-    sudo -E $TRIPLEO_ROOT/tripleo-ci/scripts/get_host_info.sh
-    sudo -E $TRIPLEO_ROOT/tripleo-ci/scripts/get_docker_logs.sh
-    eval $JLOGCMD
-
-    if [ "$OVB" == "1" ] ; then
-        # Get logs from the undercloud
-        # Log collection takes a while. Let's start these in the background
-        # so they can run in parallel, then we'll wait for them to complete
-        # after they're all running.
-        (
-            $TARCMD $HOME/*.log > $WORKSPACE/logs/undercloud.tar.xz
-            extract_logs undercloud
-        ) &
-
-        # when we ran get_host_info.sh on the undercloud it left the output of nova list in /tmp for us
-        for INSTANCE in $(cat /tmp/nova-list.txt | grep ACTIVE | awk '{printf"%s=%s\n", $4, $12}') ; do
-            IP=${INSTANCE//*=}
-            SANITIZED_ADDRESS=$(sanitize_ip_address ${IP})
-            NAME=${INSTANCE//=*}
-            (
-                scp $SSH_OPTIONS $TRIPLEO_ROOT/tripleo-ci/scripts/get_host_info.sh heat-admin@${SANITIZED_ADDRESS}:/tmp
-                scp $SSH_OPTIONS $TRIPLEO_ROOT/tripleo-ci/scripts/get_docker_logs.sh heat-admin@${SANITIZED_ADDRESS}:/tmp
-                timeout -s 15 -k 600 300 ssh $SSH_OPTIONS heat-admin@$IP sudo /tmp/get_host_info.sh
-                timeout -s 15 -k 600 300 ssh $SSH_OPTIONS heat-admin@$IP sudo /tmp/get_docker_logs.sh
-                ssh $SSH_OPTIONS heat-admin@$IP $JLOGCMD
-                ssh $SSH_OPTIONS heat-admin@$IP $TARCMD > $WORKSPACE/logs/${NAME}.tar.xz
-                extract_logs $NAME
-            ) &
-        done
-        # Wait for the commands we started in the background to complete
-        wait
-        # This spams the postci output with largely uninteresting trace output
-        set +x
-        echo -n 'Recording Heat deployment times...'
-        # We can't record all of the Heat deployment times because a number of
-        # them include IDs that change every run, which makes them pretty
-        # useless as Graphite metrics. However, there are some important ones
-        # we do want to record over time, so explicitly capture those.
-        captured_deploy_times=/tmp/captured-deploy-times.log
-        # Make sure there is a trailing space after all the names so they don't
-        # match resources that have ids appended.
-        egrep 'overcloud |AllNodesDeploySteps |ControllerDeployment_Step. |ComputeDeployment_Step. |CephStorageDeploymentStep. |Controller |CephStorage |Compute |ServiceChain |NetworkDeployment |UpdateDeployment ' $WORKSPACE/logs/undercloud/var/log/heat-deploy-times.log > $captured_deploy_times
-        while read line; do
-            # $line should look like "ResourceName 123.0", so concatenating all
-            # of this together we should end up with a call that looks like:
-            # record_metric tripleo.master.ha.overcloud.resources.ResourceName 123.0
-            record_metric tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.overcloud.resources.${line}
-        done <$captured_deploy_times
-        echo 'Finished'
-        set -x
-        stop_metric "tripleo.${STABLE_RELEASE:-master}.${TOCI_JOBTYPE}.postci.seconds"
-        # post metrics
-        if [ $exit_val -eq 0 ]; then
-            metrics_to_graphite "66.187.229.172" # Graphite server in rh1
-        fi
-    elif [ "$OSINFRA" = "1" ] ; then
-        local i=2
-        $TARCMD $HOME/*.log > $WORKSPACE/logs/primary_node.tar.xz
-        # Extract /var/log for easy viewing
-        tar xf $WORKSPACE/logs/primary_node.tar.xz -C $WORKSPACE/logs/ var/log etc --exclude=var/log/journal
-        # Clean out symlinks, because these seem to break reporting job results
-        find $WORKSPACE/logs/etc -type l | xargs -t rm -f
-        for ip in $(cat /etc/nodepool/sub_nodes_private); do
-            mkdir $WORKSPACE/logs/subnode-$i/
-            ssh $SSH_OPTIONS -i /etc/nodepool/id_rsa $ip \
-                sudo $TRIPLEO_ROOT/tripleo-ci/scripts/get_host_info.sh
-            ssh $SSH_OPTIONS -i /etc/nodepool/id_rsa $ip \
-                sudo $TRIPLEO_ROOT/tripleo-ci/scripts/get_docker_logs.sh
-            ssh $SSH_OPTIONS -i /etc/nodepool/id_rsa $ip $JLOGCMD
-            ssh $SSH_OPTIONS -i /etc/nodepool/id_rsa $ip \
-                $TARCMD > $WORKSPACE/logs/subnode-$i/subnode-$i.tar.xz
-            # Extract /var/log and /etc for easy viewing
-            tar xf $WORKSPACE/logs/subnode-$i/subnode-$i.tar.xz -C $WORKSPACE/logs/subnode-$i/ var/log etc --exclude=var/log/journal
-            # Clean out symlinks, because these seem to break reporting job results
-            find $WORKSPACE/logs/subnode-$i/etc -type l | xargs -t rm -f
-            # These files are causing the publish logs ansible
-            # task to fail with an rsync error:
-            # "symlink has no referent"
-            ssh $SSH_OPTIONS -i /etc/nodepool/id_rsa $ip \
-                sudo rm -f /etc/sahara/rootwrap.d/sahara.filters
-            ssh $SSH_OPTIONS -i /etc/nodepool/id_rsa $ip \
-                sudo rm -f /etc/cinder/rootwrap.d/os-brick.filters
-
-            let i+=1
-        done
-    fi
-
-    sudo chown -R $USER $WORKSPACE
-    sudo find $WORKSPACE -type d -exec chmod 755 {} \;
-    # Make sure zuuls log gathering can read everything in the $WORKSPACE, it also contains a
-    # link to ml2_conf.ini so this also need to be made read only
-    sudo find /etc/neutron/plugins/ml2/ml2_conf.ini $WORKSPACE -type f | sudo xargs chmod 644
-
-    # record the size of the logs directory
-    # -L, --dereference dereference all symbolic links
-    du -L -ch $WORKSPACE/logs/* | sort -rh | head -n 200 &> $WORKSPACE/logs/log-size.txt || true
-
-    return 0
-}
-
-function delorean_build_and_serve {
-    DELOREAN_BUILD_REFS=${DELOREAN_BUILD_REFS:-}
-    for PROJFULLREF in $ZUUL_CHANGES ; do
-        PROJ=$(filterref $PROJFULLREF)
-        # If ci is being run for a change to ci its ok not to have a ci produced repository
-        excluded_proj="tripleo-ci tripleo-quickstart tripleo-quickstart-extras puppet-openstack-integration grenade zuul-jobs"
-        if [[ " $excluded_proj " =~ " $PROJ " ]]; then
-            mkdir -p $TRIPLEO_ROOT/delorean/data/repos/current
-            touch $TRIPLEO_ROOT/delorean/data/repos/current/delorean-ci.repo
-        else
-            # Note we only add the project once for it to be built
-            if ! echo $DELOREAN_BUILD_REFS | egrep "( |^)$PROJ( |$)"; then
-                DELOREAN_BUILD_REFS="$DELOREAN_BUILD_REFS $PROJ"
-            fi
-        fi
-    done
-
-    # Build packages
-    if [ -n "$DELOREAN_BUILD_REFS" ] ; then
-        $TRIPLEO_ROOT/tripleo-ci/scripts/tripleo.sh --delorean-build $DELOREAN_BUILD_REFS
-    fi
-
-    # kill the http server if its already running
-    ps -ef | grep -i python | grep SimpleHTTPServer | awk '{print $2}' | xargs --no-run-if-empty kill -9 || true
-    pushd $TRIPLEO_ROOT/delorean/data/repos
-    sudo iptables -I INPUT -p tcp --dport 8766 -i eth1 -j ACCEPT
-    python -m SimpleHTTPServer 8766 1>$WORKSPACE/logs/yum_mirror.log 2>$WORKSPACE/logs/yum_mirror_error.log &
-    popd
-}
-
-function dummy_ci_repo {
-    # If we have no ZUUL_CHANGES then this is a periodic job, we wont be
-    # building a ci repo, create a dummy one.
-    if [ -z "${ZUUL_CHANGES:-}" ] ; then
-        ZUUL_CHANGES=${ZUUL_CHANGES:-}
-        mkdir -p $TRIPLEO_ROOT/delorean/data/repos/current
-        touch $TRIPLEO_ROOT/delorean/data/repos/current/delorean-ci.repo
-    fi
-    ZUUL_CHANGES=${ZUUL_CHANGES//^/ }
-}
-
 function layer_ci_repo {
     # Find the path to the trunk repository used
     TRUNKREPOUSED=$(grep -Eo "[0-9a-z]{2}/[0-9a-z]{2}/[0-9a-z]{40}_[0-9a-z]+" /etc/yum.repos.d/delorean.repo)
@@ -367,17 +98,3 @@ function echo_vars_to_deploy_env {
         echo "export ${role}_hosts=\"${hosts}\"" >> $TRIPLEO_ROOT/tripleo-ci/deploy.env
     done
 }
-
-function stop_dstat {
-    ps axjf | grep bin/dstat | grep -v grep | awk '{print $2;}' | sudo xargs -t -n 1 -r kill
-}
-
-function item_in_array {
-    local item
-    for item in "${@:2}"; do
-        if [[ "$item" == "$1" ]]; then
-            return 0
-        fi
-    done
-    return 1
-}
diff --git a/scripts/get_docker_logs.sh b/scripts/get_docker_logs.sh
deleted file mode 100755
index 69e0ca09b..000000000
--- a/scripts/get_docker_logs.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-
-BASE_DOCKER_EXTRA="/var/log/extra/docker"
-
-if command -v docker && systemctl is-active docker; then
-    mkdir -p $BASE_DOCKER_EXTRA
-    ALL_FILE=$BASE_DOCKER_EXTRA/docker_allinfo.log
-    DOCKER_INFO_CMDS=(
-        "docker ps --all --size"
-        "docker images"
-        "docker volume ls"
-        "docker stats --all --no-stream"
-        "docker info"
-    )
-    for cmd in "${DOCKER_INFO_CMDS[@]}"; do
-        echo "+ $cmd" >> $ALL_FILE
-        $cmd >> $ALL_FILE
-    done
-
-    for cont in $(docker ps | awk {'print $NF'} | grep -v NAMES); do
-        INFO_DIR=$BASE_DOCKER_EXTRA/containers/${cont}
-        mkdir -p $INFO_DIR
-        INFO_FILE=$INFO_DIR/docker_info.log
-        DOCKER_CONTAINER_INFO_CMDS=(
-            "docker top $cont auxw"
-            "docker exec $cont top -bwn1"
-            "docker inspect $cont"
-        )
-        for cmd in "${DOCKER_CONTAINER_INFO_CMDS[@]}"; do
-            echo "+ $cmd" >> $INFO_FILE
-            $cmd >> $INFO_FILE
-        done
-        docker logs $cont > $INFO_DIR/stdout.log
-        docker cp $cont:/var/lib/kolla/config_files/config.json $INFO_DIR/config.json
-        # NOTE(flaper87): This should go away. Services should be
-        # using a `logs` volume
-        # Removing the following line as openstack infra needs our log size reduced
-        # docker cp $cont:/var/log $INFO_DIR/log
-        # NOTE(gfidente): Just copy Ceph
-        if docker exec --user root $cont stat /var/log/ceph > /dev/null; then
-            docker cp $cont:/var/log/ceph $INFO_DIR/log
-        fi
-        # Delete symlinks because they break log collection and are generally
-        # not useful
-        find $INFO_DIR -type l -delete
-    done
-    cp -r /var/lib/config-data/puppet-generated /var/log/config-data
-
-    if [[ -d /var/lib/docker/volumes/logs/_data ]]; then
-        cp -r /var/lib/docker/volumes/logs/_data $BASE_DOCKER_EXTRA/logs
-    fi
-fi
diff --git a/scripts/get_host_info.sh b/scripts/get_host_info.sh
deleted file mode 100755
index f59450596..000000000
--- a/scripts/get_host_info.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/bash
-
-sudo bash <<-EOF &> /var/log/host_info.txt
-set -x
-export PATH=\$PATH:/sbin
-ps -eaufxZ
-ls -Z /var/run/
-df -h
-uptime
-sudo netstat -lpn
-sudo iptables-save
-sudo ip6tables-save
-sudo ovs-vsctl show
-ip addr
-ip route
-ip -6 route
-free -h
-top -n 1 -b -o RES
-rpm -qa | sort
-yum repolist -v
-sudo os-collect-config --print
-which pcs &> /dev/null && sudo pcs status --full
-which pcs &> /dev/null && sudo pcs constraint show --full
-which pcs &> /dev/null && sudo pcs stonith show --full
-which crm_verify &> /dev/null && sudo crm_verify -L -VVVVVV
-which ceph &> /dev/null && sudo ceph status
-sudo facter
-find ~jenkins -iname tripleo-overcloud-passwords -execdir cat '{}' ';'
-sudo systemctl list-units --full --all
-EOF
-
-if [ -e ~/stackrc ] ; then
-    source ~/stackrc
-
-    nova list | tee /tmp/nova-list.txt
-    openstack workflow list
-    openstack workflow execution list
-    # If there's no overcloud then there's no point in continuing
-    openstack stack show --no-resolve-outputs --format yaml overcloud || (echo 'No active overcloud found' && exit 0)
-    openstack stack resource list -n5 --format yaml overcloud
-    openstack stack event list overcloud
-    # --nested-depth 2 seems to get us a reasonable list of resources without
-    # taking an excessive amount of time
-    openstack stack event list --nested-depth 2 -f json overcloud | $TRIPLEO_ROOT/tripleo-ci/scripts/heat-deploy-times.py | tee /var/log/heat-deploy-times.log || echo 'Failed to process resource deployment times. This is expected for stable/liberty.'
-    # useful to see what failed when puppet fails
-    # NOTE(bnemec): openstack stack failures list only exists in Newton and above.
-    # On older releases we still need to manually query the deployments.
-    openstack stack failures list --long overcloud || for failed_deployment in $(heat resource-list --nested-depth 5 overcloud | grep FAILED | grep 'StructuredDeployment ' | cut -d '|' -f3); do heat deployment-show $failed_deployment; done;
-fi