Rework the getthelogs helper script for wget recursive

The getthelogs script uses wget and parallel to fetch
some of the CI job logs. Update the README to encourage
contributors to improve the elastic-recheck detection rules
with the help of that script and their own expertise.

Change-Id: Ia48e45118776ca710ecd17421c5fb024ab436293
Signed-off-by: Bogdan Dobrelya <bdobreli@redhat.com>
This commit is contained in:
Bogdan Dobrelya 2017-08-09 17:29:46 +02:00
parent 6cf056cc68
commit eded2fdbd1
2 changed files with 69 additions and 47 deletions

View File

@ -17,6 +17,10 @@ Tools to help run CI jobs for TripleO. Includes things like:
* Heat templates to help deploy and maintain test environment nodes
using an undercloud.
* Helper script(s) to generate CI status reports. (tox -ecireport -- -f)
* Helper `getthelogs` script to download important job logs locally.
  Then you may want to inspect the logs for known errors and contribute
  the discovered search patterns as new
  `elastic-recheck queries <https://git.openstack.org/cgit/openstack-infra/elastic-recheck/tree/queries>`_.
OpenStack Infrastructure is deploying multiple jobs with different scenarios.

View File

@ -1,54 +1,72 @@
#!/bin/bash
# NOTE(review): this region interleaves two revisions of the script (it reads
# like a diff without +/- markers) -- e.g. `set -eu -o pipefail` here and a
# second `set -eu` below. Confirm against the real file before relying on it.
set -eu -o pipefail
# Helper script for downloading tripleo-ci logs, it then prompts the users for
# file they want to download, unzips them into a tmp directory and changes into
# the tmp directory, while in the tmp directory run "getthelogs" with no params
# to download any log files you hadn't previously downloaded
# Run it like this
# getthelogs http://logs.openstack.org/68/237568/5/check-tripleo/gate-tripleo-ci-f22-ha/dd8f61d/
set -eu
# First positional argument: base URL of the job logs.
BASEURL=$1
# NOTE(review): this strips ALL slashes from the URL, so the //\//- substitution
# on the ~/tmp/ci- line below has no '/' left to turn into '-' -- likely a
# diff-mixture artifact; verify against the original script.
TDIR=${BASEURL//\//}
# We do not clean up this directory, so data doesn't need to be downloaded a second time
# if the script is rerun again with the same url
TDIR=~/tmp/ci-${TDIR//\//-}
mkdir -p $TDIR
cd $TDIR
# Record the source URL inside the download directory for later reference.
echo $BASEURL > BASEURL
# Interactively offer one remote file for download.
# $1 - full URL of the candidate file. Only basenames matching 'tar' or
# 'console' that are not already present locally are offered; tarballs are
# unpacked into a sibling "<name>_" directory.
function _getfile(){
URL=$1
BASENAME=$(basename $1)
if [[ $BASENAME =~ .*(tar|console).* && ! -e $BASENAME ]] ; then
# Ask before each download; anything other than exactly "y" skips the file.
read -p "Want $BASENAME? " X
if [ "$X" = "y" ] ; then
# Some logs are stored gzip-compressed; fall back to the .gz variant.
curl -O $URL || curl -O ${URL}.gz
if [[ $BASENAME =~ .*(\.tar).* ]] ; then
mkdir ${BASENAME}_
tar -xf $BASENAME -C ${BASENAME}_
fi
fi
fi
# NOTE(review): the closing '}' of _getfile is not visible here -- presumably
# lost in the diff rendering; confirm against the full file.
# Print a short usage/help message for this helper on stdout.
function usage(){
cat <<'USAGE'
Helper script for downloading tripleo-ci jobs logs

Example:
getthelogs http://logs.openstack.org/00/123456/7/check/gate-tripleo-ci-foo/d3adbeef

Downloads the logs and starts a shell from the logs root directory
USAGE
}
# Seed the download list with the job console log, then append every file
# linked from the job's logs/ index page (hrefs scraped out with sed).
FILES="$BASEURL/console.html"
for FILE in $(curl $BASEURL/logs/ 2> /dev/null | grep href | sed -e 's/.*href="\([^"]*\)".*/\1/g' ) ; do
FILES="$FILES $BASEURL/logs/$FILE"
# NOTE(review): the matching 'done' for this loop is not visible here -- it
# appears to have been cut off by the diff rendering; confirm in the full file.
# EXIT/SIGINT/SIGTERM trap handler: report the download status, then drop the
# user into a throw-away interactive shell rooted near the logs directory.
function finish(){
# Status of the command that triggered the trap, unless a caller already
# stored one in rc.
rc=${rc:-$?}
# Clear the EXIT trap so leaving the spawned shell does not re-enter finish.
trap - EXIT
# Land one level above $TDIR so the job directory name is visible.
cd $TDIR/../
echo "Download job exited ${rc}"
# Distinctive prompt; --noprofile/--norc keep the user's startup files from
# changing directory or environment.
PS1="JOBLOGS ]\$ " bash --noprofile --norc
}
# Recursively collect sub-directory URLs from an Apache-style index page.
# $1 - directory URL (callers pass it with a trailing '/'). Directories whose
# name matches the 'drop' pattern (bulky/uninteresting content) are pruned.
# Prints the discovered absolute URLs on stdout; an empty line when none.
function get_dirs(){
local drop="\b(etc|ara|ara_oooq|docs|build|stackviz|sudoers.d|lib|config-data|extra)\b"
local directories=""
# Scrape hrefs from rows marked "[DIR" in the index listing and prefix them
# with the parent URL ($1) to form absolute sub-directory URLs.
directories=$(curl -s "$1" 2> /dev/null | grep -E "\[DIR" | grep -vE "${drop}" | sed -e "s,.*href=\"\([^\"]*\)\".*,${1}\1,g")
if [ -n "$directories" ]; then
# Depth-first descent: the for-list was expanded from the original value,
# while each child's results are appended to 'directories' for the final echo.
for d in $directories; do
directories="$directories $(get_dirs $d/)"
done
echo $directories
else
echo ""
fi
return 0
}
# --- Argument handling and environment checks -------------------------------
# Show usage and bail out when called without an argument or with a "-flag"
# style argument ("${1:--}" substitutes "-" when $1 is unset, so it matches).
# [[:space:]]* is the portable ERE spelling; \s (and the lazy +?) are GNU-only.
# A { ...; } group is used instead of a subshell so `exit` really terminates
# the script rather than relying on `set -e` to propagate the status.
[[ "${1:--}" =~ ^[[:space:]]*- ]] && { usage; exit 1; }
# Silence both streams; the previous order (2>&1 >/dev/null) left stderr
# attached to the terminal.
type -p wget >/dev/null 2>&1 || { echo "Please install a wget tool!"; exit 127; }

# On any exit path, report the status and drop into a shell at the logs dir.
trap finish EXIT SIGINT SIGTERM

WORKERS=6          # number of parallel wget processes
BASEURL=${1%/}     # job URL without a trailing slash

# Count '/' characters in the parent URL to estimate depth. A shallow URL that
# does not already end in 'logs' points at the job root: grab its console.html
# and descend into the logs/ subdirectory.
SC=$(dirname $BASEURL | grep -o \/ | wc -w)
if [[ ! $(basename $BASEURL) == 'logs' && $SC -le 7 ]]; then
  console=$BASEURL/console.html
  BASEURL=${BASEURL}/logs
else
  console=''
fi

# Mirror the URL path under /tmp, e.g. /tmp/logs.openstack.org/...
TDIR=${BASEURL##*http://}
TDIR=/tmp/${TDIR}
mkdir -p $TDIR
cd /tmp

echo "Target dir for download: $TDIR"
echo Will download logs from the following URLs:
list_to_get="$console $(get_dirs $BASEURL/)"
for d in $list_to_get; do
  echo $d
done
# Walk the collected URL list and offer each file for download.
# Reads the global FILES (space-separated URLs) and delegates to _getfile.
getthelogs() {
  local url
  for url in $FILES; do
    _getfile $url
  done
}
getthelogs
# Build one quoted wget command line per directory, then run them in parallel.
# Each job mirrors *.txt.gz, console.html* and 'messages' files, skips index
# pages, and stays on logs.openstack.org below the job directory.
rm -f wget-jobs.txt
for d in $list_to_get; do
args="\"-nv -nc --no-use-server-timestamps \
--accept-regex='\.txt\.gz$|console\.htm[l]?$|messages$' \
--reject='index.html*' \
--recursive -l 10 --domains logs.openstack.org --no-parent \
-erobots=off --wait 0.25 ${d}\""
echo "${args}" >> wget-jobs.txt
done
# NOTE(review): the next three lines look like remnants of the pre-rework
# script (FILES/getthelogs/_getfile belong to the old implementation) -- this
# region reads like a diff without markers; confirm against the actual file.
export FILES
export -f getthelogs _getfile
PS1="JOBLOGS ]\$ " bash
# Duplicate every job line (sed '{p;p}') so each URL gets a second, retry-like
# attempt, shuffle to spread server load, then fan out over $WORKERS workers.
cat wget-jobs.txt | sed -n '{p;p}' | shuf > wget-jobs-shuf.txt
cat wget-jobs-shuf.txt | xargs -r -n1 -P ${WORKERS} -I{} sh -c "wget {}"