Rework the getthelogs helper script for recursive wget

The getthelogs script now uses wget, run in parallel via xargs, to
fetch the most useful CI job logs. Update the README to encourage
contributors to inspect those logs and improve the elastic-recheck
detection rules with the help of that script.

Change-Id: Ia48e45118776ca710ecd17421c5fb024ab436293
Signed-off-by: Bogdan Dobrelya <bdobreli@redhat.com>
Bogdan Dobrelya 2017-08-09 17:29:46 +02:00
parent 6cf056cc68
commit eded2fdbd1
2 changed files with 69 additions and 47 deletions
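
For orientation, a sketch of a typical session with the reworked script
(the change URL below is invented for illustration; any finished job's
logs URL works):

    getthelogs http://logs.openstack.org/00/123456/7/check/gate-tripleo-ci-foo/d3adbeef
    # wget workers mirror the *.txt.gz, console.html and messages files
    # under /tmp, then the script drops into a shell at the logs root,
    # where known-error hunting might look like:
    JOBLOGS ]$ find . -name '*.txt.gz' | xargs zgrep -li 'traceback'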

README

@@ -17,6 +17,10 @@ Tools to help run CI jobs for TripleO. Includes things like:
 * Heat templates to help deploy and maintain test environment nodes
   using an undercloud.
 * Helper script(s) to generate CI status reports. (tox -ecireport -- -f)
+* Helper `getthelogs` script to download important job logs locally.
+  Then you may want to inspect the logs for known errors and contribute
+  discovered search patterns as the
+  `elastic-recheck queries <https://git.openstack.org/cgit/openstack-infra/elastic-recheck/tree/queries>`_.
 OpenStack Infrastructure is deploying multiple jobs with different scenarios.
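
The README addition above asks contributors to turn discovered search
patterns into elastic-recheck queries. As a sketch of what that
contribution looks like: the elastic-recheck repo keeps one small YAML
file per tracked bug, so a newly spotted failure signature would land
roughly like this (the bug number and message are invented for
illustration):

    # in a checkout of openstack-infra/elastic-recheck
    cat > queries/1700000.yaml <<'EOF'
    query: >-
      message:"Stack overcloud CREATE_FAILED" AND
      tags:"console"
    EOF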

getthelogs

@@ -1,54 +1,72 @@
 #!/bin/bash
-
-# Helper script for downloading tripleo-ci logs, it then prompts the users for
-# file they want to download, unzips them into a tmp directory and changes into
-# the tmp directory, while in the tmp directory run "getthelogs" with no params
-# to download any log files you hadn't previously downloaded
-# Run it like this
-# getthelogs http://logs.openstack.org/68/237568/5/check-tripleo/gate-tripleo-ci-f22-ha/dd8f61d/
-
-set -eu
-
-BASEURL=$1
-TDIR=${BASEURL//\//}
-
-# We do not clean up this directory, so data doesn't need to be downloaded a second time
-# if the script is rerun again with the same url
-TDIR=~/tmp/ci-${TDIR//\//-}
+set -eu -o pipefail
+
+function usage(){
+  echo "Helper script for downloading tripleo-ci jobs logs"
+  echo
+  echo "Example:"
+  echo "getthelogs http://logs.openstack.org/00/123456/7/check/gate-tripleo-ci-foo/d3adbeef"
+  echo
+  echo "Downloads the logs and starts a shell from the logs root directory"
+}
+
+function finish(){
+  rc=${rc:-$?}
+  trap - EXIT
+  cd $TDIR/../
+  echo "Download job exited ${rc}"
+  PS1="JOBLOGS ]\$ " bash --noprofile --norc
+}
+
+function get_dirs(){
+  local drop="\b(etc|ara|ara_oooq|docs|build|stackviz|sudoers.d|lib|config-data|extra)\b"
+  local directories=""
+  directories=$(curl -s "$1" 2> /dev/null | grep -E "\[DIR" | grep -vE "${drop}" | sed -e "s,.*href=\"\([^\"]*\)\".*,${1}\1,g")
+  if [ -n "$directories" ]; then
+    for d in $directories; do
+      directories="$directories $(get_dirs $d/)"
+    done
+    echo $directories
+  else
+    echo ""
+  fi
+  return 0
+}
+
+[[ "${1:--}" =~ ^\s+?- ]] && (usage; exit 1)
+type -p wget 2>&1 >/dev/null || ( echo "Please install a wget tool!"; exit 127 )
+trap finish EXIT SIGINT SIGTERM
+WORKERS=6
+BASEURL=${1%/}
+SC=$(dirname $BASEURL | grep -o \/ | wc -w)
+if [[ ! $(basename $BASEURL) == 'logs' && SC -le 7 ]]; then
+  console=$BASEURL/console.html
+  BASEURL=${BASEURL}/logs
+else
+  console=''
+fi
+TDIR=${BASEURL##*http://}
+TDIR=/tmp/${TDIR}
 mkdir -p $TDIR
-cd $TDIR
-echo $BASEURL > BASEURL
-
-function _getfile(){
-    URL=$1
-    BASENAME=$(basename $1)
-    if [[ $BASENAME =~ .*(tar|console).* && ! -e $BASENAME ]] ; then
-        read -p "Want $BASENAME? " X
-        if [ "$X" = "y" ] ; then
-            curl -O $URL || curl -O ${URL}.gz
-            if [[ $BASENAME =~ .*(\.tar).* ]] ; then
-                mkdir ${BASENAME}_
-                tar -xf $BASENAME -C ${BASENAME}_
-            fi
-        fi
-    fi
-}
-
-FILES="$BASEURL/console.html"
-for FILE in $(curl $BASEURL/logs/ 2> /dev/null | grep href | sed -e 's/.*href="\([^"]*\)".*/\1/g' ) ; do
-    FILES="$FILES $BASEURL/logs/$FILE"
-done
-
-function getthelogs(){
-    for FILE in $FILES ; do
-        _getfile $FILE
-    done
-}
-
-getthelogs
-export FILES
-export -f getthelogs _getfile
-PS1="JOBLOGS ]\$ " bash
+cd /tmp
+
+echo "Target dir for download: $TDIR"
+echo Will download logs from the following URLs:
+list_to_get="$console $(get_dirs $BASEURL/)"
+for d in $list_to_get; do
+  echo $d
+done
+
+rm -f wget-jobs.txt
+for d in $list_to_get; do
+  args="\"-nv -nc --no-use-server-timestamps \
+  --accept-regex='\.txt\.gz$|console\.htm[l]?$|messages$' \
+  --reject='index.html*' \
+  --recursive -l 10 --domains logs.openstack.org --no-parent \
+  -erobots=off --wait 0.25 ${d}\""
+  echo "${args}" >> wget-jobs.txt
+done
+
+cat wget-jobs.txt | sed -n '{p;p}' | shuf > wget-jobs-shuf.txt
+cat wget-jobs-shuf.txt | xargs -r -n1 -P ${WORKERS} -I{} sh -c "wget {}"