Make elastic-recheck compatible with RDO ElasticSearch

Updates the elasticsearch Python client to a newer version and makes elastic-recheck work with the RDO ElasticSearch instance.

Story: TRIPLEOCI-188
Change-Id: If49d91f72d60aa237f732afd0213d083e39d83be
frenzyfriday 2021-07-29 13:52:25 +02:00
parent 6d7aca0c27
commit 3d6124eef9
10 changed files with 88 additions and 45 deletions
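For context, the heart of the migration is the client API change: pyelasticsearch took the query as a positional argument, while elasticsearch-py takes a query DSL body plus an index list as keyword arguments. A minimal sketch of the new call shape, assuming a reachable endpoint (the URL, index, and query below are illustrative, not values from this commit):

# Illustrative only: endpoint, index, and query are assumptions.
from elasticsearch import Elasticsearch

es = Elasticsearch('http://localhost:9200')

# A raw Lucene query string has to be wrapped into a query_string body.
body = {'query': {'query_string': {'query': 'build_status:"FAILURE"'}}}
results = es.search(index=['logstash-2014.06.12'], body=body,
                    params={'size': 10})
print(results['hits']['total'])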


@@ -18,9 +18,9 @@ import argparse
from datetime import datetime
import json
import os
from lazr.restfulclient.errors import ServerError
from launchpadlib import launchpad
import pyelasticsearch
import elasticsearch
import pytz
import requests
@@ -71,6 +71,11 @@ def get_launchpad_bug(bug):
LOG.exception("Failed to get Launchpad data for bug %s", bug)
bugdata = dict(name='Unable to get launchpad data',
affects='Unknown', reviews=[])
# because for some reason Launchpad returns a 500 instead of a 404
except ServerError:
LOG.exception("Failed to get Launchpad data for bug %s", bug)
bugdata = dict(name='Unable to get launchpad data',
affects='Unknown', reviews=[])
return bugdata
@@ -149,7 +154,8 @@ def main():
timeframe = days * 24 * STEP / 1000
last_indexed = int(
((classifier.most_recent() - epoch).total_seconds()) * 1000)
((classifier.most_recent().replace(tzinfo=pytz.utc)
- epoch).total_seconds()) * 1000)
behind = now - last_indexed
# the data we're going to return, including interesting headers
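A note on the tzinfo change above: Python refuses to subtract an offset-aware datetime from a naive one, and the arithmetic implies epoch is UTC-aware, so most_recent() has to be stamped as UTC first. A minimal illustration (values made up, not commit code):

import datetime
import pytz

epoch = datetime.datetime.fromtimestamp(0, tz=pytz.utc)  # aware
naive = datetime.datetime(2021, 7, 29, 11, 52, 25)       # no tzinfo
# naive - epoch would raise:
#   TypeError: can't subtract offset-naive and offset-aware datetimes
aware = naive.replace(tzinfo=pytz.utc)                   # declare as UTC
last_indexed = int((aware - epoch).total_seconds() * 1000)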
@@ -191,7 +197,7 @@ def main():
args.queue,
size=3000,
days=days)
except pyelasticsearch.exceptions.InvalidJsonResponseError:
except elasticsearch.SerializationError:
LOG.exception("Invalid Json while collecting metrics for query %s",
query['query'])
continue
@@ -199,7 +205,7 @@ def main():
LOG.exception("Timeout while collecting metrics for query %s",
query['query'])
continue
except pyelasticsearch.exceptions.ElasticHttpError as ex:
except elasticsearch.TransportError as ex:
LOG.error('Error from elasticsearch query for bug %s: %s',
query['bug'], ex)
continue
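The exception renames in the two hunks above follow a rough mapping from pyelasticsearch to elasticsearch-py: InvalidJsonResponseError becomes SerializationError and ElasticHttpError becomes TransportError. A sketch of the resulting pattern; run_query is a hypothetical placeholder for the classifier call, not a function in this repository:

import elasticsearch

def collect(run_query):
    try:
        return run_query()
    except elasticsearch.SerializationError:
        # was pyelasticsearch.exceptions.InvalidJsonResponseError
        return None
    except elasticsearch.TransportError as ex:
        # was pyelasticsearch.exceptions.ElasticHttpError; NotFoundError
        # and ConnectionError also derive from TransportError
        print('ES error: %s' % ex)
        return None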


@@ -119,6 +119,7 @@ def all_fails(classifier, config=None):
'openstack/nova',
'openstack/requirements',
'openstack/tempest',
'openstack/tripleo-ci',
'openstack-dev/devstack',
'openstack-dev/grenade',
'openstack-infra/devstack-gate',
@@ -147,6 +148,8 @@ def all_fails(classifier, config=None):
log = result.log_url.split('console.html')[0]
elif 'job-output.txt' in result.log_url:
log = result.log_url.split('job-output.txt')[0]
else:
log = ('/').join(result.log_url.split('/')[:-1])
other_fails["%s.%s" % (build, name)] = {
'log': log,
'timestamp': timestamp,
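The new else branch covers log URLs that name neither console.html nor job-output.txt by trimming the last path component, so the stored link still points at the log directory. Illustration with a made-up URL:

url = 'https://logserver.example.org/42/check/job/logs/job-output.json'
log = ('/').join(url.split('/')[:-1])
# -> 'https://logserver.example.org/42/check/job/logs'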
@@ -318,7 +321,8 @@ def collect_metrics(classifier, fails, config=None):
for q in classifier.queries:
try:
results = classifier.hits_by_query(q['query'],
size=config.uncat_search_size)
size=config.uncat_search_size,
days=14)
hits = _status_count(results)
LOG.debug("Collected metrics for query %s, hits %s", q['query'],
hits)
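The added days=14 bounds the uncategorized-fails query to two weeks of daily indices rather than an unbounded scan. A sketch of how such a window maps onto logstash-style date-suffixed index names (dates are illustrative):

import datetime

today = datetime.date(2014, 6, 12)
days_back = [today - datetime.timedelta(days=n) for n in range(3)]
indexes = ['logstash-%s' % d.strftime('%Y.%m.%d') for d in days_back]
# -> ['logstash-2014.06.12', 'logstash-2014.06.11', 'logstash-2014.06.10']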


@@ -15,11 +15,18 @@
import os
import re
import configparser
import codecs
# Can be overridden by defining environment variables with the same name
DEFAULTS = {
'ES_URL': 'http://logstash.openstack.org:80/elasticsearch',
'LS_URL': 'http://logstash.openstack.org',
'ES_URL': codecs.decode(
'uggcf://xvonan:on5r4np6-624n-49sr-956r-48no8poso2o6@erivrj.' +
'eqbcebwrpg.bet/rynfgvpfrnepu/',
'rot_13'),
'LS_URL': codecs.decode(
'uggcf://xvonan:on5r4np6-624n-49sr-956r-48no8poso2o6@erivrj.' +
'eqbcebwrpg.bet/rynfgvpfrnepu/',
'rot_13'),
'DB_URI': 'mysql+pymysql://query:query@logstash.openstack.org/subunit2sql',
'server_password': '',
'CI_USERNAME': 'jenkins',
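The new defaults embed the RDO review ElasticSearch endpoint ROT13-encoded. This is obfuscation, not encryption: the credentials are only masked from casual greps and crawlers, and decoding is symmetric. Illustration with a made-up string:

import codecs

codecs.decode('uggcf://rknzcyr.bet/', 'rot_13')
# -> 'https://example.org/'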
@@ -59,12 +66,12 @@ INCLUDED_PROJECTS_REGEX = "(^openstack/|devstack|grenade)"
# Let's value legibility over pep8 line width here...
ALL_FAILS_QUERY = (
'('
'(filename:"job-output.txt" AND message:"POST-RUN END" AND message:"playbooks/base/post.yaml")' # noqa E501
'(filename:"job-output.txt" AND message:"POST-RUN END" AND message:"post.yaml")' # noqa E501
' OR '
'(filename:"console.html" AND (message:"[Zuul] Job complete" OR message:"[SCP] Copying console log" OR message:"Grabbing consoleLog"))' # noqa E501
')'
' AND build_status:"FAILURE"'
' AND build_queue:"gate"'
' AND build_queue:"check"'
' AND voting:"1"'
)
@@ -95,7 +102,7 @@ class Config(object):
DEFAULTS[key] = val
self.es_url = es_url or DEFAULTS['ES_URL']
self.ls_url = ls_url or DEFAULTS['LS_URL']
self.ls_url = ls_url or DEFAULTS['ES_URL']
self.db_uri = db_uri or DEFAULTS['DB_URI']
self.jobs_re = jobs_re or DEFAULTS['JOBS_RE']
self.ci_username = ci_username or DEFAULTS['CI_USERNAME']


@@ -19,7 +19,7 @@ import time
import dateutil.parser as dp
import gerritlib.gerrit
import pyelasticsearch
import elasticsearch
import sqlalchemy
from sqlalchemy import orm
from subunit2sql.db import api as db_api
@@ -285,7 +285,7 @@ class Stream(object):
self.log.debug(e)
except FilesNotReady as e:
self.log.info(e)
except pyelasticsearch.exceptions.InvalidJsonResponseError:
except elasticsearch.SerializationError:
# If ElasticSearch returns an error code, sleep and retry
# TODO(jogo): if this works pull out search into a helper
# function that does this.
@@ -390,7 +390,7 @@ class Classifier(object):
def most_recent(self):
"""Return the datetime of the most recently indexed event."""
query = qb.most_recent_event()
results = self.es.search(query, size='1')
results = self.es.search(query, size='1', days=14)
if len(results) > 0:
last = dp.parse(results[0].timestamp)
return last
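Related to the tz handling above: dateutil leaves the parsed datetime naive when the timestamp string carries no UTC offset, which is why callers stamp the result with pytz.utc. A short illustration (timestamps made up):

import dateutil.parser as dp

dp.parse('2021-07-29T11:52:25').tzinfo        # -> None (naive)
dp.parse('2021-07-29T11:52:25+00:00').tzinfo  # -> tzutc()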


@@ -20,7 +20,8 @@ import datetime
import pprint
import dateutil.parser as dp
import pyelasticsearch
import elasticsearch
from elasticsearch import Elasticsearch
import pytz
@@ -39,10 +40,11 @@ class SearchEngine(object):
return self.index_cache[index]
try:
es.status(index=index)
es.indices.stats(index=index)
# es.indices.status(index=index)
self.index_cache[index] = True
return True
except pyelasticsearch.exceptions.ElasticHttpNotFoundError:
except elasticsearch.exceptions.NotFoundError:
return False
def search(self, query, size=1000, recent=False, days=0):
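In elasticsearch-py the per-index probe moves onto the indices namespace client, and a missing index surfaces as NotFoundError rather than ElasticHttpNotFoundError. A minimal sketch of the check above (the endpoint URL is an assumption):

import elasticsearch
from elasticsearch import Elasticsearch

es = Elasticsearch('http://localhost:9200')
try:
    es.indices.stats(index='logstash-2014.06.12')
    valid = True
except elasticsearch.exceptions.NotFoundError:
    valid = False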
@@ -65,8 +67,9 @@
The returned result is a ResultSet query.
"""
es = pyelasticsearch.ElasticSearch(self._url)
es = Elasticsearch(self._url)
args = {'size': size}
indexes = []
if recent or days:
# today's index
datefmt = self._indexfmt
@@ -87,8 +90,15 @@
if self._is_valid_index(es, index_name):
indexes.append(index_name)
args['index'] = indexes
results = es.search(query, **args)
if isinstance(query, str):
    query = {"query": {
        "query_string": {
            "query": query
        }
    }}
params = {"size": size}
results = es.search(index=indexes, body=query, params=params)
return ResultSet(results)
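The rewritten search() now accepts either a prebuilt DSL dict or a raw Lucene query string, wrapping strings into a query_string query before calling the client. A condensed sketch of just that wrapping; to_dsl is a hypothetical name, not a helper in this repository:

def to_dsl(query):
    if isinstance(query, str):
        return {'query': {'query_string': {'query': query}}}
    return query  # already a DSL body

to_dsl('filename:"job-output.txt" AND build_status:"FAILURE"')
# -> {'query': {'query_string': {'query': 'filename:"job-output.txt" ...'}}}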
@@ -161,7 +171,7 @@
# is too large and ES won't return it. At some point we should probably
# log a warning/error for these so we can clean them up.
if facet == "timestamp" and data is not None:
ts = dp.parse(data)
ts = dp.parse(data).replace(tzinfo=pytz.utc)
tsepoch = int(calendar.timegm(ts.timetuple()))
# take the floor based on resolution
ts -= datetime.timedelta(


@@ -67,7 +67,7 @@ class Context():
def _is_valid_ElasticSearch_query(self, x, bug) -> bool:
query = qb.generic(x['query'])
results = self.classifier.es.search(query, size='10')
results = self.classifier.es.search(query, size='10', days=1)
valid_query = len(results) > 0
if not valid_query:


@@ -52,5 +52,5 @@ class UnitTestCase(elastic_recheck.tests.TestCase):
def setUp(self):
super(UnitTestCase, self).setUp()
self.useFixture(fixtures.MonkeyPatch('pyelasticsearch.ElasticSearch',
self.useFixture(fixtures.MonkeyPatch('elasticsearch.Elasticsearch',
FakeES))


@@ -21,18 +21,19 @@ from elastic_recheck.tests import unit
class TestElasticRecheck(unit.UnitTestCase):
def test_hits_by_query_no_results(self):
c = er.Classifier("queries.yaml")
results = c.hits_by_query("this should find no bugs")
results = c.hits_by_query("this_should_find_no_bugs", days=1)
self.assertEqual(len(results), 0)
self.assertEqual(results.took, 53)
# removed the 'took' assertion, which was hardcoded to 53, as the value varies
self.assertEqual(results.timed_out, False)
def test_hits_by_query(self):
c = er.Classifier("queries.yaml")
q = ('''message:"Cannot ''createImage''"'''
''' AND filename:"console.html" AND voting:1''')
results = c.hits_by_query(q)
self.assertEqual(len(results), 20)
self.assertEqual(results.took, 46)
# updating the query to ensure we get at least some hits
q = 'filename:"job-output.txt" AND ' \
'message:"POST-RUN END" AND message:"post.yaml"'
results = c.hits_by_query(q, days=1)
# as 100 is the maximum number of results retrieved from the server
self.assertEqual(len(results), 100)
self.assertEqual(results.timed_out, False)


@@ -16,7 +16,8 @@ import datetime
import json
import mock
import pyelasticsearch
import elasticsearch
from elasticsearch import Elasticsearch
from elastic_recheck import results
from elastic_recheck import tests
@@ -112,7 +113,7 @@ class MockDatetimeYesterday(datetime.datetime):
'%Y-%m-%dT%H:%M:%S')
@mock.patch.object(pyelasticsearch.ElasticSearch, 'search', return_value={})
@mock.patch.object(Elasticsearch, 'search', return_value={})
class TestSearchEngine(tests.TestCase):
"""Tests that the elastic search API is called correctly."""
@@ -125,7 +126,9 @@ class TestSearchEngine(tests.TestCase):
# Tests a basic search with recent=False.
result_set = self.engine.search(self.query, size=10)
self.assertEqual(0, len(result_set))
search_mock.assert_called_once_with(self.query, size=10)
search_mock.assert_called_once_with(body={'query': {
    'query_string': {'query': self.query}
}}, params={'size': 10}, index=[])
def _test_search_recent(self, search_mock, datetime_mock,
expected_indexes):
@@ -133,14 +136,17 @@
result_set = self.engine.search(self.query, size=10, recent=True)
self.assertEqual(0, len(result_set))
search_mock.assert_called_once_with(
self.query, size=10, index=expected_indexes)
body={'query': {'query_string': {'query': self.query}}},
params={'size': 10},
index=expected_indexes)
def test_search_recent_current_index_only(self, search_mock):
# The search index comparison goes back one hour and cuts off by day,
# so test that we're one hour and one second into today so we only have
# one index in the search call.
with mock.patch.object(
pyelasticsearch.ElasticSearch, 'status') as mock_data:
elasticsearch.client.indices.IndicesClient, 'stats') \
as mock_data:
mock_data.return_value = "Not an exception"
self._test_search_recent(search_mock, MockDatetimeToday,
expected_indexes=['logstash-2014.06.12'])
@@ -150,7 +156,8 @@ class TestSearchEngine(tests.TestCase):
# so test that we're 59 minutes and 59 seconds into today so that we
# have an index for today and yesterday in the search call.
with mock.patch.object(
pyelasticsearch.ElasticSearch, 'status') as mock_data:
elasticsearch.client.indices.IndicesClient, 'stats') \
as mock_data:
mock_data.return_value = "Not an exception"
self._test_search_recent(search_mock, MockDatetimeYesterday,
expected_indexes=['logstash-2014.06.12',
@@ -159,22 +166,30 @@
def test_search_no_indexes(self, search_mock):
# Test when no indexes are valid
with mock.patch.object(
pyelasticsearch.ElasticSearch, 'status') as mock_data:
mock_data.side_effect = pyelasticsearch.exceptions.\
ElasticHttpNotFoundError()
elasticsearch.client.indices.IndicesClient, 'stats') \
as mock_data:
mock_data.side_effect = elasticsearch.exceptions.NotFoundError
self._test_search_recent(search_mock, MockDatetimeYesterday,
expected_indexes=[])
def test_search_days(self, search_mock):
# Test when specific days are used.
with mock.patch.object(
pyelasticsearch.ElasticSearch, 'status') as mock_data:
elasticsearch.client.indices.IndicesClient, 'stats') \
as mock_data:
mock_data.return_value = "Not an exception"
datetime.datetime = MockDatetimeYesterday
result_set = self.engine.search(self.query, size=10, days=3,
recent=False)
self.assertEqual(0, len(result_set))
search_mock.assert_called_once_with(self.query, size=10,
index=['logstash-2014.06.12',
'logstash-2014.06.11',
'logstash-2014.06.10'])
search_mock.assert_called_once_with(body={
    'query': {
        'query_string': {
            'query': self.query
        }
    }
},
    params={'size': 10},
    index=['logstash-2014.06.12',
           'logstash-2014.06.11',
           'logstash-2014.06.10'])


@@ -1,7 +1,7 @@
pbr>=1.8
python-dateutil>=2.0
pytz
pyelasticsearch<1.0
elasticsearch==7.14.0
gerritlib
python-daemon>=2.2.0
irc>=17.0
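The hard pin replaces the old pyelasticsearch<1.0 cap; elasticsearch-py's major version tracks the server's major version, so 7.14.0 matches a 7.x RDO cluster. A hedged way to check an installed environment against the pin (importlib.metadata needs Python 3.8+):

import importlib.metadata

assert importlib.metadata.version('elasticsearch') == '7.14.0'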