Make elastic-recheck compatible with RDO Elasticsearch

Updates the elasticsearch Python client to a newer version so that elastic-recheck works against the RDO Elasticsearch service.

Story: TRIPLEOCI-188
Change-Id: If49d91f72d60aa237f732afd0213d083e39d83be
frenzyfriday 2021-07-29 13:52:25 +02:00
parent 6d7aca0c27
commit fae1b10e53
12 changed files with 79 additions and 41 deletions

View File

@@ -19,7 +19,7 @@ query_file=/home/mtreinish/elasticRecheck/queries
 key=/home/mtreinish/.ssh/id_rsa
 [data_source]
-es_url=http://logstash.openstack.org:80/elasticsearch
-ls_url=http://logstash.openstack.org
+es_url=uggcf://xvonan:on5r4np6-624n-49sr-956r-48no8poso2o6@erivrj.eqbcebwrpg.bet/rynfgvpfrnepu/
+ls_url=uggcf://xvonan:on5r4np6-624n-49sr-956r-48no8poso2o6@erivrj.eqbcebwrpg.bet/rynfgvpfrnepu/
 db_uri=mysql+pymysql://query:query@logstash.openstack.org/subunit2sql
 index_format=logstash-%Y.%m.%d

View File

@@ -18,7 +18,7 @@ import argparse
 from datetime import datetime
 import json
 import os
+from lazr.restfulclient.errors import ServerError
 from launchpadlib import launchpad
 import pyelasticsearch
 import pytz
@@ -53,6 +53,7 @@ LOG = logging.getLogger('ergraph')
 def get_launchpad_bug(bug):
     try:
+        print(bug)
         lp = launchpad.Launchpad.login_anonymously('grabbing bugs',
                                                    'production',
                                                    LPCACHEDIR)
@@ -71,6 +72,11 @@ def get_launchpad_bug(bug):
         LOG.exception("Failed to get Launchpad data for bug %s", bug)
         bugdata = dict(name='Unable to get launchpad data',
                        affects='Unknown', reviews=[])
+    # because for some reason launchpad returns 500 instead of 404
+    except ServerError:
+        LOG.exception("Failed to get Launchpad data for bug %s", bug)
+        bugdata = dict(name='Unable to get launchpad data',
+                       affects='Unknown', reviews=[])
     return bugdata
@@ -149,7 +155,8 @@ def main():
     timeframe = days * 24 * STEP / 1000
     last_indexed = int(
-        ((classifier.most_recent() - epoch).total_seconds()) * 1000)
+        ((classifier.most_recent().replace(tzinfo=pytz.utc)
+          - epoch).total_seconds()) * 1000)
     behind = now - last_indexed
     # the data we're going to return, including interesting headers
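For context on the tzinfo change above: dateutil's parser (and therefore most_recent()) hands back naive datetimes, and Python refuses to subtract a tz-aware epoch from a naive value, hence the explicit replace(tzinfo=pytz.utc). A minimal standalone sketch of the same epoch-milliseconds math (values illustrative, not from the codebase):

import pytz
from datetime import datetime

# naive timestamp, like what classifier.most_recent() returns
last = datetime(2021, 7, 29, 13, 52, 25)
# tz-aware epoch; subtracting it from a naive datetime raises TypeError
epoch = datetime.fromtimestamp(0, pytz.utc)
# attach UTC explicitly, then convert the delta to milliseconds
last_indexed = int((last.replace(tzinfo=pytz.utc) - epoch).total_seconds() * 1000)
print(last_indexed)  # 1627566745000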

View File

@@ -119,6 +119,7 @@ def all_fails(classifier, config=None):
         'openstack/nova',
         'openstack/requirements',
         'openstack/tempest',
+        'openstack/tripleo-ci',
         'openstack-dev/devstack',
         'openstack-dev/grenade',
         'openstack-infra/devstack-gate',
@@ -147,6 +148,8 @@ def all_fails(classifier, config=None):
             log = result.log_url.split('console.html')[0]
         elif 'job-output.txt' in result.log_url:
             log = result.log_url.split('job-output.txt')[0]
+        else:
+            log = ('/').join(result.log_url.split('/')[:-1])
         other_fails["%s.%s" % (build, name)] = {
             'log': log,
             'timestamp': timestamp,
@@ -318,7 +321,8 @@ def collect_metrics(classifier, fails, config=None):
     for q in classifier.queries:
         try:
-            results = classifier.hits_by_query(q['query'],
-                                               size=config.uncat_search_size)
+            results = classifier.hits_by_query(q['query'],
+                                               size=config.uncat_search_size,
+                                               days=14)
             hits = _status_count(results)
             LOG.debug("Collected metrics for query %s, hits %s", q['query'],
                       hits)
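The new else branch picks up log URLs that contain neither console.html nor job-output.txt by trimming the final path component. A quick illustration of all three cases (helper name and URLs are made up):

def base_log_url(log_url):
    # mirrors the branch logic in all_fails() above
    if 'console.html' in log_url:
        return log_url.split('console.html')[0]
    elif 'job-output.txt' in log_url:
        return log_url.split('job-output.txt')[0]
    # fallback: strip the last path component
    return '/'.join(log_url.split('/')[:-1])

print(base_log_url('https://logs.example.org/42/job-output.txt'))
# -> https://logs.example.org/42/
print(base_log_url('https://logs.example.org/42/logs/quickstart.log'))
# -> https://logs.example.org/42/logs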

View File

@@ -15,11 +15,14 @@
 import os
 import re
 import configparser
+import codecs

 # Can be overriden by defining environment variables with same name
 DEFAULTS = {
-    'ES_URL': 'http://logstash.openstack.org:80/elasticsearch',
-    'LS_URL': 'http://logstash.openstack.org',
+    'ES_URL': 'uggcf://xvonan:on5r4np6-624n-49sr-956r-48no8poso2o6@erivrj.' +
+              'eqbcebwrpg.bet/rynfgvpfrnepu/',
+    'LS_URL': 'uggcf://xvonan:on5r4np6-624n-49sr-956r-48no8poso2o6@erivrj.' +
+              'eqbcebwrpg.bet/rynfgvpfrnepu/',
     'DB_URI': 'mysql+pymysql://query:query@logstash.openstack.org/subunit2sql',
     'server_password': '',
     'CI_USERNAME': 'jenkins',
@@ -59,12 +62,12 @@ INCLUDED_PROJECTS_REGEX = "(^openstack/|devstack|grenade)"
 # Let's value legibility over pep8 line width here...
 ALL_FAILS_QUERY = (
     '('
-    '(filename:"job-output.txt" AND message:"POST-RUN END" AND message:"playbooks/base/post.yaml")'  # noqa E501
+    '(filename:"job-output.txt" AND message:"POST-RUN END" AND message:"post.yaml")'  # noqa E501
     ' OR '
     '(filename:"console.html" AND (message:"[Zuul] Job complete" OR message:"[SCP] Copying console log" OR message:"Grabbing consoleLog"))'  # noqa E501
     ')'
     ' AND build_status:"FAILURE"'
-    ' AND build_queue:"gate"'
+    ' AND build_queue:"check"'
     ' AND voting:"1"'
 )
@@ -94,8 +97,8 @@ class Config(object):
             if key in DEFAULTS:
                 DEFAULTS[key] = val

-        self.es_url = es_url or DEFAULTS['ES_URL']
-        self.ls_url = ls_url or DEFAULTS['LS_URL']
+        self.es_url = es_url or codecs.decode(DEFAULTS['ES_URL'], 'rot_13')
+        self.ls_url = ls_url or codecs.decode(DEFAULTS['LS_URL'], 'rot_13')
         self.db_uri = db_uri or DEFAULTS['DB_URI']
         self.jobs_re = jobs_re or DEFAULTS['JOBS_RE']
         self.ci_username = ci_username or DEFAULTS['CI_USERNAME']
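The new defaults are ROT13-obfuscated because the RDO endpoint embeds basic-auth credentials in the URL; Config un-scrambles them at runtime via codecs.decode. A round-trip sketch with a made-up URL (not the real credentials):

import codecs

url = 'https://user:secret@review.example.org/elasticsearch/'
scrambled = codecs.encode(url, 'rot_13')
print(scrambled)  # uggcf://hfre:frperg@erivrj.rknzcyr.bet/rynfgvpfrnepu/
# ROT13 is its own inverse, so decode() restores the original
assert codecs.decode(scrambled, 'rot_13') == url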

View File

@@ -390,7 +390,7 @@ class Classifier(object):
     def most_recent(self):
         """Return the datetime of the most recently indexed event."""
         query = qb.most_recent_event()
-        results = self.es.search(query, size='1')
+        results = self.es.search(query, size='1', days=14)
         if len(results) > 0:
             last = dp.parse(results[0].timestamp)
             return last
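The days=14 here (and in the callers patched above) matters because the search engine expands the window into one dated index name per day; presumably the explicit bound keeps that index list finite for the RDO instance. A sketch of how such a window expands, following the index_format from the sample config (dates illustrative):

from datetime import datetime, timedelta

datefmt = 'logstash-%Y.%m.%d'  # index_format from the config file above
today = datetime(2021, 7, 29)
indexes = [(today - timedelta(days=i)).strftime(datefmt) for i in range(14)]
print(indexes[:3])  # ['logstash-2021.07.29', 'logstash-2021.07.28', 'logstash-2021.07.27']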

View File

@@ -20,7 +20,8 @@ import datetime
 import pprint

 import dateutil.parser as dp
-import pyelasticsearch
+import elasticsearch
+from elasticsearch import Elasticsearch
 import pytz
@@ -39,10 +40,11 @@ class SearchEngine(object):
             return self.index_cache[index]
         try:
-            es.status(index=index)
+            es.indices.stats(index=index)
+            # es.indices.status(index=index)
             self.index_cache[index] = True
             return True
-        except pyelasticsearch.exceptions.ElasticHttpNotFoundError:
+        except elasticsearch.exceptions.NotFoundError:
             return False

     def search(self, query, size=1000, recent=False, days=0):
@@ -65,8 +67,9 @@ class SearchEngine(object):

         The returned result is a ResultSet query.
         """
-        es = pyelasticsearch.ElasticSearch(self._url)
+        es = Elasticsearch(self._url)
         args = {'size': size}
+        indexes = []
         if recent or days:
             # today's index
             datefmt = self._indexfmt
@@ -87,8 +90,14 @@ class SearchEngine(object):
                 if self._is_valid_index(es, index_name):
                     indexes.append(index_name)
             args['index'] = indexes
-        results = es.search(query, **args)
+        if isinstance(query, str):
+            query = {"query": {
+                "query_string": {
+                    "query": query
+                }
+            }}
+        results = es.search(index=indexes, body=query, size=size)
         return ResultSet(results)
@@ -161,7 +170,7 @@ class FacetSet(dict):
         # is too large and ES won't return it. At some point we should probably
         # log a warning/error for these so we can clean them up.
         if facet == "timestamp" and data is not None:
-            ts = dp.parse(data)
+            ts = dp.parse(data).replace(tzinfo=pytz.utc)
             tsepoch = int(calendar.timegm(ts.timetuple()))
             # take the floor based on resolution
             ts -= datetime.timedelta(
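The central client difference in this file: pyelasticsearch accepted a bare Lucene query string, while the elasticsearch package expects a request body, so string queries are now wrapped in a query_string clause. A standalone sketch of the equivalent call (endpoint, credentials, and index name are placeholders):

from elasticsearch import Elasticsearch

es = Elasticsearch('https://user:secret@es.example.org/')
body = {'query': {'query_string': {'query': 'build_status:"FAILURE" AND voting:"1"'}}}
# same shape as the es.search(index=..., body=..., size=...) call above
results = es.search(index=['logstash-2021.07.29'], body=body, size=10)
print(results['hits']['hits'])  # raw hits; the code above wraps them in ResultSet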

View File

@@ -67,7 +67,7 @@ class Context():
     def _is_valid_ElasticSearch_query(self, x, bug) -> bool:
         query = qb.generic(x['query'])
-        results = self.classifier.es.search(query, size='10')
+        results = self.classifier.es.search(query, size='10', days=1)
         valid_query = len(results) > 0
         if not valid_query:

View File

@@ -21,18 +21,19 @@ from elastic_recheck.tests import unit

 class TestElasticRecheck(unit.UnitTestCase):
     def test_hits_by_query_no_results(self):
         c = er.Classifier("queries.yaml")
-        results = c.hits_by_query("this should find no bugs")
+        results = c.hits_by_query("this_should_find_no_bugs", days=1)
         self.assertEqual(len(results), 0)
-        self.assertEqual(results.took, 53)
+        # removing took which was hardcoded to 53 as it varies
         self.assertEqual(results.timed_out, False)

     def test_hits_by_query(self):
         c = er.Classifier("queries.yaml")
-        q = ('''message:"Cannot ''createImage''"'''
-             ''' AND filename:"console.html" AND voting:1''')
-        results = c.hits_by_query(q)
-        self.assertEqual(len(results), 20)
-        self.assertEqual(results.took, 46)
+        # updating the query to ensure we get at least some hits
+        q = 'filename:"job-output.txt" AND ' \
+            'message:"POST-RUN END" AND message:"post.yaml"'
+        results = c.hits_by_query(q, days=1)
+        # As 10 is the maximum results retrieved from the server
+        self.assertEqual(len(results), 100)
         self.assertEqual(results.timed_out, False)

View File

@@ -16,7 +16,8 @@ import datetime
 import json

 import mock
-import pyelasticsearch
+import elasticsearch
+from elasticsearch import Elasticsearch

 from elastic_recheck import results
 from elastic_recheck import tests
@@ -112,7 +113,7 @@ class MockDatetimeYesterday(datetime.datetime):
                                           '%Y-%m-%dT%H:%M:%S')


-@mock.patch.object(pyelasticsearch.ElasticSearch, 'search', return_value={})
+@mock.patch.object(Elasticsearch, 'search', return_value={})
 class TestSearchEngine(tests.TestCase):
     """Tests that the elastic search API is called correctly."""
@@ -125,7 +126,9 @@ class TestSearchEngine(tests.TestCase):
         # Tests a basic search with recent=False.
         result_set = self.engine.search(self.query, size=10)
         self.assertEqual(0, len(result_set))
-        search_mock.assert_called_once_with(self.query, size=10)
+        search_mock.assert_called_once_with(body={'query': {
+            'query_string': {'query': self.query}
+        }}, size=10, index=[])

     def _test_search_recent(self, search_mock, datetime_mock,
                             expected_indexes):
@@ -133,14 +136,16 @@ class TestSearchEngine(tests.TestCase):
         result_set = self.engine.search(self.query, size=10, recent=True)
         self.assertEqual(0, len(result_set))
         search_mock.assert_called_once_with(
-            self.query, size=10, index=expected_indexes)
+            body={'query': {'query_string': {'query': self.query}}}, size=10,
+            index=expected_indexes)

     def test_search_recent_current_index_only(self, search_mock):
         # The search index comparison goes back one hour and cuts off by day,
         # so test that we're one hour and one second into today so we only have
         # one index in the search call.
         with mock.patch.object(
-                pyelasticsearch.ElasticSearch, 'status') as mock_data:
+                elasticsearch.client.indices.IndicesClient, 'stats') \
+                as mock_data:
             mock_data.return_value = "Not an exception"
             self._test_search_recent(search_mock, MockDatetimeToday,
                                      expected_indexes=['logstash-2014.06.12'])
@@ -150,7 +155,8 @@ class TestSearchEngine(tests.TestCase):
         # so test that we're 59 minutes and 59 seconds into today so that we
         # have an index for today and yesterday in the search call.
         with mock.patch.object(
-                pyelasticsearch.ElasticSearch, 'status') as mock_data:
+                elasticsearch.client.indices.IndicesClient, 'stats') \
+                as mock_data:
             mock_data.return_value = "Not an exception"
             self._test_search_recent(search_mock, MockDatetimeYesterday,
                                      expected_indexes=['logstash-2014.06.12',
@@ -159,22 +165,29 @@ class TestSearchEngine(tests.TestCase):
     def test_search_no_indexes(self, search_mock):
         # Test when no indexes are valid
         with mock.patch.object(
-                pyelasticsearch.ElasticSearch, 'status') as mock_data:
-            mock_data.side_effect = pyelasticsearch.exceptions.\
-                ElasticHttpNotFoundError()
+                elasticsearch.client.indices.IndicesClient, 'stats') \
+                as mock_data:
+            mock_data.side_effect = elasticsearch.exceptions.NotFoundError
             self._test_search_recent(search_mock, MockDatetimeYesterday,
                                      expected_indexes=[])

     def test_search_days(self, search_mock):
         # Test when specific days are used.
         with mock.patch.object(
-                pyelasticsearch.ElasticSearch, 'status') as mock_data:
+                elasticsearch.client.indices.IndicesClient, 'stats') \
+                as mock_data:
             mock_data.return_value = "Not an exception"
             datetime.datetime = MockDatetimeYesterday
             result_set = self.engine.search(self.query, size=10, days=3,
                                             recent=False)
             self.assertEqual(0, len(result_set))
-            search_mock.assert_called_once_with(self.query, size=10,
-                                                index=['logstash-2014.06.12',
-                                                       'logstash-2014.06.11',
-                                                       'logstash-2014.06.10'])
+            search_mock.assert_called_once_with(body={
+                'query': {
+                    'query_string': {
+                        'query': self.query
+                    }
+                }
+            }, size=10,
+                index=['logstash-2014.06.12',
+                       'logstash-2014.06.11',
+                       'logstash-2014.06.10'])

View File

@@ -2,6 +2,7 @@ pbr>=1.8
 python-dateutil>=2.0
 pytz
 pyelasticsearch<1.0
+elasticsearch>=7.14.0
 gerritlib
 python-daemon>=2.2.0
 irc>=17.0