diff --git a/elastic_recheck/cmd/graph.py b/elastic_recheck/cmd/graph.py
index a626de66..ed01f2b5 100755
--- a/elastic_recheck/cmd/graph.py
+++ b/elastic_recheck/cmd/graph.py
@@ -18,9 +18,9 @@ import argparse
 from datetime import datetime
 import json
 import os
-
+from lazr.restfulclient.errors import ServerError
 from launchpadlib import launchpad
-import pyelasticsearch
+import elasticsearch
 import pytz
 import requests
@@ -71,6 +71,11 @@ def get_launchpad_bug(bug):
         LOG.exception("Failed to get Launchpad data for bug %s", bug)
         bugdata = dict(name='Unable to get launchpad data',
                        affects='Unknown', reviews=[])
+    # Launchpad sometimes returns 500 instead of 404 for missing bugs
+    except ServerError:
+        LOG.exception("Failed to get Launchpad data for bug %s", bug)
+        bugdata = dict(name='Unable to get launchpad data',
+                       affects='Unknown', reviews=[])
     return bugdata
@@ -149,7 +154,8 @@ def main():
     timeframe = days * 24 * STEP / 1000
 
     last_indexed = int(
-        ((classifier.most_recent() - epoch).total_seconds()) * 1000)
+        ((classifier.most_recent().replace(tzinfo=pytz.utc)
+          - epoch).total_seconds()) * 1000)
     behind = now - last_indexed
 
     # the data we're going to return, including interesting headers
@@ -191,7 +197,7 @@ def main():
                 args.queue,
                 size=3000,
                 days=days)
-        except pyelasticsearch.exceptions.InvalidJsonResponseError:
+        except elasticsearch.SerializationError:
             LOG.exception("Invalid Json while collecting metrics for query %s",
                           query['query'])
             continue
@@ -199,7 +205,7 @@ def main():
             LOG.exception("Timeout while collecting metrics for query %s",
                           query['query'])
             continue
-        except pyelasticsearch.exceptions.ElasticHttpError as ex:
+        except elasticsearch.TransportError as ex:
             LOG.error('Error from elasticsearch query for bug %s: %s',
                       query['bug'], ex)
             continue
diff --git a/elastic_recheck/cmd/uncategorized_fails.py b/elastic_recheck/cmd/uncategorized_fails.py
index de758ca3..852bb243 100755
--- a/elastic_recheck/cmd/uncategorized_fails.py
+++ b/elastic_recheck/cmd/uncategorized_fails.py
@@ -119,6 +119,7 @@ def all_fails(classifier, config=None):
         'openstack/nova',
         'openstack/requirements',
         'openstack/tempest',
+        'openstack/tripleo-ci',
         'openstack-dev/devstack',
         'openstack-dev/grenade',
         'openstack-infra/devstack-gate',
@@ -147,6 +148,8 @@ def all_fails(classifier, config=None):
             log = result.log_url.split('console.html')[0]
         elif 'job-output.txt' in result.log_url:
             log = result.log_url.split('job-output.txt')[0]
+        else:
+            log = '/'.join(result.log_url.split('/')[:-1])
         other_fails["%s.%s" % (build, name)] = {
             'log': log,
             'timestamp': timestamp,
@@ -318,7 +321,8 @@ def collect_metrics(classifier, fails, config=None):
     for q in classifier.queries:
         try:
             results = classifier.hits_by_query(q['query'],
-                                               size=config.uncat_search_size,
+                                               size=config.uncat_search_size,
+                                               days=14)
             hits = _status_count(results)
             LOG.debug("Collected metrics for query %s, hits %s",
                       q['query'], hits)
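For reference, the exception classes migrate from pyelasticsearch to elasticsearch-py as shown below. This is a minimal sketch assuming elasticsearch-py 7.x; the endpoint URL and query body are placeholders, not values from the patch. Note that NotFoundError subclasses TransportError, so it must be caught first:

    # Sketch: pyelasticsearch -> elasticsearch-py exception mapping (7.x).
    import logging

    import elasticsearch

    LOG = logging.getLogger(__name__)
    es = elasticsearch.Elasticsearch('http://localhost:9200')  # placeholder

    try:
        es.search(index='logstash-*', body={'query': {'match_all': {}}})
    except elasticsearch.SerializationError:
        # was pyelasticsearch.exceptions.InvalidJsonResponseError
        LOG.exception("response was not valid JSON")
    except elasticsearch.NotFoundError:
        # was pyelasticsearch.exceptions.ElasticHttpNotFoundError;
        # a subclass of TransportError, so catch it before the general case
        LOG.exception("no such index")
    except elasticsearch.TransportError as ex:
        # was pyelasticsearch.exceptions.ElasticHttpError
        LOG.error("elasticsearch error: %s", ex)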
diff --git a/elastic_recheck/config.py b/elastic_recheck/config.py
index ae1d9bc4..9d188bdb 100644
--- a/elastic_recheck/config.py
+++ b/elastic_recheck/config.py
@@ -15,11 +15,18 @@ import os
 import re
 
 import configparser
+import codecs
 
 # Can be overriden by defining environment variables with same name
 DEFAULTS = {
-    'ES_URL': 'http://logstash.openstack.org:80/elasticsearch',
-    'LS_URL': 'http://logstash.openstack.org',
+    'ES_URL': codecs.decode(
+        'uggcf://xvonan:on5r4np6-624n-49sr-956r-48no8poso2o6@erivrj.' +
+        'eqbcebwrpg.bet/rynfgvpfrnepu/',
+        'rot_13'),
+    'LS_URL': codecs.decode(
+        'uggcf://xvonan:on5r4np6-624n-49sr-956r-48no8poso2o6@erivrj.' +
+        'eqbcebwrpg.bet/rynfgvpfrnepu/',
+        'rot_13'),
     'DB_URI': 'mysql+pymysql://query:query@logstash.openstack.org/subunit2sql',
     'server_password': '',
     'CI_USERNAME': 'jenkins',
@@ -59,12 +66,12 @@ INCLUDED_PROJECTS_REGEX = "(^openstack/|devstack|grenade)"
 # Let's value legibility over pep8 line width here...
 ALL_FAILS_QUERY = (
     '('
-    '(filename:"job-output.txt" AND message:"POST-RUN END" AND message:"playbooks/base/post.yaml")'  # noqa E501
+    '(filename:"job-output.txt" AND message:"POST-RUN END" AND message:"post.yaml")'  # noqa E501
     ' OR '
     '(filename:"console.html" AND (message:"[Zuul] Job complete" OR message:"[SCP] Copying console log" OR message:"Grabbing consoleLog"))'  # noqa E501
     ')'
     ' AND build_status:"FAILURE"'
-    ' AND build_queue:"gate"'
+    ' AND build_queue:"check"'
     ' AND voting:"1"'
 )
@@ -95,7 +102,7 @@ class Config(object):
                     DEFAULTS[key] = val
 
         self.es_url = es_url or DEFAULTS['ES_URL']
-        self.ls_url = ls_url or DEFAULTS['LS_URL']
+        self.ls_url = ls_url or DEFAULTS['ES_URL']
         self.db_uri = db_uri or DEFAULTS['DB_URI']
         self.jobs_re = jobs_re or DEFAULTS['JOBS_RE']
         self.ci_username = ci_username or DEFAULTS['CI_USERNAME']
diff --git a/elastic_recheck/elasticRecheck.py b/elastic_recheck/elasticRecheck.py
index 8220b014..06ee5336 100644
--- a/elastic_recheck/elasticRecheck.py
+++ b/elastic_recheck/elasticRecheck.py
@@ -19,7 +19,7 @@ import time
 
 import dateutil.parser as dp
 import gerritlib.gerrit
-import pyelasticsearch
+import elasticsearch
 import sqlalchemy
 from sqlalchemy import orm
 from subunit2sql.db import api as db_api
@@ -285,7 +285,7 @@ class Stream(object):
                 self.log.debug(e)
             except FilesNotReady as e:
                 self.log.info(e)
-            except pyelasticsearch.exceptions.InvalidJsonResponseError:
+            except elasticsearch.SerializationError:
                 # If ElasticSearch returns an error code, sleep and retry
                 # TODO(jogo): if this works pull out search into a helper
                 # function that does this.
@@ -390,7 +390,7 @@ class Classifier(object):
 
     def most_recent(self):
         """Return the datetime of the most recently indexed event."""
         query = qb.most_recent_event()
-        results = self.es.search(query, size='1')
+        results = self.es.search(query, size='1', days=14)
         if len(results) > 0:
             last = dp.parse(results[0].timestamp)
             return last
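The new defaults are only lightly obfuscated: ROT13 hides the embedded credentials from casual grepping but is trivially reversible, and environment variables with the same names still override both values. A small sketch with a made-up URL (not the one in the patch):

    # Sketch: ROT13 de-obfuscation as used by the new DEFAULTS.
    import codecs

    obfuscated = 'uggcf://hfre:cnff@erivrj.rknzcyr.bet/rynfgvpfrnepu/'
    print(codecs.decode(obfuscated, 'rot_13'))
    # https://user:pass@review.example.org/elasticsearch/

    # ROT13 is its own inverse
    assert codecs.decode(
        codecs.decode(obfuscated, 'rot_13'), 'rot_13') == obfuscated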
""" - es = pyelasticsearch.ElasticSearch(self._url) + es = Elasticsearch(self._url) args = {'size': size} + indexes = [] if recent or days: # today's index datefmt = self._indexfmt @@ -87,8 +90,15 @@ class SearchEngine(object): if self._is_valid_index(es, index_name): indexes.append(index_name) args['index'] = indexes - - results = es.search(query, **args) + if isinstance(query, str): + query = {"query": { + "query_string": { + "query": query + } + } + } + params = {"size": size} + results = es.search(index=indexes, body=query, params=params) return ResultSet(results) @@ -161,7 +171,7 @@ class FacetSet(dict): # is too large and ES won't return it. At some point we should probably # log a warning/error for these so we can clean them up. if facet == "timestamp" and data is not None: - ts = dp.parse(data) + ts = dp.parse(data).replace(tzinfo=pytz.utc) tsepoch = int(calendar.timegm(ts.timetuple())) # take the floor based on resolution ts -= datetime.timedelta( diff --git a/elastic_recheck/tests/functional/test_queries.py b/elastic_recheck/tests/functional/test_queries.py index c93097a8..cf24283d 100644 --- a/elastic_recheck/tests/functional/test_queries.py +++ b/elastic_recheck/tests/functional/test_queries.py @@ -67,7 +67,7 @@ class Context(): def _is_valid_ElasticSearch_query(self, x, bug) -> bool: query = qb.generic(x['query']) - results = self.classifier.es.search(query, size='10') + results = self.classifier.es.search(query, size='10', days=1) valid_query = len(results) > 0 if not valid_query: diff --git a/elastic_recheck/tests/unit/__init__.py b/elastic_recheck/tests/unit/__init__.py index c639c20d..7f31326d 100644 --- a/elastic_recheck/tests/unit/__init__.py +++ b/elastic_recheck/tests/unit/__init__.py @@ -52,5 +52,5 @@ class UnitTestCase(elastic_recheck.tests.TestCase): def setUp(self): super(UnitTestCase, self).setUp() - self.useFixture(fixtures.MonkeyPatch('pyelasticsearch.ElasticSearch', + self.useFixture(fixtures.MonkeyPatch('elasticsearch.ElasticSearch', FakeES)) diff --git a/elastic_recheck/tests/unit/test_elastic_recheck.py b/elastic_recheck/tests/unit/test_elastic_recheck.py index 1e758e87..24c5682f 100644 --- a/elastic_recheck/tests/unit/test_elastic_recheck.py +++ b/elastic_recheck/tests/unit/test_elastic_recheck.py @@ -21,18 +21,19 @@ from elastic_recheck.tests import unit class TestElasticRecheck(unit.UnitTestCase): def test_hits_by_query_no_results(self): c = er.Classifier("queries.yaml") - results = c.hits_by_query("this should find no bugs") + results = c.hits_by_query("this_should_find_no_bugs", days=1) self.assertEqual(len(results), 0) - self.assertEqual(results.took, 53) + # removing took which was hardcoded to 53 as it varies self.assertEqual(results.timed_out, False) def test_hits_by_query(self): c = er.Classifier("queries.yaml") - q = ('''message:"Cannot ''createImage''"''' - ''' AND filename:"console.html" AND voting:1''') - results = c.hits_by_query(q) - self.assertEqual(len(results), 20) - self.assertEqual(results.took, 46) + # updating the query to ensure we get at least some hits + q = 'filename:"job-output.txt" AND ' \ + 'message:"POST-RUN END" AND message:"post.yaml"' + results = c.hits_by_query(q, days=1) + # As 10 is the maximum results retrieved from the server + self.assertEqual(len(results), 100) self.assertEqual(results.timed_out, False) diff --git a/elastic_recheck/tests/unit/test_results.py b/elastic_recheck/tests/unit/test_results.py index fec94117..50e87595 100644 --- a/elastic_recheck/tests/unit/test_results.py +++ 
diff --git a/elastic_recheck/tests/unit/test_results.py b/elastic_recheck/tests/unit/test_results.py
index fec94117..50e87595 100644
--- a/elastic_recheck/tests/unit/test_results.py
+++ b/elastic_recheck/tests/unit/test_results.py
@@ -16,7 +16,8 @@ import datetime
 import json
 
 import mock
-import pyelasticsearch
+import elasticsearch
+from elasticsearch import Elasticsearch
 
 from elastic_recheck import results
 from elastic_recheck import tests
@@ -112,7 +113,7 @@ class MockDatetimeYesterday(datetime.datetime):
                                          '%Y-%m-%dT%H:%M:%S')
 
 
-@mock.patch.object(pyelasticsearch.ElasticSearch, 'search', return_value={})
+@mock.patch.object(Elasticsearch, 'search', return_value={})
 class TestSearchEngine(tests.TestCase):
     """Tests that the elastic search API is called correctly."""
 
@@ -125,7 +126,9 @@ class TestSearchEngine(tests.TestCase):
         # Tests a basic search with recent=False.
         result_set = self.engine.search(self.query, size=10)
         self.assertEqual(0, len(result_set))
-        search_mock.assert_called_once_with(self.query, size=10)
+        search_mock.assert_called_once_with(body={'query': {
+            'query_string': {'query': self.query}
+        }}, params={'size': 10}, index=[])
 
     def _test_search_recent(self, search_mock, datetime_mock,
                             expected_indexes):
@@ -133,14 +136,17 @@ class TestSearchEngine(tests.TestCase):
         result_set = self.engine.search(self.query, size=10, recent=True)
         self.assertEqual(0, len(result_set))
         search_mock.assert_called_once_with(
-            self.query, size=10, index=expected_indexes)
+            body={'query': {'query_string': {'query': self.query}}},
+            params={'size': 10},
+            index=expected_indexes)
 
     def test_search_recent_current_index_only(self, search_mock):
         # The search index comparison goes back one hour and cuts off by day,
         # so test that we're one hour and one second into today so we only have
         # one index in the search call.
         with mock.patch.object(
-                pyelasticsearch.ElasticSearch, 'status') as mock_data:
+                elasticsearch.client.indices.IndicesClient, 'stats') \
+                as mock_data:
             mock_data.return_value = "Not an exception"
             self._test_search_recent(search_mock, MockDatetimeToday,
                                      expected_indexes=['logstash-2014.06.12'])
@@ -150,7 +156,8 @@ class TestSearchEngine(tests.TestCase):
         # so test that we're 59 minutes and 59 seconds into today so that we
         # have an index for today and yesterday in the search call.
         with mock.patch.object(
-                pyelasticsearch.ElasticSearch, 'status') as mock_data:
+                elasticsearch.client.indices.IndicesClient, 'stats') \
+                as mock_data:
             mock_data.return_value = "Not an exception"
             self._test_search_recent(search_mock, MockDatetimeYesterday,
                                      expected_indexes=['logstash-2014.06.12',
                                                        'logstash-2014.06.11'])
@@ -159,22 +166,30 @@ class TestSearchEngine(tests.TestCase):
     def test_search_no_indexes(self, search_mock):
         # Test when no indexes are valid
         with mock.patch.object(
-                pyelasticsearch.ElasticSearch, 'status') as mock_data:
-            mock_data.side_effect = pyelasticsearch.exceptions.\
-                ElasticHttpNotFoundError()
+                elasticsearch.client.indices.IndicesClient, 'stats') \
+                as mock_data:
+            mock_data.side_effect = elasticsearch.exceptions.NotFoundError
             self._test_search_recent(search_mock, MockDatetimeYesterday,
                                      expected_indexes=[])
 
     def test_search_days(self, search_mock):
         # Test when specific days are used.
         with mock.patch.object(
-                pyelasticsearch.ElasticSearch, 'status') as mock_data:
+                elasticsearch.client.indices.IndicesClient, 'stats') \
+                as mock_data:
             mock_data.return_value = "Not an exception"
             datetime.datetime = MockDatetimeYesterday
             result_set = self.engine.search(self.query, size=10, days=3,
                                             recent=False)
             self.assertEqual(0, len(result_set))
-            search_mock.assert_called_once_with(self.query, size=10,
-                                                index=['logstash-2014.06.12',
-                                                       'logstash-2014.06.11',
-                                                       'logstash-2014.06.10'])
+            search_mock.assert_called_once_with(
+                body={'query': {'query_string': {'query': self.query}}},
+                params={'size': 10},
+                index=['logstash-2014.06.12',
+                       'logstash-2014.06.11',
+                       'logstash-2014.06.10'])
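These tests all move the same seam: index validity is now probed through the indices API, so the mock target changes from ElasticSearch.status to IndicesClient.stats. A self-contained sketch of that pattern, assuming elasticsearch-py 7.x (no server needed, since the call is intercepted):

    # Sketch: patching IndicesClient.stats at the class level.
    from unittest import mock

    import elasticsearch

    with mock.patch.object(elasticsearch.client.indices.IndicesClient,
                           'stats') as mock_stats:
        mock_stats.return_value = {}  # anything but an exception = "valid"
        es = elasticsearch.Elasticsearch('http://localhost:9200')
        es.indices.stats(index='logstash-2014.06.12')  # hits the mock
        mock_stats.assert_called_once_with(index='logstash-2014.06.12')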
diff --git a/requirements.txt b/requirements.txt
index 75eeaf1e..42d7ff2e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 pbr>=1.8
 python-dateutil>=2.0
 pytz
-pyelasticsearch<1.0
+elasticsearch==7.14.0
 gerritlib
 python-daemon>=2.2.0
 irc>=17.0
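The exact pin is deliberate: elasticsearch-py's request API shifts across major versions (8.x, for instance, deprecates the body= parameter used throughout this patch), so the code is only known to work against the 7.x client. A quick sanity check, assuming Python 3.8+ for importlib.metadata:

    # Sketch: verify the installed client matches the pinned version.
    from importlib.metadata import version

    installed = version('elasticsearch')
    assert installed == '7.14.0', 'unexpected elasticsearch-py: %s' % installed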