# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import collections
import datetime
import fileinput
import logging
import urllib.parse

from goal_tools import apis

LOG = logging.getLogger(__name__)

# The base URL to the Gerrit REST API
GERRIT_API_URL = 'https://review.openstack.org/'

QUERY_OPTIONS = [
    'ALL_REVISIONS',
    'REVIEWER_UPDATES',
    'DETAILED_ACCOUNTS',
    'CURRENT_COMMIT',
    'LABELS',
    'DETAILED_LABELS',
]


def parse_review_id(line):
    parsed = urllib.parse.urlparse(line)
    if parsed.fragment:
        # https://review.openstack.org/#/c/561507/
        # lstrip() removes the leading '/' and 'c' characters from the
        # fragment, leaving '561507/...' for partition() to split.
        return parsed.fragment.lstrip('/c').partition('/')[0]
    else:
        # https://review.openstack.org/555353/
        return parsed.path.lstrip('/').partition('/')[0]


def parse_review_lists(filenames):
    """Generator that produces review IDs as strings.

    Read the files expecting to find one review URL or ID per line.
    Ignore lines that start with # as comments. Ignore blank lines.

    :param filenames: Iterable of filenames to read.
    :return: Generator of str

    """
    for line in fileinput.input(filenames):
        line = line.strip()
        if not line:
            continue
        if line.startswith('#'):
            continue
        LOG.debug('parsing %r', line)
        yield parse_review_id(line)


def query_gerrit(method, params={}):
    """Query the Gerrit REST API"""
    url = GERRIT_API_URL + method
    LOG.debug('fetching %s', url)
    raw = apis.requester(
        url, params=params,
        headers={'Accept': 'application/json'})
    return apis.decode_json(raw)


def _to_datetime(s):
    "Convert a string to a datetime.datetime instance"
    if s is None:
        return None
    # Ignore the trailing decimal seconds.
    s = s.rpartition('.')[0]
    return datetime.datetime.strptime(s, '%Y-%m-%d %H:%M:%S')


Participant = collections.namedtuple(
    'Participant', ['role', 'name', 'email', 'date'])


class Review:
    "The history of one code review"

    def __init__(self, id, data):
        self._id = id
        self._data = data

    @property
    def id(self):
        return self._id

    @property
    def raw_change(self):
        return self._data

    @property
    def url(self):
        return GERRIT_API_URL + self._id + '/'

    @property
    def created(self):
        return _to_datetime(self._data.get('created'))

    @property
    def is_merged(self):
        return self._data.get('status') == 'MERGED'

    @property
    def project(self):
        return self._data.get('project')

    @property
    def participants(self):
        yield self.owner
        yield from self.reviewers
        yield from self.uploaders

    @property
    def branch(self):
        return self._data.get('branch', '*unknown')

    @property
    def owner(self):
        owner = self._data.get('owner')
        if 'email' not in owner:
            owner['email'] = owner.get('email', 'no-reply@openstack.org')
        return Participant(
            'owner',
            owner.get('name'),
            owner.get('email'),
            self.created,
        )

    @property
    def uploaders(self):
        known_uploaders = set()
        # Record the owner of the patch as a known uploader so we do
        # not emit their information again. This means someone with
        # the "uploader" role can be counted as someone taking over a
        # patch created by someone else to fix it in some way.
        owner_email = self._data.get('owner', {}).get('email')
        known_uploaders.add(owner_email)
        # The revision data is stored in a mapping keyed by the SHA,
        # so in order to be consistent with how we return the
        # uploaders we sort the revisions based on the number before
        # we process them.
        revisions = sorted(
            self._data.get('revisions', {}).values(),
            key=lambda x: x.get('_number', 0),
        )
        for revision in revisions:
            uploader = revision.get('uploader', {})
            if 'email' not in uploader:
                uploader['email'] = 'no-reply@openstack.org'
            if uploader['email'] in known_uploaders:
                # Ignore duplicates
                continue
            known_uploaders.add(uploader['email'])
            yield Participant(
                'uploader',
                uploader.get('name'),
                uploader['email'],
                _to_datetime(revision.get('created')),
            )

    @property
    def reviewers(self):
        labels = self._data.get('labels', {})
        code_review_labels = labels.get('Code-Review', {}).get('all', [])
        for label in code_review_labels:
            if label.get('value') not in (2, -1):
                # Only report reviewers with negative reviews or
                # approvals to avoid counting anyone who is just
                # leaving lots of +1 votes without actually providing
                # feedback.
                continue
            yield Participant(
                'reviewer',
                label.get('name', 'Unknown Person'),
                label.get('email', 'unknown@example.com'),
                _to_datetime(label.get('date')),
            )
        workflow_labels = labels.get('Workflow', {}).get('all', [])
        for label in workflow_labels:
            if label.get('value', 0) != 1:
                continue
            yield Participant(
                'approver',
                label.get('name', 'Unknown Person'),
                label.get('email', 'unknown@example.com'),
                _to_datetime(label.get('date')),
            )

    @property
    def plus_ones(self):
        labels = self._data.get('labels', {})
        code_review_labels = labels.get('Code-Review', {}).get('all', [])
        for label in code_review_labels:
            if label.get('value') != 1:
                # Other types of reviews are counted elsewhere.
                continue
            yield Participant(
                'plus_one',
                label.get('name', 'Unknown Person'),
                label.get('email', 'unknown@example.com'),
                _to_datetime(label.get('date')),
            )


def cache_review(review_id, data, cache):
    """Add a review to the cache.

    Review data is only cached if the review is MERGED because
    otherwise it is more likely to change.

    :param review_id: Review ID of the review to look for.
    :type review_id: str
    :param data: Data structure returned by query_gerrit
    :type data: dict
    :param cache: Storage for repeated lookups.
    :type cache: goal_tools.cache.Cache

    """
    if data.get('status') == 'MERGED':
        cache[('review', str(review_id))] = data


class ReviewFactory:

    def __init__(self, cache):
        self._cache = cache

    def fetch(self, review_id):
        """Find the review in the cache or look it up in the API.

        Review data is only cached if the review is MERGED because
        otherwise it is more likely to change. The cache passed to the
        factory constructor is used for repeated lookups.

        :param review_id: Review ID of the review to look for.
        :type review_id: str

        """
        key = ('review', str(review_id))
        if key in self._cache:
            LOG.debug('found %s cached', review_id)
            return Review(review_id, self._cache[key])
        data = query_gerrit(
            'changes/' + review_id + '/detail',
            params={
                'o': QUERY_OPTIONS,
            },
        )
        response = Review(review_id, data)
        cache_review(review_id, data, self._cache)
        return response

    def query(self, query_string):
        "Generator for changes matching the query criteria."
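        # query_string uses Gerrit's change search syntax, for example
        # (the project name here is only illustrative):
        #   'status:merged project:openstack/goal-tools'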
        batch_size = 200
        offset = 0
        while True:
            changes = query_gerrit(
                'changes/',
                params={
                    'n': str(batch_size),
                    'start': offset,
                    'q': query_string,
                    'o': QUERY_OPTIONS,
                },
            )
            LOG.debug('%d changes', len(changes))
            for change in changes:
                review = Review(
                    change['_number'],
                    change,
                )
                cache_review(
                    review.id,
                    review.raw_change,
                    self._cache,
                )
                yield review
            if changes and changes[-1].get('_more_changes', False):
                offset += batch_size
            else:
                break
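

# A minimal usage sketch: fetch one review and print who participated in
# it. A plain dict stands in for goal_tools.cache.Cache (anything
# mapping-like works here), and the review ID comes from the example URL
# shown in parse_review_id() above. Running this performs a live API call.
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    factory = ReviewFactory({})
    review = factory.fetch('561507')
    for participant in review.participants:
        print(participant.role, participant.name, participant.date)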