Refactor candidacy checking tools

This is a large change as it does several things

1. Moves to using the gerrit REST API to collecting open reviews (as
   opposed to ssh)
2. Moves the selection of validating commits from scraping git logs to
   the gerrit REST API
3. Uses the new utils.py function for getting the project data
4. Avoids any git checkouts/downloads during the validation process
5. tools/check-new-candidacy still checks all open changes
6. tools/check-ptl-candidacy.py checks a single change, now specified as
   a change ID rather then a file path

Work to be done in follow-up commits

1. Add testing, now that this is more modular we can mock json blobs to
   test code and avoid regressions
2. Add a manual toll to specify a project and community member and check
   that, even if it doesn't match data in an open review
3. Add quality tools such as flake8 / yamllint to keep code/data neat
4. Modify the docs generation to understand that files are now IRC nicks
   and to get the change author as the candidate.

Change-Id: Ibd7fad3eb4d39f1edca624b981fa602d2b4c4d40
This commit is contained in:
Tony Breeds 2016-09-07 14:21:11 +10:00
parent 176a434465
commit cb6504e9af
5 changed files with 234 additions and 130 deletions

View File

@ -3,3 +3,5 @@
# process, which may cause wedges in the gate later.
pytz>=2013.6 # MIT
PyYAML>=3.1.0 # MIT
requests>=2.10.0 # Apache-2.0
ndg-httpsclient>=0.4.2;python_version<'3.0' # BSD

View File

@ -1,104 +1,59 @@
#!/usr/bin/env python
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import yaml
import os
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import sys
import urllib
import re
import datetime
import pytz
DATE_MIN = '2015-03-04'
DATE_MAX = '2016-03-03'
BASE_URL = 'https://git.openstack.org/cgit'
PROJECTS_TAG = 'march-2016-elections'
PROJECTS_URL = ('%s/openstack/governance/plain/reference/projects.yaml?%s' %
(BASE_URL, PROJECTS_TAG))
date_min = datetime.datetime.strptime(DATE_MIN, '%Y-%m-%d').strftime('%s')
date_max = datetime.datetime.strptime(DATE_MAX, '%Y-%m-%d').strftime('%s')
now = datetime.datetime.utcnow().replace(tzinfo=pytz.utc,
hour=0,
minute=0,
second=0,
microsecond=0)
import check_candidacy
import utils
def check_atc_date(atc):
if 'expires-in' not in atc:
return False
expires_in = datetime.datetime.strptime(atc['expires-in'], '%B %Y')
expires_in = expires_in.replace(tzinfo=pytz.utc)
return now < expires_in
def main():
description = ('Check if the owner of a change is a valid candidate as '
'described in the change')
parser = argparse.ArgumentParser(description)
parser.add_argument(dest='change_id', help=('A valid gerrit change ID'))
parser.add_argument('--limit', dest='limit', type=int, default=1,
help=('How many validating changes to report. '
'A negative value means report many. '
'Default: %(default)s'))
parser.add_argument('--tag', dest='tag', default=utils.PROJECTS_TAG,
help=('The governance tag to validate against. '
'Default: %(default)s'))
args = parser.parse_args()
review = utils.get_reviews(args.change_id)[0]
owner = review.get('owner', {})
if args.limit < 0:
args.limit = 100
def check_date(date):
epoch = datetime.datetime.strptime(date, '%Y-%m-%d').strftime('%s')
if epoch > date_min and epoch < date_max:
return True
return False
try:
found = check_candidacy.check_candidacy(review['change_id'],
review=review)
except Exception as exc:
print("[E] %s\n\n" % (exc))
else:
if found:
print('SUCESS: %s is a valid candidate\n\n' % (owner['email']))
return 0
else:
print('[E]: %s is not a valid candidate\n\n' % (owner['email']))
return 1
def escape_author(author):
author = author.replace(' ', '+')
author = author.replace('_', '+')
return author
try:
project_name = os.path.basename(os.path.dirname(sys.argv[1]))
author = os.path.basename(sys.argv[1])[:-4]
except:
print "usage: %s candidacy_file" % sys.argv[0]
exit(1)
author = author.replace('_', ' ')
if not os.path.isfile('.projects.yaml'):
open('.projects.yaml', 'w').write(
urllib.urlopen(PROJECTS_URL).read()
)
projects = yaml.load(open('.projects.yaml'))
project_list = None
if project_name == "TC":
project_list = projects.values()
else:
for key in projects.keys():
if key.title().replace(' ', '_') == project_name:
project_list = [projects[key]]
break
if project_list is None:
print "Can't find project [%s] in %s" % (project_name, projects.keys())
exit(1)
for project in project_list:
if 'extra-atcs' in project:
for atc in project['extra-atcs']:
if atc['name'] == author and check_atc_date(atc):
print "Valid extra ATC record", atc
exit(0)
for deliverable in project['deliverables'].values():
for repo_name in deliverable["repos"]:
url = '%s/%s/log/?qt=author&q=%s' % (BASE_URL, repo_name,
escape_author(author))
print "Querying: %s" % url
found = False
for l in urllib.urlopen(url).read().split('\n'):
if "commit/?id=" not in l:
continue
try:
url = ('http://git.openstack.org%s' %
re.search("href='([^']*)'", l).groups()[0])
date = re.search('<td>([^<]*)</td>', l).groups()[0]
if not check_date(date):
continue
except:
continue
print "[%s]: %s" % (date, url)
found = True
if found:
exit(0)
exit(1)
if __name__ == "__main__":
sys.exit(main())

View File

@ -1,46 +1,59 @@
#!/usr/bin/env python
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import subprocess
import json
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import sys
def pread(argv):
return subprocess.Popen(argv, stdout=subprocess.PIPE).communicate()[0]
def execute(argv):
return subprocess.Popen(argv).wait()
import check_candidacy
import utils
def get_reviews():
reviews = pread(["ssh", "-p", "29418", "review.openstack.org", "gerrit",
"query", "--format=JSON", "status:open",
"project:'openstack/election'"]).split('\n')
results = []
for i in reviews:
if "status" not in i:
continue
review = json.loads(i)
if review['status'] == 'NEW':
results.append(int(review['number']))
return results
return utils.get_reviews('is:open project:%s' %
(utils.ELECTION_REPO))
def check_reviews():
for review in get_reviews():
if execute(["git", "review", "-d", "%d" % review]):
if review['status'] != 'NEW':
continue
print
fl = filter(lambda x: x.startswith("candidates/"),
pread(["git", "diff", "--name-only", "HEAD^"]).split('\n'))
if not len(fl):
print "[E] No candidacy added"
print('Checking %s/%d' %
(utils.GERRIT_BASE, review['_number']))
if not len(utils.candidate_files(review)):
print("[E] No candidacy added")
continue
for candidate in fl:
print "[->] https://review.openstack.org/%d - %s" % (
review, candidate)
execute(["./tools/check-candidacy.py", candidate])
execute(["git", "checkout", "master"])
owner = review.get('owner', {})
try:
found = check_candidacy.check_candidacy(review['change_id'],
review=review)
except Exception as exc:
print("[E] %s\n\n" % (exc))
else:
if found:
print('SUCESS: %s is a valid candidate\n\n' %
(owner['email']))
else:
print('[E]: %s is not a valid candidate\n\n' %
(owner['email']))
return 0
if __name__ == "__main__":
check_reviews()
sys.exit(check_reviews())

78
tools/check_candidacy.py Executable file
View File

@ -0,0 +1,78 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import datetime
import os
import utils
# FIXME: Printing from library function isn't great.
# change API to return the messages and let the consumer decide what to
# do with them
def check_candidacy(change_id, limit=1, tag=None, review=None):
def pretty_datetime(dt_str):
dt = datetime.datetime.strptime(dt_str.split('.')[0],
'%Y-%m-%d %H:%M:%S')
return dt.strftime('%Y-%m-%d')
projects = utils.get_projects(tag=tag)
# If there is more than one review that matches this change_id then all
# bets are off
review = review or utils.get_reviews(change_id)[0]
owner = review.get('owner', {})
found = 0
for filename in utils.candidate_files(review):
_, series, project_name, candidate_file = filename.split(os.sep)
if project_name != 'TC':
project_name = utils.dir2name(project_name, projects)
if project_name in ['Stable branch Maintenance']:
project_list = projects.values()
else:
project_list = [projects[project_name]]
for project in project_list:
for atc in project.get('extra-atcs', []):
if (atc['email'] == owner['email'] and
utils.check_atc_date(atc)):
print("Valid extra ATC record:\n\t%s" % (atc))
found += 1
if found >= limit:
return True
for deliverable in project['deliverables'].values():
for repo_name in deliverable["repos"]:
query = ('is:merged after:"%s" before:"%s" '
'owner:%s project:%s' %
(utils.gerrit_datetime(utils.PERIOD_START),
utils.gerrit_datetime(utils.PERIOD_END),
owner['email'], repo_name))
print('Checking %s for merged changes by %s' %
(repo_name, owner['email']))
for review in utils.get_reviews(query):
url = ('%s/#/q/%s' %
(utils.GERRIT_BASE, review['change_id']))
print('%2d: %s %s' %
(found, pretty_datetime(review['submitted']),
url))
found += 1
if found >= limit:
return True
return found > 0

View File

@ -1,5 +1,3 @@
#!/usr/bin/env python
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
@ -12,17 +10,73 @@
# License for the specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import datetime
import json
import os
import pickle
import pytz
import requests
import time
import urllib
import yaml
import pickle
# Per election constants
SERIES_NAME = 'ocata'
# 2015-09-05 00:00:00 +0000
PERIOD_START = datetime.datetime(2015, 9, 5, 0, 0, 0, tzinfo=pytz.utc)
# 2016-09-04 23:59:59 +0000
PERIOD_END = datetime.datetime(2016, 9, 4, 23, 59, 59, tzinfo=pytz.utc)
PROJECTS_TAG = 'sept-2016-elections'
# Library constants
GERRIT_BASE = 'https://review.openstack.org'
ELECTION_REPO = 'openstack/election'
BASE_URL = 'https://git.openstack.org/cgit'
PROJECTS_URL = ('%s/openstack/governance/plain/reference/projects.yaml' %
(BASE_URL))
# Gerrit functions
def gerrit_datetime(dt):
return dt.strftime('%Y-%m-%d %H:%M:%S %z')
def gerrit_query(url):
r = requests.get(url)
if r.status_code == 200:
data = json.loads(r.text[4:])
else:
data = []
return data
def get_reviews(query):
opts = ['CURRENT_REVISION', 'CURRENT_FILES', 'DETAILED_ACCOUNTS']
opts_str = '&o=%s' % ('&o='.join(opts))
url = ('%s/changes/?q=%s%s' %
(GERRIT_BASE, urllib.quote_plus(query, safe='/:=><'), opts_str))
return gerrit_query(url)
def candidate_files(review):
return list(filter(lambda x: x.startswith("candidates/"),
list(review['revisions'].values())[0]['files'].keys()))
# Governance functions
def check_atc_date(atc):
if 'expires-in' not in atc:
return False
expires_in = datetime.datetime.strptime(atc['expires-in'], '%B %Y')
expires_in = expires_in.replace(tzinfo=pytz.utc)
return PERIOD_END < expires_in
def get_projects(tag=None):
url = PROJECTS_URL
cache_file = '.projects.pkl'
@ -35,13 +89,14 @@ def get_projects(tag=None):
if (not os.path.isfile(cache_file) or
os.stat(cache_file).st_size < 100 or
os.stat(cache_file).st_mtime + (7*24*3600) < time.time()):
print "[+] Updating %s" % cache_file
print("[+] Updating %s" % (cache_file))
data = yaml.safe_load(urllib.urlopen(url).read())
pickle.dump(data, open(cache_file, "w"))
return pickle.load(open(cache_file))
# Election functions
def name2dir(name):
"""Convert project name to directory name: only [a-zA-Z_] in camelcase"""
name = name.replace(' ', '_').replace('-', '_')
@ -55,3 +110,4 @@ def dir2name(name, projects):
pname = project_name.lower().replace(' ', '').replace('-', '').lower()
if name == pname:
return project_name
raise ValueError(('%s does not match any project' % (name)))