From 712a2bbf1c00dc92046990106b2c92c63bc4eb4c Mon Sep 17 00:00:00 2001
From: Mark McLoughlin
Date: Fri, 6 Apr 2012 00:23:29 +0100
Subject: [PATCH] Add gerritdm

---
 database.py             |    6 ++
 gerrit/parse-reviews.py |  100 +++++++++++++++++++++++++++++++++
 gerritdm                |  119 ++++++++++++++++++++++++++++++++++++++++
 openstack-config/README |   43 +++++++++++++++
 reports.py              |   25 +++++++++
 5 files changed, 293 insertions(+)
 create mode 100644 gerrit/parse-reviews.py
 create mode 100755 gerritdm

diff --git a/database.py b/database.py
index 4fd9ac7..1f8b79c 100644
--- a/database.py
+++ b/database.py
@@ -129,6 +129,7 @@ class Employer:
         self.added = self.removed = self.count = self.changed = 0
         self.sobs = 0
         self.bugsfixed = [ ]
+        self.reviews = [ ]
         self.hackers = [ ]
 
     def AddCSet (self, patch):
@@ -147,6 +148,11 @@ class Employer:
             if bug.owner not in self.hackers:
                 self.hackers.append (bug.owner)
 
+    def AddReview (self, reviewer):
+        self.reviews.append(reviewer)
+        if reviewer not in self.hackers:
+            self.hackers.append (reviewer)
+
 Employers = { }
 
 def GetEmployer (name):
diff --git a/gerrit/parse-reviews.py b/gerrit/parse-reviews.py
new file mode 100644
index 0000000..064b853
--- /dev/null
+++ b/gerrit/parse-reviews.py
@@ -0,0 +1,100 @@
+import argparse
+import json
+import sys
+import time
+
+#
+# Print "date username email" for each code reviewer of a set of git commits
+#
+# python gerrit/parse-reviews.py essex-commits.txt openstack-config/launchpad-ids.txt < gerrit.json
+#
+
+parser = argparse.ArgumentParser(description='List reviewers in gerrit')
+
+parser.add_argument('commits', help='path to list of commits to consider')
+parser.add_argument('usermap', help='path to username to email map')
+
+args = parser.parse_args()
+
+username_to_email_map = {}  # first mapping seen for a username wins
+for l in open(args.usermap, 'r'):
+    (username, email) = l.split()
+    username_to_email_map.setdefault(username, email)
+
+commits = set(l.strip() for l in open(args.commits, 'r'))  # set: O(1) membership in interesting()
+
+class Reviewer:  # a gerrit account; a missing email falls back to the usermap file
+    def __init__(self, username, name, email):
+        self.username = username
+        self.name = name
+        self.email = email if email else username_to_email_map.get(self.username)
+
+    @classmethod
+    def parse(cls, r):
+        return cls(r.get('username'), r.get('name'), r.get('email'))
+
+class Approval:  # one approval record attached to a patch set
+    CodeReviewed, Approved, Submitted, Verified = range(4)
+
+    type_map = {
+        'CRVW': CodeReviewed,
+        'APRV': Approved,
+        'SUBM': Submitted,
+        'VRIF': Verified,
+    }
+
+    def __init__(self, type, value, date, by):
+        self.type = type
+        self.value = value
+        self.date = date
+        self.by = by
+
+    @classmethod
+    def parse(cls, a):
+        return cls(cls.type_map[a['type']],
+                   int(a['value']),
+                   time.gmtime(int(a['grantedOn'])),
+                   Reviewer.parse(a['by']))
+
+class PatchSet:  # one revision of a review plus its (possibly empty) approvals
+    def __init__(self, revision, approvals):
+        self.revision = revision
+        self.approvals = approvals
+
+    @classmethod
+    def parse(cls, ps):
+        return cls(ps['revision'],
+                   [Approval.parse(a) for a in ps.get('approvals', [])])
+
+class Review:  # one JSON record read from stdin
+    def __init__(self, id, patchsets):
+        self.id = id
+        self.patchsets = patchsets
+
+    @classmethod
+    def parse(cls, r):
+        return cls(r['id'],
+                   [PatchSet.parse(ps) for ps in r['patchSets']])
+
+reviews = [Review.parse(json.loads(l)) for l in sys.stdin if 'runTimeMilliseconds' not in l]
+
+def reviewers(review):  # (Reviewer, date) of the first non-zero CRVW approval per username
+    ret = {}
+    for ps in review.patchsets:
+        for a in ps.approvals:
+            if a.type == Approval.CodeReviewed and a.value:
+                ret.setdefault(a.by.username, (a.by, a.date))
+    return ret.values()
+
+def interesting(review):  # True if any patch set revision is one of the listed commits
+    for ps in review.patchsets:
+        if ps.revision in commits:
+            return True
+    return False
+
+for r in reviews:
+    if not interesting(r):
+        continue
+    for reviewer, date in reviewers(r):
+        if reviewer.email:
+            print time.strftime('%Y-%m-%d', date), reviewer.username, reviewer.email
diff --git a/gerritdm b/gerritdm
new file mode 100755
index 0000000..2d0adaa
--- /dev/null
+++ b/gerritdm
@@ -0,0 +1,119 @@
+#!/usr/bin/pypy
+#-*- coding:utf-8 -*-
+#
+
+#
+# This code is part of the LWN git data miner.
+#
+# Copyright 2007-11 Eklektix, Inc.
+# Copyright 2007-11 Jonathan Corbet
+# Copyright 2011 Germán Póo-Caamaño
+#
+# This file may be distributed under the terms of the GNU General
+# Public License, version 2.
+
+
+import database, ConfigFile, reports
+import getopt, datetime
+import sys
+
+Today = datetime.date.today()
+
+#
+# Control options.
+#
+MapUnknown = 0
+DevReports = 1
+DumpDB = 0
+CFName = 'gitdm.config'
+DirName = ''
+
+#
+# Options:
+#
+# -b dir        Specify the base directory to fetch the configuration files
+# -c cfile      Specify a configuration file
+# -d            Suppress individual developer stats (they are on by default)
+# -h hfile      HTML output to hfile
+# -l count      Maximum length for output lists
+# -o file       File for text output
+# -p prefix     Prefix for CSV output (NOT accepted: absent from the getopt string below)
+# -s            Ignore author SOB lines (NOT accepted: absent from the getopt string below)
+# -u            Map unknown employers to '(Unknown)'
+# -z            Dump out the hacker database at completion
+
+def ParseOpts ():  # apply the command-line options to the control globals and to reports
+    global MapUnknown, DevReports
+    global DumpDB
+    global CFName, DirName
+
+    opts, rest = getopt.getopt (sys.argv[1:], 'b:dc:h:l:o:uz')
+    for opt in opts:
+        if opt[0] == '-b':
+            DirName = opt[1]
+        elif opt[0] == '-c':
+            CFName = opt[1]
+        elif opt[0] == '-d':
+            DevReports = 0
+        elif opt[0] == '-h':
+            reports.SetHTMLOutput (open (opt[1], 'w'))
+        elif opt[0] == '-l':
+            reports.SetMaxList (int (opt[1]))
+        elif opt[0] == '-o':
+            reports.SetOutput (open (opt[1], 'w'))
+        elif opt[0] == '-u':
+            MapUnknown = 1
+        elif opt[0] == '-z':
+            DumpDB = 1
+
+def LookupStoreHacker (date, name, email):  # returns (date, hacker); stores the hacker or email if new
+    email = database.RemapEmail (email)
+    h = database.LookupEmail (email)
+    if h: # already there
+        return date, h
+    elist = database.LookupEmployer (email, MapUnknown)
+    h = database.LookupName (name)
+    if h: # new email
+        h.addemail (email, elist)
+        return date, h
+    return date, database.StoreHacker(name, elist, email)
+
+#
+# Here starts the real program.
+#
+ParseOpts ()
+
+#
+# Read the config files.
+#
+ConfigFile.ConfigFile (CFName, DirName)
+
+reviews = [LookupStoreHacker(*l.split()[:3]) for l in sys.stdin]  # input lines: date username email
+
+for date, reviewer in reviews:
+    reviewer.addreview(reviewer)
+    empl = reviewer.emailemployer(reviewer.email[0], ConfigFile.ParseDate(date))
+    empl.AddReview(reviewer)
+
+if DumpDB:
+    database.DumpDB ()
+database.MixVirtuals ()
+
+#
+# Say something
+#
+hlist = database.AllHackers ()
+elist = database.AllEmployers ()
+ndev = nempl = 0
+for h in hlist:
+    if len(h.reviews) > 0:
+        ndev += 1
+for e in elist:
+    if len(e.reviews) > 0:
+        nempl += 1
+reports.Write ('Processed %d reviews from %d developers\n' % (len(reviews), ndev))
+reports.Write ('%d employers found\n' % (nempl))
+
+if DevReports:
+    reports.DevReviews (hlist, len(reviews))
+reports.EmplReviews (elist, len(reviews))
diff --git a/openstack-config/README b/openstack-config/README
index 80333ab..dd1595f 100644
--- a/openstack-config/README
+++ b/openstack-config/README
@@ -65,3 +65,46 @@ Launchpad API docs are here:
 
   https://launchpad.net/+apidoc/1.0.html
   https://help.launchpad.net/API/launchpadlib
+
+== Gerrit ==
+
+First, generate a list of Change-Ids:
+
+  $> grep -v '^#' openstack-config/essex | \
+    while read project revisions; do \
+      (cd ~/git/openstack/$project; \
+       git fetch origin 2>/dev/null; \
+       git log $revisions); \
+    done | \
+    awk '/^    Change-Id: / { print $2 }' | \
+    split -l 100 -d - essex-change-ids-
+
+The output is split across files of 100 lines each because gerrit's
+query will only return 500 results at a time.
+
+Now, we generate a raw json query result:
+
+  $> for f in essex-change-ids-??; do
+    ssh -p 29418 review.openstack.org \
+      gerrit query --all-approvals --format=json \
+      $(awk -v ORS=" OR " '{print}' $f | sed 's/ OR $//') ; \
+  done > essex-reviews.txt
+
+Next, generate a list of commits:
+
+  $> grep -v '^#' openstack-config/essex | \
+    while read project revisions; do \
+      (cd ~/git/openstack/$project; \
+       git fetch origin 2>/dev/null; \
+       git log --pretty=tformat:%H $revisions); \
+    done > essex-commits.txt
+
+Now parse the json into a list of reviewers:
+
+  $> python gerrit/parse-reviews.py \
+      essex-commits.txt openstack-config/launchpad-ids.txt \
+      < essex-reviews.txt > essex-reviewers.txt
+
+Finally, generate the stats with:
+
+  $> python ./gerritdm -l 20 < essex-reviewers.txt
diff --git a/reports.py b/reports.py
index b3af17d..2bf408c 100644
--- a/reports.py
+++ b/reports.py
@@ -232,6 +232,25 @@ def ReportByRevs (hlist):
             break
     EndReport ()
 
+def CompareRevsEmpl (e1, e2):  # cmp function: employers with more reviews sort first
+    return len (e2.reviews) - len (e1.reviews)
+
+def ReportByRevsEmpl (elist):  # report employers by review count (sorts elist in place)
+    elist.sort (CompareRevsEmpl)
+    totalrevs = 0
+    for e in elist:
+        totalrevs += len (e.reviews)
+    count = 0
+    BeginReport ('Top reviewers by employer (total %d)' % totalrevs)
+    for e in elist:
+        scount = len (e.reviews)
+        if scount > 0:  # scount > 0 implies totalrevs > 0, so the division is safe
+            ReportLine (e.name, scount, (scount*100.0)/totalrevs)
+        count += 1
+        if count >= ListCount:
+            break
+    EndReport ()
+
 #
 # tester reporting.
 #
@@ -377,6 +396,12 @@ def DevBugReports (hlist, totalbugs):
 def EmplBugReports (elist, totalbugs):
     ReportByBCEmpl (elist, totalbugs)
 
+def DevReviews (hlist, totalreviews):  # totalreviews is accepted but not used
+    ReportByRevs (hlist)
+
+def EmplReviews (elist, totalreviews):  # totalreviews is accepted but not used
+    ReportByRevsEmpl (elist)
+
 def ReportByFileType (hacker_list):
     total = {}
     total_by_hacker = {}