Add gerritdm

This commit is contained in:
Mark McLoughlin 2012-04-06 00:23:29 +01:00
parent 171b4e8a6e
commit 712a2bbf1c
5 changed files with 293 additions and 0 deletions

View File

@ -129,6 +129,7 @@ class Employer:
self.added = self.removed = self.count = self.changed = 0
self.sobs = 0
self.bugsfixed = [ ]
self.reviews = [ ]
self.hackers = [ ]
def AddCSet (self, patch):
@ -147,6 +148,11 @@ class Employer:
if bug.owner not in self.hackers:
self.hackers.append (bug.owner)
def AddReview (self, reviewer):
    """Record one review credited to this employer.

    Every call appends ``reviewer`` to ``self.reviews``; ``reviewer`` is
    appended to ``self.hackers`` only if it is not already in that list.
    """
    already_known = reviewer in self.hackers
    self.reviews.append (reviewer)
    if not already_known:
        self.hackers.append (reviewer)
Employers = { }
def GetEmployer (name):

100
gerrit/parse-reviews.py Normal file
View File

@ -0,0 +1,100 @@
import argparse
import json
import sys
import time
#
# List reviewers for a set of git commits
#
# python parse-reviews.py essex-commits.txt openstack-config/launchpad-ids.txt < gerrit.json
#
# Command line: both positional arguments are paths to plain-text files.
parser = argparse.ArgumentParser(description='List reviewers in gerrit')
parser.add_argument('commits', help='path to list of commits to consider')
parser.add_argument('usermap', help='path to username to email map')
args = parser.parse_args()

# username -> email, read from lines holding a whitespace-separated
# "username email" pair.  setdefault() keeps the first email seen for a
# username.  The file is closed as soon as it has been read.
username_to_email_map = {}
with open(args.usermap, 'r') as usermap_file:
    for l in usermap_file:
        if not l.strip():
            # Tolerate blank lines instead of failing to unpack them.
            continue
        (username, email) = l.split()
        username_to_email_map.setdefault(username, email)

# Commit ids to consider, one per line.  Stored as a set because the only
# use of this collection is membership testing of patch set revisions.
with open(args.commits, 'r') as commits_file:
    commits = set(l.strip() for l in commits_file)
class Reviewer:
    """A gerrit account: username, display name and email address.

    When no email is supplied, the address is looked up by username in the
    module-level ``username_to_email_map`` (``None`` if it is absent there).
    """

    def __init__(self, username, name, email):
        self.username = username
        self.name = name
        if not email:
            # Fall back to the username -> email map.
            email = username_to_email_map.get(username)
        self.email = email

    @classmethod
    def parse(cls, r):
        """Build a Reviewer from a decoded JSON dict; missing keys become None."""
        username = r.get('username')
        name = r.get('name')
        email = r.get('email')
        return cls(username, name, email)
class Approval:
    """One approval entry attached to a patch set."""

    # Integer codes for the approval categories handled by this script.
    CodeReviewed = 0
    Approved = 1
    Submitted = 2
    Verified = 3

    # Maps the 'type' string of a JSON approval record to its integer code.
    type_map = {
        'CRVW': CodeReviewed,
        'APRV': Approved,
        'SUBM': Submitted,
        'VRIF': Verified,
    }

    def __init__(self, type, value, date, by):
        self.type, self.value = type, value
        self.date, self.by = date, by

    @classmethod
    def parse(cls, a):
        """Build an Approval from a decoded JSON approval dict.

        Reads the 'type', 'value', 'grantedOn' and 'by' keys.  'grantedOn'
        is converted with time.gmtime() and 'by' is parsed as a Reviewer.
        Raises KeyError when 'type' is not a key of ``type_map``.
        """
        kind = cls.type_map[a['type']]
        score = int(a['value'])
        granted = time.gmtime(int(a['grantedOn']))
        who = Reviewer.parse(a['by'])
        return cls(kind, score, granted, who)
class PatchSet:
    """A single patch set: its revision id and the approvals recorded on it."""

    def __init__(self, revision, approvals):
        self.revision, self.approvals = revision, approvals

    @classmethod
    def parse(cls, ps):
        """Build a PatchSet from a decoded JSON dict.

        'revision' is required; a missing 'approvals' key yields an empty
        approval list.
        """
        revision = ps['revision']
        approvals = []
        for raw in ps.get('approvals', []):
            approvals.append(Approval.parse(raw))
        return cls(revision, approvals)
class Review:
    """A gerrit change: its id and the list of its patch sets."""

    def __init__(self, id, patchsets):
        self.id, self.patchsets = id, patchsets

    @classmethod
    def parse(cls, r):
        """Build a Review from a decoded JSON dict with 'id' and 'patchSets' keys."""
        change_id = r['id']
        patchsets = []
        for raw in r['patchSets']:
            patchsets.append(PatchSet.parse(raw))
        return cls(change_id, patchsets)
# Decode one JSON record per stdin line into a Review.  Lines containing
# 'runTimeMilliseconds' are skipped instead of parsed (presumably the
# statistics record gerrit emits after each query -- TODO confirm).
reviews = []
for l in sys.stdin:
    if 'runTimeMilliseconds' in l:
        continue
    reviews.append(Review.parse(json.loads(l)))
def reviewers(review):
    """Return the distinct code reviewers of ``review``.

    Scans every approval of every patch set and keeps those whose type is
    ``Approval.CodeReviewed`` and whose value is non-zero.  Reviewers are
    de-duplicated by username; the first matching approval encountered
    supplies the date.

    Returns the values of a dict, i.e. ``(Reviewer, date)`` pairs.
    """
    ret = {}
    # Iterate the argument itself; the previous code read the global ``r``
    # and so only worked when called from a loop that happened to bind it.
    for ps in review.patchsets:
        for a in ps.approvals:
            if a.type == Approval.CodeReviewed and a.value:
                ret.setdefault(a.by.username, (a.by, a.date))
    return ret.values()
def interesting(review):
    """Return True if any patch set of ``review`` has a revision in ``commits``.

    ``commits`` is the module-level collection of commit ids loaded from the
    command-line supplied file.
    """
    # Use the argument rather than the global ``r`` the old body relied on.
    return any(ps.revision in commits for ps in review.patchsets)
for r in reviews:
if not interesting(r):
continue
for reviewer, date in reviewers(r):
if reviewer.email:
print time.strftime('%Y-%m-%d', date), reviewer.username, reviewer.email

119
gerritdm Executable file
View File

@ -0,0 +1,119 @@
#!/usr/bin/pypy
#-*- coding:utf-8 -*-
#
#
# This code is part of the LWN git data miner.
#
# Copyright 2007-11 Eklektix, Inc.
# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
# Copyright 2011 Germán Póo-Caamaño <gpoo@gnome.org>
#
# This file may be distributed under the terms of the GNU General
# Public License, version 2.
import database, ConfigFile, reports
import getopt, datetime
import sys
# Today's date, computed once when the script starts.
Today = datetime.date.today()
#
# Control options.
#
# Defaults for the globals that ParseOpts() updates from the command line.
#
# Set to 1 by -u; passed as second argument to database.LookupEmployer.
MapUnknown = 0
# Set to 0 by -d; when non-zero, reports.DevReviews is called at the end.
DevReports = 1
# Set to 1 by -z; when non-zero, database.DumpDB is called.
DumpDB = 0
# Configuration file name handed to ConfigFile.ConfigFile; overridden by -c.
CFName = 'gitdm.config'
# Base directory handed to ConfigFile.ConfigFile; overridden by -b.
DirName = ''
#
# Options:
#
# -b dir        Specify the base directory to fetch the configuration files
# -c cfile      Specify a configuration file
# -d            Omit the individual developer report
# -h hfile      HTML output to hfile
# -l count      Maximum length for output lists
# -o file       File for text output
# -u            Map unknown employers to '(Unknown)'
# -z            Dump out the hacker database at completion
#
# The -p (prefix for CSV output) and -s (ignore author SOB lines) options
# are not accepted by this tool: they are absent from the getopt string.
def ParseOpts ():
    """Parse sys.argv[1:] and update the module-level control options.

    -h, -l and -o are applied immediately through the reports module; the
    remaining options only set globals.  Positional arguments are ignored.

    Raises getopt.GetoptError on an unknown option or a missing option
    argument.
    """
    global MapUnknown, DevReports
    global DumpDB
    global CFName, DirName

    opts, rest = getopt.getopt (sys.argv[1:], 'b:dc:h:l:o:uz')
    for opt, arg in opts:
        if opt == '-b':
            DirName = arg
        elif opt == '-c':
            CFName = arg
        elif opt == '-d':
            # -d switches the per-developer report off.
            DevReports = 0
        elif opt == '-h':
            reports.SetHTMLOutput (open (arg, 'w'))
        elif opt == '-l':
            reports.SetMaxList (int (arg))
        elif opt == '-o':
            reports.SetOutput (open (arg, 'w'))
        elif opt == '-u':
            MapUnknown = 1
        elif opt == '-z':
            DumpDB = 1
def LookupStoreHacker (date, name, email):
    """Resolve (name, email) to a hacker record, creating one if needed.

    The email is first passed through database.RemapEmail.  Lookup order:
    by email; then by name, in which case the email and its employer list
    are attached to the record found; otherwise a new record is stored.

    Returns the tuple (date, hacker); ``date`` is passed through untouched.
    """
    email = database.RemapEmail (email)
    hacker = database.LookupEmail (email)
    if not hacker:
        elist = database.LookupEmployer (email, MapUnknown)
        hacker = database.LookupName (name)
        if hacker:
            # Known person, new address.
            hacker.addemail (email, elist)
        else:
            hacker = database.StoreHacker (name, elist, email)
    return date, hacker
#
# Here starts the real program.
#
ParseOpts ()

#
# Read the config files.
#
ConfigFile.ConfigFile (CFName, DirName)

#
# The first three whitespace-separated fields of each stdin line are
# handed to LookupStoreHacker as (date, name, email).
#
reviews = []
for l in sys.stdin:
    reviews.append (LookupStoreHacker (*l.split ()[:3]))

#
# Credit each review to the hacker and to the employer associated with
# the hacker's first email address on the review date.
#
for date, reviewer in reviews:
    reviewer.addreview (reviewer)
    first_email = reviewer.email[0]
    empl = reviewer.emailemployer (first_email, ConfigFile.ParseDate (date))
    empl.AddReview (reviewer)

if DumpDB:
    database.DumpDB ()
database.MixVirtuals ()

#
# Say something
#
hlist = database.AllHackers ()
elist = database.AllEmployers ()
ndev = sum (1 for h in hlist if len (h.reviews) > 0)
nempl = sum (1 for e in elist if len (e.reviews) > 0)
reports.Write ('Processed %d review from %d developers\n' % (len(reviews), ndev))
reports.Write ('%d employers found\n' % (nempl))

if DevReports:
    reports.DevReviews (hlist, len(reviews))
reports.EmplReviews (elist, len(reviews))

View File

@ -65,3 +65,46 @@ Launchpad API docs are here:
https://launchpad.net/+apidoc/1.0.html
https://help.launchpad.net/API/launchpadlib
== Gerrit ==
First, generate a list of Change-Ids:
$> grep -v '^#' openstack-config/essex | \
while read project revisions; do \
(cd ~/git/openstack/$project; \
git fetch origin 2>/dev/null; \
git log $revisions); \
done | \
awk '/^ Change-Id: / { print $2 }' | \
split -l 100 -d - essex-change-ids-
The output is split across files of 100 lines each because gerrit's
query will only return 500 results at a time.
Now, we generate a raw json query result:
$> for f in essex-change-ids-??; do
ssh -p 29418 review.openstack.org \
gerrit query --all-approvals --format=json \
$(awk -v ORS=" OR " '{print}' $f | sed 's/ OR $//') ; \
done > essex-reviews.txt
Next, generate a list of commits:
$> grep -v '^#' openstack-config/essex | \
while read project revisions; do \
(cd ~/git/openstack/$project; \
git fetch origin 2>/dev/null; \
git log --pretty=format:%H $revisions); \
done > essex-commits.txt
Now parse the json into a list of reviewers:
$> python gerrit/parse-reviews.py \
essex-commits.txt openstack-config/launchpad-ids.txt \
< essex-reviews.txt > essex-reviewers.txt
Finally, generate the stats with:
$> python ./gerritdm -l 20 < essex-reviewers.txt

View File

@ -232,6 +232,25 @@ def ReportByRevs (hlist):
break
EndReport ()
def CompareRevsEmpl (e1, e2):
    """cmp-style comparator: orders employers by descending review count."""
    count1 = len (e1.reviews)
    count2 = len (e2.reviews)
    return count2 - count1
def ReportByRevsEmpl (elist):
    """Report employers ranked by number of reviews.

    Sorts ``elist`` in place by descending review count, then writes one
    report line (name, review count, percentage of all reviews) for each
    employer with at least one review, stopping once ListCount lines have
    been written.
    """
    # Stable descending sort; employers with equal counts keep their order.
    elist.sort (key = lambda empl: len (empl.reviews), reverse = True)
    totalrevs = sum (len (empl.reviews) for empl in elist)
    BeginReport ('Top reviewers by employer (total %d)' % totalrevs)
    shown = 0
    for empl in elist:
        nrevs = len (empl.reviews)
        if nrevs > 0:
            ReportLine (empl.name, nrevs, (nrevs*100.0)/totalrevs)
            shown += 1
        if shown >= ListCount:
            break
    EndReport ()
#
# tester reporting.
#
@ -377,6 +396,12 @@ def DevBugReports (hlist, totalbugs):
# Employer-level bug reporting entry point: forwards both arguments
# unchanged to ReportByBCEmpl.
def EmplBugReports (elist, totalbugs):
    ReportByBCEmpl (elist, totalbugs)
# Developer-level review reporting entry point: runs ReportByRevs on hlist.
# totalreviews is accepted but not used here.
def DevReviews (hlist, totalreviews):
    ReportByRevs (hlist)
# Employer-level review reporting entry point: runs ReportByRevsEmpl on
# elist.  totalreviews is accepted but not used here.
def EmplReviews (elist, totalreviews):
    ReportByRevsEmpl (elist)
def ReportByFileType (hacker_list):
total = {}
total_by_hacker = {}