378 lines
10 KiB
Python
Executable File
378 lines
10 KiB
Python
Executable File
#!/usr/bin/python
|
|
#
|
|
|
|
#
|
|
# This code is part of the LWN git data miner.
|
|
#
|
|
# Copyright 2007-8 LWN.net
|
|
# Copyright 2007-8 Jonathan Corbet <corbet@lwn.net>
|
|
#
|
|
# This file may be distributed under the terms of the GNU General
|
|
# Public License, version 2.
|
|
|
|
|
|
import database, csv, ConfigFile, reports
|
|
import getopt, datetime
|
|
import os, re, sys, rfc822, string
|
|
from patterns import *
|
|
|
|
# Reference date; grabpatch() clamps any commit dated later than this.
Today = datetime.date.today()

#
# Control options.  These are the defaults; ParseOpts() overrides them
# from the command line.
#
MapUnknown = 0            # -u sets this; handed to database.LookupEmployer()
DevReports = 1            # -d clears this; gates reports.DevReports()
DateStats = 0             # -D sets this; PrintDateStats() runs, then exit
AuthorSOBs = 1            # -s clears this; keep an author's own signoff
FileFilter = None         # -r stores a compiled regex here
CSVFile = None            # -x stores an open output file here
AkpmOverLt = 0            # -a sets this; enables TrimLTSOBs()
DumpDB = 0                # -z sets this; database.DumpDB() at completion
CFName = 'gitdm.config'   # -c replaces this configuration file name
|
|
#
|
|
# Options:
|
|
#
|
|
# -a Andrew Morton's signoffs shadow Linus's
|
|
# -c cfile Specify a configuration file
|
|
# -d Omit the individual developer stats (employer reports only)
|
|
# -D Output date statistics
|
|
# -h hfile HTML output to hfile
|
|
# -l count Maximum length for output lists
|
|
# -o file File for text output
|
|
# -r pattern Restrict to files matching pattern
|
|
# -s Ignore author SOB lines
|
|
# -u Map unknown employers to '(Unknown)'
|
|
# -x file.csv Export raw statistics as CSV
|
|
# -z Dump out the hacker database at completion
|
|
|
|
def ParseOpts ():
    """Apply the command-line flags in sys.argv to the control options.

    -h, -l and -o are handed straight to the reports module; every other
    flag updates one of the module-level globals named below.  Arguments
    left over after the flags are ignored.
    """
    global MapUnknown, DevReports
    global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB
    global CFName, CSVFile

    flags, leftover = getopt.getopt (sys.argv[1:], 'adc:Dh:l:o:r:sux:z')
    for flag, value in flags:
        if flag == '-a':
            AkpmOverLt = 1
        elif flag == '-c':
            CFName = value
        elif flag == '-d':
            DevReports = 0
        elif flag == '-D':
            DateStats = 1
        elif flag == '-h':
            reports.SetHTMLOutput (open (value, 'w'))
        elif flag == '-l':
            reports.SetMaxList (int (value))
        elif flag == '-o':
            reports.SetOutput (open (value, 'w'))
        elif flag == '-r':
            print ('Filter on "%s"' % (value))
            FileFilter = re.compile (value)
        elif flag == '-s':
            AuthorSOBs = 0
        elif flag == '-u':
            MapUnknown = 1
        elif flag == '-x':
            CSVFile = open (value, 'w')
            print ("open output file " + value + "\n")
        elif flag == '-z':
            DumpDB = 1
|
|
|
|
|
|
|
|
def LookupStoreHacker (name, email):
    """Return the database entry for a developer, creating it if needed.

    The address is first passed through database.RemapEmail().  An entry
    already known by that address is returned as is.  Otherwise an entry
    found by name gains the address (with its employer list) and is
    returned; failing that, a new entry is stored and returned.
    """
    email = database.RemapEmail (email)
    known = database.LookupEmail (email)
    if known:
        return known                          # already there
    employers = database.LookupEmployer (email, MapUnknown)
    byname = database.LookupName (name)
    if not byname:
        return database.StoreHacker(name, employers, email)
    byname.addemail (email, employers)        # new email for a known name
    return byname
|
|
|
|
#
|
|
# Date tracking.
|
|
#
|
|
|
|
# Maps a date key to the number of changed lines recorded for it.
DateMap = { }

def AddDateLines(date, lines):
    """Add `lines` to the running count kept in DateMap for `date`.

    Patches of more than 1000000 lines are reported on stdout and left
    out of the map entirely.
    """
    if lines > 1000000:
        print ('Skip big patch (%d)' % lines)
        return
    DateMap[date] = DateMap.get (date, 0) + lines
|
|
|
|
def PrintDateStats():
    """Write the per-date line counts in DateMap to the file 'datelc'.

    One line is written per date, in ascending date order, holding the
    date as year/month/day, the count recorded for that date and the
    running total so far.  The file is created in the current directory
    and is closed before returning, even if a write fails.
    """
    total = 0
    datef = open ('datelc', 'w')
    try:
        # sorted() works on the dict directly, so this does not rely on
        # keys() returning a list that can be sorted in place.
        for date in sorted (DateMap):
            total += DateMap[date]
            datef.write ('%d/%02d/%02d %6d %7d\n' % (date.year, date.month,
                                                      date.day,
                                                      DateMap[date], total))
    finally:
        datef.close ()
|
|
|
|
|
|
#
|
|
# Let's slowly try to move some smarts into this class.
|
|
#
|
|
class patch:
    """Everything collected about one changeset while its log is parsed.

    A new instance starts out attributed to a placeholder 'Unknown hacker'
    and dated Today; grabpatch() overwrites those fields as the matching
    header lines are seen.
    """

    def __init__ (self, commit):
        """Create an empty record for the changeset identified by `commit`."""
        self.commit = commit
        self.merge = self.added = self.removed = 0
        self.author = LookupStoreHacker('Unknown hacker', 'unknown@hacker.net')
        self.email = 'unknown@hacker.net'
        # grabpatch() reads p.date unconditionally once a changeset ends;
        # give it a value so a log entry without a date line does not
        # raise AttributeError there.
        self.date = Today
        self.sobs = [ ]       # (email, hacker) pairs from signoff lines
        self.reviews = [ ]    # hackers named in Reviewed-by: tags
        self.testers = [ ]    # hackers named in Tested-by: tags
        self.reports = [ ]    # hackers named in Reported-by: tags

    def addreviewer (self, reviewer):
        """Record `reviewer` as having reviewed this changeset."""
        self.reviews.append (reviewer)

    def addtester (self, tester):
        """Record `tester` as having tested this changeset."""
        self.testers.append (tester)

    def addreporter (self, reporter):
        """Record `reporter` as having reported the problem fixed here."""
        self.reports.append (reporter)
|
|
#
|
|
# The core hack for grabbing the information about a changeset.
|
|
#
|
|
def grabpatch():
    """Read the next changeset from stdin and return it as a patch object.

    Input is consumed line by line through the module-level NextLine
    look-ahead.  Lines are skipped until one matches Pcommit; every line
    after that is classified (author, signoff, review/test/report tags,
    merge marker, date, diff content) until the next commit line or the
    end of input.  Returns None if input runs out before a commit line
    is found.

    Side effects: advances NextLine.  For a non-merge changeset that has
    counted lines (or when no file filter is set), the global totals are
    updated, the changed-line count is recorded against the patch date,
    and the employers of the author and of each signer are credited.
    """
    global NextLine, TotalAdded, TotalRemoved, TotalChanged

    # Skip ahead to the line that opens the next commit.
    while True:
        m = Pcommit.match (NextLine)
        if m:
            break
        NextLine = sys.stdin.readline ()
        if not NextLine:
            return

    p = patch(m.group (1))
    NextLine = sys.stdin.readline ()
    # With a file filter set, diff lines are ignored until a file header
    # line says otherwise.
    ignore = (FileFilter is not None)
    while NextLine:
        Line = NextLine
        #
        # If this line starts a new commit, drop out.  NextLine is left
        # pointing at it for the following call.
        #
        m = Pcommit.match (Line)
        if m:
            break
        NextLine = sys.stdin.readline ()
        #
        # Maybe it's an author line?
        #
        m = Pauthor.match (Line)
        if m:
            p.email = database.RemapEmail (m.group (2))
            p.author = LookupStoreHacker(m.group (1), p.email)
            continue
        #
        # Could be a signed-off-by:
        #
        m = Psob.search (Line)
        if m:
            email = database.RemapEmail (m.group (2))
            sobber = LookupStoreHacker(m.group (1), email)
            if sobber != p.author or AuthorSOBs:
                p.sobs.append ((email, LookupStoreHacker(m.group (1), m.group (2))))
            continue
        #
        # Various other tags of interest.  The credit calls are handed
        # this changeset (p), not the patch class.
        #
        m = Preview.search (Line)   # Reviewed-by:
        if m:
            email = database.RemapEmail (m.group (2))
            p.addreviewer (LookupStoreHacker(m.group (1), email))
            continue
        m = Ptest.search (Line)     # Tested-by:
        if m:
            email = database.RemapEmail (m.group (2))
            p.addtester (LookupStoreHacker (m.group (1), email))
            p.author.testcredit (p)
            continue
        m = Prep.search (Line)      # Reported-by:
        if m:
            email = database.RemapEmail (m.group (2))
            p.addreporter (LookupStoreHacker (m.group (1), email))
            p.author.reportcredit (p)
            continue
        m = Preptest.search (Line)  # Reported-and-tested-by:
        if m:
            email = database.RemapEmail (m.group (2))
            h = LookupStoreHacker (m.group (1), email)
            p.addreporter (h)
            p.addtester (h)
            p.author.reportcredit (p)
            p.author.testcredit (p)
            continue
        #
        # If this one is a merge, make note of the fact.
        #
        m = Pmerge.match (Line)
        if m:
            p.merge = 1
            continue
        #
        # See if it's the date.  Dates in the future are reported and
        # clamped to Today.
        #
        m = Pdate.match (Line)
        if m:
            dt = rfc822.parsedate(m.group (2))
            p.date = datetime.date (dt[0], dt[1], dt[2])
            if p.date > Today:
                sys.stderr.write ('Funky date: %s\n' % p.date)
                p.date = Today
            continue
        #
        # If we have a file filter, check for file lines.
        #
        if FileFilter:
            ignore = ApplyFileFilter (Line, ignore)
        #
        # OK, maybe it's part of the diff itself.
        #
        if not ignore:
            if Padd.match (Line):
                p.added += 1
                continue
            if Prem.match (Line):
                p.removed += 1
    #
    # Record some global information - but only if this patch had
    # stuff which wasn't ignored.  This work should be done
    # elsewhere.
    #
    if ((p.added + p.removed) > 0 or not FileFilter) and not p.merge:
        TotalAdded += p.added
        TotalRemoved += p.removed
        TotalChanged += max (p.added, p.removed)
        AddDateLines (p.date, max (p.added, p.removed))
        empl = p.author.emailemployer (p.email, p.date)
        empl.AddCSet (p)
        if AkpmOverLt:
            TrimLTSOBs (p)
        for sobemail, sobber in p.sobs:
            empl = sobber.emailemployer (sobemail, p.date)
            empl.AddSOB()
    return p
|
|
|
|
|
|
def ApplyFileFilter (line, ignore):
    """Return the ignore flag to use after seeing one line of a diff.

    A line matching Pfilea (the first file line, "--- a/") decides the
    flag outright: 0 if the named file matches FileFilter, 1 otherwise.
    A line matching Pfileb (the second file line) can only turn ignoring
    off.  Any other line leaves `ignore` as it was.
    """
    first = Pfilea.match (line)
    if first:
        if FileFilter.search (first.group (1)):
            return 0
        return 1
    #
    # The second file line may clear the flag but never sets it.
    #
    second = Pfileb.match (line)
    if second and FileFilter.search (second.group (1)):
        return 0
    return ignore
|
|
|
|
#
|
|
# If this patch is signed off by both Andrew Morton and Linus Torvalds,
|
|
# remove the (redundant) Linus signoff.
|
|
#
|
|
def TrimLTSOBs (p):
    """Drop the Linus entry from p.sobs when the Akpm entry is there too.

    Nothing happens unless both module-level entries are present in the
    list; when they are, the first occurrence of Linus is removed.
    """
    signoffs = p.sobs
    if Linus not in signoffs or Akpm not in signoffs:
        return
    signoffs.remove (Linus)
|
|
|
|
|
|
#
|
|
# Here starts the real program.
|
|
#
|
|
ParseOpts ()

#
# Read the config files.
#
ConfigFile.ConfigFile (CFName)

#
# Let's pre-seed the database with a couple of hackers
# we want to remember.  Each is kept as an (email, hacker) pair, the
# same shape as the entries in a patch's sobs list.
#
Linus = ('torvalds@linux-foundation.org',
         LookupStoreHacker ('Linus Torvalds', 'torvalds@linux-foundation.org'))
Akpm = ('akpm@linux-foundation.org',
        LookupStoreHacker ('Andrew Morton', 'akpm@linux-foundation.org'))

# Prime the one-line look-ahead that grabpatch() works from.
NextLine = sys.stdin.readline ()
TotalChanged = TotalAdded = TotalRemoved = 0

#
# Snarf changesets.  Progress goes to stderr, rewritten in place every
# 50 calls.
#
sys.stderr.write ('Grabbing changesets...\r')

printcount = CSCount = 0
while True:
    if (printcount % 50) == 0:
        sys.stderr.write ('Grabbing changesets...%d\r' % printcount)
    printcount += 1
    p = grabpatch()
    if not p:
        break
#    if p.added > 100000 or p.removed > 100000:
#        print 'Skipping massive add', p.commit
#        continue
    if FileFilter and p.added == 0 and p.removed == 0:
        continue
    if not p.merge:
        p.author.addpatch (p)
        for sobemail, sob in p.sobs:
            sob.addsob (p)
        for hacker in p.reviews:
            hacker.addreview (p)
        for hacker in p.testers:
            hacker.addtested (p)
        for hacker in p.reports:
            hacker.addreport (p)
        CSCount += 1
    csv.AccumulatePatch (p)
sys.stderr.write ('Grabbing changesets...done\n')

if DumpDB:
    database.DumpDB ()
#
# Say something
#
hlist = database.AllHackers ()
elist = database.AllEmployers ()
reports.Write ('Processed %d csets from %d developers\n' % (CSCount,
                                                            len (hlist)))
reports.Write ('%d employers found\n' % len (elist))
reports.Write ('A total of %d lines added, %d removed (delta %d)\n' %
               (TotalAdded, TotalRemoved, TotalAdded - TotalRemoved))
if TotalChanged == 0:
    TotalChanged = 1 # HACK to avoid div by zero
# Date statistics replace the normal reports: write them and stop.
if DateStats:
    PrintDateStats ()
    sys.exit(0)

csv.OutputCSV (CSVFile)
if CSVFile is not None:
    CSVFile.close ()

if DevReports:
    reports.DevReports (hlist, TotalChanged, CSCount, TotalRemoved)
reports.EmplReports (elist, TotalChanged, CSCount)
|