gitdm patch ...

Hi guys,

	I knocked up a patch to generate some per-month, by-affiliation
statistics from the gitdm output; attached for interest or merging.

	A sample of the output, complete with OO.o data-pilot, and pretty chart
is here:

http://www.gnome.org/~michael/data/2008-09-29-linux-stats.ods

	with chart here:
	http://www.gnome.org/~michael/images/2008-09-29-kernel-active.png

	caption being:

	"Graph showing number and affiliation of active kernel developers
(contributing more than 100 lines per month). Quick affiliation key,
from bottom up: Unknown, No-Affiliation, IBM, RedHat, Novell, Intel ..."

	These are not yet published; I plan to use them as a comparison to
OO.o's somewhat mediocre equivalents, and hope to go live with them soon
(and fix the horrible bugs in stacked area charts to make them actually
pretty).

	HTH,

		Michael.

--
 michael.meeks@novell.com  <><, Pseudo Engineer, itinerant idiot

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
This commit is contained in:
Michael Meeks 2008-09-29 17:46:37 +01:00 committed by Jonathan Corbet
parent 558dbe1cbe
commit d1a8929872
2 changed files with 51 additions and 4 deletions

38
csv.py Normal file
View File

@ -0,0 +1,38 @@
#
# aggregate per-month statistics for people
#
import sys, datetime
class CSVStat:
    """Running added/removed totals for one name over one period.

    The name, employer and date passed to the constructor are stored
    unchanged; both counters start at zero and only grow through
    accumulate().
    """

    def __init__ (self, name, employer, date):
        self.name = name
        self.employer = employer
        self.date = date
        # Counters are filled in later, one patch at a time.
        self.added = 0
        self.removed = 0

    def accumulate (self, p):
        """Add p.added and p.removed onto this record's totals."""
        self.added += p.added
        self.removed += p.removed
# Maps "<author name>-<YYYY-MM-01>" to the CSVStat collecting that
# author's patches for the month.
PeriodCommitHash = { }

def AccumulatePatch (p):
    """Fold patch p into the per-author, per-month statistics.

    The patch is bucketed by p.author.name and the first day of the
    month of p.date.  The first patch seen for a bucket creates its
    CSVStat, using the employer returned by
    p.author.emailemployer (p.email, p.date); later patches in the same
    bucket reuse that record.
    """
    month = "%.2d-%.2d-01" % (p.date.year, p.date.month)
    key = "%s-%s" % (p.author.name, month)
    try:
        bucket = PeriodCommitHash[key]
    except KeyError:
        # First patch for this author in this month: open a new record.
        employer = p.author.emailemployer (p.email, p.date)
        bucket = CSVStat (p.author.name, employer, month)
        PeriodCommitHash[key] = bucket
    bucket.accumulate (p)
def _csv_sanitise (text):
    """Return text with every double quote and backslash replaced by '.'."""
    # Names are written inside double quotes, so a stray " (common) or
    # \" (occasional) in a name would break the quoting of the field.
    return text.replace ("\"", ".").replace ("\\", ".")

def OutputCSV (file):
    """Write the accumulated per-month statistics as tab-separated text.

    file is an object with a write() method, or None, in which case
    nothing is written.  One header line is emitted, followed by one row
    per entry of PeriodCommitHash holding the quoted author name, quoted
    employer name, period date, lines added and lines removed.

    Rows are written in sorted key order (author name, then month) so
    that repeated runs over the same data produce identical files rather
    than depending on dictionary iteration order.
    """
    if file is None:
        return
    # NOTE(review): the header spelling "Affliation" is kept as-is, since
    # existing consumers of the output may match on it -- confirm before
    # correcting it.
    file.write ("Name\tAffliation\tDate\tAdded\tRemoved\n")
    for key in sorted (PeriodCommitHash):
        stat = PeriodCommitHash[key]
        author_name = _csv_sanitise (stat.name)
        empl_name = _csv_sanitise (stat.employer.name)
        file.write ("\"%s\"\t\"%s\"\t%s\t%d\t%d\n" % (
            author_name, empl_name, stat.date, stat.added, stat.removed))

17
gitdm
View File

@ -11,7 +11,7 @@
# Public License, version 2.
import database, ConfigFile
import database, csv, ConfigFile
import getopt, datetime
import os, re, sys, rfc822, string
from patterns import *
@ -30,6 +30,7 @@ DevReports = 1
DateStats = 0
AuthorSOBs = 1
FileFilter = None
CSVFile = None
AkpmOverLt = 0
DumpDB = 0
CFName = 'gitdm.config'
@ -46,14 +47,15 @@ CFName = 'gitdm.config'
# -r pattern Restrict to files matching pattern
# -s Ignore author SOB lines
# -u Map unknown employers to '(Unknown)'
# -x file.csv Export raw statistics as CSV
# -z Dump out the hacker database at completion
def ParseOpts ():
global Outfile, ListCount, MapUnknown, HTMLfile, DevReports
global DateStats, AuthorSOBs, FileFilter, AkpmOverLt, DumpDB
global CFName
global CFName, CSVFile
opts, rest = getopt.getopt (sys.argv[1:], 'adc:Dh:l:o:r:suz')
opts, rest = getopt.getopt (sys.argv[1:], 'adc:Dh:l:o:r:sux:z')
for opt in opts:
if opt[0] == '-a':
AkpmOverLt = 1
@ -76,6 +78,9 @@ def ParseOpts ():
AuthorSOBs = 0
elif opt[0] == '-u':
MapUnknown = 1
elif opt[0] == '-x':
CSVFile = open (opt[1], 'w')
print "open output file " + opt[1] + "\n"
elif opt[0] == '-z':
DumpDB = 1
@ -260,7 +265,6 @@ THead = '''<p>
<tr><th colspan=3>%s</th></tr>
'''
def BeginReport (title):
global HTMLclass
@ -459,6 +463,7 @@ while (1):
for sobemail, sob in p.sobs:
sob.addsob (p)
CSCount += 1
csv.AccumulatePatch (p)
print >> sys.stderr, 'Grabbing changesets...done'
if DumpDB:
@ -479,6 +484,10 @@ if DateStats:
PrintDateStats ()
sys.exit(0)
csv.OutputCSV (CSVFile)
if CSVFile is not None:
CSVFile.close ()
if DevReports:
ReportByPCount (hlist)
ReportByLChanged (hlist)