gitdm/database.py

219 lines
5.7 KiB
Python

#
# The "database".
#
# This code is part of the LWN git data miner.
#
# Copyright 2007-8 LWN.net
# Copyright 2007-8 Jonathan Corbet <corbet@lwn.net>
#
# This file may be distributed under the terms of the GNU General
# Public License, version 2.
#
import sys, datetime
class Hacker:
    """One contributor, with the patches and tags credited to them.

    ``email`` and ``employer`` are parallel lists: ``employer[i]`` is the
    employer list that was supplied together with ``email[i]``.  Each
    employer list is iterated as ``(date, employer)`` pairs (see
    ``emailemployer``).
    """

    def __init__(self, name, id, elist, email):
        """Create a hacker known by one email address.

        name  -- display name
        id    -- identifier assigned by the caller
        elist -- employer list for ``email``: (date, employer) pairs
        email -- first known email address
        """
        self.name = name
        self.id = id
        self.employer = [elist]
        self.email = [email]
        # Line counts accumulated by addpatch().
        self.added = self.removed = 0
        # Patches recorded per kind of credit.
        self.patches = []
        self.signoffs = []
        self.reviews = []
        self.tested = []
        self.reports = []
        # Counters bumped by testcredit() / reportcredit().
        self.testcred = self.repcred = 0

    def addemail(self, email, elist):
        """Record another address (and its employer list) for this hacker.

        Also registers the hacker under ``email`` in the module-level
        ``HackersByEmail`` table.
        """
        self.email.append(email)
        self.employer.append(elist)
        HackersByEmail[email] = self

    def emailemployer(self, email, date):
        """Return the employer associated with ``email`` at ``date``.

        The employer list of every matching address is scanned in order and
        the first entry whose date is strictly later than ``date`` wins.
        If nothing matches, a diagnostic is printed and None is returned.
        """
        for addr, history in zip(self.email, self.employer):
            if addr != email:
                continue
            for edate, empl in history:
                if edate > date:
                    return empl
        # Single-argument print() is valid on both Python 2 and Python 3;
        # the text matches what the old print statement produced.
        print('OOPS.  %s %s %s %s %s' % (self.name, self.employer,
                                         self.email, email, date))
        return None  # Should not happen

    def addpatch(self, patch):
        """Credit ``patch`` to this hacker and add its line counts."""
        self.added += patch.added
        self.removed += patch.removed
        self.patches.append(patch)

    #
    # One trivial recorder per kind of tag.  There's got to be a better way.
    #
    def addsob(self, patch):
        """Record ``patch`` in the sign-off list."""
        self.signoffs.append(patch)

    def addreview(self, patch):
        """Record ``patch`` in the review list."""
        self.reviews.append(patch)

    def addtested(self, patch):
        """Record ``patch`` in the tested list."""
        self.tested.append(patch)

    def addreport(self, patch):
        """Record ``patch`` in the report list."""
        self.reports.append(patch)

    def reportcredit(self, patch):
        """Bump the report-credit counter (``patch`` itself is not stored)."""
        self.repcred += 1

    def testcredit(self, patch):
        """Bump the test-credit counter (``patch`` itself is not stored)."""
        self.testcred += 1
# Lookup tables for every stored Hacker, keyed by name, by email address
# and by numeric ID respectively.
HackersByName = {}
HackersByEmail = {}
HackersByID = {}
# ID that StoreHacker() hands to the next hacker it creates.
MaxID = 0
def StoreHacker(name, elist, email):
    """Create a Hacker, index it in all three lookup tables and return it.

    The new hacker takes the current value of ``MaxID`` as its ID, and
    ``MaxID`` is advanced by one before the Hacker object is built.
    """
    global MaxID
    new_id = MaxID
    MaxID = new_id + 1
    hacker = Hacker(name, new_id, elist, email)
    HackersByName[name] = hacker
    HackersByEmail[email] = hacker
    HackersByID[new_id] = hacker
    return hacker
def LookupEmail(addr):
    """Return the hacker registered under email ``addr``, or None."""
    return HackersByEmail.get(addr)
def LookupName(name):
    """Return the hacker registered under ``name``, or None."""
    return HackersByName.get(name)
def LookupID(id):
    """Return the hacker registered under numeric ``id``, or None."""
    return HackersByID.get(id)
def AllHackers():
    """Return a list of every stored hacker.

    No filtering is applied: hackers with no added or removed lines are
    included.  A real list is returned (not a dict view) so the result can
    be sorted or indexed on Python 3 exactly as on Python 2.
    """
    return list(HackersByID.values())
def DumpDB():
    """Write a text summary of every hacker to ``database.dump``.

    Hackers are written in name order.  Each gets one summary line (ID,
    name, patch count, lines added/removed, sign-off count), followed by
    each of their email addresses and, under each address, the dated
    employer entries recorded for it.  The file is created in the current
    directory and overwritten if it already exists.
    """
    # The with-block guarantees the file is flushed and closed even if a
    # write fails; the original never closed it.
    with open('database.dump', 'w') as out:
        # sorted() works on both Python 2 and 3; keys().sort() does not.
        for name in sorted(HackersByName):
            h = HackersByName[name]
            out.write('%4d %s %d p (+%d -%d) sob: %d\n' % (h.id, h.name,
                                                           len(h.patches),
                                                           h.added, h.removed,
                                                           len(h.signoffs)))
            # h.email and h.employer are parallel lists.
            for addr, history in zip(h.email, h.employer):
                out.write('\t%s -> \n' % (addr,))
                for date, empl in history:
                    out.write('\t\t %d-%d-%d %s\n' % (date.year, date.month,
                                                      date.day, empl.name))
#
# Employer info.
#
class Employer:
    """Running totals for one employer, plus the authors seen for it."""

    def __init__(self, name):
        """Start an employer called ``name`` with all counters at zero."""
        self.name = name
        self.added = 0
        self.removed = 0
        self.count = 0
        self.changed = 0
        self.sobs = 0
        self.hackers = []

    def AddCSet(self, patch):
        """Fold one changeset into this employer's totals.

        ``changed`` grows by the larger of the patch's added and removed
        line counts.  The patch author is remembered once, in first-seen
        order.
        """
        lines_in = patch.added
        lines_out = patch.removed
        self.added += lines_in
        self.removed += lines_out
        self.changed += max(lines_in, lines_out)
        self.count += 1
        author = patch.author
        if author in self.hackers:
            return
        self.hackers.append(author)

    def AddSOB(self):
        """Count one more sign-off for this employer."""
        self.sobs += 1
# Every Employer created so far, keyed by employer name.
Employers = {}
def GetEmployer(name):
    """Return the Employer called ``name``, creating it on first use."""
    if name in Employers:
        return Employers[name]
    created = Employer(name)
    Employers[name] = created
    return created
def AllEmployers():
    """Return a list of every Employer created so far.

    A real list is returned (not a dict view) so the result can be sorted
    or indexed on Python 3 exactly as on Python 2.
    """
    return list(Employers.values())
#
# The email map.
#
# Maps a variant email address to its canonical form.
EmailAliases = {}
def AddEmailAlias(variant, canonical):
    """Register ``variant`` as an alias for the address ``canonical``.

    If ``variant`` already has an alias, a warning is written to stderr
    and the new mapping replaces the old one.
    """
    # "in" replaces dict.has_key(), which no longer exists on Python 3.
    if variant in EmailAliases:
        sys.stderr.write('Duplicate email alias for %s\n' % (variant))
    EmailAliases[variant] = canonical
def RemapEmail(email):
    """Lower-case ``email`` and return its canonical alias, if one exists.

    An address with no registered alias is returned lower-cased.
    """
    lowered = email.lower()
    return EmailAliases.get(lowered, lowered)
#
# Email-to-employer mapping.
#
# Maps a lower-cased email address (or bare domain) to a list of
# (end date, Employer) pairs.
EmailToEmployer = {}
# A date 365 days after today, computed once when the module is loaded;
# used as the end date of mappings that have no explicit one.
nextyear = datetime.date.today() + datetime.timedelta(365)
def AddEmailEmployerMapping(email, employer, end=nextyear):
    """Record that ``email`` maps to ``employer`` with end date ``end``.

    email    -- address (or domain); stored lower-cased
    employer -- employer name, resolved through GetEmployer()
    end      -- date attached to the entry; None or omitted means
                ``nextyear``

    The new (end, Employer) pair is inserted in front of the first existing
    entry whose date is later than ``end``, or appended if there is none, so
    a list built only through this function stays in ascending date order.
    An existing entry with the same date triggers a warning, but the new
    entry is still added.
    """
    if end is None:
        end = nextyear
    email = email.lower()
    empl = GetEmployer(employer)
    history = EmailToEmployer.get(email)
    if history is None:
        # First mapping for this address.
        EmailToEmployer[email] = [(end, empl)]
        return
    for pos, (date, _) in enumerate(history):
        if date == end:  # probably both nextyear
            # Single-argument print() is valid on Python 2 and Python 3.
            print('WARNING: duplicate email/empl for %s' % (email))
        if date > end:
            history.insert(pos, (end, empl))
            return
    history.append((end, empl))
def MapToEmployer(email, unknown=0):
    """Return the list of (end date, Employer) pairs for ``email``.

    Lookup order, after lower-casing the address:
      1. the full address in EmailToEmployer;
      2. suffixes of the domain, shortest first (for ``a.b.example.com``:
         ``example.com``, then ``b.example.com``, then the full domain);
         a single-label domain is never tried;
      3. otherwise a one-entry list ending at ``nextyear`` whose employer
         is named ``(Unknown)`` if ``unknown`` is true, else named after
         the address itself.

    An address without an ``@`` is reported on stdout and mapped to the
    ``Funky`` employer.
    """
    email = email.lower()
    if email in EmailToEmployer:
        return EmailToEmployer[email]
    parts = email.split('@')
    if len(parts) < 2:
        # Single-argument print() is valid on Python 2 and Python 3.
        print('Oops...funky email %s' % email)
        return [(nextyear, GetEmployer('Funky'))]
    labels = parts[1].split('.')
    # Walk from the last two labels outwards to the complete domain.
    for start in range(len(labels) - 2, -1, -1):
        suffix = '.'.join(labels[start:])
        if suffix in EmailToEmployer:
            return EmailToEmployer[suffix]
    if unknown:
        return [(nextyear, GetEmployer('(Unknown)'))]
    return [(nextyear, GetEmployer(email))]
def LookupEmployer(email, mapunknown=0):
    """Return whatever MapToEmployer() yields for ``email``.

    ``mapunknown`` is passed through as MapToEmployer's ``unknown`` flag.
    """
    return MapToEmployer(email, mapunknown)