Add database pruning

By default, remove changes (and associated git refs) that have been
closed for more than 2 months.

Change-Id: I5d1176b13a7c0fdaf12e346537fd7e1251868461
This commit is contained in:
James E. Blair 2015-04-20 12:27:26 -04:00
parent c0cabc82f6
commit 3c9124969b
6 changed files with 142 additions and 14 deletions

View File

@ -159,6 +159,15 @@ commentlinks:
# them in UTC instead, uncomment the following line:
# display-times-in-utc: true
# Closed changes that are older than two months are removed from the
# local database (and their refs are removed from the local git repos
# so that git may garbage collect them). If you would like to change
# the expiration delay or disable it, uncomment the following line.
# The time interval is specified in the same way as the "age:" term in
# Gerrit's search syntax. To disable it altogether, set the value to
# the empty string.
# expire-age: '2 months'
# Uncomment the following lines to hide comments by default that match
# certain criteria. You can toggle their display with 't'. Currently
# the only supported criterion is "author".

View File

@ -117,6 +117,7 @@ class ConfigSchema(object):
'thread-changes': bool,
'display-times-in-utc': bool,
'change-list-options': self.change_list_options,
'expire-age': str,
})
return schema
@ -227,6 +228,8 @@ class Config(object):
'sort-by': change_list_options.get('sort-by', 'number'),
'reverse': change_list_options.get('reverse', False)}
self.expire_age = self.config.get('expire-age', '2 months')
def getServer(self, name=None):
for server in self.config['servers']:
if name is None or name == server['name']:

View File

@ -507,9 +507,11 @@ class File(object):
mapper(Account, account_table)
mapper(Project, project_table, properties=dict(
branches=relationship(Branch, backref='project',
order_by=branch_table.c.name),
order_by=branch_table.c.name,
cascade='all, delete-orphan'),
changes=relationship(Change, backref='project',
order_by=change_table.c.number),
order_by=change_table.c.number,
cascade='all, delete-orphan'),
unreviewed_changes=relationship(Change,
primaryjoin=and_(project_table.c.key==change_table.c.project_key,
change_table.c.hidden==False,
@ -529,17 +531,23 @@ mapper(Branch, branch_table)
mapper(Change, change_table, properties=dict(
owner=relationship(Account),
revisions=relationship(Revision, backref='change',
order_by=revision_table.c.number),
order_by=revision_table.c.number,
cascade='all, delete-orphan'),
messages=relationship(Message,
secondary=revision_table,
order_by=message_table.c.created),
labels=relationship(Label, backref='change', order_by=(label_table.c.category,
label_table.c.value)),
order_by=message_table.c.created,
viewonly=True),
labels=relationship(Label, backref='change',
order_by=(label_table.c.category, label_table.c.value),
cascade='all, delete-orphan'),
permitted_labels=relationship(PermittedLabel, backref='change',
order_by=(permitted_label_table.c.category,
permitted_label_table.c.value)),
approvals=relationship(Approval, backref='change', order_by=(approval_table.c.category,
approval_table.c.value)),
permitted_label_table.c.value),
cascade='all, delete-orphan'),
approvals=relationship(Approval, backref='change',
order_by=(approval_table.c.category,
approval_table.c.value),
cascade='all, delete-orphan'),
draft_approvals=relationship(Approval,
primaryjoin=and_(change_table.c.key==approval_table.c.change_key,
approval_table.c.draft==True),
@ -547,16 +555,20 @@ mapper(Change, change_table, properties=dict(
approval_table.c.value))
))
mapper(Revision, revision_table, properties=dict(
messages=relationship(Message, backref='revision'),
files=relationship(File, backref='revision'),
pending_cherry_picks=relationship(PendingCherryPick, backref='revision'),
messages=relationship(Message, backref='revision',
cascade='all, delete-orphan'),
files=relationship(File, backref='revision',
cascade='all, delete-orphan'),
pending_cherry_picks=relationship(PendingCherryPick, backref='revision',
cascade='all, delete-orphan'),
))
mapper(Message, message_table, properties=dict(
author=relationship(Account)))
mapper(File, file_table, properties=dict(
comments=relationship(Comment, backref='file',
order_by=(comment_table.c.line,
comment_table.c.created)),
comment_table.c.created),
cascade='all, delete-orphan'),
draft_comments=relationship(Comment,
primaryjoin=and_(file_table.c.key==comment_table.c.file_key,
comment_table.c.draft==True),
@ -652,6 +664,9 @@ class DatabaseSession(object):
def delete(self, obj):
    """Hand *obj* to the ``delete()`` method of the current session."""
    current = self.session()
    current.delete(obj)
def vacuum(self):
    """Execute a ``VACUUM`` statement through the current session."""
    current = self.session()
    current.execute("VACUUM")
def getProjects(self, subscribed=False, unreviewed=False):
"""Retrieve projects.

View File

@ -267,6 +267,10 @@ class Repo(object):
except AssertionError:
repo.git.fetch(url, refspec)
def deleteRef(self, ref):
    """Delete the reference *ref* from the git repository at ``self.path``."""
    git.Reference.delete(git.Repo(self.path), ref)
def checkout(self, ref):
repo = git.Repo(self.path)
try:

View File

@ -67,7 +67,7 @@ class SearchCompiler(object):
if __name__ == '__main__':
class Dummy(object):
pass
query = 'status:open limit:50'
query = 'status:open limit:50 age:2months'
lexer = tokenizer.SearchTokenizer()
lexer.input(query)
while True:

View File

@ -14,6 +14,7 @@
# under the License.
import collections
import errno
import logging
import math
import os
@ -1158,6 +1159,89 @@ class UploadReviewTask(Task):
sync.post('changes/%s/submit' % (change_id,), {})
sync.submitTask(SyncChangeTask(change_id, priority=self.priority))
class PruneDatabaseTask(Task):
    """Queue the removal of every closed change older than ``age``.

    ``age`` is interpolated into the search query
    ``status:closed age:<age>``.  A false value (such as the empty
    string) turns the task into a no-op.
    """

    def __init__(self, age, priority=NORMAL_PRIORITY):
        super(PruneDatabaseTask, self).__init__(priority)
        self.age = age

    def __repr__(self):
        return '<PruneDatabaseTask %s>' % (self.age,)

    def __eq__(self, other):
        # Equal only to tasks of the same class with the same age.
        same = (other.__class__ == self.__class__ and
                other.age == self.age)
        return True if same else False

    def run(self, sync):
        """Submit one PruneChangeTask per matching change, then a vacuum.

        Every submitted task is also appended to ``self.tasks``.
        """
        if not self.age:
            return
        with sync.app.db.getSession() as db_session:
            query = 'status:closed age:%s' % self.age
            for expired in db_session.getChanges(query):
                prune = PruneChangeTask(expired.key, priority=self.priority)
                self.tasks.append(prune)
                sync.submitTask(prune)
        # The vacuum task is queued last, after all the per-change tasks.
        vacuum = VacuumDatabaseTask(priority=self.priority)
        self.tasks.append(vacuum)
        sync.submitTask(vacuum)
class PruneChangeTask(Task):
    """Delete a single change from the local database and its git refs.

    ``key`` is the database key handed to ``session.getChange()``.
    """

    def __init__(self, key, priority=NORMAL_PRIORITY):
        super(PruneChangeTask, self).__init__(priority)
        self.key = key

    def __repr__(self):
        return '<PruneChangeTask %s>' % (self.key,)

    def __eq__(self, other):
        if (other.__class__ == self.__class__ and
            other.key == self.key):
            return True
        return False

    def run(self, sync):
        """Remove the change's refs from its repo, then delete the change.

        Does nothing if the change can no longer be found.  Any OSError
        from removing the parent ref other than EISDIR/EPERM is
        re-raised.
        """
        app = sync.app
        with app.db.getSession() as session:
            change = session.getChange(self.key)
            if not change:
                return
            repo = app.getRepo(change.project.name)
            self.log.info("Pruning %s change %s status:%s updated:%s" % (
                change.project.name, change.number, change.status, change.updated))
            change_ref = None
            for revision in change.revisions:
                if change_ref is None:
                    # Parent of the per-revision ref: the fetch ref with
                    # its last path component dropped.
                    change_ref = '/'.join(revision.fetch_ref.split('/')[:-1])
                self.log.info("Deleting %s ref %s" % (
                    change.project.name, revision.fetch_ref))
                repo.deleteRef(revision.fetch_ref)
            # A change without revisions has no parent ref to remove;
            # skip the deletion rather than passing None to deleteRef.
            if change_ref is not None:
                self.log.info("Deleting %s ref %s" % (
                    change.project.name, change_ref))
                try:
                    repo.deleteRef(change_ref)
                except OSError as e:
                    # Removing the parent ref may fail because the path
                    # is a directory.  POSIX allows unlink() on a
                    # directory to report EPERM instead of EISDIR, so
                    # tolerate both and re-raise anything else.
                    if e.errno not in (errno.EISDIR, errno.EPERM):
                        raise
            session.delete(change)
class VacuumDatabaseTask(Task):
    """Task that calls ``vacuum()`` on a database session."""

    def __init__(self, priority=NORMAL_PRIORITY):
        super(VacuumDatabaseTask, self).__init__(priority)

    def __repr__(self):
        return '<VacuumDatabaseTask>'

    def __eq__(self, other):
        # Instances carry no distinguishing state, so any two tasks of
        # this class compare equal.
        return True if other.__class__ == self.__class__ else False

    def run(self, sync):
        """Open a session on the application's database and vacuum it."""
        with sync.app.db.getSession() as db_session:
            db_session.vacuum()
class Sync(object):
def __init__(self, app):
self.user_agent = 'Gertty/%s %s' % (gertty.version.version_info.version_string(),
@ -1181,15 +1265,21 @@ class Sync(object):
self.submitTask(SyncProjectListTask(HIGH_PRIORITY))
self.submitTask(SyncSubscribedProjectsTask(NORMAL_PRIORITY))
self.submitTask(SyncSubscribedProjectBranchesTask(LOW_PRIORITY))
self.submitTask(PruneDatabaseTask(self.app.config.expire_age, LOW_PRIORITY))
self.periodic_thread = threading.Thread(target=self.periodicSync)
self.periodic_thread.daemon = True
self.periodic_thread.start()
def periodicSync(self):
    """Loop forever: sync subscribed projects every minute, prune hourly.

    Each pass sleeps 60 seconds and calls ``syncSubscribedProjects()``.
    Once more than 3600 seconds have passed since the last prune (or
    since the loop started), ``pruneDatabase()`` is called as well.
    Exceptions are logged and the loop continues.
    """
    last_prune = time.time()
    while True:
        try:
            time.sleep(60)
            self.syncSubscribedProjects()
            current = time.time()
            elapsed = current - last_prune
            if elapsed > 3600:
                # Reset the timer before pruning, so a failing prune is
                # not retried until another hour has passed.
                last_prune = current
                self.pruneDatabase()
        except Exception:
            self.log.exception('Exception in periodicSync')
@ -1302,6 +1392,13 @@ class Sync(object):
for subtask in task.tasks:
subtask.wait()
def pruneDatabase(self):
    """Submit a low-priority PruneDatabaseTask and wait for it to finish.

    The configured ``expire_age`` is passed to the task.  If the task's
    ``wait()`` returns a true value, each task in its ``tasks`` list is
    waited on as well.
    """
    prune_task = PruneDatabaseTask(self.app.config.expire_age, LOW_PRIORITY)
    self.submitTask(prune_task)
    if not prune_task.wait():
        return
    for child in prune_task.tasks:
        child.wait()
def _syncChangeByCommit(self, commit, priority):
# Accumulate sync change by commit tasks because they often
# come in batches. This method assumes it is being called