radar/mirror_fetchers.py

#!/usr/bin/python
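"""Mirror gerrit event streams from several stream loggers, merge them into
de-duplicated per-day files, and then summarize review votes per author."""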
import datetime
import json
import os
import sys
import urllib
REMOTES = ['gerrit-stream-logger-dfw.stillhq.com',
           'gerrit-stream-logger-ord.stillhq.com',
           'gerrit-stream-logger-syd.stillhq.com']
one_day = datetime.timedelta(days=1)
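
# Start three days in the past so that recent days are refetched and any
# events which arrived since the last run are picked up.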
day = datetime.datetime.now()
day -= datetime.timedelta(days=3)
merged_filename = None
merged_data = {}
merged_data_with_order = []
changed_merge_files = {}
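
# First pass: for each day and each remote logger, fetch that day's raw
# event log and fold any new lines into a single merged file for the day.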
while day < datetime.datetime.now():
    for target in REMOTES:
        url = 'http://%s/output/%d/%d/%d' % (target, day.year, day.month,
                                             day.day)
        single_path = os.path.join(target, str(day.year), str(day.month))
        single = os.path.join(single_path, str(day.day))
        merged_path = os.path.join('merged', str(day.year), str(day.month))
        merged = os.path.join(merged_path, str(day.day))

        if not os.path.exists(single_path):
            os.makedirs(single_path)
        if not os.path.exists(merged_path):
            os.makedirs(merged_path)
        print '%s Fetching %s' % (datetime.datetime.now(), url)
        remote = urllib.urlopen(url)
        if remote.getcode() != 404:
            remote_size = int(remote.info().getheaders('Content-Length')[0])
            if not os.path.exists(single):
                local_size = 0
            else:
                local_size = os.stat(single).st_size

            print ('%s Local size %s, remote size %s'
                   % (datetime.datetime.now(), local_size, remote_size))
            if remote_size > local_size:
                # The remote log for this day has grown, so refetch it all.
                with open(single, 'w') as f:
                    f.write(remote.read())
                print '%s ... fetched' % datetime.datetime.now()
                # Load the existing merge file when we move to a new day.
                if merged_filename != merged:
                    merged_data = {}
                    merged_data_with_order = []

                    print ('%s ... loading merge file'
                           % datetime.datetime.now())
                    if os.path.exists(merged):
                        with open(merged, 'r') as f:
                            for line in f.readlines():
                                line = line.rstrip()
                                merged_data[line] = True
                                merged_data_with_order.append(line)
                    merged_filename = merged
                # Fold in any events we haven't seen before, preserving the
                # order in which they first appeared.
                new_entries = 0
                with open(single, 'r') as f:
                    for entry in f.readlines():
                        entry = entry.rstrip()
                        if entry not in merged_data:
                            merged_data[entry] = True
                            merged_data_with_order.append(entry)
                            new_entries += 1

                with open(merged, 'w') as f:
                    f.write('\n'.join(merged_data_with_order))
                changed_merge_files[merged] = True
                print ('%s ... merged (%d new entries)'
                       % (datetime.datetime.now(), new_entries))
        remote.close()
    day += one_day
# Uncomment this block to reprocess every merge file, not just those which
# changed during this run.
#for dirpath, subdirs, files in os.walk('merged'):
#    for filename in files:
#        if filename.endswith('.json'):
#            continue
#        changed_merge_files[os.path.join(dirpath, filename)] = True
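
# Second pass: re-read each merged file which changed and write a summary
# of review votes, keyed on author, to <merge file>_reviews.json.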
print 'Processing changed merge files'
for filename in changed_merge_files:
    print '... %s' % filename

    with open(filename, 'r') as f:
        reviews = {}
        for line in f.readlines():
            line = line.rstrip()
            try:
                j = json.loads(line)
            except ValueError:
                # Skip lines which aren't valid JSON.
                continue
            try:
                if 'change' not in j:
                    continue

                if j['type'] == 'comment-added':
                    # Prefer the username, falling back to the email address.
                    author = j['author'].get('username', None)
                    if not author:
                        author = j['author'].get('email', None)
                    if not author:
                        continue

                    number = j['change']['number']
                    patchset = j['patchSet']['number']
                    for approval in j.get('approvals', []):
                        reviews.setdefault(author, [])
                        reviews[author].append({'number': number,
                                                'patchset': patchset,
                                                'type': approval['type'],
                                                'value': approval['value']})
            except Exception, e:
                # An unexpected event shape is worth stopping for: dump the
                # event and exit with an error.
                print 'Error: %s\n' % e
                print json.dumps(j, indent=4)
                sys.exit(1)
    with open('%s_reviews.json' % filename, 'w') as f:
        f.write(json.dumps(reviews, indent=4))

print 'Done'