Usage info in JSON format

Before this commit, slogging only outputs usage info as a CSV file.
To enhance usability, this commit adds JSON as a second output format, selectable through the new format_type option.
Daisuke Morita 2014-09-02 11:51:45 +09:00 committed by John Dickinson
parent f5234eed45
commit d221703677
3 changed files with 109 additions and 25 deletions


@@ -9,6 +9,7 @@ swift_account = AUTH_test
# lookback_hours = 120
# lookback_window = 120
# user = swift
# format_type = csv
[log-processor-access]
# log_dir = /var/log/swift/
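For context, this is roughly what the relevant section of the sample config looks like with JSON output turned on. The section name and surrounding values are inferred placeholders, not part of this hunk:

[log-processor]
swift_account = AUTH_test
# emit the aggregated usage report as JSON instead of the default CSV
format_type = json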

slogging/common.py Normal file

@@ -0,0 +1,27 @@
# Copyright (c) 2014 NTT corp.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
def get_format_type(conf, logger, key, default):
    """
    Get and check format_type value.
    """
    format_type = conf.get(key, default).lower()
    if format_type not in ('json', 'csv'):
        logger.warning(
            _("Invalid Parameter %s: %s, " % (key, format_type) +
              "use default %s.") % default)
        format_type = default
    return format_type
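A rough usage sketch of the new helper (the conf dict and logger are stand-ins, and the _() used on the warning path is assumed to be the gettext alias that swift installs globally):

import logging
from slogging.common import get_format_type

logger = logging.getLogger('slogging-example')

# a recognized value comes back lower-cased
get_format_type({'format_type': 'JSON'}, logger, 'format_type', 'csv')  # -> 'json'
# an unrecognized value logs a warning and falls back to the default
get_format_type({'format_type': 'xml'}, logger, 'format_type', 'csv')   # -> 'csv'
# a missing key simply returns the default
get_format_type({}, logger, 'format_type', 'csv')                       # -> 'csv'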


@@ -24,12 +24,15 @@ import multiprocessing
import Queue
import cPickle
import hashlib
import json
import io
from slogging.internal_proxy import InternalProxy
from swift.common.utils import get_logger, readconf
from swift.common.daemon import Daemon
from slogging.log_common import LogProcessorCommon, multiprocess_collate, \
BadFileDownload
from slogging import common
now = datetime.datetime.now
@@ -53,6 +56,7 @@ class LogProcessor(LogProcessorCommon):
module = __import__(import_target, fromlist=[import_target])
klass = getattr(module, class_name)
self.plugins[plugin_name]['instance'] = klass(plugin_conf)
self.plugins[plugin_name]['keylist_mapping'] = {}
self.logger.debug(_('Loaded plugin "%s"') % plugin_name)
def process_one_file(self, plugin_name, account, container, object_name):
@@ -90,7 +94,8 @@ class LogProcessor(LogProcessorCommon):
def generate_keylist_mapping(self):
keylist = {}
for plugin in self.plugins:
plugin_keylist = self.plugins[plugin]['instance'].keylist_mapping()
plugin_keylist = self.plugins[plugin]['keylist_mapping'] = \
self.plugins[plugin]['instance'].keylist_mapping()
if not plugin_keylist:
continue
for k, v in plugin_keylist.items():
@@ -134,6 +139,8 @@ class LogProcessorDaemon(Daemon):
self.worker_count = int(c.get('worker_count', '1'))
self._keylist_mapping = None
self.processed_files_filename = 'processed_files.pickle.gz'
self.format_type = common.get_format_type(c, self.logger,
'format_type', 'csv')
def get_lookback_interval(self):
"""
@@ -291,41 +298,90 @@ class LogProcessorDaemon(Daemon):
def get_output(self, final_info):
"""
:returns: a list of rows to appear in the csv file.
:returns: a list of rows to appear in the csv file or
a dictionary to appear in the json file.
The first row contains the column headers for the rest of the
rows in the returned list.
csv file:
The first row contains the column headers for the rest
of the rows in the returned list.
Each row after the first row corresponds to an account's data
for that hour.
Each row after the first row corresponds to an account's
data.
json file:
First level just shows a label "stats_data".
Second level of stats_data lists account names.
Each account block starts with a time label, and it
contains stats of account usage.
"""
sorted_keylist_mapping = sorted(self.keylist_mapping)
columns = ['data_ts', 'account'] + sorted_keylist_mapping
output = [columns]
for (account, year, month, day, hour), d in final_info.items():
data_ts = '%04d/%02d/%02d %02d:00:00' % \
(int(year), int(month), int(day), int(hour))
row = [data_ts, '%s' % (account)]
for k in sorted_keylist_mapping:
row.append(str(d[k]))
output.append(row)
if self.format_type == 'json':
all_account_stats = collections.defaultdict(dict)
for (account, year, month, day, hour), d in final_info.items():
data_ts = datetime.datetime(int(year), int(month),
int(day), int(hour))
time_stamp = data_ts.strftime('%Y/%m/%d %H:00:00')
hourly_account_stats = \
self.restructure_stats_dictionary(d)
all_account_stats[account].update({time_stamp:
hourly_account_stats})
output = {'stats_data': all_account_stats}
else: # csv
sorted_keylist_mapping = sorted(self.keylist_mapping)
columns = ['data_ts', 'account'] + sorted_keylist_mapping
output = [columns]
for (account, year, month, day, hour), d in final_info.items():
data_ts = '%04d/%02d/%02d %02d:00:00' % \
(int(year), int(month), int(day), int(hour))
row = [data_ts, '%s' % (account)]
for k in sorted_keylist_mapping:
row.append(str(d[k]))
output.append(row)
return output
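For illustration, the dictionary assembled by the JSON branch above ends up shaped roughly like this before it is handed to store_output (the account name, timestamp, and stat keys are made up):

{'stats_data': {
    'AUTH_test': {
        '2014/09/02 11:00:00': {
            'account_stats': {'container_count': 2, 'bytes_used': 4096},
            'access_stats': {'get_requests': 7}}}}}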
def restructure_stats_dictionary(self, target_dict):
"""
Restructure stats dictionary for json format.
:param target_dict: dictionary of restructuring target
:returns: restructured stats dictionary
"""
account_stats = {}
access_stats = {}
account_stats_key_list = \
self.log_processor.plugins['stats']['keylist_mapping']
access_stats_key_list = \
self.log_processor.plugins['access']['keylist_mapping']
hourly_stats = {'account_stats': account_stats,
'access_stats': access_stats}
for k, v in target_dict.items():
if k in account_stats_key_list:
account_stats[k] = int(v)
elif k in access_stats_key_list:
access_stats[k] = int(v)
return hourly_stats
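A small worked example of the split performed by this helper (key names and values are hypothetical):

# suppose the plugin keylist mappings contain
#   'stats'  -> {'container_count': ..., 'bytes_used': ...}
#   'access' -> {'get_requests': ...}
flat = {'container_count': '2', 'bytes_used': '4096', 'get_requests': '7'}
# restructure_stats_dictionary(flat) then returns
# {'account_stats': {'container_count': 2, 'bytes_used': 4096},
#  'access_stats': {'get_requests': 7}}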
def store_output(self, output):
"""
Takes the a list of rows and stores a csv file of the values in the
stats account.
Takes the dictionary or the list of rows and stores a json/csv file of
the values in the stats account.
:param output: list of rows to appear in the csv file
:param output: a dictionary or a list of rows
This csv file is final product of this script.
This json or csv file is the final product of this script.
"""
out_buf = '\n'.join([','.join(row) for row in output])
h = hashlib.md5(out_buf).hexdigest()
upload_name = time.strftime('%Y/%m/%d/%H/') + '%s.csv.gz' % h
f = cStringIO.StringIO(out_buf)
if self.format_type == 'json':
out_buf = json.dumps(output, indent=2)
h = hashlib.md5(out_buf).hexdigest()
upload_name = time.strftime('%Y/%m/%d/%H/') + '%s.json.gz' % h
f = io.BytesIO(out_buf)
else:
out_buf = '\n'.join([','.join(row) for row in output])
h = hashlib.md5(out_buf).hexdigest()
upload_name = time.strftime('%Y/%m/%d/%H/') + '%s.csv.gz' % h
f = cStringIO.StringIO(out_buf)
self.log_processor.internal_proxy.upload_file(f,
self.log_processor_account,
self.log_processor_container,
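As a rough Python 2 sketch of the object name the JSON path produces (the timestamp and hash are illustrative):

import hashlib
import json
import time

out_buf = json.dumps({'stats_data': {}}, indent=2)
h = hashlib.md5(out_buf).hexdigest()
# upload_name looks like '2014/09/02/11/<md5 hex digest>.json.gz'
upload_name = time.strftime('%Y/%m/%d/%H/') + '%s.json.gz' % h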
@@ -384,7 +440,7 @@ class LogProcessorDaemon(Daemon):
"""
Process log files that fall within the lookback interval.
Upload resulting csv file to stats account.
Upload resulting csv or json file to stats account.
Update processed files list and upload to stats account.
"""