Memory consumption reduced in fuel-stats analytics

We had huge memory consumption on fetching OSWLs due to fetching
clusters data in each OSWL row.

Joined clusters data is removed from each OSWL row. Explicit
fetching of version info from clusters is introduced in the CSV
exporter.

We are using SQLAlchemy yield_per for fetching large query results.
The yield_per page parameter was reduced from 1000 to 100.

We still have issues with memory consumption when generating all
reports. Thus we are temporarily removing the link to
'All reports' from the web UI.

Example uWSGI configuration changed to start http service.

Change-Id: I66c97058c0f15f8a15b626ab1fa222106754a455
Partial-Bug: #1564427
This commit is contained in:
Alexander Kislitsky 2016-04-04 15:58:27 +03:00
parent dce427279e
commit ebe8d4eb46
7 changed files with 182 additions and 86 deletions

View File

@ -30,7 +30,7 @@ class Production(object):
SQLALCHEMY_DATABASE_URI = \
'postgresql://collector:*****@localhost/collector'
CSV_DEFAULT_FROM_DATE_DAYS = 90
CSV_DB_YIELD_PER = 1000
CSV_DB_YIELD_PER = 100
JSON_DB_DEFAULT_LIMIT = 1000
# Number of attachments included into volumes CSV report
CSV_VOLUME_ATTACHMENTS_NUM = 1

View File

@ -65,16 +65,22 @@ def get_to_date():
default_value=datetime.utcnow().date())
def get_inst_structures_query(from_date=None, to_date=None):
def get_inst_structures_query(from_date=None, to_date=None, fields=()):
"""Composes query for fetching not filtered installation
structures info with filtering by from and to dates and
ordering by id. Installation structure is not filtered
if is_filtered is False or None.
:param from_date: filter from creation or modification date
:param to_date: filter to creation or modification date
:param fields: fields to be filtered in query. All fields will
be fetched if parameter is empty.
:return: SQLAlchemy query
"""
query = db.session.query(IS)
if fields:
query = db.session.query(*fields)
else:
query = db.session.query(IS)
query = query.filter(or_(
IS.is_filtered == bool(False), # workaround for PEP8 error E712
IS.is_filtered.is_(None)))
@ -94,8 +100,29 @@ def get_inst_structures():
yield_per = app.config['CSV_DB_YIELD_PER']
from_date = get_from_date()
to_date = get_to_date()
return get_inst_structures_query(from_date=from_date,
to_date=to_date).yield_per(yield_per)
query = get_inst_structures_query(from_date=from_date,
to_date=to_date)
return query.yield_per(yield_per)
def get_clusters_version_info():
"""Returns dict of version info from clusters.
:return: dict of saved cluster versions with
structure {mn_uid: {cluster_id: version_info}}
"""
yield_per = app.config['CSV_DB_YIELD_PER']
from_date = get_from_date()
to_date = get_to_date()
query = get_inst_structures_query(
from_date=from_date,
to_date=to_date,
fields=(IS.master_node_uid, IS.structure['clusters'].label('clusters'))
)
clusters_version_info = {}
for info in query.yield_per(yield_per):
_add_oswl_to_clusters_versions_cache(info, clusters_version_info)
return clusters_version_info
def get_action_logs_query():
@ -179,7 +206,6 @@ def get_oswls_query(resource_type, from_date=None, to_date=None):
IS.creation_date.label('installation_created_date'),
IS.modification_date.label('installation_updated_date'),
IS.structure['fuel_release'].label('fuel_release_from_inst_info'),
IS.structure['clusters'].label('clusters'),
IS.is_filtered).\
join(IS, IS.master_node_uid == OSWS.master_node_uid).\
filter(OSWS.resource_type == resource_type).\
@ -188,17 +214,51 @@ def get_oswls_query(resource_type, from_date=None, to_date=None):
query = query.filter(OSWS.created_date >= from_date)
if to_date is not None:
query = query.filter(OSWS.created_date <= to_date)
return query.order_by(OSWS.created_date)
# For proper handling of paging we must use additional ordering by id.
# Otherwise we will lose some OSWLs from the execution result.
query = query.order_by(OSWS.created_date, OSWS.id)
return query
def get_oswls(resource_type):
yield_per = app.config['CSV_DB_YIELD_PER']
app.logger.debug("Fetching %s oswls with yeld per %d",
app.logger.debug("Fetching %s oswls with yield per %d",
resource_type, yield_per)
from_date = get_from_date()
to_date = get_to_date()
return get_oswls_query(resource_type, from_date=from_date,
to_date=to_date).yield_per(yield_per)
query = get_oswls_query(resource_type, from_date=from_date,
to_date=to_date)
return query.yield_per(yield_per)
def _add_oswl_to_clusters_versions_cache(inst_structure, clusters_versions):
"""Adds oswl clusters version_info into clusters_versions cache.
:param inst_structure: InstallationStructure DB object
:type inst_structure: fuel_analytics.api.db.model.InstallationStructure
:param clusters_versions: cache for saving cluster versions with
structure {mn_uid: {cluster_id: version_info}}
:type clusters_versions: dict
"""
mn_uid = inst_structure.master_node_uid
clusters = inst_structure.clusters or []
clusters_versions[mn_uid] = {}
for cluster in clusters:
fuel_version = cluster.get('fuel_version')
if not fuel_version:
continue
version_info = {'fuel_version': fuel_version}
release = cluster.get('release')
if release:
version_info['release_version'] = release.get('version')
version_info['release_os'] = release.get('os')
version_info['release_name'] = release.get('name')
clusters_versions[mn_uid][cluster['id']] = version_info
@bp.route('/<resource_type>', methods=['GET'])
@ -208,7 +268,10 @@ def oswl_to_csv(resource_type):
exporter = OswlStatsToCsv()
oswls = get_oswls(resource_type)
result = exporter.export(resource_type, oswls, get_to_date())
clusters_version_info = get_clusters_version_info()
result = exporter.export(resource_type, oswls, get_to_date(),
clusters_version_info)
# NOTE: result - is generator, but streaming can not work with some
# WSGI middlewares: http://flask.pocoo.org/docs/0.10/patterns/streaming/
@ -229,10 +292,12 @@ def get_resources_types():
return (row[0] for row in result)
def get_all_reports(from_date, to_date):
def get_all_reports(from_date, to_date, clusters_version_info):
"""Returns generator on all reports info.
:param from_date: get reports from date
:param to_date: get reports to date
:param clusters_version_info: dict with version_info fetched from
clusters
:return: generator on sequence of tuples (report data, report name)
"""
app.logger.debug("Getting all reports")
@ -247,26 +312,27 @@ def get_all_reports(from_date, to_date):
app.logger.debug("Getting report '%s'", resource_type)
oswls = get_oswls_query(resource_type, from_date=from_date,
to_date=to_date)
report = oswl_exporter.export(resource_type, oswls, to_date)
report = oswl_exporter.export(resource_type, oswls,
clusters_version_info, to_date)
app.logger.debug("Report '%s' got", resource_type)
yield report, '{}.csv'.format(resource_type)
# Clusters report
app.logger.debug("Getting clusters report")
inst_strucutres = get_inst_structures_query(from_date=from_date,
inst_structures = get_inst_structures_query(from_date=from_date,
to_date=to_date)
query_action_logs = get_action_logs_query()
action_logs = db.session.execute(query_action_logs,
{'from_date': from_date,
'to_date': to_date})
clusters = stats_exporter.export_clusters(inst_strucutres,
clusters = stats_exporter.export_clusters(inst_structures,
action_logs)
app.logger.debug("Clusters report got")
yield clusters, CLUSTERS_REPORT_FILE
# Plugins report
app.logger.debug("Getting plugins report")
plugins = stats_exporter.export_plugins(inst_strucutres)
plugins = stats_exporter.export_plugins(inst_structures)
app.logger.debug("Plugins report got")
yield plugins, PLUGINS_REPORT_FILE
@ -309,7 +375,9 @@ def all_reports():
app.logger.debug("Handling all_reports get request")
from_date = get_from_date()
to_date = get_to_date()
reports = get_all_reports(from_date, to_date)
clusters_version_info = get_clusters_version_info()
reports = get_all_reports(from_date, to_date, clusters_version_info)
name = 'reports_from{}_to{}'.format(get_from_date(), get_to_date())
headers = {

View File

@ -95,43 +95,6 @@ class OswlStatsToCsv(object):
return result
def _add_oswl_to_clusters_versions_cache(self, oswl, clusters_versions):
"""Adds oswl clusters version_info into clusters_versions cache.
:param oswl: OSWL DB object
:type oswl: fuel_analytics.api.db.model.OpenStackWorkloadStats
:param clusters_versions: cache for saving cluster versions with
structure {mn_uid: {cluster_id: version_info}}
:type clusters_versions: dict
"""
mn_uid = oswl.master_node_uid
# Result of csv_exporter.get_oswls_query contains info about all
# clusters in the installation. Thus we need to add clusters data
# into the cache only once for specified master_node_uid.
if mn_uid in clusters_versions:
return
if oswl.clusters is None:
return
clusters_versions[mn_uid] = {}
for cluster in oswl.clusters:
fuel_version = cluster.get('fuel_version')
if not fuel_version:
continue
version_info = {'fuel_version': fuel_version}
release = cluster.get('release')
if release:
version_info['release_version'] = release.get('version')
version_info['release_os'] = release.get('os')
version_info['release_name'] = release.get('name')
clusters_versions[mn_uid][cluster['id']] = version_info
def handle_empty_version_info(self, oswl, clusters_versions):
"""Handles empty version info in oswl object
@ -151,7 +114,7 @@ class OswlStatsToCsv(object):
if oswl.version_info:
return
self._add_oswl_to_clusters_versions_cache(oswl, clusters_versions)
# self._add_oswl_to_clusters_versions_cache(oswl, clusters_versions)
mn_uid = oswl.master_node_uid
cluster_id = oswl.cluster_id
@ -171,24 +134,23 @@ class OswlStatsToCsv(object):
oswl.version_info = version_info
def get_flatten_resources(self, resource_type, oswl_keys_paths,
resource_keys_paths, oswls):
"""Gets flatten vms data
resource_keys_paths, oswls,
clusters_version_info):
"""Gets flatten resources data
:param oswl_keys_paths: list of keys paths in the OpenStack workload
info
:param resource_keys_paths: list of keys paths in the resource
:param oswls: list of OpenStack workloads
:param clusters_version_info: clusters version info cache.
Cache is used only if version_info is not provided in the oswl.
Cache structure: {mn_uid: {cluster_id: fuel_release}}
:return: generator on flatten resources info collection
"""
app.logger.debug("Getting OSWL flatten %s info started", resource_type)
# Cache for saving cluster versions. Cache is used only if version_info
# is not provided in the oswl.
# Structure: {mn_uid: {cluster_id: fuel_release}}
clusters_versions = {}
for oswl in oswls:
try:
self.handle_empty_version_info(oswl, clusters_versions)
self.handle_empty_version_info(oswl, clusters_version_info)
flatten_oswl = export_utils.get_flatten_data(oswl_keys_paths,
oswl)
resource_data = oswl.resource_data
@ -328,7 +290,7 @@ class OswlStatsToCsv(object):
app.logger.debug("Filling gaps in oswls finished")
def export(self, resource_type, oswls, to_date):
def export(self, resource_type, oswls, to_date, clusters_version_info):
app.logger.info("Export oswls %s info into CSV started",
resource_type)
oswl_keys_paths, resource_keys_paths, csv_keys_paths = \
@ -337,7 +299,7 @@ class OswlStatsToCsv(object):
oswls, to_date)
flatten_resources = self.get_flatten_resources(
resource_type, oswl_keys_paths, resource_keys_paths,
seamless_oswls)
seamless_oswls, clusters_version_info)
result = export_utils.flatten_data_as_csv(
csv_keys_paths, flatten_resources)
app.logger.info("Export oswls %s info into CSV finished",

View File

@ -192,7 +192,7 @@ class CsvExporterTest(OswlTest, DbTest):
to_date = datetime.utcnow()
from_date = to_date - timedelta(days=30)
reports = ce.get_all_reports(from_date, to_date)
reports = ce.get_all_reports(from_date, to_date, {})
expected_reports = [
ce.CLUSTERS_REPORT_FILE,
@ -213,7 +213,7 @@ class CsvExporterTest(OswlTest, DbTest):
from_date = datetime.utcnow()
to_date = from_date + timedelta(days=7)
reports_generators = ce.get_all_reports(from_date, to_date)
reports_generators = ce.get_all_reports(from_date, to_date, {})
# Checking no exception raised
for report_generator, report_name in reports_generators:

View File

@ -30,6 +30,7 @@ from fuel_analytics.api.app import db
from fuel_analytics.api.common import consts
from fuel_analytics.api.db.model import InstallationStructure
from fuel_analytics.api.db.model import OpenStackWorkloadStats
from fuel_analytics.api.resources.csv_exporter import get_clusters_version_info
from fuel_analytics.api.resources.csv_exporter import get_oswls
from fuel_analytics.api.resources.csv_exporter import get_oswls_query
from fuel_analytics.api.resources.utils import export_utils
@ -66,7 +67,7 @@ class OswlStatsToCsvTest(OswlTest, DbTest):
exporter.get_resource_keys_paths(resource_type)
oswls = self.generate_oswls(2, resource_type)
flatten_resources = exporter.get_flatten_resources(
resource_type, oswl_keys_paths, resource_keys_paths, oswls)
resource_type, oswl_keys_paths, resource_keys_paths, oswls, {})
self.assertTrue(isinstance(flatten_resources, types.GeneratorType))
for _ in flatten_resources:
pass
@ -78,7 +79,7 @@ class OswlStatsToCsvTest(OswlTest, DbTest):
exporter.get_resource_keys_paths(resource_type)
oswls = self.generate_oswls(1, resource_type)
flatten_resources = exporter.get_flatten_resources(
resource_type, oswl_keys_paths, resource_keys_paths, oswls)
resource_type, oswl_keys_paths, resource_keys_paths, oswls, {})
ephemeral_idx = csv_keys_paths.index(['flavor', 'ephemeral'])
for fr in flatten_resources:
@ -145,7 +146,7 @@ class OswlStatsToCsvTest(OswlTest, DbTest):
self.assertEqual(num, len(oswls))
# Checking export
result = exporter.export(resource_type, oswls,
datetime.utcnow().date())
datetime.utcnow().date(), {})
self.assertTrue(isinstance(result, types.GeneratorType))
output = six.StringIO(list(result))
reader = csv.reader(output)
@ -155,7 +156,7 @@ class OswlStatsToCsvTest(OswlTest, DbTest):
def test_export_on_empty_data(self):
exporter = OswlStatsToCsv()
for resource_type in self.RESOURCE_TYPES:
result = exporter.export(resource_type, [], None)
result = exporter.export(resource_type, [], {}, None)
self.assertTrue(isinstance(result, types.GeneratorType))
output = six.StringIO(list(result))
reader = csv.reader(output)
@ -351,7 +352,7 @@ class OswlStatsToCsvTest(OswlTest, DbTest):
# Filtering oswls
oswls = get_oswls(resource_type)
result = exporter.export(resource_type, oswls,
datetime.utcnow().date())
datetime.utcnow().date(), {})
self.assertTrue(isinstance(result, types.GeneratorType))
output = six.StringIO(list(result))
reader = csv.reader(output)
@ -441,7 +442,8 @@ class OswlStatsToCsvTest(OswlTest, DbTest):
result = exporter.export(
resource_type,
oswls,
(base_date - timedelta(days=1))
(base_date - timedelta(days=1)),
{}
)
# Only column names in result
self.assertEqual(1, len(list(result)))
@ -455,7 +457,8 @@ class OswlStatsToCsvTest(OswlTest, DbTest):
result = exporter.export(
resource_type,
oswls,
base_date - timedelta(days=1)
base_date - timedelta(days=1),
{}
)
# Only column names in result
self.assertEqual(1, len(list(result)))
@ -469,7 +472,8 @@ class OswlStatsToCsvTest(OswlTest, DbTest):
result = exporter.export(
resource_type,
oswls,
base_date + timedelta(days=1)
base_date + timedelta(days=1),
{}
)
# Not only column names in result
self.assertEqual(1 + 2, len(list(result)))
@ -496,11 +500,11 @@ class OswlStatsToCsvTest(OswlTest, DbTest):
# Saving installation structures for proper oswls filtering
self.get_saved_inst_structs(oswls_saved)
oswls = list(get_oswls(resource_type).all())
oswls = list(get_oswls(resource_type))
oswl_keys_paths, vm_keys_paths, csv_keys_paths = \
exporter.get_resource_keys_paths(resource_type)
flatten_volumes = exporter.get_flatten_resources(
resource_type, oswl_keys_paths, vm_keys_paths, oswls)
resource_type, oswl_keys_paths, vm_keys_paths, oswls, {})
flatten_volumes = list(flatten_volumes)
csv_att_num = app.config['CSV_VOLUME_ATTACHMENTS_NUM']
@ -527,7 +531,7 @@ class OswlStatsToCsvTest(OswlTest, DbTest):
self.get_saved_inst_structs(oswls_saved)
with app.test_request_context():
oswls = get_oswls(resource_type).all()
oswls = get_oswls(resource_type)
oswl_keys_paths, vm_keys_paths, csv_keys_paths = \
exporter.get_resource_keys_paths(resource_type)
@ -537,7 +541,8 @@ class OswlStatsToCsvTest(OswlTest, DbTest):
'get_additional_resource_info',
side_effect=side_effect):
flatten_resources = exporter.get_flatten_resources(
resource_type, oswl_keys_paths, vm_keys_paths, oswls)
resource_type, oswl_keys_paths, vm_keys_paths,
oswls, {})
# Checking only invalid data is not exported
self.assertEqual(num - 1, len(list(flatten_resources)))
@ -590,7 +595,8 @@ class OswlStatsToCsvTest(OswlTest, DbTest):
release_pos = csv_keys_paths.index(
['version_info', 'fuel_version'])
flatten_resources = exporter.get_flatten_resources(
resource_type, oswl_keys_paths, resource_keys_paths, oswls)
resource_type, oswl_keys_paths, resource_keys_paths,
oswls, {})
for flatten_resource in flatten_resources:
release = flatten_resource[release_pos]
self.assertIn(release, releases)
@ -698,7 +704,7 @@ class OswlStatsToCsvTest(OswlTest, DbTest):
fuel_release_pos = csv_keys_paths.index(
['version_info', 'fuel_version'])
flatten_resources = list(exporter.get_flatten_resources(
resource_type, oswl_keys_paths, resource_keys_paths, oswls))
resource_type, oswl_keys_paths, resource_keys_paths, oswls, {}))
# Checking all oswls are in flatten resources
external_uid_pos = csv_keys_paths.index(['master_node_uid'])
@ -805,7 +811,7 @@ class OswlStatsToCsvTest(OswlTest, DbTest):
flatten_resources = list(exporter.get_flatten_resources(
resource_type, oswl_keys_paths, resource_keys_paths,
oswls_seamless))
oswls_seamless, {}))
# Expected oswls num: 2 for 'first', 2 for 'second', 2 for 'third'
# and only one for finally removed 'fourth'
@ -936,6 +942,7 @@ class OswlStatsToCsvTest(OswlTest, DbTest):
with app.test_request_context():
oswls_data = list(get_oswls(resource_type))
clusters_version_info = get_clusters_version_info()
oswl_keys_paths, resource_keys_paths, csv_keys_paths = \
exporter.get_resource_keys_paths(resource_type)
@ -944,7 +951,9 @@ class OswlStatsToCsvTest(OswlTest, DbTest):
release_version_pos = csv_keys_paths.index(
['version_info', 'release_version'])
flatten_resources = list(exporter.get_flatten_resources(
resource_type, oswl_keys_paths, resource_keys_paths, oswls_data))
resource_type, oswl_keys_paths,
resource_keys_paths, oswls_data, clusters_version_info
))
self.assertEqual(len(oswls), len(flatten_resources))
@ -965,3 +974,58 @@ class OswlStatsToCsvTest(OswlTest, DbTest):
flatten_resources[2][fuel_release_pos])
self.assertEqual(release_version_from_inst_info,
flatten_resources[2][release_version_pos])
def test_get_clusters_version_info(self):
mn_uid = 'x'
cluster_id = 1
empty_cluster_id = 2
mn_uid_no_clusters = 'xx'
release_name = 'release name'
version_from_cluster = '7.0'
release_version_from_cluster = 'from_cluster_7.0'
installation_date = datetime.utcnow().date() - timedelta(days=3)
expected_version_info = {
'release_version': release_version_from_cluster,
'release_os': None,
'release_name': release_name,
'fuel_version': version_from_cluster
}
structures = [
InstallationStructure(
master_node_uid=mn_uid,
structure={
'clusters': [
{'id': cluster_id,
'fuel_version': version_from_cluster,
'release': {'version': release_version_from_cluster,
'name': release_name}},
{'id': empty_cluster_id}
]
},
creation_date=installation_date,
is_filtered=False
),
InstallationStructure(
master_node_uid=mn_uid_no_clusters,
structure={'clusters': []},
creation_date=installation_date,
is_filtered=False
)
]
for structure in structures:
db.session.add(structure)
with app.test_request_context():
clusters_version_info = get_clusters_version_info()
self.assertIn(mn_uid, clusters_version_info)
self.assertIn(cluster_id, clusters_version_info[mn_uid])
self.assertNotIn(empty_cluster_id, clusters_version_info[mn_uid])
self.assertIn(mn_uid_no_clusters, clusters_version_info)
actual_version_info = clusters_version_info[mn_uid][cluster_id]
self.assertEqual(expected_version_info, actual_version_info)
self.assertEqual({}, clusters_version_info[mn_uid_no_clusters])

View File

@ -58,7 +58,9 @@
<li><button class="btn-link" id="tenant">Tenants</button></li>
<li><button class="btn-link" id="vm">Vms</button></li>
<li><button class="btn-link" id="volume">Volumes</button></li>
<!-- TODO: uncomment after bug #1564427 will be fixed
<li><button class="btn-link" id="all">All reports</button> (download can take lot of time, please be patient)</li>
-->
</ul>
</div>
</div>

View File

@ -1,7 +1,7 @@
uwsgi:
socket: :8082
# for production app use analytics.api.app as module
module: fuel_analytics.api.app_test
callable: app
protocol: http
env: ANALYTICS_SETTINGS=/path/to/external/config.py
http: 0.0.0.0:5000
# Uncomment the following line to use an external config
# env: ANALYTICS_SETTINGS=/path/to/external/config.py