From 3fdc8e347d7197813121f68463434fdabf4d9aa9 Mon Sep 17 00:00:00 2001 From: Alexander Kislitsky Date: Fri, 11 Dec 2015 14:53:24 +0300 Subject: [PATCH] OSWL duplication in DB handled We can have duplicated OSWLs. They have the same checksum but different external_ids. We should ignore such 'new' OSWLs in the report generation process. Change-Id: I71b5ef91eb66a9ce475a78da2485f1ec439acd4e Closes-Bug: #1524820 --- .../api/resources/csv_exporter.py | 2 +- .../api/resources/utils/oswl_stats_to_csv.py | 15 +++++- .../resources/utils/test_oswl_stats_to_csv.py | 48 +++++++++++++++++++ 3 files changed, 62 insertions(+), 3 deletions(-) diff --git a/analytics/fuel_analytics/api/resources/csv_exporter.py b/analytics/fuel_analytics/api/resources/csv_exporter.py index 762bdea..6cbf49b 100644 --- a/analytics/fuel_analytics/api/resources/csv_exporter.py +++ b/analytics/fuel_analytics/api/resources/csv_exporter.py @@ -174,7 +174,7 @@ def get_oswls_query(resource_type, from_date=None, to_date=None): OSWS.id, OSWS.master_node_uid, OSWS.cluster_id, OSWS.created_date, # for checking if row is duplicated in CSV OSWS.created_date.label('stats_on_date'), # for showing in CSV - OSWS.resource_type, OSWS.resource_data, + OSWS.resource_type, OSWS.resource_data, OSWS.resource_checksum, IS.creation_date.label('installation_created_date'), IS.modification_date.label('installation_updated_date'), IS.structure['fuel_release'].label('fuel_release'), diff --git a/analytics/fuel_analytics/api/resources/utils/oswl_stats_to_csv.py b/analytics/fuel_analytics/api/resources/utils/oswl_stats_to_csv.py index 58bdafb..a7e09d5 100644 --- a/analytics/fuel_analytics/api/resources/utils/oswl_stats_to_csv.py +++ b/analytics/fuel_analytics/api/resources/utils/oswl_stats_to_csv.py @@ -181,6 +181,17 @@ class OswlStatsToCsv(object): return_value.resource_data['modified'] = {} yield return_value + def _add_oswl_to_horizon(self, horizon, oswl): + idx = export_utils.get_index(oswl, *self.OSWL_INDEX_FIELDS) + + # We can 
have duplication of the oswls in the DB with the same + # checksum but with different external_id. We shouldn't add + # the same oswl into horizon if it is already present in it. + old_oswl = horizon.get(idx) + if old_oswl is None or \ + old_oswl.resource_checksum != oswl.resource_checksum: + horizon[idx] = oswl + def fill_date_gaps(self, oswls, to_date): """Fills the gaps of stats info. If masternode sends stats on on_date and we haven't oswl on this date - the last one oswl for @@ -210,8 +221,8 @@ class OswlStatsToCsv(object): last_date += datetime.timedelta(days=1) if last_date > to_date: break - idx = export_utils.get_index(oswl, *self.OSWL_INDEX_FIELDS) - horizon[idx] = oswl + + self._add_oswl_to_horizon(horizon, oswl) # Filling gaps if oswls exhausted on date before to_date if last_date is not None: diff --git a/analytics/fuel_analytics/test/api/resources/utils/test_oswl_stats_to_csv.py b/analytics/fuel_analytics/test/api/resources/utils/test_oswl_stats_to_csv.py index a5ff6d3..73bea8a 100644 --- a/analytics/fuel_analytics/test/api/resources/utils/test_oswl_stats_to_csv.py +++ b/analytics/fuel_analytics/test/api/resources/utils/test_oswl_stats_to_csv.py @@ -585,3 +585,51 @@ class OswlStatsToCsvTest(OswlTest, DbTest): for flatten_resource in flatten_resources: release = flatten_resource[release_pos] self.assertIn(release, releases) + + def test_duplicated_oswls_skipped(self): + exporter = OswlStatsToCsv() + # Creating oswls duplicates + resource_type = consts.OSWL_RESOURCE_TYPES.vm + old_days = 7 + new_days = 2 + old_created_date = datetime.utcnow().date() - timedelta(days=old_days) + oswls_saved = [ + OpenStackWorkloadStats( + master_node_uid='x', + external_id=1, + cluster_id=1, + created_date=old_created_date, + updated_time=datetime.utcnow().time(), + resource_type=resource_type, + resource_checksum='checksum', + resource_data={'current': [{'id': 1}], 'added': [{'id': 1}]} + ), + OpenStackWorkloadStats( + master_node_uid='x', + external_id=2, + cluster_id=1, 
+ created_date=(datetime.utcnow().date() - + timedelta(days=new_days)), + updated_time=datetime.utcnow().time(), + resource_type=resource_type, + resource_checksum='checksum', + resource_data={'current': [{'id': 1}], 'added': [{'id': 1}]} + ), + ] + for oswl in oswls_saved: + db.session.add(oswl) + self.get_saved_inst_structs(oswls_saved, creation_date_range=(0, 0)) + + with app.test_request_context(): + oswls = get_oswls(resource_type) + oswls_seamless = list(exporter.fill_date_gaps( + oswls, datetime.utcnow().date())) + + # Checking size of seamless report + expected_num = old_days + 1 # current date should be in report + actual_num = len(oswls_seamless) + self.assertEqual(expected_num, actual_num) + + # Checking only old oswl in seamless_oswls + for o in oswls_seamless: + self.assertEqual(old_created_date, o.created_date)