From 820770807fe170af8e101fb05ceb5570cdc1ff32 Mon Sep 17 00:00:00 2001 From: Alexander Kislitsky Date: Thu, 4 Feb 2016 16:38:49 +0300 Subject: [PATCH] DB fields added to action_logs for SQL speedup On the 1M number of rows in the table JSON index begins to work too slow. We are copy action_name and action_type values from action_logs.body to separate indexed columns in the action_logs table. Redundant JSON serialization/deserealization on the python level is removed by changing column 'structure' type from TEXT to JSON in the InstallationStrurcure model. We already had JSON data in 'structure' in the DB, thus only SQLAlchemy model is changed. Change-Id: I415f4fe607f44ac670f63f961af7fb4998f75dd6 Closes-Bug: #1540289 --- analytics/fuel_analytics/api/db/model.py | 4 ++ .../api/resources/csv_exporter.py | 4 +- .../api/resources/utils/test_stats_to_csv.py | 4 ++ ...a_action_type_action_name_columns_added.py | 61 +++++++++++++++++++ collector/collector/api/db/model.py | 9 +-- .../collector/api/resources/action_logs.py | 5 ++ .../api/resources/installation_structure.py | 3 +- .../test/resources/test_action_logs.py | 36 +++++++++++ 8 files changed, 118 insertions(+), 8 deletions(-) create mode 100644 collector/collector/api/db/migrations/versions/2ec36f35eeaa_action_type_action_name_columns_added.py diff --git a/analytics/fuel_analytics/api/db/model.py b/analytics/fuel_analytics/api/db/model.py index b06ca67..26f5305 100644 --- a/analytics/fuel_analytics/api/db/model.py +++ b/analytics/fuel_analytics/api/db/model.py @@ -47,3 +47,7 @@ class ActionLog(db.Model): master_node_uid = db.Column(db.String, nullable=False) external_id = db.Column(db.Integer, nullable=False) body = db.Column(JSON) + action_type = db.Column(db.Text) + action_name = db.Column(db.Text) + db.Index('ix_action_logs_action_name_action_type', + 'action_name', 'action_type') diff --git a/analytics/fuel_analytics/api/resources/csv_exporter.py b/analytics/fuel_analytics/api/resources/csv_exporter.py index 47375d9..75aabcd 100644 --- a/analytics/fuel_analytics/api/resources/csv_exporter.py +++ b/analytics/fuel_analytics/api/resources/csv_exporter.py @@ -109,8 +109,8 @@ def get_action_logs_query(): "WITHOUT TIME ZONE end_timestamp, " \ "body->>'action_name' action_name " \ "FROM action_logs " \ - "WHERE body->>'action_type'='nailgun_task' " \ - "AND body->>'action_name'='verify_networks' " \ + "WHERE action_type='nailgun_task' " \ + "AND action_name='verify_networks' " \ "AND to_timestamp(body->>'end_timestamp', 'YYYY-MM-DD')::" \ "TIMESTAMP WITHOUT TIME ZONE >= :from_date " \ "AND to_timestamp(body->>'end_timestamp', 'YYYY-MM-DD')::" \ diff --git a/analytics/fuel_analytics/test/api/resources/utils/test_stats_to_csv.py b/analytics/fuel_analytics/test/api/resources/utils/test_stats_to_csv.py index 166a658..91b8451 100644 --- a/analytics/fuel_analytics/test/api/resources/utils/test_stats_to_csv.py +++ b/analytics/fuel_analytics/test/api/resources/utils/test_stats_to_csv.py @@ -165,6 +165,8 @@ class StatsToCsvExportTest(InstStructureTest, DbTest): ActionLog( master_node_uid=inst_structure.master_node_uid, external_id=1, + action_type='nailgun_task', + action_name=exporter.NETWORK_VERIFICATION_ACTION, body={'cluster_id': clusters[0]['id'], 'end_timestamp': datetime.utcnow().isoformat(), 'action_type': 'nailgun_task', @@ -174,6 +176,8 @@ class StatsToCsvExportTest(InstStructureTest, DbTest): ActionLog( master_node_uid=inst_structure.master_node_uid, external_id=2, + action_type='nailgun_task', + action_name=exporter.NETWORK_VERIFICATION_ACTION, body={'cluster_id': clusters[1]['id'], 'end_timestamp': datetime.utcnow().isoformat(), 'action_type': 'nailgun_task', diff --git a/collector/collector/api/db/migrations/versions/2ec36f35eeaa_action_type_action_name_columns_added.py b/collector/collector/api/db/migrations/versions/2ec36f35eeaa_action_type_action_name_columns_added.py new file mode 100644 index 0000000..5c4eef0 --- /dev/null +++ b/collector/collector/api/db/migrations/versions/2ec36f35eeaa_action_type_action_name_columns_added.py @@ -0,0 +1,61 @@ +# Copyright 2016 Mirantis, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""action_type action_name columns added to action_logs + +Revision ID: 2ec36f35eeaa +Revises: 4f46e2c07565 +Create Date: 2016-02-03 15:52:13.397631 + +""" + +# action_type and action_name columns added to action_logs +revision = '2ec36f35eeaa' +down_revision = '4f46e2c07565' + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + ### commands auto generated by Alembic - please adjust! ### + op.add_column('action_logs', sa.Column('action_name', sa.Text(), + nullable=True)) + op.add_column('action_logs', sa.Column('action_type', sa.Text(), + nullable=True)) + op.create_index(op.f('ix_action_logs_action_name_action_type'), + 'action_logs', ['action_name', 'action_type'], + unique=False) + + set_action_name = sa.sql.text( + "UPDATE action_logs " + "SET action_name = body->'action_name'::TEXT" + ) + set_action_type = sa.sql.text( + "UPDATE action_logs " + "SET action_type = body->'action_type'::TEXT" + ) + connection = op.get_bind() + connection.execute(set_action_name) + connection.execute(set_action_type) + ### end Alembic commands ### + + +def downgrade(): + ### commands auto generated by Alembic - please adjust! ### + op.drop_index(op.f('ix_action_logs_action_name_action_type'), + table_name='action_logs') + op.drop_column('action_logs', 'action_type') + op.drop_column('action_logs', 'action_name') + ### end Alembic commands ### diff --git a/collector/collector/api/db/model.py b/collector/collector/api/db/model.py index e8b88e0..31a3204 100644 --- a/collector/collector/api/db/model.py +++ b/collector/collector/api/db/model.py @@ -28,6 +28,10 @@ class ActionLog(db.Model): master_node_uid = db.Column(db.String, nullable=False) external_id = db.Column(db.Integer, nullable=False) body = db.Column(db.Text, nullable=False) + action_type = db.Column(db.Text) + action_name = db.Column(db.Text) + db.Index('ix_action_logs_action_name_action_type', + 'action_name', 'action_type') class InstallationStructure(db.Model): @@ -35,10 +39,7 @@ class InstallationStructure(db.Model): id = db.Column(db.Integer, primary_key=True) master_node_uid = db.Column(db.String, nullable=False, unique=True) - # When we use JSON type in the model definition field value - # is not deserialized - # TODO(akislitsky): found how to use JSON instead db.Text here - structure = db.Column(db.Text) + structure = db.Column(JSON, nullable=False) creation_date = db.Column(db.DateTime) modification_date = db.Column(db.DateTime) is_filtered = db.Column(db.Boolean, default=False, index=True) diff --git a/collector/collector/api/resources/action_logs.py b/collector/collector/api/resources/action_logs.py index 5026d77..14d1db7 100644 --- a/collector/collector/api/resources/action_logs.py +++ b/collector/collector/api/resources/action_logs.py @@ -48,6 +48,11 @@ def post(): objects_info.extend(_extract_objects_info(existed_objs)) skipped_objs = [] for obj in action_logs_to_add: + # Scan index of JSON fields is slow, thus we are copying + # action name and action type to columns of actions_logs. + obj['action_type'] = obj['body'].get('action_type') + obj['action_name'] = obj['body'].get('action_name') + if obj['body']['action_type'] == 'nailgun_task' and \ not obj['body']['end_timestamp']: skipped_objs.append(obj) diff --git a/collector/collector/api/resources/installation_structure.py b/collector/collector/api/resources/installation_structure.py index 59d185c..e9f3fe0 100644 --- a/collector/collector/api/resources/installation_structure.py +++ b/collector/collector/api/resources/installation_structure.py @@ -15,7 +15,6 @@ from datetime import datetime from dateutil import parser from flask import Blueprint -from flask import json from flask import request from flask_jsonschema import validate as validate_request @@ -53,7 +52,7 @@ def post(): obj.modification_date = datetime.utcnow() status_code = 200 obj.is_filtered = _is_filtered(structure) - obj.structure = json.dumps(structure) + obj.structure = structure db.session.add(obj) return status_code, {'status': 'ok'} diff --git a/collector/collector/test/resources/test_action_logs.py b/collector/collector/test/resources/test_action_logs.py index 9d2e7b0..8d0b284 100644 --- a/collector/collector/test/resources/test_action_logs.py +++ b/collector/collector/test/resources/test_action_logs.py @@ -277,3 +277,39 @@ class TestActionLogs(DbTest): for r in resp_logs: self.assertEqual(consts.ACTION_LOG_STATUSES.failed, r['status']) + + def test_action_type_action_name_copied_to_columns(self): + action_logs_data = [ + { + 'master_node_uid': 'xx', + 'external_id': 1, + 'body': { + 'id': 1, + 'action_name': 'deployment', + 'action_type': 'nailgun_task', + 'end_timestamp': None + } + }, + { + 'master_node_uid': 'xx', + 'external_id': 2, + 'body': { + 'id': 2, + 'action_name': '', + 'action_type': 'http_request', + 'end_timestamp': "1" + } + } + ] + resp = self.post( + '/api/v1/action_logs/', + {'action_logs': action_logs_data} + ) + self.check_response_ok(resp) + + action_logs = db.session.query(ActionLog).all() + for action_log in action_logs: + self.assertEqual(action_log.action_name, + action_log.body['action_name']) + self.assertEqual(action_log.action_type, + action_log.body['action_type'])