DB fields added to action_logs for SQL speedup

With about 1M rows in the table, the JSON index becomes too slow. We
copy the action_name and action_type values from action_logs.body into
separate indexed columns in the action_logs table.

Redundant JSON serialization/deserialization at the Python level is
removed by changing the 'structure' column type from TEXT to JSON in
the InstallationStructure model. The 'structure' column already held
JSON data in the DB, so only the SQLAlchemy model is changed.

Change-Id: I415f4fe607f44ac670f63f961af7fb4998f75dd6
Closes-Bug: #1540289
This commit is contained in:
Alexander Kislitsky 2016-02-04 16:38:49 +03:00
parent 4203f56d39
commit 820770807f
8 changed files with 118 additions and 8 deletions

View File

@ -47,3 +47,7 @@ class ActionLog(db.Model):
master_node_uid = db.Column(db.String, nullable=False)
external_id = db.Column(db.Integer, nullable=False)
body = db.Column(JSON)
action_type = db.Column(db.Text)
action_name = db.Column(db.Text)
db.Index('ix_action_logs_action_name_action_type',
'action_name', 'action_type')

View File

@ -109,8 +109,8 @@ def get_action_logs_query():
"WITHOUT TIME ZONE end_timestamp, " \
"body->>'action_name' action_name " \
"FROM action_logs " \
"WHERE body->>'action_type'='nailgun_task' " \
"AND body->>'action_name'='verify_networks' " \
"WHERE action_type='nailgun_task' " \
"AND action_name='verify_networks' " \
"AND to_timestamp(body->>'end_timestamp', 'YYYY-MM-DD')::" \
"TIMESTAMP WITHOUT TIME ZONE >= :from_date " \
"AND to_timestamp(body->>'end_timestamp', 'YYYY-MM-DD')::" \

View File

@ -165,6 +165,8 @@ class StatsToCsvExportTest(InstStructureTest, DbTest):
ActionLog(
master_node_uid=inst_structure.master_node_uid,
external_id=1,
action_type='nailgun_task',
action_name=exporter.NETWORK_VERIFICATION_ACTION,
body={'cluster_id': clusters[0]['id'],
'end_timestamp': datetime.utcnow().isoformat(),
'action_type': 'nailgun_task',
@ -174,6 +176,8 @@ class StatsToCsvExportTest(InstStructureTest, DbTest):
ActionLog(
master_node_uid=inst_structure.master_node_uid,
external_id=2,
action_type='nailgun_task',
action_name=exporter.NETWORK_VERIFICATION_ACTION,
body={'cluster_id': clusters[1]['id'],
'end_timestamp': datetime.utcnow().isoformat(),
'action_type': 'nailgun_task',

View File

@ -0,0 +1,61 @@
# Copyright 2016 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""action_type action_name columns added to action_logs
Revision ID: 2ec36f35eeaa
Revises: 4f46e2c07565
Create Date: 2016-02-03 15:52:13.397631
"""
# action_type and action_name columns added to action_logs
revision = '2ec36f35eeaa'
down_revision = '4f46e2c07565'
from alembic import op
import sqlalchemy as sa
def upgrade():
    """Add indexed action_name/action_type columns to action_logs.

    The columns duplicate values from the JSON ``body`` field so that
    queries can use a plain btree index instead of a slow JSON
    expression scan, then backfills them for existing rows.
    """
    op.add_column('action_logs', sa.Column('action_name', sa.Text(),
                                           nullable=True))
    op.add_column('action_logs', sa.Column('action_type', sa.Text(),
                                           nullable=True))
    op.create_index(op.f('ix_action_logs_action_name_action_type'),
                    'action_logs', ['action_name', 'action_type'],
                    unique=False)
    # Backfill both columns in a single pass over the table.
    # NOTE: ``body->>'key'`` extracts the value as unquoted text.  The
    # previous ``body->'key'::TEXT`` form parsed as
    # ``body -> ('key'::text)`` and produced a JSON value, so quoted
    # strings (e.g. '"deployment"') would have been stored in the new
    # TEXT columns instead of the bare value.
    backfill = sa.sql.text(
        "UPDATE action_logs "
        "SET action_name = body->>'action_name', "
        "action_type = body->>'action_type'"
    )
    op.get_bind().execute(backfill)
def downgrade():
    """Drop the action_name/action_type columns and their shared index."""
    op.drop_index(op.f('ix_action_logs_action_name_action_type'),
                  table_name='action_logs')
    # Drop in the reverse order of creation.
    for column_name in ('action_type', 'action_name'):
        op.drop_column('action_logs', column_name)

View File

@ -28,6 +28,10 @@ class ActionLog(db.Model):
master_node_uid = db.Column(db.String, nullable=False)
external_id = db.Column(db.Integer, nullable=False)
body = db.Column(db.Text, nullable=False)
action_type = db.Column(db.Text)
action_name = db.Column(db.Text)
db.Index('ix_action_logs_action_name_action_type',
'action_name', 'action_type')
class InstallationStructure(db.Model):
@ -35,10 +39,7 @@ class InstallationStructure(db.Model):
id = db.Column(db.Integer, primary_key=True)
master_node_uid = db.Column(db.String, nullable=False, unique=True)
# When we use JSON type in the model definition field value
# is not deserialized
# TODO(akislitsky): found how to use JSON instead db.Text here
structure = db.Column(db.Text)
structure = db.Column(JSON, nullable=False)
creation_date = db.Column(db.DateTime)
modification_date = db.Column(db.DateTime)
is_filtered = db.Column(db.Boolean, default=False, index=True)

View File

@ -48,6 +48,11 @@ def post():
objects_info.extend(_extract_objects_info(existed_objs))
skipped_objs = []
for obj in action_logs_to_add:
# Index scans over JSON fields are slow, so we copy the action
# name and action type into dedicated columns of action_logs.
obj['action_type'] = obj['body'].get('action_type')
obj['action_name'] = obj['body'].get('action_name')
if obj['body']['action_type'] == 'nailgun_task' and \
not obj['body']['end_timestamp']:
skipped_objs.append(obj)

View File

@ -15,7 +15,6 @@
from datetime import datetime
from dateutil import parser
from flask import Blueprint
from flask import json
from flask import request
from flask_jsonschema import validate as validate_request
@ -53,7 +52,7 @@ def post():
obj.modification_date = datetime.utcnow()
status_code = 200
obj.is_filtered = _is_filtered(structure)
obj.structure = json.dumps(structure)
obj.structure = structure
db.session.add(obj)
return status_code, {'status': 'ok'}

View File

@ -277,3 +277,39 @@ class TestActionLogs(DbTest):
for r in resp_logs:
self.assertEqual(consts.ACTION_LOG_STATUSES.failed,
r['status'])
def test_action_type_action_name_copied_to_columns(self):
    """Posted action logs must mirror body['action_name'] and
    body['action_type'] into the dedicated indexed columns."""
    bodies = [
        {'id': 1, 'action_name': 'deployment',
         'action_type': 'nailgun_task', 'end_timestamp': None},
        {'id': 2, 'action_name': '',
         'action_type': 'http_request', 'end_timestamp': "1"},
    ]
    action_logs_data = [
        {'master_node_uid': 'xx', 'external_id': num, 'body': body}
        for num, body in enumerate(bodies, start=1)
    ]
    resp = self.post(
        '/api/v1/action_logs/',
        {'action_logs': action_logs_data}
    )
    self.check_response_ok(resp)
    # Every stored row must have the columns equal to the body values.
    for log in db.session.query(ActionLog).all():
        self.assertEqual(log.action_name, log.body['action_name'])
        self.assertEqual(log.action_type, log.body['action_type'])