From 10a9482a91ace68c081aa95ffb7fc394f688020c Mon Sep 17 00:00:00 2001 From: Olivier Chaze Date: Wed, 23 Nov 2022 12:36:04 +0100 Subject: [PATCH] Optimizing SQL queries that filter on a time range Queries filtering on date fields are slow as they have to parse each row. There are sometimes millions of rows to parse while only a few thousand are necessary. The following patch narrows data to process by filtering more on frame_model.begin as a first step using a `BETWEEN` statement instead of >= Change-Id: I8acbc8946d9e001419f7bf5064fcebe0a0ae907a Depends-On: Ia6908d13c91a02c47863ae6ac4b595ac98f9fd91 --- cloudkitty/storage/v1/sqlalchemy/__init__.py | 15 +++++++++------ .../optimizing-sql-queries-939f48fff1805389.yaml | 11 +++++++++++ 2 files changed, 20 insertions(+), 6 deletions(-) create mode 100644 releasenotes/notes/optimizing-sql-queries-939f48fff1805389.yaml diff --git a/cloudkitty/storage/v1/sqlalchemy/__init__.py b/cloudkitty/storage/v1/sqlalchemy/__init__.py index 8920e023..938f8138 100644 --- a/cloudkitty/storage/v1/sqlalchemy/__init__.py +++ b/cloudkitty/storage/v1/sqlalchemy/__init__.py @@ -112,9 +112,10 @@ class SQLAlchemyStorage(storage.BaseStorage): if service: q = q.filter( self.frame_model.res_type == service) + # begin and end filters are both needed, do not remove one of them. q = q.filter( - self.frame_model.begin >= begin, - self.frame_model.end <= end, + self.frame_model.begin.between(begin, end), + self.frame_model.end.between(begin, end), self.frame_model.res_type != '_NO_DATA_') if groupby: q = q.group_by(sqlalchemy.sql.text(groupby)) @@ -136,9 +137,10 @@ class SQLAlchemyStorage(storage.BaseStorage): q = utils.model_query( self.frame_model, session) + # begin and end filters are both needed, do not remove one of them. 
q = q.filter( - self.frame_model.begin >= begin, - self.frame_model.end <= end) + self.frame_model.begin.between(begin, end), + self.frame_model.end.between(begin, end)) tenants = q.distinct().values( self.frame_model.tenant_id) return [tenant.tenant_id for tenant in tenants] @@ -152,9 +154,10 @@ class SQLAlchemyStorage(storage.BaseStorage): q = utils.model_query( self.frame_model, session) + # begin and end filters are both needed, do not remove one of them. q = q.filter( - self.frame_model.begin >= begin, - self.frame_model.end <= end) + self.frame_model.begin.between(begin, end), + self.frame_model.end.between(begin, end)) for filter_name, filter_value in filters.items(): if filter_value: q = q.filter( diff --git a/releasenotes/notes/optimizing-sql-queries-939f48fff1805389.yaml b/releasenotes/notes/optimizing-sql-queries-939f48fff1805389.yaml new file mode 100644 index 00000000..662982b9 --- /dev/null +++ b/releasenotes/notes/optimizing-sql-queries-939f48fff1805389.yaml @@ -0,0 +1,11 @@ +--- +features: + - | + Queries filtering on date fields are slow as they have to parse each row. + There are sometimes millions of rows to parse while only a few thousand + are necessary. + The following patch narrows data to process by filtering more on + frame_model.begin as a first step using a `BETWEEN` statement instead + of >= + The BETWEEN statement requires indexes to be efficient, which are added in + https://review.opendev.org/c/openstack/cloudkitty/+/865435/ \ No newline at end of file