Implement separate db per tenancy

At present, all time series are accumulated in the same database in
InfluxDB. This makes queries slow for tenants that have less data. This
patch enables the option to use separate database per tenancy.

This changeset implements the changes on monasca-api which handles read
requests to InfluxDB database.

It also updates the relevant docs providing link to the migration tool
which enables users to migrate their existing data to a database per
tenant model.

Change-Id: I7de6e0faf069b889d3953583b29a876c3d82c62c
Story: 2006331
Task: 36073
This commit is contained in:
Bharat Kunwar 2019-08-04 09:35:40 +01:00
parent b2c9e1551f
commit 967b918bae
5 changed files with 99 additions and 52 deletions

View File

@ -72,3 +72,18 @@ Enabling InfluxDB Time Series Index in existing deployments
If enabling TSI on an existing InfluxDB install please follow the instructions
for migrating existing data here:
https://docs.influxdata.com/influxdb/v1.7/administration/upgrading/#upgrading-influxdb-1-3-1-4-no-tsi-preview-to-1-7-x-tsi-enabled
Database Per Tenant
-------------------
It is envisaged that separate database per tenant will be the default
behaviour in a future release of Monasca. Not only would it make queries
faster for tenants, it would also allow administrators to define
retention policy per tenancy. To enable this, set
`influxdb.db_per_tenant` to `True` in `monasca-{api,persister}` config
(it defaults to `False` at the moment if not set).
To migrate existing data to database per tenant, refer to README.rst
under the following URL which also contains the Python script to
facilitate migration:
https://opendev.org/openstack/monasca-persister/src/branch/master/monasca_persister/tools/db-per-tenant/

View File

@ -30,6 +30,7 @@ from monasca_api.common.repositories import exceptions
from monasca_api.common.repositories import metrics_repository
MEASUREMENT_NOT_FOUND_MSG = "measurement not found"
DATABASE_NOT_FOUND_MSG = "database not found"
CONF = cfg.CONF
LOG = log.getLogger(__name__)
@ -42,8 +43,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository):
self.conf = cfg.CONF
self.influxdb_client = client.InfluxDBClient(
self.conf.influxdb.ip_address, self.conf.influxdb.port,
self.conf.influxdb.user, self.conf.influxdb.password,
self.conf.influxdb.database_name)
self.conf.influxdb.user, self.conf.influxdb.password)
self._version = None
self._init_version()
except Exception as ex:
@ -259,11 +259,12 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository):
else name.replace("'", "\\'").encode('utf-8')
where_clause += ' from "{}" '.format(clean_name)
# tenant id
where_clause += " where _tenant_id = '{}' ".format(tenant_id)
# region
where_clause += " and _region = '{}' ".format(region)
where_clause += " where _region = '{}'".format(region)
# tenant id
if not self.conf.influxdb.db_per_tenant:
where_clause += " and _tenant_id = '{}'".format(tenant_id)
# dimensions - optional
if dimensions:
@ -273,14 +274,14 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository):
clean_dimension_name = dimension_name.replace("\'", "\\'") if PY3 \
else dimension_name.replace("\'", "\\'").encode('utf-8')
if dimension_value == "":
where_clause += " and \"{}\" =~ /.+/ ".format(
where_clause += " and \"{}\" =~ /.+/".format(
clean_dimension_name)
elif '|' in dimension_value:
# replace ' with \' to make query parsable
clean_dimension_value = dimension_value.replace("\'", "\\'") if PY3 else \
dimension_value.replace("\'", "\\'").encode('utf-8')
where_clause += " and \"{}\" =~ /^{}$/ ".format(
where_clause += " and \"{}\" =~ /^{}$/".format(
clean_dimension_name,
clean_dimension_value)
else:
@ -288,7 +289,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository):
clean_dimension_value = dimension_value.replace("\'", "\\'") if PY3 else \
dimension_value.replace("\'", "\\'").encode('utf-8')
where_clause += " and \"{}\" = '{}' ".format(
where_clause += " and \"{}\" = '{}'".format(
clean_dimension_name,
clean_dimension_value)
@ -310,6 +311,19 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository):
end_timestamp)
return from_clause
def query_tenant_db(self, query, tenant_id):
database = ('%s_%s' % (self.conf.influxdb.database_name, tenant_id)
if self.conf.influxdb.db_per_tenant
else self.conf.influxdb.database_name)
try:
return self.influxdb_client.query(query, database=database)
except InfluxDBClientError as ex:
if (str(ex).startswith(DATABASE_NOT_FOUND_MSG) and
self.conf.influxdb.db_per_tenant):
return None
else:
raise
def list_metrics(self, tenant_id, region, name, dimensions, offset,
limit, start_timestamp=None, end_timestamp=None):
@ -322,7 +336,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository):
if offset:
query += ' offset {}'.format(int(offset) + 1)
result = self.influxdb_client.query(query)
result = self.query_tenant_db(query, tenant_id)
json_metric_list = self._build_serie_metric_list(result,
tenant_id,
@ -585,7 +599,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository):
dimensions = self._get_dimensions(tenant_id, region, name, dimensions)
query += " slimit 1"
result = self.influxdb_client.query(query)
result = self.query_tenant_db(query, tenant_id)
if not result:
return json_measurement_list
@ -661,7 +675,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository):
query = self._build_show_measurements_query(dimensions, None, tenant_id,
region)
result = self.influxdb_client.query(query)
result = self.query_tenant_db(query, tenant_id)
json_name_list = self._build_measurement_name_list(result)
return json_name_list
@ -687,7 +701,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository):
dimensions = self._get_dimensions(tenant_id, region, name, dimensions)
query += " slimit 1"
result = self.influxdb_client.query(query)
result = self.query_tenant_db(query, tenant_id)
if not result:
return json_statistics_list
@ -879,7 +893,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository):
query += where_clause + time_clause + offset_clause + limit_clause
result = self.influxdb_client.query(query)
result = self.query_tenant_db(query, tenant_id)
if not result:
return json_alarm_history_list
@ -931,7 +945,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository):
tenant_id, region,
start_timestamp,
end_timestamp)
result = self.influxdb_client.query(query)
result = self.query_tenant_db(query, tenant_id)
json_dim_name_list = self._build_serie_dimension_values(
result, dimension_name)
return json_dim_name_list
@ -946,7 +960,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository):
tenant_id, region,
start_timestamp,
end_timestamp)
result = self.influxdb_client.query(query)
result = self.query_tenant_db(query, tenant_id)
json_dim_name_list = self._build_serie_dimension_names(result)
return json_dim_name_list
except Exception as ex:

View File

@ -21,6 +21,10 @@ influxdb_opts = [
cfg.StrOpt('database_name', default='mon',
help='''
Database name where metrics are stored
'''),
cfg.BoolOpt('db_per_tenant', default=False,
help='''
Whether to use a separate database per tenant
'''),
cfg.HostAddressOpt('ip_address', default='127.0.0.1',
help='''

View File

@ -192,10 +192,10 @@ class TestRepoMetricsInfluxDB(base.BaseTestCase):
@patch("monasca_api.common.repositories.influxdb."
"metrics_repository.client.InfluxDBClient")
def test_list_dimension_values(self, influxdb_client_mock, timestamp=True):
def test_list_dimension_values(self, influxdb_client_mock, timestamp=False):
mock_client = influxdb_client_mock.return_value
tenant = u'38dc2a2549f94d2e9a4fa1cc45a4970c'
tenant_id = u'38dc2a2549f94d2e9a4fa1cc45a4970c'
region = u'useast'
metric = u'custom_metric'
column = u'hostname'
@ -209,29 +209,32 @@ class TestRepoMetricsInfluxDB(base.BaseTestCase):
u'columns': [column]
}]
}
repo = influxdb_repo.MetricsRepository()
mock_client.query.reset_mock()
if timestamp:
result = repo.list_dimension_values(tenant, region, metric, column,
start_timestamp, end_timestamp)
else:
result = repo.list_dimension_values(tenant, region, metric, column)
db_per_tenant = repo.conf.influxdb.db_per_tenant
database = repo.conf.influxdb.database_name
database += "_%s" % tenant_id if db_per_tenant else ""
result = (repo.list_dimension_values(tenant_id, region, metric, column,
start_timestamp, end_timestamp)
if timestamp else
repo.list_dimension_values(tenant_id, region, metric, column))
self.assertEqual(result, [{u'dimension_value': hostname}])
expected_query = ('show tag values from "{metric}"'
' with key = "{column}"'
' where _tenant_id = \'{tenant}\''
' and _region = \'{region}\' '
.format(tenant=tenant, region=region,
metric=metric, column=column))
expected_query += (' and time >= {start_timestamp}000000u'
' and time < {end_timestamp}000000u'
.format(start_timestamp=start_timestamp,
end_timestamp=end_timestamp)
if timestamp else '')
mock_client.query.assert_called_once_with(expected_query)
query = ('show tag values from "{metric}"'
' with key = "{column}"'
' where _region = \'{region}\''
.format(region=region, metric=metric, column=column))
query += ('' if db_per_tenant else ' and _tenant_id = \'{tenant_id}\''
.format(tenant_id=tenant_id))
query += (' and time >= {start_timestamp}000000u'
' and time < {end_timestamp}000000u'
.format(start_timestamp=start_timestamp,
end_timestamp=end_timestamp)
if timestamp else '')
mock_client.query.assert_called_once_with(query, database=database)
def test_list_dimension_values_with_timestamp(self):
self.test_list_dimension_values(timestamp=True)
@ -241,7 +244,7 @@ class TestRepoMetricsInfluxDB(base.BaseTestCase):
def test_list_dimension_names(self, influxdb_client_mock, timestamp=False):
mock_client = influxdb_client_mock.return_value
tenant = u'38dc2a2549f94d2e9a4fa1cc45a4970c'
tenant_id = u'38dc2a2549f94d2e9a4fa1cc45a4970c'
region = u'useast'
metric = u'custom_metric'
start_timestamp = 1571917171275
@ -257,11 +260,15 @@ class TestRepoMetricsInfluxDB(base.BaseTestCase):
repo = influxdb_repo.MetricsRepository()
mock_client.query.reset_mock()
if timestamp:
result = repo.list_dimension_names(tenant, region, metric,
start_timestamp, end_timestamp)
else:
result = repo.list_dimension_names(tenant, region, metric)
db_per_tenant = repo.conf.influxdb.db_per_tenant
database = repo.conf.influxdb.database_name
database += "_%s" % tenant_id if db_per_tenant else ""
result = (repo.list_dimension_names(tenant_id, region, metric,
start_timestamp, end_timestamp)
if timestamp else
repo.list_dimension_names(tenant_id, region, metric))
self.assertEqual(result,
[
@ -269,17 +276,18 @@ class TestRepoMetricsInfluxDB(base.BaseTestCase):
{u'dimension_name': u'service'}
])
expected_query = ('show tag keys from "{metric}"'
' where _tenant_id = \'{tenant}\''
' and _region = \'{region}\' '
.format(tenant=tenant, region=region, metric=metric))
expected_query += (' and time >= {start_timestamp}000000u'
' and time < {end_timestamp}000000u'
.format(start_timestamp=start_timestamp,
end_timestamp=end_timestamp)
if timestamp else '')
query = ('show tag keys from "{metric}"'
' where _region = \'{region}\''
.format(region=region, metric=metric))
query += ('' if db_per_tenant else ' and _tenant_id = \'{tenant_id}\''
.format(tenant_id=tenant_id))
query += (' and time >= {start_timestamp}000000u'
' and time < {end_timestamp}000000u'
.format(start_timestamp=start_timestamp,
end_timestamp=end_timestamp)
if timestamp else '')
mock_client.query.assert_called_once_with(expected_query)
mock_client.query.assert_called_once_with(query, database=database)
def test_list_dimension_names_with_timestamp(self):
self.test_list_dimension_names(timestamp=True)

View File

@ -0,0 +1,6 @@
---
features:
- |
Configuration option `db_per_tenant` added for InfluxDB to allow
data points to be written to dedicated tenant database where the
`database_name` prefixes the tenant ID, e.g. monasca_tenantid.