Optimize Gnocchi fetcher

In large environments, CloudKitty tends to consume too much RAM when it
is configured with the Gnocchi fetcher. This happens because the fetcher
loads all of the Gnocchi resources into memory before it retrieves the
scope IDs and filters out the duplicates, keeping only the unique ones.

This patch changes that process: each chunk of Gnocchi resources is
loaded, and its scope IDs are retrieved and deduplicated right away,
before the next chunk is requested. Thus, in the worst-case scenario,
only 1000 resources are held in memory at any given time.

Change-Id: Ibcc6af5aa5cf544be9032e33d3ac90eb1f6b14ef
This commit is contained in:
Rafael Weingärtner 2022-11-11 08:26:24 -03:00 committed by Pierre Riteau
parent addb4d6aa8
commit 13a249969b
4 changed files with 143 additions and 16 deletions

View File

@ -121,24 +121,42 @@ class GnocchiFetcher(fetcher.BaseFetcher):
)
def get_tenants(self):
    """Return the unique scope IDs found on the configured Gnocchi resources.

    For every resource type in ``CONF.fetcher_gnocchi.resource_types`` the
    resources are listed chunk by chunk, using the ID of the last resource
    of a chunk as the marker for the next request. The scope IDs of each
    chunk are extracted and merged into a set right away, so only one
    chunk of resources is referenced by this method at a time.

    :return: a list with the distinct, non-empty values of the attribute
             named by ``CONF.fetcher_gnocchi.scope_attribute``. The order
             of the list is unspecified.
    """
    unique_scope_ids = set()
    total_resources_navigated = 0

    scope_attribute = CONF.fetcher_gnocchi.scope_attribute
    resource_types = CONF.fetcher_gnocchi.resource_types
    for resource_type in resource_types:
        marker = None
        while True:
            resources_chunk = self._conn.resource.list(
                resource_type=resource_type, marker=marker, details=True)

            chunk_len = len(resources_chunk)
            is_last_chunk_equals_marker = (
                chunk_len > 0 and resources_chunk[-1]['id'] == marker)

            # Stop on an empty chunk, or on a chunk that only holds the
            # resource already used as marker (nothing new was returned).
            if chunk_len < 1 or (
                    chunk_len == 1 and is_last_chunk_equals_marker):
                LOG.debug("Scopes IDs [%s] loaded. The total number of "
                          "unique scope IDs loaded is [%s]. Total number "
                          "of resources navigated [%s].", unique_scope_ids,
                          len(unique_scope_ids), total_resources_navigated)
                break

            marker = resources_chunk[-1]['id']
            total_resources_navigated += chunk_len

            # Resources without the attribute, or with an empty/None
            # value, do not contribute a scope ID.
            scope_ids = [resource.get(
                scope_attribute, None) for resource in resources_chunk]
            scope_ids = [s_id for s_id in scope_ids if s_id]
            unique_scope_ids.update(scope_ids)

            # Report the running total of unique IDs, as the message
            # states, rather than the size of the current chunk.
            LOG.debug("Scopes IDs [%s] loaded. The total number of unique "
                      "scopes IDs loaded so far is [%s]. Next chunk with "
                      "Markers [%s]. Total number of resources navigated "
                      "[%s].", scope_ids, len(unique_scope_ids), marker,
                      total_resources_navigated)
    return list(unique_scope_ids)

View File

@ -627,7 +627,7 @@ class CloudKittyProcessor(cotyledon.Service):
finally:
lock.release()
LOG.debug("Finished processing scopes [%s].", tenant_id)
LOG.debug("Finished processing scope [%s].", tenant_id)
else:
LOG.debug("Could not acquire lock [%s] for processing "
"scope [%s] with worker [%s].", lock_name,

View File

@ -0,0 +1,104 @@
# -*- coding: utf-8 -*-
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
#
from unittest import mock
from cloudkitty.fetcher import gnocchi
from cloudkitty import tests
class GnocchiFetcherTest(tests.TestCase):
    """Tests for the chunked scope ID retrieval of ``GnocchiFetcher``."""

    def setUp(self):
        """Create the fetcher and the resource fixtures shared by the tests."""
        super(GnocchiFetcherTest, self).setUp()

        self.fetcher = gnocchi.GnocchiFetcher()

        # The last two entries are identical on purpose: they share both
        # the resource ID and the project ID.
        self.resource_list = [
            {'id': "some_id", 'project_id': 'some_other_project_id'},
            {'id': "some_id2", 'project_id': 'some_other_project_id2'},
            {'id': "some_id3", 'project_id': 'some_other_project_id3'},
            {'id': "some_replicated_id",
             'project_id': 'some_replicated_id_project'},
            {'id': "some_replicated_id",
             'project_id': 'some_replicated_id_project'},
        ]

        self.unique_scope_ids = sorted([
            "some_other_project_id",
            "some_other_project_id2",
            "some_other_project_id3",
            "some_replicated_id_project",
        ])

    @staticmethod
    def _expected_call(marker):
        """Build the expected ``resource.list`` call for the given marker."""
        return mock.call(
            resource_type="generic", marker=marker, details=True)

    def test_get_tenants_marker_list_resource_last_call(self):
        """Listing stops on a one-element chunk that repeats the marker."""
        chunks = [
            self.resource_list,
            [{'id': "some_replicated_id",
              'project_id': 'some_replicated_id_project'}],
        ]
        with mock.patch.object(
                self.fetcher._conn.resource, 'list') as resource_list:
            resource_list.side_effect = chunks

            all_scope_ids = sorted(self.fetcher.get_tenants())

            self.assertEqual(self.unique_scope_ids, all_scope_ids)
            resource_list.assert_has_calls([
                self._expected_call(None),
                self._expected_call("some_replicated_id"),
            ])

    def test_get_tenants_empty_list_resource_last_call(self):
        """Listing stops when an empty chunk is returned."""
        chunks = [self.resource_list, self.resource_list, []]
        with mock.patch.object(
                self.fetcher._conn.resource, 'list') as resource_list:
            resource_list.side_effect = chunks

            all_scope_ids = sorted(self.fetcher.get_tenants())

            self.assertEqual(self.unique_scope_ids, all_scope_ids)
            resource_list.assert_has_calls([
                self._expected_call(None),
                self._expected_call("some_replicated_id"),
                self._expected_call("some_replicated_id"),
            ], any_order=False)

    def test_get_tenants_scope_id_as_none(self):
        """A resource whose scope attribute is ``None`` adds no scope ID."""
        chunks = [
            self.resource_list,
            self.resource_list,
            [{"id": "test", "project_id": None}],
            [],
        ]
        with mock.patch.object(
                self.fetcher._conn.resource, 'list') as resource_list:
            resource_list.side_effect = chunks

            all_scope_ids = sorted(self.fetcher.get_tenants())

            self.assertEqual(self.unique_scope_ids, all_scope_ids)
            resource_list.assert_has_calls([
                self._expected_call(None),
                self._expected_call("some_replicated_id"),
                self._expected_call("some_replicated_id"),
                self._expected_call("test"),
            ], any_order=False)

View File

@ -0,0 +1,5 @@
---
issues:
- |
Optimize Gnocchi fetcher to avoid consuming too much RAM when CloudKitty
runs in cloud environments with hundreds of thousands of resources.