[placement] split gigantor SQL query, add logging

This patch modifies the code paths for the non-granular request group allocation candidates processing. It removes the giant multi-join SQL query and replaces it with multiple calls to _get_providers_with_resource(), logging the number of matched providers for each resource class requested and for each filter (on required traits, forbidden traits and aggregate membership).

Here are some examples of the debug output:

- A request for three resources with no aggregate or trait filters:

    found 7 providers with available 5 VCPU
    found 9 providers with available 1024 MEMORY_MB
    found 5 providers after filtering by previous result
    found 8 providers with available 1500 DISK_GB
    found 2 providers after filtering by previous result

- The same request, but with a required trait that nobody has, short-circuits quickly:

    found 0 providers after applying required traits filter (['HW_CPU_X86_AVX2'])

- A request for one resource with aggregates and forbidden (but no required) traits:

    found 2 providers after applying aggregates filter ([['3ed8fb2f-4793-46ee-a55b-fdf42cb392ca']])
    found 1 providers after applying forbidden traits filter ([u'CUSTOM_TWO', u'CUSTOM_THREE'])
    found 3 providers with available 4 VCPU
    found 1 providers after applying initial aggregate and trait filters

Co-authored-by: Eric Fried <efried@us.ibm.com>
Closes-Bug: #1786519
Change-Id: If9ddb8a6d2f03392f3cc11136c4a0b026212b95b
2018-08-08 17:11:25 -04:00 · 2018-08-08 17:11:25 -04:00 · b5ab9f5ace
parent 8f0968d091
commit b5ab9f5ace
1 changed files with 91 additions and 101 deletions
--- a/nova/api/openstack/placement/objects/resource_provider.py
+++ b/nova/api/openstack/placement/objects/resource_provider.py
@ -801,7 +801,7 @@ def _provider_ids_matching_aggregates(context, member_of, rp_ids=None):
    :param rp_ids: When present, returned resource providers are limited
        to only those in this value

-    :returns: A list of internal resource provider IDs having all required
+    :returns: A set of internal resource provider IDs having all required
        aggregate associations
    """
    # Given a request for the following:
@ -862,7 +862,7 @@ def _provider_ids_matching_aggregates(context, member_of, rp_ids=None):
    sel = sa.select([rp_tbl.c.id]).select_from(join_chain)
    if rp_ids:
        sel = sel.where(rp_tbl.c.id.in_(rp_ids))
-    return [r[0] for r in context.session.execute(sel).fetchall()]
+    return set(r[0] for r in context.session.execute(sel))


@db_api.placement_context_manager.writer
@ -2780,7 +2780,7 @@ def _get_usages_by_provider_tree(ctx, root_ids):

@db_api.placement_context_manager.reader
 def _get_provider_ids_having_any_trait(ctx, traits):
-    """Returns a list of resource provider internal IDs that have ANY of the
+    """Returns a set of resource provider internal IDs that have ANY of the
    supplied traits.

    :param ctx: Session context to use
@ -2796,12 +2796,12 @@ def _get_provider_ids_having_any_trait(ctx, traits):
    sel = sa.select([rptt.c.resource_provider_id])
    sel = sel.where(rptt.c.trait_id.in_(traits.values()))
    sel = sel.group_by(rptt.c.resource_provider_id)
-    return [r[0] for r in ctx.session.execute(sel)]
+    return set(r[0] for r in ctx.session.execute(sel))


@db_api.placement_context_manager.reader
 def _get_provider_ids_having_all_traits(ctx, required_traits):
-    """Returns a list of resource provider internal IDs that have ALL of the
+    """Returns a set of resource provider internal IDs that have ALL of the
    required traits.

    NOTE: Don't call this method with no required_traits.
@ -2825,7 +2825,7 @@ def _get_provider_ids_having_all_traits(ctx, required_traits):
    num_traits = len(required_traits)
    cond = sa.func.count(rptt.c.trait_id) == num_traits
    sel = sel.having(cond)
-    return [r[0] for r in ctx.session.execute(sel)]
+    return set(r[0] for r in ctx.session.execute(sel))


@db_api.placement_context_manager.reader
@ -2869,112 +2869,102 @@ def _get_provider_ids_matching(ctx, resources, required_traits,
                      resource providers that are members of one or more of the
                      supplied aggregates of each aggregate UUID list.
    """
-    trait_rps = None
-    forbidden_rp_ids = None
+    # The iteratively filtered set of resource provider internal IDs that match
+    # all the constraints in the request
+    filtered_rps = set()
    if required_traits:
        trait_rps = _get_provider_ids_having_all_traits(ctx, required_traits)
-        if not trait_rps:
+        filtered_rps = trait_rps
+        LOG.debug("found %d providers after applying required traits filter "
+                  "(%s)",
+                  len(filtered_rps), list(required_traits))
+        if not filtered_rps:
            return []
-    if forbidden_traits:
-        forbidden_rp_ids = _get_provider_ids_having_any_trait(
-            ctx, forbidden_traits)
-
-    rpt = sa.alias(_RP_TBL, name="rp")
-
-    rc_name_map = {
-        rc_id: _RC_CACHE.string_from_id(rc_id).lower() for rc_id in resources
-    }
-
-    # Dict, keyed by resource class ID, of an aliased table object for the
-    # inventories table winnowed to only that resource class.
-    inv_tables = {
-        rc_id: sa.alias(_INV_TBL, name='inv_%s' % rc_name_map[rc_id])
-        for rc_id in resources
-    }
-
-    # Dict, keyed by resource class ID, of a derived table (subquery in the
-    # FROM clause or JOIN) against the allocations table winnowed to only that
-    # resource class, grouped by resource provider.
-    usage_tables = {
-        rc_id: sa.alias(
-            sa.select([
-                _ALLOC_TBL.c.resource_provider_id,
-                sql.func.sum(_ALLOC_TBL.c.used).label('used'),
-            ]).where(
-                _ALLOC_TBL.c.resource_class_id == rc_id
-            ).group_by(
-                _ALLOC_TBL.c.resource_provider_id
-            ),
-            name='usage_%s' % rc_name_map[rc_id],
-        )
-        for rc_id in resources
-    }
-
-    sel = sa.select([rpt.c.id, rpt.c.root_provider_id])
-
-    # List of the WHERE conditions we build up by iterating over the requested
-    # resources
-    where_conds = []
-
-    # First filter by the resource providers that had all the required traits
-    if trait_rps:
-        where_conds.append(rpt.c.id.in_(trait_rps))
-    # and didn't have any forbidden traits
-    if forbidden_rp_ids:
-        where_conds.append(~rpt.c.id.in_(forbidden_rp_ids))
-
-    # The chain of joins that we eventually pass to select_from()
-    join_chain = rpt
-
-    for rc_id, amount in resources.items():
-        inv_by_rc = inv_tables[rc_id]
-        usage_by_rc = usage_tables[rc_id]
-
-        # We can do a more efficient INNER JOIN because we don't have shared
-        # resource providers to deal with
-        rp_inv_join = sa.join(
-            join_chain, inv_by_rc,
-            sa.and_(
-                inv_by_rc.c.resource_provider_id == rpt.c.id,
-                # Add a join condition winnowing this copy of inventories table
-                # to only the resource class being analyzed in this loop...
-                inv_by_rc.c.resource_class_id == rc_id,
-            ),
-        )
-        rp_inv_usage_join = sa.outerjoin(
-            rp_inv_join, usage_by_rc,
-            inv_by_rc.c.resource_provider_id ==
-                usage_by_rc.c.resource_provider_id,
-        )
-        join_chain = rp_inv_usage_join
-
-        usage_cond = sa.and_(
-            (
-            (sql.func.coalesce(usage_by_rc.c.used, 0) + amount) <=
-            (inv_by_rc.c.total - inv_by_rc.c.reserved) *
-                inv_by_rc.c.allocation_ratio
-            ),
-            inv_by_rc.c.min_unit <= amount,
-            inv_by_rc.c.max_unit >= amount,
-            amount % inv_by_rc.c.step_size == 0,
-        )
-        where_conds.append(usage_cond)

    # If 'member_of' has values, do a separate lookup to identify the
    # resource providers that meet the member_of constraints.
    if member_of:
        rps_in_aggs = _provider_ids_matching_aggregates(ctx, member_of)
-        if not rps_in_aggs:
-            # Short-circuit. The user either asked for a non-existing
-            # aggregate or there were no resource providers that matched
-            # the requirements...
+        if filtered_rps:
+            filtered_rps &= set(rps_in_aggs)
+        else:
+            filtered_rps = set(rps_in_aggs)
+        LOG.debug("found %d providers after applying aggregates filter (%s)",
+                  len(filtered_rps), member_of)
+        if not filtered_rps:
            return []
-        where_conds.append(rpt.c.id.in_(rps_in_aggs))

-    sel = sel.select_from(join_chain)
-    sel = sel.where(sa.and_(*where_conds))
+    forbidden_rp_ids = set()
+    if forbidden_traits:
+        forbidden_rp_ids = _get_provider_ids_having_any_trait(
+            ctx, forbidden_traits)
+        if filtered_rps:
+            filtered_rps -= forbidden_rp_ids
+            LOG.debug("found %d providers after applying forbidden traits "
+                      "filter (%s)", len(filtered_rps),
+                      list(forbidden_traits))
+            if not filtered_rps:
+                return []

-    return [(r[0], r[1]) for r in ctx.session.execute(sel)]
+    # Instead of constructing a giant complex SQL statement that joins multiple
+    # copies of derived usage tables and inventory tables to each other, we do
+    # one query for each requested resource class. This allows us to log a
+    # rough idea of which resource class query returned no results (for
+    # purposes of rough debugging of a single allocation candidates request) as
+    # well as reduce the necessary knowledge of SQL in order to understand the
+    # queries being executed here.
+    #
+    # NOTE(jaypipes): The efficiency of this operation may be improved by
+    # passing the trait_rps and/or forbidden_rp_ids iterables to the
+    # _get_providers_with_resource() function so that we don't have to process
+    # as many records inside the loop below to remove providers from the
+    # eventual results list
+    provs_with_resource = set()
+    first = True
+    for rc_id, amount in resources.items():
+        rc_name = _RC_CACHE.string_from_id(rc_id)
+        provs_with_resource = _get_providers_with_resource(ctx, rc_id, amount)
+        LOG.debug("found %d providers with available %d %s",
+                  len(provs_with_resource), amount, rc_name)
+        if not provs_with_resource:
+            return []
+
+        rc_rp_ids = set(p[0] for p in provs_with_resource)
+        # The branching below could be collapsed code-wise, but is in place to
+        # make the debug logging clearer.
+        if first:
+            first = False
+            if filtered_rps:
+                filtered_rps &= rc_rp_ids
+                LOG.debug("found %d providers after applying initial "
+                          "aggregate and trait filters", len(filtered_rps))
+            else:
+                filtered_rps = rc_rp_ids
+                # The following condition is not necessary for the logic; just
+                # prevents the message from being logged unnecessarily.
+                if forbidden_rp_ids:
+                    # Forbidden trait filters only need to be applied
+                    # a) on the first iteration; and
+                    # b) if not already set up before the loop
+                    # ...since any providers in the resulting set are the basis
+                    # for intersections, and providers with forbidden traits
+                    # are already absent from that set after we've filtered
+                    # them once.
+                    filtered_rps -= forbidden_rp_ids
+                    LOG.debug("found %d providers after applying forbidden "
+                              "traits", len(filtered_rps))
+        else:
+            filtered_rps &= rc_rp_ids
+            LOG.debug("found %d providers after filtering by previous result",
+                      len(filtered_rps))
+
+        if not filtered_rps:
+            return []
+
+    # provs_with_resource will contain a superset of providers with IDs still
+    # in our filtered_rps set. We return the list of tuples of
+    # (internal provider ID, root internal provider ID)
+    return [rpids for rpids in provs_with_resource if rpids[0] in filtered_rps]


@db_api.placement_context_manager.reader