congress/congress/tests/policy_engines/test_agnostic_performance.py

# Copyright (c) 2015 VMware, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
import tenacity
from oslo_config import cfg
from oslo_log import log as logging
from congress.datalog import base
from congress.datalog import compile
from congress.policy_engines import agnostic
from congress.tests.api import base as api_base
from congress.tests import base as testbase
from congress.tests.datasources import performance_datasource_driver
from congress.tests import helper
LOG = logging.getLogger(__name__)
NREC_THEORY = 'non-recursive theory'
DB_THEORY = 'database'
ACTION_THEORY = 'action'
class TestRuntimePerformance(testbase.TestCase):
"""Tests for Runtime performance that are not specific to any theory.
To run one test:
nosetests -v \
congress/tests/policy_engines/test_agnostic_performance.py:TestRuntimePerformance.test_foo
To collect profiling data:
python -m cProfile -o profile.out `which nosetests` -v \
congress/tests/policy_engines/test_agnostic_performance.py:TestRuntimePerformance.test_foo
To parse and sort profiling data in different ways:
import pstats
pstats.Stats('profile.out').strip_dirs().sort_stats("cum").print_stats()
pstats.Stats('profile.out').strip_dirs().sort_stats("time").print_stats()
pstats.Stats('profile.out').strip_dirs().sort_stats("calls").print_stats()
"""
def setUp(self):
super(TestRuntimePerformance, self).setUp()
self._agnostic = agnostic.Runtime()
self._agnostic.create_policy(NREC_THEORY,
kind=base.NONRECURSIVE_POLICY_TYPE)
self._agnostic.create_policy(DB_THEORY, kind=base.DATABASE_POLICY_TYPE)
self._agnostic.debug_mode()
self._agnostic.insert('', target=NREC_THEORY)
def _create_event(self, table, tuple_, insert, target):
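        """Return an Event inserting or deleting a fact in target."""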
return compile.Event(compile.Literal.create_from_table_tuple(table,
tuple_),
insert=insert, target=target)
def _create_large_tables(self, n, theory):
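        """Load n**3 'p' facts and n 'q' facts into the given theory."""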
        facts = [compile.Fact('p', (i, j, k))
                 for i in range(n) for j in range(n) for k in range(n)]
facts.extend(compile.Fact('q', (i,)) for i in range(n))
self._agnostic.initialize_tables(['p', 'q'], facts, theory)
def test_insert_nonrecursive(self):
MAX = 100
th = NREC_THEORY
for i in range(MAX):
self._agnostic.insert('r(%d)' % i, th)
def test_insert_database(self):
MAX = 100
th = DB_THEORY
for i in range(MAX):
self._agnostic.insert('r(%d)' % i, th)
def test_update_nonrecursive(self):
MAX = 10000
th = NREC_THEORY
updates = [self._create_event('r', (i,), True, th)
for i in range(MAX)]
self._agnostic.update(updates)
def test_update_database(self):
MAX = 1000
th = DB_THEORY
updates = [self._create_event('r', (i,), True, th)
for i in range(MAX)]
self._agnostic.update(updates)
def test_indexing(self):
MAX = 100
th = NREC_THEORY
for table in ('a', 'b', 'c'):
updates = [self._create_event(table, (i,), True, th)
for i in range(MAX)]
self._agnostic.update(updates)
# With indexing, this query should take O(n) time where n is MAX.
# Without indexing, this query will take O(n^3).
self._agnostic.insert('d(x) :- a(x), b(x), c(x)', th)
ans = ' '.join(['d(%d)' % i for i in range(MAX)])
self.assertTrue(helper.datalog_equal(self._agnostic.select('d(x)',
th), ans))
def test_runtime_initialize_tables(self):
MAX = 700
longstring = 'a' * 100
facts = (compile.Fact('p',
(1, 2, 'foo', 'bar', i, longstring + str(i)))
for i in range(MAX))
th = NREC_THEORY
self._agnostic.initialize_tables(['p'], facts, th)
    def test_select_1match(self):
        # Exercise select with different types of policies (indexing, large
        # sets, many joins, etc.) using a query that has a single match.
MAX = 10
th = NREC_THEORY
self._create_large_tables(MAX, th)
self._agnostic.insert('r(x,y) :- p(x,x,y), q(x)', th)
for i in range(100):
# This select returns 1 result
self._agnostic.select('r(1, 1)', th)
    def test_select_100matches(self):
        # Exercise select with different types of policies (indexing, large
        # sets, many joins, etc.) using a query that has 100 matches.
MAX = 10
th = NREC_THEORY
self._create_large_tables(MAX, th)
self._agnostic.insert('r(x,y) :- p(x,x,y), q(x)', th)
# This takes about 60ms per select
for i in range(10):
# This select returns 100 results
self._agnostic.select('r(x, y)', th)
    def test_simulate_latency(self):
        # We expect the cost to be the sum of the simulate call itself plus
        # the cost of doing and undoing the evaluation, so this test focuses
        # on the cost specific to the simulate call by doing a minimal
        # amount of evaluation.
MAX = 10
th = NREC_THEORY
self._create_large_tables(MAX, th)
self._agnostic.create_policy(ACTION_THEORY,
kind=base.ACTION_POLICY_TYPE)
self._agnostic.insert('q(0)', th)
self._agnostic.insert('s(x) :- q(x), p(x,0,0)', th)
# This takes about 13ms per simulate. The query for s(x) can use
# indexing, so it should be efficient.
for _ in range(100):
self._agnostic.simulate('s(x)', th, 'p-(0,0,0)',
ACTION_THEORY, delta=True)
    def test_simulate_throughput(self):
        # Target: up to 250 simulate requests per second.
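        # A minimal sketch of one way to measure this, reusing the setup and
        # the simulate() call from test_simulate_latency; the one-second
        # measurement window and the logged figure are illustrative
        # assumptions, not verified results.
        import time  # local import to keep the sketch self-contained
        th = NREC_THEORY
        self._create_large_tables(10, th)
        self._agnostic.create_policy(ACTION_THEORY,
                                     kind=base.ACTION_POLICY_TYPE)
        self._agnostic.insert('q(0)', th)
        self._agnostic.insert('s(x) :- q(x), p(x,0,0)', th)
        count = 0
        start = time.time()
        while time.time() - start < 1.0:
            self._agnostic.simulate('s(x)', th, 'p-(0,0,0)',
                                    ACTION_THEORY, delta=True)
            count += 1
        LOG.info("simulate throughput: ~%d requests/second", count)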
def test_update_rate(self):
pass
def test_concurrency(self):
pass
class TestDsePerformance(testbase.SqlTestCase):
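    """Tests for data-loading performance across the DSE."""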
def setUp(self):
super(TestDsePerformance, self).setUp()
self.services = api_base.setup_config(with_fake_datasource=False,
node_id="perf")
cfg.CONF.set_override(
'drivers',
[('congress.tests.datasources.performance_datasource_driver'
'.PerformanceTestDriver')])
self.node = self.services['node']
self.engine = self.services['engine']
def tearDown(self):
self.node.stop()
super(TestDsePerformance, self).tearDown()
    @tenacity.retry(wait=tenacity.wait_fixed(0.1))
    def wait_til_query_nonempty(self, query, policy):
        # Retries every 0.1s until the query returns at least one result.
        if len(self.engine.select(query, target=policy)) == 0:
            raise Exception("Query %s is still empty" % query)
def test_initialize_tables_dse(self):
"""Test performance of initializing data with DSE and Engine.
This test populates the tables exported by a datasource driver,
and then invokes the poll() method to send that data to the
policy engine. It tests the amount of time to send tables
across the DSE and load them into the policy engine.
"""
MAX_TUPLES = 700
# install datasource driver we can control
kwds = helper.datasource_openstack_args()
kwds['poll_time'] = 0
driver = performance_datasource_driver.PerformanceTestDriver('data',
args=kwds)
self.node.register_service(driver)
self.engine.create_policy('data')
# set return value for datasource driver
facts = [(1, 2.3, 'foo', 'bar', i, 'a'*100 + str(i))
for i in range(MAX_TUPLES)]
driver.state = {'p': facts}
# Send formula to engine (so engine subscribes to data:p)
policy = self.engine.DEFAULT_THEORY
formula = compile.parse1(
'q(1) :- data:p(1, 2.3, "foo", "bar", 1, %s)' % ('a'*100 + '1'))
self.engine.process_policy_update(
[compile.Event(formula, target=policy)])
# Poll data and wait til it arrives at engine
driver.poll()
self.wait_til_query_nonempty('q(1)', policy)
def test_initialize_tables_full(self):
"""Test performance of initializing data with Datasource, DSE, Engine.
This test gives a datasource driver the Python data that would
have resulted from making an API call and parsing it into Python
and then polls that datasource, waiting until the data arrives
in the policy engine. It tests the amount of time required to
translate Python data into tables, send those tables over the DSE,
and load them into the policy engine.
"""
MAX_TUPLES = 700
# install datasource driver we can control
kwds = helper.datasource_openstack_args()
kwds['poll_time'] = 0
driver = performance_datasource_driver.PerformanceTestDriver('data',
args=kwds)
self.node.register_service(driver)
self.engine.create_policy('data')
# set return value for datasource driver
facts = [{'field1': 1,
'field2': 2.3,
'field3': 'foo',
'field4': 'bar',
'field5': i,
'field6': 'a'*100 + str(i)}
for i in range(MAX_TUPLES)]
driver.client_data = facts
# Send formula to engine (so engine subscribes to data:p)
policy = self.engine.DEFAULT_THEORY
formula = compile.parse1(
'q(1) :- data:p(1, 2.3, "foo", "bar", 1, %s)' % ('a'*100 + '1'))
LOG.info("publishing rule")
self.engine.process_policy_update(
[compile.Event(formula, target=policy)])
# Poll data and wait til it arrives at engine
driver.poll()
self.wait_til_query_nonempty('q(1)', policy)