From 9f52ebbf68dbfab1bd018fb22ca1fd4dd57f944b Mon Sep 17 00:00:00 2001
From: Eric Kao <ekcs.openstack@gmail.com>
Date: Wed, 22 Mar 2017 16:34:11 -0700
Subject: [PATCH] Pad positional args up to required number

Pad positional args in rules up to number required by schema.

The eliminate_column_references methods are slightly extended
to also pad positional arguments even when no column refs
are present.

The change makes it so that adding new data columns (to the right
of existing ones)  in datasource drivers do not break existing
policy rules.

For example,
nova:flavors(x) is automatically expanded to something like
nova:flavors(x, _x1, _x2, _x3), just like if the input was
nova:flavors(id=x). The behavior is consistent with that of
optional parameters Python and C++.

We discussed on IRC how to keep backward compatibility with existing
policy rules when we add new information to data source drivers.

There was some agreement that versioning the schema was the most
complete solution, as it allows for the removal of tables/columns as
well as additions.

But even with the adoption explicit versioning, the allowance for too
few positional arguments is still a great convenience for deployers
because it makes the adding of data columns a backward compatible
version change rather than a backward incompatible version change.
That is, the deployer can take advantage of the new data columns in
new policy, without having to rewrite all the existing policy.

Related-Bug: 1674537

Change-Id: Iac817f0a5c6dbb8c94804097d84f9541e1f22b1e
---
 congress/datalog/compile.py             | 179 +++++++++++++-----------
 congress/policy_engines/agnostic.py     |  16 ++-
 congress/tests/datalog/test_compiler.py |  97 ++++++++++++-
 3 files changed, 197 insertions(+), 95 deletions(-)

diff --git a/congress/datalog/compile.py b/congress/datalog/compile.py
index 19ea3f3b6..68f2e805e 100644
--- a/congress/datalog/compile.py
+++ b/congress/datalog/compile.py
@@ -800,84 +800,96 @@ class Literal (object):
         self.table.drop_service()
         return self
 
-    def eliminate_column_references(self, theories, default_theory=None,
-                                    index=0, prefix=''):
-        """Expand column references to traditional datalog positional args.
+    def eliminate_column_references_and_pad_positional(
+            self, theories, default_theory=None, index=0, prefix=''):
+        """Expand column references to positional args and pad positional args.
 
-        Returns a new literal, unless no column references.
+        Expand column references to traditional datalog positional args.
+        Also pad positional args if too few are provided.
+        Returns a new literal. If no column reference, unless no schema found
+        for the table.
         """
         # TODO(ekcs): remove unused parameter: index
         # corner cases
-        if len(self.named_arguments) == 0:
-            return self
-        theory = literal_theory(self, theories, default_theory)
-        if theory is None or theory.schema is None:
-            raise exception.IncompleteSchemaException(
-                "Literal %s uses named arguments, but the "
-                "schema is unknown." % self)
-        if theory.kind != base.DATASOURCE_POLICY_TYPE:  # eventually remove
-            raise exception.PolicyException(
-                "Literal {} uses column references, but '{}' does not "
-                "reference a datasource policy.".format(self, theory.name))
-        schema = theory.schema
-        if self.table.table not in schema:
-            raise exception.IncompleteSchemaException(
-                "Literal {} uses unknown table {}.".format(
-                    str(self), str(self.table.table)))
+        if len(self.named_arguments) > 0:
+            theory = literal_theory(self, theories, default_theory)
+            if theory is None or theory.schema is None:
+                raise exception.IncompleteSchemaException(
+                    "Literal %s uses named arguments, but the "
+                    "schema is unknown." % self)
+            if theory.kind != base.DATASOURCE_POLICY_TYPE:  # eventually remove
+                raise exception.PolicyException(
+                    "Literal {} uses column references, but '{}' does not "
+                    "reference a datasource policy.".format(self, theory.name))
+            schema = theory.schema
+            if self.table.table not in schema:
+                raise exception.IncompleteSchemaException(
+                    "Literal {} uses unknown table {}.".format(
+                        str(self), str(self.table.table)))
 
-        # check if named arguments conflict with positional or named arguments
-        errors = []
-        term_index = {}
-        for col, arg in self.named_arguments.items():
-            if isinstance(col, six.string_types):  # column name
-                index = schema.column_number(self.table.table, col)
-                if index is None:
-                    errors.append(exception.PolicyException(
-                        "In literal {} column name {} does not exist".format(
-                            str(self), col)))
-                    continue
-                if index < len(self.arguments):
-                    errors.append(exception.PolicyException(
-                        "In literal {} column name {} references position {},"
-                        " which is already provided by position.".format(
-                            str(self), col, index)))
-                if index in self.named_arguments:
-                    errors.append(exception.PolicyException(
-                        "In literal {} column name {} references position {}, "
-                        "which is also referenced by number.))".format(
-                            str(self), col, index)))
-                if index in term_index:
-                    # should have already caught this case above
-                    errors.append(exception.PolicyException(
-                        "In literal {}, column name {} references position {},"
-                        " which already has reference {}".format(
-                            str(self), col, index, str(term_index[index]))))
-                term_index[index] = arg
-            else:  # column number
-                if col >= schema.arity(self.table.table):
-                    errors.append(exception.PolicyException(
-                        "In literal {} column index {} is too large".format(
-                            str(self), col)))
-                if col < len(self.arguments):
-                    errors.append(exception.PolicyException(
-                        "In literal {} column index {} "
-                        " is already provided by position.".format(
-                            str(self), col)))
-                name = schema.column_name(self.table.table, col)
-                if name in self.named_arguments:
-                    errors.append(exception.PolicyException(
-                        "In literal {} column index {} references column {}, "
-                        "which is also referenced by name.))".format(
-                            str(self), col, name)))
-                if col in term_index:
-                    # should have already caught this case above
-                    errors.append(exception.PolicyException(
-                        "In literal {} column index {} already has a reference"
-                        " {}".format(str(self), col, str(term_index[col]))))
-                term_index[col] = arg
-        if errors:
-            raise exception.PolicyException(
-                " ".join(str(err) for err in errors))
+            # check if named arguments conflict with positional or named args
+            errors = []
+            term_index = {}
+            for col, arg in self.named_arguments.items():
+                if isinstance(col, six.string_types):  # column name
+                    index = schema.column_number(self.table.table, col)
+                    if index is None:
+                        errors.append(exception.PolicyException(
+                            "In literal {} column name {} does not "
+                            "exist".format(str(self), col)))
+                        continue
+                    if index < len(self.arguments):
+                        errors.append(exception.PolicyException(
+                            "In literal {} column name {} references position "
+                            "{}, which is already provided by "
+                            "position.".format(str(self), col, index)))
+                    if index in self.named_arguments:
+                        errors.append(exception.PolicyException(
+                            "In literal {} column name {} references position "
+                            "{}, which is also referenced by number.))".format(
+                                str(self), col, index)))
+                    if index in term_index:
+                        # should have already caught this case above
+                        errors.append(exception.PolicyException(
+                            "In literal {}, column name {} references "
+                            "position {}, which already has reference "
+                            "{}".format(str(self), col, index,
+                                        str(term_index[index]))))
+                    term_index[index] = arg
+                else:  # column number
+                    if col >= schema.arity(self.table.table):
+                        errors.append(exception.PolicyException(
+                            "In literal {} column index {} is too "
+                            "large".format(str(self), col)))
+                    if col < len(self.arguments):
+                        errors.append(exception.PolicyException(
+                            "In literal {} column index {} "
+                            " is already provided by position.".format(
+                                str(self), col)))
+                    name = schema.column_name(self.table.table, col)
+                    if name in self.named_arguments:
+                        errors.append(exception.PolicyException(
+                            "In literal {} column index {} references column "
+                            "{}, which is also referenced by name.))".format(
+                                str(self), col, name)))
+                    if col in term_index:
+                        # should have already caught this case above
+                        errors.append(exception.PolicyException(
+                            "In literal {} column index {} already has a "
+                            "reference {}".format(
+                                str(self), col, str(term_index[col]))))
+                    term_index[col] = arg
+            if errors:
+                raise exception.PolicyException(
+                    " ".join(str(err) for err in errors))
+        else:
+            theory = literal_theory(self, theories, default_theory)
+            if theory is None or theory.schema is None:
+                return self
+            schema = theory.schema
+            if self.table.table not in schema:
+                return self
+            term_index = {}
 
         # turn reference args into position args
         position_args = list(self.arguments)  # copy the original list
@@ -1072,17 +1084,21 @@ class Rule(object):
     def is_update(self):
         return self.head.is_update()
 
-    def eliminate_column_references(self, theories, default_theory=None):
-        """Return version of SELF where all column references have been removed.
+    def eliminate_column_references_and_pad_positional(
+            self, theories, default_theory=None):
+        """Return version of SELF /w col refs removed and pos args padded.
 
+        All column references removed. Positional args padded up to required
+        length.
         Throws exception if RULE is inconsistent with schemas.
         """
         pre = self._unused_variable_prefix()
         heads = []
         for i in range(0, len(self.heads)):
-            heads.append(self.heads[i].eliminate_column_references(
-                theories, default_theory=default_theory,
-                index=i, prefix='%s%s' % (pre, i)))
+            heads.append(
+                self.heads[i].eliminate_column_references_and_pad_positional(
+                    theories, default_theory=default_theory,
+                    index=i, prefix='%s%s' % (pre, i)))
 
         body = []
         sorted_lits = sorted(self.body)
@@ -1091,9 +1107,10 @@ class Rule(object):
             lit_rank[sorted_lits[i]] = i
 
         for i in range(0, len(self.body)):
-            body.append(self.body[i].eliminate_column_references(
-                theories, default_theory=default_theory,
-                index=i, prefix='%s%s' % (pre, lit_rank[self.body[i]])))
+            body.append(
+                self.body[i].eliminate_column_references_and_pad_positional(
+                    theories, default_theory=default_theory,
+                    index=i, prefix='%s%s' % (pre, lit_rank[self.body[i]])))
 
         return Rule(heads, body, self.location, name=self.name,
                     comment=self.comment, original_str=self.original_str)
diff --git a/congress/policy_engines/agnostic.py b/congress/policy_engines/agnostic.py
index 0753a9420..d1d67bdc4 100644
--- a/congress/policy_engines/agnostic.py
+++ b/congress/policy_engines/agnostic.py
@@ -1056,18 +1056,20 @@ class Runtime (object):
         enabled = []
         errors = []
         for event in events:
-            errs = compile.check_schema_consistency(
-                event.formula, self.theory, event.target)
-            if len(errs) > 0:
-                errors.append((event, errs))
-                continue
             try:
                 oldformula = event.formula
-                event.formula = oldformula.eliminate_column_references(
-                    self.theory, default_theory=event.target)
+                event.formula = \
+                    oldformula.eliminate_column_references_and_pad_positional(
+                        self.theory, default_theory=event.target)
                 # doesn't copy over ID since it creates a new one
                 event.formula.set_id(oldformula.id)
                 enabled.append(event)
+
+                errs = compile.check_schema_consistency(
+                    event.formula, self.theory, event.target)
+                if len(errs) > 0:
+                    errors.append((event, errs))
+                    continue
             except exception.IncompleteSchemaException as e:
                 if persistent:
                     # FIXME(ekcs): inconsistent behavior?
diff --git a/congress/tests/datalog/test_compiler.py b/congress/tests/datalog/test_compiler.py
index 9ba56d847..34b5d7bae 100644
--- a/congress/tests/datalog/test_compiler.py
+++ b/congress/tests/datalog/test_compiler.py
@@ -264,6 +264,83 @@ class TestColumnReferences(base.TestCase):
             '1 is already provided by position arguments',
             'Conflict between name and position')
 
+    def test_positional_args_padding_atom(self):
+        """Test positional args padding on a single atom."""
+        def check_err(rule, errmsg, msg):
+            rule = compile.parse1(rule)
+            try:
+                rule.eliminate_column_references_and_pad_positional(theories)
+                self.fail("Failed to throw error {}".format(errmsg))
+            except (exception.PolicyException,
+                    exception.IncompleteSchemaException) as e:
+                emsg = "Err messages '{}' should include '{}'".format(
+                    str(e), errmsg)
+                self.assertIn(errmsg, str(e), msg + ": " + emsg)
+
+        def check(code, correct, msg, no_theory=False):
+            actual = compile.parse1(
+                code).eliminate_column_references_and_pad_positional(
+                    {} if no_theory else theories)
+            eq = helper.datalog_same(str(actual), correct)
+            self.assertTrue(eq, msg)
+
+        run = agnostic.Runtime()
+        run.create_policy('nova')
+        schema = compile.Schema({'q': ('id', 'name', 'status')})
+        theories = {'nova': self.SchemaWrapper(schema)}
+
+        # Too few positional args
+        code = ("p(x) :- nova:q(w, y)")
+        correct = "p(x) :- nova:q(w, y, x3)"
+        check(code, correct, 'Too few positional args')
+
+        code = ("p(x) :- nova:q(w)")
+        correct = "p(x) :- nova:q(w, y, x3)"
+        check(code, correct, 'Too few positional args')
+
+        code = ("p(x) :- nova:q()")
+        correct = "p(x) :- nova:q(w, y, x3)"
+        check(code, correct, 'Too few (no) positional args')
+
+        # No schema provided, no change
+        code = ("p(x) :- nova:q(w, y)")
+        correct = "p(x) :- nova:q(w, y)"
+        check(code, correct, 'No schema provided', True)
+
+        code = ("p(x) :- nova:q(w, x, y, z)")
+        correct = "p(x) :- nova:q(w, x, y, z)"
+        check(code, correct, 'No schema provided', True)
+
+    def test_positional_args_padding_multiple_atoms(self):
+        """Test positional args padding on a single atom."""
+        def check(code, correct, msg, no_theory=False):
+            actual = compile.parse1(
+                code).eliminate_column_references_and_pad_positional(
+                    {} if no_theory else theories)
+            eq = helper.datalog_same(str(actual), correct)
+            self.assertTrue(eq, msg)
+
+        run = agnostic.Runtime()
+        run.create_policy('nova')
+        schema = compile.Schema({'q': ('id', 'name', 'status'),
+                                 'r': ('id', 'age', 'weight')})
+        theories = {'nova': self.SchemaWrapper(schema)}
+
+        # Multiple atoms, no shared variable
+        code = ("p(x) :- nova:q(x, y), nova:r(w)")
+        correct = "p(x) :- nova:q(x, y, z0), nova:r(w, y0, y1)"
+        check(code, correct, 'Multiple atoms')
+
+        # Multiple atoms, some shared variable
+        code = ("p(x) :- nova:q(x, y), nova:r(x)")
+        correct = "p(x) :- nova:q(x, y, z0), nova:r(x, y0, y1)"
+        check(code, correct, 'Multiple atoms')
+
+        # Multiple atoms, same table
+        code = ("p(x) :- nova:q(x, y), nova:q(x)")
+        correct = "p(x) :- nova:q(x, y, z0), nova:q(x, w0, w1)"
+        check(code, correct, 'Multiple atoms, same table')
+
     def test_column_references_validation_errors(self):
         """Test invalid column references occurring in a single atom."""
         schema = compile.Schema({'q': ('id', 'name', 'status'),
@@ -274,7 +351,7 @@ class TestColumnReferences(base.TestCase):
         def check_err(rule, errmsg, msg):
             rule = compile.parse1(rule)
             try:
-                rule.eliminate_column_references(theories)
+                rule.eliminate_column_references_and_pad_positional(theories)
                 self.fail("Failed to throw error {}".format(errmsg))
             except (exception.PolicyException,
                     exception.IncompleteSchemaException) as e:
@@ -316,7 +393,8 @@ class TestColumnReferences(base.TestCase):
     def test_column_references_atom(self):
         """Test column references occurring in a single atom in a rule."""
         def check(code, correct, msg):
-            actual = compile.parse1(code).eliminate_column_references(theories)
+            actual = compile.parse1(
+                code).eliminate_column_references_and_pad_positional(theories)
             eq = helper.datalog_same(str(actual), correct)
             self.assertTrue(eq, msg)
 
@@ -376,10 +454,13 @@ class TestColumnReferences(base.TestCase):
         correct = "p(x) :- nova:q(x, y, z)"
         check(code, correct, 'Pure positional without schema')
 
+        # Too few pure positional EKCS
+
     def test_column_references_multiple_atoms(self):
         """Test column references occurring in multiple atoms in a rule."""
         def check(code, correct, msg):
-            actual = compile.parse1(code).eliminate_column_references(theories)
+            actual = compile.parse1(
+                code).eliminate_column_references_and_pad_positional(theories)
             eq = helper.datalog_same(str(actual), correct)
             self.assertTrue(eq, msg)
 
@@ -412,10 +493,12 @@ class TestColumnReferences(base.TestCase):
                                  'r': ('id', 'age', 'weight')})
         theories = {'nova': self.SchemaWrapper(schema)}
 
-        rule1 = compile.parse1("p(x) :- nova:q(id=x, 2=y), nova:r(id=x)"
-                               ).eliminate_column_references(theories)
-        rule2 = compile.parse1("p(x) :- nova:r(id=x), nova:q(id=x, 2=y)"
-                               ).eliminate_column_references(theories)
+        rule1 = compile.parse1(
+            "p(x) :- nova:q(id=x, 2=y), nova:r(id=x)"
+            ).eliminate_column_references_and_pad_positional(theories)
+        rule2 = compile.parse1(
+            "p(x) :- nova:r(id=x), nova:q(id=x, 2=y)"
+            ).eliminate_column_references_and_pad_positional(theories)
         self.assertEqual(rule1, rule2, 'eliminate_column_references failed to '
                                        'preserve order insensitivity')