Python3-readiness for antlr3 pieces

Purpose: Make the antlr code Python3-ready while keeping Python2 support.

Changes:
- Generated Python3 CongressParser.py and CongressLexer.py and placed them in the subpackage congress/datalog/Python3/ (using antlr 3.5.2)
- Moved the original CongressParser.py and CongressLexer.py into the new subpackage congress/datalog/Python2/
- Changed the import section in congress/datalog/compile.py to import the appropriate subpackage based on the Python version in use
- Removed the original symlink antlr3, which pointed to the antlr3-python2 runtime
- Changed setup.py to create the appropriate antlr3 symlink based on the Python version
- Changed the error reporting interface of the antlr3-python3 runtime to be compatible with what existing Congress code expects (thirdparty/antlr3-antlr-3.5/runtime/Python3/antlr3/recognizers.py)

Result: tox -e py34 no longer reports any errors traced to antlr code.

Notes:
- Supporting both Python versions was not straightforward because neither the antlr runtime nor the antlr-generated code is cross-version compatible.
- Making symlinks via setup.py is not ideal, but it is the best solution I came up with among the several I considered. If you have better ideas, please suggest them.
- Making our own custom changes to the antlr3-python3 runtime is not ideal, but it was necessary because, for some reason, the antlr3-python3 runtime broke error reporting interface compatibility with the antlr3-python2 runtime, leading to test failures in test_compile.py and test_congress.py. In theory, maintaining our own custom changes in the antlr3 runtime is bad, but because antlr3 is not supported anyway, the situation is really no worse than before. In addition, the changes are fairly minor.
- antlr4 was not adopted because it does not have support for abstract syntax trees.

Partially implements blueprint: support-python3

Change-Id: I376aee803c4dc9953c1b5ba9311cf9c4a42f3319
2015-11-05 17:46:35 -08:00 · 2015-11-05 17:46:35 -08:00 · 0576d774a4
parent 959525df52
commit 0576d774a4
10 changed files with 5562 additions and 57 deletions
--- a/1
+++ b/1
@@ -1 +0,0 @@
-thirdparty/antlr3-antlr-3.5/runtime/Python/antlr3/
--- a/congress/datalog/Python2/CongressLexer.py
+++ b/congress/datalog/Python2/CongressLexer.py
--- a/congress/datalog/Python2/CongressParser.py
+++ b/congress/datalog/Python2/CongressParser.py
--- a/congress/datalog/Python2/__init__.py
+++ b/congress/datalog/Python2/__init__.py
--- a/congress/datalog/Python3/CongressLexer.py
+++ b/congress/datalog/Python3/CongressLexer.py
--- a/congress/datalog/Python3/CongressParser.py
+++ b/congress/datalog/Python3/CongressParser.py
--- a/congress/datalog/Python3/__init__.py
+++ b/congress/datalog/Python3/__init__.py
--- a/congress/datalog/compile.py
+++ b/congress/datalog/compile.py
@@ -18,6 +18,7 @@ import functools
 import optparse
 import uuid

+
 import six
 from six.moves import range

@@ -27,8 +28,12 @@ from oslo_log import log as logging

 from congress.datalog import analysis
 from congress.datalog.builtin import congressbuiltin
-from congress.datalog import CongressLexer
-from congress.datalog import CongressParser
+if six.PY2:
+    from congress.datalog.Python2 import CongressLexer
+    from congress.datalog.Python2 import CongressParser
+else:
+    from congress.datalog.Python3 import CongressLexer
+    from congress.datalog.Python3 import CongressParser
 from congress.datalog import utility
 from congress import exception
 from congress import utils
--- a/setup.py
+++ b/setup.py
@@ -24,6 +24,19 @@ try:
 except ImportError:
    pass

+import os
+import six
+
+# Remove existing symlink
+if os.path.islink("antlr3"):
+    os.remove("antlr3")
+
+# Add appropriate symlink
+if six.PY2:
+    os.symlink("thirdparty/antlr3-antlr-3.5/runtime/Python/antlr3/", "antlr3")
+else:
+    os.symlink("thirdparty/antlr3-antlr-3.5/runtime/Python3/antlr3/", "antlr3")
+
 setuptools.setup(
    setup_requires=['pbr>=1.8'],
    pbr=True)
--- a/thirdparty/antlr3-antlr-3.5/runtime/Python3/antlr3/recognizers.py
+++ b/thirdparty/antlr3-antlr-3.5/runtime/Python3/antlr3/recognizers.py
@@ -274,7 +274,6 @@ class BaseRecognizer(object):
        that.

        """
-
        # if we've already reported an error and have not matched a token
        # yet successfully, don't report any errors.
        if self._state.errorRecovery:
@@ -283,16 +282,17 @@ class BaseRecognizer(object):
        self._state.syntaxErrors += 1 # don't count spurious
        self._state.errorRecovery = True

-        self.displayRecognitionError(e)
+        # ekcs: first param added for compat w antlr Python2 runtime interface
+        self.displayRecognitionError(self.tokenNames, e)

-
-    def displayRecognitionError(self, e):
+    # ekcs: restored to implementation from antlr Python2 runtime for compat
+    def displayRecognitionError(self, token_names, e):
        hdr = self.getErrorHeader(e)
-        msg = self.getErrorMessage(e)
-        self.emitErrorMessage(hdr + " " + msg)
+        msg = self.getErrorMessage(e, token_names)
+        self.error_list.append(str(hdr) + "  " + str(msg))

-
-    def getErrorMessage(self, e):
+    # ekcs: restored to implementation from antlr Python2 runtime for compat
+    def getErrorMessage(self, e, tokenNames):
        """
        What error message should be generated for the various
        exception types?
@@ -318,78 +318,84 @@ class BaseRecognizer(object):
        """

        if isinstance(e, UnwantedTokenException):
+            tokenName = "<unknown>"
            if e.expecting == EOF:
                tokenName = "EOF"
+
            else:
                tokenName = self.tokenNames[e.expecting]

-            msg = "extraneous input {} expecting {}".format(
+            msg = "extraneous input %s expecting %s" % (
                self.getTokenErrorDisplay(e.getUnexpectedToken()),
                tokenName
                )

        elif isinstance(e, MissingTokenException):
+            tokenName = "<unknown>"
            if e.expecting == EOF:
                tokenName = "EOF"
+
            else:
                tokenName = self.tokenNames[e.expecting]

-            msg = "missing {} at {}".format(
+            msg = "missing %s at %s" % (
                tokenName, self.getTokenErrorDisplay(e.token)
                )

        elif isinstance(e, MismatchedTokenException):
+            tokenName = "<unknown>"
            if e.expecting == EOF:
                tokenName = "EOF"
            else:
                tokenName = self.tokenNames[e.expecting]

-            msg = "mismatched input {} expecting {}".format(
-                self.getTokenErrorDisplay(e.token),
-                tokenName
-                )
+            msg = "mismatched input " \
+                  + self.getTokenErrorDisplay(e.token) \
+                  + " expecting " \
+                  + tokenName

        elif isinstance(e, MismatchedTreeNodeException):
+            tokenName = "<unknown>"
            if e.expecting == EOF:
                tokenName = "EOF"
            else:
                tokenName = self.tokenNames[e.expecting]

-            msg = "mismatched tree node: {} expecting {}".format(
-                e.node, tokenName)
+            msg = "mismatched tree node: %s expecting %s" \
+                  % (e.node, tokenName)

        elif isinstance(e, NoViableAltException):
-            msg = "no viable alternative at input {}".format(
-                self.getTokenErrorDisplay(e.token))
+            msg = "no viable alternative at input " \
+                  + self.getTokenErrorDisplay(e.token)

        elif isinstance(e, EarlyExitException):
-            msg = "required (...)+ loop did not match anything at input {}".format(
-                self.getTokenErrorDisplay(e.token))
+            msg = "required (...)+ loop did not match anything at input " \
+                  + self.getTokenErrorDisplay(e.token)

        elif isinstance(e, MismatchedSetException):
-            msg = "mismatched input {} expecting set {!r}".format(
-                self.getTokenErrorDisplay(e.token),
-                e.expecting
-                )
+            msg = "mismatched input " \
+                  + self.getTokenErrorDisplay(e.token) \
+                  + " expecting set " \
+                  + repr(e.expecting)

        elif isinstance(e, MismatchedNotSetException):
-            msg = "mismatched input {} expecting set {!r}".format(
-                self.getTokenErrorDisplay(e.token),
-                e.expecting
-                )
+            msg = "mismatched input " \
+                  + self.getTokenErrorDisplay(e.token) \
+                  + " expecting set " \
+                  + repr(e.expecting)

        elif isinstance(e, FailedPredicateException):
-            msg = "rule {} failed predicate: {{{}}}?".format(
-                e.ruleName,
-                e.predicateText
-                )
+            msg = "rule " \
+                  + e.ruleName \
+                  + " failed predicate: {" \
+                  + e.predicateText \
+                  + "}?"

        else:
            msg = str(e)

        return msg

-
    def getNumberOfSyntaxErrors(self):
        """
        Get number of recognition errors (lexer, parser, tree parser).  Each
@@ -1251,43 +1257,49 @@ class Lexer(BaseRecognizer, TokenSource):
        ##
        ## self.errorRecovery = True

-        self.displayRecognitionError(e)
+        # ekcs: first param added for compat w antlr Python2 runtime interface
+        self.displayRecognitionError(self.tokenNames, e)

-
-    def getErrorMessage(self, e):
+    # ekcs: restored to implementation from antlr Python2 runtime for compat
+    def getErrorMessage(self, e, tokenNames):
        msg = None

        if isinstance(e, MismatchedTokenException):
-            msg = "mismatched character {} expecting {}".format(
-                self.getCharErrorDisplay(e.c),
-                self.getCharErrorDisplay(e.expecting))
+            msg = "mismatched character " \
+                  + self.getCharErrorDisplay(e.c) \
+                  + " expecting " \
+                  + self.getCharErrorDisplay(e.expecting)

        elif isinstance(e, NoViableAltException):
-            msg = "no viable alternative at character {}".format(
-                self.getCharErrorDisplay(e.c))
+            msg = "no viable alternative at character " \
+                  + self.getCharErrorDisplay(e.c)

        elif isinstance(e, EarlyExitException):
-            msg = "required (...)+ loop did not match anything at character {}".format(
-                self.getCharErrorDisplay(e.c))
+            msg = "required (...)+ loop did not match anything at character " \
+                  + self.getCharErrorDisplay(e.c)

        elif isinstance(e, MismatchedNotSetException):
-            msg = "mismatched character {} expecting set {!r}".format(
-                self.getCharErrorDisplay(e.c),
-                e.expecting)
+            msg = "mismatched character " \
+                  + self.getCharErrorDisplay(e.c) \
+                  + " expecting set " \
+                  + repr(e.expecting)

        elif isinstance(e, MismatchedSetException):
-            msg = "mismatched character {} expecting set {!r}".format(
-                self.getCharErrorDisplay(e.c),
-                e.expecting)
+            msg = "mismatched character " \
+                  + self.getCharErrorDisplay(e.c) \
+                  + " expecting set " \
+                  + repr(e.expecting)

        elif isinstance(e, MismatchedRangeException):
-            msg = "mismatched character {} expecting set {}..{}".format(
-                self.getCharErrorDisplay(e.c),
-                self.getCharErrorDisplay(e.a),
-                self.getCharErrorDisplay(e.b))
+            msg = "mismatched character " \
+                  + self.getCharErrorDisplay(e.c) \
+                  + " expecting set " \
+                  + self.getCharErrorDisplay(e.a) \
+                  + ".." \
+                  + self.getCharErrorDisplay(e.b)

        else:
-            msg = super().getErrorMessage(e)
+            msg = BaseRecognizer.getErrorMessage(self, e, tokenNames)

        return msg
				`@ -1 +0,0 @@`
				`thirdparty/antlr3-antlr-3.5/runtime/Python/antlr3/`