Python3-readiness for antlr3 pieces

Purpose: Make antlr code Python3-ready while keeping Python2 support

Changes:
- Generated Python3 CongressParser.py and CongressLexer.py placed in
subpackage congress/datalog/Python3/ (using antlr 3.5.2)
- Moved original CongressParser.py and CongressLexer.py into new
subpackage congress/datalog/Python2/
- Changed import section in congress/datalog/compiler.py to import
the appropriate subpackage based on python version used
- Removed original symlink antlr3 to the antlr3-python2 runtime
- Changed setup.py to create the appropriate antlr3 symlink based on
python version
- Changed the error reporting interface of antlr3-python3 runtime to
be compatible with what existing Congress code expects
(thirdparty/antlr3-antlr-3.5/runtime/Python3/antlr3/recognizers.py)

Result: tox -e py34 no longer reports any errors traced to antlr code.

Notes:
- Supporting both Python versions was not straightforward because
neither the antlr runtime nor the antlr generated code is
cross-version compatible.
- Making symlinks via setup.py is not ideal, but it is the best solution
I came up with among the several I considered. If you have better ideas,
please suggest them.
- Making our own custom changes to the antlr3-python3 runtime is not
ideal, but it was necessary because, for some reason, the
antlr3-python3 runtime broke compatibility with the error-reporting
interface of the antlr3-python2 runtime, leading to test failures in
test_compile.py and test_congress.py.
In theory, carrying our own custom changes in the antlr3 runtime is bad,
but because antlr3 is not supported anyway, the situation is really no
worse than before. In addition, the changes are fairly minor.
- antlr4 was not adopted because it does not have support for
abstract syntax trees.

Partially implements blueprint: support-python3

Change-Id: I376aee803c4dc9953c1b5ba9311cf9c4a42f3319
This commit is contained in:
Eric K 2015-11-05 17:46:35 -08:00
parent 959525df52
commit 0576d774a4
10 changed files with 5562 additions and 57 deletions

1
antlr3
View File

@ -1 +0,0 @@
thirdparty/antlr3-antlr-3.5/runtime/Python/antlr3/

View File

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

View File

@ -18,6 +18,7 @@ import functools
import optparse
import uuid
import six
from six.moves import range
@ -27,8 +28,12 @@ from oslo_log import log as logging
from congress.datalog import analysis
from congress.datalog.builtin import congressbuiltin
from congress.datalog import CongressLexer
from congress.datalog import CongressParser
if six.PY2:
from congress.datalog.Python2 import CongressLexer
from congress.datalog.Python2 import CongressParser
else:
from congress.datalog.Python3 import CongressLexer
from congress.datalog.Python3 import CongressParser
from congress.datalog import utility
from congress import exception
from congress import utils

View File

@ -24,6 +24,19 @@ try:
except ImportError:
pass
import os
import six
# Remove existing symlink
if os.path.islink("antlr3"):
os.remove("antlr3")
# Add appropriate symlink
if six.PY2:
os.symlink("thirdparty/antlr3-antlr-3.5/runtime/Python/antlr3/", "antlr3")
else:
os.symlink("thirdparty/antlr3-antlr-3.5/runtime/Python3/antlr3/", "antlr3")
setuptools.setup(
setup_requires=['pbr>=1.8'],
pbr=True)

View File

@ -274,7 +274,6 @@ class BaseRecognizer(object):
that.
"""
# if we've already reported an error and have not matched a token
# yet successfully, don't report any errors.
if self._state.errorRecovery:
@ -283,16 +282,17 @@ class BaseRecognizer(object):
self._state.syntaxErrors += 1 # don't count spurious
self._state.errorRecovery = True
self.displayRecognitionError(e)
# ekcs: first param added for compat w antlr Python2 runtime interface
self.displayRecognitionError(self.tokenNames, e)
def displayRecognitionError(self, e):
# ekcs: restored to implementation from antlr Python2 runtime for compat
def displayRecognitionError(self, token_names, e):
hdr = self.getErrorHeader(e)
msg = self.getErrorMessage(e)
self.emitErrorMessage(hdr + " " + msg)
msg = self.getErrorMessage(e, token_names)
self.error_list.append(str(hdr) + " " + str(msg))
def getErrorMessage(self, e):
# ekcs: restored to implementation from antlr Python2 runtime for compat
def getErrorMessage(self, e, tokenNames):
"""
What error message should be generated for the various
exception types?
@ -318,78 +318,84 @@ class BaseRecognizer(object):
"""
if isinstance(e, UnwantedTokenException):
tokenName = "<unknown>"
if e.expecting == EOF:
tokenName = "EOF"
else:
tokenName = self.tokenNames[e.expecting]
msg = "extraneous input {} expecting {}".format(
msg = "extraneous input %s expecting %s" % (
self.getTokenErrorDisplay(e.getUnexpectedToken()),
tokenName
)
elif isinstance(e, MissingTokenException):
tokenName = "<unknown>"
if e.expecting == EOF:
tokenName = "EOF"
else:
tokenName = self.tokenNames[e.expecting]
msg = "missing {} at {}".format(
msg = "missing %s at %s" % (
tokenName, self.getTokenErrorDisplay(e.token)
)
elif isinstance(e, MismatchedTokenException):
tokenName = "<unknown>"
if e.expecting == EOF:
tokenName = "EOF"
else:
tokenName = self.tokenNames[e.expecting]
msg = "mismatched input {} expecting {}".format(
self.getTokenErrorDisplay(e.token),
tokenName
)
msg = "mismatched input " \
+ self.getTokenErrorDisplay(e.token) \
+ " expecting " \
+ tokenName
elif isinstance(e, MismatchedTreeNodeException):
tokenName = "<unknown>"
if e.expecting == EOF:
tokenName = "EOF"
else:
tokenName = self.tokenNames[e.expecting]
msg = "mismatched tree node: {} expecting {}".format(
e.node, tokenName)
msg = "mismatched tree node: %s expecting %s" \
% (e.node, tokenName)
elif isinstance(e, NoViableAltException):
msg = "no viable alternative at input {}".format(
self.getTokenErrorDisplay(e.token))
msg = "no viable alternative at input " \
+ self.getTokenErrorDisplay(e.token)
elif isinstance(e, EarlyExitException):
msg = "required (...)+ loop did not match anything at input {}".format(
self.getTokenErrorDisplay(e.token))
msg = "required (...)+ loop did not match anything at input " \
+ self.getTokenErrorDisplay(e.token)
elif isinstance(e, MismatchedSetException):
msg = "mismatched input {} expecting set {!r}".format(
self.getTokenErrorDisplay(e.token),
e.expecting
)
msg = "mismatched input " \
+ self.getTokenErrorDisplay(e.token) \
+ " expecting set " \
+ repr(e.expecting)
elif isinstance(e, MismatchedNotSetException):
msg = "mismatched input {} expecting set {!r}".format(
self.getTokenErrorDisplay(e.token),
e.expecting
)
msg = "mismatched input " \
+ self.getTokenErrorDisplay(e.token) \
+ " expecting set " \
+ repr(e.expecting)
elif isinstance(e, FailedPredicateException):
msg = "rule {} failed predicate: {{{}}}?".format(
e.ruleName,
e.predicateText
)
msg = "rule " \
+ e.ruleName \
+ " failed predicate: {" \
+ e.predicateText \
+ "}?"
else:
msg = str(e)
return msg
def getNumberOfSyntaxErrors(self):
"""
Get number of recognition errors (lexer, parser, tree parser). Each
@ -1251,43 +1257,49 @@ class Lexer(BaseRecognizer, TokenSource):
##
## self.errorRecovery = True
self.displayRecognitionError(e)
# ekcs: first param added for compat w antlr Python2 runtime interface
self.displayRecognitionError(self.tokenNames, e)
def getErrorMessage(self, e):
# ekcs: restored to implementation from antlr Python2 runtime for compat
def getErrorMessage(self, e, tokenNames):
msg = None
if isinstance(e, MismatchedTokenException):
msg = "mismatched character {} expecting {}".format(
self.getCharErrorDisplay(e.c),
self.getCharErrorDisplay(e.expecting))
msg = "mismatched character " \
+ self.getCharErrorDisplay(e.c) \
+ " expecting " \
+ self.getCharErrorDisplay(e.expecting)
elif isinstance(e, NoViableAltException):
msg = "no viable alternative at character {}".format(
self.getCharErrorDisplay(e.c))
msg = "no viable alternative at character " \
+ self.getCharErrorDisplay(e.c)
elif isinstance(e, EarlyExitException):
msg = "required (...)+ loop did not match anything at character {}".format(
self.getCharErrorDisplay(e.c))
msg = "required (...)+ loop did not match anything at character " \
+ self.getCharErrorDisplay(e.c)
elif isinstance(e, MismatchedNotSetException):
msg = "mismatched character {} expecting set {!r}".format(
self.getCharErrorDisplay(e.c),
e.expecting)
msg = "mismatched character " \
+ self.getCharErrorDisplay(e.c) \
+ " expecting set " \
+ repr(e.expecting)
elif isinstance(e, MismatchedSetException):
msg = "mismatched character {} expecting set {!r}".format(
self.getCharErrorDisplay(e.c),
e.expecting)
msg = "mismatched character " \
+ self.getCharErrorDisplay(e.c) \
+ " expecting set " \
+ repr(e.expecting)
elif isinstance(e, MismatchedRangeException):
msg = "mismatched character {} expecting set {}..{}".format(
self.getCharErrorDisplay(e.c),
self.getCharErrorDisplay(e.a),
self.getCharErrorDisplay(e.b))
msg = "mismatched character " \
+ self.getCharErrorDisplay(e.c) \
+ " expecting set " \
+ self.getCharErrorDisplay(e.a) \
+ ".." \
+ self.getCharErrorDisplay(e.b)
else:
msg = super().getErrorMessage(e)
msg = BaseRecognizer.getErrorMessage(self, e, tokenNames)
return msg