From 3542d404c59d1f3fbd42effe642175e804327d2a Mon Sep 17 00:00:00 2001
From: Daisuke Fujita
Date: Mon, 5 Nov 2018 03:50:30 -0800
Subject: [PATCH] Update requirements

This patch includes the following updates.

- Remove the "findspark" and "libpgm" packages. They are no longer
  in use.
- Rename "sklearn" to "scikit-learn". "global-requirements" needs
  "scikit-learn" instead of "sklearn", which is the old package name.
- Pin the scipy version to less than 1.2.0. Currently, according to
  the upper-constraints.txt of openstack/requirements, the scipy
  version is '1.2.0'. However, with that scipy version some
  scikit-learn libraries can no longer be imported and used.
- Remove "docopt" and rewrite the code without it. "docopt" is not
  used in OpenStack.
- Remove the requirements-check job. Currently, "tornado" is a
  required package, but the requirements-check job fails on it, and
  the proposal to add "tornado" to openstack/requirements to resolve
  that error was refused.

Change-Id: I3bb98ef733ff16558d241968b06c31fa7508d047
---
 .zuul.yaml       |  1 -
 requirements.txt |  7 ++--
 run.py           | 64 ++++++++++++++++++++--------------
 test/test_run.py | 89 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 129 insertions(+), 32 deletions(-)
 create mode 100644 test/test_run.py

diff --git a/.zuul.yaml b/.zuul.yaml
index a2dfed9..4ddd5a8 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml
@@ -1,7 +1,6 @@
 - project:
     templates:
       - openstack-python35-jobs-nonvoting
-      - check-requirements
     check:
       jobs:
         - openstack-tox-pep8
diff --git a/requirements.txt b/requirements.txt
index ca12650..a8aabce 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,7 @@
-docopt
-findspark
-libpgm
 numpy
-scipy
+scipy < 1.2.0
 tornado
-sklearn
+scikit-learn
 kafka-python
 pyparsing
 voluptuous>=0.8.9 # BSD License
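Note: "scikit-learn" is the distribution name on PyPI; the import name in
code remains "sklearn". A minimal sanity check, illustrative only and not
part of the patch, that an environment honours the new pin before the
scikit-learn imports are exercised (which exact imports break under scipy
1.2.0 is not spelled out above, so sklearn.linear_model is only an
example):

    from distutils.version import LooseVersion

    import scipy
    from sklearn import linear_model  # noqa

    # scipy 1.2.0 reportedly breaks some scikit-learn imports; the pin
    # keeps the environment below that version.
    assert LooseVersion(scipy.__version__) < LooseVersion("1.2.0"), (
        "scipy %s violates the 'scipy < 1.2.0' pin" % scipy.__version__)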
diff --git a/run.py b/run.py
index 43c919f..a1a9068 100644
--- a/run.py
+++ b/run.py
@@ -20,22 +20,6 @@
 This script checks for appropriate arguments and starts Monanas to use data
 coming from one or more given sources. The source(s) can be configured using
 the optional argument --sources. However, a default source using random
 data generator is provided in the config folder.
-
-Usage:
-    run.py -p <spark_path> -c <config> -l <log_config> [-s <sources>...
-        [<sources>]] [-dvh]
-    run.py -v | --version
-    run.py -h | --help
-
-Options:
-    -c --config        Config file.
-    -d --debug         Show debug messages.
-    -h --help          Show this screen.
-    -l --log_config    Log config file's path.
-    -p --spark_path    Spark's path.
-    -s --sources       A list of data sources.
-    -v --version       Show version.
-
 """
 import json
@@ -45,7 +29,8 @@
 import os
 import subprocess
 import sys

-import docopt
+import argparse
+import textwrap

 import setup_property
@@ -59,13 +44,13 @@ class RunnerError(Exception):


 def main(arguments):
-    spark_submit = "{0}/bin/spark-submit".format(arguments["<spark_path>"])
+    spark_submit = "{0}/bin/spark-submit".format(arguments.spark_path)
     monanas_path = os.environ.get('MONANAS_HOME', "")
     kafka_jar = None
     try:
         for filename in os.listdir("{0}/external/kafka-assembly/target".
-                                   format(arguments["<spark_path>"])):
+                                   format(arguments.spark_path)):
             if filename.startswith("spark-streaming-kafka-assembly") and\
                not any(s in filename for s in ["source", "test"]):
                 kafka_jar = filename
@@ -77,13 +62,15 @@
         raise RunnerError(e.__str__())

     spark_kafka_jar = "{0}/external/kafka-assembly/target/{1}".\
-        format(arguments["<spark_path>"], kafka_jar)
+        format(arguments.spark_path, kafka_jar)
     command = [
         spark_submit, "--master", "local[2]",
         "--jars", spark_kafka_jar,
         monanas_path + "/monasca_analytics/monanas.py",
-        arguments["<config>"], arguments["<log_config>"]
+        arguments.config, arguments.log_config
     ]
-    command += arguments["<sources>"]
+
+    if arguments.sources is not None:
+        command += arguments.sources
     try:
         logger.info("Executing `{}`...".format(" ".join(command)))
@@ -99,17 +86,42 @@ def setup_logging(filename):
     log_conf.dictConfig(config)


+def setup_parser():
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=textwrap.dedent(__doc__.strip()),
+        add_help=False)
+
+    parser.add_argument('-c', '--config',
+                        help='Config file.', required=True)
+    # "-d" currently unused
+    parser.add_argument('-d', '--debug',
+                        help='Show debug messages.', action='store_true')
+    parser.add_argument('-h', '--help',
+                        help='Show this screen.', action='help')
+    parser.add_argument('-l', '--log_config',
+                        help='Log config file\'s path.', required=True)
+    parser.add_argument('-p', '--spark_path',
+                        help='Spark\'s path.', required=True)
+    parser.add_argument('-s', '--sources',
+                        help='A list of data sources.', nargs='*')
+    parser.add_argument('-v', '--version',
+                        help='Show version.', action='version',
+                        version=setup_property.VERSION)
+
+    return parser
+
 if __name__ == "__main__":
-    arguments = docopt.docopt(__doc__, version=setup_property.VERSION)
+    arguments = setup_parser().parse_args()

     try:
-        setup_logging(arguments["<log_config>"])
+        setup_logging(arguments.log_config)
     except IOError:
         raise RunnerError("File not found: {0}.".
-                          format(arguments["<log_config>"]))
+                          format(arguments.log_config))
     except ValueError:
         raise RunnerError("{0} is not a valid logging config file.".
-                          format(arguments["<log_config>"]))
+                          format(arguments.log_config))

     logger = logging.getLogger(__name__)
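Note: one behavioural detail of the argparse rewrite worth calling out:
with nargs='*', an omitted -s/--sources leaves the attribute as None rather
than an empty list, which is why main() now guards the concatenation. A
quick sketch, not part of the patch, with hypothetical placeholder paths:

    import run

    parser = run.setup_parser()
    args = parser.parse_args([
        '-p', '/opt/spark',
        '-c', '/etc/monanas/config.json',
        '-l', '/etc/monanas/logging.json',
        '-s', 'src1', 'src2',
    ])
    print(args.spark_path)  # /opt/spark
    print(args.sources)     # ['src1', 'src2']

    # Without -s/--sources the attribute defaults to None, not []:
    args = parser.parse_args(['-p', 'x', '-c', 'y', '-l', 'z'])
    print(args.sources)     # None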
diff --git a/test/test_run.py b/test/test_run.py
new file mode 100644
index 0000000..56504ca
--- /dev/null
+++ b/test/test_run.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2018 FUJITSU LIMITED
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import run
+from util_for_testing import MonanasTestCase
+
+
+class ParserTest(MonanasTestCase):
+    """Test the argument parser in run.py."""
+
+    def setUp(self):
+        super(ParserTest, self).setUp()
+        self.parser = run.setup_parser()
+
+    def tearDown(self):
+        super(ParserTest, self).tearDown()
+
+    def _get_parser(self, args):
+        try:
+            parsed = self.parser.parse_args(args)
+        except SystemExit:
+            self.fail("Argument parse failed")
+
+        return parsed
+
+    def _check_parser(self, parsed, args, verify_args):
+        for av in verify_args:
+            attr, value = av
+            if attr:
+                self.assertIn(attr, parsed)
+                self.assertEqual(value, getattr(parsed, attr))
+
+    def test_parser_required(self):
+        arglist = [
+            '--config', '/path/to/config_file',
+            '--log_config', '/path/to/log_file',
+            '--spark_path', '/path/to/spark',
+        ]
+
+        verifylist = [
+            ('config', '/path/to/config_file'),
+            ('log_config', '/path/to/log_file'),
+            ('spark_path', '/path/to/spark'),
+        ]
+
+        parsed = self._get_parser(arglist)
+        self._check_parser(parsed, arglist, verifylist)
+
+    def test_parser_optional(self):
+        arglist = [
+            '--config', '/path/to/config_file',
+            '--log_config', '/path/to/log_file',
+            '--spark_path', '/path/to/spark',
+            '--sources', '/path/to/src1', '/path/to/src2',
+        ]
+
+        verifylist = [
+            ('config', '/path/to/config_file'),
+            ('log_config', '/path/to/log_file'),
+            ('spark_path', '/path/to/spark'),
+            ('sources', ['/path/to/src1', '/path/to/src2']),
+        ]
+
+        parsed = self._get_parser(arglist)
+        self._check_parser(parsed, arglist, verifylist)
+
+    def test_parser_optional_bool(self):
+        arglist = [
+            '--config', '/path/to/config_file',
+            '--log_config', '/path/to/log_file',
+            '--spark_path', '/path/to/spark',
+            '--debug',
+        ]
+
+        parsed = self._get_parser(arglist)
+        self.assertTrue(parsed.debug)
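Note: util_for_testing is not included in this patch. A minimal stand-in,
assuming MonanasTestCase merely wraps unittest.TestCase (the project's real
helper may carry extra fixture logic), lets test_run.py run in isolation:

    # util_for_testing.py (hypothetical minimal version)
    import unittest


    class MonanasTestCase(unittest.TestCase):

        def setUp(self):
            super(MonanasTestCase, self).setUp()

        def tearDown(self):
            super(MonanasTestCase, self).tearDown()

With that in place, `python -m unittest discover test` should pick up
ParserTest.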