From 3542d404c59d1f3fbd42effe642175e804327d2a Mon Sep 17 00:00:00 2001
From: Daisuke Fujita
Date: Mon, 5 Nov 2018 03:50:30 -0800
Subject: [PATCH] Update requirements

This patch includes the following updates.

- Remove the "findspark" and "libpgm" packages. They are no longer
  in use.
- Rename "sklearn" to "scikit-learn". "global-requirements" needs
  "scikit-learn" instead of "sklearn", which is the old package name.
- Pin the scipy version to less than 1.2.0. Currently, according to
  the upper-constraints.txt of openstack/requirements, the scipy
  version is '1.2.0'. However, with that scipy version some
  scikit-learn libraries can no longer be imported and used.
- Remove "docopt" and rewrite the code without it. "docopt" is not
  used in OpenStack.
- Remove the requirements-check job. Currently, "tornado" is a
  required package, but the requirements-check job fails on it, and
  the proposal to add "tornado" to openstack/requirements to resolve
  that error was refused.

Change-Id: I3bb98ef733ff16558d241968b06c31fa7508d047
---
 .zuul.yaml       |  1 -
 requirements.txt |  7 ++--
 run.py           | 64 ++++++++++++++++++++--------------
 test/test_run.py | 89 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 129 insertions(+), 32 deletions(-)
 create mode 100644 test/test_run.py

diff --git a/.zuul.yaml b/.zuul.yaml
index a2dfed9..4ddd5a8 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml
@@ -1,7 +1,6 @@
 - project:
     templates:
       - openstack-python35-jobs-nonvoting
-      - check-requirements
     check:
       jobs:
         - openstack-tox-pep8
diff --git a/requirements.txt b/requirements.txt
index ca12650..a8aabce 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,7 @@
-docopt
-findspark
-libpgm
 numpy
-scipy
+scipy < 1.2.0
 tornado
-sklearn
+scikit-learn
 kafka-python
 pyparsing
 voluptuous>=0.8.9 # BSD License
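Note: "scikit-learn" is the distribution name on PyPI; the import name in
code remains "sklearn". A minimal sanity check, illustrative only and not
part of the patch, that an environment honours the new pin before the
scikit-learn imports are exercised (which exact imports break under scipy
1.2.0 is not spelled out above, so sklearn.linear_model is only an
example):

    from distutils.version import LooseVersion

    import scipy
    from sklearn import linear_model  # noqa

    # scipy 1.2.0 reportedly breaks some scikit-learn imports; the pin
    # keeps the environment below that version.
    assert LooseVersion(scipy.__version__) < LooseVersion("1.2.0"), (
        "scipy %s violates the 'scipy < 1.2.0' pin" % scipy.__version__)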
diff --git a/run.py b/run.py
index 43c919f..a1a9068 100644
--- a/run.py
+++ b/run.py
@@ -20,22 +20,6 @@
 This script checks for appropriate arguments and starts Monanas to use data
 coming from one or more given sources. The source(s) can be configured using
 the optional argument --sources. However, a default source using random
 data generator is provided in the config folder.
-
-Usage:
-    run.py -p <spark_path> -c <config> -l <log_config> [-s <sources>...
-        [<sources>]] [-dvh]
-    run.py -v | --version
-    run.py -h | --help
-
-Options:
-    -c --config        Config file.
-    -d --debug         Show debug messages.
-    -h --help          Show this screen.
-    -l --log_config    Log config file's path.
-    -p --spark_path    Spark's path.
-    -s --sources       A list of data sources.
-    -v --version       Show version.
-
 """
 import json
@@ -45,7 +29,8 @@
 import os
 import subprocess
 import sys

-import docopt
+import argparse
+import textwrap

 import setup_property
@@ -59,13 +44,13 @@ class RunnerError(Exception):


 def main(arguments):
-    spark_submit = "{0}/bin/spark-submit".format(arguments["<spark_path>"])
+    spark_submit = "{0}/bin/spark-submit".format(arguments.spark_path)
     monanas_path = os.environ.get('MONANAS_HOME', "")
     kafka_jar = None
     try:
         for filename in os.listdir("{0}/external/kafka-assembly/target".
-                                   format(arguments["<spark_path>"])):
+                                   format(arguments.spark_path)):
             if filename.startswith("spark-streaming-kafka-assembly") and\
                not any(s in filename for s in ["source", "test"]):
                 kafka_jar = filename
@@ -77,13 +62,15 @@
         raise RunnerError(e.__str__())

     spark_kafka_jar = "{0}/external/kafka-assembly/target/{1}".\
-        format(arguments["<spark_path>"], kafka_jar)
+        format(arguments.spark_path, kafka_jar)
     command = [
         spark_submit, "--master", "local[2]",
         "--jars", spark_kafka_jar,
         monanas_path + "/monasca_analytics/monanas.py",
-        arguments["<config>"], arguments["<log_config>"]
+        arguments.config, arguments.log_config
     ]
-    command += arguments["<sources>"]
+
+    if arguments.sources is not None:
+        command += arguments.sources
     try:
         logger.info("Executing `{}`...".format(" ".join(command)))
@@ -99,17 +86,42 @@ def setup_logging(filename):
     log_conf.dictConfig(config)


+def setup_parser():
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=textwrap.dedent(__doc__.strip()),
+        add_help=False)
+
+    parser.add_argument('-c', '--config',
+                        help='Config file.', required=True)
+    # "-d" currently unused
+    parser.add_argument('-d', '--debug',
+                        help='Show debug messages.', action='store_true')
+    parser.add_argument('-h', '--help',
+                        help='Show this screen.', action='help')
+    parser.add_argument('-l', '--log_config',
+                        help='Log config file\'s path.', required=True)
+    parser.add_argument('-p', '--spark_path',
+                        help='Spark\'s path.', required=True)
+    parser.add_argument('-s', '--sources',
+                        help='A list of data sources.', nargs='*')
+    parser.add_argument('-v', '--version',
+                        help='Show version.', action='version',
+                        version=setup_property.VERSION)
+
+    return parser
+
 if __name__ == "__main__":
-    arguments = docopt.docopt(__doc__, version=setup_property.VERSION)
+    arguments = setup_parser().parse_args()

     try:
-        setup_logging(arguments["<log_config>"])
+        setup_logging(arguments.log_config)
     except IOError:
         raise RunnerError("File not found: {0}.".
-                          format(arguments["<log_config>"]))
+                          format(arguments.log_config))
     except ValueError:
         raise RunnerError("{0} is not a valid logging config file.".
-                          format(arguments["<log_config>"]))
+                          format(arguments.log_config))

     logger = logging.getLogger(__name__)
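Note: one behavioural detail of the argparse rewrite worth calling out:
with nargs='*', an omitted -s/--sources leaves the attribute as None rather
than an empty list, which is why main() now guards the concatenation. A
quick sketch, not part of the patch, with hypothetical placeholder paths:

    import run

    parser = run.setup_parser()
    args = parser.parse_args([
        '-p', '/opt/spark',
        '-c', '/etc/monanas/config.json',
        '-l', '/etc/monanas/logging.json',
        '-s', 'src1', 'src2',
    ])
    print(args.spark_path)  # /opt/spark
    print(args.sources)     # ['src1', 'src2']

    # Without -s/--sources the attribute defaults to None, not []:
    args = parser.parse_args(['-p', 'x', '-c', 'y', '-l', 'z'])
    print(args.sources)     # None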
diff --git a/test/test_run.py b/test/test_run.py
new file mode 100644
index 0000000..56504ca
--- /dev/null
+++ b/test/test_run.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2018 FUJITSU LIMITED
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import run
+from util_for_testing import MonanasTestCase
+
+
+class ParserTest(MonanasTestCase):
+    """Test the argument parser in run.py."""
+
+    def setUp(self):
+        super(ParserTest, self).setUp()
+        self.parser = run.setup_parser()
+
+    def tearDown(self):
+        super(ParserTest, self).tearDown()
+
+    def _get_parser(self, args):
+        try:
+            parsed = self.parser.parse_args(args)
+        except SystemExit:
+            self.fail("Argument parse failed")
+
+        return parsed
+
+    def _check_parser(self, parsed, args, verify_args):
+        for av in verify_args:
+            attr, value = av
+            if attr:
+                self.assertIn(attr, parsed)
+                self.assertEqual(value, getattr(parsed, attr))
+
+    def test_parser_required(self):
+        arglist = [
+            '--config', '/path/to/config_file',
+            '--log_config', '/path/to/log_file',
+            '--spark_path', '/path/to/spark',
+        ]
+
+        verifylist = [
+            ('config', '/path/to/config_file'),
+            ('log_config', '/path/to/log_file'),
+            ('spark_path', '/path/to/spark'),
+        ]
+
+        parsed = self._get_parser(arglist)
+        self._check_parser(parsed, arglist, verifylist)
+
+    def test_parser_optional(self):
+        arglist = [
+            '--config', '/path/to/config_file',
+            '--log_config', '/path/to/log_file',
+            '--spark_path', '/path/to/spark',
+            '--sources', '/path/to/src1', '/path/to/src2',
+        ]
+
+        verifylist = [
+            ('config', '/path/to/config_file'),
+            ('log_config', '/path/to/log_file'),
+            ('spark_path', '/path/to/spark'),
+            ('sources', ['/path/to/src1', '/path/to/src2']),
+        ]
+
+        parsed = self._get_parser(arglist)
+        self._check_parser(parsed, arglist, verifylist)
+
+    def test_parser_optional_bool(self):
+        arglist = [
+            '--config', '/path/to/config_file',
+            '--log_config', '/path/to/log_file',
+            '--spark_path', '/path/to/spark',
+            '--debug',
+        ]
+
+        parsed = self._get_parser(arglist)
+        self.assertTrue(parsed.debug)
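Note: util_for_testing is not included in this patch. A minimal stand-in,
assuming MonanasTestCase merely wraps unittest.TestCase (the project's real
helper may carry extra fixture logic), lets test_run.py run in isolation:

    # util_for_testing.py (hypothetical minimal version)
    import unittest


    class MonanasTestCase(unittest.TestCase):

        def setUp(self):
            super(MonanasTestCase, self).setUp()

        def tearDown(self):
            super(MonanasTestCase, self).tearDown()

With that in place, `python -m unittest discover test` should pick up
ParserTest.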