From 3396764571f5e18f99a63f1f1160a0a6a1b3097a Mon Sep 17 00:00:00 2001 From: kafka Date: Wed, 15 Jul 2015 16:16:36 +0800 Subject: [PATCH] Add command fuzzy matching The command 'openstack user' throws an error with no helpful message even though 'openstack user list' works, which is really bad UX. This patch adds fuzzy matching to print the most similar commands when the user mistypes a command. Use the Damerau-Levenshtein algorithm to find the best similarity. It borrows from Git's implementation of the algorithm: https://github.com/git/git/commit/8af84dadb142f7321ff0ce8690385e99da8ede2f $ openstack user openstack: 'user' is not an openstack command. See 'openstack --help'. Did you mean one of these? user create user delete user list user password set user set user show consumer create consumer delete consumer list consumer set consumer show Change-Id: Id8732504c0b36177319fc33fae7e630b7b714be7 Closes-Bug: 1462192 --- cliff/app.py | 54 +++++++++++++++++++++++-- cliff/tests/test_app.py | 20 +++++++++- cliff/utils.py | 88 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+), 5 deletions(-) create mode 100644 cliff/utils.py diff --git a/cliff/app.py b/cliff/app.py index 57528a8..d6398d5 100644 --- a/cliff/app.py +++ b/cliff/app.py @@ -9,9 +9,11 @@ import logging import logging.handlers import os import sys +import operator from .complete import CompleteCommand from .help import HelpAction, HelpCommand +from .utils import damerau_levenshtein, COST # Make sure the cliff library has a logging handler # in case the app developer doesn't set up logging. 
@@ -287,14 +289,60 @@ class App(object): self.interpreter.cmdloop() return 0 + def get_fuzzy_matches(self, cmd): + """return fuzzy matches of unknown command + """ + + sep = '_' + if self.command_manager.convert_underscores: + sep = ' ' + all_cmds = [k[0] for k in self.command_manager] + dist = [] + for candidate in sorted(all_cmds): + prefix = candidate.split(sep)[0] + # Give prefix match a very good score + if candidate.startswith(cmd): + dist.append((candidate, 0)) + continue + # Levenshtein distance + dist.append((candidate, damerau_levenshtein(cmd, prefix, COST)+1)) + dist = sorted(dist, key=operator.itemgetter(1, 0)) + matches = [] + i = 0 + # Find the best similarity + while (not dist[i][1]): + matches.append(dist[i][0]) + i += 1 + best_similarity = dist[i][1] + while (dist[i][1] == best_similarity): + matches.append(dist[i][0]) + i += 1 + + return matches + def run_subcommand(self, argv): try: subcommand = self.command_manager.find_command(argv) except ValueError as err: - if self.options.debug: - raise + # If there was no exact match, try to find a fuzzy match + the_cmd = argv[0] + fuzzy_matches = self.get_fuzzy_matches(the_cmd) + if fuzzy_matches: + article = 'a' + if self.NAME[0] in 'aeiou': + article = 'an' + self.stdout.write('%s: \'%s\' is not %s %s command. 
' + 'See \'%s --help\'.\n' + % (self.NAME, the_cmd, article, + self.NAME, self.NAME)) + self.stdout.write('Did you mean one of these?\n') + for match in fuzzy_matches: + self.stdout.write(' %s\n' % match) else: - self.LOG.error(err) + if self.options.debug: + raise + else: + self.LOG.error(err) return 2 cmd_factory, cmd_name, sub_argv = subcommand kwargs = {} diff --git a/cliff/tests/test_app.py b/cliff/tests/test_app.py index 799e5ee..ffda8e7 100644 --- a/cliff/tests/test_app.py +++ b/cliff/tests/test_app.py @@ -3,8 +3,7 @@ from argparse import ArgumentError try: from StringIO import StringIO except ImportError: - # Probably python 3, that test won't be run so ignore the error - pass + from io import StringIO import sys import nose @@ -13,6 +12,7 @@ import mock from cliff.app import App from cliff.command import Command from cliff.commandmanager import CommandManager +from cliff.tests import utils def make_app(**kwargs): @@ -412,3 +412,19 @@ def test_unknown_cmd_debug(): app.run(['--debug', 'hell']) == 2 except ValueError as err: assert "['hell']" in ('%s' % err) + + +def test_list_matching_commands(): + stdout = StringIO() + app = App('testing', '1', + utils.TestCommandManager(utils.TEST_NAMESPACE), + stdout=stdout) + app.NAME = 'test' + try: + assert app.run(['t']) == 2 + except SystemExit: + pass + output = stdout.getvalue() + assert "test: 't' is not a test command. See 'test --help'." in output + assert 'Did you mean one of these?' in output + assert 'three word command\n two words\n' in output diff --git a/cliff/utils.py b/cliff/utils.py new file mode 100644 index 0000000..aed74f3 --- /dev/null +++ b/cliff/utils.py @@ -0,0 +1,88 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Each edit operation is assigned a different cost: +# 'w' means swap operation, the cost is 0; +# 's' means substitution operation, the cost is 2; +# 'a' means insertion operation, the cost is 1; +# 'd' means deletion operation, the cost is 3. +# A smaller cost means a better similarity. +COST = {'w': 0, 's': 2, 'a': 1, 'd': 3} + + +def damerau_levenshtein(s1, s2, cost): + """Calculates the Damerau-Levenshtein distance between two strings. + + The Levenshtein distance is the minimum number of single-character edits + (i.e. insertions, deletions, swaps or substitutions) required to change one + string to the other. + The idea is to reserve a matrix to hold the Levenshtein distances between + all prefixes of the first string and all prefixes of the second, then we + can compute the values in the matrix in a dynamic programming fashion. To + avoid a large space complexity, only the last three rows in the matrix are + needed (row2 holds the current row, row1 holds the previous row, and row0 + the row before that). 
+ + More details: + https://en.wikipedia.org/wiki/Levenshtein_distance + https://github.com/git/git/commit/8af84dadb142f7321ff0ce8690385e99da8ede2f + """ + + if s1 == s2: + return 0 + + len1 = len(s1) + len2 = len(s2) + + if len1 == 0: + return len2 * cost['a'] + if len2 == 0: + return len1 * cost['d'] + + row1 = [i * cost['a'] for i in range(len2 + 1)] + row2 = row1[:] + row0 = row1[:] + + for i in range(len1): + row2[0] = (i + 1) * cost['d'] + + for j in range(len2): + + # substitution + sub_cost = row1[j] + (s1[i] != s2[j]) * cost['s'] + + # insertion + ins_cost = row2[j] + cost['a'] + + # deletion + del_cost = row1[j + 1] + cost['d'] + + # swap + swp_condition = ((i > 0) + and (j > 0) + and (s1[i - 1] == s2[j]) + and (s1[i] == s2[j - 1]) + ) + + # min cost + if swp_condition: + swp_cost = row0[j - 1] + cost['w'] + p_cost = min(sub_cost, ins_cost, del_cost, swp_cost) + else: + p_cost = min(sub_cost, ins_cost, del_cost) + + row2[j + 1] = p_cost + + row0, row1, row2 = row1, row2, row0 + + return row1[-1]