Merge "Add command fuzzy matching"

This commit is contained in:
Jenkins 2015-07-24 19:36:37 +00:00 committed by Gerrit Code Review
commit 5970766546
3 changed files with 157 additions and 5 deletions

View File

@ -9,9 +9,11 @@ import logging
import logging.handlers import logging.handlers
import os import os
import sys import sys
import operator
from .complete import CompleteCommand from .complete import CompleteCommand
from .help import HelpAction, HelpCommand from .help import HelpAction, HelpCommand
from .utils import damerau_levenshtein, COST
# Make sure the cliff library has a logging handler # Make sure the cliff library has a logging handler
# in case the app developer doesn't set up logging. # in case the app developer doesn't set up logging.
@ -299,14 +301,60 @@ class App(object):
self.interpreter.cmdloop() self.interpreter.cmdloop()
return 0 return 0
def get_fuzzy_matches(self, cmd):
    """Return the known command names that best resemble ``cmd``.

    Every command name yielded by ``self.command_manager`` is scored:

    * a name that starts with ``cmd`` gets score 0 (prefix match);
    * any other name gets ``damerau_levenshtein(cmd, first_word, COST) + 1``
      where ``first_word`` is the part of the name before the first
      separator (``' '`` when the manager converts underscores,
      ``'_'`` otherwise).

    :param cmd: the command word the user typed
    :returns: a list holding all prefix matches followed by all names
        sharing the smallest non-zero score, each group in alphabetical
        order. The list is empty when no commands are registered.
    """
    sep = ' ' if self.command_manager.convert_underscores else '_'

    scored = []
    for candidate in sorted(k[0] for k in self.command_manager):
        # Give prefix match a very good score
        if candidate.startswith(cmd):
            scored.append((candidate, 0))
            continue
        # Otherwise compare against the first word only; the +1 keeps
        # every non-prefix candidate strictly behind the prefix matches.
        prefix = candidate.split(sep)[0]
        scored.append(
            (candidate, damerau_levenshtein(cmd, prefix, COST) + 1))
    scored.sort(key=operator.itemgetter(1, 0))

    matches = []
    best_similarity = 0
    # Walk the sorted scores once.  Iterating (instead of indexing with an
    # unbounded counter) cannot run off the end of the list when there are
    # no candidates, only prefix matches, or when the best-scoring group
    # reaches the last element.
    for candidate, score in scored:
        if score > best_similarity:
            if best_similarity:
                # Everything at the best non-zero score has been copied.
                break
            # Prefix matches are exhausted; lock in the best distance.
            best_similarity = score
        matches.append(candidate)
    return matches
def run_subcommand(self, argv): def run_subcommand(self, argv):
try: try:
subcommand = self.command_manager.find_command(argv) subcommand = self.command_manager.find_command(argv)
except ValueError as err: except ValueError as err:
if self.options.debug: # If there was no exact match, try to find a fuzzy match
raise the_cmd = argv[0]
fuzzy_matches = self.get_fuzzy_matches(the_cmd)
if fuzzy_matches:
article = 'a'
if self.NAME[0] in 'aeiou':
article = 'an'
self.stdout.write('%s: \'%s\' is not %s %s command. '
'See \'%s --help\'.\n'
% (self.NAME, the_cmd, article,
self.NAME, self.NAME))
self.stdout.write('Did you mean one of these?\n')
for match in fuzzy_matches:
self.stdout.write(' %s\n' % match)
else: else:
self.LOG.error(err) if self.options.debug:
raise
else:
self.LOG.error(err)
return 2 return 2
cmd_factory, cmd_name, sub_argv = subcommand cmd_factory, cmd_name, sub_argv = subcommand
kwargs = {} kwargs = {}

View File

@ -3,8 +3,7 @@ from argparse import ArgumentError
try: try:
from StringIO import StringIO from StringIO import StringIO
except ImportError: except ImportError:
# Probably python 3, that test won't be run so ignore the error from io import StringIO
pass
import sys import sys
import nose import nose
@ -13,6 +12,7 @@ import mock
from cliff.app import App from cliff.app import App
from cliff.command import Command from cliff.command import Command
from cliff.commandmanager import CommandManager from cliff.commandmanager import CommandManager
from cliff.tests import utils
def make_app(**kwargs): def make_app(**kwargs):
@ -432,3 +432,19 @@ def test_unknown_cmd_debug():
app.run(['--debug', 'hell']) == 2 app.run(['--debug', 'hell']) == 2
except ValueError as err: except ValueError as err:
assert "['hell']" in ('%s' % err) assert "['hell']" in ('%s' % err)
def test_list_matching_commands():
    # Running an unknown command ('t') must exit with status 2 and print
    # the "did you mean" hint followed by the suggested commands.
    captured = StringIO()
    manager = utils.TestCommandManager(utils.TEST_NAMESPACE)
    app = App('testing', '1', manager, stdout=captured)
    app.NAME = 'test'
    try:
        assert app.run(['t']) == 2
    except SystemExit:
        pass
    output = captured.getvalue()
    # Header line naming the program and the rejected command.
    assert "test: 't' is not a test command. See 'test --help'." in output
    assert 'Did you mean one of these?' in output
    # Suggestions are expected back to back, in this order.
    assert 'three word command\n  two words\n' in output

88
cliff/utils.py Normal file
View File

@ -0,0 +1,88 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Per-operation weights handed to damerau_levenshtein(); the lower the
# total cost, the more similar the two strings are.
#   'w' - swap of two adjacent characters, cost 0
#   's' - substitution of one character,   cost 2
#   'a' - insertion of one character,      cost 1
#   'd' - deletion of one character,       cost 3
COST = {
    'w': 0,
    's': 2,
    'a': 1,
    'd': 3,
}
def damerau_levenshtein(s1, s2, cost):
    """Return the weighted Damerau-Levenshtein distance from s1 to s2.

    The distance is the cheapest sequence of single-character edits
    (insertion, deletion, substitution, or swap of two adjacent characters)
    that turns ``s1`` into ``s2``.

    ``cost`` maps each operation to its weight: ``'w'`` (swap),
    ``'s'`` (substitution), ``'a'`` (insertion) and ``'d'`` (deletion).

    Conceptually this fills a matrix of distances between every prefix of
    ``s1`` and every prefix of ``s2`` by dynamic programming.  Only three
    rows are ever needed, so only three are kept: the row being computed,
    the one before it, and the one before that (used by the swap case).

    More details:
        https://en.wikipedia.org/wiki/Levenshtein_distance
        https://github.com/git/git/commit/8af84dadb142f7321ff0ce8690385e99da8ede2f
    """
    if s1 == s2:
        return 0

    len1, len2 = len(s1), len(s2)
    # One side empty: the answer is pure insertions or pure deletions.
    if not len1:
        return len2 * cost['a']
    if not len2:
        return len1 * cost['d']

    # Row for the empty prefix of s1: build each prefix of s2 by insertion.
    previous = [col * cost['a'] for col in range(len2 + 1)]
    current = list(previous)
    before_previous = list(previous)

    for i, ch1 in enumerate(s1):
        # Reaching the empty prefix of s2 means deleting i + 1 characters.
        current[0] = (i + 1) * cost['d']
        for j, ch2 in enumerate(s2):
            candidates = [
                # substitution (free when the characters already agree)
                previous[j] + (ch1 != ch2) * cost['s'],
                # insertion
                current[j] + cost['a'],
                # deletion
                previous[j + 1] + cost['d'],
            ]
            # swap: the last two characters of each prefix are transposed
            if i > 0 and j > 0 and s1[i - 1] == ch2 and ch1 == s2[j - 1]:
                candidates.append(before_previous[j - 1] + cost['w'])
            current[j + 1] = min(candidates)
        # Rotate the rows; the oldest one is recycled as scratch space.
        before_previous, previous, current = (
            previous, current, before_previous)

    # After the final rotation the last computed row is ``previous``.
    return previous[-1]