Merge "Add command fuzzy matching"
This commit is contained in:
commit
5970766546
54
cliff/app.py
54
cliff/app.py
|
@ -9,9 +9,11 @@ import logging
|
||||||
import logging.handlers
|
import logging.handlers
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import operator
|
||||||
|
|
||||||
from .complete import CompleteCommand
|
from .complete import CompleteCommand
|
||||||
from .help import HelpAction, HelpCommand
|
from .help import HelpAction, HelpCommand
|
||||||
|
from .utils import damerau_levenshtein, COST
|
||||||
|
|
||||||
# Make sure the cliff library has a logging handler
|
# Make sure the cliff library has a logging handler
|
||||||
# in case the app developer doesn't set up logging.
|
# in case the app developer doesn't set up logging.
|
||||||
|
@ -299,14 +301,60 @@ class App(object):
|
||||||
self.interpreter.cmdloop()
|
self.interpreter.cmdloop()
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
def get_fuzzy_matches(self, cmd):
    """Return the registered commands that best resemble an unknown command.

    Every command name yielded by ``self.command_manager`` is scored:

    * a name that starts with ``cmd`` gets the best possible score, 0;
    * any other name gets the Damerau-Levenshtein distance between ``cmd``
      and the name's first word, plus one so that it always ranks after
      the prefix matches.

    :param cmd: the command word the user typed.
    :returns: a list holding every prefix match followed by every
        candidate that shares the smallest non-zero score, ordered by
        score and then alphabetically.  The list is empty when no
        commands are registered.
    """
    # Words of a command name are separated by a space when the command
    # manager converts underscores, and by an underscore otherwise.
    sep = ' ' if self.command_manager.convert_underscores else '_'

    all_cmds = [k[0] for k in self.command_manager]
    dist = []
    for candidate in sorted(all_cmds):
        # Give prefix match a very good score
        if candidate.startswith(cmd):
            dist.append((candidate, 0))
            continue
        # Levenshtein distance against the first word only
        prefix = candidate.split(sep)[0]
        dist.append((candidate, damerau_levenshtein(cmd, prefix, COST) + 1))
    dist.sort(key=operator.itemgetter(1, 0))

    # Zero scores sort first, so the prefix matches form the head of the
    # list.  Selecting by slice instead of walking an unchecked index keeps
    # this safe when the list is empty, when everything is a prefix match,
    # or when the best-scoring group extends to the end of the list.
    matches = [name for name, score in dist if not score]
    remaining = dist[len(matches):]
    if remaining:
        best_similarity = remaining[0][1]
        matches.extend(name for name, score in remaining
                       if score == best_similarity)

    return matches
|
||||||
|
|
||||||
def run_subcommand(self, argv):
|
def run_subcommand(self, argv):
|
||||||
try:
|
try:
|
||||||
subcommand = self.command_manager.find_command(argv)
|
subcommand = self.command_manager.find_command(argv)
|
||||||
except ValueError as err:
|
except ValueError as err:
|
||||||
if self.options.debug:
|
# If there was no exact match, try to find a fuzzy match
|
||||||
raise
|
the_cmd = argv[0]
|
||||||
|
fuzzy_matches = self.get_fuzzy_matches(the_cmd)
|
||||||
|
if fuzzy_matches:
|
||||||
|
article = 'a'
|
||||||
|
if self.NAME[0] in 'aeiou':
|
||||||
|
article = 'an'
|
||||||
|
self.stdout.write('%s: \'%s\' is not %s %s command. '
|
||||||
|
'See \'%s --help\'.\n'
|
||||||
|
% (self.NAME, the_cmd, article,
|
||||||
|
self.NAME, self.NAME))
|
||||||
|
self.stdout.write('Did you mean one of these?\n')
|
||||||
|
for match in fuzzy_matches:
|
||||||
|
self.stdout.write(' %s\n' % match)
|
||||||
else:
|
else:
|
||||||
self.LOG.error(err)
|
if self.options.debug:
|
||||||
|
raise
|
||||||
|
else:
|
||||||
|
self.LOG.error(err)
|
||||||
return 2
|
return 2
|
||||||
cmd_factory, cmd_name, sub_argv = subcommand
|
cmd_factory, cmd_name, sub_argv = subcommand
|
||||||
kwargs = {}
|
kwargs = {}
|
||||||
|
|
|
@ -3,8 +3,7 @@ from argparse import ArgumentError
|
||||||
try:
|
try:
|
||||||
from StringIO import StringIO
|
from StringIO import StringIO
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# Probably python 3, that test won't be run so ignore the error
|
from io import StringIO
|
||||||
pass
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import nose
|
import nose
|
||||||
|
@ -13,6 +12,7 @@ import mock
|
||||||
from cliff.app import App
|
from cliff.app import App
|
||||||
from cliff.command import Command
|
from cliff.command import Command
|
||||||
from cliff.commandmanager import CommandManager
|
from cliff.commandmanager import CommandManager
|
||||||
|
from cliff.tests import utils
|
||||||
|
|
||||||
|
|
||||||
def make_app(**kwargs):
|
def make_app(**kwargs):
|
||||||
|
@ -432,3 +432,19 @@ def test_unknown_cmd_debug():
|
||||||
app.run(['--debug', 'hell']) == 2
|
app.run(['--debug', 'hell']) == 2
|
||||||
except ValueError as err:
|
except ValueError as err:
|
||||||
assert "['hell']" in ('%s' % err)
|
assert "['hell']" in ('%s' % err)
|
||||||
|
|
||||||
|
|
||||||
|
def test_list_matching_commands():
    # Run the app with an unknown command and check that the fuzzy-match
    # suggestions are written to the app's stdout.
    captured = StringIO()
    manager = utils.TestCommandManager(utils.TEST_NAMESPACE)
    app = App('testing', '1', manager, stdout=captured)
    app.NAME = 'test'
    try:
        exit_code = app.run(['t'])
        assert exit_code == 2
    except SystemExit:
        # An exit raised while running is tolerated; only the captured
        # output is inspected below.
        pass
    text = captured.getvalue()
    expected_fragments = (
        "test: 't' is not a test command. See 'test --help'.",
        'Did you mean one of these?',
        'three word command\n two words\n',
    )
    for fragment in expected_fragments:
        assert fragment in text
|
||||||
|
|
|
@ -0,0 +1,88 @@
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# Each edit operation is assigned its own cost:
|
||||||
|
# 'w' means swap operation, the cost is 0;
|
||||||
|
# 's' means substitution operation, the cost is 2;
|
||||||
|
# 'a' means insertion operation, the cost is 1;
|
||||||
|
# 'd' means deletion operation, the cost is 3;
|
||||||
|
# A smaller total cost means the two strings are more similar.
|
||||||
|
# Per-operation weights, looked up by damerau_levenshtein() through the keys
# 'w' (swap), 's' (substitution), 'a' (insertion) and 'd' (deletion).
COST = {'w': 0, 's': 2, 'a': 1, 'd': 3}
|
||||||
|
|
||||||
|
|
||||||
|
def damerau_levenshtein(s1, s2, cost):
    """Return the weighted Damerau-Levenshtein distance from s1 to s2.

    The distance is the cheapest total cost of single-character edits
    (insertion, deletion, substitution, or a swap of two adjacent
    characters) that turn ``s1`` into ``s2``.

    ``cost`` is a mapping that supplies the weight of each operation:
    ``'a'`` for insertion, ``'d'`` for deletion, ``'s'`` for substitution
    and ``'w'`` for swap.

    The computation is a dynamic program over the prefixes of both strings.
    Only three rows of the full matrix are kept at any time: the row being
    filled, the previous row, and the row before that (the last one is
    needed only for the swap operation).

    More details:
        https://en.wikipedia.org/wiki/Levenshtein_distance
        https://github.com/git/git/commit/8af84dadb142f7321ff0ce8690385e99da8ede2f
    """
    if s1 == s2:
        return 0

    n1, n2 = len(s1), len(s2)
    if not n1:
        # Everything in s2 has to be inserted.
        return n2 * cost['a']
    if not n2:
        # Everything in s1 has to be deleted.
        return n1 * cost['d']

    ins_w = cost['a']
    del_w = cost['d']
    sub_w = cost['s']

    # Row for the empty prefix of s1: reaching s2[:col] takes col insertions.
    before_last = [col * ins_w for col in range(n2 + 1)]
    last = list(before_last)
    current = list(before_last)

    for i, ch1 in enumerate(s1):
        # Reaching the empty prefix of s2 takes i + 1 deletions.
        current[0] = (i + 1) * del_w

        for j, ch2 in enumerate(s2):
            options = [
                last[j] + (ch1 != ch2) * sub_w,   # substitution (or match)
                current[j] + ins_w,               # insertion
                last[j + 1] + del_w,              # deletion
            ]
            # A swap applies when the last two characters of both prefixes
            # are the same pair in opposite order.
            if i and j and s1[i - 1] == ch2 and ch1 == s2[j - 1]:
                options.append(before_last[j - 1] + cost['w'])
            current[j + 1] = min(options)

        # Rotate the rows; the oldest one is recycled as the next scratch row.
        before_last, last, current = last, current, before_last

    return last[-1]
|
Loading…
Reference in New Issue