diff --git a/README.rst b/README.rst index 471a9d2..9e074bd 100644 --- a/README.rst +++ b/README.rst @@ -29,7 +29,8 @@ Usage $ doc8 -h usage: doc8 [-h] [--config path] [--allow-long-titles] [--ignore code] - [--ignore-path path] [--max-line-length int] [-e extension] + [--no-sphinx] [--ignore-path path] [--max-line-length int] + [-e extension] [-v] [path [path ...]] Check documentation for simple style requirements. @@ -55,12 +56,13 @@ Usage --allow-long-titles allow long section titles (default: False) --ignore code ignore the given errors code/codes --no-sphinx do not ignore sphinx specific false positives - --ignore-path path - ignore the given directory or file + --ignore-path path ignore the given directory or file (globs are + supported) --max-line-length int maximum allowed line length (default: 79) -e extension, --extension extension check file extensions of the given type (default: .rst, .txt) + -v, --verbose run in verbose mode .. _rst: http://docutils.sourceforge.net/docs/ref/rst/introduction.html diff --git a/doc8/main.py b/doc8/main.py index 03be25b..0d48db4 100644 --- a/doc8/main.py +++ b/doc8/main.py @@ -169,8 +169,8 @@ def main(): help="do not ignore sphinx specific false positives", default=True, dest='sphinx') parser.add_argument("--ignore-path", action="append", default=[], - help="ignore the given directory or file", - metavar='path') + help="ignore the given directory or file (globs" + " are supported)", metavar='path') parser.add_argument("--max-line-length", action="store", metavar="int", type=int, help="maximum allowed line" @@ -194,15 +194,24 @@ def main(): if not args.get('extension'): args['extension'] = list(FILE_PATTERNS) setup_logging(args.get('verbose')) - files = collections.deque() - ignored_paths = [] - for path in args.pop('ignore_path', []): - ignored_paths.append(os.path.normpath(path)) + print("Scanning...") - for filename in utils.find_files(args.pop('paths', []), - args.pop('extension', []), - ignored_paths): - files.append(file_parser.parse(filename)) + files = collections.deque() + ignored_paths = list(args.pop('ignore_path', [])) + files_ignored = 0 + files_selected = 0 + file_iter = utils.find_files(args.pop('paths', []), + args.pop('extension', []), ignored_paths) + for filename, ignoreable in file_iter: + if ignoreable: + files_ignored += 1 + if args.get('verbose'): + print(" Ignoring '%s'" % (filename)) + else: + files_selected += 1 + files.append(file_parser.parse(filename)) + if args.get('verbose'): + print(" Selecting '%s'" % (filename)) ignoreables = frozenset(args.pop('ignore', [])) error_counts = {} @@ -258,6 +267,8 @@ def main(): % (type(c), c)) total_errors = sum(six.itervalues(error_counts)) print("=" * 8) + print("Total files scanned = %s" % (files_selected)) + print("Total files ignored = %s" % (files_ignored)) print("Total accumulated errors = %s" % total_errors) if error_counts: print("Detailed error counts:") diff --git a/doc8/parser.py b/doc8/parser.py index 9f7b82e..9f6a614 100644 --- a/doc8/parser.py +++ b/doc8/parser.py @@ -26,6 +26,8 @@ import six class ParsedFile(object): + FALLBACK_ENCODING = 'utf-8' + def __init__(self, filename, encoding=None): self._filename = filename self._content = None @@ -82,8 +84,11 @@ class ParsedFile(object): @property def encoding(self): - if self._encoding is None: - self._encoding = chardet.detect(self.raw_contents)['encoding'] + if not self._encoding: + encoding = chardet.detect(self.raw_contents)['encoding'] + if not encoding: + encoding = self.FALLBACK_ENCODING + self._encoding = encoding return self._encoding @property diff --git a/doc8/utils.py b/doc8/utils.py index c508523..4e50217 100644 --- a/doc8/utils.py +++ b/doc8/utils.py @@ -14,33 +14,46 @@ # License for the specific language governing permissions and limitations # under the License. +import glob import os def find_files(paths, extensions, ignored_paths): extensions = set(extensions) + ignored_absolute_paths = set() + for path in ignored_paths: + for expanded_path in glob.iglob(path): + expanded_path = os.path.abspath(expanded_path) + ignored_absolute_paths.add(expanded_path) def extension_matches(path): _base, ext = os.path.splitext(path) return ext in extensions - def path_ignored(path): - return os.path.normpath(path) in ignored_paths + def path_ignorable(path): + path = os.path.abspath(path) + if path in ignored_absolute_paths: + return True + last_path = None + while path != last_path: + # If we hit the root, this loop will stop since the resolution + # of "/../" is still "/" when ran through the abspath function... + last_path = path + path = os.path.abspath(os.path.join(path, os.path.pardir)) + if path in ignored_absolute_paths: + return True + return False for path in paths: - if path_ignored(path): - continue if os.path.isfile(path): if extension_matches(path): - yield path + yield (path, path_ignorable(path)) elif os.path.isdir(path): for root, dirnames, filenames in os.walk(path): - if path_ignored(root): - continue for filename in filenames: path = os.path.join(root, filename) - if extension_matches(path) and not path_ignored(path): - yield path + if extension_matches(path): + yield (path, path_ignorable(path)) else: raise IOError('Invalid path: %s' % path)