361 lines
12 KiB
Python
Executable File
361 lines
12 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright (C) 2014 Ivan Melnikov <iv at altlinux dot org>
|
|
#
|
|
# Author: Joshua Harlow <harlowja@yahoo-inc.com>
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
|
|
"""Check documentation for simple style requirements.
|
|
|
|
What is checked:
|
|
- lines should not be longer than 79 characters - D001
|
|
- exception: line with no whitespace except in the beginning
|
|
- exception: lines with http or https urls
|
|
- exception: literal blocks
|
|
- exception: rst target directives
|
|
- no trailing whitespace - D002
|
|
- no tabulation for indentation - D003
|
|
- no carriage returns (use unix newlines) - D004
|
|
"""
|
|
|
|
import argparse
|
|
import collections
|
|
import fnmatch
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
from docutils import frontend
|
|
from docutils import nodes as doc_nodes
|
|
from docutils.parsers import rst
|
|
from docutils import utils
|
|
|
|
import six
|
|
from six.moves import configparser
|
|
|
|
|
|
# File name patterns that are scanned when a directory path is given.
FILE_PATTERNS = ['*.rst', '*.txt']
# Default maximum allowed line length (check D001).
MAX_LINE_LENGTH = 79
# NOTE: raw strings so the backslash escapes reach the regex engine intact;
# a plain '\s' is an invalid string-literal escape and raises a
# DeprecationWarning/SyntaxWarning on modern Python versions.
TRAILING_WHITESPACE_REGEX = re.compile(r'\s$')
STARTING_WHITESPACE_REGEX = re.compile(r'^(\s+)')
# Well-known config files probed (in this order) when --config is not given.
CONFIG_FILENAMES = [
    "doc8.ini",
    "tox.ini",
    "pep8.ini",
    "setup.cfg",
]
|
|
|
|
|
|
def check_max_length(fn, cfg, contents):
    """D001: yield a violation for every line longer than the allowed max.

    Parses *contents* as rst with docutils so that exempt constructs can be
    recognized, then yields ``(line_number, 'D001', 'Line too long')``
    tuples (line numbers are 1-based).

    Exempt lines:
      - lines inside directives or literal blocks
      - lines whose stripped text contains no space (nothing to wrap at)
      - lines containing http:// or https:// urls
      - rst targets and literal-block nodes
      - titles, when ``cfg['allow_long_titles']`` is set
    """

    def contains_url(line):
        # Long urls can't be wrapped, so lines carrying them are exempt.
        if "http://" in line or "https://" in line:
            return True
        return False

    def any_node_type(node, node_types):
        # True if node (or any of its ancestors) is one of node_types.
        n = node
        node_types = tuple(node_types)
        while n is not None:
            if isinstance(n, node_types):
                return True
            n = n.parent
        return False

    def extract_lines(node, start_line):
        # Widen the reported line into the full span the node covers:
        # titles report the line after their text, literal blocks the
        # line before their body.
        lines = [start_line]
        if isinstance(node, (doc_nodes.title)):
            start = start_line - len(node.rawsource.splitlines())
            if start >= 0:
                lines.append(start)
        if isinstance(node, (doc_nodes.literal_block)):
            end = start_line + len(node.rawsource.splitlines()) - 1
            lines.append(end)
        return lines

    def gather_lines(node):
        # All source lines covered by node and its descendants.
        lines = []
        for n in node.traverse(include_self=True):
            lines.extend(extract_lines(n, find_line(n)))
        return lines

    def find_line(node):
        # Nearest known source line, walking up through the ancestors.
        n = node
        while n is not None:
            if n.line is not None:
                return n.line
            n = n.parent
        return None

    def find_containing_nodes(num, node_lines, first_line):
        # Return the node(s) with the smallest line span containing num;
        # lines before the first parsed node map to the first node.
        if num < first_line and len(node_lines):
            return [node_lines[0][0]]
        contained_in = []
        for (n, (line_min, line_max)) in node_lines:
            if num >= line_min and num <= line_max:
                contained_in.append((n, (line_min, line_max)))
        smallest_span = None
        best_nodes = []
        for (n, (line_min, line_max)) in contained_in:
            span = line_max - line_min
            if smallest_span is None:
                smallest_span = span
                best_nodes = [n]
            elif span < smallest_span:
                smallest_span = span
                best_nodes = [n]
            elif span == smallest_span:
                best_nodes.append(n)
        return best_nodes

    def find_directive_end(start, lines):
        # A directive's body extends over the following lines for as long
        # as they are blank or indented.

        def starting_whitespace(line):
            m = re.match(r"^(\s+)(.*)$", line)
            if not m:
                return 0
            return len(m.group(1))

        def all_whitespace(line):
            return bool(re.match(r"^(\s*)$", line))

        after_lines = collections.deque(lines[start + 1:])
        k = 0
        while after_lines:
            line = after_lines.popleft()
            if all_whitespace(line) or starting_whitespace(line) >= 1:
                k += 1
            else:
                break
        return start + k

    # Use the rst parsers document output to do as much of the validation
    # as we can without resorting to custom logic (this parser is what sphinx
    # and others use anyway so it's very mature).
    parser = rst.Parser()
    defaults = {
        'input_encoding': 'utf8',
        'halt_level': 5,
        'report_level': 5,
        'quiet': True,
        'file_insertion_enabled': False,
        'traceback': True,
    }
    opt = frontend.OptionParser(components=[parser], defaults=defaults)
    doc = utils.new_document(source_path=fn, settings=opt.get_default_values())
    parser.parse(contents, doc)
    node_lines = []
    first_line = -1
    for n in doc.traverse(include_self=True):
        line = find_line(n)
        if line is None:
            continue
        if any_node_type(n, [doc_nodes.system_message]):
            # These are failures, and their node content isn't correct,
            # so skip them; we should work on making it so that the parser
            # stops doing this custom parent creation in the first place.
            continue
        if first_line == -1:
            first_line = line
        contained_lines = set(gather_lines(n))
        node_lines.append((n, (min(contained_lines), max(contained_lines))))

    # Find where directives start & end so that we can exclude content in
    # these directive regions (the rst parser may not handle this correctly
    # for unknown directives, so we have to do it manually).
    lines = contents.split("\n")
    directives = []
    for i, line in enumerate(lines):
        # NOTE: the dots must be escaped -- an unescaped ".." matches ANY
        # two characters, which previously misclassified ordinary lines
        # (e.g. "ab text::") as directive starts and suppressed D001 for
        # everything that followed them.
        if re.match(r"^\.\.\s(.*?)::\s*", line):
            directives.append((i, find_directive_end(i, lines)))
        elif re.match(r"^::\s*$", line):
            directives.append((i, find_directive_end(i, lines)))

    skip_types = (
        doc_nodes.target,
        doc_nodes.literal_block,
    )
    title_types = (
        doc_nodes.title,
    )
    max_line_length = cfg['max_line_length']
    allow_long = cfg['allow_long_titles']
    for i, line in enumerate(lines):
        if len(line) > max_line_length:
            in_directive = False
            for (start, end) in directives:
                if i >= start and i <= end:
                    in_directive = True
                    break
            if in_directive:
                continue
            stripped = line.strip()
            if ' ' not in stripped:
                # A single long "word" has no whitespace to break at.
                continue
            if contains_url(stripped):
                continue
            nodes = find_containing_nodes(i + 1, node_lines, first_line)
            if any([isinstance(n, skip_types) for n in nodes]):
                continue
            if allow_long and any([isinstance(n, title_types) for n in nodes]):
                continue
            yield (i + 1, 'D001', 'Line too long')
|
|
|
|
|
|
def check_trailing_whitespace(fn, cfg, line):
    """D002: yield a violation when *line* ends in a whitespace character."""
    if re.search(r"\s$", line):
        yield ('D002', 'Trailing whitespace')
|
|
|
|
|
|
def check_indentation_no_tab(fn, cfg, line):
    """D003: yield a violation when a tab appears in *line*'s indentation."""
    indent = re.match(r"\s+", line)
    if indent and "\t" in indent.group(0):
        yield ('D003', 'Tabulation used for indentation')
|
|
|
|
|
|
def check_carriage_return(fn, cfg, line):
    """D004: yield a violation when *line* contains a carriage return."""
    if line.find("\r") != -1:
        yield ('D004', 'Found literal carriage return')
|
|
|
|
|
|
def check_lines(fn, cfg, lines, line_checks):
    """Run every line-level check over *lines* (an iterable of byte lines).

    Yields (line_number, code, message) tuples; numbering starts at 1.
    """
    line_num = 0
    for raw_line in lines:
        line_num += 1
        # Decode the raw bytes and drop the newline before checking.
        text = six.text_type(raw_line, encoding='utf8').rstrip('\n')
        for line_check in line_checks:
            for (code, message) in line_check(fn, cfg, text):
                yield line_num, code, message
|
|
|
|
|
|
def check_files(cfg, filenames, line_checks, content_checks):
    """Run content-level then line-level checks over every file.

    Yields (filename, line_number, code, message) tuples for each
    violation found.
    """
    for path in filenames:
        with open(path, 'rb') as handle:
            # Whole-file checks first (they need the full decoded text)...
            text = six.text_type(handle.read(), encoding='utf8')
            for content_check in content_checks:
                for (line_num, code, message) in content_check(path, cfg,
                                                               text):
                    yield path, line_num, code, message
            # ...then rewind and feed the raw lines to the line checks.
            handle.seek(0)
            for (line_num, code, message) in check_lines(path, cfg, handle,
                                                         line_checks):
                yield path, line_num, code, message
|
|
|
|
|
|
def find_files(pathes, patterns):
    """Yield the files to check from *pathes*.

    Plain file paths are yielded as-is (no pattern filtering); directories
    are walked recursively and only files matching one of the fnmatch
    *patterns* are yielded. Anything else is reported as invalid.
    """
    for candidate in pathes:
        if os.path.isfile(candidate):
            yield candidate
            continue
        if not os.path.isdir(candidate):
            print('Invalid path: %s' % candidate)
            continue
        for root, _dirs, names in os.walk(candidate):
            matched = (name for name in names
                       if any(fnmatch.fnmatch(name, pattern)
                              for pattern in patterns))
            for name in matched:
                yield os.path.join(root, name)
|
|
|
|
|
|
def split_string(text):
    """Split a comma-separated string, trimming whitespace and dropping
    empty pieces."""
    pieces = (piece.strip() for piece in text.split(","))
    return [piece for piece in pieces if piece]
|
|
|
|
|
|
def extract_config(args, default_cfg):
    """Layer [doc8] settings from config files over *default_cfg*.

    When --config files were given they must exist and are all read;
    otherwise the well-known CONFIG_FILENAMES are probed best-effort.
    Returns a new config dict; missing sections/options are ignored.
    """
    parser = configparser.RawConfigParser()
    if args.config:
        # Explicitly requested files must be openable.
        for path in list(args.config):
            with open(path, 'r') as fh:
                parser.readfp(fh, filename=path)
    else:
        parser.read(CONFIG_FILENAMES)
    cfg = dict(default_cfg)
    try:
        cfg['max_line_length'] = parser.getint("doc8", "max-line-length")
    except (configparser.NoSectionError, configparser.NoOptionError):
        pass
    try:
        cfg['ignore'].update(split_string(parser.get("doc8", "ignore")))
    except (configparser.NoSectionError, configparser.NoOptionError):
        pass
    try:
        cfg['allow_long_titles'] = parser.getboolean("doc8",
                                                     "allow-long-titles")
    except (configparser.NoSectionError, configparser.NoOptionError):
        pass
    return cfg
|
|
|
|
|
|
def unique_itr(itr):
    """Yield each item of *itr* once, preserving first-seen order."""
    emitted = set()
    for item in itr:
        if item not in emitted:
            emitted.add(item)
            yield item
|
|
|
|
|
|
def main():
    """Command-line entry point: parse args, run all checks, exit.

    Exits with status 0 when no (non-ignored) violations were found,
    1 otherwise. Violations are printed as ``file:line: code message``.
    """
    file_types = ", ".join(FILE_PATTERNS)
    default_configs = ", ".join(CONFIG_FILENAMES)
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("paths", metavar='path', type=str, nargs='*',
                        help=("path to scan for %s files"
                              " (default: os.getcwd())") % file_types,
                        default=[os.getcwd()])
    parser.add_argument("--config", metavar='path', action="append",
                        help="user config file location"
                             " (default: %s)" % default_configs)
    parser.add_argument("--allow-long-titles", action="store_true",
                        help="allow long section titles (default: False)",
                        default=False)
    parser.add_argument("--ignore", action="append", metavar="code",
                        help="ignore the given errors code/codes",
                        default=[])
    args = parser.parse_args()
    # Command-line defaults; config files may override these below.
    default_cfg = {
        'max_line_length': MAX_LINE_LENGTH,
        'ignore': set(),
        'allow_long_titles': args.allow_long_titles,
    }
    # Each --ignore value may itself be a comma-separated list of codes.
    for c in args.ignore:
        default_cfg['ignore'].update(split_string(c))
    cfg = extract_config(args, default_cfg)
    # Checks that operate on one (decoded) line at a time.
    line_checks = [
        check_trailing_whitespace,
        check_indentation_no_tab,
        check_carriage_return,
    ]
    # Checks that need the whole file contents (rst parsing).
    content_checks = [
        check_max_length,
    ]
    ok = True
    # De-duplicate paths so the same file isn't checked/reported twice.
    paths = unique_itr(args.paths)
    for error in check_files(cfg, find_files(paths, FILE_PATTERNS),
                             line_checks, content_checks):
        # error is (filename, line_num, code, message); error[2] is the code.
        if error[2] in cfg['ignore']:
            continue
        ok = False
        print('%s:%s: %s %s' % error)
    sys.exit(0 if ok else 1)
|
|
|
|
if __name__ == '__main__':
|
|
main()
|