summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoshua Harlow <harlowja@yahoo-inc.com>2014-10-22 16:13:33 -0700
committerJoshua Harlow <harlowja@yahoo-inc.com>2014-10-22 17:30:58 -0700
commit04a710c687a7cb2da2b2a6bbd86c20aa32d1dc60 (patch)
tree02188515560adae208c846189c232fc54914aca0
parent8b8f22329bb8456df870616a8c8e6601f4e70755 (diff)
Allow overriding file encoding
Chardet doesn't always seem to detect a file's encoding correctly in all circumstances. To let a user specify the exact encoding of their files, add a new config option and a new CLI option that allow manually overriding the encoding that chardet would otherwise try to determine. If enabled, chardet detection will no longer run. Fixes bug 1384463 Change-Id: Ie8baf3f79083e1495c7420a9d0569390cad2115e
Notes
Notes (review): Verified+2: Jenkins Code-Review+2: Joshua Harlow <harlowja@yahoo-inc.com> Workflow+1: Joshua Harlow <harlowja@yahoo-inc.com> Submitted-by: Jenkins Submitted-at: Thu, 23 Oct 2014 19:35:54 +0000 Reviewed-on: https://review.openstack.org/130390 Project: stackforge/doc8 Branch: refs/heads/master
-rw-r--r--README.rst5
-rw-r--r--doc8/main.py15
-rw-r--r--doc8/tests/test_checks.py16
3 files changed, 35 insertions, 1 deletions
diff --git a/README.rst b/README.rst
index 3df598e..63e46a2 100644
--- a/README.rst
+++ b/README.rst
@@ -59,6 +59,10 @@ Command line usage
59 --default-extension extension 59 --default-extension extension
60 Default file extension to use when a file is found 60 Default file extension to use when a file is found
61 without a file extension. 61 without a file extension.
62 --file-encoding encoding
63 Override encoding to use when attempting to determine
64 an input files text encoding (providing this avoids
65 using `chardet` to automatically detect encoding/s)
62 --max-line-length int 66 --max-line-length int
63 maximum allowed line length (default: 79) 67 maximum allowed line length (default: 79)
64 -e extension, --extension extension 68 -e extension, --extension extension
@@ -110,6 +114,7 @@ Option Overrides Merges
110``ignore-path`` No Yes 114``ignore-path`` No Yes
111``ignore`` No Yes 115``ignore`` No Yes
112``max-line-length`` Yes No 116``max-line-length`` Yes No
117``file-encoding`` Yes No
113``sphinx`` Yes No 118``sphinx`` Yes No
114===================== =========== ======== 119===================== =========== ========
115 120
diff --git a/doc8/main.py b/doc8/main.py
index 85228d8..5de802b 100644
--- a/doc8/main.py
+++ b/doc8/main.py
@@ -110,6 +110,10 @@ def extract_config(args):
110 except (configparser.NoSectionError, configparser.NoOptionError): 110 except (configparser.NoSectionError, configparser.NoOptionError):
111 pass 111 pass
112 try: 112 try:
113 cfg['file_encoding'] = parser.get("doc8", "file-encoding")
114 except (configparser.NoSectionError, configparser.NoOptionError):
115 pass
116 try:
113 cfg['default_extension'] = parser.get("doc8", "default-extension") 117 cfg['default_extension'] = parser.get("doc8", "default-extension")
114 except (configparser.NoSectionError, configparser.NoOptionError): 118 except (configparser.NoSectionError, configparser.NoOptionError):
115 pass 119 pass
@@ -160,6 +164,7 @@ def scan(cfg):
160 file_iter = utils.find_files(cfg.get('paths', []), 164 file_iter = utils.find_files(cfg.get('paths', []),
161 cfg.get('extension', []), ignored_paths) 165 cfg.get('extension', []), ignored_paths)
162 default_extension = cfg.get('default_extension') 166 default_extension = cfg.get('default_extension')
167 file_encoding = cfg.get('file_encoding')
163 for filename, ignoreable in file_iter: 168 for filename, ignoreable in file_iter:
164 if ignoreable: 169 if ignoreable:
165 files_ignored += 1 170 files_ignored += 1
@@ -167,7 +172,8 @@ def scan(cfg):
167 print(" Ignoring '%s'" % (filename)) 172 print(" Ignoring '%s'" % (filename))
168 else: 173 else:
169 f = file_parser.parse(filename, 174 f = file_parser.parse(filename,
170 default_extension=default_extension) 175 default_extension=default_extension,
176 encoding=file_encoding)
171 files.append(f) 177 files.append(f)
172 if cfg.get('verbose'): 178 if cfg.get('verbose'):
173 print(" Selecting '%s'" % (filename)) 179 print(" Selecting '%s'" % (filename))
@@ -275,6 +281,13 @@ def main():
275 " found without a file extension.", 281 " found without a file extension.",
276 default='', dest='default_extension', 282 default='', dest='default_extension',
277 metavar='extension') 283 metavar='extension')
284 parser.add_argument("--file-encoding", action="store",
285 help="Override encoding to use when attempting"
286 " to determine an input files text encoding "
287 "(providing this avoids using `chardet` to"
288 " automatically detect encoding/s)",
289 default='', dest='file_encoding',
290 metavar='encoding')
278 parser.add_argument("--max-line-length", action="store", metavar="int", 291 parser.add_argument("--max-line-length", action="store", metavar="int",
279 type=int, 292 type=int,
280 help="Maximum allowed line" 293 help="Maximum allowed line"
diff --git a/doc8/tests/test_checks.py b/doc8/tests/test_checks.py
index 1530fc5..7dbba69 100644
--- a/doc8/tests/test_checks.py
+++ b/doc8/tests/test_checks.py
@@ -89,6 +89,22 @@ test
89 (line, code, msg) = errors[0] 89 (line, code, msg) = errors[0]
90 self.assertIn(code, check.REPORTS) 90 self.assertIn(code, check.REPORTS)
91 91
92 def test_correct_length(self):
93 conf = {
94 'max_line_length': 79,
95 'allow_long_titles': True,
96 }
97 with tempfile.NamedTemporaryFile(suffix='.rst') as fh:
98 fh.write(b'known exploit in the wild, for example'
99 ' \xe2\x80\x93 the time'
100 ' between advance notification')
101 fh.flush()
102
103 parsed_file = parser.ParsedFile(fh.name, encoding='utf-8')
104 check = checks.CheckMaxLineLength(conf)
105 errors = list(check.report_iter(parsed_file))
106 self.assertEqual(0, len(errors))
107
92 def test_unsplittable_length(self): 108 def test_unsplittable_length(self):
93 content = """ 109 content = """
94=== 110===