#!/usr/bin/env python
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from __future__ import absolute_import

import argparse
import fileinput
import os
import re
import subprocess
import sys

from bashate import messages

MESSAGES = messages.MESSAGES


def is_continuation(line):
    """Return a match object if ``line`` ends with a backslash.

    Trailing whitespace after the backslash is tolerated.  Returns
    ``None`` when the line is not a continuation.
    """
    return re.search(r'\\\s*$', line)


def check_for_do(line, report):
    """Report E010 when a loop header does not end with ``; do``.

    Applies to lines starting with ``for``, ``while`` or ``until``
    that are not continuation lines.
    """
    if is_continuation(line):
        return

    match = re.match(r'^\s*(for|while|until)\s', line)
    if not match:
        return

    operator = match.group(1).strip()
    if operator == "for":
        # "for i in ..." and "for ((" is bash, but
        # "for (" is likely from an embedded awk script,
        # so skip it
        if re.search(r'for \([^\(]', line):
            return
    if not re.search(r';\s*do$', line):
        report.print_error((MESSAGES['E010'].msg % operator), line)


def check_if_then(line, report):
    """Report E011 when an ``if [``/``elif [`` line lacks ``; then``."""
    if is_continuation(line):
        return
    if re.search(r'^\s*(el)?if \[', line):
        if not re.search(r';\s*then$', line):
            report.print_error(MESSAGES['E011'].msg, line)


def check_no_trailing_whitespace(line, report):
    """Report E001 when ``line`` ends with spaces or tabs."""
    if re.search(r'[ \t]+$', line):
        report.print_error(MESSAGES['E001'].msg, line)


def check_no_long_lines(line, report):
    """Report E006 when ``line`` (without its line ending) exceeds 79."""
    if len(line.rstrip("\r\n")) > 79:
        report.print_error(MESSAGES['E006'].msg, line)


def check_indents(logical_line, report):
    """Check the indentation of every physical line of a logical line.

    ``logical_line`` is a non-empty list of physical lines (a single
    line, or a whole backslash continuation).  Reports E002 for tabs in
    the indent and E003 for an indent that is neither a multiple of
    four nor aligned with the first argument of the command.
    """
    # this is rather complex to handle argument offset indenting;
    # primarily done by emacs.  If there is an argument, it will try
    # to line up the following arguments underneath it, e.g.
    #   foobar_cmd bar baz \
    #              moo boo
    # Thus the offset in this case might not be a strict multiple of 4

    # Find the offset of the first argument of the command (if it has
    # one)
    m = re.search(
        r'^(?P<indent>[ \t]+)?(?P<cmd>\S+)(?P<ws>\s+)(?P<arg>\S+)',
        logical_line[0])
    arg_offset = None
    if m:
        arg_offset = len(m.group('indent')) if m.group('indent') else 0
        arg_offset += len(m.group('cmd')) + len(m.group('ws'))

    # go through each line
    for lineno, line in enumerate(logical_line):
        m = re.search(r'^(?P<indent>[ \t]+)', line)
        if not m:
            continue

        # no tabs, only spaces
        if re.search(r'\t', m.group('indent')):
            report.print_error(MESSAGES['E002'].msg, line)

        offset = len(m.group('indent'))

        # the first line and lines without an argument should be
        # offset by 4 spaces
        if (lineno == 0) or (arg_offset is None):
            if (offset % 4) != 0:
                report.print_error(MESSAGES['E003'].msg, line)
        else:
            # other lines are allowed to line up with the first
            # argument, or be multiple-of 4 spaces
            if offset != arg_offset and (offset % 4) != 0:
                report.print_error(MESSAGES['E003'].msg, line)


def check_function_decl(line, report):
    """Report E020 for function declarations in a non-standard form."""
    failed = False
    if line.startswith("function"):
        if not re.search(r'^function [\w-]* \{$', line):
            failed = True
    else:
        # catch the case without "function", e.g.
        # things like '^foo() {'
        # NOTE(review): as written this pattern only matches when "()"
        # is the first non-blank text on the line -- confirm intent.
        if re.search(r'^\s*?\(\)\s*?\{', line):
            failed = True
    if failed:
        report.print_error(MESSAGES['E020'].msg, line)


def starts_heredoc(line):
    """Return the heredoc delimiter if ``line`` opens one, else False."""
    # note, watch out for <<EOF and <<'EOF' ; quotes around the
    # delimiter are part of the syntax, so they are matched but kept
    # out of the returned token
    m = re.search(r'''[^<]<<\s*(['"]?)(?P<token>\w+)(['"]?)''', line)
    return m.group('token') if m else False


def end_of_heredoc(line, token):
    """Return a truthy value if ``line`` is the closing ``token`` line.

    A falsy ``token`` is returned unchanged.
    """
    return token and re.search(r"^%s\s*$" % token, line)


def check_arithmetic(line, report):
    """Report E041 when ``line`` contains ``$[``."""
    if "$[" in line:
        report.print_error(MESSAGES['E041'].msg, line)


def check_bare_arithmetic(line, report):
    """Report E043 when ``line`` starts with ``((``."""
    if line.lstrip().startswith("(("):
        report.print_error(MESSAGES['E043'].msg, line)


def check_local_subshell(line, report):
    """Report E042 when a ``local`` line assigns from a subshell."""
    # XXX: should we increase the string checking to see if the $( is
    # anywhere with a string being set?  Risk of false positives?
    if line.lstrip().startswith('local ') and \
       any(s in line for s in ('=$(', '=`', '="$(', '="`')):
        report.print_error(MESSAGES['E042'].msg, line)


def check_hashbang(line, filename, report):
    """Report E005 when a non-``.sh`` file does not start with ``#!``."""
    # this check only runs on the first line
    # maybe this should check for shell?
    if not line.startswith("#!") and not filename.endswith(".sh"):
        report.print_error(MESSAGES['E005'].msg, line)


def check_syntax(filename, report):
    """Run ``bash -n`` on ``filename`` and report what it complains about.

    Syntax errors are reported as E040 and unterminated here-documents
    as E012, each against the line number printed by bash.
    """
    # run the file through "bash -n" to catch basic syntax errors and
    # other warnings
    matches = []

    # sample lines we want to match:
    # foo.sh: line 4: warning: \
    #    here-document at line 1 delimited by end-of-file (wanted `EOF')
    # foo.sh: line 9: syntax error: unexpected end of file
    # foo.sh: line 7: syntax error near unexpected token `}'
    #
    # i.e. consistency with ":"'s isn't constant, so just do our
    # best...
    r = re.compile(
        r'^(?P<file>.*): line (?P<lineno>[0-9]+): (?P<error>.*)')
    # we are parsing the error message, so force it to ignore the
    # system locale so we don't get messages in another language.
    # Work on a copy so our own process environment is left untouched.
    bash_environment = os.environ.copy()
    bash_environment['LC_ALL'] = 'C'
    proc = subprocess.Popen(
        ['bash', '-n', filename], stdout=subprocess.PIPE,
        stderr=subprocess.PIPE, env=bash_environment,
        universal_newlines=True)
    outputs = proc.communicate()

    for line in outputs[1].split('\n'):
        m = r.match(line)
        if m:
            matches.append(m)

    for m in matches:
        if 'syntax error' in m.group('error'):
            msg = '%s: %s' % (MESSAGES['E040'].msg, m.group('error'))
            report.print_error(msg, filename=filename,
                               filelineno=int(m.group('lineno')))

        # Matching output from bash warning about here-documents not
        # ending.
        # FIXME: are there other warnings that might come out
        # with "bash -n"?  A quick scan of the source code suggests
        # no, but there might be other interesting things we could
        # catch.
        if 'warning:' in m.group('error'):
            if 'delimited by end-of-file' in m.group('error'):
                start = re.match(r'^.*line (?P<start>[0-9]+).*$',
                                 m.group('error'))
                if start is None:
                    # no start line in the message; nothing to format
                    continue
                report.print_error(
                    MESSAGES['E012'].msg % int(start.group('start')),
                    filename=filename,
                    filelineno=int(m.group('lineno')))


class BashateRun(object):
    """Runs the checks over files and accumulates error/warning counts."""

    def __init__(self):
        self.error_count = 0
        # each *_list is None or a regex string built by register_*()
        self.error_list = None
        self.ignore_list = None
        self.warning_count = 0
        self.warning_list = None

    def register_ignores(self, ignores):
        """Set the rules to ignore from a comma-separated string."""
        if ignores:
            self.ignore_list = '^(' + '|'.join(ignores.split(',')) + ')'

    def register_warnings(self, warnings):
        """Set the rules to treat as warnings (comma-separated)."""
        if warnings:
            self.warning_list = '^(' + '|'.join(warnings.split(',')) + ')'

    def register_errors(self, errors):
        """Set the rules to treat as errors (comma-separated)."""
        if errors:
            self.error_list = '^(' + '|'.join(errors.split(',')) + ')'

    def should_ignore(self, error):
        """Return a truthy value if ``error`` matches the ignore list."""
        return self.ignore_list and re.search(self.ignore_list, error)

    def should_warn(self, error):
        """Return a truthy value if ``error`` is only a warning."""
        # if in the errors list, overrides warning level
        if self.error_list and re.search(self.error_list, error):
            return False
        if messages.is_default_warning(error):
            return True
        return self.warning_list and re.search(self.warning_list, error)

    def print_error(self, error, line='',
                    filename=None, filelineno=None):
        """Count and log ``error`` unless it is on the ignore list.

        ``filename`` and ``filelineno`` default to the current position
        of the module-level ``fileinput`` stream.
        """
        if self.should_ignore(error):
            return

        warn = self.should_warn(error)

        if not filename:
            filename = fileinput.filename()
        if not filelineno:
            filelineno = fileinput.filelineno()
        if warn:
            self.warning_count = self.warning_count + 1
        else:
            self.error_count = self.error_count + 1

        self.log_error(error, line, filename, filelineno, warn)

    def log_error(self, error, line, filename, filelineno, warn=False):
        """Print one finding and its location to stdout."""
        print("[%(warn)s] %(error)s: '%(line)s'" %
              {'warn': "W" if warn else "E",
               'error': error,
               'line': line.rstrip('\n')})
        print(" - %s : L%s" % (filename, filelineno))

    def check_files(self, files, verbose):
        """Run every check over each file named in ``files``.

        When ``verbose`` is true the name of each file is printed as
        its first line is read.
        """
        logical_line = ""
        token = False

        # NOTE(mrodden): magic; replace with proper
        # report class when necessary
        report = self

        for fname in files:

            # reset world
            in_heredoc = False
            in_continuation = False
            # Unmodified text of the last line read from this file.
            # The loop variable cannot be used for the end-of-file
            # newline check: it is rewritten when inline comments are
            # stripped, and is never bound for an empty file.
            last_raw_line = None

            # simple syntax checking, as files can pass style but still
            # cause syntax errors when you try to run them.
            check_syntax(fname, report)

            for line in fileinput.input(fname):
                last_raw_line = line

                if fileinput.isfirstline():
                    check_hashbang(line, fileinput.filename(), report)
                    if verbose:
                        print("Running bashate on %s" %
                              fileinput.filename())

                # Don't run any tests on comment lines (but remember
                # inside a heredoc this might be part of the syntax of
                # an embedded script, just ignore that)
                if line.lstrip().startswith('#') and not in_heredoc:
                    continue

                # Strip trailing comments. From bash:
                #
                #   a word beginning with # causes that word and all
                #   remaining characters on that line to be ignored.
                #   ...
                #   A character that, when unquoted, separates
                #   words. One of the following: | & ; ( ) < > space
                #   tab
                #
                # for simplicity, we strip inline comments by
                # matching just '<space>#'.
                if not in_heredoc:
                    ll_split = line.split(' #', 1)
                    if len(ll_split) > 1:
                        line = ll_split[0].rstrip()

                # see if this starts a heredoc
                if not in_heredoc:
                    token = starts_heredoc(line)
                    if token:
                        in_heredoc = True
                        logical_line = [line]
                        continue

                # see if this starts a continuation
                if not in_continuation:
                    if is_continuation(line):
                        in_continuation = True
                        logical_line = [line]
                        continue

                # if we are in a heredoc or continuation, just loop
                # back and keep buffering the lines into
                # "logical_line" until the end of the
                # heredoc/continuation.
                if in_heredoc:
                    logical_line.append(line)
                    if end_of_heredoc(line, token):
                        in_heredoc = False
                    # FIXME: if we want to do something with
                    # heredocs in the future, then the whole thing
                    # is now stored in logical_line.  for now,
                    # skip
                    continue
                elif in_continuation:
                    logical_line.append(line)
                    if is_continuation(line):
                        continue
                    in_continuation = False
                else:
                    logical_line = [line]

                check_indents(logical_line, report)

                # at this point, logical_line is an array that holds
                # the whole continuation.  XXX : historically, we've
                # just handled every line in a continuation
                # separately.  Stick with what works...
                for line in logical_line:
                    check_no_trailing_whitespace(line, report)
                    check_no_long_lines(line, report)
                    check_for_do(line, report)
                    check_if_then(line, report)
                    check_function_decl(line, report)
                    check_arithmetic(line, report)
                    check_local_subshell(line, report)
                    check_bare_arithmetic(line, report)

            # finished processing the file

            # last line should always end with a newline
            if last_raw_line is not None and \
               not last_raw_line.endswith('\n'):
                report.print_error(MESSAGES['E004'].msg, last_raw_line)


def main():
    """Command-line entry point; returns the process exit status."""
    parser = argparse.ArgumentParser(
        description='A bash script style checker')
    parser.add_argument('files', metavar='file', nargs='*',
                        help='files to scan for errors')
    parser.add_argument('-i', '--ignore', help='Rules to ignore')
    parser.add_argument('-w', '--warn',
                        help='Rules to always warn (rather than error)')
    parser.add_argument('-e', '--error',
                        help='Rules to always error (rather than warn)')
    parser.add_argument('-v', '--verbose', action='store_true',
                        default=False)
    parser.add_argument('-s', '--show', action='store_true', default=False)
    opts = parser.parse_args()

    if opts.show:
        messages.print_messages()
        sys.exit(0)

    files = opts.files
    if not files:
        parser.print_usage()
        return 1

    run = BashateRun()
    run.register_ignores(opts.ignore)
    run.register_warnings(opts.warn)
    run.register_errors(opts.error)

    try:
        run.check_files(files, opts.verbose)
    except IOError as e:
        print("bashate: %s" % e)
        return 1

    if run.warning_count > 0:
        print("%d bashate warning(s) found" % run.warning_count)

    if run.error_count > 0:
        print("%d bashate error(s) found" % run.error_count)
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())