From 1fa63aa892b368f5346db34ededd57cd7035ea91 Mon Sep 17 00:00:00 2001 From: Doug Hellmann Date: Thu, 30 Oct 2014 18:28:07 -0400 Subject: [PATCH] ignore binary files Skip reading the contents of files we expect to be binary (images, jar archives), and of XML files, by matching file names against a list of glob patterns. --- aeromancer/project.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/aeromancer/project.py b/aeromancer/project.py index c28a608..4957131 100644 --- a/aeromancer/project.py +++ b/aeromancer/project.py @@ -1,3 +1,4 @@ +import fnmatch import glob import io import itertools @@ -57,8 +58,19 @@ def _find_files_in_project(path): return output.split('\0') +_DO_NOT_READ = [ + '*.png', + '*.gif', + '*.jpg', + '*.jpeg', + '*.jar', # Why do we check in jar files?! + '*.xml', +] + + def _update_project_files(session, proj_obj): """Update the files stored for each project""" + LOG.info('reading file contents in %s', proj_obj.name) # Delete any existing files in case the list of files being # managed has changed. This naive, and we can do better, but as a # first version it's OK. @@ -74,6 +86,9 @@ def _update_project_files(session, proj_obj): continue new_file = File(project=proj_obj, name=filename, path=fullname) session.add(new_file) + if any(fnmatch.fnmatch(filename, dnr) for dnr in _DO_NOT_READ): + LOG.debug('ignoring contents of %s', fullname) + continue with io.open(fullname, mode='r', encoding='utf-8') as f: try: body = f.read() @@ -84,7 +99,7 @@ def _update_project_files(session, proj_obj): fullname) continue lines = body.splitlines() - LOG.info('%s/%s has %s lines', proj_obj.name, filename, len(lines)) + LOG.debug('%s/%s has %s lines', proj_obj.name, filename, len(lines)) for num, content in enumerate(lines, 1): session.add(Line(file=new_file, number=num, content=content))