diff --git a/freezer/utils/checksum.py b/freezer/utils/checksum.py index 44bd03be..ecc5902f 100644 --- a/freezer/utils/checksum.py +++ b/freezer/utils/checksum.py @@ -16,24 +16,25 @@ import hashlib import os from six.moves import StringIO +from six import PY2 # True if running on Python 2 from freezer.utils import utils class CheckSum(object): """ - Checksum a file or directory with sha256 or md5 alogrithms. + Checksum a file or directory with sha256 or md5 algorithms. This is used by backup and restore jobs to check for backup consistency. - **parameters**:: :param path: the path to the file or directory to checksum :type path: string - :param hasher_type: the hasher algorithm to use for checksum + :param hasher_type: the hashing algorithm to use for checksum :type hasher_type: string :param hasher: hasher object for the specified hasher_type - :type hasher: hashlib oject - :param blocksize: the size of blocks to read when checksuming + :type hasher: hashlib object + :param blocksize: the max. size of block to read when hashing a file :type blocksize: integer :param exclude: pattern of files to exclude :type exclude: string @@ -72,7 +73,7 @@ class CheckSum(object): self.hasher_size = 32 else: raise ValueError( - "Unknown hasher_type for checksum: %s" % hasher_type) + "Unknown hasher_type for checksum: {}".format(hasher_type)) def get_files_hashes_in_path(self): """ @@ -89,8 +90,8 @@ class CheckSum(object): Open filename and calculate its hash. Append the hash to the previous result and stores the checksum for this concatenation - :param filename: path to file - :type filename: string + :param filepath: path to file + :type filepath: string :return: string containing the hash of the given file """ if (os.path.isfile(filepath) and not ( @@ -108,12 +109,21 @@ class CheckSum(object): def hashfile(self, afile): """ Checksum a single file with the chosen algorithm. - The file is read per chunk. + The file is read in chunks of self.blocksize. :return: string """ + # encode_buffer = False + buf = afile.read(self.blocksize) - while len(buf) > 0: - buf = buf.encode("utf-8") + while buf: + # Need to use string-escape for Python 2 non-unicode strings. For + # Python 2 unicode strings and all Python 3 strings, we need to use + # unicode-escape. The effect of them is the same. + if PY2 and isinstance(buf, str): + buf = buf.encode('string-escape') + else: + buf = buf.encode('unicode-escape') + self.hasher.update(buf) buf = afile.read(self.blocksize) return self.hasher.hexdigest() @@ -128,10 +138,10 @@ class CheckSum(object): def compute(self): """ Compute the checksum for the given path. - If a single file is provided, the result is its checksum concacatenated + If a single file is provided, the result is its checksum concatenated with its name. If a directory is provided, the result is the checksum of the checksum - concatenatin for each file. + concatenation for each file. :return: string """ self.checksum = self.get_files_hashes_in_path() diff --git a/freezer/utils/utils.py b/freezer/utils/utils.py index 5e1867e4..3ad9001b 100644 --- a/freezer/utils/utils.py +++ b/freezer/utils/utils.py @@ -34,11 +34,11 @@ CONF = cfg.CONF LOG = log.getLogger(__name__) -def create_dir_tree(dir): +def create_dir_tree(directory_path): try: - os.makedirs(dir) + os.makedirs(directory_path) except OSError as exc: - if exc.errno == errno.EEXIST and os.path.isdir(dir): + if exc.errno == errno.EEXIST and os.path.isdir(directory_path): pass else: raise exc @@ -57,8 +57,8 @@ def create_dir(directory, do_log=True): try: if not os.path.isdir(expanded_dir_name): if do_log: - LOG.warning('Directory {0} does not exists,\ - creating...'.format(expanded_dir_name)) + LOG.warning('Directory {0} does not exist, creating...'.format( + expanded_dir_name)) os.makedirs(expanded_dir_name) else: if do_log: @@ -413,7 +413,7 @@ def exclude_path(path, exclude): :return: True if path matches the exclude pattern, False otherwise """ for name in path.split('/'): - if (fn.fnmatch(name, exclude) or os.path.basename(path) == exclude): + if fn.fnmatch(name, exclude) or os.path.basename(path) == exclude: return True return False @@ -497,14 +497,14 @@ def set_max_process_priority(): try: LOG.warning( 'Setting freezer execution with high CPU and I/O priority') - PID = os.getpid() + pid = os.getpid() # Set cpu priority os.nice(-19) # Set I/O Priority to Real Time class with level 0 subprocess.call([ u'{0}'.format(find_executable("ionice")), u'-c', u'1', u'-n', u'0', u'-t', - u'-p', u'{0}'.format(PID) + u'-p', u'{0}'.format(pid) ]) except Exception as priority_error: LOG.warning('Priority: {0}'.format(priority_error)) diff --git a/tests/unit/utils/test_checksum.py b/tests/unit/utils/test_checksum.py index b1070f5f..e4c44d49 100644 --- a/tests/unit/utils/test_checksum.py +++ b/tests/unit/utils/test_checksum.py @@ -25,13 +25,15 @@ class TestChecksum(unittest.TestCase): def setUp(self): self.file = Mock() self.dir = Mock() - self.hello_world_md5sum = "6f5902ac237024bdd0c176cb93063dc4" - self.hello_world_sha256sum = "a948904f2f0f479b8f8197694b301"\ - "84b0d2ed1c1cd2a1ec0fb85d299a192a447" + + self.hello_world_md5sum = 'f36b2652200f5e88edd57963a1109146' + self.hello_world_sha256sum = '17b949eb67acf16bbf2605d57a01f7af4ff4b5' \ + '7e200259de63fcebf20e75bbf5' + self.fake_file = StringIO(u"hello world\n") self.increment_hash_one = self.hello_world_sha256sum - self.increment_hash_multi = "50952b1bedb323003ccc47b49d459f43"\ - "11d4be243668a81ecf489c824463caa1" + self.increment_hash_multi = '1b4bc4ff41172a5f29eaeffb7e9fc24c683c693' \ + '9ab30132ad5d93a1e4a6b16e8' self.increment_hash_emptydir = "6b6c6a3d7548cc4396b3dacc6c2750c3"\ "da53f379d20996cbdd2c18be00c3742c" self.fake_dir = [('root', ['d1, .git'], ['a', 'b']), ]