Fix checksum generation so that it handles binary data

At the moment, if freezer attempts to perform a backup with
the consistency check option enabled, it will fail if the
files being backed up contain anything other than plain
ASCII (or a close relative of it, such as ISO-8859-x). This
is because the hashlib module fails, as it expects a string
to digest rather than a binary blob.

This should fix the problem by escaping anything that might
cause an issue, and it works on both Python 2 and 3. Because
strings are now escaped differently than before, the
checksums it computes may differ from those computed
previously in some cases, but it should now always produce
one.

Change-Id: Idf62dc9e398c396c9de19cbff5622e79d4c7a198
Closes-Bug: #1595732
Domhnall Walsh 2016-07-13 10:06:57 +01:00
parent 45b1430663
commit 98c93e68a5
3 changed files with 37 additions and 25 deletions
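
The escaping approach the commit message describes boils down to
normalising each chunk to an ASCII-safe byte string before it reaches
hashlib. A minimal sketch of that idea, assuming six is installed;
_escaped is an illustrative helper, not part of freezer:

    import hashlib

    from six import PY2

    def _escaped(buf):
        """Escape a chunk so hashlib accepts it on Python 2 and 3."""
        if PY2 and isinstance(buf, str):
            # Python 2 byte strings: escape non-ASCII/non-printable bytes.
            return buf.encode('string-escape')
        # Python 2 unicode strings and all Python 3 strings.
        return buf.encode('unicode-escape')

    hasher = hashlib.sha256()
    for chunk in (u"hello ", u"w\u00f6rld\n"):  # includes a non-ASCII character
        hasher.update(_escaped(chunk))
    print(hasher.hexdigest())

On Python 3 the unicode-escape branch always applies; on Python 2 plain
byte strings take the string-escape branch instead, which (per the code
comment in the diff below) has the same effect.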

View File

@@ -16,24 +16,25 @@ import hashlib
 import os
 from six.moves import StringIO
+from six import PY2 # True if running on Python 2
 from freezer.utils import utils

 class CheckSum(object):
     """
-    Checksum a file or directory with sha256 or md5 alogrithms.
+    Checksum a file or directory with sha256 or md5 algorithms.
     This is used by backup and restore jobs to check for backup consistency.
     - **parameters**::
     :param path: the path to the file or directory to checksum
     :type path: string
-    :param hasher_type: the hasher algorithm to use for checksum
+    :param hasher_type: the hashing algorithm to use for checksum
     :type hasher_type: string
     :param hasher: hasher object for the specified hasher_type
-    :type hasher: hashlib oject
-    :param blocksize: the size of blocks to read when checksuming
+    :type hasher: hashlib object
+    :param blocksize: the max. size of block to read when hashing a file
     :type blocksize: integer
     :param exclude: pattern of files to exclude
     :type exclude: string
@@ -72,7 +73,7 @@ class CheckSum(object):
             self.hasher_size = 32
         else:
             raise ValueError(
-                "Unknown hasher_type for checksum: %s" % hasher_type)
+                "Unknown hasher_type for checksum: {}".format(hasher_type))

     def get_files_hashes_in_path(self):
         """
@@ -89,8 +90,8 @@ class CheckSum(object):
         Open filename and calculate its hash.
         Append the hash to the previous result and stores the checksum for
         this concatenation
-        :param filename: path to file
-        :type filename: string
+        :param filepath: path to file
+        :type filepath: string
         :return: string containing the hash of the given file
         """
         if (os.path.isfile(filepath) and not (
@@ -108,12 +109,21 @@ class CheckSum(object):
     def hashfile(self, afile):
         """
         Checksum a single file with the chosen algorithm.
-        The file is read per chunk.
+        The file is read in chunks of self.blocksize.
         :return: string
         """
+        # encode_buffer = False
         buf = afile.read(self.blocksize)
-        while len(buf) > 0:
-            buf = buf.encode("utf-8")
+        while buf:
+            # Need to use string-escape for Python 2 non-unicode strings. For
+            # Python 2 unicode strings and all Python 3 strings, we need to use
+            # unicode-escape. The effect of them is the same.
+            if PY2 and isinstance(buf, str):
+                buf = buf.encode('string-escape')
+            else:
+                buf = buf.encode('unicode-escape')
             self.hasher.update(buf)
             buf = afile.read(self.blocksize)
         return self.hasher.hexdigest()
@@ -128,10 +138,10 @@ class CheckSum(object):
     def compute(self):
         """
         Compute the checksum for the given path.
-        If a single file is provided, the result is its checksum concacatenated
+        If a single file is provided, the result is its checksum concatenated
         with its name.
         If a directory is provided, the result is the checksum of the checksum
-        concatenatin for each file.
+        concatenation for each file.
         :return: string
         """
         self.checksum = self.get_files_hashes_in_path()
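
For orientation, the docstring parameters above suggest usage roughly
along these lines; the import path and keyword arguments are assumptions
drawn from this diff, not a confirmed freezer API:

    from freezer.utils.checksum import CheckSum  # assumed module path

    chk = CheckSum('/var/backups/data', hasher_type='sha256')
    digest = chk.compute()  # file: its hash plus its name;
                            # directory: hash of the concatenated per-file hashes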

View File

@@ -34,11 +34,11 @@ CONF = cfg.CONF
 LOG = log.getLogger(__name__)

-def create_dir_tree(dir):
+def create_dir_tree(directory_path):
     try:
-        os.makedirs(dir)
+        os.makedirs(directory_path)
     except OSError as exc:
-        if exc.errno == errno.EEXIST and os.path.isdir(dir):
+        if exc.errno == errno.EEXIST and os.path.isdir(directory_path):
             pass
         else:
             raise exc
@@ -57,8 +57,8 @@ def create_dir(directory, do_log=True):
     try:
         if not os.path.isdir(expanded_dir_name):
             if do_log:
-                LOG.warning('Directory {0} does not exists,\
-                    creating...'.format(expanded_dir_name))
+                LOG.warning('Directory {0} does not exist, creating...'.format(
+                    expanded_dir_name))
             os.makedirs(expanded_dir_name)
         else:
             if do_log:
@@ -413,7 +413,7 @@ def exclude_path(path, exclude):
     :return: True if path matches the exclude pattern, False otherwise
     """
     for name in path.split('/'):
-        if (fn.fnmatch(name, exclude) or os.path.basename(path) == exclude):
+        if fn.fnmatch(name, exclude) or os.path.basename(path) == exclude:
             return True
     return False
@@ -497,14 +497,14 @@ def set_max_process_priority():
     try:
         LOG.warning(
             'Setting freezer execution with high CPU and I/O priority')
-        PID = os.getpid()
+        pid = os.getpid()
         # Set cpu priority
         os.nice(-19)
         # Set I/O Priority to Real Time class with level 0
         subprocess.call([
             u'{0}'.format(find_executable("ionice")),
             u'-c', u'1', u'-n', u'0', u'-t',
-            u'-p', u'{0}'.format(PID)
+            u'-p', u'{0}'.format(pid)
         ])
     except Exception as priority_error:
         LOG.warning('Priority: {0}'.format(priority_error))
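
The exclude_path change above only drops redundant parentheses; as a
stand-alone reference, the matching logic in that hunk behaves like this:

    import fnmatch as fn
    import os

    def exclude_path(path, exclude):
        # True if any path component matches the pattern,
        # or the basename equals the pattern.
        for name in path.split('/'):
            if fn.fnmatch(name, exclude) or os.path.basename(path) == exclude:
                return True
        return False

    print(exclude_path('root/.git/config', '.git'))  # True: a component matches
    print(exclude_path('root/d1/a', '*.tmp'))        # False: nothing matches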

View File

@@ -25,13 +25,15 @@ class TestChecksum(unittest.TestCase):
     def setUp(self):
         self.file = Mock()
         self.dir = Mock()
-        self.hello_world_md5sum = "6f5902ac237024bdd0c176cb93063dc4"
-        self.hello_world_sha256sum = "a948904f2f0f479b8f8197694b301"\
-                                     "84b0d2ed1c1cd2a1ec0fb85d299a192a447"
+        self.hello_world_md5sum = 'f36b2652200f5e88edd57963a1109146'
+        self.hello_world_sha256sum = '17b949eb67acf16bbf2605d57a01f7af4ff4b5' \
+                                     '7e200259de63fcebf20e75bbf5'
         self.fake_file = StringIO(u"hello world\n")
         self.increment_hash_one = self.hello_world_sha256sum
-        self.increment_hash_multi = "50952b1bedb323003ccc47b49d459f43"\
-                                    "11d4be243668a81ecf489c824463caa1"
+        self.increment_hash_multi = '1b4bc4ff41172a5f29eaeffb7e9fc24c683c693' \
+                                    '9ab30132ad5d93a1e4a6b16e8'
         self.increment_hash_emptydir = "6b6c6a3d7548cc4396b3dacc6c2750c3"\
                                        "da53f379d20996cbdd2c18be00c3742c"
         self.fake_dir = [('root', ['d1, .git'], ['a', 'b']), ]
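
The expected digests in this test change because data is now escaped
before hashing: unicode-escape rewrites the trailing newline of
"hello world\n" as the two characters backslash and n, so the bytes fed
to the hasher no longer match the raw text. A quick way to see the
effect with plain Python (the second value should correspond to the new
md5 expected above):

    import hashlib

    data = u"hello world\n"
    print(hashlib.md5(data.encode('utf-8')).hexdigest())           # pre-fix behaviour
    print(hashlib.md5(data.encode('unicode-escape')).hexdigest())  # post-fix behaviour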