Fix checksum generation so that it handles binary data

At the moment, if freezer attempts to perform a backup with
the consistency check option enabled, it fails if the
files being backed up contain anything other than plain
ASCII (or at least some flavour of it, e.g. ISO-8859-x).
This is because the hashlib module fails, as it
expects a string to digest rather than a binary blob.

This change should fix matters by escaping everything that
might cause an issue, and it works on both Python 2 and 3.
Because it escapes strings differently than before, the
checksums it computes may differ from the previous ones
in some cases, but at least it should now always produce
one.

Change-Id: Idf62dc9e398c396c9de19cbff5622e79d4c7a198
Closes-Bug: #1595732
This commit is contained in:
Domhnall Walsh 2016-07-13 10:06:57 +01:00
parent 45b1430663
commit 98c93e68a5
3 changed files with 37 additions and 25 deletions

View File

@ -16,24 +16,25 @@ import hashlib
import os
from six.moves import StringIO
from six import PY2 # True if running on Python 2
from freezer.utils import utils
class CheckSum(object):
"""
Checksum a file or directory with sha256 or md5 alogrithms.
Checksum a file or directory with sha256 or md5 algorithms.
This is used by backup and restore jobs to check for backup consistency.
- **parameters**::
:param path: the path to the file or directory to checksum
:type path: string
:param hasher_type: the hasher algorithm to use for checksum
:param hasher_type: the hashing algorithm to use for checksum
:type hasher_type: string
:param hasher: hasher object for the specified hasher_type
:type hasher: hashlib oject
:param blocksize: the size of blocks to read when checksuming
:type hasher: hashlib object
:param blocksize: the max. size of block to read when hashing a file
:type blocksize: integer
:param exclude: pattern of files to exclude
:type exclude: string
@ -72,7 +73,7 @@ class CheckSum(object):
self.hasher_size = 32
else:
raise ValueError(
"Unknown hasher_type for checksum: %s" % hasher_type)
"Unknown hasher_type for checksum: {}".format(hasher_type))
def get_files_hashes_in_path(self):
"""
@ -89,8 +90,8 @@ class CheckSum(object):
Open filename and calculate its hash.
Append the hash to the previous result and stores the checksum for
this concatenation
:param filename: path to file
:type filename: string
:param filepath: path to file
:type filepath: string
:return: string containing the hash of the given file
"""
if (os.path.isfile(filepath) and not (
@ -108,12 +109,21 @@ class CheckSum(object):
def hashfile(self, afile):
"""
Checksum a single file with the chosen algorithm.
The file is read per chunk.
The file is read in chunks of self.blocksize.
:return: string
"""
# encode_buffer = False
buf = afile.read(self.blocksize)
while len(buf) > 0:
buf = buf.encode("utf-8")
while buf:
# Need to use string-escape for Python 2 non-unicode strings. For
# Python 2 unicode strings and all Python 3 strings, we need to use
# unicode-escape. The effect of them is the same.
if PY2 and isinstance(buf, str):
buf = buf.encode('string-escape')
else:
buf = buf.encode('unicode-escape')
self.hasher.update(buf)
buf = afile.read(self.blocksize)
return self.hasher.hexdigest()
@ -128,10 +138,10 @@ class CheckSum(object):
def compute(self):
"""
Compute the checksum for the given path.
If a single file is provided, the result is its checksum concacatenated
If a single file is provided, the result is its checksum concatenated
with its name.
If a directory is provided, the result is the checksum of the checksum
concatenatin for each file.
concatenation for each file.
:return: string
"""
self.checksum = self.get_files_hashes_in_path()

View File

@ -34,11 +34,11 @@ CONF = cfg.CONF
LOG = log.getLogger(__name__)
def create_dir_tree(dir):
def create_dir_tree(directory_path):
try:
os.makedirs(dir)
os.makedirs(directory_path)
except OSError as exc:
if exc.errno == errno.EEXIST and os.path.isdir(dir):
if exc.errno == errno.EEXIST and os.path.isdir(directory_path):
pass
else:
raise exc
@ -57,8 +57,8 @@ def create_dir(directory, do_log=True):
try:
if not os.path.isdir(expanded_dir_name):
if do_log:
LOG.warning('Directory {0} does not exists,\
creating...'.format(expanded_dir_name))
LOG.warning('Directory {0} does not exist, creating...'.format(
expanded_dir_name))
os.makedirs(expanded_dir_name)
else:
if do_log:
@ -413,7 +413,7 @@ def exclude_path(path, exclude):
:return: True if path matches the exclude pattern, False otherwise
"""
for name in path.split('/'):
if (fn.fnmatch(name, exclude) or os.path.basename(path) == exclude):
if fn.fnmatch(name, exclude) or os.path.basename(path) == exclude:
return True
return False
@ -497,14 +497,14 @@ def set_max_process_priority():
try:
LOG.warning(
'Setting freezer execution with high CPU and I/O priority')
PID = os.getpid()
pid = os.getpid()
# Set cpu priority
os.nice(-19)
# Set I/O Priority to Real Time class with level 0
subprocess.call([
u'{0}'.format(find_executable("ionice")),
u'-c', u'1', u'-n', u'0', u'-t',
u'-p', u'{0}'.format(PID)
u'-p', u'{0}'.format(pid)
])
except Exception as priority_error:
LOG.warning('Priority: {0}'.format(priority_error))

View File

@ -25,13 +25,15 @@ class TestChecksum(unittest.TestCase):
def setUp(self):
self.file = Mock()
self.dir = Mock()
self.hello_world_md5sum = "6f5902ac237024bdd0c176cb93063dc4"
self.hello_world_sha256sum = "a948904f2f0f479b8f8197694b301"\
"84b0d2ed1c1cd2a1ec0fb85d299a192a447"
self.hello_world_md5sum = 'f36b2652200f5e88edd57963a1109146'
self.hello_world_sha256sum = '17b949eb67acf16bbf2605d57a01f7af4ff4b5' \
'7e200259de63fcebf20e75bbf5'
self.fake_file = StringIO(u"hello world\n")
self.increment_hash_one = self.hello_world_sha256sum
self.increment_hash_multi = "50952b1bedb323003ccc47b49d459f43"\
"11d4be243668a81ecf489c824463caa1"
self.increment_hash_multi = '1b4bc4ff41172a5f29eaeffb7e9fc24c683c693' \
'9ab30132ad5d93a1e4a6b16e8'
self.increment_hash_emptydir = "6b6c6a3d7548cc4396b3dacc6c2750c3"\
"da53f379d20996cbdd2c18be00c3742c"
self.fake_dir = [('root', ['d1, .git'], ['a', 'b']), ]