Fix checksum generation so that it handles binary data
At the moment, if freezer attempts to perform a backup with the consistency-check option enabled, it fails if the files being backed up contain anything other than plain ASCII (or some flavour of the same, e.g. ISO-8859-x). This is because the hashlib module expects a string to digest rather than a binary blob. This change fixes matters by escaping everything that might cause an issue, and works for both Python 2 and Python 3. Because strings are now escaped differently than before, the computed checksums may differ from previous values in some cases, but a checksum is now always produced.

Change-Id: Idf62dc9e398c396c9de19cbff5622e79d4c7a198
Closes-Bug: #1595732
This commit is contained in:
parent
45b1430663
commit
98c93e68a5
|
@ -16,24 +16,25 @@ import hashlib
|
|||
import os
|
||||
|
||||
from six.moves import StringIO
|
||||
from six import PY2 # True if running on Python 2
|
||||
|
||||
from freezer.utils import utils
|
||||
|
||||
|
||||
class CheckSum(object):
|
||||
"""
|
||||
Checksum a file or directory with sha256 or md5 alogrithms.
|
||||
Checksum a file or directory with sha256 or md5 algorithms.
|
||||
|
||||
This is used by backup and restore jobs to check for backup consistency.
|
||||
|
||||
- **parameters**::
|
||||
:param path: the path to the file or directory to checksum
|
||||
:type path: string
|
||||
:param hasher_type: the hasher algorithm to use for checksum
|
||||
:param hasher_type: the hashing algorithm to use for checksum
|
||||
:type hasher_type: string
|
||||
:param hasher: hasher object for the specified hasher_type
|
||||
:type hasher: hashlib oject
|
||||
:param blocksize: the size of blocks to read when checksuming
|
||||
:type hasher: hashlib object
|
||||
:param blocksize: the max. size of block to read when hashing a file
|
||||
:type blocksize: integer
|
||||
:param exclude: pattern of files to exclude
|
||||
:type exclude: string
|
||||
|
@ -72,7 +73,7 @@ class CheckSum(object):
|
|||
self.hasher_size = 32
|
||||
else:
|
||||
raise ValueError(
|
||||
"Unknown hasher_type for checksum: %s" % hasher_type)
|
||||
"Unknown hasher_type for checksum: {}".format(hasher_type))
|
||||
|
||||
def get_files_hashes_in_path(self):
|
||||
"""
|
||||
|
@ -89,8 +90,8 @@ class CheckSum(object):
|
|||
Open filename and calculate its hash.
|
||||
Append the hash to the previous result and stores the checksum for
|
||||
this concatenation
|
||||
:param filename: path to file
|
||||
:type filename: string
|
||||
:param filepath: path to file
|
||||
:type filepath: string
|
||||
:return: string containing the hash of the given file
|
||||
"""
|
||||
if (os.path.isfile(filepath) and not (
|
||||
|
@ -108,12 +109,21 @@ class CheckSum(object):
|
|||
def hashfile(self, afile):
|
||||
"""
|
||||
Checksum a single file with the chosen algorithm.
|
||||
The file is read per chunk.
|
||||
The file is read in chunks of self.blocksize.
|
||||
:return: string
|
||||
"""
|
||||
# encode_buffer = False
|
||||
|
||||
buf = afile.read(self.blocksize)
|
||||
while len(buf) > 0:
|
||||
buf = buf.encode("utf-8")
|
||||
while buf:
|
||||
# Need to use string-escape for Python 2 non-unicode strings. For
|
||||
# Python 2 unicode strings and all Python 3 strings, we need to use
|
||||
# unicode-escape. The effect of them is the same.
|
||||
if PY2 and isinstance(buf, str):
|
||||
buf = buf.encode('string-escape')
|
||||
else:
|
||||
buf = buf.encode('unicode-escape')
|
||||
|
||||
self.hasher.update(buf)
|
||||
buf = afile.read(self.blocksize)
|
||||
return self.hasher.hexdigest()
|
||||
|
@ -128,10 +138,10 @@ class CheckSum(object):
|
|||
def compute(self):
|
||||
"""
|
||||
Compute the checksum for the given path.
|
||||
If a single file is provided, the result is its checksum concacatenated
|
||||
If a single file is provided, the result is its checksum concatenated
|
||||
with its name.
|
||||
If a directory is provided, the result is the checksum of the checksum
|
||||
concatenatin for each file.
|
||||
concatenation for each file.
|
||||
:return: string
|
||||
"""
|
||||
self.checksum = self.get_files_hashes_in_path()
|
||||
|
|
|
@ -34,11 +34,11 @@ CONF = cfg.CONF
|
|||
LOG = log.getLogger(__name__)
|
||||
|
||||
|
||||
def create_dir_tree(dir):
|
||||
def create_dir_tree(directory_path):
|
||||
try:
|
||||
os.makedirs(dir)
|
||||
os.makedirs(directory_path)
|
||||
except OSError as exc:
|
||||
if exc.errno == errno.EEXIST and os.path.isdir(dir):
|
||||
if exc.errno == errno.EEXIST and os.path.isdir(directory_path):
|
||||
pass
|
||||
else:
|
||||
raise exc
|
||||
|
@ -57,8 +57,8 @@ def create_dir(directory, do_log=True):
|
|||
try:
|
||||
if not os.path.isdir(expanded_dir_name):
|
||||
if do_log:
|
||||
LOG.warning('Directory {0} does not exists,\
|
||||
creating...'.format(expanded_dir_name))
|
||||
LOG.warning('Directory {0} does not exist, creating...'.format(
|
||||
expanded_dir_name))
|
||||
os.makedirs(expanded_dir_name)
|
||||
else:
|
||||
if do_log:
|
||||
|
@ -413,7 +413,7 @@ def exclude_path(path, exclude):
|
|||
:return: True if path matches the exclude pattern, False otherwise
|
||||
"""
|
||||
for name in path.split('/'):
|
||||
if (fn.fnmatch(name, exclude) or os.path.basename(path) == exclude):
|
||||
if fn.fnmatch(name, exclude) or os.path.basename(path) == exclude:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
@ -497,14 +497,14 @@ def set_max_process_priority():
|
|||
try:
|
||||
LOG.warning(
|
||||
'Setting freezer execution with high CPU and I/O priority')
|
||||
PID = os.getpid()
|
||||
pid = os.getpid()
|
||||
# Set cpu priority
|
||||
os.nice(-19)
|
||||
# Set I/O Priority to Real Time class with level 0
|
||||
subprocess.call([
|
||||
u'{0}'.format(find_executable("ionice")),
|
||||
u'-c', u'1', u'-n', u'0', u'-t',
|
||||
u'-p', u'{0}'.format(PID)
|
||||
u'-p', u'{0}'.format(pid)
|
||||
])
|
||||
except Exception as priority_error:
|
||||
LOG.warning('Priority: {0}'.format(priority_error))
|
||||
|
|
|
@ -25,13 +25,15 @@ class TestChecksum(unittest.TestCase):
|
|||
def setUp(self):
|
||||
self.file = Mock()
|
||||
self.dir = Mock()
|
||||
self.hello_world_md5sum = "6f5902ac237024bdd0c176cb93063dc4"
|
||||
self.hello_world_sha256sum = "a948904f2f0f479b8f8197694b301"\
|
||||
"84b0d2ed1c1cd2a1ec0fb85d299a192a447"
|
||||
|
||||
self.hello_world_md5sum = 'f36b2652200f5e88edd57963a1109146'
|
||||
self.hello_world_sha256sum = '17b949eb67acf16bbf2605d57a01f7af4ff4b5' \
|
||||
'7e200259de63fcebf20e75bbf5'
|
||||
|
||||
self.fake_file = StringIO(u"hello world\n")
|
||||
self.increment_hash_one = self.hello_world_sha256sum
|
||||
self.increment_hash_multi = "50952b1bedb323003ccc47b49d459f43"\
|
||||
"11d4be243668a81ecf489c824463caa1"
|
||||
self.increment_hash_multi = '1b4bc4ff41172a5f29eaeffb7e9fc24c683c693' \
|
||||
'9ab30132ad5d93a1e4a6b16e8'
|
||||
self.increment_hash_emptydir = "6b6c6a3d7548cc4396b3dacc6c2750c3"\
|
||||
"da53f379d20996cbdd2c18be00c3742c"
|
||||
self.fake_dir = [('root', ['d1, .git'], ['a', 'b']), ]
|
||||
|
|
Loading…
Reference in New Issue