Improve ChunkedBackupDriver hashlib calls

Currently we have 2 hashlib calls within the ChunkedBackupDriver, one to calculate the MD5 of the chunk and another to calculate the SHA256 of the blocks within each chunk. This patch improves interactions between cinder and the hashlib library method calls by making sure MD5 and SHA256 related calls are executed in a native thread to improve context switching responsiveness within eventlet. The MD5 of a 1GB chunk could take around 4 seconds, so the overhead of creating a native thread is acceptable, and for the SHA256, instead of creating a thread for each call, we create a single thread to do the calculations of all the blocks, thus making it cost effective. Current code slices the data into blocks, which means that the data is being copied, but this has now been switched to a memoryview object to take advantage of the buffer protocol so copying of data is no longer necessary. Change-Id: Ifb65b8008f30bc9cc4b6cd9b867a726ec4ed4707 (cherry picked from commit 671b02b504) Conflicts: cinder/backup/chunkeddriver.py (cherry picked from commit 2ec114d3c7) (cherry picked from commit acfdbb62f7)
2018-02-05 18:57:46 +01:00 · 2018-02-05 18:57:46 +01:00 · 7dd5b5ce7f
parent f2f65dfab0
commit 7dd5b5ce7f
1 changed files with 22 additions and 14 deletions
--- a/cinder/backup/chunkeddriver.py
+++ b/cinder/backup/chunkeddriver.py
@ -328,7 +328,7 @@ class ChunkedBackupDriver(driver.BackupDriver):
                container, object_name, extra_metadata=extra_metadata
        ) as writer:
            writer.write(output_data)
-        md5 = hashlib.md5(data).hexdigest()
+        md5 = eventlet.tpool.execute(hashlib.md5, data).hexdigest()
        obj[object_name]['md5'] = md5
        LOG.debug('backup MD5 for %(object_name)s: %(md5)s',
                  {'object_name': object_name, 'md5': md5})
@ -421,6 +421,25 @@ class ChunkedBackupDriver(driver.BackupDriver):
                                               extra_usage_info=
                                               object_meta)

+    def _calculate_sha(self, data):
+        """Calculate SHA256 of a data chunk.
+
+        This method cannot log anything as it is called on a native thread.
+        """
+        # NOTE(geguileo): Using memoryview to avoid data copying when slicing
+        # for the sha256 call.
+        chunk = memoryview(data)
+        shalist = []
+        off = 0
+        datalen = len(chunk)
+        while off < datalen:
+            chunk_end = min(datalen, off + self.sha_block_size_bytes)
+            block = chunk[off:chunk_end]
+            sha = hashlib.sha256(block).hexdigest()
+            shalist.append(sha)
+            off += self.sha_block_size_bytes
+        return shalist
+
    def backup(self, backup, volume_file, backup_metadata=True):
        """Backup the given volume.

@ -499,18 +518,7 @@ class ChunkedBackupDriver(driver.BackupDriver):
                break

            # Calculate new shas with the datablock.
-            shalist = []
-            off = 0
-            datalen = len(data)
-            while off < datalen:
-                chunk_start = off
-                chunk_end = chunk_start + self.sha_block_size_bytes
-                if chunk_end > datalen:
-                    chunk_end = datalen
-                chunk = data[chunk_start:chunk_end]
-                sha = hashlib.sha256(chunk).hexdigest()
-                shalist.append(sha)
-                off += self.sha_block_size_bytes
+            shalist = eventlet.tpool.execute(self._calculate_sha, data)
            sha256_list.extend(shalist)

            # If parent_backup is not None, that means an incremental
@ -537,7 +545,7 @@ class ChunkedBackupDriver(driver.BackupDriver):

                # The last extent extends to the end of data buffer.
                if extent_off != -1:
-                    extent_end = datalen
+                    extent_end = len(data)
                    segment = data[extent_off:extent_end]
                    self._backup_chunk(backup, container, segment,
                                       data_offset + extent_off,