Hash the files before compressing them

When doing external lookups, we don't want to have to compress a file
before we can determine its hash.
Computing the hash before compression makes this lookup easier elsewhere.

Change-Id: If3680038f0cc85659598ddfd39203804e889d33c
Author: David Moreau Simard
Date:   2018-10-03 12:48:02 -05:00
Commit: be649f16d3 (parent 41db775584)
GPG Key ID: CBEB466764A9E621
2 changed files with 3 additions and 4 deletions
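
The idea, as a minimal sketch (the literal content below is only illustrative):
the lookup key is the sha1 of the raw utf-8 bytes, and zlib compression is
applied only to what actually gets stored.

import hashlib
import zlib

raw = "hosts: all".encode("utf8")  # illustrative file content

# The lookup key is computed on the raw bytes, before any compression.
key = hashlib.sha1(raw).hexdigest()

# Only the stored copy is compressed; decompressing it round-trips to the
# original bytes, so the same key can be recomputed from either side.
stored = zlib.compress(raw)
assert zlib.decompress(stored) == raw
assert hashlib.sha1(zlib.decompress(stored)).hexdigest() == key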


@@ -90,10 +90,10 @@ class FileContentField(serializers.CharField):
         return zlib.decompress(obj.contents).decode("utf8")
 
     def to_internal_value(self, data):
-        contents = zlib.compress(data.encode("utf8"))
+        contents = data.encode("utf8")
         sha1 = hashlib.sha1(contents).hexdigest()
         content_file, created = models.FileContent.objects.get_or_create(
-            sha1=sha1, defaults={"sha1": sha1, "contents": contents}
+            sha1=sha1, defaults={"sha1": sha1, "contents": zlib.compress(contents)}
         )
         return content_file
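
With that change, anything that already has the raw file content can find the
stored record without compressing it first. A rough sketch; the FileContent
model and its sha1 field come from the hunk above, while the import path and
the query itself are only assumptions for illustration:

import hashlib

from ara.api import models  # assumed import path, matching the serializer above


def find_file_content(raw_text):
    # Hash the raw content exactly as to_internal_value() now does, then look
    # the record up by that key; no zlib needed on the caller's side.
    key = hashlib.sha1(raw_text.encode("utf8")).hexdigest()
    return models.FileContent.objects.filter(sha1=key).first()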


@@ -38,5 +38,4 @@ def sha1(obj):
     """
     Returns the sha1 of a compressed string or an object
     """
-    contents = zlib.compress(obj.encode("utf8"))
-    return hashlib.sha1(contents).hexdigest()
+    return hashlib.sha1(obj.encode("utf8")).hexdigest()
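
The effect on the helper, shown side by side (a sketch; the content string is
illustrative): before this commit the key depended on the zlib-compressed
bytes, afterwards it is just the sha1 of the raw utf-8 bytes and can be
reproduced anywhere without zlib.

import hashlib
import zlib

content = "hosts: all"

# Old behaviour: hash of the compressed bytes.
old_key = hashlib.sha1(zlib.compress(content.encode("utf8"))).hexdigest()

# New behaviour: hash of the raw utf-8 bytes.
new_key = hashlib.sha1(content.encode("utf8")).hexdigest()

assert new_key == hashlib.sha1(b"hosts: all").hexdigest()
assert old_key != new_key  # the same content now maps to a different key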