From 1a4d5d3af1abd6d6d3d2f7cc9d976a1fea5fe70b Mon Sep 17 00:00:00 2001 From: Robert Putt Date: Thu, 14 Dec 2017 13:28:28 +0000 Subject: [PATCH] Add MIME type detection upon upload Change-Id: Ia2f080b9fa7c7b12fe06166caecf089ded3cb16a --- alembic/versions/53784f13e35d_initial_models.py | 2 +- python_nemesis/api/v1/__init__.py | 16 +++++++++++++--- python_nemesis/db/models.py | 6 +++--- python_nemesis/db/utilities.py | 6 ++++-- python_nemesis/exceptions.py | 6 ++++++ requirements.txt | 1 + 6 files changed, 28 insertions(+), 9 deletions(-) diff --git a/alembic/versions/53784f13e35d_initial_models.py b/alembic/versions/53784f13e35d_initial_models.py index 05ac1b9..32d1a0d 100644 --- a/alembic/versions/53784f13e35d_initial_models.py +++ b/alembic/versions/53784f13e35d_initial_models.py @@ -37,7 +37,7 @@ def upgrade(): sa.Column('sha1_hash', sa.TEXT(), nullable=True), sa.Column('md5_hash', sa.TEXT(), nullable=True), sa.Column('size', sa.FLOAT(), nullable=True), - sa.Column('mime_type', sa.VARCHAR(length=40), + sa.Column('mime_type', sa.VARCHAR(length=120), nullable=True), sa.Column('submitted_by', sa.VARCHAR(length=120), nullable=False), diff --git a/python_nemesis/api/v1/__init__.py b/python_nemesis/api/v1/__init__.py index 74a711d..e40c8db 100644 --- a/python_nemesis/api/v1/__init__.py +++ b/python_nemesis/api/v1/__init__.py @@ -15,10 +15,13 @@ from flask import Blueprint from flask import jsonify from flask import request +import magic import os from python_nemesis.db.utilities import add_request from python_nemesis.db.utilities import create_or_renew_by_hash +from python_nemesis.db.utilities import get_file_by_sha512_hash from python_nemesis.db.utilities import search_by_hash +from python_nemesis.exceptions import BadRequestException from python_nemesis.exceptions import general_handler from python_nemesis.exceptions import NemesisException from python_nemesis.exceptions import NotFoundException @@ -56,7 +59,8 @@ def lookup_hash(req_hash): raise NotFoundException("Unable to find file with hash %s." % req_hash) elif len(result) == 1: - add_request(req_hash, 'found', file_id=result[0]['file_id']) + file = get_file_by_sha512_hash(req_hash) + add_request(req_hash, 'found', file_id=file.file_id) else: add_request(req_hash, 'multiple_found') @@ -68,7 +72,12 @@ def lookup_hash(req_hash): def post_file(): file_uuid = secure_filename(str(uuid.uuid4())) filename = '/tmp/%s' % file_uuid - file = request.files['file'] + + try: + file = request.files['file'] + except Exception: + raise BadRequestException("Not a valid multipart upload form with " + "key named file.") if 'Content-Range' in request.headers: # Extract starting byte from Content-Range header string. @@ -87,7 +96,8 @@ def post_file(): # Generate hash of file, and create new, or renew existing db row. file_hashes = get_all_hashes(filename) file_size = os.path.getsize(filename) - file = create_or_renew_by_hash(file_hashes, file_size) + file_type = magic.from_file(filename, mime=True) + file = create_or_renew_by_hash(file_hashes, file_size, file_type) file_id = file.file_id file_dict = file.to_dict() diff --git a/python_nemesis/db/models.py b/python_nemesis/db/models.py index eaf8511..dca3175 100644 --- a/python_nemesis/db/models.py +++ b/python_nemesis/db/models.py @@ -21,7 +21,7 @@ class Files(db.Model): sha1_hash = db.Column(db.UnicodeText(), nullable=True, index=True) md5_hash = db.Column(db.UnicodeText(), nullable=True, index=True) size = db.Column(db.Float(), nullable=True) - mime_type = db.Column(db.String(40), nullable=True) + mime_type = db.Column(db.String(120), nullable=True) submitted_by = db.Column(db.String(120), nullable=False, index=True) status = db.Column(db.String(20), nullable=False) last_updated = db.Column(db.DateTime, nullable=False) @@ -36,8 +36,8 @@ class Files(db.Model): "size": self.size, "mime_type": self.mime_type, "status": self.status, - "last_updated": self.last_updated, - "first_seen": self.first_seen} + "last_updated": self.last_updated.isoformat(), + "first_seen": self.first_seen.isoformat()} class FileLookupRequest(db.Model): diff --git a/python_nemesis/db/utilities.py b/python_nemesis/db/utilities.py index d2b05c6..ff783e1 100644 --- a/python_nemesis/db/utilities.py +++ b/python_nemesis/db/utilities.py @@ -56,9 +56,10 @@ def get_file_by_sha512_hash(lookup_hash): def create_new_file(md5_hash, sha1_hash, sha256_hash, sha512_hash, - size, submitter): + size, file_type, submitter): now = datetime.datetime.now() file = Files(size=size, + mime_type=file_type, md5_hash=md5_hash, sha1_hash=sha1_hash, sha256_hash=sha256_hash, @@ -72,7 +73,7 @@ def create_new_file(md5_hash, sha1_hash, sha256_hash, sha512_hash, return file -def create_or_renew_by_hash(hashes, file_size): +def create_or_renew_by_hash(hashes, file_size, file_type=None): current_file = get_file_by_sha512_hash(hashes['sha512']) if current_file: @@ -87,5 +88,6 @@ def create_or_renew_by_hash(hashes, file_size): hashes['sha256'], hashes['sha512'], file_size, + file_type, current_user.user_id) return file diff --git a/python_nemesis/exceptions.py b/python_nemesis/exceptions.py index 625009f..84b1e72 100644 --- a/python_nemesis/exceptions.py +++ b/python_nemesis/exceptions.py @@ -60,3 +60,9 @@ class NotFoundException(NemesisException): status_code = 404 title = "Not Found" message = "" + + +class BadRequestException(NemesisException): + status_code = 400 + title = "Bad Request" + message = "" diff --git a/requirements.txt b/requirements.txt index 08374db..8ee4435 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,4 @@ oslo.messaging>=5.29.0 # Apache-2.0 oslo.log>=3.30.0 # Apache-2.0 python-swiftclient>=3.2.0 # Apache-2.0 keystonemiddleware>=4.17.0 # Apache-2.0 +python-magic>=0.4.13 # MIT