@V1_API.route('/v1/file', methods=['POST'])
def post_file():
    """Store an uploaded file, hash it and register it for analysis.

    The upload is taken from the ``file`` part of the request and written to
    a freshly named file under ``/tmp``. When a ``Content-Range`` header is
    present, the body is written at the starting byte offset named by the
    header; otherwise the whole upload is saved in one go.

    The stored file is then hashed. If a record with the same SHA-512
    already exists it is marked as ``analysing`` again and its
    ``last_updated`` timestamp is refreshed; otherwise a new record is
    created for the current user.

    :returns: JSON response built from the file record's ``to_dict()``.
    :raises werkzeug.exceptions.BadRequest: (via ``abort(400)``) when the
        ``Content-Range`` header cannot be parsed.
    """
    # Local import: ``abort`` is not among this module's top-level imports.
    from flask import abort

    filename = secure_filename(str(uuid.uuid4()))
    filename = '/tmp/%s' % filename
    upload = request.files['file']

    if 'Content-Range' in request.headers:
        # Header is expected to look like "bytes <start>-<end>/<total>".
        range_str = request.headers['Content-Range']
        try:
            start_bytes = int(range_str.split(' ')[1].split('-')[0])
        except (IndexError, ValueError):
            # A malformed header is a client error, not a server crash.
            abort(400)

        # Append mode ('a') sends every write to the end of the file, so
        # the seek below would have no effect, and text mode cannot take
        # the bytes read from the upload stream. Open for binary update
        # instead, creating the file first when it does not exist yet.
        #
        # NOTE(review): ``filename`` is a new UUID on every request, so
        # chunks of one upload never end up in the same file. Reassembling
        # a chunked upload needs a stable, client-supplied upload id.
        mode = 'r+b' if os.path.exists(filename) else 'wb'
        with open(filename, mode) as f:
            f.seek(start_bytes)
            f.write(upload.stream.read())

    else:
        # this is not a chunked request, so just save the whole file
        upload.save(filename)

    # NOTE(review): the file under /tmp is never removed and its path is
    # not stored on the record; confirm who is responsible for cleanup.
    file_hashes = get_all_hashes(filename)
    file_size = os.path.getsize(filename)
    existing = get_file_by_sha512_hash(file_hashes['sha512'])

    if existing:
        # Known file: refresh the record instead of creating a duplicate.
        existing.last_updated = datetime.datetime.now()
        existing.status = 'analysing'
        db.session.commit()
        return jsonify(existing.to_dict())

    created = create_new_file(file_hashes['md5'],
                              file_hashes['sha1'],
                              file_hashes['sha256'],
                              file_hashes['sha512'],
                              file_size,
                              current_user.user_id)
    return jsonify(created.to_dict())
def search_by_hash(lookup_hash):
    """Return every file record matching ``lookup_hash``.

    The value is compared against the file id and the SHA-512, SHA-256,
    SHA-1 and MD5 columns; a match on any of them selects the record.

    :param lookup_hash: value to look for.
    :returns: list of ``to_dict()`` results, empty when nothing matches.
    """
    # NOTE(review): the column type of ``Files.file_id`` is not visible
    # here; comparing it with a hash string may need a cast on strict
    # database backends -- confirm.
    results = db.session.query(Files). \
        filter(or_(Files.file_id == lookup_hash,
                   Files.sha512_hash == lookup_hash,
                   Files.sha256_hash == lookup_hash,
                   Files.sha1_hash == lookup_hash,
                   Files.md5_hash == lookup_hash))

    return [record.to_dict() for record in results]


def get_file_by_sha512_hash(lookup_hash):
    """Return a file record with the given SHA-512 hash, or ``None``.

    ``first()`` is used rather than ``one()``: ``one()`` raises
    ``MultipleResultsFound`` when more than one row carries the same hash,
    and that exception was not handled here.

    :param lookup_hash: SHA-512 hex digest to look for.
    :returns: a matching ``Files`` instance, or ``None`` when there is none.
    """
    return db.session.query(Files). \
        filter(Files.sha512_hash == lookup_hash).first()


def create_new_file(md5_hash, sha1_hash, sha256_hash, sha512_hash,
                    size, submitter):
    """Create, persist and return a new file record.

    The record starts in the ``analysing`` status with ``first_seen`` and
    ``last_updated`` both set to the current time.

    :param md5_hash: MD5 hex digest of the file.
    :param sha1_hash: SHA-1 hex digest of the file.
    :param sha256_hash: SHA-256 hex digest of the file.
    :param sha512_hash: SHA-512 hex digest of the file.
    :param size: value stored in the record's ``size`` column.
    :param submitter: value stored in the record's ``submitted_by`` column.
    :returns: the committed ``Files`` instance.
    """
    # Imported locally so this function does not depend on a module-level
    # ``import datetime`` that may be absent.
    import datetime

    now = datetime.datetime.now()
    new_file = Files(size=size,
                     md5_hash=md5_hash,
                     sha1_hash=sha1_hash,
                     sha256_hash=sha256_hash,
                     sha512_hash=sha512_hash,
                     submitted_by=submitter,
                     last_updated=now,
                     first_seen=now,
                     status='analysing')
    db.session.add(new_file)
    db.session.commit()
    return new_file
import hashlib


# Default number of bytes read from the file per iteration.
BUFFER_SIZE = 65536


def get_all_hashes(filename, buffer_size=BUFFER_SIZE):
    """Compute the MD5, SHA-1, SHA-256 and SHA-512 digests of a file.

    The file is read once, in chunks, and every chunk is fed to all four
    hashers so large files are never loaded into memory in full.

    :param filename: path of the file to hash.
    :param buffer_size: number of bytes to read per chunk; must be
        positive. Defaults to ``BUFFER_SIZE``.
    :returns: dict with the keys ``sha512``, ``sha256``, ``sha1`` and
        ``md5``, each mapped to the lowercase hex digest.
    :raises ValueError: if ``buffer_size`` is not positive.
    :raises IOError: if the file cannot be opened or read.
    """
    if buffer_size <= 0:
        # A zero-sized read returns no data, which would end the loop at
        # once and silently report the digests of an empty file.
        raise ValueError('buffer_size must be a positive integer')

    hashers = {"sha512": hashlib.sha512(),
               "sha256": hashlib.sha256(),
               "sha1": hashlib.sha1(),
               "md5": hashlib.md5()}

    with open(filename, 'rb') as f:
        # iter() with a sentinel stops at the first empty read (EOF).
        for data in iter(lambda: f.read(buffer_size), b''):
            for hasher in hashers.values():
                hasher.update(data)

    return dict((name, hasher.hexdigest())
                for name, hasher in hashers.items())