Allow file upload and hash calculation.

Allow users to upload a file for analysis and calculate hashes for that file.
Later we probably need to send this to some shared storage (Swift)
and then pass the file_id to a queue so the workers can pick it up
and process it accordingly.

Change-Id: Ie45adb5154c1c17f61c6d0cdddeaa2191d05f91e
This commit is contained in:
Robert Putt 2017-12-02 20:10:48 +00:00
parent d7efed8c1d
commit a4ab2d89ca
7 changed files with 139 additions and 7 deletions

View File

@ -36,7 +36,6 @@ def upgrade():
sa.Column('sha256_hash', sa.TEXT(), nullable=True),
sa.Column('sha1_hash', sa.TEXT(), nullable=True),
sa.Column('md5_hash', sa.TEXT(), nullable=True),
sa.Column('crc32', sa.TEXT(), nullable=True),
sa.Column('size', sa.FLOAT(), nullable=True),
sa.Column('mime_type', sa.VARCHAR(length=40),
nullable=True),

View File

@ -12,14 +12,24 @@
# License for the specific language governing permissions and limitations
# under the License.
import datetime
from flask import Blueprint
from flask import jsonify
from flask import request
from flask_keystone import current_user
import os
from python_nemesis.db.utilities import add_request
from python_nemesis.db.utilities import create_new_file
from python_nemesis.db.utilities import get_file_by_sha512_hash
from python_nemesis.db.utilities import search_by_hash
from python_nemesis.exceptions import general_handler
from python_nemesis.exceptions import NemesisException
from python_nemesis.exceptions import NotFoundException
from python_nemesis.extensions import db
from python_nemesis.extensions import log
from python_nemesis.file_hasher import get_all_hashes
import uuid
from werkzeug.utils import secure_filename
V1_API = Blueprint('v1_api', __name__)
@ -58,4 +68,41 @@ def lookup_hash(req_hash):
@V1_API.route('/v1/file', methods=['POST'])
def post_file():
    """Accept an uploaded file, hash it and record it in the database.

    The upload is taken from the ``file`` field of the request and written
    to a uniquely named temporary path under ``/tmp``. Its hashes and size
    are computed from that on-disk copy, which is removed again before the
    database is touched. If a record with the same SHA-512 already exists
    it is marked as ``analysing`` and its ``last_updated`` timestamp is
    refreshed; otherwise a new record is created for the current user.

    :returns: JSON response built from the ``to_dict()`` form of the
              existing or newly created file record.
    """
    filename = secure_filename(str(uuid.uuid4()))
    filename = '/tmp/%s' % filename
    upload = request.files['file']

    try:
        if 'Content-Range' in request.headers:
            # Extract the starting byte from the Content-Range header,
            # e.g. "bytes 0-1023/4096" -> 0.
            range_str = request.headers['Content-Range']
            start_bytes = int(range_str.split(' ')[1].split('-')[0])
            # The stream yields bytes, so the file must be opened in binary
            # mode; text mode rejects bytes on Python 3.
            # NOTE(review): in append mode the OS normally writes at the end
            # of the file whatever the seek position is, and the filename is
            # freshly generated for each request, so chunks of one upload do
            # not appear to be reassembled here - confirm intended design.
            with open(filename, 'ab') as f:
                f.seek(start_bytes)
                f.write(upload.stream.read())
        else:
            # This is not a chunked request, so just save the whole file.
            upload.save(filename)

        file_hashes = get_all_hashes(filename)
        file_size = os.path.getsize(filename)
    finally:
        # The temporary name is never stored anywhere, so nothing can find
        # the file afterwards; remove it rather than leaking it in /tmp.
        if os.path.exists(filename):
            os.remove(filename)

    current_file = get_file_by_sha512_hash(file_hashes['sha512'])
    if current_file:
        # Known file: refresh the existing record instead of duplicating it.
        current_file.last_updated = datetime.datetime.now()
        current_file.status = 'analysing'
        db.session.commit()
        file_dict = current_file.to_dict()
    else:
        new_file = create_new_file(file_hashes['md5'],
                                   file_hashes['sha1'],
                                   file_hashes['sha256'],
                                   file_hashes['sha512'],
                                   file_size,
                                   current_user.user_id)
        file_dict = new_file.to_dict()

    return jsonify(file_dict)

View File

@ -20,7 +20,6 @@ class Files(db.Model):
sha256_hash = db.Column(db.UnicodeText(), nullable=True, index=True)
sha1_hash = db.Column(db.UnicodeText(), nullable=True, index=True)
md5_hash = db.Column(db.UnicodeText(), nullable=True, index=True)
crc32 = db.Column(db.UnicodeText(), nullable=True, index=True)
size = db.Column(db.Float(), nullable=True)
mime_type = db.Column(db.String(40), nullable=True)
submitted_by = db.Column(db.String(120), nullable=False, index=True)
@ -35,7 +34,6 @@ class Files(db.Model):
"sha256": self.sha256_hash,
"sha1": self.sha1_hash,
"md5": self.md5_hash,
"crc32": self.crc32,
"size": self.size,
"mime_type": self.mime_type,
"status": self.status,

View File

@ -17,6 +17,7 @@ from python_nemesis.db.models import FileLookupRequest
from python_nemesis.db.models import Files
from python_nemesis.extensions import db
from sqlalchemy import or_
from sqlalchemy.orm.exc import NoResultFound
def add_request(lookup_hash, result, file_id=None):
@ -32,14 +33,41 @@ def add_request(lookup_hash, result, file_id=None):
def search_by_hash(lookup_hash):
    """Return all file records whose ID or any hash equals ``lookup_hash``.

    :param lookup_hash: Value compared for equality against the file ID
                        and the SHA-512, SHA-256, SHA-1 and MD5 columns.
    :returns: List with the ``to_dict()`` form of every matching record;
              empty when nothing matches.
    """
    # crc32 is deliberately not part of the filter: the column is no longer
    # defined on the Files model.
    results = db.session.query(Files). \
        filter(or_(Files.file_id == lookup_hash,
                   Files.sha512_hash == lookup_hash,
                   Files.sha256_hash == lookup_hash,
                   Files.sha1_hash == lookup_hash,
                   Files.md5_hash == lookup_hash))

    return [record.to_dict() for record in results]
def get_file_by_sha512_hash(lookup_hash):
    """Look up the single file record with the given SHA-512 hash.

    :param lookup_hash: SHA-512 value compared against ``sha512_hash``.
    :returns: The matching ``Files`` row, or ``None`` when the query finds
              no row. Only ``NoResultFound`` is handled; any other error
              raised by ``one()`` propagates to the caller.
    """
    query = db.session.query(Files)
    query = query.filter(Files.sha512_hash == lookup_hash)
    try:
        return query.one()
    except NoResultFound:
        return None
def create_new_file(md5_hash, sha1_hash, sha256_hash, sha512_hash,
                    size, submitter):
    """Create, persist and return a new file record.

    The record starts in the ``analysing`` status, and its ``first_seen``
    and ``last_updated`` fields share one timestamp taken at creation.

    :param md5_hash: MD5 digest stored on the record.
    :param sha1_hash: SHA-1 digest stored on the record.
    :param sha256_hash: SHA-256 digest stored on the record.
    :param sha512_hash: SHA-512 digest stored on the record.
    :param size: Value stored in the record's ``size`` field.
    :param submitter: Value stored in the record's ``submitted_by`` field.
    :returns: The committed ``Files`` instance.
    """
    timestamp = datetime.datetime.now()
    record = Files(
        status='analysing',
        first_seen=timestamp,
        last_updated=timestamp,
        submitted_by=submitter,
        sha512_hash=sha512_hash,
        sha256_hash=sha256_hash,
        sha1_hash=sha1_hash,
        md5_hash=md5_hash,
        size=size,
    )

    session = db.session
    session.add(record)
    session.commit()
    return record

View File

@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import hashlib
# Number of bytes read from the file per iteration while hashing.
BUFFER_SIZE = 65536


def get_all_hashes(filename):
    """Compute the MD5, SHA-1, SHA-256 and SHA-512 digests of a file.

    The file is read once, in ``BUFFER_SIZE`` byte chunks, and every chunk
    is fed to all four hashers, so the whole file is never held in memory.

    :param filename: Path of the file to hash; opened in binary mode.
    :returns: Dict with the keys ``sha512``, ``sha256``, ``sha1`` and
              ``md5``, each mapped to the hexadecimal digest string.
    """
    hashers = {
        "sha512": hashlib.sha512(),
        "sha256": hashlib.sha256(),
        "sha1": hashlib.sha1(),
        "md5": hashlib.md5(),
    }

    with open(filename, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read (EOF).
        for chunk in iter(lambda: stream.read(BUFFER_SIZE), b''):
            for hasher in hashers.values():
                hasher.update(chunk)

    return {name: hasher.hexdigest() for name, hasher in hashers.items()}

View File

View File

@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
class NemesisPlugin(object):
    """Plugin class that currently defines no state or behaviour.

    NOTE(review): presumably the common base for Nemesis plugins - confirm.
    """

    def __init__(self):
        """Create the plugin; no initialisation is performed."""