diff --git a/server/first/__init__.py b/server/first/__init__.py index 80bd52a..e69de29 100644 --- a/server/first/__init__.py +++ b/server/first/__init__.py @@ -1,27 +0,0 @@ -#------------------------------------------------------------------------------- -# -# Intializes FIRST's DBManager and EngineManager -# Copyright (C) 2016 Angel M. Villegas -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# -#------------------------------------------------------------------------------- - -# FIRST Modules -from first.dbs import FIRSTDBManager -from first.engines import FIRSTEngineManager - -DBManager = FIRSTDBManager() -EngineManager = FIRSTEngineManager(DBManager) diff --git a/server/first/dbs/builtin_db.py b/server/first/dbs/builtin_db.py deleted file mode 100644 index 5f4437d..0000000 --- a/server/first/dbs/builtin_db.py +++ /dev/null @@ -1,466 +0,0 @@ -#------------------------------------------------------------------------------- -# -# FIRST DB Module for completing operations with the MongoDB backend -# Copyright (C) 2016 Angel M. Villegas -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Requirements -# ------------ -# - flask -# - mongoengine -# - werkzeug -# -#------------------------------------------------------------------------------- - -# Python Modules -import re -import math -import json -import hashlib -import datetime -import ConfigParser -from hashlib import md5 - -# Third Party Modules -import bson -from mongoengine import Q -from mongoengine.queryset import DoesNotExist, MultipleObjectsReturned - -# FIRST Modules -from first.dbs import AbstractDB -from first.models import User, Metadata, Function, Sample, Engine - - -class FIRSTDB(AbstractDB): - _name = 'first_db' - standards = { 'intel16', 'intel32', 'intel64', 'arm32', 'arm64', 'mips', - 'ppc', 'sparc', 'sysz'} - - # - # Functions called by FIRST Framework - #-------------------------------------------------------------------------- - def __init__(self, config): - ''' - Constructor. - - @param conf: ConfigParser.RawConfigParser - ''' - self._is_installed = True - ''' - section = 'mongodb_settings' - - if (not config.has_section(section) - or not config.has_option(section, 'db')): - raise FirstDBError('DB settings not available', skip=True) - - if section.upper() not in app.config: - app.config[section.upper()] = {} - - app.config[section.upper()]['db'] = conf.get(section, 'db') - self.db.init_app(app) - ''' - - def get_architectures(self): - standards = FIRSTDB.standards.copy() - standards.update(Function.objects().distinct(field='architecture')) - return list(standards) - - def get_sample(self, md5_hash, crc32, create=False): - try: - # Get Sample from DB - return Sample.objects.get(md5=md5_hash, crc32=crc32) - - except DoesNotExist: - if not create: - return None - - # Create Sample for DB - sample = Sample(md5=md5_hash, crc32=crc32) - sample.last_seen = datetime.datetime.now() - sample.save() - return sample - - def sample_seen_by_user(self, sample, user): - if (not isinstance(sample, Sample)) or (not isinstance(user, User)): - return None - - if user not in sample.seen_by: - sample.seen_by.append(user) - sample.save() - - def checkin(self, user, md5_hash, crc32, sha1_hash=None, sha256_hash=None): - ''' - TODO: - - @returns String error message on Failure - None - ''' - if not isinstance(user, User): - return False - - # Validate data - if ((not re.match('^[a-f\d]{32}$', md5_hash)) - or (sha1_hash and not re.match('^[a-f\d]{40}$', sha1_hash)) - or (sha256_hash and not re.match('^[a-f\d]{64}$', sha256_hash))): - return False - - sample = self.get_sample(md5_hash, crc32, True) - if not sample: - return False - - sample.last_seen = datetime.datetime.now() - if user not in sample.seen_by: - sample.seen_by.append(user) - - if None != sha1_hash: - sample.sha1 = sha1_hash - - if None != sha256_hash: - sample.sha256 = sha256_hash - - sample.save() - return True - - def get_function(self, opcodes, architecture, apis, create=False, **kwargs): - function = None - - try: - function = Function.objects.get(sha256=hashlib.sha256(opcodes).hexdigest(), - opcodes=bson.Binary(opcodes), - architecture=architecture, - apis=apis) - except DoesNotExist: - # Create function and add it to sample - function = Function(sha256=hashlib.sha256(opcodes).hexdigest(), - opcodes=bson.Binary(opcodes), - architecture=architecture, - apis=apis) - function.save() - - return function - - def get_all_functions(self): - try: - return Function.objects.all() - - except: - return [] - - def find_function(self, _id=None, opcodes=None, apis=None, architecture=None, h_sha256=None): - try: - # User function ID - if None != _id: - return Function.objects(id=bson.objectid.ObjectId(_id)).get() - - # User opcodes and apis - elif None not in [opcodes, apis]: - return Function.objects(opcodes=opcodes, apis=apis).get() - - # Use hash, architecture - elif None not in [architecture, h_sha256]: - return Function.objects(sha256=h_sha256, architecture=architecture).get() - - else: - return None - - except DoesNotExist: - return None - - def add_function_to_sample(self, sample, function): - if (not isinstance(sample, Sample)) or (not isinstance(function, Function)): - return False - - if function not in sample.functions: - sample.functions.append(function) - sample.save() - - return True - - def add_metadata_to_function(self, user, function, name, prototype, comment, **kwargs): - if (not isinstance(function, Function)) or (not isinstance(user, User)): - return None - - # Check to see if user already has metadata associated with the sample - metadata = None - for m in function.metadata: - if user == m.user: - if m.has_changed(name, prototype, comment): - m.name = [name] + m.name - m.prototype = [prototype] + m.prototype - m.comment = [comment] + m.comment - m.committed = [datetime.datetime.now()] + m.committed - - metadata = m - break - - if not metadata: - metadata = Metadata(user=user, name=[name], - prototype=[prototype], - comment=[comment], - committed=[datetime.datetime.now()]) - function.metadata.append(metadata) - - function.save() - return str(metadata.id) - - def get_metadata_list(self, metadata): - results = [] - user_metadata, engine_metadata = self.separate_metadata(metadata) - - metadata_ids = map(bson.objectid.ObjectId, user_metadata) - mongo_filter = Q(metadata__id=metadata_ids[0]) - for mid in metadata_ids[1:]: - mongo_filter |= Q(metadata__id=mid) - - matches = Function.objects.filter(mongo_filter).only('metadata') - for function in matches: - for metadata in function.metadata: - if metadata.id in metadata_ids: - data = metadata.dump() - data['id'] = str(metadata.id) - results.append(data) - - # Remove id from list to shorten list - del metadata_ids[metadata_ids.index(metadata.id)] - - for _id in engine_metadata: - engines = Engine.object(id=_id) - if (not engines) or (len(engines) > 1): - continue - - data = {'id' : _id, 'engine' : engine.name, - 'description' : engine.description} - results.append(data) - - return results - - def delete_metadata(self, user, metadata_id): - if not isinstance(user, User): - return False - - user_metadata, engine_metadata = self.separate_metadata([metadata_id]) - if not user_metadata: - return False - - # User must be the creator of the metadata to delete it - metadata_id = bson.objectid.ObjectId(user_metadata[0]) - try: - Function.objects(metadata__user=user, metadata__id=metadata_id).update_one(pull__metadata__id=metadata_id) - return True - except DoesNotExist: - return False - - def created(self, user, page, max_metadata=20): - offset = (page - 1) * max_metadata - results = [] - pages = 0 - - if (offset < 0) or (not isinstance(user, User)): - return (results, pages) - - try: - matches = Function.objects(metadata__user=user).only('metadata') - total = Function.objects(metadata__user=user).count() + 0.0 - pages = int(math.ceil(total / max_metadata)) - if page > pages: - return (results, pages) - - matches = matches.skip(offset).limit(max_metadata) - - except ValueError: - return (results, pages) - - for function in matches: - for metadata in function.metadata: - if user == metadata.user: - temp = metadata.dump() - temp['id'] = FIRSTDB.make_id(metadata.id, 0) - results.append(temp) - - # Bail out of inner loop early since a user can only - # create one metadata entry per function - break - - return (results, pages) - - @staticmethod - def make_id(_id, flags): - return '{:1x}{}'.format(flags & 0xF, _id) - - def separate_metadata(self, metadata): - # Get metadata created by users only, MSB should not be set - user_metadata = [] - engine_metadata = [] - for x in metadata: - if len(x) == 24: - user_metadata.append(x) - elif (len(x) == 25) and (((int(x[0], 16) >> 3) & 1) == 0): - user_metadata.append(x[1:]) - elif (len(x) == 25) and (((int(x[0], 16) >> 3) & 1) == 1): - engine_metadata.append(x[1:]) - - return (user_metadata, engine_metadata) - - def metadata_history(self, metadata): - results = {} - user_metadata, engine_metadata = self.separate_metadata(metadata) - e_comment = ('Generated by Engine: {0.name}\n{0.description}\n\n' - 'Developer: {0.developer.user_handle}') - - if len(user_metadata) > 0: - metadata_ids = map(bson.objectid.ObjectId, user_metadata) - mongo_filter = Q(metadata__id=metadata_ids[0]) - for mid in metadata_ids[1:]: - mongo_filter |= Q(metadata__id=mid) - - matches = Function.objects.filter(mongo_filter).only('metadata') - for function in matches: - for metadata in function.metadata: - if metadata.id in metadata_ids: - data = metadata.dump(True) - _id = FIRSTDB.make_id(metadata.id, 0) - results[_id] = {'creator' : data['creator'], - 'history' : data['history']} - # Remove id from list to shorten list - del metadata_ids[metadata_ids.index(metadata.id)] - - # Provide information for engine created metadata... - for engine_id in engine_metadata: - engine = self.get_engine(engine_id) - if not engine: - continue - data = {'creator' : engine.name, - 'history' : [{'committed' : '', - 'name' : 'N/A', - 'prototype' : 'N/A', - 'comment' : e_comment.format(engine)}]} - results[FIRSTDB.make_id(engine_id, 8)] = data - - return results - - def applied(self, sample, user, _id, is_engine=False): - ''' - @returns Boolean. True if added to the applied list - False if not added to the applied list - ''' - if (not isinstance(user, User)) or (not isinstance(sample, Sample)): - return False - - key = [str(sample.id), str(user.id)] - if is_engine: - engine_id = bson.objectid.ObjectId(_id) - engine = Engine.objects(id=engine_id, - applied__contains=key) - - # Check if user has already applied the signature - if len(engine): - return True - - try: - engine = Engine.objects(id=engine_id).get() - except DoesNotExist: - # Engine does not exist - return False - - engine.applied.append(key) - engine.save() - - else: - metadata_id = bson.objectid.ObjectId(_id) - functions = Function.objects(metadata__id=metadata_id, - metadata__applied__contains=key) - - # Check if user has already applied the signature - if len(functions): - return True - - try: - function = Function.objects(metadata__id=metadata_id).get() - except DoesNotExist: - # Metadata does not exist - return False - - # Get metadata - for metadata in function.metadata: - if metadata.id == metadata_id: - metadata.applied.append(key) - break - - function.save() - - return True - - def unapplied(self, sample, user, _id, is_engine=False): - ''' - @returns Boolean. True if not in metadata's applied list - False if still in the applied list - ''' - if (not isinstance(sample, Sample)) or (not isinstance(user, User)): - return False - - key = [str(sample.id), str(user.id)] - if is_engine: - engine_id = bson.objectid.ObjectId(_id) - engine = Engine.objects(id=engine_id, - applied__contains=key) - - # Check if user has already applied the signature - if not len(engine): - return True - - try: - engine = Engine.objects(id=engine_id).get() - except DoesNotExist: - # Engine does not exist - return False - - engine.applied.remove(key) - engine.save() - - else: - metadata_id = bson.objectid.ObjectId(_id) - functions = Function.objects(metadata__id=metadata_id, - metadata__applied__contains=key) - - # Check if user does not have it applied already - if not len(functions): - return True - - try: - function = functions.get() - except DoesNotExist: - # Metadata does not exist - return True - - # Get metadata - for metadata in function.metadata: - if metadata.id == metadata_id: - metadata.applied.remove(key) - break - - function.save() - - return True - - def engines(self, active=True): - return Engine.objects(active=bool(active)) - - def get_engine(self, engine_id): - engines = Engine.objects(id = engine_id) - if not engines: - return None - - return engines[0] diff --git a/server/first/engines/basic_masking.py b/server/first/engines/basic_masking.py deleted file mode 100644 index c3ead1f..0000000 --- a/server/first/engines/basic_masking.py +++ /dev/null @@ -1,222 +0,0 @@ -#------------------------------------------------------------------------------- -# -# FIRST Engine: Basic Masking -# Author: Angel M. Villegas (anvilleg@cisco.com) -# Last Modified: March 2016 -# -# Uses Distorm3 to obtain instructions and then removes certain instruction -# details to normalize it into a standard form to be compared to other -# functions. -# -# Maskes out: -# - ESP/EBP Offsets -# - Absolute Calls?? -# - Global Offsets?? -# -# Requirements -# ------------ -# - Distorm3 -# -# Installation -# ------------ -# None -# -#------------------------------------------------------------------------------- - -# Python Modules -import re -from hashlib import sha256 - -# FIRST Modules -from first.error import FIRSTError -from first.engines import AbstractEngine -from first.engines.results import FunctionResult - -# Third Party Modules -from bson.objectid import ObjectId -from distorm3 import DecomposeGenerator, Decode32Bits, Decode64Bits, Decode16Bits -from mongoengine.queryset import DoesNotExist, MultipleObjectsReturned -from mongoengine import Document, StringField, ListField, IntField, \ - ObjectIdField - -class BasicMasking(Document): - sha256 = StringField(max_length=64, required=True) - architecture = StringField(max_length=64, required=True) - instructions = ListField(StringField(max_length=124), required=True) - total_bytes = IntField(required=True, default=0) - functions = ListField(ObjectIdField(), default=list) - - meta = { - 'indexes' : [('sha256', 'architecture', 'instructions')] - } - - def dump(self): - return {'sha256' : self.sha256, - 'architecture' : self.architecture, - 'instructions' : self.instructions, - 'total_bytes' : self.total_bytes, - 'functions' : self.function_list()} - - def function_list(self): - return [str(x) for x in self.functions] - - -class BasicMaskingEngine(AbstractEngine): - _name = 'BasicMasking' - _description = ('Masks ESP/EBP offsets, calls/jmps offsets, and global ' - 'offsets (Intel Only). Requires at least 8 instructions.') - _required_db_names = ['first_db'] - - def normalize(self, opcodes, architecture): - changed_bits = 0 - dt = None - mapping = {'intel32' : Decode32Bits, - 'intel64' : Decode64Bits, - 'intel16' : Decode16Bits} - if architecture in mapping: - dt = mapping[architecture] - else: - return (None, changed_bits, None) - - try: - normalized = [] - original = [] - for i in DecomposeGenerator(0, opcodes, dt): - # If disassembly is not valid then junk data has been sent - if not i.valid: - return (None, 0, None) - - original.append(i._toText()) - instr = i.mnemonic + ' ' - - # Special mnemonic masking (Call, Jmp, JCC) - if (i.mnemonic == 'CALL') or i.mnemonic.startswith('J'): - operand = i.operands[0]._toText() - - if 'Immediate' == i.operands[0].type: - instr += '0x' - changed_bits += i.operands[0].size - - else: - regex = '^\[R(S|I)P(\+|\-)0x[\da-f]+\]$' - if re.match(regex, operand): - instr += re.sub(regex, r'[R\1P\2', operand) + '0x]' - changed_bits += i.operands[0].dispSize - else: - # Nothing will be masked out - instr = i._toText() - - normalized.append(instr) - continue - - operand_instrs = [] - for operand_obj in i.operands: - operand = operand_obj._toText() - if ((re.match('^\[E(S|B)P', operand) or re.match('^\[R(I|S)P', operand)) - and operand_obj.dispSize): - # Offset from EBP/ESP and RIP/RSP - masked = operand.replace(hex(operand_obj.disp), '0x') - operand_instrs.append(masked) - changed_bits += operand_obj.dispSize - - elif 'Immediate' == operand_obj.type: - value = operand_obj.value - # Masking off immediates within the standard VA of the sample - if ((0x400000 <= value <= 0x500000) - or (0x10000000 <= value <= 0x20000000) - or (0x1C0000000 <= value <= 0x1D0000000) - or (0x140000000 <= value <= 0x150000000)): - operand_instrs.append('0x') - changed_bits += operand_obj.size - - else: - operand_instrs.append(operand) - - elif 'AbsoluterMemoryAddress' == operand_obj.type: - operand_instrs.append('0x') - changed_bits += operand_obj.dispSize - - elif 'AbsoluteMemory' == operand_obj.type: - masked = operand.replace(hex(operand_obj.disp), '0x') - operand_instrs.append(masked) - changed_bits += operand_obj.dispSize - - else: - operand_instrs.append(operand) - - normalized.append(instr + ', '.join(operand_instrs)) - - h_sha256 = sha256(''.join(normalized)).hexdigest() - return (normalized, changed_bits, h_sha256) - # For debugging - #return (original, normalized, changed_bits, h_sha256) - - except Exception as e: - return (None, changed_bits, None) - - def _add(self, function): - ''' - - ''' - opcodes = function['opcodes'] - architecture = function['architecture'] - normalized, changed, h_sha256 = self.normalize(opcodes, architecture) - - if (not h_sha256) or (not normalized) or (8 > len(normalized)): - return - - try: - db_obj = BasicMasking.objects( sha256=h_sha256, - architecture=architecture, - instructions=normalized).get() - except DoesNotExist: - db_obj = BasicMasking( sha256=h_sha256, - architecture=architecture, - instructions=normalized, - total_bytes=len(opcodes)) - - function_id = ObjectId(function['id']) - if function_id not in db_obj.functions: - db_obj.functions.append(function_id) - db_obj.save() - - def _scan(self, opcodes, architecture, apis): - '''Returns List of tuples (function ID, similarity percentage)''' - db = self._dbs['first_db'] - normalized, changed, h_sha256 = self.normalize(opcodes, architecture) - - if (not h_sha256) or (not normalized) or (8 > len(normalized)): - return - - try: - db_obj = BasicMasking.objects( sha256=h_sha256, - architecture=architecture, - instructions=normalized).get() - except DoesNotExist: - return None - - results = [] - for function_id in db_obj.function_list(): - function = db.find_function(_id=ObjectId(function_id)) - - if not function or not function.metadata: - continue - - # Similarity = 90% (opcodes and the masking changes) - # + 10% (api overlap) - similarity = 100 - ((changed / (len(opcodes) * 8.0)) * 100) - if similarity > 90.0: - similarity = 90.0 - - # The APIs will count up to 10% of the similarity score - total_apis = len(function.apis) - overlap = float(len(set(function.apis).intersection(apis))) - if 0 != total_apis: - similarity += (overlap / total_apis) * 10 - - results.append(FunctionResult(function_id, similarity)) - - return results - - def _uninstall(self): - BasicMasking.drop_collection() diff --git a/server/first/engines/mnemonic_hash.py b/server/first/engines/mnemonic_hash.py deleted file mode 100644 index 19b23ae..0000000 --- a/server/first/engines/mnemonic_hash.py +++ /dev/null @@ -1,145 +0,0 @@ -#------------------------------------------------------------------------------- -# -# FIRST Engine: Mnemonic Hash -# Uses Distorm3 to obtain mnemonics from the opcodes, reduces the opcodes to -# a single string and hashes it for future lookup -# -# Copyright (C) 2016 Angel M. Villegas -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Requirements -# ------------ -# - distorm3 -# - mongoengine -# -#------------------------------------------------------------------------------- - -# Python Modules -from hashlib import sha256 - -# FIRST Modules -from first.error import FIRSTError -from first.engines import AbstractEngine -from first.engines.results import FunctionResult - -# Third Party Modules -from bson.objectid import ObjectId -from distorm3 import DecomposeGenerator, Decode32Bits, Decode64Bits, Decode16Bits -from mongoengine.queryset import DoesNotExist, MultipleObjectsReturned -from mongoengine import Document, StringField, ListField, ObjectIdField - -class MnemonicHash(Document): - sha256 = StringField(max_length=64, required=True) - architecture = StringField(max_length=64, required=True) - functions = ListField(ObjectIdField(), default=list) - - meta = { - 'indexes' : [('sha256', 'architecture')] - } - - def dump(self): - return {'sha256' : self.sha256, - 'architecture' : self.architecture, - 'functions' : self.function_list()} - - def function_list(self): - return [str(x) for x in self.functions] - - -class MnemonicHashEngine(AbstractEngine): - _name = 'MnemonicHash' - _description = ('Uses mnemonics from the opcodes to generate a hash ' - '(Intel Only). Requires at least 8 mnemonics.') - _required_db_names = ['first_db'] - - def mnemonic_hash(self, opcodes, architecture): - dt = None - mapping = {'intel32' : Decode32Bits, - 'intel64' : Decode64Bits, - 'intel16' : Decode16Bits} - if architecture in mapping: - dt = mapping[architecture] - else: - return (None, None) - - try: - iterable = DecomposeGenerator(0, opcodes, dt) - - # Uses valid to ensure we are not creating hashes with 'db 0xYY' - mnemonics = [d.mnemonic for d in iterable if d.valid] - return (mnemonics, sha256(''.join(mnemonics)).hexdigest()) - - except Exception as e: - return (None, None) - - def _add(self, function): - ''' - Nothing needs to be implemented since the Function Model has the - sha256 of the opcodes - ''' - opcodes = function['opcodes'] - architecture = function['architecture'] - mnemonics, mnemonic_sha256 = self.mnemonic_hash(opcodes, architecture) - - if (not mnemonic_sha256) or (not mnemonics) or (8 > len(mnemonics)): - return - - try: - db_obj = MnemonicHash.objects( sha256=mnemonic_sha256, - architecture=architecture).get() - except DoesNotExist: - db_obj = MnemonicHash( sha256=mnemonic_sha256, - architecture=architecture) - - function_id = ObjectId(function['id']) - if function_id not in db_obj.functions: - db_obj.functions.append(function_id) - db_obj.save() - - def _scan(self, opcodes, architecture, apis): - '''Returns List of tuples (function ID, similarity percentage)''' - db = self._dbs['first_db'] - mnemonics, mnemonic_sha256 = self.mnemonic_hash(opcodes, architecture) - - if (not mnemonic_sha256) or (not mnemonics) or (8 > len(mnemonics)): - return - - try: - db_obj = MnemonicHash.objects( sha256=mnemonic_sha256, - architecture=architecture).get() - except DoesNotExist: - return None - - results = [] - for function_id in db_obj.function_list(): - similarity = 75.0 - function = db.find_function(_id=ObjectId(function_id)) - - if not function or not function.metadata: - continue - - # The APIs will count up to 10% of the similarity score - total_apis = len(function.apis) - overlap = float(len(set(function.apis).intersection(apis))) - if 0 != total_apis: - similarity += (overlap / total_apis) * 10 - - results.append(FunctionResult(function_id, similarity)) - - return results - - def _uninstall(self): - MnemonicHash.drop_collection() diff --git a/server/first/models.py b/server/first/models.py deleted file mode 100644 index 1846882..0000000 --- a/server/first/models.py +++ /dev/null @@ -1,197 +0,0 @@ -#------------------------------------------------------------------------------- -# -# FIRST MongoDB Models -# Copyright (C) 2016 Angel M. Villegas -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Requirements -# ------------ -# mongoengine (https://pypi.python.org/pypi/mongoengine/) -# -#------------------------------------------------------------------------------- - - -# Python Modules -from __future__ import unicode_literals -import datetime - -# Third Party Modules -from bson.objectid import ObjectId -from mongoengine import Document, StringField, UUIDField, \ - DateTimeField, LongField, ReferenceField, \ - BinaryField, ListField, BooleanField, ObjectIdField, \ - IntField, EmbeddedDocument, EmbeddedDocumentListField - -class User(Document): - name = StringField(max_length=128, required=True) - email = StringField(max_length=254, unique=True) - handle = StringField(max_length=32, required=True) - number = IntField(required=True) - api_key = UUIDField(required=True, unique=True) - created = DateTimeField(default=datetime.datetime.utcnow, required=True) - rank = LongField(default=0) - active = BooleanField(default=True) - - service = StringField(max_length=16, required=True) - auth_data = StringField(max_length=4096, required=True) - - meta = { - 'indexes' : [('handle', 'number'), 'api_key', 'email'] - } - - @property - def user_handle(self): - return '{0.handle}#{0.number:04d}'.format(self) - - def dump(self, full=False): - data = {'handle' : self.user_handle} - - if full: - data.update({ 'id' : str(self.id), - 'name' : self.name, - 'email' : self.email, - 'api_key' : self.api_key, - 'rank' : self.rank, - 'created' : self.created, - 'active' : self.active}) - - return data - - -class Engine(Document): - name = StringField(max_length=16, required=True, unique=True) - description = StringField(max_length=128, required=True) - path = StringField(max_length=256, required=True) - obj_name = StringField(max_length=32, required=True) - applied = ListField(default=list) - developer = ReferenceField(User) - active = BooleanField(default=False) - - meta = { - 'indexes' : ['name'] - } - - def dump(self, full=False): - data = {'name' : self.name, - 'description' : self.description, - 'rank' : self.rank, - 'developer' : self.developer.user_handle} - - if full: - data.update({'id' : str(self.id), 'path' : self.path}) - - return data - - @property - def rank(self): - return len(self.applied) - - -class Metadata(EmbeddedDocument): - id = ObjectIdField(required=True, default=lambda: ObjectId()) - user = ReferenceField(User) - name = ListField(StringField(max_length=128), default=list) - prototype = ListField(StringField(max_length=256), default=list) - comment = ListField(StringField(max_length=512), default=list) - committed = ListField(DateTimeField(), default=list) - applied = ListField(default=list) - - meta = { - 'indexes' : ['user'] - } - - def dump(self, full=False): - data = {'creator' : self.user.user_handle, - 'name' : self.name[0], - 'prototype' : self.prototype[0], - 'comment' : self.comment[0], - 'rank' : len(self.applied)} - - if full: - data['history'] = [] - for i in xrange(len(self.name) - 1, -1, -1): - # Convert back with: - # datetime.datetime.strptime(
, '%Y-%m-%dT%H:%M:%S.%f') - committed = self.committed[i].isoformat() - data['history'].append({'name' : self.name[i], - 'prototype' : self.prototype[i], - 'comment' : self.comment[i], - 'committed' : committed}) - - return data - - def has_changed(self, name, prototype, comment): - if (not self.name) or (not self.prototype) or (not comment): - return True - - if ((self.name[0] != name) - or (self.prototype[0] != prototype) - or (self.comment[0] != comment)): - return True - - return False - - @property - def rank(self): - return len(self.applied) - -# Use bson.Binary to insert binary data -class Function(Document): - sha256 = StringField(max_length=64) - opcodes = BinaryField() - apis = ListField(StringField(max_length=128), default=list) - metadata = EmbeddedDocumentListField(Metadata, default=list) - # Return value from idaapi.get_file_type_name() - architecture = StringField(max_length=64, required=True) - - meta = { - 'indexes' : [] - } - - def dump(self): - return {'id' : self.id, - 'opcodes' : self.opcodes, - 'apis' : self.apis, - 'metadata' : [str(x.id) for x in self.metadata], - 'architecture' : self.architecture, - 'sha256' : self.sha256} - - -class Sample(Document): - md5 = StringField(max_length=32, required=True) - crc32 = IntField(required=True) - sha1 = StringField(max_length=40) - sha256 = StringField(max_length=64) - seen_by = ListField(ReferenceField(User), default=list) - functions = ListField(ReferenceField(Function), default=list) - last_seen = DateTimeField(default=datetime.datetime.utcnow) - - meta = { - 'indexes' : [('md5', 'crc32')] - } - - def dump(self): - data = {'md5' : self.md5, 'crc32' : self.crc32, - 'seen_by' : [str(x.id) for x in self.seen_by], - 'functions' : [str(x.id) for x in self.functions]} - - if 'sha1' in self: - data['sha1'] = self.sha1 - - if 'sha256' in self: - data['sha256'] = self.sha256 - - return data diff --git a/server/first/settings.py b/server/first/settings.py index 53faf89..08ecb18 100644 --- a/server/first/settings.py +++ b/server/first/settings.py @@ -14,7 +14,9 @@ import json # Read in configuration data -FIRST_CONFIG_FILE = 'first_config.json' +FIRST_CONFIG_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '..', + 'first_config.json') CONFIG = {} try: config_data = json.load(file(FIRST_CONFIG_FILE)) diff --git a/server/first_core/__init__.py b/server/first_core/__init__.py new file mode 100644 index 0000000..4b7b779 --- /dev/null +++ b/server/first_core/__init__.py @@ -0,0 +1,27 @@ +#------------------------------------------------------------------------------- +# +# Intializes FIRST's DBManager and EngineManager +# Copyright (C) 2016 Angel M. Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +#------------------------------------------------------------------------------- + +# FIRST Modules +from first_core.dbs import FIRSTDBManager +from first_core.engines import FIRSTEngineManager + +DBManager = FIRSTDBManager() +EngineManager = FIRSTEngineManager(DBManager) diff --git a/server/first/auth.py b/server/first_core/auth.py similarity index 98% rename from server/first/auth.py rename to server/first_core/auth.py index 724399e..756b7e6 100644 --- a/server/first/auth.py +++ b/server/first_core/auth.py @@ -38,8 +38,8 @@ # FIRST Modules # TODO: Use DBManager to get user objects and do User operations -from first.models import User -from first.error import FIRSTError +from first_core.models import User +from first_core.error import FIRSTError # Thirdy Party import httplib2 @@ -57,7 +57,7 @@ def __init__(self, message): def verify_api_key(api_key): - users = User.objects(api_key=api_key) + users = User.objects.filter(api_key=api_key) if not users: return None diff --git a/server/first/dbs/__init__.py b/server/first_core/dbs/__init__.py similarity index 97% rename from server/first/dbs/__init__.py rename to server/first_core/dbs/__init__.py index 3b28548..ce27b71 100644 --- a/server/first/dbs/__init__.py +++ b/server/first_core/dbs/__init__.py @@ -24,7 +24,7 @@ from hashlib import md5 # FIRST Modules -from first.error import FIRSTError +from first_core.error import FIRSTError # Class for FirstDB related exceptions class FIRSTDBError(FIRSTError): @@ -110,6 +110,6 @@ def get(self, db_name): # FIRST DB Classes -from first.dbs.builtin_db import FIRSTDB +from first_core.dbs.builtin_db import FIRSTDB possible_dbs = [FIRSTDB] diff --git a/server/first_core/dbs/builtin_db.py b/server/first_core/dbs/builtin_db.py new file mode 100644 index 0000000..ef1dea2 --- /dev/null +++ b/server/first_core/dbs/builtin_db.py @@ -0,0 +1,433 @@ +#------------------------------------------------------------------------------- +# +# FIRST DB Module for completing operations with the MongoDB backend +# Copyright (C) 2016 Angel M. Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Requirements +# ------------ +# - werkzeug +# +#------------------------------------------------------------------------------- + +# Python Modules +import re +import math +import json +import hashlib +import ConfigParser +from hashlib import md5 + +# Third Party Modules +import bson + +from django.utils import timezone +from django.core.paginator import Paginator +from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned + +# FIRST Modules +from first_core.dbs import AbstractDB +from first_core.util import make_id, parse_id, separate_metadata, \ + is_engine_metadata +from first_core.models import User, Sample, \ + Engine, \ + Metadata, MetadataDetails, AppliedMetadata, \ + Function, FunctionApis + + +class FIRSTDB(AbstractDB): + _name = 'first_db' + standards = { 'intel16', 'intel32', 'intel64', 'arm32', 'arm64', 'mips', + 'ppc', 'sparc', 'sysz'} + + # + # Functions called by FIRST Framework + #-------------------------------------------------------------------------- + def __init__(self, config): + ''' + Constructor. + + @param conf: ConfigParser.RawConfigParser + ''' + self._is_installed = True + ''' + section = 'mongodb_settings' + + if (not config.has_section(section) + or not config.has_option(section, 'db')): + raise FirstDBError('DB settings not available', skip=True) + + if section.upper() not in app.config: + app.config[section.upper()] = {} + + app.config[section.upper()]['db'] = conf.get(section, 'db') + self.db.init_app(app) + ''' + + def get_architectures(self): + field = 'architecture' + architectures = Function.objects.values(field).distinct() + + standards = FIRSTDB.standards.copy() + standards.update({x[field] for x in architectures}) + return list(standards) + + def get_sample(self, md5_hash, crc32, create=False): + try: + # Get Sample from DB + return Sample.objects.get(md5=md5_hash, crc32=crc32) + + except ObjectDoesNotExist: + if not create: + return None + + # Create Sample for DB + sample = Sample(md5=md5_hash, crc32=crc32) + sample.last_seen = timezone.now() + sample.save() + return sample + + except MultipleObjectsReturned: + # TODO: log occurance + raise + + def sample_seen_by_user(self, sample, user): + if (not isinstance(sample, Sample)) or (not isinstance(user, User)): + return None + + if not Sample.objects.filter(pk=sample.id, seen_by=user).count(): + sample.seen_by.add(user) + + def checkin(self, user, md5_hash, crc32, sha1_hash=None, sha256_hash=None): + ''' + TODO: + + @returns String error message on Failure + None + ''' + if not isinstance(user, User): + return False + + # Validate data + if ((not re.match('^[a-f\d]{32}$', md5_hash)) + or (sha1_hash and not re.match('^[a-f\d]{40}$', sha1_hash)) + or (sha256_hash and not re.match('^[a-f\d]{64}$', sha256_hash))): + return False + + sample = self.get_sample(md5_hash, crc32, True) + if not sample: + return False + + sample.last_seen = timezone.now() + if not Sample.objects.filter(pk=sample.id, seen_by=user).count(): + sample.seen_by.add(user) + + if None != sha1_hash: + sample.sha1 = sha1_hash + + if None != sha256_hash: + sample.sha256 = sha256_hash + + sample.save() + return True + + def get_function_metadata(self, _id): + '''Get the metadata associated with the provided Function ID + + Args: + _id (:obj:`int`): ID from Function model + + Returns: + QuerySet. + ''' + return Metadata.objects.filter(function__pk=_id) + + def get_function(self, opcodes, architecture, apis, create=False, **kwargs): + sha256_hash = hashlib.sha256(opcodes).hexdigest() + function = None + + try: + function = Function.objects.get(sha256=sha256_hash, + opcodes=bson.Binary(opcodes), + architecture=architecture) #, + #apis__api=apis) + except ObjectDoesNotExist: + if create: + # Create function and add it to sample + function = Function.objects.create( sha256=sha256_hash, + opcodes=bson.Binary(opcodes), + architecture=architecture) + + apis_ = [FunctionApis.objects.get_or_create(x)[0] for x in apis] + for api in apis_: + function.apis.add(api) + + return function + + def get_all_functions(self): + try: + return Function.objects.all() + + except: + return [] + + def find_function(self, _id=None, opcodes=None, apis=None, architecture=None, h_sha256=None): + try: + # User function ID + if None != _id: + return Function.objects.get(pk=_id) + + # User opcodes and apis + elif None not in [opcodes, apis]: + return Function.objects.get(opcodes=opcodes, apis=apis) + + # Use hash, architecture + elif None not in [architecture, h_sha256]: + return Function.objects.get(sha256=h_sha256, + architecture=architecture) + + else: + return None + + except ObjectDoesNotExist: + return None + + except MultipleObjectsReturned: + # TODO: Log + raise + + def add_function_to_sample(self, sample, function): + if (not isinstance(sample, Sample)) or (not isinstance(function, Function)): + return False + + if not Sample.objects.filter(pk=sample.id, functions=function).count(): + sample.functions.add(function) + + return True + + def add_metadata_to_function(self, user, function, name, prototype, comment, **kwargs): + if (not isinstance(function, Function)) or (not isinstance(user, User)): + return None + + # Check to see if user already has metadata associated with the sample + metadata = None + print function.id + print user.id + if Function.objects.filter(pk=function.id, metadata__user=user).count(): + # Metadata already exists + metadata = Metadata.objects.get(function=function, user=user) + + else: + metadata = Metadata.objects.create(user=user) + function.metadata.add(metadata) + + if metadata.has_changed(name, prototype, comment): + md = MetadataDetails.objects.create(name=name, + prototype=prototype, + comment=comment) + metadata.details.add(md) + + return metadata.id + + def get_metadata_list(self, metadata): + results = [] + metadata_ids, engine_metadata = separate_metadata(metadata) + + for _id, metadata in Metadata.objects.in_bulk(metadata_ids).iteritems(): + data = metadata.dump() + data['id'] = make_id(0, metadata=metadata.id) + results.append(data) + + for flag, _id, metadata_id in engine_metadata: + engines = Engine.objects.get(pk=_id) + # TODO: Send metadata_id to engine for more info + if (not engines) or (len(engines) > 1): + continue + + data = {'id' : make_id(flag, metadata_id, _id), + 'engine' : engine.name, + 'description' : engine.description} + results.append(data) + + return results + + def delete_metadata(self, user, metadata_id): + if not isinstance(user, User): + return False + + user_metadata, engine_metadata = separate_metadata([metadata_id]) + if not user_metadata: + return False + + # User must be the creator of the metadata to delete it + metadata_id = user_metadata[0] + try: + metadata = Metadata.objects.get(pk=metadata_id, user=user) + metadata.delete() + return True + + except ObjectDoesNotExist: + return False + + def created(self, user, page, max_metadata=20): + pages = 0 + results = [] + + if (page < 1) or (not isinstance(user, User)): + return (results, pages) + + p = Paginator(Metadata.objects.filter(user=user), max_metadata) + pages = p.num_pages + + if page > pages: + return (results, pages) + + for metadata in p.page(page): + temp = metadata.dump() + temp['id'] = make_id(0, metadata=metadata.id) + results.append(temp) + + return (results, pages) + + def metadata_history(self, metadata): + results = {} + metadata_ids, engine_metadata = separate_metadata(metadata) + e_comment = ('Generated by Engine: {0.name}\n{0.description}\n\n' + 'Developer: {0.developer.user_handle}') + + for _id, metadata in Metadata.objects.in_bulk(metadata_ids).iteritems(): + data = metadata.dump(True) + result_key = make_id(0, metadata=_id) + results[result_key] = { 'creator' : data['creator'], + 'history' : data['history']} + + # Provide information for engine created metadata... + for flag, engine_id, _id in engine_metadata: + engine = self.get_engine(engine_id) + if not engine: + continue + data = {'creator' : engine.name, + 'history' : [{'committed' : '', + 'name' : 'N/A', + 'prototype' : 'N/A', + 'comment' : e_comment.format(engine)}]} + result_key = make_id(flag, engine=engine_id, metadata=_id) + results[result_key] = data + + return results + + def applied(self, sample, user, _id): + ''' + @returns Boolean. True if added to the applied list + False if not added to the applied list + ''' + if (not isinstance(user, User)) or (not isinstance(sample, Sample)): + return False + + flag, engine_data, metadata_id = parse_id(_id) + if is_engine_metadata(_id): + pass + # TODO: add this capability back again + #engine_id = _id + #engine = Engine.objects(id=engine_id, + # applied__contains=key) + + ## Check if user has already applied the signature + #if len(engine): + # return True + + #try: + # engine = Engine.objects(id=engine_id).get() + #except ObjectDoesNotExist: + # # Engine does not exist + # return False + + #engine.applied.append(key) + #engine.save() + + else: + try: + # Ensure Metadata exists + metadata = Metadata.objects.get(pk=metadata_id) + except ObjectDoesNotExist: + # Metadata does not exist + return False + + r = AppliedMetadata.objects.get_or_create( user=user, + sample=sample, + metadata=metadata) + + return True + + def unapplied(self, sample, user, _id): + ''' + @returns Boolean. True if not in metadata's applied list + False if still in the applied list + ''' + if (not isinstance(sample, Sample)) or (not isinstance(user, User)): + return False + + flag, engine_data, metadata_id = parse_id(_id) + if is_engine_metadata(_id): + pass + # TODO: add this capability back again + #engine_id = _id + #engine = Engine.objects(id=engine_id, + # applied__contains=key) + + ## Check if user has already applied the signature + #if not len(engine): + # return True + + #try: + # engine = Engine.objects(id=engine_id).get() + #except ObjectDoesNotExist: + # # Engine does not exist + # return False + + #engine.applied.remove(key) + #engine.save() + + else: + try: + # Ensure Metadata exists + metadata = Metadata.objects.get(pk=metadata_id) + except ObjectDoesNotExist: + # Metadata does not exist + return False + + try: + data = AppliedMetadata.objects.get( user=user, + sample=sample, + metadata=metadata) + data.delete() + return True + + except ObjectDoesNotExist: + return True + + + return False + + def engines(self, active=True): + return Engine.objects.filter(active=bool(active)) + + def get_engine(self, engine_id): + engines = Engine.objects.filter(pk=engine_id) + if not engines.count(): + return None + + return engines.first() diff --git a/server/first/engines/__init__.py b/server/first_core/engines/__init__.py similarity index 98% rename from server/first/engines/__init__.py rename to server/first_core/engines/__init__.py index 4fce345..19d708c 100644 --- a/server/first/engines/__init__.py +++ b/server/first_core/engines/__init__.py @@ -15,9 +15,9 @@ import sys # First Modules -from first.error import FIRSTError -from first.dbs import FIRSTDBManager -from first.engines.results import Result +from first_core.error import FIRSTError +from first_core.dbs import FIRSTDBManager +from first_core.engines.results import Result # Third Party Modules from bson.objectid import ObjectId diff --git a/server/first/engines/exact_match.py b/server/first_core/engines/exact_match.py similarity index 90% rename from server/first/engines/exact_match.py rename to server/first_core/engines/exact_match.py index 3a78eb3..c9c7b32 100644 --- a/server/first/engines/exact_match.py +++ b/server/first_core/engines/exact_match.py @@ -23,9 +23,9 @@ from hashlib import sha256 # FIRST Modules -from first.error import FIRSTError -from first.engines import AbstractEngine -from first.engines.results import FunctionResult +from first_core.error import FIRSTError +from first_core.engines import AbstractEngine +from first_core.engines.results import FunctionResult class ExactMatchEngine(AbstractEngine): _name = 'ExactMatch' @@ -50,7 +50,7 @@ def _scan(self, opcodes, architecture, apis): return None similarity = 90.0 - if set(function.apis) == set(apis): + if set(function.apis.values()) == set(apis): similarity += 10.0 return [FunctionResult(str(function.id), similarity)] diff --git a/server/first/engines/results.py b/server/first_core/engines/results.py similarity index 90% rename from server/first/engines/results.py rename to server/first_core/engines/results.py index afbe89c..5e3a6db 100644 --- a/server/first/engines/results.py +++ b/server/first_core/engines/results.py @@ -5,6 +5,7 @@ # Last Modified: August 2016 # #------------------------------------------------------------------------------- +from first_core.util import make_id class Result(object): '''Abstract class to encapsulate results returned from Engines''' @@ -87,23 +88,19 @@ class FunctionResult(Result): This Result class is crafted for general engines that want to return a list of functions to the EngineManager - ID values are 25 hex character string. For metadata created by users, - not engines, the most significant bit is not set. + ID values are 26 hex character string. For metadata created by users, + not engines, the flag byte not set. ''' def _get_metadata(self, db): if not hasattr(self, '_metadata'): - func = db.find_function(_id=self.id) - if not func: - return None - - self._metadata = func.metadata + self._metadata = list(db.get_function_metadata(self.id)) self._metadata.sort(key=lambda x: x.rank) data = None if len(self._metadata) > 0: metadata = self._metadata.pop() data = metadata.dump() - data['id'] = '0{}'.format(metadata.id) + data['id'] = make_id(0, metadata=metadata.id) return data @@ -119,9 +116,11 @@ class EngineResult(Result): ''' def _init(self, **kwargs): self._data = None + self._metadata = 0 + if 'data' in kwargs: self._data = kwargs['data'] - self._data['id'] = '8{}'.format(self.id) + self._data['id'] = make_id(1, self._metadata, self.id) def _get_metadata(self, db): data = self._data diff --git a/server/first/engines/skeleton.py_ b/server/first_core/engines/skeleton.py_ similarity index 96% rename from server/first/engines/skeleton.py_ rename to server/first_core/engines/skeleton.py_ index e005e82..2954768 100644 --- a/server/first/engines/skeleton.py_ +++ b/server/first_core/engines/skeleton.py_ @@ -20,8 +20,8 @@ # FIRST Modules -from first.error import FIRSTError -from first.engines import AbstractEngine +from first_core.error import FIRSTError +from first_core.engines import AbstractEngine # Third Party Modules diff --git a/server/first/error.py b/server/first_core/error.py similarity index 100% rename from server/first/error.py rename to server/first_core/error.py diff --git a/server/first_core/models.py b/server/first_core/models.py new file mode 100644 index 0000000..cdfb45b --- /dev/null +++ b/server/first_core/models.py @@ -0,0 +1,27 @@ +#------------------------------------------------------------------------------- +# +# FIRST Django ORM Models +# Copyright (C) 2017 Angel M. Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +#------------------------------------------------------------------------------- + + +# Python Modules +from __future__ import unicode_literals + +# FIRST Modules +from www.models import * diff --git a/server/first_core/util.py b/server/first_core/util.py new file mode 100644 index 0000000..96fee63 --- /dev/null +++ b/server/first_core/util.py @@ -0,0 +1,105 @@ +#------------------------------------------------------------------------------- +# +# FIRST Utility and Helper Functions +# Copyright (C) 2017 Angel M. Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +#------------------------------------------------------------------------------- + + +def make_id(flags, metadata=0, engine=0): + '''Creates an unique ID for client use. + + Args: + flag (:obj:`int`): Value between 0 and 255. + MSB set when ID is from an engine. + metadata (:obj:`int`, optional): The Metadata model ID + engine (:obj:`int`, optional): The Engine model ID + + Returns: + string: A 26 byte hex string + ''' + data = [flags, metadata, engine] + if (None in data) or (not all([type(x) in [int, long] for x in data])): + return None + + if ((engine > (2**32 - 1)) or (metadata > (2**64 - 1)) + or (flags > (2**8 - 1))): + return None + + return '{:02x}{:08x}{:016x}'.format(flags, engine, metadata) + + +def parse_id(_id): + if len(_id) != 26: + return (None, None, None) + + _id = int(_id, 16) + flag = _id >> (8 * 12) + engine_data = (_id >> (8 * 8)) & (0xFFFFFFFF) + metadata_id = _id & 0xFFFFFFFFFFFFFFFF + + return (flag, engine_data, metadata_id) + +def separate_metadata(ids): + '''Returns parsed IDs for user and engine generated metadata. + + Args: + ids (:obj:`list`): List of 26 hex strings + metadata (:obj:`int`, optional): The Metadata model ID + engine (:obj:`int`, optional): The Engine model ID + + Returns: + tuple: Index 0 contains user created metadata IDs + Index 1 contains engine created metadata details + ''' + # ID: Flag Byte | Engine 4 bytes | Metadata 8 bytes = 13 bytes + # 26 ASCII characters + # If Flag is set then more processing is needed and it is not + # metadata created by the user + user_metadata = [] + engine_metadata = [] + for x in ids: + flag, engine_data, metadata_id = parse_id(x) + if None in [flag, engine_data, metadata_id]: + continue + + if not flag: + user_metadata.append(metadata_id) + else: + engine_metadata.append((flag, engine_data, metadata_id)) + + return (user_metadata, engine_metadata) + +def is_user_metadata(_id): + details = parse_id(_id) + if None in details: + return False + + if not details[0]: + return True + + return False + +def is_engine_metadata(_id): + details = parse_id(_id) + if None in details: + return False + + if details[0]: + return True + + return False diff --git a/server/rest/urls.py b/server/rest/urls.py index d75b467..013f6f4 100644 --- a/server/rest/urls.py +++ b/server/rest/urls.py @@ -20,8 +20,7 @@ views.metadata_unapplied, name='metadata_unapplied'), url(r'^metadata/get/(?i)(?P[A-F\d]{8}\-(?:[A-F\d]{4}\-){3}[A-F\d]{12})$', views.metadata_get, name='metadata_get'), - # TODO: migrate to ids with 25 characters - url(r'^metadata/delete/(?i)(?P[A-F\d]{8}\-(?:[A-F\d]{4}\-){3}[A-F\d]{12})/(?i)(?P<_id>[A-F\d]{24,25})$', + url(r'^metadata/delete/(?i)(?P[A-F\d]{8}\-(?:[A-F\d]{4}\-){3}[A-F\d]{12})/(?i)(?P<_id>[A-F\d]{26})$', views.metadata_delete, name='metadata_delete'), url(r'^metadata/created/(?i)(?P[A-F\d]{8}\-(?:[A-F\d]{4}\-){3}[A-F\d]{12})$', views.metadata_created, name='metadata_created'), diff --git a/server/rest/views.py b/server/rest/views.py index e83a76f..2fd67fd 100644 --- a/server/rest/views.py +++ b/server/rest/views.py @@ -12,14 +12,15 @@ from django.views.decorators.http import require_GET, require_POST # FIRST Modules -from first import DBManager, EngineManager -from first.auth import verify_api_key, Authentication, FIRSTAuthError, \ +from first_core import DBManager, EngineManager +from first_core.util import make_id, is_engine_metadata +from first_core.auth import verify_api_key, Authentication, FIRSTAuthError, \ require_login, require_apikey MAX_FUNCTIONS = 20 MAX_METADATA = 20 -VALIDATE_IDS = lambda x: re.match('^[a-f\d]{24,25}$', x) +VALIDATE_IDS = lambda x: re.match('^[A-Fa-f\d]{26}$', x) #----------------------------------------------------------------------------- # @@ -217,8 +218,7 @@ def metadata_add(request, md5_hash, crc32, user): f = functions[client_key] # Check if the id sent back is from an engine, if so skip it - if (('id' in f) and (f['id']) and (len(f['id']) == 25) - and ((int(f['id'][0]) >> 3) & 1)): + if (('id' in f) and (f['id']) and not is_engine_metadata(f['id'])): continue; function = db.get_function(create=True, **f) @@ -238,7 +238,8 @@ def metadata_add(request, md5_hash, crc32, user): 'function in FIRST')}) # The '0' indicated the metadata_id is from a user. - results[client_key] = '0{}'.format(metadata_id) + print metadata_id + results[client_key] = make_id(0, metadata=metadata_id) # Set the user as applying the metadata db.applied(sample, user, metadata_id) @@ -295,6 +296,7 @@ def metadata_history(request, user): return render(request, 'rest/error_json.html', {'msg' : 'Exceeded max bulk request'}) + print metadata if None in map(VALIDATE_IDS, metadata): return render(request, 'rest/error_json.html', {'msg' : 'Invalid metadata id'}) @@ -544,26 +546,15 @@ def metadata_status_change(_id, user, md5_hash, crc32, applied): return render(None, 'rest/error_json.html', {'msg' : 'Invalid metadata information'}) - # Currently 24-25, early beta used a 24 byte string, moved to 25 byte one - # TODO: Change to 25 only once it is closed beta time if not VALIDATE_IDS(_id): return render(None, 'rest/error_json.html', {'msg' : 'Invalid id value'}) - metadata_id = _id - if len(_id) == 25: - metadata_id = _id[1:] - db = DBManager.first_db if not db: return render(None, 'rest/error_json.html', {'msg' : 'Unable to connect to FIRST DB'}) - is_engine = False - if ((len(_id) == 25) and (int(_id[0], 16) & 0x8)): - # Metadata came from an engine - is_engine = True - # Get sample sample = db.get_sample(md5_hash, crc32) if not sample: @@ -571,8 +562,8 @@ def metadata_status_change(_id, user, md5_hash, crc32, applied): {'msg' : 'Sample does not exist in FIRST'}) if applied: - results = db.applied(sample, user, metadata_id, is_engine) + results = db.applied(sample, user, _id) else: - results = db.unapplied(sample, user, metadata_id, is_engine) + results = db.unapplied(sample, user, _id) return HttpResponse(json.dumps({'failed' : False, 'results' : results})) diff --git a/server/utilities/__init__.py b/server/utilities/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/server/utilities/engine_shell.py b/server/utilities/engine_shell.py index bb68cf6..07fec89 100644 --- a/server/utilities/engine_shell.py +++ b/server/utilities/engine_shell.py @@ -31,9 +31,10 @@ # FIRST Modules import first.settings -from first.models import Engine, User -from first.engines import AbstractEngine -from first import DBManager, EngineManager +import first.wsgi +from first_core.models import Engine, User +from first_core.engines import AbstractEngine +from first_core import DBManager, EngineManager class EngineCmd(Cmd): @@ -95,7 +96,7 @@ def do_list(self, line): print 'No engines are currently installed' return - for engine in Engine.objects: + for engine in Engine.objects.all(): name = engine.name description = engine.description print '+{}+{}+'.format('-' * 18, '-' * 50) @@ -139,7 +140,7 @@ def do_install(self, line): try: path, obj_name, email = line.split(' ') - developer = User.objects(email=email).get() + developer = User.objects.get(email=email) __import__(path) module = sys.modules[path] @@ -160,9 +161,11 @@ def do_install(self, line): return e.install() - engine = Engine(name=e.name, description=e.description, path=path, - obj_name=obj_name, developer=developer, active=True) - engine.save() + engine = Engine.objects.create( name=e.name, + description=e.description, + path=path, + obj_name=obj_name, + developer=developer, active=True) print 'Engine added to FIRST' return @@ -286,7 +289,7 @@ def do_populate(self, line): print 'The below errors occured:\n{}'.format('\n '.join(errors)) def _get_db_engine_obj(self, name): - engine = Engine.objects(name=name) + engine = Engine.objects.filter(name=name) if not engine: print 'Unable to locate Engine "{}"'.format(name) return @@ -294,7 +297,7 @@ def _get_db_engine_obj(self, name): if len(engine) > 1: print 'More than one engine "{}" exists'.format(name) for e in engine: - print ' - {}'.format(e.name) + print ' - {}: {}'.format(e.name, e.description) return diff --git a/server/utilities/mongo_to_django_orm.py b/server/utilities/mongo_to_django_orm.py new file mode 100644 index 0000000..70150c9 --- /dev/null +++ b/server/utilities/mongo_to_django_orm.py @@ -0,0 +1,347 @@ +#! /usr/bin/python +#------------------------------------------------------------------------------- +# +# FIRST MongoDB to Django ORM Conversion Script +# Copyright (C) 2016 Angel M. Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Requirements +# ------------ +# mongoengine (https://pypi.python.org/pypi/mongoengine/) +# +# +#------------------------------------------------------------------------------- + +# Python Modules +import os +import sys +import time +import datetime +from getpass import getpass +from argparse import ArgumentParser + +# DEBUG +from pprint import pprint +import gc + +# Add app package to sys path +#sys.path.append(os.path.abspath('..')) +#os.environ['DJANGO_SETTINGS_MODULE'] = 'first.settings' + + +# FIRST Modules +import first_core.models as ORM + +# Third Party Modules +from bson import Binary +from bson.objectid import ObjectId +import mongoengine +from mongoengine import Document, StringField, UUIDField, \ + DateTimeField, LongField, ReferenceField, \ + BinaryField, ListField, BooleanField, ObjectIdField, \ + IntField, EmbeddedDocument, EmbeddedDocumentListField +from django.core.paginator import Paginator, EmptyPage + +def info(): + print 'INFO: {} {}'.format(len(gc.get_objects()), sum([sys.getsizeof(o) for o in gc.get_objects()])) + +def migrate_users(): + for u in User.objects.all(): + user, created = ORM.User.objects.get_or_create(**u.dump()) + +def migrate_engines(): + for e in Engine.objects.all(): + engine = e.dump() + engine['developer'] = ORM.User.objects.get(email=e.developer.email) + engine = ORM.Engine.objects.create(**engine) + +def migrate_samples(): + paginator = Paginator(Sample.objects.all(), 100) + for s in Sample.objects.all().exclude('functions').select_related(): + sample, created = ORM.Sample.objects.get_or_create(**s.dump()) + for u in s.seen_by: + sample.seen_by.add(ORM.User.objects.get(email=u.email)) + +def migrate_functions(skip, limit): + i = 0 + for f in Function.objects.skip(skip).limit(limit).select_related(3): + function, created = ORM.Function.objects.get_or_create(**f.dump()) + # Convert Functions + #migrate_function_for_sample(sample, s) + if created: + # Add APIs to function + migrate_apis(function, f) + + # Add to samples + for s in Sample.objects.only('md5', 'crc32').filter(functions=f.id): + ORM.Sample.objects.get(md5=s.md5, crc32=s.crc32).functions.add(function) + #sample = ORM.Sample.objects.get(md5=s.md5, crc32=s.crc32) + #sample.functions.add(function) + + # Add metadata assocaited with the function + migrate_metadata(function, f) + + i += 1 + if 0 == (i % 1000): + print '---{}---'.format(i) + info() + gc.collect() + info() + +def _mf(): + for i in xrange(0, Function.objects.count(), 1000): + print '--{}'.format(i) + migrate_functions(i, 1000) + #migfunc(Function.objects.exclude('metadata').all()[i:i+1000]) + + if i % 20000 == 0: + info() + gc.collect() + info() + +def migfunc(qs): + #info() + #functions = {} + for f in qs: + function, created = ORM.Function.objects.get_or_create(**f.dump()) + # Convert APIs + if created: + # Add APIs to function + migrate_apis(function, f) + + # Add to samples + for s in Sample.objects.only('md5', 'crc32').filter(functions=f.id): + sample = ORM.Sample.objects.get(md5=s.md5, crc32=s.crc32) + sample.functions.add(function) + + # Add metadata assocaited with the function + #migrate_metadata(function, f) + #gc.collect() + #info() + +def migrate_function_for_sample(sample, s): + print '{} - {}'.format(s.md5, len(s.functions)) + info() + for f in s.functions: + if type(f) != Function: + print 'Abandoned object: {}'.format(f) + continue + #info() + #pprint(f.dump()) + function, created = ORM.Function.objects.get_or_create(**f.dump()) + sample.functions.add(function) + + if created: + # Convert APIs + migrate_apis(function, f) + + # Convert Metadata + migrate_metadata(function, f, sample) + + + gc.collect() + info() + +def migrate_apis(function, f): + for a in f.apis: + api, _ = ORM.FunctionApis.objects.get_or_create(api=a) + function.apis.add(api) + + gc.collect() + +def migrate_metadata(function, f): + print 'Metadata: {} - {}'.format(f.sha256, len(f.metadata)) + for m in f.metadata: + creator = ORM.User.objects.get(email=m.user.email) + metadata = ORM.Metadata.objects.create(user=creator) + function.metadata.add(metadata) + + # Convert Metadata Details + for d in m.details(): + details = ORM.MetadataDetails.objects.create(**d) + metadata.details.add(details) + + # Convert Metadata Applied + for s_id, u_id in m.applied: + s_ = Sample.objects.only('md5', 'crc32').get(pk=s_id) + u = User.objects.only('email').get(pk=u_id) + sample_ = ORM.Sample.objects.get(md5=s_.md5, crc32=s_.crc32) + user_ = ORM.User.objects.get(email=u.email) + ORM.AppliedMetadata.objects.create(metadata=metadata, + user=user_, + sample=sample_) + +def main(args): + pass_prompt = 'Enter MongoDB password for {}: '.format(args.user) + mongoengine.connect(args.d, + host=args.host, + port=args.port, + user=args.user, + password=getpass(pass_prompt)) + # Convert User + migrate_users() + + # Convert Engine + migrate_engines() + + # Convert Samples + migrate_samples() + + + + +#------------------------------------------------------------------------------- +# MongoDB Models +# FIRST v0.0.1 +#------------------------------------------------------------------------------- +class User(Document): + name = StringField(max_length=128, required=True) + email = StringField(max_length=254, unique=True) + handle = StringField(max_length=32, required=True) + number = IntField(required=True) + api_key = UUIDField(required=True, unique=True) + created = DateTimeField(default=datetime.datetime.utcnow, required=True) + rank = LongField(default=0) + active = BooleanField(default=True) + + service = StringField(max_length=16, required=True) + auth_data = StringField(max_length=4096, required=True) + + meta = { + 'indexes' : [('handle', 'number'), 'api_key', 'email'] + } + + def dump(self): + return {'name' : self.name, + 'email' : self.email, + 'handle' : self.handle, + 'number' : self.number, + 'api_key' : self.api_key, + 'created' : self.created, + 'rank' : self.rank, + 'active' : self.active} + + +class Engine(Document): + name = StringField(max_length=16, required=True, unique=True) + description = StringField(max_length=128, required=True) + path = StringField(max_length=256, required=True) + obj_name = StringField(max_length=32, required=True) + applied = ListField(default=list) + developer = ReferenceField(User) + active = BooleanField(default=False) + + meta = { + 'indexes' : ['name'] + } + + def dump(self): + return {'name' : self.name, + 'description' : self.description, + 'path' : self.path, + 'obj_name' : self.obj_name, + 'developer' : self.developer, + 'active' : self.active} + + +class Metadata(EmbeddedDocument): + id = ObjectIdField(required=True, default=lambda: ObjectId()) + user = ReferenceField(User) + name = ListField(StringField(max_length=128), default=list) + prototype = ListField(StringField(max_length=256), default=list) + comment = ListField(StringField(max_length=512), default=list) + committed = ListField(DateTimeField(), default=list) + applied = ListField(default=list) + + meta = { + 'indexes' : ['user'] + } + + def details(self): + return [{'committed' : self.committed[i], + 'name' : self.name[i], + 'prototype' : self.prototype[i], + 'comment' : self.comment[i]} for i in xrange(len(self.name))] + + +# Use bson.Binary to insert binary data +class Function(Document): + sha256 = StringField(max_length=64) + opcodes = BinaryField() + apis = ListField(StringField(max_length=128), default=list) + metadata = EmbeddedDocumentListField(Metadata, default=list) + architecture = StringField(max_length=64, required=True) + + meta = { + 'indexes' : [] + } + + def dump(self): + return {'opcodes' : Binary(self.opcodes), + 'architecture' : self.architecture, + 'sha256' : self.sha256} + + +class Sample(Document): + md5 = StringField(max_length=32, required=True) + crc32 = IntField(required=True) + sha1 = StringField(max_length=40) + sha256 = StringField(max_length=64) + seen_by = ListField(ReferenceField(User), default=list) + functions = ListField(ReferenceField(Function), default=list) + last_seen = DateTimeField(default=datetime.datetime.utcnow) + + meta = { + 'indexes' : [('md5', 'crc32')] + } + + def dump(self): + data = {'md5' : self.md5, 'crc32' : self.crc32} + + if hasattr(self, 'sha1'): + data['sha1'] = self.sha1 + + if hasattr(self, 'sha256'): + data['sha256'] = self.sha256 + + return data + +if __name__ == '__main__': + parser = ArgumentParser(('FIRST Mongo to Django ORM Conversion Script\n' + 'This script should be used to convert FIRST v0.0.1 to FIRST v0.1.0\n' + )) + + # Arguments + parser.add_argument('--mongo-host', '-h', help='The MongoDB host') + parser.add_argument('--mongo-port', '-p', help='The MongoDB port', type=int) + parser.add_argument('--mongo-user', '-u', help='The MongoDB user') + parser.add_argument('--mongo-db', '-d', help='The MongoDB db name') + +# TODO: remove +mongoengine.connect('beta') +print ' + Adding Users' +start = time.time() +migrate_users() +print '[+] Users Added ({} s)'.format(time.time() - start) +print ' + Adding Samples' +start = time.time() +migrate_samples() +print '[+] Adding Samples ({} s)'.format(time.time() - start) +print ' + Adding Functions & Metadata' +start = time.time() +#migrate_functions() +_mf() +print '[+] Adding Functions & Metadata ({} s)'.format(time.time() - start) diff --git a/server/utilities/populate_engine.py b/server/utilities/populate_engine.py index b26cc2b..0417b4d 100644 --- a/server/utilities/populate_engine.py +++ b/server/utilities/populate_engine.py @@ -27,7 +27,7 @@ from argparse import ArgumentParser # FIRST Modules -from ..app.first import EngineManager, DBManager +from first_core import EngineManager, DBManager def main(): global total, completed, operation_complete diff --git a/server/www/migrations/0001_initial.py b/server/www/migrations/0001_initial.py new file mode 100644 index 0000000..c272eed --- /dev/null +++ b/server/www/migrations/0001_initial.py @@ -0,0 +1,204 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.4 on 2017-08-25 16:11 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='AppliedMetadata', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ], + options={ + 'db_table': 'AppliedMetadata', + }, + ), + migrations.CreateModel( + name='Engine', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=16, unique=True)), + ('description', models.CharField(max_length=128)), + ('path', models.CharField(max_length=256)), + ('obj_name', models.CharField(max_length=32)), + ('active', models.BooleanField(default=False)), + ], + options={ + 'db_table': 'Engine', + }, + ), + migrations.CreateModel( + name='Function', + fields=[ + ('id', models.BigAutoField(primary_key=True, serialize=False)), + ('sha256', models.CharField(max_length=64)), + ('opcodes', models.BinaryField()), + ('architecture', models.CharField(max_length=64)), + ], + options={ + 'db_table': 'Function', + }, + ), + migrations.CreateModel( + name='FunctionApis', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('api', models.CharField(max_length=128, unique=True)), + ], + options={ + 'db_table': 'FunctionApis', + }, + ), + migrations.CreateModel( + name='Metadata', + fields=[ + ('id', models.BigAutoField(primary_key=True, serialize=False)), + ], + options={ + 'db_table': 'Metadata', + }, + ), + migrations.CreateModel( + name='MetadataDetails', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=256)), + ('prototype', models.CharField(max_length=256)), + ('comment', models.CharField(max_length=512)), + ('committed', models.DateTimeField(default=django.utils.timezone.now)), + ], + options={ + 'db_table': 'MetadataDetails', + }, + ), + migrations.CreateModel( + name='Sample', + fields=[ + ('id', models.BigAutoField(primary_key=True, serialize=False)), + ('md5', models.CharField(max_length=32)), + ('crc32', models.BigIntegerField()), + ('sha1', models.CharField(blank=True, max_length=40, null=True)), + ('sha256', models.CharField(blank=True, max_length=64, null=True)), + ('last_seen', models.DateTimeField(blank=True, default=django.utils.timezone.now)), + ], + options={ + 'db_table': 'Sample', + }, + ), + migrations.CreateModel( + name='User', + fields=[ + ('id', models.BigAutoField(primary_key=True, serialize=False)), + ('name', models.CharField(max_length=128)), + ('email', models.CharField(max_length=254)), + ('handle', models.CharField(max_length=32)), + ('number', models.IntegerField()), + ('api_key', models.UUIDField(unique=True)), + ('created', models.DateTimeField(default=django.utils.timezone.now)), + ('rank', models.BigIntegerField(default=0)), + ('active', models.BooleanField(default=True)), + ('service', models.CharField(max_length=16)), + ('auth_data', models.CharField(max_length=4096)), + ], + options={ + 'db_table': 'User', + }, + ), + migrations.AddIndex( + model_name='user', + index=models.Index(fields=['email'], name='User_email_ffa2e0_idx'), + ), + migrations.AddIndex( + model_name='user', + index=models.Index(fields=['api_key'], name='User_api_key_c4f2d6_idx'), + ), + migrations.AlterIndexTogether( + name='user', + index_together=set([('handle', 'number')]), + ), + migrations.AddField( + model_name='sample', + name='functions', + field=models.ManyToManyField(to='www.Function'), + ), + migrations.AddField( + model_name='sample', + name='seen_by', + field=models.ManyToManyField(to='www.User'), + ), + migrations.AddField( + model_name='metadata', + name='details', + field=models.ManyToManyField(to='www.MetadataDetails'), + ), + migrations.AddField( + model_name='metadata', + name='user', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.User'), + ), + migrations.AddField( + model_name='function', + name='apis', + field=models.ManyToManyField(to='www.FunctionApis'), + ), + migrations.AddField( + model_name='function', + name='metadata', + field=models.ManyToManyField(to='www.Metadata'), + ), + migrations.AddField( + model_name='engine', + name='developer', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.User'), + ), + migrations.AddField( + model_name='appliedmetadata', + name='metadata', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.Metadata'), + ), + migrations.AddField( + model_name='appliedmetadata', + name='sample', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.Sample'), + ), + migrations.AddField( + model_name='appliedmetadata', + name='user', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.User'), + ), + migrations.AlterUniqueTogether( + name='sample', + unique_together=set([('md5', 'crc32')]), + ), + migrations.AlterIndexTogether( + name='sample', + index_together=set([('md5', 'crc32')]), + ), + migrations.AddIndex( + model_name='metadata', + index=models.Index(fields=['user'], name='Metadata_user_id_aea908_idx'), + ), + migrations.AlterUniqueTogether( + name='function', + unique_together=set([('sha256', 'architecture')]), + ), + migrations.AddIndex( + model_name='engine', + index=models.Index(fields=['name'], name='Engine_name_14ac74_idx'), + ), + migrations.AlterUniqueTogether( + name='appliedmetadata', + unique_together=set([('metadata', 'sample', 'user')]), + ), + ] diff --git a/server/www/models.py b/server/www/models.py index 63e25b1..b5b244e 100644 --- a/server/www/models.py +++ b/server/www/models.py @@ -22,21 +22,21 @@ # Python Modules from __future__ import unicode_literals -import datetime # Third Party Modules from django.db import models from django.utils import timezone -from django.db.transaction import commit class User(models.Model): + id = models.BigAutoField(primary_key=True) + name = models.CharField(max_length=128) email = models.CharField(max_length=254) handle = models.CharField(max_length=32) number = models.IntegerField() api_key = models.UUIDField(unique=True) - created = models.DateTimeField(default=timezone.now, auto_now_add=True) + created = models.DateTimeField(default=timezone.now) rank = models.BigIntegerField(default=0) active = models.BooleanField(default=True) @@ -61,6 +61,7 @@ def dump(self, full=False): return data class Meta: + db_table = 'User' indexes = [ models.Index(fields=['email']), models.Index(fields=['api_key']), @@ -68,178 +69,167 @@ class Meta: index_together = ("handle", "number") - class Engine(models.Model): name = models.CharField(max_length=16, unique=True) description = models.CharField(max_length=128) path = models.CharField(max_length=256) obj_name = models.CharField(max_length=32) - developer = models.ForeignKey(User) + developer = models.ForeignKey('User') active = models.BooleanField(default=False) - class Meta: - indexes = [ - models.Index(fields=['name']), - ] + @property + def rank(self): + # TODO: Complete + #return len(self.applied) + return 0 def dump(self, full=False): data = {'name' : self.name, 'description' : self.description, 'rank' : self.rank, - 'developer' : Engine.objects.filter(engine_id = self.id)} + 'developer' : self.developer.user_handle} if full: data.update({'path' : self.path}) return data - @property - def rank(self): - return len(self.applied) + class Meta: + db_table = 'Engine' + indexes = [ + models.Index(fields=['name']), + ] -class AppliedEngine(models.Model): - engine_id = models.ForeignKey(Engine) - sample_id = models.OneToOneField('Sample') - user_id = models.OneToOneField(User) - engine_metadata_id = models.BigIntegerField(); + +# TODO: Create scheme for tracking applied metadata for engines +# +#class AppliedEngine(models.Model): +# engine_id = models.ForeignKey(Engine) +# sample_id = models.ForeignKey(Sample) +# user_id = models.ForeignKey(User) +# engine_metadata_id = models.BigIntegerField(); +# +# class Meta: +# db_table = 'AppliedEngine' +# unique_together = ("sample_id", "user_id", "engine_metadata_id") + +class AppliedMetadata(models.Model): + metadata = models.ForeignKey('Metadata') + sample = models.ForeignKey('Sample') + user = models.ForeignKey('User') class Meta: - unique_together = ("sample_id", "user_id", "engine_metadata_id") + db_table = 'AppliedMetadata' + unique_together = ("metadata", "sample", "user") -class Metadata(models.Model): - user = models.OneToOneField(User ) + +class MetadataDetails(models.Model): + name = models.CharField(max_length=256) + prototype = models.CharField(max_length=256) + comment = models.CharField(max_length=512) + committed = models.DateTimeField(default=timezone.now) class Meta: - indexes = [ - models.Index(fields=['user']), - ] + db_table = 'MetadataDetails' - def dump(self, full=False): - data = {'creator' : User.objects.filter(metadata_id = self.id), - 'name' : MetaDataName.objects.filter(metadata_id = self.id).first(), - 'prototype' : MetaDataPrototype.objects.filter(metadata_id = self.id).first(), - 'comment' : MetaDataComment.objects.filter(metadata_id = self.id).first(), - 'rank' : len(self.applied)} - if full: - data['history'] = [] - name = MetaDataName.objects.filter(metadata_id = self.id) - committed = MetaDataCommited.objects.filter(metadata_id = self.id) - prototype = MetaDataPrototype.objects.filter(metadata_id = self.id); - comment = MetaDataComment.objects.filter(metadata_id = self.id); - - for i in xrange(len(name) - 1, -1, -1): - # Convert back with: - # datetime.datetime.strptime(
, '%Y-%m-%dT%H:%M:%S.%f') - commit = committed[i].isoformat() - data['history'].append({'name' : name[i], - 'prototype' : prototype[i], - 'comment' : comment[i], - 'committed' : commit}) +class Metadata(models.Model): + id = models.BigAutoField(primary_key=True) - return data + user = models.ForeignKey('User') + details = models.ManyToManyField('MetadataDetails') + + @property + def rank(self): + if hasattr(self, 'id'): + return AppliedMetadata.objects.filter(metadata=self.id).count() + + return 0 def has_changed(self, name, prototype, comment): - if (not self.name) or (not self.prototype) or (not comment): + if not self.details.exists(): return True - actualName = MetaDataName.objects.filter(metadata_id = self.id).first() - actualPrototype = MetaDataPrototype.objects.filter(metadata_id = self.id).first() - actualComment = MetaDataComment.objects.filter(metadata_id = self.id).first(); - - if ((actualName.name != name) - or (actualPrototype.prototype != prototype) - or (actualComment.comment != comment)): + latest = self.details.latest('committed') + if ((latest.name != name) + or (latest.prototype != prototype) + or (latest.comment != comment)): return True return False - @property - def rank(self): - return len(self.applied) + def dump(self, full=False): + data = {'creator' : self.user.user_handle} + latest_details = self.details.latest('committed') + data.update({ + 'name' : latest_details.name, + 'prototype' : latest_details.prototype, + 'comment' : latest_details.comment, + 'rank' : self.rank + }) + + if full: + # Convert committed time back with: + # datetime.datetime.strptime(
, '%Y-%m-%dT%H:%M:%S.%f') + data['history'] = [{'name' : d.name, + 'prototype' : d.prototype, + 'comment' : d.comment, + 'committed' : d.committed.isoformat()} + for d in self.details.order_by('committed')] + + return data -class AppliedMetaData(models.Model): - metadata_id = models.ForeignKey(Engine) - sample_id = models.OneToOneField('Sample') - user_id = models.OneToOneField(User) - engine_metadata_id = models.BigIntegerField(); class Meta: - unique_together = ("metadata_id", "sample_id", "user_id") + db_table = 'Metadata' + indexes = [models.Index(fields=['user'])] -class MetaDataName(models.Model): - name = models.CharField(max_length=128) - models.ForeignKey(Metadata) -class MetaDataPrototype(models.Model): - prototype = models.CharField(max_length=256) - models.ForeignKey(Metadata) +class FunctionApis(models.Model): + api = models.CharField(max_length=128, unique=True) -class MetaDataComment(models.Model): - comment = models.CharField(max_length=128) - models.ForeignKey(Metadata) + class Meta: + db_table = 'FunctionApis' -class MetaDataCommited(models.Model): - committed = models.DateTimeField(default=datetime.datetime.utcnow, blank=True) - models.ForeignKey(Metadata) class Function(models.Model): + id = models.BigAutoField(primary_key=True) + sha256 = models.CharField(max_length=64) - opcodes = models.BinaryField - metadata = models.ForeignKey(Metadata) - mnemonic_hash = models.ForeignKey('MnemonicHash') - # Return value from idaapi.get_file_type_name() + opcodes = models.BinaryField() + apis = models.ManyToManyField('FunctionApis') + metadata = models.ManyToManyField('Metadata') architecture = models.CharField(max_length=64) def dump(self): - return {'id' : self.id, - 'opcodes' : self.opcodes, - 'apis' : self.functionapis_set.all(), - 'metadata' : [str(x.api) for x in Metadata.objects.filter(function_id = self.id)], + return {'opcodes' : self.opcodes, 'architecture' : self.architecture, 'sha256' : self.sha256} -class FunctionApis(models.Model): - api = models.CharField(max_length=64) - models.ForeignKey(Function) + class Meta: + db_table = 'Function' + unique_together = ('sha256', 'architecture') class Sample(models.Model): + id = models.BigAutoField(primary_key=True) + md5 = models.CharField(max_length=32) - crc32 = models.IntegerField() - sha1 = models.CharField(max_length=40) - sha256 = models.CharField(max_length=64) - seen_by = models.ManyToManyField( User, blank=True) - functions = models.ManyToManyField( Function, blank=True) - last_seen = models.DateTimeField(default=datetime.datetime.utcnow, blank=True) + crc32 = models.BigIntegerField() + sha1 = models.CharField(max_length=40, null=True, blank=True) + sha256 = models.CharField(max_length=64, null=True, blank=True) + seen_by = models.ManyToManyField('User') + functions = models.ManyToManyField('Function') + last_seen = models.DateTimeField(default=timezone.now, blank=True) class Meta: + db_table = 'Sample' index_together = ['md5', 'crc32'] + unique_together = ('md5', 'crc32') def dump(self): - data = {'md5' : self.md5, 'crc32' : self.crc32, - 'seen_by' : [str(x.id) for x in User.objects.filter(sample_id = self.id)], - 'functions' : [str(x.id) for x in Function.objects.filter(sample_id = self.id)]} - - if 'sha1' in self: - data['sha1'] = self.sha1 - - if 'sha256' in self: - data['sha256'] = self.sha256 - - return data - -class MnemonicHash(models.Model): - sha256 = models.CharField(max_length=64) - architecture = models.CharField(max_length=64) - - class Meta: - index_together = ('sha256', 'architecture') - - def dump(self): - return {'sha256' : self.sha256, - 'architecture' : self.architecture, - 'functions' : self.function_set.all()} - - def function_list(self): - return [str(x) for x in Function.objects.filter(MnemonicHash_id = self.id)] + return {'md5' : self.md5, 'crc32' : self.crc32, + 'seen_by' : [str(x.id) for x in self.seen_by.all()], + 'functions' : [str(x.id) for x in self.functions.all()], + 'sha1' : self.sha1, + 'sha256' : self.sha256} diff --git a/server/www/views.py b/server/www/views.py index 289e9b6..6fb7458 100644 --- a/server/www/views.py +++ b/server/www/views.py @@ -9,8 +9,8 @@ # FIRST Modules -from www.models import Function, User -from first.auth import Authentication, require_login, FIRSTAuthError +from first_core.models import Function, User +from first_core.auth import Authentication, require_login, FIRSTAuthError def handler404(request): return render(request, 'www/404.html', None)