diff --git a/.gitignore b/.gitignore index ba74660..b72cd4d 100644 --- a/.gitignore +++ b/.gitignore @@ -55,3 +55,4 @@ docs/_build/ # PyBuilder target/ +server/first_config.json diff --git a/docs/conf.py b/docs/conf.py index 05ddb4f..ce9f27d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -41,9 +41,8 @@ # Autodoc settings autoclass_content = 'class' -autodoc_mock_imports = ['httplib2', 'oauth2client', 'apiclient', 'mongoengine', - 'django', 'bson', 'distorm3', 'mongoengine.queryset', - 'bson.objectid'] +autodoc_mock_imports = ['httplib2', 'oauth2client', 'apiclient', + 'django', 'capstone'] class _MyMockModule(sphinx.ext.autodoc._MockModule): '''Class created to get around autodoc issues with server's dependencies.''' diff --git a/docs/engines/index.rst b/docs/engines/index.rst index 666f868..819a4eb 100644 --- a/docs/engines/index.rst +++ b/docs/engines/index.rst @@ -8,12 +8,29 @@ Engines Engine Shell ============ +The Python script ``engine_shell.py`` provides you with some functionality to manage engines installed into FIRST. Below you will see the script's operations. + +.. code:: + + +========================================================+ + | FIRST Engine Shell Menu | + +========================================================+ + | list | List all engines currently installed | + | info | Get info on an engine | + | install | Installs engine | + | delete | Removes engine record but not other DB data | + | enable | Enable engine (Engine will be enabled) | + | populate | Sending all functions to engine | + | disable | Disable engine (Engine will be disabled) | + +--------------------------------------------------------+ + + Testing Engines =============== TODO -.. autoclass:: first.engines.AbstractEngine +.. 
autoclass:: first_core.engines.AbstractEngine :noindex: :members: :undoc-members: diff --git a/docs/index.rst b/docs/index.rst index 1f18595..5c531e0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -17,9 +17,16 @@ Installing your own FIRST server can be quick and easy with an Ubuntu machine an **After cloning the Git repo** - Save your google auth json information to install/google_secret.json + Save your google auth json information to install/google_secret.json. To generate a google_secret.json file you will need to go to https://console.developers.google.com, create a project, select the project, select Credentials in the left set of links under APIs & services. Once selected, select the Create credentials drop-down menu and click OAuth client ID. Select Web application, and fill out the details. Set the Authorized redirect URIs to your server name followed by `/oauth/google`. - Optionally, you can add install/ssl/apache.crt and apache.key file if you have an SSL certificate you would prefer to use. + Examples + + .. code:: + + http://localhost:8888/oauth/google + http://first.talosintelligence.com/oauth/google + + Once created you will have the option to download the JSON file containing the generated secret. Optionally, you can add install/ssl/apache.crt and apache.key file if you have an SSL certificate you would prefer to use. .. code:: @@ -32,16 +39,23 @@ When the FIRST server is installed, no engines are installed. FIRST comes with t .. note:: - Before engines can be installed, the developer must be registered with the system. Ensure the developer is registered before progressing. + Before engines can be installed, the developer must be registered with the system. This can be accomplished through the web UI if OAuth has been set up, or manually with the user_shell.py script located in the utilities folder. + + .. code:: + + $ cd FIRST-server/server/utilities + $ python user_shell.py adduser + + Ensure the developer is registered before progressing. 
Python script ``engine_shell.py`` can be provided with command line arguments or used as a shell. To quickly install the three available engines run the below commands: .. code:: $ cd FIRST-server/server/utilities - $ python engine_shell.py install first.engines.exact_match ExactMatchEngine - $ python engine_shell.py install first.engines.mnemonic_hash MnemonicHashEngine - $ python engine_shell.py install first.engines.basic_masking BasicMaskingEngine + $ python engine_shell.py install first_core.engines.exact_match ExactMatchEngine + $ python engine_shell.py install first_core.engines.mnemonic_hash MnemonicHashEngine + $ python engine_shell.py install first_core.engines.basic_masking BasicMaskingEngine Once an engine is installed you can start using your FIRST installation to add and/or query for annotations. Without engines FIRST will still be able to store annotations, but will never return any results for query operations. diff --git a/install/requirements.txt b/install/requirements.txt index 3bb0125..0831050 100644 --- a/install/requirements.txt +++ b/install/requirements.txt @@ -1,8 +1,7 @@ mysqlclient -mongoengine django werkzeug -distorm3 +capstone httplib2 oauth2client google-api-python-client diff --git a/server/engines/__init__.py b/server/engines/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/server/engines/admin.py b/server/engines/admin.py new file mode 100644 index 0000000..13be29d --- /dev/null +++ b/server/engines/admin.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.contrib import admin + +# Register your models here. 
diff --git a/server/engines/apps.py b/server/engines/apps.py new file mode 100644 index 0000000..fa6398b --- /dev/null +++ b/server/engines/apps.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.apps import AppConfig + + +class EnginesConfig(AppConfig): + name = 'engines' diff --git a/server/engines/migrations/__init__.py b/server/engines/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/server/engines/models.py b/server/engines/models.py new file mode 100644 index 0000000..1dfab76 --- /dev/null +++ b/server/engines/models.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import models + +# Create your models here. diff --git a/server/engines/tests.py b/server/engines/tests.py new file mode 100644 index 0000000..5982e6b --- /dev/null +++ b/server/engines/tests.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.test import TestCase + +# Create your tests here. diff --git a/server/engines/views.py b/server/engines/views.py new file mode 100644 index 0000000..e784a0b --- /dev/null +++ b/server/engines/views.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.shortcuts import render + +# Create your views here. 
diff --git a/server/example_config.json b/server/example_config.json new file mode 100644 index 0000000..43ed855 --- /dev/null +++ b/server/example_config.json @@ -0,0 +1,15 @@ +{ + "secret_key" : "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", + + "db_engine" : "django.db.backends.mysql", + "db_dbname" : "first", + "db_user" : "user", + "db_password" : "pass", + "db_host" : "localhost", + "db_port" : 3306, + + "debug" : true, + "allowed_hosts" : ["localhost", "testserver"], + + "oauth_path" : "" +} diff --git a/server/first/__init__.py b/server/first/__init__.py index 80bd52a..e69de29 100644 --- a/server/first/__init__.py +++ b/server/first/__init__.py @@ -1,27 +0,0 @@ -#------------------------------------------------------------------------------- -# -# Intializes FIRST's DBManager and EngineManager -# Copyright (C) 2016 Angel M. Villegas -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
-# -#------------------------------------------------------------------------------- - -# FIRST Modules -from first.dbs import FIRSTDBManager -from first.engines import FIRSTEngineManager - -DBManager = FIRSTDBManager() -EngineManager = FIRSTEngineManager(DBManager) diff --git a/server/first/dbs/builtin_db.py b/server/first/dbs/builtin_db.py deleted file mode 100644 index 5f4437d..0000000 --- a/server/first/dbs/builtin_db.py +++ /dev/null @@ -1,466 +0,0 @@ -#------------------------------------------------------------------------------- -# -# FIRST DB Module for completing operations with the MongoDB backend -# Copyright (C) 2016 Angel M. Villegas -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
-# -# Requirements -# ------------ -# - flask -# - mongoengine -# - werkzeug -# -#------------------------------------------------------------------------------- - -# Python Modules -import re -import math -import json -import hashlib -import datetime -import ConfigParser -from hashlib import md5 - -# Third Party Modules -import bson -from mongoengine import Q -from mongoengine.queryset import DoesNotExist, MultipleObjectsReturned - -# FIRST Modules -from first.dbs import AbstractDB -from first.models import User, Metadata, Function, Sample, Engine - - -class FIRSTDB(AbstractDB): - _name = 'first_db' - standards = { 'intel16', 'intel32', 'intel64', 'arm32', 'arm64', 'mips', - 'ppc', 'sparc', 'sysz'} - - # - # Functions called by FIRST Framework - #-------------------------------------------------------------------------- - def __init__(self, config): - ''' - Constructor. - - @param conf: ConfigParser.RawConfigParser - ''' - self._is_installed = True - ''' - section = 'mongodb_settings' - - if (not config.has_section(section) - or not config.has_option(section, 'db')): - raise FirstDBError('DB settings not available', skip=True) - - if section.upper() not in app.config: - app.config[section.upper()] = {} - - app.config[section.upper()]['db'] = conf.get(section, 'db') - self.db.init_app(app) - ''' - - def get_architectures(self): - standards = FIRSTDB.standards.copy() - standards.update(Function.objects().distinct(field='architecture')) - return list(standards) - - def get_sample(self, md5_hash, crc32, create=False): - try: - # Get Sample from DB - return Sample.objects.get(md5=md5_hash, crc32=crc32) - - except DoesNotExist: - if not create: - return None - - # Create Sample for DB - sample = Sample(md5=md5_hash, crc32=crc32) - sample.last_seen = datetime.datetime.now() - sample.save() - return sample - - def sample_seen_by_user(self, sample, user): - if (not isinstance(sample, Sample)) or (not isinstance(user, User)): - return None - - if user not in sample.seen_by: 
- sample.seen_by.append(user) - sample.save() - - def checkin(self, user, md5_hash, crc32, sha1_hash=None, sha256_hash=None): - ''' - TODO: - - @returns String error message on Failure - None - ''' - if not isinstance(user, User): - return False - - # Validate data - if ((not re.match('^[a-f\d]{32}$', md5_hash)) - or (sha1_hash and not re.match('^[a-f\d]{40}$', sha1_hash)) - or (sha256_hash and not re.match('^[a-f\d]{64}$', sha256_hash))): - return False - - sample = self.get_sample(md5_hash, crc32, True) - if not sample: - return False - - sample.last_seen = datetime.datetime.now() - if user not in sample.seen_by: - sample.seen_by.append(user) - - if None != sha1_hash: - sample.sha1 = sha1_hash - - if None != sha256_hash: - sample.sha256 = sha256_hash - - sample.save() - return True - - def get_function(self, opcodes, architecture, apis, create=False, **kwargs): - function = None - - try: - function = Function.objects.get(sha256=hashlib.sha256(opcodes).hexdigest(), - opcodes=bson.Binary(opcodes), - architecture=architecture, - apis=apis) - except DoesNotExist: - # Create function and add it to sample - function = Function(sha256=hashlib.sha256(opcodes).hexdigest(), - opcodes=bson.Binary(opcodes), - architecture=architecture, - apis=apis) - function.save() - - return function - - def get_all_functions(self): - try: - return Function.objects.all() - - except: - return [] - - def find_function(self, _id=None, opcodes=None, apis=None, architecture=None, h_sha256=None): - try: - # User function ID - if None != _id: - return Function.objects(id=bson.objectid.ObjectId(_id)).get() - - # User opcodes and apis - elif None not in [opcodes, apis]: - return Function.objects(opcodes=opcodes, apis=apis).get() - - # Use hash, architecture - elif None not in [architecture, h_sha256]: - return Function.objects(sha256=h_sha256, architecture=architecture).get() - - else: - return None - - except DoesNotExist: - return None - - def add_function_to_sample(self, sample, function): - if 
(not isinstance(sample, Sample)) or (not isinstance(function, Function)): - return False - - if function not in sample.functions: - sample.functions.append(function) - sample.save() - - return True - - def add_metadata_to_function(self, user, function, name, prototype, comment, **kwargs): - if (not isinstance(function, Function)) or (not isinstance(user, User)): - return None - - # Check to see if user already has metadata associated with the sample - metadata = None - for m in function.metadata: - if user == m.user: - if m.has_changed(name, prototype, comment): - m.name = [name] + m.name - m.prototype = [prototype] + m.prototype - m.comment = [comment] + m.comment - m.committed = [datetime.datetime.now()] + m.committed - - metadata = m - break - - if not metadata: - metadata = Metadata(user=user, name=[name], - prototype=[prototype], - comment=[comment], - committed=[datetime.datetime.now()]) - function.metadata.append(metadata) - - function.save() - return str(metadata.id) - - def get_metadata_list(self, metadata): - results = [] - user_metadata, engine_metadata = self.separate_metadata(metadata) - - metadata_ids = map(bson.objectid.ObjectId, user_metadata) - mongo_filter = Q(metadata__id=metadata_ids[0]) - for mid in metadata_ids[1:]: - mongo_filter |= Q(metadata__id=mid) - - matches = Function.objects.filter(mongo_filter).only('metadata') - for function in matches: - for metadata in function.metadata: - if metadata.id in metadata_ids: - data = metadata.dump() - data['id'] = str(metadata.id) - results.append(data) - - # Remove id from list to shorten list - del metadata_ids[metadata_ids.index(metadata.id)] - - for _id in engine_metadata: - engines = Engine.object(id=_id) - if (not engines) or (len(engines) > 1): - continue - - data = {'id' : _id, 'engine' : engine.name, - 'description' : engine.description} - results.append(data) - - return results - - def delete_metadata(self, user, metadata_id): - if not isinstance(user, User): - return False - - 
user_metadata, engine_metadata = self.separate_metadata([metadata_id]) - if not user_metadata: - return False - - # User must be the creator of the metadata to delete it - metadata_id = bson.objectid.ObjectId(user_metadata[0]) - try: - Function.objects(metadata__user=user, metadata__id=metadata_id).update_one(pull__metadata__id=metadata_id) - return True - except DoesNotExist: - return False - - def created(self, user, page, max_metadata=20): - offset = (page - 1) * max_metadata - results = [] - pages = 0 - - if (offset < 0) or (not isinstance(user, User)): - return (results, pages) - - try: - matches = Function.objects(metadata__user=user).only('metadata') - total = Function.objects(metadata__user=user).count() + 0.0 - pages = int(math.ceil(total / max_metadata)) - if page > pages: - return (results, pages) - - matches = matches.skip(offset).limit(max_metadata) - - except ValueError: - return (results, pages) - - for function in matches: - for metadata in function.metadata: - if user == metadata.user: - temp = metadata.dump() - temp['id'] = FIRSTDB.make_id(metadata.id, 0) - results.append(temp) - - # Bail out of inner loop early since a user can only - # create one metadata entry per function - break - - return (results, pages) - - @staticmethod - def make_id(_id, flags): - return '{:1x}{}'.format(flags & 0xF, _id) - - def separate_metadata(self, metadata): - # Get metadata created by users only, MSB should not be set - user_metadata = [] - engine_metadata = [] - for x in metadata: - if len(x) == 24: - user_metadata.append(x) - elif (len(x) == 25) and (((int(x[0], 16) >> 3) & 1) == 0): - user_metadata.append(x[1:]) - elif (len(x) == 25) and (((int(x[0], 16) >> 3) & 1) == 1): - engine_metadata.append(x[1:]) - - return (user_metadata, engine_metadata) - - def metadata_history(self, metadata): - results = {} - user_metadata, engine_metadata = self.separate_metadata(metadata) - e_comment = ('Generated by Engine: {0.name}\n{0.description}\n\n' - 'Developer: 
{0.developer.user_handle}') - - if len(user_metadata) > 0: - metadata_ids = map(bson.objectid.ObjectId, user_metadata) - mongo_filter = Q(metadata__id=metadata_ids[0]) - for mid in metadata_ids[1:]: - mongo_filter |= Q(metadata__id=mid) - - matches = Function.objects.filter(mongo_filter).only('metadata') - for function in matches: - for metadata in function.metadata: - if metadata.id in metadata_ids: - data = metadata.dump(True) - _id = FIRSTDB.make_id(metadata.id, 0) - results[_id] = {'creator' : data['creator'], - 'history' : data['history']} - # Remove id from list to shorten list - del metadata_ids[metadata_ids.index(metadata.id)] - - # Provide information for engine created metadata... - for engine_id in engine_metadata: - engine = self.get_engine(engine_id) - if not engine: - continue - data = {'creator' : engine.name, - 'history' : [{'committed' : '', - 'name' : 'N/A', - 'prototype' : 'N/A', - 'comment' : e_comment.format(engine)}]} - results[FIRSTDB.make_id(engine_id, 8)] = data - - return results - - def applied(self, sample, user, _id, is_engine=False): - ''' - @returns Boolean. 
True if added to the applied list - False if not added to the applied list - ''' - if (not isinstance(user, User)) or (not isinstance(sample, Sample)): - return False - - key = [str(sample.id), str(user.id)] - if is_engine: - engine_id = bson.objectid.ObjectId(_id) - engine = Engine.objects(id=engine_id, - applied__contains=key) - - # Check if user has already applied the signature - if len(engine): - return True - - try: - engine = Engine.objects(id=engine_id).get() - except DoesNotExist: - # Engine does not exist - return False - - engine.applied.append(key) - engine.save() - - else: - metadata_id = bson.objectid.ObjectId(_id) - functions = Function.objects(metadata__id=metadata_id, - metadata__applied__contains=key) - - # Check if user has already applied the signature - if len(functions): - return True - - try: - function = Function.objects(metadata__id=metadata_id).get() - except DoesNotExist: - # Metadata does not exist - return False - - # Get metadata - for metadata in function.metadata: - if metadata.id == metadata_id: - metadata.applied.append(key) - break - - function.save() - - return True - - def unapplied(self, sample, user, _id, is_engine=False): - ''' - @returns Boolean. 
True if not in metadata's applied list - False if still in the applied list - ''' - if (not isinstance(sample, Sample)) or (not isinstance(user, User)): - return False - - key = [str(sample.id), str(user.id)] - if is_engine: - engine_id = bson.objectid.ObjectId(_id) - engine = Engine.objects(id=engine_id, - applied__contains=key) - - # Check if user has already applied the signature - if not len(engine): - return True - - try: - engine = Engine.objects(id=engine_id).get() - except DoesNotExist: - # Engine does not exist - return False - - engine.applied.remove(key) - engine.save() - - else: - metadata_id = bson.objectid.ObjectId(_id) - functions = Function.objects(metadata__id=metadata_id, - metadata__applied__contains=key) - - # Check if user does not have it applied already - if not len(functions): - return True - - try: - function = functions.get() - except DoesNotExist: - # Metadata does not exist - return True - - # Get metadata - for metadata in function.metadata: - if metadata.id == metadata_id: - metadata.applied.remove(key) - break - - function.save() - - return True - - def engines(self, active=True): - return Engine.objects(active=bool(active)) - - def get_engine(self, engine_id): - engines = Engine.objects(id = engine_id) - if not engines: - return None - - return engines[0] diff --git a/server/first/engines/basic_masking.py b/server/first/engines/basic_masking.py deleted file mode 100644 index c3ead1f..0000000 --- a/server/first/engines/basic_masking.py +++ /dev/null @@ -1,222 +0,0 @@ -#------------------------------------------------------------------------------- -# -# FIRST Engine: Basic Masking -# Author: Angel M. Villegas (anvilleg@cisco.com) -# Last Modified: March 2016 -# -# Uses Distorm3 to obtain instructions and then removes certain instruction -# details to normalize it into a standard form to be compared to other -# functions. -# -# Maskes out: -# - ESP/EBP Offsets -# - Absolute Calls?? -# - Global Offsets?? 
-# -# Requirements -# ------------ -# - Distorm3 -# -# Installation -# ------------ -# None -# -#------------------------------------------------------------------------------- - -# Python Modules -import re -from hashlib import sha256 - -# FIRST Modules -from first.error import FIRSTError -from first.engines import AbstractEngine -from first.engines.results import FunctionResult - -# Third Party Modules -from bson.objectid import ObjectId -from distorm3 import DecomposeGenerator, Decode32Bits, Decode64Bits, Decode16Bits -from mongoengine.queryset import DoesNotExist, MultipleObjectsReturned -from mongoengine import Document, StringField, ListField, IntField, \ - ObjectIdField - -class BasicMasking(Document): - sha256 = StringField(max_length=64, required=True) - architecture = StringField(max_length=64, required=True) - instructions = ListField(StringField(max_length=124), required=True) - total_bytes = IntField(required=True, default=0) - functions = ListField(ObjectIdField(), default=list) - - meta = { - 'indexes' : [('sha256', 'architecture', 'instructions')] - } - - def dump(self): - return {'sha256' : self.sha256, - 'architecture' : self.architecture, - 'instructions' : self.instructions, - 'total_bytes' : self.total_bytes, - 'functions' : self.function_list()} - - def function_list(self): - return [str(x) for x in self.functions] - - -class BasicMaskingEngine(AbstractEngine): - _name = 'BasicMasking' - _description = ('Masks ESP/EBP offsets, calls/jmps offsets, and global ' - 'offsets (Intel Only). 
Requires at least 8 instructions.') - _required_db_names = ['first_db'] - - def normalize(self, opcodes, architecture): - changed_bits = 0 - dt = None - mapping = {'intel32' : Decode32Bits, - 'intel64' : Decode64Bits, - 'intel16' : Decode16Bits} - if architecture in mapping: - dt = mapping[architecture] - else: - return (None, changed_bits, None) - - try: - normalized = [] - original = [] - for i in DecomposeGenerator(0, opcodes, dt): - # If disassembly is not valid then junk data has been sent - if not i.valid: - return (None, 0, None) - - original.append(i._toText()) - instr = i.mnemonic + ' ' - - # Special mnemonic masking (Call, Jmp, JCC) - if (i.mnemonic == 'CALL') or i.mnemonic.startswith('J'): - operand = i.operands[0]._toText() - - if 'Immediate' == i.operands[0].type: - instr += '0x' - changed_bits += i.operands[0].size - - else: - regex = '^\[R(S|I)P(\+|\-)0x[\da-f]+\]$' - if re.match(regex, operand): - instr += re.sub(regex, r'[R\1P\2', operand) + '0x]' - changed_bits += i.operands[0].dispSize - else: - # Nothing will be masked out - instr = i._toText() - - normalized.append(instr) - continue - - operand_instrs = [] - for operand_obj in i.operands: - operand = operand_obj._toText() - if ((re.match('^\[E(S|B)P', operand) or re.match('^\[R(I|S)P', operand)) - and operand_obj.dispSize): - # Offset from EBP/ESP and RIP/RSP - masked = operand.replace(hex(operand_obj.disp), '0x') - operand_instrs.append(masked) - changed_bits += operand_obj.dispSize - - elif 'Immediate' == operand_obj.type: - value = operand_obj.value - # Masking off immediates within the standard VA of the sample - if ((0x400000 <= value <= 0x500000) - or (0x10000000 <= value <= 0x20000000) - or (0x1C0000000 <= value <= 0x1D0000000) - or (0x140000000 <= value <= 0x150000000)): - operand_instrs.append('0x') - changed_bits += operand_obj.size - - else: - operand_instrs.append(operand) - - elif 'AbsoluterMemoryAddress' == operand_obj.type: - operand_instrs.append('0x') - changed_bits += 
operand_obj.dispSize - - elif 'AbsoluteMemory' == operand_obj.type: - masked = operand.replace(hex(operand_obj.disp), '0x') - operand_instrs.append(masked) - changed_bits += operand_obj.dispSize - - else: - operand_instrs.append(operand) - - normalized.append(instr + ', '.join(operand_instrs)) - - h_sha256 = sha256(''.join(normalized)).hexdigest() - return (normalized, changed_bits, h_sha256) - # For debugging - #return (original, normalized, changed_bits, h_sha256) - - except Exception as e: - return (None, changed_bits, None) - - def _add(self, function): - ''' - - ''' - opcodes = function['opcodes'] - architecture = function['architecture'] - normalized, changed, h_sha256 = self.normalize(opcodes, architecture) - - if (not h_sha256) or (not normalized) or (8 > len(normalized)): - return - - try: - db_obj = BasicMasking.objects( sha256=h_sha256, - architecture=architecture, - instructions=normalized).get() - except DoesNotExist: - db_obj = BasicMasking( sha256=h_sha256, - architecture=architecture, - instructions=normalized, - total_bytes=len(opcodes)) - - function_id = ObjectId(function['id']) - if function_id not in db_obj.functions: - db_obj.functions.append(function_id) - db_obj.save() - - def _scan(self, opcodes, architecture, apis): - '''Returns List of tuples (function ID, similarity percentage)''' - db = self._dbs['first_db'] - normalized, changed, h_sha256 = self.normalize(opcodes, architecture) - - if (not h_sha256) or (not normalized) or (8 > len(normalized)): - return - - try: - db_obj = BasicMasking.objects( sha256=h_sha256, - architecture=architecture, - instructions=normalized).get() - except DoesNotExist: - return None - - results = [] - for function_id in db_obj.function_list(): - function = db.find_function(_id=ObjectId(function_id)) - - if not function or not function.metadata: - continue - - # Similarity = 90% (opcodes and the masking changes) - # + 10% (api overlap) - similarity = 100 - ((changed / (len(opcodes) * 8.0)) * 100) - if similarity 
> 90.0: - similarity = 90.0 - - # The APIs will count up to 10% of the similarity score - total_apis = len(function.apis) - overlap = float(len(set(function.apis).intersection(apis))) - if 0 != total_apis: - similarity += (overlap / total_apis) * 10 - - results.append(FunctionResult(function_id, similarity)) - - return results - - def _uninstall(self): - BasicMasking.drop_collection() diff --git a/server/first/engines/mnemonic_hash.py b/server/first/engines/mnemonic_hash.py deleted file mode 100644 index 19b23ae..0000000 --- a/server/first/engines/mnemonic_hash.py +++ /dev/null @@ -1,145 +0,0 @@ -#------------------------------------------------------------------------------- -# -# FIRST Engine: Mnemonic Hash -# Uses Distorm3 to obtain mnemonics from the opcodes, reduces the opcodes to -# a single string and hashes it for future lookup -# -# Copyright (C) 2016 Angel M. Villegas -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
-# -# Requirements -# ------------ -# - distorm3 -# - mongoengine -# -#------------------------------------------------------------------------------- - -# Python Modules -from hashlib import sha256 - -# FIRST Modules -from first.error import FIRSTError -from first.engines import AbstractEngine -from first.engines.results import FunctionResult - -# Third Party Modules -from bson.objectid import ObjectId -from distorm3 import DecomposeGenerator, Decode32Bits, Decode64Bits, Decode16Bits -from mongoengine.queryset import DoesNotExist, MultipleObjectsReturned -from mongoengine import Document, StringField, ListField, ObjectIdField - -class MnemonicHash(Document): - sha256 = StringField(max_length=64, required=True) - architecture = StringField(max_length=64, required=True) - functions = ListField(ObjectIdField(), default=list) - - meta = { - 'indexes' : [('sha256', 'architecture')] - } - - def dump(self): - return {'sha256' : self.sha256, - 'architecture' : self.architecture, - 'functions' : self.function_list()} - - def function_list(self): - return [str(x) for x in self.functions] - - -class MnemonicHashEngine(AbstractEngine): - _name = 'MnemonicHash' - _description = ('Uses mnemonics from the opcodes to generate a hash ' - '(Intel Only). 
Requires at least 8 mnemonics.') - _required_db_names = ['first_db'] - - def mnemonic_hash(self, opcodes, architecture): - dt = None - mapping = {'intel32' : Decode32Bits, - 'intel64' : Decode64Bits, - 'intel16' : Decode16Bits} - if architecture in mapping: - dt = mapping[architecture] - else: - return (None, None) - - try: - iterable = DecomposeGenerator(0, opcodes, dt) - - # Uses valid to ensure we are not creating hashes with 'db 0xYY' - mnemonics = [d.mnemonic for d in iterable if d.valid] - return (mnemonics, sha256(''.join(mnemonics)).hexdigest()) - - except Exception as e: - return (None, None) - - def _add(self, function): - ''' - Nothing needs to be implemented since the Function Model has the - sha256 of the opcodes - ''' - opcodes = function['opcodes'] - architecture = function['architecture'] - mnemonics, mnemonic_sha256 = self.mnemonic_hash(opcodes, architecture) - - if (not mnemonic_sha256) or (not mnemonics) or (8 > len(mnemonics)): - return - - try: - db_obj = MnemonicHash.objects( sha256=mnemonic_sha256, - architecture=architecture).get() - except DoesNotExist: - db_obj = MnemonicHash( sha256=mnemonic_sha256, - architecture=architecture) - - function_id = ObjectId(function['id']) - if function_id not in db_obj.functions: - db_obj.functions.append(function_id) - db_obj.save() - - def _scan(self, opcodes, architecture, apis): - '''Returns List of tuples (function ID, similarity percentage)''' - db = self._dbs['first_db'] - mnemonics, mnemonic_sha256 = self.mnemonic_hash(opcodes, architecture) - - if (not mnemonic_sha256) or (not mnemonics) or (8 > len(mnemonics)): - return - - try: - db_obj = MnemonicHash.objects( sha256=mnemonic_sha256, - architecture=architecture).get() - except DoesNotExist: - return None - - results = [] - for function_id in db_obj.function_list(): - similarity = 75.0 - function = db.find_function(_id=ObjectId(function_id)) - - if not function or not function.metadata: - continue - - # The APIs will count up to 10% of the 
similarity score - total_apis = len(function.apis) - overlap = float(len(set(function.apis).intersection(apis))) - if 0 != total_apis: - similarity += (overlap / total_apis) * 10 - - results.append(FunctionResult(function_id, similarity)) - - return results - - def _uninstall(self): - MnemonicHash.drop_collection() diff --git a/server/first/models.py b/server/first/models.py deleted file mode 100644 index 1846882..0000000 --- a/server/first/models.py +++ /dev/null @@ -1,197 +0,0 @@ -#------------------------------------------------------------------------------- -# -# FIRST MongoDB Models -# Copyright (C) 2016 Angel M. Villegas -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
-# -# Requirements -# ------------ -# mongoengine (https://pypi.python.org/pypi/mongoengine/) -# -#------------------------------------------------------------------------------- - - -# Python Modules -from __future__ import unicode_literals -import datetime - -# Third Party Modules -from bson.objectid import ObjectId -from mongoengine import Document, StringField, UUIDField, \ - DateTimeField, LongField, ReferenceField, \ - BinaryField, ListField, BooleanField, ObjectIdField, \ - IntField, EmbeddedDocument, EmbeddedDocumentListField - -class User(Document): - name = StringField(max_length=128, required=True) - email = StringField(max_length=254, unique=True) - handle = StringField(max_length=32, required=True) - number = IntField(required=True) - api_key = UUIDField(required=True, unique=True) - created = DateTimeField(default=datetime.datetime.utcnow, required=True) - rank = LongField(default=0) - active = BooleanField(default=True) - - service = StringField(max_length=16, required=True) - auth_data = StringField(max_length=4096, required=True) - - meta = { - 'indexes' : [('handle', 'number'), 'api_key', 'email'] - } - - @property - def user_handle(self): - return '{0.handle}#{0.number:04d}'.format(self) - - def dump(self, full=False): - data = {'handle' : self.user_handle} - - if full: - data.update({ 'id' : str(self.id), - 'name' : self.name, - 'email' : self.email, - 'api_key' : self.api_key, - 'rank' : self.rank, - 'created' : self.created, - 'active' : self.active}) - - return data - - -class Engine(Document): - name = StringField(max_length=16, required=True, unique=True) - description = StringField(max_length=128, required=True) - path = StringField(max_length=256, required=True) - obj_name = StringField(max_length=32, required=True) - applied = ListField(default=list) - developer = ReferenceField(User) - active = BooleanField(default=False) - - meta = { - 'indexes' : ['name'] - } - - def dump(self, full=False): - data = {'name' : self.name, - 
'description' : self.description, - 'rank' : self.rank, - 'developer' : self.developer.user_handle} - - if full: - data.update({'id' : str(self.id), 'path' : self.path}) - - return data - - @property - def rank(self): - return len(self.applied) - - -class Metadata(EmbeddedDocument): - id = ObjectIdField(required=True, default=lambda: ObjectId()) - user = ReferenceField(User) - name = ListField(StringField(max_length=128), default=list) - prototype = ListField(StringField(max_length=256), default=list) - comment = ListField(StringField(max_length=512), default=list) - committed = ListField(DateTimeField(), default=list) - applied = ListField(default=list) - - meta = { - 'indexes' : ['user'] - } - - def dump(self, full=False): - data = {'creator' : self.user.user_handle, - 'name' : self.name[0], - 'prototype' : self.prototype[0], - 'comment' : self.comment[0], - 'rank' : len(self.applied)} - - if full: - data['history'] = [] - for i in xrange(len(self.name) - 1, -1, -1): - # Convert back with: - # datetime.datetime.strptime(
, '%Y-%m-%dT%H:%M:%S.%f') - committed = self.committed[i].isoformat() - data['history'].append({'name' : self.name[i], - 'prototype' : self.prototype[i], - 'comment' : self.comment[i], - 'committed' : committed}) - - return data - - def has_changed(self, name, prototype, comment): - if (not self.name) or (not self.prototype) or (not comment): - return True - - if ((self.name[0] != name) - or (self.prototype[0] != prototype) - or (self.comment[0] != comment)): - return True - - return False - - @property - def rank(self): - return len(self.applied) - -# Use bson.Binary to insert binary data -class Function(Document): - sha256 = StringField(max_length=64) - opcodes = BinaryField() - apis = ListField(StringField(max_length=128), default=list) - metadata = EmbeddedDocumentListField(Metadata, default=list) - # Return value from idaapi.get_file_type_name() - architecture = StringField(max_length=64, required=True) - - meta = { - 'indexes' : [] - } - - def dump(self): - return {'id' : self.id, - 'opcodes' : self.opcodes, - 'apis' : self.apis, - 'metadata' : [str(x.id) for x in self.metadata], - 'architecture' : self.architecture, - 'sha256' : self.sha256} - - -class Sample(Document): - md5 = StringField(max_length=32, required=True) - crc32 = IntField(required=True) - sha1 = StringField(max_length=40) - sha256 = StringField(max_length=64) - seen_by = ListField(ReferenceField(User), default=list) - functions = ListField(ReferenceField(Function), default=list) - last_seen = DateTimeField(default=datetime.datetime.utcnow) - - meta = { - 'indexes' : [('md5', 'crc32')] - } - - def dump(self): - data = {'md5' : self.md5, 'crc32' : self.crc32, - 'seen_by' : [str(x.id) for x in self.seen_by], - 'functions' : [str(x.id) for x in self.functions]} - - if 'sha1' in self: - data['sha1'] = self.sha1 - - if 'sha256' in self: - data['sha256'] = self.sha256 - - return data diff --git a/server/first/settings.py b/server/first/settings.py index d2e318c..c438114 100644 --- 
a/server/first/settings.py +++ b/server/first/settings.py @@ -11,9 +11,21 @@ """ import os - -# Third Party Modules -import mongoengine +import json + +# Read in configuration data +FIRST_CONFIG_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '..', + 'first_config.json') +CONFIG = {} +try: + config_data = json.load(file(FIRST_CONFIG_FILE)) + if type(config_data) == dict: + CONFIG = config_data +except IOError as ioe: + print '[1st] IOError: {}'.format(ioe) +except ValueError as ve: + print '[1st] ValueError: {}'.format(ve) # Build paths inside the project like this: os.path.join(BASE_DIR, ...) BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -23,18 +35,20 @@ # See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = 'd2nev@620*3vi@qvynch)seb4^pghp=-)aenfs(4%)-k@xqpo9' +SECRET_KEY = CONFIG.get('secret_key', + 'd2nev@620*3vi@qvynch)seb4^pghp=-)aenfs(4%)-k@xqpo9') # SECURITY WARNING: don't run with debug turned on in production! 
-DEBUG = True +DEBUG = CONFIG.get('debug', True) -ALLOWED_HOSTS = [] +ALLOWED_HOSTS = CONFIG.get('allowed_hosts', []) # Application definition INSTALLED_APPS = [ 'www.apps.WwwConfig', + 'engines.apps.EnginesConfig', 'django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', @@ -80,31 +94,17 @@ # Database # https://docs.djangoproject.com/en/1.10/ref/settings/#databases -# MySQL Settings -_MYSQL_USER = os.environ.get('MYSQL_USER', 'root') -_MYSQL_PASSWORD = os.environ.get('MYSQL_PASSWORD', '') -_MYSQL_DATABASE = os.environ.get('MYSQL_DATABASE', 'first_db') -_MYSQL_HOST = os.environ.get('MYSQL_HOST', 'localhost') -_MYSQL_PORT = os.environ.get('MYSQL_PORT', 3306) - DATABASES = { 'default': { - 'ENGINE': 'django.db.backends.mysql', - 'NAME': _MYSQL_DATABASE, - 'USER': _MYSQL_USER, - 'PASSWORD': _MYSQL_PASSWORD, - 'HOST': _MYSQL_HOST, - 'PORT': _MYSQL_PORT + 'ENGINE': CONFIG.get('db_engine', 'django.db.backends.mysql'), + 'NAME': CONFIG.get('db_dbname', 'first_db'), + 'USER': CONFIG.get('db_user', 'root'), + 'PASSWORD': CONFIG.get('db_password', ''), + 'HOST': CONFIG.get('db_host', 'localhost'), + 'PORT': CONFIG.get('db_port', 3306) } } -# MongoDB settings -_MONGODB_HOST = os.environ.get('MONGO_HOST', 'localhost') -_MONGODB_PORT = int(os.environ.get('MONGO_PORT', 27017)) -_MONGODB_NAME = os.environ.get('MONGO_NAME', 'first_db') - -mongoengine.connect(_MONGODB_NAME, host=_MONGODB_HOST, port=_MONGODB_PORT) - # Password validation # https://docs.djangoproject.com/en/1.10/ref/settings/#auth-password-validators @@ -127,19 +127,19 @@ # Internationalization # https://docs.djangoproject.com/en/1.10/topics/i18n/ -LANGUAGE_CODE = 'en-us' +LANGUAGE_CODE = CONFIG.get('language_code', 'en-us') -TIME_ZONE = 'EST' +TIME_ZONE = CONFIG.get('time_zone', 'EST') -USE_I18N = True +USE_I18N = CONFIG.get('use_i18n', True) -USE_L10N = True +USE_L10N = CONFIG.get('use_l10n', True) -USE_TZ = True +USE_TZ = CONFIG.get('use_tz', True) # Static files (CSS, JavaScript, Images) # 
https://docs.djangoproject.com/en/1.10/howto/static-files/ STATIC_ROOT = os.path.join(BASE_DIR, 'static') -STATIC_URL = '/static/' +STATIC_URL = CONFIG.get('static_url', '/static/') diff --git a/server/first_core/__init__.py b/server/first_core/__init__.py new file mode 100644 index 0000000..4b7b779 --- /dev/null +++ b/server/first_core/__init__.py @@ -0,0 +1,27 @@ +#------------------------------------------------------------------------------- +# +# Intializes FIRST's DBManager and EngineManager +# Copyright (C) 2016 Angel M. Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +#------------------------------------------------------------------------------- + +# FIRST Modules +from first_core.dbs import FIRSTDBManager +from first_core.engines import FIRSTEngineManager + +DBManager = FIRSTDBManager() +EngineManager = FIRSTEngineManager(DBManager) diff --git a/server/first/auth.py b/server/first_core/auth.py similarity index 90% rename from server/first/auth.py rename to server/first_core/auth.py index 724399e..bd05ba9 100644 --- a/server/first/auth.py +++ b/server/first_core/auth.py @@ -32,20 +32,21 @@ from functools import wraps # Django Modules +from django.core.exceptions import ObjectDoesNotExist from django.http import HttpResponse, HttpRequest from django.shortcuts import render, redirect from django.urls import reverse # FIRST Modules # TODO: Use DBManager to get user objects and do User operations -from first.models import User -from first.error import FIRSTError +from first.settings import CONFIG +from first_core.models import User +from first_core.error import FIRSTError # Thirdy Party import httplib2 from oauth2client import client from apiclient import discovery -from mongoengine.queryset import DoesNotExist @@ -57,7 +58,7 @@ def __init__(self, message): def verify_api_key(api_key): - users = User.objects(api_key=api_key) + users = User.objects.filter(api_key=api_key) if not users: return None @@ -75,7 +76,7 @@ def decorated_function(*args, **kwargs): if key: user = verify_api_key(key) del kwargs['api_key'] - if user: + if user and user.active: kwargs['user'] = user return view_function(*args, **kwargs) @@ -118,7 +119,7 @@ class Authentication(): def __init__(self, request): self.request = request redirect_uri = request.build_absolute_uri(reverse('www:oauth', kwargs={'service' : 'google'})) - secret = os.environ.get('GOOGLE_SECRET', '/usr/local/etc/google_secret.json') + secret = CONFIG.get('oauth_path', '/usr/local/etc/google_secret.json') try: self.flow = {'google' : client.flow_from_clientsecrets(secret, 
scope=['https://www.googleapis.com/auth/userinfo.profile', @@ -178,12 +179,10 @@ def login_step_2(self, auth_code, url, login=True): if not oauth.access_token_expired: http_auth = oauth.authorize(httplib2.Http()) - service = discovery.build('plus', 'v1', http_auth) - info = service.people().get(userId='me', fields='displayName,emails') - info = info.execute() - email = info['emails'][0]['value'] - self.request.session['info'] = {'name' : info['displayName'], - 'email' : email} + service = discovery.build('oauth2', 'v2', http_auth) + response = service.userinfo().v2().me().get().execute() + self.request.session['info'] = {'name' : response['name'], + 'email' : response['email']} expires = credentials['id_token']['exp'] #expires = datetime.datetime.fromtimestamp(expires) @@ -200,7 +199,7 @@ def login_step_2(self, auth_code, url, login=True): return redirect(url) - except DoesNotExist: + except ObjectDoesNotExist: self.request.session.flush() raise FIRSTAuthError('User is not registered.') @@ -236,7 +235,7 @@ def register_user(self): user = None continue - except DoesNotExist: + except ObjectDoesNotExist: pass # Create random 4 digit value for the handle @@ -248,7 +247,7 @@ def register_user(self): user = User.objects.get(handle=handle, number=num) user = None - except DoesNotExist: + except ObjectDoesNotExist: user = User(name=name, email=email, api_key=api_key, @@ -269,5 +268,5 @@ def get_user_data(email): user = User.objects.get(email=email) return user - except DoesNotExist: + except ObjectDoesNotExist: return None diff --git a/server/first/dbs/__init__.py b/server/first_core/dbs/__init__.py similarity index 97% rename from server/first/dbs/__init__.py rename to server/first_core/dbs/__init__.py index 3b28548..ce27b71 100644 --- a/server/first/dbs/__init__.py +++ b/server/first_core/dbs/__init__.py @@ -24,7 +24,7 @@ from hashlib import md5 # FIRST Modules -from first.error import FIRSTError +from first_core.error import FIRSTError # Class for FirstDB related 
exceptions class FIRSTDBError(FIRSTError): @@ -110,6 +110,6 @@ def get(self, db_name): # FIRST DB Classes -from first.dbs.builtin_db import FIRSTDB +from first_core.dbs.builtin_db import FIRSTDB possible_dbs = [FIRSTDB] diff --git a/server/first_core/dbs/builtin_db.py b/server/first_core/dbs/builtin_db.py new file mode 100644 index 0000000..e140b9d --- /dev/null +++ b/server/first_core/dbs/builtin_db.py @@ -0,0 +1,428 @@ +#------------------------------------------------------------------------------- +# +# FIRST DB Module for completing operations with the MongoDB backend +# Copyright (C) 2016 Angel M. Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# Requirements +# ------------ +# - werkzeug +# +#------------------------------------------------------------------------------- + +# Python Modules +import re +import math +import json +import hashlib +import ConfigParser +from hashlib import md5 + +# Third Party Modules +from django.utils import timezone +from django.core.paginator import Paginator +from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned + +# FIRST Modules +from first_core.dbs import AbstractDB +from first_core.util import make_id, parse_id, separate_metadata, \ + is_engine_metadata +from first_core.models import User, Sample, \ + Engine, \ + Metadata, MetadataDetails, AppliedMetadata, \ + Function, FunctionApis + + +class FIRSTDB(AbstractDB): + _name = 'first_db' + standards = { 'intel16', 'intel32', 'intel64', 'arm32', 'arm64', 'mips', + 'ppc', 'sparc', 'sysz'} + + # + # Functions called by FIRST Framework + #-------------------------------------------------------------------------- + def __init__(self, config): + ''' + Constructor. 
+ + @param conf: ConfigParser.RawConfigParser + ''' + self._is_installed = True + ''' + section = 'mongodb_settings' + + if (not config.has_section(section) + or not config.has_option(section, 'db')): + raise FirstDBError('DB settings not available', skip=True) + + if section.upper() not in app.config: + app.config[section.upper()] = {} + + app.config[section.upper()]['db'] = conf.get(section, 'db') + self.db.init_app(app) + ''' + + def get_architectures(self): + field = 'architecture' + architectures = Function.objects.values(field).distinct() + + standards = FIRSTDB.standards.copy() + standards.update({x[field] for x in architectures}) + return list(standards) + + def get_sample(self, md5_hash, crc32, create=False): + try: + # Get Sample from DB + return Sample.objects.get(md5=md5_hash, crc32=crc32) + + except ObjectDoesNotExist: + if not create: + return None + + # Create Sample for DB + sample = Sample(md5=md5_hash, crc32=crc32) + sample.last_seen = timezone.now() + sample.save() + return sample + + except MultipleObjectsReturned: + # TODO: log occurance + raise + + def sample_seen_by_user(self, sample, user): + if (not isinstance(sample, Sample)) or (not isinstance(user, User)): + return None + + if not Sample.objects.filter(pk=sample.id, seen_by=user).count(): + sample.seen_by.add(user) + + def checkin(self, user, md5_hash, crc32, sha1_hash=None, sha256_hash=None): + ''' + TODO: + + @returns String error message on Failure + None + ''' + if not isinstance(user, User): + return False + + # Validate data + if ((not re.match('^[a-f\d]{32}$', md5_hash)) + or (sha1_hash and not re.match('^[a-f\d]{40}$', sha1_hash)) + or (sha256_hash and not re.match('^[a-f\d]{64}$', sha256_hash))): + return False + + sample = self.get_sample(md5_hash, crc32, True) + if not sample: + return False + + sample.last_seen = timezone.now() + if not Sample.objects.filter(pk=sample.id, seen_by=user).count(): + sample.seen_by.add(user) + + if None != sha1_hash: + sample.sha1 = sha1_hash + + 
if None != sha256_hash: + sample.sha256 = sha256_hash + + sample.save() + return True + + def get_function_metadata(self, _id): + '''Get the metadata associated with the provided Function ID + + Args: + _id (:obj:`int`): ID from Function model + + Returns: + QuerySet. + ''' + return Metadata.objects.filter(function__pk=_id) + + def get_function(self, opcodes, architecture, apis, create=False, **kwargs): + sha256_hash = hashlib.sha256(opcodes).hexdigest() + function = None + + try: + function = Function.objects.get(sha256=sha256_hash, + opcodes=opcodes, + architecture=architecture) #, + #apis__api=apis) + except ObjectDoesNotExist: + if create: + # Create function and add it to sample + function = Function.objects.create( sha256=sha256_hash, + opcodes=opcodes, + architecture=architecture) + + apis_ = [FunctionApis.objects.get_or_create(x)[0] for x in apis] + for api in apis_: + function.apis.add(api) + + return function + + def get_all_functions(self): + try: + return Function.objects.all() + + except: + return [] + + def find_function(self, _id=None, opcodes=None, apis=None, architecture=None, h_sha256=None): + try: + # User function ID + if None != _id: + return Function.objects.get(pk=_id) + + # User opcodes and apis + elif None not in [opcodes, apis]: + return Function.objects.get(opcodes=opcodes, apis=apis) + + # Use hash, architecture + elif None not in [architecture, h_sha256]: + return Function.objects.get(sha256=h_sha256, + architecture=architecture) + + else: + return None + + except ObjectDoesNotExist: + return None + + except MultipleObjectsReturned: + # TODO: Log + raise + + def add_function_to_sample(self, sample, function): + if (not isinstance(sample, Sample)) or (not isinstance(function, Function)): + return False + + if not Sample.objects.filter(pk=sample.id, functions=function).count(): + sample.functions.add(function) + + return True + + def add_metadata_to_function(self, user, function, name, prototype, comment, **kwargs): + if (not 
isinstance(function, Function)) or (not isinstance(user, User)): + return None + + # Check to see if user already has metadata associated with the sample + metadata = None + if Function.objects.filter(pk=function.id, metadata__user=user).count(): + # Metadata already exists + metadata = Metadata.objects.get(function=function, user=user) + else: + metadata = Metadata.objects.create(user=user) + function.metadata.add(metadata) + + if metadata.has_changed(name, prototype, comment): + md = MetadataDetails.objects.create(name=name, + prototype=prototype, + comment=comment) + metadata.details.add(md) + + return metadata.id + + def get_metadata_list(self, metadata): + results = [] + metadata_ids, engine_metadata = separate_metadata(metadata) + + for _id, metadata in Metadata.objects.in_bulk(metadata_ids).iteritems(): + data = metadata.dump() + data['id'] = make_id(0, metadata=metadata.id) + results.append(data) + + for flag, _id, metadata_id in engine_metadata: + engines = Engine.objects.get(pk=_id) + # TODO: Send metadata_id to engine for more info + if (not engines) or (len(engines) > 1): + continue + + data = {'id' : make_id(flag, metadata_id, _id), + 'engine' : engine.name, + 'description' : engine.description} + results.append(data) + + return results + + def delete_metadata(self, user, metadata_id): + if not isinstance(user, User): + return False + + user_metadata, engine_metadata = separate_metadata([metadata_id]) + if not user_metadata: + return False + + # User must be the creator of the metadata to delete it + metadata_id = user_metadata[0] + try: + metadata = Metadata.objects.get(pk=metadata_id, user=user) + metadata.delete() + return True + + except ObjectDoesNotExist: + return False + + def created(self, user, page, max_metadata=20): + pages = 0 + results = [] + + if (page < 1) or (not isinstance(user, User)): + return (results, pages) + + p = Paginator(Metadata.objects.filter(user=user), max_metadata) + pages = p.num_pages + + if page > pages: + return 
(results, pages) + + for metadata in p.page(page): + temp = metadata.dump() + temp['id'] = make_id(0, metadata=metadata.id) + results.append(temp) + + return (results, pages) + + def metadata_history(self, metadata): + results = {} + metadata_ids, engine_metadata = separate_metadata(metadata) + e_comment = ('Generated by Engine: {0.name}\n{0.description}\n\n' + 'Developer: {0.developer.user_handle}') + + for _id, metadata in Metadata.objects.in_bulk(metadata_ids).iteritems(): + data = metadata.dump(True) + result_key = make_id(0, metadata=_id) + results[result_key] = { 'creator' : data['creator'], + 'history' : data['history']} + + # Provide information for engine created metadata... + for flag, engine_id, _id in engine_metadata: + engine = self.get_engine(engine_id) + if not engine: + continue + data = {'creator' : engine.name, + 'history' : [{'committed' : '', + 'name' : 'N/A', + 'prototype' : 'N/A', + 'comment' : e_comment.format(engine)}]} + result_key = make_id(flag, engine=engine_id, metadata=_id) + results[result_key] = data + + return results + + def applied(self, sample, user, _id): + ''' + @returns Boolean. 
True if added to the applied list + False if not added to the applied list + ''' + if (not isinstance(user, User)) or (not isinstance(sample, Sample)): + return False + + flag, engine_data, metadata_id = parse_id(_id) + if is_engine_metadata(_id): + pass + # TODO: add this capability back again + #engine_id = _id + #engine = Engine.objects(id=engine_id, + # applied__contains=key) + + ## Check if user has already applied the signature + #if len(engine): + # return True + + #try: + # engine = Engine.objects(id=engine_id).get() + #except ObjectDoesNotExist: + # # Engine does not exist + # return False + + #engine.applied.append(key) + #engine.save() + + else: + try: + # Ensure Metadata exists + metadata = Metadata.objects.get(pk=metadata_id) + except ObjectDoesNotExist: + # Metadata does not exist + return False + + r = AppliedMetadata.objects.get_or_create( user=user, + sample=sample, + metadata=metadata) + + return True + + def unapplied(self, sample, user, _id): + ''' + @returns Boolean. 
True if not in metadata's applied list + False if still in the applied list + ''' + if (not isinstance(sample, Sample)) or (not isinstance(user, User)): + return False + + flag, engine_data, metadata_id = parse_id(_id) + if is_engine_metadata(_id): + pass + # TODO: add this capability back again + #engine_id = _id + #engine = Engine.objects(id=engine_id, + # applied__contains=key) + + ## Check if user has already applied the signature + #if not len(engine): + # return True + + #try: + # engine = Engine.objects(id=engine_id).get() + #except ObjectDoesNotExist: + # # Engine does not exist + # return False + + #engine.applied.remove(key) + #engine.save() + + else: + try: + # Ensure Metadata exists + metadata = Metadata.objects.get(pk=metadata_id) + except ObjectDoesNotExist: + # Metadata does not exist + return False + + try: + data = AppliedMetadata.objects.get( user=user, + sample=sample, + metadata=metadata) + data.delete() + return True + + except ObjectDoesNotExist: + return True + + + return False + + def engines(self, active=True): + return Engine.objects.filter(active=bool(active)) + + def get_engine(self, engine_id): + engines = Engine.objects.filter(pk=engine_id) + if not engines.count(): + return None + + return engines.first() diff --git a/server/first_core/disassembly/__init__.py b/server/first_core/disassembly/__init__.py new file mode 100644 index 0000000..4bf79de --- /dev/null +++ b/server/first_core/disassembly/__init__.py @@ -0,0 +1,172 @@ +# Third Party Modules +from capstone import * +from capstone.ppc import * +from capstone.systemz import * +from capstone.arm import * +from capstone.arm64 import * +from capstone.x86 import * +from capstone.sparc import * +from capstone.mips import * + +arch_mapping = { + 'ppc' : (CS_ARCH_PPC, CS_MODE_32), + 'ppc32' : (CS_ARCH_PPC, CS_MODE_32), + 'ppc64' : (CS_ARCH_PPC, CS_MODE_64), + 'intel16' : (CS_ARCH_X86, CS_MODE_16), + 'sysz' : (CS_ARCH_SYSZ, None), + 'arm32' : (CS_ARCH_ARM, CS_MODE_ARM), + 'intel32' : 
(CS_ARCH_X86, CS_MODE_32), + 'intel64' : (CS_ARCH_X86, CS_MODE_64), + 'sparc' : (CS_ARCH_SPARC, None), + 'arm64' : (CS_ARCH_ARM64, CS_MODE_ARM), + 'mips' : (CS_ARCH_MIPS, CS_MODE_32), + 'mips64' : (CS_ARCH_MIPS, CS_MODE_64) +} + +reg_mapping = { + 'ppc' : PPC_OP_REG, 'ppc32' : PPC_OP_REG, 'ppc64' : PPC_OP_REG, + 'sysz' : SYSZ_OP_REG, + 'intel16' : X86_OP_REG, 'intel32' : X86_OP_REG, 'intel64' : X86_OP_REG, + 'sparc' : SPARC_OP_REG, + 'arm32' : ARM_OP_REG, 'arm64' : ARM64_OP_REG, + 'mips' : MIPS_OP_REG, 'mips64' : MIPS_OP_REG +} + +imm_mapping = { + 'ppc' : PPC_OP_IMM, 'ppc32' : PPC_OP_IMM, 'ppc64' : PPC_OP_IMM, + 'sysz' : SYSZ_OP_IMM, + 'intel16' : X86_OP_IMM, 'intel32' : X86_OP_IMM, 'intel64' : X86_OP_IMM, + 'sparc' : SPARC_OP_IMM, + 'arm32' : ARM_OP_IMM, 'arm64' : ARM64_OP_IMM, + 'mips' : MIPS_OP_IMM, 'mips64' : MIPS_OP_IMM +} + +mem_mapping = { + 'ppc' : PPC_OP_MEM, 'ppc32' : PPC_OP_MEM, 'ppc64' : PPC_OP_MEM, + 'sysz' : SYSZ_OP_MEM, + 'intel16' : X86_OP_MEM, 'intel32' : X86_OP_MEM, 'intel64' : X86_OP_MEM, + 'sparc' : SPARC_OP_MEM, + 'arm32' : ARM_OP_MEM, 'arm64' : ARM64_OP_MEM, + 'mips' : MIPS_OP_MEM, 'mips64' : MIPS_OP_MEM +} + +invalid_mapping = { + 'ppc' : PPC_OP_INVALID, 'ppc32' : PPC_OP_INVALID, 'ppc64' : PPC_OP_INVALID, + 'sysz' : SYSZ_OP_INVALID, + 'intel16' : X86_OP_INVALID, 'intel32' : X86_OP_INVALID, 'intel64' : X86_OP_INVALID, + 'sparc' : SPARC_OP_INVALID, + 'arm32' : ARM_OP_INVALID, 'arm64' : ARM64_OP_INVALID, + 'mips' : MIPS_OP_INVALID, 'mips64' : MIPS_OP_INVALID +} + +_call_mapping = { + 'ppc' : [], + 'sysz' : [], + 'x86' : [X86_INS_CALL], + 'sysz' : [], + 'sparc' : [], + 'arm' : [], + 'arm64' : [], + 'mips' : [] +} +call_mapping = { + 'ppc' : _call_mapping['ppc'], + 'ppc32' : _call_mapping['ppc'], + 'ppc64' : _call_mapping['ppc'], + 'sysz' : _call_mapping['sysz'], + 'intel16' : _call_mapping['x86'], + 'intel32' : _call_mapping['x86'], + 'intel64' : _call_mapping['x86'], + 'sparc' : _call_mapping['sparc'], + 'arm32' : _call_mapping['arm'], 'arm64' 
: _call_mapping['arm64'], + 'mips' : _call_mapping['mips'], 'mips64' : _call_mapping['mips'] +} + +_jump_mapping = { + 'x86' : [ X86_INS_JA, X86_INS_JAE, X86_INS_JB, X86_INS_JBE, X86_INS_JCXZ, + X86_INS_JE, X86_INS_JECXZ, X86_INS_JG, X86_INS_JGE, X86_INS_JL, + X86_INS_JLE, X86_INS_JMP, X86_INS_JNE, X86_INS_JNO, X86_INS_JNP, + X86_INS_JNS, X86_INS_JO, X86_INS_JP, X86_INS_JRCXZ, X86_INS_JS, + X86_INS_LJMP] +} +jump_mapping = { + 'intel16' : _jump_mapping['x86'], + 'intel32' : _jump_mapping['x86'], + 'intel64' : _jump_mapping['x86'] +} + +stack_offsets = { + 'intel16' : [X86_REG_SP], + 'intel32' : [X86_REG_EBP, X86_REG_ESP], + 'intel64' : [X86_REG_RSP] +} + + +class Disassembly(object): + def __init__(self, architecture, code): + self.md = None + self.data = [] + self.code = code + self.iterator = None + self.architecture = architecture + + self.valid = False + + if architecture in arch_mapping: + arch, mode = arch_mapping[architecture] + self.md = Cs(arch, mode) + self.md.detail = True + self.iterator = self.md.disasm(self.code, 0) + self.valid = True + + + + def instructions(self): + # When first called function will return cached instructions + for i in xrange(len(self.data)): + yield self.data[i] + + # Then iterate through non-cached instructions + if self.iterator: + for i in self.iterator: + self.data.append(i) + yield i + + self.iterator = None + + + def _check_mapping(self, mapping, operand, attr='type', equal=True): + if ((not hasattr(operand, attr)) + or (self.architecture not in mapping)): + False + + if equal: + return getattr(operand, attr) == mapping[self.architecture] + + return getattr(operand, attr) in mapping[self.architecture] + + # Operand Related Functionality + def is_op_reg(self, operand): + return self._check_mapping(reg_mapping, operand) + + def is_op_mem(self, operand): + return self._check_mapping(mem_mapping, operand) + + def is_op_imm(self, operand): + return self._check_mapping(imm_mapping, operand) + + def is_op_invalid(self, operand): + 
return self._check_mapping(invalid_mapping, operand) + + def is_stack_offset(self, operand): + if not hasattr(operand, 'mem'): + return False + return self._check_mapping(stack_offsets, operand.mem, 'base', False) + + + # Instruction Related functionality + def is_call(self, instr): + return self._check_mapping(call_mapping, instr, 'id', False) + + def is_jump(self, instr): + return self._check_mapping(jump_mapping, instr, 'id', False) diff --git a/server/first/engines/__init__.py b/server/first_core/engines/__init__.py similarity index 92% rename from server/first/engines/__init__.py rename to server/first_core/engines/__init__.py index 4fce345..cd17b5d 100644 --- a/server/first/engines/__init__.py +++ b/server/first_core/engines/__init__.py @@ -15,12 +15,12 @@ import sys # First Modules -from first.error import FIRSTError -from first.dbs import FIRSTDBManager -from first.engines.results import Result +from first_core.error import FIRSTError +from first_core.dbs import FIRSTDBManager +from first_core.engines.results import Result +from first_core.disassembly import Disassembly # Third Party Modules -from bson.objectid import ObjectId # Class for FirstEngine related exceptions @@ -96,9 +96,9 @@ def add(self, function): self._add(function) - def scan(self, opcodes, architecture, apis): + def scan(self, opcodes, architecture, apis, **kwargs): '''Returns a list of Result objects''' - results = self._scan(opcodes, architecture, apis) + results = self._scan(opcodes, architecture, apis, **kwargs) if isinstance(results, Result): return [results] @@ -131,7 +131,7 @@ def _add(self, function): '''Returns nothing''' raise FIRSTEngineError('Not Implemented') - def _scan(self, opcodes, architecture, apis): + def _scan(self, opcodes, architecture, apis, **kwargs): '''Returns List of function IDs''' raise FIRSTEngineError('Not Implemented') @@ -170,9 +170,7 @@ def _engines(self): # Dynamically (re)load engines engines = [] for e in active_engines: - if e.path in sys.modules: - 
reload(sys.modules[e.path]) - else: + if e.path not in sys.modules: __import__(e.path) module = sys.modules[e.path] @@ -220,9 +218,13 @@ def add(self, function): ''' required_keys = {'id', 'apis', 'opcodes', 'architecture', 'sha256'} if (dict != type(function)) or not required_keys.issubset(function.keys()): - print 'Data provided is not the correct type or required keys not provided' + print '[1stEM] Data provided is not the correct type or required keys not provided' return None + dis = Disassembly(function['architecture'], function['opcodes']) + if dis: + function['disassembly'] = dis + # Send function details to each registered engine errors = {} for engine in self._engines: @@ -267,10 +269,12 @@ def scan(self, user, opcodes, architecture, apis): engine_results = {} engines = self._engines + dis = Disassembly(architecture, opcodes) for i in xrange(len(engines)): engine = engines[i] try: - results = engine.scan(opcodes, architecture, apis) + results = engine.scan(opcodes, architecture, apis, + disassembly=dis) if results: engine_results[i] = results diff --git a/server/first_core/engines/basic_masking.py b/server/first_core/engines/basic_masking.py new file mode 100644 index 0000000..75d8097 --- /dev/null +++ b/server/first_core/engines/basic_masking.py @@ -0,0 +1,251 @@ +#------------------------------------------------------------------------------- +# +# FIRST Engine: Basic Masking +# Author: Angel M. Villegas (anvilleg@cisco.com) +# Last Modified: August 2017 +# +# Uses Capstone to obtain instructions and then removes certain instruction +# details to normalize it into a standard form to be compared to other +# functions. +# +# Masks out: +# - ESP/EBP Offsets +# - Absolute Calls?? +# - Global Offsets?? 
# Functions with fewer instructions than this are not hashed at all
MIN_REQUIRED_INSTRUCTIONS = 8


class BasicMasking(models.Model):
    '''One masked hash (per architecture) and the functions that produce it.'''
    sha256 = models.CharField(max_length=64)
    architecture = models.CharField(max_length=64)

    total_bytes = models.IntegerField()
    functions = models.ManyToManyField('BasicMaskingFunction')

    class Meta:
        app_label = 'engines'
        index_together = ('sha256', 'architecture')
        unique_together = ('sha256', 'architecture', 'total_bytes')

    def dump(self):
        '''Returns the record as a dict (``functions`` is a queryset).'''
        return {'sha256' : self.sha256,
                'architecture' : self.architecture,
                'total_bytes' : self.total_bytes,
                'functions' : self.functions.all()}


class BasicMaskingFunction(models.Model):
    '''Holds the FIRST function ID linked to a BasicMasking record.'''
    func = models.BigIntegerField()

    class Meta:
        app_label = 'engines'


class BasicMaskingEngine(AbstractEngine):
    '''Engine matching functions on a hash of their masked instructions.'''
    _name = 'BasicMasking'
    _description = ('Masks calls/jmps offsets. Requires at least 8 instructions.')
    _required_db_names = ['first_db']

    def normalize(self, disassembly):
        '''Hashes the instruction stream with call/jump immediates masked.

        Args:
            disassembly: Disassembly object (or None/invalid).

        Returns:
            tuple: ``(changed_bytes, sha256_hexdigest)``, or ``(0, None)``
            when there is no disassembly, fewer than
            MIN_REQUIRED_INSTRUCTIONS instructions or normalizing failed.
        '''
        if not disassembly:
            return (0, None)

        changed_bytes = 0

        try:
            normalized = []
            for i in disassembly.instructions():
                if not (disassembly.is_call(i) or disassembly.is_jump(i)):
                    # Everything except calls/jumps is hashed unmodified
                    normalized.append(str(i.bytes))
                    continue

                # Special mnemonic masking (Call, Jmp, JCC): start from the
                # non-zero opcode bytes only (presumably the opcode array is
                # zero padded -- TODO confirm against Capstone).
                instr = ''.join(chr(x) for x in i.opcode if x)

                if disassembly.is_op_imm(i.operands[0]):
                    # Immediate target is dropped from the hash input
                    changed_bytes += len(i.bytes) - len(instr)

                # TODO: Add capability to mask off stack register based
                #       memory operands (for more than Intel)
                else:
                    instr += ''.join(chr(x) for x in i.bytes[len(instr):])

                normalized.append(instr)

            # TODO: Migrate the remaining masks of the Distorm3 version:
            #   - displacements off EBP/ESP and RIP/RSP
            #   - immediates inside common image ranges
            #     (0x400000-0x500000, 0x10000000-0x20000000,
            #      0x1C0000000-0x1D0000000, 0x140000000-0x150000000)
            #   - absolute memory addresses / displacements

            if MIN_REQUIRED_INSTRUCTIONS > len(normalized):
                return (0, None)

            h_sha256 = sha256(''.join(normalized)).hexdigest()
            return (changed_bytes, h_sha256)

        except Exception as e:
            # Best effort: a function that cannot be normalized is skipped by
            # this engine instead of failing the whole add/scan request.
            print('[BasicMasking] Unable to normalize function: {}'.format(e))

        return (0, None)

    def _add(self, function):
        '''
        Masks specific details from the disassembly to provide a fuzzy hash
        and links the function ID to the record of that hash.
        '''
        opcodes_size = len(function['opcodes'])
        architecture = function['architecture']
        disassembly = function.get('disassembly')
        changed, h_sha256 = self.normalize(disassembly)

        if not h_sha256:
            return

        # total_bytes is only used when the record has to be created
        db_obj, _ = BasicMasking.objects.get_or_create(
                                    sha256=h_sha256,
                                    architecture=architecture,
                                    defaults={'total_bytes' : opcodes_size})

        function_id = function['id']
        linked = BasicMasking.objects.filter(sha256=h_sha256,
                                             architecture=architecture,
                                             functions__func=function_id).exists()

        if not linked:
            func, _ = BasicMaskingFunction.objects.get_or_create(func=function_id)
            db_obj.functions.add(func)

    def _scan(self, opcodes, architecture, apis, disassembly=None):
        '''Returns List of FunctionResults (None if nothing matches)

        ``disassembly`` is optional so the engine does not raise when the
        caller does not provide one; without it nothing can be matched.
        '''
        db = self._dbs['first_db']
        changed, h_sha256 = self.normalize(disassembly)

        if not h_sha256:
            return None

        try:
            db_obj = BasicMasking.objects.get(sha256=h_sha256,
                                              architecture=architecture)
        except ObjectDoesNotExist:
            return None

        results = []
        for f in db_obj.functions.all():
            function_id = f.func
            function = db.find_function(_id=function_id)

            if (not function) or (not function.metadata.count()):
                continue

            # Similarity = 90% (opcodes and the masking changes)
            #              + 10% (api overlap)
            # NOTE(review): ``changed`` counts bytes while the divisor is in
            # bits (* 8.0) -- looks like a leftover of the bit based Distorm3
            # version. Kept as is to not change scores; confirm the intent.
            similarity = 100 - ((changed / (len(opcodes) * 8.0)) * 100)
            if similarity > 90.0:
                similarity = 90.0

            # The APIs will count up to 10% of the similarity score
            total_apis = function.apis.count()
            if total_apis:
                func_apis = {x['api'] for x in function.apis.values('api')}
                overlap = float(len(func_apis.intersection(apis)))
                similarity += (overlap / total_apis) * 10

            results.append(FunctionResult(str(function_id), similarity))

        return results

    def _install(self):
        '''Creates and applies the migrations of the ``engines`` app.'''
        try:
            from django.core.management import execute_from_command_line
        except ImportError:
            # The above import may fail for some other reason. Ensure that the
            # issue is really that Django is missing to avoid masking other
            # exceptions on Python 2.
            try:
                import django
            except ImportError:
                raise ImportError(
                    "Couldn't import Django. Are you sure it's installed and "
                    "available on your PYTHONPATH environment variable? Did you "
                    "forget to activate a virtual environment?"
                )
            raise
        execute_from_command_line(['manage.py', 'makemigrations', 'engines'])
        execute_from_command_line(['manage.py', 'migrate', 'engines'])

    def _uninstall(self):
        '''Tables are not dropped automatically, only a reminder is printed.'''
        print('Manually delete tables associated with {}'.format(self.engine_name))
function): ''' pass - def _scan(self, opcodes, architecture, apis): + def _scan(self, opcodes, architecture, apis, disassembly): '''Returns List of FunctionResults''' db = self._dbs['first_db'] @@ -50,7 +50,7 @@ def _scan(self, opcodes, architecture, apis): return None similarity = 90.0 - if set(function.apis) == set(apis): + if set(function.apis.values()) == set(apis): similarity += 10.0 return [FunctionResult(str(function.id), similarity)] diff --git a/server/first_core/engines/mnemonic_hash.py b/server/first_core/engines/mnemonic_hash.py new file mode 100644 index 0000000..63bc636 --- /dev/null +++ b/server/first_core/engines/mnemonic_hash.py @@ -0,0 +1,168 @@ +#------------------------------------------------------------------------------- +# +# FIRST Engine: Mnemonic Hash +# Uses Distorm3 to obtain mnemonics from the opcodes, reduces the opcodes to +# a single string and hashes it for future lookup +# +# Copyright (C) 2017 Angel M. Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# Functions with fewer mnemonics than this are not hashed at all
MIN_REQUIRED_MNEMONICS = 8


class MnemonicHash(models.Model):
    '''One mnemonic hash (per architecture) and the functions producing it.'''
    sha256 = models.CharField(max_length=64)
    architecture = models.CharField(max_length=64)
    functions = models.ManyToManyField('MnemonicHashFunctions')

    class Meta:
        app_label = 'engines'
        index_together = ('sha256', 'architecture')
        unique_together = ('sha256', 'architecture')

    def dump(self):
        '''Returns the record as a dict (``functions`` is a queryset).'''
        return {'sha256' : self.sha256,
                'architecture' : self.architecture,
                'functions' : self.functions.all()}


class MnemonicHashFunctions(models.Model):
    '''Holds the FIRST function ID linked to a MnemonicHash record.'''
    func = models.BigIntegerField()

    class Meta:
        app_label = 'engines'


class MnemonicHashEngine(AbstractEngine):
    '''Engine matching functions on the hash of their mnemonic sequence.'''
    _name = 'MnemonicHash'
    _description = ('Uses mnemonics from the opcodes to generate a hash '
                    '(architecture support limited to: intel32, intel64, '
                    'arm, arm64, mips32, mips64, ppc32, ppc64, sparc). '
                    'Requires at least 8 mnemonics.')
    _required_db_names = ['first_db']

    def mnemonic_hash(self, disassembly):
        '''Hashes the concatenated mnemonics of the disassembly.

        Args:
            disassembly: Disassembly object (or None/invalid).

        Returns:
            tuple: ``(mnemonics, sha256_hexdigest)``, or ``(None, None)``
            when there is no disassembly or fewer than
            MIN_REQUIRED_MNEMONICS instructions.

        Errors raised while disassembling propagate to the caller (the
        original wrapped this in a ``try`` that only re-raised).
        '''
        if not disassembly:
            return (None, None)

        mnemonics = [i.mnemonic for i in disassembly.instructions()]
        if len(mnemonics) < MIN_REQUIRED_MNEMONICS:
            return (None, None)

        return (mnemonics, sha256(''.join(mnemonics)).hexdigest())

    def _add(self, function):
        '''
        Creates a mnemonic hash based on the provided architecture and opcodes
        via disassembling the opcodes and discarding the instruction operands.
        '''
        architecture = function['architecture']
        disassembly = function.get('disassembly')
        mnemonics, mnemonic_sha256 = self.mnemonic_hash(disassembly)
        if None in [mnemonic_sha256, mnemonics]:
            return

        db_obj, _ = MnemonicHash.objects.get_or_create(sha256=mnemonic_sha256,
                                                       architecture=architecture)
        function_id = function['id']
        linked = MnemonicHash.objects.filter(sha256=mnemonic_sha256,
                                             architecture=architecture,
                                             functions__func=function_id).exists()

        if not linked:
            func, _ = MnemonicHashFunctions.objects.get_or_create(func=function_id)
            db_obj.functions.add(func)

    def _scan(self, opcodes, architecture, apis, disassembly=None):
        '''Returns List of FunctionResults (None if nothing matches)

        ``disassembly`` is optional so the engine does not raise when the
        caller does not provide one; without it nothing can be matched.
        '''
        db = self._dbs['first_db']
        mnemonics, mnemonic_sha256 = self.mnemonic_hash(disassembly)

        if None in [mnemonic_sha256, mnemonics]:
            return None

        try:
            db_obj = MnemonicHash.objects.get(sha256=mnemonic_sha256,
                                              architecture=architecture)
        except ObjectDoesNotExist:
            return None

        results = []
        for f in db_obj.functions.all():
            # Base score of a mnemonic only match
            similarity = 75.0
            function_id = f.func
            function = db.find_function(_id=function_id)

            if (not function) or (not function.metadata.count()):
                continue

            # The APIs will count up to 10% of the similarity score
            total_apis = function.apis.count()
            if total_apis:
                func_apis = {x['api'] for x in function.apis.values('api')}
                overlap = float(len(func_apis.intersection(apis)))
                similarity += (overlap / total_apis) * 10

            else:
                # Nothing to compare, flat bonus
                similarity += 5

            results.append(FunctionResult(str(function_id), similarity))

        return results

    def _install(self):
        '''Creates and applies the migrations of the ``engines`` app.'''
        try:
            from django.core.management import execute_from_command_line
        except ImportError:
            # The above import may fail for some other reason. Ensure that the
            # issue is really that Django is missing to avoid masking other
            # exceptions on Python 2.
            try:
                import django
            except ImportError:
                raise ImportError(
                    "Couldn't import Django. Are you sure it's installed and "
                    "available on your PYTHONPATH environment variable? Did you "
                    "forget to activate a virtual environment?"
                )
            raise
        execute_from_command_line(['manage.py', 'makemigrations', 'engines'])
        execute_from_command_line(['manage.py', 'migrate', 'engines'])

    def _uninstall(self):
        '''Tables are not dropped automatically, only a reminder is printed.'''
        print('Manually delete tables associated with {}'.format(self.engine_name))
''' def _get_metadata(self, db): if not hasattr(self, '_metadata'): - func = db.find_function(_id=self.id) - if not func: - return None - - self._metadata = func.metadata + self._metadata = list(db.get_function_metadata(self.id)) self._metadata.sort(key=lambda x: x.rank) data = None if len(self._metadata) > 0: metadata = self._metadata.pop() data = metadata.dump() - data['id'] = '0{}'.format(metadata.id) + data['id'] = make_id(0, metadata=metadata.id) return data @@ -119,9 +116,11 @@ class EngineResult(Result): ''' def _init(self, **kwargs): self._data = None + self._metadata = 0 + if 'data' in kwargs: self._data = kwargs['data'] - self._data['id'] = '8{}'.format(self.id) + self._data['id'] = make_id(1, self._metadata, self.id) def _get_metadata(self, db): data = self._data diff --git a/server/first/engines/skeleton.py_ b/server/first_core/engines/skeleton.py_ similarity index 96% rename from server/first/engines/skeleton.py_ rename to server/first_core/engines/skeleton.py_ index e005e82..2954768 100644 --- a/server/first/engines/skeleton.py_ +++ b/server/first_core/engines/skeleton.py_ @@ -20,8 +20,8 @@ # FIRST Modules -from first.error import FIRSTError -from first.engines import AbstractEngine +from first_core.error import FIRSTError +from first_core.engines import AbstractEngine # Third Party Modules diff --git a/server/first/error.py b/server/first_core/error.py similarity index 100% rename from server/first/error.py rename to server/first_core/error.py diff --git a/server/first_core/models.py b/server/first_core/models.py new file mode 100644 index 0000000..cdfb45b --- /dev/null +++ b/server/first_core/models.py @@ -0,0 +1,27 @@ +#------------------------------------------------------------------------------- +# +# FIRST Django ORM Models +# Copyright (C) 2017 Angel M. 
Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +#------------------------------------------------------------------------------- + + +# Python Modules +from __future__ import unicode_literals + +# FIRST Modules +from www.models import * diff --git a/server/first_core/util.py b/server/first_core/util.py new file mode 100644 index 0000000..5b49269 --- /dev/null +++ b/server/first_core/util.py @@ -0,0 +1,110 @@ +#------------------------------------------------------------------------------- +# +# FIRST Utility and Helper Functions +# Copyright (C) 2017 Angel M. Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# Type groups usable on Python 2 (str/unicode, int/long) and Python 3
try:
    _STRING_TYPES = (str, unicode)
    _INTEGER_TYPES = (int, long)
except NameError:
    _STRING_TYPES = (str,)
    _INTEGER_TYPES = (int,)

# ID: Flag Byte | Engine 4 bytes | Metadata 8 bytes = 13 bytes
#     26 ASCII characters
_ID_LENGTH = 26
_HEX_DIGITS = frozenset('0123456789abcdefABCDEF')
_MAX_FLAGS = 2**8 - 1
_MAX_ENGINE = 2**32 - 1
_MAX_METADATA = 2**64 - 1
_MAX_ID = 2**(8 * 13) - 1


def _is_integer(value):
    '''True for int/long values, bools are not treated as numbers.'''
    return isinstance(value, _INTEGER_TYPES) and not isinstance(value, bool)


def make_id(flags, metadata=0, engine=0):
    '''Creates an unique ID for client use.

    Args:
        flags (:obj:`int`): Value between 0 and 255.
                            MSB set when ID is from an engine.
        metadata (:obj:`int`, optional): The Metadata model ID
        engine (:obj:`int`, optional): The Engine model ID

    Returns:
        string: A 26 character hex string (flag byte, 4 engine bytes,
        8 metadata bytes), or None if a value is not an integer or does
        not fit into its field.
    '''
    data = (flags, metadata, engine)
    if not all(_is_integer(x) for x in data):
        return None

    # Negative numbers would be formatted with a sign and break the layout
    if min(data) < 0:
        return None

    if ((engine > _MAX_ENGINE) or (metadata > _MAX_METADATA)
        or (flags > _MAX_FLAGS)):
        return None

    return '{:02x}{:08x}{:016x}'.format(flags, engine, metadata)


def parse_id(_id):
    '''Splits an ID into its flag, engine and metadata parts.

    Args:
        _id (:obj:`str` or :obj:`int`): 26 character hex string or the
            equivalent integer value.

    Returns:
        tuple: ``(flag, engine_data, metadata_id)``, or
        ``(None, None, None)`` if the ID is malformed.
    '''
    invalid = (None, None, None)
    if isinstance(_id, _STRING_TYPES):
        # Checked per character, int() would also accept signs, prefixes
        # and whitespace (and raise on anything else)
        if (len(_id) != _ID_LENGTH) or (not _HEX_DIGITS.issuperset(_id)):
            return invalid

        _id = int(_id, 16)

    # The original tested type(id), the builtin, which rejected every integer
    elif (not _is_integer(_id)) or (not (0 <= _id <= _MAX_ID)):
        return invalid

    flag = _id >> (8 * 12)
    engine_data = (_id >> (8 * 8)) & (0xFFFFFFFF)
    metadata_id = _id & 0xFFFFFFFFFFFFFFFF

    return (flag, engine_data, metadata_id)


def separate_metadata(ids):
    '''Returns parsed IDs for user and engine generated metadata.

    Args:
        ids (:obj:`list`): List of 26 character hex strings, malformed
            entries are skipped.

    Returns:
        tuple: Index 0 contains user created metadata IDs
               Index 1 contains engine created metadata details as
               (flag, engine_data, metadata_id) tuples
    '''
    # If Flag is set then more processing is needed and it is not
    # metadata created by the user
    user_metadata = []
    engine_metadata = []
    for x in ids:
        flag, engine_data, metadata_id = parse_id(x)
        if flag is None:
            continue

        if not flag:
            user_metadata.append(metadata_id)
        else:
            engine_metadata.append((flag, engine_data, metadata_id))

    return (user_metadata, engine_metadata)


def is_user_metadata(_id):
    '''True if the ID is well formed and its flag byte is not set.'''
    flag = parse_id(_id)[0]
    return (flag is not None) and (not flag)


def is_engine_metadata(_id):
    '''True if the ID is well formed and its flag byte is set.'''
    flag = parse_id(_id)[0]
    return (flag is not None) and bool(flag)
+12,15 @@ from django.views.decorators.http import require_GET, require_POST # FIRST Modules -from first import DBManager, EngineManager -from first.auth import verify_api_key, Authentication, FIRSTAuthError, \ +from first_core import DBManager, EngineManager +from first_core.util import make_id, is_engine_metadata +from first_core.auth import verify_api_key, Authentication, FIRSTAuthError, \ require_login, require_apikey MAX_FUNCTIONS = 20 MAX_METADATA = 20 -VALIDATE_IDS = lambda x: re.match('^[a-f\d]{24,25}$', x) +VALIDATE_IDS = lambda x: re.match('^[A-Fa-f\d]{26}$', x) #----------------------------------------------------------------------------- # @@ -217,8 +218,7 @@ def metadata_add(request, md5_hash, crc32, user): f = functions[client_key] # Check if the id sent back is from an engine, if so skip it - if (('id' in f) and (f['id']) and (len(f['id']) == 25) - and ((int(f['id'][0]) >> 3) & 1)): + if (('id' in f) and (f['id']) and is_engine_metadata(f['id'])): continue; function = db.get_function(create=True, **f) @@ -238,13 +238,14 @@ def metadata_add(request, md5_hash, crc32, user): 'function in FIRST')}) # The '0' indicated the metadata_id is from a user. 
- results[client_key] = '0{}'.format(metadata_id) + _id = make_id(0, metadata=metadata_id) + results[client_key] = _id # Set the user as applying the metadata - db.applied(sample, user, metadata_id) + db.applied(sample, user, _id) # Send opcode to EngineManager - EngineManager.add(function.dump()) + EngineManager.add(function.dump(True)) return HttpResponse(json.dumps({'failed' : False, 'results' : results})) @@ -544,26 +545,15 @@ def metadata_status_change(_id, user, md5_hash, crc32, applied): return render(None, 'rest/error_json.html', {'msg' : 'Invalid metadata information'}) - # Currently 24-25, early beta used a 24 byte string, moved to 25 byte one - # TODO: Change to 25 only once it is closed beta time if not VALIDATE_IDS(_id): return render(None, 'rest/error_json.html', {'msg' : 'Invalid id value'}) - metadata_id = _id - if len(_id) == 25: - metadata_id = _id[1:] - db = DBManager.first_db if not db: return render(None, 'rest/error_json.html', {'msg' : 'Unable to connect to FIRST DB'}) - is_engine = False - if ((len(_id) == 25) and (int(_id[0], 16) & 0x8)): - # Metadata came from an engine - is_engine = True - # Get sample sample = db.get_sample(md5_hash, crc32) if not sample: @@ -571,8 +561,8 @@ def metadata_status_change(_id, user, md5_hash, crc32, applied): {'msg' : 'Sample does not exist in FIRST'}) if applied: - results = db.applied(sample, user, metadata_id, is_engine) + results = db.applied(sample, user, _id) else: - results = db.unapplied(sample, user, metadata_id, is_engine) + results = db.unapplied(sample, user, _id) return HttpResponse(json.dumps({'failed' : False, 'results' : results})) diff --git a/server/utilities/__init__.py b/server/utilities/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/server/utilities/engine_shell.py b/server/utilities/engine_shell.py index bb68cf6..80059a1 100644 --- a/server/utilities/engine_shell.py +++ b/server/utilities/engine_shell.py @@ -30,10 +30,15 @@ sys.path.append(os.path.abspath('..')) # 
FIRST Modules +import first.wsgi import first.settings -from first.models import Engine, User -from first.engines import AbstractEngine -from first import DBManager, EngineManager +from first_core.engines import AbstractEngine +from first_core.disassembly import Disassembly +from first_core import DBManager, EngineManager +from first_core.models import Engine, User, Function + +# Third Party Modules +from django.core.paginator import Paginator class EngineCmd(Cmd): @@ -95,7 +100,7 @@ def do_list(self, line): print 'No engines are currently installed' return - for engine in Engine.objects: + for engine in Engine.objects.all(): name = engine.name description = engine.description print '+{}+{}+'.format('-' * 18, '-' * 50) @@ -139,7 +144,7 @@ def do_install(self, line): try: path, obj_name, email = line.split(' ') - developer = User.objects(email=email).get() + developer = User.objects.get(email=email) __import__(path) module = sys.modules[path] @@ -160,9 +165,11 @@ def do_install(self, line): return e.install() - engine = Engine(name=e.name, description=e.description, path=path, - obj_name=obj_name, developer=developer, active=True) - engine.save() + engine = Engine.objects.create( name=e.name, + description=e.description, + path=path, + obj_name=obj_name, + developer=developer, active=True) print 'Engine added to FIRST' return @@ -239,7 +246,7 @@ def do_populate(self, line): engines = [] for engine_name in populate_engines: if engine_name not in all_engines: - print '[Error] Engine "{}" is not installed' + print '[Error] Engine "{}" is not installed'.format(engine_name) continue engines.append(all_engines[engine_name]) @@ -249,7 +256,7 @@ def do_populate(self, line): return print 'Starting to populate engines:\n-\t{}'.format('\n-\t'.join([e.name for e in engines])) - functions = db.get_all_functions() + functions = db.get_all_functions().order_by('pk') total = functions.count() msg = ' [Status] {0:.2f}% Completed ({1} out of {2})\r' @@ -258,12 +265,16 @@ def 
do_populate(self, line): offset = 0 limit = 500 - for j in xrange(0, total, limit): - functions = db.get_all_functions().skip(j).limit(limit) + paginator = Paginator(functions, 100) + for j in paginator.page_range: + functions = paginator.page(j) for function in functions: - details = function.dump() - del details['metadata'] + details = function.dump(True) + + dis = Disassembly(details['architecture'], details['opcodes']) + if dis: + details['disassembly'] = dis for engine in engines: try: @@ -286,7 +297,7 @@ def do_populate(self, line): print 'The below errors occured:\n{}'.format('\n '.join(errors)) def _get_db_engine_obj(self, name): - engine = Engine.objects(name=name) + engine = Engine.objects.filter(name=name) if not engine: print 'Unable to locate Engine "{}"'.format(name) return @@ -294,7 +305,7 @@ def _get_db_engine_obj(self, name): if len(engine) > 1: print 'More than one engine "{}" exists'.format(name) for e in engine: - print ' - {}'.format(e.name) + print ' - {}: {}'.format(e.name, e.description) return diff --git a/server/utilities/migrate_data.py b/server/utilities/migrate_data.py new file mode 100644 index 0000000..a9d3ca0 --- /dev/null +++ b/server/utilities/migrate_data.py @@ -0,0 +1,245 @@ +#!/usr/bin/python +import json +import os +import base64 +import binascii +import argparse + +def main(prefix): + + # Declarations for ID counters + function_ids = {} + function_id_counter = 1 + + apis_ids = {} + apis_id_counter = 1 + + user_ids = {} + user_id_counter = 1 + + sample_ids = {} + sample_id_counter = 1 + + engine_ids = {} + engine_id_counter = 1 + + metadata_details_ids = {} + metadata_details_id_counter = 1 + + metadata_id_counter = 1 + + # Collections used to keep temporary data + applied_metadata_temp = [] + + # User + with open(os.path.join(prefix, "user.json"), "r") as f: + with open(os.path.join(prefix, "User"), "w") as f_out: + for l in f: + d = json.loads(l.strip()) + f_out.write(("0|%s|%s|%s|%d|%s|%s|%d|%d|%s|%s\n" % (d['name'], + 
d['email'], + d['handle'], + d['number'], + binascii.hexlify(base64.b64decode(d['api_key']["$binary"])).lower(), + str(d['created']['$date'])[:-5] + "Z", + int(d['rank']['$numberLong']), + 1 if d['active'] else 0, + d['service'], + d['auth_data'])).encode('UTF-8')) + user_ids[d["_id"]["$oid"]] = user_id_counter + user_id_counter += 1 + + # Functions, Function APIs, Metadata + with open(os.path.join(prefix, "function.json"), "r") as f: + f_FunctionApis = open(os.path.join(prefix, "FunctionApis"), "w") + f_Function = open(os.path.join(prefix, "Function"), "w") + f_Function_apis = open(os.path.join(prefix, "Function_apis"), "w") + f_Metadata = open(os.path.join(prefix, "Metadata"), "w") + f_Function_metadata = open(os.path.join(prefix, "Function_metadata"), "w") + f_MetadataDetails = open(os.path.join(prefix, "MetadataDetails"), "w") + f_Metadata_details = open(os.path.join(prefix, "Metadata_details"), "w") + + # We need to keep track of unique functions, otherwise we might + # insert repeated records in the CSV. + unique_functions = {} + + # Keep track of unique metadata details, to avoid repetitions + unique_metadata_details = {} + + for l in f: + d = json.loads(l.strip()) + + opcodes_text = binascii.hexlify(base64.b64decode(d["opcodes"]["$binary"])).upper() + + if (d['sha256'], d['architecture']) not in unique_functions: + # Add new function + unique_functions[(d['sha256'], d['architecture'])] = function_id_counter + f_Function.write(("0|%s|%s|%s\n") % (d["sha256"], opcodes_text, d["architecture"])) + # Map of function_ids + function_ids[d["_id"]["$oid"]] = function_id_counter + function_id_counter += 1 + else: + # Duplicate function, reuse previous function id, but consider its linked data + function_ids[d["_id"]["$oid"]] = unique_functions[(d['sha256'], d['architecture'])] + #print("Discarding duplicate function... 
Reusing id %d" % function_ids[d["_id"]["$oid"]]) + + if "apis" in d: + for a in d["apis"]: + if a not in apis_ids: + apis_ids[a] = apis_id_counter + apis_id_counter += 1 + f_FunctionApis.write("0|%s\n" % (a)) + f_Function_apis.write("0|%d|%d\n" % (function_ids[d["_id"]["$oid"]], apis_ids[a])) + + if "metadata" in d: + # 0 - N Metadata records, each record is associated to a User and Function, + # and each Metadata record can be associated to several MetadataDetails. + for m in d["metadata"]: + # Get user id + if "user" in m and "$oid" in m["user"] and m["user"]["$oid"] in user_ids: + user_id = user_ids[m["user"]["$oid"]] + else: + user_id = 0 + + # This is a 1-N relationship between Metadata and User + f_Metadata.write("0|%d\n" % (user_id)) + # This is an N-M relationship between Function and Metadata + f_Function_metadata.write("0|%d|%d\n" % (function_ids[d["_id"]["$oid"]], metadata_id_counter)) + + # Temporarily store the Applied relationship (N-M) between User, Metadata, and Sample + # We temporarily store the oid because we don't have the mapped ids yet. + if "applied" in m: + for application in m["applied"]: + applied_metadata_temp.append((metadata_id_counter, application[0], application[1])) + + if metadata_id_counter not in unique_metadata_details: + unique_metadata_details[metadata_id_counter] = [] + + # Metadata details (name, comment, committed, prototype) + nb_details = max(len(m.get("name", [])), len(m.get("comment", [])),len(m.get("committed", [])),len(m.get("prototype", []))) + for i in range(0, nb_details): + name = m["name"][i] if "name" in m and (len(m["name"]) > i) else "" + comment = m["comment"][i] if "comment" in m and (len(m["comment"]) > i) else "" + committed = m["committed"][i]["$date"][:-5] + "Z" if "committed" in m and (len(m["committed"]) > i) else "" + prototype = m["prototype"][i] if "prototype" in m and (len(m["prototype"]) > i) else "" + + # We consider only unique entries.
Unique by: name, comment, prototype and metadata_id + # where metadata_id represents each unique (User,Function) tuple. + if (name, comment, prototype) not in unique_metadata_details[metadata_id_counter]: + unique_metadata_details[metadata_id_counter].append((name, comment, prototype)) + f_MetadataDetails.write(("0|%s|%s|%s|%s\t\n" % (name, prototype, comment, committed)).encode('UTF-8')) + f_Metadata_details.write(("0|%d|%d\n" % (metadata_id_counter, metadata_details_id_counter))) + metadata_details_id_counter += 1 + + metadata_id_counter += 1 + + f_FunctionApis.close() + f_Function.close() + f_Function_apis.close() + f_Metadata.close() + f_Function_metadata.close() + f_MetadataDetails.close() + f_Metadata_details.close() + + # Sample + + sample_seen_by = [] + sample_functions = {} + + with open(os.path.join(prefix, "sample.json"), "r") as f: + f_Sample = open(os.path.join(prefix, "Sample"), "w") + f_Sample_seen_by = open(os.path.join(prefix, "Sample_seen_by"), "w") + + for l in f: + d = json.loads(l.strip()) + + if isinstance(d['crc32'], dict) and "$numberLong" in d['crc32']: + d['crc32'] = int(d['crc32']['$numberLong']) + if not 'sha1' in d: + d['sha1'] = "" + if not 'sha256' in d: + d['sha256'] = "" + + f_Sample.write("0|%s|%d|%s|%s|%s\n" % (d['md5'], d['crc32'], d['sha1'], d['sha256'], str(d['last_seen']['$date'])[:-5] + "Z")) + + # Seen by + for l in d['seen_by']: + if l['$oid'] in user_ids: + f_Sample_seen_by.write("0|%d|%d\n" % (sample_id_counter, user_ids[l['$oid']])) + + if sample_id_counter not in sample_functions: + sample_functions[sample_id_counter] = [] + + # Functions + for l in d['functions']: + if l['$oid'] in function_ids: + if function_ids[l['$oid']] not in sample_functions[sample_id_counter]: + sample_functions[sample_id_counter].append(function_ids[l['$oid']]) + + sample_ids[d["_id"]["$oid"]] = sample_id_counter + sample_id_counter += 1 + + f_Sample.close() + f_Sample_seen_by.close() + + f_Sample_functions = open(os.path.join(prefix, "Sample_functions"), "w") +
for sid in sample_functions: + for fid in sample_functions[sid]: + f_Sample_functions.write("0|%d|%d\n" % (sid, fid)) + f_Sample_functions.close() + + # Engine + with open(os.path.join(prefix, "engine.json"), "r") as f: + f_Engine = open(os.path.join(prefix, "Engine"), "w") + for l in f: + d = json.loads(l.strip()) + + if 'developer' in d and '$oid' in d['developer'] and d['developer']['$oid'] in user_ids: + developer_id = user_ids[d['developer']['$oid']] + else: + developer_id = 0 + + f_Engine.write("0|%s|%s|%s|%s|%d|%d\n" % (d['name'], d['description'], d['path'], d['obj_name'], 1 if d['active'] else 0, developer_id)) + engine_ids[d["_id"]["$oid"]] = engine_id_counter + engine_id_counter += 1 + + # Applied metadata + + f = open(os.path.join(prefix, "AppliedMetadata"), "w") + for metadata_id, sample_oid, user_oid in applied_metadata_temp: + f.write("0|%d|%d|%d\n" % (metadata_id, sample_ids[sample_oid], user_ids[user_oid])) + f.close() + +if __name__ == "__main__": + description = """Convert mongoexport generated JSON Files into MySQL import CSV files. + + Expected input files: + + function.json + sample.json + engine.json + user.json + + These files should be generated by running the following commands over the + mongo database: + + mongoexport -d [database name] -c function -o function.json + mongoexport -d [database name] -c sample -o sample.json + mongoexport -d [database name] -c engine -o engine.json + mongoexport -d [database name] -c user -o user.json + + Finally, the generated files can be imported into MySQL by running the mysql queries + in mysql_import.sql, from the directory where the output files were generated. + + mysql --user [user] --password --host [host] < /path/to/mysql_import.sql + + WARNING: This MySQL script requires the database tables to be created before-hand: + See FIRST-server documentation to understand how to generate and apply + the corresponding Django migrations.
+ WARNING: This script handles function duplications, so the number of functions + in the mongo export and the resulting MySQL database might vary. + """ + parser = argparse.ArgumentParser(description=description, formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument('path', type=str, help='The path where the input json files (see --help) are located, and where the output files will be generated.') + args = parser.parse_args() + main(args.path) diff --git a/server/utilities/mongo_to_django_orm.py b/server/utilities/mongo_to_django_orm.py new file mode 100644 index 0000000..4139872 --- /dev/null +++ b/server/utilities/mongo_to_django_orm.py @@ -0,0 +1,297 @@ +#! /usr/bin/python +#------------------------------------------------------------------------------- +# +# FIRST MongoDB to Django ORM Conversion Script +# Copyright (C) 2016 Angel M. Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# Requirements +# ------------ +# mongoengine (https://pypi.python.org/pypi/mongoengine/) +# +# +#------------------------------------------------------------------------------- + +# Python Modules +import os +import sys +import time +import datetime +from getpass import getpass +from argparse import ArgumentParser + +# DEBUG +from pprint import pprint +import gc + +# FIRST Modules +import first_core.models as ORM + +# Third Party Modules +from bson import Binary +from bson.objectid import ObjectId +import mongoengine +from mongoengine import Document, StringField, UUIDField, \ + DateTimeField, LongField, ReferenceField, \ + BinaryField, ListField, BooleanField, ObjectIdField, \ + IntField, EmbeddedDocument, EmbeddedDocumentListField +from django.core.paginator import Paginator, EmptyPage + +def info(): + print 'INFO: {} {}'.format(len(gc.get_objects()), sum([sys.getsizeof(o) for o in gc.get_objects()])) + +def migrate_users(): + for u in User.objects.all(): + user, created = ORM.User.objects.get_or_create(**u.dump()) + +def migrate_engines(): + for e in Engine.objects.all(): + engine = e.dump() + engine['developer'] = ORM.User.objects.get(email=e.developer.email) + engine = ORM.Engine.objects.create(**engine) + +def migrate_samples(): + paginator = Paginator(Sample.objects.all(), 100) + for s in Sample.objects.all().exclude('functions').select_related(): + sample, created = ORM.Sample.objects.get_or_create(**s.dump()) + for u in s.seen_by: + sample.seen_by.add(ORM.User.objects.get(email=u.email)) + +def migrate_functions(skip, limit): + i = 0 + for f in Function.objects.skip(skip).limit(limit).select_related(3): + function, created = ORM.Function.objects.get_or_create(**f.dump()) + # Convert Functions + if created: + # Add APIs to function + migrate_apis(function, f) + + # Add to samples + for s in Sample.objects.only('md5', 'crc32').filter(functions=f.id): + ORM.Sample.objects.get(md5=s.md5, crc32=s.crc32).functions.add(function) + #sample = 
ORM.Sample.objects.get(md5=s.md5, crc32=s.crc32) + #sample.functions.add(function) + + # Add metadata associated with the function + migrate_metadata(function, f) + + i += 1 + if 0 == (i % 1000): + print '---{}---'.format(i) + info() + gc.collect() + info() + +def _mf(): + for i in xrange(0, Function.objects.count(), 1000): + print '--{}'.format(i) + migrate_functions(i, 1000) + + if i % 20000 == 0: + info() + gc.collect() + info() + +def migrate_apis(function, f): + for a in f.apis: + api, _ = ORM.FunctionApis.objects.get_or_create(api=a) + function.apis.add(api) + + gc.collect() + +def migrate_metadata(function, f): + print 'Metadata: {} - {}'.format(f.sha256, len(f.metadata)) + for m in f.metadata: + creator = ORM.User.objects.get(email=m.user.email) + metadata = ORM.Metadata.objects.create(user=creator) + function.metadata.add(metadata) + + # Convert Metadata Details + for d in m.details(): + details = ORM.MetadataDetails.objects.create(**d) + metadata.details.add(details) + + # Convert Metadata Applied + for s_id, u_id in m.applied: + s_ = Sample.objects.only('md5', 'crc32').get(pk=s_id) + u = User.objects.only('email').get(pk=u_id) + sample_ = ORM.Sample.objects.get(md5=s_.md5, crc32=s_.crc32) + user_ = ORM.User.objects.get(email=u.email) + ORM.AppliedMetadata.objects.create(metadata=metadata, + user=user_, + sample=sample_) + +def main(args): + pass_prompt = 'Enter MongoDB password for {}: '.format(args.mongo_user) + mongoengine.connect(args.mongo_db, + host=args.mongo_host, + port=args.mongo_port, + username=args.mongo_user, + password=getpass(pass_prompt)) + # Convert User + print ' + Adding Users' + start = time.time() + migrate_users() + print '[+] Users Added ({} s)'.format(time.time() - start) + + # Convert Engine + print ' + Adding Engines' + start = time.time() + migrate_engines() + print '[+] Adding Engines ({} s)'.format(time.time() - start) + + # Convert Samples + print ' + Adding Samples' + start = time.time() + migrate_samples() + print '[+] Adding
Samples ({} s)'.format(time.time() - start) + + # Convert Functions and their Metadata + print ' + Adding Functions & Metadata' + start = time.time() + _mf() + print '[+] Adding Functions & Metadata ({} s)'.format(time.time() - start) + + + + +#------------------------------------------------------------------------------- +# MongoDB Models +# FIRST v0.0.1 +#------------------------------------------------------------------------------- +class User(Document): + name = StringField(max_length=128, required=True) + email = StringField(max_length=254, unique=True) + handle = StringField(max_length=32, required=True) + number = IntField(required=True) + api_key = UUIDField(required=True, unique=True) + created = DateTimeField(default=datetime.datetime.utcnow, required=True) + rank = LongField(default=0) + active = BooleanField(default=True) + + service = StringField(max_length=16, required=True) + auth_data = StringField(max_length=4096, required=True) + + meta = { + 'indexes' : [('handle', 'number'), 'api_key', 'email'] + } + + def dump(self): + return {'name' : self.name, + 'email' : self.email, + 'handle' : self.handle, + 'number' : self.number, + 'api_key' : self.api_key, + 'created' : self.created, + 'rank' : self.rank, + 'active' : self.active} + + +class Engine(Document): + name = StringField(max_length=16, required=True, unique=True) + description = StringField(max_length=128, required=True) + path = StringField(max_length=256, required=True) + obj_name = StringField(max_length=32, required=True) + applied = ListField(default=list) + developer = ReferenceField(User) + active = BooleanField(default=False) + + meta = { + 'indexes' : ['name'] + } + + def dump(self): + return {'name' : self.name, + 'description' : self.description, + 'path' : self.path, + 'obj_name' : self.obj_name, + 'developer' : self.developer, + 'active' : self.active} + + +class Metadata(EmbeddedDocument): + id = ObjectIdField(required=True, default=lambda: ObjectId()) + user = 
ReferenceField(User) + name = ListField(StringField(max_length=128), default=list) + prototype = ListField(StringField(max_length=256), default=list) + comment = ListField(StringField(max_length=512), default=list) + committed = ListField(DateTimeField(), default=list) + applied = ListField(default=list) + + meta = { + 'indexes' : ['user'] + } + + def details(self): + return [{'committed' : self.committed[i], + 'name' : self.name[i], + 'prototype' : self.prototype[i], + 'comment' : self.comment[i]} for i in xrange(len(self.name))] + +# Use bson.Binary to insert binary data +class Function(Document): + sha256 = StringField(max_length=64) + opcodes = BinaryField() + apis = ListField(StringField(max_length=128), default=list) + metadata = EmbeddedDocumentListField(Metadata, default=list) + architecture = StringField(max_length=64, required=True) + + meta = { + 'indexes' : [] + } + + def dump(self): + return {'opcodes' : Binary(self.opcodes), + 'architecture' : self.architecture, + 'sha256' : self.sha256} + + +class Sample(Document): + md5 = StringField(max_length=32, required=True) + crc32 = IntField(required=True) + sha1 = StringField(max_length=40) + sha256 = StringField(max_length=64) + seen_by = ListField(ReferenceField(User), default=list) + functions = ListField(ReferenceField(Function), default=list) + last_seen = DateTimeField(default=datetime.datetime.utcnow) + + meta = { + 'indexes' : [('md5', 'crc32')] + } + + def dump(self): + data = {'md5' : self.md5, 'crc32' : self.crc32} + + if hasattr(self, 'sha1'): + data['sha1'] = self.sha1 + + if hasattr(self, 'sha256'): + data['sha256'] = self.sha256 + + return data + +if __name__ == '__main__': + parser = ArgumentParser(description=('FIRST Mongo to Django ORM Conversion Script\n' + 'This script should be used to convert FIRST v0.0.1 to FIRST v0.1.0\n' + )) + + # Arguments + parser.add_argument('--mongo-host', '--host', help='The MongoDB host') + parser.add_argument('--mongo-port', '-p', help='The MongoDB port', type=int) +
parser.add_argument('--mongo-user', '-u', help='The MongoDB user') + parser.add_argument('--mongo-db', '-d', help='The MongoDB db name') + + main(parser.parse_args()) diff --git a/server/utilities/mysql_import.sql b/server/utilities/mysql_import.sql new file mode 100644 index 0000000..0dfaf42 --- /dev/null +++ b/server/utilities/mysql_import.sql @@ -0,0 +1,66 @@ +USE first_db; + +DELETE FROM AppliedMetadata; +ALTER TABLE AppliedMetadata AUTO_INCREMENT = 1; + +DELETE FROM Metadata_details; +ALTER TABLE Metadata_details AUTO_INCREMENT = 1; + +DELETE FROM Function_metadata; +ALTER TABLE Function_metadata AUTO_INCREMENT = 1; + +DELETE FROM MetadataDetails; +ALTER TABLE MetadataDetails AUTO_INCREMENT = 1; + +DELETE FROM Metadata; +ALTER TABLE Metadata AUTO_INCREMENT = 1; + +DELETE FROM Engine; +ALTER TABLE Engine AUTO_INCREMENT = 1; + +DELETE FROM Sample_seen_by; +ALTER TABLE Sample_seen_by AUTO_INCREMENT = 1; + +DELETE FROM Sample_functions; +ALTER TABLE Sample_functions AUTO_INCREMENT = 1; + +DELETE FROM Function_apis; +ALTER TABLE Function_apis AUTO_INCREMENT = 1; + +DELETE FROM FunctionApis; +ALTER TABLE FunctionApis AUTO_INCREMENT = 1; + +DELETE FROM Sample; +ALTER TABLE Sample AUTO_INCREMENT = 1; + +DELETE FROM Function; +ALTER TABLE Function AUTO_INCREMENT = 1; + +DELETE FROM User; +ALTER TABLE User AUTO_INCREMENT = 1; + +LOAD DATA LOCAL INFILE "FunctionApis" INTO TABLE FunctionApis COLUMNS TERMINATED BY "|"; + +LOAD DATA LOCAL INFILE "Function" INTO TABLE Function FIELDS TERMINATED BY "|" (id, sha256, @var1, architecture) SET opcodes = UNHEX(@var1); + +LOAD DATA LOCAL INFILE"Function_apis" INTO TABLE Function_apis FIELDS TERMINATED BY "|"; + +LOAD DATA LOCAL INFILE "User" INTO TABLE User FIELDS TERMINATED BY "|" (id, name, email, handle, number, api_key, @var1, rank, active, service, auth_data) SET created = STR_TO_DATE(@var1, '%Y-%m-%dT%H:%i:%SZ'); + +LOAD DATA LOCAL INFILE"Sample" INTO TABLE Sample FIELDS TERMINATED BY "|" (id, md5, crc32, sha1, sha256, @var1) 
SET last_seen = STR_TO_DATE(@var1, '%Y-%m-%dT%H:%i:%SZ'); + +LOAD DATA LOCAL INFILE"Sample_functions" INTO TABLE Sample_functions FIELDS TERMINATED BY "|"; + +LOAD DATA LOCAL INFILE"MetadataDetails" INTO TABLE MetadataDetails FIELDS TERMINATED BY "|" LINES TERMINATED BY "\t\n" (id, name, prototype, comment, @var1) SET committed = STR_TO_DATE(@var1, '%Y-%m-%dT%H:%i:%SZ'); + +LOAD DATA LOCAL INFILE"Sample_seen_by" INTO TABLE Sample_seen_by FIELDS TERMINATED BY "|"; + +LOAD DATA LOCAL INFILE"Engine" INTO TABLE Engine FIELDS TERMINATED BY "|"; + +LOAD DATA LOCAL INFILE"Metadata" INTO TABLE Metadata FIELDS TERMINATED BY "|"; + +LOAD DATA LOCAL INFILE"Metadata_details" INTO TABLE Metadata_details FIELDS TERMINATED BY "|"; + +LOAD DATA LOCAL INFILE"AppliedMetadata" INTO TABLE AppliedMetadata FIELDS TERMINATED BY "|"; + +LOAD DATA LOCAL INFILE"Function_metadata" INTO TABLE Function_metadata FIELDS TERMINATED BY "|"; diff --git a/server/utilities/populate_engine.py b/server/utilities/populate_engine.py deleted file mode 100644 index b26cc2b..0000000 --- a/server/utilities/populate_engine.py +++ /dev/null @@ -1,90 +0,0 @@ -#------------------------------------------------------------------------------- -# -# Sends all function data to engine for it to be processed by engine -# Copyright (C) 2016 Angel M. Villegas -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Requirements -# ------------ -# Flask's mongoengine (https://pypi.python.org/pypi/flask-mongoengine/) -# -#------------------------------------------------------------------------------- - -# Python Modules -from argparse import ArgumentParser - -# FIRST Modules -from ..app.first import EngineManager, DBManager - -def main(): - global total, completed, operation_complete - - parser = ArgumentParser(description='Populate engine\'s metadata.') - parser.add_argument('engines', metavar='E', type=str, nargs='+', - help='an engine name to populate') - - args = parser.parse_args() - - db = DBManager.first_db - if not db: - print '[Error] Unable to connect to FIRST DB, exiting...' - return - - # Get all engines the user entered - all_engines = EngineManager.get_engines() - engines = [] - for engine_name in args.engines: - if engine_name not in all_engines: - print '[Error] Engine "{}" is not installed' - continue - - engines.append(all_engines[engine_name]) - - if not engines: - print 'No engines to populate, exiting...' - return - - print 'Starting to populate engines:\n-\t{}'.format('\n-\t'.join([e.name for e in engines])) - functions = db.get_all_functions() - total = len(functions) - - msg = ' [Status] {0:.2f}% Completed ({1} out of {2})' - errors = [] - i = 0.0 - for function in functions: - details = function.dump() - del details['metadata'] - - for engine in engines: - try: - engine.add(details) - - except Exception as e: - msg = '[Error] Engine "{}": {}'.format(engine.name, e) - errors.append(msg) - print msg - - i += 1 - if 0 == (i % 25): - print msg.format((i / total) * 100, int(i), total) - - # Wait for thread to end - print 'Populating engines complete, exiting...' 
- if errors: - print 'The below errors occured:\n{}'.format('\n'.join(errors)) - -if __name__ == '__main__': - main() diff --git a/server/utilities/user_shell.py b/server/utilities/user_shell.py new file mode 100644 index 0000000..c472eab --- /dev/null +++ b/server/utilities/user_shell.py @@ -0,0 +1,230 @@ +#! /usr/bin/python +#------------------------------------------------------------------------------- +# +# Utility Shell to manage User related operations +# Copyright (C) 2016 Angel M. Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +#------------------------------------------------------------------------------- +# Python Modules +import re +import os +import sys +from cmd import Cmd +from uuid import uuid4 +from pprint import pprint +from argparse import ArgumentParser + +# Add app package to sys path +sys.path.append(os.path.abspath('..')) + +# FIRST Modules +import first.wsgi +import first.settings +from first_core.disassembly import Disassembly +from first_core.models import User + +# Third Party Modules +from django.core.paginator import Paginator + +class UserCmd(Cmd): + + def __init__(self): + Cmd.__init__(self) + self.prompt = 'FIRST>> ' + + def emptyline(self): + '''Prevent the resubmission of the last command''' + return + + def default(self, line): + print '"{}" is unknown command'.format(line) + + def preloop(self): + print ( '\n\n' + '+========================================================+\n' + '| FIRST User Shell Menu |\n' + '+========================================================+\n' + '| list | List all users currently installed |\n' + '| info | Get info on an user |\n' + '| adduser | Registers a user manually |\n' + '| enable | Enable user |\n' + '| disable | Disable user account |\n' + '+--------------------------------------------------------+\n') + + def postcmd(self, stop, line): + if not stop: + self.preloop() + return stop + + def do_back(self, line): + '''Step out of current shell''' + return 1 + + def do_exit(self, line): + '''Exit shell''' + sys.exit(0) + + def do_quit(self, line): + '''Exit shell''' + sys.exit(0) + + def do_shell(self, line): + '''Run line in python''' + exec line + +class RootCmd(UserCmd): + def do_list(self, line): + print 'list - List all registered users' + if line in ['help', '?']: + print 'Usage: list \n' + return + + print 'Registered Users\n' + if User.objects.count() == 0: + print 'No users are registered' + return + + header = ( '+{}+{}+\n'.format('-' * 39, '-' * 10) + + '| {0:^37} | {1:^8} |\n'.format('User Handle', 'Active') + + 
'+{}+{}+'.format('-' * 39, '-' * 10)) + i = 0 + for user in User.objects.all(): + handle = user.user_handle + if (i % 15) == 0: + print header + print '| {0:37} | {1:^8} |'.format(handle, user.active) + i += 1 + + print '+{}+{}+'.format('-' * 39, '-' * 10) + + def do_adduser(self, line): + print 'adduser - Manually add user to FIRST' + if line in ['', 'help', '?']: + print 'Usage: adduser ' + return + + line = line.split(' ') + if len(line) !=2: + print 'The correct arguments were not provided.' + return + + # Verify handle provided is valid + handle, num = self._expand_user_handle(line[0]) + if None in [handle, num]: + return + + if not re.match(r'^[a-zA-Z\d\._]+@[a-zA-Z\d\.\-_]+(?:\.[a-zA-Z]{2,4})+$', line[1]): + print 'Invalid email provided.' + return + + email = line[1] + user = self._get_db_user_obj(line[0]) + if user: + print 'User {} already exists'.format(line[0]) + return + + user = User(email=email, handle=handle, number=num, api_key=uuid4()) + user.name = raw_input('Enter user name: ') + user.save() + + print 'User {0.user_handle} created (api key: {0.api_key})'.format(user) + + + def do_info(self, line): + print 'info - Displays details about a registered User' + if line in ['', 'help', '?']: + print 'Usage: info ' + return + + user = self._get_db_user_obj(line) + if not user: + return + + print ('+' + '-'*65 + '+\n' + '| Name | {0.name:53} |\n' + '+' + '-'*9 + '+' + '-'*55 + '+\n' + '| Email | {0.email:53} |\n' + '+' + '-'*9 + '+' + '-'*55 + '+\n' + '| Handle | {0.user_handle:53} |\n' + '+' + '-'*9 + '+' + '-'*55 + '+\n' + '| Created | {1:53} |\n' + '+' + '-'*9 + '+' + '-'*55 + '+\n' + '| Active | {0.active:53} |\n' + '+' + '-'*65 + '+\n').format(user, str(user.created)) + + def do_enable(self, line): + print 'enable - Enable user \n' + if line in ['', 'help', '?']: + print 'Usage: enable ' + return + + user = self._get_db_user_obj(line) + if not user: + return + + user.active = True + user.save() + print 'User "{}" enabled'.format(line) + + def
do_disable(self, line): + print 'disable - Disable user \n' + if line in ['', 'help', '?']: + print 'Usage: disable ' + return + + user = self._get_db_user_obj(line) + if not user: + return + + user.active = False + user.save() + print 'User "{}" disabled'.format(line) + + def _expand_user_handle(self, user_handle): + matches = re.match('^([^#]+)#(\d{4})$', user_handle) + if not matches: + print 'The provided handle is invalid' + return (None, None) + + handle, num = matches.groups() + return (handle, int(num)) + + + def _get_db_user_obj(self, line): + handle, num = self._expand_user_handle(line) + if None in [handle, num]: + return + + user = User.objects.filter(handle=handle, number=int(num)) + if not user: + print 'Unable to locate User handle "{}"'.format(line) + return + + return user.get() + + +if __name__ == '__main__': + shell = RootCmd() + if len(sys.argv) > 1: + shell.onecmd(' '.join(sys.argv[1:])) + sys.exit(0) + + while 1: + try: + shell.cmdloop() + except Exception as err: + pprint(err) diff --git a/server/www/migrations/0001_initial.py b/server/www/migrations/0001_initial.py new file mode 100644 index 0000000..6d695b7 --- /dev/null +++ b/server/www/migrations/0001_initial.py @@ -0,0 +1,204 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.18 on 2019-02-11 15:48 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='AppliedMetadata', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ], + options={ + 'db_table': 'AppliedMetadata', + }, + ), + migrations.CreateModel( + name='Engine', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=16, unique=True)), + 
('description', models.CharField(max_length=256)), + ('path', models.CharField(max_length=256)), + ('obj_name', models.CharField(max_length=32)), + ('active', models.BooleanField(default=False)), + ], + options={ + 'db_table': 'Engine', + }, + ), + migrations.CreateModel( + name='Function', + fields=[ + ('id', models.BigAutoField(primary_key=True, serialize=False)), + ('sha256', models.CharField(max_length=64)), + ('opcodes', models.BinaryField()), + ('architecture', models.CharField(max_length=64)), + ], + options={ + 'db_table': 'Function', + }, + ), + migrations.CreateModel( + name='FunctionApis', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('api', models.CharField(max_length=128, unique=True)), + ], + options={ + 'db_table': 'FunctionApis', + }, + ), + migrations.CreateModel( + name='Metadata', + fields=[ + ('id', models.BigAutoField(primary_key=True, serialize=False)), + ], + options={ + 'db_table': 'Metadata', + }, + ), + migrations.CreateModel( + name='MetadataDetails', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=256)), + ('prototype', models.CharField(max_length=256)), + ('comment', models.CharField(max_length=512)), + ('committed', models.DateTimeField(default=django.utils.timezone.now)), + ], + options={ + 'db_table': 'MetadataDetails', + }, + ), + migrations.CreateModel( + name='Sample', + fields=[ + ('id', models.BigAutoField(primary_key=True, serialize=False)), + ('md5', models.CharField(max_length=32)), + ('crc32', models.BigIntegerField()), + ('sha1', models.CharField(blank=True, max_length=40, null=True)), + ('sha256', models.CharField(blank=True, max_length=64, null=True)), + ('last_seen', models.DateTimeField(blank=True, default=django.utils.timezone.now)), + ], + options={ + 'db_table': 'Sample', + }, + ), + migrations.CreateModel( + name='User', + fields=[ + ('id', 
models.BigAutoField(primary_key=True, serialize=False)), + ('name', models.CharField(max_length=128)), + ('email', models.CharField(max_length=254)), + ('handle', models.CharField(max_length=32)), + ('number', models.IntegerField()), + ('api_key', models.UUIDField(unique=True)), + ('created', models.DateTimeField(default=django.utils.timezone.now)), + ('rank', models.BigIntegerField(default=0)), + ('active', models.BooleanField(default=True)), + ('service', models.CharField(max_length=16)), + ('auth_data', models.CharField(max_length=32768)), + ], + options={ + 'db_table': 'User', + }, + ), + migrations.AddIndex( + model_name='user', + index=models.Index(fields=['email'], name='User_email_ffa2e0_idx'), + ), + migrations.AddIndex( + model_name='user', + index=models.Index(fields=['api_key'], name='User_api_key_c4f2d6_idx'), + ), + migrations.AlterIndexTogether( + name='user', + index_together=set([('handle', 'number')]), + ), + migrations.AddField( + model_name='sample', + name='functions', + field=models.ManyToManyField(to='www.Function'), + ), + migrations.AddField( + model_name='sample', + name='seen_by', + field=models.ManyToManyField(to='www.User'), + ), + migrations.AddField( + model_name='metadata', + name='details', + field=models.ManyToManyField(to='www.MetadataDetails'), + ), + migrations.AddField( + model_name='metadata', + name='user', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.User'), + ), + migrations.AddField( + model_name='function', + name='apis', + field=models.ManyToManyField(to='www.FunctionApis'), + ), + migrations.AddField( + model_name='function', + name='metadata', + field=models.ManyToManyField(to='www.Metadata'), + ), + migrations.AddField( + model_name='engine', + name='developer', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.User'), + ), + migrations.AddField( + model_name='appliedmetadata', + name='metadata', + 
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.Metadata'), + ), + migrations.AddField( + model_name='appliedmetadata', + name='sample', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.Sample'), + ), + migrations.AddField( + model_name='appliedmetadata', + name='user', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.User'), + ), + migrations.AlterUniqueTogether( + name='sample', + unique_together=set([('md5', 'crc32')]), + ), + migrations.AlterIndexTogether( + name='sample', + index_together=set([('md5', 'crc32')]), + ), + migrations.AddIndex( + model_name='metadata', + index=models.Index(fields=['user'], name='Metadata_user_id_aea908_idx'), + ), + migrations.AlterUniqueTogether( + name='function', + unique_together=set([('sha256', 'architecture')]), + ), + migrations.AddIndex( + model_name='engine', + index=models.Index(fields=['name'], name='Engine_name_14ac74_idx'), + ), + migrations.AlterUniqueTogether( + name='appliedmetadata', + unique_together=set([('metadata', 'sample', 'user')]), + ), + ] diff --git a/server/www/models.py b/server/www/models.py index 999885a..a0efb4a 100644 --- a/server/www/models.py +++ b/server/www/models.py @@ -1,7 +1,7 @@ #------------------------------------------------------------------------------- # -# FIRST MongoDB Models -# Copyright (C) 2016 Angel M. Villegas +# FIRST Django ORM Models +# Copyright (C) 2017 Angel M. Villegas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,40 +17,31 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# -# Requirements -# ------------ -# mongoengine (https://pypi.python.org/pypi/mongoengine/) -# #------------------------------------------------------------------------------- # Python Modules from __future__ import unicode_literals -import datetime # Third Party Modules -from bson.objectid import ObjectId -from mongoengine import Document, StringField, UUIDField, \ - DateTimeField, LongField, ReferenceField, \ - BinaryField, ListField, BooleanField, ObjectIdField, \ - IntField, EmbeddedDocument, EmbeddedDocumentListField - -class User(Document): - name = StringField(max_length=128, required=True) - email = StringField(max_length=254, unique=True) - handle = StringField(max_length=32, required=True) - number = IntField(required=True) - api_key = UUIDField(required=True, unique=True) - created = DateTimeField(default=datetime.datetime.utcnow, required=True) - rank = LongField(default=0) - active = BooleanField(default=True) - - service = StringField(max_length=16, required=True) - auth_data = StringField(max_length=4096, required=True) - - meta = { - 'indexes' : [('handle', 'number'), 'api_key', 'email'] - } +from django.db import models +from django.utils import timezone + + +class User(models.Model): + id = models.BigAutoField(primary_key=True) + + name = models.CharField(max_length=128) + email = models.CharField(max_length=254) + handle = models.CharField(max_length=32) + number = models.IntegerField() + api_key = models.UUIDField(unique=True) + created = models.DateTimeField(default=timezone.now) + rank = models.BigIntegerField(default=0) + active = models.BooleanField(default=True) + + service = models.CharField(max_length=16) + auth_data = models.CharField(max_length=32768) @property def user_handle(self): @@ -67,22 +58,31 @@ def dump(self, full=False): 'rank' : self.rank, 'created' : self.created, 'active' : self.active}) - return data + class Meta: + db_table = 'User' + indexes = [ + models.Index(fields=['email']), + models.Index(fields=['api_key']), + ] + 
index_together = ("handle", "number") -class Engine(Document): - name = StringField(max_length=16, required=True, unique=True) - description = StringField(max_length=128, required=True) - path = StringField(max_length=256, required=True) - obj_name = StringField(max_length=32, required=True) - applied = ListField(default=list) - developer = ReferenceField(User) - active = BooleanField(default=False) - meta = { - 'indexes' : ['name'] - } +class Engine(models.Model): + name = models.CharField(max_length=16, unique=True) + description = models.CharField(max_length=256) + path = models.CharField(max_length=256) + obj_name = models.CharField(max_length=32) + + developer = models.ForeignKey('User') + active = models.BooleanField(default=False) + + @property + def rank(self): + # TODO: Complete + #return len(self.applied) + return 0 def dump(self, full=False): data = {'name' : self.name, @@ -91,107 +91,151 @@ def dump(self, full=False): 'developer' : self.developer.user_handle} if full: - data.update({'id' : str(self.id), 'path' : self.path}) + data.update({'path' : self.path}) return data + class Meta: + db_table = 'Engine' + indexes = [ + models.Index(fields=['name']), + ] + + +# TODO: Create scheme for tracking applied metadata for engines +# +#class AppliedEngine(models.Model): +# engine_id = models.ForeignKey(Engine) +# sample_id = models.ForeignKey(Sample) +# user_id = models.ForeignKey(User) +# engine_metadata_id = models.BigIntegerField(); +# +# class Meta: +# db_table = 'AppliedEngine' +# unique_together = ("sample_id", "user_id", "engine_metadata_id") + +class AppliedMetadata(models.Model): + metadata = models.ForeignKey('Metadata') + sample = models.ForeignKey('Sample') + user = models.ForeignKey('User') + + class Meta: + db_table = 'AppliedMetadata' + unique_together = ("metadata", "sample", "user") + + +class MetadataDetails(models.Model): + name = models.CharField(max_length=256) + prototype = models.CharField(max_length=256) + comment = 
models.CharField(max_length=512) + committed = models.DateTimeField(default=timezone.now) + + class Meta: + db_table = 'MetadataDetails' + + +class Metadata(models.Model): + id = models.BigAutoField(primary_key=True) + + user = models.ForeignKey('User') + details = models.ManyToManyField('MetadataDetails') + @property def rank(self): - return len(self.applied) + if hasattr(self, 'id'): + return AppliedMetadata.objects.filter(metadata=self.id).count() + + return 0 + def has_changed(self, name, prototype, comment): + if not self.details.exists(): + return True -class Metadata(EmbeddedDocument): - id = ObjectIdField(required=True, default=lambda: ObjectId()) - user = ReferenceField(User) - name = ListField(StringField(max_length=128), default=list) - prototype = ListField(StringField(max_length=256), default=list) - comment = ListField(StringField(max_length=512), default=list) - committed = ListField(DateTimeField(), default=list) - applied = ListField(default=list) + latest = self.details.latest('committed') + if ((latest.name != name) + or (latest.prototype != prototype) + or (latest.comment != comment)): + return True - meta = { - 'indexes' : ['user'] - } + return False def dump(self, full=False): - data = {'creator' : self.user.user_handle, - 'name' : self.name[0], - 'prototype' : self.prototype[0], - 'comment' : self.comment[0], - 'rank' : len(self.applied)} + data = {'creator' : self.user.user_handle} + latest_details = self.details.latest('committed') + data.update({ + 'name' : latest_details.name, + 'prototype' : latest_details.prototype, + 'comment' : latest_details.comment, + 'rank' : self.rank + }) if full: - data['history'] = [] - for i in xrange(len(self.name) - 1, -1, -1): - # Convert back with: - # datetime.datetime.strptime(
, '%Y-%m-%dT%H:%M:%S.%f') - committed = self.committed[i].isoformat() - data['history'].append({'name' : self.name[i], - 'prototype' : self.prototype[i], - 'comment' : self.comment[i], - 'committed' : committed}) + # Convert committed time back with: + # datetime.datetime.strptime(
, '%Y-%m-%dT%H:%M:%S.%f') + data['history'] = [{'name' : d.name, + 'prototype' : d.prototype, + 'comment' : d.comment, + 'committed' : d.committed.isoformat()} + for d in self.details.order_by('committed')] return data - def has_changed(self, name, prototype, comment): - if (not self.name) or (not self.prototype) or (not comment): - return True + class Meta: + db_table = 'Metadata' + indexes = [models.Index(fields=['user'])] - if ((self.name[0] != name) - or (self.prototype[0] != prototype) - or (self.comment[0] != comment)): - return True - return False +class FunctionApis(models.Model): + api = models.CharField(max_length=128, unique=True) - @property - def rank(self): - return len(self.applied) + class Meta: + db_table = 'FunctionApis' -# Use bson.Binary to insert binary data -class Function(Document): - sha256 = StringField(max_length=64) - opcodes = BinaryField() - apis = ListField(StringField(max_length=64), default=list) - metadata = EmbeddedDocumentListField(Metadata, default=list) - # Return value from idaapi.get_file_type_name() - architecture = StringField(max_length=64, required=True) - meta = { - 'indexes' : [] - } +class Function(models.Model): + id = models.BigAutoField(primary_key=True) - def dump(self): - return {'id' : self.id, - 'opcodes' : self.opcodes, - 'apis' : self.apis, - 'metadata' : [str(x.id) for x in self.metadata], + sha256 = models.CharField(max_length=64) + opcodes = models.BinaryField() + apis = models.ManyToManyField('FunctionApis') + metadata = models.ManyToManyField('Metadata') + architecture = models.CharField(max_length=64) + + def dump(self, full=False): + data = {'opcodes' : self.opcodes, 'architecture' : self.architecture, 'sha256' : self.sha256} + if full: + data['apis'] = [x['api'] for x in self.apis.values('api')] + data['id'] = self.id -class Sample(Document): - md5 = StringField(max_length=32, required=True) - crc32 = IntField(required=True) - sha1 = StringField(max_length=40) - sha256 = StringField(max_length=64) - 
seen_by = ListField(ReferenceField(User), default=list) - functions = ListField(ReferenceField(Function), default=list) - last_seen = DateTimeField(default=datetime.datetime.utcnow) + return data - meta = { - 'indexes' : [('md5', 'crc32')] - } + class Meta: + db_table = 'Function' + unique_together = ('sha256', 'architecture') - def dump(self): - data = {'md5' : self.md5, 'crc32' : self.crc32, - 'seen_by' : [str(x.id) for x in self.seen_by], - 'functions' : [str(x.id) for x in self.functions]} - if 'sha1' in self: - data['sha1'] = self.sha1 +class Sample(models.Model): + id = models.BigAutoField(primary_key=True) - if 'sha256' in self: - data['sha256'] = self.sha256 + md5 = models.CharField(max_length=32) + crc32 = models.BigIntegerField() + sha1 = models.CharField(max_length=40, null=True, blank=True) + sha256 = models.CharField(max_length=64, null=True, blank=True) + seen_by = models.ManyToManyField('User') + functions = models.ManyToManyField('Function') + last_seen = models.DateTimeField(default=timezone.now, blank=True) - return data + class Meta: + db_table = 'Sample' + index_together = ['md5', 'crc32'] + unique_together = ('md5', 'crc32') + + def dump(self): + return {'md5' : self.md5, 'crc32' : self.crc32, + 'seen_by' : [str(x.id) for x in self.seen_by.all()], + 'functions' : [str(x.id) for x in self.functions.all()], + 'sha1' : self.sha1, + 'sha256' : self.sha256} diff --git a/server/www/templates/www/base_site.html b/server/www/templates/www/base_site.html index 01d4e26..c5127b0 100644 --- a/server/www/templates/www/base_site.html +++ b/server/www/templates/www/base_site.html @@ -154,7 +154,7 @@
- Closed Beta + Beta
@@ -425,7 +425,7 @@

Install

Documentation

- Check out our Docs. + Check out our Docs (Server - IDA Plugin).

@@ -563,7 +563,6 @@ diff --git a/server/www/views.py b/server/www/views.py index 289e9b6..089658d 100644 --- a/server/www/views.py +++ b/server/www/views.py @@ -9,8 +9,8 @@ # FIRST Modules -from www.models import Function, User -from first.auth import Authentication, require_login, FIRSTAuthError +from first_core.models import Function, User +from first_core.auth import Authentication, require_login, FIRSTAuthError def handler404(request): return render(request, 'www/404.html', None) @@ -38,7 +38,7 @@ def profile(request): if not user: return redirect(reverse('www:index')) - count = Function.objects(metadata__user=user).count() + count = Function.objects.filter(metadata__user=user).count() data = {'title' : 'Profile', 'user' : user.dump(True), 'metadata_count' : count}