diff --git a/server/first/__init__.py b/server/first/__init__.py
index 80bd52a..e69de29 100644
--- a/server/first/__init__.py
+++ b/server/first/__init__.py
@@ -1,27 +0,0 @@
-#-------------------------------------------------------------------------------
-#
-# Intializes FIRST's DBManager and EngineManager
-# Copyright (C) 2016 Angel M. Villegas
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-#-------------------------------------------------------------------------------
-
-# FIRST Modules
-from first.dbs import FIRSTDBManager
-from first.engines import FIRSTEngineManager
-
-DBManager = FIRSTDBManager()
-EngineManager = FIRSTEngineManager(DBManager)
diff --git a/server/first/dbs/builtin_db.py b/server/first/dbs/builtin_db.py
deleted file mode 100644
index 5f4437d..0000000
--- a/server/first/dbs/builtin_db.py
+++ /dev/null
@@ -1,466 +0,0 @@
-#-------------------------------------------------------------------------------
-#
-# FIRST DB Module for completing operations with the MongoDB backend
-# Copyright (C) 2016 Angel M. Villegas
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Requirements
-# ------------
-# - flask
-# - mongoengine
-# - werkzeug
-#
-#-------------------------------------------------------------------------------
-
-# Python Modules
-import re
-import math
-import json
-import hashlib
-import datetime
-import ConfigParser
-from hashlib import md5
-
-# Third Party Modules
-import bson
-from mongoengine import Q
-from mongoengine.queryset import DoesNotExist, MultipleObjectsReturned
-
-# FIRST Modules
-from first.dbs import AbstractDB
-from first.models import User, Metadata, Function, Sample, Engine
-
-
-class FIRSTDB(AbstractDB):
- _name = 'first_db'
- standards = { 'intel16', 'intel32', 'intel64', 'arm32', 'arm64', 'mips',
- 'ppc', 'sparc', 'sysz'}
-
- #
- # Functions called by FIRST Framework
- #--------------------------------------------------------------------------
- def __init__(self, config):
- '''
- Constructor.
-
- @param conf: ConfigParser.RawConfigParser
- '''
- self._is_installed = True
- '''
- section = 'mongodb_settings'
-
- if (not config.has_section(section)
- or not config.has_option(section, 'db')):
- raise FirstDBError('DB settings not available', skip=True)
-
- if section.upper() not in app.config:
- app.config[section.upper()] = {}
-
- app.config[section.upper()]['db'] = conf.get(section, 'db')
- self.db.init_app(app)
- '''
-
- def get_architectures(self):
- standards = FIRSTDB.standards.copy()
- standards.update(Function.objects().distinct(field='architecture'))
- return list(standards)
-
- def get_sample(self, md5_hash, crc32, create=False):
- try:
- # Get Sample from DB
- return Sample.objects.get(md5=md5_hash, crc32=crc32)
-
- except DoesNotExist:
- if not create:
- return None
-
- # Create Sample for DB
- sample = Sample(md5=md5_hash, crc32=crc32)
- sample.last_seen = datetime.datetime.now()
- sample.save()
- return sample
-
- def sample_seen_by_user(self, sample, user):
- if (not isinstance(sample, Sample)) or (not isinstance(user, User)):
- return None
-
- if user not in sample.seen_by:
- sample.seen_by.append(user)
- sample.save()
-
- def checkin(self, user, md5_hash, crc32, sha1_hash=None, sha256_hash=None):
- '''
- TODO:
-
- @returns String error message on Failure
- None
- '''
- if not isinstance(user, User):
- return False
-
- # Validate data
- if ((not re.match('^[a-f\d]{32}$', md5_hash))
- or (sha1_hash and not re.match('^[a-f\d]{40}$', sha1_hash))
- or (sha256_hash and not re.match('^[a-f\d]{64}$', sha256_hash))):
- return False
-
- sample = self.get_sample(md5_hash, crc32, True)
- if not sample:
- return False
-
- sample.last_seen = datetime.datetime.now()
- if user not in sample.seen_by:
- sample.seen_by.append(user)
-
- if None != sha1_hash:
- sample.sha1 = sha1_hash
-
- if None != sha256_hash:
- sample.sha256 = sha256_hash
-
- sample.save()
- return True
-
- def get_function(self, opcodes, architecture, apis, create=False, **kwargs):
- function = None
-
- try:
- function = Function.objects.get(sha256=hashlib.sha256(opcodes).hexdigest(),
- opcodes=bson.Binary(opcodes),
- architecture=architecture,
- apis=apis)
- except DoesNotExist:
- # Create function and add it to sample
- function = Function(sha256=hashlib.sha256(opcodes).hexdigest(),
- opcodes=bson.Binary(opcodes),
- architecture=architecture,
- apis=apis)
- function.save()
-
- return function
-
- def get_all_functions(self):
- try:
- return Function.objects.all()
-
- except:
- return []
-
- def find_function(self, _id=None, opcodes=None, apis=None, architecture=None, h_sha256=None):
- try:
- # User function ID
- if None != _id:
- return Function.objects(id=bson.objectid.ObjectId(_id)).get()
-
- # User opcodes and apis
- elif None not in [opcodes, apis]:
- return Function.objects(opcodes=opcodes, apis=apis).get()
-
- # Use hash, architecture
- elif None not in [architecture, h_sha256]:
- return Function.objects(sha256=h_sha256, architecture=architecture).get()
-
- else:
- return None
-
- except DoesNotExist:
- return None
-
- def add_function_to_sample(self, sample, function):
- if (not isinstance(sample, Sample)) or (not isinstance(function, Function)):
- return False
-
- if function not in sample.functions:
- sample.functions.append(function)
- sample.save()
-
- return True
-
- def add_metadata_to_function(self, user, function, name, prototype, comment, **kwargs):
- if (not isinstance(function, Function)) or (not isinstance(user, User)):
- return None
-
- # Check to see if user already has metadata associated with the sample
- metadata = None
- for m in function.metadata:
- if user == m.user:
- if m.has_changed(name, prototype, comment):
- m.name = [name] + m.name
- m.prototype = [prototype] + m.prototype
- m.comment = [comment] + m.comment
- m.committed = [datetime.datetime.now()] + m.committed
-
- metadata = m
- break
-
- if not metadata:
- metadata = Metadata(user=user, name=[name],
- prototype=[prototype],
- comment=[comment],
- committed=[datetime.datetime.now()])
- function.metadata.append(metadata)
-
- function.save()
- return str(metadata.id)
-
- def get_metadata_list(self, metadata):
- results = []
- user_metadata, engine_metadata = self.separate_metadata(metadata)
-
- metadata_ids = map(bson.objectid.ObjectId, user_metadata)
- mongo_filter = Q(metadata__id=metadata_ids[0])
- for mid in metadata_ids[1:]:
- mongo_filter |= Q(metadata__id=mid)
-
- matches = Function.objects.filter(mongo_filter).only('metadata')
- for function in matches:
- for metadata in function.metadata:
- if metadata.id in metadata_ids:
- data = metadata.dump()
- data['id'] = str(metadata.id)
- results.append(data)
-
- # Remove id from list to shorten list
- del metadata_ids[metadata_ids.index(metadata.id)]
-
- for _id in engine_metadata:
- engines = Engine.object(id=_id)
- if (not engines) or (len(engines) > 1):
- continue
-
- data = {'id' : _id, 'engine' : engine.name,
- 'description' : engine.description}
- results.append(data)
-
- return results
-
- def delete_metadata(self, user, metadata_id):
- if not isinstance(user, User):
- return False
-
- user_metadata, engine_metadata = self.separate_metadata([metadata_id])
- if not user_metadata:
- return False
-
- # User must be the creator of the metadata to delete it
- metadata_id = bson.objectid.ObjectId(user_metadata[0])
- try:
- Function.objects(metadata__user=user, metadata__id=metadata_id).update_one(pull__metadata__id=metadata_id)
- return True
- except DoesNotExist:
- return False
-
- def created(self, user, page, max_metadata=20):
- offset = (page - 1) * max_metadata
- results = []
- pages = 0
-
- if (offset < 0) or (not isinstance(user, User)):
- return (results, pages)
-
- try:
- matches = Function.objects(metadata__user=user).only('metadata')
- total = Function.objects(metadata__user=user).count() + 0.0
- pages = int(math.ceil(total / max_metadata))
- if page > pages:
- return (results, pages)
-
- matches = matches.skip(offset).limit(max_metadata)
-
- except ValueError:
- return (results, pages)
-
- for function in matches:
- for metadata in function.metadata:
- if user == metadata.user:
- temp = metadata.dump()
- temp['id'] = FIRSTDB.make_id(metadata.id, 0)
- results.append(temp)
-
- # Bail out of inner loop early since a user can only
- # create one metadata entry per function
- break
-
- return (results, pages)
-
- @staticmethod
- def make_id(_id, flags):
- return '{:1x}{}'.format(flags & 0xF, _id)
-
- def separate_metadata(self, metadata):
- # Get metadata created by users only, MSB should not be set
- user_metadata = []
- engine_metadata = []
- for x in metadata:
- if len(x) == 24:
- user_metadata.append(x)
- elif (len(x) == 25) and (((int(x[0], 16) >> 3) & 1) == 0):
- user_metadata.append(x[1:])
- elif (len(x) == 25) and (((int(x[0], 16) >> 3) & 1) == 1):
- engine_metadata.append(x[1:])
-
- return (user_metadata, engine_metadata)
-
- def metadata_history(self, metadata):
- results = {}
- user_metadata, engine_metadata = self.separate_metadata(metadata)
- e_comment = ('Generated by Engine: {0.name}\n{0.description}\n\n'
- 'Developer: {0.developer.user_handle}')
-
- if len(user_metadata) > 0:
- metadata_ids = map(bson.objectid.ObjectId, user_metadata)
- mongo_filter = Q(metadata__id=metadata_ids[0])
- for mid in metadata_ids[1:]:
- mongo_filter |= Q(metadata__id=mid)
-
- matches = Function.objects.filter(mongo_filter).only('metadata')
- for function in matches:
- for metadata in function.metadata:
- if metadata.id in metadata_ids:
- data = metadata.dump(True)
- _id = FIRSTDB.make_id(metadata.id, 0)
- results[_id] = {'creator' : data['creator'],
- 'history' : data['history']}
- # Remove id from list to shorten list
- del metadata_ids[metadata_ids.index(metadata.id)]
-
- # Provide information for engine created metadata...
- for engine_id in engine_metadata:
- engine = self.get_engine(engine_id)
- if not engine:
- continue
- data = {'creator' : engine.name,
- 'history' : [{'committed' : '',
- 'name' : 'N/A',
- 'prototype' : 'N/A',
- 'comment' : e_comment.format(engine)}]}
- results[FIRSTDB.make_id(engine_id, 8)] = data
-
- return results
-
- def applied(self, sample, user, _id, is_engine=False):
- '''
- @returns Boolean. True if added to the applied list
- False if not added to the applied list
- '''
- if (not isinstance(user, User)) or (not isinstance(sample, Sample)):
- return False
-
- key = [str(sample.id), str(user.id)]
- if is_engine:
- engine_id = bson.objectid.ObjectId(_id)
- engine = Engine.objects(id=engine_id,
- applied__contains=key)
-
- # Check if user has already applied the signature
- if len(engine):
- return True
-
- try:
- engine = Engine.objects(id=engine_id).get()
- except DoesNotExist:
- # Engine does not exist
- return False
-
- engine.applied.append(key)
- engine.save()
-
- else:
- metadata_id = bson.objectid.ObjectId(_id)
- functions = Function.objects(metadata__id=metadata_id,
- metadata__applied__contains=key)
-
- # Check if user has already applied the signature
- if len(functions):
- return True
-
- try:
- function = Function.objects(metadata__id=metadata_id).get()
- except DoesNotExist:
- # Metadata does not exist
- return False
-
- # Get metadata
- for metadata in function.metadata:
- if metadata.id == metadata_id:
- metadata.applied.append(key)
- break
-
- function.save()
-
- return True
-
- def unapplied(self, sample, user, _id, is_engine=False):
- '''
- @returns Boolean. True if not in metadata's applied list
- False if still in the applied list
- '''
- if (not isinstance(sample, Sample)) or (not isinstance(user, User)):
- return False
-
- key = [str(sample.id), str(user.id)]
- if is_engine:
- engine_id = bson.objectid.ObjectId(_id)
- engine = Engine.objects(id=engine_id,
- applied__contains=key)
-
- # Check if user has already applied the signature
- if not len(engine):
- return True
-
- try:
- engine = Engine.objects(id=engine_id).get()
- except DoesNotExist:
- # Engine does not exist
- return False
-
- engine.applied.remove(key)
- engine.save()
-
- else:
- metadata_id = bson.objectid.ObjectId(_id)
- functions = Function.objects(metadata__id=metadata_id,
- metadata__applied__contains=key)
-
- # Check if user does not have it applied already
- if not len(functions):
- return True
-
- try:
- function = functions.get()
- except DoesNotExist:
- # Metadata does not exist
- return True
-
- # Get metadata
- for metadata in function.metadata:
- if metadata.id == metadata_id:
- metadata.applied.remove(key)
- break
-
- function.save()
-
- return True
-
- def engines(self, active=True):
- return Engine.objects(active=bool(active))
-
- def get_engine(self, engine_id):
- engines = Engine.objects(id = engine_id)
- if not engines:
- return None
-
- return engines[0]
diff --git a/server/first/engines/basic_masking.py b/server/first/engines/basic_masking.py
deleted file mode 100644
index c3ead1f..0000000
--- a/server/first/engines/basic_masking.py
+++ /dev/null
@@ -1,222 +0,0 @@
-#-------------------------------------------------------------------------------
-#
-# FIRST Engine: Basic Masking
-# Author: Angel M. Villegas (anvilleg@cisco.com)
-# Last Modified: March 2016
-#
-# Uses Distorm3 to obtain instructions and then removes certain instruction
-# details to normalize it into a standard form to be compared to other
-# functions.
-#
-# Maskes out:
-# - ESP/EBP Offsets
-# - Absolute Calls??
-# - Global Offsets??
-#
-# Requirements
-# ------------
-# - Distorm3
-#
-# Installation
-# ------------
-# None
-#
-#-------------------------------------------------------------------------------
-
-# Python Modules
-import re
-from hashlib import sha256
-
-# FIRST Modules
-from first.error import FIRSTError
-from first.engines import AbstractEngine
-from first.engines.results import FunctionResult
-
-# Third Party Modules
-from bson.objectid import ObjectId
-from distorm3 import DecomposeGenerator, Decode32Bits, Decode64Bits, Decode16Bits
-from mongoengine.queryset import DoesNotExist, MultipleObjectsReturned
-from mongoengine import Document, StringField, ListField, IntField, \
- ObjectIdField
-
-class BasicMasking(Document):
- sha256 = StringField(max_length=64, required=True)
- architecture = StringField(max_length=64, required=True)
- instructions = ListField(StringField(max_length=124), required=True)
- total_bytes = IntField(required=True, default=0)
- functions = ListField(ObjectIdField(), default=list)
-
- meta = {
- 'indexes' : [('sha256', 'architecture', 'instructions')]
- }
-
- def dump(self):
- return {'sha256' : self.sha256,
- 'architecture' : self.architecture,
- 'instructions' : self.instructions,
- 'total_bytes' : self.total_bytes,
- 'functions' : self.function_list()}
-
- def function_list(self):
- return [str(x) for x in self.functions]
-
-
-class BasicMaskingEngine(AbstractEngine):
- _name = 'BasicMasking'
- _description = ('Masks ESP/EBP offsets, calls/jmps offsets, and global '
- 'offsets (Intel Only). Requires at least 8 instructions.')
- _required_db_names = ['first_db']
-
- def normalize(self, opcodes, architecture):
- changed_bits = 0
- dt = None
- mapping = {'intel32' : Decode32Bits,
- 'intel64' : Decode64Bits,
- 'intel16' : Decode16Bits}
- if architecture in mapping:
- dt = mapping[architecture]
- else:
- return (None, changed_bits, None)
-
- try:
- normalized = []
- original = []
- for i in DecomposeGenerator(0, opcodes, dt):
- # If disassembly is not valid then junk data has been sent
- if not i.valid:
- return (None, 0, None)
-
- original.append(i._toText())
- instr = i.mnemonic + ' '
-
- # Special mnemonic masking (Call, Jmp, JCC)
- if (i.mnemonic == 'CALL') or i.mnemonic.startswith('J'):
- operand = i.operands[0]._toText()
-
- if 'Immediate' == i.operands[0].type:
- instr += '0x'
- changed_bits += i.operands[0].size
-
- else:
- regex = '^\[R(S|I)P(\+|\-)0x[\da-f]+\]$'
- if re.match(regex, operand):
- instr += re.sub(regex, r'[R\1P\2', operand) + '0x]'
- changed_bits += i.operands[0].dispSize
- else:
- # Nothing will be masked out
- instr = i._toText()
-
- normalized.append(instr)
- continue
-
- operand_instrs = []
- for operand_obj in i.operands:
- operand = operand_obj._toText()
- if ((re.match('^\[E(S|B)P', operand) or re.match('^\[R(I|S)P', operand))
- and operand_obj.dispSize):
- # Offset from EBP/ESP and RIP/RSP
- masked = operand.replace(hex(operand_obj.disp), '0x')
- operand_instrs.append(masked)
- changed_bits += operand_obj.dispSize
-
- elif 'Immediate' == operand_obj.type:
- value = operand_obj.value
- # Masking off immediates within the standard VA of the sample
- if ((0x400000 <= value <= 0x500000)
- or (0x10000000 <= value <= 0x20000000)
- or (0x1C0000000 <= value <= 0x1D0000000)
- or (0x140000000 <= value <= 0x150000000)):
- operand_instrs.append('0x')
- changed_bits += operand_obj.size
-
- else:
- operand_instrs.append(operand)
-
- elif 'AbsoluterMemoryAddress' == operand_obj.type:
- operand_instrs.append('0x')
- changed_bits += operand_obj.dispSize
-
- elif 'AbsoluteMemory' == operand_obj.type:
- masked = operand.replace(hex(operand_obj.disp), '0x')
- operand_instrs.append(masked)
- changed_bits += operand_obj.dispSize
-
- else:
- operand_instrs.append(operand)
-
- normalized.append(instr + ', '.join(operand_instrs))
-
- h_sha256 = sha256(''.join(normalized)).hexdigest()
- return (normalized, changed_bits, h_sha256)
- # For debugging
- #return (original, normalized, changed_bits, h_sha256)
-
- except Exception as e:
- return (None, changed_bits, None)
-
- def _add(self, function):
- '''
-
- '''
- opcodes = function['opcodes']
- architecture = function['architecture']
- normalized, changed, h_sha256 = self.normalize(opcodes, architecture)
-
- if (not h_sha256) or (not normalized) or (8 > len(normalized)):
- return
-
- try:
- db_obj = BasicMasking.objects( sha256=h_sha256,
- architecture=architecture,
- instructions=normalized).get()
- except DoesNotExist:
- db_obj = BasicMasking( sha256=h_sha256,
- architecture=architecture,
- instructions=normalized,
- total_bytes=len(opcodes))
-
- function_id = ObjectId(function['id'])
- if function_id not in db_obj.functions:
- db_obj.functions.append(function_id)
- db_obj.save()
-
- def _scan(self, opcodes, architecture, apis):
- '''Returns List of tuples (function ID, similarity percentage)'''
- db = self._dbs['first_db']
- normalized, changed, h_sha256 = self.normalize(opcodes, architecture)
-
- if (not h_sha256) or (not normalized) or (8 > len(normalized)):
- return
-
- try:
- db_obj = BasicMasking.objects( sha256=h_sha256,
- architecture=architecture,
- instructions=normalized).get()
- except DoesNotExist:
- return None
-
- results = []
- for function_id in db_obj.function_list():
- function = db.find_function(_id=ObjectId(function_id))
-
- if not function or not function.metadata:
- continue
-
- # Similarity = 90% (opcodes and the masking changes)
- # + 10% (api overlap)
- similarity = 100 - ((changed / (len(opcodes) * 8.0)) * 100)
- if similarity > 90.0:
- similarity = 90.0
-
- # The APIs will count up to 10% of the similarity score
- total_apis = len(function.apis)
- overlap = float(len(set(function.apis).intersection(apis)))
- if 0 != total_apis:
- similarity += (overlap / total_apis) * 10
-
- results.append(FunctionResult(function_id, similarity))
-
- return results
-
- def _uninstall(self):
- BasicMasking.drop_collection()
diff --git a/server/first/engines/mnemonic_hash.py b/server/first/engines/mnemonic_hash.py
deleted file mode 100644
index 19b23ae..0000000
--- a/server/first/engines/mnemonic_hash.py
+++ /dev/null
@@ -1,145 +0,0 @@
-#-------------------------------------------------------------------------------
-#
-# FIRST Engine: Mnemonic Hash
-# Uses Distorm3 to obtain mnemonics from the opcodes, reduces the opcodes to
-# a single string and hashes it for future lookup
-#
-# Copyright (C) 2016 Angel M. Villegas
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Requirements
-# ------------
-# - distorm3
-# - mongoengine
-#
-#-------------------------------------------------------------------------------
-
-# Python Modules
-from hashlib import sha256
-
-# FIRST Modules
-from first.error import FIRSTError
-from first.engines import AbstractEngine
-from first.engines.results import FunctionResult
-
-# Third Party Modules
-from bson.objectid import ObjectId
-from distorm3 import DecomposeGenerator, Decode32Bits, Decode64Bits, Decode16Bits
-from mongoengine.queryset import DoesNotExist, MultipleObjectsReturned
-from mongoengine import Document, StringField, ListField, ObjectIdField
-
-class MnemonicHash(Document):
- sha256 = StringField(max_length=64, required=True)
- architecture = StringField(max_length=64, required=True)
- functions = ListField(ObjectIdField(), default=list)
-
- meta = {
- 'indexes' : [('sha256', 'architecture')]
- }
-
- def dump(self):
- return {'sha256' : self.sha256,
- 'architecture' : self.architecture,
- 'functions' : self.function_list()}
-
- def function_list(self):
- return [str(x) for x in self.functions]
-
-
-class MnemonicHashEngine(AbstractEngine):
- _name = 'MnemonicHash'
- _description = ('Uses mnemonics from the opcodes to generate a hash '
- '(Intel Only). Requires at least 8 mnemonics.')
- _required_db_names = ['first_db']
-
- def mnemonic_hash(self, opcodes, architecture):
- dt = None
- mapping = {'intel32' : Decode32Bits,
- 'intel64' : Decode64Bits,
- 'intel16' : Decode16Bits}
- if architecture in mapping:
- dt = mapping[architecture]
- else:
- return (None, None)
-
- try:
- iterable = DecomposeGenerator(0, opcodes, dt)
-
- # Uses valid to ensure we are not creating hashes with 'db 0xYY'
- mnemonics = [d.mnemonic for d in iterable if d.valid]
- return (mnemonics, sha256(''.join(mnemonics)).hexdigest())
-
- except Exception as e:
- return (None, None)
-
- def _add(self, function):
- '''
- Nothing needs to be implemented since the Function Model has the
- sha256 of the opcodes
- '''
- opcodes = function['opcodes']
- architecture = function['architecture']
- mnemonics, mnemonic_sha256 = self.mnemonic_hash(opcodes, architecture)
-
- if (not mnemonic_sha256) or (not mnemonics) or (8 > len(mnemonics)):
- return
-
- try:
- db_obj = MnemonicHash.objects( sha256=mnemonic_sha256,
- architecture=architecture).get()
- except DoesNotExist:
- db_obj = MnemonicHash( sha256=mnemonic_sha256,
- architecture=architecture)
-
- function_id = ObjectId(function['id'])
- if function_id not in db_obj.functions:
- db_obj.functions.append(function_id)
- db_obj.save()
-
- def _scan(self, opcodes, architecture, apis):
- '''Returns List of tuples (function ID, similarity percentage)'''
- db = self._dbs['first_db']
- mnemonics, mnemonic_sha256 = self.mnemonic_hash(opcodes, architecture)
-
- if (not mnemonic_sha256) or (not mnemonics) or (8 > len(mnemonics)):
- return
-
- try:
- db_obj = MnemonicHash.objects( sha256=mnemonic_sha256,
- architecture=architecture).get()
- except DoesNotExist:
- return None
-
- results = []
- for function_id in db_obj.function_list():
- similarity = 75.0
- function = db.find_function(_id=ObjectId(function_id))
-
- if not function or not function.metadata:
- continue
-
- # The APIs will count up to 10% of the similarity score
- total_apis = len(function.apis)
- overlap = float(len(set(function.apis).intersection(apis)))
- if 0 != total_apis:
- similarity += (overlap / total_apis) * 10
-
- results.append(FunctionResult(function_id, similarity))
-
- return results
-
- def _uninstall(self):
- MnemonicHash.drop_collection()
diff --git a/server/first/models.py b/server/first/models.py
deleted file mode 100644
index 1846882..0000000
--- a/server/first/models.py
+++ /dev/null
@@ -1,197 +0,0 @@
-#-------------------------------------------------------------------------------
-#
-# FIRST MongoDB Models
-# Copyright (C) 2016 Angel M. Villegas
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Requirements
-# ------------
-# mongoengine (https://pypi.python.org/pypi/mongoengine/)
-#
-#-------------------------------------------------------------------------------
-
-
-# Python Modules
-from __future__ import unicode_literals
-import datetime
-
-# Third Party Modules
-from bson.objectid import ObjectId
-from mongoengine import Document, StringField, UUIDField, \
- DateTimeField, LongField, ReferenceField, \
- BinaryField, ListField, BooleanField, ObjectIdField, \
- IntField, EmbeddedDocument, EmbeddedDocumentListField
-
-class User(Document):
- name = StringField(max_length=128, required=True)
- email = StringField(max_length=254, unique=True)
- handle = StringField(max_length=32, required=True)
- number = IntField(required=True)
- api_key = UUIDField(required=True, unique=True)
- created = DateTimeField(default=datetime.datetime.utcnow, required=True)
- rank = LongField(default=0)
- active = BooleanField(default=True)
-
- service = StringField(max_length=16, required=True)
- auth_data = StringField(max_length=4096, required=True)
-
- meta = {
- 'indexes' : [('handle', 'number'), 'api_key', 'email']
- }
-
- @property
- def user_handle(self):
- return '{0.handle}#{0.number:04d}'.format(self)
-
- def dump(self, full=False):
- data = {'handle' : self.user_handle}
-
- if full:
- data.update({ 'id' : str(self.id),
- 'name' : self.name,
- 'email' : self.email,
- 'api_key' : self.api_key,
- 'rank' : self.rank,
- 'created' : self.created,
- 'active' : self.active})
-
- return data
-
-
-class Engine(Document):
- name = StringField(max_length=16, required=True, unique=True)
- description = StringField(max_length=128, required=True)
- path = StringField(max_length=256, required=True)
- obj_name = StringField(max_length=32, required=True)
- applied = ListField(default=list)
- developer = ReferenceField(User)
- active = BooleanField(default=False)
-
- meta = {
- 'indexes' : ['name']
- }
-
- def dump(self, full=False):
- data = {'name' : self.name,
- 'description' : self.description,
- 'rank' : self.rank,
- 'developer' : self.developer.user_handle}
-
- if full:
- data.update({'id' : str(self.id), 'path' : self.path})
-
- return data
-
- @property
- def rank(self):
- return len(self.applied)
-
-
-class Metadata(EmbeddedDocument):
- id = ObjectIdField(required=True, default=lambda: ObjectId())
- user = ReferenceField(User)
- name = ListField(StringField(max_length=128), default=list)
- prototype = ListField(StringField(max_length=256), default=list)
- comment = ListField(StringField(max_length=512), default=list)
- committed = ListField(DateTimeField(), default=list)
- applied = ListField(default=list)
-
- meta = {
- 'indexes' : ['user']
- }
-
- def dump(self, full=False):
- data = {'creator' : self.user.user_handle,
- 'name' : self.name[0],
- 'prototype' : self.prototype[0],
- 'comment' : self.comment[0],
- 'rank' : len(self.applied)}
-
- if full:
- data['history'] = []
- for i in xrange(len(self.name) - 1, -1, -1):
- # Convert back with:
- # datetime.datetime.strptime(<str>, '%Y-%m-%dT%H:%M:%S.%f')
- committed = self.committed[i].isoformat()
- data['history'].append({'name' : self.name[i],
- 'prototype' : self.prototype[i],
- 'comment' : self.comment[i],
- 'committed' : committed})
-
- return data
-
- def has_changed(self, name, prototype, comment):
- if (not self.name) or (not self.prototype) or (not comment):
- return True
-
- if ((self.name[0] != name)
- or (self.prototype[0] != prototype)
- or (self.comment[0] != comment)):
- return True
-
- return False
-
- @property
- def rank(self):
- return len(self.applied)
-
-# Use bson.Binary to insert binary data
-class Function(Document):
- sha256 = StringField(max_length=64)
- opcodes = BinaryField()
- apis = ListField(StringField(max_length=128), default=list)
- metadata = EmbeddedDocumentListField(Metadata, default=list)
- # Return value from idaapi.get_file_type_name()
- architecture = StringField(max_length=64, required=True)
-
- meta = {
- 'indexes' : []
- }
-
- def dump(self):
- return {'id' : self.id,
- 'opcodes' : self.opcodes,
- 'apis' : self.apis,
- 'metadata' : [str(x.id) for x in self.metadata],
- 'architecture' : self.architecture,
- 'sha256' : self.sha256}
-
-
-class Sample(Document):
- md5 = StringField(max_length=32, required=True)
- crc32 = IntField(required=True)
- sha1 = StringField(max_length=40)
- sha256 = StringField(max_length=64)
- seen_by = ListField(ReferenceField(User), default=list)
- functions = ListField(ReferenceField(Function), default=list)
- last_seen = DateTimeField(default=datetime.datetime.utcnow)
-
- meta = {
- 'indexes' : [('md5', 'crc32')]
- }
-
- def dump(self):
- data = {'md5' : self.md5, 'crc32' : self.crc32,
- 'seen_by' : [str(x.id) for x in self.seen_by],
- 'functions' : [str(x.id) for x in self.functions]}
-
- if 'sha1' in self:
- data['sha1'] = self.sha1
-
- if 'sha256' in self:
- data['sha256'] = self.sha256
-
- return data
diff --git a/server/first/settings.py b/server/first/settings.py
index 53faf89..08ecb18 100644
--- a/server/first/settings.py
+++ b/server/first/settings.py
@@ -14,7 +14,9 @@
import json
# Read in configuration data
-FIRST_CONFIG_FILE = 'first_config.json'
+FIRST_CONFIG_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)),
+ '..',
+ 'first_config.json')
CONFIG = {}
try:
config_data = json.load(file(FIRST_CONFIG_FILE))
diff --git a/server/first_core/__init__.py b/server/first_core/__init__.py
new file mode 100644
index 0000000..4b7b779
--- /dev/null
+++ b/server/first_core/__init__.py
@@ -0,0 +1,27 @@
+#-------------------------------------------------------------------------------
+#
+# Initializes FIRST's DBManager and EngineManager
+# Copyright (C) 2016 Angel M. Villegas
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#-------------------------------------------------------------------------------
+
+# FIRST Modules
+from first_core.dbs import FIRSTDBManager
+from first_core.engines import FIRSTEngineManager
+
+# Module-level manager instances that other modules import from this package;
+# the engine manager is constructed with the DB manager instance.
+DBManager = FIRSTDBManager()
+EngineManager = FIRSTEngineManager(DBManager)
diff --git a/server/first/auth.py b/server/first_core/auth.py
similarity index 98%
rename from server/first/auth.py
rename to server/first_core/auth.py
index 724399e..756b7e6 100644
--- a/server/first/auth.py
+++ b/server/first_core/auth.py
@@ -38,8 +38,8 @@
# FIRST Modules
# TODO: Use DBManager to get user objects and do User operations
-from first.models import User
-from first.error import FIRSTError
+from first_core.models import User
+from first_core.error import FIRSTError
# Thirdy Party
import httplib2
@@ -57,7 +57,7 @@ def __init__(self, message):
def verify_api_key(api_key):
- users = User.objects(api_key=api_key)
+ users = User.objects.filter(api_key=api_key)
if not users:
return None
diff --git a/server/first/dbs/__init__.py b/server/first_core/dbs/__init__.py
similarity index 97%
rename from server/first/dbs/__init__.py
rename to server/first_core/dbs/__init__.py
index 3b28548..ce27b71 100644
--- a/server/first/dbs/__init__.py
+++ b/server/first_core/dbs/__init__.py
@@ -24,7 +24,7 @@
from hashlib import md5
# FIRST Modules
-from first.error import FIRSTError
+from first_core.error import FIRSTError
# Class for FirstDB related exceptions
class FIRSTDBError(FIRSTError):
@@ -110,6 +110,6 @@ def get(self, db_name):
# FIRST DB Classes
-from first.dbs.builtin_db import FIRSTDB
+from first_core.dbs.builtin_db import FIRSTDB
possible_dbs = [FIRSTDB]
diff --git a/server/first_core/dbs/builtin_db.py b/server/first_core/dbs/builtin_db.py
new file mode 100644
index 0000000..ef1dea2
--- /dev/null
+++ b/server/first_core/dbs/builtin_db.py
@@ -0,0 +1,433 @@
+#-------------------------------------------------------------------------------
+#
+# FIRST DB Module for completing operations with the MongoDB backend
+# Copyright (C) 2016 Angel M. Villegas
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Requirements
+# ------------
+# - werkzeug
+#
+#-------------------------------------------------------------------------------
+
+# Python Modules
+import re
+import math
+import json
+import hashlib
+import ConfigParser
+from hashlib import md5
+
+# Third Party Modules
+import bson
+
+from django.utils import timezone
+from django.core.paginator import Paginator
+from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
+
+# FIRST Modules
+from first_core.dbs import AbstractDB
+from first_core.util import make_id, parse_id, separate_metadata, \
+ is_engine_metadata
+from first_core.models import User, Sample, \
+ Engine, \
+ Metadata, MetadataDetails, AppliedMetadata, \
+ Function, FunctionApis
+
+
+class FIRSTDB(AbstractDB):
+ _name = 'first_db'
+ standards = { 'intel16', 'intel32', 'intel64', 'arm32', 'arm64', 'mips',
+ 'ppc', 'sparc', 'sysz'}
+
+ #
+ # Functions called by FIRST Framework
+ #--------------------------------------------------------------------------
+    def __init__(self, config):
+        '''Mark the built-in DB as installed.
+
+        Args:
+            config (:obj:`ConfigParser.RawConfigParser`): Framework
+                configuration. Not read by this backend at the moment.
+        '''
+        # The former 'mongodb_settings' validation only survived as dead code
+        # inside a string literal (it referenced names that are not defined
+        # in this module), so it has been dropped.
+        self._is_installed = True
+
+    def get_architectures(self):
+        '''Return every architecture name known to FIRST.
+
+        Returns:
+            list: The entries of ``FIRSTDB.standards`` combined with the
+            distinct ``architecture`` values stored on Function rows,
+            without duplicates and in no particular order.
+        '''
+        column = 'architecture'
+        known = set(FIRSTDB.standards)
+        for row in Function.objects.values(column).distinct():
+            known.add(row[column])
+
+        return list(known)
+
+    def get_sample(self, md5_hash, crc32, create=False):
+        '''Look up the Sample identified by an MD5 / CRC32 pair.
+
+        Args:
+            md5_hash: Value matched against ``Sample.md5``.
+            crc32: Value matched against ``Sample.crc32``.
+            create (bool, optional): When True, a missing Sample is created,
+                saved and returned instead of returning None.
+
+        Returns:
+            Sample or None: The matching (or newly created) Sample; None when
+            nothing matches and ``create`` is false.
+
+        Raises:
+            MultipleObjectsReturned: More than one Sample matches the pair.
+        '''
+        try:
+            return Sample.objects.get(md5=md5_hash, crc32=crc32)
+
+        except MultipleObjectsReturned:
+            # TODO: log occurrence
+            raise
+
+        except ObjectDoesNotExist:
+            if not create:
+                return None
+
+        # Nothing stored yet and the caller asked for the Sample to be created
+        new_sample = Sample(md5=md5_hash, crc32=crc32)
+        new_sample.last_seen = timezone.now()
+        new_sample.save()
+        return new_sample
+
+    def sample_seen_by_user(self, sample, user):
+        '''Record that ``user`` has seen ``sample``.
+
+        Nothing happens when either argument has the wrong type, or when the
+        user is already linked to the sample through ``seen_by``.
+
+        Returns:
+            None
+        '''
+        if not (isinstance(sample, Sample) and isinstance(user, User)):
+            return None
+
+        already_seen = Sample.objects.filter(pk=sample.id,
+                                             seen_by=user).count()
+        if already_seen == 0:
+            sample.seen_by.add(user)
+
+    def checkin(self, user, md5_hash, crc32, sha1_hash=None, sha256_hash=None):
+        '''Record that ``user`` has checked in a sample.
+
+        The Sample is created when it does not exist yet; its ``last_seen``
+        time is refreshed, the user is added to ``seen_by`` and any supplied
+        SHA1 / SHA256 hashes are stored.
+
+        Args:
+            user (User): The user checking in.
+            md5_hash (str): 32 lowercase hex characters.
+            crc32: CRC32 value identifying the sample together with the MD5.
+            sha1_hash (str, optional): 40 lowercase hex characters.
+            sha256_hash (str, optional): 64 lowercase hex characters.
+
+        Returns:
+            bool: True when the check in was stored, False when ``user`` is
+            not a User, a hash is malformed or no Sample could be obtained.
+        '''
+        if not isinstance(user, User):
+            return False
+
+        # Validate data. \Z is used instead of $ because $ also matches just
+        # before a trailing newline, which would let 'digest\n' through.
+        if ((not re.match(r'^[a-f\d]{32}\Z', md5_hash))
+            or (sha1_hash and not re.match(r'^[a-f\d]{40}\Z', sha1_hash))
+            or (sha256_hash and not re.match(r'^[a-f\d]{64}\Z', sha256_hash))):
+            return False
+
+        sample = self.get_sample(md5_hash, crc32, True)
+        if not sample:
+            return False
+
+        sample.last_seen = timezone.now()
+        if not Sample.objects.filter(pk=sample.id, seen_by=user).count():
+            sample.seen_by.add(user)
+
+        if sha1_hash is not None:
+            sample.sha1 = sha1_hash
+
+        if sha256_hash is not None:
+            sample.sha256 = sha256_hash
+
+        sample.save()
+        return True
+
+    def get_function_metadata(self, _id):
+        '''Fetch the Metadata rows attached to one Function.
+
+        Args:
+            _id (:obj:`int`): Primary key of the Function model
+
+        Returns:
+            QuerySet: Metadata objects filtered on ``function__pk``.
+        '''
+        lookup = {'function__pk': _id}
+        return Metadata.objects.filter(**lookup)
+
+    def get_function(self, opcodes, architecture, apis, create=False, **kwargs):
+        '''Find the Function matching the opcodes, optionally creating it.
+
+        Args:
+            opcodes (str): Raw opcode bytes; their SHA256 digest is part of
+                the lookup.
+            architecture (str): Architecture the opcodes belong to.
+            apis (:obj:`list`): API names attached to a newly created
+                Function. They are not part of the lookup.
+            create (bool, optional): Create the Function when it is missing.
+            **kwargs: Accepted and ignored.
+
+        Returns:
+            Function or None: The stored or newly created Function; None when
+            it does not exist and ``create`` is false.
+        '''
+        sha256_hash = hashlib.sha256(opcodes).hexdigest()
+        binary_opcodes = bson.Binary(opcodes)
+
+        try:
+            return Function.objects.get(sha256=sha256_hash,
+                                        opcodes=binary_opcodes,
+                                        architecture=architecture)
+        except ObjectDoesNotExist:
+            if not create:
+                return None
+
+        # Create the function and attach its APIs
+        function = Function.objects.create(sha256=sha256_hash,
+                                           opcodes=binary_opcodes,
+                                           architecture=architecture)
+
+        for name in apis:
+            # get_or_create() takes its lookup as keyword arguments; a lone
+            # positional argument is interpreted as ``defaults``.
+            # NOTE(review): assumes the FunctionApis field is named ``api``
+            # (as the old commented-out ``apis__api`` filter suggests) -
+            # confirm against www.models.
+            api, _ = FunctionApis.objects.get_or_create(api=name)
+            function.apis.add(api)
+
+        return function
+
+    def get_all_functions(self):
+        '''Return all stored Functions.
+
+        Returns:
+            QuerySet or list: ``Function.objects.all()``; an empty list when
+            building the query raises.
+        '''
+        try:
+            return Function.objects.all()
+
+        except Exception:
+            # Best effort: callers get an empty result instead of an error.
+            # (A bare ``except`` would also swallow KeyboardInterrupt and
+            # SystemExit.)
+            return []
+
+    def find_function(self, _id=None, opcodes=None, apis=None, architecture=None, h_sha256=None):
+        '''Find a single Function using the first usable set of criteria.
+
+        The criteria are tried in this order: ``_id`` alone, then ``opcodes``
+        together with ``apis``, then ``architecture`` together with
+        ``h_sha256``.
+
+        Returns:
+            Function or None: None when no usable criteria were given or
+            nothing matches.
+
+        Raises:
+            MultipleObjectsReturned: The criteria match several Functions.
+        '''
+        if _id is not None:
+            # Use function ID
+            lookup = {'pk': _id}
+
+        elif (opcodes is not None) and (apis is not None):
+            # Use opcodes and apis
+            lookup = {'opcodes': opcodes, 'apis': apis}
+
+        elif (architecture is not None) and (h_sha256 is not None):
+            # Use hash, architecture
+            lookup = {'sha256': h_sha256, 'architecture': architecture}
+
+        else:
+            return None
+
+        try:
+            return Function.objects.get(**lookup)
+
+        except ObjectDoesNotExist:
+            return None
+
+        except MultipleObjectsReturned:
+            # TODO: Log
+            raise
+
+    def add_function_to_sample(self, sample, function):
+        '''Link ``function`` to ``sample`` unless it is linked already.
+
+        Returns:
+            bool: False when either argument has the wrong type, True
+            otherwise (whether or not a new link had to be added).
+        '''
+        if not (isinstance(sample, Sample) and isinstance(function, Function)):
+            return False
+
+        linked = Sample.objects.filter(pk=sample.id,
+                                       functions=function).count()
+        if linked == 0:
+            sample.functions.add(function)
+
+        return True
+
+    def add_metadata_to_function(self, user, function, name, prototype, comment, **kwargs):
+        '''Attach a user's metadata to a function, or update what is there.
+
+        A Metadata object is created for the user/function pair when none
+        exists. A new MetadataDetails entry is only added when
+        ``Metadata.has_changed`` reports a change.
+
+        Args:
+            user (User): Author of the metadata.
+            function (Function): Function being annotated.
+            name: Function name.
+            prototype: Function prototype.
+            comment: Function comment.
+            **kwargs: Accepted and ignored.
+
+        Returns:
+            The id of the user's Metadata for the function, or None when
+            ``function`` or ``user`` has the wrong type.
+        '''
+        if (not isinstance(function, Function)) or (not isinstance(user, User)):
+            return None
+
+        # Check to see if user already has metadata associated with the
+        # function
+        if Function.objects.filter(pk=function.id, metadata__user=user).count():
+            # Metadata already exists
+            metadata = Metadata.objects.get(function=function, user=user)
+
+        else:
+            metadata = Metadata.objects.create(user=user)
+            function.metadata.add(metadata)
+
+        if metadata.has_changed(name, prototype, comment):
+            md = MetadataDetails.objects.create(name=name,
+                                                prototype=prototype,
+                                                comment=comment)
+            metadata.details.add(md)
+
+        return metadata.id
+
+    def get_metadata_list(self, metadata):
+        '''Return summaries for the given metadata IDs.
+
+        Args:
+            metadata (:obj:`list`): Client IDs, split into user and engine
+                created metadata with separate_metadata
+
+        Returns:
+            list: One dict per resolvable ID. User metadata entries are the
+            result of ``Metadata.dump()`` with ``id`` replaced by the client
+            ID; engine entries hold ``id``, ``engine`` and ``description``.
+            IDs that cannot be resolved are skipped.
+        '''
+        results = []
+        metadata_ids, engine_metadata = separate_metadata(metadata)
+
+        for entry in Metadata.objects.in_bulk(metadata_ids).values():
+            data = entry.dump()
+            data['id'] = make_id(0, metadata=entry.id)
+            results.append(data)
+
+        for flag, engine_id, metadata_id in engine_metadata:
+            # TODO: Send metadata_id to engine for more info
+            # get_engine() returns a single Engine or None, so an unknown
+            # engine is skipped instead of raising
+            engine = self.get_engine(engine_id)
+            if not engine:
+                continue
+
+            data = {'id' : make_id(flag, metadata_id, engine_id),
+                    'engine' : engine.name,
+                    'description' : engine.description}
+            results.append(data)
+
+        return results
+
+    def delete_metadata(self, user, metadata_id):
+        '''Delete a piece of user created metadata on behalf of its creator.
+
+        Args:
+            user (User): User requesting the deletion.
+            metadata_id: Client ID of the metadata.
+
+        Returns:
+            bool: True when the metadata was deleted. False when ``user`` is
+            not a User, the ID is not user created metadata, or no Metadata
+            with that ID belongs to the user.
+        '''
+        if not isinstance(user, User):
+            return False
+
+        owned_ids = separate_metadata([metadata_id])[0]
+        if not owned_ids:
+            return False
+
+        # User must be the creator of the metadata to delete it
+        try:
+            Metadata.objects.get(pk=owned_ids[0], user=user).delete()
+
+        except ObjectDoesNotExist:
+            return False
+
+        return True
+
+    def created(self, user, page, max_metadata=20):
+        '''Return one page of the metadata created by ``user``.
+
+        Args:
+            user (User): Creator of the metadata.
+            page (int): 1-based page number.
+            max_metadata (int, optional): Page size.
+
+        Returns:
+            tuple: (results, pages). ``results`` is a list of
+            ``Metadata.dump()`` dicts whose ``id`` is the client ID; it is
+            empty when ``page`` is out of range. ``pages`` is the total page
+            count, or 0 when ``page`` is below 1 or ``user`` is not a User.
+        '''
+        if (page < 1) or (not isinstance(user, User)):
+            return ([], 0)
+
+        paginator = Paginator(Metadata.objects.filter(user=user), max_metadata)
+        total_pages = paginator.num_pages
+
+        entries = []
+        if page <= total_pages:
+            for item in paginator.page(page):
+                dumped = item.dump()
+                dumped['id'] = make_id(0, metadata=item.id)
+                entries.append(dumped)
+
+        return (entries, total_pages)
+
+    def metadata_history(self, metadata):
+        '''Collect the creator and revision history for metadata IDs.
+
+        Args:
+            metadata (:obj:`list`): Client IDs, split into user and engine
+                created metadata with separate_metadata
+
+        Returns:
+            dict: Keyed by client ID; each value holds ``creator`` and
+            ``history``. Engine created IDs get a single placeholder history
+            entry describing the engine and are left out when the engine
+            cannot be found.
+        '''
+        history = {}
+        user_ids, engine_entries = separate_metadata(metadata)
+
+        for key, record in Metadata.objects.in_bulk(user_ids).items():
+            dumped = record.dump(True)
+            history[make_id(0, metadata=key)] = {'creator' : dumped['creator'],
+                                                 'history' : dumped['history']}
+
+        # Provide information for engine created metadata...
+        template = ('Generated by Engine: {0.name}\n{0.description}\n\n'
+                    'Developer: {0.developer.user_handle}')
+        for flag, engine_id, metadata_id in engine_entries:
+            engine = self.get_engine(engine_id)
+            if not engine:
+                continue
+
+            placeholder = {'committed' : '',
+                           'name' : 'N/A',
+                           'prototype' : 'N/A',
+                           'comment' : template.format(engine)}
+            client_id = make_id(flag, engine=engine_id, metadata=metadata_id)
+            history[client_id] = {'creator' : engine.name,
+                                  'history' : [placeholder]}
+
+        return history
+
+    def applied(self, sample, user, _id):
+        '''Record that ``user`` applied metadata ``_id`` to ``sample``.
+
+        Args:
+            sample (Sample): Sample the metadata was applied in.
+            user (User): User applying the metadata.
+            _id: Client ID of the metadata.
+
+        Returns:
+            bool: False when an argument has the wrong type or the Metadata
+            does not exist, True otherwise. Engine created IDs currently
+            return True without anything being recorded.
+        '''
+        if not (isinstance(user, User) and isinstance(sample, Sample)):
+            return False
+
+        metadata_id = parse_id(_id)[2]
+        if is_engine_metadata(_id):
+            # TODO: add this capability back again. The disabled MongoDB code
+            # looked the Engine up by id, returned True when the key was
+            # already in its applied list, returned False when the Engine
+            # did not exist, and otherwise appended the key to
+            # engine.applied and saved the engine.
+            return True
+
+        try:
+            # Ensure Metadata exists
+            target = Metadata.objects.get(pk=metadata_id)
+
+        except ObjectDoesNotExist:
+            # Metadata does not exist
+            return False
+
+        AppliedMetadata.objects.get_or_create(user=user,
+                                              sample=sample,
+                                              metadata=target)
+        return True
+
+    def unapplied(self, sample, user, _id):
+        '''Remove the record that ``user`` applied metadata ``_id`` to ``sample``.
+
+        Args:
+            sample (Sample): Sample the metadata was applied in.
+            user (User): User who applied the metadata.
+            _id: Client ID of the metadata.
+
+        Returns:
+            bool: True when the metadata is not (or no longer) recorded as
+            applied. False when an argument has the wrong type, the Metadata
+            does not exist, or the ID is engine created (not supported yet).
+        '''
+        if not (isinstance(sample, Sample) and isinstance(user, User)):
+            return False
+
+        metadata_id = parse_id(_id)[2]
+        if is_engine_metadata(_id):
+            # TODO: add this capability back again. The disabled MongoDB code
+            # looked the Engine up by id, returned True when the key was not
+            # in its applied list, returned False when the Engine did not
+            # exist, and otherwise removed the key from engine.applied and
+            # saved the engine.
+            return False
+
+        try:
+            # Ensure Metadata exists
+            target = Metadata.objects.get(pk=metadata_id)
+
+        except ObjectDoesNotExist:
+            # Metadata does not exist
+            return False
+
+        try:
+            AppliedMetadata.objects.get(user=user,
+                                        sample=sample,
+                                        metadata=target).delete()
+
+        except ObjectDoesNotExist:
+            # Never applied, so there is nothing to remove
+            pass
+
+        return True
+
+    def engines(self, active=True):
+        '''Return the Engines whose ``active`` field equals ``bool(active)``.
+
+        Returns:
+            QuerySet: The matching Engine objects.
+        '''
+        wanted = bool(active)
+        return Engine.objects.filter(active=wanted)
+
+    def get_engine(self, engine_id):
+        '''Return the Engine with the given primary key.
+
+        Args:
+            engine_id: Primary key of the Engine model
+
+        Returns:
+            Engine or None: None when no Engine has that primary key.
+        '''
+        # first() already yields None for an empty QuerySet, so the separate
+        # count() query is not needed
+        return Engine.objects.filter(pk=engine_id).first()
diff --git a/server/first/engines/__init__.py b/server/first_core/engines/__init__.py
similarity index 98%
rename from server/first/engines/__init__.py
rename to server/first_core/engines/__init__.py
index 4fce345..19d708c 100644
--- a/server/first/engines/__init__.py
+++ b/server/first_core/engines/__init__.py
@@ -15,9 +15,9 @@
import sys
# First Modules
-from first.error import FIRSTError
-from first.dbs import FIRSTDBManager
-from first.engines.results import Result
+from first_core.error import FIRSTError
+from first_core.dbs import FIRSTDBManager
+from first_core.engines.results import Result
# Third Party Modules
from bson.objectid import ObjectId
diff --git a/server/first/engines/exact_match.py b/server/first_core/engines/exact_match.py
similarity index 90%
rename from server/first/engines/exact_match.py
rename to server/first_core/engines/exact_match.py
index 3a78eb3..c9c7b32 100644
--- a/server/first/engines/exact_match.py
+++ b/server/first_core/engines/exact_match.py
@@ -23,9 +23,9 @@
from hashlib import sha256
# FIRST Modules
-from first.error import FIRSTError
-from first.engines import AbstractEngine
-from first.engines.results import FunctionResult
+from first_core.error import FIRSTError
+from first_core.engines import AbstractEngine
+from first_core.engines.results import FunctionResult
class ExactMatchEngine(AbstractEngine):
_name = 'ExactMatch'
@@ -50,7 +50,7 @@ def _scan(self, opcodes, architecture, apis):
return None
similarity = 90.0
- if set(function.apis) == set(apis):
+ if set(function.apis.values()) == set(apis):
similarity += 10.0
return [FunctionResult(str(function.id), similarity)]
diff --git a/server/first/engines/results.py b/server/first_core/engines/results.py
similarity index 90%
rename from server/first/engines/results.py
rename to server/first_core/engines/results.py
index afbe89c..5e3a6db 100644
--- a/server/first/engines/results.py
+++ b/server/first_core/engines/results.py
@@ -5,6 +5,7 @@
# Last Modified: August 2016
#
#-------------------------------------------------------------------------------
+from first_core.util import make_id
class Result(object):
'''Abstract class to encapsulate results returned from Engines'''
@@ -87,23 +88,19 @@ class FunctionResult(Result):
This Result class is crafted for general engines that want to return
a list of functions to the EngineManager
- ID values are 25 hex character string. For metadata created by users,
- not engines, the most significant bit is not set.
+    ID values are 26 character hex strings. For metadata created by users,
+    not engines, the flag byte is not set.
'''
def _get_metadata(self, db):
if not hasattr(self, '_metadata'):
- func = db.find_function(_id=self.id)
- if not func:
- return None
-
- self._metadata = func.metadata
+ self._metadata = list(db.get_function_metadata(self.id))
self._metadata.sort(key=lambda x: x.rank)
data = None
if len(self._metadata) > 0:
metadata = self._metadata.pop()
data = metadata.dump()
- data['id'] = '0{}'.format(metadata.id)
+ data['id'] = make_id(0, metadata=metadata.id)
return data
@@ -119,9 +116,11 @@ class EngineResult(Result):
'''
def _init(self, **kwargs):
self._data = None
+ self._metadata = 0
+
if 'data' in kwargs:
self._data = kwargs['data']
- self._data['id'] = '8{}'.format(self.id)
+ self._data['id'] = make_id(1, self._metadata, self.id)
def _get_metadata(self, db):
data = self._data
diff --git a/server/first/engines/skeleton.py_ b/server/first_core/engines/skeleton.py_
similarity index 96%
rename from server/first/engines/skeleton.py_
rename to server/first_core/engines/skeleton.py_
index e005e82..2954768 100644
--- a/server/first/engines/skeleton.py_
+++ b/server/first_core/engines/skeleton.py_
@@ -20,8 +20,8 @@
# FIRST Modules
-from first.error import FIRSTError
-from first.engines import AbstractEngine
+from first_core.error import FIRSTError
+from first_core.engines import AbstractEngine
# Third Party Modules
diff --git a/server/first/error.py b/server/first_core/error.py
similarity index 100%
rename from server/first/error.py
rename to server/first_core/error.py
diff --git a/server/first_core/models.py b/server/first_core/models.py
new file mode 100644
index 0000000..cdfb45b
--- /dev/null
+++ b/server/first_core/models.py
@@ -0,0 +1,27 @@
+#-------------------------------------------------------------------------------
+#
+# FIRST Django ORM Models
+# Copyright (C) 2017 Angel M. Villegas
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#-------------------------------------------------------------------------------
+
+
+# Python Modules
+from __future__ import unicode_literals
+
+# FIRST Modules
+from www.models import *
diff --git a/server/first_core/util.py b/server/first_core/util.py
new file mode 100644
index 0000000..96fee63
--- /dev/null
+++ b/server/first_core/util.py
@@ -0,0 +1,105 @@
+#-------------------------------------------------------------------------------
+#
+# FIRST Utility and Helper Functions
+# Copyright (C) 2017 Angel M. Villegas
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#-------------------------------------------------------------------------------
+
+
+def make_id(flags, metadata=0, engine=0):
+    '''Creates a unique ID for client use.
+
+    Args:
+        flags (:obj:`int`): Value between 0 and 255. Zero for user created
+            metadata; IDs with a non-zero flag are treated as engine
+            metadata by is_engine_metadata.
+        metadata (:obj:`int`, optional): The Metadata model ID
+        engine (:obj:`int`, optional): The Engine model ID
+
+    Returns:
+        string: A 26 character hex string (flag byte, 4 engine bytes,
+        8 metadata bytes), or None when a value is not an integer or does
+        not fit its field.
+    '''
+    import numbers
+
+    limits = ((flags, 2**8 - 1), (engine, 2**32 - 1), (metadata, 2**64 - 1))
+    for value, maximum in limits:
+        # bool is an Integral subclass but was never an accepted ID part
+        if isinstance(value, bool) or not isinstance(value, numbers.Integral):
+            return None
+
+        # Negative values would be formatted with a '-' sign and break the
+        # fixed width layout, so they are rejected like oversized values
+        if not (0 <= value <= maximum):
+            return None
+
+    return '{:02x}{:08x}{:016x}'.format(flags, engine, metadata)
+
+
+def parse_id(_id):
+    '''Splits a client ID into its flag, engine and metadata parts.
+
+    Args:
+        _id (:obj:`str`): 26 character hex string as produced by make_id
+
+    Returns:
+        tuple: (flag, engine_data, metadata_id) as integers, or
+        (None, None, None) when _id is not a 26 character hex string
+    '''
+    invalid = (None, None, None)
+    try:
+        if len(_id) != 26:
+            return invalid
+
+        # int(x, 16) on its own also accepts signs, a '0x' prefix and
+        # surrounding whitespace, none of which belong in a well formed ID
+        if not all(c in '0123456789abcdefABCDEF' for c in _id):
+            return invalid
+
+        value = int(_id, 16)
+
+    except (TypeError, ValueError):
+        # Not a string at all (e.g. None or an integer)
+        return invalid
+
+    flag = value >> (8 * 12)
+    engine_data = (value >> (8 * 8)) & 0xFFFFFFFF
+    metadata_id = value & 0xFFFFFFFFFFFFFFFF
+
+    return (flag, engine_data, metadata_id)
+
+def separate_metadata(ids):
+    '''Splits client IDs into user created and engine created metadata.
+
+    Args:
+        ids (:obj:`list`): List of 26 character hex strings
+
+    Returns:
+        tuple: Index 0 is a list of the metadata IDs created by users
+            Index 1 is a list of (flag, engine_data, metadata_id) tuples
+            for engine created metadata
+    '''
+    # ID layout: Flag 1 byte | Engine 4 bytes | Metadata 8 bytes = 13 bytes,
+    # i.e. 26 ASCII characters. A non-zero flag means the metadata was not
+    # created by a user and needs more processing.
+    by_user = []
+    by_engine = []
+    for raw_id in ids:
+        parsed = parse_id(raw_id)
+        if None in parsed:
+            # IDs that could not be parsed are dropped
+            continue
+
+        flag, engine_data, metadata_id = parsed
+        if flag:
+            by_engine.append((flag, engine_data, metadata_id))
+        else:
+            by_user.append(metadata_id)
+
+    return (by_user, by_engine)
+
+def is_user_metadata(_id):
+    '''Returns True when _id parses and its flag part is zero.'''
+    parts = parse_id(_id)
+    return (None not in parts) and (not parts[0])
+
+def is_engine_metadata(_id):
+    '''Returns True when _id parses and its flag part is non-zero.'''
+    parts = parse_id(_id)
+    return (None not in parts) and bool(parts[0])
diff --git a/server/rest/urls.py b/server/rest/urls.py
index d75b467..013f6f4 100644
--- a/server/rest/urls.py
+++ b/server/rest/urls.py
@@ -20,8 +20,7 @@
views.metadata_unapplied, name='metadata_unapplied'),
url(r'^metadata/get/(?i)(?P[A-F\d]{8}\-(?:[A-F\d]{4}\-){3}[A-F\d]{12})$',
views.metadata_get, name='metadata_get'),
- # TODO: migrate to ids with 25 characters
- url(r'^metadata/delete/(?i)(?P[A-F\d]{8}\-(?:[A-F\d]{4}\-){3}[A-F\d]{12})/(?i)(?P<_id>[A-F\d]{24,25})$',
+ url(r'^metadata/delete/(?i)(?P[A-F\d]{8}\-(?:[A-F\d]{4}\-){3}[A-F\d]{12})/(?i)(?P<_id>[A-F\d]{26})$',
views.metadata_delete, name='metadata_delete'),
url(r'^metadata/created/(?i)(?P[A-F\d]{8}\-(?:[A-F\d]{4}\-){3}[A-F\d]{12})$',
views.metadata_created, name='metadata_created'),
diff --git a/server/rest/views.py b/server/rest/views.py
index e83a76f..2fd67fd 100644
--- a/server/rest/views.py
+++ b/server/rest/views.py
@@ -12,14 +12,15 @@
from django.views.decorators.http import require_GET, require_POST
# FIRST Modules
-from first import DBManager, EngineManager
-from first.auth import verify_api_key, Authentication, FIRSTAuthError, \
+from first_core import DBManager, EngineManager
+from first_core.util import make_id, is_engine_metadata
+from first_core.auth import verify_api_key, Authentication, FIRSTAuthError, \
require_login, require_apikey
MAX_FUNCTIONS = 20
MAX_METADATA = 20
-VALIDATE_IDS = lambda x: re.match('^[a-f\d]{24,25}$', x)
+VALIDATE_IDS = lambda x: re.match('^[A-Fa-f\d]{26}$', x)
#-----------------------------------------------------------------------------
#
@@ -217,8 +218,7 @@ def metadata_add(request, md5_hash, crc32, user):
f = functions[client_key]
# Check if the id sent back is from an engine, if so skip it
- if (('id' in f) and (f['id']) and (len(f['id']) == 25)
- and ((int(f['id'][0]) >> 3) & 1)):
+        if (('id' in f) and (f['id']) and is_engine_metadata(f['id'])):
continue;
function = db.get_function(create=True, **f)
@@ -238,7 +238,8 @@ def metadata_add(request, md5_hash, crc32, user):
'function in FIRST')})
# The '0' indicated the metadata_id is from a user.
- results[client_key] = '0{}'.format(metadata_id)
+ print metadata_id
+ results[client_key] = make_id(0, metadata=metadata_id)
# Set the user as applying the metadata
db.applied(sample, user, metadata_id)
@@ -295,6 +296,7 @@ def metadata_history(request, user):
return render(request, 'rest/error_json.html',
{'msg' : 'Exceeded max bulk request'})
+ print metadata
if None in map(VALIDATE_IDS, metadata):
return render(request, 'rest/error_json.html',
{'msg' : 'Invalid metadata id'})
@@ -544,26 +546,15 @@ def metadata_status_change(_id, user, md5_hash, crc32, applied):
return render(None, 'rest/error_json.html',
{'msg' : 'Invalid metadata information'})
- # Currently 24-25, early beta used a 24 byte string, moved to 25 byte one
- # TODO: Change to 25 only once it is closed beta time
if not VALIDATE_IDS(_id):
return render(None, 'rest/error_json.html',
{'msg' : 'Invalid id value'})
- metadata_id = _id
- if len(_id) == 25:
- metadata_id = _id[1:]
-
db = DBManager.first_db
if not db:
return render(None, 'rest/error_json.html',
{'msg' : 'Unable to connect to FIRST DB'})
- is_engine = False
- if ((len(_id) == 25) and (int(_id[0], 16) & 0x8)):
- # Metadata came from an engine
- is_engine = True
-
# Get sample
sample = db.get_sample(md5_hash, crc32)
if not sample:
@@ -571,8 +562,8 @@ def metadata_status_change(_id, user, md5_hash, crc32, applied):
{'msg' : 'Sample does not exist in FIRST'})
if applied:
- results = db.applied(sample, user, metadata_id, is_engine)
+ results = db.applied(sample, user, _id)
else:
- results = db.unapplied(sample, user, metadata_id, is_engine)
+ results = db.unapplied(sample, user, _id)
return HttpResponse(json.dumps({'failed' : False, 'results' : results}))
diff --git a/server/utilities/__init__.py b/server/utilities/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/server/utilities/engine_shell.py b/server/utilities/engine_shell.py
index bb68cf6..07fec89 100644
--- a/server/utilities/engine_shell.py
+++ b/server/utilities/engine_shell.py
@@ -31,9 +31,10 @@
# FIRST Modules
import first.settings
-from first.models import Engine, User
-from first.engines import AbstractEngine
-from first import DBManager, EngineManager
+import first.wsgi
+from first_core.models import Engine, User
+from first_core.engines import AbstractEngine
+from first_core import DBManager, EngineManager
class EngineCmd(Cmd):
@@ -95,7 +96,7 @@ def do_list(self, line):
print 'No engines are currently installed'
return
- for engine in Engine.objects:
+ for engine in Engine.objects.all():
name = engine.name
description = engine.description
print '+{}+{}+'.format('-' * 18, '-' * 50)
@@ -139,7 +140,7 @@ def do_install(self, line):
try:
path, obj_name, email = line.split(' ')
- developer = User.objects(email=email).get()
+ developer = User.objects.get(email=email)
__import__(path)
module = sys.modules[path]
@@ -160,9 +161,11 @@ def do_install(self, line):
return
e.install()
- engine = Engine(name=e.name, description=e.description, path=path,
- obj_name=obj_name, developer=developer, active=True)
- engine.save()
+ engine = Engine.objects.create( name=e.name,
+ description=e.description,
+ path=path,
+ obj_name=obj_name,
+ developer=developer, active=True)
print 'Engine added to FIRST'
return
@@ -286,7 +289,7 @@ def do_populate(self, line):
print 'The below errors occured:\n{}'.format('\n '.join(errors))
def _get_db_engine_obj(self, name):
- engine = Engine.objects(name=name)
+ engine = Engine.objects.filter(name=name)
if not engine:
print 'Unable to locate Engine "{}"'.format(name)
return
@@ -294,7 +297,7 @@ def _get_db_engine_obj(self, name):
if len(engine) > 1:
print 'More than one engine "{}" exists'.format(name)
for e in engine:
- print ' - {}'.format(e.name)
+ print ' - {}: {}'.format(e.name, e.description)
return
diff --git a/server/utilities/mongo_to_django_orm.py b/server/utilities/mongo_to_django_orm.py
new file mode 100644
index 0000000..70150c9
--- /dev/null
+++ b/server/utilities/mongo_to_django_orm.py
@@ -0,0 +1,347 @@
+#! /usr/bin/python
+#-------------------------------------------------------------------------------
+#
+# FIRST MongoDB to Django ORM Conversion Script
+# Copyright (C) 2016 Angel M. Villegas
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Requirements
+# ------------
+# mongoengine (https://pypi.python.org/pypi/mongoengine/)
+#
+#
+#-------------------------------------------------------------------------------
+
+# Python Modules
+import os
+import sys
+import time
+import datetime
+from getpass import getpass
+from argparse import ArgumentParser
+
+# DEBUG
+from pprint import pprint
+import gc
+
+# Add app package to sys path
+#sys.path.append(os.path.abspath('..'))
+#os.environ['DJANGO_SETTINGS_MODULE'] = 'first.settings'
+
+
+# FIRST Modules
+import first_core.models as ORM
+
+# Third Party Modules
+from bson import Binary
+from bson.objectid import ObjectId
+import mongoengine
+from mongoengine import Document, StringField, UUIDField, \
+ DateTimeField, LongField, ReferenceField, \
+ BinaryField, ListField, BooleanField, ObjectIdField, \
+ IntField, EmbeddedDocument, EmbeddedDocumentListField
+from django.core.paginator import Paginator, EmptyPage
+
+def info():
+ print 'INFO: {} {}'.format(len(gc.get_objects()), sum([sys.getsizeof(o) for o in gc.get_objects()]))
+
+def migrate_users():
+ for u in User.objects.all():
+ user, created = ORM.User.objects.get_or_create(**u.dump())
+
+def migrate_engines():
+ for e in Engine.objects.all():
+ engine = e.dump()
+ engine['developer'] = ORM.User.objects.get(email=e.developer.email)
+ engine = ORM.Engine.objects.create(**engine)
+
+def migrate_samples():
+ paginator = Paginator(Sample.objects.all(), 100)
+ for s in Sample.objects.all().exclude('functions').select_related():
+ sample, created = ORM.Sample.objects.get_or_create(**s.dump())
+ for u in s.seen_by:
+ sample.seen_by.add(ORM.User.objects.get(email=u.email))
+
+def migrate_functions(skip, limit):
+ i = 0
+ for f in Function.objects.skip(skip).limit(limit).select_related(3):
+ function, created = ORM.Function.objects.get_or_create(**f.dump())
+ # Convert Functions
+ #migrate_function_for_sample(sample, s)
+ if created:
+ # Add APIs to function
+ migrate_apis(function, f)
+
+ # Add to samples
+ for s in Sample.objects.only('md5', 'crc32').filter(functions=f.id):
+ ORM.Sample.objects.get(md5=s.md5, crc32=s.crc32).functions.add(function)
+ #sample = ORM.Sample.objects.get(md5=s.md5, crc32=s.crc32)
+ #sample.functions.add(function)
+
+ # Add metadata associated with the function
+ migrate_metadata(function, f)
+
+ i += 1
+ if 0 == (i % 1000):
+ print '---{}---'.format(i)
+ info()
+ gc.collect()
+ info()
+
+def _mf():
+ for i in xrange(0, Function.objects.count(), 1000):
+ print '--{}'.format(i)
+ migrate_functions(i, 1000)
+ #migfunc(Function.objects.exclude('metadata').all()[i:i+1000])
+
+ if i % 20000 == 0:
+ info()
+ gc.collect()
+ info()
+
+def migfunc(qs):
+ #info()
+ #functions = {}
+ for f in qs:
+ function, created = ORM.Function.objects.get_or_create(**f.dump())
+ # Convert APIs
+ if created:
+ # Add APIs to function
+ migrate_apis(function, f)
+
+ # Add to samples
+ for s in Sample.objects.only('md5', 'crc32').filter(functions=f.id):
+ sample = ORM.Sample.objects.get(md5=s.md5, crc32=s.crc32)
+ sample.functions.add(function)
+
+ # Add metadata associated with the function
+ #migrate_metadata(function, f)
+ #gc.collect()
+ #info()
+
+def migrate_function_for_sample(sample, s):
+ print '{} - {}'.format(s.md5, len(s.functions))
+ info()
+ for f in s.functions:
+ if type(f) != Function:
+ print 'Abandoned object: {}'.format(f)
+ continue
+ #info()
+ #pprint(f.dump())
+ function, created = ORM.Function.objects.get_or_create(**f.dump())
+ sample.functions.add(function)
+
+ if created:
+ # Convert APIs
+ migrate_apis(function, f)
+
+ # Convert Metadata
+ migrate_metadata(function, f, sample)
+
+
+ gc.collect()
+ info()
+
+def migrate_apis(function, f):
+ for a in f.apis:
+ api, _ = ORM.FunctionApis.objects.get_or_create(api=a)
+ function.apis.add(api)
+
+ gc.collect()
+
+def migrate_metadata(function, f):
+ print 'Metadata: {} - {}'.format(f.sha256, len(f.metadata))
+ for m in f.metadata:
+ creator = ORM.User.objects.get(email=m.user.email)
+ metadata = ORM.Metadata.objects.create(user=creator)
+ function.metadata.add(metadata)
+
+ # Convert Metadata Details
+ for d in m.details():
+ details = ORM.MetadataDetails.objects.create(**d)
+ metadata.details.add(details)
+
+ # Convert Metadata Applied
+ for s_id, u_id in m.applied:
+ s_ = Sample.objects.only('md5', 'crc32').get(pk=s_id)
+ u = User.objects.only('email').get(pk=u_id)
+ sample_ = ORM.Sample.objects.get(md5=s_.md5, crc32=s_.crc32)
+ user_ = ORM.User.objects.get(email=u.email)
+ ORM.AppliedMetadata.objects.create(metadata=metadata,
+ user=user_,
+ sample=sample_)
+
+def main(args):
+ pass_prompt = 'Enter MongoDB password for {}: '.format(args.user)
+ mongoengine.connect(args.d,
+ host=args.host,
+ port=args.port,
+ user=args.user,
+ password=getpass(pass_prompt))
+ # Convert User
+ migrate_users()
+
+ # Convert Engine
+ migrate_engines()
+
+ # Convert Samples
+ migrate_samples()
+
+
+
+
+#-------------------------------------------------------------------------------
+# MongoDB Models
+# FIRST v0.0.1
+#-------------------------------------------------------------------------------
+class User(Document):
+ name = StringField(max_length=128, required=True)
+ email = StringField(max_length=254, unique=True)
+ handle = StringField(max_length=32, required=True)
+ number = IntField(required=True)
+ api_key = UUIDField(required=True, unique=True)
+ created = DateTimeField(default=datetime.datetime.utcnow, required=True)
+ rank = LongField(default=0)
+ active = BooleanField(default=True)
+
+ service = StringField(max_length=16, required=True)
+ auth_data = StringField(max_length=4096, required=True)
+
+ meta = {
+ 'indexes' : [('handle', 'number'), 'api_key', 'email']
+ }
+
+ def dump(self):
+ return {'name' : self.name,
+ 'email' : self.email,
+ 'handle' : self.handle,
+ 'number' : self.number,
+ 'api_key' : self.api_key,
+ 'created' : self.created,
+ 'rank' : self.rank,
+ 'active' : self.active}
+
+
+class Engine(Document):
+ name = StringField(max_length=16, required=True, unique=True)
+ description = StringField(max_length=128, required=True)
+ path = StringField(max_length=256, required=True)
+ obj_name = StringField(max_length=32, required=True)
+ applied = ListField(default=list)
+ developer = ReferenceField(User)
+ active = BooleanField(default=False)
+
+ meta = {
+ 'indexes' : ['name']
+ }
+
+ def dump(self):
+ return {'name' : self.name,
+ 'description' : self.description,
+ 'path' : self.path,
+ 'obj_name' : self.obj_name,
+ 'developer' : self.developer,
+ 'active' : self.active}
+
+
+class Metadata(EmbeddedDocument):
+ id = ObjectIdField(required=True, default=lambda: ObjectId())
+ user = ReferenceField(User)
+ name = ListField(StringField(max_length=128), default=list)
+ prototype = ListField(StringField(max_length=256), default=list)
+ comment = ListField(StringField(max_length=512), default=list)
+ committed = ListField(DateTimeField(), default=list)
+ applied = ListField(default=list)
+
+ meta = {
+ 'indexes' : ['user']
+ }
+
+ def details(self):
+ return [{'committed' : self.committed[i],
+ 'name' : self.name[i],
+ 'prototype' : self.prototype[i],
+ 'comment' : self.comment[i]} for i in xrange(len(self.name))]
+
+
+# Use bson.Binary to insert binary data
+class Function(Document):
+ sha256 = StringField(max_length=64)
+ opcodes = BinaryField()
+ apis = ListField(StringField(max_length=128), default=list)
+ metadata = EmbeddedDocumentListField(Metadata, default=list)
+ architecture = StringField(max_length=64, required=True)
+
+ meta = {
+ 'indexes' : []
+ }
+
+ def dump(self):
+ return {'opcodes' : Binary(self.opcodes),
+ 'architecture' : self.architecture,
+ 'sha256' : self.sha256}
+
+
+class Sample(Document):
+ md5 = StringField(max_length=32, required=True)
+ crc32 = IntField(required=True)
+ sha1 = StringField(max_length=40)
+ sha256 = StringField(max_length=64)
+ seen_by = ListField(ReferenceField(User), default=list)
+ functions = ListField(ReferenceField(Function), default=list)
+ last_seen = DateTimeField(default=datetime.datetime.utcnow)
+
+ meta = {
+ 'indexes' : [('md5', 'crc32')]
+ }
+
+ def dump(self):
+ data = {'md5' : self.md5, 'crc32' : self.crc32}
+
+ if hasattr(self, 'sha1'):
+ data['sha1'] = self.sha1
+
+ if hasattr(self, 'sha256'):
+ data['sha256'] = self.sha256
+
+ return data
+
+if __name__ == '__main__':
+ parser = ArgumentParser(('FIRST Mongo to Django ORM Conversion Script\n'
+ 'This script should be used to convert FIRST v0.0.1 to FIRST v0.1.0\n'
+ ))
+
+ # Arguments
+ parser.add_argument('--mongo-host', '-h', help='The MongoDB host')
+ parser.add_argument('--mongo-port', '-p', help='The MongoDB port', type=int)
+ parser.add_argument('--mongo-user', '-u', help='The MongoDB user')
+ parser.add_argument('--mongo-db', '-d', help='The MongoDB db name')
+
+# TODO: remove
+mongoengine.connect('beta')
+print ' + Adding Users'
+start = time.time()
+migrate_users()
+print '[+] Users Added ({} s)'.format(time.time() - start)
+print ' + Adding Samples'
+start = time.time()
+migrate_samples()
+print '[+] Adding Samples ({} s)'.format(time.time() - start)
+print ' + Adding Functions & Metadata'
+start = time.time()
+#migrate_functions()
+_mf()
+print '[+] Adding Functions & Metadata ({} s)'.format(time.time() - start)
diff --git a/server/utilities/populate_engine.py b/server/utilities/populate_engine.py
index b26cc2b..0417b4d 100644
--- a/server/utilities/populate_engine.py
+++ b/server/utilities/populate_engine.py
@@ -27,7 +27,7 @@
from argparse import ArgumentParser
# FIRST Modules
-from ..app.first import EngineManager, DBManager
+from first_core import EngineManager, DBManager
def main():
global total, completed, operation_complete
diff --git a/server/www/migrations/0001_initial.py b/server/www/migrations/0001_initial.py
new file mode 100644
index 0000000..c272eed
--- /dev/null
+++ b/server/www/migrations/0001_initial.py
@@ -0,0 +1,204 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.4 on 2017-08-25 16:11
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import django.db.models.deletion
+import django.utils.timezone
+
+
+class Migration(migrations.Migration):
+
+ initial = True
+
+ dependencies = [
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name='AppliedMetadata',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ],
+ options={
+ 'db_table': 'AppliedMetadata',
+ },
+ ),
+ migrations.CreateModel(
+ name='Engine',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('name', models.CharField(max_length=16, unique=True)),
+ ('description', models.CharField(max_length=128)),
+ ('path', models.CharField(max_length=256)),
+ ('obj_name', models.CharField(max_length=32)),
+ ('active', models.BooleanField(default=False)),
+ ],
+ options={
+ 'db_table': 'Engine',
+ },
+ ),
+ migrations.CreateModel(
+ name='Function',
+ fields=[
+ ('id', models.BigAutoField(primary_key=True, serialize=False)),
+ ('sha256', models.CharField(max_length=64)),
+ ('opcodes', models.BinaryField()),
+ ('architecture', models.CharField(max_length=64)),
+ ],
+ options={
+ 'db_table': 'Function',
+ },
+ ),
+ migrations.CreateModel(
+ name='FunctionApis',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('api', models.CharField(max_length=128, unique=True)),
+ ],
+ options={
+ 'db_table': 'FunctionApis',
+ },
+ ),
+ migrations.CreateModel(
+ name='Metadata',
+ fields=[
+ ('id', models.BigAutoField(primary_key=True, serialize=False)),
+ ],
+ options={
+ 'db_table': 'Metadata',
+ },
+ ),
+ migrations.CreateModel(
+ name='MetadataDetails',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('name', models.CharField(max_length=256)),
+ ('prototype', models.CharField(max_length=256)),
+ ('comment', models.CharField(max_length=512)),
+ ('committed', models.DateTimeField(default=django.utils.timezone.now)),
+ ],
+ options={
+ 'db_table': 'MetadataDetails',
+ },
+ ),
+ migrations.CreateModel(
+ name='Sample',
+ fields=[
+ ('id', models.BigAutoField(primary_key=True, serialize=False)),
+ ('md5', models.CharField(max_length=32)),
+ ('crc32', models.BigIntegerField()),
+ ('sha1', models.CharField(blank=True, max_length=40, null=True)),
+ ('sha256', models.CharField(blank=True, max_length=64, null=True)),
+ ('last_seen', models.DateTimeField(blank=True, default=django.utils.timezone.now)),
+ ],
+ options={
+ 'db_table': 'Sample',
+ },
+ ),
+ migrations.CreateModel(
+ name='User',
+ fields=[
+ ('id', models.BigAutoField(primary_key=True, serialize=False)),
+ ('name', models.CharField(max_length=128)),
+ ('email', models.CharField(max_length=254)),
+ ('handle', models.CharField(max_length=32)),
+ ('number', models.IntegerField()),
+ ('api_key', models.UUIDField(unique=True)),
+ ('created', models.DateTimeField(default=django.utils.timezone.now)),
+ ('rank', models.BigIntegerField(default=0)),
+ ('active', models.BooleanField(default=True)),
+ ('service', models.CharField(max_length=16)),
+ ('auth_data', models.CharField(max_length=4096)),
+ ],
+ options={
+ 'db_table': 'User',
+ },
+ ),
+ migrations.AddIndex(
+ model_name='user',
+ index=models.Index(fields=['email'], name='User_email_ffa2e0_idx'),
+ ),
+ migrations.AddIndex(
+ model_name='user',
+ index=models.Index(fields=['api_key'], name='User_api_key_c4f2d6_idx'),
+ ),
+ migrations.AlterIndexTogether(
+ name='user',
+ index_together=set([('handle', 'number')]),
+ ),
+ migrations.AddField(
+ model_name='sample',
+ name='functions',
+ field=models.ManyToManyField(to='www.Function'),
+ ),
+ migrations.AddField(
+ model_name='sample',
+ name='seen_by',
+ field=models.ManyToManyField(to='www.User'),
+ ),
+ migrations.AddField(
+ model_name='metadata',
+ name='details',
+ field=models.ManyToManyField(to='www.MetadataDetails'),
+ ),
+ migrations.AddField(
+ model_name='metadata',
+ name='user',
+ field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.User'),
+ ),
+ migrations.AddField(
+ model_name='function',
+ name='apis',
+ field=models.ManyToManyField(to='www.FunctionApis'),
+ ),
+ migrations.AddField(
+ model_name='function',
+ name='metadata',
+ field=models.ManyToManyField(to='www.Metadata'),
+ ),
+ migrations.AddField(
+ model_name='engine',
+ name='developer',
+ field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.User'),
+ ),
+ migrations.AddField(
+ model_name='appliedmetadata',
+ name='metadata',
+ field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.Metadata'),
+ ),
+ migrations.AddField(
+ model_name='appliedmetadata',
+ name='sample',
+ field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.Sample'),
+ ),
+ migrations.AddField(
+ model_name='appliedmetadata',
+ name='user',
+ field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.User'),
+ ),
+ migrations.AlterUniqueTogether(
+ name='sample',
+ unique_together=set([('md5', 'crc32')]),
+ ),
+ migrations.AlterIndexTogether(
+ name='sample',
+ index_together=set([('md5', 'crc32')]),
+ ),
+ migrations.AddIndex(
+ model_name='metadata',
+ index=models.Index(fields=['user'], name='Metadata_user_id_aea908_idx'),
+ ),
+ migrations.AlterUniqueTogether(
+ name='function',
+ unique_together=set([('sha256', 'architecture')]),
+ ),
+ migrations.AddIndex(
+ model_name='engine',
+ index=models.Index(fields=['name'], name='Engine_name_14ac74_idx'),
+ ),
+ migrations.AlterUniqueTogether(
+ name='appliedmetadata',
+ unique_together=set([('metadata', 'sample', 'user')]),
+ ),
+ ]
diff --git a/server/www/models.py b/server/www/models.py
index 63e25b1..b5b244e 100644
--- a/server/www/models.py
+++ b/server/www/models.py
@@ -22,21 +22,21 @@
# Python Modules
from __future__ import unicode_literals
-import datetime
# Third Party Modules
from django.db import models
from django.utils import timezone
-from django.db.transaction import commit
class User(models.Model):
+ id = models.BigAutoField(primary_key=True)
+
name = models.CharField(max_length=128)
email = models.CharField(max_length=254)
handle = models.CharField(max_length=32)
number = models.IntegerField()
api_key = models.UUIDField(unique=True)
- created = models.DateTimeField(default=timezone.now, auto_now_add=True)
+ created = models.DateTimeField(default=timezone.now)
rank = models.BigIntegerField(default=0)
active = models.BooleanField(default=True)
@@ -61,6 +61,7 @@ def dump(self, full=False):
return data
class Meta:
+ db_table = 'User'
indexes = [
models.Index(fields=['email']),
models.Index(fields=['api_key']),
@@ -68,178 +69,167 @@ class Meta:
index_together = ("handle", "number")
-
class Engine(models.Model):
name = models.CharField(max_length=16, unique=True)
description = models.CharField(max_length=128)
path = models.CharField(max_length=256)
obj_name = models.CharField(max_length=32)
- developer = models.ForeignKey(User)
+ developer = models.ForeignKey('User')
active = models.BooleanField(default=False)
- class Meta:
- indexes = [
- models.Index(fields=['name']),
- ]
+ @property
+ def rank(self):
+ # TODO: Complete
+ #return len(self.applied)
+ return 0
def dump(self, full=False):
data = {'name' : self.name,
'description' : self.description,
'rank' : self.rank,
- 'developer' : Engine.objects.filter(engine_id = self.id)}
+ 'developer' : self.developer.user_handle}
if full:
data.update({'path' : self.path})
return data
- @property
- def rank(self):
- return len(self.applied)
+ class Meta:
+ db_table = 'Engine'
+ indexes = [
+ models.Index(fields=['name']),
+ ]
-class AppliedEngine(models.Model):
- engine_id = models.ForeignKey(Engine)
- sample_id = models.OneToOneField('Sample')
- user_id = models.OneToOneField(User)
- engine_metadata_id = models.BigIntegerField();
+
+# TODO: Create scheme for tracking applied metadata for engines
+#
+#class AppliedEngine(models.Model):
+# engine_id = models.ForeignKey(Engine)
+# sample_id = models.ForeignKey(Sample)
+# user_id = models.ForeignKey(User)
+# engine_metadata_id = models.BigIntegerField();
+#
+# class Meta:
+# db_table = 'AppliedEngine'
+# unique_together = ("sample_id", "user_id", "engine_metadata_id")
+
+class AppliedMetadata(models.Model):
+ metadata = models.ForeignKey('Metadata')
+ sample = models.ForeignKey('Sample')
+ user = models.ForeignKey('User')
class Meta:
- unique_together = ("sample_id", "user_id", "engine_metadata_id")
+ db_table = 'AppliedMetadata'
+ unique_together = ("metadata", "sample", "user")
-class Metadata(models.Model):
- user = models.OneToOneField(User )
+
+class MetadataDetails(models.Model):
+ name = models.CharField(max_length=256)
+ prototype = models.CharField(max_length=256)
+ comment = models.CharField(max_length=512)
+ committed = models.DateTimeField(default=timezone.now)
class Meta:
- indexes = [
- models.Index(fields=['user']),
- ]
+ db_table = 'MetadataDetails'
- def dump(self, full=False):
- data = {'creator' : User.objects.filter(metadata_id = self.id),
- 'name' : MetaDataName.objects.filter(metadata_id = self.id).first(),
- 'prototype' : MetaDataPrototype.objects.filter(metadata_id = self.id).first(),
- 'comment' : MetaDataComment.objects.filter(metadata_id = self.id).first(),
- 'rank' : len(self.applied)}
- if full:
- data['history'] = []
- name = MetaDataName.objects.filter(metadata_id = self.id)
- committed = MetaDataCommited.objects.filter(metadata_id = self.id)
- prototype = MetaDataPrototype.objects.filter(metadata_id = self.id);
- comment = MetaDataComment.objects.filter(metadata_id = self.id);
-
- for i in xrange(len(name) - 1, -1, -1):
- # Convert back with:
- # datetime.datetime.strptime(, '%Y-%m-%dT%H:%M:%S.%f')
- commit = committed[i].isoformat()
- data['history'].append({'name' : name[i],
- 'prototype' : prototype[i],
- 'comment' : comment[i],
- 'committed' : commit})
+class Metadata(models.Model):
+ id = models.BigAutoField(primary_key=True)
- return data
+ user = models.ForeignKey('User')
+ details = models.ManyToManyField('MetadataDetails')
+
+ @property
+ def rank(self):
+ if hasattr(self, 'id'):
+ return AppliedMetadata.objects.filter(metadata=self.id).count()
+
+ return 0
def has_changed(self, name, prototype, comment):
- if (not self.name) or (not self.prototype) or (not comment):
+ if not self.details.exists():
return True
- actualName = MetaDataName.objects.filter(metadata_id = self.id).first()
- actualPrototype = MetaDataPrototype.objects.filter(metadata_id = self.id).first()
- actualComment = MetaDataComment.objects.filter(metadata_id = self.id).first();
-
- if ((actualName.name != name)
- or (actualPrototype.prototype != prototype)
- or (actualComment.comment != comment)):
+ latest = self.details.latest('committed')
+ if ((latest.name != name)
+ or (latest.prototype != prototype)
+ or (latest.comment != comment)):
return True
return False
- @property
- def rank(self):
- return len(self.applied)
+ def dump(self, full=False):
+ data = {'creator' : self.user.user_handle}
+ latest_details = self.details.latest('committed')
+ data.update({
+ 'name' : latest_details.name,
+ 'prototype' : latest_details.prototype,
+ 'comment' : latest_details.comment,
+ 'rank' : self.rank
+ })
+
+ if full:
+ # Convert committed time back with:
+ # datetime.datetime.strptime(, '%Y-%m-%dT%H:%M:%S.%f')
+ data['history'] = [{'name' : d.name,
+ 'prototype' : d.prototype,
+ 'comment' : d.comment,
+ 'committed' : d.committed.isoformat()}
+ for d in self.details.order_by('committed')]
+
+ return data
-class AppliedMetaData(models.Model):
- metadata_id = models.ForeignKey(Engine)
- sample_id = models.OneToOneField('Sample')
- user_id = models.OneToOneField(User)
- engine_metadata_id = models.BigIntegerField();
class Meta:
- unique_together = ("metadata_id", "sample_id", "user_id")
+ db_table = 'Metadata'
+ indexes = [models.Index(fields=['user'])]
-class MetaDataName(models.Model):
- name = models.CharField(max_length=128)
- models.ForeignKey(Metadata)
-class MetaDataPrototype(models.Model):
- prototype = models.CharField(max_length=256)
- models.ForeignKey(Metadata)
+class FunctionApis(models.Model):
+ api = models.CharField(max_length=128, unique=True)
-class MetaDataComment(models.Model):
- comment = models.CharField(max_length=128)
- models.ForeignKey(Metadata)
+ class Meta:
+ db_table = 'FunctionApis'
-class MetaDataCommited(models.Model):
- committed = models.DateTimeField(default=datetime.datetime.utcnow, blank=True)
- models.ForeignKey(Metadata)
class Function(models.Model):
+ id = models.BigAutoField(primary_key=True)
+
sha256 = models.CharField(max_length=64)
- opcodes = models.BinaryField
- metadata = models.ForeignKey(Metadata)
- mnemonic_hash = models.ForeignKey('MnemonicHash')
- # Return value from idaapi.get_file_type_name()
+ opcodes = models.BinaryField()
+ apis = models.ManyToManyField('FunctionApis')
+ metadata = models.ManyToManyField('Metadata')
architecture = models.CharField(max_length=64)
def dump(self):
- return {'id' : self.id,
- 'opcodes' : self.opcodes,
- 'apis' : self.functionapis_set.all(),
- 'metadata' : [str(x.api) for x in Metadata.objects.filter(function_id = self.id)],
+ return {'opcodes' : self.opcodes,
'architecture' : self.architecture,
'sha256' : self.sha256}
-class FunctionApis(models.Model):
- api = models.CharField(max_length=64)
- models.ForeignKey(Function)
+ class Meta:
+ db_table = 'Function'
+ unique_together = ('sha256', 'architecture')
class Sample(models.Model):
+ id = models.BigAutoField(primary_key=True)
+
md5 = models.CharField(max_length=32)
- crc32 = models.IntegerField()
- sha1 = models.CharField(max_length=40)
- sha256 = models.CharField(max_length=64)
- seen_by = models.ManyToManyField( User, blank=True)
- functions = models.ManyToManyField( Function, blank=True)
- last_seen = models.DateTimeField(default=datetime.datetime.utcnow, blank=True)
+ crc32 = models.BigIntegerField()
+ sha1 = models.CharField(max_length=40, null=True, blank=True)
+ sha256 = models.CharField(max_length=64, null=True, blank=True)
+ seen_by = models.ManyToManyField('User')
+ functions = models.ManyToManyField('Function')
+ last_seen = models.DateTimeField(default=timezone.now, blank=True)
class Meta:
+ db_table = 'Sample'
index_together = ['md5', 'crc32']
+ unique_together = ('md5', 'crc32')
def dump(self):
- data = {'md5' : self.md5, 'crc32' : self.crc32,
- 'seen_by' : [str(x.id) for x in User.objects.filter(sample_id = self.id)],
- 'functions' : [str(x.id) for x in Function.objects.filter(sample_id = self.id)]}
-
- if 'sha1' in self:
- data['sha1'] = self.sha1
-
- if 'sha256' in self:
- data['sha256'] = self.sha256
-
- return data
-
-class MnemonicHash(models.Model):
- sha256 = models.CharField(max_length=64)
- architecture = models.CharField(max_length=64)
-
- class Meta:
- index_together = ('sha256', 'architecture')
-
- def dump(self):
- return {'sha256' : self.sha256,
- 'architecture' : self.architecture,
- 'functions' : self.function_set.all()}
-
- def function_list(self):
- return [str(x) for x in Function.objects.filter(MnemonicHash_id = self.id)]
+ return {'md5' : self.md5, 'crc32' : self.crc32,
+ 'seen_by' : [str(x.id) for x in self.seen_by.all()],
+ 'functions' : [str(x.id) for x in self.functions.all()],
+ 'sha1' : self.sha1,
+ 'sha256' : self.sha256}
diff --git a/server/www/views.py b/server/www/views.py
index 289e9b6..6fb7458 100644
--- a/server/www/views.py
+++ b/server/www/views.py
@@ -9,8 +9,8 @@
# FIRST Modules
-from www.models import Function, User
-from first.auth import Authentication, require_login, FIRSTAuthError
+from first_core.models import Function, User
+from first_core.auth import Authentication, require_login, FIRSTAuthError
def handler404(request):
return render(request, 'www/404.html', None)