From b4425421ff4d9a10b80fec46e40211a168e438b0 Mon Sep 17 00:00:00 2001 From: peevtod Date: Thu, 10 Aug 2017 16:54:31 +0300 Subject: [PATCH 01/17] Refactoring model to use django orm --- server/www/models.py | 256 ++++++++++++++++++++++++++----------------- 1 file changed, 155 insertions(+), 101 deletions(-) diff --git a/server/www/models.py b/server/www/models.py index 999885a..fe885b7 100644 --- a/server/www/models.py +++ b/server/www/models.py @@ -27,31 +27,21 @@ # Python Modules from __future__ import unicode_literals import datetime - -# Third Party Modules -from bson.objectid import ObjectId -from mongoengine import Document, StringField, UUIDField, \ - DateTimeField, LongField, ReferenceField, \ - BinaryField, ListField, BooleanField, ObjectIdField, \ - IntField, EmbeddedDocument, EmbeddedDocumentListField - -class User(Document): - name = StringField(max_length=128, required=True) - email = StringField(max_length=254, unique=True) - handle = StringField(max_length=32, required=True) - number = IntField(required=True) - api_key = UUIDField(required=True, unique=True) - created = DateTimeField(default=datetime.datetime.utcnow, required=True) - rank = LongField(default=0) - active = BooleanField(default=True) - - service = StringField(max_length=16, required=True) - auth_data = StringField(max_length=4096, required=True) - - meta = { - 'indexes' : [('handle', 'number'), 'api_key', 'email'] - } - +from django.db import models +from django.db.transaction import commit + + +class User(models.Model): + name = models.CharField(max_length=128) + email = models.CharField(max_length=254) + handle = models.CharField(max_length=32) + number = models.IntegerField() + created = models.DateTimeField(default=datetime.datetime.utcnow) + rank = models.BigIntegerField(default=0) + active = models.BooleanField(default=True) + service = models.CharField(max_length=16) + auth_data = models.CharField(max_length=4096) + @property def user_handle(self): return 
'{0.handle}#{0.number:04d}'.format(self) @@ -63,83 +53,107 @@ def dump(self, full=False): data.update({ 'id' : str(self.id), 'name' : self.name, 'email' : self.email, - 'api_key' : self.api_key, 'rank' : self.rank, 'created' : self.created, 'active' : self.active}) - return data - - -class Engine(Document): - name = StringField(max_length=16, required=True, unique=True) - description = StringField(max_length=128, required=True) - path = StringField(max_length=256, required=True) - obj_name = StringField(max_length=32, required=True) - applied = ListField(default=list) - developer = ReferenceField(User) - active = BooleanField(default=False) - - meta = { - 'indexes' : ['name'] - } + + class Meta: + indexes = [ + models.Index(fields=['email']), + ] + index_together = ("handle", "number") + + + +class API(models.Model): + key = models.UUIDField(unique=True) + # many to many relationship: + user = models.ManyToManyField(User,blank=True) + + +class Engine(models.Model): + name = models.CharField(max_length=16, unique=True) + description = models.CharField(max_length=128) + path = models.CharField(max_length=256) + obj_name = models.CharField(max_length=32) + + developer = models.OneToOneField(User) + active = models.BooleanField(default=False) + + class Meta: + indexes = [ + models.Index(fields=['name']), + ] def dump(self, full=False): data = {'name' : self.name, 'description' : self.description, 'rank' : self.rank, - 'developer' : self.developer.user_handle} + 'developer' : Engine.objects.filter(engine_id = self.id)} if full: - data.update({'id' : str(self.id), 'path' : self.path}) + data.update({'path' : self.path}) return data @property def rank(self): return len(self.applied) - - -class Metadata(EmbeddedDocument): - id = ObjectIdField(required=True, default=lambda: ObjectId()) - user = ReferenceField(User) - name = ListField(StringField(max_length=128), default=list) - prototype = ListField(StringField(max_length=256), default=list) - comment = 
ListField(StringField(max_length=512), default=list) - committed = ListField(DateTimeField(), default=list) - applied = ListField(default=list) - - meta = { - 'indexes' : ['user'] - } - + +class AppliedEngine(models.Model): + engine_id = models.ForeignKey(Engine) + sample_id = models.OneToOneField('Sample') + user_id = models.OneToOneField(User) + engine_metadata_id = models.BigIntegerField(); + + class Meta: + unique_together = ("sample_id", "user_id", "engine_metadata_id") + +class Metadata(models.Model): + user = models.OneToOneField(User ) + + class Meta: + indexes = [ + models.Index(fields=['user']), + ] + def dump(self, full=False): - data = {'creator' : self.user.user_handle, - 'name' : self.name[0], - 'prototype' : self.prototype[0], - 'comment' : self.comment[0], + data = {'creator' : User.objects.filter(metadata_id = self.id), + 'name' : MetaDataName.objects.filter(metadata_id = self.id).first(), + 'prototype' : MetaDataPrototype.objects.filter(metadata_id = self.id).first(), + 'comment' : MetaDataComment.objects.filter(metadata_id = self.id).first(), 'rank' : len(self.applied)} if full: data['history'] = [] - for i in xrange(len(self.name) - 1, -1, -1): + name = MetaDataName.objects.filter(metadata_id = self.id) + committed = MetaDataCommited.objects.filter(metadata_id = self.id) + prototype = MetaDataPrototype.objects.filter(metadata_id = self.id); + comment = MetaDataComment.objects.filter(metadata_id = self.id); + + for i in xrange(len(name) - 1, -1, -1): # Convert back with: # datetime.datetime.strptime(
, '%Y-%m-%dT%H:%M:%S.%f') - committed = self.committed[i].isoformat() - data['history'].append({'name' : self.name[i], - 'prototype' : self.prototype[i], - 'comment' : self.comment[i], - 'committed' : committed}) + commit = committed[i].isoformat() + data['history'].append({'name' : name[i], + 'prototype' : prototype[i], + 'comment' : comment[i], + 'committed' : commit}) return data def has_changed(self, name, prototype, comment): if (not self.name) or (not self.prototype) or (not comment): return True - - if ((self.name[0] != name) - or (self.prototype[0] != prototype) - or (self.comment[0] != comment)): + + actualName = MetaDataName.objects.filter(metadata_id = self.id).first() + actualPrototype = MetaDataPrototype.objects.filter(metadata_id = self.id).first() + actualComment = MetaDataComment.objects.filter(metadata_id = self.id).first(); + + if ((actualName.name != name) + or (actualPrototype.prototype != prototype) + or (actualComment.comment != comment)): return True return False @@ -148,45 +162,67 @@ def has_changed(self, name, prototype, comment): def rank(self): return len(self.applied) -# Use bson.Binary to insert binary data -class Function(Document): - sha256 = StringField(max_length=64) - opcodes = BinaryField() - apis = ListField(StringField(max_length=64), default=list) - metadata = EmbeddedDocumentListField(Metadata, default=list) +class AppliedMetaData(models.Model): + metadata_id = models.ForeignKey(Engine) + sample_id = models.OneToOneField('Sample') + user_id = models.OneToOneField(User) + engine_metadata_id = models.BigIntegerField(); + class Meta: + unique_together = ("metadata_id", "sample_id", "user_id") + +class MetaDataName(models.Model): + name = models.CharField(max_length=128) + models.ForeignKey(Metadata) + +class MetaDataPrototype(models.Model): + prototype = models.CharField(max_length=256) + models.ForeignKey(Metadata) + +class MetaDataComment(models.Model): + comment = models.CharField(max_length=128) + models.ForeignKey(Metadata) 
+ +class MetaDataCommited(models.Model): + committed = models.DateTimeField(default=datetime.datetime.utcnow, blank=True) + models.ForeignKey(Metadata) + +class Function(models.Model): + sha256 = models.CharField(max_length=64) + opcodes = models.BinaryField + metadata = models.ForeignKey(Metadata) + mnemonic_hash = models.ForeignKey('MnemonicHash') # Return value from idaapi.get_file_type_name() - architecture = StringField(max_length=64, required=True) - - meta = { - 'indexes' : [] - } + architecture = models.CharField(max_length=64) def dump(self): return {'id' : self.id, 'opcodes' : self.opcodes, - 'apis' : self.apis, - 'metadata' : [str(x.id) for x in self.metadata], + 'apis' : self.functionapis_set.all(), + 'metadata' : [str(x.api) for x in Metadata.objects.filter(function_id = self.id)], 'architecture' : self.architecture, 'sha256' : self.sha256} - -class Sample(Document): - md5 = StringField(max_length=32, required=True) - crc32 = IntField(required=True) - sha1 = StringField(max_length=40) - sha256 = StringField(max_length=64) - seen_by = ListField(ReferenceField(User), default=list) - functions = ListField(ReferenceField(Function), default=list) - last_seen = DateTimeField(default=datetime.datetime.utcnow) - - meta = { - 'indexes' : [('md5', 'crc32')] - } - +class FunctionApis(models.Model): + api = models.CharField(max_length=64) + models.ForeignKey(Function) + + +class Sample(models.Model): + md5 = models.CharField(max_length=32) + crc32 = models.IntegerField() + sha1 = models.CharField(max_length=40) + sha256 = models.CharField(max_length=64) + seen_by = models.ManyToManyField( User, blank=True) + functions = models.ManyToManyField( Function, blank=True) + last_seen = models.DateTimeField(default=datetime.datetime.utcnow, blank=True) + + class Meta: + index_together = ['md5', 'crc32'] + def dump(self): data = {'md5' : self.md5, 'crc32' : self.crc32, - 'seen_by' : [str(x.id) for x in self.seen_by], - 'functions' : [str(x.id) for x in self.functions]} + 
'seen_by' : [str(x.id) for x in User.objects.filter(sample_id = self.id)], + 'functions' : [str(x.id) for x in Function.objects.filter(sample_id = self.id)]} if 'sha1' in self: data['sha1'] = self.sha1 @@ -195,3 +231,21 @@ def dump(self): data['sha256'] = self.sha256 return data + +class MnemonicHash(models.Model): + sha256 = models.CharField(max_length=64) + architecture = models.CharField(max_length=64) + + class Meta: + index_together = ('sha256', 'architecture') + + def dump(self): + return {'sha256' : self.sha256, + 'architecture' : self.architecture, + 'functions' : self.function_set.all()} + + def function_list(self): + return [str(x) for x in Function.objects.filter(MnemonicHash_id = self.id)] + + + From 188a2fc36702f3bbcaeeee6b2b1e974abdf7735a Mon Sep 17 00:00:00 2001 From: demonduck Date: Thu, 10 Aug 2017 15:17:35 -0400 Subject: [PATCH 02/17] backing up code --- .gitignore | 1 + server/example_config.json | 15 +++++++ server/first/settings.py | 61 ++++++++++++-------------- server/www/models.py | 90 ++++++++++++++++++-------------------- 4 files changed, 87 insertions(+), 80 deletions(-) create mode 100644 server/example_config.json diff --git a/.gitignore b/.gitignore index ba74660..b72cd4d 100644 --- a/.gitignore +++ b/.gitignore @@ -55,3 +55,4 @@ docs/_build/ # PyBuilder target/ +server/first_config.json diff --git a/server/example_config.json b/server/example_config.json new file mode 100644 index 0000000..43ed855 --- /dev/null +++ b/server/example_config.json @@ -0,0 +1,15 @@ +{ + "secret_key" : "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", + + "db_engine" : "django.db.backends.mysql", + "db_dbname" : "first", + "db_user" : "user", + "db_password" : "pass", + "db_host" : "localhost", + "db_port" : 3306, + + "debug" : true, + "allowed_hosts" : ["localhost", "testserver"], + + "oauth_path" : "", +} diff --git a/server/first/settings.py b/server/first/settings.py index d2e318c..53faf89 100644 --- a/server/first/settings.py +++ b/server/first/settings.py @@ 
-11,9 +11,19 @@ """ import os - -# Third Party Modules -import mongoengine +import json + +# Read in configuration data +FIRST_CONFIG_FILE = 'first_config.json' +CONFIG = {} +try: + config_data = json.load(file(FIRST_CONFIG_FILE)) + if type(config_data) == dict: + CONFIG = config_data +except IOError as ioe: + print '[1st] IOError: {}'.format(ioe) +except ValueError as ve: + print '[1st] ValueError: {}'.format(ve) # Build paths inside the project like this: os.path.join(BASE_DIR, ...) BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -23,12 +33,13 @@ # See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = 'd2nev@620*3vi@qvynch)seb4^pghp=-)aenfs(4%)-k@xqpo9' +SECRET_KEY = CONFIG.get('secret_key', + 'd2nev@620*3vi@qvynch)seb4^pghp=-)aenfs(4%)-k@xqpo9') # SECURITY WARNING: don't run with debug turned on in production! -DEBUG = True +DEBUG = CONFIG.get('debug', True) -ALLOWED_HOSTS = [] +ALLOWED_HOSTS = CONFIG.get('allowed_hosts', []) # Application definition @@ -80,31 +91,17 @@ # Database # https://docs.djangoproject.com/en/1.10/ref/settings/#databases -# MySQL Settings -_MYSQL_USER = os.environ.get('MYSQL_USER', 'root') -_MYSQL_PASSWORD = os.environ.get('MYSQL_PASSWORD', '') -_MYSQL_DATABASE = os.environ.get('MYSQL_DATABASE', 'first_db') -_MYSQL_HOST = os.environ.get('MYSQL_HOST', 'localhost') -_MYSQL_PORT = os.environ.get('MYSQL_PORT', 3306) - DATABASES = { 'default': { - 'ENGINE': 'django.db.backends.mysql', - 'NAME': _MYSQL_DATABASE, - 'USER': _MYSQL_USER, - 'PASSWORD': _MYSQL_PASSWORD, - 'HOST': _MYSQL_HOST, - 'PORT': _MYSQL_PORT + 'ENGINE': CONFIG.get('db_engine', 'django.db.backends.mysql'), + 'NAME': CONFIG.get('db_dbname', 'first_db'), + 'USER': CONFIG.get('db_user', 'root'), + 'PASSWORD': CONFIG.get('db_password', ''), + 'HOST': CONFIG.get('db_host', 'localhost'), + 'PORT': CONFIG.get('db_port', 3306) } } -# MongoDB settings 
-_MONGODB_HOST = os.environ.get('MONGO_HOST', 'localhost') -_MONGODB_PORT = int(os.environ.get('MONGO_PORT', 27017)) -_MONGODB_NAME = os.environ.get('MONGO_NAME', 'first_db') - -mongoengine.connect(_MONGODB_NAME, host=_MONGODB_HOST, port=_MONGODB_PORT) - # Password validation # https://docs.djangoproject.com/en/1.10/ref/settings/#auth-password-validators @@ -127,19 +124,19 @@ # Internationalization # https://docs.djangoproject.com/en/1.10/topics/i18n/ -LANGUAGE_CODE = 'en-us' +LANGUAGE_CODE = CONFIG.get('language_code', 'en-us') -TIME_ZONE = 'EST' +TIME_ZONE = CONFIG.get('time_zone', 'EST') -USE_I18N = True +USE_I18N = CONFIG.get('use_i18n', True) -USE_L10N = True +USE_L10N = CONFIG.get('use_l10n', True) -USE_TZ = True +USE_TZ = CONFIG.get('use_tz', True) # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/1.10/howto/static-files/ STATIC_ROOT = os.path.join(BASE_DIR, 'static') -STATIC_URL = '/static/' +STATIC_URL = CONFIG.get('static_url', '/static/') diff --git a/server/www/models.py b/server/www/models.py index fe885b7..63e25b1 100644 --- a/server/www/models.py +++ b/server/www/models.py @@ -1,7 +1,7 @@ #------------------------------------------------------------------------------- # -# FIRST MongoDB Models -# Copyright (C) 2016 Angel M. Villegas +# FIRST Django ORM Models +# Copyright (C) 2017 Angel M. Villegas # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -17,17 +17,16 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# -# Requirements -# ------------ -# mongoengine (https://pypi.python.org/pypi/mongoengine/) -# #------------------------------------------------------------------------------- # Python Modules from __future__ import unicode_literals import datetime + +# Third Party Modules from django.db import models +from django.utils import timezone from django.db.transaction import commit @@ -36,12 +35,14 @@ class User(models.Model): email = models.CharField(max_length=254) handle = models.CharField(max_length=32) number = models.IntegerField() - created = models.DateTimeField(default=datetime.datetime.utcnow) + api_key = models.UUIDField(unique=True) + created = models.DateTimeField(default=timezone.now, auto_now_add=True) rank = models.BigIntegerField(default=0) active = models.BooleanField(default=True) + service = models.CharField(max_length=16) - auth_data = models.CharField(max_length=4096) - + auth_data = models.CharField(max_length=4096) + @property def user_handle(self): return '{0.handle}#{0.number:04d}'.format(self) @@ -53,37 +54,33 @@ def dump(self, full=False): data.update({ 'id' : str(self.id), 'name' : self.name, 'email' : self.email, + 'api_key' : self.api_key, 'rank' : self.rank, 'created' : self.created, 'active' : self.active}) return data - + class Meta: indexes = [ - models.Index(fields=['email']), + models.Index(fields=['email']), + models.Index(fields=['api_key']), ] index_together = ("handle", "number") - - -class API(models.Model): - key = models.UUIDField(unique=True) - # many to many relationship: - user = models.ManyToManyField(User,blank=True) - - + + class Engine(models.Model): name = models.CharField(max_length=16, unique=True) description = models.CharField(max_length=128) path = models.CharField(max_length=256) obj_name = models.CharField(max_length=32) - - developer = models.OneToOneField(User) + + developer = models.ForeignKey(User) active = models.BooleanField(default=False) - + class Meta: indexes = [ - models.Index(fields=['name']), + 
models.Index(fields=['name']), ] def dump(self, full=False): @@ -100,24 +97,24 @@ def dump(self, full=False): @property def rank(self): return len(self.applied) - -class AppliedEngine(models.Model): + +class AppliedEngine(models.Model): engine_id = models.ForeignKey(Engine) - sample_id = models.OneToOneField('Sample') + sample_id = models.OneToOneField('Sample') user_id = models.OneToOneField(User) engine_metadata_id = models.BigIntegerField(); - + class Meta: - unique_together = ("sample_id", "user_id", "engine_metadata_id") + unique_together = ("sample_id", "user_id", "engine_metadata_id") class Metadata(models.Model): - user = models.OneToOneField(User ) - + user = models.OneToOneField(User ) + class Meta: indexes = [ - models.Index(fields=['user']), + models.Index(fields=['user']), ] - + def dump(self, full=False): data = {'creator' : User.objects.filter(metadata_id = self.id), 'name' : MetaDataName.objects.filter(metadata_id = self.id).first(), @@ -131,7 +128,7 @@ def dump(self, full=False): committed = MetaDataCommited.objects.filter(metadata_id = self.id) prototype = MetaDataPrototype.objects.filter(metadata_id = self.id); comment = MetaDataComment.objects.filter(metadata_id = self.id); - + for i in xrange(len(name) - 1, -1, -1): # Convert back with: # datetime.datetime.strptime(
, '%Y-%m-%dT%H:%M:%S.%f') @@ -146,11 +143,11 @@ def dump(self, full=False): def has_changed(self, name, prototype, comment): if (not self.name) or (not self.prototype) or (not comment): return True - + actualName = MetaDataName.objects.filter(metadata_id = self.id).first() actualPrototype = MetaDataPrototype.objects.filter(metadata_id = self.id).first() actualComment = MetaDataComment.objects.filter(metadata_id = self.id).first(); - + if ((actualName.name != name) or (actualPrototype.prototype != prototype) or (actualComment.comment != comment)): @@ -162,26 +159,26 @@ def has_changed(self, name, prototype, comment): def rank(self): return len(self.applied) -class AppliedMetaData(models.Model): +class AppliedMetaData(models.Model): metadata_id = models.ForeignKey(Engine) - sample_id = models.OneToOneField('Sample') + sample_id = models.OneToOneField('Sample') user_id = models.OneToOneField(User) engine_metadata_id = models.BigIntegerField(); class Meta: - unique_together = ("metadata_id", "sample_id", "user_id") - + unique_together = ("metadata_id", "sample_id", "user_id") + class MetaDataName(models.Model): name = models.CharField(max_length=128) models.ForeignKey(Metadata) - + class MetaDataPrototype(models.Model): prototype = models.CharField(max_length=256) models.ForeignKey(Metadata) - + class MetaDataComment(models.Model): comment = models.CharField(max_length=128) models.ForeignKey(Metadata) - + class MetaDataCommited(models.Model): committed = models.DateTimeField(default=datetime.datetime.utcnow, blank=True) models.ForeignKey(Metadata) @@ -205,7 +202,7 @@ def dump(self): class FunctionApis(models.Model): api = models.CharField(max_length=64) models.ForeignKey(Function) - + class Sample(models.Model): md5 = models.CharField(max_length=32) @@ -218,7 +215,7 @@ class Sample(models.Model): class Meta: index_together = ['md5', 'crc32'] - + def dump(self): data = {'md5' : self.md5, 'crc32' : self.crc32, 'seen_by' : [str(x.id) for x in User.objects.filter(sample_id 
= self.id)], @@ -238,7 +235,7 @@ class MnemonicHash(models.Model): class Meta: index_together = ('sha256', 'architecture') - + def dump(self): return {'sha256' : self.sha256, 'architecture' : self.architecture, @@ -246,6 +243,3 @@ def dump(self): def function_list(self): return [str(x) for x in Function.objects.filter(MnemonicHash_id = self.id)] - - - From 50ac33f554dd05835cdb1675bf57426b332b5e8c Mon Sep 17 00:00:00 2001 From: demonduck Date: Tue, 15 Aug 2017 08:04:36 -0400 Subject: [PATCH 03/17] backing up code --- server/first/engines/mnemonic_hash.py | 26 +++- server/www/migrations/0001_initial.py | 198 ++++++++++++++++++++++++++ server/www/models.py | 194 ++++++++++++------------- 3 files changed, 311 insertions(+), 107 deletions(-) create mode 100644 server/www/migrations/0001_initial.py diff --git a/server/first/engines/mnemonic_hash.py b/server/first/engines/mnemonic_hash.py index 19b23ae..38dfedd 100644 --- a/server/first/engines/mnemonic_hash.py +++ b/server/first/engines/mnemonic_hash.py @@ -38,9 +38,29 @@ # Third Party Modules from bson.objectid import ObjectId from distorm3 import DecomposeGenerator, Decode32Bits, Decode64Bits, Decode16Bits -from mongoengine.queryset import DoesNotExist, MultipleObjectsReturned -from mongoengine import Document, StringField, ListField, ObjectIdField +from django.db import models + +class MnemonicHash(models.Model): + sha256 = models.CharField(max_length=64) + architecture = models.CharField(max_length=64) + functions = model.ManyToManyField(MnemonicHashFunctions) + + class Meta: + index_together = ('sha256', 'architecture') + + def dump(self): + return {'sha256' : self.sha256, + 'architecture' : self.architecture, + 'functions' : self.functions.all()} + + def function_list(self): + return [str(x) for x in self.functions.all()] + +class MnemonicHashFunctions(models.Model): + func = models.BigIntegerField() + +''' class MnemonicHash(Document): sha256 = StringField(max_length=64, required=True) architecture = 
StringField(max_length=64, required=True) @@ -57,7 +77,7 @@ def dump(self): def function_list(self): return [str(x) for x in self.functions] - +''' class MnemonicHashEngine(AbstractEngine): _name = 'MnemonicHash' diff --git a/server/www/migrations/0001_initial.py b/server/www/migrations/0001_initial.py new file mode 100644 index 0000000..8a858c7 --- /dev/null +++ b/server/www/migrations/0001_initial.py @@ -0,0 +1,198 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.4 on 2017-08-15 11:54 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='AppliedMetadata', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ], + options={ + 'db_table': 'AppliedMetadata', + }, + ), + migrations.CreateModel( + name='Engine', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=16, unique=True)), + ('description', models.CharField(max_length=128)), + ('path', models.CharField(max_length=256)), + ('obj_name', models.CharField(max_length=32)), + ('active', models.BooleanField(default=False)), + ], + options={ + 'db_table': 'Engine', + }, + ), + migrations.CreateModel( + name='Function', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('sha256', models.CharField(max_length=64)), + ('opcodes', models.BinaryField()), + ('architecture', models.CharField(max_length=64)), + ], + options={ + 'db_table': 'Function', + }, + ), + migrations.CreateModel( + name='FunctionApis', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('api', models.CharField(max_length=64)), + ], + 
options={ + 'db_table': 'FunctionApis', + }, + ), + migrations.CreateModel( + name='Metadata', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ], + options={ + 'db_table': 'Metadata', + }, + ), + migrations.CreateModel( + name='MetadataDetails', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=256)), + ('prototype', models.CharField(max_length=256)), + ('comment', models.CharField(max_length=256)), + ('committed', models.DateTimeField(default=django.utils.timezone.now)), + ], + options={ + 'db_table': 'MetadataDetails', + }, + ), + migrations.CreateModel( + name='Sample', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('md5', models.CharField(max_length=32)), + ('crc32', models.IntegerField()), + ('sha1', models.CharField(blank=True, max_length=40, null=True)), + ('sha256', models.CharField(blank=True, max_length=64, null=True)), + ('last_seen', models.DateTimeField(blank=True, default=django.utils.timezone.now)), + ], + ), + migrations.CreateModel( + name='User', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=128)), + ('email', models.CharField(max_length=254)), + ('handle', models.CharField(max_length=32)), + ('number', models.IntegerField()), + ('api_key', models.UUIDField(unique=True)), + ('created', models.DateTimeField(default=django.utils.timezone.now)), + ('rank', models.BigIntegerField(default=0)), + ('active', models.BooleanField(default=True)), + ('service', models.CharField(max_length=16)), + ('auth_data', models.CharField(max_length=4096)), + ], + options={ + 'db_table': 'User', + }, + ), + migrations.AddIndex( + model_name='user', + index=models.Index(fields=['email'], name='User_email_ffa2e0_idx'), + ), + 
migrations.AddIndex( + model_name='user', + index=models.Index(fields=['api_key'], name='User_api_key_c4f2d6_idx'), + ), + migrations.AlterIndexTogether( + name='user', + index_together=set([('handle', 'number')]), + ), + migrations.AddField( + model_name='sample', + name='functions', + field=models.ManyToManyField(to='www.Function'), + ), + migrations.AddField( + model_name='sample', + name='seen_by', + field=models.ManyToManyField(to='www.User'), + ), + migrations.AddField( + model_name='metadata', + name='applied', + field=models.ManyToManyField(to='www.AppliedMetadata'), + ), + migrations.AddField( + model_name='metadata', + name='details', + field=models.ManyToManyField(to='www.MetadataDetails'), + ), + migrations.AddField( + model_name='metadata', + name='user', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.User'), + ), + migrations.AddField( + model_name='function', + name='apis', + field=models.ManyToManyField(to='www.FunctionApis'), + ), + migrations.AddField( + model_name='function', + name='metadata', + field=models.ManyToManyField(to='www.Metadata'), + ), + migrations.AddField( + model_name='engine', + name='developer', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.User'), + ), + migrations.AddField( + model_name='appliedmetadata', + name='metadata_id', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.Metadata'), + ), + migrations.AddField( + model_name='appliedmetadata', + name='sample_id', + field=models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to='www.Sample'), + ), + migrations.AddField( + model_name='appliedmetadata', + name='user_id', + field=models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to='www.User'), + ), + migrations.AlterIndexTogether( + name='sample', + index_together=set([('md5', 'crc32')]), + ), + migrations.AddIndex( + model_name='metadata', + index=models.Index(fields=['user'], 
name='Metadata_user_id_aea908_idx'), + ), + migrations.AddIndex( + model_name='engine', + index=models.Index(fields=['name'], name='Engine_name_14ac74_idx'), + ), + migrations.AlterUniqueTogether( + name='appliedmetadata', + unique_together=set([('metadata_id', 'sample_id', 'user_id')]), + ), + ] diff --git a/server/www/models.py b/server/www/models.py index 63e25b1..0433367 100644 --- a/server/www/models.py +++ b/server/www/models.py @@ -27,7 +27,6 @@ # Third Party Modules from django.db import models from django.utils import timezone -from django.db.transaction import commit class User(models.Model): @@ -36,7 +35,7 @@ class User(models.Model): handle = models.CharField(max_length=32) number = models.IntegerField() api_key = models.UUIDField(unique=True) - created = models.DateTimeField(default=timezone.now, auto_now_add=True) + created = models.DateTimeField(default=timezone.now) rank = models.BigIntegerField(default=0) active = models.BooleanField(default=True) @@ -61,6 +60,7 @@ def dump(self, full=False): return data class Meta: + db_table = 'User' indexes = [ models.Index(fields=['email']), models.Index(fields=['api_key']), @@ -68,158 +68,159 @@ class Meta: index_together = ("handle", "number") - class Engine(models.Model): name = models.CharField(max_length=16, unique=True) description = models.CharField(max_length=128) path = models.CharField(max_length=256) obj_name = models.CharField(max_length=32) - developer = models.ForeignKey(User) + developer = models.ForeignKey('User') active = models.BooleanField(default=False) - class Meta: - indexes = [ - models.Index(fields=['name']), - ] + #@property + #def rank(self): + # return len(self.applied) def dump(self, full=False): data = {'name' : self.name, 'description' : self.description, 'rank' : self.rank, - 'developer' : Engine.objects.filter(engine_id = self.id)} + 'developer' : self.developer.user_handle} if full: data.update({'path' : self.path}) return data - @property - def rank(self): - return 
len(self.applied) + class Meta: + db_table = 'Engine' + indexes = [ + models.Index(fields=['name']), + ] + + +# TODO: Create scheme for tracking applied metadata for engines +# +#class AppliedEngine(models.Model): +# engine_id = models.ForeignKey(Engine) +# sample_id = models.ForeignKey(Sample) +# user_id = models.ForeignKey(User) +# engine_metadata_id = models.BigIntegerField(); +# +# class Meta: +# db_table = 'AppliedEngine' +# unique_together = ("sample_id", "user_id", "engine_metadata_id") -class AppliedEngine(models.Model): - engine_id = models.ForeignKey(Engine) +class AppliedMetadata(models.Model): + metadata_id = models.ForeignKey('Metadata') sample_id = models.OneToOneField('Sample') - user_id = models.OneToOneField(User) - engine_metadata_id = models.BigIntegerField(); + user_id = models.OneToOneField('User') class Meta: - unique_together = ("sample_id", "user_id", "engine_metadata_id") + db_table = 'AppliedMetadata' + unique_together = ("metadata_id", "sample_id", "user_id") -class Metadata(models.Model): - user = models.OneToOneField(User ) + +class MetadataDetails(models.Model): + name = models.CharField(max_length=256) + prototype = models.CharField(max_length=256) + comment = models.CharField(max_length=256) + committed = models.DateTimeField(default=timezone.now) class Meta: - indexes = [ - models.Index(fields=['user']), - ] + db_table = 'MetadataDetails' - def dump(self, full=False): - data = {'creator' : User.objects.filter(metadata_id = self.id), - 'name' : MetaDataName.objects.filter(metadata_id = self.id).first(), - 'prototype' : MetaDataPrototype.objects.filter(metadata_id = self.id).first(), - 'comment' : MetaDataComment.objects.filter(metadata_id = self.id).first(), - 'rank' : len(self.applied)} - if full: - data['history'] = [] - name = MetaDataName.objects.filter(metadata_id = self.id) - committed = MetaDataCommited.objects.filter(metadata_id = self.id) - prototype = MetaDataPrototype.objects.filter(metadata_id = self.id); - comment = 
MetaDataComment.objects.filter(metadata_id = self.id); - - for i in xrange(len(name) - 1, -1, -1): - # Convert back with: - # datetime.datetime.strptime(
, '%Y-%m-%dT%H:%M:%S.%f') - commit = committed[i].isoformat() - data['history'].append({'name' : name[i], - 'prototype' : prototype[i], - 'comment' : comment[i], - 'committed' : commit}) +class Metadata(models.Model): + user = models.ForeignKey('User') + details = models.ManyToManyField('Metadatadetails') + applied = models.ManyToManyField('AppliedMetadata') - return data + @property + def rank(self): + return self.applied.count() def has_changed(self, name, prototype, comment): - if (not self.name) or (not self.prototype) or (not comment): + if not self.details.exists(): return True - actualName = MetaDataName.objects.filter(metadata_id = self.id).first() - actualPrototype = MetaDataPrototype.objects.filter(metadata_id = self.id).first() - actualComment = MetaDataComment.objects.filter(metadata_id = self.id).first(); - - if ((actualName.name != name) - or (actualPrototype.prototype != prototype) - or (actualComment.comment != comment)): + latest = self.details.latest('committed') + if ((latest.name != name) + or (latest.prototype != prototype) + or (latest.comment != comment)): return True return False - @property - def rank(self): - return len(self.applied) + def dump(self, full=False): + data = {'creator' : self.user.user_handle} + latest_details = self.details.latest('committed') + data.update({ + 'name' : latest_details.name, + 'prototype' : latest_details.prototype, + 'comment' : latest_details.comment, + 'rank' : AppliedMetadata(metadata_id=latest_details.id).count() + }) + + if full: + data['history'] = [] + for d in xrange(self.details.count()): + # Convert committed time back with: + # datetime.datetime.strptime(
, '%Y-%m-%dT%H:%M:%S.%f') + data['history'].append({'name' : d.name, + 'prototype' : d.prototype, + 'comment' : d.comment, + 'committed' : d.commit.isoformat()}) + + return data -class AppliedMetaData(models.Model): - metadata_id = models.ForeignKey(Engine) - sample_id = models.OneToOneField('Sample') - user_id = models.OneToOneField(User) - engine_metadata_id = models.BigIntegerField(); class Meta: - unique_together = ("metadata_id", "sample_id", "user_id") + db_table = 'Metadata' + indexes = [models.Index(fields=['user'])] -class MetaDataName(models.Model): - name = models.CharField(max_length=128) - models.ForeignKey(Metadata) -class MetaDataPrototype(models.Model): - prototype = models.CharField(max_length=256) - models.ForeignKey(Metadata) +class FunctionApis(models.Model): + api = models.CharField(max_length=64) -class MetaDataComment(models.Model): - comment = models.CharField(max_length=128) - models.ForeignKey(Metadata) + class Meta: + db_table = 'FunctionApis' -class MetaDataCommited(models.Model): - committed = models.DateTimeField(default=datetime.datetime.utcnow, blank=True) - models.ForeignKey(Metadata) class Function(models.Model): sha256 = models.CharField(max_length=64) - opcodes = models.BinaryField - metadata = models.ForeignKey(Metadata) - mnemonic_hash = models.ForeignKey('MnemonicHash') - # Return value from idaapi.get_file_type_name() + opcodes = models.BinaryField() + apis = models.ManyToManyField('FunctionApis') + metadata = models.ManyToManyField('Metadata') architecture = models.CharField(max_length=64) def dump(self): return {'id' : self.id, 'opcodes' : self.opcodes, - 'apis' : self.functionapis_set.all(), - 'metadata' : [str(x.api) for x in Metadata.objects.filter(function_id = self.id)], + 'apis' : [str(x.api) for x in self.apis.all()], + 'metadata' : [str(x.api) for x in self.metadata.all()], 'architecture' : self.architecture, 'sha256' : self.sha256} -class FunctionApis(models.Model): - api = models.CharField(max_length=64) - 
models.ForeignKey(Function) + class Meta: + db_table = 'Function' class Sample(models.Model): md5 = models.CharField(max_length=32) crc32 = models.IntegerField() - sha1 = models.CharField(max_length=40) - sha256 = models.CharField(max_length=64) - seen_by = models.ManyToManyField( User, blank=True) - functions = models.ManyToManyField( Function, blank=True) - last_seen = models.DateTimeField(default=datetime.datetime.utcnow, blank=True) + sha1 = models.CharField(max_length=40, null=True, blank=True) + sha256 = models.CharField(max_length=64, null=True, blank=True) + seen_by = models.ManyToManyField('User') + functions = models.ManyToManyField('Function') + last_seen = models.DateTimeField(default=timezone.now, blank=True) class Meta: index_together = ['md5', 'crc32'] def dump(self): data = {'md5' : self.md5, 'crc32' : self.crc32, - 'seen_by' : [str(x.id) for x in User.objects.filter(sample_id = self.id)], - 'functions' : [str(x.id) for x in Function.objects.filter(sample_id = self.id)]} + 'seen_by' : [str(x.id) for x in self.seen_by.all()], + 'functions' : [str(x.id) for x in self.functions.all()]} if 'sha1' in self: data['sha1'] = self.sha1 @@ -228,18 +229,3 @@ def dump(self): data['sha256'] = self.sha256 return data - -class MnemonicHash(models.Model): - sha256 = models.CharField(max_length=64) - architecture = models.CharField(max_length=64) - - class Meta: - index_together = ('sha256', 'architecture') - - def dump(self): - return {'sha256' : self.sha256, - 'architecture' : self.architecture, - 'functions' : self.function_set.all()} - - def function_list(self): - return [str(x) for x in Function.objects.filter(MnemonicHash_id = self.id)] From c0daa12ce5a31e45d4d55a73707733960833b7ff Mon Sep 17 00:00:00 2001 From: demonduck Date: Wed, 30 Aug 2017 14:01:19 -0400 Subject: [PATCH 04/17] django ORM migration for core engine MnemonicHashEngine and BasicMaskingEngine are no longer available until ported --- server/first/__init__.py | 27 - server/first/dbs/builtin_db.py 
| 466 ------------------ server/first/engines/basic_masking.py | 222 --------- server/first/engines/mnemonic_hash.py | 165 ------- server/first/models.py | 197 -------- server/first/settings.py | 4 +- server/first_core/__init__.py | 27 + server/{first => first_core}/auth.py | 6 +- server/{first => first_core}/dbs/__init__.py | 4 +- server/first_core/dbs/builtin_db.py | 433 ++++++++++++++++ .../{first => first_core}/engines/__init__.py | 6 +- .../engines/exact_match.py | 8 +- .../{first => first_core}/engines/results.py | 17 +- .../engines/skeleton.py_ | 4 +- server/{first => first_core}/error.py | 0 server/first_core/models.py | 27 + server/first_core/util.py | 105 ++++ server/rest/urls.py | 3 +- server/rest/views.py | 29 +- server/utilities/__init__.py | 0 server/utilities/engine_shell.py | 23 +- server/utilities/mongo_to_django_orm.py | 346 +++++++++++++ server/utilities/populate_engine.py | 2 +- server/www/migrations/0001_initial.py | 44 +- server/www/models.py | 78 +-- server/www/views.py | 4 +- 26 files changed, 1056 insertions(+), 1191 deletions(-) delete mode 100644 server/first/dbs/builtin_db.py delete mode 100644 server/first/engines/basic_masking.py delete mode 100644 server/first/engines/mnemonic_hash.py delete mode 100644 server/first/models.py create mode 100644 server/first_core/__init__.py rename server/{first => first_core}/auth.py (98%) rename server/{first => first_core}/dbs/__init__.py (97%) create mode 100644 server/first_core/dbs/builtin_db.py rename server/{first => first_core}/engines/__init__.py (98%) rename server/{first => first_core}/engines/exact_match.py (90%) rename server/{first => first_core}/engines/results.py (90%) rename server/{first => first_core}/engines/skeleton.py_ (96%) rename server/{first => first_core}/error.py (100%) create mode 100644 server/first_core/models.py create mode 100644 server/first_core/util.py create mode 100644 server/utilities/__init__.py create mode 100644 server/utilities/mongo_to_django_orm.py diff 
--git a/server/first/__init__.py b/server/first/__init__.py index 80bd52a..e69de29 100644 --- a/server/first/__init__.py +++ b/server/first/__init__.py @@ -1,27 +0,0 @@ -#------------------------------------------------------------------------------- -# -# Intializes FIRST's DBManager and EngineManager -# Copyright (C) 2016 Angel M. Villegas -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# -#------------------------------------------------------------------------------- - -# FIRST Modules -from first.dbs import FIRSTDBManager -from first.engines import FIRSTEngineManager - -DBManager = FIRSTDBManager() -EngineManager = FIRSTEngineManager(DBManager) diff --git a/server/first/dbs/builtin_db.py b/server/first/dbs/builtin_db.py deleted file mode 100644 index 5f4437d..0000000 --- a/server/first/dbs/builtin_db.py +++ /dev/null @@ -1,466 +0,0 @@ -#------------------------------------------------------------------------------- -# -# FIRST DB Module for completing operations with the MongoDB backend -# Copyright (C) 2016 Angel M. Villegas -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Requirements -# ------------ -# - flask -# - mongoengine -# - werkzeug -# -#------------------------------------------------------------------------------- - -# Python Modules -import re -import math -import json -import hashlib -import datetime -import ConfigParser -from hashlib import md5 - -# Third Party Modules -import bson -from mongoengine import Q -from mongoengine.queryset import DoesNotExist, MultipleObjectsReturned - -# FIRST Modules -from first.dbs import AbstractDB -from first.models import User, Metadata, Function, Sample, Engine - - -class FIRSTDB(AbstractDB): - _name = 'first_db' - standards = { 'intel16', 'intel32', 'intel64', 'arm32', 'arm64', 'mips', - 'ppc', 'sparc', 'sysz'} - - # - # Functions called by FIRST Framework - #-------------------------------------------------------------------------- - def __init__(self, config): - ''' - Constructor. 
- - @param conf: ConfigParser.RawConfigParser - ''' - self._is_installed = True - ''' - section = 'mongodb_settings' - - if (not config.has_section(section) - or not config.has_option(section, 'db')): - raise FirstDBError('DB settings not available', skip=True) - - if section.upper() not in app.config: - app.config[section.upper()] = {} - - app.config[section.upper()]['db'] = conf.get(section, 'db') - self.db.init_app(app) - ''' - - def get_architectures(self): - standards = FIRSTDB.standards.copy() - standards.update(Function.objects().distinct(field='architecture')) - return list(standards) - - def get_sample(self, md5_hash, crc32, create=False): - try: - # Get Sample from DB - return Sample.objects.get(md5=md5_hash, crc32=crc32) - - except DoesNotExist: - if not create: - return None - - # Create Sample for DB - sample = Sample(md5=md5_hash, crc32=crc32) - sample.last_seen = datetime.datetime.now() - sample.save() - return sample - - def sample_seen_by_user(self, sample, user): - if (not isinstance(sample, Sample)) or (not isinstance(user, User)): - return None - - if user not in sample.seen_by: - sample.seen_by.append(user) - sample.save() - - def checkin(self, user, md5_hash, crc32, sha1_hash=None, sha256_hash=None): - ''' - TODO: - - @returns String error message on Failure - None - ''' - if not isinstance(user, User): - return False - - # Validate data - if ((not re.match('^[a-f\d]{32}$', md5_hash)) - or (sha1_hash and not re.match('^[a-f\d]{40}$', sha1_hash)) - or (sha256_hash and not re.match('^[a-f\d]{64}$', sha256_hash))): - return False - - sample = self.get_sample(md5_hash, crc32, True) - if not sample: - return False - - sample.last_seen = datetime.datetime.now() - if user not in sample.seen_by: - sample.seen_by.append(user) - - if None != sha1_hash: - sample.sha1 = sha1_hash - - if None != sha256_hash: - sample.sha256 = sha256_hash - - sample.save() - return True - - def get_function(self, opcodes, architecture, apis, create=False, **kwargs): - 
function = None - - try: - function = Function.objects.get(sha256=hashlib.sha256(opcodes).hexdigest(), - opcodes=bson.Binary(opcodes), - architecture=architecture, - apis=apis) - except DoesNotExist: - # Create function and add it to sample - function = Function(sha256=hashlib.sha256(opcodes).hexdigest(), - opcodes=bson.Binary(opcodes), - architecture=architecture, - apis=apis) - function.save() - - return function - - def get_all_functions(self): - try: - return Function.objects.all() - - except: - return [] - - def find_function(self, _id=None, opcodes=None, apis=None, architecture=None, h_sha256=None): - try: - # User function ID - if None != _id: - return Function.objects(id=bson.objectid.ObjectId(_id)).get() - - # User opcodes and apis - elif None not in [opcodes, apis]: - return Function.objects(opcodes=opcodes, apis=apis).get() - - # Use hash, architecture - elif None not in [architecture, h_sha256]: - return Function.objects(sha256=h_sha256, architecture=architecture).get() - - else: - return None - - except DoesNotExist: - return None - - def add_function_to_sample(self, sample, function): - if (not isinstance(sample, Sample)) or (not isinstance(function, Function)): - return False - - if function not in sample.functions: - sample.functions.append(function) - sample.save() - - return True - - def add_metadata_to_function(self, user, function, name, prototype, comment, **kwargs): - if (not isinstance(function, Function)) or (not isinstance(user, User)): - return None - - # Check to see if user already has metadata associated with the sample - metadata = None - for m in function.metadata: - if user == m.user: - if m.has_changed(name, prototype, comment): - m.name = [name] + m.name - m.prototype = [prototype] + m.prototype - m.comment = [comment] + m.comment - m.committed = [datetime.datetime.now()] + m.committed - - metadata = m - break - - if not metadata: - metadata = Metadata(user=user, name=[name], - prototype=[prototype], - comment=[comment], - 
committed=[datetime.datetime.now()]) - function.metadata.append(metadata) - - function.save() - return str(metadata.id) - - def get_metadata_list(self, metadata): - results = [] - user_metadata, engine_metadata = self.separate_metadata(metadata) - - metadata_ids = map(bson.objectid.ObjectId, user_metadata) - mongo_filter = Q(metadata__id=metadata_ids[0]) - for mid in metadata_ids[1:]: - mongo_filter |= Q(metadata__id=mid) - - matches = Function.objects.filter(mongo_filter).only('metadata') - for function in matches: - for metadata in function.metadata: - if metadata.id in metadata_ids: - data = metadata.dump() - data['id'] = str(metadata.id) - results.append(data) - - # Remove id from list to shorten list - del metadata_ids[metadata_ids.index(metadata.id)] - - for _id in engine_metadata: - engines = Engine.object(id=_id) - if (not engines) or (len(engines) > 1): - continue - - data = {'id' : _id, 'engine' : engine.name, - 'description' : engine.description} - results.append(data) - - return results - - def delete_metadata(self, user, metadata_id): - if not isinstance(user, User): - return False - - user_metadata, engine_metadata = self.separate_metadata([metadata_id]) - if not user_metadata: - return False - - # User must be the creator of the metadata to delete it - metadata_id = bson.objectid.ObjectId(user_metadata[0]) - try: - Function.objects(metadata__user=user, metadata__id=metadata_id).update_one(pull__metadata__id=metadata_id) - return True - except DoesNotExist: - return False - - def created(self, user, page, max_metadata=20): - offset = (page - 1) * max_metadata - results = [] - pages = 0 - - if (offset < 0) or (not isinstance(user, User)): - return (results, pages) - - try: - matches = Function.objects(metadata__user=user).only('metadata') - total = Function.objects(metadata__user=user).count() + 0.0 - pages = int(math.ceil(total / max_metadata)) - if page > pages: - return (results, pages) - - matches = matches.skip(offset).limit(max_metadata) - - 
except ValueError: - return (results, pages) - - for function in matches: - for metadata in function.metadata: - if user == metadata.user: - temp = metadata.dump() - temp['id'] = FIRSTDB.make_id(metadata.id, 0) - results.append(temp) - - # Bail out of inner loop early since a user can only - # create one metadata entry per function - break - - return (results, pages) - - @staticmethod - def make_id(_id, flags): - return '{:1x}{}'.format(flags & 0xF, _id) - - def separate_metadata(self, metadata): - # Get metadata created by users only, MSB should not be set - user_metadata = [] - engine_metadata = [] - for x in metadata: - if len(x) == 24: - user_metadata.append(x) - elif (len(x) == 25) and (((int(x[0], 16) >> 3) & 1) == 0): - user_metadata.append(x[1:]) - elif (len(x) == 25) and (((int(x[0], 16) >> 3) & 1) == 1): - engine_metadata.append(x[1:]) - - return (user_metadata, engine_metadata) - - def metadata_history(self, metadata): - results = {} - user_metadata, engine_metadata = self.separate_metadata(metadata) - e_comment = ('Generated by Engine: {0.name}\n{0.description}\n\n' - 'Developer: {0.developer.user_handle}') - - if len(user_metadata) > 0: - metadata_ids = map(bson.objectid.ObjectId, user_metadata) - mongo_filter = Q(metadata__id=metadata_ids[0]) - for mid in metadata_ids[1:]: - mongo_filter |= Q(metadata__id=mid) - - matches = Function.objects.filter(mongo_filter).only('metadata') - for function in matches: - for metadata in function.metadata: - if metadata.id in metadata_ids: - data = metadata.dump(True) - _id = FIRSTDB.make_id(metadata.id, 0) - results[_id] = {'creator' : data['creator'], - 'history' : data['history']} - # Remove id from list to shorten list - del metadata_ids[metadata_ids.index(metadata.id)] - - # Provide information for engine created metadata... 
- for engine_id in engine_metadata: - engine = self.get_engine(engine_id) - if not engine: - continue - data = {'creator' : engine.name, - 'history' : [{'committed' : '', - 'name' : 'N/A', - 'prototype' : 'N/A', - 'comment' : e_comment.format(engine)}]} - results[FIRSTDB.make_id(engine_id, 8)] = data - - return results - - def applied(self, sample, user, _id, is_engine=False): - ''' - @returns Boolean. True if added to the applied list - False if not added to the applied list - ''' - if (not isinstance(user, User)) or (not isinstance(sample, Sample)): - return False - - key = [str(sample.id), str(user.id)] - if is_engine: - engine_id = bson.objectid.ObjectId(_id) - engine = Engine.objects(id=engine_id, - applied__contains=key) - - # Check if user has already applied the signature - if len(engine): - return True - - try: - engine = Engine.objects(id=engine_id).get() - except DoesNotExist: - # Engine does not exist - return False - - engine.applied.append(key) - engine.save() - - else: - metadata_id = bson.objectid.ObjectId(_id) - functions = Function.objects(metadata__id=metadata_id, - metadata__applied__contains=key) - - # Check if user has already applied the signature - if len(functions): - return True - - try: - function = Function.objects(metadata__id=metadata_id).get() - except DoesNotExist: - # Metadata does not exist - return False - - # Get metadata - for metadata in function.metadata: - if metadata.id == metadata_id: - metadata.applied.append(key) - break - - function.save() - - return True - - def unapplied(self, sample, user, _id, is_engine=False): - ''' - @returns Boolean. 
True if not in metadata's applied list - False if still in the applied list - ''' - if (not isinstance(sample, Sample)) or (not isinstance(user, User)): - return False - - key = [str(sample.id), str(user.id)] - if is_engine: - engine_id = bson.objectid.ObjectId(_id) - engine = Engine.objects(id=engine_id, - applied__contains=key) - - # Check if user has already applied the signature - if not len(engine): - return True - - try: - engine = Engine.objects(id=engine_id).get() - except DoesNotExist: - # Engine does not exist - return False - - engine.applied.remove(key) - engine.save() - - else: - metadata_id = bson.objectid.ObjectId(_id) - functions = Function.objects(metadata__id=metadata_id, - metadata__applied__contains=key) - - # Check if user does not have it applied already - if not len(functions): - return True - - try: - function = functions.get() - except DoesNotExist: - # Metadata does not exist - return True - - # Get metadata - for metadata in function.metadata: - if metadata.id == metadata_id: - metadata.applied.remove(key) - break - - function.save() - - return True - - def engines(self, active=True): - return Engine.objects(active=bool(active)) - - def get_engine(self, engine_id): - engines = Engine.objects(id = engine_id) - if not engines: - return None - - return engines[0] diff --git a/server/first/engines/basic_masking.py b/server/first/engines/basic_masking.py deleted file mode 100644 index c3ead1f..0000000 --- a/server/first/engines/basic_masking.py +++ /dev/null @@ -1,222 +0,0 @@ -#------------------------------------------------------------------------------- -# -# FIRST Engine: Basic Masking -# Author: Angel M. Villegas (anvilleg@cisco.com) -# Last Modified: March 2016 -# -# Uses Distorm3 to obtain instructions and then removes certain instruction -# details to normalize it into a standard form to be compared to other -# functions. -# -# Maskes out: -# - ESP/EBP Offsets -# - Absolute Calls?? -# - Global Offsets?? 
-# -# Requirements -# ------------ -# - Distorm3 -# -# Installation -# ------------ -# None -# -#------------------------------------------------------------------------------- - -# Python Modules -import re -from hashlib import sha256 - -# FIRST Modules -from first.error import FIRSTError -from first.engines import AbstractEngine -from first.engines.results import FunctionResult - -# Third Party Modules -from bson.objectid import ObjectId -from distorm3 import DecomposeGenerator, Decode32Bits, Decode64Bits, Decode16Bits -from mongoengine.queryset import DoesNotExist, MultipleObjectsReturned -from mongoengine import Document, StringField, ListField, IntField, \ - ObjectIdField - -class BasicMasking(Document): - sha256 = StringField(max_length=64, required=True) - architecture = StringField(max_length=64, required=True) - instructions = ListField(StringField(max_length=124), required=True) - total_bytes = IntField(required=True, default=0) - functions = ListField(ObjectIdField(), default=list) - - meta = { - 'indexes' : [('sha256', 'architecture', 'instructions')] - } - - def dump(self): - return {'sha256' : self.sha256, - 'architecture' : self.architecture, - 'instructions' : self.instructions, - 'total_bytes' : self.total_bytes, - 'functions' : self.function_list()} - - def function_list(self): - return [str(x) for x in self.functions] - - -class BasicMaskingEngine(AbstractEngine): - _name = 'BasicMasking' - _description = ('Masks ESP/EBP offsets, calls/jmps offsets, and global ' - 'offsets (Intel Only). 
Requires at least 8 instructions.') - _required_db_names = ['first_db'] - - def normalize(self, opcodes, architecture): - changed_bits = 0 - dt = None - mapping = {'intel32' : Decode32Bits, - 'intel64' : Decode64Bits, - 'intel16' : Decode16Bits} - if architecture in mapping: - dt = mapping[architecture] - else: - return (None, changed_bits, None) - - try: - normalized = [] - original = [] - for i in DecomposeGenerator(0, opcodes, dt): - # If disassembly is not valid then junk data has been sent - if not i.valid: - return (None, 0, None) - - original.append(i._toText()) - instr = i.mnemonic + ' ' - - # Special mnemonic masking (Call, Jmp, JCC) - if (i.mnemonic == 'CALL') or i.mnemonic.startswith('J'): - operand = i.operands[0]._toText() - - if 'Immediate' == i.operands[0].type: - instr += '0x' - changed_bits += i.operands[0].size - - else: - regex = '^\[R(S|I)P(\+|\-)0x[\da-f]+\]$' - if re.match(regex, operand): - instr += re.sub(regex, r'[R\1P\2', operand) + '0x]' - changed_bits += i.operands[0].dispSize - else: - # Nothing will be masked out - instr = i._toText() - - normalized.append(instr) - continue - - operand_instrs = [] - for operand_obj in i.operands: - operand = operand_obj._toText() - if ((re.match('^\[E(S|B)P', operand) or re.match('^\[R(I|S)P', operand)) - and operand_obj.dispSize): - # Offset from EBP/ESP and RIP/RSP - masked = operand.replace(hex(operand_obj.disp), '0x') - operand_instrs.append(masked) - changed_bits += operand_obj.dispSize - - elif 'Immediate' == operand_obj.type: - value = operand_obj.value - # Masking off immediates within the standard VA of the sample - if ((0x400000 <= value <= 0x500000) - or (0x10000000 <= value <= 0x20000000) - or (0x1C0000000 <= value <= 0x1D0000000) - or (0x140000000 <= value <= 0x150000000)): - operand_instrs.append('0x') - changed_bits += operand_obj.size - - else: - operand_instrs.append(operand) - - elif 'AbsoluterMemoryAddress' == operand_obj.type: - operand_instrs.append('0x') - changed_bits += 
operand_obj.dispSize - - elif 'AbsoluteMemory' == operand_obj.type: - masked = operand.replace(hex(operand_obj.disp), '0x') - operand_instrs.append(masked) - changed_bits += operand_obj.dispSize - - else: - operand_instrs.append(operand) - - normalized.append(instr + ', '.join(operand_instrs)) - - h_sha256 = sha256(''.join(normalized)).hexdigest() - return (normalized, changed_bits, h_sha256) - # For debugging - #return (original, normalized, changed_bits, h_sha256) - - except Exception as e: - return (None, changed_bits, None) - - def _add(self, function): - ''' - - ''' - opcodes = function['opcodes'] - architecture = function['architecture'] - normalized, changed, h_sha256 = self.normalize(opcodes, architecture) - - if (not h_sha256) or (not normalized) or (8 > len(normalized)): - return - - try: - db_obj = BasicMasking.objects( sha256=h_sha256, - architecture=architecture, - instructions=normalized).get() - except DoesNotExist: - db_obj = BasicMasking( sha256=h_sha256, - architecture=architecture, - instructions=normalized, - total_bytes=len(opcodes)) - - function_id = ObjectId(function['id']) - if function_id not in db_obj.functions: - db_obj.functions.append(function_id) - db_obj.save() - - def _scan(self, opcodes, architecture, apis): - '''Returns List of tuples (function ID, similarity percentage)''' - db = self._dbs['first_db'] - normalized, changed, h_sha256 = self.normalize(opcodes, architecture) - - if (not h_sha256) or (not normalized) or (8 > len(normalized)): - return - - try: - db_obj = BasicMasking.objects( sha256=h_sha256, - architecture=architecture, - instructions=normalized).get() - except DoesNotExist: - return None - - results = [] - for function_id in db_obj.function_list(): - function = db.find_function(_id=ObjectId(function_id)) - - if not function or not function.metadata: - continue - - # Similarity = 90% (opcodes and the masking changes) - # + 10% (api overlap) - similarity = 100 - ((changed / (len(opcodes) * 8.0)) * 100) - if similarity 
> 90.0: - similarity = 90.0 - - # The APIs will count up to 10% of the similarity score - total_apis = len(function.apis) - overlap = float(len(set(function.apis).intersection(apis))) - if 0 != total_apis: - similarity += (overlap / total_apis) * 10 - - results.append(FunctionResult(function_id, similarity)) - - return results - - def _uninstall(self): - BasicMasking.drop_collection() diff --git a/server/first/engines/mnemonic_hash.py b/server/first/engines/mnemonic_hash.py deleted file mode 100644 index 38dfedd..0000000 --- a/server/first/engines/mnemonic_hash.py +++ /dev/null @@ -1,165 +0,0 @@ -#------------------------------------------------------------------------------- -# -# FIRST Engine: Mnemonic Hash -# Uses Distorm3 to obtain mnemonics from the opcodes, reduces the opcodes to -# a single string and hashes it for future lookup -# -# Copyright (C) 2016 Angel M. Villegas -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
-# -# Requirements -# ------------ -# - distorm3 -# - mongoengine -# -#------------------------------------------------------------------------------- - -# Python Modules -from hashlib import sha256 - -# FIRST Modules -from first.error import FIRSTError -from first.engines import AbstractEngine -from first.engines.results import FunctionResult - -# Third Party Modules -from bson.objectid import ObjectId -from distorm3 import DecomposeGenerator, Decode32Bits, Decode64Bits, Decode16Bits -from django.db import models - - -class MnemonicHash(models.Model): - sha256 = models.CharField(max_length=64) - architecture = models.CharField(max_length=64) - functions = model.ManyToManyField(MnemonicHashFunctions) - - class Meta: - index_together = ('sha256', 'architecture') - - def dump(self): - return {'sha256' : self.sha256, - 'architecture' : self.architecture, - 'functions' : self.functions.all()} - - def function_list(self): - return [str(x) for x in self.functions.all()] - -class MnemonicHashFunctions(models.Model): - func = models.BigIntegerField() - -''' -class MnemonicHash(Document): - sha256 = StringField(max_length=64, required=True) - architecture = StringField(max_length=64, required=True) - functions = ListField(ObjectIdField(), default=list) - - meta = { - 'indexes' : [('sha256', 'architecture')] - } - - def dump(self): - return {'sha256' : self.sha256, - 'architecture' : self.architecture, - 'functions' : self.function_list()} - - def function_list(self): - return [str(x) for x in self.functions] -''' - -class MnemonicHashEngine(AbstractEngine): - _name = 'MnemonicHash' - _description = ('Uses mnemonics from the opcodes to generate a hash ' - '(Intel Only). 
Requires at least 8 mnemonics.') - _required_db_names = ['first_db'] - - def mnemonic_hash(self, opcodes, architecture): - dt = None - mapping = {'intel32' : Decode32Bits, - 'intel64' : Decode64Bits, - 'intel16' : Decode16Bits} - if architecture in mapping: - dt = mapping[architecture] - else: - return (None, None) - - try: - iterable = DecomposeGenerator(0, opcodes, dt) - - # Uses valid to ensure we are not creating hashes with 'db 0xYY' - mnemonics = [d.mnemonic for d in iterable if d.valid] - return (mnemonics, sha256(''.join(mnemonics)).hexdigest()) - - except Exception as e: - return (None, None) - - def _add(self, function): - ''' - Nothing needs to be implemented since the Function Model has the - sha256 of the opcodes - ''' - opcodes = function['opcodes'] - architecture = function['architecture'] - mnemonics, mnemonic_sha256 = self.mnemonic_hash(opcodes, architecture) - - if (not mnemonic_sha256) or (not mnemonics) or (8 > len(mnemonics)): - return - - try: - db_obj = MnemonicHash.objects( sha256=mnemonic_sha256, - architecture=architecture).get() - except DoesNotExist: - db_obj = MnemonicHash( sha256=mnemonic_sha256, - architecture=architecture) - - function_id = ObjectId(function['id']) - if function_id not in db_obj.functions: - db_obj.functions.append(function_id) - db_obj.save() - - def _scan(self, opcodes, architecture, apis): - '''Returns List of tuples (function ID, similarity percentage)''' - db = self._dbs['first_db'] - mnemonics, mnemonic_sha256 = self.mnemonic_hash(opcodes, architecture) - - if (not mnemonic_sha256) or (not mnemonics) or (8 > len(mnemonics)): - return - - try: - db_obj = MnemonicHash.objects( sha256=mnemonic_sha256, - architecture=architecture).get() - except DoesNotExist: - return None - - results = [] - for function_id in db_obj.function_list(): - similarity = 75.0 - function = db.find_function(_id=ObjectId(function_id)) - - if not function or not function.metadata: - continue - - # The APIs will count up to 10% of the 
similarity score - total_apis = len(function.apis) - overlap = float(len(set(function.apis).intersection(apis))) - if 0 != total_apis: - similarity += (overlap / total_apis) * 10 - - results.append(FunctionResult(function_id, similarity)) - - return results - - def _uninstall(self): - MnemonicHash.drop_collection() diff --git a/server/first/models.py b/server/first/models.py deleted file mode 100644 index 1846882..0000000 --- a/server/first/models.py +++ /dev/null @@ -1,197 +0,0 @@ -#------------------------------------------------------------------------------- -# -# FIRST MongoDB Models -# Copyright (C) 2016 Angel M. Villegas -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
-# -# Requirements -# ------------ -# mongoengine (https://pypi.python.org/pypi/mongoengine/) -# -#------------------------------------------------------------------------------- - - -# Python Modules -from __future__ import unicode_literals -import datetime - -# Third Party Modules -from bson.objectid import ObjectId -from mongoengine import Document, StringField, UUIDField, \ - DateTimeField, LongField, ReferenceField, \ - BinaryField, ListField, BooleanField, ObjectIdField, \ - IntField, EmbeddedDocument, EmbeddedDocumentListField - -class User(Document): - name = StringField(max_length=128, required=True) - email = StringField(max_length=254, unique=True) - handle = StringField(max_length=32, required=True) - number = IntField(required=True) - api_key = UUIDField(required=True, unique=True) - created = DateTimeField(default=datetime.datetime.utcnow, required=True) - rank = LongField(default=0) - active = BooleanField(default=True) - - service = StringField(max_length=16, required=True) - auth_data = StringField(max_length=4096, required=True) - - meta = { - 'indexes' : [('handle', 'number'), 'api_key', 'email'] - } - - @property - def user_handle(self): - return '{0.handle}#{0.number:04d}'.format(self) - - def dump(self, full=False): - data = {'handle' : self.user_handle} - - if full: - data.update({ 'id' : str(self.id), - 'name' : self.name, - 'email' : self.email, - 'api_key' : self.api_key, - 'rank' : self.rank, - 'created' : self.created, - 'active' : self.active}) - - return data - - -class Engine(Document): - name = StringField(max_length=16, required=True, unique=True) - description = StringField(max_length=128, required=True) - path = StringField(max_length=256, required=True) - obj_name = StringField(max_length=32, required=True) - applied = ListField(default=list) - developer = ReferenceField(User) - active = BooleanField(default=False) - - meta = { - 'indexes' : ['name'] - } - - def dump(self, full=False): - data = {'name' : self.name, - 
'description' : self.description, - 'rank' : self.rank, - 'developer' : self.developer.user_handle} - - if full: - data.update({'id' : str(self.id), 'path' : self.path}) - - return data - - @property - def rank(self): - return len(self.applied) - - -class Metadata(EmbeddedDocument): - id = ObjectIdField(required=True, default=lambda: ObjectId()) - user = ReferenceField(User) - name = ListField(StringField(max_length=128), default=list) - prototype = ListField(StringField(max_length=256), default=list) - comment = ListField(StringField(max_length=512), default=list) - committed = ListField(DateTimeField(), default=list) - applied = ListField(default=list) - - meta = { - 'indexes' : ['user'] - } - - def dump(self, full=False): - data = {'creator' : self.user.user_handle, - 'name' : self.name[0], - 'prototype' : self.prototype[0], - 'comment' : self.comment[0], - 'rank' : len(self.applied)} - - if full: - data['history'] = [] - for i in xrange(len(self.name) - 1, -1, -1): - # Convert back with: - # datetime.datetime.strptime(
, '%Y-%m-%dT%H:%M:%S.%f') - committed = self.committed[i].isoformat() - data['history'].append({'name' : self.name[i], - 'prototype' : self.prototype[i], - 'comment' : self.comment[i], - 'committed' : committed}) - - return data - - def has_changed(self, name, prototype, comment): - if (not self.name) or (not self.prototype) or (not comment): - return True - - if ((self.name[0] != name) - or (self.prototype[0] != prototype) - or (self.comment[0] != comment)): - return True - - return False - - @property - def rank(self): - return len(self.applied) - -# Use bson.Binary to insert binary data -class Function(Document): - sha256 = StringField(max_length=64) - opcodes = BinaryField() - apis = ListField(StringField(max_length=128), default=list) - metadata = EmbeddedDocumentListField(Metadata, default=list) - # Return value from idaapi.get_file_type_name() - architecture = StringField(max_length=64, required=True) - - meta = { - 'indexes' : [] - } - - def dump(self): - return {'id' : self.id, - 'opcodes' : self.opcodes, - 'apis' : self.apis, - 'metadata' : [str(x.id) for x in self.metadata], - 'architecture' : self.architecture, - 'sha256' : self.sha256} - - -class Sample(Document): - md5 = StringField(max_length=32, required=True) - crc32 = IntField(required=True) - sha1 = StringField(max_length=40) - sha256 = StringField(max_length=64) - seen_by = ListField(ReferenceField(User), default=list) - functions = ListField(ReferenceField(Function), default=list) - last_seen = DateTimeField(default=datetime.datetime.utcnow) - - meta = { - 'indexes' : [('md5', 'crc32')] - } - - def dump(self): - data = {'md5' : self.md5, 'crc32' : self.crc32, - 'seen_by' : [str(x.id) for x in self.seen_by], - 'functions' : [str(x.id) for x in self.functions]} - - if 'sha1' in self: - data['sha1'] = self.sha1 - - if 'sha256' in self: - data['sha256'] = self.sha256 - - return data diff --git a/server/first/settings.py b/server/first/settings.py index 53faf89..08ecb18 100644 --- 
a/server/first/settings.py +++ b/server/first/settings.py @@ -14,7 +14,9 @@ import json # Read in configuration data -FIRST_CONFIG_FILE = 'first_config.json' +FIRST_CONFIG_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '..', + 'first_config.json') CONFIG = {} try: config_data = json.load(file(FIRST_CONFIG_FILE)) diff --git a/server/first_core/__init__.py b/server/first_core/__init__.py new file mode 100644 index 0000000..4b7b779 --- /dev/null +++ b/server/first_core/__init__.py @@ -0,0 +1,27 @@ +#------------------------------------------------------------------------------- +# +# Initializes FIRST's DBManager and EngineManager +# Copyright (C) 2016 Angel M. Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +#------------------------------------------------------------------------------- + +# FIRST Modules +from first_core.dbs import FIRSTDBManager +from first_core.engines import FIRSTEngineManager + +DBManager = FIRSTDBManager() +EngineManager = FIRSTEngineManager(DBManager) diff --git a/server/first/auth.py b/server/first_core/auth.py similarity index 98% rename from server/first/auth.py rename to server/first_core/auth.py index 724399e..756b7e6 100644 --- a/server/first/auth.py +++ b/server/first_core/auth.py @@ -38,8 +38,8 @@ # FIRST Modules # TODO: Use DBManager to get user objects and do User operations -from first.models import User -from first.error import FIRSTError +from first_core.models import User +from first_core.error import FIRSTError # Thirdy Party import httplib2 @@ -57,7 +57,7 @@ def __init__(self, message): def verify_api_key(api_key): - users = User.objects(api_key=api_key) + users = User.objects.filter(api_key=api_key) if not users: return None diff --git a/server/first/dbs/__init__.py b/server/first_core/dbs/__init__.py similarity index 97% rename from server/first/dbs/__init__.py rename to server/first_core/dbs/__init__.py index 3b28548..ce27b71 100644 --- a/server/first/dbs/__init__.py +++ b/server/first_core/dbs/__init__.py @@ -24,7 +24,7 @@ from hashlib import md5 # FIRST Modules -from first.error import FIRSTError +from first_core.error import FIRSTError # Class for FirstDB related exceptions class FIRSTDBError(FIRSTError): @@ -110,6 +110,6 @@ def get(self, db_name): # FIRST DB Classes -from first.dbs.builtin_db import FIRSTDB +from first_core.dbs.builtin_db import FIRSTDB possible_dbs = [FIRSTDB] diff --git a/server/first_core/dbs/builtin_db.py b/server/first_core/dbs/builtin_db.py new file mode 100644 index 0000000..ef1dea2 --- /dev/null +++ b/server/first_core/dbs/builtin_db.py @@ -0,0 +1,433 @@ +#------------------------------------------------------------------------------- +# +# FIRST DB Module for completing operations with the 
MongoDB backend +# Copyright (C) 2016 Angel M. Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Requirements +# ------------ +# - werkzeug +# +#------------------------------------------------------------------------------- + +# Python Modules +import re +import math +import json +import hashlib +import ConfigParser +from hashlib import md5 + +# Third Party Modules +import bson + +from django.utils import timezone +from django.core.paginator import Paginator +from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned + +# FIRST Modules +from first_core.dbs import AbstractDB +from first_core.util import make_id, parse_id, separate_metadata, \ + is_engine_metadata +from first_core.models import User, Sample, \ + Engine, \ + Metadata, MetadataDetails, AppliedMetadata, \ + Function, FunctionApis + + +class FIRSTDB(AbstractDB): + _name = 'first_db' + standards = { 'intel16', 'intel32', 'intel64', 'arm32', 'arm64', 'mips', + 'ppc', 'sparc', 'sysz'} + + # + # Functions called by FIRST Framework + #-------------------------------------------------------------------------- + def __init__(self, config): + ''' + Constructor. 
+ + @param conf: ConfigParser.RawConfigParser + ''' + self._is_installed = True + ''' + section = 'mongodb_settings' + + if (not config.has_section(section) + or not config.has_option(section, 'db')): + raise FirstDBError('DB settings not available', skip=True) + + if section.upper() not in app.config: + app.config[section.upper()] = {} + + app.config[section.upper()]['db'] = conf.get(section, 'db') + self.db.init_app(app) + ''' + + def get_architectures(self): + field = 'architecture' + architectures = Function.objects.values(field).distinct() + + standards = FIRSTDB.standards.copy() + standards.update({x[field] for x in architectures}) + return list(standards) + + def get_sample(self, md5_hash, crc32, create=False): + try: + # Get Sample from DB + return Sample.objects.get(md5=md5_hash, crc32=crc32) + + except ObjectDoesNotExist: + if not create: + return None + + # Create Sample for DB + sample = Sample(md5=md5_hash, crc32=crc32) + sample.last_seen = timezone.now() + sample.save() + return sample + + except MultipleObjectsReturned: + # TODO: log occurrence + raise + + def sample_seen_by_user(self, sample, user): + if (not isinstance(sample, Sample)) or (not isinstance(user, User)): + return None + + if not Sample.objects.filter(pk=sample.id, seen_by=user).count(): + sample.seen_by.add(user) + + def checkin(self, user, md5_hash, crc32, sha1_hash=None, sha256_hash=None): + ''' + TODO: + + @returns String error message on Failure + None + ''' + if not isinstance(user, User): + return False + + # Validate data + if ((not re.match('^[a-f\d]{32}$', md5_hash)) + or (sha1_hash and not re.match('^[a-f\d]{40}$', sha1_hash)) + or (sha256_hash and not re.match('^[a-f\d]{64}$', sha256_hash))): + return False + + sample = self.get_sample(md5_hash, crc32, True) + if not sample: + return False + + sample.last_seen = timezone.now() + if not Sample.objects.filter(pk=sample.id, seen_by=user).count(): + sample.seen_by.add(user) + + if None != sha1_hash: + sample.sha1 = sha1_hash + + 
if None != sha256_hash: + sample.sha256 = sha256_hash + + sample.save() + return True + + def get_function_metadata(self, _id): + '''Get the metadata associated with the provided Function ID + + Args: + _id (:obj:`int`): ID from Function model + + Returns: + QuerySet. + ''' + return Metadata.objects.filter(function__pk=_id) + + def get_function(self, opcodes, architecture, apis, create=False, **kwargs): + sha256_hash = hashlib.sha256(opcodes).hexdigest() + function = None + + try: + function = Function.objects.get(sha256=sha256_hash, + opcodes=bson.Binary(opcodes), + architecture=architecture) #, + #apis__api=apis) + except ObjectDoesNotExist: + if create: + # Create function and add it to sample + function = Function.objects.create( sha256=sha256_hash, + opcodes=bson.Binary(opcodes), + architecture=architecture) + + apis_ = [FunctionApis.objects.get_or_create(x)[0] for x in apis] + for api in apis_: + function.apis.add(api) + + return function + + def get_all_functions(self): + try: + return Function.objects.all() + + except: + return [] + + def find_function(self, _id=None, opcodes=None, apis=None, architecture=None, h_sha256=None): + try: + # User function ID + if None != _id: + return Function.objects.get(pk=_id) + + # User opcodes and apis + elif None not in [opcodes, apis]: + return Function.objects.get(opcodes=opcodes, apis=apis) + + # Use hash, architecture + elif None not in [architecture, h_sha256]: + return Function.objects.get(sha256=h_sha256, + architecture=architecture) + + else: + return None + + except ObjectDoesNotExist: + return None + + except MultipleObjectsReturned: + # TODO: Log + raise + + def add_function_to_sample(self, sample, function): + if (not isinstance(sample, Sample)) or (not isinstance(function, Function)): + return False + + if not Sample.objects.filter(pk=sample.id, functions=function).count(): + sample.functions.add(function) + + return True + + def add_metadata_to_function(self, user, function, name, prototype, comment, 
**kwargs): + if (not isinstance(function, Function)) or (not isinstance(user, User)): + return None + + # Check to see if user already has metadata associated with the sample + metadata = None + print function.id + print user.id + if Function.objects.filter(pk=function.id, metadata__user=user).count(): + # Metadata already exists + metadata = Metadata.objects.get(function=function, user=user) + + else: + metadata = Metadata.objects.create(user=user) + function.metadata.add(metadata) + + if metadata.has_changed(name, prototype, comment): + md = MetadataDetails.objects.create(name=name, + prototype=prototype, + comment=comment) + metadata.details.add(md) + + return metadata.id + + def get_metadata_list(self, metadata): + results = [] + metadata_ids, engine_metadata = separate_metadata(metadata) + + for _id, metadata in Metadata.objects.in_bulk(metadata_ids).iteritems(): + data = metadata.dump() + data['id'] = make_id(0, metadata=metadata.id) + results.append(data) + + for flag, _id, metadata_id in engine_metadata: + engines = Engine.objects.get(pk=_id) + # TODO: Send metadata_id to engine for more info + if (not engines) or (len(engines) > 1): + continue + + data = {'id' : make_id(flag, metadata_id, _id), + 'engine' : engine.name, + 'description' : engine.description} + results.append(data) + + return results + + def delete_metadata(self, user, metadata_id): + if not isinstance(user, User): + return False + + user_metadata, engine_metadata = separate_metadata([metadata_id]) + if not user_metadata: + return False + + # User must be the creator of the metadata to delete it + metadata_id = user_metadata[0] + try: + metadata = Metadata.objects.get(pk=metadata_id, user=user) + metadata.delete() + return True + + except ObjectDoesNotExist: + return False + + def created(self, user, page, max_metadata=20): + pages = 0 + results = [] + + if (page < 1) or (not isinstance(user, User)): + return (results, pages) + + p = Paginator(Metadata.objects.filter(user=user), 
max_metadata) + pages = p.num_pages + + if page > pages: + return (results, pages) + + for metadata in p.page(page): + temp = metadata.dump() + temp['id'] = make_id(0, metadata=metadata.id) + results.append(temp) + + return (results, pages) + + def metadata_history(self, metadata): + results = {} + metadata_ids, engine_metadata = separate_metadata(metadata) + e_comment = ('Generated by Engine: {0.name}\n{0.description}\n\n' + 'Developer: {0.developer.user_handle}') + + for _id, metadata in Metadata.objects.in_bulk(metadata_ids).iteritems(): + data = metadata.dump(True) + result_key = make_id(0, metadata=_id) + results[result_key] = { 'creator' : data['creator'], + 'history' : data['history']} + + # Provide information for engine created metadata... + for flag, engine_id, _id in engine_metadata: + engine = self.get_engine(engine_id) + if not engine: + continue + data = {'creator' : engine.name, + 'history' : [{'committed' : '', + 'name' : 'N/A', + 'prototype' : 'N/A', + 'comment' : e_comment.format(engine)}]} + result_key = make_id(flag, engine=engine_id, metadata=_id) + results[result_key] = data + + return results + + def applied(self, sample, user, _id): + ''' + @returns Boolean. 
True if added to the applied list + False if not added to the applied list + ''' + if (not isinstance(user, User)) or (not isinstance(sample, Sample)): + return False + + flag, engine_data, metadata_id = parse_id(_id) + if is_engine_metadata(_id): + pass + # TODO: add this capability back again + #engine_id = _id + #engine = Engine.objects(id=engine_id, + # applied__contains=key) + + ## Check if user has already applied the signature + #if len(engine): + # return True + + #try: + # engine = Engine.objects(id=engine_id).get() + #except ObjectDoesNotExist: + # # Engine does not exist + # return False + + #engine.applied.append(key) + #engine.save() + + else: + try: + # Ensure Metadata exists + metadata = Metadata.objects.get(pk=metadata_id) + except ObjectDoesNotExist: + # Metadata does not exist + return False + + r = AppliedMetadata.objects.get_or_create( user=user, + sample=sample, + metadata=metadata) + + return True + + def unapplied(self, sample, user, _id): + ''' + @returns Boolean. 
True if not in metadata's applied list + False if still in the applied list + ''' + if (not isinstance(sample, Sample)) or (not isinstance(user, User)): + return False + + flag, engine_data, metadata_id = parse_id(_id) + if is_engine_metadata(_id): + pass + # TODO: add this capability back again + #engine_id = _id + #engine = Engine.objects(id=engine_id, + # applied__contains=key) + + ## Check if user has already applied the signature + #if not len(engine): + # return True + + #try: + # engine = Engine.objects(id=engine_id).get() + #except ObjectDoesNotExist: + # # Engine does not exist + # return False + + #engine.applied.remove(key) + #engine.save() + + else: + try: + # Ensure Metadata exists + metadata = Metadata.objects.get(pk=metadata_id) + except ObjectDoesNotExist: + # Metadata does not exist + return False + + try: + data = AppliedMetadata.objects.get( user=user, + sample=sample, + metadata=metadata) + data.delete() + return True + + except ObjectDoesNotExist: + return True + + + return False + + def engines(self, active=True): + return Engine.objects.filter(active=bool(active)) + + def get_engine(self, engine_id): + engines = Engine.objects.filter(pk=engine_id) + if not engines.count(): + return None + + return engines.first() diff --git a/server/first/engines/__init__.py b/server/first_core/engines/__init__.py similarity index 98% rename from server/first/engines/__init__.py rename to server/first_core/engines/__init__.py index 4fce345..19d708c 100644 --- a/server/first/engines/__init__.py +++ b/server/first_core/engines/__init__.py @@ -15,9 +15,9 @@ import sys # First Modules -from first.error import FIRSTError -from first.dbs import FIRSTDBManager -from first.engines.results import Result +from first_core.error import FIRSTError +from first_core.dbs import FIRSTDBManager +from first_core.engines.results import Result # Third Party Modules from bson.objectid import ObjectId diff --git a/server/first/engines/exact_match.py 
b/server/first_core/engines/exact_match.py similarity index 90% rename from server/first/engines/exact_match.py rename to server/first_core/engines/exact_match.py index 3a78eb3..c9c7b32 100644 --- a/server/first/engines/exact_match.py +++ b/server/first_core/engines/exact_match.py @@ -23,9 +23,9 @@ from hashlib import sha256 # FIRST Modules -from first.error import FIRSTError -from first.engines import AbstractEngine -from first.engines.results import FunctionResult +from first_core.error import FIRSTError +from first_core.engines import AbstractEngine +from first_core.engines.results import FunctionResult class ExactMatchEngine(AbstractEngine): _name = 'ExactMatch' @@ -50,7 +50,7 @@ def _scan(self, opcodes, architecture, apis): return None similarity = 90.0 - if set(function.apis) == set(apis): + if set(function.apis.values()) == set(apis): similarity += 10.0 return [FunctionResult(str(function.id), similarity)] diff --git a/server/first/engines/results.py b/server/first_core/engines/results.py similarity index 90% rename from server/first/engines/results.py rename to server/first_core/engines/results.py index afbe89c..5e3a6db 100644 --- a/server/first/engines/results.py +++ b/server/first_core/engines/results.py @@ -5,6 +5,7 @@ # Last Modified: August 2016 # #------------------------------------------------------------------------------- +from first_core.util import make_id class Result(object): '''Abstract class to encapsulate results returned from Engines''' @@ -87,23 +88,19 @@ class FunctionResult(Result): This Result class is crafted for general engines that want to return a list of functions to the EngineManager - ID values are 25 hex character string. For metadata created by users, - not engines, the most significant bit is not set. + ID values are 26 hex character string. For metadata created by users, + not engines, the flag byte not set. 
''' def _get_metadata(self, db): if not hasattr(self, '_metadata'): - func = db.find_function(_id=self.id) - if not func: - return None - - self._metadata = func.metadata + self._metadata = list(db.get_function_metadata(self.id)) self._metadata.sort(key=lambda x: x.rank) data = None if len(self._metadata) > 0: metadata = self._metadata.pop() data = metadata.dump() - data['id'] = '0{}'.format(metadata.id) + data['id'] = make_id(0, metadata=metadata.id) return data @@ -119,9 +116,11 @@ class EngineResult(Result): ''' def _init(self, **kwargs): self._data = None + self._metadata = 0 + if 'data' in kwargs: self._data = kwargs['data'] - self._data['id'] = '8{}'.format(self.id) + self._data['id'] = make_id(1, self._metadata, self.id) def _get_metadata(self, db): data = self._data diff --git a/server/first/engines/skeleton.py_ b/server/first_core/engines/skeleton.py_ similarity index 96% rename from server/first/engines/skeleton.py_ rename to server/first_core/engines/skeleton.py_ index e005e82..2954768 100644 --- a/server/first/engines/skeleton.py_ +++ b/server/first_core/engines/skeleton.py_ @@ -20,8 +20,8 @@ # FIRST Modules -from first.error import FIRSTError -from first.engines import AbstractEngine +from first_core.error import FIRSTError +from first_core.engines import AbstractEngine # Third Party Modules diff --git a/server/first/error.py b/server/first_core/error.py similarity index 100% rename from server/first/error.py rename to server/first_core/error.py diff --git a/server/first_core/models.py b/server/first_core/models.py new file mode 100644 index 0000000..cdfb45b --- /dev/null +++ b/server/first_core/models.py @@ -0,0 +1,27 @@ +#------------------------------------------------------------------------------- +# +# FIRST Django ORM Models +# Copyright (C) 2017 Angel M. 
Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +#------------------------------------------------------------------------------- + + +# Python Modules +from __future__ import unicode_literals + +# FIRST Modules +from www.models import * diff --git a/server/first_core/util.py b/server/first_core/util.py new file mode 100644 index 0000000..96fee63 --- /dev/null +++ b/server/first_core/util.py @@ -0,0 +1,105 @@ +#------------------------------------------------------------------------------- +# +# FIRST Utility and Helper Functions +# Copyright (C) 2017 Angel M. Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +#------------------------------------------------------------------------------- + + +def make_id(flags, metadata=0, engine=0): + '''Creates a unique ID for client use. + + Args: + flags (:obj:`int`): Value between 0 and 255. + MSB set when ID is from an engine. + metadata (:obj:`int`, optional): The Metadata model ID + engine (:obj:`int`, optional): The Engine model ID + + Returns: + string: A 26 character hex string + ''' + data = [flags, metadata, engine] + if (None in data) or (not all([type(x) in [int, long] for x in data])): + return None + + if ((engine > (2**32 - 1)) or (metadata > (2**64 - 1)) + or (flags > (2**8 - 1))): + return None + + return '{:02x}{:08x}{:016x}'.format(flags, engine, metadata) + + +def parse_id(_id): + if len(_id) != 26: + return (None, None, None) + + _id = int(_id, 16) + flag = _id >> (8 * 12) + engine_data = (_id >> (8 * 8)) & (0xFFFFFFFF) + metadata_id = _id & 0xFFFFFFFFFFFFFFFF + + return (flag, engine_data, metadata_id) + +def separate_metadata(ids): + '''Returns parsed IDs for user and engine generated metadata. 
+ + Args: + ids (:obj:`list`): List of 26 hex strings + metadata (:obj:`int`, optional): The Metadata model ID + engine (:obj:`int`, optional): The Engine model ID + + Returns: + tuple: Index 0 contains user created metadata IDs + Index 1 contains engine created metadata details + ''' + # ID: Flag Byte | Engine 4 bytes | Metadata 8 bytes = 13 bytes + # 26 ASCII characters + # If Flag is set then more processing is needed and it is not + # metadata created by the user + user_metadata = [] + engine_metadata = [] + for x in ids: + flag, engine_data, metadata_id = parse_id(x) + if None in [flag, engine_data, metadata_id]: + continue + + if not flag: + user_metadata.append(metadata_id) + else: + engine_metadata.append((flag, engine_data, metadata_id)) + + return (user_metadata, engine_metadata) + +def is_user_metadata(_id): + details = parse_id(_id) + if None in details: + return False + + if not details[0]: + return True + + return False + +def is_engine_metadata(_id): + details = parse_id(_id) + if None in details: + return False + + if details[0]: + return True + + return False diff --git a/server/rest/urls.py b/server/rest/urls.py index d75b467..013f6f4 100644 --- a/server/rest/urls.py +++ b/server/rest/urls.py @@ -20,8 +20,7 @@ views.metadata_unapplied, name='metadata_unapplied'), url(r'^metadata/get/(?i)(?P[A-F\d]{8}\-(?:[A-F\d]{4}\-){3}[A-F\d]{12})$', views.metadata_get, name='metadata_get'), - # TODO: migrate to ids with 25 characters - url(r'^metadata/delete/(?i)(?P[A-F\d]{8}\-(?:[A-F\d]{4}\-){3}[A-F\d]{12})/(?i)(?P<_id>[A-F\d]{24,25})$', + url(r'^metadata/delete/(?i)(?P[A-F\d]{8}\-(?:[A-F\d]{4}\-){3}[A-F\d]{12})/(?i)(?P<_id>[A-F\d]{26})$', views.metadata_delete, name='metadata_delete'), url(r'^metadata/created/(?i)(?P[A-F\d]{8}\-(?:[A-F\d]{4}\-){3}[A-F\d]{12})$', views.metadata_created, name='metadata_created'), diff --git a/server/rest/views.py b/server/rest/views.py index e83a76f..2fd67fd 100644 --- a/server/rest/views.py +++ b/server/rest/views.py @@ -12,14 
+12,15 @@ from django.views.decorators.http import require_GET, require_POST # FIRST Modules -from first import DBManager, EngineManager -from first.auth import verify_api_key, Authentication, FIRSTAuthError, \ +from first_core import DBManager, EngineManager +from first_core.util import make_id, is_engine_metadata +from first_core.auth import verify_api_key, Authentication, FIRSTAuthError, \ require_login, require_apikey MAX_FUNCTIONS = 20 MAX_METADATA = 20 -VALIDATE_IDS = lambda x: re.match('^[a-f\d]{24,25}$', x) +VALIDATE_IDS = lambda x: re.match('^[A-Fa-f\d]{26}$', x) #----------------------------------------------------------------------------- # @@ -217,8 +218,7 @@ def metadata_add(request, md5_hash, crc32, user): f = functions[client_key] # Check if the id sent back is from an engine, if so skip it - if (('id' in f) and (f['id']) and (len(f['id']) == 25) - and ((int(f['id'][0]) >> 3) & 1)): + if (('id' in f) and (f['id']) and not is_engine_metadata(f['id'])): continue; function = db.get_function(create=True, **f) @@ -238,7 +238,8 @@ def metadata_add(request, md5_hash, crc32, user): 'function in FIRST')}) # The '0' indicated the metadata_id is from a user. 
- results[client_key] = '0{}'.format(metadata_id) + print metadata_id + results[client_key] = make_id(0, metadata=metadata_id) # Set the user as applying the metadata db.applied(sample, user, metadata_id) @@ -295,6 +296,7 @@ def metadata_history(request, user): return render(request, 'rest/error_json.html', {'msg' : 'Exceeded max bulk request'}) + print metadata if None in map(VALIDATE_IDS, metadata): return render(request, 'rest/error_json.html', {'msg' : 'Invalid metadata id'}) @@ -544,26 +546,15 @@ def metadata_status_change(_id, user, md5_hash, crc32, applied): return render(None, 'rest/error_json.html', {'msg' : 'Invalid metadata information'}) - # Currently 24-25, early beta used a 24 byte string, moved to 25 byte one - # TODO: Change to 25 only once it is closed beta time if not VALIDATE_IDS(_id): return render(None, 'rest/error_json.html', {'msg' : 'Invalid id value'}) - metadata_id = _id - if len(_id) == 25: - metadata_id = _id[1:] - db = DBManager.first_db if not db: return render(None, 'rest/error_json.html', {'msg' : 'Unable to connect to FIRST DB'}) - is_engine = False - if ((len(_id) == 25) and (int(_id[0], 16) & 0x8)): - # Metadata came from an engine - is_engine = True - # Get sample sample = db.get_sample(md5_hash, crc32) if not sample: @@ -571,8 +562,8 @@ def metadata_status_change(_id, user, md5_hash, crc32, applied): {'msg' : 'Sample does not exist in FIRST'}) if applied: - results = db.applied(sample, user, metadata_id, is_engine) + results = db.applied(sample, user, _id) else: - results = db.unapplied(sample, user, metadata_id, is_engine) + results = db.unapplied(sample, user, _id) return HttpResponse(json.dumps({'failed' : False, 'results' : results})) diff --git a/server/utilities/__init__.py b/server/utilities/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/server/utilities/engine_shell.py b/server/utilities/engine_shell.py index bb68cf6..07fec89 100644 --- a/server/utilities/engine_shell.py +++ 
b/server/utilities/engine_shell.py @@ -31,9 +31,10 @@ # FIRST Modules import first.settings -from first.models import Engine, User -from first.engines import AbstractEngine -from first import DBManager, EngineManager +import first.wsgi +from first_core.models import Engine, User +from first_core.engines import AbstractEngine +from first_core import DBManager, EngineManager class EngineCmd(Cmd): @@ -95,7 +96,7 @@ def do_list(self, line): print 'No engines are currently installed' return - for engine in Engine.objects: + for engine in Engine.objects.all(): name = engine.name description = engine.description print '+{}+{}+'.format('-' * 18, '-' * 50) @@ -139,7 +140,7 @@ def do_install(self, line): try: path, obj_name, email = line.split(' ') - developer = User.objects(email=email).get() + developer = User.objects.get(email=email) __import__(path) module = sys.modules[path] @@ -160,9 +161,11 @@ def do_install(self, line): return e.install() - engine = Engine(name=e.name, description=e.description, path=path, - obj_name=obj_name, developer=developer, active=True) - engine.save() + engine = Engine.objects.create( name=e.name, + description=e.description, + path=path, + obj_name=obj_name, + developer=developer, active=True) print 'Engine added to FIRST' return @@ -286,7 +289,7 @@ def do_populate(self, line): print 'The below errors occured:\n{}'.format('\n '.join(errors)) def _get_db_engine_obj(self, name): - engine = Engine.objects(name=name) + engine = Engine.objects.filter(name=name) if not engine: print 'Unable to locate Engine "{}"'.format(name) return @@ -294,7 +297,7 @@ def _get_db_engine_obj(self, name): if len(engine) > 1: print 'More than one engine "{}" exists'.format(name) for e in engine: - print ' - {}'.format(e.name) + print ' - {}: {}'.format(e.name, e.description) return diff --git a/server/utilities/mongo_to_django_orm.py b/server/utilities/mongo_to_django_orm.py new file mode 100644 index 0000000..a4406d7 --- /dev/null +++ 
b/server/utilities/mongo_to_django_orm.py @@ -0,0 +1,346 @@ +#! /usr/bin/python +#------------------------------------------------------------------------------- +# +# FIRST MongoDB to Django ORM Conversion Script +# Copyright (C) 2016 Angel M. Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Requirements +# ------------ +# mongoengine (https://pypi.python.org/pypi/mongoengine/) +# +# +#------------------------------------------------------------------------------- + +# Python Modules +import os +import sys +import time +import datetime +from getpass import getpass +from argparse import ArgumentParser + +# DEBUG +from pprint import pprint +import gc + +# Add app package to sys path +#sys.path.append(os.path.abspath('..')) +#os.environ['DJANGO_SETTINGS_MODULE'] = 'first.settings' + + +# FIRST Modules + +# Third Party Modules +from bson import Binary +from bson.objectid import ObjectId +import mongoengine +from mongoengine import Document, StringField, UUIDField, \ + DateTimeField, LongField, ReferenceField, \ + BinaryField, ListField, BooleanField, ObjectIdField, \ + IntField, EmbeddedDocument, EmbeddedDocumentListField +from django.core.paginator import Paginator, EmptyPage + +def info(): + print 'INFO: {} {}'.format(len(gc.get_objects()), sum([sys.getsizeof(o) for o in gc.get_objects()])) + 
+def migrate_users(): + for u in User.objects.all(): + user, created = ORM.User.objects.get_or_create(**u.dump()) + +def migrate_engines(): + for e in Engine.objects.all(): + engine = e.dump() + engine['developer'] = ORM.User.objects.get(email=e.developer.email) + engine = ORM.Engine.objects.create(**engine) + +def migrate_samples(): + paginator = Paginator(Sample.objects.all(), 100) + for s in Sample.objects.all().exclude('functions').select_related(): + sample, created = ORM.Sample.objects.get_or_create(**s.dump()) + for u in s.seen_by: + sample.seen_by.add(ORM.User.objects.get(email=u.email)) + +def migrate_functions(skip, limit): + i = 0 + for f in Function.objects.skip(skip).limit(limit).select_related(3): + function, created = ORM.Function.objects.get_or_create(**f.dump()) + # Convert Functions + #migrate_function_for_sample(sample, s) + if created: + # Add APIs to function + migrate_apis(function, f) + + # Add to samples + for s in Sample.objects.only('md5', 'crc32').filter(functions=f.id): + ORM.Sample.objects.get(md5=s.md5, crc32=s.crc32).functions.add(function) + #sample = ORM.Sample.objects.get(md5=s.md5, crc32=s.crc32) + #sample.functions.add(function) + + # Add metadata assocaited with the function + migrate_metadata(function, f) + + i += 1 + if 0 == (i % 1000): + print '---{}---'.format(i) + info() + gc.collect() + info() + +def _mf(): + for i in xrange(0, Function.objects.count(), 1000): + print '--{}'.format(i) + migrate_functions(i, 1000) + #migfunc(Function.objects.exclude('metadata').all()[i:i+1000]) + + if i % 20000 == 0: + info() + gc.collect() + info() + +def migfunc(qs): + #info() + #functions = {} + for f in qs: + function, created = ORM.Function.objects.get_or_create(**f.dump()) + # Convert APIs + if created: + # Add APIs to function + migrate_apis(function, f) + + # Add to samples + for s in Sample.objects.only('md5', 'crc32').filter(functions=f.id): + sample = ORM.Sample.objects.get(md5=s.md5, crc32=s.crc32) + 
sample.functions.add(function) + + # Add metadata assocaited with the function + #migrate_metadata(function, f) + #gc.collect() + #info() + +def migrate_function_for_sample(sample, s): + print '{} - {}'.format(s.md5, len(s.functions)) + info() + for f in s.functions: + if type(f) != Function: + print 'Abandoned object: {}'.format(f) + continue + #info() + #pprint(f.dump()) + function, created = ORM.Function.objects.get_or_create(**f.dump()) + sample.functions.add(function) + + if created: + # Convert APIs + migrate_apis(function, f) + + # Convert Metadata + migrate_metadata(function, f, sample) + + + gc.collect() + info() + +def migrate_apis(function, f): + for a in f.apis: + api, _ = ORM.FunctionApis.objects.get_or_create(api=a) + function.apis.add(api) + + gc.collect() + +def migrate_metadata(function, f): + print 'Metadata: {} - {}'.format(f.sha256, len(f.metadata)) + for m in f.metadata: + creator = ORM.User.objects.get(email=m.user.email) + metadata = ORM.Metadata.objects.create(user=creator) + function.metadata.add(metadata) + + # Convert Metadata Details + for d in m.details(): + details = ORM.MetadataDetails.objects.create(**d) + metadata.details.add(details) + + # Convert Metadata Applied + for s_id, u_id in m.applied: + s_ = Sample.objects.only('md5', 'crc32').get(pk=s_id) + u = User.objects.only('email').get(pk=u_id) + sample_ = ORM.Sample.objects.get(md5=s_.md5, crc32=s_.crc32) + user_ = ORM.User.objects.get(email=u.email) + ORM.AppliedMetadata.objects.create(metadata=metadata, + user=user_, + sample=sample_) + +def main(args): + pass_prompt = 'Enter MongoDB password for {}: '.format(args.user) + mongoengine.connect(args.d, + host=args.host, + port=args.port, + user=args.user, + password=getpass(pass_prompt)) + # Convert User + migrate_users() + + # Convert Engine + migrate_engines() + + # Convert Samples + migrate_samples() + + + + +#------------------------------------------------------------------------------- +# MongoDB Models +# FIRST v0.0.1 
+#------------------------------------------------------------------------------- +class User(Document): + name = StringField(max_length=128, required=True) + email = StringField(max_length=254, unique=True) + handle = StringField(max_length=32, required=True) + number = IntField(required=True) + api_key = UUIDField(required=True, unique=True) + created = DateTimeField(default=datetime.datetime.utcnow, required=True) + rank = LongField(default=0) + active = BooleanField(default=True) + + service = StringField(max_length=16, required=True) + auth_data = StringField(max_length=4096, required=True) + + meta = { + 'indexes' : [('handle', 'number'), 'api_key', 'email'] + } + + def dump(self): + return {'name' : self.name, + 'email' : self.email, + 'handle' : self.handle, + 'number' : self.number, + 'api_key' : self.api_key, + 'created' : self.created, + 'rank' : self.rank, + 'active' : self.active} + + +class Engine(Document): + name = StringField(max_length=16, required=True, unique=True) + description = StringField(max_length=128, required=True) + path = StringField(max_length=256, required=True) + obj_name = StringField(max_length=32, required=True) + applied = ListField(default=list) + developer = ReferenceField(User) + active = BooleanField(default=False) + + meta = { + 'indexes' : ['name'] + } + + def dump(self): + return {'name' : self.name, + 'description' : self.description, + 'path' : self.path, + 'obj_name' : self.obj_name, + 'developer' : self.developer, + 'active' : self.active} + + +class Metadata(EmbeddedDocument): + id = ObjectIdField(required=True, default=lambda: ObjectId()) + user = ReferenceField(User) + name = ListField(StringField(max_length=128), default=list) + prototype = ListField(StringField(max_length=256), default=list) + comment = ListField(StringField(max_length=512), default=list) + committed = ListField(DateTimeField(), default=list) + applied = ListField(default=list) + + meta = { + 'indexes' : ['user'] + } + + def details(self): + 
return [{'committed' : self.committed[i], + 'name' : self.name[i], + 'prototype' : self.prototype[i], + 'comment' : self.comment[i]} for i in xrange(len(self.name))] + + +# Use bson.Binary to insert binary data +class Function(Document): + sha256 = StringField(max_length=64) + opcodes = BinaryField() + apis = ListField(StringField(max_length=128), default=list) + metadata = EmbeddedDocumentListField(Metadata, default=list) + architecture = StringField(max_length=64, required=True) + + meta = { + 'indexes' : [] + } + + def dump(self): + return {'opcodes' : Binary(self.opcodes), + 'architecture' : self.architecture, + 'sha256' : self.sha256} + + +class Sample(Document): + md5 = StringField(max_length=32, required=True) + crc32 = IntField(required=True) + sha1 = StringField(max_length=40) + sha256 = StringField(max_length=64) + seen_by = ListField(ReferenceField(User), default=list) + functions = ListField(ReferenceField(Function), default=list) + last_seen = DateTimeField(default=datetime.datetime.utcnow) + + meta = { + 'indexes' : [('md5', 'crc32')] + } + + def dump(self): + data = {'md5' : self.md5, 'crc32' : self.crc32} + + if hasattr(self, 'sha1'): + data['sha1'] = self.sha1 + + if hasattr(self, 'sha256'): + data['sha256'] = self.sha256 + + return data + +if __name__ == '__main__': + parser = ArgumentParser(('FIRST Mongo to Django ORM Conversion Script\n' + 'This script should be used to convert FIRST v0.0.1 to FIRST v0.1.0\n' + )) + + # Arguments + parser.add_argument('--mongo-host', '-h', help='The MongoDB host') + parser.add_argument('--mongo-port', '-p', help='The MongoDB port', type=int) + parser.add_argument('--mongo-user', '-u', help='The MongoDB user') + parser.add_argument('--mongo-db', '-d', help='The MongoDB db name') + +# TODO: remove +mongoengine.connect('beta') +print ' + Adding Users' +start = time.time() +migrate_users() +print '[+] Users Added ({} s)'.format(time.time() - start) +print ' + Adding Samples' +start = time.time() +migrate_samples() 
+print '[+] Adding Samples ({} s)'.format(time.time() - start) +print ' + Adding Functions & Metadata' +start = time.time() +#migrate_functions() +_mf() +print '[+] Adding Functions & Metadata ({} s)'.format(time.time() - start) diff --git a/server/utilities/populate_engine.py b/server/utilities/populate_engine.py index b26cc2b..0417b4d 100644 --- a/server/utilities/populate_engine.py +++ b/server/utilities/populate_engine.py @@ -27,7 +27,7 @@ from argparse import ArgumentParser # FIRST Modules -from ..app.first import EngineManager, DBManager +from first_core import EngineManager, DBManager def main(): global total, completed, operation_complete diff --git a/server/www/migrations/0001_initial.py b/server/www/migrations/0001_initial.py index 8a858c7..c272eed 100644 --- a/server/www/migrations/0001_initial.py +++ b/server/www/migrations/0001_initial.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Generated by Django 1.11.4 on 2017-08-15 11:54 +# Generated by Django 1.11.4 on 2017-08-25 16:11 from __future__ import unicode_literals from django.db import migrations, models @@ -41,7 +41,7 @@ class Migration(migrations.Migration): migrations.CreateModel( name='Function', fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('id', models.BigAutoField(primary_key=True, serialize=False)), ('sha256', models.CharField(max_length=64)), ('opcodes', models.BinaryField()), ('architecture', models.CharField(max_length=64)), @@ -54,7 +54,7 @@ class Migration(migrations.Migration): name='FunctionApis', fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('api', models.CharField(max_length=64)), + ('api', models.CharField(max_length=128, unique=True)), ], options={ 'db_table': 'FunctionApis', @@ -63,7 +63,7 @@ class Migration(migrations.Migration): migrations.CreateModel( name='Metadata', fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, 
serialize=False, verbose_name='ID')), + ('id', models.BigAutoField(primary_key=True, serialize=False)), ], options={ 'db_table': 'Metadata', @@ -75,7 +75,7 @@ class Migration(migrations.Migration): ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('name', models.CharField(max_length=256)), ('prototype', models.CharField(max_length=256)), - ('comment', models.CharField(max_length=256)), + ('comment', models.CharField(max_length=512)), ('committed', models.DateTimeField(default=django.utils.timezone.now)), ], options={ @@ -85,18 +85,21 @@ class Migration(migrations.Migration): migrations.CreateModel( name='Sample', fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('id', models.BigAutoField(primary_key=True, serialize=False)), ('md5', models.CharField(max_length=32)), - ('crc32', models.IntegerField()), + ('crc32', models.BigIntegerField()), ('sha1', models.CharField(blank=True, max_length=40, null=True)), ('sha256', models.CharField(blank=True, max_length=64, null=True)), ('last_seen', models.DateTimeField(blank=True, default=django.utils.timezone.now)), ], + options={ + 'db_table': 'Sample', + }, ), migrations.CreateModel( name='User', fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('id', models.BigAutoField(primary_key=True, serialize=False)), ('name', models.CharField(max_length=128)), ('email', models.CharField(max_length=254)), ('handle', models.CharField(max_length=32)), @@ -134,11 +137,6 @@ class Migration(migrations.Migration): name='seen_by', field=models.ManyToManyField(to='www.User'), ), - migrations.AddField( - model_name='metadata', - name='applied', - field=models.ManyToManyField(to='www.AppliedMetadata'), - ), migrations.AddField( model_name='metadata', name='details', @@ -166,18 +164,22 @@ class Migration(migrations.Migration): ), migrations.AddField( 
model_name='appliedmetadata', - name='metadata_id', + name='metadata', field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.Metadata'), ), migrations.AddField( model_name='appliedmetadata', - name='sample_id', - field=models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to='www.Sample'), + name='sample', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.Sample'), ), migrations.AddField( model_name='appliedmetadata', - name='user_id', - field=models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to='www.User'), + name='user', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='www.User'), + ), + migrations.AlterUniqueTogether( + name='sample', + unique_together=set([('md5', 'crc32')]), ), migrations.AlterIndexTogether( name='sample', @@ -187,12 +189,16 @@ class Migration(migrations.Migration): model_name='metadata', index=models.Index(fields=['user'], name='Metadata_user_id_aea908_idx'), ), + migrations.AlterUniqueTogether( + name='function', + unique_together=set([('sha256', 'architecture')]), + ), migrations.AddIndex( model_name='engine', index=models.Index(fields=['name'], name='Engine_name_14ac74_idx'), ), migrations.AlterUniqueTogether( name='appliedmetadata', - unique_together=set([('metadata_id', 'sample_id', 'user_id')]), + unique_together=set([('metadata', 'sample', 'user')]), ), ] diff --git a/server/www/models.py b/server/www/models.py index 0433367..b5b244e 100644 --- a/server/www/models.py +++ b/server/www/models.py @@ -22,7 +22,6 @@ # Python Modules from __future__ import unicode_literals -import datetime # Third Party Modules from django.db import models @@ -30,6 +29,8 @@ class User(models.Model): + id = models.BigAutoField(primary_key=True) + name = models.CharField(max_length=128) email = models.CharField(max_length=254) handle = models.CharField(max_length=32) @@ -77,9 +78,11 @@ class Engine(models.Model): developer = models.ForeignKey('User') 
active = models.BooleanField(default=False) - #@property - #def rank(self): - # return len(self.applied) + @property + def rank(self): + # TODO: Complete + #return len(self.applied) + return 0 def dump(self, full=False): data = {'name' : self.name, @@ -112,19 +115,19 @@ class Meta: # unique_together = ("sample_id", "user_id", "engine_metadata_id") class AppliedMetadata(models.Model): - metadata_id = models.ForeignKey('Metadata') - sample_id = models.OneToOneField('Sample') - user_id = models.OneToOneField('User') + metadata = models.ForeignKey('Metadata') + sample = models.ForeignKey('Sample') + user = models.ForeignKey('User') class Meta: db_table = 'AppliedMetadata' - unique_together = ("metadata_id", "sample_id", "user_id") + unique_together = ("metadata", "sample", "user") class MetadataDetails(models.Model): name = models.CharField(max_length=256) prototype = models.CharField(max_length=256) - comment = models.CharField(max_length=256) + comment = models.CharField(max_length=512) committed = models.DateTimeField(default=timezone.now) class Meta: @@ -132,13 +135,17 @@ class Meta: class Metadata(models.Model): + id = models.BigAutoField(primary_key=True) + user = models.ForeignKey('User') - details = models.ManyToManyField('Metadatadetails') - applied = models.ManyToManyField('AppliedMetadata') + details = models.ManyToManyField('MetadataDetails') @property def rank(self): - return self.applied.count() + if hasattr(self, 'id'): + return AppliedMetadata.objects.filter(metadata=self.id).count() + + return 0 def has_changed(self, name, prototype, comment): if not self.details.exists(): @@ -159,18 +166,17 @@ def dump(self, full=False): 'name' : latest_details.name, 'prototype' : latest_details.prototype, 'comment' : latest_details.comment, - 'rank' : AppliedMetadata(metadata_id=latest_details.id).count() + 'rank' : self.rank }) if full: - data['history'] = [] - for d in xrange(self.details.count()): - # Convert committed time back with: - # 
datetime.datetime.strptime(
, '%Y-%m-%dT%H:%M:%S.%f') - data['history'].append({'name' : d.name, - 'prototype' : d.prototype, - 'comment' : d.comment, - 'committed' : d.commit.isoformat()}) + # Convert committed time back with: + # datetime.datetime.strptime(
, '%Y-%m-%dT%H:%M:%S.%f') + data['history'] = [{'name' : d.name, + 'prototype' : d.prototype, + 'comment' : d.comment, + 'committed' : d.committed.isoformat()} + for d in self.details.order_by('committed')] return data @@ -180,13 +186,15 @@ class Meta: class FunctionApis(models.Model): - api = models.CharField(max_length=64) + api = models.CharField(max_length=128, unique=True) class Meta: db_table = 'FunctionApis' class Function(models.Model): + id = models.BigAutoField(primary_key=True) + sha256 = models.CharField(max_length=64) opcodes = models.BinaryField() apis = models.ManyToManyField('FunctionApis') @@ -194,20 +202,20 @@ class Function(models.Model): architecture = models.CharField(max_length=64) def dump(self): - return {'id' : self.id, - 'opcodes' : self.opcodes, - 'apis' : [str(x.api) for x in self.apis.all()], - 'metadata' : [str(x.api) for x in self.metadata.all()], + return {'opcodes' : self.opcodes, 'architecture' : self.architecture, 'sha256' : self.sha256} class Meta: db_table = 'Function' + unique_together = ('sha256', 'architecture') class Sample(models.Model): + id = models.BigAutoField(primary_key=True) + md5 = models.CharField(max_length=32) - crc32 = models.IntegerField() + crc32 = models.BigIntegerField() sha1 = models.CharField(max_length=40, null=True, blank=True) sha256 = models.CharField(max_length=64, null=True, blank=True) seen_by = models.ManyToManyField('User') @@ -215,17 +223,13 @@ class Sample(models.Model): last_seen = models.DateTimeField(default=timezone.now, blank=True) class Meta: + db_table = 'Sample' index_together = ['md5', 'crc32'] + unique_together = ('md5', 'crc32') def dump(self): - data = {'md5' : self.md5, 'crc32' : self.crc32, + return {'md5' : self.md5, 'crc32' : self.crc32, 'seen_by' : [str(x.id) for x in self.seen_by.all()], - 'functions' : [str(x.id) for x in self.functions.all()]} - - if 'sha1' in self: - data['sha1'] = self.sha1 - - if 'sha256' in self: - data['sha256'] = self.sha256 - - return data + 
'functions' : [str(x.id) for x in self.functions.all()], + 'sha1' : self.sha1, + 'sha256' : self.sha256} diff --git a/server/www/views.py b/server/www/views.py index 289e9b6..6fb7458 100644 --- a/server/www/views.py +++ b/server/www/views.py @@ -9,8 +9,8 @@ # FIRST Modules -from www.models import Function, User -from first.auth import Authentication, require_login, FIRSTAuthError +from first_core.models import Function, User +from first_core.auth import Authentication, require_login, FIRSTAuthError def handler404(request): return render(request, 'www/404.html', None) From 0f873fc21c3ab0c9bff900f82d6a588f354e2bd4 Mon Sep 17 00:00:00 2001 From: demonduck Date: Wed, 30 Aug 2017 14:02:36 -0400 Subject: [PATCH 05/17] mongo to Django ORM script --- server/utilities/mongo_to_django_orm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/server/utilities/mongo_to_django_orm.py b/server/utilities/mongo_to_django_orm.py index a4406d7..70150c9 100644 --- a/server/utilities/mongo_to_django_orm.py +++ b/server/utilities/mongo_to_django_orm.py @@ -43,6 +43,7 @@ # FIRST Modules +import first_core.models as ORM # Third Party Modules from bson import Binary From 7e5f4e914fac073ddcfa912ad8a86d300bd7e213 Mon Sep 17 00:00:00 2001 From: demonduck Date: Thu, 31 Aug 2017 15:27:42 -0400 Subject: [PATCH 06/17] backing up code small orm migration bug fixes, MnemonicHashEngine migrated to capstone --- server/engines/__init__.py | 0 server/engines/admin.py | 6 + server/engines/apps.py | 8 + server/engines/models.py | 6 + server/engines/tests.py | 6 + server/engines/views.py | 6 + server/first/settings.py | 1 + server/first_core/dbs/builtin_db.py | 3 - server/first_core/disassembly/__init__.py | 110 ++++++++++++++ server/first_core/engines/__init__.py | 22 +-- server/first_core/engines/exact_match.py | 2 +- server/first_core/engines/mnemonic_hash.py | 168 +++++++++++++++++++++ server/first_core/util.py | 9 +- server/rest/views.py | 11 +- server/utilities/engine_shell.py | 22 ++- 
server/utilities/mongo_to_django_orm.py | 92 +++-------- server/www/models.py | 12 +- 17 files changed, 382 insertions(+), 102 deletions(-) create mode 100644 server/engines/__init__.py create mode 100644 server/engines/admin.py create mode 100644 server/engines/apps.py create mode 100644 server/engines/models.py create mode 100644 server/engines/tests.py create mode 100644 server/engines/views.py create mode 100644 server/first_core/disassembly/__init__.py create mode 100644 server/first_core/engines/mnemonic_hash.py diff --git a/server/engines/__init__.py b/server/engines/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/server/engines/admin.py b/server/engines/admin.py new file mode 100644 index 0000000..13be29d --- /dev/null +++ b/server/engines/admin.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.contrib import admin + +# Register your models here. diff --git a/server/engines/apps.py b/server/engines/apps.py new file mode 100644 index 0000000..fa6398b --- /dev/null +++ b/server/engines/apps.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.apps import AppConfig + + +class EnginesConfig(AppConfig): + name = 'engines' diff --git a/server/engines/models.py b/server/engines/models.py new file mode 100644 index 0000000..1dfab76 --- /dev/null +++ b/server/engines/models.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import models + +# Create your models here. diff --git a/server/engines/tests.py b/server/engines/tests.py new file mode 100644 index 0000000..5982e6b --- /dev/null +++ b/server/engines/tests.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.test import TestCase + +# Create your tests here. 
diff --git a/server/engines/views.py b/server/engines/views.py new file mode 100644 index 0000000..e784a0b --- /dev/null +++ b/server/engines/views.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.shortcuts import render + +# Create your views here. diff --git a/server/first/settings.py b/server/first/settings.py index 08ecb18..c438114 100644 --- a/server/first/settings.py +++ b/server/first/settings.py @@ -48,6 +48,7 @@ INSTALLED_APPS = [ 'www.apps.WwwConfig', + 'engines.apps.EnginesConfig', 'django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', diff --git a/server/first_core/dbs/builtin_db.py b/server/first_core/dbs/builtin_db.py index ef1dea2..4bfd76a 100644 --- a/server/first_core/dbs/builtin_db.py +++ b/server/first_core/dbs/builtin_db.py @@ -224,12 +224,9 @@ def add_metadata_to_function(self, user, function, name, prototype, comment, **k # Check to see if user already has metadata associated with the sample metadata = None - print function.id - print user.id if Function.objects.filter(pk=function.id, metadata__user=user).count(): # Metadata already exists metadata = Metadata.objects.get(function=function, user=user) - else: metadata = Metadata.objects.create(user=user) function.metadata.add(metadata) diff --git a/server/first_core/disassembly/__init__.py b/server/first_core/disassembly/__init__.py new file mode 100644 index 0000000..34a6c1b --- /dev/null +++ b/server/first_core/disassembly/__init__.py @@ -0,0 +1,110 @@ +# Third Party Modules +from capstone import * +from capstone.ppc import * +from capstone.systemz import * +from capstone.arm import * +from capstone.arm64 import * +from capstone.x86 import * +from capstone.sparc import * +from capstone.mips import * + +arch_mapping = { + 'ppc' : (CS_ARCH_PPC, CS_MODE_32), + 'ppc32' : (CS_ARCH_PPC, CS_MODE_32), + 'ppc64' : (CS_ARCH_PPC, CS_MODE_64), + 'intel16' : (CS_ARCH_X86, CS_MODE_16), + 'sysz' : (CS_ARCH_SYSZ, None), + 'arm32' : 
(CS_ARCH_ARM, CS_MODE_ARM), + 'intel32' : (CS_ARCH_X86, CS_MODE_ARM), + 'intel64' : (CS_ARCH_X86, CS_MODE_64), + 'sparc' : (CS_ARCH_SPARC, None), + 'arm64' : (CS_ARCH_ARM64, CS_MODE_ARM), + 'mips' : (CS_ARCH_MIPS, CS_MODE_32), + 'mips64' : (CS_ARCH_MIPS, CS_MODE_64) +} + +reg_mapping = { + 'ppc' : PPC_OP_REG, 'ppc32' : PPC_OP_REG, 'ppc64' : PPC_OP_REG, + 'sysz' : SYSZ_OP_REG, + 'intel16' : X86_OP_REG, 'intel32' : X86_OP_REG, 'intel64' : X86_OP_REG, + 'sparc' : SPARC_OP_REG, + 'arm32' : ARM_OP_REG, 'arm64' : ARM64_OP_REG, + 'mips' : MIPS_OP_REG, 'mips64' : MIPS_OP_REG +} + +imm_mapping = { + 'ppc' : PPC_OP_IMM, 'ppc32' : PPC_OP_IMM, 'ppc64' : PPC_OP_IMM, + 'sysz' : SYSZ_OP_IMM, + 'intel16' : X86_OP_IMM, 'intel32' : X86_OP_IMM, 'intel64' : X86_OP_IMM, + 'sparc' : SPARC_OP_IMM, + 'arm32' : ARM_OP_IMM, 'arm64' : ARM64_OP_IMM, + 'mips' : MIPS_OP_IMM, 'mips64' : MIPS_OP_IMM +} + +mem_mapping = { + 'ppc' : PPC_OP_MEM, 'ppc32' : PPC_OP_MEM, 'ppc64' : PPC_OP_MEM, + 'sysz' : SYSZ_OP_MEM, + 'intel16' : X86_OP_MEM, 'intel32' : X86_OP_MEM, 'intel64' : X86_OP_MEM, + 'sparc' : SPARC_OP_MEM, + 'arm32' : ARM_OP_MEM, 'arm64' : ARM64_OP_MEM, + 'mips' : MIPS_OP_MEM, 'mips64' : MIPS_OP_MEM +} + +invalid_mapping = { + 'ppc' : PPC_OP_INVALID, 'ppc32' : PPC_OP_INVALID, 'ppc64' : PPC_OP_INVALID, + 'sysz' : SYSZ_OP_INVALID, + 'intel16' : X86_OP_INVALID, 'intel32' : X86_OP_INVALID, 'intel64' : X86_OP_INVALID, + 'sparc' : SPARC_OP_INVALID, + 'arm32' : ARM_OP_INVALID, 'arm64' : ARM64_OP_INVALID, + 'mips' : MIPS_OP_INVALID, 'mips64' : MIPS_OP_INVALID +} + + +class Disassembly(object): + def __init__(self, architecture, code): + self.md = None + self.data = [] + self.code = code + self.iterator = None + self.architecture = architecture + + self.valid = False + + if architecture in arch_mapping: + arch, mode = arch_mapping[architecture] + self.md = Cs(arch, mode) + self.md.details = True + self.iterator = self.md.disasm(self.code, 0) + self.valid = True + + def instructions(self): + # When first 
called function will return cached instructions + for i in xrange(len(self.data)): + yield self.data[i] + + # Then iterate through non-cached instructions + if not self.iterator: + for i in self.iterator: + self.data.append(i) + yield i + + self.iterator = None + + def _check_mapping(self, mapping, operand): + if ((not hasattr(operand, 'type')) + or (self.architecture not in mapping)): + False + + return operand.type == mapping[self.architecture] + + def is_op_reg(self, operand): + return self._check_mapping(reg_mapping, operand) + + def is_op_mem(self, operand): + return self._check_mapping(mem_mapping, operand) + + def is_op_imm(self, operand): + return self._check_mapping(imm_mapping, operand) + + def is_op_invalid(self, operand): + return self._check_mapping(invalid_mapping, operand) diff --git a/server/first_core/engines/__init__.py b/server/first_core/engines/__init__.py index 19d708c..cd17b5d 100644 --- a/server/first_core/engines/__init__.py +++ b/server/first_core/engines/__init__.py @@ -18,9 +18,9 @@ from first_core.error import FIRSTError from first_core.dbs import FIRSTDBManager from first_core.engines.results import Result +from first_core.disassembly import Disassembly # Third Party Modules -from bson.objectid import ObjectId # Class for FirstEngine related exceptions @@ -96,9 +96,9 @@ def add(self, function): self._add(function) - def scan(self, opcodes, architecture, apis): + def scan(self, opcodes, architecture, apis, **kwargs): '''Returns a list of Result objects''' - results = self._scan(opcodes, architecture, apis) + results = self._scan(opcodes, architecture, apis, **kwargs) if isinstance(results, Result): return [results] @@ -131,7 +131,7 @@ def _add(self, function): '''Returns nothing''' raise FIRSTEngineError('Not Implemented') - def _scan(self, opcodes, architecture, apis): + def _scan(self, opcodes, architecture, apis, **kwargs): '''Returns List of function IDs''' raise FIRSTEngineError('Not Implemented') @@ -170,9 +170,7 @@ def 
_engines(self): # Dynamically (re)load engines engines = [] for e in active_engines: - if e.path in sys.modules: - reload(sys.modules[e.path]) - else: + if e.path not in sys.modules: __import__(e.path) module = sys.modules[e.path] @@ -220,9 +218,13 @@ def add(self, function): ''' required_keys = {'id', 'apis', 'opcodes', 'architecture', 'sha256'} if (dict != type(function)) or not required_keys.issubset(function.keys()): - print 'Data provided is not the correct type or required keys not provided' + print '[1stEM] Data provided is not the correct type or required keys not provided' return None + dis = Disassembly(function['architecture'], function['opcodes']) + if dis: + function['disassembly'] = dis + # Send function details to each registered engine errors = {} for engine in self._engines: @@ -267,10 +269,12 @@ def scan(self, user, opcodes, architecture, apis): engine_results = {} engines = self._engines + dis = Disassembly(architecture, opcodes) for i in xrange(len(engines)): engine = engines[i] try: - results = engine.scan(opcodes, architecture, apis) + results = engine.scan(opcodes, architecture, apis, + disassembly=dis) if results: engine_results[i] = results diff --git a/server/first_core/engines/exact_match.py b/server/first_core/engines/exact_match.py index c9c7b32..fb10857 100644 --- a/server/first_core/engines/exact_match.py +++ b/server/first_core/engines/exact_match.py @@ -39,7 +39,7 @@ def _add(self, function): ''' pass - def _scan(self, opcodes, architecture, apis): + def _scan(self, opcodes, architecture, apis, disassembly): '''Returns List of FunctionResults''' db = self._dbs['first_db'] diff --git a/server/first_core/engines/mnemonic_hash.py b/server/first_core/engines/mnemonic_hash.py new file mode 100644 index 0000000..5829205 --- /dev/null +++ b/server/first_core/engines/mnemonic_hash.py @@ -0,0 +1,168 @@ +#------------------------------------------------------------------------------- +# +# FIRST Engine: Mnemonic Hash +# Uses Distorm3 to 
obtain mnemonics from the opcodes, reduces the opcodes to +# a single string and hashes it for future lookup +# +# Copyright (C) 2017 Angel M. Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Requirements +# ------------ +# - distorm3 +# +#------------------------------------------------------------------------------- + +# Python Modules +from hashlib import sha256 + +# FIRST Modules +from first_core.error import FIRSTError +from first_core.engines import AbstractEngine +from first_core.engines.results import FunctionResult + +# Third Party Modules +from capstone import * +from django.db import models +from django.core.exceptions import ObjectDoesNotExist + +MIN_REQUIRED_MNEMONICS = 8 + +class MnemonicHash(models.Model): + sha256 = models.CharField(max_length=64) + architecture = models.CharField(max_length=64) + functions = models.ManyToManyField('MnemonicHashFunctions') + + class Meta: + app_label = 'engines' + index_together = ('sha256', 'architecture') + unique_together = ('sha256', 'architecture') + + def dump(self): + return {'sha256' : self.sha256, + 'architecture' : self.architecture, + 'functions' : self.functions.all()} + +class MnemonicHashFunctions(models.Model): + func = models.BigIntegerField() + + class Meta: + app_label = 'engines' + + +class MnemonicHashEngine(AbstractEngine): 
+ _name = 'MnemonicHash' + _description = ('Uses mnemonics from the opcodes to generate a hash ' + '(architecture support limited to: intel32, intel64, ' + 'arm, arm64, mips32, mips64, ppc32, ppc64, sparc). ' + 'Requires at least 8 mnemonics.') + _required_db_names = ['first_db'] + + def mnemonic_hash(self, disassembly): + if not disassembly: + return (None, None) + + try: + mnemonics = [i.mnemonic for i in disassembly.instructions()] + if len(mnemonics) < MIN_REQUIRED_MNEMONICS: + return (None, None) + + return (mnemonics, sha256(''.join(mnemonics)).hexdigest()) + + except Exception as e: + raise e + return (None, None) + + def _add(self, function): + ''' + Creates a mnemonic hash based on the provided architecture and opcodes + via disassembling the opcodes and discarding the instruction operands. + ''' + architecture = function['architecture'] + disassembly = function.get('disassembly') + mnemonics, mnemonic_sha256 = self.mnemonic_hash(disassembly) + if None in [mnemonic_sha256, mnemonics]: + return + + db_obj, _ = MnemonicHash.objects.get_or_create(sha256=mnemonic_sha256, + architecture=architecture) + function_id = function['id'] + count = MnemonicHash.objects.filter(sha256=mnemonic_sha256, + architecture=architecture, + functions__func=function_id).count() + + if not count: + func, _ = MnemonicHashFunctions.objects.get_or_create(func=function_id) + db_obj.functions.add(func) + + def _scan(self, opcodes, architecture, apis, disassembly): + '''Returns List of tuples (function ID, similarity percentage)''' + db = self._dbs['first_db'] + mnemonics, mnemonic_sha256 = self.mnemonic_hash(disassembly) + + if None in [mnemonic_sha256, mnemonics]: + return + + try: + db_obj = MnemonicHash.objects.get(sha256=mnemonic_sha256, + architecture=architecture) + except ObjectDoesNotExist: + return None + + results = [] + for f in db_obj.functions.all(): + similarity = 75.0 + function_id = f.func + function = db.find_function(_id=function_id) + + if (not function) or (not 
function.metadata.count()): + continue + + # The APIs will count up to 10% of the similarity score + total_apis = function.apis.count() + if total_apis: + func_apis = {x['api'] for x in function.apis.values('api')} + overlap = float(len(func_apis.intersection(apis))) + similarity += (overlap / total_apis) * 10 + + else: + similarity += 5 + + results.append(FunctionResult(function_id, similarity)) + + return results + + def _install(self): + try: + from django.core.management import execute_from_command_line + except ImportError: + # The above import may fail for some other reason. Ensure that the + # issue is really that Django is missing to avoid masking other + # exceptions on Python 2. + try: + import django + except ImportError: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" + ) + raise + execute_from_command_line(['manage.py', 'makemigrations', 'engines']) + execute_from_command_line(['manage.py', 'migrate', 'engines']) + + def _uninstall(self): + print 'Manually delete tables associated with {}'.format(self.engine_name) diff --git a/server/first_core/util.py b/server/first_core/util.py index 96fee63..5b49269 100644 --- a/server/first_core/util.py +++ b/server/first_core/util.py @@ -44,10 +44,15 @@ def make_id(flags, metadata=0, engine=0): def parse_id(_id): - if len(_id) != 26: + if type(_id) in [str, unicode]: + if len(_id) != 26: + return (None, None, None) + + _id = int(_id, 16) + + elif type(id) not in [int, long]: return (None, None, None) - _id = int(_id, 16) flag = _id >> (8 * 12) engine_data = (_id >> (8 * 8)) & (0xFFFFFFFF) metadata_id = _id & 0xFFFFFFFFFFFFFFFF diff --git a/server/rest/views.py b/server/rest/views.py index 2fd67fd..fe6f6f6 100644 --- a/server/rest/views.py +++ b/server/rest/views.py @@ -218,7 +218,7 @@ def metadata_add(request, md5_hash, crc32, user): f = functions[client_key] # Check if 
the id sent back is from an engine, if so skip it - if (('id' in f) and (f['id']) and not is_engine_metadata(f['id'])): + if (('id' in f) and (f['id']) and is_engine_metadata(f['id'])): continue; function = db.get_function(create=True, **f) @@ -238,14 +238,14 @@ def metadata_add(request, md5_hash, crc32, user): 'function in FIRST')}) # The '0' indicated the metadata_id is from a user. - print metadata_id - results[client_key] = make_id(0, metadata=metadata_id) + _id = make_id(0, metadata=metadata_id) + results[client_key] = _id # Set the user as applying the metadata - db.applied(sample, user, metadata_id) + db.applied(sample, user, _id) # Send opcode to EngineManager - EngineManager.add(function.dump()) + EngineManager.add(function.dump(True)) return HttpResponse(json.dumps({'failed' : False, 'results' : results})) @@ -296,7 +296,6 @@ def metadata_history(request, user): return render(request, 'rest/error_json.html', {'msg' : 'Exceeded max bulk request'}) - print metadata if None in map(VALIDATE_IDS, metadata): return render(request, 'rest/error_json.html', {'msg' : 'Invalid metadata id'}) diff --git a/server/utilities/engine_shell.py b/server/utilities/engine_shell.py index 07fec89..0a9973d 100644 --- a/server/utilities/engine_shell.py +++ b/server/utilities/engine_shell.py @@ -30,11 +30,15 @@ sys.path.append(os.path.abspath('..')) # FIRST Modules -import first.settings import first.wsgi -from first_core.models import Engine, User +import first.settings from first_core.engines import AbstractEngine +from first_core.disassembly import Disassembly from first_core import DBManager, EngineManager +from first_core.models import Engine, User, Function + +# Third Party Modules +from django.core.paginator import Paginator class EngineCmd(Cmd): @@ -252,7 +256,7 @@ def do_populate(self, line): return print 'Starting to populate engines:\n-\t{}'.format('\n-\t'.join([e.name for e in engines])) - functions = db.get_all_functions() + functions = 
db.get_all_functions().order_by('pk') total = functions.count() msg = ' [Status] {0:.2f}% Completed ({1} out of {2})\r' @@ -261,12 +265,16 @@ def do_populate(self, line): offset = 0 limit = 500 - for j in xrange(0, total, limit): - functions = db.get_all_functions().skip(j).limit(limit) + paginator = Paginator(functions, 100) + for j in paginator.page_range: + functions = paginator.page(j) for function in functions: - details = function.dump() - del details['metadata'] + details = function.dump(True) + + dis = Disassembly(details['architecture'], details['opcodes']) + if dis: + details['disassembly'] = dis for engine in engines: try: diff --git a/server/utilities/mongo_to_django_orm.py b/server/utilities/mongo_to_django_orm.py index 70150c9..4139872 100644 --- a/server/utilities/mongo_to_django_orm.py +++ b/server/utilities/mongo_to_django_orm.py @@ -37,11 +37,6 @@ from pprint import pprint import gc -# Add app package to sys path -#sys.path.append(os.path.abspath('..')) -#os.environ['DJANGO_SETTINGS_MODULE'] = 'first.settings' - - # FIRST Modules import first_core.models as ORM @@ -80,7 +75,6 @@ def migrate_functions(skip, limit): for f in Function.objects.skip(skip).limit(limit).select_related(3): function, created = ORM.Function.objects.get_or_create(**f.dump()) # Convert Functions - #migrate_function_for_sample(sample, s) if created: # Add APIs to function migrate_apis(function, f) @@ -105,56 +99,12 @@ def _mf(): for i in xrange(0, Function.objects.count(), 1000): print '--{}'.format(i) migrate_functions(i, 1000) - #migfunc(Function.objects.exclude('metadata').all()[i:i+1000]) if i % 20000 == 0: info() gc.collect() info() -def migfunc(qs): - #info() - #functions = {} - for f in qs: - function, created = ORM.Function.objects.get_or_create(**f.dump()) - # Convert APIs - if created: - # Add APIs to function - migrate_apis(function, f) - - # Add to samples - for s in Sample.objects.only('md5', 'crc32').filter(functions=f.id): - sample = 
ORM.Sample.objects.get(md5=s.md5, crc32=s.crc32) - sample.functions.add(function) - - # Add metadata assocaited with the function - #migrate_metadata(function, f) - #gc.collect() - #info() - -def migrate_function_for_sample(sample, s): - print '{} - {}'.format(s.md5, len(s.functions)) - info() - for f in s.functions: - if type(f) != Function: - print 'Abandoned object: {}'.format(f) - continue - #info() - #pprint(f.dump()) - function, created = ORM.Function.objects.get_or_create(**f.dump()) - sample.functions.add(function) - - if created: - # Convert APIs - migrate_apis(function, f) - - # Convert Metadata - migrate_metadata(function, f, sample) - - - gc.collect() - info() - def migrate_apis(function, f): for a in f.apis: api, _ = ORM.FunctionApis.objects.get_or_create(api=a) @@ -186,19 +136,34 @@ def migrate_metadata(function, f): def main(args): pass_prompt = 'Enter MongoDB password for {}: '.format(args.user) - mongoengine.connect(args.d, - host=args.host, - port=args.port, - user=args.user, + mongoengine.connect(args.mongo_db, + host=args.mongo_host, + port=args.mongo_port, + user=args.mongo_user, password=getpass(pass_prompt)) # Convert User + print ' + Adding Users' + start = time.time() migrate_users() + print '[+] Users Added ({} s)'.format(time.time() - start) # Convert Engine + print ' + Adding Engines' + start = time.time() migrate_engines() + print '[+] Adding Engines ({} s)'.format(time.time() - start) # Convert Samples + print ' + Adding Samples' + start = time.time() migrate_samples() + print '[+] Adding Samples ({} s)'.format(time.time() - start) + + # Convert Functions and their Metadata + print ' + Adding Functions & Metadata' + start = time.time() + _mf() + print '[+] Adding Functions & Metadata ({} s)'.format(time.time() - start) @@ -276,7 +241,6 @@ def details(self): 'prototype' : self.prototype[i], 'comment' : self.comment[i]} for i in xrange(len(self.name))] - # Use bson.Binary to insert binary data class Function(Document): sha256 = 
StringField(max_length=64) @@ -325,23 +289,9 @@ def dump(self): )) # Arguments - parser.add_argument('--mongo-host', '-h', help='The MongoDB host') + parser.add_argument('--mongo-host', '--host', help='The MongoDB host') parser.add_argument('--mongo-port', '-p', help='The MongoDB port', type=int) parser.add_argument('--mongo-user', '-u', help='The MongoDB user') parser.add_argument('--mongo-db', '-d', help='The MongoDB db name') -# TODO: remove -mongoengine.connect('beta') -print ' + Adding Users' -start = time.time() -migrate_users() -print '[+] Users Added ({} s)'.format(time.time() - start) -print ' + Adding Samples' -start = time.time() -migrate_samples() -print '[+] Adding Samples ({} s)'.format(time.time() - start) -print ' + Adding Functions & Metadata' -start = time.time() -#migrate_functions() -_mf() -print '[+] Adding Functions & Metadata ({} s)'.format(time.time() - start) + main(parser.parse_args()) diff --git a/server/www/models.py b/server/www/models.py index b5b244e..be8710c 100644 --- a/server/www/models.py +++ b/server/www/models.py @@ -71,7 +71,7 @@ class Meta: class Engine(models.Model): name = models.CharField(max_length=16, unique=True) - description = models.CharField(max_length=128) + description = models.CharField(max_length=256) path = models.CharField(max_length=256) obj_name = models.CharField(max_length=32) @@ -201,11 +201,17 @@ class Function(models.Model): metadata = models.ManyToManyField('Metadata') architecture = models.CharField(max_length=64) - def dump(self): - return {'opcodes' : self.opcodes, + def dump(self, full=False): + data = {'opcodes' : self.opcodes, 'architecture' : self.architecture, 'sha256' : self.sha256} + if full: + data['apis'] = [x['api'] for x in self.apis.values('api')] + data['id'] = self.id + + return data + class Meta: db_table = 'Function' unique_together = ('sha256', 'architecture') From 89914bceec99cec13221c43772294a0f6fae0a85 Mon Sep 17 00:00:00 2001 From: demonduck Date: Fri, 1 Sep 2017 19:59:29 -0400 
Subject: [PATCH 07/17] capstone migration v1 BasicMasking needs more work to get where it was with distorm. Bug fixes all around. Added user_shell.py in utilities to allow for easier user management. --- docs/engines/index.rst | 19 +- docs/index.rst | 17 +- server/first_core/auth.py | 2 +- server/first_core/disassembly/__init__.py | 74 +++++- server/first_core/engines/basic_masking.py | 258 +++++++++++++++++++++ server/utilities/engine_shell.py | 2 +- server/utilities/populate_engine.py | 90 ------- server/utilities/user_shell.py | 232 ++++++++++++++++++ server/www/templates/www/base_site.html | 4 +- 9 files changed, 592 insertions(+), 106 deletions(-) create mode 100644 server/first_core/engines/basic_masking.py delete mode 100644 server/utilities/populate_engine.py create mode 100644 server/utilities/user_shell.py diff --git a/docs/engines/index.rst b/docs/engines/index.rst index 666f868..819a4eb 100644 --- a/docs/engines/index.rst +++ b/docs/engines/index.rst @@ -8,12 +8,29 @@ Engines Engine Shell ============ +The Python script ``engine_shell.py`` provides you with some functionality to manage engines installed into FIRST. Below you will see the script's operations. + +.. code:: + + +========================================================+ + | FIRST Engine Shell Menu | + +========================================================+ + | list | List all engines currently installed | + | info | Get info on an engine | + | install | Installs engine | + | delete | Removes engine record but not other DB data | + | enable | Enable engine (Engine will be enabled) | + | populate | Sending all functions to engine | + | disable | Disable engine (Engine will be disabled) | + +--------------------------------------------------------+ + + Testing Engines =============== TODO -.. autoclass:: first.engines.AbstractEngine +.. 
autoclass:: first_core.engines.AbstractEngine :noindex: :members: :undoc-members: diff --git a/docs/index.rst b/docs/index.rst index 1f18595..0e29530 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -17,7 +17,7 @@ Installing your own FIRST server can be quick and easy with an Ubuntu machine an **After cloning the Git repo** - Save your google auth json information to install/google_secret.json + Save your google auth json information to install/google_secret.json. To generate a google_secret.json file you will need to go to https://console.developers.google.com, create a project, select the project, select Credentials in the left set of links under APIs & services. Once selected, select the Create credentials drop down menu and click OAuth client ID. Select Web application, and fill out the details. Once created you will have the option to down the JSON file containing the generated secret. Optionally, you can add install/ssl/apache.crt and apache.key file if you have an SSL certificate you would prefer to use. @@ -32,16 +32,23 @@ When the FIRST server is installed, no engines are installed. FIRST comes with t .. note:: - Before engines can be installed, the developer must be registered with the system. Ensure the developer is registered before progressing. + Before engines can be installed, the developer must be registered with the system. This can be accomplished through the web UI if OAuth has been setup or manually by the user_shell.py located in the utilities folder. + + .. code:: + + $ cd FIRST-server/server/utilities + $ python user_shell.py + + Ensure the developer is registered before progressing. Python script ``engine_shell.py`` can be provided with command line arguments or used as a shell. To quickly install the three available engines run the below commands: .. 
code:: $ cd FIRST-server/server/utilities - $ python engine_shell.py install first.engines.exact_match ExactMatchEngine - $ python engine_shell.py install first.engines.mnemonic_hash MnemonicHashEngine - $ python engine_shell.py install first.engines.basic_masking BasicMaskingEngine + $ python engine_shell.py install first_core.engines.exact_match ExactMatchEngine + $ python engine_shell.py install first_core.engines.mnemonic_hash MnemonicHashEngine + $ python engine_shell.py install first_core.engines.basic_masking BasicMaskingEngine Once an engine is installed you can start using your FIRST installation to add and/or query for annotations. Without engines FIRST will still be able to store annotations, but will never return any results for query operations. diff --git a/server/first_core/auth.py b/server/first_core/auth.py index 756b7e6..9cd474d 100644 --- a/server/first_core/auth.py +++ b/server/first_core/auth.py @@ -75,7 +75,7 @@ def decorated_function(*args, **kwargs): if key: user = verify_api_key(key) del kwargs['api_key'] - if user: + if user and user.active: kwargs['user'] = user return view_function(*args, **kwargs) diff --git a/server/first_core/disassembly/__init__.py b/server/first_core/disassembly/__init__.py index 34a6c1b..4bf79de 100644 --- a/server/first_core/disassembly/__init__.py +++ b/server/first_core/disassembly/__init__.py @@ -15,7 +15,7 @@ 'intel16' : (CS_ARCH_X86, CS_MODE_16), 'sysz' : (CS_ARCH_SYSZ, None), 'arm32' : (CS_ARCH_ARM, CS_MODE_ARM), - 'intel32' : (CS_ARCH_X86, CS_MODE_ARM), + 'intel32' : (CS_ARCH_X86, CS_MODE_32), 'intel64' : (CS_ARCH_X86, CS_MODE_64), 'sparc' : (CS_ARCH_SPARC, None), 'arm64' : (CS_ARCH_ARM64, CS_MODE_ARM), @@ -59,6 +59,48 @@ 'mips' : MIPS_OP_INVALID, 'mips64' : MIPS_OP_INVALID } +_call_mapping = { + 'ppc' : [], + 'sysz' : [], + 'x86' : [X86_INS_CALL], + 'sysz' : [], + 'sparc' : [], + 'arm' : [], + 'arm64' : [], + 'mips' : [] +} +call_mapping = { + 'ppc' : _call_mapping['ppc'], + 'ppc32' : _call_mapping['ppc'], 
+ 'ppc64' : _call_mapping['ppc'], + 'sysz' : _call_mapping['sysz'], + 'intel16' : _call_mapping['x86'], + 'intel32' : _call_mapping['x86'], + 'intel64' : _call_mapping['x86'], + 'sparc' : _call_mapping['sparc'], + 'arm32' : _call_mapping['arm'], 'arm64' : _call_mapping['arm64'], + 'mips' : _call_mapping['mips'], 'mips64' : _call_mapping['mips'] +} + +_jump_mapping = { + 'x86' : [ X86_INS_JA, X86_INS_JAE, X86_INS_JB, X86_INS_JBE, X86_INS_JCXZ, + X86_INS_JE, X86_INS_JECXZ, X86_INS_JG, X86_INS_JGE, X86_INS_JL, + X86_INS_JLE, X86_INS_JMP, X86_INS_JNE, X86_INS_JNO, X86_INS_JNP, + X86_INS_JNS, X86_INS_JO, X86_INS_JP, X86_INS_JRCXZ, X86_INS_JS, + X86_INS_LJMP] +} +jump_mapping = { + 'intel16' : _jump_mapping['x86'], + 'intel32' : _jump_mapping['x86'], + 'intel64' : _jump_mapping['x86'] +} + +stack_offsets = { + 'intel16' : [X86_REG_SP], + 'intel32' : [X86_REG_EBP, X86_REG_ESP], + 'intel64' : [X86_REG_RSP] +} + class Disassembly(object): def __init__(self, architecture, code): @@ -73,30 +115,37 @@ def __init__(self, architecture, code): if architecture in arch_mapping: arch, mode = arch_mapping[architecture] self.md = Cs(arch, mode) - self.md.details = True + self.md.detail = True self.iterator = self.md.disasm(self.code, 0) self.valid = True + + def instructions(self): # When first called function will return cached instructions for i in xrange(len(self.data)): yield self.data[i] # Then iterate through non-cached instructions - if not self.iterator: + if self.iterator: for i in self.iterator: self.data.append(i) yield i self.iterator = None - def _check_mapping(self, mapping, operand): - if ((not hasattr(operand, 'type')) + + def _check_mapping(self, mapping, operand, attr='type', equal=True): + if ((not hasattr(operand, attr)) or (self.architecture not in mapping)): False - return operand.type == mapping[self.architecture] + if equal: + return getattr(operand, attr) == mapping[self.architecture] + + return getattr(operand, attr) in mapping[self.architecture] + # Operand 
Related Functionality def is_op_reg(self, operand): return self._check_mapping(reg_mapping, operand) @@ -108,3 +157,16 @@ def is_op_imm(self, operand): def is_op_invalid(self, operand): return self._check_mapping(invalid_mapping, operand) + + def is_stack_offset(self, operand): + if not hasattr(operand, 'mem'): + return False + return self._check_mapping(stack_offsets, operand.mem, 'base', False) + + + # Instruction Related functionality + def is_call(self, instr): + return self._check_mapping(call_mapping, instr, 'id', False) + + def is_jump(self, instr): + return self._check_mapping(jump_mapping, instr, 'id', False) diff --git a/server/first_core/engines/basic_masking.py b/server/first_core/engines/basic_masking.py new file mode 100644 index 0000000..f166f7f --- /dev/null +++ b/server/first_core/engines/basic_masking.py @@ -0,0 +1,258 @@ +#------------------------------------------------------------------------------- +# +# FIRST Engine: Basic Masking +# Author: Angel M. Villegas (anvilleg@cisco.com) +# Last Modified: August 2017 +# +# Uses Capstone to obtain instructions and then removes certain instruction +# details to normalize it into a standard form to be compared to other +# functions. +# +# Masks out: +# - ESP/EBP Offsets +# - Absolute Calls?? +# - Global Offsets?? 
+# +# Requirements +# ------------ +# - Capstone +# +# Installation +# ------------ +# None +# +#------------------------------------------------------------------------------- + +# Python Modules +import re +from hashlib import sha256 + +# FIRST Modules +from first_core.error import FIRSTError +from first_core.engines import AbstractEngine +from first_core.engines.results import FunctionResult + +# Third Party Modules +from capstone import * +from django.db import models +from django.core.exceptions import ObjectDoesNotExist + +MIN_REQUIRED_INSTRUCTIONS = 8 + +class BasicMasking(models.Model): + sha256 = models.CharField(max_length=64) + architecture = models.CharField(max_length=64) + + total_bytes = models.IntegerField() + functions = models.ManyToManyField('BasicMaskingFunction') + + class Meta: + app_label = 'engines' + index_together = ('sha256', 'architecture') + unique_together = ('sha256', 'architecture', 'total_bytes') + + def dump(self): + return {'sha256' : self.sha256, + 'architecture' : self.architecture, + 'total_bytes' : self.total_bytes, + 'functions' : self.functions.all()} + +class BasicMaskingFunction(models.Model): + func = models.BigIntegerField() + + class Meta: + app_label = 'engines' + + +class BasicMaskingEngine(AbstractEngine): + _name = 'BasicMasking' + _description = ('Masks calls/jmps offsets. 
Requires at least 8 instructions.') + _required_db_names = ['first_db'] + + def normalize(self, disassembly): + if not disassembly: + return (0, None) + + changed_bytes = 0 + + try: + normalized = [] + original = [] + for i in disassembly.instructions(): + original.append(str(i.bytes).encode('hex')) + instr = ''.join(chr(x) for x in i.opcode if x) + + # Special mnemonic masking (Call, Jmp, JCC) + if disassembly.is_call(i) or disassembly.is_jump(i): + operand = i.op_str + + if disassembly.is_op_imm(i.operands[0]): + changed_bytes += len(i.bytes) - len(instr) + + # TODO: Add capability to mask off stack reg for more + # than Intel + #elif (disassembly.is_op_mem(i.operands[0]) + # and disassembly.is_stack_offset(i.operands[0])): + # instr += i.reg_name(i.operands[0].value.reg) + # #changed_bits += i.operands[0].dispSize + else: + instr += ''.join(chr(x) for x in i.bytes[len(instr):]) + + normalized.append(instr) + continue + + else: + normalized.append(str(i.bytes)) + + ''' + # Below code is from Distorm3 version + # TODO: Migrate to and understand how to accomplish in Capstone + operand_instrs = [] + for operand_obj in i.operands: + # TODO + #operand = operand_obj._toText() + if ((re.match('^\[E(S|B)P', operand) or re.match('^\[R(I|S)P', operand)) + and operand_obj.dispSize): + # Offset from EBP/ESP and RIP/RSP + masked = operand.replace(hex(operand_obj.disp), '0x') + operand_instrs.append(masked) + changed_bits += operand_obj.dispSize + + elif 'Immediate' == operand_obj.type: + value = operand_obj.value + # Masking off immediates within the standard VA of the sample + if ((0x400000 <= value <= 0x500000) + or (0x10000000 <= value <= 0x20000000) + or (0x1C0000000 <= value <= 0x1D0000000) + or (0x140000000 <= value <= 0x150000000)): + operand_instrs.append('0x') + changed_bits += operand_obj.size + + else: + operand_instrs.append(operand) + + elif 'AbsoluterMemoryAddress' == operand_obj.type: + operand_instrs.append('0x') + changed_bits += operand_obj.dispSize + + elif 
'AbsoluteMemory' == operand_obj.type: + masked = operand.replace(hex(operand_obj.disp), '0x') + operand_instrs.append(masked) + changed_bits += operand_obj.dispSize + + else: + operand_instrs.append(operand) + + normalized.append(instr + ', '.join(operand_instrs)) + ''' + + print 'Original' + print original + print 'Normalized' + print [x.encode('hex') for x in normalized] + + if MIN_REQUIRED_INSTRUCTIONS > len(normalized): + print 145 + return (0, None) + + h_sha256 = sha256(''.join(normalized)).hexdigest() + print (changed_bytes, h_sha256) + return (changed_bytes, h_sha256) + + except Exception as e: + print 160, e + + return (0, None) + + def _add(self, function): + ''' + Masks specific details from the disassembly to provide a fuzzy hash. + ''' + opcodes_size = len(function['opcodes']) + architecture = function['architecture'] + disassembly = function.get('disassembly') + changed, h_sha256 = self.normalize(disassembly) + + if not h_sha256: + return + + + try: + db_obj = BasicMasking.objects.get(sha256=h_sha256, + architecture=architecture) + except ObjectDoesNotExist: + db_obj = BasicMasking.objects.create(sha256=h_sha256, + architecture=architecture, + total_bytes=opcodes_size) + + function_id = function['id'] + count = BasicMasking.objects.filter(sha256=h_sha256, + architecture=architecture, + functions__func=function_id).count() + + if not count: + func, _ = BasicMaskingFunction.objects.get_or_create(func=function_id) + db_obj.functions.add(func) + + def _scan(self, opcodes, architecture, apis, disassembly): + '''Returns List of tuples (function ID, similarity percentage)''' + db = self._dbs['first_db'] + changed, h_sha256 = self.normalize(disassembly) + + if not h_sha256: + return + + try: + db_obj = BasicMasking.objects.get(sha256=h_sha256, + architecture=architecture) + except ObjectDoesNotExist: + return None + + + results = [] + for f in db_obj.functions.all(): + similarity = 75.0 + function_id = f.func + function = db.find_function(_id=function_id) + + 
if (not function) or (not function.metadata.count()): + continue + + # Similarity = 90% (opcodes and the masking changes) + # + 10% (api overlap) + similarity = 100 - ((changed / (len(opcodes) * 8.0)) * 100) + if similarity > 90.0: + similarity = 90.0 + + # The APIs will count up to 10% of the similarity score + total_apis = function.apis.count() + if total_apis: + func_apis = {x['api'] for x in function.apis.values('api')} + overlap = float(len(func_apis.intersection(apis))) + similarity += (overlap / total_apis) * 10 + + results.append(FunctionResult(function_id, similarity)) + + return results + + def _install(self): + try: + from django.core.management import execute_from_command_line + except ImportError: + # The above import may fail for some other reason. Ensure that the + # issue is really that Django is missing to avoid masking other + # exceptions on Python 2. + try: + import django + except ImportError: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" 
+ ) + raise + execute_from_command_line(['manage.py', 'makemigrations', 'engines']) + execute_from_command_line(['manage.py', 'migrate', 'engines']) + + def _uninstall(self): + print 'Manually delete tables associated with {}'.format(self.engine_name) diff --git a/server/utilities/engine_shell.py b/server/utilities/engine_shell.py index 0a9973d..80059a1 100644 --- a/server/utilities/engine_shell.py +++ b/server/utilities/engine_shell.py @@ -246,7 +246,7 @@ def do_populate(self, line): engines = [] for engine_name in populate_engines: if engine_name not in all_engines: - print '[Error] Engine "{}" is not installed' + print '[Error] Engine "{}" is not installed'.format(engine_name) continue engines.append(all_engines[engine_name]) diff --git a/server/utilities/populate_engine.py b/server/utilities/populate_engine.py deleted file mode 100644 index 0417b4d..0000000 --- a/server/utilities/populate_engine.py +++ /dev/null @@ -1,90 +0,0 @@ -#------------------------------------------------------------------------------- -# -# Sends all function data to engine for it to be processed by engine -# Copyright (C) 2016 Angel M. Villegas -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
-# -# Requirements -# ------------ -# Flask's mongoengine (https://pypi.python.org/pypi/flask-mongoengine/) -# -#------------------------------------------------------------------------------- - -# Python Modules -from argparse import ArgumentParser - -# FIRST Modules -from first_core import EngineManager, DBManager - -def main(): - global total, completed, operation_complete - - parser = ArgumentParser(description='Populate engine\'s metadata.') - parser.add_argument('engines', metavar='E', type=str, nargs='+', - help='an engine name to populate') - - args = parser.parse_args() - - db = DBManager.first_db - if not db: - print '[Error] Unable to connect to FIRST DB, exiting...' - return - - # Get all engines the user entered - all_engines = EngineManager.get_engines() - engines = [] - for engine_name in args.engines: - if engine_name not in all_engines: - print '[Error] Engine "{}" is not installed' - continue - - engines.append(all_engines[engine_name]) - - if not engines: - print 'No engines to populate, exiting...' - return - - print 'Starting to populate engines:\n-\t{}'.format('\n-\t'.join([e.name for e in engines])) - functions = db.get_all_functions() - total = len(functions) - - msg = ' [Status] {0:.2f}% Completed ({1} out of {2})' - errors = [] - i = 0.0 - for function in functions: - details = function.dump() - del details['metadata'] - - for engine in engines: - try: - engine.add(details) - - except Exception as e: - msg = '[Error] Engine "{}": {}'.format(engine.name, e) - errors.append(msg) - print msg - - i += 1 - if 0 == (i % 25): - print msg.format((i / total) * 100, int(i), total) - - # Wait for thread to end - print 'Populating engines complete, exiting...' 
- if errors: - print 'The below errors occured:\n{}'.format('\n'.join(errors)) - -if __name__ == '__main__': - main() diff --git a/server/utilities/user_shell.py b/server/utilities/user_shell.py new file mode 100644 index 0000000..a10ad27 --- /dev/null +++ b/server/utilities/user_shell.py @@ -0,0 +1,232 @@ +#! /usr/bin/python +#------------------------------------------------------------------------------- +# +# Utility Shell to manage Engine related operations +# Copyright (C) 2016 Angel M. Villegas +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +#------------------------------------------------------------------------------- +# Python Modules +import re +import os +import sys +from cmd import Cmd +from uuid import uuid4 +from pprint import pprint +from argparse import ArgumentParser + +# Add app package to sys path +sys.path.append(os.path.abspath('..')) + +# FIRST Modules +import first.wsgi +import first.settings +from first_core.engines import AbstractEngine +from first_core.disassembly import Disassembly +from first_core import DBManager, EngineManager +from first_core.models import Engine, User, Function + +# Third Party Modules +from django.core.paginator import Paginator + +class EngineCmd(Cmd): + + def __init__(self): + Cmd.__init__(self) + self.prompt = 'FIRST>> ' + + def emptyline(self): + '''Prevent the resubmission of the last command''' + return + + def default(self, line): + print '"{}" is unknown command'.format(line) + + def preloop(self): + print ( '\n\n' + '+========================================================+\n' + '| FIRST User Shell Menu |\n' + '+========================================================+\n' + '| list | List all engines currently installed |\n' + '| info | Get info on an engine |\n' + '| adduser | Registers a user manually |\n' + '| enable | Enable engine (Engine will be enabled) |\n' + '| disable | Disable user account |\n' + '+--------------------------------------------------------+\n') + + def postcmd(self, stop, line): + if not stop: + self.preloop() + return stop + + def do_back(self, line): + '''Step out of current shell''' + return 1 + + def do_exit(self, line): + '''Exit shell''' + sys.exit(0) + + def do_quit(self, line): + '''Exit shell''' + sys.exit(0) + + def do_shell(self, line): + '''Run line in python''' + exec line + +class RootCmd(EngineCmd): + def do_list(self, line): + print 'list - List all registered users' + if line in ['help', '?']: + print 'Usage: list \n' + return + + print 'Registered Users\n' + if User.objects.count() == 0: + print 'No 
users are registered' + return + + header = ( '+{}+{}+\n'.format('-' * 39, '-' * 10) + + '| {0:^37} | {1:^8} |\n'.format('User Handle', 'Active') + + '+{}+{}+'.format('-' * 39, '-' * 10)) + i = 0 + for user in User.objects.all(): + handle = user.user_handle + if (i % 15) == 0: + print header + print '| {0:37} | {1:^8} |'.format(handle, user.active) + i += 1 + + print '+{}+{}+'.format('-' * 39, '-' * 10) + + def do_adduser(self, line): + print 'info - Manually add user to FIRST' + if line in ['', 'help', '?']: + print 'Usage: adduser ' + return + + line = line.split(' ') + if len(line) !=2: + print 'The correct arguments were not provided.' + return + + # Verify handle provided is valid + handle, num = self._expand_user_handle(line[0]) + if None in [handle, num]: + return + + if not re.match(r'^[a-zA-Z\d\._]+@[a-zA-Z\d\.\-_]+(?:\.[a-zA-Z]{2,4})+$', line[1]): + print 'Invalid email provided.' + return + + email = line[1] + user = self._get_db_user_obj(line[0]) + if user: + print 'User {} already exists'.format(line[0]) + return + + user = User(email=email, handle=handle, number=num, api_key=uuid4()) + user.name = raw_input('Enter user name: ') + user.save() + + print 'User {0.user_handle} created (api key: {0.api_key})'.format(user) + + + def do_info(self, line): + print 'info - Displays details about a registered User' + if line in ['', 'help', '?']: + print 'Usage: info ' + return + + user = self._get_db_user_obj(line) + if not user: + return + + print ('+' + '-'*65 + '+\n' + '| Name | {0.name:53} |\n' + '+' + '-'*9 + '+' + '-'*55 + '\n' + '| Email | {0.email:53} |\n' + '+' + '-'*9 + '+' + '-'*55 + '\n' + '| Handle | {0.user_handle:53} |\n' + '+' + '-'*9 + '+' + '-'*55 + '\n' + '| Created | {1:53} |\n' + '+' + '-'*9 + '+' + '-'*55 + '\n' + '| Active | {0.active:53} |\n' + '+' + '-'*69 + '+\n').format(user, str(user.created)) + + def do_enable(self, line): + print 'enable - Enable user \n' + if line in ['', 'help', '?']: + print 'Usage: enable ' + return + + user = 
self._get_db_user_obj(line) + if not user: + return + + user.active = True + user.save() + print 'User "{}" enabled'.format(line) + + def do_disable(self, line): + print 'disable - Disable user \n' + if line in ['', 'help', '?']: + print 'Usage: disable ' + return + + user = self._get_db_user_obj(line) + if not user: + return + + user.active = False + user.save() + print 'User "{}" disabled'.format(line) + + def _expand_user_handle(self, user_handle): + matches = re.match('^([^#]+)#(\d{4})$', user_handle) + if not matches: + print 'The provided handle is invalid' + return (None, None) + + handle, num = matches.groups() + return (handle, int(num)) + + + def _get_db_user_obj(self, line): + handle, num = self._expand_user_handle(line) + if None in [handle, num]: + return + + user = User.objects.filter(handle=handle, number=int(num)) + if not user: + print 'Unable to locate User handle "{}"'.format(line) + return + + return user.get() + + +if __name__ == '__main__': + shell = RootCmd() + if len(sys.argv) > 1: + shell.onecmd(' '.join(sys.argv[1:])) + sys.exit(0) + + while 1: + try: + shell.cmdloop() + except Exception as err: + pprint(err) diff --git a/server/www/templates/www/base_site.html b/server/www/templates/www/base_site.html index 01d4e26..8b1b6f8 100644 --- a/server/www/templates/www/base_site.html +++ b/server/www/templates/www/base_site.html @@ -154,7 +154,7 @@
- Closed Beta + Beta
@@ -425,7 +425,7 @@

Install

Documentation

- Check out our Docs. + Check out our Docs (Server - IDA Plugin).

From c5afac8740b81bd5c57cbde47acf3e0b0b85e804 Mon Sep 17 00:00:00 2001 From: demonduck Date: Fri, 1 Sep 2017 20:00:20 -0400 Subject: [PATCH 08/17] adding forgotten migrations folder --- server/engines/migrations/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 server/engines/migrations/__init__.py diff --git a/server/engines/migrations/__init__.py b/server/engines/migrations/__init__.py new file mode 100644 index 0000000..e69de29 From 70cdd9ea77a9c412521949accc27f1e52affe0b8 Mon Sep 17 00:00:00 2001 From: demonduck Date: Mon, 11 Sep 2017 08:49:29 -0400 Subject: [PATCH 09/17] Code cleanup --- install/requirements.txt | 3 +-- server/first_core/engines/basic_masking.py | 7 ------- server/utilities/user_shell.py | 18 ++++++++---------- 3 files changed, 9 insertions(+), 19 deletions(-) diff --git a/install/requirements.txt b/install/requirements.txt index 3bb0125..0831050 100644 --- a/install/requirements.txt +++ b/install/requirements.txt @@ -1,8 +1,7 @@ mysqlclient -mongoengine django werkzeug -distorm3 +capstone httplib2 oauth2client google-api-python-client diff --git a/server/first_core/engines/basic_masking.py b/server/first_core/engines/basic_masking.py index f166f7f..46c88cd 100644 --- a/server/first_core/engines/basic_masking.py +++ b/server/first_core/engines/basic_masking.py @@ -146,17 +146,11 @@ def normalize(self, disassembly): normalized.append(instr + ', '.join(operand_instrs)) ''' - print 'Original' - print original - print 'Normalized' - print [x.encode('hex') for x in normalized] - if MIN_REQUIRED_INSTRUCTIONS > len(normalized): print 145 return (0, None) h_sha256 = sha256(''.join(normalized)).hexdigest() - print (changed_bytes, h_sha256) return (changed_bytes, h_sha256) except Exception as e: @@ -176,7 +170,6 @@ def _add(self, function): if not h_sha256: return - try: db_obj = BasicMasking.objects.get(sha256=h_sha256, architecture=architecture) diff --git a/server/utilities/user_shell.py 
b/server/utilities/user_shell.py index a10ad27..c472eab 100644 --- a/server/utilities/user_shell.py +++ b/server/utilities/user_shell.py @@ -1,7 +1,7 @@ #! /usr/bin/python #------------------------------------------------------------------------------- # -# Utility Shell to manage Engine related operations +# Utility Shell to manage User related operations # Copyright (C) 2016 Angel M. Villegas # # This program is free software; you can redistribute it and/or modify @@ -34,15 +34,13 @@ # FIRST Modules import first.wsgi import first.settings -from first_core.engines import AbstractEngine from first_core.disassembly import Disassembly -from first_core import DBManager, EngineManager -from first_core.models import Engine, User, Function +from first_core.models import User # Third Party Modules from django.core.paginator import Paginator -class EngineCmd(Cmd): +class UserCmd(Cmd): def __init__(self): Cmd.__init__(self) @@ -60,10 +58,10 @@ def preloop(self): '+========================================================+\n' '| FIRST User Shell Menu |\n' '+========================================================+\n' - '| list | List all engines currently installed |\n' - '| info | Get info on an engine |\n' + '| list | List all users currently installed |\n' + '| info | Get info on an user |\n' '| adduser | Registers a user manually |\n' - '| enable | Enable engine (Engine will be enabled) |\n' + '| enable | Enable user |\n' '| disable | Disable user account |\n' '+--------------------------------------------------------+\n') @@ -88,7 +86,7 @@ def do_shell(self, line): '''Run line in python''' exec line -class RootCmd(EngineCmd): +class RootCmd(UserCmd): def do_list(self, line): print 'list - List all registered users' if line in ['help', '?']: @@ -166,7 +164,7 @@ def do_info(self, line): '| Created | {1:53} |\n' '+' + '-'*9 + '+' + '-'*55 + '\n' '| Active | {0.active:53} |\n' - '+' + '-'*69 + '+\n').format(user, str(user.created)) + '+' + '-'*65 + '+\n').format(user, 
str(user.created)) def do_enable(self, line): print 'enable - Enable user \n' From b86049a3fc25a46bddcfdb2f05964380adbd15d3 Mon Sep 17 00:00:00 2001 From: demonduck Date: Wed, 13 Sep 2017 11:58:55 -0400 Subject: [PATCH 10/17] bug fixes --- docs/conf.py | 3 +-- server/first_core/auth.py | 13 +++++++------ server/first_core/dbs/builtin_db.py | 6 ++---- server/www/views.py | 2 +- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 05ddb4f..20738bf 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,8 +42,7 @@ # Autodoc settings autoclass_content = 'class' autodoc_mock_imports = ['httplib2', 'oauth2client', 'apiclient', 'mongoengine', - 'django', 'bson', 'distorm3', 'mongoengine.queryset', - 'bson.objectid'] + 'django', 'capstone', 'mongoengine.queryset'] class _MyMockModule(sphinx.ext.autodoc._MockModule): '''Class created to get around autodoc issues with server's dependencies.''' diff --git a/server/first_core/auth.py b/server/first_core/auth.py index 9cd474d..cc4b775 100644 --- a/server/first_core/auth.py +++ b/server/first_core/auth.py @@ -32,12 +32,14 @@ from functools import wraps # Django Modules +from django.core.exceptions import ObjectDoesNotExist from django.http import HttpResponse, HttpRequest from django.shortcuts import render, redirect from django.urls import reverse # FIRST Modules # TODO: Use DBManager to get user objects and do User operations +from first.settings import CONFIG from first_core.models import User from first_core.error import FIRSTError @@ -45,7 +47,6 @@ import httplib2 from oauth2client import client from apiclient import discovery -from mongoengine.queryset import DoesNotExist @@ -118,7 +119,7 @@ class Authentication(): def __init__(self, request): self.request = request redirect_uri = request.build_absolute_uri(reverse('www:oauth', kwargs={'service' : 'google'})) - secret = os.environ.get('GOOGLE_SECRET', '/usr/local/etc/google_secret.json') + secret = CONFIG.get('oauth_path', 
'/usr/local/etc/google_secret.json') try: self.flow = {'google' : client.flow_from_clientsecrets(secret, scope=['https://www.googleapis.com/auth/userinfo.profile', @@ -200,7 +201,7 @@ def login_step_2(self, auth_code, url, login=True): return redirect(url) - except DoesNotExist: + except ObjectDoesNotExist: self.request.session.flush() raise FIRSTAuthError('User is not registered.') @@ -236,7 +237,7 @@ def register_user(self): user = None continue - except DoesNotExist: + except ObjectDoesNotExist: pass # Create random 4 digit value for the handle @@ -248,7 +249,7 @@ def register_user(self): user = User.objects.get(handle=handle, number=num) user = None - except DoesNotExist: + except ObjectDoesNotExist: user = User(name=name, email=email, api_key=api_key, @@ -269,5 +270,5 @@ def get_user_data(email): user = User.objects.get(email=email) return user - except DoesNotExist: + except ObjectDoesNotExist: return None diff --git a/server/first_core/dbs/builtin_db.py b/server/first_core/dbs/builtin_db.py index 4bfd76a..e140b9d 100644 --- a/server/first_core/dbs/builtin_db.py +++ b/server/first_core/dbs/builtin_db.py @@ -32,8 +32,6 @@ from hashlib import md5 # Third Party Modules -import bson - from django.utils import timezone from django.core.paginator import Paginator from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned @@ -161,14 +159,14 @@ def get_function(self, opcodes, architecture, apis, create=False, **kwargs): try: function = Function.objects.get(sha256=sha256_hash, - opcodes=bson.Binary(opcodes), + opcodes=opcodes, architecture=architecture) #, #apis__api=apis) except ObjectDoesNotExist: if create: # Create function and add it to sample function = Function.objects.create( sha256=sha256_hash, - opcodes=bson.Binary(opcodes), + opcodes=opcodes, architecture=architecture) apis_ = [FunctionApis.objects.get_or_create(x)[0] for x in apis] diff --git a/server/www/views.py b/server/www/views.py index 6fb7458..089658d 100644 --- 
a/server/www/views.py +++ b/server/www/views.py @@ -38,7 +38,7 @@ def profile(request): if not user: return redirect(reverse('www:index')) - count = Function.objects(metadata__user=user).count() + count = Function.objects.filter(metadata__user=user).count() data = {'title' : 'Profile', 'user' : user.dump(True), 'metadata_count' : count} From ac5bd964a40022bf9bd68677937ee9cd25a52850 Mon Sep 17 00:00:00 2001 From: demonduck Date: Wed, 13 Sep 2017 12:22:47 -0400 Subject: [PATCH 11/17] removing auto imports no longer used --- docs/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 20738bf..ce9f27d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -41,8 +41,8 @@ # Autodoc settings autoclass_content = 'class' -autodoc_mock_imports = ['httplib2', 'oauth2client', 'apiclient', 'mongoengine', - 'django', 'capstone', 'mongoengine.queryset'] +autodoc_mock_imports = ['httplib2', 'oauth2client', 'apiclient', + 'django', 'capstone'] class _MyMockModule(sphinx.ext.autodoc._MockModule): '''Class created to get around autodoc issues with server's dependencies.''' From 56f78a886cce98e4410c13a46826e7a8f3e837e2 Mon Sep 17 00:00:00 2001 From: demonduck Date: Mon, 30 Oct 2017 11:11:03 -0400 Subject: [PATCH 12/17] web ui update Removed login button to prevent user confusion --- server/www/templates/www/base_site.html | 1 - 1 file changed, 1 deletion(-) diff --git a/server/www/templates/www/base_site.html b/server/www/templates/www/base_site.html index 8b1b6f8..c5127b0 100644 --- a/server/www/templates/www/base_site.html +++ b/server/www/templates/www/base_site.html @@ -563,7 +563,6 @@ From 3fcb2e3720c2f22b86f81e55dec3d829f0e03ee0 Mon Sep 17 00:00:00 2001 From: demonduck Date: Wed, 13 Dec 2017 10:05:12 -0500 Subject: [PATCH 13/17] fix for collapsing results and updated docs --- docs/index.rst | 13 ++++++++++--- server/first_core/engines/basic_masking.py | 2 +- server/first_core/engines/mnemonic_hash.py | 2 +- 3 files changed, 12 
insertions(+), 5 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 0e29530..5c531e0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -17,9 +17,16 @@ Installing your own FIRST server can be quick and easy with an Ubuntu machine an **After cloning the Git repo** - Save your google auth json information to install/google_secret.json. To generate a google_secret.json file you will need to go to https://console.developers.google.com, create a project, select the project, select Credentials in the left set of links under APIs & services. Once selected, select the Create credentials drop down menu and click OAuth client ID. Select Web application, and fill out the details. Once created you will have the option to down the JSON file containing the generated secret. + Save your google auth json information to install/google_secret.json. To generate a google_secret.json file you will need to go to https://console.developers.google.com, create a project, select the project, select Credentials in the left set of links under APIs & services. Once selected, select the Create credentials drop down menu and click OAuth client ID. Select Web application, and fill out the details. Set the Authorized redirect URIs to your server name with `/oauth/google` - Optionally, you can add install/ssl/apache.crt and apache.key file if you have an SSL certificate you would prefer to use. + Examples + + .. code:: + + http://localhost:8888/oauth/google + http://first.talosintelligence.com/oauth/google + + Once created you will have the option to down the JSON file containing the generated secret. Optionally, you can add install/ssl/apache.crt and apache.key file if you have an SSL certificate you would prefer to use. .. code:: @@ -37,7 +44,7 @@ When the FIRST server is installed, no engines are installed. FIRST comes with t .. code:: $ cd FIRST-server/server/utilities - $ python user_shell.py + $ python user_shell.py adduser Ensure the developer is registered before progressing. 
diff --git a/server/first_core/engines/basic_masking.py b/server/first_core/engines/basic_masking.py index 46c88cd..75d8097 100644 --- a/server/first_core/engines/basic_masking.py +++ b/server/first_core/engines/basic_masking.py @@ -224,7 +224,7 @@ def _scan(self, opcodes, architecture, apis, disassembly): overlap = float(len(func_apis.intersection(apis))) similarity += (overlap / total_apis) * 10 - results.append(FunctionResult(function_id, similarity)) + results.append(FunctionResult(str(function_id), similarity)) return results diff --git a/server/first_core/engines/mnemonic_hash.py b/server/first_core/engines/mnemonic_hash.py index 5829205..63bc636 100644 --- a/server/first_core/engines/mnemonic_hash.py +++ b/server/first_core/engines/mnemonic_hash.py @@ -141,7 +141,7 @@ def _scan(self, opcodes, architecture, apis, disassembly): else: similarity += 5 - results.append(FunctionResult(function_id, similarity)) + results.append(FunctionResult(str(function_id), similarity)) return results From 4fadc483348d0e8601549addf52ba611ddb64fc8 Mon Sep 17 00:00:00 2001 From: demonduck Date: Mon, 23 Apr 2018 08:10:15 -0400 Subject: [PATCH 14/17] fix for ubuntu/mysql client connection issue --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index f930b38..c97df40 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -30,7 +30,7 @@ services: - ./data/mongodb:/data/db restart: always mysql: - image: mysql + image: mysql:5.7 expose: - 3306 environment: From 7d1f021b031bcd4c1049bb51286dccae9bf1cd80 Mon Sep 17 00:00:00 2001 From: xabiugarte Date: Mon, 11 Feb 2019 11:49:40 -0500 Subject: [PATCH 15/17] Use the Google Userinfo API instead of Google Plus --- server/first_core/auth.py | 10 ++++------ server/www/migrations/0001_initial.py | 6 +++--- server/www/models.py | 2 +- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/server/first_core/auth.py b/server/first_core/auth.py index 
cc4b775..bd05ba9 100644 --- a/server/first_core/auth.py +++ b/server/first_core/auth.py @@ -179,12 +179,10 @@ def login_step_2(self, auth_code, url, login=True): if not oauth.access_token_expired: http_auth = oauth.authorize(httplib2.Http()) - service = discovery.build('plus', 'v1', http_auth) - info = service.people().get(userId='me', fields='displayName,emails') - info = info.execute() - email = info['emails'][0]['value'] - self.request.session['info'] = {'name' : info['displayName'], - 'email' : email} + service = discovery.build('oauth2', 'v2', http_auth) + response = service.userinfo().v2().me().get().execute() + self.request.session['info'] = {'name' : response['name'], + 'email' : response['email']} expires = credentials['id_token']['exp'] #expires = datetime.datetime.fromtimestamp(expires) diff --git a/server/www/migrations/0001_initial.py b/server/www/migrations/0001_initial.py index c272eed..6d695b7 100644 --- a/server/www/migrations/0001_initial.py +++ b/server/www/migrations/0001_initial.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Generated by Django 1.11.4 on 2017-08-25 16:11 +# Generated by Django 1.11.18 on 2019-02-11 15:48 from __future__ import unicode_literals from django.db import migrations, models @@ -29,7 +29,7 @@ class Migration(migrations.Migration): fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('name', models.CharField(max_length=16, unique=True)), - ('description', models.CharField(max_length=128)), + ('description', models.CharField(max_length=256)), ('path', models.CharField(max_length=256)), ('obj_name', models.CharField(max_length=32)), ('active', models.BooleanField(default=False)), @@ -109,7 +109,7 @@ class Migration(migrations.Migration): ('rank', models.BigIntegerField(default=0)), ('active', models.BooleanField(default=True)), ('service', models.CharField(max_length=16)), - ('auth_data', models.CharField(max_length=4096)), + ('auth_data', 
models.CharField(max_length=32768)), ], options={ 'db_table': 'User', diff --git a/server/www/models.py b/server/www/models.py index be8710c..a0efb4a 100644 --- a/server/www/models.py +++ b/server/www/models.py @@ -41,7 +41,7 @@ class User(models.Model): active = models.BooleanField(default=True) service = models.CharField(max_length=16) - auth_data = models.CharField(max_length=4096) + auth_data = models.CharField(max_length=32768) @property def user_handle(self): From 01c5aaf0c228166c7839f78e4dd307a6cf2d90b9 Mon Sep 17 00:00:00 2001 From: xabiugarte Date: Wed, 27 Feb 2019 06:44:55 -0500 Subject: [PATCH 16/17] Added MongoDB to MySQL migration scripts --- server/utilities/migrate_data.py | 245 ++++++++++++++++++++++++++++++ server/utilities/mysql_import.sql | 66 ++++++++ 2 files changed, 311 insertions(+) create mode 100644 server/utilities/migrate_data.py create mode 100644 server/utilities/mysql_import.sql diff --git a/server/utilities/migrate_data.py b/server/utilities/migrate_data.py new file mode 100644 index 0000000..e3637fe --- /dev/null +++ b/server/utilities/migrate_data.py @@ -0,0 +1,245 @@ +#!/usr/bin/python +import json +import os +import base64 +import binascii +import argparse + +def main(prefix): + + # Declarations for ID counters + function_ids = {} + function_id_counter = 1 + + apis_ids = {} + apis_id_counter = 1 + + user_ids = {} + user_id_counter = 1 + + sample_ids = {} + sample_id_counter = 1 + + engine_ids = {} + engine_id_counter = 1 + + metadata_details_ids = {} + metadata_details_id_counter = 1 + + metadata_id_counter = 1 + + # Collections used to keep temporary data + applied_metadata_temp = [] + + # User + with open(os.path.join(prefix, "user.json"), "r") as f: + with open(os.path.join(prefix, "User"), "w") as f_out: + for l in f: + d = json.loads(l.strip()) + f_out.write("0|%s|%s|%s|%d|%s|%s|%d|%d|%s|%s\n" % (d['name'], + d['email'], + d['handle'], + d['number'], + binascii.hexlify(base64.b64decode(d['api_key']["$binary"])).lower(), + 
str(d['created']['$date'])[:-5] + "Z", + int(d['rank']['$numberLong']), + 1 if d['active'] else 0, + d['service'], + d['auth_data'])) + user_ids[d["_id"]["$oid"]] = user_id_counter + user_id_counter += 1 + + # Functions, Function APIs, Metadata + with open(os.path.join(prefix, "function.json"), "r") as f: + f_FunctionApis = open(os.path.join(prefix, "FunctionApis"), "w") + f_Function = open(os.path.join(prefix, "Function"), "w") + f_Function_apis = open(os.path.join(prefix, "Function_apis"), "w") + f_Metadata = open(os.path.join(prefix, "Metadata"), "w") + f_Function_metadata = open(os.path.join(prefix, "Function_metadata"), "w") + f_MetadataDetails = open(os.path.join(prefix, "MetadataDetails"), "w") + f_Metadata_details = open(os.path.join(prefix, "Metadata_details"), "w") + + # We need to keep track of unique functions, otherwise we might + # insert repeated records in the CSV. + unique_functions = {} + + # Keep track of unique metadata details, to avoid repetitions + unique_metadata_details = {} + + for l in f: + d = json.loads(l.strip()) + + opcodes_text = binascii.hexlify(base64.b64decode(d["opcodes"]["$binary"])).upper() + + if (d['sha256'], d['architecture']) not in unique_functions: + # Add new function + unique_functions[(d['sha256'], d['architecture'])] = function_id_counter + f_Function.write(("0|%s|%s|%s\n") % (d["sha256"], opcodes_text, d["architecture"])) + # Map of function_ids + function_ids[d["_id"]["$oid"]] = function_id_counter + function_id_counter += 1 + else: + # Duplicate function, reuse previous function id, but consider its linked data + function_ids[d["_id"]["$oid"]] = unique_functions[(d['sha256'], d['architecture'])] + #print("Discarding duplicate function... 
Reusing id %d" % function_ids[d["_id"]["$oid"]]) + + if "apis" in d: + for a in d["apis"]: + if a not in apis_ids: + apis_ids[a] = apis_id_counter + apis_id_counter += 1 + f_FunctionApis.write("0|%s\n" % (a)) + f_Function_apis.write("0|%d|%d\n" % (function_ids[d["_id"]["$oid"]], apis_ids[a])) + + if "metadata" in d: + # 0 - N Metadata records, each record is associated to a User and Function, + # and each Metadata record can be associated to several MetadataDetails. + for m in d["metadata"]: + # Get user id + if "user" in m and "$oid" in m["user"] and m["user"]["$oid"] in user_ids: + user_id = user_ids[m["user"]["$oid"]] + else: + user_id = 0 + + # This is an 1-N relationship between Metadata and User + f_Metadata.write("0|%d\n" % (user_id)) + # This an N-M relationship between Function and Metadata + f_Function_metadata.write("0|%d|%d\n" % (function_ids[d["_id"]["$oid"]], metadata_id_counter)) + + # Store temporarly the Applied relationship (N-M) between User, Metadata, and Sample + # We temporarily store the oid because we don't have the mapped ids yet. + if "applied" in m: + for application in m["applied"]: + applied_metadata_temp.append((metadata_id_counter, application[0], application[1])) + + if metadata_id_counter not in unique_metadata_details: + unique_metadata_details[metadata_id_counter] = [] + + # Metadata details (name, comment, committed, prototype) + nb_details = max(len(m.get("name", [])), len(m.get("comment", [])),len(m.get("committed", [])),len(m.get("prototype", []))) + for i in range(0, nb_details): + name = m["name"][i] if "name" in m and (len(m["name"]) > i) else "" + comment = m["comment"][i] if "comment" in m and (len(m["comment"]) > i) else "" + committed = m["committed"][i]["$date"][:-5] + "Z" if "committed" in m and (len(m["committed"]) > i) else "" + prototype = m["prototype"][i] if "prototype" in m and (len(m["prototype"]) > i) else "" + + # We consider only unique entries. 
Unique by: name, comment, prototype and metadata_id + # where metadata_id represents each unique (User,Function) tuple. + if (name, comment, prototype) not in unique_metadata_details[metadata_id_counter]: + unique_metadata_details[metadata_id_counter].append((name, comment, prototype)) + f_MetadataDetails.write("0|%s|%s|%s|%s\t\n" % (name, prototype, comment, committed)) + f_Metadata_details.write(("0|%d|%d\n" % (metadata_id_counter, metadata_details_id_counter))) + metadata_details_id_counter += 1 + + metadata_id_counter += 1 + + f_FunctionApis.close() + f_Function.close() + f_Function_apis.close() + f_Metadata.close() + f_Function_metadata.close() + f_MetadataDetails.close() + f_Metadata_details.close() + + # Sample + + sample_seen_by = [] + sample_functions = {} + + with open(os.path.join(prefix, "sample.json"), "r") as f: + f_Sample = open(os.path.join(prefix, "Sample"), "w") + f_Sample_seen_by = open(os.path.join(prefix, "Sample_seen_by"), "w") + + for l in f: + d = json.loads(l.strip()) + + if isinstance(d['crc32'], dict) and "$numberLong" in d['crc32']: + d['crc32'] = int(d['crc32']['$numberLong']) + if not 'sha1' in d: + d['sha1'] = "" + if not 'sha256' in d: + d['sha256'] = "" + + f_Sample.write("0|%s|%d|%s|%s|%s\n" % (d['md5'], d['crc32'], d['sha1'], d['sha256'], str(d['last_seen']['$date'])[:-5] + "Z")) + + # Seen by + for l in d['seen_by']: + if l['$oid'] in user_ids: + f_Sample_seen_by.write("0|%d|%d\n" % (sample_id_counter, user_ids[l['$oid']])) + + if sample_id_counter not in sample_functions: + sample_functions[sample_id_counter] = [] + + # Functions + for l in d['functions']: + if l['$oid'] in function_ids: + if function_ids[l['$oid']] not in sample_functions[sample_id_counter]: + sample_functions[sample_id_counter].append(function_ids[l['$oid']]) + + sample_ids[d["_id"]["$oid"]] = sample_id_counter + sample_id_counter += 1 + + f_Sample.close() + f_Sample_seen_by.close() + + f_Sample_functions = open(os.path.join(prefix, "Sample_functions"), "w") + 
for sid in sample_functions: + for fid in sample_functions[sid]: + f_Sample_functions.write("0|%d|%d\n" % (sid, fid)) + f_Sample_functions.close() + + # Engine + with open(os.path.join(prefix, "engine.json"), "r") as f: + f_Engine = open(os.path.join(prefix, "Engine"), "w") + for l in f: + d = json.loads(l.strip()) + + if 'developer' in d and '$oid' in d['developer'] and d['developer']['$oid'] in user_ids: + developer_id = user_ids[d['developer']['$oid']] + else: + developer_id = 0 + + f_Engine.write("0|%s|%s|%s|%s|%d|%d\n" % (d['name'], d['description'], d['path'], d['obj_name'], 1 if d['active'] else 0, developer_id)) + engine_ids[d["_id"]["$oid"]] = engine_id_counter + engine_id_counter += 1 + + # Applied metadata + + f = open(os.path.join(prefix, "AppliedMetadata"), "w") + for metadata_id, sample_oid, user_oid in applied_metadata_temp: + f.write("0|%d|%d|%d\n" % (metadata_id, sample_ids[sample_oid], user_ids[user_oid])) + f.close() + +if __name__ == "__main__": + description = """Convert mongoexport generated JSON Files into MySQL import CSV files. + + Expected input files: + + function.json + sample.json + engine.json + user.json + + These files should be generated by running the following commands over the + mongo database: + + mongoexport -d [database name] -c function -o function.json + mongoexport -d [database name] -c sample -o sample.json + mongoexport -d [database name] -c engine -o function.json + mongoexport -d [database name] -c user -o user.json + + Finally, the generated files can be imported into MySQL by running the mysql queries + in mysql_import.sql, from the directory where the output files were generated. + + mysql --user [user] --password --host [host] < /path/to/mysql_import.sql + + WARNING: These MySQL script requires the database tables to be created before-hand: + See FIRST-server documentation to understand how to generate and apply + the corresponding Django migrations. 
+ WARNING: This script handles function duplications, so the number of functions + in the mongo export and the resulting MySQL database might vary. + """ + parser = argparse.ArgumentParser(description=description, formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument('path', type=str, help='The path where the input json files (see --help) are located, and where the output files will be generated.') + args = parser.parse_args() + main(args.path) diff --git a/server/utilities/mysql_import.sql b/server/utilities/mysql_import.sql new file mode 100644 index 0000000..0dfaf42 --- /dev/null +++ b/server/utilities/mysql_import.sql @@ -0,0 +1,66 @@ +USE first_db; + +DELETE FROM AppliedMetadata; +ALTER TABLE AppliedMetadata AUTO_INCREMENT = 1; + +DELETE FROM Metadata_details; +ALTER TABLE Metadata_details AUTO_INCREMENT = 1; + +DELETE FROM Function_metadata; +ALTER TABLE Function_metadata AUTO_INCREMENT = 1; + +DELETE FROM MetadataDetails; +ALTER TABLE MetadataDetails AUTO_INCREMENT = 1; + +DELETE FROM Metadata; +ALTER TABLE Metadata AUTO_INCREMENT = 1; + +DELETE FROM Engine; +ALTER TABLE Engine AUTO_INCREMENT = 1; + +DELETE FROM Sample_seen_by; +ALTER TABLE Sample_seen_by AUTO_INCREMENT = 1; + +DELETE FROM Sample_functions; +ALTER TABLE Sample_functions AUTO_INCREMENT = 1; + +DELETE FROM Function_apis; +ALTER TABLE Function_apis AUTO_INCREMENT = 1; + +DELETE FROM FunctionApis; +ALTER TABLE FunctionApis AUTO_INCREMENT = 1; + +DELETE FROM Sample; +ALTER TABLE Sample AUTO_INCREMENT = 1; + +DELETE FROM Function; +ALTER TABLE Function AUTO_INCREMENT = 1; + +DELETE FROM User; +ALTER TABLE User AUTO_INCREMENT = 1; + +LOAD DATA LOCAL INFILE "FunctionApis" INTO TABLE FunctionApis COLUMNS TERMINATED BY "|"; + +LOAD DATA LOCAL INFILE "Function" INTO TABLE Function FIELDS TERMINATED BY "|" (id, sha256, @var1, architecture) SET opcodes = UNHEX(@var1); + +LOAD DATA LOCAL INFILE"Function_apis" INTO TABLE Function_apis FIELDS TERMINATED BY "|"; + +LOAD DATA LOCAL INFILE 
"User" INTO TABLE User FIELDS TERMINATED BY "|" (id, name, email, handle, number, api_key, @var1, rank, active, service, auth_data) SET created = STR_TO_DATE(@var1, '%Y-%m-%dT%H:%i:%SZ'); + +LOAD DATA LOCAL INFILE"Sample" INTO TABLE Sample FIELDS TERMINATED BY "|" (id, md5, crc32, sha1, sha256, @var1) SET last_seen = STR_TO_DATE(@var1, '%Y-%m-%dT%H:%i:%SZ'); + +LOAD DATA LOCAL INFILE"Sample_functions" INTO TABLE Sample_functions FIELDS TERMINATED BY "|"; + +LOAD DATA LOCAL INFILE"MetadataDetails" INTO TABLE MetadataDetails FIELDS TERMINATED BY "|" LINES TERMINATED BY "\t\n" (id, name, prototype, comment, @var1) SET committed = STR_TO_DATE(@var1, '%Y-%m-%dT%H:%i:%SZ'); + +LOAD DATA LOCAL INFILE"Sample_seen_by" INTO TABLE Sample_seen_by FIELDS TERMINATED BY "|"; + +LOAD DATA LOCAL INFILE"Engine" INTO TABLE Engine FIELDS TERMINATED BY "|"; + +LOAD DATA LOCAL INFILE"Metadata" INTO TABLE Metadata FIELDS TERMINATED BY "|"; + +LOAD DATA LOCAL INFILE"Metadata_details" INTO TABLE Metadata_details FIELDS TERMINATED BY "|"; + +LOAD DATA LOCAL INFILE"AppliedMetadata" INTO TABLE AppliedMetadata FIELDS TERMINATED BY "|"; + +LOAD DATA LOCAL INFILE"Function_metadata" INTO TABLE Function_metadata FIELDS TERMINATED BY "|"; From 8ad31bc83a6d31fa8101cffc14cf555290b2c1f0 Mon Sep 17 00:00:00 2001 From: xabiugarte Date: Thu, 28 Feb 2019 04:31:11 -0500 Subject: [PATCH 17/17] Fix for unicode support in user data, in migration script --- server/utilities/migrate_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/utilities/migrate_data.py b/server/utilities/migrate_data.py index e3637fe..a9d3ca0 100644 --- a/server/utilities/migrate_data.py +++ b/server/utilities/migrate_data.py @@ -36,7 +36,7 @@ def main(prefix): with open(os.path.join(prefix, "User"), "w") as f_out: for l in f: d = json.loads(l.strip()) - f_out.write("0|%s|%s|%s|%d|%s|%s|%d|%d|%s|%s\n" % (d['name'], + f_out.write(("0|%s|%s|%s|%d|%s|%s|%d|%d|%s|%s\n" % (d['name'], d['email'], d['handle'], 
d['number'], @@ -45,7 +45,7 @@ def main(prefix): int(d['rank']['$numberLong']), 1 if d['active'] else 0, d['service'], - d['auth_data'])) + d['auth_data'])).encode('UTF-8')) user_ids[d["_id"]["$oid"]] = user_id_counter user_id_counter += 1