Skip to content

Commit

Permalink
Merge pull request #86 from commial/fix/miasm-new-api
Browse files Browse the repository at this point in the history
Fix/miasm new api
  • Loading branch information
commial authored Feb 13, 2019
2 parents 590d4cf + 7877448 commit a600f4a
Show file tree
Hide file tree
Showing 10 changed files with 81 additions and 84 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ before_script:
# install pyparsing
- "pip install pyparsing"
# install miasm
- "cd ..;git clone https://github.com/cea-sec/miasm miasm && cd miasm;"
- "cd ..;git clone -b 'v0.1.1' https://github.com/cea-sec/miasm miasm && cd miasm;"
- "python setup.py build build_ext -I$(pwd)/../virtualenv/include -L$(pwd)/../virtualenv/tinycc"
- "python setup.py install"
- "cd ..;"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ Installation

### Standard

_Sibyl_ requires at least _Miasm2_ version `67117bf` and the corresponding version of _Elfesteem_.
_Sibyl_ requires at least _Miasm2_ version `v0.1.1` and the corresponding version of _Elfesteem_.
For the `qemu` engine, the `unicorn` python package must be installed (refer to the documentation of Unicorn for more detail).

_Sibyl_ comes as a Python module, and the installation follows the standard procedure:
Expand Down
5 changes: 2 additions & 3 deletions sibyl/actions/learn.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,8 @@ def run(self):
# If function address is not set then use the symbol address
if self.args.address is None:
cont = Container.from_stream(open(self.args.program))
try:
address = cont.symbol_pool[self.args.functionname].offset
except KeyError:
address = cont.loc_db.get_name_offset(self.args.functionname)
if address is None:
raise ValueError("Symbol %s does not exists in %s" % (self.args.functionname, self.args.program))
else:
address = int(self.args.address, 0)
Expand Down
9 changes: 1 addition & 8 deletions sibyl/commons.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def parse_functions(self):
args = {}
for i, param in enumerate(ext.type.args.params):
args_order.append(param.name)
args[param.name] = objc_func.args[i]
args[param.name] = objc_func.args[i][1]

self.functions[func_name] = FuncPrototype(func_name,
objc_func.type_ret,
Expand All @@ -100,13 +100,6 @@ def objc_is_dereferenceable(target_type):
return isinstance(target_type, (ObjCPtr, ObjCArray))


def expr_to_types(c_handler, expr):
"""Return the types of @expr, based on @c_handler knowledge"""
# XXX Temporary bug fix for c_handler.expr_to_types, as the current version
# does not support partial offsets
return [x.ctype for x in c_handler.access_c_gen.get_access(expr).info]


class FuncPrototype(object):
"""Stand for a function's prototype"""

Expand Down
2 changes: 2 additions & 0 deletions sibyl/heuristics/func.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ def recursive_call(func_heur, addresses):
# Merge label2block, take care of disassembly order due to cache
for node in cfg_temp.nodes():
label2block.setdefault(node, cfg_temp.loc_key_to_block(node))
# Avoid re-disassembling
mdis.dont_dis.append(loc_db.get_location_offset(node))
log_asmblock.setLevel(cur_log_level)

# Find potential addresses
Expand Down
66 changes: 28 additions & 38 deletions sibyl/learn/findref.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,15 @@
from miasm2.analysis.machine import Machine
from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE, EXCEPT_ACCESS_VIOL, EXCEPT_DIV_BY_ZERO, EXCEPT_PRIV_INSN
from miasm2.core.bin_stream import bin_stream_vm
from miasm2.jitter.emulatedsymbexec import EmulatedSymbExec
from miasm2.analysis.dse import ESETrackModif
import miasm2.expression.expression as m2_expr
from miasm2.ir.ir import AssignBlock
from miasm2.core.objc import CHandler

from sibyl.commons import objc_is_dereferenceable, expr_to_types
from sibyl.commons import objc_is_dereferenceable
from sibyl.config import config


class EmulatedSymbExecWithModif(EmulatedSymbExec):

def __init__(self, *args, **kwargs):
super(EmulatedSymbExecWithModif, self).__init__(*args, **kwargs)
self.modified_exprs = set()

def apply_change(self, dst, src):
self.modified_exprs.add(dst)
super(EmulatedSymbExecWithModif, self).apply_change(dst, src)

def emul_ir_blocks(self, *args, **kwargs):
self.modified_exprs = set()
addr = super(EmulatedSymbExecWithModif, self).emul_ir_blocks(*args, **kwargs)
return addr

class ExtractRef(object):
'''
Class used to concolic run a snapshot and extract references to input
Expand Down Expand Up @@ -99,13 +84,13 @@ def end_func(self, jitter):

def is_pointer(self, expr):
"""Return True if expr may be a pointer"""
target_types = expr_to_types(self.c_handler, expr)
target_types = self.c_handler.expr_to_types(expr)

return any(objc_is_dereferenceable(target_type)
for target_type in target_types)

def is_symbolic(self, expr):
return expr.is_mem() and not expr.arg.is_int()
return expr.is_mem() and not expr.ptr.is_int()

def get_arg_n(self, arg_number):
"""Return the Expression corresponding to the argument number
Expand All @@ -128,7 +113,7 @@ def callback(self, jitter):

# When it is possible, consider only elements modified in the last run
# -> speed up to avoid browsing the whole memory
to_consider = self.symb.modified_exprs
to_consider = self.symb.modified_expr

for symbol in to_consider:
# Do not consider PC
Expand All @@ -138,13 +123,15 @@ def callback(self, jitter):
# Read from ... @NN[... argX ...] ...
symb_value = self.symb.eval_expr(symbol)
to_replace = {}
for expr in m2_expr.ExprAff(symbol,
symb_value).get_r(mem_read=True):
for expr in m2_expr.ExprAssign(
symbol,
symb_value
).get_r(mem_read=True):
if self.is_symbolic(expr):
if isinstance(expr, m2_expr.ExprMem):
if expr.is_mem():
# Consider each byte individually
# Case: @32[X] with only @8[X+1] to replace
addr_expr = expr.arg
addr_expr = expr.ptr
new_expr = []
consider = False
for offset in xrange(expr.size/8):
Expand Down Expand Up @@ -179,7 +166,7 @@ def callback(self, jitter):
if isinstance(symbol, m2_expr.ExprMem):
# Replace only in ptr (case to_replace: @[arg] = 8, expr:
# @[arg] = @[arg])
symbol = m2_expr.ExprMem(self.symb.expr_simp(symbol.arg.replace_expr(to_replace)),
symbol = m2_expr.ExprMem(self.symb.expr_simp(symbol.ptr.replace_expr(to_replace)),
symbol.size)
self.symb.apply_change(symbol, symb_value)

Expand Down Expand Up @@ -210,15 +197,12 @@ def callback(self, jitter):
return True

# Update state
## Reset cache structures
self.mdis.job_done.clear()
self.symb_ir.blocks.clear()

## Update current state
asm_block = self.mdis.dis_bloc(cur_addr)
irblocks = self.symb_ir.add_bloc(asm_block)
self.symb.reset_modified()
asm_block = self.mdis.dis_block(cur_addr)
ircfg = self.symb_ir.new_ircfg()
self.symb_ir.add_asmblock_to_ircfg(asm_block, ircfg)

self.symb.emul_ir_blocks(cur_addr)
self.symb.run_at(ircfg, cur_addr)

return True

Expand All @@ -235,8 +219,8 @@ def prepare_symbexec(self, jitter, return_addr):

# Symbexec engine
## Prepare the symbexec engine
self.symb_ir = self.machine.ir()
self.symb = EmulatedSymbExecWithModif(jitter.cpu, jitter.vm, self.symb_ir, {})
self.symb_ir = self.machine.ir(self.mdis.loc_db)
self.symb = ESETrackModif(jitter.cpu, jitter.vm, self.symb_ir, {})
self.symb.enable_emulated_simplifications()

## Update registers value
Expand All @@ -263,7 +247,10 @@ def prepare_symbexec(self, jitter, return_addr):

# Inject argument
self.init_values = {}
struct_expr_types = {}
# Expr -> set(ObjC types), for Expr -> C
typed_exprs = {}
# Expr name -> ObjC type, for C -> Expr
typed_C_ids = {}
self.args_symbols = []
for i, param_name in enumerate(self.prototype.args_order):
cur_arg_abi = self.get_arg_n(i)
Expand All @@ -274,13 +261,15 @@ def prepare_symbexec(self, jitter, return_addr):
if objc_is_dereferenceable(arg_type):
# Convert the argument to symbol to track access based on it
self.symb.apply_change(cur_arg_abi, cur_arg)
struct_expr_types[cur_arg.name] = arg_type
typed_exprs[cur_arg] = set([arg_type])
typed_C_ids[cur_arg.name] = arg_type
self.args_symbols.append(cur_arg)

# Init Expr <-> C conversion
# Strict access is deliberately not enforced (example: memcpy(struct))
self.c_handler = CHandler(self.types, struct_expr_types,
self.c_handler = CHandler(self.types, typed_exprs,
enforce_strict_access=False)
self.typed_C_ids = typed_C_ids

# Init output structures
self.memories_read = set()
Expand Down Expand Up @@ -330,6 +319,7 @@ def build_references(self):
self.snapshot.memory_out = AssignBlock(memory_out)
self.snapshot.output_value = output_value
self.snapshot.c_handler = self.c_handler
self.snapshot.typed_C_ids = self.typed_C_ids
self.snapshot.arguments_symbols = self.args_symbols
self.snapshot.init_values = self.init_values

Expand Down
Loading

0 comments on commit a600f4a

Please sign in to comment.