Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: angr/pyvex
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: master
Choose a base ref
...
head repository: tyb0807/pyvex
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: master
Choose a head ref
Can’t automatically merge. Don’t worry, you can still create the pull request.
  • 11 commits
  • 4 files changed
  • 1 contributor

Commits on Jun 15, 2017

  1. Copy the full SHA
    37b0239 View commit details
  2. Copy the full SHA
    00131ce View commit details
  3. Copy the full SHA
    7e1fec4 View commit details
  4. Test case for ARM postprocess

    tyb0807 committed Jun 15, 2017
    Copy the full SHA
    91a3a08 View commit details
  5. Copy the full SHA
    8edf5e2 View commit details

Commits on Jun 19, 2017

  1. Copy the full SHA
    1d3e461 View commit details
  2. Copy the full SHA
    086d653 View commit details
  3. Copy the full SHA
    e424e95 View commit details
  4. Test case for ARM postprocess

    tyb0807 committed Jun 19, 2017
    Copy the full SHA
    86e27bc View commit details
  5. Copy the full SHA
    34caaec View commit details

Commits on Jun 20, 2017

  1. Copy the full SHA
    f2c3109 View commit details
Showing with 366 additions and 11 deletions.
  1. +1 −1 pyvex/block.py
  2. +86 −9 pyvex/lift/fixes.py
  3. +1 −1 { → tests}/test.py
  4. +278 −0 tests/test_arm_postprocess.py
2 changes: 1 addition & 1 deletion pyvex/block.py
Original file line number Diff line number Diff line change
@@ -162,7 +162,7 @@ def instructions(self):
"""
The number of instructions in this block
"""
return len([s.addr for s in self.statements if isinstance(s, stmt.IMark)])
return len([s for s in self.statements if isinstance(s, stmt.IMark)])

@property
def size(self):
95 changes: 86 additions & 9 deletions pyvex/lift/fixes.py
Original file line number Diff line number Diff line change
@@ -17,18 +17,95 @@ def _post_process_ARM(self):
# MOV LR, PC
# MOV PC, R8

lr_store_id = None
inst_ctr = 1
for i, stt in reversed(list(enumerate(self.irsb.statements))):
if isinstance(stt, stmt.Put):
if stt.offset == self.irsb.arch.registers['lr'][0]:
lr_store_id = i
# Note that the value of PC is directly used in IRStatements, i.e.
# instead of having:
# t0 = GET:I32(pc)
# PUT(lr) = t0
# we have:
# PUT(lr) = 0x10400
# The only case (that I've seen so far) where a temporary variable
# is assigned to LR is:
# t2 = ITE(cond, t0, t1)
# PUT(lr) = t2

pc_holders = {}
lr_store_pc = False
inst_ctr = 0
next_irsb_addr = self.irsb.statements[0].addr + self.irsb.size
for stt in self.irsb.statements:
if type(stt) == stmt.Put:
# LR is modified just before the last instruction of the
# block...
if stt.offset == self.irsb.arch.registers['lr'][0] \
and inst_ctr == self.irsb.instructions - 1:
# ... by a constant, so test whether it is the address
# of the next IRSB
if type(stt.data) == expr.Const:
if stt.data.con.value == next_irsb_addr:
lr_store_pc = True
# ... by a temporary variable, so test whether it holds
# the address of the next IRSB
elif type(stt.data) == expr.RdTmp:
if next_irsb_addr == pc_holders.get(stt.data.tmp):
lr_store_pc = True
break
if isinstance(stt, stmt.IMark):
else:
reg_name = self.irsb.arch.translate_register_name(stt.offset)
if type(stt.data) == expr.Const:
pc_holders[reg_name] = stt.data.con.value
elif type(stt.data) == expr.RdTmp and pc_holders.get(stt.data.tmp) is not None:
pc_holders[reg_name] = pc_holders[stt.data.tmp]
elif type(stt.data) == expr.Get and pc_holders.get(stt.data.offset) is not None:
pc_holders[reg_name] = pc_holders[stt.data.offset]
elif type(stt) == stmt.WrTmp:
# the PC value may propagate through the block, and since
# LR is modified at the end of the block, the PC value has
# to be incremented in order to match the address of the
# next IRSB. So the only propagation ways that can lead to
# a function call are:
# - Iop_Add* operations (even "sub r0, #-4" is compiled
# as "add r0, #4")
# - Iop_And*, Iop_Or*, Iop_Xor*, Iop_Sh*, Iop_Not* (there
# may be some tricky and twisted ways to increment PC)
if type(stt.data) in (expr.Unop, expr.Binop, expr.Triop, expr.Qop):
if all(type(a) == expr.Const
or (type(a) == expr.RdTmp and pc_holders.get(a.tmp) is not None)
for a in stt.data.args):
op = stt.data.op
vals = [a.con.value if type(a) == expr.Const else pc_holders[a.tmp] \
for a in stt.data.args]
if 'Iop_Add' in op:
pc_holders[stt.tmp] = sum(vals)
elif 'Iop_And' in op:
pc_holders[stt.tmp] = reduce(lambda a, b: a & b, vals)
elif 'Iop_Or' in op:
pc_holders[stt.tmp] = reduce(lambda a, b: a | b, vals)
elif 'Iop_Xor' in op:
pc_holders[stt.tmp] = reduce(lambda a, b: a ^ b, vals)
elif 'Iop_Shl' in op:
pc_holders[stt.tmp] = vals[0] << vals[1]
elif any(o in op for o in ('Iop_Shr', 'Iop_Sar')):
pc_holders[stt.tmp] = vals[0] >> vals[1]
elif type(stt.data) == expr.Get:
reg_name = self.irsb.arch.translate_register_name(stt.data.offset)
if pc_holders.get(reg_name) is not None:
pc_holders[stt.tmp] = pc_holders[reg_name]
elif type(stt.data) == expr.ITE:
for d in (stt.data.iffalse, stt.data.iftrue):
if type(d) == expr.Const:
pc_holders[stt.tmp] = d.con.value
elif type(d) == expr.RdTmp and pc_holders.get(d.tmp) is not None:
pc_holders[stt.tmp] = pc_holders[d.tmp]
elif type(stt.data) == expr.RdTmp and pc_holders.get(stt.data.tmp) is not None:
pc_holders[stt.tmp] = pc_holders[stt.data.tmp]
elif type(stt.data) == expr.Const:
pc_holders[stt.tmp] = stt.data.con.value

elif type(stt) == stmt.IMark:
inst_ctr += 1

if lr_store_id is not None and inst_ctr == 2:
self.irsb.jumpkind = "Ijk_Call"
if lr_store_pc:
self.irsb.jumpkind = 'Ijk_Call'

_post_process_ARMEL = _post_process_ARM
_post_process_ARMHF = _post_process_ARM
2 changes: 1 addition & 1 deletion test.py → tests/test.py
Original file line number Diff line number Diff line change
@@ -144,7 +144,7 @@ def test_irstmt_noop():
nose.tools.assert_equal(irnop.tag, "Ist_NoOp")
nose.tools.assert_equal(type(irnop), type(irnop2))
nose.tools.assert_equal(type(irnop), type(irnop3))

def test_irstmt_imark():
m = pyvex.IRStmt.IMark(1,2,3)
nose.tools.assert_equal(m.tag, "Ist_IMark")
278 changes: 278 additions & 0 deletions tests/test_arm_postprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
import pyvex
import archinfo
import nose

##########################
### ARM Postprocessing ###
##########################
def test_arm_postprocess():
    """Lift a fixed set of ARM/Thumb snippets and check each block's jumpkind.

    Every snippet is lifted with ``pyvex.IRSB`` at ``opt_level`` 0, 1 and 2,
    and the resulting ``irsb.jumpkind`` is compared against the expected
    value.  Snippets that load the address following the block into LR
    before branching are expected to yield ``'Ijk_Call'``; snippets that put
    an unrelated value into LR are expected to stay ``'Ijk_Boring'``.
    """
    call = 'Ijk_Call'
    boring = 'Ijk_Boring'

    # Each entry is (machine code, mem_addr, num_inst, is_thumb, jumpkind).
    # Thumb snippets are lifted with bytes_offset=1; ARM snippets are lifted
    # without a bytes_offset argument.
    snippets = [
        # ---- Thumb ----

        # push {r7}
        # add r7, sp, #0
        # mov.w r1, #6
        # mov r0, pc
        # add.w lr, r0, r1
        # b.w 10408
        ('\x80\xb4'
         '\x00\xaf'
         '\x4f\xf0\x06\x01'
         '\x78\x46'
         '\x00\xeb\x01\x0e'
         '\xff\xf7\xec\xbf',
         0x1041f, 6, True, call),

        # mov lr, pc
        # b.w 10408
        ('\xfe\x46'
         '\xe9\xe7',
         0x10431, 2, True, call),

        # add r2, pc, #0
        # add.w lr, r2, #6
        # ldr.w pc, [pc, #52]
        ('\x00\xa2'
         '\x02\xf1\x06\x0e'
         '\xdf\xf8\x34\xf0',
         0x10435, 3, True, call),

        # ldr r0, [pc, #48]
        # mov r1, pc
        # add.w r2, r1, #4
        # add.w r3, r2, #4
        # add.w r4, r3, #4
        # add.w lr, r4, #4
        # mov pc, r0
        ('\x0c\x48'
         '\x79\x46'
         '\x01\xf1\x04\x02'
         '\x02\xf1\x04\x03'
         '\x03\xf1\x04\x04'
         '\x04\xf1\x04\x0e'
         '\x87\x46',
         0x1043f, 7, True, call),

        # eor.w r0, r0, r0
        # mov lr, r0
        # b.n 10460
        ('\x80\xea\x00\x00'
         '\x86\x46'
         '\x01\xe0',
         0x10455, 3, True, boring),

        # ---- Thumb compiled with optimizations (gcc -O2) ----

        # mov.w r1, #6
        # mov r0, pc
        # add.w lr, r0, r1
        # b.w 104bc
        ('\x4f\xf0\x06\x01'
         '\x78\x46'
         '\x00\xeb\x01\x0e'
         '\x00\xf0\xc5\xb8',
         0x10325, 4, True, call),

        # ldr r0, [pc, #56]
        # mov r1, pc
        # add.w r2, r1, #4
        # add.w r3, r2, #4
        # add.w r4, r3, #4
        # add.w lr, r4, #4
        # mov pc, r0
        ('\x0e\x48'
         '\x79\x46'
         '\x01\xf1\x04\x02'
         '\x02\xf1\x04\x03'
         '\x03\xf1\x04\x04'
         '\x04\xf1\x04\x0e'
         '\x87\x46',
         0x10333, 7, True, call),

        # add r2, pc, #0
        # add.w lr, r2, #6
        # ldr.w pc, [pc, #28]
        ('\x00\xa2'
         '\x02\xf1\x06\x0e'
         '\xdf\xf8\x1c\xf0',
         0x10349, 3, True, call),

        # mov lr, pc
        # b.w 104bc
        ('\xfe\x46'
         '\xb2\xe0',
         0x10353, 2, True, call),

        # eor.w r0, r0, r0
        # mov lr, r0
        # b.n 10362
        ('\x80\xea\x00\x00'
         '\x86\x46'
         '\x01\xe0',
         0x10357, 3, True, boring),

        # ---- ARM compiled with optimizations (gcc -O2) ----

        # mov r1, #4
        # mov r0, pc
        # add lr, r0, r1
        # ldr pc, [pc, #56]
        ('\x04\x10\xa0\xe3'
         '\x0f\x00\xa0\xe1'
         '\x01\xe0\x80\xe0'
         '\x38\xf0\x9f\xe5',
         0x10298, 4, False, call),

        # add r1, pc, #0
        # add r2, r1, #4
        # add r3, r2, #4
        # add r4, r3, #4
        # add lr, r4, #4
        # b 10414
        ('\x00\x10\x8f\xe2'
         '\x04\x20\x81\xe2'
         '\x04\x30\x82\xe2'
         '\x04\x40\x83\xe2'
         '\x04\xe0\x84\xe2'
         '\x54\x00\x00\xea',
         0x102a8, 6, False, call),

        # mov lr, pc
        # b 10414
        ('\x0f\xe0\xa0\xe1'
         '\x52\x00\x00\xea',
         0x102c0, 2, False, call),

        # eor r0, r0, r0
        # mov lr, r0
        # b 102d8
        ('\x00\x00\x20\xe0'
         '\x00\xe0\xa0\xe1'
         '\x00\x00\x00\xea',
         0x102c8, 3, False, boring),

        # ---- ARM ----

        # push {fp}
        # add fp, sp, #0
        # mov r1, #4
        # mov r0, pc
        # add lr, r0, r1
        # ldr pc, [pc, #68]
        ('\x04\xb0\x2d\xe5'
         '\x00\xb0\x8d\xe2'
         '\x04\x10\xa0\xe3'
         '\x0f\x00\xa0\xe1'
         '\x01\xe0\x80\xe0'
         '\x44\xf0\x9f\xe5',
         0x103e8, 6, False, call),

        # add r1, pc, #0
        # add r2, r1, #4
        # add r3, r2, #4
        # add r4, r3, #4
        # add lr, r4, #4
        # b 103c4
        ('\x00\x10\x8f\xe2'
         '\x04\x20\x81\xe2'
         '\x04\x30\x82\xe2'
         '\x04\x40\x83\xe2'
         '\x04\xe0\x84\xe2'
         '\x54\xff\xff\xea',
         0x10400, 6, False, call),

        # mov lr, pc
        # b 103c4
        ('\x0f\xe0\xa0\xe1'
         '\xe8\xff\xff\xea',
         0x10418, 2, False, call),

        # eor r0, r0, r0
        # mov lr, r0
        # b 10430
        ('\x00\x00\x20\xe0'
         '\x00\xe0\xa0\xe1'
         '\x00\x00\x00\xea',
         0x10420, 3, False, boring),
    ]

    for level in xrange(3):
        for code, address, count, is_thumb, expected in snippets:
            lift_args = {
                'data': code,
                'mem_addr': address,
                'arch': archinfo.ArchARMEL(),
                'num_inst': count,
                'opt_level': level,
            }
            if is_thumb:
                # The Thumb snippets use an odd mem_addr and start one byte
                # into the address space, exactly as in the explicit calls.
                lift_args['bytes_offset'] = 1
            block = pyvex.IRSB(**lift_args)
            nose.tools.assert_equals(block.jumpkind, expected)