Skip to content

Commit e84d995

Browse files
committed
revamp gadget.transit_type
1 parent 8c2e26d commit e84d995

File tree

2 files changed

+127
-133
lines changed

2 files changed

+127
-133
lines changed

angrop/gadget_finder/gadget_analyzer.py

Lines changed: 117 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ def _analyze_gadget(self, addr, allow_conditional_branches):
148148

149149
# Step 3: gadget effect analysis
150150
l.debug("... analyzing rop potential of block")
151-
gadget = self._create_gadget(addr, init_state, final_state, ctrl_type)
151+
gadget = self._create_gadget(addr, init_state, final_state, ctrl_type, allow_conditional_branches)
152152
if not gadget:
153153
continue
154154

@@ -286,47 +286,23 @@ def _try_stepping_past_syscall(self, state):
286286
except Exception: # pylint:disable=broad-exception-caught
287287
return state
288288

289-
def _identify_transit_type(self, final_state, ctrl_type):
290-
# FIXME: not always jump, could be call as well
291-
if ctrl_type == 'register':
292-
return "jmp_reg"
293-
if ctrl_type == 'syscall':
294-
return ctrl_type
295-
296-
if ctrl_type == 'pivot':
297-
# FIXME: this logic feels wrong
298-
variables = list(final_state.ip.variables)
299-
if all(x.startswith("sreg_") for x in variables):
289+
@staticmethod
290+
def _control_to_transit_type(ctrl_type):
291+
match ctrl_type:
292+
case 'syscall':
293+
return None
294+
case 'pivot':
295+
return None
296+
case 'register':
300297
return "jmp_reg"
301-
for act in final_state.history.actions:
302-
if act.type != 'mem':
303-
continue
304-
if act.size != self.project.arch.bits:
305-
continue
306-
if (act.data.ast == final_state.ip).symbolic or \
307-
not final_state.solver.eval(act.data.ast == final_state.ip):
308-
continue
309-
sols = final_state.solver.eval_upto(final_state.regs.sp-act.addr.ast, 2)
310-
if len(sols) != 1:
311-
continue
312-
if sols[0] != final_state.arch.bytes:
313-
continue
314-
return "ret"
315-
return "pop_pc"
316-
317-
assert ctrl_type == 'stack'
318-
319-
v = final_state.memory.load(final_state.regs.sp - final_state.arch.bytes,
320-
size=final_state.arch.bytes,
321-
endness=final_state.arch.memory_endness)
322-
if v is final_state.ip:
323-
return "ret"
324-
325-
return "pop_pc"
326-
327-
def _create_gadget(self, addr, init_state, final_state, ctrl_type):
328-
transit_type = self._identify_transit_type(final_state, ctrl_type)
329-
298+
case 'stack':
299+
return 'pop_pc'
300+
case 'memory':
301+
return "jmp_mem"
302+
case _:
303+
raise ValueError("Unknown control type")
304+
305+
def _create_gadget(self, addr, init_state, final_state, ctrl_type, do_cond_branch):
330306
# create the gadget
331307
if ctrl_type == 'syscall' or self._does_syscall(final_state):
332308
# gadgets that do syscall and pivoting are too complicated
@@ -342,44 +318,46 @@ def _create_gadget(self, addr, init_state, final_state, ctrl_type):
342318

343319
# FIXME this doesnt handle multiple steps
344320
gadget.block_length = self.project.factory.block(addr).size
345-
gadget.transit_type = transit_type
346-
347-
# for jmp_reg gadget, record the jump target register
348-
if transit_type == "jmp_reg":
349-
gadget.pc_reg = list(final_state.ip.variables)[0].split('_', 1)[1].rsplit('-')[0]
350321

351322
# compute sp change
352323
l.debug("... computing sp change")
353324
self._compute_sp_change(init_state, final_state, gadget)
354-
if gadget.stack_change % (self.project.arch.bytes) != 0:
325+
if (gadget.stack_change % self.project.arch.bytes) != 0:
355326
l.debug("... uneven sp change")
356327
return None
357328
if gadget.stack_change < 0:
358329
l.debug("stack change is negative!!")
359330
#FIXME: technically, it can be negative, e.g. call instructions
360331
return None
361332

362-
# record pc_offset
363-
if type(gadget) is not PivotGadget and transit_type in ['pop_pc', 'ret']:
364-
idx = list(final_state.ip.variables)[0].split('_')[2]
365-
gadget.pc_offset = int(idx) * self.project.arch.bytes
366-
if gadget.pc_offset >= gadget.stack_change:
367-
return None
333+
# transit_type-based handling
334+
transit_type = self._control_to_transit_type(ctrl_type)
335+
gadget.transit_type = transit_type
336+
match transit_type:
337+
case 'pop_pc': # record pc_offset
338+
idx = list(final_state.ip.variables)[0].split('_')[2]
339+
gadget.pc_offset = int(idx) * self.project.arch.bytes
340+
if gadget.pc_offset >= gadget.stack_change:
341+
return None
342+
case 'jmp_reg': # record pc_reg
343+
gadget.pc_reg = list(final_state.ip.variables)[0].split('_', 1)[1].rsplit('-')[0]
344+
case 'jmp_mem': # record pc_target
345+
for a in reversed(final_state.history.actions):
346+
if a.type == 'mem' and a.action == 'read':
347+
if (a.data.ast == final_state.ip).is_true():
348+
gadget.pc_target = a.addr.ast
349+
break
368350

351+
# register effect analysis
369352
l.info("... checking for controlled regs")
370353
self._check_reg_changes(final_state, init_state, gadget)
371-
372-
# check for reg moves
373-
# get reg reads
374354
reg_reads = self._get_reg_reads(final_state)
375355
l.debug("... checking for reg moves")
376356
self._check_reg_change_dependencies(init_state, final_state, gadget)
377357
self._check_reg_movers(init_state, final_state, reg_reads, gadget)
378-
379-
# check concretized registers
380358
self._analyze_concrete_regs(init_state, final_state, gadget)
381359

382-
# check mem accesses
360+
# memory access analysis
383361
l.debug("... analyzing mem accesses")
384362
if not self._analyze_mem_access(final_state, init_state, gadget):
385363
l.debug("... too many symbolic memory accesses")
@@ -390,39 +368,40 @@ def _create_gadget(self, addr, init_state, final_state, ctrl_type):
390368
l.debug("... mem access with no addr dependencies")
391369
return None
392370

393-
# Store block address list for gadgets with conditional branches
394371
gadget.bbl_addrs = list(final_state.history.bbl_addrs)
395372
gadget.isn_count = sum(self.project.factory.block(addr).instructions for addr in gadget.bbl_addrs)
396373

397-
constraint_vars = {
398-
var
399-
for constraint in final_state.history.jump_guards
400-
for var in constraint.variables
401-
}
402-
403-
gadget.has_conditional_branch = len(constraint_vars) > 0
404-
405-
for action in final_state.history.actions:
406-
if action.type == 'mem':
407-
constraint_vars |= action.addr.variables
408-
409-
for var in constraint_vars:
410-
if var.startswith("sreg_"):
411-
gadget.constraint_regs.add(var.split('_', 1)[1].split('-', 1)[0])
412-
elif not var.startswith("symbolic_stack_"):
413-
l.debug("... constraint not controlled by registers and stack")
414-
return None
374+
# conditional branch analysis
375+
if do_cond_branch:
376+
constraint_vars = {
377+
var
378+
for constraint in final_state.history.jump_guards
379+
for var in constraint.variables
380+
}
381+
382+
gadget.has_conditional_branch = len(constraint_vars) > 0
383+
384+
for action in final_state.history.actions:
385+
if action.type == 'mem':
386+
constraint_vars |= action.addr.variables
387+
388+
for var in constraint_vars:
389+
if var.startswith("sreg_"):
390+
gadget.constraint_regs.add(var.split('_', 1)[1].split('-', 1)[0])
391+
elif not var.startswith("symbolic_stack_"):
392+
l.debug("... constraint not controlled by registers and stack")
393+
return None
415394

416-
gadget.popped_regs = {
417-
reg
418-
for reg in gadget.popped_regs
419-
if final_state.registers.load(reg).variables.isdisjoint(constraint_vars)
420-
}
395+
gadget.popped_regs = {
396+
reg
397+
for reg in gadget.popped_regs
398+
if final_state.registers.load(reg).variables.isdisjoint(constraint_vars)
399+
}
421400

422-
gadget.popped_reg_vars = {
423-
reg: final_state.registers.load(reg).variables
424-
for reg in gadget.popped_regs
425-
}
401+
gadget.popped_reg_vars = {
402+
reg: final_state.registers.load(reg).variables
403+
for reg in gadget.popped_regs
404+
}
426405

427406
return gadget
428407

@@ -520,62 +499,36 @@ def _check_reg_movers(self, symbolic_state, symbolic_p, reg_reads, gadget):
520499
if ast_1 is ast_2:
521500
gadget.reg_moves.append(RopRegMove(from_reg, reg, half_bits))
522501

523-
# TODO: need to handle reg calls
524502
def _check_for_control_type(self, init_state, final_state):
525503
"""
526-
:return: the data provenance of the controlled ip in the final state, either the stack or registers
504+
:return: the data provenance of the controlled ip in the final state
527505
"""
528506

529507
ip = final_state.ip
530508

531-
# this gadget arrives a syscall
509+
# this gadget arrives at a syscall
532510
if self.is_in_kernel(final_state):
533511
return 'syscall'
534512

535-
# the ip is controlled by stack
513+
# the ip is controlled by stack (ret)
536514
if self._check_if_stack_controls_ast(ip, init_state):
537515
return "stack"
538516

539-
# the ip is not controlled by regs
517+
# the ip is not controlled by regs/mem
540518
if not ip.variables:
541519
return None
520+
ip_variables = list(ip.variables)
542521

543-
# the ip is fully controlled by regs
544-
variables = list(ip.variables)
545-
if all(x.startswith("sreg_") for x in variables):
522+
# the ip is fully controlled by regs (jmp rax)
523+
if all(x.startswith("sreg_") for x in ip_variables):
546524
return "register"
547525

548-
# this is a stack pivoting gadget
549-
if all(x.startswith("symbolic_read_") for x in variables) and len(final_state.regs.sp.variables) == 1:
550-
# we don't fully control sp
551-
if not init_state.solver.satisfiable(extra_constraints=[final_state.regs.sp == 0x41414100]):
552-
return None
553-
# make sure the control after pivot is reasonable
554-
555-
# find where the ip is read from
556-
saved_ip_addr = None
557-
for act in final_state.history.actions:
558-
if act.type == 'mem' and act.action == 'read':
559-
if (
560-
act.size == self.project.arch.bits
561-
and isinstance(act.data.ast, claripy.ast.BV)
562-
and not (act.data.ast == ip).symbolic
563-
):
564-
if init_state.solver.eval(act.data.ast == ip):
565-
saved_ip_addr = act.addr.ast
566-
break
567-
if saved_ip_addr is None:
568-
return None
526+
# the ip is fully controlled by memory and sp is not symbolic (jmp [rax])
527+
if all(x.startswith("symbolic_read_") for x in ip_variables) and not final_state.regs.sp.symbolic:
528+
return "memory"
569529

570-
# if the saved ip is too far away from the final sp, that's a bad gadget
571-
sols = final_state.solver.eval_upto(final_state.regs.sp - saved_ip_addr, 2)
572-
if len(sols) != 1: # the saved ip has a symbolic distance from the final sp, bad
573-
return None
574-
offset = sols[0]
575-
if offset > self._stack_bsize: # filter out gadgets like mov rsp, rax; ret 0x1000
576-
return None
577-
if offset % self.project.arch.bytes != 0: # filter misaligned gadgets
578-
return None
530+
# this is a stack pivoting gadget
531+
if self._check_if_stack_pivot(init_state, final_state):
579532
return "pivot"
580533

581534
return None
@@ -628,6 +581,45 @@ def _check_if_stack_controls_ast(self, ast, initial_state, gadget_stack_change=N
628581

629582
return ans
630583

584+
def _check_if_stack_pivot(init_state, final_state):
585+
ip_variables = list(final_state.ip.variables)
586+
if any(not x.startswith("symbolic_read_") for x in ip_variables):
587+
return None
588+
if len(final_state.regs.sp.variables) != 1:
589+
return None
590+
591+
# check if we fully control sp
592+
if not init_state.solver.satisfiable(extra_constraints=[final_state.regs.sp == 0x41414100]):
593+
return None
594+
595+
# make sure the control after pivot is reasonable
596+
597+
# find where the ip is read from
598+
saved_ip_addr = None
599+
for act in final_state.history.actions:
600+
if act.type == 'mem' and act.action == 'read':
601+
if (
602+
act.size == self.project.arch.bits
603+
and isinstance(act.data.ast, claripy.ast.BV)
604+
and not (act.data.ast == ip).symbolic
605+
):
606+
if init_state.solver.eval(act.data.ast == ip):
607+
saved_ip_addr = act.addr.ast
608+
break
609+
if saved_ip_addr is None:
610+
return None
611+
612+
# if the saved ip is too far away from the final sp, that's a bad gadget
613+
sols = final_state.solver.eval_upto(final_state.regs.sp - saved_ip_addr, 2)
614+
if len(sols) != 1: # the saved ip has a symbolic distance from the final sp, bad
615+
return None
616+
offset = sols[0]
617+
if offset > self._stack_bsize: # filter out gadgets like mov rsp, rax; ret 0x1000
618+
return None
619+
if offset % self.project.arch.bytes != 0: # filter misaligned gadgets
620+
return None
621+
return "pivot"
622+
631623
def _to_signed(self, value):
632624
bits = self.project.arch.bits
633625
if value >> (bits-1): # if the MSB is 1, this value is negative

angrop/rop_gadget.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -119,15 +119,16 @@ def __init__(self, addr):
119119
self.mem_writes = []
120120
self.mem_changes = []
121121

122-
# TODO: pc shouldn't be treated differently from other registers
123-
# it is just a register. With the register setting framework, we will be able to
124-
# utilize gadgets like `call qword ptr [rax+rbx]` because we have the dependency information.
125-
# transition information, i.e. how to pass the control flow to the next gadget
122+
# gadget transition
123+
# we now support the following gadget transitions
124+
# 1. pop_pc: ret, jmp [sp+X], pop pc,X,Y, retn), this type of gadgets are "self-contained"
125+
# 2. jmp_reg: jmp reg <- requires reg setting before using it (call falls here as well)
126+
# 3. jmp_mem: jmp [reg+X] <- requires mem setting before using it (call is here as well)
126127
self.transit_type: str = None # type: ignore
127-
self.pc_reg = None
128-
# pc_offset is exclusively used when transit_type is "pop_pc",
129-
# when pc_offset==stack_change-arch_bytes, transit_type is basically ret
130-
self.pc_offset = None
128+
129+
self.pc_offset = None # for pop_pc, ret is basically pc_offset == stack_change - arch.bytes
130+
self.pc_reg = None # for jmp_reg, which register it jumps to
131+
self.pc_target = None # for jmp_mem, where it jumps to
131132

132133
# List of basic block addresses for gadgets with conditional branches
133134
self.bbl_addrs = []
@@ -274,6 +275,7 @@ class SyscallGadget(RopGadget):
274275
def __init__(self, addr):
275276
super().__init__(addr)
276277
self.makes_syscall = False
278+
# TODO: starts_with_syscall should be removed, not useful at all
277279
self.starts_with_syscall = False
278280

279281
def __str__(self):

0 commit comments

Comments
 (0)