Skip to content

Commit

Permalink
Detect tail calls in stack size estimation (#2005)
Browse files Browse the repository at this point in the history
In some cases, the deepest stack involves a tail call. For example,
consider `write_fmt`, which has no stack of its own and branches
directly to `core::fmt::write` (instead of using `bl`):

```asm
0804a704 <core::fmt::Write::write_fmt>:
 804a704:   460a        mov r2, r1
 804a706:   6849        ldr r1, [r1, #4]
 804a708:   2901        cmp r1, #1
 804a70a:   bf18        it  ne
 804a70c:   2900        cmpne   r1, #0
 804a70e:   4901        ldr r1, [pc, #4]    ; (804a714 <core::fmt::Write::write_fmt+0x10>)
 804a710:   f7ff ba68   b.w 8049be4 <core::fmt::write>
 804a714:   0804b024    .word   0x0804b024
```

This PR adds basic tail call support to the disassembler. The main
effect is adding 80 bytes to panic stack depth.

Before:
```
udprpc: 2464 bytes (limit is 4096)
     [+8] _start
  [+2264] main
    [+16] core::array::<impl core::ops::index::IndexMut<I> for [T; N]>::index_mut
    [+16] <core::ops::range::Range<usize> as core::slice::index::SliceIndex<[T]>>::index_mut
    [+56] core::slice::index::slice_end_index_len_fail
    [+40] core::panicking::panic_fmt
    [+56] rust_begin_unwind
     [+8] userlib::sys_panic_stub
```

After:
```
udprpc: 2544 bytes (limit is 4096)
     [+8] _start
  [+2264] main
    [+16] core::array::<impl core::ops::index::IndexMut<I> for [T; N]>::index_mut
    [+16] <core::ops::range::Range<usize> as core::slice::index::SliceIndex<[T]>>::index_mut
    [+56] core::slice::index::slice_end_index_len_fail
    [+40] core::panicking::panic_fmt
    [+56] rust_begin_unwind
     [+0] core::fmt::Write::write_fmt
    [+88] core::fmt::write
```

Unfortunately, this _does not_ detect the stack overflow in #2004, which
happens during dynamic dispatch. Still, it's a step in the right
direction!
  • Loading branch information
mkeeter authored Feb 11, 2025
1 parent add2d0a commit 26dbadc
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 7 deletions.
2 changes: 1 addition & 1 deletion app/demo-stm32h7-nucleo/app-h743.toml
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ start = true
features = ["h743"]
priority = 3
name = "drv-stm32h7-rng"
stacksize = 256
stacksize = 512
start = true
task-slots = ["sys", "user_leds"]
uses = ["rng"]
Expand Down
2 changes: 1 addition & 1 deletion app/demo-stm32h7-nucleo/app-h753.toml
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ start = true
features = ["h753"]
priority = 3
name = "drv-stm32h7-rng"
stacksize = 256
stacksize = 512
start = true
task-slots = ["sys", "user_leds"]
uses = ["rng"]
Expand Down
2 changes: 1 addition & 1 deletion app/gemini-bu/app.toml
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ name = "drv-stm32h7-rng"
priority = 6
uses = ["rng"]
start = true
stacksize = 256
stacksize = 512
task-slots = ["sys", "user_leds"]

[tasks.update_server]
Expand Down
2 changes: 1 addition & 1 deletion app/gimletlet/base-gimletlet2.toml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ name = "drv-stm32h7-rng"
priority = 6
uses = ["rng"]
start = true
stacksize = 256
stacksize = 512
task-slots = ["sys", "user_leds"]

[tasks.update_server]
Expand Down
2 changes: 1 addition & 1 deletion app/sidecar/base.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ name = "drv-stm32h7-rng"
priority = 6
uses = ["rng"]
start = true
stacksize = 256
stacksize = 512
task-slots = ["sys"]

[tasks.update_server]
Expand Down
11 changes: 9 additions & 2 deletions build/xtask/src/dist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1236,17 +1236,24 @@ pub fn get_max_stack(
}
chunks.extend(chunk); // don't forget the trailing chunk!

let frame_size = addr_to_frame_size.get(&base_addr).copied();
let mut calls = BTreeSet::new();
for (addr, chunk) in chunks {
let instrs = cs
.disasm_all(&chunk, addr.into())
.map_err(|e| anyhow!("disassembly failed: {e:?}"))?;
for instr in instrs.iter() {
for (i, instr) in instrs.iter().enumerate() {
let detail = cs.insn_detail(instr).map_err(|e| {
anyhow!("could not get instruction details: {e}")
})?;

// Detect tail calls, which are jumps at the final instruction
// when the function itself has no stack frame.
let can_tail = frame_size == Some(0) && i == instrs.len() - 1;
if detail.groups().iter().any(|g| {
g == &InsnGroupId(InsnGroupType::CS_GRP_CALL as u8)
|| (g == &InsnGroupId(InsnGroupType::CS_GRP_JUMP as u8)
&& can_tail)
}) {
let arch = detail.arch_detail();
let ops = arch.operands();
Expand Down Expand Up @@ -1290,7 +1297,7 @@ pub fn get_max_stack(
FunctionData {
name,
short_name,
frame_size: addr_to_frame_size.get(&base_addr).map(|i| *i),
frame_size,
calls,
},
);
Expand Down

0 comments on commit 26dbadc

Please sign in to comment.