Skip to content

Commit

Permalink
Unwind information performance improvements
Browse files Browse the repository at this point in the history
This commit:
- sort the FDEs rather than the whole unwind information, which is a
  smaller vector. This showed no performance changes in benchmarks;
- optimise the unwind info in-place. Most of the performance gains in
  this commits comes from this changes;

Wrote a benchmark with criterion.rs and saw a ~25% decrease in time
spent building and optimising the unwind info.

```
Running benches/unwind_info.rs (target/release/deps/unwind_info-b3269dc277690031)
WARNING: HTML report generation will become a non-default optional feature in Criterion.rs 0.4.0.
This feature is being moved to cargo-criterion (https://github.com/bheisler/cargo-criterion) and will be optional in a future version of Criterion.rs. To silence this warning, either switch to cargo-criterion or enable the 'html_reports' feature in your Cargo.toml.

Gnuplot not found, using plotters backend
unwind info + sorting   time:   [37.697 ms 39.061 ms 40.592 ms]
                        change: [-26.710% -23.315% -19.870%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 8 outliers among 100 measurements (8.00%)
  3 (3.00%) high mild
  5 (5.00%) high severe
```

For this test, this node.js binary was used
```
/home/javierhonduco/.vscode-server/cli/servers/Stable-e170252f762678dec6ca2cc69aba1570769a5d39/server/node: ELF 64-bit LSB executable, x86-64, version 1 (GNU/Linux), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 3.2.0, BuildID[sha1]=e034c66a6dc897492d6ecae6c6b046669f94ad96, with debug_info, not stripped, too many notes (256)
```

I am not commiting the benchmarks as I bumped into some rough edges with
criterion and I am not sure if it's maintained anymore. Might be worth
adding some benchmarks later on.

Test Plan
=========

Compared the unwind info before and after this change for a bunch of
executables.
  • Loading branch information
javierhonduco committed May 1, 2024
1 parent a8eaa1b commit eff7f34
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 113 deletions.
29 changes: 23 additions & 6 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ use tracing_subscriber::FmtSubscriber;
use lightswitch::collector::Collector;
use lightswitch::object::ObjectFile;
use lightswitch::profiler::Profiler;
use lightswitch::unwind_info::{compact_printing_callback, UnwindInfoBuilder};
use lightswitch::unwind_info::in_memory_unwind_info;
use lightswitch::unwind_info::remove_redundant;
use lightswitch::unwind_info::remove_unnecesary_markers;
use lightswitch::unwind_info::UnwindInfoBuilder;

const SAMPLE_FREQ_RANGE: RangeInclusive<usize> = 1..=1009;

Expand Down Expand Up @@ -125,6 +128,25 @@ fn main() -> Result<(), Box<dyn Error>> {

let args = Cli::parse();

if let Some(path) = args.show_unwind_info {
let mut unwind_info = in_memory_unwind_info(&path).unwrap();
remove_unnecesary_markers(&mut unwind_info);
remove_redundant(&mut unwind_info);

for compact_row in unwind_info {
let pc = compact_row.pc;
let cfa_type = compact_row.cfa_type;
let rbp_type = compact_row.rbp_type;
let cfa_offset = compact_row.cfa_offset;
let rbp_offset = compact_row.rbp_offset;
println!(
"pc: {:x} cfa_type: {:<2} rbp_type: {:<2} cfa_offset: {:<4} rbp_offset: {:<4}",
pc, cfa_type, rbp_type, cfa_offset, rbp_offset
);
}
return Ok(());
}

let subscriber = FmtSubscriber::builder()
.with_max_level(if args.filter_logs {
Level::TRACE
Expand All @@ -137,11 +159,6 @@ fn main() -> Result<(), Box<dyn Error>> {

tracing::subscriber::set_global_default(subscriber).expect("setting default subscriber failed");

if let Some(path) = args.show_unwind_info {
UnwindInfoBuilder::with_callback(&path, compact_printing_callback)?.process()?;
return Ok(());
}

if let Some(path) = args.show_info {
let objet_file = ObjectFile::new(&PathBuf::from(path.clone())).unwrap();
println!("build id {:?}", objet_file.build_id());
Expand Down
12 changes: 2 additions & 10 deletions src/profiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -752,17 +752,9 @@ impl Profiler<'_> {
}
span.exit();

let span: span::EnteredSpan = span!(Level::DEBUG, "sort unwind info").entered();
found_unwind_info.sort_by(|a, b| {
let a_pc = a.pc;
let b_pc = b.pc;
a_pc.cmp(&b_pc)
});
span.exit();

let span: span::EnteredSpan = span!(Level::DEBUG, "optimize unwind info").entered();
let found_unwind_info = remove_unnecesary_markers(&found_unwind_info);
let found_unwind_info = remove_redundant(&found_unwind_info);
remove_unnecesary_markers(&mut found_unwind_info);
remove_redundant(&mut found_unwind_info);
span.exit();

debug!(
Expand Down
212 changes: 115 additions & 97 deletions src/unwind_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use thiserror::Error;

use crate::bpf::profiler_bindings::stack_unwind_row_t;
use anyhow::anyhow;
use tracing::error;
use tracing::{error, span, Level};

#[repr(u8)]
pub enum CfaType {
Expand Down Expand Up @@ -252,16 +252,16 @@ impl<'a> UnwindInfoBuilder<'a> {
let eh_frame = EhFrame::new(eh_frame_data, endian);
let mut entries_iter = eh_frame.entries(&bases);

let mut ctx = Box::new(UnwindContext::new());
let mut cur_cie = None;
let mut pc_and_fde_offset = Vec::new();

while let Ok(Some(entry)) = entries_iter.next() {
match entry {
CieOrFde::Cie(cie) => {
cur_cie = Some(cie);
}
CieOrFde::Fde(partial_fde) => {
if let Ok(fde) = partial_fde.parse(|eh_frame, bases, cie_offset| {
let fde = partial_fde.parse(|eh_frame, bases, cie_offset| {
if let Some(cie) = &cur_cie {
if cie.offset() == cie_offset.0 {
return Ok(cie.clone());
Expand All @@ -272,111 +272,125 @@ impl<'a> UnwindInfoBuilder<'a> {
cur_cie = Some(cie.clone());
}
cie
}) {
(self.callback)(&UnwindData::Function(
fde.initial_address(),
fde.initial_address() + fde.len(),
));

let mut table = fde.rows(&eh_frame, &bases, &mut ctx)?;

loop {
let mut compact_row = CompactUnwindRow::default();

match table.next_row() {
Ok(None) => break,
Ok(Some(row)) => {
compact_row.pc = row.start_address();
match row.cfa() {
CfaRule::RegisterAndOffset { register, offset } => {
if register == &RBP_X86 {
compact_row.cfa_type =
CfaType::FramePointerOffset as u8;
} else if register == &RSP_X86 {
compact_row.cfa_type =
CfaType::StackPointerOffset as u8;
} else {
compact_row.cfa_type =
CfaType::UnsupportedRegisterOffset as u8;
}

match u16::try_from(*offset) {
Ok(off) => {
compact_row.cfa_offset = off;
}
Err(_) => {
compact_row.cfa_type =
CfaType::OffsetDidNotFit as u8;
}
}
}
CfaRule::Expression(exp) => {
let found_expression = exp.0.slice();

if found_expression == *PLT1 {
compact_row.cfa_offset = PltType::Plt1 as u16;
} else if found_expression == *PLT2 {
compact_row.cfa_offset = PltType::Plt2 as u16;
}

compact_row.cfa_type = CfaType::Expression as u8;
}
};

match row.register(RBP_X86) {
gimli::RegisterRule::Undefined => {}
gimli::RegisterRule::Offset(offset) => {
compact_row.rbp_type = RbpType::CfaOffset as u8;
compact_row.rbp_offset =
i16::try_from(offset).expect("convert rbp offset");
}
gimli::RegisterRule::Register(_reg) => {
compact_row.rbp_type = RbpType::Register as u8;
}
gimli::RegisterRule::Expression(_) => {
compact_row.rbp_type = RbpType::Expression as u8;
}
_ => {
// print!(", rbp unsupported {:?}", rbp);
}
}
});

if let Ok(fde) = fde {
pc_and_fde_offset.push((fde.initial_address(), fde.offset()));
}
}
}
}

if row.register(fde.cie().return_address_register())
== gimli::RegisterRule::Undefined
{
compact_row.rbp_type =
RbpType::UndefinedReturnAddress as u8;
let span = span!(Level::DEBUG, "sort pc and fdes").entered();
pc_and_fde_offset.sort_by_key(|(pc, _)| *pc);
span.exit();

let mut ctx = Box::new(UnwindContext::new());
for (_, fde_offset) in pc_and_fde_offset {
let fde = eh_frame.fde_from_offset(
&bases,
gimli::EhFrameOffset(fde_offset),
EhFrame::cie_from_offset,
)?;

(self.callback)(&UnwindData::Function(
fde.initial_address(),
fde.initial_address() + fde.len(),
));

let mut table = fde.rows(&eh_frame, &bases, &mut ctx)?;

loop {
let mut compact_row = CompactUnwindRow::default();

match table.next_row() {
Ok(None) => break,
Ok(Some(row)) => {
compact_row.pc = row.start_address();
match row.cfa() {
CfaRule::RegisterAndOffset { register, offset } => {
if register == &RBP_X86 {
compact_row.cfa_type = CfaType::FramePointerOffset as u8;
} else if register == &RSP_X86 {
compact_row.cfa_type = CfaType::StackPointerOffset as u8;
} else {
compact_row.cfa_type = CfaType::UnsupportedRegisterOffset as u8;
}

match u16::try_from(*offset) {
Ok(off) => {
compact_row.cfa_offset = off;
}
Err(_) => {
compact_row.cfa_type = CfaType::OffsetDidNotFit as u8;
}
}
}
CfaRule::Expression(exp) => {
let found_expression = exp.0.slice();

// print!(", ra {:?}", row.register(fde.cie().return_address_register()));
if found_expression == *PLT1 {
compact_row.cfa_offset = PltType::Plt1 as u16;
} else if found_expression == *PLT2 {
compact_row.cfa_offset = PltType::Plt2 as u16;
}
_ => continue,

compact_row.cfa_type = CfaType::Expression as u8;
}
};

(self.callback)(&UnwindData::Instruction(compact_row));
match row.register(RBP_X86) {
gimli::RegisterRule::Undefined => {}
gimli::RegisterRule::Offset(offset) => {
compact_row.rbp_type = RbpType::CfaOffset as u8;
compact_row.rbp_offset =
i16::try_from(offset).expect("convert rbp offset");
}
gimli::RegisterRule::Register(_reg) => {
compact_row.rbp_type = RbpType::Register as u8;
}
gimli::RegisterRule::Expression(_) => {
compact_row.rbp_type = RbpType::Expression as u8;
}
_ => {
// print!(", rbp unsupported {:?}", rbp);
}
}
// start_addresses.push(fde.initial_address() as u32);
// end_addresses.push((fde.initial_address() + fde.len()) as u32);

if row.register(fde.cie().return_address_register())
== gimli::RegisterRule::Undefined
{
compact_row.rbp_type = RbpType::UndefinedReturnAddress as u8;
}

// print!(", ra {:?}", row.register(fde.cie().return_address_register()));
}
_ => continue,
}

(self.callback)(&UnwindData::Instruction(compact_row));
}
// start_addresses.push(fde.initial_address() as u32);
// end_addresses.push((fde.initial_address() + fde.len()) as u32);
}
Ok(())
}
}

// Must be sorted. Also, not very optimized as of now.
pub fn remove_unnecesary_markers(info: &Vec<stack_unwind_row_t>) -> Vec<stack_unwind_row_t> {
let mut unwind_info = Vec::with_capacity(info.len());
// Must be sorted.
pub fn remove_unnecesary_markers(unwind_info: &mut Vec<stack_unwind_row_t>) {
let mut last_row: Option<stack_unwind_row_t> = None;
let mut new_i: usize = 0;

for i in 0..unwind_info.len() {
let row = unwind_info[i];

for row in info {
if let Some(last_row_unwrapped) = last_row {
let previous_is_redundant_marker = (last_row_unwrapped.cfa_type
== CfaType::EndFdeMarker as u8)
&& last_row_unwrapped.pc == row.pc;
if previous_is_redundant_marker {
unwind_info.pop();
new_i -= 1;
}
}

Expand All @@ -387,22 +401,25 @@ pub fn remove_unnecesary_markers(info: &Vec<stack_unwind_row_t>) -> Vec<stack_un
}

if !current_is_redundant_marker {
unwind_info.push(*row);
unwind_info[new_i] = row;
new_i += 1;
}

last_row = Some(*row);
last_row = Some(row);
}

unwind_info
unwind_info.truncate(new_i);
}

// Must be sorted. Also, not very optimized as of now.
pub fn remove_redundant(info: &Vec<stack_unwind_row_t>) -> Vec<stack_unwind_row_t> {
let mut unwind_info = Vec::with_capacity(info.len());
// Must be sorted.
pub fn remove_redundant(unwind_info: &mut Vec<stack_unwind_row_t>) {
let mut last_row: Option<stack_unwind_row_t> = None;
let mut new_i: usize = 0;

for row in info {
for i in 0..unwind_info.len() {
let mut redundant = false;
let row = unwind_info[i];

if let Some(last_row_unwrapped) = last_row {
redundant = row.cfa_type == last_row_unwrapped.cfa_type
&& row.cfa_offset == last_row_unwrapped.cfa_offset
Expand All @@ -411,17 +428,18 @@ pub fn remove_redundant(info: &Vec<stack_unwind_row_t>) -> Vec<stack_unwind_row_
}

if !redundant {
unwind_info.push(*row);
unwind_info[new_i] = row;
new_i += 1;
}

last_row = Some(*row);
last_row = Some(row);
}

unwind_info
unwind_info.truncate(new_i);
}

pub fn in_memory_unwind_info(path: &str) -> anyhow::Result<Vec<stack_unwind_row_t>> {
let mut unwind_info = Vec::new();
let mut unwind_info: Vec<stack_unwind_row_t> = Vec::new();
let mut last_function_end_addr: Option<u64> = None;
let mut last_row = None;

Expand Down

0 comments on commit eff7f34

Please sign in to comment.