From eff7f34b2a0bfea6a4e7226a32d6a88ffbd7c46c Mon Sep 17 00:00:00 2001 From: Francisco Javier Honduvilla Coto Date: Mon, 29 Apr 2024 13:23:22 +0100 Subject: [PATCH] Unwind information performance improvements This commit: - sort the FDEs rather than the whole unwind information, which is a smaller vector. This showed no performance changes in benchmarks; - optimise the unwind info in-place. Most of the performance gains in this commits comes from this changes; Wrote a benchmark with criterion.rs and saw a ~25% decrease in time spent building and optimising the unwind info. ``` Running benches/unwind_info.rs (target/release/deps/unwind_info-b3269dc277690031) WARNING: HTML report generation will become a non-default optional feature in Criterion.rs 0.4.0. This feature is being moved to cargo-criterion (https://github.com/bheisler/cargo-criterion) and will be optional in a future version of Criterion.rs. To silence this warning, either switch to cargo-criterion or enable the 'html_reports' feature in your Cargo.toml. Gnuplot not found, using plotters backend unwind info + sorting time: [37.697 ms 39.061 ms 40.592 ms] change: [-26.710% -23.315% -19.870%] (p = 0.00 < 0.05) Performance has improved. Found 8 outliers among 100 measurements (8.00%) 3 (3.00%) high mild 5 (5.00%) high severe ``` For this test, this node.js binary was used ``` /home/javierhonduco/.vscode-server/cli/servers/Stable-e170252f762678dec6ca2cc69aba1570769a5d39/server/node: ELF 64-bit LSB executable, x86-64, version 1 (GNU/Linux), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 3.2.0, BuildID[sha1]=e034c66a6dc897492d6ecae6c6b046669f94ad96, with debug_info, not stripped, too many notes (256) ``` I am not commiting the benchmarks as I bumped into some rough edges with criterion and I am not sure if it's maintained anymore. Might be worth adding some benchmarks later on. Test Plan ========= Compared the unwind info before and after this change for a bunch of executables. --- src/main.rs | 29 +++++-- src/profiler.rs | 12 +-- src/unwind_info.rs | 212 ++++++++++++++++++++++++--------------------- 3 files changed, 140 insertions(+), 113 deletions(-) diff --git a/src/main.rs b/src/main.rs index 1477292..4aec28c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,7 +20,10 @@ use tracing_subscriber::FmtSubscriber; use lightswitch::collector::Collector; use lightswitch::object::ObjectFile; use lightswitch::profiler::Profiler; -use lightswitch::unwind_info::{compact_printing_callback, UnwindInfoBuilder}; +use lightswitch::unwind_info::in_memory_unwind_info; +use lightswitch::unwind_info::remove_redundant; +use lightswitch::unwind_info::remove_unnecesary_markers; +use lightswitch::unwind_info::UnwindInfoBuilder; const SAMPLE_FREQ_RANGE: RangeInclusive = 1..=1009; @@ -125,6 +128,25 @@ fn main() -> Result<(), Box> { let args = Cli::parse(); + if let Some(path) = args.show_unwind_info { + let mut unwind_info = in_memory_unwind_info(&path).unwrap(); + remove_unnecesary_markers(&mut unwind_info); + remove_redundant(&mut unwind_info); + + for compact_row in unwind_info { + let pc = compact_row.pc; + let cfa_type = compact_row.cfa_type; + let rbp_type = compact_row.rbp_type; + let cfa_offset = compact_row.cfa_offset; + let rbp_offset = compact_row.rbp_offset; + println!( + "pc: {:x} cfa_type: {:<2} rbp_type: {:<2} cfa_offset: {:<4} rbp_offset: {:<4}", + pc, cfa_type, rbp_type, cfa_offset, rbp_offset + ); + } + return Ok(()); + } + let subscriber = FmtSubscriber::builder() .with_max_level(if args.filter_logs { Level::TRACE @@ -137,11 +159,6 @@ fn main() -> Result<(), Box> { tracing::subscriber::set_global_default(subscriber).expect("setting default subscriber failed"); - if let Some(path) = args.show_unwind_info { - UnwindInfoBuilder::with_callback(&path, compact_printing_callback)?.process()?; - return Ok(()); - } - if let Some(path) = args.show_info { let objet_file = ObjectFile::new(&PathBuf::from(path.clone())).unwrap(); println!("build id {:?}", objet_file.build_id()); diff --git a/src/profiler.rs b/src/profiler.rs index 76b1419..4d7112e 100644 --- a/src/profiler.rs +++ b/src/profiler.rs @@ -752,17 +752,9 @@ impl Profiler<'_> { } span.exit(); - let span: span::EnteredSpan = span!(Level::DEBUG, "sort unwind info").entered(); - found_unwind_info.sort_by(|a, b| { - let a_pc = a.pc; - let b_pc = b.pc; - a_pc.cmp(&b_pc) - }); - span.exit(); - let span: span::EnteredSpan = span!(Level::DEBUG, "optimize unwind info").entered(); - let found_unwind_info = remove_unnecesary_markers(&found_unwind_info); - let found_unwind_info = remove_redundant(&found_unwind_info); + remove_unnecesary_markers(&mut found_unwind_info); + remove_redundant(&mut found_unwind_info); span.exit(); debug!( diff --git a/src/unwind_info.rs b/src/unwind_info.rs index b5d4a63..9f02cee 100644 --- a/src/unwind_info.rs +++ b/src/unwind_info.rs @@ -9,7 +9,7 @@ use thiserror::Error; use crate::bpf::profiler_bindings::stack_unwind_row_t; use anyhow::anyhow; -use tracing::error; +use tracing::{error, span, Level}; #[repr(u8)] pub enum CfaType { @@ -252,8 +252,8 @@ impl<'a> UnwindInfoBuilder<'a> { let eh_frame = EhFrame::new(eh_frame_data, endian); let mut entries_iter = eh_frame.entries(&bases); - let mut ctx = Box::new(UnwindContext::new()); let mut cur_cie = None; + let mut pc_and_fde_offset = Vec::new(); while let Ok(Some(entry)) = entries_iter.next() { match entry { @@ -261,7 +261,7 @@ impl<'a> UnwindInfoBuilder<'a> { cur_cie = Some(cie); } CieOrFde::Fde(partial_fde) => { - if let Ok(fde) = partial_fde.parse(|eh_frame, bases, cie_offset| { + let fde = partial_fde.parse(|eh_frame, bases, cie_offset| { if let Some(cie) = &cur_cie { if cie.offset() == cie_offset.0 { return Ok(cie.clone()); @@ -272,111 +272,125 @@ impl<'a> UnwindInfoBuilder<'a> { cur_cie = Some(cie.clone()); } cie - }) { - (self.callback)(&UnwindData::Function( - fde.initial_address(), - fde.initial_address() + fde.len(), - )); - - let mut table = fde.rows(&eh_frame, &bases, &mut ctx)?; - - loop { - let mut compact_row = CompactUnwindRow::default(); - - match table.next_row() { - Ok(None) => break, - Ok(Some(row)) => { - compact_row.pc = row.start_address(); - match row.cfa() { - CfaRule::RegisterAndOffset { register, offset } => { - if register == &RBP_X86 { - compact_row.cfa_type = - CfaType::FramePointerOffset as u8; - } else if register == &RSP_X86 { - compact_row.cfa_type = - CfaType::StackPointerOffset as u8; - } else { - compact_row.cfa_type = - CfaType::UnsupportedRegisterOffset as u8; - } - - match u16::try_from(*offset) { - Ok(off) => { - compact_row.cfa_offset = off; - } - Err(_) => { - compact_row.cfa_type = - CfaType::OffsetDidNotFit as u8; - } - } - } - CfaRule::Expression(exp) => { - let found_expression = exp.0.slice(); - - if found_expression == *PLT1 { - compact_row.cfa_offset = PltType::Plt1 as u16; - } else if found_expression == *PLT2 { - compact_row.cfa_offset = PltType::Plt2 as u16; - } - - compact_row.cfa_type = CfaType::Expression as u8; - } - }; - - match row.register(RBP_X86) { - gimli::RegisterRule::Undefined => {} - gimli::RegisterRule::Offset(offset) => { - compact_row.rbp_type = RbpType::CfaOffset as u8; - compact_row.rbp_offset = - i16::try_from(offset).expect("convert rbp offset"); - } - gimli::RegisterRule::Register(_reg) => { - compact_row.rbp_type = RbpType::Register as u8; - } - gimli::RegisterRule::Expression(_) => { - compact_row.rbp_type = RbpType::Expression as u8; - } - _ => { - // print!(", rbp unsupported {:?}", rbp); - } - } + }); + + if let Ok(fde) = fde { + pc_and_fde_offset.push((fde.initial_address(), fde.offset())); + } + } + } + } - if row.register(fde.cie().return_address_register()) - == gimli::RegisterRule::Undefined - { - compact_row.rbp_type = - RbpType::UndefinedReturnAddress as u8; + let span = span!(Level::DEBUG, "sort pc and fdes").entered(); + pc_and_fde_offset.sort_by_key(|(pc, _)| *pc); + span.exit(); + + let mut ctx = Box::new(UnwindContext::new()); + for (_, fde_offset) in pc_and_fde_offset { + let fde = eh_frame.fde_from_offset( + &bases, + gimli::EhFrameOffset(fde_offset), + EhFrame::cie_from_offset, + )?; + + (self.callback)(&UnwindData::Function( + fde.initial_address(), + fde.initial_address() + fde.len(), + )); + + let mut table = fde.rows(&eh_frame, &bases, &mut ctx)?; + + loop { + let mut compact_row = CompactUnwindRow::default(); + + match table.next_row() { + Ok(None) => break, + Ok(Some(row)) => { + compact_row.pc = row.start_address(); + match row.cfa() { + CfaRule::RegisterAndOffset { register, offset } => { + if register == &RBP_X86 { + compact_row.cfa_type = CfaType::FramePointerOffset as u8; + } else if register == &RSP_X86 { + compact_row.cfa_type = CfaType::StackPointerOffset as u8; + } else { + compact_row.cfa_type = CfaType::UnsupportedRegisterOffset as u8; + } + + match u16::try_from(*offset) { + Ok(off) => { + compact_row.cfa_offset = off; + } + Err(_) => { + compact_row.cfa_type = CfaType::OffsetDidNotFit as u8; } + } + } + CfaRule::Expression(exp) => { + let found_expression = exp.0.slice(); - // print!(", ra {:?}", row.register(fde.cie().return_address_register())); + if found_expression == *PLT1 { + compact_row.cfa_offset = PltType::Plt1 as u16; + } else if found_expression == *PLT2 { + compact_row.cfa_offset = PltType::Plt2 as u16; } - _ => continue, + + compact_row.cfa_type = CfaType::Expression as u8; } + }; - (self.callback)(&UnwindData::Instruction(compact_row)); + match row.register(RBP_X86) { + gimli::RegisterRule::Undefined => {} + gimli::RegisterRule::Offset(offset) => { + compact_row.rbp_type = RbpType::CfaOffset as u8; + compact_row.rbp_offset = + i16::try_from(offset).expect("convert rbp offset"); + } + gimli::RegisterRule::Register(_reg) => { + compact_row.rbp_type = RbpType::Register as u8; + } + gimli::RegisterRule::Expression(_) => { + compact_row.rbp_type = RbpType::Expression as u8; + } + _ => { + // print!(", rbp unsupported {:?}", rbp); + } } - // start_addresses.push(fde.initial_address() as u32); - // end_addresses.push((fde.initial_address() + fde.len()) as u32); + + if row.register(fde.cie().return_address_register()) + == gimli::RegisterRule::Undefined + { + compact_row.rbp_type = RbpType::UndefinedReturnAddress as u8; + } + + // print!(", ra {:?}", row.register(fde.cie().return_address_register())); } + _ => continue, } + + (self.callback)(&UnwindData::Instruction(compact_row)); } + // start_addresses.push(fde.initial_address() as u32); + // end_addresses.push((fde.initial_address() + fde.len()) as u32); } Ok(()) } } -// Must be sorted. Also, not very optimized as of now. -pub fn remove_unnecesary_markers(info: &Vec) -> Vec { - let mut unwind_info = Vec::with_capacity(info.len()); +// Must be sorted. +pub fn remove_unnecesary_markers(unwind_info: &mut Vec) { let mut last_row: Option = None; + let mut new_i: usize = 0; + + for i in 0..unwind_info.len() { + let row = unwind_info[i]; - for row in info { if let Some(last_row_unwrapped) = last_row { let previous_is_redundant_marker = (last_row_unwrapped.cfa_type == CfaType::EndFdeMarker as u8) && last_row_unwrapped.pc == row.pc; if previous_is_redundant_marker { - unwind_info.pop(); + new_i -= 1; } } @@ -387,22 +401,25 @@ pub fn remove_unnecesary_markers(info: &Vec) -> Vec) -> Vec { - let mut unwind_info = Vec::with_capacity(info.len()); +// Must be sorted. +pub fn remove_redundant(unwind_info: &mut Vec) { let mut last_row: Option = None; + let mut new_i: usize = 0; - for row in info { + for i in 0..unwind_info.len() { let mut redundant = false; + let row = unwind_info[i]; + if let Some(last_row_unwrapped) = last_row { redundant = row.cfa_type == last_row_unwrapped.cfa_type && row.cfa_offset == last_row_unwrapped.cfa_offset @@ -411,17 +428,18 @@ pub fn remove_redundant(info: &Vec) -> Vec anyhow::Result> { - let mut unwind_info = Vec::new(); + let mut unwind_info: Vec = Vec::new(); let mut last_function_end_addr: Option = None; let mut last_row = None;