From 68951d29c31e817fb4a5d63129baaaa967d6812b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Mon, 10 Feb 2025 13:38:18 -0800 Subject: [PATCH] Add support for pre-caching process VMA metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users may legitimately be interested in being able to symbolize addresses inside a process even if the process has exited in the meantime. To support such use cases, introduce a new top-level API to the Symbolizer: the cache() method allows for caching various data pertaining to a symbolization source. At this point we only support caching of process VMA metadata, but in the future we can support pre-parsing ELF and DWARF metadata and similar, which could speed up symbolization requests significantly. Refs: #433 Signed-off-by: Daniel Müller --- CHANGELOG.md | 2 + src/symbolize/cache.rs | 111 ++++++++++++++++++++++++++++++++++++ src/symbolize/mod.rs | 1 + src/symbolize/symbolizer.rs | 65 +++++++++++++++++++-- tests/suite/symbolize.rs | 40 +++++++++++++ 5 files changed, 213 insertions(+), 6 deletions(-) create mode 100644 src/symbolize/cache.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index e1c9a83e..cb745c59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ Unreleased ---------- - Added support for 32 bit ELF binaries +- Added `symbolize::Symbolizer::cache` method for caching process VMA + metadata - Renamed `symbolize::Source::kernel_image` to `vmlinux` - Adjusted kernel symbolization logic to give preference to `vmlinux` file, if present diff --git a/src/symbolize/cache.rs b/src/symbolize/cache.rs new file mode 100644 index 00000000..2c73cec0 --- /dev/null +++ b/src/symbolize/cache.rs @@ -0,0 +1,111 @@ +//! Definitions of symbolization caching targets. + +use std::fmt::Debug; +use std::fmt::Formatter; +use std::fmt::Result as FmtResult; + +use crate::Pid; + + +/// Configuration for caching of process-level data. 
+#[derive(Clone)] +pub struct Process { + /// The referenced process' ID. + pub pid: Pid, + /// Whether to cache the process' VMAs for later use. + /// + /// Caching VMAs can be useful, because it conceptually enables the + /// library to serve a symbolization request targeting a process + /// even if said process has since exited the system. + /// + /// Note that once VMAs have been cached this way, the library will + /// refrain from re-reading updated VMAs unless instructed to. + /// Hence, if you have reason to believe that a process may have + /// changed its memory regions (by loading a new shared object, for + /// example), you would have to make another request to cache them + /// yourself. + /// + /// Note furthermore that if you cache VMAs to later symbolize + /// addresses after the original process has already exited, you + /// will have to opt-out of usage of `/proc/<pid>/map_files/` as + /// part of the symbolization request. Refer to + /// [`source::Process::map_files`][crate::symbolize::source::Process::map_files]. + pub cache_vmas: bool, + /// The struct is non-exhaustive and open to extension. + #[doc(hidden)] + pub _non_exhaustive: (), +} + +impl Process { + /// Create a new [`Process`] object using the provided `pid`. + /// + /// `cache_vmas` defaults to `true` when using this constructor. + #[inline] + pub fn new(pid: Pid) -> Self { + Self { + pid, + cache_vmas: true, + _non_exhaustive: (), + } + } +} + +impl Debug for Process { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + let Self { + pid, + cache_vmas: _, + _non_exhaustive: (), + } = self; + + f.debug_tuple(stringify!(Process)) + .field(&format_args!("{pid}")) + .finish() + } +} + +impl From<Process> for Cache<'static> { + #[inline] + fn from(process: Process) -> Self { + Self::Process(process) + } +} + + +/// A description of what data to cache in advance, so that +/// subsequent symbolization requests can be satisfied quicker. 
+#[derive(Clone)] +#[non_exhaustive] +pub enum Cache<'dat> { + /// Information about a process. + Process(Process), + #[doc(hidden)] + Phantom(&'dat ()), +} + +impl Debug for Cache<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + match self { + Self::Process(process) => Debug::fmt(process, f), + Self::Phantom(()) => unreachable!(), + } + } +} + + +#[cfg(test)] +mod tests { + use super::*; + + + /// Exercise the `Debug` representation of various types. + #[test] + fn debug_repr() { + let process = Process::new(Pid::Slf); + assert_eq!(format!("{process:?}"), "Process(self)"); + let process = Process::new(Pid::from(1234)); + assert_eq!(format!("{process:?}"), "Process(1234)"); + let cache = Cache::from(process); + assert_eq!(format!("{cache:?}"), "Process(1234)"); + } +} diff --git a/src/symbolize/mod.rs b/src/symbolize/mod.rs index 03e87d31..73b0e988 100644 --- a/src/symbolize/mod.rs +++ b/src/symbolize/mod.rs @@ -91,6 +91,7 @@ //! [`gsym-in-apk`](https://github.com/libbpf/blazesym/blob/main/examples/gsym-in-apk) //! example, which illustrates the basic workflow. 
+pub mod cache; pub mod source; mod symbolizer; diff --git a/src/symbolize/symbolizer.rs b/src/symbolize/symbolizer.rs index d116e093..3fcadae3 100644 --- a/src/symbolize/symbolizer.rs +++ b/src/symbolize/symbolizer.rs @@ -1,4 +1,6 @@ use std::borrow::Cow; +use std::cell::RefCell; +use std::collections::HashMap; use std::ffi::OsStr; use std::fmt::Debug; use std::fmt::Formatter; @@ -54,6 +56,8 @@ use crate::IntoError as _; use crate::Pid; use crate::Result; +use super::cache; +use super::cache::Cache; #[cfg(feature = "apk")] use super::source::Apk; #[cfg(feature = "breakpad")] @@ -396,6 +400,7 @@ impl Builder { gsym_cache: FileCache::builder().enable_auto_reload(auto_reload).build(), ksym_cache: FileCache::builder().enable_auto_reload(auto_reload).build(), perf_map_cache: FileCache::builder().enable_auto_reload(auto_reload).build(), + process_vma_cache: RefCell::new(HashMap::new()), process_cache: InsertMap::new(), find_sym_opts, demangle, @@ -627,6 +632,11 @@ pub struct Symbolizer { gsym_cache: FileCache>, ksym_cache: FileCache>, perf_map_cache: FileCache, + /// Cache of VMA data on per-process basis. + /// + /// This member is only populated by explicit requests for caching + /// data by the user. 
+ process_vma_cache: RefCell>>, process_cache: InsertMap>>, find_sym_opts: FindSymOpts, demangle: bool, @@ -898,9 +908,6 @@ impl Symbolizer { perf_map: bool, map_files: bool, ) -> Result> { - let mut entry_iter = maps::parse_filtered(pid)?; - let entries = |_addr| entry_iter.next(); - let mut handler = SymbolizeHandler { symbolizer: self, pid, @@ -914,9 +921,27 @@ impl Symbolizer { addrs, |handler: &mut SymbolizeHandler<'_>| handler.all_symbols.as_mut_slice(), |sorted_addrs| -> Result> { - let () = - normalize_sorted_user_addrs_with_entries(sorted_addrs, entries, &mut handler)?; - Ok(handler) + if let Some(cached) = self.process_vma_cache.borrow().get(&pid) { + let mut entry_iter = cached.iter().map(Ok); + let entries = |_addr| entry_iter.next(); + + let () = normalize_sorted_user_addrs_with_entries( + sorted_addrs, + entries, + &mut handler, + )?; + Ok(handler) + } else { + let mut entry_iter = maps::parse_filtered(pid)?; + let entries = |_addr| entry_iter.next(); + + let () = normalize_sorted_user_addrs_with_entries( + sorted_addrs, + entries, + &mut handler, + )?; + Ok(handler) + } }, )?; Ok(handler.all_symbols) @@ -1022,6 +1047,34 @@ impl Symbolizer { )) } + /// Cache some or all information associated with a symbolization + /// source. + /// + /// Symbolization data is generally being cached when symbolization + /// is performed. However, sometimes it is necessary to cache data + /// early, for example to make subsequent symbolization requests as + /// fast running as possible. In rare instances it can also be a + /// matter of correctness. Process metadata such as VMAs and their + /// offsets can be cached so that even after the processes have exited + /// symbolization requests can still be satisfied. 
+ #[cfg_attr(feature = "tracing", crate::log::instrument(skip_all, fields(cache = ?cache), err))] + pub fn cache(&self, cache: &Cache) -> Result<()> { + match cache { + Cache::Process(cache::Process { + pid, + cache_vmas, + _non_exhaustive: (), + }) => { + if *cache_vmas { + let parsed = maps::parse_filtered(*pid)?.collect::>>()?; + let _prev = self.process_vma_cache.borrow_mut().insert(*pid, parsed); + } + } + Cache::Phantom(()) => unreachable!(), + } + Ok(()) + } + /// Symbolize a list of addresses. /// /// Symbolize a list of addresses using the provided symbolization diff --git a/tests/suite/symbolize.rs b/tests/suite/symbolize.rs index 5c23a3a6..34f94431 100644 --- a/tests/suite/symbolize.rs +++ b/tests/suite/symbolize.rs @@ -16,6 +16,7 @@ use std::process; use blazesym::helper::ElfResolver; use blazesym::inspect; use blazesym::normalize; +use blazesym::symbolize::cache; use blazesym::symbolize::source::Breakpad; use blazesym::symbolize::source::Elf; use blazesym::symbolize::source::GsymData; @@ -788,6 +789,45 @@ fn symbolize_process_in_mount_namespace() { }); } +/// Check that we can symbolize addresses from a process that has +/// already exited, based on VMA data cached earlier. +#[cfg(linux)] +#[test] +fn symbolize_process_exited_cached_vmas() { + let test_so = Path::new(&env!("CARGO_MANIFEST_DIR")) + .join("data") + .join("libtest-so.so"); + let wait = Path::new(&env!("CARGO_MANIFEST_DIR")) + .join("data") + .join("test-wait.bin"); + + let symbolizer = Symbolizer::new(); + + let (pid, addr) = RemoteProcess::default() + .arg(&test_so) + .exec(&wait, |pid, addr| { + // Cache VMA information about the process while it is alive. + let () = symbolizer + .cache(&cache::Cache::from(cache::Process::new(pid))) + .unwrap(); + (pid, addr) + }); + + // By now the process is guaranteed to be dead (modulo PID reuse...). 
+ let mut process = Process::new(pid); + // We need to opt out of map file usage, because those files will no + // longer be present with the process having exited. + process.map_files = false; + + let src = Source::Process(process); + let result = symbolizer + .symbolize_single(&src, Input::AbsAddr(addr)) + .unwrap() + .into_sym() + .unwrap(); + assert_eq!(result.name, "await_input"); +} + /// Check that we can symbolize an address residing in a zip archive. #[test] fn symbolize_process_zip() {