diff --git a/Cargo.toml b/Cargo.toml index 57851d0a..d761cf5b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -151,6 +151,7 @@ anyhow = "1.0.71" blazesym-dev = {path = "dev", features = ["generate-unit-test-files"]} # TODO: Use 0.5.2 once released. criterion = {git = "https://github.com/bheisler/criterion.rs.git", rev = "b913e232edd98780961ecfbae836ec77ede49259", default-features = false, features = ["rayon", "cargo_bench_support"]} +rand = {version = "0.9", default-features = false, features = ["std", "thread_rng"]} scopeguard = "1.2" stats_alloc = {version = "0.1.1", features = ["nightly"]} tempfile = "3.4" diff --git a/capi/src/symbolize.rs b/capi/src/symbolize.rs index c24c4d81..de6c0f32 100644 --- a/capi/src/symbolize.rs +++ b/capi/src/symbolize.rs @@ -171,6 +171,7 @@ impl From for Kernel { Self { kallsyms: to_maybe_path(kallsyms), vmlinux: to_maybe_path(vmlinux), + kaslr_offset: None, debug_syms, _non_exhaustive: (), } diff --git a/src/elf/mod.rs b/src/elf/mod.rs index d82d464e..0bbf1126 100644 --- a/src/elf/mod.rs +++ b/src/elf/mod.rs @@ -11,7 +11,6 @@ pub(crate) mod types; // of concerns that is not a workable location. pub(crate) static DEFAULT_DEBUG_DIRS: &[&str] = &["/usr/lib/debug", "/lib/debug/"]; -#[cfg(test)] pub(crate) use parser::BackendImpl; pub(crate) use parser::ElfParser; pub(crate) use resolver::ElfResolverData; diff --git a/src/elf/parser.rs b/src/elf/parser.rs index e1f4752a..9085a5d2 100644 --- a/src/elf/parser.rs +++ b/src/elf/parser.rs @@ -887,7 +887,6 @@ where _backend: B::ObjTy, } -#[cfg(test)] impl ElfParser { fn open_file_io

(file: File, path: P) -> Self where diff --git a/src/kernel/kaslr.rs b/src/kernel/kaslr.rs index 2b5c1903..980e0807 100644 --- a/src/kernel/kaslr.rs +++ b/src/kernel/kaslr.rs @@ -1,20 +1,14 @@ -use std::error::Error as StdError; use std::fs::File; -use std::io; -use std::io::Read as _; -use std::path::Path; use std::str; -use std::str::FromStr; use crate::elf; use crate::elf::types::ElfN_Nhdr; use crate::elf::BackendImpl; use crate::elf::ElfParser; +use crate::log; use crate::util::align_up_u32; use crate::util::from_radix_16; use crate::util::split_bytes; -use crate::Addr; -use crate::Error; use crate::ErrorExt as _; use crate::ErrorKind; use crate::IntoError as _; @@ -112,6 +106,19 @@ fn find_kcore_kaslr_offset() -> Result> { Ok(offset) } +pub(crate) fn find_kalsr_offset() -> Result> { + if let offset @ Some(o) = find_kcore_kaslr_offset()? { + log::debug!("determined KASLR offset to be {o:#x} based on {PROC_KCORE} contents"); + return Ok(offset) + } + + // TODO: Try other methods of determining KASLR offset, including + // comparisons between `/proc/kallsyms` values to + // `System.map-*` contents or parsing `dmesg` (no, really...) + + Ok(None) +} + #[cfg(test)] mod tests { diff --git a/src/kernel/mod.rs b/src/kernel/mod.rs index 91e39b8f..f58f7d38 100644 --- a/src/kernel/mod.rs +++ b/src/kernel/mod.rs @@ -1,11 +1,8 @@ #[cfg(feature = "bpf")] mod bpf; +mod kaslr; mod ksym; mod resolver; -// Still work in progress. -#[allow(unused)] -#[cfg(test)] -mod kaslr; // TODO: KsymResolver should ideally be an implementation detail. pub(crate) use ksym::KsymResolver; diff --git a/src/kernel/resolver.rs b/src/kernel/resolver.rs index 89e5d94c..8bddabaf 100644 --- a/src/kernel/resolver.rs +++ b/src/kernel/resolver.rs @@ -11,20 +11,25 @@ use crate::symbolize::ResolvedSym; use crate::symbolize::Symbolize; use crate::Addr; use crate::Error; +use crate::ErrorExt as _; +use crate::IntoError as _; use crate::Result; +use super::kaslr::find_kalsr_offset; use super::ksym::KsymResolver; pub(crate) struct KernelResolver { ksym_resolver: Option>, elf_resolver: Option>, + kaslr_offset: u64, } impl KernelResolver { pub(crate) fn new( ksym_resolver: Option>, elf_resolver: Option>, + kaslr_offset: Option, ) -> Result { if ksym_resolver.is_none() && elf_resolver.is_none() { return Err(Error::with_not_found( @@ -32,17 +37,35 @@ impl KernelResolver { )) } + let kaslr_offset = if let Some(kaslr_offset) = kaslr_offset { + kaslr_offset + } else { + find_kalsr_offset() + .context("failed to query system KASLR offset")? + .unwrap_or_default() + }; + Ok(KernelResolver { ksym_resolver, elf_resolver, + kaslr_offset, }) } } impl Symbolize for KernelResolver { fn find_sym(&self, addr: Addr, opts: &FindSymOpts) -> Result, Reason>> { + let elf_addr = || { + addr.checked_sub(self.kaslr_offset).ok_or_invalid_input(|| { + format!( + "address {addr:#x} is less then KASLR offset ({:#x})", + self.kaslr_offset + ) + }) + }; + match (self.elf_resolver.as_ref(), self.ksym_resolver.as_ref()) { - (Some(elf_resolver), None) => elf_resolver.find_sym(addr, opts), + (Some(elf_resolver), None) => elf_resolver.find_sym(elf_addr()?, opts), (None, Some(ksym_resolver)) => ksym_resolver.find_sym(addr, opts), (Some(elf_resolver), Some(ksym_resolver)) => { // We give preference to vmlinux, because it is likely @@ -50,7 +73,7 @@ impl Symbolize for KernelResolver { // address, though, we fall back to kallsyms. This is // helpful for example for kernel modules, which // naturally are not captured by vmlinux. - let result = elf_resolver.find_sym(addr, opts)?; + let result = elf_resolver.find_sym(elf_addr()?, opts)?; if result.is_ok() { Ok(result) } else { @@ -95,7 +118,7 @@ mod tests { #[test] fn debug_repr() { let ksym = Rc::new(KsymResolver::load_file_name(Path::new(KALLSYMS)).unwrap()); - let kernel = KernelResolver::new(Some(ksym), None).unwrap(); + let kernel = KernelResolver::new(Some(ksym), None, Some(0)).unwrap(); assert_ne!(format!("{kernel:?}"), ""); } } diff --git a/src/symbolize/source.rs b/src/symbolize/source.rs index 24e1bbac..0ed58c0a 100644 --- a/src/symbolize/source.rs +++ b/src/symbolize/source.rs @@ -194,6 +194,13 @@ pub struct Kernel { /// `vmlinux` will generally be given preference and `kallsyms` acts /// as a fallback. pub vmlinux: MaybeDefault, + /// The KASLR offset to use. + /// + /// Given a value of `None`, the library will attempt to deduce the + /// offset itself. Note that this value only has relevance when a + /// kernel image is used for symbolization, because `kallsyms` based + /// data already include randomization adjusted addresses. + pub kaslr_offset: Option, /// Whether or not to consult debug symbols from `vmlinux` to /// satisfy the request (if present). /// @@ -212,6 +219,7 @@ impl Default for Kernel { Self { kallsyms: MaybeDefault::Default, vmlinux: MaybeDefault::Default, + kaslr_offset: None, debug_syms: true, _non_exhaustive: (), } diff --git a/src/symbolize/symbolizer.rs b/src/symbolize/symbolizer.rs index 1d8f3849..5186026d 100644 --- a/src/symbolize/symbolizer.rs +++ b/src/symbolize/symbolizer.rs @@ -942,6 +942,7 @@ impl Symbolizer { let Kernel { kallsyms, vmlinux, + kaslr_offset, debug_syms, _non_exhaustive: (), } = src; @@ -992,7 +993,10 @@ impl Symbolizer { .elf_cache .elf_resolver(&vmlinux, self.maybe_debug_dirs(*debug_syms)); match result { - Ok(resolver) => Some(resolver), + Ok(resolver) => { + log::debug!("found suitable vmlinux file `{}`", vmlinux.display()); + Some(resolver) + } Err(err) => { log::warn!( "failed to load vmlinux `{}`: {err}; ignoring...", @@ -1008,7 +1012,7 @@ impl Symbolizer { MaybeDefault::None => None, }; - KernelResolver::new(ksym_resolver.cloned(), elf_resolver.cloned()) + KernelResolver::new(ksym_resolver.cloned(), elf_resolver.cloned(), *kaslr_offset) } #[cfg(not(linux))] diff --git a/tests/suite/symbolize.rs b/tests/suite/symbolize.rs index dc90e6ba..748564e1 100644 --- a/tests/suite/symbolize.rs +++ b/tests/suite/symbolize.rs @@ -2,6 +2,7 @@ use std::env; use std::ffi::OsStr; use std::fs::copy; use std::fs::read as read_file; +use std::fs::File; use std::io; use std::io::Read as _; use std::io::Write as _; @@ -47,6 +48,7 @@ use blazesym::__private::find_the_answer_fn_in_zip; #[cfg(linux)] use blazesym_dev::with_bpf_symbolization_target_addrs; +use rand::Rng as _; use scopeguard::defer; use tempfile::tempdir; @@ -1024,6 +1026,7 @@ fn symbolize_kernel_no_valid_source() { let kernel = Kernel { kallsyms: MaybeDefault::None, vmlinux: MaybeDefault::None, + kaslr_offset: Some(0), ..Default::default() }; let src = Source::Kernel(kernel); @@ -1051,6 +1054,7 @@ fn symbolize_kernel_kallsyms() { .join("kallsyms"), ), vmlinux, + kaslr_offset: Some(0), ..Default::default() }; let src = Source::Kernel(kernel); @@ -1107,6 +1111,7 @@ fn symbolize_kernel_vmlinux() { .join("data") .join("test-stable-addrs.bin"), ), + kaslr_offset: Some(0), debug_syms: true, ..Default::default() }; @@ -1130,12 +1135,84 @@ fn symbolize_kernel_vmlinux() { test(src.clone(), false); } +/// Test symbolization of a kernel address using vmlinux and the system +/// KASLR state. +#[test] +#[ignore = "test requires discoverable vmlinux file present"] +fn symbolize_kernel_system_vmlinux() { + fn find_kernel_syms() -> Vec<(Addr, String)> { + let mut file = File::open("/proc/kallsyms").unwrap(); + let mut content = String::new(); + let _cnt = file.read_to_string(&mut content).unwrap(); + let pairs = content + .lines() + .filter_map(|line| { + let [addr, ty, name] = line + .split_ascii_whitespace() + .collect::>() + .get(0..3)? + .try_into() + .unwrap(); + if !["T", "t"].contains(&ty) { + return None + } + let addr = Addr::from_str_radix(addr, 16).unwrap(); + Some((addr, name)) + }) + .collect::>(); + + let mut rng = rand::rng(); + let pairs = (0..20) + .map(|_| { + let idx = rng.random_range(0..pairs.len()); + let addr = pairs[idx].0; + let name = pairs[idx].1; + (addr, name.to_string()) + }) + .collect::>(); + + pairs + } + + let syms = find_kernel_syms(); + let kernel = Kernel { + kallsyms: MaybeDefault::None, + vmlinux: MaybeDefault::Default, + kaslr_offset: None, + ..Default::default() + }; + let src = Source::Kernel(kernel); + let symbolizer = Symbolizer::new(); + let symbolized = symbolizer + .symbolize( + &src, + Input::AbsAddr( + syms.iter() + .map(|(addr, _name)| *addr) + .collect::>() + .as_slice(), + ), + ) + .unwrap(); + assert_eq!(symbolized.len(), syms.len()); + for (i, sym) in symbolized.iter().enumerate() { + let sym = sym.as_sym().unwrap(); + assert_eq!(sym.name, syms[i].1, "{sym:?} | {:?}", syms[i]); + } +} + /// Test symbolization of a kernel address inside a BPF program. #[cfg(linux)] #[test] fn symbolize_kernel_bpf_program() { with_bpf_symbolization_target_addrs(|handle_getpid, subprogram| { - let src = Source::Kernel(Kernel::default()); + let kernel = Kernel { + // KASLR offset shouldn't have any effect for BPF program + // symbolization. + kaslr_offset: Some(u64::MAX), + ..Default::default() + }; + let src = Source::Kernel(kernel); let symbolizer = Symbolizer::new(); let result = symbolizer .symbolize(&src, Input::AbsAddr(&[handle_getpid, subprogram]))