Skip to content

Commit

Permalink
Experimental unwinding - WIP
Browse files Browse the repository at this point in the history
Ensure we use elf addresses instead of absolute addresses
  • Loading branch information
r1viollet committed Apr 13, 2024
1 parent da416ea commit db5f51f
Show file tree
Hide file tree
Showing 5 changed files with 188 additions and 7 deletions.
8 changes: 4 additions & 4 deletions include/async-profiler/codeCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#ifndef _CODECACHE_H
#define _CODECACHE_H

// #include <jvmti.h>
#include <stdint.h>

#define NO_MIN_ADDRESS ((const void *)-1)
#define NO_MAX_ADDRESS ((const void *)0)
Expand Down Expand Up @@ -78,7 +78,7 @@ class CodeCache {
short _lib_index;
const void *_min_address;
const void *_max_address;
const char *_text_base;
const void *_text_base;

void **_got_start;
void **_got_end;
Expand Down Expand Up @@ -113,7 +113,7 @@ class CodeCache {

void setTextBase(const char *text_base) { _text_base = text_base; }

const char *getTextBase() { return _text_base; }
const void *getTextBase() { return _text_base; }

void **gotStart() const { return _got_start; }

Expand All @@ -136,7 +136,7 @@ class CodeCache {
void makeGotPatchable();

void setDwarfTable(FrameDesc *table, int length);
FrameDesc *findFrameDesc(const void *pc);
FrameDesc *findFrameDesc(uint64_t elf_address);
};

class CodeCacheArray {
Expand Down
8 changes: 6 additions & 2 deletions src/async-profiler/codeCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,13 @@
#include "codeCache.h"
#include "dwarf.h"
#include "os.h"

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <limits>
#include <cassert>

char *NativeFunc::create(const char *name, short lib_index) {
NativeFunc *f = (NativeFunc *)malloc(sizeof(NativeFunc) + 1 + strlen(name));
Expand Down Expand Up @@ -216,8 +219,9 @@ void CodeCache::setDwarfTable(FrameDesc *table, int length) {
_dwarf_table_length = length;
}

FrameDesc *CodeCache::findFrameDesc(const void *pc) {
u32 target_loc = (const char *)pc - _text_base;
FrameDesc *CodeCache::findFrameDesc(uintptr_t elf_address) {
assert(elf_address < std::numeric_limits<u32>::max());
const u32 target_loc = (const u32)elf_address;
int low = 0;
int high = _dwarf_table_length - 1;

Expand Down
2 changes: 1 addition & 1 deletion src/async-profiler/stackWalker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer,
CodeCacheArray *cache) {
FrameDesc *f;
CodeCache *cc = findLibraryByAddress(cache, sc.pc);
if (cc == NULL || (f = cc->findFrameDesc(sc.pc)) == NULL) {
if (cc == NULL || (f = cc->findFrameDesc(static_cast<const char*>(sc.pc) - static_cast<const char*>(cc->getTextBase()))) == NULL) {
f = &FrameDesc::default_frame;
}
// const char *sym = cc?cc->binarySearch(sc.pc):"unknown";
Expand Down
3 changes: 3 additions & 0 deletions src/async-profiler/symbols_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,9 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array,
printf("offset from get_elf_offset: %lx \n", elf_offset);
printf("last readable: %lx \n", last_readable_base);
}
else {
printf("Failed to read elf offsets \n");
}

// Do not parse the same executable twice, e.g. on Alpine Linux
if (parsed_inodes.insert(map.dev() | inode << 16).second) {
Expand Down
174 changes: 174 additions & 0 deletions test/dwarf_unwind-ut.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
#include <gtest/gtest.h>

#include "savecontext.hpp"
#include "stackWalker.h"
#include "unwind_state.hpp"

#include <array>

#include "async-profiler/codeCache.h"
#include "async-profiler/stack_context.h"
#include "async-profiler/symbols.h"

// Retrieves instruction pointer
//
// Evaluates to the address of the code location where the macro is expanded,
// converted to `unsigned long`.
// Relies on GNU C/C++ extensions: a statement expression `({ ... })`, a
// block-local label declared with `__label__`, and the unary `&&` operator that
// yields the address of a label. Declaring the label locally allows the macro
// to be expanded several times in the same function without label clashes.
#define _THIS_IP_ \
({ \
__label__ __here; \
__here: \
(unsigned long)&&__here; \
})

// #include "ddprof_defs.hpp"

// temp copy pasta
#define PERF_SAMPLE_STACK_SIZE (4096UL * 8)

std::byte stack[PERF_SAMPLE_STACK_SIZE];

DDPROF_NOINLINE size_t funcA(std::array<uint64_t, ddprof::k_perf_register_count> &regs);
DDPROF_NOINLINE size_t funcB(std::array<uint64_t, ddprof::k_perf_register_count> &regs);

// Inner frame of the funcA -> funcB call chain used by dwarf_unwind.simple.
//
// Prints its own name and current instruction pointer, then snapshots the
// registers into `regs` and the live stack into the file-level `stack` buffer
// through ddprof::save_context.
//
// @param regs  Register array filled in by ddprof::save_context.
// @return      The value returned by ddprof::save_context (the test treats it
//              as the number of stack bytes saved).
size_t funcB(std::array<uint64_t, ddprof::k_perf_register_count> &regs) {
  printf("dwarf_unwind-ut:%s %lx \n", __FUNCTION__, _THIS_IP_);
  std::span<const std::byte> bounds = ddprof::retrieve_stack_bounds();
  const size_t size = ddprof::save_context(bounds, regs, stack);

  // Empty asm used as a compiler barrier: it must execute after the call
  // returns, so the call above cannot be turned into a sibling (tail) call.
  // The test expects to find this function's frame between save_context and
  // funcA; DDPROF_NOINLINE (presumably a noinline attribute) would not be
  // enough to guarantee that in an optimised build.
  asm volatile("" ::: "memory");
  return size;
}

// Outer frame of the funcA -> funcB call chain used by dwarf_unwind.simple.
//
// Prints its own name and current instruction pointer, then delegates to
// funcB, which captures the registers and stack.
//
// @param regs  Register array forwarded to funcB.
// @return      Whatever funcB returns.
size_t funcA(std::array<uint64_t, ddprof::k_perf_register_count> &regs) {
  printf("dwarf_unwind-ut:%s %lx \n", __FUNCTION__, _THIS_IP_);
  const size_t size = funcB(regs);

  // Empty asm used as a compiler barrier: it must execute after funcB
  // returns, so the call above cannot be compiled as a sibling (tail) call.
  // Without it an optimised build may jump straight to funcB, removing this
  // frame from the stack even though the test asserts that it is present.
  asm volatile("" ::: "memory");
  return size;
}

// In-process unwinding test.
//
// Loads the libraries mapped in the current process, captures registers and
// stack through funcA -> funcB -> save_context, walks the captured stack and
// checks that the three innermost frames symbolise to save_context, funcB and
// funcA (in that order).
TEST(dwarf_unwind, simple) {
  CodeCacheArray cache_array;
  // Load libraries
  Symbols::parsePidLibraries(getpid(), &cache_array, false);
  std::array<uint64_t, ddprof::k_perf_register_count> regs{};
  size_t size_stack = funcA(regs);
  EXPECT_TRUE(size_stack);

  ap::StackContext sc = ap::from_regs(std::span(regs));
  ap::StackBuffer buffer(stack, sc.sp, sc.sp + size_stack);

  constexpr int k_max_depth = 128;
  void *callchain[k_max_depth];
  int n = stackWalk(&cache_array, sc, buffer,
                    const_cast<const void **>(callchain), k_max_depth, 0);

  // Value-initialised so that frames without a matching code cache stay null
  // instead of holding indeterminate pointers.
  const char *syms[k_max_depth] = {};
  for (int i = 0; i < n; ++i) {
    // retrieve symbol
    CodeCache *code_cache = findLibraryByAddress(
        &cache_array, reinterpret_cast<void *>(callchain[i]));
    if (code_cache) {
      syms[i] = code_cache->binarySearch(callchain[i]);
      printf("IP = %p - %s\n", callchain[i], syms[i]);
    }
  }

  // The checks below read the first three entries: make sure the walk
  // actually produced them and that each one was symbolised, since building a
  // std::string from a null pointer is undefined behaviour.
  ASSERT_GE(n, 3);
  for (int i = 0; i < 3; ++i) {
    ASSERT_TRUE(syms[i] != nullptr) << "no symbol for frame " << i;
  }

  // Check that we found the expected functions during unwinding
  ASSERT_TRUE(std::string(syms[0]).find("save_context") != std::string::npos);
  ASSERT_TRUE(std::string(syms[1]).find("funcB") != std::string::npos);
  ASSERT_TRUE(std::string(syms[2]).find("funcA") != std::string::npos);
}

#ifdef ALLOC_TRACKER
#include "allocation_tracker.hpp"
#include "perf_ringbuffer.hpp"
#include "ringbuffer_holder.hpp"
#include "ringbuffer_utils.hpp"
#include <span>
#include "defer.hpp"

namespace ddprof {
static const uint64_t kSamplingRate = 1;

DDPROF_NOINLINE void func_save_sleep(size_t size);
DDPROF_NOINLINE void func_intermediate_0(size_t size);
DDPROF_NOINLINE void func_intermediate_1(size_t size);

// Producer loop run in the forked child of dwarf_unwind.remote.
//
// Reports an allocation of `size` bytes at the fixed address 0xdeadbeef to the
// allocation tracker 99999 times, pausing 100 microseconds between reports.
//
// @param size  Allocation size passed to track_allocation_s.
DDPROF_NOINLINE void func_save_sleep(size_t size) {
  ddprof::TrackerThreadLocalState *const state =
      AllocationTracker::get_tl_state();
  assert(state);

  for (int iteration = 1; iteration < 100000; ++iteration) {
    ddprof::AllocationTracker::track_allocation_s(0xdeadbeef, size, *state);
    // prevent tail call optimization
    getpid();
    usleep(100);
    // printf("Save context nb -- %d \n", iteration);
  }
}

// First pass-through frame of the remote test: forwards `size` unchanged to
// func_intermediate_1.
void func_intermediate_0(size_t size) {
  func_intermediate_1(size);
}

// Second pass-through frame of the remote test: forwards `size` unchanged to
// func_save_sleep.
void func_intermediate_1(size_t size) {
  func_save_sleep(size);
}

// Out-of-process unwinding test.
//
// Initialises allocation tracking on an MPSC ring buffer, forks a child that
// keeps reporting allocations through
// func_intermediate_0 -> func_intermediate_1 -> func_save_sleep, then, in the
// parent, reads one sample from the ring buffer, unwinds it using the child's
// library mappings and prints the symbol found for every frame.
TEST(dwarf_unwind, remote) {
  const size_t buf_size_order = 5;
  ddprof::RingBufferHolder ring_buffer{buf_size_order,
                                       RingBufferType::kMPSCRingBuffer};
  AllocationTracker::allocation_tracking_init(
      kSamplingRate,
      AllocationTracker::kDeterministicSampling |
          AllocationTracker::kTrackDeallocations,
      k_default_perf_stack_sample_size, ring_buffer.get_buffer_info(), {});
  defer { AllocationTracker::allocation_tracking_free(); };

  // Fork
  pid_t temp_pid = fork();
  if (!temp_pid) {
    func_intermediate_0(10);
    // char *const argList[] = {"sleep", "10", nullptr};
    // execvp("sleep", argList);
    // NOTE(review): returning here hands control back to the gtest runner
    // inside the child process - confirm this is intended.
    return;
  }
  // A failed fork returns -1; never go on to parse or signal pid -1
  // (kill(-1, ...) targets every process the caller may signal).
  ASSERT_NE(temp_pid, -1);
  // cleanup the producer fork: runs exactly once on every exit path of the
  // parent, including early returns triggered by the ASSERT_* macros below.
  defer { kill(temp_pid, SIGTERM); };

  // Load libraries from the fork - the cache array is relevant to a single pid
  CodeCacheArray cache_array;
  sleep(1);
  Symbols::parsePidLibraries(temp_pid, &cache_array, false);
  // Establish a ring buffer ?

  ddprof::MPSCRingBufferReader reader{&ring_buffer.get_ring_buffer()};
  ASSERT_GT(reader.available_size(), 0);

  auto buf = reader.read_sample();
  ASSERT_FALSE(buf.empty());
  const perf_event_header *hdr =
      reinterpret_cast<const perf_event_header *>(buf.data());
  ASSERT_EQ(hdr->type, PERF_RECORD_SAMPLE);

  // convert based on mask for this watcher (default in this case)
  perf_event_sample *sample =
      hdr2samp(hdr, ddprof::perf_event_default_sample_type());

  std::span<const uint64_t, ddprof::k_perf_register_count> regs_span{
      sample->regs, ddprof::k_perf_register_count};
  ap::StackContext sc = ap::from_regs(regs_span);
  // Named differently from the file-level `stack` buffer to avoid shadowing it.
  std::span<const std::byte> sample_stack{
      reinterpret_cast<const std::byte *>(sample->data_stack),
      sample->size_stack};
  ap::StackBuffer buffer(sample_stack, sc.sp, sc.sp + sample->size_stack);

  void *callchain[ddprof::kMaxStackDepth];
  int n =
      stackWalk(&cache_array, sc, buffer, const_cast<const void **>(callchain),
                ddprof::kMaxStackDepth, 0);

  // Value-initialised so frames without a matching code cache stay null.
  std::array<const char *, ddprof::kMaxStackDepth> syms{};
  for (int i = 0; i < n; ++i) {
    // retrieve symbol
    CodeCache *code_cache = findLibraryByAddress(
        &cache_array, reinterpret_cast<void *>(callchain[i]));
    if (code_cache) {
      syms[i] = code_cache->binarySearch(callchain[i]);
      printf("IP = %p - %s\n", callchain[i], syms[i]);
    }
  }
}
}
#endif

0 comments on commit db5f51f

Please sign in to comment.