Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ongoing] Experimental unwinding feature #405

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,18 @@ add_subdirectory(src/event_parser)
# elfutils
include(Findelfutils)

# -- Async profiler --
# Builds the async-profiler sources as a static library and exposes it to the
# rest of the build under the DDProf::AsyncProf alias.
# Root used to derive the async-profiler source and include directories.
set(ASYNC_PROFILER_LIB_DIR ${CMAKE_SOURCE_DIR})
set(ASYNC_PROFILER_SRC_DIR ${ASYNC_PROFILER_LIB_DIR}/src/async-profiler)
set(ASYNC_PROFILER_LIB_INCLUDE ${ASYNC_PROFILER_LIB_DIR}/include/async-profiler)
# Collects every source file found in the directory at configure time; CMake
# must be re-run for newly added files to be picked up.
aux_source_directory(${ASYNC_PROFILER_SRC_DIR} ASYNC_PROFILER_SOURCES)
add_library(async_prof_lib STATIC ${ASYNC_PROFILER_SOURCES})
# PUBLIC: targets linking against the library inherit both include directories.
target_include_directories(async_prof_lib PUBLIC ${ASYNC_PROFILER_LIB_INCLUDE}
${CMAKE_SOURCE_DIR}/include)
# PRIVATE: dw, elf and the threads library are not added to the link interface
# of this target.
target_link_libraries(async_prof_lib PRIVATE dw elf Threads::Threads)
# Compile the objects as position independent code.
set_property(TARGET async_prof_lib PROPERTY POSITION_INDEPENDENT_CODE ON)
# Namespaced alias for consumers of the library.
add_library(DDProf::AsyncProf ALIAS async_prof_lib)

# ---- Static analysis ----
include(ClangTidy)
include(Format)
Expand Down
149 changes: 149 additions & 0 deletions include/async-profiler/arch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/*
* Copyright 2017 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef _ARCH_H
#define _ARCH_H

// Short aliases for the built-in unsigned integer types. The names follow the
// usual bit width of each type; the C++ standard only guarantees minimum
// widths for them.
using u8 = unsigned char;
using u16 = unsigned short;
using u32 = unsigned int;
using u64 = unsigned long long;

// Atomically adds `increment` (1 by default) to `var` using the
// __sync_fetch_and_add builtin.
// Returns the value `var` held before the addition.
static inline u64 atomicInc(volatile u64 &var, u64 increment = 1) {
  const u64 previous = __sync_fetch_and_add(&var, increment);
  return previous;
}

// Atomically adds `increment` (1 by default) to `var` using the
// __sync_fetch_and_add builtin.
// Returns the value `var` held before the addition.
static inline int atomicInc(volatile int &var, int increment = 1) {
  const int previous = __sync_fetch_and_add(&var, increment);
  return previous;
}

// Atomically reads `var` with acquire memory ordering and returns the value.
static inline u64 loadAcquire(u64 &var) {
  u64 *const location = &var;
  return __atomic_load_n(location, __ATOMIC_ACQUIRE);
}

// Atomically writes `value` into `var` with release memory ordering.
static inline void storeRelease(u64 &var, u64 value) {
  u64 *const location = &var;
  __atomic_store_n(location, value, __ATOMIC_RELEASE);
}

#if defined(__x86_64__) || defined(__i386__)

// x86 / x86-64 parameters.
// instruction_t is the unit in which code is patched; BREAKPOINT is the
// breakpoint encoding written in that unit.
using instruction_t = unsigned char;
constexpr instruction_t BREAKPOINT = 0xcc;
constexpr int BREAKPOINT_OFFSET = 0;

// NOTE(review): the meaning of the constants below is defined by their users,
// which are not visible in this header.
constexpr int SYSCALL_SIZE = 2;
constexpr int FRAME_PC_SLOT = 1;
constexpr int ADJUST_RET = 1;
constexpr int PLT_HEADER_SIZE = 16;
constexpr int PLT_ENTRY_SIZE = 16;
constexpr int PERF_REG_PC = 8; // PERF_REG_X86_IP

// spinPause(): emits the `pause` instruction.
# define spinPause() asm volatile("pause")
// rmb(): emits `lfence`; the "memory" clobber also acts as a compiler barrier.
# define rmb() asm volatile("lfence" : : : "memory")
// flushCache(addr): `clflush` on `addr`, fenced by `mfence` on both sides.
# define flushCache(addr)                                                      \
  asm volatile("mfence; clflush (%0); mfence" : : "r"(addr) : "memory")

#elif defined(__arm__) || defined(__thumb__)

// 32-bit ARM / Thumb parameters.
// instruction_t is the unit in which code is patched; BREAKPOINT and
// BREAKPOINT_THUMB are breakpoint encodings written in that unit.
using instruction_t = unsigned int;
constexpr instruction_t BREAKPOINT = 0xe7f001f0;
constexpr instruction_t BREAKPOINT_THUMB = 0xde01de01;
constexpr int BREAKPOINT_OFFSET = 0;

// NOTE(review): the meaning of the constants below is defined by their users,
// which are not visible in this header.
constexpr int SYSCALL_SIZE = sizeof(instruction_t);
constexpr int FRAME_PC_SLOT = 1;
constexpr int ADJUST_RET = 0;
constexpr int PLT_HEADER_SIZE = 20;
constexpr int PLT_ENTRY_SIZE = 12;
constexpr int PERF_REG_PC = 15; // PERF_REG_ARM_PC

// spinPause(): emits the `yield` instruction.
# define spinPause() asm volatile("yield")
// rmb(): emits `dmb ish`; the "memory" clobber also acts as a compiler barrier.
# define rmb() asm volatile("dmb ish" : : : "memory")
// flushCache(addr): clears the instruction cache for the sizeof(instruction_t)
// bytes starting at `addr`.
# define flushCache(addr)                                                      \
  __builtin___clear_cache((char *)(addr),                                      \
                          (char *)(addr) + sizeof(instruction_t))

#elif defined(__aarch64__)

// AArch64 parameters.
// instruction_t is the unit in which code is patched; BREAKPOINT is the
// breakpoint encoding written in that unit.
using instruction_t = unsigned int;
constexpr instruction_t BREAKPOINT = 0xd4200000;
constexpr int BREAKPOINT_OFFSET = 0;

// NOTE(review): the meaning of the constants below is defined by their users,
// which are not visible in this header.
constexpr int SYSCALL_SIZE = sizeof(instruction_t);
constexpr int FRAME_PC_SLOT = 1;
constexpr int ADJUST_RET = 0;
constexpr int PLT_HEADER_SIZE = 32;
constexpr int PLT_ENTRY_SIZE = 16;
constexpr int PERF_REG_PC = 32; // PERF_REG_ARM64_PC

// spinPause(): emits the `isb` instruction.
# define spinPause() asm volatile("isb")
// rmb(): emits `dmb ish`; the "memory" clobber also acts as a compiler barrier.
# define rmb() asm volatile("dmb ish" : : : "memory")
// flushCache(addr): clears the instruction cache for the sizeof(instruction_t)
// bytes starting at `addr`.
# define flushCache(addr)                                                      \
  __builtin___clear_cache((char *)(addr),                                      \
                          (char *)(addr) + sizeof(instruction_t))

#elif defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)

// Little-endian PPC64 parameters.
// instruction_t is the unit in which code is patched; BREAKPOINT is the
// breakpoint encoding written in that unit.
using instruction_t = unsigned int;
constexpr instruction_t BREAKPOINT = 0x7fe00008;
// We place the break point in the third instruction slot on PPCLE as the first
// two are skipped if the call comes from within the same compilation unit
// according to the LE ABI.
constexpr int BREAKPOINT_OFFSET = 8;

// NOTE(review): the meaning of the constants below is defined by their users,
// which are not visible in this header.
constexpr int SYSCALL_SIZE = sizeof(instruction_t);
constexpr int FRAME_PC_SLOT = 2;
constexpr int ADJUST_RET = 0;
constexpr int PLT_HEADER_SIZE = 24;
constexpr int PLT_ENTRY_SIZE = 24;
constexpr int PERF_REG_PC = 32; // PERF_REG_POWERPC_NIP

// spinPause(): `yield` does nothing, but using `or 1,1,1` would lead to other
// problems.
# define spinPause() asm volatile("yield")
// rmb(): `lwsync` would do, but `sync` is used: better safe than sorry.
# define rmb() asm volatile("sync" : : : "memory")
// flushCache(addr): clears the instruction cache for the sizeof(instruction_t)
// bytes starting at `addr`.
# define flushCache(addr)                                                      \
  __builtin___clear_cache((char *)(addr),                                      \
                          (char *)(addr) + sizeof(instruction_t))

#else

# error "Compiling on unsupported arch"

#endif

// Return address signing support.
// Apple M1 has 47 bit virtual addresses.
// ADDRESS_BITS is only defined for AArch64 Apple builds; WX_MEMORY is defined
// for every build (true on AArch64 Apple, false elsewhere).
#if defined(__aarch64__) && defined(__APPLE__)
# define ADDRESS_BITS 47
# define WX_MEMORY true
#else
# define WX_MEMORY false
#endif

#ifdef ADDRESS_BITS
// Returns `p` with every bit above the low ADDRESS_BITS bits cleared.
static inline const void *stripPointer(const void *p) {
  const unsigned long address_mask = (1UL << ADDRESS_BITS) - 1;
  const unsigned long raw_bits = reinterpret_cast<unsigned long>(p);
  return reinterpret_cast<const void *>(raw_bits & address_mask);
}
#else
// No address width configured: the pointer expression is passed through
// unchanged, keeping its original type.
# define stripPointer(p) (p)
#endif

#endif // _ARCH_H
162 changes: 162 additions & 0 deletions include/async-profiler/codeCache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/*
* Copyright 2017 Andrei Pangin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef _CODECACHE_H
#define _CODECACHE_H

#include <stdint.h>
#include <vector>

// Sentinel bounds used as the default min/max address arguments of the
// CodeCache constructor: the "no minimum" value is -1 converted to a pointer
// and the "no maximum" value is the null pointer.
#define NO_MIN_ADDRESS ((const void *)-1)
#define NO_MAX_ADDRESS ((const void *)0)

// Predicate over a name string; accepted by CodeCache::mark().
using NamePredicate = bool (*)(const char *name);

// NOTE(review): presumably the initial CodeCache blob capacity; the
// constructor that would use it is not visible here.
constexpr int INITIAL_CODE_CACHE_CAPACITY = 1000;
// Number of CodeCache slots held by a CodeCacheArray.
constexpr int MAX_NATIVE_LIBS = 2048;

// Small header located directly in front of a name string.
//
// Every static accessor takes a pointer to the name characters and steps back
// sizeof(NativeFunc) bytes to reach the header, so it must only be given names
// that really are preceded by such a header.
// NOTE(review): presumably those names come from create(), which is defined
// elsewhere - confirm.
class NativeFunc {
private:
  short _lib_index;
  char _mark;
  char _reserved;
  char _name[0]; // zero-length trailing array (compiler extension)

  // Maps a name pointer back to the header stored in front of it.
  static NativeFunc *from(const char *name) {
    const char *header = name - sizeof(NativeFunc);
    return reinterpret_cast<NativeFunc *>(const_cast<char *>(header));
  }

public:
  static char *create(const char *name, short lib_index);
  static void destroy(char *name);

  // Library index recorded in the header of `name`.
  static short libIndex(const char *name) {
    const NativeFunc *func = from(name);
    return func->_lib_index;
  }

  // True when the mark byte in the header of `name` is non-zero.
  static bool isMarked(const char *name) {
    const NativeFunc *func = from(name);
    return func->_mark != 0;
  }

  // Sets the mark byte in the header of `name` to 1.
  static void mark(const char *name) {
    NativeFunc *func = from(name);
    func->_mark = 1;
  }
};

// Address range [_start, _end) paired with a name pointer.
class CodeBlob {
public:
  const void *_start;
  const void *_end;
  char *_name;

  // Three-way comparison of two CodeBlob objects passed as untyped pointers
  // (the signature follows the C qsort()/bsearch() comparator convention).
  //
  // Blobs are ordered by ascending _start. For equal starts the blob with the
  // larger _end comes first. Returns -1, 0 or 1.
  static int comparator(const void *c1, const void *c2) {
    const CodeBlob *lhs = static_cast<const CodeBlob *>(c1);
    const CodeBlob *rhs = static_cast<const CodeBlob *>(c2);

    if (lhs->_start < rhs->_start) {
      return -1;
    }
    if (lhs->_start > rhs->_start) {
      return 1;
    }
    if (lhs->_end == rhs->_end) {
      return 0;
    }
    // Same start: the blob reaching further sorts first.
    return lhs->_end > rhs->_end ? -1 : 1;
  }
};

class FrameDesc;

// Per-library record: a name, an address range, an array of CodeBlob entries,
// global offset table bounds and a table of FrameDesc entries.
//
// Only the trivial accessors are defined here; every other member function is
// implemented elsewhere.
class CodeCache {
protected:
  char *_name;
  short _lib_index;
  const void *_min_address; // inclusive lower bound tested by contains()
  const void *_max_address; // exclusive upper bound tested by contains()
  const void *_text_base;

  void **_got_start;
  void **_got_end;
  bool _got_patchable;

  int _capacity;
  int _count;
  CodeBlob *_blobs;

  void expand();

public:
  // todo fix hacky override for remote
  using FrameDescTable = std::vector<FrameDesc>;
  FrameDescTable _dwarf_table;

  CodeCache(const char *name, short lib_index = -1,
            const void *min_address = NO_MIN_ADDRESS,
            const void *max_address = NO_MAX_ADDRESS);

  ~CodeCache();

  const char *name() const { return _name; }

  const void *minAddress() const { return _min_address; }

  const void *maxAddress() const { return _max_address; }

  // True when `address` lies in the half-open range
  // [_min_address, _max_address).
  bool contains(const void *address) const {
    return address >= _min_address && address < _max_address;
  }

  void setTextBase(const char *text_base) { _text_base = text_base; }

  // Const-qualified like the other read-only accessors so that it can be
  // called through a const CodeCache.
  const void *getTextBase() const { return _text_base; }

  void **gotStart() const { return _got_start; }

  void **gotEnd() const { return _got_end; }

  void add(const void *start, int length, const char *name,
           bool update_bounds = false);
  void updateBounds(const void *start, const void *end);
  void sort();
  void mark(NamePredicate predicate);

  CodeBlob *find(const void *address);
  const char *binarySearch(const void *address);
  const void *findSymbol(const char *name);
  const void *findSymbolByPrefix(const char *prefix);
  const void *findSymbolByPrefix(const char *prefix, int prefix_len);

  void setGlobalOffsetTable(void **start, void **end, bool patchable);
  void **findGlobalOffsetEntry(void *address);
  void makeGotPatchable();

  void setDwarfTable(FrameDescTable &&table);
  FrameDesc *findFrameDesc(uint64_t elf_address);
};

class CodeCacheArray {
private:
CodeCache *_libs[MAX_NATIVE_LIBS];
int _count;

public:
CodeCacheArray() : _count(0) {}

CodeCache *operator[](int index) { return _libs[index]; }

int count() { return __atomic_load_n(&_count, __ATOMIC_ACQUIRE); }

void add(CodeCache *lib) {
int index = __atomic_load_n(&_count, __ATOMIC_ACQUIRE);
_libs[index] = lib;
__atomic_store_n(&_count, index + 1, __ATOMIC_RELEASE);
}
};

#endif // _CODECACHE_H
Loading