Skip to content

Commit

Permalink
Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
rui314 committed Jan 3, 2025
1 parent 6a342f2 commit 0a112d0
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 36 deletions.
6 changes: 3 additions & 3 deletions src/mold.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,9 @@ static consteval i64 get_branch_distance() {
// on the target architecture. For example, ARM32's B instruction jumps to
// the branch's address + immediate + 4 (i.e., B with offset 0 jumps to
// the next instruction), while RISC-V has no such implicit bias. Here, we
// subtract 16 as a safety margin that is large enough for all targets.
// subtract 32 as a safety margin that is large enough for all targets.
template <needs_thunk E>
static constexpr i64 branch_distance = get_branch_distance<E>() - 16;
static constexpr i64 branch_distance = get_branch_distance<E>() - 32;

template <needs_thunk E>
void gather_thunk_addresses(Context<E> &ctx);
Expand Down Expand Up @@ -2950,7 +2950,7 @@ inline void Symbol<E>::set_djb_hash(Context<E> &ctx, u32 hash) {
}

template <typename E>
inline u64
u64
Symbol<E>::get_thunk_addr(Context<E> &ctx, u64 P) const requires needs_thunk<E> {
std::span<u64> vec = ctx.symbol_aux[aux_idx].thunk_addrs;
u64 lo = (P < branch_distance<E>) ? 0 : P - branch_distance<E>;
Expand Down
14 changes: 4 additions & 10 deletions src/output-chunks.cc
Original file line number Diff line number Diff line change
Expand Up @@ -892,16 +892,10 @@ template <typename E>
void OutputSection<E>::compute_section_size(Context<E> &ctx) {
ElfShdr<E> &shdr = this->shdr;

// On most RISC systems, branch instructions have a limited reach, so
// we need to create so-called "range extension thunks" to extend it.
// create_range_extension_thunks() computes the size of the section
// while inserting thunks.
if constexpr (needs_thunk<E>) {
if ((shdr.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable) {
create_range_extension_thunks(ctx);
return;
}
}
// Text sections must be handled by create_range_extension_thunks()
// if they may need range extension thunks.
assert(!needs_thunk<E> || !(shdr.sh_flags & SHF_EXECINSTR) ||
ctx.arg.relocatable);

// Since one output section may contain millions of input sections,
// we first split input sections into groups and assign offsets to
Expand Down
15 changes: 9 additions & 6 deletions src/passes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1343,15 +1343,18 @@ void compute_section_sizes(Context<E> &ctx) {
Timer t(ctx, "compute_section_sizes");

if constexpr (needs_thunk<E>) {
// We cannot use parallel-for for compute_section_size() which may
// call create_range_extension_thunks() because that function is
// not thread-safe.
auto is_text = [&](Chunk<E> *chunk) {
return chunk->to_osec() && (chunk->shdr.sh_flags & SHF_EXECINSTR) &&
!ctx.arg.relocatable;
};

// create_range_extension_thunks() is not thread-safe, so text
// sections are processed one at a time.
for (Chunk<E> *chunk : ctx.chunks)
if (chunk->shdr.sh_flags & SHF_EXECINSTR)
chunk->compute_section_size(ctx);
if (is_text(chunk))
chunk->to_osec()->create_range_extension_thunks(ctx);

tbb::parallel_for_each(ctx.chunks, [&](Chunk<E> *chunk) {
if (!(chunk->shdr.sh_flags & SHF_EXECINSTR))
if (!is_text(chunk))
chunk->compute_section_size(ctx);
});
} else {
Expand Down
34 changes: 17 additions & 17 deletions src/thunks.cc
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
if (is_reachable(ctx, isec, sym, rel))
continue;

// Add the symbol to the current thunk if it's not added already.
// Add the symbol to the current thunk unless another thread has
// already added it.
if (!sym.flags.test_and_set()) {
std::scoped_lock lock(mu);
thunk->symbols.push_back(&sym);
Expand Down Expand Up @@ -237,31 +238,30 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
//
// In this function, we create a list of all addresses in range extension
// thunks for each symbol, so that it is easy to find one.
//
// Note that thunk_addrs must be sorted for binary search.
template <>
void gather_thunk_addresses(Context<E> &ctx) {
Timer t(ctx, "gather_thunk_addresses");

std::vector<Symbol<E> *> syms;
std::vector<OutputSection<E> *> sections;
for (Chunk<E> *chunk : ctx.chunks)
if (OutputSection<E> *osec = chunk->to_osec())
sections.push_back(osec);

for (Chunk<E> *chunk : ctx.chunks) {
if (OutputSection<E> *osec = chunk->to_osec()) {
for (std::unique_ptr<Thunk<E>> &thunk : osec->thunks) {
for (i64 i = 0; i < thunk->symbols.size(); i++) {
Symbol<E> &sym = *thunk->symbols[i];
sym.add_aux(ctx);
sort(sections, [](OutputSection<E> *a, OutputSection<E> *b) {
return a->shdr.sh_addr < b->shdr.sh_addr;
});

std::vector<u64> &vec = ctx.symbol_aux[sym.aux_idx].thunk_addrs;
if (vec.empty())
syms.push_back(&sym);
vec.push_back(thunk->get_addr(i));
}
for (OutputSection<E> *osec : sections) {
for (std::unique_ptr<Thunk<E>> &thunk : osec->thunks) {
for (i64 i = 0; i < thunk->symbols.size(); i++) {
Symbol<E> &sym = *thunk->symbols[i];
sym.add_aux(ctx);
ctx.symbol_aux[sym.aux_idx].thunk_addrs.push_back(thunk->get_addr(i));
}
}
}

tbb::parallel_for_each(syms, [&](Symbol<E> *sym) {
sort(ctx.symbol_aux[sym->aux_idx].thunk_addrs);
});
}

} // namespace mold
Expand Down

0 comments on commit 0a112d0

Please sign in to comment.