[packages/mold] upstream crash fix; rel 2
atler
atler at pld-linux.org
Sun May 4 14:05:12 CEST 2025
commit 4bad3265070afe595b21af362644d68ad8638a0b
Author: Jan Palus <atler at pld-linux.org>
Date: Sun May 4 13:16:25 2025 +0200
upstream crash fix; rel 2
see https://github.com/rui314/mold/issues/1449
1449.patch | 250 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
mold.spec | 4 +-
2 files changed, 253 insertions(+), 1 deletion(-)
---
diff --git a/mold.spec b/mold.spec
index e78b8e7..6159fe0 100644
--- a/mold.spec
+++ b/mold.spec
@@ -5,11 +5,12 @@
Summary: mold: A Modern Linker
Name: mold
Version: 2.38.1
-Release: 1
+Release: 2
License: MIT
Group: Development/Libraries
Source0: https://github.com/rui314/mold/archive/v%{version}/%{name}-%{version}.tar.gz
# Source0-md5: 28d611644ecfdb7a1e1c443ed76fa4e8
+Patch0: 1449.patch
URL: https://github.com/rui314/mold
BuildRequires: blake3-devel
BuildRequires: cmake >= 3.14
@@ -38,6 +39,7 @@ especially in rapid debug-edit-rebuild cycles.
%prep
%setup -q
+%patch -P0 -p1
%{__rm} -r third-party/{mimalloc,tbb}
diff --git a/1449.patch b/1449.patch
new file mode 100644
index 0000000..6e868d7
--- /dev/null
+++ b/1449.patch
@@ -0,0 +1,250 @@
+From fded2d8a77cda00944b4ef982f617c07ae2ce46d Mon Sep 17 00:00:00 2001
+From: Rui Ueyama <ruiu at cs.stanford.edu>
+Date: Sun, 4 May 2025 10:17:57 +0900
+Subject: [PATCH] Remove redundant thunks after computing section addresses
+
+Previously, we might mistakenly remove necessary symbols from thunks,
+causing the linker to crash.
+
+This fixes https://github.com/rui314/mold/issues/1449
+---
+ src/main.cc | 15 +++---
+ src/mold.h | 2 +-
+ src/output-chunks.cc | 3 ++
+ src/passes.cc | 2 +-
+ src/thunks.cc | 116 ++++++++++++++++++++++++++++++++++---------
+ 5 files changed, 106 insertions(+), 32 deletions(-)
+
+diff --git a/src/main.cc b/src/main.cc
+index 2e07a80055..950c0f3395 100644
+--- a/src/main.cc
++++ b/src/main.cc
+@@ -536,13 +536,6 @@ int mold_main(int argc, char **argv) {
+ if (ctx.arg.emit_relocs)
+ create_reloc_sections(ctx);
+
+- // We've created range extension thunks with a pessimistive assumption
+- // that all out-of-section references are out of range. Now that we are
+- // able to assign addresses to all SHF_ALLOC output sections, we can
+- // eliminate excessive thunks.
+- if constexpr (needs_thunk<E>)
+- remove_redundant_thunks(ctx);
+-
+ // Compute .symtab and .strtab sizes for each file.
+ if (!ctx.arg.strip_all)
+ create_output_symtab(ctx);
+@@ -562,6 +555,14 @@ int mold_main(int argc, char **argv) {
+ filesize = set_osec_offsets(ctx);
+ }
+
++ // We've created range extension thunks with a pessimistic assumption
++ // that all out-of-section references are out of range. Now that we know
++ // the addresses of all sections, we can eliminate excessive thunks.
++ if constexpr (needs_thunk<E>) {
++ remove_redundant_thunks(ctx);
++ filesize = set_osec_offsets(ctx);
++ }
++
+ if constexpr (is_arm32<E>) {
+ if (ctx.extra.exidx) {
+ ctx.extra.exidx->remove_duplicate_entries(ctx);
+diff --git a/src/mold.h b/src/mold.h
+index 9570c5a026..17832b5f3e 100644
+--- a/src/mold.h
++++ b/src/mold.h
+@@ -570,7 +570,7 @@ class OutputSection : public Chunk<E> {
+ void populate_symtab(Context<E> &ctx) override;
+
+ void scan_abs_relocations(Context<E> &ctx);
+- void create_range_extension_thunks(Context<E> &ctx, bool first_pass);
++ void create_range_extension_thunks(Context<E> &ctx);
+
+ std::vector<InputSection<E> *> members;
+ std::vector<std::unique_ptr<Thunk<E>>> thunks;
+diff --git a/src/output-chunks.cc b/src/output-chunks.cc
+index 9284bf636a..17d643b4cc 100644
+--- a/src/output-chunks.cc
++++ b/src/output-chunks.cc
+@@ -1162,6 +1162,9 @@ void OutputSection<E>::populate_symtab(Context<E> &ctx) {
+ u8 *strtab_base = ctx.buf + ctx.strtab->shdr.sh_offset;
+ u8 *strtab = strtab_base + this->strtab_offset;
+
++ memset(esym, 0, this->num_local_symtab * sizeof(ElfSym<E>));
++ memset(strtab, 0, this->strtab_size);
++
+ auto write_esym = [&](u64 addr, i64 st_name) {
+ memset(esym, 0, sizeof(*esym));
+ esym->st_name = st_name;
+diff --git a/src/passes.cc b/src/passes.cc
+index a433414db1..835dc6506e 100644
+--- a/src/passes.cc
++++ b/src/passes.cc
+@@ -1440,7 +1440,7 @@ void compute_section_sizes(Context<E> &ctx) {
+
+ // create_range_extension_thunks is not thread-safe
+ for (Chunk<E> *chunk : std::span(vec.begin(), tail.begin()))
+- chunk->to_osec()->create_range_extension_thunks(ctx, true);
++ chunk->to_osec()->create_range_extension_thunks(ctx);
+
+ tbb::parallel_for_each(tail, [&](Chunk<E> *chunk) {
+ chunk->compute_section_size(ctx);
+diff --git a/src/thunks.cc b/src/thunks.cc
+index cc45785c66..d8ae146ad4 100644
+--- a/src/thunks.cc
++++ b/src/thunks.cc
+@@ -45,28 +45,25 @@ static constexpr i64 max_thunk_size = 1024 * 1024;
+ static constexpr i64 thunk_align = 16;
+
+ template <typename E>
+-static bool is_reachable(Context<E> &ctx, bool first_pass, InputSection<E> &isec,
++static bool is_reachable(Context<E> &ctx, InputSection<E> &isec,
+ Symbol<E> &sym, const ElfRel<E> &rel) {
++ // We assume pessimistically that all out-of-section relocations are
++ // out-of-range. Excessive thunks will be removed later by
++ // remove_redundant_thunks().
++ InputSection<E> *isec2 = sym.get_input_section();
++ if (!isec2 || isec.output_section != isec2->output_section)
++ return false;
++
+ // If the target section is in the same output section but
+ // hasn't got any address yet, that's unreacahble.
+- InputSection<E> *isec2 = sym.get_input_section();
+- if (isec2 && isec.output_section == isec2->output_section &&
+- isec2->offset == -1)
++ if (isec2->offset == -1)
+ return false;
+
+- // We don't know about the final file layout on the first pass, so
+- // we assume pessimistically that all out-of-section relocations are
+- // out-of-range. Excessive thunks will be removed on the second pass.
+- if (first_pass) {
+- if (!isec2 || isec.output_section != isec2->output_section)
+- return false;
+-
+- // Even if the target is the same section, we branch to its PLT
+- // if it has one. So a symbol with a PLT is also considered an
+- // out-of-section reference.
+- if (sym.has_plt(ctx))
+- return false;
+- }
++ // Even if the target is the same section, we branch to its PLT
++ // if it has one. So a symbol with a PLT is also considered an
++ // out-of-section reference.
++ if (sym.has_plt(ctx))
++ return false;
+
+ // Compute a distance between the relocated place and the symbol
+ // and check if they are within reach.
+@@ -105,8 +102,7 @@ static bool needs_shim(Context<E> &ctx, Symbol<E> &sym, const ElfRel<E> &rel) {
+ }
+
+ template <>
+-void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx,
+- bool first_pass) {
++void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
+ std::span<InputSection<E> *> m = members;
+ if (m.empty())
+ return;
+@@ -200,8 +196,7 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx,
+ continue;
+
+ // Skip if we can directly branch to the destination.
+- if (is_reachable(ctx, first_pass, isec, sym, rel) &&
+- !needs_shim(ctx, sym, rel))
++ if (is_reachable(ctx, isec, sym, rel) && !needs_shim(ctx, sym, rel))
+ continue;
+
+ // Add the symbol to the current thunk if it's not added already.
+@@ -233,15 +228,90 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx,
+ this->shdr.sh_size = offset;
+ }
+
++// create_range_extension_thunks creates thunks with a pessimistic
++// assumption that all out-of-section references are out of range.
++// After computing output section addresses, we revisit all thunks to
++// remove unneeded symbols from them.
++//
++// We create more thunks than necessary and then eliminate some of
++// them later, instead of just creating thunks at this stage. This is
++// because we can safely shrink sections after assigning addresses to
++// them without worrying about making existing references to thunks go
++// out of range. On the other hand, if we insert thunks after
++// assigning addresses to sections, references to thunks could become
++// out of range due to the new extra gaps for thunks. Thus, the
++// creation of thunks is a two-pass process.
+ template <>
+ void remove_redundant_thunks(Context<E> &ctx) {
+ Timer t(ctx, "remove_redundant_thunks");
+- set_osec_offsets(ctx);
+
++ // Gather output executable sections
++ std::vector<OutputSection<E> *> sections;
+ for (Chunk<E> *chunk : ctx.chunks)
+ if (OutputSection<E> *osec = chunk->to_osec())
+ if (osec->shdr.sh_flags & SHF_EXECINSTR)
+- osec->create_range_extension_thunks(ctx, false);
++ sections.push_back(osec);
++
++ // Mark all symbols that actually need range extension thunks
++ for (OutputSection<E> *osec : sections) {
++ tbb::parallel_for_each(osec->members, [&](InputSection<E> *isec) {
++ for (const ElfRel<E> &rel : isec->get_rels(ctx)) {
++ if (!is_func_call_rel(rel))
++ continue;
++
++ Symbol<E> &sym = *isec->file.symbols[rel.r_sym];
++ if (!sym.file)
++ continue;
++
++ if (!needs_shim(ctx, sym, rel)) {
++ i64 S = sym.get_addr(ctx, NO_OPD);
++ i64 A = get_addend(*isec, rel);
++ i64 P = isec->get_addr() + rel.r_offset;
++ i64 val = S + A - P;
++ if (-branch_distance<E> <= val && val < branch_distance<E>)
++ continue;
++ }
++ sym.flags.test_and_set();
++ }
++ });
++ }
++
++ // Remove symbols from thunks if they don't actually need range
++ // extension thunks
++ std::vector<Symbol<E> *> syms;
++
++ for (OutputSection<E> *osec : sections) {
++ for (std::unique_ptr<Thunk<E>> &thunk : osec->thunks) {
++ append(syms, thunk->symbols);
++ std::erase_if(thunk->symbols, [&](Symbol<E> *sym) { return !sym->flags; });
++ }
++ }
++
++ // Reset flags for future use
++ for (Symbol<E> *sym : syms)
++ sym->flags = 0;
++
++ // Recompute section sizes
++ tbb::parallel_for_each(sections, [&](OutputSection<E> *osec) {
++ std::span<InputSection<E> *> m = osec->members;
++ std::span<std::unique_ptr<Thunk<E>>> t = osec->thunks;
++ i64 offset = 0;
++
++ while (!m.empty() || !t.empty()) {
++ if (!m.empty() && (t.empty() || m[0]->offset < t[0]->offset)) {
++ offset = align_to(offset, 1 << m[0]->p2align);
++ m[0]->offset = offset;
++ offset += m[0]->sh_size;
++ m = m.subspan(1);
++ } else {
++ offset = align_to(offset, thunk_align);
++ t[0]->offset = offset;
++ offset += t[0]->size();
++ t = t.subspan(1);
++ }
++ }
++ osec->shdr.sh_size = offset;
++ });
+ }
+
+ // When applying relocations, we want to know the address in a reachable
================================================================
---- gitweb:
http://git.pld-linux.org/gitweb.cgi/packages/mold.git/commitdiff/4bad3265070afe595b21af362644d68ad8638a0b
More information about the pld-cvs-commit
mailing list