[packages/mold] upstream crash fix; rel 2

Sun May 4 14:05:12 CEST 2025

commit 4bad3265070afe595b21af362644d68ad8638a0b
Author: Jan Palus <atler at pld-linux.org>
Date:   Sun May 4 13:16:25 2025 +0200

    upstream crash fix; rel 2
    
    see https://github.com/rui314/mold/issues/1449

 1449.patch | 250 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 mold.spec  |   4 +-
 2 files changed, 253 insertions(+), 1 deletion(-)
---

diff --git a/mold.spec b/mold.spec
index e78b8e7..6159fe0 100644
--- a/mold.spec
+++ b/mold.spec
@@ -5,11 +5,12 @@
 Summary:	mold: A Modern Linker
 Name:		mold
 Version:	2.38.1
-Release:	1
+Release:	2
 License:	MIT
 Group:		Development/Libraries
 Source0:	https://github.com/rui314/mold/archive/v%{version}/%{name}-%{version}.tar.gz
 # Source0-md5:	28d611644ecfdb7a1e1c443ed76fa4e8
+Patch0:		1449.patch
 URL:		https://github.com/rui314/mold
 BuildRequires:	blake3-devel
 BuildRequires:	cmake >= 3.14
@@ -38,6 +39,7 @@ especially in rapid debug-edit-rebuild cycles.
 
 %prep
 %setup -q
+%patch -P0 -p1
 
 %{__rm} -r third-party/{mimalloc,tbb}
 
diff --git a/1449.patch b/1449.patch
new file mode 100644
index 0000000..6e868d7
--- /dev/null
+++ b/1449.patch
@@ -0,0 +1,250 @@
+From fded2d8a77cda00944b4ef982f617c07ae2ce46d Mon Sep 17 00:00:00 2001
+From: Rui Ueyama <ruiu at cs.stanford.edu>
+Date: Sun, 4 May 2025 10:17:57 +0900
+Subject: [PATCH] Remove redundant thunks after computing section addresses
+
+Previously, we might mistakenly remove necessary symbols from thunks,
+causing the linker to crash.
+
+This fixes https://github.com/rui314/mold/issues/1449
+---
+ src/main.cc          |  15 +++---
+ src/mold.h           |   2 +-
+ src/output-chunks.cc |   3 ++
+ src/passes.cc        |   2 +-
+ src/thunks.cc        | 116 ++++++++++++++++++++++++++++++++++---------
+ 5 files changed, 106 insertions(+), 32 deletions(-)
+
+diff --git a/src/main.cc b/src/main.cc
+index 2e07a80055..950c0f3395 100644
+--- a/src/main.cc
++++ b/src/main.cc
+@@ -536,13 +536,6 @@ int mold_main(int argc, char **argv) {
+   if (ctx.arg.emit_relocs)
+     create_reloc_sections(ctx);
+ 
+-  // We've created range extension thunks with a pessimistive assumption
+-  // that all out-of-section references are out of range. Now that we are
+-  // able to assign addresses to all SHF_ALLOC output sections, we can
+-  // eliminate excessive thunks.
+-  if constexpr (needs_thunk<E>)
+-    remove_redundant_thunks(ctx);
+-
+   // Compute .symtab and .strtab sizes for each file.
+   if (!ctx.arg.strip_all)
+     create_output_symtab(ctx);
+@@ -562,6 +555,14 @@ int mold_main(int argc, char **argv) {
+     filesize = set_osec_offsets(ctx);
+   }
+ 
++  // We've created range extension thunks with a pessimistic assumption
++  // that all out-of-section references are out of range. Now that we know
++  // the addresses of all sections, we can eliminate excessive thunks.
++  if constexpr (needs_thunk<E>) {
++    remove_redundant_thunks(ctx);
++    filesize = set_osec_offsets(ctx);
++  }
++
+   if constexpr (is_arm32<E>) {
+     if (ctx.extra.exidx) {
+       ctx.extra.exidx->remove_duplicate_entries(ctx);
+diff --git a/src/mold.h b/src/mold.h
+index 9570c5a026..17832b5f3e 100644
+--- a/src/mold.h
++++ b/src/mold.h
+@@ -570,7 +570,7 @@ class OutputSection : public Chunk<E> {
+   void populate_symtab(Context<E> &ctx) override;
+ 
+   void scan_abs_relocations(Context<E> &ctx);
+-  void create_range_extension_thunks(Context<E> &ctx, bool first_pass);
++  void create_range_extension_thunks(Context<E> &ctx);
+ 
+   std::vector<InputSection<E> *> members;
+   std::vector<std::unique_ptr<Thunk<E>>> thunks;
+diff --git a/src/output-chunks.cc b/src/output-chunks.cc
+index 9284bf636a..17d643b4cc 100644
+--- a/src/output-chunks.cc
++++ b/src/output-chunks.cc
+@@ -1162,6 +1162,9 @@ void OutputSection<E>::populate_symtab(Context<E> &ctx) {
+     u8 *strtab_base = ctx.buf + ctx.strtab->shdr.sh_offset;
+     u8 *strtab = strtab_base + this->strtab_offset;
+ 
++    memset(esym, 0, this->num_local_symtab * sizeof(ElfSym<E>));
++    memset(strtab, 0, this->strtab_size);
++
+     auto write_esym = [&](u64 addr, i64 st_name) {
+       memset(esym, 0, sizeof(*esym));
+       esym->st_name = st_name;
+diff --git a/src/passes.cc b/src/passes.cc
+index a433414db1..835dc6506e 100644
+--- a/src/passes.cc
++++ b/src/passes.cc
+@@ -1440,7 +1440,7 @@ void compute_section_sizes(Context<E> &ctx) {
+ 
+     // create_range_extension_thunks is not thread-safe
+     for (Chunk<E> *chunk : std::span(vec.begin(), tail.begin()))
+-      chunk->to_osec()->create_range_extension_thunks(ctx, true);
++      chunk->to_osec()->create_range_extension_thunks(ctx);
+ 
+     tbb::parallel_for_each(tail, [&](Chunk<E> *chunk) {
+       chunk->compute_section_size(ctx);
+diff --git a/src/thunks.cc b/src/thunks.cc
+index cc45785c66..d8ae146ad4 100644
+--- a/src/thunks.cc
++++ b/src/thunks.cc
+@@ -45,28 +45,25 @@ static constexpr i64 max_thunk_size = 1024 * 1024;
+ static constexpr i64 thunk_align = 16;
+ 
+ template <typename E>
+-static bool is_reachable(Context<E> &ctx, bool first_pass, InputSection<E> &isec,
++static bool is_reachable(Context<E> &ctx, InputSection<E> &isec,
+                          Symbol<E> &sym, const ElfRel<E> &rel) {
++  // We assume pessimistically that all out-of-section relocations are
++  // out-of-range. Excessive thunks will be removed later by
++  // remove_redundant_thunks().
++  InputSection<E> *isec2 = sym.get_input_section();
++  if (!isec2 || isec.output_section != isec2->output_section)
++    return false;
++
+   // If the target section is in the same output section but
+   // hasn't got any address yet, that's unreacahble.
+-  InputSection<E> *isec2 = sym.get_input_section();
+-  if (isec2 && isec.output_section == isec2->output_section &&
+-      isec2->offset == -1)
++  if (isec2->offset == -1)
+     return false;
+ 
+-  // We don't know about the final file layout on the first pass, so
+-  // we assume pessimistically that all out-of-section relocations are
+-  // out-of-range. Excessive thunks will be removed on the second pass.
+-  if (first_pass) {
+-    if (!isec2 || isec.output_section != isec2->output_section)
+-      return false;
+-
+-    // Even if the target is the same section, we branch to its PLT
+-    // if it has one. So a symbol with a PLT is also considered an
+-    // out-of-section reference.
+-    if (sym.has_plt(ctx))
+-      return false;
+-  }
++  // Even if the target is the same section, we branch to its PLT
++  // if it has one. So a symbol with a PLT is also considered an
++  // out-of-section reference.
++  if (sym.has_plt(ctx))
++    return false;
+ 
+   // Compute a distance between the relocated place and the symbol
+   // and check if they are within reach.
+@@ -105,8 +102,7 @@ static bool needs_shim(Context<E> &ctx, Symbol<E> &sym, const ElfRel<E> &rel) {
+ }
+ 
+ template <>
+-void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx,
+-                                                     bool first_pass) {
++void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx) {
+   std::span<InputSection<E> *> m = members;
+   if (m.empty())
+     return;
+@@ -200,8 +196,7 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx,
+           continue;
+ 
+         // Skip if we can directly branch to the destination.
+-        if (is_reachable(ctx, first_pass, isec, sym, rel) &&
+-            !needs_shim(ctx, sym, rel))
++        if (is_reachable(ctx, isec, sym, rel) && !needs_shim(ctx, sym, rel))
+           continue;
+ 
+         // Add the symbol to the current thunk if it's not added already.
+@@ -233,15 +228,90 @@ void OutputSection<E>::create_range_extension_thunks(Context<E> &ctx,
+   this->shdr.sh_size = offset;
+ }
+ 
++// create_range_extension_thunks creates thunks with a pessimistic
++// assumption that all out-of-section references are out of range.
++// After computing output section addresses, we revisit all thunks to
++// remove unneeded symbols from them.
++//
++// We create more thunks than necessary and then eliminate some of
++// them later, instead of just creating thunks at this stage. This is
++// because we can safely shrink sections after assigning addresses to
++// them without worrying about making existing references to thunks go
++// out of range. On the other hand, if we insert thunks after
++// assigning addresses to sections, references to thunks could become
++// out of range due to the new extra gaps for thunks. Thus, the
++// creation of thunks is a two-pass process.
+ template <>
+ void remove_redundant_thunks(Context<E> &ctx) {
+   Timer t(ctx, "remove_redundant_thunks");
+-  set_osec_offsets(ctx);
+ 
++  // Gather output executable sections
++  std::vector<OutputSection<E> *> sections;
+   for (Chunk<E> *chunk : ctx.chunks)
+     if (OutputSection<E> *osec = chunk->to_osec())
+       if (osec->shdr.sh_flags & SHF_EXECINSTR)
+-        osec->create_range_extension_thunks(ctx, false);
++        sections.push_back(osec);
++
++  // Mark all symbols that actually need range extension thunks
++  for (OutputSection<E> *osec : sections) {
++    tbb::parallel_for_each(osec->members, [&](InputSection<E> *isec) {
++      for (const ElfRel<E> &rel : isec->get_rels(ctx)) {
++        if (!is_func_call_rel(rel))
++          continue;
++
++        Symbol<E> &sym = *isec->file.symbols[rel.r_sym];
++        if (!sym.file)
++          continue;
++
++        if (!needs_shim(ctx, sym, rel)) {
++          i64 S = sym.get_addr(ctx, NO_OPD);
++          i64 A = get_addend(*isec, rel);
++          i64 P = isec->get_addr() + rel.r_offset;
++          i64 val = S + A - P;
++          if (-branch_distance<E> <= val && val < branch_distance<E>)
++            continue;
++        }
++        sym.flags.test_and_set();
++      }
++    });
++  }
++
++  // Remove symbols from thunks if they don't actually need range
++  // extension thunks
++  std::vector<Symbol<E> *> syms;
++
++  for (OutputSection<E> *osec : sections) {
++    for (std::unique_ptr<Thunk<E>> &thunk : osec->thunks) {
++      append(syms, thunk->symbols);
++      std::erase_if(thunk->symbols, [&](Symbol<E> *sym) { return !sym->flags; });
++    }
++  }
++
++  // Reset flags for future use
++  for (Symbol<E> *sym : syms)
++    sym->flags = 0;
++
++  // Recompute section sizes
++  tbb::parallel_for_each(sections, [&](OutputSection<E> *osec) {
++    std::span<InputSection<E> *> m = osec->members;
++    std::span<std::unique_ptr<Thunk<E>>> t = osec->thunks;
++    i64 offset = 0;
++
++    while (!m.empty() || !t.empty()) {
++      if (!m.empty() && (t.empty() || m[0]->offset < t[0]->offset)) {
++        offset = align_to(offset, 1 << m[0]->p2align);
++        m[0]->offset = offset;
++        offset += m[0]->sh_size;
++        m = m.subspan(1);
++      } else {
++        offset = align_to(offset, thunk_align);
++        t[0]->offset = offset;
++        offset += t[0]->size();
++        t = t.subspan(1);
++      }
++    }
++    osec->shdr.sh_size = offset;
++  });
+ }
+ 
+ // When applying relocations, we want to know the address in a reachable
================================================================

---- gitweb:

http://git.pld-linux.org/gitweb.cgi/packages/mold.git/commitdiff/4bad3265070afe595b21af362644d68ad8638a0b