https://github.com/TartanLlama updated https://github.com/llvm/llvm-project/pull/200855
>From 83c5f9ab46715c25d95e0bfe6b6d5b0e1e2dd5b0 Mon Sep 17 00:00:00 2001 From: Sy Brand <[email protected]> Date: Sun, 31 May 2026 17:23:22 +0100 Subject: [PATCH 01/16] Cooperative multithreading changes --- clang/lib/Driver/ToolChains/WebAssembly.cpp | 8 +- clang/test/Driver/wasm-toolchain.c | 7 ++ lld/test/wasm/cooperative-multithreading.s | 81 +++++++++++++++++++ lld/test/wasm/thread-context-abi-mismatch.s | 4 +- lld/wasm/Config.h | 6 ++ lld/wasm/Driver.cpp | 11 ++- lld/wasm/Options.td | 5 +- lld/wasm/Relocations.cpp | 2 +- lld/wasm/SyntheticSections.cpp | 20 ++--- lld/wasm/Writer.cpp | 45 +++++++---- .../WebAssembly/WebAssemblySubtarget.cpp | 7 +- .../Target/WebAssembly/WebAssemblySubtarget.h | 4 + .../WebAssembly/WebAssemblyTargetMachine.cpp | 11 ++- .../WebAssembly/cooperative-strip-tls.ll | 20 +++++ .../WebAssembly/target-features-tls.ll | 1 + 15 files changed, 193 insertions(+), 39 deletions(-) create mode 100644 lld/test/wasm/cooperative-multithreading.s create mode 100644 llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index 4c1cd937e81aa..ce5463b167a58 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -88,8 +88,8 @@ static bool WantsPthread(const llvm::Triple &Triple, const ArgList &Args) { return WantsPthread; } -static bool WantsLibcallThreadContext(const llvm::Triple &Triple, - const ArgList &Args) { +static bool WantsCooperativeMultithreading(const llvm::Triple &Triple, + const ArgList &Args) { return Triple.getOS() == llvm::Triple::WASIp3; } @@ -174,8 +174,8 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); - if (WantsLibcallThreadContext(ToolChain.getTriple(), Args)) - CmdArgs.push_back("--libcall-thread-context"); + if (WantsCooperativeMultithreading(ToolChain.getTriple(), Args)) + 
CmdArgs.push_back("--cooperative-multithreading"); if (WantsPthread(ToolChain.getTriple(), Args)) CmdArgs.push_back("--shared-memory"); diff --git a/clang/test/Driver/wasm-toolchain.c b/clang/test/Driver/wasm-toolchain.c index 29a94aeec77a9..40d75da3166d9 100644 --- a/clang/test/Driver/wasm-toolchain.c +++ b/clang/test/Driver/wasm-toolchain.c @@ -303,3 +303,10 @@ // RUN: | FileCheck -check-prefix=LINK_WALI_BASIC %s // LINK_WALI_BASIC: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" // LINK_WALI_BASIC: wasm-ld{{.*}}" "-L/foo/lib/wasm32-linux-muslwali" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" + +// Test that `wasm32-wasip3` passes `--cooperative-multithreading` to the linker. + +// RUN: %clang -### --target=wasm32-wasip3 -fuse-ld=lld %s --sysroot /foo 2>&1 \ +// RUN: | FileCheck -check-prefix=LINK_WASIP3_COOP %s +// LINK_WASIP3_COOP: wasm-ld{{.*}}" {{.*}} "--cooperative-multithreading" +// LINK_WASIP3_COOP-NOT: "--libcall-thread-context" diff --git a/lld/test/wasm/cooperative-multithreading.s b/lld/test/wasm/cooperative-multithreading.s new file mode 100644 index 0000000000000..cb41dd392d5e2 --- /dev/null +++ b/lld/test/wasm/cooperative-multithreading.s @@ -0,0 +1,81 @@ +# Test that --cooperative-multithreading uses the libcall ABI naming for +# thread-context globals (__init_stack_pointer, __init_tls_base, etc.) and +# works without --shared-memory and atomics. 
+ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s +# RUN: wasm-ld --cooperative-multithreading -no-gc-sections -o %t.wasm %t.o +# RUN: obj2yaml %t.wasm | FileCheck %s +# RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS + +.globl __wasm_get_tls_base +__wasm_get_tls_base: + .functype __wasm_get_tls_base () -> (i32) + i32.const 0 + end_function + +.globl _start +_start: + .functype _start () -> (i32) + call __wasm_get_tls_base + i32.const tls1@TLSREL + i32.add + i32.load 0 + call __wasm_get_tls_base + i32.const tls2@TLSREL + i32.add + i32.load 0 + i32.add + end_function + +.section .tdata.tls1,"",@ +.globl tls1 +tls1: + .int32 1 + .size tls1, 4 + +.section .tdata.tls2,"",@ +.globl tls2 +tls2: + .int32 2 + .size tls2, 4 + +.section .custom_section.target_features,"",@ + .int8 2 + .int8 43 + .int8 11 + .ascii "bulk-memory" + .int8 43 + .int8 7 + .ascii "atomics" + +# Memory must NOT be marked as shared. +# CHECK: - Type: MEMORY +# CHECK-NEXT: Memories: +# CHECK-NEXT: - Minimum: 0x2 +# CHECK-NOT: IS_SHARED + +# Globals should use the libcall ABI naming, not the global ABI. 
+# CHECK: GlobalNames: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Name: __init_stack_pointer +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Name: __init_tls_base +# CHECK-NEXT: - Index: 2 +# CHECK-NEXT: Name: __tls_size +# CHECK-NEXT: - Index: 3 +# CHECK-NEXT: Name: __tls_align + +# DIS-LABEL: <__wasm_init_memory>: + +# DIS-LABEL: <_start>: +# DIS-EMPTY: +# DIS-NEXT: call {{[0-9]+}} +# DIS-NEXT: i32.const 0 +# DIS-NEXT: i32.add +# DIS-NEXT: i32.load 0 +# DIS-NEXT: call {{[0-9]+}} +# DIS-NEXT: i32.const 4 +# DIS-NEXT: i32.add +# DIS-NEXT: i32.load 0 +# DIS-NEXT: i32.add +# DIS-NEXT: end diff --git a/lld/test/wasm/thread-context-abi-mismatch.s b/lld/test/wasm/thread-context-abi-mismatch.s index 069534cbe5762..acab6fd59d9b7 100644 --- a/lld/test/wasm/thread-context-abi-mismatch.s +++ b/lld/test/wasm/thread-context-abi-mismatch.s @@ -4,9 +4,9 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s # RUN: not wasm-ld --libcall-thread-context %t.o -o %t.wasm 2>&1 | FileCheck %s +# RUN: not wasm-ld --cooperative-multithreading %t.o -o %t.wasm 2>&1 | FileCheck %s -# CHECK: object file uses globals for thread context, but --libcall-thread-context was specified - +# CHECK: object file uses globals for thread context, but --libcall-thread-context or --cooperative-multithreading was specified .globl _start _start: .functype _start () -> () diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index 71a378a412e9e..873d25d130424 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -46,6 +46,8 @@ enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic }; // For --build-id. enum class BuildIdKind { None, Fast, Sha1, Hexstring, Uuid }; +enum class ThreadModel { Single, Cooperative, SharedMemory }; + // This struct contains the global configuration for the linker. // Most fields are direct mapping from the command line options // and such fields have the same name as the corresponding options. 
@@ -65,6 +67,7 @@ struct Config { bool growableTable; bool gcSections; llvm::StringSet<> keepSections; + bool cooperativeMultithreading; bool libcallThreadContext; std::optional<std::pair<llvm::StringRef, llvm::StringRef>> memoryImport; std::optional<llvm::StringRef> memoryExport; @@ -134,6 +137,9 @@ struct Config { std::optional<std::vector<std::string>> features; std::optional<std::vector<std::string>> extraFeatures; llvm::SmallVector<uint8_t, 0> buildIdVector; + + ThreadModel threadModel = ThreadModel::Single; + bool isMultithreaded() const { return threadModel != ThreadModel::Single; } }; // The Ctx object hold all other (non-configuration) global state. diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index fe1e2eec95037..20b398fc39a0c 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -561,6 +561,7 @@ static void readConfigs(opt::InputArgList &args) { ctx.arg.soName = args.getLastArgValue(OPT_soname); ctx.arg.importTable = args.hasArg(OPT_import_table); ctx.arg.importUndefined = args.hasArg(OPT_import_undefined); + ctx.arg.cooperativeMultithreading = args.hasArg(OPT_cooperative_multithreading); ctx.arg.libcallThreadContext = args.hasArg(OPT_libcall_thread_context); ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2); if (ctx.arg.ltoo > 3) @@ -755,6 +756,12 @@ static void setConfigs() { if (!ctx.arg.memoryExport.has_value() && !ctx.arg.memoryImport.has_value()) { ctx.arg.memoryExport = memoryName; } + + if (ctx.arg.cooperativeMultithreading) { + ctx.arg.threadModel = ThreadModel::Cooperative; + ctx.arg.libcallThreadContext = true; + } else if (ctx.arg.sharedMemory) + ctx.arg.threadModel = ThreadModel::SharedMemory; } // Some command line options or some combinations of them are not allowed. 
@@ -964,7 +971,7 @@ static void createSyntheticSymbols() { createGlobalVariable(stack_pointer_name, !ctx.arg.libcallThreadContext); } - if (ctx.arg.sharedMemory) { + if (ctx.arg.isMultithreaded()) { // TLS symbols are all hidden/dso-local auto tls_base_name = ctx.arg.libcallThreadContext ? "__init_tls_base" : "__tls_base"; @@ -1028,7 +1035,7 @@ static void createOptionalSymbols() { // // __tls_size and __tls_align are not needed in this case since they are only // needed for __wasm_init_tls (which we do not create in this case). - if (!ctx.arg.sharedMemory) + if (!ctx.sym.tlsBase) ctx.sym.tlsBase = createOptionalGlobal("__tls_base", false); } diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td index 144eee33061e1..8ad386ca0ce39 100644 --- a/lld/wasm/Options.td +++ b/lld/wasm/Options.td @@ -238,9 +238,12 @@ def page_size: JJ<"page-size=">, def initial_memory: JJ<"initial-memory=">, HelpText<"Initial size of the linear memory">; +def cooperative_multithreading: FF<"cooperative-multithreading">, + HelpText<"Enable cooperative multithreading.">; + def libcall_thread_context: FF<"libcall-thread-context">, HelpText<"Use library calls for thread context access instead of globals.">; - + def max_memory: JJ<"max-memory=">, HelpText<"Maximum size of the linear memory">; diff --git a/lld/wasm/Relocations.cpp b/lld/wasm/Relocations.cpp index a1840abe88b3a..cb597fdeffcf3 100644 --- a/lld/wasm/Relocations.cpp +++ b/lld/wasm/Relocations.cpp @@ -125,7 +125,7 @@ void scanRelocations(InputChunk *chunk) { // In single-threaded builds TLS is lowered away and TLS data can be // merged with normal data and allowing TLS relocation in non-TLS // segments. 
- if (ctx.arg.sharedMemory) { + if (ctx.arg.isMultithreaded()) { if (!sym->isTLS()) { error(toString(file) + ": relocation " + relocTypeToString(reloc.Type) + diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index d1a01c7ec3f9d..a465f2fb590b3 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -57,7 +57,7 @@ void writeGetTLSBase(const Ctx &ctx, raw_ostream &os) { writeU8(os, WASM_OPCODE_CALL, "call"); writeUleb128(os, ctx.sym.getTLSBase->getFunctionIndex(), "function index"); } else { - writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_SET"); + writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base"); } } @@ -265,11 +265,11 @@ void ImportSection::writeBody() { import.Kind = WASM_EXTERNAL_MEMORY; import.Memory.Flags = 0; import.Memory.Minimum = out.memorySec->numMemoryPages; - if (out.memorySec->maxMemoryPages != 0 || ctx.arg.sharedMemory) { + if (out.memorySec->maxMemoryPages != 0 || ctx.arg.threadModel == ThreadModel::SharedMemory) { import.Memory.Flags |= WASM_LIMITS_FLAG_HAS_MAX; import.Memory.Maximum = out.memorySec->maxMemoryPages; } - if (ctx.arg.sharedMemory) + if (ctx.arg.threadModel == ThreadModel::SharedMemory) import.Memory.Flags |= WASM_LIMITS_FLAG_IS_SHARED; if (is64) import.Memory.Flags |= WASM_LIMITS_FLAG_IS_64; @@ -406,12 +406,12 @@ void TableSection::assignIndexes() { void MemorySection::writeBody() { raw_ostream &os = bodyOutputStream; - bool hasMax = maxMemoryPages != 0 || ctx.arg.sharedMemory; + bool hasMax = maxMemoryPages != 0 || ctx.arg.threadModel == ThreadModel::SharedMemory; writeUleb128(os, 1, "memory count"); unsigned flags = 0; if (hasMax) flags |= WASM_LIMITS_FLAG_HAS_MAX; - if (ctx.arg.sharedMemory) + if (ctx.arg.threadModel == ThreadModel::SharedMemory) flags |= WASM_LIMITS_FLAG_IS_SHARED; if (ctx.arg.is64.value_or(false)) flags |= WASM_LIMITS_FLAG_IS_64; @@ -532,7 +532,7 @@ void GlobalSection::writeBody() { mutable_ = 
true; // With multi-threading any TLS globals must be mutable since they get // set during `__wasm_apply_global_tls_relocs` - if (ctx.arg.sharedMemory && sym->isTLS()) + if (ctx.arg.isMultithreaded() && sym->isTLS()) mutable_ = true; } WasmGlobalType type{itype, mutable_}; @@ -569,10 +569,10 @@ void GlobalSection::writeBody() { } else { WasmInitExpr initExpr; if (auto *d = dyn_cast<DefinedData>(sym)) - // In the sharedMemory case TLS globals are set during - // `__wasm_apply_global_tls_relocs`, but in the non-shared case + // In the multithreaded case, TLS globals are set during + // `__wasm_apply_global_tls_relocs`, but in the single-threaded case // we know the absolute value at link time. - initExpr = intConst(d->getVA(/*absolute=*/!ctx.arg.sharedMemory), is64); + initExpr = intConst(d->getVA(/*absolute=*/!ctx.arg.isMultithreaded()), is64); else if (auto *f = dyn_cast<FunctionSymbol>(sym)) initExpr = intConst(f->isStub ? 0 : f->getTableIndex(), is64); else { @@ -680,7 +680,7 @@ bool DataCountSection::isNeeded() const { // instructions are not yet supported in input files. However, in the case // of shared memory, lld itself will generate these instructions as part of // `__wasm_init_memory`. See Writer::createInitMemoryFunction. - return numSegments && ctx.arg.sharedMemory; + return numSegments && ctx.arg.isMultithreaded(); } void LinkingSection::writeBody() { diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 688bb829e1c42..79e3c46410e8d 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -425,13 +425,13 @@ void Writer::layoutMemory() { // Even in the absense of any actual TLS data, this symbol can still be // referenced (for example by __builtin_thread_pointer, which should not // return NULL). 
- if (!ctx.arg.sharedMemory && ctx.sym.tlsBase) { + if (!ctx.arg.isMultithreaded() && ctx.sym.tlsBase) { auto *tlsBase = cast<DefinedGlobal>(ctx.sym.tlsBase); setGlobalPtr(tlsBase, fixedTLSBase); } // Make space for the memory initialization flag - if (ctx.arg.sharedMemory && hasPassiveInitializedSegments()) { + if (ctx.arg.threadModel == ThreadModel::SharedMemory && hasPassiveInitializedSegments()) { memoryPtr = alignTo(memoryPtr, 4); ctx.sym.initMemoryFlag = symtab->addSyntheticDataSymbol( "__wasm_init_memory_flag", WASM_SYMBOL_VISIBILITY_HIDDEN); @@ -519,7 +519,7 @@ void Writer::layoutMemory() { // If no maxMemory config was supplied but we are building with // shared memory, we need to pick a sensible upper limit. - if (ctx.arg.sharedMemory && maxMemory == 0) { + if (ctx.arg.threadModel == ThreadModel::SharedMemory && maxMemory == 0) { if (ctx.isPic) maxMemory = maxMemorySetting; else @@ -1057,7 +1057,15 @@ static StringRef getOutputDataSegmentName(const InputChunk &seg) { OutputSegment *Writer::createOutputSegment(StringRef name) { LLVM_DEBUG(dbgs() << "new segment: " << name << "\n"); OutputSegment *s = make<OutputSegment>(name); - if (ctx.arg.sharedMemory) + // In the shared memory case, all data segments must be passive since they + // will be initialized once by the main thread and then shared with other + // threads. In the non-shared memory case, we use passive segments only for + // TLS segments, so that they can be reused, and for .bss segments, which + // don't need to be included in the binary at all. 
+ bool needsPassiveInit = ctx.arg.threadModel == ThreadModel::SharedMemory || + (ctx.arg.threadModel == ThreadModel::Cooperative && + (s->isTLS() || s->name.starts_with(".bss"))); + if (needsPassiveInit) s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE; if (!ctx.arg.relocatable && name.starts_with(".bss")) s->isBss = true; @@ -1198,7 +1206,7 @@ void Writer::createSyntheticInitFunctions() { } } - if (ctx.arg.sharedMemory) { + if (ctx.arg.isMultithreaded()) { if (out.globalSec->needsTLSRelocations()) { ctx.sym.applyGlobalTLSRelocs = symtab->addSyntheticFunction( "__wasm_apply_global_tls_relocs", WASM_SYMBOL_VISIBILITY_HIDDEN, @@ -1247,7 +1255,7 @@ void Writer::createInitMemoryFunction() { assert(ctx.sym.initMemory); assert(hasPassiveInitializedSegments()); uint64_t flagAddress; - if (ctx.arg.sharedMemory) { + if (ctx.arg.threadModel == ThreadModel::SharedMemory) { assert(ctx.sym.initMemoryFlag); flagAddress = ctx.sym.initMemoryFlag->getVA(); } @@ -1315,7 +1323,7 @@ void Writer::createInitMemoryFunction() { } }; - if (ctx.arg.sharedMemory) { + if (ctx.arg.threadModel == ThreadModel::SharedMemory) { // With PIC code we cache the flag address in local 0 if (ctx.isPic) { writeUleb128(os, 1, "num local decls"); @@ -1378,7 +1386,7 @@ void Writer::createInitMemoryFunction() { // When we initialize the TLS segment we also set the TLS base. // This allows the runtime to use this static copy of the TLS data // for the first/main thread. 
- if (ctx.arg.sharedMemory && s->isTLS()) { + if (ctx.arg.isMultithreaded() && s->isTLS()) { if (ctx.isPic) { // Cache the result of the addionion in local 0 writeU8(os, WASM_OPCODE_LOCAL_TEE, "local.tee"); @@ -1410,7 +1418,7 @@ void Writer::createInitMemoryFunction() { } } - if (ctx.arg.sharedMemory) { + if (ctx.arg.threadModel == ThreadModel::SharedMemory) { // Set flag to 2 to mark end of initialization writeGetFlagAddress(); writeI32Const(os, 2, "flag value"); @@ -1449,7 +1457,7 @@ void Writer::createInitMemoryFunction() { if (needsPassiveInitialization(s) && !s->isBss) { // The TLS region should not be dropped since its is needed // during the initialization of each thread (__wasm_init_tls). - if (ctx.arg.sharedMemory && s->isTLS()) + if (ctx.arg.isMultithreaded() && s->isTLS()) continue; // data.drop instruction writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix"); @@ -1502,7 +1510,7 @@ void Writer::createApplyDataRelocationsFunction() { writeUleb128(os, 0, "num locals"); bool generated = false; for (const OutputSegment *seg : segments) - if (!ctx.arg.sharedMemory || !seg->isTLS()) + if (!ctx.arg.isMultithreaded() || !seg->isTLS()) for (const InputChunk *inSeg : seg->inputSegments) generated |= inSeg->generateRelocationCode(os); @@ -1656,10 +1664,17 @@ void Writer::createInitTLSFunction() { writeUleb128(os, 0, "num locals"); if (tlsSeg) { - writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); - writeUleb128(os, 0, "local index"); - writeSetTLSBase(ctx, os); + /* + // In cooperative threading mode the runtime is responsible for calling + // __wasm_set_tls_base separately; __wasm_init_tls only copies the TLS + // template data. 
+ if (!ctx.arg.libcallThreadContext) { + writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); + writeUleb128(os, 0, "local index"); + writeU8(os, WASM_OPCODE_GLOBAL_SET, "global.set"); + writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "global index"); + }*/ // FIXME(wvo): this local needs to be I64 in wasm64, or we need an extend // op. @@ -1791,7 +1806,7 @@ void Writer::run() { // `__memory_base` import. Unless we support the extended const expression we // can't do addition inside the constant expression, so we much combine the // segments into a single one that can live at `__memory_base`. - if (ctx.isPic && !ctx.arg.extendedConst && !ctx.arg.sharedMemory) { + if (ctx.isPic && !ctx.arg.extendedConst && ctx.arg.threadModel != ThreadModel::SharedMemory) { // In shared memory mode all data segments are passive and initialized // via __wasm_init_memory. log("-- combineOutputSegments"); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index 6326b7d76db82..9dea29fb0205d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -40,9 +40,12 @@ WebAssemblySubtarget::initializeSubtargetDependencies(StringRef CPU, ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS); - // WASIP3 implies using the libcall thread context. - if (TargetTriple.getOS() == Triple::WASIp3) + // WASIP3 uses cooperative multithreading, which implies using libcall + // thread context. 
+ if (TargetTriple.getOS() == Triple::WASIp3) { + HasCooperativeMultithreading = true; HasLibcallThreadContext = true; + } FeatureBitset Bits = getFeatureBits(); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h index 5c6f4cb5b36ff..f637ce59ebfce 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h @@ -52,6 +52,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { bool HasExtendedConst = false; bool HasFP16 = false; bool HasGC = false; + bool HasCooperativeMultithreading = false; bool HasLibcallThreadContext = false; bool HasMultiMemory = false; bool HasMultivalue = false; @@ -117,6 +118,9 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { bool hasExtendedConst() const { return HasExtendedConst; } bool hasFP16() const { return HasFP16; } bool hasGC() const { return HasGC; } + bool hasCooperativeMultithreading() const { + return HasCooperativeMultithreading; + } bool hasLibcallThreadContext() const { return HasLibcallThreadContext; } bool hasMultiMemory() const { return HasMultiMemory; } bool hasMultivalue() const { return HasMultivalue; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 1361dd99b7072..ee15c9093ff3b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -283,10 +283,17 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass { bool StrippedAtomics = false; bool StrippedTLS = false; + // In cooperative threading mode, thread locals are meaningful even without + // atomics. 
+ bool CooperativeThreading = + WasmTM->getSubtargetImpl()->hasCooperativeMultithreading(); + if (!Features[WebAssembly::FeatureAtomics]) { StrippedAtomics = stripAtomics(M); - StrippedTLS = stripThreadLocals(M); - } else if (!Features[WebAssembly::FeatureBulkMemory]) { + if (!CooperativeThreading) + StrippedTLS = stripThreadLocals(M); + } + if (!Features[WebAssembly::FeatureBulkMemory]) { StrippedTLS |= stripThreadLocals(M); } diff --git a/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll b/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll new file mode 100644 index 0000000000000..46ac1cd0509b7 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll @@ -0,0 +1,20 @@ +; Test that in cooperative threading mode (wasm32-wasip3), thread-local variables +; are NOT stripped even when atomics are absent. In non-cooperative mode +; (wasm32-unknown-unknown) TLS is stripped to .bss when atomics are absent. + +; RUN: llc < %s -mtriple=wasm32-wasip3 -mcpu=mvp -mattr=-atomics,+bulk-memory \ +; RUN: | FileCheck %s --check-prefixes=COOP +; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mcpu=mvp -mattr=-atomics,+bulk-memory \ +; RUN: | FileCheck %s --check-prefixes=PLAIN + +target triple = "wasm32-unknown-unknown" + +@foo = internal thread_local global i32 0 + +; Cooperative threading: TLS is preserved — the section stays .tbss. 
+; COOP: .tbss.foo +; COOP-NOT: .bss.foo + +; Non-cooperative: TLS stripped +; PLAIN: .bss.foo +; PLAIN-NOT: .tbss.foo diff --git a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll index 4abe01a73aeee..92333f3c7b9f1 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll @@ -32,3 +32,4 @@ target triple = "wasm32-unknown-unknown" ; BULK-MEM-NEXT: .int8 15 ; BULK-MEM-NEXT: .ascii "bulk-memory-opt" ; BULK-MEM-NEXT: .tbss.foo,"T",@ + >From 8fea1e2a54c8055132798269ca8b619729ea9db2 Mon Sep 17 00:00:00 2001 From: Sy Brand <[email protected]> Date: Mon, 1 Jun 2026 16:17:23 +0100 Subject: [PATCH 02/16] Cleanup createInitTLSFunction --- lld/wasm/Writer.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 79e3c46410e8d..aa6b84c6f925f 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -1664,17 +1664,9 @@ void Writer::createInitTLSFunction() { writeUleb128(os, 0, "num locals"); if (tlsSeg) { + writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); + writeUleb128(os, 0, "local index"); writeSetTLSBase(ctx, os); - /* - // In cooperative threading mode the runtime is responsible for calling - // __wasm_set_tls_base separately; __wasm_init_tls only copies the TLS - // template data. - if (!ctx.arg.libcallThreadContext) { - writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); - writeUleb128(os, 0, "local index"); - writeU8(os, WASM_OPCODE_GLOBAL_SET, "global.set"); - writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "global index"); - }*/ // FIXME(wvo): this local needs to be I64 in wasm64, or we need an extend // op. 
>From ca1b2937d96037de1007677ca739a09162cae96f Mon Sep 17 00:00:00 2001 From: Sy Brand <[email protected]> Date: Mon, 1 Jun 2026 16:19:54 +0100 Subject: [PATCH 03/16] Remove newline --- llvm/test/CodeGen/WebAssembly/target-features-tls.ll | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll index 92333f3c7b9f1..4abe01a73aeee 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll @@ -32,4 +32,3 @@ target triple = "wasm32-unknown-unknown" ; BULK-MEM-NEXT: .int8 15 ; BULK-MEM-NEXT: .ascii "bulk-memory-opt" ; BULK-MEM-NEXT: .tbss.foo,"T",@ - >From e76828df6b96738c4b49708ac7888a63faa17d3b Mon Sep 17 00:00:00 2001 From: Sy Brand <[email protected]> Date: Mon, 1 Jun 2026 16:35:47 +0100 Subject: [PATCH 04/16] Replace libcall-thread-context flag --- lld/test/wasm/stack-pointer-abi.s | 2 +- lld/test/wasm/thread-context-abi-mismatch.s | 3 +-- lld/test/wasm/tls-libcall.s | 2 +- lld/wasm/Driver.cpp | 1 - lld/wasm/Options.td | 3 --- lld/wasm/Writer.cpp | 2 +- 6 files changed, 4 insertions(+), 9 deletions(-) diff --git a/lld/test/wasm/stack-pointer-abi.s b/lld/test/wasm/stack-pointer-abi.s index 869f972710991..fbae0475bcba2 100644 --- a/lld/test/wasm/stack-pointer-abi.s +++ b/lld/test/wasm/stack-pointer-abi.s @@ -1,5 +1,5 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s -# RUN: wasm-ld --libcall-thread-context --no-gc-sections -o %t.libcall.wasm %t.o +# RUN: wasm-ld --cooperative-multithreading --no-gc-sections -o %t.libcall.wasm %t.o # RUN: obj2yaml %t.libcall.wasm | FileCheck %s --check-prefix=LIBCALL # RUN: wasm-ld --no-gc-sections -o %t.global.wasm %t.o # RUN: obj2yaml %t.global.wasm | FileCheck %s --check-prefix=GLOBAL diff --git a/lld/test/wasm/thread-context-abi-mismatch.s b/lld/test/wasm/thread-context-abi-mismatch.s index acab6fd59d9b7..a817ca4407aab 100644 --- 
a/lld/test/wasm/thread-context-abi-mismatch.s +++ b/lld/test/wasm/thread-context-abi-mismatch.s @@ -3,10 +3,9 @@ # as an indication that the global thread context ABI is being used. # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s -# RUN: not wasm-ld --libcall-thread-context %t.o -o %t.wasm 2>&1 | FileCheck %s # RUN: not wasm-ld --cooperative-multithreading %t.o -o %t.wasm 2>&1 | FileCheck %s -# CHECK: object file uses globals for thread context, but --libcall-thread-context or --cooperative-multithreading was specified +# CHECK: object file uses globals for thread context, but --cooperative-multithreading was specified .globl _start _start: .functype _start () -> () diff --git a/lld/test/wasm/tls-libcall.s b/lld/test/wasm/tls-libcall.s index df8b8f8be0207..a0a7f37379bac 100644 --- a/lld/test/wasm/tls-libcall.s +++ b/lld/test/wasm/tls-libcall.s @@ -1,5 +1,5 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s -# RUN: wasm-ld --libcall-thread-context --shared-memory -no-gc-sections -o %t.wasm %t.o +# RUN: wasm-ld --cooperative-multithreading --shared-memory -no-gc-sections -o %t.wasm %t.o # RUN: obj2yaml %t.wasm | FileCheck %s # RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 20b398fc39a0c..1ef4f55becc50 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -562,7 +562,6 @@ static void readConfigs(opt::InputArgList &args) { ctx.arg.importTable = args.hasArg(OPT_import_table); ctx.arg.importUndefined = args.hasArg(OPT_import_undefined); ctx.arg.cooperativeMultithreading = args.hasArg(OPT_cooperative_multithreading); - ctx.arg.libcallThreadContext = args.hasArg(OPT_libcall_thread_context); ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2); if (ctx.arg.ltoo > 3) error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo)); diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td index 
8ad386ca0ce39..bd2a7a19e0887 100644 --- a/lld/wasm/Options.td +++ b/lld/wasm/Options.td @@ -241,9 +241,6 @@ def initial_memory: JJ<"initial-memory=">, def cooperative_multithreading: FF<"cooperative-multithreading">, HelpText<"Enable cooperative multithreading.">; -def libcall_thread_context: FF<"libcall-thread-context">, - HelpText<"Use library calls for thread context access instead of globals.">; - def max_memory: JJ<"max-memory=">, HelpText<"Maximum size of the linear memory">; diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index aa6b84c6f925f..2128c1b213e5c 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -653,7 +653,7 @@ void Writer::populateTargetFeatures() { sym->importModule && sym->importModule == "env"; })) error(fileName + ": object file uses globals for thread context, " - "but --libcall-thread-context was specified"); + "but --cooperative-multithreading was specified"); } if (inferFeatures) >From 77c1c717e48adbe1f5800b0268a586e658dd6b5c Mon Sep 17 00:00:00 2001 From: Sy Brand <[email protected]> Date: Mon, 1 Jun 2026 16:36:36 +0100 Subject: [PATCH 05/16] fmt --- lld/wasm/Driver.cpp | 3 ++- lld/wasm/SyntheticSections.cpp | 9 ++++++--- lld/wasm/Writer.cpp | 10 ++++++---- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 1ef4f55becc50..17781995815f5 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -561,7 +561,8 @@ static void readConfigs(opt::InputArgList &args) { ctx.arg.soName = args.getLastArgValue(OPT_soname); ctx.arg.importTable = args.hasArg(OPT_import_table); ctx.arg.importUndefined = args.hasArg(OPT_import_undefined); - ctx.arg.cooperativeMultithreading = args.hasArg(OPT_cooperative_multithreading); + ctx.arg.cooperativeMultithreading = + args.hasArg(OPT_cooperative_multithreading); ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2); if (ctx.arg.ltoo > 3) error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo)); diff --git 
a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index a465f2fb590b3..6c7d46787d661 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -265,7 +265,8 @@ void ImportSection::writeBody() { import.Kind = WASM_EXTERNAL_MEMORY; import.Memory.Flags = 0; import.Memory.Minimum = out.memorySec->numMemoryPages; - if (out.memorySec->maxMemoryPages != 0 || ctx.arg.threadModel == ThreadModel::SharedMemory) { + if (out.memorySec->maxMemoryPages != 0 || + ctx.arg.threadModel == ThreadModel::SharedMemory) { import.Memory.Flags |= WASM_LIMITS_FLAG_HAS_MAX; import.Memory.Maximum = out.memorySec->maxMemoryPages; } @@ -406,7 +407,8 @@ void TableSection::assignIndexes() { void MemorySection::writeBody() { raw_ostream &os = bodyOutputStream; - bool hasMax = maxMemoryPages != 0 || ctx.arg.threadModel == ThreadModel::SharedMemory; + bool hasMax = + maxMemoryPages != 0 || ctx.arg.threadModel == ThreadModel::SharedMemory; writeUleb128(os, 1, "memory count"); unsigned flags = 0; if (hasMax) @@ -572,7 +574,8 @@ void GlobalSection::writeBody() { // In the multithreaded case, TLS globals are set during // `__wasm_apply_global_tls_relocs`, but in the single-threaded case // we know the absolute value at link time. - initExpr = intConst(d->getVA(/*absolute=*/!ctx.arg.isMultithreaded()), is64); + initExpr = + intConst(d->getVA(/*absolute=*/!ctx.arg.isMultithreaded()), is64); else if (auto *f = dyn_cast<FunctionSymbol>(sym)) initExpr = intConst(f->isStub ? 
0 : f->getTableIndex(), is64); else { diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 2128c1b213e5c..cf99208456a11 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -431,7 +431,8 @@ void Writer::layoutMemory() { } // Make space for the memory initialization flag - if (ctx.arg.threadModel == ThreadModel::SharedMemory && hasPassiveInitializedSegments()) { + if (ctx.arg.threadModel == ThreadModel::SharedMemory && + hasPassiveInitializedSegments()) { memoryPtr = alignTo(memoryPtr, 4); ctx.sym.initMemoryFlag = symtab->addSyntheticDataSymbol( "__wasm_init_memory_flag", WASM_SYMBOL_VISIBILITY_HIDDEN); @@ -1063,8 +1064,8 @@ OutputSegment *Writer::createOutputSegment(StringRef name) { // TLS segments, so that they can be reused, and for .bss segments, which // don't need to be included in the binary at all. bool needsPassiveInit = ctx.arg.threadModel == ThreadModel::SharedMemory || - (ctx.arg.threadModel == ThreadModel::Cooperative && - (s->isTLS() || s->name.starts_with(".bss"))); + (ctx.arg.threadModel == ThreadModel::Cooperative && + (s->isTLS() || s->name.starts_with(".bss"))); if (needsPassiveInit) s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE; if (!ctx.arg.relocatable && name.starts_with(".bss")) @@ -1798,7 +1799,8 @@ void Writer::run() { // `__memory_base` import. Unless we support the extended const expression we // can't do addition inside the constant expression, so we much combine the // segments into a single one that can live at `__memory_base`. - if (ctx.isPic && !ctx.arg.extendedConst && ctx.arg.threadModel != ThreadModel::SharedMemory) { + if (ctx.isPic && !ctx.arg.extendedConst && + ctx.arg.threadModel != ThreadModel::SharedMemory) { // In shared memory mode all data segments are passive and initialized // via __wasm_init_memory. 
log("-- combineOutputSegments"); >From 763392d058e4298f0bee6e66f17adb703b552a66 Mon Sep 17 00:00:00 2001 From: Sy Brand <[email protected]> Date: Mon, 1 Jun 2026 16:43:01 +0100 Subject: [PATCH 06/16] Correct output segments --- lld/wasm/Writer.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index cf99208456a11..d60dfcdaf43a6 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -1125,7 +1125,7 @@ void Writer::combineOutputSegments() { // This restriction does not apply when the extended const extension is // available: https://github.com/WebAssembly/extended-const assert(!ctx.arg.extendedConst); - assert(ctx.isPic && !ctx.arg.sharedMemory); + assert(ctx.isPic && !ctx.arg.isMultithreaded()); if (segments.size() <= 1) return; OutputSegment *combined = make<OutputSegment>(".data"); @@ -1799,10 +1799,9 @@ void Writer::run() { // `__memory_base` import. Unless we support the extended const expression we // can't do addition inside the constant expression, so we much combine the // segments into a single one that can live at `__memory_base`. - if (ctx.isPic && !ctx.arg.extendedConst && - ctx.arg.threadModel != ThreadModel::SharedMemory) { - // In shared memory mode all data segments are passive and initialized - // via __wasm_init_memory. + if (ctx.isPic && !ctx.arg.extendedConst && !ctx.arg.isMultithreaded()) { + // In multithreaded modes (shared or cooperative), data segments may be + // passive and must not be combined into a single active segment. 
log("-- combineOutputSegments"); combineOutputSegments(); } >From c68d4de08b8ac66b38a6d24adad0c26040aaa134 Mon Sep 17 00:00:00 2001 From: Sy Brand <[email protected]> Date: Mon, 1 Jun 2026 16:53:02 +0100 Subject: [PATCH 07/16] Cleanup --- lld/wasm/Config.h | 6 +----- lld/wasm/Driver.cpp | 5 ----- lld/wasm/SyntheticSections.cpp | 8 ++++---- lld/wasm/Writer.cpp | 14 +++++++------- 4 files changed, 12 insertions(+), 21 deletions(-) diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index 873d25d130424..af74f0f40bbdf 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -46,8 +46,6 @@ enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic }; // For --build-id. enum class BuildIdKind { None, Fast, Sha1, Hexstring, Uuid }; -enum class ThreadModel { Single, Cooperative, SharedMemory }; - // This struct contains the global configuration for the linker. // Most fields are direct mapping from the command line options // and such fields have the same name as the corresponding options. @@ -68,7 +66,6 @@ struct Config { bool gcSections; llvm::StringSet<> keepSections; bool cooperativeMultithreading; - bool libcallThreadContext; std::optional<std::pair<llvm::StringRef, llvm::StringRef>> memoryImport; std::optional<llvm::StringRef> memoryExport; bool sharedMemory; @@ -138,8 +135,7 @@ struct Config { std::optional<std::vector<std::string>> extraFeatures; llvm::SmallVector<uint8_t, 0> buildIdVector; - ThreadModel threadModel = ThreadModel::Single; - bool isMultithreaded() const { return threadModel != ThreadModel::Single; } + bool isMultithreaded() const { return sharedMemory || cooperativeMultithreading; } }; // The Ctx object hold all other (non-configuration) global state. 
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 17781995815f5..b06e0fbb55eec 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -757,11 +757,6 @@ static void setConfigs() { ctx.arg.memoryExport = memoryName; } - if (ctx.arg.cooperativeMultithreading) { - ctx.arg.threadModel = ThreadModel::Cooperative; - ctx.arg.libcallThreadContext = true; - } else if (ctx.arg.sharedMemory) - ctx.arg.threadModel = ThreadModel::SharedMemory; } // Some command line options or some combinations of them are not allowed. diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index 6c7d46787d661..753a1c7fe5c82 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -266,11 +266,11 @@ void ImportSection::writeBody() { import.Memory.Flags = 0; import.Memory.Minimum = out.memorySec->numMemoryPages; if (out.memorySec->maxMemoryPages != 0 || - ctx.arg.threadModel == ThreadModel::SharedMemory) { + ctx.arg.sharedMemory) { import.Memory.Flags |= WASM_LIMITS_FLAG_HAS_MAX; import.Memory.Maximum = out.memorySec->maxMemoryPages; } - if (ctx.arg.threadModel == ThreadModel::SharedMemory) + if (ctx.arg.sharedMemory) import.Memory.Flags |= WASM_LIMITS_FLAG_IS_SHARED; if (is64) import.Memory.Flags |= WASM_LIMITS_FLAG_IS_64; @@ -408,12 +408,12 @@ void MemorySection::writeBody() { raw_ostream &os = bodyOutputStream; bool hasMax = - maxMemoryPages != 0 || ctx.arg.threadModel == ThreadModel::SharedMemory; + maxMemoryPages != 0 || ctx.arg.sharedMemory; writeUleb128(os, 1, "memory count"); unsigned flags = 0; if (hasMax) flags |= WASM_LIMITS_FLAG_HAS_MAX; - if (ctx.arg.threadModel == ThreadModel::SharedMemory) + if (ctx.arg.sharedMemory) flags |= WASM_LIMITS_FLAG_IS_SHARED; if (ctx.arg.is64.value_or(false)) flags |= WASM_LIMITS_FLAG_IS_64; diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index d60dfcdaf43a6..48145d005c117 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -431,7 +431,7 @@ void Writer::layoutMemory() { } 
// Make space for the memory initialization flag - if (ctx.arg.threadModel == ThreadModel::SharedMemory && + if (ctx.arg.sharedMemory && hasPassiveInitializedSegments()) { memoryPtr = alignTo(memoryPtr, 4); ctx.sym.initMemoryFlag = symtab->addSyntheticDataSymbol( @@ -520,7 +520,7 @@ void Writer::layoutMemory() { // If no maxMemory config was supplied but we are building with // shared memory, we need to pick a sensible upper limit. - if (ctx.arg.threadModel == ThreadModel::SharedMemory && maxMemory == 0) { + if (ctx.arg.sharedMemory && maxMemory == 0) { if (ctx.isPic) maxMemory = maxMemorySetting; else @@ -1063,8 +1063,8 @@ OutputSegment *Writer::createOutputSegment(StringRef name) { // threads. In the non-shared memory case, we use passive segments only for // TLS segments, so that they can be reused, and for .bss segments, which // don't need to be included in the binary at all. - bool needsPassiveInit = ctx.arg.threadModel == ThreadModel::SharedMemory || - (ctx.arg.threadModel == ThreadModel::Cooperative && + bool needsPassiveInit = ctx.arg.sharedMemory || + (ctx.arg.cooperativeMultithreading && (s->isTLS() || s->name.starts_with(".bss"))); if (needsPassiveInit) s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE; @@ -1256,7 +1256,7 @@ void Writer::createInitMemoryFunction() { assert(ctx.sym.initMemory); assert(hasPassiveInitializedSegments()); uint64_t flagAddress; - if (ctx.arg.threadModel == ThreadModel::SharedMemory) { + if (ctx.arg.sharedMemory) { assert(ctx.sym.initMemoryFlag); flagAddress = ctx.sym.initMemoryFlag->getVA(); } @@ -1324,7 +1324,7 @@ void Writer::createInitMemoryFunction() { } }; - if (ctx.arg.threadModel == ThreadModel::SharedMemory) { + if (ctx.arg.sharedMemory) { // With PIC code we cache the flag address in local 0 if (ctx.isPic) { writeUleb128(os, 1, "num local decls"); @@ -1419,7 +1419,7 @@ void Writer::createInitMemoryFunction() { } } - if (ctx.arg.threadModel == ThreadModel::SharedMemory) { + if (ctx.arg.sharedMemory) { // Set flag to 2 to 
mark end of initialization writeGetFlagAddress(); writeI32Const(os, 2, "flag value"); >From c1a15696ba1d5de74ca5c0404b4405cc2f148a4e Mon Sep 17 00:00:00 2001 From: Sy Brand <[email protected]> Date: Mon, 1 Jun 2026 17:07:29 +0100 Subject: [PATCH 08/16] Cleanup options --- clang/lib/Driver/ToolChains/WebAssembly.cpp | 6 +++++- lld/test/wasm/cooperative-multithreading.s | 4 ++++ lld/wasm/Config.h | 1 + lld/wasm/Driver.cpp | 8 ++++++-- lld/wasm/Options.td | 2 +- 5 files changed, 17 insertions(+), 4 deletions(-) diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index ce5463b167a58..d1e1766a0dee3 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -93,6 +93,10 @@ static bool WantsCooperativeMultithreading(const llvm::Triple &Triple, return Triple.getOS() == llvm::Triple::WASIp3; } +static bool WantsSharedMemory(const llvm::Triple &Triple, const ArgList &Args) { + return WantsPthread(Triple, Args) && !WantsCooperativeMultithreading(Triple, Args); +} + void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, @@ -177,7 +181,7 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (WantsCooperativeMultithreading(ToolChain.getTriple(), Args)) CmdArgs.push_back("--cooperative-multithreading"); - if (WantsPthread(ToolChain.getTriple(), Args)) + if (WantsSharedMemory(ToolChain.getTriple(), Args)) CmdArgs.push_back("--shared-memory"); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { diff --git a/lld/test/wasm/cooperative-multithreading.s b/lld/test/wasm/cooperative-multithreading.s index cb41dd392d5e2..5adfaa99e40a0 100644 --- a/lld/test/wasm/cooperative-multithreading.s +++ b/lld/test/wasm/cooperative-multithreading.s @@ -7,6 +7,10 @@ # RUN: obj2yaml %t.wasm | FileCheck %s # RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s 
--check-prefix=DIS +# Test that --cooperative-multithreading and --shared-memory are mutually exclusive. +# RUN: not wasm-ld --cooperative-multithreading --shared-memory %t.o -o %t2.wasm 2>&1 | FileCheck %s --check-prefix=INCOMPAT +# INCOMPAT: --cooperative-multithreading is incompatible with --shared-memory + .globl __wasm_get_tls_base __wasm_get_tls_base: .functype __wasm_get_tls_base () -> (i32) diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index af74f0f40bbdf..60b04ad5abc87 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -66,6 +66,7 @@ struct Config { bool gcSections; llvm::StringSet<> keepSections; bool cooperativeMultithreading; + bool libcallThreadContext; std::optional<std::pair<llvm::StringRef, llvm::StringRef>> memoryImport; std::optional<llvm::StringRef> memoryExport; bool sharedMemory; diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index b06e0fbb55eec..605aa5dcebe94 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -556,13 +556,12 @@ static void readConfigs(opt::InputArgList &args) { } else if (args.hasArg(OPT_export_memory)) { ctx.arg.memoryExport = memoryName; } - ctx.arg.sharedMemory = args.hasArg(OPT_shared_memory); ctx.arg.soName = args.getLastArgValue(OPT_soname); ctx.arg.importTable = args.hasArg(OPT_import_table); ctx.arg.importUndefined = args.hasArg(OPT_import_undefined); ctx.arg.cooperativeMultithreading = - args.hasArg(OPT_cooperative_multithreading); + args.hasArg(OPT_cooperative_multithreading);; ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2); if (ctx.arg.ltoo > 3) error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo)); @@ -757,6 +756,11 @@ static void setConfigs() { ctx.arg.memoryExport = memoryName; } + if (ctx.arg.cooperativeMultithreading) { + if (ctx.arg.sharedMemory) + error("--cooperative-multithreading is incompatible with --shared-memory"); + ctx.arg.libcallThreadContext = true; + } } // Some command line options or some combinations of them are not allowed. 
diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td index bd2a7a19e0887..6d18a0400ef97 100644 --- a/lld/wasm/Options.td +++ b/lld/wasm/Options.td @@ -240,7 +240,7 @@ def initial_memory: JJ<"initial-memory=">, def cooperative_multithreading: FF<"cooperative-multithreading">, HelpText<"Enable cooperative multithreading.">; - + def max_memory: JJ<"max-memory=">, HelpText<"Maximum size of the linear memory">; >From 843491367a07d034b255ba630e6b00e17c58bca5 Mon Sep 17 00:00:00 2001 From: Sy Brand <[email protected]> Date: Mon, 1 Jun 2026 17:10:42 +0100 Subject: [PATCH 09/16] fmt --- clang/lib/Driver/ToolChains/WebAssembly.cpp | 3 ++- lld/wasm/Config.h | 4 +++- lld/wasm/Driver.cpp | 8 +++++--- lld/wasm/SyntheticSections.cpp | 6 ++---- lld/wasm/Writer.cpp | 9 ++++----- 5 files changed, 16 insertions(+), 14 deletions(-) diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index d1e1766a0dee3..d8f23175eb58b 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -94,7 +94,8 @@ static bool WantsCooperativeMultithreading(const llvm::Triple &Triple, } static bool WantsSharedMemory(const llvm::Triple &Triple, const ArgList &Args) { - return WantsPthread(Triple, Args) && !WantsCooperativeMultithreading(Triple, Args); + return WantsPthread(Triple, Args) && + !WantsCooperativeMultithreading(Triple, Args); } void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index 60b04ad5abc87..d4789b88203eb 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -136,7 +136,9 @@ struct Config { std::optional<std::vector<std::string>> extraFeatures; llvm::SmallVector<uint8_t, 0> buildIdVector; - bool isMultithreaded() const { return sharedMemory || cooperativeMultithreading; } + bool isMultithreaded() const { + return sharedMemory || cooperativeMultithreading; + } }; // The Ctx object hold all other (non-configuration) 
global state. diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 605aa5dcebe94..b2723220b5afb 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -556,12 +556,14 @@ static void readConfigs(opt::InputArgList &args) { } else if (args.hasArg(OPT_export_memory)) { ctx.arg.memoryExport = memoryName; } + ctx.arg.sharedMemory = args.hasArg(OPT_shared_memory); ctx.arg.soName = args.getLastArgValue(OPT_soname); ctx.arg.importTable = args.hasArg(OPT_import_table); ctx.arg.importUndefined = args.hasArg(OPT_import_undefined); ctx.arg.cooperativeMultithreading = - args.hasArg(OPT_cooperative_multithreading);; + args.hasArg(OPT_cooperative_multithreading); + ; ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2); if (ctx.arg.ltoo > 3) error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo)); @@ -755,10 +757,10 @@ static void setConfigs() { if (!ctx.arg.memoryExport.has_value() && !ctx.arg.memoryImport.has_value()) { ctx.arg.memoryExport = memoryName; } - if (ctx.arg.cooperativeMultithreading) { if (ctx.arg.sharedMemory) - error("--cooperative-multithreading is incompatible with --shared-memory"); + error( + "--cooperative-multithreading is incompatible with --shared-memory"); ctx.arg.libcallThreadContext = true; } } diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index 753a1c7fe5c82..050f61c7f5c56 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -265,8 +265,7 @@ void ImportSection::writeBody() { import.Kind = WASM_EXTERNAL_MEMORY; import.Memory.Flags = 0; import.Memory.Minimum = out.memorySec->numMemoryPages; - if (out.memorySec->maxMemoryPages != 0 || - ctx.arg.sharedMemory) { + if (out.memorySec->maxMemoryPages != 0 || ctx.arg.sharedMemory) { import.Memory.Flags |= WASM_LIMITS_FLAG_HAS_MAX; import.Memory.Maximum = out.memorySec->maxMemoryPages; } @@ -407,8 +406,7 @@ void TableSection::assignIndexes() { void MemorySection::writeBody() { raw_ostream &os = bodyOutputStream; - bool hasMax 
= - maxMemoryPages != 0 || ctx.arg.sharedMemory; + bool hasMax = maxMemoryPages != 0 || ctx.arg.sharedMemory; writeUleb128(os, 1, "memory count"); unsigned flags = 0; if (hasMax) diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 48145d005c117..d90ca859f3479 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -431,8 +431,7 @@ void Writer::layoutMemory() { } // Make space for the memory initialization flag - if (ctx.arg.sharedMemory && - hasPassiveInitializedSegments()) { + if (ctx.arg.sharedMemory && hasPassiveInitializedSegments()) { memoryPtr = alignTo(memoryPtr, 4); ctx.sym.initMemoryFlag = symtab->addSyntheticDataSymbol( "__wasm_init_memory_flag", WASM_SYMBOL_VISIBILITY_HIDDEN); @@ -1063,9 +1062,9 @@ OutputSegment *Writer::createOutputSegment(StringRef name) { // threads. In the non-shared memory case, we use passive segments only for // TLS segments, so that they can be reused, and for .bss segments, which // don't need to be included in the binary at all. - bool needsPassiveInit = ctx.arg.sharedMemory || - (ctx.arg.cooperativeMultithreading && - (s->isTLS() || s->name.starts_with(".bss"))); + bool needsPassiveInit = + ctx.arg.sharedMemory || (ctx.arg.cooperativeMultithreading && + (s->isTLS() || s->name.starts_with(".bss"))); if (needsPassiveInit) s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE; if (!ctx.arg.relocatable && name.starts_with(".bss")) >From 95db2cb7aa88390b3e92971bc50c0cde6881c319 Mon Sep 17 00:00:00 2001 From: Sy Brand <[email protected]> Date: Fri, 5 Jun 2026 09:19:17 +0100 Subject: [PATCH 10/16] Fix tests --- lld/test/wasm/stack-pointer-abi.s | 2 +- lld/test/wasm/tls-libcall.s | 2 +- lld/wasm/Writer.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lld/test/wasm/stack-pointer-abi.s b/lld/test/wasm/stack-pointer-abi.s index fbae0475bcba2..c8c6370dbc7ff 100644 --- a/lld/test/wasm/stack-pointer-abi.s +++ b/lld/test/wasm/stack-pointer-abi.s @@ -1,5 +1,5 @@ # RUN: llvm-mc -filetype=obj 
-triple=wasm32-unknown-unknown -o %t.o %s -# RUN: wasm-ld --cooperative-threading --no-gc-sections -o %t.libcall.wasm %t.o +# RUN: wasm-ld --cooperative-multithreading --no-gc-sections -o %t.libcall.wasm %t.o # RUN: obj2yaml %t.libcall.wasm | FileCheck %s --check-prefix=LIBCALL # RUN: wasm-ld --no-gc-sections -o %t.global.wasm %t.o # RUN: obj2yaml %t.global.wasm | FileCheck %s --check-prefix=GLOBAL diff --git a/lld/test/wasm/tls-libcall.s b/lld/test/wasm/tls-libcall.s index a0a7f37379bac..b8d8935dbf766 100644 --- a/lld/test/wasm/tls-libcall.s +++ b/lld/test/wasm/tls-libcall.s @@ -1,5 +1,5 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s -# RUN: wasm-ld --cooperative-threading --shared-memory -no-gc-sections -o %t.wasm %t.o +# RUN: wasm-ld --cooperative-multithreading -no-gc-sections -o %t.wasm %t.o # RUN: obj2yaml %t.wasm | FileCheck %s # RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index a03e5ff34e9e2..9f68432e1dc33 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -650,7 +650,7 @@ void Writer::populateTargetFeatures() { sym->importModule && sym->importModule == "env"; })) error(fileName + ": object file uses globals for thread context, " - "but --cooperative-threading was specified"); + "but --cooperative-multithreading was specified"); } if (inferFeatures) >From 617d774fe49e4a531c3f52483f1819d18d631d77 Mon Sep 17 00:00:00 2001 From: Sy Brand <[email protected]> Date: Fri, 5 Jun 2026 11:25:55 +0100 Subject: [PATCH 11/16] Update comment --- lld/wasm/Driver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index b2723220b5afb..347f78b342513 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -1027,7 +1027,7 @@ static void createOptionalSymbols() { if (ctx.sym.firstPageEnd) ctx.sym.firstPageEnd->setVA(ctx.arg.pageSize); - // For non-shared memory 
programs we still need to define __tls_base since we + // For non-multithreaded programs we still need to define __tls_base since we // allow object files built with TLS to be linked into single threaded // programs, and such object files can contain references to this symbol. // >From dd5aace9abcc3fcc4717c54450f26287397676e4 Mon Sep 17 00:00:00 2001 From: Sy Brand <[email protected]> Date: Fri, 5 Jun 2026 11:29:32 +0100 Subject: [PATCH 12/16] Update test --- llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll b/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll index 46ac1cd0509b7..0cefa1b6b1f21 100644 --- a/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll +++ b/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll @@ -1,6 +1,6 @@ ; Test that in cooperative threading mode (wasm32-wasip3), thread-local variables ; are NOT stripped even when atomics are absent. In non-cooperative mode -; (wasm32-unknown-unknown) TLS is stripped to .bss when atomics are absent. +; (wasm32-unknown-unknown) TLS is treated as normal data when atomics are absent. ; RUN: llc < %s -mtriple=wasm32-wasip3 -mcpu=mvp -mattr=-atomics,+bulk-memory \ ; RUN: | FileCheck %s --check-prefixes=COOP @@ -10,11 +10,16 @@ target triple = "wasm32-unknown-unknown" @foo = internal thread_local global i32 0 +@bar = internal thread_local global i32 1 ; Cooperative threading: TLS is preserved — the section stays .tbss. 
; COOP: .tbss.foo +; COOP: .tdata.bar ; COOP-NOT: .bss.foo +; COOP-NOT: .data.bar ; Non-cooperative: TLS stripped ; PLAIN: .bss.foo +; PLAIN: .data.bar ; PLAIN-NOT: .tbss.foo +; PLAIN-NOT: .tdata.bar >From 8258f165ba7c51dacc5d1c4bab9a0d435997c9e5 Mon Sep 17 00:00:00 2001 From: Sy Brand <[email protected]> Date: Mon, 8 Jun 2026 09:35:32 +0100 Subject: [PATCH 13/16] Cooperative multithreading -> cooperative threading --- clang/lib/Driver/ToolChains/WebAssembly.cpp | 2 +- clang/test/Driver/wasm-toolchain.c | 5 ++--- ...rative-multithreading.s => cooperative-threading.s} | 10 +++++----- lld/test/wasm/stack-pointer-abi.s | 2 +- lld/test/wasm/thread-context-abi-mismatch.s | 4 ++-- lld/test/wasm/tls-libcall.s | 2 +- lld/wasm/Config.h | 4 ++-- lld/wasm/Driver.cpp | 8 ++++---- lld/wasm/Options.td | 2 +- lld/wasm/Writer.cpp | 4 ++-- 10 files changed, 21 insertions(+), 22 deletions(-) rename lld/test/wasm/{cooperative-multithreading.s => cooperative-threading.s} (82%) diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index 9add4e157dc53..be418e7db1724 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -180,7 +180,7 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); if (WantsCooperativeMultithreading(ToolChain.getTriple(), Args)) - CmdArgs.push_back("--cooperative-multithreading"); + CmdArgs.push_back("--cooperative-threading"); if (WantsSharedMemory(ToolChain.getTriple(), Args)) CmdArgs.push_back("--shared-memory"); diff --git a/clang/test/Driver/wasm-toolchain.c b/clang/test/Driver/wasm-toolchain.c index 40d75da3166d9..c02a102fab081 100644 --- a/clang/test/Driver/wasm-toolchain.c +++ b/clang/test/Driver/wasm-toolchain.c @@ -304,9 +304,8 @@ // LINK_WALI_BASIC: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" // LINK_WALI_BASIC: wasm-ld{{.*}}" "-L/foo/lib/wasm32-linux-muslwali" "crt1.o" "[[temp]]" "-lc" 
"{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" -// Test that `wasm32-wasip3` passes `--cooperative-multithreading` to the linker. +// Test that `wasm32-wasip3` passes `--cooperative-threading` to the linker. // RUN: %clang -### --target=wasm32-wasip3 -fuse-ld=lld %s --sysroot /foo 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_WASIP3_COOP %s -// LINK_WASIP3_COOP: wasm-ld{{.*}}" {{.*}} "--cooperative-multithreading" -// LINK_WASIP3_COOP-NOT: "--libcall-thread-context" +// LINK_WASIP3_COOP: wasm-ld{{.*}}" {{.*}} "--cooperative-threading" diff --git a/lld/test/wasm/cooperative-multithreading.s b/lld/test/wasm/cooperative-threading.s similarity index 82% rename from lld/test/wasm/cooperative-multithreading.s rename to lld/test/wasm/cooperative-threading.s index 5adfaa99e40a0..89f3ebc82864c 100644 --- a/lld/test/wasm/cooperative-multithreading.s +++ b/lld/test/wasm/cooperative-threading.s @@ -1,15 +1,15 @@ -# Test that --cooperative-multithreading uses the libcall ABI naming for +# Test that --cooperative-threading uses the libcall ABI naming for # thread-context globals (__init_stack_pointer, __init_tls_base, etc.) and # works without --shared-memory and atomics. # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s -# RUN: wasm-ld --cooperative-multithreading -no-gc-sections -o %t.wasm %t.o +# RUN: wasm-ld --cooperative-threading -no-gc-sections -o %t.wasm %t.o # RUN: obj2yaml %t.wasm | FileCheck %s # RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS -# Test that --cooperative-multithreading and --shared-memory are mutually exclusive. -# RUN: not wasm-ld --cooperative-multithreading --shared-memory %t.o -o %t2.wasm 2>&1 | FileCheck %s --check-prefix=INCOMPAT -# INCOMPAT: --cooperative-multithreading is incompatible with --shared-memory +# Test that --cooperative-threading and --shared-memory are mutually exclusive. 
+# RUN: not wasm-ld --cooperative-threading --shared-memory %t.o -o %t2.wasm 2>&1 | FileCheck %s --check-prefix=INCOMPAT +# INCOMPAT: --cooperative-threading is incompatible with --shared-memory .globl __wasm_get_tls_base __wasm_get_tls_base: diff --git a/lld/test/wasm/stack-pointer-abi.s b/lld/test/wasm/stack-pointer-abi.s index c8c6370dbc7ff..fbae0475bcba2 100644 --- a/lld/test/wasm/stack-pointer-abi.s +++ b/lld/test/wasm/stack-pointer-abi.s @@ -1,5 +1,5 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s -# RUN: wasm-ld --cooperative-multithreading --no-gc-sections -o %t.libcall.wasm %t.o +# RUN: wasm-ld --cooperative-threading --no-gc-sections -o %t.libcall.wasm %t.o # RUN: obj2yaml %t.libcall.wasm | FileCheck %s --check-prefix=LIBCALL # RUN: wasm-ld --no-gc-sections -o %t.global.wasm %t.o # RUN: obj2yaml %t.global.wasm | FileCheck %s --check-prefix=GLOBAL diff --git a/lld/test/wasm/thread-context-abi-mismatch.s b/lld/test/wasm/thread-context-abi-mismatch.s index a817ca4407aab..3debc1de662a1 100644 --- a/lld/test/wasm/thread-context-abi-mismatch.s +++ b/lld/test/wasm/thread-context-abi-mismatch.s @@ -3,9 +3,9 @@ # as an indication that the global thread context ABI is being used. 
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s -# RUN: not wasm-ld --cooperative-multithreading %t.o -o %t.wasm 2>&1 | FileCheck %s +# RUN: not wasm-ld --cooperative-threading %t.o -o %t.wasm 2>&1 | FileCheck %s -# CHECK: object file uses globals for thread context, but --cooperative-multithreading was specified +# CHECK: object file uses globals for thread context, but --cooperative-threading was specified .globl _start _start: .functype _start () -> () diff --git a/lld/test/wasm/tls-libcall.s b/lld/test/wasm/tls-libcall.s index b8d8935dbf766..d8fb1c5e8a9ca 100644 --- a/lld/test/wasm/tls-libcall.s +++ b/lld/test/wasm/tls-libcall.s @@ -1,5 +1,5 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s -# RUN: wasm-ld --cooperative-multithreading -no-gc-sections -o %t.wasm %t.o +# RUN: wasm-ld --cooperative-threading -no-gc-sections -o %t.wasm %t.o # RUN: obj2yaml %t.wasm | FileCheck %s # RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index 70c32d60831ee..517789b2d3494 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -65,7 +65,7 @@ struct Config { bool growableTable; bool gcSections; llvm::StringSet<> keepSections; - bool cooperativeMultithreading; + bool cooperativeThreading; bool libcallThreadContext; std::optional<std::pair<llvm::StringRef, llvm::StringRef>> memoryImport; std::optional<llvm::StringRef> memoryExport; @@ -137,7 +137,7 @@ struct Config { llvm::SmallVector<uint8_t, 0> buildIdVector; bool isMultithreaded() const { - return sharedMemory || cooperativeMultithreading; + return sharedMemory || cooperativeThreading; } }; diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 347f78b342513..90c60da814114 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -561,8 +561,8 @@ static void readConfigs(opt::InputArgList &args) { ctx.arg.soName = args.getLastArgValue(OPT_soname); 
ctx.arg.importTable = args.hasArg(OPT_import_table); ctx.arg.importUndefined = args.hasArg(OPT_import_undefined); - ctx.arg.cooperativeMultithreading = - args.hasArg(OPT_cooperative_multithreading); + ctx.arg.cooperativeThreading = + args.hasArg(OPT_cooperative_threading); ; ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2); if (ctx.arg.ltoo > 3) @@ -757,10 +757,10 @@ static void setConfigs() { if (!ctx.arg.memoryExport.has_value() && !ctx.arg.memoryImport.has_value()) { ctx.arg.memoryExport = memoryName; } - if (ctx.arg.cooperativeMultithreading) { + if (ctx.arg.cooperativeThreading) { if (ctx.arg.sharedMemory) error( - "--cooperative-multithreading is incompatible with --shared-memory"); + "--cooperative-threading is incompatible with --shared-memory"); ctx.arg.libcallThreadContext = true; } } diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td index 6d18a0400ef97..bd46794e067b3 100644 --- a/lld/wasm/Options.td +++ b/lld/wasm/Options.td @@ -238,7 +238,7 @@ def page_size: JJ<"page-size=">, def initial_memory: JJ<"initial-memory=">, HelpText<"Initial size of the linear memory">; -def cooperative_multithreading: FF<"cooperative-multithreading">, +def cooperative_threading: FF<"cooperative-threading">, HelpText<"Enable cooperative multithreading.">; def max_memory: JJ<"max-memory=">, diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 9f68432e1dc33..42c0a48d0defb 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -650,7 +650,7 @@ void Writer::populateTargetFeatures() { sym->importModule && sym->importModule == "env"; })) error(fileName + ": object file uses globals for thread context, " - "but --cooperative-multithreading was specified"); + "but --cooperative-threading was specified"); } if (inferFeatures) @@ -1060,7 +1060,7 @@ OutputSegment *Writer::createOutputSegment(StringRef name) { // TLS segments, so that they can be reused, and for .bss segments, which // don't need to be included in the binary at all. 
bool needsPassiveInit = - ctx.arg.sharedMemory || (ctx.arg.cooperativeMultithreading && + ctx.arg.sharedMemory || (ctx.arg.cooperativeThreading && (s->isTLS() || s->name.starts_with(".bss"))); if (needsPassiveInit) s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE; >From e1218c4dcf262270495b036aff82ce1f372d2a1f Mon Sep 17 00:00:00 2001 From: Sy Brand <[email protected]> Date: Mon, 8 Jun 2026 09:39:07 +0100 Subject: [PATCH 14/16] Simplify comment --- lld/wasm/Driver.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 90c60da814114..dd2fcdd871940 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -1027,15 +1027,11 @@ static void createOptionalSymbols() { if (ctx.sym.firstPageEnd) ctx.sym.firstPageEnd->setVA(ctx.arg.pageSize); - // For non-multithreaded programs we still need to define __tls_base since we - // allow object files built with TLS to be linked into single threaded - // programs, and such object files can contain references to this symbol. - // - // However, in this case __tls_base is immutable and points directly to the - // start of the `.tdata` static segment. - // - // __tls_size and __tls_align are not needed in this case since they are only - // needed for __wasm_init_tls (which we do not create in this case). + // TLS object files may be linked into single-threaded programs, so + // __tls_base must always be defined. In this case it is immutable and points + // directly to the start of the `.tdata` segment. __tls_size and __tls_align + // are omitted since they are only used by __wasm_init_tls, which is not created + // in this case. 
if (!ctx.sym.tlsBase) ctx.sym.tlsBase = createOptionalGlobal("__tls_base", false); } >From bec4064356994e163c59cb91f8a5d15994d1a608 Mon Sep 17 00:00:00 2001 From: Sy Brand <[email protected]> Date: Mon, 8 Jun 2026 09:40:43 +0100 Subject: [PATCH 15/16] Fix test --- lld/test/wasm/cooperative-threading.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/test/wasm/cooperative-threading.s b/lld/test/wasm/cooperative-threading.s index 89f3ebc82864c..39df73858b6da 100644 --- a/lld/test/wasm/cooperative-threading.s +++ b/lld/test/wasm/cooperative-threading.s @@ -56,7 +56,7 @@ tls2: # CHECK: - Type: MEMORY # CHECK-NEXT: Memories: # CHECK-NEXT: - Minimum: 0x2 -# CHECK-NOT: Shared: false +# CHECK-NOT: Shared # Globals should use the libcall ABI naming, not the global ABI. # CHECK: GlobalNames: >From b4c392e7a537f33ae79827cfad353e8e5864f5fd Mon Sep 17 00:00:00 2001 From: Sy Brand <[email protected]> Date: Mon, 8 Jun 2026 09:58:10 +0100 Subject: [PATCH 16/16] Update comment --- lld/wasm/Writer.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 42c0a48d0defb..ca79e59768b5b 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -1056,9 +1056,11 @@ OutputSegment *Writer::createOutputSegment(StringRef name) { OutputSegment *s = make<OutputSegment>(name); // In the shared memory case, all data segments must be passive since they // will be initialized once by the main thread and then shared with other - // threads. In the non-shared memory case, we use passive segments only for - // TLS segments, so that they can be reused, and for .bss segments, which - // don't need to be included in the binary at all. + // threads. 
In the cooperative threading case, TLS segments must be passive + // so they can be re-initialized per-thread via memory.init, and .bss + // segments are passive to avoid serializing their zero bytes into the binary; + // they are still present as passive segment entries and zero-filled via + // memory.fill in __wasm_init_memory. bool needsPassiveInit = ctx.arg.sharedMemory || (ctx.arg.cooperativeThreading && (s->isTLS() || s->name.starts_with(".bss"))); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
