================
@@ -187,84 +193,337 @@ static Error executeCommands(StringRef ExecutablePath,
   return Error::success();
 }
 
-static Expected<SmallVector<std::string>> getInput(const ArgList &Args) {
-  // Collect all input bitcode files to be passed to the linking stage.
-  SmallVector<std::string> BitcodeFiles;
-  auto Inputs = Args.filtered(OPT_INPUT);
-  if (Inputs.empty())
-    return createStringError("no input files provided");
-  for (const opt::Arg *Arg : Inputs) {
-    StringRef Filename = Arg->getValue();
-    if (!sys::fs::exists(Filename) || sys::fs::is_directory(Filename))
-      return createStringError("input file '" + Filename + "' does not exist");
-    file_magic Magic;
-    if (auto EC = identify_magic(Filename, Magic))
-      return createStringError("failed to open file '" + Filename + "'");
-    // TODO: Current use case involves LLVM IR bitcode files as input.
-    // This will be extended to support SPIR-V IR files.
-    if (Magic != file_magic::bitcode)
-      return createStringError("unsupported file type for '" + Filename + "'");
-    BitcodeFiles.push_back(std::string(Filename));
+namespace {
+/// A minimal symbol interface used to drive archive member extraction. Only the
+/// flags required by the symbol-resolution fixed-point loop are tracked.
+struct Symbol {
+  enum Flags {
+    None = 0,
+    Undefined = 1 << 0,
+    Weak = 1 << 1,
+  };
+
+  Symbol() : SymFlags(None) {}
+  Symbol(Symbol::Flags F) : SymFlags(F) {}
+  Symbol(const irsymtab::Reader::SymbolRef Sym) : SymFlags(0) {
+    if (Sym.isUndefined())
+      SymFlags |= Undefined;
+    if (Sym.isWeak())
+      SymFlags |= Weak;
   }
-  return BitcodeFiles;
-}
 
-/// Handle cases where input file is a LLVM IR bitcode file.
-/// When clang-sycl-linker is called via clang-linker-wrapper tool, input files
-/// are LLVM IR bitcode files.
-// TODO: Support SPIR-V IR files.
-static Expected<std::unique_ptr<Module>> getBitcodeModule(StringRef File,
-                                                          LLVMContext &C) {
-  SMDiagnostic Err;
+  bool isWeak() const { return SymFlags & Weak; }
+  bool isUndefined() const { return SymFlags & Undefined; }
 
-  auto M = getLazyIRFileModule(File, Err, C);
-  if (M)
-    return std::move(M);
-  return createStringError(Err.getMessage());
-}
+  uint32_t SymFlags;
+};
+
+/// Description of a single input (positional file or -l library).
+struct InputDesc {
+  enum class Kind { File, Library };
+
+  StringRef Value; // File path, or library name for -l (the value after -l).
+  Kind InputKind = Kind::File;
+  bool WholeArchive = false; // --whole-archive state in effect at this input.
+};
+
+/// An input buffer pending archive-member resolution, together with its parsed
+/// IR symbol table. The symbol table is parsed once and reused across all
+/// fixed-point passes so members are not re-parsed on every pass.
+struct PendingInput {
+  std::unique_ptr<MemoryBuffer> Buffer;
+  bool IsLazy = false;
+  bool FromArchive = false;
+  IRSymtabFile Symtab;
+};
+
+/// Resolved input buffers and their target triple.
+struct ResolvedInputs {
+  SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
+  llvm::Triple TargetTriple;
+  StringRef TripleSource; // Source of the triple (--triple= or filename)
+};
+} // namespace
 
 static std::optional<std::string> findFile(StringRef Dir, const Twine &Name) {
-  SmallString<128> Path(Dir);
-  llvm::sys::path::append(Path, Name);
+  SmallString<128> Path;
+  sys::path::append(Path, Dir, Name);
+  // Skip directories so a directory whose name matches the requested library
+  // does not stop the search; a later -L path may hold the real archive.
   if (sys::fs::exists(Path) && !sys::fs::is_directory(Path))
-    return std::string(Path);
+    return static_cast<std::string>(Path);
   return std::nullopt;
 }
 
 static std::optional<std::string>
-searchLibrary(StringRef Name, ArrayRef<StringRef> SearchPaths) {
-  // An absolute path is taken as-is; -L paths are only consulted for relative
-  // names.
-  if (sys::path::is_absolute(Name)) {
-    if (sys::fs::exists(Name) && !sys::fs::is_directory(Name))
-      return std::string(Name);
-    return std::nullopt;
-  }
+findFromSearchPaths(StringRef Name, ArrayRef<StringRef> SearchPaths) {
   for (StringRef Dir : SearchPaths)
     if (std::optional<std::string> File = findFile(Dir, Name))
       return File;
   return std::nullopt;
 }
 
-/// Gather all library files. The list of files and its location are passed from
-/// driver.
-static Expected<SmallVector<std::string>>
-getBCLibraryNames(const ArgList &Args) {
+/// Search for static libraries in the linker's library path given input like
+/// `-lfoo`, `-l:libfoo.a`, or `-l/absolute/path/to/lib.a`.
+static std::optional<std::string>
+searchLibrary(StringRef Input, ArrayRef<StringRef> SearchPaths) {
+  // An absolute path is taken as-is; -L paths are only consulted for relative
+  // names.
+  if (sys::path::is_absolute(Input)) {
+    if (sys::fs::exists(Input) && !sys::fs::is_directory(Input))
+      return Input.str();
+    return std::nullopt;
+  }
+
+  if (Input.starts_with(":"))
+    return findFromSearchPaths(Input.drop_front(), SearchPaths);
+  SmallString<128> LibName("lib");
+  LibName += Input;
+  LibName += ".a";
+  return findFromSearchPaths(LibName, SearchPaths);
+}
+
+/// Scan a member's pre-parsed IR symbol table against \p SymTab and return true
+/// if the member should be extracted: it is non-lazy, or it defines a symbol
+/// that resolves a currently-undefined reference. Mirrors a linker's archive
+/// member selection.
+static bool scanSymbols(const IRSymtabFile &Symtab, StringMap<Symbol> &SymTab,
+                        bool IsLazy) {
+  bool Extracted = !IsLazy;
+  StringMap<Symbol> PendingSymbols;
+  for (unsigned I = 0; I != Symtab.Mods.size(); ++I) {
+    for (const auto &IRSym : Symtab.TheReader.module_symbols(I)) {
+      if (IRSym.isFormatSpecific() || !IRSym.isGlobal())
+        continue;
+
+      bool IsNewSymbol = IsLazy && !SymTab.count(IRSym.getName());
+      StringMap<Symbol> &Target = IsNewSymbol ? PendingSymbols : SymTab;
+      Symbol &OldSym = Target[IRSym.getName()];
+      Symbol Sym(IRSym);
+
+      if (OldSym.SymFlags == Symbol::None) {
+        OldSym = Sym;
+        if (!IsNewSymbol)
+          continue;
+      }
+
+      bool ResolvesReference =
+          !Sym.isUndefined() &&
+          (OldSym.isUndefined() || (OldSym.isWeak() && !Sym.isWeak())) &&
+          !(OldSym.isWeak() && OldSym.isUndefined() && IsLazy);
+      Extracted |= ResolvesReference;
+
+      if (ResolvesReference)
+        OldSym = Sym;
+    }
+  }
+  if (Extracted && IsLazy)
+    for (const auto &[Name, Sym] : PendingSymbols)
+      SymTab[Name] = Sym;
+  return Extracted;
+}
+
+/// Parse \p Buffer's IR symbol table and append it to \p Inputs. Errors if the
+/// buffer is not LLVM bitcode (the only member type the SYCL linker supports).
+static Error addBitcodeInput(SmallVector<PendingInput> &Inputs,
+                             std::unique_ptr<MemoryBuffer> Buffer, bool IsLazy,
+                             bool FromArchive) {
+  if (identify_magic(Buffer->getBuffer()) != file_magic::bitcode)
+    return createStringError("unsupported file type: '" +
+                             Buffer->getBufferIdentifier() + "'");
+  Expected<IRSymtabFile> SymtabOrErr = readIRSymtab(Buffer->getMemBufferRef());
+  if (!SymtabOrErr)
+    return SymtabOrErr.takeError();
+  Inputs.push_back(
+      {std::move(Buffer), IsLazy, FromArchive, std::move(*SymtabOrErr)});
+  return Error::success();
+}
+
+/// Resolve archive members from the given inputs using a symbol-driven
+/// fixed-point algorithm. For each input:
+/// - If it's a Library, search for lib<name>.a or :<name> in SearchPaths
+/// - If it's a File, use the path directly
+/// - Archives are expanded and members are lazily extracted based on symbol
+///   references unless WholeArchive is true
+/// - Non-archive bitcode inputs are always included
+///
+/// Returns the buffers to link, in extraction order, along with the resolved
+/// target triple. All returned buffers have compatible target triples;
+/// incompatible archive members are filtered during resolution.
+static Expected<ResolvedInputs> resolveArchiveMembers(
+    ArrayRef<InputDesc> Order, ArrayRef<StringRef> SearchPaths,
+    ArrayRef<StringRef> ForcedUndefs, StringRef TargetTripleArgValue) {
+  // Collect every candidate member, parsing each one's IR symbol table once.
+  SmallVector<PendingInput> Inputs;
+
+  for (const InputDesc &Desc : Order) {
+    std::optional<std::string> Filename;
+
+    if (Desc.InputKind == InputDesc::Kind::Library) {
+      Filename = searchLibrary(Desc.Value, SearchPaths);
+      if (!Filename)
+        return createStringError("unable to find library -l" + Desc.Value);
+    } else {
+      if (!sys::fs::exists(Desc.Value))
+        return createStringError("input file not found: '" + Desc.Value + "'");
+      if (sys::fs::is_directory(Desc.Value))
+        return createStringError("'" + Desc.Value + "': Is a directory");
+      Filename = Desc.Value.str();
+    }
+
+    auto BufferOrErr =
+        errorOrToExpected(MemoryBuffer::getFileOrSTDIN(*Filename));
+    if (!BufferOrErr)
+      return createFileError(*Filename, BufferOrErr.takeError());
+
+    MemoryBufferRef Buffer = (*BufferOrErr)->getMemBufferRef();
+    switch (identify_magic(Buffer.getBuffer())) {
+    case file_magic::bitcode:
+      if (Error Err = addBitcodeInput(Inputs, std::move(*BufferOrErr),
+                                      /*IsLazy=*/false, /*FromArchive=*/false))
+        return Err;
+      break;
+    case file_magic::archive: {
+      Expected<std::unique_ptr<object::Archive>> LibFile =
+          object::Archive::create(Buffer);
+      if (!LibFile)
+        return LibFile.takeError();
+      Error Err = Error::success();
+      for (auto Child : (*LibFile)->children(Err)) {
+        auto ChildBufferOrErr = Child.getMemoryBufferRef();
+        if (!ChildBufferOrErr)
+          return ChildBufferOrErr.takeError();
+        // Include archive name in buffer identifier for better diagnostics.
+        std::string BufferIdentifier =
+            (*Filename + "(" + ChildBufferOrErr->getBufferIdentifier() + ")")
+                .str();
+        std::unique_ptr<MemoryBuffer> ChildBuffer =
+            MemoryBuffer::getMemBufferCopy(ChildBufferOrErr->getBuffer(),
+                                           BufferIdentifier);
+        if (Error E = addBitcodeInput(Inputs, std::move(ChildBuffer),
+                                      !Desc.WholeArchive, /*FromArchive=*/true))
+          return E;
+      }
+      if (Err)
+        return Err;
+      break;
+    }
+    default:
+      return createStringError("unsupported file type: '" + *Filename + "'");
+    }
+  }
+
+  // Resolve the target triple: use --triple= if provided, otherwise infer from
+  // the first non-archive input with a non-empty triple.
+  llvm::Triple TargetTriple(TargetTripleArgValue);
+  StringRef TripleSource = TargetTriple.empty() ? "" : "--triple=";
+
+  if (TargetTriple.empty()) {
+    for (const PendingInput &In : Inputs) {
+      if (!In.FromArchive && In.Symtab.Mods.size() > 0) {
+        StringRef Triple = In.Symtab.TheReader.getTargetTriple();
+        if (!Triple.empty()) {
+          TargetTriple = llvm::Triple(Triple);
+          TripleSource = In.Buffer->getBufferIdentifier();
+          break;
+        }
+      }
+    }
+  }
+
+  // Seed symbol table with forced undefined symbols.
+  StringMap<Symbol> SymTab;
+  for (StringRef Sym : ForcedUndefs)
+    SymTab[Sym] = Symbol(Symbol::Undefined);
+
+  // Fixed-point loop to extract archive members. Each pass may resolve symbols
+  // that unlock further members; iterate until no new member is extracted.
+  SmallVector<std::unique_ptr<MemoryBuffer>> Resolved;
+  bool Extracted = true;
+  while (Extracted) {
+    Extracted = false;
+    for (PendingInput &In : Inputs) {
+      if (!In.Buffer)
+        continue;
+
+      // Filter archive members by target triple before symbol scanning.
+      // Members built for a different target are silently skipped, matching how
+      // a real linker treats device libraries built for other architectures.
+      if (In.FromArchive) {
+        StringRef MemberTriple = In.Symtab.TheReader.getTargetTriple();
+        if (!MemberTriple.empty() && MemberTriple != TargetTriple.str()) {
+          if (Verbose)
+            errs() << formatv(
+                "archive resolution: skipping {0}: triple {1} != {2}\n",
+                In.Buffer->getBufferIdentifier(), MemberTriple,
+                TargetTriple.str());
+          In.Buffer.reset();
----------------
YuriPlyakhin wrote:

`In.Symtab` still references the buffer.
```suggestion
          In.Buffer.reset();
          In.Symtab = {};
```

https://github.com/llvm/llvm-project/pull/202829
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to