[llvm-branch-commits] [lld] cc1cf63 - [lld-macho] Implement option: -undefined TREATMENT

2020-12-17 Thread Greg McGary via llvm-branch-commits

Author: Greg McGary
Date: 2020-12-17T17:40:50-08:00
New Revision: cc1cf6332a301331ef1b20e24948159dc291014a

URL: 
https://github.com/llvm/llvm-project/commit/cc1cf6332a301331ef1b20e24948159dc291014a
DIFF: 
https://github.com/llvm/llvm-project/commit/cc1cf6332a301331ef1b20e24948159dc291014a.diff

LOG: [lld-macho] Implement option: -undefined TREATMENT

TREATMENT can be `error`, `warning`, `suppress`, or `dynamic_lookup`.
The `dynamic_lookup` treatment remains unimplemented for now.

Differential Revision: https://reviews.llvm.org/D93263

Added: 
lld/test/MachO/treat-undef-sym.s

Modified: 
lld/MachO/Config.h
lld/MachO/Driver.cpp
lld/MachO/Options.td
lld/MachO/SymbolTable.cpp
lld/MachO/SymbolTable.h
lld/MachO/Writer.cpp
lld/test/MachO/demangle.s
lld/test/MachO/invalid/stub-link.s
lld/test/MachO/invalid/undefined-symbol.s
lld/test/MachO/weak-reference.s

Removed: 




diff  --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 029b9ab2296c..4f27ec2db45f 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -30,6 +30,14 @@ struct PlatformInfo {
   llvm::VersionTuple sdk;
 };
 
+enum class UndefinedSymbolTreatment {
+  unknown,
+  error,
+  warning,
+  suppress,
+  dynamic_lookup,
+};
+
 struct Configuration {
   Symbol *entry;
   bool hasReexports = false;
@@ -52,6 +60,8 @@ struct Configuration {
   bool demangle = false;
   llvm::MachO::Architecture arch;
   PlatformInfo platform;
+  UndefinedSymbolTreatment undefinedSymbolTreatment =
+  UndefinedSymbolTreatment::error;
   llvm::MachO::HeaderFileType outputType;
   std::vector systemLibraryRoots;
   std::vector librarySearchPaths;

diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 4f9c111bd8fb..63d101270cf5 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -599,6 +599,22 @@ static void handlePlatformVersion(const opt::Arg *arg) {
 error(Twine("malformed sdk version: ") + sdkVersionStr);
 }
 
+static void handleUndefined(const opt::Arg *arg) {
+  StringRef treatmentStr = arg->getValue(0);
+  config->undefinedSymbolTreatment =
+  llvm::StringSwitch(treatmentStr)
+  .Case("error", UndefinedSymbolTreatment::error)
+  .Case("warning", UndefinedSymbolTreatment::warning)
+  .Case("suppress", UndefinedSymbolTreatment::suppress)
+  .Case("dynamic_lookup", UndefinedSymbolTreatment::dynamic_lookup)
+  .Default(UndefinedSymbolTreatment::unknown);
+  if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::unknown) {
+warn(Twine("unknown -undefined TREATMENT '") + treatmentStr +
+ "', defaulting to 'error'");
+config->undefinedSymbolTreatment = UndefinedSymbolTreatment::error;
+  }
+}
+
 static void warnIfDeprecatedOption(const opt::Option &opt) {
   if (!opt.getGroup().isValid())
 return;
@@ -809,6 +825,9 @@ bool macho::link(llvm::ArrayRef argsArr, bool 
canExitEarly,
 case OPT_platform_version:
   handlePlatformVersion(arg);
   break;
+case OPT_undefined:
+  handleUndefined(arg);
+  break;
 default:
   break;
 }

diff  --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index e3ee14a74328..1ab2f9109ee0 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -408,7 +408,6 @@ def U : Separate<["-"], "U">,
 def undefined : Separate<["-"], "undefined">,
  MetaVarName<"">,
  HelpText<"Handle undefined symbols according to : error, 
warning, suppress, or dynamic_lookup (default is error)">,
- Flags<[HelpHidden]>,
  Group;
 def rpath : Separate<["-"], "rpath">,
  MetaVarName<"">,

diff  --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 93a2508951a5..ea231c9786e2 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -7,6 +7,7 @@
 
//===--===//
 
 #include "SymbolTable.h"
+#include "Config.h"
 #include "InputFiles.h"
 #include "Symbols.h"
 #include "lld/Common/ErrorHandler.h"
@@ -154,4 +155,26 @@ Symbol *SymbolTable::addDSOHandle(const MachHeaderSection 
*header) {
   return s;
 }
 
+void lld::macho::treatUndefinedSymbol(StringRef symbolName,
+  StringRef fileName) {
+  std::string message = ("undefined symbol: " + symbolName).str();
+  if (!fileName.empty())
+message += ("\n>>> referenced by " + fileName).str();
+  switch (config->undefinedSymbolTreatment) {
+  case UndefinedSymbolTreatment::suppress:
+break;
+  case UndefinedSymbolTreatment::error:
+error(message);
+break;
+  case UndefinedSymbolTreatment::warning:
+warn(message);
+break;
+  case UndefinedSymbolTreatment::dynamic_lookup:
+error("dynamic_lookup unimplemented for " + message);
+break;
+  case UndefinedSymbolTreatment::unknown:
+llvm_unreachable("unknown -undefined TREATMENT");
+  }
+}
+
 SymbolTable *macho::symtab;

diff  --git a/lld/MachO/Symbol

[llvm-branch-commits] [lld] d4ec334 - [lld-macho][nfc] Refactor to accommodate paired relocs

2020-12-17 Thread Greg McGary via llvm-branch-commits

Author: Greg McGary
Date: 2020-12-17T20:21:41-08:00
New Revision: d4ec3346b1baf31819d20a8950ced8be8f66a408

URL: 
https://github.com/llvm/llvm-project/commit/d4ec3346b1baf31819d20a8950ced8be8f66a408
DIFF: 
https://github.com/llvm/llvm-project/commit/d4ec3346b1baf31819d20a8950ced8be8f66a408.diff

LOG: [lld-macho][nfc] Refactor to accommodate paired relocs

This is a refactor to pave the way for supporting paired-ADDEND for ARM64. The 
only paired reloc type for X86_64 is SUBTRACTOR. In a later diff, I will add 
SUBTRACTOR for both X86_64 and ARM64.

* s/`getImplicitAddend`/`getAddend`/ because it handles all forms of addend: 
implicit, explicit, paired.
* add predicate `bool isPairedReloc()`
* the range check on `relInfo.r_symbolnum` guards an internal invariant, 
unrelated to user input, so use `assert()`, not `error()`
* minor cleanups & rearrangements in `InputFile::parseRelocations()`

Differential Revision: https://reviews.llvm.org/D90614

Added: 


Modified: 
lld/MachO/Arch/X86_64.cpp
lld/MachO/InputFiles.cpp
lld/MachO/Target.h

Removed: 




diff  --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
index c776e21d6f5f..729ef603adb7 100644
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -25,8 +25,9 @@ namespace {
 struct X86_64 : TargetInfo {
   X86_64();
 
-  uint64_t getImplicitAddend(MemoryBufferRef, const section_64 &,
- const relocation_info &) const override;
+  bool isPairedReloc(relocation_info) const override;
+  uint64_t getAddend(MemoryBufferRef, const section_64 &, relocation_info,
+ relocation_info) const override;
   void relocateOne(uint8_t *loc, const Reloc &, uint64_t val) const override;
 
   void writeStub(uint8_t *buf, const macho::Symbol &) const override;
@@ -43,7 +44,7 @@ struct X86_64 : TargetInfo {
 } // namespace
 
 static std::string getErrorLocation(MemoryBufferRef mb, const section_64 &sec,
-const relocation_info &rel) {
+relocation_info rel) {
   return ("invalid relocation at offset " + std::to_string(rel.r_address) +
   " of " + sec.segname + "," + sec.sectname + " in " +
   mb.getBufferIdentifier())
@@ -51,7 +52,7 @@ static std::string getErrorLocation(MemoryBufferRef mb, const 
section_64 &sec,
 }
 
 static void validateLength(MemoryBufferRef mb, const section_64 &sec,
-   const relocation_info &rel,
+   relocation_info rel,
ArrayRef validLengths) {
   if (find(validLengths, rel.r_length) != validLengths.end())
 return;
@@ -68,8 +69,13 @@ static void validateLength(MemoryBufferRef mb, const 
section_64 &sec,
   fatal(msg);
 }
 
-uint64_t X86_64::getImplicitAddend(MemoryBufferRef mb, const section_64 &sec,
-   const relocation_info &rel) const {
+bool X86_64::isPairedReloc(relocation_info rel) const {
+  return rel.r_type == X86_64_RELOC_SUBTRACTOR;
+}
+
+uint64_t X86_64::getAddend(MemoryBufferRef mb, const section_64 &sec,
+   relocation_info rel,
+   relocation_info pairedRel) const {
   auto *buf = reinterpret_cast(mb.getBufferStart());
   const uint8_t *loc = buf + sec.offset + rel.r_address;
 
@@ -139,7 +145,7 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, 
uint64_t val) const {
 break;
   default:
 llvm_unreachable(
-"getImplicitAddend should have flagged all unhandled relocation 
types");
+"getAddend should have flagged all unhandled relocation types");
   }
 
   switch (r.length) {

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index ce66c9650446..3a4466dd123a 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -206,31 +206,53 @@ static InputSection 
*findContainingSubsection(SubsectionMap &map,
 void ObjFile::parseRelocations(const section_64 &sec,
SubsectionMap &subsecMap) {
   auto *buf = reinterpret_cast(mb.getBufferStart());
-  ArrayRef anyRelInfos(
-  reinterpret_cast(buf + sec.reloff),
-  sec.nreloc);
-
-  for (const any_relocation_info &anyRelInfo : anyRelInfos) {
-if (anyRelInfo.r_word0 & R_SCATTERED)
+  ArrayRef relInfos(
+  reinterpret_cast(buf + sec.reloff), sec.nreloc);
+
+  for (size_t i = 0; i < relInfos.size(); i++) {
+// Paired relocations serve as Mach-O's method for attaching a
+// supplemental datum to a primary relocation record. ELF does not
+// need them because the *_RELOC_RELA records contain the extra
+// addend field, vs. *_RELOC_REL which omit the addend.
+//
+// The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend,
+// and the paired *_RELOC_UNSIGNED record holds the minuend. The
+// datum for each is a symbolic address. The result is the runtime
+// offset between two 

[llvm-branch-commits] [lld] 9993071 - Handle overflow beyond the 127 common encodings limit

2020-12-19 Thread Greg McGary via llvm-branch-commits

Author: Greg McGary
Date: 2020-12-19T14:54:37-08:00
New Revision: 99930719c66df9a8b67f3575d251b182c9cc8ee9

URL: 
https://github.com/llvm/llvm-project/commit/99930719c66df9a8b67f3575d251b182c9cc8ee9
DIFF: 
https://github.com/llvm/llvm-project/commit/99930719c66df9a8b67f3575d251b182c9cc8ee9.diff

LOG: Handle overflow beyond the 127 common encodings limit

The common encodings table holds only 127 entries. The encodings index for 
compact entries is 8 bits wide, and indexes 127..255 are stored locally to each 
second-level page. Prior to this diff, lld would `fatal()` if encodings 
overflowed the 127 limit.

This diff populates a per-second-level-page encodings table as needed. When the 
per-page encodings table hits its limit, we must terminate the page. If such 
early termination would consume fewer entries than a regular (non-compact) 
encoding page, then we prefer the regular format.

Caveat: one reason the common-encodings table might overflow is DWARF 
debug-info references, which are not yet implemented and will come in a later 
diff.

Differential Revision: https://reviews.llvm.org/D93267

Added: 


Modified: 
lld/MachO/UnwindInfoSection.cpp
lld/MachO/UnwindInfoSection.h
lld/test/MachO/tools/generate-cfi-funcs.py
lld/test/MachO/tools/validate-unwind-info.py

Removed: 




diff  --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index ed6cf050f576..acb4a9d69b79 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -25,6 +25,24 @@ using namespace llvm::MachO;
 using namespace lld;
 using namespace lld::macho;
 
+#define COMMON_ENCODINGS_MAX 127
+#define COMPACT_ENCODINGS_MAX 256
+
+#define SECOND_LEVEL_PAGE_BYTES 4096
+#define SECOND_LEVEL_PAGE_WORDS (SECOND_LEVEL_PAGE_BYTES / sizeof(uint32_t))
+#define REGULAR_SECOND_LEVEL_ENTRIES_MAX   
\
+  ((SECOND_LEVEL_PAGE_BYTES -  
\
+sizeof(unwind_info_regular_second_level_page_header)) /
\
+   sizeof(unwind_info_regular_second_level_entry))
+#define COMPRESSED_SECOND_LEVEL_ENTRIES_MAX
\
+  ((SECOND_LEVEL_PAGE_BYTES -  
\
+sizeof(unwind_info_compressed_second_level_page_header)) / 
\
+   sizeof(uint32_t))
+
+#define COMPRESSED_ENTRY_FUNC_OFFSET_BITS 24
+#define COMPRESSED_ENTRY_FUNC_OFFSET_MASK  
\
+  UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(~0)
+
 // Compact Unwind format is a Mach-O evolution of DWARF Unwind that
 // optimizes space and exception-time lookup.  Most DWARF unwind
 // entries can be replaced with Compact Unwind entries, but the ones
@@ -101,7 +119,7 @@ void UnwindInfoSection::finalize() {
   // Rather than sort & fold the 32-byte entries directly, we create a
   // vector of pointers to entries and sort & fold that instead.
   cuPtrVector.reserve(cuCount);
-  for (const auto &cuEntry : cuVector)
+  for (const CompactUnwindEntry64 &cuEntry : cuVector)
 cuPtrVector.emplace_back(&cuEntry);
   std::sort(cuPtrVector.begin(), cuPtrVector.end(),
 [](const CompactUnwindEntry64 *a, const CompactUnwindEntry64 *b) {
@@ -129,13 +147,11 @@ void UnwindInfoSection::finalize() {
   cuPtrVector.erase(foldWrite, cuPtrVector.end());
 
   // Count frequencies of the folded encodings
-  llvm::DenseMap encodingFrequencies;
+  EncodingMap encodingFrequencies;
   for (auto cuPtrEntry : cuPtrVector)
 encodingFrequencies[cuPtrEntry->encoding]++;
-  if (encodingFrequencies.size() > UNWIND_INFO_COMMON_ENCODINGS_MAX)
-error("TODO(gkm): handle common encodings table overflow");
 
-  // Make a table of encodings, sorted by descending frequency
+  // Make a vector of encodings, sorted by descending frequency
   for (const auto &frequency : encodingFrequencies)
 commonEncodings.emplace_back(frequency);
   std::sort(commonEncodings.begin(), commonEncodings.end(),
@@ -148,37 +164,67 @@ void UnwindInfoSection::finalize() {
   return a.second > b.second;
 });
 
-  // Split folded encodings into pages, limited by capacity of a page
-  // and the 24-bit range of function offset
-  //
-  // Record the page splits as a vector of iterators on cuPtrVector
-  // such that successive elements form a semi-open interval. E.g.,
-  // page X's bounds are thus: [ pageBounds[X] .. pageBounds[X+1] )
-  //
-  // Note that pageBounds.size() is one greater than the number of
-  // pages, and pageBounds.back() holds the sentinel cuPtrVector.cend()
-  pageBounds.push_back(cuPtrVector.cbegin());
-  // TODO(gkm): cut 1st page entries short to accommodate section headers ???
-  CompactUnwindEntry64 cuEntryKey;
-  for (size_t i = 0;;) {
-// Limit the search to entries that can fit within a 4 KiB page.
-const auto pageBegin = pageBoun