clayborg created this revision.
clayborg added reviewers: labath, aprantl.
Herald added subscribers: llvm-commits, hiraditya, mgorny.
Herald added a project: LLVM.
clayborg removed a subscriber: lldb-commits.
Herald added a subscriber: ormris.

Lookup functions are designed to not fully decode a FunctionInfo, LineTable or
InlineInfo; they decode only what is needed into a LookupResult object. This
allows lookups to avoid costly memory allocations and avoid parsing large
amounts of information once a suitable match is found.

LookupResult objects contain the address that was looked up, the concrete
function address range, the name of the concrete function, and a list of source
locations: one for each inline function and one for the concrete function.
This allows one address to turn into multiple frames and improves the signal
you get when symbolicating addresses in GSYM files.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D70993

Files:
  llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
  llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
  llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
  llvm/include/llvm/DebugInfo/GSYM/LineTable.h
  llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
  llvm/include/llvm/DebugInfo/GSYM/Range.h
  llvm/lib/DebugInfo/GSYM/CMakeLists.txt
  llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
  llvm/lib/DebugInfo/GSYM/GsymReader.cpp
  llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
  llvm/lib/DebugInfo/GSYM/LineTable.cpp
  llvm/lib/DebugInfo/GSYM/LookupResult.cpp
  llvm/lib/DebugInfo/GSYM/Range.cpp
  llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp

Index: llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
===================================================================
--- llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -1302,3 +1302,144 @@
                             "address 0x1030 not in GSYM");
   }
 }
+
+TEST(GSYMTest, TestGsymLookups) {
+  // Create a GSYM file containing a function with inline information and
+  // verify that lookups work. Lookups do not decode the entire FunctionInfo
+  // or InlineInfo, they extract only what the lookup needs, which avoids
+  // allocations that can slow down symbolication.
+  GsymCreator GC;
+  FunctionInfo FI(0x1000, 0x100, GC.insertString("main"));
+  const auto ByteOrder = support::endian::system_endianness();
+  FI.OptLineTable = LineTable();
+  const uint32_t MainFileIndex = GC.insertFile("/tmp/main.c");
+  const uint32_t FooFileIndex = GC.insertFile("/tmp/foo.h");
+  FI.OptLineTable->push(LineEntry(0x1000, MainFileIndex, 5));
+  FI.OptLineTable->push(LineEntry(0x1010, FooFileIndex, 10));
+  FI.OptLineTable->push(LineEntry(0x1012, FooFileIndex, 20));
+  FI.OptLineTable->push(LineEntry(0x1014, FooFileIndex, 11));
+  FI.OptLineTable->push(LineEntry(0x1016, FooFileIndex, 30));
+  FI.OptLineTable->push(LineEntry(0x1018, FooFileIndex, 12));
+  FI.OptLineTable->push(LineEntry(0x1020, MainFileIndex, 8));
+  FI.Inline = InlineInfo();
+
+  FI.Inline->Name = GC.insertString("inline1");
+  FI.Inline->CallFile = MainFileIndex;
+  FI.Inline->CallLine = 6;
+  FI.Inline->Ranges.insert(AddressRange(0x1010, 0x1020));
+  InlineInfo Inline2;
+  Inline2.Name = GC.insertString("inline2");
+  Inline2.CallFile = FooFileIndex;
+  Inline2.CallLine = 33;
+  Inline2.Ranges.insert(AddressRange(0x1012, 0x1014));
+  FI.Inline->Children.emplace_back(Inline2);
+  InlineInfo Inline3;
+  Inline3.Name = GC.insertString("inline3");
+  Inline3.CallFile = FooFileIndex;
+  Inline3.CallLine = 35;
+  Inline3.Ranges.insert(AddressRange(0x1016, 0x1018));
+  FI.Inline->Children.emplace_back(Inline3);
+  GC.addFunctionInfo(std::move(FI));
+  Error FinalizeErr = GC.finalize(llvm::nulls());
+  ASSERT_FALSE(FinalizeErr);
+  SmallString<512> Str;
+  raw_svector_ostream OutStrm(Str);
+  FileWriter FW(OutStrm, ByteOrder);
+  llvm::Error Err = GC.encode(FW);
+  ASSERT_FALSE((bool)Err);
+  // A reader that fails to load must fail the test, not silently skip it.
+  auto Gsym = GsymReader::copyBuffer(OutStrm.str());
+  ASSERT_TRUE(bool(Gsym));
+  // Verify the lookups. Sizes are asserted so later indexing stays in range.
+  auto LR = Gsym->lookup(0x1000);
+  ASSERT_TRUE(bool(LR));
+  ASSERT_EQ(LR->Locations.size(), 1u);
+  EXPECT_EQ(LR->Locations[0].Name, "main");
+  EXPECT_EQ(LR->Locations[0].Line, 5u);
+  EXPECT_EQ(LR->Locations[0].Dir, "/tmp");
+  EXPECT_EQ(LR->Locations[0].Base, "main.c");
+
+  LR = Gsym->lookup(0x100F);
+  ASSERT_TRUE(bool(LR));
+  ASSERT_EQ(LR->Locations.size(), 1u);
+  EXPECT_EQ(LR->Locations[0].Name, "main");
+  EXPECT_EQ(LR->Locations[0].Line, 5u);
+  EXPECT_EQ(LR->Locations[0].Dir, "/tmp");
+  EXPECT_EQ(LR->Locations[0].Base, "main.c");
+
+  LR = Gsym->lookup(0x1010);
+  ASSERT_TRUE(bool(LR));
+  ASSERT_EQ(LR->Locations.size(), 2u);
+  EXPECT_EQ(LR->Locations[0].Name, "inline1");
+  EXPECT_EQ(LR->Locations[0].Line, 10u);
+  EXPECT_EQ(LR->Locations[0].Dir, "/tmp");
+  EXPECT_EQ(LR->Locations[0].Base, "foo.h");
+  EXPECT_EQ(LR->Locations[1].Name, "main");
+  EXPECT_EQ(LR->Locations[1].Line, 6u);
+  EXPECT_EQ(LR->Locations[1].Dir, "/tmp");
+  EXPECT_EQ(LR->Locations[1].Base, "main.c");
+
+  LR = Gsym->lookup(0x1012);
+  ASSERT_TRUE(bool(LR));
+  ASSERT_EQ(LR->Locations.size(), 3u);
+  EXPECT_EQ(LR->Locations[0].Name, "inline2");
+  EXPECT_EQ(LR->Locations[0].Line, 20u);
+  EXPECT_EQ(LR->Locations[0].Dir, "/tmp");
+  EXPECT_EQ(LR->Locations[0].Base, "foo.h");
+  EXPECT_EQ(LR->Locations[1].Name, "inline1");
+  EXPECT_EQ(LR->Locations[1].Line, 33u);
+  EXPECT_EQ(LR->Locations[1].Dir, "/tmp");
+  EXPECT_EQ(LR->Locations[1].Base, "foo.h");
+  EXPECT_EQ(LR->Locations[2].Name, "main");
+  EXPECT_EQ(LR->Locations[2].Line, 6u);
+  EXPECT_EQ(LR->Locations[2].Dir, "/tmp");
+  EXPECT_EQ(LR->Locations[2].Base, "main.c");
+
+  LR = Gsym->lookup(0x1014);
+  ASSERT_TRUE(bool(LR));
+  ASSERT_EQ(LR->Locations.size(), 2u);
+  EXPECT_EQ(LR->Locations[0].Name, "inline1");
+  EXPECT_EQ(LR->Locations[0].Line, 11u);
+  EXPECT_EQ(LR->Locations[0].Dir, "/tmp");
+  EXPECT_EQ(LR->Locations[0].Base, "foo.h");
+  EXPECT_EQ(LR->Locations[1].Name, "main");
+  EXPECT_EQ(LR->Locations[1].Line, 6u);
+  EXPECT_EQ(LR->Locations[1].Dir, "/tmp");
+  EXPECT_EQ(LR->Locations[1].Base, "main.c");
+
+  LR = Gsym->lookup(0x1016);
+  ASSERT_TRUE(bool(LR));
+  ASSERT_EQ(LR->Locations.size(), 3u);
+  EXPECT_EQ(LR->Locations[0].Name, "inline3");
+  EXPECT_EQ(LR->Locations[0].Line, 30u);
+  EXPECT_EQ(LR->Locations[0].Dir, "/tmp");
+  EXPECT_EQ(LR->Locations[0].Base, "foo.h");
+  EXPECT_EQ(LR->Locations[1].Name, "inline1");
+  EXPECT_EQ(LR->Locations[1].Line, 35u);
+  EXPECT_EQ(LR->Locations[1].Dir, "/tmp");
+  EXPECT_EQ(LR->Locations[1].Base, "foo.h");
+  EXPECT_EQ(LR->Locations[2].Name, "main");
+  EXPECT_EQ(LR->Locations[2].Line, 6u);
+  EXPECT_EQ(LR->Locations[2].Dir, "/tmp");
+  EXPECT_EQ(LR->Locations[2].Base, "main.c");
+
+  LR = Gsym->lookup(0x1018);
+  ASSERT_TRUE(bool(LR));
+  ASSERT_EQ(LR->Locations.size(), 2u);
+  EXPECT_EQ(LR->Locations[0].Name, "inline1");
+  EXPECT_EQ(LR->Locations[0].Line, 12u);
+  EXPECT_EQ(LR->Locations[0].Dir, "/tmp");
+  EXPECT_EQ(LR->Locations[0].Base, "foo.h");
+  EXPECT_EQ(LR->Locations[1].Name, "main");
+  EXPECT_EQ(LR->Locations[1].Line, 6u);
+  EXPECT_EQ(LR->Locations[1].Dir, "/tmp");
+  EXPECT_EQ(LR->Locations[1].Base, "main.c");
+
+  LR = Gsym->lookup(0x1020);
+  ASSERT_TRUE(bool(LR));
+  ASSERT_EQ(LR->Locations.size(), 1u);
+  EXPECT_EQ(LR->Locations[0].Name, "main");
+  EXPECT_EQ(LR->Locations[0].Line, 8u);
+  EXPECT_EQ(LR->Locations[0].Dir, "/tmp");
+  EXPECT_EQ(LR->Locations[0].Base, "main.c");
+}
Index: llvm/lib/DebugInfo/GSYM/Range.cpp
===================================================================
--- llvm/lib/DebugInfo/GSYM/Range.cpp
+++ llvm/lib/DebugInfo/GSYM/Range.cpp
@@ -100,3 +100,15 @@
   for (auto &Range : Ranges)
     Range.decode(Data, BaseAddr, Offset);
 }
+
+void AddressRange::skip(DataExtractor &Data, uint64_t &Offset) {
+  Data.getULEB128(&Offset); // Step over the first ULEB128 of the range.
+  Data.getULEB128(&Offset); // Step over the second ULEB128 of the range.
+}
+
+uint64_t AddressRanges::skip(DataExtractor &Data, uint64_t &Offset) {
+  uint64_t NumRanges = Data.getULEB128(&Offset); // Count read from the data.
+  for (uint64_t I = 0; I < NumRanges && Data.isValidOffset(Offset); ++I)
+    AddressRange::skip(Data, Offset); // Ends early once the data runs out.
+  return NumRanges;
+}
Index: llvm/lib/DebugInfo/GSYM/LookupResult.cpp
===================================================================
--- /dev/null
+++ llvm/lib/DebugInfo/GSYM/LookupResult.cpp
@@ -0,0 +1,58 @@
+//===- LookupResult.cpp -------------------------------------------------*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/LookupResult.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace gsym;
+
+/// Return the directory and basename of Locations[Index] as a single path.
+std::string LookupResult::getSourceFile(uint32_t Index) const {
+  // An out of range index yields an empty path.
+  if (Index >= Locations.size())
+    return std::string();
+  const StringRef Dir = Locations[Index].Dir;
+  const StringRef Base = Locations[Index].Base;
+  // With only one component present (or neither), return it unchanged.
+  if (Dir.empty())
+    return Base.str();
+  if (Base.empty())
+    return Dir.str();
+  // Both components are present, join them with llvm::sys::path::append().
+  llvm::SmallString<64> Joined;
+  llvm::sys::path::append(Joined, Dir, Base);
+  return std::string(Joined.begin(), Joined.end());
+}
+
+void LookupResult::dump(llvm::raw_ostream &OS) const {
+  OS << HEX64(LookupAddr) << ": ";
+  const SourceLocation *Last = Locations.empty() ? nullptr : &Locations.back();
+  bool IsFirst = true;
+  for (const SourceLocation &Loc : Locations) {
+    // Every location after the first starts on its own indented line.
+    if (!IsFirst)
+      (OS << '\n').indent(20);
+    IsFirst = false;
+    OS << Loc.Name;
+    if (!Loc.Base.empty()) {
+      OS << " @ ";
+      if (!Loc.Dir.empty())
+        OS << Loc.Dir << '/';
+      OS << Loc.Base << ':' << Loc.Line;
+    }
+    // Every entry except the last one in the array is tagged as inlined.
+    if (&Loc != Last)
+      OS << " [inlined]";
+  }
+  OS << '\n';
+}
Index: llvm/lib/DebugInfo/GSYM/LineTable.cpp
===================================================================
--- llvm/lib/DebugInfo/GSYM/LineTable.cpp
+++ llvm/lib/DebugInfo/GSYM/LineTable.cpp
@@ -262,8 +262,8 @@
 // Parse the line table on the fly and find the row we are looking for.
 // We will need to determine if we need to cache the line table by calling
 // LineTable::parseAllEntries(...) or just call this function each time.
-// There is a CPU vs memory tradeoff we will need to determine.
-LineEntry LineTable::lookup(DataExtractor &Data, uint64_t BaseAddr, uint64_t Addr) {
+// There is a CPU vs memory tradeoff we will need to determine.
+Expected<LineEntry> LineTable::lookup(DataExtractor &Data, uint64_t BaseAddr, uint64_t Addr) {
   LineEntry Result;
   llvm::Error Err = parse(Data, BaseAddr,
                           [Addr, &Result](const LineEntry &Row) -> bool {
@@ -277,7 +277,13 @@
     }
     return true; // Keep parsing till we find the right row.
   });
-  return Result;
+  if (Err)
+    return std::move(Err);
+  if (Result.isValid())
+    return Result;
+  return createStringError(std::errc::invalid_argument,
+                           "address 0x%" PRIx64 " is not in the line table",
+                           Addr);
 }
 
 raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LineTable &LT) {
Index: llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
===================================================================
--- llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
+++ llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
@@ -9,6 +9,7 @@
 
 #include "llvm/DebugInfo/GSYM/FileEntry.h"
 #include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
 #include "llvm/Support/DataExtractor.h"
 #include <algorithm>
@@ -60,6 +61,108 @@
   return llvm::None;
 }
 
+/// Skip an InlineInfo object in the specified data at the specified offset.
+///
+/// Used during the InlineInfo::lookup() call to quickly skip child InlineInfo
+/// objects whose address ranges do not contain the lookup address. This
+/// avoids allocations by not appending child InlineInfo objects to the
+/// InlineInfo::Children array.
+///
+/// \param Data The binary stream to read the data from.
+/// \param Offset The byte offset within \a Data; advanced past the object.
+/// \param SkippedRanges If true, address ranges have already been skipped.
+///
+/// \returns True if an InlineInfo was skipped, false if an empty address
+/// range list was read instead, which ends a list of children.
+static bool skip(DataExtractor &Data, uint64_t &Offset, bool SkippedRanges) {
+  if (!SkippedRanges) {
+    if (AddressRanges::skip(Data, Offset) == 0)
+      return false;
+  }
+  bool HasChildren = Data.getU8(&Offset) != 0;
+  Data.getU32(&Offset); // Skip Inline.Name.
+  Data.getULEB128(&Offset); // Skip Inline.CallFile.
+  Data.getULEB128(&Offset); // Skip Inline.CallLine.
+  if (HasChildren) {
+    while (skip(Data, Offset, false /* SkippedRanges */))
+      /* Do nothing */;
+  }
+  // We skipped a valid InlineInfo.
+  return true;
+}
+
+/// A lookup helper function.
+///
+/// Used during the InlineInfo::lookup() call to parse an InlineInfo object
+/// only if it contains the lookup address and to skip all others. This
+/// avoids allocations by never populating the InlineInfo::Children array.
+///
+/// \param GR The reader used to resolve file entries and strings.
+/// \param Data The binary stream to read the data from.
+/// \param Offset The byte offset within \a Data; advanced as data is read.
+/// \param BaseAddr The address that the encoded range offsets are relative to.
+/// \param Addr The address being looked up.
+/// \param SrcLocs The source locations to augment; must not be empty.
+/// \param Err Set to an error if a call file cannot be resolved.
+/// \returns True when scanning of the current sibling list should stop
+/// (match found, end of list, or error), false to continue with the next.
+static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset,
+                   uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs,
+                   llvm::Error &Err) {
+  ErrorAsOutParameter ErrAsOut(&Err); // Err may now be assigned to safely.
+  InlineInfo Inline;
+  Inline.Ranges.decode(Data, BaseAddr, Offset);
+  if (Inline.Ranges.empty())
+    return true;
+  if (!Inline.Ranges.contains(Addr)) {
+    // Not a match, quickly skip this InlineInfo object and all its children.
+    skip(Data, Offset, true /* SkippedRanges */);
+    return false;
+  }
+  // The address range is contained within this InlineInfo, add the source
+  // location for this InlineInfo and any children that contain the address.
+  bool HasChildren = Data.getU8(&Offset) != 0;
+  Inline.Name = Data.getU32(&Offset);
+  Inline.CallFile = (uint32_t)Data.getULEB128(&Offset);
+  Inline.CallLine = (uint32_t)Data.getULEB128(&Offset);
+  if (HasChildren) {
+    // Child address ranges are encoded relative to the first address in the
+    // parent InlineInfo object.
+    const auto ChildBaseAddr = Inline.Ranges[0].Start;
+    while (!lookup(GR, Data, Offset, ChildBaseAddr, Addr, SrcLocs, Err))
+      /* Keep scanning the siblings. */;
+    // A child failed, propagate its error instead of adding more locations.
+    if (Err)
+      return true;
+  }
+
+  Optional<FileEntry> CallFile = GR.getFile(Inline.CallFile);
+  if (!CallFile) {
+    Err = createStringError(std::errc::invalid_argument,
+                            "failed to extract file[%" PRIu32 "]",
+                            Inline.CallFile);
+    return true; // Stops the caller's sibling scan, the lookup has failed.
+  }
+  SourceLocation SrcLoc;
+  SrcLoc.Name = SrcLocs.back().Name;
+  SrcLoc.Dir = GR.getString(CallFile->Dir);
+  SrcLoc.Base = GR.getString(CallFile->Base);
+  SrcLoc.Line = Inline.CallLine;
+  SrcLocs.back().Name = GR.getString(Inline.Name);
+  SrcLocs.push_back(SrcLoc);
+  return true;
+}
+
+llvm::Error InlineInfo::lookup(const GsymReader &GR, DataExtractor &Data,
+                               uint64_t BaseAddr, uint64_t Addr,
+                               SourceLocations &SrcLocs) {
+  // Run the recursive helper from offset zero; failures come back in Err.
+  uint64_t Offset = 0;
+  llvm::Error Err = Error::success();
+  ::lookup(GR, Data, Offset, BaseAddr, Addr, SrcLocs, Err);
+  return Err;
+}
+
 /// Decode an InlineInfo in Data at the specified offset.
 ///
 /// A local helper function to decode InlineInfo objects. This function is
Index: llvm/lib/DebugInfo/GSYM/GsymReader.cpp
===================================================================
--- llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -263,3 +263,18 @@
                            "failed to extract address[%" PRIu64 "]",
                            *AddressIndex);
 }
+
+llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const {
+  Expected<uint64_t> ExpectedIdx = getAddressIndex(Addr);
+  if (!ExpectedIdx)
+    return ExpectedIdx.takeError();
+  const uint64_t Idx = *ExpectedIdx;
+  assert(Idx < AddrInfoOffsets.size()); // parse() should have checked this.
+  const Optional<uint64_t> FuncAddr = getAddress(Idx);
+  if (!FuncAddr)
+    return createStringError(std::errc::invalid_argument,
+                             "failed to extract address[%" PRIu64 "]", Idx);
+  DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffsets[Idx]),
+                     Endian, 4);
+  return FunctionInfo::lookup(Data, *this, *FuncAddr, Addr);
+}
Index: llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
===================================================================
--- llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
 #include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
 #include "llvm/DebugInfo/GSYM/LineTable.h"
 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
 #include "llvm/Support/DataExtractor.h"
@@ -145,3 +146,104 @@
   O.writeU32(0);
   return FuncInfoOffset;
 }
+
+
+llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
+                                                  const GsymReader &GR,
+                                                  uint64_t FuncAddr,
+                                                  uint64_t Addr) {
+  LookupResult LR;
+  LR.LookupAddr = Addr;
+  LR.FuncRange.Start = FuncAddr;
+  uint64_t Offset = 0;
+  LR.FuncRange.End = FuncAddr + Data.getU32(&Offset);
+  uint32_t NameOffset = Data.getU32(&Offset);
+  // The "lookup" function doesn't report errors as accurately as the "decode"
+  // function as it is meant to be fast. For more accurate errors we could call
+  // "decode".
+  if (!Data.isValidOffset(Offset))
+    return createStringError(std::errc::io_error,
+                              "FunctionInfo data is truncated");
+  // This function will be called with the result of a binary search of the
+  // address table, we must still make sure the address does not fall into a
+  // gap between functions or after the last function.
+  if (Addr >= LR.FuncRange.End)
+    return createStringError(std::errc::io_error,
+        "address 0x%" PRIx64 " is not in GSYM", Addr);
+
+  if (NameOffset == 0)
+    return createStringError(std::errc::io_error,
+        "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x00000000",
+        Offset - 4);
+  LR.FuncName = GR.getString(NameOffset);
+  bool Done = false;
+  Optional<LineEntry> LineEntry;
+  Optional<DataExtractor> InlineInfoData;
+  while (!Done) {
+    if (!Data.isValidOffsetForDataOfSize(Offset, 8))
+      return createStringError(std::errc::io_error,
+                               "FunctionInfo data is truncated");
+    const uint32_t InfoType = Data.getU32(&Offset);
+    const uint32_t InfoLength = Data.getU32(&Offset);
+    const StringRef InfoBytes = Data.getData().substr(Offset, InfoLength);
+    if (InfoLength != InfoBytes.size())
+      return createStringError(std::errc::io_error,
+                               "FunctionInfo data is truncated");
+    DataExtractor InfoData(InfoBytes, Data.isLittleEndian(),
+                           Data.getAddressSize());
+    switch (InfoType) {
+      case InfoType::EndOfList:
+        Done = true;
+        break;
+
+      case InfoType::LineTableInfo:
+        if (auto ExpectedLE = LineTable::lookup(InfoData, FuncAddr, Addr))
+          LineEntry = ExpectedLE.get();
+        else
+          return ExpectedLE.takeError();
+        break;
+
+      case InfoType::InlineInfo:
+        // We will parse the inline info after our line table, but only if
+        // we have a line entry.
+        InlineInfoData = InfoData;
+        break;
+
+      default:
+        break;
+    }
+    Offset += InfoLength;
+  }
+
+  if (!LineEntry) {
+    // We don't have a valid line entry for our address, fill in our source
+    // location as best we can and return.
+    SourceLocation SrcLoc;
+    SrcLoc.Name = LR.FuncName;
+    LR.Locations.push_back(SrcLoc);
+    return LR;
+  }
+
+  Optional<FileEntry> LineEntryFile = GR.getFile(LineEntry->File);
+  if (!LineEntryFile)
+    return createStringError(std::errc::invalid_argument,
+                              "failed to extract file[%" PRIu32 "]",
+                              LineEntry->File);
+
+  SourceLocation SrcLoc;
+  SrcLoc.Name = LR.FuncName;
+  SrcLoc.Dir = GR.getString(LineEntryFile->Dir);
+  SrcLoc.Base = GR.getString(LineEntryFile->Base);
+  SrcLoc.Line = LineEntry->Line;
+  LR.Locations.push_back(SrcLoc);
+  // If we don't have inline information, we are done.
+  if (!InlineInfoData)
+    return LR;
+  // We have inline information. Try to augment the lookup result with this
+  // data.
+  llvm::Error Err = InlineInfo::lookup(GR, *InlineInfoData, FuncAddr, Addr,
+                                       LR.Locations);
+  if (Err)
+    return std::move(Err);
+  return LR;
+}
Index: llvm/lib/DebugInfo/GSYM/CMakeLists.txt
===================================================================
--- llvm/lib/DebugInfo/GSYM/CMakeLists.txt
+++ llvm/lib/DebugInfo/GSYM/CMakeLists.txt
@@ -6,6 +6,7 @@
   GsymReader.cpp
   InlineInfo.cpp
   LineTable.cpp
+  LookupResult.cpp
   Range.cpp
 
   ADDITIONAL_HEADER_DIRS
Index: llvm/include/llvm/DebugInfo/GSYM/Range.h
===================================================================
--- llvm/include/llvm/DebugInfo/GSYM/Range.h
+++ llvm/include/llvm/DebugInfo/GSYM/Range.h
@@ -61,6 +61,14 @@
   void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset);
   void encode(FileWriter &O, uint64_t BaseAddr) const;
   /// @}
+
+  /// Skip an address range object in the specified data at the specified
+  /// offset.
+  ///
+  /// \param Data The binary stream to read the data from.
+  ///
+  /// \param Offset The byte offset within \a Data.
+  static void skip(DataExtractor &Data, uint64_t &Offset);
 };
 
 raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R);
@@ -100,6 +108,16 @@
   void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset);
   void encode(FileWriter &O, uint64_t BaseAddr) const;
   /// @}
+
+  /// Skip an address ranges object in the specified data at the specified
+  /// offset.
+  ///
+  /// \param Data The binary stream to read the data from.
+  ///
+  /// \param Offset The byte offset within \a Data.
+  ///
+  /// \returns The number of address ranges that were skipped.
+  static uint64_t skip(DataExtractor &Data, uint64_t &Offset);
 };
 
 raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR);
Index: llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
@@ -0,0 +1,53 @@
+//===- LookupResult.h -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H
+#define LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H
+
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include "llvm/ADT/StringRef.h"
+#include <inttypes.h>
+#include <vector>
+
+namespace llvm {
+class raw_ostream;
+namespace gsym {
+struct FileEntry;
+
+struct SourceLocation {
+  StringRef Name; ///< Function or symbol name.
+  StringRef Dir; ///< Line entry source file directory path.
+  StringRef Base; ///< Line entry source file basename.
+  uint32_t Line = 0; ///< Source file line number.
+};
+
+using SourceLocations = std::vector<SourceLocation>;
+
+/// The result of looking up a single address in a GSYM file.
+struct LookupResult {
+  uint64_t LookupAddr = 0; ///< The address that this lookup pertains to.
+  AddressRange FuncRange; ///< The concrete function address range.
+  StringRef FuncName; ///< The concrete function name that contains LookupAddr.
+  /// The source locations that match this address. File and line are only
+  /// filled in if the FunctionInfo contains a line table. If an address is
+  /// for a concrete function with no inlined functions, this array will have
+  /// one entry. If an address points to an inline function, there will be one
+  /// SourceLocation for each inlined function with the last entry pointing to
+  /// the concrete function itself. This allows one address to generate
+  /// multiple locations and allows unwinding of inline call stacks. The
+  /// deepest inline function will appear at index zero in the source locations
+  /// array, and the concrete function will appear at the end of the array.
+  SourceLocations Locations;
+  void dump(llvm::raw_ostream &OS) const; ///< Print this result to \a OS.
+  std::string getSourceFile(uint32_t Index) const; ///< Locations[Index] path.
+};
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H
Index: llvm/include/llvm/DebugInfo/GSYM/LineTable.h
===================================================================
--- llvm/include/llvm/DebugInfo/GSYM/LineTable.h
+++ llvm/include/llvm/DebugInfo/GSYM/LineTable.h
@@ -119,8 +119,25 @@
   typedef std::vector<gsym::LineEntry> Collection;
   Collection Lines; ///< All line entries in the line table.
 public:
-  static LineEntry lookup(DataExtractor &Data, uint64_t BaseAddr,
-                          uint64_t Addr);
+  /// Lookup a single address within a line table's data.
+  ///
+  /// Clients can decode an entire line table using LineTable::decode() or
+  /// find a single matching entry using this function. This function does
+  /// not store parsed LineEntry objects that do not match, which avoids
+  /// memory allocation costs, and parsing can stop once a match is found.
+  ///
+  /// \param Data The binary stream to read the data from. This object must
+  /// have the data for the LineTable object starting at offset zero. The data
+  /// can contain more data than needed.
+  ///
+  /// \param BaseAddr The base address to use when decoding the line table.
+  /// This will be the FunctionInfo's start address and will be used to
+  /// initialize the line table row prior to parsing any opcodes.
+  ///
+  /// \param Addr The address to look up in the line table.
+  /// \returns A LineEntry object if a match is found, error otherwise.
+  static Expected<LineEntry> lookup(DataExtractor &Data, uint64_t BaseAddr,
+                                    uint64_t Addr);
 
   /// Decode an LineTable object from a binary data stream.
   ///
Index: llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
===================================================================
--- llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
+++ llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
@@ -10,6 +10,8 @@
 #define LLVM_DEBUGINFO_GSYM_INLINEINFO_H
 
 #include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/GSYM/LineEntry.h"
+#include "llvm/DebugInfo/GSYM/LookupResult.h"
 #include "llvm/DebugInfo/GSYM/Range.h"
 #include "llvm/Support/Error.h"
 #include <stdint.h>
@@ -21,6 +23,7 @@
 
 namespace gsym {
 
+class GsymReader;
 /// Inline information stores the name of the inline function along with
 /// an array of address ranges. It also stores the call file and call line
 /// that called this inline function. This allows us to unwind inline call
@@ -74,6 +77,52 @@
 
   using InlineArray = std::vector<const InlineInfo *>;
 
+  /// Lookup a single address within the inline info data.
+  ///
+  /// Clients have the option to decode an entire InlineInfo object (using
+  /// InlineInfo::decode() ) or just find the matching inline info using this
+  /// function. The benefit of using this function is that only the information
+  /// needed for the lookup will be extracted, other info can be skipped and
+  /// parsing can stop as soon as the deepest match is found. This allows
+  /// symbolication tools to be fast and efficient and avoid allocation costs
+  /// when doing lookups.
+  ///
+  /// This function will augment the SourceLocations array \a SrcLocs with any
+  /// inline information that pertains to \a Addr. If no inline information
+  /// exists for \a Addr, then \a SrcLocs will be left untouched. If there is
+  /// inline information for \a Addr, then \a SrcLocs will be modified to
+  /// contain the deepest inline function's SourceLocation at index zero
+  /// in the array and proceed up to the concrete function source file and
+  /// line at the end of the array.
+  ///
+  /// \param GR The GSYM reader that contains the string and file table that
+  /// will be used to fill in the source locations.
+  ///
+  /// \param Data The binary stream to read the data from. This object must
+  /// have the data for the InlineInfo object starting at offset zero. The data
+  /// can contain more data than needed.
+  ///
+  /// \param BaseAddr The base address to use when decoding the inline info.
+  /// This will be the FunctionInfo's start address and will be used to
+  /// decode the correct addresses for the inline information.
+  ///
+  /// \param Addr The address to lookup.
+  ///
+  /// \param SrcLocs The inline source locations that match \a Addr. This
+  ///                array must be initialized with the matching line entry
+  ///                from the line table upon entry. The name of the concrete
+  ///                function must be supplied since it will get pushed to
+  ///                the last SourceLocation entry and the inline information
+  ///                will fill in the source file and line from the inline
+  ///                information.
+  ///
+  /// \returns An error if the inline information is corrupt, or
+  ///          Error::success() for all other cases, even when no information
+  ///          is added to \a SrcLocs.
+  static llvm::Error lookup(const GsymReader &GR, DataExtractor &Data,
+                            uint64_t BaseAddr, uint64_t Addr,
+                            SourceLocations &SrcLocs);
+
   /// Lookup an address in the InlineInfo object
   ///
   /// This function is used to symbolicate an inline call stack and can
Index: llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
===================================================================
--- llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -94,28 +94,45 @@
 
   /// Get the full function info for an address.
   ///
+  /// This should be called when a client will store a copy of the complete
+  /// FunctionInfo for a given address. For one-off lookups, use the lookup()
+  /// function below.
+  ///
+  /// Symbolication server processes might want to parse the entire function
+  /// info for a given address and cache it if the process stays around to
+  /// service many symbolication addresses, like for parsing profiling
+  /// information.
+  ///
   /// \param Addr A virtual address from the orignal object file to lookup.
+  ///
   /// \returns An expected FunctionInfo that contains the function info object
   /// or an error object that indicates reason for failing to lookup the
-  /// address,
+  /// address.
   llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
 
+  /// Lookup an address in a GSYM.
+  ///
+  /// Lookup just the information needed for a specific address \a Addr. This
+  /// function is faster than calling getFunctionInfo() as it will only return
+  /// information that pertains to \a Addr and allows the parsing to skip any
+  /// extra information encoded for other addresses. For example the line table
+  /// parsing can stop when a matching LineEntry has been found, and the
+  /// InlineInfo can stop parsing early once a match has been found and also
+  /// skip information that doesn't match. This avoids memory allocations and
+  /// is much faster for lookups.
+  ///
+  /// \param Addr A virtual address from the original object file to lookup.
+  /// \returns An expected LookupResult that contains only the information
+  /// needed for the current address, or an error object that indicates reason
+  /// for failing to lookup the address.
+  llvm::Expected<LookupResult> lookup(uint64_t Addr) const;
+
   /// Get a string from the string table.
   ///
   /// \param Offset The string table offset for the string to retrieve.
   /// \returns The string from the strin table.
   StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
 
-protected:
-  /// Gets an address from the address table.
-  ///
-  /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress.
-  ///
-  /// \param Index A index into the address table.
-  /// \returns A resolved virtual address for adddress in the address table
-  /// or llvm::None if Index is out of bounds.
-  Optional<uint64_t> getAddress(size_t Index) const;
-
   /// Get the a file entry for the suppplied file index.
   ///
   /// Used to convert any file indexes in the FunctionInfo data back into
@@ -131,6 +148,16 @@
     return llvm::None;
   }
 
+protected:
+  /// Gets an address from the address table.
+  ///
+  /// Addresses are stored as offsets from the gsym::Header::BaseAddress.
+  ///
+  /// \param Index An index into the address table.
+  /// \returns A resolved virtual address for the address in the address table
+  /// or llvm::None if Index is out of bounds.
+  Optional<uint64_t> getAddress(size_t Index) const;
+
   /// Get an appropriate address info offsets array.
   ///
   /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
Index: llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
===================================================================
--- llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
+++ llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -12,6 +12,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
 #include "llvm/DebugInfo/GSYM/LineTable.h"
+#include "llvm/DebugInfo/GSYM/LookupResult.h"
 #include "llvm/DebugInfo/GSYM/Range.h"
 #include "llvm/DebugInfo/GSYM/StringTable.h"
 #include <tuple>
@@ -21,6 +22,7 @@
 class raw_ostream;
 namespace gsym {
 
+class GsymReader;
 /// Function information in GSYM files encodes information for one contiguous
 /// address range. If a function has discontiguous address ranges, they will
 /// need to be encoded using multiple FunctionInfo objects.
@@ -140,6 +142,33 @@
   /// function info that was successfully written into the stream.
   llvm::Expected<uint64_t> encode(FileWriter &O) const;
 
+
+  /// Lookup an address within a FunctionInfo object's data stream.
+  ///
+  /// Instead of decoding an entire FunctionInfo object when doing lookups,
+  /// we can decode only the information we need from the FunctionInfo's data
+  /// for the specific address. The lookup result information is returned as
+  /// a LookupResult.
+  ///
+  /// \param Data The binary stream to read the data from. This object must
+  /// have the data for the object starting at offset zero. The data
+  /// can contain more data than needed.
+  ///
+  /// \param GR The GSYM reader that contains the string and file table that
+  /// will be used to fill in information in the returned result.
+  ///
+  /// \param FuncAddr The function start address decoded from the GsymReader.
+  ///
+  /// \param Addr The address to lookup.
+  ///
+  /// \returns A LookupResult or an error describing the issue that was
+  /// encountered during decoding. An error should only be returned if the
+  /// address is not contained in the FunctionInfo or if the data is corrupted.
+  static llvm::Expected<LookupResult> lookup(DataExtractor &Data,
+                                             const GsymReader &GR,
+                                             uint64_t FuncAddr,
+                                             uint64_t Addr);
+
   uint64_t startAddress() const { return Range.Start; }
   uint64_t endAddress() const { return Range.End; }
   uint64_t size() const { return Range.size(); }
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to