cameron314 created this revision.

This patch fixes preamble skipping when the preamble region includes a byte 
order mark (BOM). Previously, parsing would fail if preamble PCH generation was 
enabled and a BOM was present.

This also fixes the preamble incorrectly being invalidated when a BOM appeared 
or disappeared, even if the contents after the BOM had not changed. This may 
seem to be an obscure edge case, but it is important for IDEs that pass buffer 
overrides that never (or always) have a BOM, yet the underlying file from the 
initial parse that generated a PCH might (or might not) have a BOM.

Test cases for these scenarios are included.

Note: This depends on the test infrastructure introduced in 
https://reviews.llvm.org/D37474.


https://reviews.llvm.org/D37491

Files:
  include/clang/Frontend/PrecompiledPreamble.h
  include/clang/Lex/Lexer.h
  include/clang/Lex/PreprocessorOptions.h
  lib/Frontend/ASTUnit.cpp
  lib/Frontend/FrontendActions.cpp
  lib/Frontend/PrecompiledPreamble.cpp
  lib/Lex/Lexer.cpp
  lib/Lex/Preprocessor.cpp
  unittests/Frontend/PchPreambleTest.cpp

Index: unittests/Frontend/PchPreambleTest.cpp
===================================================================
--- unittests/Frontend/PchPreambleTest.cpp
+++ unittests/Frontend/PchPreambleTest.cpp
@@ -91,8 +91,19 @@
     PreprocessorOptions &PPOpts = CI->getPreprocessorOpts();
     PPOpts.RemappedFilesKeepOriginalName = true;
 
+    struct DebugConsumer : DiagnosticConsumer
+    {
+      void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
+        const Diagnostic &Info) override {
+        DiagnosticConsumer::HandleDiagnostic(DiagLevel, Info);
+        SmallString<16> DiagText;
+        Info.FormatDiagnostic(DiagText);
+        llvm::errs() << DiagText << '\n';
+      }
+    };
+
     IntrusiveRefCntPtr<DiagnosticsEngine>
-      Diags(CompilerInstance::createDiagnostics(new DiagnosticOptions, new DiagnosticConsumer));
+      Diags(CompilerInstance::createDiagnostics(new DiagnosticOptions, new DebugConsumer));
 
     FileManager *FileMgr = new FileManager(FSOpts, VFS);
 
@@ -153,4 +164,64 @@
   ASSERT_EQ(initialCounts[2], GetFileReadCount(Header2));
 }
 
+TEST_F(PchPreambleTest, ReparseWithDisappearingBom) {
+  std::string Header = "//./header.h";
+  std::string Main = "//./main.cpp";
+  AddFile(Header, "int random() { return 4; }");
+  AddFile(Main,
+    "\xef\xbb\xbf"
+    "#include \"//./header.h\"\n"
+    "int main() { return random() * 2; }");
+
+  std::unique_ptr<ASTUnit> AST(ParseAST(Main));
+  ASSERT_TRUE(AST.get());
+  ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
+
+  unsigned initialCounts[] = {
+    GetFileReadCount(Main),
+    GetFileReadCount(Header)
+  };
+
+  // Remove BOM but keep the rest the same
+  RemapFile(Main,
+    "#include \"//./header.h\"\n"
+    "int main() { return random() * 2; }");
+
+  ASSERT_TRUE(ReparseAST(AST));
+  ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
+
+  ASSERT_EQ(initialCounts[0], GetFileReadCount(Main));
+  ASSERT_EQ(initialCounts[1], GetFileReadCount(Header));
+}
+
+TEST_F(PchPreambleTest, ReparseWithAppearingBom) {
+  std::string Header = "//./header.h";
+  std::string Main = "//./main.cpp";
+  AddFile(Header, "int random() { return 4; }");
+  AddFile(Main,
+    "#include \"//./header.h\"\n"
+    "int main() { return random() * 2; }");
+
+  std::unique_ptr<ASTUnit> AST(ParseAST(Main));
+  ASSERT_TRUE(AST.get());
+  ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
+
+  unsigned initialCounts[] = {
+    GetFileReadCount(Main),
+    GetFileReadCount(Header)
+  };
+
+  // Suddenly, a BOM appeared
+  RemapFile(Main,
+    "\xef\xbb\xbf"
+    "#include \"//./header.h\"\n"
+    "int main() { return random() * 2; }");
+
+  ASSERT_TRUE(ReparseAST(AST));
+  ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
+
+  ASSERT_EQ(initialCounts[0], GetFileReadCount(Main));
+  ASSERT_EQ(initialCounts[1], GetFileReadCount(Header));
+}
+
 } // anonymous namespace
Index: lib/Lex/Preprocessor.cpp
===================================================================
--- lib/Lex/Preprocessor.cpp
+++ lib/Lex/Preprocessor.cpp
@@ -516,9 +516,9 @@
     // If we've been asked to skip bytes in the main file (e.g., as part of a
     // precompiled preamble), do so now.
     if (SkipMainFilePreamble.first > 0)
-      CurLexer->SkipBytes(SkipMainFilePreamble.first, 
-                          SkipMainFilePreamble.second);
-    
+      CurLexer->SetByteOffset(SkipMainFilePreamble.first,
+                              SkipMainFilePreamble.second);
+
     // Tell the header info that the main file was entered.  If the file is later
     // #imported, it won't be re-entered.
     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
Index: lib/Lex/Lexer.cpp
===================================================================
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -552,9 +552,9 @@
 
 } // end anonymous namespace
 
-std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
-                                                 const LangOptions &LangOpts,
-                                                 unsigned MaxLines) {
+PreambleBounds Lexer::ComputePreamble(StringRef Buffer,
+                                      const LangOptions &LangOpts,
+                                      unsigned MaxLines) {
   // Create a lexer starting at the beginning of the file. Note that we use a
   // "fake" file source location at offset 1 so that the lexer will track our
   // position within the file.
@@ -688,7 +688,8 @@
   else
     End = TheTok.getLocation();
 
-  return std::make_pair(End.getRawEncoding() - StartLoc.getRawEncoding(),
+  return PreambleBounds(StartLoc.getRawEncoding() - FileLoc.getRawEncoding(),
+                        End.getRawEncoding() - StartLoc.getRawEncoding(),
                         TheTok.isAtStartOfLine());
 }
 
@@ -1394,9 +1395,9 @@
 // Helper methods for lexing.
 //===----------------------------------------------------------------------===//
 
-/// \brief Routine that indiscriminately skips bytes in the source file.
-void Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) {
-  BufferPtr += Bytes;
+/// \brief Routine that indiscriminately sets the offset into the source file.
+void Lexer::SetByteOffset(unsigned Offset, bool StartOfLine) {
+  BufferPtr = BufferStart + Offset;
   if (BufferPtr > BufferEnd)
     BufferPtr = BufferEnd;
   // FIXME: What exactly does the StartOfLine bit mean?  There are two
Index: lib/Frontend/PrecompiledPreamble.cpp
===================================================================
--- lib/Frontend/PrecompiledPreamble.cpp
+++ lib/Frontend/PrecompiledPreamble.cpp
@@ -195,8 +195,7 @@
 PreambleBounds clang::ComputePreambleBounds(const LangOptions &LangOpts,
                                             llvm::MemoryBuffer *Buffer,
                                             unsigned MaxLines) {
-  auto Pre = Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines);
-  return PreambleBounds(Pre.first, Pre.second);
+  return Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines);
 }
 
 llvm::ErrorOr<PrecompiledPreamble> PrecompiledPreamble::Build(
@@ -224,9 +223,9 @@
 
   // Save the preamble text for later; we'll need to compare against it for
   // subsequent reparses.
-  std::vector<char> PreambleBytes(MainFileBuffer->getBufferStart(),
-                                  MainFileBuffer->getBufferStart() +
-                                      Bounds.Size);
+  auto PreambleStart = MainFileBuffer->getBufferStart() + Bounds.StartOffset;
+  std::vector<char> PreambleBytes(PreambleStart,
+                                  PreambleStart + Bounds.Size);
   bool PreambleEndsAtStartOfLine = Bounds.PreambleEndsAtStartOfLine;
 
   // Tell the compiler invocation to generate a temporary precompiled header.
@@ -289,8 +288,10 @@
 
   // Remap the main source file to the preamble buffer.
   StringRef MainFilePath = FrontendOpts.Inputs[0].getFile();
+  auto StartOffset = Bounds.StartOffset;
+  auto EndOffset = StartOffset + Bounds.Size;
   auto PreambleInputBuffer = llvm::MemoryBuffer::getMemBufferCopy(
-      MainFileBuffer->getBuffer().slice(0, Bounds.Size), MainFilePath);
+      MainFileBuffer->getBuffer().slice(StartOffset, EndOffset), MainFilePath);
   if (PreprocessorOpts.RetainRemappedFileBuffers) {
     // MainFileBuffer will be deleted by unique_ptr after leaving the method.
     PreprocessorOpts.addRemappedFile(MainFilePath, PreambleInputBuffer.get());
@@ -338,20 +339,22 @@
 
   return PrecompiledPreamble(
       std::move(*PreamblePCHFile), std::move(PreambleBytes),
-      PreambleEndsAtStartOfLine, std::move(FilesInPreamble));
+      PreambleEndsAtStartOfLine, std::move(FilesInPreamble),
+      Bounds.StartOffset);
 }
 
 PreambleBounds PrecompiledPreamble::getBounds() const {
-  return PreambleBounds(PreambleBytes.size(), PreambleEndsAtStartOfLine);
+  return PreambleBounds(PreambleOffset, PreambleBytes.size(),
+                        PreambleEndsAtStartOfLine);
 }
 
 bool PrecompiledPreamble::CanReuse(const CompilerInvocation &Invocation,
                                    const llvm::MemoryBuffer *MainFileBuffer,
                                    PreambleBounds Bounds,
                                    vfs::FileSystem *VFS) const {
 
   assert(
-      Bounds.Size <= MainFileBuffer->getBufferSize() &&
+      Bounds.StartOffset + Bounds.Size <= MainFileBuffer->getBufferSize() &&
       "Buffer is too large. Bounds were calculated from a different buffer?");
 
   auto PreambleInvocation = std::make_shared<CompilerInvocation>(Invocation);
@@ -367,7 +370,8 @@
   // new main file.
   if (PreambleBytes.size() != Bounds.Size ||
       PreambleEndsAtStartOfLine != Bounds.PreambleEndsAtStartOfLine ||
-      memcmp(PreambleBytes.data(), MainFileBuffer->getBufferStart(),
+      memcmp(PreambleBytes.data(),
+             MainFileBuffer->getBufferStart() + Bounds.StartOffset,
              Bounds.Size) != 0)
     return false;
   // The preamble has not changed. We may be able to re-use the precompiled
@@ -430,7 +434,8 @@
   auto &PreprocessorOpts = CI.getPreprocessorOpts();
 
   // Configure ImpicitPCHInclude.
-  PreprocessorOpts.PrecompiledPreambleBytes.first = PreambleBytes.size();
+  PreprocessorOpts.PrecompiledPreambleBytes.first = PreambleOffset +
+      PreambleBytes.size();
   PreprocessorOpts.PrecompiledPreambleBytes.second = PreambleEndsAtStartOfLine;
   PreprocessorOpts.ImplicitPCHInclude = PCHFile.getFilePath();
   PreprocessorOpts.DisablePCHValidation = true;
@@ -440,13 +445,20 @@
   PreprocessorOpts.addRemappedFile(MainFilePath, MainFileBuffer);
 }
 
+void PrecompiledPreamble::UpdateOffset(unsigned NewOffset)
+{
+  PreambleOffset = NewOffset;
+}
+
 PrecompiledPreamble::PrecompiledPreamble(
     TempPCHFile PCHFile, std::vector<char> PreambleBytes,
     bool PreambleEndsAtStartOfLine,
-    llvm::StringMap<PreambleFileHash> FilesInPreamble)
+    llvm::StringMap<PreambleFileHash> FilesInPreamble,
+    unsigned PreambleOffset)
     : PCHFile(std::move(PCHFile)), FilesInPreamble(FilesInPreamble),
       PreambleBytes(std::move(PreambleBytes)),
-      PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
+      PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine),
+      PreambleOffset(PreambleOffset) {}
 
 llvm::ErrorOr<PrecompiledPreamble::TempPCHFile>
 PrecompiledPreamble::TempPCHFile::CreateNewPreamblePCHFile() {
Index: lib/Frontend/FrontendActions.cpp
===================================================================
--- lib/Frontend/FrontendActions.cpp
+++ lib/Frontend/FrontendActions.cpp
@@ -590,8 +590,9 @@
   CompilerInstance &CI = getCompilerInstance();
   auto Buffer = CI.getFileManager().getBufferForFile(getCurrentFile());
   if (Buffer) {
-    unsigned Preamble =
-        Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts()).first;
+    PreambleBounds Bounds =
+        Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts());
+    unsigned Preamble = Bounds.StartOffset + Bounds.Size;
     llvm::outs().write((*Buffer)->getBufferStart(), Preamble);
   }
 }
Index: lib/Frontend/ASTUnit.cpp
===================================================================
--- lib/Frontend/ASTUnit.cpp
+++ lib/Frontend/ASTUnit.cpp
@@ -1266,6 +1266,7 @@
     if (Preamble->CanReuse(PreambleInvocationIn, MainFileBuffer.get(), Bounds,
                            VFS.get())) {
       // Okay! We can re-use the precompiled preamble.
+      Preamble->UpdateOffset(Bounds.StartOffset);
 
       // Set the state of the diagnostic object to mimic its state
       // after parsing the preamble.
Index: include/clang/Lex/PreprocessorOptions.h
===================================================================
--- include/clang/Lex/PreprocessorOptions.h
+++ include/clang/Lex/PreprocessorOptions.h
@@ -160,7 +160,7 @@
                           DisablePCHValidation(false),
                           AllowPCHWithCompilerErrors(false),
                           DumpDeserializedPCHDecls(false),
-                          PrecompiledPreambleBytes(0, true),
+                          PrecompiledPreambleBytes(0, false),
                           GeneratePreamble(false),
                           RemappedFilesKeepOriginalName(true),
                           RetainRemappedFileBuffers(false),
@@ -195,7 +195,7 @@
     LexEditorPlaceholders = true;
     RetainRemappedFileBuffers = true;
     PrecompiledPreambleBytes.first = 0;
-    PrecompiledPreambleBytes.second = 0;
+    PrecompiledPreambleBytes.second = false;
   }
 };
 
Index: include/clang/Lex/Lexer.h
===================================================================
--- include/clang/Lex/Lexer.h
+++ include/clang/Lex/Lexer.h
@@ -39,6 +39,24 @@
   CMK_Perforce
 };
 
+/// Describes the bounds (start, size) of the preamble and a flag required by
+/// PreprocessorOptions::PrecompiledPreambleBytes.
+struct PreambleBounds {
+  PreambleBounds(unsigned Offset, unsigned Size, bool PreambleEndsAtStartOfLine)
+    : StartOffset(Offset), Size(Size),
+      PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
+
+  /// \brief Start offset of the preamble, in bytes.
+  unsigned StartOffset;
+  /// \brief Size of the preamble in bytes.
+  unsigned Size;
+  /// \brief Whether the preamble ends at the start of a new line.
+  ///
+  /// Used to inform the lexer as to whether it's starting at the beginning of
+  /// a line after skipping the preamble.
+  bool PreambleEndsAtStartOfLine;
+};
+
 /// Lexer - This provides a simple interface that turns a text buffer into a
 /// stream of tokens.  This provides no support for file reading or buffering,
 /// or buffering/seeking of tokens, only forward lexing is supported.  It relies
@@ -442,12 +460,12 @@
   /// \param MaxLines If non-zero, restrict the length of the preamble
   /// to fewer than this number of lines.
   ///
-  /// \returns The offset into the file where the preamble ends and the rest
-  /// of the file begins along with a boolean value indicating whether 
-  /// the preamble ends at the beginning of a new line.
-  static std::pair<unsigned, bool> ComputePreamble(StringRef Buffer,
-                                                   const LangOptions &LangOpts,
-                                                   unsigned MaxLines = 0);
+  /// \returns The offset into the file where the preamble starts, its size,
+  /// and a boolean value indicating whether the preamble ends at the
+  /// beginning of a new line.
+  static PreambleBounds ComputePreamble(StringRef Buffer,
+                                        const LangOptions &LangOpts,
+                                        unsigned MaxLines = 0);
 
   /// \brief Checks that the given token is the first token that occurs after
   /// the given location (this excludes comments and whitespace). Returns the
@@ -618,7 +636,7 @@
   //===--------------------------------------------------------------------===//
   // Other lexer functions.
 
-  void SkipBytes(unsigned Bytes, bool StartOfLine);
+  void SetByteOffset(unsigned Offset, bool StartOfLine);
 
   void PropagateLineStartLeadingSpaceInfo(Token &Result);
 
Index: include/clang/Frontend/PrecompiledPreamble.h
===================================================================
--- include/clang/Frontend/PrecompiledPreamble.h
+++ include/clang/Frontend/PrecompiledPreamble.h
@@ -36,21 +36,6 @@
 class DeclGroupRef;
 class PCHContainerOperations;
 
-/// A size of the preamble and a flag required by
-/// PreprocessorOptions::PrecompiledPreambleBytes.
-struct PreambleBounds {
-  PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
-      : Size(Size), PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
-
-  /// \brief Size of the preamble in bytes.
-  unsigned Size;
-  /// \brief Whether the preamble ends at the start of a new line.
-  ///
-  /// Used to inform the lexer as to whether it's starting at the beginning of
-  /// a line after skipping the preamble.
-  bool PreambleEndsAtStartOfLine;
-};
-
 /// \brief Runs lexer to compute suggested preamble bounds.
 PreambleBounds ComputePreambleBounds(const LangOptions &LangOpts,
                                      llvm::MemoryBuffer *Buffer,
@@ -111,10 +96,16 @@
   void AddImplicitPreamble(CompilerInvocation &CI,
                            llvm::MemoryBuffer *MainFileBuffer) const;
 
+  /// Updates the preamble offset. This is necessary when the preamble can be
+  /// reused for another parse, but its offset has changed (e.g. when a BOM
+  /// appears or disappears between parses).
+  void UpdateOffset(unsigned NewOffset);
+
 private:
   PrecompiledPreamble(TempPCHFile PCHFile, std::vector<char> PreambleBytes,
                       bool PreambleEndsAtStartOfLine,
-                      llvm::StringMap<PreambleFileHash> FilesInPreamble);
+                      llvm::StringMap<PreambleFileHash> FilesInPreamble,
+                      unsigned PreambleOffset);
 
   /// A temp file that would be deleted on destructor call. If destructor is not
   /// called for any reason, the file will be deleted at static objects'
@@ -197,6 +188,8 @@
   std::vector<char> PreambleBytes;
   /// See PreambleBounds::PreambleEndsAtStartOfLine
   bool PreambleEndsAtStartOfLine;
+  /// The start offset of the bounds used to build the preamble.
+  unsigned PreambleOffset;
 };
 
 /// A set of callbacks to gather useful information while building a preamble.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to