[clang] d333a0d - Revert "[Modules] No transitive source location change (#86912)"

2024-04-30 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-30T22:32:02+08:00
New Revision: d333a0de6829616427182b26923b14d779ce1dbb

URL: 
https://github.com/llvm/llvm-project/commit/d333a0de6829616427182b26923b14d779ce1dbb
DIFF: 
https://github.com/llvm/llvm-project/commit/d333a0de6829616427182b26923b14d779ce1dbb.diff

LOG: Revert "[Modules] No transitive source location change (#86912)"

This reverts commit 6c3110464bac3600685af9650269b0b2b8669d34.

Required by the post commit comments: 
https://github.com/llvm/llvm-project/pull/86912

Added: 


Modified: 
clang/include/clang/Basic/SourceLocation.h
clang/include/clang/Serialization/ASTBitCodes.h
clang/include/clang/Serialization/ASTReader.h
clang/include/clang/Serialization/ASTWriter.h
clang/include/clang/Serialization/ModuleFile.h
clang/include/clang/Serialization/SourceLocationEncoding.h
clang/lib/Frontend/ASTUnit.cpp
clang/lib/Serialization/ASTReader.cpp
clang/lib/Serialization/ASTReaderDecl.cpp
clang/lib/Serialization/ASTWriter.cpp
clang/lib/Serialization/ASTWriterDecl.cpp
clang/lib/Serialization/ModuleFile.cpp
clang/test/Modules/pr61067.cppm
clang/unittests/Serialization/SourceLocationEncodingTest.cpp

Removed: 
clang/test/Modules/no-transitive-source-location-change.cppm



diff  --git a/clang/include/clang/Basic/SourceLocation.h 
b/clang/include/clang/Basic/SourceLocation.h
index 7a0f5ba8d1270b..00b1e0fa855b7a 100644
--- a/clang/include/clang/Basic/SourceLocation.h
+++ b/clang/include/clang/Basic/SourceLocation.h
@@ -90,7 +90,6 @@ class SourceLocation {
   friend class ASTWriter;
   friend class SourceManager;
   friend struct llvm::FoldingSetTrait;
-  friend class SourceLocationEncoding;
 
 public:
   using UIntTy = uint32_t;

diff  --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index 93e971d7e142c3..a8df5a0bda0850 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -23,7 +23,6 @@
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Basic/OperatorKinds.h"
 #include "clang/Basic/SourceLocation.h"
-#include "clang/Serialization/SourceLocationEncoding.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Bitstream/BitCodes.h"
 #include 
@@ -168,38 +167,45 @@ const unsigned int NUM_PREDEF_SUBMODULE_IDS = 1;
 
 /// Source range/offset of a preprocessed entity.
 struct PPEntityOffset {
-  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
-
   /// Raw source location of beginning of range.
-  RawLocEncoding Begin;
+  SourceLocation::UIntTy Begin;
 
   /// Raw source location of end of range.
-  RawLocEncoding End;
+  SourceLocation::UIntTy End;
 
   /// Offset in the AST file relative to ModuleFile::MacroOffsetsBase.
   uint32_t BitOffset;
 
-  PPEntityOffset(RawLocEncoding Begin, RawLocEncoding End, uint32_t BitOffset)
-  : Begin(Begin), End(End), BitOffset(BitOffset) {}
+  PPEntityOffset(SourceRange R, uint32_t BitOffset)
+  : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()),
+BitOffset(BitOffset) {}
+
+  SourceLocation getBegin() const {
+return SourceLocation::getFromRawEncoding(Begin);
+  }
 
-  RawLocEncoding getBegin() const { return Begin; }
-  RawLocEncoding getEnd() const { return End; }
+  SourceLocation getEnd() const {
+return SourceLocation::getFromRawEncoding(End);
+  }
 };
 
 /// Source range of a skipped preprocessor region
 struct PPSkippedRange {
-  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
-
   /// Raw source location of beginning of range.
-  RawLocEncoding Begin;
+  SourceLocation::UIntTy Begin;
   /// Raw source location of end of range.
-  RawLocEncoding End;
+  SourceLocation::UIntTy End;
 
-  PPSkippedRange(RawLocEncoding Begin, RawLocEncoding End)
-  : Begin(Begin), End(End) {}
+  PPSkippedRange(SourceRange R)
+  : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()) 
{
+  }
 
-  RawLocEncoding getBegin() const { return Begin; }
-  RawLocEncoding getEnd() const { return End; }
+  SourceLocation getBegin() const {
+return SourceLocation::getFromRawEncoding(Begin);
+  }
+  SourceLocation getEnd() const {
+return SourceLocation::getFromRawEncoding(End);
+  }
 };
 
 /// Offset in the AST file. Use splitted 64-bit integer into low/high
@@ -225,10 +231,8 @@ struct UnderalignedInt64 {
 
 /// Source location and bit offset of a declaration.
 struct DeclOffset {
-  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
-
   /// Raw source location.
-  RawLocEncoding RawLoc = 0;
+  SourceLocation::UIntTy Loc = 0;
 
   /// Offset relative to the start of the DECLTYPES_BLOCK block. Keep
   /// structure alignment 32-bit and avoid padding gap because undefined
@@ -236,15 +240,17 @@ struct DeclOffset {
   UnderalignedInt64 BitOffset;
 
   DeclOffset() = default;
-  

[clang] [Modules] No transitive source location change (PR #86912)

2024-04-30 Thread Chuanqi Xu via cfe-commits

ChuanqiXu9 wrote:

> I strongly suspect that this patch badly broke the [Solaris/sparcv9 
> buildbot](https://lab.llvm.org/buildbot/#/builders/72/builds/4046): it 
> introduced more than 1000 failures.
> 
> Please fix or revert.

I'll revert this. Due to I can't reproduce this. When the bot gets stable, 
please tell if it is the real problem.

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Implement P3034R1 Module Declarations Shouldn’t be Macros (PR #90574)

2024-04-30 Thread Chuanqi Xu via cfe-commits


@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %s -triple 
x86_64-linux-gnu -DTEST=1 -verify
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %s -triple 
x86_64-linux-gnu -DTEST=2 -verify
+
+module;
+export module x;
+#include "version.h"
+#if TEST == 1

ChuanqiXu9 wrote:

https://llvm.org/docs/TestingGuide.html#extra-files

Also it is somewhat clearly that split file is better than the sections guarded 
by #if-#endif. We can find the example by searching `split-file` under 
clang/test/Modules

https://github.com/llvm/llvm-project/pull/90574
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Implement P3034R1 Module Declarations Shouldn’t be Macros (PR #90574)

2024-04-30 Thread Chuanqi Xu via cfe-commits


@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %s -triple 
x86_64-linux-gnu -DTEST=1 -verify
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %s -triple 
x86_64-linux-gnu -DTEST=2 -verify
+
+module;
+export module x;
+#include "version.h"
+#if TEST == 1

ChuanqiXu9 wrote:

we prefer using split-file now.

https://github.com/llvm/llvm-project/pull/90574
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Implement P3034R1 Module Declarations Shouldn’t be Macros (PR #90574)

2024-04-30 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 commented:

LGTM otherwise. I'd like to leave this to  @cor3ntin 

https://github.com/llvm/llvm-project/pull/90574
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Implement P3034R1 Module Declarations Shouldn’t be Macros (PR #90574)

2024-04-30 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 edited 
https://github.com/llvm/llvm-project/pull/90574
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 10aab63 - [NFC] [tests] Don't try to remove and create the same directory

2024-04-30 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-30T17:08:40+08:00
New Revision: 10aab63c9cb49d3ddfbe2cf8992de433efeef6f1

URL: 
https://github.com/llvm/llvm-project/commit/10aab63c9cb49d3ddfbe2cf8992de433efeef6f1
DIFF: 
https://github.com/llvm/llvm-project/commit/10aab63c9cb49d3ddfbe2cf8992de433efeef6f1.diff

LOG: [NFC] [tests] Don't try to remove and create the same directory

In the test of
clang/test/Modules/no-transitive-source-location-change.cppm, there were
reports about invalid directory names in windows. The reason may be that
we may remove and create the same directory. This patch tries to avoid
such patterns for that.

Added: 


Modified: 
clang/test/Modules/no-transitive-source-location-change.cppm

Removed: 




diff  --git a/clang/test/Modules/no-transitive-source-location-change.cppm 
b/clang/test/Modules/no-transitive-source-location-change.cppm
index 83cf6fb4f684d0..303142a1af890b 100644
--- a/clang/test/Modules/no-transitive-source-location-change.cppm
+++ b/clang/test/Modules/no-transitive-source-location-change.cppm
@@ -3,7 +3,6 @@
 //
 // RUN: rm -rf %t
 // RUN: split-file %s %t
-// RUN: cd %t
 //
 // RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm
 // RUN: %clang_cc1 -std=c++20 %t/A.v1.cppm -emit-module-interface -o 
%t/A.v1.pcm
@@ -25,10 +24,6 @@
 // RUN: -o %t/C.v1.pcm
 // RUN: not 
diff  %t/C.v1.pcm %t/C.pcm  &> /dev/null
 //
-// RUN: rm -rf %t
-// RUN: split-file %s %t
-// RUN: cd %t
-//
 // Test again with reduced BMI.
 // RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o 
%t/A.pcm
 // RUN: %clang_cc1 -std=c++20 %t/A.v1.cppm -emit-reduced-module-interface -o 
%t/A.v1.pcm



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NFC] [C++20] [Modules] Use new class CXX20ModulesGenerator to genera… (PR #90570)

2024-04-30 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 closed 
https://github.com/llvm/llvm-project/pull/90570
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NFC] [C++20] [Modules] Use new class CXX20ModulesGenerator to genera… (PR #90570)

2024-04-30 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 updated 
https://github.com/llvm/llvm-project/pull/90570

>From d73596affed67978c703c92789de045e9ebf0f6b Mon Sep 17 00:00:00 2001
From: Chuanqi Xu 
Date: Tue, 30 Apr 2024 13:28:52 +0800
Subject: [PATCH] [NFC] [C++20] [Modules] Use new class CXX20ModulesGenerator
 to generate module file for C++20 modules instead of PCHGenerator

Previously we're re-using PCHGenerator to generate the module file for
C++20 modules. But this is slighty more or less odd. This patch tries
to use a new class 'CXX20ModulesGenerator' to generate the module file
for C++20 modules.
---
 clang/include/clang/Serialization/ASTWriter.h | 25 ---
 clang/lib/Frontend/FrontendActions.cpp| 11 +++-
 clang/lib/Serialization/GeneratePCH.cpp   | 25 +++
 clang/test/Modules/pr67893.cppm   |  2 +-
 clang/test/Modules/search-partitions.cpp  |  8 +++---
 5 files changed, 44 insertions(+), 27 deletions(-)

diff --git a/clang/include/clang/Serialization/ASTWriter.h 
b/clang/include/clang/Serialization/ASTWriter.h
index 6c45b7348b8552..6f64ece9c5a19b 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -885,6 +885,8 @@ class ASTWriter : public ASTDeserializationListener,
 /// AST and semantic-analysis consumer that generates a
 /// precompiled header from the parsed source code.
 class PCHGenerator : public SemaConsumer {
+  void anchor() override;
+
   Preprocessor 
   std::string OutputFile;
   std::string isysroot;
@@ -928,17 +930,34 @@ class PCHGenerator : public SemaConsumer {
   bool hasEmittedPCH() const { return Buffer->IsComplete; }
 };
 
-class ReducedBMIGenerator : public PCHGenerator {
+class CXX20ModulesGenerator : public PCHGenerator {
+  void anchor() override;
+
 protected:
   virtual Module *getEmittingModule(ASTContext ) override;
 
+  CXX20ModulesGenerator(Preprocessor , InMemoryModuleCache ,
+StringRef OutputFile, bool GeneratingReducedBMI);
+
 public:
-  ReducedBMIGenerator(Preprocessor , InMemoryModuleCache ,
-  StringRef OutputFile);
+  CXX20ModulesGenerator(Preprocessor , InMemoryModuleCache ,
+StringRef OutputFile)
+  : CXX20ModulesGenerator(PP, ModuleCache, OutputFile,
+  /*GeneratingReducedBMI=*/false) {}
 
   void HandleTranslationUnit(ASTContext ) override;
 };
 
+class ReducedBMIGenerator : public CXX20ModulesGenerator {
+  void anchor() override;
+
+public:
+  ReducedBMIGenerator(Preprocessor , InMemoryModuleCache ,
+  StringRef OutputFile)
+  : CXX20ModulesGenerator(PP, ModuleCache, OutputFile,
+  /*GeneratingReducedBMI=*/true) {}
+};
+
 /// If we can elide the definition of \param D in reduced BMI.
 ///
 /// Generally, we can elide the definition of a declaration if it won't affect
diff --git a/clang/lib/Frontend/FrontendActions.cpp 
b/clang/lib/Frontend/FrontendActions.cpp
index 480dfa8c975933..454653a31534cd 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -272,13 +272,10 @@ bool GenerateModuleInterfaceAction::BeginSourceFileAction(
 std::unique_ptr
 GenerateModuleInterfaceAction::CreateASTConsumer(CompilerInstance ,
  StringRef InFile) {
-  CI.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
-  CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
-
-  std::vector> Consumers =
-  CreateMultiplexConsumer(CI, InFile);
-  if (Consumers.empty())
-return nullptr;
+  std::vector> Consumers;
+  Consumers.push_back(std::make_unique(
+  CI.getPreprocessor(), CI.getModuleCache(),
+  CI.getFrontendOpts().OutputFile));
 
   if (CI.getFrontendOpts().GenReducedBMI &&
   !CI.getFrontendOpts().ModuleOutputPath.empty()) {
diff --git a/clang/lib/Serialization/GeneratePCH.cpp 
b/clang/lib/Serialization/GeneratePCH.cpp
index a2ddbe4624aae4..cc06106a47708e 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -88,31 +88,30 @@ ASTDeserializationListener 
*PCHGenerator::GetASTDeserializationListener() {
   return 
 }
 
-ReducedBMIGenerator::ReducedBMIGenerator(Preprocessor ,
- InMemoryModuleCache ,
- StringRef OutputFile)
+void PCHGenerator::anchor() {}
+
+CXX20ModulesGenerator::CXX20ModulesGenerator(Preprocessor ,
+ InMemoryModuleCache ,
+ StringRef OutputFile,
+ bool GeneratingReducedBMI)
 : PCHGenerator(
   PP, ModuleCache, OutputFile, llvm::StringRef(),
   std::make_shared(),
   /*Extensions=*/ArrayRef>(),
   /*AllowASTWithErrors*/ false, /*IncludeTimestamps=*/false,
   

[clang] b2b463b - [C++20] [Modules] Add signature to the BMI recording export imported

2024-04-30 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-30T16:33:34+08:00
New Revision: b2b463bd8f6b21f040b80c4493682cf74f8dced5

URL: 
https://github.com/llvm/llvm-project/commit/b2b463bd8f6b21f040b80c4493682cf74f8dced5
DIFF: 
https://github.com/llvm/llvm-project/commit/b2b463bd8f6b21f040b80c4493682cf74f8dced5.diff

LOG: [C++20] [Modules] Add signature to the BMI recording export imported
modules

After https://github.com/llvm/llvm-project/pull/86912,
for the following example,

```
export module A;
export import B;
```

The generated BMI of `A` won't change if the source location in `A`
changes. Further, we plan avoid more such changes.

However, it is slightly problematic since `export import` should
propagate all the changes.

So this patch adds a signature to the BMI of C++20 modules so that we
can propagate the changes correctly.

Added: 
clang/test/Modules/force-transitive-changes.cppm

Modified: 
clang/include/clang/Serialization/ASTWriter.h
clang/lib/Serialization/ASTWriter.cpp
clang/test/Modules/no-transitive-source-location-change.cppm

Removed: 




diff  --git a/clang/include/clang/Serialization/ASTWriter.h 
b/clang/include/clang/Serialization/ASTWriter.h
index 428bf6a5a791b3..921678d278d6e2 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -525,6 +525,7 @@ class ASTWriter : public ASTDeserializationListener,
 
   /// Calculate hash of the pcm content.
   std::pair createSignature() const;
+  ASTFileSignature createSignatureForNamedModule() const;
 
   void WriteInputFiles(SourceManager , HeaderSearchOptions );
   void WriteSourceManagerBlock(SourceManager ,

diff  --git a/clang/lib/Serialization/ASTWriter.cpp 
b/clang/lib/Serialization/ASTWriter.cpp
index 4d85f6eb10d232..c3fcd1a4df2368 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -1174,26 +1174,47 @@ ASTWriter::createSignature() const {
   return std::make_pair(ASTBlockHash, Signature);
 }
 
+ASTFileSignature ASTWriter::createSignatureForNamedModule() const {
+  llvm::SHA1 Hasher;
+  Hasher.update(StringRef(Buffer.data(), Buffer.size()));
+
+  assert(WritingModule);
+  assert(WritingModule->isNamedModule());
+
+  // We need to combine all the export imported modules no matter
+  // we used it or not.
+  for (auto [ExportImported, _] : WritingModule->Exports)
+Hasher.update(ExportImported->Signature);
+
+  return ASTFileSignature::create(Hasher.result());
+}
+
+static void BackpatchSignatureAt(llvm::BitstreamWriter ,
+ const ASTFileSignature , uint64_t BitNo) {
+  for (uint8_t Byte : S) {
+Stream.BackpatchByte(BitNo, Byte);
+BitNo += 8;
+  }
+}
+
 ASTFileSignature ASTWriter::backpatchSignature() {
+  if (isWritingStdCXXNamedModules()) {
+ASTFileSignature Signature = createSignatureForNamedModule();
+BackpatchSignatureAt(Stream, Signature, SignatureOffset);
+return Signature;
+  }
+
   if (!WritingModule ||
   !PP->getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent)
 return {};
 
   // For implicit modules, write the hash of the PCM as its signature.
-
-  auto BackpatchSignatureAt = [&](const ASTFileSignature , uint64_t BitNo) {
-for (uint8_t Byte : S) {
-  Stream.BackpatchByte(BitNo, Byte);
-  BitNo += 8;
-}
-  };
-
   ASTFileSignature ASTBlockHash;
   ASTFileSignature Signature;
   std::tie(ASTBlockHash, Signature) = createSignature();
 
-  BackpatchSignatureAt(ASTBlockHash, ASTBlockHashOffset);
-  BackpatchSignatureAt(Signature, SignatureOffset);
+  BackpatchSignatureAt(Stream, ASTBlockHash, ASTBlockHashOffset);
+  BackpatchSignatureAt(Stream, Signature, SignatureOffset);
 
   return Signature;
 }
@@ -1210,9 +1231,11 @@ void ASTWriter::writeUnhashedControlBlock(Preprocessor 
,
   RecordData Record;
   Stream.EnterSubblock(UNHASHED_CONTROL_BLOCK_ID, 5);
 
-  // For implicit modules, write the hash of the PCM as its signature.
-  if (WritingModule &&
-  PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) {
+  // For implicit modules and C++20 named modules, write the hash of the PCM as
+  // its signature.
+  if (isWritingStdCXXNamedModules() ||
+  (WritingModule &&
+   PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent)) {
 // At this point, we don't know the actual signature of the file or the AST
 // block - we're only able to compute those at the end of the serialization
 // process. Let's store dummy signatures for now, and replace them with the
@@ -1223,21 +1246,24 @@ void ASTWriter::writeUnhashedControlBlock(Preprocessor 
,
 auto Dummy = ASTFileSignature::createDummy();
 SmallString<128> Blob{Dummy.begin(), Dummy.end()};
 
-auto Abbrev = std::make_shared();
-Abbrev->Add(BitCodeAbbrevOp(AST_BLOCK_HASH));
-Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-unsigned 

[clang] [NFC] [C++20] [Modules] Use new class CXX20ModulesGenerator to genera… (PR #90570)

2024-04-30 Thread Chuanqi Xu via cfe-commits

ChuanqiXu9 wrote:

The test failure looks no related. I'll commit this after formatted.

https://github.com/llvm/llvm-project/pull/90570
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-30 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 closed 
https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NFC] [C++20] [Modules] Use new class CXX20ModulesGenerator to genera… (PR #90570)

2024-04-30 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 ready_for_review 
https://github.com/llvm/llvm-project/pull/90570
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NFC] [C++20] [Modules] Use new class CXX20ModulesGenerator to genera… (PR #90570)

2024-04-30 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 created 
https://github.com/llvm/llvm-project/pull/90570

…te module file for C++20 modules instead of PCHGenerator

Previously we're re-using PCHGenerator to generate the module file for C++20 
modules. But this is slighty more or less odd. This patch tries to use a new 
class 'CXX20ModulesGenerator' to generate the module file for C++20 modules.

>From 7a8214efbfc1cc5e16c22bd7e3a21061d5a9555c Mon Sep 17 00:00:00 2001
From: Chuanqi Xu 
Date: Tue, 30 Apr 2024 13:28:52 +0800
Subject: [PATCH] [NFC] [C++20] [Modules] Use new class CXX20ModulesGenerator
 to generate module file for C++20 modules instead of PCHGenerator

Previously we're re-using PCHGenerator to generate the module file for
C++20 modules. But this is slighty more or less odd. This patch tries
to use a new class 'CXX20ModulesGenerator' to generate the module file
for C++20 modules.
---
 clang/include/clang/Serialization/ASTWriter.h | 23 ++---
 clang/lib/Frontend/FrontendActions.cpp| 11 +++-
 clang/lib/Serialization/GeneratePCH.cpp   | 25 +++
 clang/test/Modules/pr67893.cppm   |  2 +-
 clang/test/Modules/search-partitions.cpp  |  8 +++---
 5 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/clang/include/clang/Serialization/ASTWriter.h 
b/clang/include/clang/Serialization/ASTWriter.h
index 6c45b7348b8552..259208b7a91aec 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -885,6 +885,8 @@ class ASTWriter : public ASTDeserializationListener,
 /// AST and semantic-analysis consumer that generates a
 /// precompiled header from the parsed source code.
 class PCHGenerator : public SemaConsumer {
+  void anchor() override;
+
   Preprocessor 
   std::string OutputFile;
   std::string isysroot;
@@ -928,17 +930,32 @@ class PCHGenerator : public SemaConsumer {
   bool hasEmittedPCH() const { return Buffer->IsComplete; }
 };
 
-class ReducedBMIGenerator : public PCHGenerator {
+class CXX20ModulesGenerator : public PCHGenerator {
+  void anchor() override;
 protected:
   virtual Module *getEmittingModule(ASTContext ) override;
 
+  CXX20ModulesGenerator(Preprocessor , InMemoryModuleCache ,
+StringRef OutputFile, bool GeneratingReducedBMI);
+
 public:
-  ReducedBMIGenerator(Preprocessor , InMemoryModuleCache ,
-  StringRef OutputFile);
+  CXX20ModulesGenerator(Preprocessor , InMemoryModuleCache ,
+StringRef OutputFile)
+  : CXX20ModulesGenerator(PP, ModuleCache, OutputFile,
+  /*GeneratingReducedBMI=*/false) {}
 
   void HandleTranslationUnit(ASTContext ) override;
 };
 
+class ReducedBMIGenerator : public CXX20ModulesGenerator {
+  void anchor() override;
+public:
+  ReducedBMIGenerator(Preprocessor , InMemoryModuleCache ,
+  StringRef OutputFile)
+  : CXX20ModulesGenerator(PP, ModuleCache, OutputFile,
+  /*GeneratingReducedBMI=*/true) {}
+};
+
 /// If we can elide the definition of \param D in reduced BMI.
 ///
 /// Generally, we can elide the definition of a declaration if it won't affect
diff --git a/clang/lib/Frontend/FrontendActions.cpp 
b/clang/lib/Frontend/FrontendActions.cpp
index 480dfa8c975933..454653a31534cd 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -272,13 +272,10 @@ bool GenerateModuleInterfaceAction::BeginSourceFileAction(
 std::unique_ptr
 GenerateModuleInterfaceAction::CreateASTConsumer(CompilerInstance ,
  StringRef InFile) {
-  CI.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
-  CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
-
-  std::vector> Consumers =
-  CreateMultiplexConsumer(CI, InFile);
-  if (Consumers.empty())
-return nullptr;
+  std::vector> Consumers;
+  Consumers.push_back(std::make_unique(
+  CI.getPreprocessor(), CI.getModuleCache(),
+  CI.getFrontendOpts().OutputFile));
 
   if (CI.getFrontendOpts().GenReducedBMI &&
   !CI.getFrontendOpts().ModuleOutputPath.empty()) {
diff --git a/clang/lib/Serialization/GeneratePCH.cpp 
b/clang/lib/Serialization/GeneratePCH.cpp
index a2ddbe4624aae4..cc06106a47708e 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -88,31 +88,30 @@ ASTDeserializationListener 
*PCHGenerator::GetASTDeserializationListener() {
   return 
 }
 
-ReducedBMIGenerator::ReducedBMIGenerator(Preprocessor ,
- InMemoryModuleCache ,
- StringRef OutputFile)
+void PCHGenerator::anchor() {}
+
+CXX20ModulesGenerator::CXX20ModulesGenerator(Preprocessor ,
+ InMemoryModuleCache ,
+ StringRef OutputFile,
+ 

[clang] ec527b2 - [C++20] [Modules] Don't skip pragma diagnostic mappings

2024-04-30 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-30T14:43:57+08:00
New Revision: ec527b21bb4196355184aa95ef31aa561b8e8b7b

URL: 
https://github.com/llvm/llvm-project/commit/ec527b21bb4196355184aa95ef31aa561b8e8b7b
DIFF: 
https://github.com/llvm/llvm-project/commit/ec527b21bb4196355184aa95ef31aa561b8e8b7b.diff

LOG: [C++20] [Modules] Don't skip pragma diagnostic mappings

Close https://github.com/llvm/llvm-project/issues/75057

Previously, I thought the diagnostic mappings is not meaningful with
modules incorrectly. And this problem get revealed by another change
recently. So this patch tried to rever the previous "optimization"
partially.

Added: 
clang/test/Modules/pr75057.cppm

Modified: 
clang/lib/Frontend/FrontendActions.cpp
clang/lib/Serialization/GeneratePCH.cpp

Removed: 




diff  --git a/clang/lib/Frontend/FrontendActions.cpp 
b/clang/lib/Frontend/FrontendActions.cpp
index 04eb1041326713..480dfa8c975933 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -274,7 +274,6 @@ 
GenerateModuleInterfaceAction::CreateASTConsumer(CompilerInstance ,
  StringRef InFile) {
   CI.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
   CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
-  CI.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings = true;
 
   std::vector> Consumers =
   CreateMultiplexConsumer(CI, InFile);

diff  --git a/clang/lib/Serialization/GeneratePCH.cpp 
b/clang/lib/Serialization/GeneratePCH.cpp
index bed74399098d7f..a2ddbe4624aae4 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -117,7 +117,6 @@ void ReducedBMIGenerator::HandleTranslationUnit(ASTContext 
) {
   getPreprocessor().getHeaderSearchInfo().getHeaderSearchOpts();
   HSOpts.ModulesSkipDiagnosticOptions = true;
   HSOpts.ModulesSkipHeaderSearchPaths = true;
-  HSOpts.ModulesSkipPragmaDiagnosticMappings = true;
 
   PCHGenerator::HandleTranslationUnit(Ctx);
 

diff  --git a/clang/test/Modules/pr75057.cppm b/clang/test/Modules/pr75057.cppm
new file mode 100644
index 00..96781b3ccacc0b
--- /dev/null
+++ b/clang/test/Modules/pr75057.cppm
@@ -0,0 +1,66 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// Treat the behavior of using headers as baseline.
+// RUN: %clang_cc1 -std=c++20 %t/use-header.cc -isystem %t -fsyntax-only 
-verify
+//
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -isystem %t -emit-module-interface -o 
%t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/use-module.cc -isystem %t 
-fmodule-file=a=%t/a.pcm -fsyntax-only -verify
+
+// Test again with reduced BMI.
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -isystem %t 
-emit-reduced-module-interface -o %t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/use-module.cc -isystem %t 
-fmodule-file=a=%t/a.pcm -fsyntax-only -verify
+
+//--- sys.h
+#ifndef SYS_H
+#define SYS_H
+
+#pragma GCC system_header
+
+template 
+struct [[deprecated]] iterator {};
+
+_Pragma("GCC diagnostic push")
+_Pragma("GCC diagnostic ignored \"-Wdeprecated\"") 
+_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
+
+template 
+struct reverse_iterator 
+: public iterator {};
+
+_Pragma("GCC diagnostic pop")
+
+template 
+class C {
+public:
+void i() {
+reverse_iterator i;
+}
+};
+
+#endif
+
+//--- use-header.cc
+// expected-no-diagnostics
+// However, we see unexpected warnings
+#include 
+
+void use() {
+C().i();
+}
+
+//--- a.cppm
+module;
+#include 
+export module a;
+export using ::iterator;
+export using ::C;
+
+//--- use-module.cc
+// expected-no-diagnostics
+import a;
+
+void use() {
+C().i();
+}



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 6b961e2 - Revert "[C++20] [Modules] Don't skip pragma diagnostic mappings"

2024-04-30 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-30T14:28:11+08:00
New Revision: 6b961e2abfffd8b5a508b5958849b13b0feafa50

URL: 
https://github.com/llvm/llvm-project/commit/6b961e2abfffd8b5a508b5958849b13b0feafa50
DIFF: 
https://github.com/llvm/llvm-project/commit/6b961e2abfffd8b5a508b5958849b13b0feafa50.diff

LOG: Revert "[C++20] [Modules] Don't skip pragma diagnostic mappings"
and "[NFC] [C++20] [Modules] Use new class CXX20ModulesGenerator to
generate module file for C++20 modules instead of PCHGenerator"

This reverts commit fb21343473e33e9a886b42d2fe95d1cec1cd0030.
and commit 18268ac0f48d93c2bcddb69732761971669c09ab.

It looks like there are some problems about linking the compiler

Added: 


Modified: 
clang/include/clang/Serialization/ASTWriter.h
clang/lib/Frontend/FrontendActions.cpp
clang/lib/Serialization/GeneratePCH.cpp
clang/test/Modules/pr67893.cppm
clang/test/Modules/search-partitions.cpp

Removed: 
clang/test/Modules/pr75057.cppm



diff  --git a/clang/include/clang/Serialization/ASTWriter.h 
b/clang/include/clang/Serialization/ASTWriter.h
index 4e433deaaf2dbc..6c45b7348b8552 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -928,30 +928,17 @@ class PCHGenerator : public SemaConsumer {
   bool hasEmittedPCH() const { return Buffer->IsComplete; }
 };
 
-class CXX20ModulesGenerator : public PCHGenerator {
+class ReducedBMIGenerator : public PCHGenerator {
 protected:
   virtual Module *getEmittingModule(ASTContext ) override;
 
-  CXX20ModulesGenerator(Preprocessor , InMemoryModuleCache ,
-StringRef OutputFile, bool GeneratingReducedBMI);
-
 public:
-  CXX20ModulesGenerator(Preprocessor , InMemoryModuleCache ,
-StringRef OutputFile)
-  : CXX20ModulesGenerator(PP, ModuleCache, OutputFile,
-  /*GeneratingReducedBMI=*/false) {}
+  ReducedBMIGenerator(Preprocessor , InMemoryModuleCache ,
+  StringRef OutputFile);
 
   void HandleTranslationUnit(ASTContext ) override;
 };
 
-class ReducedBMIGenerator : public CXX20ModulesGenerator {
-public:
-  ReducedBMIGenerator(Preprocessor , InMemoryModuleCache ,
-  StringRef OutputFile)
-  : CXX20ModulesGenerator(PP, ModuleCache, OutputFile,
-  /*GeneratingReducedBMI=*/true) {}
-};
-
 /// If we can elide the definition of \param D in reduced BMI.
 ///
 /// Generally, we can elide the definition of a declaration if it won't affect

diff  --git a/clang/lib/Frontend/FrontendActions.cpp 
b/clang/lib/Frontend/FrontendActions.cpp
index 454653a31534cd..04eb1041326713 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -272,10 +272,14 @@ bool GenerateModuleInterfaceAction::BeginSourceFileAction(
 std::unique_ptr
 GenerateModuleInterfaceAction::CreateASTConsumer(CompilerInstance ,
  StringRef InFile) {
-  std::vector> Consumers;
-  Consumers.push_back(std::make_unique(
-  CI.getPreprocessor(), CI.getModuleCache(),
-  CI.getFrontendOpts().OutputFile));
+  CI.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
+  CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
+  CI.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings = true;
+
+  std::vector> Consumers =
+  CreateMultiplexConsumer(CI, InFile);
+  if (Consumers.empty())
+return nullptr;
 
   if (CI.getFrontendOpts().GenReducedBMI &&
   !CI.getFrontendOpts().ModuleOutputPath.empty()) {

diff  --git a/clang/lib/Serialization/GeneratePCH.cpp 
b/clang/lib/Serialization/GeneratePCH.cpp
index 53dda5f9a38580..bed74399098d7f 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -88,32 +88,36 @@ ASTDeserializationListener 
*PCHGenerator::GetASTDeserializationListener() {
   return 
 }
 
-CXX20ModulesGenerator::CXX20ModulesGenerator(Preprocessor ,
- InMemoryModuleCache ,
- StringRef OutputFile,
- bool GeneratingReducedBMI)
+ReducedBMIGenerator::ReducedBMIGenerator(Preprocessor ,
+ InMemoryModuleCache ,
+ StringRef OutputFile)
 : PCHGenerator(
   PP, ModuleCache, OutputFile, llvm::StringRef(),
   std::make_shared(),
   /*Extensions=*/ArrayRef>(),
   /*AllowASTWithErrors*/ false, /*IncludeTimestamps=*/false,
   /*BuildingImplicitModule=*/false, /*ShouldCacheASTInMemory=*/false,
-  GeneratingReducedBMI) {}
+  /*GeneratingReducedBMI=*/true) {}
 
-Module *CXX20ModulesGenerator::getEmittingModule(ASTContext ) {
+Module *ReducedBMIGenerator::getEmittingModule(ASTContext ) {
   

[clang] fb21343 - [C++20] [Modules] Don't skip pragma diagnostic mappings

2024-04-29 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-30T13:50:20+08:00
New Revision: fb21343473e33e9a886b42d2fe95d1cec1cd0030

URL: 
https://github.com/llvm/llvm-project/commit/fb21343473e33e9a886b42d2fe95d1cec1cd0030
DIFF: 
https://github.com/llvm/llvm-project/commit/fb21343473e33e9a886b42d2fe95d1cec1cd0030.diff

LOG: [C++20] [Modules] Don't skip pragma diagnostic mappings

Close https://github.com/llvm/llvm-project/issues/75057

Previously, I thought the diagnostic mappings is not meaningful with
modules incorrectly. And this problem get revealed by another change
recently. So this patch tried to rever the previous "optimization"
partially.

Added: 
clang/test/Modules/pr75057.cppm

Modified: 
clang/lib/Serialization/GeneratePCH.cpp

Removed: 




diff  --git a/clang/lib/Serialization/GeneratePCH.cpp 
b/clang/lib/Serialization/GeneratePCH.cpp
index 7b97b73f7bbd00..53dda5f9a38580 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -114,7 +114,6 @@ void 
CXX20ModulesGenerator::HandleTranslationUnit(ASTContext ) {
   getPreprocessor().getHeaderSearchInfo().getHeaderSearchOpts();
   HSOpts.ModulesSkipDiagnosticOptions = true;
   HSOpts.ModulesSkipHeaderSearchPaths = true;
-  HSOpts.ModulesSkipPragmaDiagnosticMappings = true;
 
   PCHGenerator::HandleTranslationUnit(Ctx);
 

diff  --git a/clang/test/Modules/pr75057.cppm b/clang/test/Modules/pr75057.cppm
new file mode 100644
index 00..374c324e9f495b
--- /dev/null
+++ b/clang/test/Modules/pr75057.cppm
@@ -0,0 +1,62 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// Treat the behavior of using headers as baseline.
+// RUN: %clang_cc1 -std=c++20 %t/use-header.cc -isystem %t -fsyntax-only 
-verify
+//
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -isystem %t -emit-module-interface -o 
%t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/use-module.cc -isystem %t 
-fmodule-file=a=%t/a.pcm -fsyntax-only -verify
+
+//--- sys.h
+#ifndef SYS_H
+#define SYS_H
+
+#pragma GCC system_header
+
+template 
+struct [[deprecated]] iterator {};
+
+_Pragma("GCC diagnostic push")
+_Pragma("GCC diagnostic ignored \"-Wdeprecated\"") 
+_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
+
+template 
+struct reverse_iterator 
+: public iterator {};
+
+_Pragma("GCC diagnostic pop")
+
+template 
+class C {
+public:
+void i() {
+reverse_iterator i;
+}
+};
+
+#endif
+
+//--- use-header.cc
+// expected-no-diagnostics
+// However, we see unexpected warnings
+#include 
+
+void use() {
+C().i();
+}
+
+//--- a.cppm
+module;
+#include 
+export module a;
+export using ::iterator;
+export using ::C;
+
+//--- use-module.cc
+// expected-no-diagnostics
+import a;
+
+void use() {
+C().i();
+}



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 18268ac - [NFC] [C++20] [Modules] Use new class CXX20ModulesGenerator to generate module file for C++20 modules instead of PCHGenerator

2024-04-29 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-30T13:30:31+08:00
New Revision: 18268ac0f48d93c2bcddb69732761971669c09ab

URL: 
https://github.com/llvm/llvm-project/commit/18268ac0f48d93c2bcddb69732761971669c09ab
DIFF: 
https://github.com/llvm/llvm-project/commit/18268ac0f48d93c2bcddb69732761971669c09ab.diff

LOG: [NFC] [C++20] [Modules] Use new class CXX20ModulesGenerator to generate 
module file for C++20 modules instead of PCHGenerator

Previously we're re-using PCHGenerator to generate the module file for
C++20 modules. But this is slighty more or less odd. This patch tries
to use a new class 'CXX20ModulesGenerator' to generate the module file
for C++20 modules.

Added: 


Modified: 
clang/include/clang/Serialization/ASTWriter.h
clang/lib/Frontend/FrontendActions.cpp
clang/lib/Serialization/GeneratePCH.cpp
clang/test/Modules/pr67893.cppm
clang/test/Modules/search-partitions.cpp

Removed: 




diff  --git a/clang/include/clang/Serialization/ASTWriter.h 
b/clang/include/clang/Serialization/ASTWriter.h
index 6c45b7348b8552..4e433deaaf2dbc 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -928,17 +928,30 @@ class PCHGenerator : public SemaConsumer {
   bool hasEmittedPCH() const { return Buffer->IsComplete; }
 };
 
-class ReducedBMIGenerator : public PCHGenerator {
+class CXX20ModulesGenerator : public PCHGenerator {
 protected:
   virtual Module *getEmittingModule(ASTContext ) override;
 
+  CXX20ModulesGenerator(Preprocessor , InMemoryModuleCache ,
+StringRef OutputFile, bool GeneratingReducedBMI);
+
 public:
-  ReducedBMIGenerator(Preprocessor , InMemoryModuleCache ,
-  StringRef OutputFile);
+  CXX20ModulesGenerator(Preprocessor , InMemoryModuleCache ,
+StringRef OutputFile)
+  : CXX20ModulesGenerator(PP, ModuleCache, OutputFile,
+  /*GeneratingReducedBMI=*/false) {}
 
   void HandleTranslationUnit(ASTContext ) override;
 };
 
+class ReducedBMIGenerator : public CXX20ModulesGenerator {
+public:
+  ReducedBMIGenerator(Preprocessor , InMemoryModuleCache ,
+  StringRef OutputFile)
+  : CXX20ModulesGenerator(PP, ModuleCache, OutputFile,
+  /*GeneratingReducedBMI=*/true) {}
+};
+
 /// If we can elide the definition of \param D in reduced BMI.
 ///
 /// Generally, we can elide the definition of a declaration if it won't affect

diff  --git a/clang/lib/Frontend/FrontendActions.cpp 
b/clang/lib/Frontend/FrontendActions.cpp
index 04eb1041326713..454653a31534cd 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -272,14 +272,10 @@ bool GenerateModuleInterfaceAction::BeginSourceFileAction(
 std::unique_ptr
 GenerateModuleInterfaceAction::CreateASTConsumer(CompilerInstance ,
  StringRef InFile) {
-  CI.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
-  CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
-  CI.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings = true;
-
-  std::vector> Consumers =
-  CreateMultiplexConsumer(CI, InFile);
-  if (Consumers.empty())
-return nullptr;
+  std::vector> Consumers;
+  Consumers.push_back(std::make_unique(
+  CI.getPreprocessor(), CI.getModuleCache(),
+  CI.getFrontendOpts().OutputFile));
 
   if (CI.getFrontendOpts().GenReducedBMI &&
   !CI.getFrontendOpts().ModuleOutputPath.empty()) {

diff  --git a/clang/lib/Serialization/GeneratePCH.cpp 
b/clang/lib/Serialization/GeneratePCH.cpp
index bed74399098d7f..7b97b73f7bbd00 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -88,31 +88,28 @@ ASTDeserializationListener 
*PCHGenerator::GetASTDeserializationListener() {
   return 
 }
 
-ReducedBMIGenerator::ReducedBMIGenerator(Preprocessor ,
- InMemoryModuleCache ,
- StringRef OutputFile)
+CXX20ModulesGenerator::CXX20ModulesGenerator(Preprocessor ,
+ InMemoryModuleCache ,
+ StringRef OutputFile,
+ bool GeneratingReducedBMI)
 : PCHGenerator(
   PP, ModuleCache, OutputFile, llvm::StringRef(),
   std::make_shared(),
   /*Extensions=*/ArrayRef>(),
   /*AllowASTWithErrors*/ false, /*IncludeTimestamps=*/false,
   /*BuildingImplicitModule=*/false, /*ShouldCacheASTInMemory=*/false,
-  /*GeneratingReducedBMI=*/true) {}
+  GeneratingReducedBMI) {}
 
-Module *ReducedBMIGenerator::getEmittingModule(ASTContext ) {
+Module *CXX20ModulesGenerator::getEmittingModule(ASTContext ) {
   Module *M = Ctx.getCurrentNamedModule();
   assert(M 

[clang] 38067c5 - [C++20] [Modules] [Reduced BMI] Avoid force writing static declarations

2024-04-29 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-30T11:34:34+08:00
New Revision: 38067c50a9459caed2892e38b2ae5026a8bff8e2

URL: 
https://github.com/llvm/llvm-project/commit/38067c50a9459caed2892e38b2ae5026a8bff8e2
DIFF: 
https://github.com/llvm/llvm-project/commit/38067c50a9459caed2892e38b2ae5026a8bff8e2.diff

LOG: [C++20] [Modules] [Reduced BMI] Avoid force writing static declarations
within module purview

Close https://github.com/llvm/llvm-project/issues/90259

Technically, the static declarations shouldn't be leaked from the module
interface, otherwise it is an illegal program according to the spec. So
we can get rid of the static declarations from the reduced BMI
technically. Then we can close the above issue.

However, there are too many `static inline` codes in existing headers.
So it will be a pretty big breaking change if we do this globally.

Added: 
clang/test/Modules/pr90259.cppm

Modified: 
clang/lib/Serialization/ASTWriter.cpp

Removed: 




diff  --git a/clang/lib/Serialization/ASTWriter.cpp 
b/clang/lib/Serialization/ASTWriter.cpp
index 0408eeb6a95b00..7db60c67d71234 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -3205,6 +3205,17 @@ void ASTWriter::WriteType(QualType T) {
 // Declaration Serialization
 
//===--===//
 
+static bool IsInternalDeclFromFileContext(const Decl *D) {
+  auto *ND = dyn_cast(D);
+  if (!ND)
+return false;
+
+  if (!D->getDeclContext()->getRedeclContext()->isFileContext())
+return false;
+
+  return ND->getFormalLinkage() == Linkage::Internal;
+}
+
 /// Write the block containing all of the declaration IDs
 /// lexically declared within the given DeclContext.
 ///
@@ -3225,6 +3236,15 @@ uint64_t 
ASTWriter::WriteDeclContextLexicalBlock(ASTContext ,
 if (DoneWritingDeclsAndTypes && !wasDeclEmitted(D))
   continue;
 
+// We don't need to write decls with internal linkage into reduced BMI.
+// If such decls gets emitted due to it get used from inline functions,
+// the program illegal. However, there are too many use of static inline
+// functions in the global module fragment and it will be breaking change
+// to forbid that. So we have to allow to emit such declarations from GMF.
+if (GeneratingReducedBMI && !D->isFromExplicitGlobalModule() &&
+IsInternalDeclFromFileContext(D))
+  continue;
+
 KindDeclPairs.push_back(D->getKind());
 KindDeclPairs.push_back(GetDeclRef(D).get());
   }
@@ -3886,6 +3906,13 @@ class ASTDeclContextNameLookupTrait {
   !Writer.wasDeclEmitted(DeclForLocalLookup))
 continue;
 
+  // Try to avoid writing internal decls to reduced BMI.
+  // See comments in ASTWriter::WriteDeclContextLexicalBlock for details.
+  if (Writer.isGeneratingReducedBMI() &&
+  !DeclForLocalLookup->isFromExplicitGlobalModule() &&
+  IsInternalDeclFromFileContext(DeclForLocalLookup))
+continue;
+
   DeclIDs.push_back(Writer.GetDeclRef(DeclForLocalLookup));
 }
 return std::make_pair(Start, DeclIDs.size());
@@ -4257,6 +4284,12 @@ uint64_t 
ASTWriter::WriteDeclContextVisibleBlock(ASTContext ,
 if (DoneWritingDeclsAndTypes && !wasDeclEmitted(ND))
   continue;
 
+// We don't need to force emitting internal decls into reduced BMI.
+// See comments in ASTWriter::WriteDeclContextLexicalBlock for details.
+if (GeneratingReducedBMI && !ND->isFromExplicitGlobalModule() &&
+IsInternalDeclFromFileContext(ND))
+  continue;
+
 GetDeclRef(ND);
   }
 }
@@ -4917,8 +4950,7 @@ void ASTWriter::PrepareWritingSpecialDecls(Sema ) 
{
   // is ill-formed. However, in practice, there are a lot of projects
   // uses `static inline` in the headers. So we can't get rid of all
   // static entities in reduced BMI now.
-  if (auto *ND = dyn_cast(D);
-  ND && ND->getFormalLinkage() == Linkage::Internal)
+  if (IsInternalDeclFromFileContext(D))
 continue;
 }
 

diff  --git a/clang/test/Modules/pr90259.cppm b/clang/test/Modules/pr90259.cppm
new file mode 100644
index 00..17786998a2a729
--- /dev/null
+++ b/clang/test/Modules/pr90259.cppm
@@ -0,0 +1,44 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/mod1.cppm -emit-reduced-module-interface -o 
%t/mod-mod1.pcm
+// RUN: %clang_cc1 -std=c++20 %t/mod.cppm -fprebuilt-module-path=%t  \
+// RUN: -emit-reduced-module-interface -o %t/mod.pcm
+// RUN: %clang_cc1 -std=c++20 %t/use.cpp -fprebuilt-module-path=%t -verify 
-fsyntax-only
+
+//--- mod1.cppm
+export module mod:mod1;
+namespace {
+int abc = 43;
+}
+namespace mod {
+static int def = 44;
+}
+export int f() {
+return abc + mod::def;
+}
+
+//--- mod.cppm
+// expected-no-diagnostics
+export module 

[clang] [Coroutines][Test] Specify target triple in coro-elide-thinlto (PR #90549)

2024-04-29 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 approved this pull request.


https://github.com/llvm/llvm-project/pull/90549
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [coro] Lower `llvm.coro.await.suspend.handle` to resume with tail call (PR #89751)

2024-04-29 Thread Chuanqi Xu via cfe-commits


@@ -1056,6 +1083,25 @@ void CoroCloner::create() {
   // Set up the new entry block.
   replaceEntryBlock();
 
+  // Turn symmetric transfers into musttail calls.
+  for (CallInst *ResumeCall : Shape.SymmetricTransfers) {
+ResumeCall = cast(VMap[ResumeCall]);
+ResumeCall->setCallingConv(NewF->getCallingConv());
+if (TTI.supportsTailCallFor(ResumeCall)) {
+  // FIXME: Could we support symmetric transfer effectively without
+  // musttail?
+  ResumeCall->setTailCallKind(CallInst::TCK_MustTail);
+}
+
+// Put a 'ret void' after the call, and split any remaining instructions to

ChuanqiXu9 wrote:

> Also, maybe this would become moot if we address 
> https://discourse.llvm.org/t/coro-pre-split-handling-of-the-suspend-edge/75043
>  like @jyknight suggested (i.e. not even have the misleading edge)?

But IIRC, it is still possible that we'll have code inserted between 
`llvm.coro.await.suspend.{.*}` and `llvm.coro.suspend`, which is the problem 
we're discussing.

> Sorry for insisting on this, it's maybe because I got "bitten" before (with 
> the suspend), but what other examples do we have where, silently, 
> instructions don't get executed after a call?

If I read correctly, @zmodem said he'd like to mention this in the doc or check 
it by assertions or verifiers. So it looks consensus to me?

https://github.com/llvm/llvm-project/pull/89751
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 367efa0 - [NFC] [Modules] Avoid scanning the stored decl list twice when replace

2024-04-28 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-28T18:31:10+08:00
New Revision: 367efa0b0542e6f4171e8c914728946c302ab24b

URL: 
https://github.com/llvm/llvm-project/commit/367efa0b0542e6f4171e8c914728946c302ab24b
DIFF: 
https://github.com/llvm/llvm-project/commit/367efa0b0542e6f4171e8c914728946c302ab24b.diff

LOG: [NFC] [Modules] Avoid scanning the stored decl list twice when replace
external decls

This patch fixes a FIXME in StoredDeclList::replaceExternalDecls.

StoredDeclList::replaceExternalDecls will iterate the list first to
remove some declarations and iterate the list again to get the tail of
the list.

It should be better to avoid the second iterations.

Added: 


Modified: 
clang/include/clang/AST/DeclContextInternals.h

Removed: 




diff  --git a/clang/include/clang/AST/DeclContextInternals.h 
b/clang/include/clang/AST/DeclContextInternals.h
index 42cc677f82135e..e169c485921929 100644
--- a/clang/include/clang/AST/DeclContextInternals.h
+++ b/clang/include/clang/AST/DeclContextInternals.h
@@ -42,11 +42,12 @@ class StoredDeclsList {
   /// external declarations.
   DeclsAndHasExternalTy Data;
 
-  template
-  void erase_if(Fn ShouldErase) {
+  template  DeclListNode::Decls *erase_if(Fn ShouldErase) {
 Decls List = Data.getPointer();
+
 if (!List)
-  return;
+  return nullptr;
+
 ASTContext  = getASTContext();
 DeclListNode::Decls NewHead = nullptr;
 DeclListNode::Decls *NewLast = nullptr;
@@ -79,6 +80,17 @@ class StoredDeclsList {
 Data.setPointer(NewHead);
 
 assert(llvm::none_of(getLookupResult(), ShouldErase) && "Still exists!");
+
+if (!Data.getPointer())
+  // All declarations are erased.
+  return nullptr;
+else if (NewHead.is())
+  // The list only contains a declaration, the header itself.
+  return (DeclListNode::Decls *)
+else {
+  assert(NewLast && NewLast->is() && "Not the tail?");
+  return NewLast;
+}
   }
 
   void erase(NamedDecl *ND) {
@@ -161,7 +173,7 @@ class StoredDeclsList {
   void replaceExternalDecls(ArrayRef Decls) {
 // Remove all declarations that are either external or are replaced with
 // external declarations with higher visibilities.
-erase_if([Decls](NamedDecl *ND) {
+DeclListNode::Decls *Tail = erase_if([Decls](NamedDecl *ND) {
   if (ND->isFromASTFile())
 return true;
   // FIXME: Can we get rid of this loop completely?
@@ -189,24 +201,15 @@ class StoredDeclsList {
   DeclsAsList = Node;
 }
 
-DeclListNode::Decls Head = Data.getPointer();
-if (Head.isNull()) {
+if (!Data.getPointer()) {
   Data.setPointer(DeclsAsList);
   return;
 }
 
-// Find the end of the existing list.
-// FIXME: It would be possible to preserve information from erase_if to
-// avoid this rescan looking for the end of the list.
-DeclListNode::Decls *Tail = 
-while (DeclListNode *Node = Tail->dyn_cast())
-  Tail = >Rest;
-
 // Append the Decls.
 DeclListNode *Node = C.AllocateDeclListNode(Tail->get());
 Node->Rest = DeclsAsList;
 *Tail = Node;
-Data.setPointer(Head);
   }
 
   /// Return the list of all the decls.



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 487967a - [Modules] Don't replace local declarations with external declaration with lower visibility

2024-04-28 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-28T15:23:39+08:00
New Revision: 487967af82053cd08022635a2ff768385d936c80

URL: 
https://github.com/llvm/llvm-project/commit/487967af82053cd08022635a2ff768385d936c80
DIFF: 
https://github.com/llvm/llvm-project/commit/487967af82053cd08022635a2ff768385d936c80.diff

LOG: [Modules] Don't replace local declarations with external declaration with 
lower visibility

Close https://github.com/llvm/llvm-project/issues/88400

For the reproducer:

```
//--- header.h

namespace N {
template
concept X = true;

template
class Y {
public:
template
friend class Y;
};

inline Y x;
}

//--- bar.cppm
module;
export module bar;
namespace N {
// To make sure N::Y won't get elided.
using N::x;
}

//--- foo.cc
// expected-no-diagnostics
import bar;
void y() {
N::Y y{};
};
```

it will crash. The root cause is that in
`StoredDeclsList::replaceExternalDecls`, we will replace the
existing declarations with external declarations.

Then for the reproducer, the redecl chain for Y is like:

```
Y (Local) -> Y (Local, friend) -> Y (Imported) -> Y(Imported, friend)
```

Before the lookup, the stored lookup result is `Y(Local)` then we find
`Y(Imported)`. And now we repalce `Y(Local)` with `Y(Imported)`. But
`Y(Imported)` is not visible. So we tried to find if there is any
redeclarations visible but we find `Y(Local, friend)`, then problem
happens.

The solution is try to avoid the replace to happen if the external
declaration has lower visibility then we can always find the local
declarations. This may help the lookup performance slightly.

Also I found the implementation of
`StoredDeclsList::replaceExternalDecls` is not efficiency. It has an
`O(n*m)` complexities. But let's improve that in the future.

Added: 
clang/test/Modules/pr88400.cppm

Modified: 
clang/include/clang/AST/DeclContextInternals.h

Removed: 




diff  --git a/clang/include/clang/AST/DeclContextInternals.h 
b/clang/include/clang/AST/DeclContextInternals.h
index c4734ab5789538..42cc677f82135e 100644
--- a/clang/include/clang/AST/DeclContextInternals.h
+++ b/clang/include/clang/AST/DeclContextInternals.h
@@ -160,12 +160,16 @@ class StoredDeclsList {
 
   void replaceExternalDecls(ArrayRef Decls) {
 // Remove all declarations that are either external or are replaced with
-// external declarations.
+// external declarations with higher visibilities.
 erase_if([Decls](NamedDecl *ND) {
   if (ND->isFromASTFile())
 return true;
+  // FIXME: Can we get rid of this loop completely?
   for (NamedDecl *D : Decls)
-if (D->declarationReplaces(ND, /*IsKnownNewer=*/false))
+// Only replace the local declaration if the external declaration has
+// higher visibilities.
+if (D->getModuleOwnershipKind() <= ND->getModuleOwnershipKind() &&
+D->declarationReplaces(ND, /*IsKnownNewer=*/false))
   return true;
   return false;
 });

diff  --git a/clang/test/Modules/pr88400.cppm b/clang/test/Modules/pr88400.cppm
new file mode 100644
index 00..ff69137a0b9040
--- /dev/null
+++ b/clang/test/Modules/pr88400.cppm
@@ -0,0 +1,61 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/bar.cppm -emit-module-interface -o %t/bar.pcm
+// RUN: %clang_cc1 -std=c++20 %t/foo.cc -fmodule-file=bar=%t/bar.pcm 
-fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/bar.cc -fmodule-file=bar=%t/bar.pcm 
-fsyntax-only -verify
+//
+// RUN: %clang_cc1 -std=c++20 %t/bar.cppm -emit-reduced-module-interface -o 
%t/bar.pcm
+// RUN: %clang_cc1 -std=c++20 %t/foo.cc -fmodule-file=bar=%t/bar.pcm 
-fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/bar.cc -fmodule-file=bar=%t/bar.pcm 
-fsyntax-only -verify
+
+//--- header.h
+#pragma once
+
+namespace N {
+template
+concept X = true;
+
+template
+class Y {
+public:
+template
+friend class Y;
+};
+
+inline Y x;
+}
+
+//--- bar.cppm
+module;
+
+#include "header.h"
+
+export module bar;
+
+namespace N {
+// To make sure N::Y won't get elided.
+using N::x;
+}
+
+//--- foo.cc
+// expected-no-diagnostics
+#include "header.h"
+
+import bar;
+
+void y() {
+N::Y y{};
+};
+
+//--- bar.cc
+// expected-no-diagnostics
+import bar;
+
+#include "header.h"
+
+void y() {
+N::Y y{};
+};
+



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revise the modules document for clarity (PR #90237)

2024-04-27 Thread Chuanqi Xu via cfe-commits


@@ -530,43 +527,43 @@ Now the linkage name of ``NS::foo()`` will be 
``_ZN2NS3fooEv``.
 Module Initializers
 ~~~
 
-All the importable module units are required to emit an initializer function.
-The initializer function should contain calls to importing modules first and
-all the dynamic-initializers in the current module unit then.
-
-Translation units explicitly or implicitly importing named modules must call
-the initializer functions of the imported named modules within the sequence of
-the dynamic-initializers in the TU. Initializations of entities at namespace
-scope are appearance-ordered. This (recursively) extends into imported modules
-at the point of appearance of the import declaration.
+All importable module units are required to emit an initializer function. The
+initializer function emits calls to imported modules first followed by calls
+to all to dynamic initializers in the current module unit.
 
-It is allowed to omit calls to importing modules if it is known empty.
+Translation units that explicitly or implicitly import a named module must call
+the initializer functions of the imported named module within the sequence of
+the dynamic initializers in the translation unit. Initializations of entities
+at namespace scope are appearance-ordered. This (recursively) extends to
+imported modules at the point of appearance of the import declaration.
 
-It is allowed to omit calls to importing modules for which is known to be 
called.
+If the imported module is known to be empty, the call to its initializer may be
+omitted. Additionally, if the imported module is known to have already been
+imported, the call to its initializer may be omitted.
 
 Reduced BMI
 ---
 
-To support the 2 phase compilation model, Clang chose to put everything needed 
to
-produce an object into the BMI. But every consumer of the BMI, except itself, 
doesn't
-need such informations. It makes the BMI to larger and so may introduce 
unnecessary
-dependencies into the BMI. To mitigate the problem, we decided to reduce the 
information
-contained in the BMI.
+To support the two-phase compilation model, Clang puts everything needed to
+produce an object into the BMI. However, other consumers of the BMI generally
+don't need that informations. This makes the BMI larger and may introduce
+unnecessary dependencies for the BMI. To mitigate the problem, Clang added a
+compiler option to reduce the information contained in the BMI. These two
+formats are known as Full BMI and Reduced BMI, respectively.
 
-To be clear, we call the default BMI as Full BMI and the new introduced BMI as 
Reduced
-BMI.
+Users can use the ``-fexperimental-modules-reduced-bmi`` option to produce a
+Reduced BMI.

ChuanqiXu9 wrote:

Do you mean `Reduced BMI`? The name was discussed in 
`https://discourse.llvm.org/t/rfc-c-20-modules-introduce-thin-bmi-and-decls-hash/74755/52`.

https://github.com/llvm/llvm-project/pull/90237
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revise the modules document for clarity (PR #90237)

2024-04-27 Thread Chuanqi Xu via cfe-commits


@@ -8,79 +8,60 @@ Standard C++ Modules
 Introduction
 
 
-The term ``modules`` has a lot of meanings. For the users of Clang, modules may
-refer to ``Objective-C Modules``, ``Clang C++ Modules`` (or ``Clang Header 
Modules``,
-etc.) or ``Standard C++ Modules``. The implementation of all these kinds of 
modules in Clang
-has a lot of shared code, but from the perspective of users, their semantics 
and
-command line interfaces are very different. This document focuses on
-an introduction of how to use standard C++ modules in Clang.
-
-There is already a detailed document about `Clang modules `_, it
-should be helpful to read `Clang modules `_ if you want to know
-more about the general idea of modules. Since standard C++ modules have 
different semantics
-(and work flows) from `Clang modules`, this page describes the background and 
use of
-Clang with standard C++ modules.
-
-Modules exist in two forms in the C++ Language Specification. They can refer to
-either "Named Modules" or to "Header Units". This document covers both forms.
+The term ``modules`` has a lot of meanings. For Clang users, modules may refer
+to ``Objective-C Modules``, `Clang Modules `_ (also called
+``Clang Header Modules``, etc.) or ``C++20 Modules`` (or
+``Standard C++ Modules``). The implementation of all these kinds of modules in
+Clang shares a lot of code, but from the perspective of users, their semantics
+and command line interfaces are very different. This document focuses on an
+introduction of focusing on the use of C++20 modules in Clang. In the remainder
+of this document, the term ``modules`` will refer to Standard C++20 modules and
+the term ``Clang modules`` will refer to the Clang modules extension.
+
+Modules exist in two forms in the C++ Standard. They can refer to either
+"Named Modules" or "Header Units". This document covers both forms.
 
 Standard C++ Named modules
 ==
 
-This document was intended to be a manual first and foremost, however, we 
consider it helpful to
-introduce some language background here for readers who are not familiar with
-the new language feature. This document is not intended to be a language
-tutorial; it will only introduce necessary concepts about the
-structure and building of the project.
+In order to understand compiler behavior, it is helpful to introduce some
+language background here for readers who are not familiar with the C++ feature.
+This document is not a tutorial on C++; it only introduces necessary concepts
+to better understand use of modules for a project.
 
 Background and terminology
 --
 
-Modules
-~~~
-
-In this document, the term ``Modules``/``modules`` refers to standard C++ 
modules
-feature if it is not decorated by ``Clang``.
-
-Clang Modules
-~
-
-In this document, the term ``Clang Modules``/``Clang modules`` refer to Clang
-c++ modules extension. These are also known as ``Clang header modules``,
-``Clang module map modules`` or ``Clang c++ modules``.
-
 Module and module unit
 ~~
 
-A module consists of one or more module units. A module unit is a special
-translation unit. Every module unit must have a module declaration. The syntax
-of the module declaration is:
+A module consists of one or more module units. A module unit is a special kind
+of translation unit. Every module unit must have a module declaration. The

ChuanqiXu9 wrote:

Technically not true: https://eel.is/c++draft/gram.basic#:translation-unit.

More specifically, a valid module unit may be:

```
module;
#include 
export module M;
...
```

Here the first `module;` keywords is not considered as module declaration.

https://github.com/llvm/llvm-project/pull/90237
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revise the modules document for clarity (PR #90237)

2024-04-27 Thread Chuanqi Xu via cfe-commits


@@ -577,15 +574,16 @@ the generated BMI specified by ``-o`` will be full BMI 
and the BMI specified by
-> ...
-> consumer_n.cpp
 
-We don't emit diagnostics if ``-fexperimental-modules-reduced-bmi`` is used 
with a non-module
-unit. This design helps the end users of one phase compilation model to 
perform experiments
-early without asking for the help of build systems. The users of build systems 
which supports
-two phase compilation model still need helps from build systems.
+Clang does not emit diagnostics when ``-fexperimental-modules-reduced-bmi`` is
+used with a non-module unit. This design helps the end users of the one-phase
+compilation model to perform experiments without needing to modify the build

ChuanqiXu9 wrote:

Or try Reduced BMI?

https://github.com/llvm/llvm-project/pull/90237
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revise the modules document for clarity (PR #90237)

2024-04-27 Thread Chuanqi Xu via cfe-commits


@@ -738,22 +736,21 @@ the following style significantly:
   import M;
   ... // use declarations from module M.
 
-The key part of the tip is to reduce the duplications from the text includes.
+Reducing the duplication from textual includes is what improves compile-time
+performance.
 
-Ideas for converting to modules

+Transitioning to modules
+
 
-For new libraries, we encourage them to use modules completely from day one if 
possible.
-This will be pretty helpful to make the whole ecosystems to get ready.
+New code and libraries should use modules from the start if possible. However,

ChuanqiXu9 wrote:

I am not sure if `should` is a too strong term from the non-native speaker 
perspective.

https://github.com/llvm/llvm-project/pull/90237
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revise the modules document for clarity (PR #90237)

2024-04-27 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 commented:

Big thanks!

I left some comments about correctness or clearness. And all other change looks 
good to me.

https://github.com/llvm/llvm-project/pull/90237
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revise the modules document for clarity (PR #90237)

2024-04-27 Thread Chuanqi Xu via cfe-commits


@@ -8,79 +8,60 @@ Standard C++ Modules
 Introduction
 
 
-The term ``modules`` has a lot of meanings. For the users of Clang, modules may
-refer to ``Objective-C Modules``, ``Clang C++ Modules`` (or ``Clang Header 
Modules``,
-etc.) or ``Standard C++ Modules``. The implementation of all these kinds of 
modules in Clang
-has a lot of shared code, but from the perspective of users, their semantics 
and
-command line interfaces are very different. This document focuses on
-an introduction of how to use standard C++ modules in Clang.
-
-There is already a detailed document about `Clang modules `_, it
-should be helpful to read `Clang modules `_ if you want to know
-more about the general idea of modules. Since standard C++ modules have 
different semantics
-(and work flows) from `Clang modules`, this page describes the background and 
use of
-Clang with standard C++ modules.
-
-Modules exist in two forms in the C++ Language Specification. They can refer to
-either "Named Modules" or to "Header Units". This document covers both forms.
+The term ``modules`` has a lot of meanings. For Clang users, modules may refer
+to ``Objective-C Modules``, `Clang Modules `_ (also called
+``Clang Header Modules``, etc.) or ``C++20 Modules`` (or
+``Standard C++ Modules``). The implementation of all these kinds of modules in
+Clang shares a lot of code, but from the perspective of users, their semantics
+and command line interfaces are very different. This document focuses on an
+introduction of focusing on the use of C++20 modules in Clang. In the remainder
+of this document, the term ``modules`` will refer to Standard C++20 modules and
+the term ``Clang modules`` will refer to the Clang modules extension.
+
+Modules exist in two forms in the C++ Standard. They can refer to either
+"Named Modules" or "Header Units". This document covers both forms.
 
 Standard C++ Named modules
 ==
 
-This document was intended to be a manual first and foremost, however, we 
consider it helpful to
-introduce some language background here for readers who are not familiar with
-the new language feature. This document is not intended to be a language
-tutorial; it will only introduce necessary concepts about the
-structure and building of the project.
+In order to understand compiler behavior, it is helpful to introduce some
+language background here for readers who are not familiar with the C++ feature.
+This document is not a tutorial on C++; it only introduces necessary concepts
+to better understand use of modules for a project.
 
 Background and terminology
 --
 
-Modules
-~~~
-
-In this document, the term ``Modules``/``modules`` refers to standard C++ 
modules
-feature if it is not decorated by ``Clang``.
-
-Clang Modules
-~
-
-In this document, the term ``Clang Modules``/``Clang modules`` refer to Clang
-c++ modules extension. These are also known as ``Clang header modules``,
-``Clang module map modules`` or ``Clang c++ modules``.
-
 Module and module unit
 ~~
 
-A module consists of one or more module units. A module unit is a special
-translation unit. Every module unit must have a module declaration. The syntax
-of the module declaration is:
+A module consists of one or more module units. A module unit is a special kind
+of translation unit. Every module unit must have a module declaration. The
+syntax of the module declaration is:
 
 .. code-block:: c++
 
   [export] module module_name[:partition_name];
 
-Terms enclosed in ``[]`` are optional. The syntax of ``module_name`` and 
``partition_name``
-in regex form corresponds to ``[a-zA-Z_][a-zA-Z_0-9\.]*``. In particular, a 
literal dot ``.``
-in the name has no semantic meaning (e.g. implying a hierarchy).
-
-In this document, module units are classified into:
-
-* Primary module interface unit.
-
-* Module implementation unit.
+Terms enclosed in ``[]`` are optional. ``module_name`` and ``partition_name``
+are typical C++ identifiers, except that they may contain a period (``.``).

ChuanqiXu9 wrote:

Yes

https://github.com/llvm/llvm-project/pull/90237
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revise the modules document for clarity (PR #90237)

2024-04-27 Thread Chuanqi Xu via cfe-commits


@@ -312,75 +300,76 @@ So all of the following name is not valid by default:
 __test
 // and so on ...
 
-If you still want to use the reserved module names for any reason, use
-``-Wno-reserved-module-identifier`` to suppress the warning.
+Using a reserved module name is strongly discouraged, but
+``-Wno-reserved-module-identifier`` can be used to suppress the warning.
 
-How to specify the dependent BMIs
-~
+Specifying dependent BMIs
+~
 
-There are 3 methods to specify the dependent BMIs:
+There are 3 ways to specify a dependent BMI:
 
-* (1) ``-fprebuilt-module-path=``.
-* (2) ``-fmodule-file=`` (Deprecated).
-* (3) ``-fmodule-file==``.
+1. ``-fprebuilt-module-path=``.
+2. ``-fmodule-file=`` (Deprecated).
+3. ``-fmodule-file==``.
 
-The option ``-fprebuilt-module-path`` tells the compiler the path where to 
search for dependent BMIs.
-It may be used multiple times just like ``-I`` for specifying paths for header 
files. The look up rule here is:
+The ``-fprebuilt-module-path`` option specifies the path to search for
+dependent BMIs. Multiple paths may be specified, similar to using ``-I`` to
+specify a search path for header files. When importing a module ``M``, the
+compiler looks for ``M.pcm`` in the directories specified by
+``-fprebuilt-module-path``. Similarly,  When importing a partition module unit
+``M:P``, the compiler looks for ``M-P.pcm`` in the directories specified by
+``-fprebuilt-module-path``.
 
-* (1) When we import module M. The compiler would look up M.pcm in the 
directories specified
-  by ``-fprebuilt-module-path``.
-* (2) When we import partition module unit M:P. The compiler would look up 
M-P.pcm in the
-  directories specified by ``-fprebuilt-module-path``.
-
-The option ``-fmodule-file=`` tells the compiler to load the 
specified BMI directly.
-The option ``-fmodule-file==`` tells the compiler to 
load the specified BMI
-for the module specified by  when necessary. The main 
difference is that
+The ``-fmodule-file=`` option causes the compiler to load the
+specified BMI directly. The ``-fmodule-file==``
+option causes the compiler to load the specified BMI for the module specified
+by  when necessary. The main difference is that
 ``-fmodule-file=`` will load the BMI eagerly, whereas
-``-fmodule-file==`` will only load the BMI lazily, 
which is similar
-with ``-fprebuilt-module-path``. The option ``-fmodule-file=`` 
for named modules is deprecated
-and is planning to be removed in future versions.
+``-fmodule-file==`` will only load the BMI lazily,
+which is similar to ``-fprebuilt-module-path``. The
+``-fmodule-file=`` option for named modules is deprecated and will
+be removed in a future version of Clang.
 
-In case all ``-fprebuilt-module-path=``, 
``-fmodule-file=`` and
-``-fmodule-file==`` exist, the 
``-fmodule-file=`` option
-takes highest precedence and ``-fmodule-file==`` 
will take the second
-highest precedence.
+When these options are specified in the same invocation of the compiler, the
+``-fmodule-file=`` option takes precedence over
+``-fmodule-file==``, which takes precedence over
+``-fprebuilt-module-path=``.
 
-We need to specify all the dependent (directly and indirectly) BMIs.
-See https://github.com/llvm/llvm-project/issues/62707 for detail.
+Note: you must specify all the (directly or indirectly) dependent BMIs
+explicitly. See https://github.com/llvm/llvm-project/issues/62707 for details.
 
-When we compile a ``module implementation unit``, we must specify the BMI of 
the corresponding
-``primary module interface unit``.
-Since the language specification says a module implementation unit implicitly 
imports
-the primary module interface unit.
+When compiling a ``module implementation unit``, the BMI of the corresponding
+``primary module interface unit`` must be specified. This is because a module
+implementation unit implicitly imports the primary module interface unit.
 
   [module.unit]p8
 
   A module-declaration that contains neither an export-keyword nor a 
module-partition implicitly
   imports the primary module interface unit of the module as if by a 
module-import-declaration.
 
-All of the 3 options ``-fprebuilt-module-path=``, 
``-fmodule-file=``
-and ``-fmodule-file==`` may occur multiple times.
-For example, the command line to compile ``M.cppm`` in
-the above example could be rewritten into:
+The ``-fprebuilt-module-path=``, 
``-fmodule-file=``, 
+and ``-fmodule-file==`` options may be specified
+multiple times. For example, the command line to compile ``M.cppm`` in
+the previous example could be rewritten as:
 
 .. code-block:: console
 
   $ clang++ -std=c++20 M.cppm --precompile 
-fmodule-file=M:interface_part=M-interface_part.pcm 
-fmodule-file=M:impl_part=M-impl_part.pcm -o M.pcm
 
 When there are multiple ``-fmodule-file==`` options for the same
-, the last ``-fmodule-file==`` will override the 
previous
-``-fmodule-file==`` options.
+, 

[clang] Revise the modules document for clarity (PR #90237)

2024-04-27 Thread Chuanqi Xu via cfe-commits


@@ -8,109 +8,91 @@ Standard C++ Modules
 Introduction
 
 
-The term ``modules`` has a lot of meanings. For the users of Clang, modules may
-refer to ``Objective-C Modules``, ``Clang C++ Modules`` (or ``Clang Header 
Modules``,
-etc.) or ``Standard C++ Modules``. The implementation of all these kinds of 
modules in Clang
-has a lot of shared code, but from the perspective of users, their semantics 
and
-command line interfaces are very different. This document focuses on
-an introduction of how to use standard C++ modules in Clang.
-
-There is already a detailed document about `Clang modules `_, it
-should be helpful to read `Clang modules `_ if you want to know
-more about the general idea of modules. Since standard C++ modules have 
different semantics
-(and work flows) from `Clang modules`, this page describes the background and 
use of
-Clang with standard C++ modules.
-
-Modules exist in two forms in the C++ Language Specification. They can refer to
-either "Named Modules" or to "Header Units". This document covers both forms.
+The term ``module`` has a lot of meanings. For Clang users, a module may refer
+to an ``Objective-C Module``, `Clang Module `_ (also called a
+``Clang Header Module``) or a ``C++20 Module`` (or a ``Standard C++ Module``).
+The implementation of all these kinds of modules in Clang shares a lot of code,
+but from the perspective of users, their semantics and command line interfaces
+are very different. This document focuses on an introduction to the use of
+C++20 modules in Clang. In the remainder of this document, the term ``module``
+will refer to Standard C++20 modules and the term ``Clang module`` will refer
+to the Clang modules extension.
+
+Modules exist in two forms in the C++ Standard. They can refer to either
+"Named Modules" or "Header Units". This document covers both forms.
 
 Standard C++ Named modules
 ==
 
-This document was intended to be a manual first and foremost, however, we 
consider it helpful to
-introduce some language background here for readers who are not familiar with
-the new language feature. This document is not intended to be a language
-tutorial; it will only introduce necessary concepts about the
-structure and building of the project.
+In order to understand compiler behavior, it is helpful to introduce some
+terms and definitions for readers who are not familiar with the C++ feature.
+This document is not a tutorial on C++; it only introduces necessary concepts
+to better understand use of modules in a project.
 
 Background and terminology
 --
 
-Modules
-~~~
-
-In this document, the term ``Modules``/``modules`` refers to standard C++ 
modules
-feature if it is not decorated by ``Clang``.
-
-Clang Modules
-~
-
-In this document, the term ``Clang Modules``/``Clang modules`` refer to Clang
-c++ modules extension. These are also known as ``Clang header modules``,
-``Clang module map modules`` or ``Clang c++ modules``.
-
 Module and module unit
 ~~
 
-A module consists of one or more module units. A module unit is a special
-translation unit. Every module unit must have a module declaration. The syntax
-of the module declaration is:
+A module consists of one or more module units. A module unit is a special kind
+of translation unit. Every module unit must have a module declaration. The
+syntax of the module declaration is:
 
 .. code-block:: c++
 
   [export] module module_name[:partition_name];
 
-Terms enclosed in ``[]`` are optional. The syntax of ``module_name`` and 
``partition_name``
-in regex form corresponds to ``[a-zA-Z_][a-zA-Z_0-9\.]*``. In particular, a 
literal dot ``.``
-in the name has no semantic meaning (e.g. implying a hierarchy).
+Terms enclosed in ``[]`` are optional. ``module_name`` and ``partition_name``
+are typical C++ identifiers, except that they may contain a period (``.``).
+Note that a ``.`` in the name has no semantic meaning (e.g. implying a
+hierarchy or referring to the file system).
 
-In this document, module units are classified into:
+In this document, module units are classified as:
 
-* Primary module interface unit.
-
-* Module implementation unit.
-
-* Module interface partition unit.
-
-* Internal module partition unit.
+* Primary module interface unit
+* Module implementation unit
+* Module partition interface unit
+* Module partition implementation unit
 
 A primary module interface unit is a module unit whose module declaration is
-``export module module_name;``. The ``module_name`` here denotes the name of 
the
+``export module module_name;`` where ``module_name`` denotes the name of the
 module. A module should have one and only one primary module interface unit.
 
 A module implementation unit is a module unit whose module declaration is
-``module module_name;``. A module could have multiple module implementation
-units with the same declaration.
+``module module_name;``. Multiple module 

[clang] Revise the modules document for clarity (PR #90237)

2024-04-27 Thread Chuanqi Xu via cfe-commits


@@ -925,45 +923,41 @@ In that case, you need to convert your source files (.cpp 
files) to module imple
   // Following off should be unchanged.
   ...
 
-The module implementation unit will import the primary module implicitly.
-We don't include any headers in the module implementation units
-here since we want to avoid duplicated declarations between translation units.
-This is the reason why we add non-exported using declarations from the third
-party libraries in the primary module interface unit.
+The module implementation unit will import the primary module implicitly. Do
+not include any headers in the module implementation units because that avoids
+duplicated declarations between translation units. This is why non-exported
+using declarations are added from third-party libraries in the primary module
+interface unit.
 
-And if you provide your library as ``libyour_library.so``, you probably need to
-provide a modular one ``libyour_library_modules.so`` since you changed the ABI.
+If the library is provided as ``libyour_library.so``, a modular library (e.g.,
+``libyour_library_modules.so``) may also need to be provided for ABI
+compatibility.
 
 What if there are headers only inclued by the source files
 ^^
 
-The above practice may be problematic if there are headers only included by 
the source
-files. If you're using private module fragment, you may solve the issue by 
including them
-in the private module fragment. While it is OK to solve it by including the 
implementation
-headers in the module purview if you're using implementation module units, it 
may be
-suboptimal since the primary module interface units now containing entities 
not belongs
-to the interface.
-
-If you're a perfectionist, maybe you can improve it by introducing internal 
module partition unit.
+The above practice may be problematic if there are headers only included by the
+source files. When using a private module fragment, this issue may be solved by
+including those headers in the private module fragment. While it is OK to solve
+it by including the implementation headers in the module purview when using
+implementation module units, it may be suboptimal because the primary module
+interface units now contain entities that do not belong to the interface.
 
-The internal module partition unit is an importable module unit which is 
internal
-to the module itself. The concept just meets the headers only included by the 
source files.
-
-We don't show code snippet since it may be too verbose or not good or not 
general.
-But it may not be too hard if you can understand the points of the section.
+This can potentially be improved by introducing module partition implementation
+unit. The module partition implementation unit is an importable module unit
+which is internal to the module itself. However, this approach may not always
+be the best way forward.

ChuanqiXu9 wrote:

Maybe I misunderstand the sentence "However, this approach may not always
be the best way forward." But it reads as, it is not good to use `module 
partition implementation unit`. This is not true.

https://github.com/llvm/llvm-project/pull/90237
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revise the modules document for clarity (PR #90237)

2024-04-27 Thread Chuanqi Xu via cfe-commits


@@ -400,24 +389,27 @@ And the compilation process for module units are like:
 mod1.cppm -> clang++ mod1.cppm ... -> mod1.pcm --,--> clang++ 
mod1.pcm ... -> mod1.o -+
 src2.cpp +> clang++ 
src2.cpp ---> src2.o -'
 
-As the diagrams show, we need to compile the BMI from module units to object 
files and link the object files.
-(But we can't do this for the BMI from header units. See the later section for 
the definition of header units)
+As the diagrams show, we need to compile the BMI from module units to object
+files and then link the object files. (However, we can't do this for the BMI
+from header units. See the section on :ref:`header units ` for
+more details.
 
-If we want to create a module library, we can't just ship the BMIs in an 
archive.
-We must compile these BMIs(``*.pcm``) into object files(``*.o``) and add those 
object files to the archive instead.
+BMIs cannot be shipped in an archive to create a module library. Instead, the
+BMIs(``*.pcm``) are compiled into object files(``*.o``) and those object files
+are added to the archive instead.
 
-Consistency Requirement
-~~~
+Consistency Requirements
+
 
-If we envision modules as a cache to speed up compilation, then - as with 
other caching techniques -
-it is important to keep cache consistency.
-So **currently** Clang will do very strict check for consistency.
+If modules are thought of as a kind of cache to speed up compilation, then, as
+with other caching techniques, it is important to keep cache consistency. Clang
+does very strict checking for that.
 
 Options consistency
 ^^^
 
-The language option of module units and their non-module-unit users should be 
consistent.
-The following example is not allowed:
+Language dialect compiler options for module units and their non-module-unit

ChuanqiXu9 wrote:

This is my first time to see the term `Language dialect compiler options`. 
Maybe it is better to explain the language option as compiler options may 
affect the semantics of the program if the term "language option" is not clear?



https://github.com/llvm/llvm-project/pull/90237
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revise the modules document for clarity (PR #90237)

2024-04-27 Thread Chuanqi Xu via cfe-commits


@@ -216,51 +198,56 @@ We explain the options in the following sections.
 How to enable standard C++ modules
 ~~
 
-Currently, standard C++ modules are enabled automatically
-if the language standard is ``-std=c++20`` or newer.
+Standard C++ modules are enabled automatically if the language standard is
+``-std=c++20`` or newer.
 
 How to produce a BMI
 
 
-We can generate a BMI for an importable module unit by either ``--precompile``
-or ``-fmodule-output`` flags.
+To generate a BMI for an importable module unit, use either the 
``--precompile``
+or ``-fmodule-output`` command line option.
 
-The ``--precompile`` option generates the BMI as the output of the compilation 
and the output path
-can be specified using the ``-o`` option.
+The ``--precompile`` option generates the BMI as the output of the compilation
+and the output path can be specified using the ``-o`` option.
 
-The ``-fmodule-output`` option generates the BMI as a by-product of the 
compilation.
-If ``-fmodule-output=`` is specified, the BMI will be emitted the specified 
location. Then if
-``-fmodule-output`` and ``-c`` are specified, the BMI will be emitted in the 
directory of the
-output file with the name of the input file with the new extension ``.pcm``. 
Otherwise, the BMI
-will be emitted in the working directory with the name of the input file with 
the new extension
+The ``-fmodule-output`` option generates the BMI as a by-product of the
+compilation. If ``-fmodule-output=`` is specified, the BMI will be emitted to
+the specified location. If ``-fmodule-output`` and ``-c`` are specified, the
+BMI will be emitted in the directory of the output file with the name of the
+input file with the extension ``.pcm``. Otherwise, the BMI will be emitted in
+the working directory with the name of the input file with the extension

ChuanqiXu9 wrote:

e.g,

```
clang++ a.cpp -c -o result/a.o
```

then `.` is the `working directory` and `./result` is the directory of the 
output file.

https://github.com/llvm/llvm-project/pull/90237
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revise the modules document for clarity (PR #90237)

2024-04-27 Thread Chuanqi Xu via cfe-commits


@@ -8,109 +8,91 @@ Standard C++ Modules
 Introduction
 
 
-The term ``modules`` has a lot of meanings. For the users of Clang, modules may
-refer to ``Objective-C Modules``, ``Clang C++ Modules`` (or ``Clang Header 
Modules``,
-etc.) or ``Standard C++ Modules``. The implementation of all these kinds of 
modules in Clang
-has a lot of shared code, but from the perspective of users, their semantics 
and
-command line interfaces are very different. This document focuses on
-an introduction of how to use standard C++ modules in Clang.
-
-There is already a detailed document about `Clang modules `_, it
-should be helpful to read `Clang modules `_ if you want to know
-more about the general idea of modules. Since standard C++ modules have 
different semantics
-(and work flows) from `Clang modules`, this page describes the background and 
use of
-Clang with standard C++ modules.
-
-Modules exist in two forms in the C++ Language Specification. They can refer to
-either "Named Modules" or to "Header Units". This document covers both forms.
+The term ``module`` has a lot of meanings. For Clang users, a module may refer
+to an ``Objective-C Module``, `Clang Module `_ (also called a
+``Clang Header Module``) or a ``C++20 Module`` (or a ``Standard C++ Module``).
+The implementation of all these kinds of modules in Clang shares a lot of code,
+but from the perspective of users, their semantics and command line interfaces
+are very different. This document focuses on an introduction to the use of
+C++20 modules in Clang. In the remainder of this document, the term ``module``
+will refer to Standard C++20 modules and the term ``Clang module`` will refer
+to the Clang modules extension.
+
+Modules exist in two forms in the C++ Standard. They can refer to either
+"Named Modules" or "Header Units". This document covers both forms.
 
 Standard C++ Named modules
 ==
 
-This document was intended to be a manual first and foremost, however, we 
consider it helpful to
-introduce some language background here for readers who are not familiar with
-the new language feature. This document is not intended to be a language
-tutorial; it will only introduce necessary concepts about the
-structure and building of the project.
+In order to understand compiler behavior, it is helpful to introduce some
+terms and definitions for readers who are not familiar with the C++ feature.
+This document is not a tutorial on C++; it only introduces necessary concepts
+to better understand use of modules in a project.
 
 Background and terminology
 --
 
-Modules
-~~~
-
-In this document, the term ``Modules``/``modules`` refers to standard C++ 
modules
-feature if it is not decorated by ``Clang``.
-
-Clang Modules
-~
-
-In this document, the term ``Clang Modules``/``Clang modules`` refer to Clang
-c++ modules extension. These are also known as ``Clang header modules``,
-``Clang module map modules`` or ``Clang c++ modules``.
-
 Module and module unit
 ~~
 
-A module consists of one or more module units. A module unit is a special
-translation unit. Every module unit must have a module declaration. The syntax
-of the module declaration is:
+A module consists of one or more module units. A module unit is a special kind
+of translation unit. Every module unit must have a module declaration. The
+syntax of the module declaration is:
 
 .. code-block:: c++
 
   [export] module module_name[:partition_name];
 
-Terms enclosed in ``[]`` are optional. The syntax of ``module_name`` and 
``partition_name``
-in regex form corresponds to ``[a-zA-Z_][a-zA-Z_0-9\.]*``. In particular, a 
literal dot ``.``
-in the name has no semantic meaning (e.g. implying a hierarchy).
+Terms enclosed in ``[]`` are optional. ``module_name`` and ``partition_name``
+are typical C++ identifiers, except that they may contain a period (``.``).
+Note that a ``.`` in the name has no semantic meaning (e.g. implying a
+hierarchy or referring to the file system).
 
-In this document, module units are classified into:
+In this document, module units are classified as:
 
-* Primary module interface unit.
-
-* Module implementation unit.
-
-* Module interface partition unit.
-
-* Internal module partition unit.
+* Primary module interface unit
+* Module implementation unit
+* Module partition interface unit
+* Module partition implementation unit
 
 A primary module interface unit is a module unit whose module declaration is
-``export module module_name;``. The ``module_name`` here denotes the name of 
the
+``export module module_name;`` where ``module_name`` denotes the name of the
 module. A module should have one and only one primary module interface unit.
 
 A module implementation unit is a module unit whose module declaration is
-``module module_name;``. A module could have multiple module implementation
-units with the same declaration.
+``module module_name;``. Multiple module 

[clang] Revise the modules document for clarity (PR #90237)

2024-04-27 Thread Chuanqi Xu via cfe-commits


@@ -530,43 +527,43 @@ Now the linkage name of ``NS::foo()`` will be 
``_ZN2NS3fooEv``.
 Module Initializers
 ~~~
 
-All the importable module units are required to emit an initializer function.
-The initializer function should contain calls to importing modules first and
-all the dynamic-initializers in the current module unit then.
-
-Translation units explicitly or implicitly importing named modules must call
-the initializer functions of the imported named modules within the sequence of
-the dynamic-initializers in the TU. Initializations of entities at namespace
-scope are appearance-ordered. This (recursively) extends into imported modules
-at the point of appearance of the import declaration.
+All importable module units are required to emit an initializer function. The
+initializer function emits calls to imported modules first followed by calls
+to all to dynamic initializers in the current module unit.
 
-It is allowed to omit calls to importing modules if it is known empty.
+Translation units that explicitly or implicitly import a named module must call
+the initializer functions of the imported named module within the sequence of
+the dynamic initializers in the translation unit. Initializations of entities
+at namespace scope are appearance-ordered. This (recursively) extends to
+imported modules at the point of appearance of the import declaration.

ChuanqiXu9 wrote:

I feel it might not be related here?

https://github.com/llvm/llvm-project/pull/90237
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revise the modules document for clarity (PR #90237)

2024-04-27 Thread Chuanqi Xu via cfe-commits


@@ -633,36 +630,37 @@ example:
   // module M's interface, so is discarded
   int c = use_h();   // OK
 
-In the above example, the function definition of ``N::g`` is elided from the 
Reduced
-BMI of ``M.cppm``. Then the use of ``use_g`` in ``M-impl.cpp`` fails
-to instantiate. For such issues, users can add references to ``N::g`` in the 
module purview
-of ``M.cppm`` to make sure it is reachable, e.g., ``using N::g;``.
-
-We think the Reduced BMI is the correct direction. But given it is a drastic 
change,
-we'd like to make it experimental first to avoid breaking existing users. The 
roadmap
-of Reduced BMI may be:
-
-1. ``-fexperimental-modules-reduced-bmi`` is opt in for 1~2 releases. The 
period depends
-on testing feedbacks.
-2. We would announce Reduced BMI is not experimental and introduce 
``-fmodules-reduced-bmi``.
-and suggest users to enable this mode. This may takes 1~2 releases too.
-3. Finally we will enable this by default. When that time comes, the term BMI 
will refer to
-the reduced BMI today and the Full BMI will only be meaningful to build 
systems which
-loves to support two phase compilations.
+In the above example, the function definition of ``N::g`` is elided from the
+Reduced BMI of ``M.cppm``. Then the use of ``use_g`` in ``M-impl.cpp``
+fails to instantiate. For such issues, users can add references to ``N::g`` in
+the module purview of ``M.cppm`` to ensure it is reachable, e.g.
+``using N::g;``.
+
+Long-term, Clang is likely to make Reduced BMIs the default rather than Full
+BMIs. Because it would be a drastic change of user interface, it is initially

ChuanqiXu9 wrote:

```suggestion
BMIs. Because it would be a drastic change, it is initially
```

not only the `user interface`, it requires some fundamental changes in the 
serializer, so there might be some bugs in the implementation.

https://github.com/llvm/llvm-project/pull/90237
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revise the modules document for clarity (PR #90237)

2024-04-27 Thread Chuanqi Xu via cfe-commits


@@ -312,75 +300,76 @@ So all of the following name is not valid by default:
 __test
 // and so on ...
 
-If you still want to use the reserved module names for any reason, use
-``-Wno-reserved-module-identifier`` to suppress the warning.
+Using a reserved module name is strongly discouraged, but
+``-Wno-reserved-module-identifier`` can be used to suppress the warning.
 
-How to specify the dependent BMIs
-~
+Specifying dependent BMIs
+~
 
-There are 3 methods to specify the dependent BMIs:
+There are 3 ways to specify a dependent BMI:

ChuanqiXu9 wrote:

If we have the following code:

```
// foo.cc
import a;
...
```

then we need a BMI of module `a` to compile `foo.cc`. Here the BMI of module 
`a` is the dependent BMI for `foo.cc`. I feel this clear. But if we don't think 
so, we can add a definition for that.

https://github.com/llvm/llvm-project/pull/90237
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revise the modules document for clarity (PR #90237)

2024-04-27 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 edited 
https://github.com/llvm/llvm-project/pull/90237
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revise the modules document for clarity (PR #90237)

2024-04-27 Thread Chuanqi Xu via cfe-commits


@@ -530,43 +527,43 @@ Now the linkage name of ``NS::foo()`` will be 
``_ZN2NS3fooEv``.
 Module Initializers
 ~~~
 
-All the importable module units are required to emit an initializer function.
-The initializer function should contain calls to importing modules first and
-all the dynamic-initializers in the current module unit then.
-
-Translation units explicitly or implicitly importing named modules must call
-the initializer functions of the imported named modules within the sequence of
-the dynamic-initializers in the TU. Initializations of entities at namespace
-scope are appearance-ordered. This (recursively) extends into imported modules
-at the point of appearance of the import declaration.
+All importable module units are required to emit an initializer function. The

ChuanqiXu9 wrote:

They are required to handle the dynamic initializations of non-inline variables 
in the module unit. But the importable module units have to emit the 
initializer even if there is no dynamic initialization. Otherwise, the importer 
may calling a non-exist function.

https://github.com/llvm/llvm-project/pull/90237
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [Pipelines] Do not run CoroSplit and CoroCleanup in LTO pre-link pipeline (PR #90310)

2024-04-27 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 approved this pull request.

Feel not bad.

https://github.com/llvm/llvm-project/pull/90310
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] Detect ODR mismatches for enums in non-C++ like in C++. (PR #90298)

2024-04-27 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 approved this pull request.

I have no idea why it was. But the current change looks pretty fine to me.

https://github.com/llvm/llvm-project/pull/90298
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] D41416: [modules] [pch] Do not deserialize all lazy template specializations when looking for one. (PR #83108)

2024-04-25 Thread Chuanqi Xu via cfe-commits

ChuanqiXu9 wrote:

Rebased with main.

https://github.com/llvm/llvm-project/pull/83108
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] D41416: [modules] [pch] Do not deserialize all lazy template specializations when looking for one. (PR #83108)

2024-04-25 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 updated 
https://github.com/llvm/llvm-project/pull/83108

>From be1c83fb885536c3e65657c6549bd20dd29d9649 Mon Sep 17 00:00:00 2001
From: Vassil Vassilev 
Date: Sun, 7 Jan 2018 15:16:11 +0200
Subject: [PATCH 1/3] D41416: [modules] [pch] Do not deserialize all lazy
 template specializations when looking for one.

---
 clang/include/clang/AST/DeclTemplate.h|  36 +++-
 clang/lib/AST/DeclTemplate.cpp| 100 +-
 clang/lib/AST/ODRHash.cpp |  15 
 clang/lib/Serialization/ASTReader.cpp |  25 --
 clang/lib/Serialization/ASTReaderDecl.cpp |  46 ++
 clang/lib/Serialization/ASTWriter.cpp |  21 -
 clang/lib/Serialization/ASTWriterDecl.cpp |  76 +---
 7 files changed, 249 insertions(+), 70 deletions(-)

diff --git a/clang/include/clang/AST/DeclTemplate.h 
b/clang/include/clang/AST/DeclTemplate.h
index 3ee03eebdb8ca4..24578cfdaa7c7c 100644
--- a/clang/include/clang/AST/DeclTemplate.h
+++ b/clang/include/clang/AST/DeclTemplate.h
@@ -256,6 +256,9 @@ class TemplateArgumentList final
   TemplateArgumentList(const TemplateArgumentList &) = delete;
   TemplateArgumentList =(const TemplateArgumentList &) = delete;
 
+  /// Create hash for the given arguments.
+  static unsigned ComputeODRHash(ArrayRef Args);
+
   /// Create a new template argument list that copies the given set of
   /// template arguments.
   static TemplateArgumentList *CreateCopy(ASTContext ,
@@ -730,6 +733,26 @@ class RedeclarableTemplateDecl : public TemplateDecl,
   }
 
   void anchor() override;
+  struct LazySpecializationInfo {
+GlobalDeclID DeclID = GlobalDeclID();
+unsigned ODRHash = ~0U;
+bool IsPartial = false;
+LazySpecializationInfo(GlobalDeclID ID, unsigned Hash = ~0U,
+   bool Partial = false)
+  : DeclID(ID), ODRHash(Hash), IsPartial(Partial) { }
+LazySpecializationInfo() { }
+bool operator<(const LazySpecializationInfo ) const {
+  return DeclID < Other.DeclID;
+}
+bool operator==(const LazySpecializationInfo ) const {
+  assert((DeclID != Other.DeclID || ODRHash == Other.ODRHash) &&
+ "Hashes differ!");
+  assert((DeclID != Other.DeclID || IsPartial == Other.IsPartial) &&
+ "Both must be the same kinds!");
+  return DeclID == Other.DeclID;
+}
+  };
+
 protected:
   template  struct SpecEntryTraits {
 using DeclType = EntryType;
@@ -770,7 +793,12 @@ class RedeclarableTemplateDecl : public TemplateDecl,
 return SpecIterator(isEnd ? Specs.end() : Specs.begin());
   }
 
-  void loadLazySpecializationsImpl() const;
+  void loadLazySpecializationsImpl(bool OnlyPartial = false) const;
+
+  void loadLazySpecializationsImpl(llvm::ArrayRef Args,
+   TemplateParameterList *TPL = nullptr) const;
+
+  Decl *loadLazySpecializationImpl(LazySpecializationInfo ) const;
 
   template 
   typename SpecEntryTraits::DeclType*
@@ -797,7 +825,7 @@ class RedeclarableTemplateDecl : public TemplateDecl,
 ///
 /// The first value in the array is the number of specializations/partial
 /// specializations that follow.
-GlobalDeclID *LazySpecializations = nullptr;
+LazySpecializationInfo *LazySpecializations = nullptr;
 
 /// The set of "injected" template arguments used within this
 /// template.
@@ -2284,7 +2312,7 @@ class ClassTemplateDecl : public RedeclarableTemplateDecl 
{
   friend class TemplateDeclInstantiator;
 
   /// Load any lazily-loaded specializations from the external source.
-  void LoadLazySpecializations() const;
+  void LoadLazySpecializations(bool OnlyPartial = false) const;
 
   /// Get the underlying class declarations of the template.
   CXXRecordDecl *getTemplatedDecl() const {
@@ -3056,7 +3084,7 @@ class VarTemplateDecl : public RedeclarableTemplateDecl {
   friend class ASTDeclWriter;
 
   /// Load any lazily-loaded specializations from the external source.
-  void LoadLazySpecializations() const;
+  void LoadLazySpecializations(bool OnlyPartial = false) const;
 
   /// Get the underlying variable declarations of the template.
   VarDecl *getTemplatedDecl() const {
diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp
index d27a30e0c5fce1..1afd7b4550c917 100644
--- a/clang/lib/AST/DeclTemplate.cpp
+++ b/clang/lib/AST/DeclTemplate.cpp
@@ -20,6 +20,8 @@
 #include "clang/AST/TemplateBase.h"
 #include "clang/AST/TemplateName.h"
 #include "clang/AST/Type.h"
+#include "clang/AST/ODRHash.h"
+#include "clang/AST/ExprCXX.h"
 #include "clang/AST/TypeLoc.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/LLVM.h"
@@ -331,17 +333,46 @@ RedeclarableTemplateDecl::CommonBase 
*RedeclarableTemplateDecl::getCommonPtr() c
   return Common;
 }
 
-void RedeclarableTemplateDecl::loadLazySpecializationsImpl() const {
+void RedeclarableTemplateDecl::loadLazySpecializationsImpl(
+ bool 

[clang] [Serialization] Load Specializations Lazily (PR #76774)

2024-04-25 Thread Chuanqi Xu via cfe-commits

ChuanqiXu9 wrote:

Given we're pursuing https://github.com/llvm/llvm-project/pull/83237 series. 
I'll close this one.

https://github.com/llvm/llvm-project/pull/76774
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Serialization] Load Specializations Lazily (PR #76774)

2024-04-25 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 closed 
https://github.com/llvm/llvm-project/pull/76774
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-25 Thread Chuanqi Xu via cfe-commits

ChuanqiXu9 wrote:

> The changes LGTM, don't want to block this on my remaining nits.

Thanks for reviewing this.

> 
> I believe @Bigcheese wanted to test test impact on PCM size on our side 
> before this lands.

I've rebased this with main. I'll wait for the results from @Bigcheese 

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-25 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 updated 
https://github.com/llvm/llvm-project/pull/86912

>From 2c20a6200fb2790b3a891ffc8c43682c113c7e8a Mon Sep 17 00:00:00 2001
From: Chuanqi Xu 
Date: Mon, 18 Mar 2024 08:36:55 +0800
Subject: [PATCH] [Modules] No transitive source location change

---
 clang/include/clang/Basic/SourceLocation.h|  1 +
 .../include/clang/Serialization/ASTBitCodes.h | 56 +---
 clang/include/clang/Serialization/ASTReader.h | 48 ++
 clang/include/clang/Serialization/ASTWriter.h |  4 +
 .../include/clang/Serialization/ModuleFile.h  | 14 ++-
 .../Serialization/SourceLocationEncoding.h| 91 +--
 clang/lib/Frontend/ASTUnit.cpp|  2 -
 clang/lib/Serialization/ASTReader.cpp | 57 
 clang/lib/Serialization/ASTReaderDecl.cpp |  2 +-
 clang/lib/Serialization/ASTWriter.cpp | 41 +++--
 clang/lib/Serialization/ASTWriterDecl.cpp |  8 +-
 clang/lib/Serialization/ModuleFile.cpp|  1 -
 .../no-transitive-source-location-change.cppm | 69 ++
 clang/test/Modules/pr61067.cppm   | 25 -
 .../SourceLocationEncodingTest.cpp| 12 ++-
 15 files changed, 269 insertions(+), 162 deletions(-)
 create mode 100644 clang/test/Modules/no-transitive-source-location-change.cppm

diff --git a/clang/include/clang/Basic/SourceLocation.h 
b/clang/include/clang/Basic/SourceLocation.h
index 00b1e0fa855b7a..7a0f5ba8d1270b 100644
--- a/clang/include/clang/Basic/SourceLocation.h
+++ b/clang/include/clang/Basic/SourceLocation.h
@@ -90,6 +90,7 @@ class SourceLocation {
   friend class ASTWriter;
   friend class SourceManager;
   friend struct llvm::FoldingSetTrait;
+  friend class SourceLocationEncoding;
 
 public:
   using UIntTy = uint32_t;
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index 186c3b722ced16..94a3d24d47926b 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -23,6 +23,7 @@
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Basic/OperatorKinds.h"
 #include "clang/Basic/SourceLocation.h"
+#include "clang/Serialization/SourceLocationEncoding.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Bitstream/BitCodes.h"
 #include 
@@ -167,45 +168,38 @@ const unsigned int NUM_PREDEF_SUBMODULE_IDS = 1;
 
 /// Source range/offset of a preprocessed entity.
 struct PPEntityOffset {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location of beginning of range.
-  SourceLocation::UIntTy Begin;
+  RawLocEncoding Begin;
 
   /// Raw source location of end of range.
-  SourceLocation::UIntTy End;
+  RawLocEncoding End;
 
   /// Offset in the AST file relative to ModuleFile::MacroOffsetsBase.
   uint32_t BitOffset;
 
-  PPEntityOffset(SourceRange R, uint32_t BitOffset)
-  : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()),
-BitOffset(BitOffset) {}
-
-  SourceLocation getBegin() const {
-return SourceLocation::getFromRawEncoding(Begin);
-  }
+  PPEntityOffset(RawLocEncoding Begin, RawLocEncoding End, uint32_t BitOffset)
+  : Begin(Begin), End(End), BitOffset(BitOffset) {}
 
-  SourceLocation getEnd() const {
-return SourceLocation::getFromRawEncoding(End);
-  }
+  RawLocEncoding getBegin() const { return Begin; }
+  RawLocEncoding getEnd() const { return End; }
 };
 
 /// Source range of a skipped preprocessor region
 struct PPSkippedRange {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location of beginning of range.
-  SourceLocation::UIntTy Begin;
+  RawLocEncoding Begin;
   /// Raw source location of end of range.
-  SourceLocation::UIntTy End;
+  RawLocEncoding End;
 
-  PPSkippedRange(SourceRange R)
-  : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()) 
{
-  }
+  PPSkippedRange(RawLocEncoding Begin, RawLocEncoding End)
+  : Begin(Begin), End(End) {}
 
-  SourceLocation getBegin() const {
-return SourceLocation::getFromRawEncoding(Begin);
-  }
-  SourceLocation getEnd() const {
-return SourceLocation::getFromRawEncoding(End);
-  }
+  RawLocEncoding getBegin() const { return Begin; }
+  RawLocEncoding getEnd() const { return End; }
 };
 
 /// Offset in the AST file. Use splitted 64-bit integer into low/high
@@ -231,8 +225,10 @@ struct UnderalignedInt64 {
 
 /// Source location and bit offset of a declaration.
 struct DeclOffset {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location.
-  SourceLocation::UIntTy Loc = 0;
+  RawLocEncoding RawLoc = 0;
 
   /// Offset relative to the start of the DECLTYPES_BLOCK block. Keep
   /// structure alignment 32-bit and avoid padding gap because undefined
@@ -240,17 +236,15 @@ struct DeclOffset {
   UnderalignedInt64 BitOffset;
 
   DeclOffset() = default;
-  DeclOffset(SourceLocation Loc, uint64_t BitOffset,
- 

[clang] fe47e8f - [NFC] [ASTUnit] [Serialization] Transalte local decl ID to global decl ID before consuming

2024-04-25 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-25T15:55:46+08:00
New Revision: fe47e8ff3ae7fc8975eaade6bfa6679737c28b93

URL: 
https://github.com/llvm/llvm-project/commit/fe47e8ff3ae7fc8975eaade6bfa6679737c28b93
DIFF: 
https://github.com/llvm/llvm-project/commit/fe47e8ff3ae7fc8975eaade6bfa6679737c28b93.diff

LOG: [NFC] [ASTUnit] [Serialization] Transalte local decl ID to global decl ID 
before consuming

Discovered from
https://github.com/llvm/llvm-project/commit/d86cc73bbfd9a22d9a0d498d72c9b2ee235128e9.

There is a potential issue of using DeclID in ASTUnit. ASTUnit may
record the declaration ID from ASTWriter. And after loading the
preamble, the ASTUnit may consume the recorded declaration ID directly
in ExternalASTSource. This is not good. According to the design, all
local declaration ID consumed in ASTReader need to be translated by
`ASTReader::getGlobaldeclID()`.

This will be problematic if we changed the encodings of declaration IDs or if we
make preamble to work more complexly.

Added: 


Modified: 
clang/lib/Frontend/ASTUnit.cpp

Removed: 




diff  --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp
index 2f75313e8a4c50..1b93588553a276 100644
--- a/clang/lib/Frontend/ASTUnit.cpp
+++ b/clang/lib/Frontend/ASTUnit.cpp
@@ -1467,13 +1467,12 @@ void ASTUnit::RealizeTopLevelDeclsFromPreamble() {
 
   std::vector Resolved;
   Resolved.reserve(TopLevelDeclsInPreamble.size());
-  ExternalASTSource  = *getASTContext().getExternalSource();
+  // The module file of the preamble.
+  serialization::ModuleFile  = 
Reader->getModuleManager().getPrimaryModule();
   for (const auto TopLevelDecl : TopLevelDeclsInPreamble) {
 // Resolve the declaration ID to an actual declaration, possibly
 // deserializing the declaration in the process.
-//
-// FIMXE: We shouldn't convert a LocalDeclID to GlobalDeclID directly.
-if (Decl *D = Source.GetExternalDecl(GlobalDeclID(TopLevelDecl.get(
+if (Decl *D = Reader->GetDecl(Reader->getGlobalDeclID(MF, TopLevelDecl)))
   Resolved.push_back(D);
   }
   TopLevelDeclsInPreamble.clear();



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NFC] Move DeclID from serialization/ASTBitCodes.h to AST/DeclID.h (PR #89873)

2024-04-24 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 closed 
https://github.com/llvm/llvm-project/pull/89873
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NFC] Move DeclID from serialization/ASTBitCodes.h to AST/DeclID.h (PR #89873)

2024-04-24 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 updated 
https://github.com/llvm/llvm-project/pull/89873

>From d83b9cda6c7d943e90c324fa2c1c7e7ffaf88e1c Mon Sep 17 00:00:00 2001
From: Chuanqi Xu 
Date: Wed, 24 Apr 2024 13:35:01 +0800
Subject: [PATCH] [NFC] Move DeclID from serialization/ASTBitCodes.h to
 AST/DeclID.h

Previously, the DeclID is defined in serialization/ASTBitCodes.h under
clang::serialization namespace. However, actually the DeclID is not
purely used in serialization part. The DeclID is already widely used in
AST and all around the clang project via classes like `LazyPtrDecl` or
calling `ExternalASTSource::getExernalDecl()`. All such uses are via the
raw underlying type of `DeclID` as `uint32_t`. This is not pretty good.

This patch moves the DeclID class family to a new header `AST/DeclID.h`
so that the whole project can use the wrapped class `DeclID`,
`GlobalDeclID` and `LocalDeclID` instead of the raw underlying type.
This can improve the readability and the type safety.
---
 clang/include/clang/AST/ASTContext.h  |   4 +-
 clang/include/clang/AST/DeclBase.h|   4 +-
 clang/include/clang/AST/DeclID.h  | 177 ++
 clang/include/clang/AST/DeclTemplate.h|   2 +-
 clang/include/clang/AST/ExternalASTSource.h   |   4 +-
 clang/include/clang/Frontend/ASTUnit.h|   2 +-
 .../clang/Frontend/MultiplexConsumer.h|   2 +-
 .../clang/Sema/MultiplexExternalSemaSource.h  |   2 +-
 .../include/clang/Serialization/ASTBitCodes.h | 161 +---
 .../ASTDeserializationListener.h  |   2 +-
 clang/include/clang/Serialization/ASTReader.h | 126 ++---
 .../clang/Serialization/ASTRecordReader.h |   6 +-
 clang/include/clang/Serialization/ASTWriter.h |  22 +--
 .../include/clang/Serialization/ModuleFile.h  |   8 +-
 clang/lib/AST/ASTContext.cpp  |   3 +-
 clang/lib/AST/Decl.cpp|  46 ++---
 clang/lib/AST/DeclBase.cpp|   4 +-
 clang/lib/AST/DeclCXX.cpp |  63 +++
 clang/lib/AST/DeclFriend.cpp  |   2 +-
 clang/lib/AST/DeclObjC.cpp|  24 +--
 clang/lib/AST/DeclOpenMP.cpp  |  18 +-
 clang/lib/AST/DeclTemplate.cpp|  41 ++--
 clang/lib/AST/ExternalASTSource.cpp   |   2 +-
 clang/lib/Frontend/ASTUnit.cpp|   4 +-
 clang/lib/Frontend/FrontendAction.cpp |   6 +-
 clang/lib/Frontend/MultiplexConsumer.cpp  |   3 +-
 .../lib/Sema/MultiplexExternalSemaSource.cpp  |   2 +-
 clang/lib/Serialization/ASTReader.cpp |  16 +-
 clang/lib/Serialization/ASTReaderDecl.cpp |  18 +-
 clang/lib/Serialization/ASTWriter.cpp |   4 +-
 clang/lib/Serialization/ASTWriterDecl.cpp |   4 +-
 31 files changed, 384 insertions(+), 398 deletions(-)
 create mode 100644 clang/include/clang/AST/DeclID.h

diff --git a/clang/include/clang/AST/ASTContext.h 
b/clang/include/clang/AST/ASTContext.h
index d5ed20ff50157d..ecec9bfcf30079 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -455,7 +455,7 @@ class ASTContext : public RefCountedBase {
   /// initialization of another module).
   struct PerModuleInitializers {
 llvm::SmallVector Initializers;
-llvm::SmallVector LazyInitializers;
+llvm::SmallVector LazyInitializers;
 
 void resolve(ASTContext );
   };
@@ -1059,7 +1059,7 @@ class ASTContext : public RefCountedBase {
   /// or an ImportDecl nominating another module that has initializers.
   void addModuleInitializer(Module *M, Decl *Init);
 
-  void addLazyModuleInitializers(Module *M, ArrayRef IDs);
+  void addLazyModuleInitializers(Module *M, ArrayRef IDs);
 
   /// Get the initializations to perform when importing a module, if any.
   ArrayRef getModuleInitializers(Module *M);
diff --git a/clang/include/clang/AST/DeclBase.h 
b/clang/include/clang/AST/DeclBase.h
index d8cafc3d81526e..474e51c1df6d68 100644
--- a/clang/include/clang/AST/DeclBase.h
+++ b/clang/include/clang/AST/DeclBase.h
@@ -15,6 +15,7 @@
 
 #include "clang/AST/ASTDumperUtils.h"
 #include "clang/AST/AttrIterator.h"
+#include "clang/AST/DeclID.h"
 #include "clang/AST/DeclarationName.h"
 #include "clang/AST/SelectorLocationsKind.h"
 #include "clang/Basic/IdentifierTable.h"
@@ -239,9 +240,6 @@ class alignas(8) Decl {
 ModulePrivate
   };
 
-  /// An ID number that refers to a declaration in an AST file.
-  using DeclID = uint32_t;
-
 protected:
   /// The next declaration within the same lexical
   /// DeclContext. These pointers form the linked list that is
diff --git a/clang/include/clang/AST/DeclID.h b/clang/include/clang/AST/DeclID.h
new file mode 100644
index 00..e2c6dd65e86bc3
--- /dev/null
+++ b/clang/include/clang/AST/DeclID.h
@@ -0,0 +1,177 @@
+//===--- DeclID.h - ID number for deserialized declarations  *- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See 

[clang] [NFC] Move DeclID from serialization/ASTBitCodes.h to AST/DeclID.h (PR #89873)

2024-04-23 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 ready_for_review 
https://github.com/llvm/llvm-project/pull/89873
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NFC] Move DeclID from serialization/ASTBitCodes.h to AST/DeclID.h (PR #89873)

2024-04-23 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 created 
https://github.com/llvm/llvm-project/pull/89873

Previously, the DeclID is defined in serialization/ASTBitCodes.h under 
clang::serialization namespace. However, actually the DeclID is not purely used 
in serialization part. The DeclID is already widely used in AST and all around 
the clang project via classes like `LazyPtrDecl` or calling 
`ExternalASTSource::getExernalDecl()`. All such uses are via the raw underlying 
type of `DeclID` as `uint32_t`. This is not pretty good.

This patch moves the DeclID class family to a new header `AST/DeclID.h` so that 
the whole project can use the wrapped class `DeclID`, `GlobalDeclID` and 
`LocalDeclID` instead of the raw underlying type. This can improve the 
readability and the type safety.

>From 3c8e76dcf7746d7ede5434e0fbf025802590bd68 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu 
Date: Wed, 24 Apr 2024 13:35:01 +0800
Subject: [PATCH] [NFC] Move DeclID from serialization/ASTBitCodes.h to
 AST/DeclID.h

Previously, the DeclID is defined in serialization/ASTBitCodes.h under
clang::serialization namespace. However, actually the DeclID is not
purely used in serialization part. The DeclID is already widely used in
AST and all around the clang project via classes like `LazyPtrDecl` or
calling `ExternalASTSource::getExernalDecl()`. All such uses are via the
raw underlying type of `DeclID` as `uint32_t`. This is not pretty good.

This patch moves the DeclID class family to a new header `AST/DeclID.h`
so that the whole project can use the wrapped class `DeclID`,
`GlobalDeclID` and `LocalDeclID` instead of the raw underlying type.
This can improve the readability and the type safety.
---
 clang/include/clang/AST/ASTContext.h  |   4 +-
 clang/include/clang/AST/DeclBase.h|   4 +-
 clang/include/clang/AST/DeclID.h  | 175 ++
 clang/include/clang/AST/DeclTemplate.h|   2 +-
 clang/include/clang/AST/ExternalASTSource.h   |   4 +-
 clang/include/clang/Frontend/ASTUnit.h|   2 +-
 .../clang/Frontend/MultiplexConsumer.h|   2 +-
 .../clang/Sema/MultiplexExternalSemaSource.h  |   2 +-
 .../include/clang/Serialization/ASTBitCodes.h | 161 +---
 .../ASTDeserializationListener.h  |   2 +-
 clang/include/clang/Serialization/ASTReader.h | 126 ++---
 .../clang/Serialization/ASTRecordReader.h |   6 +-
 clang/include/clang/Serialization/ASTWriter.h |  22 +--
 .../include/clang/Serialization/ModuleFile.h  |   8 +-
 clang/lib/AST/ASTContext.cpp  |   3 +-
 clang/lib/AST/Decl.cpp|  46 ++---
 clang/lib/AST/DeclBase.cpp|   4 +-
 clang/lib/AST/DeclCXX.cpp |  63 +++
 clang/lib/AST/DeclFriend.cpp  |   2 +-
 clang/lib/AST/DeclObjC.cpp|  24 +--
 clang/lib/AST/DeclOpenMP.cpp  |  18 +-
 clang/lib/AST/DeclTemplate.cpp|  41 ++--
 clang/lib/AST/ExternalASTSource.cpp   |   2 +-
 clang/lib/Frontend/ASTUnit.cpp|   4 +-
 clang/lib/Frontend/FrontendAction.cpp |   6 +-
 clang/lib/Frontend/MultiplexConsumer.cpp  |   3 +-
 .../lib/Sema/MultiplexExternalSemaSource.cpp  |   2 +-
 clang/lib/Serialization/ASTReader.cpp |  16 +-
 clang/lib/Serialization/ASTReaderDecl.cpp |  18 +-
 clang/lib/Serialization/ASTWriter.cpp |   4 +-
 clang/lib/Serialization/ASTWriterDecl.cpp |   4 +-
 31 files changed, 382 insertions(+), 398 deletions(-)
 create mode 100644 clang/include/clang/AST/DeclID.h

diff --git a/clang/include/clang/AST/ASTContext.h 
b/clang/include/clang/AST/ASTContext.h
index d5ed20ff50157d..ecec9bfcf30079 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -455,7 +455,7 @@ class ASTContext : public RefCountedBase {
   /// initialization of another module).
   struct PerModuleInitializers {
 llvm::SmallVector Initializers;
-llvm::SmallVector LazyInitializers;
+llvm::SmallVector LazyInitializers;
 
 void resolve(ASTContext );
   };
@@ -1059,7 +1059,7 @@ class ASTContext : public RefCountedBase {
   /// or an ImportDecl nominating another module that has initializers.
   void addModuleInitializer(Module *M, Decl *Init);
 
-  void addLazyModuleInitializers(Module *M, ArrayRef IDs);
+  void addLazyModuleInitializers(Module *M, ArrayRef IDs);
 
   /// Get the initializations to perform when importing a module, if any.
   ArrayRef getModuleInitializers(Module *M);
diff --git a/clang/include/clang/AST/DeclBase.h 
b/clang/include/clang/AST/DeclBase.h
index d8cafc3d81526e..474e51c1df6d68 100644
--- a/clang/include/clang/AST/DeclBase.h
+++ b/clang/include/clang/AST/DeclBase.h
@@ -15,6 +15,7 @@
 
 #include "clang/AST/ASTDumperUtils.h"
 #include "clang/AST/AttrIterator.h"
+#include "clang/AST/DeclID.h"
 #include "clang/AST/DeclarationName.h"
 #include "clang/AST/SelectorLocationsKind.h"
 #include 

[clang] [C++20] [Modules] [Itanium ABI] Generate the vtable in the module unit of dynamic classes (PR #75912)

2024-04-23 Thread Chuanqi Xu via cfe-commits

ChuanqiXu9 wrote:

@rjmccall @dwblaikie ping

https://github.com/llvm/llvm-project/pull/75912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [coro] Lower `llvm.coro.await.suspend.handle` to resume with tail call (PR #89751)

2024-04-23 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 edited 
https://github.com/llvm/llvm-project/pull/89751
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [coro] Lower `llvm.coro.await.suspend.handle` to resume with tail call (PR #89751)

2024-04-23 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 commented:

The higher level idea looks fine.

https://github.com/llvm/llvm-project/pull/89751
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [coro] Lower `llvm.coro.await.suspend.handle` to resume with tail call (PR #89751)

2024-04-23 Thread Chuanqi Xu via cfe-commits


@@ -1523,24 +1442,16 @@ struct SwitchCoroutineSplitter {
 
 createResumeEntryBlock(F, Shape);
 auto *ResumeClone =
-createClone(F, ".resume", Shape, CoroCloner::Kind::SwitchResume);
+createClone(F, ".resume", Shape, CoroCloner::Kind::SwitchResume, TTI);
 auto *DestroyClone =
-createClone(F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind);
+createClone(F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind, TTI);
 auto *CleanupClone =
-createClone(F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup);
+createClone(F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup, 
TTI);
 
 postSplitCleanup(*ResumeClone);
 postSplitCleanup(*DestroyClone);
 postSplitCleanup(*CleanupClone);
 
-// Adding musttail call to support symmetric transfer.
-// Skip targets which don't support tail call.
-//
-// FIXME: Could we support symmetric transfer effectively without musttail

ChuanqiXu9 wrote:

Maybe we need to remain the FIXME

https://github.com/llvm/llvm-project/pull/89751
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [coro] Lower `llvm.coro.await.suspend.handle` to resume with tail call (PR #89751)

2024-04-23 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 edited 
https://github.com/llvm/llvm-project/pull/89751
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang] coroutine: generate valid mangled name in CodeGenFunction::generateAwaitSuspendWrapper (PR #89731)

2024-04-23 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 approved this pull request.

LGTM. Thanks.

https://github.com/llvm/llvm-project/pull/89731
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] b467c6b - [NFC] [Serialization] Turn type alias GlobalDeclID into a class

2024-04-23 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-23T17:52:58+08:00
New Revision: b467c6b53660dcaa458c2b5d7fbf5f93ee2af910

URL: 
https://github.com/llvm/llvm-project/commit/b467c6b53660dcaa458c2b5d7fbf5f93ee2af910
DIFF: 
https://github.com/llvm/llvm-project/commit/b467c6b53660dcaa458c2b5d7fbf5f93ee2af910.diff

LOG: [NFC] [Serialization] Turn type alias GlobalDeclID into a class

Succsessor of b8e3b2ad66cf78ad2b. This patch also converts the type
alias GlobalDeclID to a class to improve the readability and type
safety.

Added: 


Modified: 
clang/include/clang/Serialization/ASTBitCodes.h
clang/include/clang/Serialization/ASTReader.h
clang/include/clang/Serialization/ASTRecordReader.h
clang/lib/Serialization/ASTReader.cpp
clang/lib/Serialization/ASTReaderDecl.cpp
clang/lib/Serialization/ASTReaderInternals.h
clang/lib/Serialization/ASTWriter.cpp

Removed: 




diff  --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index ca51a2dff3d57b..dcfa4ac0c19677 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -79,9 +79,71 @@ class LocalDeclID {
   DeclID ID;
 };
 
-// FIXME: Turn GlobalDeclID into class so we can have some type safety when
-// we go from local ID to global and vice-versa.
-using GlobalDeclID = DeclID;
+/// Wrapper class for DeclID. This is helpful to not mix the use of LocalDeclID
+/// and GlobalDeclID to improve the type safety.
+class GlobalDeclID {
+public:
+  GlobalDeclID() : ID(0) {}
+  explicit GlobalDeclID(DeclID ID) : ID(ID) {}
+
+  DeclID get() const { return ID; }
+
+  explicit operator DeclID() const { return ID; }
+
+  friend bool operator==(const GlobalDeclID , const GlobalDeclID ) {
+return LHS.ID == RHS.ID;
+  }
+  friend bool operator!=(const GlobalDeclID , const GlobalDeclID ) {
+return LHS.ID != RHS.ID;
+  }
+  // We may sort the global decl ID.
+  friend bool operator<(const GlobalDeclID , const GlobalDeclID ) {
+return LHS.ID < RHS.ID;
+  }
+  friend bool operator>(const GlobalDeclID , const GlobalDeclID ) {
+return LHS.ID > RHS.ID;
+  }
+  friend bool operator<=(const GlobalDeclID , const GlobalDeclID ) {
+return LHS.ID <= RHS.ID;
+  }
+  friend bool operator>=(const GlobalDeclID , const GlobalDeclID ) {
+return LHS.ID >= RHS.ID;
+  }
+
+private:
+  DeclID ID;
+};
+
+/// A helper iterator adaptor to convert the iterators to `SmallVector`
+/// to the iterators to `SmallVector`.
+class GlobalDeclIDIterator
+: public llvm::iterator_adaptor_base {
+public:
+  GlobalDeclIDIterator() : iterator_adaptor_base(nullptr) {}
+
+  GlobalDeclIDIterator(const DeclID *ID) : iterator_adaptor_base(ID) {}
+
+  value_type operator*() const { return GlobalDeclID(*I); }
+
+  bool operator==(const GlobalDeclIDIterator ) const { return I == RHS.I; }
+};
+
+/// A helper iterator adaptor to convert the iterators to
+/// `SmallVector` to the iterators to `SmallVector`.
+class DeclIDIterator
+: public llvm::iterator_adaptor_base {
+public:
+  DeclIDIterator() : iterator_adaptor_base(nullptr) {}
+
+  DeclIDIterator(const GlobalDeclID *ID) : iterator_adaptor_base(ID) {}
+
+  value_type operator*() const { return DeclID(*I); }
+
+  bool operator==(const DeclIDIterator ) const { return I == RHS.I; }
+};
 
 /// An ID number that refers to a type in an AST file.
 ///
@@ -2169,6 +2231,27 @@ template <> struct 
DenseMapInfo {
   }
 };
 
+template <> struct DenseMapInfo {
+  using DeclID = clang::serialization::DeclID;
+  using GlobalDeclID = clang::serialization::GlobalDeclID;
+
+  static GlobalDeclID getEmptyKey() {
+return GlobalDeclID(DenseMapInfo::getEmptyKey());
+  }
+
+  static GlobalDeclID getTombstoneKey() {
+return GlobalDeclID(DenseMapInfo::getTombstoneKey());
+  }
+
+  static unsigned getHashValue(const GlobalDeclID ) {
+return DenseMapInfo::getHashValue(Key.get());
+  }
+
+  static bool isEqual(const GlobalDeclID , const GlobalDeclID ) {
+return L == R;
+  }
+};
+
 } // namespace llvm
 
 #endif // LLVM_CLANG_SERIALIZATION_ASTBITCODES_H

diff  --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index 42aecf059907e8..ed917aa1642293 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -504,7 +504,7 @@ class ASTReader
   static_assert(std::is_same_v);
 
   using GlobalDeclMapType =
-  ContinuousRangeMap;
+  ContinuousRangeMap;
 
   /// Mapping from global declaration IDs to the module in which the
   /// declaration resides.
@@ -513,14 +513,14 @@ class ASTReader
   using FileOffset = std::pair;
   using FileOffsetsTy = SmallVector;
   using DeclUpdateOffsetsMap =
-  llvm::DenseMap;
+  llvm::DenseMap;
 
   /// Declarations that have modifications residing in a later file
   /// in the chain.
   

[clang] b8e3b2a - [NFC] [Serialization] Turn type alias LocalDeclID into class

2024-04-23 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-23T16:56:14+08:00
New Revision: b8e3b2ad66cf78ad2b7832577b1d58dc93c5da21

URL: 
https://github.com/llvm/llvm-project/commit/b8e3b2ad66cf78ad2b7832577b1d58dc93c5da21
DIFF: 
https://github.com/llvm/llvm-project/commit/b8e3b2ad66cf78ad2b7832577b1d58dc93c5da21.diff

LOG: [NFC] [Serialization] Turn type alias LocalDeclID into class

Previously, the LocalDeclID and GlobalDeclID are defined as:

```
using LocalDeclID = DeclID;
using GlobalDeclID = DeclID;
```

This is more or less concerning that we may misuse LocalDeclID and
GlobalDeclID without understanding it. There is also a FIXME saying
this.

This patch tries to turn LocalDeclID into a class to improve the type
safety here.

Added: 


Modified: 
clang/include/clang/Serialization/ASTBitCodes.h
clang/include/clang/Serialization/ASTReader.h
clang/include/clang/Serialization/ModuleFile.h
clang/lib/Serialization/ASTReader.cpp
clang/lib/Serialization/ASTReaderDecl.cpp
clang/utils/TableGen/ClangAttrEmitter.cpp

Removed: 




diff  --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index c91a1c1c82edd4..ca51a2dff3d57b 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -69,9 +69,18 @@ using IdentifierID = uint32_t;
 /// FIXME: Merge with Decl::DeclID
 using DeclID = uint32_t;
 
-// FIXME: Turn these into classes so we can have some type safety when
+class LocalDeclID {
+public:
+  explicit LocalDeclID(DeclID ID) : ID(ID) {}
+
+  DeclID get() const { return ID; }
+
+private:
+  DeclID ID;
+};
+
+// FIXME: Turn GlobalDeclID into class so we can have some type safety when
 // we go from local ID to global and vice-versa.
-using LocalDeclID = DeclID;
 using GlobalDeclID = DeclID;
 
 /// An ID number that refers to a type in an AST file.

diff  --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index fe9644eaca4916..42aecf059907e8 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -1924,7 +1924,7 @@ class ASTReader
   Decl *GetExistingDecl(serialization::DeclID ID);
 
   /// Reads a declaration with the given local ID in the given module.
-  Decl *GetLocalDecl(ModuleFile , serialization::DeclID LocalID) {
+  Decl *GetLocalDecl(ModuleFile , serialization::LocalDeclID LocalID) {
 return GetDecl(getGlobalDeclID(F, LocalID));
   }
 
@@ -1932,7 +1932,7 @@ class ASTReader
   ///
   /// \returns The requested declaration, casted to the given return type.
   template 
-  T *GetLocalDeclAs(ModuleFile , serialization::DeclID LocalID) {
+  T *GetLocalDeclAs(ModuleFile , serialization::LocalDeclID LocalID) {
 return cast_or_null(GetLocalDecl(F, LocalID));
   }
 

diff  --git a/clang/include/clang/Serialization/ModuleFile.h 
b/clang/include/clang/Serialization/ModuleFile.h
index 675e1e9bc355c5..492c35dceb08d4 100644
--- a/clang/include/clang/Serialization/ModuleFile.h
+++ b/clang/include/clang/Serialization/ModuleFile.h
@@ -474,7 +474,7 @@ class ModuleFile {
   llvm::DenseMap GlobalToLocalDeclIDs;
 
   /// Array of file-level DeclIDs sorted by file.
-  const serialization::DeclID *FileSortedDecls = nullptr;
+  const serialization::LocalDeclID *FileSortedDecls = nullptr;
   unsigned NumFileSortedDecls = 0;
 
   /// Array of category list location information within this

diff  --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 9764fdc6cd2d49..cfb6ab42c36bd7 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -954,14 +954,16 @@ ASTSelectorLookupTrait::ReadData(Selector, const unsigned 
char* d,
   // Load instance methods
   for (unsigned I = 0; I != NumInstanceMethods; ++I) {
 if (ObjCMethodDecl *Method = Reader.GetLocalDeclAs(
-F, endian::readNext(d)))
+F,
+LocalDeclID(endian::readNext(d
   Result.Instance.push_back(Method);
   }
 
   // Load factory methods
   for (unsigned I = 0; I != NumFactoryMethods; ++I) {
 if (ObjCMethodDecl *Method = Reader.GetLocalDeclAs(
-F, endian::readNext(d)))
+F,
+LocalDeclID(endian::readNext(d
   Result.Factory.push_back(Method);
   }
 
@@ -1091,7 +1093,8 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const 
internal_key_type& k,
 SmallVector DeclIDs;
 for (; DataLen > 0; DataLen -= sizeof(DeclID))
   DeclIDs.push_back(Reader.getGlobalDeclID(
-  F, endian::readNext(d)));
+  F,
+  LocalDeclID(endian::readNext(d;
 Reader.SetGloballyVisibleDecls(II, DeclIDs);
   }
 
@@ -1212,7 +1215,7 @@ void 
ASTDeclContextNameLookupTrait::ReadDataInto(internal_key_type,
   using namespace llvm::support;
 
   for (unsigned NumDecls = DataLen / 

[clang] [Modules] No transitive source location change (PR #86912)

2024-04-22 Thread Chuanqi Xu via cfe-commits

ChuanqiXu9 wrote:

@jansvoboda11 @Bigcheese ping

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] aac695d - [NFC] [Serialization] Use semantical type 'DeclID' for 'CreateDeserialized'

2024-04-19 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-19T16:24:47+08:00
New Revision: aac695da42cf48ccb29c2fe495ead564cc913471

URL: 
https://github.com/llvm/llvm-project/commit/aac695da42cf48ccb29c2fe495ead564cc913471
DIFF: 
https://github.com/llvm/llvm-project/commit/aac695da42cf48ccb29c2fe495ead564cc913471.diff

LOG: [NFC] [Serialization] Use semantical type 'DeclID' for 'CreateDeserialized'

Previously we use 'unsigned' as the type of ID in 'CreateDeserialized'.

And the type of `DeclID` in serialization is 'uint32_t', so there is
minor inconsistency.

Also more importantly, if we want to extend the type of DeclID from
uint32_t to uint64_t, we may be in trouble due to we forgot updating the
a lot of 'CreateDeserialized'.

So this patch tries to use semantical type 'DeclID' for
'*Decl::CreateDeserialized' to make sure it is tightly consistent.

Added: 


Modified: 
clang/include/clang/AST/Decl.h
clang/include/clang/AST/DeclBase.h
clang/include/clang/AST/DeclCXX.h
clang/include/clang/AST/DeclFriend.h
clang/include/clang/AST/DeclObjC.h
clang/include/clang/AST/DeclOpenMP.h
clang/include/clang/AST/DeclTemplate.h
clang/include/clang/Serialization/ASTReader.h
clang/lib/AST/Decl.cpp
clang/lib/AST/DeclBase.cpp
clang/lib/AST/DeclCXX.cpp
clang/lib/AST/DeclFriend.cpp
clang/lib/AST/DeclObjC.cpp
clang/lib/AST/DeclOpenMP.cpp
clang/lib/AST/DeclTemplate.cpp

Removed: 




diff  --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 0a9c9e17d3f9f9..8b121896d66d15 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -157,7 +157,7 @@ class PragmaCommentDecl final
SourceLocation CommentLoc,
PragmaMSCommentKind CommentKind,
StringRef Arg);
-  static PragmaCommentDecl *CreateDeserialized(ASTContext , unsigned ID,
+  static PragmaCommentDecl *CreateDeserialized(ASTContext , DeclID ID,
unsigned ArgSize);
 
   PragmaMSCommentKind getCommentKind() const { return CommentKind; }
@@ -192,7 +192,7 @@ class PragmaDetectMismatchDecl final
   SourceLocation Loc, StringRef Name,
   StringRef Value);
   static PragmaDetectMismatchDecl *
-  CreateDeserialized(ASTContext , unsigned ID, unsigned NameValueSize);
+  CreateDeserialized(ASTContext , DeclID ID, unsigned NameValueSize);
 
   StringRef getName() const { return getTrailingObjects(); }
   StringRef getValue() const { return getTrailingObjects() + ValueStart; 
}
@@ -518,7 +518,7 @@ class LabelDecl : public NamedDecl {
   static LabelDecl *Create(ASTContext , DeclContext *DC,
SourceLocation IdentL, IdentifierInfo *II,
SourceLocation GnuLabelL);
-  static LabelDecl *CreateDeserialized(ASTContext , unsigned ID);
+  static LabelDecl *CreateDeserialized(ASTContext , DeclID ID);
 
   LabelStmt *getStmt() const { return TheStmt; }
   void setStmt(LabelStmt *T) { TheStmt = T; }
@@ -581,7 +581,7 @@ class NamespaceDecl : public NamedDecl, public DeclContext,
IdentifierInfo *Id, NamespaceDecl *PrevDecl,
bool Nested);
 
-  static NamespaceDecl *CreateDeserialized(ASTContext , unsigned ID);
+  static NamespaceDecl *CreateDeserialized(ASTContext , DeclID ID);
 
   using redecl_range = redeclarable_base::redecl_range;
   using redecl_iterator = redeclarable_base::redecl_iterator;
@@ -1146,7 +1146,7 @@ class VarDecl : public DeclaratorDecl, public 
Redeclarable {
  const IdentifierInfo *Id, QualType T,
  TypeSourceInfo *TInfo, StorageClass S);
 
-  static VarDecl *CreateDeserialized(ASTContext , unsigned ID);
+  static VarDecl *CreateDeserialized(ASTContext , DeclID ID);
 
   SourceRange getSourceRange() const override LLVM_READONLY;
 
@@ -1728,7 +1728,7 @@ class ImplicitParamDecl : public VarDecl {
   static ImplicitParamDecl *Create(ASTContext , QualType T,
ImplicitParamKind ParamKind);
 
-  static ImplicitParamDecl *CreateDeserialized(ASTContext , unsigned ID);
+  static ImplicitParamDecl *CreateDeserialized(ASTContext , DeclID ID);
 
   ImplicitParamDecl(ASTContext , DeclContext *DC, SourceLocation IdLoc,
 const IdentifierInfo *Id, QualType Type,
@@ -1782,7 +1782,7 @@ class ParmVarDecl : public VarDecl {
  TypeSourceInfo *TInfo, StorageClass S,
  Expr *DefArg);
 
-  static ParmVarDecl *CreateDeserialized(ASTContext , unsigned ID);
+  static ParmVarDecl *CreateDeserialized(ASTContext , DeclID ID);
 
   SourceRange getSourceRange() const override LLVM_READONLY;
 
@@ -2178,7 +2178,7 @@ class FunctionDecl : public 

[clang] [Modules] No transitive source location change (PR #86912)

2024-04-18 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 updated 
https://github.com/llvm/llvm-project/pull/86912

>From ddb4074b0460daf7b42531ec62e97347b3f2e14d Mon Sep 17 00:00:00 2001
From: Chuanqi Xu 
Date: Mon, 18 Mar 2024 08:36:55 +0800
Subject: [PATCH 1/4] [Modules] No transitive source location change

---
 clang/include/clang/Basic/SourceLocation.h|  1 +
 .../include/clang/Serialization/ASTBitCodes.h | 56 ++--
 clang/include/clang/Serialization/ASTReader.h | 54 +++-
 clang/include/clang/Serialization/ASTWriter.h |  4 +
 .../include/clang/Serialization/ModuleFile.h  |  4 -
 .../Serialization/SourceLocationEncoding.h| 88 +--
 clang/lib/Frontend/ASTUnit.cpp|  2 -
 clang/lib/Serialization/ASTReader.cpp | 84 +++---
 clang/lib/Serialization/ASTReaderDecl.cpp |  2 +-
 clang/lib/Serialization/ASTWriter.cpp | 41 +++--
 clang/lib/Serialization/ASTWriterDecl.cpp |  8 +-
 clang/lib/Serialization/ModuleFile.cpp|  1 -
 .../no-transitive-source-location-change.cppm | 69 +++
 clang/test/Modules/pr61067.cppm   | 25 --
 .../SourceLocationEncodingTest.cpp| 12 +--
 15 files changed, 275 insertions(+), 176 deletions(-)
 create mode 100644 clang/test/Modules/no-transitive-source-location-change.cppm

diff --git a/clang/include/clang/Basic/SourceLocation.h 
b/clang/include/clang/Basic/SourceLocation.h
index 00b1e0fa855b7a..7a0f5ba8d1270b 100644
--- a/clang/include/clang/Basic/SourceLocation.h
+++ b/clang/include/clang/Basic/SourceLocation.h
@@ -90,6 +90,7 @@ class SourceLocation {
   friend class ASTWriter;
   friend class SourceManager;
   friend struct llvm::FoldingSetTrait;
+  friend class SourceLocationEncoding;
 
 public:
   using UIntTy = uint32_t;
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index 500098dd3dab1d..eca776a77e4557 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -22,6 +22,7 @@
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Basic/OperatorKinds.h"
 #include "clang/Basic/SourceLocation.h"
+#include "clang/Serialization/SourceLocationEncoding.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Bitstream/BitCodes.h"
 #include 
@@ -175,45 +176,38 @@ const unsigned int NUM_PREDEF_SUBMODULE_IDS = 1;
 
 /// Source range/offset of a preprocessed entity.
 struct PPEntityOffset {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location of beginning of range.
-  SourceLocation::UIntTy Begin;
+  RawLocEncoding Begin;
 
   /// Raw source location of end of range.
-  SourceLocation::UIntTy End;
+  RawLocEncoding End;
 
   /// Offset in the AST file relative to ModuleFile::MacroOffsetsBase.
   uint32_t BitOffset;
 
-  PPEntityOffset(SourceRange R, uint32_t BitOffset)
-  : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()),
-BitOffset(BitOffset) {}
-
-  SourceLocation getBegin() const {
-return SourceLocation::getFromRawEncoding(Begin);
-  }
+  PPEntityOffset(RawLocEncoding Begin, RawLocEncoding End, uint32_t BitOffset)
+  : Begin(Begin), End(End), BitOffset(BitOffset) {}
 
-  SourceLocation getEnd() const {
-return SourceLocation::getFromRawEncoding(End);
-  }
+  RawLocEncoding getBegin() const { return Begin; }
+  RawLocEncoding getEnd() const { return End; }
 };
 
 /// Source range of a skipped preprocessor region
 struct PPSkippedRange {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location of beginning of range.
-  SourceLocation::UIntTy Begin;
+  RawLocEncoding Begin;
   /// Raw source location of end of range.
-  SourceLocation::UIntTy End;
+  RawLocEncoding End;
 
-  PPSkippedRange(SourceRange R)
-  : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()) 
{
-  }
+  PPSkippedRange(RawLocEncoding Begin, RawLocEncoding End)
+  : Begin(Begin), End(End) {}
 
-  SourceLocation getBegin() const {
-return SourceLocation::getFromRawEncoding(Begin);
-  }
-  SourceLocation getEnd() const {
-return SourceLocation::getFromRawEncoding(End);
-  }
+  RawLocEncoding getBegin() const { return Begin; }
+  RawLocEncoding getEnd() const { return End; }
 };
 
 /// Offset in the AST file. Use splitted 64-bit integer into low/high
@@ -239,8 +233,10 @@ struct UnderalignedInt64 {
 
 /// Source location and bit offset of a declaration.
 struct DeclOffset {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location.
-  SourceLocation::UIntTy Loc = 0;
+  RawLocEncoding RawLoc = 0;
 
   /// Offset relative to the start of the DECLTYPES_BLOCK block. Keep
   /// structure alignment 32-bit and avoid padding gap because undefined
@@ -248,17 +244,15 @@ struct DeclOffset {
   UnderalignedInt64 BitOffset;
 
   DeclOffset() = default;
-  DeclOffset(SourceLocation Loc, uint64_t BitOffset,
- 

[clang] [Modules] No transitive source location change (PR #86912)

2024-04-18 Thread Chuanqi Xu via cfe-commits


@@ -4082,14 +4069,14 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile ) 
const {
   : ModuleMgr.lookupByFileName(Name));
 if (!OM) {
   std::string Msg =
-  "SourceLocation remap refers to unknown module, cannot find ";
+  "cannot find module ";

ChuanqiXu9 wrote:

Done

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-18 Thread Chuanqi Xu via cfe-commits


@@ -2221,33 +,45 @@ class ASTReader
 return Sema::AlignPackInfo::getFromRawEncoding(Raw);
   }
 
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Read a source location from raw form and return it in its
   /// originating module file's source location space.
-  SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw,
-LocSeq *Seq = nullptr) const {
+  std::pair
+  ReadUntranslatedSourceLocation(RawLocEncoding Raw,
+ LocSeq *Seq = nullptr) const {
 return SourceLocationEncoding::decode(Raw, Seq);
   }
 
   /// Read a source location from raw form.
-  SourceLocation ReadSourceLocation(ModuleFile ,
-SourceLocation::UIntTy Raw,
-LocSeq *Seq = nullptr) const {
-SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq);
-return TranslateSourceLocation(ModuleFile, Loc);
+  SourceLocation ReadSourceLocation(ModuleFile , RawLocEncoding Raw,
+   LocSeq *Seq = nullptr) const {
+if (!MF.ModuleOffsetMap.empty())
+  ReadModuleOffsetMap(MF);
+
+auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
+ModuleFile *OwningModuleFile =
+ModuleFileIndex == 0 ?  : MF.DependentModules[ModuleFileIndex - 1];
+
+assert(!SourceMgr.isLoadedSourceLocation(Loc) && "Run out source location 
space");

ChuanqiXu9 wrote:

But the value of `Loc`  may not be valid. Also I feel it is fine to have some 
redundant assertions. It helps the reader to understand the codes better.

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-18 Thread Chuanqi Xu via cfe-commits


@@ -2220,33 +2221,40 @@ class ASTReader
 return Sema::AlignPackInfo::getFromRawEncoding(Raw);
   }
 
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Read a source location from raw form and return it in its
   /// originating module file's source location space.
-  SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw,
-LocSeq *Seq = nullptr) const {
+  std::pair
+  ReadUntranslatedSourceLocation(RawLocEncoding Raw,
+ LocSeq *Seq = nullptr) const {
 return SourceLocationEncoding::decode(Raw, Seq);
   }
 
   /// Read a source location from raw form.
-  SourceLocation ReadSourceLocation(ModuleFile ,
-SourceLocation::UIntTy Raw,
-LocSeq *Seq = nullptr) const {
-SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq);
-return TranslateSourceLocation(ModuleFile, Loc);
+  SourceLocation ReadSourceLocation(ModuleFile , RawLocEncoding Raw,
+   LocSeq *Seq = nullptr) const {
+if (!MF.ModuleOffsetMap.empty())
+  ReadModuleOffsetMap(MF);
+
+auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
+ModuleFile *OwningModuleFile =
+ModuleFileIndex ? MF.DependentModules[ModuleFileIndex - 1] : 
+return TranslateSourceLocation(*OwningModuleFile, Loc);
   }
 
   /// Translate a source location from another module file's source
   /// location space into ours.
   SourceLocation TranslateSourceLocation(ModuleFile ,
  SourceLocation Loc) const {
-if (!ModuleFile.ModuleOffsetMap.empty())
-  ReadModuleOffsetMap(ModuleFile);
-assert(ModuleFile.SLocRemap.find(Loc.getOffset()) !=
-   ModuleFile.SLocRemap.end() &&
-   "Cannot find offset to remap.");
-SourceLocation::IntTy Remap =
-ModuleFile.SLocRemap.find(Loc.getOffset())->second;
-return Loc.getLocWithOffset(Remap);
+if (Loc.isInvalid())
+  return Loc;
+
+// It implies that the Loc is already translated.
+if (SourceMgr.isLoadedSourceLocation(Loc))
+  return Loc;

ChuanqiXu9 wrote:

> Now that TranslateSourceLocation() is only called from ReadSourceLocation()

Sadly, this is not true. `TranslateSourceLocation()` may be called in 
`ASTReader::ReadAST()`:

https://github.com/llvm/llvm-project/blob/aac4d03423dd6b7bdef0f2eb03c570f3e2ca6630/clang/lib/Serialization/ASTReader.cpp#L4588-L4591

The input value of  `TranslateSourceLocation()` there may come from a reading 
of untranslated source location in `ASTReader::ReadControlBlock` when reading 
imported modules. Or the input value may come from the argument of  
`ASTReader::ReadAST()`, where must be a translated source location. 

Then it looks really dangerous to me. So I add the FIXME. We may not be able to 
change the signature of the argument of `TranslateSourceLocation()` to 
`UntranslatedSourceLocation` since that will require us to change the signature 
of `ASTReader::ReadAST()`.

The reason why actually it works, is that, in the case the input value comes 
from a translated source location (passed directly in `ASTReader::ReadAST()`), 
the value of `M.ImportedBy` may always be null **now**.  But I feel it is 
dangerous if someone changes it suddenly.

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] aac4d03 - [C++20] [Modules] Mark exported all declarations as used

2024-04-18 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-19T09:46:18+08:00
New Revision: aac4d03423dd6b7bdef0f2eb03c570f3e2ca6630

URL: 
https://github.com/llvm/llvm-project/commit/aac4d03423dd6b7bdef0f2eb03c570f3e2ca6630
DIFF: 
https://github.com/llvm/llvm-project/commit/aac4d03423dd6b7bdef0f2eb03c570f3e2ca6630.diff

LOG: [C++20] [Modules] Mark exported all declarations as used

Close https://github.com/llvm/llvm-project/issues/85122

As the title suggested, it looks pretty sensible.

Added: 
clang/test/Modules/pr85122.cppm

Modified: 
clang/lib/Sema/SemaModule.cpp

Removed: 




diff  --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp
index 67658c93ed3baf..ad118ac90e4aa6 100644
--- a/clang/lib/Sema/SemaModule.cpp
+++ b/clang/lib/Sema/SemaModule.cpp
@@ -1003,6 +1003,10 @@ Decl *Sema::ActOnFinishExportDecl(Scope *S, Decl *D, 
SourceLocation RBraceLoc) {
 }
   }
 
+  // Anything exported from a module should never be considered unused.
+  for (auto *Exported : ED->decls())
+Exported->markUsed(getASTContext());
+
   return D;
 }
 

diff  --git a/clang/test/Modules/pr85122.cppm b/clang/test/Modules/pr85122.cppm
new file mode 100644
index 00..a4c89f13711a36
--- /dev/null
+++ b/clang/test/Modules/pr85122.cppm
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -std=c++20 %s -Wall -fsyntax-only -verify
+
+// expected-no-diagnostics
+export module a;
+
+export constexpr auto a = []{};



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-18 Thread Chuanqi Xu via cfe-commits

ChuanqiXu9 wrote:

Fix conflicts and rebase with main.

@Bigcheese @jansvoboda11  ping~

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] e7a8dd9 - [docs] [C++20] [Modules] Mentioning Module Initializer

2024-04-18 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-18T16:12:14+08:00
New Revision: e7a8dd9b0d419403fe1d8adeb177a4ec78e036cc

URL: 
https://github.com/llvm/llvm-project/commit/e7a8dd9b0d419403fe1d8adeb177a4ec78e036cc
DIFF: 
https://github.com/llvm/llvm-project/commit/e7a8dd9b0d419403fe1d8adeb177a4ec78e036cc.diff

LOG: [docs] [C++20] [Modules] Mentioning Module Initializer

Although we want to treat the module initializer as a transparent
concept to users, but it shows that people need to understand
the concept to understand how to understand and distribute modules.

So it is better to mention this too.

Added: 


Modified: 
clang/docs/StandardCPlusPlusModules.rst

Removed: 




diff  --git a/clang/docs/StandardCPlusPlusModules.rst 
b/clang/docs/StandardCPlusPlusModules.rst
index 8d5529d5d37db5..ee57fb5da64857 100644
--- a/clang/docs/StandardCPlusPlusModules.rst
+++ b/clang/docs/StandardCPlusPlusModules.rst
@@ -483,6 +483,13 @@ violations with the flag enabled.
 ABI Impacts
 ---
 
+This section describes the new ABI changes brought by modules.
+
+Only Itanium C++ ABI related change are mentioned
+
+Mangling Names
+~~
+
 The declarations in a module unit which are not in the global module fragment 
have new linkage names.
 
 For example,
@@ -520,6 +527,23 @@ is attached to the global module fragments. For example:
 
 Now the linkage name of ``NS::foo()`` will be ``_ZN2NS3fooEv``.
 
+Module Initializers
+~~~
+
+All the importable module units are required to emit an initializer function.
+The initializer function should contain calls to importing modules first and
+all the dynamic-initializers in the current module unit then.
+
+Translation units explicitly or implicitly importing named modules must call
+the initializer functions of the imported named modules within the sequence of
+the dynamic-initializers in the TU. Initializations of entities at namespace
+scope are appearance-ordered. This (recursively) extends into imported modules
+at the point of appearance of the import declaration.
+
+It is allowed to omit calls to importing modules if it is known empty.
+
+It is allowed to omit calls to importing modules for which is known to be 
called.
+
 Reduced BMI
 ---
 



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 5d4e072 - [C++20] [Modules] [Reduced BMI] Don't eagerly write static entities in

2024-04-18 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-18T15:38:02+08:00
New Revision: 5d4e072a253afac018e312d75290314bd2d03b30

URL: 
https://github.com/llvm/llvm-project/commit/5d4e072a253afac018e312d75290314bd2d03b30
DIFF: 
https://github.com/llvm/llvm-project/commit/5d4e072a253afac018e312d75290314bd2d03b30.diff

LOG: [C++20] [Modules] [Reduced BMI] Don't eagerly write static entities in
module purview

For,

```
export module A;
static int impl() { ... }
export int func() { return impl(); }
```

Previously, even with reduced BMI, the function `impl` will be emitted
into the BMI. After the patch, the static entities in module purview
won't get emitted eagerly. Now the static entities may only be emitted
if required.

Note that, this restriction is actually more relaxed than the language
standard required. The language spec said, the program is ill-formed if
any TU-local entities get exposed. However, we can't do this since there
are many static entities in the headers of existing libraries.
Forbidding that will cause many existing program fail immediately.

Another note here is, we can't do this for non-static non-exported
entities, they can be used for other module units within the same
module.

Added: 
clang/test/Modules/unreached-static-entities.cppm

Modified: 
clang/lib/Serialization/ASTWriter.cpp

Removed: 




diff  --git a/clang/lib/Serialization/ASTWriter.cpp 
b/clang/lib/Serialization/ASTWriter.cpp
index 9ae9648f38a3b7..522b8ce56c3266 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -4892,8 +4892,21 @@ void ASTWriter::PrepareWritingSpecialDecls(Sema 
) {
 if (D->isFromASTFile())
   continue;
 
-if (GeneratingReducedBMI && D->isFromExplicitGlobalModule())
-  continue;
+if (GeneratingReducedBMI) {
+  if (D->isFromExplicitGlobalModule())
+continue;
+
+  // Don't force emitting static entities.
+  //
+  // Technically, all static entities shouldn't be in reduced BMI. The
+  // language also specifies that the program exposes TU-local entities
+  // is ill-formed. However, in practice, there are a lot of projects
+  // uses `static inline` in the headers. So we can't get rid of all
+  // static entities in reduced BMI now.
+  if (auto *ND = dyn_cast(D);
+  ND && ND->getFormalLinkage() == Linkage::Internal)
+continue;
+}
 
 GetDeclRef(D);
   }

diff  --git a/clang/test/Modules/unreached-static-entities.cppm 
b/clang/test/Modules/unreached-static-entities.cppm
new file mode 100644
index 00..10f70ae09e5a1a
--- /dev/null
+++ b/clang/test/Modules/unreached-static-entities.cppm
@@ -0,0 +1,22 @@
+// Test that the static function only used in non-inline functions won't get 
emitted
+// into the BMI.
+//
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+//
+// RUN: %clang_cc1 -std=c++20 %s -emit-reduced-module-interface -o %t/S.pcm
+// RUN: llvm-bcanalyzer --dump --disable-histogram --show-binary-blobs 
%t/S.pcm > %t/S.dump
+// RUN: cat %t/S.dump | FileCheck %s
+
+export module S;
+static int static_func() {
+return 43;
+}
+
+export int func() {
+return static_func();
+}
+
+// CHECK: https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 7ec342b - [C++20] [Modules] [Reduced BMI] Write Special Decl Lazily

2024-04-18 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-18T14:35:38+08:00
New Revision: 7ec342ba16ca97206aa2ddd5b7ec0da063042e03

URL: 
https://github.com/llvm/llvm-project/commit/7ec342ba16ca97206aa2ddd5b7ec0da063042e03
DIFF: 
https://github.com/llvm/llvm-project/commit/7ec342ba16ca97206aa2ddd5b7ec0da063042e03.diff

LOG: [C++20] [Modules] [Reduced BMI] Write Special Decl Lazily

ASTWrite would write some special records for the consumer of the AST
can get the information easily. This is fine. But currently all the
special records are written eagerly, which is conflicting with reduced
BMI.

In reduced BMI, we hope to write things as less as possible. It is not a
goal for reduced BMI to retain all the informations. So in this patch we
won't record the special declarations eagerly before we starting write.
But only writing the sepcial records after we writes decls and types,
then only the reached declarations will be collected.

Added: 
clang/test/Modules/reduced-bmi-empty-module-purview-std.cppm

Modified: 
clang/include/clang/Serialization/ASTWriter.h
clang/lib/Serialization/ASTWriter.cpp

Removed: 




diff  --git a/clang/include/clang/Serialization/ASTWriter.h 
b/clang/include/clang/Serialization/ASTWriter.h
index e093e6ad9d12c7..13b4ad4ad2953d 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -714,6 +714,8 @@ class ASTWriter : public ASTDeserializationListener,
 
   /// Emit a reference to a declaration.
   void AddDeclRef(const Decl *D, RecordDataImpl );
+  // Emit a reference to a declaration if the declaration was emitted.
+  void AddEmittedDeclRef(const Decl *D, RecordDataImpl );
 
   /// Force a declaration to be emitted and get its ID.
   serialization::DeclID GetDeclRef(const Decl *D);

diff  --git a/clang/lib/Serialization/ASTWriter.cpp 
b/clang/lib/Serialization/ASTWriter.cpp
index ffeab4db9f6afc..9ae9648f38a3b7 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -4741,12 +4741,12 @@ static void AddLazyVectorDecls(ASTWriter , 
Vector ) {
   }
 }
 
-template
-static void AddLazyVectorDecls(ASTWriter , Vector ,
-   ASTWriter::RecordData ) {
+template 
+static void AddLazyVectorEmiitedDecls(ASTWriter , Vector ,
+  ASTWriter::RecordData ) {
   for (typename Vector::iterator I = Vec.begin(nullptr, true), E = Vec.end();
I != E; ++I) {
-Writer.AddDeclRef(*I, Record);
+Writer.AddEmittedDeclRef(*I, Record);
   }
 }
 
@@ -4882,6 +4882,25 @@ void ASTWriter::PrepareWritingSpecialDecls(Sema 
) {
   RegisterPredefDecl(Context.TypePackElementDecl,
  PREDEF_DECL_TYPE_PACK_ELEMENT_ID);
 
+  const TranslationUnitDecl *TU = Context.getTranslationUnitDecl();
+
+  // Force all top level declarations to be emitted.
+  //
+  // We start emitting top level declarations from the module purview to
+  // implement the eliding unreachable declaration feature.
+  for (const auto *D : TU->noload_decls()) {
+if (D->isFromASTFile())
+  continue;
+
+if (GeneratingReducedBMI && D->isFromExplicitGlobalModule())
+  continue;
+
+GetDeclRef(D);
+  }
+
+  if (GeneratingReducedBMI)
+return;
+
   // Writing all of the tentative definitions in this file, in
   // TentativeDefinitions order.  Generally, this record will be empty for
   // headers.
@@ -4943,22 +4962,6 @@ void ASTWriter::PrepareWritingSpecialDecls(Sema 
) {
  SemaRef.getMismatchingDeleteExpressions())
   GetDeclRef(DeleteExprsInfo.first);
 
-  const TranslationUnitDecl *TU = Context.getTranslationUnitDecl();
-
-  // Force all top level declarations to be emitted.
-  //
-  // We start emitting top level declarations from the module purview to
-  // implement the eliding unreachable declaration feature.
-  for (const auto *D : TU->noload_decls()) {
-if (D->isFromASTFile())
-  continue;
-
-if (GeneratingReducedBMI && D->isFromExplicitGlobalModule())
-  continue;
-
-GetDeclRef(D);
-  }
-
   // Make sure visible decls, added to DeclContexts previously loaded from
   // an AST file, are registered for serialization. Likewise for template
   // specializations added to imported templates.
@@ -5001,21 +5004,22 @@ void ASTWriter::WriteSpecialDeclRecords(Sema ) {
   
   // Write the record containing tentative definitions.
   RecordData TentativeDefinitions;
-  AddLazyVectorDecls(*this, SemaRef.TentativeDefinitions, 
TentativeDefinitions);
+  AddLazyVectorEmiitedDecls(*this, SemaRef.TentativeDefinitions,
+TentativeDefinitions);
   if (!TentativeDefinitions.empty())
 Stream.EmitRecord(TENTATIVE_DEFINITIONS, TentativeDefinitions);
 
   // Write the record containing unused file scoped decls.
   RecordData UnusedFileScopedDecls;
   if (!isModule)
-AddLazyVectorDecls(*this, SemaRef.UnusedFileScopedDecls,
-   

[clang] 83cc605 - [NFC] [Serialization] Extract logics to write special decls from

2024-04-18 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-18T14:34:24+08:00
New Revision: 83cc60565a8a78ce00ee0c29f167400902e60737

URL: 
https://github.com/llvm/llvm-project/commit/83cc60565a8a78ce00ee0c29f167400902e60737
DIFF: 
https://github.com/llvm/llvm-project/commit/83cc60565a8a78ce00ee0c29f167400902e60737.diff

LOG: [NFC] [Serialization] Extract logics to write special decls from
ASTWriter::WriteASTCore

ASTWriter::WriteASTCore is a big function and slightly hard to read.
This patch extracts the logics to force emitting special declarations
and writing special records for these declarations into member
functions.

This is helpful to improve the readability and also helpful for the
following patch to write special declarations lazily.

Added: 


Modified: 
clang/include/clang/Serialization/ASTWriter.h
clang/lib/Serialization/ASTWriter.cpp

Removed: 




diff  --git a/clang/include/clang/Serialization/ASTWriter.h 
b/clang/include/clang/Serialization/ASTWriter.h
index 892d7363e06dd7..e093e6ad9d12c7 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -559,6 +559,8 @@ class ASTWriter : public ASTDeserializationListener,
   void WriteIdentifierTable(Preprocessor , IdentifierResolver ,
 bool IsModule);
   void WriteDeclAndTypes(ASTContext );
+  void PrepareWritingSpecialDecls(Sema );
+  void WriteSpecialDeclRecords(Sema );
   void WriteDeclUpdatesBlocks(RecordDataImpl );
   void WriteDeclContextVisibleUpdate(const DeclContext *DC);
   void WriteFPPragmaOptions(const FPOptionsOverride );

diff  --git a/clang/lib/Serialization/ASTWriter.cpp 
b/clang/lib/Serialization/ASTWriter.cpp
index c3cf242391b743..ffeab4db9f6afc 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -4733,6 +4733,14 @@ ASTFileSignature ASTWriter::WriteAST(Sema , 
StringRef OutputFile,
   return Signature;
 }
 
+template
+static void AddLazyVectorDecls(ASTWriter , Vector ) {
+  for (typename Vector::iterator I = Vec.begin(nullptr, true), E = Vec.end();
+   I != E; ++I) {
+Writer.GetDeclRef(*I);
+  }
+}
+
 template
 static void AddLazyVectorDecls(ASTWriter , Vector ,
ASTWriter::RecordData ) {
@@ -4835,24 +4843,10 @@ void ASTWriter::computeNonAffectingInputFiles() {
   FileMgr.trackVFSUsage(false);
 }
 
-ASTFileSignature ASTWriter::WriteASTCore(Sema , StringRef isysroot,
- Module *WritingModule) {
-  using namespace llvm;
-
-  bool isModule = WritingModule != nullptr;
-
-  // Make sure that the AST reader knows to finalize itself.
-  if (Chain)
-Chain->finalizeForWriting();
-
+void ASTWriter::PrepareWritingSpecialDecls(Sema ) {
   ASTContext  = SemaRef.Context;
-  Preprocessor  = SemaRef.PP;
-
-  // This needs to be done very early, since everything that writes
-  // SourceLocations or FileIDs depends on it.
-  computeNonAffectingInputFiles();
 
-  writeUnhashedControlBlock(PP, Context);
+  bool isModule = WritingModule != nullptr;
 
   // Set up predefined declaration IDs.
   auto RegisterPredefDecl = [&] (Decl *D, PredefinedDeclIDs ID) {
@@ -4888,43 +4882,144 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema 
, StringRef isysroot,
   RegisterPredefDecl(Context.TypePackElementDecl,
  PREDEF_DECL_TYPE_PACK_ELEMENT_ID);
 
-  // Build a record containing all of the tentative definitions in this file, 
in
+  // Writing all of the tentative definitions in this file, in
   // TentativeDefinitions order.  Generally, this record will be empty for
   // headers.
   RecordData TentativeDefinitions;
-  AddLazyVectorDecls(*this, SemaRef.TentativeDefinitions, 
TentativeDefinitions);
+  AddLazyVectorDecls(*this, SemaRef.TentativeDefinitions);
 
-  // Build a record containing all of the file scoped decls in this file.
-  RecordData UnusedFileScopedDecls;
+  // Writing all of the file scoped decls in this file.
   if (!isModule)
-AddLazyVectorDecls(*this, SemaRef.UnusedFileScopedDecls,
-   UnusedFileScopedDecls);
+AddLazyVectorDecls(*this, SemaRef.UnusedFileScopedDecls);
 
-  // Build a record containing all of the delegating constructors we still need
+  // Writing all of the delegating constructors we still need
   // to resolve.
-  RecordData DelegatingCtorDecls;
   if (!isModule)
-AddLazyVectorDecls(*this, SemaRef.DelegatingCtorDecls, 
DelegatingCtorDecls);
+AddLazyVectorDecls(*this, SemaRef.DelegatingCtorDecls);
 
-  // Write the set of weak, undeclared identifiers. We always write the
-  // entire table, since later PCH files in a PCH chain are only interested in
-  // the results at the end of the chain.
-  RecordData WeakUndeclaredIdentifiers;
-  for (const auto  :
-   SemaRef.WeakUndeclaredIdentifiers) {
-const IdentifierInfo *const II = WeakUndeclaredIdentifierList.first;
-for (const auto  : 

[clang] [C++20] [Modules] [Itanium ABI] Generate the vtable in the module unit of dynamic classes (PR #75912)

2024-04-17 Thread Chuanqi Xu via cfe-commits

ChuanqiXu9 wrote:

@rjmccall @dwblaikie ping

https://github.com/llvm/llvm-project/pull/75912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] f2695a1 - [C++20] [Modules] Avoid writing untouched DeclUpdates from GMF in

2024-04-17 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-18T11:00:28+08:00
New Revision: f2695a1c2f561da42b973187a254b6eeb7da76a9

URL: 
https://github.com/llvm/llvm-project/commit/f2695a1c2f561da42b973187a254b6eeb7da76a9
DIFF: 
https://github.com/llvm/llvm-project/commit/f2695a1c2f561da42b973187a254b6eeb7da76a9.diff

LOG: [C++20] [Modules] Avoid writing untouched DeclUpdates from GMF in
Reduced BMI

Mitigate https://github.com/llvm/llvm-project/issues/61447

The root cause of the above problem is that when we write a declaration,
we need to lookup all the redeclarations in the imported modules. Then
it will be pretty slow if there are too many redeclarations in different
modules. This patch doesn't solve the porblem.

What the patchs mitigated is, when we writing a named module, we shouldn't
write the declarations from GMF if it is unreferenced **in current
module unit**. The difference here is that, if the declaration is used
in the imported modules, we used to emit it as an update. But we
definitely want to avoid that after this patch.

For that reproducer in
https://github.com/llvm/llvm-project/issues/61447, it used to take 2.5s
to compile and now it only takes 0.49s to compile, which is a big win.

Added: 
clang/test/Modules/reduced-bmi-empty-module-purview.cppm

Modified: 
clang/include/clang/AST/ASTMutationListener.h
clang/include/clang/AST/Decl.h
clang/include/clang/Serialization/ASTWriter.h
clang/lib/AST/ASTContext.cpp
clang/lib/AST/Decl.cpp
clang/lib/Frontend/MultiplexConsumer.cpp
clang/lib/Sema/SemaModule.cpp
clang/lib/Serialization/ASTWriter.cpp

Removed: 




diff  --git a/clang/include/clang/AST/ASTMutationListener.h 
b/clang/include/clang/AST/ASTMutationListener.h
index 8879f9f3229ff3..2c4ec2ce67f36b 100644
--- a/clang/include/clang/AST/ASTMutationListener.h
+++ b/clang/include/clang/AST/ASTMutationListener.h
@@ -27,6 +27,7 @@ namespace clang {
   class FunctionTemplateDecl;
   class Module;
   class NamedDecl;
+  class NamespaceDecl;
   class ObjCCategoryDecl;
   class ObjCContainerDecl;
   class ObjCInterfaceDecl;
@@ -35,6 +36,7 @@ namespace clang {
   class QualType;
   class RecordDecl;
   class TagDecl;
+  class TranslationUnitDecl;
   class ValueDecl;
   class VarDecl;
   class VarTemplateDecl;
@@ -147,6 +149,31 @@ class ASTMutationListener {
   virtual void AddedAttributeToRecord(const Attr *Attr,
   const RecordDecl *Record) {}
 
+  /// The parser find the named module declaration.
+  virtual void EnteringModulePurview() {}
+
+  /// An mangling number was added to a Decl
+  ///
+  /// \param D The decl that got a mangling number
+  ///
+  /// \param Number The mangling number that was added to the Decl
+  virtual void AddedManglingNumber(const Decl *D, unsigned Number) {}
+
+  /// An static local number was added to a Decl
+  ///
+  /// \param D The decl that got a static local number
+  ///
+  /// \param Number The static local number that was added to the Decl
+  virtual void AddedStaticLocalNumbers(const Decl *D, unsigned Number) {}
+
+  /// An anonymous namespace was added the translation unit decl
+  ///
+  /// \param TU The translation unit decl that got a new anonymous namespace
+  ///
+  /// \param AnonNamespace The anonymous namespace that was added
+  virtual void AddedAnonymousNamespace(const TranslationUnitDecl *TU,
+   NamespaceDecl *AnonNamespace) {}
+
   // NOTE: If new methods are added they should also be added to
   // MultiplexASTMutationListener.
 };

diff  --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 01af50ca694fdd..91449ebcceec37 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -120,7 +120,7 @@ class TranslationUnitDecl : public Decl,
   ASTContext () const { return Ctx; }
 
   NamespaceDecl *getAnonymousNamespace() const { return AnonymousNamespace; }
-  void setAnonymousNamespace(NamespaceDecl *D) { AnonymousNamespace = D; }
+  void setAnonymousNamespace(NamespaceDecl *D);
 
   static TranslationUnitDecl *Create(ASTContext );
 

diff  --git a/clang/include/clang/Serialization/ASTWriter.h 
b/clang/include/clang/Serialization/ASTWriter.h
index 443f7703104700..892d7363e06dd7 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -399,6 +399,11 @@ class ASTWriter : public ASTDeserializationListener,
   /// record containing modifications to them.
   DeclUpdateMap DeclUpdates;
 
+  /// DeclUpdates added during parsing the GMF. We split these from
+  /// DeclUpdates since we want to add these updates in GMF on need.
+  /// Only meaningful for reduced BMI.
+  DeclUpdateMap DeclUpdatesFromGMF;
+
   using FirstLatestDeclMap = llvm::DenseMap;
 
   /// Map of first declarations from a chained PCH that point to the
@@ -866,6 +871,11 @@ class ASTWriter : public 

[clang] e6ecff8 - [C++20] [Modules] Add Release Notes and Documents for Reduced BMI

2024-04-16 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-17T10:42:48+08:00
New Revision: e6ecff8d95b9175e70e0d43e14c2975c8f69d718

URL: 
https://github.com/llvm/llvm-project/commit/e6ecff8d95b9175e70e0d43e14c2975c8f69d718
DIFF: 
https://github.com/llvm/llvm-project/commit/e6ecff8d95b9175e70e0d43e14c2975c8f69d718.diff

LOG: [C++20] [Modules] Add Release Notes and Documents for Reduced BMI

See
https://discourse.llvm.org/t/rfc-c-20-modules-introduce-thin-bmi-and-decls-hash/74755,
https://github.com/llvm/llvm-project/pull/75894 and
https://github.com/llvm/llvm-project/pull/85050 for the background.

Added: 


Modified: 
clang/docs/ReleaseNotes.rst
clang/docs/StandardCPlusPlusModules.rst

Removed: 




diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 3752b6ce157600..efc32212f300cf 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -214,6 +214,9 @@ New Compiler Flags
   This diagnostic can be disabled to make ``-Wmissing-field-initializers`` 
behave
   like it did before Clang 18.x. Fixes #GH56628
 
+- ``-fexperimental-modules-reduced-bmi`` enables the Reduced BMI for C++20 
named modules.
+  See the document of standard C++ modules for details.
+
 Deprecated Compiler Flags
 -
 

diff  --git a/clang/docs/StandardCPlusPlusModules.rst 
b/clang/docs/StandardCPlusPlusModules.rst
index c5478bba45f389..8d5529d5d37db5 100644
--- a/clang/docs/StandardCPlusPlusModules.rst
+++ b/clang/docs/StandardCPlusPlusModules.rst
@@ -520,6 +520,112 @@ is attached to the global module fragments. For example:
 
 Now the linkage name of ``NS::foo()`` will be ``_ZN2NS3fooEv``.
 
+Reduced BMI
+---
+
+To support the 2 phase compilation model, Clang chose to put everything needed 
to
+produce an object into the BMI. But every consumer of the BMI, except itself, 
doesn't
+need such informations. It makes the BMI to larger and so may introduce 
unnecessary
+dependencies into the BMI. To mitigate the problem, we decided to reduce the 
information
+contained in the BMI.
+
+To be clear, we call the default BMI as Full BMI and the new introduced BMI as 
Reduced
+BMI.
+
+Users can use ``-fexperimental-modules-reduced-bmi`` flag to enable the 
Reduced BMI.
+
+For one phase compilation model (CMake implements this model), with
+``-fexperimental-modules-reduced-bmi``, the generated BMI will be Reduced BMI 
automatically.
+(The output path of the BMI is specified by ``-fmodule-output=`` as usual one 
phase
+compilation model).
+
+It is still possible to support Reduced BMI in two phase compilation model. 
With
+``-fexperimental-modules-reduced-bmi``, ``--precompile`` and 
``-fmodule-output=`` specified,
+the generated BMI specified by ``-o`` will be full BMI and the BMI specified by
+``-fmodule-output=`` will be Reduced BMI. The dependency graph may be:
+
+.. code-block:: none
+
+  module-unit.cppm --> module-unit.full.pcm -> module-unit.o
+|
+-> module-unit.reduced.pcm -> consumer1.cpp
+   -> consumer2.cpp
+   -> ...
+   -> consumer_n.cpp
+
+We don't emit diagnostics if ``-fexperimental-modules-reduced-bmi`` is used 
with a non-module
+unit. This design helps the end users of one phase compilation model to 
perform experiments
+early without asking for the help of build systems. The users of build systems 
which supports
+two phase compilation model still need helps from build systems.
+
+Within Reduced BMI, we won't write unreachable entities from GMF, definitions 
of non-inline
+functions and non-inline variables. This may not be a transparent change.
+`[module.global.frag]ex2 
`_ may be a good
+example:
+
+.. code-block:: c++
+
+  // foo.h
+  namespace N {
+struct X {};
+int d();
+int e();
+inline int f(X, int = d()) { return e(); }
+int g(X);
+int h(X);
+  }
+
+  // M.cppm
+  module;
+  #include "foo.h"
+  export module M;
+  template int use_f() {
+N::X x;   // N::X, N, and :: are decl-reachable from 
use_f
+return f(x, 123); // N::f is decl-reachable from use_f,
+  // N::e is indirectly decl-reachable from 
use_f
+  //   because it is decl-reachable from N::f, 
and
+  // N::d is decl-reachable from use_f
+  //   because it is decl-reachable from N::f
+  //   even though it is not used in this call
+  }
+  template int use_g() {
+N::X x;   // N::X, N, and :: are decl-reachable from 
use_g
+return g((T(), x));   // N::g is not decl-reachable from use_g
+  }
+  template int use_h() {
+N::X x;

[clang] d26dd58 - [StmtProfile] Don't profile the body of lambda expressions

2024-04-16 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-16T15:41:26+08:00
New Revision: d26dd58ca5b59032eb371b8f51d9134acdd8d3ad

URL: 
https://github.com/llvm/llvm-project/commit/d26dd58ca5b59032eb371b8f51d9134acdd8d3ad
DIFF: 
https://github.com/llvm/llvm-project/commit/d26dd58ca5b59032eb371b8f51d9134acdd8d3ad.diff

LOG: [StmtProfile] Don't profile the body of lambda expressions

Close https://github.com/llvm/llvm-project/issues/87609

We tried to profile the body of the lambda expressions in
https://reviews.llvm.org/D153957. But as the original comments show,
it is indeed dangerous. After we tried to skip calculating the ODR
hash values recently, we have fall into this trap twice.

So in this patch, I choose to not profile the body of the lambda
expression. The signature of the lambda is still profiled.

Added: 
clang/test/Modules/hashing-decls-in-exprs-from-gmf-2.cppm

Modified: 
clang/include/clang/AST/DeclBase.h
clang/include/clang/Serialization/ASTReader.h
clang/lib/AST/Decl.cpp
clang/lib/AST/DeclBase.cpp
clang/lib/AST/StmtProfile.cpp
clang/lib/Serialization/ASTReader.cpp
clang/lib/Serialization/ASTReaderDecl.cpp
clang/lib/Serialization/ASTWriter.cpp
clang/lib/Serialization/ASTWriterDecl.cpp

Removed: 




diff  --git a/clang/include/clang/AST/DeclBase.h 
b/clang/include/clang/AST/DeclBase.h
index 2194d268fa86f0..1079993f496945 100644
--- a/clang/include/clang/AST/DeclBase.h
+++ b/clang/include/clang/AST/DeclBase.h
@@ -672,16 +672,6 @@ class alignas(8) Decl {
   /// Whether this declaration comes from explicit global module.
   bool isFromExplicitGlobalModule() const;
 
-  /// Check if we should skip checking ODRHash for declaration \param D.
-  ///
-  /// The existing ODRHash mechanism seems to be not stable enough and
-  /// the false positive ODR violation reports are annoying and we rarely see
-  /// true ODR violation reports. Also we learned that MSVC disabled ODR checks
-  /// for declarations in GMF. So we try to disable ODR checks in the GMF to
-  /// get better user experiences before we make the ODR violation checks 
stable
-  /// enough.
-  bool shouldSkipCheckingODR() const;
-
   /// Return true if this declaration has an attribute which acts as
   /// definition of the entity, such as 'alias' or 'ifunc'.
   bool hasDefiningAttr() const;

diff  --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index e3fde887f99cb7..43ee06c524b3a0 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -2457,6 +2457,12 @@ class BitsUnpacker {
   uint32_t Value;
   uint32_t CurrentBitsIndex = ~0;
 };
+
+inline bool shouldSkipCheckingODR(const Decl *D) {
+  return D->getASTContext().getLangOpts().SkipODRCheckInGMF &&
+ D->isFromExplicitGlobalModule();
+}
+
 } // namespace clang
 
 #endif // LLVM_CLANG_SERIALIZATION_ASTREADER_H

diff  --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 2b2d5a2663a18b..33b6f8611f2162 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -4534,7 +4534,7 @@ unsigned FunctionDecl::getODRHash() {
   }
 
   class ODRHash Hash;
-  Hash.AddFunctionDecl(this, /*SkipBody=*/shouldSkipCheckingODR());
+  Hash.AddFunctionDecl(this);
   setHasODRHash(true);
   ODRHash = Hash.CalculateHash();
   return ODRHash;

diff  --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp
index 66a727d9dd0c39..434926324c96ca 100644
--- a/clang/lib/AST/DeclBase.cpp
+++ b/clang/lib/AST/DeclBase.cpp
@@ -1106,11 +1106,6 @@ bool Decl::isFromExplicitGlobalModule() const {
   return getOwningModule() && getOwningModule()->isExplicitGlobalModule();
 }
 
-bool Decl::shouldSkipCheckingODR() const {
-  return getASTContext().getLangOpts().SkipODRCheckInGMF &&
- isFromExplicitGlobalModule();
-}
-
 static Decl::Kind getKind(const Decl *D) { return D->getKind(); }
 static Decl::Kind getKind(const DeclContext *DC) { return DC->getDeclKind(); }
 

diff  --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index d2aac1e640380f..789e4634bd293b 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -2071,13 +2071,31 @@ StmtProfiler::VisitLambdaExpr(const LambdaExpr *S) {
   }
 
   CXXRecordDecl *Lambda = S->getLambdaClass();
-  ID.AddInteger(Lambda->getODRHash());
-
   for (const auto  : Lambda->captures()) {
 ID.AddInteger(Capture.getCaptureKind());
 if (Capture.capturesVariable())
   VisitDecl(Capture.getCapturedVar());
   }
+
+  // Profiling the body of the lambda may be dangerous during deserialization.
+  // So we'd like only to profile the signature here.
+  ODRHash Hasher;
+  // FIXME: We can't get the operator call easily by
+  // `CXXRecordDecl::getLambdaCallOperator()` if we're in deserialization.
+  // So we have to do something raw here.
+  for (auto *SubDecl : Lambda->decls()) {
+FunctionDecl 

[clang] 39016e3 - [C++20] [Modules] Don't import non-inline function bodies even if it is always-inline

2024-04-15 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-16T13:06:44+08:00
New Revision: 39016e33b0fe78ddb1f11822f71a8a233af4dca9

URL: 
https://github.com/llvm/llvm-project/commit/39016e33b0fe78ddb1f11822f71a8a233af4dca9
DIFF: 
https://github.com/llvm/llvm-project/commit/39016e33b0fe78ddb1f11822f71a8a233af4dca9.diff

LOG: [C++20] [Modules] Don't import non-inline function bodies even if it is 
always-inline

Recommit
https://github.com/llvm/llvm-project/commit/1ecbab56dcbb78268c8d19af34a50591f90b12a0

Close https://github.com/llvm/llvm-project/issues/80949

The new thing in this commit is to allow to import the function body
from instantiations if it is marked with always-inline. See the
discussion in https://github.com/llvm/llvm-project/issues/86893 for
details.

Added: 


Modified: 
clang/lib/CodeGen/CodeGenModule.cpp
clang/test/CodeGenCXX/module-funcs-from-imports.cppm

Removed: 




diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index e44749672d5827..0c447b20cef40d 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -3952,9 +3952,20 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) {
   // behavior may break ABI compatibility of the current unit.
   if (const Module *M = F->getOwningModule();
   M && M->getTopLevelModule()->isNamedModule() &&
-  getContext().getCurrentNamedModule() != M->getTopLevelModule() &&
-  !F->hasAttr())
-return false;
+  getContext().getCurrentNamedModule() != M->getTopLevelModule()) {
+// There are practices to mark template member function as always-inline
+// and mark the template as extern explicit instantiation but not give
+// the definition for member function. So we have to emit the function
+// from explicitly instantiation with always-inline.
+//
+// See https://github.com/llvm/llvm-project/issues/86893 for details.
+//
+// TODO: Maybe it is better to give it a warning if we call a non-inline
+// function from other module units which is marked as always-inline.
+if (!F->isTemplateInstantiation() || !F->hasAttr()) {
+  return false;
+}
+  }
 
   if (F->hasAttr())
 return false;

diff  --git a/clang/test/CodeGenCXX/module-funcs-from-imports.cppm 
b/clang/test/CodeGenCXX/module-funcs-from-imports.cppm
index 33cdf437110a9e..a2a9122fc39130 100644
--- a/clang/test/CodeGenCXX/module-funcs-from-imports.cppm
+++ b/clang/test/CodeGenCXX/module-funcs-from-imports.cppm
@@ -23,6 +23,21 @@ int func_in_gmf_not_called() {
 return 44;
 }
 
+template 
+class A {
+public:
+__attribute__((always_inline))
+inline constexpr int getValue() {
+return 43;
+}
+
+inline constexpr int getValue2() {
+return 43;
+}
+};
+
+extern template class A;
+
 //--- M.cppm
 module;
 #include "foo.h"
@@ -47,17 +62,21 @@ int always_inline_func() {
 return 45;
 }
 
+export using ::A;
+
 //--- Use.cpp
 import M;
 int use() {
-return exported_func() + always_inline_func();
+A a;
+return exported_func() + always_inline_func() +
+   a.getValue() + a.getValue2();
 }
 
-// Checks that none of the function (except the always_inline_func) in the 
importees
-// are generated in the importer's code.
 // CHECK-O0: define{{.*}}_Z3usev(
 // CHECK-O0: declare{{.*}}_ZW1M13exported_funcv(
-// CHECK-O0: define{{.*}}available_externally{{.*}}_ZW1M18always_inline_funcv(
+// CHECK-O0: declare{{.*}}_ZW1M18always_inline_funcv(
+// CHECK-O0: define{{.*}}@_ZN1AIcE8getValueEv(
+// CHECK-O0: declare{{.*}}@_ZN1AIcE9getValue2Ev(
 // CHECK-O0-NOT: func_in_gmf
 // CHECK-O0-NOT: func_in_gmf_not_called
 // CHECK-O0-NOT: non_exported_func
@@ -68,7 +87,9 @@ int use() {
 // O0 to keep consistent ABI.
 // CHECK-O1: define{{.*}}_Z3usev(
 // CHECK-O1: declare{{.*}}_ZW1M13exported_funcv(
-// CHECK-O1: define{{.*}}available_externally{{.*}}_ZW1M18always_inline_funcv(
+// CHECK-O1: declare{{.*}}_ZW1M18always_inline_funcv(
+// CHECK-O1: define{{.*}}@_ZN1AIcE8getValueEv(
+// CHECK-O1: declare{{.*}}@_ZN1AIcE9getValue2Ev(
 // CHECK-O1-NOT: func_in_gmf
 // CHECK-O1-NOT: func_in_gmf_not_called
 // CHECK-O1-NOT: non_exported_func



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang] Drop unaligned from calls to readNext (NFC) (PR #88842)

2024-04-15 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 approved this pull request.


https://github.com/llvm/llvm-project/pull/88842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [unused-includes] PCHContainerOperations uses MemoryBufferRef, not MemoryBuffer. NFC. (PR #88794)

2024-04-15 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 approved this pull request.

LGTM. Thanks.

https://github.com/llvm/llvm-project/pull/88794
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [unused-includes][Serialization] Remove unused includes. NFC. (PR #88790)

2024-04-15 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 approved this pull request.

LGTM. Thanks.

https://github.com/llvm/llvm-project/pull/88790
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits


@@ -149,14 +157,44 @@ class SourceLocationSequence::State {
   operator SourceLocationSequence *() { return  }
 };
 
-inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc,
-   SourceLocationSequence *Seq) {
-  return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
+inline SourceLocationEncoding::RawLocEncoding
+SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
+   unsigned BaseModuleFileIndex,
+   SourceLocationSequence *Seq) {
+  // If the source location is a local source location, we can try to optimize
+  // the similar sequences to only record the differences.
+  if (!BaseOffset)
+return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
+
+  if (Loc.isInvalid())
+return 0;
+  
+  // Otherwise, the higher bits are used to store the module file index,
+  // so it is meaningless to optimize the source locations into small
+  // integers. Let's try to always use the raw encodings.
+  assert(Loc.getOffset() >= BaseOffset);
+  Loc = Loc.getLocWithOffset(-BaseOffset);
+  RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding());
+  assert(Encoded < ((RawLocEncoding)1 << 32));

ChuanqiXu9 wrote:

Oh, the compiler may be too smart : )

My intention was like, if someday we turned 64 bit source location on, this 
assertion can make us remember  to update here.

But given the warning, I don't know how to write the assertion here. So I just 
removed it.

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits


@@ -2220,33 +2221,40 @@ class ASTReader
 return Sema::AlignPackInfo::getFromRawEncoding(Raw);
   }
 
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Read a source location from raw form and return it in its
   /// originating module file's source location space.
-  SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw,
-LocSeq *Seq = nullptr) const {
+  std::pair
+  ReadUntranslatedSourceLocation(RawLocEncoding Raw,
+ LocSeq *Seq = nullptr) const {
 return SourceLocationEncoding::decode(Raw, Seq);
   }
 
   /// Read a source location from raw form.
-  SourceLocation ReadSourceLocation(ModuleFile ,
-SourceLocation::UIntTy Raw,
-LocSeq *Seq = nullptr) const {
-SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq);
-return TranslateSourceLocation(ModuleFile, Loc);
+  SourceLocation ReadSourceLocation(ModuleFile , RawLocEncoding Raw,
+   LocSeq *Seq = nullptr) const {
+if (!MF.ModuleOffsetMap.empty())
+  ReadModuleOffsetMap(MF);
+
+auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
+ModuleFile *OwningModuleFile =
+ModuleFileIndex ? MF.DependentModules[ModuleFileIndex - 1] : 

ChuanqiXu9 wrote:

Done.

I feel the idea to store `MF` itself at index `0` may be slightly tricky. 

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits


@@ -2220,33 +2221,40 @@ class ASTReader
 return Sema::AlignPackInfo::getFromRawEncoding(Raw);
   }
 
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Read a source location from raw form and return it in its
   /// originating module file's source location space.
-  SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw,
-LocSeq *Seq = nullptr) const {
+  std::pair
+  ReadUntranslatedSourceLocation(RawLocEncoding Raw,
+ LocSeq *Seq = nullptr) const {
 return SourceLocationEncoding::decode(Raw, Seq);
   }
 
   /// Read a source location from raw form.
-  SourceLocation ReadSourceLocation(ModuleFile ,
-SourceLocation::UIntTy Raw,
-LocSeq *Seq = nullptr) const {
-SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq);
-return TranslateSourceLocation(ModuleFile, Loc);
+  SourceLocation ReadSourceLocation(ModuleFile , RawLocEncoding Raw,
+   LocSeq *Seq = nullptr) const {
+if (!MF.ModuleOffsetMap.empty())
+  ReadModuleOffsetMap(MF);
+
+auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
+ModuleFile *OwningModuleFile =
+ModuleFileIndex ? MF.DependentModules[ModuleFileIndex - 1] : 
+return TranslateSourceLocation(*OwningModuleFile, Loc);
   }
 
   /// Translate a source location from another module file's source
   /// location space into ours.
   SourceLocation TranslateSourceLocation(ModuleFile ,
  SourceLocation Loc) const {
-if (!ModuleFile.ModuleOffsetMap.empty())
-  ReadModuleOffsetMap(ModuleFile);
-assert(ModuleFile.SLocRemap.find(Loc.getOffset()) !=
-   ModuleFile.SLocRemap.end() &&
-   "Cannot find offset to remap.");
-SourceLocation::IntTy Remap =
-ModuleFile.SLocRemap.find(Loc.getOffset())->second;
-return Loc.getLocWithOffset(Remap);
+if (Loc.isInvalid())
+  return Loc;
+
+// It implies that the Loc is already translated.
+if (SourceMgr.isLoadedSourceLocation(Loc))
+  return Loc;

ChuanqiXu9 wrote:

My thought was like "all the recorded source location are local to its module 
file", so it won't be translated. But after I review how we judge whether a 
source location is loaded in source manager, I find this may be dangerous in 
extreme cases.  We judge if a source location is loaded by comparing the offset 
with the current loaded offset. So it may be dangerous if we have too many 
loaded source location in the TU and the source location we read is too big, 
then we may misread it as loaded source location.

My intention for this code is to make `TranslateSourceLocation` to be symmetric 
with the original one. The original `TranslateSourceLocation` is re-enterable. 
We could call `TranslateSourceLocation`  on a translated source location. But 
we can't do this for the `TranslateSourceLocation`   in the current patch.

So I just removed the check and leave a FIXME comment. I'd like to fix this as 
a NFC patch after we land this.

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 updated 
https://github.com/llvm/llvm-project/pull/86912

>From 8e07cbdd0f348484d532d7827e4b4a7888e70978 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu 
Date: Mon, 18 Mar 2024 08:36:55 +0800
Subject: [PATCH 1/3] [Modules] No transitive source location change

---
 clang/include/clang/Basic/SourceLocation.h|  1 +
 .../include/clang/Serialization/ASTBitCodes.h | 56 ++--
 clang/include/clang/Serialization/ASTReader.h | 54 +++-
 clang/include/clang/Serialization/ASTWriter.h |  4 +
 .../include/clang/Serialization/ModuleFile.h  |  4 -
 .../Serialization/SourceLocationEncoding.h| 88 +--
 clang/lib/Frontend/ASTUnit.cpp|  2 -
 clang/lib/Serialization/ASTReader.cpp | 84 +++---
 clang/lib/Serialization/ASTReaderDecl.cpp |  2 +-
 clang/lib/Serialization/ASTWriter.cpp | 41 +++--
 clang/lib/Serialization/ASTWriterDecl.cpp |  8 +-
 clang/lib/Serialization/ModuleFile.cpp|  1 -
 .../no-transitive-source-location-change.cppm | 69 +++
 clang/test/Modules/pr61067.cppm   | 25 --
 .../SourceLocationEncodingTest.cpp| 12 +--
 15 files changed, 275 insertions(+), 176 deletions(-)
 create mode 100644 clang/test/Modules/no-transitive-source-location-change.cppm

diff --git a/clang/include/clang/Basic/SourceLocation.h 
b/clang/include/clang/Basic/SourceLocation.h
index 00b1e0fa855b7a..7a0f5ba8d1270b 100644
--- a/clang/include/clang/Basic/SourceLocation.h
+++ b/clang/include/clang/Basic/SourceLocation.h
@@ -90,6 +90,7 @@ class SourceLocation {
   friend class ASTWriter;
   friend class SourceManager;
   friend struct llvm::FoldingSetTrait;
+  friend class SourceLocationEncoding;
 
 public:
   using UIntTy = uint32_t;
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index f762116fea956c..f94b32d762effc 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -22,6 +22,7 @@
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Basic/OperatorKinds.h"
 #include "clang/Basic/SourceLocation.h"
+#include "clang/Serialization/SourceLocationEncoding.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Bitstream/BitCodes.h"
 #include 
@@ -175,45 +176,38 @@ const unsigned int NUM_PREDEF_SUBMODULE_IDS = 1;
 
 /// Source range/offset of a preprocessed entity.
 struct PPEntityOffset {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location of beginning of range.
-  SourceLocation::UIntTy Begin;
+  RawLocEncoding Begin;
 
   /// Raw source location of end of range.
-  SourceLocation::UIntTy End;
+  RawLocEncoding End;
 
   /// Offset in the AST file relative to ModuleFile::MacroOffsetsBase.
   uint32_t BitOffset;
 
-  PPEntityOffset(SourceRange R, uint32_t BitOffset)
-  : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()),
-BitOffset(BitOffset) {}
-
-  SourceLocation getBegin() const {
-return SourceLocation::getFromRawEncoding(Begin);
-  }
+  PPEntityOffset(RawLocEncoding Begin, RawLocEncoding End, uint32_t BitOffset)
+  : Begin(Begin), End(End), BitOffset(BitOffset) {}
 
-  SourceLocation getEnd() const {
-return SourceLocation::getFromRawEncoding(End);
-  }
+  RawLocEncoding getBegin() const { return Begin; }
+  RawLocEncoding getEnd() const { return End; }
 };
 
 /// Source range of a skipped preprocessor region
 struct PPSkippedRange {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location of beginning of range.
-  SourceLocation::UIntTy Begin;
+  RawLocEncoding Begin;
   /// Raw source location of end of range.
-  SourceLocation::UIntTy End;
+  RawLocEncoding End;
 
-  PPSkippedRange(SourceRange R)
-  : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()) 
{
-  }
+  PPSkippedRange(RawLocEncoding Begin, RawLocEncoding End)
+  : Begin(Begin), End(End) {}
 
-  SourceLocation getBegin() const {
-return SourceLocation::getFromRawEncoding(Begin);
-  }
-  SourceLocation getEnd() const {
-return SourceLocation::getFromRawEncoding(End);
-  }
+  RawLocEncoding getBegin() const { return Begin; }
+  RawLocEncoding getEnd() const { return End; }
 };
 
 /// Offset in the AST file. Use splitted 64-bit integer into low/high
@@ -239,8 +233,10 @@ struct UnderalignedInt64 {
 
 /// Source location and bit offset of a declaration.
 struct DeclOffset {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location.
-  SourceLocation::UIntTy Loc = 0;
+  RawLocEncoding RawLoc = 0;
 
   /// Offset relative to the start of the DECLTYPES_BLOCK block. Keep
   /// structure alignment 32-bit and avoid padding gap because undefined
@@ -248,17 +244,15 @@ struct DeclOffset {
   UnderalignedInt64 BitOffset;
 
   DeclOffset() = default;
-  DeclOffset(SourceLocation Loc, uint64_t BitOffset,
- 

[clang] aa27414 - Revert "[C++20] [Modules] Don't import non-inline function bodies even if it is marked as always_inline"

2024-04-15 Thread Chuanqi Xu via cfe-commits

Author: Chuanqi Xu
Date: 2024-04-15T17:06:03+08:00
New Revision: aa2741449c3609b2ae244d8d3f3e14ad16de72e4

URL: 
https://github.com/llvm/llvm-project/commit/aa2741449c3609b2ae244d8d3f3e14ad16de72e4
DIFF: 
https://github.com/llvm/llvm-project/commit/aa2741449c3609b2ae244d8d3f3e14ad16de72e4.diff

LOG: Revert "[C++20] [Modules] Don't import non-inline function bodies even if 
it is marked as always_inline"

This reverts commit 1ecbab56dcbb78268c8d19af34a50591f90b12a0.

See the discussion in https://github.com/llvm/llvm-project/issues/86893.

The original commit receives too many complaints. Let's try to
workaround the issue to give better user experiences.

Added: 


Modified: 
clang/lib/CodeGen/CodeGenModule.cpp
clang/test/CodeGenCXX/module-funcs-from-imports.cppm

Removed: 




diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index 73a9cb9d6e0424..e44749672d5827 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -3952,7 +3952,8 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) {
   // behavior may break ABI compatibility of the current unit.
   if (const Module *M = F->getOwningModule();
   M && M->getTopLevelModule()->isNamedModule() &&
-  getContext().getCurrentNamedModule() != M->getTopLevelModule())
+  getContext().getCurrentNamedModule() != M->getTopLevelModule() &&
+  !F->hasAttr())
 return false;
 
   if (F->hasAttr())

diff  --git a/clang/test/CodeGenCXX/module-funcs-from-imports.cppm 
b/clang/test/CodeGenCXX/module-funcs-from-imports.cppm
index 8d04328eaf3fea..33cdf437110a9e 100644
--- a/clang/test/CodeGenCXX/module-funcs-from-imports.cppm
+++ b/clang/test/CodeGenCXX/module-funcs-from-imports.cppm
@@ -53,11 +53,11 @@ int use() {
 return exported_func() + always_inline_func();
 }
 
-// Checks that none of the function in the importees
+// Checks that none of the function (except the always_inline_func) in the 
importees
 // are generated in the importer's code.
 // CHECK-O0: define{{.*}}_Z3usev(
 // CHECK-O0: declare{{.*}}_ZW1M13exported_funcv(
-// CHECK-O0: declare{{.*}}_ZW1M18always_inline_funcv(
+// CHECK-O0: define{{.*}}available_externally{{.*}}_ZW1M18always_inline_funcv(
 // CHECK-O0-NOT: func_in_gmf
 // CHECK-O0-NOT: func_in_gmf_not_called
 // CHECK-O0-NOT: non_exported_func
@@ -68,7 +68,7 @@ int use() {
 // O0 to keep consistent ABI.
 // CHECK-O1: define{{.*}}_Z3usev(
 // CHECK-O1: declare{{.*}}_ZW1M13exported_funcv(
-// CHECK-O1: declare{{.*}}_ZW1M18always_inline_funcv(
+// CHECK-O1: define{{.*}}available_externally{{.*}}_ZW1M18always_inline_funcv(
 // CHECK-O1-NOT: func_in_gmf
 // CHECK-O1-NOT: func_in_gmf_not_called
 // CHECK-O1-NOT: non_exported_func



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits

ChuanqiXu9 wrote:

> I have a few minor comments on the patch. I want to do some additional perf 
> testing on module scanning perf because I'm a bit concerned about the cost of 
> `ASTWriter::getRawSourceLocationEncoding`.

What's the perf results about scanning?

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits


@@ -2220,40 +2227,47 @@ class ASTReader
 return Sema::AlignPackInfo::getFromRawEncoding(Raw);
   }
 
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Read a source location from raw form and return it in its
   /// originating module file's source location space.
-  SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw,
-LocSeq *Seq = nullptr) const {
+  std::pair
+  ReadUntranslatedSourceLocation(RawLocEncoding Raw,
+ LocSeq *Seq = nullptr) const {
 return SourceLocationEncoding::decode(Raw, Seq);
   }
 
   /// Read a source location from raw form.
-  SourceLocation ReadSourceLocation(ModuleFile ,
-SourceLocation::UIntTy Raw,
-LocSeq *Seq = nullptr) const {
-SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq);
-return TranslateSourceLocation(ModuleFile, Loc);
+  SourceLocation ReadRawSourceLocation(ModuleFile , RawLocEncoding Raw,
+   LocSeq *Seq = nullptr) const {
+if (!MF.ModuleOffsetMap.empty())
+  ReadModuleOffsetMap(MF);
+
+auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
+ModuleFile *ModuleFileHomingLoc =
+ModuleFileIndex ? ImportedModuleFiles[][ModuleFileIndex - 1] : 
+return TranslateSourceLocation(*ModuleFileHomingLoc, Loc);
   }
 
   /// Translate a source location from another module file's source
   /// location space into ours.
   SourceLocation TranslateSourceLocation(ModuleFile ,
  SourceLocation Loc) const {
-if (!ModuleFile.ModuleOffsetMap.empty())
-  ReadModuleOffsetMap(ModuleFile);
-assert(ModuleFile.SLocRemap.find(Loc.getOffset()) !=
-   ModuleFile.SLocRemap.end() &&
-   "Cannot find offset to remap.");
-SourceLocation::IntTy Remap =
-ModuleFile.SLocRemap.find(Loc.getOffset())->second;
-return Loc.getLocWithOffset(Remap);
+if (Loc.isInvalid())
+  return Loc;
+
+// It implies that the Loc is already translated.
+if (SourceMgr.isLoadedSourceLocation(Loc))
+  return Loc;
+
+return Loc.getLocWithOffset(ModuleFile.SLocEntryBaseOffset - 2);

ChuanqiXu9 wrote:

There are a lot `- 2` in Serialization and SourceManager. I just tried to 
follow that. Sadly I didn't find clear comments explaining this. By reading the 
codes, I think it is trying to skip 2 dummy SLocEntries (the invalid source 
location and invalid expansion location, see `SourceManager::clearIDTables()`) 
since they are not sensitive to the context.

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits


@@ -2220,40 +2227,47 @@ class ASTReader
 return Sema::AlignPackInfo::getFromRawEncoding(Raw);
   }
 
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Read a source location from raw form and return it in its
   /// originating module file's source location space.
-  SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw,
-LocSeq *Seq = nullptr) const {
+  std::pair
+  ReadUntranslatedSourceLocation(RawLocEncoding Raw,
+ LocSeq *Seq = nullptr) const {
 return SourceLocationEncoding::decode(Raw, Seq);
   }
 
   /// Read a source location from raw form.
-  SourceLocation ReadSourceLocation(ModuleFile ,
-SourceLocation::UIntTy Raw,
-LocSeq *Seq = nullptr) const {
-SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq);
-return TranslateSourceLocation(ModuleFile, Loc);
+  SourceLocation ReadRawSourceLocation(ModuleFile , RawLocEncoding Raw,
+   LocSeq *Seq = nullptr) const {
+if (!MF.ModuleOffsetMap.empty())
+  ReadModuleOffsetMap(MF);
+
+auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
+ModuleFile *ModuleFileHomingLoc =
+ModuleFileIndex ? ImportedModuleFiles[][ModuleFileIndex - 1] : 

ChuanqiXu9 wrote:

We have a bounds check in the `operator[]` of SmallVector. So I guess it may be 
fine.

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits


@@ -4078,8 +4065,8 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile ) const {
   return;

ChuanqiXu9 wrote:

Done.

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits


@@ -2220,40 +2227,47 @@ class ASTReader
 return Sema::AlignPackInfo::getFromRawEncoding(Raw);
   }
 
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Read a source location from raw form and return it in its
   /// originating module file's source location space.
-  SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw,
-LocSeq *Seq = nullptr) const {
+  std::pair
+  ReadUntranslatedSourceLocation(RawLocEncoding Raw,
+ LocSeq *Seq = nullptr) const {
 return SourceLocationEncoding::decode(Raw, Seq);
   }
 
   /// Read a source location from raw form.
-  SourceLocation ReadSourceLocation(ModuleFile ,
-SourceLocation::UIntTy Raw,
-LocSeq *Seq = nullptr) const {
-SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq);
-return TranslateSourceLocation(ModuleFile, Loc);
+  SourceLocation ReadRawSourceLocation(ModuleFile , RawLocEncoding Raw,
+   LocSeq *Seq = nullptr) const {
+if (!MF.ModuleOffsetMap.empty())
+  ReadModuleOffsetMap(MF);
+
+auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
+ModuleFile *ModuleFileHomingLoc =

ChuanqiXu9 wrote:

Done.

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits


@@ -149,14 +157,44 @@ class SourceLocationSequence::State {
   operator SourceLocationSequence *() { return  }
 };
 
-inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc,
-   SourceLocationSequence *Seq) {
-  return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
+inline SourceLocationEncoding::RawLocEncoding
+SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
+   unsigned BaseModuleFileIndex,
+   SourceLocationSequence *Seq) {
+  // If the source location is a local source location, we can try to optimize
+  // the similar sequences to only record the differences.
+  if (!BaseOffset)
+return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
+
+  if (Loc.isInvalid())
+return 0;
+  
+  // Otherwise, the higher bits are used to store the module file index,
+  // so it is meaningless to optimize the source locations into small
+  // integers. Let's try to always use the raw encodings.
+  assert(Loc.getOffset() >= BaseOffset);
+  Loc = Loc.getLocWithOffset(-BaseOffset);
+  RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding());
+  assert(Encoded < ((RawLocEncoding)1 << 32));
+
+  assert(BaseModuleFileIndex < ((RawLocEncoding)1 << 32));
+  Encoded |= (RawLocEncoding)BaseModuleFileIndex << 32;
+  return Encoded;
 }
-inline SourceLocation
-SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) {
-  return Seq ? Seq->decode(Encoded)
- : SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
+inline std::pair
+SourceLocationEncoding::decode(RawLocEncoding Encoded,
+   SourceLocationSequence *Seq) {
+  unsigned ModuleFileIndex = Encoded >> 32;
+
+  if (!ModuleFileIndex)
+return {Seq ? Seq->decode(Encoded)
+ : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
+ModuleFileIndex};
+
+  Encoded &= ((RawLocEncoding)1 << 33) - 1;

ChuanqiXu9 wrote:

Done.

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits


@@ -149,14 +157,44 @@ class SourceLocationSequence::State {
   operator SourceLocationSequence *() { return  }
 };
 
-inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc,
-   SourceLocationSequence *Seq) {
-  return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
+inline SourceLocationEncoding::RawLocEncoding
+SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
+   unsigned BaseModuleFileIndex,
+   SourceLocationSequence *Seq) {
+  // If the source location is a local source location, we can try to optimize
+  // the similar sequences to only record the differences.
+  if (!BaseOffset)
+return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
+
+  if (Loc.isInvalid())
+return 0;
+  
+  // Otherwise, the higher bits are used to store the module file index,
+  // so it is meaningless to optimize the source locations into small
+  // integers. Let's try to always use the raw encodings.
+  assert(Loc.getOffset() >= BaseOffset);
+  Loc = Loc.getLocWithOffset(-BaseOffset);
+  RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding());
+  assert(Encoded < ((RawLocEncoding)1 << 32));
+
+  assert(BaseModuleFileIndex < ((RawLocEncoding)1 << 32));
+  Encoded |= (RawLocEncoding)BaseModuleFileIndex << 32;

ChuanqiXu9 wrote:

Now I assert it should be less than `(1<<16)`

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 edited 
https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits


@@ -696,7 +696,7 @@ class ASTReader
   /// Mapping from global submodule IDs to the module file in which the
   /// submodule resides along with the offset that should be added to the
   /// global submodule ID to produce a local ID.
-  GlobalSubmoduleMapType GlobalSubmoduleMap;
+  mutable GlobalSubmoduleMapType GlobalSubmoduleMap;

ChuanqiXu9 wrote:

Oh, I don't know why it remains. Maybe I added during the work progress.

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits


@@ -2220,40 +2227,47 @@ class ASTReader
 return Sema::AlignPackInfo::getFromRawEncoding(Raw);
   }
 
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Read a source location from raw form and return it in its
   /// originating module file's source location space.
-  SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw,
-LocSeq *Seq = nullptr) const {
+  std::pair
+  ReadUntranslatedSourceLocation(RawLocEncoding Raw,
+ LocSeq *Seq = nullptr) const {
 return SourceLocationEncoding::decode(Raw, Seq);
   }
 
   /// Read a source location from raw form.
-  SourceLocation ReadSourceLocation(ModuleFile ,
-SourceLocation::UIntTy Raw,
-LocSeq *Seq = nullptr) const {
-SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq);
-return TranslateSourceLocation(ModuleFile, Loc);
+  SourceLocation ReadRawSourceLocation(ModuleFile , RawLocEncoding Raw,

ChuanqiXu9 wrote:

Done.

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits


@@ -5574,10 +5577,34 @@ void ASTWriter::AddFileID(FileID FID, RecordDataImpl 
) {
   Record.push_back(getAdjustedFileID(FID).getOpaqueValue());
 }
 
+SourceLocationEncoding::RawLocEncoding
+ASTWriter::getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq) {
+  unsigned BaseOffset = 0;
+  unsigned ModuleFileIndex = 0;
+
+  // See SourceLocationEncoding.h for the encoding details.
+  if (Context->getSourceManager().isLoadedSourceLocation(Loc) &&
+  Loc.isValid()) {
+assert(getChain());
+auto SLocMapI = getChain()->GlobalSLocOffsetMap.find(
+SourceManager::MaxLoadedOffset - Loc.getOffset() - 1);
+assert(SLocMapI != getChain()->GlobalSLocOffsetMap.end() &&
+   "Corrupted global sloc offset map");
+ModuleFile *F = SLocMapI->second;
+BaseOffset = F->SLocEntryBaseOffset - 2;
+// 0 means the location is not loaded. So we need to add 1 to the index to
+// make it clear.
+ModuleFileIndex = F->Index + 1;
+assert(()->getModuleManager()[F->Index] == F);

ChuanqiXu9 wrote:

Yes but I'd like to remain it. The assertions make me feel better.

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits


@@ -942,6 +942,12 @@ class ASTReader
   /// Sema tracks these to emit deferred diags.
   llvm::SmallSetVector DeclsToCheckForDeferredDiags;
 
+  /// The module files imported by different module files. Indirectly imported
+  /// module files are included too. The information comes from
+  /// ReadModuleOffsetMap(ModuleFile&).
+  mutable llvm::DenseMap>
+  ImportedModuleFiles;

ChuanqiXu9 wrote:

Done in the new commit.

There was a similar member `Imports` in `ModuleFile`, which records the 
directly imported module files from the coding. But, yes, it is good to remove 
the map lookups and let's try to handle the ambiguous by commenting.

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits


@@ -149,14 +157,44 @@ class SourceLocationSequence::State {
   operator SourceLocationSequence *() { return  }
 };
 
-inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc,
-   SourceLocationSequence *Seq) {
-  return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
+inline SourceLocationEncoding::RawLocEncoding
+SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
+   unsigned BaseModuleFileIndex,
+   SourceLocationSequence *Seq) {
+  // If the source location is a local source location, we can try to optimize
+  // the similar sequences to only record the differences.
+  if (!BaseOffset)
+return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
+
+  if (Loc.isInvalid())
+return 0;
+  
+  // Otherwise, the higher bits are used to store the module file index,
+  // so it is meaningless to optimize the source locations into small
+  // integers. Let's try to always use the raw encodings.
+  assert(Loc.getOffset() >= BaseOffset);
+  Loc = Loc.getLocWithOffset(-BaseOffset);
+  RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding());
+  assert(Encoded < ((RawLocEncoding)1 << 32));

ChuanqiXu9 wrote:

I guess you may refer to the next line and that one is indeed comparing 
unsigned with `(1<<32)`, and I fixed that. This line should be fine since 
Encoded has type `RawLocEncoding `.

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Modules] No transitive source location change (PR #86912)

2024-04-15 Thread Chuanqi Xu via cfe-commits

https://github.com/ChuanqiXu9 updated 
https://github.com/llvm/llvm-project/pull/86912

>From 8e07cbdd0f348484d532d7827e4b4a7888e70978 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu 
Date: Mon, 18 Mar 2024 08:36:55 +0800
Subject: [PATCH 1/2] [Modules] No transitive source location change

---
 clang/include/clang/Basic/SourceLocation.h|  1 +
 .../include/clang/Serialization/ASTBitCodes.h | 56 ++--
 clang/include/clang/Serialization/ASTReader.h | 54 +++-
 clang/include/clang/Serialization/ASTWriter.h |  4 +
 .../include/clang/Serialization/ModuleFile.h  |  4 -
 .../Serialization/SourceLocationEncoding.h| 88 +--
 clang/lib/Frontend/ASTUnit.cpp|  2 -
 clang/lib/Serialization/ASTReader.cpp | 84 +++---
 clang/lib/Serialization/ASTReaderDecl.cpp |  2 +-
 clang/lib/Serialization/ASTWriter.cpp | 41 +++--
 clang/lib/Serialization/ASTWriterDecl.cpp |  8 +-
 clang/lib/Serialization/ModuleFile.cpp|  1 -
 .../no-transitive-source-location-change.cppm | 69 +++
 clang/test/Modules/pr61067.cppm   | 25 --
 .../SourceLocationEncodingTest.cpp| 12 +--
 15 files changed, 275 insertions(+), 176 deletions(-)
 create mode 100644 clang/test/Modules/no-transitive-source-location-change.cppm

diff --git a/clang/include/clang/Basic/SourceLocation.h 
b/clang/include/clang/Basic/SourceLocation.h
index 00b1e0fa855b7a..7a0f5ba8d1270b 100644
--- a/clang/include/clang/Basic/SourceLocation.h
+++ b/clang/include/clang/Basic/SourceLocation.h
@@ -90,6 +90,7 @@ class SourceLocation {
   friend class ASTWriter;
   friend class SourceManager;
   friend struct llvm::FoldingSetTrait;
+  friend class SourceLocationEncoding;
 
 public:
   using UIntTy = uint32_t;
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index f762116fea956c..f94b32d762effc 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -22,6 +22,7 @@
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Basic/OperatorKinds.h"
 #include "clang/Basic/SourceLocation.h"
+#include "clang/Serialization/SourceLocationEncoding.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Bitstream/BitCodes.h"
 #include 
@@ -175,45 +176,38 @@ const unsigned int NUM_PREDEF_SUBMODULE_IDS = 1;
 
 /// Source range/offset of a preprocessed entity.
 struct PPEntityOffset {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location of beginning of range.
-  SourceLocation::UIntTy Begin;
+  RawLocEncoding Begin;
 
   /// Raw source location of end of range.
-  SourceLocation::UIntTy End;
+  RawLocEncoding End;
 
   /// Offset in the AST file relative to ModuleFile::MacroOffsetsBase.
   uint32_t BitOffset;
 
-  PPEntityOffset(SourceRange R, uint32_t BitOffset)
-  : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()),
-BitOffset(BitOffset) {}
-
-  SourceLocation getBegin() const {
-return SourceLocation::getFromRawEncoding(Begin);
-  }
+  PPEntityOffset(RawLocEncoding Begin, RawLocEncoding End, uint32_t BitOffset)
+  : Begin(Begin), End(End), BitOffset(BitOffset) {}
 
-  SourceLocation getEnd() const {
-return SourceLocation::getFromRawEncoding(End);
-  }
+  RawLocEncoding getBegin() const { return Begin; }
+  RawLocEncoding getEnd() const { return End; }
 };
 
 /// Source range of a skipped preprocessor region
 struct PPSkippedRange {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location of beginning of range.
-  SourceLocation::UIntTy Begin;
+  RawLocEncoding Begin;
   /// Raw source location of end of range.
-  SourceLocation::UIntTy End;
+  RawLocEncoding End;
 
-  PPSkippedRange(SourceRange R)
-  : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()) 
{
-  }
+  PPSkippedRange(RawLocEncoding Begin, RawLocEncoding End)
+  : Begin(Begin), End(End) {}
 
-  SourceLocation getBegin() const {
-return SourceLocation::getFromRawEncoding(Begin);
-  }
-  SourceLocation getEnd() const {
-return SourceLocation::getFromRawEncoding(End);
-  }
+  RawLocEncoding getBegin() const { return Begin; }
+  RawLocEncoding getEnd() const { return End; }
 };
 
 /// Offset in the AST file. Use splitted 64-bit integer into low/high
@@ -239,8 +233,10 @@ struct UnderalignedInt64 {
 
 /// Source location and bit offset of a declaration.
 struct DeclOffset {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location.
-  SourceLocation::UIntTy Loc = 0;
+  RawLocEncoding RawLoc = 0;
 
   /// Offset relative to the start of the DECLTYPES_BLOCK block. Keep
   /// structure alignment 32-bit and avoid padding gap because undefined
@@ -248,17 +244,15 @@ struct DeclOffset {
   UnderalignedInt64 BitOffset;
 
   DeclOffset() = default;
-  DeclOffset(SourceLocation Loc, uint64_t BitOffset,
- 

<    1   2   3   4   5   6   7   8   9   10   >