benlangmuir updated this revision to Diff 448370.
benlangmuir added a comment.

- Added comments to tests
- Added a missing test that I accidentally deleted
- Rebased on latest main


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D129884/new/

https://reviews.llvm.org/D129884

Files:
  clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
  clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
  clang/test/ClangScanDeps/modules-context-hash-ignore-macros.c
  clang/test/ClangScanDeps/modules-context-hash-module-map-path.c
  clang/test/ClangScanDeps/modules-context-hash-outputs.c
  clang/test/ClangScanDeps/modules-context-hash-warnings.c

Index: clang/test/ClangScanDeps/modules-context-hash-warnings.c
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/modules-context-hash-warnings.c
@@ -0,0 +1,77 @@
+// Differences in -W warning options should result in different canonical module
+// build commands.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 \
+// RUN:   -format experimental-full > %t/deps.json
+// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
+
+// CHECK:      {
+// CHECK-NEXT:   "modules": [
+// CHECK:          {
+// CHECK:            "command-line": [
+// CHECK:              "-Wall"
+// CHECK:            ]
+// CHECK:            "context-hash": "[[HASH1:.*]]"
+// CHECK:            "name": "Mod"
+// CHECK-NEXT:     }
+// CHECK-NEXT:     {
+// CHECK:            "command-line": [
+// CHECK-NOT:          "-Wall"
+// CHECK:            ]
+// CHECK:            "context-hash": "[[HASH2:.*]]"
+// CHECK:            "name": "Mod"
+// CHECK-NEXT:     }
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   "translation-units": [
+// CHECK-NEXT:     {
+// CHECK:            "clang-module-deps": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:           "context-hash": "[[HASH1]]"
+// CHECK-NEXT:            "module-name": "Mod"
+// CHECK-NEXT:         }
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       "command-line": [
+// CHECK:              "-Wall"
+// CHECK:            ]
+// CHECK:            "input-file": "{{.*}}tu1.c"
+// CHECK-NEXT:     }
+// CHECK-NEXT:     {
+// CHECK:            "clang-module-deps": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:           "context-hash": "[[HASH2]]"
+// CHECK-NEXT:            "module-name": "Mod"
+// CHECK-NEXT:         }
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       "command-line": [
+// CHECK-NOT:          "-Wall"
+// CHECK:            ]
+// CHECK:            "input-file": "{{.*}}tu2.c"
+
+//--- cdb.json.template
+[
+  {
+    "directory": "DIR",
+    "command": "clang -Wall -fsyntax-only DIR/tu1.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+    "file": "DIR/tu1.c"
+  },
+  {
+    "directory": "DIR",
+    "command": "clang -fsyntax-only DIR/tu2.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+    "file": "DIR/tu2.c"
+  },
+]
+
+//--- module.modulemap
+module Mod { header "Mod.h" }
+
+//--- Mod.h
+
+//--- tu1.c
+#include "Mod.h"
+
+//--- tu2.c
+#include "Mod.h"
Index: clang/test/ClangScanDeps/modules-context-hash-outputs.c
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/modules-context-hash-outputs.c
@@ -0,0 +1,77 @@
+// If secondary output files such as .d are enabled, ensure it affects the
+// module context hash since it may impact the resulting module build commands.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 -generate-modules-path-args \
+// RUN:   -format experimental-full > %t/deps.json
+// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
+
+// CHECK:      {
+// CHECK-NEXT:   "modules": [
+// CHECK:          {
+// CHECK:            "command-line": [
+// CHECK:              "-dependency-file"
+// CHECK:            ]
+// CHECK:            "context-hash": "[[HASH1:.*]]"
+// CHECK:            "name": "Mod"
+// CHECK-NEXT:     }
+// CHECK-NEXT:     {
+// CHECK:            "command-line": [
+// CHECK-NOT:          "-dependency-file"
+// CHECK:            ]
+// CHECK:            "context-hash": "[[HASH2:.*]]"
+// CHECK:            "name": "Mod"
+// CHECK-NEXT:     }
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   "translation-units": [
+// CHECK-NEXT:     {
+// CHECK:            "clang-module-deps": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:           "context-hash": "[[HASH1]]"
+// CHECK-NEXT:            "module-name": "Mod"
+// CHECK-NEXT:         }
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       "command-line": [
+// CHECK:              "-MF"
+// CHECK:            ]
+// CHECK:            "input-file": "{{.*}}tu1.c"
+// CHECK-NEXT:     }
+// CHECK-NEXT:     {
+// CHECK:            "clang-module-deps": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:           "context-hash": "[[HASH2]]"
+// CHECK-NEXT:            "module-name": "Mod"
+// CHECK-NEXT:         }
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       "command-line": [
+// CHECK-NOT:          "-MF"
+// CHECK:            ]
+// CHECK:            "input-file": "{{.*}}tu2.c"
+
+//--- cdb.json.template
+[
+  {
+    "directory": "DIR",
+    "command": "clang -MD -MF DIR/tu1.d -fsyntax-only DIR/tu1.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+    "file": "DIR/tu1.c"
+  },
+  {
+    "directory": "DIR",
+    "command": "clang -fsyntax-only DIR/tu2.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+    "file": "DIR/tu2.c"
+  },
+]
+
+//--- module.modulemap
+module Mod { header "Mod.h" }
+
+//--- Mod.h
+
+//--- tu1.c
+#include "Mod.h"
+
+//--- tu2.c
+#include "Mod.h"
Index: clang/test/ClangScanDeps/modules-context-hash-module-map-path.c
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/modules-context-hash-module-map-path.c
@@ -0,0 +1,77 @@
+// Ensure the path to the modulemap input is included in the module context hash
+// irrespective of other TU command-line arguments, as it effects the canonical
+// module build command. In this test we use the difference in spelling between
+// module.modulemap and module.map, but it also applies to situations such as
+// differences in case-insensitive paths if they are not canonicalized away.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 -generate-modules-path-args \
+// RUN:   -format experimental-full > %t/deps.json
+
+// RUN: mv %t/module.modulemap %t/module.map
+// RUN: echo 'AFTER_MOVE' >> %t/deps.json
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 -generate-modules-path-args \
+// RUN:   -format experimental-full >> %t/deps.json
+
+// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
+
+// CHECK:      {
+// CHECK-NEXT:   "modules": [
+// CHECK:          {
+// CHECK:            "command-line": [
+// CHECK:              "{{.*}}module.modulemap"
+// CHECK:            ]
+// CHECK:            "context-hash": "[[HASH1:.*]]"
+// CHECK:            "name": "Mod"
+// CHECK-NEXT:     }
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   "translation-units": [
+// CHECK-NEXT:     {
+// CHECK:            "clang-module-deps": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:           "context-hash": "[[HASH1]]"
+// CHECK-NEXT:            "module-name": "Mod"
+// CHECK-NEXT:         }
+// CHECK-NEXT:       ]
+// CHECK-LABEL: AFTER_MOVE
+// CHECK:      {
+// CHECK-NEXT:   "modules": [
+// CHECK:          {
+// CHECK-NOT: [[HASH1]]
+// CHECK:            "command-line": [
+// CHECK:              "{{.*}}module.map"
+// CHECK:            ]
+// CHECK-NOT: [[HASH1]]
+// CHECK:            "name": "Mod"
+// CHECK-NEXT:     }
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   "translation-units": [
+// CHECK-NEXT:     {
+// CHECK:            "clang-module-deps": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:           "context-hash":
+// CHECK-NOT: [[HASH1]]
+// CHECK-NEXT:            "module-name": "Mod"
+// CHECK-NEXT:         }
+// CHECK-NEXT:       ]
+
+//--- cdb.json.template
+[
+  {
+    "directory": "DIR",
+    "command": "clang -fsyntax-only DIR/tu.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+    "file": "DIR/tu.c"
+  }
+]
+
+//--- module.modulemap
+module Mod { header "Mod.h" }
+
+//--- Mod.h
+
+//--- tu.c
+#include "Mod.h"
Index: clang/test/ClangScanDeps/modules-context-hash-ignore-macros.c
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/modules-context-hash-ignore-macros.c
@@ -0,0 +1,100 @@
+// Ensure '-DFOO -fmodules-ignore-macro=FOO' and '' both produce the same
+// canonical module build command.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 \
+// RUN:   -format experimental-full > %t/deps.json
+// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
+
+// CHECK:      {
+// CHECK-NEXT:   "modules": [
+// CHECK:          {
+// CHECK:            "command-line": [
+// CHECK-NOT:          "FOO"
+// CHECK-NOT:          "-fmodules-ignore-macro
+// CHECK:            ]
+// CHECK:            "context-hash": "[[HASH_NO_FOO:.*]]"
+// CHECK:            "name": "Mod"
+// CHECK-NEXT:     }
+// CHECK-NEXT:     {
+// CHECK:            "command-line": [
+// CHECK:              "-D"
+// CHECK-NEXT:         "FOO"
+// CHECK:            ]
+// CHECK:            "context-hash": "[[HASH_FOO:.*]]"
+// CHECK:            "name": "Mod"
+// CHECK-NEXT:     }
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   "translation-units": [
+// CHECK-NEXT:     {
+// CHECK:            "clang-module-deps": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:           "context-hash": "[[HASH_NO_FOO]]"
+// CHECK-NEXT:            "module-name": "Mod"
+// CHECK-NEXT:         }
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       "command-line": [
+// CHECK-NOT:          "-DFOO"
+// CHECK:            ]
+// CHECK:            "input-file": "{{.*}}tu1.c"
+// CHECK-NEXT:     }
+// CHECK-NEXT:     {
+// CHECK:            "clang-module-deps": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:           "context-hash": "[[HASH_FOO]]"
+// CHECK-NEXT:            "module-name": "Mod"
+// CHECK-NEXT:         }
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       "command-line": [
+// CHECK:              "-DFOO"
+// CHECK:            ]
+// CHECK:            "input-file": "{{.*}}tu2.c"
+// CHECK-NEXT:     }
+// CHECK-NEXT:     {
+// CHECK:            "clang-module-deps": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:           "context-hash": "[[HASH_NO_FOO]]"
+// CHECK-NEXT:            "module-name": "Mod"
+// CHECK-NEXT:         }
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       "command-line": [
+// CHECK:              "-DFOO"
+// CHECK:              "-fmodules-ignore-macro=FOO"
+// CHECK:            ]
+// CHECK:            "input-file": "{{.*}}tu3.c"
+
+//--- cdb.json.template
+[
+  {
+    "directory": "DIR",
+    "command": "clang -fsyntax-only DIR/tu1.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+    "file": "DIR/tu1.c"
+  },
+  {
+    "directory": "DIR",
+    "command": "clang -DFOO -fsyntax-only DIR/tu2.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+    "file": "DIR/tu2.c"
+  },
+  {
+    "directory": "DIR",
+    "command": "clang -DFOO -fmodules-ignore-macro=FOO -fsyntax-only DIR/tu3.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+    "file": "DIR/tu3.c"
+  },
+]
+
+//--- module.modulemap
+module Mod { header "Mod.h" }
+
+//--- Mod.h
+
+//--- tu1.c
+#include "Mod.h"
+
+//--- tu2.c
+#include "Mod.h"
+
+//--- tu3.c
+#include "Mod.h"
Index: clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
===================================================================
--- clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -12,6 +12,7 @@
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
+#include "llvm/Support/BLAKE3.h"
 #include "llvm/Support/StringSaver.h"
 
 using namespace clang;
@@ -77,6 +78,19 @@
   CI.getHeaderSearchOpts().ModuleCachePruneInterval = 7 * 24 * 60 * 60;
   CI.getHeaderSearchOpts().ModuleCachePruneAfter = 31 * 24 * 60 * 60;
 
+  // Remove any macro definitions that are explicitly ignored.
+  if (!CI.getHeaderSearchOpts().ModulesIgnoreMacros.empty()) {
+    llvm::erase_if(
+        CI.getPreprocessorOpts().Macros,
+        [&CI](const std::pair<std::string, bool> &Def) {
+          StringRef MacroDef = Def.first;
+          return CI.getHeaderSearchOpts().ModulesIgnoreMacros.contains(
+              llvm::CachedHashString(MacroDef.split('=').first));
+        });
+    // Remove the now unused option.
+    CI.getHeaderSearchOpts().ModulesIgnoreMacros.clear();
+  }
+
   // Report the prebuilt modules this module uses.
   for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps)
     CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile);
@@ -156,6 +170,50 @@
   return serializeCompilerInvocation(CI);
 }
 
+static std::string getModuleContextHash(const ModuleDeps &MD) {
+  llvm::HashBuilder<llvm::TruncatedBLAKE3<16>,
+                    llvm::support::endianness::native>
+      HashBuilder;
+  SmallString<32> Scratch;
+
+  // Hash the compiler version and serialization version to ensure the module
+  // will be readable.
+  HashBuilder.add(getClangFullRepositoryVersion());
+  HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
+
+  // Hash the BuildInvocation without any input files.
+  SmallVector<const char *, 32> DummyArgs;
+  MD.BuildInvocation.generateCC1CommandLine(DummyArgs, [&](const Twine &Arg) {
+    Scratch.clear();
+    StringRef Str = Arg.toStringRef(Scratch);
+    HashBuilder.add(Str);
+    return "<unused>";
+  });
+
+  // Hash the input file paths and module dependencies. These paths may differ
+  // even if the invocation is identical if they depend on the contents of the
+  // files in the TU -- for example, case-insensitive paths to modulemap files.
+  // Usually such a case would indicate a missed optimization to canonicalize,
+  // but it may be difficult to canonicalize all cases when there is a VFS.
+  HashBuilder.add(MD.ClangModuleMapFile);
+  for (const auto &Dep : MD.PrebuiltModuleDeps)
+    HashBuilder.add(Dep.PCMFile);
+  for (const auto &ID : MD.ClangModuleDeps) {
+    HashBuilder.add(ID.ModuleName);
+    HashBuilder.add(ID.ContextHash);
+  }
+
+  // Hash options that affect which callbacks are made for outputs.
+  HashBuilder.add(MD.HadDependencyFile);
+  HashBuilder.add(MD.HadSerializedDiagnostics);
+
+  llvm::BLAKE3Result<16> Hash = HashBuilder.final();
+  std::array<uint64_t, 2> Words;
+  static_assert(sizeof(Hash) == sizeof(Words), "Hash must match Words");
+  std::memcpy(Words.data(), Hash.data(), sizeof(Hash));
+  return toString(llvm::APInt(sizeof(Words) * 8, Words), 36, /*Signed=*/false);
+}
+
 std::vector<std::string>
 ModuleDeps::getCanonicalCommandLineWithoutModulePaths() const {
   return serializeCompilerInvocation(BuildInvocation);
@@ -346,13 +404,12 @@
                                      .DiagnosticSerializationFile.empty();
   MD.HadDependencyFile =
       !MDC.OriginalInvocation.getDependencyOutputOpts().OutputFile.empty();
-  // FIXME: HadSerializedDiagnostics and HadDependencyFile should be included in
-  // the context hash since it can affect the command-line.
-  MD.ID.ContextHash = MD.BuildInvocation.getModuleHash();
 
   llvm::DenseSet<const Module *> AddedModules;
   addAllSubmoduleDeps(M, MD, AddedModules);
 
+  // Do this last since it requires the dependencies.
+  MD.ID.ContextHash = getModuleContextHash(MD);
   return MD.ID;
 }
 
Index: clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
===================================================================
--- clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
+++ clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
@@ -46,9 +46,11 @@
   /// or a header-name for C++20 header units.
   std::string ModuleName;
 
-  /// The context hash of a module represents the set of compiler options that
-  /// may make one version of a module incompatible with another. This includes
-  /// things like language mode, predefined macros, header search paths, etc...
+  /// The context hash of a module represents the compiler options that affect
+  /// the resulting command-line invocation.
+  ///
+  /// Modules with the same name and ContextHash but different invocations could
+  /// cause non-deterministic build results.
   ///
   /// Modules with the same name but a different \c ContextHash should be
   /// treated as separate modules for the purpose of a build.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to