[PATCH] D67249: [Modules][PCH] Hash input files content

2019-10-14 Thread Bruno Cardoso Lopes via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG2a1386c81de5: [Modules][PCH] Hash input files content 
(authored by bruno).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67249/new/

https://reviews.llvm.org/D67249

Files:
  clang/include/clang/Basic/DiagnosticSerializationKinds.td
  clang/include/clang/Driver/Options.td
  clang/include/clang/Lex/HeaderSearchOptions.h
  clang/include/clang/Serialization/ASTBitCodes.h
  clang/include/clang/Serialization/ASTReader.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Frontend/CompilerInstance.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/test/Modules/validate-file-content.m
  clang/test/PCH/validate-file-content.m

Index: clang/test/PCH/validate-file-content.m
===
--- /dev/null
+++ clang/test/PCH/validate-file-content.m
@@ -0,0 +1,29 @@
+// REQUIRES: shell
+//
+// Check driver works
+// RUN: %clang -x objective-c-header -fsyntax-only -fpch-validate-input-files-content %t/a.h -### 2>&1 | FileCheck --check-prefix=CHECK-CC1 %s
+// CHECK-CC1: -fvalidate-ast-input-files-content
+//
+// PCH only: Test that a mtime mismatch without content change is fine
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: %clang_cc1 -emit-pch -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: touch -m -a -t 20290101 %t/m.h
+// RUN: %clang_cc1 -fsyntax-only -I %t -include-pch %t/a.pch %s -verify -fvalidate-ast-input-files-content
+//
+// PCH only: Test that a mtime mismatch with content change
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: %clang_cc1 -emit-pch -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: echo '// m.x' > %t/m.h
+// RUN: touch -m -a -t 20290101 %t/m.h
+// RUN: not %clang_cc1 -fsyntax-only -I %t -include-pch %t/a.pch %s -fvalidate-ast-input-files-content 2> %t/stderr
+// RUN: FileCheck %s < %t/stderr
+//
+// CHECK: file '[[M_H:.*[/\\]m\.h]]' has been modified since the precompiled header '[[A_PCH:.*/a\.pch]]' was built: content changed
+// CHECK: please rebuild precompiled header '[[A_PCH]]'
+// expected-no-diagnostics
Index: clang/test/Modules/validate-file-content.m
===
--- /dev/null
+++ clang/test/Modules/validate-file-content.m
@@ -0,0 +1,33 @@
+// REQUIRES: shell
+//
+// Check driver works
+// RUN: %clang -fmodules -fsyntax-only -fmodules-validate-input-files-content %s -### 2>&1 | FileCheck --check-prefix=CHECK-CC1 %s
+// CHECK-CC1: -fvalidate-ast-input-files-content
+//
+// PCH+Modules: Test that a mtime mismatch without content change is fine
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: echo 'module m { header "m.h" }' > %t/module.modulemap
+// RUN: %clang_cc1 -emit-pch -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: touch -m -a -t 20290101 %t/m.h
+// RUN: %clang_cc1 -fsyntax-only -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -I %t -include-pch %t/a.pch %s -verify -fvalidate-ast-input-files-content
+//
+// PCH+Modules: Test that a mtime mismatch with content change
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: echo 'module m { header "m.h" }' > %t/module.modulemap
+// RUN: %clang_cc1 -emit-pch -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: echo '// m.x' > %t/m.h
+// RUN: touch -m -a -t 20290101 %t/m.h
+// RUN: not %clang_cc1 -fsyntax-only -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -I %t -include-pch %t/a.pch %s -fvalidate-ast-input-files-content 2> %t/stderr
+// RUN: FileCheck %s < %t/stderr
+//
+// CHECK: file '[[M_H:.*[/\\]m\.h]]' has been modified since the precompiled header '[[A_PCH:.*/a\.pch]]' was built: content changed
+// CHECK: '[[M_H]]' required by '[[M_PCM:.*[/\\]m.*\.pcm]]'
+// CHECK: '[[M_PCM]]' required by '[[A_PCH]]'
+// CHECK: please rebuild precompiled header '[[A_PCH]]'
+// expected-no-diagnostics
Index: clang/lib/Serialization/ASTWriter.cpp
===
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -1099,6 +1099,7 @@
 
   BLOCK(INPUT_FILES_BLOCK);
   RECORD(INPUT_FILE);
+  RECORD(INPUT_FILE_HASH);
 
   // AST Top-Level Block.
   BLOCK(AST_BLOCK);
@@ -1764,6 +1765,7 @@
   bool IsTransient;
   bool BufferOverridden;
   

[PATCH] D67249: [Modules][PCH] Hash input files content

2019-10-01 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith accepted this revision.
rsmith added a comment.
This revision is now accepted and ready to land.

LGTM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67249/new/

https://reviews.llvm.org/D67249



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D67249: [Modules][PCH] Hash input files content

2019-10-01 Thread Bruno Cardoso Lopes via Phabricator via cfe-commits
bruno added a reviewer: v.g.vassilev.
bruno added a comment.

Ping!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67249/new/

https://reviews.llvm.org/D67249



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D67249: [Modules][PCH] Hash input files content

2019-09-13 Thread Juergen Ributzka via Phabricator via cfe-commits
ributzka added a comment.

xxhash64 is apparently faster than MD5 and SHA1, and produces good quality 
hashes. I am not sure about the hash quality of hash_code for this purpose.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67249/new/

https://reviews.llvm.org/D67249



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D67249: [Modules][PCH] Hash input files content

2019-09-13 Thread Bruno Cardoso Lopes via Phabricator via cfe-commits
bruno added a comment.



> Did you try xxHash64?

No, and I'm not planning to. I believe `hash_code` is enough here, already has 
low overhead on performance and size, and bunch of prior use elsewhere in 
LLVM/Clang. If there's a strong reason to do it I wanna know why that's the 
case first.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67249/new/

https://reviews.llvm.org/D67249



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D67249: [Modules][PCH] Hash input files content

2019-09-11 Thread Juergen Ributzka via Phabricator via cfe-commits
ributzka added a comment.

Did you try xxHash64?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67249/new/

https://reviews.llvm.org/D67249



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D67249: [Modules][PCH] Hash input files content

2019-09-10 Thread Bruno Cardoso Lopes via Phabricator via cfe-commits
bruno updated this revision to Diff 219607.
bruno added a comment.

Update the patch to use two ::Fixed, 32 in abbrev to encode hash.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67249/new/

https://reviews.llvm.org/D67249

Files:
  clang/include/clang/Basic/DiagnosticSerializationKinds.td
  clang/include/clang/Driver/Options.td
  clang/include/clang/Lex/HeaderSearchOptions.h
  clang/include/clang/Serialization/ASTBitCodes.h
  clang/include/clang/Serialization/ASTReader.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Frontend/CompilerInstance.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/test/Modules/validate-file-content.m
  clang/test/PCH/validate-file-content.m

Index: clang/test/PCH/validate-file-content.m
===
--- /dev/null
+++ clang/test/PCH/validate-file-content.m
@@ -0,0 +1,29 @@
+// REQUIRES: shell
+//
+// Check driver works
+// RUN: %clang -x objective-c-header -fsyntax-only -fpch-validate-input-files-content %t/a.h -### 2>&1 | FileCheck --check-prefix=CHECK-CC1 %s
+// CHECK-CC1: -fvalidate-ast-input-files-content
+//
+// PCH only: Test that a mtime mismatch without content change is fine
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: %clang_cc1 -emit-pch -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: touch -m -a -t 20290101 %t/m.h
+// RUN: %clang_cc1 -fsyntax-only -I %t -include-pch %t/a.pch %s -verify -fvalidate-ast-input-files-content
+//
+// PCH only: Test that a mtime mismatch with content change
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: %clang_cc1 -emit-pch -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: echo '// m.x' > %t/m.h
+// RUN: touch -m -a -t 20290101 %t/m.h
+// RUN: not %clang_cc1 -fsyntax-only -I %t -include-pch %t/a.pch %s -fvalidate-ast-input-files-content 2> %t/stderr
+// RUN: FileCheck %s < %t/stderr
+//
+// CHECK: file '[[M_H:.*[/\\]m\.h]]' has been modified since the precompiled header '[[A_PCH:.*/a\.pch]]' was built: content changed
+// CHECK: please rebuild precompiled header '[[A_PCH]]'
+// expected-no-diagnostics
Index: clang/test/Modules/validate-file-content.m
===
--- /dev/null
+++ clang/test/Modules/validate-file-content.m
@@ -0,0 +1,33 @@
+// REQUIRES: shell
+//
+// Check driver works
+// RUN: %clang -fmodules -fsyntax-only -fmodules-validate-input-files-content %s -### 2>&1 | FileCheck --check-prefix=CHECK-CC1 %s
+// CHECK-CC1: -fvalidate-ast-input-files-content
+//
+// PCH+Modules: Test that a mtime mismatch without content change is fine
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: echo 'module m { header "m.h" }' > %t/module.modulemap
+// RUN: %clang_cc1 -emit-pch -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: touch -m -a -t 20290101 %t/m.h
+// RUN: %clang_cc1 -fsyntax-only -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -I %t -include-pch %t/a.pch %s -verify -fvalidate-ast-input-files-content
+//
+// PCH+Modules: Test that a mtime mismatch with content change
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: echo 'module m { header "m.h" }' > %t/module.modulemap
+// RUN: %clang_cc1 -emit-pch -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: echo '// m.x' > %t/m.h
+// RUN: touch -m -a -t 20290101 %t/m.h
+// RUN: not %clang_cc1 -fsyntax-only -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -I %t -include-pch %t/a.pch %s -fvalidate-ast-input-files-content 2> %t/stderr
+// RUN: FileCheck %s < %t/stderr
+//
+// CHECK: file '[[M_H:.*[/\\]m\.h]]' has been modified since the precompiled header '[[A_PCH:.*/a\.pch]]' was built: content changed
+// CHECK: '[[M_H]]' required by '[[M_PCM:.*[/\\]m.*\.pcm]]'
+// CHECK: '[[M_PCM]]' required by '[[A_PCH]]'
+// CHECK: please rebuild precompiled header '[[A_PCH]]'
+// expected-no-diagnostics
Index: clang/lib/Serialization/ASTWriter.cpp
===
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -1098,6 +1098,7 @@
 
   BLOCK(INPUT_FILES_BLOCK);
   RECORD(INPUT_FILE);
+  RECORD(INPUT_FILE_HASH);
 
   // AST Top-Level Block.
   BLOCK(AST_BLOCK);
@@ -1763,6 +1764,7 @@
   bool IsTransient;
   bool BufferOverridden;
   bool IsTopLevelModuleMap;
+  uint32_t 

[PATCH] D67249: [Modules][PCH] Hash input files content

2019-09-10 Thread Bruno Cardoso Lopes via Phabricator via cfe-commits
bruno marked an inline comment as done.
bruno added inline comments.



Comment at: clang/lib/Serialization/ASTWriter.cpp:1767
   bool IsTopLevelModuleMap;
+  uint32_t ContentHash[2];
 };

ributzka wrote:
> bruno wrote:
> > aprantl wrote:
> > > Why is this not a uint64_t?
> > Serializing a `uint64_t` directly instead of two `uint32_t` gives me a 
> > slightly bigger final cache. One could argue that the value is negligible 
> > (+800 bytes for a 40MB cache), but here's the rationale :)
> Creating an abbrev might fix this, because this should be a fixed size field. 
> The generic emission code for a record without an abbrev is not very space 
> efficient for single fields.
Right, I did more experiments, and although VBR::64 or Fixed::64 aren't 
supported, I could shave off 10KB by using abbrev + two ::Fixed, 32. Using two 
::VBR:32 is actually 400 bytes worse than the current approach. Thanks for the 
suggestion, gonna update the patch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67249/new/

https://reviews.llvm.org/D67249



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D67249: [Modules][PCH] Hash input files content

2019-09-06 Thread Juergen Ributzka via Phabricator via cfe-commits
ributzka added inline comments.



Comment at: clang/lib/Serialization/ASTWriter.cpp:1767
   bool IsTopLevelModuleMap;
+  uint32_t ContentHash[2];
 };

bruno wrote:
> aprantl wrote:
> > Why is this not a uint64_t?
> Serializing a `uint64_t` directly instead of two `uint32_t` gives me a 
> slightly bigger final cache. One could argue that the value is negligible 
> (+800 bytes for a 40MB cache), but here's the rationale :)
Creating an abbrev might fix this, because this should be a fixed size field. 
The generic emission code for a record without an abbrev is not very space 
efficient for single fields.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67249/new/

https://reviews.llvm.org/D67249



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D67249: [Modules][PCH] Hash input files content

2019-09-06 Thread Bruno Cardoso Lopes via Phabricator via cfe-commits
bruno updated this revision to Diff 219168.
bruno added a comment.

Remove pasto from one of the testcases


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67249/new/

https://reviews.llvm.org/D67249

Files:
  clang/include/clang/Basic/DiagnosticSerializationKinds.td
  clang/include/clang/Driver/Options.td
  clang/include/clang/Lex/HeaderSearchOptions.h
  clang/include/clang/Serialization/ASTBitCodes.h
  clang/include/clang/Serialization/ASTReader.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Frontend/CompilerInstance.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/test/Modules/validate-file-content.m
  clang/test/PCH/validate-file-content.m

Index: clang/test/PCH/validate-file-content.m
===
--- /dev/null
+++ clang/test/PCH/validate-file-content.m
@@ -0,0 +1,29 @@
+// REQUIRES: shell
+//
+// Check driver works
+// RUN: %clang -x objective-c-header -fsyntax-only -fpch-validate-input-files-content %t/a.h -### 2>&1 | FileCheck --check-prefix=CHECK-CC1 %s
+// CHECK-CC1: -fvalidate-ast-input-files-content
+//
+// PCH only: Test that a mtime mismatch without content change is fine
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: %clang_cc1 -emit-pch -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: touch -m -a -t 20290101 %t/m.h
+// RUN: %clang_cc1 -fsyntax-only -I %t -include-pch %t/a.pch %s -verify -fvalidate-ast-input-files-content
+//
+// PCH only: Test that a mtime mismatch with content change
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: %clang_cc1 -emit-pch -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: echo '// m.x' > %t/m.h
+// RUN: touch -m -a -t 20290101 %t/m.h
+// RUN: not %clang_cc1 -fsyntax-only -I %t -include-pch %t/a.pch %s -fvalidate-ast-input-files-content 2> %t/stderr
+// RUN: FileCheck %s < %t/stderr
+//
+// CHECK: file '[[M_H:.*[/\\]m\.h]]' has been modified since the precompiled header '[[A_PCH:.*/a\.pch]]' was built: content changed
+// CHECK: please rebuild precompiled header '[[A_PCH]]'
+// expected-no-diagnostics
Index: clang/test/Modules/validate-file-content.m
===
--- /dev/null
+++ clang/test/Modules/validate-file-content.m
@@ -0,0 +1,33 @@
+// REQUIRES: shell
+//
+// Check driver works
+// RUN: %clang -fmodules -fsyntax-only -fmodules-validate-input-files-content %s -### 2>&1 | FileCheck --check-prefix=CHECK-CC1 %s
+// CHECK-CC1: -fvalidate-ast-input-files-content
+//
+// PCH+Modules: Test that a mtime mismatch without content change is fine
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: echo 'module m { header "m.h" }' > %t/module.modulemap
+// RUN: %clang_cc1 -emit-pch -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: touch -m -a -t 20290101 %t/m.h
+// RUN: %clang_cc1 -fsyntax-only -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -I %t -include-pch %t/a.pch %s -verify -fvalidate-ast-input-files-content
+//
+// PCH+Modules: Test that a mtime mismatch with content change
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: echo 'module m { header "m.h" }' > %t/module.modulemap
+// RUN: %clang_cc1 -emit-pch -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: echo '// m.x' > %t/m.h
+// RUN: touch -m -a -t 20290101 %t/m.h
+// RUN: not %clang_cc1 -fsyntax-only -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -I %t -include-pch %t/a.pch %s -fvalidate-ast-input-files-content 2> %t/stderr
+// RUN: FileCheck %s < %t/stderr
+//
+// CHECK: file '[[M_H:.*[/\\]m\.h]]' has been modified since the precompiled header '[[A_PCH:.*/a\.pch]]' was built: content changed
+// CHECK: '[[M_H]]' required by '[[M_PCM:.*[/\\]m.*\.pcm]]'
+// CHECK: '[[M_PCM]]' required by '[[A_PCH]]'
+// CHECK: please rebuild precompiled header '[[A_PCH]]'
+// expected-no-diagnostics
Index: clang/lib/Serialization/ASTWriter.cpp
===
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -1098,6 +1098,7 @@
 
   BLOCK(INPUT_FILES_BLOCK);
   RECORD(INPUT_FILE);
+  RECORD(INPUT_FILE_HASH);
 
   // AST Top-Level Block.
   BLOCK(AST_BLOCK);
@@ -1763,6 +1764,7 @@
   bool IsTransient;
   bool BufferOverridden;
   bool IsTopLevelModuleMap;
+  uint32_t ContentHash[2];
 };
 
 } 

[PATCH] D67249: [Modules][PCH] Hash input files content

2019-09-06 Thread Bruno Cardoso Lopes via Phabricator via cfe-commits
bruno updated this revision to Diff 219166.
bruno added a comment.

Update testcase to use a more portable version of `touch`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67249/new/

https://reviews.llvm.org/D67249

Files:
  clang/include/clang/Basic/DiagnosticSerializationKinds.td
  clang/include/clang/Driver/Options.td
  clang/include/clang/Lex/HeaderSearchOptions.h
  clang/include/clang/Serialization/ASTBitCodes.h
  clang/include/clang/Serialization/ASTReader.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Frontend/CompilerInstance.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/test/Modules/validate-file-content.m
  clang/test/PCH/validate-file-content.m

Index: clang/test/PCH/validate-file-content.m
===
--- /dev/null
+++ clang/test/PCH/validate-file-content.m
@@ -0,0 +1,29 @@
+// REQUIRES: shell
+//
+// Check driver works
+// RUN: %clang -x objective-c-header -fsyntax-only -fpch-validate-input-files-content %t/a.h -### 2>&1 | FileCheck --check-prefix=CHECK-CC1 %s
+// CHECK-CC1: -fvalidate-ast-input-files-content
+//
+// PCH only: Test that a mtime mismatch without content change is fine
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: %clang_cc1 -emit-pch -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: touch -m -a -t 20290101 %t/m.h
+// RUN: %clang_cc1 -fsyntax-only -I %t -include-pch %t/a.pch %s -verify -fvalidate-ast-input-files-content
+//
+// PCH only: Test that a mtime mismatch with content change
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: %clang_cc1 -emit-pch -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: echo '// m.x' > %t/m.h
+// RUN: touch -m -a -t 20290101 %t/m.h
+// RUN: not %clang_cc1 -fsyntax-only -I %t -include-pch %t/a.pch %s -fvalidate-ast-input-files-content 2> %t/stderr
+// RUN: FileCheck %s < %t/stderr
+//
+// CHECK: file '[[M_H:.*[/\\]m\.h]]' has been modified since the precompiled header '[[A_PCH:.*/a\.pch]]' was built: content changed
+// CHECK: please rebuild precompiled header '[[A_PCH]]'
+// expected-no-diagnostics
Index: clang/test/Modules/validate-file-content.m
===
--- /dev/null
+++ clang/test/Modules/validate-file-content.m
@@ -0,0 +1,33 @@
+// REQUIRES: shell
+//
+// Check driver works
+// RUN: %clang -fmodules -fsyntax-only -fmodules-validate-input-files-content %s -### 2>&1 | FileCheck --check-prefix=CHECK-CC1 %s
+// CHECK-CC1: -fvalidate-ast-input-files-content
+//
+// PCH+Modules: Test that a mtime mismatch without content change is fine
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: echo 'module m { header "m.h" }' > %t/module.modulemap
+// RUN: %clang_cc1 -emit-pch -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: touch -m -a -t 20290101 /tmp/x %t/m.h
+// RUN: %clang_cc1 -fsyntax-only -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -I %t -include-pch %t/a.pch %s -verify -fvalidate-ast-input-files-content
+//
+// PCH+Modules: Test that a mtime mismatch with content change
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: echo 'module m { header "m.h" }' > %t/module.modulemap
+// RUN: %clang_cc1 -emit-pch -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: echo '// m.x' > %t/m.h
+// RUN: touch -m -a -t 20290101 %t/m.h
+// RUN: not %clang_cc1 -fsyntax-only -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -I %t -include-pch %t/a.pch %s -fvalidate-ast-input-files-content 2> %t/stderr
+// RUN: FileCheck %s < %t/stderr
+//
+// CHECK: file '[[M_H:.*[/\\]m\.h]]' has been modified since the precompiled header '[[A_PCH:.*/a\.pch]]' was built: content changed
+// CHECK: '[[M_H]]' required by '[[M_PCM:.*[/\\]m.*\.pcm]]'
+// CHECK: '[[M_PCM]]' required by '[[A_PCH]]'
+// CHECK: please rebuild precompiled header '[[A_PCH]]'
+// expected-no-diagnostics
Index: clang/lib/Serialization/ASTWriter.cpp
===
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -1098,6 +1098,7 @@
 
   BLOCK(INPUT_FILES_BLOCK);
   RECORD(INPUT_FILE);
+  RECORD(INPUT_FILE_HASH);
 
   // AST Top-Level Block.
   BLOCK(AST_BLOCK);
@@ -1763,6 +1764,7 @@
   bool IsTransient;
   bool BufferOverridden;
   bool IsTopLevelModuleMap;
+  uint32_t 

[PATCH] D67249: [Modules][PCH] Hash input files content

2019-09-06 Thread Bruno Cardoso Lopes via Phabricator via cfe-commits
bruno marked 2 inline comments as done.
bruno added a comment.



> Nice! Are you planning to address the FIXME's in a later update of this patch?

The FIXME's are just replaying what the code around does, both error dropping 
and `FileEntryRef` are recent changes that didn't make all the way through yet. 
I should help improving that at some point cause it will overall be good for 
modules, but those changes are orthogonal to this patch.




Comment at: clang/lib/Serialization/ASTWriter.cpp:1767
   bool IsTopLevelModuleMap;
+  uint32_t ContentHash[2];
 };

aprantl wrote:
> Why is this not a uint64_t?
Serializing a `uint64_t` directly instead of two `uint32_t` gives me a slightly 
bigger final cache. One could argue that the value is negligible (+800 bytes 
for a 40MB cache), but here's the rationale :)



Comment at: clang/test/Modules/validate-file-content.m:14
+// RUN: %clang_cc1 -emit-pch -fmodules-cache-path=%t/cache -fmodules 
-fimplicit-module-maps -o %t/a.pch -I %t -x objective-c-header %t/a.h 
-fvalidate-ast-input-files-content
+// RUN: touch -m -a -A 01 %t/m.h
+// RUN: %clang_cc1 -fsyntax-only -fmodules-cache-path=%t/cache -fmodules 
-fimplicit-module-maps -I %t -include-pch %t/a.pch %s -verify 
-fvalidate-ast-input-files-content

aprantl wrote:
> Are there other tests that use touch and file timestamps in general? I wonder 
> if this could cause random issues when building on NFS or filesystems with a 
> really low timestamp granularity. I don't have a better suggestion either 
> since the mtime is part of the code being tested here.
> Are there other tests that use touch and file timestamps in general?

Yes, I found quite a few around the tests. However, I just tested `touch -m -a 
-A 01` on linux and it seems that `-A` isn't supported, maybe I'll just change 
the entire mtime for some time in the future, that's likely more portable.

> I wonder if this could cause random issues when building on NFS or 
> filesystems with a really low timestamp granularity. I don't have a better 
> suggestion either since the mtime is part of the code being tested here.

Right, perhaps changing the mtime completely might help here.





Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67249/new/

https://reviews.llvm.org/D67249



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D67249: [Modules][PCH] Hash input files content

2019-09-05 Thread Adrian Prantl via Phabricator via cfe-commits
aprantl added a comment.

Nice! Are you planning to address the FIXME's in a later update of this patch?




Comment at: clang/lib/Serialization/ASTWriter.cpp:1767
   bool IsTopLevelModuleMap;
+  uint32_t ContentHash[2];
 };

Why is this not a uint64_t?



Comment at: clang/test/Modules/validate-file-content.m:14
+// RUN: %clang_cc1 -emit-pch -fmodules-cache-path=%t/cache -fmodules 
-fimplicit-module-maps -o %t/a.pch -I %t -x objective-c-header %t/a.h 
-fvalidate-ast-input-files-content
+// RUN: touch -m -a -A 01 %t/m.h
+// RUN: %clang_cc1 -fsyntax-only -fmodules-cache-path=%t/cache -fmodules 
-fimplicit-module-maps -I %t -include-pch %t/a.pch %s -verify 
-fvalidate-ast-input-files-content

Are there other tests that use touch and file timestamps in general? I wonder 
if this could cause random issues when building on NFS or filesystems with a 
really low timestamp granularity. I don't have a better suggestion either since 
the mtime is part of the code being tested here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67249/new/

https://reviews.llvm.org/D67249



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D67249: [Modules][PCH] Hash input files content

2019-09-05 Thread Bruno Cardoso Lopes via Phabricator via cfe-commits
bruno created this revision.
bruno added reviewers: dexonsmith, arphaman, rsmith, aprantl.
Herald added subscribers: cfe-commits, jkorous.
Herald added a project: clang.

When files often get touched during builds, the mtime based validation
leads to different problems in implicit modules builds, even when the
content doesn't actually change:

- Modules only: module invalidation due to out of date files. Usually

causing rebuild traffic.

- Modules + PCH: build failures because clang cannot rebuild a module if

it comes from building a PCH.

- PCH: build failures because clang cannot rebuild a PCH in case one of

the input headers has different mtime.

This patch proposes hashing the content of input files (headers and
module maps), which is performed during serialization time. When looking
at input files for validation, clang only computes the hash in case
there's a mtime mismatch.

I've tested a couple of different hash algorithms availble in LLVM in
face of building modules+pch for `#import `:

- `hash_code`: performace diff within the noise, total module cache

increased by 0.07%.

- `SHA1`: 5% slowdown. Haven't done real size measurements, but it'd be

BLOCK_ID+20 bytes per input file, instead of BLOCK_ID+8 bytes from
`hash_code`.

- `MD5`: 3% slowdown. Like above, but BLOCK_ID+16 bytes per input file.

Given the numbers above, the patch uses `hash_code`. The patch also
improves invalidation error msgs to point out which type of problem the
user is facing: "mtime", "size" or "content".

rdar://problem/29320105


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D67249

Files:
  clang/include/clang/Basic/DiagnosticSerializationKinds.td
  clang/include/clang/Driver/Options.td
  clang/include/clang/Lex/HeaderSearchOptions.h
  clang/include/clang/Serialization/ASTBitCodes.h
  clang/include/clang/Serialization/ASTReader.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Frontend/CompilerInstance.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/test/Modules/validate-file-content.m
  clang/test/PCH/validate-file-content.m

Index: clang/test/PCH/validate-file-content.m
===
--- /dev/null
+++ clang/test/PCH/validate-file-content.m
@@ -0,0 +1,29 @@
+// REQUIRES: shell
+//
+// Check driver works
+// RUN: %clang -x objective-c-header -fsyntax-only -fpch-validate-input-files-content %t/a.h -### 2>&1 | FileCheck --check-prefix=CHECK-CC1 %s
+// CHECK-CC1: -fvalidate-ast-input-files-content
+//
+// PCH only: Test that a mtime mismatch without content change is fine
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: %clang_cc1 -emit-pch -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: touch -m -a -A 01 %t/m.h
+// RUN: %clang_cc1 -fsyntax-only -I %t -include-pch %t/a.pch %s -verify -fvalidate-ast-input-files-content
+//
+// PCH only: Test that a mtime mismatch with content change
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: %clang_cc1 -emit-pch -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: echo '// m.x' > %t/m.h
+// RUN: touch -m -a -A 01 %t/m.h
+// RUN: not %clang_cc1 -fsyntax-only -I %t -include-pch %t/a.pch %s -fvalidate-ast-input-files-content 2> %t/stderr
+// RUN: FileCheck %s < %t/stderr
+//
+// CHECK: file '[[M_H:.*[/\\]m\.h]]' has been modified since the precompiled header '[[A_PCH:.*/a\.pch]]' was built: content changed
+// CHECK: please rebuild precompiled header '[[A_PCH]]'
+// expected-no-diagnostics
Index: clang/test/Modules/validate-file-content.m
===
--- /dev/null
+++ clang/test/Modules/validate-file-content.m
@@ -0,0 +1,33 @@
+// REQUIRES: shell
+//
+// Check driver works
+// RUN: %clang -fmodules -fsyntax-only -fmodules-validate-input-files-content %s -### 2>&1 | FileCheck --check-prefix=CHECK-CC1 %s
+// CHECK-CC1: -fvalidate-ast-input-files-content
+//
+// PCH+Modules: Test that a mtime mismatch without content change is fine
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: echo 'module m { header "m.h" }' > %t/module.modulemap
+// RUN: %clang_cc1 -emit-pch -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: touch -m -a -A 01 %t/m.h
+// RUN: %clang_cc1 -fsyntax-only -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -I %t -include-pch %t/a.pch %s -verify -fvalidate-ast-input-files-content
+//
+// PCH+Modules: Test that a mtime mismatch with content change
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: