stettberger updated this revision to Diff 135598.
stettberger added a comment.

[CHash] Stable TU-level AST Hashing for local and global hashes

This patch does integrate the AST hashing parts of the CHash project[1][2] into
CLang. The extension provided by this patch accomplishes 2 things:

- local hashes: It extends StmtDataCollector infrastructure by hash rules for 
declarations, types, and attributes. These rules are expressed in terms of 
TableGen files and capture what parts of a AST node are important to its 
semantic. Thereby, the rules only include information that is local to the 
respecting node (no pointers to other AST nodes should be followed).

  Since these DataCollectors do not impose rules how the AST nodes relate to 
each other, they provide a mechanism for "local" hashes.

- global hashes: The CHashVisitor combines the local hashes via the 
RecursiveAST Visitor into a global semantic hash that includes (for a given AST 
node) all local hashes of all AST nodes that can influence the compilation of 
this AST node.

  With these global hashes, we can deduce (early) in the compilation process 
wether the result of this compiler run will produce the same object file. This 
mechanism is *NOT* part of this commit.

[1] https://www.sra.uni-hannover.de/Research/cHash/
[2] "cHash: Detection of Redunandt Recompilations via AST hashing", USENIX 2017


Repository:
  rC Clang

https://reviews.llvm.org/D40731

Files:
  include/clang/AST/AttrDataCollectors.td
  include/clang/AST/CHashVisitor.h
  include/clang/AST/CMakeLists.txt
  include/clang/AST/DataCollection.h
  include/clang/AST/DeclDataCollectors.td
  include/clang/AST/StmtDataCollectors.td
  include/clang/AST/TypeDataCollectors.td
  unittests/AST/CHashTest.cpp
  unittests/AST/CMakeLists.txt

Index: unittests/AST/CMakeLists.txt
===================================================================
--- unittests/AST/CMakeLists.txt
+++ unittests/AST/CMakeLists.txt
@@ -17,6 +17,7 @@
   NamedDeclPrinterTest.cpp
   SourceLocationTest.cpp
   StmtPrinterTest.cpp
+  CHashTest.cpp
   )
 
 target_link_libraries(ASTTests
Index: unittests/AST/CHashTest.cpp
===================================================================
--- /dev/null
+++ unittests/AST/CHashTest.cpp
@@ -0,0 +1,91 @@
+//===- unittests/AST/DataCollectionTest.cpp -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains tests for the DataCollection module.
+//
+// They work by hashing the collected data of two nodes and asserting that the
+// hash values are equal iff the nodes are considered equal.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/CHashVisitor.h"
+#include "clang/Tooling/Tooling.h"
+#include "gtest/gtest.h"
+#include <memory>
+
+using namespace clang;
+using namespace tooling;
+
+
+class CHashConsumer : public ASTConsumer {
+    CompilerInstance &CI;
+    llvm::MD5::MD5Result *ASTHash;
+
+public:
+
+    CHashConsumer(CompilerInstance &CI, llvm::MD5::MD5Result *ASTHash)
+        : CI(CI), ASTHash(ASTHash){}
+
+    virtual void HandleTranslationUnit(clang::ASTContext &Context) override {
+        TranslationUnitDecl *TU = Context.getTranslationUnitDecl();
+
+        // Traversing the translation unit decl via a RecursiveASTVisitor
+        // will visit all nodes in the AST.
+        CHashVisitor<> Visitor(Context);
+        Visitor.TraverseDecl(TU);
+        // Copy Away the resulting hash
+        *ASTHash = *Visitor.getHash(TU);
+
+    }
+
+    ~CHashConsumer() override {}
+};
+
+struct CHashAction : public ASTFrontendAction {
+    llvm::MD5::MD5Result *Hash;
+
+    CHashAction(llvm::MD5::MD5Result *Hash) : Hash(Hash) {}
+
+    std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
+                                                   StringRef) override {
+        return std::unique_ptr<ASTConsumer>(new CHashConsumer(CI, Hash));
+    }
+};
+
+static testing::AssertionResult
+isASTHashEqual(StringRef Code1, StringRef Code2) {
+    llvm::MD5::MD5Result Hash1, Hash2;
+    if (!runToolOnCode(new CHashAction(&Hash1), Code1)) {
+        return testing::AssertionFailure()
+            << "Parsing error in (A)\"" << Code1.str() << "\"";
+    }
+    if (!runToolOnCode(new CHashAction(&Hash2), Code2)) {
+        return testing::AssertionFailure()
+            << "Parsing error in (B) \"" << Code2.str() << "\"";
+    }
+    return testing::AssertionResult(Hash1 == Hash2);
+}
+
+TEST(CHashVisitor, TestRecordTypes) {
+    ASSERT_TRUE(isASTHashEqual( // Unused struct
+                     "struct foobar { int a0; char a1; unsigned long a2; };",
+                     "struct foobar { int a0; char a1;};"
+                     ));
+
+}
+
+TEST(CHashVisitor, TestSourceStructure) {
+    ASSERT_FALSE(isASTHashEqual(
+                     "void foo() { int c; if (0) { c = 1; } }",
+                     "void foo() { int c; if (0) { } c = 1; }"));
+
+    ASSERT_FALSE(isASTHashEqual(
+                     "void f1() {} void f2() {       }",
+                     "void f1() {} void f2() { f1(); }"));
+}
Index: include/clang/AST/TypeDataCollectors.td
===================================================================
--- /dev/null
+++ include/clang/AST/TypeDataCollectors.td
@@ -0,0 +1,78 @@
+//--- Types ---------------------------------------------------------------//
+
+class Type {
+  code Code = [{
+     addData(llvm::hash_value(S->getTypeClass()));
+  }];
+}
+
+class BuiltinType {
+   code Code = [{
+      addData(S->getKind());
+   }];
+}
+
+class ArrayType  {
+   code Code = [{
+      addData(S->getSizeModifier());
+      addData(S->getIndexTypeCVRQualifiers());
+   }];
+}
+
+class ConstantArrayType {
+   code Code = [{
+      addData(S->getSize().getZExtValue());
+   }];
+}
+
+class VectorType {
+   code Code = [{
+      addData(S->getNumElements());
+      addData(S->getVectorKind());
+   }];
+}
+
+class FunctionType {
+   code Code = [{
+      addData(S->getRegParmType());
+      addData(S->getCallConv());
+   }];
+}
+
+class FunctionProtoType {
+   code Code = [{
+      addData(S->getExceptionSpecType());
+      addData(S->isVariadic());
+      addData(S->getRefQualifier());
+      addData(S->hasTrailingReturn());
+
+      addData(S->param_types());
+      addData(S->exceptions());
+   }];
+}
+
+class UnaryTransformType {
+   code Code = [{
+        addData(S->getUTTKind());
+   }];
+}
+
+class AttributedType {
+   code Code = [{
+        addData(S->getAttrKind());
+   }];
+}
+
+class ElaboratedType {
+   code Code = [{
+        addData(S->getKeyword());
+   }];
+}
+
+class ObjCObjectType {
+   code Code = [{
+        addData(S->getTypeArgsAsWritten());
+   }];
+}
+
+
Index: include/clang/AST/StmtDataCollectors.td
===================================================================
--- include/clang/AST/StmtDataCollectors.td
+++ include/clang/AST/StmtDataCollectors.td
@@ -5,6 +5,9 @@
     // macro-generated code.
     addData(data_collection::getMacroStack(S->getLocStart(), Context));
     addData(data_collection::getMacroStack(S->getLocEnd(), Context));
+
+    // CrossRef
+    addData(S->children());
   }];
 }
 
@@ -28,16 +31,29 @@
 class PredefinedExpr {
   code Code = [{
     addData(S->getIdentType());
+    addData(S->getFunctionName()->getString());
   }];
 }
 class TypeTraitExpr {
   code Code = [{
     addData(S->getTrait());
+    // CrossRef
+    addData(S->getNumArgs());
     for (unsigned i = 0; i < S->getNumArgs(); ++i)
       addData(S->getArg(i)->getType());
   }];
 }
 
+class UnaryExprOrTypeTraitExpr {
+  code Code = [{
+    addData(S->getKind());
+    if (S->isArgumentType()) {
+       addData(S->getArgumentType());
+    }
+  }];
+}
+
+
 //--- Calls --------------------------------------------------------------//
 class CallExpr {
   code Code = [{
@@ -72,6 +88,7 @@
 }
 class MemberExpr {
   code Code = [{
+    // I suspect this should be included: addData(S->isArrow());
     addData(S->getMemberDecl()->getName());
   }];
 }
@@ -124,6 +141,12 @@
   }];
 }
 
+class CastExpr {
+  code Code = [{
+    addData(S->getCastKind());
+  }];
+}
+
 //--- Miscellaneous Exprs ------------------------------------------------//
 class BinaryOperator {
   code Code = [{
@@ -136,6 +159,12 @@
   }];
 }
 
+class VAArgExpr {
+  code Code = [{
+       addData(S->isMicrosoftABI());
+  }];
+}
+
 //--- Control flow -------------------------------------------------------//
 class GotoStmt {
   code Code = [{
@@ -189,27 +218,48 @@
 }
 class GenericSelectionExpr {
   code Code = [{
+    // CrossRef
+    addData(S->getNumAssocs());
     for (unsigned i = 0; i < S->getNumAssocs(); ++i) {
       addData(S->getAssocType(i));
     }
   }];
 }
+
+class PseudoObjectExpr {
+  code Code = [{
+    // CrossRef
+    addData(S->semantics());
+  }];
+}
+
 class LambdaExpr {
   code Code = [{
+    addData(S->isGenericLambda());
+    addData(S->isMutable());
+    addData(S->hasExplicitParameters());
+    addData(S->hasExplicitResultType());
+
+
+    // CrossRef
+    addData(S->captures());
+    addData(S->explicit_captures());
     for (const LambdaCapture &C : S->captures()) {
       addData(C.isPackExpansion());
       addData(C.getCaptureKind());
       if (C.capturesVariable())
         addData(C.getCapturedVar()->getType());
     }
-    addData(S->isGenericLambda());
-    addData(S->isMutable());
+    
+
   }];
 }
 class DeclStmt {
   code Code = [{
-    auto numDecls = std::distance(S->decl_begin(), S->decl_end());
-    addData(static_cast<unsigned>(numDecls));
+    // CrossRef
+    addData(S->decls());
+
+    // FIXME? As this should be done by a using visitor
     for (const Decl *D : S->decls()) {
       if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
         addData(VD->getType());
@@ -222,6 +272,12 @@
     addData(S->isSimple());
     addData(S->isVolatile());
     addData(S->generateAsmString(Context));
+
+    // CrossRef + FIXME
+    addData(S->getNumInputs());
+    addData(S->getNumOutputs());
+    addData(S->getNumClobbers());
+
     for (unsigned i = 0; i < S->getNumInputs(); ++i) {
       addData(S->getInputConstraint(i));
     }
@@ -236,7 +292,21 @@
 class AttributedStmt {
   code Code = [{
     for (const Attr *A : S->getAttrs()) {
+       // We duplicate class Attr here to not rely on being integrated
+       // into a RecursiveASTVisitor.
+       std::string AttrString;      
+       llvm::raw_string_ostream OS(AttrString);
+       A->printPretty(OS, Context.getLangOpts());
+       OS.flush();
       addData(std::string(A->getSpelling()));
     }
   }];
 }
+
+class CompoundStmt {
+  code Code = [{
+    // CrossRef
+    addData(S->size());
+  }];
+}
+
Index: include/clang/AST/DeclDataCollectors.td
===================================================================
--- /dev/null
+++ include/clang/AST/DeclDataCollectors.td
@@ -0,0 +1,205 @@
+//--- Declarations -------------------------------------------------------//
+
+class Decl {
+  code Code = [{
+      // Every Declaration gets a tag field in the hash stream. It is
+      // hashed to add additional randomness to the hash
+      addData(llvm::hash_value(S->getKind()));
+
+      // CrossRef
+      addData(S->hasAttrs());
+      if (S->hasAttrs())
+        addData(S->attrs());
+  }];
+}
+
+class DeclContext  {
+   code Code = [{
+      // CrossRef
+      addData(S->decls());
+   }];
+}
+
+class BlockDecl  {
+   code Code = [{
+      // CrossRef
+      auto it = llvm::make_range(S->capture_begin(), S->capture_end());
+      addData(it);
+   }];
+}
+
+
+class ValueDecl  {
+  code Code = [{
+      addData(S->getType());
+      addData(S->isWeak());
+  }];
+}
+
+class NamedDecl {
+  code Code = [{
+      addData(S->getName());
+  }];
+}
+
+class TypeDecl {
+  code Code = [{
+      addData(QualType(S->getTypeForDecl(),0));
+  }];
+}
+
+class EnumDecl {
+  code Code = [{
+      addData(S->getNumPositiveBits());
+      addData(S->getNumNegativeBits());
+  }];
+}
+
+class EnumConstantDecl {
+  code Code = [{
+       /* Not every enum has a init expression. Therefore, 
+          we extract the actual enum value from it. */
+       addData(S->getInitVal().getExtValue());
+  }];
+}
+
+class TagDecl {
+  code Code = [{
+     addData(S->getTagKind());
+  }];
+}
+
+
+class TypedefNameDecl {
+  code Code = [{
+     addData(S->getUnderlyingType());
+  }];
+}
+
+class VarDecl {
+  code Code = [{
+      addData(S->getStorageClass());
+      addData(S->getTLSKind());
+      addData(S->isModulePrivate());
+      addData(S->isNRVOVariable());
+  }];
+}
+
+class ParmVarDecl {
+  code Code = [{
+       addData(S->isParameterPack());
+       addData(S->getOriginalType());
+  }];
+}
+
+class ImplicitParamDecl {
+  code Code = [{
+       addData(S->getParameterKind());
+  }];
+}
+
+class FunctionDecl {
+  code Code = [{
+       addData(S->isExternC());
+       addData(S->isGlobal());
+       addData(S->isNoReturn());
+       addData(S->getStorageClass());
+       addData(S->isInlineSpecified());
+       addData(S->isInlined());
+
+       // CrossRef
+       auto range = llvm::make_range(S->param_begin(), S->param_end());
+       addData(range);
+  }];
+}
+
+class LabelDecl {
+  code Code = [{
+       addData(S->isGnuLocal());
+       addData(S->isMSAsmLabel());
+       if (S->isMSAsmLabel()) {
+          addData(S->getMSAsmLabel());
+       }
+  }];
+}
+
+class CXXRecordDecl {
+  code Code = [{
+       // CrossRef
+       if (S->isCompleteDefinition()) {
+           addData(S->bases());
+       }
+  }];
+}
+
+class CXXConstructorDecl {
+  code Code = [{
+       // CrossRef
+       addData(S->inits());
+  }];
+}
+
+class FieldDecl {
+  code Code = [{
+      addData(S->isBitField());
+  }];
+}
+
+class CapturedDecl {
+  code Code = [{
+      addData(S->isNothrow());
+  }];
+}
+
+class DecompositionDecl {
+  code Code = [{
+      // CrossRef
+      addData(S->bindings());
+  }];
+}
+
+
+//--- Obj-C ---------------------------------------------------------//
+
+class ObjCCategoryDecl {
+  code Code = [{
+     // CrossRef
+     if (auto *it = S->getTypeParamList()) {
+       auto range = llvm::make_range(it->begin(), it->end());
+       addData(range);
+     }
+  }];
+}
+
+class ObjCInterfaceDecl {
+  code Code = [{
+     // CrossRef
+     if (auto *it = S->getTypeParamListAsWritten()) {
+       auto range = llvm::make_range(it->begin(), it->end());
+       addData(range);
+     }
+  }];
+}
+
+class ObjCMethodDecl {
+  code Code = [{
+     // CrossRef
+     auto range = llvm::make_range(S->param_begin(), S->param_end());
+     addData(range);
+  }];
+}
+
+//--- Templates -----------------------------------------------------//
+
+class FriendTemplateDecl {
+  code Code = [{
+      // CrossRef
+      addData(S->getNumTemplateParameters());
+      for (unsigned I = 0, E = S->getNumTemplateParameters(); I < E; ++I) {
+          auto TPL = S->getTemplateParameterList(I);
+          auto it  = llvm::make_range(TPL->begin(), TPL->end());
+          addData(it);
+      }
+  }];
+}
+
Index: include/clang/AST/DataCollection.h
===================================================================
--- include/clang/AST/DataCollection.h
+++ include/clang/AST/DataCollection.h
@@ -59,6 +59,16 @@
   DataConsumer.update(StringRef(reinterpret_cast<char *>(&Data), sizeof(Data)));
 }
 
+template <class T, class Type>
+void addDataToConsumer(T &DataConsumer, const llvm::iterator_range<Type> &R) {
+  addDataToConsumer(DataConsumer, std::distance(R.begin(), R.end()));
+}
+
+template <class T, class Type>
+void addDataToConsumer(T &DataConsumer, const llvm::ArrayRef<Type> &R) {
+  addDataToConsumer(DataConsumer, R.size());
+}
+
 } // end namespace data_collection
 } // end namespace clang
 
Index: include/clang/AST/CMakeLists.txt
===================================================================
--- include/clang/AST/CMakeLists.txt
+++ include/clang/AST/CMakeLists.txt
@@ -53,3 +53,15 @@
 clang_tablegen(StmtDataCollectors.inc -gen-clang-data-collectors
   SOURCE StmtDataCollectors.td
   TARGET StmtDataCollectors)
+
+clang_tablegen(DeclDataCollectors.inc -gen-clang-data-collectors
+  SOURCE DeclDataCollectors.td
+  TARGET DeclDataCollectors)
+
+clang_tablegen(AttrDataCollectors.inc -gen-clang-data-collectors
+  SOURCE AttrDataCollectors.td
+  TARGET AttrDataCollectors)
+
+clang_tablegen(TypeDataCollectors.inc -gen-clang-data-collectors
+  SOURCE TypeDataCollectors.td
+  TARGET TypeDataCollectors)
Index: include/clang/AST/CHashVisitor.h
===================================================================
--- /dev/null
+++ include/clang/AST/CHashVisitor.h
@@ -0,0 +1,320 @@
+//===--- CHashVisitor.h - Stable AST Hashing -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the APValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_AST_CHASHVISITOR_H
+#define LLVM_CLANG_AST_CHASHVISITOR_H
+
+#include "clang/AST/AST.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/DataCollection.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "llvm/Support/MD5.h"
+#include <map>
+#include <string>
+
+namespace clang {
+
+
+template <typename H = llvm::MD5, typename HR = llvm::MD5::MD5Result>
+class CHashVisitor : public clang::RecursiveASTVisitor<CHashVisitor<H, HR>> {
+
+  using Inherited = clang::RecursiveASTVisitor<CHashVisitor<H, HR>>;
+
+public:
+  using Hash = H;
+  using HashResult = HR;
+
+  /// Configure the RecursiveASTVisitor
+  bool shouldWalkTypesOfTypeLocs() const { return false; }
+
+protected:
+  ASTContext &Context;
+
+  // For the DataCollector, we implement a few addData() functions
+  void addData(uint64_t data) { topHash().update(data); }
+  void addData(const StringRef &str) { topHash().update(str); }
+  // On our way down, we meet a lot of qualified types.
+  void addData(const QualType &T) {
+    // 1. Hash referenced type
+    const Type *const ActualType = T.getTypePtr();
+    assert(ActualType != nullptr);
+
+    // FIXME: Structural hash
+    // 1.1 Was it already hashed?
+    const HashResult *const SavedDigest = getHash(ActualType);
+    if (SavedDigest) {
+      // 1.1.1 Use cached value
+      topHash().update(SavedDigest->Bytes);
+    } else {
+      // 1.1.2 Calculate hash for type
+      const Hash *const CurrentHash = pushHash();
+      Inherited::TraverseType(T); // Uses getTypePtr() internally
+      const HashResult TypeDigest = popHash(CurrentHash);
+      topHash().update(TypeDigest.Bytes);
+
+      // Store hash for underlying type
+      storeHash(ActualType, TypeDigest);
+    }
+
+    // Add the qulaifiers at this specific usage of the type
+    addData(T.getCVRQualifiers());
+  }
+  template<typename T>
+  void addData(const llvm::iterator_range<T> &x) {
+    addData(std::distance(x.begin(), x.end()));
+  }
+  template<typename T>
+  void addData(const llvm::ArrayRef<T> &x) {
+    addData(x.size());
+  }
+
+public:
+#define DEF_ADD_DATA_STORED(CLASS, CODE)                                       \
+  template <class = void> bool Visit##CLASS(const CLASS *S) {                  \
+    CODE;                                                                      \
+    return true;                                                               \
+  }
+#define DEF_ADD_DATA(CLASS, CODE) DEF_ADD_DATA_STORED(CLASS, CODE)
+#include "clang/AST/StmtDataCollectors.inc"
+#define DEF_ADD_DATA(CLASS, CODE) DEF_ADD_DATA_STORED(CLASS, CODE)
+#include "clang/AST/AttrDataCollectors.inc"
+#define DEF_ADD_DATA(CLASS, CODE) DEF_ADD_DATA_STORED(CLASS, CODE)
+#include "clang/AST/DeclDataCollectors.inc"
+#define DEF_ADD_DATA(CLASS, CODE) DEF_ADD_DATA_STORED(CLASS, CODE)
+#include "clang/AST/TypeDataCollectors.inc"
+
+  CHashVisitor(ASTContext &Context) : Context(Context) {}
+
+  /* For some special nodes, override the traverse function, since we
+     need both pre- and post order traversal */
+  bool TraverseTranslationUnitDecl(TranslationUnitDecl *TU) {
+    if (!TU)
+      return true;
+    // First, we push a new hash onto the hashing stack. This hash
+    // will capture everythin within the TU*/
+    Hash *CurrentHash = pushHash();
+
+    Inherited::WalkUpFromTranslationUnitDecl(TU);
+
+    // Do recursion on our own, since we want to exclude some children
+    const auto DC = cast<DeclContext>(TU);
+    for (auto *Child : DC->noload_decls()) {
+      if (isa<TypedefDecl>(Child) || isa<RecordDecl>(Child) ||
+          isa<EnumDecl>(Child))
+        continue;
+
+      // Extern variable definitions at the top-level
+      if (const auto VD = dyn_cast<VarDecl>(Child)) {
+        if (VD->hasExternalStorage()) {
+          continue;
+        }
+      }
+
+      if (const auto FD = dyn_cast<FunctionDecl>(Child)) {
+        // We try to avoid hashing of declarations that have no definition
+        if (!FD->isThisDeclarationADefinition()) {
+          bool doHashing = false;
+          // HOWEVER! If this declaration is an alias Declaration, we
+          // hash it no matter what
+          if (FD->hasAttrs()) {
+            for (const Attr *const A : FD->getAttrs()) {
+              if (A->getKind() == attr::Kind::Alias) {
+                doHashing = true;
+                break;
+              }
+            }
+          }
+          if (!doHashing)
+            continue;
+        }
+      }
+
+      TraverseDecl(Child);
+    }
+
+    storeHash(TU, popHash(CurrentHash));
+
+    return true;
+  }
+
+  /* For some special nodes, override the traverse function, since
+     we need both pre- and post order traversal. Storing of type
+     hashes is done in addData() */
+  bool TraverseDecl(Decl *D) {
+    if (!D)
+      return true;
+    /* For some declarations, we store the calculated hash value. */
+    bool CacheHash = false;
+    if (isa<FunctionDecl>(D) && cast<FunctionDecl>(D)->isDefined())
+      CacheHash = true;
+    if (isa<VarDecl>(D) && cast<VarDecl>(D)->hasGlobalStorage())
+      CacheHash = true;
+    if (isa<RecordDecl>(D) && dyn_cast<RecordDecl>(D)->isCompleteDefinition())
+      CacheHash = true;
+
+    if (!CacheHash) {
+      return Inherited::TraverseDecl(D);
+    }
+
+    const HashResult *const SavedDigest = getHash(D);
+    if (SavedDigest) {
+      topHash().update(SavedDigest->Bytes);
+      return true;
+    }
+    Hash *CurrentHash = pushHash();
+    bool Ret = Inherited::TraverseDecl(D);
+    HashResult CurrentHashResult = popHash(CurrentHash);
+    storeHash(D, CurrentHashResult);
+    if (!isa<TranslationUnitDecl>(D)) {
+      topHash().update(CurrentHashResult.Bytes);
+    }
+
+    return Ret;
+  }
+
+  /*****************************************************************
+   * When doing a semantic hash, we have to use cross-tree links to
+   * other parts of the AST, here we establish these links
+   */
+
+#define DEF_TYPE_GOTO_DECL(CLASS, EXPR)                                        \
+  bool Visit##CLASS(CLASS *T) {                                                \
+    Inherited::Visit##CLASS(T);                                                \
+    return TraverseDecl(EXPR);                                                 \
+  }
+
+  DEF_TYPE_GOTO_DECL(TypedefType, T->getDecl());
+  DEF_TYPE_GOTO_DECL(RecordType, T->getDecl());
+  // The EnumType forwards to the declaration. The declaration does
+  // not hand back to the type.
+  DEF_TYPE_GOTO_DECL(EnumType, T->getDecl());
+  bool TraverseEnumDecl(EnumDecl *E) {
+    /* In the original RecursiveASTVisitor
+       > if (D->getTypeForDecl()) {
+       >    TRY_TO(TraverseType(QualType(D->getTypeForDecl(), 0)));
+       > }
+       => NO, NO, NO, to avoid endless recursion
+    */
+    return Inherited::WalkUpFromEnumDecl(E);
+  }
+
+  bool VisitTypeDecl(TypeDecl *D) {
+    // If we would hash the resulting type for a typedef, we
+    // would get into an endless recursion.
+    if (!isa<TypedefNameDecl>(D) && !isa<RecordDecl>(D) && !isa<EnumDecl>(D)) {
+      addData(QualType(D->getTypeForDecl(), 0));
+    }
+    return true;
+  }
+
+  bool VisitDeclRefExpr(DeclRefExpr *E) {
+    ValueDecl *ValDecl = E->getDecl();
+    // Function Declarations are handled in VisitCallExpr
+    if (!ValDecl) {
+      return true;
+    }
+    if (isa<VarDecl>(ValDecl)) {
+      /* We emulate TraverseDecl here for VarDecl, because we
+       * are not allowed to call TraverseDecl here, since the
+       * initial expression of a DeclRefExpr might reference a
+       * sourronding Declaration itself. For example:
+       *
+       * struct foo {int N;}
+       * struct foo a = { sizeof(a) };
+       */
+      VarDecl *VD = static_cast<VarDecl *>(ValDecl);
+      VisitNamedDecl(VD);
+      Inherited::TraverseType(VD->getType());
+      VisitVarDecl(VD);
+    } else if (isa<FunctionDecl>(ValDecl)) {
+      /* Hash Functions without their body */
+      FunctionDecl *FD = static_cast<FunctionDecl *>(ValDecl);
+      Stmt *Body = FD->getBody();
+      FD->setBody(nullptr);
+      TraverseDecl(FD);
+      FD->setBody(Body);
+    } else {
+      TraverseDecl(ValDecl);
+    }
+    return true;
+  }
+
+  bool VisitValueDecl(ValueDecl *D) {
+    /* Field Declarations can induce recursions */
+    if (isa<FieldDecl>(D)) {
+      addData(std::string(D->getType().getAsString()));
+    } else {
+      addData(D->getType());
+    }
+    addData(D->isWeak());
+    return true;
+  }
+
+  /*****************************************************************
+   * For performance reasons, we cache some of the hashes for types
+   * and declarations.
+   */
+
+public:
+  // We store hashes for declarations and types in separate maps.
+  std::map<const Type *, HashResult> TypeSilo;
+  std::map<const Decl *, HashResult> DeclSilo;
+
+  void storeHash(const Type *Obj, HashResult Dig) { TypeSilo[Obj] = Dig; }
+
+  void storeHash(const Decl *Obj, HashResult Dig) { DeclSilo[Obj] = Dig; }
+
+  const HashResult *getHash(const Type *Obj) {
+    if (TypeSilo.find(Obj) != TypeSilo.end()) {
+      return &TypeSilo[Obj];
+    }
+    return nullptr;
+  }
+
+  const HashResult *getHash(const Decl *Obj) {
+    if (DeclSilo.find(Obj) != DeclSilo.end()) {
+      return &DeclSilo[Obj];
+    }
+    return nullptr;
+  }
+
+  /*****************************************************************
+   * In order to produce hashes for subtrees on the way, a hash
+   * stack is used. When a new subhash is meant to be calculated, we
+   * push a new stack on the hash. All hashing functions use always
+   * the top of the hashing stack.
+   */
+protected:
+  llvm::SmallVector<Hash, 32> HashStack;
+
+public:
+  Hash *pushHash() {
+    HashStack.push_back(Hash());
+    return &HashStack.back();
+  }
+
+  HashResult popHash(const Hash *ShouldBe = nullptr) {
+    assert(!ShouldBe || ShouldBe == &HashStack.back());
+
+    // Finalize the Hash and return the digest.
+    HashResult CurrentDigest;
+    topHash().final(CurrentDigest);
+    HashStack.pop_back();
+    return CurrentDigest;
+  }
+
+  Hash &topHash() { return HashStack.back(); }
+};
+
+} // namespace clang
+#endif // LLVM_CLANG_AST_CHASHVISITOR_H
Index: include/clang/AST/AttrDataCollectors.td
===================================================================
--- /dev/null
+++ include/clang/AST/AttrDataCollectors.td
@@ -0,0 +1,10 @@
+//--- Attributes ---------------------------------------------------------//
+class Attr {
+  code Code = [{
+    std::string AttrString;      
+    llvm::raw_string_ostream OS(AttrString);
+    S->printPretty(OS, Context.getLangOpts());
+    OS.flush();
+    addData(AttrString);
+  }];
+}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to