juliehockett updated this revision to Diff 137244.
juliehockett added a comment.

Adding hashing to reduce the size of USRs and updating tests.


https://reviews.llvm.org/D41102

Files:
  CMakeLists.txt
  clang-doc/BitcodeWriter.cpp
  clang-doc/BitcodeWriter.h
  clang-doc/CMakeLists.txt
  clang-doc/ClangDoc.h
  clang-doc/Mapper.cpp
  clang-doc/Mapper.h
  clang-doc/Representation.h
  clang-doc/Serialize.cpp
  clang-doc/Serialize.h
  clang-doc/tool/CMakeLists.txt
  clang-doc/tool/ClangDocMain.cpp
  docs/clang-doc.rst
  test/CMakeLists.txt
  test/clang-doc/mapper-class-in-class.cpp
  test/clang-doc/mapper-class-in-function.cpp
  test/clang-doc/mapper-class.cpp
  test/clang-doc/mapper-comments.cpp
  test/clang-doc/mapper-enum.cpp
  test/clang-doc/mapper-function.cpp
  test/clang-doc/mapper-method.cpp
  test/clang-doc/mapper-namespace.cpp
  test/clang-doc/mapper-struct.cpp
  test/clang-doc/mapper-union.cpp

Index: test/clang-doc/mapper-union.cpp
===================================================================
--- /dev/null
+++ test/clang-doc/mapper-union.cpp
@@ -0,0 +1,28 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo "" > %t/compile_flags.txt
+// RUN: cp "%s" "%t/test.cpp"
+// RUN: clang-doc --dump-mapper --omit-filenames -doxygen -p %t %t/test.cpp -output=%t/docs
+// RUN: llvm-bcanalyzer %t/docs/bc/0B8A6B938B939B77C6325CCCC8AA3E938BF9E2E8.bc --dump | FileCheck %s
+
+union D { int X; int Y; };
+
+// CHECK: <BLOCKINFO_BLOCK/>
+// CHECK-NEXT: <VersionBlock NumWords=1 BlockCodeSize=4>
+  // CHECK-NEXT: <Version abbrevid=4 op0=1/>
+// CHECK-NEXT: </VersionBlock>
+// CHECK-NEXT: <RecordBlock NumWords=31 BlockCodeSize=4>
+  // CHECK-NEXT: <USR abbrevid=4 op0=40/> blob data = '0B8A6B938B939B77C6325CCCC8AA3E938BF9E2E8'
+  // CHECK-NEXT: <Name abbrevid=5 op0=1/> blob data = 'D'
+  // CHECK-NEXT: <TagType abbrevid=9 op0=2/>
+  // CHECK-NEXT: <MemberTypeBlock NumWords=6 BlockCodeSize=4>
+    // CHECK-NEXT: <Type abbrevid=4 op0=4 op1=3/> blob data = 'int'
+    // CHECK-NEXT: <Name abbrevid=5 op0=4/> blob data = 'D::X'
+    // CHECK-NEXT: <Access abbrevid=6 op0=3/>
+  // CHECK-NEXT: </MemberTypeBlock>
+  // CHECK-NEXT: <MemberTypeBlock NumWords=6 BlockCodeSize=4>
+    // CHECK-NEXT: <Type abbrevid=4 op0=4 op1=3/> blob data = 'int'
+    // CHECK-NEXT: <Name abbrevid=5 op0=4/> blob data = 'D::Y'
+    // CHECK-NEXT: <Access abbrevid=6 op0=3/>
+  // CHECK-NEXT: </MemberTypeBlock>
+// CHECK-NEXT: </RecordBlock>
Index: test/clang-doc/mapper-struct.cpp
===================================================================
--- /dev/null
+++ test/clang-doc/mapper-struct.cpp
@@ -0,0 +1,22 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo "" > %t/compile_flags.txt
+// RUN: cp "%s" "%t/test.cpp"
+// RUN: clang-doc --dump-mapper --omit-filenames -doxygen -p %t %t/test.cpp -output=%t/docs
+// RUN: llvm-bcanalyzer %t/docs/bc/06B5F6A19BA9F6A832E127C9968282B94619B210.bc --dump | FileCheck %s
+
+struct C { int i; };
+
+// CHECK: <BLOCKINFO_BLOCK/>
+// CHECK-NEXT: <VersionBlock NumWords=1 BlockCodeSize=4>
+  // CHECK-NEXT: <Version abbrevid=4 op0=1/>
+// CHECK-NEXT: </VersionBlock>
+// CHECK-NEXT: <RecordBlock NumWords=22 BlockCodeSize=4>
+  // CHECK-NEXT: <USR abbrevid=4 op0=40/> blob data = '06B5F6A19BA9F6A832E127C9968282B94619B210'
+  // CHECK-NEXT: <Name abbrevid=5 op0=1/> blob data = 'C'
+  // CHECK-NEXT: <MemberTypeBlock NumWords=6 BlockCodeSize=4>
+    // CHECK-NEXT: <Type abbrevid=4 op0=4 op1=3/> blob data = 'int'
+    // CHECK-NEXT: <Name abbrevid=5 op0=4/> blob data = 'C::i'
+    // CHECK-NEXT: <Access abbrevid=6 op0=3/>
+  // CHECK-NEXT: </MemberTypeBlock>
+// CHECK-NEXT: </RecordBlock>
Index: test/clang-doc/mapper-namespace.cpp
===================================================================
--- /dev/null
+++ test/clang-doc/mapper-namespace.cpp
@@ -0,0 +1,17 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo "" > %t/compile_flags.txt
+// RUN: cp "%s" "%t/test.cpp"
+// RUN: clang-doc --dump-mapper --omit-filenames -doxygen -p %t %t/test.cpp -output=%t/docs
+// RUN: llvm-bcanalyzer %t/docs/bc/8D042EFFC98B373450BC6B5B90A330C25A150E9C.bc --dump | FileCheck %s
+
+namespace A {}
+
+// CHECK: <BLOCKINFO_BLOCK/>
+// CHECK-NEXT: <VersionBlock NumWords=1 BlockCodeSize=4>
+  // CHECK-NEXT: <Version abbrevid=4 op0=1/>
+// CHECK-NEXT: </VersionBlock>
+// CHECK-NEXT: <NamespaceBlock NumWords=14 BlockCodeSize=4>
+  // CHECK-NEXT: <USR abbrevid=4 op0=40/> blob data = '8D042EFFC98B373450BC6B5B90A330C25A150E9C'
+  // CHECK-NEXT: <Name abbrevid=5 op0=1/> blob data = 'A'
+// CHECK-NEXT: </NamespaceBlock>
Index: test/clang-doc/mapper-method.cpp
===================================================================
--- /dev/null
+++ test/clang-doc/mapper-method.cpp
@@ -0,0 +1,41 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo "" > %t/compile_flags.txt
+// RUN: cp "%s" "%t/test.cpp"
+// RUN: clang-doc --dump-mapper --omit-filenames -doxygen -p %t %t/test.cpp -output=%t/docs
+// RUN: llvm-bcanalyzer %t/docs/bc/F0F9FC65FC90F54F690144A7AFB15DFC3D69B6E6.bc --dump | FileCheck %s --check-prefix CHECK-G-F
+// RUN: llvm-bcanalyzer %t/docs/bc/4202E8BF0ECB12AE354C8499C52725B0EE30AED5.bc --dump | FileCheck %s --check-prefix CHECK-G
+
+class G {
+public: 
+	int Method(int param) { return param; }
+};
+
+// CHECK-G: <BLOCKINFO_BLOCK/>
+// CHECK-G-NEXT: <VersionBlock NumWords=1 BlockCodeSize=4>
+  // CHECK-G-NEXT: <Version abbrevid=4 op0=1/>
+// CHECK-G-NEXT: </VersionBlock>
+// CHECK-G-NEXT: <RecordBlock NumWords=14 BlockCodeSize=4>
+  // CHECK-G-NEXT: <USR abbrevid=4 op0=40/> blob data = '4202E8BF0ECB12AE354C8499C52725B0EE30AED5'
+  // CHECK-G-NEXT: <Name abbrevid=5 op0=1/> blob data = 'G'
+  // CHECK-G-NEXT: <TagType abbrevid=9 op0=3/>
+// CHECK-G-NEXT: </RecordBlock>
+
+// CHECK-G-F: <BLOCKINFO_BLOCK/>
+// CHECK-G-F-NEXT: <VersionBlock NumWords=1 BlockCodeSize=4>
+  // CHECK-G-F-NEXT: <Version abbrevid=4 op0=1/>
+// CHECK-G-F-NEXT: </VersionBlock>
+// CHECK-G-F-NEXT: <FunctionBlock NumWords=54 BlockCodeSize=4>
+  // CHECK-G-F-NEXT: <USR abbrevid=4 op0=40/> blob data = 'F0F9FC65FC90F54F690144A7AFB15DFC3D69B6E6'
+  // CHECK-G-F-NEXT: <Name abbrevid=5 op0=6/> blob data = 'Method'
+  // CHECK-G-F-NEXT: <Namespace abbrevid=6 op0=1 op1=40/> blob data = '4202E8BF0ECB12AE354C8499C52725B0EE30AED5'
+  // CHECK-G-F-NEXT: <IsMethod abbrevid=11 op0=1/>
+  // CHECK-G-F-NEXT: <Parent abbrevid=9 op0=1 op1=40/> blob data = '4202E8BF0ECB12AE354C8499C52725B0EE30AED5'
+  // CHECK-G-F-NEXT: <TypeBlock NumWords=4 BlockCodeSize=4>
+    // CHECK-G-F-NEXT: <Type abbrevid=4 op0=4 op1=3/> blob data = 'int'
+  // CHECK-G-F-NEXT: </TypeBlock>
+  // CHECK-G-F-NEXT: <FieldTypeBlock NumWords=7 BlockCodeSize=4>
+    // CHECK-G-F-NEXT: <Type abbrevid=4 op0=4 op1=3/> blob data = 'int'
+    // CHECK-G-F-NEXT: <Name abbrevid=5 op0=5/> blob data = 'param'
+  // CHECK-G-F-NEXT: </FieldTypeBlock>
+// CHECK-G-F-NEXT: </FunctionBlock>
Index: test/clang-doc/mapper-function.cpp
===================================================================
--- /dev/null
+++ test/clang-doc/mapper-function.cpp
@@ -0,0 +1,24 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo "" > %t/compile_flags.txt
+// RUN: cp "%s" "%t/test.cpp"
+// RUN: clang-doc --dump-mapper --omit-filenames -doxygen -p %t %t/test.cpp -output=%t/docs
+// RUN: llvm-bcanalyzer %t/docs/bc/A44B32CC3C087C9AF75DAF50DE193E85E7B2C16B.bc --dump | FileCheck %s
+
+int F(int param) { return param; }
+
+// CHECK: <BLOCKINFO_BLOCK/>
+// CHECK-NEXT: <VersionBlock NumWords=1 BlockCodeSize=4>
+  // CHECK-NEXT: <Version abbrevid=4 op0=1/>
+// CHECK-NEXT: </VersionBlock>
+// CHECK-NEXT: <FunctionBlock NumWords=29 BlockCodeSize=4>
+  // CHECK-NEXT: <USR abbrevid=4 op0=40/> blob data = 'A44B32CC3C087C9AF75DAF50DE193E85E7B2C16B'
+  // CHECK-NEXT: <Name abbrevid=5 op0=1/> blob data = 'F'
+  // CHECK-NEXT: <TypeBlock NumWords=4 BlockCodeSize=4>
+    // CHECK-NEXT: <Type abbrevid=4 op0=4 op1=3/> blob data = 'int'
+  // CHECK-NEXT: </TypeBlock>
+  // CHECK-NEXT: <FieldTypeBlock NumWords=7 BlockCodeSize=4>
+    // CHECK-NEXT: <Type abbrevid=4 op0=4 op1=3/> blob data = 'int'
+    // CHECK-NEXT: <Name abbrevid=5 op0=5/> blob data = 'param'
+  // CHECK-NEXT: </FieldTypeBlock>
+// CHECK-NEXT: </FunctionBlock>
Index: test/clang-doc/mapper-enum.cpp
===================================================================
--- /dev/null
+++ test/clang-doc/mapper-enum.cpp
@@ -0,0 +1,42 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo "" > %t/compile_flags.txt
+// RUN: cp "%s" "%t/test.cpp"
+// RUN: clang-doc --dump-mapper --omit-filenames -doxygen -p %t %t/test.cpp -output=%t/docs
+// RUN: llvm-bcanalyzer %t/docs/bc/FC07BD34D5E77782C263FA944447929EA8753740.bc --dump | FileCheck %s --check-prefix CHECK-B
+// RUN: llvm-bcanalyzer %t/docs/bc/020E6C32A700C3170C009FCCD41671EDDBEAF575.bc --dump | FileCheck %s --check-prefix CHECK-C
+
+enum B { X, Y };
+
+// CHECK-B: <BLOCKINFO_BLOCK/>
+// CHECK-B-NEXT: <VersionBlock NumWords=1 BlockCodeSize=4>
+  // CHECK-B-NEXT: <Version abbrevid=4 op0=1/>
+// CHECK-B-NEXT: </VersionBlock>
+// CHECK-B-NEXT: <EnumBlock NumWords=26 BlockCodeSize=4>
+  // CHECK-B-NEXT: <USR abbrevid=4 op0=40/> blob data = 'FC07BD34D5E77782C263FA944447929EA8753740'
+  // CHECK-B-NEXT: <Name abbrevid=5 op0=1/> blob data = 'B'
+  // CHECK-B-NEXT: <TypeBlock NumWords=4 BlockCodeSize=4>
+    // CHECK-B-NEXT: <Type abbrevid=4 op0=3 op1=1/> blob data = 'X'
+  // CHECK-B-NEXT: </TypeBlock>
+  // CHECK-B-NEXT: <TypeBlock NumWords=4 BlockCodeSize=4>
+    // CHECK-B-NEXT: <Type abbrevid=4 op0=3 op1=1/> blob data = 'Y'
+  // CHECK-B-NEXT: </TypeBlock>
+// CHECK-B-NEXT: </EnumBlock>
+
+enum class C { A, B };
+
+// CHECK-C: <BLOCKINFO_BLOCK/>
+// CHECK-C-NEXT: <VersionBlock NumWords=1 BlockCodeSize=4>
+  // CHECK-C-NEXT: <Version abbrevid=4 op0=1/>
+// CHECK-C-NEXT: </VersionBlock>
+// CHECK-C-NEXT: <EnumBlock NumWords=26 BlockCodeSize=4>
+  // CHECK-C-NEXT: <USR abbrevid=4 op0=40/> blob data = '020E6C32A700C3170C009FCCD41671EDDBEAF575'
+  // CHECK-C-NEXT: <Name abbrevid=5 op0=1/> blob data = 'C'
+  // CHECK-C-NEXT: <Scoped abbrevid=9 op0=1/>
+  // CHECK-C-NEXT: <TypeBlock NumWords=4 BlockCodeSize=4>
+    // CHECK-C-NEXT: <Type abbrevid=4 op0=3 op1=4/> blob data = 'C::A'
+  // CHECK-C-NEXT: </TypeBlock>
+  // CHECK-C-NEXT: <TypeBlock NumWords=4 BlockCodeSize=4>
+    // CHECK-C-NEXT: <Type abbrevid=4 op0=3 op1=4/> blob data = 'C::B'
+  // CHECK-C-NEXT: </TypeBlock>
+// CHECK-C-NEXT: </EnumBlock>
Index: test/clang-doc/mapper-comments.cpp
===================================================================
--- /dev/null
+++ test/clang-doc/mapper-comments.cpp
@@ -0,0 +1,171 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo "" > %t/compile_flags.txt
+// RUN: cp "%s" "%t/test.cpp"
+// RUN: clang-doc --dump-mapper --omit-filenames -doxygen -p %t %t/test.cpp -output=%t/docs
+// RUN: llvm-bcanalyzer %t/docs/bc/7574630614A535710E5A6ABCFFF98BCA2D06A4CA.bc --dump | FileCheck %s
+
+/// \brief Brief description.
+///
+/// Extended description that
+/// continues onto the next line.
+/// 
+/// <ul> class="test">
+///   <li> Testing.
+/// </ul>
+///
+/// \verbatim
+/// The description continues.
+/// \endverbatim
+///
+/// \param [out] I is a parameter.
+/// \param J is a parameter.
+/// \return int
+int F(int I, int J);
+
+// CHECK: <BLOCKINFO_BLOCK/>
+// CHECK-NEXT: <VersionBlock NumWords=1 BlockCodeSize=4>
+  // CHECK-NEXT: <Version abbrevid=4 op0=1/>
+// CHECK-NEXT: </VersionBlock>
+// CHECK-NEXT: <FunctionBlock NumWords=389 BlockCodeSize=4>
+  // CHECK-NEXT: <USR abbrevid=4 op0=40/> blob data = '7574630614A535710E5A6ABCFFF98BCA2D06A4CA'
+  // CHECK-NEXT: <Name abbrevid=5 op0=1/> blob data = 'F'
+  // CHECK-NEXT: <CommentBlock NumWords=351 BlockCodeSize=4>
+    // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'FullComment'
+    // CHECK-NEXT: <CommentBlock NumWords=13 BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+      // CHECK-NEXT: <CommentBlock NumWords=5 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords=31 BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=19/> blob data = 'BlockCommandComment'
+      // CHECK-NEXT: <Name abbrevid=6 op0=5/> blob data = 'brief'
+      // CHECK-NEXT: <CommentBlock NumWords=19 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+        // CHECK-NEXT: <CommentBlock NumWords=11 BlockCodeSize=4>
+          // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+          // CHECK-NEXT: <Text abbrevid=5 op0=19/> blob data = ' Brief description.'
+        // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords=37 BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+      // CHECK-NEXT: <CommentBlock NumWords=13 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+        // CHECK-NEXT: <Text abbrevid=5 op0=26/> blob data = ' Extended description that'
+      // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: <CommentBlock NumWords=14 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+        // CHECK-NEXT: <Text abbrevid=5 op0=30/> blob data = ' continues onto the next line.'
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords=83 BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+      // CHECK-NEXT: <CommentBlock NumWords=5 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+      // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: <CommentBlock NumWords=9 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=19/> blob data = 'HTMLStartTagComment'
+        // CHECK-NEXT: <Name abbrevid=6 op0=2/> blob data = 'ul'
+      // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: <CommentBlock NumWords=10 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+        // CHECK-NEXT: <Text abbrevid=5 op0=14/> blob data = ' class="test">'
+      // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: <CommentBlock NumWords=5 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+      // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: <CommentBlock NumWords=9 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=19/> blob data = 'HTMLStartTagComment'
+        // CHECK-NEXT: <Name abbrevid=6 op0=2/> blob data = 'li'
+      // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: <CommentBlock NumWords=9 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+        // CHECK-NEXT: <Text abbrevid=5 op0=9/> blob data = ' Testing.'
+      // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: <CommentBlock NumWords=5 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+      // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: <CommentBlock NumWords=9 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=17/> blob data = 'HTMLEndTagComment'
+        // CHECK-NEXT: <Name abbrevid=6 op0=2/> blob data = 'ul'
+        // CHECK-NEXT: <SelfClosing abbrevid=10 op0=1/>
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords=13 BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+      // CHECK-NEXT: <CommentBlock NumWords=5 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords=32 BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=20/> blob data = 'VerbatimBlockComment'
+      // CHECK-NEXT: <Name abbrevid=6 op0=8/> blob data = 'verbatim'
+      // CHECK-NEXT: <CloseName abbrevid=9 op0=11/> blob data = 'endverbatim'
+      // CHECK-NEXT: <CommentBlock NumWords=16 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=24/> blob data = 'VerbatimBlockLineComment'
+        // CHECK-NEXT: <Text abbrevid=5 op0=27/> blob data = ' The description continues.'
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords=13 BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+      // CHECK-NEXT: <CommentBlock NumWords=5 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords=39 BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=19/> blob data = 'ParamCommandComment'
+      // CHECK-NEXT: <Direction abbrevid=7 op0=5/> blob data = '[out]'
+      // CHECK-NEXT: <ParamName abbrevid=8 op0=1/> blob data = 'I'
+      // CHECK-NEXT: <Explicit abbrevid=11 op0=1/>
+      // CHECK-NEXT: <CommentBlock NumWords=25 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+        // CHECK-NEXT: <CommentBlock NumWords=10 BlockCodeSize=4>
+          // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+          // CHECK-NEXT: <Text abbrevid=5 op0=16/> blob data = ' is a parameter.'
+        // CHECK-NEXT: </CommentBlock>
+        // CHECK-NEXT: <CommentBlock NumWords=5 BlockCodeSize=4>
+          // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+        // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords=38 BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=19/> blob data = 'ParamCommandComment'
+      // CHECK-NEXT: <Direction abbrevid=7 op0=4/> blob data = '[in]'
+      // CHECK-NEXT: <ParamName abbrevid=8 op0=1/> blob data = 'J'
+      // CHECK-NEXT: <CommentBlock NumWords=25 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+        // CHECK-NEXT: <CommentBlock NumWords=10 BlockCodeSize=4>
+          // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+          // CHECK-NEXT: <Text abbrevid=5 op0=16/> blob data = ' is a parameter.'
+        // CHECK-NEXT: </CommentBlock>
+        // CHECK-NEXT: <CommentBlock NumWords=5 BlockCodeSize=4>
+          // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+        // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: <CommentBlock NumWords=27 BlockCodeSize=4>
+      // CHECK-NEXT: <Kind abbrevid=4 op0=19/> blob data = 'BlockCommandComment'
+      // CHECK-NEXT: <Name abbrevid=6 op0=6/> blob data = 'return'
+      // CHECK-NEXT: <CommentBlock NumWords=15 BlockCodeSize=4>
+        // CHECK-NEXT: <Kind abbrevid=4 op0=16/> blob data = 'ParagraphComment'
+        // CHECK-NEXT: <CommentBlock NumWords=7 BlockCodeSize=4>
+          // CHECK-NEXT: <Kind abbrevid=4 op0=11/> blob data = 'TextComment'
+          // CHECK-NEXT: <Text abbrevid=5 op0=4/> blob data = ' int'
+        // CHECK-NEXT: </CommentBlock>
+      // CHECK-NEXT: </CommentBlock>
+    // CHECK-NEXT: </CommentBlock>
+  // CHECK-NEXT: </CommentBlock>
+  // CHECK-NEXT: <TypeBlock NumWords=4 BlockCodeSize=4>
+    // CHECK-NEXT: <Type abbrevid=4 op0=4 op1=3/> blob data = 'int'
+  // CHECK-NEXT: </TypeBlock>
+  // CHECK-NEXT: <FieldTypeBlock NumWords=6 BlockCodeSize=4>
+    // CHECK-NEXT: <Type abbrevid=4 op0=4 op1=3/> blob data = 'int'
+    // CHECK-NEXT: <Name abbrevid=5 op0=1/> blob data = 'I'
+  // CHECK-NEXT: </FieldTypeBlock>
+  // CHECK-NEXT: <FieldTypeBlock NumWords=6 BlockCodeSize=4>
+    // CHECK-NEXT: <Type abbrevid=4 op0=4 op1=3/> blob data = 'int'
+    // CHECK-NEXT: <Name abbrevid=5 op0=1/> blob data = 'J'
+  // CHECK-NEXT: </FieldTypeBlock>
+// CHECK-NEXT: </FunctionBlock>
Index: test/clang-doc/mapper-class.cpp
===================================================================
--- /dev/null
+++ test/clang-doc/mapper-class.cpp
@@ -0,0 +1,18 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo "" > %t/compile_flags.txt
+// RUN: cp "%s" "%t/test.cpp"
+// RUN: clang-doc --dump-mapper --omit-filenames -doxygen -p %t %t/test.cpp -output=%t/docs
+// RUN: llvm-bcanalyzer %t/docs/bc/289584A8E0FF4178A794622A547AA622503967A1.bc --dump | FileCheck %s
+
+class E {};
+
+// CHECK: <BLOCKINFO_BLOCK/>
+// CHECK-NEXT: <VersionBlock NumWords=1 BlockCodeSize=4>
+  // CHECK-NEXT: <Version abbrevid=4 op0=1/>
+// CHECK-NEXT: </VersionBlock>
+// CHECK-NEXT: <RecordBlock NumWords=14 BlockCodeSize=4>
+  // CHECK-NEXT: <USR abbrevid=4 op0=40/> blob data = '289584A8E0FF4178A794622A547AA622503967A1'
+  // CHECK-NEXT: <Name abbrevid=5 op0=1/> blob data = 'E'
+  // CHECK-NEXT: <TagType abbrevid=9 op0=3/>
+// CHECK-NEXT: </RecordBlock>
Index: test/clang-doc/mapper-class-in-function.cpp
===================================================================
--- /dev/null
+++ test/clang-doc/mapper-class-in-function.cpp
@@ -0,0 +1,36 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo "" > %t/compile_flags.txt
+// RUN: cp "%s" "%t/test.cpp"
+// RUN: clang-doc --dump-mapper --omit-filenames -doxygen -p %t %t/test.cpp -output=%t/docs
+// RUN: llvm-bcanalyzer %t/docs/bc/B6AC4C5C9F2EA3F2B3ECE1A33D349F4EE502B24E.bc --dump | FileCheck %s --check-prefix CHECK-H
+// RUN: llvm-bcanalyzer %t/docs/bc/FE4F139BB2A9041BA63F3D1267DE32EEBEF623BE.bc --dump | FileCheck %s --check-prefix CHECK-H-I
+
+void H() {
+	class I {};
+}
+
+// CHECK-H: <BLOCKINFO_BLOCK/>
+// CHECK-H-NEXT: <VersionBlock NumWords=1 BlockCodeSize=4>
+  // CHECK-H-NEXT: <Version abbrevid=4 op0=1/>
+// CHECK-H-NEXT: </VersionBlock>
+// CHECK-H-NEXT: <FunctionBlock NumWords=20 BlockCodeSize=4>
+  // CHECK-H-NEXT: <USR abbrevid=4 op0=40/> blob data = 'B6AC4C5C9F2EA3F2B3ECE1A33D349F4EE502B24E'
+  // CHECK-H-NEXT: <Name abbrevid=5 op0=1/> blob data = 'H'
+  // CHECK-H-NEXT: <TypeBlock NumWords=4 BlockCodeSize=4>
+    // CHECK-H-NEXT: <Type abbrevid=4 op0=4 op1=4/> blob data = 'void'
+  // CHECK-H-NEXT: </TypeBlock>
+// CHECK-H-NEXT: </FunctionBlock>
+
+// CHECK-H-I: <BLOCKINFO_BLOCK/>
+// CHECK-H-I-NEXT: <VersionBlock NumWords=1 BlockCodeSize=4>
+  // CHECK-H-I-NEXT: <Version abbrevid=4 op0=1/>
+// CHECK-H-I-NEXT: </VersionBlock>
+// CHECK-H-I-NEXT: <RecordBlock NumWords=26 BlockCodeSize=4>
+  // CHECK-H-I-NEXT: <USR abbrevid=4 op0=40/> blob data = 'FE4F139BB2A9041BA63F3D1267DE32EEBEF623BE'
+  // CHECK-H-I-NEXT: <Name abbrevid=5 op0=1/> blob data = 'I'
+  // CHECK-H-I-NEXT: <Namespace abbrevid=6 op0=2 op1=40/> blob data = 'B6AC4C5C9F2EA3F2B3ECE1A33D349F4EE502B24E'
+  // CHECK-H-I-NEXT: <TagType abbrevid=9 op0=3/>
+// CHECK-H-I-NEXT: </RecordBlock>
+
+
Index: test/clang-doc/mapper-class-in-class.cpp
===================================================================
--- /dev/null
+++ test/clang-doc/mapper-class-in-class.cpp
@@ -0,0 +1,33 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo "" > %t/compile_flags.txt
+// RUN: cp "%s" "%t/test.cpp"
+// RUN: clang-doc --dump-mapper --omit-filenames -doxygen -p %t %t/test.cpp -output=%t/docs
+// RUN: llvm-bcanalyzer %t/docs/bc/641AB4A3D36399954ACDE29C7A8833032BF40472.bc --dump | FileCheck %s --check-prefix CHECK-X-Y
+// RUN: llvm-bcanalyzer %t/docs/bc/CA7C7935730B5EACD25F080E9C83FA087CCDC75E.bc --dump | FileCheck %s --check-prefix CHECK-X
+
+class X {
+  class Y {};
+};
+
+// CHECK-X: <BLOCKINFO_BLOCK/>
+// CHECK-X-NEXT: <VersionBlock NumWords=1 BlockCodeSize=4>
+  // CHECK-X-NEXT: <Version abbrevid=4 op0=1/>
+// CHECK-X-NEXT: </VersionBlock>
+// CHECK-X-NEXT: <RecordBlock NumWords=14 BlockCodeSize=4>
+  // CHECK-X-NEXT: <USR abbrevid=4 op0=40/> blob data = 'CA7C7935730B5EACD25F080E9C83FA087CCDC75E'
+  // CHECK-X-NEXT: <Name abbrevid=5 op0=1/> blob data = 'X'
+  // CHECK-X-NEXT: <TagType abbrevid=9 op0=3/>
+// CHECK-X-NEXT: </RecordBlock>
+
+
+// CHECK-X-Y: <BLOCKINFO_BLOCK/>
+// CHECK-X-Y-NEXT: <VersionBlock NumWords=1 BlockCodeSize=4>
+  // CHECK-X-Y-NEXT: <Version abbrevid=4 op0=1/>
+// CHECK-X-Y-NEXT: </VersionBlock>
+// CHECK-X-Y-NEXT: <RecordBlock NumWords=26 BlockCodeSize=4>
+  // CHECK-X-Y-NEXT: <USR abbrevid=4 op0=40/> blob data = '641AB4A3D36399954ACDE29C7A8833032BF40472'
+  // CHECK-X-Y-NEXT: <Name abbrevid=5 op0=1/> blob data = 'Y'
+  // CHECK-X-Y-NEXT: <Namespace abbrevid=6 op0=1 op1=40/> blob data = 'CA7C7935730B5EACD25F080E9C83FA087CCDC75E'
+  // CHECK-X-Y-NEXT: <TagType abbrevid=9 op0=3/>
+// CHECK-X-Y-NEXT: </RecordBlock>
Index: test/CMakeLists.txt
===================================================================
--- test/CMakeLists.txt
+++ test/CMakeLists.txt
@@ -41,6 +41,7 @@
   clang-apply-replacements
   clang-change-namespace
   clangd
+  clang-doc
   clang-include-fixer
   clang-move
   clang-query
Index: docs/clang-doc.rst
===================================================================
--- /dev/null
+++ docs/clang-doc.rst
@@ -0,0 +1,62 @@
+===================
+Clang-Doc
+===================
+
+.. contents::
+
+:program:`clang-doc` is a tool for generating C and C++ documentation from
+source code and comments. 
+
+The tool is in a very early development stage, so you might encounter bugs and
+crashes. Submitting reports with information about how to reproduce the issue
+to `the LLVM bugtracker <https://llvm.org/bugs>`_ will definitely help the
+project. If you have any ideas or suggestions, please file a feature request
+there.
+
+Use
+=====
+
+:program:`clang-doc` is a `LibTooling
+<http://clang.llvm.org/docs/LibTooling.html>`_-based tool, and so requires a
+compile command database for your project (for an example of how to do this 
+see `How To Setup Tooling For LLVM
+<http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html>`_).
+
+The tool can be used on a single file or multiple files as defined in 
+the compile commands database:
+
+.. code-block:: console
+
+  $ clang-doc /path/to/file.cpp -p /path/to/compile/commands
+
+This generates an intermediate representation of the declarations and their
+associated information in the specified TUs, serialized to LLVM bitcode.
+
+As currently implemented, the tool is only able to parse TUs that can be 
+stored in-memory. Future additions will extend the current framework to use
+map-reduce frameworks to allow for use with large codebases.
+
+:program:`clang-doc` offers the following options:
+
+.. code-block:: console
+
+  $ clang-doc --help
+  USAGE: clang-doc [options] <source0> [... <sourceN>]
+
+  OPTIONS:
+
+  Generic Options:
+
+    -help                      - Display available options (-help-hidden for more)
+    -help-list                 - Display list of available options (-help-list-hidden for more)
+    -version                   - Display the version of this program
+
+  clang-doc options:
+
+    -doxygen                   - Use only doxygen-style comments to generate docs.
+    -dump-mapper               - Dump mapper results to bitcode file.
+    -extra-arg=<string>        - Additional argument to append to the compiler command line
+    -extra-arg-before=<string> - Additional argument to prepend to the compiler command line
+    -omit-filenames            - Omit filenames in output.
+    -output=<string>           - Directory for outputting generated files.
+    -p=<string>                - Build path
Index: clang-doc/tool/ClangDocMain.cpp
===================================================================
--- /dev/null
+++ clang-doc/tool/ClangDocMain.cpp
@@ -0,0 +1,116 @@
+//===-- ClangDocMain.cpp - ClangDoc -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tool generates C and C++ documentation from source code
+// and comments. Generally, it runs a LibTooling FrontendAction on source files,
+// mapping each declaration in those files to its USR and serializing relevant
+// information into LLVM bitcode. It then runs a pass over the collected
+// declaration information, reducing by USR. There is an option to dump this
+// intermediate result to bitcode. Finally, it hands the reduced information
+// off to a generator, which does the final parsing from the intermediate
+// representation to the desired output format.
+//
+//===----------------------------------------------------------------------===//
+
+#include <string>
+#include "ClangDoc.h"
+#include "clang/AST/AST.h"
+#include "clang/AST/Decl.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchersInternal.h"
+#include "clang/Driver/Options.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Execution.h"
+#include "clang/Tooling/StandaloneExecution.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clang::ast_matchers;
+using namespace clang::tooling;
+using namespace clang;
+
+static llvm::cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage);
+static llvm::cl::OptionCategory ClangDocCategory("clang-doc options");
+
+static llvm::cl::opt<std::string> OutDirectory(
+    "output", llvm::cl::desc("Directory for outputting generated files."),
+    llvm::cl::init("docs"), llvm::cl::cat(ClangDocCategory));
+
+static llvm::cl::opt<bool> DumpMapperResult(
+    "dump-mapper", llvm::cl::desc("Dump mapper results to bitcode file."),
+    llvm::cl::init(false), llvm::cl::cat(ClangDocCategory));
+
+static llvm::cl::opt<bool> OmitFilenames(
+    "omit-filenames", llvm::cl::desc("Omit filenames in output."),
+    llvm::cl::init(false), llvm::cl::cat(ClangDocCategory));
+
+static llvm::cl::opt<bool> DoxygenOnly(
+    "doxygen",
+    llvm::cl::desc("Use only doxygen-style comments to generate docs."),
+    llvm::cl::init(false), llvm::cl::cat(ClangDocCategory));
+
+int main(int argc, const char **argv) {
+  llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
+  std::error_code OK; // Default-constructed value; compared against below.
+
+  auto Exec = clang::tooling::createExecutorFromCommandLineArgs(
+      argc, argv, ClangDocCategory);
+
+  if (!Exec) {
+    llvm::errs() << toString(Exec.takeError()) << "\n";
+    return 1;
+  }
+
+  ArgumentsAdjuster ArgAdjuster;
+  if (!DoxygenOnly)
+    ArgAdjuster = combineAdjusters(
+        getInsertArgumentAdjuster("-fparse-all-comments",
+                                  tooling::ArgumentInsertPosition::END),
+        ArgAdjuster);
+
+  // Mapping phase: run the mapper action over the inputs; errors are printed.
+  llvm::outs() << "Mapping decls...\n";
+  auto Err = Exec->get()->execute(
+      llvm::make_unique<doc::MapperActionFactory>(
+          Exec->get()->getExecutionContext(), OmitFilenames),
+      ArgAdjuster);
+  if (Err) llvm::errs() << toString(std::move(Err)) << "\n";
+
+  if (DumpMapperResult) {
+    Exec->get()->getToolResults()->forEachResult([&](StringRef Key,
+                                                     StringRef Value) {
+      SmallString<128> IRRootPath;
+      llvm::sys::path::native(OutDirectory, IRRootPath);
+      llvm::sys::path::append(IRRootPath, "bc");
+      std::error_code DirectoryStatus =
+          llvm::sys::fs::create_directories(IRRootPath);
+      if (DirectoryStatus != OK) {
+        llvm::errs() << "Unable to create documentation directories.\n";
+        return;
+      }
+      llvm::sys::path::append(IRRootPath, Key + ".bc"); // <output>/bc/<Key>.bc
+      std::error_code OutErrorInfo;
+      llvm::raw_fd_ostream OS(IRRootPath, OutErrorInfo, llvm::sys::fs::F_None);
+      if (OutErrorInfo != OK) {
+        llvm::errs() << "Error opening documentation file.\n";
+        return;
+      }
+      OS << Value;
+      OS.close();
+    });
+  }
+
+  return 0; // NOTE(review): also returned when mapping or dumping failed.
+}
Index: clang-doc/tool/CMakeLists.txt
===================================================================
--- /dev/null
+++ clang-doc/tool/CMakeLists.txt
@@ -0,0 +1,17 @@
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..)
+
+add_clang_executable(clang-doc
+  ClangDocMain.cpp
+  )
+
+target_link_libraries(clang-doc
+  PRIVATE
+  clangAST
+  clangASTMatchers
+  clangBasic
+  clangFrontend
+  clangDoc
+  clangTooling
+  clangToolingCore
+  )
+  
\ No newline at end of file
Index: clang-doc/Serialize.h
===================================================================
--- /dev/null
+++ clang-doc/Serialize.h
@@ -0,0 +1,53 @@
+//===-- Serialize.h - ClangDoc Serializer -----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the serializing functions for the clang-doc tool. Given
+// a particular declaration, it collects the appropriate information and returns
+// a serialized bitcode string for the declaration.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SERIALIZE_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SERIALIZE_H
+
+#include "Representation.h"
+#include "clang/AST/AST.h"
+#include "clang/AST/CommentVisitor.h"
+#include <string>
+#include <vector>
+
+using namespace clang::comments;
+
+namespace clang {
+namespace doc {
+namespace serialize {
+
+std::string emitInfo(const NamespaceDecl *D, const FullComment *FC,
+                     int LineNumber, StringRef File, bool OmitFilenames);
+std::string emitInfo(const RecordDecl *D, const FullComment *FC, int LineNumber,
+                     StringRef File, bool OmitFilenames);
+std::string emitInfo(const EnumDecl *D, const FullComment *FC, int LineNumber,
+                     StringRef File, bool OmitFilenames);
+std::string emitInfo(const FunctionDecl *D, const FullComment *FC,
+                     int LineNumber, StringRef File, bool OmitFilenames);
+std::string emitInfo(const CXXMethodDecl *D, const FullComment *FC,
+                     int LineNumber, StringRef File, bool OmitFilenames);
+
+// Function to hash a given USR value for storage.
+// As USRs (Unified Symbol Resolution) could be large, especially for functions
+// with long type arguments, we use 160-bit SHA1(USR) values to
+// guarantee the uniqueness of symbols while using a relatively small amount of
+// memory (vs storing USRs directly).
+std::string hashUSR(llvm::StringRef USR);
+
+}  // namespace serialize
+}  // namespace doc
+}  // namespace clang
+
+#endif  // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SERIALIZE_H
Index: clang-doc/Serialize.cpp
===================================================================
--- /dev/null
+++ clang-doc/Serialize.cpp
@@ -0,0 +1,337 @@
+//===-- Serialize.cpp - ClangDoc Serializer ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Serialize.h"
+#include "BitcodeWriter.h"
+#include "clang/AST/Comment.h"
+#include "clang/Index/USRGeneration.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/SHA1.h"
+
+using clang::comments::FullComment;
+
+namespace clang {
+namespace doc {
+namespace serialize {
+
+std::string hashUSR(llvm::StringRef USR) {
+  auto Hash = llvm::SHA1::hash(arrayRefFromStringRef(USR));
+  return llvm::toHex(llvm::toStringRef(Hash));
+}
+
+class ClangDocCommentVisitor
+    : public ConstCommentVisitor<ClangDocCommentVisitor> {
+ public:
+  ClangDocCommentVisitor(CommentInfo &CI) : CurrentCI(CI) {}
+
+  void parseComment(const comments::Comment *C);
+
+  void visitTextComment(const TextComment *C);
+  void visitInlineCommandComment(const InlineCommandComment *C);
+  void visitHTMLStartTagComment(const HTMLStartTagComment *C);
+  void visitHTMLEndTagComment(const HTMLEndTagComment *C);
+  void visitBlockCommandComment(const BlockCommandComment *C);
+  void visitParamCommandComment(const ParamCommandComment *C);
+  void visitTParamCommandComment(const TParamCommandComment *C);
+  void visitVerbatimBlockComment(const VerbatimBlockComment *C);
+  void visitVerbatimBlockLineComment(const VerbatimBlockLineComment *C);
+  void visitVerbatimLineComment(const VerbatimLineComment *C);
+
+ private:
+  std::string getCommandName(unsigned CommandID) const;
+  bool isWhitespaceOnly(StringRef S) const;
+
+  CommentInfo &CurrentCI;
+};
+
+void ClangDocCommentVisitor::parseComment(const comments::Comment *C) {
+  CurrentCI.Kind = C->getCommentKindName();
+  ConstCommentVisitor<ClangDocCommentVisitor>::visit(C);
+  for (comments::Comment *Child :
+       llvm::make_range(C->child_begin(), C->child_end())) {
+    CurrentCI.Children.emplace_back(llvm::make_unique<CommentInfo>());
+    ClangDocCommentVisitor Visitor(*CurrentCI.Children.back());
+    Visitor.parseComment(Child);
+  }
+}
+
+void ClangDocCommentVisitor::visitTextComment(const TextComment *C) {
+  if (!isWhitespaceOnly(C->getText())) CurrentCI.Text = C->getText();
+}
+
+void ClangDocCommentVisitor::visitInlineCommandComment(
+    const InlineCommandComment *C) {
+  CurrentCI.Name = getCommandName(C->getCommandID());
+  for (unsigned I = 0, E = C->getNumArgs(); I != E; ++I)
+    CurrentCI.Args.push_back(C->getArgText(I));
+}
+
+void ClangDocCommentVisitor::visitHTMLStartTagComment(
+    const HTMLStartTagComment *C) {
+  CurrentCI.Name = C->getTagName();
+  CurrentCI.SelfClosing = C->isSelfClosing();
+  for (unsigned I = 0, E = C->getNumAttrs(); I < E; ++I) {
+    const HTMLStartTagComment::Attribute &Attr = C->getAttr(I);
+    CurrentCI.AttrKeys.push_back(Attr.Name);
+    CurrentCI.AttrValues.push_back(Attr.Value);
+  }
+}
+
+void ClangDocCommentVisitor::visitHTMLEndTagComment(
+    const HTMLEndTagComment *C) {
+  CurrentCI.Name = C->getTagName();
+  CurrentCI.SelfClosing = true;
+}
+
+void ClangDocCommentVisitor::visitBlockCommandComment(
+    const BlockCommandComment *C) {
+  CurrentCI.Name = getCommandName(C->getCommandID());
+  for (unsigned I = 0, E = C->getNumArgs(); I < E; ++I)
+    CurrentCI.Args.push_back(C->getArgText(I));
+}
+
+void ClangDocCommentVisitor::visitParamCommandComment(
+    const ParamCommandComment *C) {
+  CurrentCI.Direction =
+      ParamCommandComment::getDirectionAsString(C->getDirection());
+  CurrentCI.Explicit = C->isDirectionExplicit();
+  if (C->hasParamName()) CurrentCI.ParamName = C->getParamNameAsWritten();
+}
+
+void ClangDocCommentVisitor::visitTParamCommandComment(
+    const TParamCommandComment *C) {
+  if (C->hasParamName()) CurrentCI.ParamName = C->getParamNameAsWritten();
+}
+
+void ClangDocCommentVisitor::visitVerbatimBlockComment(
+    const VerbatimBlockComment *C) {
+  CurrentCI.Name = getCommandName(C->getCommandID());
+  CurrentCI.CloseName = C->getCloseName();
+}
+
+void ClangDocCommentVisitor::visitVerbatimBlockLineComment(
+    const VerbatimBlockLineComment *C) {
+  if (!isWhitespaceOnly(C->getText())) CurrentCI.Text = C->getText();
+}
+
+void ClangDocCommentVisitor::visitVerbatimLineComment(
+    const VerbatimLineComment *C) {
+  if (!isWhitespaceOnly(C->getText())) CurrentCI.Text = C->getText();
+}
+
+bool ClangDocCommentVisitor::isWhitespaceOnly(llvm::StringRef S) const {
+  return S.trim().empty();  // True for "" too; no isspace() UB on chars < 0.
+}
+
+std::string ClangDocCommentVisitor::getCommandName(unsigned CommandID) const {
+  const CommandInfo *Info = CommandTraits::getBuiltinCommandInfo(CommandID);
+  if (Info) return Info->Name;
+  // TODO: Add parsing for \file command.
+  return "<not a builtin command>";
+}
+
+// Serializing functions.
+
+template <typename T>
+static std::string serialize(T &I, bool OmitFilenames) {
+  SmallString<2048> Buffer;
+  llvm::BitstreamWriter Stream(Buffer);
+  ClangDocBitcodeWriter Writer(Stream, OmitFilenames);
+  Writer.emitBlock(I);
+  return Buffer.str().str();
+}
+
+static void parseFullComment(const FullComment *C, CommentInfo &CI) {
+  ClangDocCommentVisitor Visitor(CI);
+  Visitor.parseComment(C);
+}
+
+static std::string getUSRForDecl(const Decl *D) {
+  llvm::SmallString<128> USR;
+  if (index::generateUSRForDecl(D, USR)) return "";
+  return hashUSR(USR);
+}
+
+static RecordDecl *getDeclForType(const QualType &T) {
+  auto *Ty = T->getAs<RecordType>();
+  if (!Ty) return nullptr;
+  return Ty->getDecl()->getDefinition();
+}
+
+static void parseFields(RecordInfo &I, const RecordDecl *D) {
+  for (const FieldDecl *F : D->fields()) {
+    // FIXME: Set Access to the appropriate value.
+    std::string Type;
+    InfoType RefType = InfoType::IT_default;
+    if (const auto *T = getDeclForType(F->getTypeSourceInfo()->getType())) {
+      // The field's type is a record with a definition: reference it by its
+      // hashed USR. getDeclForType() only looks through RecordType, so T is
+      // always a record here and never an EnumDecl.
+      // FIXME: Resolve EnumType too so enum-typed fields can use IT_enum.
+      Type = getUSRForDecl(T);
+      RefType = InfoType::IT_record;
+    } else {
+      // Not a RecordType, or the record has no definition: fall back to the
+      // string form of the type.
+      Type = F->getTypeSourceInfo()->getType().getAsString();
+    }
+    I.Members.emplace_back(Type, RefType, F->getQualifiedNameAsString());
+  }
+  // TODO: Populate type comment description from info description.
+}
+
+static void parseEnumerators(EnumInfo &I, const EnumDecl *D) {
+  for (const EnumConstantDecl *E : D->enumerators())
+    I.Members.emplace_back(E->getQualifiedNameAsString(), InfoType::IT_enum);
+  // TODO: Populate member comment description from info description.
+}
+
+static void parseParameters(FunctionInfo &I, const FunctionDecl *D) {
+  for (const ParmVarDecl *P : D->parameters()) {
+    std::string Type;
+    InfoType RefType = InfoType::IT_default;
+    if (const auto *T = getDeclForType(P->getOriginalType())) {
+      // The parameter's type is a record with a definition: reference it by
+      // its hashed USR. getDeclForType() only looks through RecordType, so T
+      // is always a record here and never an EnumDecl.
+      // FIXME: Resolve EnumType too so enum-typed params can use IT_enum.
+      Type = getUSRForDecl(T);
+      RefType = InfoType::IT_record;
+    } else {
+      // Not a RecordType, or the record has no definition: fall back to the
+      // string form of the type.
+      Type = P->getOriginalType().getAsString();
+    }
+    I.Params.emplace_back(Type, RefType, P->getQualifiedNameAsString());
+    // TODO: Populate field comment description from info description.
+  }
+}
+
+static void parseBases(RecordInfo &I, const CXXRecordDecl *D) {
+  for (const CXXBaseSpecifier &B : D->bases()) {
+    if (B.isVirtual()) continue;
+    if (const auto *P = getDeclForType(B.getType()))
+      I.Parents.emplace_back(getUSRForDecl(P), InfoType::IT_record);
+    else
+      I.Parents.emplace_back(B.getType().getAsString(), InfoType::IT_default);
+  }
+  for (const CXXBaseSpecifier &B : D->vbases()) {
+    if (const auto *P = getDeclForType(B.getType()))
+      I.VirtualParents.emplace_back(getUSRForDecl(P), InfoType::IT_record);
+    else
+      I.VirtualParents.emplace_back(B.getType().getAsString(),
+                                    InfoType::IT_default);
+  }
+}
+
+template <typename T>
+static void populateParentNamespaces(
+    llvm::SmallVector<Reference, 4> &Namespaces, const T *D) {
+  const auto *DC = cast<DeclContext>(D);  // D must itself be a DeclContext.
+  while ((DC = DC->getParent())) {
+    if (const auto *N = dyn_cast<NamespaceDecl>(DC))
+      Namespaces.emplace_back(getUSRForDecl(N), InfoType::IT_namespace);
+    else if (const auto *N = dyn_cast<RecordDecl>(DC))
+      Namespaces.emplace_back(getUSRForDecl(N), InfoType::IT_record);
+    else if (const auto *N = dyn_cast<FunctionDecl>(DC))
+      Namespaces.emplace_back(getUSRForDecl(N), InfoType::IT_function);
+    else if (const auto *N = dyn_cast<EnumDecl>(DC))
+      Namespaces.emplace_back(getUSRForDecl(N), InfoType::IT_enum);
+  }
+}
+
+template <typename T>
+static void populateInfo(Info &I, const T *D, const FullComment *C) {
+  I.USR = getUSRForDecl(D);
+  I.Name = D->getNameAsString();
+  populateParentNamespaces(I.Namespace, D);
+  if (C) {
+    I.Description.emplace_back();
+    parseFullComment(C, I.Description.back());
+  }
+}
+
+template <typename T>
+static void populateSymbolInfo(SymbolInfo &I, const T *D, const FullComment *C,
+                               int LineNumber, StringRef Filename) {
+  populateInfo(I, D, C);
+  if (D->isThisDeclarationADefinition())
+    I.DefLoc.emplace(LineNumber, Filename);
+  else
+    I.Loc.emplace_back(LineNumber, Filename);
+}
+
+static void populateFunctionInfo(FunctionInfo &I, const FunctionDecl *D,
+                                 const FullComment *FC, int LineNumber,
+                                 StringRef Filename) {
+  populateSymbolInfo(I, D, FC, LineNumber, Filename);
+  if (const auto *T = getDeclForType(D->getReturnType())) {
+    // The return type is a record with a definition: reference it by its
+    // hashed USR. getDeclForType() never yields an EnumDecl.
+    // FIXME: Resolve EnumType too so enum return types can use IT_enum.
+    I.ReturnType.Type.USR = getUSRForDecl(T);
+    I.ReturnType.Type.RefType = InfoType::IT_record;
+  } else {
+    // Not a RecordType, or the record has no definition: fall back to the
+    // string form of the type.
+    I.ReturnType.Type.USR = D->getReturnType().getAsString();
+    I.ReturnType.Type.RefType = InfoType::IT_default;
+  }
+  // TODO: Populate return type comment description from info description.
+  parseParameters(I, D);
+}
+
+std::string emitInfo(const NamespaceDecl *D, const FullComment *FC,
+                     int LineNumber, llvm::StringRef File, bool OmitFilenames) {
+  NamespaceInfo I;
+  populateInfo(I, D, FC);
+  return serialize(I, OmitFilenames);
+}
+
+std::string emitInfo(const RecordDecl *D, const FullComment *FC, int LineNumber,
+                     llvm::StringRef File, bool OmitFilenames) {
+  RecordInfo I;
+  populateSymbolInfo(I, D, FC, LineNumber, File);
+  I.TagType = D->getTagKind();
+  parseFields(I, D);
+  if (const auto *C = dyn_cast<CXXRecordDecl>(D)) parseBases(I, C);
+  return serialize(I, OmitFilenames);
+}
+
+std::string emitInfo(const FunctionDecl *D, const FullComment *FC,
+                     int LineNumber, llvm::StringRef File, bool OmitFilenames) {
+  FunctionInfo I;
+  populateFunctionInfo(I, D, FC, LineNumber, File);
+  I.Access = clang::AccessSpecifier::AS_none;
+  return serialize(I, OmitFilenames);
+}
+
+std::string emitInfo(const CXXMethodDecl *D, const FullComment *FC,
+                     int LineNumber, llvm::StringRef File, bool OmitFilenames) {
+  FunctionInfo I;
+  populateFunctionInfo(I, D, FC, LineNumber, File);
+  I.IsMethod = true;
+  I.Parent = Reference{getUSRForDecl(D->getParent()), InfoType::IT_record};
+  I.Access = D->getAccess();
+  return serialize(I, OmitFilenames);
+}
+
+std::string emitInfo(const EnumDecl *D, const FullComment *FC, int LineNumber,
+                     llvm::StringRef File, bool OmitFilenames) {
+  EnumInfo I;
+  populateSymbolInfo(I, D, FC, LineNumber, File);
+  I.Scoped = D->isScoped();
+  parseEnumerators(I, D);
+  return serialize(I, OmitFilenames);
+}
+
+}  // namespace serialize
+}  // namespace doc
+}  // namespace clang
Index: clang-doc/Representation.h
===================================================================
--- /dev/null
+++ clang-doc/Representation.h
@@ -0,0 +1,152 @@
+//===-- Representation.h - ClangDoc Representation --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the internal representations of different declaration
+// types for the clang-doc tool.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_REPRESENTATION_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_REPRESENTATION_H
+
+#include "clang/AST/Type.h"
+#include "clang/Basic/Specifiers.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include <string>
+
+namespace clang {
+namespace doc {
+
+struct Info;
+enum class InfoType {
+  IT_namespace,
+  IT_record,
+  IT_function,
+  IT_enum,
+  IT_default
+};
+
+// A representation of a parsed comment.
+struct CommentInfo {
+  CommentInfo() = default;
+  CommentInfo(CommentInfo &&Other) = default;  // Moves every field.
+  SmallString<16> Kind;
+  SmallString<64> Text;
+  SmallString<16> Name;
+  SmallString<4> Direction;
+  SmallString<16> ParamName;
+  SmallString<16> CloseName;
+  bool SelfClosing = false;
+  bool Explicit = false;
+  llvm::SmallVector<SmallString<16>, 4> AttrKeys;
+  llvm::SmallVector<SmallString<16>, 4> AttrValues;
+  llvm::SmallVector<SmallString<16>, 4> Args;
+  std::vector<std::unique_ptr<CommentInfo>> Children;
+};
+
+struct Reference {
+  Reference() = default;
+  Reference(llvm::StringRef USR, InfoType IT) : USR(USR), RefType(IT) {}
+
+  SmallString<16> USR;
+  InfoType RefType = InfoType::IT_default;
+};
+
+// TODO: Pull the CommentInfo for a type out of the info's CommentInfo.
+// A base struct for TypeInfos
+struct TypeInfo {
+  TypeInfo() = default;
+  TypeInfo(llvm::StringRef Type, InfoType IT) : Type(Type, IT) {}
+
+  Reference Type;
+  std::vector<CommentInfo> Description;
+};
+
+// Info for field types.
+struct FieldTypeInfo : public TypeInfo {
+  FieldTypeInfo() = default;
+  FieldTypeInfo(llvm::StringRef Type, InfoType IT) : TypeInfo(Type, IT) {}
+  FieldTypeInfo(llvm::StringRef Type, InfoType IT, llvm::StringRef Name)
+      : TypeInfo(Type, IT), Name(Name) {}
+
+  SmallString<16> Name;
+};
+
+// Info for member types.
+struct MemberTypeInfo : public FieldTypeInfo {
+  MemberTypeInfo() = default;
+  MemberTypeInfo(llvm::StringRef Type, InfoType IT, llvm::StringRef Name)
+      : FieldTypeInfo(Type, IT, Name) {}
+
+  AccessSpecifier Access = clang::AccessSpecifier::AS_none;
+};
+
+struct Location {
+  Location() = default;
+  Location(int LineNumber, SmallString<16> Filename)
+      : LineNumber(LineNumber), Filename(std::move(Filename)) {}
+
+  int LineNumber = 0;  // Stays 0 for a default-constructed Location.
+  SmallString<32> Filename;
+};
+
+/// A base struct for Infos.
+struct Info {
+  Info() = default;
+  Info(Info &&Other) = default;  // Moves every field, not only Description.
+  virtual ~Info() = default;
+
+  SmallString<16> USR;
+  SmallString<16> Name;
+  llvm::SmallVector<Reference, 4> Namespace;
+  std::vector<CommentInfo> Description;
+};
+
+struct NamespaceInfo : public Info {};
+
+struct SymbolInfo : public Info {
+  llvm::Optional<Location> DefLoc;
+  llvm::SmallVector<Location, 2> Loc;
+};
+
+// TODO: Expand to allow for documenting templating and default args.
+// Info for functions.
+struct FunctionInfo : public SymbolInfo {
+  bool IsMethod = false;
+  Reference Parent;
+  TypeInfo ReturnType;
+  llvm::SmallVector<FieldTypeInfo, 4> Params;
+  AccessSpecifier Access = AccessSpecifier::AS_none;
+};
+
+// TODO: Expand to allow for documenting templating, inheritance access,
+// friend classes
+// Info for types.
+struct RecordInfo : public SymbolInfo {
+  TagTypeKind TagType = TagTypeKind::TTK_Struct;
+  llvm::SmallVector<MemberTypeInfo, 4> Members;
+  llvm::SmallVector<Reference, 4> Parents;
+  llvm::SmallVector<Reference, 4> VirtualParents;
+};
+
+// TODO: Expand to allow for documenting templating.
+// Info for enums.
+struct EnumInfo : public SymbolInfo {
+  bool Scoped = false;
+  llvm::SmallVector<TypeInfo, 4> Members;
+};
+
+// TODO: Add functionality to include separate markdown pages.
+
+}  // namespace doc
+}  // namespace clang
+
+#endif  // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_REPRESENTATION_H
Index: clang-doc/Mapper.h
===================================================================
--- /dev/null
+++ clang-doc/Mapper.h
@@ -0,0 +1,58 @@
+//===-- Mapper.h - ClangDoc Mapper ------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Mapper piece of the clang-doc tool. It implements
+// a RecursiveASTVisitor to look at each declaration and populate the info
+// into the internal representation. Each seen declaration is serialized to
+// bitcode and written out to the ExecutionContext as a KV pair where the key
+// is the declaration's hashed USR and the value is the serialized bitcode.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MAPPER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MAPPER_H
+
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Tooling/Execution.h"
+
+using namespace clang::comments;
+using namespace clang::tooling;
+
+namespace clang {
+namespace doc {
+
+class MapASTVisitor : public clang::RecursiveASTVisitor<MapASTVisitor> {
+ public:
+  explicit MapASTVisitor(ASTContext *Ctx, ExecutionContext *ECtx,
+                         bool OmitFilenames)
+      : ECtx(ECtx), OmitFilenames(OmitFilenames) {}
+
+  bool VisitNamespaceDecl(const NamespaceDecl *D);
+  bool VisitRecordDecl(const RecordDecl *D);
+  bool VisitEnumDecl(const EnumDecl *D);
+  bool VisitCXXMethodDecl(const CXXMethodDecl *D);
+  bool VisitFunctionDecl(const FunctionDecl *D);
+
+ private:
+  template <typename T>
+  bool mapDecl(const T *D);
+
+  int getLine(const NamedDecl *D, const ASTContext &Context) const;
+  StringRef getFile(const NamedDecl *D, const ASTContext &Context) const;
+  comments::FullComment *getComment(const NamedDecl *D,
+                                    const ASTContext &Context) const;
+
+  ExecutionContext *ECtx;
+  bool OmitFilenames;
+};
+
+}  // namespace doc
+}  // namespace clang
+
+#endif  // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MAPPER_H
Index: clang-doc/Mapper.cpp
===================================================================
--- /dev/null
+++ clang-doc/Mapper.cpp
@@ -0,0 +1,81 @@
+//===-- Mapper.cpp - ClangDoc Mapper ----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mapper.h"
+#include "BitcodeWriter.h"
+#include "Serialize.h"
+#include "clang/AST/Comment.h"
+#include "clang/Index/USRGeneration.h"
+
+using clang::comments::FullComment;
+
+namespace clang {
+namespace doc {
+
+template <typename T>
+bool MapASTVisitor::mapDecl(const T *D) {
+  // If we're looking at a decl not in user files, skip this decl.
+  if (D->getASTContext().getSourceManager().isInSystemHeader(D->getLocation()))
+    return true;
+
+  llvm::SmallString<128> USR;
+  // If there is an error generating a USR for the decl, skip this decl.
+  if (index::generateUSRForDecl(D, USR)) return true;
+
+  ECtx->reportResult(
+      serialize::hashUSR(USR),
+      serialize::emitInfo(D, getComment(D, D->getASTContext()),
+                          getLine(D, D->getASTContext()),
+                          getFile(D, D->getASTContext()), OmitFilenames));
+  return true;
+}
+
+bool MapASTVisitor::VisitNamespaceDecl(const NamespaceDecl *D) {
+  return mapDecl(D);
+}
+
+bool MapASTVisitor::VisitRecordDecl(const RecordDecl *D) { return mapDecl(D); }
+
+bool MapASTVisitor::VisitEnumDecl(const EnumDecl *D) { return mapDecl(D); }
+
+bool MapASTVisitor::VisitCXXMethodDecl(const CXXMethodDecl *D) {
+  return mapDecl(D);
+}
+
+bool MapASTVisitor::VisitFunctionDecl(const FunctionDecl *D) {
+  // Methods are handled by VisitCXXMethodDecl; don't map them twice.
+  if (isa<CXXMethodDecl>(D)) return true;
+  return mapDecl(D);
+}
+
+comments::FullComment *MapASTVisitor::getComment(
+    const NamedDecl *D, const ASTContext &Context) const {
+  RawComment *Comment = Context.getRawCommentForDeclNoCache(D);
+  // FIXME: Move setAttached to the initial comment parsing.
+  if (Comment) {
+    Comment->setAttached();
+    return Comment->parse(Context, nullptr, D);
+  }
+  return nullptr;
+}
+
+int MapASTVisitor::getLine(const NamedDecl *D,
+                           const ASTContext &Context) const {
+  return Context.getSourceManager().getPresumedLoc(D->getLocStart()).getLine();
+}
+
+llvm::StringRef MapASTVisitor::getFile(const NamedDecl *D,
+                                       const ASTContext &Context) const {
+  return Context.getSourceManager()
+      .getPresumedLoc(D->getLocStart())
+      .getFilename();
+}
+
+}  // namespace doc
+}  // namespace clang
Index: clang-doc/ClangDoc.h
===================================================================
--- /dev/null
+++ clang-doc/ClangDoc.h
@@ -0,0 +1,77 @@
+//===-- ClangDoc.h - ClangDoc -----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the main entry point for the clang-doc tool. It runs
+// the clang-doc mapper on a given set of source code files using a
+// FrontendActionFactory.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANGDOC_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANGDOC_H
+
+#include "Mapper.h"
+#include "clang/AST/AST.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Frontend/ASTConsumers.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Tooling/Tooling.h"
+
+namespace clang {
+namespace doc {
+
+class MapperActionFactory : public tooling::FrontendActionFactory {
+ public:
+  MapperActionFactory(tooling::ExecutionContext *ECtx, bool OmitFilenames)
+      : ECtx(ECtx), OmitFilenames(OmitFilenames) {}
+  // Builds an action whose consumer runs MapASTVisitor over the TU's decls.
+  clang::FrontendAction *create() override {
+    class ClangDocConsumer : public clang::ASTConsumer {
+     public:
+      ClangDocConsumer(ASTContext *Ctx, tooling::ExecutionContext *ECtx,
+                       bool OmitFilenames)
+          : Mapper(Ctx, ECtx, OmitFilenames) {}
+      void HandleTranslationUnit(clang::ASTContext &Context) override {
+        Mapper.TraverseDecl(Context.getTranslationUnitDecl());
+      }
+
+     private:
+      MapASTVisitor Mapper;
+    };
+
+    class ClangDocAction : public clang::ASTFrontendAction {
+     public:
+      ClangDocAction(tooling::ExecutionContext *ECtx, bool OmitFilenames)
+          : ECtx(ECtx), OmitFilenames(OmitFilenames) {}
+
+      std::unique_ptr<clang::ASTConsumer> CreateASTConsumer(
+          clang::CompilerInstance &Compiler, llvm::StringRef InFile) override {
+        return llvm::make_unique<ClangDocConsumer>(&Compiler.getASTContext(),
+                                                   ECtx, OmitFilenames);
+      }
+
+     private:
+      tooling::ExecutionContext *ECtx;
+      bool OmitFilenames;
+    };
+    return new ClangDocAction(ECtx, OmitFilenames);
+  }
+
+ private:
+  tooling::ExecutionContext *ECtx;
+  bool OmitFilenames;
+};
+
+}  // namespace doc
+}  // namespace clang
+
+#endif  // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANGDOC_H
Index: clang-doc/CMakeLists.txt
===================================================================
--- /dev/null
+++ clang-doc/CMakeLists.txt
@@ -0,0 +1,22 @@
+set(LLVM_LINK_COMPONENTS
+  support
+  )
+
+add_clang_library(clangDoc
+  BitcodeWriter.cpp
+  Mapper.cpp
+  Serialize.cpp
+
+  LINK_LIBS
+  clangAnalysis
+  clangAST
+  clangASTMatchers
+  clangBasic
+  clangFrontend
+  clangIndex
+  clangLex
+  clangTooling
+  clangToolingCore
+  )
+
+add_subdirectory(tool)
Index: clang-doc/BitcodeWriter.h
===================================================================
--- /dev/null
+++ clang-doc/BitcodeWriter.h
@@ -0,0 +1,258 @@
+//===--  BitcodeWriter.h - ClangDoc Bitcode Writer --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a writer for serializing the clang-doc internal
+// representation to LLVM bitcode. The writer takes in a stream and emits the
+// generated bitcode to that stream.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_BITCODEWRITER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_BITCODEWRITER_H
+
+#include "Representation.h"
+#include "clang/AST/AST.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Bitcode/BitstreamWriter.h"
+#include <initializer_list>
+#include <vector>
+
+namespace clang {
+namespace doc {
+
+// Current version number of clang-doc bitcode.
+// Should be bumped when removing or changing BlockIds, RecordIds, or
+// BitCodeConstants, though they can be added without breaking it.
+static const unsigned VersionNumber = 1;
+
+struct BitCodeConstants {
+  static constexpr unsigned RecordSize = 16U;
+  static constexpr unsigned SignatureBitSize = 8U;
+  static constexpr unsigned SubblockIDSize = 4U;
+  static constexpr unsigned BoolSize = 1U;
+  static constexpr unsigned IntSize = 16U;
+  static constexpr unsigned StringLengthSize = 16U;
+  static constexpr unsigned FilenameLengthSize = 16U;
+  static constexpr unsigned LineNumberSize = 16U;
+  static constexpr unsigned ReferenceTypeSize = 8U;
+  static constexpr unsigned USRLengthSize = 16U;
+};
+
+// New Ids need to be added to both the enum here and the relevant IdNameMap in
+// the implementation file.
+enum BlockId {
+  BI_VERSION_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID,
+  BI_NAMESPACE_BLOCK_ID,
+  BI_ENUM_BLOCK_ID,
+  BI_TYPE_BLOCK_ID,
+  BI_FIELD_TYPE_BLOCK_ID,
+  BI_MEMBER_TYPE_BLOCK_ID,
+  BI_RECORD_BLOCK_ID,
+  BI_FUNCTION_BLOCK_ID,
+  BI_COMMENT_BLOCK_ID,
+  BI_FIRST = BI_VERSION_BLOCK_ID,
+  BI_LAST = BI_COMMENT_BLOCK_ID
+};
+
+// New Ids need to be added to the enum here, and to the relevant IdNameMap and
+// initialization list in the implementation file.
+#define INFORECORDS(X) X##_USR, X##_NAME, X##_NAMESPACE
+
+enum RecordId {
+  VERSION = 1,
+  INFORECORDS(FUNCTION),
+  FUNCTION_DEFLOCATION,
+  FUNCTION_LOCATION,
+  FUNCTION_PARENT,
+  FUNCTION_ACCESS,
+  FUNCTION_IS_METHOD,
+  COMMENT_KIND,
+  COMMENT_TEXT,
+  COMMENT_NAME,
+  COMMENT_DIRECTION,
+  COMMENT_PARAMNAME,
+  COMMENT_CLOSENAME,
+  COMMENT_SELFCLOSING,
+  COMMENT_EXPLICIT,
+  COMMENT_ATTRKEY,
+  COMMENT_ATTRVAL,
+  COMMENT_ARG,
+  TYPE_REF,
+  FIELD_TYPE_REF,
+  FIELD_TYPE_NAME,
+  MEMBER_TYPE_REF,
+  MEMBER_TYPE_NAME,
+  MEMBER_TYPE_ACCESS,
+  INFORECORDS(NAMESPACE),
+  INFORECORDS(ENUM),
+  ENUM_DEFLOCATION,
+  ENUM_LOCATION,
+  ENUM_SCOPED,
+  INFORECORDS(RECORD),
+  RECORD_DEFLOCATION,
+  RECORD_LOCATION,
+  RECORD_TAG_TYPE,
+  RECORD_PARENT,
+  RECORD_VPARENT,
+  RI_FIRST = VERSION,
+  RI_LAST = RECORD_VPARENT
+};
+
+static constexpr unsigned BlockIdCount = BI_LAST - BI_FIRST + 1;
+static constexpr unsigned RecordIdCount = RI_LAST - RI_FIRST + 1;
+
+#undef INFORECORDS
+
+template <typename Info>
+struct MapFromInfoToBlockId {
+  static const BlockId ID;
+};
+
+template <>
+struct MapFromInfoToBlockId<NamespaceInfo> {
+  static const BlockId ID = BI_NAMESPACE_BLOCK_ID;
+};
+
+template <>
+struct MapFromInfoToBlockId<EnumInfo> {
+  static const BlockId ID = BI_ENUM_BLOCK_ID;
+};
+
+template <>
+struct MapFromInfoToBlockId<RecordInfo> {
+  static const BlockId ID = BI_RECORD_BLOCK_ID;
+};
+
+template <>
+struct MapFromInfoToBlockId<FunctionInfo> {
+  static const BlockId ID = BI_FUNCTION_BLOCK_ID;
+};
+
+template <>
+struct MapFromInfoToBlockId<TypeInfo> {
+  static const BlockId ID = BI_TYPE_BLOCK_ID;
+};
+
+template <>
+struct MapFromInfoToBlockId<FieldTypeInfo> {
+  static const BlockId ID = BI_FIELD_TYPE_BLOCK_ID;
+};
+
+template <>
+struct MapFromInfoToBlockId<MemberTypeInfo> {
+  static const BlockId ID = BI_MEMBER_TYPE_BLOCK_ID;
+};
+
+template <>
+struct MapFromInfoToBlockId<CommentInfo> {
+  static const BlockId ID = BI_COMMENT_BLOCK_ID;
+};
+
+class ClangDocBitcodeWriter {
+ public:
+  ClangDocBitcodeWriter(llvm::BitstreamWriter &Stream,
+                        bool OmitFilenames = false)
+      : Stream(Stream), OmitFilenames(OmitFilenames) {
+    emitHeader();
+    emitBlockInfoBlock();
+    emitVersionBlock();
+  }
+
+#ifndef NDEBUG  // Don't want explicit dtor unless needed.
+  ~ClangDocBitcodeWriter() {
+    // Check that the static size is large-enough.
+    assert(Record.capacity() == BitCodeConstants::RecordSize);
+  }
+#endif
+
+  template <typename T>
+  void emitBlock(const T &I);
+
+ private:
+  class AbbreviationMap {
+    llvm::DenseMap<unsigned, unsigned> Abbrevs;
+
+   public:
+    AbbreviationMap() : Abbrevs(RecordIdCount) {}
+
+    void add(RecordId RID, unsigned AbbrevID);
+    unsigned get(RecordId RID) const;
+  };
+
+  class StreamSubBlockGuard {
+    llvm::BitstreamWriter &Stream;
+
+   public:
+    StreamSubBlockGuard(llvm::BitstreamWriter &Stream_, BlockId ID)
+        : Stream(Stream_) {
+      // NOTE: SubBlockIDSize could theoretically be calculated on the fly,
+      // based on the initialization list of records in each block.
+      Stream.EnterSubblock(ID, BitCodeConstants::SubblockIDSize);
+    }
+
+    StreamSubBlockGuard() = default;
+    StreamSubBlockGuard(const StreamSubBlockGuard &) = delete;
+    StreamSubBlockGuard &operator=(const StreamSubBlockGuard &) = delete;
+
+    ~StreamSubBlockGuard() { Stream.ExitBlock(); }
+  };
+
+  // Block emission of different info types.
+  void emitBlockContent(const NamespaceInfo &I);
+  void emitBlockContent(const RecordInfo &I);
+  void emitBlockContent(const FunctionInfo &I);
+  void emitBlockContent(const EnumInfo &I);
+  void emitBlockContent(const TypeInfo &B);
+  void emitBlockContent(const FieldTypeInfo &B);
+  void emitBlockContent(const MemberTypeInfo &B);
+  void emitBlockContent(const CommentInfo &B);
+
+  // Emission of validation and overview blocks.
+  void emitHeader();
+  void emitVersionBlock();
+  void emitRecordID(RecordId ID);
+  void emitBlockID(BlockId ID);
+  void emitBlockInfoBlock();
+  void emitBlockInfo(BlockId BID, const std::initializer_list<RecordId> &RIDs);
+
+  // Emission of individual record types.
+  void emitRecord(StringRef Str, RecordId ID);
+  void emitRecord(const Location &Loc, RecordId ID);
+  void emitRecord(const Reference &Ref, RecordId ID);
+  void emitRecord(bool Value, RecordId ID);
+  void emitRecord(int Value, RecordId ID);
+  void emitRecord(unsigned Value, RecordId ID);
+  bool prepRecordData(RecordId ID, bool ShouldEmit = true);
+
+  // Emission of appropriate abbreviation type.
+  void emitAbbrev(RecordId ID, BlockId Block);
+
+  // Static size is the maximum length of the block/record names we're pushing
+  // to this + 1. Longest is currently `MemberTypeBlock` at 15 chars.
+  SmallVector<uint32_t, BitCodeConstants::RecordSize> Record;
+  llvm::BitstreamWriter &Stream;
+  bool OmitFilenames;
+  AbbreviationMap Abbrevs;
+};
+
+/// \brief Entry point for writing an individual info to bitcode.
+///
+/// \param I The info to emit to bitcode.
+template <typename T>
+void ClangDocBitcodeWriter::emitBlock(const T &I) {
+  StreamSubBlockGuard Block(Stream, MapFromInfoToBlockId<T>::ID);
+  emitBlockContent(I);
+}
+
+}  // namespace doc
+}  // namespace clang
+
+#endif  // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_BITCODEWRITER_H
Index: clang-doc/BitcodeWriter.cpp
===================================================================
--- /dev/null
+++ clang-doc/BitcodeWriter.cpp
@@ -0,0 +1,447 @@
+//===--  BitcodeWriter.cpp - ClangDoc Bitcode Writer ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BitcodeWriter.h"
+#include "llvm/ADT/IndexedMap.h"
+
+namespace clang {
+namespace doc {
+
+// Block ids do not start at zero, whereas an IndexedMap needs a zero-based
+// index. This functor rebases a block id onto its slot by subtracting
+// BI_FIRST.
+struct BlockIdToIndexFunctor {
+  using argument_type = unsigned;
+  unsigned operator()(unsigned BlockID) const { return BlockID - BI_FIRST; }
+};
+
+// Record ids do not start at zero either; this functor rebases a record id
+// onto its zero-based IndexedMap slot by subtracting RI_FIRST.
+struct RecordIdToIndexFunctor {
+  using argument_type = unsigned;
+  unsigned operator()(unsigned RecordID) const { return RecordID - RI_FIRST; }
+};
+
+// Signature of the functions that append the operands of one kind of record
+// (bool, int, string, location, reference) to an abbreviation.
+using AbbrevDsc = void (*)(std::shared_ptr<llvm::BitCodeAbbrev> &Abbrev);
+
+// Appends every operand in Ops, in the order given, to Abbrev.
+static void AbbrevGen(std::shared_ptr<llvm::BitCodeAbbrev> &Abbrev,
+                      const std::initializer_list<llvm::BitCodeAbbrevOp> Ops) {
+  for (auto It = Ops.begin(), End = Ops.end(); It != End; ++It)
+    Abbrev->Add(*It);
+}
+
+// Operands of a boolean record: a single fixed-width field of
+// BitCodeConstants::BoolSize bits.
+static void BoolAbbrev(std::shared_ptr<llvm::BitCodeAbbrev> &Abbrev) {
+  // 0. Boolean
+  const llvm::BitCodeAbbrevOp Flag(llvm::BitCodeAbbrevOp::Fixed,
+                                   BitCodeConstants::BoolSize);
+  AbbrevGen(Abbrev, {Flag});
+}
+
+// Operands of an integer record: a single fixed-width field of
+// BitCodeConstants::IntSize bits.
+static void IntAbbrev(std::shared_ptr<llvm::BitCodeAbbrev> &Abbrev) {
+  // 0. Fixed-size integer
+  const llvm::BitCodeAbbrevOp Value(llvm::BitCodeAbbrevOp::Fixed,
+                                    BitCodeConstants::IntSize);
+  AbbrevGen(Abbrev, {Value});
+}
+
+// Operands of a string record: the string length in a fixed-width field,
+// followed by the characters as a blob.
+static void StringAbbrev(std::shared_ptr<llvm::BitCodeAbbrev> &Abbrev) {
+  // 0. Fixed-size integer (length of the following string)
+  const llvm::BitCodeAbbrevOp Length(llvm::BitCodeAbbrevOp::Fixed,
+                                     BitCodeConstants::StringLengthSize);
+  // 1. The string blob
+  const llvm::BitCodeAbbrevOp Text(llvm::BitCodeAbbrevOp::Blob);
+  AbbrevGen(Abbrev, {Length, Text});
+}
+
+// Operands of a location record: line number, filename length and the
+// filename itself as a blob.
+// Assumes that the file will not have more than 65535 lines.
+static void LocationAbbrev(std::shared_ptr<llvm::BitCodeAbbrev> &Abbrev) {
+  // 0. Fixed-size integer (line number)
+  const llvm::BitCodeAbbrevOp Line(llvm::BitCodeAbbrevOp::Fixed,
+                                   BitCodeConstants::LineNumberSize);
+  // 1. Fixed-size integer (length of the following string (filename))
+  const llvm::BitCodeAbbrevOp Length(llvm::BitCodeAbbrevOp::Fixed,
+                                     BitCodeConstants::StringLengthSize);
+  // 2. The string blob
+  const llvm::BitCodeAbbrevOp Filename(llvm::BitCodeAbbrevOp::Blob);
+  AbbrevGen(Abbrev, {Line, Length, Filename});
+}
+
+// Operands of a reference record: the reference type, the USR length and the
+// USR itself as a blob. The order and the field widths have to match
+// emitRecord(const Reference &, RecordId), which pushes the reference type
+// first and the USR length (checked against USRLengthSize) second.
+static void ReferenceAbbrev(std::shared_ptr<llvm::BitCodeAbbrev> &Abbrev) {
+  AbbrevGen(Abbrev,
+            {// 0. Fixed-size integer (ref type)
+             llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed,
+                                   BitCodeConstants::ReferenceTypeSize),
+             // 1. Fixed-size integer (length of the following string (USR))
+             llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed,
+                                   BitCodeConstants::USRLengthSize),
+             // 2. The string blob
+             llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)});
+}
+
+// Describes one record id: the name that is written for it to the BLOCKINFO
+// block and the function that appends the operands of its abbreviation.
+struct RecordIdDsc {
+  llvm::StringRef Name;
+  AbbrevDsc Abbrev = nullptr;
+
+  RecordIdDsc() = default;
+  RecordIdDsc(llvm::StringRef Name, AbbrevDsc Abbrev)
+      : Name(Name), Abbrev(Abbrev) {}
+
+  // Is this 'description' valid? A default-constructed description (no
+  // abbreviation function, no name) is not. The conversion is explicit so
+  // that a description can be tested in a boolean context, as the asserts in
+  // this file do, but never silently converts to an integer.
+  explicit operator bool() const {
+    return Abbrev != nullptr && Name.data() != nullptr && !Name.empty();
+  }
+};
+
+// Name of every block id, as written to the BLOCKINFO block by emitBlockID.
+// The map is filled by the immediately-invoked lambda below.
+static const llvm::IndexedMap<llvm::StringRef, BlockIdToIndexFunctor>
+    BlockIdNameMap = []() {
+      llvm::IndexedMap<llvm::StringRef, BlockIdToIndexFunctor> Names;
+      Names.resize(BlockIdCount);
+
+      // IndexedMap has no init-list constructor, so the entries are listed
+      // here and copied into the map one by one.
+      static constexpr std::initializer_list<
+          std::pair<BlockId, const char *const>>
+          Entries = {{BI_VERSION_BLOCK_ID, "VersionBlock"},
+                     {BI_NAMESPACE_BLOCK_ID, "NamespaceBlock"},
+                     {BI_ENUM_BLOCK_ID, "EnumBlock"},
+                     {BI_TYPE_BLOCK_ID, "TypeBlock"},
+                     {BI_FIELD_TYPE_BLOCK_ID, "FieldTypeBlock"},
+                     {BI_MEMBER_TYPE_BLOCK_ID, "MemberTypeBlock"},
+                     {BI_RECORD_BLOCK_ID, "RecordBlock"},
+                     {BI_FUNCTION_BLOCK_ID, "FunctionBlock"},
+                     {BI_COMMENT_BLOCK_ID, "CommentBlock"}};
+      static_assert(Entries.size() == BlockIdCount,
+                    "unexpected count of initializers");
+      for (const auto &Entry : Entries) Names[Entry.first] = Entry.second;
+      assert(Names.size() == BlockIdCount);
+      return Names;
+    }();
+
+// Description (name and abbreviation function) of every record id. The map is
+// filled by the immediately-invoked lambda below.
+static const llvm::IndexedMap<RecordIdDsc, RecordIdToIndexFunctor>
+    RecordIdNameMap = []() {
+      llvm::IndexedMap<RecordIdDsc, RecordIdToIndexFunctor> Descriptions;
+      Descriptions.resize(RecordIdCount);
+
+      // IndexedMap has no init-list constructor, so the entries are listed
+      // here and copied into the map one by one.
+      static std::initializer_list<std::pair<RecordId, RecordIdDsc>> Entries = {
+          {VERSION, {"Version", &IntAbbrev}},
+          {COMMENT_KIND, {"Kind", &StringAbbrev}},
+          {COMMENT_TEXT, {"Text", &StringAbbrev}},
+          {COMMENT_NAME, {"Name", &StringAbbrev}},
+          {COMMENT_DIRECTION, {"Direction", &StringAbbrev}},
+          {COMMENT_PARAMNAME, {"ParamName", &StringAbbrev}},
+          {COMMENT_CLOSENAME, {"CloseName", &StringAbbrev}},
+          {COMMENT_SELFCLOSING, {"SelfClosing", &BoolAbbrev}},
+          {COMMENT_EXPLICIT, {"Explicit", &BoolAbbrev}},
+          {COMMENT_ATTRKEY, {"AttrKey", &StringAbbrev}},
+          {COMMENT_ATTRVAL, {"AttrVal", &StringAbbrev}},
+          {COMMENT_ARG, {"Arg", &StringAbbrev}},
+          {TYPE_REF, {"Type", &ReferenceAbbrev}},
+          {FIELD_TYPE_REF, {"Type", &ReferenceAbbrev}},
+          {FIELD_TYPE_NAME, {"Name", &StringAbbrev}},
+          {MEMBER_TYPE_REF, {"Type", &ReferenceAbbrev}},
+          {MEMBER_TYPE_NAME, {"Name", &StringAbbrev}},
+          {MEMBER_TYPE_ACCESS, {"Access", &IntAbbrev}},
+          {NAMESPACE_USR, {"USR", &StringAbbrev}},
+          {NAMESPACE_NAME, {"Name", &StringAbbrev}},
+          {NAMESPACE_NAMESPACE, {"Namespace", &ReferenceAbbrev}},
+          {ENUM_USR, {"USR", &StringAbbrev}},
+          {ENUM_NAME, {"Name", &StringAbbrev}},
+          {ENUM_NAMESPACE, {"Namespace", &ReferenceAbbrev}},
+          {ENUM_DEFLOCATION, {"DefLocation", &LocationAbbrev}},
+          {ENUM_LOCATION, {"Location", &LocationAbbrev}},
+          {ENUM_SCOPED, {"Scoped", &BoolAbbrev}},
+          {RECORD_USR, {"USR", &StringAbbrev}},
+          {RECORD_NAME, {"Name", &StringAbbrev}},
+          {RECORD_NAMESPACE, {"Namespace", &ReferenceAbbrev}},
+          {RECORD_DEFLOCATION, {"DefLocation", &LocationAbbrev}},
+          {RECORD_LOCATION, {"Location", &LocationAbbrev}},
+          {RECORD_TAG_TYPE, {"TagType", &IntAbbrev}},
+          {RECORD_PARENT, {"Parent", &ReferenceAbbrev}},
+          {RECORD_VPARENT, {"VParent", &ReferenceAbbrev}},
+          {FUNCTION_USR, {"USR", &StringAbbrev}},
+          {FUNCTION_NAME, {"Name", &StringAbbrev}},
+          {FUNCTION_NAMESPACE, {"Namespace", &ReferenceAbbrev}},
+          {FUNCTION_DEFLOCATION, {"DefLocation", &LocationAbbrev}},
+          {FUNCTION_LOCATION, {"Location", &LocationAbbrev}},
+          {FUNCTION_PARENT, {"Parent", &ReferenceAbbrev}},
+          {FUNCTION_ACCESS, {"Access", &IntAbbrev}},
+          {FUNCTION_IS_METHOD, {"IsMethod", &BoolAbbrev}}};
+      // NOTE(review): the two size checks in this lambda are disabled; confirm
+      // whether they can be turned back on.
+      // assert(Entries.size() == RecordIdCount);
+      for (const auto &Entry : Entries) {
+        Descriptions[Entry.first] = Entry.second;
+        // A record name is pushed into the scratch record after the record
+        // id, so name + id have to fit into RecordSize elements.
+        assert((Entry.second.Name.size() + 1) <= BitCodeConstants::RecordSize);
+      }
+      // assert(Descriptions.size() == RecordIdCount);
+      return Descriptions;
+    }();
+
+// The records that may appear in each block. emitBlockInfoBlock walks this
+// table to emit the name and the abbreviation of every record.
+//
+// Copying a std::initializer_list does not copy its backing array, so every
+// per-block list is a named static object: initializing the list directly
+// from the braces ties the lifetime of the backing array to that object. If
+// the lists were written inline in the table, each would be a temporary
+// copied into its std::pair, and the table would be left pointing at arrays
+// that are gone once its initialization has finished.
+
+// Version Block
+static const std::initializer_list<RecordId> VersionBlockRecords = {VERSION};
+// Comment Block
+static const std::initializer_list<RecordId> CommentBlockRecords = {
+    COMMENT_KIND,      COMMENT_TEXT,      COMMENT_NAME,
+    COMMENT_DIRECTION, COMMENT_PARAMNAME, COMMENT_CLOSENAME,
+    COMMENT_SELFCLOSING, COMMENT_EXPLICIT, COMMENT_ATTRKEY,
+    COMMENT_ATTRVAL,   COMMENT_ARG};
+// Type Block
+static const std::initializer_list<RecordId> TypeBlockRecords = {TYPE_REF};
+// FieldType Block
+static const std::initializer_list<RecordId> FieldTypeBlockRecords = {
+    FIELD_TYPE_REF, FIELD_TYPE_NAME};
+// MemberType Block
+static const std::initializer_list<RecordId> MemberTypeBlockRecords = {
+    MEMBER_TYPE_REF, MEMBER_TYPE_NAME, MEMBER_TYPE_ACCESS};
+// Enum Block
+static const std::initializer_list<RecordId> EnumBlockRecords = {
+    ENUM_USR,         ENUM_NAME,     ENUM_NAMESPACE,
+    ENUM_DEFLOCATION, ENUM_LOCATION, ENUM_SCOPED};
+// Namespace Block
+static const std::initializer_list<RecordId> NamespaceBlockRecords = {
+    NAMESPACE_USR, NAMESPACE_NAME, NAMESPACE_NAMESPACE};
+// Record Block
+static const std::initializer_list<RecordId> RecordBlockRecords = {
+    RECORD_USR,      RECORD_NAME,     RECORD_NAMESPACE, RECORD_DEFLOCATION,
+    RECORD_LOCATION, RECORD_TAG_TYPE, RECORD_PARENT,    RECORD_VPARENT};
+// Function Block
+static const std::initializer_list<RecordId> FunctionBlockRecords = {
+    FUNCTION_USR,      FUNCTION_NAME,   FUNCTION_NAMESPACE,
+    FUNCTION_DEFLOCATION, FUNCTION_LOCATION, FUNCTION_PARENT,
+    FUNCTION_ACCESS,   FUNCTION_IS_METHOD};
+
+static const std::initializer_list<
+    std::pair<BlockId, std::initializer_list<RecordId>>>
+    RecordsByBlock{{BI_VERSION_BLOCK_ID, VersionBlockRecords},
+                   {BI_COMMENT_BLOCK_ID, CommentBlockRecords},
+                   {BI_TYPE_BLOCK_ID, TypeBlockRecords},
+                   {BI_FIELD_TYPE_BLOCK_ID, FieldTypeBlockRecords},
+                   {BI_MEMBER_TYPE_BLOCK_ID, MemberTypeBlockRecords},
+                   {BI_ENUM_BLOCK_ID, EnumBlockRecords},
+                   {BI_NAMESPACE_BLOCK_ID, NamespaceBlockRecords},
+                   {BI_RECORD_BLOCK_ID, RecordBlockRecords},
+                   {BI_FUNCTION_BLOCK_ID, FunctionBlockRecords}};
+
+// AbbreviationMap
+
+/// \brief Remembers \p AbbrevID as the abbreviation of record \p RID.
+///
+/// Asserts that the record id is known and has no abbreviation yet.
+void ClangDocBitcodeWriter::AbbreviationMap::add(RecordId RID,
+                                                 unsigned AbbrevID) {
+  assert(RecordIdNameMap[RID] && "Unknown RecordId.");
+  assert(Abbrevs.end() == Abbrevs.find(RID) && "Abbreviation already added.");
+  Abbrevs[RID] = AbbrevID;
+}
+
+/// \brief Returns the abbreviation id that was added for record \p RID.
+///
+/// Asserts that the record id is known and that an abbreviation was added.
+unsigned ClangDocBitcodeWriter::AbbreviationMap::get(RecordId RID) const {
+  assert(RecordIdNameMap[RID] && "Unknown RecordId.");
+  assert(Abbrevs.end() != Abbrevs.find(RID) && "Unknown abbreviation.");
+  const unsigned AbbrevID = Abbrevs.lookup(RID);
+  return AbbrevID;
+}
+
+// Validation and Overview Blocks
+
+/// \brief Emits the magic number header to check that it's the right format,
+/// in this case, 'DOCS'.
+///
+/// Each character is written as its own field of
+/// BitCodeConstants::SignatureBitSize bits.
+void ClangDocBitcodeWriter::emitHeader() {
+  const llvm::StringRef Magic("DOCS");
+  for (const char Byte : Magic)
+    Stream.Emit(static_cast<unsigned>(Byte),
+                BitCodeConstants::SignatureBitSize);
+}
+
+/// \brief Emits the version block, which carries the VersionNumber record.
+void ClangDocBitcodeWriter::emitVersionBlock() {
+  StreamSubBlockGuard VersionBlock(Stream, BI_VERSION_BLOCK_ID);
+  emitRecord(VersionNumber, VERSION);
+}
+
+/// \brief Emits a block ID and the block name to the BLOCKINFO block.
+///
+/// The id goes out as a SETBID record and the name, byte by byte, as a
+/// BLOCKNAME record.
+void ClangDocBitcodeWriter::emitBlockID(BlockId BID) {
+  const llvm::StringRef &Name = BlockIdNameMap[BID];
+  assert(Name.data() && Name.size() && "Unknown BlockId.");
+
+  Record.clear();
+  Record.push_back(BID);
+  Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record);
+
+  const ArrayRef<unsigned char> NameBytes(Name.bytes_begin(),
+                                          Name.bytes_end());
+  Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, NameBytes);
+}
+
+/// \brief Emits a record name to the BLOCKINFO block.
+///
+/// The SETRECORDNAME record consists of the record id followed by the
+/// characters of the name.
+void ClangDocBitcodeWriter::emitRecordID(RecordId ID) {
+  assert(RecordIdNameMap[ID] && "Unknown RecordId.");
+  prepRecordData(ID);
+  const llvm::StringRef Name = RecordIdNameMap[ID].Name;
+  Record.append(Name.begin(), Name.end());
+  Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record);
+}
+
+// Abbreviations
+
+/// \brief Emits the abbreviation of record \p ID into the BLOCKINFO entry of
+/// \p Block and stores the abbreviation id that the stream assigned to it.
+///
+/// The abbreviation starts with the record id; the remaining operands are
+/// appended by the function registered for the record in RecordIdNameMap.
+void ClangDocBitcodeWriter::emitAbbrev(RecordId ID, BlockId Block) {
+  assert(RecordIdNameMap[ID] && "Unknown abbreviation.");
+  std::shared_ptr<llvm::BitCodeAbbrev> Desc =
+      std::make_shared<llvm::BitCodeAbbrev>();
+  Desc->Add(llvm::BitCodeAbbrevOp(ID));
+  const AbbrevDsc AddOperands = RecordIdNameMap[ID].Abbrev;
+  AddOperands(Desc);
+  const unsigned AbbrevID = Stream.EmitBlockInfoAbbrev(Block, std::move(Desc));
+  Abbrevs.add(ID, AbbrevID);
+}
+
+// Records
+
+/// \brief Emits a string record: the length of \p Str, then \p Str as a blob.
+/// Nothing is emitted for an empty string.
+void ClangDocBitcodeWriter::emitRecord(llvm::StringRef Str, RecordId ID) {
+  assert(RecordIdNameMap[ID] && "Unknown RecordId.");
+  assert(RecordIdNameMap[ID].Abbrev == &StringAbbrev &&
+         "Abbrev type mismatch.");
+  const bool HasText = !Str.empty();
+  if (!prepRecordData(ID, HasText)) return;
+  // The length has to fit into the fixed-width field of the abbreviation.
+  assert(Str.size() < (1U << BitCodeConstants::StringLengthSize));
+  Record.push_back(Str.size());
+  Stream.EmitRecordWithBlob(Abbrevs.get(ID), Record, Str);
+}
+
+/// \brief Emits a location record: the line number, the length of the
+/// filename, then the filename as a blob. Nothing is emitted when
+/// OmitFilenames is set.
+void ClangDocBitcodeWriter::emitRecord(const Location &Loc, RecordId ID) {
+  assert(RecordIdNameMap[ID] && "Unknown RecordId.");
+  assert(RecordIdNameMap[ID].Abbrev == &LocationAbbrev &&
+         "Abbrev type mismatch.");
+  const bool WantLocation = !OmitFilenames;
+  if (!prepRecordData(ID, WantLocation)) return;
+  // FIXME: Assert that the line number is of the appropriate size.
+  Record.push_back(Loc.LineNumber);
+  const auto &File = Loc.Filename;
+  // The length has to fit into the fixed-width field of the abbreviation.
+  assert(File.size() < (1U << BitCodeConstants::StringLengthSize));
+  Record.push_back(File.size());
+  Stream.EmitRecordWithBlob(Abbrevs.get(ID), Record, File);
+}
+
+/// \brief Emits a reference record: the reference type, the length of the
+/// USR, then the USR as a blob. Nothing is emitted when the USR is empty.
+void ClangDocBitcodeWriter::emitRecord(const Reference &Ref, RecordId ID) {
+  assert(RecordIdNameMap[ID] && "Unknown RecordId.");
+  assert(RecordIdNameMap[ID].Abbrev == &ReferenceAbbrev &&
+         "Abbrev type mismatch.");
+  const bool HasUSR = !Ref.USR.empty();
+  if (!prepRecordData(ID, HasUSR)) return;
+  Record.push_back(static_cast<int>(Ref.RefType));
+  const auto &USR = Ref.USR;
+  // The length has to fit into the fixed-width field of the abbreviation.
+  assert(USR.size() < (1U << BitCodeConstants::USRLengthSize));
+  Record.push_back(USR.size());
+  Stream.EmitRecordWithBlob(Abbrevs.get(ID), Record, USR);
+}
+
+/// \brief Emits a boolean record. Nothing is emitted when \p Val is false, so
+/// the absence of the record stands for false.
+void ClangDocBitcodeWriter::emitRecord(bool Val, RecordId ID) {
+  assert(RecordIdNameMap[ID] && "Unknown RecordId.");
+  assert(RecordIdNameMap[ID].Abbrev == &BoolAbbrev && "Abbrev type mismatch.");
+  const bool ShouldEmit = Val;
+  if (!prepRecordData(ID, ShouldEmit)) return;
+  Record.push_back(Val);
+  Stream.EmitRecordWithAbbrev(Abbrevs.get(ID), Record);
+}
+
+/// \brief Emits an integer record. Nothing is emitted when \p Val is zero.
+void ClangDocBitcodeWriter::emitRecord(int Val, RecordId ID) {
+  assert(RecordIdNameMap[ID] && "Unknown RecordId.");
+  assert(RecordIdNameMap[ID].Abbrev == &IntAbbrev && "Abbrev type mismatch.");
+  if (!prepRecordData(ID, Val)) return;
+  // The abbreviation stores the value in a fixed-width field of
+  // BitCodeConstants::IntSize bits, so it has to be non-negative and has to
+  // fit, exactly as the unsigned overload requires.
+  assert(Val >= 0 &&
+         static_cast<unsigned>(Val) < (1U << BitCodeConstants::IntSize));
+  Record.push_back(Val);
+  Stream.EmitRecordWithAbbrev(Abbrevs.get(ID), Record);
+}
+
+/// \brief Emits an unsigned integer record. Nothing is emitted when \p Val is
+/// zero.
+void ClangDocBitcodeWriter::emitRecord(unsigned Val, RecordId ID) {
+  assert(RecordIdNameMap[ID] && "Unknown RecordId.");
+  assert(RecordIdNameMap[ID].Abbrev == &IntAbbrev && "Abbrev type mismatch.");
+  const bool IsNonZero = Val;
+  if (!prepRecordData(ID, IsNonZero)) return;
+  // The value has to fit into the fixed-width field of the abbreviation.
+  assert(Val < (1U << BitCodeConstants::IntSize));
+  Record.push_back(Val);
+  Stream.EmitRecordWithAbbrev(Abbrevs.get(ID), Record);
+}
+
+/// \brief Prepares the scratch record for emitting record \p ID.
+///
+/// If \p ShouldEmit is true the scratch record is reset to hold only \p ID;
+/// otherwise it is left untouched.
+///
+/// \returns \p ShouldEmit, i.e. whether the caller should go on and emit.
+bool ClangDocBitcodeWriter::prepRecordData(RecordId ID, bool ShouldEmit) {
+  assert(RecordIdNameMap[ID] && "Unknown RecordId.");
+  if (ShouldEmit) {
+    Record.clear();
+    Record.push_back(ID);
+  }
+  return ShouldEmit;
+}
+
+// BlockInfo Block
+
+/// \brief Emits the BLOCKINFO block: one entry for every element of
+/// RecordsByBlock.
+void ClangDocBitcodeWriter::emitBlockInfoBlock() {
+  Stream.EnterBlockInfoBlock();
+  for (const auto &Entry : RecordsByBlock) {
+    const auto &RecordIds = Entry.second;
+    assert(RecordIds.size() < (1U << BitCodeConstants::SubblockIDSize));
+    emitBlockInfo(Entry.first, RecordIds);
+  }
+  Stream.ExitBlock();
+}
+
+/// \brief Emits the BLOCKINFO entry of block \p BID: the block id and name,
+/// then the name and the abbreviation of every record in \p RIDs.
+void ClangDocBitcodeWriter::emitBlockInfo(
+    BlockId BID, const std::initializer_list<RecordId> &RIDs) {
+  assert(RIDs.size() < (1U << BitCodeConstants::SubblockIDSize));
+  emitBlockID(BID);
+  for (auto It = RIDs.begin(), End = RIDs.end(); It != End; ++It) {
+    emitRecordID(*It);
+    emitAbbrev(*It, BID);
+  }
+}
+
+// Block emission
+
+/// \brief Emits the type reference of \p T, then one block per description
+/// comment.
+void ClangDocBitcodeWriter::emitBlockContent(const TypeInfo &T) {
+  emitRecord(T.Type, TYPE_REF);
+  for (const auto &Comment : T.Description) {
+    emitBlock(Comment);
+  }
+}
+
+/// \brief Emits the type reference and the name of \p T, then one block per
+/// description comment.
+void ClangDocBitcodeWriter::emitBlockContent(const FieldTypeInfo &T) {
+  emitRecord(T.Type, FIELD_TYPE_REF);
+  emitRecord(T.Name, FIELD_TYPE_NAME);
+  for (const auto &Comment : T.Description) {
+    emitBlock(Comment);
+  }
+}
+
+/// \brief Emits the type reference, the name and the access value of \p T,
+/// then one block per description comment.
+void ClangDocBitcodeWriter::emitBlockContent(const MemberTypeInfo &T) {
+  emitRecord(T.Type, MEMBER_TYPE_REF);
+  emitRecord(T.Name, MEMBER_TYPE_NAME);
+  emitRecord(T.Access, MEMBER_TYPE_ACCESS);
+  for (const auto &Comment : T.Description) {
+    emitBlock(Comment);
+  }
+}
+
+/// \brief Emits a comment: its string fields, its two flags, its attribute
+/// keys and values, its arguments and finally one nested block per child.
+void ClangDocBitcodeWriter::emitBlockContent(const CommentInfo &I) {
+  // The plain string fields, in the order in which they are written.
+  const std::pair<llvm::StringRef, RecordId> StringFields[] = {
+      {I.Kind, COMMENT_KIND},
+      {I.Text, COMMENT_TEXT},
+      {I.Name, COMMENT_NAME},
+      {I.Direction, COMMENT_DIRECTION},
+      {I.ParamName, COMMENT_PARAMNAME},
+      {I.CloseName, COMMENT_CLOSENAME}};
+  for (const auto &Field : StringFields) {
+    emitRecord(Field.first, Field.second);
+  }
+
+  emitRecord(I.SelfClosing, COMMENT_SELFCLOSING);
+  emitRecord(I.Explicit, COMMENT_EXPLICIT);
+
+  for (const auto &Key : I.AttrKeys) {
+    emitRecord(Key, COMMENT_ATTRKEY);
+  }
+  for (const auto &Value : I.AttrValues) {
+    emitRecord(Value, COMMENT_ATTRVAL);
+  }
+  for (const auto &Arg : I.Args) {
+    emitRecord(Arg, COMMENT_ARG);
+  }
+  for (const auto &Child : I.Children) {
+    emitBlock(*Child);
+  }
+}
+
+// Emits what every info type has in common: the USR, the name, one record per
+// entry of I.Namespace and one block per description comment. X is the prefix
+// of the record ids to use (e.g. NAMESPACE selects NAMESPACE_USR and so on).
+// The expansion expects the info being emitted to be in scope under the name
+// I.
+#define EMITINFO(X)                                               \
+  emitRecord(I.USR, X##_USR);                                     \
+  emitRecord(I.Name, X##_NAME);                                   \
+  for (const auto &N : I.Namespace) emitRecord(N, X##_NAMESPACE); \
+  for (const auto &CI : I.Description) emitBlock(CI);
+
+/// \brief Emits a namespace, which consists only of the fields shared by all
+/// infos (see EMITINFO).
+void ClangDocBitcodeWriter::emitBlockContent(const NamespaceInfo &I) {
+  EMITINFO(NAMESPACE)
+}
+
+/// \brief Emits an enum: the shared info fields, the definition location (if
+/// there is one), the other locations, the scoped flag and one block per
+/// member.
+void ClangDocBitcodeWriter::emitBlockContent(const EnumInfo &I) {
+  EMITINFO(ENUM)
+  if (I.DefLoc) {
+    emitRecord(I.DefLoc.getValue(), ENUM_DEFLOCATION);
+  }
+  for (const auto &Location : I.Loc) {
+    emitRecord(Location, ENUM_LOCATION);
+  }
+  emitRecord(I.Scoped, ENUM_SCOPED);
+  for (const auto &Member : I.Members) {
+    emitBlock(Member);
+  }
+}
+
+/// \brief Emits a record: the shared info fields, the definition location (if
+/// there is one), the other locations, the tag type, one block per member and
+/// finally the parents and the virtual parents.
+void ClangDocBitcodeWriter::emitBlockContent(const RecordInfo &I) {
+  EMITINFO(RECORD)
+  if (I.DefLoc) {
+    emitRecord(I.DefLoc.getValue(), RECORD_DEFLOCATION);
+  }
+  for (const auto &Location : I.Loc) {
+    emitRecord(Location, RECORD_LOCATION);
+  }
+  emitRecord(I.TagType, RECORD_TAG_TYPE);
+  for (const auto &Member : I.Members) {
+    emitBlock(Member);
+  }
+  for (const auto &Parent : I.Parents) {
+    emitRecord(Parent, RECORD_PARENT);
+  }
+  for (const auto &VirtualParent : I.VirtualParents) {
+    emitRecord(VirtualParent, RECORD_VPARENT);
+  }
+}
+
+/// \brief Emits a function: the shared info fields, the method flag, the
+/// definition location (if there is one), the other locations, the parent,
+/// the return type block and one block per parameter.
+///
+/// NOTE(review): FUNCTION_ACCESS is registered for the function block but no
+/// record with that id is emitted here -- confirm that this is intended.
+void ClangDocBitcodeWriter::emitBlockContent(const FunctionInfo &I) {
+  EMITINFO(FUNCTION)
+  emitRecord(I.IsMethod, FUNCTION_IS_METHOD);
+  if (I.DefLoc) {
+    emitRecord(I.DefLoc.getValue(), FUNCTION_DEFLOCATION);
+  }
+  for (const auto &Location : I.Loc) {
+    emitRecord(Location, FUNCTION_LOCATION);
+  }
+  emitRecord(I.Parent, FUNCTION_PARENT);
+  emitBlock(I.ReturnType);
+  for (const auto &Param : I.Params) {
+    emitBlock(Param);
+  }
+}
+
+#undef EMITINFO
+
+}  // namespace doc
+}  // namespace clang
Index: CMakeLists.txt
===================================================================
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -7,6 +7,7 @@
 endif()
 
 add_subdirectory(change-namespace)
+add_subdirectory(clang-doc)
 add_subdirectory(clang-query)
 add_subdirectory(clang-move)
 add_subdirectory(clangd)
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to