Author: Paul Kirth
Date: 2026-04-10T20:50:55Z
New Revision: fa2fc2ab9e11e751e9bd965b368b96276db1d549

URL: 
https://github.com/llvm/llvm-project/commit/fa2fc2ab9e11e751e9bd965b368b96276db1d549
DIFF: 
https://github.com/llvm/llvm-project/commit/fa2fc2ab9e11e751e9bd965b368b96276db1d549.diff

LOG: [clang-doc] Simplify parsing and reading bitcode blocks (#190053)

Much of the logic in the readBlock implementations is boilerplate that is
repeated in every specialization. This duplication will get much worse as
we introduce new custom block reading logic during the migration towards
arena allocation. In preparation for that, this change factors the shared
block-walking loop into a parseBlock helper driven by per-block callbacks
(block begin, block end, and record), which should make later refactoring
much more straightforward.

Added: 
    

Modified: 
    clang-tools-extra/clang-doc/BitcodeReader.cpp
    clang-tools-extra/clang-doc/BitcodeReader.h

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clang-doc/BitcodeReader.cpp 
b/clang-tools-extra/clang-doc/BitcodeReader.cpp
index a3a73235cfbdf..b7f4d6aa7ba23 100644
--- a/clang-tools-extra/clang-doc/BitcodeReader.cpp
+++ b/clang-tools-extra/clang-doc/BitcodeReader.cpp
@@ -379,17 +379,15 @@ static llvm::Error parseRecord(const Record &R, unsigned 
ID,
   }
 }
 
-template <>
-llvm::Error ClangDocBitcodeReader::readBlock(unsigned ID, CommentInfo *I) {
+template <typename T, typename BlockBeginHandler, typename BlockEndHandler,
+          typename RecordHandler>
+llvm::Error
+ClangDocBitcodeReader::parseBlock(unsigned ID, T I, BlockBeginHandler &&BBH,
+                                  BlockEndHandler &&BEH, RecordHandler &&RH) {
   llvm::TimeTraceScope("Reducing infos", "readBlock");
   if (llvm::Error Err = Stream.EnterSubBlock(ID))
     return Err;
 
-  llvm::SmallVector<CommentInfo> LocalChildren;
-  llvm::SmallVector<StringRef> AttrKeys;
-  llvm::SmallVector<StringRef> AttrValues;
-  llvm::SmallVector<StringRef> Args;
-
   while (true) {
     unsigned BlockOrCode = 0;
     llvm::Expected<Cursor> C = skipUntilRecordOrBlock(BlockOrCode);
@@ -400,62 +398,89 @@ llvm::Error ClangDocBitcodeReader::readBlock(unsigned ID, 
CommentInfo *I) {
     case Cursor::BadBlock:
       return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                      "bad block found");
-    case Cursor::BlockEnd: {
-      if (!LocalChildren.empty())
-        I->Children = allocateArray<CommentInfo>(LocalChildren, 
TransientArena);
-      if (!AttrKeys.empty()) {
-        StringRef *KeysMem =
-            TransientArena.Allocate<StringRef>(AttrKeys.size());
-        std::uninitialized_copy(AttrKeys.begin(), AttrKeys.end(), KeysMem);
-        I->AttrKeys = llvm::ArrayRef<StringRef>(KeysMem, AttrKeys.size());
-      }
-      if (!AttrValues.empty()) {
-        StringRef *ValuesMem =
-            TransientArena.Allocate<StringRef>(AttrValues.size());
-        std::uninitialized_copy(AttrValues.begin(), AttrValues.end(),
-                                ValuesMem);
-        I->AttrValues = llvm::ArrayRef<StringRef>(ValuesMem, 
AttrValues.size());
-      }
-      if (!Args.empty()) {
-        StringRef *ArgsMem = TransientArena.Allocate<StringRef>(Args.size());
-        std::uninitialized_copy(Args.begin(), Args.end(), ArgsMem);
-        I->Args = llvm::ArrayRef<StringRef>(ArgsMem, Args.size());
-      }
+    case Cursor::BlockEnd:
+      if (llvm::Error Err = BEH())
+        return Err;
       return llvm::Error::success();
-    }
-    case Cursor::BlockBegin:
-      if (BlockOrCode == BI_COMMENT_BLOCK_ID) {
-        CommentInfo Child;
-        if (llvm::Error Err = readBlock(BlockOrCode, &Child)) {
-          if (llvm::Error Skipped = Stream.SkipBlock())
-            return joinErrors(std::move(Err), std::move(Skipped));
-          return Err;
-        }
-        LocalChildren.push_back(std::move(Child));
-      } else {
-        if (llvm::Error Err = readSubBlock(BlockOrCode, I)) {
-          if (llvm::Error Skipped = Stream.SkipBlock())
-            return joinErrors(std::move(Err), std::move(Skipped));
-          return Err;
-        }
+    case Cursor::BlockBegin: {
+      llvm::Expected<bool> Handled = BBH(BlockOrCode);
+      if (!Handled)
+        return Handled.takeError();
+      if (*Handled)
+        continue;
+
+      if (llvm::Error Err = readSubBlock(BlockOrCode, I)) {
+        if (llvm::Error Skipped = Stream.SkipBlock())
+          return joinErrors(std::move(Err), std::move(Skipped));
+        return Err;
       }
       continue;
+    }
     case Cursor::Record:
       break;
     }
 
-    Record R;
-    llvm::StringRef Blob;
-    llvm::Expected<unsigned> MaybeRecID =
-        Stream.readRecord(BlockOrCode, R, &Blob);
-    if (!MaybeRecID)
-      return MaybeRecID.takeError();
-    if (llvm::Error Err = parseRecord(R, MaybeRecID.get(), Blob, I, AttrKeys,
-                                      AttrValues, Args))
+    if (llvm::Error Err = RH(BlockOrCode))
       return Err;
   }
 }
 
+template <>
+llvm::Error ClangDocBitcodeReader::readBlock(unsigned ID, CommentInfo *I) {
+  llvm::SmallVector<CommentInfo> LocalChildren;
+  llvm::SmallVector<StringRef> AttrKeys;
+  llvm::SmallVector<StringRef> AttrValues;
+  llvm::SmallVector<StringRef> Args;
+
+  return parseBlock(
+      ID, I,
+      [&](unsigned BlockOrCode) -> llvm::Expected<bool> {
+        if (BlockOrCode == BI_COMMENT_BLOCK_ID) {
+          CommentInfo Child;
+          if (llvm::Error Err = readBlock(BlockOrCode, &Child))
+            return std::move(Err);
+          LocalChildren.push_back(std::move(Child));
+          return true;
+        }
+        return false;
+      },
+      [&]() -> llvm::Error {
+        if (!LocalChildren.empty())
+          I->Children =
+              allocateArray<CommentInfo>(LocalChildren, TransientArena);
+        if (!AttrKeys.empty()) {
+          StringRef *KeysMem =
+              TransientArena.Allocate<StringRef>(AttrKeys.size());
+          std::uninitialized_copy(AttrKeys.begin(), AttrKeys.end(), KeysMem);
+          I->AttrKeys = llvm::ArrayRef<StringRef>(KeysMem, AttrKeys.size());
+        }
+        if (!AttrValues.empty()) {
+          StringRef *ValuesMem =
+              TransientArena.Allocate<StringRef>(AttrValues.size());
+          std::uninitialized_copy(AttrValues.begin(), AttrValues.end(),
+                                  ValuesMem);
+          I->AttrValues =
+              llvm::ArrayRef<StringRef>(ValuesMem, AttrValues.size());
+        }
+        if (!Args.empty()) {
+          StringRef *ArgsMem = TransientArena.Allocate<StringRef>(Args.size());
+          std::uninitialized_copy(Args.begin(), Args.end(), ArgsMem);
+          I->Args = llvm::ArrayRef<StringRef>(ArgsMem, Args.size());
+        }
+        return llvm::Error::success();
+      },
+      [&](unsigned BlockOrCode) -> llvm::Error {
+        Record R;
+        llvm::StringRef Blob;
+        llvm::Expected<unsigned> MaybeRecID =
+            Stream.readRecord(BlockOrCode, R, &Blob);
+        if (!MaybeRecID)
+          return MaybeRecID.takeError();
+        return parseRecord(R, MaybeRecID.get(), Blob, I, AttrKeys, AttrValues,
+                           Args);
+      });
+}
+
 static llvm::Error parseRecord(const Record &R, unsigned ID,
                                llvm::StringRef Blob, Reference *I, FieldId &F) 
{
   switch (ID) {
@@ -929,80 +954,38 @@ llvm::Error ClangDocBitcodeReader::readRecord(unsigned 
ID, Reference *I) {
 // Read a block of records into a single info.
 template <typename T>
 llvm::Error ClangDocBitcodeReader::readBlock(unsigned ID, T I) {
-  llvm::TimeTraceScope("Reducing infos", "readBlock");
-  if (llvm::Error Err = Stream.EnterSubBlock(ID))
-    return Err;
-
-  while (true) {
-    unsigned BlockOrCode = 0;
-    llvm::Expected<Cursor> C = skipUntilRecordOrBlock(BlockOrCode);
-    if (!C)
-      return C.takeError();
-
-    switch (*C) {
-    case Cursor::BadBlock:
-      return llvm::createStringError(llvm::inconvertibleErrorCode(),
-                                     "bad block found");
-    case Cursor::BlockEnd:
-      return llvm::Error::success();
-    case Cursor::BlockBegin:
-      if (llvm::Error Err = readSubBlock(BlockOrCode, I)) {
-        if (llvm::Error Skipped = Stream.SkipBlock())
-          return joinErrors(std::move(Err), std::move(Skipped));
-        return Err;
-      }
-      continue;
-    case Cursor::Record:
-      break;
-    }
-    if (auto Err = readRecord(BlockOrCode, I))
-      return Err;
-  }
+  return parseBlock(
+      ID, I, [](unsigned BlockOrCode) -> llvm::Expected<bool> { return false; 
},
+      []() -> llvm::Error { return llvm::Error::success(); },
+      [&](unsigned BlockOrCode) -> llvm::Error {
+        return readRecord(BlockOrCode, I);
+      });
 }
 
 template <>
 llvm::Error ClangDocBitcodeReader::readBlock(unsigned ID, FriendInfo *I) {
-  llvm::TimeTraceScope("Reducing infos", "readBlock");
-  if (llvm::Error Err = Stream.EnterSubBlock(ID))
-    return Err;
-
   llvm::SmallVector<FieldTypeInfo, 4> LocalParams;
 
-  while (true) {
-    unsigned BlockOrCode = 0;
-    llvm::Expected<Cursor> C = skipUntilRecordOrBlock(BlockOrCode);
-    if (!C)
-      return C.takeError();
-
-    switch (*C) {
-    case Cursor::BadBlock:
-      return llvm::createStringError(llvm::inconvertibleErrorCode(),
-                                     "bad block found");
-    case Cursor::BlockEnd: {
-      if (!LocalParams.empty())
-        I->Params = allocateArray<FieldTypeInfo>(LocalParams, TransientArena);
-      return llvm::Error::success();
-    }
-    case Cursor::BlockBegin:
-      if (BlockOrCode == BI_FIELD_TYPE_BLOCK_ID) {
-        FieldTypeInfo FI;
-        if (auto Err = readBlock(BlockOrCode, &FI))
-          return Err;
-        LocalParams.push_back(std::move(FI));
-        continue;
-      }
-      if (llvm::Error Err = readSubBlock(BlockOrCode, I)) {
-        if (llvm::Error Skipped = Stream.SkipBlock())
-          return joinErrors(std::move(Err), std::move(Skipped));
-        return Err;
-      }
-      continue;
-    case Cursor::Record:
-      break;
-    }
-    if (auto Err = readRecord(BlockOrCode, I))
-      return Err;
-  }
+  return parseBlock(
+      ID, I,
+      [&](unsigned BlockOrCode) -> llvm::Expected<bool> {
+        if (BlockOrCode == BI_FIELD_TYPE_BLOCK_ID) {
+          FieldTypeInfo FI;
+          if (auto Err = readBlock(BlockOrCode, &FI))
+            return std::move(Err);
+          LocalParams.push_back(std::move(FI));
+          return true;
+        }
+        return false;
+      },
+      [&]() -> llvm::Error {
+        if (!LocalParams.empty())
+          I->Params = allocateArray<FieldTypeInfo>(LocalParams, 
TransientArena);
+        return llvm::Error::success();
+      },
+      [&](unsigned BlockOrCode) -> llvm::Error {
+        return readRecord(BlockOrCode, I);
+      });
 }
 
 // TODO: fix inconsistentent returning of errors in add callbacks.

diff  --git a/clang-tools-extra/clang-doc/BitcodeReader.h 
b/clang-tools-extra/clang-doc/BitcodeReader.h
index 7516081e3f842..d3499fdee0f5d 100644
--- a/clang-tools-extra/clang-doc/BitcodeReader.h
+++ b/clang-tools-extra/clang-doc/BitcodeReader.h
@@ -45,6 +45,11 @@ class ClangDocBitcodeReader {
   // record found.
   template <typename T> llvm::Error readBlock(unsigned ID, T I);
 
+  template <typename T, typename BlockBeginHandler, typename BlockEndHandler,
+            typename RecordHandler>
+  llvm::Error parseBlock(unsigned ID, T I, BlockBeginHandler &&BBH,
+                         BlockEndHandler &&BEH, RecordHandler &&RH);
+
   // Step through a block of records to find the next data field.
   template <typename T> llvm::Error readSubBlock(unsigned ID, T I);
 


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to