https://github.com/Neil-N4 updated https://github.com/llvm/llvm-project/pull/205609
>From 4b1a963806a9654d28c92499024158df4d071859 Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Wed, 24 Jun 2026 13:41:04 -0400 Subject: [PATCH 01/10] [clang-doc] Add Markdown AST node type definitions --- .../clang-doc/support/Markdown.h | 182 ++++++++++++++++++ .../unittests/clang-doc/CMakeLists.txt | 2 + .../clang-doc/MarkdownParserTest.cpp | 73 +++++++ 3 files changed, 257 insertions(+) create mode 100644 clang-tools-extra/clang-doc/support/Markdown.h create mode 100644 clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h new file mode 100644 index 0000000000000..97cadf00c9c2e --- /dev/null +++ b/clang-tools-extra/clang-doc/support/Markdown.h @@ -0,0 +1,182 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SUPPORT_MARKDOWN_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SUPPORT_MARKDOWN_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include <type_traits> + +namespace clang::doc::markdown { + +enum class NodeKind { + NK_Text, + NK_InlineCode, + NK_Emphasis, + NK_Strong, + NK_Paragraph, + NK_Heading, + NK_FencedCode, + NK_Table, + NK_UnorderedList, + NK_OrderedList, + NK_ListItem, + NK_BlockQuote, + NK_ThematicBreak, +}; + +struct MDNode { + NodeKind Kind; + explicit MDNode(NodeKind K) : Kind(K) {} +}; + +struct TextNode : MDNode { + llvm::StringRef Text; + explicit TextNode(llvm::StringRef T) : MDNode(NodeKind::NK_Text), Text(T) {} + static bool classof(const MDNode *N) { return N->Kind == NodeKind::NK_Text; } +}; +static_assert(std::is_trivially_destructible_v<TextNode>); + +struct InlineCodeNode : MDNode { + llvm::StringRef Code; + explicit InlineCodeNode(llvm::StringRef C) + : MDNode(NodeKind::NK_InlineCode), Code(C) {} + static bool classof(const MDNode *N) { + return N->Kind == NodeKind::NK_InlineCode; + } +}; +static_assert(std::is_trivially_destructible_v<InlineCodeNode>); + +struct EmphasisNode : MDNode { + llvm::ArrayRef<MDNode *> Children; + explicit EmphasisNode(llvm::ArrayRef<MDNode *> C) + : MDNode(NodeKind::NK_Emphasis), Children(C) {} + static bool classof(const MDNode *N) { + return N->Kind == NodeKind::NK_Emphasis; + } +}; +static_assert(std::is_trivially_destructible_v<EmphasisNode>); + +struct StrongNode : MDNode { + llvm::ArrayRef<MDNode *> Children; + explicit StrongNode(llvm::ArrayRef<MDNode *> C) + : MDNode(NodeKind::NK_Strong), Children(C) {} + static bool classof(const MDNode *N) { + return N->Kind == NodeKind::NK_Strong; + } +}; +static_assert(std::is_trivially_destructible_v<StrongNode>); + +struct 
ParagraphNode : MDNode { + llvm::ArrayRef<MDNode *> Children; + explicit ParagraphNode(llvm::ArrayRef<MDNode *> C) + : MDNode(NodeKind::NK_Paragraph), Children(C) {} + static bool classof(const MDNode *N) { + return N->Kind == NodeKind::NK_Paragraph; + } +}; +static_assert(std::is_trivially_destructible_v<ParagraphNode>); + +struct HeadingNode : MDNode { + unsigned Level; + llvm::ArrayRef<MDNode *> Children; + HeadingNode(unsigned L, llvm::ArrayRef<MDNode *> C) + : MDNode(NodeKind::NK_Heading), Level(L), Children(C) {} + static bool classof(const MDNode *N) { + return N->Kind == NodeKind::NK_Heading; + } +}; +static_assert(std::is_trivially_destructible_v<HeadingNode>); + +struct FencedCodeNode : MDNode { + llvm::StringRef Lang; + llvm::ArrayRef<llvm::StringRef> Lines; + FencedCodeNode(llvm::StringRef L, llvm::ArrayRef<llvm::StringRef> Ls) + : MDNode(NodeKind::NK_FencedCode), Lang(L), Lines(Ls) {} + static bool classof(const MDNode *N) { + return N->Kind == NodeKind::NK_FencedCode; + } +}; +static_assert(std::is_trivially_destructible_v<FencedCodeNode>); + +struct TableCell { + llvm::ArrayRef<MDNode *> Children; +}; +static_assert(std::is_trivially_destructible_v<TableCell>); + +struct TableRow { + llvm::ArrayRef<TableCell> Cells; +}; +static_assert(std::is_trivially_destructible_v<TableRow>); + +struct TableNode : MDNode { + TableRow Header; + llvm::ArrayRef<TableRow> Body; + TableNode(TableRow H, llvm::ArrayRef<TableRow> B) + : MDNode(NodeKind::NK_Table), Header(H), Body(B) {} + static bool classof(const MDNode *N) { return N->Kind == NodeKind::NK_Table; } +}; +static_assert(std::is_trivially_destructible_v<TableNode>); + +struct ListItemNode : MDNode { + llvm::ArrayRef<MDNode *> Children; + explicit ListItemNode(llvm::ArrayRef<MDNode *> C) + : MDNode(NodeKind::NK_ListItem), Children(C) {} + static bool classof(const MDNode *N) { + return N->Kind == NodeKind::NK_ListItem; + } +}; +static_assert(std::is_trivially_destructible_v<ListItemNode>); + +struct 
UnorderedListNode : MDNode { + llvm::ArrayRef<ListItemNode *> Items; + explicit UnorderedListNode(llvm::ArrayRef<ListItemNode *> I) + : MDNode(NodeKind::NK_UnorderedList), Items(I) {} + static bool classof(const MDNode *N) { + return N->Kind == NodeKind::NK_UnorderedList; + } +}; +static_assert(std::is_trivially_destructible_v<UnorderedListNode>); + +struct OrderedListNode : MDNode { + unsigned Start; + llvm::ArrayRef<ListItemNode *> Items; + OrderedListNode(unsigned S, llvm::ArrayRef<ListItemNode *> I) + : MDNode(NodeKind::NK_OrderedList), Start(S), Items(I) {} + static bool classof(const MDNode *N) { + return N->Kind == NodeKind::NK_OrderedList; + } +}; +static_assert(std::is_trivially_destructible_v<OrderedListNode>); + +struct BlockQuoteNode : MDNode { + llvm::ArrayRef<MDNode *> Children; + explicit BlockQuoteNode(llvm::ArrayRef<MDNode *> C) + : MDNode(NodeKind::NK_BlockQuote), Children(C) {} + static bool classof(const MDNode *N) { + return N->Kind == NodeKind::NK_BlockQuote; + } +}; +static_assert(std::is_trivially_destructible_v<BlockQuoteNode>); + +struct ThematicBreakNode : MDNode { + ThematicBreakNode() : MDNode(NodeKind::NK_ThematicBreak) {} + static bool classof(const MDNode *N) { + return N->Kind == NodeKind::NK_ThematicBreak; + } +}; +static_assert(std::is_trivially_destructible_v<ThematicBreakNode>); + +llvm::ArrayRef<MDNode *> parseMarkdown(llvm::StringRef Text, + llvm::BumpPtrAllocator &Arena); + +} // namespace clang::doc::markdown + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SUPPORT_MARKDOWN_H \ No newline at end of file diff --git a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt index 01b34ec9a791e..935df6da8ac78 100644 --- a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt @@ -31,6 +31,7 @@ add_extra_unittest(ClangDocTests SerializeTest.cpp YAMLGeneratorTest.cpp JSONGeneratorTest.cpp + MarkdownParserTest.cpp ) 
clang_target_link_libraries(ClangDocTests @@ -49,5 +50,6 @@ clang_target_link_libraries(ClangDocTests target_link_libraries(ClangDocTests PRIVATE clangDoc + clangDocSupport LLVMTestingSupport ) diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp new file mode 100644 index 0000000000000..c0b554ad67f7d --- /dev/null +++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp @@ -0,0 +1,73 @@ +#include "support/Markdown.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "gtest/gtest.h" + +using namespace clang::doc::markdown; +using namespace llvm; + +namespace { + +TEST(MarkdownNodeTest, TextNode) { + BumpPtrAllocator Arena; + auto *N = new (Arena) TextNode("hello"); + EXPECT_EQ(N->Kind, NodeKind::NK_Text); + EXPECT_EQ(N->Text, "hello"); + EXPECT_TRUE(isa<TextNode>(N)); +} + +TEST(MarkdownNodeTest, FencedCodeNode) { + BumpPtrAllocator Arena; + StringRef Lines[] = {"int x = 0;"}; + auto *N = new (Arena) FencedCodeNode("cpp", ArrayRef(Lines)); + EXPECT_EQ(N->Kind, NodeKind::NK_FencedCode); + EXPECT_EQ(N->Lang, "cpp"); + EXPECT_EQ(N->Lines.size(), 1u); + EXPECT_TRUE(isa<FencedCodeNode>(N)); +} + +TEST(MarkdownNodeTest, HeadingNode) { + BumpPtrAllocator Arena; + auto *N = new (Arena) HeadingNode(2, {}); + EXPECT_EQ(N->Kind, NodeKind::NK_Heading); + EXPECT_EQ(N->Level, 2u); + EXPECT_TRUE(isa<HeadingNode>(N)); +} + +TEST(MarkdownNodeTest, ThematicBreakNode) { + BumpPtrAllocator Arena; + auto *N = new (Arena) ThematicBreakNode(); + EXPECT_EQ(N->Kind, NodeKind::NK_ThematicBreak); + EXPECT_TRUE(isa<ThematicBreakNode>(N)); +} + +TEST(MarkdownNodeTest, InlineCodeNode) { + BumpPtrAllocator Arena; + auto *N = new (Arena) InlineCodeNode("foo()"); + EXPECT_EQ(N->Kind, NodeKind::NK_InlineCode); + EXPECT_EQ(N->Code, "foo()"); + EXPECT_TRUE(isa<InlineCodeNode>(N)); +} + +TEST(MarkdownNodeTest, EmphasisNode) { + BumpPtrAllocator Arena; + auto *N = new (Arena) 
EmphasisNode({}); + EXPECT_EQ(N->Kind, NodeKind::NK_Emphasis); + EXPECT_TRUE(isa<EmphasisNode>(N)); +} + +TEST(MarkdownNodeTest, UnorderedListNode) { + BumpPtrAllocator Arena; + auto *N = new (Arena) UnorderedListNode({}); + EXPECT_EQ(N->Kind, NodeKind::NK_UnorderedList); + EXPECT_TRUE(isa<UnorderedListNode>(N)); +} + +TEST(MarkdownNodeTest, ParagraphNode) { + BumpPtrAllocator Arena; + auto *N = new (Arena) ParagraphNode({}); + EXPECT_EQ(N->Kind, NodeKind::NK_Paragraph); + EXPECT_TRUE(isa<ParagraphNode>(N)); +} + +} // namespace \ No newline at end of file >From cd0760862b396c7bf8657c631980b6d702eb1a32 Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Wed, 24 Jun 2026 21:19:23 -0400 Subject: [PATCH 02/10] [clang-doc] Address review feedback: rename Node, fix tests, EOF newlines --- .../clang-doc/support/Markdown.h | 116 ++++++++---------- .../clang-doc/MarkdownParserTest.cpp | 59 ++++----- 2 files changed, 75 insertions(+), 100 deletions(-) diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h index 97cadf00c9c2e..d05689f32e608 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.h +++ b/clang-tools-extra/clang-doc/support/Markdown.h @@ -11,7 +11,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Allocator.h" #include <type_traits> namespace clang::doc::markdown { @@ -32,82 +31,78 @@ enum class NodeKind { NK_ThematicBreak, }; -struct MDNode { +struct Node { NodeKind Kind; - explicit MDNode(NodeKind K) : Kind(K) {} + explicit Node(NodeKind K) : Kind(K) {} }; -struct TextNode : MDNode { +struct TextNode : Node { llvm::StringRef Text; - explicit TextNode(llvm::StringRef T) : MDNode(NodeKind::NK_Text), Text(T) {} - static bool classof(const MDNode *N) { return N->Kind == NodeKind::NK_Text; } + explicit TextNode(llvm::StringRef T) : Node(NodeKind::NK_Text), Text(T) {} + static bool classof(const Node *N) { return N->Kind == NodeKind::NK_Text; } }; 
static_assert(std::is_trivially_destructible_v<TextNode>); -struct InlineCodeNode : MDNode { +struct InlineCodeNode : Node { llvm::StringRef Code; explicit InlineCodeNode(llvm::StringRef C) - : MDNode(NodeKind::NK_InlineCode), Code(C) {} - static bool classof(const MDNode *N) { + : Node(NodeKind::NK_InlineCode), Code(C) {} + static bool classof(const Node *N) { return N->Kind == NodeKind::NK_InlineCode; } }; static_assert(std::is_trivially_destructible_v<InlineCodeNode>); -struct EmphasisNode : MDNode { - llvm::ArrayRef<MDNode *> Children; - explicit EmphasisNode(llvm::ArrayRef<MDNode *> C) - : MDNode(NodeKind::NK_Emphasis), Children(C) {} - static bool classof(const MDNode *N) { +struct EmphasisNode : Node { + llvm::ArrayRef<Node *> Children; + explicit EmphasisNode(llvm::ArrayRef<Node *> C) + : Node(NodeKind::NK_Emphasis), Children(C) {} + static bool classof(const Node *N) { return N->Kind == NodeKind::NK_Emphasis; } }; static_assert(std::is_trivially_destructible_v<EmphasisNode>); -struct StrongNode : MDNode { - llvm::ArrayRef<MDNode *> Children; - explicit StrongNode(llvm::ArrayRef<MDNode *> C) - : MDNode(NodeKind::NK_Strong), Children(C) {} - static bool classof(const MDNode *N) { - return N->Kind == NodeKind::NK_Strong; - } +struct StrongNode : Node { + llvm::ArrayRef<Node *> Children; + explicit StrongNode(llvm::ArrayRef<Node *> C) + : Node(NodeKind::NK_Strong), Children(C) {} + static bool classof(const Node *N) { return N->Kind == NodeKind::NK_Strong; } }; static_assert(std::is_trivially_destructible_v<StrongNode>); -struct ParagraphNode : MDNode { - llvm::ArrayRef<MDNode *> Children; - explicit ParagraphNode(llvm::ArrayRef<MDNode *> C) - : MDNode(NodeKind::NK_Paragraph), Children(C) {} - static bool classof(const MDNode *N) { +struct ParagraphNode : Node { + llvm::ArrayRef<Node *> Children; + explicit ParagraphNode(llvm::ArrayRef<Node *> C) + : Node(NodeKind::NK_Paragraph), Children(C) {} + static bool classof(const Node *N) { return N->Kind == 
NodeKind::NK_Paragraph; } }; static_assert(std::is_trivially_destructible_v<ParagraphNode>); -struct HeadingNode : MDNode { +struct HeadingNode : Node { unsigned Level; - llvm::ArrayRef<MDNode *> Children; - HeadingNode(unsigned L, llvm::ArrayRef<MDNode *> C) - : MDNode(NodeKind::NK_Heading), Level(L), Children(C) {} - static bool classof(const MDNode *N) { - return N->Kind == NodeKind::NK_Heading; - } + llvm::ArrayRef<Node *> Children; + HeadingNode(unsigned L, llvm::ArrayRef<Node *> C) + : Node(NodeKind::NK_Heading), Level(L), Children(C) {} + static bool classof(const Node *N) { return N->Kind == NodeKind::NK_Heading; } }; static_assert(std::is_trivially_destructible_v<HeadingNode>); -struct FencedCodeNode : MDNode { +struct FencedCodeNode : Node { llvm::StringRef Lang; llvm::ArrayRef<llvm::StringRef> Lines; FencedCodeNode(llvm::StringRef L, llvm::ArrayRef<llvm::StringRef> Ls) - : MDNode(NodeKind::NK_FencedCode), Lang(L), Lines(Ls) {} - static bool classof(const MDNode *N) { + : Node(NodeKind::NK_FencedCode), Lang(L), Lines(Ls) {} + static bool classof(const Node *N) { return N->Kind == NodeKind::NK_FencedCode; } }; static_assert(std::is_trivially_destructible_v<FencedCodeNode>); struct TableCell { - llvm::ArrayRef<MDNode *> Children; + llvm::ArrayRef<Node *> Children; }; static_assert(std::is_trivially_destructible_v<TableCell>); @@ -116,67 +111,64 @@ struct TableRow { }; static_assert(std::is_trivially_destructible_v<TableRow>); -struct TableNode : MDNode { +struct TableNode : Node { TableRow Header; llvm::ArrayRef<TableRow> Body; TableNode(TableRow H, llvm::ArrayRef<TableRow> B) - : MDNode(NodeKind::NK_Table), Header(H), Body(B) {} - static bool classof(const MDNode *N) { return N->Kind == NodeKind::NK_Table; } + : Node(NodeKind::NK_Table), Header(H), Body(B) {} + static bool classof(const Node *N) { return N->Kind == NodeKind::NK_Table; } }; static_assert(std::is_trivially_destructible_v<TableNode>); -struct ListItemNode : MDNode { - llvm::ArrayRef<MDNode *> 
Children; - explicit ListItemNode(llvm::ArrayRef<MDNode *> C) - : MDNode(NodeKind::NK_ListItem), Children(C) {} - static bool classof(const MDNode *N) { +struct ListItemNode : Node { + llvm::ArrayRef<Node *> Children; + explicit ListItemNode(llvm::ArrayRef<Node *> C) + : Node(NodeKind::NK_ListItem), Children(C) {} + static bool classof(const Node *N) { return N->Kind == NodeKind::NK_ListItem; } }; static_assert(std::is_trivially_destructible_v<ListItemNode>); -struct UnorderedListNode : MDNode { +struct UnorderedListNode : Node { llvm::ArrayRef<ListItemNode *> Items; explicit UnorderedListNode(llvm::ArrayRef<ListItemNode *> I) - : MDNode(NodeKind::NK_UnorderedList), Items(I) {} - static bool classof(const MDNode *N) { + : Node(NodeKind::NK_UnorderedList), Items(I) {} + static bool classof(const Node *N) { return N->Kind == NodeKind::NK_UnorderedList; } }; static_assert(std::is_trivially_destructible_v<UnorderedListNode>); -struct OrderedListNode : MDNode { +struct OrderedListNode : Node { unsigned Start; llvm::ArrayRef<ListItemNode *> Items; OrderedListNode(unsigned S, llvm::ArrayRef<ListItemNode *> I) - : MDNode(NodeKind::NK_OrderedList), Start(S), Items(I) {} - static bool classof(const MDNode *N) { + : Node(NodeKind::NK_OrderedList), Start(S), Items(I) {} + static bool classof(const Node *N) { return N->Kind == NodeKind::NK_OrderedList; } }; static_assert(std::is_trivially_destructible_v<OrderedListNode>); -struct BlockQuoteNode : MDNode { - llvm::ArrayRef<MDNode *> Children; - explicit BlockQuoteNode(llvm::ArrayRef<MDNode *> C) - : MDNode(NodeKind::NK_BlockQuote), Children(C) {} - static bool classof(const MDNode *N) { +struct BlockQuoteNode : Node { + llvm::ArrayRef<Node *> Children; + explicit BlockQuoteNode(llvm::ArrayRef<Node *> C) + : Node(NodeKind::NK_BlockQuote), Children(C) {} + static bool classof(const Node *N) { return N->Kind == NodeKind::NK_BlockQuote; } }; static_assert(std::is_trivially_destructible_v<BlockQuoteNode>); -struct ThematicBreakNode : 
MDNode { - ThematicBreakNode() : MDNode(NodeKind::NK_ThematicBreak) {} - static bool classof(const MDNode *N) { +struct ThematicBreakNode : Node { + ThematicBreakNode() : Node(NodeKind::NK_ThematicBreak) {} + static bool classof(const Node *N) { return N->Kind == NodeKind::NK_ThematicBreak; } }; static_assert(std::is_trivially_destructible_v<ThematicBreakNode>); -llvm::ArrayRef<MDNode *> parseMarkdown(llvm::StringRef Text, - llvm::BumpPtrAllocator &Arena); - } // namespace clang::doc::markdown #endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SUPPORT_MARKDOWN_H \ No newline at end of file diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp index c0b554ad67f7d..e65c07debb5e8 100644 --- a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp +++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp @@ -1,5 +1,4 @@ #include "support/Markdown.h" -#include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "gtest/gtest.h" @@ -9,65 +8,49 @@ using namespace llvm; namespace { TEST(MarkdownNodeTest, TextNode) { - BumpPtrAllocator Arena; - auto *N = new (Arena) TextNode("hello"); - EXPECT_EQ(N->Kind, NodeKind::NK_Text); - EXPECT_EQ(N->Text, "hello"); - EXPECT_TRUE(isa<TextNode>(N)); + TextNode N("hello"); + EXPECT_EQ(N.Kind, NodeKind::NK_Text); + EXPECT_EQ(N.Text, "hello"); } TEST(MarkdownNodeTest, FencedCodeNode) { - BumpPtrAllocator Arena; StringRef Lines[] = {"int x = 0;"}; - auto *N = new (Arena) FencedCodeNode("cpp", ArrayRef(Lines)); - EXPECT_EQ(N->Kind, NodeKind::NK_FencedCode); - EXPECT_EQ(N->Lang, "cpp"); - EXPECT_EQ(N->Lines.size(), 1u); - EXPECT_TRUE(isa<FencedCodeNode>(N)); + FencedCodeNode N("cpp", ArrayRef(Lines)); + EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode); + EXPECT_EQ(N.Lang, "cpp"); + EXPECT_EQ(N.Lines.size(), 1u); } TEST(MarkdownNodeTest, HeadingNode) { - BumpPtrAllocator Arena; - auto *N = new (Arena) HeadingNode(2, {}); - EXPECT_EQ(N->Kind, 
NodeKind::NK_Heading); - EXPECT_EQ(N->Level, 2u); - EXPECT_TRUE(isa<HeadingNode>(N)); + HeadingNode N(2, {}); + EXPECT_EQ(N.Kind, NodeKind::NK_Heading); + EXPECT_EQ(N.Level, 2u); } TEST(MarkdownNodeTest, ThematicBreakNode) { - BumpPtrAllocator Arena; - auto *N = new (Arena) ThematicBreakNode(); - EXPECT_EQ(N->Kind, NodeKind::NK_ThematicBreak); - EXPECT_TRUE(isa<ThematicBreakNode>(N)); + ThematicBreakNode N; + EXPECT_EQ(N.Kind, NodeKind::NK_ThematicBreak); } TEST(MarkdownNodeTest, InlineCodeNode) { - BumpPtrAllocator Arena; - auto *N = new (Arena) InlineCodeNode("foo()"); - EXPECT_EQ(N->Kind, NodeKind::NK_InlineCode); - EXPECT_EQ(N->Code, "foo()"); - EXPECT_TRUE(isa<InlineCodeNode>(N)); + InlineCodeNode N("foo()"); + EXPECT_EQ(N.Kind, NodeKind::NK_InlineCode); + EXPECT_EQ(N.Code, "foo()"); } TEST(MarkdownNodeTest, EmphasisNode) { - BumpPtrAllocator Arena; - auto *N = new (Arena) EmphasisNode({}); - EXPECT_EQ(N->Kind, NodeKind::NK_Emphasis); - EXPECT_TRUE(isa<EmphasisNode>(N)); + EmphasisNode N({}); + EXPECT_EQ(N.Kind, NodeKind::NK_Emphasis); } TEST(MarkdownNodeTest, UnorderedListNode) { - BumpPtrAllocator Arena; - auto *N = new (Arena) UnorderedListNode({}); - EXPECT_EQ(N->Kind, NodeKind::NK_UnorderedList); - EXPECT_TRUE(isa<UnorderedListNode>(N)); + UnorderedListNode N({}); + EXPECT_EQ(N.Kind, NodeKind::NK_UnorderedList); } TEST(MarkdownNodeTest, ParagraphNode) { - BumpPtrAllocator Arena; - auto *N = new (Arena) ParagraphNode({}); - EXPECT_EQ(N->Kind, NodeKind::NK_Paragraph); - EXPECT_TRUE(isa<ParagraphNode>(N)); + ParagraphNode N({}); + EXPECT_EQ(N.Kind, NodeKind::NK_Paragraph); } } // namespace \ No newline at end of file >From bb5c2056f4860546a4d4991c5672574ea3c6a0e6 Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Fri, 26 Jun 2026 15:33:55 -0400 Subject: [PATCH 03/10] [clang-doc] Address review feedback: proper classes with getters and dump, fix tests --- .../clang-doc/support/Markdown.h | 114 +++++++++++++++--- 
.../unittests/clang-doc/CMakeLists.txt | 1 - .../clang-doc/MarkdownParserTest.cpp | 27 +++-- 3 files changed, 118 insertions(+), 24 deletions(-) diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h index d05689f32e608..b5c6ccbfe0255 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.h +++ b/clang-tools-extra/clang-doc/support/Markdown.h @@ -11,6 +11,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/raw_ostream.h" #include <type_traits> namespace clang::doc::markdown { @@ -31,70 +33,115 @@ enum class NodeKind { NK_ThematicBreak, }; -struct Node { +class Node { +public: NodeKind Kind; explicit Node(NodeKind K) : Kind(K) {} + void dump() const { llvm::errs() << "Node\n"; } + static bool classof(const Node *) { return true; } }; -struct TextNode : Node { +class TextNode : public Node { llvm::StringRef Text; + +public: explicit TextNode(llvm::StringRef T) : Node(NodeKind::NK_Text), Text(T) {} + llvm::StringRef getText() const { return Text; } + void dump() const { llvm::errs() << "TextNode: " << Text << "\n"; } static bool classof(const Node *N) { return N->Kind == NodeKind::NK_Text; } }; static_assert(std::is_trivially_destructible_v<TextNode>); -struct InlineCodeNode : Node { +class InlineCodeNode : public Node { llvm::StringRef Code; + +public: explicit InlineCodeNode(llvm::StringRef C) : Node(NodeKind::NK_InlineCode), Code(C) {} + llvm::StringRef getCode() const { return Code; } + void dump() const { llvm::errs() << "InlineCodeNode: " << Code << "\n"; } static bool classof(const Node *N) { return N->Kind == NodeKind::NK_InlineCode; } }; static_assert(std::is_trivially_destructible_v<InlineCodeNode>); -struct EmphasisNode : Node { +class EmphasisNode : public Node { llvm::ArrayRef<Node *> Children; + +public: explicit EmphasisNode(llvm::ArrayRef<Node *> C) : Node(NodeKind::NK_Emphasis), Children(C) {} + llvm::ArrayRef<Node *> 
getChildren() const { return Children; } + void dump() const { + llvm::errs() << "EmphasisNode (" << Children.size() << " children)\n"; + } static bool classof(const Node *N) { return N->Kind == NodeKind::NK_Emphasis; } }; static_assert(std::is_trivially_destructible_v<EmphasisNode>); -struct StrongNode : Node { +class StrongNode : public Node { llvm::ArrayRef<Node *> Children; + +public: explicit StrongNode(llvm::ArrayRef<Node *> C) : Node(NodeKind::NK_Strong), Children(C) {} + llvm::ArrayRef<Node *> getChildren() const { return Children; } + void dump() const { + llvm::errs() << "StrongNode (" << Children.size() << " children)\n"; + } static bool classof(const Node *N) { return N->Kind == NodeKind::NK_Strong; } }; static_assert(std::is_trivially_destructible_v<StrongNode>); -struct ParagraphNode : Node { +class ParagraphNode : public Node { llvm::ArrayRef<Node *> Children; + +public: explicit ParagraphNode(llvm::ArrayRef<Node *> C) : Node(NodeKind::NK_Paragraph), Children(C) {} + llvm::ArrayRef<Node *> getChildren() const { return Children; } + void dump() const { + llvm::errs() << "ParagraphNode (" << Children.size() << " children)\n"; + } static bool classof(const Node *N) { return N->Kind == NodeKind::NK_Paragraph; } }; static_assert(std::is_trivially_destructible_v<ParagraphNode>); -struct HeadingNode : Node { +class HeadingNode : public Node { unsigned Level; llvm::ArrayRef<Node *> Children; + +public: HeadingNode(unsigned L, llvm::ArrayRef<Node *> C) : Node(NodeKind::NK_Heading), Level(L), Children(C) {} + unsigned getLevel() const { return Level; } + llvm::ArrayRef<Node *> getChildren() const { return Children; } + void dump() const { + llvm::errs() << "HeadingNode: level=" << Level << " (" << Children.size() + << " children)\n"; + } static bool classof(const Node *N) { return N->Kind == NodeKind::NK_Heading; } }; static_assert(std::is_trivially_destructible_v<HeadingNode>); -struct FencedCodeNode : Node { +class FencedCodeNode : public Node { 
llvm::StringRef Lang; llvm::ArrayRef<llvm::StringRef> Lines; + +public: FencedCodeNode(llvm::StringRef L, llvm::ArrayRef<llvm::StringRef> Ls) : Node(NodeKind::NK_FencedCode), Lang(L), Lines(Ls) {} + llvm::StringRef getLang() const { return Lang; } + llvm::ArrayRef<llvm::StringRef> getLines() const { return Lines; } + void dump() const { + llvm::errs() << "FencedCodeNode: lang=" << Lang << " (" << Lines.size() + << " lines)\n"; + } static bool classof(const Node *N) { return N->Kind == NodeKind::NK_FencedCode; } @@ -111,58 +158,95 @@ struct TableRow { }; static_assert(std::is_trivially_destructible_v<TableRow>); -struct TableNode : Node { +class TableNode : public Node { TableRow Header; llvm::ArrayRef<TableRow> Body; + +public: TableNode(TableRow H, llvm::ArrayRef<TableRow> B) : Node(NodeKind::NK_Table), Header(H), Body(B) {} + const TableRow &getHeader() const { return Header; } + llvm::ArrayRef<TableRow> getBody() const { return Body; } + void dump() const { + llvm::errs() << "TableNode: " << Header.Cells.size() << " header cells, " + << Body.size() << " rows\n"; + } static bool classof(const Node *N) { return N->Kind == NodeKind::NK_Table; } }; static_assert(std::is_trivially_destructible_v<TableNode>); -struct ListItemNode : Node { +class ListItemNode : public Node { llvm::ArrayRef<Node *> Children; + +public: explicit ListItemNode(llvm::ArrayRef<Node *> C) : Node(NodeKind::NK_ListItem), Children(C) {} + llvm::ArrayRef<Node *> getChildren() const { return Children; } + void dump() const { + llvm::errs() << "ListItemNode (" << Children.size() << " children)\n"; + } static bool classof(const Node *N) { return N->Kind == NodeKind::NK_ListItem; } }; static_assert(std::is_trivially_destructible_v<ListItemNode>); -struct UnorderedListNode : Node { +class UnorderedListNode : public Node { llvm::ArrayRef<ListItemNode *> Items; + +public: + UnorderedListNode() : Node(NodeKind::NK_UnorderedList), Items({}) {} explicit UnorderedListNode(llvm::ArrayRef<ListItemNode *> I) : 
Node(NodeKind::NK_UnorderedList), Items(I) {} + llvm::ArrayRef<ListItemNode *> getItems() const { return Items; } + void dump() const { + llvm::errs() << "UnorderedListNode (" << Items.size() << " items)\n"; + } static bool classof(const Node *N) { return N->Kind == NodeKind::NK_UnorderedList; } }; static_assert(std::is_trivially_destructible_v<UnorderedListNode>); -struct OrderedListNode : Node { +class OrderedListNode : public Node { unsigned Start; llvm::ArrayRef<ListItemNode *> Items; + +public: OrderedListNode(unsigned S, llvm::ArrayRef<ListItemNode *> I) : Node(NodeKind::NK_OrderedList), Start(S), Items(I) {} + unsigned getStart() const { return Start; } + llvm::ArrayRef<ListItemNode *> getItems() const { return Items; } + void dump() const { + llvm::errs() << "OrderedListNode: start=" << Start << " (" << Items.size() + << " items)\n"; + } static bool classof(const Node *N) { return N->Kind == NodeKind::NK_OrderedList; } }; static_assert(std::is_trivially_destructible_v<OrderedListNode>); -struct BlockQuoteNode : Node { +class BlockQuoteNode : public Node { llvm::ArrayRef<Node *> Children; + +public: explicit BlockQuoteNode(llvm::ArrayRef<Node *> C) : Node(NodeKind::NK_BlockQuote), Children(C) {} + llvm::ArrayRef<Node *> getChildren() const { return Children; } + void dump() const { + llvm::errs() << "BlockQuoteNode (" << Children.size() << " children)\n"; + } static bool classof(const Node *N) { return N->Kind == NodeKind::NK_BlockQuote; } }; static_assert(std::is_trivially_destructible_v<BlockQuoteNode>); -struct ThematicBreakNode : Node { +class ThematicBreakNode : public Node { +public: ThematicBreakNode() : Node(NodeKind::NK_ThematicBreak) {} + void dump() const { llvm::errs() << "ThematicBreakNode\n"; } static bool classof(const Node *N) { return N->Kind == NodeKind::NK_ThematicBreak; } @@ -171,4 +255,4 @@ static_assert(std::is_trivially_destructible_v<ThematicBreakNode>); } // namespace clang::doc::markdown -#endif // 
LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SUPPORT_MARKDOWN_H \ No newline at end of file +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SUPPORT_MARKDOWN_H diff --git a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt index 935df6da8ac78..688a547a7f031 100644 --- a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt @@ -50,6 +50,5 @@ clang_target_link_libraries(ClangDocTests target_link_libraries(ClangDocTests PRIVATE clangDoc - clangDocSupport LLVMTestingSupport ) diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp index e65c07debb5e8..4b7d3e4b7bb4b 100644 --- a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp +++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp @@ -1,5 +1,12 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + #include "support/Markdown.h" -#include "llvm/Support/Casting.h" #include "gtest/gtest.h" using namespace clang::doc::markdown; @@ -10,21 +17,22 @@ namespace { TEST(MarkdownNodeTest, TextNode) { TextNode N("hello"); EXPECT_EQ(N.Kind, NodeKind::NK_Text); - EXPECT_EQ(N.Text, "hello"); + EXPECT_EQ(N.getText(), "hello"); } TEST(MarkdownNodeTest, FencedCodeNode) { - StringRef Lines[] = {"int x = 0;"}; + StringRef Lines[] = {"int x = 0;", "int y = 1;", "return x + y;"}; FencedCodeNode N("cpp", ArrayRef(Lines)); EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode); - EXPECT_EQ(N.Lang, "cpp"); - EXPECT_EQ(N.Lines.size(), 1u); + EXPECT_EQ(N.getLang(), "cpp"); + EXPECT_EQ(N.getLines().size(), 3u); + EXPECT_EQ(N.getLines()[1], "int y = 1;"); } TEST(MarkdownNodeTest, HeadingNode) { HeadingNode N(2, {}); EXPECT_EQ(N.Kind, NodeKind::NK_Heading); - EXPECT_EQ(N.Level, 2u); + EXPECT_EQ(N.getLevel(), 2u); } TEST(MarkdownNodeTest, ThematicBreakNode) { @@ -35,22 +43,25 @@ TEST(MarkdownNodeTest, ThematicBreakNode) { TEST(MarkdownNodeTest, InlineCodeNode) { InlineCodeNode N("foo()"); EXPECT_EQ(N.Kind, NodeKind::NK_InlineCode); - EXPECT_EQ(N.Code, "foo()"); + EXPECT_EQ(N.getCode(), "foo()"); } TEST(MarkdownNodeTest, EmphasisNode) { EmphasisNode N({}); EXPECT_EQ(N.Kind, NodeKind::NK_Emphasis); + EXPECT_TRUE(N.getChildren().empty()); } TEST(MarkdownNodeTest, UnorderedListNode) { - UnorderedListNode N({}); + UnorderedListNode N; EXPECT_EQ(N.Kind, NodeKind::NK_UnorderedList); + EXPECT_TRUE(N.getItems().empty()); } TEST(MarkdownNodeTest, ParagraphNode) { ParagraphNode N({}); EXPECT_EQ(N.Kind, NodeKind::NK_Paragraph); + EXPECT_TRUE(N.getChildren().empty()); } } // namespace \ No newline at end of file >From db35d079da9f0d2122fe03f6d5ac3464a395bb91 Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Fri, 26 Jun 2026 19:12:37 -0400 Subject: [PATCH 
04/10] [clang-doc] Redesign nodes using ilist_node and separate Block/Inline hierarchies --- .../clang-doc/support/Markdown.h | 295 +++++++++--------- .../clang-doc/MarkdownParserTest.cpp | 20 +- 2 files changed, 162 insertions(+), 153 deletions(-) diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h index b5c6ccbfe0255..1920b7a08b8fe 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.h +++ b/clang-tools-extra/clang-doc/support/Markdown.h @@ -9,19 +9,23 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SUPPORT_MARKDOWN_H #define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SUPPORT_MARKDOWN_H -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/simple_ilist.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/StringSaver.h" #include "llvm/Support/raw_ostream.h" #include <type_traits> namespace clang::doc::markdown { enum class NodeKind { + // Inline nodes NK_Text, NK_InlineCode, NK_Emphasis, NK_Strong, + // Block nodes NK_Paragraph, NK_Heading, NK_FencedCode, @@ -31,227 +35,234 @@ enum class NodeKind { NK_ListItem, NK_BlockQuote, NK_ThematicBreak, + NK_Document, }; -class Node { -public: +// Forward declarations +struct InlineNode; +struct BlockNode; + +//===----------------------------------------------------------------------===// +// Inline nodes +//===----------------------------------------------------------------------===// + +struct InlineNode + : llvm::ilist_node<InlineNode, llvm::ilist_sentinel_tracking<true>> { NodeKind Kind; - explicit Node(NodeKind K) : Kind(K) {} - void dump() const { llvm::errs() << "Node\n"; } - static bool classof(const Node *) { return true; } + explicit InlineNode(NodeKind K) : Kind(K) {} + void dump(llvm::raw_ostream &OS = llvm::errs()) const; }; -class TextNode : public Node { +struct TextNode : InlineNode { +private: llvm::StringRef Text; public: - explicit TextNode(llvm::StringRef T) : 
Node(NodeKind::NK_Text), Text(T) {} + explicit TextNode(llvm::StringRef T) + : InlineNode(NodeKind::NK_Text), Text(T) {} llvm::StringRef getText() const { return Text; } - void dump() const { llvm::errs() << "TextNode: " << Text << "\n"; } - static bool classof(const Node *N) { return N->Kind == NodeKind::NK_Text; } + void dump(llvm::raw_ostream &OS = llvm::errs()) const { + OS << "TextNode: " << Text << "\n"; + } + static bool classof(const InlineNode *N) { + return N->Kind == NodeKind::NK_Text; + } }; static_assert(std::is_trivially_destructible_v<TextNode>); -class InlineCodeNode : public Node { +struct InlineCodeNode : InlineNode { +private: llvm::StringRef Code; public: explicit InlineCodeNode(llvm::StringRef C) - : Node(NodeKind::NK_InlineCode), Code(C) {} + : InlineNode(NodeKind::NK_InlineCode), Code(C) {} llvm::StringRef getCode() const { return Code; } - void dump() const { llvm::errs() << "InlineCodeNode: " << Code << "\n"; } - static bool classof(const Node *N) { + void dump(llvm::raw_ostream &OS = llvm::errs()) const { + OS << "InlineCodeNode: " << Code << "\n"; + } + static bool classof(const InlineNode *N) { return N->Kind == NodeKind::NK_InlineCode; } }; static_assert(std::is_trivially_destructible_v<InlineCodeNode>); -class EmphasisNode : public Node { - llvm::ArrayRef<Node *> Children; - -public: - explicit EmphasisNode(llvm::ArrayRef<Node *> C) - : Node(NodeKind::NK_Emphasis), Children(C) {} - llvm::ArrayRef<Node *> getChildren() const { return Children; } - void dump() const { - llvm::errs() << "EmphasisNode (" << Children.size() << " children)\n"; +struct EmphasisNode : InlineNode { + llvm::simple_ilist<InlineNode, llvm::ilist_sentinel_tracking<true>> Children; + EmphasisNode() : InlineNode(NodeKind::NK_Emphasis) {} + void dump(llvm::raw_ostream &OS = llvm::errs()) const { + OS << "EmphasisNode\n"; } - static bool classof(const Node *N) { + static bool classof(const InlineNode *N) { return N->Kind == NodeKind::NK_Emphasis; } }; 
-static_assert(std::is_trivially_destructible_v<EmphasisNode>); - -class StrongNode : public Node { - llvm::ArrayRef<Node *> Children; -public: - explicit StrongNode(llvm::ArrayRef<Node *> C) - : Node(NodeKind::NK_Strong), Children(C) {} - llvm::ArrayRef<Node *> getChildren() const { return Children; } - void dump() const { - llvm::errs() << "StrongNode (" << Children.size() << " children)\n"; +struct StrongNode : InlineNode { + llvm::simple_ilist<InlineNode, llvm::ilist_sentinel_tracking<true>> Children; + StrongNode() : InlineNode(NodeKind::NK_Strong) {} + void dump(llvm::raw_ostream &OS = llvm::errs()) const { + OS << "StrongNode\n"; + } + static bool classof(const InlineNode *N) { + return N->Kind == NodeKind::NK_Strong; } - static bool classof(const Node *N) { return N->Kind == NodeKind::NK_Strong; } }; -static_assert(std::is_trivially_destructible_v<StrongNode>); -class ParagraphNode : public Node { - llvm::ArrayRef<Node *> Children; +//===----------------------------------------------------------------------===// +// Block nodes +//===----------------------------------------------------------------------===// -public: - explicit ParagraphNode(llvm::ArrayRef<Node *> C) - : Node(NodeKind::NK_Paragraph), Children(C) {} - llvm::ArrayRef<Node *> getChildren() const { return Children; } - void dump() const { - llvm::errs() << "ParagraphNode (" << Children.size() << " children)\n"; +struct BlockNode + : llvm::ilist_node<BlockNode, llvm::ilist_sentinel_tracking<true>> { + NodeKind Kind; + explicit BlockNode(NodeKind K) : Kind(K) {} + void dump(llvm::raw_ostream &OS = llvm::errs()) const; +}; + +using InlineList = + llvm::simple_ilist<InlineNode, llvm::ilist_sentinel_tracking<true>>; +using BlockList = + llvm::simple_ilist<BlockNode, llvm::ilist_sentinel_tracking<true>>; + +struct ParagraphNode : BlockNode { + InlineList Children; + ParagraphNode() : BlockNode(NodeKind::NK_Paragraph) {} + void dump(llvm::raw_ostream &OS = llvm::errs()) const { + OS << 
"ParagraphNode\n"; } - static bool classof(const Node *N) { + static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_Paragraph; } }; -static_assert(std::is_trivially_destructible_v<ParagraphNode>); -class HeadingNode : public Node { +struct HeadingNode : BlockNode { +private: unsigned Level; - llvm::ArrayRef<Node *> Children; public: - HeadingNode(unsigned L, llvm::ArrayRef<Node *> C) - : Node(NodeKind::NK_Heading), Level(L), Children(C) {} + InlineList Children; + explicit HeadingNode(unsigned L) + : BlockNode(NodeKind::NK_Heading), Level(L) {} unsigned getLevel() const { return Level; } - llvm::ArrayRef<Node *> getChildren() const { return Children; } - void dump() const { - llvm::errs() << "HeadingNode: level=" << Level << " (" << Children.size() - << " children)\n"; + void dump(llvm::raw_ostream &OS = llvm::errs()) const { + OS << "HeadingNode: level=" << Level << "\n"; + } + static bool classof(const BlockNode *N) { + return N->Kind == NodeKind::NK_Heading; } - static bool classof(const Node *N) { return N->Kind == NodeKind::NK_Heading; } }; -static_assert(std::is_trivially_destructible_v<HeadingNode>); -class FencedCodeNode : public Node { +struct FencedCodeNode : BlockNode { +private: llvm::StringRef Lang; - llvm::ArrayRef<llvm::StringRef> Lines; + llvm::StringRef Code; public: - FencedCodeNode(llvm::StringRef L, llvm::ArrayRef<llvm::StringRef> Ls) - : Node(NodeKind::NK_FencedCode), Lang(L), Lines(Ls) {} + FencedCodeNode(llvm::StringRef L, llvm::StringRef C) + : BlockNode(NodeKind::NK_FencedCode), Lang(L), Code(C) {} llvm::StringRef getLang() const { return Lang; } - llvm::ArrayRef<llvm::StringRef> getLines() const { return Lines; } - void dump() const { - llvm::errs() << "FencedCodeNode: lang=" << Lang << " (" << Lines.size() - << " lines)\n"; + llvm::StringRef getCode() const { return Code; } + void dump(llvm::raw_ostream &OS = llvm::errs()) const { + OS << "FencedCodeNode: lang=" << Lang << "\n"; } - static bool classof(const Node *N) { 
+ static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_FencedCode; } }; static_assert(std::is_trivially_destructible_v<FencedCodeNode>); -struct TableCell { - llvm::ArrayRef<Node *> Children; -}; -static_assert(std::is_trivially_destructible_v<TableCell>); - -struct TableRow { - llvm::ArrayRef<TableCell> Cells; -}; -static_assert(std::is_trivially_destructible_v<TableRow>); - -class TableNode : public Node { - TableRow Header; - llvm::ArrayRef<TableRow> Body; - -public: - TableNode(TableRow H, llvm::ArrayRef<TableRow> B) - : Node(NodeKind::NK_Table), Header(H), Body(B) {} - const TableRow &getHeader() const { return Header; } - llvm::ArrayRef<TableRow> getBody() const { return Body; } - void dump() const { - llvm::errs() << "TableNode: " << Header.Cells.size() << " header cells, " - << Body.size() << " rows\n"; - } - static bool classof(const Node *N) { return N->Kind == NodeKind::NK_Table; } -}; -static_assert(std::is_trivially_destructible_v<TableNode>); - -class ListItemNode : public Node { - llvm::ArrayRef<Node *> Children; - -public: - explicit ListItemNode(llvm::ArrayRef<Node *> C) - : Node(NodeKind::NK_ListItem), Children(C) {} - llvm::ArrayRef<Node *> getChildren() const { return Children; } - void dump() const { - llvm::errs() << "ListItemNode (" << Children.size() << " children)\n"; +struct ListItemNode : BlockNode { + InlineList Children; + ListItemNode() : BlockNode(NodeKind::NK_ListItem) {} + void dump(llvm::raw_ostream &OS = llvm::errs()) const { + OS << "ListItemNode\n"; } - static bool classof(const Node *N) { + static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_ListItem; } }; -static_assert(std::is_trivially_destructible_v<ListItemNode>); -class UnorderedListNode : public Node { - llvm::ArrayRef<ListItemNode *> Items; - -public: - UnorderedListNode() : Node(NodeKind::NK_UnorderedList), Items({}) {} - explicit UnorderedListNode(llvm::ArrayRef<ListItemNode *> I) - : Node(NodeKind::NK_UnorderedList), Items(I) 
{} - llvm::ArrayRef<ListItemNode *> getItems() const { return Items; } - void dump() const { - llvm::errs() << "UnorderedListNode (" << Items.size() << " items)\n"; +struct UnorderedListNode : BlockNode { + llvm::simple_ilist<ListItemNode, llvm::ilist_sentinel_tracking<true>> Items; + UnorderedListNode() : BlockNode(NodeKind::NK_UnorderedList) {} + void dump(llvm::raw_ostream &OS = llvm::errs()) const { + OS << "UnorderedListNode\n"; } - static bool classof(const Node *N) { + static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_UnorderedList; } }; -static_assert(std::is_trivially_destructible_v<UnorderedListNode>); -class OrderedListNode : public Node { +struct OrderedListNode : BlockNode { +private: unsigned Start; - llvm::ArrayRef<ListItemNode *> Items; public: - OrderedListNode(unsigned S, llvm::ArrayRef<ListItemNode *> I) - : Node(NodeKind::NK_OrderedList), Start(S), Items(I) {} + llvm::simple_ilist<ListItemNode, llvm::ilist_sentinel_tracking<true>> Items; + explicit OrderedListNode(unsigned S = 1) + : BlockNode(NodeKind::NK_OrderedList), Start(S) {} unsigned getStart() const { return Start; } - llvm::ArrayRef<ListItemNode *> getItems() const { return Items; } - void dump() const { - llvm::errs() << "OrderedListNode: start=" << Start << " (" << Items.size() - << " items)\n"; + void dump(llvm::raw_ostream &OS = llvm::errs()) const { + OS << "OrderedListNode: start=" << Start << "\n"; } - static bool classof(const Node *N) { + static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_OrderedList; } }; -static_assert(std::is_trivially_destructible_v<OrderedListNode>); -class BlockQuoteNode : public Node { - llvm::ArrayRef<Node *> Children; - -public: - explicit BlockQuoteNode(llvm::ArrayRef<Node *> C) - : Node(NodeKind::NK_BlockQuote), Children(C) {} - llvm::ArrayRef<Node *> getChildren() const { return Children; } - void dump() const { - llvm::errs() << "BlockQuoteNode (" << Children.size() << " children)\n"; +struct 
BlockQuoteNode : BlockNode { + BlockList Children; + BlockQuoteNode() : BlockNode(NodeKind::NK_BlockQuote) {} + void dump(llvm::raw_ostream &OS = llvm::errs()) const { + OS << "BlockQuoteNode\n"; } - static bool classof(const Node *N) { + static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_BlockQuote; } }; -static_assert(std::is_trivially_destructible_v<BlockQuoteNode>); -class ThematicBreakNode : public Node { -public: - ThematicBreakNode() : Node(NodeKind::NK_ThematicBreak) {} - void dump() const { llvm::errs() << "ThematicBreakNode\n"; } - static bool classof(const Node *N) { +struct ThematicBreakNode : BlockNode { + ThematicBreakNode() : BlockNode(NodeKind::NK_ThematicBreak) {} + void dump(llvm::raw_ostream &OS = llvm::errs()) const { + OS << "ThematicBreakNode\n"; + } + static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_ThematicBreak; } }; -static_assert(std::is_trivially_destructible_v<ThematicBreakNode>); + +struct DocumentNode : BlockNode { + BlockList Children; + DocumentNode() : BlockNode(NodeKind::NK_Document) {} + void dump(llvm::raw_ostream &OS = llvm::errs()) const { + OS << "DocumentNode\n"; + } + static bool classof(const BlockNode *N) { + return N->Kind == NodeKind::NK_Document; + } +}; + +//===----------------------------------------------------------------------===// +// ASTContext - owns the arena and string pool +//===----------------------------------------------------------------------===// + +class ASTContext { + llvm::BumpPtrAllocator Arena; + llvm::StringSaver SSaver; + DocumentNode *Root = nullptr; + +public: + ASTContext() : SSaver(Arena) {} + + template <typename T, typename... 
Args> T *allocate(Args &&...args) { + return new (Arena.Allocate<T>()) T(std::forward<Args>(args)...); + } + + llvm::StringRef intern(llvm::StringRef S) { return SSaver.save(S); } + DocumentNode *getRoot() { return Root; } + void setRoot(DocumentNode *R) { Root = R; } +}; } // namespace clang::doc::markdown diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp index 4b7d3e4b7bb4b..8621a980ec3ac 100644 --- a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp +++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp @@ -21,16 +21,14 @@ TEST(MarkdownNodeTest, TextNode) { } TEST(MarkdownNodeTest, FencedCodeNode) { - StringRef Lines[] = {"int x = 0;", "int y = 1;", "return x + y;"}; - FencedCodeNode N("cpp", ArrayRef(Lines)); + FencedCodeNode N("cpp", "int x = 0;\nint y = 1;\nreturn x + y;"); EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode); EXPECT_EQ(N.getLang(), "cpp"); - EXPECT_EQ(N.getLines().size(), 3u); - EXPECT_EQ(N.getLines()[1], "int y = 1;"); + EXPECT_EQ(N.getCode(), "int x = 0;\nint y = 1;\nreturn x + y;"); } TEST(MarkdownNodeTest, HeadingNode) { - HeadingNode N(2, {}); + HeadingNode N(2); EXPECT_EQ(N.Kind, NodeKind::NK_Heading); EXPECT_EQ(N.getLevel(), 2u); } @@ -47,21 +45,21 @@ TEST(MarkdownNodeTest, InlineCodeNode) { } TEST(MarkdownNodeTest, EmphasisNode) { - EmphasisNode N({}); + EmphasisNode N; EXPECT_EQ(N.Kind, NodeKind::NK_Emphasis); - EXPECT_TRUE(N.getChildren().empty()); + EXPECT_TRUE(N.Children.empty()); } TEST(MarkdownNodeTest, UnorderedListNode) { UnorderedListNode N; EXPECT_EQ(N.Kind, NodeKind::NK_UnorderedList); - EXPECT_TRUE(N.getItems().empty()); + EXPECT_TRUE(N.Items.empty()); } TEST(MarkdownNodeTest, ParagraphNode) { - ParagraphNode N({}); + ParagraphNode N; EXPECT_EQ(N.Kind, NodeKind::NK_Paragraph); - EXPECT_TRUE(N.getChildren().empty()); + EXPECT_TRUE(N.Children.empty()); } -} // namespace \ No newline at end of file +} // namespace >From 
03715b2d7447fdc224be105e70a0168c62a0dd25 Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Sat, 27 Jun 2026 14:49:44 -0400 Subject: [PATCH 05/10] [clang-doc] Address review feedback: print/dump in cpp, remove sentinel tracking, restrict allocate --- .../clang-doc/support/CMakeLists.txt | 1 + .../clang-doc/support/Markdown.cpp | 177 ++++++++++++++++++ .../clang-doc/support/Markdown.h | 111 +++++------ .../unittests/clang-doc/CMakeLists.txt | 1 + .../clang-doc/MarkdownParserTest.cpp | 12 +- 5 files changed, 238 insertions(+), 64 deletions(-) create mode 100644 clang-tools-extra/clang-doc/support/Markdown.cpp diff --git a/clang-tools-extra/clang-doc/support/CMakeLists.txt b/clang-tools-extra/clang-doc/support/CMakeLists.txt index 8ac913ffbe998..7dc11f07ff8b3 100644 --- a/clang-tools-extra/clang-doc/support/CMakeLists.txt +++ b/clang-tools-extra/clang-doc/support/CMakeLists.txt @@ -6,5 +6,6 @@ set(LLVM_LINK_COMPONENTS add_clang_library(clangDocSupport STATIC File.cpp + Markdown.cpp Utils.cpp ) diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp new file mode 100644 index 0000000000000..ad29ba4789ffb --- /dev/null +++ b/clang-tools-extra/clang-doc/support/Markdown.cpp @@ -0,0 +1,177 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Markdown.h" +#include "llvm/Support/Casting.h" + +namespace clang::doc::markdown { + +//===----------------------------------------------------------------------===// +// Inline node print/dump +//===----------------------------------------------------------------------===// + +void InlineNode::print(llvm::raw_ostream &OS) const { + switch (Kind) { + case NodeKind::NK_Text: + llvm::cast<TextNode>(this)->print(OS); + break; + case NodeKind::NK_InlineCode: + llvm::cast<InlineCodeNode>(this)->print(OS); + break; + case NodeKind::NK_Emphasis: + llvm::cast<EmphasisNode>(this)->print(OS); + break; + case NodeKind::NK_Strong: + llvm::cast<StrongNode>(this)->print(OS); + break; + default: + OS << "UnknownInlineNode\n"; + break; + } +} + +LLVM_DUMP_METHOD void InlineNode::dump() const { print(llvm::errs()); } + +void TextNode::print(llvm::raw_ostream &OS) const { + OS << "TextNode: " << getText() << "\n"; +} + +LLVM_DUMP_METHOD void TextNode::dump() const { print(llvm::errs()); } + +void InlineCodeNode::print(llvm::raw_ostream &OS) const { + OS << "InlineCodeNode: " << getCode() << "\n"; +} + +LLVM_DUMP_METHOD void InlineCodeNode::dump() const { print(llvm::errs()); } + +void EmphasisNode::print(llvm::raw_ostream &OS) const { + OS << "EmphasisNode\n"; + for (const auto &Child : Children) + Child.print(OS); +} + +LLVM_DUMP_METHOD void EmphasisNode::dump() const { print(llvm::errs()); } + +void StrongNode::print(llvm::raw_ostream &OS) const { + OS << "StrongNode\n"; + for (const auto &Child : Children) + Child.print(OS); +} + +LLVM_DUMP_METHOD void StrongNode::dump() const { print(llvm::errs()); } + +//===----------------------------------------------------------------------===// +// Block node print/dump +//===----------------------------------------------------------------------===// + +void 
BlockNode::print(llvm::raw_ostream &OS) const { + switch (Kind) { + case NodeKind::NK_Paragraph: + llvm::cast<ParagraphNode>(this)->print(OS); + break; + case NodeKind::NK_Heading: + llvm::cast<HeadingNode>(this)->print(OS); + break; + case NodeKind::NK_FencedCode: + llvm::cast<FencedCodeNode>(this)->print(OS); + break; + case NodeKind::NK_UnorderedList: + llvm::cast<UnorderedListNode>(this)->print(OS); + break; + case NodeKind::NK_OrderedList: + llvm::cast<OrderedListNode>(this)->print(OS); + break; + case NodeKind::NK_ListItem: + llvm::cast<ListItemNode>(this)->print(OS); + break; + case NodeKind::NK_BlockQuote: + llvm::cast<BlockQuoteNode>(this)->print(OS); + break; + case NodeKind::NK_ThematicBreak: + llvm::cast<ThematicBreakNode>(this)->print(OS); + break; + case NodeKind::NK_Document: + llvm::cast<DocumentNode>(this)->print(OS); + break; + default: + OS << "UnknownBlockNode\n"; + break; + } +} + +LLVM_DUMP_METHOD void BlockNode::dump() const { print(llvm::errs()); } + +void ParagraphNode::print(llvm::raw_ostream &OS) const { + OS << "ParagraphNode\n"; + for (const auto &Child : Children) + Child.print(OS); +} + +LLVM_DUMP_METHOD void ParagraphNode::dump() const { print(llvm::errs()); } + +void HeadingNode::print(llvm::raw_ostream &OS) const { + OS << "HeadingNode: level=" << getLevel() << "\n"; + for (const auto &Child : Children) + Child.print(OS); +} + +LLVM_DUMP_METHOD void HeadingNode::dump() const { print(llvm::errs()); } + +void FencedCodeNode::print(llvm::raw_ostream &OS) const { + OS << "FencedCodeNode: lang=" << getLang() << "\n" << getCode() << "\n"; +} + +LLVM_DUMP_METHOD void FencedCodeNode::dump() const { print(llvm::errs()); } + +void ListItemNode::print(llvm::raw_ostream &OS) const { + OS << "ListItemNode\n"; + for (const auto &Child : Children) + Child.print(OS); +} + +LLVM_DUMP_METHOD void ListItemNode::dump() const { print(llvm::errs()); } + +void UnorderedListNode::print(llvm::raw_ostream &OS) const { + OS << "UnorderedListNode\n"; + for 
(const auto &Item : Items) + Item.print(OS); +} + +LLVM_DUMP_METHOD void UnorderedListNode::dump() const { print(llvm::errs()); } + +void OrderedListNode::print(llvm::raw_ostream &OS) const { + OS << "OrderedListNode: start=" << getStart() << "\n"; + for (const auto &Item : Items) + Item.print(OS); +} + +LLVM_DUMP_METHOD void OrderedListNode::dump() const { print(llvm::errs()); } + +void BlockQuoteNode::print(llvm::raw_ostream &OS) const { + OS << "BlockQuoteNode\n"; + for (const auto &Child : Children) + Child.print(OS); +} + +LLVM_DUMP_METHOD void BlockQuoteNode::dump() const { print(llvm::errs()); } + +void ThematicBreakNode::print(llvm::raw_ostream &OS) const { + OS << "ThematicBreakNode\n"; +} + +LLVM_DUMP_METHOD void ThematicBreakNode::dump() const { print(llvm::errs()); } + +void DocumentNode::print(llvm::raw_ostream &OS) const { + OS << "DocumentNode\n"; + for (const auto &Child : Children) + Child.print(OS); +} + +LLVM_DUMP_METHOD void DocumentNode::dump() const { print(llvm::errs()); } + +} // namespace clang::doc::markdown diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h index 1920b7a08b8fe..410f133b0e74d 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.h +++ b/clang-tools-extra/clang-doc/support/Markdown.h @@ -13,7 +13,7 @@ #include "llvm/ADT/simple_ilist.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/StringSaver.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" #include <type_traits> @@ -38,7 +38,6 @@ enum class NodeKind { NK_Document, }; -// Forward declarations struct InlineNode; struct BlockNode; @@ -46,13 +45,15 @@ struct BlockNode; // Inline nodes //===----------------------------------------------------------------------===// -struct InlineNode - : llvm::ilist_node<InlineNode, llvm::ilist_sentinel_tracking<true>> { +struct InlineNode : llvm::ilist_node<InlineNode> { NodeKind Kind; explicit 
InlineNode(NodeKind K) : Kind(K) {} - void dump(llvm::raw_ostream &OS = llvm::errs()) const; + void print(llvm::raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump() const; }; +using InlineList = llvm::simple_ilist<InlineNode>; + struct TextNode : InlineNode { private: llvm::StringRef Text; @@ -61,9 +62,8 @@ struct TextNode : InlineNode { explicit TextNode(llvm::StringRef T) : InlineNode(NodeKind::NK_Text), Text(T) {} llvm::StringRef getText() const { return Text; } - void dump(llvm::raw_ostream &OS = llvm::errs()) const { - OS << "TextNode: " << Text << "\n"; - } + void print(llvm::raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump() const; static bool classof(const InlineNode *N) { return N->Kind == NodeKind::NK_Text; } @@ -78,9 +78,8 @@ struct InlineCodeNode : InlineNode { explicit InlineCodeNode(llvm::StringRef C) : InlineNode(NodeKind::NK_InlineCode), Code(C) {} llvm::StringRef getCode() const { return Code; } - void dump(llvm::raw_ostream &OS = llvm::errs()) const { - OS << "InlineCodeNode: " << Code << "\n"; - } + void print(llvm::raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump() const; static bool classof(const InlineNode *N) { return N->Kind == NodeKind::NK_InlineCode; } @@ -88,22 +87,20 @@ struct InlineCodeNode : InlineNode { static_assert(std::is_trivially_destructible_v<InlineCodeNode>); struct EmphasisNode : InlineNode { - llvm::simple_ilist<InlineNode, llvm::ilist_sentinel_tracking<true>> Children; + InlineList Children; EmphasisNode() : InlineNode(NodeKind::NK_Emphasis) {} - void dump(llvm::raw_ostream &OS = llvm::errs()) const { - OS << "EmphasisNode\n"; - } + void print(llvm::raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump() const; static bool classof(const InlineNode *N) { return N->Kind == NodeKind::NK_Emphasis; } }; struct StrongNode : InlineNode { - llvm::simple_ilist<InlineNode, llvm::ilist_sentinel_tracking<true>> Children; + InlineList Children; StrongNode() : InlineNode(NodeKind::NK_Strong) {} - void dump(llvm::raw_ostream &OS 
= llvm::errs()) const { - OS << "StrongNode\n"; - } + void print(llvm::raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump() const; static bool classof(const InlineNode *N) { return N->Kind == NodeKind::NK_Strong; } @@ -113,24 +110,20 @@ struct StrongNode : InlineNode { // Block nodes //===----------------------------------------------------------------------===// -struct BlockNode - : llvm::ilist_node<BlockNode, llvm::ilist_sentinel_tracking<true>> { +struct BlockNode : llvm::ilist_node<BlockNode> { NodeKind Kind; explicit BlockNode(NodeKind K) : Kind(K) {} - void dump(llvm::raw_ostream &OS = llvm::errs()) const; + void print(llvm::raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump() const; }; -using InlineList = - llvm::simple_ilist<InlineNode, llvm::ilist_sentinel_tracking<true>>; -using BlockList = - llvm::simple_ilist<BlockNode, llvm::ilist_sentinel_tracking<true>>; +using BlockList = llvm::simple_ilist<BlockNode>; struct ParagraphNode : BlockNode { InlineList Children; ParagraphNode() : BlockNode(NodeKind::NK_Paragraph) {} - void dump(llvm::raw_ostream &OS = llvm::errs()) const { - OS << "ParagraphNode\n"; - } + void print(llvm::raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump() const; static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_Paragraph; } @@ -145,9 +138,8 @@ struct HeadingNode : BlockNode { explicit HeadingNode(unsigned L) : BlockNode(NodeKind::NK_Heading), Level(L) {} unsigned getLevel() const { return Level; } - void dump(llvm::raw_ostream &OS = llvm::errs()) const { - OS << "HeadingNode: level=" << Level << "\n"; - } + void print(llvm::raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump() const; static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_Heading; } @@ -163,32 +155,29 @@ struct FencedCodeNode : BlockNode { : BlockNode(NodeKind::NK_FencedCode), Lang(L), Code(C) {} llvm::StringRef getLang() const { return Lang; } llvm::StringRef getCode() const { return Code; } - void dump(llvm::raw_ostream 
&OS = llvm::errs()) const { - OS << "FencedCodeNode: lang=" << Lang << "\n"; - } + void print(llvm::raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump() const; static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_FencedCode; } }; static_assert(std::is_trivially_destructible_v<FencedCodeNode>); -struct ListItemNode : BlockNode { +struct ListItemNode : BlockNode, llvm::ilist_node<ListItemNode> { InlineList Children; ListItemNode() : BlockNode(NodeKind::NK_ListItem) {} - void dump(llvm::raw_ostream &OS = llvm::errs()) const { - OS << "ListItemNode\n"; - } + void print(llvm::raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump() const; static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_ListItem; } }; struct UnorderedListNode : BlockNode { - llvm::simple_ilist<ListItemNode, llvm::ilist_sentinel_tracking<true>> Items; + llvm::simple_ilist<ListItemNode> Items; UnorderedListNode() : BlockNode(NodeKind::NK_UnorderedList) {} - void dump(llvm::raw_ostream &OS = llvm::errs()) const { - OS << "UnorderedListNode\n"; - } + void print(llvm::raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump() const; static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_UnorderedList; } @@ -199,13 +188,12 @@ struct OrderedListNode : BlockNode { unsigned Start; public: - llvm::simple_ilist<ListItemNode, llvm::ilist_sentinel_tracking<true>> Items; + llvm::simple_ilist<ListItemNode> Items; explicit OrderedListNode(unsigned S = 1) : BlockNode(NodeKind::NK_OrderedList), Start(S) {} unsigned getStart() const { return Start; } - void dump(llvm::raw_ostream &OS = llvm::errs()) const { - OS << "OrderedListNode: start=" << Start << "\n"; - } + void print(llvm::raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump() const; static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_OrderedList; } @@ -214,9 +202,8 @@ struct OrderedListNode : BlockNode { struct BlockQuoteNode : BlockNode { BlockList Children; BlockQuoteNode() : 
BlockNode(NodeKind::NK_BlockQuote) {} - void dump(llvm::raw_ostream &OS = llvm::errs()) const { - OS << "BlockQuoteNode\n"; - } + void print(llvm::raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump() const; static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_BlockQuote; } @@ -224,42 +211,44 @@ struct BlockQuoteNode : BlockNode { struct ThematicBreakNode : BlockNode { ThematicBreakNode() : BlockNode(NodeKind::NK_ThematicBreak) {} - void dump(llvm::raw_ostream &OS = llvm::errs()) const { - OS << "ThematicBreakNode\n"; - } + void print(llvm::raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump() const; static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_ThematicBreak; } }; struct DocumentNode : BlockNode { + // FIXME: add constructor that accepts children once parser is in place BlockList Children; DocumentNode() : BlockNode(NodeKind::NK_Document) {} - void dump(llvm::raw_ostream &OS = llvm::errs()) const { - OS << "DocumentNode\n"; - } + void print(llvm::raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump() const; static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_Document; } }; //===----------------------------------------------------------------------===// -// ASTContext - owns the arena and string pool +// ASTContext - owns the arena //===----------------------------------------------------------------------===// +template <typename T> +using IsMarkdownNode = std::enable_if_t<std::is_base_of_v<InlineNode, T> || + std::is_base_of_v<BlockNode, T>>; + class ASTContext { llvm::BumpPtrAllocator Arena; - llvm::StringSaver SSaver; DocumentNode *Root = nullptr; public: - ASTContext() : SSaver(Arena) {} + ASTContext() = default; - template <typename T, typename... Args> T *allocate(Args &&...args) { + template <typename T, typename... 
Args, typename = IsMarkdownNode<T>> + T *allocate(Args &&...args) { return new (Arena.Allocate<T>()) T(std::forward<Args>(args)...); } - llvm::StringRef intern(llvm::StringRef S) { return SSaver.save(S); } DocumentNode *getRoot() { return Root; } void setRoot(DocumentNode *R) { Root = R; } }; diff --git a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt index 688a547a7f031..935df6da8ac78 100644 --- a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt @@ -50,5 +50,6 @@ clang_target_link_libraries(ClangDocTests target_link_libraries(ClangDocTests PRIVATE clangDoc + clangDocSupport LLVMTestingSupport ) diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp index 8621a980ec3ac..1b99776e7b6eb 100644 --- a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp +++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp @@ -21,10 +21,13 @@ TEST(MarkdownNodeTest, TextNode) { } TEST(MarkdownNodeTest, FencedCodeNode) { - FencedCodeNode N("cpp", "int x = 0;\nint y = 1;\nreturn x + y;"); + FencedCodeNode N("cpp", R"(int x = 0; +int y = 1; +return x + y;)"); EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode); EXPECT_EQ(N.getLang(), "cpp"); - EXPECT_EQ(N.getCode(), "int x = 0;\nint y = 1;\nreturn x + y;"); + EXPECT_TRUE(N.getCode().contains("int x = 0;")); + EXPECT_TRUE(N.getCode().contains("int y = 1;")); } TEST(MarkdownNodeTest, HeadingNode) { @@ -46,8 +49,11 @@ TEST(MarkdownNodeTest, InlineCodeNode) { TEST(MarkdownNodeTest, EmphasisNode) { EmphasisNode N; + TextNode Child("emphasized"); + N.Children.push_back(Child); EXPECT_EQ(N.Kind, NodeKind::NK_Emphasis); - EXPECT_TRUE(N.Children.empty()); + EXPECT_FALSE(N.Children.empty()); + EXPECT_EQ(llvm::cast<TextNode>(N.Children.front()).getText(), "emphasized"); } TEST(MarkdownNodeTest, UnorderedListNode) { >From 
691e26d1670d8d72527c30ce8c85d03d0e3ff8e2 Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Sat, 27 Jun 2026 15:39:39 -0400 Subject: [PATCH 06/10] [clang-doc] Add basic Markdown parser with fenced code and plain text support --- .../clang-doc/support/Markdown.cpp | 72 ++++++++++++++++++- .../clang-doc/support/Markdown.h | 24 +++++-- .../clang-doc/MarkdownParserTest.cpp | 39 +++++++++- 3 files changed, 122 insertions(+), 13 deletions(-) diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp index ad29ba4789ffb..2ddbc31366fce 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.cpp +++ b/clang-tools-extra/clang-doc/support/Markdown.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "Markdown.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Casting.h" namespace clang::doc::markdown { @@ -86,9 +88,6 @@ void BlockNode::print(llvm::raw_ostream &OS) const { case NodeKind::NK_OrderedList: llvm::cast<OrderedListNode>(this)->print(OS); break; - case NodeKind::NK_ListItem: - llvm::cast<ListItemNode>(this)->print(OS); - break; case NodeKind::NK_BlockQuote: llvm::cast<BlockQuoteNode>(this)->print(OS); break; @@ -174,4 +173,71 @@ void DocumentNode::print(llvm::raw_ostream &OS) const { LLVM_DUMP_METHOD void DocumentNode::dump() const { print(llvm::errs()); } +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +DocumentNode *parseMarkdown(llvm::StringRef Text, ASTContext &Ctx) { + auto *Doc = Ctx.allocate<DocumentNode>(); + Ctx.setRoot(Doc); + + llvm::SmallVector<llvm::StringRef> Lines; + Text.split(Lines, '\n'); + + size_t I = 0; + while (I < Lines.size()) { + llvm::StringRef Line = Lines[I].trim(); + + if (Line.empty()) { + ++I; + continue; + } + + // Fenced code block + if 
(Line.starts_with("```") || Line.starts_with("~~~")) { + char Fence = Line[0]; + llvm::StringRef Lang = Line.drop_front(3).trim(); + ++I; + llvm::SmallString<256> Code; + while (I < Lines.size()) { + llvm::StringRef Trimmed = Lines[I].trim(); + if (Trimmed.size() >= 3 && Trimmed[0] == Fence && Trimmed[1] == Fence && + Trimmed[2] == Fence) { + ++I; + break; + } + if (!Code.empty()) + Code += '\n'; + Code += Lines[I]; + ++I; + } + auto *Node = Ctx.allocate<FencedCodeNode>(Lang, Ctx.internString(Code)); + Doc->Children.push_back(*Node); + continue; + } + + // Plain text paragraph + llvm::SmallString<256> ParaText; + while (I < Lines.size()) { + llvm::StringRef L = Lines[I].trim(); + if (L.empty()) + break; + if (L.starts_with("```") || L.starts_with("~~~")) + break; + if (!ParaText.empty()) + ParaText += ' '; + ParaText += L; + ++I; + } + if (!ParaText.empty()) { + auto *Para = Ctx.allocate<ParagraphNode>(); + auto *TNode = Ctx.allocate<TextNode>(Ctx.internString(ParaText)); + Para->Children.push_back(*TNode); + Doc->Children.push_back(*Para); + } + } + + return Doc; +} + } // namespace clang::doc::markdown diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h index 410f133b0e74d..40b20dd460252 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.h +++ b/clang-tools-extra/clang-doc/support/Markdown.h @@ -15,6 +15,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> #include <type_traits> namespace clang::doc::markdown { @@ -32,7 +33,6 @@ enum class NodeKind { NK_Table, NK_UnorderedList, NK_OrderedList, - NK_ListItem, NK_BlockQuote, NK_ThematicBreak, NK_Document, @@ -163,14 +163,13 @@ struct FencedCodeNode : BlockNode { }; static_assert(std::is_trivially_destructible_v<FencedCodeNode>); -struct ListItemNode : BlockNode, llvm::ilist_node<ListItemNode> { +// ListItemNode is not a BlockNode -- it only lives inside UnorderedListNode 
+// and OrderedListNode, never directly in a BlockList. +struct ListItemNode : llvm::ilist_node<ListItemNode> { InlineList Children; - ListItemNode() : BlockNode(NodeKind::NK_ListItem) {} + ListItemNode() = default; void print(llvm::raw_ostream &OS) const; LLVM_DUMP_METHOD void dump() const; - static bool classof(const BlockNode *N) { - return N->Kind == NodeKind::NK_ListItem; - } }; struct UnorderedListNode : BlockNode { @@ -235,7 +234,8 @@ struct DocumentNode : BlockNode { template <typename T> using IsMarkdownNode = std::enable_if_t<std::is_base_of_v<InlineNode, T> || - std::is_base_of_v<BlockNode, T>>; + std::is_base_of_v<BlockNode, T> || + std::is_same_v<T, ListItemNode>>; class ASTContext { llvm::BumpPtrAllocator Arena; @@ -249,10 +249,20 @@ class ASTContext { return new (Arena.Allocate<T>()) T(std::forward<Args>(args)...); } + llvm::StringRef internString(llvm::StringRef S) { + char *Buf = Arena.Allocate<char>(S.size()); + std::copy(S.begin(), S.end(), Buf); + return llvm::StringRef(Buf, S.size()); + } + DocumentNode *getRoot() { return Root; } void setRoot(DocumentNode *R) { Root = R; } }; +/// Parse Markdown text into a DocumentNode. The caller provides an ASTContext +/// that owns the lifetime of all allocated nodes. 
+DocumentNode *parseMarkdown(llvm::StringRef Text, ASTContext &Ctx); + } // namespace clang::doc::markdown #endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SUPPORT_MARKDOWN_H diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp index 1b99776e7b6eb..d5e8ea707031b 100644 --- a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp +++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp @@ -21,9 +21,7 @@ TEST(MarkdownNodeTest, TextNode) { } TEST(MarkdownNodeTest, FencedCodeNode) { - FencedCodeNode N("cpp", R"(int x = 0; -int y = 1; -return x + y;)"); + FencedCodeNode N("cpp", "int x = 0;\nint y = 1;"); EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode); EXPECT_EQ(N.getLang(), "cpp"); EXPECT_TRUE(N.getCode().contains("int x = 0;")); @@ -68,4 +66,39 @@ TEST(MarkdownNodeTest, ParagraphNode) { EXPECT_TRUE(N.Children.empty()); } +TEST(MarkdownParserTest, PlainText) { + ASTContext Ctx; + auto *Doc = parseMarkdown("hello world", Ctx); + ASSERT_NE(Doc, nullptr); + ASSERT_FALSE(Doc->Children.empty()); + auto *Para = llvm::cast<ParagraphNode>(&Doc->Children.front()); + ASSERT_FALSE(Para->Children.empty()); + EXPECT_EQ(llvm::cast<TextNode>(Para->Children.front()).getText(), + "hello world"); +} + +TEST(MarkdownParserTest, FencedCodeBlock) { + ASTContext Ctx; + auto *Doc = parseMarkdown("~~~cpp\nint x = 0;\n~~~", Ctx); + ASSERT_NE(Doc, nullptr); + ASSERT_FALSE(Doc->Children.empty()); + auto *Code = llvm::cast<FencedCodeNode>(&Doc->Children.front()); + EXPECT_EQ(Code->getLang(), "cpp"); + EXPECT_TRUE(Code->getCode().contains("int x = 0;")); +} + +TEST(MarkdownParserTest, EmptyInput) { + ASTContext Ctx; + auto *Doc = parseMarkdown("", Ctx); + ASSERT_NE(Doc, nullptr); + EXPECT_TRUE(Doc->Children.empty()); +} + +TEST(MarkdownParserTest, TextThenFencedCode) { + ASTContext Ctx; + auto *Doc = parseMarkdown("some text\n\n~~~cpp\nint x = 0;\n~~~", Ctx); + ASSERT_NE(Doc, nullptr); + 
EXPECT_EQ(std::distance(Doc->Children.begin(), Doc->Children.end()), 2); +} + } // namespace >From 60fbdb666b2b2882a304e86cd3d7b13671a30b8f Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Sat, 27 Jun 2026 19:21:36 -0400 Subject: [PATCH 07/10] [clang-doc] Add unordered list parsing --- .../clang-doc/support/Markdown.cpp | 25 +++++++++++++++++++ .../clang-doc/MarkdownParserTest.cpp | 19 ++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp index 2ddbc31366fce..83633d71bfe7a 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.cpp +++ b/clang-tools-extra/clang-doc/support/Markdown.cpp @@ -177,6 +177,11 @@ LLVM_DUMP_METHOD void DocumentNode::dump() const { print(llvm::errs()); } // Parser //===----------------------------------------------------------------------===// +static bool isListMarker(llvm::StringRef Line) { + return Line.starts_with("- ") || Line.starts_with("* ") || + Line.starts_with("+ "); +} + DocumentNode *parseMarkdown(llvm::StringRef Text, ASTContext &Ctx) { auto *Doc = Ctx.allocate<DocumentNode>(); Ctx.setRoot(Doc); @@ -216,6 +221,24 @@ DocumentNode *parseMarkdown(llvm::StringRef Text, ASTContext &Ctx) { continue; } + // Unordered list + if (isListMarker(Line)) { + auto *List = Ctx.allocate<UnorderedListNode>(); + while (I < Lines.size()) { + llvm::StringRef L = Lines[I].trim(); + if (!isListMarker(L)) + break; + llvm::StringRef ItemText = L.drop_front(2).trim(); + auto *Item = Ctx.allocate<ListItemNode>(); + auto *TNode = Ctx.allocate<TextNode>(Ctx.internString(ItemText)); + Item->Children.push_back(*TNode); + List->Items.push_back(*Item); + ++I; + } + Doc->Children.push_back(*List); + continue; + } + // Plain text paragraph llvm::SmallString<256> ParaText; while (I < Lines.size()) { @@ -224,6 +247,8 @@ DocumentNode *parseMarkdown(llvm::StringRef Text, ASTContext &Ctx) { break; if (L.starts_with("```") || 
L.starts_with("~~~")) break; + if (isListMarker(L)) + break; if (!ParaText.empty()) ParaText += ' '; ParaText += L; diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp index d5e8ea707031b..16ba5917d8860 100644 --- a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp +++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp @@ -101,4 +101,23 @@ TEST(MarkdownParserTest, TextThenFencedCode) { EXPECT_EQ(std::distance(Doc->Children.begin(), Doc->Children.end()), 2); } +TEST(MarkdownParserTest, UnorderedList) { + ASTContext Ctx; + auto *Doc = parseMarkdown("- foo\n- bar\n- baz", Ctx); + ASSERT_NE(Doc, nullptr); + auto *List = llvm::cast<UnorderedListNode>(&Doc->Children.front()); + EXPECT_EQ(std::distance(List->Items.begin(), List->Items.end()), 3); + EXPECT_EQ( + llvm::cast<TextNode>(List->Items.front().Children.front()).getText(), + "foo"); +} + +TEST(MarkdownParserTest, SingleItemList) { + ASTContext Ctx; + auto *Doc = parseMarkdown("- only item", Ctx); + ASSERT_NE(Doc, nullptr); + auto *List = llvm::cast<UnorderedListNode>(&Doc->Children.front()); + EXPECT_EQ(std::distance(List->Items.begin(), List->Items.end()), 1); +} + } // namespace >From f23f0f7bf3d680a255ba42cee0af4bcde2d18472 Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Sat, 27 Jun 2026 19:24:33 -0400 Subject: [PATCH 08/10] [clang-doc] Add ATX heading parsing --- .../clang-doc/support/Markdown.cpp | 18 ++++++++++++++++++ .../unittests/clang-doc/MarkdownParserTest.cpp | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp index 83633d71bfe7a..0f4188cfa1705 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.cpp +++ b/clang-tools-extra/clang-doc/support/Markdown.cpp @@ -221,6 +221,22 @@ DocumentNode *parseMarkdown(llvm::StringRef Text, ASTContext &Ctx) { continue; } + // 
ATX heading: 1-6 # characters followed by a space + if (Line.starts_with("#")) { + unsigned Level = 0; + while (Level < Line.size() && Line[Level] == '#') + ++Level; + if (Level <= 6 && Level < Line.size() && Line[Level] == ' ') { + llvm::StringRef Content = Line.drop_front(Level + 1).trim(); + auto *Heading = Ctx.allocate<HeadingNode>(Level); + auto *TNode = Ctx.allocate<TextNode>(Ctx.internString(Content)); + Heading->Children.push_back(*TNode); + Doc->Children.push_back(*Heading); + ++I; + continue; + } + } + // Unordered list if (isListMarker(Line)) { auto *List = Ctx.allocate<UnorderedListNode>(); @@ -249,6 +265,8 @@ DocumentNode *parseMarkdown(llvm::StringRef Text, ASTContext &Ctx) { break; if (isListMarker(L)) break; + if (L.starts_with("#")) + break; if (!ParaText.empty()) ParaText += ' '; ParaText += L; diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp index 16ba5917d8860..04a4adb6aec6a 100644 --- a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp +++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp @@ -120,4 +120,22 @@ TEST(MarkdownParserTest, SingleItemList) { EXPECT_EQ(std::distance(List->Items.begin(), List->Items.end()), 1); } +TEST(MarkdownParserTest, Heading1) { + ASTContext Ctx; + auto *Doc = parseMarkdown("# Hello", Ctx); + ASSERT_NE(Doc, nullptr); + auto *H = llvm::cast<HeadingNode>(&Doc->Children.front()); + EXPECT_EQ(H->getLevel(), 1u); + EXPECT_EQ(llvm::cast<TextNode>(H->Children.front()).getText(), "Hello"); +} + +TEST(MarkdownParserTest, Heading3) { + ASTContext Ctx; + auto *Doc = parseMarkdown("### Section", Ctx); + ASSERT_NE(Doc, nullptr); + auto *H = llvm::cast<HeadingNode>(&Doc->Children.front()); + EXPECT_EQ(H->getLevel(), 3u); + EXPECT_EQ(llvm::cast<TextNode>(H->Children.front()).getText(), "Section"); +} + } // namespace >From 2eff3d54de36e0cf9f3e57ff1175f090faf1b94d Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email 
protected]> Date: Sun, 28 Jun 2026 17:42:16 -0400 Subject: [PATCH 09/10] [clang-doc] Address review feedback: virtual print, separate CMake target, docs, thematic breaks --- .../clang-doc/support/CMakeLists.txt | 8 +- .../clang-doc/support/Markdown.cpp | 99 +++++-------------- .../clang-doc/support/Markdown.h | 86 ++++++++++------ .../unittests/clang-doc/CMakeLists.txt | 2 +- 4 files changed, 84 insertions(+), 111 deletions(-) diff --git a/clang-tools-extra/clang-doc/support/CMakeLists.txt b/clang-tools-extra/clang-doc/support/CMakeLists.txt index 7dc11f07ff8b3..2ea459b8a9404 100644 --- a/clang-tools-extra/clang-doc/support/CMakeLists.txt +++ b/clang-tools-extra/clang-doc/support/CMakeLists.txt @@ -5,7 +5,13 @@ set(LLVM_LINK_COMPONENTS ) add_clang_library(clangDocSupport STATIC + PARTIAL_SOURCES_INTENDED File.cpp - Markdown.cpp Utils.cpp ) + +add_clang_library(clangDocMarkdown STATIC + PARTIAL_SOURCES_INTENDED + Markdown.cpp + ) + \ No newline at end of file diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp index 0f4188cfa1705..d8656bfb4c39d 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.cpp +++ b/clang-tools-extra/clang-doc/support/Markdown.cpp @@ -9,7 +9,6 @@ #include "Markdown.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Casting.h" namespace clang::doc::markdown { @@ -17,92 +16,32 @@ namespace clang::doc::markdown { // Inline node print/dump //===----------------------------------------------------------------------===// -void InlineNode::print(llvm::raw_ostream &OS) const { - switch (Kind) { - case NodeKind::NK_Text: - llvm::cast<TextNode>(this)->print(OS); - break; - case NodeKind::NK_InlineCode: - llvm::cast<InlineCodeNode>(this)->print(OS); - break; - case NodeKind::NK_Emphasis: - llvm::cast<EmphasisNode>(this)->print(OS); - break; - case NodeKind::NK_Strong: - llvm::cast<StrongNode>(this)->print(OS); - break; - default: - OS << 
"UnknownInlineNode\n"; - break; - } -} - LLVM_DUMP_METHOD void InlineNode::dump() const { print(llvm::errs()); } void TextNode::print(llvm::raw_ostream &OS) const { OS << "TextNode: " << getText() << "\n"; } -LLVM_DUMP_METHOD void TextNode::dump() const { print(llvm::errs()); } - void InlineCodeNode::print(llvm::raw_ostream &OS) const { OS << "InlineCodeNode: " << getCode() << "\n"; } -LLVM_DUMP_METHOD void InlineCodeNode::dump() const { print(llvm::errs()); } - void EmphasisNode::print(llvm::raw_ostream &OS) const { OS << "EmphasisNode\n"; for (const auto &Child : Children) Child.print(OS); } -LLVM_DUMP_METHOD void EmphasisNode::dump() const { print(llvm::errs()); } - void StrongNode::print(llvm::raw_ostream &OS) const { OS << "StrongNode\n"; for (const auto &Child : Children) Child.print(OS); } -LLVM_DUMP_METHOD void StrongNode::dump() const { print(llvm::errs()); } - //===----------------------------------------------------------------------===// // Block node print/dump //===----------------------------------------------------------------------===// -void BlockNode::print(llvm::raw_ostream &OS) const { - switch (Kind) { - case NodeKind::NK_Paragraph: - llvm::cast<ParagraphNode>(this)->print(OS); - break; - case NodeKind::NK_Heading: - llvm::cast<HeadingNode>(this)->print(OS); - break; - case NodeKind::NK_FencedCode: - llvm::cast<FencedCodeNode>(this)->print(OS); - break; - case NodeKind::NK_UnorderedList: - llvm::cast<UnorderedListNode>(this)->print(OS); - break; - case NodeKind::NK_OrderedList: - llvm::cast<OrderedListNode>(this)->print(OS); - break; - case NodeKind::NK_BlockQuote: - llvm::cast<BlockQuoteNode>(this)->print(OS); - break; - case NodeKind::NK_ThematicBreak: - llvm::cast<ThematicBreakNode>(this)->print(OS); - break; - case NodeKind::NK_Document: - llvm::cast<DocumentNode>(this)->print(OS); - break; - default: - OS << "UnknownBlockNode\n"; - break; - } -} - LLVM_DUMP_METHOD void BlockNode::dump() const { print(llvm::errs()); } void 
ParagraphNode::print(llvm::raw_ostream &OS) const { @@ -111,22 +50,16 @@ void ParagraphNode::print(llvm::raw_ostream &OS) const { Child.print(OS); } -LLVM_DUMP_METHOD void ParagraphNode::dump() const { print(llvm::errs()); } - void HeadingNode::print(llvm::raw_ostream &OS) const { OS << "HeadingNode: level=" << getLevel() << "\n"; for (const auto &Child : Children) Child.print(OS); } -LLVM_DUMP_METHOD void HeadingNode::dump() const { print(llvm::errs()); } - void FencedCodeNode::print(llvm::raw_ostream &OS) const { OS << "FencedCodeNode: lang=" << getLang() << "\n" << getCode() << "\n"; } -LLVM_DUMP_METHOD void FencedCodeNode::dump() const { print(llvm::errs()); } - void ListItemNode::print(llvm::raw_ostream &OS) const { OS << "ListItemNode\n"; for (const auto &Child : Children) @@ -141,38 +74,28 @@ void UnorderedListNode::print(llvm::raw_ostream &OS) const { Item.print(OS); } -LLVM_DUMP_METHOD void UnorderedListNode::dump() const { print(llvm::errs()); } - void OrderedListNode::print(llvm::raw_ostream &OS) const { OS << "OrderedListNode: start=" << getStart() << "\n"; for (const auto &Item : Items) Item.print(OS); } -LLVM_DUMP_METHOD void OrderedListNode::dump() const { print(llvm::errs()); } - void BlockQuoteNode::print(llvm::raw_ostream &OS) const { OS << "BlockQuoteNode\n"; for (const auto &Child : Children) Child.print(OS); } -LLVM_DUMP_METHOD void BlockQuoteNode::dump() const { print(llvm::errs()); } - void ThematicBreakNode::print(llvm::raw_ostream &OS) const { OS << "ThematicBreakNode\n"; } -LLVM_DUMP_METHOD void ThematicBreakNode::dump() const { print(llvm::errs()); } - void DocumentNode::print(llvm::raw_ostream &OS) const { OS << "DocumentNode\n"; for (const auto &Child : Children) Child.print(OS); } -LLVM_DUMP_METHOD void DocumentNode::dump() const { print(llvm::errs()); } - //===----------------------------------------------------------------------===// // Parser //===----------------------------------------------------------------------===// @@ -182,6 
+105,18 @@ static bool isListMarker(llvm::StringRef Line) { Line.starts_with("+ "); } +static bool isThematicBreak(llvm::StringRef Line) { + if (Line.empty()) + return false; + char Marker = Line[0]; + if (Marker != '-' && Marker != '*' && Marker != '_') + return false; + llvm::SmallString<3> Allowed = {Marker, ' '}; + if (Line.find_first_not_of(llvm::StringRef(Allowed)) != llvm::StringRef::npos) + return false; + return Line.count(Marker) >= 3; +} + DocumentNode *parseMarkdown(llvm::StringRef Text, ASTContext &Ctx) { auto *Doc = Ctx.allocate<DocumentNode>(); Ctx.setRoot(Doc); @@ -237,6 +172,14 @@ DocumentNode *parseMarkdown(llvm::StringRef Text, ASTContext &Ctx) { } } + // Thematic break: 3+ of -, *, or _ optionally separated by spaces + if (isThematicBreak(Line)) { + auto *Node = Ctx.allocate<ThematicBreakNode>(); + Doc->Children.push_back(*Node); + ++I; + continue; + } + // Unordered list if (isListMarker(Line)) { auto *List = Ctx.allocate<UnorderedListNode>(); @@ -267,6 +210,8 @@ DocumentNode *parseMarkdown(llvm::StringRef Text, ASTContext &Ctx) { break; if (L.starts_with("#")) break; + if (isThematicBreak(L)) + break; if (!ParaText.empty()) ParaText += ' '; ParaText += L; diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h index 40b20dd460252..3037f75068e19 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.h +++ b/clang-tools-extra/clang-doc/support/Markdown.h @@ -5,6 +5,18 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +/// +/// \file +/// Defines the Markdown AST node hierarchy for the clang-doc Markdown parser. +/// +/// Block nodes represent structural constructs (paragraphs, headings, lists, +/// fenced code blocks, etc). Inline nodes represent span-level content (text, +/// emphasis, inline code) that appears inside block nodes. 
+/// +/// All nodes are arena-allocated via ASTContext, which owns their lifetime. +/// The parser builds the tree by calling push_back() on simple_ilist members. +/// +//===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SUPPORT_MARKDOWN_H #define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SUPPORT_MARKDOWN_H @@ -42,18 +54,22 @@ struct InlineNode; struct BlockNode; //===----------------------------------------------------------------------===// -// Inline nodes +// Inline nodes -- span-level content inside block nodes //===----------------------------------------------------------------------===// +/// Base class for all inline nodes. Inline nodes represent span-level content +/// such as text, emphasis, and inline code. They live in InlineList members +/// of block nodes. struct InlineNode : llvm::ilist_node<InlineNode> { NodeKind Kind; explicit InlineNode(NodeKind K) : Kind(K) {} - void print(llvm::raw_ostream &OS) const; + virtual void print(llvm::raw_ostream &OS) const = 0; LLVM_DUMP_METHOD void dump() const; }; using InlineList = llvm::simple_ilist<InlineNode>; +/// A plain text run. struct TextNode : InlineNode { private: llvm::StringRef Text; @@ -62,14 +78,14 @@ struct TextNode : InlineNode { explicit TextNode(llvm::StringRef T) : InlineNode(NodeKind::NK_Text), Text(T) {} llvm::StringRef getText() const { return Text; } - void print(llvm::raw_ostream &OS) const; - LLVM_DUMP_METHOD void dump() const; + void print(llvm::raw_ostream &OS) const override; static bool classof(const InlineNode *N) { return N->Kind == NodeKind::NK_Text; } }; static_assert(std::is_trivially_destructible_v<TextNode>); +/// A backtick-delimited inline code span. 
struct InlineCodeNode : InlineNode { private: llvm::StringRef Code; @@ -78,57 +94,60 @@ struct InlineCodeNode : InlineNode { explicit InlineCodeNode(llvm::StringRef C) : InlineNode(NodeKind::NK_InlineCode), Code(C) {} llvm::StringRef getCode() const { return Code; } - void print(llvm::raw_ostream &OS) const; - LLVM_DUMP_METHOD void dump() const; + void print(llvm::raw_ostream &OS) const override; static bool classof(const InlineNode *N) { return N->Kind == NodeKind::NK_InlineCode; } }; static_assert(std::is_trivially_destructible_v<InlineCodeNode>); +/// An emphasis span (* or _). struct EmphasisNode : InlineNode { InlineList Children; EmphasisNode() : InlineNode(NodeKind::NK_Emphasis) {} - void print(llvm::raw_ostream &OS) const; - LLVM_DUMP_METHOD void dump() const; + void print(llvm::raw_ostream &OS) const override; static bool classof(const InlineNode *N) { return N->Kind == NodeKind::NK_Emphasis; } }; +/// A strong emphasis span (** or __). struct StrongNode : InlineNode { InlineList Children; StrongNode() : InlineNode(NodeKind::NK_Strong) {} - void print(llvm::raw_ostream &OS) const; - LLVM_DUMP_METHOD void dump() const; + void print(llvm::raw_ostream &OS) const override; static bool classof(const InlineNode *N) { return N->Kind == NodeKind::NK_Strong; } }; //===----------------------------------------------------------------------===// -// Block nodes +// Block nodes -- structural constructs //===----------------------------------------------------------------------===// +/// Base class for all block nodes. Block nodes represent structural constructs +/// such as paragraphs, headings, and lists. They live in BlockList members of +/// container block nodes. 
struct BlockNode : llvm::ilist_node<BlockNode> { NodeKind Kind; explicit BlockNode(NodeKind K) : Kind(K) {} - void print(llvm::raw_ostream &OS) const; + virtual void print(llvm::raw_ostream &OS) const = 0; LLVM_DUMP_METHOD void dump() const; }; using BlockList = llvm::simple_ilist<BlockNode>; +/// A paragraph of inline content. struct ParagraphNode : BlockNode { InlineList Children; ParagraphNode() : BlockNode(NodeKind::NK_Paragraph) {} - void print(llvm::raw_ostream &OS) const; - LLVM_DUMP_METHOD void dump() const; + void print(llvm::raw_ostream &OS) const override; static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_Paragraph; } }; +/// An ATX heading (# through ######). struct HeadingNode : BlockNode { private: unsigned Level; @@ -138,13 +157,13 @@ struct HeadingNode : BlockNode { explicit HeadingNode(unsigned L) : BlockNode(NodeKind::NK_Heading), Level(L) {} unsigned getLevel() const { return Level; } - void print(llvm::raw_ostream &OS) const; - LLVM_DUMP_METHOD void dump() const; + void print(llvm::raw_ostream &OS) const override; static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_Heading; } }; +/// A fenced code block (``` or ~~~). Lang holds the info string. struct FencedCodeNode : BlockNode { private: llvm::StringRef Lang; @@ -155,16 +174,15 @@ struct FencedCodeNode : BlockNode { : BlockNode(NodeKind::NK_FencedCode), Lang(L), Code(C) {} llvm::StringRef getLang() const { return Lang; } llvm::StringRef getCode() const { return Code; } - void print(llvm::raw_ostream &OS) const; - LLVM_DUMP_METHOD void dump() const; + void print(llvm::raw_ostream &OS) const override; static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_FencedCode; } }; static_assert(std::is_trivially_destructible_v<FencedCodeNode>); -// ListItemNode is not a BlockNode -- it only lives inside UnorderedListNode -// and OrderedListNode, never directly in a BlockList. +/// A single item in an unordered or ordered list. 
+/// ListItemNode is not a BlockNode -- it only lives inside list nodes. struct ListItemNode : llvm::ilist_node<ListItemNode> { InlineList Children; ListItemNode() = default; @@ -172,16 +190,17 @@ struct ListItemNode : llvm::ilist_node<ListItemNode> { LLVM_DUMP_METHOD void dump() const; }; +/// An unordered list (-, *, or + markers). struct UnorderedListNode : BlockNode { llvm::simple_ilist<ListItemNode> Items; UnorderedListNode() : BlockNode(NodeKind::NK_UnorderedList) {} - void print(llvm::raw_ostream &OS) const; - LLVM_DUMP_METHOD void dump() const; + void print(llvm::raw_ostream &OS) const override; static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_UnorderedList; } }; +/// An ordered list (1. 2. 3. markers). Start holds the first item number. struct OrderedListNode : BlockNode { private: unsigned Start; @@ -191,45 +210,45 @@ struct OrderedListNode : BlockNode { explicit OrderedListNode(unsigned S = 1) : BlockNode(NodeKind::NK_OrderedList), Start(S) {} unsigned getStart() const { return Start; } - void print(llvm::raw_ostream &OS) const; - LLVM_DUMP_METHOD void dump() const; + void print(llvm::raw_ostream &OS) const override; static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_OrderedList; } }; +/// A block quote (> marker). struct BlockQuoteNode : BlockNode { BlockList Children; BlockQuoteNode() : BlockNode(NodeKind::NK_BlockQuote) {} - void print(llvm::raw_ostream &OS) const; - LLVM_DUMP_METHOD void dump() const; + void print(llvm::raw_ostream &OS) const override; static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_BlockQuote; } }; +/// A thematic break (---, ***, or ___). 
struct ThematicBreakNode : BlockNode { ThematicBreakNode() : BlockNode(NodeKind::NK_ThematicBreak) {} - void print(llvm::raw_ostream &OS) const; - LLVM_DUMP_METHOD void dump() const; + void print(llvm::raw_ostream &OS) const override; static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_ThematicBreak; } }; +/// The root document node. Contains all top-level block nodes. +// FIXME: add constructor that accepts children -- will be addressed in a +// follow-up patch before parser work proceeds. struct DocumentNode : BlockNode { - // FIXME: add constructor that accepts children once parser is in place BlockList Children; DocumentNode() : BlockNode(NodeKind::NK_Document) {} - void print(llvm::raw_ostream &OS) const; - LLVM_DUMP_METHOD void dump() const; + void print(llvm::raw_ostream &OS) const override; static bool classof(const BlockNode *N) { return N->Kind == NodeKind::NK_Document; } }; //===----------------------------------------------------------------------===// -// ASTContext - owns the arena +// ASTContext -- owns the arena and all allocated nodes //===----------------------------------------------------------------------===// template <typename T> @@ -237,6 +256,8 @@ using IsMarkdownNode = std::enable_if_t<std::is_base_of_v<InlineNode, T> || std::is_base_of_v<BlockNode, T> || std::is_same_v<T, ListItemNode>>; +/// Owns the bump pointer arena for all Markdown AST nodes. Nodes allocated +/// via allocate() have their lifetime tied to the ASTContext. class ASTContext { llvm::BumpPtrAllocator Arena; DocumentNode *Root = nullptr; @@ -249,6 +270,7 @@ class ASTContext { return new (Arena.Allocate<T>()) T(std::forward<Args>(args)...); } + /// Copy a string into the arena so its lifetime matches the ASTContext. 
llvm::StringRef internString(llvm::StringRef S) { char *Buf = Arena.Allocate<char>(S.size()); std::copy(S.begin(), S.end(), Buf); diff --git a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt index 935df6da8ac78..97a26f0e24d09 100644 --- a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt @@ -50,6 +50,6 @@ clang_target_link_libraries(ClangDocTests target_link_libraries(ClangDocTests PRIVATE clangDoc - clangDocSupport + clangDocMarkdown LLVMTestingSupport ) >From 1cc7178131be03695c5878031099e0324d69d91e Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Sun, 28 Jun 2026 20:15:40 -0400 Subject: [PATCH 10/10] [clang-doc] Fix virtual destructor warnings, add thematic break tests, add utility includes --- .../clang-doc/support/Markdown.cpp | 21 +++++++++++-------- .../clang-doc/support/Markdown.h | 11 +++++----- .../clang-doc/MarkdownParserTest.cpp | 14 +++++++++++++ 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp index d8656bfb4c39d..6fe76db6cf000 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.cpp +++ b/clang-tools-extra/clang-doc/support/Markdown.cpp @@ -111,7 +111,9 @@ static bool isThematicBreak(llvm::StringRef Line) { char Marker = Line[0]; if (Marker != '-' && Marker != '*' && Marker != '_') return false; - llvm::SmallString<3> Allowed = {Marker, ' '}; + llvm::SmallString<8> Allowed; + Allowed += Marker; + Allowed += ' '; if (Line.find_first_not_of(llvm::StringRef(Allowed)) != llvm::StringRef::npos) return false; return Line.count(Marker) >= 3; @@ -133,6 +135,15 @@ DocumentNode *parseMarkdown(llvm::StringRef Text, ASTContext &Ctx) { continue; } + // Thematic break must be checked before the list check: spaced forms such + // as "- - -" and "* * *" also satisfy isListMarker() and would become lists.
+ if (isThematicBreak(Line)) { + auto *Node = Ctx.allocate<ThematicBreakNode>(); + Doc->Children.push_back(*Node); + ++I; + continue; + } + // Fenced code block if (Line.starts_with("```") || Line.starts_with("~~~")) { char Fence = Line[0]; @@ -172,14 +183,6 @@ DocumentNode *parseMarkdown(llvm::StringRef Text, ASTContext &Ctx) { } } - // Thematic break: 3+ of -, *, or _ optionally separated by spaces - if (isThematicBreak(Line)) { - auto *Node = Ctx.allocate<ThematicBreakNode>(); - Doc->Children.push_back(*Node); - ++I; - continue; - } - // Unordered list if (isListMarker(Line)) { auto *List = Ctx.allocate<UnorderedListNode>(); diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h index 3037f75068e19..5f56664c4c24e 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.h +++ b/clang-tools-extra/clang-doc/support/Markdown.h @@ -29,6 +29,7 @@ #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <type_traits> +#include <utility> namespace clang::doc::markdown { @@ -42,7 +43,7 @@ enum class NodeKind { NK_Paragraph, NK_Heading, NK_FencedCode, - NK_Table, + NK_Table, // TODO: add TableNode NK_UnorderedList, NK_OrderedList, NK_BlockQuote, @@ -63,6 +64,7 @@ struct BlockNode; struct InlineNode : llvm::ilist_node<InlineNode> { NodeKind Kind; explicit InlineNode(NodeKind K) : Kind(K) {} + virtual ~InlineNode() = default; virtual void print(llvm::raw_ostream &OS) const = 0; LLVM_DUMP_METHOD void dump() const; }; @@ -83,7 +85,6 @@ struct TextNode : InlineNode { return N->Kind == NodeKind::NK_Text; } }; -static_assert(std::is_trivially_destructible_v<TextNode>); /// A backtick-delimited inline code span. struct InlineCodeNode : InlineNode { @@ -99,7 +100,6 @@ struct InlineCodeNode : InlineNode { return N->Kind == NodeKind::NK_InlineCode; } }; -static_assert(std::is_trivially_destructible_v<InlineCodeNode>); /// An emphasis span (* or _). 
struct EmphasisNode : InlineNode { @@ -131,6 +131,7 @@ struct StrongNode : InlineNode { struct BlockNode : llvm::ilist_node<BlockNode> { NodeKind Kind; explicit BlockNode(NodeKind K) : Kind(K) {} + virtual ~BlockNode() = default; virtual void print(llvm::raw_ostream &OS) const = 0; LLVM_DUMP_METHOD void dump() const; }; @@ -179,7 +180,6 @@ struct FencedCodeNode : BlockNode { return N->Kind == NodeKind::NK_FencedCode; } }; -static_assert(std::is_trivially_destructible_v<FencedCodeNode>); /// A single item in an unordered or ordered list. /// ListItemNode is not a BlockNode -- it only lives inside list nodes. @@ -236,8 +236,7 @@ struct ThematicBreakNode : BlockNode { }; /// The root document node. Contains all top-level block nodes. -// FIXME: add constructor that accepts children -- will be addressed in a -// follow-up patch before parser work proceeds. +/// Children are added by the parser via push_back on the Children ilist. struct DocumentNode : BlockNode { BlockList Children; DocumentNode() : BlockNode(NodeKind::NK_Document) {} diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp index 04a4adb6aec6a..98d9dd7a7f5e8 100644 --- a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp +++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp @@ -138,4 +138,18 @@ TEST(MarkdownParserTest, Heading3) { EXPECT_EQ(llvm::cast<TextNode>(H->Children.front()).getText(), "Section"); } +TEST(MarkdownParserTest, ThematicBreakDashes) { + ASTContext Ctx; + auto *Doc = parseMarkdown("---", Ctx); + ASSERT_NE(Doc, nullptr); + EXPECT_EQ(Doc->Children.front().Kind, NodeKind::NK_ThematicBreak); +} + +TEST(MarkdownParserTest, ThematicBreakSpaced) { + ASTContext Ctx; + auto *Doc = parseMarkdown("- - -", Ctx); + ASSERT_NE(Doc, nullptr); + EXPECT_EQ(Doc->Children.front().Kind, NodeKind::NK_ThematicBreak); +} + } // namespace _______________________________________________ cfe-commits mailing 
list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
