https://github.com/Neil-N4 updated https://github.com/llvm/llvm-project/pull/202991
>From b4623400ed04066b222882d46bbddf1819ffca9e Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Wed, 10 Jun 2026 09:51:48 -0400 Subject: [PATCH 1/3] [clang-doc] Add standalone Markdown parsing library --- .../clang-doc/support/CMakeLists.txt | 3 +- .../clang-doc/support/Markdown.cpp | 145 ++++++++++++++++++ .../clang-doc/support/Markdown.h | 72 +++++++++ .../unittests/clang-doc/CMakeLists.txt | 4 +- .../clang-doc/MarkdownParserTest.cpp | 94 ++++++++++++ 5 files changed, 316 insertions(+), 2 deletions(-) create mode 100644 clang-tools-extra/clang-doc/support/Markdown.cpp create mode 100644 clang-tools-extra/clang-doc/support/Markdown.h create mode 100644 clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp diff --git a/clang-tools-extra/clang-doc/support/CMakeLists.txt b/clang-tools-extra/clang-doc/support/CMakeLists.txt index 8ac913ffbe998..acff865190ff9 100644 --- a/clang-tools-extra/clang-doc/support/CMakeLists.txt +++ b/clang-tools-extra/clang-doc/support/CMakeLists.txt @@ -6,5 +6,6 @@ set(LLVM_LINK_COMPONENTS add_clang_library(clangDocSupport STATIC File.cpp + Markdown.cpp Utils.cpp - ) + ) \ No newline at end of file diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp new file mode 100644 index 0000000000000..776150b939d27 --- /dev/null +++ b/clang-tools-extra/clang-doc/support/Markdown.cpp @@ -0,0 +1,145 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Markdown.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/DebugLog.h" + +#define DEBUG_TYPE "clang-doc-markdown" + +using namespace llvm; + +namespace clang::doc::markdown { + +static MDNode makeText(StringRef S) { + return {NodeKind::NK_Text, S, {}}; +} + +// A line is a table separator if it only contains |, -, :, and spaces, +// and has at least one -. +static bool isSepRow(StringRef Line) { + return Line.contains('-') && + Line.find_first_not_of("|-: ") == StringRef::npos; +} + +// Returns true if Line begins with a bullet list marker (-, *, or +) +// followed by a space. +static bool isListItem(StringRef Line) { + return Line.starts_with("- ") || Line.starts_with("* ") || + Line.starts_with("+ "); +} + +static ArrayRef<MDNode> allocateNodes(const SmallVectorImpl<MDNode> &Nodes, + BumpPtrAllocator &Arena) { + if (Nodes.empty()) + return {}; + MDNode *Allocated = Arena.Allocate<MDNode>(Nodes.size()); + std::uninitialized_copy(Nodes.begin(), Nodes.end(), Allocated); + return ArrayRef<MDNode>(Allocated, Nodes.size()); +} + +ArrayRef<MDNode> parseMarkdown(StringRef ParagraphText, + BumpPtrAllocator &Arena) { + if (ParagraphText.trim().empty()) + return {}; + + SmallVector<StringRef, 16> Lines; + ParagraphText.split(Lines, '\n'); + + SmallVector<MDNode> Nodes; + size_t I = 0, E = Lines.size(); + + while (I < E) { + StringRef Line = Lines[I].trim(); + + if (Line.empty()) { + ++I; + continue; + } + + // TODO: Follow CommonMark spec §4.5 more closely -- opening fences may be + // indented up to 3 spaces, the closing fence must use the same character + // and be at least as long as the opening fence, and the closing fence may + // only be followed by spaces. Doxygen specifics should be handled on a + // case-by-case basis. + if (Line.starts_with("```") || Line.starts_with("~~~")) { + char Fence = Line[0]; + StringRef Lang = Line.drop_front(3).trim(); + SmallVector<MDNode> CodeLines; + ++I; + while (I < E) { + StringRef CodeLine = Lines[I].trim(); + if (CodeLine.size() >= 3 && + all_of(CodeLine.take_front(3), + [Fence](char C) { return C == Fence; })) + break; + CodeLines.push_back(makeText(Lines[I])); + ++I; + } + ++I; // skip closing fence + MDNode Code; + Code.Kind = NodeKind::NK_FencedCode; + Code.Content = Lang; + Code.Children = allocateNodes(CodeLines, Arena); + LDBG() << "emitting NK_FencedCode lang='" << Lang + << "' lines=" << CodeLines.size(); + Nodes.push_back(Code); + continue; + } + + // Pipe table: current line has | and next line is a separator row. + if (Line.contains('|') && I + 1 < E && isSepRow(Lines[I + 1].trim())) { + SmallVector<MDNode> Rows; + while (I < E && Lines[I].trim().contains('|')) { + Rows.push_back(makeText(Lines[I].trim())); + ++I; + } + MDNode Table; + Table.Kind = NodeKind::NK_Table; + Table.Content = {}; + Table.Children = allocateNodes(Rows, Arena); + LDBG() << "emitting NK_Table rows=" << Rows.size(); + Nodes.push_back(Table); + continue; + } + + // Unordered list item. + if (isListItem(Line)) { + SmallVector<MDNode> Items; + while (I < E) { + StringRef L = Lines[I].trim(); + if (!isListItem(L)) + break; + MDNode Item; + Item.Kind = NodeKind::NK_ListItem; + Item.Content = L.drop_front(2).trim(); + Item.Children = {}; + Items.push_back(Item); + ++I; + } + MDNode List; + List.Kind = NodeKind::NK_UnorderedList; + List.Content = {}; + List.Children = allocateNodes(Items, Arena); + LDBG() << "emitting NK_UnorderedList items=" << Items.size(); + Nodes.push_back(List); + continue; + } + + // Plain text fallback. + Nodes.push_back(makeText(Line)); + ++I; + } + + LDBG() << "parseMarkdown done nodes=" << Nodes.size(); + return allocateNodes(Nodes, Arena); +} + +} // namespace clang::doc::markdown \ No newline at end of file diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h new file mode 100644 index 0000000000000..890f764f937b1 --- /dev/null +++ b/clang-tools-extra/clang-doc/support/Markdown.h @@ -0,0 +1,72 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines a standalone Markdown parsing library for the LLVM +/// ecosystem. The parser takes plain text and returns a tree of typed nodes +/// with no knowledge of comments, Doxygen, or Clang-Doc internals. +/// +/// This is a simple Markdown parser for use inside Clang-Doc's comment +/// pipeline. You give it a paragraph of text and an arena allocator, and it +/// gives back a list of typed nodes describing the Markdown structure it found. +/// +/// The main entry point is parseMarkdown(). If the text has no Markdown in it, +/// you get back an empty list and can fall back to plain-text output. If it +/// does, you get a tree of MDNode structs where each node has a kind, optional +/// content (like the language tag on a code fence), and optional children. +/// +/// All nodes are allocated in the arena you pass in. You own the arena and are +/// responsible for keeping it alive as long as you use the nodes. +/// +/// The parser handles fenced code blocks, pipe tables, and unordered lists. +/// Anything it does not recognize comes back as a plain text node. It will +/// never crash on bad input. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" + +namespace clang::doc::markdown { + +enum class NodeKind { + // Block nodes + NK_Paragraph, + NK_FencedCode, + NK_Table, + NK_UnorderedList, + NK_OrderedList, + NK_ListItem, + NK_ThematicBreak, + // Inline nodes + NK_Text, + NK_InlineCode, + NK_Emphasis, + NK_Strong, + NK_SoftBreak, +}; + +struct MDNode { + NodeKind Kind; + llvm::StringRef Content; // lang tag for FencedCode, leaf text for Text + llvm::ArrayRef<MDNode> Children; // arena allocated +}; + +/// Parses Markdown from a single comment paragraph's text. +/// Returns an empty ArrayRef if no Markdown constructs are found, +/// so generators can fall back to plain-text rendering at zero cost. +llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, + llvm::BumpPtrAllocator &Arena); + +} // namespace clang::doc::markdown + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H \ No newline at end of file diff --git a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt index 01b34ec9a791e..b74207ac88fa7 100644 --- a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt @@ -26,6 +26,7 @@ add_extra_unittest(ClangDocTests ClangDocTest.cpp GeneratorTest.cpp HTMLGeneratorTest.cpp + MarkdownParserTest.cpp MDGeneratorTest.cpp MergeTest.cpp SerializeTest.cpp @@ -49,5 +50,6 @@ clang_target_link_libraries(ClangDocTests target_link_libraries(ClangDocTests PRIVATE clangDoc + clangDocSupport LLVMTestingSupport - ) + ) \ No newline at end of file diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp new file mode 100644 index 0000000000000..8df5efc7f1d5f --- /dev/null +++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp @@ -0,0 +1,94 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "support/Markdown.h" +#include "llvm/Support/Allocator.h" +#include "gtest/gtest.h" + +using namespace clang::doc::markdown; + +namespace { + +TEST(MarkdownParserTest, EmptyInput) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("", Arena); + EXPECT_TRUE(Nodes.empty()); +} + +TEST(MarkdownParserTest, WhitespaceOnlyInput) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown(" \n \n", Arena); + EXPECT_TRUE(Nodes.empty()); +} + +TEST(MarkdownParserTest, PlainText) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("hello world", Arena); + ASSERT_EQ(Nodes.size(), 1u); + EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_Text); + EXPECT_EQ(Nodes[0].Content, "hello world"); +} + +TEST(MarkdownParserTest, FencedCodeBlock) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("```cpp\nint x = 0;\n```", Arena); + ASSERT_EQ(Nodes.size(), 1u); + EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_FencedCode); + EXPECT_EQ(Nodes[0].Content, "cpp"); + ASSERT_EQ(Nodes[0].Children.size(), 1u); +} + +TEST(MarkdownParserTest, FencedCodeBlockNoLang) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("```\nsome code\n```", Arena); + ASSERT_EQ(Nodes.size(), 1u); + EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_FencedCode); + EXPECT_TRUE(Nodes[0].Content.empty()); +} + +TEST(MarkdownParserTest, UnterminatedFenceReturnsEmpty) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("```cpp\nint x = 0;", Arena); + // Unterminated fence should not crash and should produce a code node + // with whatever lines were found. + EXPECT_FALSE(Nodes.empty()); +} + +TEST(MarkdownParserTest, PipeTable) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("| A | B |\n|---|---|\n| 1 | 2 |", Arena); + ASSERT_EQ(Nodes.size(), 1u); + EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_Table); +} + +TEST(MarkdownParserTest, PipeCharacterWithoutSepRowIsPlainText) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("a | b\nc | d", Arena); + // No separator row so should not be parsed as a table + for (const auto &Node : Nodes) + EXPECT_NE(Node.Kind, NodeKind::NK_Table); +} + +TEST(MarkdownParserTest, UnorderedList) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("- foo\n- bar\n- baz", Arena); + ASSERT_EQ(Nodes.size(), 1u); + EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_UnorderedList); + ASSERT_EQ(Nodes[0].Children.size(), 3u); + EXPECT_EQ(Nodes[0].Children[0].Content, "foo"); + EXPECT_EQ(Nodes[0].Children[1].Content, "bar"); + EXPECT_EQ(Nodes[0].Children[2].Content, "baz"); +} + +TEST(MarkdownParserTest, MixedContent) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("some text\n```\ncode\n```\n- item", Arena); + EXPECT_EQ(Nodes.size(), 3u); +} + +} // namespace \ No newline at end of file >From f4cb4a28630e0f91289bfd4416c59114c5654ff7 Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Wed, 10 Jun 2026 11:35:54 -0400 Subject: [PATCH 2/3] [clang-doc] Address review feedback: test fixture, raw strings, DEBUG_TYPE, EOF newlines --- .../clang-doc/support/Markdown.cpp | 4 +- .../clang-doc/support/Markdown.h | 2 +- .../clang-doc/MarkdownParserTest.cpp | 97 +++++++++++-------- 3 files changed, 61 insertions(+), 42 deletions(-) diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp index 776150b939d27..9e008abf8b08d 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.cpp +++ b/clang-tools-extra/clang-doc/support/Markdown.cpp @@ -12,7 +12,7 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/DebugLog.h" -#define DEBUG_TYPE "clang-doc-markdown" +#define DEBUG_TYPE "clang-doc" using namespace llvm; @@ -142,4 +142,4 @@ ArrayRef<MDNode> parseMarkdown(StringRef ParagraphText, return allocateNodes(Nodes, Arena); } -} // namespace clang::doc::markdown \ No newline at end of file +} // namespace clang::doc::markdown diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h index 890f764f937b1..09b79cc8f2437 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.h +++ b/clang-tools-extra/clang-doc/support/Markdown.h @@ -69,4 +69,4 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, } // namespace clang::doc::markdown -#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H \ No newline at end of file +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp index 8df5efc7f1d5f..ff9bad88da136 100644 --- a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp +++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp @@ -14,80 +14,99 @@ using namespace clang::doc::markdown; namespace { -TEST(MarkdownParserTest, EmptyInput) { +struct MarkdownParserTest : public ::testing::Test { llvm::BumpPtrAllocator Arena; +}; + +TEST_F(MarkdownParserTest, EmptyInput) { auto Nodes = parseMarkdown("", Arena); EXPECT_TRUE(Nodes.empty()); } -TEST(MarkdownParserTest, WhitespaceOnlyInput) { - llvm::BumpPtrAllocator Arena; +TEST_F(MarkdownParserTest, WhitespaceOnlyInput) { auto Nodes = parseMarkdown(" \n \n", Arena); EXPECT_TRUE(Nodes.empty()); } -TEST(MarkdownParserTest, PlainText) { - llvm::BumpPtrAllocator Arena; +TEST_F(MarkdownParserTest, PlainText) { auto Nodes = parseMarkdown("hello world", Arena); ASSERT_EQ(Nodes.size(), 1u); - EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_Text); - EXPECT_EQ(Nodes[0].Content, "hello world"); + const auto &N = Nodes[0]; + EXPECT_EQ(N.Kind, NodeKind::NK_Text); + EXPECT_EQ(N.Content, "hello world"); } -TEST(MarkdownParserTest, FencedCodeBlock) { - llvm::BumpPtrAllocator Arena; - auto Nodes = parseMarkdown("```cpp\nint x = 0;\n```", Arena); +TEST_F(MarkdownParserTest, FencedCodeBlock) { + auto Nodes = parseMarkdown(R"(```cpp +int x = 0; +````)", + Arena); ASSERT_EQ(Nodes.size(), 1u); - EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_FencedCode); - EXPECT_EQ(Nodes[0].Content, "cpp"); - ASSERT_EQ(Nodes[0].Children.size(), 1u); + const auto &N = Nodes[0]; + EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode); + EXPECT_EQ(N.Content, "cpp"); + ASSERT_EQ(N.Children.size(), 1u); } -TEST(MarkdownParserTest, FencedCodeBlockNoLang) { - llvm::BumpPtrAllocator Arena; - auto Nodes = parseMarkdown("```\nsome code\n```", Arena); +TEST_F(MarkdownParserTest, FencedCodeBlockNoLang) { + auto Nodes = parseMarkdown(R"(``` +some code +```)", + Arena); ASSERT_EQ(Nodes.size(), 1u); - EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_FencedCode); - EXPECT_TRUE(Nodes[0].Content.empty()); + const auto &N = Nodes[0]; + EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode); + EXPECT_TRUE(N.Content.empty()); } -TEST(MarkdownParserTest, UnterminatedFenceReturnsEmpty) { - llvm::BumpPtrAllocator Arena; - auto Nodes = parseMarkdown("```cpp\nint x = 0;", Arena); +TEST_F(MarkdownParserTest, UnterminatedFenceReturnsEmpty) { + auto Nodes = parseMarkdown(R"(```cpp +int x = 0;)", + Arena); // Unterminated fence should not crash and should produce a code node // with whatever lines were found. EXPECT_FALSE(Nodes.empty()); } -TEST(MarkdownParserTest, PipeTable) { - llvm::BumpPtrAllocator Arena; - auto Nodes = parseMarkdown("| A | B |\n|---|---|\n| 1 | 2 |", Arena); +TEST_F(MarkdownParserTest, PipeTable) { + auto Nodes = parseMarkdown(R"(| A | B | +|---|---| +| 1 | 2 |)", + Arena); ASSERT_EQ(Nodes.size(), 1u); EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_Table); } -TEST(MarkdownParserTest, PipeCharacterWithoutSepRowIsPlainText) { - llvm::BumpPtrAllocator Arena; - auto Nodes = parseMarkdown("a | b\nc | d", Arena); - // No separator row so should not be parsed as a table +TEST_F(MarkdownParserTest, PipeCharacterWithoutSepRowIsPlainText) { + auto Nodes = parseMarkdown(R"(a | b +c | d)", + Arena); + // No separator row so should not be parsed as a table. for (const auto &Node : Nodes) EXPECT_NE(Node.Kind, NodeKind::NK_Table); } -TEST(MarkdownParserTest, UnorderedList) { - llvm::BumpPtrAllocator Arena; - auto Nodes = parseMarkdown("- foo\n- bar\n- baz", Arena); +TEST_F(MarkdownParserTest, UnorderedList) { + auto Nodes = parseMarkdown(R"(- foo +- bar +- baz)", + Arena); ASSERT_EQ(Nodes.size(), 1u); - EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_UnorderedList); - ASSERT_EQ(Nodes[0].Children.size(), 3u); - EXPECT_EQ(Nodes[0].Children[0].Content, "foo"); - EXPECT_EQ(Nodes[0].Children[1].Content, "bar"); - EXPECT_EQ(Nodes[0].Children[2].Content, "baz"); + const auto &N = Nodes[0]; + EXPECT_EQ(N.Kind, NodeKind::NK_UnorderedList); + ASSERT_EQ(N.Children.size(), 3u); + EXPECT_EQ(N.Children[0].Content, "foo"); + EXPECT_EQ(N.Children[1].Content, "bar"); + EXPECT_EQ(N.Children[2].Content, "baz"); } -TEST(MarkdownParserTest, MixedContent) { - llvm::BumpPtrAllocator Arena; - auto Nodes = parseMarkdown("some text\n```\ncode\n```\n- item", Arena); +TEST_F(MarkdownParserTest, MixedContent) { + auto Nodes = parseMarkdown(R"(some text +``` +code +```` +- item)", + Arena); EXPECT_EQ(Nodes.size(), 3u); } >From 3ef8f62edab311caff0907ab2b9a0c3aaeb14353 Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Wed, 10 Jun 2026 13:45:44 -0400 Subject: [PATCH 3/3] [clang-doc] Add CommonMark spec tests for fenced code blocks --- .../clang-doc/MarkdownParserTest.cpp | 112 +++++++++++++++++- 1 file changed, 108 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp index ff9bad88da136..4ca979c1f1d24 100644 --- a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp +++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp @@ -39,7 +39,7 @@ TEST_F(MarkdownParserTest, PlainText) { TEST_F(MarkdownParserTest, FencedCodeBlock) { auto Nodes = parseMarkdown(R"(```cpp int x = 0; -````)", +````````)", Arena); ASSERT_EQ(Nodes.size(), 1u); const auto &N = Nodes[0]; @@ -51,7 +51,7 @@ int x = 0; TEST_F(MarkdownParserTest, FencedCodeBlockNoLang) { auto Nodes = parseMarkdown(R"(``` some code -```)", +```````)", Arena); ASSERT_EQ(Nodes.size(), 1u); const auto &N = Nodes[0]; @@ -102,12 +102,116 @@ TEST_F(MarkdownParserTest, UnorderedList) { TEST_F(MarkdownParserTest, MixedContent) { auto Nodes = parseMarkdown(R"(some text -``` +``````` code -```` +```````` - item)", Arena); EXPECT_EQ(Nodes.size(), 3u); } +// CommonMark §4.5 example 120: tilde fences work the same as backtick fences. +TEST_F(MarkdownParserTest, TildeFence) { + auto Nodes = parseMarkdown(R"(~~~ +int x = 0; +~~~)", + Arena); + ASSERT_EQ(Nodes.size(), 1u); + const auto &N = Nodes[0]; + EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode); + EXPECT_TRUE(N.Content.empty()); + ASSERT_EQ(N.Children.size(), 1u); +} + +// CommonMark §4.5 example 120: tilde fence with a language tag. +TEST_F(MarkdownParserTest, TildeFenceWithLang) { + auto Nodes = parseMarkdown(R"(~~~cpp +int x = 0; +~~~)", + Arena); + ASSERT_EQ(Nodes.size(), 1u); + const auto &N = Nodes[0]; + EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode); + EXPECT_EQ(N.Content, "cpp"); + ASSERT_EQ(N.Children.size(), 1u); +} + +// CommonMark §4.5 example 122: a tilde line does not close a backtick fence. +TEST_F(MarkdownParserTest, ClosingFenceMustMatchOpeningChar) { + auto Nodes = parseMarkdown(R"(``` +aaa +~~~ +````````)", + Arena); + ASSERT_EQ(Nodes.size(), 1u); + const auto &N = Nodes[0]; + EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode); + // ~~~ is content, not a closing fence. + ASSERT_EQ(N.Children.size(), 2u); +} + +// CommonMark §4.5 example 130: a code block can be empty. +TEST_F(MarkdownParserTest, EmptyFencedCodeBlock) { + auto Nodes = parseMarkdown(R"(``` +```````)", + Arena); + ASSERT_EQ(Nodes.size(), 1u); + const auto &N = Nodes[0]; + EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode); + EXPECT_TRUE(N.Children.empty()); +} + +// CommonMark §4.5 example 129: a code block may contain only blank lines. +TEST_F(MarkdownParserTest, FencedCodeBlockBlankLineContent) { + auto Nodes = parseMarkdown("```\n\n \n```", Arena); + ASSERT_EQ(Nodes.size(), 1u); + const auto &N = Nodes[0]; + EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode); + ASSERT_EQ(N.Children.size(), 2u); +} + +// CommonMark §4.5 example 142: lang tag is captured from the info string. +TEST_F(MarkdownParserTest, InfoStringLangTag) { + auto Nodes = parseMarkdown(R"(```ruby +def foo(x) + return 3 +end +``````)", + Arena); + ASSERT_EQ(Nodes.size(), 1u); + const auto &N = Nodes[0]; + EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode); + EXPECT_EQ(N.Content, "ruby"); + ASSERT_EQ(N.Children.size(), 3u); +} + +// CommonMark §4.5 example 146: tilde fence info string may contain backticks. +TEST_F(MarkdownParserTest, TildeFenceInfoStringWithBackticks) { + auto Nodes = parseMarkdown(R"(~~~ aa ``` ~~~ +foo +~~~)", + Arena); + ASSERT_EQ(Nodes.size(), 1u); + const auto &N = Nodes[0]; + EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode); + EXPECT_EQ(N.Content, "aa ``` ~~~"); + ASSERT_EQ(N.Children.size(), 1u); +} + +// CommonMark §4.5 example 124: closing fence must be at least as long as the +// opening fence. +// TODO: our parser currently closes on the first line with 3 matching fence +// chars regardless of opening fence length. Fix as part of the CommonMark +// TODO in parseMarkdown(). +TEST_F(MarkdownParserTest, ClosingFenceLengthTODO) { + auto Nodes = parseMarkdown("````\naaa\n```", Arena); + // The ``` line should not close the ```` fence per CommonMark, but our + // parser currently treats it as a closing fence. This test documents the + // current (non-conformant) behavior. + ASSERT_EQ(Nodes.size(), 1u); + const auto &N = Nodes[0]; + EXPECT_EQ(N.Kind, NodeKind::NK_FencedCode); + ASSERT_EQ(N.Children.size(), 1u); +} + } // namespace \ No newline at end of file _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
