================ @@ -0,0 +1,274 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Standalone Markdown parsing library for the LLVM ecosystem. +/// +/// The parser takes plain paragraph text and returns a polymorphic tree of +/// MDNode-derived objects allocated in a caller-supplied BumpPtrAllocator. +/// Node types form a closed class hierarchy rooted at MDNode. Each concrete +/// type carries exactly the fields it needs -- no overloaded Content field, +/// no unused arrays. Use llvm::isa<>/cast<>/dyn_cast<> for type-safe +/// downcasting; each concrete type provides classof() for this purpose. +/// +/// See +/// https://llvm.org/docs/ProgrammerManual.html#the-isa-cast-and-dyn-cast-templates +/// +/// Field ordering in each derived struct is chosen to minimize padding: +/// 4-byte fields (like Level or Start) are declared before 16-byte fields +/// (ArrayRef, StringRef) so that no implicit padding is inserted between the +/// base class's 4-byte Kind and the first derived field. +/// +/// Inline nodes (appear inside ParagraphNode, HeadingNode, etc.): +/// TextNode -- plain text run +/// SoftBreakNode -- soft line break +/// HardBreakNode -- hard line break (trailing spaces or backslash) +/// InlineCodeNode -- inline code span (`code`) +/// EmphasisNode -- emphasis (*text* or _text_) +/// StrongNode -- strong emphasis (**text** or __text__) +/// +/// Block nodes: +/// ParagraphNode -- sequence of inline nodes +/// HeadingNode -- ATX heading (# through ######), level 1-6 +/// FencedCodeNode -- fenced code block (``` or ~~~) +/// TableNode -- pipe table (raw row text; TODO: structured cells) +/// UnorderedListNode -- bullet list (-, *, +) +/// OrderedListNode -- numbered list with explicit start number +/// ListItemNode -- single item inside a list +/// BlockQuoteNode -- block quote (>) +/// ThematicBreakNode -- horizontal rule (---, ***, ___) +/// +/// All nodes are arena-allocated. The caller owns the arena and must keep it +/// alive for the lifetime of any returned nodes. The parser never crashes on +/// malformed input; unrecognized text falls back to TextNode. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SUPPORT_MARKDOWN_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_SUPPORT_MARKDOWN_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" + +namespace clang::doc::markdown { + +/// Discriminator for all Markdown AST nodes. Inline kinds are grouped before +/// block kinds so that the sentinels NK_LastInline and NK_FirstBlock enable +/// cheap range-based checks in classof() implementations. +enum class NodeKind { + // Inline nodes + NK_Text, + NK_SoftBreak, + NK_HardBreak, + NK_InlineCode, + NK_Emphasis, + NK_Strong, + NK_LastInline = NK_Strong, // sentinel -- all inline kinds are <= this + + // Block nodes + NK_Paragraph, + NK_Heading, + NK_FencedCode, + NK_Table, + NK_UnorderedList, + NK_OrderedList, + NK_ListItem, + NK_BlockQuote, + NK_ThematicBreak, + NK_FirstBlock = NK_Paragraph, // sentinel -- all block kinds are >= this +}; + +/// Base type for all Markdown AST nodes. Carries only the kind discriminator. +/// Nodes are arena-allocated and have no virtual destructor; use +/// llvm::isa<>/cast<>/dyn_cast<> for type-safe downcasting. +struct MDNode { + NodeKind Kind; + explicit MDNode(NodeKind K) : Kind(K) {} +}; + +//===----------------------------------------------------------------------===// +// Inline nodes +//===----------------------------------------------------------------------===// + +/// Plain text run. +struct TextNode : MDNode { + llvm::StringRef Text; + explicit TextNode(llvm::StringRef Text) + : MDNode(NodeKind::NK_Text), Text(Text) {} + static bool classof(const MDNode *N) { return N->Kind == NodeKind::NK_Text; } +}; + +/// Soft line break -- a newline that does not end the paragraph. +struct SoftBreakNode : MDNode { + SoftBreakNode() : MDNode(NodeKind::NK_SoftBreak) {} + static bool classof(const MDNode *N) { + return N->Kind == NodeKind::NK_SoftBreak; + } +}; + +/// Hard line break -- two trailing spaces or a backslash before a newline. +struct HardBreakNode : MDNode { + HardBreakNode() : MDNode(NodeKind::NK_HardBreak) {} + static bool classof(const MDNode *N) { + return N->Kind == NodeKind::NK_HardBreak; + } +}; + +/// Inline code span: `code`. Code does not include the surrounding backticks. +struct InlineCodeNode : MDNode { + llvm::StringRef Code; + explicit InlineCodeNode(llvm::StringRef Code) + : MDNode(NodeKind::NK_InlineCode), Code(Code) {} + static bool classof(const MDNode *N) { + return N->Kind == NodeKind::NK_InlineCode; + } +}; + +/// Emphasized text: *text* or _text_. +struct EmphasisNode : MDNode { + llvm::ArrayRef<MDNode *> Children; + explicit EmphasisNode(llvm::ArrayRef<MDNode *> Children) + : MDNode(NodeKind::NK_Emphasis), Children(Children) {} + static bool classof(const MDNode *N) { + return N->Kind == NodeKind::NK_Emphasis; + } +}; + +/// Strongly emphasized text: **text** or __text__. +struct StrongNode : MDNode { + llvm::ArrayRef<MDNode *> Children; + explicit StrongNode(llvm::ArrayRef<MDNode *> Children) + : MDNode(NodeKind::NK_Strong), Children(Children) {} + static bool classof(const MDNode *N) { + return N->Kind == NodeKind::NK_Strong; + } +}; + +//===----------------------------------------------------------------------===// +// Block nodes +//===----------------------------------------------------------------------===// + +/// A paragraph -- sequence of inline nodes separated from other blocks by +/// blank lines. +struct ParagraphNode : MDNode { + llvm::ArrayRef<MDNode *> Children; + explicit ParagraphNode(llvm::ArrayRef<MDNode *> Children) + : MDNode(NodeKind::NK_Paragraph), Children(Children) {} + static bool classof(const MDNode *N) { + return N->Kind == NodeKind::NK_Paragraph; + } +}; + +/// ATX heading: one to six leading # characters. Level is declared before +/// Children to avoid padding between the base class's 4-byte Kind and the +/// 8-byte-aligned ArrayRef, keeping sizeof(HeadingNode) at 24 bytes. ---------------- Neil-N4 wrote:
Removed. Did the same for OrderedListNode https://github.com/llvm/llvm-project/pull/202991 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
