================ @@ -0,0 +1,157 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Markdown.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "clang-doc-markdown" + +namespace clang { +namespace doc { +namespace markdown { + +static MDNode makeText(llvm::StringRef S) { + return {NodeKind::NK_Text, S, {}}; +} + +// A line is a table separator if it only contains |, -, :, and spaces, +// and has at least one -. +static bool isSepRow(llvm::StringRef Line) { + return Line.contains('-') && + Line.find_first_not_of("|-: ") == llvm::StringRef::npos; +} + +static llvm::ArrayRef<MDNode> +allocateNodes(llvm::SmallVectorImpl<MDNode> &Nodes, + llvm::BumpPtrAllocator &Arena) { + if (Nodes.empty()) + return {}; + MDNode *Allocated = Arena.Allocate<MDNode>(Nodes.size()); + std::uninitialized_copy(Nodes.begin(), Nodes.end(), Allocated); + return llvm::ArrayRef<MDNode>(Allocated, Nodes.size()); +} + +llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, + llvm::BumpPtrAllocator &Arena) { + if (ParagraphText.trim().empty()) { + LLVM_DEBUG(llvm::dbgs() << "[md] empty input, returning nothing\n"); + return {}; + } + + llvm::SmallVector<llvm::StringRef, 16> Lines; + ParagraphText.split(Lines, '\n'); + + LLVM_DEBUG(llvm::dbgs() << "[md] parsing " << Lines.size() << " line(s)\n"); + + llvm::SmallVector<MDNode, 8> Nodes; + unsigned I = 0; + + while (I < Lines.size()) { + llvm::StringRef Line = Lines[I].trim(); + + if (Line.empty()) { + ++I; + continue; + } + + // Fenced code block: ``` or 
~~~ + if (Line.starts_with("```") || Line.starts_with("~~~")) { + char Fence = Line[0]; + llvm::StringRef Lang = Line.drop_front(3).trim(); + LLVM_DEBUG(llvm::dbgs() + << "[md] fenced code block, lang='" << Lang << "'\n"); + llvm::SmallVector<MDNode, 4> CodeLines; + ++I; + while (I < Lines.size()) { + llvm::StringRef CodeLine = Lines[I].trim(); + if (CodeLine.size() >= 3 && + llvm::all_of(CodeLine.take_front(3), + [Fence](char C) { return C == Fence; })) { + LLVM_DEBUG(llvm::dbgs() + << "[md] closing fence found at line " << I << "\n"); + break; + } + CodeLines.push_back(makeText(Lines[I])); + ++I; + } + ++I; // skip closing fence + MDNode Code; + Code.Kind = NodeKind::NK_FencedCode; + Code.Content = Lang; + Code.Children = allocateNodes(CodeLines, Arena); + LLVM_DEBUG(llvm::dbgs() << "[md] emitting NK_FencedCode with " + << CodeLines.size() << " line(s)\n"); + Nodes.push_back(Code); + continue; + } + + // Pipe table: current line has | and next line is a separator row + if (Line.contains('|') && I + 1 < Lines.size() && + isSepRow(Lines[I + 1].trim())) { + LLVM_DEBUG(llvm::dbgs() + << "[md] pipe table detected at line " << I << "\n"); + llvm::SmallVector<MDNode, 4> Rows; + while (I < Lines.size() && Lines[I].trim().contains('|')) { + Rows.push_back(makeText(Lines[I].trim())); + ++I; + } + MDNode Table; + Table.Kind = NodeKind::NK_Table; + Table.Content = {}; + Table.Children = allocateNodes(Rows, Arena); + LLVM_DEBUG(llvm::dbgs() << "[md] emitting NK_Table with " << Rows.size() + << " row(s)\n"); + Nodes.push_back(Table); + continue; + } + + // Unordered list item + if (Line.starts_with("- ") || Line.starts_with("* ") || + Line.starts_with("+ ")) { + LLVM_DEBUG(llvm::dbgs() << "[md] unordered list at line " << I << "\n"); + llvm::SmallVector<MDNode, 4> Items; + while (I < Lines.size()) { + llvm::StringRef L = Lines[I].trim(); + if (!L.starts_with("- ") && !L.starts_with("* ") && + !L.starts_with("+ ")) + break; + MDNode Item; + Item.Kind = NodeKind::NK_ListItem; + 
Item.Content = L.drop_front(2).trim(); ---------------- ilovepi wrote:
I dislike magic numbers, but it's hard to do much better. I guess we could set some local constants? It's not clear that any of that is a win. Up to you how to handle it, or whether we leave it as is. https://github.com/llvm/llvm-project/pull/201746 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
