Copilot commented on code in PR #2478: URL: https://github.com/apache/groovy/pull/2478#discussion_r3107970589
########## subprojects/groovy-markdown/src/main/java/groovy/markdown/MarkdownDocument.java: ########## @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package groovy.markdown; + +import org.apache.groovy.lang.annotation.Incubating; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; +import java.util.function.Predicate; + +/** + * A parsed Markdown document, exposed as a list of node maps with convenience + * accessors for the most common navigation patterns. + * <p> + * Each node is a {@link Map} with at least a {@code type} key. The raw structure + * supports all standard Groovy list/map operations (e.g. {@code findAll}, + * {@code each}, {@code *.text}); the convenience accessors below recursively + * walk the tree so nested code blocks, links, etc. are included. + * + * @since 6.0.0 + */ +@Incubating +public class MarkdownDocument implements Iterable<Map<String, Object>> { + + private final List<Map<String, Object>> nodes; + + public MarkdownDocument(List<Map<String, Object>> nodes) { + this.nodes = nodes; + } + + /** + * The raw top-level nodes of the document. + */ + public List<Map<String, Object>> getNodes() { + return nodes; + } + + @Override + public Iterator<Map<String, Object>> iterator() { + return nodes.iterator(); + } + + /** + * All fenced and indented code blocks anywhere in the document. + */ + public List<Map<String, Object>> getCodeBlocks() { + return findAll(n -> "code_block".equals(n.get("type"))); + } + + /** + * All headings anywhere in the document, in document order. + */ + public List<Map<String, Object>> getHeadings() { + return findAll(n -> "heading".equals(n.get("type"))); + } + + /** + * All links anywhere in the document. + */ + public List<Map<String, Object>> getLinks() { + return findAll(n -> "link".equals(n.get("type"))); + } + + /** + * All tables anywhere in the document. Tables are only produced when + * {@code enableTables(true)} was set on the slurper. + */ + public List<Map<String, Object>> getTables() { + return findAll(n -> "table".equals(n.get("type"))); + } + + /** + * Plain-text projection of the document — formatting markers are stripped + * and block-level elements are separated by newlines. + */ + public String getText() { + StringBuilder sb = new StringBuilder(); + appendText(nodes, sb); + return sb.toString(); + } + + /** + * Return the nodes under the heading with the given text, up to the next + * heading of the same or higher level. The heading itself is not included. + * Returns an empty list if no matching heading is found. + */ + public List<Map<String, Object>> section(String headingText) { + for (int i = 0; i < nodes.size(); i++) { + Map<String, Object> n = nodes.get(i); + if (!"heading".equals(n.get("type")) || !headingText.equals(n.get("text"))) { + continue; + } + int level = ((Number) n.get("level")).intValue(); + int end = nodes.size(); + for (int j = i + 1; j < nodes.size(); j++) { + Map<String, Object> m = nodes.get(j); + if ("heading".equals(m.get("type")) + && ((Number) m.get("level")).intValue() <= level) { + end = j; + break; + } + } + return new ArrayList<>(nodes.subList(i + 1, end)); + } + return List.of(); + } + Review Comment: `section(String)` currently scans only the top-level `nodes` list. If a matching heading appears nested (e.g. inside a block quote or list), it will never be found, which can be surprising given that other accessors walk the full tree. Either clarify this in the Javadoc/user guide, or update the implementation to search headings recursively and compute the section boundaries accordingly. ########## subprojects/groovy-markdown/src/main/java/groovy/markdown/MarkdownDocument.java: ########## @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package groovy.markdown; + +import org.apache.groovy.lang.annotation.Incubating; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; +import java.util.function.Predicate; + +/** + * A parsed Markdown document, exposed as a list of node maps with convenience + * accessors for the most common navigation patterns. + * <p> + * Each node is a {@link Map} with at least a {@code type} key. The raw structure + * supports all standard Groovy list/map operations (e.g. {@code findAll}, + * {@code each}, {@code *.text}); the convenience accessors below recursively + * walk the tree so nested code blocks, links, etc. are included. + * + * @since 6.0.0 + */ +@Incubating +public class MarkdownDocument implements Iterable<Map<String, Object>> { + + private final List<Map<String, Object>> nodes; + + public MarkdownDocument(List<Map<String, Object>> nodes) { + this.nodes = nodes; + } + + /** + * The raw top-level nodes of the document. + */ + public List<Map<String, Object>> getNodes() { + return nodes; + } + + @Override + public Iterator<Map<String, Object>> iterator() { + return nodes.iterator(); + } + + /** + * All fenced and indented code blocks anywhere in the document. + */ + public List<Map<String, Object>> getCodeBlocks() { + return findAll(n -> "code_block".equals(n.get("type"))); + } + + /** + * All headings anywhere in the document, in document order. + */ + public List<Map<String, Object>> getHeadings() { + return findAll(n -> "heading".equals(n.get("type"))); + } + + /** + * All links anywhere in the document. + */ + public List<Map<String, Object>> getLinks() { + return findAll(n -> "link".equals(n.get("type"))); + } + + /** + * All tables anywhere in the document. Tables are only produced when + * {@code enableTables(true)} was set on the slurper. + */ + public List<Map<String, Object>> getTables() { + return findAll(n -> "table".equals(n.get("type"))); + } + + /** + * Plain-text projection of the document — formatting markers are stripped + * and block-level elements are separated by newlines. + */ + public String getText() { + StringBuilder sb = new StringBuilder(); + appendText(nodes, sb); + return sb.toString(); + } + + /** + * Return the nodes under the heading with the given text, up to the next + * heading of the same or higher level. The heading itself is not included. + * Returns an empty list if no matching heading is found. + */ + public List<Map<String, Object>> section(String headingText) { + for (int i = 0; i < nodes.size(); i++) { + Map<String, Object> n = nodes.get(i); + if (!"heading".equals(n.get("type")) || !headingText.equals(n.get("text"))) { + continue; + } + int level = ((Number) n.get("level")).intValue(); + int end = nodes.size(); + for (int j = i + 1; j < nodes.size(); j++) { + Map<String, Object> m = nodes.get(j); + if ("heading".equals(m.get("type")) + && ((Number) m.get("level")).intValue() <= level) { + end = j; + break; + } + } + return new ArrayList<>(nodes.subList(i + 1, end)); + } + return List.of(); + } + + private List<Map<String, Object>> findAll(Predicate<Map<String, Object>> pred) { + List<Map<String, Object>> out = new ArrayList<>(); + walk(nodes, n -> { + if (pred.test(n)) out.add(n); + }); + return out; + } + + @SuppressWarnings("unchecked") + private static void walk(List<Map<String, Object>> nodes, Consumer<Map<String, Object>> visitor) { + for (Map<String, Object> n : nodes) { + visitor.accept(n); + Object children = n.get("children"); + if (children instanceof List) { + walk((List<Map<String, Object>>) children, visitor); + } + Object items = n.get("items"); + if (items instanceof List) { + walk((List<Map<String, Object>>) items, visitor); + } + } + } + + @SuppressWarnings("unchecked") + private static void appendText(List<Map<String, Object>> nodes, StringBuilder sb) { + for (Map<String, Object> n : nodes) { + String type = (String) n.get("type"); + switch (type) { + case "text": + sb.append(n.get("value")); + break; + case "inline_code": + case "code_block": + case "html_block": + case "html_inline": + sb.append(n.get("text")); + break; + case "hard_line_break": + case "soft_line_break": + sb.append('\n'); + break; + case "table": + List<String> headers = (List<String>) n.get("headers"); + if (headers != null) { + sb.append(String.join(" | ", headers)).append('\n'); + } + List<Map<String, String>> rows = (List<Map<String, String>>) n.get("rows"); + if (rows != null) { + for (Map<String, String> row : rows) { + sb.append(String.join(" | ", row.values())).append('\n'); Review Comment: In the table branch, joining `row.values()` relies on the map's iteration order. While `tableToMap` currently uses a `LinkedHashMap`, this becomes fragile if callers manipulate row maps (or if an alternative map implementation is ever used). For stable output, consider iterating in `headers` order and pulling `row.get(header)` for each column. ########## subprojects/groovy-markdown/src/main/java/groovy/markdown/MarkdownDocument.java: ########## @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package groovy.markdown; + +import org.apache.groovy.lang.annotation.Incubating; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; +import java.util.function.Predicate; + +/** + * A parsed Markdown document, exposed as a list of node maps with convenience + * accessors for the most common navigation patterns. + * <p> + * Each node is a {@link Map} with at least a {@code type} key. The raw structure + * supports all standard Groovy list/map operations (e.g. {@code findAll}, + * {@code each}, {@code *.text}); the convenience accessors below recursively + * walk the tree so nested code blocks, links, etc. are included. + * + * @since 6.0.0 + */ +@Incubating +public class MarkdownDocument implements Iterable<Map<String, Object>> { + + private final List<Map<String, Object>> nodes; + + public MarkdownDocument(List<Map<String, Object>> nodes) { + this.nodes = nodes; + } + + /** + * The raw top-level nodes of the document. + */ + public List<Map<String, Object>> getNodes() { + return nodes; + } + + @Override + public Iterator<Map<String, Object>> iterator() { + return nodes.iterator(); + } + + /** + * All fenced and indented code blocks anywhere in the document. + */ + public List<Map<String, Object>> getCodeBlocks() { + return findAll(n -> "code_block".equals(n.get("type"))); + } + + /** + * All headings anywhere in the document, in document order. + */ + public List<Map<String, Object>> getHeadings() { + return findAll(n -> "heading".equals(n.get("type"))); + } + + /** + * All links anywhere in the document. + */ + public List<Map<String, Object>> getLinks() { + return findAll(n -> "link".equals(n.get("type"))); + } + + /** + * All tables anywhere in the document. Tables are only produced when + * {@code enableTables(true)} was set on the slurper. + */ + public List<Map<String, Object>> getTables() { + return findAll(n -> "table".equals(n.get("type"))); + } + + /** + * Plain-text projection of the document — formatting markers are stripped + * and block-level elements are separated by newlines. + */ + public String getText() { + StringBuilder sb = new StringBuilder(); + appendText(nodes, sb); + return sb.toString(); + } + + /** + * Return the nodes under the heading with the given text, up to the next + * heading of the same or higher level. The heading itself is not included. + * Returns an empty list if no matching heading is found. + */ + public List<Map<String, Object>> section(String headingText) { + for (int i = 0; i < nodes.size(); i++) { + Map<String, Object> n = nodes.get(i); + if (!"heading".equals(n.get("type")) || !headingText.equals(n.get("text"))) { + continue; + } + int level = ((Number) n.get("level")).intValue(); + int end = nodes.size(); + for (int j = i + 1; j < nodes.size(); j++) { + Map<String, Object> m = nodes.get(j); + if ("heading".equals(m.get("type")) + && ((Number) m.get("level")).intValue() <= level) { + end = j; + break; + } + } + return new ArrayList<>(nodes.subList(i + 1, end)); + } + return List.of(); + } + + private List<Map<String, Object>> findAll(Predicate<Map<String, Object>> pred) { + List<Map<String, Object>> out = new ArrayList<>(); + walk(nodes, n -> { + if (pred.test(n)) out.add(n); + }); + return out; + } + + @SuppressWarnings("unchecked") + private static void walk(List<Map<String, Object>> nodes, Consumer<Map<String, Object>> visitor) { + for (Map<String, Object> n : nodes) { + visitor.accept(n); + Object children = n.get("children"); + if (children instanceof List) { + walk((List<Map<String, Object>>) children, visitor); + } + Object items = n.get("items"); + if (items instanceof List) { + walk((List<Map<String, Object>>) items, visitor); + } + } + } + + @SuppressWarnings("unchecked") + private static void appendText(List<Map<String, Object>> nodes, StringBuilder sb) { + for (Map<String, Object> n : nodes) { + String type = (String) n.get("type"); + switch (type) { + case "text": + sb.append(n.get("value")); + break; + case "inline_code": + case "code_block": + case "html_block": + case "html_inline": + sb.append(n.get("text")); + break; + case "hard_line_break": + case "soft_line_break": + sb.append('\n'); Review Comment: `getText()` appends a newline for `hard_line_break`/`soft_line_break`, but the Javadoc says newlines separate block-level elements; inline line breaks typically project to spaces to avoid unexpected mid-paragraph newlines (and this also differs from `MarkdownSlurper.textOf`, which converts line breaks to spaces). Consider projecting these as spaces (or clarify the behavior in the Javadoc). ########## subprojects/groovy-markdown/src/main/java/groovy/markdown/MarkdownSlurper.java: ########## @@ -0,0 +1,315 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package groovy.markdown; + +import org.apache.groovy.lang.annotation.Incubating; +import org.commonmark.Extension; +import org.commonmark.ext.gfm.tables.TableBlock; +import org.commonmark.ext.gfm.tables.TableBody; +import org.commonmark.ext.gfm.tables.TableCell; +import org.commonmark.ext.gfm.tables.TableHead; +import org.commonmark.ext.gfm.tables.TableRow; +import org.commonmark.ext.gfm.tables.TablesExtension; +import org.commonmark.node.BlockQuote; +import org.commonmark.node.BulletList; +import org.commonmark.node.Code; +import org.commonmark.node.Emphasis; +import org.commonmark.node.FencedCodeBlock; +import org.commonmark.node.HardLineBreak; +import org.commonmark.node.Heading; +import org.commonmark.node.HtmlBlock; +import org.commonmark.node.HtmlInline; +import org.commonmark.node.Image; +import org.commonmark.node.IndentedCodeBlock; +import org.commonmark.node.Link; +import org.commonmark.node.ListItem; +import org.commonmark.node.Node; +import org.commonmark.node.OrderedList; +import org.commonmark.node.Paragraph; +import org.commonmark.node.SoftLineBreak; +import org.commonmark.node.StrongEmphasis; +import org.commonmark.node.Text; +import org.commonmark.node.ThematicBreak; +import org.commonmark.parser.Parser; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.StringReader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +/** + * Parses <a href="https://commonmark.org/">CommonMark</a> Markdown into a + * {@link MarkdownDocument} backed by nested lists and maps. + * <p> + * Usage: + * <pre><code class="groovyTestCase"> + * def doc = new groovy.markdown.MarkdownSlurper().parseText('# Hello') + * assert doc.headings[0].text == 'Hello' + * </code></pre> + * + * GFM-style tables are supported via an optional extension. Call + * {@link #enableTables(boolean) enableTables(true)} after adding + * {@code org.commonmark:commonmark-ext-gfm-tables} to the runtime classpath. + * + * @since 6.0.0 + */ +@Incubating +public class MarkdownSlurper { + + private static final String TABLES_EXT_CLASS = "org.commonmark.ext.gfm.tables.TablesExtension"; + + private boolean tablesEnabled; + + /** + * Enable GFM-style tables. Requires {@code commonmark-ext-gfm-tables} on the classpath. + * + * @param enable whether to enable table parsing + * @return this slurper for chaining + * @throws MarkdownRuntimeException if {@code enable} is true but the extension jar is missing + */ + public MarkdownSlurper enableTables(boolean enable) { + if (enable) { + try { + Class.forName(TABLES_EXT_CLASS); + } catch (ClassNotFoundException e) { + throw new MarkdownRuntimeException( + "GFM tables extension not on classpath. Add 'org.commonmark:commonmark-ext-gfm-tables' to enable tables."); Review Comment: When the tables extension is missing, `Class.forName` can throw `NoClassDefFoundError`/`LinkageError` as well as `ClassNotFoundException` (e.g. if transitive deps are missing). Consider catching `LinkageError` too and chaining the caught exception as the cause when throwing `MarkdownRuntimeException` so the underlying classpath problem is diagnosable. ########## subprojects/groovy-markdown/src/main/java/groovy/markdown/MarkdownSlurper.java: ########## @@ -0,0 +1,315 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package groovy.markdown; + +import org.apache.groovy.lang.annotation.Incubating; +import org.commonmark.Extension; +import org.commonmark.ext.gfm.tables.TableBlock; +import org.commonmark.ext.gfm.tables.TableBody; +import org.commonmark.ext.gfm.tables.TableCell; +import org.commonmark.ext.gfm.tables.TableHead; +import org.commonmark.ext.gfm.tables.TableRow; +import org.commonmark.ext.gfm.tables.TablesExtension; +import org.commonmark.node.BlockQuote; Review Comment: `commonmark-ext-gfm-tables` is declared as `compileOnly`, but this class has direct static references to extension types (e.g. `TableBlock`, `TablesExtension`, etc.). If the extension jar is absent at runtime, those symbolic references can trigger `NoClassDefFoundError` when `MarkdownSlurper` is loaded/verified, even when tables are never enabled. To keep tables truly optional, isolate all table-extension usage behind reflection (or move table support into a separate helper class that is only loaded when the extension is present) so `MarkdownSlurper` can be used without the extension jar on the classpath. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
