This is an automated email from the ASF dual-hosted git repository.
kwin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/maven-doxia.git
The following commit(s) were added to refs/heads/master by this push:
new 31bdbb85 Distinguish between linebreaks for formatting markup and
linebreaks in (#1039)
31bdbb85 is described below
commit 31bdbb85922e962ae4054ab00935567ed5f13370
Author: Konrad Windszus <[email protected]>
AuthorDate: Mon Mar 9 15:16:25 2026 +0100
Distinguish between linebreaks for formatting markup and linebreaks in
(#1039)
Add new Sink method "markupLineBreak" for insignificant linebreaks.
Xhtml5BaseParser detects insignificant linebreaks according to
https://developer.mozilla.org/en-US/docs/Web/CSS/Guides/Text/Whitespace
and emits them accordingly. It also collapses whitespace.
It assumes no CSS overrides for block/inline elements or for
"white-space-collapse".
This closes #882
---
.../maven/doxia/parser/Xhtml5BaseParser.java | 818 +++++++++++++--------
.../apache/maven/doxia/sink/impl/SinkWrapper.java | 4 +-
.../maven/doxia/sink/impl/Xhtml5BaseSink.java | 21 +-
.../maven/doxia/parser/Xhtml5BaseParserTest.java | 68 +-
.../maven/doxia/sink/impl/AbstractSinkTest.java | 6 +-
.../doxia/sink/impl/SinkEventTestingSink.java | 8 +-
.../apache/maven/doxia/module/apt/AptParser.java | 4 +-
.../org/apache/maven/doxia/module/apt/AptSink.java | 5 -
.../maven/doxia/module/apt/AptParserTest.java | 6 +-
.../apache/maven/doxia/module/apt/AptSinkTest.java | 4 +-
.../maven/doxia/module/markdown/MarkdownSink.java | 88 ++-
.../doxia/module/markdown/MarkdownParserTest.java | 28 +-
.../doxia/module/markdown/MarkdownSinkTest.java | 61 +-
.../src/test/resources/link.html | 1 +
.../src/test/resources/table.html | 31 +
.../apache/maven/doxia/module/xdoc/XdocParser.java | 4 +-
.../maven/doxia/module/xdoc/XdocParserTest.java | 24 +
.../java/org/apache/maven/doxia/sink/Sink.java | 14 +-
pom.xml | 1 +
19 files changed, 793 insertions(+), 403 deletions(-)
diff --git
a/doxia-core/src/main/java/org/apache/maven/doxia/parser/Xhtml5BaseParser.java
b/doxia-core/src/main/java/org/apache/maven/doxia/parser/Xhtml5BaseParser.java
index a2b03c81..f43810e5 100644
---
a/doxia-core/src/main/java/org/apache/maven/doxia/parser/Xhtml5BaseParser.java
+++
b/doxia-core/src/main/java/org/apache/maven/doxia/parser/Xhtml5BaseParser.java
@@ -21,6 +21,8 @@ package org.apache.maven.doxia.parser;
import javax.swing.text.html.HTML.Attribute;
import java.io.Reader;
+import java.text.CharacterIterator;
+import java.text.StringCharacterIterator;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Set;
@@ -109,6 +111,9 @@ public class Xhtml5BaseParser extends AbstractXmlParser
implements HtmlMarkup {
/** Used to distinguish <a href=""> from <a name="">. */
private boolean isLink;
+ /** If true, the next text event is at the beginning of a line inside a
block element, i.e. after a block tag or a line break/end block tag. */
+ protected boolean isBeginningOfLineInsideBlock = true;
+
/** Used to distinguish <a href=""> from <a name="">. */
private boolean isAnchor;
@@ -192,181 +197,276 @@ public class Xhtml5BaseParser extends AbstractXmlParser
implements HtmlMarkup {
protected boolean baseStartTag(String elementName, SinkEventAttributeSet
attribs, Sink sink) {
boolean visited = true;
-
- if (elementName.equals(HtmlMarkup.ARTICLE.toString())) {
- sink.article(attribs);
- } else if (elementName.equals(HtmlMarkup.NAV.toString())) {
- sink.navigation(attribs);
- } else if (elementName.equals(HtmlMarkup.ASIDE.toString())) {
- sink.sidebar(attribs);
- } else if (elementName.equals(HtmlMarkup.SECTION.toString())) {
- handleSectionStart(sink, attribs);
- } else if (elementName.equals(HtmlMarkup.H1.toString())) {
- handleHeadingStart(sink, Sink.SECTION_LEVEL_1, attribs);
- } else if (elementName.equals(HtmlMarkup.H2.toString())) {
- handleHeadingStart(sink, Sink.SECTION_LEVEL_2, attribs);
- } else if (elementName.equals(HtmlMarkup.H3.toString())) {
- handleHeadingStart(sink, Sink.SECTION_LEVEL_3, attribs);
- } else if (elementName.equals(HtmlMarkup.H4.toString())) {
- handleHeadingStart(sink, Sink.SECTION_LEVEL_4, attribs);
- } else if (elementName.equals(HtmlMarkup.H5.toString())) {
- handleHeadingStart(sink, Sink.SECTION_LEVEL_5, attribs);
- } else if (elementName.equals(HtmlMarkup.H6.toString())) {
- handleHeadingStart(sink, Sink.SECTION_LEVEL_6, attribs);
- } else if (elementName.equals(HtmlMarkup.HEADER.toString())) {
- sink.header(attribs);
- } else if (elementName.equals(HtmlMarkup.MAIN.toString())) {
- sink.content(attribs);
- } else if (elementName.equals(HtmlMarkup.FOOTER.toString())) {
- sink.footer(attribs);
- } else if (elementName.equals(HtmlMarkup.EM.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.EMPHASIS);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.STRONG.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.STRONG);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.SMALL.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.SMALL);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.S.toString())) {
-
attribs.addAttributes(SinkEventAttributeSet.Semantics.LINE_THROUGH);
- sink.inline(attribs);
- /* deprecated line-through support */
- } else if (elementName.equals(HtmlMarkup.CITE.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.CITATION);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.Q.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.QUOTE);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.DFN.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.DEFINITION);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.ABBR.toString())) {
-
attribs.addAttributes(SinkEventAttributeSet.Semantics.ABBREVIATION);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.I.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.ITALIC);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.B.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.BOLD);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.CODE.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.CODE);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.VAR.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.VARIABLE);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.SAMP.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.SAMPLE);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.KBD.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.KEYBOARD);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.SUP.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.SUPERSCRIPT);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.SUB.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.SUBSCRIPT);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.U.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.ANNOTATION);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.MARK.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.HIGHLIGHT);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.RUBY.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.RB.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_BASE);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.RT.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_TEXT);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.RTC.toString())) {
-
attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_TEXT_CONTAINER);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.RP.toString())) {
-
attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_PARANTHESES);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.BDI.toString())) {
-
attribs.addAttributes(SinkEventAttributeSet.Semantics.BIDIRECTIONAL_ISOLATION);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.BDO.toString())) {
-
attribs.addAttributes(SinkEventAttributeSet.Semantics.BIDIRECTIONAL_OVERRIDE);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.SPAN.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.PHRASE);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.INS.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.INSERT);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.DEL.toString())) {
- attribs.addAttributes(SinkEventAttributeSet.Semantics.DELETE);
- sink.inline(attribs);
- } else if (elementName.equals(HtmlMarkup.P.toString())) {
- handlePStart(sink, attribs);
- } else if (elementName.equals(HtmlMarkup.DIV.toString())) {
- handleDivStart(attribs, sink);
- } else if (elementName.equals(HtmlMarkup.PRE.toString())) {
- handlePreStart(attribs, sink);
- } else if (elementName.equals(HtmlMarkup.UL.toString())) {
- sink.list(attribs);
- } else if (elementName.equals(HtmlMarkup.OL.toString())) {
- handleOLStart(sink, attribs);
- } else if (elementName.equals(HtmlMarkup.LI.toString())) {
- handleLIStart(sink, attribs);
- } else if (elementName.equals(HtmlMarkup.DL.toString())) {
- sink.definitionList(attribs);
- } else if (elementName.equals(HtmlMarkup.DT.toString())) {
- if (hasDefinitionListItem) {
- // close previous listItem
- sink.definitionListItem_();
- }
- sink.definitionListItem(attribs);
- hasDefinitionListItem = true;
- sink.definedTerm(attribs);
- } else if (elementName.equals(HtmlMarkup.DD.toString())) {
- if (!hasDefinitionListItem) {
+ isBeginningOfLineInsideBlock = true;
+ switch (elementName) {
+ case "article":
+ sink.article(attribs);
+ break;
+ case "nav":
+ sink.navigation(attribs);
+ break;
+ case "aside":
+ sink.sidebar(attribs);
+ break;
+ case "section":
+ handleSectionStart(sink, attribs);
+ break;
+ case "h1":
+ handleHeadingStart(sink, Sink.SECTION_LEVEL_1, attribs);
+ break;
+ case "h2":
+ handleHeadingStart(sink, Sink.SECTION_LEVEL_2, attribs);
+ break;
+ case "h3":
+ handleHeadingStart(sink, Sink.SECTION_LEVEL_3, attribs);
+ break;
+ case "h4":
+ handleHeadingStart(sink, Sink.SECTION_LEVEL_4, attribs);
+ break;
+ case "h5":
+ handleHeadingStart(sink, Sink.SECTION_LEVEL_5, attribs);
+ break;
+ case "h6":
+ handleHeadingStart(sink, Sink.SECTION_LEVEL_6, attribs);
+ break;
+ case "header":
+ sink.header(attribs);
+ break;
+ case "main":
+ sink.content(attribs);
+ break;
+ case "footer":
+ sink.footer(attribs);
+ break;
+ case "em":
+
attribs.addAttributes(SinkEventAttributeSet.Semantics.EMPHASIS);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "strong":
+ attribs.addAttributes(SinkEventAttributeSet.Semantics.STRONG);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "small":
+ attribs.addAttributes(SinkEventAttributeSet.Semantics.SMALL);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "s":
+ /* deprecated line-through support */
+
attribs.addAttributes(SinkEventAttributeSet.Semantics.LINE_THROUGH);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "cite":
+
attribs.addAttributes(SinkEventAttributeSet.Semantics.CITATION);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "q":
+ attribs.addAttributes(SinkEventAttributeSet.Semantics.QUOTE);
+ sink.inline(attribs);
+ break;
+ case "dfn":
+
attribs.addAttributes(SinkEventAttributeSet.Semantics.DEFINITION);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "abbr":
+
attribs.addAttributes(SinkEventAttributeSet.Semantics.ABBREVIATION);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "i":
+ attribs.addAttributes(SinkEventAttributeSet.Semantics.ITALIC);
+ sink.inline(attribs);
+ break;
+ case "b":
+ attribs.addAttributes(SinkEventAttributeSet.Semantics.BOLD);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "code":
+ attribs.addAttributes(SinkEventAttributeSet.Semantics.CODE);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "var":
+
attribs.addAttributes(SinkEventAttributeSet.Semantics.VARIABLE);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "samp":
+ attribs.addAttributes(SinkEventAttributeSet.Semantics.SAMPLE);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "kbd":
+
attribs.addAttributes(SinkEventAttributeSet.Semantics.KEYBOARD);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "sup":
+
attribs.addAttributes(SinkEventAttributeSet.Semantics.SUPERSCRIPT);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "sub":
+
attribs.addAttributes(SinkEventAttributeSet.Semantics.SUBSCRIPT);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "u":
+
attribs.addAttributes(SinkEventAttributeSet.Semantics.ANNOTATION);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "mark":
+
attribs.addAttributes(SinkEventAttributeSet.Semantics.HIGHLIGHT);
+ sink.inline(attribs);
+ break;
+ case "ruby":
+ attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "rb":
+
attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_BASE);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "rt":
+
attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_TEXT);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "rtc":
+
attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_TEXT_CONTAINER);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "rp":
+
attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_PARANTHESES);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "bdi":
+
attribs.addAttributes(SinkEventAttributeSet.Semantics.BIDIRECTIONAL_ISOLATION);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "bdo":
+
attribs.addAttributes(SinkEventAttributeSet.Semantics.BIDIRECTIONAL_OVERRIDE);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "span":
+ attribs.addAttributes(SinkEventAttributeSet.Semantics.PHRASE);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "ins":
+ attribs.addAttributes(SinkEventAttributeSet.Semantics.INSERT);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "del":
+ attribs.addAttributes(SinkEventAttributeSet.Semantics.DELETE);
+ sink.inline(attribs);
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "p":
+ handlePStart(sink, attribs);
+ break;
+ case "div":
+ handleDivStart(attribs, sink);
+ break;
+ case "pre":
+ handlePreStart(attribs, sink);
+ break;
+ case "ul":
+ sink.list(attribs);
+ break;
+ case "ol":
+ handleOLStart(sink, attribs);
+ break;
+ case "li":
+ handleLIStart(sink, attribs);
+ break;
+ case "dl":
+ sink.definitionList(attribs);
+ break;
+ case "dt":
+ if (hasDefinitionListItem) {
+ // close previous listItem
+ sink.definitionListItem_();
+ }
sink.definitionListItem(attribs);
- }
- sink.definition(attribs);
- } else if (elementName.equals(HtmlMarkup.FIGURE.toString())) {
- sink.figure(attribs);
- } else if (elementName.equals(HtmlMarkup.FIGCAPTION.toString())) {
- sink.figureCaption(attribs);
- } else if (elementName.equals(HtmlMarkup.A.toString())) {
- handleAStart(sink, attribs);
- } else if (elementName.equals(HtmlMarkup.TABLE.toString())) {
- handleTableStart(sink, attribs);
- } else if (elementName.equals(HtmlMarkup.TR.toString())) {
- sink.tableRow(attribs);
- } else if (elementName.equals(HtmlMarkup.TH.toString())) {
- sink.tableHeaderCell(attribs);
- } else if (elementName.equals(HtmlMarkup.TD.toString())) {
- sink.tableCell(attribs);
- } else if (elementName.equals(HtmlMarkup.CAPTION.toString())) {
- sink.tableCaption(attribs);
- } else if (elementName.equals(HtmlMarkup.BR.toString())) {
- sink.lineBreak(attribs);
- } else if (elementName.equals(HtmlMarkup.WBR.toString())) {
- sink.lineBreakOpportunity(attribs);
- } else if (elementName.equals(HtmlMarkup.HR.toString())) {
- sink.horizontalRule(attribs);
- } else if (elementName.equals(HtmlMarkup.IMG.toString())) {
- handleImgStart(sink, attribs);
- } else if (elementName.equals(HtmlMarkup.BLOCKQUOTE.toString())) {
- sink.blockquote(attribs);
- } else if (UNMATCHED_XHTML5_ELEMENTS.contains(elementName)) {
- handleUnknown(elementName, attribs, sink, TAG_TYPE_START);
- } else if (UNMATCHED_XHTML5_SIMPLE_ELEMENTS.contains(elementName)) {
- handleUnknown(elementName, attribs, sink, TAG_TYPE_SIMPLE);
- } else if (elementName.equals(HtmlMarkup.SCRIPT.toString())
- || elementName.equals(HtmlMarkup.STYLE.toString())) {
- handleUnknown(elementName, attribs, sink, TAG_TYPE_START);
- scriptBlock = true;
- } else {
- visited = false;
+ hasDefinitionListItem = true;
+ sink.definedTerm(attribs);
+ break;
+ case "dd":
+ if (!hasDefinitionListItem) {
+ sink.definitionListItem(attribs);
+ }
+ sink.definition(attribs);
+ break;
+ case "figure":
+ sink.figure(attribs);
+ break;
+ case "figcaption":
+ sink.figureCaption(attribs);
+ break;
+ case "a":
+ isBeginningOfLineInsideBlock = false;
+ handleAStart(sink, attribs);
+ break;
+ case "table":
+ handleTableStart(sink, attribs);
+ break;
+ case "tr":
+ sink.tableRow(attribs);
+ break;
+ case "th":
+ sink.tableHeaderCell(attribs);
+ break;
+ case "td":
+ sink.tableCell(attribs);
+ break;
+ case "caption":
+ sink.tableCaption(attribs);
+ break;
+ case "br":
+ sink.lineBreak(attribs);
+ break;
+ case "wbr":
+ sink.lineBreakOpportunity(attribs);
+ break;
+ case "hr":
+ sink.horizontalRule(attribs);
+ break;
+ case "img":
+ isBeginningOfLineInsideBlock = false;
+ handleImgStart(sink, attribs);
+ break;
+ case "blockquote":
+ sink.blockquote(attribs);
+ break;
+ case "script":
+ case "style":
+ handleUnknown(elementName, attribs, sink, TAG_TYPE_START);
+ scriptBlock = true;
+ break;
+ default:
+ if (UNMATCHED_XHTML5_ELEMENTS.contains(elementName)) {
+ handleUnknown(elementName, attribs, sink, TAG_TYPE_START);
+ } else if
(UNMATCHED_XHTML5_SIMPLE_ELEMENTS.contains(elementName)) {
+ handleUnknown(elementName, attribs, sink, TAG_TYPE_SIMPLE);
+ } else {
+ visited = false;
+ }
+ break;
}
return visited;
@@ -391,150 +491,163 @@ public class Xhtml5BaseParser extends AbstractXmlParser
implements HtmlMarkup {
protected boolean baseEndTag(String elementName, SinkEventAttributeSet
attribs, Sink sink) {
boolean visited = true;
-
- if (elementName.equals(HtmlMarkup.P.toString())) {
- sink.paragraph_();
- } else if (elementName.equals(HtmlMarkup.DIV.toString())) {
- handleDivEnd(sink);
- } else if (elementName.equals(HtmlMarkup.PRE.toString())) {
- verbatim_();
-
- sink.verbatim_();
- } else if (elementName.equals(HtmlMarkup.UL.toString())) {
- sink.list_();
- } else if (elementName.equals(HtmlMarkup.OL.toString())) {
- sink.numberedList_();
- orderedListDepth--;
- } else if (elementName.equals(HtmlMarkup.LI.toString())) {
- handleListItemEnd(sink);
- } else if (elementName.equals(HtmlMarkup.DL.toString())) {
- if (hasDefinitionListItem) {
+ isBeginningOfLineInsideBlock = true;
+
+ switch (elementName) {
+ case "p":
+ sink.paragraph_();
+ break;
+ case "div":
+ handleDivEnd(sink);
+ break;
+ case "pre":
+ verbatim_();
+ sink.verbatim_();
+ break;
+ case "ul":
+ sink.list_();
+ break;
+ case "ol":
+ sink.numberedList_();
+ orderedListDepth--;
+ break;
+ case "li":
+ handleListItemEnd(sink);
+ break;
+ case "dl":
+ if (hasDefinitionListItem) {
+ sink.definitionListItem_();
+ hasDefinitionListItem = false;
+ }
+ sink.definitionList_();
+ break;
+ case "dt":
+ sink.definedTerm_();
+ break;
+ case "dd":
+ sink.definition_();
sink.definitionListItem_();
hasDefinitionListItem = false;
- }
- sink.definitionList_();
- } else if (elementName.equals(HtmlMarkup.DT.toString())) {
- sink.definedTerm_();
- } else if (elementName.equals(HtmlMarkup.DD.toString())) {
- sink.definition_();
- sink.definitionListItem_();
- hasDefinitionListItem = false;
- } else if (elementName.equals(HtmlMarkup.FIGURE.toString())) {
- sink.figure_();
- } else if (elementName.equals(HtmlMarkup.FIGCAPTION.toString())) {
- sink.figureCaption_();
- } else if (elementName.equals(HtmlMarkup.A.toString())) {
- handleAEnd(sink);
- } else if (elementName.equals(HtmlMarkup.EM.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.STRONG.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.SMALL.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.S.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.CITE.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.Q.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.DFN.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.ABBR.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.I.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.B.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.CODE.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.VAR.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.SAMP.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.KBD.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.SUP.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.SUB.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.U.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.MARK.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.RUBY.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.RB.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.RT.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.RTC.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.RP.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.BDI.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.BDO.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.SPAN.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.INS.toString())) {
- sink.inline_();
- } else if (elementName.equals(HtmlMarkup.DEL.toString())) {
- sink.inline_();
- }
-
- //
----------------------------------------------------------------------
- // Tables
- //
----------------------------------------------------------------------
-
- else if (elementName.equals(HtmlMarkup.TABLE.toString())) {
- sink.tableRows_();
- sink.table_();
- } else if (elementName.equals(HtmlMarkup.TR.toString())) {
- sink.tableRow_();
- } else if (elementName.equals(HtmlMarkup.TH.toString())) {
- sink.tableHeaderCell_();
- } else if (elementName.equals(HtmlMarkup.TD.toString())) {
- sink.tableCell_();
- } else if (elementName.equals(HtmlMarkup.CAPTION.toString())) {
- sink.tableCaption_();
- } else if (elementName.equals(HtmlMarkup.ARTICLE.toString())) {
- sink.article_();
- } else if (elementName.equals(HtmlMarkup.NAV.toString())) {
- sink.navigation_();
- } else if (elementName.equals(HtmlMarkup.ASIDE.toString())) {
- sink.sidebar_();
- } else if (elementName.equals(HtmlMarkup.SECTION.toString())) {
- handleSectionEnd(sink);
- } else if (elementName.equals(HtmlMarkup.H1.toString())) {
- sink.sectionTitle1_();
- } else if (elementName.equals(HtmlMarkup.H2.toString())) {
- sink.sectionTitle2_();
- } else if (elementName.equals(HtmlMarkup.H3.toString())) {
- sink.sectionTitle3_();
- } else if (elementName.equals(HtmlMarkup.H4.toString())) {
- sink.sectionTitle4_();
- } else if (elementName.equals(HtmlMarkup.H5.toString())) {
- sink.sectionTitle5_();
- } else if (elementName.equals(HtmlMarkup.H6.toString())) {
- sink.sectionTitle6_();
- } else if (elementName.equals(HtmlMarkup.HEADER.toString())) {
- sink.header_();
- } else if (elementName.equals(HtmlMarkup.MAIN.toString())) {
- sink.content_();
- } else if (elementName.equals(HtmlMarkup.FOOTER.toString())) {
- sink.footer_();
- } else if (elementName.equals(HtmlMarkup.BLOCKQUOTE.toString())) {
- sink.blockquote_();
- } else if (UNMATCHED_XHTML5_ELEMENTS.contains(elementName)) {
- handleUnknown(elementName, attribs, sink, TAG_TYPE_END);
- } else if (elementName.equals(HtmlMarkup.SCRIPT.toString())
- || elementName.equals(HtmlMarkup.STYLE.toString())) {
- handleUnknown(elementName, attribs, sink, TAG_TYPE_END);
-
- scriptBlock = false;
- } else {
- visited = false;
+ break;
+ case "figure":
+ sink.figure_();
+ break;
+ case "figcaption":
+ sink.figureCaption_();
+ break;
+ case "a":
+ isBeginningOfLineInsideBlock = false;
+ handleAEnd(sink);
+ break;
+ case "em":
+ case "strong":
+ case "small":
+ case "s":
+ case "cite":
+ case "q":
+ case "dfn":
+ case "abbr":
+ case "i":
+ case "b":
+ case "code":
+ case "var":
+ case "samp":
+ case "kbd":
+ case "sup":
+ case "sub":
+ case "u":
+ case "mark":
+ case "ruby":
+ case "rb":
+ case "rt":
+ case "rtc":
+ case "rp":
+ case "bdi":
+ case "bdo":
+ case "span":
+ case "ins":
+ case "del":
+ sink.inline_();
+ isBeginningOfLineInsideBlock = false;
+ break;
+
+ //
----------------------------------------------------------------------
+ // Tables
+ //
----------------------------------------------------------------------
+
+ case "table":
+ sink.tableRows_();
+ sink.table_();
+ break;
+ case "tr":
+ sink.tableRow_();
+ break;
+ case "th":
+ sink.tableHeaderCell_();
+ break;
+ case "td":
+ sink.tableCell_();
+ break;
+ case "caption":
+ sink.tableCaption_();
+ break;
+ case "article":
+ sink.article_();
+ break;
+ case "nav":
+ sink.navigation_();
+ break;
+ case "aside":
+ sink.sidebar_();
+ break;
+ case "section":
+ handleSectionEnd(sink);
+ break;
+ case "h1":
+ sink.sectionTitle1_();
+ break;
+ case "h2":
+ sink.sectionTitle2_();
+ break;
+ case "h3":
+ sink.sectionTitle3_();
+ break;
+ case "h4":
+ sink.sectionTitle4_();
+ break;
+ case "h5":
+ sink.sectionTitle5_();
+ break;
+ case "h6":
+ sink.sectionTitle6_();
+ break;
+ case "header":
+ sink.header_();
+ break;
+ case "main":
+ sink.content_();
+ break;
+ case "footer":
+ sink.footer_();
+ break;
+ case "img":
+ isBeginningOfLineInsideBlock = false;
+ break;
+ case "blockquote":
+ sink.blockquote_();
+ break;
+ case "script":
+ case "style":
+ handleUnknown(elementName, attribs, sink, TAG_TYPE_END);
+ scriptBlock = false;
+ break;
+ default:
+ if (UNMATCHED_XHTML5_ELEMENTS.contains(elementName)) {
+ handleUnknown(elementName, attribs, sink, TAG_TYPE_END);
+ } else {
+ visited = false;
+ }
+ break;
}
return visited;
@@ -574,19 +687,79 @@ public class Xhtml5BaseParser extends AbstractXmlParser
implements HtmlMarkup {
protected void handleText(XmlPullParser parser, Sink sink) throws
XmlPullParserException {
String text = getText(parser);
- /*
- * NOTE: Don't do any whitespace trimming here. Whitespace
normalization has already been performed by the
- * parser so any whitespace that makes it here is significant.
- *
- * NOTE: text within script tags is ignored, scripting code should be
embedded in CDATA.
- */
+ if (!inVerbatim && text != null) {
+ // do special whitespace processing as outlined in
+ //
https://developer.mozilla.org/en-US/docs/Web/CSS/Guides/Text/Whitespace
+ if (isBeginningOfLineInsideBlock) {
+ // normalize linebreaks
+ processInsignificantLineBreaks(sink, text);
+ // trim leading whitespace from text being emitted
+ //
https://developer.mozilla.org/en-US/docs/Web/CSS/Guides/Text/Whitespace#trimming_and_positioning
+ String regex = "^\\s+";
+ text = text.replaceAll(regex, "");
+ }
+
+ // assume white-space-collapse: collapse for all non-verbatim text
(outside of <pre>)
+ text = collapseWhitespace(text);
+ }
if ((text != null && !text.isEmpty()) && !isScriptBlock()) {
sink.text(text);
+ isBeginningOfLineInsideBlock = false;
}
}
+ /**
+ * Process all line-breaks in the given text which are not significant for
the output, i.e. all line-breaks which are not within a verbatim block and
+ * are at the beginning of the given text.
+ * In addition it emits information about the whitespace characters
following the line-breaks as they may be relevant for the output (e.g. for
indentation).
+ *
+ * @param sink the sink to receive the events.
+ * @param text the text to process.
+ */
+ protected void processInsignificantLineBreaks(Sink sink, String text) {
+ CharacterIterator it = new
StringCharacterIterator(text.replaceAll("\\r\\n?", "\n"));
+
+ boolean wasNewLine = false;
+ int indentLevel = 0;
+ //
+ while (it.current() != CharacterIterator.DONE) {
+ char c = it.current();
+ if (c == '\n') {
+ if (wasNewLine) {
+ sink.markupLineBreak(indentLevel);
+ }
+ indentLevel = 0;
+ wasNewLine = true;
+ } else if (Character.isWhitespace(c)) {
+ indentLevel++;
+ } else {
+ // once non-whitespace character is reached we assume
everything following is relevant and emitted
+ // within the text event
+ break;
+ }
+ it.next();
+ }
+ if (wasNewLine) {
+ // if the text ends with a newline, we need to emit the last line
break
+ sink.markupLineBreak(indentLevel);
+ }
+ }
+
+ /**
+ * @see <a
href="https://developer.mozilla.org/en-US/docs/Web/CSS/Guides/Text/Whitespace#how_does_css_process_whitespace">How
does CSS process whitespace?</a>
+ * @see <a
href="https://drafts.csswg.org/css-text-4/#white-space-processing">CSS Text
Module Level 4 - White Space Processing</a>
+ *
+ * @param text
+ * @return
+ */
+ private static String collapseWhitespace(String text) {
+ // replace all sequences of whitespace characters with a single space
(this includes newlines, tabs, etc.)
+ return text.replaceAll("\\s+", " ");
+ }
+
@Override
protected void handleComment(XmlPullParser parser, Sink sink) throws
XmlPullParserException {
+ isBeginningOfLineInsideBlock = false;
String text = getText(parser);
if ("PB".equals(text.trim())) {
@@ -600,6 +773,7 @@ public class Xhtml5BaseParser extends AbstractXmlParser
implements HtmlMarkup {
@Override
protected void handleCdsect(XmlPullParser parser, Sink sink) throws
XmlPullParserException {
+ isBeginningOfLineInsideBlock = false;
String text = getText(parser);
if (isScriptBlock()) {
diff --git
a/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/SinkWrapper.java
b/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/SinkWrapper.java
index b8c9fff7..d82d3423 100644
--- a/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/SinkWrapper.java
+++ b/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/SinkWrapper.java
@@ -514,8 +514,8 @@ public class SinkWrapper extends AbstractSink {
}
@Override
- public void comment(String comment, boolean endsWithLineBreak) {
- delegate.comment(comment, endsWithLineBreak);
+ public void markupLineBreak(int indentLevel) {
+ delegate.markupLineBreak(indentLevel);
}
@Override
diff --git
a/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/Xhtml5BaseSink.java
b/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/Xhtml5BaseSink.java
index 245ed014..6db89aff 100644
---
a/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/Xhtml5BaseSink.java
+++
b/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/Xhtml5BaseSink.java
@@ -1544,17 +1544,12 @@ public class Xhtml5BaseSink extends AbstractXmlSink
implements HtmlMarkup {
@Override
public void comment(String comment) {
- comment(comment, false);
- }
-
- @Override
- public void comment(String comment, boolean endsWithLineBreak) {
if (comment != null) {
- write(encodeAsHtmlComment(comment, endsWithLineBreak,
getLocationLogPrefix()));
+ write(encodeAsHtmlComment(comment, getLocationLogPrefix()));
}
}
- public static String encodeAsHtmlComment(String comment, boolean
endsWithLineBreak, String locationLogPrefix) {
+ public static String encodeAsHtmlComment(String comment, String
locationLogPrefix) {
final String originalComment = comment;
// http://www.w3.org/TR/2000/REC-xml-20001006#sec-comments
@@ -1575,12 +1570,18 @@ public class Xhtml5BaseSink extends AbstractXmlSink
implements HtmlMarkup {
buffer.append(LESS_THAN).append(BANG).append(MINUS).append(MINUS);
buffer.append(comment);
buffer.append(MINUS).append(MINUS).append(GREATER_THAN);
- if (endsWithLineBreak) {
- buffer.append(EOL);
- }
return buffer.toString();
}
+ @Override
+ public void markupLineBreak(int indentLevel) { // indentLevel is not used here: only a bare EOL is emitted
+ if (headFlag) {
+ getTextBuffer().append(EOL); // while headFlag is set the EOL is appended to the text buffer
+ } else {
+ write(EOL); // otherwise the EOL is written out directly
+ }
+ }
+
/**
* {@inheritDoc}
*
diff --git
a/doxia-core/src/test/java/org/apache/maven/doxia/parser/Xhtml5BaseParserTest.java
b/doxia-core/src/test/java/org/apache/maven/doxia/parser/Xhtml5BaseParserTest.java
index b29b8e3d..59a05a0d 100644
---
a/doxia-core/src/test/java/org/apache/maven/doxia/parser/Xhtml5BaseParserTest.java
+++
b/doxia-core/src/test/java/org/apache/maven/doxia/parser/Xhtml5BaseParserTest.java
@@ -280,8 +280,8 @@ class Xhtml5BaseParserTest extends AbstractParserTest {
el = it.next();
assertEquals("text", el.getName());
- // according to section 2.11 of the XML spec, parsers must normalize
line breaks to "\n"
- assertEquals("\n", (String) el.getArgs()[0]);
+ // the EOL must be normalized to a single space, as per the HTML spec
+ assertEquals(" ", (String) el.getArgs()[0]);
assertEquals("inline", it.next().getName());
assertEquals("text", it.next().getName());
@@ -366,6 +366,70 @@ class Xhtml5BaseParserTest extends AbstractParserTest {
assertEquals("verbatim_", it.next().getName());
}
+ @Test
+ void listWithInsignificantLineBreaks() throws Exception {
+ // test EOLs within lists (those don't have significance and should
not be reported as text events, but as
+ // markupLineBreak with the according indent level)
+ String text = "<ul>" + Xhtml5BaseParser.EOL + " <li>One</li> "
+ + Xhtml5BaseParser.EOL + " <li>Two</li> "
+ + Xhtml5BaseParser.EOL + "</ul>";
+
+ parser.parse(text, sink);
+
+ Iterator<SinkEventElement> it = sink.getEventList().iterator();
+
+ assertEquals("list", it.next().getName());
+ assertSinkEquals(it.next(), "markupLineBreak", 2);
+ assertEquals("listItem", it.next().getName());
+ assertEquals("text", it.next().getName());
+ assertEquals("listItem_", it.next().getName());
+ assertSinkEquals(it.next(), "markupLineBreak", 2);
+ assertEquals("listItem", it.next().getName());
+ assertEquals("text", it.next().getName());
+ assertEquals("listItem_", it.next().getName());
+ assertSinkEquals(it.next(), "markupLineBreak", 0);
+ assertEquals("list_", it.next().getName());
+ }
+
+ @Test
+ void whitespaceInBlockAndInlineElements() throws ParseException {
+ String text =
+ "<p>\n \n<ul>" + Xhtml5BaseParser.EOL + " <li><a
href=\"https://example.com\"> One</a> Another</li>"
+ + Xhtml5BaseParser.EOL + " <li> Two Another </li>
"
+ + Xhtml5BaseParser.EOL + "<li><img src=\"img.src\"/>
Three</li>"
+ + Xhtml5BaseParser.EOL + "</ul>" +
Xhtml5BaseParser.EOL + " </p>";
+
+ parser.parse(text, sink);
+
+ Iterator<SinkEventElement> it = sink.getEventList().iterator();
+ assertEquals("paragraph", it.next().getName());
+ assertSinkEquals(it.next(), "markupLineBreak", 2);
+ assertSinkEquals(it.next(), "markupLineBreak", 0);
+ assertEquals("list", it.next().getName());
+ assertSinkEquals(it.next(), "markupLineBreak", 2);
+ assertEquals("listItem", it.next().getName());
+ assertSinkEquals(
+ it.next(), "link", "https://example.com", new
SinkEventAttributeSet("href", "https://example.com"));
+ assertSinkEquals(it.next(), "text", " One", null);
+ assertEquals("link_", it.next().getName());
+ assertSinkEquals(it.next(), "text", " Another", null);
+ assertEquals("listItem_", it.next().getName());
+ assertSinkEquals(it.next(), "markupLineBreak", 2);
+ assertEquals("listItem", it.next().getName());
+ // test collapsing, currently no trimming at the end
+ assertSinkEquals(it.next(), "text", "Two Another ", null);
+ assertEquals("listItem_", it.next().getName());
+ assertSinkEquals(it.next(), "markupLineBreak", 0);
+ assertEquals("listItem", it.next().getName());
+ assertSinkEquals(it.next(), "figureGraphics", "img.src", new
SinkEventAttributeSet("src", "img.src"));
+ assertSinkEquals(it.next(), "text", " Three", null);
+ assertEquals("listItem_", it.next().getName());
+ assertSinkEquals(it.next(), "markupLineBreak", 0);
+ assertEquals("list_", it.next().getName());
+ assertSinkEquals(it.next(), "markupLineBreak", 3);
+ assertSinkEquals(it, "paragraph_");
+ }
+
@Test
void doxia250() throws Exception {
StringBuilder sb = new StringBuilder();
diff --git
a/doxia-core/src/test/java/org/apache/maven/doxia/sink/impl/AbstractSinkTest.java
b/doxia-core/src/test/java/org/apache/maven/doxia/sink/impl/AbstractSinkTest.java
index 022f80d0..424e52bf 100644
---
a/doxia-core/src/test/java/org/apache/maven/doxia/sink/impl/AbstractSinkTest.java
+++
b/doxia-core/src/test/java/org/apache/maven/doxia/sink/impl/AbstractSinkTest.java
@@ -1204,8 +1204,10 @@ public abstract class AbstractSinkTest extends
AbstractModuleTest {
@Test
public void twoConsecutiveBlockComments() {
String comment = "Simple comment";
- sink.comment(comment, true);
- sink.comment(comment, true);
+ sink.comment(comment);
+ sink.markupLineBreak(0);
+ sink.comment(comment);
+ sink.markupLineBreak(0);
sink.flush();
sink.close();
assertEquals(getCommentBlock(comment) + EOL + getCommentBlock(comment)
+ EOL, testWriter.toString());
diff --git
a/doxia-core/src/test/java/org/apache/maven/doxia/sink/impl/SinkEventTestingSink.java
b/doxia-core/src/test/java/org/apache/maven/doxia/sink/impl/SinkEventTestingSink.java
index 3a35f8fd..714c8ebf 100644
---
a/doxia-core/src/test/java/org/apache/maven/doxia/sink/impl/SinkEventTestingSink.java
+++
b/doxia-core/src/test/java/org/apache/maven/doxia/sink/impl/SinkEventTestingSink.java
@@ -288,13 +288,13 @@ public class SinkEventTestingSink extends AbstractSink {
}
@Override
- public void comment(String comment, boolean endsWithLineBreak) {
- addEvent("comment", new Object[] {comment, endsWithLineBreak});
+ public void comment(String comment) {
+ addEvent("comment", new Object[] {comment});
}
@Override
- public void comment(String comment) {
- addEvent("comment", new Object[] {comment});
+ public void markupLineBreak(int indentLevel) {
+ addEvent("markupLineBreak", new Object[] {indentLevel});
}
@Override
diff --git
a/doxia-modules/doxia-module-apt/src/main/java/org/apache/maven/doxia/module/apt/AptParser.java
b/doxia-modules/doxia-module-apt/src/main/java/org/apache/maven/doxia/module/apt/AptParser.java
index 7f03b19d..aed398ba 100644
---
a/doxia-modules/doxia-module-apt/src/main/java/org/apache/maven/doxia/module/apt/AptParser.java
+++
b/doxia-modules/doxia-module-apt/src/main/java/org/apache/maven/doxia/module/apt/AptParser.java
@@ -1838,7 +1838,9 @@ public class AptParser extends AbstractTextParser
implements AptMarkup {
public void traverse() throws AptParseException {
if (isEmitComments()) {
- AptParser.this.sink.comment(text, true);
+ AptParser.this.sink.comment(text);
+ // APT comments always end with a line break
+ AptParser.this.sink.markupLineBreak(0);
}
}
}
diff --git
a/doxia-modules/doxia-module-apt/src/main/java/org/apache/maven/doxia/module/apt/AptSink.java
b/doxia-modules/doxia-module-apt/src/main/java/org/apache/maven/doxia/module/apt/AptSink.java
index dab9a813..47a5a255 100644
---
a/doxia-modules/doxia-module-apt/src/main/java/org/apache/maven/doxia/module/apt/AptSink.java
+++
b/doxia-modules/doxia-module-apt/src/main/java/org/apache/maven/doxia/module/apt/AptSink.java
@@ -834,11 +834,6 @@ public class AptSink extends AbstractTextSink implements
AptMarkup {
}
public void comment(String comment) {
- comment(comment, false);
- }
-
- @Override
- public void comment(String comment, boolean endsWithLineBreak) {
rawText("" + COMMENT + COMMENT + comment + EOL); // comments always
end with a line break in APT
}
diff --git
a/doxia-modules/doxia-module-apt/src/test/java/org/apache/maven/doxia/module/apt/AptParserTest.java
b/doxia-modules/doxia-module-apt/src/test/java/org/apache/maven/doxia/module/apt/AptParserTest.java
index 63b76d4d..fbf0620f 100644
---
a/doxia-modules/doxia-module-apt/src/test/java/org/apache/maven/doxia/module/apt/AptParserTest.java
+++
b/doxia-modules/doxia-module-apt/src/test/java/org/apache/maven/doxia/module/apt/AptParserTest.java
@@ -106,8 +106,10 @@ class AptParserTest extends AbstractParserTest {
"paragraph",
"text",
"paragraph_");
- assertSinkEquals(it.next(), "comment", "some comment", Boolean.TRUE);
- assertSinkEquals(it.next(), "comment", "another comment",
Boolean.TRUE);
+ assertSinkEquals(it.next(), "comment", "some comment");
+ assertSinkEquals(it.next(), "markupLineBreak", 0);
+ assertSinkEquals(it.next(), "comment", "another comment");
+ assertSinkEquals(it.next(), "markupLineBreak", 0);
assertSinkEquals(it, "paragraph", "text", "paragraph_", "section1_",
"body_");
}
diff --git
a/doxia-modules/doxia-module-apt/src/test/java/org/apache/maven/doxia/module/apt/AptSinkTest.java
b/doxia-modules/doxia-module-apt/src/test/java/org/apache/maven/doxia/module/apt/AptSinkTest.java
index bbad9639..13d32fbe 100644
---
a/doxia-modules/doxia-module-apt/src/test/java/org/apache/maven/doxia/module/apt/AptSinkTest.java
+++
b/doxia-modules/doxia-module-apt/src/test/java/org/apache/maven/doxia/module/apt/AptSinkTest.java
@@ -296,8 +296,8 @@ class AptSinkTest extends AbstractSinkTest {
public void twoConsecutiveBlockComments() {
final Sink sink = getSink();
String comment = "Simple comment";
- sink.comment(comment, true);
- sink.comment(comment, true);
+ sink.comment(comment);
+ sink.comment(comment);
sink.flush();
sink.close();
assertEquals(getCommentBlock(comment) + getCommentBlock(comment),
getSinkContent(), "Wrong comment!");
diff --git
a/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownSink.java
b/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownSink.java
index 2547a742..fbc1d166 100644
---
a/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownSink.java
+++
b/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownSink.java
@@ -101,27 +101,33 @@ public class MarkdownSink extends Xhtml5BaseSink
implements MarkdownMarkup {
enum ElementContext {
ROOT_WITH_BUFFERING(
Type.GENERIC_CONTAINER,
+ true,
ElementContext::escapeMarkdown,
true), // only needs buffering until head()_ is called to make
sure to emit metadata first
ROOT_WITHOUT_BUFFERING(
- Type.GENERIC_CONTAINER, null, false), // used after
head()_/body() to prevent unnecessary buffering
- HEAD(Type.GENERIC_CONTAINER, null, true),
- BODY(Type.GENERIC_CONTAINER, ElementContext::escapeMarkdown),
+ Type.GENERIC_CONTAINER,
+ true,
+ null,
+ false), // used after head()_/body() to prevent unnecessary
buffering
+ HEAD(Type.GENERIC_CONTAINER, false, null, true),
+ BODY(Type.GENERIC_CONTAINER, true, ElementContext::escapeMarkdown),
// only the elements, which affect rendering of children and are
different from BODY or HEAD are listed here
- FIGURE(Type.INLINE, ElementContext::escapeMarkdown, true),
- HEADING(Type.LEAF_BLOCK, ElementContext::escapeMarkdown),
- CODE_BLOCK(Type.LEAF_BLOCK, null),
- CODE_SPAN(Type.INLINE, null, true),
- TABLE_CAPTION(Type.INLINE, ElementContext::escapeMarkdown),
- TABLE_ROW(Type.CONTAINER_BLOCK, null, true),
+ FIGURE(Type.INLINE, false, ElementContext::escapeMarkdown, true),
+ HEADING(Type.LEAF_BLOCK, false, ElementContext::escapeMarkdown),
+ CODE_BLOCK(Type.LEAF_BLOCK, false, null),
+ CODE_SPAN(Type.INLINE, false, null, true),
+ TABLE(Type.CONTAINER_BLOCK, false, null, false, "", true),
+ TABLE_CAPTION(Type.INLINE, false, ElementContext::escapeMarkdown),
+ TABLE_ROW(Type.INLINE, false, null, true), // special handling of
newlines
TABLE_CELL(
Type.INLINE,
+ false,
ElementContext::escapeForTableCell,
false), // special type, as allows containing inlines, but not
starting on a separate line
// same parameters as BODY but paragraphs inside list items are
handled differently
- LIST_ITEM(Type.CONTAINER_BLOCK, ElementContext::escapeMarkdown, false,
INDENT),
- BLOCKQUOTE(Type.CONTAINER_BLOCK, ElementContext::escapeMarkdown,
false, BLOCKQUOTE_START_MARKUP),
- HTML_BLOCK(Type.LEAF_BLOCK, ElementContext::escapeHtml, false, "",
true);
+ LIST_ITEM(Type.CONTAINER_BLOCK, false, ElementContext::escapeMarkdown,
false, INDENT),
+ BLOCKQUOTE(Type.CONTAINER_BLOCK, false,
ElementContext::escapeMarkdown, false, BLOCKQUOTE_START_MARKUP),
+ HTML_BLOCK(Type.LEAF_BLOCK, true, ElementContext::escapeHtml, false,
"", true);
/**
* @see <a
href="https://spec.commonmark.org/0.30/#blocks-and-inlines">CommonMark, 3
Blocks and inlines</a>
@@ -169,25 +175,42 @@ public class MarkdownSink extends Xhtml5BaseSink
implements MarkdownMarkup {
*/
final boolean requiresSurroundingByBlankLines;
- ElementContext(Type type, TextEscapeFunction escapeFunction) {
- this(type, escapeFunction, false);
+ /**
+ * If markup linebreaks (i.e. insignificant linebreaks in the source)
are allowed in this context.
+ * This is relevant for markdown as in some contexts (e.g. list items)
linebreaks are always significant (while for HTML they wouldn't be)
+ */
+ final boolean allowsMarkupLinebreaks;
+
+ ElementContext(Type type, boolean allowsMarkupLinebreaks,
TextEscapeFunction escapeFunction) {
+ this(type, allowsMarkupLinebreaks, escapeFunction, false);
}
- ElementContext(Type type, TextEscapeFunction escapeFunction, boolean
requiresBuffering) {
- this(type, escapeFunction, requiresBuffering, "");
+ ElementContext(
+ Type type,
+ boolean allowsMarkupLinebreaks,
+ TextEscapeFunction escapeFunction,
+ boolean requiresBuffering) {
+ this(type, allowsMarkupLinebreaks, escapeFunction,
requiresBuffering, "");
}
- ElementContext(Type type, TextEscapeFunction escapeFunction, boolean
requiresBuffering, String prefix) {
- this(type, escapeFunction, requiresBuffering, prefix, false);
+ ElementContext(
+ Type type,
+ boolean allowsMarkupLinebreaks,
+ TextEscapeFunction escapeFunction,
+ boolean requiresBuffering,
+ String prefix) {
+ this(type, allowsMarkupLinebreaks, escapeFunction,
requiresBuffering, prefix, false);
}
ElementContext(
Type type,
+ boolean allowsMarkupLinebreaks,
TextEscapeFunction escapeFunction,
boolean requiresBuffering,
String prefix,
boolean requiresSurroundingByBlankLines) {
this.type = type;
+ this.allowsMarkupLinebreaks = allowsMarkupLinebreaks;
this.escapeFunction = escapeFunction;
this.requiresBuffering = requiresBuffering;
if (type != Type.CONTAINER_BLOCK && prefix.length() != 0) {
@@ -234,6 +257,10 @@ public class MarkdownSink extends Xhtml5BaseSink
implements MarkdownMarkup {
return type == Type.CONTAINER_BLOCK || type ==
Type.GENERIC_CONTAINER;
}
+ public boolean isAllowsMarkupLinebreaks() {
+ return allowsMarkupLinebreaks;
+ }
+
/**
* First use XML escaping (leveraging the predefined entities, for
browsers)
* afterwards escape special characters in a text with a leading
backslash (for markdown parsers)
@@ -753,8 +780,7 @@ public class MarkdownSink extends Xhtml5BaseSink implements
MarkdownMarkup {
if (elementContextStack.element().isHtml()) {
super.table(attributes);
} else {
- ensureBlankLine();
- write(getLinePrefix());
+ startContext(ElementContext.TABLE);
}
}
@@ -762,6 +788,8 @@ public class MarkdownSink extends Xhtml5BaseSink implements
MarkdownMarkup {
public void table_() {
if (elementContextStack.element().isHtml()) {
super.table_();
+ } else {
+ endContext(ElementContext.TABLE);
}
}
@@ -1204,12 +1232,16 @@ public class MarkdownSink extends Xhtml5BaseSink
implements MarkdownMarkup {
@Override
public void lineBreak(SinkEventAttributes attributes) {
- if (elementContextStack.element() == ElementContext.CODE_BLOCK) {
- write(EOL);
+ if (elementContextStack.element() == ElementContext.TABLE_CELL) {
+ super.lineBreak(attributes);
} else {
- write("" + SPACE + SPACE + EOL);
+ if (elementContextStack.element() == ElementContext.CODE_BLOCK) {
+ write(EOL);
+ } else {
+ write("" + SPACE + SPACE + EOL);
+ }
+ write(getLinePrefix());
}
- write(getLinePrefix());
}
@Override
@@ -1264,6 +1296,14 @@ public class MarkdownSink extends Xhtml5BaseSink
implements MarkdownMarkup {
LOGGER.warn("{}Unknown Sink event '" + name + "', ignoring!",
getLocationLogPrefix());
}
+ @Override
+ public void markupLineBreak(int indentLevel) {
+ // only forward the line break when the current element context allows markup line breaks; otherwise drop it
+ if (elementContextStack.element().isAllowsMarkupLinebreaks()) {
+ super.markupLineBreak(indentLevel);
+ }
+ }
+
@Override
public void close() {
toogleToRootContextWithoutBuffering(true);
diff --git
a/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownParserTest.java
b/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownParserTest.java
index b3cefb07..817855e0 100644
---
a/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownParserTest.java
+++
b/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownParserTest.java
@@ -165,7 +165,7 @@ class MarkdownParserTest extends AbstractParserTest {
"paragraph",
"text",
"paragraph_",
- "text",
+ "markupLineBreak",
"verbatim",
"inline",
"text",
@@ -195,7 +195,7 @@ class MarkdownParserTest extends AbstractParserTest {
"paragraph",
"text",
"paragraph_",
- "text",
+ "markupLineBreak",
"verbatim",
"inline",
"text",
@@ -325,14 +325,14 @@ class MarkdownParserTest extends AbstractParserTest {
"head_",
"body",
"list",
- "text",
+ "markupLineBreak",
"listItem",
"text",
"listItem_",
"listItem",
"text",
"listItem_",
- "text",
+ "markupLineBreak",
"list_",
"body_");
@@ -355,14 +355,14 @@ class MarkdownParserTest extends AbstractParserTest {
"head_",
"body",
"numberedList",
- "text",
+ "markupLineBreak",
"numberedListItem",
"text",
"numberedListItem_",
"numberedListItem",
"text",
"numberedListItem_",
- "text",
+ "markupLineBreak",
"numberedList_",
"body_");
@@ -601,7 +601,7 @@ class MarkdownParserTest extends AbstractParserTest {
"head_",
"body",
"division",
- "text",
+ "markupLineBreak",
"paragraph",
"inline",
"text",
@@ -612,9 +612,9 @@ class MarkdownParserTest extends AbstractParserTest {
"inline_",
"text",
"paragraph_",
- "text",
+ "markupLineBreak",
"division_",
- "text",
+ "markupLineBreak",
"horizontalRule",
"section1",
"sectionTitle1",
@@ -623,27 +623,27 @@ class MarkdownParserTest extends AbstractParserTest {
"paragraph",
"text",
"paragraph_",
- "text",
+ "markupLineBreak",
"table",
"tableRows",
- "text",
+ "markupLineBreak",
"unknown", // tbody start
"tableRow",
"tableHeaderCell",
"text",
"tableHeaderCell_",
"tableRow_",
- "text",
+ "markupLineBreak",
"tableRow",
"tableCell",
"text",
"tableCell_",
"tableRow_",
- "text",
+ "markupLineBreak",
"unknown", // tbody end
"tableRows_",
"table_",
- "text",
+ "markupLineBreak",
"section1_",
"body_");
diff --git
a/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownSinkTest.java
b/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownSinkTest.java
index 1e88f084..d161f327 100644
---
a/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownSinkTest.java
+++
b/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownSinkTest.java
@@ -26,6 +26,7 @@ import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
+import org.apache.maven.doxia.module.xhtml5.Xhtml5Parser;
import org.apache.maven.doxia.parser.ParseException;
import org.apache.maven.doxia.parser.Parser;
import org.apache.maven.doxia.sink.Sink;
@@ -46,6 +47,9 @@ class MarkdownSinkTest extends AbstractSinkTest {
@Inject
protected MarkdownParser parser;
+ @Inject
+ protected Xhtml5Parser htmlParser;
+
protected String outputExtension() {
return "md";
}
@@ -160,7 +164,7 @@ class MarkdownSinkTest extends AbstractSinkTest {
+ MarkdownMarkup.TABLE_CELL_SEPARATOR_MARKUP + "---" +
MarkdownMarkup.TABLE_CELL_SEPARATOR_MARKUP + EOL
+ MarkdownMarkup.TABLE_ROW_PREFIX
+ cell + MarkdownMarkup.TABLE_CELL_SEPARATOR_MARKUP + cell +
MarkdownMarkup.TABLE_CELL_SEPARATOR_MARKUP
- + cell + MarkdownMarkup.TABLE_CELL_SEPARATOR_MARKUP + EOL;
+ + cell + MarkdownMarkup.TABLE_CELL_SEPARATOR_MARKUP + EOL +
EOL;
}
@Override
@@ -190,6 +194,7 @@ class MarkdownSinkTest extends AbstractSinkTest {
.append("2|")
.append(EOL);
}
+ expectedMarkup.append(EOL);
return expectedMarkup.toString();
}
@@ -364,7 +369,11 @@ class MarkdownSinkTest extends AbstractSinkTest {
}
private void parseFile(Parser parser, String file, Sink sink) throws
ParseException, IOException {
- try (Reader reader = getTestReader(file)) {
+ parseFile(parser, file, outputExtension(), sink);
+ }
+
+ private void parseFile(Parser parser, String file, String extension, Sink
sink) throws ParseException, IOException {
+ try (Reader reader = getTestReader(file, extension)) {
parser.parse(reader, sink);
}
}
@@ -398,7 +407,7 @@ class MarkdownSinkTest extends AbstractSinkTest {
sink.close();
String expected =
- "| | |" + EOL + "|---|---|" + EOL +
"|[link](target)|paragraph text with \\|**bold**|" + EOL;
+ "| | |" + EOL + "|---|---|" + EOL +
"|[link](target)|paragraph text with \\|**bold**|" + EOL + EOL;
assertEquals(expected, getSinkContent(), "Wrong link or paragraph
markup in table cell");
}
@@ -621,4 +630,50 @@ class MarkdownSinkTest extends AbstractSinkTest {
+ "<!--This is a comment-->";
assertEquals(expected, getSinkContent(), "Wrong metadata section");
}
+
+ @Test
+ void listItemsContainingInsignificantWhitespace() {
+ try (Sink sink = getSink()) {
+ sink.list();
+ sink.listItem();
+ sink.markupLineBreak(4);
+ sink.text("item 1");
+ sink.listItem_();
+ sink.listItem();
+ sink.markupLineBreak(4);
+ sink.text("item 2");
+ sink.listItem_();
+ sink.list_();
+ }
+ String expected = "- item 1" + EOL + "- item 2" + EOL;
+ assertEquals(expected, getSinkContent());
+ }
+
+ @Test
+ void tableWithInsignificantNewLines() throws ParseException, IOException {
+ try (Sink sink = getSink()) {
+ parseFile(htmlParser, "table", "html", sink);
+ }
+ String expected = "|Format<br />Newline|Short description|Doxia
Module|" + EOL
+ + "|---|---|---|" + EOL
+ + "|[iText](../modules/index.html#iText)|iText PDF
Library|[`doxia-module-itext`](../doxia/doxia-modules/doxia-module-itext/)|"
+ + EOL
+ + "|[FO](../modules/index.html#FO)<sup>*</sup>|XSL formatting
objects
\\(XSL-FO\\)|[`doxia-module-fo`](../doxia/doxia-modules/doxia-module-fo/)|"
+ + EOL
+ + "|[LaTeX](../modules/index.html#LaTeX)|LaTeX typesetting
system|[`doxia-module-latex`](../doxia/doxia-modules/doxia-module-latex/)|"
+ + EOL
+ + "|[RTF](../modules/index.html#RTF)|Microsoft Rich Text
Format|[`doxia-module-rtf`](../doxia/doxia-modules/doxia-module-rtf/)|"
+ + EOL + EOL;
+ assertEquals(expected, getSinkContent());
+ }
+
+ @Test
+ void linkFromHtml() throws ParseException, IOException {
+ try (Sink sink = getSink()) {
+ parseFile(htmlParser, "link", "html", sink); // use the sink managed by this try-with-resources, as tableWithInsignificantNewLines does
+ }
+ String expected =
"[plugin](http://maven.apache.org/maven-1.x/plugins/xdoc/reference/xdocs.html)
documentation."
+ + EOL + EOL;
+ assertEquals(expected, getSinkContent());
+ }
}
diff --git a/doxia-modules/doxia-module-markdown/src/test/resources/link.html
b/doxia-modules/doxia-module-markdown/src/test/resources/link.html
new file mode 100644
index 00000000..41a888e2
--- /dev/null
+++ b/doxia-modules/doxia-module-markdown/src/test/resources/link.html
@@ -0,0 +1 @@
+<p><a
href="http://maven.apache.org/maven-1.x/plugins/xdoc/reference/xdocs.html"
class="externalLink">plugin</a> documentation.</p>
\ No newline at end of file
diff --git a/doxia-modules/doxia-module-markdown/src/test/resources/table.html
b/doxia-modules/doxia-module-markdown/src/test/resources/table.html
new file mode 100644
index 00000000..d26039df
--- /dev/null
+++ b/doxia-modules/doxia-module-markdown/src/test/resources/table.html
@@ -0,0 +1,31 @@
+<table border="0">
+ <tr>
+ <th>Format<br/>Newline</th>
+ <th>Short description</th>
+ <th>Doxia Module</th>
+ </tr>
+
+ <tr>
+ <td><a href="../modules/index.html#iText">iText</a></td>
+ <td>iText PDF Library</td>
+ <td><a
href="../doxia/doxia-modules/doxia-module-itext/"><code>doxia-module-itext</code></a></td>
+ </tr>
+
+ <tr>
+ <td><a href="../modules/index.html#FO">FO</a><sup>*</sup></td>
+ <td>XSL formatting objects (XSL-FO)</td>
+ <td><a
href="../doxia/doxia-modules/doxia-module-fo/"><code>doxia-module-fo</code></a></td>
+ </tr>
+
+ <tr>
+ <td><a href="../modules/index.html#LaTeX">LaTeX</a></td>
+ <td>LaTeX typesetting system</td>
+ <td><a
href="../doxia/doxia-modules/doxia-module-latex/"><code>doxia-module-latex</code></a></td>
+ </tr>
+
+ <tr>
+ <td><a href="../modules/index.html#RTF">RTF</a></td>
+ <td>Microsoft Rich Text Format</td>
+ <td><a
href="../doxia/doxia-modules/doxia-module-rtf/"><code>doxia-module-rtf</code></a></td>
+ </tr>
+ </table>
diff --git
a/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
b/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
index 5c646dca..677e2060 100644
---
a/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
+++
b/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
@@ -109,7 +109,7 @@ public class XdocParser extends Xhtml1BaseParser implements
XdocMarkup {
protected void handleStartTag(XmlPullParser parser, Sink sink)
throws XmlPullParserException, MacroExecutionException {
isEmptyElement = parser.isEmptyElementTag();
-
+ isBeginningOfLineInsideBlock = true;
SinkEventAttributeSet attribs = getAttributesFromParser(parser);
if (parser.getName().equals(DOCUMENT_TAG.toString())) {
@@ -145,7 +145,6 @@ public class XdocParser extends Xhtml1BaseParser implements
XdocMarkup {
sink.head_();
this.inHead = false;
}
-
sink.body(attribs);
} else if (parser.getName().equals(SECTION_TAG.toString())) {
handleSectionStart(Sink.SECTION_LEVEL_1, sink, attribs, parser);
@@ -191,6 +190,7 @@ public class XdocParser extends Xhtml1BaseParser implements
XdocMarkup {
protected void handleEndTag(XmlPullParser parser, Sink sink)
throws XmlPullParserException, MacroExecutionException {
+ isBeginningOfLineInsideBlock = true;
if (parser.getName().equals(DOCUMENT_TAG.toString())) {
// Do nothing
return;
diff --git
a/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java
b/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java
index dcc9c202..f37790f0 100644
---
a/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java
+++
b/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java
@@ -408,6 +408,7 @@ class XdocParserTest extends AbstractParserTest {
SinkEventElement styleElm = it.next();
assertEquals("unknown", styleElm.getName());
assertEquals("style", styleElm.getArgs()[0]);
+ assertEquals("markupLineBreak", it.next().getName());
SinkEventElement cdataElm = it.next();
assertEquals("unknown", cdataElm.getName());
assertEquals("CDATA", cdataElm.getArgs()[0]);
@@ -480,4 +481,27 @@ class XdocParserTest extends AbstractParserTest {
assertSinkEquals(it.next(), "text", "test", null);
assertSinkEquals(it, "inline_");
}
+
+ @Test
+ void indentedTags() throws Exception {
+ final String text = "<section name=\"test\">\n" + " <p>test</p>\n"
+ "</section>";
+
+ SinkEventTestingSink sink = new SinkEventTestingSink();
+
+ parser.setValidate(false);
+ parser.parse(text, sink);
+ Iterator<SinkEventElement> it = sink.getEventList().iterator();
+ assertSinkEquals(
+ it,
+ "section1",
+ "sectionTitle1",
+ "text",
+ "sectionTitle1_",
+ "markupLineBreak",
+ "paragraph",
+ "text",
+ "paragraph_",
+ "markupLineBreak",
+ "section1_");
+ }
}
diff --git a/doxia-sink-api/src/main/java/org/apache/maven/doxia/sink/Sink.java
b/doxia-sink-api/src/main/java/org/apache/maven/doxia/sink/Sink.java
index 61a199cd..4797345b 100644
--- a/doxia-sink-api/src/main/java/org/apache/maven/doxia/sink/Sink.java
+++ b/doxia-sink-api/src/main/java/org/apache/maven/doxia/sink/Sink.java
@@ -1754,24 +1754,22 @@ public interface Sink extends AutoCloseable {
/**
* Add a comment.
- * Semantically the same as {@link #comment(String, boolean)} with second
argument being {@code false}.
*
* @param comment The comment to write.
* @since 1.1
- * @see #comment(String, boolean)
*/
void comment(String comment);
/**
- * Add a comment. The default implementation will just call {@link
#comment(String)}.
+ * Add a single line break with the specified indentation level. The
default implementation does nothing.
+ * This is different from emitting a line break with {@link
#lineBreak(SinkEventAttributes)} or {@link #text(String, SinkEventAttributes)}
as those line breaks are part of the content (i.e. affect rendering)
+ * while this line break is purely for pretty-printing the Sink's output
and should not affect the rendering of the content.
+ * This is useful for Sinks that emit text-based markup languages (e.g.
HTML, XML, etc.) to produce more human-readable output.
*
- * @param comment The comment to write.
- * @param endsWithLineBreak If {@code true} comment ends with a line
break, i.e. nothing else should follow on the same line
+ * @param indentLevel the indentation level, where 0 means no indentation,
1 means one level of indentation, etc. The sink can decide how many spaces/tabs
to use for each level of indentation.
* @since 2.1.0
*/
- default void comment(String comment, boolean endsWithLineBreak) {
- comment(comment);
- }
+ default void markupLineBreak(int indentLevel) {}
/**
* Add an unknown event. This may be used by parsers to notify a general
Sink about
diff --git a/pom.xml b/pom.xml
index f49fc441..ddc17311 100644
--- a/pom.xml
+++ b/pom.xml
@@ -216,6 +216,7 @@ under the License.
<exclude>src/test/resources/**/*.apt</exclude>
<exclude>src/test/resources/**/*.apt.vm</exclude>
<exclude>src/test/resources/**/*.md</exclude>
+ <exclude>src/test/resources/**/*.html</exclude>
<exclude>src/it/**/site/**/*.md</exclude>
<exclude>src/it/**/site/**/*.markdown</exclude>
</excludes>