This is an automated email from the ASF dual-hosted git repository.

kwin pushed a commit to branch feature/markup-linebreaks
in repository https://gitbox.apache.org/repos/asf/maven-doxia.git

commit 28f656e5bfbd17153e20c661cddd5b3e3a0fe44b
Author: Konrad Windszus <[email protected]>
AuthorDate: Thu Feb 26 09:14:21 2026 +0100

    Distinguish between linebreaks for formatting markup and linebreaks in
    output
    
    Add new Sink method "markupLineBreak" for insignificant linebreaks.
    XhtmlParser detects insignificant linebreaks according to
    https://developer.mozilla.org/en-US/docs/Web/CSS/Guides/Text/Whitespace
    and emits them accordingly. It also collapses whitespace.
    
    It assumes no CSS overrides for boxed/inline elements and for
    the "white-space-collapse" property.
    
    This closes #882
---
 .../maven/doxia/parser/Xhtml5BaseParser.java       | 811 +++++++++++++--------
 .../apache/maven/doxia/sink/impl/SinkWrapper.java  |   4 +-
 .../maven/doxia/sink/impl/Xhtml5BaseSink.java      |  21 +-
 .../maven/doxia/parser/Xhtml5BaseParserTest.java   |  29 +-
 .../maven/doxia/sink/impl/AbstractSinkTest.java    |   6 +-
 .../doxia/sink/impl/SinkEventTestingSink.java      |   8 +-
 .../apache/maven/doxia/module/apt/AptParser.java   |   4 +-
 .../org/apache/maven/doxia/module/apt/AptSink.java |   5 -
 .../maven/doxia/module/apt/AptParserTest.java      |   6 +-
 .../apache/maven/doxia/module/apt/AptSinkTest.java |   4 +-
 .../maven/doxia/module/markdown/MarkdownSink.java  |  56 +-
 .../doxia/module/markdown/MarkdownParserTest.java  |  28 +-
 .../doxia/module/markdown/MarkdownSinkTest.java    |  20 +
 .../apache/maven/doxia/module/xdoc/XdocParser.java |   4 +-
 .../maven/doxia/module/xdoc/XdocParserTest.java    |  24 +
 .../java/org/apache/maven/doxia/sink/Sink.java     |  12 +-
 16 files changed, 651 insertions(+), 391 deletions(-)

diff --git 
a/doxia-core/src/main/java/org/apache/maven/doxia/parser/Xhtml5BaseParser.java 
b/doxia-core/src/main/java/org/apache/maven/doxia/parser/Xhtml5BaseParser.java
index a2b03c81..e95ba44a 100644
--- 
a/doxia-core/src/main/java/org/apache/maven/doxia/parser/Xhtml5BaseParser.java
+++ 
b/doxia-core/src/main/java/org/apache/maven/doxia/parser/Xhtml5BaseParser.java
@@ -21,6 +21,8 @@ package org.apache.maven.doxia.parser;
 import javax.swing.text.html.HTML.Attribute;
 
 import java.io.Reader;
+import java.text.CharacterIterator;
+import java.text.StringCharacterIterator;
 import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.Set;
@@ -109,6 +111,8 @@ public class Xhtml5BaseParser extends AbstractXmlParser 
implements HtmlMarkup {
     /** Used to distinguish &lt;a href=""&gt; from &lt;a name=""&gt;. */
     private boolean isLink;
 
+    protected boolean isBeginningOfLineInsideBlock;
+
     /** Used to distinguish &lt;a href=""&gt; from &lt;a name=""&gt;. */
     private boolean isAnchor;
 
@@ -192,181 +196,274 @@ public class Xhtml5BaseParser extends AbstractXmlParser 
implements HtmlMarkup {
 
     protected boolean baseStartTag(String elementName, SinkEventAttributeSet 
attribs, Sink sink) {
         boolean visited = true;
-
-        if (elementName.equals(HtmlMarkup.ARTICLE.toString())) {
-            sink.article(attribs);
-        } else if (elementName.equals(HtmlMarkup.NAV.toString())) {
-            sink.navigation(attribs);
-        } else if (elementName.equals(HtmlMarkup.ASIDE.toString())) {
-            sink.sidebar(attribs);
-        } else if (elementName.equals(HtmlMarkup.SECTION.toString())) {
-            handleSectionStart(sink, attribs);
-        } else if (elementName.equals(HtmlMarkup.H1.toString())) {
-            handleHeadingStart(sink, Sink.SECTION_LEVEL_1, attribs);
-        } else if (elementName.equals(HtmlMarkup.H2.toString())) {
-            handleHeadingStart(sink, Sink.SECTION_LEVEL_2, attribs);
-        } else if (elementName.equals(HtmlMarkup.H3.toString())) {
-            handleHeadingStart(sink, Sink.SECTION_LEVEL_3, attribs);
-        } else if (elementName.equals(HtmlMarkup.H4.toString())) {
-            handleHeadingStart(sink, Sink.SECTION_LEVEL_4, attribs);
-        } else if (elementName.equals(HtmlMarkup.H5.toString())) {
-            handleHeadingStart(sink, Sink.SECTION_LEVEL_5, attribs);
-        } else if (elementName.equals(HtmlMarkup.H6.toString())) {
-            handleHeadingStart(sink, Sink.SECTION_LEVEL_6, attribs);
-        } else if (elementName.equals(HtmlMarkup.HEADER.toString())) {
-            sink.header(attribs);
-        } else if (elementName.equals(HtmlMarkup.MAIN.toString())) {
-            sink.content(attribs);
-        } else if (elementName.equals(HtmlMarkup.FOOTER.toString())) {
-            sink.footer(attribs);
-        } else if (elementName.equals(HtmlMarkup.EM.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.EMPHASIS);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.STRONG.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.STRONG);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.SMALL.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.SMALL);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.S.toString())) {
-            
attribs.addAttributes(SinkEventAttributeSet.Semantics.LINE_THROUGH);
-            sink.inline(attribs);
-            /* deprecated line-through support */
-        } else if (elementName.equals(HtmlMarkup.CITE.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.CITATION);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.Q.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.QUOTE);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.DFN.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.DEFINITION);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.ABBR.toString())) {
-            
attribs.addAttributes(SinkEventAttributeSet.Semantics.ABBREVIATION);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.I.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.ITALIC);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.B.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.BOLD);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.CODE.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.CODE);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.VAR.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.VARIABLE);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.SAMP.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.SAMPLE);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.KBD.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.KEYBOARD);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.SUP.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.SUPERSCRIPT);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.SUB.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.SUBSCRIPT);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.U.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.ANNOTATION);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.MARK.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.HIGHLIGHT);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.RUBY.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.RB.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_BASE);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.RT.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_TEXT);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.RTC.toString())) {
-            
attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_TEXT_CONTAINER);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.RP.toString())) {
-            
attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_PARANTHESES);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.BDI.toString())) {
-            
attribs.addAttributes(SinkEventAttributeSet.Semantics.BIDIRECTIONAL_ISOLATION);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.BDO.toString())) {
-            
attribs.addAttributes(SinkEventAttributeSet.Semantics.BIDIRECTIONAL_OVERRIDE);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.SPAN.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.PHRASE);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.INS.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.INSERT);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.DEL.toString())) {
-            attribs.addAttributes(SinkEventAttributeSet.Semantics.DELETE);
-            sink.inline(attribs);
-        } else if (elementName.equals(HtmlMarkup.P.toString())) {
-            handlePStart(sink, attribs);
-        } else if (elementName.equals(HtmlMarkup.DIV.toString())) {
-            handleDivStart(attribs, sink);
-        } else if (elementName.equals(HtmlMarkup.PRE.toString())) {
-            handlePreStart(attribs, sink);
-        } else if (elementName.equals(HtmlMarkup.UL.toString())) {
-            sink.list(attribs);
-        } else if (elementName.equals(HtmlMarkup.OL.toString())) {
-            handleOLStart(sink, attribs);
-        } else if (elementName.equals(HtmlMarkup.LI.toString())) {
-            handleLIStart(sink, attribs);
-        } else if (elementName.equals(HtmlMarkup.DL.toString())) {
-            sink.definitionList(attribs);
-        } else if (elementName.equals(HtmlMarkup.DT.toString())) {
-            if (hasDefinitionListItem) {
-                // close previous listItem
-                sink.definitionListItem_();
-            }
-            sink.definitionListItem(attribs);
-            hasDefinitionListItem = true;
-            sink.definedTerm(attribs);
-        } else if (elementName.equals(HtmlMarkup.DD.toString())) {
-            if (!hasDefinitionListItem) {
+        isBeginningOfLineInsideBlock = true;
+        switch (elementName) {
+            case "article":
+                sink.article(attribs);
+                break;
+            case "nav":
+                sink.navigation(attribs);
+                break;
+            case "aside":
+                sink.sidebar(attribs);
+                break;
+            case "section":
+                handleSectionStart(sink, attribs);
+                break;
+            case "h1":
+                handleHeadingStart(sink, Sink.SECTION_LEVEL_1, attribs);
+                break;
+            case "h2":
+                handleHeadingStart(sink, Sink.SECTION_LEVEL_2, attribs);
+                break;
+            case "h3":
+                handleHeadingStart(sink, Sink.SECTION_LEVEL_3, attribs);
+                break;
+            case "h4":
+                handleHeadingStart(sink, Sink.SECTION_LEVEL_4, attribs);
+                break;
+            case "h5":
+                handleHeadingStart(sink, Sink.SECTION_LEVEL_5, attribs);
+                break;
+            case "h6":
+                handleHeadingStart(sink, Sink.SECTION_LEVEL_6, attribs);
+                break;
+            case "header":
+                sink.header(attribs);
+                break;
+            case "main":
+                sink.content(attribs);
+                break;
+            case "footer":
+                sink.footer(attribs);
+                break;
+            case "em":
+                
attribs.addAttributes(SinkEventAttributeSet.Semantics.EMPHASIS);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "strong":
+                attribs.addAttributes(SinkEventAttributeSet.Semantics.STRONG);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "small":
+                attribs.addAttributes(SinkEventAttributeSet.Semantics.SMALL);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "s":
+                /* deprecated line-through support */
+                
attribs.addAttributes(SinkEventAttributeSet.Semantics.LINE_THROUGH);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "cite":
+                
attribs.addAttributes(SinkEventAttributeSet.Semantics.CITATION);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "q":
+                attribs.addAttributes(SinkEventAttributeSet.Semantics.QUOTE);
+                sink.inline(attribs);
+                break;
+            case "dfn":
+                
attribs.addAttributes(SinkEventAttributeSet.Semantics.DEFINITION);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "abbr":
+                
attribs.addAttributes(SinkEventAttributeSet.Semantics.ABBREVIATION);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "i":
+                attribs.addAttributes(SinkEventAttributeSet.Semantics.ITALIC);
+                sink.inline(attribs);
+                break;
+            case "b":
+                attribs.addAttributes(SinkEventAttributeSet.Semantics.BOLD);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "code":
+                attribs.addAttributes(SinkEventAttributeSet.Semantics.CODE);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "var":
+                
attribs.addAttributes(SinkEventAttributeSet.Semantics.VARIABLE);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "samp":
+                attribs.addAttributes(SinkEventAttributeSet.Semantics.SAMPLE);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "kbd":
+                
attribs.addAttributes(SinkEventAttributeSet.Semantics.KEYBOARD);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "sup":
+                
attribs.addAttributes(SinkEventAttributeSet.Semantics.SUPERSCRIPT);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "sub":
+                
attribs.addAttributes(SinkEventAttributeSet.Semantics.SUBSCRIPT);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "u":
+                
attribs.addAttributes(SinkEventAttributeSet.Semantics.ANNOTATION);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "mark":
+                
attribs.addAttributes(SinkEventAttributeSet.Semantics.HIGHLIGHT);
+                sink.inline(attribs);
+                break;
+            case "ruby":
+                attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "rb":
+                
attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_BASE);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "rt":
+                
attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_TEXT);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "rtc":
+                
attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_TEXT_CONTAINER);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "rp":
+                
attribs.addAttributes(SinkEventAttributeSet.Semantics.RUBY_PARANTHESES);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "bdi":
+                
attribs.addAttributes(SinkEventAttributeSet.Semantics.BIDIRECTIONAL_ISOLATION);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "bdo":
+                
attribs.addAttributes(SinkEventAttributeSet.Semantics.BIDIRECTIONAL_OVERRIDE);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "span":
+                attribs.addAttributes(SinkEventAttributeSet.Semantics.PHRASE);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "ins":
+                attribs.addAttributes(SinkEventAttributeSet.Semantics.INSERT);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "del":
+                attribs.addAttributes(SinkEventAttributeSet.Semantics.DELETE);
+                sink.inline(attribs);
+                isBeginningOfLineInsideBlock = false;
+                break;
+            case "p":
+                handlePStart(sink, attribs);
+                break;
+            case "div":
+                handleDivStart(attribs, sink);
+                break;
+            case "pre":
+                handlePreStart(attribs, sink);
+                break;
+            case "ul":
+                sink.list(attribs);
+                break;
+            case "ol":
+                handleOLStart(sink, attribs);
+                break;
+            case "li":
+                handleLIStart(sink, attribs);
+                break;
+            case "dl":
+                sink.definitionList(attribs);
+                break;
+            case "dt":
+                if (hasDefinitionListItem) {
+                    // close previous listItem
+                    sink.definitionListItem_();
+                }
                 sink.definitionListItem(attribs);
-            }
-            sink.definition(attribs);
-        } else if (elementName.equals(HtmlMarkup.FIGURE.toString())) {
-            sink.figure(attribs);
-        } else if (elementName.equals(HtmlMarkup.FIGCAPTION.toString())) {
-            sink.figureCaption(attribs);
-        } else if (elementName.equals(HtmlMarkup.A.toString())) {
-            handleAStart(sink, attribs);
-        } else if (elementName.equals(HtmlMarkup.TABLE.toString())) {
-            handleTableStart(sink, attribs);
-        } else if (elementName.equals(HtmlMarkup.TR.toString())) {
-            sink.tableRow(attribs);
-        } else if (elementName.equals(HtmlMarkup.TH.toString())) {
-            sink.tableHeaderCell(attribs);
-        } else if (elementName.equals(HtmlMarkup.TD.toString())) {
-            sink.tableCell(attribs);
-        } else if (elementName.equals(HtmlMarkup.CAPTION.toString())) {
-            sink.tableCaption(attribs);
-        } else if (elementName.equals(HtmlMarkup.BR.toString())) {
-            sink.lineBreak(attribs);
-        } else if (elementName.equals(HtmlMarkup.WBR.toString())) {
-            sink.lineBreakOpportunity(attribs);
-        } else if (elementName.equals(HtmlMarkup.HR.toString())) {
-            sink.horizontalRule(attribs);
-        } else if (elementName.equals(HtmlMarkup.IMG.toString())) {
-            handleImgStart(sink, attribs);
-        } else if (elementName.equals(HtmlMarkup.BLOCKQUOTE.toString())) {
-            sink.blockquote(attribs);
-        } else if (UNMATCHED_XHTML5_ELEMENTS.contains(elementName)) {
-            handleUnknown(elementName, attribs, sink, TAG_TYPE_START);
-        } else if (UNMATCHED_XHTML5_SIMPLE_ELEMENTS.contains(elementName)) {
-            handleUnknown(elementName, attribs, sink, TAG_TYPE_SIMPLE);
-        } else if (elementName.equals(HtmlMarkup.SCRIPT.toString())
-                || elementName.equals(HtmlMarkup.STYLE.toString())) {
-            handleUnknown(elementName, attribs, sink, TAG_TYPE_START);
-            scriptBlock = true;
-        } else {
-            visited = false;
+                hasDefinitionListItem = true;
+                sink.definedTerm(attribs);
+                break;
+            case "dd":
+                if (!hasDefinitionListItem) {
+                    sink.definitionListItem(attribs);
+                }
+                sink.definition(attribs);
+                break;
+            case "figure":
+                sink.figure(attribs);
+                break;
+            case "figcaption":
+                sink.figureCaption(attribs);
+                break;
+            case "a":
+                handleAStart(sink, attribs);
+                break;
+            case "table":
+                handleTableStart(sink, attribs);
+                break;
+            case "tr":
+                sink.tableRow(attribs);
+                break;
+            case "th":
+                sink.tableHeaderCell(attribs);
+                break;
+            case "td":
+                sink.tableCell(attribs);
+                break;
+            case "caption":
+                sink.tableCaption(attribs);
+                break;
+            case "br":
+                sink.lineBreak(attribs);
+                break;
+            case "wbr":
+                sink.lineBreakOpportunity(attribs);
+                break;
+            case "hr":
+                sink.horizontalRule(attribs);
+                break;
+            case "img":
+                handleImgStart(sink, attribs);
+                break;
+            case "blockquote":
+                sink.blockquote(attribs);
+                break;
+            case "script":
+            case "style":
+                handleUnknown(elementName, attribs, sink, TAG_TYPE_START);
+                scriptBlock = true;
+                break;
+            default:
+                if (UNMATCHED_XHTML5_ELEMENTS.contains(elementName)) {
+                    handleUnknown(elementName, attribs, sink, TAG_TYPE_START);
+                } else if 
(UNMATCHED_XHTML5_SIMPLE_ELEMENTS.contains(elementName)) {
+                    handleUnknown(elementName, attribs, sink, TAG_TYPE_SIMPLE);
+                } else {
+                    visited = false;
+                }
+                break;
         }
 
         return visited;
@@ -391,150 +488,159 @@ public class Xhtml5BaseParser extends AbstractXmlParser 
implements HtmlMarkup {
 
     protected boolean baseEndTag(String elementName, SinkEventAttributeSet 
attribs, Sink sink) {
         boolean visited = true;
-
-        if (elementName.equals(HtmlMarkup.P.toString())) {
-            sink.paragraph_();
-        } else if (elementName.equals(HtmlMarkup.DIV.toString())) {
-            handleDivEnd(sink);
-        } else if (elementName.equals(HtmlMarkup.PRE.toString())) {
-            verbatim_();
-
-            sink.verbatim_();
-        } else if (elementName.equals(HtmlMarkup.UL.toString())) {
-            sink.list_();
-        } else if (elementName.equals(HtmlMarkup.OL.toString())) {
-            sink.numberedList_();
-            orderedListDepth--;
-        } else if (elementName.equals(HtmlMarkup.LI.toString())) {
-            handleListItemEnd(sink);
-        } else if (elementName.equals(HtmlMarkup.DL.toString())) {
-            if (hasDefinitionListItem) {
+        isBeginningOfLineInsideBlock = true;
+
+        switch (elementName) {
+            case "p":
+                sink.paragraph_();
+                break;
+            case "div":
+                handleDivEnd(sink);
+                break;
+            case "pre":
+                verbatim_();
+                sink.verbatim_();
+                break;
+            case "ul":
+                sink.list_();
+                break;
+            case "ol":
+                sink.numberedList_();
+                orderedListDepth--;
+                break;
+            case "li":
+                handleListItemEnd(sink);
+                break;
+            case "dl":
+                if (hasDefinitionListItem) {
+                    sink.definitionListItem_();
+                    hasDefinitionListItem = false;
+                }
+                sink.definitionList_();
+                break;
+            case "dt":
+                sink.definedTerm_();
+                break;
+            case "dd":
+                sink.definition_();
                 sink.definitionListItem_();
                 hasDefinitionListItem = false;
-            }
-            sink.definitionList_();
-        } else if (elementName.equals(HtmlMarkup.DT.toString())) {
-            sink.definedTerm_();
-        } else if (elementName.equals(HtmlMarkup.DD.toString())) {
-            sink.definition_();
-            sink.definitionListItem_();
-            hasDefinitionListItem = false;
-        } else if (elementName.equals(HtmlMarkup.FIGURE.toString())) {
-            sink.figure_();
-        } else if (elementName.equals(HtmlMarkup.FIGCAPTION.toString())) {
-            sink.figureCaption_();
-        } else if (elementName.equals(HtmlMarkup.A.toString())) {
-            handleAEnd(sink);
-        } else if (elementName.equals(HtmlMarkup.EM.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.STRONG.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.SMALL.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.S.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.CITE.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.Q.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.DFN.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.ABBR.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.I.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.B.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.CODE.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.VAR.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.SAMP.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.KBD.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.SUP.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.SUB.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.U.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.MARK.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.RUBY.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.RB.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.RT.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.RTC.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.RP.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.BDI.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.BDO.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.SPAN.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.INS.toString())) {
-            sink.inline_();
-        } else if (elementName.equals(HtmlMarkup.DEL.toString())) {
-            sink.inline_();
-        }
-
-        // 
----------------------------------------------------------------------
-        // Tables
-        // 
----------------------------------------------------------------------
-
-        else if (elementName.equals(HtmlMarkup.TABLE.toString())) {
-            sink.tableRows_();
-            sink.table_();
-        } else if (elementName.equals(HtmlMarkup.TR.toString())) {
-            sink.tableRow_();
-        } else if (elementName.equals(HtmlMarkup.TH.toString())) {
-            sink.tableHeaderCell_();
-        } else if (elementName.equals(HtmlMarkup.TD.toString())) {
-            sink.tableCell_();
-        } else if (elementName.equals(HtmlMarkup.CAPTION.toString())) {
-            sink.tableCaption_();
-        } else if (elementName.equals(HtmlMarkup.ARTICLE.toString())) {
-            sink.article_();
-        } else if (elementName.equals(HtmlMarkup.NAV.toString())) {
-            sink.navigation_();
-        } else if (elementName.equals(HtmlMarkup.ASIDE.toString())) {
-            sink.sidebar_();
-        } else if (elementName.equals(HtmlMarkup.SECTION.toString())) {
-            handleSectionEnd(sink);
-        } else if (elementName.equals(HtmlMarkup.H1.toString())) {
-            sink.sectionTitle1_();
-        } else if (elementName.equals(HtmlMarkup.H2.toString())) {
-            sink.sectionTitle2_();
-        } else if (elementName.equals(HtmlMarkup.H3.toString())) {
-            sink.sectionTitle3_();
-        } else if (elementName.equals(HtmlMarkup.H4.toString())) {
-            sink.sectionTitle4_();
-        } else if (elementName.equals(HtmlMarkup.H5.toString())) {
-            sink.sectionTitle5_();
-        } else if (elementName.equals(HtmlMarkup.H6.toString())) {
-            sink.sectionTitle6_();
-        } else if (elementName.equals(HtmlMarkup.HEADER.toString())) {
-            sink.header_();
-        } else if (elementName.equals(HtmlMarkup.MAIN.toString())) {
-            sink.content_();
-        } else if (elementName.equals(HtmlMarkup.FOOTER.toString())) {
-            sink.footer_();
-        } else if (elementName.equals(HtmlMarkup.BLOCKQUOTE.toString())) {
-            sink.blockquote_();
-        } else if (UNMATCHED_XHTML5_ELEMENTS.contains(elementName)) {
-            handleUnknown(elementName, attribs, sink, TAG_TYPE_END);
-        } else if (elementName.equals(HtmlMarkup.SCRIPT.toString())
-                || elementName.equals(HtmlMarkup.STYLE.toString())) {
-            handleUnknown(elementName, attribs, sink, TAG_TYPE_END);
-
-            scriptBlock = false;
-        } else {
-            visited = false;
+                break;
+            case "figure":
+                sink.figure_();
+                break;
+            case "figcaption":
+                sink.figureCaption_();
+                break;
+            case "a":
+                handleAEnd(sink);
+                break;
+            case "em":
+            case "strong":
+            case "small":
+            case "s":
+            case "cite":
+            case "q":
+            case "dfn":
+            case "abbr":
+            case "i":
+            case "b":
+            case "code":
+            case "var":
+            case "samp":
+            case "kbd":
+            case "sup":
+            case "sub":
+            case "u":
+            case "mark":
+            case "ruby":
+            case "rb":
+            case "rt":
+            case "rtc":
+            case "rp":
+            case "bdi":
+            case "bdo":
+            case "span":
+            case "ins":
+            case "del":
+                sink.inline_();
+                isBeginningOfLineInsideBlock = false;
+                break;
+
+            // 
----------------------------------------------------------------------
+            // Tables
+            // 
----------------------------------------------------------------------
+
+            case "table":
+                sink.tableRows_();
+                sink.table_();
+                break;
+            case "tr":
+                sink.tableRow_();
+                break;
+            case "th":
+                sink.tableHeaderCell_();
+                break;
+            case "td":
+                sink.tableCell_();
+                break;
+            case "caption":
+                sink.tableCaption_();
+                break;
+            case "article":
+                sink.article_();
+                break;
+            case "nav":
+                sink.navigation_();
+                break;
+            case "aside":
+                sink.sidebar_();
+                break;
+            case "section":
+                handleSectionEnd(sink);
+                break;
+            case "h1":
+                sink.sectionTitle1_();
+                break;
+            case "h2":
+                sink.sectionTitle2_();
+                break;
+            case "h3":
+                sink.sectionTitle3_();
+                break;
+            case "h4":
+                sink.sectionTitle4_();
+                break;
+            case "h5":
+                sink.sectionTitle5_();
+                break;
+            case "h6":
+                sink.sectionTitle6_();
+                break;
+            case "header":
+                sink.header_();
+                break;
+            case "main":
+                sink.content_();
+                break;
+            case "footer":
+                sink.footer_();
+                break;
+            case "blockquote":
+                sink.blockquote_();
+                break;
+            case "script":
+            case "style":
+                handleUnknown(elementName, attribs, sink, TAG_TYPE_END);
+                scriptBlock = false;
+                break;
+            default:
+                if (UNMATCHED_XHTML5_ELEMENTS.contains(elementName)) {
+                    handleUnknown(elementName, attribs, sink, TAG_TYPE_END);
+                } else {
+                    visited = false;
+                }
+                break;
         }
 
         return visited;
@@ -574,19 +680,79 @@ public class Xhtml5BaseParser extends AbstractXmlParser 
implements HtmlMarkup {
     protected void handleText(XmlPullParser parser, Sink sink) throws 
XmlPullParserException {
         String text = getText(parser);
 
-        /*
-         * NOTE: Don't do any whitespace trimming here. Whitespace 
normalization has already been performed by the
-         * parser so any whitespace that makes it here is significant.
-         *
-         * NOTE: text within script tags is ignored, scripting code should be 
embedded in CDATA.
-         */
+        if (!inVerbatim && text != null) {
+            // do special whitespace processing as outlined in
+            // 
https://developer.mozilla.org/en-US/docs/Web/CSS/Guides/Text/Whitespace
+            if (isBeginningOfLineInsideBlock) {
+                // normalize linebreaks
+                processInsignificantLineBreaks(sink, text);
+                // trim leading whitespace from text being emitted
+                // 
https://developer.mozilla.org/en-US/docs/Web/CSS/Guides/Text/Whitespace#trimming_and_positioning
+                String regex = "^\\s+";
+                text = text.replaceAll(regex, "");
+            }
+
+            // assume white-space-collapse: collapse for all non-verbatim text 
(outside of <pre>)
+            text = collapseWhitespace(text);
+        }
         if ((text != null && !text.isEmpty()) && !isScriptBlock()) {
             sink.text(text);
         }
+        isBeginningOfLineInsideBlock = false;
+    }
+
+    /**
+     * Process all line-breaks in the given text which are not significant for 
the output, i.e. all line-breaks which are not within a verbatim block and
+     * are at the beginning of the given text.
+     * In addition it emits information about the whitespace characters 
following the line-breaks as they may be relevant for the output (e.g. for 
indentation).
+     *
+     * @param sink the sink to receive the events.
+     * @param text the text to process.
+     */
+    protected void processInsignificantLineBreaks(Sink sink, String text) {
+        CharacterIterator it = new 
StringCharacterIterator(text.replaceAll("\\r\\n?", "\n"));
+
+        boolean wasNewLine = false;
+        int indentLevel = 0;
+        // walk over the leading whitespace only; stop at the first non-whitespace character
+        while (it.current() != CharacterIterator.DONE) {
+            char c = it.current();
+            if (c == '\n') {
+                if (wasNewLine) {
+                    sink.markupLineBreak(indentLevel);
+                }
+                indentLevel = 0;
+                wasNewLine = true;
+            } else if (Character.isWhitespace(c)) {
+                indentLevel++;
+            } else {
+                // once non-whitespace character is reached we assume 
everything following is relevant and emitted
+                // within the text event
+                break;
+            }
+            it.next();
+        }
+        if (wasNewLine) {
+            // at least one newline was seen: emit the last (still pending) line 
break together with the number of whitespace characters following it
+            sink.markupLineBreak(indentLevel);
+        }
+    }
+
+    /**
+     * @see <a 
href="https://developer.mozilla.org/en-US/docs/Web/CSS/Guides/Text/Whitespace#how_does_css_process_whitespace">How
 does CSS process whitespace?</a>
+     * @see <a 
href="https://drafts.csswg.org/css-text-4/#white-space-processing">CSS Text 
Module Level 4 - White Space Processing</a>
+     *
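+     * @param text the text in which to collapse whitespace
+     * @return the text with every run of whitespace characters replaced by a single space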
+     */
+    private static String collapseWhitespace(String text) {
+        // replace all sequences of whitespace characters with a single space 
(this includes newlines, tabs, etc.)
+        return text.replaceAll("\\s+", " ");
     }
 
     @Override
     protected void handleComment(XmlPullParser parser, Sink sink) throws 
XmlPullParserException {
+        isBeginningOfLineInsideBlock = false;
         String text = getText(parser);
 
         if ("PB".equals(text.trim())) {
@@ -600,6 +766,7 @@ public class Xhtml5BaseParser extends AbstractXmlParser 
implements HtmlMarkup {
 
     @Override
     protected void handleCdsect(XmlPullParser parser, Sink sink) throws 
XmlPullParserException {
+        isBeginningOfLineInsideBlock = false;
         String text = getText(parser);
 
         if (isScriptBlock()) {
diff --git 
a/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/SinkWrapper.java 
b/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/SinkWrapper.java
index b8c9fff7..d82d3423 100644
--- a/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/SinkWrapper.java
+++ b/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/SinkWrapper.java
@@ -514,8 +514,8 @@ public class SinkWrapper extends AbstractSink {
     }
 
     @Override
-    public void comment(String comment, boolean endsWithLineBreak) {
-        delegate.comment(comment, endsWithLineBreak);
+    public void markupLineBreak(int indentLevel) {
+        delegate.markupLineBreak(indentLevel);
     }
 
     @Override
diff --git 
a/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/Xhtml5BaseSink.java 
b/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/Xhtml5BaseSink.java
index c4c66afa..9a5e88dc 100644
--- 
a/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/Xhtml5BaseSink.java
+++ 
b/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/Xhtml5BaseSink.java
@@ -1538,17 +1538,12 @@ public class Xhtml5BaseSink extends AbstractXmlSink 
implements HtmlMarkup {
 
     @Override
     public void comment(String comment) {
-        comment(comment, false);
-    }
-
-    @Override
-    public void comment(String comment, boolean endsWithLineBreak) {
         if (comment != null) {
-            write(encodeAsHtmlComment(comment, endsWithLineBreak, 
getLocationLogPrefix()));
+            write(encodeAsHtmlComment(comment, getLocationLogPrefix()));
         }
     }
 
-    public static String encodeAsHtmlComment(String comment, boolean 
endsWithLineBreak, String locationLogPrefix) {
+    public static String encodeAsHtmlComment(String comment, String 
locationLogPrefix) {
         final String originalComment = comment;
 
         // http://www.w3.org/TR/2000/REC-xml-20001006#sec-comments
@@ -1569,12 +1564,18 @@ public class Xhtml5BaseSink extends AbstractXmlSink 
implements HtmlMarkup {
         buffer.append(LESS_THAN).append(BANG).append(MINUS).append(MINUS);
         buffer.append(comment);
         buffer.append(MINUS).append(MINUS).append(GREATER_THAN);
-        if (endsWithLineBreak) {
-            buffer.append(EOL);
-        }
         return buffer.toString();
     }
 
+    @Override
+    public void markupLineBreak(int indentLevel) {
+        if (headFlag) {
+            getTextBuffer().append(EOL);
+        } else {
+            write(EOL);
+        }
+    }
+
     /**
      * {@inheritDoc}
      *
diff --git 
a/doxia-core/src/test/java/org/apache/maven/doxia/parser/Xhtml5BaseParserTest.java
 
b/doxia-core/src/test/java/org/apache/maven/doxia/parser/Xhtml5BaseParserTest.java
index b29b8e3d..54fc5178 100644
--- 
a/doxia-core/src/test/java/org/apache/maven/doxia/parser/Xhtml5BaseParserTest.java
+++ 
b/doxia-core/src/test/java/org/apache/maven/doxia/parser/Xhtml5BaseParserTest.java
@@ -280,8 +280,8 @@ class Xhtml5BaseParserTest extends AbstractParserTest {
 
         el = it.next();
         assertEquals("text", el.getName());
-        // according to section 2.11 of the XML spec, parsers must normalize 
line breaks to "\n"
-        assertEquals("\n", (String) el.getArgs()[0]);
+        // the EOL must be normalized to a single space, as per the HTML spec
+        assertEquals(" ", (String) el.getArgs()[0]);
 
         assertEquals("inline", it.next().getName());
         assertEquals("text", it.next().getName());
@@ -366,6 +366,31 @@ class Xhtml5BaseParserTest extends AbstractParserTest {
         assertEquals("verbatim_", it.next().getName());
     }
 
+    @Test
+    void listWithInsignificantLineBreaks() throws Exception {
+        // test EOLs within lists (those don't have significance and should 
not be reported as text events, but as
+        // markupLineBreak with the corresponding indent level)
+        String text = "<ul>" + Xhtml5BaseParser.EOL + "  <li>One</li> "
+                + Xhtml5BaseParser.EOL + "  <li>Two</li>   "
+                + Xhtml5BaseParser.EOL + "</ul>";
+
+        parser.parse(text, sink);
+
+        Iterator<SinkEventElement> it = sink.getEventList().iterator();
+
+        assertEquals("list", it.next().getName());
+        assertSinkEquals(it.next(), "markupLineBreak", new Object[] {2});
+        assertEquals("listItem", it.next().getName());
+        assertEquals("text", it.next().getName());
+        assertEquals("listItem_", it.next().getName());
+        assertSinkEquals(it.next(), "markupLineBreak", new Object[] {2});
+        assertEquals("listItem", it.next().getName());
+        assertEquals("text", it.next().getName());
+        assertEquals("listItem_", it.next().getName());
+        assertSinkEquals(it.next(), "markupLineBreak", new Object[] {0});
+        assertEquals("list_", it.next().getName());
+    }
+
     @Test
     void doxia250() throws Exception {
         StringBuilder sb = new StringBuilder();
diff --git 
a/doxia-core/src/test/java/org/apache/maven/doxia/sink/impl/AbstractSinkTest.java
 
b/doxia-core/src/test/java/org/apache/maven/doxia/sink/impl/AbstractSinkTest.java
index 022f80d0..424e52bf 100644
--- 
a/doxia-core/src/test/java/org/apache/maven/doxia/sink/impl/AbstractSinkTest.java
+++ 
b/doxia-core/src/test/java/org/apache/maven/doxia/sink/impl/AbstractSinkTest.java
@@ -1204,8 +1204,10 @@ public abstract class AbstractSinkTest extends 
AbstractModuleTest {
     @Test
     public void twoConsecutiveBlockComments() {
         String comment = "Simple comment";
-        sink.comment(comment, true);
-        sink.comment(comment, true);
+        sink.comment(comment);
+        sink.markupLineBreak(0);
+        sink.comment(comment);
+        sink.markupLineBreak(0);
         sink.flush();
         sink.close();
         assertEquals(getCommentBlock(comment) + EOL + getCommentBlock(comment) 
+ EOL, testWriter.toString());
diff --git 
a/doxia-core/src/test/java/org/apache/maven/doxia/sink/impl/SinkEventTestingSink.java
 
b/doxia-core/src/test/java/org/apache/maven/doxia/sink/impl/SinkEventTestingSink.java
index 3a35f8fd..714c8ebf 100644
--- 
a/doxia-core/src/test/java/org/apache/maven/doxia/sink/impl/SinkEventTestingSink.java
+++ 
b/doxia-core/src/test/java/org/apache/maven/doxia/sink/impl/SinkEventTestingSink.java
@@ -288,13 +288,13 @@ public class SinkEventTestingSink extends AbstractSink {
     }
 
     @Override
-    public void comment(String comment, boolean endsWithLineBreak) {
-        addEvent("comment", new Object[] {comment, endsWithLineBreak});
+    public void comment(String comment) {
+        addEvent("comment", new Object[] {comment});
     }
 
     @Override
-    public void comment(String comment) {
-        addEvent("comment", new Object[] {comment});
+    public void markupLineBreak(int indentLevel) {
+        addEvent("markupLineBreak", new Object[] {indentLevel});
     }
 
     @Override
diff --git 
a/doxia-modules/doxia-module-apt/src/main/java/org/apache/maven/doxia/module/apt/AptParser.java
 
b/doxia-modules/doxia-module-apt/src/main/java/org/apache/maven/doxia/module/apt/AptParser.java
index 7f03b19d..aed398ba 100644
--- 
a/doxia-modules/doxia-module-apt/src/main/java/org/apache/maven/doxia/module/apt/AptParser.java
+++ 
b/doxia-modules/doxia-module-apt/src/main/java/org/apache/maven/doxia/module/apt/AptParser.java
@@ -1838,7 +1838,9 @@ public class AptParser extends AbstractTextParser 
implements AptMarkup {
 
         public void traverse() throws AptParseException {
             if (isEmitComments()) {
-                AptParser.this.sink.comment(text, true);
+                AptParser.this.sink.comment(text);
+                // APT comments always end with a line break
+                AptParser.this.sink.markupLineBreak(0);
             }
         }
     }
diff --git 
a/doxia-modules/doxia-module-apt/src/main/java/org/apache/maven/doxia/module/apt/AptSink.java
 
b/doxia-modules/doxia-module-apt/src/main/java/org/apache/maven/doxia/module/apt/AptSink.java
index dab9a813..47a5a255 100644
--- 
a/doxia-modules/doxia-module-apt/src/main/java/org/apache/maven/doxia/module/apt/AptSink.java
+++ 
b/doxia-modules/doxia-module-apt/src/main/java/org/apache/maven/doxia/module/apt/AptSink.java
@@ -834,11 +834,6 @@ public class AptSink extends AbstractTextSink implements 
AptMarkup {
     }
 
     public void comment(String comment) {
-        comment(comment, false);
-    }
-
-    @Override
-    public void comment(String comment, boolean endsWithLineBreak) {
         rawText("" + COMMENT + COMMENT + comment + EOL); // comments always 
end with a line break in APT
     }
 
diff --git 
a/doxia-modules/doxia-module-apt/src/test/java/org/apache/maven/doxia/module/apt/AptParserTest.java
 
b/doxia-modules/doxia-module-apt/src/test/java/org/apache/maven/doxia/module/apt/AptParserTest.java
index 63b76d4d..fbf0620f 100644
--- 
a/doxia-modules/doxia-module-apt/src/test/java/org/apache/maven/doxia/module/apt/AptParserTest.java
+++ 
b/doxia-modules/doxia-module-apt/src/test/java/org/apache/maven/doxia/module/apt/AptParserTest.java
@@ -106,8 +106,10 @@ class AptParserTest extends AbstractParserTest {
                 "paragraph",
                 "text",
                 "paragraph_");
-        assertSinkEquals(it.next(), "comment", "some comment", Boolean.TRUE);
-        assertSinkEquals(it.next(), "comment", "another comment", 
Boolean.TRUE);
+        assertSinkEquals(it.next(), "comment", "some comment");
+        assertSinkEquals(it.next(), "markupLineBreak", 0);
+        assertSinkEquals(it.next(), "comment", "another comment");
+        assertSinkEquals(it.next(), "markupLineBreak", 0);
         assertSinkEquals(it, "paragraph", "text", "paragraph_", "section1_", 
"body_");
     }
 
diff --git 
a/doxia-modules/doxia-module-apt/src/test/java/org/apache/maven/doxia/module/apt/AptSinkTest.java
 
b/doxia-modules/doxia-module-apt/src/test/java/org/apache/maven/doxia/module/apt/AptSinkTest.java
index bbad9639..13d32fbe 100644
--- 
a/doxia-modules/doxia-module-apt/src/test/java/org/apache/maven/doxia/module/apt/AptSinkTest.java
+++ 
b/doxia-modules/doxia-module-apt/src/test/java/org/apache/maven/doxia/module/apt/AptSinkTest.java
@@ -296,8 +296,8 @@ class AptSinkTest extends AbstractSinkTest {
     public void twoConsecutiveBlockComments() {
         final Sink sink = getSink();
         String comment = "Simple comment";
-        sink.comment(comment, true);
-        sink.comment(comment, true);
+        sink.comment(comment);
+        sink.comment(comment);
         sink.flush();
         sink.close();
         assertEquals(getCommentBlock(comment) + getCommentBlock(comment), 
getSinkContent(), "Wrong comment!");
diff --git 
a/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownSink.java
 
b/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownSink.java
index 88c987a0..c9896c3e 100644
--- 
a/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownSink.java
+++ 
b/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownSink.java
@@ -107,23 +107,24 @@ public class MarkdownSink extends Xhtml5BaseSink 
implements MarkdownMarkup {
     }
     /** Most important contextual metadata (of elements). This contains 
information about necessary escaping rules, potential prefixes and newlines */
     enum ElementContext {
-        HEAD(Type.GENERIC_CONTAINER, null, true),
-        BODY(Type.GENERIC_CONTAINER, ElementContext::escapeMarkdown),
+        HEAD(Type.GENERIC_CONTAINER, false, null, true),
+        BODY(Type.GENERIC_CONTAINER, true, ElementContext::escapeMarkdown),
         // only the elements, which affect rendering of children and are 
different from BODY or HEAD are listed here
-        FIGURE(Type.INLINE, ElementContext::escapeMarkdown, true),
-        HEADING(Type.LEAF_BLOCK, ElementContext::escapeMarkdown),
-        CODE_BLOCK(Type.LEAF_BLOCK, null),
-        CODE_SPAN(Type.INLINE, null, true),
-        TABLE_CAPTION(Type.INLINE, ElementContext::escapeMarkdown),
-        TABLE_ROW(Type.CONTAINER_BLOCK, null, true),
+        FIGURE(Type.INLINE, false, ElementContext::escapeMarkdown, true),
+        HEADING(Type.LEAF_BLOCK, false, ElementContext::escapeMarkdown),
+        CODE_BLOCK(Type.LEAF_BLOCK, false, null),
+        CODE_SPAN(Type.INLINE, false, null, true),
+        TABLE_CAPTION(Type.INLINE, false, ElementContext::escapeMarkdown),
+        TABLE_ROW(Type.CONTAINER_BLOCK, false, null, true),
         TABLE_CELL(
                 Type.LEAF_BLOCK,
+                false,
                 ElementContext::escapeForTableCell,
                 false), // special type, as allows containing inlines, but not 
starting on a separate line
         // same parameters as BODY but paragraphs inside list items are 
handled differently
-        LIST_ITEM(Type.CONTAINER_BLOCK, ElementContext::escapeMarkdown, false, 
INDENT),
-        BLOCKQUOTE(Type.CONTAINER_BLOCK, ElementContext::escapeMarkdown, 
false, BLOCKQUOTE_START_MARKUP),
-        HTML_BLOCK(Type.LEAF_BLOCK, ElementContext::escapeHtml, false, "", 
true);
+        LIST_ITEM(Type.CONTAINER_BLOCK, false, ElementContext::escapeMarkdown, 
false, INDENT),
+        BLOCKQUOTE(Type.CONTAINER_BLOCK, false, 
ElementContext::escapeMarkdown, false, BLOCKQUOTE_START_MARKUP),
+        HTML_BLOCK(Type.LEAF_BLOCK, true, ElementContext::escapeHtml, false, 
"", true);
 
         /**
          * @see <a 
href="https://spec.commonmark.org/0.30/#blocks-and-inlines">CommonMark, 3 
Blocks and inlines</a>
@@ -170,26 +171,34 @@ public class MarkdownSink extends Xhtml5BaseSink 
implements MarkdownMarkup {
          * Only relevant for block element, if set to {@code true} the element 
requires to be surrounded by blank lines.
          */
         final boolean requiresSurroundingByBlankLines;
+        
+        /**
+         * Whether markup line breaks (i.e. insignificant line breaks in the 
source) are allowed in this context.
+         * This is relevant for Markdown because in some contexts (e.g. list 
items) line breaks are always significant (while in HTML they would not be).
+         */
+        final boolean allowsMarkupLinebreaks;
 
-        ElementContext(Type type, TextEscapeFunction escapeFunction) {
-            this(type, escapeFunction, false);
+        ElementContext(Type type, boolean allowsMarkupLinebreaks, 
TextEscapeFunction escapeFunction) {
+            this(type, allowsMarkupLinebreaks, escapeFunction, false);
         }
 
-        ElementContext(Type type, TextEscapeFunction escapeFunction, boolean 
requiresBuffering) {
-            this(type, escapeFunction, requiresBuffering, "");
+        ElementContext(Type type, boolean allowsMarkupLinebreaks, 
TextEscapeFunction escapeFunction, boolean requiresBuffering) {
+            this(type, allowsMarkupLinebreaks, escapeFunction, 
requiresBuffering, "");
         }
 
-        ElementContext(Type type, TextEscapeFunction escapeFunction, boolean 
requiresBuffering, String prefix) {
-            this(type, escapeFunction, requiresBuffering, prefix, false);
+        ElementContext(Type type, boolean allowsMarkupLinebreaks, 
TextEscapeFunction escapeFunction, boolean requiresBuffering, String prefix) {
+            this(type, allowsMarkupLinebreaks, escapeFunction, 
requiresBuffering, prefix, false);
         }
 
         ElementContext(
                 Type type,
+                boolean allowsMarkupLinebreaks,
                 TextEscapeFunction escapeFunction,
                 boolean requiresBuffering,
                 String prefix,
                 boolean requiresSurroundingByBlankLines) {
             this.type = type;
+            this.allowsMarkupLinebreaks = allowsMarkupLinebreaks;
             this.escapeFunction = escapeFunction;
             this.requiresBuffering = requiresBuffering;
             if (type != Type.CONTAINER_BLOCK && prefix.length() != 0) {
@@ -236,6 +245,11 @@ public class MarkdownSink extends Xhtml5BaseSink 
implements MarkdownMarkup {
             return type == Type.CONTAINER_BLOCK || type == 
Type.GENERIC_CONTAINER;
         }
 
+        
+        public boolean isAllowsMarkupLinebreaks() {
+            return allowsMarkupLinebreaks;
+        }
+
         /**
          * First use XML escaping (leveraging the predefined entities, for 
browsers)
          * afterwards escape special characters in a text with a leading 
backslash (for markdown parsers)
@@ -1271,6 +1285,14 @@ public class MarkdownSink extends Xhtml5BaseSink 
implements MarkdownMarkup {
         LOGGER.warn("{}Unknown Sink event '" + name + "', ignoring!", 
getLocationLogPrefix());
     }
 
+    @Override
+    public void markupLineBreak(int indentLevel) {
+        // not allowed in all contexts
+        if (elementContextStack.element().isAllowsMarkupLinebreaks()) {
+            super.markupLineBreak(indentLevel);
+        }
+    }
+
     protected void writeUnescaped(String text) {
         StringBuilder buffer = bufferStack.peek();
         if (buffer != null) {
diff --git 
a/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownParserTest.java
 
b/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownParserTest.java
index b3cefb07..817855e0 100644
--- 
a/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownParserTest.java
+++ 
b/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownParserTest.java
@@ -165,7 +165,7 @@ class MarkdownParserTest extends AbstractParserTest {
                 "paragraph",
                 "text",
                 "paragraph_",
-                "text",
+                "markupLineBreak",
                 "verbatim",
                 "inline",
                 "text",
@@ -195,7 +195,7 @@ class MarkdownParserTest extends AbstractParserTest {
                 "paragraph",
                 "text",
                 "paragraph_",
-                "text",
+                "markupLineBreak",
                 "verbatim",
                 "inline",
                 "text",
@@ -325,14 +325,14 @@ class MarkdownParserTest extends AbstractParserTest {
                 "head_",
                 "body",
                 "list",
-                "text",
+                "markupLineBreak",
                 "listItem",
                 "text",
                 "listItem_",
                 "listItem",
                 "text",
                 "listItem_",
-                "text",
+                "markupLineBreak",
                 "list_",
                 "body_");
 
@@ -355,14 +355,14 @@ class MarkdownParserTest extends AbstractParserTest {
                 "head_",
                 "body",
                 "numberedList",
-                "text",
+                "markupLineBreak",
                 "numberedListItem",
                 "text",
                 "numberedListItem_",
                 "numberedListItem",
                 "text",
                 "numberedListItem_",
-                "text",
+                "markupLineBreak",
                 "numberedList_",
                 "body_");
 
@@ -601,7 +601,7 @@ class MarkdownParserTest extends AbstractParserTest {
                 "head_",
                 "body",
                 "division",
-                "text",
+                "markupLineBreak",
                 "paragraph",
                 "inline",
                 "text",
@@ -612,9 +612,9 @@ class MarkdownParserTest extends AbstractParserTest {
                 "inline_",
                 "text",
                 "paragraph_",
-                "text",
+                "markupLineBreak",
                 "division_",
-                "text",
+                "markupLineBreak",
                 "horizontalRule",
                 "section1",
                 "sectionTitle1",
@@ -623,27 +623,27 @@ class MarkdownParserTest extends AbstractParserTest {
                 "paragraph",
                 "text",
                 "paragraph_",
-                "text",
+                "markupLineBreak",
                 "table",
                 "tableRows",
-                "text",
+                "markupLineBreak",
                 "unknown", // tbody start
                 "tableRow",
                 "tableHeaderCell",
                 "text",
                 "tableHeaderCell_",
                 "tableRow_",
-                "text",
+                "markupLineBreak",
                 "tableRow",
                 "tableCell",
                 "text",
                 "tableCell_",
                 "tableRow_",
-                "text",
+                "markupLineBreak",
                 "unknown", // tbody end
                 "tableRows_",
                 "table_",
-                "text",
+                "markupLineBreak",
                 "section1_",
                 "body_");
 
diff --git 
a/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownSinkTest.java
 
b/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownSinkTest.java
index 95c06f95..f107c883 100644
--- 
a/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownSinkTest.java
+++ 
b/doxia-modules/doxia-module-markdown/src/test/java/org/apache/maven/doxia/module/markdown/MarkdownSinkTest.java
@@ -595,4 +595,24 @@ class MarkdownSinkTest extends AbstractSinkTest {
                 + EOL;
         assertEquals(expected, getSinkContent());
     }
+
+    @Test
+    void listItemsContainingInsignificantWhitespace() {
+        try (Sink sink = getSink()) {
+            sink.list();
+            sink.listItem();
+            sink.markupLineBreak(4);
+            sink.text("item 1");
+            sink.listItem_();
+            sink.listItem();
+            sink.markupLineBreak(4);
+            sink.text("item 2");
+            sink.listItem_();
+            sink.list_();
+        }
+        String expected = "- item 1" + EOL
+                + "- item 2" + EOL;
+        assertEquals(expected, getSinkContent());
+    }
+    
 }
diff --git 
a/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
 
b/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
index 5c646dca..677e2060 100644
--- 
a/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
+++ 
b/doxia-modules/doxia-module-xdoc/src/main/java/org/apache/maven/doxia/module/xdoc/XdocParser.java
@@ -109,7 +109,7 @@ public class XdocParser extends Xhtml1BaseParser implements 
XdocMarkup {
     protected void handleStartTag(XmlPullParser parser, Sink sink)
             throws XmlPullParserException, MacroExecutionException {
         isEmptyElement = parser.isEmptyElementTag();
-
+        isBeginningOfLineInsideBlock = true;
         SinkEventAttributeSet attribs = getAttributesFromParser(parser);
 
         if (parser.getName().equals(DOCUMENT_TAG.toString())) {
@@ -145,7 +145,6 @@ public class XdocParser extends Xhtml1BaseParser implements 
XdocMarkup {
                 sink.head_();
                 this.inHead = false;
             }
-
             sink.body(attribs);
         } else if (parser.getName().equals(SECTION_TAG.toString())) {
             handleSectionStart(Sink.SECTION_LEVEL_1, sink, attribs, parser);
@@ -191,6 +190,7 @@ public class XdocParser extends Xhtml1BaseParser implements 
XdocMarkup {
 
     protected void handleEndTag(XmlPullParser parser, Sink sink)
             throws XmlPullParserException, MacroExecutionException {
+        isBeginningOfLineInsideBlock = true;
         if (parser.getName().equals(DOCUMENT_TAG.toString())) {
             // Do nothing
             return;
diff --git 
a/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java
 
b/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java
index dcc9c202..f37790f0 100644
--- 
a/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java
+++ 
b/doxia-modules/doxia-module-xdoc/src/test/java/org/apache/maven/doxia/module/xdoc/XdocParserTest.java
@@ -408,6 +408,7 @@ class XdocParserTest extends AbstractParserTest {
         SinkEventElement styleElm = it.next();
         assertEquals("unknown", styleElm.getName());
         assertEquals("style", styleElm.getArgs()[0]);
+        assertEquals("markupLineBreak", it.next().getName());
         SinkEventElement cdataElm = it.next();
         assertEquals("unknown", cdataElm.getName());
         assertEquals("CDATA", cdataElm.getArgs()[0]);
@@ -480,4 +481,27 @@ class XdocParserTest extends AbstractParserTest {
         assertSinkEquals(it.next(), "text", "test", null);
         assertSinkEquals(it, "inline_");
     }
+
+    @Test
+    void indentedTags() throws Exception {
+        final String text = "<section name=\"test\">\n" + "    <p>test</p>\n" 
+ "</section>";
+
+        SinkEventTestingSink sink = new SinkEventTestingSink();
+
+        parser.setValidate(false);
+        parser.parse(text, sink);
+        Iterator<SinkEventElement> it = sink.getEventList().iterator();
+        assertSinkEquals(
+                it,
+                "section1",
+                "sectionTitle1",
+                "text",
+                "sectionTitle1_",
+                "markupLineBreak",
+                "paragraph",
+                "text",
+                "paragraph_",
+                "markupLineBreak",
+                "section1_");
+    }
 }
diff --git a/doxia-sink-api/src/main/java/org/apache/maven/doxia/sink/Sink.java 
b/doxia-sink-api/src/main/java/org/apache/maven/doxia/sink/Sink.java
index 61a199cd..972d96fe 100644
--- a/doxia-sink-api/src/main/java/org/apache/maven/doxia/sink/Sink.java
+++ b/doxia-sink-api/src/main/java/org/apache/maven/doxia/sink/Sink.java
@@ -1763,15 +1763,15 @@ public interface Sink extends AutoCloseable {
     void comment(String comment);
 
     /**
-     * Add a comment. The default implementation will just call {@link 
#comment(String)}.
+     * Add a single line break with the specified indentation level. The 
default implementation does nothing.
+     * This is different from emitting a line break with {@link 
#lineBreak(SinkEventAttributes)} or {@link #text(String, SinkEventAttributes)} 
as those line breaks are part of the content (i.e. affect rendering)
+     * while this line break is purely for pretty-printing the Sink's output 
and should not affect the rendering of the content.
+     * This is useful for Sinks that emit text-based markup languages (e.g. 
HTML, XML, etc.) to produce more human-readable output.
      *
-     * @param comment The comment to write.
-     * @param endsWithLineBreak If {@code true} comment ends with a line 
break, i.e. nothing else should follow on the same line
+     * @param indentLevel the indentation level, where 0 means no indentation, 
1 means one level of indentation, etc. The sink can decide how many spaces/tabs 
to use for each level of indentation.
      * @since 2.1.0
      */
-    default void comment(String comment, boolean endsWithLineBreak) {
-        comment(comment);
-    }
+    default void markupLineBreak(int indentLevel) {}
 
     /**
      * Add an unknown event. This may be used by parsers to notify a general 
Sink about

Reply via email to