Tim Starling has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/337771 )
Change subject: [WIP] RemexHtml tidy driver with p-wrapping
......................................................................
[WIP] RemexHtml tidy driver with p-wrapping
Some tests still fail. Requires non-existent version of RemexHtml.
Change-Id: I900155b7dd199b0ae2a3b9cdb6db5136fc4f35a8
---
M autoload.php
M composer.json
A includes/tidy/RemexCompatFormatter.php
A includes/tidy/RemexCompatMunger.php
A includes/tidy/RemexDriver.php
A includes/tidy/RemexMungerData.php
A tests/phpunit/includes/tidy/RemexDriverTest.php
7 files changed, 720 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core
refs/changes/71/337771/1
diff --git a/autoload.php b/autoload.php
index 0e719ae..595b6c0 100644
--- a/autoload.php
+++ b/autoload.php
@@ -914,6 +914,10 @@
'MediaWiki\\Tidy\\RaggettInternalHHVM' => __DIR__ .
'/includes/tidy/RaggettInternalHHVM.php',
'MediaWiki\\Tidy\\RaggettInternalPHP' => __DIR__ .
'/includes/tidy/RaggettInternalPHP.php',
'MediaWiki\\Tidy\\RaggettWrapper' => __DIR__ .
'/includes/tidy/RaggettWrapper.php',
+ 'MediaWiki\\Tidy\\RemexDriver' => __DIR__ .
'/includes/tidy/RemexDriver.php',
+ 'MediaWiki\\Tidy\\RemexCompatMunger' => __DIR__ .
'/includes/tidy/RemexCompatMunger.php',
+ 'MediaWiki\\Tidy\\RemexCompatFormatter' => __DIR__ .
'/includes/tidy/RemexCompatFormatter.php',
+ 'MediaWiki\\Tidy\\RemexMungerData' => __DIR__ .
'/includes/tidy/RemexMungerData.php',
'MediaWiki\\Tidy\\TidyDriverBase' => __DIR__ .
'/includes/tidy/TidyDriverBase.php',
'MediaWiki\\Widget\\ComplexNamespaceInputWidget' => __DIR__ .
'/includes/widget/ComplexNamespaceInputWidget.php',
'MediaWiki\\Widget\\ComplexTitleInputWidget' => __DIR__ .
'/includes/widget/ComplexTitleInputWidget.php',
diff --git a/composer.json b/composer.json
index d41492e..a3d5546 100644
--- a/composer.json
+++ b/composer.json
@@ -38,6 +38,7 @@
"wikimedia/ip-set": "1.1.0",
"wikimedia/php-session-serializer": "1.0.4",
"wikimedia/relpath": "1.0.3",
+ "wikimedia/remex-html": "~1.0",
"wikimedia/running-stat": "1.1.0",
"wikimedia/scoped-callback": "1.0.0",
"wikimedia/utfnormal": "1.1.0",
diff --git a/includes/tidy/RemexCompatFormatter.php
b/includes/tidy/RemexCompatFormatter.php
new file mode 100644
index 0000000..2183432
--- /dev/null
+++ b/includes/tidy/RemexCompatFormatter.php
@@ -0,0 +1,68 @@
+<?php
+
+namespace MediaWiki\Tidy;
+
+use RemexHtml\HTMLData;
+use RemexHtml\Serializer\HtmlFormatter;
+use RemexHtml\Serializer\SerializerNode;
+use RemexHtml\Tokenizer\PlainAttributes;
+
+/**
+ * HtmlFormatter variant used by the RemexHtml tidy driver. It renders the
+ * synthetic p-wrapper nodes created by RemexCompatMunger, marks certain empty
+ * elements with class="mw-empty-elt", and serialises void elements with a
+ * trailing " />".
+ */
+class RemexCompatFormatter extends HtmlFormatter {
+	/**
+	 * Elements which are given class="mw-empty-elt" when they have no
+	 * attributes and contain nothing but whitespace.
+	 */
+	private static $markedEmptyElements = [
+		'li' => true,
+		'p' => true,
+		'tr' => true,
+	];
+
+	/**
+	 * @param array $options Options passed through to HtmlFormatter
+	 */
+	public function __construct( $options = [] ) {
+		parent::__construct( $options );
+		// Serialise U+00A0 as a numeric character reference. The replacement
+		// value must be written as an ASCII entity: a literal space here would
+		// silently turn non-breaking spaces into ordinary ones.
+		$this->attributeEscapes["\xc2\xa0"] = '&#160;';
+		$this->textEscapes["\xc2\xa0"] = '&#160;';
+		// Do not escape ampersands on output.
+		// NOTE(review): presumably because RemexDriver runs the tokenizer with
+		// ignoreCharRefs, so character references arrive here verbatim — confirm.
+		unset( $this->attributeEscapes["&"] );
+		unset( $this->textEscapes["&"] );
+	}
+
+	/**
+	 * Emit nothing at the start of the document.
+	 *
+	 * @param mixed $fragmentNamespace Unused
+	 * @param mixed $fragmentName Unused
+	 * @return string Always the empty string
+	 */
+	public function startDocument( $fragmentNamespace, $fragmentName ) {
+		return '';
+	}
+
+	/**
+	 * Serialise one element.
+	 *
+	 * @param SerializerNode $parent Unused
+	 * @param SerializerNode $node The node to serialise; its snData must be set
+	 * @param string $contents The already-serialised children of $node
+	 * @return string
+	 */
+	public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
+		$data = $node->snData;
+		if ( $data->isPWrapper ) {
+			// A p-wrapper becomes a real <p> only if it was not disabled and
+			// it holds non-blank content; otherwise it is transparent.
+			if ( !$data->isDisabledPWrapper && !$data->isBlank ) {
+				return "<p>$contents</p>";
+			}
+			return $contents;
+		}
+
+		$name = $node->name;
+		$attrs = $node->attrs;
+		if ( isset( self::$markedEmptyElements[$name] )
+			&& $attrs->count() === 0
+			&& strspn( $contents, "\t\n\f\r " ) === strlen( $contents )
+		) {
+			return "<{$name} class=\"mw-empty-elt\">$contents</{$name}>";
+		}
+
+		$s = "<$name";
+		foreach ( $attrs->getValues() as $attrName => $attrValue ) {
+			$encValue = strtr( $attrValue, $this->attributeEscapes );
+			$s .= " $attrName=\"$encValue\"";
+		}
+		$isHtml = $node->namespace === HTMLData::NS_HTML;
+		if ( $isHtml && isset( $this->voidElements[$name] ) ) {
+			// Void elements have no contents and no end tag
+			$s .= ' />';
+			return $s;
+		}
+
+		$s .= '>';
+		if ( $isHtml
+			&& isset( $contents[0] ) && $contents[0] === "\n"
+			&& isset( $this->prefixLfElements[$name] )
+		) {
+			// Double the leading line feed for elements listed in
+			// prefixLfElements so that the first one survives re-parsing.
+			$s .= "\n$contents</$name>";
+		} else {
+			$s .= "$contents</$name>";
+		}
+		return $s;
+	}
+}
diff --git a/includes/tidy/RemexCompatMunger.php
b/includes/tidy/RemexCompatMunger.php
new file mode 100644
index 0000000..3783d61
--- /dev/null
+++ b/includes/tidy/RemexCompatMunger.php
@@ -0,0 +1,356 @@
+<?php
+
+namespace MediaWiki\Tidy;
+
+use RemexHtml\HTMLData;
+use RemexHtml\Serializer\Serializer;
+use RemexHtml\Serializer\SerializerNode;
+use RemexHtml\Tokenizer\Attributes;
+use RemexHtml\Tokenizer\PlainAttributes;
+use RemexHtml\TreeBuilder\TreeBuilder;
+use RemexHtml\TreeBuilder\TreeHandler;
+use RemexHtml\TreeBuilder\Element;
+
+class RemexCompatMunger implements TreeHandler {
+ private static $onlyInlineElements = [
+ "a" => true,
+ "abbr" => true,
+ "acronym" => true,
+ "applet" => true,
+ "b" => true,
+ "basefont" => true,
+ "bdo" => true,
+ "big" => true,
+ "br" => true,
+ "button" => true,
+ "cite" => true,
+ "code" => true,
+ "dfn" => true,
+ "em" => true,
+ "font" => true,
+ "i" => true,
+ "iframe" => true,
+ "img" => true,
+ "input" => true,
+ "kbd" => true,
+ "label" => true,
+ "legend" => true,
+ "map" => true,
+ "object" => true,
+ "param" => true,
+ "q" => true,
+ "rb" => true,
+ "rbc" => true,
+ "rp" => true,
+ "rt" => true,
+ "rtc" => true,
+ "ruby" => true,
+ "s" => true,
+ "samp" => true,
+ "select" => true,
+ "small" => true,
+ "span" => true,
+ "strike" => true,
+ "strong" => true,
+ "sub" => true,
+ "sup" => true,
+ "textarea" => true,
+ "tt" => true,
+ "u" => true,
+ "var" => true,
+ ];
+
+ private static $formattingElements = [
+ 'a' => true,
+ 'b' => true,
+ 'big' => true,
+ 'code' => true,
+ 'em' => true,
+ 'font' => true,
+ 'i' => true,
+ 'nobr' => true,
+ 's' => true,
+ 'small' => true,
+ 'strike' => true,
+ 'strong' => true,
+ 'tt' => true,
+ 'u' => true,
+ ];
+
+	/** @var Serializer The downstream serializer which receives the munged events */
+	private $serializer;
+
+	/**
+	 * Constructor
+	 *
+	 * @param Serializer $serializer The serializer to which all tree events
+	 *   are forwarded after p-wrapping has been applied
+	 */
+	public function __construct( Serializer $serializer ) {
+		$this->serializer = $serializer;
+	}
+
+	/**
+	 * Forward the start of the document to the serializer, then attach fresh
+	 * munger data to the serializer's root node and flag it as a context in
+	 * which bare content needs p-wrapping.
+	 *
+	 * @param mixed $fragmentNamespace Passed through to the serializer
+	 * @param mixed $fragmentName Passed through to the serializer
+	 */
+	public function startDocument( $fragmentNamespace, $fragmentName ) {
+		$this->serializer->startDocument( $fragmentNamespace, $fragmentName );
+
+		$rootData = new RemexMungerData;
+		$rootData->needsPWrapping = true;
+		$this->serializer->getRootNode()->snData = $rootData;
+	}
+
+	/**
+	 * Forward the end of the document to the serializer unchanged.
+	 *
+	 * @param int $pos Passed through to the serializer
+	 */
+	public function endDocument( $pos ) {
+		$this->serializer->endDocument( $pos );
+	}
+
+	/**
+	 * Work out which serializer node a new child will actually be placed
+	 * under, taking into account clones made by splitTagStack() and any open
+	 * p-wrapper.
+	 *
+	 * @param int $preposition TreeBuilder::ROOT, TreeBuilder::BEFORE or
+	 *   anything else (treated as "under $refElement")
+	 * @param Element|null $refElement The reference element from the tree
+	 *   builder; not used for TreeBuilder::ROOT
+	 * @param bool $isBlank Whether the node being inserted counts as blank. A
+	 *   blank node is not redirected into a p-wrapper which is itself blank.
+	 * @return array Two-element list: the effective parent SerializerNode,
+	 *   and the reference node to hand to the serializer (null for ROOT)
+	 */
+	private function getParentForInsert( $preposition, $refElement, $isBlank ) {
+		if ( $preposition === TreeBuilder::ROOT ) {
+			return [ $this->serializer->getRootNode(), null ];
+		}
+
+		$refNode = $refElement->userData;
+		if ( $preposition === TreeBuilder::BEFORE ) {
+			// getParentNode() is given a SerializerNode at every other call
+			// site in this class, so pass the node here too, not the Element.
+			return [ $this->serializer->getParentNode( $refNode ), $refNode ];
+		}
+
+		$refData = $refNode->snData;
+		if ( $refData->currentCloneElement ) {
+			// The tag stack was split: insert into the latest clone
+			$cloneNode = $refData->currentCloneElement->userData;
+			return [ $cloneNode, $cloneNode ];
+		}
+		if ( $refData->childPElement
+			&& ( !$isBlank || !$refData->childPElement->userData->snData->isBlank )
+		) {
+			// There is an open p-wrapper: insert into it
+			$pWrapNode = $refData->childPElement->userData;
+			return [ $pWrapNode, $pWrapNode ];
+		}
+		return [ $refNode, $refNode ];
+	}
+
+	/**
+	 * Create a synthetic "mw:p-wrap" element under the given node, attach
+	 * munger data identifying it as a p-wrapper, and record it on the parent
+	 * as the currently open p-wrapper.
+	 *
+	 * @param SerializerNode $parent The node to insert the wrapper under
+	 * @param int $sourceStart Source position passed to the serializer
+	 * @return SerializerNode The serializer node of the new wrapper
+	 */
+	private function insertPWrapper( SerializerNode $parent, $sourceStart ) {
+		$wrapperData = new RemexMungerData;
+		$wrapperData->isPWrapper = true;
+		$wrapperData->wrapBaseNode = $parent;
+
+		$wrapper = new Element( HTMLData::NS_HTML, 'mw:p-wrap', new PlainAttributes );
+		// The wrapper's userData is read only after this insertion
+		$this->serializer->insertElement(
+			TreeBuilder::UNDER, $parent, $wrapper, false, $sourceStart, 0 );
+
+		$wrapperNode = $wrapper->userData;
+		$wrapperNode->snData = $wrapperData;
+		$parent->snData->childPElement = $wrapper;
+		return $wrapperNode;
+	}
+
+	/**
+	 * Insert a run of text, first splitting the tag stack or opening a
+	 * p-wrapper when the text lands in a context that requires it.
+	 *
+	 * @param int $preposition A TreeBuilder placement constant
+	 * @param Element|null $refElement Reference element from the tree builder
+	 * @param string $text Buffer containing the text
+	 * @param int $start Byte offset of the run within $text
+	 * @param int $length Byte length of the run
+	 * @param int $sourceStart Passed through to the serializer
+	 * @param int $sourceLength Passed through to the serializer
+	 */
+	public function characters( $preposition, $refElement, $text, $start, $length,
+		$sourceStart, $sourceLength
+	) {
+		// True when the run consists solely of HTML whitespace. This must be
+		// "===": getParentForInsert() expects true to mean blank, as comment()
+		// also assumes when it passes a literal true.
+		$isBlank = strspn( $text, "\t\n\f\r ", $start, $length ) === $length;
+
+		list( $parent, $refNode ) = $this->getParentForInsert(
+			$preposition, $refElement, $isBlank );
+		$parentData = $parent->snData;
+		$under = $preposition === TreeBuilder::UNDER;
+
+		// If the parent is splittable and in block mode, split the tag stack
+		if ( $under && $parentData->isSplittable && !$parentData->ancestorPNode ) {
+			$refNode = $this->splitTagStack( $refNode, true, $sourceStart );
+			$parent = $refNode;
+			$parentData = $parent->snData;
+		}
+
+		// Bare text directly under a p-wrapping context gets a p-wrapper
+		if ( $under && $parentData->needsPWrapping ) {
+			$refNode = $this->insertPWrapper( $refNode, $sourceStart );
+			$parent = $refNode;
+			$parentData = $parent->snData;
+		}
+
+		if ( !$isBlank ) {
+			// Non-whitespace characters detected
+			$parentData->hasText = true;
+			$parentData->isBlank = false;
+		}
+		$this->serializer->characters( $preposition, $refNode, $text, $start,
+			$length, $sourceStart, $sourceLength );
+	}
+
+	/**
+	 * Insert an element, closing, opening or disabling p-wrappers and
+	 * splitting the tag stack as needed, then initialise the munger data of
+	 * the new node from its effective parent.
+	 *
+	 * @param int $preposition A TreeBuilder placement constant
+	 * @param Element|null $refElement Reference element from the tree builder
+	 * @param Element $element The element being inserted
+	 * @param bool $void Passed through to the serializer
+	 * @param int $sourceStart Passed through to the serializer
+	 * @param int $sourceLength Passed through to the serializer
+	 */
+	public function insertElement( $preposition, $refElement, Element $element, $void,
+		$sourceStart, $sourceLength
+	) {
+		list( $parent, $newRef ) = $this->getParentForInsert(
+			$preposition, $refElement, false );
+		$parentData = $parent->snData;
+		$elementName = $element->htmlName;
+
+		$inline = isset( self::$onlyInlineElements[$elementName] );
+		$under = $preposition === TreeBuilder::UNDER;
+
+		// If the element is non-inline and the parent is a p-wrapper,
+		// close the parent and insert into its parent instead
+		if ( $under && $parentData->isPWrapper && !$inline ) {
+			$parent = $this->serializer->getParentNode( $parent );
+			$parentData = $parent->snData;
+			$parentData->childPElement = null;
+			$newRef = $refElement->userData;
+			// FIXME cannot call endTag() since we don't have an Element
+		}
+
+		// If the parent is splittable and the current element is inline in block
+		// context, or if the current element is a block under a p-wrapper, split
+		// the tag stack.
+		if ( $under && $parentData->isSplittable
+			&& (bool)$parentData->ancestorPNode !== $inline
+		) {
+			$newRef = $this->splitTagStack( $newRef, $inline, $sourceStart );
+			$parent = $newRef;
+			$parentData = $parent->snData;
+		}
+
+		// If the element is inline and we are in body/blockquote, we need
+		// to create a p-wrapper
+		if ( $under && $parentData->needsPWrapping && $inline ) {
+			$newRef = $this->insertPWrapper( $newRef, $sourceStart );
+			$parent = $newRef;
+			$parentData = $parent->snData;
+		}
+
+		// If the element is non-inline and (despite attempting to split above)
+		// there is still an ancestor p-wrap, disable that p-wrap
+		if ( $parentData->ancestorPNode && !$inline ) {
+			$parentData->ancestorPNode->snData->isDisabledPWrapper = true;
+		}
+
+		// An element with element children is a non-blank element
+		$parentData->isBlank = false;
+
+		// Insert the element downstream and so initialise its userData
+		$this->serializer->insertElement( $preposition, $newRef,
+			$element, $void, $sourceStart, $sourceLength );
+
+		// Initialise snData, reusing any data already attached to the node so
+		// that $elementData is always defined below
+		$elementData = $element->userData->snData;
+		if ( !$elementData ) {
+			$elementData = $element->userData->snData = new RemexMungerData;
+		}
+		if ( ( $parentData->isPWrapper || $parentData->isSplittable )
+			&& isset( self::$formattingElements[$elementName] )
+		) {
+			$elementData->isSplittable = true;
+		}
+		if ( $parentData->isPWrapper ) {
+			$elementData->ancestorPNode = $parent;
+		} elseif ( $parentData->ancestorPNode ) {
+			$elementData->ancestorPNode = $parentData->ancestorPNode;
+		}
+		if ( $parentData->wrapBaseNode ) {
+			$elementData->wrapBaseNode = $parentData->wrapBaseNode;
+		} elseif ( $parentData->needsPWrapping ) {
+			$elementData->wrapBaseNode = $parent;
+		}
+		if ( $elementName === 'body'
+			|| $elementName === 'blockquote'
+			|| $elementName === 'html'
+		) {
+			$elementData->needsPWrapping = true;
+		}
+	}
+
+	/**
+	 * Clone nodes in a stack range and return the new parent.
+	 *
+	 * The range runs from $parentNode up to, but not including, the wrap base
+	 * node (inline mode) or the ancestor p-wrapper (block mode). If no node in
+	 * the range has text, nothing is cloned and $parentNode is returned as is.
+	 *
+	 * @param SerializerNode $parentNode The innermost node of the range
+	 * @param bool $inline True to re-create the range inside a new p-wrapper
+	 *   under the wrap base node; false to re-create it directly under the
+	 *   wrap base node
+	 * @param int $pos The source position
+	 * @return SerializerNode The innermost clone, or $parentNode if no split
+	 *   was necessary
+	 * @throws \Exception If the root node is reached before the end of the range
+	 */
+	private function splitTagStack( SerializerNode $parentNode, $inline, $pos ) {
+		$parentData = $parentNode->snData;
+		$wrapBase = $parentData->wrapBaseNode;
+		if ( $inline ) {
+			$cloneEnd = $wrapBase;
+		} else {
+			$cloneEnd = $parentData->ancestorPNode;
+		}
+
+		// Collect the nodes to clone, innermost first
+		$nodes = [];
+		$node = $parentNode;
+		$haveContent = false;
+		$root = $this->serializer->getRootNode();
+		while ( $node !== $cloneEnd ) {
+			$haveContent = $haveContent || $node->snData->hasText;
+
+			$nodes[] = $node;
+			$node = $this->serializer->getParentNode( $node );
+			if ( $node === $root ) {
+				throw new \Exception( 'Did not find end of clone range' );
+			}
+		}
+
+		if ( !$haveContent ) {
+			return $parentNode;
+		}
+
+		// Append the node under which the outermost clone will be inserted
+		if ( $inline ) {
+			$pWrap = $this->insertPWrapper( $wrapBase, $pos );
+			$node = $pWrap;
+			$nodes[] = $node;
+		} else {
+			$pWrap = null;
+			$node = $wrapBase;
+			$nodes[] = $node;
+		}
+
+		// Re-create the range from the outside in. $nodes[$i + 1] is replaced
+		// by its clone on each pass so that clones nest inside one another.
+		for ( $i = count( $nodes ) - 2; $i >= 0; $i-- ) {
+			$node = $nodes[$i];
+			$nodeParent = $nodes[$i + 1];
+			$element = new Element( $node->namespace, $node->name, $node->attrs );
+			$this->serializer->insertElement( TreeBuilder::UNDER, $nodeParent,
+				$element, false, $pos, 0 );
+			// Later insertions under the original node go to the clone
+			$node->snData->currentCloneElement = $element;
+
+			$node = $element->userData;
+			$elementData = $node->snData = new RemexMungerData;
+			if ( $pWrap ) {
+				$elementData->ancestorPNode = $pWrap;
+			}
+			$elementData->isSplittable = true;
+			$elementData->wrapBaseNode = $wrapBase;
+		}
+		return $node;
+	}
+
+	/**
+	 * Forward an end tag to the serializer unchanged.
+	 *
+	 * @param Element $element
+	 * @param int $sourceStart
+	 * @param int $sourceLength
+	 */
+	public function endTag( Element $element, $sourceStart, $sourceLength ) {
+		$this->serializer->endTag( $element, $sourceStart, $sourceLength );
+	}
+
+	/**
+	 * Forward a doctype to the serializer unchanged.
+	 *
+	 * @param mixed $name
+	 * @param mixed $public
+	 * @param mixed $system
+	 * @param mixed $quirks
+	 * @param int $sourceStart
+	 * @param int $sourceLength
+	 */
+	public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
+		$this->serializer->doctype(
+			$name, $public, $system, $quirks, $sourceStart, $sourceLength );
+	}
+
+	/**
+	 * Insert a comment. The insertion point is resolved in the same way as
+	 * for other nodes, with the comment treated as blank; no p-wrapper is
+	 * opened here.
+	 *
+	 * @param int $preposition A TreeBuilder placement constant
+	 * @param Element|null $refElement Reference element from the tree builder
+	 * @param string $text
+	 * @param int $sourceStart
+	 * @param int $sourceLength
+	 */
+	public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
+		$target = $this->getParentForInsert( $preposition, $refElement, true );
+		// Only the reference node is needed; the effective parent is ignored
+		$refNode = $target[1];
+		$this->serializer->comment(
+			$preposition, $refNode, $text, $sourceStart, $sourceLength );
+	}
+
+	/**
+	 * Forward a parse error to the serializer unchanged.
+	 *
+	 * @param string $text
+	 * @param int $pos
+	 */
+	public function error( $text, $pos ) {
+		$this->serializer->error( $text, $pos );
+	}
+
+	/**
+	 * Forward an attribute merge to the serializer unchanged.
+	 *
+	 * @param Element $element
+	 * @param Attributes $attrs
+	 * @param int $sourceStart
+	 */
+	public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
+		$this->serializer->mergeAttributes( $element, $attrs, $sourceStart );
+	}
+
+	/**
+	 * Forward a node removal to the serializer unchanged.
+	 *
+	 * @param Element $element
+	 * @param int $sourceStart
+	 */
+	public function removeNode( Element $element, $sourceStart ) {
+		$this->serializer->removeNode( $element, $sourceStart );
+	}
+
+	/**
+	 * Insert $newParent under $element through this munger, so that the usual
+	 * p-wrapping logic and snData initialisation apply to it, then ask the
+	 * serializer to reparent the children of $element.
+	 *
+	 * NOTE(review): if Serializer::reparentChildren() also inserts $newParent
+	 * itself, the element would be inserted twice — confirm against the
+	 * RemexHtml version this change targets.
+	 *
+	 * @param Element $element The current parent
+	 * @param Element $newParent The element which will adopt the children
+	 * @param int $sourceStart
+	 */
+	public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
+		$this->insertElement(
+			TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );
+		$this->serializer->reparentChildren( $element, $newParent, $sourceStart );
+	}
+}
diff --git a/includes/tidy/RemexDriver.php b/includes/tidy/RemexDriver.php
new file mode 100644
index 0000000..b42796e
--- /dev/null
+++ b/includes/tidy/RemexDriver.php
@@ -0,0 +1,37 @@
+<?php
+
+namespace MediaWiki\Tidy;
+
+use RemexHtml\Serializer\Serializer;
+use RemexHtml\Tokenizer\Tokenizer;
+use RemexHtml\TreeBuilder\Dispatcher;
+use RemexHtml\TreeBuilder\TreeBuilder;
+
+
+/**
+ * Tidy driver which cleans up HTML by parsing it with RemexHtml and
+ * re-serialising it through RemexCompatMunger and RemexCompatFormatter.
+ */
+class RemexDriver extends TidyDriverBase {
+	/**
+	 * @param array $config Driver configuration, handed to TidyDriverBase as is
+	 */
+	public function __construct( array $config ) {
+		parent::__construct( $config );
+	}
+
+	/**
+	 * Parse the text as a fragment in a <body> context and return the
+	 * re-serialised result.
+	 *
+	 * @param string $text The HTML to tidy
+	 * @return string The serializer's result
+	 */
+	public function tidy( $text ) {
+		// Build the pipeline from the output end backwards:
+		// tokenizer -> dispatcher -> tree builder -> munger -> serializer
+		$serializer = new Serializer( new RemexCompatFormatter );
+		$treeBuilder = new TreeBuilder(
+			new RemexCompatMunger( $serializer ),
+			[
+				'ignoreErrors' => true,
+				'ignoreNulls' => true,
+			]
+		);
+
+		$tokenizerOptions = [
+			'ignoreErrors' => true,
+			'ignoreCharRefs' => true,
+			'ignoreNulls' => true,
+			'skipPreprocess' => true,
+		];
+		$tokenizer = new Tokenizer( new Dispatcher( $treeBuilder ), $text, $tokenizerOptions );
+
+		$fragmentContext = [
+			'fragmentNamespace' => \RemexHtml\HTMLData::NS_HTML,
+			'fragmentName' => 'body'
+		];
+		$tokenizer->execute( $fragmentContext );
+
+		return $serializer->getResult();
+	}
+}
diff --git a/includes/tidy/RemexMungerData.php
b/includes/tidy/RemexMungerData.php
new file mode 100644
index 0000000..9f93f57
--- /dev/null
+++ b/includes/tidy/RemexMungerData.php
@@ -0,0 +1,28 @@
+<?php
+
+namespace MediaWiki\Tidy;
+
+/**
+ * Per-node bookkeeping which RemexCompatMunger attaches to serializer nodes
+ * (as their snData) and which RemexCompatFormatter reads when serialising.
+ */
+class RemexMungerData {
+ /**
+  * The open "mw:p-wrap" element directly under this node, if any. Set by
+  * RemexCompatMunger::insertPWrapper() and reset to null when a non-inline
+  * element is inserted under the wrapper.
+  *
+  * @var \RemexHtml\TreeBuilder\Element|null
+  */
+ public $childPElement;
+
+ /**
+  * The p-wrapper node which is an ancestor of this node, if any. Copied
+  * from the parent on insertion, or set to the parent when the parent is
+  * itself a p-wrapper.
+  *
+  * @var \RemexHtml\Serializer\SerializerNode|null
+  */
+ public $ancestorPNode;
+
+ /**
+  * The node under which p-wrappers for this node's context are inserted:
+  * the parent of a p-wrapper, or the nearest ancestor with needsPWrapping.
+  *
+  * @var \RemexHtml\Serializer\SerializerNode|null
+  */
+ public $wrapBaseNode;
+
+ /**
+  * The most recent clone of this node made by
+  * RemexCompatMunger::splitTagStack(). When set, insertions under this node
+  * are redirected to the clone.
+  *
+  * @var \RemexHtml\TreeBuilder\Element|null
+  */
+ public $currentCloneElement;
+
+ /** @var bool True for the synthetic "mw:p-wrap" elements */
+ public $isPWrapper = false;
+ /**
+  * @var bool True for formatting elements inserted under a p-wrapper or
+  *   another splittable node, and for clones made by splitTagStack()
+  */
+ public $isSplittable = false;
+ /** @var bool True for the root node and for body, blockquote and html elements */
+ public $needsPWrapping = false;
+ /**
+  * @var bool Set by RemexCompatMunger::characters() when text containing
+  *   non-whitespace characters is inserted under this node
+  */
+ public $hasText = false;
+ /**
+  * @var bool Cleared when non-whitespace text or an element is inserted
+  *   under this node; a p-wrapper that is still blank is serialised without
+  *   <p> tags
+  */
+ public $isBlank = true;
+ /**
+  * @var bool Set on a p-wrapper when a non-inline element ends up beneath
+  *   it; such a wrapper is serialised without <p> tags
+  */
+ public $isDisabledPWrapper = false;
+
+ /**
+  * Reject writes to properties which are not declared above, so that a
+  * misspelled property name fails loudly instead of creating a new property.
+  *
+  * @param string $name
+  * @param mixed $value
+  * @throws \Exception Always
+  */
+ public function __set( $name, $value ) {
+ throw new \Exception( "Cannot set property \"$name\"" );
+ }
+}
diff --git a/tests/phpunit/includes/tidy/RemexDriverTest.php
b/tests/phpunit/includes/tidy/RemexDriverTest.php
new file mode 100644
index 0000000..a085db3
--- /dev/null
+++ b/tests/phpunit/includes/tidy/RemexDriverTest.php
@@ -0,0 +1,226 @@
+<?php
+
+class RemexDriverTest extends MediaWikiTestCase {
+ static private $remexTidyTestData = [
+ // Tests from Html5Depurate
+ [
+ 'Empty string',
+ "",
+ ""
+ ],
+ [
+ 'Simple p-wrap',
+ "x",
+ "<p>x</p>"
+ ],
+ [
+ 'No p-wrap of blank node',
+ " ",
+ " "
+ ],
+ [
+ 'p-wrap terminated by div',
+ "x<div></div>",
+ "<p>x</p><div></div>"
+ ],
+ [
+ 'p-wrap not terminated by span',
+ "x<span></span>",
+ "<p>x<span></span></p>"
+ ],
+ [
+ 'An element is non-blank and so gets p-wrapped',
+ "<span></span>",
+ "<p><span></span></p>"
+ ],
+ [
+ 'The blank flag is set after a block-level element',
+ "<div></div> ",
+ "<div></div> "
+ ],
+ [
+ 'Blank detection between two block-level elements',
+ "<div></div> <div></div>",
+ "<div></div> <div></div>"
+ ],
+ [
+ 'But p-wrapping of non-blank content works after an
element',
+ "<div></div>x",
+ "<div></div><p>x</p>"
+ ],
+ [
+ 'p-wrapping between two block-level elements',
+ "<div></div>x<div></div>",
+ "<div></div><p>x</p><div></div>"
+ ],
+ [
+ 'p-wrap inside blockquote',
+ "<blockquote>x</blockquote>",
+ "<blockquote><p>x</p></blockquote>"
+ ],
+ [
+ 'A comment is blank for p-wrapping purposes',
+ "<!-- x -->",
+ "<!-- x -->"
+ ],
+ [
+ 'A comment is blank even when a p-wrap was opened by a
text node',
+ " <!-- x -->",
+ " <!-- x -->"
+ ],
+ [
+ 'A comment does not open a p-wrap',
+ "<!-- x -->x",
+ "<!-- x --><p>x</p>"
+ ],
+ [
+ 'A comment does not close a p-wrap',
+ "x<!-- x -->",
+ "<p>x<!-- x --></p>"
+ ],
+ [
+ 'Empty li',
+ "<ul><li></li></ul>",
+ "<ul><li class=\"mw-empty-elt\"></li></ul>"
+ ],
+ [
+ 'li with element',
+ "<ul><li><span></span></li></ul>",
+ "<ul><li><span></span></li></ul>"
+ ],
+ [
+ 'li with text',
+ "<ul><li>x</li></ul>",
+ "<ul><li>x</li></ul>"
+ ],
+ [
+ 'Empty tr',
+ "<table><tbody><tr></tr></tbody></table>",
+ "<table><tbody><tr
class=\"mw-empty-elt\"></tr></tbody></table>"
+ ],
+ [
+ 'Empty p',
+ "<p>\n</p>",
+ "<p class=\"mw-empty-elt\">\n</p>"
+ ],
+ [
+ 'No p-wrapping of an inline element which contains a
block element (T150317)',
+ "<small><div>x</div></small>",
+ "<small><div>x</div></small>"
+ ],
+ [
+ 'p-wrapping of an inline element which contains an
inline element',
+ "<small><b>x</b></small>",
+ "<p><small><b>x</b></small></p>"
+ ],
+ [
+ 'p-wrapping is enabled in a blockquote in an inline
element',
+ "<small><blockquote>x</blockquote></small>",
+ "<small><blockquote><p>x</p></blockquote></small>"
+ ],
+ [
+ 'All bare text should be p-wrapped even when surrounded
by block tags',
+
"<small><blockquote>x</blockquote></small>y<div></div>z",
+
"<small><blockquote><p>x</p></blockquote></small><p>y</p><div></div><p>z</p>"
+ ],
+
+ [
+ 'Split tag stack 1',
+ "<small>x<div>y</div>z</small>",
+
"<p><small>x</small></p><small><div>y</div></small><p><small>z</small></p>"
+ ],
+ [
+ 'Split tag stack 2',
+ "<small><div>y</div>z</small>",
+ "<small><div>y</div></small><p><small>z</small></p>"
+ ],
+ [
+ 'Split tag stack 3',
+ "<small>x<div>y</div></small>",
+ "<p><small>x</small></p><small><div>y</div></small>"
+ ],
+ [
+ 'Split tag stack 4',
+ "a<span>b<i>c<div>d</div></i>e</span>",
+
"<p>a<span>b<i>c</i></span></p><span><i><div>d</div></i></span><p><span>e</span></p>"
+ ],
+ [
+ "Split tag stack regression check 1",
+ "x<span><div>y</div></span>",
+ "<p>x</p><span><div>y</div></span>"
+ ],
+ [
+ "Split tag stack regression check 2",
+ "a<span><i><div>d</div></i>e</span>",
+
"<p>a</p><span><i><div>d</div></i></span><p><span>e</span></p>"
+ ],
+ // Simple tests from pwrap.js
+ [
+ 'Simple pwrap test 1',
+ 'a',
+ '<p>a</p>'
+ ],
+ [
+ '<span> is not a splittable tag, but gets p-wrapped in
simple wrapping scenarios',
+ '<span>a</span>',
+ '<p><span>a</span></p>'
+ ],
+ [
+ 'Simple pwrap test 3',
+ 'x <div>a</div> <div>b</div> y',
+ '<p>x </p><div>a</div> <div>b</div><p> y</p>'
+ ],
+ [
+ 'Simple pwrap test 4',
+ 'x<!--c--> <div>a</div> <div>b</div> <!--c-->y',
+ '<p>x<!--c--> </p><div>a</div> <div>b</div>
<!--c--><p>y</p>'
+ ],
+ // Complex tests from pwrap.js
+ [
+ 'Complex pwrap test 1',
+ '<i>x<div>a</div>y</i>',
+ '<p><i>x</i></p><i><div>a</div></i><p><i>y</i></p>'
+ ],
+ [
+ 'Complex pwrap test 2',
+ 'a<small>b</small><i>c<div>d</div>e</i>f',
+
'<p>a<small>b</small><i>c</i></p><i><div>d</div></i><p><i>e</i>f</p>'
+ ],
+ [
+ 'Complex pwrap test 3',
+ 'a<small>b<i>c<div>d</div></i>e</small>',
+
'<p>a<small>b<i>c</i></small></p><small><i><div>d</div></i></small><p><small>e</small></p>'
+ ],
+ [
+ 'Complex pwrap test 4',
+ 'x<small><div>y</div></small>',
+ '<p>x</p><small><div>y</div></small>'
+ ],
+ [
+ 'Complex pwrap test 5',
+ 'a<small><i><div>d</div></i>e</small>',
+
'<p>a</p><small><i><div>d</div></i></small><p><small>e</small></p>'
+ ],
+ [
+ 'Complex pwrap test 6',
+ '<i>a<div>b</div>c<b>d<div>e</div>f</b>g</i>',
+
'<p><i>a</i></p><i><div>b</div></i><p><i>c<b>d</b></i></p><i><b><div>e</div></b></i><p><i><b>f</b>g</i></p>'
+ ],
+ [
+ 'Complex pwrap test 7',
+
'<i><b><font><div>x</div></font></b><div>y</div><b><font><div>z</div></font></b></i>',
+
'<i><b><font><div>x</div></font></b><div>y</div><b><font><div>z</div></font></b></i>'
+ ],
+ ];
+
+	/**
+	 * Data provider for testTidy().
+	 *
+	 * @return array[] List of [ description, input, expected output ] cases
+	 */
+	public function provider() {
+		return self::$remexTidyTestData;
+	}
+
+	/**
+	 * Tidy the input with a RemexDriver constructed from an empty config and
+	 * require the output to match the expected HTML exactly.
+	 *
+	 * @dataProvider provider
+	 * @param string $desc Description of the case, used as the failure message
+	 * @param string $input HTML to tidy
+	 * @param string $expected Expected tidied HTML
+	 */
+	public function testTidy( $desc, $input, $expected ) {
+		$driver = new MediaWiki\Tidy\RemexDriver( [] );
+		// assertSame() rather than assertEquals(): the comparison must be a
+		// strict string comparison, with no loose type juggling.
+		$this->assertSame( $expected, $driver->tidy( $input ), $desc );
+	}
+}
--
To view, visit https://gerrit.wikimedia.org/r/337771
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I900155b7dd199b0ae2a3b9cdb6db5136fc4f35a8
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Tim Starling <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits