jenkins-bot has submitted this change and it was merged.
Change subject: Tidy up tidy usage
......................................................................
Tidy up tidy usage
* There's a branch path in the sanitizer that depends on $wgUseTidy,
which means the test output differs from the output on wiki.
* In general, we should set these variables to match the wiki behaviour
in tests.
* Exposes T92892, Sanitizer removes empty tags when tidy is disabled.
* Tweaked tests for T19663 to use an extension tag to show that
HTML5 tags with non-word characters make it through the parser
intact (before being ultimately sanitized).
Change-Id: I09c72fd739e11a8b757f37dc4c790758d782ad73
---
M RELEASE-NOTES-1.25
M includes/parser/ParserOptions.php
M tests/parser/parserTest.inc
M tests/parser/parserTests.txt
M tests/parser/parserTestsParserHook.php
M tests/phpunit/includes/parser/NewParserTest.php
6 files changed, 101 insertions(+), 62 deletions(-)
Approvals:
Arlolra: Looks good to me, but someone else must approve
Cscott: Looks good to me, approved
jenkins-bot: Verified
diff --git a/RELEASE-NOTES-1.25 b/RELEASE-NOTES-1.25
index 5e08efd..88bfc15 100644
--- a/RELEASE-NOTES-1.25
+++ b/RELEASE-NOTES-1.25
@@ -174,6 +174,8 @@
This requires the fa_sha1 field being populated.
* Removed rel="archives" from the "View history" link, as it did not pass
HTML validation.
+* $wgUseTidy is now set when parserTests are run with the tidy option to match
+ output on wiki.
=== Action API changes in 1.25 ===
* (T67403) XML tag highlighting is now only performed for formats
diff --git a/includes/parser/ParserOptions.php
b/includes/parser/ParserOptions.php
index b09fe76..9e06ee2 100644
--- a/includes/parser/ParserOptions.php
+++ b/includes/parser/ParserOptions.php
@@ -25,7 +25,7 @@
* @brief Set options of the Parser
*
* All member variables are supposed to be private in theory, although in
- * practise this is not the case.
+ * practice this is not the case.
*
* @ingroup Parser
*/
diff --git a/tests/parser/parserTest.inc b/tests/parser/parserTest.inc
index 17769ad..e18c22b 100644
--- a/tests/parser/parserTest.inc
+++ b/tests/parser/parserTest.inc
@@ -593,6 +593,14 @@
}
}
+ if ( isset( $opts['tidy'] ) ) {
+ if ( !$this->tidySupport->isEnabled() ) {
+ return $this->showSkipped();
+ } else {
+ $options->setTidy( true );
+ }
+ }
+
if ( isset( $opts['title'] ) ) {
$titleText = $opts['title'];
} else {
@@ -624,10 +632,6 @@
$output->setTOCEnabled( !isset( $opts['notoc'] ) );
$out = $output->getText();
if ( isset( $opts['tidy'] ) ) {
- if ( !$this->tidySupport->isEnabled() ) {
- return $this->showSkipped();
- }
- $out = MWTidy::tidy( $out );
$out = preg_replace( '/\s+$/', '', $out );
}
@@ -877,10 +881,7 @@
'wgDisableLangConversion' => false,
'wgDisableTitleConversion' => false,
// Tidy options.
- // We always set 'wgUseTidy' to false when parsing, but certain
- // test-running modes still use tidy if available, so ensure
- // that the tidy-related options are all set to their defaults.
- 'wgUseTidy' => false,
+ 'wgUseTidy' => isset( $opts['tidy'] ),
'wgAlwaysUseTidy' => false,
'wgDebugTidy' => false,
'wgTidyConf' => $IP . '/includes/tidy.conf',
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index 966b666..53814c5 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -1209,6 +1209,8 @@
!! test
Non-word characters don't terminate tag names (bug 17663, 40670, 52022)
!! wikitext
+<blockquote|>a</blockquote>
+
<b→> doesn't terminate </b→>
<bä> doesn't terminate </bä>
@@ -1219,7 +1221,8 @@
<sub-ID#1>
!! html
-<p><b→> doesn't terminate </b→>
+<p><blockquote|>a</blockquote>
+</p><p><b→> doesn't terminate </b→>
</p><p><bä> doesn't terminate </bä>
</p><p><boo> doesn't terminate </boo>
</p><p><s.foo> doesn't terminate </s.foo>
@@ -1228,9 +1231,13 @@
!! end
# There is a tidy bug here: http://sourceforge.net/p/tidy/bugs/946/
+# If the non-word-character tag made it through the sanitizer, tidy
+# would munge it up.
!! test
Non-word characters don't terminate tag names + tidy
!! wikitext
+<blockquote|>a</blockquote>
+
<b→> doesn't terminate </b→>
<bä> doesn't terminate </bä>
@@ -1241,11 +1248,31 @@
<sub-ID#1>
!! html+tidy
+<p><blockquote|>a</p>
<p><b→> doesn't terminate </b→></p>
<p><bä> doesn't terminate </bä></p>
<p><boo> doesn't terminate </boo></p>
<p><s.foo> doesn't terminate </s.foo></p>
<p><sub-ID#1></p>
+!! end
+
+###
+### (See tests/parser/parserTestsParserHook.php for the <tåg> extension.)
+### This checks that HTML5 tags (with non-word characters in the tag
+### name) make it safely through the parser -- the Sanitizer will
+### munge them later, as it should.
+###
+!! test
+Non-word characters are valid in extension tags (T19663)
+!! wikitext
+<tåg>tåg</tåg>
+!! html
+<pre>
+'tåg'
+array (
+)
+</pre>
+
!! end
!! test
@@ -1254,10 +1281,8 @@
</b>
<s.foo>s</s>
-!! html
-<p></b>
-</p><p><s.foo>s</s>
-</p>
+!! html+tidy
+<p><s.foo>s</p>
!! end
###
@@ -1745,7 +1770,6 @@
!! end
## PHP parser emits output which is broken
-## XXX The parsoid output doesn't match the tidy output.
!! test
Unclosed HTML p-tags should be handled properly
!! wikitext
@@ -1755,9 +1779,10 @@
b
!! html/php+tidy
<div>
-<p>foo</div></p>
+<p>foo</p>
+</div>
<p>a</p>
-b</div>
+<p>b</p>
!! html/parsoid
<div data-parsoid='{"stx":"html"}'><p data-parsoid='{"stx":"html",
"autoInsertedEnd":true}'>foo</p></div>
<p>a</p>
@@ -7690,9 +7715,6 @@
!! end
# TODO: Fix html2html mode (bug 51055)!
-# This </br> handling was added as part of bug 50831; but it
-# differs from how PHP+tidy handles this. We should investigate
-# this.
!! test
Parsoid: Broken br tag recognition
!! options
@@ -7701,12 +7723,9 @@
</br>
<br/ >
-!! html/php+tidy
-<p></br></p>
+!! html+tidy
<p><br /></p>
-!! html/parsoid
-<p><br></p>
-<p><br/></p>
+<p><br /></p>
!! end
!! test
@@ -8303,10 +8322,6 @@
</small>
!!end
-# This is a bug in the PHP parser + tidy combination.
-# (The </tr> tag gets parsed as text and html-escaped by PHP,
-# and then fostered out of the table by tidy.)
-# We believe the Parsoid output to be correct.
!! test
Table with missing opening <tr> tag
!! options
@@ -8316,14 +8331,7 @@
<td>foo</td>
</tr>
</table>
-!! html/php+tidy
-<p></tr></p>
-<table>
-<tr>
-<td>foo</td>
-</tr>
-</table>
-!! html/parsoid
+!! html+tidy
<table>
<tr>
<td>foo</td>
@@ -13413,7 +13421,7 @@
!! end
!! test
-TOC regression (bug 9764)
+TOC regression (T11764)
!! wikitext
== title 1 ==
=== title 1.1 ===
@@ -13585,7 +13593,7 @@
!! end
!! test
-TOC regression (bug 12077)
+TOC regression (T14077)
!! wikitext
__TOC__
== title 1 ==
@@ -14210,16 +14218,17 @@
!! end
# FIXME: this is still bad HTML tag nesting
+# FIXME: doBlockLevels won't wrap this in a paragraph because it contains a div
!! test
Media link with nasty text
-fixme: doBlockLevels won't wrap this in a paragraph because it contains a div
!! wikitext
[[Media:Foobar.jpg|Safe Link<div style=display:none>"
onmouseover="alert(document.cookie)" onfoo="</div>]]
!! html
<a href="http://example.com/images/3/3a/Foobar.jpg" class="internal"
title="Foobar.jpg">Safe Link<div style="display:none">"
onmouseover="alert(document.cookie)" onfoo="</div></a>
!! html+tidy
-<p><a href="http://example.com/images/3/3a/Foobar.jpg" class="internal"
title="Foobar.jpg">Safe Link<div style="display:none">"
onmouseover="alert(document.cookie)" onfoo="</div></a></p>
+<p><a href="http://example.com/images/3/3a/Foobar.jpg" class="internal"
title="Foobar.jpg">Safe Link</a></p>
+<div style="display:none">" onmouseover="alert(document.cookie)" onfoo="</div>
!! end
!! test
@@ -15433,6 +15442,7 @@
<li class="toclevel-1 tocsection-1"><a href="#onmouseover.3D"><span
class="tocnumber">1</span> <span class="toctext">onmouseover=</span></a></li>
</ul>
</div>
+<p></p>
!! end
!! test
@@ -19205,6 +19215,7 @@
<h2><a href="#Quote" class="mw-headline-anchor" aria-hidden="true" title="Link
to this section">§</a><span class="mw-headline"
id="Quote"><blockquote>Quote</blockquote></span><span
class="mw-editsection"><span class="mw-editsection-bracket">[</span><a
href="/index.php?title=Main_Page&action=edit&section=1" title="Edit
section: Quote">edit</a><span
class="mw-editsection-bracket">]</span></span></h2>
!! html+tidy
+<p></p>
<div id="toc" class="toc">
<div id="toctitle">
<h2>Contents</h2>
@@ -19213,6 +19224,7 @@
<li class="toclevel-1 tocsection-1"><a href="#Quote"><span
class="tocnumber">1</span> <span class="toctext">Quote</span></a></li>
</ul>
</div>
+<p></p>
<h2><a href="#Quote" class="mw-headline-anchor" aria-hidden="true" title="Link
to this section">§</a><span class="mw-headline" id="Quote"></span></h2>
<blockquote>
<p><span class="mw-headline" id="Quote">Quote</span></p>
@@ -19261,6 +19273,7 @@
<h2><a href="#Foo_Bar_2" class="mw-headline-anchor" aria-hidden="true"
title="Link to this section">§</a><span class="mw-headline"
id="Foo_Bar_2"><i>Foo</i> <blockquote>Bar</blockquote></span><span
class="mw-editsection"><span class="mw-editsection-bracket">[</span><a
href="/index.php?title=Parser_test&action=edit&section=2" title="Edit
section: Foo Bar">edit</a><span
class="mw-editsection-bracket">]</span></span></h2>
!! html+tidy
+<p></p>
<div id="toc" class="toc">
<div id="toctitle">
<h2>Contents</h2>
@@ -19270,6 +19283,7 @@
<li class="toclevel-1 tocsection-2"><a href="#Foo_Bar_2"><span
class="tocnumber">2</span> <span class="toctext"><i>Foo</i> Bar</span></a></li>
</ul>
</div>
+<p></p>
<h2><a href="#Foo_Bar" class="mw-headline-anchor" aria-hidden="true"
title="Link to this section">§</a><span class="mw-headline"
id="Foo_Bar"><i>Foo</i> <b>Bar</b></span><span class="mw-editsection"><span
class="mw-editsection-bracket">[</span><a
href="/index.php?title=Parser_test&action=edit&section=1" title="Edit
section: Foo Bar">edit</a><span
class="mw-editsection-bracket">]</span></span></h2>
<h2><a href="#Foo_Bar_2" class="mw-headline-anchor" aria-hidden="true"
title="Link to this section">§</a><span class="mw-headline"
id="Foo_Bar_2"><i>Foo</i></span></h2>
<blockquote>
@@ -19344,6 +19358,37 @@
<h2><a href="#test" class="mw-headline-anchor" aria-hidden="true" title="Link
to this section">§</a><span class="mw-headline"
id="test"><bdi>test</bdi></span><span class="mw-editsection"><span
class="mw-editsection-bracket">[</span><a
href="/index.php?title=Parser_test&action=edit&section=1" title="Edit
section: test">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+!! end
+
+# Note that the html output does not have the <p></p>, but the
+# html+tidy output *does*. This is because the empty <p></p> is
+# removed by the sanitizer, but only when tidy is *not* enabled (!).
+!! test
+Empty <p> tag in TOC, removed by Sanitizer (T92892)
+!! wikitext
+__TOC__
+== x ==
+!! html
+<div id="toc" class="toc"><div id="toctitle"><h2>Contents</h2></div>
+<ul>
+<li class="toclevel-1 tocsection-1"><a href="#x"><span
class="tocnumber">1</span> <span class="toctext">x</span></a></li>
+</ul>
+</div>
+
+<h2><a href="#x" class="mw-headline-anchor" aria-hidden="true" title="Link to
this section">§</a><span class="mw-headline" id="x">x</span><span
class="mw-editsection"><span class="mw-editsection-bracket">[</span><a
href="/index.php?title=Parser_test&action=edit&section=1" title="Edit
section: x">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+
+!! html+tidy
+<p></p>
+<div id="toc" class="toc">
+<div id="toctitle">
+<h2>Contents</h2>
+</div>
+<ul>
+<li class="toclevel-1 tocsection-1"><a href="#x"><span
class="tocnumber">1</span> <span class="toctext">x</span></a></li>
+</ul>
+</div>
+<p></p>
+<h2><a href="#x" class="mw-headline-anchor" aria-hidden="true" title="Link to
this section">§</a><span class="mw-headline" id="x">x</span><span
class="mw-editsection"><span class="mw-editsection-bracket">[</span><a
href="/index.php?title=Parser_test&action=edit&section=1" title="Edit
section: x">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
!! end
!! article
@@ -21889,18 +21934,6 @@
</p><p>x<y
</p><p>a>b
</p><p>1<d e>f
-</p>
-!! end
-
-
-# This was a bug in the PHP parser (see bug 17663 and its dups,
-# https://bugzilla.wikimedia.org/show_bug.cgi?id=17663)
-!! test
-Tag names followed by punctuation should not be recognized as tags
-!! wikitext
-<s.ome> text
-!! html
-<p><s.ome> text
</p>
!! end
diff --git a/tests/parser/parserTestsParserHook.php
b/tests/parser/parserTestsParserHook.php
index c8b3e89..221fc79 100644
--- a/tests/parser/parserTestsParserHook.php
+++ b/tests/parser/parserTestsParserHook.php
@@ -29,6 +29,7 @@
static function setup( &$parser ) {
$parser->setHook( 'tag', array( __CLASS__, 'dumpHook' ) );
+ $parser->setHook( 'tåg', array( __CLASS__, 'dumpHook' ) );
$parser->setHook( 'statictag', array( __CLASS__,
'staticTagHook' ) );
return true;
}
diff --git a/tests/phpunit/includes/parser/NewParserTest.php
b/tests/phpunit/includes/parser/NewParserTest.php
index 3ce3e1f..ccd7f96 100644
--- a/tests/phpunit/includes/parser/NewParserTest.php
+++ b/tests/phpunit/includes/parser/NewParserTest.php
@@ -160,9 +160,6 @@
$this->djVuSupport = new DjVuSupport();
// Tidy support
$this->tidySupport = new TidySupport();
- // We always set 'wgUseTidy' to false when parsing, but certain
- // test-running modes still use tidy if available, so ensure
- // that the tidy-related options are all set to their defaults.
$tmpGlobals['wgUseTidy'] = false;
$tmpGlobals['wgAlwaysUseTidy'] = false;
$tmpGlobals['wgDebugTidy'] = false;
@@ -419,6 +416,7 @@
'wgMathDirectory' => $uploadDir . '/math',
'wgDefaultLanguageVariant' => $variant,
'wgLinkHolderBatchSize' => $linkHolderBatchSize,
+ 'wgUseTidy' => isset( $opts['tidy'] ),
);
if ( $config ) {
@@ -727,9 +725,18 @@
. "Current configuration is:\n\$wgTexvc
= '$wgTexvc'" );
}
}
+
if ( isset( $opts['djvu'] ) ) {
if ( !$this->djVuSupport->isEnabled() ) {
$this->markTestSkipped( "SKIPPED: djvu binaries
do not exist or are not executable.\n" );
+ }
+ }
+
+ if ( isset( $opts['tidy'] ) ) {
+ if ( !$this->tidySupport->isEnabled() ) {
+ $this->markTestSkipped( "SKIPPED: tidy
extension is not installed.\n" );
+ } else {
+ $options->setTidy( true );
}
}
@@ -753,12 +760,7 @@
$output->setTOCEnabled( !isset( $opts['notoc'] ) );
$out = $output->getText();
if ( isset( $opts['tidy'] ) ) {
- if ( !$this->tidySupport->isEnabled() ) {
- $this->markTestSkipped( "SKIPPED: tidy
extension is not installed.\n" );
- } else {
- $out = MWTidy::tidy( $out );
- $out = preg_replace( '/\s+$/', '', $out
);
- }
+ $out = preg_replace( '/\s+$/', '', $out );
}
if ( isset( $opts['showtitle'] ) ) {
--
To view, visit https://gerrit.wikimedia.org/r/181777
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I09c72fd739e11a8b757f37dc4c790758d782ad73
Gerrit-PatchSet: 19
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
Gerrit-Reviewer: Anomie <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: Jackmcbarn <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Tim Starling <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits