jenkins-bot has submitted this change and it was merged.
Change subject: New parserTests.php features
......................................................................
New parserTests.php features
Features to support T89331 analysis:
* Support dwdiff for word-level diffing
* Add --mark-ws feature which produces cleaner diffs when line breaks
  differ
* Add optional normalization of parser test output, allowing significant
  differences to be separated from insignificant differences.
Change-Id: I0e151caad1f8b2f97bf20b219f26f3101be82506
---
M tests/TestsAutoLoader.php
M tests/parser/parserTest.inc
M tests/parserTests.php
3 files changed, 132 insertions(+), 5 deletions(-)
Approvals:
Tim Starling: Looks good to me, approved
Subramanya Sastry: Looks good to me, but someone else must approve
jenkins-bot: Verified
diff --git a/tests/TestsAutoLoader.php b/tests/TestsAutoLoader.php
index 8b100a2..2bb1d2e 100644
--- a/tests/TestsAutoLoader.php
+++ b/tests/TestsAutoLoader.php
@@ -143,6 +143,7 @@
'NewParserTest' => "$testDir/phpunit/includes/parser/NewParserTest.php",
'MediaWikiParserTest' =>
"$testDir/phpunit/includes/parser/MediaWikiParserTest.php",
'ParserTest' => "$testDir/parser/parserTest.inc",
+ 'ParserTestResultNormalizer' => "$testDir/parser/parserTest.inc",
'ParserTestParserHook' => "$testDir/parser/parserTestsParserHook.php",
# tests/phpunit/includes/site
diff --git a/tests/parser/parserTest.inc b/tests/parser/parserTest.inc
index e519f59..d602194 100644
--- a/tests/parser/parserTest.inc
+++ b/tests/parser/parserTest.inc
@@ -82,6 +82,9 @@
public $regex = "";
private $savedGlobals = [];
+ private $useDwdiff = false;
+ private $markWhitespace = false;
+ private $normalizationFunctions = [];
/**
* Sets terminal colorization and diff/quick modes depending on OS and
@@ -116,6 +119,18 @@
|| isset( $options['compare'] ) ) ); //
redundant output
$this->showOutput = isset( $options['show-output'] );
+ $this->useDwdiff = isset( $options['dwdiff'] );
+ $this->markWhitespace = isset( $options['mark-ws'] );
+
+ if ( isset( $options['norm'] ) ) {
+ foreach ( explode( ',', $options['norm'] ) as $func ) {
+ if ( in_array( $func, [ 'removeTbody',
'trimWhitespace' ] ) ) {
+ $this->normalizationFunctions[] = $func;
+ } else {
+ echo "Warning: unknown normalization
option \"$func\"\n";
+ }
+ }
+ }
if ( isset( $options['filter'] ) ) {
$options['regex'] = $options['filter'];
@@ -699,6 +714,11 @@
}
$this->teardownGlobals();
+
+ if ( count( $this->normalizationFunctions ) ) {
+ $result = ParserTestResultNormalizer::normalize(
$result, $this->normalizationFunctions );
+ $out = ParserTestResultNormalizer::normalize( $out,
$this->normalizationFunctions );
+ }
$testResult = new ParserTestResult( $desc );
$testResult->expected = $result;
@@ -1469,6 +1489,16 @@
protected function quickDiff( $input, $output,
$inFileTail = 'expected', $outFileTail = 'actual'
) {
+ if ( $this->markWhitespace ) {
+ $pairs = [
+ "\n" => '¶',
+ ' ' => '·',
+ "\t" => '→'
+ ];
+ $input = strtr( $input, $pairs );
+ $output = strtr( $output, $pairs );
+ }
+
# Windows, or at least the fc utility, is retarded
$slash = wfIsWindows() ? '\\' : '/';
$prefix = wfTempDir() . "{$slash}mwParser-" . mt_rand();
@@ -1484,14 +1514,22 @@
global $wgDiff3;
// we assume that people with diff3 also have usual diff
- $shellCommand = ( wfIsWindows() && !$wgDiff3 ) ? 'fc' : 'diff
-au';
+ if ( $this->useDwdiff ) {
+ $shellCommand = 'dwdiff -Pc';
+ } else {
+ $shellCommand = ( wfIsWindows() && !$wgDiff3 ) ? 'fc' :
'diff -au';
+ }
$diff = wfShellExec( "$shellCommand $shellInfile $shellOutfile"
);
unlink( $infile );
unlink( $outfile );
- return $this->colorDiff( $diff );
+ if ( $this->useDwdiff ) {
+ return $diff;
+ } else {
+ return $this->colorDiff( $diff );
+ }
}
/**
@@ -1699,3 +1737,84 @@
return true;
}
}
+
+class ParserTestResultNormalizer {
+ protected $doc, $xpath, $invalid;
+
+ public static function normalize( $text, $funcs ) {
+ $norm = new self( $text );
+ if ( $norm->invalid ) {
+ return $text;
+ }
+ foreach ( $funcs as $func ) {
+ $norm->$func();
+ }
+ return $norm->serialize();
+ }
+
+ protected function __construct( $text ) {
+ $this->doc = new DOMDocument( '1.0', 'utf-8' );
+
+ // Note: parsing a supposedly XHTML document with an XML parser
is not
+ // guaranteed to give accurate results. For example, it may
introduce
+ // differences in the number of line breaks in <pre> tags.
+
+ MediaWiki\suppressWarnings();
+ if ( !$this->doc->loadXML( '<html><body>' . $text .
'</body></html>' ) ) {
+ $this->invalid = true;
+ }
+ MediaWiki\restoreWarnings();
+ $this->xpath = new DOMXPath( $this->doc );
+ $this->body = $this->xpath->query( '//body' )->item( 0 );
+ }
+
+ protected function removeTbody() {
+ foreach ( $this->xpath->query( '//tbody' ) as $tbody ) {
+ while ( $tbody->firstChild ) {
+ $child = $tbody->firstChild;
+ $tbody->removeChild( $child );
+ $tbody->parentNode->insertBefore( $child,
$tbody );
+ }
+ $tbody->parentNode->removeChild( $tbody );
+ }
+ }
+
+ /**
+ * The point of this function is to produce a normalized DOM in which
+ * Tidy's output matches the output of html5depurate. Tidy both trims
+ * and pretty-prints, so this requires fairly aggressive treatment.
+ *
+ * In particular, note that Tidy converts <pre>x</pre> to
<pre>\nx\n</pre>,
+ * which theoretically affects display since the second line break is
not
+ * ignored by compliant HTML parsers.
+ *
+ * This function also removes empty elements, as does Tidy.
+ */
+ protected function trimWhitespace() {
+ foreach ( $this->xpath->query( '//text()' ) as $child ) {
+ if ( strtolower( $child->parentNode->nodeName ) ===
'pre' ) {
+ // Just trim one line break from the start and
end
+ if ( substr_compare( $child->data, "\n", 0 )
=== 0 ) {
+ $child->data = substr( $child->data, 1
);
+ }
+ if ( substr_compare( $child->data, "\n", -1 )
=== 0 ) {
+ $child->data = substr( $child->data, 0,
-1 );
+ }
+ } else {
+ // Trim all whitespace
+ $child->data = trim( $child->data );
+ }
+ if ( $child->data === '' ) {
+ $child->parentNode->removeChild( $child );
+ }
+ }
+ }
+
+ /**
+ * Serialize the XML DOM for comparison purposes. This does not
generate HTML.
+ */
+ protected function serialize() {
+ return strtr( $this->doc->saveXML( $this->body ),
+ [ '<body>' => '', '</body>' => '' ] );
+ }
+}
diff --git a/tests/parserTests.php b/tests/parserTests.php
index b3cb89a..5e15694 100644
--- a/tests/parserTests.php
+++ b/tests/parserTests.php
@@ -27,8 +27,8 @@
define( 'MW_PARSER_TEST', true );
$options = [ 'quick', 'color', 'quiet', 'help', 'show-output',
- 'record', 'run-disabled', 'run-parsoid' ];
-$optionsWithArgs = [ 'regex', 'filter', 'seed', 'setversion', 'file' ];
+ 'record', 'run-disabled', 'run-parsoid', 'dwdiff', 'mark-ws' ];
+$optionsWithArgs = [ 'regex', 'filter', 'seed', 'setversion', 'file', 'norm' ];
require_once __DIR__ . '/../maintenance/commandLine.inc';
require_once __DIR__ . '/TestsAutoLoader.php';
@@ -54,9 +54,16 @@
--keep-uploads Re-use the same upload directory for each test, don't
delete it
--fuzz Do a fuzz test instead of a normal test
--seed <n> Start the fuzz test from the specified seed
- --help Show this help message
--run-disabled run disabled tests
--run-parsoid run parsoid tests (normally disabled)
+ --dwdiff Use dwdiff to display diff output
+ --mark-ws Mark whitespace in diffs by replacing it with symbols
+ --norm=<funcs> Apply a comma-separated list of normalization functions to
+ both the expected and actual output in order to resolve
+ irrelevant differences. The accepted normalization functions
+ are: removeTbody to remove <tbody> tags; and trimWhitespace
+ to trim whitespace from the start and end of text nodes.
+ --help Show this help message
ENDS;
exit( 0 );
--
To view, visit https://gerrit.wikimedia.org/r/236508
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I0e151caad1f8b2f97bf20b219f26f3101be82506
Gerrit-PatchSet: 8
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Tim Starling <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: Jackmcbarn <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Tim Starling <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits