jenkins-bot has submitted this change and it was merged.
Change subject: Don't flatten spans
......................................................................
Don't flatten spans
... so that per-span information for different languages, i.e. the lang and
dir attributes, isn't lost.
Bug: T59582
Change-Id: If1b04714fdc0f4d581ddb858d8d53f6f340dc10b
---
M includes/ExtractFormatter.php
M tests/ExtractFormatterTest.php
2 files changed, 38 insertions(+), 1 deletion(-)
Approvals:
MaxSem: Looks good to me, approved
jenkins-bot: Verified
diff --git a/includes/ExtractFormatter.php b/includes/ExtractFormatter.php
index 6a6a261..06b8609 100644
--- a/includes/ExtractFormatter.php
+++ b/includes/ExtractFormatter.php
@@ -45,7 +45,7 @@
if ( $plainText ) {
$this->flattenAllTags();
} else {
- $this->flatten( array( 'span', 'a' ) );
+ $this->flatten( array( 'a' ) );
}
wfProfileOut( __METHOD__ );
}
@@ -132,4 +132,24 @@
wfProfileOut( __METHOD__ );
return $m[0];
}
+
+ /**
+ * Removes content we've chosen to remove then removes class and style
+ * attributes from the remaining span elements.
+ *
+ * @return array Array of removed DOMElements
+ */
+ public function filterContent() {
+ $removed = parent::filterContent();
+
+ $doc = $this->getDoc();
+ $spans = $doc->getElementsByTagName( 'span' );
+
+ foreach ( $spans as $span ) {
+ $span->removeAttribute( 'class' );
+ $span->removeAttribute( 'style' );
+ }
+
+ return $removed;
+ }
}
diff --git a/tests/ExtractFormatterTest.php b/tests/ExtractFormatterTest.php
index 64537c8..f7ad78b 100644
--- a/tests/ExtractFormatterTest.php
+++ b/tests/ExtractFormatterTest.php
@@ -28,12 +28,29 @@
. "</span> <small class=\"metadata audiolinkinfo\"
style=\"cursor:help;\">([[Wikipedia:Media help|<span style=\"cursor:help;\">"
. "help</span>]]·[[:File:nl-Nederlands.ogg|<span
style=\"cursor:help;\">info</span>]])</small></span>) is a"
. " [[West Germanic languages|West Germanic language]]
and the native language of most of the population of the [[Netherlands]]";
+
return array(
array(
"Dutch ( Nederlands ) is a West Germanic
language and the native language of most of the population of the Netherlands",
$dutch,
true,
),
+
+ array(
+ "<p><span><span
lang=\"baz\">qux</span></span>\n</p>",
+ '<span class="foo"><span
lang="baz">qux</span></span>',
+ false,
+ ),
+ array(
+ "<p><span><span
lang=\"baz\">qux</span></span>\n</p>",
+ '<span style="foo: bar;"><span
lang="baz">qux</span></span>',
+ false,
+ ),
+ array(
+ "<p><span><span
lang=\"qux\">quux</span></span>\n</p>",
+ '<span class="foo"><span style="bar: baz;"
lang="qux">quux</span></span>',
+ false,
+ ),
);
}
--
To view, visit https://gerrit.wikimedia.org/r/183496
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: If1b04714fdc0f4d581ddb858d8d53f6f340dc10b
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/extensions/TextExtracts
Gerrit-Branch: master
Gerrit-Owner: Phuedx <[email protected]>
Gerrit-Reviewer: MaxSem <[email protected]>
Gerrit-Reviewer: Waldir <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits