Arlolra has uploaded a new change for review.
https://gerrit.wikimedia.org/r/230926
Change subject: Reduce rt testing noise from normalizing empty tags
......................................................................
Reduce rt testing noise from normalizing empty tags
* Some examples,
enwiki/AMA_International_University
enwiki/2nd_General_Assembly_of_Nova_Scotia
Change-Id: I883dbe4b9fa5e85448093c2762dee2b79c9c0a28
---
M lib/mediawiki.DOMUtils.js
M lib/mediawiki.WikitextSerializer.js
M lib/wts.normalizeDOM.js
3 files changed, 20 insertions(+), 11 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/26/230926/1
diff --git a/lib/mediawiki.DOMUtils.js b/lib/mediawiki.DOMUtils.js
index e32d901..66fbd04 100644
--- a/lib/mediawiki.DOMUtils.js
+++ b/lib/mediawiki.DOMUtils.js
@@ -1249,8 +1249,9 @@
/**
* Does `node` contain nothing or just non-newline whitespace?
+ * `strict` adds the condition that all whitespace is forbidden.
*/
- nodeEssentiallyEmpty: function(node) {
+ nodeEssentiallyEmpty: function(node, strict) {
var childNodes = node.childNodes;
if (0 === childNodes.length) {
return true;
@@ -1259,7 +1260,8 @@
while (n) {
if (DU.isElt(n) && !this.isDiffMarker(n)) {
return false;
- } else if (DU.isText(n) && !/^[ \t]*$/.test(n.nodeValue)) {
+ } else if (DU.isText(n) &&
+ (strict || !/^[ \t]*$/.test(n.nodeValue))) {
return false;
} else if (DU.isComment(n)) {
return false;
diff --git a/lib/mediawiki.WikitextSerializer.js b/lib/mediawiki.WikitextSerializer.js
index 9e6ef8f..be217b5 100644
--- a/lib/mediawiki.WikitextSerializer.js
+++ b/lib/mediawiki.WikitextSerializer.js
@@ -1393,8 +1393,12 @@
var state = new SerializerState(this, this.options);
+ // Init state
+ state.selserMode = selserMode || false;
+ state.rtTestMode = state.rtTestMode && !state.selserMode; // always false in selser mode
+
// Normalize the DOM
- (new Normalizer(state.env, selserMode)).normalizeDOM(body);
+ (new Normalizer(state)).normalizeDOM(body);
// Don't serialize the DOM if debugging is disabled
this.trace(function() {
@@ -1403,9 +1407,6 @@
var out = '';
- // Init state
- state.selserMode = selserMode || false;
- state.rtTestMode = state.rtTestMode && !state.selserMode; // always false in selser mode
state.sep.lastSourceNode = body;
state.currLine.firstNode = body.firstChild;
diff --git a/lib/wts.normalizeDOM.js b/lib/wts.normalizeDOM.js
index f5f4bc3..57d629f 100644
--- a/lib/wts.normalizeDOM.js
+++ b/lib/wts.normalizeDOM.js
@@ -15,9 +15,10 @@
var DU = require('./mediawiki.DOMUtils.js').DOMUtils;
var Consts = require('./mediawiki.wikitext.constants.js').WikitextConstants;
-function Normalizer(env, selserMode) {
- this.env = env;
- this.inSelserMode = selserMode;
+function Normalizer(state) {
+ this.env = state.env;
+ this.inSelserMode = state.selserMode;
+ this.inRtTestMode = state.rtTestMode;
this.inInsertedContent = false;
}
@@ -182,11 +183,16 @@
Normalizer.prototype.stripIfEmpty = function(node) {
var next = DU.nextNonDeletedSibling(node);
- if (DU.nodeEssentiallyEmpty(node)) {
+ var dp = DU.getDataParsoid(node);
+ // In rtTestMode, let's reduce noise by requiring the node to be fully
+ // empty (ie. exclude whitespace text) and closed.
+ var strict = this.inRtTestMode;
+ var autoInserted = this.inRtTestMode &&
+ (dp.autoInsertedStart || dp.autoInsertedEnd);
+ if (!autoInserted && DU.nodeEssentiallyEmpty(node, strict)) {
// Update diff markers (before the deletion)
this.addDiffMarks(node, 'deleted', true);
this.addDiffMarks(node.parentNode, 'children-changed');
-
node.parentNode.removeChild(node);
return next;
} else {
--
To view, visit https://gerrit.wikimedia.org/r/230926
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I883dbe4b9fa5e85448093c2762dee2b79c9c0a28
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits