Subramanya Sastry has uploaded a new change for review.
https://gerrit.wikimedia.org/r/95696
Change subject: Fixed DSR errors introduced by DOM spec changes
......................................................................
Fixed DSR errors introduced by DOM spec changes
* DSR computation used DOM Spec markers on a-tags to detect
source wikitext syntax in computing dsr tag widths. This code
went out of sync with DOM spec changes.
* This patch introduces helpers to detect a-tag source syntax
and adds additional data-parsoid markers for rfc/pmid/isbn
uses.
* Code cleanup: WT_TagWidths in mediawiki.wikitext.constants.js
and its uses now use upper-case HTML tag names. These were the
only lower-case HTML tag names in that file.
* No change in parser test results, but there are 25 fewer DSR
warnings and NaN entries are eliminated.
Change-Id: Idf0a15d64739da5d82f0632864987ff547daded6
---
M js/lib/dom.computeDSR.js
M js/lib/dom.migrateTemplateMarkerMetas.js
M js/lib/mediawiki.wikitext.constants.js
M js/lib/pegTokenizer.pegjs.txt
4 files changed, 64 insertions(+), 44 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid
refs/changes/96/95696/1
diff --git a/js/lib/dom.computeDSR.js b/js/lib/dom.computeDSR.js
index 49ac60d..ff4f287 100644
--- a/js/lib/dom.computeDSR.js
+++ b/js/lib/dom.computeDSR.js
@@ -5,6 +5,25 @@
Util = require('./mediawiki.Util.js').Util,
dumpDOM = require('./dom.dumper.js').dumpDOM;
+// Helper function to detect when an A-node uses [[..]] style wikilink syntax
+// mw:ExtLink rel-type is not sufficient anymore since [[..]] style links can
+// also be tagged ext-links
+function usesWikiLinkSyntax(aNode, dp) {
+ return aNode.getAttribute("rel") === "mw:WikiLink" ||
+ (dp.stx && dp.stx !== "url" && dp.stx !== "protocol");
+}
+
+function usesExtLinkSyntax(aNode, dp) {
+ return aNode.getAttribute("rel") === "mw:ExtLink" &&
+ (!dp.stx || (dp.stx !== "url" && dp.stx !== "protocol"));
+}
+
+function usesURLLinkSyntax(aNode, dp) {
+ return aNode.getAttribute("rel") === "mw:ExtLink" &&
+ dp.stx &&
+ (dp.stx === "url" || dp.stx === "protocol");
+}
+
/* ------------------------------------------------------------------------
* TSR = "Tag Source Range". Start and end offsets giving the location
* where the tag showed up in the original source.
@@ -79,8 +98,7 @@
if (!dp) {
return null;
} else {
- var aType = node.getAttribute("rel");
- if (aType === "mw:WikiLink" &&
+ if (usesWikiLinkSyntax(node, dp) &&
!DU.isExpandedAttrsMetaType(node.getAttribute("typeof")))
{
if (dp.stx === "piped") {
@@ -93,8 +111,10 @@
} else {
return [2, 2];
}
- } else if (aType === "mw:ExtLink" && dp.tsr && dp.stx
!== 'url') {
+ } else if (dp.tsr && usesExtLinkSyntax(node, dp)) {
return [dp.targetOff - dp.tsr[0], 1];
+ } else if (usesURLLinkSyntax(node, dp)) {
+ return [0, 0];
} else {
return null;
}
@@ -111,19 +131,19 @@
etWidth = widths[1];
}
} else {
- var nodeName = node.nodeName.toLowerCase();
+ var nodeName = node.nodeName;
// 'tr' tags not in the original source have zero width
- if (nodeName === 'tr' && !dp.startTagSrc) {
+ if (nodeName === 'TR' && !dp.startTagSrc) {
stWidth = 0;
etWidth = 0;
} else {
var wtTagWidth = Consts.WT_TagWidths[nodeName];
if (stWidth === null) {
// we didn't have a tsr to tell us how
wide this tag was.
- if (nodeName === 'a') {
+ if (nodeName === 'A') {
wtTagWidth =
computeATagWidth(node, dp);
stWidth = wtTagWidth ?
wtTagWidth[0] : null;
- } else if (nodeName === 'li' ||
nodeName === 'dd') {
+ } else if (nodeName === 'LI' ||
nodeName === 'DD') {
stWidth =
computeListEltWidth(node, nodeName);
} else if (wtTagWidth) {
stWidth = wtTagWidth[0];
@@ -364,13 +384,13 @@
* we don't have to worry about the above
decisions and checks.
*
----------------------------------------------------------------- */
- if (DU.hasNodeName(child, "a") &&
- child.getAttribute("rel") ===
"mw:WikiLink" &&
+ if (child.nodeName === 'A' &&
+ usesWikiLinkSyntax(child, dp) &&
dp.stx !== "piped")
{
/*
-------------------------------------------------------------
* This check here eliminates artifical
DSR mismatches on content
- * text of the a-node because of entity
expansion, etc.
+ * text of the A-node because of entity
expansion, etc.
*
* Ex: [[7%25 solution]] will be
rendered as:
* <a href=....>7% solution</a>
diff --git a/js/lib/dom.migrateTemplateMarkerMetas.js
b/js/lib/dom.migrateTemplateMarkerMetas.js
index c93c8c0..c6d27ab 100644
--- a/js/lib/dom.migrateTemplateMarkerMetas.js
+++ b/js/lib/dom.migrateTemplateMarkerMetas.js
@@ -33,7 +33,7 @@
// We can migrate the meta-tag across this node's
end-tag barrier only
// if that end-tag is zero-width.
- tagWidth =
Consts.WT_TagWidths[node.nodeName.toLowerCase()];
+ tagWidth = Consts.WT_TagWidths[node.nodeName];
if (tagWidth && tagWidth[0] === 0 &&
!DU.isLiteralHTMLNode(node)) {
node.parentNode.insertBefore(firstChild, node);
}
@@ -45,7 +45,7 @@
// We can migrate the meta-tag across this node's
end-tag barrier only
// if that end-tag is zero-width.
- tagWidth =
Consts.WT_TagWidths[node.nodeName.toLowerCase()];
+ tagWidth = Consts.WT_TagWidths[node.nodeName];
if (tagWidth && tagWidth[1] === 0 &&
!DU.isLiteralHTMLNode(node)) {
node.parentNode.insertBefore(lastChild,
node.nextSibling);
}
diff --git a/js/lib/mediawiki.wikitext.constants.js
b/js/lib/mediawiki.wikitext.constants.js
index f10b6c8..7fea8c1 100644
--- a/js/lib/mediawiki.wikitext.constants.js
+++ b/js/lib/mediawiki.wikitext.constants.js
@@ -210,34 +210,34 @@
// Known wikitext tag widths -- these are known statically
// but other widths are computed or updated based on actual wikitext
usage
WT_TagWidths: {
- "body" : [0,0],
- "html" : [0,0],
- "head" : [0,0],
- "p" : [0,0],
- "meta" : [0,0],
- "tbody" : [0,0],
- "pre" : [1,0],
- "ol" : [0,0],
- "ul" : [0,0],
- "dl" : [0,0],
- "li" : [1,0],
- "dt" : [1,0],
- "dd" : [1,0],
- "h1" : [1,1],
- "h2" : [2,2],
- "h3" : [3,3],
- "h4" : [4,4],
- "h5" : [5,5],
- "h6" : [6,6],
- "hr" : [4,0],
- "table" : [2,2],
- "tr" : [null,0],
- "td" : [null,0],
- "th" : [null,0],
- "b" : [3,3],
- "i" : [2,2],
- "br" : [0,0],
- "figure": [2,2]
+ "BODY" : [0,0],
+ "HTML" : [0,0],
+ "HEAD" : [0,0],
+ "P" : [0,0],
+ "META" : [0,0],
+ "TBODY" : [0,0],
+ "PRE" : [1,0],
+ "OL" : [0,0],
+ "UL" : [0,0],
+ "DL" : [0,0],
+ "LI" : [1,0],
+ "DT" : [1,0],
+ "DD" : [1,0],
+ "H1" : [1,1],
+ "H2" : [2,2],
+ "H3" : [3,3],
+ "H4" : [4,4],
+ "H5" : [5,5],
+ "H6" : [6,6],
+ "HR" : [4,0],
+ "TABLE" : [2,2],
+ "TR" : [null,0],
+ "TD" : [null,0],
+ "TH" : [null,0],
+ "B" : [3,3],
+ "I" : [2,2],
+ "BR" : [0,0],
+ "FIGURE": [2,2]
},
// HTML tags whose wikitext equivalents are zero-width.
@@ -256,10 +256,10 @@
Object.keys(WikitextConstants.WT_TagWidths).forEach(function(tag) {
// This special case can be fixed by maybe removing them WT_TagWidths.
// They may no longer be necessary -- to be investigated in another
patch.
- if (tag !== 'html' && tag !== 'head' && tag !== 'body') {
+ if (tag !== 'HTML' && tag !== 'HEAD' && tag !== 'BODY') {
var widths = WikitextConstants.WT_TagWidths[tag];
if (widths[0] === 0 && widths[1] === 0) {
- zeroWidthTags.push(tag.toUpperCase());
+ zeroWidthTags.push(tag);
}
}
});
diff --git a/js/lib/pegTokenizer.pegjs.txt b/js/lib/pegTokenizer.pegjs.txt
index d250d4b..5418acb 100644
--- a/js/lib/pegTokenizer.pegjs.txt
+++ b/js/lib/pegTokenizer.pegjs.txt
@@ -861,7 +861,7 @@
new KV('mw:content', [ref, identifier].join(' ')),
new KV( 'typeof', 'mw:ExtLink/' + ref )
],
- {tsr: [pos0, pos]})
+ {stx: "protocol", tsr: [pos0, pos]})
];
}
@@ -893,7 +893,7 @@
new KV('mw:content', 'ISBN ' + isbn),
new KV('typeof', 'mw:WikiLink/ISBN')
],
- {tsr: [pos0, pos]})
+ {stx: "protocol", tsr: [pos0, pos]})
];
}
--
To view, visit https://gerrit.wikimedia.org/r/95696
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Idf0a15d64739da5d82f0632864987ff547daded6
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits