jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/328434 )
Change subject: Get rid of the generic_tag rule
......................................................................
Get rid of the generic_tag rule
* It's only used in xmlish_tag.
Change-Id: Ic16d5ef900a03df06336b3497cc6977cb0ef0eb9
---
M lib/wt2html/pegTokenizer.pegjs
1 file changed, 42 insertions(+), 41 deletions(-)
Approvals:
Subramanya Sastry: Looks good to me, approved
jenkins-bot: Verified
diff --git a/lib/wt2html/pegTokenizer.pegjs b/lib/wt2html/pegTokenizer.pegjs
index b4bd3e0..f487a3c 100644
--- a/lib/wt2html/pegTokenizer.pegjs
+++ b/lib/wt2html/pegTokenizer.pegjs
@@ -76,6 +76,24 @@
}
};
+ /* ------------------------------------------------------------------------
+ * Extension tags should be parsed with higher priority than anything else.
+ *
+ * The trick we use is to strip out the content inside a matching tag-pair
+ * and not tokenize it. The content, if it needs to be parsed (for example,
+ * for <ref>, <*include*> tags), is parsed in a fresh tokenizer context
+ * which means any error correction that needs to happen is restricted to
+ * the scope of the extension content and doesn't spill over to the higher
+ * level. Ex: <math><!--foo</math>.
+ *
+ * IGNORE: {{ this just balances the blocks in this comment for pegjs
+ *
+ * This trick also lets us prevent extension content (that don't accept WT)
+ * from being parsed as wikitext (Ex: <math>\frac{foo\frac{bar}}</math>)
+ * We don't want the "}}" being treated as a template closing tag and
+ * closing outer templates.
+ * --------------------------------------------------------------------- */
+
var isXMLTag = function(name, block) {
var lName = name.toLowerCase();
var uName = name.toUpperCase();
@@ -1117,24 +1135,6 @@
}
/ & { return stops.dec('pre'); }
-/* -----------------------------------------------------------------------
- * Extension tags should be parsed with higher priority than anything else.
- * The trick we use is to strip out the content inside a matching tag-pair
- * and not tokenize it. The content, if it needs to parsed (for example,
- * for <ref>, <*include*> tags), is parsed in a fresh tokenizer context
- * which means any error correction that needs to happen is restricted to
- * the scope of the extension content and doesn't spill over to the higher
- * level. Ex: <math><!--foo</math>.
- *
- * This trick also lets us prevent extension content (that don't accept WT)
- * from being parsed as wikitext (Ex: <math>\frac{foo\frac{bar}}</math>)
- * We don't want the "}}" being treated as a template closing tag and closing
- * outer templates.
- * ----------------------------------------------------------------------- */
-
-xmlish_tag =
- t:generic_tag & { return isXMLTag(t.name, false); } { return maybeExtensionTag(t); }
-
/*
* Nowiki treats anything inside it as plain text. It could thus also be
* defined as an extension that returns its raw input text, possibly wrapped
@@ -1211,7 +1211,7 @@
tag_name_chars = [^\t\n\v />\0]
tag_name = $([A-Za-z] tag_name_chars*)
-generic_tag
+xmlish_tag
= & {
// By the time we get to `doTableStuff` in the php parser, we've already
// safely encoded element attributes. See 55313f4e in core.
@@ -1221,13 +1221,14 @@
return stops.push('tableCellArg', false);
}
"<"
- end:"/"? name:tag_name
+ end:"/"? name:$(tn:tag_name & { return isXMLTag(tn, false); })
attribs:generic_newline_attributes
space_or_newline* // No need to preserve this -- canonicalize on RT via dirty diff
selfclose:"/"?
bad_ws:space* // No need to preserve this -- canonicalize on RT via dirty diff
">" {
stops.pop('tableCellArg');
+
var lcName = name.toLowerCase();
var isVoidElt = Util.isVoidElement(lcName) ? true : null;
// Support </br>
@@ -1248,9 +1249,27 @@
if (broken || bad_ws.length > 0) {
res.dataAttribs.brokenHTMLTag = true;
}
- return res;
+
+ return maybeExtensionTag(res);
}
/ & { return stops.pop('tableCellArg'); }
+
+/*
+ * A variant of xmlish_tag, but also checks if the tag name is a block-level
+ * tag as defined in
+ * http://www.w3.org/TR/html5/syntax.html#tag-open-state and
+ * following paragraphs.
+ */
+block_tag
+ = "<" end:"/"?
+ name:$(tn:tag_name & { return isXMLTag(tn, true); })
+ attribs:generic_newline_attributes
+ space_or_newline*
+ selfclose:"/"?
+ ">" {
+ var t = tu.buildXMLTag(name, name.toLowerCase(), attribs, end, selfclose, tsrOffsets());
+ return [maybeExtensionTag(t)];
+ }
// A generic attribute that can span multiple lines.
generic_newline_attribute
@@ -1333,7 +1352,7 @@
// Accept insane tags-inside-attributes as attribute names.
// The sanitizer will strip and shadow them for roundtripping.
// Example: <hiddentext>generated with.. </hiddentext>
- / &generic_tag nb:nested_block_line
+ / &xmlish_tag nb:nested_block_line
// `nested_block_line` can return zero or more blocks.
// Assure that we've got at least one, otherwise that plus
// below is trouble.
@@ -1371,24 +1390,6 @@
/ s:$space* t:table_attribute_preprocessor_text &(space_or_newline/ eof / '!!' / '|') {
return tu.getAttrVal(t, startOffset() + s.length, endOffset());
}
-
-/*
- * A variant of generic_tag, but also checks if the tag name is a block-level
- * tag as defined in
- * http://www.w3.org/TR/html5/syntax.html#tag-open-state and
- * following paragraphs.
- */
-block_tag
- = "<" end:"/"?
- name:$(tn:tag_name & { return isXMLTag(tn, true); })
- attribs:generic_newline_attributes
- space_or_newline*
- selfclose:"/"?
- ">" {
- var t = tu.buildXMLTag(name, name.toLowerCase(), attribs, end, selfclose, tsrOffsets());
- return [maybeExtensionTag(t)];
- }
-
/*********************************************************
* Lists
@@ -1915,7 +1916,7 @@
/**
* noinclude / includeonly / onlyinclude rules. These are normally
- * handled by the generic_tag rule, except where generic tags are not
+ * handled by the xmlish_tag rule, except where generic tags are not
* allowed- for example in directives, which are allowed in various attribute
* names and -values.
*
--
To view, visit https://gerrit.wikimedia.org/r/328434
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ic16d5ef900a03df06336b3497cc6977cb0ef0eb9
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
Gerrit-Reviewer: C. Scott Ananian <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Tim Starling <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits