[MediaWiki-commits] [Gerrit] mediawiki...parsoid[master]: Get rid of the generic_tag rule

jenkins-bot (Code Review) Tue, 20 Dec 2016 16:06:26 -0800

jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/328434 )


Change subject: Get rid of the generic_tag rule
......................................................................


Get rid of the generic_tag rule

  * It's only used in xmlish_tag.

Change-Id: Ic16d5ef900a03df06336b3497cc6977cb0ef0eb9
---
M lib/wt2html/pegTokenizer.pegjs
1 file changed, 42 insertions(+), 41 deletions(-)

Approvals:
  Subramanya Sastry: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/wt2html/pegTokenizer.pegjs b/lib/wt2html/pegTokenizer.pegjs
index b4bd3e0..f487a3c 100644
--- a/lib/wt2html/pegTokenizer.pegjs
+++ b/lib/wt2html/pegTokenizer.pegjs
@@ -76,6 +76,24 @@
         }
     };
 
+    /* ------------------------------------------------------------------------
+     * Extension tags should be parsed with higher priority than anything else.
+     *
+     * The trick we use is to strip out the content inside a matching tag-pair
+     * and not tokenize it. The content, if it needs to parsed (for example,
+     * for <ref>, <*include*> tags), is parsed in a fresh tokenizer context
+     * which means any error correction that needs to happen is restricted to
+     * the scope of the extension content and doesn't spill over to the higher
+     * level.  Ex: <math><!--foo</math>.
+     *
+     * IGNORE: {{ this just balances the blocks in this comment for pegjs
+     *
+     * This trick also lets us prevent extension content (that don't accept WT)
+     * from being parsed as wikitext (Ex: <math>\frac{foo\frac{bar}}</math>)
+     * We don't want the "}}" being treated as a template closing tag and
+     * closing outer templates.
+     * --------------------------------------------------------------------- */
+
     var isXMLTag = function(name, block) {
         var lName = name.toLowerCase();
         var uName = name.toUpperCase();
@@ -1117,24 +1135,6 @@
     }
   / & { return stops.dec('pre'); }
 
-/* -----------------------------------------------------------------------
- * Extension tags should be parsed with higher priority than anything else.
- * The trick we use is to strip out the content inside a matching tag-pair
- * and not tokenize it. The content, if it needs to parsed (for example,
- * for <ref>, <*include*> tags), is parsed in a fresh tokenizer context
- * which means any error correction that needs to happen is restricted to
- * the scope of the extension content and doesn't spill over to the higher
- * level.  Ex: <math><!--foo</math>.
- *
- * This trick also lets us prevent extension content (that don't accept WT)
- * from being parsed as wikitext (Ex: <math>\frac{foo\frac{bar}}</math>)
- * We don't want the "}}" being treated as a template closing tag and closing
- * outer templates.
- * ----------------------------------------------------------------------- */
-
-xmlish_tag =
-    t:generic_tag & { return isXMLTag(t.name, false); } { return maybeExtensionTag(t); }
-
 /*
  * Nowiki treats anything inside it as plain text. It could thus also be
  * defined as an extension that returns its raw input text, possibly wrapped
@@ -1211,7 +1211,7 @@
 tag_name_chars = [^\t\n\v />\0]
 tag_name = $([A-Za-z] tag_name_chars*)
 
-generic_tag
+xmlish_tag
   = & {
       // By the time we get to `doTableStuff` in the php parser, we've already
       // safely encoded element attributes. See 55313f4e in core.
@@ -1221,13 +1221,14 @@
       return stops.push('tableCellArg', false);
     }
     "<"
-    end:"/"? name:tag_name
+    end:"/"? name:$(tn:tag_name & { return isXMLTag(tn, false); })
     attribs:generic_newline_attributes
     space_or_newline* // No need to preserve this -- canonicalize on RT via dirty diff
     selfclose:"/"?
     bad_ws:space* // No need to preserve this -- canonicalize on RT via dirty diff
     ">" {
         stops.pop('tableCellArg');
+
         var lcName = name.toLowerCase();
         var isVoidElt = Util.isVoidElement(lcName) ? true : null;
         // Support </br>
@@ -1248,9 +1249,27 @@
         if (broken || bad_ws.length > 0) {
             res.dataAttribs.brokenHTMLTag = true;
         }
-        return res;
+
+        return maybeExtensionTag(res);
     }
     / & { return stops.pop('tableCellArg'); }
+
+/*
+ * A variant of xmlish_tag, but also checks if the tag name is a block-level
+ * tag as defined in
+ * http://www.w3.org/TR/html5/syntax.html#tag-open-state and
+ * following paragraphs.
+ */
+block_tag
+  = "<" end:"/"?
+    name:$(tn:tag_name & { return isXMLTag(tn, true); })
+    attribs:generic_newline_attributes
+    space_or_newline*
+    selfclose:"/"?
+    ">" {
+      var t = tu.buildXMLTag(name, name.toLowerCase(), attribs, end, selfclose, tsrOffsets());
+      return [maybeExtensionTag(t)];
+    }
 
 // A generic attribute that can span multiple lines.
 generic_newline_attribute
@@ -1333,7 +1352,7 @@
               // Accept insane tags-inside-attributes as attribute names.
               // The sanitizer will strip and shadow them for roundtripping.
               // Example: <hiddentext>generated with.. </hiddentext>
-              / &generic_tag nb:nested_block_line
+              / &xmlish_tag nb:nested_block_line
                 // `nested_block_line` can return zero or more blocks.
                 // Assure that we've got at least one, otherwise that plus
                 // below is trouble.
@@ -1371,24 +1390,6 @@
   / s:$space* t:table_attribute_preprocessor_text &(space_or_newline/ eof / '!!' / '|') {
       return tu.getAttrVal(t, startOffset() + s.length, endOffset());
     }
-
-/*
- * A variant of generic_tag, but also checks if the tag name is a block-level
- * tag as defined in
- * http://www.w3.org/TR/html5/syntax.html#tag-open-state and
- * following paragraphs.
- */
-block_tag
-  = "<" end:"/"?
-    name:$(tn:tag_name & { return isXMLTag(tn, true); })
-    attribs:generic_newline_attributes
-    space_or_newline*
-    selfclose:"/"?
-    ">" {
-      var t = tu.buildXMLTag(name, name.toLowerCase(), attribs, end, selfclose, tsrOffsets());
-      return [maybeExtensionTag(t)];
-    }
-
 
 /*********************************************************
  *   Lists
@@ -1915,7 +1916,7 @@
 
 /**
  * noinclude / includeonly / onlyinclude rules. These are normally
- * handled by the generic_tag rule, except where generic tags are not
+ * handled by the xmlish_tag rule, except where generic tags are not
  * allowed- for example in directives, which are allowed in various attribute
  * names and -values.
  *

-- 
To view, visit https://gerrit.wikimedia.org/r/328434
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ic16d5ef900a03df06336b3497cc6977cb0ef0eb9
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <abrea...@wikimedia.org>
Gerrit-Reviewer: C. Scott Ananian <canan...@wikimedia.org>
Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org>
Gerrit-Reviewer: Tim Starling <tstarl...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

[MediaWiki-commits] [Gerrit] mediawiki...parsoid[master]: Get rid of the generic_tag rule

Reply via email to