https://www.mediawiki.org/wiki/Special:Code/MediaWiki/112491
Revision: 112491
Author: gwicke
Date: 2012-02-27 16:51:20 +0000 (Mon, 27 Feb 2012)
Log Message:
-----------
Add martian-endtags regexp wrapper around dumpGrepper.
Modified Paths:
--------------
trunk/extensions/VisualEditor/tests/parser/dumpGrepper.js
Added Paths:
-----------
trunk/extensions/VisualEditor/tests/parser/dumpGrepPatterns/
trunk/extensions/VisualEditor/tests/parser/dumpGrepPatterns/martian-endtags.sh
Added:
trunk/extensions/VisualEditor/tests/parser/dumpGrepPatterns/martian-endtags.sh
===================================================================
--- trunk/extensions/VisualEditor/tests/parser/dumpGrepPatterns/martian-endtags.sh (rev 0)
+++ trunk/extensions/VisualEditor/tests/parser/dumpGrepPatterns/martian-endtags.sh 2012-02-27 16:51:20 UTC (rev 112491)
@@ -0,0 +1,43 @@
+#!/bin/sh
+#
+# Grep a gzipped XML dump for "martian" end tags: closing tags whose name is
+# not one of the extension, wikitext or HTML tag names listed below.
+#
+# Usage: martian-endtags.sh <xmldump.gz>
+
+# extension tag hooks enabled at en.wikipedia.org
+exts="categorytree|charinsert|gallery|hiero|imagemap|inputbox|math|nowiki|poem|pre|ref|references|source|syntaxhighlight|timeline"
+
+# wikitext-level tags (nowiki also appears in $exts)
+wiki="nowiki|includeonly|noinclude|onlyinclude"
+
+# just the html5 elements
+html5s="a|abbr|address|area|article|aside|audio|b|base|bdi|bdo|blockquote|body|br|button|canvas|caption|cite|code|col|colgroup|command|data|datalist|dd|del|details|dfn|div|dl|dt|em|embed|fieldset|figcaption|figure|footer|form|h1|h2|h3|h4|h5|h6|head|header|hgroup|hr|html|i|iframe|img|input|ins|kbd|keygen|label|legend|li|link|map|mark|menu|meta|meter|nav|noscript|object|ol|optgroup|option|output|p|param|pre|progress|q|rp|rt|ruby|s|samp|script|section|select|small|source|span|strong|style|sub|summary|sup|table|tbody|td|textarea|tfoot|th|thead|time|title|tr|track|u|ul|var|video|wbr"
+
+# older html elements that are not in the html5 list above
+htmlold="center|font|tt"
+
+# all known tag names as a single regexp alternation
+normaltags="$exts|$wiki|$html5s|$htmlold"
+
+# Earlier attempts, kept for reference; they use $htmls, which this script
+# never defines.
+#regexp="<(?!\/|$exts|$htmls)[^>]*>.*?<!--([^<]+|<(\/|$exts|$htmls)[^>]*>)*<\/(?!$exts|$htmls)[^>]*>"
+#regexp="<(?!/|$normaltags)[^&]+>[^&]+<!--[^&-]*</(?!$normaltags)((?!>).)+>"
+#regexp="<(?!\/|$exts|$htmls)[^>]*>"
+
+# An end tag whose name starts with a letter and is not a known tag name.
+# The \b keeps the negative lookahead from rejecting names that merely start
+# with a known name (e.g. "</blah>" starts with "b").
+regexp="</(?=[a-z])(?!(?:$normaltags)\b)[^&]+>"
+
+#echo $regexp
+
+if [ -z "$1" ];then
+ echo "Usage: $0 <xmldump.gz>"
+ exit 1
+fi
+
+# Resolve dumpGrepper.js relative to this script so any working directory
+# works; quote the dump path so names with spaces or globs survive.
+zcat "$1" | node "$(dirname "$0")/../dumpGrepper.js" -i "$regexp"
Property changes on:
trunk/extensions/VisualEditor/tests/parser/dumpGrepPatterns/martian-endtags.sh
___________________________________________________________________
Added: svn:executable
+ *
Added: svn:eol-style
+ native
Modified: trunk/extensions/VisualEditor/tests/parser/dumpGrepper.js
===================================================================
--- trunk/extensions/VisualEditor/tests/parser/dumpGrepper.js 2012-02-27 16:40:01 UTC (rev 112490)
+++ trunk/extensions/VisualEditor/tests/parser/dumpGrepper.js 2012-02-27 16:51:20 UTC (rev 112491)
@@ -29,8 +29,6 @@
}
} ).argv;
- console.log( argv );
-
var flags = '';
if(argv.i) {
flags += 'i';
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs