jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/329503 )
Change subject: Port table2wiki to core
..
Port table2wiki to core
Script table2wiki is ported from compat to core
Bug: T115104
Change-Id: I6aee53160d9b0f1d1e98cf780fe641752ae2f057
---
A scripts/table2wiki.py
1 file changed, 604 insertions(+), 0 deletions(-)
Approvals:
jenkins-bot: Verified
Whym: Looks good to me, but someone else must approve
Xqt: Looks good to me, approved
diff --git a/scripts/table2wiki.py b/scripts/table2wiki.py
new file mode 100644
index 000..3f432cd
--- /dev/null
+++ b/scripts/table2wiki.py
@@ -0,0 +1,604 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+Nifty script to convert HTML-tables to MediaWiki's own syntax.
+
+These command line parameters can be used to specify which pages to work on:
+
+
+
+-xml Retrieve information from a local XML dump (pages_current, see
+ http://download.wikimedia.org).
+ Argument can also be given as "-xml:filename".
+ Searches for pages with HTML tables, and tries to convert them
+ on the live wiki.
+
+-sql Retrieve information from a local mirror.
+ Searches for pages with HTML tables, and tries to convert them
+ on the live wiki.
+
+-namespace:n Number or name of namespace to process. The parameter can be
+ used multiple times. It works in combination with all other
+ parameters, except for the -start parameter. If you e.g.
+ want to iterate over all categories starting at M, use
+ -start:Category:M.
+
+This SQL query can be used to find pages to work on:
+
+ SELECT CONCAT('[[', cur_title, ']]')
+ FROM cur
+ WHERE (cur_text LIKE '%
+# (C) Pywikibot team, 2003-2013
+#
+# Distributed under the terms of the MIT license.
+#
+# Automatically ported from compat branch by compat2core.py script
+#
+from __future__ import absolute_import, unicode_literals
+
+__version__ = '$Id$'
+#
+
+import re
+
+import pywikibot
+from pywikibot import config
+from pywikibot import i18n
+from pywikibot import pagegenerators
+from pywikibot import xmlreader
+
+# This is required for the text that is shown when you run this script
+# with the parameter -help.
+docuReplacements = {
+'': pagegenerators.parameterHelp,
+}
+
+
+class TableXmlDumpPageGenerator(object):
+"""A page generator that will yield all pages that seem to contain an HTML table."""
+
+def __init__(self, xmlfilename):
+"""Constructor."""
+self.xmldump = xmlreader.XmlDump(xmlfilename)
+
+def __iter__(self):
+tableTagR = re.compile('
Phantom42 has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/329503 )
Change subject: Port table2wiki to core
..
Port table2wiki to core
Script table2wiki is ported from compat to core
Bug: T115104
Change-Id: I6aee53160d9b0f1d1e98cf780fe641752ae2f057
---
A scripts/table2wiki.py
1 file changed, 586 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core
refs/changes/03/329503/1
diff --git a/scripts/table2wiki.py b/scripts/table2wiki.py
new file mode 100644
index 000..f776feb
--- /dev/null
+++ b/scripts/table2wiki.py
@@ -0,0 +1,586 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+Nifty script to convert HTML-tables to MediaWiki's own syntax.
+
+These command line parameters can be used to specify which pages to work on:
+
+
+
+-xml Retrieve information from a local XML dump (pages_current, see
+ http://download.wikimedia.org).
+ Argument can also be given as "-xml:filename".
+ Searches for pages with HTML tables, and tries to convert them
+ on the live wiki.
+
+-sql Retrieve information from a local mirror.
+ Searches for pages with HTML tables, and tries to convert them
+ on the live wiki.
+
+-namespace:n Number or name of namespace to process. The parameter can be
+ used multiple times. It works in combination with all other
+ parameters, except for the -start parameter. If you e.g.
+ want to iterate over all categories starting at M, use
+ -start:Category:M.
+
+This SQL query can be used to find pages to work on:
+
+ SELECT CONCAT('[[', cur_title, ']]')
+ FROM cur
+ WHERE (cur_text LIKE '%
+# (C) Pywikibot team, 2003-2013
+#
+# Distributed under the terms of the MIT license.
+#
+# Automatically ported from compat branch by compat2core.py script
+#
+__version__ = '$Id$'
+#
+
+import re
+
+import pywikibot
+from pywikibot import i18n
+from pywikibot import config
+from pywikibot import pagegenerators
+
+# This is required for the text that is shown when you run this script
+# with the parameter -help.
+docuReplacements = {
+'': pagegenerators.parameterHelp,
+}
+
+
+class TableXmlDumpPageGenerator:
+'''
+A page generator that will yield all pages that seem to contain an HTML
+table.
+'''
+def __init__(self, xmlfilename):
+import xmlreader
+self.xmldump = xmlreader.XmlDump(xmlfilename)
+
+def __iter__(self):
+tableTagR = re.compile('