Eflyjason has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/399179 )
Change subject: Create a Python Pywikibot script to download Wikimedia database
dump
......................................................................
Create a Python Pywikibot script to download Wikimedia database dump
Bug: T123885
Change-Id: I3f2bad7a4bfa622017765958c3f7d6bcc9b42105
---
A scripts/download_dump.py
M scripts/i18n
2 files changed, 143 insertions(+), 1 deletion(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core
refs/changes/79/399179/1
diff --git a/scripts/download_dump.py b/scripts/download_dump.py
new file mode 100644
index 0000000..15346ee
--- /dev/null
+++ b/scripts/download_dump.py
@@ -0,0 +1,142 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+This bot downloads dump from dumps.wikimedia.org.
+
+This script understands the following command-line arguments:
+
+&params;
+
+Furthermore, the following command line parameters are supported:
+
+ -hours:# Use this parameter to make the script repeat itself
+ after # hours. Hours can be defined as a decimal. 0.01
+ hours are 36 seconds; 0.1 are 6 minutes.
+
+ -wikiname:# The name of the wiki (e.g. frwiki).
+
+ -filename:# The name of the file (e.g. abstract.xml)
+
+ -storepath:# The stored file's path.
+
+"""
+#
+# (C) Yifei He, 2017
+#
+# Distributed under the terms of the MIT license.
+#
+from __future__ import absolute_import, division, unicode_literals
+
+import datetime
+import os.path
+import requests
+import time
+from shutil import copyfile
+
+import pywikibot
+
+from pywikibot import i18n, Bot, pagegenerators
+
+# This is required for the text that is shown when you run this script
+# with the parameter -help.
+docuReplacements = {
+ '&params;': pagegenerators.parameterHelp,
+}
+
+class DownloadDumpBot(Bot):
+
+ """Download dump bot."""
+
+ availableOptions = {
+ 'hours': 1,
+ 'no_repeat': True,
+ 'wikiname': '',
+ 'filename': '',
+ 'storepath': '',
+ }
+
+ def __init__(self, **kwargs):
+ """Constructor."""
+ super(DownloadDumpBot, self).__init__(**kwargs)
+
+ def run(self):
+ """Run bot."""
+ while True:
+ wait = False
+ now = time.strftime("%d %b %Y %H:%M:%S (UTC)", time.gmtime())
+
+ download_filename = self.getOption('wikiname') + '-latest-' +
self.getOption('filename')
+ download_path = self.getOption('wikiname') + '/latest/' +
download_filename
+
+ # https://wikitech.wikimedia.org/wiki/Help:Toolforge#Dumps
+ toolforge_dump_path = '/public/dumps/public/' + download_path
+ if os.path.isfile(toolforge_dump_path):
+ copyfile(toolforge_dump_path, self.getOption('storepath') +
download_filename)
+ pywikibot.output('Copying file from ' + toolforge_dump_path)
+ else:
+ url = 'https://dumps.wikimedia.org/' + download_path
+
+ response = requests.get(url)
+ pywikibot.output('Downloading file from ' + url)
+
+ if response.status_code == 200:
+ with open(self.getOption('storepath') + download_filename,
'wb') as f:
+ f.write(response.content)
+ else:
+ response.raise_for_status()
+
+ if self.getOption('no_repeat'):
+ pywikibot.output('Done.')
+ return
+ elif not wait:
+ if self.getOption('hours') < 1.0:
+ pywikibot.output('Sleeping {0} minutes, now {1}'.format(
+ (self.getOption('hours') * 60), now))
+ else:
+ pywikibot.output('Sleeping {0} hours, now {1}'.format(
+ self.getOption('hours'), now))
+ time.sleep(self.getOption('hours') * 60 * 60)
+
+
+def main(*args):
+ """
+ Process command line arguments and invoke bot.
+
+ If args is an empty list, sys.argv is used.
+
+ @param args: command line arguments
+ @type args: list of unicode
+ """
+ opts = {}
+ local_args = pywikibot.handle_args(args)
+ gen_factory = pagegenerators.GeneratorFactory()
+ for arg in local_args:
+ if arg.startswith('-hours:'):
+ opts['hours'] = float(arg[len('-hours:'):])
+ opts['no_repeat'] = False
+ elif arg.startswith('-wikiname'):
+ if len(arg) == len('-wikiname'):
+ opts['wikiname'] = pywikibot.input(u'Enter the wiki name:')
+ else:
+ opts['wikiname'] = arg[len('-wikiname:'):]
+ elif arg.startswith('-filename'):
+ if len(arg) == len('-filename'):
+ opts['filename'] = pywikibot.input(u'Enter the filename:')
+ else:
+ opts['filename'] = arg[len('-filename:'):]
+ elif arg.startswith('-storepath'):
+ if len(arg) == len('-storepath'):
+ opts['storepath'] = pywikibot.input(u'Enter the store path:')
+ else:
+ opts['storepath'] = arg[len('-storepath:'):]
+ else:
+ gen_factory.handleArg(arg)
+
+ generator = gen_factory.getCombinedGenerator(preload=True)
+
+ bot = DownloadDumpBot(generator=generator, **opts)
+ bot.run()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/scripts/i18n b/scripts/i18n
index dbd96c5..3e74e89 160000
--- a/scripts/i18n
+++ b/scripts/i18n
@@ -1 +1 @@
-Subproject commit dbd96c5bcc3d1a25602b88f6d31230cf5c62ea43
+Subproject commit 3e74e8925affa9613b5ab4527ebb1d4331ae8c47
--
To view, visit https://gerrit.wikimedia.org/r/399179
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I3f2bad7a4bfa622017765958c3f7d6bcc9b42105
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Eflyjason <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits