Eflyjason has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/399179 )

Change subject: Create a Python Pywikibot script to download Wikimedia database 
dump
......................................................................

Create a Python Pywikibot script to download Wikimedia database dump

Bug: T123885
Change-Id: I3f2bad7a4bfa622017765958c3f7d6bcc9b42105
---
A scripts/download_dump.py
M scripts/i18n
2 files changed, 143 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core 
refs/changes/79/399179/1

diff --git a/scripts/download_dump.py b/scripts/download_dump.py
new file mode 100644
index 0000000..15346ee
--- /dev/null
+++ b/scripts/download_dump.py
@@ -0,0 +1,142 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+This bot downloads a dump from dumps.wikimedia.org.
+
+This script understands the following command-line arguments:
+
+&params;
+
+Furthermore, the following command line parameters are supported:
+
+    -hours:#        Use this parameter to make the script repeat itself
+                    after # hours. Hours can be defined as a decimal. 0.01
+                    hours are 36 seconds; 0.1 are 6 minutes.
+
+    -wikiname:#     The name of the wiki (e.g. frwiki).
+
+    -filename:#     The name of the file (e.g. abstract.xml)
+
+    -storepath:#    The stored file's path.
+
+"""
+#
+# (C) Yifei He, 2017
+#
+# Distributed under the terms of the MIT license.
+#
+from __future__ import absolute_import, division, unicode_literals
+
+import datetime
+import os.path
+import requests
+import time
+from shutil import copyfile
+
+import pywikibot
+
+from pywikibot import i18n, Bot, pagegenerators
+
+# This is required for the text that is shown when you run this script
+# with the parameter -help.
+docuReplacements = {
+    '&params;': pagegenerators.parameterHelp,
+}
+
+class DownloadDumpBot(Bot):
+
+    """Bot that fetches the "latest" dump file of a single wiki.
+
+    The file is copied from the local Toolforge dump directory when it is
+    present there; otherwise it is downloaded from dumps.wikimedia.org.
+
+    Options:
+        hours: pause between two runs, in hours (may be fractional)
+        no_repeat: if True, fetch the file once and stop
+        wikiname: name of the wiki (e.g. frwiki)
+        filename: dump file name without the wiki prefix
+            (e.g. abstract.xml)
+        storepath: directory the file is stored in
+    """
+
+    availableOptions = {
+        'hours': 1,
+        'no_repeat': True,
+        'wikiname': '',
+        'filename': '',
+        'storepath': '',
+    }
+
+    def __init__(self, **kwargs):
+        """Constructor."""
+        super(DownloadDumpBot, self).__init__(**kwargs)
+
+    @staticmethod
+    def _download(url, target, chunk_size=1024 * 1024):
+        """Stream the content found at url into the file target.
+
+        @param url: address of the dump file
+        @param target: path of the file to write
+        @param chunk_size: number of bytes requested per read
+        @raise requests.exceptions.HTTPError: the server answered with an
+            error status
+        """
+        pywikibot.output('Downloading file from ' + url)
+        # stream=True defers fetching the body, so the dump is written in
+        # chunks instead of being held in memory as a whole.
+        response = requests.get(url, stream=True)
+        try:
+            if response.status_code != 200:
+                # Raises for 4xx/5xx; any other status writes nothing.
+                response.raise_for_status()
+                return
+            with open(target, 'wb') as f:
+                for chunk in response.iter_content(chunk_size):
+                    f.write(chunk)
+        finally:
+            # A streamed response keeps its connection until it is closed.
+            response.close()
+
+    def run(self):
+        """Fetch the dump once, or again after every 'hours' hours.
+
+        @raise requests.exceptions.HTTPError: the download request was
+            answered with an error status
+        """
+        hours = self.getOption('hours')
+        download_filename = (self.getOption('wikiname') + '-latest-' +
+                             self.getOption('filename'))
+        download_path = (self.getOption('wikiname') + '/latest/' +
+                         download_filename)
+        # os.path.join() supplies the separator when 'storepath' does not
+        # end with one.
+        target = os.path.join(self.getOption('storepath'),
+                              download_filename)
+        # https://wikitech.wikimedia.org/wiki/Help:Toolforge#Dumps
+        toolforge_dump_path = '/public/dumps/public/' + download_path
+
+        while True:
+            if os.path.isfile(toolforge_dump_path):
+                pywikibot.output('Copying file from ' + toolforge_dump_path)
+                copyfile(toolforge_dump_path, target)
+            else:
+                self._download(
+                    'https://dumps.wikimedia.org/' + download_path, target)
+
+            if self.getOption('no_repeat'):
+                pywikibot.output('Done.')
+                return
+
+            # Taken here so the message shows when the pause starts rather
+            # than when the transfer began.
+            now = time.strftime("%d %b %Y %H:%M:%S (UTC)", time.gmtime())
+            if hours < 1.0:
+                pywikibot.output('Sleeping {0} minutes, now {1}'.format(
+                    hours * 60, now))
+            else:
+                pywikibot.output('Sleeping {0} hours, now {1}'.format(
+                    hours, now))
+            time.sleep(hours * 60 * 60)
+
+
+def main(*args):
+    """
+    Parse the command line and run a DownloadDumpBot.
+
+    If args is an empty list, sys.argv is used.
+
+    @param args: command line arguments
+    @type args: list of unicode
+    """
+    # Text options: (command line prefix, bot option, prompt shown when
+    # the argument carries no value).
+    text_options = (
+        ('-wikiname', 'wikiname', u'Enter the wiki name:'),
+        ('-filename', 'filename', u'Enter the filename:'),
+        ('-storepath', 'storepath', u'Enter the store path:'),
+    )
+    options = {}
+    remaining = pywikibot.handle_args(args)
+    factory = pagegenerators.GeneratorFactory()
+    for argument in remaining:
+        if argument.startswith('-hours:'):
+            # Giving an interval also turns off the run-once default.
+            options['hours'] = float(argument[len('-hours:'):])
+            options['no_repeat'] = False
+            continue
+        for prefix, key, prompt in text_options:
+            if not argument.startswith(prefix):
+                continue
+            if argument == prefix:
+                options[key] = pywikibot.input(prompt)
+            else:
+                # Drop the prefix and the separator that follows it.
+                options[key] = argument[len(prefix) + 1:]
+            break
+        else:
+            # Anything else is passed on to the page generator factory.
+            factory.handleArg(argument)
+
+    bot = DownloadDumpBot(
+        generator=factory.getCombinedGenerator(preload=True), **options)
+    bot.run()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/i18n b/scripts/i18n
index dbd96c5..3e74e89 160000
--- a/scripts/i18n
+++ b/scripts/i18n
@@ -1 +1 @@
-Subproject commit dbd96c5bcc3d1a25602b88f6d31230cf5c62ea43
+Subproject commit 3e74e8925affa9613b5ab4527ebb1d4331ae8c47

-- 
To view, visit https://gerrit.wikimedia.org/r/399179
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I3f2bad7a4bfa622017765958c3f7d6bcc9b42105
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Eflyjason <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to