Lokal Profil has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/378800 )
Change subject: Add mechanism for storing wikipage locally instead of writing
to wiki
......................................................................
Add mechanism for storing wikipage locally instead of writing to wiki
This allows, e.g., local (Docker) testing of page-saving output and unit
testing of report-writing functions.
Currently this is triggered by the command-line argument
"-local_write:<path to dir of output files>", but a better solution would be
an environment variable detected by common.save_to_wiki_or_local().
Support for this command line argument is added to all erfgoedbot scripts
apart from add_coord_to_articles.py (which relies on user interaction).
Also:
* Minor cleanup of add_object_location_monuments.py to reduce linter warnings
Bug: T174614
Change-Id: I2bf650f99a57a0e93dbb0c3d6f520049c3579957
---
M README.md
M erfgoedbot/add_object_location_monuments.py
M erfgoedbot/categorize_images.py
M erfgoedbot/common.py
M erfgoedbot/database_statistics.py
M erfgoedbot/images_of_monuments_without_id.py
M erfgoedbot/missing_commonscat_links.py
M erfgoedbot/populate_image_table.py
M erfgoedbot/unused_monument_images.py
M erfgoedbot/update_database.py
M requirements.txt
M tests/test_common.py
12 files changed, 308 insertions(+), 98 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage
refs/changes/00/378800/1
diff --git a/README.md b/README.md
index 50b5352..98f3ba0 100644
--- a/README.md
+++ b/README.md
@@ -22,9 +22,10 @@
# Build and start the Docker containers
docker-compose -f docker-compose-bot.yml up --build -d
+mkdir -p docker_pages
# Run the bot to harvest a country
-docker-compose -f docker-compose-bot.yml run --rm bot python
erfgoedbot/update_database.py -countrycode:ge -langcode:ka -log
+docker-compose -f docker-compose-bot.yml run --rm bot python
erfgoedbot/update_database.py -countrycode:ge -langcode:ka -log
-local_write:docker_pages
# Update the monuments_all table
docker-compose -f docker-compose-bot.yml run --rm db mysql -h db
s51138__heritage_p --user=heritage --password=password <
erfgoedbot/sql/fill_table_monuments_all.sql
diff --git a/erfgoedbot/add_object_location_monuments.py
b/erfgoedbot/add_object_location_monuments.py
index 1ffc23f..25816f4 100644
--- a/erfgoedbot/add_object_location_monuments.py
+++ b/erfgoedbot/add_object_location_monuments.py
@@ -6,9 +6,9 @@
'''
import pywikibot
-from pywikibot import pagegenerators
import monuments_config as mconfig
+import common as common
from database_connection import (
close_database_connection,
connect_to_monuments_database,
@@ -16,7 +16,8 @@
)
-def locateCountry(countrycode, lang, countryconfig, conn, cursor, conn2,
cursor2):
+def locateCountry(countrycode, lang, countryconfig, conn, cursor, conn2,
+ cursor2, local_write):
'''
Locate images in a single country.
'''
@@ -28,7 +29,7 @@
locationTemplate = locateImage(
page, monumentId, countrycode, lang, countryconfig, conn, cursor)
if locationTemplate:
- addLocation(page, locationTemplate)
+ addLocation(page, locationTemplate, local_write)
def getMonumentsWithoutLocation(countryconfig, conn2, cursor2):
@@ -91,7 +92,10 @@
# not already a template on the page.
templates = page.templates()
- if u'Location' in page.templates() or u'Location dec' in page.templates()
or u'Object location' in page.templates() or u'Object location dec' in
page.templates():
+ if (u'Location' in templates or
+ u'Location dec' in templates or
+ u'Object location' in templates or
+ u'Object location dec' in templates):
pywikibot.output(
u'Location template already found at: %s' % page.title())
return False
@@ -106,8 +110,6 @@
'''
Get coordinates from the erfgoed database
'''
- result = None
-
query = u"""SELECT lat, lon, source FROM monuments_all
WHERE id=%s
AND country=%s
@@ -126,7 +128,7 @@
return False
-def addLocation(page, locationTemplate):
+def addLocation(page, locationTemplate, local_write):
try:
oldtext = page.get()
except pywikibot.NoPage:
@@ -139,7 +141,8 @@
newtext = putAfterTemplate(
oldtext, u'Information', locationTemplate, loose=True)
pywikibot.showDiff(oldtext, newtext)
- page.put(newtext, comment)
+ common.save_to_wiki_or_local(page, comment, newtext,
+ local_path=local_write)
def putAfterTemplate(oldtext, template, toadd, loose=True):
@@ -191,7 +194,7 @@
newtext = oldtext
cats = pywikibot.getCategoryLinks(newtext)
ll = pywikibot.getLanguageLinks(newtext)
- nextext = pywikibot.removeLanguageLinks(newtext)
+ newtext = pywikibot.removeLanguageLinks(newtext)
newtext = pywikibot.removeCategoryLinks(newtext)
newtext += u'\n' + toadd
newtext = pywikibot.replaceCategoryLinks(newtext, cats)
@@ -203,13 +206,11 @@
def main():
countrycode = u''
lang = u''
+ local_write = None
# Connect database, we need that
(conn, cursor) = connect_to_monuments_database()
(conn2, cursor2) = connect_to_commons_database()
-
- generator = None
- genFactory = pagegenerators.GeneratorFactory()
for arg in pywikibot.handleArgs():
option, sep, value = arg.partition(':')
@@ -217,10 +218,12 @@
countrycode = value
elif option == '-langcode':
lang = value
+ elif option == '-local_write':
+ local_write = value
else:
raise Exception(
- u'Bad parameters. Expected "-countrycode", "-langcode" or '
- u'pywikibot args. Found "{}"'.format(option))
+ u'Bad parameters. Expected "-countrycode", "-langcode", '
+ u'"-local_write" or pywikibot args. Found "{}"'.format(option))
pywikibot.setSite(pywikibot.getSite(u'commons', u'commons'))
@@ -232,7 +235,7 @@
pywikibot.output(
u'Working on countrycode "%s" in language "%s"' % (countrycode,
lang))
locateCountry(countrycode, lang, mconfig.countries.get(
- (countrycode, lang)), conn, cursor, conn2, cursor2)
+ (countrycode, lang)), conn, cursor, conn2, cursor2, local_write)
elif countrycode or lang:
raise Exception(u'The "countrycode" and "langcode" arguments must '
u'be used together.')
@@ -245,7 +248,8 @@
pywikibot.output(
u'Working on countrycode "%s" in language "%s"' %
(countrycode, lang))
locateCountry(
- countrycode, lang, countryconfig, conn, cursor, conn2,
cursor2)
+ countrycode, lang, countryconfig, conn, cursor, conn2,
+ cursor2, local_write)
close_database_connection(conn, cursor)
diff --git a/erfgoedbot/categorize_images.py b/erfgoedbot/categorize_images.py
index 1705d56..06c7b71 100644
--- a/erfgoedbot/categorize_images.py
+++ b/erfgoedbot/categorize_images.py
@@ -64,7 +64,7 @@
def categorizeImage(
countrycode, lang, commonsTemplateName, commonsCategoryBase,
- commonsCatTemplates, page, conn, cursor, harvest_type):
+ commonsCatTemplates, page, conn, cursor, harvest_type, local_write):
pywikibot.log(u'Working on: %s' % page.title())
site = pywikibot.Site(u'commons', u'commons')
commonsTemplate = pywikibot.Page(site, 'Template:%s' % commonsTemplateName)
@@ -113,7 +113,9 @@
if newcats:
comment = u'Adding categories based on [[Template:%s]] with identifier
%s (method %s)' % (
commonsTemplateName, monumentId, categorisation_method)
- replace_default_cat_with_new_categories_in_image(page,
commonsCategoryBase, newcats, comment, verbose=True)
+ replace_default_cat_with_new_categories_in_image(
+ page, commonsCategoryBase, newcats, comment, local_write,
+ verbose=True)
else:
pywikibot.log(u'Categories not found for %s' % page.title())
@@ -201,7 +203,8 @@
def replace_default_cat_with_new_categories_in_image(
- page, base_category, new_categories, comment, verbose=False):
+ page, base_category, new_categories, comment, local_write,
+ verbose=False):
old_text = page.get()
old_categories = list(page.categories())
@@ -217,7 +220,8 @@
if verbose:
pywikibot.showDiff(old_text, final_text)
try:
- page.put(final_text, comment)
+ common.save_to_wiki_or_local(
+ page, comment, final_text, local_path=local_write)
return True
except pywikibot.EditConflict:
pywikibot.log(
@@ -402,7 +406,8 @@
raise NoCommonsCatFromWikidataItemException(page)
-def processCountry(countrycode, lang, countryconfig, commonsCatTemplates,
conn, cursor, overridecat=None):
+def processCountry(countrycode, lang, countryconfig, commonsCatTemplates, conn,
+ cursor, local_write, overridecat=None):
'''
Work on a single country.
'''
@@ -443,17 +448,21 @@
if not totalImages >= 10000:
success = categorizeImage(
countrycode, lang, commonsTemplate, commonsCategoryBase,
- commonsCatTemplates, page, conn, cursor, harvest_type)
+ commonsCatTemplates, page, conn, cursor, harvest_type,
+ local_write)
if success:
categorizedImages += 1
return (countrycode, lang, commonsCategoryBase.title(withNamespace=False),
commonsTemplate, totalImages, categorizedImages)
-def outputStatistics(statistics):
- '''
- Output the results of the bot as a nice wikitable
- '''
+def outputStatistics(statistics, local_write):
+ """
+ Output the results of the bot as a nice wikitable.
+
+ @param statistics: statistics to output
+ @param local_write: where to locally create the output if not saved to wiki
+ """
output = u'{| class="wikitable sortable"\n'
output += \
u'! country !! [[:en:List of ISO 639-1 codes|lang]] !! Base category
!! Template !! data-sort-type="number"|Total images !!
data-sort-type="number"|Categorized images !! data-sort-type="number"|Images
left !! data-sort-type="number"|Current image count\n'
@@ -489,10 +498,11 @@
site = pywikibot.Site('commons', 'commons')
page = pywikibot.Page(
site, u'Commons:Monuments database/Categorization/Statistics')
-
- comment = u'Updating categorization statistics. Total: %s Categorized: %s
Leftover: %s' % (
- totalImages, categorizedImages, leftoverImages)
- page.put(newtext=output, comment=comment)
+ summary = (
+ u'Updating categorization statistics. '
+ u'Total: {0} Categorized: {1} Leftover: {2}'.format(
+ totalImages, categorizedImages, leftoverImages))
+ common.save_to_wiki_or_local(page, summary, output, local_path=local_write)
def getCommonscatTemplates(lang=None, project=None):
@@ -521,6 +531,7 @@
countrycode = u''
lang = u''
overridecat = u''
+ local_write = None
conn = None
cursor = None
# Connect database, we need that
@@ -534,10 +545,13 @@
lang = value
elif option == '-overridecat':
overridecat = value
+ elif option == '-local_write':
+ local_write = value
else:
raise Exception(
u'Bad parameters. Expected "-countrycode", "-langcode", '
- u'"-overridecat" or pywikibot args. Found "{}"'.format(option))
+ u'"-overridecat", "-local_write" or pywikibot args. '
+ u'Found "{}"'.format(option))
if countrycode and lang:
if not mconfig.countries.get((countrycode, lang)):
@@ -551,7 +565,7 @@
lang, countryconfig.get('project'))
# print commonsCatTemplates
processCountry(countrycode, lang, countryconfig, commonsCatTemplates,
- conn, cursor, overridecat=overridecat)
+ conn, cursor, local_write, overridecat=overridecat)
elif countrycode or lang:
raise Exception(u'The "countrycode" and "langcode" arguments must '
u'be used together.')
@@ -569,11 +583,12 @@
commonsCatTemplates = getCommonscatTemplates(
lang, countryconfig.get('project'))
result = processCountry(
- countrycode, lang, countryconfig, commonsCatTemplates, conn,
cursor)
+ countrycode, lang, countryconfig, commonsCatTemplates, conn,
+ cursor, local_write)
if result:
statistics.append(result)
- outputStatistics(statistics)
+ outputStatistics(statistics, local_write)
close_database_connection(conn, cursor)
diff --git a/erfgoedbot/common.py b/erfgoedbot/common.py
index 333409c..77a780e 100644
--- a/erfgoedbot/common.py
+++ b/erfgoedbot/common.py
@@ -1,8 +1,12 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Support library of commonly shared functions."""
-
+from __future__ import unicode_literals
+from builtins import open
+import os
import re
+import pywikibot
+from pywikibot.exceptions import OtherPageSaveError
def get_source_link(source, harvest_type=None, label=None):
@@ -50,8 +54,8 @@
commons is ('wikimedia', 'commons') rather than ('commons', 'commons').
@param source: the source value from the SQL table
- @harvest_type: the type of harvest from which the source was extracted.
- e.g. "sparql"
+ @param harvest_type: the type of harvest from which the source was
+ extracted, e.g. "sparql".
"""
site = None
page_name = None
@@ -60,7 +64,7 @@
return get_page_from_url(source)
except AttributeError:
raise ValueError(
- u'Could not find source list ({0})'.format(source))
+ 'Could not find source list ({0})'.format(source))
else:
supported_sites = ['wikipedia', 'wikivoyage', 'wikidata', 'wikimedia']
pattern = '\/\/(.+?)\.({0})\.org\/w\/index\.php\?title=(.+?)&'.format(
@@ -71,6 +75,53 @@
page_name = m.group(3)
except AttributeError:
raise ValueError(
- u'Could not find source list ({0})'.format(source))
+ 'Could not find source list ({0})'.format(source))
return (page_name, site)
+
+
+def save_to_wiki_or_local(page, summary, content, minorEdit=True,
+ local_path=None):
+ """
+ Save the content to the page on a given site or store it locally.
+
+ @param page: the pywikibot.Page to which the content should be written
+ @param content: the content to store
+ @param summary: the edit summary to save the content with
+ @param minorEdit: if the edit should be marked as minor (defaults to True)
+ @param local_path: path to where local file should be saved, leave empty
+ for writing to wiki.
+ """
+ if not isinstance(page, pywikibot.Page):
+ pywikibot.warning(
+ 'Could not save page {0} because it is not a Page '
+ 'instance.'.format(page))
+
+ if not local_path:
+ try:
+ page.put(newtext=content, summary=summary, minorEdit=minorEdit)
+ except OtherPageSaveError:
+ pywikibot.warning(
+ 'Could not save page {0} ({1})'.format(page, summary))
+ else:
+ filename = os.path.join(local_path, page_to_filename(page))
+ with open(filename, 'w', encoding='utf-8') as f:
+ f.write('#summary: {0}\n---------------\n'.format(summary))
+ f.write(unicode(content))
+
+
+def page_to_filename(page):
+ """
+ Create a standardised filename for a page.
+
+ The name takes the form [site][namespace]pagename.wiki where '/', ':' and
+ " " has been replaced by '_'. Namespace 0 is given as just '_'.
+
+ @param page: the pywikibot.Page for which to generate a filename.
+ """
+ site_str = str(page.site)
+ namespace_str = page.namespace().custom_prefix().rstrip(':') or '_'
+ pagename_str = page.title(as_filename=True, withNamespace=False)
+ filename = '[{site}][{ns}]{page}.wiki'.format(
+ site=site_str, ns=namespace_str, page=pagename_str)
+ return filename.replace(' ', '_').replace(':', '_')
diff --git a/erfgoedbot/database_statistics.py
b/erfgoedbot/database_statistics.py
index b13b55c..462de1d 100755
--- a/erfgoedbot/database_statistics.py
+++ b/erfgoedbot/database_statistics.py
@@ -7,6 +7,7 @@
'''
import pywikibot
+import common as common
from database_connection import (
close_database_connection,
connect_to_monuments_database
@@ -23,7 +24,7 @@
return count
-def outputStatistics(statistics):
+def outputStatistics(statistics, local_write):
'''
Output the statistics in wikitext on Commons
'''
@@ -203,11 +204,11 @@
output += u'|| %(source)s\n' % totals
output += u'|}\n'
+
site = pywikibot.Site('commons', 'commons')
page = pywikibot.Page(site, u'Commons:Monuments database/Statistics')
-
comment = u'Updating monument database statistics'
- page.put(newtext=output, comment=comment)
+ common.save_to_wiki_or_local(page, comment, output, local_path=local_write)
def getStatistics(country, language, conn, cursor):
@@ -307,13 +308,20 @@
def main():
- '''
- The main loop
- '''
-
+ """The main loop."""
+ local_write = None
conn = None
cursor = None
(conn, cursor) = connect_to_monuments_database()
+
+ for arg in pywikibot.handleArgs():
+ option, sep, value = arg.partition(':')
+ if option == '-local_write':
+ local_write = value
+ else:
+ raise Exception(
+ u'Bad parameters. Expected "-local_write" or '
+ u'pywikibot args. Found "{}"'.format(option))
statistics = {}
@@ -323,7 +331,7 @@
statistics[country][language] = getStatistics(
country, language, conn, cursor)
- outputStatistics(statistics)
+ outputStatistics(statistics, local_write)
close_database_connection(conn, cursor)
diff --git a/erfgoedbot/images_of_monuments_without_id.py
b/erfgoedbot/images_of_monuments_without_id.py
index 5413ea1..e521682 100644
--- a/erfgoedbot/images_of_monuments_without_id.py
+++ b/erfgoedbot/images_of_monuments_without_id.py
@@ -16,6 +16,7 @@
import pywikibot
import monuments_config as mconfig
+import common as common
from database_connection import (
close_database_connection,
connect_to_monuments_database,
@@ -23,7 +24,8 @@
)
-def processCountry(countrycode, lang, countryconfig, conn, cursor, conn2,
cursor2):
+def processCountry(countrycode, lang, countryconfig, conn, cursor, conn2,
+ cursor2, local_write):
'''
Work on a single country.
'''
@@ -59,7 +61,7 @@
# An image is in the category and is in the list of used images
if withPhoto.get(image):
added = addCommonsTemplate(
- image, commonsTemplate, withPhoto.get(image))
+ image, commonsTemplate, withPhoto.get(image), local_write)
if not added:
text += \
u'File:%s|<nowiki>{{%s|%s}}</nowiki>\n' % (
@@ -76,7 +78,7 @@
image not in withTemplate and \
image not in withoutTemplate:
added = addCommonsTemplate(
- image, commonsTemplate, withPhoto.get(image))
+ image, commonsTemplate, withPhoto.get(image), local_write)
if not added:
text += \
u'File:%s|<nowiki>{{%s|%s}}</nowiki>\n' % (
@@ -92,7 +94,8 @@
site = pywikibot.getSite(lang, project)
page = pywikibot.Page(site, imagesWithoutIdPage)
pywikibot.output(text)
- page.put(text, comment, minorEdit=False)
+ common.save_to_wiki_or_local(
+ page, comment, text, local_path=local_write, minorEdit=False)
def getMonumentsWithPhoto(countrycode, lang, countryconfig, conn, cursor):
@@ -166,7 +169,7 @@
return result
-def addCommonsTemplate(image, commonsTemplate, identifier):
+def addCommonsTemplate(image, commonsTemplate, identifier, local_write):
'''
Add the commonsTemplate with identifier to the image.
'''
@@ -184,13 +187,15 @@
comment = u'Adding template %s based on usage in list' % (commonsTemplate,)
pywikibot.showDiff(text, newtext)
- page.put(newtext, comment)
+ common.save_to_wiki_or_local(
+ page, comment, newtext, local_path=local_write)
return True
def main():
countrycode = u''
lang = u''
+ local_write = None
conn = None
cursor = None
# Connect database, we need that
@@ -203,10 +208,12 @@
countrycode = value
elif option == '-langcode':
lang = value
+ elif option == '-local_write':
+ local_write = value
else:
raise Exception(
- u'Bad parameters. Expected "-countrycode", "-langcode" or '
- u'pywikibot args. Found "{}"'.format(option))
+ u'Bad parameters. Expected "-countrycode", "-langcode", '
+ u'"-local_write" or pywikibot args. Found "{}"'.format(option))
if countrycode and lang:
if not mconfig.countries.get((countrycode, lang)):
@@ -216,7 +223,7 @@
pywikibot.output(
u'Working on countrycode "%s" in language "%s"' % (countrycode,
lang))
processCountry(countrycode, lang, mconfig.countries.get(
- (countrycode, lang)), conn, cursor, conn2, cursor2)
+ (countrycode, lang)), conn, cursor, conn2, cursor2, local_write)
elif countrycode or lang:
raise Exception(u'The "countrycode" and "langcode" arguments must '
u'be used together.')
@@ -225,7 +232,8 @@
pywikibot.output(
u'Working on countrycode "%s" in language "%s"' %
(countrycode, lang))
processCountry(
- countrycode, lang, countryconfig, conn, cursor, conn2, cursor2)
+ countrycode, lang, countryconfig, conn, cursor, conn2, cursor2,
+ local_write)
close_database_connection(conn, cursor)
diff --git a/erfgoedbot/missing_commonscat_links.py
b/erfgoedbot/missing_commonscat_links.py
index fd5898d..8b55048 100644
--- a/erfgoedbot/missing_commonscat_links.py
+++ b/erfgoedbot/missing_commonscat_links.py
@@ -16,6 +16,7 @@
import pywikibot
import monuments_config as mconfig
+import common as common
from database_connection import (
close_database_connection,
connect_to_monuments_database,
@@ -25,7 +26,8 @@
_logger = "missing_commonscat"
-def processCountry(countrycode, lang, countryconfig, conn, cursor, conn2,
cursor2):
+def processCountry(countrycode, lang, countryconfig, conn, cursor, conn2,
+ cursor2, local_write):
'''
Work on a single country.
'''
@@ -103,7 +105,7 @@
site = pywikibot.Site(lang, u'wikipedia')
page = pywikibot.Page(site, missingCommonscatPage)
pywikibot.debug(text, _logger)
- page.put(text, comment)
+ common.save_to_wiki_or_local(page, comment, text, local_path=local_write)
return totalCategories
@@ -166,7 +168,7 @@
return result
-def makeStatistics(mconfig, totals):
+def makeStatistics(mconfig, totals, local_write):
text = u'{| class="wikitable sortable"\n'
text += \
u'! country !! lang !! total !! page !! row template !! Commons
template\n'
@@ -195,12 +197,13 @@
comment = u'Updating missing commonscat links statistics. Total missing
links: %s' % totalCategories
pywikibot.debug(text, _logger)
- page.put(newtext=text, comment=comment)
+ common.save_to_wiki_or_local(page, comment, text, local_path=local_write)
def main():
countrycode = u''
lang = u''
+ local_write = None
conn = None
cursor = None
# Connect database, we need that
@@ -213,10 +216,12 @@
countrycode = value
elif option == '-langcode':
lang = value
+ elif option == '-local_write':
+ local_write = value
else:
raise Exception(
- u'Bad parameters. Expected "-countrycode", "-langcode" or '
- u'pywikibot args. Found "{}"'.format(option))
+ u'Bad parameters. Expected "-countrycode", "-langcode", '
+ u'"-local_write" or pywikibot args. Found "{}"'.format(option))
if countrycode and lang:
if not mconfig.countries.get((countrycode, lang)):
@@ -226,7 +231,7 @@
pywikibot.log(
u'Working on countrycode "%s" in language "%s"' % (countrycode,
lang))
processCountry(countrycode, lang, mconfig.countries.get(
- (countrycode, lang)), conn, cursor, conn2, cursor2)
+ (countrycode, lang)), conn, cursor, conn2, cursor2, local_write)
elif countrycode or lang:
raise Exception(u'The "countrycode" and "langcode" arguments must '
u'be used together.')
@@ -236,8 +241,9 @@
pywikibot.log(
u'Working on countrycode "%s" in language "%s"' %
(countrycode, lang))
totals[(countrycode, lang)] = processCountry(
- countrycode, lang, countryconfig, conn, cursor, conn2, cursor2)
- makeStatistics(mconfig, totals)
+ countrycode, lang, countryconfig, conn, cursor, conn2, cursor2,
+ local_write)
+ makeStatistics(mconfig, totals, local_write)
close_database_connection(conn, cursor)
diff --git a/erfgoedbot/populate_image_table.py
b/erfgoedbot/populate_image_table.py
index 5ceac15..0ca7b66 100644
--- a/erfgoedbot/populate_image_table.py
+++ b/erfgoedbot/populate_image_table.py
@@ -32,6 +32,7 @@
import pywikibot
import monuments_config as mconfig
+import common as common
from database_connection import (
close_database_connection,
connect_to_monuments_database,
@@ -185,7 +186,7 @@
(countrycode, monumentId, name, has_geolocation,))
-def makeStatistics(totals):
+def makeStatistics(totals, local_write):
"""Make statistics on the number of indexed images and put on Commons."""
text = u'{| class="wikitable sortable"\n'
text += \
@@ -214,16 +215,23 @@
comment = u'Updating indexed image statistics. Total indexed images: %s' %
totalImages
pywikibot.output(text)
- page.put(newtext=text, comment=comment)
+ common.save_to_wiki_or_local(page, comment, text, local_path=local_write)
def main():
countrycode = u''
+ local_write = None
for arg in pywikibot.handleArgs():
option, sep, value = arg.partition(':')
if option == '-countrycode':
countrycode = value
+ elif option == '-local_write':
+ local_write = value
+ else:
+ raise Exception(
+ u'Bad parameters. Expected "-countrycode", "-local_write" or '
+ u'pywikibot args. Found "{}"'.format(option))
if countrycode:
pywikibot.output(u'Working on countrycode "%s"' % (countrycode,))
@@ -248,7 +256,7 @@
% (len(sources),))
totals = processSources(sources)
- makeStatistics(totals)
+ makeStatistics(totals, local_write)
if __name__ == "__main__":
diff --git a/erfgoedbot/unused_monument_images.py
b/erfgoedbot/unused_monument_images.py
index 16e6ebe..07676d5 100644
--- a/erfgoedbot/unused_monument_images.py
+++ b/erfgoedbot/unused_monument_images.py
@@ -24,7 +24,7 @@
_logger = "unused_images"
-def processCountry(countrycode, lang, countryconfig, conn, cursor, conn2,
cursor2):
+def processCountry(countrycode, lang, countryconfig, conn, cursor, conn2,
cursor2, local_write):
'''
Work on a single country.
'''
@@ -107,7 +107,8 @@
site = pywikibot.Site(lang, project)
page = pywikibot.Page(site, unusedImagesPage)
pywikibot.debug(text, _logger)
- page.put(text, comment, minorEdit=False)
+ common.save_to_wiki_or_local(page, comment, text, local_path=local_write,
+ minorEdit=False)
return totalImages
@@ -161,7 +162,7 @@
return result
-def makeStatistics(mconfig, totals):
+def makeStatistics(mconfig, totals, local_write):
text = u'{| class="wikitable sortable"\n'
text += \
u'! country !! lang !! data-sort-type="number"|total !! page !! row
template !! Commons template\n'
@@ -190,12 +191,13 @@
comment = u'Updating unused image statistics. Total unused images: %s' %
totalImages
pywikibot.debug(text, _logger)
- page.put(newtext=text, comment=comment)
+ common.save_to_wiki_or_local(page, comment, text, local_path=local_write)
def main():
countrycode = u''
lang = u''
+ local_write = None
conn = None
cursor = None
# Connect database, we need that
@@ -208,10 +210,12 @@
countrycode = value
elif option == '-langcode':
lang = value
+ elif option == '-local_write':
+ local_write = value
else:
raise Exception(
- u'Bad parameters. Expected "-countrycode", "-langcode" or '
- u'pywikibot args. Found "{}"'.format(option))
+ u'Bad parameters. Expected "-countrycode", "-langcode", '
+ u'"-local_write" or pywikibot args. Found "{}"'.format(option))
if countrycode and lang:
if not mconfig.countries.get((countrycode, lang)):
@@ -221,7 +225,7 @@
pywikibot.log(
u'Working on countrycode "%s" in language "%s"' % (countrycode,
lang))
processCountry(countrycode, lang, mconfig.countries.get(
- (countrycode, lang)), conn, cursor, conn2, cursor2)
+ (countrycode, lang)), conn, cursor, conn2, cursor2, local_write)
elif countrycode or lang:
raise Exception(u'The "countrycode" and "langcode" arguments must '
u'be used together.')
@@ -231,8 +235,9 @@
pywikibot.log(
u'Working on countrycode "%s" in language "%s"' %
(countrycode, lang))
totals[(countrycode, lang)] = processCountry(
- countrycode, lang, countryconfig, conn, cursor, conn2, cursor2)
- makeStatistics(mconfig, totals)
+ countrycode, lang, countryconfig, conn, cursor, conn2, cursor2,
+ local_write)
+ makeStatistics(mconfig, totals, local_write)
close_database_connection(conn, cursor)
diff --git a/erfgoedbot/update_database.py b/erfgoedbot/update_database.py
index b124386..11a8f83 100755
--- a/erfgoedbot/update_database.py
+++ b/erfgoedbot/update_database.py
@@ -22,7 +22,6 @@
import pywikibot
import pywikibot.data.sparql
from pywikibot import pagegenerators
-from pywikibot.exceptions import OtherPageSaveError
import monuments_config as mconfig
import common as common
@@ -140,7 +139,7 @@
countryconfig.get('table'), field.get('conv')))
-def unknownFieldsStatistics(countryconfig, unknownFields):
+def unknownFieldsStatistics(countryconfig, unknownFields, local_write):
"""
Produce some unknown field statistics to debug.
@@ -148,8 +147,9 @@
"""
site = pywikibot.Site(u'commons', u'commons')
page = pywikibot.Page(
- site, u'Commons:Monuments database/Unknown fields/%s' % (
- countryconfig.get('table'),))
+ site, u'Commons:Monuments database/Unknown fields/{0}'.format(
+ countryconfig.get('table')))
+ summary = u'Updating the list of unknown fields'
text = u'{| class="wikitable sortable"\n'
text += u'! Field !! Count\n'
@@ -159,11 +159,8 @@
text += u'|}\n'
text += u'[[Category:Commons:Monuments database/Unknown fields]]'
- comment = u'Updating the list of unknown fields'
- try:
- page.put(text, comment)
- except OtherPageSaveError:
- pywikibot.warning("Could not save page %s (%s)" % (page, comment))
+
+ common.save_to_wiki_or_local(page, summary, text, local_path=local_write)
def updateMonument(contents, source, countryconfig, conn, cursor, sourcePage):
@@ -412,15 +409,18 @@
return unknownFields
-def processCountry(countryconfig, conn, cursor, fullUpdate, daysBack):
+def processCountry(countryconfig, conn, cursor, fullUpdate, daysBack,
+ local_write):
"""Process all the monuments of one country."""
if countryconfig.get('type') == 'sparql':
process_country_wikidata(countryconfig, conn, cursor)
else:
- process_country_list(countryconfig, conn, cursor, fullUpdate, daysBack)
+ process_country_list(countryconfig, conn, cursor, fullUpdate, daysBack,
+ local_write)
-def process_country_list(countryconfig, conn, cursor, fullUpdate, daysBack):
+def process_country_list(countryconfig, conn, cursor, fullUpdate, daysBack,
+ local_write):
"""Process all the monuments of one country using row templates."""
site = pywikibot.Site(countryconfig.get('lang'),
countryconfig.get('project'))
rowTemplate = pywikibot.Page(
@@ -457,7 +457,7 @@
conn, cursor, unknownFields=unknownFields)
try:
- unknownFieldsStatistics(countryconfig, unknownFields)
+ unknownFieldsStatistics(countryconfig, unknownFields, local_write)
except pywikibot.exceptions.PageSaveRelatedError as e:
pywikibot.warning(
'Could not update field statistics. Details below:\n{}'.format(e))
@@ -508,6 +508,7 @@
lang = u''
fullUpdate = True
skip_wd = False
+ local_write = None
daysBack = 2 # Default 2 days. Runs every night so can miss one night.
conn = None
cursor = None
@@ -525,11 +526,13 @@
fullUpdate = True
elif option == u'-skip_wd':
skip_wd = True
+ elif option == '-local_write':
+ local_write = value
else:
raise Exception(
u'Bad parameters. Expected "-countrycode", "-langcode", '
- u'"-daysback", "-fullupdate" or pywikibot args. '
- u'Found "{}"'.format(option))
+ u'"-daysback", "-fullupdate", "-skip_wd", "-local_write" or '
+ u'pywikibot args. Found "{}"'.format(option))
if countrycode and lang:
if not mconfig.countries.get((countrycode, lang)):
@@ -543,8 +546,8 @@
countrycode, lang))
try:
countryconfig = mconfig.countries.get((countrycode, lang))
- processCountry(countryconfig, conn, cursor,
- fullUpdate, daysBack)
+ processCountry(countryconfig, conn, cursor, fullUpdate,
+ daysBack, local_write)
except Exception, e:
pywikibot.error(
u"Unknown error occurred when processing country "
@@ -561,8 +564,8 @@
u'Working on countrycode "%s" in language "%s"' % (
countrycode, lang))
try:
- processCountry(countryconfig, conn, cursor,
- fullUpdate, daysBack)
+ processCountry(countryconfig, conn, cursor, fullUpdate,
+ daysBack, local_write)
except Exception, e:
pywikibot.error(
u"Unknown error occurred when processing country "
diff --git a/requirements.txt b/requirements.txt
index 6f0822c..b698dc0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+future
pywikibot==3.0.20170403
MySQL-python
requests
diff --git a/tests/test_common.py b/tests/test_common.py
index 2f016c0..0a1fe86 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -2,6 +2,9 @@
import unittest
import mock
+import tempfile
+import os
+import pywikibot
from erfgoedbot import common
@@ -73,3 +76,100 @@
self.mock_get_source.return_value = ('Q123', ('wikidata', 'www'))
result = common.get_source_link('a link', 'sparql', 'bar')
self.assertEquals(result, '[[:d:Q123|bar]]')
+
+
+class TestPageToFilename(unittest.TestCase):
+
+ def test_page_to_filename_commons(self):
+ site = pywikibot.Site('commons', 'commons')
+ page = pywikibot.Page(site, 'Foo')
+ self.assertEquals(
+ common.page_to_filename(page),
+ '[commons_commons][_]Foo.wiki'
+ )
+
+ def test_page_to_filename_wikipedia(self):
+ site = pywikibot.Site('en', 'wikipedia')
+ page = pywikibot.Page(site, 'Foo')
+ self.assertEquals(
+ common.page_to_filename(page),
+ '[wikipedia_en][_]Foo.wiki'
+ )
+
+ def test_page_to_filename_namespace(self):
+ site = pywikibot.Site('commons', 'commons')
+ page = pywikibot.Page(site, 'Template:Foo')
+ self.assertEquals(
+ common.page_to_filename(page),
+ '[commons_commons][Template]Foo.wiki'
+ )
+
+ def test_page_to_filename_subpage(self):
+ site = pywikibot.Site('commons', 'commons')
+ page = pywikibot.Page(site, 'Foo/Bar')
+ self.assertEquals(
+ common.page_to_filename(page),
+ '[commons_commons][_]Foo_Bar.wiki'
+ )
+
+ def test_page_to_filename_with_spaces(self):
+ site = pywikibot.Site('commons', 'commons')
+ page = pywikibot.Page(site, 'Foo bar')
+ self.assertEquals(
+ common.page_to_filename(page),
+ '[commons_commons][_]Foo_bar.wiki'
+ )
+
+
+class TestSaveToWikiOrLocal(unittest.TestCase):
+
+ def setUp(self):
+ site = pywikibot.Site('test', 'wikipedia')
+ self.page = pywikibot.Page(site, 'Foo')
+
+ patcher = mock.patch('erfgoedbot.common.page_to_filename')
+ self.mock_page_to_filename = patcher.start()
+ self.mock_page_to_filename.return_value = 'filename'
+ self.addCleanup(patcher.stop)
+
+ # Create a temporary file
+ self.test_outfile = tempfile.NamedTemporaryFile(delete=False)
+ patcher = mock.patch('erfgoedbot.common.os.path.join')
+ self.mock_join = patcher.start()
+ self.mock_join.return_value = self.test_outfile.name
+ self.addCleanup(patcher.stop)
+
+ # Ensure tests don't write
+ patcher = mock.patch('erfgoedbot.common.pywikibot.Page.put')
+ self.mock_page_put = patcher.start()
+ self.addCleanup(patcher.stop)
+
+ def tearDown(self):
+ # Closes and removes the file
+ os.remove(self.test_outfile.name)
+
+ def test_save_to_wiki_or_local_write_to_wiki(self):
+ summary = 'a summary'
+ content = 'The content'
+ local_path = None
+ common.save_to_wiki_or_local(self.page, summary, content,
+ local_path=local_path)
+ self.mock_page_put.assert_called_once_with(
+ newtext=content, summary=summary, minorEdit=True)
+ self.mock_page_to_filename.assert_not_called()
+ self.mock_join.assert_not_called()
+ self.assertEquals(self.test_outfile.read(), '')
+
+ def test_save_to_wiki_or_local_write_locally(self):
+ summary = 'a summary'
+ content = u'The content'
+ local_path = 'something'
+ common.save_to_wiki_or_local(self.page, summary, content,
+ local_path=local_path)
+ self.mock_page_put.assert_not_called()
+ self.mock_page_to_filename.assert_called_once_with(self.page)
+ self.mock_join.assert_called_once_with('something', 'filename')
+ self.assertEquals(
+ self.test_outfile.read(),
+ '#summary: a summary\n---------------\nThe content'
+ )
--
To view, visit https://gerrit.wikimedia.org/r/378800
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I2bf650f99a57a0e93dbb0c3d6f520049c3579957
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits