jenkins-bot has submitted this change and it was merged.
Change subject: Change all linebreaks from Windows to Unix
......................................................................
Change all linebreaks from Windows to Unix
No other changes were made.
Change-Id: I92c81101f146652fc29f39b4ea2cadf988112207
---
M erfgoedbot/add_coord_to_articles.py
M erfgoedbot/add_object_location_monuments.py
M erfgoedbot/images_of_monuments_without_id.py
M erfgoedbot/missing_commonscat_links.py
M erfgoedbot/populate_image_table.py
M erfgoedbot/top_streets.py
6 files changed, 1,436 insertions(+), 1,436 deletions(-)
Approvals:
Multichill: Looks good to me, approved
jenkins-bot: Verified
diff --git a/erfgoedbot/add_coord_to_articles.py
b/erfgoedbot/add_coord_to_articles.py
index bf4f351..02bad27 100644
--- a/erfgoedbot/add_coord_to_articles.py
+++ b/erfgoedbot/add_coord_to_articles.py
@@ -1,329 +1,329 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-'''
-
-get coordinates from the Monuments database and
- add coordinate template to articles in Wikipedia
-
-@author Kentaur
-
-Usage:
-# loop through all countries
-python add_coord_to_articles.py
-
-# work on specific country-lang
-python add_coord_to_articles.py -countrycode:XX -lang:YY
-
-'''
-
-import os
-import monuments_config as mconfig
-import pywikibot
-import re, MySQLdb
-
-#coordinate templates for different language wikipedias
-wikiData = {
- ('et') : {
- 'coordTemplate' : 'Coordinate',
- #coordTemplateSyntax % (lat, lon, countrycode.upper() )
- 'coordTemplateSyntax' :
u'{{Coordinate|NS=%f|EW=%f|type=landmark|region=%s}}'
- },
- ('fr') : {
- 'coordTemplate' : 'coord',
-
#{{coord|52.51626|13.3777|type:landmark_region:DE|format=dms|display=title}}
- 'coordTemplateSyntax' :
u'{{coord|%f|%f|type:landmark_region=%s|format=dms|display=title}}'
- }
-}
-
-
-# "constants"
-
-# wikipedia article namespace
-WP_ARTICLE_NS = 0
-# wikipedia category namespace
-WP_CATEGORY_NS = 14
-# output debug messages
-DEBUG = True
-
-
-# classes
-
-class Monument:
-#Constructor with default arguments
- def __init__(self, id = None):
- self.id = id
- self.name = u''
- self.country = u''
- self.wikilang = u''
- self.article = u''
- self.lat = None
- self.lon = None
- self.source = u''
-
-
-# functions
-
-def connectMonDatabase():
- '''
- Connect to the monuments mysql database
- '''
- conn = MySQLdb.connect(host=mconfig.db_server, db=mconfig.db,
- read_default_file=os.path.expanduser("~/.my.cnf"),
- use_unicode=True, charset='utf8')
- cursor = conn.cursor()
- return (conn, cursor)
-
-def connectWikiDatabase(lang):
- '''
- Connect to the wiki database
- '''
- if (lang):
- hostName = lang + 'wiki.labsdb'
- dbName = lang + 'wiki_p'
- #coordDbName = 'u_dispenser_p'
- conn = MySQLdb.connect(host=hostName, db=dbName,
- read_default_file=os.path.expanduser("~/.my.cnf"),
- use_unicode=True, charset='utf8')
- cursor = conn.cursor()
- return (conn, cursor)
-
-def processCountry(countrycode, lang, countryconfig, coordconfig, connMon,
cursorMon):
- '''
- Work on a single country.
- '''
- if (not coordconfig or not coordconfig.get('coordTemplate')):
- # No template found, just skip.
- pywikibot.output(u'Language: %s has no coordTemplate set!' % lang)
- return False
-
- (connWiki, cursorWiki) = connectWikiDatabase(lang)
-
- withCoordinates = getMonumentsWithCoordinates(countrycode, lang, cursorMon)
-
- articleNames = []
- duplicateArticles = []
- monumentsWithArticle = []
-
- for aMonument in withCoordinates:
- article_name = u''
- result = re.match("\[\[(.+?)\|.+?\]\]", aMonument.name)
- if (result and result.group(1)):
- article_name = result.group(1)
-
- result = re.match("\[\[([^\|]+?)\]\]", aMonument.name)
- if (result and result.group(1)):
- article_name = result.group(1)
-
- if (article_name):
- if article_name not in duplicateArticles:
- if article_name in articleNames:
- duplicateArticles.append(article_name)
- articleNames.remove(article_name)
- for sMon in monumentsWithArticle:
- if (sMon.article == article_name):
- monumentsWithArticle.remove(sMon)
- break
- else:
- articleNames.append(article_name)
- aMonument.article = article_name
- monumentsWithArticle.append(aMonument)
-
- if len(duplicateArticles):
- pywikibot.output(u'Multiple references to following articles: %s in
monument lists! Skipped those.' % duplicateArticles[:])
-
- for aMonument in monumentsWithArticle:
- PageNs = WP_ARTICLE_NS
- followRedirect = True
- (retStatus, pageId, redirNs, redirTitle) =
getPageId(aMonument.article, connWiki, cursorWiki, PageNs, followRedirect)
- if (retStatus == 'FOLLOWED_REDIR' and redirNs == WP_ARTICLE_NS):
- aMonument.article = redirTitle
- if (pageId):
- if not hasCoordinates(pageId, lang, cursorWiki):
- addCoords(countrycode, lang, aMonument, coordconfig)
-
-
-
-def getMonumentsWithCoordinates(countrycode, lang, cursor):
- '''
- Get monuments with coordinates from monuments database for a certain
country/language combination.
- '''
- result = []
- query = """SELECT id, name, lat, lon, source FROM monuments_all
- WHERE lat<>0 AND lon<>0 AND country=%s AND lang=%s"""
- cursor.execute(query, (countrycode, lang))
-
- #result = cursor.fetchall ()
- while True:
- try:
- row = cursor.fetchone()
- aMon = Monument()
- aMon.country = countrycode
- aMon.wikilang = lang
- (aMon.id, aMon.name, aMon.lat, aMon.lon, aMon.source) = row
- result.append(aMon)
- except TypeError:
- break
-
- return result
-
-def hasCoordinates(pageId, lang, cursor):
- '''
- check if Article has Article coords in WP coords DB
- '''
-
- if (pageId and lang):
- coordTable = 'u_dispenser_p.coord_' + lang + 'wiki'
-
- # check if primary coordinate i.e. article coordinate exists for pageId
- query = """SELECT gc_from FROM %s
- WHERE (gc_from = %s AND gc_primary = 1)
- LIMIT 1"""
- # FIXME escape & sanitize coordTable and pageId
- cursor.execute(query % (coordTable, int(pageId) ))
-
- if (cursor.rowcount > 0):
- return True
- else:
- return False
- else:
- return False
-
-def getPageId(pageName, conn, cursor, pageNamespace = WP_ARTICLE_NS,
followRedirect = False):
- '''
- get Wikipedia pagename pageId
- '''
-
- #underscores
- pageName = pageName.replace(u' ', u'_')
- retStatus = ''
- pageId = ''
- redirNs = ''
- redirTitle = u''
-
- #FIXME page_titles like 'Château_de_Bercy' won't work, but titles like
'Käru' do ??
- query = """SELECT page_id, page_is_redirect FROM page
- WHERE page_namespace = %s AND page_title = %s"""
- cursor.execute(query, (pageNamespace, pageName))
- if DEBUG:
- print cursor._executed
- print u'rowcount: %d ' % cursor.rowcount
-
- if (cursor.rowcount > 0):
- row = cursor.fetchone()
- (pageId, IsRedirect) = row
- if (IsRedirect):
- if (followRedirect):
- (redirNs, redirTitle) = getRedirPageNsTitle(pageId, cursor)
- redirTitle = unicode(redirTitle, "utf-8")
- (dummy0, pageId, dummy1, dummy2) = getPageId(redirTitle, conn,
cursor, redirNs)
- retStatus = 'FOLLOWED_REDIR'
- else:
- retStatus = 'REDIRECT'
- else:
- retStatus = 'OK'
-
- return (retStatus, pageId, redirNs, redirTitle)
-
-def getRedirPageNsTitle(pageId, cursor):
- '''
- Get redirect page namespace and title.
- '''
-
- if (pageId):
- pageNs = ''
- pageTitle = u''
-
- query = """SELECT rd_namespace, rd_title FROM redirect
- WHERE rd_from = %s"""
- cursor.execute(query, (pageId,))
-
- if (cursor.rowcount > 0):
- row = cursor.fetchone()
- (pageNs, pageTitle) = row
-
- return (pageNs, pageTitle)
-
-
-def addCoords(countrycode, lang, monument, coordconfig):
- '''
- Add the coordinates to article.
- '''
-
- if (countrycode and lang):
- coordTemplate = coordconfig.get('coordTemplate')
- coordTemplateSyntax = coordconfig.get('coordTemplateSyntax')
- site = pywikibot.getSite(lang, 'wikipedia')
-
- page = pywikibot.Page(site, monument.article)
- try:
- text = page.get()
- except pywikibot.NoPage: # First except, prevent empty pages
- return False
- except pywikibot.IsRedirectPage: # second except, prevent redirect
- pywikibot.output(u'%s is a redirect!' % monument.article)
- return False
- except pywikibot.Error: # third exception, take the problem and print
- pywikibot.output(u"Some error, skipping..")
- return False
-
- if coordTemplate in page.templates():
- return False
-
- newtext = text
- replCount = 1
- coordText = coordTemplateSyntax % (monument.lat, monument.lon,
countrycode.upper() )
- localCatName = pywikibot.getSite().namespace(WP_CATEGORY_NS)
- catStart = r'\[\[(' + localCatName + '|Category):'
- catStartPlain = u'[[' + localCatName + ':'
- replacementText = u''
- replacementText = coordText + '\n\n' + catStartPlain
-
- # insert coordinate template before categories
- newtext = re.sub(catStart, replacementText, newtext, replCount,
flags=re.IGNORECASE)
-
- if text != newtext:
- wikilist = u''
- matchWikipage = re.search("title=(.+?)&", monument.source)
- if (matchWikipage and matchWikipage.group(1)):
- wikilist = matchWikipage.group(1)
- comment = u'Adding template %s based on [[%s]], # %s' %
(coordTemplate, wikilist, monument.id)
- pywikibot.showDiff(text, newtext)
- modPage = pywikibot.input(u'Modify page: %s ([y]/n) ?' %
(monument.article) )
- if (modPage.lower == 'y' or modPage == ''):
- page.put(newtext, comment)
- return True
- else:
- return False
- else:
- return False
-
-def main():
- countrycode = u''
- connMon = None
- cursorMon = None
-
- (connMon, cursorMon) = connectMonDatabase()
-
- for arg in pywikibot.handleArgs():
- option, sep, value = arg.partition(':')
- if option == '-countrycode:':
- countrycode = value
-
- if countrycode:
- lang = pywikibot.getSite().language()
- if not mconfig.countries.get((countrycode, lang)):
- pywikibot.output(u'I have no config for countrycode "%s" in
language "%s"' % (countrycode, lang))
- return False
- pywikibot.output(u'Working on countrycode "%s" in language "%s"' %
(countrycode, lang))
- processCountry(countrycode, lang, mconfig.countries.get((countrycode,
lang)), wikiData.get(lang), connMon, cursorMon)
- else:
- for (countrycode, lang), countryconfig in mconfig.countries.iteritems():
- pywikibot.output(u'Working on countrycode "%s" in language "%s"' %
(countrycode, lang))
- processCountry(countrycode, lang, countryconfig,
wikiData.get(lang), connMon, cursorMon)
-
-
-if __name__ == "__main__":
- try:
- main()
- finally:
- pywikibot.stopme()
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+'''
+
+get coordinates from the Monuments database and
+ add coordinate template to articles in Wikipedia
+
+@author Kentaur
+
+Usage:
+# loop through all countries
+python add_coord_to_articles.py
+
+# work on specific country-lang
+python add_coord_to_articles.py -countrycode:XX -lang:YY
+
+'''
+
+import os
+import monuments_config as mconfig
+import pywikibot
+import re, MySQLdb
+
+#coordinate templates for different language wikipedias
+wikiData = {
+ ('et') : {
+ 'coordTemplate' : 'Coordinate',
+ #coordTemplateSyntax % (lat, lon, countrycode.upper() )
+ 'coordTemplateSyntax' :
u'{{Coordinate|NS=%f|EW=%f|type=landmark|region=%s}}'
+ },
+ ('fr') : {
+ 'coordTemplate' : 'coord',
+
#{{coord|52.51626|13.3777|type:landmark_region:DE|format=dms|display=title}}
+ 'coordTemplateSyntax' :
u'{{coord|%f|%f|type:landmark_region=%s|format=dms|display=title}}'
+ }
+}
+
+
+# "constants"
+
+# wikipedia article namespace
+WP_ARTICLE_NS = 0
+# wikipedia category namespace
+WP_CATEGORY_NS = 14
+# output debug messages
+DEBUG = True
+
+
+# classes
+
+class Monument:
+#Constructor with default arguments
+ def __init__(self, id = None):
+ self.id = id
+ self.name = u''
+ self.country = u''
+ self.wikilang = u''
+ self.article = u''
+ self.lat = None
+ self.lon = None
+ self.source = u''
+
+
+# functions
+
+def connectMonDatabase():
+ '''
+ Connect to the monuments mysql database
+ '''
+ conn = MySQLdb.connect(host=mconfig.db_server, db=mconfig.db,
+ read_default_file=os.path.expanduser("~/.my.cnf"),
+ use_unicode=True, charset='utf8')
+ cursor = conn.cursor()
+ return (conn, cursor)
+
+def connectWikiDatabase(lang):
+ '''
+ Connect to the wiki database
+ '''
+ if (lang):
+ hostName = lang + 'wiki.labsdb'
+ dbName = lang + 'wiki_p'
+ #coordDbName = 'u_dispenser_p'
+ conn = MySQLdb.connect(host=hostName, db=dbName,
+ read_default_file=os.path.expanduser("~/.my.cnf"),
+ use_unicode=True, charset='utf8')
+ cursor = conn.cursor()
+ return (conn, cursor)
+
+def processCountry(countrycode, lang, countryconfig, coordconfig, connMon,
cursorMon):
+ '''
+ Work on a single country.
+ '''
+ if (not coordconfig or not coordconfig.get('coordTemplate')):
+ # No template found, just skip.
+ pywikibot.output(u'Language: %s has no coordTemplate set!' % lang)
+ return False
+
+ (connWiki, cursorWiki) = connectWikiDatabase(lang)
+
+ withCoordinates = getMonumentsWithCoordinates(countrycode, lang, cursorMon)
+
+ articleNames = []
+ duplicateArticles = []
+ monumentsWithArticle = []
+
+ for aMonument in withCoordinates:
+ article_name = u''
+ result = re.match("\[\[(.+?)\|.+?\]\]", aMonument.name)
+ if (result and result.group(1)):
+ article_name = result.group(1)
+
+ result = re.match("\[\[([^\|]+?)\]\]", aMonument.name)
+ if (result and result.group(1)):
+ article_name = result.group(1)
+
+ if (article_name):
+ if article_name not in duplicateArticles:
+ if article_name in articleNames:
+ duplicateArticles.append(article_name)
+ articleNames.remove(article_name)
+ for sMon in monumentsWithArticle:
+ if (sMon.article == article_name):
+ monumentsWithArticle.remove(sMon)
+ break
+ else:
+ articleNames.append(article_name)
+ aMonument.article = article_name
+ monumentsWithArticle.append(aMonument)
+
+ if len(duplicateArticles):
+ pywikibot.output(u'Multiple references to following articles: %s in
monument lists! Skipped those.' % duplicateArticles[:])
+
+ for aMonument in monumentsWithArticle:
+ PageNs = WP_ARTICLE_NS
+ followRedirect = True
+ (retStatus, pageId, redirNs, redirTitle) =
getPageId(aMonument.article, connWiki, cursorWiki, PageNs, followRedirect)
+ if (retStatus == 'FOLLOWED_REDIR' and redirNs == WP_ARTICLE_NS):
+ aMonument.article = redirTitle
+ if (pageId):
+ if not hasCoordinates(pageId, lang, cursorWiki):
+ addCoords(countrycode, lang, aMonument, coordconfig)
+
+
+
+def getMonumentsWithCoordinates(countrycode, lang, cursor):
+ '''
+ Get monuments with coordinates from monuments database for a certain
country/language combination.
+ '''
+ result = []
+ query = """SELECT id, name, lat, lon, source FROM monuments_all
+ WHERE lat<>0 AND lon<>0 AND country=%s AND lang=%s"""
+ cursor.execute(query, (countrycode, lang))
+
+ #result = cursor.fetchall ()
+ while True:
+ try:
+ row = cursor.fetchone()
+ aMon = Monument()
+ aMon.country = countrycode
+ aMon.wikilang = lang
+ (aMon.id, aMon.name, aMon.lat, aMon.lon, aMon.source) = row
+ result.append(aMon)
+ except TypeError:
+ break
+
+ return result
+
+def hasCoordinates(pageId, lang, cursor):
+ '''
+ check if Article has Article coords in WP coords DB
+ '''
+
+ if (pageId and lang):
+ coordTable = 'u_dispenser_p.coord_' + lang + 'wiki'
+
+ # check if primary coordinate i.e. article coordinate exists for pageId
+ query = """SELECT gc_from FROM %s
+ WHERE (gc_from = %s AND gc_primary = 1)
+ LIMIT 1"""
+ # FIXME escape & sanitize coordTable and pageId
+ cursor.execute(query % (coordTable, int(pageId) ))
+
+ if (cursor.rowcount > 0):
+ return True
+ else:
+ return False
+ else:
+ return False
+
+def getPageId(pageName, conn, cursor, pageNamespace = WP_ARTICLE_NS,
followRedirect = False):
+ '''
+ get Wikipedia pagename pageId
+ '''
+
+ #underscores
+ pageName = pageName.replace(u' ', u'_')
+ retStatus = ''
+ pageId = ''
+ redirNs = ''
+ redirTitle = u''
+
+ #FIXME page_titles like 'Château_de_Bercy' won't work, but titles like
'Käru' do ??
+ query = """SELECT page_id, page_is_redirect FROM page
+ WHERE page_namespace = %s AND page_title = %s"""
+ cursor.execute(query, (pageNamespace, pageName))
+ if DEBUG:
+ print cursor._executed
+ print u'rowcount: %d ' % cursor.rowcount
+
+ if (cursor.rowcount > 0):
+ row = cursor.fetchone()
+ (pageId, IsRedirect) = row
+ if (IsRedirect):
+ if (followRedirect):
+ (redirNs, redirTitle) = getRedirPageNsTitle(pageId, cursor)
+ redirTitle = unicode(redirTitle, "utf-8")
+ (dummy0, pageId, dummy1, dummy2) = getPageId(redirTitle, conn,
cursor, redirNs)
+ retStatus = 'FOLLOWED_REDIR'
+ else:
+ retStatus = 'REDIRECT'
+ else:
+ retStatus = 'OK'
+
+ return (retStatus, pageId, redirNs, redirTitle)
+
+def getRedirPageNsTitle(pageId, cursor):
+ '''
+ Get redirect page namespace and title.
+ '''
+
+ if (pageId):
+ pageNs = ''
+ pageTitle = u''
+
+ query = """SELECT rd_namespace, rd_title FROM redirect
+ WHERE rd_from = %s"""
+ cursor.execute(query, (pageId,))
+
+ if (cursor.rowcount > 0):
+ row = cursor.fetchone()
+ (pageNs, pageTitle) = row
+
+ return (pageNs, pageTitle)
+
+
+def addCoords(countrycode, lang, monument, coordconfig):
+ '''
+ Add the coordinates to article.
+ '''
+
+ if (countrycode and lang):
+ coordTemplate = coordconfig.get('coordTemplate')
+ coordTemplateSyntax = coordconfig.get('coordTemplateSyntax')
+ site = pywikibot.getSite(lang, 'wikipedia')
+
+ page = pywikibot.Page(site, monument.article)
+ try:
+ text = page.get()
+ except pywikibot.NoPage: # First except, prevent empty pages
+ return False
+ except pywikibot.IsRedirectPage: # second except, prevent redirect
+ pywikibot.output(u'%s is a redirect!' % monument.article)
+ return False
+ except pywikibot.Error: # third exception, take the problem and print
+ pywikibot.output(u"Some error, skipping..")
+ return False
+
+ if coordTemplate in page.templates():
+ return False
+
+ newtext = text
+ replCount = 1
+ coordText = coordTemplateSyntax % (monument.lat, monument.lon,
countrycode.upper() )
+ localCatName = pywikibot.getSite().namespace(WP_CATEGORY_NS)
+ catStart = r'\[\[(' + localCatName + '|Category):'
+ catStartPlain = u'[[' + localCatName + ':'
+ replacementText = u''
+ replacementText = coordText + '\n\n' + catStartPlain
+
+ # insert coordinate template before categories
+ newtext = re.sub(catStart, replacementText, newtext, replCount,
flags=re.IGNORECASE)
+
+ if text != newtext:
+ wikilist = u''
+ matchWikipage = re.search("title=(.+?)&", monument.source)
+ if (matchWikipage and matchWikipage.group(1)):
+ wikilist = matchWikipage.group(1)
+ comment = u'Adding template %s based on [[%s]], # %s' %
(coordTemplate, wikilist, monument.id)
+ pywikibot.showDiff(text, newtext)
+ modPage = pywikibot.input(u'Modify page: %s ([y]/n) ?' %
(monument.article) )
+ if (modPage.lower == 'y' or modPage == ''):
+ page.put(newtext, comment)
+ return True
+ else:
+ return False
+ else:
+ return False
+
+def main():
+ countrycode = u''
+ connMon = None
+ cursorMon = None
+
+ (connMon, cursorMon) = connectMonDatabase()
+
+ for arg in pywikibot.handleArgs():
+ option, sep, value = arg.partition(':')
+ if option == '-countrycode:':
+ countrycode = value
+
+ if countrycode:
+ lang = pywikibot.getSite().language()
+ if not mconfig.countries.get((countrycode, lang)):
+ pywikibot.output(u'I have no config for countrycode "%s" in
language "%s"' % (countrycode, lang))
+ return False
+ pywikibot.output(u'Working on countrycode "%s" in language "%s"' %
(countrycode, lang))
+ processCountry(countrycode, lang, mconfig.countries.get((countrycode,
lang)), wikiData.get(lang), connMon, cursorMon)
+ else:
+ for (countrycode, lang), countryconfig in mconfig.countries.iteritems():
+ pywikibot.output(u'Working on countrycode "%s" in language "%s"' %
(countrycode, lang))
+ processCountry(countrycode, lang, countryconfig,
wikiData.get(lang), connMon, cursorMon)
+
+
+if __name__ == "__main__":
+ try:
+ main()
+ finally:
+ pywikibot.stopme()
diff --git a/erfgoedbot/add_object_location_monuments.py
b/erfgoedbot/add_object_location_monuments.py
index 9d8b40e..4b63073 100644
--- a/erfgoedbot/add_object_location_monuments.py
+++ b/erfgoedbot/add_object_location_monuments.py
@@ -1,261 +1,261 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-'''
-
-Bot to add {{Object location dec}} to monuments. Location is based on
information from the monuments database.
-
-'''
-import monuments_config as mconfig
-import pywikibot
-import config
-import pagegenerators
-import MySQLdb
-
-
-def connectDatabase():
- '''
- Connect to the monuments mysql database, if it fails, go down in flames
- '''
- conn = MySQLdb.connect(host=mconfig.db_server, db=mconfig.db,
user=config.db_username,
- passwd=config.db_password, use_unicode=True,
charset='utf8')
- cursor = conn.cursor()
- return (conn, cursor)
-
-
-def connectDatabase2():
- '''
- Connect to the commons mysql database, if it fails, go down in flames
- '''
- conn = MySQLdb.connect('commonswiki.labsdb', db='commonswiki_p',
- user=config.db_username, passwd=config.db_password,
use_unicode=True, charset='latin1')
- cursor = conn.cursor()
- return (conn, cursor)
-
-
-def locateCountry(countrycode, lang, countryconfig, conn, cursor, conn2,
cursor2):
- '''
- Locate images in a single country.
- '''
- if not countryconfig.get('commonsTemplate') or not
countryconfig.get('commonsTrackerCategory'):
- # Not possible for this country. Silently return
- return False
-
- for (page, monumentId) in getMonumentsWithoutLocation(countryconfig,
conn2, cursor2):
- locationTemplate = locateImage(
- page, monumentId, countrycode, lang, countryconfig, conn, cursor)
- if locationTemplate:
- addLocation(page, locationTemplate)
-
-
-def getMonumentsWithoutLocation(countryconfig, conn2, cursor2):
- site = pywikibot.getSite(u'commons', u'commons')
- query = u"""SELECT page_title, cl_sortkey FROM page
-JOIN templatelinks ON page_id=tl_from
-JOIN categorylinks ON page_id=cl_from
-WHERE page_namespace=6 AND page_is_redirect=0
-AND tl_namespace=10 AND tl_title=%s
-AND cl_to=%s
-AND NOT EXISTS(
-SELECT * FROM categorylinks AS loccat
-WHERE page_id=loccat.cl_from
-AND loccat.cl_to='Media_with_locations') LIMIT 10000"""
- commonsTemplate = countryconfig.get('commonsTemplate').replace(u' ', u'_')
- commonsTrackerCategory = countryconfig.get(
- 'commonsTrackerCategory').replace(u' ', u'_')
-
- cursor2.execute(
- query, (commonsTemplate.encode('utf-8'),
commonsTrackerCategory.encode('utf-8')))
-
- while True:
- try:
- pageName, sortkey = cursor2.fetchone()
- except TypeError:
- # Nothing left
- break
- if pageName:
- page = pywikibot.Page(site, 'File:' + unicode(pageName, 'utf-8'))
- try:
- monumentId = unicode(sortkey, 'utf-8')
- # Just want the first line
- mLines = monumentId.splitlines()
- monumentId = mLines[0]
- # Remove leading and trailing spaces
- monumentId = monumentId.strip()
- # Remove leading zero's. FIXME: This should be replaced with
- # underscores
- monumentId = monumentId.lstrip(u'0')
- # Remove leading underscors.
- monumentId = monumentId.lstrip(u'_')
- yield (page, monumentId)
- except ValueError:
- pywikibot.output(u'Got value error for %s' % (monumentId,))
-
-
-def locateImage(page, monumentId, countrycode, lang, countryconfig, conn,
cursor):
- pywikibot.output(u'Working on: %s with id %s' % (page.title(), monumentId))
-
- # First check if the identifier returns something useful
- coordinates = getCoordinates(monumentId, countrycode, lang, conn, cursor)
- if not coordinates:
- pywikibot.output(
- u'File contains an unknown identifier: %s' % monumentId)
- return False
-
- (lat, lon, source) = coordinates
-
- # Ok. We know we have coordinates. Now check to be sure to see if there's
- # not already a template on the page.
- templates = page.templates()
-
- if u'Location' in page.templates() or u'Location dec' in page.templates()
or u'Object location' in page.templates() or u'Object location dec' in
page.templates():
- pywikibot.output(
- u'Location template already found at: %s' % page.title())
- return False
-
- locationTemplate = u'{{Object location
dec|%s|%s|region:%s_type:landmark_scale:1500}}<!-- Location from %s -->' % (
- lat, lon, countrycode.upper(), source)
-
- return locationTemplate
-
-
-def getCoordinates(monumentId, countrycode, lang, conn, cursor):
- '''
- Get coordinates from the erfgoed database
- '''
- result = None
-
- query = u"""SELECT lat, lon, source FROM monuments_all
-WHERE id=%s
-AND country=%s
-AND lang=%s
-AND NOT lat=0 AND NOT lon=0
-AND NOT lat='' AND NOT lon=''
-AND NOT lat IS NULL AND NOT lon IS NULL
-LIMIT 1"""
-
- cursor.execute(query, (monumentId, countrycode, lang,))
-
- try:
- row = cursor.fetchone()
- return row
- except TypeError:
- return False
-
-
-def addLocation(page, locationTemplate):
- try:
- oldtext = page.get()
- except pywikibot.NoPage:
- # For some reason we sometimes get a NoPage Exception
- pywikibot.output(u'No text found at %s. Skipping' % (page.title(),))
- return False
-
- comment = u'Adding object location based on monument identifier'
-
- newtext = putAfterTemplate(
- oldtext, u'Information', locationTemplate, loose=True)
- pywikibot.showDiff(oldtext, newtext)
- page.put(newtext, comment)
-
-
-def putAfterTemplate(oldtext, template, toadd, loose=True):
- '''
- Try to put text after template.
- If the template is not found return False if loose is set to False
- If loose is set to True: Remove interwiki's, categories, add template,
restore categories, restore interwiki's.
-
- Based on cc-by-sa-3.0 code by Dschwen
- '''
- newtext = u''
-
- templatePosition = oldtext.find(u'{{%s' % (template,))
-
- if templatePosition >= 0:
- previousChar = u''
- currentChar = u''
- templatePosition += 2
- curly = 1
- square = 0
-
- while templatePosition < len(oldtext):
- currentChar = oldtext[templatePosition]
-
- if currentChar == u'[' and previousChar == u'[':
- square += 1
- previousChar = u''
- if currentChar == u']' and previousChar == u']':
- square -= 1
- previousChar = u''
- if currentChar == u'{' and previousChar == u'{':
- curly += 1
- previousChar = u''
- if currentChar == u'}' and previousChar == u'}':
- curly -= 1
- previousChar = u''
-
- previousChar = currentChar
- templatePosition += 1
-
- if curly == 0 and square <= 0:
- # Found end of template
- break
- newtext = oldtext[:templatePosition] + \
- u'\n' + toadd + oldtext[templatePosition:]
-
- else:
- if loose:
- newtext = oldtext
- cats = pywikibot.getCategoryLinks(newtext)
- ll = pywikibot.getLanguageLinks(newtext)
- nextext = pywikibot.removeLanguageLinks(newtext)
- newtext = pywikibot.removeCategoryLinks(newtext)
- newtext = newtext + u'\n' + toadd
- newtext = pywikibot.replaceCategoryLinks(newtext, cats)
- newtext = pywikibot.replaceLanguageLinks(newtext, ll)
-
- return newtext
-
-
-def main():
- countrycode = u''
-
- # Connect database, we need that
- (conn, cursor) = connectDatabase()
- (conn2, cursor2) = connectDatabase2()
-
- generator = None
- genFactory = pagegenerators.GeneratorFactory()
-
- for arg in pywikibot.handleArgs():
- option, sep, value = arg.partition(':')
- if option == '-countrycode:':
- countrycode = value
-
- lang = pywikibot.getSite().language()
- pywikibot.setSite(pywikibot.getSite(u'commons', u'commons'))
-
- if countrycode:
- if not mconfig.countries.get((countrycode, lang)):
- pywikibot.output(
- u'I have no config for countrycode "%s" in language "%s"' %
(countrycode, lang))
- return False
- pywikibot.output(
- u'Working on countrycode "%s" in language "%s"' % (countrycode,
lang))
- locateCountry(countrycode, lang, mconfig.countries.get(
- (countrycode, lang)), conn, cursor, conn2, cursor2)
- else:
- for (countrycode, lang), countryconfig in
mconfig.countries.iteritems():
- if not countryconfig.get('autoGeocode'):
- pywikibot.output(
- u'"%s" in language "%s" is not supported in auto geocode
mode (yet).' % (countrycode, lang))
- else:
- pywikibot.output(
- u'Working on countrycode "%s" in language "%s"' %
(countrycode, lang))
- locateCountry(
- countrycode, lang, countryconfig, conn, cursor, conn2,
cursor2)
-
-if __name__ == "__main__":
- try:
- main()
- finally:
- pywikibot.stopme()
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+'''
+
+Bot to add {{Object location dec}} to monuments. Location is based on
information from the monuments database.
+
+'''
+import monuments_config as mconfig
+import pywikibot
+import config
+import pagegenerators
+import MySQLdb
+
+
+def connectDatabase():
+ '''
+ Connect to the monuments mysql database, if it fails, go down in flames
+ '''
+ conn = MySQLdb.connect(host=mconfig.db_server, db=mconfig.db,
user=config.db_username,
+ passwd=config.db_password, use_unicode=True,
charset='utf8')
+ cursor = conn.cursor()
+ return (conn, cursor)
+
+
+def connectDatabase2():
+ '''
+ Connect to the commons mysql database, if it fails, go down in flames
+ '''
+ conn = MySQLdb.connect('commonswiki.labsdb', db='commonswiki_p',
+ user=config.db_username, passwd=config.db_password,
use_unicode=True, charset='latin1')
+ cursor = conn.cursor()
+ return (conn, cursor)
+
+
+def locateCountry(countrycode, lang, countryconfig, conn, cursor, conn2,
cursor2):
+ '''
+ Locate images in a single country.
+ '''
+ if not countryconfig.get('commonsTemplate') or not
countryconfig.get('commonsTrackerCategory'):
+ # Not possible for this country. Silently return
+ return False
+
+ for (page, monumentId) in getMonumentsWithoutLocation(countryconfig,
conn2, cursor2):
+ locationTemplate = locateImage(
+ page, monumentId, countrycode, lang, countryconfig, conn, cursor)
+ if locationTemplate:
+ addLocation(page, locationTemplate)
+
+
+def getMonumentsWithoutLocation(countryconfig, conn2, cursor2):
+ site = pywikibot.getSite(u'commons', u'commons')
+ query = u"""SELECT page_title, cl_sortkey FROM page
+JOIN templatelinks ON page_id=tl_from
+JOIN categorylinks ON page_id=cl_from
+WHERE page_namespace=6 AND page_is_redirect=0
+AND tl_namespace=10 AND tl_title=%s
+AND cl_to=%s
+AND NOT EXISTS(
+SELECT * FROM categorylinks AS loccat
+WHERE page_id=loccat.cl_from
+AND loccat.cl_to='Media_with_locations') LIMIT 10000"""
+ commonsTemplate = countryconfig.get('commonsTemplate').replace(u' ', u'_')
+ commonsTrackerCategory = countryconfig.get(
+ 'commonsTrackerCategory').replace(u' ', u'_')
+
+ cursor2.execute(
+ query, (commonsTemplate.encode('utf-8'),
commonsTrackerCategory.encode('utf-8')))
+
+ while True:
+ try:
+ pageName, sortkey = cursor2.fetchone()
+ except TypeError:
+ # Nothing left
+ break
+ if pageName:
+ page = pywikibot.Page(site, 'File:' + unicode(pageName, 'utf-8'))
+ try:
+ monumentId = unicode(sortkey, 'utf-8')
+ # Just want the first line
+ mLines = monumentId.splitlines()
+ monumentId = mLines[0]
+ # Remove leading and trailing spaces
+ monumentId = monumentId.strip()
+ # Remove leading zero's. FIXME: This should be replaced with
+ # underscores
+ monumentId = monumentId.lstrip(u'0')
+ # Remove leading underscors.
+ monumentId = monumentId.lstrip(u'_')
+ yield (page, monumentId)
+ except ValueError:
+ pywikibot.output(u'Got value error for %s' % (monumentId,))
+
+
+def locateImage(page, monumentId, countrycode, lang, countryconfig, conn,
cursor):
+ pywikibot.output(u'Working on: %s with id %s' % (page.title(), monumentId))
+
+ # First check if the identifier returns something useful
+ coordinates = getCoordinates(monumentId, countrycode, lang, conn, cursor)
+ if not coordinates:
+ pywikibot.output(
+ u'File contains an unknown identifier: %s' % monumentId)
+ return False
+
+ (lat, lon, source) = coordinates
+
+ # Ok. We know we have coordinates. Now check to be sure to see if there's
+ # not already a template on the page.
+ templates = page.templates()
+
+ if u'Location' in page.templates() or u'Location dec' in page.templates()
or u'Object location' in page.templates() or u'Object location dec' in
page.templates():
+ pywikibot.output(
+ u'Location template already found at: %s' % page.title())
+ return False
+
+ locationTemplate = u'{{Object location
dec|%s|%s|region:%s_type:landmark_scale:1500}}<!-- Location from %s -->' % (
+ lat, lon, countrycode.upper(), source)
+
+ return locationTemplate
+
+
+def getCoordinates(monumentId, countrycode, lang, conn, cursor):
+ '''
+ Get coordinates from the erfgoed database
+ '''
+ result = None
+
+ query = u"""SELECT lat, lon, source FROM monuments_all
+WHERE id=%s
+AND country=%s
+AND lang=%s
+AND NOT lat=0 AND NOT lon=0
+AND NOT lat='' AND NOT lon=''
+AND NOT lat IS NULL AND NOT lon IS NULL
+LIMIT 1"""
+
+ cursor.execute(query, (monumentId, countrycode, lang,))
+
+ try:
+ row = cursor.fetchone()
+ return row
+ except TypeError:
+ return False
+
+
+def addLocation(page, locationTemplate):
+ try:
+ oldtext = page.get()
+ except pywikibot.NoPage:
+ # For some reason we sometimes get a NoPage Exception
+ pywikibot.output(u'No text found at %s. Skipping' % (page.title(),))
+ return False
+
+ comment = u'Adding object location based on monument identifier'
+
+ newtext = putAfterTemplate(
+ oldtext, u'Information', locationTemplate, loose=True)
+ pywikibot.showDiff(oldtext, newtext)
+ page.put(newtext, comment)
+
+
+def putAfterTemplate(oldtext, template, toadd, loose=True):
+ '''
+ Try to put text after template.
+ If the template is not found return False if loose is set to False
+ If loose is set to True: Remove interwiki's, categories, add template,
restore categories, restore interwiki's.
+
+ Based on cc-by-sa-3.0 code by Dschwen
+ '''
+ newtext = u''
+
+ templatePosition = oldtext.find(u'{{%s' % (template,))
+
+ if templatePosition >= 0:
+ previousChar = u''
+ currentChar = u''
+ templatePosition += 2
+ curly = 1
+ square = 0
+
+ while templatePosition < len(oldtext):
+ currentChar = oldtext[templatePosition]
+
+ if currentChar == u'[' and previousChar == u'[':
+ square += 1
+ previousChar = u''
+ if currentChar == u']' and previousChar == u']':
+ square -= 1
+ previousChar = u''
+ if currentChar == u'{' and previousChar == u'{':
+ curly += 1
+ previousChar = u''
+ if currentChar == u'}' and previousChar == u'}':
+ curly -= 1
+ previousChar = u''
+
+ previousChar = currentChar
+ templatePosition += 1
+
+ if curly == 0 and square <= 0:
+ # Found end of template
+ break
+ newtext = oldtext[:templatePosition] + \
+ u'\n' + toadd + oldtext[templatePosition:]
+
+ else:
+ if loose:
+ newtext = oldtext
+ cats = pywikibot.getCategoryLinks(newtext)
+ ll = pywikibot.getLanguageLinks(newtext)
+ nextext = pywikibot.removeLanguageLinks(newtext)
+ newtext = pywikibot.removeCategoryLinks(newtext)
+ newtext = newtext + u'\n' + toadd
+ newtext = pywikibot.replaceCategoryLinks(newtext, cats)
+ newtext = pywikibot.replaceLanguageLinks(newtext, ll)
+
+ return newtext
+
+
+def main():
+ countrycode = u''
+
+ # Connect database, we need that
+ (conn, cursor) = connectDatabase()
+ (conn2, cursor2) = connectDatabase2()
+
+ generator = None
+ genFactory = pagegenerators.GeneratorFactory()
+
+ for arg in pywikibot.handleArgs():
+ option, sep, value = arg.partition(':')
+ if option == '-countrycode:':
+ countrycode = value
+
+ lang = pywikibot.getSite().language()
+ pywikibot.setSite(pywikibot.getSite(u'commons', u'commons'))
+
+ if countrycode:
+ if not mconfig.countries.get((countrycode, lang)):
+ pywikibot.output(
+ u'I have no config for countrycode "%s" in language "%s"' %
(countrycode, lang))
+ return False
+ pywikibot.output(
+ u'Working on countrycode "%s" in language "%s"' % (countrycode,
lang))
+ locateCountry(countrycode, lang, mconfig.countries.get(
+ (countrycode, lang)), conn, cursor, conn2, cursor2)
+ else:
+ for (countrycode, lang), countryconfig in
mconfig.countries.iteritems():
+ if not countryconfig.get('autoGeocode'):
+ pywikibot.output(
+ u'"%s" in language "%s" is not supported in auto geocode
mode (yet).' % (countrycode, lang))
+ else:
+ pywikibot.output(
+ u'Working on countrycode "%s" in language "%s"' %
(countrycode, lang))
+ locateCountry(
+ countrycode, lang, countryconfig, conn, cursor, conn2,
cursor2)
+
+if __name__ == "__main__":
+ try:
+ main()
+ finally:
+ pywikibot.stopme()
diff --git a/erfgoedbot/images_of_monuments_without_id.py
b/erfgoedbot/images_of_monuments_without_id.py
index 4d5df36..359f947 100644
--- a/erfgoedbot/images_of_monuments_without_id.py
+++ b/erfgoedbot/images_of_monuments_without_id.py
@@ -1,245 +1,245 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-'''
-
-Add monument-ID-templates to images on Commons -- based on the image usage in
the lists -- and
- make a galleries of monuments without an id at Commons
-
-Usage:
-# loop thtough all countries
-python images_of_monuments_without_id.py
-# work on specific country-lang
-python images_of_monuments_without_id.py -countrycode:XX -lang:YY
-
-
-'''
-import monuments_config as mconfig
-import pywikibot
-import config
-import MySQLdb
-##import re, imagerecat, pagegenerators, catlib
-
-
-def connectDatabase():
- '''
- Connect to the monuments mysql database, if it fails, go down in flames.
- This database is utf-8 encoded.
- '''
- conn = MySQLdb.connect(host=mconfig.db_server, db=mconfig.db,
user=config.db_username,
- passwd=config.db_password, use_unicode=True,
charset='utf8')
- conn.ping(True)
- cursor = conn.cursor()
- return (conn, cursor)
-
-
-def connectDatabase2():
- '''
- Connect to the commons mysql database, if it fails, go down in flames
- This database is latin1 encoded.
- '''
- conn = MySQLdb.connect('commonswiki.labsdb', db='commonswiki_p',
- user=config.db_username, passwd=config.db_password,
use_unicode=True, charset='latin1')
- cursor = conn.cursor()
- return (conn, cursor)
-
-
-def processCountry(countrycode, lang, countryconfig, conn, cursor, conn2,
cursor2):
- '''
- Work on a single country.
- '''
- if not countryconfig.get('commonsTemplate'):
- # No template found, just skip silently.
- return False
-
- commonsTemplate = countryconfig.get('commonsTemplate')
- imagesWithoutIdPage = countryconfig.get('imagesWithoutIdPage')
- project = countryconfig.get('project', u'wikipedia')
-
- # All items in the list with a photo
- withPhoto = getMonumentsWithPhoto(
- countrycode, lang, countryconfig, conn, cursor)
-
- # All items on Commons with the id template
- withTemplate = getMonumentsWithTemplate(
- countrycode, lang, countryconfig, conn2, cursor2)
-
- # All items on Commons in the monument tree without the id template
- withoutTemplate = getMonumentsWithoutTemplate(
- countrycode, lang, countryconfig, conn2, cursor2)
-
- # Get the image ignore list
- # FIXME: Make an actual function of this instead of a static list.
- ignoreList = [u'Monumentenschildje.jpg', u'Rijksmonument-Schildje-NL.jpg']
-
- # FIXME: Do something with a header template.
- text = u'<gallery>\n'
-
- for image in withoutTemplate:
- if not image in ignoreList:
- # An image is in the category and is in the list of used images
- if withPhoto.get(image):
- added = addCommonsTemplate(
- image, commonsTemplate, withPhoto.get(image))
- if not added:
- text = text + \
- u'File:%s|<nowiki>{{%s|%s}}</nowiki>\n' % (
- image, commonsTemplate, withPhoto.get(image))
- # An image is in the category and is not in the list of used images
- else:
- text = text + u'File:%s\n' % (image,)
-
- # An image is in the list of used images, but not in the category
- for image in withPhoto:
- # Skip images which already have the templates and the ones in without
- # templates to prevent duplicates
- if not image in ignoreList and not image in withTemplate and not image
in withoutTemplate:
- added = addCommonsTemplate(
- image, commonsTemplate, withPhoto.get(image))
- if not added:
- text = text + \
- u'File:%s|<nowiki>{{%s|%s}}</nowiki>\n' % (
- image, commonsTemplate, withPhoto.get(image))
-
- text = text + u'</gallery>'
-
- # imagesWithoutIdPage isn't set for every source, just skip it if it's not
- # set
- if imagesWithoutIdPage:
- comment = u'Images without an id'
-
- site = pywikibot.getSite(lang, project)
- page = pywikibot.Page(site, imagesWithoutIdPage)
- pywikibot.output(text)
- page.put(text, comment)
-
-
-def getMonumentsWithPhoto(countrycode, lang, countryconfig, conn, cursor):
- '''
- Get a dictionary of images which are in the monuments database for a
certain country/language combination.
- '''
- result = {}
- query = u"""SELECT image, id FROM monuments_all WHERE NOT image='' AND
country=%s AND lang=%s"""
- cursor.execute(query, (countrycode, lang))
-
- while True:
- try:
- row = cursor.fetchone()
- (image, id) = row
- # Spaces are lowercase in the other database
- image = image.replace(u' ', u'_')
- # First char always needs to be uppercase
- image = image[0].upper() + image[1:]
- result[image] = id
- except TypeError:
- break
-
- return result
-
-
-def getMonumentsWithoutTemplate(countrycode, lang, countryconfig, conn,
cursor):
- '''
- Get a list of images which are in the relevant monuments category tree,
but don't contain the identification template.
- '''
-
- commonsCategoryBase = countryconfig.get(
- 'commonsCategoryBase'). replace(u' ', u'_')
- commonsTemplate = countryconfig.get('commonsTemplate').replace(u' ', u'_')
-
- result = []
- query = u"""SELECT DISTINCT(page_title) FROM page JOIN categorylinks ON
page_id=cl_from WHERE page_namespace=6 AND page_is_redirect=0 AND (cl_to='%s'
OR cl_to LIKE '%s\_in\_%%') AND NOT EXISTS(SELECT * FROM templatelinks WHERE
page_id=tl_from AND tl_namespace=10 AND tl_title='%s') ORDER BY page_title
ASC"""
- cursor.execute(
- query % (commonsCategoryBase, commonsCategoryBase, commonsTemplate))
-
- while True:
- try:
- row = cursor.fetchone()
- (image,) = row
- result.append(image.decode('utf-8'))
- except TypeError:
- break
-
- return result
-
-
-def getMonumentsWithTemplate(countrycode, lang, countryconfig, conn, cursor):
- '''
- Get all images of monuments which already contain the identification
template.
- '''
-
- commonsTrackerCategory = countryconfig.get(
- 'commonsTrackerCategory'). replace(u' ', u'_')
-
- result = []
- query = u"""SELECT DISTINCT(page_title) FROM page JOIN categorylinks ON
page_id=cl_from WHERE page_namespace=6 AND page_is_redirect=0 AND cl_to=%s
ORDER BY page_title ASC"""
- cursor.execute(query, (commonsTrackerCategory,))
-
- while True:
- try:
- row = cursor.fetchone()
- (image,) = row
- result.append(image.decode('utf-8'))
- except TypeError:
- break
-
- return result
-
-
-def addCommonsTemplate(image, commonsTemplate, identifier):
- '''
- Add the commonsTemplate with identifier to the image.
- '''
- site = pywikibot.getSite('commons', 'commons')
- page = pywikibot.ImagePage(site, image)
- if not page.exists() or page.isRedirectPage() or page.isEmpty():
- return False
-
- if commonsTemplate in page.templates():
- return False
-
- text = page.get()
- newtext = u'{{%s|%s}}\n' % (commonsTemplate, identifier) + text
-
- comment = u'Adding template %s based on usage in list' % (commonsTemplate,)
-
- pywikibot.showDiff(text, newtext)
- page.put(newtext, comment)
- return True
-
-
-def main():
- countrycode = u''
- conn = None
- cursor = None
- # Connect database, we need that
- (conn, cursor) = connectDatabase()
- (conn2, cursor2) = connectDatabase2()
-
- for arg in pywikibot.handleArgs():
- option, sep, value = arg.partition(':')
- if option == '-countrycode':
- countrycode = value
-
- if countrycode:
- # looks like default lang is 'nl'
- lang = pywikibot.getSite().language()
- if not mconfig.countries.get((countrycode, lang)):
- pywikibot.output(
- u'I have no config for countrycode "%s" in language "%s"' %
(countrycode, lang))
- return False
- pywikibot.output(
- u'Working on countrycode "%s" in language "%s"' % (countrycode,
lang))
- processCountry(countrycode, lang, mconfig.countries.get(
- (countrycode, lang)), conn, cursor, conn2, cursor2)
- else:
- for (countrycode, lang), countryconfig in
mconfig.countries.iteritems():
- pywikibot.output(
- u'Working on countrycode "%s" in language "%s"' %
(countrycode, lang))
- processCountry(
- countrycode, lang, countryconfig, conn, cursor, conn2, cursor2)
-
-
-if __name__ == "__main__":
- try:
- main()
- finally:
- pywikibot.stopme()
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+'''
+
+Add monument-ID-templates to images on Commons -- based on the image usage in
the lists -- and
+ make a galleries of monuments without an id at Commons
+
+Usage:
+# loop through all countries
+python images_of_monuments_without_id.py
+# work on specific country-lang
+python images_of_monuments_without_id.py -countrycode:XX -lang:YY
+
+
+'''
+import monuments_config as mconfig
+import pywikibot
+import config
+import MySQLdb
+##import re, imagerecat, pagegenerators, catlib
+
+
def connectDatabase():
    '''
    Open a connection to the monuments MySQL database.

    Host and database name come from monuments_config, the credentials from
    the pywikibot config. The connection uses the utf8 charset and returns
    unicode strings. Connection errors are not caught here.

    Returns a (connection, cursor) tuple.
    '''
    settings = {
        'host': mconfig.db_server,
        'db': mconfig.db,
        'user': config.db_username,
        'passwd': config.db_password,
        'use_unicode': True,
        'charset': 'utf8',
    }
    connection = MySQLdb.connect(**settings)
    # NOTE(review): ping(True) presumably switches on automatic reconnect;
    # confirm against the MySQLdb documentation.
    connection.ping(True)
    return (connection, connection.cursor())
+
+
def connectDatabase2():
    '''
    Open a connection to the Commons database replica.

    The connection uses the latin1 charset; credentials come from the
    pywikibot config. Connection errors are not caught here.

    Returns a (connection, cursor) tuple.
    '''
    options = dict(
        db='commonswiki_p',
        user=config.db_username,
        passwd=config.db_password,
        use_unicode=True,
        charset='latin1',
    )
    connection = MySQLdb.connect('commonswiki.labsdb', **options)
    return (connection, connection.cursor())
+
+
def processCountry(countrycode, lang, countryconfig, conn, cursor, conn2,
                   cursor2):
    '''
    Work on a single country.

    Tries to add the country's Commons id template to images that are used
    in the monument lists, and collects every image that still has no id
    in a <gallery>. The gallery is saved to the configured
    'imagesWithoutIdPage', if there is one.

    conn/cursor belong to the monuments database, conn2/cursor2 to the
    Commons database.

    Returns False when the country has no 'commonsTemplate', None otherwise.
    '''
    commonsTemplate = countryconfig.get('commonsTemplate')
    if not commonsTemplate:
        # No template found, just skip silently.
        return False

    imagesWithoutIdPage = countryconfig.get('imagesWithoutIdPage')
    project = countryconfig.get('project') or u'wikipedia'

    # All items in the list with a photo
    withPhoto = getMonumentsWithPhoto(
        countrycode, lang, countryconfig, conn, cursor)

    # All items on Commons with the id template
    withTemplate = getMonumentsWithTemplate(
        countrycode, lang, countryconfig, conn2, cursor2)

    # All items on Commons in the monument tree without the id template
    withoutTemplate = getMonumentsWithoutTemplate(
        countrycode, lang, countryconfig, conn2, cursor2)

    # Get the image ignore list
    # FIXME: Make an actual function of this instead of a static list.
    ignoreList = frozenset(
        [u'Monumentenschildje.jpg', u'Rijksmonument-Schildje-NL.jpg'])

    entryWithId = u'File:%s|<nowiki>{{%s|%s}}</nowiki>\n'

    # FIXME: Do something with a header template.
    # Collect the pieces and join once instead of re-copying the text for
    # every image.
    lines = [u'<gallery>\n']

    for image in withoutTemplate:
        if image in ignoreList:
            continue
        monumentId = withPhoto.get(image)
        if monumentId:
            # An image is in the category and is in the list of used images
            if not addCommonsTemplate(image, commonsTemplate, monumentId):
                lines.append(
                    entryWithId % (image, commonsTemplate, monumentId))
        else:
            # An image is in the category and is not in the list of used
            # images
            lines.append(u'File:%s\n' % (image,))

    # An image is in the list of used images, but not in the category.
    # Skip images which already have the template and the ones handled
    # above to prevent duplicates; a set keeps each lookup constant time.
    alreadyHandled = ignoreList.union(withTemplate, withoutTemplate)
    for image, monumentId in withPhoto.items():
        if image in alreadyHandled:
            continue
        if not addCommonsTemplate(image, commonsTemplate, monumentId):
            lines.append(entryWithId % (image, commonsTemplate, monumentId))

    lines.append(u'</gallery>')
    text = u''.join(lines)

    # imagesWithoutIdPage isn't set for every source, just skip it if it's
    # not set
    if imagesWithoutIdPage:
        comment = u'Images without an id'

        site = pywikibot.getSite(lang, project)
        page = pywikibot.Page(site, imagesWithoutIdPage)
        pywikibot.output(text)
        page.put(text, comment)
+
+
def getMonumentsWithPhoto(countrycode, lang, countryconfig, conn, cursor):
    '''
    Get a dictionary of images which are in the monuments database for a
    certain country/language combination.

    Keys are image names with spaces replaced by underscores and the first
    character uppercased; values are the monument ids. countryconfig and
    conn are not used.
    '''
    result = {}
    query = (u"SELECT image, id FROM monuments_all WHERE NOT image='' "
             u"AND country=%s AND lang=%s")
    cursor.execute(query, (countrycode, lang))

    # fetchone() returns None once the result set is exhausted, which ends
    # the iteration without relying on an exception.
    for (image, monumentId) in iter(cursor.fetchone, None):
        # Spaces are underscores in the other database
        image = image.replace(u' ', u'_')
        # First char always needs to be uppercase
        image = image[0].upper() + image[1:]
        result[image] = monumentId

    return result
+
+
def getMonumentsWithoutTemplate(countrycode, lang, countryconfig, conn,
                                cursor):
    '''
    Get a list of images which are in the relevant monuments category tree,
    but don't contain the identification template.

    The category tree is the configured 'commonsCategoryBase' itself plus
    every category named '<base>_in_...'. The titles are returned as
    unicode strings, ordered by page title. countrycode, lang and conn are
    not used.
    '''
    commonsCategoryBase = countryconfig.get(
        'commonsCategoryBase').replace(u' ', u'_')
    commonsTemplate = countryconfig.get('commonsTemplate').replace(u' ', u'_')

    # Values are passed as parameters so the driver does the quoting; an
    # apostrophe in a category or template name no longer breaks the query.
    query = (u"SELECT DISTINCT(page_title) FROM page JOIN categorylinks ON "
             u"page_id=cl_from WHERE page_namespace=6 AND "
             u"page_is_redirect=0 AND (cl_to=%s OR cl_to LIKE %s) AND NOT "
             u"EXISTS(SELECT * FROM templatelinks WHERE page_id=tl_from AND "
             u"tl_namespace=10 AND tl_title=%s) ORDER BY page_title ASC")
    # '\_' is a literal underscore in a LIKE pattern, '%' matches the rest.
    subcategoryPattern = commonsCategoryBase + u'\\_in\\_%'
    cursor.execute(
        query, (commonsCategoryBase, subcategoryPattern, commonsTemplate))

    # fetchone() returns None once the result set is exhausted.
    result = []
    for (image,) in iter(cursor.fetchone, None):
        result.append(image.decode('utf-8'))

    return result
+
+
def getMonumentsWithTemplate(countrycode, lang, countryconfig, conn, cursor):
    '''
    Get all images of monuments which already contain the identification
    template.

    These are the file pages in the configured 'commonsTrackerCategory'.
    The titles are returned as unicode strings, ordered by page title.
    countrycode, lang and conn are not used.
    '''
    commonsTrackerCategory = countryconfig.get(
        'commonsTrackerCategory').replace(u' ', u'_')

    query = (u"SELECT DISTINCT(page_title) FROM page JOIN categorylinks ON "
             u"page_id=cl_from WHERE page_namespace=6 AND "
             u"page_is_redirect=0 AND cl_to=%s ORDER BY page_title ASC")
    cursor.execute(query, (commonsTrackerCategory,))

    # fetchone() returns None once the result set is exhausted.
    result = []
    for (image,) in iter(cursor.fetchone, None):
        result.append(image.decode('utf-8'))

    return result
+
+
def addCommonsTemplate(image, commonsTemplate, identifier):
    '''
    Prepend {{commonsTemplate|identifier}} to the file page of image on
    Commons.

    Nothing is changed when the page does not exist, is a redirect, is
    empty, or already uses the template.

    Returns True when the page was saved, False when it was left alone.
    '''
    commons = pywikibot.getSite('commons', 'commons')
    filePage = pywikibot.ImagePage(commons, image)

    usable = (filePage.exists()
              and not filePage.isRedirectPage()
              and not filePage.isEmpty())
    if not usable or commonsTemplate in filePage.templates():
        return False

    oldText = filePage.get()
    templateLine = u'{{%s|%s}}\n' % (commonsTemplate, identifier)
    newText = templateLine + oldText
    summary = u'Adding template %s based on usage in list' % (
        commonsTemplate,)

    pywikibot.showDiff(oldText, newText)
    filePage.put(newText, summary)
    return True
+
+
def main():
    '''
    Run the bot for one country or for every configured country.

    With ``-countrycode:XX`` on the command line only that country is
    processed, in the language of the default site; without it all entries
    of the configuration are processed.

    Returns False when no configuration exists for the requested
    countrycode/language pair, None otherwise.
    '''
    requestedCountry = u''

    # Connect database, we need that
    (conn, cursor) = connectDatabase()
    (conn2, cursor2) = connectDatabase2()

    for arg in pywikibot.handleArgs():
        option, sep, value = arg.partition(':')
        if option == '-countrycode':
            requestedCountry = value

    if not requestedCountry:
        for key, countryconfig in mconfig.countries.iteritems():
            (code, language) = key
            pywikibot.output(
                u'Working on countrycode "%s" in language "%s"' % (
                    code, language))
            processCountry(
                code, language, countryconfig, conn, cursor, conn2, cursor2)
        return

    # looks like default lang is 'nl'
    lang = pywikibot.getSite().language()
    countryconfig = mconfig.countries.get((requestedCountry, lang))
    if not countryconfig:
        pywikibot.output(
            u'I have no config for countrycode "%s" in language "%s"' % (
                requestedCountry, lang))
        return False

    pywikibot.output(
        u'Working on countrycode "%s" in language "%s"' % (
            requestedCountry, lang))
    processCountry(
        requestedCountry, lang, countryconfig, conn, cursor, conn2, cursor2)
+
+
if __name__ == "__main__":
    # Script entry point: pywikibot.stopme() is called whether main()
    # returns normally or raises.
    try:
        main()
    finally:
        pywikibot.stopme()
diff --git a/erfgoedbot/missing_commonscat_links.py
b/erfgoedbot/missing_commonscat_links.py
index 3388a66..ba4c4df 100644
--- a/erfgoedbot/missing_commonscat_links.py
+++ b/erfgoedbot/missing_commonscat_links.py
@@ -1,252 +1,252 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-'''
-
-Make a list of monuments where a category about the monument exists, but no
link is in the list yet.
-
-Usage:
-# loop thtough all countries
-python missing_commonscat_links.py
-# work on specific country-lang
-python missing_commonscat_links.py -countrycode:XX -lang:YY
-
-'''
-import monuments_config as mconfig
-import pywikibot
-from pywikibot import config
-import re
-import MySQLdb
-
-
-def connectDatabase():
- '''
- Connect to the p_erfoed_p mysql database, if it fails, go down in flames
- '''
- conn = MySQLdb.connect(host=mconfig.db_server, db=mconfig.db,
- user=config.db_username, passwd=config.db_password,
- use_unicode=True, charset='utf8')
- conn.ping(True)
- cursor = conn.cursor()
- return (conn, cursor)
-
-
-def connectDatabase2():
- '''
- Connect to the commons mysql database, if it fails, go down in flames
- '''
- conn = MySQLdb.connect('commonswiki.labsdb', db='commonswiki_p',
- user=config.db_username, passwd=config.db_password,
- use_unicode=True, charset='latin1')
- cursor = conn.cursor()
- return (conn, cursor)
-
-
-def processCountry(countrycode, lang, countryconfig, conn, cursor, conn2,
cursor2):
- '''
- Work on a single country.
- '''
- if not countryconfig.get('missingCommonscatPage'):
- # missingCommonscatPage not set, just skip silently.
- return False
-
- commonscatField = lookupSourceField(u'commonscat', countryconfig)
- if not commonscatField:
- # Field is missing. Something is seriously wrong, but we just skip it
- # silently
- return False
-
- missingCommonscatPage = countryconfig.get('missingCommonscatPage')
- commonsTrackerCategory = countryconfig.get(
- 'commonsTrackerCategory'). replace(u' ', u'_')
-
- withoutCommonscat = getMonumentsWithoutCommonscat(
- countrycode, lang, conn, cursor)
- commonscats = getMonumentCommonscats(
- commonsTrackerCategory, conn2, cursor2)
-
- pywikibot.output(u'withoutCommonscat %s elements' %
- (len(withoutCommonscat),))
- pywikibot.output(u'commonscats %s elements' % (len(commonscats),))
-
- # People can add a /header template for with more info
- text = u'{{#ifexist:{{FULLPAGENAME}}/header | {{/header}} }}\n'
- # text = text + u'<gallery>\n'
- totalCategories = 0
- maxCategories = 1000
-
- for catSortKey in sorted(commonscats.keys()):
- try:
- monumentId = unicode(catSortKey, 'utf-8')
- # Just want the first line
- mLines = monumentId.splitlines()
- monumentId = mLines[0]
- # Remove leading and trailing spaces
- monumentId = monumentId.strip()
- # Remove leading zero's. FIXME: This should be replaced with
- # underscores
- monumentId = monumentId.lstrip(u'0')
- # Remove leading underscors.
- monumentId = monumentId.lstrip(u'_')
- # All uppercase, same happens in other list. FIXME: Remove this
- monumentId = monumentId.upper()
- if monumentId in withoutCommonscat:
- m = re.search(
- '^[^\?]+\?title\=(.+?)&',
withoutCommonscat.get(monumentId))
- wikiSourceList = m.group(1)
- categoryName = commonscats.get(catSortKey)
- # pywikibot.output(u'Key %s returned a result' % (monumentId,))
- # pywikibot.output(wikiSourceList)
- # pywikibot.output(imageName)
- if totalCategories <= maxCategories:
- text = text + u'* <nowiki>|</nowiki> %s =
[[:Commons:Category:%s|%s]] - %s @ [[%s]]\n' % (commonscatField, unicode(
- categoryName, 'utf-8'), unicode(categoryName,
'utf-8').replace(u'_', u' '), monumentId, wikiSourceList)
- totalCategories = totalCategories + 1
- except ValueError:
- pywikibot.output(u'Got value error for %s' % (monumentId,))
-
- # text = text + u'</gallery>'
-
- if totalCategories >= maxCategories:
- text = text + \
- u'<!-- Maximum number of categories reached: %s, total of missing
commonscat links: %s -->\n' % (
- maxCategories, totalCategories)
- comment = u'Commonscat links to be made in monument lists: %s (list
maximum reached), total of missing commonscat links: %s' % (
- maxCategories, totalCategories)
- else:
- comment = u'Commonscat links to be made in monument lists: %s' %
totalCategories
-
- text = text + getInterwikisMissingCommonscatPage(countrycode, lang)
-
- site = pywikibot.Site(lang, u'wikipedia')
- page = pywikibot.Page(site, missingCommonscatPage)
- pywikibot.output(text)
- page.put(text, comment)
-
- return totalCategories
-
-
-def lookupSourceField(destination, countryconfig):
- '''
- Lookup the source field of a destination.
- '''
- for field in countryconfig.get('fields'):
- if field.get('dest') == destination:
- return field.get('source')
-
-
-def getInterwikisMissingCommonscatPage(countrycode, lang):
- result = u''
- for (countrycode2, lang2), countryconfig in mconfig.countries.iteritems():
- if countrycode == countrycode2 and lang != lang2:
- if countryconfig.get('missingCommonscatPage'):
- result = result + \
- u'[[%s:%s]]\n' % (
- lang2, countryconfig.get('missingCommonscatPage'))
-
- return result
-
-
-def getMonumentsWithoutCommonscat(countrycode, lang, conn, cursor):
- result = {}
-
- query = u"""SELECT id, source FROM monuments_all WHERE (commonscat IS NULL
or commonscat='') AND country=%s AND lang=%s"""
-
- cursor.execute(query, (countrycode, lang))
-
- while True:
- try:
- row = cursor.fetchone()
- (id, source) = row
- # To uppercase, same happens in the other list
- result[id.upper()] = source
- except TypeError:
- break
-
- return result
-
-
-def getMonumentCommonscats(commonsTrackerCategory, conn, cursor):
- result = {}
-
- query = u"""SELECT page_title, cl_sortkey FROM page JOIN categorylinks ON
page_id=cl_from WHERE page_namespace=14 AND page_is_redirect=0 AND cl_to=%s"""
-
- cursor.execute(query, (commonsTrackerCategory,))
-
- while True:
- try:
- row = cursor.fetchone()
- (category, id) = row
- result[id] = category
- except TypeError:
- break
-
- return result
-
-
-def makeStatistics(mconfig, totals):
- text = u'{| class="wikitable sortable"\n'
- text = text + \
- u'! country !! lang !! total !! page !! row template !! Commons
template\n'
-
- totalCategories = 0
- for ((countrycode, lang), countryconfig) in
sorted(mconfig.countries.items()):
- if countryconfig.get('missingCommonscatPage') and
countryconfig.get('commonsTemplate'):
- text = text + u'|-\n'
- text = text + u'| %s ' % countrycode
- text = text + u'|| %s ' % lang
- text = text + u'|| %s ' % totals.get((countrycode, lang))
- totalCategories = totalCategories + totals.get((countrycode, lang))
- text = text + u'|| [[:%s:%s|%s]] ' % (lang, countryconfig.get(
- 'missingCommonscatPage'),
countryconfig.get('missingCommonscatPage'))
- text = text + u'|| [[:%s:Template:%s|%s]] ' % (
- lang, countryconfig.get('rowTemplate'),
countryconfig.get('rowTemplate'))
- text = text + \
- u'|| {{tl|%s}}\n' % countryconfig.get('commonsTemplate')
- text = text + u'|- class="sortbottom"\n'
- text = text + u'| || || %s \n' % totalCategories
- text = text + u'|}\n'
-
- site = pywikibot.Site('commons', 'commons')
- page = pywikibot.Page(
- site, u'Commons:Monuments database/Missing commonscat
links/Statistics')
-
- comment = u'Updating missing commonscat links statistics. Total missing
links: %s' % totalCategories
- pywikibot.output(text)
- page.put(newtext=text, comment=comment)
-
-
-def main():
- countrycode = u''
- conn = None
- cursor = None
- # Connect database, we need that
- (conn, cursor) = connectDatabase()
- (conn2, cursor2) = connectDatabase2()
-
- for arg in pywikibot.handleArgs():
- option, sep, value = arg.partition(':')
- if option == '-countrycode:':
- countrycode = value
-
- if countrycode:
- lang = pywikibot.Site().language()
- if not mconfig.countries.get((countrycode, lang)):
- pywikibot.output(
- u'I have no config for countrycode "%s" in language "%s"' %
(countrycode, lang))
- return False
- pywikibot.output(
- u'Working on countrycode "%s" in language "%s"' % (countrycode,
lang))
- processCountry(countrycode, lang, mconfig.countries.get(
- (countrycode, lang)), conn, cursor, conn2, cursor2)
- else:
- totals = {}
- for (countrycode, lang), countryconfig in
mconfig.countries.iteritems():
- pywikibot.output(
- u'Working on countrycode "%s" in language "%s"' %
(countrycode, lang))
- totals[(countrycode, lang)] = processCountry(
- countrycode, lang, countryconfig, conn, cursor, conn2, cursor2)
- makeStatistics(mconfig, totals)
-
-
-if __name__ == "__main__":
- main()
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+'''
+
+Make a list of monuments where a category about the monument exists, but no
link is in the list yet.
+
+Usage:
+# loop through all countries
+python missing_commonscat_links.py
+# work on specific country-lang
+python missing_commonscat_links.py -countrycode:XX -lang:YY
+
+'''
+import monuments_config as mconfig
+import pywikibot
+from pywikibot import config
+import re
+import MySQLdb
+
+
def connectDatabase():
    '''
    Open a connection to the monuments MySQL database.

    Host and database name come from monuments_config, the credentials from
    the pywikibot config. The connection uses the utf8 charset and returns
    unicode strings. Connection errors are not caught here.

    Returns a (connection, cursor) tuple.
    '''
    connection = MySQLdb.connect(
        host=mconfig.db_server,
        db=mconfig.db,
        user=config.db_username,
        passwd=config.db_password,
        use_unicode=True,
        charset='utf8',
    )
    # NOTE(review): ping(True) presumably switches on automatic reconnect;
    # confirm against the MySQLdb documentation.
    connection.ping(True)
    dbCursor = connection.cursor()
    return (connection, dbCursor)
+
+
def connectDatabase2():
    '''
    Open a connection to the Commons database replica.

    The connection uses the latin1 charset; credentials come from the
    pywikibot config. Connection errors are not caught here.

    Returns a (connection, cursor) tuple.
    '''
    connection = MySQLdb.connect(
        'commonswiki.labsdb',
        db='commonswiki_p',
        user=config.db_username,
        passwd=config.db_password,
        use_unicode=True,
        charset='latin1',
    )
    dbCursor = connection.cursor()
    return (connection, dbCursor)
+
+
def processCountry(countrycode, lang, countryconfig, conn, cursor, conn2,
                   cursor2):
    '''
    Work on a single country.

    Matches the categories in the country's Commons tracker category (by
    their sort key, which carries the monument id) against the monuments
    that have no commonscat yet, and saves the resulting list of missing
    links to the configured 'missingCommonscatPage'. At most 1000 entries
    are written to the page; the returned total counts all matches.

    conn/cursor belong to the monuments database, conn2/cursor2 to the
    Commons database.

    Returns the number of missing commonscat links, or False when the
    country is skipped because 'missingCommonscatPage' or the commonscat
    source field is not configured.
    '''
    missingCommonscatPage = countryconfig.get('missingCommonscatPage')
    if not missingCommonscatPage:
        # missingCommonscatPage not set, just skip silently.
        return False

    commonscatField = lookupSourceField(u'commonscat', countryconfig)
    if not commonscatField:
        # Field is missing. Something is seriously wrong, but we just skip it
        # silently
        return False

    commonsTrackerCategory = countryconfig.get(
        'commonsTrackerCategory').replace(u' ', u'_')

    withoutCommonscat = getMonumentsWithoutCommonscat(
        countrycode, lang, conn, cursor)
    commonscats = getMonumentCommonscats(
        commonsTrackerCategory, conn2, cursor2)

    pywikibot.output(u'withoutCommonscat %s elements' %
                     (len(withoutCommonscat),))
    pywikibot.output(u'commonscats %s elements' % (len(commonscats),))

    # The name of the list page is the title= parameter of the source URL.
    sourceListPattern = re.compile(r'^[^\?]+\?title\=(.+?)&')
    entryFormat = (u'* <nowiki>|</nowiki> %s = '
                   u'[[:Commons:Category:%s|%s]] - %s @ [[%s]]\n')

    # People can add a /header template for with more info
    lines = [u'{{#ifexist:{{FULLPAGENAME}}/header | {{/header}} }}\n']
    totalCategories = 0
    maxCategories = 1000

    for catSortKey in sorted(commonscats.keys()):
        try:
            monumentId = unicode(catSortKey, 'utf-8')
            # Just want the first line; an empty sort key has none
            mLines = monumentId.splitlines()
            monumentId = mLines[0] if mLines else u''
            # Remove leading and trailing spaces
            monumentId = monumentId.strip()
            # Remove leading zero's. FIXME: This should be replaced with
            # underscores
            monumentId = monumentId.lstrip(u'0')
            # Remove leading underscores.
            monumentId = monumentId.lstrip(u'_')
            # All uppercase, same happens in other list. FIXME: Remove this
            monumentId = monumentId.upper()
            if monumentId not in withoutCommonscat:
                continue

            m = sourceListPattern.search(
                withoutCommonscat.get(monumentId) or u'')
            if m is None:
                # Without a title= parameter there is no list to point at.
                pywikibot.output(
                    u'No list page found in source of %s' % (monumentId,))
                continue
            wikiSourceList = m.group(1)

            # Only the first maxCategories entries are written to the page,
            # but every match is counted.
            if totalCategories < maxCategories:
                categoryName = unicode(commonscats.get(catSortKey), 'utf-8')
                lines.append(entryFormat % (
                    commonscatField, categoryName,
                    categoryName.replace(u'_', u' '), monumentId,
                    wikiSourceList))
            totalCategories = totalCategories + 1
        except ValueError:
            # Report the raw key: monumentId is not set when decoding the
            # sort key itself failed.
            pywikibot.output(u'Got value error for %r' % (catSortKey,))

    if totalCategories > maxCategories:
        lines.append(
            u'<!-- Maximum number of categories reached: %s, total of '
            u'missing commonscat links: %s -->\n' % (
                maxCategories, totalCategories))
        comment = (u'Commonscat links to be made in monument lists: %s '
                   u'(list maximum reached), total of missing commonscat '
                   u'links: %s' % (maxCategories, totalCategories))
    else:
        comment = (u'Commonscat links to be made in monument lists: %s' %
                   totalCategories)

    lines.append(getInterwikisMissingCommonscatPage(countrycode, lang))
    text = u''.join(lines)

    site = pywikibot.Site(lang, u'wikipedia')
    page = pywikibot.Page(site, missingCommonscatPage)
    pywikibot.output(text)
    page.put(text, comment)

    return totalCategories
+
+
def lookupSourceField(destination, countryconfig):
    '''
    Lookup the source field of a destination.

    Returns the 'source' of the first entry in countryconfig['fields']
    whose 'dest' equals destination, or None when there is no such entry
    or the configuration has no 'fields' at all.
    '''
    # A missing 'fields' key is treated like an empty field list.
    for field in countryconfig.get('fields') or ():
        if field.get('dest') == destination:
            return field.get('source')
    return None
+
+
def getInterwikisMissingCommonscatPage(countrycode, lang):
    '''
    Build the interwiki links to the missing commonscat pages of the same
    country in the other configured languages.

    Returns one '[[lang:page]]' line per such page, or an empty string.
    '''
    links = []
    for key, otherConfig in mconfig.countries.iteritems():
        (otherCountry, otherLang) = key
        if otherCountry != countrycode or otherLang == lang:
            continue
        otherPage = otherConfig.get('missingCommonscatPage')
        if otherPage:
            links.append(u'[[%s:%s]]\n' % (otherLang, otherPage))

    return u''.join(links)
+
+
def getMonumentsWithoutCommonscat(countrycode, lang, conn, cursor):
    '''
    Get the monuments of a country/language combination which have no
    commonscat set.

    Returns a dictionary mapping the uppercased monument id to the source
    of the monument. conn is not used.
    '''
    result = {}

    query = (u"SELECT id, source FROM monuments_all WHERE (commonscat IS "
             u"NULL or commonscat='') AND country=%s AND lang=%s")

    cursor.execute(query, (countrycode, lang))

    # fetchone() returns None once the result set is exhausted.
    for (monumentId, source) in iter(cursor.fetchone, None):
        # To uppercase, same happens in the other list
        result[monumentId.upper()] = source

    return result
+
+
def getMonumentCommonscats(commonsTrackerCategory, conn, cursor):
    '''
    Get the categories which are in the given tracker category on Commons.

    Returns a dictionary mapping the sort key of the category in the
    tracker category to the category title. Keys and values are stored
    exactly as the cursor returns them. conn is not used.
    '''
    result = {}

    query = (u"SELECT page_title, cl_sortkey FROM page JOIN categorylinks "
             u"ON page_id=cl_from WHERE page_namespace=14 AND "
             u"page_is_redirect=0 AND cl_to=%s")

    cursor.execute(query, (commonsTrackerCategory,))

    # fetchone() returns None once the result set is exhausted.
    for (category, sortKey) in iter(cursor.fetchone, None):
        result[sortKey] = category

    return result
+
+
def makeStatistics(mconfig, totals):
    '''
    Save a table with the number of missing commonscat links per country
    to the statistics page on Commons.

    mconfig is the monuments configuration; only countries with both a
    'missingCommonscatPage' and a 'commonsTemplate' get a row. totals maps
    (countrycode, lang) to the value returned by processCountry().
    '''
    rows = [
        u'{| class="wikitable sortable"\n',
        u'! country !! lang !! total !! page !! row template !! '
        u'Commons template\n',
    ]

    totalCategories = 0
    for ((countrycode, lang), countryconfig) in sorted(
            mconfig.countries.items()):
        missingCommonscatPage = countryconfig.get('missingCommonscatPage')
        commonsTemplate = countryconfig.get('commonsTemplate')
        if not (missingCommonscatPage and commonsTemplate):
            continue

        rowTemplate = countryconfig.get('rowTemplate')
        # processCountry() returns False for a skipped country and the key
        # may be absent altogether; both count as zero missing links.
        countryTotal = totals.get((countrycode, lang)) or 0
        totalCategories = totalCategories + countryTotal

        rows.append(u'|-\n')
        rows.append(u'| %s ' % countrycode)
        rows.append(u'|| %s ' % lang)
        rows.append(u'|| %s ' % countryTotal)
        rows.append(u'|| [[:%s:%s|%s]] ' % (
            lang, missingCommonscatPage, missingCommonscatPage))
        rows.append(u'|| [[:%s:Template:%s|%s]] ' % (
            lang, rowTemplate, rowTemplate))
        rows.append(u'|| {{tl|%s}}\n' % commonsTemplate)

    rows.append(u'|- class="sortbottom"\n')
    rows.append(u'| || || %s \n' % totalCategories)
    rows.append(u'|}\n')
    text = u''.join(rows)

    site = pywikibot.Site('commons', 'commons')
    page = pywikibot.Page(
        site,
        u'Commons:Monuments database/Missing commonscat links/Statistics')

    comment = (u'Updating missing commonscat links statistics. Total missing '
               u'links: %s' % totalCategories)
    pywikibot.output(text)
    page.put(newtext=text, comment=comment)
+
+
def main():
    '''
    Run the bot for one country or for every configured country.

    With ``-countrycode:XX`` on the command line only that country is
    processed, in the language of the default site. Without it all entries
    of the configuration are processed and the statistics page is updated
    afterwards.

    Returns False when no configuration exists for the requested
    countrycode/language pair, None otherwise.
    '''
    countrycode = u''

    # Connect database, we need that
    (conn, cursor) = connectDatabase()
    (conn2, cursor2) = connectDatabase2()

    for arg in pywikibot.handleArgs():
        # partition() drops the separator, so the option name has to be
        # compared without the trailing colon.
        option, sep, value = arg.partition(':')
        if option == '-countrycode':
            countrycode = value

    if countrycode:
        lang = pywikibot.Site().language()
        countryconfig = mconfig.countries.get((countrycode, lang))
        if not countryconfig:
            pywikibot.output(
                u'I have no config for countrycode "%s" in language "%s"' % (
                    countrycode, lang))
            return False
        pywikibot.output(
            u'Working on countrycode "%s" in language "%s"' % (
                countrycode, lang))
        processCountry(countrycode, lang, countryconfig,
                       conn, cursor, conn2, cursor2)
    else:
        totals = {}
        for (countrycode, lang), countryconfig in \
                mconfig.countries.iteritems():
            pywikibot.output(
                u'Working on countrycode "%s" in language "%s"' % (
                    countrycode, lang))
            totals[(countrycode, lang)] = processCountry(
                countrycode, lang, countryconfig, conn, cursor, conn2,
                cursor2)
        makeStatistics(mconfig, totals)
+
+
if __name__ == "__main__":
    # Script entry point: only run the bot when executed directly.
    main()
diff --git a/erfgoedbot/populate_image_table.py
b/erfgoedbot/populate_image_table.py
index 235fdc2..eb635f6 100644
--- a/erfgoedbot/populate_image_table.py
+++ b/erfgoedbot/populate_image_table.py
@@ -1,233 +1,233 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-'''
-
-Update the image table with all the images tracked by a template in https://commons.wikimedia.org/wiki/Category:Cultural_heritage_monuments_with_known_IDs
-
-The fields:
-* country - same as country field in the monuments_all table
-* id - same as the id field in the monuments_all table
-* img_name - The filename at Commons
-
-First the bots loops over the configuration and gets:
-* countrycode
-* commonsTemplate
-* commonsTrackerCategory
-Some countries are available in multiple languages. This is deduplicated
-
-For each combination the bot will loop over the Commons database and insert the information into the image table
-
-
-Make a gallery of unused photos so people can add them to monument lists
-
-Usage:
-# Do everything
-python populate_image_table.py
-# Do just a specific country
-python populate_image_table.py -countrycode:xx
-
-'''
-import warnings
-import monuments_config as mconfig
-import pywikibot
-from pywikibot import config
-import MySQLdb
-
-
-def connectDatabase():
- '''
- Connect to the p_erfgoed_p mysql database, if it fails, go down in flames
- '''
- conn = MySQLdb.connect(host=mconfig.db_server, db=mconfig.db, user=config.db_username,
- passwd=config.db_password, use_unicode=True, charset='utf8')
- conn.ping(True)
- cursor = conn.cursor()
- return (conn, cursor)
-
-
-def connectDatabase2():
- '''
- Connect to the commons mysql database, if it fails, go down in flames
- '''
- conn = MySQLdb.connect('commonswiki.labsdb', db='commonswiki_p',
- user=config.db_username, passwd=config.db_password, use_unicode=True, charset='utf8')
- conn.ping(True)
- cursor = conn.cursor()
- return (conn, cursor)
-
-
-def getSources(countrycode=u''):
- '''
- Get a dictionary of sources to go harvest
- '''
- sources = {}
- for (icountrycode, lang), countryconfig in mconfig.countries.iteritems():
- if not countrycode or (countrycode and countrycode == icountrycode):
- if icountrycode not in sources:
- if countryconfig.get('commonsTemplate') and countryconfig.get('commonsTrackerCategory'):
- sources[icountrycode] = {
- 'commonsTemplate': countryconfig.get('commonsTemplate'),
- 'commonsTrackerCategory': countryconfig.get('commonsTrackerCategory'),
- }
- return sources
-
-
-def processSources(sources, conn, cursor, conn2, cursor2):
- '''
- Loop over all sources and process them. Very small right now, will probably exapnded later
- '''
- result = sources
- for countrycode, countryconfig in sources.iteritems():
- totalImages = processSource(
- countrycode, countryconfig, conn, cursor, conn2, cursor2)
- result[countrycode]['totalImages'] = totalImages
- return result
-
-
-def processSource(countrycode, countryconfig, conn, cursor, conn2, cursor2):
- '''
- Work on a single source (country).
- '''
-
- commonsTemplate = countryconfig.get('commonsTemplate').replace(u' ', u'_')
- commonsTrackerCategory = countryconfig.get(
- 'commonsTrackerCategory').replace(u' ', u'_')
-
- photos = getMonumentPhotos(commonsTrackerCategory, conn2, cursor2)
-
- pywikibot.output(u'For country "%s" I found %s photos tagged with "{{%s}}" in [[Category:%s]]' % (
- countrycode, len(photos), commonsTemplate, commonsTrackerCategory))
-
- for catSortKey, page_title in photos:
- try:
- monumentId = unicode(catSortKey, 'utf-8')
- name = unicode(page_title, 'utf-8')
- # Just want the first line
- mLines = monumentId.splitlines()
- monumentId = mLines[0]
- # Remove leading and trailing spaces
- monumentId = monumentId.strip()
- # Remove leading zero's. FIXME: This should be replaced with
- # underscores
- monumentId = monumentId.lstrip(u'0')
- # Remove leading underscors.
- monumentId = monumentId.lstrip(u'_')
- # All uppercase, same happens in other list
- # monumentId = monumentId.upper()
- updateImage(countrycode, monumentId, name, conn, cursor)
-
- except UnicodeDecodeError:
- pywikibot.output(
- u'Got unicode decode error for %s' % (monumentId,))
- # UnicodeDecodeError is a subclass of ValueError and should catch most
- except ValueError:
- pywikibot.output(u'Got value error for %s' % (monumentId,))
-
- return len(photos)
-
-
-def getMonumentPhotos(commonsTrackerCategory, conn, cursor):
- '''
- Get all the monument photos that are in a certain tracker category at Wikimedia Commons
- '''
- result = []
-
- query = u"""SELECT cl_sortkey, page_title FROM page JOIN categorylinks ON page_id=cl_from WHERE page_namespace=6 AND page_is_redirect=0 AND cl_to=%s"""
-
- cursor.execute(query, (commonsTrackerCategory,))
-
- result = cursor.fetchall()
- '''
- while True:
- try:
- row = cursor.fetchone()
- #(image, id) = row
- result.append(row)
- print row
- #result[id] = image
- except TypeError:
- break
- '''
- return result
-
-
-def updateImage(countrycode, monumentId, name, conn, cursor):
- '''
- Update an entry for a single image
- '''
- query = u"""REPLACE INTO `image` (`country`, `id`, `img_name`) VALUES (%s, %s, %s)"""
- with warnings.catch_warnings(record=True):
- warnings.simplefilter("always")
- cursor.execute(query, (countrycode, monumentId, name,))
-
-
-def makeStatistics(totals):
- '''
- Make statistics on the number of indexed images and put these on Commons
- '''
- text = u'{| class="wikitable sortable"\n'
- text = text + \
- u'! country !! total !! tracker template !! tracker category\n'
- totalImages = 0
- print totals
- for (countrycode, countryresults) in sorted(totals.iteritems()):
- text = text + u'|-\n'
- text = text + u'| %s ' % countrycode
- text = text + u'|| %s ' % countryresults.get('totalImages')
- totalImages = totalImages + countryresults.get('totalImages')
- text = text + u'|| {{tl|%s}}' % countryresults.get('commonsTemplate')
- text = text + u'|| [[:Category:%s|%s]]\n' % (countryresults.get(
- 'commonsTrackerCategory'), countryresults.get('commonsTrackerCategory'))
- text = text + u'|- class="sortbottom"\n'
- text = text + u'| || %s \n' % totalImages
- text = text + u'|}\n'
-
- site = pywikibot.Site('commons', 'commons')
- page = pywikibot.Page(
- site, u'Commons:Monuments database/Indexed images/Statistics')
-
- comment = u'Updating indexed image statistics. Total indexed images: %s' % totalImages
- pywikibot.output(text)
- page.put(newtext=text, comment=comment)
-
-
-def main():
- countrycode = u''
- conn = None
- cursor = None
- # Connect database, we need that
- (conn, cursor) = connectDatabase()
- (conn2, cursor2) = connectDatabase2()
-
- for arg in pywikibot.handleArgs():
- option, sep, value = arg.partition(':')
- if option == '-countrycode:':
- countrycode = value
-
- if countrycode:
- pywikibot.output(u'Working on countrycode "%s"' % (countrycode,))
- sources = getSources(countrycode=countrycode)
- if not sources:
- pywikibot.output(
- u'I have no config for countrycode "%s"' % (countrycode,))
- return False
- else:
- totals = processSources(sources, conn, cursor, conn2, cursor2)
-
- else:
- pywikibot.output(u'Working on all countrycodes')
- sources = getSources()
- if not sources:
- pywikibot.output(
- u'No sources found, something went completely wrong')
- return False
- else:
- pywikibot.output(
- u'Found %s countries with monument tracker templates to work on' % (len(sources),))
- totals = processSources(sources, conn, cursor, conn2, cursor2)
-
- makeStatistics(totals)
-
-
-if __name__ == "__main__":
- main()
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+'''
+
+Update the image table with all the images tracked by a template in https://commons.wikimedia.org/wiki/Category:Cultural_heritage_monuments_with_known_IDs
+
+The fields:
+* country - same as country field in the monuments_all table
+* id - same as the id field in the monuments_all table
+* img_name - The filename at Commons
+
+First the bots loops over the configuration and gets:
+* countrycode
+* commonsTemplate
+* commonsTrackerCategory
+Some countries are available in multiple languages. This is deduplicated
+
+For each combination the bot will loop over the Commons database and insert the information into the image table
+
+
+Make a gallery of unused photos so people can add them to monument lists
+
+Usage:
+# Do everything
+python populate_image_table.py
+# Do just a specific country
+python populate_image_table.py -countrycode:xx
+
+'''
+import warnings
+import monuments_config as mconfig
+import pywikibot
+from pywikibot import config
+import MySQLdb
+
+
+def connectDatabase():
+ '''
+ Connect to the p_erfgoed_p mysql database, if it fails, go down in flames
+ '''
+ conn = MySQLdb.connect(host=mconfig.db_server, db=mconfig.db, user=config.db_username,
+ passwd=config.db_password, use_unicode=True, charset='utf8')
+ conn.ping(True)
+ cursor = conn.cursor()
+ return (conn, cursor)
+
+
+def connectDatabase2():
+ '''
+ Connect to the commons mysql database, if it fails, go down in flames
+ '''
+ conn = MySQLdb.connect('commonswiki.labsdb', db='commonswiki_p',
+ user=config.db_username, passwd=config.db_password, use_unicode=True, charset='utf8')
+ conn.ping(True)
+ cursor = conn.cursor()
+ return (conn, cursor)
+
+
+def getSources(countrycode=u''):
+ '''
+ Get a dictionary of sources to go harvest
+ '''
+ sources = {}
+ for (icountrycode, lang), countryconfig in mconfig.countries.iteritems():
+ if not countrycode or (countrycode and countrycode == icountrycode):
+ if icountrycode not in sources:
+ if countryconfig.get('commonsTemplate') and countryconfig.get('commonsTrackerCategory'):
+ sources[icountrycode] = {
+ 'commonsTemplate': countryconfig.get('commonsTemplate'),
+ 'commonsTrackerCategory': countryconfig.get('commonsTrackerCategory'),
+ }
+ return sources
+
+
+def processSources(sources, conn, cursor, conn2, cursor2):
+ '''
+ Loop over all sources and process them. Very small right now, will probably exapnded later
+ '''
+ result = sources
+ for countrycode, countryconfig in sources.iteritems():
+ totalImages = processSource(
+ countrycode, countryconfig, conn, cursor, conn2, cursor2)
+ result[countrycode]['totalImages'] = totalImages
+ return result
+
+
+def processSource(countrycode, countryconfig, conn, cursor, conn2, cursor2):
+ '''
+ Work on a single source (country).
+ '''
+
+ commonsTemplate = countryconfig.get('commonsTemplate').replace(u' ', u'_')
+ commonsTrackerCategory = countryconfig.get(
+ 'commonsTrackerCategory').replace(u' ', u'_')
+
+ photos = getMonumentPhotos(commonsTrackerCategory, conn2, cursor2)
+
+ pywikibot.output(u'For country "%s" I found %s photos tagged with "{{%s}}" in [[Category:%s]]' % (
+ countrycode, len(photos), commonsTemplate, commonsTrackerCategory))
+
+ for catSortKey, page_title in photos:
+ try:
+ monumentId = unicode(catSortKey, 'utf-8')
+ name = unicode(page_title, 'utf-8')
+ # Just want the first line
+ mLines = monumentId.splitlines()
+ monumentId = mLines[0]
+ # Remove leading and trailing spaces
+ monumentId = monumentId.strip()
+ # Remove leading zero's. FIXME: This should be replaced with
+ # underscores
+ monumentId = monumentId.lstrip(u'0')
+ # Remove leading underscors.
+ monumentId = monumentId.lstrip(u'_')
+ # All uppercase, same happens in other list
+ # monumentId = monumentId.upper()
+ updateImage(countrycode, monumentId, name, conn, cursor)
+
+ except UnicodeDecodeError:
+ pywikibot.output(
+ u'Got unicode decode error for %s' % (monumentId,))
+ # UnicodeDecodeError is a subclass of ValueError and should catch most
+ except ValueError:
+ pywikibot.output(u'Got value error for %s' % (monumentId,))
+
+ return len(photos)
+
+
+def getMonumentPhotos(commonsTrackerCategory, conn, cursor):
+ '''
+ Get all the monument photos that are in a certain tracker category at Wikimedia Commons
+ '''
+ result = []
+
+ query = u"""SELECT cl_sortkey, page_title FROM page JOIN categorylinks ON page_id=cl_from WHERE page_namespace=6 AND page_is_redirect=0 AND cl_to=%s"""
+
+ cursor.execute(query, (commonsTrackerCategory,))
+
+ result = cursor.fetchall()
+ '''
+ while True:
+ try:
+ row = cursor.fetchone()
+ #(image, id) = row
+ result.append(row)
+ print row
+ #result[id] = image
+ except TypeError:
+ break
+ '''
+ return result
+
+
+def updateImage(countrycode, monumentId, name, conn, cursor):
+ '''
+ Update an entry for a single image
+ '''
+ query = u"""REPLACE INTO `image` (`country`, `id`, `img_name`) VALUES (%s, %s, %s)"""
+ with warnings.catch_warnings(record=True):
+ warnings.simplefilter("always")
+ cursor.execute(query, (countrycode, monumentId, name,))
+
+
+def makeStatistics(totals):
+ '''
+ Make statistics on the number of indexed images and put these on Commons
+ '''
+ text = u'{| class="wikitable sortable"\n'
+ text = text + \
+ u'! country !! total !! tracker template !! tracker category\n'
+ totalImages = 0
+ print totals
+ for (countrycode, countryresults) in sorted(totals.iteritems()):
+ text = text + u'|-\n'
+ text = text + u'| %s ' % countrycode
+ text = text + u'|| %s ' % countryresults.get('totalImages')
+ totalImages = totalImages + countryresults.get('totalImages')
+ text = text + u'|| {{tl|%s}}' % countryresults.get('commonsTemplate')
+ text = text + u'|| [[:Category:%s|%s]]\n' % (countryresults.get(
+ 'commonsTrackerCategory'), countryresults.get('commonsTrackerCategory'))
+ text = text + u'|- class="sortbottom"\n'
+ text = text + u'| || %s \n' % totalImages
+ text = text + u'|}\n'
+
+ site = pywikibot.Site('commons', 'commons')
+ page = pywikibot.Page(
+ site, u'Commons:Monuments database/Indexed images/Statistics')
+
+ comment = u'Updating indexed image statistics. Total indexed images: %s' % totalImages
+ pywikibot.output(text)
+ page.put(newtext=text, comment=comment)
+
+
+def main():
+ countrycode = u''
+ conn = None
+ cursor = None
+ # Connect database, we need that
+ (conn, cursor) = connectDatabase()
+ (conn2, cursor2) = connectDatabase2()
+
+ for arg in pywikibot.handleArgs():
+ option, sep, value = arg.partition(':')
+ if option == '-countrycode:':
+ countrycode = value
+
+ if countrycode:
+ pywikibot.output(u'Working on countrycode "%s"' % (countrycode,))
+ sources = getSources(countrycode=countrycode)
+ if not sources:
+ pywikibot.output(
+ u'I have no config for countrycode "%s"' % (countrycode,))
+ return False
+ else:
+ totals = processSources(sources, conn, cursor, conn2, cursor2)
+
+ else:
+ pywikibot.output(u'Working on all countrycodes')
+ sources = getSources()
+ if not sources:
+ pywikibot.output(
+ u'No sources found, something went completely wrong')
+ return False
+ else:
+ pywikibot.output(
+ u'Found %s countries with monument tracker templates to work on' % (len(sources),))
+ totals = processSources(sources, conn, cursor, conn2, cursor2)
+
+ makeStatistics(totals)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/erfgoedbot/top_streets.py b/erfgoedbot/top_streets.py
index f78ecc3..27ea5b9 100644
--- a/erfgoedbot/top_streets.py
+++ b/erfgoedbot/top_streets.py
@@ -1,116 +1,116 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-'''
-Make a list of top streets for a municipality. Bot expects two things on the commandline:
-* -countrycode : The country code (as it is in the database)
-* -municipality : The name of the municipality (as it is in the database)
-* -minimum : (optional) The minimum of hits before we show the item
-'''
-import monuments_config as mconfig
-import pywikibot
-import config
-import MySQLdb
-from collections import Counter
-
-def connectDatabase():
- '''
- Connect to the monuments mysql database, if it fails, go down in flames.
- This database is utf-8 encoded.
- '''
- conn = MySQLdb.connect(host=mconfig.db_server, db=mconfig.db, user = config.db_username, passwd = config.db_password, use_unicode=True, charset='utf8')
- cursor = conn.cursor()
- return (conn, cursor)
-
-def getAddresses(countrycode, lang, municipality, conn, cursor):
- '''
- Get a list of addresses of a municipality in a country in a certain language
- '''
- result = []
- query = u"""SELECT address FROM monuments_all WHERE country=%s AND lang=%s AND municipality=%s ORDER BY address ASC""";
- cursor.execute(query, (countrycode, lang, municipality))
-
- while True:
- try:
- row = cursor.fetchone()
- (address,) = row
- result.append(address)
- except TypeError:
- break
-
- return result
-
-def printTopStreets (addresses, minimum):
- '''
- Print the top streets with a minimum number of hits
- '''
- streets = Counter() #collections.Counter
- for address in addresses:
- address = address.replace(u'{{sorteer|', u'')
- temp = u''
- partslist = []
- for addrPart in address.split(u' '):
- temp = temp + u' ' + addrPart
- partslist.append(temp.strip())
-
- streets.update(partslist)
-
- topStreets = []
-
- for street in streets.most_common():
- if street[1] < minimum:
- break
- topStreets.append(street[0])
-
- filteredStreets = []
-
- for topStreet1 in topStreets:
- for topStreet2 in topStreets:
- if topStreet1 != topStreet2 and topStreet2.startswith(topStreet1):
- filteredStreets.append(topStreet1)
- break
-
- pywikibot.output(u'Filtered out the following. These are probably street parts:')
- for street in streets.most_common():
- if street[1] < minimum:
- break
- if street[0] in filteredStreets:
- pywikibot.output(u'* %s - %s' % street)
-
- pywikibot.output(u'Found the following entries which are probably real streets:')
- for street in streets.most_common():
- if street[1] < minimum:
- break
- if not street[0] in filteredStreets:
- pywikibot.output(u'* %s - %s' % street)
-
-
-def main():
- countrycode = u''
- lang = u''
- municipality = u''
- minimum = 15
- conn = None
- cursor = None
- # Connect database, we need that
- (conn, cursor) = connectDatabase()
-
- for arg in pywikibot.handleArgs():
- if arg.startswith('-countrycode:'):
- countrycode = arg [len('-countrycode:'):]
- elif arg.startswith('-municipality:'):
- municipality = arg [len('-municipality:'):]
- elif arg.startswith('-minimum:'):
- minimum = int(arg [len('-minimum:'):])
-
- if countrycode and municipality:
- lang = pywikibot.getSite().language()
- addresses = getAddresses(countrycode, lang, municipality, conn, cursor)
- printTopStreets (addresses, minimum)
- else:
- print u'Usage'
-
-if __name__ == "__main__":
- try:
- main()
- finally:
- pywikibot.stopme()
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+'''
+Make a list of top streets for a municipality. Bot expects two things on the commandline:
+* -countrycode : The country code (as it is in the database)
+* -municipality : The name of the municipality (as it is in the database)
+* -minimum : (optional) The minimum of hits before we show the item
+'''
+import monuments_config as mconfig
+import pywikibot
+import config
+import MySQLdb
+from collections import Counter
+
+def connectDatabase():
+ '''
+ Connect to the monuments mysql database, if it fails, go down in flames.
+ This database is utf-8 encoded.
+ '''
+ conn = MySQLdb.connect(host=mconfig.db_server, db=mconfig.db, user = config.db_username, passwd = config.db_password, use_unicode=True, charset='utf8')
+ cursor = conn.cursor()
+ return (conn, cursor)
+
+def getAddresses(countrycode, lang, municipality, conn, cursor):
+ '''
+ Get a list of addresses of a municipality in a country in a certain language
+ '''
+ result = []
+ query = u"""SELECT address FROM monuments_all WHERE country=%s AND lang=%s AND municipality=%s ORDER BY address ASC""";
+ cursor.execute(query, (countrycode, lang, municipality))
+
+ while True:
+ try:
+ row = cursor.fetchone()
+ (address,) = row
+ result.append(address)
+ except TypeError:
+ break
+
+ return result
+
+def printTopStreets (addresses, minimum):
+ '''
+ Print the top streets with a minimum number of hits
+ '''
+ streets = Counter() #collections.Counter
+ for address in addresses:
+ address = address.replace(u'{{sorteer|', u'')
+ temp = u''
+ partslist = []
+ for addrPart in address.split(u' '):
+ temp = temp + u' ' + addrPart
+ partslist.append(temp.strip())
+
+ streets.update(partslist)
+
+ topStreets = []
+
+ for street in streets.most_common():
+ if street[1] < minimum:
+ break
+ topStreets.append(street[0])
+
+ filteredStreets = []
+
+ for topStreet1 in topStreets:
+ for topStreet2 in topStreets:
+ if topStreet1 != topStreet2 and topStreet2.startswith(topStreet1):
+ filteredStreets.append(topStreet1)
+ break
+
+ pywikibot.output(u'Filtered out the following. These are probably street parts:')
+ for street in streets.most_common():
+ if street[1] < minimum:
+ break
+ if street[0] in filteredStreets:
+ pywikibot.output(u'* %s - %s' % street)
+
+ pywikibot.output(u'Found the following entries which are probably real streets:')
+ for street in streets.most_common():
+ if street[1] < minimum:
+ break
+ if not street[0] in filteredStreets:
+ pywikibot.output(u'* %s - %s' % street)
+
+
+def main():
+ countrycode = u''
+ lang = u''
+ municipality = u''
+ minimum = 15
+ conn = None
+ cursor = None
+ # Connect database, we need that
+ (conn, cursor) = connectDatabase()
+
+ for arg in pywikibot.handleArgs():
+ if arg.startswith('-countrycode:'):
+ countrycode = arg [len('-countrycode:'):]
+ elif arg.startswith('-municipality:'):
+ municipality = arg [len('-municipality:'):]
+ elif arg.startswith('-minimum:'):
+ minimum = int(arg [len('-minimum:'):])
+
+ if countrycode and municipality:
+ lang = pywikibot.getSite().language()
+ addresses = getAddresses(countrycode, lang, municipality, conn, cursor)
+ printTopStreets (addresses, minimum)
+ else:
+ print u'Usage'
+
+if __name__ == "__main__":
+ try:
+ main()
+ finally:
+ pywikibot.stopme()
--
To view, visit https://gerrit.wikimedia.org/r/280969
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I92c81101f146652fc29f39b4ea2cadf988112207
Gerrit-PatchSet: 2
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil <[email protected]>
Gerrit-Reviewer: Jean-Frédéric <[email protected]>
Gerrit-Reviewer: Multichill <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits