Dachary has uploaded a new change for review. https://gerrit.wikimedia.org/r/312706
Change subject: complete rework for simplification ...................................................................... complete rework for simplification Having subcommands for qa or repository is not a useful separation since both should be done on each item. Instead have only one set of options with qa and repositories as plugins in charge of checking a given aspect of the item. The items on which the plugins are applied are selected by the main function instead of being delegated to qa or repository. The plugins are only responsible for providing SPARQL queries that can be used to focus on a specific subset of items instead of working on all of them all the time. Change-Id: Id99402a1b3e61eb969a5202cae0587a1cd5775f6 Signed-off-by: Loic Dachary <[email protected]> --- M FLOSSbot/bot.py M FLOSSbot/main.py A FLOSSbot/plugin.py M FLOSSbot/qa.py M FLOSSbot/repository.py M requirements.txt M tests/test_bot.py D tests/test_main.py A tests/test_plugin.py M tests/test_qa.py M tests/test_repository.py M tests/wikidata.py 12 files changed, 586 insertions(+), 631 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/pywikibot/bots/FLOSSbot refs/changes/06/312706/1 diff --git a/FLOSSbot/bot.py b/FLOSSbot/bot.py index a0690e7..bea1cef 100644 --- a/FLOSSbot/bot.py +++ b/FLOSSbot/bot.py @@ -16,17 +16,31 @@ # import argparse import logging -from datetime import datetime, timedelta +import textwrap +import time import pywikibot +from pywikibot import pagegenerators as pg +from FLOSSbot import qa, repository, util +from FLOSSbot.plugin import Plugin + +logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s') log = logging.getLogger(__name__) + +plugins = [ + repository.Repository, + qa.QA, +] + +name2plugin = dict([(p.__name__, p) for p in plugins]) class Bot(object): def __init__(self, args): self.args = args + logging.getLogger('FLOSSbot').setLevel(self.args.verbose) self.site = pywikibot.Site( code="wikidata" if not self.args.test else "test", fam="wikidata", @@ -36,11 +50,30 @@ if self.args.test: self.wikidata_site = pywikibot.Site(code="wikidata", fam="wikidata") - self.reset_cache() + else: + self.wikidata_site = None + self.plugins = [] + for name in self.args.plugin or name2plugin.keys(): + plugin = name2plugin[name] + self.plugins.append(plugin(self, args)) @staticmethod def get_parser(): + filters = [] + available_plugins = [] + for plugin in plugins: + filters.extend(plugin.filter_names()) + available_plugins.append(plugin.__name__) parser = argparse.ArgumentParser(add_help=False) + parser.add_argument( + '-v', '--verbose', + action='store_const', + const=logging.DEBUG, + default=logging.INFO) + parser.add_argument( + '--dry-run', + action='store_true', default=None, + help='no side effect') parser.add_argument( '--test', action='store_true', default=None, @@ -50,182 +83,64 @@ default=None, help='wikidata user name') parser.add_argument( - '--verification-delay', - type=int, - default=30, - help='days to wait before verifying a claim again') + '--plugin', + default=[], + choices=available_plugins, + action='append', + help='use this plugin instead of all of them (can be repeated)') + select = parser.add_mutually_exclusive_group() + select.add_argument( + '--filter', + default='', + choices=filters, + help='filter with a pre-defined query', + ) + select.add_argument( + '--item', + default=[], + action='append', + help='work on this QID (can be repeated)') return parser @staticmethod - def factory(cls, argv): + def factory(argv): + parents = [ + Bot.get_parser(), + Plugin.get_parser(), + ] + for plugin in plugins: + parents.append(plugin.get_parser()) parser = argparse.ArgumentParser( - parents=[Bot.get_parser()], - add_help=False, - conflict_handler='resolve') - cls.set_subparser(parser.add_subparsers()) - return cls(parser.parse_args(argv)) + formatter_class=util.CustomFormatter, + description=textwrap.dedent("""\ + A command-line toolbox for the wikidata FLOSS project. + """), + parents=parents) + return Bot(parser.parse_args(argv)) - def debug(self, item, message): - self.log(log.debug, item, message) - - def info(self, item, message): - self.log(log.info, item, message) - - def error(self, item, message): - self.log(log.error, item, message) - - def log(self, fun, item, message): - fun("http://wikidata.org/wiki/" + item.getID() + " " + message) - - def reset_cache(self): - self.entities = { - 'property': {}, - 'item': {}, - } - - def lookup_entity(self, name, **kwargs): - type = kwargs['type'] - found = self.entities[type].get(name) - if found: - return found - found = self.search_entity(self.site, name, **kwargs) - if found: - if type == 'property': - found = found['id'] - self.entities[type][name] = found - return found - - # - # Hardcode the desired wikidata item when there are - # multiple items with the same english label and no - # trivial way to disambiguate them. - # - authoritative = { - 'wikidata': { - 'git': 'Q186055', - 'Fossil': 'Q1439431', - }, - 'test': { - }, - } - - def search_entity(self, site, name, **kwargs): - if name in Bot.authoritative[site.code]: - candidate = pywikibot.ItemPage( - site, Bot.authoritative[site.code][name], 0) - if candidate.get()['labels']['en'] == name: - return candidate - candidates = [] - for p in site.search_entities(name, 'en', **kwargs): - log.debug("looking for entity " + name + ", found " + str(p)) - if p.get('label') == name: - if kwargs['type'] == 'property': - candidates.append(p) - else: - candidates.append(pywikibot.ItemPage(site, p['id'], 0)) - if len(candidates) == 0: - return None - elif len(candidates) > 1 and kwargs['type'] == 'item': - found = [] - for candidate in candidates: - item = candidate.get() - ok = True - for instance_of in item['claims'].get(self.P_instance_of, []): - if (instance_of.getTarget() == - self.Q_Wikimedia_disambiguation_page): - log.debug("ignore disambiguation page " + - candidate.getID() + " for " + name) - ok = False - break - if ok: - found.append(candidate) - if len(found) != 1: - raise ValueError("found multiple items for " + name + - " " + str(found)) - return found[0] + def run(self): + if len(self.args.item) > 0: + self.run_items() else: - return candidates[0] + self.run_query() - lookup_item = lookup_entity + def run_items(self): + for item in self.args.item: + item = pywikibot.ItemPage(self.site, item, 0) + for plugin in self.plugins: + plugin.run(item) - def lookup_property(self, name): - return self.lookup_entity(self.site, name, type='property') - - def create_entity(self, type, name): - found = self.search_entity(self.wikidata_site, name, type=type) - entity = { - "labels": { - "en": { - "language": "en", - "value": name, - } - }, - } - if type == 'property': - assert found, type + " " + name + " must exist in wikidata" - id = found['id'] - found = self.wikidata_site.loadcontent({'ids': id}, 'datatype') - assert found, "datatype of " + id + " " + name + " is not found" - entity['datatype'] = found[id]['datatype'] - log.debug("create " + type + " " + str(entity)) - self.site.editEntity({'new': type}, entity) - - def clear_entity_label(self, id): - data = { - "labels": { - "en": { - "language": "en", - "value": "", - } - } - } - log.debug("clear " + id + " label") - self.site.editEntity({'id': id}, data) - self.reset_cache() - - def __getattribute__(self, name): - if name.startswith('P_'): - type = 'property' - elif name.startswith('Q_'): - type = 'item' - else: - return super(Bot, self).__getattribute__(name) - label = " ".join(name.split('_')[1:]) - found = self.lookup_entity(label, type=type) - if not found and self.args.test: - self.create_entity(type, label) - for i in range(120): - found = self.lookup_entity(label, type=type) - if found is not None: - break - return found - - def need_verification(self, claim): - now = datetime.utcnow() - if self.P_point_in_time in claim.qualifiers: - previous = claim.qualifiers[self.P_point_in_time][0] - previous = previous.getTarget() - previous = datetime(year=previous.year, - month=previous.month, - day=previous.day) - return (now - previous >= - timedelta(days=self.args.verification_delay)) - else: - return True - - def set_point_in_time(self, item, claim, now=datetime.utcnow()): - when = pywikibot.WbTime(now.year, now.month, now.day) - if self.P_point_in_time in claim.qualifiers: - self.debug(item, "updating point-in-time") - point_in_time = claim.qualifiers[self.P_point_in_time][0] - point_in_time.setTarget(when) - if not self.args.dry_run: - self.site.save_claim(claim) - else: - self.debug(item, "setting point-in-time") - point_in_time = pywikibot.Claim(self.site, - self.P_point_in_time, - isQualifier=True) - point_in_time.setTarget(when) - if not self.args.dry_run: - claim.addQualifier(point_in_time, bot=True) + def run_query(self): + for plugin in self.plugins: + query = plugin.get_query(self.args.filter) + if query is not None: + break + if query is None: + query = Plugin(self, self.args).get_query(self.args.filter) + query = query + " # " + str(time.time()) + log.debug('running query ' + query) + for item in pg.WikidataSPARQLPageGenerator(query, + site=self.site, + result_type=list): + for plugin in self.plugins: + plugin.run(item) diff --git a/FLOSSbot/main.py b/FLOSSbot/main.py index a0a9862..f0a59b9 100644 --- a/FLOSSbot/main.py +++ b/FLOSSbot/main.py @@ -14,52 +14,10 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # -import argparse -import logging -import textwrap - -from FLOSSbot import bot, qa, repository, util - -logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s') +from FLOSSbot import bot class FLOSSbot(object): - def __init__(self): - self.parser = argparse.ArgumentParser( - formatter_class=util.CustomFormatter, - description=textwrap.dedent("""\ - A command-line toolbox for the wikidata FLOSS project. - - The documentation for each subcommand can be displayed with - - FLOSSbot subcommand --help - """), - parents=[bot.Bot.get_parser()]) - - self.parser.add_argument( - '-v', '--verbose', - action='store_const', - const=logging.DEBUG, - default=logging.INFO) - - self.parser.add_argument( - '--dry-run', - action='store_true', default=None, - help='no side effect') - - subparsers = self.parser.add_subparsers( - title='subcommands', - description='valid subcommands', - help='sub-command -h', - ) - - qa.QA.set_subparser(subparsers) - repository.Repository.set_subparser(subparsers) - def run(self, argv): - self.args = self.parser.parse_args(argv) - - logging.getLogger('FLOSSbot').setLevel(self.args.verbose) - - return self.args.func(self.args).run() + return bot.Bot.factory(argv).run() diff --git a/FLOSSbot/plugin.py b/FLOSSbot/plugin.py new file mode 100644 index 0000000..a433e65 --- /dev/null +++ b/FLOSSbot/plugin.py @@ -0,0 +1,214 @@ +# +# Copyright (C) 2016 Loic Dachary <[email protected]> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +import argparse +import logging +from datetime import datetime, timedelta + +import pywikibot + +log = logging.getLogger(__name__) + + +class Plugin(object): + + def __init__(self, bot, args): + self.args = args + self.bot = bot + self.reset_cache() + + @staticmethod + def get_parser(): + parser = argparse.ArgumentParser(add_help=False) + parser.add_argument( + '--verification-delay', + type=int, + default=30, + help='days to wait before verifying a claim again') + return parser + + def get_query(self, filter): + query = """ + SELECT DISTINCT ?item WHERE {{ + ?item wdt:{source_code_repository} ?url. + }} ORDER BY ?item + """.format(source_code_repository=self.P_source_code_repository) + return query + + def debug(self, item, message): + self.log(log.debug, item, message) + + def info(self, item, message): + self.log(log.info, item, message) + + def error(self, item, message): + self.log(log.error, item, message) + + def log(self, fun, item, message): + fun("http://wikidata.org/wiki/" + item.getID() + " " + message) + + def reset_cache(self): + self.bot.entities = { + 'property': {}, + 'item': {}, + } + + def lookup_entity(self, name, **kwargs): + type = kwargs['type'] + found = self.bot.entities[type].get(name) + if found: + return found + found = self.search_entity(self.bot.site, name, **kwargs) + if found: + if type == 'property': + found = found['id'] + self.bot.entities[type][name] = found + return found + + # + # Hardcode the desired wikidata item when there are + # multiple items with the same english label and no + # trivial way to disambiguate them. + # + authoritative = { + 'wikidata': { + 'git': 'Q186055', + 'Fossil': 'Q1439431', + }, + 'test': { + }, + } + + def search_entity(self, site, name, **kwargs): + if name in Plugin.authoritative[site.code]: + candidate = pywikibot.ItemPage( + site, Plugin.authoritative[site.code][name], 0) + if candidate.get()['labels']['en'] == name: + return candidate + candidates = [] + for p in site.search_entities(name, 'en', **kwargs): + log.debug("looking for entity " + name + ", found " + str(p)) + if p.get('label') == name: + if kwargs['type'] == 'property': + candidates.append(p) + else: + candidates.append(pywikibot.ItemPage(site, p['id'], 0)) + if len(candidates) == 0: + return None + elif len(candidates) > 1 and kwargs['type'] == 'item': + found = [] + for candidate in candidates: + item = candidate.get() + ok = True + for instance_of in item['claims'].get(self.P_instance_of, []): + if (instance_of.getTarget() == + self.Q_Wikimedia_disambiguation_page): + log.debug("ignore disambiguation page " + + candidate.getID() + " for " + name) + ok = False + break + if ok: + found.append(candidate) + if len(found) != 1: + raise ValueError("found multiple items for " + name + + " " + str(found)) + return found[0] + else: + return candidates[0] + + lookup_item = lookup_entity + + def lookup_property(self, name): + return self.lookup_entity(self.bot.site, name, type='property') + + def create_entity(self, type, name): + found = self.search_entity(self.bot.wikidata_site, name, type=type) + entity = { + "labels": { + "en": { + "language": "en", + "value": name, + } + }, + } + if type == 'property': + assert found, type + " " + name + " must exist in wikidata" + id = found['id'] + found = self.bot.wikidata_site.loadcontent({'ids': id}, 'datatype') + assert found, "datatype of " + id + " " + name + " is not found" + entity['datatype'] = found[id]['datatype'] + log.debug("create " + type + " " + str(entity)) + self.bot.site.editEntity({'new': type}, entity) + + def clear_entity_label(self, id): + data = { + "labels": { + "en": { + "language": "en", + "value": "", + } + } + } + log.debug("clear " + id + " label") + self.bot.site.editEntity({'id': id}, data) + self.reset_cache() + + def __getattribute__(self, name): + if name.startswith('P_'): + type = 'property' + elif name.startswith('Q_'): + type = 'item' + else: + return super(Plugin, self).__getattribute__(name) + label = " ".join(name.split('_')[1:]) + found = self.lookup_entity(label, type=type) + if not found and self.args.test: + self.create_entity(type, label) + for i in range(120): + found = self.lookup_entity(label, type=type) + if found is not None: + break + return found + + def need_verification(self, claim): + now = datetime.utcnow() + if self.P_point_in_time in claim.qualifiers: + previous = claim.qualifiers[self.P_point_in_time][0] + previous = previous.getTarget() + previous = datetime(year=previous.year, + month=previous.month, + day=previous.day) + return (now - previous >= + timedelta(days=self.args.verification_delay)) + else: + return True + + def set_point_in_time(self, item, claim, now=datetime.utcnow()): + when = pywikibot.WbTime(now.year, now.month, now.day) + if self.P_point_in_time in claim.qualifiers: + self.debug(item, "updating point-in-time") + point_in_time = claim.qualifiers[self.P_point_in_time][0] + point_in_time.setTarget(when) + if not self.args.dry_run: + self.bot.site.save_claim(claim) + else: + self.debug(item, "setting point-in-time") + point_in_time = pywikibot.Claim(self.bot.site, + self.P_point_in_time, + isQualifier=True) + point_in_time.setTarget(when) + if not self.args.dry_run: + claim.addQualifier(point_in_time, bot=True) diff --git a/FLOSSbot/qa.py b/FLOSSbot/qa.py index 9153307..0ccf758 100644 --- a/FLOSSbot/qa.py +++ b/FLOSSbot/qa.py @@ -17,77 +17,35 @@ import argparse import logging import os -import textwrap from urllib.parse import urlparse import pywikibot import requests -from pywikibot import pagegenerators as pg -from FLOSSbot import bot, util +from FLOSSbot import plugin log = logging.getLogger(__name__) -class QA(bot.Bot): +class QA(plugin.Plugin): @staticmethod def get_parser(): parser = argparse.ArgumentParser(add_help=False) - select = parser.add_mutually_exclusive_group() - select.add_argument( - '--filter', - default='', - choices=['verify'], - help='filter with a pre-defined query', - ) - select.add_argument( - '--item', - default=[], - action='append', - help='work on this QID (can be repeated)') return parser @staticmethod - def set_subparser(subparsers): - subparsers.add_parser( - 'qa', - formatter_class=util.CustomFormatter, - description=textwrap.dedent("""\ - Set the software quality assurance statement - """), - help='Set the software quality assurance statement', - parents=[QA.get_parser()], - add_help=False, - conflict_handler='resolve', - ).set_defaults( - func=QA, - ) + def filter_names(): + return ['qa-verify'] - @staticmethod - def factory(argv): - return bot.Bot.factory(QA, argv) - - def run(self): - if len(self.args.item) > 0: - self.run_items() - else: - self.run_query() - - def run_items(self): - for item in self.args.item: - item = pywikibot.ItemPage(self.site, item, 0) - self.fixup(item) - self.verify(item) - - def run_query(self): + def get_query(self, filter): format_args = { 'repository': self.P_source_code_repository, 'qa': self.P_software_quality_assurance, 'point_in_time': self.P_point_in_time, 'delay': self.args.verification_delay, } - if self.args.filter == 'verify': + if filter == 'qa-verify': query = """ SELECT DISTINCT ?item WHERE {{ ?item p:{qa} ?qa . @@ -97,18 +55,12 @@ }} ORDER BY ?item """.format(**format_args) else: - query = """ - SELECT DISTINCT ?item WHERE {{ - ?item p:{repository} ?repository. - FILTER NOT EXISTS {{ ?item p:{qa} ?qa }} - }} ORDER BY ?item - """.format(**format_args) - log.debug(query) - for item in pg.WikidataSPARQLPageGenerator(query, - site=self.site, - result_type=list): - self.fixup(item) - self.verify(item) + query = None + return query + + def run(self, item): + self.fixup(item) + self.verify(item) def verify(self, item): item_dict = item.get() @@ -136,7 +88,7 @@ continue ok = True for (qualifier, target) in found.items(): - name = pywikibot.PropertyPage(self.site, qualifier) + name = pywikibot.PropertyPage(self.bot.site, qualifier) name.get() name = name.labels['en'] if qualifier not in qa.qualifiers: @@ -217,12 +169,12 @@ return software_quality_assurance = pywikibot.Claim( - self.site, self.P_software_quality_assurance, 0) + self.bot.site, self.P_software_quality_assurance, 0) software_quality_assurance.setTarget(self.Q_Continuous_integration) item.addClaim(software_quality_assurance) for (qualifier, target) in found.items(): - claim = pywikibot.Claim(self.site, qualifier, 0) + claim = pywikibot.Claim(self.bot.site, qualifier, 0) claim.setTarget(target) software_quality_assurance.addQualifier(claim, bot=True) diff --git a/FLOSSbot/repository.py b/FLOSSbot/repository.py index bb5d9e9..487ab64 100644 --- a/FLOSSbot/repository.py +++ b/FLOSSbot/repository.py @@ -17,158 +17,31 @@ import argparse import logging import re -import textwrap -import time import pywikibot import requests -from pywikibot import pagegenerators as pg -from FLOSSbot import bot, util +from FLOSSbot import plugin, util log = logging.getLogger(__name__) -FLOSS_doc = ("https://www.wikidata.org/wiki/Wikidata:" - "WikiProject_Informatics/FLOSS#source_code_repository") - -class Repository(bot.Bot): - - cache = None +class Repository(plugin.Plugin): @staticmethod def get_parser(): parser = argparse.ArgumentParser(add_help=False) - select = parser.add_mutually_exclusive_group() - select.add_argument( - '--filter', - default='', - choices=['no-protocol', 'no-preferred'], - help='filter with a pre-defined query', - ) - select.add_argument( - '--item', - default=[], - action='append', - help='work on this QID (can be repeated)') return parser @staticmethod - def set_subparser(subparsers): - subparsers.add_parser( - 'repository', - formatter_class=util.CustomFormatter, - description=textwrap.dedent("""\ - Verify and fix the source code repository claim. + def filter_names(): + return [ + 'repository-no-protocol', + 'repository-no-preferred', + ] - The scope of the verifications and the associated - modifications is explained below. By default all - items that have at least one source code repository - claim are considered. It can be restricted with - the --filter or --item options. - - A) Protocol - - The source code repository responds to a protocol that - depends on the VCS. If the protocol qualifier is missing, - try a range of VCS to figure out which protocol it - implements and set the protocol qualifier accordingly. - - For web sites that host many respositories (such as github - or sourceforge), additional heuristics are implemented to - figure out the URL of the repository or the protocol. For - instance, since github only hosts git repositories, the - protocol is always assumed to be git. For sourceforce, - the URL of the web interface to the repository is fetched - to get the instructions and figure out if it is subversion, - mercurial or git. - - When everything fails and the protocol cannot be established - with absolute certainty, an error is displayed and an editor - should fix the item. - - --filter no-protocol - select only the items for which there exists - at least one claim with no protocol qualifier - - B) Preferred rank - - When there are multiple source code repository URLs - one of them must have the preferred rank. The aim - is to display it in an infobox therefore the URL - with the http protocol should be preferred over another - requiring a VCS software. - - --filter no-preferred - select only the items for which there exists - at more than one claim with no preferred rank - - [1] {doc} - """.format(doc=FLOSS_doc)), - epilog=textwrap.dedent(""" - Examples: - - $ FLOSSbot --verbose repository - - INFO WORKING ON https://www.wikidata.org/wiki/Q403539 - INFO IGNORE \ -https://code.wireshark.org/review/gitweb?p=wireshark.git \ -because it already has a protocol - DEBUG trying all known protocols on \ -https://code.wireshark.org/review/p/wireshark.git - DEBUG :sh: timeout 30 git ls-remote \ -https://code.wireshark.org/review/p/wireshark.git HEAD - DEBUG b'e8f1d2abda939f37d99f272f8a76a191c9a752b4\tHEAD' - - INFO WORKING ON https://www.wikidata.org/wiki/Q4035967 - DEBUG trying all known protocols on \ -http://git.ceph.com/?p=ceph.git;a=summary - DEBUG :sh: timeout 30 git ls-remote \ -http://git.ceph.com/?p=ceph.git;a=summary HEAD - DEBUG b"fatal: repository \ -'http://git.ceph.com/?p=ceph.git/' not found" - DEBUG b'/bin/sh: 1: HEAD: not found' - ... - ERROR SKIP http://git.ceph.com/?p=ceph.git;a=summary - - The first item (https://www.wikidata.org/wiki/Q403539) has - two source code repository. The first one already has a - protocol qualifier and is left untouched. An attempt is - made to retrieve it with the git command line and - succeeds. The protocol qualifier is set to git. - - The second item (WORKING ON https://www.wikidata.org/wiki/Q4035967) - has a source code repository URL which is a gitweb interface to a - git repository. It is not useable wiht any protocol, including git, - and the program fails with an error so the editor can manually - edit the item. - """), - help='Set protocol of the source code repository', - parents=[Repository.get_parser()], - add_help=False, - conflict_handler='resolve', - ).set_defaults( - func=Repository, - ) - - @staticmethod - def factory(argv): - return bot.Bot.factory(Repository, argv) - - def run(self): - if len(self.args.item) > 0: - self.run_items() - else: - self.run_query() - - def run_items(self): - for item in self.args.item: - item = pywikibot.ItemPage(self.site, item, 0) - self.fixup(item) - self.verify(item) - - def run_query(self): - if self.args.filter == 'no-protocol': + def get_query(self, filter): + if filter == 'repository-no-protocol': query = """ SELECT DISTINCT ?item WHERE {{ ?item p:{source_code_repository} ?repo. @@ -178,7 +51,7 @@ }} ORDER BY ?item """.format(source_code_repository=self.P_source_code_repository, protocol=self.P_protocol) - elif self.args.filter == 'no-preferred': + elif filter == 'repository-no-preferred': query = """ SELECT ?item (COUNT(?value) AS ?count) WHERE {{ @@ -193,18 +66,12 @@ ORDER BY ?item """.format(source_code_repository=self.P_source_code_repository) else: - query = """ - SELECT DISTINCT ?item WHERE {{ - ?item wdt:{source_code_repository} ?url. - }} ORDER BY ?item - """.format(source_code_repository=self.P_source_code_repository) - query = query + " # " + str(time.time()) - log.debug(query) - for item in pg.WikidataSPARQLPageGenerator(query, - site=self.site, - result_type=list): - self.fixup(item) - self.verify(item) + query = None + return query + + def run(self, item): + self.fixup(item) + self.verify(item) def verify(self, item): item_dict = item.get() diff --git a/requirements.txt b/requirements.txt index 1923f79..50635b7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,6 +11,7 @@ pycodestyle==2.0.0 pyflakes==1.2.3 pytest==3.0.2 +pytest-capturelog requests==2.11.1 six==1.10.0 testfixtures==4.10.1 diff --git a/tests/test_bot.py b/tests/test_bot.py index f600df3..b3e2f5e 100644 --- a/tests/test_bot.py +++ b/tests/test_bot.py @@ -15,11 +15,10 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # -import argparse -from datetime import date +import logging -import pytest -import pywikibot +import mock +import pytest # noqa # caplog from FLOSSbot.bot import Bot from tests.wikidata import WikidataHelper @@ -30,117 +29,76 @@ def setup_class(cls): WikidataHelper().login() - def test_lookup_item(self): - bot = Bot(argparse.Namespace( - test=True, - user='FLOSSbotCI', - )) - assert 0 == len(bot.entities['item']) - git = bot.Q_git - assert 1 == len(bot.entities['item']) - assert git == bot.Q_git - assert bot.Q_Concurrent_Versions_System - assert 2 == len(bot.entities['item']) + def test_factory(self): + Bot.factory(['--verbose']) + assert (logging.getLogger('FLOSSbot').getEffectiveLevel() == + logging.DEBUG) - def test_create_entity(self): - bot = Bot(argparse.Namespace( - test=True, - user='FLOSSbotCI', - )) - item = bot.Q_git - assert 1 == len(bot.entities['item']) - bot.clear_entity_label(item.getID()) - assert 0 == len(bot.entities['item']) - item = bot.Q_git - assert 1 == len(bot.entities['item']) + b = Bot.factory([]) + assert (logging.getLogger('FLOSSbot').getEffectiveLevel() == + logging.INFO) - property2datatype = { - 'P_source_code_repository': 'url', - 'P_website_username': 'string', - 'P_protocol': 'wikibase-item', - } + assert len(b.plugins) > 0 - wikidata_bot = Bot(argparse.Namespace( - test=False, - user=None, - )) - for (attr, datatype) in property2datatype.items(): - bot.reset_cache() - property = bot.__getattribute__(attr) - assert 1 == len(bot.entities['property']) - bot.clear_entity_label(property) - assert 0 == len(bot.entities['property']) - for i in range(120): - if (bot.lookup_entity( - attr, type='property') is None): - break - property = bot.__getattribute__(attr) - assert 1 == len(bot.entities['property']) + plugin = 'QA' + b = Bot.factory(['--verbose', '--plugin=' + plugin]) + assert 1 == len(b.plugins) + assert plugin == b.plugins[0].__class__.__name__ - new_content = bot.site.loadcontent({'ids': property}, 'datatype') - wikidata_property = wikidata_bot.__getattribute__(attr) - wikidata_content = wikidata_bot.site.loadcontent( - {'ids': wikidata_property}, 'datatype') - assert (wikidata_content[wikidata_property]['datatype'] == - new_content[property]['datatype']), attr - assert (datatype == - wikidata_content[wikidata_property]['datatype']), attr + b = Bot.factory([ + '--verbose', + '--plugin=QA', + '--plugin=Repository', + ]) + assert 2 == len(b.plugins) - def test_set_point_in_time(self): - bot = Bot(argparse.Namespace( - test=True, - user='FLOSSbotCI', - dry_run=False, - verification_delay=30, - )) - item = bot.__getattribute__('Q_' + WikidataHelper.random_name()) - claim = pywikibot.Claim(bot.site, - bot.P_source_code_repository, - 0) - claim.setTarget("http://repo.com/some") - item.addClaim(claim) - bot.set_point_in_time(item, claim) - assert bot.need_verification(claim) is False - bot.set_point_in_time(item, claim, date(1965, 11, 2)) - assert bot.need_verification(claim) is True - bot.clear_entity_label(item.getID()) + @mock.patch.object(Bot, 'run_items') + @mock.patch.object(Bot, 'run_query') + def test_run(self, m_query, m_items): + b = Bot.factory([]) + b.run() + m_query.assert_called_with() + m_items.assert_not_called() - def test_search_entity(self): - bot = Bot(argparse.Namespace( - test=True, - user='FLOSSbotCI', - )) - name = WikidataHelper.random_name() - entity = { - "labels": { - "en": { - "language": "en", - "value": name, - } - }, - } - first = bot.site.editEntity({'new': 'item'}, entity) - first = pywikibot.ItemPage(bot.site, first['entity']['id'], 0) - second = bot.site.editEntity({'new': 'item'}, entity) - second = pywikibot.ItemPage(bot.site, second['entity']['id'], 0) + m_query.reset_mock() + m_items.reset_mock() + b = Bot.factory(['--verbose', '--item=Q1']) + b.run() + m_items.assert_called_with() + m_query.assert_not_called() - with pytest.raises(ValueError) as e: - bot.search_entity(bot.site, name, type='item') - assert "found multiple items" in str(e.value) + @mock.patch('FLOSSbot.qa.QA.run') + def test_run_items(self, m_run): + b = Bot.factory([ + '--verbose', + '--item=Q1', + '--plugin=QA', + ]) + b.run() + m_run.assert_called_with(mock.ANY) - claim = pywikibot.Claim(bot.site, bot.P_instance_of, 0) - claim.setTarget(bot.Q_Wikimedia_disambiguation_page) - first.addClaim(claim) + @mock.patch('FLOSSbot.qa.QA.run') + @mock.patch('pywikibot.pagegenerators.WikidataSPARQLPageGenerator') + def test_run_query_default(self, m_query, m_run): + b = Bot.factory([ + '--verbose', + '--plugin=QA', + ]) + m_query.side_effect = 'one page' + b.run() + m_run.assert_called_with(mock.ANY) - found = bot.search_entity(bot.site, name, type='item') - assert found.getID() == second.getID() + @mock.patch('FLOSSbot.qa.QA.run') + @mock.patch('pywikibot.pagegenerators.WikidataSPARQLPageGenerator') + def test_run_query_items(self, m_query, m_run, caplog): + b = Bot.factory([ + '--verbose', + '--filter=qa-verify', + '--plugin=QA', + ]) + m_query.side_effect = 'one page' + b.run() - bot.site.editEntity({'new': 'item'}, entity) - - with pytest.raises(ValueError) as e: - bot.search_entity(bot.site, name, type='item') - assert "found multiple items" in str(e.value) - - Bot.authoritative['test'][name] = second.getID() - found = bot.search_entity(bot.site, name, type='item') - assert found.getID() == second.getID() + for record in caplog.records(): + if 'running query' in record.message: + assert '?qa' in record.message diff --git a/tests/test_main.py b/tests/test_main.py deleted file mode 100644 index df56061..0000000 --- a/tests/test_main.py +++ /dev/null @@ -1,52 +0,0 @@ -# -*- mode: python; coding: utf-8 -*- -# -# Copyright (C) 2016 Loic Dachary <[email protected]> -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -# -import logging - -import mock - -import FLOSSbot.qa -from FLOSSbot import main - - -class TestFLOSSbot(object): - - @mock.patch.object(FLOSSbot.qa.QA, 'run') - def test_run_main(self, m_run): - f = main.FLOSSbot() - - argv = ['qa'] - f.run(['--verbose'] + argv) - assert (logging.getLogger('FLOSSbot').getEffectiveLevel() == - logging.DEBUG) - - f.run(argv) - assert (logging.getLogger('FLOSSbot').getEffectiveLevel() == - logging.INFO) - - @mock.patch.object(FLOSSbot.repository.Repository, 'run') - def test_run_repository(self, m_run): - f = main.FLOSSbot() - - argv = ['repository'] - f.run(['--verbose'] + argv) - assert (logging.getLogger('FLOSSbot').getEffectiveLevel() == - logging.DEBUG) - -# Local Variables: -# compile-command: "cd .. ; tox -e py3 tests/test_main.py" -# End: diff --git a/tests/test_plugin.py b/tests/test_plugin.py new file mode 100644 index 0000000..3e31601 --- /dev/null +++ b/tests/test_plugin.py @@ -0,0 +1,150 @@ +# -*- mode: python; coding: utf-8 -*- +# +# Copyright (C) 2016 Loic Dachary <[email protected]> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +from datetime import date + +import pytest +import pywikibot + +from FLOSSbot.bot import Bot +from FLOSSbot.plugin import Plugin +from tests.wikidata import WikidataHelper + + +class TestPlugin(object): + + def setup_class(cls): + WikidataHelper().login() + + def test_lookup_item(self): + bot = Bot.factory([ + '--test', + '--user=FLOSSbotCI', + ]) + plugin = Plugin(bot, bot.args) + assert 0 == len(plugin.bot.entities['item']) + git = plugin.Q_git + assert 1 == len(plugin.bot.entities['item']) + assert git == plugin.Q_git + assert plugin.Q_Concurrent_Versions_System + assert 2 == len(plugin.bot.entities['item']) + + def test_create_entity(self): + bot = Bot.factory([ + '--test', + '--user=FLOSSbotCI', + ]) + plugin = Plugin(bot, bot.args) + item = plugin.Q_git + assert 1 == len(plugin.bot.entities['item']) + plugin.clear_entity_label(item.getID()) + assert 0 == len(plugin.bot.entities['item']) + item = plugin.Q_git + assert 1 == len(plugin.bot.entities['item']) + + property2datatype = { + 'P_source_code_repository': 'url', + 'P_website_username': 'string', + 'P_protocol': 'wikibase-item', + } + + bot = Bot.factory([ + '--test', + '--user=FLOSSbotCI', + ]) + wikidata_plugin = Plugin(bot, bot.args) + for (attr, datatype) in property2datatype.items(): + plugin.reset_cache() + property = plugin.__getattribute__(attr) + assert 1 == len(plugin.bot.entities['property']) + plugin.clear_entity_label(property) + assert 0 == len(plugin.bot.entities['property']) + for i in range(120): + if (plugin.lookup_entity( + attr, type='property') is None): + break + property = plugin.__getattribute__(attr) + assert 1 == len(plugin.bot.entities['property']) + + new_content = plugin.bot.site.loadcontent( + {'ids': property}, 'datatype') + wikidata_property = wikidata_plugin.__getattribute__(attr) + wikidata_content = wikidata_plugin.bot.site.loadcontent( + {'ids': wikidata_property}, 'datatype') + assert (wikidata_content[wikidata_property]['datatype'] == + new_content[property]['datatype']), attr + assert (datatype == + wikidata_content[wikidata_property]['datatype']), attr + + def test_set_point_in_time(self): + bot = Bot.factory([ + '--test', + '--user=FLOSSbotCI', + ]) + plugin = Plugin(bot, bot.args) + item = plugin.__getattribute__('Q_' + WikidataHelper.random_name()) + claim = pywikibot.Claim(plugin.bot.site, + plugin.P_source_code_repository, + 0) + claim.setTarget("http://repo.com/some") + item.addClaim(claim) + plugin.set_point_in_time(item, claim) + assert plugin.need_verification(claim) is False + plugin.set_point_in_time(item, claim, date(1965, 11, 2)) + assert plugin.need_verification(claim) is True + plugin.clear_entity_label(item.getID()) + + def test_search_entity(self): + bot = Bot.factory([ + '--test', + '--user=FLOSSbotCI', + ]) + plugin = Plugin(bot, bot.args) + name = WikidataHelper.random_name() + entity = { + "labels": { + "en": { + "language": "en", + "value": name, + } + }, + } + first = plugin.bot.site.editEntity({'new': 'item'}, entity) + first = pywikibot.ItemPage(bot.site, first['entity']['id'], 0) + second = plugin.bot.site.editEntity({'new': 'item'}, entity) + second = pywikibot.ItemPage(bot.site, second['entity']['id'], 0) + + with pytest.raises(ValueError) as e: + plugin.search_entity(plugin.bot.site, name, type='item') + assert "found multiple items" in str(e.value) + + claim = pywikibot.Claim(plugin.bot.site, plugin.P_instance_of, 0) + claim.setTarget(plugin.Q_Wikimedia_disambiguation_page) + first.addClaim(claim) + + found = plugin.search_entity(bot.site, name, type='item') + assert found.getID() == second.getID() + + plugin.bot.site.editEntity({'new': 'item'}, entity) + + with pytest.raises(ValueError) as e: + plugin.search_entity(plugin.bot.site, name, type='item') + assert "found multiple items" in str(e.value) + + Plugin.authoritative['test'][name] = second.getID() + found = plugin.search_entity(plugin.bot.site, name, type='item') + assert found.getID() == second.getID() diff --git a/tests/test_qa.py b/tests/test_qa.py index b769738..e876557 100644 --- a/tests/test_qa.py +++ b/tests/test_qa.py @@ -15,13 +15,13 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # -import argparse import logging import mock import pywikibot import requests +from FLOSSbot.bot import Bot from FLOSSbot.qa import QA from tests.wikidata import WikidataHelper @@ -46,30 +46,32 @@ return c(url2code.get(url, requests.codes.ok)) m_get.side_effect = get - qa = QA(argparse.Namespace( - test=True, - user='FLOSSbotCI', - dry_run=False, - verification_delay=0, - )) + bot = Bot.factory([ + '--verbose', + '--test', + '--user=FLOSSbotCI', + '--verification-delay=0', + ]) + qa = QA(bot, bot.args) item = qa.__getattribute__('Q_' + WikidataHelper.random_name()) log.debug(">> do nothing if there is no source code repository") - to_verify = pywikibot.ItemPage(qa.site, item.getID(), 0) + to_verify = pywikibot.ItemPage(qa.bot.site, item.getID(), 0) assert [] == qa.verify(to_verify) log.debug(">> add a source code repository") - repository = pywikibot.Claim(qa.site, qa.P_source_code_repository, 0) + repository = pywikibot.Claim( + qa.bot.site, qa.P_source_code_repository, 0) url = "http://github.com/FAKE1/FAKE2" repository.setTarget(url) item.addClaim(repository) log.debug(">> add a qa statement") - to_verify = pywikibot.ItemPage(qa.site, item.getID(), 0) + to_verify = pywikibot.ItemPage(qa.bot.site, item.getID(), 0) qa.fixup(to_verify) log.debug(">> no ci found") - to_verify = pywikibot.ItemPage(qa.site, item.getID(), 0) + to_verify = pywikibot.ItemPage(qa.bot.site, item.getID(), 0) url2code['https://travis-ci.org/FAKE1/FAKE2'] = 404 assert ['no ci found'] == qa.verify(to_verify) @@ -84,7 +86,7 @@ log.debug(">> inconsistent qualifier") repository.changeTarget("http://github.com/other/other") - to_verify = pywikibot.ItemPage(qa.site, item.getID(), 0) + to_verify = pywikibot.ItemPage(qa.bot.site, item.getID(), 0) assert (['inconsistent qualifier archive URL', 'inconsistent qualifier described at URL'] == qa.verify(to_verify)) @@ -93,7 +95,7 @@ qa_claim = to_verify.claims[qa.P_software_quality_assurance][0] archive_URL = qa_claim.qualifiers[qa.P_archive_URL][0] qa_claim.removeQualifier(archive_URL) - to_verify = pywikibot.ItemPage(qa.site, item.getID(), 0) + to_verify = pywikibot.ItemPage(qa.bot.site, item.getID(), 0) assert ['inconsistent qualifier described at URL', 'missing qualifier archive URL'] == qa.verify(to_verify) diff --git a/tests/test_repository.py b/tests/test_repository.py index be5ca83..20b4b44 100644 --- a/tests/test_repository.py +++ b/tests/test_repository.py @@ -15,10 +15,9 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # -import argparse - import pywikibot +from FLOSSbot.bot import Bot from FLOSSbot.repository import Repository from tests.wikidata import WikidataHelper @@ -29,10 +28,12 @@ WikidataHelper().login() def setup(self): - self.r = Repository.factory([ - '--user=FLOSSbotCI', + bot = Bot.factory([ + '--verbose', '--test', + '--user=FLOSSbotCI', ]) + self.r = Repository(bot, bot.args) def test_guessproto__github_is_git(self): assert( @@ -93,36 +94,30 @@ is None) def test_verify(self): - r = Repository(argparse.Namespace( - test=True, - user='FLOSSbotCI', - dry_run=False, - verification_delay=30, - )) - item = r.__getattribute__('Q_' + WikidataHelper.random_name()) - claim = pywikibot.Claim(r.site, - r.P_source_code_repository, + item = self.r.__getattribute__('Q_' + WikidataHelper.random_name()) + claim = pywikibot.Claim(self.r.bot.site, + self.r.P_source_code_repository, 0) url = "http://github.com/ceph/ceph" claim.setTarget(url) item.addClaim(claim) - to_verify = pywikibot.ItemPage(r.site, item.getID(), 0) - assert {url: 'no protocol'} == r.verify(to_verify) + to_verify = pywikibot.ItemPage(self.r.bot.site, item.getID(), 0) + assert {url: 'no protocol'} == self.r.verify(to_verify) - protocol = pywikibot.Claim(r.site, r.P_protocol, 0) - protocol.setTarget(r.Q_git) + protocol = pywikibot.Claim(self.r.bot.site, self.r.P_protocol, 0) + protocol.setTarget(self.r.Q_git) claim.addQualifier(protocol, bot=True) - to_verify = pywikibot.ItemPage(r.site, item.getID(), 0) - assert {url: 'verified'} == r.verify(to_verify) + to_verify = pywikibot.ItemPage(self.r.bot.site, item.getID(), 0) + assert {url: 'verified'} == self.r.verify(to_verify) - to_verify = pywikibot.ItemPage(r.site, item.getID(), 0) - assert {url: 'no need'} == r.verify(to_verify) + to_verify = pywikibot.ItemPage(self.r.bot.site, item.getID(), 0) + assert {url: 'no need'} == self.r.verify(to_verify) claim.changeTarget("http://example.org") - to_verify = pywikibot.ItemPage(r.site, item.getID(), 0) - assert {"http://example.org": 'fail'} == r.verify(to_verify) + to_verify = pywikibot.ItemPage(self.r.bot.site, item.getID(), 0) + assert {"http://example.org": 'fail'} == self.r.verify(to_verify) - r.clear_entity_label(item.getID()) + self.r.clear_entity_label(item.getID()) diff --git a/tests/wikidata.py b/tests/wikidata.py index 708748f..b69b1f8 100644 --- a/tests/wikidata.py +++ b/tests/wikidata.py @@ -14,7 +14,6 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # -import logging import random import string @@ -23,10 +22,6 @@ class WikidataHelper(object): - - def __init__(self): - logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s') - logging.getLogger('FLOSSbot').setLevel(logging.DEBUG) def login(self): site = pywikibot.Site("test", "wikidata", "FLOSSbotCI") -- To view, visit https://gerrit.wikimedia.org/r/312706 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Id99402a1b3e61eb969a5202cae0587a1cd5775f6 Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/bots/FLOSSbot Gerrit-Branch: master Gerrit-Owner: Dachary <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
