EBernhardson has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/394011 )
Change subject: Add basic pre-deployment sanity check for MLR ...................................................................... Add basic pre-deployment sanity check for MLR Implements a very simple configuration-driven sanity checker that ensures some set of page titles is in the top 3 results of a given query. The intention of this script is to build up a small list of queries and results for each wiki we deploy MLR to and use that list as a smoke check before pushing a mediawiki-config change to move a new model to full production usage. Not sure relforge is the best place for this, or where the configuration should really go, but I couldn't think of a better place. Change-Id: Ie29ef99d2e404fe97e3b2e42b17df22b836385d8 --- A sanityCheck.py A sanityCheck/enwiki.json 2 files changed, 75 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/relevanceForge refs/changes/11/394011/1 diff --git a/sanityCheck.py b/sanityCheck.py new file mode 100644 index 0000000..bcf7136 --- /dev/null +++ b/sanityCheck.py @@ -0,0 +1,64 @@ +from __future__ import print_function +import argparse +import functools +import json +import requests +import sys +import urlparse + + +def check(model, config): + ok = True + query_params = { + 'action': 'query', + 'list': 'search', + 'srlimit': 3, + 'cirrusMLRModel': model, + 'format': 'json', + 'formatversion': 2, + } + if 'query' in config: + # Apply overrides from config if requested. This might + # apply a specific cirrusUserTesting param or some such. 
+ query_params.update(config['query']) + + print('Running sanity check against %s' % (config['api'])) + for query, expected in config['queries'].items(): + print("Query: %s" % (query)) + query_params['srsearch'] = query + r = requests.get(config['api'], params=query_params) + results = [x['title'] for x in r.json()['query']['search']] + diff = set(expected).difference(results) + if diff: + ok = False + print("Results:\n\t" + '\n\t'.join(results)) + print("Expected:") + for title in expected: + marker = '+' if title in results else '-' + print('\t%s %s' % (marker, title)) + print('') + else: + print("PASSED\n") + return ok + + +def parse_arguments(argv): + parser = argparse.ArgumentParser(description='mlr sanity check') + parser.add_argument( + 'config', type=lambda x: json.load(open(x)), + help='json file containing queries to check and results expected in top 3') + parser.add_argument( + 'model', help='MLR model to use for ranking') + args = parser.parse_args(argv) + return dict(vars(args)) + + +def main(argv=None): + args = parse_arguments(argv) + return check(**args) + + +if __name__ == "__main__": + ok = main() + sys.exit(0 if ok else 1) + diff --git a/sanityCheck/enwiki.json b/sanityCheck/enwiki.json new file mode 100644 index 0000000..a1902cb --- /dev/null +++ b/sanityCheck/enwiki.json @@ -0,0 +1,11 @@ +{ + "api": "https://en.wikipedia.org/w/api.php", + "queries": { + "example": [ + "Example" + ], + "JFK": [ + "John F. 
Kennedy" + ] + } +} -- To view, visit https://gerrit.wikimedia.org/r/394011 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ie29ef99d2e404fe97e3b2e42b17df22b836385d8 Gerrit-PatchSet: 1 Gerrit-Project: wikimedia/discovery/relevanceForge Gerrit-Branch: master Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits