Tjones has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/269313

Change subject: Hack the JSON output with order numbers
......................................................................

Hack the JSON output with order numbers

Add search result order numbers to the JSON in jsondiff.py so it
is easier to track what's what when reviewing results in the diffs.

Add enwiki as the explicit basename in relevance.ini

Fix line-too-long error in importindices.py to stop tox complaints.

Bug: T126245
Change-Id: I602182f6e242c188fffad23d2f02be20fdb99495
---
M importindices.py
M jsondiff.py
M relevance.ini
3 files changed, 24 insertions(+), 7 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/relevancylab refs/changes/13/269313/1

diff --git a/importindices.py b/importindices.py
index a9f5ad9..957c4c5 100755
--- a/importindices.py
+++ b/importindices.py
@@ -67,7 +67,8 @@
     args = parser.parse_args()
 
     for wiki in args.wikis:
-        src_url = 'http://dumps.wikimedia.org/other/cirrussearch/%s/%s-%s-cirrussearch-%s.json.gz' % \
+        src_url = \
+            'http://dumps.wikimedia.org/other/cirrussearch/%s/%s-%s-cirrussearch-%s.json.gz' % \
             (args.date, wiki, args.date, args.type)
         fd, temp_path = tempfile.mkstemp(dir=args.temp_dir)
         print("Downloading ", src_url, " to ", temp_path)
diff --git a/jsondiff.py b/jsondiff.py
index 79b4ace..55b582d 100755
--- a/jsondiff.py
+++ b/jsondiff.py
@@ -13,6 +13,10 @@
 # the JSON, but the goal here was to put something reasonable together
 # as quickly as possible.
 #
+# It has a number of hacks specific to diffing JSON from CirrusSearch
+# results, including removing "searchmatch" markup and bolding elements
+# that are most important in comparing results, and numbering results.
+#
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -36,6 +40,15 @@
 import re
 import sys
 from itertools import izip_longest
+
+
+def add_nums_to_results(results):
+    res_count = 1;
+    if 'rows' in results:
+        for result in results['rows']:
+            result['relLabItemNumber'] = res_count;
+            res_count += 1;
+    return results;
 
 
 def main():
@@ -72,13 +85,16 @@
             bline = re.sub(r'<span class=\\"searchmatch\\">(.*?)<\\/span>',
                            '\\1', bline)
 
-            aline = json.dumps(json.loads(aline), sort_keys=True, indent=2)
-            bline = json.dumps(json.loads(bline), sort_keys=True, indent=2)
-            output = difflib.HtmlDiff(wrapcolumn=50).make_file(aline.split('\n'),
-                                                               bline.split('\n'),
+            aresults = add_nums_to_results(json.loads(aline));
+            bresults = add_nums_to_results(json.loads(bline));
+
+            aline = json.dumps(aresults, sort_keys=True, indent=2)
+            bline = json.dumps(bresults, sort_keys=True, indent=2)
+            output = difflib.HtmlDiff(wrapcolumn=50).make_file(aline.splitlines(),
+                                                               bline.splitlines(),
                                                                file1, file2)
             # highlight key fields
-            output = re.sub(r'("(title|query|totalHits)":&nbsp;.*?)</td>',
+            output = re.sub(r'("(title|query|totalHits|relLabItemNumber)":&nbsp;.*?)</td>',
                             '<b><font color=#0000aa>\\1</font></b></td>', output)
             diff_file.writelines(output)
             diff_file.close()
diff --git a/relevance.ini b/relevance.ini
index 3aa4519..3c5ef5d 100644
--- a/relevance.ini
+++ b/relevance.ini
@@ -3,7 +3,7 @@
 ; Host to run queries on
 labHost = suggesty.eqiad.wmflabs
 ; Command to run a query
-searchCommand = sudo -u vagrant mwscript extensions/CirrusSearch/maintenance/runSearch.php
+searchCommand = sudo -u vagrant mwscript extensions/CirrusSearch/maintenance/runSearch.php --baseName=enwiki
 ; Working directory
 workDir = ./relevance
 ; JSON Diff tool

-- 
To view, visit https://gerrit.wikimedia.org/r/269313
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I602182f6e242c188fffad23d2f02be20fdb99495
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/relevancylab
Gerrit-Branch: master
Gerrit-Owner: Tjones <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to