jenkins-bot has submitted this change and it was merged.

Change subject: Add cqd a small cirrus query debugger tool
......................................................................


Add cqd a small cirrus query debugger tool

It displays Lucene explanations in a more compact format than the JSON
provided by Elasticsearch.

The code is relatively fragile and painful as it tries to pack multiple explain
nodes into a single score node. It will probably fail on some setups.

Functionalities:
* displays the explanation
* identifies rank score
* identifies max score

Bug: T126621
Change-Id: Ia7a9585a4adf2052f75b7a8463ecb6e9cdb49d9c
---
A cqd.py
1 file changed, 819 insertions(+), 0 deletions(-)

Approvals:
  Tjones: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/cqd.py b/cqd.py
new file mode 100755
index 0000000..2c742f2
--- /dev/null
+++ b/cqd.py
@@ -0,0 +1,819 @@
+#!/usr/bin/env python
+
+# cqd.py - Cirrus Query Debugger is a small command line tool to display
+# various debugging information.
+# -.-. --.- -..
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+# http://www.gnu.org/copyleft/gpl.html
+
+import argparse
+# import json
+import math
+import re
+import requests
+import sys
+from termcolor import colored
+
+
+class CQuery:
+    """Represents a cirrus search query"""
+    def __init__(self, query, wiki, params):
+        self.query = query
+        self.params = params
+        self.wiki = wiki
+
+    def run(self):
+        res = self.fetch().json()
+        return CQResultSet(res, self.params.offset)
+
+    def fetch(self):
+        if re.search('^https?://', self.wiki):
+            base_uri = self.wiki
+        else:
+            base_uri = 'https://'+self.wiki+'/w/api.php'
+        uri_param = dict({
+            'action': 'query',
+            'list': 'search',
+            'cirrusDumpResult': '',
+            'cirrusExplain': '',
+            'format': 'json',
+            'srsearch': self.query,
+        })
+        self.params.update(uri_param)
+        return requests.get(base_uri, uri_param)
+
+
+class CQueryParams:
+    """List of tweak parameters"""
+    def __init__(self, args):
+        self.limit = args.limit
+        self.offset = args.offset
+        self.limit = args.limit
+        self.functionWindow = args.functionWindow
+        self.phraseWindow = args.phraseWindow
+        self.rescoreProfile = args.rescoreProfile
+        self.allField = args.allField
+        self.custom = args.custom
+
+    def update(self, uri_params):
+            uri_params['srlimit'] = self.limit
+            uri_params['sroffset'] = self.offset
+            if self.functionWindow is not None:
+                uri_params['cirrusFunctionWindow'] = self.functionWindow
+            if self.phraseWindow is not None:
+                uri_params['cirrusPhraseWindow'] = self.phraseWindow
+                # TODO: remove back compat param
+                uri_params['cirrusPhraseWinwdow'] = self.phraseWindow
+
+            if self.rescoreProfile is not None:
+                uri_params['cirrusRescoreProfile'] = self.rescoreProfile
+
+            if self.allField is not None:
+                uri_params['cirrusUseAllFields'] = self.allField
+            for c in self.custom:
+                (param, value) = c.split('=', 2)
+                uri_params[param] = value
+
+
+class CQResultSet:
+    """A Cirrus query result set"""
+    def __init__(self, res, offset):
+        self.desc = res['description']
+        res = res['result']
+        self.time = res['took']
+        self.shards = res['_shards']['total']
+        res = res['hits']
+        self.total = res['total']
+        self.max_score = res['max_score']
+        self.hits = list()
+        self.shardHits = {}
+
+        rank = offset
+        for hit in res['hits']:
+            rank += 1
+            hit = CQResultHit(rank, hit)
+            self.hits.append(hit)
+            if hit.shard not in self.shardHits:
+                self.shardHits[hit.shard] = 0
+            self.shardHits[hit.shard] += 1
+
+
+class CQResultHit:
+    """A single hit"""
+    def __init__(self, rank, hit):
+        self.rank = rank
+        self.shard = hit['_shard']
+        self.id = hit['_id']
+        self.title = hit['_source']['title']
+        self.score = hit['_score']
+        self.explanation = None
+        if '_explanation' in hit:
+            self.explanation = CQExplain.build(hit['_explanation'])
+
+        self.snippet = None
+        if 'highlight' in hit and 'text' in hit['highlight']:
+            self.snippet = hit['highlight']['text']
+
+
+class CQPrinter:
+    def __init__(self):
+        self.out = sys.stdout
+
+    def nl(self):
+        self.out.write('\n')
+
+    def w(self, txt, color=None, bg=None):
+        txt = str(txt)
+        if color is not None:
+            if bg is not None:
+                txt = colored(txt, color, 'on_'+bg)
+            else:
+                txt = colored(txt, color, attrs=['bold'])
+        self.out.write(txt)
+
+
+class CQExplainPrinter:
+    """Display explain info"""
+    def __init__(self, printer=None, level=None):
+        if level is None:
+            self.level = 0
+        else:
+            self.level = level
+        self.indentChar = "  "
+        if printer is not None:
+            self.printer = printer
+        else:
+            self.printer = CQPrinter()
+
+    def disp(self, exp, rankScore=None, maxScore=None):
+        # we need rankScore because we want to flag
+        # the explain node that is responsible for the rankScore
+        # This is because explain will re-apply the rescore query
+        # and if the doc is normally outside the rescore window
+        # during explain it will always be inside the rescore window
+        if rankScore is not None:
+            self.rankScore = rankScore
+
+        # Flag the max score
+        if maxScore is not None:
+            self.maxScore = maxScore
+
+        self.indent()
+        self.mainScore(exp.score)
+        self.append(' = ')
+        exp.disp(self)
+        self.printer.nl()
+        self.descend(exp)
+
+    def append(self, txt, color=None, bg=None):
+        self.printer.w(txt, color, bg)
+        return self
+
+    def score(self, txt):
+        self.printer.w(str(txt), 'cyan')
+        return self
+
+    def mainScore(self, txt):
+        if self.maxScore is not None and math.fabs(txt - self.maxScore) < 
0.0001:
+            self.printer.w(str(txt), 'grey', 'green')
+            self.maxScore = None
+        elif self.rankScore is not None and math.fabs(txt - self.rankScore) < 
0.0001:
+            self.printer.w(str(txt), 'white', 'blue')
+            # found it
+            self.rankScore = None
+        else:
+            if txt > 10:
+                self.printer.w(str(txt), 'red')
+            elif txt > 1:
+                self.printer.w(str(txt), 'yellow')
+            else:
+                self.printer.w(str(txt), 'white')
+        return self
+
+    def term(self, field, term, boost=None):
+        self.printer.w(str(field), 'blue')
+        self.printer.w(':')
+        self.printer.w(term, 'green')
+        if boost is not None:
+            self.printer.w('^')
+            self.score(boost)
+        return self
+
+    def operator(self, operator):
+        self.printer.w(operator, 'blue')
+        return self
+
+    def weight(self, weight):
+        self.printer.w(weight, 'cyan')
+        return self
+
+    def query(self, query):
+        lastO = 0
+        for r in re.compile('\\(([^\\(\\)]+)\\)').finditer(query):
+            if lastO < r.start(1):
+                self.printer.w(query[lastO:r.start(1)])
+            self.printer.w(r.group(1), color='magenta')
+            lastO = r.end(1)
+        if lastO < len(query):
+            self.printer.w(query[lastO:])
+
+    def formula(self, formula):
+        lastO = 0
+        for r in re.compile('doc\\[\'([a-z\\._]+)\'\\]').finditer(formula):
+            if lastO < r.start():
+                self.printer.w(formula[lastO:r.start(1)])
+                self.printer.w(r.group(1), color='magenta')
+                lastO += r.end(1)
+                self.printer.w(formula[lastO:r.end()])
+                lastO = r.end()
+            if lastO < len(formula):
+                self.printer.w(formula[lastO:])
+
+    def descend(self, exp):
+        for child in exp.children:
+            self.level += 1
+            self.disp(child)
+            self.level -= 1
+
+    def indent(self):
+        self.printer.w(self.indentChar * self.level)
+
+
+class CQHitPrinter:
+    """Display hit info"""
+    def __init__(self):
+        self.level = 0
+        self.indentChar = "  "
+        self.printer = CQPrinter()
+        self.explain_printer = CQExplainPrinter(level=1)
+        self.snippet_pattern = re.compile('<span 
class="searchmatch">([^<]+)</span>')
+
+    def indent(self, lvl=None):
+        if lvl is None:
+            lvl = self.level
+        self.printer.w(self.indentChar * lvl)
+
+    def higlight(self, word):
+        self.printer.w(word, 'white')
+
+    def shard(self, num):
+        self.printer.w('S' + str(num), 'green')
+        return self
+
+    def disp(self, hit, maxScore=None):
+        self.printer.w('#')
+        self.printer.w(hit.rank, 'white')
+        self.printer.w('(')
+        self.shard(hit.shard)
+        self.printer.w('): ')
+        self.printer.w(hit.title, 'blue')
+        self.printer.w(' - ')
+        self.printer.w(str(hit.score), 'white')
+        self.printer.nl()
+        if hit.snippet is not None:
+            self.indent(1)
+            for s in hit.snippet:
+                s = s.replace('\n', ' ')
+                lastO = 0
+                for p in self.snippet_pattern.finditer(s):
+                    if p.start() > lastO:
+                        self.printer.w(s[lastO:p.start()])
+                    self.higlight(p.group(1))
+                    lastO = p.end()
+                if lastO < len(s):
+                    self.printer.w(s[lastO:])
+            self.printer.nl()
+            self.indent()
+
+        self.explain_printer.disp(hit.explanation, rankScore=hit.score, 
maxScore=maxScore)
+        self.printer.nl()
+
+
+class CQResultSetPrinter:
+    def __init__(self):
+        self.level = 0
+        self.indentChar = "  "
+        self.printer = CQPrinter()
+        self.hitPrinter = CQHitPrinter()
+
+    def num(self, num):
+        self.printer.w(num, 'white')
+        return self
+
+    def score(self, num):
+        self.printer.w(num, 'blue')
+
+    def shard(self, num):
+        self.printer.w('S' + str(num), 'green')
+        return self
+
+    def disp(self, results):
+        self.printer.w(results.desc)
+        self.printer.nl()
+        self.printer.w('Found ')
+        self.num(results.total)
+        self.printer.w(' hits in ')
+        self.num(results.shards)
+        self.printer.w(' shards, time: ')
+        self.num(results.time)
+        self.printer.w('ms (maxScore: ')
+        self.score(results.max_score)
+        self.printer.w(', shard bal:')
+        for k in sorted(results.shardHits, key=results.shardHits.get, 
reverse=True):
+            self.printer.w(' ')
+            self.shard(k)
+            self.printer.w(':')
+            self.num(results.shardHits[k])
+        self.printer.w(')')
+        self.printer.nl()
+        self.printer.nl()
+
+        for h in results.hits:
+            self.hitPrinter.disp(h, maxScore=results.max_score)
+
+
+class CQExplain:
+    @staticmethod
+    def build(exp):
+        if CQRescoreExp.accept(exp):
+            return CQRescoreExp(exp)
+        if CQSingleRescoreExp.accept(exp):
+            return CQSingleRescoreExp(exp)
+        if CQDisMaxExp.accept(exp):
+            return CQDisMaxExp(exp)
+        if CQTermWeight.accept(exp):
+            return CQTermWeight(exp)
+        if CQPhraseWeight.accept(exp):
+            return CQPhraseWeight(exp)
+        if CQBoolWithCoord.accept(exp):
+            return CQBoolWithCoord(exp)
+        if CQBool.accept(exp):
+            return CQBool(exp)
+        if CQFilter.accept(exp):
+            return CQFilter(exp)
+        if CQFunctionScoreChain.accept(exp):
+            return CQFunctionScoreChain(exp)
+        raise Exception('Unknown explain node :' + exp['description'])
+
+    def __init__(self, exp):
+        self.score = exp['value']
+        self.desc = exp['description']
+        self.children = list()
+
+    def __cmp__(self, other):
+        return self.score - other.score
+
+    def disp(self, display):
+        return
+
+
+class CQSingleRescoreExp(CQExplain):
+    """Unclear..."""
+
+    @staticmethod
+    def accept(exp):
+        """Can be identified by the presence of product primaryWeight"""
+
+        # always 2 (primary*w) [op] (secondary*w)
+        if len(exp['details']) != 2:
+            return False
+
+        # must be the product with primaryWeightV
+        if exp['description'] != 'product of:':
+            return False
+
+        if len(exp['details'][0]['details']) != 2:
+            return False
+
+        if exp['details'][1]['description'] != 'primaryWeight':
+            return False
+
+        return True
+
+    def __init__(self, exp):
+        CQExplain.__init__(self, exp)
+        self.children.append(CQExplain.build(exp['details'][0]))
+        self.primaryWeigth = exp['details'][1]['value']
+
+    def disp(self, display):
+        display.append('RescoreSingle ')
+        display.append(' primW=')
+        display.weight(self.primaryWeigth)
+
+
+class CQRescoreExp(CQExplain):
+    """Rescore"""
+
+    @staticmethod
+    def accept(exp):
+        """Can be identified by the presence of product primaryWeight"""
+
+        # always 2 (primary*w) [op] (secondary*w)
+        if len(exp['details']) != 2:
+            return False
+
+        # must be the product with primaryWeightV
+        if exp['details'][0]['description'] != 'product of:':
+            return False
+
+        if len(exp['details'][0]['details']) != 2:
+            return False
+
+        if exp['details'][0]['details'][1]['description'] != 'primaryWeight':
+            return False
+
+        return True
+
+    def __init__(self, exp):
+        CQExplain.__init__(self, exp)
+        self.children.append(CQExplain.build(exp['details'][0]['details'][0]))
+        self.children.append(CQExplain.build(exp['details'][1]['details'][0]))
+        self.operator = re.search(r'([^ ]+)', exp['description']).group(1)
+        self.primaryWeigth = exp['details'][0]['details'][1]['value']
+        self.secondaryWeigth = exp['details'][1]['details'][1]['value']
+
+    def disp(self, display):
+        display.append('Rescore ')
+        display.operator(self.operator)
+        display.append(' primW=')
+        display.weight(self.primaryWeigth)
+        display.append(' secW=')
+        display.weight(self.secondaryWeigth)
+
+
+class CQBool(CQExplain):
+    """Simple boolean"""
+    @staticmethod
+    def accept(exp):
+        # check sum of
+        return exp['description'] == 'sum of:'
+
+    def __init__(self, exp):
+        CQExplain.__init__(self, exp)
+        for cl in exp['details']:
+            self.children.append(CQExplain.build(cl))
+
+    def disp(self, display):
+        display.append('Bool')
+
+
+class CQBoolWithCoord(CQBool):
+    """Simple boolean with a coord factor"""
+    @staticmethod
+    def accept(exp):
+        # check for the product with coord()
+        if exp['description'] == 'product of:'\
+                and len(exp['details']) == 2\
+                and exp['details'][1]['description'].startswith('coord('):
+            return CQBool.accept(exp['details'][0])
+        return False
+
+    def __init__(self, exp):
+        CQBool.__init__(self, exp['details'][0])
+        self.score = exp['value']
+        self.coord = exp['details'][1]['value']
+
+    def disp(self, display):
+        CQBool.disp(self, display)
+        display.append(' coord=')
+        display.weight(self.coord)
+
+
+class CQFunctionScoreChain(CQExplain):
+    """Function score used in function rescore window"""
+    @staticmethod
+    def accept(exp):
+        if exp['description'].startswith('function score, '):
+            return True
+        return False
+
+    def __init__(self, exp):
+        CQExplain.__init__(self, exp)
+        self.boost = exp['details'][0]['value']
+        if len(exp['details']) == 2:
+            # empty rescore chain, no match?
+            self.scoreMode = 'nomatch?'
+            return
+
+        search = re.search('score mode \\[([^\\]]+)\\]',
+                           exp['details'][1]['details'][0]['description'])
+        if search:
+            self.scoreMode = search.group(1)
+            # skip the min of with epsilon
+            for func in exp['details'][1]['details'][0]['details']:
+                self.children.append(self.build_chain(func))
+        else:
+            # a single function?
+            self.scoreMode = '???'
+            self.children.append(self.build_chain(exp))
+
+    def disp(self, display):
+        display.append('FuncChain ')
+        display.append('scoreMode: ')
+        display.operator(self.scoreMode)
+
+    def build_chain(self, func):
+        if CQFunction.accept(func):
+            return CQFunction(func)
+        if CQFunctionQuery.accept(func):
+            return CQFunctionQuery(func)
+        if CQFunctionScore.accept(func):
+            return CQFunctionScore(func)
+        raise Exception('Unknwon function :' + func['description'])
+
+
+class CQFunctionScore(CQExplain):
+    """Function score query
+    NOTE: do not add to CQExplain.build it's in conflict with 
CQFunctionScoreChain"""
+
+    @staticmethod
+    def accept(exp):
+        return exp['description'] == 'function score, product of:'
+
+    def __init__(self, exp):
+        CQExplain.__init__(self, exp)
+        if exp['details'][0]['description'] != 'match filter: *:*':
+            self.query = exp['details'][0]['description']
+
+    def disp(self, display):
+        display.append('FuncScore ')
+        display.append('query: ')
+        display.query(self.query)
+
+
+class CQFunction(CQFunctionScore):
+    @staticmethod
+    def accept(exp):
+        if '*:*' in exp['details'][0]['description'] and\
+                "function: " in exp['details'][1]['description']:
+            return True
+
+        if '*:*' in exp['details'][0]['description'] and\
+                ("Math.min of" in exp['details'][1]['description'] or
+                 "product of:" in exp['details'][1]['description']) and\
+                "function: " in exp['details'][1]['details'][0]['description']:
+            return True
+        return False
+
+    def __init__(self, exp):
+        CQFunctionScore.__init__(self, exp)
+        if exp['details'][1]['description'] == 'product of:' or\
+                "Math.min of" in exp['details'][1]['description']:
+            self.function = exp['details'][1]['details'][0]['description']
+        else:
+            self.function = exp['details'][1]['description']
+
+    def disp(self, display):
+        display.append('Function :')
+        display.formula(self.function)
+
+
+class CQFunctionQuery(CQFunctionScore):
+    @staticmethod
+    def accept(exp):
+        return 'match filter: *:*' in exp['details'][0]['description']
+
+    def __init__(self, exp):
+        CQFunctionScore.__init__(self, exp)
+        self.query = exp['details'][0]['description']
+        self.weight = exp['details'][1]['details'][1]['value']
+
+    def disp(self, display):
+        display.append('FQuery ')
+        display.append('weight: ')
+        display.weight(self.weight)
+        display.append(', query: ')
+        display.query(self.query)
+
+
+class CQDisMaxExp(CQExplain):
+    
"""https://lucene.apache.org/core/4_4_0/core/org/apache/lucene/search/DisjunctionMaxQuery.html
+
+    Generated by QueryString when using multi field (param dis_max, defaults 
true)
+    
https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_multi_field
+    """
+
+    @staticmethod
+    def accept(exp):
+        return exp['description'] == 'max of:'
+
+    def __init__(self, exp):
+        CQExplain.__init__(self, exp)
+        for exp in exp['details']:
+            self.children.append(CQExplain.build(exp))
+        self.winner = sorted(self.children)[0]
+
+    def disp(self, display):
+        display.append('DisMax ')
+        display.append('best=')
+        display.term(self.winner.field, self.winner.term)
+
+
+class CQTermWeight(CQExplain):
+    @staticmethod
+    def accept(exp):
+        # Accept everything except phrases
+        if re.search('^weight\\([^"]+$', exp['description']):
+            return True
+        return False
+
+    """TermWeight (core tf/idf sim)"""
+    def __init__(self, exp):
+        CQExplain.__init__(self, exp)
+        # extract field, term and boost from weight(all.plain^0.5:test in 
93730) [....
+        search = re.search('weight\\(([a-z_\\.]+):([^\\^]+?)(?:\\^([\d\\.]+))? 
in [\d]+\\) \\[',
+                           self.desc)
+        if search:
+            self.field = search.group(1)
+            self.term = search.group(2)
+            self.boost = None
+            if search.group(3):
+                self.boost = float(search.group(3))
+        else:
+            raise Exception("Cannot parse TermWeight field: " + self.desc)
+
+        # extract queryWeight idf info (inside queryWeight, product of:)
+        qW = exp['details'][0]['details'][0]
+        self.queryNorm = None
+        if len(qW['details']) > 1:
+            self.queryNorm = qW['details'][1]['value']
+
+        # extract tf.idf info (inside fieldWeight )
+        fW = exp['details'][0]['details'][1]
+
+        self.tf = fW['details'][0]['value']
+        self.termFreq = fW['details'][0]['details'][0]['value']
+
+        if fW['details'][1]['description'] != 'idf(), sum of:':
+            # raw docFreq for non phrase
+            search = re.search('idf\\(docFreq=(\d+), maxDocs=(\d+)\\)',
+                               fW['details'][1]['description'])
+            if search:
+                self.docFreq = int(search.group(1))
+                self.maxDocs = int(search.group(2))
+            else:
+                raise Exception("Cannot parse docFreq in :" + 
fW['details'][1]['description'])
+        self.idf = fW['details'][1]['value']
+        self.norm = fW['details'][2]['value']
+
+    def disp(self, display):
+        display.append('TFIDF ')
+        display.append('term=')
+        display.term(self.field, self.term, self.boost)
+        display.append(' tf=')
+        display.score(self.tf)
+        display.append('(freq=')
+        display.score(self.termFreq)
+        display.append(') ')
+        display.append('idf=')
+        display.score(self.idf)
+        display.append(' qNorm=')
+        display.score(self.queryNorm)
+        display.append(' fNorm=')
+        display.score(self.norm)
+
+
+class CQFilter(CQExplain):
+    """Constant score node"""
+    @staticmethod
+    def accept(exp):
+        return exp['description'].startswith('ConstantScore(')
+
+    def __init__(self, exp):
+        CQExplain.__init__(self, exp)
+        self.query = exp['description']
+
+    def disp(self, display):
+        display.append('Filter ')
+        display.query(self.query)
+
+
+class CQPhraseWeight(CQExplain):
+    """TermWeight for phrases (core tf/idf sim)"""
+    @staticmethod
+    def accept(exp):
+        # Force a phrase (")
+        if re.search('^weight\\(.*".*"', exp['description']):
+            return True
+        return False
+
+    def __init__(self, exp):
+        CQExplain.__init__(self, exp)
+        # extract field, term and boost from weight(all.plain^0.5:test in 
93730) [....
+        search = re.search('weight\\(([a-z_\\.]+):([^\\^]+?)(?:\\^([\d\\.]+))? 
in [\d]+\\) \\[',
+                           self.desc)
+        if search:
+            self.field = search.group(1)
+            self.term = search.group(2)
+            self.boost = None
+            if search.group(3):
+                self.boost = float(search.group(3))
+        else:
+            raise Exception("Cannot parse TermWeight field: " + self.desc)
+
+        self.queryWeight = None
+        self.queryNorm = None
+        if exp['details'][0]['description'].startswith('score('):
+            exp = exp['details'][0]
+        if len(exp['details']) > 1:
+            if 'queryWeight' in exp['details'][0]['description']:
+                qWeight = exp['details'][0]
+                fWeight = exp['details'][1]
+            else:
+                fWeight = exp['details'][0]
+                qWeight = exp['details'][1]
+
+            self.queryNorm = qWeight['details'][1]['value']
+            self.queryWeight = qWeight['value']
+        else:
+            fWeight = exp['details'][0]
+
+        if fWeight['details'][0]['description'] == 'idf(), sum of:':
+            tfData = fWeight['details'][1]
+        else:
+            tfData = fWeight['details'][0]
+
+        # extract queryWeight idf info (inside queryWeight, product of:)
+        self.tf = tfData['value']
+        search = re.search('phraseFreq=([\d\\.]+)$', 
tfData['details'][0]['description'])
+        if search:
+            self.phraseFreq = search.group(1)
+        else:
+            raise Exception('Cannot parse phraseFreq in:' + 
tfData['details'][0]['description'])
+        self.norm = fWeight['details'][2]['value']
+
+        self.idf = exp['details'][0]['details'][1]['value']
+        # TODO: fix details extraction
+#        self.idfDetails = list()
+#        for idf in exp['details'][0]['details'][1]['details']:
+#            detail = {
+#                'idf': idf['value']
+#            }
+#            search = re.search('^idf\\(docFreq=(\d+), maxDocs=(\d+)\\)$', 
idf['description'])
+#            if search:
+#                detail['docFreq'] = search.group(1)
+#                detail['maxDocs'] = search.group(2)
+#            else:
+#                raise Exception('Cannot load idf details for CQPhraseWeight 
in : ' +
+#                                idf['description'])
+#            self.idfDetails.append(detail)
+
+    def disp(self, display):
+        display.append('TFIDF ')
+        display.append('phrase=')
+        display.term(self.field, self.term, self.boost)
+        display.append(' tf=')
+        display.score(self.tf)
+        display.append('(freq=')
+        display.score(self.phraseFreq)
+        display.append(') ')
+        display.append('idf=')
+        display.score(self.idf)
+        display.append(' fNorm=')
+        display.score(self.norm)
+        if self.queryWeight is not None:
+            display.append(' qWeight=')
+            display.score(self.queryWeight)
+            display.append('(qNorm=')
+            display.score(self.queryNorm)
+            display.append(')')
+
+
+reload(sys)
+sys.setdefaultencoding('utf8')
+
+aparser = argparse.ArgumentParser(description='Cirrus Query Debugger', 
prog=sys.argv[0])
+aparser.add_argument('-q', '--query', help='The query', default='cqd')
+aparser.add_argument('-w', '--wiki', help='Wiki to run', 
default='en.wikipedia.org')
+aparser.add_argument('-l', '--limit', type=int, help='Limit', default=10)
+aparser.add_argument('-o', '--offset', type=int, help='Offset', default=0)
+aparser.add_argument('--allField', help='Use the all field (defaults: yes, use 
no to disable)',
+                     default='yes')
+aparser.add_argument('-fw', '--functionWindow', type=int, help='Function 
window size')
+aparser.add_argument('-pw', '--phraseWindow', type=int, help='Phrase window 
size')
+aparser.add_argument('-rp', '--rescoreProfile', help='Rescore profile')
+aparser.add_argument('-c', '--custom', nargs='+', default=[],
+                     help='List of custom param (-c param1=value1 
param2=value2)')
+args = aparser.parse_args()
+
+params = CQueryParams(args)
+
+query = CQuery(args.query, args.wiki, params)
+
+
+res = query.run()
+printer = CQResultSetPrinter()
+printer.disp(res)

-- 
To view, visit https://gerrit.wikimedia.org/r/270943
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ia7a9585a4adf2052f75b7a8463ecb6e9cdb49d9c
Gerrit-PatchSet: 9
Gerrit-Project: wikimedia/discovery/relevancylab
Gerrit-Branch: master
Gerrit-Owner: DCausse <[email protected]>
Gerrit-Reviewer: EBernhardson <[email protected]>
Gerrit-Reviewer: Tjones <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to