Author: peter
Date: Mon Mar 4 06:09:44 2013
New Revision: 1452190
URL: http://svn.apache.org/r1452190
Log:
#395 search highlighting
Modified:
incubator/bloodhound/trunk/bloodhound_search/bhsearch/api.py
incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/ticket_search.py
incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/wiki_search.py
incubator/bloodhound/trunk/bloodhound_search/bhsearch/templates/bhsearch.html
incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/web_ui.py
incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py
incubator/bloodhound/trunk/bloodhound_search/bhsearch/web_ui.py
incubator/bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py
incubator/bloodhound/trunk/bloodhound_theme/bhtheme/htdocs/bloodhound.css
Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/api.py
URL:
http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/api.py?rev=1452190&r1=1452189&r2=1452190&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/api.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/api.py Mon Mar 4
06:09:44 2013
@@ -41,6 +41,7 @@ class QueryResult(object):
self.page_number = 0
self.offset = 0
self.docs = []
+ self.highlighting = []
self.facets = None
self.debug = {}
@@ -119,7 +120,9 @@ class ISearchBackend(Interface):
filter = None,
facets = None,
pagenum = 1,
- pagelen = 20):
+ pagelen = 20,
+ highlight=False,
+ highlight_fields=None):
"""
Perform query implementation
@@ -131,6 +134,8 @@ class ISearchBackend(Interface):
:param facets: list of facet fields
:param pagenum: page number
:param pagelen: page length
+ :param highlight: highlight matched terms in fields
+ :param highlight_fields: list of fields to highlight
:return: ResultsPage
"""
@@ -231,7 +236,9 @@ class BloodhoundSearchApi(Component):
filter = None,
facets = None,
pagenum = 1,
- pagelen = 20):
+ pagelen = 20,
+ highlight = False,
+ highlight_fields = None):
"""Return query result from an underlying search backend.
Arguments:
@@ -245,6 +252,8 @@ class BloodhoundSearchApi(Component):
:param facets: optional list of facet terms, can be field or expression
:param page: paging support
:param pagelen: paging support
+ :param highlight: highlight matched terms in fields
+ :param highlight_fields: list of fields to highlight
:return: result QueryResult
"""
@@ -265,6 +274,8 @@ class BloodhoundSearchApi(Component):
facets = facets,
pagenum = pagenum,
pagelen = pagelen,
+ highlight = highlight,
+ highlight_fields = highlight_fields,
)
for query_processor in self.query_processors:
query_processor.query_pre_process(query_parameters)
Modified:
incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/ticket_search.py
URL:
http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/ticket_search.py?rev=1452190&r1=1452189&r2=1452190&view=diff
==============================================================================
---
incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/ticket_search.py
(original)
+++
incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/ticket_search.py
Mon Mar 4 06:09:44 2013
@@ -203,6 +203,7 @@ class TicketSearchParticipant(BaseSearch
else:
stat = res[TicketFields.STATUS]
- id = tag(tag.span('#'+res['id'], class_=css_class))
- return id + ': %s (%s)' % (res['summary'], stat)
+ id = res['hilited_id'] or res['id']
+ id = tag.span('#', id, class_=css_class)
+ return tag(id, ': ', res['hilited_summary'], ' (%s)' % stat)
Modified:
incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/wiki_search.py
URL:
http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/wiki_search.py?rev=1452190&r1=1452189&r2=1452190&view=diff
==============================================================================
---
incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/wiki_search.py
(original)
+++
incubator/bloodhound/trunk/bloodhound_search/bhsearch/search_resources/wiki_search.py
Mon Mar 4 06:09:44 2013
@@ -147,7 +147,8 @@ class WikiSearchParticipant(BaseSearchPa
return "Wiki"
def format_search_results(self, res):
- return u'%s: %s...' % (res['id'], res['content'][:50])
+ title = res['hilited_id'] or res['id']
+ return title
Modified:
incubator/bloodhound/trunk/bloodhound_search/bhsearch/templates/bhsearch.html
URL:
http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/templates/bhsearch.html?rev=1452190&r1=1452189&r2=1452190&view=diff
==============================================================================
---
incubator/bloodhound/trunk/bloodhound_search/bhsearch/templates/bhsearch.html
(original)
+++
incubator/bloodhound/trunk/bloodhound_search/bhsearch/templates/bhsearch.html
Mon Mar 4 06:09:44 2013
@@ -172,20 +172,20 @@
<tr class="${'odd' if idx % 2 else 'even'}
prio${result.priority_value}${
' added' if 'added' in result else ''}${
' changed' if 'changed' in result else ''}${
- ' removed' if 'removed' in result else ''}">
+ ' removed' if 'removed' in result else ''} searchable">
<py:for each="idx, header in enumerate(headers)"
py:choose="">
- <py:with vars="name = header.name; value =
result[name]; title = _('View ')+ result['type']">
+ <py:with vars="name = header.name; value =
result[name]; hilited_value=result['hilited_' + name]; title = _('View ')+
result['type']">
<td py:when="name == 'id'" class="id"><a
href="$result.href" title="${title}"
- class="${classes(closed=result.status ==
'closed')}">#$result.id</a></td>
+ class="${classes(closed=result.status ==
'closed')}">#${result.hilited_id or result.id}</a></td>
<td py:otherwise="" class="$name" py:choose="">
- <a py:when="name == 'summary'"
href="$result.href" title="title">$value</a>
+ <a py:when="name == 'summary'"
href="$result.href" title="title">${hilited_value or value}</a>
<py:when test="isinstance(value,
datetime)">${pretty_dateinfo(value, dateonly=True)}</py:when>
<py:when test="name ==
'reporter'">${authorinfo(value)}</py:when>
<py:when test="name ==
'cc'">${format_emails(ticket_context, value)}</py:when>
<py:when test="name == 'owner' and
value">${authorinfo(value)}</py:when>
<py:when test="name == 'milestone'"><a
py:if="value" title="View milestone"
href="${href.milestone(value)}">${value}</a></py:when>
<!--<py:when
test="header.wikify">${wiki_to_oneliner(ticket_context, value)}</py:when>-->
- <py:otherwise>$value</py:otherwise>
+ <py:otherwise>${hilited_value or
value}</py:otherwise>
</td>
</py:with>
</py:for>
Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/web_ui.py
URL:
http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/web_ui.py?rev=1452190&r1=1452189&r2=1452190&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/web_ui.py
(original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/web_ui.py Mon
Mar 4 06:09:44 2013
@@ -589,6 +589,92 @@ class WebUiTestCaseWithWhoosh(BaseBloodh
self.assertEquals("id, time desc", active_sort["expression"])
self.assertNotIn("sort=", active_sort["href"])
+ def test_that_document_summary_contains_highlighted_search_terms(self):
+     """The highlighted content of the first hit wraps the term in <em>."""
+     needle = "searchterm"
+     # Bury the term between long runs of filler words.
+     body = "foo " * 200 + needle + " bar" * 100
+     self.insert_wiki("Dummy title", body)
+
+     self.req.args[RequestParameters.QUERY] = needle
+     first_hit = self.process_request()["results"].items[0]
+
+     self.assertIn('<em>%s</em>' % needle,
+                   str(first_hit["hilited_content"]))
+
+ def test_that_only_matched_terms_are_highlighted(self):
+     """A query restricted to ``id`` marks up the title, not the content."""
+     needle = "search_term"
+     # Same text is used as wiki page name and as page content.
+     self.insert_wiki(needle, needle)
+
+     self.req.args[RequestParameters.QUERY] = "id:%s" % needle
+     first_hit = self.process_request()["results"].items[0]
+
+     title_text = str(first_hit["title"])
+     content_text = str(first_hit["content"])
+     expected_markup = '<em>%s</em>' % needle
+     self.assertIn(expected_markup, title_text)
+     self.assertNotIn(expected_markup, content_text)
+
+ def test_that_matched_terms_in_title_are_highlighted(self):
+     """Every returned row has the query term wrapped in <em> in its title."""
+     needle = "search_term"
+     self.insert_wiki(needle, 'content')
+     self.insert_ticket(needle)
+
+     self.req.args[RequestParameters.QUERY] = needle
+     response = self.process_request()
+
+     expected_markup = '<em>%s</em>' % needle
+     for hit in response["results"].items:
+         self.assertIn(expected_markup, str(hit["title"]))
+
+ def test_that_html_tags_are_escaped(self):
+     """Markup stored in wiki content is escaped in the highlighted text.
+
+     The <em> tags added around the matched term are the only markup
+     expected in the result; the <b> tag that was part of the indexed
+     text must appear in its entity-escaped form instead.
+     """
+     term = "search_term"
+     content = '%s <b>%s</b>' % (term, term)
+     self.insert_wiki(term, content)
+
+     self.req.args[RequestParameters.QUERY] = "content:%s" % term
+     data = self.process_request()
+
+     highlighted = str(data["results"].items[0]["hilited_content"])
+     matched_term = '<em>%s</em>' % term
+     self.assertIn(matched_term, highlighted)
+     # The raw tag must not survive ...
+     self.assertNotIn('<b>', highlighted)
+     # ... it has to be present as escaped text instead.
+     self.assertIn('&lt;b&gt;', highlighted)
+
+ def test_that_id_is_displayed_even_if_it_doesnt_contain_query_terms(self):
+     """Row titles still contain the document id when it did not match.
+
+     A ticket and a wiki page are indexed so that the query term matches
+     them while their id ("1") is not part of the query.
+     """
+     doc_id, term = "1", "search_term"
+     self.insert_ticket(term, id=doc_id)
+     self.insert_wiki(doc_id, term)
+
+     self.req.args[RequestParameters.QUERY] = term
+     data = self.process_request()
+
+     for row in data["results"].items:
+         self.assertIn(doc_id, str(row["title"]))
+
+
+ def test_that_id_is_highlighted_in_title(self):
+     """Querying for an id wraps that id in <em> in the first row's title."""
+     self.insert_ticket("some summary")
+     # NOTE(review): relies on the ticket inserted above getting id "1" --
+     # confirm against insert_ticket().
+     ticket_id = "1"
+     self.req.args[RequestParameters.QUERY] = ticket_id
+     data = self.process_request()
+     row = data["results"].items[0]
+     self.assertIn('<em>%s</em>' % ticket_id, str(row["title"]))
+
+ def test_that_content_summary_is_trimmed(self):
+     """Plain and highlighted content of each row are shorter than 500."""
+     self.insert_wiki("title", "foo " * 1000)
+
+     response = self.process_request()
+
+     for hit in response["results"].items:
+         for key in ('content', 'hilited_content'):
+             self.assertLess(len(hit[key]), 500)
+
def _find_header(self, headers, name):
for header in headers:
if header["name"] == name:
Modified:
incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py
URL:
http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py?rev=1452190&r1=1452189&r2=1452190&view=diff
==============================================================================
---
incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py
(original)
+++
incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py
Mon Mar 4 06:09:44 2013
@@ -337,6 +337,86 @@ class WhooshBackendTestCase(BaseBloodhou
self.print_result(result)
self.assertEqual(0, result.hits)
+ def test_can_highlight_given_terms(self):
+     """Requested fields are highlighted; fields missing in a doc are ''."""
+     term = 'search_term'
+     text = "foo foo %s bar bar" % term
+     for doc_id, doc_type in (("1", "ticket"), ("3", "wiki")):
+         self.whoosh_backend.add_doc(
+             {'id': doc_id, 'type': doc_type, 'content': text})
+     parsed_query = self.parser.parse(term)
+
+     result = self.whoosh_backend.query(
+         parsed_query, highlight=True, highlight_fields=['content', 'summary'])
+     self.print_result(result)
+
+     self.assertEqual(len(result.highlighting), 2)
+     expected_markup = self._highlighted(term)
+     for entry in result.highlighting:
+         self.assertIn(expected_markup, entry['content'])
+         # Neither document was indexed with a summary.
+         self.assertEquals("", entry['summary'])
+
+ def test_that_highlighting_escapes_html(self):
+     """Markup contained in the indexed text is escaped in the highlight.
+
+     The document content contains an <a> element; in the highlighted
+     output its angle brackets are expected as entities, while the <em>
+     around the matched term is real markup.
+     """
+     term = 'search_term'
+     text = "bla <a href=''>%s bar</a> bla" % term
+     self.whoosh_backend.add_doc(dict(id="1", type="ticket", content=text))
+     search_query = self.parser.parse(term)
+
+     result = self.whoosh_backend.query(
+         search_query,
+         highlight=True,
+         highlight_fields=['content']
+     )
+     self.print_result(result)
+
+     self.assertEqual(len(result.highlighting), 1)
+     highlight = result.highlighting[0]
+     self.assertEquals(
+         "bla &lt;a href=''&gt;<em>search_term</em> bar&lt;/a&gt; bla",
+         highlight['content'])
+
+ def test_highlights_all_text_fields_by_default(self):
+     """Without highlight_fields, content and summary are both reported."""
+     term = 'search_term'
+     text = "foo foo %s bar bar" % term
+     for doc_id, doc_type in (("1", "ticket"), ("3", "wiki")):
+         self.whoosh_backend.add_doc(
+             {'id': doc_id, 'type': doc_type, 'content': text})
+     parsed_query = self.parser.parse(term)
+
+     result = self.whoosh_backend.query(parsed_query, highlight=True)
+     self.print_result(result)
+
+     self.assertEqual(len(result.highlighting), 2)
+     for entry in result.highlighting:
+         for field_name in ('content', 'summary'):
+             self.assertIn(field_name, entry)
+         self.assertIn(self._highlighted(term), entry['content'])
+
+ def test_only_highlights_terms_in_fields_that_match_query(self):
+     """An ``id:`` query highlights the id field only."""
+     term = 'search_term'
+     # The term is also stored in content/summary, which the query does
+     # not address.
+     self.whoosh_backend.add_doc(
+         {'id': term, 'type': "wiki", 'content': term})
+     self.whoosh_backend.add_doc(
+         {'id': term, 'type': "ticket", 'summary': term})
+     parsed_query = self.parser.parse('id:%s' % term)
+
+     result = self.whoosh_backend.query(
+         parsed_query,
+         highlight=True,
+         highlight_fields=["id", "content", "summary"])
+     self.print_result(result)
+
+     self.assertEqual(len(result.highlighting), 2)
+     for entry in result.highlighting:
+         self.assertIn(self._highlighted(term), entry['id'])
+         for untouched in ('summary', 'content'):
+             self.assertNotIn(self._highlighted(term), entry[untouched])
+
+ def _highlighted(self, term):
+     """Return `term` wrapped in an <em> element, as the tests expect it."""
+     markup = '<em>%s</em>'
+     return markup % term
+
+
class WhooshFunctionalityTestCase(unittest.TestCase):
def setUp(self):
self.index_dir = tempfile.mkdtemp('whoosh_index')
Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/web_ui.py
URL:
http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/web_ui.py?rev=1452190&r1=1452189&r2=1452190&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/web_ui.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/web_ui.py Mon Mar 4
06:09:44 2013
@@ -20,6 +20,7 @@
r"""Bloodhound Search user interface."""
import copy
+from collections import defaultdict
import pkg_resources
from bhsearch import BHSEARCH_CONFIG_SECTION
@@ -27,6 +28,7 @@ import re
from trac.core import Component, implements, TracError
from genshi.builder import tag
+from genshi import HTML
from trac.perm import IPermissionRequestor
from trac.search import shorten_result
from trac.config import OrderedExtensionsOption, ListOption, Option
@@ -270,7 +272,7 @@ class BloodhoundSearchModule(Component):
self.default_facets
)
- query_result = BloodhoundSearchApi(self.env).query(
+ query_result = BloodhoundSearchApi(self.env).query(
request_context.parameters.query,
pagenum=request_context.page,
pagelen=request_context.pagelen,
@@ -278,6 +280,7 @@ class BloodhoundSearchModule(Component):
fields=request_context.fields,
facets=request_context.facets,
filter=request_context.query_filter,
+ highlight=True,
)
request_context.process_results(query_result)
@@ -578,15 +581,17 @@ class RequestContext(object):
ui_doc = dict(doc)
ui_doc["href"] = self.req.href(doc['type'], doc['id'])
#todo: perform content adaptation here
- if doc.has_key('content'):
+
+ if doc['content']:
ui_doc['content'] = shorten_result(doc['content'])
- if doc.has_key('time'):
+
+ if doc['time']:
ui_doc['date'] = user_time(self.req, format_datetime, doc['time'])
is_free_text_view = self.view is None
if is_free_text_view:
- ui_doc['title'] = self.allowed_participants[
- doc['type']].format_search_results(doc)
+ participant = self.allowed_participants[doc['type']]
+ ui_doc['title'] = participant.format_search_results(doc)
return ui_doc
def _prepare_results(self, result_docs, hits):
@@ -626,7 +631,9 @@ class RequestContext(object):
results.shown_pages = [dict(zip(fields, p)) for p in page_data]
def process_results(self, query_result):
- self._prepare_results(query_result.docs, query_result.hits)
+ docs = self._prepare_docs(query_result.docs,
+ query_result.highlighting)
+ self._prepare_results(docs, query_result.hits)
self._prepare_result_facet_counts(query_result.facets)
self.data[self.DATA_DEBUG] = query_result.debug
self.data[self.DATA_PAGE_HREF] = self.parameters.create_href()
@@ -674,3 +681,19 @@ class RequestContext(object):
self.data[self.DATA_FACET_COUNTS] = facet_counts
+ def _prepare_docs(self, docs, highlights):
+     """Merge highlighting information into the result documents.
+
+     :param docs: list of result documents (dict-like)
+     :param highlights: list of dicts mapping a field name to its
+         highlighted HTML string, one per document and in the same order
+     :return: list of ``defaultdict(str)`` copies of the documents; for
+         every highlighted field ``name`` a ``hilited_<name>`` key holds a
+         Genshi fragment, or ``''`` when there is nothing highlighted.
+         Missing keys evaluate to ``''``.
+     """
+     docs = list(docs)
+     highlights = list(highlights)
+     # zip() stops at the shorter input; pad the highlights so that no
+     # document is dropped when highlighting is missing or incomplete.
+     missing = len(docs) - len(highlights)
+     if missing > 0:
+         highlights.extend({} for _ in range(missing))
+
+     new_docs = []
+     for doc, highlight in zip(docs, highlights):
+         doc = defaultdict(str, doc)
+         for field, html in highlight.items():
+             highlighted_field = 'hilited_%s' % field
+             if html:
+                 doc[highlighted_field] = self._create_genshi_fragment(html)
+             else:
+                 doc[highlighted_field] = ''
+         new_docs.append(doc)
+     return new_docs
+
+ def _create_genshi_fragment(self, html_fragment):
+     """Parse `html_fragment` with Genshi's HTML() and wrap it in `tag`."""
+     parsed = HTML(html_fragment)
+     return tag(parsed)
Modified:
incubator/bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py
URL:
http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py?rev=1452190&r1=1452189&r2=1452190&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py
(original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py Mon
Mar 4 06:09:44 2013
@@ -25,12 +25,13 @@ from bhsearch.api import ISearchBackend,
import os
from bhsearch.search_resources.ticket_search import TicketFields
from trac.core import Component, implements, TracError
-from trac.config import Option
+from trac.config import Option, IntOption
from trac.util.text import empty
from trac.util.datefmt import utc
from whoosh.fields import Schema, ID, DATETIME, KEYWORD, TEXT
from whoosh import index
import whoosh
+import whoosh.highlight
from whoosh.writing import AsyncWriter
from datetime import datetime
@@ -68,6 +69,15 @@ class WhooshBackend(Component):
changes=TEXT(),
)
+ max_fragment_size = IntOption('bhsearch', 'max_fragment_size', 240,
+ 'The maximum number of characters allowed in a '
+ 'fragment.')
+
+ fragment_surround = IntOption('bhsearch', 'fragment_surround', 60,
+ 'The number of extra characters of context '
+ 'to add both before the first matched term '
+ 'and after the last matched term.')
+
def __init__(self):
self.index_dir = self.index_dir_setting
if not os.path.isabs(self.index_dir):
@@ -159,7 +169,9 @@ class WhooshBackend(Component):
filter = None,
facets = None,
pagenum = 1,
- pagelen = 20):
+ pagelen = 20,
+ highlight = False,
+ highlight_fields = None):
"""
Perform query.
@@ -174,6 +186,9 @@ class WhooshBackend(Component):
of Whoosh is applied.
"""
with self.index.searcher() as searcher:
+ highlight_fields = self._prepare_highlight_fields(highlight,
+ highlight_fields)
+
sortedby = self._prepare_sortedby(sort)
#TODO: investigate how faceting is applied to multi-value fields
@@ -199,7 +214,10 @@ class WhooshBackend(Component):
self.env.log.debug("Whoosh query to execute: %s",
query_parameters)
raw_page = searcher.search_page(**query_parameters)
- results = self._process_results(raw_page, fields, query_parameters)
+ results = self._process_results(raw_page,
+ fields,
+ highlight_fields,
+ query_parameters)
return results
def _workaround_join_query_and_filter(
@@ -267,10 +285,30 @@ class WhooshBackend(Component):
sortedby.append(sort_condition)
return sortedby
+ def _prepare_highlight_fields(self, highlight, highlight_fields):
+     """Resolve which fields to highlight.
+
+     Returns an empty tuple when `highlight` is false; otherwise the given
+     `highlight_fields`, falling back to all highlightable fields when
+     none were given.
+     """
+     if not highlight:
+         return ()
+     return highlight_fields or self._all_highlightable_fields()
+
+ def _all_highlightable_fields(self):
+     """Return the names of all SCHEMA fields accepted by _is_highlightable."""
+     names = []
+     for name, field in self.SCHEMA.items():
+         if self._is_highlightable(field):
+             names.append(name)
+     return names
+
+ def _is_highlightable(self, field):
+     """Tell whether `field` may be highlighted.
+
+     DATETIME fields are never highlightable; any other field is
+     highlightable when it is stored.
+     """
+     if isinstance(field, whoosh.fields.DATETIME):
+         return False
+     return field.stored
+
def _is_desc(self, order):
return (order.lower()==DESC)
- def _process_results(self, page, fields, search_parameters = None):
+ def _process_results(self,
+ page,
+ fields,
+ highlight_fields,
+ search_parameters=None):
# It's important to grab the hits first before slicing. Otherwise, this
# can cause pagination failures.
"""
@@ -285,10 +323,17 @@ class WhooshBackend(Component):
results.facets = self._load_facets(page)
docs = []
+ highlighting = []
for retrieved_record in page:
result_doc = self._process_record(fields, retrieved_record)
docs.append(result_doc)
+
+ result_highlights = self._create_highlights(highlight_fields,
+ retrieved_record)
+ highlighting.append(result_highlights)
results.docs = docs
+ results.highlighting = highlighting
+
results.debug["search_parameters"] = search_parameters
return results
@@ -332,6 +377,28 @@ class WhooshBackend(Component):
current user."
% self.index_dir)
+ def _create_highlights(self, fields, record):
+     """Return a dict mapping each name in `fields` to its highlighted text.
+
+     Fields that are not contained in `record` map to an empty string.
+     The fragmenter is configured from the max_fragment_size and
+     fragment_surround options.
+     """
+     highlighter = whoosh.highlight.Highlighter(
+         formatter=WhooshEmFormatter(),
+         fragmenter=whoosh.highlight.ContextFragmenter(
+             self.max_fragment_size,
+             self.fragment_surround,
+         ),
+     )
+
+     highlights_by_field = {}
+     for name in fields:
+         highlights_by_field[name] = (
+             highlighter.highlight_hit(record, name)
+             if name in record else '')
+     return highlights_by_field
+
+
+# HtmlFormatter subclass that only overrides ``template``. The tests added
+# in this change expect matched terms to come back as ``<em>term</em>``.
+class WhooshEmFormatter(whoosh.highlight.HtmlFormatter):
+ template = '<em>%(t)s</em>'
+
class WhooshEmptyFacetErrorWorkaround(Component):
"""
Modified:
incubator/bloodhound/trunk/bloodhound_theme/bhtheme/htdocs/bloodhound.css
URL:
http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_theme/bhtheme/htdocs/bloodhound.css?rev=1452190&r1=1452189&r2=1452190&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_theme/bhtheme/htdocs/bloodhound.css
(original)
+++ incubator/bloodhound/trunk/bloodhound_theme/bhtheme/htdocs/bloodhound.css
Mon Mar 4 06:09:44 2013
@@ -430,6 +430,17 @@ h1, h2, h3, h4 {
font-size: 13px;
}
+/* NOTE(review): presumably styles the <dt> entries of the search result
+   list -- confirm against the template that emits .search_results. */
+.search_results dt {
+ font-weight: inherit;
+ text-decoration: underline;
+}
+
+
+/* <em> inside .searchable elements is rendered bold and inherits the
+   surrounding font style instead of using its own. */
+.searchable em {
+ font-style: inherit;
+ font-weight: bold;
+}
+
/* @end */
/* @group Admin */