Modified: incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_search/bhsearch/whoosh_backend.py URL: http://svn.apache.org/viewvc/incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_search/bhsearch/whoosh_backend.py?rev=1440987&r1=1440986&r2=1440987&view=diff ============================================================================== --- incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_search/bhsearch/whoosh_backend.py (original) +++ incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_search/bhsearch/whoosh_backend.py Thu Jan 31 14:41:22 2013 @@ -19,18 +19,21 @@ # under the License. r"""Whoosh specific backend for Bloodhound Search plugin.""" -from bhsearch.api import ISearchBackend, DESC, QueryResult, SCORE +from bhsearch.api import ISearchBackend, DESC, QueryResult, SCORE, \ + IDocIndexPreprocessor, IResultPostprocessor, IndexFields, \ + IQueryPreprocessor import os -from trac.core import * +from trac.core import Component, implements, TracError from trac.config import Option +from trac.util.text import empty from trac.util.datefmt import utc -from whoosh.fields import * -from whoosh import index, sorting, query -from whoosh.searching import ResultsPage +from whoosh.fields import Schema, ID, DATETIME, KEYWORD, TEXT +from whoosh import index +import whoosh from whoosh.writing import AsyncWriter from datetime import datetime -UNIQUE_ID = 'unique_id' +UNIQUE_ID = "unique_id" class WhooshBackend(Component): """ @@ -43,19 +46,22 @@ class WhooshBackend(Component): directory of the environment.""") #This is schema prototype. It will be changed later - #TODO: add other fields support, add dynamic field support + #TODO: add other fields support, add dynamic field support. 
+ #Schema must be driven by index participants SCHEMA = Schema( unique_id=ID(stored=True, unique=True), id=ID(stored=True), type=ID(stored=True), product=ID(stored=True), + milestone=ID(stored=True), time=DATETIME(stored=True), + due=DATETIME(stored=True), + completed=DATETIME(stored=True), author=ID(stored=True), - component=KEYWORD(stored=True), - status=KEYWORD(stored=True), - resolution=KEYWORD(stored=True), + component=ID(stored=True), + status=ID(stored=True), + resolution=ID(stored=True), keywords=KEYWORD(scorable=True), - milestone=TEXT(spelling=True), summary=TEXT(stored=True), content=TEXT(stored=True), changes=TEXT(), @@ -65,7 +71,7 @@ class WhooshBackend(Component): self.index_dir = self.index_dir_setting if not os.path.isabs(self.index_dir): self.index_dir = os.path.join(self.env.path, self.index_dir) - self.open_or_create_index_if_missing() + self.index = self._open_or_create_index_if_missing() #ISearchBackend methods def start_operation(self): @@ -80,16 +86,13 @@ class WhooshBackend(Component): The contents should be a dict with fields matching the search schema. The only required fields are type and id, everything else is optional. """ - # Really make sure it's unicode, because Whoosh won't have it any - # other way. is_local_writer = False if writer is None: is_local_writer = True writer = self._create_writer() - for key in doc: - doc[key] = self._to_whoosh_format(doc[key]) - doc["unique_id"] = self._create_unique_id(doc["type"], doc["id"]) + self._reformat_doc(doc) + doc[UNIQUE_ID] = self._create_unique_id(doc["type"], doc["id"]) self.log.debug("Doc to index: %s", doc) try: writer.update_document(**doc) @@ -98,9 +101,24 @@ class WhooshBackend(Component): except: if is_local_writer: writer.cancel() + raise + + def _reformat_doc(self, doc): + """ + Strings must be converted unicode format accepted by Whoosh. 
+ """ + for key, value in doc.items(): + if key is None: + del doc[None] + elif value is None: + del doc[key] + elif isinstance(value, basestring) and value == "": + del doc[key] + else: + doc[key] = self._to_whoosh_format(value) - def delete_doc(self, type, id, writer=None): - unique_id = self._create_unique_id(type, id) + def delete_doc(self, doc_type, doc_id, writer=None): + unique_id = self._create_unique_id(doc_type, doc_id) self.log.debug('Removing document from the index: %s', unique_id) is_local_writer = False if writer is None: @@ -126,7 +144,7 @@ class WhooshBackend(Component): def cancel(self, writer): try: writer.cancel() - except Exception,ex: + except Exception, ex: self.env.log.error("Error during writer cancellation: %s", ex) def recreate_index(self): @@ -134,14 +152,21 @@ class WhooshBackend(Component): self._make_dir_if_not_exists() return index.create_in(self.index_dir, schema=self.SCHEMA) - def open_or_create_index_if_missing(self): + def _open_or_create_index_if_missing(self): if index.exists_in(self.index_dir): - self.index = index.open_dir(self.index_dir) + return index.open_dir(self.index_dir) else: - self.index = self.recreate_index() + return self.recreate_index() - def query(self, query, sort = None, fields = None, boost = None, filter = None, - facets = None, pagenum = 1, pagelen = 20): + def query(self, + query, + sort = None, + fields = None, + boost = None, + filter = None, + facets = None, + pagenum = 1, + pagelen = 20): """ Perform query. @@ -157,28 +182,31 @@ class WhooshBackend(Component): """ with self.index.searcher() as searcher: sortedby = self._prepare_sortedby(sort) + + #TODO: investigate how faceting is applied to multi-value fields + #e.g. keywords. 
For now, just pass facets list to Whoosh API + #groupedby = self._prepare_groupedby(facets) groupedby = facets - query_filter = self._prepare_filter(filter) #workaround of Whoosh bug, read method __doc__ query = self._workaround_join_query_and_filter( query, - query_filter) + filter) - search_parameters = dict( + query_parameters = dict( query = query, pagenum = pagenum, pagelen = pagelen, sortedby = sortedby, groupedby = groupedby, - maptype=sorting.Count, + maptype=whoosh.sorting.Count, #workaround of Whoosh bug, read method __doc__ - #filter = query_filter, + #filter = filter, ) self.env.log.debug("Whoosh query to execute: %s", - search_parameters) - raw_page = searcher.search_page(**search_parameters) - results = self._process_results(raw_page, fields, search_parameters) + query_parameters) + raw_page = searcher.search_page(**query_parameters) + results = self._process_results(raw_page, fields, query_parameters) return results def _workaround_join_query_and_filter( @@ -187,21 +215,10 @@ class WhooshBackend(Component): query_filter): if not query_filter: return query_expression - return query.And((query_expression, query_filter)) - - def _prepare_filter(self, filters): - if not filters: - return None - and_filters = [] - for filter in filters: - and_filters.append(query.Term( - unicode(filter[0]), - unicode(filter[1]))) - return query.And(and_filters) - + return whoosh.query.And((query_expression, query_filter)) - def _create_unique_id(self, type, id): - return u"%s:%s" % (type, id) + def _create_unique_id(self, doc_type, doc_id): + return u"%s:%s" % (doc_type, doc_id) def _to_whoosh_format(self, value): if isinstance(value, basestring): @@ -212,9 +229,9 @@ class WhooshBackend(Component): def _convert_date_to_tz_naive_utc(self, value): """Convert datetime to naive utc datetime - Whoosh can not read from index datetime value with + Whoosh can not read from index datetime values passed from Trac with tzinfo=trac.util.datefmt.FixedOffset because of non-empty - 
constructor""" + constructor of FixedOffset""" if value.tzinfo: utc_time = value.astimezone(utc) value = utc_time.replace(tzinfo=None) @@ -225,13 +242,16 @@ class WhooshBackend(Component): value = utc.localize(value) return value -# def _prepare_groupedby(self, facets): -# if not facets: -# return None -# groupedby = sorting.Facets() -# for facet_name in facets: -# groupedby.add_field(facet_name, allow_overlap=True, maptype=sorting.Count) -# return groupedby + def _prepare_groupedby(self, facets): + if not facets: + return None + groupedby = whoosh.sorting.Facets() + for facet_name in facets: + groupedby.add_field( + facet_name, + allow_overlap=True, + maptype=whoosh.sortingwhoosh.Count) + return groupedby def _prepare_sortedby(self, sort): if not sort: @@ -240,12 +260,15 @@ class WhooshBackend(Component): for (field, order) in sort: if field.lower() == SCORE: if self._is_desc(order): - #We can implement later our own ScoreFacet with + #We can implement this later by our own ScoreFacet with # "score DESC" support - raise TracError("Whoosh does not support DESC score ordering.") - sort_condition = sorting.ScoreFacet() + raise TracError( + "Whoosh does not support DESC score ordering.") + sort_condition = whoosh.sorting.ScoreFacet() else: - sort_condition = sorting.FieldFacet(field, reverse=self._is_desc(order)) + sort_condition = whoosh.sorting.FieldFacet( + field, + reverse=self._is_desc(order)) sortedby.append(sort_condition) return sortedby @@ -267,7 +290,7 @@ class WhooshBackend(Component): results.facets = self._load_facets(page) docs = [] - for doc_offset, retrieved_record in enumerate(page): + for retrieved_record in page: result_doc = self._process_record(fields, retrieved_record) docs.append(result_doc) results.docs = docs @@ -294,6 +317,7 @@ class WhooshBackend(Component): return result_doc def _load_facets(self, page): + """This method can be also used by unit-tests""" non_paged_results = page.results facet_names = non_paged_results.facet_names() if not 
facet_names: @@ -313,3 +337,73 @@ class WhooshBackend(Component): current user." % self.index_dir) + +class WhooshEmptyFacetErrorWorkaround(Component): + """ + Whoosh 2.4.1 raises "IndexError: list index out of range" + when search contains facets on a field that is missing in at least one + document in the index. The error manifests only when the index contains + more than one segment. + + The goal of this class is to temporarily solve the problem for the + prototype phase. For the non-prototype phase, the problem should be solved + by the next version of Whoosh. + + Remove this class when a fixed version of Whoosh is introduced. + """ + implements(IDocIndexPreprocessor) + implements(IResultPostprocessor) + implements(IQueryPreprocessor) + + NULL_MARKER = u"empty" + + should_not_be_empty_fields = [ + IndexFields.STATUS, + IndexFields.MILESTONE, + IndexFields.COMPONENT, + ] + + #IDocIndexPreprocessor methods + def pre_process(self, doc): + for field in self.should_not_be_empty_fields: + if field not in doc or doc[field] is None or doc[field] == empty: + doc[field] = self.NULL_MARKER + + #IResultPostprocessor methods + def post_process(self, query_result): + #fix facets + if query_result.facets: + for count_dict in query_result.facets.values(): + for field, count in count_dict.iteritems(): + if field == self.NULL_MARKER: + count_dict[None] = count + del count_dict[self.NULL_MARKER] + #we can fix query_result.docs later if needed + + #IQueryPreprocessor methods + def query_pre_process(self, query_parameters): + """ + Go through filter queries and replace "NOT (field_name:*)" query with + "field_name:NULL_MARKER" query. + + This is a really quick fix to make the prototype work, in the hope that + the next Whoosh version will be released soon. 
+ """ + if "filter" in query_parameters and query_parameters["filter"]: + self._find_and_fix_condition(query_parameters["filter"]) + if "query" in query_parameters and query_parameters["query"]: + self._find_and_fix_condition(query_parameters["query"]) + + def _find_and_fix_condition(self, filter_condition): + if isinstance(filter_condition, whoosh.query.CompoundQuery): + sub_queries = list(filter_condition.subqueries) + for i, subquery in enumerate(sub_queries): + term_to_replace = self._find_and_fix_condition(subquery) + if term_to_replace: + filter_condition.subqueries[i] = term_to_replace + elif isinstance(filter_condition, whoosh.query.Not): + not_query = filter_condition.query + if isinstance(not_query, whoosh.query.Every) and \ + not_query.fieldname in self.should_not_be_empty_fields: + return whoosh.query.Term(not_query.fieldname, self.NULL_MARKER) + return None
Modified: incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_search/setup.py URL: http://svn.apache.org/viewvc/incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_search/setup.py?rev=1440987&r1=1440986&r2=1440987&view=diff ============================================================================== --- incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_search/setup.py (original) +++ incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_search/setup.py Thu Jan 31 14:41:22 2013 @@ -109,35 +109,26 @@ PKG_INFO = {'bhsearch' : ('bhsearch', ), } -#ENTRY_POINTS = r""" -# [trac.plugins] -# bhsearch.web_ui = bhsearch.web_ui -# bhsearch.api = bhsearch.api -# bhsearch.admin = bhsearch.admin -# bhsearch.ticket_search = bhsearch.ticket_search -# bhsearch.query_parser = bhsearch.query_parser -# bhsearch.whoosh_backend = bhsearch.whoosh_backend -# """ ENTRY_POINTS = { - 'trac.plugins': [ - 'bhsearch.web_ui = bhsearch.web_ui', - 'bhsearch.api = bhsearch.api', - 'bhsearch.admin = bhsearch.admin', - 'bhsearch.ticket_search = bhsearch.ticket_search', - 'bhsearch.wiki_search = bhsearch.wiki_search', - 'bhsearch.query_parser = bhsearch.query_parser', - 'bhsearch.whoosh_backend = bhsearch.whoosh_backend', - ], + 'trac.plugins': [ + 'bhsearch.web_ui = bhsearch.web_ui', + 'bhsearch.api = bhsearch.api', + 'bhsearch.admin = bhsearch.admin', + 'bhsearch.search_resources.ticket_search =\ + bhsearch.search_resources.ticket_search', + 'bhsearch.search_resources.wiki_search = \ + bhsearch.search_resources.wiki_search', + 'bhsearch.search_resources.milestone_search = \ + bhsearch.search_resources.milestone_search', + 'bhsearch.query_parser = bhsearch.query_parser', + 'bhsearch.whoosh_backend = bhsearch.whoosh_backend', + ], } -#bhsearch.whoosh_backend = bhsearch.whoosh_backend -#bhsearch.ticket_search = bhsearch.ticket_search - setup( name=DIST_NM, version=latest, description=DESC.split('\n', 1)[0], requires = ['trac'], -# tests_require = 
['dutest>=0.2.4', 'TracXMLRPC'], install_requires = [ 'setuptools>=0.6b1', 'Trac>=0.11', Modified: incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_theme/bhtheme/theme.py URL: http://svn.apache.org/viewvc/incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_theme/bhtheme/theme.py?rev=1440987&r1=1440986&r2=1440987&view=diff ============================================================================== --- incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_theme/bhtheme/theme.py (original) +++ incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_theme/bhtheme/theme.py Thu Jan 31 14:41:22 2013 @@ -16,6 +16,8 @@ # specific language governing permissions and limitations # under the License. +import sys + from genshi.builder import tag from genshi.core import TEXT from genshi.filters.transform import Transformer @@ -213,6 +215,10 @@ class BloodhoundTheme(ThemeBase): def post_process_request(self, req, template, data, content_type): """Post process request filter. Removes all trac provided css if required""" + + if template is None and data is None and sys.exc_info() == (None, None, None): + return template, data, content_type + def is_active_theme(): is_active = False active_theme = ThemeEngineSystem(self.env).theme
