Author: ahorincar
Date: Thu Aug 7 19:00:56 2014
New Revision: 1616567
URL: http://svn.apache.org/r1616567
Log:
Fixed pagination, fixed wildcard searching, implemented ITemplateStreamFilter
for more_like_this queries, added feature to generate schema
Added:
bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/admin.py
bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/backend.py
bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schema.py
bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/web_ui.py
Removed:
bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/solr_backend.py
Modified:
bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schemadoc/schema.xml
bloodhound/branches/bep_0014_solr/bloodhound_solr/setup.py
Added: bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/admin.py
URL:
http://svn.apache.org/viewvc/bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/admin.py?rev=1616567&view=auto
==============================================================================
--- bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/admin.py (added)
+++ bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/admin.py Thu Aug
7 19:00:56 2014
@@ -0,0 +1,15 @@
+from trac.core import Component, implements
+from bhsolr.schema import SolrSchema
+from trac.admin import IAdminCommandProvider
+
+class BloodhoundSolrAdmin(Component):
+
+ implements(IAdminCommandProvider)
+
+ # IAdminCommandProvider methods
+ def get_admin_commands(self):
+ yield ('bhsolr generate_schema', '<path>',
+ 'Generate Solr schema',
+ None, SolrSchema(self.env).generate_schema)
+
+
Added: bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/backend.py
URL:
http://svn.apache.org/viewvc/bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/backend.py?rev=1616567&view=auto
==============================================================================
--- bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/backend.py (added)
+++ bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/backend.py Thu Aug
7 19:00:56 2014
@@ -0,0 +1,229 @@
+from bhsearch import BHSEARCH_CONFIG_SECTION
+from bhsearch.api import ISearchBackend, SCORE, QueryResult
+from bhsearch.query_parser import DefaultQueryParser
+from bhsearch.search_resources.ticket_search import TicketIndexer
+from trac.core import Component, implements, TracError
+from trac.config import Option
+from trac.ticket.model import Ticket
+from trac.ticket.api import TicketSystem
+from trac.util.datefmt import utc
+from datetime import datetime
+from sunburnt import SolrInterface
+from contextlib import contextmanager
+from math import ceil
+import re
+import pkg_resources
+from bhsolr.schema import SolrSchema
+
+UNIQUE_ID = "unique_id"
+
+HIGHLIGHTABLE_FIELDS = {"unique_id" : True,
+ "id" : True,
+ "type" : True,
+ "product" : True,
+ "milestone" : True,
+ "author" : True,
+ "component" : True,
+ "status" : True,
+ "resolution" : True,
+ "keywords" : True,
+ "summary" : True,
+ "content" : True,
+ "changes" : True,
+ "owner" : True,
+ "repository" : True,
+ "revision" : True,
+ "message" : True,
+ "name" : True}
+
+class SolrBackend(Component):
+ """bhsearch ``ISearchBackend`` implementation that talks to Solr via sunburnt."""
+ implements(ISearchBackend)
+
+ # Address of the Solr server, read from the bhsearch config section.
+ server_url = Option(
+ BHSEARCH_CONFIG_SECTION,
+ 'solr_server_url',
+ doc="""Url of the server running Solr instance.""",
+ doc_domain='bhsearch')
+
+
+ def __init__(self):
+ resource_filename = pkg_resources.resource_filename
+ path = resource_filename(__name__, "schemadoc")
+ file_obj = open(path + "/schema.xml")
+ # print SolrSchema(self.env).getInstance(self.env).path
+ # file_obj = open(SolrSchema.getInstance(self.env).path)
+ self.solr_interface = SolrInterface(str(self.server_url),
schemadoc=file_obj)
+
+ def add_doc(self, doc, operation_context=None):
+ self._reformat_doc(doc)
+ doc[UNIQUE_ID] = self._create_unique_id(doc.get("product", ''),
+ doc["type"],
+ doc["id"])
+ self.solr_interface.add(doc)
+ self.solr_interface.commit()
+
+
+ def delete_doc(product, doc_type, doc_id, operation_context=None):
+ unique_id = self._create_unique_id(product, doc_type, doc_id)
+ self.solr_interface.delete(unique_id)
+
+
+ def optimize(self):
+ """Delegate to the sunburnt interface's ``optimize()``."""
+ self.solr_interface.optimize()
+
+ def query(self, query, query_string, sort = None, fields = None, filter =
None,
+ facets = None, pagenum = 1, pagelen = 20, highlight = False,
+ highlight_fields = None, context = None):
+
+ if not query_string:
+ query_string = "*.*"
+
+ final_query_chain = self._create_query_chain(query, query_string)
+ solr_query = self.solr_interface.query(final_query_chain)
+ faceted_solr_query = solr_query.facet_by(facets)
+ self.highlighted_solr_query =
faceted_solr_query.highlight(HIGHLIGHTABLE_FIELDS)
+
+ start = 0 if pagenum == 1 else pagelen * pagenum
+
+ paginated_solr_query = self.highlighted_solr_query.paginate(start=start,
rows=pagelen)
+ results = paginated_solr_query.execute()
+ mlt = self.query_more_like_this(paginated_solr_query, fields="type",
mindf=1, mintf=1)
+
+ return self._create_query_result(results, fields, pagenum, pagelen, mlt)
+
+ def query_more_like_this(self, query_chain, **kwargs):
+ mlt_results = query_chain.mlt(**kwargs).execute().more_like_these
+ return mlt_results
+
+ def _create_query_result(self, results, fields, pagenum, pagelen, mlt):
+ total_num, total_page_count, page_num, offset = \
+ self._prepare_query_result_attributes(results, pagenum,
pagelen)
+
+ query_results = QueryResult()
+ query_results.hits = total_num
+ query_results.total_page_count = total_page_count
+ query_results.page_number = page_num
+ query_results.offset = offset
+
+ docs = []
+ highlighting = []
+
+ for retrieved_record in results:
+ result_doc = self._process_record(fields, retrieved_record, mlt)
+ docs.append(result_doc)
+
+ result_highlights = dict(retrieved_record['solr_highlights'])
+
+ highlighting.append(result_highlights)
+ query_results.docs = docs
+ query_results.highlighting = highlighting
+
+ return query_results
+
+ def _create_query_chain(self, query, query_string):
+ matches = re.findall(re.compile(r'([\w\*]+)'), query_string)
+ tokens = set([match for match in matches])
+
+ final_query_chain = None
+ for token in tokens:
+ token_query_chain = self._search_fields_for_token(token)
+ if final_query_chain == None:
+ final_query_chain = token_query_chain
+ else:
+ final_query_chain |= token_query_chain
+
+ return final_query_chain
+
+
+ def _process_record(self, fields, retrieved_record, mlt):
+ result_doc = dict()
+ if fields:
+ for field in fields:
+ if field in retrieved_record:
+ result_doc[field] = retrieved_record[field]
+ else:
+ for key, value in retrieved_record.items():
+ result_doc[key] = value
+
+ for key, value in result_doc.iteritems():
+ result_doc[key] = self._from_whoosh_format(value)
+
+ return result_doc
+
+ def _from_whoosh_format(self, value):
+ if isinstance(value, datetime):
+ value = utc.localize(value)
+ return value
+
+ def _prepare_query_result_attributes(self, results, pagenum, pagelen):
+ results_total_num = self.highlighted_solr_query.execute().result.numFound
+ total_page_count = int(ceil(results_total_num / pagelen))
+ pagenum = min(total_page_count, pagenum)
+
+ offset = (pagenum - 1) * pagelen
+ if (offset + pagelen) > results_total_num:
+ pagelen = results_total_num - offset
+
+ return results_total_num, total_page_count, pagenum, offset
+
+ def is_index_outdated(self):
+ """Always return ``False``; this backend never reports a stale index."""
+ return False
+
+ def recreate_index(self):
+ """Do nothing and return ``True``."""
+ return True
+
+ @contextmanager
+ def start_operation(self):
+ """Context manager that performs no setup or teardown and yields no value."""
+ yield
+
+ def _search_fields_for_token(self, token):
+ query_chain = None
+ field_boosts = DefaultQueryParser(self.env).field_boosts
+
+ for field, boost in field_boosts.iteritems():
+ if field != 'query_suggestion_basket' and field != 'relations':
+ field_token_dict = {field: token}
+ if query_chain == None:
+ query_chain = self.solr_interface.Q(**field_token_dict)**boost
+ else:
+ query_chain |= self.solr_interface.Q(**field_token_dict)**boost
+
+ return query_chain
+
+ def _reformat_doc(self, doc):
+ for key, value in doc.items():
+ if key is None:
+ del doc[None]
+ elif value is None:
+ del doc[key]
+ elif isinstance(value, basestring) and value == "":
+ del doc[key]
+ else:
+ doc[key] = self._to_whoosh_format(value)
+
+ def _to_whoosh_format(self, value):
+ if isinstance(value, basestring):
+ value = unicode(value)
+ elif isinstance(value, datetime):
+ value = self._convert_date_to_tz_naive_utc(value)
+ return value
+
+
+ def _convert_date_to_tz_naive_utc(self, value):
+ if value.tzinfo:
+ utc_time = value.astimezone(utc)
+ value = utc_time.replace(tzinfo=None)
+ return value
+
+
+ def _create_unique_id(self, product, doc_type, doc_id):
+ if product:
+ return u"%s:%s:%s" % (product, doc_type, doc_id)
+ else:
+ return u"%s:%s" % (doc_type, doc_id)
+
+ def getInstance(self):
+ """Return the ``SolrInterface`` object held by this backend."""
+ return self.solr_interface
+
+
+
Added: bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schema.py
URL:
http://svn.apache.org/viewvc/bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schema.py?rev=1616567&view=auto
==============================================================================
--- bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schema.py (added)
+++ bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schema.py Thu Aug
7 19:00:56 2014
@@ -0,0 +1,169 @@
+from lxml import etree
+from bhsearch.whoosh_backend import WhooshBackend
+from trac.core import Component, implements, TracError
+import os
+
+class SolrSchema(Component):
+ """Builds a Solr ``schema.xml`` document from the bhsearch Whoosh schema."""
+ instance = None
+
+ # Field names that add_all_fields() writes with required="true".
+ REQUIRED_FIELDS = {"id": True,
+ "unique_id": True,
+ "type": True}
+
+ # Maps a Whoosh field class name to the Solr field type name used for it.
+ FIELDS_TYPE_DICT = {"ID": "string",
+ "DATETIME": "date",
+ "KEYWORD": "string",
+ "TEXT": "text_general"
+ }
+
+ def __init__(self):
+ self.schema = WhooshBackend.SCHEMA
+ self.schema_element = etree.Element("schema")
+ self.schema_element.set("name", "Bloodhound Solr Schema")
+ self.schema_element.set("version", "1")
+
+ self.path = None
+ self.fields_element = etree.SubElement(self.schema_element, "fields")
+ self.unique_key_element = etree.SubElement(self.schema_element,
"uniqueKey")
+ self.unique_key_element.text = "unique_id"
+
+ version_field = etree.SubElement(self.fields_element, "field")
+ version_field.set("name", "_version_")
+ version_field.set("type", "long")
+ version_field.set("indexed", "true")
+ version_field.set("stored", "true")
+
+ root_field = etree.SubElement(self.fields_element, "field")
+ root_field.set("name", "_root_")
+ root_field.set("type", "string")
+ root_field.set("indexed", "true")
+ root_field.set("stored", "false")
+
+ stored_name = etree.SubElement(self.fields_element, "field")
+ stored_name.set("name", "_stored_name")
+ stored_name.set("type", "string")
+ stored_name.set("indexed", "true")
+ stored_name.set("stored", "true")
+ stored_name.set("required", "false")
+ stored_name.set("multivalued", "false")
+
+ # @classmethod
+ # def getInstance(self, env):
+ # if not self.instance:
+ # self.instance = SolrSchema(env)
+ # return self.instance
+
+ def generate_schema(self, path=None):
+ if not path:
+ path = os.getcwd()
+
+ self.add_all_fields()
+ self.add_type_definitions()
+ doc = etree.ElementTree(self.schema_element)
+
+ self.path = os.path.join(path, 'schema.xml')
+
+ out_file = open(os.path.join(path, 'schema.xml'), 'w')
+ doc.write(out_file, xml_declaration=True, encoding='UTF-8',
pretty_print=True)
+ out_file.close()
+
+ def add_field(self, field_name, name_attr, type_attr, indexed_attr,
stored_attr, required_attr, multivalued_attr):
+ field = etree.SubElement(self.fields_element, field_name)
+ field.set("name", name_attr)
+ field.set("type", type_attr)
+ field.set("indexed", indexed_attr)
+ field.set("stored", stored_attr)
+ field.set("required", required_attr)
+ field.set("multivalued", multivalued_attr)
+
+ def add_all_fields(self):
+ for (field_name, field_attrs) in self.schema.items():
+ type_attr =
SolrSchema.FIELDS_TYPE_DICT[str(field_attrs.__class__.__name__)]
+ indexed_attr = str(field_attrs.indexed).lower()
+ stored_attr = str(field_attrs.stored).lower()
+ if field_name in SolrSchema.REQUIRED_FIELDS:
+ required_attr = "true"
+ else:
+ required_attr = "false"
+
+ self.add_field("field", field_name, type_attr, indexed_attr,
stored_attr, required_attr, "false")
+
+
+ def add_type_definitions(self):
+ self.types_element = etree.SubElement(self.schema_element, "types")
+ self._add_string_type_definition()
+ self._add_text_general_type_definition()
+ self._add_date_type_definition()
+ self._add_long_type_definition()
+ self._add_lowercase_type_definition()
+
+
+ def _add_string_type_definition(self):
+ field_type = etree.SubElement(self.types_element, "fieldType")
+ field_type.set("name", "string")
+ field_type.set("class", "solr.StrField")
+ field_type.set("sortMissingLast", "true")
+
+
+ def _add_text_general_type_definition(self):
+ field_type = etree.SubElement(self.types_element, "fieldType")
+ field_type.set("name", "text_general")
+ field_type.set("class", "solr.TextField")
+ field_type.set("positionIncrementGap", "100")
+
+ analyzer_index = etree.SubElement(field_type, "analyzer")
+ analyzer_index.set("type", "index")
+
+ tokenizer_index = etree.SubElement(analyzer_index, "tokenizer")
+ tokenizer_index.set("class", "solr.StandardTokenizerFactory")
+ filter1 = etree.SubElement(analyzer_index, "filter")
+ filter1.set("class", "solr.StopFilterFactory")
+ filter1.set("ignoreCase", "true")
+ filter1.set("words", "stopwords.txt")
+
+ filter2 = etree.SubElement(analyzer_index, "filter")
+ filter2.set("class", "solr.LowerCaseFilterFactory")
+
+ analyzer_query = etree.SubElement(field_type, "analyzer")
+ analyzer_query.set("type", "query")
+ tokenizer_query = etree.SubElement(analyzer_query, "tokenizer")
+ tokenizer_query.set("class", "solr.StandardTokenizerFactory")
+ filter3 = etree.SubElement(analyzer_query, "filter")
+ filter3.set("class", "solr.StopFilterFactory")
+ filter3.set("ignoreCase", "true")
+ filter3.set("words", "stopwords.txt")
+
+ filter4 = etree.SubElement(analyzer_query, "filter")
+ filter4.set("class", "solr.SynonymFilterFactory")
+ filter4.set("synonyms", "synonyms.txt")
+ filter4.set("ignoreCase", "true")
+ filter4.set("expand", "true")
+
+ filter5 = etree.SubElement(analyzer_query, "filter")
+ filter5.set("class", "solr.LowerCaseFilterFactory")
+
+ def _add_date_type_definition(self):
+ field_type = etree.SubElement(self.types_element, "fieldType")
+ field_type.set("name", "date")
+ field_type.set("class", "solr.TrieDateField")
+ field_type.set("precisionStep", "0")
+ field_type.set("positionIncrementGap", "0")
+
+ def _add_long_type_definition(self):
+ field_type = etree.SubElement(self.types_element, "fieldType")
+ field_type.set("name", "long")
+ field_type.set("class", "solr.TrieLongField")
+ field_type.set("precisionStep", "0")
+ field_type.set("positionIncrementGap", "0")
+
+ def _add_lowercase_type_definition(self):
+ field_type = etree.SubElement(self.types_element, "fieldType")
+ field_type.set("name", "lowercase")
+ field_type.set("class", "solr.TextField")
+ field_type.set("positionIncrementGap", "100")
+
+ analyzer = etree.SubElement(field_type, "analyzer")
+ tokenizer = etree.SubElement(analyzer, "tokenizer")
+ tokenizer.set("class", "solr.KeywordTokenizerFactory")
+ filter_lowercase = etree.SubElement(analyzer, "filter")
+ filter_lowercase.set("class", "solr.LowerCaseFilterFactory")
Modified:
bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schemadoc/schema.xml
URL:
http://svn.apache.org/viewvc/bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schemadoc/schema.xml?rev=1616567&r1=1616566&r2=1616567&view=diff
==============================================================================
---
bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schemadoc/schema.xml
(original)
+++
bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/schemadoc/schema.xml
Thu Aug 7 19:00:56 2014
@@ -8,8 +8,8 @@
<field name="_root_" type="string" indexed="true" stored="false"/>
<!-- BH fields -->
- <field name="id" type="string" indexed="true" stored="true" required="true"
multiValued="false" />
<field name="unique_id" type="string" indexed="true" stored="true"
required="true" multiValued="false" />
+ <field name="id" type="string" indexed="true" stored="true" required="true"
multiValued="false" />
<field name="type" type="string" indexed="true" stored="true"
required="true" multiValued="false"/>
<field name="product" type="string" indexed="true" stored="true"
required="false" multiValued="false"/>
<field name="milestone" type="string" indexed="true" stored="true"
required="false" multiValued="false"/>
@@ -31,14 +31,12 @@
<field name="required_permission" type="string" indexed="true" stored="true"
required="false" multiValued="false"/>
<field name="name" type="text_general" indexed="true" stored="true"
required="false" multiValued="false"/>
<field name="_stored_name" type="string" indexed="true" stored="true"
required="false" multiValued="false"/>
- <!-- <field name="query_suggestion_basket" type="text_general"
indexed="true" stored="true" required="true" multiValued="false"/> -->
- <!-- <field name="relations" type="lowercase" indexed="true" stored="true"
required="true" multiValued="false"/> -->
+ <field name="relations" type="lowercase" indexed="true" stored="true"
required="true" multiValued="false"/>
+ <field name="query_suggestion_basket" type="text_general" indexed="true"
stored="true" required="true" multiValued="false"/>
</fields>
<uniqueKey>unique_id</uniqueKey>
-<!-- <copyField source="name" dest="text"/> -->
-
<types>
<!-- Field type definitions -->
<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
@@ -69,4 +67,3 @@
</types>
</schema>
-
Added: bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/web_ui.py
URL:
http://svn.apache.org/viewvc/bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/web_ui.py?rev=1616567&view=auto
==============================================================================
--- bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/web_ui.py (added)
+++ bloodhound/branches/bep_0014_solr/bloodhound_solr/bhsolr/web_ui.py Thu Aug
7 19:00:56 2014
@@ -0,0 +1,17 @@
+from trac.web.api import ITemplateStreamFilter
+from genshi.filters import Transformer
+import re
+from trac.core import Component, implements, TracError
+from genshi.input import HTML
+
+class BloodhoundSolrTemplate(Component):
+ implements (ITemplateStreamFilter)
+
+ def filter_stream(self, req, method, filename, stream, data):
+ html = HTML(u'''<br></br><a href="porc" class="btn" style="margin: 10px
10px 10px 0px;">More like this</a>''')
+
+ if re.match(r'/bhsearch', req.path_info):
+ filter = Transformer('//dl[@id="results"]/dd/span[@class="date"]')
+ stream |= filter.append(html)
+
+ return stream
Modified: bloodhound/branches/bep_0014_solr/bloodhound_solr/setup.py
URL:
http://svn.apache.org/viewvc/bloodhound/branches/bep_0014_solr/bloodhound_solr/setup.py?rev=1616567&r1=1616566&r2=1616567&view=diff
==============================================================================
--- bloodhound/branches/bep_0014_solr/bloodhound_solr/setup.py (original)
+++ bloodhound/branches/bep_0014_solr/bloodhound_solr/setup.py Thu Aug 7
19:00:56 2014
@@ -8,9 +8,10 @@ PKG_INFO = {'bhsolr': ['schemadoc/*.xml'
ENTRY_POINTS = {
'trac.plugins': [
- 'bhsolr.api = bhsolr.api',
'bhsolr.admin = bhsolr.admin',
- 'bhsolr.solr_backend = bhsolr.solr_backend',
+ 'bhsolr.schema = bhsolr.schema',
+ 'bhsolr.backend = bhsolr.backend',
+ 'bhsolr.web_ui = bhsolr.web_ui',
'bhsolr.search_resources.ticket_search =
bhsolr.search_resources.ticket_search',
'bhsolr.search_resources.milestone_search =
bhsolr.search_resources.milestone_search',
'bhsolr.search_resources.changeset_search =
bhsolr.search_resources.changeset_search',