Author: andrej
Date: Fri Mar 8 12:35:14 2013
New Revision: 1454347
URL: http://svn.apache.org/r1454347
Log:
removing stop word filtering from whoosh index, so stop words are now indexed and searchable - towards #447 (from astaric)
Added:
incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/query_parser.py
Modified:
incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py
incubator/bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py
Added:
incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/query_parser.py
URL:
http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/query_parser.py?rev=1454347&view=auto
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/query_parser.py
(added)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/query_parser.py
Fri Mar 8 12:35:14 2013
@@ -0,0 +1,59 @@
+import unittest
+from bhsearch.tests.base import BaseBloodhoundSearchTest
+from bhsearch.query_parser import DefaultQueryParser
+from whoosh.query import terms, nary, wrappers
+
+
+class MetaKeywordsParsingTestCase(BaseBloodhoundSearchTest):
+ def setUp(self):
+ super(MetaKeywordsParsingTestCase, self).setUp()
+ self.parser = DefaultQueryParser(self.env)
+
+ def test_can_parse_keyword_ticket(self):
+ parsed_query = self.parser.parse("$ticket")
+ self.assertEqual(parsed_query, terms.Term('type', 'ticket'))
+
+ def test_can_parse_NOT_keyword_ticket(self):
+ parsed_query = self.parser.parse("NOT $ticket")
+ self.assertEqual(parsed_query,
+ wrappers.Not(
+ terms.Term('type', 'ticket')))
+
+ def test_can_parse_keyword_wiki(self):
+ parsed_query = self.parser.parse("$wiki")
+ self.assertEqual(parsed_query, terms.Term('type', 'wiki'))
+
+ def test_can_parse_keyword_resolved(self):
+ parsed_query = self.parser.parse("$resolved")
+ self.assertEqual(parsed_query,
+ nary.Or([terms.Term('status', 'resolved'),
+ terms.Term('status', 'closed')]))
+
+ def test_can_parse_meta_keywords_that_resolve_to_meta_keywords(self):
+ parsed_query = self.parser.parse("$unresolved")
+ self.assertEqual(parsed_query,
+ wrappers.Not(
+ nary.Or([terms.Term('status', 'resolved'),
+ terms.Term('status', 'closed')])))
+
+ def test_can_parse_complex_query(self):
+ parsed_query = self.parser.parse("content:test $ticket $unresolved")
+
+ self.assertEqual(parsed_query,
+ nary.And([
+ terms.Term('content', 'test'),
+ terms.Term('type', 'ticket'),
+ wrappers.Not(
+ nary.Or([terms.Term('status', 'resolved'),
+ terms.Term('status', 'closed')])
+ )
+ ]))
+
+
+def suite():
+ test_suite = unittest.TestSuite()
+ test_suite.addTest(unittest.makeSuite(MetaKeywordsParsingTestCase, 'test'))
+ return test_suite
+
+if __name__ == '__main__':
+ unittest.main()
Modified:
incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py
URL:
http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py?rev=1454347&r1=1454346&r2=1454347&view=diff
==============================================================================
---
incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py
(original)
+++
incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py
Fri Mar 8 12:35:14 2013
@@ -31,7 +31,7 @@ from trac.util.datefmt import FixedOffse
from whoosh import index, sorting, query
from whoosh.fields import Schema, ID, TEXT, KEYWORD
from whoosh.qparser import MultifieldPlugin, QueryParser, WhitespacePlugin, \
- PhrasePlugin
+ PhrasePlugin, MultifieldParser
class WhooshBackendTestCase(BaseBloodhoundSearchTest):
@@ -581,6 +581,32 @@ class WhooshFunctionalityTestCase(unitte
results = s.search(query.Every())
self.assertEquals(0, len(results))
+ def test_handles_stop_words_in_queries(self):
+ schema = WhooshBackend.SCHEMA
+ ix = index.create_in(self.index_dir, schema=schema)
+ with ix.writer() as w:
+ w.add_document(content=u"A nice sentence with stop words.")
+
+ with ix.searcher() as s:
+ query = u"with stop"
+
+ # neither field filters out stop words, so they stay in the parsed query
+ q = MultifieldParser(['content', 'summary'],
+ WhooshBackend.SCHEMA).parse(query)
+ self.assertEqual(q.simplify(s).__unicode__(),
+ u'((content:with OR summary:with) AND '
+ u'(content:stop OR summary:stop))')
+ self.assertEqual(len(s.search(q)), 1)
+
+ # 'content' and 'id' likewise keep stop words in the parsed query
+ q = MultifieldParser(['content', 'id'],
+ WhooshBackend.SCHEMA).parse(query)
+ self.assertEqual(q.simplify(s).__unicode__(),
+ u'((content:with OR id:with) AND '
+ u'(content:stop OR id:stop))')
+ self.assertEqual(len(s.search(q)), 1)
+
+
class WhooshEmptyFacetErrorWorkaroundTestCase(BaseBloodhoundSearchTest):
def setUp(self):
super(WhooshEmptyFacetErrorWorkaroundTestCase, self).setUp()
Modified:
incubator/bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py
URL:
http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py?rev=1454347&r1=1454346&r2=1454347&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py
(original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py Fri
Mar 8 12:35:14 2013
@@ -29,7 +29,7 @@ from trac.config import Option, IntOptio
from trac.util.text import empty
from trac.util.datefmt import utc
from whoosh.fields import Schema, ID, DATETIME, KEYWORD, TEXT
-from whoosh import index
+from whoosh import index, analysis
import whoosh
import whoosh.highlight
from whoosh.writing import AsyncWriter
@@ -64,9 +64,11 @@ class WhooshBackend(Component):
status=ID(stored=True),
resolution=ID(stored=True),
keywords=KEYWORD(scorable=True),
- summary=TEXT(stored=True),
- content=TEXT(stored=True),
- changes=TEXT(),
+ summary=TEXT(stored=True,
+ analyzer=analysis.StandardAnalyzer(stoplist=None)),
+ content=TEXT(stored=True,
+ analyzer=analysis.StandardAnalyzer(stoplist=None)),
+ changes=TEXT(analyzer=analysis.StandardAnalyzer(stoplist=None)),
)
max_fragment_size = IntOption('bhsearch', 'max_fragment_size', 240,