By default Xapian joins query terms that share a boolean prefix with OR
instead of AND, so searching for multiple labels (e.g. "label:a label:b")
matches messages carrying either label rather than both. By making use of the
"exclusive" parameter to add_boolean_prefix (added in Xapian 1.2) we can tell
Xapian to use OR only for prefixes that can have at most one value per
message, and AND for all other prefixes.

Signed-off-by: Sascha Silbe <sascha-...@silbe.org>
---
 lib/sup/index.rb |   74 +++++++++++++++++++++++++++---------------------------
 1 files changed, 37 insertions(+), 37 deletions(-)

Tested on Debian Squeeze with Ruby 1.8.7.302 and Xapian 1.2.3.

diff --git a/lib/sup/index.rb b/lib/sup/index.rb
index 9273f18..a72bec6 100644
--- a/lib/sup/index.rb
+++ b/lib/sup/index.rb
@@ -419,8 +419,8 @@ EOS
     qp.stemming_strategy = Xapian::QueryParser::STEM_SOME
     qp.default_op = Xapian::Query::OP_AND
     
qp.add_valuerangeprocessor(Xapian::NumberValueRangeProcessor.new(DATE_VALUENO, 
'date:', true))
-    NORMAL_PREFIX.each { |k,vs| vs.each { |v| qp.add_prefix k, v } }
-    BOOLEAN_PREFIX.each { |k,vs| vs.each { |v| qp.add_boolean_prefix k, v } }
+    NORMAL_PREFIX.each { |k,info| info[:prefix].each { |v| qp.add_prefix k, v } }
+    BOOLEAN_PREFIX.each { |k,info| info[:prefix].each { |v| qp.add_boolean_prefix k, v, info[:exclusive] } }

     begin
       xapian_query = qp.parse_query(subs, Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_WILDCARD)
@@ -471,31 +471,31 @@ EOS

   # Stemmed
   NORMAL_PREFIX = {
-    'subject' => 'S',
-    'body' => 'B',
-    'from_name' => 'FN',
-    'to_name' => 'TN',
-    'name' => %w(FN TN),
-    'attachment' => 'A',
-    'email_text' => 'E',
-    '' => %w(S B FN TN A E),
+    'subject' => {:prefix => 'S', :exclusive => false},
+    'body' => {:prefix => 'B', :exclusive => false},
+    'from_name' => {:prefix => 'FN', :exclusive => false},
+    'to_name' => {:prefix => 'TN', :exclusive => false},
+    'name' => {:prefix => %w(FN TN), :exclusive => false},
+    'attachment' => {:prefix => 'A', :exclusive => false},
+    'email_text' => {:prefix => 'E', :exclusive => false},
+    '' => {:prefix => %w(S B FN TN A E), :exclusive => false},
   }

   # Unstemmed
   BOOLEAN_PREFIX = {
-    'type' => 'K',
-    'from_email' => 'FE',
-    'to_email' => 'TE',
-    'email' => %w(FE TE),
-    'date' => 'D',
-    'label' => 'L',
-    'source_id' => 'I',
-    'attachment_extension' => 'O',
-    'msgid' => 'Q',
-    'id' => 'Q',
-    'thread' => 'H',
-    'ref' => 'R',
-    'location' => 'J',
+    'type' => {:prefix => 'K', :exclusive => true},
+    'from_email' => {:prefix => 'FE', :exclusive => false},
+    'to_email' => {:prefix => 'TE', :exclusive => false},
+    'email' => {:prefix => %w(FE TE), :exclusive => false},
+    'date' => {:prefix => 'D', :exclusive => true},
+    'label' => {:prefix => 'L', :exclusive => false},
+    'source_id' => {:prefix => 'I', :exclusive => true},
+    'attachment_extension' => {:prefix => 'O', :exclusive => false},
+    'msgid' => {:prefix => 'Q', :exclusive => true},
+    'id' => {:prefix => 'Q', :exclusive => true},
+    'thread' => {:prefix => 'H', :exclusive => false},
+    'ref' => {:prefix => 'R', :exclusive => false},
+    'location' => {:prefix => 'J', :exclusive => false},
   }

   PREFIX = NORMAL_PREFIX.merge BOOLEAN_PREFIX
@@ -661,8 +661,8 @@ EOS
     # Person names are indexed with several prefixes
     person_termer = lambda do |d|
       lambda do |p|
-        doc.index_text p.name, PREFIX["#{d}_name"] if p.name
-        doc.index_text p.email, PREFIX['email_text']
+        doc.index_text p.name, PREFIX["#{d}_name"][:prefix] if p.name
+        doc.index_text p.email, PREFIX['email_text'][:prefix]
         doc.add_term mkterm(:email, d, p.email)
       end
     end
@@ -673,9 +673,9 @@ EOS
     # Full text search content
     subject_text = m.indexable_subject
     body_text = m.indexable_body
-    doc.index_text subject_text, PREFIX['subject']
-    doc.index_text body_text, PREFIX['body']
-    m.attachments.each { |a| doc.index_text a, PREFIX['attachment'] }
+    doc.index_text subject_text, PREFIX['subject'][:prefix]
+    doc.index_text body_text, PREFIX['body'][:prefix]
+    m.attachments.each { |a| doc.index_text a, PREFIX['attachment'][:prefix] }

     # Miscellaneous terms
     doc.add_term mkterm(:date, m.date) if m.date
@@ -753,25 +753,25 @@ EOS
   def mkterm type, *args
     case type
     when :label
-      PREFIX['label'] + args[0].to_s.downcase
+      PREFIX['label'][:prefix] + args[0].to_s.downcase
     when :type
-      PREFIX['type'] + args[0].to_s.downcase
+      PREFIX['type'][:prefix] + args[0].to_s.downcase
     when :date
-      PREFIX['date'] + args[0].getutc.strftime("%Y%m%d%H%M%S")
+      PREFIX['date'][:prefix] + args[0].getutc.strftime("%Y%m%d%H%M%S")
     when :email
       case args[0]
-      when :from then PREFIX['from_email']
-      when :to then PREFIX['to_email']
+      when :from then PREFIX['from_email'][:prefix]
+      when :to then PREFIX['to_email'][:prefix]
       else raise "Invalid email term type #{args[0]}"
       end + args[1].to_s.downcase
     when :source_id
-      PREFIX['source_id'] + args[0].to_s.downcase
+      PREFIX['source_id'][:prefix] + args[0].to_s.downcase
     when :location
-      PREFIX['location'] + [args[0]].pack('n') + args[1].to_s
+      PREFIX['location'][:prefix] + [args[0]].pack('n') + args[1].to_s
     when :attachment_extension
-      PREFIX['attachment_extension'] + args[0].to_s.downcase
+      PREFIX['attachment_extension'][:prefix] + args[0].to_s.downcase
     when :msgid, :ref, :thread
-      PREFIX[type.to_s] + args[0][0...(MAX_TERM_LENGTH-1)]
+      PREFIX[type.to_s][:prefix] + args[0][0...(MAX_TERM_LENGTH-1)]
     else
       raise "Invalid term type #{type}"
     end
--
1.7.1

_______________________________________________
Sup-devel mailing list
Sup-devel@rubyforge.org
http://rubyforge.org/mailman/listinfo/sup-devel

Reply via email to