Author: norman
Date: Tue Jun 7 19:04:38 2011
New Revision: 1133130

URL: http://svn.apache.org/viewvc?rev=1133130&view=rev
Log:
Add more Base-Subject extracting tests and fix a few bugs in there. See MAILBOX-78

Modified: james/mailbox/trunk/api/src/main/java/org/apache/james/mailbox/SearchQuery.java james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/comparator/CombinedComparator.java james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/lucene/LuceneMessageSearchIndex.java james/mailbox/trunk/store/src/test/java/org/apache/james/mailbox/store/search/SearchUtilTest.java Modified: james/mailbox/trunk/api/src/main/java/org/apache/james/mailbox/SearchQuery.java URL: http://svn.apache.org/viewvc/james/mailbox/trunk/api/src/main/java/org/apache/james/mailbox/SearchQuery.java?rev=1133130&r1=1133129&r2=1133130&view=diff ============================================================================== --- james/mailbox/trunk/api/src/main/java/org/apache/james/mailbox/SearchQuery.java (original) +++ james/mailbox/trunk/api/src/main/java/org/apache/james/mailbox/SearchQuery.java Tue Jun 7 19:04:38 2011 @@ -102,6 +102,11 @@ public class SearchQuery { To, /** + * + */ + SentDate, + + /** * Uid of the message. 
This is the DEFAULT if no other is specified */ Uid Modified: james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java URL: http://svn.apache.org/viewvc/james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java?rev=1133130&r1=1133129&r2=1133130&view=diff ============================================================================== --- james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java (original) +++ james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java Tue Jun 7 19:04:38 2011 @@ -19,6 +19,7 @@ package org.apache.james.mailbox.store.search; import java.nio.charset.Charset; +import java.util.Locale; import org.apache.james.mime4j.codec.DecoderUtil; import org.apache.james.mime4j.util.MimeUtil; @@ -29,7 +30,7 @@ public class SearchUtil { private final static String SUBJ_FWD_HDR = "[fwd:"; private final static String SUBJ_FWD_TRL = "]"; private final static String RE = "re"; - private final static String FWD = "FWD"; + private final static String FWD = "fwd"; private final static String FW = "fw"; private final static char WS = ' '; private final static char OPEN_SQUARE_BRACKED = '['; @@ -133,7 +134,7 @@ public class SearchUtil { // base, then remove the prefix text. 
decodedSubjectLength = decodedSubject.length(); String subj = removeBlob(decodedSubject); - + // check if it will leave a non-empty subject if (subj.length() > 0) { decodedSubject = subj; @@ -150,14 +151,14 @@ public class SearchUtil { break; } } + String lowcaseSubject = decodedSubject.toLowerCase(Locale.US); - if (decodedSubject.startsWith(SUBJ_FWD_HDR) && decodedSubject.endsWith(SUBJ_FWD_TRL)) { + if (lowcaseSubject.startsWith(SUBJ_FWD_HDR) && lowcaseSubject.endsWith(SUBJ_FWD_TRL)) { // (6) If the resulting text begins with the subj-fwd-hdr ABNF and // ends with the subj-fwd-trl ABNF, remove the subj-fwd-hdr and // subj-fwd-trl and repeat from step (2). decodedSubject = decodedSubject.substring(SUBJ_FWD_HDR.length(), decodedSubject.length() - SUBJ_FWD_TRL.length()); decodedSubjectLength = decodedSubject.length(); - } else { break; } @@ -167,13 +168,26 @@ public class SearchUtil { return decodedSubject; } + /** + * Remove the subj-blob + * + * subj-blob = "[" *BLOBCHAR "]" *WSP + * subj-refwd = ("re" / ("fw" ["d"])) *WSP [subj-blob] ":" + * + * BLOBCHAR = %x01-5a / %x5c / %x5e-7f + * ; any CHAR except '[' and ']' + * + * + * @param subject + * @return sub + */ private static String removeSubjectBlob(String subject) { String subj = subject; while(subj.charAt(0) == OPEN_SQUARE_BRACKED) { int length = subj.length(); subj = removeBlob(subject); int i = 0; - if (subj.charAt(i) == CLOSE_SQUARE_BRACKED) { + if (subj.length() > 0 && subj.charAt(i) == CLOSE_SQUARE_BRACKED) { i++; } else { return subject; @@ -182,62 +196,86 @@ public class SearchUtil { i++; } subj = subj.substring(i); - System.out.println(subj); - if (length == subj.length()) { return subj; } } return subj; } + + /** + * Remove the subj-leader + * + * subj-leader = (*subj-blob subj-refwd) / WSP + * subj-blob = "[" *BLOBCHAR "]" *WSP + * subj-refwd = ("re" / ("fw" ["d"])) *WSP [subj-blob] ":" + * + * BLOBCHAR = %x01-5a / %x5c / %x5e-7f + * ; any CHAR except '[' and ']' + * + * + * @param subject + * @return 
sub + */ private static String removeSubjLeaders(String subject) { - - // subj-leader = (*subj-blob subj-refwd) / WSP - // subj-blob = "[" *BLOBCHAR "]" *WSP - // subj-refwd = ("re" / ("fw" ["d"])) *WSP [subj-blob] ":" - // - // BLOBCHAR = %x01-5a / %x5c / %x5e-7f - // ; any CHAR except '[' and ']' */ - - String subj = removeSubjectBlob(subject); - - int subString = 0; - if (subj.startsWith(RE)) { - subString = RE.length(); - } else if (subj.startsWith(FWD)) { - subString = FWD.length(); - } else if (subj.startsWith(FW)) { - subString = FW.length(); - } else { - return subject; - } - while(subj.charAt(subString) == WS) { + while (subject.charAt(subString) == WS) { subString++; } - subj = removeSubjectBlob(subj.substring(subString)); - if (subj.endsWith(String.valueOf(CLOSE_SQUARE_BRACKED))) { - subString = 1; + if (subString > 0) { + // check if we have matched WSP + return subject.substring(subString); } else { - subString = 0; - } - if (subj.charAt(subString) == COLON) { - subString++; - } else { - return subject; + String subj = removeSubjectBlob(subject); + + String lowCaseSubj = subj.toLowerCase(Locale.US); + if (lowCaseSubj.startsWith(RE)) { + subString = RE.length(); + } else if (lowCaseSubj.startsWith(FWD)) { + subString = FWD.length(); + } else if (lowCaseSubj.startsWith(FW)) { + subString = FW.length(); + } else { + return subject; + } + while (subj.charAt(subString) == WS) { + subString++; + } + + /* + * subj = removeSubjectBlob(subj.substring(subString)); if + * (subj.endsWith(String.valueOf(CLOSE_SQUARE_BRACKED))) { subString + * = 1; } else { subString = 0; } + */ + + if (subj.charAt(subString) == COLON) { + subString++; + } else { + return subject; + } + + while (subj.charAt(subString) == WS) { + subString++; + } + return subj.substring(subString); } - - return subj.substring(subString); } + + /** + * remove the remove_subj_trailers + * + * subj-trailer = "(fwd)" / WSP + * + * + * @param decodedSubject + * * @return sub + */ private static String 
removeSubTrailers(String decodedSubject) { int subStringStart = 0; int subStringEnd = decodedSubject.length(); - - // remove the remove_subj_trailers - // - // subj-trailer = "(fwd)" / WSP + int originalSize = decodedSubject.length(); int curPos = originalSize -1; while(true) { @@ -255,6 +293,15 @@ public class SearchUtil { return decodedSubject; } + /** + * Remove all blobchars + * + * BLOBCHAR = %x01-5a / %x5c / %x5e-7f + * ; any CHAR except '[' and ']' + * + * @param subject + * @return subj + */ private static String removeBlob(String subject) { int i = 0; char lastChar = Character.UNASSIGNED; @@ -270,6 +317,8 @@ public class SearchUtil { if (lastChar != CLOSE_SQUARE_BRACKED) { return subject; } else { + // the lastChar was a ] so increase the count before substring + i++; return subject.substring(i); } Modified: james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/comparator/CombinedComparator.java URL: http://svn.apache.org/viewvc/james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/comparator/CombinedComparator.java?rev=1133130&r1=1133129&r2=1133130&view=diff ============================================================================== --- james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/comparator/CombinedComparator.java (original) +++ james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/comparator/CombinedComparator.java Tue Jun 7 19:04:38 2011 @@ -81,6 +81,8 @@ public class CombinedComparator implemen case Uid: comparator = UidComparator.uid(reverse); break; + case SentDate: + comparator = SentDateComparator.sentDate(reverse); default: break; } Modified: james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/lucene/LuceneMessageSearchIndex.java URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/lucene/LuceneMessageSearchIndex.java?rev=1133130&r1=1133129&r2=1133130&view=diff ============================================================================== --- james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/lucene/LuceneMessageSearchIndex.java (original) +++ james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/lucene/LuceneMessageSearchIndex.java Tue Jun 7 19:04:38 2011 @@ -56,9 +56,11 @@ import org.apache.james.mailbox.SearchQu import org.apache.james.mailbox.store.mail.model.Mailbox; import org.apache.james.mailbox.store.mail.model.Message; import org.apache.james.mailbox.store.search.MessageSearchIndex; +import org.apache.james.mailbox.store.search.SearchUtil; import org.apache.james.mime4j.MimeException; import org.apache.james.mime4j.descriptor.BodyDescriptor; import org.apache.james.mime4j.field.AddressListField; +import org.apache.james.mime4j.field.DateTimeField; import org.apache.james.mime4j.field.address.Address; import org.apache.james.mime4j.field.address.AddressList; import org.apache.james.mime4j.field.address.Group; @@ -178,6 +180,8 @@ public class LuceneMessageSearchIndex<Id public final static String FIRST_FROM_MAILBOX_NAME_FIELD ="firstFromMailboxName"; + public final static String BASE_SUBJECT_FIELD = "baseSubject"; + /** * {@link Field} which contain the internalDate of the message with YEAR-Resolution */ @@ -252,6 +256,12 @@ public class LuceneMessageSearchIndex<Id private final static SortField ARRIVAL_MAILBOX_SORT = new SortField(INTERNAL_DATE_FIELD_MILLISECOND_RESOLUTION, SortField.LONG); private final static SortField ARRIVAL_MAILBOX_SORT_REVERSE = new SortField(INTERNAL_DATE_FIELD_MILLISECOND_RESOLUTION, SortField.LONG, true); + private final static SortField BASE_SUBJECT_SORT = new SortField(BASE_SUBJECT_FIELD, SortField.STRING); + private final static SortField 
BASE_SUBJECT_SORT_REVERSE = new SortField(BASE_SUBJECT_FIELD, SortField.STRING, true); + + private final static SortField SENT_DATE_SORT = new SortField(SENT_DATE_FIELD_MILLISECOND_RESOLUTION, SortField.LONG); + private final static SortField SENT_DATE_SORT_REVERSE = new SortField(SENT_DATE_FIELD_MILLISECOND_RESOLUTION, SortField.LONG, true); + public LuceneMessageSearchIndex(Directory directory) throws CorruptIndexException, LockObtainFailedException, IOException { this(directory, true); } @@ -353,7 +363,7 @@ public class LuceneMessageSearchIndex<Id * @param membership * @return document */ - private Document createMessageDocument(Message<?> membership) throws MailboxException{ + private Document createMessageDocument(final Message<?> membership) throws MailboxException{ final Document doc = new Document(); // TODO: Better handling doc.add(new Field(MAILBOX_ID_FIELD, membership.getMailboxId().toString().toLowerCase(Locale.US), Store.YES, Index.NOT_ANALYZED)); @@ -378,13 +388,16 @@ public class LuceneMessageSearchIndex<Id public void headers(Header header) { + Date sentDate = null; + Iterator<org.apache.james.mime4j.parser.Field> fields = header.iterator(); while(fields.hasNext()) { org.apache.james.mime4j.parser.Field f = fields.next(); String headerName = f.getName().toLowerCase(Locale.US); + String headerValue = f.getBody().toLowerCase(Locale.US); String fullValue = f.toString().toLowerCase(Locale.US); doc.add(new Field(HEADERS_FIELD, fullValue, Store.NO, Index.ANALYZED)); - doc.add(new Field(PREFIX_HEADER_FIELD + headerName, f.getBody().toLowerCase(Locale.US) ,Store.NO, Index.ANALYZED)); + doc.add(new Field(PREFIX_HEADER_FIELD + headerName, headerValue, Store.NO, Index.ANALYZED)); // TODO: Handle base subject if (f instanceof AddressListField) { @@ -437,8 +450,18 @@ public class LuceneMessageSearchIndex<Id } } } + } else if (headerName.equalsIgnoreCase("Subject")) { + doc.add(new Field(BASE_SUBJECT_FIELD, SearchUtil.getBaseSubject(headerValue), Store.YES, 
Index.NOT_ANALYZED)); + } else if (f instanceof DateTimeField) { + sentDate = ((DateTimeField) f).getDate(); } } + if (sentDate == null) { + sentDate = membership.getInternalDate(); + } + doc.add(new NumericField(SENT_DATE_FIELD_MILLISECOND_RESOLUTION,Store.NO, true).setLongValue(DateUtils.truncate(sentDate,Calendar.MILLISECOND).getTime())); + + } /* @@ -727,6 +750,13 @@ public class LuceneMessageSearchIndex<Id sf = ARRIVAL_MAILBOX_SORT; } break; + case SentDate: + if (reverse) { + sf = SENT_DATE_SORT_REVERSE; + } else { + sf = SENT_DATE_SORT; + } + break; case Cc: if (reverse) { sf = FIRST_CC_MAILBOX_SORT_REVERSE; @@ -749,7 +779,11 @@ public class LuceneMessageSearchIndex<Id } break; case Subject: - // TODO: Fix me + if (reverse) { + sf = BASE_SUBJECT_SORT_REVERSE; + } else { + sf = BASE_SUBJECT_SORT; + } break; case To: if (reverse) { @@ -770,7 +804,15 @@ public class LuceneMessageSearchIndex<Id break; } if (sf != null) { + fields.add(sf); + + // Add the uid sort as tie-breaker + if (sf == SENT_DATE_SORT) { + fields.add(UID_SORT); + } else if (sf == SENT_DATE_SORT_REVERSE) { + fields.add(UID_SORT_REVERSE); + } } } sort.setSort(fields.toArray(new SortField[0])); Modified: james/mailbox/trunk/store/src/test/java/org/apache/james/mailbox/store/search/SearchUtilTest.java URL: http://svn.apache.org/viewvc/james/mailbox/trunk/store/src/test/java/org/apache/james/mailbox/store/search/SearchUtilTest.java?rev=1133130&r1=1133129&r2=1133130&view=diff ============================================================================== --- james/mailbox/trunk/store/src/test/java/org/apache/james/mailbox/store/search/SearchUtilTest.java (original) +++ james/mailbox/trunk/store/src/test/java/org/apache/james/mailbox/store/search/SearchUtilTest.java Tue Jun 7 19:04:38 2011 @@ -1,3 +1,21 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. 
See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. * + ****************************************************************/ package org.apache.james.mailbox.store.search; import static org.junit.Assert.*; @@ -31,11 +49,20 @@ public class SearchUtilTest { assertEquals("This is my subject", SearchUtil.getBaseSubject(subject)); } - /* + @Test - public void testRemoveLeaders() { - String subject ="[Blah blub] [go] re: This is my subject"; - assertEquals("This is my subject", SearchUtil.getBaseSubject(subject)); + public void testSimpleExtraction() { + String expectedSubject = "Test"; + assertEquals(expectedSubject, SearchUtil.getBaseSubject("Re: Test")); + assertEquals(expectedSubject, SearchUtil.getBaseSubject("re: Test")); + assertEquals(expectedSubject, SearchUtil.getBaseSubject("Fwd: Test")); + assertEquals(expectedSubject, SearchUtil.getBaseSubject("fwd: Test")); + assertEquals(expectedSubject, SearchUtil.getBaseSubject("Fwd: Re: Test")); + assertEquals(expectedSubject, SearchUtil.getBaseSubject("Fwd: Re: Test (fwd)")); + } + + @Test + public void testComplexExtraction() { + assertEquals("Test", SearchUtil.getBaseSubject("Re: re:re: fwd:[fwd: \t Test] (fwd) (fwd)(fwd) ")); } - */ } --------------------------------------------------------------------- To unsubscribe, e-mail: 
server-dev-unsubscr...@james.apache.org For additional commands, e-mail: server-dev-h...@james.apache.org