Author: tommaso
Date: Fri Nov 13 15:54:04 2015
New Revision: 1714227

URL: http://svn.apache.org/viewvc?rev=1714227&view=rev
Log:
OAK-3580 - use indexes for excerpt, backport to 1.2

Modified:
    jackrabbit/oak/branches/1.2/   (props changed)
    jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java
    jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
    jackrabbit/oak/branches/1.2/oak-lucene/pom.xml
    jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
    jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
    jackrabbit/oak/branches/1.2/oak-solr-core/pom.xml
    jackrabbit/oak/branches/1.2/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/FilterQueryParser.java
    jackrabbit/oak/branches/1.2/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java
    jackrabbit/oak/branches/1.2/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml
    jackrabbit/oak/branches/1.2/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java

Propchange: jackrabbit/oak/branches/1.2/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Nov 13 15:54:04 2015
@@ -1,3 +1,3 @@
 /jackrabbit/oak/branches/1.0:1665962
-/jackrabbit/oak/trunk:1672350,1672468,1672537,1672603,1672642,1672644,1672834-1672835,1673351,1673410,1673414-1673415,1673436,1673644,1673662-1673664,1673669,1673695,1673713,1673738,1673787,1673791,1674046,1674065,1674075,1674107,1674228,1674780,1674880,1675054-1675055,1675319,1675332,1675354,1675357,1675382,1675555,1675566,1675593,1676198,1676237,1676407,1676458,1676539,1676670,1676693,1676703,1676725,1677579,1677581,1677609,1677611,1677774,1677788,1677797,1677804,1677806,1677939,1677991,1678023,1678095-1678096,1678124,1678171,1678173,1678211,1678323,1678758,1678938,1678954,1679144,1679165,1679191,1679232,1679235,1679503,1679958,1679961,1680170,1680172,1680182,1680222,1680232,1680236,1680461,1680633,1680643,1680747,1680805-1680806,1680903,1681282,1681767,1681918,1681955,1682042,1682218,1682235,1682437,1682494,1682555,1682855,1682904,1683059,1683089,1683213,1683249,1683259,1683278,1683323,1683687,1683700,1684174-1684175,1684186,1684376,1684442,1684561,1684570,1684601,1684618,1684820
 
,1684868,1685023,1685075,1685370,1685552,1685589-1685590,1685840,1685964,1685977,1685989,1685999,1686023,1686032,1686097,1686162,1686229,1686234,1686253,1686414,1686780,1686854,1686857,1686971,1687053-1687055,1687175,1687196,1687198,1687220,1687239-1687240,1687301,1687441,1687553,1688089-1688090,1688172,1688179,1688349,1688421,1688436,1688453,1688616,1688622,1688634,1688636,1688817,1689003-1689004,1689008,1689577,1689581,1689623,1689810,1689828,1689831,1689833,1689903,1690017,1690043,1690047,1690057,1690247,1690249,1690634-1690637,1690650,1690669,1690674,1690885,1690941,1691139,1691151,1691159,1691167,1691183,1691188,1691210,1691280,1691307,1691331-1691333,1691345,1691384-1691385,1691401,1691509,1692133-1692134,1692156,1692250,1692274,1692363,1692382,1692478,1692955,1693002,1693030,1693209,1693421,1693525-1693526,1694007,1694393-1694394,1694651,1694653-1694654,1695032,1695050,1695122,1695280,1695299,1695420,1695457,1695482,1695492,1695507,1695521,1695540,1695905,1696190,1696194,1696
 
242,1696285,1696375,1696522,1696578,1696759,1696916,1697363,1697373,1697410,1697582,1697589,1697616,1697672,1700191,1700231,1700397,1700403,1700506,1700571,1700727,1700749,1700769,1700775,1701065,1701619,1701733,1701743,1701750,1701768,1701806,1701810,1701814,1701948,1701955,1701959,1701965,1701986,1702014,1702022,1702045,1702051,1702241,1702272,1702387,1702405,1702423,1702860,1702942,1702960,1703212,1703382,1703395,1703411,1703428,1703430,1703568,1703592,1703758,1703858,1703878,1704256,1704282,1704285,1704457,1704479,1704490,1704614,1704629,1704636,1704655,1704670,1704886,1705005,1705027,1705043,1705055,1705250,1705268,1705273,1705323,1705677,1705701,1705871,1705992,1705998,1706009,1706037,1706059,1706212,1706218,1706270,1706764,1706772,1707049,1707191,1707435,1708105,1708315,1708546,1708592,1708766,1709012,1709852,1709978,1710013,1710031,1710049,1710205,1710242,1710559,1710575,1710590,1710614,1710637,1710789,1710811,1710816,1710972,1711248,1711282,1711296,1711498,1712042,1712319,1
 
712490,1712531,1712730,1712785,1712963,1713008,1713439,1713461,1713580,1713586,1713599-1713600,1713626,1713698,1713803,1713809,1714034,1714084,1714213
+/jackrabbit/oak/trunk:1672350,1672468,1672537,1672603,1672642,1672644,1672834-1672835,1673351,1673410,1673414-1673415,1673436,1673644,1673662-1673664,1673669,1673695,1673713,1673738,1673787,1673791,1674046,1674065,1674075,1674107,1674228,1674780,1674880,1675054-1675055,1675319,1675332,1675354,1675357,1675382,1675555,1675566,1675593,1676198,1676237,1676407,1676458,1676539,1676670,1676693,1676703,1676725,1677579,1677581,1677609,1677611,1677774,1677788,1677797,1677804,1677806,1677939,1677991,1678023,1678095-1678096,1678124,1678171,1678173,1678211,1678323,1678758,1678938,1678954,1679144,1679165,1679191,1679232,1679235,1679503,1679958,1679961,1680170,1680172,1680182,1680222,1680232,1680236,1680461,1680633,1680643,1680747,1680805-1680806,1680903,1681282,1681767,1681918,1681955,1682042,1682218,1682235,1682437,1682494,1682555,1682855,1682904,1683059,1683089,1683213,1683249,1683259,1683278,1683323,1683687,1683700,1684174-1684175,1684186,1684376,1684442,1684561,1684570,1684601,1684618,1684820
 
,1684868,1685023,1685075,1685370,1685552,1685589-1685590,1685840,1685964,1685977,1685989,1685999,1686023,1686032,1686097,1686162,1686229,1686234,1686253,1686414,1686780,1686854,1686857,1686971,1687053-1687055,1687175,1687196,1687198,1687220,1687239-1687240,1687301,1687441,1687553,1688089-1688090,1688172,1688179,1688349,1688421,1688436,1688453,1688616,1688622,1688634,1688636,1688817,1689003-1689004,1689008,1689577,1689581,1689623,1689810,1689828,1689831,1689833,1689903,1690017,1690043,1690047,1690057,1690247,1690249,1690634-1690637,1690650,1690669,1690674,1690885,1690941,1691139,1691151,1691159,1691167,1691183,1691188,1691210,1691280,1691307,1691331-1691333,1691345,1691384-1691385,1691401,1691509,1692133-1692134,1692156,1692250,1692274,1692363,1692382,1692478,1692955,1693002,1693030,1693209,1693421,1693525-1693526,1694007,1694393-1694394,1694651,1694653-1694654,1695032,1695050,1695122,1695280,1695299,1695420,1695457,1695482,1695492,1695507,1695521,1695540,1695905,1696190,1696194,1696
 
242,1696285,1696375,1696522,1696578,1696759,1696916,1697363,1697373,1697410,1697582,1697589,1697616,1697672,1700191,1700231,1700397,1700403,1700506,1700571,1700727,1700749,1700769,1700775,1701065,1701619,1701733,1701743,1701750,1701768,1701806,1701810,1701814,1701948,1701955,1701959,1701965,1701986,1702014,1702022,1702045,1702051,1702241,1702272,1702387,1702405,1702423,1702860,1702942,1702960,1703212,1703382,1703395,1703411,1703428,1703430,1703568,1703592,1703758,1703858,1703878,1704256,1704282,1704285,1704457,1704479,1704490,1704614,1704629,1704636,1704655,1704670,1704886,1705005,1705027,1705043,1705055,1705250,1705268,1705273,1705323,1705677,1705701,1705871,1705992,1705998,1706009,1706037,1706059,1706212,1706218,1706270,1706764,1706772,1707049,1707191,1707435,1708105,1708315,1708546,1708592,1708766,1709012,1709852,1709978,1710013,1710031,1710049,1710205,1710242,1710559,1710575,1710590,1710614,1710637,1710789,1710811,1710816,1710972,1711248,1711282,1711296,1711498,1712042,1712319,1
 
712490,1712531,1712730,1712785,1712963,1713008,1713439,1713461,1713580,1713586,1713599-1713600,1713626,1713698,1713803,1713809,1714034,1714061,1714084,1714170,1714213
 /jackrabbit/trunk:1345480

Modified: jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java?rev=1714227&r1=1714226&r2=1714227&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java (original)
+++ jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java Fri Nov 13 15:54:04 2015
@@ -35,18 +35,18 @@ public class ResultRowImpl implements Re
 
     private final Query query;
     private final Tree[] trees;
-    
+
     /**
      * The column values.
      */
     private final PropertyValue[] values;
-    
+
     /**
      * Whether the value at the given index is used for comparing rows (used
      * within hashCode and equals). If null, all columns are distinct.
      */
     private final boolean[] distinctValues;
-    
+
     /**
      * The values used for ordering.
      */
@@ -59,7 +59,7 @@ public class ResultRowImpl implements Re
         this.distinctValues = distinctValues;
         this.orderValues = orderValues;
     }
-    
+
     PropertyValue[] getOrderValues() {
         return orderValues;
     }
@@ -107,19 +107,35 @@ public class ResultRowImpl implements Re
         }
         // OAK-318:
         // somebody might call rep:excerpt(text)
-        // even thought the query doesn't contain that column
+        // even though the query doesn't contain that column
         if (columnName.startsWith(QueryImpl.REP_EXCERPT)) {
-            // missing excerpt, generate a default value
-            String ex = SimpleExcerptProvider.getExcerpt(getPath(), columnName,
-                    query, true);
-            if (ex != null) {
-                return PropertyValues.newString(ex);
+            int columnIndex = query.getColumnIndex(QueryImpl.REP_EXCERPT);
+            if (columnIndex >= 0 && QueryImpl.REP_EXCERPT.equals(columnName) || SimpleExcerptProvider.REP_EXCERPT_FN.
+                    equals(columnName)) {
+                // TODO : make it possible to extract property level excerpts, e.g. rep:excerpt(text) from indexes
+                PropertyValue value = values[columnIndex];
+                if (value != null) {
+                    return SimpleExcerptProvider.getExcerpt(value);
+                } else {
+                    return getFallbackExcerpt(columnName);
+                }
+            } else {
+                // missing excerpt, generate a default value
+                return getFallbackExcerpt(columnName);
             }
-            return PropertyValues.newString(getPath());
         }
         throw new IllegalArgumentException("Column not found: " + columnName);
     }
 
+    private PropertyValue getFallbackExcerpt(String columnName) {
+        String ex = SimpleExcerptProvider.getExcerpt(getPath(), columnName,
+                query, true);
+        if (ex != null) {
+            return PropertyValues.newString(ex);
+        }
+        return PropertyValues.newString(getPath());
+    }
+
     @Override
     public PropertyValue[] getValues() {
         PropertyValue[] v2 = new PropertyValue[values.length];
@@ -146,7 +162,7 @@ public class ResultRowImpl implements Re
         }
         return buff.toString();
     }
-    
+
 
     @Override
     public int hashCode() {
@@ -155,7 +171,7 @@ public class ResultRowImpl implements Re
         result = 31 * result + hashCodeOfValues();
         return result;
     }
-    
+
     private int hashCodeOfValues() {
         int result = 1;
         for (int i = 0; i < values.length; i++) {
@@ -249,4 +265,4 @@ public class ResultRowImpl implements Re
 
     }
 
-}
+}
\ No newline at end of file

Modified: jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java?rev=1714227&r1=1714226&r2=1714227&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java (original)
+++ jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java Fri Nov 13 15:54:04 2015
@@ -16,13 +16,16 @@
  */
 package org.apache.jackrabbit.oak.query.fulltext;
 
-import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters;
-
 import java.util.BitSet;
 import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
+import com.google.common.base.Splitter;
+import com.google.common.collect.ImmutableSet;
 import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.api.PropertyValue;
 import org.apache.jackrabbit.oak.api.Tree;
 import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.commons.PathUtils;
@@ -33,20 +36,24 @@ import org.apache.jackrabbit.oak.query.a
 import org.apache.jackrabbit.oak.query.ast.FullTextSearchImpl;
 import org.apache.jackrabbit.oak.query.ast.LiteralImpl;
 import org.apache.jackrabbit.oak.query.ast.OrImpl;
+import org.apache.jackrabbit.oak.spi.query.PropertyValues;
 
-import com.google.common.collect.ImmutableSet;
+import static com.google.common.collect.Maps.newHashMap;
+import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters;
 
 /**
  * This class can extract excerpts from node.
  */
 public class SimpleExcerptProvider {
 
-    private static final String REP_EXCERPT_FN = "rep:excerpt(.)";
+    public static final String REP_EXCERPT_FN = "rep:excerpt(.)";
+    public static final String EXCERPT_END = "</span></div>";
+    public static final String EXCERPT_BEGIN = "<div><span>";
 
     private static int maxFragmentSize = 150;
 
     public static String getExcerpt(String path, String columnName,
-            Query query, boolean highlight) {
+                                    Query query, boolean highlight) {
         if (path == null) {
             return null;
         }
@@ -72,7 +79,7 @@ public class SimpleExcerptProvider {
         for (PropertyState p : t.getProperties()) {
             if (p.getType().tag() == Type.STRING.tag()
                     && (columnName == null || columnName.equalsIgnoreCase(p
-                            .getName()))) {
+                    .getName()))) {
                 text.append(separator);
                 separator = " ";
                 for (String v : p.getValue(Type.STRINGS)) {
@@ -82,8 +89,7 @@ public class SimpleExcerptProvider {
         }
         Set<String> searchToken = extractFulltext(query);
         if (highlight && searchToken != null) {
-            String h = highlight(text, searchToken);
-            return h;
+            return highlight(text, searchToken);
         }
         return noHighlight(text);
     }
@@ -140,32 +146,32 @@ public class SimpleExcerptProvider {
         Set<String> out = new HashSet<String>();
         StringBuilder token = new StringBuilder();
         boolean quote = false;
-        for (int i = 0; i < in.length();) {
+        for (int i = 0; i < in.length(); ) {
             final int c = in.codePointAt(i);
             int length = Character.charCount(c);
             switch (c) {
-            case ' ':
-                if (quote) {
-                    token.append(' ');
-                } else if (token.length() > 0) {
-                    out.add(token.toString());
-                    token = new StringBuilder();
-                }
-                break;
-            case '"':
-            case '\'':
-                if (quote) {
-                    quote = false;
-                    if (token.length() > 0) {
+                case ' ':
+                    if (quote) {
+                        token.append(' ');
+                    } else if (token.length() > 0) {
                         out.add(token.toString());
                         token = new StringBuilder();
                     }
-                } else {
-                    quote = true;
-                }
-                break;
-            default:
-                token.append(new String(Character.toChars(c)));
+                    break;
+                case '"':
+                case '\'':
+                    if (quote) {
+                        quote = false;
+                        if (token.length() > 0) {
+                            out.add(token.toString());
+                            token = new StringBuilder();
+                        }
+                    } else {
+                        quote = true;
+                    }
+                    break;
+                default:
+                    token.append(new String(Character.toChars(c)));
             }
             i += length;
         }
@@ -198,7 +204,7 @@ public class SimpleExcerptProvider {
         for (String token : tokens) {
             highlight(escaped, highlight, token);
         }
-        StringBuilder excerpt = new StringBuilder("<div><span>");
+        StringBuilder excerpt = new StringBuilder(EXCERPT_BEGIN);
         boolean strong = false;
         for (int i = 0; i < escaped.length(); i++) {
             if (highlight.get(i) && !strong) {
@@ -213,10 +219,10 @@ public class SimpleExcerptProvider {
         if (strong) {
             excerpt.append("</strong>");
         }
-        excerpt.append("</span></div>");
+        excerpt.append(EXCERPT_END);
         return excerpt.toString();
     }
-    
+
     private static void highlight(String text, BitSet highlightBits, String token) {
         boolean isLike = false;
         if (token.endsWith("*")) {
@@ -247,5 +253,14 @@ public class SimpleExcerptProvider {
             }
         }
     }
-    
+
+    public static PropertyValue getExcerpt(PropertyValue value) {
+        Splitter listSplitter = Splitter.on(',').trimResults().omitEmptyStrings();
+        StringBuilder excerpt = new StringBuilder(EXCERPT_BEGIN);
+        for (String v : listSplitter.splitToList(value.toString())) {
+            excerpt.append(v);
+        }
+        excerpt.append(EXCERPT_END);
+        return PropertyValues.newString(excerpt.toString());
+    }
 }

Modified: jackrabbit/oak/branches/1.2/oak-lucene/pom.xml
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-lucene/pom.xml?rev=1714227&r1=1714226&r2=1714227&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-lucene/pom.xml (original)
+++ jackrabbit/oak/branches/1.2/oak-lucene/pom.xml Fri Nov 13 15:54:04 2015
@@ -42,6 +42,9 @@
       
org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragment      
               <!-- OAK-318 -->
       
org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragmentEndsWithDots
         <!-- OAK-318 -->
       org.apache.jackrabbit.core.query.ExcerptTest#testPreferPhrase            
                      <!-- OAK-318 -->
+      org.apache.jackrabbit.core.query.ExcerptTest#testQuotedPhrase            
                      <!-- OAK-3580 -->
+      org.apache.jackrabbit.core.query.ExcerptTest#testHighlightJa             
                      <!-- OAK-3580 -->
+      
org.apache.jackrabbit.core.query.ExcerptTest#testEncodeIllegalCharsHighlights   
               <!-- OAK-3580 -->
       org.apache.jackrabbit.core.query.QueryResultTest#testSkip                
                      <!-- OAK-484 -->
       org.apache.jackrabbit.core.query.DerefTest#testDeref                     
                      <!-- OAK-321 -->
       org.apache.jackrabbit.core.query.DerefTest#testDerefInPredicate          
                      <!-- OAK-321 -->
@@ -210,6 +213,11 @@
       <version>${lucene.version}</version>
       <scope>provided</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-highlighter</artifactId>
+      <version>${lucene.version}</version>
+    </dependency>
 
     <!-- Logging -->
     <dependency>

Modified: jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java?rev=1714227&r1=1714226&r2=1714227&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java (original)
+++ jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java Fri Nov 13 15:54:04 2015
@@ -73,12 +73,14 @@ import org.apache.jackrabbit.oak.spi.que
 import org.apache.jackrabbit.oak.spi.query.QueryIndex.AdvanceFulltextQueryIndex;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.index.Term;
@@ -98,6 +100,12 @@ import org.apache.lucene.search.TermRang
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TotalHitCountCollector;
 import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.highlight.Highlighter;
+import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
+import org.apache.lucene.search.highlight.TextFragment;
 import org.apache.lucene.search.spell.SuggestWord;
 import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.util.Version;
@@ -171,6 +179,9 @@ public class LuceneIndex implements Adva
 
     private final NodeAggregator aggregator;
 
+    private final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"),
+            new SimpleHTMLEncoder(), null);
+
     public LuceneIndex(IndexTracker tracker, NodeAggregator aggregator) {
         this.tracker = tracker;
         this.aggregator = aggregator;
@@ -292,7 +303,7 @@ public class LuceneIndex implements Adva
                 return endOfData();
             }
 
-            private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher) throws IOException {
+            private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt) throws IOException {
                 IndexReader reader = searcher.getIndexReader();
                 PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
                 reader.document(doc.doc, visitor);
@@ -317,7 +328,7 @@ public class LuceneIndex implements Adva
                         seenPaths.add(path);
                     }
 
-                    return new LuceneResultRow(path, doc.score);
+                    return new LuceneResultRow(path, doc.score, excerpt);
                 }
                 return null;
             }
@@ -357,8 +368,14 @@ public class LuceneIndex implements Adva
                             LOG.debug("... took {} ms", time);
                             nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);
 
+                            boolean addExcerpt = filter.getQueryStatement() != null && filter.getQueryStatement().contains(QueryImpl.REP_EXCERPT);
                             for (ScoreDoc doc : docs.scoreDocs) {
-                                LuceneResultRow row = convertToRow(doc, searcher);
+                                String excerpt = null;
+                                if (addExcerpt) {
+                                    excerpt = getExcerpt(indexNode, searcher, query, doc);
+                                }
+
+                                LuceneResultRow row = convertToRow(doc, searcher, excerpt);
                                 if (row != null) {
                                     queue.add(row);
                                 }
@@ -469,6 +486,35 @@ public class LuceneIndex implements Adva
         return new LucenePathCursor(itr, settings, sizeEstimator);
     }
 
+    private String getExcerpt(IndexNode indexNode, IndexSearcher searcher, Query query, ScoreDoc doc) throws IOException {
+        StringBuilder excerpt = new StringBuilder();
+        QueryScorer scorer = new QueryScorer(query);
+        scorer.setExpandMultiTermQuery(true);
+        highlighter.setFragmentScorer(scorer);
+
+        for (IndexableField field : searcher.getIndexReader().document(doc.doc).getFields())
+            if (!FieldNames.SUGGEST.equals(field.name())) {
+                try {
+                    Analyzer analyzer = indexNode.getDefinition().getAnalyzer();
+                    TokenStream tokenStream = analyzer.tokenStream(field.name(), field.stringValue());
+                    tokenStream.reset();
+                    CachingTokenFilter cachingTokenFilter = new CachingTokenFilter(tokenStream);
+                    TextFragment[] textFragments = highlighter.getBestTextFragments(cachingTokenFilter, field.stringValue(), true, 2);
+                    if (textFragments != null && textFragments.length > 0) {
+                        for (TextFragment fragment : textFragments) {
+                            if (excerpt.length() > 0) {
+                                excerpt.append("...");
+                            }
+                            excerpt.append(fragment.toString());
+                        }
+                    }
+                } catch (InvalidTokenOffsetsException e) {
+                    LOG.error("higlighting failed", e);
+                }
+            }
+        return excerpt.toString();
+    }
+
     protected static IndexPlan.Builder planBuilder(Filter filter){
         return new IndexPlan.Builder()
                 .setCostPerExecution(0) // we're local. Low-cost
@@ -1031,10 +1077,12 @@ public class LuceneIndex implements Adva
         final String path;
         final double score;
         final Iterable<String> suggestWords;
+        final String excerpt;
 
-        LuceneResultRow(String path, double score) {
+        LuceneResultRow(String path, double score, String excerpt) {
             this.path = path;
             this.score = score;
+            this.excerpt = excerpt;
             this.suggestWords = Collections.emptySet();
         }
 
@@ -1042,6 +1090,7 @@ public class LuceneIndex implements Adva
             this.path = "/";
             this.score = 1.0d;
             this.suggestWords = suggestWords;
+            this.excerpt = null;
         }
 
         @Override
@@ -1115,6 +1164,9 @@ public class LuceneIndex implements Adva
                     if (QueryImpl.REP_SPELLCHECK.equals(columnName) || QueryImpl.REP_SUGGEST.equals(columnName)) {
                         return PropertyValues.newString(Iterables.toString(currentRow.suggestWords));
                     }
+                    if (QueryImpl.REP_EXCERPT.equals(columnName)) {
+                        return PropertyValues.newString(currentRow.excerpt);
+                    }
                     return pathRow.getValue(columnName);
                 }
 

Modified: jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java?rev=1714227&r1=1714226&r2=1714227&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java (original)
+++ jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java Fri Nov 13 15:54:04 2015
@@ -18,6 +18,10 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.lucene;
 
+import javax.annotation.CheckForNull;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import javax.jcr.PropertyType;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -28,17 +32,11 @@ import java.util.List;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicReference;
 
-import javax.annotation.CheckForNull;
-import javax.annotation.Nonnull;
-import javax.annotation.Nullable;
-import javax.jcr.PropertyType;
-
 import com.google.common.collect.AbstractIterator;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Queues;
 import com.google.common.collect.Sets;
-
 import org.apache.jackrabbit.oak.api.PropertyValue;
 import org.apache.jackrabbit.oak.api.Result.SizePrecision;
 import org.apache.jackrabbit.oak.api.Type;
@@ -70,10 +68,13 @@ import org.apache.jackrabbit.oak.spi.que
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 import org.apache.jackrabbit.oak.util.PerfLogger;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CachingTokenFilter;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.index.Term;
@@ -98,6 +99,12 @@ import org.apache.lucene.search.TermRang
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TotalHitCountCollector;
 import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.highlight.Highlighter;
+import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
+import org.apache.lucene.search.highlight.TextFragment;
 import org.apache.lucene.search.spell.SuggestWord;
 import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.util.Version;
@@ -114,6 +121,7 @@ import static org.apache.jackrabbit.oak.
 import static org.apache.jackrabbit.oak.commons.PathUtils.denotesRoot;
 import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.PATH;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.SUGGEST;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.NATIVE_SORT_ORDER;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.VERSION;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newAncestorTerm;
@@ -121,9 +129,7 @@ import static org.apache.jackrabbit.oak.
 import static org.apache.jackrabbit.oak.query.QueryImpl.JCR_PATH;
 import static org.apache.jackrabbit.oak.spi.query.QueryIndex.AdvancedQueryIndex;
 import static org.apache.jackrabbit.oak.spi.query.QueryIndex.NativeQueryIndex;
-import static org.apache.lucene.search.BooleanClause.Occur.MUST;
-import static org.apache.lucene.search.BooleanClause.Occur.MUST_NOT;
-import static org.apache.lucene.search.BooleanClause.Occur.SHOULD;
+import static org.apache.lucene.search.BooleanClause.Occur.*;
 
 /**
  * Provides a QueryIndex that does lookups against a Lucene-based index
@@ -167,6 +173,8 @@ import static org.apache.lucene.search.B
 public class LucenePropertyIndex implements AdvancedQueryIndex, QueryIndex, 
NativeQueryIndex,
         AdvanceFulltextQueryIndex {
 
+    private static double MIN_COST = 2.1;
+
     private static final Logger LOG = LoggerFactory
             .getLogger(LucenePropertyIndex.class);
     private static final PerfLogger PERF_LOGGER =
@@ -183,6 +191,9 @@ public class LucenePropertyIndex impleme
 
     private final ScorerProviderFactory scorerProviderFactory;
 
+    private final Highlighter highlighter = new Highlighter(new 
SimpleHTMLFormatter("<strong>", "</strong>"),
+            new SimpleHTMLEncoder(), null);
+
     public LucenePropertyIndex(IndexTracker tracker) {
         this.tracker = tracker;
         this.scorerProviderFactory = ScorerProviderFactory.DEFAULT;
@@ -246,7 +257,7 @@ public class LucenePropertyIndex impleme
                     .append(path)
                     .append(") ");
             sb.append(getLuceneRequest(plan, null));
-            if(plan.getSortOrder() != null && !plan.getSortOrder().isEmpty()){
+            if (plan.getSortOrder() != null && !plan.getSortOrder().isEmpty()) 
{
                 sb.append(" ordering:").append(plan.getSortOrder());
             }
             if (ft != null) {
@@ -285,7 +296,7 @@ public class LucenePropertyIndex impleme
                 return endOfData();
             }
 
-            private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher 
searcher) throws IOException {
+            private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher 
searcher, String excerpt) throws IOException {
                 IndexReader reader = searcher.getIndexReader();
                 //TODO Look into usage of field cache for retrieving the path
                 //instead of reading via reader if no of docs in index are 
limited
@@ -300,13 +311,13 @@ public class LucenePropertyIndex impleme
                         String originalPath = path;
                         path = pr.transformPath(path);
 
-                        if (path == null){
+                        if (path == null) {
                             LOG.trace("Ignoring path {} : Transformation 
returned null", originalPath);
                             return null;
                         }
 
                         // avoid duplicate entries
-                        if (seenPaths.contains(path)){
+                        if (seenPaths.contains(path)) {
                             LOG.trace("Ignoring path {} : Duplicate post 
transformation", originalPath);
                             return null;
                         }
@@ -314,7 +325,7 @@ public class LucenePropertyIndex impleme
                     }
 
                     LOG.trace("Matched path {}", path);
-                    return new LuceneResultRow(path, doc.score);
+                    return new LuceneResultRow(path, doc.score, excerpt);
                 }
                 return null;
             }
@@ -331,7 +342,7 @@ public class LucenePropertyIndex impleme
 
                 ScoreDoc lastDocToRecord = null;
 
-                IndexNode indexNode = acquireIndexNode(plan);
+                final IndexNode indexNode = acquireIndexNode(plan);
                 checkState(indexNode != null);
                 try {
                     IndexSearcher searcher = indexNode.getSearcher();
@@ -368,8 +379,14 @@ public class LucenePropertyIndex impleme
                             PERF_LOGGER.end(start, -1, "{} ...", 
docs.scoreDocs.length);
                             nextBatchSize = (int) Math.min(nextBatchSize * 2L, 
100000);
 
+                            boolean addExcerpt = filter.getQueryStatement() != 
null && filter.getQueryStatement().contains(QueryImpl.REP_EXCERPT);
                             for (ScoreDoc doc : docs.scoreDocs) {
-                                LuceneResultRow row = convertToRow(doc, 
searcher);
+                                String excerpt = null;
+                                if (addExcerpt) {
+                                    excerpt = getExcerpt(indexNode, searcher, 
query, doc);
+                                }
+
+                                LuceneResultRow row = convertToRow(doc, 
searcher, excerpt);
                                 if (row != null) {
                                     queue.add(row);
                                 }
@@ -447,7 +464,7 @@ public class LucenePropertyIndex impleme
 
             private void checkForIndexVersionChange(IndexSearcher searcher) {
                 long currentVersion = getVersion(searcher);
-                if (currentVersion != lastSearchIndexerVersion && lastDoc != 
null){
+                if (currentVersion != lastSearchIndexerVersion && lastDoc != 
null) {
                     lastDoc = null;
                     LOG.debug("Change in index version detected {} => {}. 
Query would be performed without " +
                             "offset", currentVersion, 
lastSearchIndexerVersion);
@@ -468,7 +485,9 @@ public class LucenePropertyIndex impleme
                         LOG.debug("estimate size for query " + query);
                         TotalHitCountCollector collector = new 
TotalHitCountCollector();
                         searcher.search(query, collector);
-                        return collector.getTotalHits();
+                        int totalHits = collector.getTotalHits();
+                        LOG.debug("Estimated size for query {} is {}", query, 
totalHits);
+                        return totalHits;
                     }
                     LOG.debug("estimate size: not a Query: " + 
luceneRequestFacade.getLuceneRequest());
                 } catch (IOException e) {
@@ -482,6 +501,35 @@ public class LucenePropertyIndex impleme
         return new LucenePathCursor(itr, plan, settings, sizeEstimator);
     }
 
+    private String getExcerpt(IndexNode indexNode, IndexSearcher searcher, 
Query query, ScoreDoc doc) throws IOException {
+        StringBuilder excerpt = new StringBuilder();
+        QueryScorer scorer = new QueryScorer(query);
+        scorer.setExpandMultiTermQuery(true);
+        highlighter.setFragmentScorer(scorer);
+
+        for (IndexableField field : 
searcher.getIndexReader().document(doc.doc).getFields())
+            if (!SUGGEST.equals(field.name())) {
+                try {
+                    Analyzer analyzer = 
indexNode.getDefinition().getAnalyzer();
+                    TokenStream tokenStream = 
analyzer.tokenStream(field.name(), field.stringValue());
+                    tokenStream.reset();
+                    CachingTokenFilter cachingTokenFilter = new 
CachingTokenFilter(tokenStream);
+                    TextFragment[] textFragments = 
highlighter.getBestTextFragments(cachingTokenFilter, field.stringValue(), true, 
2);
+                    if (textFragments != null && textFragments.length > 0) {
+                        for (TextFragment fragment : textFragments) {
+                            if (excerpt.length() > 0) {
+                                excerpt.append("...");
+                            }
+                            excerpt.append(fragment.toString());
+                        }
+                    }
+                } catch (InvalidTokenOffsetsException e) {
+                    LOG.error("higlighting failed", e);
+                }
+            }
+        return excerpt.toString();
+    }
+
     @Override
     public NodeAggregator getNodeAggregator() {
         return null;
@@ -494,7 +542,7 @@ public class LucenePropertyIndex impleme
      *
      * @return true if the term is related to node
      */
-    public static boolean isNodePath(String fulltextTermPath){
+    public static boolean isNodePath(String fulltextTermPath) {
         return fulltextTermPath.endsWith("/*");
     }
 
@@ -556,7 +604,7 @@ public class LucenePropertyIndex impleme
         }
     }
 
-    private static String getIndexName(IndexPlan plan){
+    private static String getIndexName(IndexPlan plan) {
         return PathUtils.getName(getPlanResult(plan).indexPath);
     }
 
@@ -642,7 +690,7 @@ public class LucenePropertyIndex impleme
         }
 
         if (qs.size() == 0) {
-            if (reader == null){
+            if (reader == null) {
                 //When called in planning mode then some queries like 
rep:similar
                 //cannot create query as reader is not provided. In such case 
we
                 //just return match all queries
@@ -662,7 +710,7 @@ public class LucenePropertyIndex impleme
     /**
      * Perform additional wraps on the list of queries to allow, for example, 
the NOT CONTAINS to
      * play properly when sent to lucene.
-     * 
+     *
      * @param qs the list of queries. Cannot be null.
      * @return
      */
@@ -696,7 +744,7 @@ public class LucenePropertyIndex impleme
             }
 
             if (!unwrapped) {
-                bq.add(q, MUST);                
+                bq.add(q, MUST);
             }
         }
         return new LuceneRequestFacade<Query>(bq);
@@ -704,7 +752,7 @@ public class LucenePropertyIndex impleme
 
     /**
      * unwraps any NOT clauses from the provided boolean query into another 
boolean query.
-     * 
+     *
      * @param input the query to be analysed for the existence of NOT clauses. 
Cannot be null.
      * @param output the query where the unwrapped NOTs will be saved into. 
Cannot be null.
      * @return true if there where at least one unwrapped NOT. false otherwise.
@@ -719,23 +767,23 @@ public class LucenePropertyIndex impleme
                 unwrapped = true;
             }
         }
-        
+
         return unwrapped;
     }
-    
+
     private CustomScoreQuery getCustomScoreQuery(IndexPlan plan, Query 
subQuery) {
         PlanResult planResult = getPlanResult(plan);
         IndexDefinition idxDef = planResult.indexDefinition;
         String providerName = idxDef.getScorerProviderName();
         if (scorerProviderFactory != null && providerName != null) {
-               return  scorerProviderFactory.getScorerProvider(providerName)
-                       .createCustomScoreQuery(subQuery);
+            return scorerProviderFactory.getScorerProvider(providerName)
+                    .createCustomScoreQuery(subQuery);
         }
         return null;
     }
 
     private static void addNonFullTextConstraints(List<Query> qs,
-            IndexPlan plan, IndexReader reader) {
+                                                  IndexPlan plan, IndexReader 
reader) {
         Filter filter = plan.getFilter();
         PlanResult planResult = getPlanResult(plan);
         IndexDefinition defn = planResult.indexDefinition;
@@ -745,37 +793,37 @@ public class LucenePropertyIndex impleme
 
         String path = getPathRestriction(plan);
         switch (filter.getPathRestriction()) {
-        case ALL_CHILDREN:
-            if (defn.evaluatePathRestrictions()) {
-                if ("/".equals(path)) {
-                    break;
-                }
-                qs.add(new TermQuery(newAncestorTerm(path)));
-            }
-            break;
-        case DIRECT_CHILDREN:
-            if (defn.evaluatePathRestrictions()) {
-                BooleanQuery bq = new BooleanQuery();
-                bq.add(new BooleanClause(new TermQuery(newAncestorTerm(path)), 
BooleanClause.Occur.MUST));
-                bq.add(new BooleanClause(newDepthQuery(path), 
BooleanClause.Occur.MUST));
-                qs.add(bq);
-            }
-            break;
-        case EXACT:
-            qs.add(new TermQuery(newPathTerm(path)));
-            break;
-        case PARENT:
-            if (denotesRoot(path)) {
-                // there's no parent of the root node
-                // we add a path that can not possibly occur because there
-                // is no way to say "match no documents" in Lucene
-                qs.add(new TermQuery(new Term(FieldNames.PATH, "///")));
-            } else {
-                qs.add(new TermQuery(newPathTerm(getParentPath(path))));
-            }
-            break;
-        case NO_RESTRICTION:
-            break;
+            case ALL_CHILDREN:
+                if (defn.evaluatePathRestrictions()) {
+                    if ("/".equals(path)) {
+                        break;
+                    }
+                    qs.add(new TermQuery(newAncestorTerm(path)));
+                }
+                break;
+            case DIRECT_CHILDREN:
+                if (defn.evaluatePathRestrictions()) {
+                    BooleanQuery bq = new BooleanQuery();
+                    bq.add(new BooleanClause(new 
TermQuery(newAncestorTerm(path)), BooleanClause.Occur.MUST));
+                    bq.add(new BooleanClause(newDepthQuery(path), 
BooleanClause.Occur.MUST));
+                    qs.add(bq);
+                }
+                break;
+            case EXACT:
+                qs.add(new TermQuery(newPathTerm(path)));
+                break;
+            case PARENT:
+                if (denotesRoot(path)) {
+                    // there's no parent of the root node
+                    // we add a path that can not possibly occur because there
+                    // is no way to say "match no documents" in Lucene
+                    qs.add(new TermQuery(new Term(FieldNames.PATH, "///")));
+                } else {
+                    qs.add(new TermQuery(newPathTerm(getParentPath(path))));
+                }
+                break;
+            case NO_RESTRICTION:
+                break;
         }
 
         for (PropertyRestriction pr : filter.getPropertyRestrictions()) {
@@ -831,14 +879,14 @@ public class LucenePropertyIndex impleme
                 typeFromRestriction = pr.first.getType().tag();
             } else if (pr.last != null && pr.last.getType() != Type.UNDEFINED) 
{
                 typeFromRestriction = pr.last.getType().tag();
-            } else if (pr.list != null && !pr.list.isEmpty()){
+            } else if (pr.list != null && !pr.list.isEmpty()) {
                 typeFromRestriction = pr.list.get(0).getType().tag();
             }
         }
         return getPropertyType(defn, pr.propertyName, typeFromRestriction);
     }
 
-    private static int getPropertyType(PropertyDefinition defn, String name, 
int defaultVal){
+    private static int getPropertyType(PropertyDefinition defn, String name, 
int defaultVal) {
         if (defn.isTypeDefined()) {
             return defn.getType();
         }
@@ -879,13 +927,13 @@ public class LucenePropertyIndex impleme
                                      PropertyDefinition defn) {
         int propType = determinePropertyType(defn, pr);
 
-        if (pr.isNullRestriction()){
+        if (pr.isNullRestriction()) {
             return new TermQuery(new Term(FieldNames.NULL_PROPS, defn.name));
         }
 
         //If notNullCheckEnabled explicitly enabled use the simple TermQuery
         //otherwise later fallback to range query
-        if (pr.isNotNullRestriction() && defn.notNullCheckEnabled){
+        if (pr.isNotNullRestriction() && defn.notNullCheckEnabled) {
             return new TermQuery(new Term(FieldNames.NOT_NULL_PROPS, 
defn.name));
         }
 
@@ -1011,12 +1059,12 @@ public class LucenePropertyIndex impleme
                 }
             }
         }
-        throw new IllegalStateException("PropertyRestriction not handled " + 
pr + " for index " + defn );
+        throw new IllegalStateException("PropertyRestriction not handled " + 
pr + " for index " + defn);
     }
 
-    static long getVersion(IndexSearcher indexSearcher){
+    static long getVersion(IndexSearcher indexSearcher) {
         IndexReader reader = indexSearcher.getIndexReader();
-        if (reader instanceof DirectoryReader){
+        if (reader instanceof DirectoryReader) {
             return ((DirectoryReader) reader).getVersion();
         }
         return -1;
@@ -1034,11 +1082,11 @@ public class LucenePropertyIndex impleme
             return createLikeQuery(FieldNames.NODE_NAME, first);
         }
 
-        throw new IllegalStateException("For nodeName queries only EQUALS and 
LIKE are supported "+pr);
+        throw new IllegalStateException("For nodeName queries only EQUALS and 
LIKE are supported " + pr);
     }
 
     private static void addReferenceConstraint(String uuid, List<Query> qs,
-            IndexReader reader) {
+                                               IndexReader reader) {
         if (reader == null) {
             // getPlan call
             qs.add(new TermQuery(new Term("*", uuid)));
@@ -1112,7 +1160,7 @@ public class LucenePropertyIndex impleme
                     if (x instanceof BooleanQuery) {
                         BooleanQuery bq = (BooleanQuery) x;
                         if ((bq.getClauses().length == 1) &&
-                            (bq.getClauses()[0].getOccur() == 
BooleanClause.Occur.MUST_NOT)) {
+                                (bq.getClauses()[0].getOccur() == 
BooleanClause.Occur.MUST_NOT)) {
                             hasMustNot = true;
                             q.add(bq.getClauses()[0]);
                         }
@@ -1133,7 +1181,7 @@ public class LucenePropertyIndex impleme
 
             private boolean visitTerm(String propertyName, String text, String 
boost, boolean not) {
                 String p = getLuceneFieldName(propertyName, pr);
-                Query q = tokenToQuery(text, p, pr.indexingRule,  analyzer);
+                Query q = tokenToQuery(text, p, pr.indexingRule, analyzer);
                 if (q == null) {
                     return false;
                 }
@@ -1154,12 +1202,12 @@ public class LucenePropertyIndex impleme
     }
 
     static String getLuceneFieldName(@Nullable String p, PlanResult pr) {
-        if (p == null){
+        if (p == null) {
             return FieldNames.FULLTEXT;
         }
 
-        if (isNodePath(p)){
-            if (pr.isPathTransformed()){
+        if (isNodePath(p)) {
+            if (pr.isPathTransformed()) {
                 p = PathUtils.getName(p);
             } else {
                 //Get rid of /* as aggregated fulltext field name is the
@@ -1167,13 +1215,13 @@ public class LucenePropertyIndex impleme
                 p = 
FieldNames.createFulltextFieldName(PathUtils.getParentPath(p));
             }
         } else {
-            if (pr.isPathTransformed()){
+            if (pr.isPathTransformed()) {
                 p = PathUtils.getName(p);
             }
             p = FieldNames.createAnalyzedFieldName(p);
         }
 
-        if ("*".equals(p)){
+        if ("*".equals(p)) {
             p = FieldNames.FULLTEXT;
         }
         return p;
@@ -1218,7 +1266,7 @@ public class LucenePropertyIndex impleme
     /**
      * Following logic is taken from 
org.apache.jackrabbit.core.query.lucene.JackrabbitQueryParser#parse(java.lang.String)
      */
-    private static String rewriteQueryText(String textsearch){
+    private static String rewriteQueryText(String textsearch) {
         // replace escaped ' with just '
         StringBuilder rewritten = new StringBuilder();
         // the default lucene query parser recognizes 'AND' and 'NOT' as
@@ -1272,8 +1320,10 @@ public class LucenePropertyIndex impleme
         final String path;
         final double score;
         final Iterable<String> suggestWords;
+        final String excerpt;
 
-        LuceneResultRow(String path, double score) {
+        LuceneResultRow(String path, double score, String excerpt) {
+            this.excerpt = excerpt;
             this.path = path;
             this.score = score;
             this.suggestWords = Collections.emptySet();
@@ -1283,6 +1333,7 @@ public class LucenePropertyIndex impleme
             this.path = "/";
             this.score = 1.0d;
             this.suggestWords = suggestWords;
+            this.excerpt = null;
         }
 
         @Override
@@ -1364,6 +1415,9 @@ public class LucenePropertyIndex impleme
                     if (QueryImpl.REP_SPELLCHECK.equals(columnName) || 
QueryImpl.REP_SUGGEST.equals(columnName)) {
                         return 
PropertyValues.newString(Iterables.toString(currentRow.suggestWords));
                     }
+                    if (QueryImpl.REP_EXCERPT.equals(columnName)) {
+                        return PropertyValues.newString(currentRow.excerpt);
+                    }
                     return pathRow.getValue(columnName);
                 }
 

Modified: jackrabbit/oak/branches/1.2/oak-solr-core/pom.xml
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-solr-core/pom.xml?rev=1714227&r1=1714226&r2=1714227&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-solr-core/pom.xml (original)
+++ jackrabbit/oak/branches/1.2/oak-solr-core/pom.xml Fri Nov 13 15:54:04 2015
@@ -58,6 +58,8 @@
             
org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragment      
               <!-- OAK-318 -->
             
org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragmentEndsWithDots
         <!-- OAK-318 -->
             org.apache.jackrabbit.core.query.ExcerptTest#testPreferPhrase      
                            <!-- OAK-318 -->
+            org.apache.jackrabbit.core.query.ExcerptTest#testQuotedPhrase      
                            <!-- OAK-3580 -->
+            
org.apache.jackrabbit.core.query.ExcerptTest#testEncodeIllegalCharsHighlights   
               <!-- OAK-3580 -->
         </known.issues>
     </properties>
 

Modified: 
jackrabbit/oak/branches/1.2/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/FilterQueryParser.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/FilterQueryParser.java?rev=1714227&r1=1714226&r2=1714227&view=diff
==============================================================================
--- 
jackrabbit/oak/branches/1.2/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/FilterQueryParser.java
 (original)
+++ 
jackrabbit/oak/branches/1.2/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/FilterQueryParser.java
 Fri Nov 13 15:54:04 2015
@@ -21,6 +21,7 @@ import java.util.List;
 import javax.jcr.PropertyType;
 
 import 
org.apache.jackrabbit.oak.plugins.index.solr.configuration.OakSolrConfiguration;
+import org.apache.jackrabbit.oak.query.QueryImpl;
 import org.apache.jackrabbit.oak.query.fulltext.FullTextAnd;
 import org.apache.jackrabbit.oak.query.fulltext.FullTextContains;
 import org.apache.jackrabbit.oak.query.fulltext.FullTextExpression;
@@ -218,6 +219,19 @@ class FilterQueryParser {
             solrQuery.addFilterQuery(ptQueryBuilder.toString());
         }
 
+        if (filter.getQueryStatement() != null && 
filter.getQueryStatement().contains(QueryImpl.REP_EXCERPT)) {
+            if (!solrQuery.getHighlight()) {
+                // enable highlighting
+                solrQuery.setHighlight(true);
+                // defaults
+                solrQuery.set("hl.fl", "*");
+                solrQuery.set("hl.encoder", "html");
+                solrQuery.set("hl.mergeContiguous", true);
+                solrQuery.setHighlightSimplePre("<strong>");
+                solrQuery.setHighlightSimplePost("</strong>");
+            }
+        }
+
         if (configuration.useForPathRestrictions()) {
             Filter.PathRestriction pathRestriction = 
filter.getPathRestriction();
             if (pathRestriction != null) {

Modified: 
jackrabbit/oak/branches/1.2/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java?rev=1714227&r1=1714226&r2=1714227&view=diff
==============================================================================
--- 
jackrabbit/oak/branches/1.2/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java
 (original)
+++ 
jackrabbit/oak/branches/1.2/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java
 Fri Nov 13 15:54:04 2015
@@ -16,6 +16,7 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.solr.query;
 
+import javax.annotation.CheckForNull;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -25,7 +26,6 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import javax.annotation.CheckForNull;
 
 import com.google.common.collect.AbstractIterator;
 import com.google.common.collect.Iterables;
@@ -62,9 +62,7 @@ import org.apache.solr.common.util.Simpl
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static org.apache.jackrabbit.oak.commons.PathUtils.getAncestorPath;
-import static org.apache.jackrabbit.oak.commons.PathUtils.getDepth;
-import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath;
+import static org.apache.jackrabbit.oak.commons.PathUtils.*;
 
 /**
  * A Solr based {@link QueryIndex}
@@ -128,11 +126,11 @@ public class SolrQueryIndex implements F
         }
 
         // property restriction OR native language property restriction 
defined AND property restriction handled
-        if (filter.getPropertyRestrictions() != null 
+        if (filter.getPropertyRestrictions() != null
                 && filter.getPropertyRestrictions().size() > 0
-                && (filter.getPropertyRestriction(NATIVE_SOLR_QUERY) != null 
+                && (filter.getPropertyRestriction(NATIVE_SOLR_QUERY) != null
                 || filter.getPropertyRestriction(NATIVE_LUCENE_QUERY) != null
-                || configuration.useForPropertyRestrictions()) 
+                || configuration.useForPropertyRestrictions())
                 && !hasIgnoredProperties(filter.getPropertyRestrictions(), 
configuration)) {
             match++;
         }
@@ -154,7 +152,6 @@ public class SolrQueryIndex implements F
         }
 
 
-
         return match;
     }
 
@@ -317,7 +314,21 @@ public class SolrQueryIndex implements F
 
                         onRetrievedDocs(filter, docs);
 
+                        Map<String, Map<String, List<String>>> highlighting = 
queryResponse.getHighlighting();
                         for (SolrDocument doc : docs) {
+                            // handle highlight
+                            if (highlighting != null) {
+                                Object pathObject = 
doc.getFieldValue(configuration.getPathField());
+                                if (pathObject != null && 
highlighting.get(String.valueOf(pathObject)) != null) {
+                                    Map<String, List<String>> value = 
highlighting.get(String.valueOf(pathObject));
+                                    for (Map.Entry<String, List<String>> entry 
: value.entrySet()) {
+                                        // all highlighted values end up in 
'rep:excerpt', regardless of field match
+                                        for (String v : entry.getValue()) {
+                                            
doc.addField(QueryImpl.REP_EXCERPT, v);
+                                        }
+                                    }
+                                }
+                            }
                             SolrResultRow row = convertToRow(doc);
                             if (row != null) {
                                 queue.add(row);
@@ -434,7 +445,7 @@ public class SolrQueryIndex implements F
                 (!configuration.useForPropertyRestrictions() // Solr index not 
used for properties
                         || (configuration.getUsedProperties().size() > 0 && 
!configuration.getUsedProperties().contains(propertyName)) // not explicitly 
contained in the used properties
                         || propertyName.contains("/") // no child-level 
property restrictions
-                        || "rep:excerpt".equals(propertyName) // rep:excerpt 
is handled by the query engine
+                        || "rep:excerpt".equals(propertyName) // rep:excerpt 
is not handled at the property level
                         || 
QueryConstants.RESTRICTION_LOCAL_NAME.equals(propertyName)
                         || 
configuration.getIgnoredProperties().contains(propertyName));
     }
@@ -564,8 +575,23 @@ public class SolrQueryIndex implements F
                     }
                     // TODO : make inclusion of doc configurable
                     Collection<Object> fieldValues = 
currentRow.doc.getFieldValues(columnName);
-                    return currentRow.doc != null ? PropertyValues.newString(
-                            Iterables.toString(fieldValues != null ? 
fieldValues : Collections.emptyList())) : null;
+                    String value;
+                    if (fieldValues != null && fieldValues.size() > 0) {
+                        if (fieldValues.size() > 1) {
+                            value = Iterables.toString(fieldValues);
+                        } else {
+                            Object fieldValue = 
currentRow.doc.getFieldValue(columnName);
+                            if (fieldValue != null) {
+                                value = fieldValue.toString();
+                            } else {
+                                value = null;
+                            }
+                        }
+                    } else {
+                        value = Iterables.toString(Collections.emptyList());
+                    }
+
+                    return PropertyValues.newString(value);
                 }
 
             };

Modified: 
jackrabbit/oak/branches/1.2/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml?rev=1714227&r1=1714226&r2=1714227&view=diff
==============================================================================
--- 
jackrabbit/oak/branches/1.2/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml
 (original)
+++ 
jackrabbit/oak/branches/1.2/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml
 Fri Nov 13 15:54:04 2015
@@ -1219,6 +1219,114 @@
         </arr>
     </requestHandler>
 
+    <!-- Highlighting Component
+
+       http://wiki.apache.org/solr/HighlightingParameters
+    -->
+    <searchComponent class="solr.HighlightComponent" name="highlight">
+        <highlighting>
+            <!-- Configure the standard fragmenter -->
+            <!-- This could most likely be commented out in the "default" case -->
+            <fragmenter name="gap"
+                        default="true"
+                        class="solr.highlight.GapFragmenter">
+                <lst name="defaults">
+                    <int name="hl.fragsize">100</int>
+                </lst>
+            </fragmenter>
+
+            <!-- A regular-expression-based fragmenter
+               (for sentence extraction)
+            -->
+            <fragmenter name="regex"
+                        class="solr.highlight.RegexFragmenter">
+                <lst name="defaults">
+                    <!-- slightly smaller fragsizes work better because of 
slop -->
+                    <int name="hl.fragsize">70</int>
+                    <!-- allow 50% slop on fragment sizes -->
+                    <float name="hl.regex.slop">0.5</float>
+                    <!-- a basic sentence pattern -->
+                    <str name="hl.regex.pattern">[-\w
+                        ,/\n\&quot;&apos;]{20,200}
+                    </str>
+                </lst>
+            </fragmenter>
+
+            <!-- Configure the standard formatter -->
+            <formatter name="html"
+                       default="true"
+                       class="solr.highlight.HtmlFormatter">
+                <lst name="defaults">
+                    <str name="hl.simple.pre"><![CDATA[<em>]]></str>
+                    <str name="hl.simple.post"><![CDATA[</em>]]></str>
+                </lst>
+            </formatter>
+
+            <!-- Configure the standard encoder -->
+            <encoder name="html"
+                     class="solr.highlight.HtmlEncoder"/>
+
+            <!-- Configure the standard fragListBuilder -->
+            <fragListBuilder name="simple"
+                             class="solr.highlight.SimpleFragListBuilder"/>
+
+            <!-- Configure the single fragListBuilder -->
+            <fragListBuilder name="single"
+                             class="solr.highlight.SingleFragListBuilder"/>
+
+            <!-- Configure the weighted fragListBuilder -->
+            <fragListBuilder name="weighted"
+                             default="true"
+                             class="solr.highlight.WeightedFragListBuilder"/>
+
+            <!-- default tag FragmentsBuilder -->
+            <fragmentsBuilder name="default"
+                              default="true"
+                              
class="solr.highlight.ScoreOrderFragmentsBuilder">
+                <!--
+                <lst name="defaults">
+                  <str name="hl.multiValuedSeparatorChar">/</str>
+                </lst>
+                -->
+            </fragmentsBuilder>
+
+            <!-- multi-colored tag FragmentsBuilder -->
+            <fragmentsBuilder name="colored"
+                              
class="solr.highlight.ScoreOrderFragmentsBuilder">
+                <lst name="defaults">
+                    <str name="hl.tag.pre"><![CDATA[
+               <b style="background:yellow">,<b style="background:lawgreen">,
+               <b style="background:aquamarine">,<b 
style="background:magenta">,
+               <b style="background:palegreen">,<b style="background:coral">,
+               <b style="background:wheat">,<b style="background:khaki">,
+               <b style="background:lime">,<b 
style="background:deepskyblue">]]></str>
+                    <str name="hl.tag.post"><![CDATA[</b>]]></str>
+                </lst>
+            </fragmentsBuilder>
+
+            <boundaryScanner name="default"
+                             default="true"
+                             class="solr.highlight.SimpleBoundaryScanner">
+                <lst name="defaults">
+                    <str name="hl.bs.maxScan">10</str>
+                    <str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
+                </lst>
+            </boundaryScanner>
+
+            <boundaryScanner name="breakIterator"
+                             
class="solr.highlight.BreakIteratorBoundaryScanner">
+                <lst name="defaults">
+                    <!-- type should be one of CHARACTER, WORD(default), LINE 
and SENTENCE -->
+                    <str name="hl.bs.type">WORD</str>
+                    <!-- language and country are used when constructing the Locale object. -->
+                    <!-- That Locale object is then used to obtain the BreakIterator instance. -->
+                    <str name="hl.bs.language">en</str>
+                    <str name="hl.bs.country">US</str>
+                </lst>
+            </boundaryScanner>
+        </highlighting>
+    </searchComponent>
+
     <!-- Update Processors
 
          Chains of Update Processor Factories for dealing with Update

Modified: 
jackrabbit/oak/branches/1.2/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java?rev=1714227&r1=1714226&r2=1714227&view=diff
==============================================================================
--- 
jackrabbit/oak/branches/1.2/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java
 (original)
+++ 
jackrabbit/oak/branches/1.2/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java
 Fri Nov 13 15:54:04 2015
@@ -66,6 +66,26 @@ public class SpellcheckTest extends Abst
         assertEquals("[hello, hold]", result);
     }
 
+    /**
+     * Runs a rep:spellcheck() query for the misspelled multi-word phrase
+     * 'votin in ontari' against four titled nodes and expects the result
+     * to be "voting in ontario".
+     */
+    public void testSpellcheckMultipleWords() throws Exception {
+        Session session = superuser;
+        QueryManager qm = session.getWorkspace().getQueryManager();
+        // each title is set on its own node; previously n3 and n4 were left
+        // without a title while n2's title was overwritten twice
+        Node n1 = testRootNode.addNode("node1");
+        n1.setProperty("jcr:title", "it is always a good idea to go visiting ontario");
+        Node n2 = testRootNode.addNode("node2");
+        n2.setProperty("jcr:title", "ontario is a nice place to live in");
+        Node n3 = testRootNode.addNode("node3");
+        n3.setProperty("jcr:title", "I flied to ontario for voting for the major polls");
+        Node n4 = testRootNode.addNode("node4");
+        n4.setProperty("jcr:title", "I will go voting in ontario, I always voted since I've been allowed to");
+        session.save();
+
+        String xpath = "/jcr:root[rep:spellcheck('votin in ontari')]/(rep:spellcheck())";
+        Query q = qm.createQuery(xpath, Query.XPATH);
+        String result = getResult(q.execute(), "rep:spellcheck()");
+        assertNotNull(result);
+        assertEquals("voting in ontario", result);
+    }
+
     static String getResult(QueryResult result, String propertyName) throws 
RepositoryException {
         StringBuilder buff = new StringBuilder();
         RowIterator it = result.getRows();


Reply via email to