Author: alexparvulescu Date: Fri Sep 13 08:43:45 2013 New Revision: 1522815
URL: http://svn.apache.org/r1522815 Log: OAK-318 Excerpt support - removed some of the unnecessary parts, reverted to the initial behavior - added OR support for the full-text search token extraction from a query Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/SelectorImpl.java jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java?rev=1522815&r1=1522814&r2=1522815&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java (original) +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java Fri Sep 13 08:43:45 2013 @@ -22,7 +22,6 @@ import java.util.Comparator; import org.apache.jackrabbit.JcrConstants; import org.apache.jackrabbit.oak.api.PropertyValue; import org.apache.jackrabbit.oak.api.ResultRow; -import org.apache.jackrabbit.oak.api.Type; import org.apache.jackrabbit.oak.query.ast.ColumnImpl; import org.apache.jackrabbit.oak.query.ast.OrderingImpl; import org.apache.jackrabbit.oak.query.fulltext.SimpleExcerptProvider; @@ -73,27 +72,23 @@ public class ResultRowImpl implements Re @Override public PropertyValue getValue(String columnName) { + int index = query.getColumnIndex(columnName); + if (index >= 0) { + return values[index]; + } + if (JcrConstants.JCR_PATH.equals(columnName)) { + return PropertyValues.newString(getPath()); + } // OAK-318: // somebody might call rep:excerpt(text) // even thought the query doesn't contain that column if 
(columnName.startsWith(QueryImpl.REP_EXCERPT)) { - // get the search token - int index = query.getColumnIndex(QueryImpl.REP_EXCERPT); - String searchToken = values[index].getValue(Type.STRING); - String ex = new SimpleExcerptProvider().getExcerpt(getPath(), - columnName, query, searchToken, true); // missing excerpt, generate a default value + String ex = SimpleExcerptProvider.getExcerpt(getPath(), columnName, + query, true); if (ex != null) { return PropertyValues.newString(ex); } - return null; - } - - int index = query.getColumnIndex(columnName); - if (index >= 0) { - return values[index]; - } - if (JcrConstants.JCR_PATH.equals(columnName)) { return PropertyValues.newString(getPath()); } throw new IllegalArgumentException("Column not found: " + columnName); Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/SelectorImpl.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/SelectorImpl.java?rev=1522815&r1=1522814&r2=1522815&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/SelectorImpl.java (original) +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/SelectorImpl.java Fri Sep 13 08:43:45 2013 @@ -43,7 +43,6 @@ import org.apache.jackrabbit.oak.api.Tre import org.apache.jackrabbit.oak.commons.PathUtils; import org.apache.jackrabbit.oak.query.QueryImpl; import org.apache.jackrabbit.oak.query.fulltext.FullTextExpression; -import org.apache.jackrabbit.oak.query.fulltext.SimpleExcerptProvider; import org.apache.jackrabbit.oak.query.index.FilterImpl; import org.apache.jackrabbit.oak.spi.query.Cursor; import org.apache.jackrabbit.oak.spi.query.Cursors; @@ -326,15 +325,7 @@ public class SelectorImpl extends Source } else if (propertyName.equals(QueryImpl.JCR_SCORE)) { return 
currentRow.getValue(QueryImpl.JCR_SCORE); } else if (propertyName.equals(QueryImpl.REP_EXCERPT)) { - // The excerpt itself is calculated at runtime (this is weird, - // but Jackrabbit 2.x supports that, see OAK-318). - // We store the search token (the full-text condition text) - // in this column (which is also weird), as this is needed for highlighting - String searchToken = SimpleExcerptProvider.extractFulltext(query.getConstraint()); - if (searchToken == null) { - return PropertyValues.newString(path); - } - return PropertyValues.newString(searchToken); + return currentRow.getValue(QueryImpl.REP_EXCERPT); } return PropertyValues.create(t.getProperty(propertyName)); } Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java?rev=1522815&r1=1522814&r2=1522815&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java (original) +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java Fri Sep 13 08:43:45 2013 @@ -16,19 +16,24 @@ */ package org.apache.jackrabbit.oak.query.fulltext; -import java.util.ArrayList; -import java.util.List; +import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters; + +import java.util.HashSet; +import java.util.Set; import org.apache.jackrabbit.oak.api.PropertyState; import org.apache.jackrabbit.oak.api.Tree; import org.apache.jackrabbit.oak.api.Type; import org.apache.jackrabbit.oak.commons.PathUtils; import org.apache.jackrabbit.oak.query.Query; +import org.apache.jackrabbit.oak.query.QueryImpl; import org.apache.jackrabbit.oak.query.ast.AndImpl; import org.apache.jackrabbit.oak.query.ast.ConstraintImpl; import 
org.apache.jackrabbit.oak.query.ast.FullTextSearchImpl; import org.apache.jackrabbit.oak.query.ast.LiteralImpl; -import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters; +import org.apache.jackrabbit.oak.query.ast.OrImpl; + +import com.google.common.collect.ImmutableSet; public class SimpleExcerptProvider { @@ -36,8 +41,8 @@ public class SimpleExcerptProvider { private static int maxFragmentSize = 150; - public String getExcerpt(String path, String columnName, Query query, String searchToken, - boolean highlight) { + public static String getExcerpt(String path, String columnName, + Query query, boolean highlight) { if (path == null) { return null; } @@ -45,8 +50,8 @@ public class SimpleExcerptProvider { if (t == null || !t.exists()) { return null; } - String filter = null; - if (columnName.contains("/")) { + columnName = extractExcerptProperty(columnName); + if (columnName != null && columnName.contains("/")) { for (String p : PathUtils.elements(PathUtils .getParentPath(columnName))) { if (t.hasChild(p)) { @@ -55,16 +60,15 @@ public class SimpleExcerptProvider { return null; } } - filter = extractExcerptProperty(PathUtils.getName(columnName)); - } else { - filter = extractExcerptProperty(columnName); + columnName = PathUtils.getName(columnName); } StringBuilder text = new StringBuilder(); String separator = ""; for (PropertyState p : t.getProperties()) { if (p.getType().tag() == Type.STRING.tag() - && (filter == null || filter.equalsIgnoreCase(p.getName()))) { + && (columnName == null || columnName.equalsIgnoreCase(p + .getName()))) { text.append(separator); separator = " "; for (String v : p.getValue(Type.STRINGS)) { @@ -72,10 +76,10 @@ public class SimpleExcerptProvider { } } } - if (highlight) { - if (searchToken != null) { - return highlight(text, searchToken); - } + Set<String> searchToken = extractFulltext(query); + if (highlight && searchToken != null) { + String h = highlight(text, searchToken); + return h; } return noHighlight(text); } @@ 
-88,30 +92,48 @@ public class SimpleExcerptProvider { return column.substring(column.indexOf("(") + 1, column.indexOf(")")); } - public static String extractFulltext(ConstraintImpl c) { - // TODO instanceof should not be used, + private static Set<String> extractFulltext(Query q) { + // TODO instanceof should not be used + if (q instanceof QueryImpl) { + return extractFulltext(((QueryImpl) q).getConstraint()); + } + return ImmutableSet.of(); + } + + private static Set<String> extractFulltext(ConstraintImpl c) { + Set<String> tokens = new HashSet<String>(); + // TODO instanceof should not be used, // as it will break without us noticing if we extend the AST if (c instanceof FullTextSearchImpl) { FullTextSearchImpl f = (FullTextSearchImpl) c; if (f.getFullTextSearchExpression() instanceof LiteralImpl) { LiteralImpl l = (LiteralImpl) f.getFullTextSearchExpression(); - return l.getLiteralValue().getValue(Type.STRING); + tokens.add(l.getLiteralValue().getValue(Type.STRING)); } - return null; } if (c instanceof AndImpl) { AndImpl a = (AndImpl) c; - String t = extractFulltext(a.getConstraint1()); - if (t == null) { - return extractFulltext(a.getConstraint2()); - } - return t; + tokens.addAll(extractFulltext(a.getConstraint1())); + tokens.addAll(extractFulltext(a.getConstraint2())); + } + if (c instanceof OrImpl) { + OrImpl o = (OrImpl) c; + tokens.addAll(extractFulltext(o.getConstraint1())); + tokens.addAll(extractFulltext(o.getConstraint2())); + } + return tokens; + } + + private static Set<String> tokenize(Set<String> in) { + Set<String> tokens = new HashSet<String>(); + for (String s : in) { + tokens.addAll(tokenize(s)); } - return null; + return tokens; } - private static List<String> tokenize(String in) { - List<String> out = new ArrayList<String>(); + private static Set<String> tokenize(String in) { + Set<String> out = new HashSet<String>(); StringBuilder token = new StringBuilder(); boolean quote = false; for (int i = 0; i < in.length();) { @@ -165,8 +187,8 @@ 
public class SimpleExcerptProvider { return excerpt.toString(); } - private static String highlight(StringBuilder text, String searchToken) { - List<String> tokens = tokenize(searchToken); + private static String highlight(StringBuilder text, Set<String> searchToken) { + Set<String> tokens = tokenize(searchToken); text = new StringBuilder(encodeIllegalXMLCharacters(text.toString())); for (String token : tokens) { text = replaceAll(text, token, "<strong>", "</strong>");
