Author: alexparvulescu
Date: Wed Jun 12 08:24:28 2013
New Revision: 1492109
URL: http://svn.apache.org/r1492109 Log: OAK-318 Excerpt support - introduced a simple excerpt provider that injects an excerpt if it is needed - enabled parts of the excerpt tests in jackrabbit Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java (with props) Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/Query.java jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java jackrabbit/oak/trunk/oak-jcr/pom.xml jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryJcrTest.java Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/Query.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/Query.java?rev=1492109&r1=1492108&r2=1492109&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/Query.java (original) +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/Query.java Wed Jun 12 08:24:28 2013 @@ -582,7 +582,7 @@ public class Query { return i; } } - throw new IllegalArgumentException("Column not found: " + columnName); + return -1; } public PropertyValue getBindVariableValue(String bindVariableName) { Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java?rev=1492109&r1=1492108&r2=1492109&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java (original) +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java Wed Jun 12 
08:24:28 2013 @@ -16,10 +16,12 @@ */ package org.apache.jackrabbit.oak.query; +import org.apache.jackrabbit.JcrConstants; import org.apache.jackrabbit.oak.api.PropertyValue; import org.apache.jackrabbit.oak.api.ResultRow; import org.apache.jackrabbit.oak.query.ast.ColumnImpl; import org.apache.jackrabbit.oak.query.ast.SelectorImpl; +import org.apache.jackrabbit.oak.spi.query.PropertyValues; /** * A query result row that keeps all data (for this row only) in memory. @@ -59,7 +61,23 @@ public class ResultRowImpl implements Re @Override public PropertyValue getValue(String columnName) { - return values[query.getColumnIndex(columnName)]; + int index = query.getColumnIndex(columnName); + if (index >= 0) { + return values[index]; + } + if (JcrConstants.JCR_PATH.equals(columnName)) { + return PropertyValues.newString(getPath()); + } + if (columnName.startsWith(Query.REP_EXCERPT)) { + String ex = new SimpleExcerptProvider().getExcerpt(getPath(), + columnName, query, true); + // missing excerpt, generate a default value + if (ex != null) { + return PropertyValues.newString(ex); + } + return null; + } + throw new IllegalArgumentException("Column not found: " + columnName); } @Override Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java?rev=1492109&view=auto ============================================================================== --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java (added) +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java Wed Jun 12 08:24:28 2013 @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.query; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.jackrabbit.oak.api.PropertyState; +import org.apache.jackrabbit.oak.api.Tree; +import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.commons.PathUtils; +import org.apache.jackrabbit.oak.query.ast.AndImpl; +import org.apache.jackrabbit.oak.query.ast.ConstraintImpl; +import org.apache.jackrabbit.oak.query.ast.FullTextSearchImpl; +import org.apache.jackrabbit.oak.query.ast.LiteralImpl; +import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters; + +public class SimpleExcerptProvider { + + private static final String REP_EXCERPT_FN = "rep:excerpt(.)"; + + private static int maxFragmentSize = 150; + + public String getExcerpt(String path, String columnName, Query query, + boolean highlight) { + if (path == null) { + return null; + } + Tree t = query.getTree(path); + if (t == null || !t.exists()) { + return null; + } + String filter = null; + if (columnName.contains("/")) { + for (String p : PathUtils.elements(PathUtils + .getParentPath(columnName))) { + if (t.hasChild(p)) { + t = t.getChild(p); + } else { + return null; + } + } + filter = extractExcerptProperty(PathUtils.getName(columnName)); + } else { + filter = 
extractExcerptProperty(columnName); + } + + StringBuilder text = new StringBuilder(); + String separator = ""; + for (PropertyState p : t.getProperties()) { + if (p.getType().tag() == Type.STRING.tag() + && (filter == null || filter.equalsIgnoreCase(p.getName()))) { + text.append(separator); + separator = " "; + for (String v : p.getValue(Type.STRINGS)) { + text.append(v); + } + } + } + String searchToken = extractFulltext(query.getConstraint()); + if (highlight && searchToken != null) { + return highlight(text, searchToken); + } + return noHighlight(text); + } + + private String extractExcerptProperty(String column) { + // most frequent case first + if (REP_EXCERPT_FN.equalsIgnoreCase(column)) { + return null; + } + return column.substring(column.indexOf("(") + 1, column.indexOf(")")); + } + + private static String extractFulltext(ConstraintImpl c) { + if (c instanceof FullTextSearchImpl) { + FullTextSearchImpl f = (FullTextSearchImpl) c; + if (f.getFullTextSearchExpression() instanceof LiteralImpl) { + LiteralImpl l = (LiteralImpl) f.getFullTextSearchExpression(); + return l.getLiteralValue().getValue(Type.STRING); + } + return null; + } + if (c instanceof AndImpl) { + AndImpl a = (AndImpl) c; + String t = extractFulltext(a.getConstraint1()); + if (t == null) { + return extractFulltext(a.getConstraint2()); + } + return t; + } + return null; + } + + private static List<String> tokenize(String in) { + List<String> out = new ArrayList<String>(); + StringBuilder token = new StringBuilder(); + boolean quote = false; + for (int i = 0; i < in.length();) { + final int c = in.codePointAt(i); + int length = Character.charCount(c); + switch (c) { + case ' ': + if (quote) { + token.append(' '); + } else if (token.length() > 0) { + out.add(token.toString()); + token = new StringBuilder(); + } + break; + case '"': + case '\'': + if (quote) { + quote = false; + if (token.length() > 0) { + out.add(token.toString()); + token = new StringBuilder(); + } + } else { + quote = true; + 
} + break; + default: + token.append(new String(Character.toChars(c))); + } + i += length; + } + if (token.length() > 0) { + out.add(token.toString()); + } + return out; + } + + private static String noHighlight(StringBuilder text) { + if (text.length() > maxFragmentSize) { + int lastSpace = text.lastIndexOf(" ", maxFragmentSize); + if (lastSpace != -1) { + text.setLength(lastSpace); + } else { + text.setLength(maxFragmentSize); + } + text.append(" ..."); + } + StringBuilder excerpt = new StringBuilder("<div><span>"); + excerpt.append(encodeIllegalXMLCharacters(text.toString())); + excerpt.append("</span></div>"); + return excerpt.toString(); + } + + private static String highlight(StringBuilder text, String searchToken) { + List<String> tokens = tokenize(searchToken); + text = new StringBuilder(encodeIllegalXMLCharacters(text.toString())); + for (String token : tokens) { + text = replaceAll(text, token, "<strong>", "</strong>"); + } + + StringBuilder excerpt = new StringBuilder("<div><span>"); + excerpt.append(text.toString()); + excerpt.append("</span></div>"); + return excerpt.toString(); + } + + private static StringBuilder replaceAll(StringBuilder in, String token, + String start, String end) { + boolean isLike = false; + if (token.endsWith("*")) { + token = token.substring(0, token.length() - 1); + isLike = true; + } + int index = in.indexOf(token); + while (index != -1) { + int endIndex = index + token.length(); + if (isLike) { + int nextSpace = in.indexOf(" ", endIndex); + if (nextSpace != -1) { + endIndex = nextSpace; + } else { + endIndex = in.length(); + } + } + String current = in.substring(index, endIndex); + StringBuilder newToken = new StringBuilder(start); + newToken.append(current); + newToken.append(end); + String newTokenS = newToken.toString(); + in.replace(index, index + current.length(), newTokenS); + index = in.indexOf(token, + in.lastIndexOf(newTokenS) + newTokenS.length()); + } + return in; + } +} Propchange: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Modified: jackrabbit/oak/trunk/oak-jcr/pom.xml URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-jcr/pom.xml?rev=1492109&r1=1492108&r2=1492109&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-jcr/pom.xml (original) +++ jackrabbit/oak/trunk/oak-jcr/pom.xml Wed Jun 12 08:24:28 2013 @@ -297,6 +297,13 @@ org.apache.jackrabbit.oak.jcr.security.user.MemberNodeImportTest <!-- OAK-414, OAK-482 --> org.apache.jackrabbit.oak.jcr.security.user.UserImportTest#testImportGroupIntoUsersTree <!-- OAK-821 --> + + org.apache.jackrabbit.core.query.ExcerptTest#testMoreTextDotsAtEnd <!-- OAK-318 --> + org.apache.jackrabbit.core.query.ExcerptTest#testMoreTextDotsAtStart <!-- OAK-318 --> + org.apache.jackrabbit.core.query.ExcerptTest#testMoreTextDotsAtStartAndEnd <!-- OAK-318 --> + org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragment <!-- OAK-318 --> + org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragmentEndsWithDots <!-- OAK-318 --> + org.apache.jackrabbit.core.query.ExcerptTest#testPreferPhrase <!-- OAK-318 --> </known.issues> </properties> Modified: jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryJcrTest.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryJcrTest.java?rev=1492109&r1=1492108&r2=1492109&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryJcrTest.java (original) +++ jackrabbit/oak/trunk/oak-jcr/src/test/java/org/apache/jackrabbit/oak/jcr/query/QueryJcrTest.java Wed Jun 12 08:24:28 2013 @@ -20,6 +20,7 @@ import 
junit.framework.Test; import junit.framework.TestCase; import junit.framework.TestSuite; +import org.apache.jackrabbit.core.query.ExcerptTest; import org.apache.jackrabbit.core.query.FulltextQueryTest; import org.apache.jackrabbit.core.query.FulltextSQL2QueryTest; import org.apache.jackrabbit.core.query.JoinTest; @@ -52,12 +53,12 @@ public class QueryJcrTest extends TestCa suite.addTestSuite(SQL2OffsetLimitTest.class); suite.addTestSuite(LimitAndOffsetTest.class); suite.addTestSuite(OrderByTest.class); + suite.addTestSuite(ExcerptTest.class); // FAILURES // // suite.addTestSuite(QueryResultTest.class); // OAK-484 // suite.addTestSuite(ParentNodeTest.class); // OAK-309 - // suite.addTestSuite(ExcerptTest.class); // OAK-318 // suite.addTestSuite(SimilarQueryTest.class); // OAK-319 // suite.addTestSuite(DerefTest.class); // OAK-321 // suite.addTestSuite(XPathAxisTest.class); // OAK-322
