Author: catholicon Date: Tue Jan 16 22:03:46 2018 New Revision: 1821325 URL: http://svn.apache.org/viewvc?rev=1821325&view=rev Log: OAK-4401: Excerpt Highlighting for a property is not correct
This is just about ensuring that SimpleExcerptProvider would use whitespace as a delimiter marker before highlighting Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/SimpleExcerptProviderTest.java Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java?rev=1821325&r1=1821324&r2=1821325&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java (original) +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/SimpleExcerptProvider.java Tue Jan 16 22:03:46 2018 @@ -19,6 +19,7 @@ package org.apache.jackrabbit.oak.query; import java.util.BitSet; import java.util.HashSet; import java.util.Set; +import java.util.regex.Pattern; import com.google.common.base.Splitter; import com.google.common.collect.ImmutableSet; @@ -34,6 +35,7 @@ import org.apache.jackrabbit.oak.query.a import org.apache.jackrabbit.oak.query.ast.OrImpl; import org.apache.jackrabbit.oak.plugins.memory.PropertyValues; +import static java.lang.Character.isWhitespace; import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters; /** @@ -239,15 +241,30 @@ class SimpleExcerptProvider { } int endIndex = index + token.length(); if (isLike) { - int nextSpace = text.indexOf(" ", endIndex); - if (nextSpace != -1) { + int nextSpace = endIndex; + + while (nextSpace < text.length() && !isWhitespace(text.charAt(nextSpace))) { + nextSpace++; + } + + if (nextSpace != text.length()) { endIndex = nextSpace; } else { endIndex = text.length(); } } - while (index < endIndex) { - highlightBits.set(index++); + + boolean isStartOk 
= (index == 0) || //allow for highlighting for token at the beginning + isWhitespace(text.charAt(index-1)); //else token must follow a space + boolean isEndOk = (endIndex == text.length()) || //token is at the end of string + isWhitespace(text.charAt(endIndex)); //else token must precede a space + + if (isStartOk && isEndOk) { + while (index < endIndex) { + highlightBits.set(index++); + } + } else { + index = endIndex; } } } Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/SimpleExcerptProviderTest.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/SimpleExcerptProviderTest.java?rev=1821325&r1=1821324&r2=1821325&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/SimpleExcerptProviderTest.java (original) +++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/SimpleExcerptProviderTest.java Tue Jan 16 22:03:46 2018 @@ -23,8 +23,10 @@ import static com.google.common.collect. 
import static org.apache.jackrabbit.oak.query.SimpleExcerptProvider.highlight; import static org.junit.Assert.assertEquals; +import java.util.Map; import java.util.Random; +import com.google.common.collect.Maps; import org.junit.Test; public class SimpleExcerptProviderTest { @@ -59,6 +61,52 @@ public class SimpleExcerptProviderTest { } } + @Test + public void hightlightCompleteWordOnly() { + String[] whitespaces = new String[] {" ", "\t"}; + Map<String, String> simpleCheck = Maps.newHashMap(); // highlight "of" + + // simple ones + simpleCheck.put("official conflict of interest", + "<div><span>official conflict <strong>of</strong> interest</span></div>"); + simpleCheck.put("of to new city", + "<div><span><strong>of</strong> to new city</span></div>"); + simpleCheck.put("out of the roof", + "<div><span>out <strong>of</strong> the roof</span></div>"); + simpleCheck.put("well this is of", + "<div><span>well this is <strong>of</strong></span></div>"); + + for (Map.Entry<String, String> simple : simpleCheck.entrySet()) { + String text = simple.getKey(); + String expect = simple.getValue(); + for (String whitespace : whitespaces) { + text = text.replaceAll(" ", whitespace); + expect = expect.replaceAll(" ", whitespace); + assertEquals("highlighting '" + text + "' for 'of' (whitespace - '" + whitespace + "')", + expect, highlight(sb(text), of("of"))); + } + } + + Map<String, String> wildcardCheck = Maps.newHashMap(); // highlight "of*" + wildcardCheck.put("office room", + "<div><span><strong>office</strong> room</span></div>"); + wildcardCheck.put("office room off", + "<div><span><strong>office</strong> room <strong>off</strong></span></div>"); + wildcardCheck.put("big office room", + "<div><span>big <strong>office</strong> room</span></div>"); + + for (Map.Entry<String, String> wildcard : wildcardCheck.entrySet()) { + String text = wildcard.getKey(); + String expect = wildcard.getValue(); + for (String whitespace : whitespaces) { + text = text.replaceAll(" ", 
whitespace); + expect = expect.replaceAll(" ", whitespace); + assertEquals("highlighting '" + text + "' for 'of*' (whitespace - '" + whitespace + "')", + expect, highlight(sb(text), of("of*"))); + } + } + } + private static String randomString(Random r, String set) { int len = r.nextInt(10); StringBuilder buff = new StringBuilder();