Author: thomasm
Date: Tue Jul 21 12:13:31 2020
New Revision: 1880102
URL: http://svn.apache.org/viewvc?rev=1880102&view=rev
Log:
OAK-9144 Indexing: dynamic boost is not robust
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/DynamicBoostTest.java
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java?rev=1880102&r1=1880101&r2=1880102&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
Tue Jul 21 12:13:31 2020
@@ -336,22 +336,49 @@ public class LuceneDocumentMaker extends
}
}
}
-
+
@Override
protected boolean indexDynamicBoost(Document doc, PropertyDefinition pd,
NodeState nodeState, String propertyName) {
NodeState propertNode = nodeState;
String parentName = PathUtils.getParentPath(propertyName);
for (String c : PathUtils.elements(parentName)) {
- propertNode = propertNode.getChildNode(c);
+ propertNode = propertNode.getChildNode(c);
}
boolean added = false;
for (String nodeName : propertNode.getChildNodeNames()) {
NodeState dynaTag = propertNode.getChildNode(nodeName);
- String dynaTagName =
dynaTag.getProperty(DYNAMIC_BOOST_TAG_NAME).getValue(Type.STRING);
- Double dynaTagConfidence =
dynaTag.getProperty(DYNAMIC_BOOST_TAG_CONFIDENCE).getValue(Type.DOUBLE);
-
+ PropertyState p = dynaTag.getProperty(DYNAMIC_BOOST_TAG_NAME);
+ if (p == null) {
+ // here we don't log a warning, because possibly it will be
added later
+ continue;
+ }
+ if (p.isArray()) {
+ log.warn(p.getName() + " is an array: {}", parentName);
+ continue;
+ }
+ String dynaTagName = p.getValue(Type.STRING);
+ p = dynaTag.getProperty(DYNAMIC_BOOST_TAG_CONFIDENCE);
+ if (p == null) {
+ // here we don't log a warning, because possibly it will be
added later
+ continue;
+ }
+ if (p.isArray()) {
+ log.warn(p.getName() + " is an array: {}", parentName);
+ continue;
+ }
+ double dynaTagConfidence;
+ try {
+ dynaTagConfidence = p.getValue(Type.DOUBLE);
+ } catch (NumberFormatException e) {
+ log.warn(p.getName() + " parsing failed: {}", parentName, e);
+ continue;
+ }
+ if (!Double.isFinite(dynaTagConfidence)) {
+ log.warn(p.getName() + " is not finite: {}", parentName);
+ continue;
+ }
List<String> tokens = new
ArrayList<>(splitForIndexing(dynaTagName));
- if (tokens.size() > 1) {
+ if (tokens.size() > 1) {
// Actual name not in tokens
tokens.add(dynaTagName);
}
@@ -386,7 +413,7 @@ public class LuceneDocumentMaker extends
ft.setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions.DOCS_ONLY);
ft.freeze();
}
-
+
AugmentedField(String name, double weight) {
super(name, "1", ft);
setBoost((float) weight);
@@ -396,9 +423,9 @@ public class LuceneDocumentMaker extends
private static List<String> splitForIndexing(String tagName) {
return
Arrays.asList(removeBackSlashes(tagName).split(DYNAMIC_BOOST_SPLIT_REGEX));
}
-
+
private static String removeBackSlashes(String text) {
return text.replaceAll("\\\\", "");
}
-
+
}
\ No newline at end of file
Modified:
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/DynamicBoostTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/DynamicBoostTest.java?rev=1880102&r1=1880101&r2=1880102&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/DynamicBoostTest.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/DynamicBoostTest.java
Tue Jul 21 12:13:31 2020
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertEqu
import java.io.ByteArrayInputStream;
import java.io.InputStream;
+import java.util.ArrayList;
import org.apache.jackrabbit.JcrConstants;
import org.apache.jackrabbit.oak.Oak;
@@ -94,7 +95,7 @@ public class DynamicBoostTest extends Ab
root.commit();
factory.indexFieldProvider = new IndexFieldProviderImpl();
- String log = runTest(IndexFieldProviderImpl.class);
+ String log = runTest(IndexFieldProviderImpl.class, true);
assertEquals(
"[" +
"Added augmented fields:
jcr:content/metadata/predictedTags/[my, a, my:a], 10.0" +
@@ -111,15 +112,37 @@ public class DynamicBoostTest extends Ab
pt.setProperty("propertyIndex", true);
root.commit();
- String log = runTest(LuceneDocumentMaker.class);
+ String log = runTest(LuceneDocumentMaker.class, true);
assertEquals(
"[" +
"Added augmented fields:
jcr:content/metadata/predictedTags/[my, a, my:a], 10.0, " +
- "Added augmented fields:
jcr:content/metadata/predictedTags/[my, a, my:a], 30.0" +
+ "Added augmented fields:
jcr:content/metadata/predictedTags/[my, a, my:a], 30.0, " +
+ "confidence is not finite: jcr:content/metadata/predictedTags,
" +
+ "confidence is not finite: jcr:content/metadata/predictedTags,
" +
+ "confidence parsing failed:
jcr:content/metadata/predictedTags, " +
+ "confidence parsing failed:
jcr:content/metadata/predictedTags, " +
+ "confidence is an array: jcr:content/metadata/predictedTags, "
+
+ "confidence is an array: jcr:content/metadata/predictedTags, "
+
+ "name is an array: jcr:content/metadata/predictedTags, " +
+ "name is an array: jcr:content/metadata/predictedTags" +
"]", log);
}
- private String runTest(Class<?> loggerClass) throws CommitFailedException {
+ @Test public void withDynamicBoostMissingProperty() throws Exception {
+ NodeTypeRegistry.register(root, toInputStream(ASSET_NODE_TYPE), "test
nodeType");
+ Tree props = createIndex("dam:Asset");
+ Tree pt = createNodeWithType(props, "predictedTags", UNSTRUCTURED);
+ pt.setProperty("name", "jcr:content/metadata/predictedTags/.*");
+ pt.setProperty("isRegexp", true);
+ pt.setProperty("dynamicBoost", true);
+ pt.setProperty("propertyIndex", true);
+ root.commit();
+
+ String log = runTest(LuceneDocumentMaker.class, false);
+ assertEquals("[]", log);
+ }
+
+ private String runTest(Class<?> loggerClass, boolean nameProperty) throws
CommitFailedException {
LogCustomizer customLogs = LogCustomizer
.forLogger(loggerClass)
.enable(Level.TRACE).create();
@@ -134,17 +157,51 @@ public class DynamicBoostTest extends Ab
"metadata", UNSTRUCTURED),
"predictedTags", UNSTRUCTURED);
Tree t = createNodeWithType(predicted, "a", UNSTRUCTURED);
- t.setProperty("name", "my:a");
+ if (nameProperty) {
+ t.setProperty("name", "my:a");
+ }
t.setProperty("confidence", 10.0);
root.commit();
- // this is not detected
+
+ // this is not detected because updateCount is not set
t.setProperty("confidence", 20);
root.commit();
+
+ // this is not detected
+ if (nameProperty) {
+ t.removeProperty("confidence");
+ t.getParent().setProperty("updateCount", 1);
+ root.commit();
+ }
+
// now we change an indexed property:
// this is detected in the dynamicBoost case
t.getParent().setProperty("updateCount", 2);
t.setProperty("confidence", 30);
root.commit();
+
+ // we try with an invalid value:
+ t.getParent().setProperty("updateCount", 3);
+ t.setProperty("confidence", Double.NEGATIVE_INFINITY);
+ root.commit();
+
+ // we try with another invalid value:
+ t.getParent().setProperty("updateCount", 4);
+ t.setProperty("confidence", "xz");
+ root.commit();
+
+ // we try with an array:
+ t.getParent().setProperty("updateCount", 5);
+ t.setProperty("confidence", new ArrayList<String>(), Type.STRINGS);
+ root.commit();
+
+ // we try with an array:
+ if (nameProperty) {
+ t.getParent().setProperty("updateCount", 6);
+ t.setProperty("name", new ArrayList<String>(), Type.STRINGS);
+ root.commit();
+ }
+
return customLogs.getLogs().toString();
} finally {
customLogs.finished();
Modified:
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java?rev=1880102&r1=1880101&r2=1880102&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
(original)
+++
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
Tue Jul 21 12:13:31 2020
@@ -247,7 +247,11 @@ public abstract class FulltextDocumentMa
dirty |= addTypedFields(doc, property, pname, pd);
}
if (pd.dynamicBoost) {
- dirty |= indexDynamicBoost(doc, pd, state, pname);
+ try {
+ dirty |= indexDynamicBoost(doc, pd, state, pname);
+ } catch (Exception e) {
+ log.error("Could not index dynamic boost for property {}
and definition {}", property, pd, e);
+ }
}
if (pd.fulltextEnabled() && includeTypeForFullText) {