Author: mkataria Date: Fri Oct 4 08:55:12 2019 New Revision: 1867967 URL: http://svn.apache.org/viewvc?rev=1867967&view=rev Log: OAK-8642: Add warn logs if we add/update a string property larger than 100KB (index)
Added: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMakerLargeStringPropertiesLogTest.java Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java?rev=1867967&r1=1867966&r2=1867967&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java Fri Oct 4 08:55:12 2019 @@ -102,30 +102,24 @@ public class LuceneDocumentMaker extends } @Override - protected boolean indexTypedProperty(Document doc, PropertyState property, String pname, PropertyDefinition pd) { + protected void indexTypedProperty(Document doc, PropertyState property, String pname, PropertyDefinition pd, int i) { int tag = property.getType().tag(); - boolean fieldAdded = false; - for (int i = 0; i < property.count(); i++) { - Field f; - if (tag == Type.LONG.tag()) { - f = new LongField(pname, property.getValue(Type.LONG, i), Field.Store.NO); - } else if (tag == Type.DATE.tag()) { - String date = property.getValue(Type.DATE, i); - f = new LongField(pname, FieldFactory.dateToLong(date), Field.Store.NO); - } else if (tag == Type.DOUBLE.tag()) { - f = new DoubleField(pname, property.getValue(Type.DOUBLE, i), Field.Store.NO); - } else if (tag == Type.BOOLEAN.tag()) { - f = new StringField(pname, property.getValue(Type.BOOLEAN, i).toString(), Field.Store.NO); - } else { - f = new StringField(pname, property.getValue(Type.STRING, i), Field.Store.NO); - } - if (includePropertyValue(property, i, pd)){ - doc.add(f); - fieldAdded = true; - } + Field f; + if (tag == Type.LONG.tag()) { + f = new LongField(pname, property.getValue(Type.LONG, i), Field.Store.NO); + } else if (tag == Type.DATE.tag()) { + String date = property.getValue(Type.DATE, i); + f = new LongField(pname, FieldFactory.dateToLong(date), Field.Store.NO); + } else if (tag == Type.DOUBLE.tag()) { + f = new DoubleField(pname, property.getValue(Type.DOUBLE, i), Field.Store.NO); + } else if (tag == Type.BOOLEAN.tag()) { + f = new StringField(pname, property.getValue(Type.BOOLEAN, i).toString(), Field.Store.NO); + } else { + f = new StringField(pname, property.getValue(Type.STRING, i), Field.Store.NO); } - return fieldAdded; + + doc.add(f); } @Override Added: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMakerLargeStringPropertiesLogTest.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMakerLargeStringPropertiesLogTest.java?rev=1867967&view=auto ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMakerLargeStringPropertiesLogTest.java (added) +++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMakerLargeStringPropertiesLogTest.java Fri Oct 4 08:55:12 2019 @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jackrabbit.oak.plugins.index.lucene; + +import ch.qos.logback.classic.LoggerContext; +import ch.qos.logback.classic.spi.ILoggingEvent; +import ch.qos.logback.core.read.ListAppender; +import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.commons.junit.TemporarySystemProperty; +import org.apache.jackrabbit.oak.plugins.index.lucene.util.IndexDefinitionBuilder; +import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextDocumentMaker; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.apache.jackrabbit.oak.spi.state.NodeState; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.security.InvalidParameterException; + +import static java.util.Arrays.asList; +import static org.apache.jackrabbit.oak.InitialContentHelper.INITIAL_CONTENT; +import static org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState.EMPTY_NODE; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +public class LuceneDocumentMakerLargeStringPropertiesLogTest { + + ListAppender<ILoggingEvent> listAppender = null; + private final String nodeImplLogger = LuceneDocumentMaker.class.getName(); + private final String warnMessage = "String length: {} for property: {} at Node: {} is greater than configured value {}"; + private String customStringPropertyThresholdLimit = "9"; + private String smallStringProperty = "1234567"; + private String largeStringPropertyAsPerCustomThreshold = "1234567890"; + + @Rule + public TemporarySystemProperty temporarySystemProperty = new TemporarySystemProperty(); + + @Before + public void loggingAppenderStart() { + LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory(); + listAppender = new ListAppender<>(); + listAppender.start(); + context.getLogger(nodeImplLogger).addAppender(listAppender); + } + + @After + public void loggingAppenderStop() { + listAppender.stop(); + } + + private void setThresholdLimit(String threshold) { + System.setProperty(FulltextDocumentMaker.WARN_LOG_STRING_SIZE_THRESHOLD_KEY, threshold); + } + + private LuceneDocumentMaker addPropertyAccordingToType(NodeBuilder test, Type type, String... str) throws IOException { + NodeState root = INITIAL_CONTENT; + IndexDefinitionBuilder builder = new IndexDefinitionBuilder(); + builder.indexRule("nt:base") + .property("foo") + .propertyIndex() + .analyzed() + .valueExcludedPrefixes("/jobs"); + + LuceneIndexDefinition defn = LuceneIndexDefinition.newBuilder(root, builder.build(), "/foo").build(); + LuceneDocumentMaker docMaker = new LuceneDocumentMaker(defn, + defn.getApplicableIndexingRule("nt:base"), "/x"); + + if (Type.STRINGS == type) { + test.setProperty("foo", asList(str), Type.STRINGS); + } else if (Type.STRING == type && str.length == 1) { + test.setProperty("foo", str[0]); + } else { + throw new InvalidParameterException(); + } + return docMaker; + } + + @Test + public void testDefaultThreshold() throws IOException { + NodeBuilder test = EMPTY_NODE.builder(); + LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRING, largeStringPropertyAsPerCustomThreshold); + assertNotNull(docMaker.makeDocument(test.getNodeState())); + assertFalse(isWarnMessagePresent(listAppender)); + } + + @Test + public void testNoLoggingOnAddingSmallStringWithCustomThreshold() throws IOException { + setThresholdLimit(customStringPropertyThresholdLimit); + NodeBuilder test = EMPTY_NODE.builder(); + LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRING, smallStringProperty); + assertNotNull(docMaker.makeDocument(test.getNodeState())); + assertFalse(isWarnMessagePresent(listAppender)); + } + + @Test + public void testNoLoggingOnAddingSmallStringArrayWithCustomThreshold() throws IOException { + setThresholdLimit(customStringPropertyThresholdLimit); + NodeBuilder test = EMPTY_NODE.builder(); + LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRINGS, smallStringProperty, smallStringProperty); + assertNotNull(docMaker.makeDocument(test.getNodeState())); + assertFalse(isWarnMessagePresent(listAppender)); + } + + @Test + public void testNoLoggingOnAddingSmallStringArrayWithoutCustomThreshold() throws IOException { + NodeBuilder test = EMPTY_NODE.builder(); + LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRINGS, smallStringProperty, smallStringProperty); + assertNotNull(docMaker.makeDocument(test.getNodeState())); + assertFalse(isWarnMessagePresent(listAppender)); + } + + @Test + public void testLoggingOnAddingLargeStringWithCustomThreshold() throws IOException { + setThresholdLimit(customStringPropertyThresholdLimit); + NodeBuilder test = EMPTY_NODE.builder(); + LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRING, largeStringPropertyAsPerCustomThreshold); + assertNotNull(docMaker.makeDocument(test.getNodeState())); + assertTrue(isWarnMessagePresent(listAppender)); + } + + @Test + public void testLoggingOnAddingLargeStringWithoutCustomThreshold() throws IOException { + // setThresholdLimit(null); + NodeBuilder test = EMPTY_NODE.builder(); + LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRING, smallStringProperty); + assertNotNull(docMaker.makeDocument(test.getNodeState())); + assertFalse(isWarnMessagePresent(listAppender)); + } + + @Test + public void testLoggingOnAddingLargeStringArrayOneLargePropertyWithoutCustomThreshold() throws IOException { + NodeBuilder test = EMPTY_NODE.builder(); + LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRINGS, largeStringPropertyAsPerCustomThreshold, smallStringProperty); + assertNotNull(docMaker.makeDocument(test.getNodeState())); + assertFalse(isWarnMessagePresent(listAppender)); + } + + @Test + public void testLoggingOnAddingLargeStringArrayOneLargePropertyWithCustomThreshold() throws IOException { + setThresholdLimit(customStringPropertyThresholdLimit); + NodeBuilder test = EMPTY_NODE.builder(); + LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRINGS, largeStringPropertyAsPerCustomThreshold, smallStringProperty); + assertNotNull(docMaker.makeDocument(test.getNodeState())); + assertTrue(isWarnMessagePresent(listAppender)); + assertEquals(2, listAppender.list.size()); + } + + @Test + public void testLoggingOnAddingLargeStringArrayTwoLargePropertiesWithCustomThreshold() throws IOException { + setThresholdLimit(customStringPropertyThresholdLimit); + NodeBuilder test = EMPTY_NODE.builder(); + LuceneDocumentMaker docMaker = addPropertyAccordingToType(test, Type.STRINGS, largeStringPropertyAsPerCustomThreshold, largeStringPropertyAsPerCustomThreshold); + assertNotNull(docMaker.makeDocument(test.getNodeState())); + assertTrue(isWarnMessagePresent(listAppender)); + // number of logs equal twice the number of large properties once for fulltext indexing + // and once for property indexing. + assertEquals(4, listAppender.list.size()); + } + + private boolean isWarnMessagePresent(ListAppender<ILoggingEvent> listAppender) { + for (ILoggingEvent loggingEvent : listAppender.list) { + if (loggingEvent.getMessage().contains(warnMessage)) { + return true; + } + } + return false; + } + +} \ No newline at end of file Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java?rev=1867967&r1=1867966&r2=1867967&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java Fri Oct 4 08:55:12 2019 @@ -115,29 +115,23 @@ public class ElasticsearchDocumentMaker } @Override - protected boolean indexTypedProperty(ElasticsearchDocument doc, PropertyState property, String pname, PropertyDefinition pd) { + protected void indexTypedProperty(ElasticsearchDocument doc, PropertyState property, String pname, PropertyDefinition pd, int i) { int tag = property.getType().tag(); - boolean fieldAdded = false; - for (int i = 0; i < property.count(); i++) { - Object f; - if (tag == Type.LONG.tag()) { - f = property.getValue(Type.LONG, i); - } else if (tag == Type.DATE.tag()) { - f = property.getValue(Type.DATE, i); - } else if (tag == Type.DOUBLE.tag()) { - f = property.getValue(Type.DOUBLE, i); - } else if (tag == Type.BOOLEAN.tag()) { - f = property.getValue(Type.BOOLEAN, i).toString(); - } else { - f = property.getValue(Type.STRING, i); - } - if (includePropertyValue(property, i, pd)){ - doc.addProperty(pname, f); - fieldAdded = true; - } + Object f; + if (tag == Type.LONG.tag()) { + f = property.getValue(Type.LONG, i); + } else if (tag == Type.DATE.tag()) { + f = property.getValue(Type.DATE, i); + } else if (tag == Type.DOUBLE.tag()) { + f = property.getValue(Type.DOUBLE, i); + } else if (tag == Type.BOOLEAN.tag()) { + f = property.getValue(Type.BOOLEAN, i).toString(); + } else { + f = property.getValue(Type.STRING, i); } - return fieldAdded; + + doc.addProperty(pname, f); } @Override Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java?rev=1867967&r1=1867966&r2=1867967&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java (original) +++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java Fri Oct 4 08:55:12 2019 @@ -57,11 +57,14 @@ import static org.apache.jackrabbit.oak. public abstract class FulltextDocumentMaker<D> implements DocumentMaker<D> { private final Logger log = LoggerFactory.getLogger(getClass()); + public static final String WARN_LOG_STRING_SIZE_THRESHOLD_KEY = "oak.repository.property.index.logWarnStringSizeThreshold"; + private static final int DEFAULT_WARN_LOG_STRING_SIZE_THRESHOLD_VALUE = 102400; private final FulltextBinaryTextExtractor textExtractor; protected final IndexDefinition definition; protected final IndexDefinition.IndexingRule indexingRule; protected final String path; + private final int logWarnStringSizeThreshold; public FulltextDocumentMaker(@Nullable FulltextBinaryTextExtractor textExtractor, @NotNull IndexDefinition definition, @@ -71,6 +74,8 @@ public abstract class FulltextDocumentMa this.definition = checkNotNull(definition); this.indexingRule = checkNotNull(indexingRule); this.path = checkNotNull(path); + this.logWarnStringSizeThreshold = Integer.getInteger(WARN_LOG_STRING_SIZE_THRESHOLD_KEY, + DEFAULT_WARN_LOG_STRING_SIZE_THRESHOLD_VALUE); } protected abstract D initDoc(); @@ -93,7 +98,7 @@ public abstract class FulltextDocumentMa protected abstract void indexFulltextValue(D doc, String value); - protected abstract boolean indexTypedProperty(D doc, PropertyState property, String pname, PropertyDefinition pd); + protected abstract void indexTypedProperty(D doc, PropertyState property, String pname, PropertyDefinition pd, int index); protected abstract void indexAncestors(D doc, String path); @@ -105,6 +110,13 @@ public abstract class FulltextDocumentMa protected abstract void indexNodeName(D doc, String value); + protected void logLargeStringProperties(String propertyName, String value) { + if (value.length() > logWarnStringSizeThreshold) { + log.warn("String length: {} for property: {} at Node: {} is greater than configured value {}", + value.length(), propertyName, path, logWarnStringSizeThreshold); + } + } + @Nullable public D makeDocument(NodeState state) throws IOException { return makeDocument(state, false, Collections.<PropertyState>emptyList()); @@ -231,6 +243,7 @@ public abstract class FulltextDocumentMa if (pd.fulltextEnabled() && includeTypeForFullText) { for (String value : property.getValue(Type.STRINGS)) { + logLargeStringProperties(property.getName(), value); if (!includePropertyValue(value, pd)){ continue; } @@ -281,7 +294,22 @@ public abstract class FulltextDocumentMa protected abstract void indexSimilarityStrings(D doc, PropertyDefinition pd, String value) throws IOException; private boolean addTypedFields(D doc, PropertyState property, String pname, PropertyDefinition pd) { - return indexTypedProperty(doc, property, pname, pd); + int tag = property.getType().tag(); + boolean fieldAdded = false; + + for (int i = 0; i < property.count(); i++) { + if (!includePropertyValue(property, i, pd)) { + continue; + } + + indexTypedProperty(doc, property, pname, pd, i); + fieldAdded = true; + + if (tag == Type.STRING.tag()) { + logLargeStringProperties(property.getName(), property.getValue(Type.STRING, i)); + } + } + return fieldAdded; } private boolean addTypedOrderedFields(D doc,