This is an automated email from the ASF dual-hosted git repository.
ngupta pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new f0ac80ebd8 OAK-10776 | Add support for custom excludes in incremental
index store (#1437)
f0ac80ebd8 is described below
commit f0ac80ebd85363e36fc71150c334c3665b9d7631
Author: nit0906 <[email protected]>
AuthorDate: Wed May 8 09:46:37 2024 +0530
OAK-10776 | Add support for custom excludes in incremental index store
(#1437)
* OAK-10776 | Add support for custom excludes while building incremental
index store
---------
Co-authored-by: Nitin Gupta <[email protected]>
---
.../indexer/document/DocumentStoreIndexerBase.java | 30 ++++++
.../jackrabbit/oak/index/IncrementalStoreTest.java | 110 +++++++++++++++++----
2 files changed, 123 insertions(+), 17 deletions(-)
diff --git
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java
index ecdba7a66a..cb435986b6 100644
---
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java
+++
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/DocumentStoreIndexerBase.java
@@ -24,6 +24,7 @@ import com.mongodb.client.MongoDatabase;
import org.apache.jackrabbit.guava.common.base.Stopwatch;
import org.apache.jackrabbit.guava.common.io.Closer;
import org.apache.jackrabbit.oak.api.CommitFailedException;
+import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.commons.concurrent.ExecutorCloser;
import org.apache.jackrabbit.oak.index.IndexHelper;
import org.apache.jackrabbit.oak.index.IndexerSupport;
@@ -64,6 +65,7 @@ import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutionException;
@@ -75,11 +77,16 @@ import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.regex.Pattern;
+import java.util.stream.Collectors;
import static
org.apache.jackrabbit.guava.common.base.Preconditions.checkNotNull;
import static
org.apache.jackrabbit.oak.index.indexer.document.flatfile.FlatFileNodeStoreBuilder.OAK_INDEXER_SORTED_FILE_PATH;
+import static
org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedMongoDownloadTask.DEFAULT_OAK_INDEXER_PIPELINED_MONGO_CUSTOM_EXCLUDED_PATHS;
import static
org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedMongoDownloadTask.DEFAULT_OAK_INDEXER_PIPELINED_MONGO_CUSTOM_EXCLUDE_ENTRIES_REGEX;
+import static
org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedMongoDownloadTask.DEFAULT_OAK_INDEXER_PIPELINED_MONGO_REGEX_PATH_FILTERING;
+import static
org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedMongoDownloadTask.OAK_INDEXER_PIPELINED_MONGO_CUSTOM_EXCLUDED_PATHS;
import static
org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedMongoDownloadTask.OAK_INDEXER_PIPELINED_MONGO_CUSTOM_EXCLUDE_ENTRIES_REGEX;
+import static
org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedMongoDownloadTask.OAK_INDEXER_PIPELINED_MONGO_REGEX_PATH_FILTERING;
import static
org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME;
public abstract class DocumentStoreIndexerBase implements Closeable {
@@ -247,6 +254,29 @@ public abstract class DocumentStoreIndexerBase implements
Closeable {
predicate =
predicate.and(indexerSupport.getFilterPredicateBasedOnCustomRegex(Pattern.compile(customExcludeEntriesRegex),
Function.identity()));
}
+ // Handle custom excluded paths if provided. This is only applicable
if regex path filtering is enabled.
+ // Any paths whose ancestor is in the custom excluded paths list will
be excluded from incremental index store.
+ // This is to keep in line with the custom exclude paths
implementation in the pipelined strategy.
+ boolean regexPathFiltering = ConfigHelper.getSystemPropertyAsBoolean(
+ OAK_INDEXER_PIPELINED_MONGO_REGEX_PATH_FILTERING,
+ DEFAULT_OAK_INDEXER_PIPELINED_MONGO_REGEX_PATH_FILTERING);
+ List<String> customExcludedPaths;
+ String excludePathsString = ConfigHelper.getSystemPropertyAsString(
+ OAK_INDEXER_PIPELINED_MONGO_CUSTOM_EXCLUDED_PATHS,
+ DEFAULT_OAK_INDEXER_PIPELINED_MONGO_CUSTOM_EXCLUDED_PATHS
+ ).trim();
+
+ if (regexPathFiltering && !excludePathsString.isEmpty()) {
+ customExcludedPaths = Arrays.stream(excludePathsString.split(","))
+ .map(String::trim)
+ .collect(Collectors.toList());
+
+ if (!customExcludedPaths.isEmpty()) {
+ // Add an AND condition to the existing predicate to filter
out paths that are ancestors of the custom excluded paths.
+ predicate = predicate.and(t ->
customExcludedPaths.stream().noneMatch(excludedPath ->
PathUtils.isAncestor(excludedPath, t)));
+ }
+ }
+
try {
builder = new IncrementalStoreBuilder(indexHelper.getWorkDir(),
indexHelper, initialCheckpoint, finalCheckpoint)
.withPreferredPathElements(preferredPathElements)
diff --git
a/oak-run/src/test/java/org/apache/jackrabbit/oak/index/IncrementalStoreTest.java
b/oak-run/src/test/java/org/apache/jackrabbit/oak/index/IncrementalStoreTest.java
index 73853a7bf2..af01009905 100644
---
a/oak-run/src/test/java/org/apache/jackrabbit/oak/index/IncrementalStoreTest.java
+++
b/oak-run/src/test/java/org/apache/jackrabbit/oak/index/IncrementalStoreTest.java
@@ -91,6 +91,9 @@ import java.util.function.Predicate;
import java.util.stream.Collectors;
import static com.google.common.collect.Lists.newArrayList;
+import static
org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedMongoDownloadTask.OAK_INDEXER_PIPELINED_MONGO_CUSTOM_EXCLUDED_PATHS;
+import static
org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedMongoDownloadTask.OAK_INDEXER_PIPELINED_MONGO_CUSTOM_EXCLUDE_ENTRIES_REGEX;
+import static
org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedMongoDownloadTask.OAK_INDEXER_PIPELINED_MONGO_REGEX_PATH_FILTERING;
import static
org.apache.jackrabbit.oak.index.indexer.document.indexstore.IndexStoreUtils.OAK_INDEXER_USE_LZ4;
import static
org.apache.jackrabbit.oak.index.indexer.document.indexstore.IndexStoreUtils.OAK_INDEXER_USE_ZIP;
import static org.junit.Assert.assertEquals;
@@ -144,8 +147,7 @@ public class IncrementalStoreTest {
public void testWithNoCompression() throws Exception {
System.setProperty(OAK_INDEXER_USE_ZIP, "false");
algorithm = IndexStoreUtils.compressionAlgorithm();
- incrementalFFSTest(false);
- System.clearProperty(OAK_INDEXER_USE_ZIP);
+ incrementalFFSTest(false, false);
}
@Test
@@ -153,23 +155,41 @@ public class IncrementalStoreTest {
// LZ4 compression is used by default - so disable that, fallback is
gzip
System.setProperty(OAK_INDEXER_USE_LZ4, "false");
algorithm = IndexStoreUtils.compressionAlgorithm();
- incrementalFFSTest(false);
- System.clearProperty(OAK_INDEXER_USE_LZ4);
+ incrementalFFSTest(false, false);
}
@Test
public void testWithLz4Compression() throws Exception {
algorithm = IndexStoreUtils.compressionAlgorithm();
- incrementalFFSTest(false);
+ incrementalFFSTest(false, false);
}
@Test
public void testWithLz4CompressionWithCustomRegexFilter() throws Exception
{
-
System.setProperty("oak.indexer.pipelined.mongoCustomExcludeEntriesRegex",
+
System.setProperty(OAK_INDEXER_PIPELINED_MONGO_CUSTOM_EXCLUDE_ENTRIES_REGEX,
"(.*/jcr:content/renditions/foo\\.metadata\\.xml.*$)|(.*/jcr:content/renditions/foo\\.metadata\\..*$)|(.*/jcr:content/metadata/fooBar$)");
algorithm = IndexStoreUtils.compressionAlgorithm();
- incrementalFFSTest(true);
-
System.clearProperty("oak.indexer.pipelined.mongoCustomExcludeEntriesRegex");
+ incrementalFFSTest(true, false);
+ }
+
+ @Test
+ public void testWithLz4CompressionWithCustomExcludedPaths() throws
Exception {
+ System.setProperty(OAK_INDEXER_PIPELINED_MONGO_REGEX_PATH_FILTERING,
+ "true");
+ System.setProperty(OAK_INDEXER_PIPELINED_MONGO_CUSTOM_EXCLUDED_PATHS,
"/oak:index,/var/foo");
+ algorithm = IndexStoreUtils.compressionAlgorithm();
+ incrementalFFSTest(false, true);
+ }
+
+ @Test
+ public void
testWithLz4CompressionWithCustomRegexFilterAndCustomExcludedPaths() throws
Exception {
+
System.setProperty(OAK_INDEXER_PIPELINED_MONGO_CUSTOM_EXCLUDE_ENTRIES_REGEX,
+
"(.*/jcr:content/renditions/foo\\.metadata\\.xml.*$)|(.*/jcr:content/renditions/foo\\.metadata\\..*$)|(.*/jcr:content/metadata/fooBar$)");
+ System.setProperty(OAK_INDEXER_PIPELINED_MONGO_REGEX_PATH_FILTERING,
+ "true");
+ System.setProperty(OAK_INDEXER_PIPELINED_MONGO_CUSTOM_EXCLUDED_PATHS,
"/oak:index,/var/foo");
+ algorithm = IndexStoreUtils.compressionAlgorithm();
+ incrementalFFSTest(true, true);
}
/**
@@ -182,7 +202,7 @@ public class IncrementalStoreTest {
*
* @return
*/
- public void incrementalFFSTest(boolean customRegexFilter) throws Exception
{
+ public void incrementalFFSTest(boolean customRegexFilter, boolean
customExcludedPathsFilter) throws Exception {
Backend rwBackend = createNodeStore(false);
createBaseContent(rwBackend.documentNodeStore);
String initialCheckpoint =
rwBackend.documentNodeStore.checkpoint(3600000);
@@ -191,13 +211,13 @@ public class IncrementalStoreTest {
Predicate<String> pathPredicate = s -> true;
Set<String> basePreferredPathElements = Set.of();
- Path initialFfsPath = createFFS(roBackend, pathPredicate,
basePreferredPathElements, Collections.EMPTY_LIST, initialCheckpoint,
"initial", getNodeStateAtCheckpoint1(customRegexFilter));
+ Path initialFfsPath = createFFS(roBackend, pathPredicate,
basePreferredPathElements, List.of(new PathFilter(List.of("/"),
Collections.EMPTY_LIST)), initialCheckpoint, "initial",
getNodeStateAtCheckpoint1(customRegexFilter, customExcludedPathsFilter));
createIncrementalContent(rwBackend.documentNodeStore);
String finalCheckpoint =
rwBackend.documentNodeStore.checkpoint(3600000);
Backend roBackend1 = createNodeStore(true);
- Path finalFfsPath = createFFS(roBackend1, pathPredicate,
basePreferredPathElements, Collections.EMPTY_LIST, finalCheckpoint, "final",
getNodeStateAtCheckpoint2(customRegexFilter));
+ Path finalFfsPath = createFFS(roBackend1, pathPredicate,
basePreferredPathElements, List.of(new PathFilter(List.of("/"),
Collections.EMPTY_LIST)), finalCheckpoint, "final",
getNodeStateAtCheckpoint2(customRegexFilter, customExcludedPathsFilter));
Backend roBackend2 = createNodeStore(true);
IndexStore indexStore = getDocumentIndexer(roBackend2,
finalCheckpoint).buildStore(initialCheckpoint, finalCheckpoint);
@@ -386,7 +406,9 @@ public class IncrementalStoreTest {
private void createBaseContent(NodeStore rwNodeStore) throws
CommitFailedException {
@NotNull NodeBuilder rootBuilder = rwNodeStore.getRoot().builder();
- rootBuilder.child("oak:index");
+ @NotNull NodeBuilder indexBuilder = rootBuilder.child("oak:index");
+ indexBuilder.child("fooIndex").child(":data");
+ indexBuilder.child("barIndex").child(":data");
@NotNull NodeBuilder contentBuilder = rootBuilder.child("content");
contentBuilder.child("2022").child("02").setProperty("p1", "v202202");
contentBuilder.child("2022").child("02").child("28").setProperty("p1",
"v20220228");
@@ -414,11 +436,21 @@ public class IncrementalStoreTest {
contentDamBuilder.child("2025").child("jcr:content").child("metadata").child("fooBar");
contentDamBuilder.child("2026").child("jcr:content").child("renditions").child("foo.metadata.bar1").child("jcr:content");
contentDamBuilder.child("2026").child("jcr:content").child("renditions").child("foo.metadata.bar2").child("jcr:content");
+
+ @NotNull NodeBuilder varBuilder = rootBuilder.child("var");
+ varBuilder.child("foo").setProperty("p0", "v202202");
+ varBuilder.child("foo").child("01").setProperty("p1", "v202202");
+ varBuilder.child("foo").child("01").setProperty("p2", "v202202");
+ varBuilder.child("bar").child("01").setProperty("p1", "v202202");
+ varBuilder.child("bar").child("01").setProperty("p2", "v202202");
rwNodeStore.merge(rootBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
}
private void createIncrementalContent(NodeStore rwNodeStore) throws
CommitFailedException {
@NotNull NodeBuilder rootBuilder = rwNodeStore.getRoot().builder();
+ @NotNull NodeBuilder indexBuilder = rootBuilder.child("oak:index");
+ indexBuilder.child("fooIndex-2").child(":data");
+ indexBuilder.child("barIndex-2").child(":data");
@NotNull NodeBuilder contentDamBuilder =
rootBuilder.child("content").child("dam");
contentDamBuilder.child("1000").child("12").setProperty("p2",
"v100012"); // new property added
contentDamBuilder.child("2022").child("02").setProperty("p1",
"v202202-new");// property updated
@@ -432,12 +464,20 @@ public class IncrementalStoreTest {
contentDamBuilder.child("2025").child("jcr:content").child("metadata").child("fooBar").setProperty("foo",
"bar");
contentDamBuilder.child("2026").child("jcr:content").child("renditions").child("foo.metadata.bar2").child("jcr:content").setProperty("foo",
"bar");
contentDamBuilder.child("2026").child("jcr:content").child("renditions").child("foo.metadata.bar3").child("jcr:content").setProperty("foo",
"bar");
+
+ @NotNull NodeBuilder varBuilder = rootBuilder.child("var");
+ varBuilder.child("foo").setProperty("p0", "v202202-new");
+ varBuilder.child("foo").child("01").setProperty("p1", "v202202-new");
+ varBuilder.child("foo").child("02");
+ varBuilder.child("bar").child("01").setProperty("p1", "v202202-new");
+ varBuilder.child("bar").child("01").setProperty("p3", "v202202");
+ varBuilder.child("bar").child("02");
rwNodeStore.merge(rootBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
}
@NotNull
- private static List<String> getNodeStateAtCheckpoint1(boolean
customRegexFilter) {
+ private static List<String> getNodeStateAtCheckpoint1(boolean
customRegexFilter, boolean customExcludedPathsFilter) {
List<String> expectedPathsAtCheckpoint1 = new
ArrayList<>(List.of("/|{}",
"/content|{}",
"/content/2022|{}",
@@ -486,7 +526,14 @@ public class IncrementalStoreTest {
"/content/dam/2026/jcr:content/renditions/foo.metadata.bar1/jcr:content|{}",
"/content/dam/2026/jcr:content/renditions/foo.metadata.bar2|{}",
"/content/dam/2026/jcr:content/renditions/foo.metadata.bar2/jcr:content|{}",
- "/oak:index|{}"));
+ "/oak:index|{}",
+ "/oak:index/barIndex|{}",
+ "/oak:index/fooIndex|{}",
+ "/var|{}",
+ "/var/bar|{}",
+ "/var/bar/01|{\"p1\":\"v202202\",\"p2\":\"v202202\"}",
+ "/var/foo|{\"p0\":\"v202202\"}",
+ "/var/foo/01|{\"p1\":\"v202202\",\"p2\":\"v202202\"}"));
if (customRegexFilter) {
expectedPathsAtCheckpoint1.removeAll(List.of("/content/dam/2024/jcr:content/renditions/foo.metadata.xml|{}",
@@ -498,11 +545,20 @@ public class IncrementalStoreTest {
"/content/dam/2026/jcr:content/renditions/foo.metadata.bar2/jcr:content|{}"));
}
+ // excluded filter being passed in tests is /oak:index,/var/foo
+ if (customExcludedPathsFilter) {
+ expectedPathsAtCheckpoint1.removeAll(List.of(//"/oak:index|{}",
+ "/oak:index/barIndex|{}",
+ "/oak:index/fooIndex|{}",
+ //"/var/foo|{}",
+ "/var/foo/01|{\"p1\":\"v202202\",\"p2\":\"v202202\"}"));
+ }
+
return expectedPathsAtCheckpoint1;
}
@NotNull
- private static List<String> getNodeStateAtCheckpoint2(boolean
customRegexFilter) {
+ private static List<String> getNodeStateAtCheckpoint2(boolean
customRegexFilter, boolean customExcludedPathsFilter){
List<String> expectedPathsAtCheckpoint2 = new
ArrayList<>(List.of("/|{}",
"/content|{}",
"/content/2022|{}",
@@ -552,8 +608,18 @@ public class IncrementalStoreTest {
"/content/dam/2026/jcr:content/renditions/foo.metadata.bar2/jcr:content|{\"foo\":\"bar\"}",
"/content/dam/2026/jcr:content/renditions/foo.metadata.bar3|{}",
"/content/dam/2026/jcr:content/renditions/foo.metadata.bar3/jcr:content|{\"foo\":\"bar\"}",
- "/oak:index|{}"
- ));
+ "/oak:index|{}",
+ "/oak:index/barIndex|{}",
+ "/oak:index/barIndex-2|{}",
+ "/oak:index/fooIndex|{}",
+ "/oak:index/fooIndex-2|{}",
+ "/var|{}",
+ "/var/bar|{}",
+
"/var/bar/01|{\"p1\":\"v202202-new\",\"p2\":\"v202202\",\"p3\":\"v202202\"}",
+ "/var/bar/02|{}",
+ "/var/foo|{\"p0\":\"v202202-new\"}",
+ "/var/foo/01|{\"p1\":\"v202202-new\",\"p2\":\"v202202\"}",
+ "/var/foo/02|{}"));
if (customRegexFilter) {
expectedPathsAtCheckpoint2.removeAll(List.of("/content/dam/2024/jcr:content/renditions/foo.metadata.xml|{}",
@@ -567,6 +633,16 @@ public class IncrementalStoreTest {
"/content/dam/2026/jcr:content/renditions/foo.metadata.bar3/jcr:content|{\"foo\":\"bar\"}"));
}
+ if (customExcludedPathsFilter) {
+ expectedPathsAtCheckpoint2.removeAll(List.of(//"/oak:index|{}",
+ "/oak:index/barIndex|{}",
+ "/oak:index/barIndex-2|{}",
+ "/oak:index/fooIndex|{}",
+ "/oak:index/fooIndex-2|{}",
+ "/var/foo/01|{\"p1\":\"v202202-new\",\"p2\":\"v202202\"}",
+ "/var/foo/02|{}"));
+ }
+
return expectedPathsAtCheckpoint2;
}