This is an automated email from the ASF dual-hosted git repository.
thomasm pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new b4229e2b58 OAK-11767 Use 'suffixesToSkip' for indexing with the index
store (#2338)
b4229e2b58 is described below
commit b4229e2b58fc1f99d178e9c5c350928ac34fadfa
Author: Thomas Mueller <[email protected]>
AuthorDate: Fri Jun 20 13:59:49 2025 +0200
OAK-11767 Use 'suffixesToSkip' for indexing with the index store (#2338)
---
.../oak/index/indexer/document/tree/TreeStore.java | 16 ++++++++++
.../index/indexer/document/tree/TreeStoreTest.java | 36 ++++++++++++++++++++++
2 files changed, 52 insertions(+)
diff --git
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
index 387c836711..4b0727b769 100644
---
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
+++
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
@@ -35,6 +35,7 @@ import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry;
import
org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry.NodeStateEntryBuilder;
import
org.apache.jackrabbit.oak.index.indexer.document.flatfile.NodeStateEntryReader;
+import
org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedStrategy;
import org.apache.jackrabbit.oak.index.indexer.document.indexstore.IndexStore;
import org.apache.jackrabbit.oak.index.indexer.document.tree.store.TreeSession;
import org.apache.jackrabbit.oak.index.indexer.document.tree.store.Compression;
@@ -42,6 +43,7 @@ import
org.apache.jackrabbit.oak.index.indexer.document.tree.store.Store;
import
org.apache.jackrabbit.oak.index.indexer.document.tree.store.StoreBuilder;
import
org.apache.jackrabbit.oak.index.indexer.document.tree.store.utils.FilePacker;
import
org.apache.jackrabbit.oak.index.indexer.document.tree.store.utils.SieveCache;
+import org.apache.jackrabbit.oak.plugins.index.ConfigHelper;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
import org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState;
import org.apache.jackrabbit.oak.spi.filter.PathFilter;
@@ -87,6 +89,7 @@ public class TreeStore implements ParallelIndexStore {
private long iterationCount;
private PathIteratorFilter filter = new PathIteratorFilter();
private ArrayList<String> splitPoints;
+ private final List<String> suffixesToSkip =
ConfigHelper.getSystemPropertyAsStringList(PipelinedStrategy.OAK_INDEXER_PIPELINED_NODE_DOCUMENT_FILTER_SUFFIXES_TO_SKIP,
"", ';');
// the prefetcher, if any. we keep a references so we can shut it down on close
private Prefetcher prefetcher;
@@ -120,6 +123,7 @@ public class TreeStore implements ParallelIndexStore {
this.session = new TreeSession(store);
// we don not want to merge too early during the download
session.setMaxRoots(1000);
+ LOG.info("Suffixed to skip: " + suffixesToSkip);
}
public void init() {
@@ -153,6 +157,15 @@ public class TreeStore implements ParallelIndexStore {
return iteratorOverPaths(null, null);
}
+ private boolean canSkip(String path) {
+ for (String suffix : suffixesToSkip) {
+ if (path.endsWith(suffix)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
private Iterator<String> iteratorOverPaths(String start, String end) {
startPrefetch();
final Iterator<Entry<String, String>> firstIterator =
session.iterator(start);
@@ -178,6 +191,9 @@ public class TreeStore implements ParallelIndexStore {
if (value.isEmpty()) {
continue;
}
+ if (canSkip(key)) {
+ continue;
+ }
if (!filter.includes(key)) {
// if the path is not, see if there is a next included path
String next = filter.nextIncludedPath(key);
diff --git
a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java
index 58741384cc..46f797ab07 100644
---
a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java
+++
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java
@@ -30,6 +30,8 @@ import java.util.Set;
import org.apache.jackrabbit.oak.InitialContent;
import org.apache.jackrabbit.oak.OakInitializer;
+import org.apache.jackrabbit.oak.commons.junit.TemporarySystemProperty;
+import
org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedStrategy;
import org.apache.jackrabbit.oak.index.indexer.document.tree.store.TreeSession;
import
org.apache.jackrabbit.oak.index.indexer.document.tree.store.utils.FilePacker;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
@@ -43,6 +45,7 @@ import
org.apache.jackrabbit.oak.spi.commit.CompositeEditorProvider;
import org.apache.jackrabbit.oak.spi.commit.EditorHook;
import org.apache.jackrabbit.oak.spi.state.NodeStore;
import org.junit.ClassRule;
+import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.mockito.Mockito;
@@ -52,6 +55,9 @@ public class TreeStoreTest {
@ClassRule
public static TemporaryFolder temporaryFolder = new TemporaryFolder(new
File("target"));
+ @Rule
+ public TemporarySystemProperty temporarySystemProperty = new
TemporarySystemProperty();
+
@Test
public void convertPathTest() {
assertEquals("\t", TreeStore.toChildNodeEntry("/"));
@@ -203,6 +209,36 @@ public class TreeStoreTest {
}
}
+ @Test
+ public void skipSuffixTest() throws IOException {
+ File testFolder = temporaryFolder.newFolder();
+ // thanks to the TemporarySystemProperty rule, this is reset
+ // at the end of the test
+ System.setProperty(
+
PipelinedStrategy.OAK_INDEXER_PIPELINED_NODE_DOCUMENT_FILTER_SUFFIXES_TO_SKIP,
+ "/unimportant");
+ TreeStore store = new TreeStore("test", testFolder, null, 1);
+ try {
+ store.getSession().init();
+ store.putNode("/", "{}");
+ store.putNode("/content", "{}");
+ store.putNode("/content/abc", "{}");
+ store.putNode("/content/abc/unimportant", "{}");
+ store.putNode("/content/def", "{}");
+ store.putNode("/jcr:system", "{}");
+
+ Set<IndexDefinition> defs = inMemoryIndexDefinitions("/content",
"/var", "/tmp");
+ store.setIndexDefinitions(defs);
+
+ Iterator<String> it = store.iteratorOverPaths();
+ assertEquals("/content", it.next());
+ assertEquals("/content/abc", it.next());
+ assertEquals("/content/def", it.next());
+ assertFalse(it.hasNext());
+ } finally {
+ store.close();
+ }
+ }
private static Set<IndexDefinition> inMemoryIndexDefinitions(String...
includedPaths) {
NodeStore store = new MemoryNodeStore();
EditorHook hook = new EditorHook(