This is an automated email from the ASF dual-hosted git repository.

thomasm pushed a commit to branch OAK-11767
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 179f1820451a7d1b047b27b4edde45ade182948d
Author: Thomas Mueller <[email protected]>
AuthorDate: Tue Jun 17 14:45:25 2025 +0200

    OAK-11767 Use 'suffixesToSkip' for indexing with the index store
---
 .../oak/index/indexer/document/tree/TreeStore.java | 16 ++++++++++
 .../index/indexer/document/tree/TreeStoreTest.java | 36 ++++++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
index 387c836711..4b0727b769 100644
--- 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java
@@ -35,6 +35,7 @@ import org.apache.jackrabbit.oak.commons.PathUtils;
 import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry;
 import 
org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry.NodeStateEntryBuilder;
 import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.NodeStateEntryReader;
+import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedStrategy;
 import org.apache.jackrabbit.oak.index.indexer.document.indexstore.IndexStore;
 import org.apache.jackrabbit.oak.index.indexer.document.tree.store.TreeSession;
 import org.apache.jackrabbit.oak.index.indexer.document.tree.store.Compression;
@@ -42,6 +43,7 @@ import 
org.apache.jackrabbit.oak.index.indexer.document.tree.store.Store;
 import 
org.apache.jackrabbit.oak.index.indexer.document.tree.store.StoreBuilder;
 import 
org.apache.jackrabbit.oak.index.indexer.document.tree.store.utils.FilePacker;
 import 
org.apache.jackrabbit.oak.index.indexer.document.tree.store.utils.SieveCache;
+import org.apache.jackrabbit.oak.plugins.index.ConfigHelper;
 import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
 import org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState;
 import org.apache.jackrabbit.oak.spi.filter.PathFilter;
@@ -87,6 +89,7 @@ public class TreeStore implements ParallelIndexStore {
     private long iterationCount;
     private PathIteratorFilter filter = new PathIteratorFilter();
     private ArrayList<String> splitPoints;
+    private final List<String> suffixesToSkip = 
ConfigHelper.getSystemPropertyAsStringList(PipelinedStrategy.OAK_INDEXER_PIPELINED_NODE_DOCUMENT_FILTER_SUFFIXES_TO_SKIP,
 "", ';');
 
     // the prefetcher, if any. we keep a references so we can shut it down on 
close
     private Prefetcher prefetcher;
@@ -120,6 +123,7 @@ public class TreeStore implements ParallelIndexStore {
         this.session = new TreeSession(store);
         // we don not want to merge too early during the download
         session.setMaxRoots(1000);
+        LOG.info("Suffixed to skip: " + suffixesToSkip);
     }
 
     public void init() {
@@ -153,6 +157,15 @@ public class TreeStore implements ParallelIndexStore {
         return iteratorOverPaths(null, null);
     }
 
+    private boolean canSkip(String path) { // true if path ends with any suffixesToSkip entry
+        for (String suffix : suffixesToSkip) {
+            if (path.endsWith(suffix)) { // literal, case-sensitive suffix match
+                return true;
+            }
+        }
+        return false; // no configured suffix matched
+    }
+
     private Iterator<String> iteratorOverPaths(String start, String end) {
         startPrefetch();
         final Iterator<Entry<String, String>> firstIterator = 
session.iterator(start);
@@ -178,6 +191,9 @@ public class TreeStore implements ParallelIndexStore {
                     if (value.isEmpty()) {
                         continue;
                     }
+                    if (canSkip(key)) {
+                        continue;
+                    }
                     if (!filter.includes(key)) {
                         // if the path is not, see if there is a next included 
path
                         String next = filter.nextIncludedPath(key);
diff --git 
a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java
 
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java
index 58741384cc..46f797ab07 100644
--- 
a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java
+++ 
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java
@@ -30,6 +30,8 @@ import java.util.Set;
 
 import org.apache.jackrabbit.oak.InitialContent;
 import org.apache.jackrabbit.oak.OakInitializer;
+import org.apache.jackrabbit.oak.commons.junit.TemporarySystemProperty;
+import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedStrategy;
 import org.apache.jackrabbit.oak.index.indexer.document.tree.store.TreeSession;
 import 
org.apache.jackrabbit.oak.index.indexer.document.tree.store.utils.FilePacker;
 import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
@@ -43,6 +45,7 @@ import 
org.apache.jackrabbit.oak.spi.commit.CompositeEditorProvider;
 import org.apache.jackrabbit.oak.spi.commit.EditorHook;
 import org.apache.jackrabbit.oak.spi.state.NodeStore;
 import org.junit.ClassRule;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
 import org.mockito.Mockito;
@@ -52,6 +55,9 @@ public class TreeStoreTest {
     @ClassRule
     public static TemporaryFolder temporaryFolder = new TemporaryFolder(new 
File("target"));
 
+    @Rule
+    public TemporarySystemProperty temporarySystemProperty = new 
TemporarySystemProperty();
+
     @Test
     public void convertPathTest() {
         assertEquals("\t", TreeStore.toChildNodeEntry("/"));
@@ -203,6 +209,36 @@ public class TreeStoreTest {
         }
     }
 
+    @Test
+    public void skipSuffixTest() throws IOException { // paths ending with a configured suffix are not iterated
+        File testFolder = temporaryFolder.newFolder();
+        // thanks to the TemporarySystemProperty rule, this is reset
+        // at the end of the test
+        System.setProperty(
+                
PipelinedStrategy.OAK_INDEXER_PIPELINED_NODE_DOCUMENT_FILTER_SUFFIXES_TO_SKIP,
+                "/unimportant");
+        TreeStore store = new TreeStore("test", testFolder, null, 1); // created after the property is set: the suffix list is an instance field initializer
+        try {
+            store.getSession().init();
+            store.putNode("/", "{}");
+            store.putNode("/content", "{}");
+            store.putNode("/content/abc", "{}");
+            store.putNode("/content/abc/unimportant", "{}"); // ends with the suffix to skip
+            store.putNode("/content/def", "{}");
+            store.putNode("/jcr:system", "{}");
+
+            Set<IndexDefinition> defs = inMemoryIndexDefinitions("/content", 
"/var", "/tmp");
+            store.setIndexDefinitions(defs); // presumably restricts iteration to the included paths; "/" and "/jcr:system" are not expected below
+
+            Iterator<String> it = store.iteratorOverPaths();
+            assertEquals("/content", it.next());
+            assertEquals("/content/abc", it.next());
+            assertEquals("/content/def", it.next()); // "/content/abc/unimportant" was skipped
+            assertFalse(it.hasNext());
+        } finally {
+            store.close();
+        }
+    }
     private static Set<IndexDefinition> inMemoryIndexDefinitions(String... 
includedPaths) {
         NodeStore store = new MemoryNodeStore();
         EditorHook hook = new EditorHook(

Reply via email to