This is an automated email from the ASF dual-hosted git repository. thomasm pushed a commit to branch OAK-11767 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 179f1820451a7d1b047b27b4edde45ade182948d Author: Thomas Mueller <[email protected]> AuthorDate: Tue Jun 17 14:45:25 2025 +0200 OAK-11767 Use 'suffixesToSkip' for indexing with the index store --- .../oak/index/indexer/document/tree/TreeStore.java | 16 ++++++++++ .../index/indexer/document/tree/TreeStoreTest.java | 36 ++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java index 387c836711..4b0727b769 100644 --- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java +++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStore.java @@ -35,6 +35,7 @@ import org.apache.jackrabbit.oak.commons.PathUtils; import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry; import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry.NodeStateEntryBuilder; import org.apache.jackrabbit.oak.index.indexer.document.flatfile.NodeStateEntryReader; +import org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedStrategy; import org.apache.jackrabbit.oak.index.indexer.document.indexstore.IndexStore; import org.apache.jackrabbit.oak.index.indexer.document.tree.store.TreeSession; import org.apache.jackrabbit.oak.index.indexer.document.tree.store.Compression; @@ -42,6 +43,7 @@ import org.apache.jackrabbit.oak.index.indexer.document.tree.store.Store; import org.apache.jackrabbit.oak.index.indexer.document.tree.store.StoreBuilder; import org.apache.jackrabbit.oak.index.indexer.document.tree.store.utils.FilePacker; import org.apache.jackrabbit.oak.index.indexer.document.tree.store.utils.SieveCache; +import org.apache.jackrabbit.oak.plugins.index.ConfigHelper; import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition; import org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState; import org.apache.jackrabbit.oak.spi.filter.PathFilter; @@ -87,6 +89,7 @@ public class TreeStore implements ParallelIndexStore { private long iterationCount; private PathIteratorFilter filter = new PathIteratorFilter(); private ArrayList<String> splitPoints; + private final List<String> suffixesToSkip = ConfigHelper.getSystemPropertyAsStringList(PipelinedStrategy.OAK_INDEXER_PIPELINED_NODE_DOCUMENT_FILTER_SUFFIXES_TO_SKIP, "", ';'); // the prefetcher, if any. we keep a references so we can shut it down on close private Prefetcher prefetcher; @@ -120,6 +123,7 @@ public class TreeStore implements ParallelIndexStore { this.session = new TreeSession(store); // we don not want to merge too early during the download session.setMaxRoots(1000); + LOG.info("Suffixed to skip: " + suffixesToSkip); } public void init() { @@ -153,6 +157,15 @@ public class TreeStore implements ParallelIndexStore { return iteratorOverPaths(null, null); } + private boolean canSkip(String path) { + for (String suffix : suffixesToSkip) { + if (path.endsWith(suffix)) { + return true; + } + } + return false; + } + private Iterator<String> iteratorOverPaths(String start, String end) { startPrefetch(); final Iterator<Entry<String, String>> firstIterator = session.iterator(start); @@ -178,6 +191,9 @@ public class TreeStore implements ParallelIndexStore { if (value.isEmpty()) { continue; } + if (canSkip(key)) { + continue; + } if (!filter.includes(key)) { // if the path is not, see if there is a next included path String next = filter.nextIncludedPath(key); diff --git a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java index 58741384cc..46f797ab07 100644 --- a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java +++ b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/TreeStoreTest.java @@ -30,6 +30,8 @@ import java.util.Set; import org.apache.jackrabbit.oak.InitialContent; import org.apache.jackrabbit.oak.OakInitializer; +import org.apache.jackrabbit.oak.commons.junit.TemporarySystemProperty; +import org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedStrategy; import org.apache.jackrabbit.oak.index.indexer.document.tree.store.TreeSession; import org.apache.jackrabbit.oak.index.indexer.document.tree.store.utils.FilePacker; import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition; @@ -43,6 +45,7 @@ import org.apache.jackrabbit.oak.spi.commit.CompositeEditorProvider; import org.apache.jackrabbit.oak.spi.commit.EditorHook; import org.apache.jackrabbit.oak.spi.state.NodeStore; import org.junit.ClassRule; +import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.mockito.Mockito; @@ -52,6 +55,9 @@ public class TreeStoreTest { @ClassRule public static TemporaryFolder temporaryFolder = new TemporaryFolder(new File("target")); + @Rule + public TemporarySystemProperty temporarySystemProperty = new TemporarySystemProperty(); + @Test public void convertPathTest() { assertEquals("\t", TreeStore.toChildNodeEntry("/")); @@ -203,6 +209,36 @@ public class TreeStoreTest { } } + @Test + public void skipSuffixTest() throws IOException { + File testFolder = temporaryFolder.newFolder(); + // thanks to the TemporarySystemProperty rule, this is reset + // at the end of the test + System.setProperty( + PipelinedStrategy.OAK_INDEXER_PIPELINED_NODE_DOCUMENT_FILTER_SUFFIXES_TO_SKIP, + "/unimportant"); + TreeStore store = new TreeStore("test", testFolder, null, 1); + try { + store.getSession().init(); + store.putNode("/", "{}"); + store.putNode("/content", "{}"); + store.putNode("/content/abc", "{}"); + store.putNode("/content/abc/unimportant", "{}"); + store.putNode("/content/def", "{}"); + store.putNode("/jcr:system", "{}"); + + Set<IndexDefinition> defs = inMemoryIndexDefinitions("/content", "/var", "/tmp"); + store.setIndexDefinitions(defs); + + Iterator<String> it = store.iteratorOverPaths(); + assertEquals("/content", it.next()); + assertEquals("/content/abc", it.next()); + assertEquals("/content/def", it.next()); + assertFalse(it.hasNext()); + } finally { + store.close(); + } + } private static Set<IndexDefinition> inMemoryIndexDefinitions(String... includedPaths) { NodeStore store = new MemoryNodeStore(); EditorHook hook = new EditorHook(
