This is an automated email from the ASF dual-hosted git repository.

thomasm pushed a commit to branch OAK-12010-subset
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit a91de68d4fd288c437530cb6aff6bc93d04439ad Author: Thomas Mueller <[email protected]> AuthorDate: Mon Jan 19 10:29:51 2026 +0100 OAK-12010 Simplified index management (without optimizer) --- .../jackrabbit/oak/plugins/index/IndexName.java | 17 + .../jackrabbit/oak/plugins/index/IndexUpdate.java | 21 + .../oak/plugins/index/diff/DiffIndex.java | 246 +++++++ .../oak/plugins/index/diff/DiffIndexMerger.java | 801 +++++++++++++++++++++ .../oak/plugins/index/diff/JsonNodeBuilder.java | 279 +++++++ .../plugins/index/diff/RootIndexesListService.java | 112 +++ .../oak/plugins/index/diff/DiffIndexTest.java | 307 ++++++++ .../plugins/index/diff/JsonNodeBuilderTest.java | 226 ++++++ .../oak/plugins/index/diff/MergeTest.java | 191 +++++ .../jackrabbit/oak/plugins/index/diff/indexes.json | 187 +++++ .../index/search/spi/query/FulltextIndex.java | 2 + .../index/search/spi/query/IndexNameTest.java | 53 ++ 12 files changed, 2442 insertions(+) diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexName.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexName.java index 3597079d28..7d8313c8e2 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexName.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexName.java @@ -234,6 +234,23 @@ public class IndexName implements Comparable<IndexName> { return result; } + public static Collection<String> filterNewestIndexes(Collection<String> indexPaths, NodeState rootState) { + HashMap<String, IndexName> latestVersions = new HashMap<>(); + for (String p : indexPaths) { + IndexName indexName = IndexName.parse(p); + IndexName stored = latestVersions.get(indexName.baseName); + if (stored == null || stored.compareTo(indexName) < 0) { + // no old version, or old version is smaller: use + latestVersions.put(indexName.baseName, indexName); + } + } + ArrayList<String> result = new ArrayList<>(latestVersions.size()); + for (IndexName n : latestVersions.values()) { + result.add(n.nodeName); + } + return result; + } + public String nextCustomizedName() { return baseName + "-" + productVersion + "-custom-" + (customerVersion + 1); } diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexUpdate.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexUpdate.java index e33bfe9eff..058d8f8b16 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexUpdate.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/IndexUpdate.java @@ -48,6 +48,8 @@ import org.apache.jackrabbit.oak.commons.collections.IterableUtils; import org.apache.jackrabbit.oak.commons.collections.SetUtils; import org.apache.jackrabbit.oak.plugins.index.IndexCommitCallback.IndexProgress; import org.apache.jackrabbit.oak.plugins.index.NodeTraversalCallback.PathSource; +import org.apache.jackrabbit.oak.plugins.index.diff.DiffIndex; +import org.apache.jackrabbit.oak.plugins.index.diff.DiffIndexMerger; import org.apache.jackrabbit.oak.plugins.index.progress.IndexingProgressReporter; import org.apache.jackrabbit.oak.plugins.index.progress.NodeCountEstimator; import org.apache.jackrabbit.oak.plugins.index.progress.TraversalRateEstimator; @@ -60,6 +62,7 @@ import org.apache.jackrabbit.oak.spi.commit.VisibleEditor; import org.apache.jackrabbit.oak.spi.state.NodeBuilder; import org.apache.jackrabbit.oak.spi.state.NodeState; import org.apache.jackrabbit.oak.spi.state.NodeStateUtils; +import org.apache.jackrabbit.oak.spi.state.NodeStore; 
import org.apache.jackrabbit.oak.spi.state.ReadOnlyBuilder; import org.apache.jackrabbit.util.ISO8601; import org.jetbrains.annotations.NotNull; @@ -108,6 +111,8 @@ public class IndexUpdate implements Editor, PathSource { } } + private final NodeStore store; + private final IndexUpdateRootState rootState; private final NodeBuilder builder; @@ -150,6 +155,16 @@ public class IndexUpdate implements Editor, PathSource { NodeState root, NodeBuilder builder, IndexUpdateCallback updateCallback, NodeTraversalCallback traversalCallback, CommitInfo commitInfo, CorruptIndexHandler corruptIndexHandler) { + this(provider, async, root, builder, updateCallback, traversalCallback, commitInfo, corruptIndexHandler, null); + } + + public IndexUpdate( + IndexEditorProvider provider, String async, + NodeState root, NodeBuilder builder, + IndexUpdateCallback updateCallback, NodeTraversalCallback traversalCallback, + CommitInfo commitInfo, CorruptIndexHandler corruptIndexHandler, + @Nullable NodeStore store) { + this.store = store; this.parent = null; this.name = null; this.path = "/"; @@ -158,6 +173,7 @@ public class IndexUpdate implements Editor, PathSource { } private IndexUpdate(IndexUpdate parent, String name) { + this.store = parent.store; this.parent = requireNonNull(parent); this.name = name; this.rootState = parent.rootState; @@ -279,6 +295,11 @@ public class IndexUpdate implements Editor, PathSource { } private void collectIndexEditors(NodeBuilder definitions, NodeState before) throws CommitFailedException { + if (definitions.hasChildNode(DiffIndexMerger.DIFF_INDEX) + && "disabled".equals(definitions.child(DiffIndexMerger.DIFF_INDEX).getString("type")) + && rootState.async == null) { + DiffIndex.applyDiffIndexChanges(store, definitions); + } for (String name : definitions.getChildNodeNames()) { NodeBuilder definition = definitions.getChildNode(name); if (isIncluded(rootState.async, definition)) { diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndex.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndex.java new file mode 100644 index 0000000000..825a23c38e --- /dev/null +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndex.java @@ -0,0 +1,246 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.index.diff; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Comparator; + +import org.apache.jackrabbit.oak.api.PropertyState; +import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.commons.PathUtils; +import org.apache.jackrabbit.oak.commons.json.JsonObject; +import org.apache.jackrabbit.oak.plugins.index.IndexConstants; +import org.apache.jackrabbit.oak.plugins.index.IndexName; +import org.apache.jackrabbit.oak.plugins.tree.TreeConstants; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.apache.jackrabbit.oak.spi.state.NodeStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Processing of diff indexes, that is nodes under "/oak:index/diff.index". A + * diff index contains differences to existing indexes, and possibly new + * (custom) indexes in the form of JSON. These changes can then be merged + * (applied) to the index definitions. This allows to simplify index management, + * because it allows to modify (add, update) indexes in a simple way. + */ +public class DiffIndex { + + private static final Logger LOG = LoggerFactory.getLogger(DiffIndex.class); + + /** + * Apply changes to the index definitions. That means merge the index diff with + * the existing indexes, creating new index versions. It might also mean to + * remove old (merged) indexes if the diff no longer contains them. + * + * @param store the node store + * @param indexDefinitions the /oak:index node + */ + public static void applyDiffIndexChanges(NodeStore store, NodeBuilder indexDefinitions) { + JsonObject newImageLuceneDefinitions = null; + for (String diffIndex : new String[] { DiffIndexMerger.DIFF_INDEX, DiffIndexMerger.DIFF_INDEX_OPTIMIZER }) { + if (!indexDefinitions.hasChildNode(diffIndex)) { + continue; + } + NodeBuilder diffIndexDefinition = indexDefinitions.child(diffIndex); + NodeBuilder diffJson = diffIndexDefinition.getChildNode("diff.json"); + if (!diffJson.exists()) { + continue; + } + NodeBuilder jcrContent = diffJson.getChildNode("jcr:content"); + if (!jcrContent.exists()) { + continue; + } + PropertyState lastMod = jcrContent.getProperty("jcr:lastModified"); + if (lastMod == null) { + continue; + } + String modified = lastMod.getValue(Type.DATE); + PropertyState lastProcessed = jcrContent.getProperty(":lastProcessed"); + if (lastProcessed != null) { + if (modified.equals(lastProcessed.getValue(Type.STRING))) { + // already processed + continue; + } + } + // store now, so a change is only processed once + jcrContent.setProperty(":lastProcessed", modified); + PropertyState jcrData = jcrContent.getProperty("jcr:data"); + String diff = tryReadString(jcrData); + if (diff == null) { + continue; + } + try { + JsonObject diffObj = JsonObject.fromJson("{\"diff\": " + diff + "}", true); + diffIndexDefinition.removeProperty("error"); + if (newImageLuceneDefinitions == null) { + newImageLuceneDefinitions = new JsonObject(); + } + newImageLuceneDefinitions.getChildren().put("/oak:index/" + diffIndex, diffObj); + } catch (Exception e) { + String message = "Error parsing diff.index"; + LOG.warn(message + ": {}", e.getMessage(), e); + diffIndexDefinition.setProperty("error", message + ": " + e.getMessage()); + } + } + if (newImageLuceneDefinitions == null) { + // not a valid diff index, or already processed + return; + } + LOG.info("Processing a new diff.index with node store 
{}", store); + JsonObject repositoryDefinitions = RootIndexesListService.getRootIndexDefinitions(indexDefinitions); + LOG.debug("Index list {}", repositoryDefinitions.toString()); + try { + DiffIndexMerger.merge(newImageLuceneDefinitions, repositoryDefinitions, store); + for (String indexPath : newImageLuceneDefinitions.getChildren().keySet()) { + if (indexPath.startsWith("/oak:index/" + DiffIndexMerger.DIFF_INDEX)) { + continue; + } + JsonObject newDef = newImageLuceneDefinitions.getChildren().get(indexPath); + String indexName = PathUtils.getName(indexPath); + JsonNodeBuilder.addOrReplace(indexDefinitions, store, indexName, IndexConstants.INDEX_DEFINITIONS_NODE_TYPE, newDef.toString()); + updateNodetypeIndexForPath(indexDefinitions, indexName, true); + disableOrRemoveOldVersions(indexDefinitions, indexPath, indexName); + } + removeDisabledMergedIndexes(indexDefinitions); + sortIndexes(indexDefinitions); + } catch (Exception e) { + LOG.warn("Error merging diff.index: {}", e.getMessage(), e); + NodeBuilder diffIndexDefinition = indexDefinitions.child(DiffIndexMerger.DIFF_INDEX); + diffIndexDefinition.setProperty("error", e.getMessage()); + } + } + + /** + * Try to read a text from the (binary) jcr:data property. Edge cases such as + * "property does not exist" and IO exceptions (blob not found) do not throw an + * exception (IO exceptions are logged). + * + * @param jcrData the "jcr:data" property + * @return the string, or null if reading fails + */ + public static String tryReadString(PropertyState jcrData) { + if (jcrData == null) { + return null; + } + InputStream in = jcrData.getValue(Type.BINARY).getNewStream(); + try { + return new String(in.readAllBytes(), StandardCharsets.UTF_8); + } catch (IOException e) { + LOG.warn("Can not read jcr:data", e); + return null; + } + } + + private static void sortIndexes(NodeBuilder builder) { + ArrayList<String> list = new ArrayList<>(); + for (String child : builder.getChildNodeNames()) { + list.add(child); + } + list.sort(Comparator.naturalOrder()); + builder.setProperty(TreeConstants.OAK_CHILD_ORDER, list, Type.NAMES); + } + + private static void removeDisabledMergedIndexes(NodeBuilder definitions) { + ArrayList<String> toRemove = new ArrayList<>(); + for (String child : definitions.getChildNodeNames()) { + if (!definitions.getChildNode(child).hasProperty("mergeChecksum")) { + continue; + } + if ("disabled".equals(definitions.getChildNode(child).getString("type"))) { + toRemove.add(child); + } + } + for (String r : toRemove) { + LOG.info("Removing disabled index {}", r); + definitions.child(r).remove(); + updateNodetypeIndexForPath(definitions, r, false); + } + } + + /** + * Try to remove or disable old version of merged indexes, if there are any. 
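+ * For example (index names are illustrative), with keep = "damAssetLucene-12-custom-2":
+ * <pre>
+ * damAssetLucene-12            kept (product index, no "-custom-" suffix)
+ * damAssetLucene-12-custom-1   removed
+ * damAssetLucene-12-custom-2   kept (the version to retain)
+ * </pre>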
+ * + * @param definitions the builder for /oak:index + * @param indexPath the path + * @param keep which index name (which version) to retain + */ + private static void disableOrRemoveOldVersions(NodeBuilder definitions, String indexPath, String keep) { + String indexName = indexPath; + if (indexPath.startsWith("/oak:index/")) { + indexName = indexPath.substring("/oak:index/".length()); + } + String baseName = IndexName.parse(indexName).getBaseName(); + ArrayList<String> toRemove = new ArrayList<>(); + for (String child : definitions.getChildNodeNames()) { + if (child.equals(keep) || child.indexOf("-custom-") < 0) { + // the one to keep, or not a customized or custom index + continue; + } + String childBaseName = IndexName.parse(child).getBaseName(); + if (baseName.equals(childBaseName)) { + if (indexName.equals(child)) { + if (!"disabled".equals(definitions.getChildNode(indexName).getString("type"))) { + continue; + } + } + toRemove.add(child); + } + } + for (String r : toRemove) { + LOG.info("Removing old index " + r); + definitions.child(r).remove(); + updateNodetypeIndexForPath(definitions, r, false); + } + } + + private static void updateNodetypeIndexForPath(NodeBuilder indexDefinitions, + String indexName, boolean add) { + LOG.info("nodetype index update add={} name={}", add, indexName); + if (!indexDefinitions.hasChildNode("nodetype")) { + return; + } + NodeBuilder nodetypeIndex = indexDefinitions.getChildNode("nodetype"); + NodeBuilder indexContent = nodetypeIndex.child(":index"); + String key = URLEncoder.encode("oak:QueryIndexDefinition", StandardCharsets.UTF_8); + String path = "/oak:index/" + indexName; + if (add) { + // insert entry + NodeBuilder builder = indexContent.child(key); + for (String name : PathUtils.elements(path)) { + builder = builder.child(name); + } + LOG.info("nodetype index match"); + builder.setProperty("match", true); + } else { + // remove entry (for deleted indexes) + NodeBuilder builder = indexContent.getChildNode(key); + for (String name : PathUtils.elements(path)) { + builder = builder.getChildNode(name); + } + if (builder.exists()) { + LOG.info("nodetype index remove"); + builder.removeProperty("match"); + } + } + } + +} diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexMerger.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexMerger.java new file mode 100644 index 0000000000..b5c4527976 --- /dev/null +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexMerger.java @@ -0,0 +1,801 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.index.diff; + +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; + +import org.apache.jackrabbit.oak.commons.StringUtils; +import org.apache.jackrabbit.oak.commons.json.JsonObject; +import org.apache.jackrabbit.oak.commons.json.JsopBuilder; +import org.apache.jackrabbit.oak.commons.json.JsopTokenizer; +import org.apache.jackrabbit.oak.json.Base64BlobSerializer; +import org.apache.jackrabbit.oak.json.JsonSerializer; +import org.apache.jackrabbit.oak.plugins.index.IndexName; +import org.apache.jackrabbit.oak.spi.state.NodeState; +import org.apache.jackrabbit.oak.spi.state.NodeStateUtils; +import org.apache.jackrabbit.oak.spi.state.NodeStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Index definition merge utility that uses the "diff" mode. + */ +public class DiffIndexMerger { + + final static Logger LOG = LoggerFactory.getLogger(DiffIndexMerger.class); + + public final static String DIFF_INDEX = "diff.index"; + public final static String DIFF_INDEX_OPTIMIZER = "diff.index.optimizer"; + + private final static String MERGE_INFO = "This index was auto-merged. See also https://oak-indexing.github.io/oakTools/simplified.html"; + + // the list of unsupported included paths, e.g. "/apps,/libs" + // by default all paths are supported + private final static String[] UNSUPPORTED_INCLUDED_PATHS = System.getProperty("oak.diffIndex.unsupportedPaths", "").split(","); + + // in case a custom index is removed, whether a dummy index is created + private final static boolean DELETE_CREATES_DUMMY = Boolean.getBoolean("oak.diffIndex.deleteCreatesDummy"); + + // in case a customization was removed, create a copy of the OOTB index + private final static boolean DELETE_COPIES_OOTB = Boolean.getBoolean("oak.diffIndex.deleteCopiesOOTB"); + + /** + * If there is a diff index, that is an index with prefix "diff.", then try to merge it. 
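+ * For illustration only (index, node type, and property names are examples, not
+ * part of any product definition), the provided definitions may contain a diff
+ * entry of the following form; merging it adds a new customized version such as
+ * "/oak:index/damAssetLucene-12-custom-1" to newImageLuceneDefinitions:
+ * <pre>
+ * {
+ *   "/oak:index/diff.index": {
+ *     "diff": {
+ *       "damAssetLucene": {
+ *         "indexRules": {
+ *           "dam:Asset": {
+ *             "properties": {
+ *               "status": { "name": "status", "propertyIndex": true }
+ *             }
+ *           }
+ *         }
+ *       }
+ *     }
+ *   }
+ * }
+ * </pre>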
+ * + * @param newImageLuceneDefinitions + * the new indexes + * (input and output) + * @param repositoryDefinitions + * the indexes in the writable repository + * (input) + * @param repositoryNodeStore + */ + public static void merge(JsonObject newImageLuceneDefinitions, JsonObject repositoryDefinitions, NodeStore repositoryNodeStore) { + // combine all definitions into one object + JsonObject combined = new JsonObject(); + + // index definitions in the repository + combined.getChildren().putAll(repositoryDefinitions.getChildren()); + + // read the diff.index.optimizer explicitly, + // because it's a not a regular index definition, + // and so in the repositoryDefinitions + if (repositoryNodeStore != null) { + Map<String, JsonObject> diffInRepo = readDiffIndex(repositoryNodeStore, DIFF_INDEX_OPTIMIZER); + combined.getChildren().putAll(diffInRepo); + } + + // overwrite with the provided definitions (if any) + combined.getChildren().putAll(newImageLuceneDefinitions.getChildren()); + + // check if there "diff.index" or "diff.index.optimizer" + boolean found = combined.getChildren().containsKey("/oak:index/" + DIFF_INDEX) + || combined.getChildren().containsKey("/oak:index/" + DIFF_INDEX_OPTIMIZER); + if (!found) { + // early exit, so that the risk of merging the PR + // is very small for customers that do not use this + LOG.debug("No 'diff.index' definition"); + return; + } + mergeDiff(newImageLuceneDefinitions, combined); + } + + /** + * If there is a diff index (hardcoded node "/oak:index/diff.index" or + * "/oak:index/diff.index.optimizer"), then iterate over all entries and create new + * (merged) versions if needed. + * + * @param newImageLuceneDefinitions + * the new Lucene definitions + * (input + output) + * @param combined + * the definitions in the repository, + * including the one in the customer repo and new ones + * (input) + * @return whether a new version of an index was added + */ + static boolean mergeDiff(JsonObject newImageLuceneDefinitions, JsonObject combined) { + // iterate again, this time process + + // collect the diff index(es) + HashMap<String, JsonObject> toProcess = new HashMap<>(); + tryExtractDiffIndex(combined, "/oak:index/" + DIFF_INDEX, toProcess); + tryExtractDiffIndex(combined, "/oak:index/" + DIFF_INDEX_OPTIMIZER, toProcess); + // if the diff index exists, but doesn't contain some of the previous indexes + // (indexes with mergeInfo), then we need to disable those (using /dummy includedPath) + extractExistingMergedIndexes(combined, toProcess); + if (toProcess.isEmpty()) { + LOG.debug("No diff index definitions found."); + return false; + } + boolean hasChanges = false; + for (Entry<String, JsonObject> e : toProcess.entrySet()) { + String key = e.getKey(); + JsonObject value = e.getValue(); + if (key.startsWith("/oak:index/")) { + LOG.warn("The key should contains just the index name, without the '/oak:index' prefix for key {}", key); + key = key.substring("/oak:index/".length()); + } + LOG.debug("Processing {}", key); + hasChanges |= processMerge(key, value, newImageLuceneDefinitions, combined); + } + return hasChanges; + } + + /** + * Extract a "diff.index" from the set of index definitions (if found), and if + * found, store the nested entries in the target map, merging them with previous + * entries if found. + * + * The diff.index may either have a file (a "jcr:content" child node with a + * "jcr:data" property), or a "diff" JSON object. 
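+ * For illustration (shown decoded; in the repository "jcr:data" is a binary),
+ * the file variant looks roughly like this:
+ * <pre>
+ * "diff.index": {
+ *   "diff.json": {
+ *     "jcr:content": {
+ *       "jcr:data": "{ \"damAssetLucene\": { ... } }"
+ *     }
+ *   }
+ * }
+ * </pre>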
For customers (in the git + * repository), the file is much easier to construct, but when running the + * indexing job, the nested JSON is much easier. + * + * @param indexDefs the set of index definitions (may be empty) + * @param name the name of the diff.index (either diff.index or + * diff.index.optimizer) + * @param target the target map of diff.index definitions + * @return the error message trying to parse the JSON file, or null + */ + static String tryExtractDiffIndex(JsonObject indexDefs, String name, HashMap<String, JsonObject> target) { + JsonObject diffIndex = indexDefs.getChildren().get(name); + if (diffIndex == null) { + return null; + } + // extract either the file, or the nested json + JsonObject file = diffIndex.getChildren().get("diff.json"); + JsonObject diff; + if (file != null) { + // file + JsonObject jcrContent = file.getChildren().get("jcr:content"); + if (jcrContent == null) { + String message = "jcr:content child node is missing in diff.json"; + LOG.warn(message); + return message; + } + String jcrData = JsonNodeBuilder.oakStringValue(jcrContent, "jcr:data"); + try { + diff = JsonObject.fromJson(jcrData, true); + } catch (Exception e) { + LOG.warn("Illegal Json, ignoring: {}", jcrData, e); + String message = "Illegal Json, ignoring: " + e.getMessage(); + return message; + } + } else { + // nested json + diff = diffIndex.getChildren().get("diff"); + } + // store, if not empty + if (diff != null) { + for (Entry<String, JsonObject> e : diff.getChildren().entrySet()) { + String key = e.getKey(); + target.put(key, mergeDiffs(target.get(key), e.getValue())); + } + } + return null; + } + + /** + * Extract the indexes with a "mergeInfo" property and store them in the target + * object. This is needed so that indexes that were removed from the index.diff + * are detected (a new version is needed in this case with includedPaths + * "/dummy"). + * + * @param indexDefs the index definitions in the repository + * @param target the target map of "diff.index" definitions. for each entry + * found, an empty object is added + */ + private static void extractExistingMergedIndexes(JsonObject indexDefs, HashMap<String, JsonObject> target) { + for (Entry<String, JsonObject> e : indexDefs.getChildren().entrySet()) { + String key = e.getKey(); + JsonObject value = e.getValue(); + if (key.indexOf("-custom-") < 0 || !value.getProperties().containsKey("mergeInfo")) { + continue; + } + String baseName = IndexName.parse(key.substring("/oak:index/".length())).getBaseName(); + if (!target.containsKey(baseName)) { + // if there is no entry yet for this key, + // add a new empty object + target.put(baseName, new JsonObject()); + } + } + } + + /** + * Merge diff from "diff.index" and "diff.index.optimizer". + * The customer can define a diff (stored in "diff.index") + * and someone else (or the optimizer) can define one (stored in "diff.index.optimizer"). 
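+ * For example (property and node names are illustrative), properties from b win
+ * and children are merged recursively:
+ * <pre>
+ * a      = { "costPerEntry": 2.0, "indexRules": { "nt:base": {} } }
+ * b      = { "costPerEntry": 3.0, "indexRules": { "dam:Asset": {} } }
+ * result = { "costPerEntry": 3.0, "indexRules": { "nt:base": {}, "dam:Asset": {} } }
+ * </pre>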
+ * + * @param a the first diff + * @param b the second diff (overwrites entries in a) + * @return the merged entry + */ + private static JsonObject mergeDiffs(JsonObject a, JsonObject b) { + if (a == null) { + return b; + } else if (b == null) { + return a; + } + JsonObject result = JsonObject.fromJson(a.toString(), true); + result.getProperties().putAll(b.getProperties()); + HashSet<String> both = new HashSet<>(a.getChildren().keySet()); + both.addAll(b.getChildren().keySet()); + for (String k : both) { + result.getChildren().put(k, mergeDiffs(a.getChildren().get(k), b.getChildren().get(k))); + } + return result; + } + + /** + * Merge using the diff definition. + * + * If the latest customized index already matches, then + * newImageLuceneDefinitions will remain as is. Otherwise, a new customized + * index is added, with a "mergeInfo" property. + * + * Existing properties are never changed; only new properties/children are + * added. + * + * @param indexName the name, eg. "damAssetLucene" + * @param indexDiff the diff with the new properties + * @param newImageLuceneDefinitions the new Lucene definitions (input + output) + * @param combined the definitions in the repository, including + * the one in the customer repo and new ones + * (input) + * @return whether a new version of an index was added + */ + public static boolean processMerge(String indexName, JsonObject indexDiff, JsonObject newImageLuceneDefinitions, JsonObject combined) { + // extract the latest product index (eg. damAssetLucene-12) + // and customized index (eg. damAssetLucene-12-custom-3) - if any + IndexName latestProduct = null; + String latestProductKey = null; + IndexName latestCustomized = null; + String latestCustomizedKey = null; + String prefix = "/oak:index/"; + for (String key : combined.getChildren().keySet()) { + IndexName name = IndexName.parse(key.substring(prefix.length())); + if (!name.isVersioned()) { + LOG.debug("Ignoring unversioned index {}", name); + continue; + } + if (!name.getBaseName().equals(indexName)) { + continue; + } + boolean isCustom = key.indexOf("-custom-") >= 0; + if (isCustom) { + if (latestCustomized == null || + name.compareTo(latestCustomized) > 0) { + latestCustomized = name; + latestCustomizedKey = key; + } + } else { + if (latestProduct == null || + name.compareTo(latestProduct) > 0) { + latestProduct = name; + latestProductKey = key; + } + } + } + LOG.debug("Latest product: {}", latestProductKey); + LOG.debug("Latest customized: {}", latestCustomizedKey); + if (latestProduct == null) { + if (indexName.indexOf('.') >= 0) { + // a fully custom index needs to contains a dot + LOG.debug("Fully custom index {}", indexName); + } else { + LOG.debug("No product version for {}", indexName); + return false; + } + } + JsonObject latestProductIndex = combined.getChildren().get(latestProductKey); + String[] includedPaths; + if (latestProductIndex == null) { + if (indexDiff.getProperties().isEmpty() && indexDiff.getChildren().isEmpty()) { + // there is no customization (any more), which means a dummy index may be needed + LOG.debug("No customization for {}", indexName); + } else { + includedPaths = JsonNodeBuilder.oakStringArrayValue(indexDiff, "includedPaths"); + if (includesUnsupportedPaths(includedPaths)) { + LOG.warn("New custom index {} is not supported because it contains an unsupported path ({})", + indexName, Arrays.toString(UNSUPPORTED_INCLUDED_PATHS)); + return false; + } + } + } else { + includedPaths = JsonNodeBuilder.oakStringArrayValue(latestProductIndex, "includedPaths"); + if 
(includesUnsupportedPaths(includedPaths)) { + LOG.warn("Customizing index {} is not supported because it contains an unsupported path ({})", + latestProductKey, Arrays.toString(UNSUPPORTED_INCLUDED_PATHS)); + return false; + } + } + + // merge + JsonObject merged = null; + if (indexDiff == null) { + // no diff definition: use to the OOTB index + if (latestCustomized == null) { + LOG.debug("Only a product index found, nothing to do"); + return false; + } + merged = latestProductIndex; + } else { + merged = processMerge(latestProductIndex, indexDiff); + } + + // compare to the latest version of the this index + JsonObject latestIndexVersion = new JsonObject(); + if (latestCustomized == null) { + latestIndexVersion = latestProductIndex; + } else { + latestIndexVersion = combined.getChildren().get(latestCustomizedKey); + } + JsonObject mergedDef = cleanedAndNormalized(switchToLucene(merged)); + // compute merge checksum for later, but do not yet add + String mergeChecksum = computeMergeChecksum(mergedDef); + // get the merge checksum before cleaning (cleaning removes it) - if available + String key; + if (latestIndexVersion == null) { + // new index + key = prefix + indexName + "-1-custom-1"; + } else { + String latestMergeChecksum = JsonNodeBuilder.oakStringValue(latestIndexVersion, "mergeChecksum"); + JsonObject latestDef = cleanedAndNormalized(switchToLucene(latestIndexVersion)); + if (isSameIgnorePropertyOrder(mergedDef, latestDef)) { + // normal case: no change + // (even if checksums do not match: checksums might be missing or manipulated) + LOG.debug("Latest index matches"); + if (latestMergeChecksum != null && !latestMergeChecksum.equals(mergeChecksum)) { + LOG.warn("Indexes do match, but checksums do not. Possibly checksum was changed: {} vs {}", latestMergeChecksum, mergeChecksum); + LOG.warn("latest: {}\nmerged: {}", latestDef, mergedDef); + } + return false; + } + if (latestMergeChecksum != null && latestMergeChecksum.equals(mergeChecksum)) { + // checksum matches, but data does not match + // could be eg. due to numbers formatting issues (-0.0 vs 0.0, 0.001 vs 1e-3) + // but unexpected because we do not normally have such cases + LOG.warn("Indexes do not match, but checksums match. 
Possible normalization issue."); + LOG.warn("Index: {}, latest: {}\nmerged: {}", indexName, latestDef, mergedDef); + // if checksums match, we consider it a match + return false; + } + LOG.info("Indexes do not match, with"); + LOG.info("Index: {}, latest: {}\nmerged: {}", indexName, latestDef, mergedDef); + // a new merged index definition + if (latestProduct == null) { + // fully custom index: increment version + key = prefix + indexName + + "-" + latestCustomized.getProductVersion() + + "-custom-" + (latestCustomized.getCustomerVersion() + 1); + } else { + // customized OOTB index: use the latest product as the base + key = prefix + indexName + + "-" + latestProduct.getProductVersion() + + "-custom-"; + if (latestCustomized != null) { + key += (latestCustomized.getCustomerVersion() + 1); + } else { + key += "1"; + } + } + } + merged.getProperties().put("mergeInfo", JsopBuilder.encode(MERGE_INFO)); + merged.getProperties().put("mergeChecksum", JsopBuilder.encode(mergeChecksum)); + merged.getProperties().put("merges", "[" + JsopBuilder.encode("/oak:index/" + indexName) + "]"); + merged.getProperties().remove("reindexCount"); + merged.getProperties().remove("reindex"); + if (!DELETE_COPIES_OOTB && indexDiff.toString().equals("{}")) { + merged.getProperties().put("type", "\"disabled\""); + merged.getProperties().put("mergeComment", "\"This index is superseeded and can be removed\""); + } + newImageLuceneDefinitions.getChildren().put(key, merged); + return true; + } + + /** + * Check whether the includedPaths covers unsupported paths, + * if there are any unsupported path (eg. "/apps" or "/libs"). + * In this case, simplified index management is not supported. + * + * @param includedPaths the includedPaths list + * @return true if any unsupported path is included + */ + public static boolean includesUnsupportedPaths(String[] includedPaths) { + if (UNSUPPORTED_INCLUDED_PATHS.length == 1 && "".equals(UNSUPPORTED_INCLUDED_PATHS[0])) { + // set to an empty string + return false; + } + if (includedPaths == null) { + // not set means all entries + return true; + } + for (String path : includedPaths) { + if ("/".equals(path)) { + // all + return true; + } + for (String unsupported : UNSUPPORTED_INCLUDED_PATHS) { + if (unsupported.isEmpty()) { + continue; + } + if (path.equals(unsupported) || path.startsWith(unsupported + "/")) { + // includedPaths matches, or starts with an unsupported path + return true; + } + } + } + return false; + } + + /** + * Compute the SHA-256 checksum of the JSON object. This is useful to detect + * that the JSON object was not "significantly" changed, even if stored + * somewhere and later read again. Insignificant changes include: rounding of + * floating point numbers, re-ordering properties, things like that. Without the + * checksum, we would risk creating a new version of a customized index each + * time the indexing job is run, even thought the customer didn't change + * anything. + * + * @param json the input + * @return the SHA-256 checksum + */ + private static String computeMergeChecksum(JsonObject json) { + byte[] bytes = json.toString().getBytes(StandardCharsets.UTF_8); + try { + MessageDigest md = MessageDigest.getInstance("SHA-256"); + return StringUtils.convertBytesToHex(md.digest(bytes)); + } catch (NoSuchAlgorithmException e) { + // SHA-256 is guaranteed to be available in standard Java platforms + throw new RuntimeException("SHA-256 algorithm not available", e); + } + } + + /** + * Switch the index from type "elasticsearch" to "lucene", if needed. 
This will + * also replace all properties that have an "...@lucene" version. + * + * This is needed because we want to merge only the "lucene" version, to + * simplify the merging logic. (The switch to the "elasticsearch" version + * happens later). + * + * @param indexDef the index definition (is not changed by this method) + * @return the lucene version (a new JSON object) + */ + public static JsonObject switchToLucene(JsonObject indexDef) { + JsonObject obj = JsonObject.fromJson(indexDef.toString(), true); + String type = JsonNodeBuilder.oakStringValue(obj, "type"); + if (type == null || !"elasticsearch".equals(type) ) { + return obj; + } + switchToLuceneChildren(obj); + return obj; + } + + private static void switchToLuceneChildren(JsonObject indexDef) { + // clone the keys to avoid ConcurrentModificationException + for (String p : new ArrayList<>(indexDef.getProperties().keySet())) { + if (!p.endsWith("@lucene")) { + continue; + } + String v = indexDef.getProperties().remove(p); + indexDef.getProperties().put(p.substring(0, p.length() - "@lucene".length()), v); + } + for (String c : indexDef.getChildren().keySet()) { + JsonObject co = indexDef.getChildren().get(c); + switchToLuceneChildren(co); + } + } + + /** + * Convert the JSON object to a new object, where index definition + * properties that are unimportant for comparison are removed. + * Example of important properties are "reindex", "refresh", "seed" etc. + * The order of properties is not relevant (but the order of children is). + * + * @param obj the input (is not changed by the method) + * @return a new JSON object + */ + public static JsonObject cleanedAndNormalized(JsonObject obj) { + obj = JsonObject.fromJson(obj.toString(), true); + obj.getProperties().remove(":version"); + obj.getProperties().remove(":nameSeed"); + obj.getProperties().remove(":mappingVersion"); + obj.getProperties().remove("refresh"); + obj.getProperties().remove("reindexCount"); + obj.getProperties().remove("reindex"); + obj.getProperties().remove("seed"); + obj.getProperties().remove("merges"); + obj.getProperties().remove("mergeInfo"); + obj.getProperties().remove("mergeChecksum"); + for (String p : new ArrayList<>(obj.getProperties().keySet())) { + if (p.endsWith("@lucene")) { + obj.getProperties().remove(p); + } else if (p.endsWith("@elasticsearch")) { + obj.getProperties().remove(p); + } else { + // remove "str:", "nam:", etc if needed + String v = obj.getProperties().get(p); + String v2 = normalizeOakString(v); + if (!v2.equals(v)) { + obj.getProperties().put(p, v2); + } + } + } + removeUUIDs(obj); + for (Entry<String, JsonObject> e : obj.getChildren().entrySet()) { + obj.getChildren().put(e.getKey(), cleanedAndNormalized(e.getValue())); + } + // re-build the properties in alphabetical order + // (sorting the child nodes would be incorrect however, as order is significant here) + TreeMap<String, String> props = new TreeMap<>(obj.getProperties()); + obj.getProperties().clear(); + for (Entry<String, String> e : props.entrySet()) { + obj.getProperties().put(e.getKey(), e.getValue()); + } + return obj; + } + + /** + * "Normalize" a JSON string value. Remove any "nam:" and "dat:" and "str:" + * prefix in the value, because customers won't use them normally. (We want the + * diff to be as simple as possible). + * + * @param value the value (including double quotes; eg. 
"str:value") + * @return the normalized value (including double quotes) + */ + private static String normalizeOakString(String value) { + if (value == null || !value.startsWith("\"")) { + // ignore numbers + return value; + } + value = JsopTokenizer.decodeQuoted(value); + if (value.startsWith("str:") || value.startsWith("nam:") || value.startsWith("dat:")) { + value = value.substring("str:".length()); + } + return JsopBuilder.encode(value); + } + + /** + * Remove all "jcr:uuid" properties (including those in children), because the + * values might conflict. (new uuids are added later when needed). + * + * @param obj the JSON object where uuids will be removed. + */ + private static void removeUUIDs(JsonObject obj) { + obj.getProperties().remove("jcr:uuid"); + for (JsonObject c : obj.getChildren().values()) { + removeUUIDs(c); + } + } + + /** + * Merge a product index with a diff. If the product index is null, then the + * diff needs to contain a complete custom index definition. + * + * @param productIndex the product index definition, or null if none + * @param diff the diff (from the diff.index definition) + * @return the index definition of the merged index + */ + public static JsonObject processMerge(JsonObject productIndex, JsonObject diff) { + JsonObject result; + if (productIndex == null) { + // fully custom index + result = new JsonObject(true); + } else { + result = JsonObject.fromJson(productIndex.toString(), true); + } + mergeInto("", diff, result); + addPrimaryType("", result); + return result; + } + + /** + * Add primary type properties where needed. For the top-level index definition, + * this is "oak:QueryIndexDefinition", and "nt:unstructured" elsewhere. + * + * @param path the path (so we can call the method recursively) + * @param json the JSON object (is changed if needed) + */ + private static void addPrimaryType(String path, JsonObject json) { + // all nodes need to have a node type; + // the index definition itself (at root level) is "oak:QueryIndexDefinition", + // and all other nodes are "nt:unstructured" + if (!json.getProperties().containsKey("jcr:primaryType")) { + // all nodes need to have a primary type, + // otherwise index import will fail + String nodeType; + if (path.isEmpty()) { + nodeType = "oak:QueryIndexDefinition"; + } else { + nodeType = "nt:unstructured"; + } + String nodeTypeValue = "nam:" + nodeType; + json.getProperties().put("jcr:primaryType", JsopBuilder.encode(nodeTypeValue)); + } + for (Entry<String, JsonObject> e : json.getChildren().entrySet()) { + addPrimaryType(path + "/" + e.getKey(), e.getValue()); + } + } + + /** + * Merge a JSON diff into a target index definition. 
+ * + * @param path the path + * @param diff the diff (what to merge) + * @param target where to merge into + */ + private static void mergeInto(String path, JsonObject diff, JsonObject target) { + for (String p : diff.getProperties().keySet()) { + if (path.isEmpty()) { + if ("jcr:primaryType".equals(p)) { + continue; + } + } + if (target.getProperties().containsKey(p)) { + // we do not currently allow to overwrite most existing properties + if (p.equals("boost")) { + // allow overwriting the boost value + LOG.info("Overwrite property {} value at {}", p, path); + target.getProperties().put(p, diff.getProperties().get(p)); + } else { + LOG.warn("Ignoring existing property {} at {}", p, path); + } + } else { + target.getProperties().put(p, diff.getProperties().get(p)); + } + } + for (String c : diff.getChildren().keySet()) { + String targetChildName = c; + if (!target.getChildren().containsKey(c)) { + if (path.endsWith("/properties")) { + // search for a property with the same "name" value + String propertyName = diff.getChildren().get(c).getProperties().get("name"); + if (propertyName != null) { + propertyName = JsonNodeBuilder.oakStringValue(propertyName); + String c2 = getChildWithKeyValuePair(target, "name", propertyName); + if (c2 != null) { + targetChildName = c2; + } + } + // search for a property with the same "function" value + String function = diff.getChildren().get(c).getProperties().get("function"); + if (function != null) { + function = JsonNodeBuilder.oakStringValue(function); + String c2 = getChildWithKeyValuePair(target, "function", function); + if (c2 != null) { + targetChildName = c2; + } + } + } + if (targetChildName.equals(c)) { + // only create the child (properties are added below) + target.getChildren().put(c, new JsonObject()); + } + } + mergeInto(path + "/" + targetChildName, diff.getChildren().get(c), target.getChildren().get(targetChildName)); + } + if (target.getProperties().isEmpty() && target.getChildren().isEmpty()) { + if (DELETE_CREATES_DUMMY) { + // dummy index + target.getProperties().put("async", "\"async\""); + target.getProperties().put("includedPaths", "\"/dummy\""); + target.getProperties().put("queryPaths", "\"/dummy\""); + target.getProperties().put("type", "\"lucene\""); + JsopBuilder buff = new JsopBuilder(); + buff.object(). + key("properties").object(). + key("dummy").object(). + key("name").value("dummy"). + key("propertyIndex").value(true). + endObject(). + endObject(). + endObject(); + JsonObject indexRules = JsonObject.fromJson(buff.toString(), true); + target.getChildren().put("indexRules", indexRules); + } else { + target.getProperties().put("type", "\"disabled\""); + } + } + } + + public static String getChildWithKeyValuePair(JsonObject obj, String key, String value) { + for(Entry<String, JsonObject> c : obj.getChildren().entrySet()) { + String v2 = c.getValue().getProperties().get(key); + if (v2 == null) { + continue; + } + v2 = JsonNodeBuilder.oakStringValue(v2); + if (value.equals(v2)) { + return c.getKey(); + } + } + return null; + } + + /** + * Compare two JSON object, ignoring the order of properties. (The order of + * children is however significant). + * + * This is done in addition to the checksum comparison, because the in theory + * the customer might change the checksum (it is not read-only as read-only + * values are not supported). We do not rely on the comparison, but if comparison + * and checksum comparison do not match, we log a warning. 
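+ * For example, {@code {"a": 1, "b": 2}} and {@code {"b": 2, "a": 1}} are
+ * considered equal.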
+ * + * @param a the first object + * @param b the second object + * @return true if the keys and values are equal + */ + public static boolean isSameIgnorePropertyOrder(JsonObject a, JsonObject b) { + if (!a.getChildren().keySet().equals(b.getChildren().keySet())) { + LOG.debug("Child (order) difference: {} vs {}", + a.getChildren().keySet(), b.getChildren().keySet()); + return false; + } + for (String k : a.getChildren().keySet()) { + if (!isSameIgnorePropertyOrder( + a.getChildren().get(k), b.getChildren().get(k))) { + return false; + } + } + TreeMap<String, String> pa = new TreeMap<>(a.getProperties()); + TreeMap<String, String> pb = new TreeMap<>(b.getProperties()); + if (!pa.toString().equals(pb.toString())) { + LOG.debug("Property value difference: {} vs {}", pa.toString(), pb.toString()); + } + return pa.toString().equals(pb.toString()); + } + + /** + * Read a diff.index from the repository, if it exists. + * This is needed because the build-transform job doesn't have this + * data: it is only available in the writeable repository. + * + * @param repositoryNodeStore the node store + * @return a map, possibly with a single entry with this key + */ + static Map<String, JsonObject> readDiffIndex(NodeStore repositoryNodeStore, String name) { + HashMap<String, JsonObject> map = new HashMap<>(); + NodeState root = repositoryNodeStore.getRoot(); + String indexPath = "/oak:index/" + name; + NodeState idxState = NodeStateUtils.getNode(root, indexPath); + LOG.debug("Searching index {}: found={}", indexPath, idxState.exists()); + if (!idxState.exists()) { + return map; + } + JsopBuilder builder = new JsopBuilder(); + String filter = "{\"properties\":[\"*\", \"-:childOrder\"],\"nodes\":[\"*\", \"-:*\"]}"; + JsonSerializer serializer = new JsonSerializer(builder, filter, new Base64BlobSerializer()); + serializer.serialize(idxState); + JsonObject jsonObj = JsonObject.fromJson(builder.toString(), true); + jsonObj = cleanedAndNormalized(jsonObj); + LOG.debug("Found {}", jsonObj.toString()); + map.put(indexPath, jsonObj); + return map; + } + +} diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/JsonNodeBuilder.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/JsonNodeBuilder.java new file mode 100644 index 0000000000..2f290748a9 --- /dev/null +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/JsonNodeBuilder.java @@ -0,0 +1,279 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.index.diff; + +import java.util.Map.Entry; +import java.util.TreeSet; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Base64; +import java.util.UUID; + +import org.apache.jackrabbit.oak.api.Blob; +import org.apache.jackrabbit.oak.api.CommitFailedException; +import org.apache.jackrabbit.oak.api.PropertyState; +import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.commons.PathUtils; +import org.apache.jackrabbit.oak.commons.json.JsonObject; +import org.apache.jackrabbit.oak.commons.json.JsopReader; +import org.apache.jackrabbit.oak.commons.json.JsopTokenizer; +import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore; +import org.apache.jackrabbit.oak.plugins.tree.TreeConstants; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.apache.jackrabbit.oak.spi.state.NodeStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A utility class to persist a configuration that is in the form of JSON into + * the node store. + * + * This is used to persist a small set of configuration nodes, eg. index + * definitions, using a simple JSON format. + * + * The node type does not need to be set on a per-node basis. Where it is + * missing, the provided node type is used (e.g. "nt:unstructured") + * + * A "jcr:uuid" is automatically added for nodes of type "nt:resource". + * + * String, string arrays, boolean, blob, long, and double values are supported. + * Values that start with ":blobId:...base64..." are stored as binaries. "str:", + * "nam:" and "dat:" prefixes are removed. + * + * "null" entries are not supported. + */ +public class JsonNodeBuilder { + + private static final Logger LOG = LoggerFactory.getLogger(JsonNodeBuilder.class); + + /** + * Add a replace a node, including all child nodes, in the node store. + * + * @param nodeStore the target node store + * @param targetPath the target path where the node(s) is/are replaced + * @param nodeType the node type of the new node (eg. 
"nt:unstructured") + * @param jsonString the json string with the node data + * @throws CommitFailedException if storing the nodes failed + * @throws IOException if storing a blob failed + */ + public static void addOrReplace(NodeBuilder builder, NodeStore nodeStore, String targetPath, String nodeType, String jsonString) throws CommitFailedException, IOException { + LOG.info("Storing {}: {}", targetPath, jsonString); + if (nodeType.indexOf("/") >= 0) { + throw new IllegalStateException("Illegal node type: " + nodeType); + } + JsonObject json = JsonObject.fromJson(jsonString, true); + for (String name : PathUtils.elements(targetPath)) { + NodeBuilder child = builder.child(name); + if (!child.hasProperty("jcr:primaryType")) { + child.setProperty("jcr:primaryType", nodeType, Type.NAME); + } + builder = child; + } + storeConfigNode(nodeStore, builder, nodeType, json); + } + + private static void storeConfigNode(NodeStore nodeStore, NodeBuilder builder, String nodeType, JsonObject json) throws IOException { + ArrayList<String> childOrder = new ArrayList<>(); + for (Entry<String, JsonObject> e : json.getChildren().entrySet()) { + String k = e.getKey(); + childOrder.add(k); + JsonObject v = e.getValue(); + storeConfigNode(nodeStore, builder.child(k), nodeType, v); + } + for (String child : builder.getChildNodeNames()) { + if (!json.getChildren().containsKey(child)) { + builder.child(child).remove(); + } + } + for (Entry<String, String> e : json.getProperties().entrySet()) { + String k = e.getKey(); + String v = e.getValue(); + storeConfigProperty(nodeStore, builder, k, v); + } + if (!json.getProperties().containsKey("jcr:primaryType")) { + builder.setProperty("jcr:primaryType", nodeType, Type.NAME); + } + for (PropertyState prop : builder.getProperties()) { + if ("jcr:primaryType".equals(prop.getName())) { + continue; + } + if (!json.getProperties().containsKey(prop.getName())) { + builder.removeProperty(prop.getName()); + } + } + builder.setProperty(TreeConstants.OAK_CHILD_ORDER, childOrder, Type.NAMES); + if ("nt:resource".equals(JsonNodeBuilder.oakStringValue(json, "jcr:primaryType"))) { + if (!json.getProperties().containsKey("jcr:uuid")) { + String uuid = UUID.randomUUID().toString(); + builder.setProperty("jcr:uuid", uuid); + } + } + } + + private static void storeConfigProperty(NodeStore nodeStore, NodeBuilder builder, String propertyName, String value) throws IOException { + if (value.startsWith("\"")) { + // string or blob + value = JsopTokenizer.decodeQuoted(value); + if (value.startsWith(":blobId:")) { + String base64 = value.substring(":blobId:".length()); + byte[] bytes = Base64.getDecoder().decode(base64.getBytes(StandardCharsets.UTF_8)); + if (nodeStore == null) { + MemoryNodeStore mns = new MemoryNodeStore(); + Blob blob = mns.createBlob(new ByteArrayInputStream(bytes)); + builder.setProperty(propertyName, blob); + } else { + Blob blob = nodeStore.createBlob(new ByteArrayInputStream(bytes)); + builder.setProperty(propertyName, blob); + } + } else { + if (value.startsWith("str:") || value.startsWith("nam:") || value.startsWith("dat:")) { + value = value.substring("str:".length()); + } + if ("jcr:primaryType".equals(propertyName)) { + builder.setProperty(propertyName, value, Type.NAME); + } else { + builder.setProperty(propertyName, value); + } + } + } else if ("null".equals(value)) { + throw new IllegalArgumentException("Removing entries is not supported for property " + propertyName); + } else if ("true".equals(value)) { + builder.setProperty(propertyName, true); + } else if 
("false".equals(value)) { + builder.setProperty(propertyName, false); + } else if (value.startsWith("[")) { + JsopTokenizer tokenizer = new JsopTokenizer(value); + ArrayList<String> result = new ArrayList<>(); + tokenizer.matches('['); + if (!tokenizer.matches(']')) { + do { + if (!tokenizer.matches(JsopReader.STRING)) { + throw new IllegalArgumentException("Could not process string array " + value + " for property " + propertyName); + } + result.add(tokenizer.getToken()); + } while (tokenizer.matches(',')); + tokenizer.read(']'); + } + tokenizer.read(JsopReader.END); + builder.setProperty(propertyName, result, Type.STRINGS); + } else if (value.indexOf('.') >= 0 || value.toLowerCase().indexOf("e") >= 0) { + // double + try { + Double d = Double.parseDouble(value); + builder.setProperty(propertyName, d); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Could not parse double " + value + " for property " + propertyName); + } + } else if (value.startsWith("-") || (!value.isEmpty() && Character.isDigit(value.charAt(0)))) { + // long + try { + Long x = Long.parseLong(value); + builder.setProperty(propertyName, x); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Could not parse long " + value + " for property " + propertyName); + } + } else { + throw new IllegalArgumentException("Unsupported value " + value + " for property " + propertyName); + } + } + + public static String oakStringValue(JsonObject json, String propertyName) { + String value = json.getProperties().get(propertyName); + if (value == null) { + return null; + } + return oakStringValue(value); + } + + public static String oakStringValue(String value) { + if (!value.startsWith("\"")) { + // support numbers + return value; + } + value = JsopTokenizer.decodeQuoted(value); + if (value.startsWith(":blobId:")) { + value = value.substring(":blobId:".length()); + value = new String(Base64.getDecoder().decode(value.getBytes(StandardCharsets.UTF_8)), StandardCharsets.UTF_8); + } else if (value.startsWith("str:") || value.startsWith("nam:") || value.startsWith("dat:")) { + value = value.substring("str:".length()); + } + return value; + } + + /** + * Read an Oak string array. There are 3 cases: + * + * - the property doesn't exist: return null + * - the value is stored as string: return an array with one entry + * - the value is stored in an array: return the sorted list of value + * + * The value is sorted, because the order is insignificant in our case, + * and want ["a", "b"] = ["b", "a"] when comparing index definitions. 
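+ * For example (values are illustrative):
+ * <pre>
+ * "includedPaths": "/content"            returns ["/content"]
+ * "includedPaths": ["/libs", "/apps"]    returns ["/apps", "/libs"] (sorted)
+ * property missing                       returns null
+ * </pre>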
+ * + * @param json the JSON object + * @param propertyName the property to extract + * @return a string array or null + */ + public static String[] oakStringArrayValue(JsonObject json, String propertyName) { + String value = json.getProperties().get(propertyName); + if (value == null) { + return null; + } else if (value.startsWith("\"")) { + return new String[] { oakStringValue(value) }; + } else if (value.startsWith("[")) { + return getStringSet(value).toArray(new String[0]); + } else { + LOG.warn("Unsupported value type: {}", value); + return null; + } + } + + public static TreeSet<String> getStringSet(String value) { + if (value == null) { + return null; + } + try { + JsopTokenizer tokenizer = new JsopTokenizer(value); + TreeSet<String> result = new TreeSet<>(); + if (tokenizer.matches(JsopReader.STRING)) { + result.add(tokenizer.getToken()); + return result; + } + if (!tokenizer.matches('[')) { + return null; + } + if (!tokenizer.matches(']')) { + do { + if (!tokenizer.matches(JsopReader.STRING)) { + // not a string + return null; + } + result.add(tokenizer.getToken()); + } while (tokenizer.matches(',')); + tokenizer.read(']'); + } + tokenizer.read(JsopReader.END); + return result; + } catch (IllegalArgumentException e) { + LOG.warn("Unsupported value: {}", value); + return null; + } + } + +} diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/RootIndexesListService.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/RootIndexesListService.java new file mode 100644 index 0000000000..806278f154 --- /dev/null +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/RootIndexesListService.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.index.diff; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.ArrayList; + +import org.apache.felix.inventory.Format; +import org.apache.jackrabbit.oak.commons.json.JsonObject; +import org.apache.jackrabbit.oak.commons.json.JsopBuilder; +import org.apache.jackrabbit.oak.commons.json.JsopTokenizer; +import org.apache.jackrabbit.oak.json.Base64BlobSerializer; +import org.apache.jackrabbit.oak.json.JsonSerializer; +import org.apache.jackrabbit.oak.plugins.index.IndexConstants; +import org.apache.jackrabbit.oak.plugins.index.IndexPathService; +import org.apache.jackrabbit.oak.plugins.index.inventory.IndexDefinitionPrinter; +import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.apache.jackrabbit.oak.spi.state.NodeState; +import org.apache.jackrabbit.oak.spi.state.NodeStore; +import org.jetbrains.annotations.Nullable; + +public class RootIndexesListService implements IndexPathService { + + private final NodeStore nodeStore; + + private RootIndexesListService(NodeStore nodeStore) { + this.nodeStore = nodeStore; + } + + public static JsonObject getRootIndexDefinitions(NodeBuilder definitions) { + JsopBuilder json = new JsopBuilder(); + String filter = "{\"properties\":[\"*\", \"-:childOrder\"],\"nodes\":[\"*\", \"-:*\"]}"; + json.object(); + for (String indexPath : definitions.getChildNodeNames()) { + NodeState node = definitions.child(indexPath).getNodeState(); + json.key("/oak:index/" + indexPath); + JsonSerializer s = new JsonSerializer(json, filter, new Base64BlobSerializer()); + s.serialize(node); + } + json.endObject(); + return JsonObject.fromJson(json.toString(), true); + } + + /** + * Get the index definitions at /oak:index from a node store. 
+ * + * @param nodeStore the source node store (may not be null) + * @param typePattern the index types (may be null, meaning all) + * @return a JSON object with all index definitions + */ + public static JsonObject getRootIndexDefinitions(NodeStore nodeStore, @Nullable String typePattern) { + if (nodeStore == null) { + return new JsonObject(); + } + RootIndexesListService imageIndexPathService = new RootIndexesListService(nodeStore); + IndexDefinitionPrinter indexDefinitionPrinter = new IndexDefinitionPrinter(nodeStore, imageIndexPathService); + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + indexDefinitionPrinter.print(printWriter, Format.JSON, false); + printWriter.flush(); + writer.flush(); + String json = writer.toString(); + JsonObject result = JsonObject.fromJson(json, true); + if (typePattern != null) { + for (String c : new ArrayList<>(result.getChildren().keySet())) { + String type = result.getChildren().get(c).getProperties().get("type"); + if (type == null) { + continue; + } + type = JsopTokenizer.decodeQuoted(type); + if (type != null && !type.matches(typePattern)) { + result.getChildren().remove(c); + } + } + } + return result; + } + + @Override + public Iterable<String> getIndexPaths() { + ArrayList<String> list = new ArrayList<>(); + NodeState oakIndex = nodeStore.getRoot().getChildNode("oak:index"); + if (!oakIndex.exists()) { + return list; + } + for (ChildNodeEntry cn : oakIndex.getChildNodeEntries()) { + if (!IndexConstants.INDEX_DEFINITIONS_NODE_TYPE + .equals(cn.getNodeState().getName("jcr:primaryType"))) { + continue; + } + list.add("/oak:index/" + cn.getName()); + } + return list; + } + +} \ No newline at end of file diff --git a/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexTest.java b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexTest.java new file mode 100644 index 0000000000..888eeaf898 --- /dev/null +++ b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexTest.java @@ -0,0 +1,307 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.index.diff; + +import static org.apache.jackrabbit.oak.InitialContentHelper.INITIAL_CONTENT; +import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NAME; +import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.List; + +import org.apache.jackrabbit.JcrConstants; +import org.apache.jackrabbit.oak.api.Blob; +import org.apache.jackrabbit.oak.api.CommitFailedException; +import org.apache.jackrabbit.oak.api.PropertyState; +import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.commons.json.JsonObject; +import org.apache.jackrabbit.oak.plugins.index.AsyncIndexUpdate; +import org.apache.jackrabbit.oak.plugins.index.CompositeIndexEditorProvider; +import org.apache.jackrabbit.oak.plugins.index.IndexConstants; +import org.apache.jackrabbit.oak.plugins.index.IndexEditorProvider; +import org.apache.jackrabbit.oak.plugins.index.IndexUpdateProvider; +import org.apache.jackrabbit.oak.plugins.index.counter.NodeCounterEditorProvider; +import org.apache.jackrabbit.oak.plugins.index.property.PropertyIndexEditorProvider; +import org.apache.jackrabbit.oak.plugins.index.reference.ReferenceEditorProvider; +import org.apache.jackrabbit.oak.plugins.memory.BinaryPropertyState; +import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore; +import org.apache.jackrabbit.oak.spi.commit.CommitInfo; +import org.apache.jackrabbit.oak.spi.commit.EditorHook; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.apache.jackrabbit.oak.spi.state.NodeStore; +import org.junit.Test; + +/** + * Tests for DiffIndex functionality. 
+ */ +public class DiffIndexTest { + + @Test + public void listIndexes() { + NodeStore store = new MemoryNodeStore(INITIAL_CONTENT); + JsonObject indexDefs = RootIndexesListService.getRootIndexDefinitions(store, "property"); + // expect at least one index + assertFalse(indexDefs.getChildren().isEmpty()); + } + + @Test + public void tryReadStringNull() { + assertNull(DiffIndex.tryReadString(null)); + } + + @Test + public void tryReadStringValidContent() { + String content = "Hello, World!"; + PropertyState prop = BinaryPropertyState.binaryProperty("jcr:data", + content.getBytes(StandardCharsets.UTF_8)); + assertEquals(content, DiffIndex.tryReadString(prop)); + } + + @Test + public void tryReadStringEmpty() { + PropertyState prop = BinaryPropertyState.binaryProperty("jcr:data", new byte[0]); + assertEquals("", DiffIndex.tryReadString(prop)); + } + + @Test + public void tryReadStringJsonContent() { + String content = "{ \"key\": \"value\", \"array\": [1, 2, 3] }"; + PropertyState prop = BinaryPropertyState.binaryProperty("jcr:data", + content.getBytes(StandardCharsets.UTF_8)); + assertEquals(content, DiffIndex.tryReadString(prop)); + } + + @Test + public void tryReadStringIOException() throws IOException { + PropertyState prop = mock(PropertyState.class); + Blob blob = mock(Blob.class); + InputStream failingStream = new InputStream() { + @Override + public int read() throws IOException { + throw new IOException("Simulated read failure"); + } + @Override + public byte[] readAllBytes() throws IOException { + throw new IOException("Simulated read failure"); + } + }; + when(prop.getValue(Type.BINARY)).thenReturn(blob); + when(blob.getNewStream()).thenReturn(failingStream); + + // Should return null (not throw exception) + assertNull(DiffIndex.tryReadString(prop)); + } + + @Test + public void testDiffIndexUpdate() throws Exception { + // Create a memory node store + NodeStore store = new MemoryNodeStore(INITIAL_CONTENT); + + storeDiff(store, "2026-01-01T00:00:00.000Z", "" + + "{ \"acme.testIndex\": {\n" + + " \"async\": [ \"async\", \"nrt\" ],\n" + + " \"compatVersion\": 2,\n" + + " \"evaluatePathRestrictions\": true,\n" + + " \"includedPaths\": [ \"/content/dam\" ],\n" + + " \"jcr:primaryType\": \"oak:QueryIndexDefinition\",\n" + + " \"queryPaths\": [ \"/content/dam\" ],\n" + + " \"selectionPolicy\": \"tag\",\n" + + " \"tags\": [ \"abc\" ],\n" + + " \"type\": \"lucene\",\n" + + " \"indexRules\": {\n" + + " \"jcr:primaryType\": \"nt:unstructured\",\n" + + " \"dam:Asset\": {\n" + + " \"jcr:primaryType\": \"nt:unstructured\",\n" + + " \"properties\": {\n" + + " \"jcr:primaryType\": \"nt:unstructured\",\n" + + " \"created\": {\n" + + " \"jcr:primaryType\": \"nt:unstructured\",\n" + + " \"name\": \"str:jcr:created\",\n" + + " \"ordered\": true,\n" + + " \"propertyIndex\": true,\n" + + " \"type\": \"Date\"\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " } }"); + + JsonObject repositoryDefinitions = RootIndexesListService.getRootIndexDefinitions(store, "lucene"); + assertSameJson("{\n" + + " \"/oak:index/acme.testIndex-1-custom-1\": {\n" + + " \"compatVersion\": 2,\n" + + " \"async\": [\"async\", \"nrt\"],\n" + + " \"evaluatePathRestrictions\": true,\n" + + " \"mergeChecksum\": \"34e7f7f0eb480ea781317b56134bc85fc59ed97031d95f518fdcff230aec28a2\",\n" + + " \"mergeInfo\": \"This index was auto-merged. 
See also https://oak-indexing.github.io/oakTools/simplified.html\",\n" + + " \"selectionPolicy\": \"tag\",\n" + + " \"queryPaths\": [\"/content/dam\"],\n" + + " \"includedPaths\": [\"/content/dam\"],\n" + + " \"jcr:primaryType\": \"nam:oak:QueryIndexDefinition\",\n" + + " \"type\": \"lucene\",\n" + + " \"tags\": [\"abc\"],\n" + + " \"merges\": [\"/oak:index/acme.testIndex\"],\n" + + " \"indexRules\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"dam:Asset\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"properties\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"created\": {\n" + + " \"ordered\": true,\n" + + " \"name\": \"str:jcr:created\",\n" + + " \"propertyIndex\": true,\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"type\": \"Date\"\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + "}", repositoryDefinitions.toString()); + + storeDiff(store, "2026-01-01T00:00:00.001Z", "" + + "{ \"acme.testIndex\": {\n" + + " \"async\": [ \"async\", \"nrt\" ],\n" + + " \"compatVersion\": 2,\n" + + " \"evaluatePathRestrictions\": true,\n" + + " \"includedPaths\": [ \"/content/dam\" ],\n" + + " \"jcr:primaryType\": \"oak:QueryIndexDefinition\",\n" + + " \"queryPaths\": [ \"/content/dam\" ],\n" + + " \"selectionPolicy\": \"tag\",\n" + + " \"tags\": [ \"abc\" ],\n" + + " \"type\": \"lucene\",\n" + + " \"indexRules\": {\n" + + " \"jcr:primaryType\": \"nt:unstructured\",\n" + + " \"dam:Asset\": {\n" + + " \"jcr:primaryType\": \"nt:unstructured\",\n" + + " \"properties\": {\n" + + " \"jcr:primaryType\": \"nt:unstructured\",\n" + + " \"created\": {\n" + + " \"jcr:primaryType\": \"nt:unstructured\",\n" + + " \"name\": \"str:jcr:created\",\n" + + " \"propertyIndex\": true\n" + + " },\n" + + " \"modified\": {\n" + + " \"jcr:primaryType\": \"nt:unstructured\",\n" + + " \"name\": \"str:jcr:modified\",\n" + + " \"propertyIndex\": true\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " } }"); + + repositoryDefinitions = RootIndexesListService.getRootIndexDefinitions(store, "lucene"); + assertSameJson("{\n" + + " \"/oak:index/acme.testIndex-1-custom-2\": {\n" + + " \"compatVersion\": 2,\n" + + " \"async\": [\"async\", \"nrt\"],\n" + + " \"mergeChecksum\": \"41df9c87e4d4fca446aed3f55e6d188304a2cb49bae442b75403dc23a89b266f\",\n" + + " \"mergeInfo\": \"This index was auto-merged. 
See also https://oak-indexing.github.io/oakTools/simplified.html\",\n" + + " \"selectionPolicy\": \"tag\",\n" + + " \"queryPaths\": [\"/content/dam\"],\n" + + " \"includedPaths\": [\"/content/dam\"],\n" + + " \"jcr:primaryType\": \"nam:oak:QueryIndexDefinition\",\n" + + " \"evaluatePathRestrictions\": true,\n" + + " \"type\": \"lucene\",\n" + + " \"tags\": [\"abc\"],\n" + + " \"merges\": [\"/oak:index/acme.testIndex\"],\n" + + " \"indexRules\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"dam:Asset\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"properties\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"created\": {\n" + + " \"name\": \"str:jcr:created\",\n" + + " \"propertyIndex\": true,\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\"\n" + + " },\n" + + " \"modified\": {\n" + + " \"name\": \"str:jcr:modified\",\n" + + " \"propertyIndex\": true,\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\"\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + "}", repositoryDefinitions.toString()); + + storeDiff(store, "2026-01-01T00:00:00.002Z", "" + + "{}"); + + repositoryDefinitions = RootIndexesListService.getRootIndexDefinitions(store, "lucene"); + assertSameJson("{}", repositoryDefinitions.toString()); + } + + private void assertSameJson(String a, String b) { + JsonObject ja = JsonObject.fromJson(a, true); + JsonObject jb = JsonObject.fromJson(b, true); + if (!DiffIndexMerger.isSameIgnorePropertyOrder(ja, jb)) { + assertEquals(a, b); + } + } + + private void storeDiff(NodeStore store, String timestamp, String json) throws CommitFailedException { + // Get the root builder + NodeBuilder builder = store.getRoot().builder(); + + List<IndexEditorProvider> indexEditors = List.of( + new ReferenceEditorProvider(), new PropertyIndexEditorProvider(), new NodeCounterEditorProvider()); + IndexEditorProvider provider = CompositeIndexEditorProvider.compose(indexEditors); + EditorHook hook = new EditorHook(new IndexUpdateProvider(provider)); + + // Create the index definition at /oak:index/diff.index + NodeBuilder indexDefs = builder.child(INDEX_DEFINITIONS_NAME); + NodeBuilder diffIndex = indexDefs.child("diff.index"); + + // Set index properties + diffIndex.setProperty("jcr:primaryType", IndexConstants.INDEX_DEFINITIONS_NODE_TYPE, Type.NAME); + diffIndex.setProperty(TYPE_PROPERTY_NAME, "disabled"); + + // Create the diff.json child node with primary type nt:file + NodeBuilder diffJson = diffIndex.child("diff.json"); + diffJson.setProperty(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_FILE, Type.NAME); + + // Create jcr:content child node (required for nt:file) with empty text + NodeBuilder content = diffJson.child(JcrConstants.JCR_CONTENT); + content.setProperty(JcrConstants.JCR_LASTMODIFIED, timestamp); + content.setProperty(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_RESOURCE, Type.NAME); + + content.setProperty("jcr:data", json); + + // Merge changes to the store + store.merge(builder, hook, CommitInfo.EMPTY); + + // Run async indexing explicitly + for (int i = 0; i < 5; i++) { + try (AsyncIndexUpdate async = new AsyncIndexUpdate("async", store, provider)) { + async.run(); + } + } + } +} + diff --git a/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/JsonNodeBuilderTest.java b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/JsonNodeBuilderTest.java new file mode 100644 index 0000000000..9a80fa8a7e --- /dev/null +++ 
b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/JsonNodeBuilderTest.java @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.index.diff; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Base64; +import java.util.TreeSet; + +import org.apache.jackrabbit.oak.api.CommitFailedException; +import org.apache.jackrabbit.oak.commons.json.JsonObject; +import org.apache.jackrabbit.oak.json.JsonUtils; +import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore; +import org.apache.jackrabbit.oak.spi.commit.CommitInfo; +import org.apache.jackrabbit.oak.spi.commit.EmptyHook; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; +import org.junit.Test; + +public class JsonNodeBuilderTest { + + @Test + public void addNodeTypeAndUUID() throws CommitFailedException, IOException { + MemoryNodeStore ns = new MemoryNodeStore(); + JsonObject json = JsonObject.fromJson( + "{\n" + + " \"includedPaths\": \"/same\",\n" + + " \"jcr:primaryType\": \"nt:unstructured\",\n" + + " \"queryPaths\": \"/same\",\n" + + " \"type\": \"lucene\",\n" + + " \"diff.json\": {\n" + + " \"jcr:primaryType\": \"nt:file\",\n" + + " \"jcr:content\": {\n" + + " \"jcr:data\": \":blobId:dGVzdA==\",\n" + + " \"jcr:mimeType\": \"application/json\",\n" + + " \"jcr:primaryType\": \"nt:resource\"\n" + + " }\n" + + " }\n" + + " }", true); + NodeBuilder builder = ns.getRoot().builder(); + JsonNodeBuilder.addOrReplace(builder, ns, "/test", "nt:test", json.toString()); + ns.merge(builder, new EmptyHook(), CommitInfo.EMPTY); + String json2 = JsonUtils.nodeStateToJson(ns.getRoot(), 5); + json2 = json2.replaceAll("jcr:uuid\" : \".*\"", "jcr:uuid\" : \"...\""); + assertEquals("{\n" + + " \"test\" : {\n" + + " \"queryPaths\" : \"/same\",\n" + + " \"includedPaths\" : \"/same\",\n" + + " \"jcr:primaryType\" : \"nt:unstructured\",\n" + + " \"type\" : \"lucene\",\n" + + " \":childOrder\" : [ \"diff.json\" ],\n" + + " \"diff.json\" : {\n" + + " \"jcr:primaryType\" : \"nt:file\",\n" + + " \":childOrder\" : [ \"jcr:content\" ],\n" + + " \"jcr:content\" : {\n" + + " \"jcr:mimeType\" : \"application/json\",\n" + + " \"jcr:data\" : \"test\",\n" + + " \"jcr:primaryType\" : \"nt:resource\",\n" + + " \"jcr:uuid\" : \"...\",\n" + + " \":childOrder\" : [ ]\n" + + " }\n" + + " }\n" + + " }\n" + + "}", json2); + + json = JsonObject.fromJson( + "{\"number\":1," + + "\"double2\":1.0," + + "\"child2\":{\"y\":2}}", true); + builder = ns.getRoot().builder(); + 
JsonNodeBuilder.addOrReplace(builder, ns, "/test", "nt:test", json.toString()); + ns.merge(builder, new EmptyHook(), CommitInfo.EMPTY); + assertEquals("{\n" + + " \"test\" : {\n" + + " \"number\" : 1,\n" + + " \"double2\" : 1.0,\n" + + " \"jcr:primaryType\" : \"nt:test\",\n" + + " \":childOrder\" : [ \"child2\" ],\n" + + " \"child2\" : {\n" + + " \"y\" : 2,\n" + + " \"jcr:primaryType\" : \"nt:test\",\n" + + " \":childOrder\" : [ ]\n" + + " }\n" + + " }\n" + + "}", JsonUtils.nodeStateToJson(ns.getRoot(), 5)); + } + + @Test + public void store() throws CommitFailedException, IOException { + MemoryNodeStore ns = new MemoryNodeStore(); + JsonObject json = JsonObject.fromJson( + "{\"number\":1," + + "\"double\":1.0," + + "\"string\":\"hello\"," + + "\"array\":[\"a\",\"b\"]," + + "\"child\":{\"x\":1}," + + "\"blob\":\":blobId:dGVzdA==\"}", true); + NodeBuilder builder = ns.getRoot().builder(); + JsonNodeBuilder.addOrReplace(builder, ns, "/test", "nt:test", json.toString()); + ns.merge(builder, new EmptyHook(), CommitInfo.EMPTY); + assertEquals("{\n" + + " \"test\" : {\n" + + " \"number\" : 1,\n" + + " \"blob\" : \"test\",\n" + + " \"string\" : \"hello\",\n" + + " \"array\" : [ \"a\", \"b\" ],\n" + + " \"double\" : 1.0,\n" + + " \"jcr:primaryType\" : \"nt:test\",\n" + + " \":childOrder\" : [ \"child\" ],\n" + + " \"child\" : {\n" + + " \"x\" : 1,\n" + + " \"jcr:primaryType\" : \"nt:test\",\n" + + " \":childOrder\" : [ ]\n" + + " }\n" + + " }\n" + + "}", JsonUtils.nodeStateToJson(ns.getRoot(), 5)); + + json = JsonObject.fromJson( + "{\"number\":1," + + "\"double2\":1.0," + + "\"child2\":{\"y\":2}}", true); + builder = ns.getRoot().builder(); + JsonNodeBuilder.addOrReplace(builder, ns, "/test", "nt:test", json.toString()); + ns.merge(builder, new EmptyHook(), CommitInfo.EMPTY); + assertEquals("{\n" + + " \"test\" : {\n" + + " \"number\" : 1,\n" + + " \"double2\" : 1.0,\n" + + " \"jcr:primaryType\" : \"nt:test\",\n" + + " \":childOrder\" : [ \"child2\" ],\n" + + " \"child2\" : {\n" + + " \"y\" : 2,\n" + + " \"jcr:primaryType\" : \"nt:test\",\n" + + " \":childOrder\" : [ ]\n" + + " }\n" + + " }\n" + + "}", JsonUtils.nodeStateToJson(ns.getRoot(), 5)); + } + + @Test + public void oakStringValue() { + assertEquals("123", JsonNodeBuilder.oakStringValue("123")); + assertEquals("45.67", JsonNodeBuilder.oakStringValue("45.67")); + assertEquals("-10", JsonNodeBuilder.oakStringValue("-10")); + + String helloBase64 = Base64.getEncoder().encodeToString("hello".getBytes(StandardCharsets.UTF_8)); + assertEquals("hello", JsonNodeBuilder.oakStringValue("\":blobId:" + helloBase64 + "\"")); + + assertEquals("hello", JsonNodeBuilder.oakStringValue("\"str:hello\"")); + assertEquals("acme:Test", JsonNodeBuilder.oakStringValue("\"nam:acme:Test\"")); + assertEquals("2024-01-19", JsonNodeBuilder.oakStringValue("\"dat:2024-01-19\"")); + } + + @Test + public void getStringSet() { + assertNull(JsonNodeBuilder.getStringSet(null)); + assertEquals(new TreeSet<>(Arrays.asList("hello")), JsonNodeBuilder.getStringSet("\"hello\"")); + assertEquals(null, JsonNodeBuilder.getStringSet("123")); + assertEquals(new TreeSet<>(Arrays.asList("content/abc")), JsonNodeBuilder.getStringSet("\"content\\/abc\"")); + assertTrue(JsonNodeBuilder.getStringSet("[]").isEmpty()); + assertEquals(new TreeSet<>(Arrays.asList("a")), JsonNodeBuilder.getStringSet("[\"a\"]")); + assertEquals(new TreeSet<>(Arrays.asList("content/abc")), JsonNodeBuilder.getStringSet("[\"content\\/abc\"]")); + assertEquals(new TreeSet<>(Arrays.asList("a")), 
JsonNodeBuilder.getStringSet("[\"a\",\"a\"]")); + assertEquals(new TreeSet<>(Arrays.asList("a", "z")), JsonNodeBuilder.getStringSet("[\"z\",\"a\"]")); + } + + @Test + public void oakStringArrayValue() throws IOException { + assertNull(JsonNodeBuilder.oakStringArrayValue(JsonObject.fromJson("{}", true), "p")); + assertArrayEquals(new String[]{"hello"}, JsonNodeBuilder.oakStringArrayValue(JsonObject.fromJson("{\"p\":\"hello\"}", true), "p")); + assertNull(JsonNodeBuilder.oakStringArrayValue(JsonObject.fromJson("{\"p\":123}", true), "p")); + assertArrayEquals(new String[]{"content/abc"}, JsonNodeBuilder.oakStringArrayValue(JsonObject.fromJson("{\"p\":\"content\\/abc\"}", true), "p")); + assertArrayEquals(new String[]{}, JsonNodeBuilder.oakStringArrayValue(JsonObject.fromJson("{\"p\":[]}", true), "p")); + assertArrayEquals(new String[]{"a"}, JsonNodeBuilder.oakStringArrayValue(JsonObject.fromJson("{\"p\":[\"a\"]}", true), "p")); + assertArrayEquals(new String[]{"content/abc"}, JsonNodeBuilder.oakStringArrayValue(JsonObject.fromJson("{\"p\":[\"content\\/abc\"]}", true), "p")); + assertArrayEquals(new String[]{"a"}, JsonNodeBuilder.oakStringArrayValue(JsonObject.fromJson("{\"p\":[\"a\",\"a\"]}", true), "p")); + assertArrayEquals(new String[]{"a", "z"}, JsonNodeBuilder.oakStringArrayValue(JsonObject.fromJson("{\"p\":[\"z\",\"a\"]}", true), "p")); + } + + @Test + public void addOrReplacePrefixesBooleansAndEscapes() throws CommitFailedException, IOException { + MemoryNodeStore ns = new MemoryNodeStore(); + JsonObject json = JsonObject.fromJson( + "{\"strValue\":\"str:hello\"," + + "\"namValue\":\"nam:acme:Test\"," + + "\"datValue\":\"dat:2024-01-19\"," + + "\"boolTrue\":true," + + "\"boolFalse\":false," + + "\"escapedArray\":[\"\\/content\\/path\"]}", true); + NodeBuilder builder = ns.getRoot().builder(); + JsonNodeBuilder.addOrReplace(builder, ns, "/test", "nt:test", json.toString()); + ns.merge(builder, new EmptyHook(), CommitInfo.EMPTY); + assertEquals("{\n" + + " \"test\" : {\n" + + " \"namValue\" : \"acme:Test\",\n" + + " \"boolTrue\" : true,\n" + + " \"boolFalse\" : false,\n" + + " \"datValue\" : \"2024-01-19\",\n" + + " \"escapedArray\" : [ \"/content/path\" ],\n" + + " \"jcr:primaryType\" : \"nt:test\",\n" + + " \"strValue\" : \"hello\",\n" + + " \":childOrder\" : [ ]\n" + + " }\n" + + "}", JsonUtils.nodeStateToJson(ns.getRoot(), 5)); + } + +} diff --git a/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/MergeTest.java b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/MergeTest.java new file mode 100644 index 0000000000..35c4c41497 --- /dev/null +++ b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/MergeTest.java @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.index.diff; + +import static org.junit.Assert.assertEquals; + +import org.apache.jackrabbit.oak.commons.json.JsonObject; +import org.junit.Test; + +public class MergeTest { + + @Test + public void renamedProperty() { + // A property might be indexed twice, by adding two children to the "properties" node + // that both have the same "name" value. + // Alternatively, they could have the same "function" value. + String merged = DiffIndexMerger.processMerge(JsonObject.fromJson("{\n" + + " \"jcr:primaryType\": \"nam:oak:QueryIndexDefinition\",\n" + + " \"type\": \"lucene\",\n" + + " \"indexRules\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"acme:Test\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"properties\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"abc\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"name\": \"test\",\n" + + " \"boost\": 1.0\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " }" + + "", true), JsonObject.fromJson("{\n" + + " \"indexRules\": {\n" + + " \"acme:Test\": {\n" + + " \"properties\": {\n" + + " \"def\": {\n" + + " \"name\": \"test\",\n" + + " \"boost\": 1.2\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " }", true)).toString(); + assertEquals("{\n" + + " \"jcr:primaryType\": \"nam:oak:QueryIndexDefinition\",\n" + + " \"type\": \"lucene\",\n" + + " \"indexRules\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"acme:Test\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"properties\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"abc\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"name\": \"test\",\n" + + " \"boost\": 1.2\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + "}", merged); + } + + @Test + public void renamedFunction() { + // A function might be indexed twice, by adding two children to the "properties" node + // that both have the same "function" value. 
+ String merged = DiffIndexMerger.processMerge(JsonObject.fromJson("{\n" + + " \"jcr:primaryType\": \"nam:oak:QueryIndexDefinition\",\n" + + " \"type\": \"lucene\",\n" + + " \"indexRules\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"acme:Test\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"properties\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"abc\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"function\": \"upper(test)\",\n" + + " \"boost\": 1.0\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " }" + + "", true), JsonObject.fromJson("{\n" + + " \"indexRules\": {\n" + + " \"acme:Test\": {\n" + + " \"properties\": {\n" + + " \"def\": {\n" + + " \"function\": \"upper(test)\",\n" + + " \"boost\": 1.2\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " }", true)).toString(); + assertEquals("{\n" + + " \"jcr:primaryType\": \"nam:oak:QueryIndexDefinition\",\n" + + " \"type\": \"lucene\",\n" + + " \"indexRules\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"acme:Test\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"properties\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"abc\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"function\": \"upper(test)\",\n" + + " \"boost\": 1.2\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + "}", merged); + } + + @Test + public void boost() { + // - "analyzed" must not be overwritten + // - "ordered" is added + // - "boost" is overwritten + String merged = DiffIndexMerger.processMerge(JsonObject.fromJson("{\n" + + " \"jcr:primaryType\": \"nam:oak:QueryIndexDefinition\",\n" + + " \"type\": \"lucene\",\n" + + " \"indexRules\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"acme:Test\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"properties\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"abc\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"analyzed\": true,\n" + + " \"boost\": 1.0\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " }" + + "", true), JsonObject.fromJson("{\n" + + " \"indexRules\": {\n" + + " \"acme:Test\": {\n" + + " \"properties\": {\n" + + " \"abc\": {\n" + + " \"analyzed\": false,\n" + + " \"ordered\": true,\n" + + " \"boost\": 1.2\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " }", true)).toString(); + assertEquals("{\n" + + " \"jcr:primaryType\": \"nam:oak:QueryIndexDefinition\",\n" + + " \"type\": \"lucene\",\n" + + " \"indexRules\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"acme:Test\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"properties\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"abc\": {\n" + + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n" + + " \"analyzed\": true,\n" + + " \"boost\": 1.2,\n" + + " \"ordered\": true\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + "}", merged); + } +} diff --git a/oak-core/src/test/resources/org/apache/jackrabbit/oak/plugins/index/diff/indexes.json b/oak-core/src/test/resources/org/apache/jackrabbit/oak/plugins/index/diff/indexes.json new file mode 100644 index 0000000000..a5a16d0fb7 --- /dev/null +++ b/oak-core/src/test/resources/org/apache/jackrabbit/oak/plugins/index/diff/indexes.json @@ -0,0 +1,187 @@ +{ + "/oak:index/ntFolder": { + "jcr:primaryType": "nam:oak:QueryIndexDefinition", + "includedPaths": [ + "/content/test" + ], + "tags": 
[ + "testTag1", + "testTag2" + ], + "type": "lucene", + "async": [ + "async", + "nrt" + ], + "indexRules": { + "jcr:primaryType": "nam:nt:unstructured", + "nt:folder": { + "jcr:primaryType": "nam:nt:unstructured", + "properties": { + "jcr:primaryType": "nam:nt:unstructured", + "jcrTitle": { + "jcr:primaryType": "nam:nt:unstructured", + "nodeScopeIndex": true, + "useInSuggest": true, + "useInSpellcheck": true, + "name": "str:jcr:content/jcr:title" + } + } + } + } + }, + "/oak:index/share": { + "jcr:primaryType": "nam:oak:QueryIndexDefinition", + "selectionPolicy": "tag", + "includedPaths": [ + "/var/share" + ], + "tags": [ + "share" + ], + "type": "lucene", + "async": [ + "async", + "nrt" + ], + "indexRules": { + "jcr:primaryType": "nam:nt:unstructured", + "nt:unstructured": { + "jcr:primaryType": "nam:nt:unstructured" + } + } + }, + "/oak:index/versionStoreIndex": { + "jcr:primaryType": "nam:oak:QueryIndexDefinition", + "includedPaths": [ + "/jcr:system/jcr:versionStorage" + ], + "type": "lucene", + "async": [ + "async", + "sync" + ], + "indexRules": { + "jcr:primaryType": "nam:nt:unstructured", + "nt:version": { + "jcr:primaryType": "nam:nt:unstructured" + }, + "nt:frozenNode": { + "jcr:primaryType": "nam:nt:unstructured" + }, + "nt:base": { + "jcr:primaryType": "nam:nt:unstructured" + } + } + }, + "/oak:index/authorizables": { + "jcr:primaryType": "nam:oak:QueryIndexDefinition", + ":version": 2, + "type": "lucene", + "async": [ + "async", + "nrt" + ], + "excludedPaths": [ + "/var", + "/jcr:system" + ], + "indexRules": { + "jcr:primaryType": "nam:nt:unstructured", + "rep:Authorizable": { + "jcr:primaryType": "nam:nt:unstructured", + "properties": { + "jcr:primaryType": "nam:nt:unstructured" + } + } + } + }, + "/oak:index/internalVerificationLucene": { + "jcr:primaryType": "nam:oak:QueryIndexDefinition", + ":version": 2, + "includedPaths": [ + "/tmp" + ], + "type": "lucene", + "async": [ + "async" + ], + "indexRules": { + "jcr:primaryType": "nam:nt:unstructured", + "nt:base": { + "jcr:primaryType": "nam:nt:unstructured", + "properties": { + "jcr:primaryType": "nam:nt:unstructured", + "verification": { + "jcr:primaryType": "nam:nt:unstructured", + "propertyIndex": true, + "name": "verification", + "type": "String" + } + } + } + } + }, + "/oak:index/ntBaseLucene-2": { + "jcr:primaryType": "nam:oak:QueryIndexDefinition", + "type": "lucene", + "async": [ + "async", + "nrt" + ], + "evaluatePathRestrictions": true, + "excludedPaths": [ + "/oak:index" + ], + "indexRules": { + "jcr:primaryType": "nam:nt:unstructured", + "nt:base": { + "jcr:primaryType": "nam:nt:unstructured" + } + } + }, + "/oak:index/fragments": { + "jcr:primaryType": "nam:oak:QueryIndexDefinition", + "selectionPolicy": "tag", + "includedPaths": [ + "/content/dam", + "/content/launches" + ], + "tags": [ + "fragments" + ], + "type": "lucene", + "async": [ + "async", + "nrt" + ], + "indexRules": { + "jcr:primaryType": "nam:nt:unstructured", + "dam:Asset": { + "jcr:primaryType": "nam:nt:unstructured", + "properties": { + "jcr:primaryType": "nam:nt:unstructured" + } + } + } + }, + "/oak:index/assetLucene": { + "jcr:primaryType": "nam:oak:QueryIndexDefinition", + "includedPaths": [ + "/content/dam", + "/content/assets" + ], + "tags": [], + "type": "lucene", + "async": [ + "async", + "nrt" + ], + "indexRules": { + "jcr:primaryType": "nam:nt:unstructured", + "dam:Asset": { + "jcr:primaryType": "nam:nt:unstructured" + } + } + } +} diff --git 
a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndex.java b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndex.java index f8beab7a3b..0d47a4d148 100644 --- a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndex.java +++ b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndex.java @@ -118,6 +118,8 @@ public abstract class FulltextIndex implements AdvancedQueryIndex, QueryIndex, N .collectIndexNodePaths(filter); if (filterReplacedIndexes()) { indexPaths = IndexName.filterReplacedIndexes(indexPaths, rootState, runIsActiveIndexCheck()); + } else { + indexPaths = IndexName.filterNewestIndexes(indexPaths, rootState); } List<IndexPlan> plans = new ArrayList<>(indexPaths.size()); for (String path : indexPaths) { diff --git a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/IndexNameTest.java b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/IndexNameTest.java index 1135772def..cdcd14fe1b 100644 --- a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/IndexNameTest.java +++ b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/IndexNameTest.java @@ -30,6 +30,9 @@ import org.apache.jackrabbit.oak.spi.state.NodeState; import org.junit.Test; import org.slf4j.event.Level; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; /** * Test the IndexName class @@ -113,4 +116,54 @@ public class IndexNameTest { lc.finished(); } } + + @Test + public void filterNewestIndexes() { + NodeState root = EMPTY_NODE; + + // Single index - should return as-is + Collection<String> single = Arrays.asList("/lucene"); + Collection<String> result = IndexName.filterNewestIndexes(single, root); + assertEquals(1, result.size()); + assertTrue(result.contains("/lucene")); + + // Multiple versions of the same base index - should return only the newest + Collection<String> multipleVersions = Arrays.asList( + "/lucene", + "/lucene-1", + "/lucene-2", + "/lucene-1-custom-1", + "/lucene-2-custom-3" + ); + result = IndexName.filterNewestIndexes(multipleVersions, root); + assertEquals(1, result.size()); + assertTrue(result.contains("/lucene-2-custom-3")); + + // Different base indexes - should return newest of each + Collection<String> differentBases = Arrays.asList( + "/luceneA", + "/luceneA-1", + "/luceneB", + "/luceneB-2-custom-1", + "/luceneC-1-custom-5" + ); + result = IndexName.filterNewestIndexes(differentBases, root); + assertEquals(new HashSet<>(Arrays.asList("/luceneA-1", "/luceneB-2-custom-1", "/luceneC-1-custom-5")), + new HashSet<>(result)); + + // Custom versions without product version + Collection<String> customOnly = Arrays.asList( + "/lucene-custom-1", + "/lucene-custom-2", + "/lucene-custom-3" + ); + result = IndexName.filterNewestIndexes(customOnly, root); + assertEquals(1, result.size()); + assertTrue(result.contains("/lucene-custom-3")); + + // Empty collection + Collection<String> empty = Arrays.asList(); + result = IndexName.filterNewestIndexes(empty, root); + assertTrue(result.isEmpty()); + } }
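For illustration, a minimal sketch of the IndexName.filterNewestIndexes behavior exercised in IndexNameTest above: given several generations of the same base name, only the newest one is kept. The index paths and the EMPTY_NODE root are taken from that test; the class and method names of the sketch itself are made up.

    import static org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState.EMPTY_NODE;

    import java.util.Arrays;
    import java.util.Collection;

    import org.apache.jackrabbit.oak.plugins.index.IndexName;
    import org.apache.jackrabbit.oak.spi.state.NodeState;

    public class FilterNewestIndexesSketch {
        public static void main(String[] args) {
            NodeState root = EMPTY_NODE;
            Collection<String> paths = Arrays.asList(
                    "/lucene", "/lucene-1", "/lucene-2",
                    "/lucene-1-custom-1", "/lucene-2-custom-3");
            // only the newest version per base name survives: [/lucene-2-custom-3]
            Collection<String> newest = IndexName.filterNewestIndexes(paths, root);
            System.out.println(newest);
        }
    }

This mirrors the FulltextIndex change above: when replaced-index filtering is disabled, query planning still considers only the newest version of each index.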
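DiffIndexTest drives the new mechanism end to end: the customized index definitions are written as a JSON document into an nt:file named diff.json under a disabled definition at /oak:index/diff.index, and the next async index run merges them into a new versioned index (acme.testIndex-1-custom-1 in the test). Below is a condensed sketch of the write side, following the test's storeDiff helper; the commit hook and the JSON payload are assumed to be set up as in the test, and the class name is made up.

    import org.apache.jackrabbit.JcrConstants;
    import org.apache.jackrabbit.oak.api.CommitFailedException;
    import org.apache.jackrabbit.oak.api.Type;
    import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
    import org.apache.jackrabbit.oak.spi.commit.CommitHook;
    import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
    import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
    import org.apache.jackrabbit.oak.spi.state.NodeStore;

    public class StoreDiffSketch {

        // json holds the index-definition diff, e.g. "{ \"acme.testIndex\": { ... } }"
        static void storeDiff(NodeStore store, CommitHook hook, String timestamp, String json)
                throws CommitFailedException {
            NodeBuilder builder = store.getRoot().builder();
            NodeBuilder diffIndex = builder.child(IndexConstants.INDEX_DEFINITIONS_NAME).child("diff.index");
            diffIndex.setProperty(JcrConstants.JCR_PRIMARYTYPE,
                    IndexConstants.INDEX_DEFINITIONS_NODE_TYPE, Type.NAME);
            // "disabled" so this definition is never used to answer queries itself
            diffIndex.setProperty(IndexConstants.TYPE_PROPERTY_NAME, "disabled");
            NodeBuilder diffJson = diffIndex.child("diff.json");
            diffJson.setProperty(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_FILE, Type.NAME);
            NodeBuilder content = diffJson.child(JcrConstants.JCR_CONTENT);
            content.setProperty(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_RESOURCE, Type.NAME);
            content.setProperty(JcrConstants.JCR_LASTMODIFIED, timestamp);
            content.setProperty("jcr:data", json);
            store.merge(builder, hook, CommitInfo.EMPTY);
        }
    }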
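After such a merge, the resulting definitions can be inspected with the new RootIndexesListService, which serializes every oak:QueryIndexDefinition under /oak:index into one JSON object keyed by index path, optionally filtered by the "type" property. A minimal usage sketch along the lines of DiffIndexTest.listIndexes; INITIAL_CONTENT is the test helper used there, and the class name is made up.

    import static org.apache.jackrabbit.oak.InitialContentHelper.INITIAL_CONTENT;

    import org.apache.jackrabbit.oak.commons.json.JsonObject;
    import org.apache.jackrabbit.oak.plugins.index.diff.RootIndexesListService;
    import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore;
    import org.apache.jackrabbit.oak.spi.state.NodeStore;

    public class ListRootIndexesSketch {
        public static void main(String[] args) {
            // INITIAL_CONTENT (a test helper) provides the default /oak:index definitions
            NodeStore store = new MemoryNodeStore(INITIAL_CONTENT);
            // null pattern = all index types; a pattern such as "property" or "lucene"
            // keeps only definitions whose "type" property matches
            JsonObject defs = RootIndexesListService.getRootIndexDefinitions(store, "property");
            for (String indexPath : defs.getChildren().keySet()) {
                System.out.println(indexPath);
            }
        }
    }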
