This is an automated email from the ASF dual-hosted git repository.
thomasm pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new 30ee4e6e32 OAK-12110 Simplify Index Management - take 2 (#2751)
30ee4e6e32 is described below
commit 30ee4e6e323b413441faf98ca8ac01cf5a6aaed9
Author: Thomas Mueller <[email protected]>
AuthorDate: Wed Feb 25 09:05:44 2026 +0100
OAK-12110 Simplify Index Management - take 2 (#2751)
* OAK-12010 Simplified index management (improvements)
* OAK-12010 Simplified index management (improvements)
* OAK-12010 Simplified index management
* OAK-12010 Simplified index management
* OAK-12010 Simplified index management
---
.../oak/plugins/index/diff/DiffIndex.java | 56 ++-
.../oak/plugins/index/diff/DiffIndexMerger.java | 193 ++++++--
.../oak/plugins/index/diff/JsonNodeUpdater.java | 13 +-
.../plugins/index/diff/RootIndexesListService.java | 1 +
.../oak/plugins/index/diff/DiffIndexTest.java | 70 +++
.../plugins/index/diff/JsonNodeUpdaterTest.java | 122 ++---
.../oak/plugins/index/diff/MergeTest.java | 292 +++++++++---
oak-doc/src/site/markdown/query/indexing.md | 500 ++++++++++++++-------
8 files changed, 917 insertions(+), 330 deletions(-)
diff --git
a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndex.java
b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndex.java
index 6df72e2ce8..6f72a65b5a 100644
---
a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndex.java
+++
b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndex.java
@@ -22,6 +22,7 @@ import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Comparator;
+import java.util.List;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
@@ -58,21 +59,21 @@ public class DiffIndex {
* @param indexDefinitions the /oak:index node
*/
public static void applyDiffIndexChanges(NodeStore store, NodeBuilder
indexDefinitions) {
- JsonObject diffs = collectDiffs(indexDefinitions);
- if (diffs == null) {
- // nothing todo
- return;
+ JsonObject diffs = collectDiffs(indexDefinitions, MERGER);
+ if (diffs != null) {
+ processDiffs(store, indexDefinitions, diffs, MERGER);
}
- processDiffs(store, indexDefinitions, diffs);
+ storeOrRemoveWarnings(indexDefinitions, MERGER);
}
/**
* Collect the diffs from the diff.index and diff.index.optimizer.
*
* @param indexDefinitions the node builder for /oak:index
+ * @param merger the merger instance to use for collecting warnings
* @return the diffs, or null if none
*/
- public static JsonObject collectDiffs(NodeBuilder indexDefinitions) {
+ public static JsonObject collectDiffs(NodeBuilder indexDefinitions,
DiffIndexMerger merger) {
JsonObject diffs = null;
for (String diffIndex : new String[] {
DiffIndexMerger.DIFF_INDEX,
@@ -113,11 +114,10 @@ public class DiffIndex {
diffs.getChildren().put("/oak:index/" + diffIndex, diffObj);
} catch (Exception e) {
String message = "Error parsing " + diffIndex;
- LOG.warn("{}: {}", message, e.getMessage(), e);
- diffIndexDefinition.setProperty("error", message + ": " +
e.getMessage());
+ merger.logWarn("{}: {}", message, e.getMessage());
}
if (!diffIndexDefinition.hasProperty("info")) {
- diffIndexDefinition.setProperty("info", "This diff is are
automatically merged with other indexes. See
https://oak-indexing.github.io/oakTools/simplified.html");
+ diffIndexDefinition.setProperty("info", "This diff is
automatically merged with other indexes. See
https://oak-indexing.github.io/oakTools/simplified.html");
}
}
return diffs;
@@ -129,13 +129,14 @@ public class DiffIndex {
* @param store the node store
* @param indexDefinitions the node builder for /oak:index
* @param diffs the json object with the combined diffs
+ * @param merger the merger instance to use for collecting warnings
*/
- private static void processDiffs(NodeStore store, NodeBuilder
indexDefinitions, JsonObject diffs) {
- LOG.info("Processing a diffs");
+ private static void processDiffs(NodeStore store, NodeBuilder
indexDefinitions, JsonObject diffs, DiffIndexMerger merger) {
+ LOG.info("Processing diffs");
JsonObject repositoryDefinitions =
RootIndexesListService.getRootIndexDefinitions(indexDefinitions);
LOG.debug("Index list {}", repositoryDefinitions);
try {
- MERGER.merge(diffs, repositoryDefinitions, store);
+ merger.merge(diffs, repositoryDefinitions, store);
for (String indexPath : diffs.getChildren().keySet()) {
if (indexPath.startsWith("/oak:index/" +
DiffIndexMerger.DIFF_INDEX)) {
continue;
@@ -176,6 +177,32 @@ public class DiffIndex {
}
}
+ /**
+ * Store warnings collected during diff index processing in the diff.index
node.
+ * Warnings are stored in separate properties named "warn.01", "warn.02",
etc.
+ * Any existing "warn." properties are removed first.
+ *
+ * @param indexDefinitions the node builder for /oak:index
+ * @param merger the merger instance to retrieve warnings from
+ */
+ public static void storeOrRemoveWarnings(NodeBuilder indexDefinitions,
DiffIndexMerger merger) {
+ if (!indexDefinitions.hasChildNode(DiffIndexMerger.DIFF_INDEX)) {
+ return;
+ }
+ NodeBuilder diffIndexDefinition =
indexDefinitions.child(DiffIndexMerger.DIFF_INDEX);
+ // remove existing warn.* properties
+ for (PropertyState ps :
diffIndexDefinition.getNodeState().getProperties()) {
+ if (ps.getName().startsWith("warn.")) {
+ diffIndexDefinition.removeProperty(ps.getName());
+ }
+ }
+ List<String> warnings = merger.getAndClearWarnings();
+ for (int i = 0; i < warnings.size(); i++) {
+ String name = String.format("warn.%02d", i + 1);
+ diffIndexDefinition.setProperty(name, warnings.get(i));
+ }
+ }
+
private static void sortIndexes(NodeBuilder builder) {
ArrayList<String> list = new ArrayList<>();
for (String child : builder.getChildNodeNames()) {
@@ -211,7 +238,7 @@ public class DiffIndex {
* @param indexPath the path
* @param keep which index name (which version) to retain
*/
- private static void disableOrRemoveOldVersions(NodeBuilder definitions,
String indexPath, String keep) {
+ public static void disableOrRemoveOldVersions(NodeBuilder definitions,
String indexPath, String keep) {
String indexName = indexPath;
if (indexPath.startsWith("/oak:index/")) {
indexName = indexPath.substring("/oak:index/".length());
@@ -226,6 +253,7 @@ public class DiffIndex {
String childBaseName = IndexName.parse(child).getBaseName();
if (baseName.equals(childBaseName)) {
if (indexName.equals(child)) {
+ // we can not remove it unless it is disabled
if (!IndexConstants.TYPE_DISABLED.equals(definitions.
getChildNode(indexName).
getString(IndexConstants.TYPE_PROPERTY_NAME))) {
@@ -236,7 +264,7 @@ public class DiffIndex {
}
}
for (String r : toRemove) {
- LOG.info("Removing old index {}", r);
+ LOG.info("Removing old index {}", r);
definitions.child(r).remove();
updateNodetypeIndexForPath(definitions, r, false);
}
diff --git
a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexMerger.java
b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexMerger.java
index 7183828db9..e3a81b17ff 100644
---
a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexMerger.java
+++
b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexMerger.java
@@ -23,9 +23,13 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Set;
import java.util.TreeMap;
+import java.util.TreeSet;
import org.apache.jackrabbit.oak.commons.StringUtils;
import org.apache.jackrabbit.oak.commons.json.JsonObject;
@@ -57,22 +61,54 @@ public class DiffIndexMerger {
// the list of unsupported included paths, e.g. "/apps,/libs"
// by default all paths are supported
- private final static String[] UNSUPPORTED_INCLUDED_PATHS =
System.getProperty("oak.diffIndex.unsupportedPaths", "").split(",");
+ private final static String[] UNSUPPORTED_INCLUDED_PATHS =
System.getProperty(
+ "oak.diffIndex.unsupportedPaths", "").split(",");
// in case a custom index is removed, whether a dummy index is created
- private final static boolean DELETE_CREATES_DUMMY =
Boolean.getBoolean("oak.diffIndex.deleteCreatesDummy");
+ private final static boolean DELETE_CREATES_DUMMY = Boolean.getBoolean(
+ "oak.diffIndex.deleteCreatesDummy");
// in case a customization was removed, create a copy of the OOTB index
- private final static boolean DELETE_COPIES_OOTB =
Boolean.getBoolean("oak.diffIndex.deleteCopiesOOTB");
+ private final static boolean DELETE_COPIES_OOTB = Boolean.getBoolean(
+ "oak.diffIndex.deleteCopiesOOTB");
// whether to log at info level
- private final static boolean LOG_AT_INFO_LEVEL =
Boolean.getBoolean("oak.diffIndex.logAtInfoLevel");
+ private final static boolean LOG_AT_INFO_LEVEL = Boolean.getBoolean(
+ "oak.diffIndex.logAtInfoLevel");
+
+ // the set of top-level properties that is not allowed to be added to an
existing index
+ private final static Set<String>
REJECTED_TOP_LEVEL_PROPS_FOR_EXISTING_INDEX = Set.of(
+ "selectionPolicy", "valueRegex", "queryFilterRegex",
"includedPaths", "excludedPaths", "queryPaths");
+
+ // the set of child properties that are not allowed to be added if the
property is already indexed
+ // eg. the "name" property must not be added if the existing property
doesn't have it yet (eg. a function-based index),
+ // or the "function" property must not be added unless it already
exists (eg. a name-based index)
+ private final static Set<String> REJECTED_ADDING_TO_EXISTING_PROPERTY =
Set.of(
+ "isRegexp", "index", "function", "name");
+
+ // set of properties that are allowed to be changed if the property
already exists
+ private final static Set<String> ALLOW_CHANGING_IN_EXISTING_PROPERTY =
Set.of(
+ "boost", "weight");
+
+ // set of properties that allow multi-valued string that might be merged
+ private final static Set<String> MERGE_MULTI_VALUES_STRINGS = Set.of(
+ "includedPaths", "queryPaths", "tags");
+
+ // maximum number of warnings to keep
+ private final static int MAX_WARNINGS = 100;
+
+ // maximum total size of warnings (1 MB)
+ private final static int MAX_WARNINGS_SIZE = 1024 * 1024;
private String[] unsupportedIncludedPaths;
private boolean deleteCreatesDummyIndex;
private boolean deleteCopiesOutOfTheBoxIndex;
private boolean logAtInfoLevel;
+ // queue of warnings (oldest first); access is guarded by synchronized (warnings)
+ private final LinkedList<String> warnings = new LinkedList<>();
+ private int warningsSize = 0;
+
public DiffIndexMerger() {
this(UNSUPPORTED_INCLUDED_PATHS, DELETE_CREATES_DUMMY,
DELETE_COPIES_OOTB, LOG_AT_INFO_LEVEL);
}
@@ -106,7 +142,7 @@ public class DiffIndexMerger {
// read the diff.index.optimizer explicitly,
// because it's a not a regular index definition,
- // and so in the repositoryDefinitions
+ // and so it is not in the repositoryDefinitions
if (repositoryNodeStore != null) {
Map<String, JsonObject> diffInRepo =
readDiffIndex(repositoryNodeStore, DIFF_INDEX_OPTIMIZER);
combined.getChildren().putAll(diffInRepo);
@@ -160,7 +196,7 @@ public class DiffIndexMerger {
String key = e.getKey();
JsonObject value = e.getValue();
if (key.startsWith("/oak:index/")) {
- LOG.warn("The key should contains just the index name, without
the '/oak:index' prefix for key {}", key);
+ logWarn("The key should contain just the index name, without
the '/oak:index' prefix, for key: {}", key);
key = key.substring("/oak:index/".length());
}
log("Processing {}", key);
@@ -185,7 +221,7 @@ public class DiffIndexMerger {
* @param target the target map of diff.index definitions
* @return the error message trying to parse the JSON file, or null
*/
- public static String tryExtractDiffIndex(JsonObject indexDefs, String
name, HashMap<String, JsonObject> target) {
+ public String tryExtractDiffIndex(JsonObject indexDefs, String name,
HashMap<String, JsonObject> target) {
JsonObject diffIndex = indexDefs.getChildren().get(name);
if (diffIndex == null) {
return null;
@@ -198,15 +234,15 @@ public class DiffIndexMerger {
JsonObject jcrContent = file.getChildren().get("jcr:content");
if (jcrContent == null) {
String message = "jcr:content child node is missing in
diff.json";
- LOG.warn(message);
+ logWarn(message);
return message;
}
String jcrData = JsonNodeUpdater.oakStringValue(jcrContent,
"jcr:data");
try {
diff = JsonObject.fromJson(jcrData, true);
} catch (Exception e) {
- LOG.warn("Illegal Json, ignoring: {}", jcrData, e);
String message = "Illegal Json, ignoring: " + e.getMessage();
+ logWarn("Illegal Json, ignoring: {}", jcrData, e);
return message;
}
} else {
@@ -237,6 +273,8 @@ public class DiffIndexMerger {
for (Entry<String, JsonObject> e : indexDefs.getChildren().entrySet())
{
String key = e.getKey();
JsonObject value = e.getValue();
+ // merged indexes always contain "-custom-". Other indexes may in
theory contain that term,
+ // but then they do not contain "mergeInfo".
if (key.indexOf("-custom-") < 0 ||
!value.getProperties().containsKey("mergeInfo")) {
continue;
}
@@ -327,6 +365,7 @@ public class DiffIndexMerger {
log("Latest product: {}", latestProductKey);
log("Latest customized: {}", latestCustomizedKey);
if (latestProduct == null) {
+ // if it's not a product index, then verify it's a correctly named
custom index
if (indexName.indexOf('.') >= 0) {
// a fully custom index needs to contains a dot
log("Fully custom index {}", indexName);
@@ -338,13 +377,13 @@ public class DiffIndexMerger {
JsonObject latestProductIndex =
combined.getChildren().get(latestProductKey);
String[] includedPaths;
if (latestProductIndex == null) {
- if (indexDiff.getProperties().isEmpty() &&
indexDiff.getChildren().isEmpty()) {
+ if (indexDiff == null || indexDiff.getProperties().isEmpty() &&
indexDiff.getChildren().isEmpty()) {
// there is no customization (any more), which means a dummy
index may be needed
log("No customization for {}", indexName);
} else {
includedPaths = JsonNodeUpdater.oakStringArrayValue(indexDiff,
"includedPaths");
if (includesUnsupportedPaths(includedPaths)) {
- LOG.warn("New custom index {} is not supported because it
contains an unsupported path ({})",
+ logWarn("New custom index {} is not supported because it
contains an unsupported path ({})",
indexName,
Arrays.toString(unsupportedIncludedPaths));
return false;
}
@@ -352,7 +391,7 @@ public class DiffIndexMerger {
} else {
includedPaths =
JsonNodeUpdater.oakStringArrayValue(latestProductIndex, "includedPaths");
if (includesUnsupportedPaths(includedPaths)) {
- LOG.warn("Customizing index {} is not supported because it
contains an unsupported path ({})",
+ logWarn("Customizing index {} is not supported because it
contains an unsupported path ({})",
latestProductKey,
Arrays.toString(unsupportedIncludedPaths));
return false;
}
@@ -368,7 +407,7 @@ public class DiffIndexMerger {
}
merged = latestProductIndex;
} else {
- merged = processMerge(latestProductIndex, indexDiff);
+ merged = processMerge(indexName, latestProductIndex, indexDiff);
}
// compare to the latest version of the this index
@@ -378,7 +417,7 @@ public class DiffIndexMerger {
} else {
latestIndexVersion =
combined.getChildren().get(latestCustomizedKey);
}
- JsonObject mergedDef = cleanedAndNormalized(switchToLucene(merged));
+ JsonObject mergedDef =
cleanedAndNormalized(switchToLuceneIfNeeded(merged));
// compute merge checksum for later, but do not yet add
String mergeChecksum = computeMergeChecksum(mergedDef);
// get the merge checksum before cleaning (cleaning removes it) - if
available
@@ -388,14 +427,14 @@ public class DiffIndexMerger {
key = prefix + indexName + "-1-custom-1";
} else {
String latestMergeChecksum =
JsonNodeUpdater.oakStringValue(latestIndexVersion, "mergeChecksum");
- JsonObject latestDef =
cleanedAndNormalized(switchToLucene(latestIndexVersion));
+ JsonObject latestDef =
cleanedAndNormalized(switchToLuceneIfNeeded(latestIndexVersion));
if (isSameIgnorePropertyOrder(mergedDef, latestDef)) {
// normal case: no change
// (even if checksums do not match: checksums might be missing
or manipulated)
log("Latest index matches");
if (latestMergeChecksum != null &&
!latestMergeChecksum.equals(mergeChecksum)) {
- LOG.warn("Indexes do match, but checksums do not. Possibly
checksum was changed: {} vs {}", latestMergeChecksum, mergeChecksum);
- LOG.warn("latest: {}\nmerged: {}", latestDef, mergedDef);
+ logWarn("Indexes do match, but checksums do not. Possibly
checksum was changed: {} vs {}", latestMergeChecksum, mergeChecksum);
+ logWarn("Index: {}, latest: {}\nmerged: {}", indexName,
latestDef, mergedDef);
}
return false;
}
@@ -403,8 +442,8 @@ public class DiffIndexMerger {
// checksum matches, but data does not match
// could be eg. due to numbers formatting issues (-0.0 vs 0.0,
0.001 vs 1e-3)
// but unexpected because we do not normally have such cases
- LOG.warn("Indexes do not match, but checksums match. Possible
normalization issue.");
- LOG.warn("Index: {}, latest: {}\nmerged: {}", indexName,
latestDef, mergedDef);
+ logWarn("Indexes do not match, but checksums match. Possible
normalization issue.");
+ logWarn("Index: {}, latest: {}\nmerged: {}", indexName,
latestDef, mergedDef);
// if checksums match, we consider it a match
return false;
}
@@ -510,13 +549,12 @@ public class DiffIndexMerger {
* @param indexDef the index definition (is not changed by this method)
* @return the lucene version (a new JSON object)
*/
- public static JsonObject switchToLucene(JsonObject indexDef) {
+ public static JsonObject switchToLuceneIfNeeded(JsonObject indexDef) {
JsonObject obj = JsonObject.fromJson(indexDef.toString(), true);
String type = JsonNodeUpdater.oakStringValue(obj, "type");
- if (type == null || !"elasticsearch".equals(type) ) {
- return obj;
+ if ("elasticsearch".equals(type) ) {
+ switchToLuceneChildren(obj);
}
- switchToLuceneChildren(obj);
return obj;
}
@@ -621,19 +659,24 @@ public class DiffIndexMerger {
* Merge a product index with a diff. If the product index is null, then
the
* diff needs to contain a complete custom index definition.
*
+ * @param indexName the index name (for logging)
* @param productIndex the product index definition, or null if none
* @param diff the diff (from the diff.index definition)
+ *
* @return the index definition of the merged index
*/
- public JsonObject processMerge(JsonObject productIndex, JsonObject diff) {
+ public JsonObject processMerge(String indexName, JsonObject productIndex,
JsonObject diff) {
JsonObject result;
+ boolean isNew;
if (productIndex == null) {
// fully custom index
result = new JsonObject(true);
+ isNew = true;
} else {
result = JsonObject.fromJson(productIndex.toString(), true);
+ isNew = false;
}
- mergeInto("", diff, result);
+ mergeInto(indexName, "", diff, result, isNew);
addPrimaryType("", result);
return result;
}
@@ -669,33 +712,68 @@ public class DiffIndexMerger {
/**
* Merge a JSON diff into a target index definition.
*
- * @param path the path
+ * @param indexName the index name (for logging)
+ * @param path the path (relative to the index)
* @param diff the diff (what to merge)
* @param target where to merge into
+ * @param isNewNode whether the target node is newly created (didn't
exist before)
*/
- private void mergeInto(String path, JsonObject diff, JsonObject target) {
+ private void mergeInto(String indexName, String path, JsonObject diff,
JsonObject target,
+ boolean isNewNode) {
+ String pathForLogging = path.isEmpty() ? "the root" : "relative path "
+ path;
for (String p : diff.getProperties().keySet()) {
if (path.isEmpty()) {
if ("jcr:primaryType".equals(p)) {
continue;
}
}
+ if (!isNewNode) {
+ // for existing nodes, we do a few more checks before the merge
+ if (path.isEmpty() &&
REJECTED_TOP_LEVEL_PROPS_FOR_EXISTING_INDEX.contains(p)
+ && !target.getProperties().containsKey(p)) {
+ // at the top level, some properties (eg. selectionPolicy)
are not allowed to be added
+ // to an existing index
+ logWarn("{}: Ignoring new top-level property {} at {} for
existing index", indexName, p, pathForLogging);
+ continue;
+ }
+ if (REJECTED_ADDING_TO_EXISTING_PROPERTY.contains(p) &&
!target.getProperties().containsKey(p)) {
+ // some properties are not allowed to be added if the node
already exists
+ logWarn("{}: Ignoring new property \"{}\" at {} for
existing child", indexName, p, pathForLogging);
+ continue;
+ }
+ }
if (target.getProperties().containsKey(p)) {
- // we do not currently allow to overwrite most existing
properties
- if (p.equals("boost")) {
- // allow overwriting the boost value
- LOG.info("Overwrite property {} value at {}", p, path);
+ // we do not currently allow to overwrite most existing
properties,
+ // except for:
+ if (!path.isEmpty() &&
ALLOW_CHANGING_IN_EXISTING_PROPERTY.contains(p)) {
+ // allow overwriting the (eg.) boost value
target.getProperties().put(p, diff.getProperties().get(p));
+ } else if (path.isEmpty() &&
MERGE_MULTI_VALUES_STRINGS.contains(p)) {
+ // merge includedPaths, queryPaths, and tags,
+ // such that it contains more entries
+ // (if the property is not set, we would make it more
restrictive,
+ // which is not allowed)
+ TreeSet<String> oldSet =
JsonNodeUpdater.getStringSet(target.getProperties().get(p));
+ TreeSet<String> newSet =
JsonNodeUpdater.getStringSet(diff.getProperties().get(p));
+ TreeSet<String> mergedSet = new TreeSet<String>(oldSet);
+ mergedSet.addAll(newSet);
+ JsopBuilder buff = new JsopBuilder().array();
+ for(String v : mergedSet) {
+ buff.value(v);
+ }
+ target.getProperties().put(p, buff.endArray().toString());
} else {
- LOG.warn("Ignoring existing property {} at {}", p, path);
+ logWarn("{}: Ignoring existing property \"{}\" at {}",
indexName, p, pathForLogging);
}
} else {
target.getProperties().put(p, diff.getProperties().get(p));
}
}
for (String c : diff.getChildren().keySet()) {
+ boolean childIsNew;
String targetChildName = c;
if (!target.getChildren().containsKey(c)) {
+ childIsNew = true;
if (path.endsWith("/properties")) {
// search for a property with the same "name" value
String propertyName =
diff.getChildren().get(c).getProperties().get("name");
@@ -720,8 +798,10 @@ public class DiffIndexMerger {
// only create the child (properties are added below)
target.getChildren().put(c, new JsonObject());
}
+ } else {
+ childIsNew = false;
}
- mergeInto(path + "/" + targetChildName, diff.getChildren().get(c),
target.getChildren().get(targetChildName));
+ mergeInto(indexName, path + "/" + targetChildName,
diff.getChildren().get(c), target.getChildren().get(targetChildName),
childIsNew);
}
if (target.getProperties().isEmpty() &&
target.getChildren().isEmpty()) {
if (deleteCreatesDummyIndex) {
@@ -747,8 +827,19 @@ public class DiffIndexMerger {
}
}
+ /**
+ * Find a child node that contains a property with the given key and value.
+ * This is used during merging to find an existing index rule property
+ * definition that matches a given "name" or "function" value, so that the
+ * diff can be applied to the correct child even if the child node name
differs.
+ *
+ * @param obj the parent JSON object whose children are searched
+ * @param key the property key to match (e.g. "name" or "function")
+ * @param value the expected property value (already converted via
oakStringValue)
+ * @return the name of the first matching child, or null if no match is
found
+ */
public static String getChildWithKeyValuePair(JsonObject obj, String key,
String value) {
- for(Entry<String, JsonObject> c : obj.getChildren().entrySet()) {
+ for (Entry<String, JsonObject> c : obj.getChildren().entrySet()) {
String v2 = c.getValue().getProperties().get(key);
if (v2 == null) {
continue;
@@ -800,6 +891,7 @@ public class DiffIndexMerger {
* data: it is only available in the writeable repository.
*
* @param repositoryNodeStore the node store
+ * @param name the name (diff.index, diff.index.optimizer,...)
* @return a map, possibly with a single entry with this key
*/
public Map<String, JsonObject> readDiffIndex(NodeStore
repositoryNodeStore, String name) {
@@ -830,6 +922,39 @@ public class DiffIndexMerger {
}
}
+ /**
+ * Log a warning message and store it in a size-limited queue.
+ * The queue keeps the oldest entries and is limited to 100 entries or 1
MB total size.
+ *
+ * @param format the log message format
+ * @param arguments the log message arguments
+ */
+ public void logWarn(String format, Object... arguments) {
+ String message =
org.slf4j.helpers.MessageFormatter.arrayFormat(format, arguments).getMessage();
+ LOG.warn(message);
+ synchronized (warnings) {
+ int messageSize = message.getBytes(StandardCharsets.UTF_8).length;
+ if (warnings.size() < MAX_WARNINGS && warningsSize + messageSize
<= MAX_WARNINGS_SIZE) {
+ warnings.add(message);
+ warningsSize += messageSize;
+ }
+ }
+ }
+
+ /**
+ * Get and clear all collected warnings.
+ *
+ * @return a list of warning messages (oldest first)
+ */
+ public List<String> getAndClearWarnings() {
+ synchronized (warnings) {
+ List<String> result = new ArrayList<>(warnings);
+ warnings.clear();
+ warningsSize = 0;
+ return result;
+ }
+ }
+
public DiffIndexMerger setUnsupportedIncludedPaths(String[]
unsupportedIncludedPaths) {
this.unsupportedIncludedPaths = unsupportedIncludedPaths;
return this;
diff --git
a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/JsonNodeUpdater.java
b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/JsonNodeUpdater.java
index a5dabdbce7..99e7a74452 100644
---
a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/JsonNodeUpdater.java
+++
b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/JsonNodeUpdater.java
@@ -66,14 +66,16 @@ public class JsonNodeUpdater {
/**
* Add a replace a node, including all child nodes, in the node store.
*
+ * @param builder the builder to add the node to
* @param nodeStore the target node store
- * @param targetPath the target path where the node(s) is/are replaced
+ * @param targetPath the target path (relative to the builder) where the
node(s) is/are replaced
* @param nodeType the node type of the new node (eg. "nt:unstructured")
* @param jsonString the json string with the node data
* @throws CommitFailedException if storing the nodes failed
* @throws IOException if storing a blob failed
*/
- public static void addOrReplace(NodeBuilder builder, NodeStore nodeStore,
String targetPath, String nodeType, String jsonString) throws
CommitFailedException, IOException {
+ public static void addOrReplace(NodeBuilder builder, NodeStore nodeStore,
String targetPath,
+ String nodeType, String jsonString) throws CommitFailedException,
IOException {
LOG.info("Storing {}: {}", targetPath, jsonString);
if (nodeType.contains("/")) {
throw new IllegalStateException("Illegal node type: " + nodeType);
@@ -245,6 +247,13 @@ public class JsonNodeUpdater {
}
}
+ /**
+ * Parse a raw JSON value and convert it to a set of strings. Also
supported is a single string value.
+ * Everything else (numbers, booleans, etc.) is not supported and returns
null.
+ *
+ * @param value the raw JSON value
+ * @return a set of strings or null
+ */
public static TreeSet<String> getStringSet(String value) {
if (value == null) {
return null;
diff --git
a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/RootIndexesListService.java
b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/RootIndexesListService.java
index 806278f154..f09e495f51 100644
---
a/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/RootIndexesListService.java
+++
b/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/diff/RootIndexesListService.java
@@ -100,6 +100,7 @@ public class RootIndexesListService implements
IndexPathService {
return list;
}
for (ChildNodeEntry cn : oakIndex.getChildNodeEntries()) {
+ // ignore entries that are not of type oak:QueryIndexDefinition
if (!IndexConstants.INDEX_DEFINITIONS_NODE_TYPE
.equals(cn.getNodeState().getName("jcr:primaryType"))) {
continue;
diff --git
a/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexTest.java
b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexTest.java
index b0cf3b50a1..50f9e2efd2 100644
---
a/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexTest.java
+++
b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/DiffIndexTest.java
@@ -18,10 +18,12 @@ package org.apache.jackrabbit.oak.plugins.index.diff;
import static org.apache.jackrabbit.oak.InitialContentHelper.INITIAL_CONTENT;
import static
org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NAME;
+import static
org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_DISABLED;
import static
org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -303,5 +305,73 @@ public class DiffIndexTest {
}
}
}
+
+ // verify @lucene and @elasticsearch are cleared
+ @Test
+ public void cleanedAndNormalizedRemoveAtLucene() {
+ assertEquals("{\n"
+ + " \"test\": 4,\n"
+ + " \"test@abc\": 3\n"
+ + "}",
+ DiffIndexMerger.cleanedAndNormalized(JsonObject.fromJson(
+ "{\"test@lucene\":1, \"test@elasticsearch\": 2,
\"test@abc\": 3, \"test\": 4}", true)).toString());
+ }
+
+ // verify the "str:", "nam:", and "dat:" value prefixes are removed
+ @Test
+ public void cleanedAndNormalizedRemovePrefixes() {
+ assertEquals("{\n"
+ + " \"test\": \"hello\",\n"
+ + " \"test1\": \"world\",\n"
+ + " \"test2\": \"123\"\n"
+ + "}",
+ DiffIndexMerger.cleanedAndNormalized(JsonObject.fromJson(
+ "{\"test\":\"str:hello\", \"test1\": \"nam:world\",
\"test2\": \"dat:123\"}", true)).toString());
+ }
+
+ @Test
+ public void disableOrRemoveOldVersions() {
+ NodeStore store = new MemoryNodeStore(INITIAL_CONTENT);
+ NodeBuilder definitions =
store.getRoot().builder().child(INDEX_DEFINITIONS_NAME);
+
+ definitions.child("myLuceneIndex").setProperty(TYPE_PROPERTY_NAME,
"lucene");
+ definitions.child("myNodetypeIndex").setProperty(TYPE_PROPERTY_NAME,
"property");
+ DiffIndex.disableOrRemoveOldVersions(definitions, "lucene", "lucene");
+ assertTrue(definitions.hasChildNode("myLuceneIndex"));
+ assertTrue(definitions.hasChildNode("myNodetypeIndex"));
+
+
definitions.child("product-1-custom-1").setProperty(TYPE_PROPERTY_NAME,
"lucene");
+
definitions.child("product-1-custom-2").setProperty(TYPE_PROPERTY_NAME,
"lucene");
+
definitions.child("product-1-custom-3").setProperty(TYPE_PROPERTY_NAME,
"lucene");
+ DiffIndex.disableOrRemoveOldVersions(definitions,
"/oak:index/product-1-custom-3", "product-1-custom-3");
+ assertFalse(definitions.hasChildNode("product-1-custom-1"));
+ assertFalse(definitions.hasChildNode("product-1-custom-2"));
+ assertTrue(definitions.hasChildNode("product-1-custom-3"));
+
+ definitions.child("other-1-custom-1").setProperty(TYPE_PROPERTY_NAME,
"lucene");
+
definitions.child("product-1-custom-4").setProperty(TYPE_PROPERTY_NAME,
"lucene");
+ DiffIndex.disableOrRemoveOldVersions(definitions,
"product-1-custom-4", "product-1-custom-4");
+ assertTrue(definitions.hasChildNode("other-1-custom-1"));
+ assertTrue(definitions.hasChildNode("product-1-custom-4"));
+
+ definitions.child("foo-1-custom-1").setProperty(TYPE_PROPERTY_NAME,
"lucene");
+ definitions.child("foo-1-custom-2").setProperty(TYPE_PROPERTY_NAME,
TYPE_DISABLED);
+ DiffIndex.disableOrRemoveOldVersions(definitions,
"/oak:index/foo-1-custom-2", "foo-1-custom-2");
+ assertFalse(definitions.hasChildNode("foo-1-custom-1"));
+ assertTrue(definitions.hasChildNode("foo-1-custom-2"));
+
+ definitions.child("abc-1-custom-1").setProperty(TYPE_PROPERTY_NAME,
"lucene");
+ definitions.child("abc-1-custom-2").setProperty(TYPE_PROPERTY_NAME,
"lucene");
+ DiffIndex.disableOrRemoveOldVersions(definitions,
"/oak:index/abc-1-custom-1", "abc-1-custom-2");
+ assertTrue(definitions.hasChildNode("abc-1-custom-1"));
+ assertTrue(definitions.hasChildNode("abc-1-custom-2"));
+
+ definitions.child("abc-1-custom-1").setProperty(TYPE_PROPERTY_NAME,
TYPE_DISABLED);
+ definitions.child("abc-1-custom-2").setProperty(TYPE_PROPERTY_NAME,
"lucene");
+ DiffIndex.disableOrRemoveOldVersions(definitions,
"/oak:index/abc-1-custom-1", "abc-1-custom-2");
+ assertFalse(definitions.hasChildNode("abc-1-custom-1"));
+ assertTrue(definitions.hasChildNode("abc-1-custom-2"));
+
+ }
}
diff --git
a/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/JsonNodeUpdaterTest.java
b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/JsonNodeUpdaterTest.java
index f31629f077..f364d69d4e 100644
---
a/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/JsonNodeUpdaterTest.java
+++
b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/JsonNodeUpdaterTest.java
@@ -29,6 +29,7 @@ import java.util.TreeSet;
import org.apache.jackrabbit.oak.api.CommitFailedException;
import org.apache.jackrabbit.oak.commons.json.JsonObject;
+import org.apache.jackrabbit.oak.commons.json.JsopBuilder;
import org.apache.jackrabbit.oak.json.JsonUtils;
import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore;
import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
@@ -59,24 +60,24 @@ public class JsonNodeUpdaterTest {
NodeBuilder builder = ns.getRoot().builder();
JsonNodeUpdater.addOrReplace(builder, ns, "/test", "nt:test",
json.toString());
ns.merge(builder, new EmptyHook(), CommitInfo.EMPTY);
- String json2 = JsonUtils.nodeStateToJson(ns.getRoot(), 5);
- json2 = json2.replaceAll("jcr:uuid\" : \".*\"", "jcr:uuid\" :
\"...\"");
+ String json2 = reformatJson(JsonUtils.nodeStateToJson(ns.getRoot(),
5));
+ json2 = json2.replaceAll("jcr:uuid\": \".*\"", "jcr:uuid\": \"...\"");
assertEquals("{\n"
- + " \"test\" : {\n"
- + " \"queryPaths\" : \"/same\",\n"
- + " \"includedPaths\" : \"/same\",\n"
- + " \"jcr:primaryType\" : \"nt:unstructured\",\n"
- + " \"type\" : \"lucene\",\n"
- + " \":childOrder\" : [ \"diff.json\" ],\n"
- + " \"diff.json\" : {\n"
- + " \"jcr:primaryType\" : \"nt:file\",\n"
- + " \":childOrder\" : [ \"jcr:content\" ],\n"
- + " \"jcr:content\" : {\n"
- + " \"jcr:mimeType\" : \"application/json\",\n"
- + " \"jcr:data\" : \"test\",\n"
- + " \"jcr:primaryType\" : \"nt:resource\",\n"
- + " \"jcr:uuid\" : \"...\",\n"
- + " \":childOrder\" : [ ]\n"
+ + " \"test\": {\n"
+ + " \"queryPaths\": \"/same\",\n"
+ + " \"includedPaths\": \"/same\",\n"
+ + " \"jcr:primaryType\": \"nt:unstructured\",\n"
+ + " \"type\": \"lucene\",\n"
+ + " \":childOrder\": [\"diff.json\"],\n"
+ + " \"diff.json\": {\n"
+ + " \"jcr:primaryType\": \"nt:file\",\n"
+ + " \":childOrder\": [\"jcr:content\"],\n"
+ + " \"jcr:content\": {\n"
+ + " \"jcr:mimeType\": \"application/json\",\n"
+ + " \"jcr:data\": \"test\",\n"
+ + " \"jcr:primaryType\": \"nt:resource\",\n"
+ + " \"jcr:uuid\": \"...\",\n"
+ + " \":childOrder\": []\n"
+ " }\n"
+ " }\n"
+ " }\n"
@@ -90,15 +91,15 @@ public class JsonNodeUpdaterTest {
JsonNodeUpdater.addOrReplace(builder, ns, "/test", "nt:test",
json.toString());
ns.merge(builder, new EmptyHook(), CommitInfo.EMPTY);
assertEquals("{\n"
- + " \"test\" : {\n"
- + " \"number\" : 1,\n"
- + " \"double2\" : 1.0,\n"
- + " \"jcr:primaryType\" : \"nt:test\",\n"
- + " \":childOrder\" : [ \"child2\" ],\n"
- + " \"child2\" : {\n"
- + " \"y\" : 2,\n"
- + " \"jcr:primaryType\" : \"nt:test\",\n"
- + " \":childOrder\" : [ ]\n"
+ + " \"test\": {\n"
+ + " \"number\": 1,\n"
+ + " \"double2\": 1.0,\n"
+ + " \"jcr:primaryType\": \"nt:test\",\n"
+ + " \":childOrder\": [\"child2\"],\n"
+ + " \"child2\": {\n"
+ + " \"y\": 2,\n"
+ + " \"jcr:primaryType\": \"nt:test\",\n"
+ + " \":childOrder\": []\n"
+ " }\n"
+ " }\n"
+ "}", reformatJson(JsonUtils.nodeStateToJson(ns.getRoot(),
5)));
@@ -118,18 +119,18 @@ public class JsonNodeUpdaterTest {
JsonNodeUpdater.addOrReplace(builder, ns, "/test", "nt:test",
json.toString());
ns.merge(builder, new EmptyHook(), CommitInfo.EMPTY);
assertEquals("{\n"
- + " \"test\" : {\n"
- + " \"number\" : 1,\n"
- + " \"blob\" : \"test\",\n"
- + " \"string\" : \"hello\",\n"
- + " \"array\" : [ \"a\", \"b\" ],\n"
- + " \"double\" : 1.0,\n"
- + " \"jcr:primaryType\" : \"nt:test\",\n"
- + " \":childOrder\" : [ \"child\" ],\n"
- + " \"child\" : {\n"
- + " \"x\" : 1,\n"
- + " \"jcr:primaryType\" : \"nt:test\",\n"
- + " \":childOrder\" : [ ]\n"
+ + " \"test\": {\n"
+ + " \"number\": 1,\n"
+ + " \"blob\": \"test\",\n"
+ + " \"string\": \"hello\",\n"
+ + " \"array\": [\"a\", \"b\"],\n"
+ + " \"double\": 1.0,\n"
+ + " \"jcr:primaryType\": \"nt:test\",\n"
+ + " \":childOrder\": [\"child\"],\n"
+ + " \"child\": {\n"
+ + " \"x\": 1,\n"
+ + " \"jcr:primaryType\": \"nt:test\",\n"
+ + " \":childOrder\": []\n"
+ " }\n"
+ " }\n"
+ "}", reformatJson(JsonUtils.nodeStateToJson(ns.getRoot(),
5)));
@@ -142,20 +143,25 @@ public class JsonNodeUpdaterTest {
JsonNodeUpdater.addOrReplace(builder, ns, "/test", "nt:test",
json.toString());
ns.merge(builder, new EmptyHook(), CommitInfo.EMPTY);
assertEquals("{\n"
- + " \"test\" : {\n"
- + " \"number\" : 1,\n"
- + " \"double2\" : 1.0,\n"
- + " \"jcr:primaryType\" : \"nt:test\",\n"
- + " \":childOrder\" : [ \"child2\" ],\n"
- + " \"child2\" : {\n"
- + " \"y\" : 2,\n"
- + " \"jcr:primaryType\" : \"nt:test\",\n"
- + " \":childOrder\" : [ ]\n"
+ + " \"test\": {\n"
+ + " \"number\": 1,\n"
+ + " \"double2\": 1.0,\n"
+ + " \"jcr:primaryType\": \"nt:test\",\n"
+ + " \":childOrder\": [\"child2\"],\n"
+ + " \"child2\": {\n"
+ + " \"y\": 2,\n"
+ + " \"jcr:primaryType\": \"nt:test\",\n"
+ + " \":childOrder\": []\n"
+ " }\n"
+ " }\n"
+ "}", reformatJson(JsonUtils.nodeStateToJson(ns.getRoot(),
5)));
}
+ String reformatJson(String json) {
+ // pretty-print so the comparison is independent of the input's whitespace
+ return JsopBuilder.prettyPrint(json);
+ }
+
@Test
public void oakStringValue() {
assertEquals("123", JsonNodeUpdater.oakStringValue("123"));
@@ -210,21 +216,17 @@ public class JsonNodeUpdaterTest {
JsonNodeUpdater.addOrReplace(builder, ns, "/test", "nt:test",
json.toString());
ns.merge(builder, new EmptyHook(), CommitInfo.EMPTY);
assertEquals("{\n"
- + " \"test\" : {\n"
- + " \"namValue\" : \"acme:Test\",\n"
- + " \"boolTrue\" : true,\n"
- + " \"boolFalse\" : false,\n"
- + " \"datValue\" : \"2024-01-19\",\n"
- + " \"escapedArray\" : [ \"/content/path\" ],\n"
- + " \"jcr:primaryType\" : \"nt:test\",\n"
- + " \"strValue\" : \"hello\",\n"
- + " \":childOrder\" : [ ]\n"
+ + " \"test\": {\n"
+ + " \"namValue\": \"acme:Test\",\n"
+ + " \"boolTrue\": true,\n"
+ + " \"boolFalse\": false,\n"
+ + " \"datValue\": \"2024-01-19\",\n"
+ + " \"escapedArray\": [\"/content/path\"],\n"
+ + " \"jcr:primaryType\": \"nt:test\",\n"
+ + " \"strValue\": \"hello\",\n"
+ + " \":childOrder\": []\n"
+ " }\n"
+ "}", reformatJson(JsonUtils.nodeStateToJson(ns.getRoot(),
5)));
}
- String reformatJson(String json) {
- // replace \r\n with \n
- return json.replace("\r", "");
- }
}
diff --git
a/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/MergeTest.java
b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/MergeTest.java
index b8e63fe9c3..f8a1fdc374 100644
---
a/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/MergeTest.java
+++
b/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/diff/MergeTest.java
@@ -18,6 +18,7 @@ package org.apache.jackrabbit.oak.plugins.index.diff;
import static org.apache.jackrabbit.oak.InitialContentHelper.INITIAL_CONTENT;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.nio.charset.StandardCharsets;
@@ -90,7 +91,7 @@ public class MergeTest {
+ " }", true);
HashMap<String, JsonObject> target = new HashMap<>();
- DiffIndexMerger.tryExtractDiffIndex(repositoryDefinitions,
"/oak:index/diff.index", target);
+ new DiffIndexMerger().tryExtractDiffIndex(repositoryDefinitions,
"/oak:index/diff.index", target);
assertEquals("{damAssetLucene={\n"
+ " \"indexRules\": {\n"
+ " \"dam:Asset\": {\n"
@@ -110,25 +111,25 @@ public class MergeTest {
// A property might be indexed twice, by adding two children to the
"properties" node
// that both have the same "name" value.
// Alternatively, they could have the same "function" value.
- String merged = new
DiffIndexMerger().processMerge(JsonObject.fromJson("{\n"
- + " \"jcr:primaryType\":
\"nam:oak:QueryIndexDefinition\",\n"
- + " \"type\": \"lucene\",\n"
- + " \"indexRules\": {\n"
- + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
- + " \"acme:Test\": {\n"
- + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
- + " \"properties\": {\n"
- + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
- + " \"abc\": {\n"
- + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
- + " \"name\": \"test\",\n"
- + " \"boost\": 1.0\n"
- + " }\n"
- + " }\n"
- + " }\n"
- + " }\n"
- + " }"
- + "", true), JsonObject.fromJson("{\n"
+ String merged = new DiffIndexMerger().processMerge(null,
JsonObject.fromJson("{\n"
+ + " \"jcr:primaryType\":
\"nam:oak:QueryIndexDefinition\",\n"
+ + " \"type\": \"lucene\",\n"
+ + " \"indexRules\": {\n"
+ + " \"jcr:primaryType\":
\"nam:nt:unstructured\",\n"
+ + " \"acme:Test\": {\n"
+ + " \"jcr:primaryType\":
\"nam:nt:unstructured\",\n"
+ + " \"properties\": {\n"
+ + " \"jcr:primaryType\":
\"nam:nt:unstructured\",\n"
+ + " \"abc\": {\n"
+ + " \"jcr:primaryType\":
\"nam:nt:unstructured\",\n"
+ + " \"name\": \"test\",\n"
+ + " \"boost\": 1.0\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + " }"
+ + "", true), JsonObject.fromJson("{\n"
+ " \"indexRules\": {\n"
+ " \"acme:Test\": {\n"
+ " \"properties\": {\n"
@@ -161,28 +162,152 @@ public class MergeTest {
}
@Test
- public void renamedFunction() {
- // A function might be indexed twice, by adding two children to the
"properties" node
- // that both have the same "function" value.
- String merged = new
DiffIndexMerger().processMerge(JsonObject.fromJson("{\n"
- + " \"jcr:primaryType\":
\"nam:oak:QueryIndexDefinition\",\n"
- + " \"type\": \"lucene\",\n"
- + " \"indexRules\": {\n"
+ public void ignoredNewPropertiesForExisting() {
+ // for existing indexes or properties,
+ // some additions are not allowed, as they could make the index
+ // unusable for existing queries
+ // (e.g. the selectionPolicy may not be set if the index already exists)
+ String merged = new DiffIndexMerger().processMerge(null,
JsonObject.fromJson("{\n"
+ + " \"jcr:primaryType\":
\"nam:oak:IndexDefinition\",\n"
+ + " \"type\": \"lucene\",\n"
+ + " \"async\": [\"async\", \"nrt\"],\n"
+ + " \"indexRules\": {\n"
+ + " \"dam:Asset\": {\n"
+ + " \"properties\": {\n"
+ + " \"named\": {\n"
+ + " \"name\": \"x\"\n"
+ + " },\n"
+ + " \"functionBased\": {\n"
+ + " \"function\":
\"upper(x)\"\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + "", true), JsonObject.fromJson("{ \n"
+ + " \"tags\": [\"newTag\"],\n"
+ + " \"selectionPolicy\": \"tag\",\n"
+ + " \"includedPaths\": \"/content\",\n"
+ + " \"excludedPaths\": \"/libs\",\n"
+ + " \"queryPaths\": \"/content/abc\",\n"
+ + " \"tags\": \"myTag\",\n"
+ + " \"indexRules\": {\n"
+ + " \"dam:Asset\": {\n"
+ + " \"properties\": {\n"
+ + " \"named\": {\n"
+ + " \"function\": \"upper(y)\",\n"
+ + " \"weight\": 10.0\n"
+ + " },\n"
+ + " \"functionBased\": {\n"
+ + " \"name\": \"y\",\n"
+ + " \"weight\": 20.0\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + " }", true)).toString();
+ assertEquals("{\n"
+ + " \"jcr:primaryType\": \"nam:oak:IndexDefinition\",\n"
+ + " \"type\": \"lucene\",\n"
+ + " \"async\": [\"async\", \"nrt\"],\n"
+ + " \"tags\": \"myTag\",\n"
+ + " \"indexRules\": {\n"
+ + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
+ + " \"dam:Asset\": {\n"
+ " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
- + " \"acme:Test\": {\n"
+ + " \"properties\": {\n"
+ " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
- + " \"properties\": {\n"
- + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
- + " \"abc\": {\n"
- + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
- + " \"function\": \"upper(test)\",\n"
- + " \"boost\": 1.0\n"
- + " }\n"
+ + " \"named\": {\n"
+ + " \"name\": \"x\",\n"
+ + " \"weight\": 10.0,\n"
+ + " \"jcr:primaryType\": \"nam:nt:unstructured\"\n"
+ + " },\n"
+ + " \"functionBased\": {\n"
+ + " \"function\": \"upper(x)\",\n"
+ + " \"weight\": 20.0,\n"
+ + " \"jcr:primaryType\": \"nam:nt:unstructured\"\n"
+ " }\n"
+ " }\n"
+ " }\n"
- + " }"
- + "", true), JsonObject.fromJson("{\n"
+ + " }\n"
+ + "}", merged);
+ }
+
+ @Test
+ public void customizeIncludedPathsQueryPathsAndTags() {
+ // We can merge includedPaths because that will extend the list.
+ // Adding tags is fine as well.
+ // But we can not add queryPaths if that doesn't exist yet.
+ // Also, selectionPolicy may not be set.
+ String merged = new DiffIndexMerger().processMerge(null,
JsonObject.fromJson("{\n"
+ + " \"jcr:primaryType\":
\"nam:oak:IndexDefinition\",\n"
+ + " \"type\": \"lucene\",\n"
+ + " \"async\": [\"async\", \"nrt\"],\n"
+ + " \"tags\": [\"abc\", \"def\"],\n"
+ + " \"includedPaths\": \"/content/dam\",\n"
+ + " \"indexRules\": {\n"
+ + " \"dam:Asset\": {\n"
+ + " \"properties\": {\n"
+ + " \"x\": {\n"
+ + " \"name\": \"x\",\n"
+ + " \"propertyIndex\": true\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + "", true), JsonObject.fromJson("{ \n"
+ + " \"includedPaths\": [\"/content/dam\",
\"/content/additional\" ],\n"
+ + " \"queryPaths\": [\"/content/dam\" ],\n"
+ + " \"selectionPolicy\": \"tag\",\n"
+ + " \"tags\": [\"def\", \"ghi\" ]\n"
+ + " }", true)).toString();
+ assertEquals("{\n"
+ + " \"jcr:primaryType\": \"nam:oak:IndexDefinition\",\n"
+ + " \"type\": \"lucene\",\n"
+ + " \"async\": [\"async\", \"nrt\"],\n"
+ + " \"tags\": [\"abc\", \"def\", \"ghi\"],\n"
+ + " \"includedPaths\": [\"/content/additional\",
\"/content/dam\"],\n"
+ + " \"indexRules\": {\n"
+ + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
+ + " \"dam:Asset\": {\n"
+ + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
+ + " \"properties\": {\n"
+ + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
+ + " \"x\": {\n"
+ + " \"name\": \"x\",\n"
+ + " \"propertyIndex\": true,\n"
+ + " \"jcr:primaryType\": \"nam:nt:unstructured\"\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + "}", merged);
+ }
+
+ @Test
+ public void renamedFunction() {
+ // A function might be indexed twice, by adding two children to the
"properties" node
+ // that both have the same "function" value.
+ String merged = new DiffIndexMerger().processMerge(null,
JsonObject.fromJson("{\n"
+ + " \"jcr:primaryType\":
\"nam:oak:QueryIndexDefinition\",\n"
+ + " \"type\": \"lucene\",\n"
+ + " \"indexRules\": {\n"
+ + " \"jcr:primaryType\":
\"nam:nt:unstructured\",\n"
+ + " \"acme:Test\": {\n"
+ + " \"jcr:primaryType\":
\"nam:nt:unstructured\",\n"
+ + " \"properties\": {\n"
+ + " \"jcr:primaryType\":
\"nam:nt:unstructured\",\n"
+ + " \"abc\": {\n"
+ + " \"jcr:primaryType\":
\"nam:nt:unstructured\",\n"
+ + " \"function\": \"upper(test)\",\n"
+ + " \"boost\": 1.0\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + " }"
+ + "", true), JsonObject.fromJson("{\n"
+ " \"indexRules\": {\n"
+ " \"acme:Test\": {\n"
+ " \"properties\": {\n"
@@ -218,8 +343,8 @@ public class MergeTest {
public void createDummy() {
// when enabling "deleteCreatesDummyIndex", then a dummy index is
created
// (that indexes /dummy, which doesn't exist)
- String merged = new DiffIndexMerger(new String[0], true, true,
false).processMerge(JsonObject.fromJson("{}"
- + "", true), JsonObject.fromJson("{}", true)).toString();
+ String merged = new DiffIndexMerger(new String[0], true, true,
false).processMerge(null, JsonObject.fromJson("{}"
+ + "", true), JsonObject.fromJson("{}",
true)).toString();
assertEquals("{\n"
+ " \"async\": \"async\",\n"
+ " \"includedPaths\": \"/dummy\",\n"
@@ -245,25 +370,25 @@ public class MergeTest {
// - "analyzed" must not be overwritten
// - "ordered" is added
// - "boost" is overwritten
- String merged = new
DiffIndexMerger().processMerge(JsonObject.fromJson("{\n"
- + " \"jcr:primaryType\":
\"nam:oak:QueryIndexDefinition\",\n"
- + " \"type\": \"lucene\",\n"
- + " \"indexRules\": {\n"
- + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
- + " \"acme:Test\": {\n"
- + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
- + " \"properties\": {\n"
- + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
- + " \"abc\": {\n"
- + " \"jcr:primaryType\": \"nam:nt:unstructured\",\n"
- + " \"analyzed\": true,\n"
- + " \"boost\": 1.0\n"
- + " }\n"
- + " }\n"
- + " }\n"
- + " }\n"
- + " }"
- + "", true), JsonObject.fromJson("{\n"
+ String merged = new DiffIndexMerger().processMerge(null,
JsonObject.fromJson("{\n"
+ + " \"jcr:primaryType\":
\"nam:oak:QueryIndexDefinition\",\n"
+ + " \"type\": \"lucene\",\n"
+ + " \"indexRules\": {\n"
+ + " \"jcr:primaryType\":
\"nam:nt:unstructured\",\n"
+ + " \"acme:Test\": {\n"
+ + " \"jcr:primaryType\":
\"nam:nt:unstructured\",\n"
+ + " \"properties\": {\n"
+ + " \"jcr:primaryType\":
\"nam:nt:unstructured\",\n"
+ + " \"abc\": {\n"
+ + " \"jcr:primaryType\":
\"nam:nt:unstructured\",\n"
+ + " \"analyzed\": true,\n"
+ + " \"boost\": 1.0\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + " }"
+ + "", true), JsonObject.fromJson("{\n"
+ " \"indexRules\": {\n"
+ " \"acme:Test\": {\n"
+ " \"properties\": {\n"
@@ -402,6 +527,31 @@ public class MergeTest {
assertEquals(true, merger.includesUnsupportedPaths(new
String[]{"/content", "/apps"}));
assertEquals(true, merger.includesUnsupportedPaths(new
String[]{"/content", "/libs/test"}));
+ assertEquals(false, merger.includesUnsupportedPaths(new
String[]{"x"}));
+ assertEquals(false, merger.includesUnsupportedPaths(new String[]{""}));
+ assertEquals(false, merger.includesUnsupportedPaths(new
String[]{"/content"}));
+ assertEquals(false, merger.includesUnsupportedPaths(new
String[]{"/content/dam"}));
+ assertEquals(false, merger.includesUnsupportedPaths(new
String[]{"/var"}));
+ assertEquals(false, merger.includesUnsupportedPaths(new
String[]{"/etc"}));
+ assertEquals(false, merger.includesUnsupportedPaths(new
String[]{"/content", "/var", "/etc"}));
+ }
+
+ @Test
+ public void includesUnsupportedPathsDisabledTest() {
+ DiffIndexMerger merger = new DiffIndexMerger().
+ setUnsupportedIncludedPaths(new String[]{""}).
+ setDeleteCopiesOutOfTheBoxIndex(false).
+ setDeleteCreatesDummyIndex(false);
+
+ assertEquals(false, merger.includesUnsupportedPaths(null));
+ assertEquals(false, merger.includesUnsupportedPaths(new String[]{""}));
+ assertEquals(false, merger.includesUnsupportedPaths(new
String[]{"/"}));
+ assertEquals(false, merger.includesUnsupportedPaths(new
String[]{"/apps"}));
+ assertEquals(false, merger.includesUnsupportedPaths(new
String[]{"/libs"}));
+ assertEquals(false, merger.includesUnsupportedPaths(new
String[]{"/libs/foundation"}));
+ assertEquals(false, merger.includesUnsupportedPaths(new
String[]{"/content", "/apps"}));
+ assertEquals(false, merger.includesUnsupportedPaths(new
String[]{"/content", "/libs/test"}));
+ assertEquals(false, merger.includesUnsupportedPaths(new
String[]{"x"}));
assertEquals(false, merger.includesUnsupportedPaths(new
String[]{"/content"}));
assertEquals(false, merger.includesUnsupportedPaths(new
String[]{"/content/dam"}));
assertEquals(false, merger.includesUnsupportedPaths(new
String[]{"/var"}));
@@ -436,5 +586,31 @@ public class MergeTest {
assertEquals("\"async\"", indexDef.getProperties().get("async"));
assertEquals("\"/content\"",
indexDef.getProperties().get("includedPaths"));
assertTrue(indexDef.getChildren().containsKey("indexRules"));
+
+ Map<String, JsonObject> result2 = new
DiffIndexMerger().readDiffIndex(store, "diff.index.notThere");
+ assertTrue(result2.isEmpty());
+
+ }
+
+ @Test
+ public void getChildWithKeyValuePairTest() {
+ JsonObject parent = JsonObject.fromJson("{\n"
+ + " \"child1\": { \"name\": \"str:jcr:title\",
\"propertyIndex\": true },\n"
+ + " \"child2\": { \"function\": \"upper(x)\", \"ordered\":
true },\n"
+ + " \"child3\": { \"propertyIndex\": true },\n"
+ + " \"empty\": { }\n"
+ + "}", true);
+ assertEquals("child1",
DiffIndexMerger.getChildWithKeyValuePair(parent, "name", "jcr:title"));
+ assertEquals("child2",
DiffIndexMerger.getChildWithKeyValuePair(parent, "function", "upper(x)"));
+ assertNull(DiffIndexMerger.getChildWithKeyValuePair(parent, "name",
"nonexistent"));
+ assertNull(DiffIndexMerger.getChildWithKeyValuePair(parent, "name",
"upper(x)"));
+ assertNull(DiffIndexMerger.getChildWithKeyValuePair(parent,
"function", "jcr:title"));
+ // v2 == null: child3 and empty have no "name" property, so they are
skipped
+ assertNull(DiffIndexMerger.getChildWithKeyValuePair(parent, "name",
"true"));
+ // key not present in any child
+ assertNull(DiffIndexMerger.getChildWithKeyValuePair(parent, "missing",
"anything"));
+ // no children at all
+ JsonObject emptyParent = JsonObject.fromJson("{}", true);
+ assertNull(DiffIndexMerger.getChildWithKeyValuePair(emptyParent,
"name", "x"));
}
}
diff --git a/oak-doc/src/site/markdown/query/indexing.md
b/oak-doc/src/site/markdown/query/indexing.md
index ad5c321e88..9a9b22ac42 100644
--- a/oak-doc/src/site/markdown/query/indexing.md
+++ b/oak-doc/src/site/markdown/query/indexing.md
@@ -38,24 +38,32 @@
* [NRT Indexing Mode - sync](#nrt-indexing-mode-sync)
* [Cluster Setup](#nrt-indexing-cluster-setup)
* [Configuration](#nrt-indexing-config)
- * [Reindexing](#reindexing)
- * [Reducing reindexing times](#reduce-reindexing-times)
- * [How to Abort Reindexing](#abort-reindex)
-
+ * [Diff Indexes](#diff-indexes)
+ * [Warnings](#warnings)
+ * [Customizing OOTB and Fully Custom
Indexes](#customizing-ootb-and-fully-custom-indexes)
+ * [Versioning and Removal](#versioning-and-removal)
+ * [Merge Rules](#merge-rules)
+ * [Top-Level Property Merging](#top-level-property-merging)
+ * [Indexed Property Rules](#indexed-property-rules)
+ * [Merging Details](#merging-details)
+ * [Reindexing](#reindexing)
+ * [Reducing reindexing times](#reduce-reindexing-times)
+ * [How to Abort Reindexing](#abort-reindex)
+
## <a name="overview"></a> Overview
-
-For queries to perform well, Oak supports indexing of content that is stored
in the repository.
+
+For queries to perform well, Oak supports indexing of content that is stored
in the repository.
Indexing works by comparing different versions of the node data
-(technically, "diff" between the base `NodeState` and the modified
`NodeState`).
+(technically, "diff" between the base `NodeState` and the modified
`NodeState`).
The indexing mode defines how comparing is performed, and when the index
content gets updated:
1. Synchronous Indexing
2. Asynchronous Indexing
3. Near Real Time (NRT) Indexing
-Indexing uses [Commit Editors](../architecture/nodestate.html#commit-editors).
-Some of the editors are of type `IndexEditor`, which are responsible for
updating index content
-based on changes in main content.
+Indexing uses [Commit Editors](../architecture/nodestate.html#commit-editors).
+Some of the editors are of type `IndexEditor`, which are responsible for
updating index content
+based on changes in main content.
Currently, Oak has the following built-in editors:
1. PropertyIndexEditor
@@ -71,9 +79,9 @@ Currently, Oak has the following built-in editors:
## <a name="indexing-flow"></a> Indexing Flow
-The `IndexEditor` is invoked as part of a commit (`Session.save()`),
-or as part of the asynchronous "diff" process.
-For both cases, at some stage "diff" is performed between the _before_ and the
_after_ state,
+The `IndexEditor` is invoked as part of a commit (`Session.save()`),
+or as part of the asynchronous "diff" process.
+For both cases, at some stage "diff" is performed between the _before_ and the
_after_ state,
and passed to `IndexUpdate`, which is responsible for invoking the
`IndexEditor`
based on the _discovered_ index definitions.
@@ -81,7 +89,7 @@ based on the _discovered_ index definitions.
Index definitions are nodes of type `oak:QueryIndexDefinition`,
which are stored under a special node named `oak:index`.
-As part of diff traversal, at each level, `IndexUpdate` looks for `oak:index`
nodes.
+As part of diff traversal, at each level, `IndexUpdate` looks for `oak:index`
nodes.
Below is the canonical index definition structure:
/oak:index/indexName
@@ -89,169 +97,169 @@ Below is the canonical index definition structure:
- type (string) mandatory
- async (string) multiple
- reindex (boolean)
-
+
The index definitions nodes have the following properties:
1. `type` - It determines the _type_ of index.
- `IndexUpdate` looks for an `IndexEditor` of the given
- type from the registered `IndexEditorProvider`.
+ `IndexUpdate` looks for an `IndexEditor` of the given
+ type from the registered `IndexEditorProvider`.
For an out-of-the-box Oak setup, it can have one of the following values:
* `reference` - Configured with the out-of-the-box setup
* `counter` - Configured with the out-of-the-box setup
* `property`
* `lucene`
* `solr` (*NOTE:* Solr support has been removed in Oak 1.82 (see
[OAK-11346](https://issues.apache.org/jira/browse/OAK-11346))
-2. `async` - This determines if the index is to be updated synchronously or
asynchronously.
+2. `async` - This determines if the index is to be updated synchronously or
asynchronously.
It can have the following values:
* `sync` - The default value. It indicates that index is meant to be
updated as part of each commit.
- * `nrt` - Indicates that index is a [near real time](#nrt-indexing)
index.
- * `async` - Indicates that index is to be updated asynchronously.
+ * `nrt` - Indicates that index is a [near real time](#nrt-indexing) index.
+ * `async` - Indicates that index is to be updated asynchronously.
In such a case, this value is used to determine
the [indexing lane](#indexing-lane)
- * Any other value which ends in `async`.
-3. `reindex` - If set to `true`, reindexing is performed for that index.
+ * Any other value which ends in `async`.
+3. `reindex` - If set to `true`, reindexing is performed for that index.
After reindexing is done, the property value is set to `false`.
See [reindexing](#reindexing) for more details.
-
-Based on the above two properties, the `IndexUpdate` creates an `IndexEditor`
instances
+
+Based on the above two properties, the `IndexUpdate` creates `IndexEditor`
instances
as it traverses the "diff", and registers them with itself, passing on the
callbacks for changes.
#### <a name="oak-index-nodes"></a> Index Definition Location
-Indexing logic supports placing `oak:index` nodes at any path.
-Depending on the location, such indexes only index content which are present
under those paths.
+Indexing logic supports placing `oak:index` nodes at any path.
+Depending on the location, such indexes only index content which are present
under those paths.
So, for example if 'oak:index' is present at _'/content/oak:index'_, then
indexes
defined under that node only index repository data present under _'/content'_.
-Depending on the type of the index, one can create these index definitions
under the root path ('/'),
-or non-root paths.
-Currently only `lucene` indexes support creating index definitions at non-root
paths.
+Depending on the type of the index, one can create these index definitions
under the root path ('/'),
+or non-root paths.
+Currently only `lucene` indexes support creating index definitions at non-root
paths.
`property` indexes can only be created under the root path, that is, under '/'.
### <a name="sync-indexing"></a> Synchronous Indexing
-Under synchronous indexing, the index content gets updated as part of the
commit itself.
-Changes to both the main content, as well as the index content, are done
atomically in a single commit.
+Under synchronous indexing, the index content gets updated as part of the
commit itself.
+Changes to both the main content, as well as the index content, are done
atomically in a single commit.
This mode is currently supported by `property` and `reference` indexes.
### <a name="async-indexing"></a> Asynchronous Indexing
-Asynchronous indexing (also called async indexing) is performed using periodic
scheduled jobs.
-As part of the setup, Oak schedules certain periodic jobs which perform
-diff of the repository content, and update the index content based on that.
+Asynchronous indexing (also called async indexing) is performed using periodic
scheduled jobs.
+As part of the setup, Oak schedules certain periodic jobs which perform
+diff of the repository content, and update the index content based on that.
-Each periodic `AsyncIndexUpdate` job is assigned to an [indexing
lane](#indexing-lane),
-and is scheduled to run at a certain interval.
+Each periodic `AsyncIndexUpdate` job is assigned to an [indexing
lane](#indexing-lane),
+and is scheduled to run at a certain interval.
At time of execution, the job performs its work:
-1. Look for the last indexed state via stored checkpoint data.
- If such a checkpoint exists, then read the `NodeState` for that checkpoint.
- If no such state exists, or no such checkpoint is present,
- then it treats it as initial indexing, in which case the base state is
empty.
+1. Look for the last indexed state via stored checkpoint data.
+ If such a checkpoint exists, then read the `NodeState` for that checkpoint.
+ If no such state exists, or no such checkpoint is present,
+ then it treats it as initial indexing, in which case the base state is
empty.
This state is considered the `before` state.
-2. Check if there has been any change in repository from the `before` state.
+2. Check if there has been any change in repository from the `before` state.
If no change is detected then current indexing cycle is considered
completed and
`IndexStatsMBean#done` time is set to current time. `LastIndexedTime` is
not updated
3. Create a checkpoint for _current_ state and refer to this as `after` state.
-4. Create an `IndexUpdate` instance bound to the current _indexing lane_,
+4. Create an `IndexUpdate` instance bound to the current _indexing lane_,
and trigger a diff between the `before` and the `after` state.
-5. `IndexUpdate` will then pick up index definitions that are bound to the
current indexing lane,
- will create `IndexEditor` instances for them,
+5. `IndexUpdate` will then pick up index definitions that are bound to the
current indexing lane,
+ will create `IndexEditor` instances for them,
and pass them the diff callbacks.
-6. The diff traverses in a depth-first manner,
- and at the end of diff, the `IndexEditor` will do final changes for the
current indexing run.
+6. The diff traverses in a depth-first manner,
+ and at the end of diff, the `IndexEditor` will do final changes for the
current indexing run.
Depending on the index implementation, the index data can be either stored
in the NodeStore itself
(for indexes of type `lucene`, `property`, and so on), or in any remote
store (for type `solr`).
-7. `AsyncIndexUpdate` will then update the last indexed checkpoint to the
current checkpoint
- and do a commit.
+7. `AsyncIndexUpdate` will then update the last indexed checkpoint to the
current checkpoint
+ and do a commit.
-Such async indexes are _eventually consistent_ with the repository state,
-and lag behind the latest repository state by some time.
+Such async indexes are _eventually consistent_ with the repository state,
+and lag behind the latest repository state by some time.
However, the index content is eventually consistent, and never ends up in
wrong state with respect
to repository state.
#### <a name="checkpoint"></a> Checkpoint
-A checkpoint is a mechanism, whereby a client of the `NodeStore` can request
Oak to ensure
-that the repository state (snapshot) at that time can be preserved, and not
removed
-by the revision garbage collection process.
-Later, that state can be retrieved from the NodeStore by passing the
checkpoint.
-You can think of a checkpoint as a tag in a git repository, or as a named
revision.
+A checkpoint is a mechanism, whereby a client of the `NodeStore` can request
Oak to ensure
+that the repository state (snapshot) at that time can be preserved, and not
removed
+by the revision garbage collection process.
+Later, that state can be retrieved from the NodeStore by passing the
checkpoint.
+You can think of a checkpoint as a tag in a git repository, or as a named
revision.
-Async indexing makes use of checkpoint support to access older repository
state.
+Async indexing makes use of checkpoint support to access older repository
state.
#### <a name="indexing-lane"></a> Indexing Lane
The term "indexing lane" refers to a set of indexes which are to be updated by
a given async indexer.
-Each index definition meant for async indexing defines an `async` property,
-whose value is the name of the indexing lane.
+Each index definition meant for async indexing defines an `async` property,
+whose value is the name of the indexing lane.
For example, consider following two index definitions:
/oak:index/userIndex
- jcr:primaryType = "oak:QueryIndexDefinition"
- async = "async"
-
+
/oak:index/assetIndex
- jcr:primaryType = "oak:QueryIndexDefinition"
- async = "fulltext-async"
-
-Here, _userIndex_ is bound to the "async" indexing lane,
-while _assetIndex_ is bound to the "fulltext-async" lane.
-Oak [setup](#async-index-setup) configures two `AsyncIndexUpdate` jobs:
+
+Here, _userIndex_ is bound to the "async" indexing lane,
+while _assetIndex_ is bound to the "fulltext-async" lane.
+Oak [setup](#async-index-setup) configures two `AsyncIndexUpdate` jobs:
one for "async", and one for "fulltext-async".
-When the job for "async" is run,
-it only processes index definitions where the `async` value is `async`,
+When the job for "async" is run,
+it only processes index definitions where the `async` value is `async`,
while when the job for "fulltext-async" is run,
it only picks up index definitions where the `async` value is `fulltext-async`.
-These jobs can be scheduled to run at different intervals, and also on
different cluster nodes.
-Each job keeps its own bookkeeping of checkpoint state,
+These jobs can be scheduled to run at different intervals, and also on
different cluster nodes.
+Each job keeps its own bookkeeping of checkpoint state,
and can be [paused and resumed](#async-index-mbean) separately.
-Prior to Oak 1.4, there was only one indexing lane: `async`.
-In Oak 1.4, support was added to create two lanes: `async` and
`fulltext-async`.
-With 1.6, it is possible to [create multiple lanes](#async-index-setup).
+Prior to Oak 1.4, there was only one indexing lane: `async`.
+In Oak 1.4, support was added to create two lanes: `async` and
`fulltext-async`.
+With 1.6, it is possible to [create multiple lanes](#async-index-setup).
#### <a name="cluster"></a> Clustered Setup
-In a clustered setup, one needs to ensure in the host application that
-the async indexing jobs for all lanes are run as singleton in the cluster.
+In a clustered setup, one needs to ensure in the host application that
+the async indexing jobs for all lanes are run as singleton in the cluster.
If `AsyncIndexUpdate` for the same lane is executed concurrently on different
cluster nodes,
-it leads to race conditions, where an old checkpoint gets lost,
+it leads to race conditions, where an old checkpoint gets lost,
leading to reindexing.
-See also [clustering](../clustering.html#scheduled-jobs)
+See also [clustering](../clustering.html#scheduled-jobs)
for more details on how the host application should schedule such indexing
jobs.
##### <a name="async-index-lease"></a> Indexing Lease
-`AsyncIndexUpdate` has an in-built "lease" logic to ensure that
-even if the jobs gets scheduled to run on different cluster nodes, only one of
them runs.
-This is done by keeping a lease property, which gets periodically updated as
-indexing progresses.
+`AsyncIndexUpdate` has an in-built "lease" logic to ensure that
+even if the jobs get scheduled to run on different cluster nodes, only one of
them runs.
+This is done by keeping a lease property, which gets periodically updated as
+indexing progresses.
An `AsyncIndexUpdate` run skips indexing if the current lease has not expired.
-If the last update of the lease was done too long ago (default: more than 15
minutes),
-it is assumed that cluster node that is supposed to index is not available,
+If the last update of the lease was done too long ago (default: more than 15
minutes),
+it is assumed that the cluster node that is supposed to index is not available,
and some other node will take over.
-The lease logic can delay the start of indexing if the system is not stopped
cleanly.
+The lease logic can delay the start of indexing if the system is not stopped
cleanly.
As of Oak 1.6, this does not affect non-clustered setups like those based on
SegmentNodeStore,
but only [affects DocumentNodeStore][OAK-5159] based setups.
#### <a name="async-index-lag"></a> Indexing Lag
-Async indexing jobs are by default configured to run at an interval of 5
seconds.
-Depending on the system load and diff size of content to be indexed,
-the indexing may start lagging by a longer time interval.
-Due to this, the indexing results can lag behind the repository state,
+Async indexing jobs are by default configured to run at an interval of 5
seconds.
+Depending on the system load and diff size of content to be indexed,
+the indexing may start lagging by a longer time interval.
+Due to this, the indexing results can lag behind the repository state,
and may become stale, that means new content added will only show up in query
results after a longer time.
-The `IndexStats` MBean keeps a time series and metrics stats for the indexing
frequency.
+The `IndexStats` MBean keeps a time series and metrics stats for the indexing
frequency.
This can be used to track the indexing state.
-[NRT Indexing](#nrt-indexing) introduced in Oak 1.6 helps in such situations,
+[NRT Indexing](#nrt-indexing) introduced in Oak 1.6 helps in such situations,
and can keep the results more up to date.
#### <a name="async-index-setup"></a> Setup
@@ -262,12 +270,12 @@ Async indexers can be configured via the OSGi config for
`org.apache.jackrabbit.

-Different lanes can be configured by adding more rows of _Async Indexer
Configs_.
+Different lanes can be configured by adding more rows of _Async Indexer
Configs_.
Prior to 1.6, the indexers were created programmatically while constructing
Oak.
#### <a name="async-index-mbean"></a> Async Indexing MBean
-For each configured async indexer in the setup, the indexer exposes a
`IndexStatsMBean`,
+For each configured async indexer in the setup, the indexer exposes a
`IndexStatsMBean`,
which provides various stats around the current indexing state:
org.apache.jackrabbit.oak: async (IndexStats)
@@ -278,14 +286,14 @@ It provides the following details:
* FailingIndexStats - Stats around indexes which are [failing and marked as
corrupt](#corrupt-index-handling).
* LastIndexedTime - Time up to which the repository state has been indexed.
* Status - running, done, failing etc.
-* Failing - boolean flag indicating that indexing has been failing due to some
issue.
+* Failing - boolean flag indicating that indexing has been failing due to some
issue.
This can be monitored for detecting if indexer is healthy or not.
* ExecutionCount - Time series data around the number of runs for various time
intervals.
Further it provides the following operations:
* pause - Pauses the indexer.
-* abortAndPause - Aborts any running indexing cycle and pauses the indexer.
+* abortAndPause - Aborts any running indexing cycle and pauses the indexer.
Invoke 'resume' once you are ready to resume indexing again.
* resume - Resume indexing.
@@ -294,32 +302,32 @@ Further it provides the following operations:
`Since 1.6`
The `AsyncIndexerService` marks any index which fails to update for 30 minutes
-(configurable) as `corrupt`, and ignore such indexes from further indexing.
+(configurable) as `corrupt`, and ignores such indexes from further indexing.
When any index is marked as corrupt, the following log entry is made:
- 2016-11-22 12:52:35,484 INFO NA [async-index-update-fulltext-async]
o.a.j.o.p.i.AsyncIndexUpdate -
- Marking [/oak:index/lucene] as corrupt. The index is failing since Tue Nov
22 12:51:25 IST 2016,
- 1 indexing cycles, failed 7 times, skipped 0 time
+ 2016-11-22 12:52:35,484 INFO NA [async-index-update-fulltext-async]
o.a.j.o.p.i.AsyncIndexUpdate -
+ Marking [/oak:index/lucene] as corrupt. The index is failing since Tue Nov
22 12:51:25 IST 2016,
+ 1 indexing cycles, failed 7 times, skipped 0 time
-Post this, when any new content gets indexed and any such corrupt index is
skipped,
+Post this, when any new content gets indexed and any such corrupt index is
skipped,
the following warn entry is made:
- 2016-11-22 12:52:35,485 WARN NA [async-index-update-fulltext-async]
o.a.j.o.p.index.IndexUpdate -
- Ignoring corrupt index [/oak:index/lucene] which has been marked as
corrupt since
- [2016-11-22T12:51:25.492+05:30]. This index MUST be reindexed for indexing
to work properly
-
+ 2016-11-22 12:52:35,485 WARN NA [async-index-update-fulltext-async]
o.a.j.o.p.index.IndexUpdate -
+ Ignoring corrupt index [/oak:index/lucene] which has been marked as
corrupt since
+ [2016-11-22T12:51:25.492+05:30]. This index MUST be reindexed for indexing
to work properly
+
This info is also seen in the MBean

-
+
Later, once the index is reindexed, the following log entry is made
- 2016-11-22 12:56:25,486 INFO NA [async-index-update-fulltext-async]
o.a.j.o.p.index.IndexUpdate -
- Removing corrupt flag from index [/oak:index/lucene] which has been marked
as corrupt since
- [corrupt = 2016-11-22T12:51:25.492+05:30]
+ 2016-11-22 12:56:25,486 INFO NA [async-index-update-fulltext-async]
o.a.j.o.p.index.IndexUpdate -
+ Removing corrupt flag from index [/oak:index/lucene] which has been marked
as corrupt since
+ [corrupt = 2016-11-22T12:51:25.492+05:30]
-This feature can be disabled by setting `failingIndexTimeoutSeconds` to 0 in
the `AsyncIndexService` config.
+This feature can be disabled by setting `failingIndexTimeoutSeconds` to 0 in
the `AsyncIndexerService` config.
See also [OAK-4939][OAK-4939] for more details.
### <a name="nrt-indexing"></a> Near Real Time Indexing
@@ -328,16 +336,16 @@ See also [OAK-4939][OAK-4939] for more details.
_This mode is only supported for `lucene` indexes_
-Lucene indexes perform well for evaluating complex queries,
-and have the benefit of being evaluated locally with copy-on-read support.
+Lucene indexes perform well for evaluating complex queries,
+and have the benefit of being evaluated locally with copy-on-read support.
However, they are `async`, and depending on system load can lag behind the
repository state.
-For cases where such lag (which can be in the order of minutes) is not
acceptable,
-one must use `property` indexes.
+For cases where such lag (which can be in the order of minutes) is not
acceptable,
+one must use `property` indexes.
To avoid that, Oak 1.6 has [added support for near real time
indexing][OAK-4412]

-In this mode, the indexing happen in two modes, and a query will consult
multiple indexes.
+In this mode, the indexing happens in two modes, and a query will consult
multiple indexes.
The diagram above shows the indexing flow with time. In the above flow:
* T1, T3 and T5 - Time instances at which checkpoints are created.
@@ -348,26 +356,26 @@ The diagram above shows the indexing flow with time. In
the above flow:
* Local Index:
* NRT1 - Local index, which has repository state indexed between T2 and T4.
* NRT2 - Local index, which has repository state indexed between T4 and T6.
-
-As the repository state changes with time, the Async indexer will run and
index the
-changes between the last known checkpoint and current state when that run
started.
+
+As the repository state changes with time, the Async indexer will run and
index the
+changes between the last known checkpoint and current state when that run
started.
So when async run 1 completed, the persisted index has the repository state
indexed up to T3.
-Now without NRT index support, if any query is performed between T2 and T4,
-it can only see index results for the repository state at T1,
-as that is the state where the persisted indexes have data for.
-Any change after that cannot be seen until the next async indexing cycle is
complete (at T4).
+Now without NRT index support, if any query is performed between T2 and T4,
+it can only see index results for the repository state at T1,
+as that is the state where the persisted indexes have data for.
+Any change after that cannot be seen until the next async indexing cycle is
complete (at T4).
With NRT indexing support, indexing will happen at two places:
-* Persisted Index - This is the index which is updated via the async indexer
run.
+* Persisted Index - This is the index which is updated via the async indexer
run.
This flow remains the same, it will be periodically updated by the indexer
run.
-* Local Index - In addition to persisted index, each cluster node will also
maintain a local index.
- This index only keeps data between two async indexer runs.
- Post each run, the previous index is discarded, and a new index is built
+* Local Index - In addition to persisted index, each cluster node will also
maintain a local index.
+ This index only keeps data between two async indexer runs.
+ Post each run, the previous index is discarded, and a new index is built
(actually, the previous index is retained for one cycle).
-
-Any query making use of such an index will automatically make use of both the
persisted and the local indexes.
+
+Any query making use of such an index will automatically make use of both the
persisted and the local indexes.
With this, new content added in the repository after the last async index run
will also show up quickly.
#### <a name="nrt-indexing-usage"></a> Usage
@@ -377,7 +385,7 @@ NRT (Near real time) indexing can be enabled for an index
by configuring the `as
/oak:index/assetIndex
- jcr:primaryType = "oak:QueryIndexDefinition"
- async = ['fulltext-async', 'nrt']
-
+
Here, `async` has been set to a multi-valued property, with the
* Indexing lane - For example `async` or `fulltext-async`,
@@ -385,8 +393,8 @@ Here, `async` has been set to a multi-valued property, with
the
##### <a name="nrt-indexing-mode-nrt"></a> NRT Indexing Mode - nrt
-In this mode, the local index is updated asynchronously on that cluster nodes
post each commit,
-and the index reader is refreshed each second.
+In this mode, the local index is updated asynchronously on that cluster node
post each commit,
+and the index reader is refreshed each second.
So, any change done should show up on that cluster node within 1 to 2 seconds.
/oak:index/userIndex
@@ -396,36 +404,204 @@ So, any change done should show up on that cluster node
within 1 to 2 seconds.
##### <a name="nrt-indexing-mode-sync"></a> NRT Indexing Mode - sync
In this mode, the local index is updated synchronously on that cluster node
post each commit,
-and the index reader is refreshed immediately.
+and the index reader is refreshed immediately.
This mode indexes more slowly compared to the "nrt" mode.
/oak:index/userIndex
- jcr:primaryType = "oak:QueryIndexDefinition"
- async = ['async', 'sync']
-
-For a single node setup (for example with the `SegmentNodeStore`),
-this mode effectively makes async lucene index perform same as synchronous
property indexes.
+
+For a single node setup (for example with the `SegmentNodeStore`),
+this mode effectively makes async lucene index perform same as synchronous
property indexes.
However, the 'nrt' mode performs better, so using that is preferable.
#### <a name="nrt-indexing-cluster-setup"></a> Cluster Setup
In cluster setup, each cluster node maintains its own local index for changes
happening in that cluster node.
-In addition to that, it also indexes changes from other cluster nodes by
relying on
-[Oak observation for external changes][OAK-4808].
-This depends on how frequently external changes are delivered.
-Due to this, even with NRT indexing changes from other cluster nodes will take
some more time
+In addition to that, it also indexes changes from other cluster nodes by
relying on
+[Oak observation for external changes][OAK-4808].
+This depends on how frequently external changes are delivered.
+Due to this, even with NRT indexing changes from other cluster nodes will take
some more time
to be reflected in query results compared to local changes.
#### <a name="nrt-indexing-config"></a> Configuration
NRT indexing exposes a few configuration options as part of the
[LuceneIndexProviderService](lucene.html#osgi-config):
-* `enableHybridIndexing` - Boolean property, defaults to `true`.
+* `enableHybridIndexing` - Boolean property, defaults to `true`.
Can be set to `false` to disable the NRT indexing feature completely.
-* `hybridQueueSize` - The size of the in-memory queue used
- to hold Lucene documents for indexing in the `nrt` mode.
+* `hybridQueueSize` - The size of the in-memory queue used
+ to hold Lucene documents for indexing in the `nrt` mode.
The default size is 10000.
+## <a name="diff-indexes"></a> Diff Indexes
+
+Note: the following section only applies with Oak version 1.92 and newer
(OAK-12010).
+
+A diff index is a special node under `/oak:index` named `diff.index`.
+It contains differences (a "diff") to existing index definitions,
+and possibly new (custom) index definitions, in the form of JSON.
+These diffs are automatically merged with the out-of-the-box (OOTB) index
definitions,
+creating new versioned index nodes (e.g. `damAssetLucene-8-custom-1`).
+This simplifies index management by allowing modifications to indexes
+without directly editing the OOTB index definitions.
+
+The diff index consists of:
+
+ /oak:index/diff.index
+ - type: disabled
+ - jcr:primaryType: oak:QueryIndexDefinition
+ + diff.json (nt:file)
+
+An example diff.json file is an empty JSON object: `{}`.
+See below for more examples.
+
+To build a JSON index definition for a query, you may want to use the existing
+[online tooling](https://oak-indexing.github.io/oakTools/).
+
+### Warnings
+
+If there are errors when trying to parse the JSON, or when merging,
+these errors are written to the `diff.index` node in the form of
+`warn.01`, `warn.02` etc. properties.
+(After storing the new diff, you might need to refresh the node to see these
warnings.)
+
+Merge warnings consist of ignored properties, unsupported paths, checksum
mismatches, etc.
+They are limited to 100 entries or 1 MB total.
+The warning properties are automatically removed if the warnings are resolved
+(there is no need to remove them manually).
+
+Example: if a `diff.json` with a typo is stored, a `warn.01` property
+is added to the `diff.index` node.
+
+### Customizing OOTB and Fully Custom Indexes
+
+* An index name containing a dot (e.g. `acme.myIndex`) is treated as a
+ fully custom index (not based on an OOTB product index).
+ It gets a new node with version, e.g. `-1-custom-1`.
+ Later changes increment the version: `-custom-2`, `-custom-3`, etc.
+* For fully custom indexes, the diff must contain
+ the complete index definition (there is no product base to merge with).
+
+Example: if there is an existing index under `/oak:index/damAssetsLucene-10`,
then the following `diff.json` will
+customize it, and create a new merged index
`/oak:index/damAssetsLucene-10-custom-1` if merging is successful.
+It will also add a new fully custom index named
`/oak:index/acme.myIndex-1-custom-1`.
+
+ {
+ "damAssetsLucene": {
+ "indexRules": {
+ "dam:Asset": {
+ "properties": {
+ "newProperty": {
+ "name": "newProperty",
+ "propertyIndex": true
+ }
+ }
+ }
+ }
+ },
+ "acme.myIndex": {
+ "async": [ "async", "nrt" ],
+ "compatVersion": 2,
+ "evaluatePathRestrictions": true,
+ "includedPaths": [ "/content" ],
+ "queryPaths": [ "/content" ],
+ "selectionPolicy": "tag",
+ "tags": [ "acme" ],
+ "type": "lucene",
+ "indexRules": {
+ "acme:Item": {
+ "properties": {
+ "myTitle": {
+ "name": "myTitle",
+ "ordered": true,
+ "propertyIndex": true
+ }
+ }
+ }
+ }
+ }
+ }
+
+### Versioning and Removal
+
+* Each merged index gets a name like
`<baseName>-<productVersion>-custom-<customerVersion>`.
+ When a new merge produces a different result, the customer version is
incremented.
+* A `mergeInfo` and `mergeChecksum` property are added to every merged index.
+* Old versions of the same base index are removed
+ after the new version is created.
+* If a previously merged index (one with a `mergeInfo` property)
+ is no longer referenced in the diff, the merged index is removed.
+
+### Merge Rules
+
+The following rules apply when merging a diff with an OOTB index:
+
+#### Top-Level Property Merging
+
+The following rules apply to the properties at the top level (properties of
the index):
+
+* `includedPaths`, `queryPaths`, and `tags` are merged (union of old and new
values) rather than overwritten,
+ if the property already exists.
+ The properties `includedPaths` and `queryPaths` may not be added if they
don't exist yet
+ (to avoid making the index more restrictive).
+* In addition to the above, the following properties may also
+ not be added to an existing (OOTB) index:
+ `selectionPolicy`, `valueRegex`, `queryFilterRegex`, `excludedPaths`.
+ They are dropped with a warning.
+* Other new properties that don't exist on the target are added.
+* Properties that already exist may not be overwritten.
+
+Example:
+
+ {
+ "acmeAssetsLucene": {
+ "tags": [ "additionalTag" ],
+ "indexRules": {
+ "acme:Asset": {
+ "properties": {
+ "newProperty": {
+ "name": "newProperty",
+ "propertyIndex": true
+ }
+ }
+ }
+ }
+ }
+ }
+
+#### Indexed Property Rules
+
+* When adding a new child under a `properties` node,
+ the merger first tries to **match by `name`** value:
+ if an existing child has the same `name`, the diff is applied to that child
+ (even if the child node name differs).
+* Similarly, it tries to **match by `function`** value for function-based
index properties.
+* If no match is found, a new child node is created.
+* The following properties may not be added to an existing child node,
+ and may not be updated: `isRegexp`, `index`, `function`, `name`.
+ This prevents changing how an existing property is indexed.
+* Existing property values may not be overwritten, except for `boost` and
`weight`.
+
+#### Merging Details
+
+In addition, the following rules are used when merging.
+In most cases, they are irrelevant, but listed here for completeness.
+
+* Before comparing indexes, a cleaned and normalized version is produced:
+ properties like `reindex`, `reindexCount`, `refresh`, `seed`, `:version`,
`:nameSeed`,
+ `:mappingVersion`, `merges`, `mergeInfo`, and `mergeChecksum` are stripped.
+* Oak-style string prefixes (`str:`, `nam:`, `dat:`) are normalized away for
comparison.
+* The `jcr:primaryType` property is silently ignored at the top level.
+* Technically, rules in the "Indexed Property Rules" also
+ apply to other child nodes, however in practice these
+ properties are only useful in the `properties` child node.
+* All `jcr:uuid` properties are removed (new UUIDs are generated when needed).
+* Property order is ignored during comparison (properties are sorted
alphabetically),
+ but child node order is significant.
+* A SHA-256 checksum is computed on the merged definition
+ to detect changes efficiently across runs.
+
## <a name="superseding"></a> Superseding an Index
This helps in replacing one index with another. Suppose we have the following
indices:
@@ -443,16 +619,16 @@ result, the superseded index `sampleIndex1` would be
disabled (by setting type=d
## <a name="reindexing"></a> Reindexing
-Reindexing rarely solves problems.
-Specially, it does not typically make queries return the expected result.
+Reindexing rarely solves problems.
+In particular, it does not typically make queries return the expected result.
For such cases, it is _not_ recommended to reindex,
-also because reindex can be very slow (sometimes multiple days),
+also because reindex can be very slow (sometimes multiple days),
and use a lot of temporary disk space.
Note that removing checkpoints, and removing the hidden `:async` node
will cause a full reindex, so doing this is not recommended either.
If queries don't return the right data, then possibly the index is [not yet
up-to-date][OAK-5159],
-or the query is incorrect, or included/excluded path settings are wrong (for
Lucene indexes).
-Instead of reindexing, it is suggested to first check the log file,
+or the query is incorrect, or included/excluded path settings are wrong (for
Lucene indexes).
+Instead of reindexing, it is suggested to first check the log file,
modify the query so it uses a different index or traversal, and run the query
again.
Reindexing of existing indexes is required in the following scenarios:
@@ -464,7 +640,7 @@ Reindexing of existing indexes is required in the following
scenarios:
* B: Prior to Oak 1.6, in case a _Lucene_ index definition was changed (same
as A).
In Oak 1.6 and newer, queries will use the old index definition
until the index is [reindexed](lucene.html#stored-index-definition).
-* C: Prior to Oak 1.2.15 / 1.4.2, in case the query engine picks a very slow
index
+* C: Prior to Oak 1.2.15 / 1.4.2, in case the query engine picks a very slow
index
for some queries because the counter index (`/oak:index/counter`)
[got out of sync after adding and removing lots of nodes many
times][OAK-4065].
For this case, it is recommended to verify the contents of the counter index
first,
@@ -477,20 +653,20 @@ Reindexing of existing indexes is required in the
following scenarios:
The workaround (to avoid reindexing) is to manually tweak index
configurations
using manually set `entryCount` of the index that should be used to a low
value
(as high as possible so that the index is still needed), for example to 100
or 1000.
-* D: In case a binary of a Lucene index (a Lucene index file) is missing,
+* D: In case a binary of a Lucene index (a Lucene index file) is missing,
for example because the binary is not available in the datastore.
This can happen in case the datastore is misconfigured
such that garbage collection removed a binary that is still required.
In such cases, other binaries might be missing as well;
it is best to traverse all nodes of the repository to ensure this is not the
case.
* E: In case a binary of a Lucene index (a Lucene index file) is corrupt.
- If the index is corrupt, an `AsyncIndexUpdate` run will fail
+ If the index is corrupt, an `AsyncIndexUpdate` run will fail
with an exception saying a Lucene index file is corrupt.
In such a case, first verify that the following procedure doesn't resolve
the issue: stop Oak, remove the local copy of the Lucene index (directory
`index`),
and restart. If the index is still corrupt after this, then reindexing is
needed.
In such cases, please file an Oak issue.
-* F: Prior to Oak 1.2.24 / 1.4.13 / 1.6.1,
+* F: Prior to Oak 1.2.24 / 1.4.13 / 1.6.1,
when using the document store (MongoDB or RDBMK)
in combination with a large transaction (a commit that changed or added many
thousand nodes),
and if one of the parent nodes had more than 100 child nodes,
@@ -502,12 +678,12 @@ Reindexing of existing indexes is required in the
following scenarios:
See also [OAK-4684][OAK-4684].
* H: If a binary is missing after reindexing.
This can happen in the following case:
- When reindexing or creating a new index takes multiple days,
+ When reindexing or creating a new index takes multiple days,
and during that time, after one day or later, datastore garbage collection
was run concurrently.
- Some binaries created during by reindexing can get missing because
+ Some binaries created during reindexing can go missing because
datastore garbage collection removes unreferenced binaries older than one
day.
- Indexing or reindexing using oak-run is not affected by this.
-* I: Prior to Oak 1.0.27 / 1.2.11,
+ Indexing or reindexing using oak-run is not affected by this.
+* I: Prior to Oak 1.0.27 / 1.2.11,
if an index file gets larger than 2 GB, then possibly the index can not be
opened
(exception "Invalid seek request"), and subsequently the index might get
corrupt.
See also [OAK-3911][OAK-3911].
@@ -517,26 +693,26 @@ To reindex an _existing_ index (when needed), set the
`reindex` property to `tru
/oak:index/userIndex
- reindex = true
-
+
Once changes are saved, the index is reindexed.
For asynchronous indexes, reindex starts with the next async indexing cycle.
For synchronous indexes, the reindexing is done as part of save (or commit)
itself.
-For a (synchronous) property index,
-as an alternative you can use the `PropertyIndexAsyncReindexMBean`;
+For a (synchronous) property index,
+as an alternative you can use the `PropertyIndexAsyncReindexMBean`;
see the [reindexing property indexes](property-index.html#reindexing)
section for more details on that.
Once reindexing starts, the following log entries can be seen in the log:
[async-index-update-async] o.a.j.o.p.i.IndexUpdate Reindexing will be
performed for following indexes: [/oak:index/userIndex]
- [async-index-update-async] o.a.j.o.p.i.IndexUpdate Reindexing Traversed
#100000 /home/user/admin
- [async-index-update-async] o.a.j.o.p.i.AsyncIndexUpdate [async] Reindexing
completed for indexes: [/oak:index/userIndex*(4407016)] in 30 min
+ [async-index-update-async] o.a.j.o.p.i.IndexUpdate Reindexing Traversed
#100000 /home/user/admin
+ [async-index-update-async] o.a.j.o.p.i.AsyncIndexUpdate [async] Reindexing
completed for indexes: [/oak:index/userIndex*(4407016)] in 30 min
Once reindexing is complete, the `reindex` flag is set to `false`
automatically.
### <a name="reduce-reindexing-times"></a> Reducing Reindexing Times
-If the index being reindexed has full text extraction configured then
reindexing can take long time as most of the
-time is spent in text extraction.
+If the index being reindexed has full text extraction configured then
reindexing can take a long time as most of the
+time is spent in text extraction.
For such cases it's recommended to use text [pre-extraction
support](pre-extract-text.html).
The text pre-extraction can be done before starting the actual reindexing.
This would then ensure that during reindexing
time is not spent in performing text extraction and hence the actual time
taken for reindexing such an index gets reduced
@@ -546,13 +722,13 @@ considerably.
Building an index can be slow. It can be aborted (stopped before it is
finished),
for example if you detect there is an error in the index definition.
-Reindexing and building a new index can be aborted
+Reindexing and building a new index can be aborted
when using asynchronous indexes.
For synchronous indexes, it can be stopped if it was started using the
`PropertyIndexAsyncReindexMBean`.
-To do this, use the respective `IndexStats` JMX bean
-(for example, `async`, `fulltext-async`, or `async-reindex`),
+To do this, use the respective `IndexStats` JMX bean
+(for example, `async`, `fulltext-async`, or `async-reindex`),
and call the operation `abortAndPause()`.
-Then, either set the `reindex` flag to `false` (for an existing index),
+Then, either set the `reindex` flag to `false` (for an existing index),
remove the index definition (for a new index),
or change the index type to `disabled`. Store the change. Finally, call the
operation `resume()`
so that regular indexing operations can continue.
@@ -566,4 +742,4 @@ so that regular indexing operations can continue.
[OAK-5159]: https://issues.apache.org/jira/browse/OAK-5159
[OAK-5557]: https://issues.apache.org/jira/browse/OAK-5557
-
+