This is an automated email from the ASF dual-hosted git repository. thomasm pushed a commit to branch OAK-11735 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit ce67c68578da8e33d1a3e1e4e61e533dd62e2361
Author: Thomas Mueller <thom...@apache.org>
AuthorDate: Thu May 22 08:30:23 2025 +0200

    OAK-11735 Index merge: merge aggregation definitions
---
 .../oak/index/merge/IndexDefMergerUtils.java       | 110 +++++++++++++++++++++
 .../index/merge/IndexDefMergerScenariosTest.java   |   1 +
 .../oak/index/merge/merge-aggregates.json          | 103 +++++++++++++++++++
 3 files changed, 214 insertions(+)

diff --git a/oak-run/src/main/java/org/apache/jackrabbit/oak/index/merge/IndexDefMergerUtils.java b/oak-run/src/main/java/org/apache/jackrabbit/oak/index/merge/IndexDefMergerUtils.java
index 39ed699b76..b699e9d06e 100644
--- a/oak-run/src/main/java/org/apache/jackrabbit/oak/index/merge/IndexDefMergerUtils.java
+++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/index/merge/IndexDefMergerUtils.java
@@ -28,6 +28,8 @@ import java.util.List;
 import java.util.Map.Entry;
 import java.util.Objects;
 import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
 import org.apache.jackrabbit.oak.commons.json.JsonObject;
@@ -270,6 +272,9 @@ public class IndexDefMergerUtils {
 
     private static JsonObject mergeChild(String path, String child, int level, JsonObject ancestor, JsonObject custom, JsonObject product,
             ArrayList<String> conflicts) {
+        if (level == 1 && path.indexOf("/aggregates/") >= 0) {
+            return mergeAggregates(path, child, level, ancestor, custom, product, conflicts);
+        }
         JsonObject a = ancestor.getChildren().get(child);
         JsonObject c = custom.getChildren().get(child);
         JsonObject p = product.getChildren().get(child);
@@ -287,6 +292,111 @@ public class IndexDefMergerUtils {
         }
     }
 
+    /**
+     * Extracts the node path from a conflict message. Such a message looks like:
+     * "Could not merge value; path=/oak:index/assets-11/aggregates/asset/include11
+     * property=path; ancestor=null; custom=...; product=..."
+     */
+    private static final Pattern CONFLICT_PATH_PATTERN = Pattern.compile("path=([^\\s]+)\\sproperty=");
+
+    /**
+     * Merge a child of an "aggregates" node. If the regular merge reports
+     * conflicts, the conflicting nodes are removed from the custom definition,
+     * the merge is repeated, and the removed nodes are appended to the result
+     * as "include" nodes with new ids.
+     *
+     * @param path the path, passed on to mergeChild
+     * @param child the name of the child node to merge
+     * @param level the current level; the merge itself is done with level + 1
+     * @param ancestor the parent of the child in the ancestor definition
+     * @param custom the parent of the child in the custom definition;
+     *            conflicting nodes are removed from this object
+     * @param product the parent of the child in the product definition
+     * @param conflicts the list that receives the remaining conflicts
+     * @return the merged child
+     */
+    private static JsonObject mergeAggregates(String path, String child, int level, JsonObject ancestor, JsonObject custom, JsonObject product,
+            ArrayList<String> conflicts) {
+
+        // merge, with level + 1 so that we don't recurse into this function again
+        // conflicts are redirected to a new, temporary list
+        ArrayList<String> aggregateConflicts = new ArrayList<>();
+        JsonObject merged = mergeChild(path, child, level + 1, ancestor, custom, product, aggregateConflicts);
+        if (aggregateConflicts.isEmpty()) {
+            return merged;
+        }
+
+        JsonObject customChild = custom.getChildren().get(child);
+
+        // which is the next id for "include" (e.g. 12); this has to be
+        // calculated before nodes are removed from the custom definition
+        long nextIncludeId = getNextIncludeId(ancestor.getChildren().get(child));
+        nextIncludeId = Math.max(nextIncludeId, getNextIncludeId(customChild));
+        nextIncludeId = Math.max(nextIncludeId, getNextIncludeId(product.getChildren().get(child)));
+
+        // list of "include" elements to move to the end
+        List<JsonObject> elementsToMove = new ArrayList<>();
+        if (customChild != null) {
+            for (String message : aggregateConflicts) {
+                Matcher matcher = CONFLICT_PATH_PATTERN.matcher(message);
+                if (!matcher.find()) {
+                    continue;
+                }
+                // the last element of the path is the conflicting aggregation node
+                String extractedPath = matcher.group(1);
+                String conflictElement = extractedPath.substring(extractedPath.lastIndexOf('/') + 1);
+
+                // remove from the custom list; the result is null if the node
+                // is not a direct child, or if it was already removed because
+                // more than one of its properties is in conflict
+                JsonObject conflict = customChild.getChildren().remove(conflictElement);
+                if (conflict != null) {
+                    // remember the element, to put it back later
+                    elementsToMove.add(conflict);
+                }
+            }
+        }
+
+        // merge again, with conflicts resolved now
+        // (if there are other conflicts unrelated to aggregation,
+        // those will not be resolved)
+        merged = mergeChild(path, child, level + 1, ancestor, custom, product, conflicts);
+
+        // add the aggregation conflict at the end, with new ids
+        // first we need to clone the merged object,
+        // because it might be the same object as the product currently
+        merged = JsonObject.fromJson(merged.toString(), true);
+        for (JsonObject json : elementsToMove) {
+            merged.getChildren().put("include" + nextIncludeId, json);
+            nextIncludeId++;
+        }
+        return merged;
+    }
+
+    /**
+     * Get the id to use for the next "include" child node: one more than the
+     * largest numeric suffix of the existing "include" children.
+     *
+     * @param json the node whose children are inspected, or null
+     * @return the next id (at least 1; 1 if the node is null)
+     */
+    private static long getNextIncludeId(JsonObject json) {
+        if (json == null) {
+            return 1;
+        }
+        long max = 0;
+        for (String n : json.getChildren().keySet()) {
+            if (n.startsWith("include")) {
+                try {
+                    max = Math.max(max, Long.parseLong(n.substring("include".length())));
+                } catch (NumberFormatException ignored) {
+                    // the suffix is not a number: such a node does not affect the next id
+                }
+            }
+        }
+        return max + 1;
+    }
+
     private static boolean isSameJson(JsonObject a, JsonObject b) {
         if (a == null || b == null) {
             return a == null && b == null;
diff --git a/oak-run/src/test/java/org/apache/jackrabbit/oak/index/merge/IndexDefMergerScenariosTest.java b/oak-run/src/test/java/org/apache/jackrabbit/oak/index/merge/IndexDefMergerScenariosTest.java
index 961f9b07d4..ac11a2aa00 100644
--- a/oak-run/src/test/java/org/apache/jackrabbit/oak/index/merge/IndexDefMergerScenariosTest.java
+++ b/oak-run/src/test/java/org/apache/jackrabbit/oak/index/merge/IndexDefMergerScenariosTest.java
@@ -48,6 +48,7 @@ public class IndexDefMergerScenariosTest extends ParameterizedMergingTestBase {
     public static Collection<Object[]> data() {
         return Arrays.asList(new Object[][] {
                 testCase("should merge tags fully; and override type", "merge-override-tags-type.json"),
+                testCase("should merge aggregates", "merge-aggregates.json"),
                 testCase("should merge custom into new base index", "basic.json"),
                 testCase("should use the latest base version for the base in merges", "merges-base.json"),
                 testCase(
diff --git a/oak-run/src/test/resources/org/apache/jackrabbit/oak/index/merge/merge-aggregates.json b/oak-run/src/test/resources/org/apache/jackrabbit/oak/index/merge/merge-aggregates.json
new file mode 100644
index 0000000000..e0a8780bb3
--- /dev/null
+++ b/oak-run/src/test/resources/org/apache/jackrabbit/oak/index/merge/merge-aggregates.json
@@ -0,0 +1,103 @@
+{
+    "build": {
+        "/oak:index/lucene-2": {
+            "jcr:primaryType": "nam:oak:QueryIndexDefinition",
+            ":version": 2,
+            "tags": ["similarity", "asset", "fragments"],
+            "type": "elasticsearch",
+            "async": "elastic-async",
+            "reindex": false,
+            "reindexCount": 1,
+            "aggregates": {
+                "asset": {
+                    "include0": {
+                        "path": "a"
+                    },
+                    "include1": {
+                        "path": "b"
+                    }
+                }
+            }
+        }
+    },
+
+    "run": {
+        "/oak:index/lucene-1": {
+            "jcr:primaryType": "nam:oak:QueryIndexDefinition",
+            ":version": 2,
+            "tags": ["similarity", "asset"],
+            "type": "disabled",
+            "async": ["async", "nrt"],
+            "reindex": false,
+            "reindexCount": 1,
+            "aggregates": {
+                "asset": {
+                    "include0": {
+                        "path": "a"
+                    }
+                }
+            }
+        },
+        "/oak:index/lucene-1-custom-1": {
+            "jcr:primaryType": "nam:oak:QueryIndexDefinition",
+            ":version": 2,
+            "tags": "custom",
+            "type": "lucene",
+            "async": "async",
+            "reindex": false,
+            "reindexCount": 1,
+            "aggregates": {
+                "asset": {
+                    "include0": {
+                        "path": "a"
+                    },
+                    "include1": {
+                        "path": "xzy"
+                    }
+                }
+            }
+        }
+    },
+
+    "expected": {
+        "/oak:index/lucene-2": {
+            "jcr:primaryType": "nam:oak:QueryIndexDefinition",
+            ":version": 2,
+            "tags": ["similarity", "asset", "fragments"],
+            "type": "elasticsearch",
+            "async": "elastic-async",
+            "reindex": false,
+            "reindexCount": 1,
+            "aggregates": {
+                "asset": {
+                    "include0": {
+                        "path": "a"
+                    },
+                    "include1": {
+                        "path": "b"
+                    }
+                }
+            }
+        },
+        "/oak:index/lucene-2-custom-1": {
+            "jcr:primaryType": "nam:oak:QueryIndexDefinition",
+            "tags": ["asset", "custom", "fragments", "similarity"],
+            "type": "elasticsearch",
+            "async": "elastic-async",
+            "merges": ["/oak:index/lucene-2", "/oak:index/lucene-1-custom-1"],
+            "aggregates": {
+                "asset": {
+                    "include0": {
+                        "path": "a"
+                    },
+                    "include1": {
+                        "path": "b"
+                    },
+                    "include2": {
+                        "path": "xzy"
+                    }
+                }
+            }
+        }
+    }
+}