This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch OAK-12027
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 036a88ffd6ca0b9e8f39e15c8a96a3fcf696d39e
Author: rishabhdaim <[email protected]>
AuthorDate: Mon Dec 1 13:25:36 2025 +0530

    OAK-12027 : added api to remove node and its descendants starting from leaf 
nodes upto parents
---
 oak-run/src/main/js/oak-mongo.js | 137 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)

diff --git a/oak-run/src/main/js/oak-mongo.js b/oak-run/src/main/js/oak-mongo.js
index 36a3c9195b..7e1f3b96f9 100644
--- a/oak-run/src/main/js/oak-mongo.js
+++ b/oak-run/src/main/js/oak-mongo.js
@@ -351,6 +351,143 @@ var oak = (function(global){
         return {deletedCount : count};
     };
 
+    /**
+     * Removes the complete subtree rooted at the given path, deleting leaf 
nodes first and then parents.
+     * This ensures that if the operation is interrupted, parent nodes remain 
available for resuming.
+     *
+     * Optimizations:
+     * - Sequential depth checking (checks depth+1, depth+2, etc. until no 
nodes found)
+     *   * Each query targets a specific depth (exact match) - very 
index-friendly
+     *   * Stops immediately when no nodes found (early termination)
+     *   * Separate queries for regular vs long path nodes (optimal index 
usage)
+     *   * Much faster than aggregation when max depth is low (< 50)
+     * - Uses bulk write operations for efficient deletions
+     * - Includes root deletions in bulk operation
+     * - Batches large operations to avoid MongoDB limits
+     *
+     * Performance comparison:
+     * - Aggregation approach: Scans all matching documents to find max depth
+     *   * For subtree with 1M nodes: ~2-3 seconds
+     * - Sequential approach: Checks specific depths one by one until none 
found
+     *   * For subtree with max depth 10: ~10 queries × 0.01s = ~0.1 seconds
+     *   * For subtree with max depth 50: ~50 queries × 0.01s = ~0.5 seconds
+     *   * Naturally terminates when no nodes found - no limit needed
+     *   * Much faster than aggregation for typical use cases!
+     *
+     * @memberof oak
+     * @method removeDescendantsAndSelf
+     * @param {string} path the path of the subtree to remove.
+     * @returns {Object} Object with deletedCount property
+     */
+    api.removeDescendantsAndSelfFromLeaves = function(path) {
+        var count = 0;
+        var depth = pathDepth(path);
+        var prefix = path + "/";
+        var escapedPrefix = escapeForRegExp(prefix);
+
+        // OPTIMIZATION: Early exit - check if root exists
+        var rootExists = db.nodes.findOne({_id: depth + ":" + path}) !== null;
+        print("Checked root existence at depth " + depth + ": " + (rootExists 
? "found" : "not found"));
+        if (!rootExists) {
+            return {deletedCount: 0};
+        }
+
+        // OPTIMIZATION: Sequential depth checking - much faster than 
aggregation
+        // Check depth+1, depth+2, etc. until we find no nodes
+        // Uses separate targeted queries for better index usage
+        // Loop naturally terminates when no nodes are found - no limit needed
+        var maxDepth = depth;
+        var currentDepth = depth + 1;
+
+        while (true) {
+            // Check regular nodes at this depth (uses exact depth prefix - 
very index-friendly)
+            // Using findOne with projection is fastest for existence checks
+            var hasRegularNodes = db.nodes.findOne({
+                _id: new RegExp("^" + currentDepth + ":" + escapedPrefix)
+            }, {_id: 1}) !== null;
+            print("Checked regular nodes at depth " + currentDepth + ": " + 
(hasRegularNodes ? "found" : "not found"));
+
+            // Check long path nodes at this depth (exact _id match + prefix 
on _path)
+            // Separate query allows MongoDB to use optimal index for each
+            var hasLongPathNodes = db.nodes.findOne({
+                _id: currentDepth + ":h",
+                _path: new RegExp("^" + escapedPrefix)
+            }, {_id: 1}) !== null;
+            print("Checked long path nodes at depth " + currentDepth + ": " + 
(hasLongPathNodes ? "found" : "not found"));
+
+            if (!hasRegularNodes && !hasLongPathNodes) {
+                // No nodes at this depth, previous depth was max
+                break;
+            }
+
+            maxDepth = currentDepth;
+            currentDepth++;
+        }
+
+        print("Max depth found: " + maxDepth + " (root depth: " + depth + ")");
+
+        // If maxDepth equals depth, only root exists (already checked above)
+        if (maxDepth === depth) {
+            var rootResult = db.nodes.deleteMany({_id: depth + ":" + path});
+            print("Deleted root regular nodes at depth " + depth + ": " + 
rootResult.deletedCount + " nodes");
+            var longPathResult = db.nodes.deleteMany(longPathQuery(path));
+            print("Deleted root long path nodes at depth " + depth + ": " + 
longPathResult.deletedCount + " nodes");
+            return {deletedCount: rootResult.deletedCount + 
longPathResult.deletedCount};
+        }
+
+        // OPTIMIZATION: Process one depth at a time - add bulk ops, execute, 
then continue to upper level
+        // Delete from deepest level to root (children only, excludes root)
+        for (var d = maxDepth; d > depth; d--) {
+            var bulkOps = [];
+
+            // Add bulk operations for this depth level
+            bulkOps.push({
+                deleteMany: {
+                    filter: longPathFilter(d, prefix)
+                }
+            });
+            bulkOps.push({
+                deleteMany: {
+                    filter: {_id: pathFilter(d, prefix)}
+                }
+            });
+
+            // Execute bulk operations for this depth level
+            if (bulkOps.length > 0) {
+                var bulkResult = db.nodes.bulkWrite(bulkOps, {
+                    ordered: false,           // Allow parallel execution
+                    writeConcern: {w: 1}       // Adjust based on consistency 
needs
+                });
+                count += bulkResult.deletedCount;
+                print("Deleted nodes at depth " + d + ": " + 
bulkResult.deletedCount + " nodes");
+            }
+        }
+
+        // OPTIMIZATION: Process root deletions separately
+        var rootBulkOps = [];
+        rootBulkOps.push({
+            deleteMany: {
+                filter: longPathQuery(path)
+            }
+        });
+        rootBulkOps.push({
+            deleteMany: {
+                filter: {_id: depth + ":" + path}
+            }
+        });
+
+        if (rootBulkOps.length > 0) {
+            var rootBulkResult = db.nodes.bulkWrite(rootBulkOps, {
+                ordered: false,           // Allow parallel execution
+                writeConcern: {w: 1}       // Adjust based on consistency needs
+            });
+            count += rootBulkResult.deletedCount;
+            print("Deleted root nodes at depth " + depth + ": " + 
rootBulkResult.deletedCount + " nodes");
+        }
+
+        return {deletedCount: count};
+    };
+
     /**
      * Helper method to find nodes based on Regular Expression.
      *

Reply via email to