xuzifu666 commented on code in PR #7919:
URL: https://github.com/apache/paimon/pull/7919#discussion_r3279729702


##########
paimon-common/src/main/java/org/apache/paimon/fileindex/rtree/RTree.java:
##########
@@ -0,0 +1,212 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.fileindex.rtree;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** R-Tree spatial index implementation. */
+public class RTree {
+    private RTreeNode root;
+    private final int dimensions;
+    private final int minEntries;
+    private final int maxEntries;
+    private int size;
+
+    public RTree(int dimensions, int maxEntries) {
+        this.dimensions = dimensions;
+        this.maxEntries = maxEntries;
+        this.minEntries = Math.max(2, maxEntries / 2);
+        this.root = new RTreeNode(dimensions, maxEntries, true);
+        this.size = 0;
+    }
+
+    public int getDimensions() {
+        return dimensions;
+    }
+
+    public int getMinEntries() {
+        return minEntries;
+    }
+
+    public int getMaxEntries() {
+        return maxEntries;
+    }
+
+    public RTreeNode getRoot() {
+        return root;
+    }
+
+    public void setRoot(RTreeNode newRoot) {
+        this.root = newRoot;
+    }
+
+    public void setSize(int newSize) {
+        this.size = newSize;
+    }
+
+    public int getSize() {
+        return size;
+    }
+
+    public void insert(double[] point, int rowId) {
+        BoundingBox pointBox = BoundingBox.fromPoint(point);
+        insert(pointBox, rowId, root);
+        size++;
+    }
+
+    private void insert(BoundingBox bbox, int rowId, RTreeNode node) {
+        if (node.isLeaf()) {
+            node.addLeafEntry(new LeafEntry(rowId, bbox));
+
+            if (node.canSplit()) {
+                splitNode(node);
+            }
+        } else {
+            RTreeNode bestChild = chooseBestChild(node, bbox);
+            insert(bbox, rowId, bestChild);
+
+            if (bestChild.canSplit()) {
+                splitNode(bestChild);
+            }
+            node.getBoundingBox().expand(bbox);
+        }
+    }
+
+    private RTreeNode chooseBestChild(RTreeNode node, BoundingBox bbox) {
+        RTreeNode best = null;
+        double minExpansion = Double.POSITIVE_INFINITY;
+        double minArea = Double.POSITIVE_INFINITY;
+
+        for (RTreeNode child : node.getChildren()) {
+            double expansion = child.getBoundingBox().getExpansionArea(bbox);
+            double area = child.getBoundingBox().getArea();
+
+            if (expansion < minExpansion || (expansion == minExpansion && area 
< minArea)) {
+                minExpansion = expansion;
+                minArea = area;
+                best = child;
+            }
+        }
+
+        if (best == null) {
+            throw new IllegalStateException("No child found in non-leaf node");
+        }
+        return best;
+    }
+
+    private void splitNode(RTreeNode node) {
+        if (node.isLeaf()) {
+            splitLeafNode(node);
+        } else {
+            splitInternalNode(node);
+        }
+    }
+
+    private void splitLeafNode(RTreeNode node) {
+        List<LeafEntry> entries = new ArrayList<>(node.getLeafEntries());
+        node.getLeafRowIds().clear();
+        node.getLeafEntries().clear();
+        node.getBoundingBox().clear();
+
+        RTreeNode newNode = new RTreeNode(dimensions, maxEntries, true);
+
+        int mid = entries.size() / 2;
+        for (int i = 0; i < mid; i++) {
+            node.addLeafEntry(entries.get(i));
+        }
+        for (int i = mid; i < entries.size(); i++) {
+            newNode.addLeafEntry(entries.get(i));
+        }
+
+        if (node == root) {
+            RTreeNode newRoot = new RTreeNode(dimensions, maxEntries, false);
+            newRoot.addChild(node);
+            newRoot.addChild(newNode);
+            root = newRoot;
+        } else {
+            adjustParent(node, newNode);
+        }
+    }
+
+    private void splitInternalNode(RTreeNode node) {
+        List<RTreeNode> children = new ArrayList<>(node.getChildren());
+        node.getChildren().clear();
+        node.getBoundingBox().clear();
+
+        RTreeNode newNode = new RTreeNode(dimensions, maxEntries, false);
+
+        int mid = children.size() / 2;
+        for (int i = 0; i < mid; i++) {
+            node.addChild(children.get(i));
+        }
+        for (int i = mid; i < children.size(); i++) {
+            newNode.addChild(children.get(i));
+        }
+
+        if (node == root) {
+            RTreeNode newRoot = new RTreeNode(dimensions, maxEntries, false);
+            newRoot.addChild(node);
+            newRoot.addChild(newNode);
+            root = newRoot;
+        } else {
+            adjustParent(node, newNode);
+        }
+    }
+
+    private void adjustParent(RTreeNode node, RTreeNode newNode) {
+        RTreeNode parent = node.getParent();
+        if (parent == null) {
+            return;
+        }
+
+        parent.addChild(newNode);
+
+        if (parent.canSplit()) {
+            splitNode(parent);
+        }
+    }
+
+    public List<Integer> search(BoundingBox searchBox) {
+        List<Integer> results = new ArrayList<>();
+        search(searchBox, root, results);
+        return results;
+    }
+
+    private void search(BoundingBox searchBox, RTreeNode node, List<Integer> 
results) {
+        if (!node.getBoundingBox().intersects(searchBox)) {
+            return;
+        }
+
+        if (node.isLeaf()) {
+            for (Integer rowId : node.getLeafRowIds()) {
+                results.add(rowId);

Review Comment:
   Changed as: 
   1. Enhanced ```RTree.java``` search() method:
       a. Leaf nodes: Added per-entry checking 
entry.getBbox().intersects(searchBox)
       b. Previously only checked node.getBoundingBox().intersects()
   2. LeafEntry structure: Stores rowId + bbox for precision verification



##########
paimon-common/src/main/java/org/apache/paimon/fileindex/rtree/RTreeFileIndex.java:
##########
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.fileindex.rtree;
+
+import org.apache.paimon.fileindex.FileIndexReader;
+import org.apache.paimon.fileindex.FileIndexWriter;
+import org.apache.paimon.fileindex.FileIndexer;
+import org.apache.paimon.fs.SeekableInputStream;
+import org.apache.paimon.options.Options;
+import org.apache.paimon.types.DataType;
+
+/** The implementation of R-Tree file index. */
+public class RTreeFileIndex implements FileIndexer {

Review Comment:
   My current alternative is:
   1. Implemented STRBulkLoader (Sort-Tile-Recursive algorithm):
       a. Sort entries by current dimension
       b. Partition into vertical tiles (~maxEntries per tile)
       c. Recursively process each tile with next dimension
       d. Build tree bottom-up
   2. Enhanced RTreeFileIndexWriter:
       a. write() method: Collect entries into list
       b. serializedBytes(): Use STRBulkLoader for batch tree construction



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to