imbajin commented on code in PR #2982:
URL: https://github.com/apache/hugegraph/pull/2982#discussion_r3105748359
##########
hugegraph-server/hugegraph-rocksdb/src/main/java/org/apache/hugegraph/backend/store/rocksdb/RocksDBTable.java:
##########
@@ -224,7 +224,8 @@ protected BackendColumnIterator
getById(RocksDBSessions.Session session, Id id)
return BackendColumnIterator.iterator(col);
}
- protected BackendColumnIterator getByIds(RocksDBSessions.Session session,
Set<Id> ids) {
+ protected BackendColumnIterator getByIds(RocksDBSessions.Session session,
+ Collection<Id> ids) {
Review Comment:
‼️ **`Set<Id>` → `Collection<Id>` 语义变化需注意**
原方法接收 `Set<Id>`(天然去重),改为 `Collection<Id>` 后,传入 `List` 时若含重复 ID,RocksDB
`multiGet` 会对同一 key 重复查询并返回重复结果。
测试 `testVertexQueryByIdsWithDuplicateIds` 验证了这个行为(id1 返回 2 次),但这与原 `Set`
语义不一致。需要确认上层 `IdQuery` 的 ids 是否可能含重复——如果含重复,行为变更可能导致上层重复处理数据。
**建议**:在 `getByIds` 入口去重以保持原语义,或者明确文档说明 `Collection` 含重复返回的行为变更。
##########
hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/unit/rocksdb/RocksDBTableQueryByIdsTest.java:
##########
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.unit.rocksdb;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hugegraph.backend.id.Id;
+import org.apache.hugegraph.backend.id.IdGenerator;
+import org.apache.hugegraph.backend.store.BackendEntry.BackendColumn;
+import org.apache.hugegraph.backend.store.BackendEntry.BackendColumnIterator;
+import org.apache.hugegraph.backend.store.rocksdb.RocksDBSessions;
+import org.apache.hugegraph.backend.store.rocksdb.RocksDBTables;
+import org.apache.hugegraph.testutil.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.rocksdb.RocksDBException;
+
+public class RocksDBTableQueryByIdsTest extends BaseRocksDBUnitTest {
+
+ private static final String DATABASE = "db";
+
+ private TestVertexTable vertexTable;
+ private TestEdgeTable edgeTable;
+
+ @Override
+ @Before
+ public void setup() throws RocksDBException {
+ super.setup();
+ this.vertexTable = new TestVertexTable(DATABASE);
+ this.edgeTable = new TestEdgeTable(DATABASE);
+ this.rocks.createTable(this.vertexTable.table());
+ this.rocks.createTable(this.edgeTable.table());
+ }
+
+ @Test
+ public void testVertexQueryByIdsWithAllExistingIds() {
+ Id id1 = IdGenerator.of("v1");
+ Id id2 = IdGenerator.of("v2");
+ Id id3 = IdGenerator.of("v3");
+
+ this.rocks.session().put(this.vertexTable.table(), id1.asBytes(),
getBytes("value1"));
+ this.rocks.session().put(this.vertexTable.table(), id2.asBytes(),
getBytes("value2"));
+ this.rocks.session().put(this.vertexTable.table(), id3.asBytes(),
getBytes("value3"));
+ this.commit();
+
+ List<Id> ids = Arrays.asList(id1, id2, id3);
+ BackendColumnIterator iter =
this.vertexTable.queryByIds(this.rocks.session(), ids);
+
+ Map<String, String> results = toResultMap(iter);
+
+ Assert.assertEquals(3, results.size());
+ Assert.assertEquals("value1", results.get("v1"));
+ Assert.assertEquals("value2", results.get("v2"));
+ Assert.assertEquals("value3", results.get("v3"));
+ }
+
+ @Test
+ public void testVertexQueryByIdsWithExistingAndMissingIdsMixed() {
+ Id id1 = IdGenerator.of("v1");
+ Id id2 = IdGenerator.of("v2");
+ Id id3 = IdGenerator.of("v3");
+
+ this.rocks.session().put(this.vertexTable.table(), id1.asBytes(),
getBytes("value1"));
+ this.rocks.session().put(this.vertexTable.table(), id3.asBytes(),
getBytes("value3"));
+ this.commit();
+
+ List<Id> ids = Arrays.asList(id1, id2, id3);
+ BackendColumnIterator iter =
this.vertexTable.queryByIds(this.rocks.session(), ids);
+
+ Map<String, String> results = toResultMap(iter);
+
+ Assert.assertEquals(2, results.size());
+ Assert.assertEquals("value1", results.get("v1"));
+ Assert.assertEquals("value3", results.get("v3"));
+ Assert.assertFalse(results.containsKey("v2"));
+ }
+
+ @Test
+ public void testVertexQueryByIdsWithDuplicateIds() {
+ Id id1 = IdGenerator.of("v1");
+ Id id2 = IdGenerator.of("v2");
+
+ this.rocks.session().put(this.vertexTable.table(), id1.asBytes(),
getBytes("value1"));
+ this.rocks.session().put(this.vertexTable.table(), id2.asBytes(),
getBytes("value2"));
+ this.commit();
+
+ List<Id> ids = Arrays.asList(id1, id2, id1);
+ BackendColumnIterator iter =
this.vertexTable.queryByIds(this.rocks.session(), ids);
+
+ Map<String, Integer> countMap = new HashMap<>();
+ Map<String, String> results = new HashMap<>();
+ while (iter.hasNext()) {
+ BackendColumn col = iter.next();
+ String key = getString(col.name);
+ results.put(key, getString(col.value));
+ countMap.put(key, countMap.getOrDefault(key, 0) + 1);
+ }
+
+ Assert.assertEquals(2, results.size());
+ Assert.assertEquals("value1", results.get("v1"));
+ Assert.assertEquals("value2", results.get("v2"));
+ // Verify duplicate ids produce duplicate results
+ Assert.assertEquals(Integer.valueOf(2), countMap.get("v1"));
+ Assert.assertEquals(Integer.valueOf(1), countMap.get("v2"));
+ }
+
+ @Test
+ public void testEdgeQueryByIdsWithAllExistingIds() {
+ Id id1 = IdGenerator.of("e1");
+ Id id2 = IdGenerator.of("e2");
+
+ this.rocks.session().put(this.edgeTable.table(), id1.asBytes(),
getBytes("edge-value1"));
+ this.rocks.session().put(this.edgeTable.table(), id2.asBytes(),
getBytes("edge-value2"));
+ this.commit();
+
+ List<Id> ids = Arrays.asList(id1, id2);
+ BackendColumnIterator iter =
this.edgeTable.queryByIds(this.rocks.session(), ids);
+
+ Map<String, String> results = toResultMap(iter);
+
+ Assert.assertEquals(2, results.size());
+ Assert.assertEquals("edge-value1", results.get("e1"));
+ Assert.assertEquals("edge-value2", results.get("e2"));
+ }
+
+ /**
+ * NOTE: Testing the fallback path (session.hasChanges() == true) is not
+ * feasible here because both the optimized multi-get path and the fallback
+ * scan-based path ultimately delegate to session.get() / session.scan(),
+ * which have a pre-existing assertion `assert !this.hasChanges()` in
+ * RocksDBStdSessions. This assertion is disabled in production but fires
+ * during unit tests when assertions are enabled. The dispatch logic itself
+ * is covered by the implementation in
RocksDBTables.Vertex/Edge.queryByIds().
Review Comment:
‼️ **fallback 路径(`hasChanges() == true`)缺乏测试覆盖**
当 session 有未提交变更时走 `super.queryByIds()`(scan 路径),但测试中因 `RocksDBStdSessions`
的 `assert !this.hasChanges()` 断言无法覆盖此场景。
核心分支逻辑(if/else)只测了一半。建议考虑用 mock session 或在集成测试中补充覆盖 `hasChanges() == true`
的场景。
##########
hugegraph-server/hugegraph-rocksdb/src/main/java/org/apache/hugegraph/backend/store/rocksdb/RocksDBTables.java:
##########
@@ -208,6 +210,15 @@ public static Edge in(String database) {
protected BackendColumnIterator queryById(RocksDBSessions.Session
session, Id id) {
return this.getById(session, id);
}
+
+ @Override
+ protected BackendColumnIterator queryByIds(RocksDBSessions.Session
session,
+ Collection<Id> ids) {
+ if (!session.hasChanges()) {
+ return this.getByIds(session, ids);
+ }
+ return super.queryByIds(session, ids);
+ }
}
Review Comment:
⚠️ **Vertex 和 Edge 的 `queryByIds` 逻辑完全重复**
`Vertex.queryByIds()` 和 `Edge.queryByIds()` 代码完全一致(5 行相同逻辑)。可以考虑:
1. 提取到父类 `RocksDBTable.queryByIds()` 中统一做 `hasChanges()` 判断
2. 或者提供一个公共的 helper 方法
这样可以减少重复,也降低后续维护时两处不一致的风险。
##########
hugegraph-server/hugegraph-rocksdb/src/main/java/org/apache/hugegraph/backend/store/rocksdb/RocksDBTables.java:
##########
@@ -182,7 +182,9 @@ protected BackendColumnIterator
queryById(RocksDBSessions.Session session, Id id
@Override
protected BackendColumnIterator queryByIds(RocksDBSessions.Session
session,
Collection<Id> ids) {
- // TODO: use getByIds() after batch version multi-get is ready
+ if (!session.hasChanges()) {
+ return this.getByIds(session, ids);
Review Comment:
⚠️ **fallback 路径的实际效果需确认**
当 `hasChanges() == true` 时,fallback 到 `super.queryByIds()` 会对每个 ID 调用
`this.queryById()`。而 Vertex/Edge 已经 override `queryById` 为 `getById`(point
get),所以 fallback 路径实际也会调用 `getById`。
但 `RocksDBStdSessions` 中 `get()` 方法有 `assert !this.hasChanges()` 断言——也就是说
fallback 路径在开启断言的环境下依然会触发 assert 失败。
需要确认:`hasChanges() == true` 时是否真的能安全走 fallback?还是说这个分支在 RocksDB 后端实际上永远不会被触发?
##########
hugegraph-server/hugegraph-rocksdb/src/main/java/org/apache/hugegraph/backend/store/rocksdb/RocksDBTables.java:
##########
@@ -182,7 +182,9 @@ protected BackendColumnIterator
queryById(RocksDBSessions.Session session, Id id
@Override
protected BackendColumnIterator queryByIds(RocksDBSessions.Session
session,
Collection<Id> ids) {
Review Comment:
🧹 **PR 描述与实际改动不完全匹配**
PR 描述提到 "improves the performance of Gremlin queries... when using RPC-based
backends such as HBase and HStore",但代码改动全在 `hugegraph-rocksdb` 模块。建议更新 PR
描述,准确反映本次优化的范围是 RocksDB 后端。
##########
hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/unit/UnitTestSuite.java:
##########
@@ -50,6 +50,7 @@
import org.apache.hugegraph.unit.id.SplicingIdGeneratorTest;
import org.apache.hugegraph.unit.mysql.MysqlUtilTest;
import org.apache.hugegraph.unit.mysql.WhereBuilderTest;
+import org.apache.hugegraph.unit.rocksdb.RocksDBTableQueryByIdsTest;
Review Comment:
🧹 **import 未按字母序排列**
`RocksDBTableQueryByIdsTest` 的 import 插在 `RocksDBCountersTest`
之前,不符合字母顺序。建议调整为:
```suggestion
import org.apache.hugegraph.unit.rocksdb.RocksDBCountersTest;
import org.apache.hugegraph.unit.rocksdb.RocksDBSessionTest;
import org.apache.hugegraph.unit.rocksdb.RocksDBSessionsTest;
import org.apache.hugegraph.unit.rocksdb.RocksDBTableQueryByIdsTest;
```
##########
hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/unit/rocksdb/RocksDBTableQueryByIdsTest.java:
##########
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.unit.rocksdb;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hugegraph.backend.id.Id;
+import org.apache.hugegraph.backend.id.IdGenerator;
+import org.apache.hugegraph.backend.store.BackendEntry.BackendColumn;
+import org.apache.hugegraph.backend.store.BackendEntry.BackendColumnIterator;
+import org.apache.hugegraph.backend.store.rocksdb.RocksDBSessions;
+import org.apache.hugegraph.backend.store.rocksdb.RocksDBTables;
+import org.apache.hugegraph.testutil.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.rocksdb.RocksDBException;
+
+public class RocksDBTableQueryByIdsTest extends BaseRocksDBUnitTest {
+
+ private static final String DATABASE = "db";
+
+ private TestVertexTable vertexTable;
+ private TestEdgeTable edgeTable;
+
+ @Override
+ @Before
+ public void setup() throws RocksDBException {
+ super.setup();
+ this.vertexTable = new TestVertexTable(DATABASE);
+ this.edgeTable = new TestEdgeTable(DATABASE);
+ this.rocks.createTable(this.vertexTable.table());
+ this.rocks.createTable(this.edgeTable.table());
+ }
+
+ @Test
+ public void testVertexQueryByIdsWithAllExistingIds() {
+ Id id1 = IdGenerator.of("v1");
+ Id id2 = IdGenerator.of("v2");
+ Id id3 = IdGenerator.of("v3");
+
+ this.rocks.session().put(this.vertexTable.table(), id1.asBytes(),
getBytes("value1"));
+ this.rocks.session().put(this.vertexTable.table(), id2.asBytes(),
getBytes("value2"));
+ this.rocks.session().put(this.vertexTable.table(), id3.asBytes(),
getBytes("value3"));
+ this.commit();
+
+ List<Id> ids = Arrays.asList(id1, id2, id3);
+ BackendColumnIterator iter =
this.vertexTable.queryByIds(this.rocks.session(), ids);
+
+ Map<String, String> results = toResultMap(iter);
+
+ Assert.assertEquals(3, results.size());
+ Assert.assertEquals("value1", results.get("v1"));
+ Assert.assertEquals("value2", results.get("v2"));
+ Assert.assertEquals("value3", results.get("v3"));
+ }
+
+ @Test
+ public void testVertexQueryByIdsWithExistingAndMissingIdsMixed() {
+ Id id1 = IdGenerator.of("v1");
+ Id id2 = IdGenerator.of("v2");
+ Id id3 = IdGenerator.of("v3");
+
+ this.rocks.session().put(this.vertexTable.table(), id1.asBytes(),
getBytes("value1"));
+ this.rocks.session().put(this.vertexTable.table(), id3.asBytes(),
getBytes("value3"));
+ this.commit();
+
+ List<Id> ids = Arrays.asList(id1, id2, id3);
+ BackendColumnIterator iter =
this.vertexTable.queryByIds(this.rocks.session(), ids);
+
+ Map<String, String> results = toResultMap(iter);
+
+ Assert.assertEquals(2, results.size());
+ Assert.assertEquals("value1", results.get("v1"));
+ Assert.assertEquals("value3", results.get("v3"));
+ Assert.assertFalse(results.containsKey("v2"));
+ }
+
+ @Test
+ public void testVertexQueryByIdsWithDuplicateIds() {
+ Id id1 = IdGenerator.of("v1");
+ Id id2 = IdGenerator.of("v2");
+
+ this.rocks.session().put(this.vertexTable.table(), id1.asBytes(),
getBytes("value1"));
+ this.rocks.session().put(this.vertexTable.table(), id2.asBytes(),
getBytes("value2"));
+ this.commit();
+
+ List<Id> ids = Arrays.asList(id1, id2, id1);
+ BackendColumnIterator iter =
this.vertexTable.queryByIds(this.rocks.session(), ids);
+
+ Map<String, Integer> countMap = new HashMap<>();
+ Map<String, String> results = new HashMap<>();
+ while (iter.hasNext()) {
+ BackendColumn col = iter.next();
+ String key = getString(col.name);
+ results.put(key, getString(col.value));
+ countMap.put(key, countMap.getOrDefault(key, 0) + 1);
+ }
+
+ Assert.assertEquals(2, results.size());
+ Assert.assertEquals("value1", results.get("v1"));
+ Assert.assertEquals("value2", results.get("v2"));
+ // Verify duplicate ids produce duplicate results
+ Assert.assertEquals(Integer.valueOf(2), countMap.get("v1"));
+ Assert.assertEquals(Integer.valueOf(1), countMap.get("v2"));
+ }
+
+ @Test
+ public void testEdgeQueryByIdsWithAllExistingIds() {
+ Id id1 = IdGenerator.of("e1");
+ Id id2 = IdGenerator.of("e2");
+
+ this.rocks.session().put(this.edgeTable.table(), id1.asBytes(),
getBytes("edge-value1"));
+ this.rocks.session().put(this.edgeTable.table(), id2.asBytes(),
getBytes("edge-value2"));
+ this.commit();
+
+ List<Id> ids = Arrays.asList(id1, id2);
+ BackendColumnIterator iter =
this.edgeTable.queryByIds(this.rocks.session(), ids);
+
+ Map<String, String> results = toResultMap(iter);
+
+ Assert.assertEquals(2, results.size());
+ Assert.assertEquals("edge-value1", results.get("e1"));
+ Assert.assertEquals("edge-value2", results.get("e2"));
+ }
+
+ /**
+ * NOTE: Testing the fallback path (session.hasChanges() == true) is not
+ * feasible here because both the optimized multi-get path and the fallback
+ * scan-based path ultimately delegate to session.get() / session.scan(),
+ * which have a pre-existing assertion `assert !this.hasChanges()` in
+ * RocksDBStdSessions. This assertion is disabled in production but fires
+ * during unit tests when assertions are enabled. The dispatch logic itself
+ * is covered by the implementation in
RocksDBTables.Vertex/Edge.queryByIds().
+ */
+
+ private Map<String, String> toResultMap(BackendColumnIterator iter) {
+ Map<String, String> results = new HashMap<>();
+ while (iter.hasNext()) {
+ BackendColumn col = iter.next();
+ results.put(getString(col.name), getString(col.value));
+ }
+ return results;
+ }
+
+ /**
+ * Subclass that exposes the protected queryByIds for testing.
+ */
+ private static class TestVertexTable extends RocksDBTables.Vertex {
+
+ public TestVertexTable(String database) {
+ super(database);
+ }
+
+ @Override
+ public BackendColumnIterator queryByIds(RocksDBSessions.Session
session,
+ Collection<Id> ids) {
+ return super.queryByIds(session, ids);
+ }
+ }
+
+ /**
+ * Subclass that exposes the protected queryByIds for testing.
+ */
+ private static class TestEdgeTable extends RocksDBTables.Edge {
Review Comment:
🧹 **只测了 out edge 方向**
`TestEdgeTable` 固定传 `true`(out edge),`in` edge 方向未覆盖。虽然逻辑相同,但建议至少补一个 `in`
方向的 case 或在注释中说明为什么只测 out。
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]