kitalkuyo-gita commented on code in PR #716: URL: https://github.com/apache/geaflow/pull/716#discussion_r2689050634
########## geaflow-ai/src/main/java/org/apache/geaflow/ai/operator/SearchUtils.java: ########## @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.geaflow.ai.operator; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +public class SearchUtils { + + // Set of excluded characters: these will be replaced with spaces in formatQuery + private static final Set<Character> EXCLUDED_CHARS = new HashSet<>(Arrays.asList( + '*', '#', '-', '?', '`', '{', '}', '[', ']', '(', ')', '>', '<', ':', '/', '.' + )); + + // Set of allowed characters for validation in isAllAllowedChars + // Includes: digits (0-9), and some common safe symbols + private static final Set<Character> IGNORE_CHARS = buildIgnoredChars(); + + /** + * Builds the set of allowed characters for input validation. + * Includes alphanumeric characters and selected common symbols. 
+ * + * @return an unmodifiable set of ignored characters + */ + private static Set<Character> buildIgnoredChars() { + Set<Character> ignored = new HashSet<>(EXCLUDED_CHARS); Review Comment: It is recommended not to include EXCLUDED_CHARS in IGNORE_CHARS, as this may cause errors in SearchStore query string construction and semantic filtering. For example, `SubgraphSemanticPromptFunction.verbalize` filters strings using `.filter(str -> !SearchUtils.isAllAllowedChars(str))`. An incorrect set of allowed characters will lead to incorrect filtering behavior (strings that should be kept are removed, and strings that should be removed are kept). samples: ``` // SearchUtils.java: 修复 buildIgnoredChars() private static final Set<Character> EXCLUDED_CHARS = new HashSet<>(Arrays.asList( '*', '#', '-', '?', '`', '{', '}', '[', ']', '(', ')', '>', '<', ':', '/', '.' )); private static final Set<Character> IGNORE_CHARS = buildIgnoredChars(); private static Set<Character> buildIgnoredChars() { Set<Character> allowed = new HashSet<>(); // 加入英文字母(大小写) for (char c = 'a'; c <= 'z'; c++) allowed.add(c); for (char c = 'A'; c <= 'Z'; c++) allowed.add(c); // 加入数字 for (char c = '0'; c <= '9'; c++) allowed.add(c); // 加入常用安全字符(空格、下划线等) allowed.add(' '); allowed.add('_'); allowed.add('-'); allowed.add('@'); allowed.add('+'); allowed.add('!'); allowed.add('$'); allowed.add('%'); allowed.add('&'); allowed.add('='); allowed.add('~'); // 不要加入 EXCLUDED_CHARS ! return Collections.unmodifiableSet(allowed); } ``` ########## geaflow-ai/src/main/java/org/apache/geaflow/ai/session/SessionManagement.java: ########## @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.geaflow.ai.session; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import org.apache.geaflow.ai.common.config.Constants; +import org.apache.geaflow.ai.subgraph.SubGraph; + +public class SessionManagement { + + public static final SessionManagement INSTANCE = new SessionManagement(); + private final Map<String, Long> session2ActiveTime = new HashMap<>(); + private final Map<String, List<SubGraph>> session2Graphs = new HashMap<>(); + + private SessionManagement() { + } + + public boolean createSession(String sessionId) { + if (session2ActiveTime.containsKey(sessionId)) { + return false; + } + session2ActiveTime.put(sessionId, System.nanoTime()); + return true; + } + + public String createSession() { + String sessionId = Constants.PREFIX_TMP_SESSION + System.nanoTime() + + UUID.randomUUID().toString().replace("-", "").substring(0, 8); + if (createSession(sessionId)) { + return sessionId; + } else { + return null; + } + } + + public boolean sessionExists(String session) { + return this.session2ActiveTime.containsKey(session); + } + + public List<SubGraph> getSubGraph(String sessionId) { + return this.session2Graphs.get(sessionId); + } + + public void setSubGraph(String sessionId, List<SubGraph> subGraphs) { + this.session2Graphs.put(sessionId, subGraphs); Review Comment: 
`SessionManagement.createSession(String)` only writes the time to `session2ActiveTime`; it does not create a corresponding empty list in `session2Graphs`. `GraphMemoryServer.verbalize` calls `sessionManagement.getSubGraph(sessionId)` directly, so when no subgraphs have been stored for the session, `subGraphList` is null and `new ArrayList<>(subGraphList.size())` throws a NullPointerException. It is recommended to call `session2Graphs.put(sessionId, new ArrayList<>())` whenever `createSession(String)` or `createSession()` succeeds, and to change `getSubGraph` so that it never returns null. ########## geaflow-ai/src/main/java/org/apache/geaflow/ai/session/SessionManagement.java: ########## @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.geaflow.ai.session; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import org.apache.geaflow.ai.common.config.Constants; +import org.apache.geaflow.ai.subgraph.SubGraph; + +public class SessionManagement { + + public static final SessionManagement INSTANCE = new SessionManagement(); + private final Map<String, Long> session2ActiveTime = new HashMap<>(); + private final Map<String, List<SubGraph>> session2Graphs = new HashMap<>(); + + private SessionManagement() { + } + + public boolean createSession(String sessionId) { + if (session2ActiveTime.containsKey(sessionId)) { + return false; + } + session2ActiveTime.put(sessionId, System.nanoTime()); + return true; + } + + public String createSession() { + String sessionId = Constants.PREFIX_TMP_SESSION + System.nanoTime() + + UUID.randomUUID().toString().replace("-", "").substring(0, 8); + if (createSession(sessionId)) { + return sessionId; + } else { + return null; + } + } + + public boolean sessionExists(String session) { + return this.session2ActiveTime.containsKey(session); + } + + public List<SubGraph> getSubGraph(String sessionId) { + return this.session2Graphs.get(sessionId); + } + + public void setSubGraph(String sessionId, List<SubGraph> subGraphs) { + this.session2Graphs.put(sessionId, subGraphs); Review Comment: samples: ``` // 将 Map 改为并发实现(见并发项) private final ConcurrentMap<String, Long> session2ActiveTime = new ConcurrentHashMap<>(); private final ConcurrentMap<String, List<SubGraph>> session2Graphs = new ConcurrentHashMap<>(); public boolean createSession(String sessionId) { if (sessionId == null) { return false; } Long prev = session2ActiveTime.putIfAbsent(sessionId, System.nanoTime()); if (prev != null) { return false; } // 初始化 subgraphs 为可变空列表,避免 NPE session2Graphs.putIfAbsent(sessionId, new ArrayList<>()); return true; } public String createSession() { String sessionId = Constants.PREFIX_TMP_SESSION + System.nanoTime() + 
UUID.randomUUID().toString().replace("-", "").substring(0, 8); return createSession(sessionId) ? sessionId : null; } // 返回不可为 null 的 List(防止调用者 NPE) public List<SubGraph> getSubGraph(String sessionId) { List<SubGraph> l = this.session2Graphs.get(sessionId); return l == null ? new ArrayList<>() : l; } public void setSubGraph(String sessionId, List<SubGraph> subGraphs) { // 安全性:确保 map 存在 key this.session2Graphs.put(sessionId, subGraphs == null ? new ArrayList<>() : subGraphs); } ``` ########## geaflow-ai/src/main/java/org/apache/geaflow/ai/operator/SessionOperator.java: ########## @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.geaflow.ai.operator; + +import java.util.*; +import java.util.stream.Collectors; +import org.apache.geaflow.ai.graph.GraphAccessor; +import org.apache.geaflow.ai.graph.GraphEdge; +import org.apache.geaflow.ai.graph.GraphEntity; +import org.apache.geaflow.ai.graph.GraphVertex; +import org.apache.geaflow.ai.index.IndexStore; +import org.apache.geaflow.ai.index.vector.IVector; +import org.apache.geaflow.ai.index.vector.VectorType; +import org.apache.geaflow.ai.search.VectorSearch; +import org.apache.geaflow.ai.subgraph.SubGraph; + +public class SessionOperator implements SearchOperator { + + private final GraphAccessor graphAccessor; + private final IndexStore indexStore; + + public SessionOperator(GraphAccessor accessor, IndexStore store) { + this.graphAccessor = Objects.requireNonNull(accessor); + this.indexStore = Objects.requireNonNull(store); + } + + @Override + public List<SubGraph> apply(List<SubGraph> subGraphList, VectorSearch search) { + List<IVector> keyWordVectors = search.getVectorMap().get(VectorType.KeywordVector); + if (keyWordVectors == null || keyWordVectors.isEmpty()) { + if (subGraphList == null) { + return new ArrayList<>(); + } + return new ArrayList<>(subGraphList); + } + List<String> contents = new ArrayList<>(keyWordVectors.size()); + for (IVector v : keyWordVectors) { + contents.add(v.toString()); + } + String query = String.join(SearchConstants.DELIMITER, contents); + List<GraphEntity> globalResults = searchWithGlobalGraph(query); + if (subGraphList == null || subGraphList.isEmpty()) { + List<GraphVertex> startVertices = new ArrayList<>(); + for (GraphEntity resEntity : globalResults) { + if (resEntity instanceof GraphVertex) { + startVertices.add((GraphVertex) resEntity); + } + } + //Apply to subgraph + return startVertices.stream().map(v -> { + SubGraph subGraph = new SubGraph(); + subGraph.addVertex(v); + return subGraph; + }).collect(Collectors.toList()); + } else { + Map<GraphEntity, List<IVector>> 
extendEntityIndexMap = new HashMap<>(); + //Traverse all extension points of the subgraph and search within the extension area + for (SubGraph subGraph : subGraphList) { + List<GraphEntity> extendEntities = getSubgraphExpand(subGraph); + for (GraphEntity extendEntity : extendEntities) { + List<IVector> entityIndex = indexStore.getEntityIndex(extendEntity); + extendEntityIndexMap.put(extendEntity, entityIndex); + } + } + //recall compute + GraphSearchStore searchStore = initSearchStore(extendEntityIndexMap); Review Comment: Perhaps it would be better to explicitly call `writer.commit()` (or `close()`) in `initSearchStore` after all the `addDoc` operations are completed? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
