Copilot commented on code in PR #265: URL: https://github.com/apache/incubator-hugegraph-ai/pull/265#discussion_r2257139549
########## hugegraph-llm/src/hugegraph_llm/utils/embedding_utils.py: ########## @@ -71,3 +71,21 @@ async def get_embeddings_parallel(embedding: BaseEmbedding, vids: list[str]) -> embeddings.extend(batch_embeddings) return embeddings + + +def get_filename_prefix(embedding_type: str = None, model_name: str = None) -> str: + """Generate filename based on model name.""" + if not (model_name and model_name.strip() and embedding_type and embedding_type.strip()): + return "" + # Sanitize model_name to prevent path traversal or invalid filename chars + safe_embedding_type = embedding_type.replace("/", "_").replace("\\", "_").strip() + safe_model_name = model_name.replace("/", "_").replace("\\", "_").strip() + return f"{safe_embedding_type}_{safe_model_name}" + + +def get_index_folder_name(graph_name: str, space_name: str = None) -> str: + if not (space_name and space_name.strip()): Review Comment: [nitpick] This condition pattern is repeated from the function above. Consider extracting a helper function like `is_valid_string(value)` to reduce code duplication. ```suggestion if not is_valid_string(space_name): ``` ########## hugegraph-llm/src/hugegraph_llm/utils/embedding_utils.py: ########## @@ -71,3 +71,21 @@ async def get_embeddings_parallel(embedding: BaseEmbedding, vids: list[str]) -> embeddings.extend(batch_embeddings) return embeddings + + +def get_filename_prefix(embedding_type: str = None, model_name: str = None) -> str: + """Generate filename based on model name.""" + if not (model_name and model_name.strip() and embedding_type and embedding_type.strip()): Review Comment: [nitpick] The condition combines multiple checks in a complex way. Consider splitting this into separate, more readable conditions or using a helper function to validate parameters. ```suggestion def is_valid_param(param: str) -> bool: return param is not None and bool(param.strip()) def get_filename_prefix(embedding_type: str = None, model_name: str = None) -> str: """Generate filename based on model name.""" if not (is_valid_param(model_name) and is_valid_param(embedding_type)): ``` ########## hugegraph-llm/src/hugegraph_llm/indices/vector_index.py: ########## @@ -37,30 +37,57 @@ def __init__(self, embed_dim: int = 1024): self.properties = [] @staticmethod - def from_index_file(dir_path: str, record_miss: bool = True) -> "VectorIndex": - index_file = os.path.join(dir_path, INDEX_FILE_NAME) - properties_file = os.path.join(dir_path, PROPERTIES_FILE_NAME) + def from_index_file(dir_path: str, filename_prefix: str = None, record_miss: bool = True) -> "VectorIndex": + """Load index from files, supporting model-specific filenames. + + This method loads a Faiss index and its corresponding properties from a directory. + It handles model-specific filenames by using the get_model_filename utility. Review Comment: The docstring mentions 'get_model_filename utility' but this function doesn't exist in the code. The actual implementation uses inline filename construction. ```suggestion It handles model-specific filenames by constructing them inline using f-strings. ``` ########## hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py: ########## @@ -45,10 +47,27 @@ def store_schema(schema, question, gremlin_prompt): def build_example_vector_index(temp_file) -> dict: + folder_name = get_index_folder_name(huge_settings.graph_name, huge_settings.graph_space) + index_path = os.path.join(resource_path, folder_name, "gremlin_examples") + if not os.path.exists(index_path): + os.makedirs(index_path) if temp_file is None: full_path = os.path.join(resource_path, "demo", "text2gremlin.csv") else: full_path = temp_file.name + name, ext = os.path.splitext(temp_file.name) + timestamp = datetime.now().strftime("%Y%m%d%H%M%S") + _, file_name = os.path.split(f"{name}_{timestamp}{ext}") + log.info("Copying file to: %s", file_name) + target_file = os.path.join(resource_path, folder_name, "gremlin_examples", file_name) + try: + import shutil Review Comment: Import statements should be placed at the top of the file, not inside functions. Move the shutil import to the top with other imports. ```suggestion ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hugegraph.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@hugegraph.apache.org For additional commands, e-mail: issues-h...@hugegraph.apache.org