This is an automated email from the ASF dual-hosted git repository.

jin pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph-ai.git
The following commit(s) were added to refs/heads/main by this push: new 9239799 refactor(llm): extract `topk_per_keyword` & `topk_return_results` to .env 9239799 is described below commit 9239799f899b181869e188f25d0268d3dbc3c49a Author: SoJGooo <102796027+mrjs...@users.noreply.github.com> AuthorDate: Thu Jan 9 16:11:19 2025 +0800 refactor(llm): extract `topk_per_keyword` & `topk_return_results` to .env 1. Extract argument `topk_per_keyword` to file .env 2. Extract argument `topk_return_results` to file .env 3. Rename `max_items` to `max_graph_items` (argument of function `query_graphdb`) --------- Co-authored-by: imbajin <j...@apache.org> --- hugegraph-llm/README.md | 2 +- hugegraph-llm/src/hugegraph_llm/config/hugegraph_config.py | 11 ++++++++++- hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py | 2 +- .../hugegraph_llm/operators/common_op/merge_dedup_rerank.py | 3 ++- hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py | 12 +++++++----- .../hugegraph_llm/operators/hugegraph_op/graph_rag_query.py | 4 ++-- .../hugegraph_llm/operators/index_op/semantic_id_query.py | 2 +- 7 files changed, 24 insertions(+), 12 deletions(-) diff --git a/hugegraph-llm/README.md b/hugegraph-llm/README.md index 49fe502..2b25185 100644 --- a/hugegraph-llm/README.md +++ b/hugegraph-llm/README.md @@ -159,7 +159,7 @@ Here is a brief usage guide: ``` 3. **Query Graph for Rag**: Retrieve the corresponding keywords and their multi-degree associated relationships from HugeGraph. ```python - graph_rag.query_graphdb(max_deep=2, max_items=30).print_result() + graph_rag.query_graphdb(max_deep=2, max_graph_items=30).print_result() ``` 4. **Rerank Searched Result**: Rerank the searched results based on the similarity between the question and the results. 
```python diff --git a/hugegraph-llm/src/hugegraph_llm/config/hugegraph_config.py b/hugegraph-llm/src/hugegraph_llm/config/hugegraph_config.py index cde225a..eac2def 100644 --- a/hugegraph-llm/src/hugegraph_llm/config/hugegraph_config.py +++ b/hugegraph-llm/src/hugegraph_llm/config/hugegraph_config.py @@ -21,14 +21,23 @@ from .models import BaseConfig class HugeGraphConfig(BaseConfig): """HugeGraph settings""" + # graph server config graph_ip: Optional[str] = "127.0.0.1" graph_port: Optional[str] = "8080" graph_name: Optional[str] = "hugegraph" graph_user: Optional[str] = "admin" graph_pwd: Optional[str] = "xxx" graph_space: Optional[str] = None + + # graph query config limit_property: Optional[str] = "False" max_graph_path: Optional[int] = 10 - max_items: Optional[int] = 30 + max_graph_items: Optional[int] = 30 edge_limit_pre_label: Optional[int] = 8 + + # vector config vector_dis_threshold: Optional[float] = 0.9 + topk_per_keyword: Optional[int] = 1 + + # rerank config + topk_return_results: Optional[int] = 20 diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py index bcc1198..83b027b 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py @@ -63,7 +63,7 @@ def init_rag_ui() -> gr.Interface: title="HugeGraph RAG Platform", css=CSS, ) as hugegraph_llm_ui: - gr.Markdown("# HugeGraph LLM RAG Demo") + gr.Markdown("# HugeGraph RAG Platform 🚀") """ TODO: leave a general idea of the unresolved part diff --git a/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py b/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py index c4ff757..d9c5e98 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py @@ -22,6 +22,7 @@ import jieba import requests from nltk.translate.bleu_score import 
sentence_bleu +from hugegraph_llm.config import huge_settings from hugegraph_llm.models.embeddings.base import BaseEmbedding from hugegraph_llm.models.rerankers.init_reranker import Rerankers from hugegraph_llm.utils.log import log @@ -43,7 +44,7 @@ class MergeDedupRerank: def __init__( self, embedding: BaseEmbedding, - topk: int = 20, + topk: int = huge_settings.topk_return_results, graph_ratio: float = 0.5, method: Literal["bleu", "reranker"] = "bleu", near_neighbor_first: bool = False, diff --git a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py index 399864a..7df36d7 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py @@ -32,7 +32,7 @@ from hugegraph_llm.operators.index_op.vector_index_query import VectorIndexQuery from hugegraph_llm.operators.llm_op.answer_synthesize import AnswerSynthesize from hugegraph_llm.operators.llm_op.keyword_extract import KeywordExtract from hugegraph_llm.utils.decorators import log_time, log_operator_time, record_qps -from hugegraph_llm.config import prompt +from hugegraph_llm.config import prompt, huge_settings class RAGPipeline: @@ -98,7 +98,7 @@ class RAGPipeline: def keywords_to_vid( self, by: Literal["query", "keywords"] = "keywords", - topk_per_keyword: int = 1, + topk_per_keyword: int = huge_settings.topk_per_keyword, topk_per_query: int = 10, ): """ @@ -121,7 +121,7 @@ class RAGPipeline: def query_graphdb( self, max_deep: int = 2, - max_items: int = 30, + max_graph_items: int = huge_settings.max_graph_items, max_v_prop_len: int = 2048, max_e_prop_len: int = 256, prop_to_match: Optional[str] = None, @@ -132,16 +132,18 @@ class RAGPipeline: Add a graph RAG query operator to the pipeline. :param max_deep: Maximum depth for the graph query. - :param max_items: Maximum number of items to retrieve. + :param max_graph_items: Maximum number of items to retrieve. 
:param max_v_prop_len: Maximum length of vertex properties. :param max_e_prop_len: Maximum length of edge properties. :param prop_to_match: Property to match in the graph. + :param num_gremlin_generate_example: Number of examples to generate. + :param gremlin_prompt: Gremlin prompt for generating examples. :return: Self-instance for chaining. """ self._operators.append( GraphRAGQuery( max_deep=max_deep, - max_items=max_items, + max_graph_items=max_graph_items, max_v_prop_len=max_v_prop_len, max_e_prop_len=max_e_prop_len, prop_to_match=prop_to_match, diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py index e213c37..2ced618 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py @@ -78,7 +78,7 @@ class GraphRAGQuery: def __init__( self, max_deep: int = 2, - max_items: int = int(huge_settings.max_items), + max_graph_items: int = huge_settings.max_graph_items, prop_to_match: Optional[str] = None, llm: Optional[BaseLLM] = None, embedding: Optional[BaseEmbedding] = None, @@ -96,7 +96,7 @@ class GraphRAGQuery: huge_settings.graph_space, ) self._max_deep = max_deep - self._max_items = max_items + self._max_items = max_graph_items self._prop_to_match = prop_to_match self._schema = "" self._limit_property = huge_settings.limit_property.lower() == "true" diff --git a/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py b/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py index 47e80f0..8aa6411 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py @@ -34,7 +34,7 @@ class SemanticIdQuery: embedding: BaseEmbedding, by: Literal["query", "keywords"] = "keywords", topk_per_query: int = 10, - topk_per_keyword: int = 1 
+ topk_per_keyword: int = huge_settings.topk_per_keyword ): self.index_dir = str(os.path.join(resource_path, huge_settings.graph_name, "graph_vids")) self.vector_index = VectorIndex.from_index_file(self.index_dir)