(incubator-hugegraph-ai) branch main updated: refactor(llm): extract `topk_per_keyword` & `topk_return_results` to .env

jin Thu, 09 Jan 2025 00:11:32 -0800

This is an automated email from the ASF dual-hosted git repository.

jin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph-ai.git



The following commit(s) were added to refs/heads/main by this push:
     new 9239799  refactor(llm): extract `topk_per_keyword` & `topk_return_results` to .env
9239799 is described below

commit 9239799f899b181869e188f25d0268d3dbc3c49a
Author: SoJGooo <102796027+mrjs...@users.noreply.github.com>
AuthorDate: Thu Jan 9 16:11:19 2025 +0800

    refactor(llm): extract `topk_per_keyword` & `topk_return_results` to .env
    
    1. Extract argument `topk_per_keyword` to file .env
    2. Extract argument `topk_return_results` to file .env
    3. Rename `max_items` to `max_graph_items` (argument of function `query_graphdb`)
    
    ---------
    
    Co-authored-by: imbajin <j...@apache.org>
---
 hugegraph-llm/README.md                                      |  2 +-
 hugegraph-llm/src/hugegraph_llm/config/hugegraph_config.py   | 11 ++++++++++-
 hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py         |  2 +-
 .../hugegraph_llm/operators/common_op/merge_dedup_rerank.py  |  3 ++-
 hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py  | 12 +++++++-----
 .../hugegraph_llm/operators/hugegraph_op/graph_rag_query.py  |  4 ++--
 .../hugegraph_llm/operators/index_op/semantic_id_query.py    |  2 +-
 7 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/hugegraph-llm/README.md b/hugegraph-llm/README.md
index 49fe502..2b25185 100644
--- a/hugegraph-llm/README.md
+++ b/hugegraph-llm/README.md
@@ -159,7 +159,7 @@ Here is a brief usage guide:
     ```
 3. **Query Graph for Rag**: Retrieve the corresponding keywords and their multi-degree associated relationships from HugeGraph.
      ```python
-     graph_rag.query_graphdb(max_deep=2, max_items=30).print_result()
+     graph_rag.query_graphdb(max_deep=2, max_graph_items=30).print_result()
      ```
 4. **Rerank Searched Result**: Rerank the searched results based on the similarity between the question and the results.
      ```python
diff --git a/hugegraph-llm/src/hugegraph_llm/config/hugegraph_config.py b/hugegraph-llm/src/hugegraph_llm/config/hugegraph_config.py
index cde225a..eac2def 100644
--- a/hugegraph-llm/src/hugegraph_llm/config/hugegraph_config.py
+++ b/hugegraph-llm/src/hugegraph_llm/config/hugegraph_config.py
@@ -21,14 +21,23 @@ from .models import BaseConfig
 
 class HugeGraphConfig(BaseConfig):
     """HugeGraph settings"""
+    # graph server config
     graph_ip: Optional[str] = "127.0.0.1"
     graph_port: Optional[str] = "8080"
     graph_name: Optional[str] = "hugegraph"
     graph_user: Optional[str] = "admin"
     graph_pwd: Optional[str] = "xxx"
     graph_space: Optional[str] = None
+
+    # graph query config
     limit_property: Optional[str] = "False"
     max_graph_path: Optional[int] = 10
-    max_items: Optional[int] = 30
+    max_graph_items: Optional[int] = 30
     edge_limit_pre_label: Optional[int] = 8
+
+    # vector config
     vector_dis_threshold: Optional[float] = 0.9
+    topk_per_keyword: Optional[int] = 1
+
+    # rerank config
+    topk_return_results: Optional[int] = 20
diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
index bcc1198..83b027b 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
@@ -63,7 +63,7 @@ def init_rag_ui() -> gr.Interface:
         title="HugeGraph RAG Platform",
         css=CSS,
     ) as hugegraph_llm_ui:
-        gr.Markdown("# HugeGraph LLM RAG Demo")
+        gr.Markdown("# HugeGraph RAG Platform 🚀")
 
         """
         TODO: leave a general idea of the unresolved part
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py b/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py
index c4ff757..d9c5e98 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py
@@ -22,6 +22,7 @@ import jieba
 import requests
 from nltk.translate.bleu_score import sentence_bleu
 
+from hugegraph_llm.config import huge_settings
 from hugegraph_llm.models.embeddings.base import BaseEmbedding
 from hugegraph_llm.models.rerankers.init_reranker import Rerankers
 from hugegraph_llm.utils.log import log
@@ -43,7 +44,7 @@ class MergeDedupRerank:
     def __init__(
         self,
         embedding: BaseEmbedding,
-        topk: int = 20,
+        topk: int = huge_settings.topk_return_results,
         graph_ratio: float = 0.5,
         method: Literal["bleu", "reranker"] = "bleu",
         near_neighbor_first: bool = False,
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py
index 399864a..7df36d7 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py
@@ -32,7 +32,7 @@ from hugegraph_llm.operators.index_op.vector_index_query import VectorIndexQuery
 from hugegraph_llm.operators.llm_op.answer_synthesize import AnswerSynthesize
 from hugegraph_llm.operators.llm_op.keyword_extract import KeywordExtract
 from hugegraph_llm.utils.decorators import log_time, log_operator_time, record_qps
-from hugegraph_llm.config import prompt
+from hugegraph_llm.config import prompt, huge_settings
 
 
 class RAGPipeline:
@@ -98,7 +98,7 @@ class RAGPipeline:
     def keywords_to_vid(
         self,
         by: Literal["query", "keywords"] = "keywords",
-        topk_per_keyword: int = 1,
+        topk_per_keyword: int = huge_settings.topk_per_keyword,
         topk_per_query: int = 10,
     ):
         """
@@ -121,7 +121,7 @@ class RAGPipeline:
     def query_graphdb(
         self,
         max_deep: int = 2,
-        max_items: int = 30,
+        max_graph_items: int = huge_settings.max_graph_items,
         max_v_prop_len: int = 2048,
         max_e_prop_len: int = 256,
         prop_to_match: Optional[str] = None,
@@ -132,16 +132,18 @@ class RAGPipeline:
         Add a graph RAG query operator to the pipeline.
 
         :param max_deep: Maximum depth for the graph query.
-        :param max_items: Maximum number of items to retrieve.
+        :param max_graph_items: Maximum number of items to retrieve.
         :param max_v_prop_len: Maximum length of vertex properties.
         :param max_e_prop_len: Maximum length of edge properties.
         :param prop_to_match: Property to match in the graph.
+        :param num_gremlin_generate_example: Number of examples to generate.
+        :param gremlin_prompt: Gremlin prompt for generating examples.
         :return: Self-instance for chaining.
         """
         self._operators.append(
             GraphRAGQuery(
                 max_deep=max_deep,
-                max_items=max_items,
+                max_graph_items=max_graph_items,
                 max_v_prop_len=max_v_prop_len,
                 max_e_prop_len=max_e_prop_len,
                 prop_to_match=prop_to_match,
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
index e213c37..2ced618 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
@@ -78,7 +78,7 @@ class GraphRAGQuery:
     def __init__(
         self,
         max_deep: int = 2,
-        max_items: int = int(huge_settings.max_items),
+        max_graph_items: int = huge_settings.max_graph_items,
         prop_to_match: Optional[str] = None,
         llm: Optional[BaseLLM] = None,
         embedding: Optional[BaseEmbedding] = None,
@@ -96,7 +96,7 @@ class GraphRAGQuery:
             huge_settings.graph_space,
         )
         self._max_deep = max_deep
-        self._max_items = max_items
+        self._max_items = max_graph_items
         self._prop_to_match = prop_to_match
         self._schema = ""
         self._limit_property = huge_settings.limit_property.lower() == "true"
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py b/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py
index 47e80f0..8aa6411 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py
@@ -34,7 +34,7 @@ class SemanticIdQuery:
             embedding: BaseEmbedding,
             by: Literal["query", "keywords"] = "keywords",
             topk_per_query: int = 10,
-            topk_per_keyword: int = 1
+            topk_per_keyword: int = huge_settings.topk_per_keyword
     ):
         self.index_dir = str(os.path.join(resource_path, huge_settings.graph_name, "graph_vids"))
         self.vector_index = VectorIndex.from_index_file(self.index_dir)

(incubator-hugegraph-ai) branch main updated: refactor(llm): extract `topk_per_keyword` & `topk_return_results` to .env

Reply via email to