This is an automated email from the ASF dual-hosted git repository.
jin pushed a commit to branch graph-query
in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph-ai.git
The following commit(s) were added to refs/heads/graph-query by this push:
new cc2726c refactor(llm): enhance property query template
cc2726c is described below
commit cc2726c70e7eb1166f4fe21028566f5ccd2590fd
Author: imbajin <[email protected]>
AuthorDate: Sun Sep 29 00:40:51 2024 +0800
refactor(llm): enhance property query template
---
.../operators/hugegraph_op/fetch_graph_data.py | 9 +--------
.../operators/hugegraph_op/graph_rag_query.py | 14 ++++++++------
2 files changed, 9 insertions(+), 14 deletions(-)
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/fetch_graph_data.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/fetch_graph_data.py
index ab61e36..9cb5729 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/fetch_graph_data.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/fetch_graph_data.py
@@ -29,12 +29,5 @@ class FetchGraphData:
if context is None:
context = {}
if "vertices" not in context:
- context["vertices"] = []
- vertices = self.graph.gremlin().exec("g.V().id()")["data"]
- for vertex in vertices:
- context["vertices"].append({
- "id": vertex["id"],
- "label": vertex["label"],
- "properties": vertex["properties"]
- })
+ context["vertices"] = self.graph.gremlin().exec("g.V().id()")["data"]
return context
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
index 9feb48b..b2b6f08 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
@@ -26,8 +26,8 @@ from pyhugegraph.client import PyHugeClient
VERTEX_QUERY_TPL = "g.V({keywords}).as('subj').toList()"
# TODO: we could use a simpler query (like kneighbor-api to get the edges)
-# TODO: use dedup() to filter duplicate paths
-ID_QUERY_NEIGHBOR_TPL = """
+# TODO: test with profile()/explain() to speed up the query
+VID_QUERY_NEIGHBOR_TPL = """
g.V({keywords}).as('subj')
.repeat(
bothE({edge_labels}).limit({edge_limit}).as('rel').otherV().dedup().as('obj')
@@ -52,8 +52,9 @@ g.V({keywords}).as('subj')
PROPERTY_QUERY_NEIGHBOR_TPL = """
g.V().has('{prop}', within({keywords})).as('subj')
.repeat(
- bothE({edge_labels}).as('rel').otherV().as('obj')
-).times({max_deep})
+ bothE({edge_labels}).limit({edge_limit}).as('rel').otherV().dedup().as('obj')
+).times({max_deep}).emit()
+.simplePath()
.path()
.by(project('label', 'props')
.by(label())
@@ -126,7 +127,7 @@ class GraphRAGQuery:
log.debug("Vids query: %s", gremlin_query)
vertex_knowledge = self._format_graph_from_vertex(query_result=result)
- gremlin_query = ID_QUERY_NEIGHBOR_TPL.format(
+ gremlin_query = VID_QUERY_NEIGHBOR_TPL.format(
keywords=match_vids,
max_deep=self._max_deep,
edge_labels=edge_labels_str,
@@ -151,9 +152,10 @@ class GraphRAGQuery:
gremlin_query = PROPERTY_QUERY_NEIGHBOR_TPL.format(
prop=self._prop_to_match,
keywords=keywords_str,
+ edge_labels=edge_labels_str,
+ edge_limit=edge_limit_amount,
max_deep=self._max_deep,
max_items=self._max_items,
- edge_labels=edge_labels_str,
)
log.warning("Unable to find vid, downgraded to property query, please confirm if it meets expectation.")