This is an automated email from the ASF dual-hosted git repository.

jin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph-ai.git


The following commit(s) were added to refs/heads/main by this push:
     new a0cc3f9  refactor(llm): return schema.groovy first when backup graph data (#161)
a0cc3f9 is described below

commit a0cc3f93a6a739c7433a982d4864e172296259d0
Author: SoJGooo <102796027+mrjs...@users.noreply.github.com>
AuthorDate: Fri Feb 7 15:44:41 2025 +0800

    refactor(llm): return schema.groovy first when backup graph data (#161)
    
    Note: in non-groovy mode, the schema is returned in JSON format
    
    ---------
    
    Co-authored-by: imbajin <j...@apache.org>
---
 .../src/hugegraph_llm/utils/hugegraph_utils.py     | 30 +++++++++++++++++++---
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/utils/hugegraph_utils.py b/hugegraph-llm/src/hugegraph_llm/utils/hugegraph_utils.py
index dc94900..4b90943 100644
--- a/hugegraph-llm/src/hugegraph_llm/utils/hugegraph_utils.py
+++ b/hugegraph-llm/src/hugegraph_llm/utils/hugegraph_utils.py
@@ -111,13 +111,14 @@ def backup_data():
             "vertices.json": f"g.V().limit({MAX_VERTICES})"
                              
f".aggregate('vertices').count().as('count').select('count','vertices')",
             "edges.json": 
f"g.E().limit({MAX_EDGES}).aggregate('edges').count().as('count').select('count','edges')",
-            "schema.json": client.schema().getSchema()
+            "schema.json": client.schema().getSchema(_format="groovy")
         }
 
+        vertexlabels = client.schema().getSchema()["vertexlabels"]
+        all_pk_flag = all(data.get('id_strategy') == 'PRIMARY_KEY' for data in vertexlabels)
+
         for filename, query in files.items():
-            with open(os.path.join(backup_subdir, filename), "w", encoding="utf-8") as f:
-                data = client.gremlin().exec(query)["data"] if "schema" not in filename else query
-                json.dump(data, f, ensure_ascii=False)
+            write_backup_file(client, backup_subdir, filename, query, all_pk_flag)
 
         log.info("Backup successfully in %s.", backup_subdir)
         relative_backup_subdir = os.path.relpath(backup_subdir, start=resource_path)
@@ -128,6 +129,27 @@ def backup_data():
         raise Exception("Failed to execute backup") from e
 
 
+def write_backup_file(client, backup_subdir, filename, query, all_pk_flag):
+    with open(os.path.join(backup_subdir, filename), "w", encoding="utf-8") as f:
+        if filename == "edges.json":
+            data = client.gremlin().exec(query)["data"][0]["edges"]
+            json.dump(data, f, ensure_ascii=False)
+        elif filename == "vertices.json":
+            data_full = client.gremlin().exec(query)["data"][0]["vertices"]
+            data = [{key: value for key, value in vertex.items() if key != "id"}
+                    for vertex in data_full] if all_pk_flag else data_full
+            json.dump(data, f, ensure_ascii=False)
+        elif filename == "schema.json":
+            data_full = query
+            if isinstance(data_full, dict) and "schema" in data_full:
+                groovy_filename = filename.replace(".json", ".groovy")
+                with open(os.path.join(backup_subdir, groovy_filename), "w", encoding="utf-8") as groovy_file:
+                    groovy_file.write(str(data_full["schema"]))
+            else:
+                data = data_full
+                json.dump(data, f, ensure_ascii=False)
+
+
 def manage_backup_retention():
     try:
         backup_dirs = [
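For reference, a minimal standalone sketch of the schema-first export this commit introduces, assuming a pyhugegraph client named client and an existing directory backup_subdir (placeholder names, not part of the commit). As the diff shows, getSchema(_format="groovy") comes back as a dict wrapping the schema DSL under a "schema" key in groovy mode, which is written to schema.groovy; any other shape falls back to the JSON dump noted in the commit message.

    import json
    import os

    def export_schema_first(client, backup_subdir):
        # Groovy mode wraps the schema DSL as {"schema": "..."} -> schema.groovy.
        schema = client.schema().getSchema(_format="groovy")
        if isinstance(schema, dict) and "schema" in schema:
            with open(os.path.join(backup_subdir, "schema.groovy"), "w",
                      encoding="utf-8") as f:
                f.write(str(schema["schema"]))
        else:
            # Non-groovy mode: fall back to a plain JSON dump of the schema.
            with open(os.path.join(backup_subdir, "schema.json"), "w",
                      encoding="utf-8") as f:
                json.dump(schema, f, ensure_ascii=False)

The related all_pk_flag check strips the server-generated id field from exported vertices only when every vertex label uses the PRIMARY_KEY id strategy, presumably because such ids can be re-derived from the primary-key properties on restore.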
