This is an automated email from the ASF dual-hosted git repository.

jin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph-ai.git
The following commit(s) were added to refs/heads/main by this push:
     new a0cc3f9  refactor(llm): return schema.groovy first when backup graph data (#161)
a0cc3f9 is described below

commit a0cc3f93a6a739c7433a982d4864e172296259d0
Author: SoJGooo <102796027+mrjs...@users.noreply.github.com>
AuthorDate: Fri Feb 7 15:44:41 2025 +0800

    refactor(llm): return schema.groovy first when backup graph data (#161)

    Note: for non-groovy mode, return JSON format

    ---------

    Co-authored-by: imbajin <j...@apache.org>
---
 .../src/hugegraph_llm/utils/hugegraph_utils.py | 30 +++++++++++++++++++---
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/utils/hugegraph_utils.py b/hugegraph-llm/src/hugegraph_llm/utils/hugegraph_utils.py
index dc94900..4b90943 100644
--- a/hugegraph-llm/src/hugegraph_llm/utils/hugegraph_utils.py
+++ b/hugegraph-llm/src/hugegraph_llm/utils/hugegraph_utils.py
@@ -111,13 +111,14 @@ def backup_data():
             "vertices.json": f"g.V().limit({MAX_VERTICES})"
                              f".aggregate('vertices').count().as('count').select('count','vertices')",
             "edges.json": f"g.E().limit({MAX_EDGES}).aggregate('edges').count().as('count').select('count','edges')",
-            "schema.json": client.schema().getSchema()
+            "schema.json": client.schema().getSchema(_format="groovy")
         }

+        vertexlabels = client.schema().getSchema()["vertexlabels"]
+        all_pk_flag = all(data.get('id_strategy') == 'PRIMARY_KEY' for data in vertexlabels)
+
         for filename, query in files.items():
-            with open(os.path.join(backup_subdir, filename), "w", encoding="utf-8") as f:
-                data = client.gremlin().exec(query)["data"] if "schema" not in filename else query
-                json.dump(data, f, ensure_ascii=False)
+            write_backup_file(client, backup_subdir, filename, query, all_pk_flag)

         log.info("Backup successfully in %s.", backup_subdir)
         relative_backup_subdir = os.path.relpath(backup_subdir, start=resource_path)
@@ -128,6 +129,27 @@ def backup_data():
         raise Exception("Failed to execute backup") from e


+def write_backup_file(client, backup_subdir, filename, query, all_pk_flag):
+    with open(os.path.join(backup_subdir, filename), "w", encoding="utf-8") as f:
+        if filename == "edges.json":
+            data = client.gremlin().exec(query)["data"][0]["edges"]
+            json.dump(data, f, ensure_ascii=False)
+        elif filename == "vertices.json":
+            data_full = client.gremlin().exec(query)["data"][0]["vertices"]
+            data = [{key: value for key, value in vertex.items() if key != "id"}
+                    for vertex in data_full] if all_pk_flag else data_full
+            json.dump(data, f, ensure_ascii=False)
+        elif filename == "schema.json":
+            data_full = query
+            if isinstance(data_full, dict) and "schema" in data_full:
+                groovy_filename = filename.replace(".json", ".groovy")
+                with open(os.path.join(backup_subdir, groovy_filename), "w", encoding="utf-8") as groovy_file:
+                    groovy_file.write(str(data_full["schema"]))
+            else:
+                data = data_full
+                json.dump(data, f, ensure_ascii=False)
+
+
 def manage_backup_retention():
     try:
         backup_dirs = [