This is an automated email from the ASF dual-hosted git repository.

weibin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar.git


The following commit(s) were added to refs/heads/main by this push:
     new a150281c feat(format): Implement the protocol definition of GraphAr 
format (#521)
a150281c is described below

commit a150281cc11132a17cd43c04543274fa36468331
Author: Weibin Zeng <[email protected]>
AuthorDate: Fri Jun 14 16:11:43 2024 +0800

    feat(format): Implement the protocol definition of GraphAr format (#521)
    
    ## Reason for this PR
    After implementing the GraphAr format definition and adding buf to it, it's time to merge the format development branch into main.
    
    ## What changes are included in this PR?
    - implementation of the protocol for the GraphAr format definition; there are some places that differ from the libraries:
    - `label` is renamed to `type`, to distinguish it from the multi-label support planned for the future.
    - the metadata is extended into the definition to include the number of vertices/edges, the number of vertex chunks, the number of edges of each vertex chunk, etc. We previously recorded this metadata across many files.
    - Use buf to generate code from protobuf.
    - Add related CI
    
    ## Are these changes tested?
    yes
    ## Are there any user-facing changes?
    No
    
    ---------
    
    Signed-off-by: acezen <[email protected]>
    Co-authored-by: Semyon <[email protected]>
---
 .github/workflows/format.yml | 59 ++++++++++++++++++++++++++++++++++++++++++++
 buf.gen.yaml                 | 35 ++++++++++++++++++++++++++
 buf.yaml                     | 20 +++++++++++++++
 format/README.md             | 20 +++++++++++++++
 format/adjacent_list.proto   | 43 ++++++++++++++++++++++++++++++++
 format/edge_info.proto       | 51 ++++++++++++++++++++++++++++++++++++++
 format/enums.proto           | 53 +++++++++++++++++++++++++++++++++++++++
 format/graph_info.proto      | 47 +++++++++++++++++++++++++++++++++++
 format/property_group.proto  | 40 ++++++++++++++++++++++++++++++
 format/vertex_info.proto     | 41 ++++++++++++++++++++++++++++++
 10 files changed, 409 insertions(+)

diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml
new file mode 100644
index 00000000..0132b98e
--- /dev/null
+++ b/.github/workflows/format.yml
@@ -0,0 +1,59 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: GraphAr Format
+
+on:  # runs only when the format definitions, the buf config, or this workflow change
+  push:
+    branches:
+      - main
+    paths:
+      - 'format/**'
+      - '.github/workflows/format.yml'
+      - 'buf.gen.yaml'
+      - 'buf.yaml'
+  pull_request:
+    branches:
+      - main
+    paths:
+      - 'format/**'
+      - '.github/workflows/format.yml'
+      - 'buf.gen.yaml'
+      - 'buf.yaml'
+
+concurrency:  # a newer run in the same group cancels the one still in progress
+  group: ${{ github.repository }}-${{ github.event.number || github.head_ref || github.sha }}-${{ github.workflow }}
+  cancel-in-progress: true
+
+jobs:
+  generate:  # checks that `buf generate` succeeds for the definitions under format/
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:  # NOTE(review): the pull_request context is empty on push events; confirm checkout then uses its defaults
+          repository: ${{ github.event.pull_request.head.repo.full_name }}
+          ref: ${{ github.event.pull_request.head.ref }}
+          submodules: false
+          fetch-depth: 0
+
+      - uses: bufbuild/buf-setup-action@v1
+        with:
+          version: "1.32.0"  # quoted so the version stays a string
+
+      - name: Buf Generate
+        run: buf generate
+ 
\ No newline at end of file
diff --git a/buf.gen.yaml b/buf.gen.yaml
new file mode 100644
index 00000000..6405baa9
--- /dev/null
+++ b/buf.gen.yaml
@@ -0,0 +1,35 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+version: v2
+managed:
+  enabled: true
+  disable:
+    - file_option: java_package  # keep the java_package declared in the .proto files
+plugins:
+  # Python classes
+  - remote: buf.build/protocolbuffers/python:v27.1
+    out: pyspark/graphar_pyspark/proto/
+  # Python type stubs (.pyi) for IDEs and MyPy; pinned to the same release as the Python classes
+  - remote: buf.build/protocolbuffers/pyi:v27.1
+    out: pyspark/graphar_pyspark/proto/
+  # Cpp
+  - remote: buf.build/protocolbuffers/cpp:v27.1
+    out: cpp/proto
+  # Java
+  - remote: buf.build/protocolbuffers/java:v27.1
+    out: maven-projects/info/src/main/java/
diff --git a/buf.yaml b/buf.yaml
new file mode 100644
index 00000000..6b16f8b4
--- /dev/null
+++ b/buf.yaml
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+version: v2  # buf configuration file format version
+modules:
+  - path: format  # directory that holds the .proto definitions
diff --git a/format/README.md b/format/README.md
new file mode 100644
index 00000000..5126835b
--- /dev/null
+++ b/format/README.md
@@ -0,0 +1,20 @@
+# GraphAr Format Specification
+
+This folder contains protocol definitions for the GraphAr format.
+
+## How to generate code
+
+### Prerequisites
+
+- [protoc](https://developers.google.com/protocol-buffers/docs/downloads)
+- [buf](https://buf.build/docs/installation) (version >= 1.32.0)
+
+### Build
+
+The build process is managed by `buf` and is run from the root of the repository.
+
+```bash
+buf generate
+```
+
+For documentation about the format, see the [GraphAr documentation](https://graphar.apache.org/docs/specification/format).
diff --git a/format/adjacent_list.proto b/format/adjacent_list.proto
new file mode 100644
index 00000000..da88ef2a
--- /dev/null
+++ b/format/adjacent_list.proto
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+syntax = "proto3";
+
+package graphar;
+option java_multiple_files = true;
+option java_package = "org.apache.graphar.info.proto";
+
+import "enums.proto";
+
+message AdjacentList {  // one adjacency list of an edge type; EdgeInfo holds a repeated list of these
+    AdjListType type = 1;  // one of the AdjListType values declared in enums.proto
+    FileType file_type = 2;  // one of the FileType values declared in enums.proto
+    string prefix = 3;
+
+    // Statistics message, including
+    // 1. the number of vertices based on the AdjListType
+    // 2. the number of vertex chunks based on the AdjListType
+    // 3. the number of edges of each vertex chunk
+    message Statistics {
+        int64 num_vertices = 1; 
+        int64 num_vertex_chunks = 2;
+        repeated int64 edge_nums_of_vertex_chunks = 3;  // presumably one entry per vertex chunk -- confirm
+    }
+    optional Statistics statistics = 4;  // proto3 `optional`: presence is tracked, so "not recorded" is distinguishable
+};
diff --git a/format/edge_info.proto b/format/edge_info.proto
new file mode 100644
index 00000000..12474545
--- /dev/null
+++ b/format/edge_info.proto
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+syntax = "proto3";
+
+package graphar;
+option java_multiple_files = true;
+option java_package = "org.apache.graphar.info.proto";
+
+import "property_group.proto";
+import "adjacent_list.proto";
+
+message EdgeInfo {  // description of one edge type; GraphInfo holds a repeated list of these
+    string type = 1;  // called `type` rather than `label`, leaving `label` free for future multi-label support
+    string source_vertex_type = 2;
+    string destination_vertex_type = 3;
+    int64 chunk_size = 4;
+    int64 source_vertex_chunk_size = 5;
+    int64 destination_vertex_chunk_size = 6;
+    repeated AdjacentList adjacent_list = 7;  // AdjacentList is declared in adjacent_list.proto
+    repeated PropertyGroup properties = 8;  // PropertyGroup is declared in property_group.proto
+    bool is_directed = 9;
+    string prefix = 10;
+
+    // Statistics message of the edge, including
+    // 1. num_edges: the number of edges
+    // 2. num_source_vertices: the number of source vertices
+    // 3. num_destination_vertices: the number of destination vertices
+    message Statistics {
+        int64 num_edges = 1;
+        int64 num_source_vertices = 2;
+        int64 num_destination_vertices = 3;
+    }
+    optional Statistics statistics = 11;  // proto3 `optional`: presence is tracked, so "not recorded" is distinguishable
+};
diff --git a/format/enums.proto b/format/enums.proto
new file mode 100644
index 00000000..234b9e86
--- /dev/null
+++ b/format/enums.proto
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+syntax = "proto3";
+
+package graphar;
+option java_multiple_files = true;
+option java_package = "org.apache.graphar.info.proto";
+
+enum DataType {  // value type of a Property (used by Property.type in property_group.proto)
+    BOOL = 0;  // value 0 is the proto3 default, so an unset DataType field reads as BOOL
+    INT32 = 1;
+    INT64 = 2;
+    FLOAT = 3;
+    DOUBLE = 4;
+    STRING = 5;
+    LIST = 6;
+    DATE = 7;
+    TIMESTAMP = 8;
+    TIME = 9;
+};
+
+enum FileType {  // used by AdjacentList.file_type and PropertyGroup.file_type
+    CSV = 0;  // value 0 is the proto3 default, so an unset FileType field reads as CSV
+    PARQUET = 1;
+    ORC = 2;
+    JSON = 3;
+    AVRO = 4;
+    HDF5 = 5;
+};
+
+enum AdjListType {  // used by AdjacentList.type in adjacent_list.proto
+    UNORDERED_BY_SOURCE = 0;  // value 0 is the proto3 default, so an unset AdjListType field reads as this
+    UNORDERED_BY_TARGET = 1;
+    ORDERED_BY_SOURCE = 2;
+    ORDERED_BY_TARGET = 3;
+};
diff --git a/format/graph_info.proto b/format/graph_info.proto
new file mode 100644
index 00000000..7d11c956
--- /dev/null
+++ b/format/graph_info.proto
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+syntax = "proto3";
+
+package graphar;
+option java_multiple_files = true;
+option java_package = "org.apache.graphar.info.proto";
+
+import "vertex_info.proto";
+import "edge_info.proto";
+
+message GraphInfo {  // top-level description of a graph: its name plus its vertex and edge types
+    string name = 1;
+    repeated VertexInfo vertices = 2;  // VertexInfo is declared in vertex_info.proto
+    repeated EdgeInfo edges = 3;  // EdgeInfo is declared in edge_info.proto
+    string prefix = 4;
+
+    // Statistics of the graph, including the number of vertices and edges
+    message Statistics {
+        int64 num_vertices = 1;
+        int64 num_edges = 2;
+    }
+    optional Statistics statistics = 5;  // proto3 `optional`: presence is tracked, so "not recorded" is distinguishable
+
+    message KeyValue {  // a single string key/value pair
+        string key = 1;
+        string value = 2;
+    } 
+    repeated KeyValue key_value_metadata = 6;  // list of KeyValue pairs attached to the graph
+};
diff --git a/format/property_group.proto b/format/property_group.proto
new file mode 100644
index 00000000..23c28bcc
--- /dev/null
+++ b/format/property_group.proto
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+syntax = "proto3";
+
+package graphar;
+option java_multiple_files = true;
+option java_package = "org.apache.graphar.info.proto";
+
+import "enums.proto";
+
+message Property {  // a single named, typed property; PropertyGroup holds a repeated list of these
+    string name = 1;
+    DataType type = 2;  // one of the DataType values declared in enums.proto
+    bool is_primary_key = 3;
+    bool is_nullable = 4;
+    string prefix = 5;
+};
+
+message PropertyGroup {  // a set of properties sharing one file type and prefix; used by VertexInfo and EdgeInfo
+    repeated Property properties = 1;
+    FileType file_type = 2;  // one of the FileType values declared in enums.proto
+    string prefix = 3;
+};
diff --git a/format/vertex_info.proto b/format/vertex_info.proto
new file mode 100644
index 00000000..99ec482f
--- /dev/null
+++ b/format/vertex_info.proto
@@ -0,0 +1,41 @@
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+syntax = "proto3";
+
+package graphar;
+option java_multiple_files = true;
+option java_package = "org.apache.graphar.info.proto";
+
+import "property_group.proto";
+
+message VertexInfo {  // description of one vertex type; GraphInfo holds a repeated list of these
+    string type = 1;  // called `type` rather than `label`, leaving `label` free for future multi-label support
+    int64 chunk_size = 2;
+    repeated PropertyGroup properties = 3;  // PropertyGroup is declared in property_group.proto
+    string prefix = 4;
+
+    // Statistics message, including the number of vertices and chunks
+    // of this type of vertex
+    message Statistics {
+        int64 num_vertices = 1;
+        int64 num_chunks = 2;
+    }
+    optional Statistics statistics = 5;  // proto3 `optional`: presence is tracked, so "not recorded" is distinguishable
+};


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to