This is an automated email from the ASF dual-hosted git repository.
weibin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-graphar-website.git
The following commit(s) were added to refs/heads/main by this push:
new cb52cd0 Remove the docs content and use content in incubator-graphar
repo instead (#10)
cb52cd0 is described below
commit cb52cd091e226d6cd0a7159736ca84147eda7f46
Author: Weibin Zeng <[email protected]>
AuthorDate: Thu Apr 11 22:53:12 2024 +0800
Remove the docs content and use content in incubator-graphar repo instead
(#10)
We now keep the dev docs in
[incubator-graphar/docs](https://github.com/apache/incubator-graphar/blob/main/docs),
so remove the docs content from this repo and refine the build process to sync
the docs from incubator-graphar.
---------
Signed-off-by: acezen <[email protected]>
---
.github/workflows/deploy.yml | 28 +++-
.gitignore | 6 +
README.md | 29 +++-
community/committers/_category_.yml | 2 +-
docs/developers/contributing-guide.md | 0
docs/developers/index.md | 5 -
docs/index.md | 19 ---
docs/libraries/cpp.md | 5 -
docs/libraries/index.md | 7 -
docs/overview/concepts.md | 40 -----
docs/overview/motivation.md | 14 --
docs/overview/overview.md | 16 --
docs/specification/format.md | 191 ---------------------
docs/specification/implementation-status.md | 246 ----------------------------
docs/specification/index.md | 8 -
docusaurus.config.ts | 37 ++++-
static/docs/images | 1 +
17 files changed, 89 insertions(+), 565 deletions(-)
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index d4756b3..5a94f86 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -27,15 +27,37 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
+
- uses: actions/setup-node@v4
with:
node-version: 18.x
+
- uses: pnpm/action-setup@v2
with:
version: 8
- - run: pnpm install
- - run: pnpm run build
- - run: cp .asf.yaml build/.asf.yaml
+
+ - name: Check out the incubator-graphar repository
+ uses: actions/checkout@v4
+ with:
+ repository: apache/incubator-graphar
+ path: incubator-graphar
+ ref: main
+ depth: 1
+
+ - name: Syncing the docs folder and remove incubator-graphar
+ run: |
+ # Note: DO NOT omit the trailing slash in the source directory
+ rsync -av incubator-graphar/docs/ docs/
+ rm -rf incubator-graphar
+
+ - name: Install dependencies
+ run: pnpm install
+
+ - name: Build
+ run: |
+ pnpm run build
+ cp .asf.yaml build/.asf.yaml
+
- uses: peaceiris/actions-gh-pages@v3
if: github.event_name != 'pull_request'
with:
diff --git a/.gitignore b/.gitignore
index b2d6de3..bded95b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,10 @@
# Production
/build
+# Everything in the docs folder except sidebars.ts
+/docs/*
+!/docs/sidebars.ts
+
# Generated files
.docusaurus
.cache-loader
@@ -18,3 +22,5 @@
npm-debug.log*
yarn-debug.log*
yarn-error.log*
+
+package-lock.json
diff --git a/README.md b/README.md
index 0b7c5ff..237dee5 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,39 @@
# Apache GraphAr (incubating) Website
-This project contains a static website for Apache GraphAr (incubating).
+This website is built using [Docusaurus 2](https://docusaurus.io/), a modern
static website generator.
+## Installation
-### Local Development
+```
+$ pnpm install
+```
+
+## Syncing the `docs` directory from the main repository
+
+```
+$ git clone https://github.com/apache/incubator-graphar.git --depth 1
+$ rsync -av --progress incubator-graphar/docs/ docs/
+or
+$ cp -r incubator-graphar/docs/ docs/
+```
+
+Note: **DO NOT omit the trailing slash in the source directory.**
+
+## Local Development
```
-$ npm start
+$ pnpm start
```
This command starts a local development server and opens up a browser window.
Most changes are reflected live without having to restart the server.
+## Build
+
+```
+$ pnpm build
+```
+
+This command generates static content into the `build` directory, which can be
served using any static content hosting service.
## LICENSE
diff --git a/community/committers/_category_.yml
b/community/committers/_category_.yml
index 7bc2f69..567912d 100644
--- a/community/committers/_category_.yml
+++ b/community/committers/_category_.yml
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-position: 1
+position: 2
label: 'Committers'
collapsible: true
collapsed: false
diff --git a/docs/developers/contributing-guide.md
b/docs/developers/contributing-guide.md
deleted file mode 100644
index e69de29..0000000
diff --git a/docs/developers/index.md b/docs/developers/index.md
deleted file mode 100644
index ec6df71..0000000
--- a/docs/developers/index.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-id: developers
-title: Developer Guide
-sidebar_position: 4
----
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
deleted file mode 100644
index 704d1e0..0000000
--- a/docs/index.md
+++ /dev/null
@@ -1,19 +0,0 @@
----
-id: documentation
-title: Documentation
-sidebar_position: 0
----
-
-Welcome to the documentation for Apache GraphAr. Here, you can find
information about the GraphAr File Format, including specifications and library.
-
-### [Overview](/docs/overview)
-Overview of the GraphAr project.
-
-### [Specification](/docs/specification)
-Documentation about the GraphAr file format.
-
-### [Libraries](/docs/libraries)
-Documentation about the libraries of GraphAr.
-
-### [Developer Guide](/docs/developers)
-All developer resources for GraphAr.
\ No newline at end of file
diff --git a/docs/libraries/cpp.md b/docs/libraries/cpp.md
deleted file mode 100644
index b41312d..0000000
--- a/docs/libraries/cpp.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-id: cpp
-title: C++ Library
-sidebar_position: 1
----
\ No newline at end of file
diff --git a/docs/libraries/index.md b/docs/libraries/index.md
deleted file mode 100644
index 5b0bfad..0000000
--- a/docs/libraries/index.md
+++ /dev/null
@@ -1,7 +0,0 @@
----
-id: libraries
-title: Libraries
-sidebar_position: 3
----
-
-## [C++ Library](/docs/libraries/cpp)
\ No newline at end of file
diff --git a/docs/overview/concepts.md b/docs/overview/concepts.md
deleted file mode 100644
index 2039e8d..0000000
--- a/docs/overview/concepts.md
+++ /dev/null
@@ -1,40 +0,0 @@
----
-id: concepts
-title: Concepts
-sidebar_position: 3
----
-
-Glossary of relevant concepts and terms.
-
-- **Property Group**: GraphAr splits the properties of vertex/edge into groups
to allow for efficient storage
- and access without the need to load all properties. Also benefits appending
of new properties. Each property
- group is the unit of storage and is stored in a separate directory.
-
-- **Adjacency List**: The storage method to store the edges of certain vertex
type. Which include:
- - *ordered by source vertex id*: the edges are ordered and aligned by the
source vertex
- - *ordered by destination vertex id*: the edges are ordered and aligned by
the destination vertex
- - *unordered by source vertex id*: the edges are unordered but aligned by
the source vertex
- - *unordered by destination vertex id*: the edges are unordered but
aligned by the destination vertex
-
-- **Compressed Sparse Row (CSR)**: The storage layout the edges of certain
vertex type. Corresponding to the
- ordered by source vertex id adjacency list, the edges are stored in a single
array and the offsets of the
- edges of each vertex are stored in a separate array.
-
-- **Compressed Sparse Column (CSC)**: The storage layout the edges of certain
vertex type. Corresponding to the
- ordered by destination vertex id adjacency list, the edges are stored in a
single array and the offsets of the
- edges of each vertex are stored in a separate array.
-
-- **Coordinate List (COO)**: The storage layout the edges of certain vertex
type. Corresponding to the unordered
- by source vertex id or unordered by target vertex id adjacency list, the
edges are stored in a single array and
- no offsets are stored.
-
-- **Vertex Chunk**: The storage unit of vertex. Each vertex chunk contains a
fixed number of vertices and is stored
- in a separate file.
-
-- **Edge Chunk**: The storage unit of edge. Each edge chunk contains a fixed
number of edges and is stored in a separate file.
-
-**Highlights**:
- The design of property group and vertex/edge chunk allows users to
- - Access the data without reading all the data into memory
- - Conveniently append new properties to the graph without the need to
reorganize the data
- - Efficiently store and access the data in a distributed environment and
parallel processing
diff --git a/docs/overview/motivation.md b/docs/overview/motivation.md
deleted file mode 100644
index a262c87..0000000
--- a/docs/overview/motivation.md
+++ /dev/null
@@ -1,14 +0,0 @@
----
-id: motivation
-title: Motivation
-sidebar_position: 2
----
-
-Numerous graph systems,
-such as Neo4j, Nebula Graph, and Apache HugeGraph, have been developed in
recent years.
-Each of these systems has its own graph data storage format, complicating the
exchange of graph data between different systems.
-The need for a standard data file format for large-scale graph data storage
and processing that can be used by diverse existing systems is evident, as it
would reduce overhead when various systems work together.
-
-Our aim is to fill this gap and contribute to the open-source community by
providing a standard data file format for graph data storage and exchange, as
well as for out-of-core querying.
-This format, which we have named GraphAr, is engineered to be efficient,
cross-language compatible, and to support out-of-core processing scenarios,
such as those commonly found in data lakes.
-Furthermore, GraphAr's flexible design ensures that it can be easily extended
to accommodate a broader array of graph data storage and exchange use cases in
the future.
\ No newline at end of file
diff --git a/docs/overview/overview.md b/docs/overview/overview.md
deleted file mode 100644
index ea97a1b..0000000
--- a/docs/overview/overview.md
+++ /dev/null
@@ -1,16 +0,0 @@
----
-id: overview
-title: Overview
-sidebar_position: 1
----
-
-<img src="/img/docs/overview.png" alt="overview" width="700"/>
-
-GraphAr is a project to standardize the graph data format and provide a set of
libraries to generate, access and transform such formatted files.
-
-It is intended to serve as the standard file format for importing/exporting
and persistent storage of the graph data which can be used by diverse existing
systems, reducing the overhead when various systems co-work.
-
-Additionally, it can also serve as the direct data source for graph processing
applications.
-
-### [Motivation](/docs/overview/motivation)
-### [Concepts](/docs/overview/concepts)
\ No newline at end of file
diff --git a/docs/specification/format.md b/docs/specification/format.md
deleted file mode 100644
index e0f9438..0000000
--- a/docs/specification/format.md
+++ /dev/null
@@ -1,191 +0,0 @@
----
-id: format
-title: Format Specification
-sidebar_position: 1
----
-
-## Property Graph
-
-GraphAr is designed for representing and storing the property graphs. Graph
(in discrete mathematics) is a structure made of vertices and edges.
-Property graph is then a type of graph model where the vertices/edges could
carry a name (also called as type or label) and some properties.
-Since carrying additional information than non-property graphs, the property
graph is able to represent
-connections among data scattered across diverse data databases and with
different schemas.
-Compared with the relational database schema, the property graph excels at
showing data dependencies.
-Therefore, it is widely-used in modeling modern applications including social
network analytics, data mining,
-network routing, scientific computing and so on.
-
-A property graph consists of vertices and edges, with each vertex contains a
unique identifier and:
-
-- A text label that describes the vertex type.
-- A collection of properties, with each property can be represented by a
key-value pair.
-
-Each edge contains a unique identifier and:
-
-- The outgoing vertex (source).
-- The incoming vertex (destination).
-- A text label that describes the relationship between the two vertices.
-- A collection of properties.
-
-The following is an example property graph containing two types of vertices
("person" and "comment") and three types of edges.
-
-<img src="/img/docs/property_graph.png" alt="property graph" width="700"
align="center"/>
-
-## Property Data Types
-
-GraphAr support a set of built-in property data types that are common in real
use cases and supported by most file types (CSV, ORC, Parquet), includes:
-
-- **Boolean**
-- **Int32**: Integer with 32 bits
-- **Int64**: Integer with 64 bits
-- **Float**: 32-bit floating point values
-- **Double**: 64-bit floating point values
-- **String**: Textual data
-- **Date**: days since the Unix epoch
-- **Timestamp**: milliseconds since the Unix epoch
-- **Time**: milliseconds since midnight
-- **List**: A list of values of the same type
-
-GraphAr also supports the user-defined data types, which can be used to
represent complex data structures,
-such as the struct, map, and union types.
-
-## Configurations
-
-### Vertex Chunk Size
-
-The vertex chunk size is a configuration parameter that determines the number
of vertices in a vertex chunk
-and used to partition the logical vertex table into multiple physical vertex
tables.
-
-The vertex chunk size should be set to a value that is large enough to reduce
the overhead of reading/writing files,
-but small enough to avoid reading/writing too many vertices at once. We
recommend setting the vertex chunk size to
-empirical value 2^18 (262,144) for most cases.
-
-### Edge Chunk Size
-
-The edge chunk size is a configuration parameter that determines the number of
edges in an edge chunk
-and used to partition the logical edge table into multiple physical edge
tables.
-
-The edge chunk size should be set to a value that is large enough to reduce
the overhead of reading/writing files,
-but small enough to avoid reading/writing too many edges at once. We recommend
setting the edge chunk size to
-empirical value 2^22 (4,194,304) for most cases.
-
-### Data File Format
-
-GraphAr supports multiple file formats for storing the actual data of vertices
and edges,
-including Apache ORC, Apache Parquet, CSV, and JSON.
-
-The file format should be chosen based on the specific use case and the data
processing framework that will be used to
-process the graph data. For example, if the graph data will be processed using
Apache Spark,
-then the Apache Parquet file format is recommended.
-
-## Adjacency List Type
-
-Adjacency list is a data structure used to represent the edges of a graph.
GraphAr supports multiple types of adjacency lists for a given group of edges,
including:
-
-- **ordered_by_source**: all the edges in the logical table are ordered and
further partitioned by the internal vertex id of the source, which can be seen
as the CSR format.
-- **ordered_by_dest**: all the edges in the logical table are ordered and
further partitioned by the internal vertex id of the destination, which can be
seen as the CSC format.
-- **unordered_by_source**: the internal id of the source vertex is used as the
partition key to divide the edges into different sub-logical-tables, and the
edges in each sub-logical-table are unordered, which can be seen as the COO
format.
-- **unordered_by_dest**: the internal id of the destination vertex is used as
the partition key to divide the edges into different sub-logical-tables, and
the edges in each sub-logical-table are unordered, which can also be seen as
the COO format.
-
-
-## Vertex Chunks in GraphAr
-
-### Logical table of vertices
-
-Each type of vertices (with the same label) constructs a logical vertex table,
with each vertex assigned with a global index inside this type (called internal
vertex id) starting from 0, corresponding to the row number of the vertex in
the logical vertex table. An example layout for a logical table of vertices
under the label "person" is provided for reference.
-
-Given an internal vertex id and the vertex label, a vertex is uniquely
identifiable and its respective properties can be accessed from this table. The
internal vertex id is further used to identify the source and destination
vertices when maintaining the topology of the graph.
-
-<img src="/img/docs/vertex_logical_table.png" alt="vertex logical table"
width="700" align="center"/>
-
-> **Note:** In the logical vertex table, some property can be marked as the
primary key, such as the "id" column of the "person" table.
-
-
-### Physical table of vertices
-
-The logical vertex table will be partitioned into multiple continuous vertex
chunks for enhancing the reading/writing efficiency. To maintain the ability of
random access, the size of vertex chunks for the same label is fixed. To
support to access required properties avoiding reading all properties from the
files, and to add properties for vertices without modifying the existing files,
the columns of the logical table will be divided into several column groups.
-
-Take the "person" vertex table as an example, if the chunk size is set to be
500, the logical table will be separated into sub-logical-tables of 500 rows
with the exception of the last one, which may have less than 500 rows. The
columns for maintaining properties will also be divided into distinct groups
(e.g., 2 for our example). As a result, a total of 4 physical vertex tables are
created for storing the example logical table, which can be seen from the
following figure.
-
-<img src="/img/docs/vertex_physical_table.png" alt="vertex physical table"
width="700" align="center"/>
-
-
-> **Note:** For efficiently utilize the filter push-down of the payload file
format like Parquet, the internal vertex id is stored in the payload file as a
column. And since the internal vertex id is continuous, the payload file format
can use the delta encoding for the internal vertex id column, which would not
bring too much overhead for the storage.
-
-## Edge Chunks in GraphAr
-
-### Logical table of edges
-
-For maintaining a type of edges (that with the same triplet of the source
label, edge label, and destination label), a logical edge table is established.
And in order to support quickly creating a graph from the graph storage file,
the logical edge table could maintain the topology information in a way similar
to CSR/CSC (learn more about
[CSR/CSC](https://en.wikipedia.org/wiki/Sparse_matrix)), that is, the edges are
ordered by the internal vertex id of either source or destination. In [...]
-
-Take the logical table for "person likes person" edges as an example, the
logical edge table looks like:
-
-<img src="/img/docs/edge_logical_table.png" alt="edge logical table"
width="700" align="center"/>
-
-### Physical table of edges
-
-As same with the vertex table, the logical edge table is also partitioned into
some sub-logical-tables, with each sub-logical-table contains edges that the
source (or destination) vertices are in the same vertex chunk. According to the
partition strategy and the order of the edges, edges can be stored in GraphAr
following the setting adjacency list type.
-
-After that, the whole logical table of edges will be divided into multiple
sub-logical-tables with each sub-logical-table contains edges that the source
(or destination) vertices are in the same vertex chunk. Then, a
sub-logical-table is further divided into edge chunks of a predefined, fixed
number of rows (referred to as edge chunk size). Finally, an edge chunk is
separated into physical tables in the following way:
-
-- an adjList table (which contains only two columns: the internal vertex id of
the source and the destination).
-- 0 or more property group tables (each contains the properties of the edges).
-
-Additionally, there would be an offset table for **ordered_by_source** or
**ordered_by_dest** edges. The offset table is used to record the starting
point of the edges for each vertex. The partition of the offset table should be
in alignment with the partition of the corresponding vertex table. The first
row of each offset chunk is always 0, indicating the starting point for the
corresponding sub-logical-table for edges.
-
-Take the "person knows person" edges to illustrate. Suppose the vertex chunk
size is set to 500 and the edge chunk size is 1024, and the edges are
**ordered_by_source**, then the edges could be saved in the following physical
tables:
-
-<img src="/img/docs/edge_physical_table1.png" alt="edge physical table1"
width="700" align="center"/>
-
-<img src="/img/docs/edge_physical_table2.png" alt="edge physical table2"
width="700" align="center"/>
-
-> **Tip:** When the edge type is **ordered_by_source**, the sorted adjList
table together with the offset table can be used as CSR, supporting the fast
access of the outgoing edges for a given vertex. Similarly, a CSC view can be
constructed by sorting the edges by destination and recording corresponding
offsets, supporting the fast access of the incoming edges for a given vertex.
-
-## Information files
-
-GraphAr uses two kinds of files to store a graph: a group of Yaml files to
describe metadata information; and data files to store actual data for vertices
and edges.
-A graph information file which named "\<name\>.graph.yml" describes the meta
information for a graph whose name is \<name\>. The content of this file
includes:
-
-- the graph name;
-- the root directory path of the data files;
-- the vertex information and edge information files included;
-- the version of GraphAr.
-- extra information for the graph, could be used for user defined information.
-
-A vertex information file which named "\<label\>.vertex.yml" defines a single
group of vertices with the same vertex label \<label\>, and all vertices in
this group have the same schema. The file defines:
-
-- the vertex label;
-- the vertex chunk size;
-- the relative path for vertex data files;
-- the property groups attached: each property group has its own file type and
the prefix for the path of its data files, it also lists all properties in this
group, with every property containing its own name, data type, flagging of
whether it is the primary key or not and flagging of whether it is nullable or
not for non-primary key properties;
-- the version of GraphAr.
-
-An edge information file which named "\<source label\>_\<edge
label\>_\<destination label\>.edge.yml" defines a single group of edges with
specific label for source vertex, destination vertex and the edge. It describes
the meta information for these edges, includes:
-
-- the source/edge/destination labels;
-- the edge chunk size, the source vertex chunk size and the destination vertex
chunk size;
-- if the edges are directed or not;
-- the relative path for edge data files;
-- which kinds of adjList it includes: for each kind of adjList, the adjList
type, the prefix of file path, the file type;
-- the property groups attached to the edge for all adjLists;
-- the version of GraphAr.
-
-> **Note:** Please note that GraphAr supports the storage of multiple types of
adjLists for a given group of edges, e.g., a group of edges could be accessed
in both CSR and CSC way when two copies (one is **ordered_by_source** and the
other is **ordered_by_dest**) of the relevant data are present in GraphAr.
-
-See also [Gar Information Files](/docs/libraries/cpp) for an example.
-
-## Data files
-
-As previously mentioned, each logical vertex/edge table is divided into
multiple physical tables stored in one of the following file formats:
-
-- [Apache ORC](https://orc.apache.org/)
-- [Apache Parquet](https://parquet.apache.org/)
-- CSV
-- JSON
-
-Both of Apache ORC and Apache Parquet are column-oriented data storage
formats. In practice of graph processing, it is common to only query a subset
of columns of the properties. Thus, the column-oriented formats are more
efficient, which eliminate the need to read columns that are not relevant. They
are also used by a large number of data processing frameworks like [Apache
Spark](https://spark.apache.org/), [Apache Hive](https://hive.apache.org/),
[Apache Flink](https://flink.apache.org [...]
-
-See also [Gar Data Files](/docs/libraries/cpp) for an example.
-
-## Implementation
-
-The GraphAr libraries may implement part of the GraphAr format. The
implementation status of the GraphAr libraries can refer to the [GraphAr
implementation status](/docs/specification/implementation-status).
diff --git a/docs/specification/implementation-status.md
b/docs/specification/implementation-status.md
deleted file mode 100644
index 4b344a4..0000000
--- a/docs/specification/implementation-status.md
+++ /dev/null
@@ -1,246 +0,0 @@
----
-id: implementation-status
-title: Implementation Status
-sidebar_position: 2
----
-
-The following tables summarize the features available in the various official
GraphAr libraries.
-All libraries currently follow version 1.0.0 of the GraphAr format.
-
-## Data Types
-
-| Data type (primitive) | C++ | Java | Scala | Python |
-| --------------------- | --- | ---- | ----- | ------ |
-| Boolean | ✓ | ✓ | ✓ | ✓ |
-| Int32 | ✓ | ✓ | ✓ | ✓ |
-| Int64 | ✓ | ✓ | ✓ | ✓ |
-| Float | ✓ | ✓ | ✓ | ✓ |
-| Double | ✓ | ✓ | ✓ | ✓ |
-| String | ✓ | ✓ | ✓ | ✓ |
-| Date | ✓ | | | |
-| Timestamp | ✓ | | | |
-| Time | | | | |
-
-| Data type (nested) | C++ | Java | Scala | Python |
-| --------------------- | --- | ---- | ----- | ------ |
-| List (*) | ✓ | | | |
-
-
-Notes:
-
-- \(\*) The data type of List is not supported by the CSV payload file format.
-
-
-## Payload Data File Formats
-
-| Format | C++ | Java | Scala | Python |
-|-----------------------------|---------|---------|-------|------------|
-| CSV | R/W | R (1) | R/W | R/W (2) |
-| ORC | R/W | R (1) | R/W | R/W (2) |
-| Parquet | R/W | R (1) | R/W | R/W (2) |
-| Avro | | | | |
-| HDF5 | | | | |
-| JSON | | | | |
-
-> Notes:
-> - *R* - Read supported
-> - *W* - Write supported
-
-Supported compression methods for the file formats:
-
-| Compression | C++ | Java | Scala | Python |
-|-----------------------------|---------|---------|-------|------------|
-| ZSTD (*) | ✓ | ✓ | ✓ | ✓ |
-
-Notes:
-
-- \(\*) Compression is not supported by the CSV payload file format.
-
-
-## Property
-
-| Property feature | C++ | Java | Scala | Python |
-|-------------------|-------|-------|-------|------------|
-| primary key | ✓ | ✓ | ✓ | ✓ |
-| nullable | ✓ | | ✓ | ✓ |
-
-
-Supported operations in Property:
-
-| Property operation| C++ | Java | Scala | Python |
-|-------------------|-------|-------|-------|------------|
-| create | ✓ | ✓ (1) | ✓ | ✓ (2) |
-| get_name | ✓ | ✓ (1) | ✓ | ✓ (2) |
-| is_primary_key | ✓ | ✓ (1) | ✓ | ✓ (2) |
-| is_nullable | ✓ | | ✓ | ✓ (2) |
-
-
-## Property Group
-
-| Property Group | C++ |Java (1)| Scala | Python (2)|
-| (operation) | | | | |
-|-------------------|-------|--------|-------|------------|
-| create | ✓ | ✓ | ✓ | ✓ |
-| add property | ✓ | ✓ | ✓ | ✓ |
-| remove property | | | | |
-| get properties | ✓ | ✓ | ✓ | ✓ |
-| check property | ✓ | ✓ | | |
-| get file type | ✓ | ✓ | ✓ | ✓ |
-| get path prefix | ✓ | ✓ | ✓ | ✓ |
-| check validation | ✓ | | | |
-
-
-## Adjacency List
-
-| Adjacency List | C++ | Java | Scala | Python |
-| (type) | | | | |
-|-------------------|-------|-------|-------|------------|
-| CSR | ✓ | ✓ | ✓ | ✓ |
-| CSC | ✓ | ✓ | ✓ | ✓ |
-| COO | ✓ | ✓ | ✓ | ✓ |
-
-Supported operations in Adjacency List:
-
-| Adjacency List | C++ |Java (1)| Scala | Python (2)|
-| (operation) | | | | |
-|-------------------|-------|--------|-------|------------|
-| create | ✓ | | ✓ | ✓ |
-| get adjacency type| ✓ | | ✓ | ✓ |
-| get file type | ✓ | | ✓ | ✓ |
-| get path prefix | ✓ | | ✓ | ✓ |
-| check validation | ✓ | | | |
-
-
-## Vertex
-
-Vertex features:
-
-| Vertex feature | C++ | Java | Scala | Python |
-|-------------------|-------|-------|-------|------------|
-| label | ✓ | ✓ | ✓ | ✓ |
-| tag | | | | |
-| chunk based | ✓ | ✓ | ✓ | ✓ |
-| property group | ✓ | ✓ | ✓ | ✓ |
-
-Notes:
-
-* *label* is the vertex label, which is a unique identifier for the vertex.
-* *tag* is the vertex tag, which is tag or category for the vertex.
-
-Supported operations in Vertex Info:
-
-| Vertex Info | C++ |Java (1)| Scala | Python (2) |
-| (operation) | | | | |
-|-------------------|-------|--------|-------|------------|
-| create | ✓ | ✓ | ✓ | ✓ |
-| add group | ✓ | ✓ | ✓ | ✓ |
-| remove group | | | | |
-| get label | ✓ | ✓ | ✓ | ✓ |
-| get chunk size | ✓ | ✓ | ✓ | ✓ |
-| get groups | ✓ | ✓ | ✓ | ✓ |
-| get path prefix | ✓ | ✓ | ✓ | ✓ |
-| check property | ✓ | ✓ | ✓ | ✓ |
-| check validation | ✓ | | ✓ | ✓ |
-| serialize | ✓ | ✓ | ✓ | ✓ |
-| deserialize | ✓ | ✓ | ✓ | ✓ |
-
-
-## Edge
-
-Edge features:
-
-| Edge feature | C++ | Java | Scala | Python |
-|-------------------|-------|-------|-------|------------|
-| label | ✓ | ✓ | ✓ | ✓ |
-| chunk based | ✓ | ✓ | ✓ | ✓ |
-| property group | ✓ | ✓ | ✓ | ✓ |
-| adjacent list | ✓ | ✓ | ✓ | ✓ |
-| directed | ✓ | ✓ | ✓ | ✓ |
-
-Supported operations in Edge Info:
-
-| Edge Info | C++ |Java (1)| Scala | Python (2) |
-| (operation) | | | | |
-|-------------------|-------|--------|-------|------------|
-| create | ✓ | ✓ | ✓ | ✓ |
-| add group | ✓ | ✓ | ✓ | ✓ |
-| remove group | | | | |
-| add adj list | ✓ | ✓ | ✓ | ✓ |
-| remove adj list | | | | |
-| get label | ✓ | ✓ | ✓ | ✓ |
-| get source label | ✓ | ✓ | ✓ | ✓ |
-| get dest label | ✓ | ✓ | ✓ | ✓ |
-| get chunk size | ✓ | ✓ | ✓ | ✓ |
-| get source chunk size | ✓ | ✓ | ✓ | ✓ |
-| get dest chunk size | ✓ | ✓ | ✓ | ✓ |
-| get groups | ✓ | ✓ | ✓ | ✓ |
-| check adj list | ✓ | ✓ | ✓ | ✓ |
-| check property | ✓ | ✓ | ✓ | ✓ |
-| get file type | ✓ | ✓ | ✓ | ✓ |
-| get path prefix | ✓ | ✓ | ✓ | ✓ |
-| is directed | ✓ | ✓ | ✓ | ✓ |
-| check validation | ✓ | | ✓ | ✓ |
-| serialize | ✓ | ✓ | ✓ | ✓ |
-| deserialize | ✓ | ✓ | ✓ | ✓ |
-
-> Notes:
-> - *\<source label, label, dest label\>* is the unique identifier for the
edge type.
-
-
-## Graph
-
-| Graph | C++ | Java | Scala | Python |
-|-------------------|-------|-------|-------|------------|
-| labeled vertex (with property) | ✓ | ✓ | ✓ | ✓ |
-| labeled edge (with property) | ✓ | ✓ | ✓ | ✓ |
-| extra info | ✓ | | | |
-
-Supported operations in Graph Info:
-
-| Graph Info | C++ |Java (1)| Scala | Python (2) |
-| (operation) | | | | |
-|-------------------|-------|--------|-------|------------|
-| create | ✓ | ✓ | ✓ | ✓ |
-| add vertex | ✓ | ✓ | ✓ | ✓ |
-| remove vertex | | | | |
-| add edge | ✓ | ✓ | ✓ | ✓ |
-| remove edge | | | | |
-| get name | ✓ | ✓ | ✓ | ✓ |
-| get vertex | ✓ | ✓ | ✓ | ✓ |
-| get edge | ✓ | ✓ | ✓ | ✓ |
-| add extra info | | | | |
-| remove extra info | | | | |
-| get extra info | ✓ | | | |
-| check validation | ✓ | | | |
-| serialize | ✓ | ✓ | ✓ | ✓ |
-| deserialize | ✓ | ✓ | ✓ | ✓ |
-
-
-Notes:
-
-- \(1) Through fastFFI bindings to the GraphAr C++ library.
-
-- \(2) Through py4j bindings to the GraphAr Spark library.
-
-
-## Libraries Version Compatibility
-
-| GraphAr C++ Version | C++ | CMake | Format Version |
-|---------------------|-----|-------|----------------|
-| 0.11.x | 17+ | 2.8+ | 1.0.0 |
-
-| GraphAr Java Version | Java | Maven | Format Version |
-|----------------------|------|-------|----------------|
-| 0.1.0 | 1.8 | 3.6+ | 1.0.0 |
-
-| GraphAr Spark Version | Apache Spark Version | Scala Version | Java Version
| Hadoop Version | Format Version |
-|-----------------------|----------------------|---------------|--------------|----------------|----------------|
-| 0.1.0 | 3.2.x-3.3.x | 2.12.x | 1.8, 11
| 3 | 1.0.0 |
-
-| GraphAr PySpark Version | Python Version | PySpark Version | Hadoop Version
| Format Version |
-|-------------------------|----------------|-----------------|----------------|----------------|
-| 0.1.0 | 3.8+ | 3.2.x | 3
| 1.0.0 |
-
-Notes:
-- Since the GraphAr PySpark library is bindings to the GraphAr Spark library,
- the PySpark version should be compatible with the Spark version.
diff --git a/docs/specification/index.md b/docs/specification/index.md
deleted file mode 100644
index f3a68f8..0000000
--- a/docs/specification/index.md
+++ /dev/null
@@ -1,8 +0,0 @@
----
-id: specification
-title: Specification
-sidebar_position: 2
----
-
-## [Format Specification](/docs/specification/format)
-## [Implementation Status](/docs/specification/implementation-status)
\ No newline at end of file
diff --git a/docusaurus.config.ts b/docusaurus.config.ts
index 92eb8d2..d6ec7d5 100644
--- a/docusaurus.config.ts
+++ b/docusaurus.config.ts
@@ -48,14 +48,14 @@ const config: Config = {
{
docs: {
sidebarPath: './docs/sidebars.ts',
- editUrl: `https://github.com/apache/${siteRepoName}/tree/main/`,
+ editUrl: `https://github.com/apache/${mainRepoName}/edit/main/`,
exclude: ['**/README.md'],
},
blog: {
blogSidebarCount: 'ALL',
blogSidebarTitle: 'All our posts',
showReadingTime: true,
- editUrl: `https://github.com/apache/${siteRepoName}/tree/main/`,
+ editUrl: `https://github.com/apache/${siteRepoName}/edit/main/`,
},
theme: {
customCss: './src/css/custom.css',
@@ -72,7 +72,7 @@ const config: Config = {
path: 'community',
routeBasePath: 'community',
sidebarPath: require.resolve('./community/sidebars.ts'),
- editUrl: `https://github.com/apache/${siteRepoName}/tree/main/`,
+ editUrl: `https://github.com/apache/${siteRepoName}/edit/main/`,
},
],
],
@@ -86,6 +86,11 @@ const config: Config = {
src: 'img/logo.svg',
},
items: [
+ {
+ position: 'right',
+ label: 'Format',
+ to: '/docs/specification/format'
+ },
{
type: 'docSidebar',
sidebarId: 'documentation',
@@ -99,6 +104,28 @@ const config: Config = {
label: 'Community',
docsPluginId: 'community'
},
+ {
+ position: 'right',
+ label: 'API Reference',
+ items: [
+ {
+ label: 'C++ Library',
+ to: 'pathname:///docs/cpp/'
+ },
+ {
+ label: 'Java Library',
+ to: 'pathname:///docs/java/'
+ },
+ {
+ label: 'Spark Library',
+ to: 'pathname:///docs/spark/'
+ },
+ {
+ label: 'PySpark Library',
+ to: 'pathname:///docs/pyspark/'
+ },
+ ]
+ },
{ to: '/blog', label: 'Blog', position: 'right' },
{
type: 'dropdown',
@@ -175,10 +202,6 @@ const config: Config = {
label: 'Format',
to: '/docs/specification/format',
},
- {
- label: 'Contributing',
- to: '/docs/developers',
- },
],
},
{
diff --git a/static/docs/images b/static/docs/images
new file mode 120000
index 0000000..4c0d78a
--- /dev/null
+++ b/static/docs/images
@@ -0,0 +1 @@
+../../docs/images
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]