This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 0766f51 feat: add demo app and integration tests (#226)
0766f51 is described below
commit 0766f51830f9190d32f1e0b9d1076dcece42b7a9
Author: Shiyan Xu <[email protected]>
AuthorDate: Tue Dec 17 14:02:37 2024 -0600
feat: add demo app and integration tests (#226)
---------
Co-authored-by: Shaurya <[email protected]>
---
.github/workflows/ci.yml | 8 ++++
.gitignore | 1 -
.gitignore => demo/.env | 20 +--------
.gitignore => demo/app/.gitignore | 22 ++--------
demo/app/python/src/__init__.py | 16 +++++++
demo/app/python/src/main.py | 52 ++++++++++++++++++++++
.gitignore => demo/app/rust/Cargo.toml | 27 +++++-------
demo/app/rust/src/main.rs | 65 +++++++++++++++++++++++++++
demo/compose.yaml | 68 +++++++++++++++++++++++++++++
.gitignore => demo/infra/mc/Dockerfile | 23 +++-------
.gitignore => demo/infra/mc/prepare_data.sh | 27 +++++-------
.gitignore => demo/infra/runner/Dockerfile | 26 +++++------
.gitignore => demo/run_app.sh | 38 +++++++++-------
13 files changed, 276 insertions(+), 117 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d7e08f6..6b36580 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -111,6 +111,14 @@ jobs:
path: ./cov-reports
if-no-files-found: 'error'
+ integration-tests:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - name: Integration tests
+ run: |
+ cd demo
+ ./run_app.sh
publish-coverage:
name: Publish coverage reports to codecov.io
diff --git a/.gitignore b/.gitignore
index 5f104d1..ffad11d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,7 +23,6 @@
.vscode
# python
-.env
venv
**/.python-version
__pycache__
diff --git a/.gitignore b/demo/.env
similarity index 83%
copy from .gitignore
copy to demo/.env
index 5f104d1..937387f 100644
--- a/.gitignore
+++ b/demo/.env
@@ -15,22 +15,6 @@
# specific language governing permissions and limitations
# under the License.
-/Cargo.lock
-/target
-**/target
-/.idea
-.vscode
-
-# python
-.env
-venv
-**/.python-version
-__pycache__
-
-# macOS
-**/.DS_Store
-
-# coverage files
-*.profraw
-cobertura.xml
+MINIO_ROOT_USER=minioadmin
+MINIO_ROOT_PASSWORD=minioadmin
diff --git a/.gitignore b/demo/app/.gitignore
similarity index 82%
copy from .gitignore
copy to demo/app/.gitignore
index 5f104d1..afcc356 100644
--- a/.gitignore
+++ b/demo/app/.gitignore
@@ -15,22 +15,6 @@
# specific language governing permissions and limitations
# under the License.
-/Cargo.lock
-/target
-**/target
-
-/.idea
-.vscode
-
-# python
-.env
-venv
-**/.python-version
-__pycache__
-
-# macOS
-**/.DS_Store
-
-# coverage files
-*.profraw
-cobertura.xml
+venv/
+Cargo.lock
+**/target/
diff --git a/demo/app/python/src/__init__.py b/demo/app/python/src/__init__.py
new file mode 100644
index 0000000..a67d5ea
--- /dev/null
+++ b/demo/app/python/src/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/demo/app/python/src/main.py b/demo/app/python/src/main.py
new file mode 100644
index 0000000..8c08bf6
--- /dev/null
+++ b/demo/app/python/src/main.py
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from hudi import HudiTableBuilder
+import pyarrow as pa
+
+# Read the demo Hudi table snapshot and verify its schema and row count.
+TABLE_URI = "s3://hudi-demo/v6_complexkeygen_hivestyle"
+EXPECTED_COLUMNS = [
+    "_hoodie_commit_time",
+    "_hoodie_commit_seqno",
+    "_hoodie_record_key",
+    "_hoodie_partition_path",
+    "_hoodie_file_name",
+    "id",
+    "name",
+    "isActive",
+    "intField",
+    "longField",
+    "floatField",
+    "doubleField",
+    "decimalField",
+    "dateField",
+    "timestampField",
+    "binaryField",
+    "arrayField",
+    "mapField",
+    "structField",
+    "byteField",
+    "shortField",
+]
+
+snapshot_batches = HudiTableBuilder.from_base_uri(TABLE_URI).build().read_snapshot()
+snapshot = pa.Table.from_batches(snapshot_batches)
+assert snapshot.schema.names == EXPECTED_COLUMNS
+assert snapshot.num_rows == 4
+
+print("Python API: read snapshot successfully!")
diff --git a/.gitignore b/demo/app/rust/Cargo.toml
similarity index 75%
copy from .gitignore
copy to demo/app/rust/Cargo.toml
index 5f104d1..349a2d2 100644
--- a/.gitignore
+++ b/demo/app/rust/Cargo.toml
@@ -15,22 +15,15 @@
# specific language governing permissions and limitations
# under the License.
-/Cargo.lock
-/target
-**/target
+[workspace]
+# keep this empty such that it won't be linked to the repo workspace
-/.idea
-.vscode
+[package]
+name = "app"
+version = "0.1.0"
+edition = "2021"
-# python
-.env
-venv
-**/.python-version
-__pycache__
-
-# macOS
-**/.DS_Store
-
-# coverage files
-*.profraw
-cobertura.xml
+[dependencies]
+tokio = "^1"
+datafusion = "^43"
+hudi = { path = "../../../crates/hudi", features = ["datafusion"] }
diff --git a/demo/app/rust/src/main.rs b/demo/app/rust/src/main.rs
new file mode 100644
index 0000000..9b5ea69
--- /dev/null
+++ b/demo/app/rust/src/main.rs
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use std::sync::Arc;
+
+use datafusion::error::Result;
+use datafusion::prelude::{DataFrame, SessionContext};
+use hudi::HudiDataSource;
+
+/// Registers the demo Hudi table with DataFusion and verifies its schema and
+/// row count.
+///
+/// DataFusion errors are propagated to the caller. A schema or row-count
+/// mismatch panics and reports both the actual and the expected value.
+#[tokio::main]
+async fn main() -> Result<()> {
+    let ctx = SessionContext::new();
+    let hudi = HudiDataSource::new("s3://hudi-demo/v6_complexkeygen_hivestyle").await?;
+    ctx.register_table("v6_table", Arc::new(hudi))?;
+    let df: DataFrame = ctx.sql("SELECT * from v6_table").await?;
+
+    // Bind the columns first so the borrowed names outlive the comparison.
+    let columns = df.schema().columns();
+    let actual_names: Vec<&str> = columns.iter().map(|c| c.name()).collect();
+    assert_eq!(
+        actual_names,
+        vec![
+            "_hoodie_commit_time",
+            "_hoodie_commit_seqno",
+            "_hoodie_record_key",
+            "_hoodie_partition_path",
+            "_hoodie_file_name",
+            "id",
+            "name",
+            "isActive",
+            "intField",
+            "longField",
+            "floatField",
+            "doubleField",
+            "decimalField",
+            "dateField",
+            "timestampField",
+            "binaryField",
+            "arrayField",
+            "mapField",
+            "structField",
+            "byteField",
+            "shortField",
+        ]
+    );
+
+    // `count` consumes the DataFrame, so it runs after the schema check.
+    assert_eq!(df.count().await?, 4);
+    println!("Rust API: read snapshot successfully!");
+    Ok(())
+}
diff --git a/demo/compose.yaml b/demo/compose.yaml
new file mode 100644
index 0000000..e503229
--- /dev/null
+++ b/demo/compose.yaml
@@ -0,0 +1,68 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+---
+services:
+  # Object store for the demo tables: S3 API on 9000, web console on 9001.
+  minio:
+    image: quay.io/minio/minio:latest
+    container_name: minio
+    ports:
+      - "9000:9000"
+      - "9001:9001"
+    command: server /data --console-address ":9001"
+    environment:
+      MINIO_ROOT_USER: ${MINIO_ROOT_USER}
+      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD}
+    healthcheck:
+      test: [ "CMD", "mc", "ready", "local" ]
+      interval: 10s
+      timeout: 5s
+      retries: 3
+
+  # One-shot job: runs prepare_data.sh once MinIO reports healthy, then exits.
+  mc:
+    build:
+      context: ./infra/mc
+    container_name: mc
+    environment:
+      MINIO_ROOT_USER: ${MINIO_ROOT_USER}
+      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD}
+    depends_on:
+      minio:
+        condition: service_healthy
+    volumes:
+      - ../crates/tests/data:/opt/data:ro
+      - ./infra/mc/prepare_data.sh:/opt/prepare_data.sh
+    command:
+      - /bin/sh
+      - -c
+      - /opt/prepare_data.sh
+
+  # Long-lived container in which the demo apps are built and executed.
+  runner:
+    build:
+      context: ./infra/runner
+    container_name: runner
+    # Start only after the data upload has finished, so the apps never read a
+    # bucket that is still empty or only partially populated.
+    depends_on:
+      mc:
+        condition: service_completed_successfully
+    volumes:
+      - ../.:/opt/hudi-rs
+    environment:
+      AWS_ACCESS_KEY_ID: ${MINIO_ROOT_USER}
+      AWS_SECRET_ACCESS_KEY: ${MINIO_ROOT_PASSWORD}
+      AWS_ENDPOINT_URL: http://minio:9000
+      # Quoted so YAML keeps it a string instead of parsing it as a boolean.
+      AWS_ALLOW_HTTP: "true"
+      AWS_REGION: us-east-1 # minio default
+
+# NOTE: no service above lists this network, so all of them join the
+# project's default network.
+networks:
+  app_network:
+    driver: bridge
diff --git a/.gitignore b/demo/infra/mc/Dockerfile
similarity index 77%
copy from .gitignore
copy to demo/infra/mc/Dockerfile
index 5f104d1..95d8b3f 100644
--- a/.gitignore
+++ b/demo/infra/mc/Dockerfile
@@ -15,22 +15,13 @@
# specific language governing permissions and limitations
# under the License.
-/Cargo.lock
-/target
-**/target
+# Pin the base image so rebuilds are reproducible.
+FROM alpine:3.20
-/.idea
-.vscode
+# `--no-cache` fetches a fresh package index itself, so a separate
+# `apk update` is not needed.
+RUN apk add --no-cache \
+    bash \
+    ca-certificates \
+    unzip \
+    wget
-# python
-.env
-venv
-**/.python-version
-__pycache__
+# Download the MinIO client for the build's target architecture. BuildKit sets
+# TARGETARCH; the shell fallback keeps amd64 when it is unset.
+ARG TARGETARCH
+RUN wget -q --show-progress -O /usr/local/bin/mc \
+        "https://dl.min.io/client/mc/release/linux-${TARGETARCH:-amd64}/mc" && \
+    chmod +x /usr/local/bin/mc
-# macOS
-**/.DS_Store
-
-# coverage files
-*.profraw
-cobertura.xml
+WORKDIR /opt/data
diff --git a/.gitignore b/demo/infra/mc/prepare_data.sh
old mode 100644
new mode 100755
similarity index 70%
copy from .gitignore
copy to demo/infra/mc/prepare_data.sh
index 5f104d1..2158b98
--- a/.gitignore
+++ b/demo/infra/mc/prepare_data.sh
@@ -1,3 +1,5 @@
+#!/bin/sh
+#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -14,23 +16,16 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+#
-/Cargo.lock
-/target
-**/target
-
-/.idea
-.vscode
+# Abort on the first failing command so the container exits non-zero when the
+# data could not be prepared.
+set -e
+
+# register the MinIO server under the alias `local`
+mc alias set local http://minio:9000 "$MINIO_ROOT_USER" "$MINIO_ROOT_PASSWORD"
-# python
-.env
-venv
-**/.python-version
-__pycache__
+# create a bucket named `hudi-demo` (no error if it already exists)
+mc mb --ignore-existing local/hudi-demo
-# macOS
-**/.DS_Store
+# unzip the data
+mkdir -p /tmp/tables
+for zip in /opt/data/tables/*.zip; do
+    # unzip exits with 1 for mere warnings; only higher codes are real failures
+    unzip -o "$zip" -d "/tmp/tables/" || [ $? -eq 1 ]
+done
-# coverage files
-*.profraw
-cobertura.xml
+# copy the data to the bucket
+mc cp -r /tmp/tables/* local/hudi-demo/
diff --git a/.gitignore b/demo/infra/runner/Dockerfile
similarity index 75%
copy from .gitignore
copy to demo/infra/runner/Dockerfile
index 5f104d1..eadf90b 100644
--- a/.gitignore
+++ b/demo/infra/runner/Dockerfile
@@ -15,22 +15,18 @@
# specific language governing permissions and limitations
# under the License.
-/Cargo.lock
-/target
-**/target
+FROM rust:1.79
-/.idea
-.vscode
+# Python headers and venv support for the demo's Python setup. The apt lists
+# are removed in the same layer so they do not stay in the image.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends python3-dev python3-venv && \
+    rm -rf /var/lib/apt/lists/*
-# python
-.env
-venv
-**/.python-version
-__pycache__
+RUN python3 -m venv /opt/venv
-# macOS
-**/.DS_Store
+# Put the virtual environment first on PATH so `python` and `pip` resolve to it.
+ENV PATH="/opt/venv/bin:$PATH"
-# coverage files
-*.profraw
-cobertura.xml
+ENV VIRTUAL_ENV=/opt/venv
+
+RUN pip install --no-cache-dir --upgrade pip
+
+WORKDIR /opt
+
+# Keep the container alive so commands can be run in it afterwards.
+# Exec form avoids wrapping the process in `/bin/sh -c`.
+CMD ["tail", "-f", "/dev/null"]
diff --git a/.gitignore b/demo/run_app.sh
old mode 100644
new mode 100755
similarity index 54%
copy from .gitignore
copy to demo/run_app.sh
index 5f104d1..839eeba
--- a/.gitignore
+++ b/demo/run_app.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -14,23 +16,29 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+#
-/Cargo.lock
-/target
-**/target
+# build the images and start all services in the background
+docker compose up --build -d
-/.idea
-.vscode
+max_attempts=30
+attempt=0
-# python
-.env
-venv
-**/.python-version
-__pycache__
+# Print the current state of the `runner` container.
+runner_status() {
+    docker inspect -f '{{.State.Status}}' runner
+}
+
+# Poll once per second until the runner is up or the attempts are exhausted.
+until [ "$(runner_status)" = "running" ] || [ "$attempt" -ge "$max_attempts" ]; do
+    attempt=$((attempt + 1))
+    echo "Waiting for container... (attempt $attempt of $max_attempts)"
+    sleep 1
+done
-# macOS
-**/.DS_Store
+# Check the container's actual state rather than the counter, so a container
+# that came up on the very last attempt is not reported as failed.
+if [ "$(runner_status)" != "running" ]; then
+    echo "Container failed to become ready in time"
+    exit 1
+fi
-# coverage files
-*.profraw
-cobertura.xml
+# install dependencies and run the app
+docker compose exec -T runner /bin/bash -c "
+    cd /opt/hudi-rs/python && \
+    make setup develop && \
+    cd /opt/hudi-rs/demo/app && \
+    cargo run --manifest-path=rust/Cargo.toml && \
+    python -m python.src.main
+    "