This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 0766f51  feat: add demo app and integration tests (#226)
0766f51 is described below

commit 0766f51830f9190d32f1e0b9d1076dcece42b7a9
Author: Shiyan Xu <[email protected]>
AuthorDate: Tue Dec 17 14:02:37 2024 -0600

    feat: add demo app and integration tests (#226)
    
    
    
    ---------
    
    Co-authored-by: Shaurya <[email protected]>
---
 .github/workflows/ci.yml                    |  8 ++++
 .gitignore                                  |  1 -
 .gitignore => demo/.env                     | 20 +--------
 .gitignore => demo/app/.gitignore           | 22 ++--------
 demo/app/python/src/__init__.py             | 16 +++++++
 demo/app/python/src/main.py                 | 52 ++++++++++++++++++++++
 .gitignore => demo/app/rust/Cargo.toml      | 27 +++++-------
 demo/app/rust/src/main.rs                   | 65 +++++++++++++++++++++++++++
 demo/compose.yaml                           | 68 +++++++++++++++++++++++++++++
 .gitignore => demo/infra/mc/Dockerfile      | 23 +++-------
 .gitignore => demo/infra/mc/prepare_data.sh | 27 +++++-------
 .gitignore => demo/infra/runner/Dockerfile  | 26 +++++------
 .gitignore => demo/run_app.sh               | 38 +++++++++-------
 13 files changed, 276 insertions(+), 117 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d7e08f6..6b36580 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -111,6 +111,14 @@ jobs:
           path: ./cov-reports
           if-no-files-found: 'error'
 
+  integration-tests:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Integration tests
+        run: |
+          cd demo
+          ./run_app.sh
 
   publish-coverage:
     name: Publish coverage reports to codecov.io
diff --git a/.gitignore b/.gitignore
index 5f104d1..ffad11d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,7 +23,6 @@
 .vscode
 
 # python
-.env
 venv
 **/.python-version
 __pycache__
diff --git a/.gitignore b/demo/.env
similarity index 83%
copy from .gitignore
copy to demo/.env
index 5f104d1..937387f 100644
--- a/.gitignore
+++ b/demo/.env
@@ -15,22 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 
-/Cargo.lock
-/target
-**/target
 
-/.idea
-.vscode
-
-# python
-.env
-venv
-**/.python-version
-__pycache__
-
-# macOS
-**/.DS_Store
-
-# coverage files
-*.profraw
-cobertura.xml
+MINIO_ROOT_USER=minioadmin
+MINIO_ROOT_PASSWORD=minioadmin
diff --git a/.gitignore b/demo/app/.gitignore
similarity index 82%
copy from .gitignore
copy to demo/app/.gitignore
index 5f104d1..afcc356 100644
--- a/.gitignore
+++ b/demo/app/.gitignore
@@ -15,22 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 
-/Cargo.lock
-/target
-**/target
-
-/.idea
-.vscode
-
-# python
-.env
-venv
-**/.python-version
-__pycache__
-
-# macOS
-**/.DS_Store
-
-# coverage files
-*.profraw
-cobertura.xml
+venv/
+Cargo.lock
+**/target/
diff --git a/demo/app/python/src/__init__.py b/demo/app/python/src/__init__.py
new file mode 100644
index 0000000..a67d5ea
--- /dev/null
+++ b/demo/app/python/src/__init__.py
@@ -0,0 +1,16 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
diff --git a/demo/app/python/src/main.py b/demo/app/python/src/main.py
new file mode 100644
index 0000000..8c08bf6
--- /dev/null
+++ b/demo/app/python/src/main.py
@@ -0,0 +1,52 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+
+from hudi import HudiTableBuilder
+import pyarrow as pa
+
+hudi_table = HudiTableBuilder.from_base_uri(
+    "s3://hudi-demo/v6_complexkeygen_hivestyle"
+).build()
+records = hudi_table.read_snapshot()
+
+arrow_table = pa.Table.from_batches(records)
+assert arrow_table.schema.names == [
+    "_hoodie_commit_time",
+    "_hoodie_commit_seqno",
+    "_hoodie_record_key",
+    "_hoodie_partition_path",
+    "_hoodie_file_name",
+    "id",
+    "name",
+    "isActive",
+    "intField",
+    "longField",
+    "floatField",
+    "doubleField",
+    "decimalField",
+    "dateField",
+    "timestampField",
+    "binaryField",
+    "arrayField",
+    "mapField",
+    "structField",
+    "byteField",
+    "shortField",
+]
+assert arrow_table.num_rows == 4
+
+print("Python API: read snapshot successfully!")
diff --git a/.gitignore b/demo/app/rust/Cargo.toml
similarity index 75%
copy from .gitignore
copy to demo/app/rust/Cargo.toml
index 5f104d1..349a2d2 100644
--- a/.gitignore
+++ b/demo/app/rust/Cargo.toml
@@ -15,22 +15,15 @@
 # specific language governing permissions and limitations
 # under the License.
 
-/Cargo.lock
-/target
-**/target
+[workspace]
+# keep this empty such that it won't be linked to the repo workspace
 
-/.idea
-.vscode
+[package]
+name = "app"
+version = "0.1.0"
+edition = "2021"
 
-# python
-.env
-venv
-**/.python-version
-__pycache__
-
-# macOS
-**/.DS_Store
-
-# coverage files
-*.profraw
-cobertura.xml
+[dependencies]
+tokio = "^1"
+datafusion = "^43"
+hudi = { path = "../../../crates/hudi", features = ["datafusion"] }
diff --git a/demo/app/rust/src/main.rs b/demo/app/rust/src/main.rs
new file mode 100644
index 0000000..9b5ea69
--- /dev/null
+++ b/demo/app/rust/src/main.rs
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use std::sync::Arc;
+
+use datafusion::error::Result;
+use datafusion::prelude::{DataFrame, SessionContext};
+use hudi::HudiDataSource;
+
+/// Registers the demo Hudi table with DataFusion, queries it with SQL and
+/// checks the resulting column names and row count.
+///
+/// Panics when an expectation is not met; setup and query errors are returned.
+#[tokio::main]
+async fn main() -> Result<()> {
+    let ctx = SessionContext::new();
+    let hudi = HudiDataSource::new("s3://hudi-demo/v6_complexkeygen_hivestyle").await?;
+    ctx.register_table("v6_table", Arc::new(hudi))?;
+    let df: DataFrame = ctx.sql("SELECT * from v6_table").await?;
+    // `assert_eq!` prints both sides on a mismatch, which a bare `assert!` does not.
+    assert_eq!(
+        df.schema()
+            .columns()
+            .iter()
+            .map(|c| c.name())
+            .collect::<Vec<_>>(),
+        vec![
+            "_hoodie_commit_time",
+            "_hoodie_commit_seqno",
+            "_hoodie_record_key",
+            "_hoodie_partition_path",
+            "_hoodie_file_name",
+            "id",
+            "name",
+            "isActive",
+            "intField",
+            "longField",
+            "floatField",
+            "doubleField",
+            "decimalField",
+            "dateField",
+            "timestampField",
+            "binaryField",
+            "arrayField",
+            "mapField",
+            "structField",
+            "byteField",
+            "shortField",
+        ]
+    );
+    assert_eq!(df.count().await?, 4);
+    println!("Rust API: read snapshot successfully!");
+    Ok(())
+}
diff --git a/demo/compose.yaml b/demo/compose.yaml
new file mode 100644
index 0000000..e503229
--- /dev/null
+++ b/demo/compose.yaml
@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+---
+services:
+  # MinIO object store that holds the demo tables
+  minio:
+    image: quay.io/minio/minio:latest
+    container_name: minio
+    ports:
+      - 9000:9000
+      - 9001:9001
+    command: server /data --console-address ":9001"
+    environment:
+      MINIO_ROOT_USER: ${MINIO_ROOT_USER}
+      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD}
+    healthcheck:
+      test: [ "CMD", "mc", "ready", "local" ]
+      interval: 10s
+      timeout: 5s
+      retries: 3
+
+  # one-shot job that runs prepare_data.sh to load the test tables into MinIO
+  mc:
+    build:
+      context: ./infra/mc
+    container_name: mc
+    environment:
+      MINIO_ROOT_USER: ${MINIO_ROOT_USER}
+      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD}
+    depends_on:
+      minio:
+        condition: service_healthy
+    volumes:
+      - ../crates/tests/data:/opt/data:ro
+      - ./infra/mc/prepare_data.sh:/opt/prepare_data.sh:ro
+    command:
+      - /bin/sh
+      - -c
+      - /opt/prepare_data.sh
+
+  # idle container with the repository mounted; run_app.sh executes the apps in it
+  runner:
+    build:
+      context: ./infra/runner
+    container_name: runner
+    # start only after the data-loading job has exited successfully
+    depends_on:
+      mc:
+        condition: service_completed_successfully
+    volumes:
+      - ../.:/opt/hudi-rs
+    environment:
+      AWS_ACCESS_KEY_ID: ${MINIO_ROOT_USER}
+      AWS_SECRET_ACCESS_KEY: ${MINIO_ROOT_PASSWORD}
+      AWS_ENDPOINT_URL: http://minio:9000
+      # quoted so that YAML keeps the value a string rather than a boolean
+      AWS_ALLOW_HTTP: "true"
+      AWS_REGION: us-east-1 # minio default
+
+# NOTE(review): no service in this file references app_network, so all of
+# them join the project's default network; attach it or drop it.
+networks:
+  app_network:
+    driver: bridge
diff --git a/.gitignore b/demo/infra/mc/Dockerfile
similarity index 77%
copy from .gitignore
copy to demo/infra/mc/Dockerfile
index 5f104d1..95d8b3f 100644
--- a/.gitignore
+++ b/demo/infra/mc/Dockerfile
@@ -15,22 +15,17 @@
 # specific language governing permissions and limitations
 # under the License.
 
-/Cargo.lock
-/target
-**/target
+# Pinned tag so that rebuilds do not silently pick up a new base image.
+FROM alpine:3.21
 
-/.idea
-.vscode
+# wget and ca-certificates serve the HTTPS download below; unzip is used by prepare_data.sh.
+RUN apk add --no-cache \
+       wget ca-certificates bash unzip
 
-# python
-.env
-venv
-**/.python-version
-__pycache__
+# Download the MinIO client binary straight to its final location.
+RUN wget -q --show-progress -O /usr/local/bin/mc \
+  https://dl.min.io/client/mc/release/linux-amd64/mc && \
+  chmod +x /usr/local/bin/mc
 
-# macOS
-**/.DS_Store
-
-# coverage files
-*.profraw
-cobertura.xml
+# demo/compose.yaml mounts the test data at this path.
+WORKDIR /opt/data
diff --git a/.gitignore b/demo/infra/mc/prepare_data.sh
old mode 100644
new mode 100755
similarity index 70%
copy from .gitignore
copy to demo/infra/mc/prepare_data.sh
index 5f104d1..2158b98
--- a/.gitignore
+++ b/demo/infra/mc/prepare_data.sh
@@ -1,3 +1,5 @@
+#!/bin/sh
+#
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -14,23 +16,16 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+#
 
-/Cargo.lock
-/target
-**/target
-
-/.idea
-.vscode
+mc alias set local http://minio:9000 "$MINIO_ROOT_USER" "$MINIO_ROOT_PASSWORD"
 
-# python
-.env
-venv
-**/.python-version
-__pycache__
+# create a bucket named `hudi-demo`
+mc mb local/hudi-demo
 
-# macOS
-**/.DS_Store
+# unzip the data
+mkdir /tmp/tables
+for zip in /opt/data/tables/*.zip; do unzip -o "$zip" -d "/tmp/tables/"; done
 
-# coverage files
-*.profraw
-cobertura.xml
+# copy the data to the bucket
+mc cp -r /tmp/tables/* local/hudi-demo/
diff --git a/.gitignore b/demo/infra/runner/Dockerfile
similarity index 75%
copy from .gitignore
copy to demo/infra/runner/Dockerfile
index 5f104d1..eadf90b 100644
--- a/.gitignore
+++ b/demo/infra/runner/Dockerfile
@@ -15,22 +15,25 @@
 # specific language governing permissions and limitations
 # under the License.
 
-/Cargo.lock
-/target
-**/target
+FROM rust:1.79
 
-/.idea
-.vscode
+# Python headers and the venv module; the apt lists are removed in the same
+# layer so that they do not end up in the image.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends python3-dev python3-venv && \
+    rm -rf /var/lib/apt/lists/*
 
-# python
-.env
-venv
-**/.python-version
-__pycache__
+# Dedicated virtual environment for the Python tooling.
+RUN python3 -m venv /opt/venv
 
-# macOS
-**/.DS_Store
+# Put the venv first on PATH for the following RUN steps and the running container.
+ENV VIRTUAL_ENV=/opt/venv \
+    PATH="/opt/venv/bin:$PATH"
 
-# coverage files
-*.profraw
-cobertura.xml
+RUN pip install --no-cache-dir --upgrade pip
+
+WORKDIR /opt
+
+# Keep the container idle so that commands can be run in it with
+# `docker compose exec`; exec form starts tail directly, without a shell.
+CMD ["tail", "-f", "/dev/null"]
diff --git a/.gitignore b/demo/run_app.sh
old mode 100644
new mode 100755
similarity index 54%
copy from .gitignore
copy to demo/run_app.sh
index 5f104d1..839eeba
--- a/.gitignore
+++ b/demo/run_app.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+#
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -14,23 +16,36 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+#
 
-/Cargo.lock
-/target
-**/target
+# abort right away if the stack fails to build or start
+set -e
+
+# build the images and start all services in the background
+docker compose up --build -d
 
-/.idea
-.vscode
+max_attempts=30
+attempt=0
 
-# python
-.env
-venv
-**/.python-version
-__pycache__
+# poll once per second until the runner container is running or the attempts are used up
+until [ "$(docker inspect -f '{{.State.Status}}' runner)" = "running" ] || [ "$attempt" -eq "$max_attempts" ]; do
+  attempt=$(( attempt + 1 ))
+  echo "Waiting for container... (attempt $attempt of $max_attempts)"
+  sleep 1
+done
 
-# macOS
-**/.DS_Store
+# decide by the container state itself, so that a container which came up
+# during the last wait is not reported as a failure
+if [ "$(docker inspect -f '{{.State.Status}}' runner)" != "running" ]; then
+  echo "Container failed to become ready in time"
+  exit 1
+fi
 
-# coverage files
-*.profraw
-cobertura.xml
+# install dependencies and run the app
+docker compose exec -T runner /bin/bash -c "
+  cd /opt/hudi-rs/python && \
+  make setup develop && \
+  cd /opt/hudi-rs/demo/app && \
+  cargo run --manifest-path=rust/Cargo.toml && \
+  python -m python.src.main
+  "

Reply via email to