This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 2848aaff35 Chore: update wasm-supported crates, add tests (#14005)
2848aaff35 is described below

commit 2848aaff35ee555b15d4276f3846253bec2050cb
Author: Lordworms <[email protected]>
AuthorDate: Mon Jan 6 03:21:50 2025 -0800

    Chore: update wasm-supported crates, add tests (#14005)
    
    * Chore: update wasm-supported crates
    
    * format
---
 datafusion/wasmtest/Cargo.toml |  9 +++++-
 datafusion/wasmtest/README.md  | 12 ++++++--
 datafusion/wasmtest/src/lib.rs | 70 ++++++++++++++++++++++++++++++++++++------
 3 files changed, 79 insertions(+), 12 deletions(-)

diff --git a/datafusion/wasmtest/Cargo.toml b/datafusion/wasmtest/Cargo.toml
index 69b9bd61a3..8520b8d02d 100644
--- a/datafusion/wasmtest/Cargo.toml
+++ b/datafusion/wasmtest/Cargo.toml
@@ -43,14 +43,21 @@ chrono = { version = "0.4", features = ["wasmbind"] }
 # code size when deploying.
 console_error_panic_hook = { version = "0.1.1", optional = true }
 datafusion = { workspace = true }
+datafusion-catalog = { workspace = true }
 datafusion-common = { workspace = true, default-features = true }
+datafusion-common-runtime = { workspace = true }
 datafusion-execution = { workspace = true }
 datafusion-expr = { workspace = true }
+datafusion-expr-common = { workspace = true }
+datafusion-functions = { workspace = true }
+datafusion-functions-aggregate = { workspace = true }
+datafusion-functions-aggregate-common = { workspace = true }
+datafusion-functions-table = { workspace = true }
 datafusion-optimizer = { workspace = true, default-features = true }
 datafusion-physical-expr = { workspace = true, default-features = true }
+datafusion-physical-expr-common = { workspace = true }
 datafusion-physical-plan = { workspace = true }
 datafusion-sql = { workspace = true }
-
 # getrandom must be compiled with js feature
 getrandom = { version = "0.2.8", features = ["js"] }
 
diff --git a/datafusion/wasmtest/README.md b/datafusion/wasmtest/README.md
index 2e525ee909..8843eed697 100644
--- a/datafusion/wasmtest/README.md
+++ b/datafusion/wasmtest/README.md
@@ -87,5 +87,13 @@ The following DataFusion crates are verified to work in a wasm-pack environment
 - `datafusion-physical-expr`
 - `datafusion-physical-plan`
 - `datafusion-sql`
-
-The difficulty with getting the remaining DataFusion crates compiled to WASM is that they have non-optional dependencies on the [`parquet`](https://docs.rs/crate/parquet/) crate with its default features enabled. Several of the default parquet crate features require native dependencies that are not compatible with WASM, in particular the `lz4` and `zstd` features. If we can arrange our feature flags to make it possible to depend on parquet with these features disabled, then it should be [...]
+- `datafusion-expr-common`
+- `datafusion-physical-expr-common`
+- `datafusion-functions`
+- `datafusion-functions-aggregate`
+- `datafusion-functions-aggregate-common`
+- `datafusion-functions-table`
+- `datafusion-catalog`
+- `datafusion-common-runtime`
+
+The `datafusion-ffi` crate cannot compile for the wasm32-unknown-unknown target because it relies on lzma-sys, which depends on native C libraries (liblzma). The wasm32-unknown-unknown target lacks a standard C library (stdlib.h) and POSIX-like environment, preventing the native code from being compiled.
diff --git a/datafusion/wasmtest/src/lib.rs b/datafusion/wasmtest/src/lib.rs
index 54b662514c..c7c620d1be 100644
--- a/datafusion/wasmtest/src/lib.rs
+++ b/datafusion/wasmtest/src/lib.rs
@@ -26,7 +26,6 @@ use datafusion_sql::sqlparser::dialect::GenericDialect;
 use datafusion_sql::sqlparser::parser::Parser;
 use std::sync::Arc;
 use wasm_bindgen::prelude::*;
-
 pub fn set_panic_hook() {
     // When the `console_error_panic_hook` feature is enabled, we can call the
     // `set_panic_hook` function at least once during initialization, and then
@@ -77,7 +76,14 @@ pub fn basic_parse() {
 #[cfg(test)]
 mod test {
     use super::*;
-    use datafusion::execution::context::SessionContext;
+    use datafusion::{
+        arrow::{
+            array::{ArrayRef, Int32Array, RecordBatch, StringArray},
+            datatypes::{DataType, Field, Schema},
+        },
+        datasource::MemTable,
+        execution::context::SessionContext,
+    };
     use datafusion_execution::{
         config::SessionConfig, disk_manager::DiskManagerConfig,
         runtime_env::RuntimeEnvBuilder,
@@ -95,19 +101,21 @@ mod test {
         basic_parse();
     }
 
-    #[wasm_bindgen_test(unsupported = tokio::test)]
-    async fn basic_execute() {
-        let sql = "SELECT 2 + 2;";
-
-        // Execute SQL (using datafusion)
+    fn get_ctx() -> Arc<SessionContext> {
         let rt = RuntimeEnvBuilder::new()
             .with_disk_manager(DiskManagerConfig::Disabled)
             .build_arc()
             .unwrap();
         let session_config = SessionConfig::new().with_target_partitions(1);
-        let session_context =
-            Arc::new(SessionContext::new_with_config_rt(session_config, rt));
+        Arc::new(SessionContext::new_with_config_rt(session_config, rt))
+    }
+    #[wasm_bindgen_test(unsupported = tokio::test)]
+    async fn basic_execute() {
+        let sql = "SELECT 2 + 2;";
+
+        // Execute SQL (using datafusion)
 
+        let session_context = get_ctx();
         let statement = DFParser::parse_sql(sql).unwrap().pop_back().unwrap();
 
         let logical_plan = session_context
@@ -124,4 +132,48 @@ mod test {
         let task_ctx = session_context.task_ctx();
         let _ = collect(physical_plan, task_ctx).await.unwrap();
     }
+
+    #[wasm_bindgen_test(unsupported = tokio::test)]
+    async fn basic_df_function_execute() {
+        let sql = "SELECT abs(-1.0);";
+        let statement = DFParser::parse_sql(sql).unwrap().pop_back().unwrap();
+        let ctx = get_ctx();
+        let logical_plan = ctx.state().statement_to_plan(statement).await.unwrap();
+        let data_frame = ctx.execute_logical_plan(logical_plan).await.unwrap();
+        let physical_plan = data_frame.create_physical_plan().await.unwrap();
+
+        let task_ctx = ctx.task_ctx();
+        let _ = collect(physical_plan, task_ctx).await.unwrap();
+    }
+
+    #[wasm_bindgen_test(unsupported = tokio::test)]
+    async fn test_basic_aggregate() {
+        let sql =
+            "SELECT FIRST_VALUE(value) OVER (ORDER BY id) as first_val FROM 
test_table;";
+
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int32, false),
+            Field::new("value", DataType::Utf8, false),
+        ]));
+
+        let data: Vec<ArrayRef> = vec![
+            Arc::new(Int32Array::from(vec![1])),
+            Arc::new(StringArray::from(vec!["a"])),
+        ];
+
+        let batch = RecordBatch::try_new(schema.clone(), data).unwrap();
+        let table = MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap();
+
+        let ctx = get_ctx();
+        ctx.register_table("test_table", Arc::new(table)).unwrap();
+
+        let statement = DFParser::parse_sql(sql).unwrap().pop_back().unwrap();
+
+        let logical_plan = ctx.state().statement_to_plan(statement).await.unwrap();
+        let data_frame = ctx.execute_logical_plan(logical_plan).await.unwrap();
+        let physical_plan = data_frame.create_physical_plan().await.unwrap();
+
+        let task_ctx = ctx.task_ctx();
+        let _ = collect(physical_plan, task_ctx).await.unwrap();
+    }
 }


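A side note for readers trying the new wasm tests from their own crate: the short sketch below recaps the session setup the patch factors into get_ctx() (a RuntimeEnvBuilder with the disk manager disabled and a single target partition), but drives it through SessionContext::sql rather than the parser-level DFParser path used in the committed tests. It is only an illustrative sketch, assuming the same datafusion and datafusion-execution dependencies the wasmtest crate already declares; the function name run_query_sketch is invented here, not part of the commit.

    use std::sync::Arc;

    use datafusion::execution::context::SessionContext;
    use datafusion_execution::{
        config::SessionConfig, disk_manager::DiskManagerConfig,
        runtime_env::RuntimeEnvBuilder,
    };

    // Illustrative helper (name invented here): run a query without touching
    // the filesystem, which wasm32-unknown-unknown does not provide.
    async fn run_query_sketch() -> datafusion::error::Result<()> {
        // Disable the disk manager so nothing attempts to spill to disk.
        let rt = RuntimeEnvBuilder::new()
            .with_disk_manager(DiskManagerConfig::Disabled)
            .build_arc()?;
        let config = SessionConfig::new().with_target_partitions(1);
        let ctx = Arc::new(SessionContext::new_with_config_rt(config, rt));

        // SessionContext::sql parses, plans, and returns a DataFrame in one
        // call; collect() executes the plan and gathers the record batches.
        let batches = ctx.sql("SELECT abs(-1.0)").await?.collect().await?;
        assert!(!batches.is_empty());
        Ok(())
    }

Under wasm-bindgen-test this would be driven the same way as the committed tests, i.e. through the #[wasm_bindgen_test(unsupported = tokio::test)] attribute shown in the patch above.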
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
