This is an automated email from the ASF dual-hosted git repository.
wayne pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 965f4bcd54 feat: add more components to the wasm-pack compatible list (#8843)
965f4bcd54 is described below
commit 965f4bcd54ed8f95c0b9d18f4312353727e37ea3
Author: Ruihang Xia <[email protected]>
AuthorDate: Sat Jan 13 11:11:55 2024 +0800
feat: add more components to the wasm-pack compatible list (#8843)
* feat: add datafusion-physical-plan to compatible list
Signed-off-by: Ruihang Xia <[email protected]>
* feat: add datafusion-execution to the list
Signed-off-by: Ruihang Xia <[email protected]>
* feat: add datafusion to the list
Signed-off-by: Ruihang Xia <[email protected]>
* fix: toml format
Signed-off-by: Ruihang Xia <[email protected]>
---------
Signed-off-by: Ruihang Xia <[email protected]>
---
datafusion/core/Cargo.toml | 7 ++++---
datafusion/core/src/datasource/listing/url.rs | 4 ++++
datafusion/execution/src/object_store.rs | 11 ++++++++++-
datafusion/physical-plan/Cargo.toml | 15 ++++++++++++---
datafusion/substrait/Cargo.toml | 2 ++
datafusion/wasmtest/Cargo.toml | 5 ++++-
datafusion/wasmtest/README.md | 3 +++
7 files changed, 39 insertions(+), 8 deletions(-)
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 9de6a7f7d6..c2e8c2b445 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -37,7 +37,7 @@ path = "src/lib.rs"
# Used to enable the avro format
avro = ["apache-avro", "num-traits", "datafusion-common/avro"]
backtrace = ["datafusion-common/backtrace"]
-compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression"]
+compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression", "tokio-util"]
crypto_expressions = ["datafusion-physical-expr/crypto_expressions", "datafusion-optimizer/crypto_expressions"]
default = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions", "compression", "parquet"]
encoding_expressions = ["datafusion-physical-expr/encoding_expressions"]
@@ -87,8 +87,8 @@ pin-project-lite = "^0.2.7"
rand = { workspace = true }
sqlparser = { workspace = true }
tempfile = { workspace = true }
-tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
-tokio-util = { version = "0.7.4", features = ["io"] }
+tokio = { version = "1.28", features = ["macros", "rt", "sync"] }
+tokio-util = { version = "0.7.4", features = ["io"], optional = true }
url = { workspace = true }
uuid = { version = "1.0", features = ["v4"] }
xz2 = { version = "0.1", optional = true }
@@ -113,6 +113,7 @@ rust_decimal = { version = "1.27.0", features = ["tokio-pg"] }
serde_json = { workspace = true }
test-utils = { path = "../../test-utils" }
thiserror = { workspace = true }
+tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
tokio-postgres = "0.7.7"
[target.'cfg(not(target_os = "windows"))'.dev-dependencies]
nix = { version = "0.27.1", features = ["fs"] }
diff --git a/datafusion/core/src/datasource/listing/url.rs b/datafusion/core/src/datasource/listing/url.rs
index 766dee7de9..6421edf779 100644
--- a/datafusion/core/src/datasource/listing/url.rs
+++ b/datafusion/core/src/datasource/listing/url.rs
@@ -103,12 +103,14 @@ impl ListingTableUrl {
let s = s.as_ref();
// This is necessary to handle the case of a path starting with a drive letter
+ #[cfg(not(target_arch = "wasm32"))]
if std::path::Path::new(s).is_absolute() {
return Self::parse_path(s);
}
match Url::parse(s) {
Ok(url) => Self::try_new(url, None),
+ #[cfg(not(target_arch = "wasm32"))]
Err(url::ParseError::RelativeUrlWithoutBase) => Self::parse_path(s),
Err(e) => Err(DataFusionError::External(Box::new(e))),
}
@@ -146,6 +148,7 @@ impl ListingTableUrl {
}
/// Creates a new [`ListingTableUrl`] interpreting `s` as a filesystem path
+ #[cfg(not(target_arch = "wasm32"))]
fn parse_path(s: &str) -> Result<Self> {
let (path, glob) = match split_glob_expression(s) {
Some((prefix, glob)) => {
@@ -282,6 +285,7 @@ impl ListingTableUrl {
}
/// Creates a file URL from a potentially relative filesystem path
+#[cfg(not(target_arch = "wasm32"))]
fn url_from_filesystem_path(s: &str) -> Option<Url> {
let path = std::path::Path::new(s);
let is_dir = match path.exists() {
diff --git a/datafusion/execution/src/object_store.rs b/datafusion/execution/src/object_store.rs
index 5a1cdb7690..7626f8bef1 100644
--- a/datafusion/execution/src/object_store.rs
+++ b/datafusion/execution/src/object_store.rs
@@ -21,6 +21,7 @@
use dashmap::DashMap;
use datafusion_common::{exec_err, DataFusionError, Result};
+#[cfg(not(target_arch = "wasm32"))]
use object_store::local::LocalFileSystem;
use object_store::ObjectStore;
use std::sync::Arc;
@@ -169,16 +170,24 @@ impl Default for DefaultObjectStoreRegistry {
impl DefaultObjectStoreRegistry {
/// This will register [`LocalFileSystem`] to handle `file://` paths
+ #[cfg(not(target_arch = "wasm32"))]
pub fn new() -> Self {
let object_stores: DashMap<String, Arc<dyn ObjectStore>> = DashMap::new();
object_stores.insert("file://".to_string(), Arc::new(LocalFileSystem::new()));
Self { object_stores }
}
+
+ /// Default without any backend registered.
+ #[cfg(target_arch = "wasm32")]
+ pub fn new() -> Self {
+ Self::default()
+ }
}
///
/// Stores are registered based on the scheme, host and port of the provided URL
-/// with a [`LocalFileSystem::new`] automatically registered for `file://`
+/// with a [`LocalFileSystem::new`] automatically registered for `file://` (if the
+/// target arch is not `wasm32`).
///
/// For example:
///
diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml
index 6c761fc968..357e036b6f 100644
--- a/datafusion/physical-plan/Cargo.toml
+++ b/datafusion/physical-plan/Cargo.toml
@@ -33,7 +33,9 @@ name = "datafusion_physical_plan"
path = "src/lib.rs"
[dependencies]
-ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
+ahash = { version = "0.8", default-features = false, features = [
+ "runtime-rng",
+] }
arrow = { workspace = true }
arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
@@ -54,10 +56,17 @@ once_cell = "1.18.0"
parking_lot = { workspace = true }
pin-project-lite = "^0.2.7"
rand = { workspace = true }
-tokio = { version = "1.28", features = ["sync", "fs", "parking_lot"] }
+tokio = { version = "1.28", features = ["sync"] }
uuid = { version = "^1.2", features = ["v4"] }
[dev-dependencies]
rstest = { workspace = true }
termtree = "0.4.1"
-tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
+tokio = { version = "1.28", features = [
+ "macros",
+ "rt",
+ "rt-multi-thread",
+ "sync",
+ "fs",
+ "parking_lot",
+] }
diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml
index e15b59999a..160af37ef9 100644
--- a/datafusion/substrait/Cargo.toml
+++ b/datafusion/substrait/Cargo.toml
@@ -36,6 +36,8 @@ object_store = { workspace = true }
prost = "0.12"
prost-types = "0.12"
substrait = "0.22.1"
+
+[dev-dependencies]
tokio = "1.17"
[features]
diff --git a/datafusion/wasmtest/Cargo.toml b/datafusion/wasmtest/Cargo.toml
index c5f795d065..91af15a6ea 100644
--- a/datafusion/wasmtest/Cargo.toml
+++ b/datafusion/wasmtest/Cargo.toml
@@ -28,7 +28,7 @@ authors = { workspace = true }
rust-version = "1.70"
[lib]
-crate-type = ["cdylib", "rlib",]
+crate-type = ["cdylib", "rlib"]
[dependencies]
@@ -37,11 +37,14 @@ crate-type = ["cdylib", "rlib",]
# all the `std::fmt` and `std::panicking` infrastructure, so isn't great for
# code size when deploying.
console_error_panic_hook = { version = "0.1.1", optional = true }
+datafusion = { path = "../core", default-features = false }
datafusion-common = { workspace = true }
+datafusion-execution = { workspace = true }
datafusion-expr = { workspace = true }
datafusion-optimizer = { workspace = true }
datafusion-physical-expr = { workspace = true }
+datafusion-physical-plan = { workspace = true }
datafusion-sql = { workspace = true }
# getrandom must be compiled with js feature
diff --git a/datafusion/wasmtest/README.md b/datafusion/wasmtest/README.md
index d26369a18a..4af0f94db9 100644
--- a/datafusion/wasmtest/README.md
+++ b/datafusion/wasmtest/README.md
@@ -59,10 +59,13 @@ Then open http://localhost:8080/ in a web browser and check the console to see t
The following DataFusion crates are verified to work in a wasm-pack environment using the default `wasm32-unknown-unknown` target:
+- `datafusion` (datafusion-core) with default-features disabled to remove `bzip2-sys` from `async-compression`
- `datafusion-common` with default-features disabled to remove the `parquet` dependency (see below)
- `datafusion-expr`
+- `datafusion-execution`
- `datafusion-optimizer`
- `datafusion-physical-expr`
+- `datafusion-physical-plan`
- `datafusion-sql`
The difficulty with getting the remaining DataFusion crates compiled to WASM is that they have non-optional dependencies on the [`parquet`](https://docs.rs/crate/parquet/) crate with its default features enabled. Several of the default parquet crate features require native dependencies that are not compatible with WASM, in particular the `lz4` and `zstd` features. If we can arrange our feature flags to make it possible to depend on parquet with these features disabled, then it should be [...]