milenkovicm commented on issue #1142:
URL:
https://github.com/apache/datafusion-ballista/issues/1142#issuecomment-2508681263
Draft patch for `datafusion-python` (v42) illustrating "Possible Solution (I)",
which would solve the (py)ballista issues:
```diff
diff --git a/Cargo.lock b/Cargo.lock
index 815323b..a00bdc5 100644
diff --git a/Cargo.toml b/Cargo.toml
index df72cd4..cf3cb1c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -47,6 +47,7 @@ async-trait = "0.1"
futures = "0.3"
object_store = { version = "0.11.0", features = ["aws", "gcp", "azure",
"http"] }
url = "2"
+ballista = { path = "../arrow-ballista/ballista/client", default-features =
false }
[build-dependencies]
prost-types = "0.13" # keep in line with `datafusion-substrait`
diff --git a/python/datafusion/context.py b/python/datafusion/context.py
index 957d7e3..ca6094a 100644
--- a/python/datafusion/context.py
+++ b/python/datafusion/context.py
@@ -423,7 +423,7 @@ class SessionContext:
"""
def __init__(
- self, config: SessionConfig | None = None, runtime: RuntimeConfig |
None = None
+ self, config: SessionConfig | None = None, runtime: RuntimeConfig |
None = None, url: str | None = None
) -> None:
"""Main interface for executing queries with DataFusion.
@@ -448,7 +448,7 @@ class SessionContext:
config = config.config_internal if config is not None else None
runtime = runtime.config_internal if runtime is not None else None
- self.ctx = SessionContextInternal(config, runtime)
+ self.ctx = SessionContextInternal(config, runtime, url)
def register_object_store(
self, schema: str, store: Any, host: str | None = None
diff --git a/src/context.rs b/src/context.rs
index f445874..a40bc47 100644
--- a/src/context.rs
+++ b/src/context.rs
@@ -23,6 +23,7 @@ use std::sync::Arc;
use arrow::array::RecordBatchReader;
use arrow::ffi_stream::ArrowArrayStreamReader;
use arrow::pyarrow::FromPyArrow;
+use ballista::prelude::SessionContextExt;
use datafusion::execution::session_state::SessionStateBuilder;
use object_store::ObjectStore;
use url::Url;
@@ -271,11 +272,13 @@ pub struct PySessionContext {
#[pymethods]
impl PySessionContext {
- #[pyo3(signature = (config=None, runtime=None))]
+ #[pyo3(signature = (config=None, runtime=None, ballista_url=None))]
#[new]
pub fn new(
config: Option<PySessionConfig>,
runtime: Option<PyRuntimeConfig>,
+ ballista_url: Option<String>,
+ py: Python,
) -> PyResult<Self> {
let config = if let Some(c) = config {
c.config
@@ -293,9 +296,16 @@ impl PySessionContext {
.with_runtime_env(runtime)
.with_default_features()
.build();
- Ok(PySessionContext {
- ctx: SessionContext::new_with_state(session_state),
- })
+
+ match ballista_url {
+ Some(url) => Ok(PySessionContext {
+ ctx: wait_for_future(py,
SessionContext::remote_with_state(&url, session_state))
+ .map_err(DataFusionError::from)?,
+ }),
+ None => Ok(PySessionContext {
+ ctx: SessionContext::new_with_state(session_state),
+ }),
+ }
}
/// Register an object store with the given name
```
More details at
https://github.com/apache/datafusion-python/compare/main...milenkovicm:datafusion-python:feat_add_ballista
If we go in this direction, we would need to make ballista an optional feature.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]