This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-ballista.git
The following commit(s) were added to refs/heads/master by this push:
new 2e1f5d61 Allow some config options to be set when creating context (#204)
2e1f5d61 is described below
commit 2e1f5d619760d3b7acce225a166a9507f9efe9a1
Author: Andy Grove <[email protected]>
AuthorDate: Tue Sep 13 17:49:28 2022 -0600
Allow some config options to be set when creating context (#204)
---
python/README.md | 10 +++++++++-
python/src/ballista_context.rs | 16 +++++++++++++---
2 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/python/README.md b/python/README.md
index 80673c36..2a46f06a 100644
--- a/python/README.md
+++ b/python/README.md
@@ -43,7 +43,7 @@ import pyarrow
f = ballista.functions
# create a context
-ctx = ballista.SessionContext()
+ctx = ballista.BallistaContext("localhost", 50050)
# create a RecordBatch and a new DataFrame from it
batch = pyarrow.RecordBatch.from_arrays(
@@ -65,6 +65,14 @@ assert result.column(0) == pyarrow.array([5, 7, 9])
assert result.column(1) == pyarrow.array([-3, -3, -3])
```
+### Specifying Configuration Options
+
+Configuration settings can be specified when creating the context.
+
+```python
+ctx = ballista.BallistaContext("localhost", 50050, shuffle_partitions = 200, batch_size = 16384)
+```
+
### UDFs
```python
diff --git a/python/src/ballista_context.rs b/python/src/ballista_context.rs
index 40e389e7..956d2e35 100644
--- a/python/src/ballista_context.rs
+++ b/python/src/ballista_context.rs
@@ -38,10 +38,20 @@ pub(crate) struct PyBallistaContext {
#[pymethods]
impl PyBallistaContext {
#[new]
- #[args(port = "50050")]
- fn new(py: Python, host: &str, port: u16) -> PyResult<Self> {
+ #[args(port = "50050", shuffle_partitions = 4, batch_size = 8192)]
+ fn new(
+ py: Python,
+ host: &str,
+ port: u16,
+ shuffle_partitions: usize,
+ batch_size: usize,
+ ) -> PyResult<Self> {
let config = BallistaConfig::builder()
- .set("ballista.shuffle.partitions", "4")
+ .set(
+ "ballista.shuffle.partitions",
+ &format!("{}", shuffle_partitions),
+ )
+ .set("ballista.batch.size", &format!("{}", batch_size))
.set("ballista.with_information_schema", "true")
.build()
.map_err(BallistaError::from)?;