This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-ballista.git


The following commit(s) were added to refs/heads/master by this push:
     new 2e1f5d61 Allow some config options to be set when creating context (#204)
2e1f5d61 is described below

commit 2e1f5d619760d3b7acce225a166a9507f9efe9a1
Author: Andy Grove <[email protected]>
AuthorDate: Tue Sep 13 17:49:28 2022 -0600

    Allow some config options to be set when creating context (#204)
---
 python/README.md               | 10 +++++++++-
 python/src/ballista_context.rs | 16 +++++++++++++---
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/python/README.md b/python/README.md
index 80673c36..2a46f06a 100644
--- a/python/README.md
+++ b/python/README.md
@@ -43,7 +43,7 @@ import pyarrow
 f = ballista.functions
 
 # create a context
-ctx = ballista.SessionContext()
+ctx = ballista.BallistaContext("localhost", 50050)
 
 # create a RecordBatch and a new DataFrame from it
 batch = pyarrow.RecordBatch.from_arrays(
@@ -65,6 +65,14 @@ assert result.column(0) == pyarrow.array([5, 7, 9])
 assert result.column(1) == pyarrow.array([-3, -3, -3])
 ```
 
+### Specifying Configuration Options
+
+Configuration settings can be specified when creating the context.
+
+```python
+ctx = ballista.BallistaContext("localhost", 50050, shuffle_partitions = 200, batch_size = 16384)
+```
+
 ### UDFs
 
 ```python
diff --git a/python/src/ballista_context.rs b/python/src/ballista_context.rs
index 40e389e7..956d2e35 100644
--- a/python/src/ballista_context.rs
+++ b/python/src/ballista_context.rs
@@ -38,10 +38,20 @@ pub(crate) struct PyBallistaContext {
 #[pymethods]
 impl PyBallistaContext {
     #[new]
-    #[args(port = "50050")]
-    fn new(py: Python, host: &str, port: u16) -> PyResult<Self> {
+    #[args(port = "50050", shuffle_partitions = 4, batch_size = 8192)]
+    fn new(
+        py: Python,
+        host: &str,
+        port: u16,
+        shuffle_partitions: usize,
+        batch_size: usize,
+    ) -> PyResult<Self> {
         let config = BallistaConfig::builder()
-            .set("ballista.shuffle.partitions", "4")
+            .set(
+                "ballista.shuffle.partitions",
+                &format!("{}", shuffle_partitions),
+            )
+            .set("ballista.batch.size", &format!("{}", batch_size))
             .set("ballista.with_information_schema", "true")
             .build()
             .map_err(BallistaError::from)?;

Reply via email to