This is an automated email from the ASF dual-hosted git repository.
timsaucer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git
The following commit(s) were added to refs/heads/main by this push:
new 6b162850 feat: expose `select_exprs` method on DataFrame (#1271)
6b162850 is described below
commit 6b1628509c5dc214c673ea77215e62b9a4693a01
Author: Marko Milenković <[email protected]>
AuthorDate: Tue Oct 14 15:20:40 2025 +0100
feat: expose `select_exprs` method on DataFrame (#1271)
* feat: expose select_exprs method on DataFrame
* change python doc
* ruff linting
---------
Co-authored-by: Tim Saucer <[email protected]>
---
python/datafusion/dataframe.py | 11 +++++++++++
python/tests/test_dataframe.py | 32 ++++++++++++++++++++++++++++++++
src/dataframe.rs | 7 +++++++
3 files changed, 50 insertions(+)
diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py
index 86131c45..d15111d5 100644
--- a/python/datafusion/dataframe.py
+++ b/python/datafusion/dataframe.py
@@ -405,6 +405,17 @@ class DataFrame:
"""
return self.select(*args)
+ def select_exprs(self, *args: str) -> DataFrame:
+ """Project arbitrary list of expression strings into a new DataFrame.
+
+ This method will parse string expressions into logical plan expressions.
+ The output DataFrame has one column for each expression.
+
+ Returns:
+ DataFrame only containing the specified columns.
+ """
+ return self.df.select_exprs(*args)
+
def select(self, *exprs: Expr | str) -> DataFrame:
"""Project arbitrary expressions into a new :py:class:`DataFrame`.
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
index 76b80803..9317711f 100644
--- a/python/tests/test_dataframe.py
+++ b/python/tests/test_dataframe.py
@@ -221,6 +221,38 @@ def test_select(df):
assert result.column(1) == pa.array([1, 2, 3])
+def test_select_exprs(df):
+ df_1 = df.select_exprs(
+ "a + b",
+ "a - b",
+ )
+
+ # execute and collect the first (and only) batch
+ result = df_1.collect()[0]
+
+ assert result.column(0) == pa.array([5, 7, 9])
+ assert result.column(1) == pa.array([-3, -3, -3])
+
+ df_2 = df.select_exprs("b", "a")
+
+ # execute and collect the first (and only) batch
+ result = df_2.collect()[0]
+
+ assert result.column(0) == pa.array([4, 5, 6])
+ assert result.column(1) == pa.array([1, 2, 3])
+
+ df_3 = df.select_exprs(
+ "abs(a + b)",
+ "abs(a - b)",
+ )
+
+ # execute and collect the first (and only) batch
+ result = df_3.collect()[0]
+
+ assert result.column(0) == pa.array([5, 7, 9])
+ assert result.column(1) == pa.array([3, 3, 3])
+
+
def test_drop_quoted_columns():
ctx = SessionContext()
batch = pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], names=["ID_For_Students"])
diff --git a/src/dataframe.rs b/src/dataframe.rs
index 34da8744..1f87f99d 100644
--- a/src/dataframe.rs
+++ b/src/dataframe.rs
@@ -435,6 +435,13 @@ impl PyDataFrame {
Ok(Self::new(df))
}
+ #[pyo3(signature = (*args))]
+ fn select_exprs(&self, args: Vec<PyBackedStr>) -> PyDataFusionResult<Self> {
+ let args = args.iter().map(|s| s.as_ref()).collect::<Vec<&str>>();
+ let df = self.df.as_ref().clone().select_exprs(&args)?;
+ Ok(Self::new(df))
+ }
+
#[pyo3(signature = (*args))]
fn select(&self, args: Vec<PyExpr>) -> PyDataFusionResult<Self> {
let expr: Vec<Expr> = args.into_iter().map(|e| e.into()).collect();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]