This is an automated email from the ASF dual-hosted git repository.

timsaucer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 6b162850 feat: expose `select_exprs` method on DataFrame (#1271)
6b162850 is described below

commit 6b1628509c5dc214c673ea77215e62b9a4693a01
Author: Marko Milenković <[email protected]>
AuthorDate: Tue Oct 14 15:20:40 2025 +0100

    feat: expose `select_exprs` method on DataFrame (#1271)
    
    * feat: expose select_exprs method on DataFrame
    
    * change python doc
    
    * ruff linting
    
    ---------
    
    Co-authored-by: Tim Saucer <[email protected]>
---
 python/datafusion/dataframe.py | 11 +++++++++++
 python/tests/test_dataframe.py | 32 ++++++++++++++++++++++++++++++++
 src/dataframe.rs               |  7 +++++++
 3 files changed, 50 insertions(+)

diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py
index 86131c45..d15111d5 100644
--- a/python/datafusion/dataframe.py
+++ b/python/datafusion/dataframe.py
@@ -405,6 +405,17 @@ class DataFrame:
         """
         return self.select(*args)
 
+    def select_exprs(self, *args: str) -> DataFrame:
+        """Project arbitrary list of expression strings into a new DataFrame.
+
+        This method will parse string expressions into logical plan expressions.
+        The output DataFrame has one column for each expression.
+
+        Returns:
+            DataFrame only containing the specified columns.
+        """
+        return self.df.select_exprs(*args)
+
     def select(self, *exprs: Expr | str) -> DataFrame:
         """Project arbitrary expressions into a new :py:class:`DataFrame`.
 
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
index 76b80803..9317711f 100644
--- a/python/tests/test_dataframe.py
+++ b/python/tests/test_dataframe.py
@@ -221,6 +221,38 @@ def test_select(df):
     assert result.column(1) == pa.array([1, 2, 3])
 
 
+def test_select_exprs(df):
+    df_1 = df.select_exprs(
+        "a + b",
+        "a - b",
+    )
+
+    # execute and collect the first (and only) batch
+    result = df_1.collect()[0]
+
+    assert result.column(0) == pa.array([5, 7, 9])
+    assert result.column(1) == pa.array([-3, -3, -3])
+
+    df_2 = df.select_exprs("b", "a")
+
+    # execute and collect the first (and only) batch
+    result = df_2.collect()[0]
+
+    assert result.column(0) == pa.array([4, 5, 6])
+    assert result.column(1) == pa.array([1, 2, 3])
+
+    df_3 = df.select_exprs(
+        "abs(a + b)",
+        "abs(a - b)",
+    )
+
+    # execute and collect the first (and only) batch
+    result = df_3.collect()[0]
+
+    assert result.column(0) == pa.array([5, 7, 9])
+    assert result.column(1) == pa.array([3, 3, 3])
+
+
 def test_drop_quoted_columns():
     ctx = SessionContext()
     batch = pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], names=["ID_For_Students"])
diff --git a/src/dataframe.rs b/src/dataframe.rs
index 34da8744..1f87f99d 100644
--- a/src/dataframe.rs
+++ b/src/dataframe.rs
@@ -435,6 +435,13 @@ impl PyDataFrame {
         Ok(Self::new(df))
     }
 
+    #[pyo3(signature = (*args))]
+    fn select_exprs(&self, args: Vec<PyBackedStr>) -> PyDataFusionResult<Self> {
+        let args = args.iter().map(|s| s.as_ref()).collect::<Vec<&str>>();
+        let df = self.df.as_ref().clone().select_exprs(&args)?;
+        Ok(Self::new(df))
+    }
+
     #[pyo3(signature = (*args))]
     fn select(&self, args: Vec<PyExpr>) -> PyDataFusionResult<Self> {
         let expr: Vec<Expr> = args.into_iter().map(|e| e.into()).collect();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to