This is an automated email from the ASF dual-hosted git repository.

timsaucer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 72f2743  feat: expose drop method (#913)
72f2743 is described below

commit 72f274385792d6eee3d9053ed786966b4899b24d
Author: Ion Koutsouris <[email protected]>
AuthorDate: Tue Oct 15 13:20:22 2024 +0200

    feat: expose drop method (#913)
---
 python/datafusion/dataframe.py | 11 +++++++++++
 python/tests/test_dataframe.py | 11 +++++++++++
 src/dataframe.rs               |  7 +++++++
 3 files changed, 29 insertions(+)

diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py
index c5ac0bb..a9e4d4d 100644
--- a/python/datafusion/dataframe.py
+++ b/python/datafusion/dataframe.py
@@ -129,6 +129,17 @@ class DataFrame:
         ]
         return DataFrame(self.df.select(*exprs_internal))
 
+    def drop(self, *columns: str) -> DataFrame:
+        """Drop an arbitrary number of columns.
+
+        Args:
+            columns: Column names to drop from the dataframe.
+
+        Returns:
+            DataFrame with those columns removed in the projection.
+        """
+        return DataFrame(self.df.drop(*columns))
+
     def filter(self, *predicates: Expr) -> DataFrame:
         """Return a DataFrame for which ``predicate`` evaluates to ``True``.
 
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
index e89c571..88c642a 100644
--- a/python/tests/test_dataframe.py
+++ b/python/tests/test_dataframe.py
@@ -169,6 +169,17 @@ def test_sort(df):
     assert table.to_pydict() == expected
 
 
+def test_drop(df):
+    df = df.drop("c")
+
+    # execute and collect the first (and only) batch
+    result = df.collect()[0]
+
+    assert df.schema().names == ["a", "b"]
+    assert result.column(0) == pa.array([1, 2, 3])
+    assert result.column(1) == pa.array([4, 5, 6])
+
+
 def test_limit(df):
     df = df.limit(1)
 
diff --git a/src/dataframe.rs b/src/dataframe.rs
index e77ca84..db24370 100644
--- a/src/dataframe.rs
+++ b/src/dataframe.rs
@@ -170,6 +170,13 @@ impl PyDataFrame {
         Ok(Self::new(df))
     }
 
+    #[pyo3(signature = (*args))]
+    fn drop(&self, args: Vec<PyBackedStr>) -> PyResult<Self> {
+        let cols = args.iter().map(|s| s.as_ref()).collect::<Vec<&str>>();
+        let df = self.df.as_ref().clone().drop_columns(&cols)?;
+        Ok(Self::new(df))
+    }
+
     fn filter(&self, predicate: PyExpr) -> PyResult<Self> {
         let df = self.df.as_ref().clone().filter(predicate.into())?;
         Ok(Self::new(df))


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to