This is an automated email from the ASF dual-hosted git repository.
timsaucer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git
The following commit(s) were added to refs/heads/main by this push:
new 72f2743 feat: expose drop method (#913)
72f2743 is described below
commit 72f274385792d6eee3d9053ed786966b4899b24d
Author: Ion Koutsouris <[email protected]>
AuthorDate: Tue Oct 15 13:20:22 2024 +0200
feat: expose drop method (#913)
---
python/datafusion/dataframe.py | 11 +++++++++++
python/tests/test_dataframe.py | 11 +++++++++++
src/dataframe.rs | 7 +++++++
3 files changed, 29 insertions(+)
diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py
index c5ac0bb..a9e4d4d 100644
--- a/python/datafusion/dataframe.py
+++ b/python/datafusion/dataframe.py
@@ -129,6 +129,17 @@ class DataFrame:
]
return DataFrame(self.df.select(*exprs_internal))
+ def drop(self, *columns: str) -> DataFrame:
+ """Drop an arbitrary number of columns.
+
+ Args:
+ columns: Column names to drop from the dataframe.
+
+ Returns:
+ DataFrame with those columns removed in the projection.
+ """
+ return DataFrame(self.df.drop(*columns))
+
def filter(self, *predicates: Expr) -> DataFrame:
"""Return a DataFrame for which ``predicate`` evaluates to ``True``.
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
index e89c571..88c642a 100644
--- a/python/tests/test_dataframe.py
+++ b/python/tests/test_dataframe.py
@@ -169,6 +169,17 @@ def test_sort(df):
assert table.to_pydict() == expected
+def test_drop(df):
+ df = df.drop("c")
+
+ # execute and collect the first (and only) batch
+ result = df.collect()[0]
+
+ assert df.schema().names == ["a", "b"]
+ assert result.column(0) == pa.array([1, 2, 3])
+ assert result.column(1) == pa.array([4, 5, 6])
+
+
def test_limit(df):
df = df.limit(1)
diff --git a/src/dataframe.rs b/src/dataframe.rs
index e77ca84..db24370 100644
--- a/src/dataframe.rs
+++ b/src/dataframe.rs
@@ -170,6 +170,13 @@ impl PyDataFrame {
Ok(Self::new(df))
}
+ #[pyo3(signature = (*args))]
+ fn drop(&self, args: Vec<PyBackedStr>) -> PyResult<Self> {
+ let cols = args.iter().map(|s| s.as_ref()).collect::<Vec<&str>>();
+ let df = self.df.as_ref().clone().drop_columns(&cols)?;
+ Ok(Self::new(df))
+ }
+
fn filter(&self, predicate: PyExpr) -> PyResult<Self> {
let df = self.df.as_ref().clone().filter(predicate.into())?;
Ok(Self::new(df))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]