This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-python.git
The following commit(s) were added to refs/heads/main by this push:
new 309fc48 feat: expose offset in python API (#437)
309fc48 is described below
commit 309fc486c47d86776aeec07d86cd04b5d70d97a1
Author: Phillip Cloud <[email protected]>
AuthorDate: Mon Jul 24 05:47:17 2023 -0700
feat: expose offset in python API (#437)
---
datafusion/tests/test_dataframe.py | 11 +++++++++++
datafusion/tests/test_expr.py | 8 ++++++++
src/dataframe.rs | 5 +++--
3 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py
index 4df2061..78cb50f 100644
--- a/datafusion/tests/test_dataframe.py
+++ b/datafusion/tests/test_dataframe.py
@@ -115,6 +115,17 @@ def test_limit(df):
assert len(result.column(1)) == 1
+def test_limit_with_offset(df):
+ # only 3 rows, but limit past the end to ensure that offset is working
+ df = df.limit(5, offset=2)
+
+ # execute and collect the first (and only) batch
+ result = df.collect()[0]
+
+ assert len(result.column(0)) == 1
+ assert len(result.column(1)) == 1
+
+
def test_with_column(df):
df = df.with_column("c", column("a") + column("b"))
diff --git a/datafusion/tests/test_expr.py b/datafusion/tests/test_expr.py
index 0c4869f..73f7d08 100644
--- a/datafusion/tests/test_expr.py
+++ b/datafusion/tests/test_expr.py
@@ -81,6 +81,14 @@ def test_limit(test_ctx):
plan = plan.to_variant()
assert isinstance(plan, Limit)
+ assert plan.skip() == 0
+
+ df = test_ctx.sql("select c1 from test LIMIT 10 OFFSET 5")
+ plan = df.logical_plan()
+
+ plan = plan.to_variant()
+ assert isinstance(plan, Limit)
+ assert plan.skip() == 5
def test_aggregate_query(test_ctx):
diff --git a/src/dataframe.rs b/src/dataframe.rs
index 00c12e8..b8d8ddc 100644
--- a/src/dataframe.rs
+++ b/src/dataframe.rs
@@ -136,8 +136,9 @@ impl PyDataFrame {
Ok(Self::new(df))
}
- fn limit(&self, count: usize) -> PyResult<Self> {
- let df = self.df.as_ref().clone().limit(0, Some(count))?;
+ #[pyo3(signature = (count, offset=0))]
+ fn limit(&self, count: usize, offset: usize) -> PyResult<Self> {
+ let df = self.df.as_ref().clone().limit(offset, Some(count))?;
Ok(Self::new(df))
}