This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 653ab44 feat(python/sedonadb): Add DataFrame.execute() for non-result
queries (#102)
653ab44 is described below
commit 653ab44bdd2923b5c395828f93de7fc3085ff6c2
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Sep 17 11:25:15 2025 -0500
feat(python/sedonadb): Add DataFrame.execute() for non-result queries (#102)
---
python/sedonadb/python/sedonadb/dataframe.py | 26 ++++++++++++++++++++++++++
python/sedonadb/src/dataframe.rs | 11 +++++++++++
python/sedonadb/tests/test_dataframe.py | 10 ++++++++++
3 files changed, 47 insertions(+)
diff --git a/python/sedonadb/python/sedonadb/dataframe.py b/python/sedonadb/python/sedonadb/dataframe.py
index ff8316b..bd45432 100644
--- a/python/sedonadb/python/sedonadb/dataframe.py
+++ b/python/sedonadb/python/sedonadb/dataframe.py
@@ -112,6 +112,32 @@ class DataFrame:
"""
return DataFrame(self._ctx, self._impl.limit(n, offset))
+ def execute(self) -> None:
+ """Execute the plan represented by this DataFrame
+
+ This will execute the query without collecting results into memory,
+ which is useful for executing SQL statements like SET, CREATE VIEW,
+ and CREATE EXTERNAL TABLE.
+
+ Note that this is functionally similar to `.count()` except it does
+ not apply any optimizations (e.g., does not use statistics to avoid
+ reading data to calculate a count).
+
+ Examples:
+
+ >>> sd = sedona.db.connect()
+ >>> sd.sql("CREATE OR REPLACE VIEW temp_view AS SELECT 1 as
one").execute()
+ 0
+ >>> sd.view("temp_view").show()
+ ┌───────┐
+ │ one │
+ │ int64 │
+ ╞═══════╡
+ │ 1 │
+ └───────┘
+ """
+ return self._impl.execute()
+
def count(self) -> int:
"""Compute the number of rows in this DataFrame
diff --git a/python/sedonadb/src/dataframe.rs b/python/sedonadb/src/dataframe.rs
index 6d92205..940c332 100644
--- a/python/sedonadb/src/dataframe.rs
+++ b/python/sedonadb/src/dataframe.rs
@@ -89,6 +89,17 @@ impl InternalDataFrame {
Ok(InternalDataFrame::new(inner, self.runtime.clone()))
}
+    /// Executes the plan as a stream and returns the total number of rows
+    /// across every batch the stream yields.
+    ///
+    /// Each batch goes out of scope as soon as its rows have been counted, so
+    /// results are not accumulated here. An error from starting the stream or
+    /// from any individual batch is returned to the caller.
+    fn execute<'py>(&self, py: Python<'py>) -> Result<usize, PySedonaError> {
+        let stream = wait_for_future(py, &self.runtime, self.inner.clone().execute_stream())??;
+        let reader = PySedonaStreamReader::new(self.runtime.clone(), stream);
+
+        let mut total_rows: usize = 0;
+        for maybe_batch in reader {
+            let batch = maybe_batch?;
+            total_rows += batch.num_rows();
+        }
+
+        Ok(total_rows)
+    }
+
fn count<'py>(&self, py: Python<'py>) -> Result<usize, PySedonaError> {
Ok(wait_for_future(
py,
diff --git a/python/sedonadb/tests/test_dataframe.py b/python/sedonadb/tests/test_dataframe.py
index aab33ef..ef8cabc 100644
--- a/python/sedonadb/tests/test_dataframe.py
+++ b/python/sedonadb/tests/test_dataframe.py
@@ -213,6 +213,16 @@ def test_head_limit(con):
)
+def test_execute(con):
+ df = con.sql("SELECT * FROM (VALUES ('one'), ('two'), ('three')) AS
t(val)")
+ assert df.execute() == 3
+
+ df = con.sql("CREATE OR REPLACE VIEW temp_view AS SELECT 1 as one")
+ assert df.execute() == 0
+ assert con.view("temp_view").count() == 1
+ con.drop_view("temp_view")
+
+
def test_count(con):
df = con.sql("SELECT * FROM (VALUES ('one'), ('two'), ('three')) AS
t(val)")
assert df.count() == 3