This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 0e6fe51 Rename collect to to_memtable (#86)
0e6fe51 is described below
commit 0e6fe515a26e6af01a25382efd8a1e16d6ae972b
Author: jp <[email protected]>
AuthorDate: Mon Sep 15 21:18:59 2025 -0700
Rename collect to to_memtable (#86)
---
python/sedonadb/python/sedonadb/dataframe.py | 13 ++++++++-----
python/sedonadb/python/sedonadb/testing.py | 2 +-
python/sedonadb/src/dataframe.rs | 6 +++++-
python/sedonadb/tests/test_dataframe.py | 4 ++--
4 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/python/sedonadb/python/sedonadb/dataframe.py b/python/sedonadb/python/sedonadb/dataframe.py
index b390e77..8bbe16f 100644
--- a/python/sedonadb/python/sedonadb/dataframe.py
+++ b/python/sedonadb/python/sedonadb/dataframe.py
@@ -176,17 +176,20 @@ class DataFrame:
"""
self._impl.to_view(self._ctx, name, overwrite)
- def collect(self) -> "DataFrame":
- """Collect a data frame into memory
+ def to_memtable(self) -> "DataFrame":
+ """Collect a data frame into a memtable
Executes the logical plan represented by this object and returns a
DataFrame representing it.
+ Does not guarantee ordering of rows. Use `to_arrow_table()` if
+ ordering is needed.
+
Examples:
>>> import sedonadb
>>> con = sedonadb.connect()
- >>> con.sql("SELECT ST_Point(0, 1) as geom").collect().show()
+ >>> con.sql("SELECT ST_Point(0, 1) as geom").to_memtable().show()
┌────────────┐
│ geom │
│ geometry │
@@ -195,7 +198,7 @@ class DataFrame:
└────────────┘
"""
- return DataFrame(self._ctx, self._impl.collect(self._ctx))
+ return DataFrame(self._ctx, self._impl.to_memtable(self._ctx))
def __datafusion_table_provider__(self):
return self._impl.__datafusion_table_provider__()
@@ -412,7 +415,7 @@ def _scan_default(ctx_impl, obj, schema):
def _scan_collected_default(ctx_impl, obj, schema):
- return _scan_default(ctx_impl, obj, schema).collect()
+ return _scan_default(ctx_impl, obj, schema).to_memtable()
def _scan_geopandas(ctx_impl, obj, schema):
diff --git a/python/sedonadb/python/sedonadb/testing.py b/python/sedonadb/python/sedonadb/testing.py
index 33232d1..df04907 100644
--- a/python/sedonadb/python/sedonadb/testing.py
+++ b/python/sedonadb/python/sedonadb/testing.py
@@ -324,7 +324,7 @@ class SedonaDB(DBEngine):
return cls(*args, **kwargs)
def create_table_parquet(self, name, paths) -> "SedonaDB":
- self.con.read_parquet(paths).collect().to_view(name, overwrite=True)
+ self.con.read_parquet(paths).to_memtable().to_view(name, overwrite=True)
return self
def create_view_parquet(self, name, paths) -> "SedonaDB":
diff --git a/python/sedonadb/src/dataframe.rs b/python/sedonadb/src/dataframe.rs
index aae953b..f6ddf06 100644
--- a/python/sedonadb/src/dataframe.rs
+++ b/python/sedonadb/src/dataframe.rs
@@ -111,7 +111,11 @@ impl InternalDataFrame {
Ok(())
}
- fn collect<'py>(&self, py: Python<'py>, ctx: &InternalContext) -> Result<Self, PySedonaError> {
+ fn to_memtable<'py>(
+ &self,
+ py: Python<'py>,
+ ctx: &InternalContext,
+ ) -> Result<Self, PySedonaError> {
let schema = self.inner.schema();
let partitions =
wait_for_future(py, &self.runtime,
self.inner.clone().collect_partitioned())??;
diff --git a/python/sedonadb/tests/test_dataframe.py b/python/sedonadb/tests/test_dataframe.py
index bd79b2a..289e9c4 100644
--- a/python/sedonadb/tests/test_dataframe.py
+++ b/python/sedonadb/tests/test_dataframe.py
@@ -165,9 +165,9 @@ def test_schema_non_null_crs(con):
assert df.schema.field("geom").type.crs == gat.OGC_CRS84
-def test_collect(con):
+def test_to_memtable(con):
df = con.sql("SELECT 1 as one")
- pd.testing.assert_frame_equal(df.collect().to_pandas(), df.to_pandas())
+ pd.testing.assert_frame_equal(df.to_memtable().to_pandas(), df.to_pandas())
def test_to_view(con):