(sedona-db) branch main updated: Rename collect to to_memtable (#86)

jiayu Mon, 15 Sep 2025 21:19:25 -0700

This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git



The following commit(s) were added to refs/heads/main by this push:
     new 0e6fe51  Rename collect to to_memtable (#86)
0e6fe51 is described below

commit 0e6fe515a26e6af01a25382efd8a1e16d6ae972b
Author: jp <[email protected]>
AuthorDate: Mon Sep 15 21:18:59 2025 -0700

    Rename collect to to_memtable (#86)
---
 python/sedonadb/python/sedonadb/dataframe.py | 13 ++++++++-----
 python/sedonadb/python/sedonadb/testing.py   |  2 +-
 python/sedonadb/src/dataframe.rs             |  6 +++++-
 python/sedonadb/tests/test_dataframe.py      |  4 ++--
 4 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/python/sedonadb/python/sedonadb/dataframe.py b/python/sedonadb/python/sedonadb/dataframe.py
index b390e77..8bbe16f 100644
--- a/python/sedonadb/python/sedonadb/dataframe.py
+++ b/python/sedonadb/python/sedonadb/dataframe.py
@@ -176,17 +176,20 @@ class DataFrame:
         """
         self._impl.to_view(self._ctx, name, overwrite)
 
-    def collect(self) -> "DataFrame":
-        """Collect a data frame into memory
+    def to_memtable(self) -> "DataFrame":
+        """Collect a data frame into a memtable
 
         Executes the logical plan represented by this object and returns a
         DataFrame representing it.
 
+        Does not guarantee ordering of rows.  Use `to_arrow_table()` if
+        ordering is needed.
+
         Examples:
 
             >>> import sedonadb
             >>> con = sedonadb.connect()
-            >>> con.sql("SELECT ST_Point(0, 1) as geom").collect().show()
+            >>> con.sql("SELECT ST_Point(0, 1) as geom").to_memtable().show()
             ┌────────────┐
             │    geom    │
             │  geometry  │
@@ -195,7 +198,7 @@ class DataFrame:
             └────────────┘
 
         """
-        return DataFrame(self._ctx, self._impl.collect(self._ctx))
+        return DataFrame(self._ctx, self._impl.to_memtable(self._ctx))
 
     def __datafusion_table_provider__(self):
         return self._impl.__datafusion_table_provider__()
@@ -412,7 +415,7 @@ def _scan_default(ctx_impl, obj, schema):
 
 
 def _scan_collected_default(ctx_impl, obj, schema):
-    return _scan_default(ctx_impl, obj, schema).collect()
+    return _scan_default(ctx_impl, obj, schema).to_memtable()
 
 
 def _scan_geopandas(ctx_impl, obj, schema):
diff --git a/python/sedonadb/python/sedonadb/testing.py b/python/sedonadb/python/sedonadb/testing.py
index 33232d1..df04907 100644
--- a/python/sedonadb/python/sedonadb/testing.py
+++ b/python/sedonadb/python/sedonadb/testing.py
@@ -324,7 +324,7 @@ class SedonaDB(DBEngine):
         return cls(*args, **kwargs)
 
     def create_table_parquet(self, name, paths) -> "SedonaDB":
-        self.con.read_parquet(paths).collect().to_view(name, overwrite=True)
+        self.con.read_parquet(paths).to_memtable().to_view(name, overwrite=True)
         return self
 
     def create_view_parquet(self, name, paths) -> "SedonaDB":
diff --git a/python/sedonadb/src/dataframe.rs b/python/sedonadb/src/dataframe.rs
index aae953b..f6ddf06 100644
--- a/python/sedonadb/src/dataframe.rs
+++ b/python/sedonadb/src/dataframe.rs
@@ -111,7 +111,11 @@ impl InternalDataFrame {
         Ok(())
     }
 
-    fn collect<'py>(&self, py: Python<'py>, ctx: &InternalContext) -> Result<Self, PySedonaError> {
+    fn to_memtable<'py>(
+        &self,
+        py: Python<'py>,
+        ctx: &InternalContext,
+    ) -> Result<Self, PySedonaError> {
         let schema = self.inner.schema();
         let partitions =
             wait_for_future(py, &self.runtime, self.inner.clone().collect_partitioned())??;
diff --git a/python/sedonadb/tests/test_dataframe.py b/python/sedonadb/tests/test_dataframe.py
index bd79b2a..289e9c4 100644
--- a/python/sedonadb/tests/test_dataframe.py
+++ b/python/sedonadb/tests/test_dataframe.py
@@ -165,9 +165,9 @@ def test_schema_non_null_crs(con):
     assert df.schema.field("geom").type.crs == gat.OGC_CRS84
 
 
-def test_collect(con):
+def test_to_memtable(con):
     df = con.sql("SELECT 1 as one")
-    pd.testing.assert_frame_equal(df.collect().to_pandas(), df.to_pandas())
+    pd.testing.assert_frame_equal(df.to_memtable().to_pandas(), df.to_pandas())
 
 
 def test_to_view(con):

(sedona-db) branch main updated: Rename collect to to_memtable (#86)

Reply via email to