This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push: new fab75bf refactor(python/sedonadb): Scope options to the SedonaContext (#106) fab75bf is described below commit fab75bfb9adc9cd81623ca1f8e287c1fffb7ce28 Author: Dewey Dunnington <de...@wherobots.com> AuthorDate: Thu Sep 18 09:44:43 2025 -0500 refactor(python/sedonadb): Scope options to the SedonaContext (#106) --- python/sedonadb/python/sedonadb/__init__.py | 4 --- python/sedonadb/python/sedonadb/_options.py | 14 -------- python/sedonadb/python/sedonadb/context.py | 21 ++++++++++-- python/sedonadb/python/sedonadb/dataframe.py | 49 ++++++++++++++-------------- python/sedonadb/tests/test_context.py | 8 +++++ python/sedonadb/tests/test_dataframe.py | 4 +-- 6 files changed, 52 insertions(+), 48 deletions(-) diff --git a/python/sedonadb/python/sedonadb/__init__.py b/python/sedonadb/python/sedonadb/__init__.py index 9d88698..7d46b86 100644 --- a/python/sedonadb/python/sedonadb/__init__.py +++ b/python/sedonadb/python/sedonadb/__init__.py @@ -16,10 +16,6 @@ # under the License. from sedonadb import _lib from sedonadb.context import connect, configure_proj -from sedonadb import _options - -options = _options.global_options() -"""Global options for SedonaDB""" __version__ = _lib.sedona_python_version() diff --git a/python/sedonadb/python/sedonadb/_options.py b/python/sedonadb/python/sedonadb/_options.py index 2f3c65f..b7ac40a 100644 --- a/python/sedonadb/python/sedonadb/_options.py +++ b/python/sedonadb/python/sedonadb/_options.py @@ -52,17 +52,3 @@ class Options: @width.setter def width(self, value: Optional[int]): self._width = value - - -def global_options() -> Options: - """Access the global options - - Most users should use `sedonadb.options` to access this singleton; however, - internal SedonaDB Python code must use this function to avoid a circular - dependency. 
- """ - global _global_options - return _global_options - - -_global_options = Options() diff --git a/python/sedonadb/python/sedonadb/context.py b/python/sedonadb/python/sedonadb/context.py index 32f9dee..6348695 100644 --- a/python/sedonadb/python/sedonadb/context.py +++ b/python/sedonadb/python/sedonadb/context.py @@ -22,6 +22,7 @@ from typing import Any, Dict, Iterable, Literal, Optional, Union from sedonadb._lib import InternalContext, configure_proj_shared from sedonadb.dataframe import DataFrame, _create_data_frame from sedonadb.utility import sedona # noqa: F401 +from sedonadb._options import Options class SedonaContext: @@ -30,10 +31,23 @@ class SedonaContext: This object keeps track of state such as registered functions, registered tables, and available memory. This is similar to a Spark SessionContext or a database connection. + + Examples: + + >>> sd = sedona.db.connect() + >>> sd.options.interactive = True + >>> sd.sql("SELECT 1 as one") + ┌───────┐ + │ one │ + │ int64 │ + ╞═══════╡ + │ 1 │ + └───────┘ """ def __init__(self): self._impl = InternalContext() + self.options = Options() def create_data_frame(self, obj: Any, schema: Any = None) -> DataFrame: """Create a DataFrame from an in-memory or protocol-enabled object. 
@@ -64,7 +78,7 @@ class SedonaContext: │ 1 │ └───────┘ """ - return _create_data_frame(self._impl, obj, schema) + return _create_data_frame(self._impl, obj, schema, self.options) def view(self, name: str) -> DataFrame: """Create a [DataFrame][sedonadb.dataframe.DataFrame] from a named view @@ -88,7 +102,7 @@ class SedonaContext: >>> sd.drop_view("foofy") """ - return DataFrame(self._impl, self._impl.view(name)) + return DataFrame(self._impl, self._impl.view(name), self.options) def drop_view(self, name: str) -> None: """Remove a named view @@ -135,6 +149,7 @@ class SedonaContext: return DataFrame( self._impl, self._impl.read_parquet([str(path) for path in table_paths], options), + self.options, ) def sql(self, sql: str) -> DataFrame: @@ -153,7 +168,7 @@ class SedonaContext: <sedonadb.dataframe.DataFrame object at ...> """ - return DataFrame(self._impl, self._impl.sql(sql)) + return DataFrame(self._impl, self._impl.sql(sql), self.options) def connect() -> SedonaContext: diff --git a/python/sedonadb/python/sedonadb/dataframe.py b/python/sedonadb/python/sedonadb/dataframe.py index bd45432..673fbfb 100644 --- a/python/sedonadb/python/sedonadb/dataframe.py +++ b/python/sedonadb/python/sedonadb/dataframe.py @@ -18,7 +18,6 @@ from pathlib import Path from typing import TYPE_CHECKING, Union, Optional, Any, Iterable -from sedonadb._options import global_options from sedonadb.utility import sedona # noqa: F401 @@ -36,9 +35,10 @@ class DataFrame: reading a file, or executing SQL. 
""" - def __init__(self, ctx, impl): + def __init__(self, ctx, impl, options): self._ctx = ctx self._impl = impl + self._options = options @property def schema(self): @@ -110,7 +110,7 @@ class DataFrame: └───────┘ """ - return DataFrame(self._ctx, self._impl.limit(n, offset)) + return DataFrame(self._ctx, self._impl.limit(n, offset), self._options) def execute(self) -> None: """Execute the plan represented by this DataFrame @@ -220,7 +220,7 @@ class DataFrame: └────────────┘ """ - return DataFrame(self._ctx, self._impl.to_memtable(self._ctx)) + return DataFrame(self._ctx, self._impl.to_memtable(self._ctx), self._options) def __datafusion_table_provider__(self): return self._impl.__datafusion_table_provider__() @@ -376,7 +376,7 @@ class DataFrame: └────────────┘ """ - width = _out_width(width) + width = self._out_width(width) print(self._impl.show(self._ctx, limit, width, ascii), end="") def explain( @@ -420,29 +420,28 @@ class DataFrame: │ ┆ │ └───────────────┴─────────────────────────────────┘ """ - return DataFrame(self._ctx, self._impl.explain(type, format)) + return DataFrame(self._ctx, self._impl.explain(type, format), self._options) def __repr__(self) -> str: - if global_options().interactive: - width = _out_width() + if self._options.interactive: + width = self._out_width() return self._impl.show(self._ctx, 10, width, ascii=False).strip() else: return super().__repr__() + def _out_width(self, width=None) -> int: + if width is None: + width = self._options.width -def _out_width(width=None) -> int: - if width is None: - width = global_options().width + if width is None: + import shutil - if width is None: - import shutil + width, _ = shutil.get_terminal_size(fallback=(100, 24)) - width, _ = shutil.get_terminal_size(fallback=(100, 24)) + return width - return width - -def _create_data_frame(ctx_impl, obj, schema) -> DataFrame: +def _create_data_frame(ctx_impl, obj, schema, options) -> DataFrame: """Create a DataFrame (internal) This is defined here because we 
need it in future dataframe methods like @@ -461,27 +460,27 @@ def _create_data_frame(ctx_impl, obj, schema) -> DataFrame: # This includes geopandas/pandas DataFrames, pyarrow tables, and Polars tables. type_name = _qualified_type_name(obj) if type_name in SPECIAL_CASED_SCANS: - return SPECIAL_CASED_SCANS[type_name](ctx_impl, obj, schema) + return SPECIAL_CASED_SCANS[type_name](ctx_impl, obj, schema, options) # The default implementation handles objects that implement # __datafusion_table_provider__ or __arrow_c_stream__. For objects implementing # __arrow_c_stream__, this currently will only work for a single scan (i.e., # the returned data frame can't be previewed before the query is computed). - return _scan_default(ctx_impl, obj, schema) + return _scan_default(ctx_impl, obj, schema, options) -def _scan_default(ctx_impl, obj, schema): +def _scan_default(ctx_impl, obj, schema, options): impl = ctx_impl.create_data_frame(obj, schema) - return DataFrame(ctx_impl, impl) + return DataFrame(ctx_impl, impl, options) -def _scan_collected_default(ctx_impl, obj, schema): - return _scan_default(ctx_impl, obj, schema).to_memtable() +def _scan_collected_default(ctx_impl, obj, schema, options): + return _scan_default(ctx_impl, obj, schema, options).to_memtable() -def _scan_geopandas(ctx_impl, obj, schema): +def _scan_geopandas(ctx_impl, obj, schema, options): return _scan_collected_default( - ctx_impl, obj.to_arrow(geometry_encoding="WKB"), schema + ctx_impl, obj.to_arrow(geometry_encoding="WKB"), schema, options ) diff --git a/python/sedonadb/tests/test_context.py b/python/sedonadb/tests/test_context.py index 39ed012..758b201 100644 --- a/python/sedonadb/tests/test_context.py +++ b/python/sedonadb/tests/test_context.py @@ -20,6 +20,14 @@ import pytest import sedonadb +def test_options(): + sd = sedonadb.connect() + assert "DataFrame object at" in repr(sd.sql("SELECT 1 as one")) + + sd.options.interactive = True + assert "DataFrame object at" not in repr(sd.sql("SELECT 1 as 
one")) + + def test_read_parquet(con, geoarrow_data): # Check one file tab = con.read_parquet( diff --git a/python/sedonadb/tests/test_dataframe.py b/python/sedonadb/tests/test_dataframe.py index ef8cabc..b609635 100644 --- a/python/sedonadb/tests/test_dataframe.py +++ b/python/sedonadb/tests/test_dataframe.py @@ -446,7 +446,7 @@ def test_repr(con): ) try: - sedonadb.options.interactive = True + con.options.interactive = True repr_interactive = repr(con.sql("SELECT 1 as one")) expected = """ ┌───────┐ @@ -458,4 +458,4 @@ def test_repr(con): """.strip() assert repr_interactive == expected finally: - sedonadb.options.interactive = False + con.options.interactive = False