This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new a7889603 feat(rust/sedona-geoparquet): Ensure metadata cache is used
in GeoParquet wrapper code (#646)
a7889603 is described below
commit a78896036c0b0693c07813c53478a6b53dc7ce30
Author: Dewey Dunnington <[email protected]>
AuthorDate: Mon Feb 23 16:14:08 2026 -0600
feat(rust/sedona-geoparquet): Ensure metadata cache is used in GeoParquet
wrapper code (#646)
Co-authored-by: Copilot <[email protected]>
---
docs/overture-examples.ipynb | 601 ++++++++++++------------------
docs/overture-examples.md | 439 +++++++++-------------
python/sedonadb/tests/test_udf.py | 39 --
rust/sedona-geoparquet/src/file_opener.rs | 3 +
rust/sedona-geoparquet/src/format.rs | 53 ++-
5 files changed, 472 insertions(+), 663 deletions(-)
diff --git a/docs/overture-examples.ipynb b/docs/overture-examples.ipynb
index f542a6d0..f312d861 100644
--- a/docs/overture-examples.ipynb
+++ b/docs/overture-examples.ipynb
@@ -28,7 +28,7 @@
"\n",
"> Note: Before running this notebook, ensure that you have installed
SedonaDB: `pip install \"apache-sedona[db]\"`\n",
"\n",
- "This notebook demonstrates how to query and analyze the [Overture
Maps](https://overturemaps.org/) dataset using SedonaDB. See [this
page](https://docs.overturemaps.org/release-calendar/) to get the latest
version of the Overture data.\n",
+ "This notebook demonstrates how to query and analyze the [Overture
Maps](https://overturemaps.org/) dataset using SedonaDB. \n",
"\n",
"The notebook explains how to:\n",
"\n",
@@ -39,534 +39,429 @@
},
{
"cell_type": "code",
- "execution_count": 19,
- "id": "c5e580ff",
- "metadata": {
- "collapsed": true,
- "jupyter": {
- "outputs_hidden": true
- },
- "scrolled": true
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: lonboard in
/opt/miniconda3/lib/python3.12/site-packages (0.12.1)\n",
- "Requirement already satisfied: anywidget~=0.9.0 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (0.9.18)\n",
- "Requirement already satisfied: arro3-compute>=0.4.1 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (0.6.3)\n",
- "Requirement already satisfied: arro3-core>=0.4.1 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (0.6.3)\n",
- "Requirement already satisfied: arro3-io>=0.4.1 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (0.6.3)\n",
- "Requirement already satisfied: geoarrow-rust-core>=0.5.2 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (0.5.2)\n",
- "Requirement already satisfied: ipywidgets>=7.6.0 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (8.1.7)\n",
- "Requirement already satisfied: numpy>=1.14 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (2.3.3)\n",
- "Requirement already satisfied: pyproj>=3.3 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (3.7.2)\n",
- "Requirement already satisfied: traitlets>=5.7.1 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (5.14.3)\n",
- "Requirement already satisfied: psygnal>=0.8.1 in
/opt/miniconda3/lib/python3.12/site-packages (from anywidget~=0.9.0->lonboard)
(0.14.1)\n",
- "Requirement already satisfied: typing-extensions>=4.2.0 in
/opt/miniconda3/lib/python3.12/site-packages (from anywidget~=0.9.0->lonboard)
(4.15.0)\n",
- "Requirement already satisfied: comm>=0.1.3 in
/opt/miniconda3/lib/python3.12/site-packages (from ipywidgets>=7.6.0->lonboard)
(0.2.3)\n",
- "Requirement already satisfied: ipython>=6.1.0 in
/opt/miniconda3/lib/python3.12/site-packages (from ipywidgets>=7.6.0->lonboard)
(9.5.0)\n",
- "Requirement already satisfied: widgetsnbextension~=4.0.14 in
/opt/miniconda3/lib/python3.12/site-packages (from ipywidgets>=7.6.0->lonboard)
(4.0.14)\n",
- "Requirement already satisfied: jupyterlab_widgets~=3.0.15 in
/opt/miniconda3/lib/python3.12/site-packages (from ipywidgets>=7.6.0->lonboard)
(3.0.15)\n",
- "Requirement already satisfied: certifi in
/opt/miniconda3/lib/python3.12/site-packages (from pyproj>=3.3->lonboard)
(2025.8.3)\n",
- "Requirement already satisfied: decorator in
/opt/miniconda3/lib/python3.12/site-packages (from
ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (5.2.1)\n",
- "Requirement already satisfied: ipython-pygments-lexers in
/opt/miniconda3/lib/python3.12/site-packages (from
ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (1.1.1)\n",
- "Requirement already satisfied: jedi>=0.16 in
/opt/miniconda3/lib/python3.12/site-packages (from
ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (0.19.2)\n",
- "Requirement already satisfied: matplotlib-inline in
/opt/miniconda3/lib/python3.12/site-packages (from
ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (0.1.7)\n",
- "Requirement already satisfied: pexpect>4.3 in
/opt/miniconda3/lib/python3.12/site-packages (from
ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (4.9.0)\n",
- "Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in
/opt/miniconda3/lib/python3.12/site-packages (from
ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (3.0.52)\n",
- "Requirement already satisfied: pygments>=2.4.0 in
/opt/miniconda3/lib/python3.12/site-packages (from
ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (2.19.2)\n",
- "Requirement already satisfied: stack_data in
/opt/miniconda3/lib/python3.12/site-packages (from
ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (0.6.3)\n",
- "Requirement already satisfied: parso<0.9.0,>=0.8.4 in
/opt/miniconda3/lib/python3.12/site-packages (from
jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (0.8.5)\n",
- "Requirement already satisfied: ptyprocess>=0.5 in
/opt/miniconda3/lib/python3.12/site-packages (from
pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (0.7.0)\n",
- "Requirement already satisfied: wcwidth in
/opt/miniconda3/lib/python3.12/site-packages (from
prompt_toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets>=7.6.0->lonboard)
(0.2.14)\n",
- "Requirement already satisfied: executing>=1.2.0 in
/opt/miniconda3/lib/python3.12/site-packages (from
stack_data->ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (2.2.1)\n",
- "Requirement already satisfied: asttokens>=2.1.0 in
/opt/miniconda3/lib/python3.12/site-packages (from
stack_data->ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (3.0.0)\n",
- "Requirement already satisfied: pure-eval in
/opt/miniconda3/lib/python3.12/site-packages (from
stack_data->ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (0.2.3)\n",
- "Note: you may need to restart the kernel to use updated packages.\n"
- ]
- }
- ],
- "source": [
- "%pip install lonboard"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"id": "6d6fa0ab-b4ed-4e60-b099-a1af0998b486",
"metadata": {},
"outputs": [],
"source": [
"import sedona.db\n",
- "import os\n",
- "\n",
- "os.environ[\"AWS_SKIP_SIGNATURE\"] = \"true\"\n",
- "os.environ[\"AWS_DEFAULT_REGION\"] = \"us-west-2\"\n",
"\n",
"sd = sedona.db.connect()"
]
},
{
"cell_type": "markdown",
- "id": "4f44adfb-2973-4a65-b4f2-d24b28700b79",
+ "id": "4d7e32aa",
"metadata": {},
"source": [
- "## Overture buildings table"
+ "## Overture divisions\n",
+ "\n",
+ "Let's load a table! Like any local or remote collection of Parquet files,
we can use `sd.read_parquet()`. This is a lazy operation, fetching only
metadata required to calculate a table schema. To reduce the number of times
this needs to happen (and make the resulting DataFrame easier to reference in
SQL), we use `.to_view()`.\n",
+ "\n",
+ "> Overture removes old releases. See [this
page](https://docs.overturemaps.org/release-calendar/#current-release) to see
the latest version number and replace the relevant portion of the URL below."
]
},
{
"cell_type": "code",
- "execution_count": 3,
- "id": "52855769-4872-472a-9c42-afced3d85ca8",
+ "execution_count": 2,
+ "id": "a205670e",
"metadata": {},
"outputs": [],
"source": [
- "df = sd.read_parquet(\n",
- "
\"s3://overturemaps-us-west-2/release/2025-11-19.0/theme=buildings/type=building/\"\n",
- ")"
+ "sd.read_parquet(\n",
+ "
\"s3://overturemaps-us-west-2/release/2026-02-18.0/theme=divisions/type=division_area/\",\n",
+ " options={\"aws.skip_signature\": True, \"aws.region\":
\"us-west-2\"},\n",
+ ").to_view(\"divisions\")"
]
},
{
- "cell_type": "code",
- "execution_count": 4,
- "id": "b45b5e5c-64ed-49ba-a8aa-9f2292f617c6",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
-
"┌──────────────────────────────────────┬─────────────────────────────────────────┬───┬─────────────┐\n",
- "│ id ┆ geometry
┆ … ┆ roof_height │\n",
- "│ utf8 ┆ geometry
┆ ┆ float64 │\n",
-
"╞══════════════════════════════════════╪═════════════════════════════════════════╪═══╪═════════════╡\n",
- "│ 85b47da4-1b8d-4132-ac6c-d8dc14fab4b8 ┆ POLYGON((-6.4292972
54.8290034,-6.4291… ┆ … ┆ │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ ec12e345-d44d-4e40-8e08-e1e6e68d4d17 ┆ POLYGON((-6.430836
54.8299412,-6.43095… ┆ … ┆ │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ 285f9ff9-2d6d-409c-b214-74992c8d7e7d ┆ POLYGON((-6.4311579
54.8300247,-6.4313… ┆ … ┆ │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ abedfc7c-e5fd-4a29-931e-da77b610d02d ┆ POLYGON((-6.4321833
54.8294427,-6.4322… ┆ … ┆ │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ a203a2c6-e130-4979-a7d5-8a059c6f31fd ┆ POLYGON((-6.4300627
54.829276,-6.43006… ┆ … ┆ │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ 1d14caf6-b12d-486e-87dd-feef82fba9a7 ┆ POLYGON((-6.4301786
54.8281533,-6.4299… ┆ … ┆ │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ 4b1e67cf-7355-439b-9a31-46a50f3ee227 ┆ POLYGON((-6.4298614
54.8278977,-6.4299… ┆ … ┆ │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ 06de994e-efd4-4a1c-8a20-b4e883904cb2 ┆ POLYGON((-6.4296383
54.827599,-6.42956… ┆ … ┆ │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ ea0b2ea6-7c52-4395-9baa-bc023c7d3166 ┆ POLYGON((-6.4296844
54.8277379,-6.4296… ┆ … ┆ │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ 49f022ef-5574-4613-ae54-af139666fde3 ┆ POLYGON((-6.4296843
54.8278169,-6.4296… ┆ … ┆ │\n",
-
"└──────────────────────────────────────┴─────────────────────────────────────────┴───┴─────────────┘\n"
- ]
- }
- ],
- "source": [
- "df.limit(10).show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "id": "e37a023e-0e80-403a-a65b-b5a190004f72",
+ "cell_type": "markdown",
+ "id": "83a37848",
"metadata": {},
- "outputs": [],
"source": [
- "df.to_view(\"buildings\")"
+ "We can preview the first few rows using `.show()`. Because this is a lazy
operation and we've already cached the schema using `.to_view()`, this only
takes a few seconds."
]
},
{
"cell_type": "code",
- "execution_count": 6,
- "id": "ebfe4776-e08f-4f38-97fc-fca8ec6fc364",
+ "execution_count": 3,
+ "id": "62c19bf2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "┌────────────┐\n",
- "│ count(*) │\n",
- "│ int64 │\n",
- "╞════════════╡\n",
- "│ 2541497985 │\n",
- "└────────────┘\n"
+
"┌───────────────┬───────────────┬──────────────┬─────────┬───┬────────┬─────────────┬──────────────┐\n",
+ "│ id ┆ geometry ┆ bbox ┆ country ┆ … ┆ region ┆
admin_level ┆ division_id │\n",
+ "│ utf8 ┆ geometry ┆ struct ┆ utf8 ┆ ┆ utf8 ┆
int32 ┆ utf8 │\n",
+
"╞═══════════════╪═══════════════╪══════════════╪═════════╪═══╪════════╪═════════════╪══════════════╡\n",
+ "│ a5c573c4-022… ┆ POLYGON((-49… ┆ {xmin: -49.… ┆ BR ┆ … ┆ BR-PR ┆
┆ 388a8056-ee… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ cf523f8c-c26… ┆ POLYGON((-49… ┆ {xmin: -49.… ┆ BR ┆ … ┆ BR-PR ┆
┆ 068ef37e-3b… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ 8ace3d06-b8a… ┆ POLYGON((-49… ┆ {xmin: -49.… ┆ BR ┆ … ┆ BR-PR ┆
┆ 7238aeb3-b8… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ b26d2cba-b54… ┆ POLYGON((-49… ┆ {xmin: -49.… ┆ BR ┆ … ┆ BR-PR ┆
┆ 3c2dc8fc-79… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ 20103725-17c… ┆ POLYGON((-49… ┆ {xmin: -49.… ┆ BR ┆ … ┆ BR-PR ┆
┆ 45037e82-de… │\n",
+
"└───────────────┴───────────────┴──────────────┴─────────┴───┴────────┴─────────────┴──────────────┘\n"
]
}
],
"source": [
- "# the buildings table is large and contains billions of rows\n",
- "sd.sql(\"\"\"\n",
- "SELECT\n",
- " COUNT(*)\n",
- "FROM\n",
- " buildings\n",
- "\"\"\").show()"
+ "sd.view(\"divisions\").show(5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "06fa447a",
+ "metadata": {},
+ "source": [
+ "The default view of the data hides some columns to ensure the entire
output can be shown. To look at all the columns with type details, use
`.schema`:"
]
},
{
"cell_type": "code",
- "execution_count": 7,
- "id": "b73f670d-0d10-4a7a-bfc7-e2abe5d9edd2",
+ "execution_count": 4,
+ "id": "471fd72f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "SedonaSchema with 24 fields:\n",
+ "SedonaSchema with 14 fields:\n",
" id: utf8<Utf8View>\n",
" geometry: geometry<WkbView(ogc:crs84)>\n",
- " bbox: struct<Struct(xmin Float32, xmax Float32, ymin Float32, ymax
Float32)>\n",
+ " bbox: struct<Struct(\"xmin\": Float32, \"xmax\": Float32, \"ymin\":
Float32, \"ymax\": Float32)>\n",
+ " country: utf8<Utf8View>\n",
" version: int32<Int32>\n",
- " sources: list<List(Field { name: \"element\", data_type:
Struct([Field { name: \"property\", data_type: Utf8, nullable: true, dict_id:
0, dict_is_ordered: false, metadata: {} }, Field { name: \"dataset\",
data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata:
{} }, Field { name: \"license\", data_type: Utf8, nullable: true, dict_id: 0,
dict_is_ordered: false, metadata: {} }, Field { name: \"record_id\", data_type:
Utf8, nullable: true, dict_id: 0, dict_i [...]
- " level: int32<Int32>\n",
+ " sources: list<List(Struct(\"property\": Utf8, \"dataset\": Utf8,
\"license\": Utf8, \"record_id\": Utf8, \"update_time\": Utf8, \"confidence\":
Float64, \"between\": List(Float64, field: 'element')), field: 'element')>\n",
" subtype: utf8<Utf8View>\n",
" class: utf8<Utf8View>\n",
- " height: float64<Float64>\n",
- " names: struct<Struct(primary Utf8, common Map(Field { name:
\"key_value\", data_type: Struct([Field { name: \"key\", data_type: Utf8,
nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field {
name: \"value\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered:
false, metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false,
metadata: {} }, false), rules List(Field { name: \"element\", data_type:
Struct([Field { name: \"variant\", data_t [...]
- " has_parts: boolean<Boolean>\n",
- " is_underground: boolean<Boolean>\n",
- " num_floors: int32<Int32>\n",
- " num_floors_underground: int32<Int32>\n",
- " min_height: float64<Float64>\n",
- " min_floor: int32<Int32>\n",
- " facade_color: utf8<Utf8View>\n",
- " facade_material: utf8<Utf8View>\n",
- " roof_material: utf8<Utf8View>\n",
- " roof_shape: utf8<Utf8View>\n",
- " roof_direction: float64<Float64>\n",
- " roof_orientation: utf8<Utf8View>\n",
- " roof_color: utf8<Utf8View>\n",
- " roof_height: float64<Float64>"
+ " names: struct<Struct(\"primary\": Utf8, \"common\":
Map(\"key_value\": non-null Struct(\"key\": non-null Utf8, \"value\": Utf8),
unsorted), \"rules\": List(Struct(\"variant\": Utf8, \"language\": Utf8,
\"perspectives\": Struct(\"mode\": Utf8, \"countries\": List(Utf8, field:
'element')), \"value\": Utf8, \"between\": List(Float64, field: 'element'),
\"side\": Utf8), field: 'element'))>\n",
+ " is_land: boolean<Boolean>\n",
+ " is_territorial: boolean<Boolean>\n",
+ " region: utf8<Utf8View>\n",
+ " admin_level: int32<Int32>\n",
+ " division_id: utf8<Utf8View>"
]
},
- "execution_count": 7,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "# check out the schema of the buildings table to see what it contains\n",
- "df.schema"
+ "sd.view(\"divisions\").schema"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "925a4b10",
+ "metadata": {},
+ "source": [
+ "Overture data makes heavy use of nested types. These can be indexed into
or expanded using SQL:"
]
},
{
"cell_type": "code",
- "execution_count": 8,
- "id": "68d1b68c-dd26-45c2-944f-61138b212943",
+ "execution_count": 5,
+ "id": "85710387",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
-
"┌─────────────────────────┬────────────────────┬────────────┬────────────┬─────────────────────────┐\n",
- "│ id ┆ height ┆ num_floors ┆
roof_shape ┆ centroid │\n",
- "│ utf8 ┆ float64 ┆ int32 ┆ utf8
┆ geometry │\n",
-
"╞═════════════════════════╪════════════════════╪════════════╪════════════╪═════════════════════════╡\n",
- "│ aa8e3a73-c72c-4f1a-b6e… ┆ 20.38205909729004 ┆ ┆
┆ POINT(-74.187673580307… │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ efe7616b-7f7e-464c-9ce… ┆ 26.18361473083496 ┆ ┆
┆ POINT(-74.189040982134… │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ b3f734a1-325b-4e8c-b1d… ┆ 27.025876998901367 ┆ ┆
┆ POINT(-74.2558161 40.8… │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ 45d88655-e2f4-4a08-926… ┆ 25.485210418701172 ┆ ┆
┆ POINT(-74.182252194444… │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ 31e8353c-7d5b-4b20-94e… ┆ 21.294815063476562 ┆ ┆
┆ POINT(-74.197113787905… │\n",
-
"└─────────────────────────┴────────────────────┴────────────┴────────────┴─────────────────────────┘\n"
+
"┌────────────────────────────────────┬─────────────────────────────────────────────────────────────┐\n",
+ "│ name ┆
geometry │\n",
+ "│ utf8 ┆
geometry │\n",
+
"╞════════════════════════════════════╪═════════════════════════════════════════════════════════════╡\n",
+ "│ Sable Island National Park Reserve ┆ POLYGON((-60.178333
43.9824655,-60.1785682 43.9825425,-60.… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Sable Island ┆ POLYGON((-59.7744732
44.2254616,-59.7928902 44.2173253,-59… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Halifax Regional Municipality ┆ MULTIPOLYGON(((-59.7321078
44.2390248,-59.7502166 44.23385… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ West Liscomb ┆ POLYGON((-62.0615594
45.0023306,-62.0621839 45.0024475,-62… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Marie Joseph ┆ POLYGON((-61.9911914
44.95646,-61.9912383 44.9579526,-61.9… │\n",
+
"└────────────────────────────────────┴─────────────────────────────────────────────────────────────┘\n"
]
}
],
"source": [
- "# find all the buildings in New York City that are taller than 20
meters\n",
- "nyc_bbox_wkt = (\n",
- " \"POLYGON((-74.2591 40.4774, -74.2591 40.9176, -73.7004 40.9176,
\"\n",
- " \"-73.7004 40.4774, -74.2591 40.4774))\"\n",
- ")\n",
- "sd.sql(f\"\"\"\n",
- "SELECT\n",
- " id,\n",
- " height,\n",
- " num_floors,\n",
- " roof_shape,\n",
- " ST_Centroid(geometry) as centroid\n",
- "FROM\n",
- " buildings\n",
- "WHERE\n",
- " is_underground = FALSE\n",
- " AND height IS NOT NULL\n",
- " AND height > 20\n",
- " AND ST_Intersects(\n",
- " geometry,\n",
- " ST_GeomFromText('{nyc_bbox_wkt}', 4326)\n",
- " )\n",
- "LIMIT 5;\n",
- "\"\"\").show()"
+ "sd.sql(\n",
+ " \"SELECT names.primary AS name, geometry FROM divisions WHERE region
= 'CA-NS'\"\n",
+ ").show(5)"
]
},
{
"cell_type": "markdown",
- "id": "e07fcdc1-962b-4dce-90cb-bf715432e299",
+ "id": "3141209b",
"metadata": {},
"source": [
- "## Overture divisions table"
+ "As with all remote tables, it is worth resolving a query into a concrete
local table to avoid fetching unnecessary data on repeated queries. The
`.to_memtable()` method can be used to resolve a remote table into memory
(great for small results); `.to_parquet()` can be used to resolve a remote
table to disk (great for medium to large results)."
]
},
{
"cell_type": "code",
- "execution_count": 9,
- "id": "d9f122d3-4d90-46b0-ab9a-259a71cc423b",
- "metadata": {},
- "outputs": [],
- "source": [
- "df = sd.read_parquet(\n",
- "
\"s3://overturemaps-us-west-2/release/2025-11-19.0/theme=divisions/type=division_area/\"\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "id": "19a75b19-0b56-4167-b3f1-73a171ecc480",
+ "execution_count": 6,
+ "id": "25aae0de",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
-
"┌─────────────────┬────────────────┬────────────────┬───┬────────────────┬────────┬────────────────┐\n",
- "│ id ┆ geometry ┆ bbox ┆ … ┆
is_territorial ┆ region ┆ division_id │\n",
- "│ utf8 ┆ geometry ┆ struct ┆ ┆ boolean
┆ utf8 ┆ utf8 │\n",
-
"╞═════════════════╪════════════════╪════════════════╪═══╪════════════════╪════════╪════════════════╡\n",
- "│ 3665c36d-d3a9-… ┆ POLYGON((12.5… ┆ {xmin: 12.455… ┆ … ┆ true
┆ IT-34 ┆ f05aa29f-151f… │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ 18a69439-a1da-… ┆ POLYGON((12.5… ┆ {xmin: 12.596… ┆ … ┆ true
┆ IT-36 ┆ ae00d58c-6e67… │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ 7d0f6d37-bb55-… ┆ POLYGON((12.6… ┆ {xmin: 12.567… ┆ … ┆ true
┆ IT-36 ┆ bdfc82ca-5f23… │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ 3f480ff6-6361-… ┆ POLYGON((12.5… ┆ {xmin: 12.549… ┆ … ┆ true
┆ IT-36 ┆ 1c750104-4470… │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ 31c3ab5e-eb6f-… ┆ POLYGON((12.6… ┆ {xmin: 12.612… ┆ … ┆ true
┆ IT-34 ┆ d90804ee-19a4… │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ 308517e6-64b4-… ┆ POLYGON((12.5… ┆ {xmin: 12.589… ┆ … ┆ true
┆ IT-34 ┆ aabd71e9-4d98… │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ 646e5b1f-b76a-… ┆ POLYGON((12.5… ┆ {xmin: 12.485… ┆ … ┆ true
┆ IT-34 ┆ 502c1c4e-fc19… │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ f2809a49-1082-… ┆ POLYGON((12.5… ┆ {xmin: 12.538… ┆ … ┆ true
┆ IT-34 ┆ 8b446eed-00ad… │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ 72b27245-c7fd-… ┆ POLYGON((12.5… ┆ {xmin: 12.501… ┆ … ┆ true
┆ IT-34 ┆ 1d535e1f-d19e… │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ 815855d9-05d0-… ┆ POLYGON((12.4… ┆ {xmin: 12.371… ┆ … ┆ true
┆ IT-34 ┆ 5aa91354-9e8c… │\n",
-
"└─────────────────┴────────────────┴────────────────┴───┴────────────────┴────────┴────────────────┘\n"
+
"┌────────────────────────────────────┬─────────────────────────────────────────────────────────────┐\n",
+ "│ name ┆
geometry │\n",
+ "│ utf8 ┆
geometry │\n",
+
"╞════════════════════════════════════╪═════════════════════════════════════════════════════════════╡\n",
+ "│ Sable Island National Park Reserve ┆ POLYGON((-60.178333
43.9824655,-60.1785682 43.9825425,-60.… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Sable Island ┆ POLYGON((-59.7744732
44.2254616,-59.7928902 44.2173253,-59… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Halifax Regional Municipality ┆ MULTIPOLYGON(((-59.7321078
44.2390248,-59.7502166 44.23385… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ West Liscomb ┆ POLYGON((-62.0615594
45.0023306,-62.0621839 45.0024475,-62… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Marie Joseph ┆ POLYGON((-61.9911914
44.95646,-61.9912383 44.9579526,-61.9… │\n",
+
"└────────────────────────────────────┴─────────────────────────────────────────────────────────────┘\n"
]
}
],
"source": [
- "# inspect a few rows of the data\n",
- "df.show(10)"
+ "sd.sql(\n",
+ " \"SELECT names.primary AS name, geometry FROM divisions WHERE region
= 'CA-NS'\"\n",
+ ").to_memtable().to_view(\"divisions_ns\")\n",
+ "\n",
+ "sd.view(\"divisions_ns\").show(5)"
]
},
{
- "cell_type": "code",
- "execution_count": 11,
- "id": "03b951de-3397-4fcf-9baf-50e139a38dd4",
+ "cell_type": "markdown",
+ "id": "2828dfc0",
"metadata": {},
- "outputs": [],
"source": [
- "df.to_view(\"division_area\")"
+ "Importantly, Overture data is distributed using GeoParquet 1.1, for which
SedonaDB has built-in support! This means that spatial queries (e.g.,
`ST_Intersects()`) tend to execute quickly against Overture. In this case, the
spatial query for Nova Scotia is ~5x faster than the text-based region query."
]
},
{
"cell_type": "code",
- "execution_count": 12,
- "id": "9c6bd69d-9407-432a-bdc8-d60976237a3a",
+ "execution_count": 7,
+ "id": "f43824dc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "┌──────────┐\n",
- "│ count(*) │\n",
- "│ int64 │\n",
- "╞══════════╡\n",
- "│ 1052542 │\n",
- "└──────────┘\n"
+
"┌───────────────────┬──────────────────────────────────────────────────────────────────────────────┐\n",
+ "│ name ┆ geometry
│\n",
+ "│ utf8 ┆ geometry
│\n",
+
"╞═══════════════════╪══════════════════════════════════════════════════════════════════════════════╡\n",
+ "│ Maces Bay ┆ POLYGON((-66.4491254 45.1265729,-66.4577261
45.126933,-66.4591563 45.126991… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Gooseberry Island ┆ POLYGON((-66.2598821 45.1380421,-66.2599962
45.1381233,-66.2600591 45.13828… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Musquash Parish ┆ POLYGON((-66.4595418 45.2215004,-66.4595406
45.221468,-66.4595396 45.221391… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Dipper Harbour ┆ POLYGON((-66.3755086 45.118812,-66.4089711
45.1488327,-66.4284252 45.138119… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Chance Harbour ┆ POLYGON((-66.4089711 45.1488327,-66.3755086
45.118812,-66.3541725 45.105991… │\n",
+
"└───────────────────┴──────────────────────────────────────────────────────────────────────────────┘\n"
]
}
],
"source": [
- "sd.sql(\"\"\"\n",
- "SELECT\n",
- " COUNT(*)\n",
- "FROM division_area\n",
- "\"\"\").show()"
+ "import shapely\n",
+ "\n",
+ "ns_bbox_wkb = shapely.box(-66.5, 43.4, -59.8, 47.1).wkb\n",
+ "\n",
+ "sd.sql(\n",
+ " \"\"\"\n",
+ " SELECT names.primary AS name, geometry\n",
+ " FROM divisions\n",
+ " WHERE ST_Contains(ST_GeomFromWKB($wkb, 4326), geometry)\n",
+ " \"\"\",\n",
+ " params={\"wkb\": ns_bbox_wkb},\n",
+ ").to_memtable().to_view(\"divisions_ns\", overwrite=True)\n",
+ "\n",
+ "sd.view(\"divisions_ns\").show(5)"
]
},
{
- "cell_type": "code",
- "execution_count": 13,
- "id": "75a6d0ed-9767-4d36-a77a-4afb7952fbe4",
+ "cell_type": "markdown",
+ "id": "4f44adfb-2973-4a65-b4f2-d24b28700b79",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "SedonaSchema with 13 fields:\n",
- " id: utf8<Utf8View>\n",
- " geometry: geometry<WkbView(ogc:crs84)>\n",
- " bbox: struct<Struct(xmin Float32, xmax Float32, ymin Float32, ymax
Float32)>\n",
- " country: utf8<Utf8View>\n",
- " version: int32<Int32>\n",
- " sources: list<List(Field { name: \"element\", data_type:
Struct([Field { name: \"property\", data_type: Utf8, nullable: true, dict_id:
0, dict_is_ordered: false, metadata: {} }, Field { name: \"dataset\",
data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata:
{} }, Field { name: \"license\", data_type: Utf8, nullable: true, dict_id: 0,
dict_is_ordered: false, metadata: {} }, Field { name: \"record_id\", data_type:
Utf8, nullable: true, dict_id: 0, dict_i [...]
- " subtype: utf8<Utf8View>\n",
- " class: utf8<Utf8View>\n",
- " names: struct<Struct(primary Utf8, common Map(Field { name:
\"key_value\", data_type: Struct([Field { name: \"key\", data_type: Utf8,
nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field {
name: \"value\", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered:
false, metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false,
metadata: {} }, false), rules List(Field { name: \"element\", data_type:
Struct([Field { name: \"variant\", data_t [...]
- " is_land: boolean<Boolean>\n",
- " is_territorial: boolean<Boolean>\n",
- " region: utf8<Utf8View>\n",
- " division_id: utf8<Utf8View>"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
"source": [
- "df.schema"
+ "## Overture buildings table\n",
+ "\n",
+ "The [Overture buildings
table](https://docs.overturemaps.org/guides/buildings/) is one of the largest
tables provided by the Overture Maps Foundation. The workflow is similar to that
of the divisions table or any other remote table; however, the buildings table
presents several unique challenges.\n",
+ "\n",
+ "First, the combined metadata for all files in the buildings table is very
large. SedonaDB caches remote metadata to avoid repeated downloads; however, the
default cache size is too small for this table. For repeated queries against the
buildings table, ensure that the cache size is increased to at least 900 MB
and/or `.to_view()` is used to cache the schema. The cache lives as long as the
session; to clear it, reconnect with `sd = sedona.db.connect()` or reset the
cache size to a smaller value.\n",
+ "\n",
+ "> Overture removes old releases. See [this
page](https://docs.overturemaps.org/release-calendar/#current-release) to see
the latest version number and replace the relevant portion of the URL below."
]
},
{
"cell_type": "code",
- "execution_count": 14,
- "id": "f1f7158c-ef2b-4377-9bee-180309ddd553",
+ "execution_count": 8,
+ "id": "52855769-4872-472a-9c42-afced3d85ca8",
"metadata": {},
"outputs": [],
"source": [
- "# get all the divisions in Nova Scotia and save them in memory with
to_memtable()\n",
- "nova_scotia_bbox_wkt = (\n",
- " \"POLYGON((-66.5 43.4, -66.5 47.1, -59.8 47.1, -59.8 43.4, -66.5
43.4))\"\n",
- ")\n",
- "ns = sd.sql(f\"\"\"\n",
- "SELECT\n",
- " country, region, names, geometry\n",
- "FROM division_area\n",
- "WHERE\n",
- " ST_Intersects(\n",
- " geometry,\n",
- " ST_GeomFromText('{nova_scotia_bbox_wkt}', 4326)\n",
- " )\n",
- "\"\"\").to_memtable()"
+ "sd.sql(\"SET datafusion.runtime.metadata_cache_limit =
'900M'\").execute()\n",
+ "\n",
+ "sd.read_parquet(\n",
+ "
\"s3://overturemaps-us-west-2/release/2026-02-18.0/theme=buildings/type=building/\",\n",
+ " options={\"aws.skip_signature\": True, \"aws.region\":
\"us-west-2\"},\n",
+ ").to_view(\"buildings\")"
]
},
{
- "cell_type": "code",
- "execution_count": 15,
- "id": "27e6909d-06fa-438b-88e0-d300fd2fb1ec",
+ "cell_type": "markdown",
+ "id": "fb89b55f",
"metadata": {},
- "outputs": [],
"source": [
- "ns.to_view(\"ns_divisions\")"
+ "Like all SedonaDB DataFrames, viewing a schema or previewing the first
few rows is lazy and usually fast unless a query contains large aggregations
or joins."
]
},
{
"cell_type": "code",
- "execution_count": 16,
- "id": "2dec92d8-a374-4021-990a-e50f5769516e",
+ "execution_count": 9,
+ "id": "b45b5e5c-64ed-49ba-a8aa-9f2292f617c6",
"metadata": {},
- "outputs": [],
- "source": [
- "df = sd.sql(\"\"\"\n",
- "SELECT UNNEST(names), geometry\n",
- "FROM ns_divisions\n",
- "WHERE region = 'CA-NS'\n",
- "\"\"\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "id": "7f39a283-0eee-4f72-a30a-8dd9fa1aaa69",
- "metadata": {
- "scrolled": true
- },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
-
"┌────────────────────────┬────────────────────────┬────────────────────────┬───────────────────────┐\n",
- "│ __unnest_placeholder(n ┆ __unnest_placeholder(n ┆
__unnest_placeholder(n ┆ geometry │\n",
- "│ s_divisions.names).pr… ┆ s_divisions.names).co… ┆
s_divisions.names).ru… ┆ geometry │\n",
-
"╞════════════════════════╪════════════════════════╪════════════════════════╪═══════════════════════╡\n",
- "│ Apple River ┆ ┆
┆ POLYGON((-64.7260681… │\n",
-
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
- "│ Allen Hill ┆ ┆
┆ POLYGON((-64.6956656… │\n",
-
"└────────────────────────┴────────────────────────┴────────────────────────┴───────────────────────┘\n",
- "CPU times: user 1.25 ms, sys: 805 μs, total: 2.05 ms\n",
- "Wall time: 1.42 ms\n"
+
"┌──────────────────────────────────────┬─────────────────────────────────────────┬───┬─────────────┐\n",
+ "│ id ┆ geometry
┆ … ┆ roof_height │\n",
+ "│ utf8 ┆ geometry
┆ ┆ float64 │\n",
+
"╞══════════════════════════════════════╪═════════════════════════════════════════╪═══╪═════════════╡\n",
+ "│ ab23f7ee-4c05-4246-a016-8260ce58a916 ┆ POLYGON((-67.589523
-39.0908362,-67.58… ┆ … ┆ │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ 58356258-2e80-48fc-aacf-d81fcf74074c ┆ POLYGON((-67.5896327
-39.0907868,-67.5… ┆ … ┆ │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ b50595a8-cddb-44dd-bdbf-7bbe1e858ae0 ┆ POLYGON((-67.5897117
-39.0908483,-67.5… ┆ … ┆ │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ cbabe2df-f49a-4e9f-9cbe-c527a4b3b9f1 ┆ POLYGON((-67.5898768
-39.0907073,-67.5… ┆ … ┆ │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ bcd6984b-8da4-4dfe-9212-be2b02a24b67 ┆ POLYGON((-67.5901879
-39.0908288,-67.5… ┆ … ┆ │\n",
+
"└──────────────────────────────────────┴─────────────────────────────────────────┴───┴─────────────┘\n"
]
}
],
"source": [
- "%%time\n",
- "# this executes quickly because the Nova Scotia data was persisted in
memory with `to_memtable()`\n",
- "df.show(2)"
+ "sd.view(\"buildings\").show(5)"
]
},
{
"cell_type": "markdown",
- "id": "fc1d2023-c83a-4010-808b-212161b1b577",
+ "id": "caaa29c4",
"metadata": {},
"source": [
- "## Visualize the results with lonboard"
+ "Some operations like `.count()` use summary statistics and execute
quickly even for large remote tables:"
]
},
{
"cell_type": "code",
- "execution_count": 18,
- "id": "f78583fd-a73a-4169-9c45-74d8026bb5fb",
+ "execution_count": 10,
+ "id": "e37a023e-0e80-403a-a65b-b5a190004f72",
"metadata": {},
"outputs": [
{
"data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "02a229b2c09f4acf8ae4daedcb8af8ae",
- "version_major": 2,
- "version_minor": 1
- },
"text/plain": [
- "Map(basemap_style=<CartoBasemap.DarkMatter:
'https://basemaps.cartocdn.com/gl/dark-matter-gl-style/style.json'…"
+ "2541282557"
]
},
- "execution_count": 18,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "import lonboard\n",
- "\n",
- "lonboard.viz(df)"
+ "sd.view(\"buildings\").count()"
]
},
{
"cell_type": "markdown",
- "id": "0f668f51-462e-4e18-b774-c047140b224a",
+ "id": "ea52bc84",
"metadata": {},
"source": [
- ""
+ "Overture buildings has a number of attributes on which we can filter. For
long-running queries, it may be convenient to cache a result locally using
`.to_memtable()` or `.to_parquet()` before inspecting it using other tools;
however, like all Overture tables, it is optimized for spatial queries, and these
are usually not expensive for small areas.\n",
+ "\n",
+ "For example, we can find all of the buildings in New York City taller
than 20 meters:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "68d1b68c-dd26-45c2-944f-61138b212943",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+
"┌─────────────────────────┬────────────────────┬────────────┬────────────┬─────────────────────────┐\n",
+ "│ id ┆ height ┆ num_floors ┆
roof_shape ┆ centroid │\n",
+ "│ utf8 ┆ float64 ┆ int32 ┆ utf8
┆ geometry │\n",
+
"╞═════════════════════════╪════════════════════╪════════════╪════════════╪═════════════════════════╡\n",
+ "│ aa8e3a73-c72c-4f1a-b6e… ┆ 20.38205909729004 ┆ ┆
┆ POINT(-74.187673580307… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ efe7616b-7f7e-464c-9ce… ┆ 26.18361473083496 ┆ ┆
┆ POINT(-74.189040982134… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ b3f734a1-325b-4e8c-b1d… ┆ 27.025876998901367 ┆ ┆
┆ POINT(-74.2558161 40.8… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ 45d88655-e2f4-4a08-926… ┆ 25.485210418701172 ┆ ┆
┆ POINT(-74.182252194444… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ 31e8353c-7d5b-4b20-94e… ┆ 21.294815063476562 ┆ ┆
┆ POINT(-74.197113787905… │\n",
+
"└─────────────────────────┴────────────────────┴────────────┴────────────┴─────────────────────────┘\n"
+ ]
+ }
+ ],
+ "source": [
+ "nyc_bbox_wkt = (\n",
+ " \"POLYGON((-74.2591 40.4774, -74.2591 40.9176, -73.7004 40.9176,
\"\n",
+ " \"-73.7004 40.4774, -74.2591 40.4774))\"\n",
+ ")\n",
+ "\n",
+ "sd.sql(\n",
+ " \"\"\"\n",
+ " SELECT\n",
+ " id,\n",
+ " height,\n",
+ " num_floors,\n",
+ " roof_shape,\n",
+ " ST_Centroid(geometry) as centroid\n",
+ " FROM\n",
+ " buildings\n",
+ " WHERE\n",
+ " is_underground = FALSE\n",
+ " AND height IS NOT NULL\n",
+ " AND height > 20\n",
+ " AND ST_Intersects(\n",
+ " geometry,\n",
+ " ST_GeomFromText($1, 4326)\n",
+ " )\n",
+ " LIMIT 5;\n",
+ " \"\"\",\n",
+ " params=(nyc_bbox_wkt,),\n",
+ ").to_memtable().to_view(\"buildings_nyc\")\n",
+ "\n",
+ "sd.view(\"buildings_nyc\").show(5)"
]
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": ".venv",
"language": "python",
"name": "python3"
},
@@ -580,7 +475,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.12.4"
+ "version": "3.13.8"
}
},
"nbformat": 4,
diff --git a/docs/overture-examples.md b/docs/overture-examples.md
index 0762b3b8..ad5ad8e0 100644
--- a/docs/overture-examples.md
+++ b/docs/overture-examples.md
@@ -21,7 +21,7 @@
> Note: Before running this notebook, ensure that you have installed SedonaDB:
> `pip install "apache-sedona[db]"`
-This notebook demonstrates how to query and analyze the [Overture
Maps](https://overturemaps.org/) dataset using SedonaDB. See [this
page](https://docs.overturemaps.org/release-calendar/) to get the latest
version of the Overture data.
+This notebook demonstrates how to query and analyze the [Overture
Maps](https://overturemaps.org/) dataset using SedonaDB.
The notebook explains how to:
@@ -30,353 +30,268 @@ The notebook explains how to:
* Optimize subsequent query performance by caching a subset of data in memory.
-```python
-%pip install lonboard
-```
-
- Requirement already satisfied: lonboard in
/opt/miniconda3/lib/python3.12/site-packages (0.12.1)
- Requirement already satisfied: anywidget~=0.9.0 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (0.9.18)
- Requirement already satisfied: arro3-compute>=0.4.1 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (0.6.3)
- Requirement already satisfied: arro3-core>=0.4.1 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (0.6.3)
- Requirement already satisfied: arro3-io>=0.4.1 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (0.6.3)
- Requirement already satisfied: geoarrow-rust-core>=0.5.2 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (0.5.2)
- Requirement already satisfied: ipywidgets>=7.6.0 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (8.1.7)
- Requirement already satisfied: numpy>=1.14 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (2.3.3)
- Requirement already satisfied: pyproj>=3.3 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (3.7.2)
- Requirement already satisfied: traitlets>=5.7.1 in
/opt/miniconda3/lib/python3.12/site-packages (from lonboard) (5.14.3)
- Requirement already satisfied: psygnal>=0.8.1 in
/opt/miniconda3/lib/python3.12/site-packages (from anywidget~=0.9.0->lonboard)
(0.14.1)
- Requirement already satisfied: typing-extensions>=4.2.0 in
/opt/miniconda3/lib/python3.12/site-packages (from anywidget~=0.9.0->lonboard)
(4.15.0)
- Requirement already satisfied: comm>=0.1.3 in
/opt/miniconda3/lib/python3.12/site-packages (from ipywidgets>=7.6.0->lonboard)
(0.2.3)
- Requirement already satisfied: ipython>=6.1.0 in
/opt/miniconda3/lib/python3.12/site-packages (from ipywidgets>=7.6.0->lonboard)
(9.5.0)
- Requirement already satisfied: widgetsnbextension~=4.0.14 in
/opt/miniconda3/lib/python3.12/site-packages (from ipywidgets>=7.6.0->lonboard)
(4.0.14)
- Requirement already satisfied: jupyterlab_widgets~=3.0.15 in
/opt/miniconda3/lib/python3.12/site-packages (from ipywidgets>=7.6.0->lonboard)
(3.0.15)
- Requirement already satisfied: certifi in
/opt/miniconda3/lib/python3.12/site-packages (from pyproj>=3.3->lonboard)
(2025.8.3)
- Requirement already satisfied: decorator in
/opt/miniconda3/lib/python3.12/site-packages (from
ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (5.2.1)
- Requirement already satisfied: ipython-pygments-lexers in
/opt/miniconda3/lib/python3.12/site-packages (from
ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (1.1.1)
- Requirement already satisfied: jedi>=0.16 in
/opt/miniconda3/lib/python3.12/site-packages (from
ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (0.19.2)
- Requirement already satisfied: matplotlib-inline in
/opt/miniconda3/lib/python3.12/site-packages (from
ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (0.1.7)
- Requirement already satisfied: pexpect>4.3 in
/opt/miniconda3/lib/python3.12/site-packages (from
ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (4.9.0)
- Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in
/opt/miniconda3/lib/python3.12/site-packages (from
ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (3.0.52)
- Requirement already satisfied: pygments>=2.4.0 in
/opt/miniconda3/lib/python3.12/site-packages (from
ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (2.19.2)
- Requirement already satisfied: stack_data in
/opt/miniconda3/lib/python3.12/site-packages (from
ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (0.6.3)
- Requirement already satisfied: parso<0.9.0,>=0.8.4 in
/opt/miniconda3/lib/python3.12/site-packages (from
jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (0.8.5)
- Requirement already satisfied: ptyprocess>=0.5 in
/opt/miniconda3/lib/python3.12/site-packages (from
pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (0.7.0)
- Requirement already satisfied: wcwidth in
/opt/miniconda3/lib/python3.12/site-packages (from
prompt_toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets>=7.6.0->lonboard)
(0.2.14)
- Requirement already satisfied: executing>=1.2.0 in
/opt/miniconda3/lib/python3.12/site-packages (from
stack_data->ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (2.2.1)
- Requirement already satisfied: asttokens>=2.1.0 in
/opt/miniconda3/lib/python3.12/site-packages (from
stack_data->ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (3.0.0)
- Requirement already satisfied: pure-eval in
/opt/miniconda3/lib/python3.12/site-packages (from
stack_data->ipython>=6.1.0->ipywidgets>=7.6.0->lonboard) (0.2.3)
- Note: you may need to restart the kernel to use updated packages.
-
-
-
```python
import sedona.db
-import os
-
-os.environ["AWS_SKIP_SIGNATURE"] = "true"
-os.environ["AWS_DEFAULT_REGION"] = "us-west-2"
sd = sedona.db.connect()
```
-## Overture buildings table
+## Overture divisions
+Let's load a table! Like any local or remote collection of Parquet files, we
can use `sd.read_parquet()`. This is a lazy operation, fetching only metadata
required to calculate a table schema. To reduce the number of times this needs
to happen (and make the resulting DataFrame easier to reference in SQL), we use
`.to_view()`.
-```python
-df = sd.read_parquet(
-
"s3://overturemaps-us-west-2/release/2025-11-19.0/theme=buildings/type=building/"
-)
-```
+> Overture removes old releases. See [this
page](https://docs.overturemaps.org/release-calendar/#current-release) to see
the latest version number and replace the relevant portion of the URL below.
```python
-df.limit(10).show()
+sd.read_parquet(
+
"s3://overturemaps-us-west-2/release/2026-02-18.0/theme=divisions/type=division_area/",
+ options={"aws.skip_signature": True, "aws.region": "us-west-2"},
+).to_view("divisions")
```
-
┌──────────────────────────────────────┬─────────────────────────────────────────┬───┬─────────────┐
- │ id ┆ geometry
┆ … ┆ roof_height │
- │ utf8 ┆ geometry
┆ ┆ float64 │
-
╞══════════════════════════════════════╪═════════════════════════════════════════╪═══╪═════════════╡
- │ 85b47da4-1b8d-4132-ac6c-d8dc14fab4b8 ┆ POLYGON((-6.4292972
54.8290034,-6.4291… ┆ … ┆ │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ ec12e345-d44d-4e40-8e08-e1e6e68d4d17 ┆ POLYGON((-6.430836
54.8299412,-6.43095… ┆ … ┆ │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ 285f9ff9-2d6d-409c-b214-74992c8d7e7d ┆ POLYGON((-6.4311579
54.8300247,-6.4313… ┆ … ┆ │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ abedfc7c-e5fd-4a29-931e-da77b610d02d ┆ POLYGON((-6.4321833
54.8294427,-6.4322… ┆ … ┆ │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ a203a2c6-e130-4979-a7d5-8a059c6f31fd ┆ POLYGON((-6.4300627
54.829276,-6.43006… ┆ … ┆ │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ 1d14caf6-b12d-486e-87dd-feef82fba9a7 ┆ POLYGON((-6.4301786
54.8281533,-6.4299… ┆ … ┆ │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ 4b1e67cf-7355-439b-9a31-46a50f3ee227 ┆ POLYGON((-6.4298614
54.8278977,-6.4299… ┆ … ┆ │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ 06de994e-efd4-4a1c-8a20-b4e883904cb2 ┆ POLYGON((-6.4296383
54.827599,-6.42956… ┆ … ┆ │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ ea0b2ea6-7c52-4395-9baa-bc023c7d3166 ┆ POLYGON((-6.4296844
54.8277379,-6.4296… ┆ … ┆ │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ 49f022ef-5574-4613-ae54-af139666fde3 ┆ POLYGON((-6.4296843
54.8278169,-6.4296… ┆ … ┆ │
-
└──────────────────────────────────────┴─────────────────────────────────────────┴───┴─────────────┘
-
+We can preview the first few rows using `.show()`. Because this is a lazy
operation and we've already cached the schema using `.to_view()`, this only
takes a few seconds.
```python
-df.to_view("buildings")
+sd.view("divisions").show(5)
```
+
┌───────────────┬───────────────┬──────────────┬─────────┬───┬────────┬─────────────┬──────────────┐
+ │ id ┆ geometry ┆ bbox ┆ country ┆ … ┆ region ┆
admin_level ┆ division_id │
+ │ utf8 ┆ geometry ┆ struct ┆ utf8 ┆ ┆ utf8 ┆
int32 ┆ utf8 │
+
╞═══════════════╪═══════════════╪══════════════╪═════════╪═══╪════════╪═════════════╪══════════════╡
+ │ a5c573c4-022… ┆ POLYGON((-49… ┆ {xmin: -49.… ┆ BR ┆ … ┆ BR-PR ┆
┆ 388a8056-ee… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ cf523f8c-c26… ┆ POLYGON((-49… ┆ {xmin: -49.… ┆ BR ┆ … ┆ BR-PR ┆
┆ 068ef37e-3b… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ 8ace3d06-b8a… ┆ POLYGON((-49… ┆ {xmin: -49.… ┆ BR ┆ … ┆ BR-PR ┆
┆ 7238aeb3-b8… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ b26d2cba-b54… ┆ POLYGON((-49… ┆ {xmin: -49.… ┆ BR ┆ … ┆ BR-PR ┆
┆ 3c2dc8fc-79… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ 20103725-17c… ┆ POLYGON((-49… ┆ {xmin: -49.… ┆ BR ┆ … ┆ BR-PR ┆
┆ 45037e82-de… │
+
└───────────────┴───────────────┴──────────────┴─────────┴───┴────────┴─────────────┴──────────────┘
-```python
-# the buildings table is large and contains billions of rows
-sd.sql("""
-SELECT
- COUNT(*)
-FROM
- buildings
-""").show()
-```
-
- ┌────────────┐
- │ count(*) │
- │ int64 │
- ╞════════════╡
- │ 2541497985 │
- └────────────┘
+The default view of the data hides some columns to ensure the entire output
can be shown. To look at all the columns with type details, use `.schema`:
```python
-# check out the schema of the buildings table to see what it contains
-df.schema
+sd.view("divisions").schema
```
- SedonaSchema with 24 fields:
+ SedonaSchema with 14 fields:
id: utf8<Utf8View>
geometry: geometry<WkbView(ogc:crs84)>
- bbox: struct<Struct(xmin Float32, xmax Float32, ymin Float32, ymax
Float32)>
+ bbox: struct<Struct("xmin": Float32, "xmax": Float32, "ymin": Float32,
"ymax": Float32)>
+ country: utf8<Utf8View>
version: int32<Int32>
- sources: list<List(Field { name: "element", data_type: Struct([Field {
name: "property", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered:
false, metadata: {} }, Field { name: "dataset", data_type: Utf8, nullable:
true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name:
"license", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: {} }, Field { name: "record_id", data_type: Utf8, nullable: true,
dict_id: 0, dict_is_ordered: fal [...]
- level: int32<Int32>
+ sources: list<List(Struct("property": Utf8, "dataset": Utf8, "license":
Utf8, "record_id": Utf8, "update_time": Utf8, "confidence": Float64, "between":
List(Float64, field: 'element')), field: 'element')>
subtype: utf8<Utf8View>
class: utf8<Utf8View>
- height: float64<Float64>
- names: struct<Struct(primary Utf8, common Map(Field { name: "key_value",
data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false,
dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "value",
data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata:
{} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} },
false), rules List(Field { name: "element", data_type: Struct([Field { name:
"variant", data_type: Utf8, nul [...]
- has_parts: boolean<Boolean>
- is_underground: boolean<Boolean>
- num_floors: int32<Int32>
- num_floors_underground: int32<Int32>
- min_height: float64<Float64>
- min_floor: int32<Int32>
- facade_color: utf8<Utf8View>
- facade_material: utf8<Utf8View>
- roof_material: utf8<Utf8View>
- roof_shape: utf8<Utf8View>
- roof_direction: float64<Float64>
- roof_orientation: utf8<Utf8View>
- roof_color: utf8<Utf8View>
- roof_height: float64<Float64>
+ names: struct<Struct("primary": Utf8, "common": Map("key_value":
non-null Struct("key": non-null Utf8, "value": Utf8), unsorted), "rules":
List(Struct("variant": Utf8, "language": Utf8, "perspectives": Struct("mode":
Utf8, "countries": List(Utf8, field: 'element')), "value": Utf8, "between":
List(Float64, field: 'element'), "side": Utf8), field: 'element'))>
+ is_land: boolean<Boolean>
+ is_territorial: boolean<Boolean>
+ region: utf8<Utf8View>
+ admin_level: int32<Int32>
+ division_id: utf8<Utf8View>
+
+Overture data makes heavy use of nested types. These can be indexed into or
expanded using SQL:
```python
-# find all the buildings in New York City that are taller than 20 meters
-nyc_bbox_wkt = (
- "POLYGON((-74.2591 40.4774, -74.2591 40.9176, -73.7004 40.9176, "
- "-73.7004 40.4774, -74.2591 40.4774))"
-)
-sd.sql(f"""
-SELECT
- id,
- height,
- num_floors,
- roof_shape,
- ST_Centroid(geometry) as centroid
-FROM
- buildings
-WHERE
- is_underground = FALSE
- AND height IS NOT NULL
- AND height > 20
- AND ST_Intersects(
- geometry,
- ST_GeomFromText('{nyc_bbox_wkt}', 4326)
- )
-LIMIT 5;
-""").show()
+sd.sql(
+ "SELECT names.primary AS name, geometry FROM divisions WHERE region =
'CA-NS'"
+).show(5)
```
-
┌─────────────────────────┬────────────────────┬────────────┬────────────┬─────────────────────────┐
- │ id ┆ height ┆ num_floors ┆ roof_shape ┆
centroid │
- │ utf8 ┆ float64 ┆ int32 ┆ utf8 ┆
geometry │
-
╞═════════════════════════╪════════════════════╪════════════╪════════════╪═════════════════════════╡
- │ aa8e3a73-c72c-4f1a-b6e… ┆ 20.38205909729004 ┆ ┆ ┆
POINT(-74.187673580307… │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ efe7616b-7f7e-464c-9ce… ┆ 26.18361473083496 ┆ ┆ ┆
POINT(-74.189040982134… │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ b3f734a1-325b-4e8c-b1d… ┆ 27.025876998901367 ┆ ┆ ┆
POINT(-74.2558161 40.8… │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ 45d88655-e2f4-4a08-926… ┆ 25.485210418701172 ┆ ┆ ┆
POINT(-74.182252194444… │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ 31e8353c-7d5b-4b20-94e… ┆ 21.294815063476562 ┆ ┆ ┆
POINT(-74.197113787905… │
-
└─────────────────────────┴────────────────────┴────────────┴────────────┴─────────────────────────┘
+
┌────────────────────────────────────┬─────────────────────────────────────────────────────────────┐
+ │ name ┆ geometry
│
+ │ utf8 ┆ geometry
│
+
╞════════════════════════════════════╪═════════════════════════════════════════════════════════════╡
+ │ Sable Island National Park Reserve ┆ POLYGON((-60.178333
43.9824655,-60.1785682 43.9825425,-60.… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Sable Island ┆ POLYGON((-59.7744732
44.2254616,-59.7928902 44.2173253,-59… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Halifax Regional Municipality ┆ MULTIPOLYGON(((-59.7321078
44.2390248,-59.7502166 44.23385… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ West Liscomb ┆ POLYGON((-62.0615594
45.0023306,-62.0621839 45.0024475,-62… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Marie Joseph ┆ POLYGON((-61.9911914
44.95646,-61.9912383 44.9579526,-61.9… │
+
└────────────────────────────────────┴─────────────────────────────────────────────────────────────┘
-## Overture divisions table
+Like all remote tables, it is worth resolving a query into a concrete local
table to avoid fetching unnecessary data on repeated queries. The
`.to_memtable()` method can be used to resolve a remote table into memory
(great for small results); `.to_parquet()` can be used to resolve a remote
table to disk (great for medium to large results).
```python
-df = sd.read_parquet(
-
"s3://overturemaps-us-west-2/release/2025-11-19.0/theme=divisions/type=division_area/"
-)
-```
-
+sd.sql(
+ "SELECT names.primary AS name, geometry FROM divisions WHERE region =
'CA-NS'"
+).to_memtable().to_view("divisions_ns")
-```python
-# inspect a few rows of the data
-df.show(10)
+sd.view("divisions_ns").show(5)
```
-
┌─────────────────┬────────────────┬────────────────┬───┬────────────────┬────────┬────────────────┐
- │ id ┆ geometry ┆ bbox ┆ … ┆ is_territorial ┆
region ┆ division_id │
- │ utf8 ┆ geometry ┆ struct ┆ ┆ boolean ┆
utf8 ┆ utf8 │
-
╞═════════════════╪════════════════╪════════════════╪═══╪════════════════╪════════╪════════════════╡
- │ 3665c36d-d3a9-… ┆ POLYGON((12.5… ┆ {xmin: 12.455… ┆ … ┆ true ┆
IT-34 ┆ f05aa29f-151f… │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ 18a69439-a1da-… ┆ POLYGON((12.5… ┆ {xmin: 12.596… ┆ … ┆ true ┆
IT-36 ┆ ae00d58c-6e67… │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ 7d0f6d37-bb55-… ┆ POLYGON((12.6… ┆ {xmin: 12.567… ┆ … ┆ true ┆
IT-36 ┆ bdfc82ca-5f23… │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ 3f480ff6-6361-… ┆ POLYGON((12.5… ┆ {xmin: 12.549… ┆ … ┆ true ┆
IT-36 ┆ 1c750104-4470… │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ 31c3ab5e-eb6f-… ┆ POLYGON((12.6… ┆ {xmin: 12.612… ┆ … ┆ true ┆
IT-34 ┆ d90804ee-19a4… │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ 308517e6-64b4-… ┆ POLYGON((12.5… ┆ {xmin: 12.589… ┆ … ┆ true ┆
IT-34 ┆ aabd71e9-4d98… │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ 646e5b1f-b76a-… ┆ POLYGON((12.5… ┆ {xmin: 12.485… ┆ … ┆ true ┆
IT-34 ┆ 502c1c4e-fc19… │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ f2809a49-1082-… ┆ POLYGON((12.5… ┆ {xmin: 12.538… ┆ … ┆ true ┆
IT-34 ┆ 8b446eed-00ad… │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ 72b27245-c7fd-… ┆ POLYGON((12.5… ┆ {xmin: 12.501… ┆ … ┆ true ┆
IT-34 ┆ 1d535e1f-d19e… │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ 815855d9-05d0-… ┆ POLYGON((12.4… ┆ {xmin: 12.371… ┆ … ┆ true ┆
IT-34 ┆ 5aa91354-9e8c… │
-
└─────────────────┴────────────────┴────────────────┴───┴────────────────┴────────┴────────────────┘
-
+
┌────────────────────────────────────┬─────────────────────────────────────────────────────────────┐
+ │ name ┆ geometry
│
+ │ utf8 ┆ geometry
│
+
╞════════════════════════════════════╪═════════════════════════════════════════════════════════════╡
+ │ Sable Island National Park Reserve ┆ POLYGON((-60.178333
43.9824655,-60.1785682 43.9825425,-60.… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Sable Island ┆ POLYGON((-59.7744732
44.2254616,-59.7928902 44.2173253,-59… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Halifax Regional Municipality ┆ MULTIPOLYGON(((-59.7321078
44.2390248,-59.7502166 44.23385… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ West Liscomb ┆ POLYGON((-62.0615594
45.0023306,-62.0621839 45.0024475,-62… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Marie Joseph ┆ POLYGON((-61.9911914
44.95646,-61.9912383 44.9579526,-61.9… │
+
└────────────────────────────────────┴─────────────────────────────────────────────────────────────┘
-```python
-df.to_view("division_area")
-```
+Importantly, Overture data is distributed using GeoParquet 1.1, for which
SedonaDB has built-in support! This means that spatial queries (e.g.,
`ST_Intersects()`) tend to execute quickly against Overture. In this case, the
spatial query for Nova Scotia is ~5x faster than the text-based region query.
```python
-sd.sql("""
-SELECT
- COUNT(*)
-FROM division_area
-""").show()
-```
-
- ┌──────────┐
- │ count(*) │
- │ int64 │
- ╞══════════╡
- │ 1052542 │
- └──────────┘
+import shapely
+ns_bbox_wkb = shapely.box(-66.5, 43.4, -59.8, 47.1).wkb
+sd.sql(
+ """
+ SELECT names.primary AS name, geometry
+ FROM divisions
+ WHERE ST_Contains(ST_GeomFromWKB($wkb, 4326), geometry)
+ """,
+ params={"wkb": ns_bbox_wkb},
+).to_memtable().to_view("divisions_ns", overwrite=True)
-```python
-df.schema
+sd.view("divisions_ns").show(5)
```
+
┌───────────────────┬──────────────────────────────────────────────────────────────────────────────┐
+ │ name ┆ geometry
│
+ │ utf8 ┆ geometry
│
+
╞═══════════════════╪══════════════════════════════════════════════════════════════════════════════╡
+ │ Maces Bay ┆ POLYGON((-66.4491254 45.1265729,-66.4577261
45.126933,-66.4591563 45.126991… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Gooseberry Island ┆ POLYGON((-66.2598821 45.1380421,-66.2599962
45.1381233,-66.2600591 45.13828… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Musquash Parish ┆ POLYGON((-66.4595418 45.2215004,-66.4595406
45.221468,-66.4595396 45.221391… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Dipper Harbour ┆ POLYGON((-66.3755086 45.118812,-66.4089711
45.1488327,-66.4284252 45.138119… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Chance Harbour ┆ POLYGON((-66.4089711 45.1488327,-66.3755086
45.118812,-66.3541725 45.105991… │
+
└───────────────────┴──────────────────────────────────────────────────────────────────────────────┘
+## Overture buildings table
- SedonaSchema with 13 fields:
- id: utf8<Utf8View>
- geometry: geometry<WkbView(ogc:crs84)>
- bbox: struct<Struct(xmin Float32, xmax Float32, ymin Float32, ymax
Float32)>
- country: utf8<Utf8View>
- version: int32<Int32>
- sources: list<List(Field { name: "element", data_type: Struct([Field {
name: "property", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered:
false, metadata: {} }, Field { name: "dataset", data_type: Utf8, nullable:
true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name:
"license", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: {} }, Field { name: "record_id", data_type: Utf8, nullable: true,
dict_id: 0, dict_is_ordered: fal [...]
- subtype: utf8<Utf8View>
- class: utf8<Utf8View>
- names: struct<Struct(primary Utf8, common Map(Field { name: "key_value",
data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false,
dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "value",
data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata:
{} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} },
false), rules List(Field { name: "element", data_type: Struct([Field { name:
"variant", data_type: Utf8, nul [...]
- is_land: boolean<Boolean>
- is_territorial: boolean<Boolean>
- region: utf8<Utf8View>
- division_id: utf8<Utf8View>
+The [Overture buildings
table](https://docs.overturemaps.org/guides/buildings/) is one of the largest
tables provided by the Overture Maps Foundation. The workflow is similar to the
division table or any remote table; however, the buildings table presents
several unique challenges.
+First, the metadata size for all files in the buildings table is very large.
SedonaDB caches remote metadata to avoid repeated downloads; however, the
default cache size is too small. For repeated queries against the buildings
table, ensure that the cache size is increased to at least 900 MB and/or
`.to_view()` is used to cache the schema. The cache lives as long as the
session; to clear it, create a new session with `sd = sedona.db.connect()` or
reset the cache size to a smaller value.
+> Overture removes old releases. See [this
page](https://docs.overturemaps.org/release-calendar/#current-release) for
the latest version number and replace the relevant portion of the URL below.
```python
-# get all the divisions in Nova Scotia and save them in memory with
to_memtable()
-nova_scotia_bbox_wkt = (
- "POLYGON((-66.5 43.4, -66.5 47.1, -59.8 47.1, -59.8 43.4, -66.5 43.4))"
-)
-ns = sd.sql(f"""
-SELECT
- country, region, names, geometry
-FROM division_area
-WHERE
- ST_Intersects(
- geometry,
- ST_GeomFromText('{nova_scotia_bbox_wkt}', 4326)
- )
-""").to_memtable()
+sd.sql("SET datafusion.runtime.metadata_cache_limit = '900M'").execute()
+
+sd.read_parquet(
+
"s3://overturemaps-us-west-2/release/2026-02-18.0/theme=buildings/type=building/",
+ options={"aws.skip_signature": True, "aws.region": "us-west-2"},
+).to_view("buildings")
```
+As with all SedonaDB DataFrames, viewing a schema or previewing the first few
rows is lazy and usually fast unless a query contains large aggregations or
joins.
+
```python
-ns.to_view("ns_divisions")
+sd.view("buildings").show(5)
```
+
┌──────────────────────────────────────┬─────────────────────────────────────────┬───┬─────────────┐
+ │ id ┆ geometry
┆ … ┆ roof_height │
+ │ utf8 ┆ geometry
┆ ┆ float64 │
+
╞══════════════════════════════════════╪═════════════════════════════════════════╪═══╪═════════════╡
+ │ ab23f7ee-4c05-4246-a016-8260ce58a916 ┆ POLYGON((-67.589523
-39.0908362,-67.58… ┆ … ┆ │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ 58356258-2e80-48fc-aacf-d81fcf74074c ┆ POLYGON((-67.5896327
-39.0907868,-67.5… ┆ … ┆ │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ b50595a8-cddb-44dd-bdbf-7bbe1e858ae0 ┆ POLYGON((-67.5897117
-39.0908483,-67.5… ┆ … ┆ │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ cbabe2df-f49a-4e9f-9cbe-c527a4b3b9f1 ┆ POLYGON((-67.5898768
-39.0907073,-67.5… ┆ … ┆ │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ bcd6984b-8da4-4dfe-9212-be2b02a24b67 ┆ POLYGON((-67.5901879
-39.0908288,-67.5… ┆ … ┆ │
+
└──────────────────────────────────────┴─────────────────────────────────────────┴───┴─────────────┘
+
-```python
-df = sd.sql("""
-SELECT UNNEST(names), geometry
-FROM ns_divisions
-WHERE region = 'CA-NS'
-""")
-```
+Some operations like `.count()` use summary statistics and execute quickly
even for large remote tables:
```python
-%%time
-# this executes quickly because the Nova Scotia data was persisted in memory
with `to_memtable()`
-df.show(2)
+sd.view("buildings").count()
```
-
┌────────────────────────┬────────────────────────┬────────────────────────┬───────────────────────┐
- │ __unnest_placeholder(n ┆ __unnest_placeholder(n ┆ __unnest_placeholder(n
┆ geometry │
- │ s_divisions.names).pr… ┆ s_divisions.names).co… ┆ s_divisions.names).ru…
┆ geometry │
-
╞════════════════════════╪════════════════════════╪════════════════════════╪═══════════════════════╡
- │ Apple River ┆ ┆
┆ POLYGON((-64.7260681… │
-
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
- │ Allen Hill ┆ ┆
┆ POLYGON((-64.6956656… │
-
└────────────────────────┴────────────────────────┴────────────────────────┴───────────────────────┘
- CPU times: user 1.25 ms, sys: 805 μs, total: 2.05 ms
- Wall time: 1.42 ms
-
-## Visualize the results with lonboard
-```python
-import lonboard
+ 2541282557
-lonboard.viz(df)
-```
+Overture buildings has a number of attributes on which we can filter. For
long-running queries it may be convenient to cache a result locally using
`.to_memtable()` or `.to_parquet()` before inspecting it with other tools;
however, like all Overture tables, it is optimized for spatial queries, and
these are usually not expensive for small areas.
+For example, we can find all of the buildings in New York City taller than 20
meters:
- Map(basemap_style=<CartoBasemap.DarkMatter:
'https://basemaps.cartocdn.com/gl/dark-matter-gl-style/style.json'…
+```python
+nyc_bbox_wkt = (
+ "POLYGON((-74.2591 40.4774, -74.2591 40.9176, -73.7004 40.9176, "
+ "-73.7004 40.4774, -74.2591 40.4774))"
+)
+sd.sql(
+ """
+ SELECT
+ id,
+ height,
+ num_floors,
+ roof_shape,
+ ST_Centroid(geometry) as centroid
+ FROM
+ buildings
+ WHERE
+ is_underground = FALSE
+ AND height IS NOT NULL
+ AND height > 20
+ AND ST_Intersects(
+ geometry,
+ ST_GeomFromText($1, 4326)
+ )
+ LIMIT 5;
+ """,
+ params=(nyc_bbox_wkt,),
+).to_memtable().to_view("buildings_nyc")
+
+sd.view("buildings_nyc").show(5)
+```
-
+
┌─────────────────────────┬────────────────────┬────────────┬────────────┬─────────────────────────┐
+ │ id ┆ height ┆ num_floors ┆ roof_shape ┆
centroid │
+ │ utf8 ┆ float64 ┆ int32 ┆ utf8 ┆
geometry │
+
╞═════════════════════════╪════════════════════╪════════════╪════════════╪═════════════════════════╡
+ │ aa8e3a73-c72c-4f1a-b6e… ┆ 20.38205909729004 ┆ ┆ ┆
POINT(-74.187673580307… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ efe7616b-7f7e-464c-9ce… ┆ 26.18361473083496 ┆ ┆ ┆
POINT(-74.189040982134… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ b3f734a1-325b-4e8c-b1d… ┆ 27.025876998901367 ┆ ┆ ┆
POINT(-74.2558161 40.8… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ 45d88655-e2f4-4a08-926… ┆ 25.485210418701172 ┆ ┆ ┆
POINT(-74.182252194444… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ 31e8353c-7d5b-4b20-94e… ┆ 21.294815063476562 ┆ ┆ ┆
POINT(-74.197113787905… │
+
└─────────────────────────┴────────────────────┴────────────┴────────────┴─────────────────────────┘
diff --git a/python/sedonadb/tests/test_udf.py
b/python/sedonadb/tests/test_udf.py
index 4159c96f..cead6de1 100644
--- a/python/sedonadb/tests/test_udf.py
+++ b/python/sedonadb/tests/test_udf.py
@@ -237,42 +237,3 @@ def test_udf_datafusion_to_sedonadb(con):
con.sql("SELECT some_external_udf('abcd', 123) as col").to_pandas(),
pd.DataFrame({"col": [b"abcd / 123"]}),
)
-
-
-def test_udf_sedonadb_registry_function_to_datafusion(con):
- datafusion = pytest.importorskip("datafusion")
- udf_impl = udf.arrow_udf(pa.binary(), [udf.STRING, udf.NUMERIC])(some_udf)
-
- # Register with our session
- con.register_udf(udf_impl)
-
- # Create a datafusion session, fetch our udf and register with the other
session
- datafusion_ctx = datafusion.SessionContext()
- datafusion_ctx.register_udf(
- datafusion.ScalarUDF.from_pycapsule(con._impl.scalar_udf("some_udf"))
- )
-
- # Can't quite use to_pandas() because there is a schema/batch nullability
mismatch
- batches = datafusion_ctx.sql("SELECT some_udf('abcd', 123) as
col").collect()
- assert len(batches) == 1
- pd.testing.assert_frame_equal(
- batches[0].to_pandas(),
- pd.DataFrame({"col": [b"abcd / 123"]}),
- )
-
-
-def test_udf_sedonadb_to_datafusion():
- datafusion = pytest.importorskip("datafusion")
- udf_impl = udf.arrow_udf(pa.binary(), [udf.STRING, udf.NUMERIC])(some_udf)
-
- # Create a datafusion session, register udf_impl directly
- datafusion_ctx = datafusion.SessionContext()
- datafusion_ctx.register_udf(datafusion.ScalarUDF.from_pycapsule(udf_impl))
-
- # Can't quite use to_pandas() because there is a schema/batch nullability
mismatch
- batches = datafusion_ctx.sql("SELECT some_udf('abcd', 123) as
col").collect()
- assert len(batches) == 1
- pd.testing.assert_frame_equal(
- batches[0].to_pandas(),
- pd.DataFrame({"col": [b"abcd / 123"]}),
- )
diff --git a/rust/sedona-geoparquet/src/file_opener.rs
b/rust/sedona-geoparquet/src/file_opener.rs
index 6ba04b82..c827f776 100644
--- a/rust/sedona-geoparquet/src/file_opener.rs
+++ b/rust/sedona-geoparquet/src/file_opener.rs
@@ -27,6 +27,7 @@ use datafusion_common::{
exec_err, Result,
};
use datafusion_datasource_parquet::metadata::DFParquetMetadata;
+use datafusion_execution::cache::cache_manager::FileMetadataCache;
use datafusion_physical_expr::PhysicalExpr;
use datafusion_physical_plan::metrics::{
ExecutionPlanMetricsSet, MetricBuilder, MetricType, MetricValue,
PruningMetrics,
@@ -111,6 +112,7 @@ pub(crate) struct GeoParquetFileOpener {
pub enable_pruning: bool,
pub metrics: GeoParquetFileOpenerMetrics,
pub options: TableGeoParquetOptions,
+ pub metadata_cache: Option<Arc<dyn FileMetadataCache>>,
}
impl FileOpener for GeoParquetFileOpener {
@@ -121,6 +123,7 @@ impl FileOpener for GeoParquetFileOpener {
let parquet_metadata =
DFParquetMetadata::new(&self_clone.object_store,
&file.object_meta)
.with_metadata_size_hint(self_clone.metadata_size_hint)
+ .with_file_metadata_cache(self_clone.metadata_cache)
.fetch_metadata()
.await?;
diff --git a/rust/sedona-geoparquet/src/format.rs
b/rust/sedona-geoparquet/src/format.rs
index 08a04c92..6c91ccfc 100644
--- a/rust/sedona-geoparquet/src/format.rs
+++ b/rust/sedona-geoparquet/src/format.rs
@@ -40,6 +40,7 @@ use datafusion::{
use datafusion_catalog::{memory::DataSourceExec, Session};
use datafusion_common::{plan_err, GetExt, Result, Statistics};
use datafusion_datasource_parquet::metadata::DFParquetMetadata;
+use datafusion_execution::cache::cache_manager::FileMetadataCache;
use datafusion_physical_expr::{LexRequirement, PhysicalExpr};
use datafusion_physical_plan::{
filter_pushdown::FilterPushdownPropagation,
metrics::ExecutionPlanMetricsSet, ExecutionPlan,
@@ -47,7 +48,7 @@ use datafusion_physical_plan::{
use futures::{StreamExt, TryStreamExt};
use object_store::{ObjectMeta, ObjectStore};
-use sedona_common::sedona_internal_err;
+use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err};
use sedona_schema::extension_type::ExtensionType;
@@ -197,16 +198,22 @@ impl FileFormat for GeoParquetFormat {
let inner_schema_without_metadata =
self.inner().infer_schema(state, store, objects).await?;
+ let file_metadata_cache =
state.runtime_env().cache_manager.get_file_metadata_cache();
+
// Collect metadata separately. We can in theory do our own schema
// inference too to save an extra server request, but then we have to
// copy more ParquetFormat code. It may be that caching at the object
// store level is the way to go here.
let metadatas: Vec<_> = futures::stream::iter(objects)
- .map(|object| async move {
- DFParquetMetadata::new(store.as_ref(), object)
- .with_metadata_size_hint(self.inner().metadata_size_hint())
- .fetch_metadata()
- .await
+ .map(|object| {
+ let metadata_cache = file_metadata_cache.clone();
+ async move {
+ DFParquetMetadata::new(store.as_ref(), object)
+
.with_metadata_size_hint(self.inner().metadata_size_hint())
+ .with_file_metadata_cache(Some(metadata_cache))
+ .fetch_metadata()
+ .await
+ }
})
.boxed() // Workaround
https://github.com/rust-lang/rust/issues/64552
.buffered(state.config_options().execution.meta_fetch_concurrency)
@@ -305,7 +312,7 @@ impl FileFormat for GeoParquetFormat {
async fn create_physical_plan(
&self,
- _state: &dyn Session,
+ state: &dyn Session,
config: FileScanConfig,
) -> Result<Arc<dyn ExecutionPlan>> {
// A copy of ParquetSource::create_physical_plan() that ensures the
underlying
@@ -316,12 +323,21 @@ impl FileFormat for GeoParquetFormat {
metadata_size_hint = Some(metadata);
}
- let mut source = GeoParquetFileSource::new(self.options.clone());
+ let mut source = config
+ .file_source()
+ .as_any()
+ .downcast_ref::<GeoParquetFileSource>()
+ .cloned()
+ .ok_or_else(|| sedona_internal_datafusion_err!("Expected
GeoParquetFileSource"))?;
+
+ source = source.with_options(self.options.clone());
if let Some(metadata_size_hint) = metadata_size_hint {
source = source.with_metadata_size_hint(metadata_size_hint)
}
+ let file_metadata_cache =
state.runtime_env().cache_manager.get_file_metadata_cache();
+ source.metadata_cache = Some(file_metadata_cache.clone());
let conf = FileScanConfigBuilder::from(config)
.with_source(Arc::new(source))
.build();
@@ -371,6 +387,7 @@ pub struct GeoParquetFileSource {
metadata_size_hint: Option<usize>,
predicate: Option<Arc<dyn PhysicalExpr>>,
options: TableGeoParquetOptions,
+ metadata_cache: Option<Arc<dyn FileMetadataCache>>,
}
impl GeoParquetFileSource {
@@ -381,6 +398,14 @@ impl GeoParquetFileSource {
metadata_size_hint: None,
predicate: None,
options,
+ metadata_cache: None,
+ }
+ }
+
+ pub fn with_options(&self, options: TableGeoParquetOptions) -> Self {
+ Self {
+ options,
+ ..self.clone()
}
}
@@ -431,6 +456,7 @@ impl GeoParquetFileSource {
options: TableGeoParquetOptions::from(
parquet_source.table_parquet_options().clone(),
),
+ metadata_cache: None,
})
} else {
sedona_internal_err!("GeoParquetFileSource constructed from
non-ParquetSource")
@@ -444,6 +470,7 @@ impl GeoParquetFileSource {
metadata_size_hint: self.metadata_size_hint,
predicate: Some(predicate),
options: self.options.clone(),
+ metadata_cache: self.metadata_cache.clone(),
}
}
@@ -469,6 +496,7 @@ impl GeoParquetFileSource {
metadata_size_hint: self.metadata_size_hint,
predicate: self.predicate.clone(),
options: self.options.clone(),
+ metadata_cache: self.metadata_cache.clone(),
}
}
@@ -479,6 +507,7 @@ impl GeoParquetFileSource {
metadata_size_hint: Some(hint),
predicate: self.predicate.clone(),
options: self.options.clone(),
+ metadata_cache: self.metadata_cache.clone(),
}
}
}
@@ -504,11 +533,12 @@ impl FileSource for GeoParquetFileSource {
metadata_size_hint: self.metadata_size_hint,
predicate: self.predicate.clone(),
file_schema: base_config.file_schema().clone(),
- enable_pruning: self.inner.table_parquet_options().global.pruning,
+ enable_pruning: self.options.inner.global.pruning,
// HACK: Since there is no public API to set inner's metrics, so
we use
// inner's metrics as the ExecutionPlan-global metrics
metrics: GeoParquetFileOpenerMetrics::new(self.inner.metrics()),
options: self.options.clone(),
+ metadata_cache: self.metadata_cache.clone(),
})
}
@@ -527,6 +557,7 @@ impl FileSource for GeoParquetFileSource {
None,
)?;
updated_inner.options = self.options.clone();
+ updated_inner.metadata_cache = self.metadata_cache.clone();
Ok(inner_result.with_updated_node(Arc::new(updated_inner)))
}
None => Ok(inner_result),
@@ -544,6 +575,7 @@ impl FileSource for GeoParquetFileSource {
self.predicate.clone(),
);
source.options = self.options.clone();
+ source.metadata_cache = self.metadata_cache.clone();
Arc::new(source)
}
@@ -554,6 +586,7 @@ impl FileSource for GeoParquetFileSource {
self.predicate.clone(),
);
source.options = self.options.clone();
+ source.metadata_cache = self.metadata_cache.clone();
Arc::new(source)
}
@@ -564,6 +597,7 @@ impl FileSource for GeoParquetFileSource {
self.predicate.clone(),
);
source.options = self.options.clone();
+ source.metadata_cache = self.metadata_cache.clone();
Arc::new(source)
}
@@ -574,6 +608,7 @@ impl FileSource for GeoParquetFileSource {
self.predicate.clone(),
);
source.options = self.options.clone();
+ source.metadata_cache = self.metadata_cache.clone();
Arc::new(source)
}