This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new e531723 docs: Add delta lake documentation (#238)
e531723 is described below
commit e531723d4d920c8739a3a99ba1f2f0648112f37e
Author: Matthew Powers <[email protected]>
AuthorDate: Mon Nov 3 15:16:13 2025 -0500
docs: Add delta lake documentation (#238)
---
docs/delta-lake.ipynb | 425 ++++++++++++++++++++++++++++++++++++++++++++++++++
docs/delta-lake.md | 279 +++++++++++++++++++++++++++++++++
mkdocs.yml | 14 +-
3 files changed, 712 insertions(+), 6 deletions(-)
diff --git a/docs/delta-lake.ipynb b/docs/delta-lake.ipynb
new file mode 100644
index 0000000..c156a0f
--- /dev/null
+++ b/docs/delta-lake.ipynb
@@ -0,0 +1,425 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "3425268d-6430-4e52-9019-969d61ef5458",
+ "metadata": {},
+ "source": [
+ "# SedonaDB + Delta Lake\n",
+ "\n",
+ "This page shows how to read and write Delta Lake tables with SedonaDB.\n",
+ "\n",
+ "Make sure you run `pip install deltalake` to run the code snippets below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "83f5ca25-8059-4624-bd00-e44cd172d9c2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from deltalake import write_deltalake, DeltaTable\n",
+ "import sedona.db\n",
+ "import pyarrow.compute as pc\n",
+ "\n",
+ "sd = sedona.db.connect()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1a90b1bf-99f9-4f50-987b-f1f30bf9988a",
+ "metadata": {},
+ "source": [
+ "Read in a GeoParquet dataset into a SedonaDB DataFrame."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "id": "a37f3a30-3267-4bfc-8e5b-e234053927af",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "countries = sd.read_parquet(\n",
+ "
\"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_countries_geo.parquet\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1f82b047-8182-4120-82d7-945ff38ecbca",
+ "metadata": {},
+ "source": [
+ "## Create a Delta Lake table with WKT\n",
+ "\n",
+ "Now write the DataFrame to a Delta Lake table. Notice that you must
convert the geometry column to Well-Known Text (WKT) or Well-Known Binary (WKB)
before writing to the Delta table.\n",
+ "\n",
+ "Delta Lake does not support geometry columns.\n",
+ "\n",
+ "This example shows how to create a Delta table with a WKT column. The
section that follows shows how to create a Delta table with a WKB column."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "35bdc296-d9ef-4ea2-9ba5-dac1cfd115ef",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "countries.to_view(\"countries\", True)\n",
+ "df = sd.sql(\n",
+ " \"select name, continent, ST_AsText(geometry) as geometry_wkt from
countries\"\n",
+ ")\n",
+ "table_path = \"/tmp/delta_with_wkt\"\n",
+ "write_deltalake(table_path, df.to_pandas(), mode=\"overwrite\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "974a5558-fb18-49b7-a304-fa93bdd363e8",
+ "metadata": {},
+ "source": [
+ "## Read Delta table into SedonaDB\n",
+ "\n",
+ "Now read the Delta table back into a SedonaDB DataFrame."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "c15d4605-483a-4041-973a-547098bdeef4",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+
"┌─────────────────────────────┬───────────────┬────────────────────────────────────────────────────┐\n",
+ "│ name ┆ continent ┆
geometry_wkt │\n",
+ "│ utf8 ┆ utf8 ┆
utf8 │\n",
+
"╞═════════════════════════════╪═══════════════╪════════════════════════════════════════════════════╡\n",
+ "│ Fiji ┆ Oceania ┆ MULTIPOLYGON(((180
-16.067132663642447,180 -16.55… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ United Republic of Tanzania ┆ Africa ┆
POLYGON((33.90371119710453 -0.9500000000000001,34… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Western Sahara ┆ Africa ┆
POLYGON((-8.665589565454809 27.656425889592356,-8… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Canada ┆ North America ┆
MULTIPOLYGON(((-122.84000000000003 49.00000000000… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ United States of America ┆ North America ┆
MULTIPOLYGON(((-122.84000000000003 49.00000000000… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Kazakhstan ┆ Asia ┆
POLYGON((87.35997033076265 49.21498078062912,86.5… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Uzbekistan ┆ Asia ┆
POLYGON((55.96819135928291 41.30864166926936,55.9… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Papua New Guinea ┆ Oceania ┆
MULTIPOLYGON(((141.00021040259185 -2.600151055515… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Indonesia ┆ Asia ┆
MULTIPOLYGON(((141.00021040259185 -2.600151055515… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Argentina ┆ South America ┆
MULTIPOLYGON(((-68.63401022758323 -52.63637045887… │\n",
+
"└─────────────────────────────┴───────────────┴────────────────────────────────────────────────────┘\n"
+ ]
+ }
+ ],
+ "source": [
+ "dt = DeltaTable(table_path)\n",
+ "arrow_table = dt.to_pyarrow_table()\n",
+ "df = sd.create_data_frame(arrow_table)\n",
+ "df.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a44fd9bd-85d2-4f1b-9df4-cc98ac6223f0",
+ "metadata": {},
+ "source": [
+ "Notice that the `geometry_wkt` column is `utf8`. It's not a geometry
column.\n",
+ "\n",
+ "Let's convert the `geometry_wkt` column to a geometry column."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "9052c4af-4478-4038-81ae-cf9c0ba84345",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+
"┌─────────────────────────────┬───────────────┬────────────────────────────────────────────────────┐\n",
+ "│ name ┆ continent ┆
geom │\n",
+ "│ utf8 ┆ utf8 ┆
geometry │\n",
+
"╞═════════════════════════════╪═══════════════╪════════════════════════════════════════════════════╡\n",
+ "│ Fiji ┆ Oceania ┆ MULTIPOLYGON(((180
-16.067132663642447,180 -16.55… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ United Republic of Tanzania ┆ Africa ┆
POLYGON((33.90371119710453 -0.9500000000000001,34… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Western Sahara ┆ Africa ┆
POLYGON((-8.665589565454809 27.656425889592356,-8… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Canada ┆ North America ┆
MULTIPOLYGON(((-122.84000000000003 49.00000000000… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ United States of America ┆ North America ┆
MULTIPOLYGON(((-122.84000000000003 49.00000000000… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Kazakhstan ┆ Asia ┆
POLYGON((87.35997033076265 49.21498078062912,86.5… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Uzbekistan ┆ Asia ┆
POLYGON((55.96819135928291 41.30864166926936,55.9… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Papua New Guinea ┆ Oceania ┆
MULTIPOLYGON(((141.00021040259185 -2.600151055515… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Indonesia ┆ Asia ┆
MULTIPOLYGON(((141.00021040259185 -2.600151055515… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Argentina ┆ South America ┆
MULTIPOLYGON(((-68.63401022758323 -52.63637045887… │\n",
+
"└─────────────────────────────┴───────────────┴────────────────────────────────────────────────────┘\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.to_view(\"my_table\", True)\n",
+ "res = sd.sql(\"\"\"\n",
+ "SELECT\n",
+ " name,\n",
+ " continent,\n",
+ " ST_GeomFromWKT(geometry_wkt) as geom\n",
+ "from my_table\n",
+ "\"\"\")\n",
+ "res.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "802be3a7-5b1d-4f02-85f9-9460743a921d",
+ "metadata": {},
+ "source": [
+ "Confirm the schema of the DataFrame."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "5758bfec-86c0-4a14-8e4e-201149a59fac",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "SedonaSchema with 3 fields:\n",
+ " name: utf8<Utf8>\n",
+ " continent: utf8<Utf8>\n",
+ " geom: geometry<Wkb>"
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "res.schema"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7c975e52-5b2d-4ee9-a7c6-51cb3304c0d6",
+ "metadata": {},
+ "source": [
+ "## Filter countries in a particular geographic region\n",
+ "\n",
+ "Now, let's grab some countries in the western portion of South America
using a polygon region.\n",
+ "\n",
+ "SedonaDB can run these types of queries on geometric data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "c210e29a-170b-4a01-b464-c7d2f79998eb",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+
"┌──────────┬───────────────┬───────────────────────────────────────────────────────────────────────┐\n",
+ "│ name ┆ continent ┆ geom
│\n",
+ "│ utf8 ┆ utf8 ┆ geometry
│\n",
+
"╞══════════╪═══════════════╪═══════════════════════════════════════════════════════════════════════╡\n",
+ "│ Chile ┆ South America ┆ MULTIPOLYGON(((-68.63401022758323
-52.63637045887449,-68.63335000000… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Peru ┆ South America ┆ POLYGON((-69.89363521999663
-4.2981869441943275,-70.7947688463023 -4… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Colombia ┆ South America ┆ POLYGON((-66.87632585312258
1.253360500489336,-67.0650481838525 1.13… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Ecuador ┆ South America ┆ POLYGON((-75.37322323271385
-0.1520317521204504,-75.23372270374195 -… │\n",
+
"└──────────┴───────────────┴───────────────────────────────────────────────────────────────────────┘\n"
+ ]
+ }
+ ],
+ "source": [
+ "res = sd.sql(\"\"\"\n",
+ "SELECT\n",
+ " name,\n",
+ " continent,\n",
+ " ST_GeomFromWKT(geometry_wkt) as geom\n",
+ "FROM my_table\n",
+ "WHERE ST_Intersects(\n",
+ " ST_GeomFromWKT(geometry_wkt),\n",
+ " ST_GeomFromWKT('POLYGON((-81 5, -75 5, -75 -56, -81 -56, -81 5))')\n",
+ ")\n",
+ "\"\"\")\n",
+ "res.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "996a56e2-a5b6-42f8-88b0-90d3a0c4c30c",
+ "metadata": {},
+ "source": [
+ "## Create a Delta Lake table with WKB\n",
+ "\n",
+ "You can also create a Delta table with WKB.\n",
+ "\n",
+ "WKB is binary, can be compressed more effectively than WKT, and results
in smaller file sizes.\n",
+ "\n",
+ "The following example shows how to store the cities dataset in a Delta
table with the geometry data stored as WKB.\n",
+ "\n",
+ "It also demonstrates how to add a `bbox` column to the Delta table,
enabling more efficient filtering."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "143a8485-773f-4b94-b487-40ad63706669",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cities = sd.read_parquet(\n",
+ "
\"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_geo.parquet\"\n",
+ ")\n",
+ "cities.to_view(\"cities\", True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "36bce474-51b8-468e-931e-347c19624221",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = sd.sql(\"\"\"\n",
+ "select \n",
+ " name, \n",
+ " ST_AsBinary(geometry) as geometry_wkb,\n",
+ " STRUCT(\n",
+ " ST_XMin(geometry) as xmin,\n",
+ " ST_YMin(geometry) as ymin,\n",
+ " ST_XMax(geometry) as xmax,\n",
+ " ST_YMax(geometry) as ymax\n",
+ " ) as bbox\n",
+ "from cities\n",
+ "\"\"\")\n",
+ "table_path = \"/tmp/delta_with_wkb\"\n",
+ "write_deltalake(table_path, df.to_pandas(), mode=\"overwrite\",
schema_mode=\"overwrite\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cfb1739c-6e60-4330-be98-48b2faa4c6cf",
+ "metadata": {},
+ "source": [
+ "Read the Delta table and filter it to only include cities in the eastern
half of North America."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "590d056d-60bb-4637-9981-eea721c559a2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dt = DeltaTable(table_path)\n",
+ "dataset = dt.to_pyarrow_dataset()\n",
+ "filter_expr = (\n",
+ " (pc.field(\"bbox\", \"xmax\") >= -97.0)\n",
+ " & (pc.field(\"bbox\", \"xmin\") <= -67.0)\n",
+ " & (pc.field(\"bbox\", \"ymax\") >= 25.0)\n",
+ " & (pc.field(\"bbox\", \"ymin\") <= 50.0)\n",
+ ")\n",
+ "filtered_table = dataset.to_table(filter=filter_expr)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "id": "65578ebb-4797-4aaa-9377-87893f9a7694",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = sd.create_data_frame(filtered_table.to_pandas())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "id": "a0cb1afd-153b-413d-8f04-0d533e717b30",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "┌──────────────────┬──────────────────────────────────────────────┐\n",
+ "│ name ┆ geom │\n",
+ "│ utf8 ┆ geometry │\n",
+ "╞══════════════════╪══════════════════════════════════════════════╡\n",
+ "│ Ottawa ┆ POINT(-75.7019612 45.4186427) │\n",
+ "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Nassau ┆ POINT(-77.3500438 25.0833901) │\n",
+ "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Houston ┆ POINT(-95.34843625672217 29.741272831862542) │\n",
+ "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Miami ┆ POINT(-80.2260519 25.7895566) │\n",
+ "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Atlanta ┆ POINT(-84.36764186571386 33.73945728378348) │\n",
+ "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Chicago ┆ POINT(-87.63523655322338 41.847961283364114) │\n",
+ "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Toronto ┆ POINT(-79.38945855491194 43.66464454743429) │\n",
+ "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Washington, D.C. ┆ POINT(-77.0113644 38.9014952) │\n",
+ "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ New York ┆ POINT(-73.99571754361698 40.72156174972766) │\n",
+ "└──────────────────┴──────────────────────────────────────────────┘\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.to_view(\"us_east_cities\", True)\n",
+ "sd.sql(\"select name, ST_GeomFromWKB(geometry_wkb) as geom from
us_east_cities\").show()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/delta-lake.md b/docs/delta-lake.md
new file mode 100644
index 0000000..11a1b1b
--- /dev/null
+++ b/docs/delta-lake.md
@@ -0,0 +1,279 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# SedonaDB + Delta Lake
+
+This page shows how to read and write Delta Lake tables with SedonaDB.
+
+Make sure you run `pip install deltalake` to run the code snippets below.
+
+
+```python
+from deltalake import write_deltalake, DeltaTable
+import sedona.db
+import pyarrow.compute as pc
+
+sd = sedona.db.connect()
+```
+
+Read in a GeoParquet dataset into a SedonaDB DataFrame.
+
+
+```python
+countries = sd.read_parquet(
+
"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_countries_geo.parquet"
+)
+```
+
+## Create a Delta Lake table with WKT
+
+Now write the DataFrame to a Delta Lake table. Notice that you must convert
the geometry column to Well-Known Text (WKT) or Well-Known Binary (WKB) before
writing to the Delta table.
+
+Delta Lake does not support geometry columns.
+
+This example shows how to create a Delta table with a WKT column. The section
that follows shows how to create a Delta table with a WKB column.
+
+
+```python
+countries.to_view("countries", True)
+df = sd.sql(
+ "select name, continent, ST_AsText(geometry) as geometry_wkt from
countries"
+)
+table_path = "/tmp/delta_with_wkt"
+write_deltalake(table_path, df.to_pandas(), mode="overwrite")
+```
+
+## Read Delta table into SedonaDB
+
+Now read the Delta table back into a SedonaDB DataFrame.
+
+
+```python
+dt = DeltaTable(table_path)
+arrow_table = dt.to_pyarrow_table()
+df = sd.create_data_frame(arrow_table)
+df.show()
+```
+
+
┌─────────────────────────────┬───────────────┬────────────────────────────────────────────────────┐
+ │ name ┆ continent ┆
geometry_wkt │
+ │ utf8 ┆ utf8 ┆
utf8 │
+
╞═════════════════════════════╪═══════════════╪════════════════════════════════════════════════════╡
+ │ Fiji ┆ Oceania ┆ MULTIPOLYGON(((180
-16.067132663642447,180 -16.55… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ United Republic of Tanzania ┆ Africa ┆ POLYGON((33.90371119710453
-0.9500000000000001,34… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Western Sahara ┆ Africa ┆
POLYGON((-8.665589565454809 27.656425889592356,-8… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Canada ┆ North America ┆
MULTIPOLYGON(((-122.84000000000003 49.00000000000… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ United States of America ┆ North America ┆
MULTIPOLYGON(((-122.84000000000003 49.00000000000… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Kazakhstan ┆ Asia ┆ POLYGON((87.35997033076265
49.21498078062912,86.5… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Uzbekistan ┆ Asia ┆ POLYGON((55.96819135928291
41.30864166926936,55.9… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Papua New Guinea ┆ Oceania ┆
MULTIPOLYGON(((141.00021040259185 -2.600151055515… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Indonesia ┆ Asia ┆
MULTIPOLYGON(((141.00021040259185 -2.600151055515… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Argentina ┆ South America ┆
MULTIPOLYGON(((-68.63401022758323 -52.63637045887… │
+
└─────────────────────────────┴───────────────┴────────────────────────────────────────────────────┘
+
+
+Notice that the `geometry_wkt` column is `utf8`. It's not a geometry column.
+
+Let's convert the `geometry_wkt` column to a geometry column.
+
+
+```python
+df.to_view("my_table", True)
+res = sd.sql("""
+SELECT
+ name,
+ continent,
+ ST_GeomFromWKT(geometry_wkt) as geom
+from my_table
+""")
+res.show()
+```
+
+
┌─────────────────────────────┬───────────────┬────────────────────────────────────────────────────┐
+ │ name ┆ continent ┆
geom │
+ │ utf8 ┆ utf8 ┆
geometry │
+
╞═════════════════════════════╪═══════════════╪════════════════════════════════════════════════════╡
+ │ Fiji ┆ Oceania ┆ MULTIPOLYGON(((180
-16.067132663642447,180 -16.55… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ United Republic of Tanzania ┆ Africa ┆ POLYGON((33.90371119710453
-0.9500000000000001,34… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Western Sahara ┆ Africa ┆
POLYGON((-8.665589565454809 27.656425889592356,-8… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Canada ┆ North America ┆
MULTIPOLYGON(((-122.84000000000003 49.00000000000… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ United States of America ┆ North America ┆
MULTIPOLYGON(((-122.84000000000003 49.00000000000… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Kazakhstan ┆ Asia ┆ POLYGON((87.35997033076265
49.21498078062912,86.5… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Uzbekistan ┆ Asia ┆ POLYGON((55.96819135928291
41.30864166926936,55.9… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Papua New Guinea ┆ Oceania ┆
MULTIPOLYGON(((141.00021040259185 -2.600151055515… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Indonesia ┆ Asia ┆
MULTIPOLYGON(((141.00021040259185 -2.600151055515… │
+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Argentina ┆ South America ┆
MULTIPOLYGON(((-68.63401022758323 -52.63637045887… │
+
└─────────────────────────────┴───────────────┴────────────────────────────────────────────────────┘
+
+
+Confirm the schema of the DataFrame.
+
+
+```python
+res.schema
+```
+
+
+
+
+ SedonaSchema with 3 fields:
+ name: utf8<Utf8>
+ continent: utf8<Utf8>
+ geom: geometry<Wkb>
+
+
+
+## Filter countries in a particular geographic region
+
+Now, let's grab some countries in the western portion of South America using a
polygon region.
+
+SedonaDB can run these types of queries on geometric data.
+
+
+```python
+res = sd.sql("""
+SELECT
+ name,
+ continent,
+ ST_GeomFromWKT(geometry_wkt) as geom
+FROM my_table
+WHERE ST_Intersects(
+ ST_GeomFromWKT(geometry_wkt),
+ ST_GeomFromWKT('POLYGON((-81 5, -75 5, -75 -56, -81 -56, -81 5))')
+)
+""")
+res.show()
+```
+
+
┌──────────┬───────────────┬───────────────────────────────────────────────────────────────────────┐
+ │ name ┆ continent ┆ geom
│
+ │ utf8 ┆ utf8 ┆ geometry
│
+
╞══════════╪═══════════════╪═══════════════════════════════════════════════════════════════════════╡
+ │ Chile ┆ South America ┆ MULTIPOLYGON(((-68.63401022758323
-52.63637045887449,-68.63335000000… │
+
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Peru ┆ South America ┆ POLYGON((-69.89363521999663
-4.2981869441943275,-70.7947688463023 -4… │
+
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Colombia ┆ South America ┆ POLYGON((-66.87632585312258
1.253360500489336,-67.0650481838525 1.13… │
+
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Ecuador ┆ South America ┆ POLYGON((-75.37322323271385
-0.1520317521204504,-75.23372270374195 -… │
+
└──────────┴───────────────┴───────────────────────────────────────────────────────────────────────┘
+
+
+## Create a Delta Lake table with WKB
+
+You can also create a Delta table with WKB.
+
+WKB is binary, can be compressed more effectively than WKT, and results in
smaller file sizes.
+
+The following example shows how to store the cities dataset in a Delta table
with the geometry data stored as WKB.
+
+It also demonstrates how to add a `bbox` column to the Delta table, enabling
more efficient filtering.
+
+
+```python
+cities = sd.read_parquet(
+
"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_geo.parquet"
+)
+cities.to_view("cities", True)
+```
+
+
+```python
+df = sd.sql("""
+select
+ name,
+ ST_AsBinary(geometry) as geometry_wkb,
+ STRUCT(
+ ST_XMin(geometry) as xmin,
+ ST_YMin(geometry) as ymin,
+ ST_XMax(geometry) as xmax,
+ ST_YMax(geometry) as ymax
+ ) as bbox
+from cities
+""")
+table_path = "/tmp/delta_with_wkb"
+write_deltalake(table_path, df.to_pandas(), mode="overwrite",
schema_mode="overwrite")
+```
+
+Read the Delta table and filter it to only include cities in the eastern half
of North America.
+
+
+```python
+dt = DeltaTable(table_path)
+dataset = dt.to_pyarrow_dataset()
+filter_expr = (
+ (pc.field("bbox", "xmax") >= -97.0) &
+ (pc.field("bbox", "xmin") <= -67.0) &
+ (pc.field("bbox", "ymax") >= 25.0) &
+ (pc.field("bbox", "ymin") <= 50.0)
+)
+filtered_table = dataset.to_table(filter=filter_expr)
+```
+
+
+```python
+df = sd.create_data_frame(filtered_table.to_pandas())
+```
+
+
+```python
+df.to_view("us_east_cities", True)
+sd.sql("select name, ST_GeomFromWKB(geometry_wkb) as geom from
us_east_cities").show()
+```
+
+ ┌──────────────────┬──────────────────────────────────────────────┐
+ │ name ┆ geom │
+ │ utf8 ┆ geometry │
+ ╞══════════════════╪══════════════════════════════════════════════╡
+ │ Ottawa ┆ POINT(-75.7019612 45.4186427) │
+ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Nassau ┆ POINT(-77.3500438 25.0833901) │
+ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Houston ┆ POINT(-95.34843625672217 29.741272831862542) │
+ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Miami ┆ POINT(-80.2260519 25.7895566) │
+ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Atlanta ┆ POINT(-84.36764186571386 33.73945728378348) │
+ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Chicago ┆ POINT(-87.63523655322338 41.847961283364114) │
+ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Toronto ┆ POINT(-79.38945855491194 43.66464454743429) │
+ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ Washington, D.C. ┆ POINT(-77.0113644 38.9014952) │
+ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ New York ┆ POINT(-73.99571754361698 40.72156174972766) │
+ └──────────────────┴──────────────────────────────────────────────┘
diff --git a/mkdocs.yml b/mkdocs.yml
index 9f45525..aaaf78f 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -41,16 +41,18 @@ nav:
- Home: index.md
- Quickstart: quickstart-python.md
- Programming Guide: programming-guide.md
- - GeoPandas interop: geopandas-interop.md
- - Overture Examples: overture-examples.md
- - CRS Examples: crs-examples.md
- - Working with Parquet Files: working-with-parquet-files.md
- - Working with SQL in SedonaDB: working-with-sql-sedonadb.md
- - Contributors Guide: contributors-guide.md
- API:
- Python: reference/python.md
- SQL: reference/sql.md
- Spatial Joins: reference/sql-joins.md
+ - Examples:
+ - GeoPandas interop: geopandas-interop.md
+ - Overture Examples: overture-examples.md
+ - CRS Examples: crs-examples.md
+ - Delta Lake: delta-lake.md
+ - Working with Parquet Files: working-with-parquet-files.md
+ - Working with SQL in SedonaDB: working-with-sql-sedonadb.md
+ - Contributors Guide: contributors-guide.md
- SpatialBench: https://sedona.apache.org/spatialbench/
- Blog: https://sedona.apache.org/latest/blog/
- Community: https://sedona.apache.org/latest/setup/compile/