This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch branch-0.1.0 in repository https://gitbox.apache.org/repos/asf/sedona-db.git
commit 18aeba7f959d4417799cc25867d24e9723c66e44 Author: Matthew Powers <[email protected]> AuthorDate: Mon Sep 22 00:01:07 2025 -0400 docs: crs tracking example (#81) --- docs/crs-examples.ipynb | 481 ++++++++++++++++++++++++++++++++++++++++++++++++ docs/crs-examples.md | 334 +++++++++++++++++++++++++++++++++ mkdocs.yml | 2 +- 3 files changed, 816 insertions(+), 1 deletion(-) diff --git a/docs/crs-examples.ipynb b/docs/crs-examples.ipynb new file mode 100644 index 0000000..74192a3 --- /dev/null +++ b/docs/crs-examples.ipynb @@ -0,0 +1,481 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "91910e50-a5ae-4d5a-a431-62ac5fbc11ca", + "metadata": {}, + "source": [ + "# Coordinate Reference System (CRS) Examples\n", + "\n", + "This example demonstrates how one table with an EPSG 4326 CRS cannot be joined with another table that uses EPSG 3857." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "be8ffe47-dc89-4842-bb1e-1e8640afffc3", + "metadata": {}, + "outputs": [], + "source": [ + "import sedonadb\n", + "\n", + "sd = sedonadb.connect()" + ] + }, + { + "cell_type": "markdown", + "id": "54b48173-6be0-4827-ac42-1439eb31e9f7", + "metadata": {}, + "source": [ + "Read a table with a geometry column that uses EPSG 4326.\n", + "\n", + "Note how SedonaDB reads the CRS specified in the Parquet file." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "143f00d5-6878-4dab-a82c-c9fb4dbfaf00", + "metadata": {}, + "outputs": [], + "source": [ + "countries = sd.read_parquet(\n", + " \"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_countries_geo.parquet\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8ef94b7b-b65d-4da5-9443-3253e84e2e7f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SedonaSchema with 3 fields:\n", + " name: Utf8View\n", + " continent: Utf8View\n", + " geometry: wkb_view <epsg:4326>" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "countries.schema" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "12d94c4f-5e7f-47c6-b5cc-3a3363bbc290", + "metadata": {}, + "outputs": [], + "source": [ + "cities = sd.sql(\"\"\"\n", + "SELECT city, ST_SetSRID(ST_GeomFromText(wkt), 3857) AS geometry FROM (VALUES\n", + " ('New York', 'POINT(-8238310.24 4969803.34)'),\n", + " ('Los Angeles', 'POINT(-13153204.78 4037636.04)'),\n", + " ('Chicago', 'POINT(-9757148.04 5138517.44)'))\n", + "AS t(city, wkt)\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "36e53438-c2d2-444e-9f34-d391f0f3f588", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SedonaSchema with 2 fields:\n", + " city: Utf8\n", + " geometry: wkb <epsg:3857>" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cities.schema" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "62c87571-50aa-4f57-a7dd-4afa3210320a", + "metadata": {}, + "outputs": [], + "source": [ + "cities.to_view(\"cities\", overwrite=True)\n", + "countries.to_view(\"countries\", overwrite=True)" + ] + }, + { + "cell_type": "markdown", + "id": "561b3c8c-4952-4fa7-9fe1-3fa0522b0d9f", + "metadata": {}, + "source": [ + "## Join with mismatched CRSs\n", + "\n", + "The cities and countries tables have different CRSs.\n", + "\n", + "The cities table uses EPSG:3857 and the countries table uses EPSG:4326.\n", + "\n", + "Let's confirm that the code errors out if we try to join the mismatched tables." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "906bad37-4f3f-4028-82b4-487fabe5957f", + "metadata": {}, + "outputs": [ + { + "ename": "SedonaError", + "evalue": "type_coercion\ncaused by\nError during planning: Mismatched CRS arguments: epsg:3857 vs epsg:4326\nUse ST_Transform() or ST_SetSRID() to ensure arguments are compatible.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mSedonaError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[7], line 6\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# join doesn't work when CRSs don't match\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[43msd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124;43mselect * from cities\u001b[39;49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124;43mjoin countries\u001b[39;49m\n\u001b[1;32m [...] + "File \u001b[0;32m/opt/miniconda3/lib/python3.12/site-packages/sedonadb/dataframe.py:297\u001b[0m, in \u001b[0;36mDataFrame.show\u001b[0;34m(self, limit, width, ascii)\u001b[0m\n\u001b[1;32m 272\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Print the first limit rows to the console\u001b[39;00m\n\u001b[1;32m 273\u001b[0m \n\u001b[1;32m 274\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 294\u001b[0m \n\u001b[1;32 [...] + "\u001b[0;31mSedonaError\u001b[0m: type_coercion\ncaused by\nError during planning: Mismatched CRS arguments: epsg:3857 vs epsg:4326\nUse ST_Transform() or ST_SetSRID() to ensure arguments are compatible." + ] + } + ], + "source": [ + "# join doesn't work when CRSs don't match\n", + "sd.sql(\"\"\"\n", + "select * from cities\n", + "join countries\n", + "where ST_Intersects(cities.geometry, countries.geometry)\n", + "\"\"\").show()" + ] + }, + { + "cell_type": "markdown", + "id": "41e6f59f-5217-40b2-b05a-9c95eae29df8", + "metadata": {}, + "source": [ + "## Convert CRS and then join\n", + "\n", + "Let's convert the cities table to use EPSG:4326 and then perform the join with the two tables once they have matching CRSs." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "122857c1-f68d-4037-9787-54c20706e60f", + "metadata": {}, + "outputs": [], + "source": [ + "# update cities to use 4326\n", + "cities = sd.sql(\"\"\"\n", + "SELECT city, ST_Transform(geometry, 'EPSG:4326') as geometry\n", + "FROM cities\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "947e085c-62a4-4315-b155-007a95156964", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SedonaSchema with 2 fields:\n", + " city: Utf8\n", + " geometry: wkb <ogc:crs84>" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cities.schema" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "3bcbaf7a-ec40-4b7e-85c2-db5ef1e3232e", + "metadata": {}, + "outputs": [], + "source": [ + "cities.to_view(\"cities\", overwrite=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "819c8d04-fa03-4ef8-aecb-e79d48f0b820", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "┌─────────────┬──────────────────────┬──────────────────────┬───────────────┬──────────────────────┐\n", + "│ city ┆ geometry ┆ name ┆ continent ┆ geometry │\n", + "│ utf8 ┆ geometry ┆ utf8view ┆ utf8view ┆ geometry │\n", + "╞═════════════╪══════════════════════╪══════════════════════╪═══════════════╪══════════════════════╡\n", + "│ New York ┆ POINT(-74.006000039… ┆ United States of Am… ┆ North America ┆ MULTIPOLYGON(((-122… │\n", + "├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", + "│ Los Angeles ┆ POINT(-118.15724889… ┆ United States of Am… ┆ North America ┆ MULTIPOLYGON(((-122… │\n", + "├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", + "│ Chicago ┆ POINT(-87.649952137… ┆ United States of Am… ┆ North America ┆ MULTIPOLYGON(((-122… │\n", + "└─────────────┴──────────────────────┴──────────────────────┴───────────────┴──────────────────────┘\n" + ] + } + ], + "source": [ + "# join works when CRSs match\n", + "sd.sql(\"\"\"\n", + "select * from cities\n", + "join countries\n", + "where ST_Intersects(cities.geometry, countries.geometry)\n", + "\"\"\").show()" + ] + }, + { + "cell_type": "markdown", + "id": "5279bebd-1d8d-4f33-bcd9-2c1e93ff7221", + "metadata": {}, + "source": [ + "## Example #2: Joining two tables with different CRSs\n", + "\n", + "This example shows how to join a `vermont` table with an EPSG 32618 CRS with a `buildings` table that uses an EPSG 4326 CRS.\n", + "\n", + "The example highlights the following features:\n", + "\n", + "1. SedonaDB reads the CRS stored in the files\n", + "2. SedonaDB protects you from accidentally joining files with mismatched CRSs\n", + "3. It's easy to convert a GeoPandas DataFrame => a SedonaDB DataFrame and maintain the CRS" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d9ea1469-8e6d-4ef1-a440-5573c1345f0d", + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "\n", + "path = \"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/example-crs/files/example-crs_vermont-utm.fgb\"\n", + "gdf = gpd.read_file(path)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "46660482-3fed-4e6c-b37a-2947326e884a", + "metadata": {}, + "outputs": [], + "source": [ + "vermont = sd.create_data_frame(gdf)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "1571aa5e-638c-493a-90a1-0cffbeea0bd9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SedonaSchema with 1 field:\n", + " geometry: wkb <epsg:32618>" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vermont.schema" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "eb4b33f1-972e-4c18-bdbd-fd4fe268b339", + "metadata": {}, + "outputs": [], + "source": [ + "buildings = sd.read_parquet(\n", + " \"https://github.com/geoarrow/geoarrow-data/releases/download/v0.2.0/microsoft-buildings_point_geo.parquet\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "22baf082-736e-4881-9704-d57eea07068c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "┌─────────────────────────────────┐\n", + "│ geometry │\n", + "│ geometry │\n", + "╞═════════════════════════════════╡\n", + "│ POINT(-77.10109681 42.53495524) │\n", + "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", + "│ POINT(-77.10048552 42.53695011) │\n", + "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", + "│ POINT(-77.10096508 42.53681338) │\n", + "└─────────────────────────────────┘\n" + ] + } + ], + "source": [ + "buildings.show(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e1d2d89d-8227-43ea-8507-fd6524fe2ac5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SedonaSchema with 1 field:\n", + " geometry: wkb_view <ogc:crs84>" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "buildings.schema" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e959081e-3a8d-4041-b00f-a19bca10be39", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "129735970" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "buildings.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "d9ef702f-e424-4e53-9629-da6ed256ee7f", + "metadata": {}, + "outputs": [], + "source": [ + "buildings.to_view(\"buildings\", overwrite=True)\n", + "vermont.to_view(\"vermont\", overwrite=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "0c300dad-aa60-4291-a10b-d2a332b37593", + "metadata": {}, + "outputs": [ + { + "ename": "SedonaError", + "evalue": "type_coercion\ncaused by\nError during planning: Mismatched CRS arguments: ogc:crs84 vs epsg:32618\nUse ST_Transform() or ST_SetSRID() to ensure arguments are compatible.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mSedonaError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[12], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[43msd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124;43mselect count(*) from buildings\u001b[39;49m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124;43mjoin vermont\u001b[39;49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124;43mwhere ST_Intersects(buildings.geometry, vermont.geometry)\u [...] + "File \u001b[0;32m/opt/miniconda3/lib/python3.12/site-packages/sedonadb/dataframe.py:297\u001b[0m, in \u001b[0;36mDataFrame.show\u001b[0;34m(self, limit, width, ascii)\u001b[0m\n\u001b[1;32m 272\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Print the first limit rows to the console\u001b[39;00m\n\u001b[1;32m 273\u001b[0m \n\u001b[1;32m 274\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 294\u001b[0m \n\u001b[1;32 [...] + "\u001b[0;31mSedonaError\u001b[0m: type_coercion\ncaused by\nError during planning: Mismatched CRS arguments: ogc:crs84 vs epsg:32618\nUse ST_Transform() or ST_SetSRID() to ensure arguments are compatible." + ] + } + ], + "source": [ + "sd.sql(\"\"\"\n", + "select count(*) from buildings\n", + "join vermont\n", + "where ST_Intersects(buildings.geometry, vermont.geometry)\n", + "\"\"\").show()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "648984ce-ac7a-4f76-ac9f-17bd5c628bd0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "┌──────────┐\n", + "│ count(*) │\n", + "│ int64 │\n", + "╞══════════╡\n", + "│ 361856 │\n", + "└──────────┘\n" + ] + } + ], + "source": [ + "sd.sql(\"\"\"\n", + "select count(*) from buildings\n", + "join vermont\n", + "where ST_Intersects(buildings.geometry, ST_Transform(vermont.geometry, 'EPSG:4326'))\n", + "\"\"\").show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/crs-examples.md b/docs/crs-examples.md new file mode 100644 index 0000000..cfa7b51 --- /dev/null +++ b/docs/crs-examples.md @@ -0,0 +1,334 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Coordinate Reference System (CRS) Examples + +This example demonstrates how one table with an EPSG 4326 CRS cannot be joined with another table that uses EPSG 3857. + + +```python +import sedonadb + +sd = sedonadb.connect() +``` + +Read a table with a geometry column that uses EPSG 4326. + +Note how SedonaDB reads the CRS specified in the Parquet file. + + +```python +countries = sd.read_parquet( + "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_countries_geo.parquet" +) +``` + + +```python +countries.schema +``` + + + + + SedonaSchema with 3 fields: + name: Utf8View + continent: Utf8View + geometry: wkb_view <epsg:4326> + + + + +```python +cities = sd.sql(""" +SELECT city, ST_SetSRID(ST_GeomFromText(wkt), 3857) AS geometry FROM (VALUES + ('New York', 'POINT(-8238310.24 4969803.34)'), + ('Los Angeles', 'POINT(-13153204.78 4037636.04)'), + ('Chicago', 'POINT(-9757148.04 5138517.44)')) +AS t(city, wkt)""") +``` + + +```python +cities.schema +``` + + + + + SedonaSchema with 2 fields: + city: Utf8 + geometry: wkb <epsg:3857> + + + + +```python +cities.to_view("cities", overwrite=True) +countries.to_view("countries", overwrite=True) +``` + +## Join with mismatched CRSs + +The cities and countries tables have different CRSs. + +The cities table uses EPSG:3857 and the countries table uses EPSG:4326. + +Let's confirm that the code errors out if we try to join the mismatched tables. + + +```python +# join doesn't work when CRSs don't match +sd.sql(""" +select * from cities +join countries +where ST_Intersects(cities.geometry, countries.geometry) +""").show() +``` + + + --------------------------------------------------------------------------- + + SedonaError Traceback (most recent call last) + + Cell In[7], line 6 + 1 # join doesn't work when CRSs don't match + 2 sd.sql(""" + 3 select * from cities + 4 join countries + 5 where ST_Intersects(cities.geometry, countries.geometry) + ----> 6 """).show() + + + File /opt/miniconda3/lib/python3.12/site-packages/sedonadb/dataframe.py:297, in DataFrame.show(self, limit, width, ascii) + 272 """Print the first limit rows to the console + 273 + 274 Args: + (...) + 294 + 295 """ + 296 width = _out_width(width) + --> 297 print(self._impl.show(self._ctx, limit, width, ascii), end="") + + + SedonaError: type_coercion + caused by + Error during planning: Mismatched CRS arguments: epsg:3857 vs epsg:4326 + Use ST_Transform() or ST_SetSRID() to ensure arguments are compatible. + + +## Convert CRS and then join + +Let's convert the cities table to use EPSG:4326 and then perform the join with the two tables once they have matching CRSs. + + +```python +# update cities to use 4326 +cities = sd.sql(""" +SELECT city, ST_Transform(geometry, 'EPSG:4326') as geometry +FROM cities +""") +``` + + +```python +cities.schema +``` + + + + + SedonaSchema with 2 fields: + city: Utf8 + geometry: wkb <ogc:crs84> + + + + +```python +cities.to_view("cities", overwrite=True) +``` + + +```python +# join works when CRSs match +sd.sql(""" +select * from cities +join countries +where ST_Intersects(cities.geometry, countries.geometry) +""").show() +``` + + ┌─────────────┬──────────────────────┬──────────────────────┬───────────────┬──────────────────────┐ + │ city ┆ geometry ┆ name ┆ continent ┆ geometry │ + │ utf8 ┆ geometry ┆ utf8view ┆ utf8view ┆ geometry │ + ╞═════════════╪══════════════════════╪══════════════════════╪═══════════════╪══════════════════════╡ + │ New York ┆ POINT(-74.006000039… ┆ United States of Am… ┆ North America ┆ MULTIPOLYGON(((-122… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Los Angeles ┆ POINT(-118.15724889… ┆ United States of Am… ┆ North America ┆ MULTIPOLYGON(((-122… │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ Chicago ┆ POINT(-87.649952137… ┆ United States of Am… ┆ North America ┆ MULTIPOLYGON(((-122… │ + └─────────────┴──────────────────────┴──────────────────────┴───────────────┴──────────────────────┘ + + +## Example #2: Joining two tables with different CRSs + +This example shows how to join a `vermont` table with an EPSG 32618 CRS with a `buildings` table that uses an EPSG 4326 CRS. + +The example highlights the following features: + +1. SedonaDB reads the CRS stored in the files +2. SedonaDB protects you from accidentally joining files with mismatched CRSs +3. It's easy to convert a GeoPandas DataFrame => a SedonaDB DataFrame and maintain the CRS + + +```python +import geopandas as gpd + +path = "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/example-crs/files/example-crs_vermont-utm.fgb" +gdf = gpd.read_file(path) +``` + + +```python +vermont = sd.create_data_frame(gdf) +``` + + +```python +vermont.schema +``` + + + + + SedonaSchema with 1 field: + geometry: wkb <epsg:32618> + + + + +```python +buildings = sd.read_parquet( + "https://github.com/geoarrow/geoarrow-data/releases/download/v0.2.0/microsoft-buildings_point_geo.parquet" +) +``` + + +```python +buildings.show(3) +``` + + ┌─────────────────────────────────┐ + │ geometry │ + │ geometry │ + ╞═════════════════════════════════╡ + │ POINT(-77.10109681 42.53495524) │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ POINT(-77.10048552 42.53695011) │ + ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ + │ POINT(-77.10096508 42.53681338) │ + └─────────────────────────────────┘ + + + +```python +buildings.schema +``` + + + + + SedonaSchema with 1 field: + geometry: wkb_view <ogc:crs84> + + + + +```python +buildings.count() +``` + + + + + 129735970 + + + + +```python +buildings.to_view("buildings", overwrite=True) +vermont.to_view("vermont", overwrite=True) +``` + + +```python +sd.sql(""" +select count(*) from buildings +join vermont +where ST_Intersects(buildings.geometry, vermont.geometry) +""").show() +``` + + + --------------------------------------------------------------------------- + + SedonaError Traceback (most recent call last) + + Cell In[12], line 5 + 1 sd.sql(""" + 2 select count(*) from buildings + 3 join vermont + 4 where ST_Intersects(buildings.geometry, vermont.geometry) + ----> 5 """).show() + + + File /opt/miniconda3/lib/python3.12/site-packages/sedonadb/dataframe.py:297, in DataFrame.show(self, limit, width, ascii) + 272 """Print the first limit rows to the console + 273 + 274 Args: + (...) + 294 + 295 """ + 296 width = _out_width(width) + --> 297 print(self._impl.show(self._ctx, limit, width, ascii), end="") + + + SedonaError: type_coercion + caused by + Error during planning: Mismatched CRS arguments: ogc:crs84 vs epsg:32618 + Use ST_Transform() or ST_SetSRID() to ensure arguments are compatible. + + + +```python +sd.sql(""" +select count(*) from buildings +join vermont +where ST_Intersects(buildings.geometry, ST_Transform(vermont.geometry, 'EPSG:4326')) +""").show() +``` + + ┌──────────┐ + │ count(*) │ + │ int64 │ + ╞══════════╡ + │ 361856 │ + └──────────┘ diff --git a/mkdocs.yml b/mkdocs.yml index 6a9515a..99fbfe6 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -27,7 +27,7 @@ nav: - Working with Overture: overture-examples.md - Working with Parquet Files: working-with-parquet-files.md - Contributors Guide: contributors-guide.md - + - CRS Examples: crs-examples.md - SedonaDB Reference: - Python: - Python Functions: reference/python.md
