MrPowers commented on code in PR #81:
URL: https://github.com/apache/sedona-db/pull/81#discussion_r2359444959


##########
docs/crs-examples.ipynb:
##########
@@ -0,0 +1,446 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "91910e50-a5ae-4d5a-a431-62ac5fbc11ca",
+   "metadata": {},
+   "source": [
+    "# CRS Examples\n",
+    "\n",
+    "This example demonstrates how one table with a EPSG 4326 CRS cannot be 
joined with another table that uses EPSG 3857."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "be8ffe47-dc89-4842-bb1e-1e8640afffc3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sedonadb\n",
+    "\n",
+    "sd = sedonadb.connect()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "54b48173-6be0-4827-ac42-1439eb31e9f7",
+   "metadata": {},
+   "source": [
+    "Read a table with a geometry column that uses EPSG 4326.\n",
+    "\n",
+    "Note how SedonaDB reads the CRS specified in the Parquet file."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "143f00d5-6878-4dab-a82c-c9fb4dbfaf00",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "countries = sd.read_parquet(\n",
+    "    
\"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_countries_geo.parquet\"\n";,
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "8ef94b7b-b65d-4da5-9443-3253e84e2e7f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "SedonaSchema with 3 fields:\n",
+       "  name: Utf8View\n",
+       "  continent: Utf8View\n",
+       "  geometry: wkb_view <epsg:4326>"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "countries.schema"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "12d94c4f-5e7f-47c6-b5cc-3a3363bbc290",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cities = sd.sql(\"\"\"\n",
+    "SELECT city, ST_SetSRID(ST_GeomFromText(wkt), 3857) AS geometry FROM 
(VALUES\n",
+    "    ('New York', 'POINT(-8238310.24 4969803.34)'),\n",
+    "    ('Los Angeles', 'POINT(-13153204.78 4037636.04)'),\n",
+    "    ('Chicago', 'POINT(-9757148.04 5138517.44)'))\n",
+    "AS t(city, wkt)\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "36e53438-c2d2-444e-9f34-d391f0f3f588",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "SedonaSchema with 2 fields:\n",
+       "  city: Utf8\n",
+       "  geometry: wkb <epsg:3857>"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cities.schema"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "62c87571-50aa-4f57-a7dd-4afa3210320a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cities.to_view(\"cities\", overwrite=True)\n",
+    "countries.to_view(\"countries\", overwrite=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "561b3c8c-4952-4fa7-9fe1-3fa0522b0d9f",
+   "metadata": {},
+   "source": [
+    "## Join with mismatched CRSs\n",
+    "\n",
+    "The cities and countries tables have different CRSs.\n",
+    "\n",
+    "The cities table uses EPSG:3857 and the countries table uses 
EPSG:4326.\n",
+    "\n",
+    "Let's confirm that the code errors out if we try to join the mismatched 
tables."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "906bad37-4f3f-4028-82b4-487fabe5957f",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "SedonaError",
+     "evalue": "type_coercion\ncaused by\nError during planning: Mismatched 
CRS arguments: epsg:3857 vs epsg:4326\nUse ST_Transform() or ST_SetSRID() to 
ensure arguments are compatible.",
+     "output_type": "error",
+     "traceback": [
+      
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mSedonaError\u001b[0m                               
Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[7], line 6\u001b[0m\n\u001b[1;32m      1\u001b[0m 
\u001b[38;5;66;03m# join doesn't work when CRSs don't 
match\u001b[39;00m\n\u001b[1;32m      2\u001b[0m 
\u001b[43msd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m
      3\u001b[0m \u001b[38;5;124;43mselect * from 
cities\u001b[39;49m\n\u001b[1;32m      4\u001b[0m \u001b[38;5;124;43mjoin 
countries\u001b[39;49m\n\u001b[1;32m      5\u001b[0m \u001b[38;5;124;43mwhere 
ST_Intersects(cities.geometry, 
countries.geometry)\u001b[39;49m\n\u001b[0;32m----> 6\u001b[0m 
\u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshow\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File 
\u001b[0;32m/opt/miniconda3/lib/python3.12/site-packages/sedonadb/dataframe.py:297\u001b[0m,
 in \u001b[0;36mDataFrame.show\u001b[0;34m(self, limit, width, 
ascii)\u001b[0m\n\u001b[1;32m    272\u001b[0m 
\u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Print the first limit rows 
to the console\u001b[39;00m\n\u001b[1;32m    273\u001b[0m \n\u001b[1;32m    
274\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[0;32m   
(...)\u001b[0m\n\u001b[1;32m    294\u001b[0m \n\u001b[1;32m    295\u001b[0m 
\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    296\u001b[0m width 
\u001b[38;5;241m=\u001b[39m _out_width(width)\n\u001b[0;32m--> 297\u001b[0m 
\u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_impl\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshow\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_ctx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m
 \u001b[49m
 \u001b[43mlimit\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mwidth\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mascii\u001b[49m\u001b[43m)\u001b[49m, 
end\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+      "\u001b[0;31mSedonaError\u001b[0m: type_coercion\ncaused by\nError 
during planning: Mismatched CRS arguments: epsg:3857 vs epsg:4326\nUse 
ST_Transform() or ST_SetSRID() to ensure arguments are compatible."
+     ]
+    }
+   ],
+   "source": [
+    "# join doesn't work when CRSs don't match\n",
+    "sd.sql(\"\"\"\n",
+    "select * from cities\n",
+    "join countries\n",
+    "where ST_Intersects(cities.geometry, countries.geometry)\n",
+    "\"\"\").show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "41e6f59f-5217-40b2-b05a-9c95eae29df8",
+   "metadata": {},
+   "source": [
+    "## Convert CRS and then join\n",
+    "\n",
+    "Let's convert the cities table to use EPSG:4326 and then perform the join 
with the two tables once they have matching CRSs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "122857c1-f68d-4037-9787-54c20706e60f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# update cities to use 4326\n",
+    "cities = sd.sql(\"\"\"\n",
+    "SELECT city, ST_Transform(geometry, 'EPSG:4326') as geometry\n",
+    "FROM cities\n",
+    "\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "947e085c-62a4-4315-b155-007a95156964",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "SedonaSchema with 2 fields:\n",
+       "  city: Utf8\n",
+       "  geometry: wkb <ogc:crs84>"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cities.schema"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "3bcbaf7a-ec40-4b7e-85c2-db5ef1e3232e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cities.to_view(\"cities\", overwrite=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "819c8d04-fa03-4ef8-aecb-e79d48f0b820",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      
"┌─────────────┬──────────────────────┬──────────────────────┬───────────────┬──────────────────────┐\n",
+      "│     city    ┆       geometry       ┆         name         ┆   
continent   ┆       geometry       │\n",
+      "│     utf8    ┆       geometry       ┆       utf8view       ┆    
utf8view   ┆       geometry       │\n",
+      
"╞═════════════╪══════════════════════╪══════════════════════╪═══════════════╪══════════════════════╡\n",
+      "│ New York    ┆ POINT(-74.006000039… ┆ United States of Am… ┆ North 
America ┆ MULTIPOLYGON(((-122… │\n",
+      
"├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+      "│ Los Angeles ┆ POINT(-118.15724889… ┆ United States of Am… ┆ North 
America ┆ MULTIPOLYGON(((-122… │\n",
+      
"├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+      "│ Chicago     ┆ POINT(-87.649952137… ┆ United States of Am… ┆ North 
America ┆ MULTIPOLYGON(((-122… │\n",
+      
"└─────────────┴──────────────────────┴──────────────────────┴───────────────┴──────────────────────┘\n"
+     ]
+    }
+   ],
+   "source": [
+    "# join works when CRSs match\n",
+    "sd.sql(\"\"\"\n",
+    "select * from cities\n",
+    "join countries\n",
+    "where ST_Intersects(cities.geometry, countries.geometry)\n",
+    "\"\"\").show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5279bebd-1d8d-4f33-bcd9-2c1e93ff7221",
+   "metadata": {},
+   "source": [
+    "## Example #2: Joining two tables with different CRSs"

Review Comment:
   Not relevant anymore, but smart observation



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to