This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 86d08f99 [SEDONA-316] Refactor Jupyter Notebooks to add unified
SedonaContext entrypoint (#883)
86d08f99 is described below
commit 86d08f99e6b04b819e89de3048934c3430752198
Author: Nilesh Gajwani <[email protected]>
AuthorDate: Mon Jul 3 22:13:54 2023 -0700
[SEDONA-316] Refactor Jupyter Notebooks to add unified SedonaContext
entrypoint (#883)
---
binder/ApacheSedonaCore.ipynb | 521 ++++++++++++---------
binder/ApacheSedonaRaster.ipynb | 240 ++++++----
binder/ApacheSedonaSQL.ipynb | 375 ++++++---------
...eSedonaSQL_SpatialJoin_AirportsPerCountry.ipynb | 176 +++----
binder/Pipfile | 2 +-
5 files changed, 642 insertions(+), 672 deletions(-)
diff --git a/binder/ApacheSedonaCore.ipynb b/binder/ApacheSedonaCore.ipynb
index d219ee98..e82d5b22 100644
--- a/binder/ApacheSedonaCore.ipynb
+++ b/binder/ApacheSedonaCore.ipynb
@@ -24,7 +24,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -39,82 +39,71 @@
"from shapely.geometry import Point\n",
"from shapely.geometry import Polygon\n",
"\n",
- "from sedona.register import SedonaRegistrator\n",
- "from sedona.core.SpatialRDD import SpatialRDD\n",
- "from sedona.core.SpatialRDD import PointRDD\n",
- "from sedona.core.SpatialRDD import PolygonRDD\n",
- "from sedona.core.SpatialRDD import LineStringRDD\n",
- "from sedona.core.enums import FileDataSplitter\n",
- "from sedona.utils.adapter import Adapter\n",
- "from sedona.core.spatialOperator import KNNQuery\n",
- "from sedona.core.spatialOperator import JoinQuery\n",
- "from sedona.core.spatialOperator import JoinQueryRaw\n",
- "from sedona.core.spatialOperator import RangeQuery\n",
- "from sedona.core.spatialOperator import RangeQueryRaw\n",
- "from sedona.core.formatMapper.shapefileParser import ShapefileReader\n",
- "from sedona.core.formatMapper import WkbReader\n",
- "from sedona.core.formatMapper import WktReader\n",
- "from sedona.core.formatMapper import GeoJsonReader\n",
- "from sedona.sql.types import GeometryType\n",
- "from sedona.core.enums import GridType\n",
- "from sedona.core.SpatialRDD import RectangleRDD\n",
- "from sedona.core.enums import IndexType\n",
- "from sedona.core.geom.envelope import Envelope\n",
- "from sedona.utils import SedonaKryoRegistrator, KryoSerializer"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [],
- "source": [
- "spark = SparkSession.\\\n",
- " builder.\\\n",
- " master(\"local[*]\").\\\n",
- " appName(\"Sedona App\").\\\n",
- " config(\"spark.serializer\", KryoSerializer.getName).\\\n",
- " config(\"spark.kryo.registrator\", SedonaKryoRegistrator.getName)
.\\\n",
- " config(\"spark.jars.packages\",
\"org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.4.0,org.datasyslab:geotools-wrapper:1.4.0-28.2\")
.\\\n",
- " getOrCreate()"
+ "from sedona.spark import *\n",
+ "from sedona.core.geom.envelope import Envelope\n"
]
},
{
"cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Register function is essential for Apache Sedona Core and Apache Sedona
SQL. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
+ "execution_count": 2,
"metadata": {},
"outputs": [
{
- "data": {
- "text/plain": [
- "True"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ ":: loading settings :: url =
jar:file:/Users/nileshgajwani/Desktop/spark/spark-3.4.0-bin-hadoop3/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Ivy Default Cache set to: /Users/nileshgajwani/.ivy2/cache\n",
+ "The jars for the packages stored in: /Users/nileshgajwani/.ivy2/jars\n",
+ "org.apache.sedona#sedona-spark-shaded-3.0_2.12 added as a dependency\n",
+ "org.datasyslab#geotools-wrapper added as a dependency\n",
+ ":: resolving dependencies ::
org.apache.spark#spark-submit-parent-7c4309f6-195c-4473-b3ac-629607126e04;1.0\n",
+ "\tconfs: [default]\n",
+ "\tfound org.apache.sedona#sedona-spark-shaded-3.0_2.12;1.4.1 in
central\n",
+ "\tfound org.datasyslab#geotools-wrapper;1.4.0-28.2 in central\n",
+ ":: resolution report :: resolve 121ms :: artifacts dl 2ms\n",
+ "\t:: modules in use:\n",
+ "\torg.apache.sedona#sedona-spark-shaded-3.0_2.12;1.4.1 from central in
[default]\n",
+ "\torg.datasyslab#geotools-wrapper;1.4.0-28.2 from central in
[default]\n",
+
"\t---------------------------------------------------------------------\n",
+ "\t| | modules || artifacts
|\n",
+ "\t| conf | number| search|dwnlded|evicted||
number|dwnlded|\n",
+
"\t---------------------------------------------------------------------\n",
+ "\t| default | 2 | 0 | 0 | 0 || 2 | 0
|\n",
+
"\t---------------------------------------------------------------------\n",
+ ":: retrieving ::
org.apache.spark#spark-submit-parent-7c4309f6-195c-4473-b3ac-629607126e04\n",
+ "\tconfs: [default]\n",
+ "\t0 artifacts copied, 2 already retrieved (0kB/3ms)\n",
+ "23/07/03 20:34:33 WARN NativeCodeLoader: Unable to load native-hadoop
library for your platform... using builtin-java classes where applicable\n",
+ "Setting default log level to \"WARN\".\n",
+ "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use
setLogLevel(newLevel).\n",
+ "23/07/03 20:34:33 WARN Utils: Service 'SparkUI' could not bind on port
4040. Attempting port 4041.\n"
+ ]
}
],
"source": [
- "SedonaRegistrator.registerAll(spark)"
+ "config = SedonaContext.builder() .\\\n",
+ " config('spark.jars.packages',\n",
+ " 'org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.4.1,'\n",
+ " 'org.datasyslab:geotools-wrapper:1.4.0-28.2'). \\\n",
+ " getOrCreate()\n",
+ "\n",
+ "sedona = SedonaContext.create(config)"
]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
- "sc = spark.sparkContext"
+ "sc = sedona.sparkContext"
]
},
{
@@ -148,7 +137,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -157,7 +146,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -166,7 +155,7 @@
"3000"
]
},
- "execution_count": 12,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -178,17 +167,33 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 8,
"metadata": {},
"outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+
"/Users/nileshgajwani/Desktop/sedona/sedona/venv/lib/python3.9/site-packages/sedona/core/geom/envelope.py:27:
ShapelyDeprecationWarning: Setting custom attributes on geometry objects is
deprecated, and will raise an AttributeError in Shapely 2.0\n",
+ " self.minx = minx\n",
+
"/Users/nileshgajwani/Desktop/sedona/sedona/venv/lib/python3.9/site-packages/sedona/core/geom/envelope.py:28:
ShapelyDeprecationWarning: Setting custom attributes on geometry objects is
deprecated, and will raise an AttributeError in Shapely 2.0\n",
+ " self.maxx = maxx\n",
+
"/Users/nileshgajwani/Desktop/sedona/sedona/venv/lib/python3.9/site-packages/sedona/core/geom/envelope.py:29:
ShapelyDeprecationWarning: Setting custom attributes on geometry objects is
deprecated, and will raise an AttributeError in Shapely 2.0\n",
+ " self.miny = miny\n",
+
"/Users/nileshgajwani/Desktop/sedona/sedona/venv/lib/python3.9/site-packages/sedona/core/geom/envelope.py:30:
ShapelyDeprecationWarning: Setting custom attributes on geometry objects is
deprecated, and will raise an AttributeError in Shapely 2.0\n",
+ " self.maxy = maxy\n"
+ ]
+ },
{
"data": {
- "image/svg+xml": "<svg xmlns=\"http://www.w3.org/2000/svg\"
xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\"
viewBox=\"-176.64696132 26.718666680000002 95.20719264000002
48.162659640000015\" preserveAspectRatio=\"xMinYMin meet\"><g
transform=\"matrix(1,0,0,-1,0,101.59999300000001)\"><path fill-rule=\"evenodd\"
fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"1.9041438528000003\"
opacity=\"0.6\" d=\"M -173.120769,30.244859 L -173.120769,71.355134 L -84 [...]
+ "image/svg+xml": [
+ "<svg xmlns=\"http://www.w3.org/2000/svg\"
xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\"
viewBox=\"-176.64696132 26.718666680000002 95.20719264000002
48.162659640000015\" preserveAspectRatio=\"xMinYMin meet\"><g
transform=\"matrix(1,0,0,-1,0,101.59999300000001)\"><path fill-rule=\"evenodd\"
fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"1.9041438528000003\"
opacity=\"0.6\" d=\"M -173.120769,30.244859 L -173.120769,71.355134 L
-84.965961,71.35513 [...]
+ ],
"text/plain": [
"Envelope(-173.120769, -84.965961, 30.244859, 71.355134)"
]
},
- "execution_count": 13,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -201,7 +206,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -210,7 +215,7 @@
"True"
]
},
- "execution_count": 14,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -222,17 +227,19 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
- "image/svg+xml": "<svg xmlns=\"http://www.w3.org/2000/svg\"
xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\"
viewBox=\"-176.64696132 26.718666680000002 95.20719264000002
48.162659640000015\" preserveAspectRatio=\"xMinYMin meet\"><g
transform=\"matrix(1,0,0,-1,0,101.59999300000001)\"><path fill-rule=\"evenodd\"
fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"1.9041438528000003\"
opacity=\"0.6\" d=\"M -173.120769,30.244859 L -173.120769,71.355134 L -84 [...]
+ "image/svg+xml": [
+ "<svg xmlns=\"http://www.w3.org/2000/svg\"
xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\"
viewBox=\"-176.64696132 26.718666680000002 95.20719264000002
48.162659640000015\" preserveAspectRatio=\"xMinYMin meet\"><g
transform=\"matrix(1,0,0,-1,0,101.59999300000001)\"><path fill-rule=\"evenodd\"
fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"1.9041438528000003\"
opacity=\"0.6\" d=\"M -173.120769,30.244859 L -173.120769,71.355134 L
-84.965961,71.35513 [...]
+ ],
"text/plain": [
"Envelope(-173.120769, -84.965961, 30.244859, 71.355134)"
]
},
- "execution_count": 15,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -245,7 +252,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
@@ -254,7 +261,7 @@
"2996"
]
},
- "execution_count": 16,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -266,7 +273,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -275,7 +282,7 @@
"'epsg:4326'"
]
},
- "execution_count": 17,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -287,7 +294,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -296,7 +303,7 @@
"'epsg:4326'"
]
},
- "execution_count": 18,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -308,7 +315,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -317,7 +324,7 @@
"True"
]
},
- "execution_count": 19,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -347,16 +354,26 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 15,
"metadata": {},
"outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\r",
+ "[Stage 8:> (0
+ 1) / 1]\r",
+ "\r",
+ "
\r"
+ ]
+ },
{
"data": {
"text/plain": [
"[Geometry: Point userData:
testattribute0\ttestattribute1\ttestattribute2]"
]
},
- "execution_count": 20,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -368,7 +385,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -381,7 +398,7 @@
" Geometry: Point userData:
testattribute0\ttestattribute1\ttestattribute2]"
]
},
- "execution_count": 21,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -393,7 +410,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
@@ -406,7 +423,7 @@
" 110.97122518072091]"
]
},
- "execution_count": 22,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -439,7 +456,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
@@ -448,7 +465,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
@@ -459,7 +476,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
@@ -538,7 +555,7 @@
"4 POINT (-88.32399 32.95067) testattribute0 testattribute1
testattribute2"
]
},
- "execution_count": 25,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -556,7 +573,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
@@ -565,20 +582,20 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"spatial_df = Adapter.\\\n",
- " toDf(point_rdd, [\"attr1\", \"attr2\", \"attr3\"], spark).\\\n",
+ " toDf(point_rdd, [\"attr1\", \"attr2\", \"attr3\"], sedona).\\\n",
" createOrReplaceTempView(\"spatial_df\")\n",
"\n",
- "spatial_gdf = spark.sql(\"Select attr1, attr2, attr3, geometry as geom
from spatial_df\")"
+ "spatial_gdf = sedona.sql(\"Select attr1, attr2, attr3, geometry as geom
from spatial_df\")"
]
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -605,7 +622,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -684,7 +701,7 @@
"4 testattribute0 testattribute1 testattribute2 POINT (-88.32399
32.95067)"
]
},
- "execution_count": 29,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -702,7 +719,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
@@ -718,16 +735,16 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
- "geo_df = spark.createDataFrame(point_rdd_to_geo, schema,
verifySchema=False)"
+ "geo_df = sedona.createDataFrame(point_rdd_to_geo, schema,
verifySchema=False)"
]
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 27,
"metadata": {},
"outputs": [
{
@@ -806,7 +823,7 @@
"4 POINT (-88.32399 32.95067) testattribute0 testattribute1
testattribute2"
]
},
- "execution_count": 32,
+ "execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
@@ -836,7 +853,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
@@ -848,7 +865,7 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 29,
"metadata": {},
"outputs": [
{
@@ -857,7 +874,7 @@
"True"
]
},
- "execution_count": 34,
+ "execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
@@ -885,7 +902,7 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 30,
"metadata": {},
"outputs": [
{
@@ -894,7 +911,7 @@
"True"
]
},
- "execution_count": 35,
+ "execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
@@ -919,7 +936,7 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
@@ -967,7 +984,7 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
@@ -990,16 +1007,16 @@
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "MapPartitionsRDD[68] at map at FlatPairRddConverter.scala:30"
+ "MapPartitionsRDD[64] at map at FlatPairRddConverter.scala:30"
]
},
- "execution_count": 38,
+ "execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
@@ -1010,7 +1027,7 @@
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 34,
"metadata": {},
"outputs": [
{
@@ -1020,7 +1037,7 @@
" [Geometry: Polygon userData: , Geometry: Point userData: ]]"
]
},
- "execution_count": 39,
+ "execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
@@ -1031,7 +1048,7 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 35,
"metadata": {},
"outputs": [
{
@@ -1042,7 +1059,7 @@
" [Geometry: Polygon userData: , Geometry: Point userData: ]]"
]
},
- "execution_count": 40,
+ "execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
@@ -1053,7 +1070,7 @@
},
{
"cell_type": "code",
- "execution_count": 41,
+ "execution_count": 36,
"metadata": {},
"outputs": [
{
@@ -1062,7 +1079,7 @@
"[0.0, 0.0, 0.0, 0.0, 0.0]"
]
},
- "execution_count": 41,
+ "execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
@@ -1074,20 +1091,20 @@
},
{
"cell_type": "code",
- "execution_count": 42,
+ "execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "[0.05189354027999942,\n",
+ "[0.026651558685001447,\n",
+ " 0.051572544132000575,\n",
" 0.051572544132000575,\n",
- " 0.05189354027999942,\n",
- " 0.05189354027999942,\n",
- " 0.020876428870001032]"
+ " 0.051572544132000575,\n",
+ " 0.05189354027999942]"
]
},
- "execution_count": 42,
+ "execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
@@ -1099,7 +1116,7 @@
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
@@ -1108,7 +1125,7 @@
},
{
"cell_type": "code",
- "execution_count": 44,
+ "execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
@@ -1122,7 +1139,7 @@
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": 40,
"metadata": {},
"outputs": [
{
@@ -1132,11 +1149,11 @@
"+--------------------+--------------------+\n",
"| geom_left| geom_right|\n",
"+--------------------+--------------------+\n",
- "|POLYGON ((-87.092...|POINT (-86.94719 ...|\n",
- "|POLYGON ((-87.082...|POINT (-86.865782...|\n",
- "|POLYGON ((-87.092...|POINT (-86.779523...|\n",
- "|POLYGON ((-87.092...|POINT (-86.865782...|\n",
- "|POLYGON ((-86.816...|POINT (-86.779523...|\n",
+ "|POLYGON ((-87.229...|POINT (-87.204299...|\n",
+ "|POLYGON ((-87.082...|POINT (-87.059583...|\n",
+ "|POLYGON ((-87.082...|POINT (-87.075409...|\n",
+ "|POLYGON ((-87.082...|POINT (-87.08084 ...|\n",
+ "|POLYGON ((-87.092...|POINT (-87.08084 ...|\n",
"+--------------------+--------------------+\n",
"only showing top 5 rows\n",
"\n"
@@ -1146,12 +1163,12 @@
"source": [
"# Set verifySchema to False\n",
"spatial_join_result = result.map(lambda x: [x[0].geom, x[1].geom])\n",
- "spark.createDataFrame(spatial_join_result, schema,
verifySchema=False).show(5, True)"
+ "sedona.createDataFrame(spatial_join_result, schema,
verifySchema=False).show(5, True)"
]
},
{
"cell_type": "code",
- "execution_count": 46,
+ "execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
@@ -1160,7 +1177,7 @@
},
{
"cell_type": "code",
- "execution_count": 47,
+ "execution_count": 42,
"metadata": {},
"outputs": [
{
@@ -1175,7 +1192,7 @@
}
],
"source": [
- "spark.createDataFrame(spatial_join_result, schema,
verifySchema=False).printSchema()"
+ "sedona.createDataFrame(spatial_join_result, schema,
verifySchema=False).printSchema()"
]
},
{
@@ -1187,7 +1204,7 @@
},
{
"cell_type": "code",
- "execution_count": 48,
+ "execution_count": 43,
"metadata": {},
"outputs": [
{
@@ -1197,11 +1214,11 @@
"+--------------------+-----+--------------------+-----+\n",
"| geom_1|attr1| geom_2|attr2|\n",
"+--------------------+-----+--------------------+-----+\n",
- "|POLYGON ((-87.092...| |POINT (-86.94719 ...| |\n",
- "|POLYGON ((-87.082...| |POINT (-86.865782...| |\n",
- "|POLYGON ((-87.092...| |POINT (-86.779523...| |\n",
- "|POLYGON ((-87.092...| |POINT (-86.865782...| |\n",
- "|POLYGON ((-86.816...| |POINT (-86.779523...| |\n",
+ "|POLYGON ((-87.229...| |POINT (-87.204299...| |\n",
+ "|POLYGON ((-87.082...| |POINT (-87.059583...| |\n",
+ "|POLYGON ((-87.082...| |POINT (-87.075409...| |\n",
+ "|POLYGON ((-87.082...| |POINT (-87.08084 ...| |\n",
+ "|POLYGON ((-87.092...| |POINT (-87.08084 ...| |\n",
"+--------------------+-----+--------------------+-----+\n",
"only showing top 5 rows\n",
"\n"
@@ -1209,7 +1226,7 @@
}
],
"source": [
- "Adapter.toDf(result, [\"attr1\"], [\"attr2\"], spark).show(5, True)"
+ "Adapter.toDf(result, [\"attr1\"], [\"attr2\"], sedona).show(5, True)"
]
},
{
@@ -1221,7 +1238,7 @@
},
{
"cell_type": "code",
- "execution_count": 49,
+ "execution_count": 44,
"metadata": {},
"outputs": [
{
@@ -1238,7 +1255,7 @@
}
],
"source": [
- "Adapter.toDf(result, [\"attr1\"], [\"attr2\"], spark).printSchema()"
+ "Adapter.toDf(result, [\"attr1\"], [\"attr2\"], sedona).printSchema()"
]
},
{
@@ -1258,7 +1275,7 @@
},
{
"cell_type": "code",
- "execution_count": 50,
+ "execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
@@ -1268,7 +1285,7 @@
},
{
"cell_type": "code",
- "execution_count": 51,
+ "execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
@@ -1277,7 +1294,7 @@
},
{
"cell_type": "code",
- "execution_count": 52,
+ "execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
@@ -1287,7 +1304,7 @@
},
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
@@ -1299,7 +1316,7 @@
},
{
"cell_type": "code",
- "execution_count": 54,
+ "execution_count": 49,
"metadata": {},
"outputs": [
{
@@ -1310,22 +1327,22 @@
"| geometry|number_of_points|\n",
"+--------------------+----------------+\n",
"|POLYGON ((-86.749...| 4|\n",
+ "|POLYGON ((-87.229...| 7|\n",
+ "|POLYGON ((-87.114...| 15|\n",
"|POLYGON ((-87.082...| 12|\n",
"|POLYGON ((-86.697...| 1|\n",
+ "|POLYGON ((-86.816...| 6|\n",
+ "|POLYGON ((-87.285...| 26|\n",
"|POLYGON ((-87.105...| 15|\n",
- "|POLYGON ((-87.114...| 15|\n",
- "|POLYGON ((-87.229...| 7|\n",
"|POLYGON ((-86.860...| 12|\n",
- "|POLYGON ((-86.816...| 6|\n",
"|POLYGON ((-87.092...| 5|\n",
- "|POLYGON ((-87.285...| 26|\n",
"+--------------------+----------------+\n",
"\n"
]
}
],
"source": [
- "spark.createDataFrame(number_of_points, schema,
verifySchema=False).show()"
+ "sedona.createDataFrame(number_of_points, schema,
verifySchema=False).show()"
]
},
{
@@ -1351,7 +1368,7 @@
},
{
"cell_type": "code",
- "execution_count": 55,
+ "execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
@@ -1360,7 +1377,7 @@
},
{
"cell_type": "code",
- "execution_count": 56,
+ "execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
@@ -1369,7 +1386,7 @@
},
{
"cell_type": "code",
- "execution_count": 57,
+ "execution_count": 52,
"metadata": {},
"outputs": [
{
@@ -1382,7 +1399,7 @@
" Geometry: Point userData: ]"
]
},
- "execution_count": 57,
+ "execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
@@ -1400,7 +1417,7 @@
},
{
"cell_type": "code",
- "execution_count": 58,
+ "execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
@@ -1414,7 +1431,7 @@
},
{
"cell_type": "code",
- "execution_count": 59,
+ "execution_count": 54,
"metadata": {},
"outputs": [
{
@@ -1427,7 +1444,7 @@
" Geometry: Polygon userData: ]"
]
},
- "execution_count": 59,
+ "execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
@@ -1438,7 +1455,7 @@
},
{
"cell_type": "code",
- "execution_count": 60,
+ "execution_count": 55,
"metadata": {},
"outputs": [
{
@@ -1447,7 +1464,7 @@
"'POLYGON ((-83.993559 34.087259, -83.993559 34.131247, -83.959903
34.131247, -83.959903 34.087259, -83.993559 34.087259))'"
]
},
- "execution_count": 60,
+ "execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
@@ -1477,9 +1494,33 @@
},
{
"cell_type": "code",
- "execution_count": 61,
+ "execution_count": 56,
"metadata": {},
"outputs": [],
+ "source": [
+ "from sedona.core.geom.envelope import Envelope"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+
"/Users/nileshgajwani/Desktop/sedona/sedona/venv/lib/python3.9/site-packages/sedona/core/geom/envelope.py:27:
ShapelyDeprecationWarning: Setting custom attributes on geometry objects is
deprecated, and will raise an AttributeError in Shapely 2.0\n",
+ " self.minx = minx\n",
+
"/Users/nileshgajwani/Desktop/sedona/sedona/venv/lib/python3.9/site-packages/sedona/core/geom/envelope.py:28:
ShapelyDeprecationWarning: Setting custom attributes on geometry objects is
deprecated, and will raise an AttributeError in Shapely 2.0\n",
+ " self.maxx = maxx\n",
+
"/Users/nileshgajwani/Desktop/sedona/sedona/venv/lib/python3.9/site-packages/sedona/core/geom/envelope.py:29:
ShapelyDeprecationWarning: Setting custom attributes on geometry objects is
deprecated, and will raise an AttributeError in Shapely 2.0\n",
+ " self.miny = miny\n",
+
"/Users/nileshgajwani/Desktop/sedona/sedona/venv/lib/python3.9/site-packages/sedona/core/geom/envelope.py:30:
ShapelyDeprecationWarning: Setting custom attributes on geometry objects is
deprecated, and will raise an AttributeError in Shapely 2.0\n",
+ " self.maxy = maxy\n"
+ ]
+ }
+ ],
"source": [
"query_envelope = Envelope(-85.01, -60.01, 34.01, 50.01)\n",
"\n",
@@ -1488,16 +1529,16 @@
},
{
"cell_type": "code",
- "execution_count": 62,
+ "execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "MapPartitionsRDD[131] at map at GeometryRddConverter.scala:30"
+ "MapPartitionsRDD[128] at map at GeometryRddConverter.scala:30"
]
},
- "execution_count": 62,
+ "execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
@@ -1508,9 +1549,16 @@
},
{
"cell_type": "code",
- "execution_count": 63,
+ "execution_count": 59,
"metadata": {},
"outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "23/07/03 20:34:42 WARN BlockManager: Task 405 already completed, not
releasing lock for rdd_45_0\n"
+ ]
+ },
{
"data": {
"text/plain": [
@@ -1522,7 +1570,7 @@
" Geometry: LineString userData: ]"
]
},
- "execution_count": 63,
+ "execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
@@ -1533,7 +1581,7 @@
},
{
"cell_type": "code",
- "execution_count": 64,
+ "execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
@@ -1542,7 +1590,7 @@
},
{
"cell_type": "code",
- "execution_count": 65,
+ "execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
@@ -1551,7 +1599,7 @@
},
{
"cell_type": "code",
- "execution_count": 66,
+ "execution_count": 62,
"metadata": {},
"outputs": [
{
@@ -1573,7 +1621,7 @@
}
],
"source": [
- "spark.createDataFrame(\n",
+ "sedona.createDataFrame(\n",
" result_range_query.map(lambda x: [x.geom]),\n",
" schema,\n",
" verifySchema=False\n",
@@ -1600,7 +1648,7 @@
},
{
"cell_type": "code",
- "execution_count": 67,
+ "execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
@@ -1609,7 +1657,7 @@
},
{
"cell_type": "code",
- "execution_count": 68,
+ "execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
@@ -1618,16 +1666,16 @@
},
{
"cell_type": "code",
- "execution_count": 69,
+ "execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x1203b87c0>"
+ "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x13b149ca0>"
]
},
- "execution_count": 69,
+ "execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
@@ -1638,7 +1686,7 @@
},
{
"cell_type": "code",
- "execution_count": 70,
+ "execution_count": 66,
"metadata": {},
"outputs": [
{
@@ -1660,12 +1708,12 @@
}
],
"source": [
- "Adapter.toDf(shape_rdd, spark).show(5, True)"
+ "Adapter.toDf(shape_rdd, sedona).show(5, True)"
]
},
{
"cell_type": "code",
- "execution_count": 71,
+ "execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
@@ -1683,7 +1731,7 @@
},
{
"cell_type": "code",
- "execution_count": 72,
+ "execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
@@ -1692,16 +1740,16 @@
},
{
"cell_type": "code",
- "execution_count": 73,
+ "execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x12038c940>"
+ "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x13b1162e0>"
]
},
- "execution_count": 73,
+ "execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
@@ -1712,7 +1760,7 @@
},
{
"cell_type": "code",
- "execution_count": 74,
+ "execution_count": 70,
"metadata": {},
"outputs": [
{
@@ -1734,12 +1782,12 @@
}
],
"source": [
- "Adapter.toDf(geo_json_rdd, spark).drop(\"AWATER\").show(5, True)"
+ "Adapter.toDf(geo_json_rdd, sedona).drop(\"AWATER\").show(5, True)"
]
},
{
"cell_type": "code",
- "execution_count": 75,
+ "execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
@@ -1748,7 +1796,7 @@
},
{
"cell_type": "code",
- "execution_count": 76,
+ "execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
@@ -1757,16 +1805,16 @@
},
{
"cell_type": "code",
- "execution_count": 77,
+ "execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x1203a60a0>"
+ "<sedona.core.SpatialRDD.spatial_rdd.SpatialRDD at 0x13b149820>"
]
},
- "execution_count": 77,
+ "execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
@@ -1777,7 +1825,7 @@
},
{
"cell_type": "code",
- "execution_count": 78,
+ "execution_count": 74,
"metadata": {},
"outputs": [
{
@@ -1791,12 +1839,12 @@
}
],
"source": [
- "Adapter.toDf(wkt_rdd, spark).printSchema()"
+ "Adapter.toDf(wkt_rdd, sedona).printSchema()"
]
},
{
"cell_type": "code",
- "execution_count": 79,
+ "execution_count": 75,
"metadata": {},
"outputs": [
{
@@ -1818,12 +1866,12 @@
}
],
"source": [
- "Adapter.toDf(wkt_rdd, spark).show(5, True)"
+ "Adapter.toDf(wkt_rdd, sedona).show(5, True)"
]
},
{
"cell_type": "code",
- "execution_count": 80,
+ "execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
@@ -1832,7 +1880,7 @@
},
{
"cell_type": "code",
- "execution_count": 81,
+ "execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
@@ -1841,7 +1889,7 @@
},
{
"cell_type": "code",
- "execution_count": 82,
+ "execution_count": 78,
"metadata": {},
"outputs": [
{
@@ -1863,7 +1911,7 @@
}
],
"source": [
- "Adapter.toDf(wkb_rdd, spark).show(5, True)"
+ "Adapter.toDf(wkb_rdd, sedona).show(5, True)"
]
},
{
@@ -1875,7 +1923,7 @@
},
{
"cell_type": "code",
- "execution_count": 83,
+ "execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
@@ -1889,17 +1937,17 @@
},
{
"cell_type": "code",
- "execution_count": 84,
+ "execution_count": 80,
"metadata": {},
"outputs": [],
"source": [
"# without passing column names, the result will contain only two
geometries columns\n",
- "geometry_df = Adapter.toDf(result, spark)"
+ "geometry_df = Adapter.toDf(result, sedona)"
]
},
{
"cell_type": "code",
- "execution_count": 85,
+ "execution_count": 81,
"metadata": {},
"outputs": [
{
@@ -1919,7 +1967,7 @@
},
{
"cell_type": "code",
- "execution_count": 86,
+ "execution_count": 82,
"metadata": {},
"outputs": [
{
@@ -1929,11 +1977,11 @@
"+--------------------+--------------------+\n",
"| leftgeometry| rightgeometry|\n",
"+--------------------+--------------------+\n",
- "|POLYGON ((-87.229...|POINT (-87.105455...|\n",
- "|POLYGON ((-87.229...|POINT (-87.10534 ...|\n",
- "|POLYGON ((-87.229...|POINT (-87.160372...|\n",
- "|POLYGON ((-87.229...|POINT (-87.204033...|\n",
- "|POLYGON ((-87.229...|POINT (-87.204299...|\n",
+ "|POLYGON ((-87.285...|POINT (-87.28468 ...|\n",
+ "|POLYGON ((-87.285...|POINT (-87.215491...|\n",
+ "|POLYGON ((-87.285...|POINT (-87.210001...|\n",
+ "|POLYGON ((-87.285...|POINT (-87.278485...|\n",
+ "|POLYGON ((-87.285...|POINT (-87.280556...|\n",
"+--------------------+--------------------+\n",
"only showing top 5 rows\n",
"\n"
@@ -1946,16 +1994,16 @@
},
{
"cell_type": "code",
- "execution_count": 87,
+ "execution_count": 83,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "Row(leftgeometry=<shapely.geometry.polygon.Polygon object at
0x11ffdb100>, rightgeometry=<shapely.geometry.point.Point object at
0x11ffdb1c0>)"
+ "Row(leftgeometry=<shapely.geometry.polygon.Polygon object at
0x13b18d520>, rightgeometry=<shapely.geometry.point.Point object at
0x13b18dca0>)"
]
},
- "execution_count": 87,
+ "execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
@@ -1973,16 +2021,16 @@
},
{
"cell_type": "code",
- "execution_count": 88,
+ "execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
- "geometry_df = Adapter.toDf(result, [\"left_user_data\"],
[\"right_user_data\"], spark)"
+ "geometry_df = Adapter.toDf(result, [\"left_user_data\"],
[\"right_user_data\"], sedona)"
]
},
{
"cell_type": "code",
- "execution_count": 89,
+ "execution_count": 85,
"metadata": {},
"outputs": [
{
@@ -1992,11 +2040,11 @@
"+--------------------+--------------+--------------------+---------------+\n",
"| leftgeometry|left_user_data|
rightgeometry|right_user_data|\n",
"+--------------------+--------------+--------------------+---------------+\n",
- "|POLYGON ((-87.229...| |POINT (-87.105455...|
null|\n",
- "|POLYGON ((-87.229...| |POINT (-87.10534 ...|
null|\n",
- "|POLYGON ((-87.229...| |POINT (-87.160372...|
null|\n",
- "|POLYGON ((-87.229...| |POINT (-87.204033...|
null|\n",
- "|POLYGON ((-87.229...| |POINT (-87.204299...|
null|\n",
+ "|POLYGON ((-87.285...| |POINT (-87.28468 ...|
null|\n",
+ "|POLYGON ((-87.285...| |POINT (-87.215491...|
null|\n",
+ "|POLYGON ((-87.285...| |POINT (-87.210001...|
null|\n",
+ "|POLYGON ((-87.285...| |POINT (-87.278485...|
null|\n",
+ "|POLYGON ((-87.285...| |POINT (-87.280556...|
null|\n",
"+--------------------+--------------+--------------------+---------------+\n",
"only showing top 5 rows\n",
"\n"
@@ -2016,7 +2064,7 @@
},
{
"cell_type": "code",
- "execution_count": 90,
+ "execution_count": 86,
"metadata": {},
"outputs": [],
"source": [
@@ -2027,17 +2075,17 @@
},
{
"cell_type": "code",
- "execution_count": 91,
+ "execution_count": 87,
"metadata": {},
"outputs": [],
"source": [
"# converting to df\n",
- "gdf = Adapter.toDf(result_range_query, spark)"
+ "gdf = Adapter.toDf(result_range_query, sedona)"
]
},
{
"cell_type": "code",
- "execution_count": 92,
+ "execution_count": 88,
"metadata": {},
"outputs": [
{
@@ -2064,7 +2112,7 @@
},
{
"cell_type": "code",
- "execution_count": 93,
+ "execution_count": 89,
"metadata": {},
"outputs": [
{
@@ -2083,18 +2131,18 @@
},
{
"cell_type": "code",
- "execution_count": 94,
+ "execution_count": 90,
"metadata": {},
"outputs": [],
"source": [
"# Passing column names\n",
"# converting to df\n",
- "gdf_with_columns = Adapter.toDf(result_range_query, spark, [\"_c1\"])"
+ "gdf_with_columns = Adapter.toDf(result_range_query, sedona, [\"_c1\"])"
]
},
{
"cell_type": "code",
- "execution_count": 95,
+ "execution_count": 91,
"metadata": {},
"outputs": [
{
@@ -2121,7 +2169,7 @@
},
{
"cell_type": "code",
- "execution_count": 96,
+ "execution_count": 92,
"metadata": {},
"outputs": [
{
@@ -2138,13 +2186,20 @@
"source": [
"gdf_with_columns.printSchema()"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "apache-sedona",
"language": "python",
- "name": "python3"
+ "name": "apache-sedona"
},
"language_info": {
"codemirror_mode": {
@@ -2156,7 +2211,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.12"
+ "version": "3.9.6"
}
},
"nbformat": 4,
diff --git a/binder/ApacheSedonaRaster.ipynb b/binder/ApacheSedonaRaster.ipynb
index 525e7d6d..d3c3b8ae 100644
--- a/binder/ApacheSedonaRaster.ipynb
+++ b/binder/ApacheSedonaRaster.ipynb
@@ -34,11 +34,9 @@
"import pandas as pd\n",
"from pyspark.sql.types import StructType, StructField,StringType,
LongType, IntegerType, DoubleType, ArrayType\n",
"from pyspark.sql.functions import regexp_replace\n",
- "from sedona.register import SedonaRegistrator\n",
- "from sedona.utils import SedonaKryoRegistrator, KryoSerializer\n",
"from pyspark.sql.functions import col, split, expr\n",
"from pyspark.sql.functions import udf, lit\n",
- "from sedona.utils import SedonaKryoRegistrator, KryoSerializer\n",
+ "from sedona.spark import *\n",
"from pyspark.sql.functions import col, split, expr\n",
"from pyspark.sql.functions import udf, lit\n",
"import os\n"
@@ -53,21 +51,57 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 2,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ ":: loading settings :: url =
jar:file:/Users/nileshgajwani/Desktop/spark/spark-3.4.0-bin-hadoop3/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Ivy Default Cache set to: /Users/nileshgajwani/.ivy2/cache\n",
+ "The jars for the packages stored in: /Users/nileshgajwani/.ivy2/jars\n",
+ "org.apache.sedona#sedona-spark-shaded-3.0_2.12 added as a dependency\n",
+ "org.datasyslab#geotools-wrapper added as a dependency\n",
+ ":: resolving dependencies ::
org.apache.spark#spark-submit-parent-f6cc1c05-35e7-48b0-8060-745906834ca0;1.0\n",
+ "\tconfs: [default]\n",
+ "\tfound org.apache.sedona#sedona-spark-shaded-3.0_2.12;1.4.1 in
central\n",
+ "\tfound org.datasyslab#geotools-wrapper;1.4.0-28.2 in central\n",
+ ":: resolution report :: resolve 79ms :: artifacts dl 2ms\n",
+ "\t:: modules in use:\n",
+ "\torg.apache.sedona#sedona-spark-shaded-3.0_2.12;1.4.1 from central in
[default]\n",
+ "\torg.datasyslab#geotools-wrapper;1.4.0-28.2 from central in
[default]\n",
+
"\t---------------------------------------------------------------------\n",
+ "\t| | modules || artifacts
|\n",
+ "\t| conf | number| search|dwnlded|evicted||
number|dwnlded|\n",
+
"\t---------------------------------------------------------------------\n",
+ "\t| default | 2 | 0 | 0 | 0 || 2 | 0
|\n",
+
"\t---------------------------------------------------------------------\n",
+ ":: retrieving ::
org.apache.spark#spark-submit-parent-f6cc1c05-35e7-48b0-8060-745906834ca0\n",
+ "\tconfs: [default]\n",
+ "\t0 artifacts copied, 2 already retrieved (0kB/3ms)\n",
+ "23/06/30 14:06:36 WARN NativeCodeLoader: Unable to load native-hadoop
library for your platform... using builtin-java classes where applicable\n",
+ "Setting default log level to \"WARN\".\n",
+ "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use
setLogLevel(newLevel).\n"
+ ]
+ }
+ ],
"source": [
- "spark = SparkSession.\\\n",
- " builder.\\\n",
- " master(\"local[*]\").\\\n",
- " appName(\"Demo-app\").\\\n",
- " config(\"spark.serializer\", KryoSerializer.getName).\\\n",
- " config(\"spark.kryo.registrator\", SedonaKryoRegistrator.getName)
.\\\n",
- " config(\"spark.jars.packages\",
\"org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.4.0,org.datasyslab:geotools-wrapper:1.4.0-28.2\")
.\\\n",
+ "config = SedonaContext.builder() .\\\n",
+ " config('spark.jars.packages',\n",
+ " 'org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.4.1,'\n",
+ " 'org.datasyslab:geotools-wrapper:1.4.0-28.2'). \\\n",
" getOrCreate()\n",
"\n",
- "SedonaRegistrator.registerAll(spark)\n",
- "sc = spark.sparkContext\n"
+ "sedona = SedonaContext.create(config)\n",
+ "\n",
+ "sc = sedona.sparkContext\n"
]
},
{
@@ -92,7 +126,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"metadata": {
"scrolled": true
},
@@ -115,15 +149,23 @@
}
],
"source": [
- "df =
spark.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\",
\"EPSG:4326\").option(\"disableErrorInCRS\", False).load(DATA_DIR)\n",
+ "df =
sedona.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\",
\"EPSG:4326\").option(\"disableErrorInCRS\", False).load(DATA_DIR)\n",
"df.printSchema()"
]
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 6,
"metadata": {},
"outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\r",
+ "[Stage 3:> (0
+ 1) / 1]\r"
+ ]
+ },
{
"name": "stdout",
"output_type": "stream",
@@ -131,11 +173,19 @@
"+--------------------+--------------------+------+-----+--------------------+-----+\n",
"| origin| Geom|height|width|
data|bands|\n",
"+--------------------+--------------------+------+-----+--------------------+-----+\n",
- "|file:///media/kch...|POLYGON ((-58.702...| 32| 32|[1081.0,
1068.0, ...| 4|\n",
- "|file:///media/kch...|POLYGON ((-58.286...| 32| 32|[1151.0,
1141.0, ...| 4|\n",
+ "|file:/Users/niles...|POLYGON ((-58.702...| 32| 32|[1081.0,
1068.0, ...| 4|\n",
+ "|file:/Users/niles...|POLYGON ((-58.286...| 32| 32|[1151.0,
1141.0, ...| 4|\n",
"+--------------------+--------------------+------+-----+--------------------+-----+\n",
"\n"
]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\r",
+ "
\r"
+ ]
}
],
"source": [
@@ -152,7 +202,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -186,7 +236,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -212,7 +262,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -237,7 +287,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
@@ -262,7 +312,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
@@ -281,13 +331,13 @@
],
"source": [
"'''RS_GreaterThan() is used to mask all the values with 1 which are
greater than a particular threshold'''\n",
- "greaterthanDF = spark.sql(\"Select RS_GreaterThan(Band1,1000.0) as
greaterthan from allbands\")\n",
+ "greaterthanDF = sedona.sql(\"Select RS_GreaterThan(Band1,1000.0) as
greaterthan from allbands\")\n",
"greaterthanDF.show()"
]
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -307,13 +357,13 @@
"source": [
"'''RS_GreaterThanEqual() is used to mask all the values with 1 which are
greater than or equal to a particular threshold'''\n",
"\n",
- "greaterthanEqualDF = spark.sql(\"Select RS_GreaterThanEqual(Band1,360.0)
as greaterthanEqual from allbands\")\n",
+ "greaterthanEqualDF = sedona.sql(\"Select RS_GreaterThanEqual(Band1,360.0)
as greaterthanEqual from allbands\")\n",
"greaterthanEqualDF.show()"
]
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -332,13 +382,13 @@
],
"source": [
"'''RS_LessThan() is used to mask all the values with 1 which are less
than a particular threshold'''\n",
- "lessthanDF = spark.sql(\"Select RS_LessThan(Band1,1000.0) as lessthan
from allbands\")\n",
+ "lessthanDF = sedona.sql(\"Select RS_LessThan(Band1,1000.0) as lessthan
from allbands\")\n",
"lessthanDF.show()"
]
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 15,
"metadata": {},
"outputs": [
{
@@ -357,13 +407,13 @@
],
"source": [
"'''RS_LessThanEqual() is used to mask all the values with 1 which are
less than or equal to a particular threshold'''\n",
- "lessthanEqualDF = spark.sql(\"Select RS_LessThanEqual(Band1,2890.0) as
lessthanequal from allbands\")\n",
+ "lessthanEqualDF = sedona.sql(\"Select RS_LessThanEqual(Band1,2890.0) as
lessthanequal from allbands\")\n",
"lessthanEqualDF.show()"
]
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -388,7 +438,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
@@ -413,7 +463,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 18,
"metadata": {},
"outputs": [
{
@@ -438,7 +488,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
@@ -463,7 +513,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
@@ -488,7 +538,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
@@ -513,7 +563,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
@@ -538,7 +588,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -563,7 +613,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -588,7 +638,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 25,
"metadata": {},
"outputs": [
{
@@ -613,7 +663,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
@@ -638,7 +688,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 27,
"metadata": {},
"outputs": [
{
@@ -675,7 +725,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 29,
"metadata": {},
"outputs": [
{
@@ -695,7 +745,7 @@
"source": [
"'''Plotting images as a dataframe using geotiff Dataframe.'''\n",
"\n",
- "df =
spark.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\",
\"EPSG:4326\").load(DATA_DIR)\n",
+ "df =
sedona.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\",
\"EPSG:4326\").load(DATA_DIR)\n",
"df = df.selectExpr(\"image.origin as
origin\",\"ST_GeomFromWkt(image.geometry) as Geom\", \"image.height as
height\", \"image.width as width\", \"image.data as data\", \"image.nBands as
bands\")\n",
"\n",
"df = df.selectExpr(\"RS_GetBand(data,1,bands) as targetband\",
\"height\", \"width\", \"bands\", \"Geom\")\n",
@@ -706,7 +756,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 30,
"metadata": {},
"outputs": [
{
@@ -726,19 +776,19 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
- " <td>POLYGON ((-58.70271939504448 -34.41877544555479,
-58.70277605822864 -34.42156988068061, -58.6994039180242 -34.42161679331493,
-58.69934736692278 -34.4188223533111, -58.70271939504448
-34.41877544555479))</td>\n",
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFGElEQVR42l2X2W4jRRSG+zVhEu9b2+6O492xE8eJE8eZZLIxZNaISZgJIAHSSGwSFyziDoRAvABPgMQVQkhDneg70q+5+F3d5aqz/Gep6qgVRW/WAhoBpYA6Y5XnWkCRNb2AecCDgIuA44A44LOAPwM+DeiwZxU5hnxAhTGPbJsvB0Td8DNCuG3eFMWTAPs/hwH7rH0d8DTgVcAQPAm4ClgieAXDDRmQR1aeeTM+Mk8GojTBkHWQsNCepwGLgK2AMc87KP084N+A31ifBRnYyPGeYy7rDMyg3wTdBuyhbChGbDCOWechM3YOAt4L+CHgb8Y6jC6g2z1eQbmPZkw0QKCN3yDQvD5BiC38ACVbGLZLOM4ZzYlDDDWvUgy9YH0snq9iQAl5k
[...]
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFHklEQVR42l2X2W4jRRSG/ZowifetbbfttPexY8exsziTTBYzBGaJSMJMAIlBGolN4oJF3IEQiBfgCZC4QgjJVDXf8fyai9/VXa46y3+Wqk4kEs1VItFwqDrkHSqMJZ7LDjnWdBzmDg8dlg4nDoHDS4c/HD51aLFnEzkeGYciYwbZfr7gkGi7nwHC/eZtUTxa/f9/GgP2WfvK4YnDC4c+eOxw5bBA8AaGeyRBBlkZ5r3xsSc9URpiyBYIWeifJw4HDmOHIc+7KP3M4R+HX1mfAknYSPOeZi5lDEyh3wu6c9hDWV+MuM84ZJ2FzLNz6PCOw/cOfzFWYPQAus3jDZTb6I2JvW/AwtcI9F6fIsQv/AAlYwybEY4LRu/EEYZ6r+oYumR9IJ5vY
[...]
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFGElEQVR42l2X2W4jRRSG/ZpA4i3euu1uO+09duw4ceI4k0w2hswaMQlDAIlBGolN4oJF3IEQiBfgCZC4QghpqGq+Y/2ai9/VXa46y3+Wqs5kWpnXmaZD3aHkEDJWea45bLCm6zB3uO9w6XDiEDi8dPjD4VOHNnvWkeNRcKgwFpDt58sOmY77GSLcb94WxWMH/38eAw5Y+8rhicOHDgPw2OHaYYngNQz3yIICsgrMe+NTT/qiNMKQTRCx0D9PHRYOE4cRz7so/czhH4dfWZ8DWdjI855nLmcMzKDfC7pz2EfZQIzYYhyxzkLm2Tl0eNfhe4e/GEMYXUC3ebyGchu9Man3TVj4GoHe61OE+IXvo2SCYXuE44LRO3GEod6rGEMvWR+I5+sYU
[...]
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAGOElEQVR42k3XWUtXURQF8Ps1s3kwLRvNBm3O5kEtK0srmq3AgqAJemigtyIKv0CfIPBJJDjx27BE4Xj/99xz915r7eGc2+3Zs6ft3r27DQwMtC1btrTt27fXta+vr3739/e3zZs315r9+/e306dPt7GxsTY5OdmuXLnStm3b1l6+fNn+/PnT5ubm2r59++qdtWvXlh1j48aNbevWrXU12Dbf29vbuqGhoTY8PFzGvXz06NFlx0eOHGmeb9iwoQCcPXu21r5+/brNzMy0p0+ftkOHDtWYnp5u9+7daxcuXCjDa9asKeDGunXranDOlqt54DtMDh48uOx0586dBWTv3r013Fvo9/Hjx9u5c+fasWPH2uHDh+v3qVOnyumrV6/a0tJS+/XrV
[...]
+ " <td>POLYGON ((-58.70271939504447 -34.418775445554786,
-58.702776058228636 -34.421569880680615, -58.6994039180242 -34.42161679331493,
-58.69934736692278 -34.4188223533111, -58.70271939504447
-34.418775445554786))</td>\n",
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFhklEQVR4Xk3W1wpdVRAG4HlNa+yxx240xh57bzExSmKJRlBBsIEXRvFOCYov4BMIXokIcb496+ecizlr77VnzfzzT1mn7qi6eKjllpbrWm5e68H1fGPLtUvn3pYnW15qeb3lxZabWj5v+bPls5a715krauyQq1tuWCth2/71LXVP/zxQY9zho7Vz/FCL71fVAHhq6X7Z8k7L2ZbDS062vNfybI3hy2uAkyuXcM6W1T7wJZL7a+f0thogdy7xTtHzIy1PtzzccmQ9P17j9IuWf1t+W/oHlnCMDY69W+153hh4rIZ+hj5uOVbjTFQB8eBaOaWXlGHnmZY3Wn5s+XutgsEogOhOxFjhPCswJXoGrd/VGBT1yzVGKJ6ucSJywJ6oScdraxXEc
[...]
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFgElEQVR4Xk3X18psRRAF4HlNc84555zjMR8zZj2CCoIJvDDgnSKKL+ATCF6JCGV9u3rNzEX9vXfv7qq1VlV1z7/b7a6t3e6ativaLmy7fI2XrOdL2y5Ya25qe7Dt6bbn255qu6zt07Y/2z5pu2HtOavGDzuv7eI1Mr7NX9S2u7H/3Fbj3Oa76hD4zprv59YAeHit/bzttbYP2m5d9mrbm22P1Tg+swY4O3uZ4HwZzQO/MbmlDkGvqgFy3TLvFnq+p+2Rtrvb7ljP99cE/azt37bf1vpzlglMDYG9G8153hS4r0Z+jj5se6gmGFYBcfsaBbUuKaPOo20vtP3Q9vcakaEogOQOY6oInhGYjT2Hxm9qHGJ9osaJhW/VBMEcsAdq0vHcGpF4v
[...]
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFfElEQVR4Xk3X16pmRRAF4HpNc84555xzdsyYdQQVBBN4YcA7RRRfwCcQvBIRtL6uXv85DHV6797dVWutqur+p+rq+q+uarus7fy2S/d40X6+uO28veaGtvvbnmx7ru2JtkvaPmn7o+3jtuv2njNq/LBz2i7cI+Pb/AVtdX3/uaXGuc131FHg29t8P7sGwIN77Wdtr7a933bztlfa3mh7pMbx6TXA2ZnbBOfLaB74xeSmOgp6RQ2Qa7Z5t9DzXW0Ptd3Zdtt+vrcm6Kdt/7T9uteftU1gagjs3WjO81Lgnhr5Ofqg7YGaYFgFxK17FNS6pIw6D7c93/Z92197RIaiAJI7jKkieEZgFnsOjV/XOMT6qRonFr5ZEwRzwO6rSceze0Ti0RqgW
[...]
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAGOklEQVR4Xk3XWUtXURQF8Ps1s3kwLRvNBm3O5kEtK0srmq3AgqAJemigtyIKv0CfIPBJJDjx27BE4XjuPffcvddaezj33+3Zs6ft3r27DQwMtC1btrTt27fX3NfXV9f9/f1t8+bNtWf//v3t9OnTbWxsrE1OTrYrV660bdu2tZcvX7Y/f/60ubm5tm/fvnpn7dq1ZcfYuHFj27p1a80G29Z7e3tbNzQ01IaHh8u4l48ePbrs+MiRI83zDRs2FICzZ8/W3tevX7eZmZn29OnTdujQoRrT09Pt3r177cKFC2V4zZo1BdxYt25dDc7ZMlsHvsPk4MGDy0537txZQPbu3VvDvY2ujx8/3s6dO9eOHTvWDh8+XNenTp0qp69evWpLS0vt169ft
[...]
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
- " <td>POLYGON ((-58.28663657626114 -34.75858090620287,
-58.28667994174946 -34.76137571668496, -58.28329340123002 -34.76141146033393,
-58.28325014980316 -34.75861664615162, -58.28663657626114
-34.75858090620287))</td>\n",
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAE+UlEQVR42l2XS4/bVBTH/R0p08nDieOx4zhO7LxfM5lH0xnoY9qhtKgMagEJVd0hIbaw4FuwR2zY8QXKvdHvoL+yOLF977nn/M/7JoiD4FPHUelo4Shx9NBR31HlqO3I8zQcHTtqOooctXgv2D9iP2Yt4sxDqIGsnaO/HL1ylDsKUgCkgMgR3GYtFiFeQR3lkfCMHPXg9We7Yog/EwIgRMczzpx4AP5n4miA1TnUEvKHP3P0AEsNkFc0dnTr6AVgauyF4oUO3976P7De8w49gDkAEhDFAEhBn/B+JACOAeQ9cO7oFMtCeI4B0YLfQNw7+hUA3uAbDUEf97UAMCGWU/bash/KM5LnhLMNqH4QzgGhWiDXey2owdjHpRFCvGc2vO842IGar
[...]
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAE/ElEQVR42l2XyXLcVBSG+x0JjntQS62WWq0epJ4nu2M7HZuExCEJQwVTCVBFUeyootjCgrdgT7FhxwuYe5XvmL+8OC3p3jP8Z7y3a7VafFurdRwVjlaOEkcPHQ0clY7C2w88TUfHjlqOIkdt3ofsH7EfsxYh8xBqouvg6C9HbxzljmopAFJA5CgOWYtFiTfQwHgkPBNHfXi9bE8c8TIBAAJsPEem6wH4n5mjEV7nUFvIC3/k6AGeGiBvaOro2tFLwNTZCyQKHb6993/gvecdewBLACQgigGQgj7h/UgAHAPIR+CRoxM8C+A5BkQbfgNx4+hXAHiHrzQFA8LXBsCMXM7ZC2U/kGckzxmyTahxL50jUrVCr49aFbIGRnoo8kp8ZHa8HxDsQ
[...]
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAE+ElEQVR42l2XyW7bVhSG9Y5NHWugSFGkKGogNU+2YjuK3aSJ3SQdkLpomgBFkF2Bott20bfovuimu75Aei/xHeOHF0ck7z3Df8Z7VavFtU+1jqPC0cpR4uiho4Gj0lHoyPM0HR07ajmKHLV5H7J/xH7MWoTMQ6iJroOjvx29dpQ7qqUASAGRozhkLRYl3kAD45HwTBz14fWyPXHEywQACLDxHJmuB+B/Zo5GeJ1DbSEv/JmjB3hqgLyhqaNrRzeAqbMXSBQ6fHvv/8R7zzv2AJYASEAUAyAFfcL7kQA4BpCPwCNHJ3gWwHMMiDb8BuLW0W8A8A5faQoGhK8NgBm5nLMXyn4gz0ieM2SbUONeOkekaoVeH7UqZA2M9FDklfjI7Hg/INiBW
[...]
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAF7klEQVR42k3XyU5WSxQF4POOKCooSmODqKggNoCCiA2tghqUAJIY44zEMNWBb8HcOGHmC9TNt5PFhaSoU1W7Wbut+ruhoaE2ODjY7ty506amptrIyEg7d+5cu3nzZhsfH2+XL19uaPr7+9v58+fbxYsX25UrV9rAwEB9j42N1Xlvb2+d+7aHBg9Zhm+yFhYW2p8/f9rGxkYbHR1t3dWrVwuAGQibBCO2R2CEUNDX11fKKQjN3bt3240bN4oW77Vr104MwXPp0qUCYKZjeXm5eIaHh1vn3/3799utW7fKagAMSjIw9/T0tDNnzpSlAUTRvXv32urqaltbWyswFy5cqDPK4gUGWrP+9+/fZT3a27dvt+7BgwcFAGJgWAEAAujt+6Y4AOwDx
[...]
+ " <td>POLYGON ((-58.286636576261145 -34.758580906202866,
-58.286679941749476 -34.76137571668496, -58.28329340123003 -34.76141146033393,
-58.28325014980317 -34.75861664615162, -58.286636576261145
-34.758580906202866))</td>\n",
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFX0lEQVR4Xk3WydIVRRAF4HxHmRRFEAWRGZRJRJF5kB8FDIEQNMIw2BlBuIUFb+HeYMOOF/jJr6sO9y7ydndVVp6TY936rGpzT8uRllMtn7dsb/mq5WjLpy10drbsaPm4ZXfLrvl+cO5vm/verdFxhi3ina1LLf+3/NJyoKW+qEHAEwmLDFO2xmCMAPioBjiA6Bxr+XLqOruvVo4480kNAp4wbs0ze1vKz4mWQzW8RoAAiTj8QcuWGp6GEKDjLT+13KlB5sO5ByxR4KBv3r+q4T3dwy31TQ0CGCPDCwQoYG/dO+AQsI6QCFxo+baGZ0Do2EcCefoh8ajl3xoEOHy11lLAe+FzCAGk5PLk3AOWfUB58jJPZ5wFSKRrPZ1ApUqtsStqJWQUg
[...]
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFYElEQVR4Xk3WydJURRAF4H5HxQkUcGBQmVUQRBFnGRwQAyEQiSAId0YYbHHhW7g33LjzBdL8qur034vse29VVp6TY/Vms3m1NpvDLSdb3mt5veX5ljdbTrUcrKmzv+WFlgMth1peWe9vrf3n1r53a3ScYYt4Z+uTlr9bvm853rJ5oyYBTyQsMkzZGoMxAuClmuAAonO65djSdfZI7TnizMs1CXjCuLbOvNYyfs62vF3TawQIkIjDz7Q8W9PTEAJ0puVGy82aZF5ce8ASBQ765v2fNb2ne6Jl825NAhgjwwsEKGBv3TvgELCOkAh82HKppmdA6NhHAnn6IXG35UlNAhz+vHZSwHvhcwgBpOTy3NoDln1AefIyT2ecBUikazedQKVKrbEra
[...]
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFXElEQVR4Xk3WWa9VRRAF4PqPMqOIgODEJDOIA86CzBKBiJoY4psJ8VUf/Be+G1988w9ofd29zj25qbv37q6utWrsU3Wk/qvXWk61XGw51rK35a2W0y2vttA52LKv5eWWwy2H1vvba3/P2vdujY4zbBHvbH3c8lfL/ZY3W+r1mgQ8kbDIMGVrDMYIgAM1wQFE50zLG0vX2eO144gzr9Qk4Anj5jpztGX8e7flnZpeI0CARBx+qWVXTU9DCNDZlq9bbtcks3/tAUsUOOib93/U9J7uyZa6UJMAxsjwAgEK2Fv3DjgErCMkAu+3XKvpGRA69pFAnn5IPGn5tSYBDn9WWyngvfA5hABScnlu7QHLPqA8eZmnM84CJNK1nU6gUqXW2BW1ETKKQ
[...]
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAF8UlEQVR4Xk3XyU5XTRAF8PuOKCooyuCIiAriACiIODAqqEEJKIkx7kgMW134FuyNG3a8QH/5VXL4MGnv/3ZX1Tk19qUbGhpqg4OD7c6dO21qaqqNjIy0c+fOtVu3brXx8fF2+fLlRqa/v7+dP3++Xbx4sV25cqUNDAzU79HR0Trv7e2tc7/tkaHDluU3WwsLC+3v379tY2Oj3bx5s3VXr14tAp5I2GSYsD0GYwRAX19fgQOIzN27d9uNGzdKlu61a9dOHKFz6dKlIuAJY3l5uXSGh4db57/79++327dvl9cIWECyKPf09LQzZ86UpyEE6N69e211dbWtra0VmQsXLtQZsESBg955/+fPn/Ke7NjYWOsePHhQBDBGhhcIEMDevt+AQ8A+Q
[...]
" </tr>\n",
" </tbody>\n",
"</table>"
@@ -764,13 +814,13 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"'''Writing GeoTiff DataFrames as GeoTiff Images'''\n",
"\n",
- "df =
spark.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\",
\"EPSG:4326\").load(DATA_DIR)\n",
+ "df =
sedona.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\",
\"EPSG:4326\").load(DATA_DIR)\n",
"df = df.selectExpr(\"image.origin as
origin\",\"ST_GeomFromWkt(image.geometry) as Geom\", \"image.height as
height\", \"image.width as width\", \"image.data as data\", \"image.nBands as
bands\")\n",
"\n",
"SAVE_PATH = \"./data/raster-written/\"\n",
@@ -779,7 +829,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
@@ -789,7 +839,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
@@ -805,7 +855,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 35,
"metadata": {},
"outputs": [
{
@@ -825,19 +875,19 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
- " <td>POLYGON ((-58.70266723632812 -34.41881942749023,
-58.70266723632812 -34.42157363891602, -58.69945526123047 -34.42157363891602,
-58.69945526123047 -34.41881942749023, -58.70266723632812
-34.41881942749023))</td>\n",
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFGElEQVR42l2X2W4jRRSG+zVhEu9b2+6O492xE8eJE8eZZLIxZNaISZgJIAHSSGwSFyziDoRAvABPgMQVQkhDneg70q+5+F3d5aqz/Gep6qgVRW/WAhoBpYA6Y5XnWkCRNb2AecCDgIuA44A44LOAPwM+DeiwZxU5hnxAhTGPbJsvB0Td8DNCuG3eFMWTAPs/hwH7rH0d8DTgVcAQPAm4ClgieAXDDRmQR1aeeTM+Mk8GojTBkHWQsNCepwGLgK2AMc87KP084N+A31ifBRnYyPGeYy7rDMyg3wTdBuyhbChGbDCOWechM3YOAt4L+CHgb8Y6jC6g2z1eQbmPZkw0QKCN3yDQvD5BiC38ACVbGLZLOM4ZzYlDDDWvUgy9YH0snq9iQAl5k
[...]
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFHklEQVR42l2X2W4jRRSG/ZowifetbbfttPexY8exsziTTBYzBGaJSMJMAIlBGolN4oJF3IEQiBfgCZC4QgjJVDXf8fyai9/VXa46y3+Wqk4kEs1VItFwqDrkHSqMJZ7LDjnWdBzmDg8dlg4nDoHDS4c/HD51aLFnEzkeGYciYwbZfr7gkGi7nwHC/eZtUTxa/f9/GgP2WfvK4YnDC4c+eOxw5bBA8AaGeyRBBlkZ5r3xsSc9URpiyBYIWeifJw4HDmOHIc+7KP3M4R+HX1mfAknYSPOeZi5lDEyh3wu6c9hDWV+MuM84ZJ2FzLNz6PCOw/cOfzFWYPQAus3jDZTb6I2JvW/AwtcI9F6fIsQv/AAlYwybEY4LRu/EEYZ6r+oYumR9IJ5vY
[...]
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFGElEQVR42l2X2W4jRRSG/ZpA4i3euu1uO+09duw4ceI4k0w2hswaMQlDAIlBGolN4oJF3IEQiBfgCZC4QghpqGq+Y/2ai9/VXa46y3+Wqs5kWpnXmaZD3aHkEDJWea45bLCm6zB3uO9w6XDiEDi8dPjD4VOHNnvWkeNRcKgwFpDt58sOmY77GSLcb94WxWMH/38eAw5Y+8rhicOHDgPw2OHaYYngNQz3yIICsgrMe+NTT/qiNMKQTRCx0D9PHRYOE4cRz7so/czhH4dfWZ8DWdjI855nLmcMzKDfC7pz2EfZQIzYYhyxzkLm2Tl0eNfhe4e/GEMYXUC3ebyGchu9Man3TVj4GoHe61OE+IXvo2SCYXuE44LRO3GEod6rGEMvWR+I5+sYU
[...]
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAGOElEQVR42k3XWUtXURQF8Ps1s3kwLRvNBm3O5kEtK0srmq3AgqAJemigtyIKv0CfIPBJJDjx27BE4Xj/99xz915r7eGc2+3Zs6ft3r27DQwMtC1btrTt27fXta+vr3739/e3zZs315r9+/e306dPt7GxsTY5OdmuXLnStm3b1l6+fNn+/PnT5ubm2r59++qdtWvXlh1j48aNbevWrXU12Dbf29vbuqGhoTY8PFzGvXz06NFlx0eOHGmeb9iwoQCcPXu21r5+/brNzMy0p0+ftkOHDtWYnp5u9+7daxcuXCjDa9asKeDGunXranDOlqt54DtMDh48uOx0586dBWTv3r013Fvo9/Hjx9u5c+fasWPH2uHDh+v3qVOnyumrV6/a0tJS+/XrV
[...]
+ " <td>POLYGON ((-58.702667236328125 -34.418819427490234,
-58.702667236328125 -34.421573638916016, -58.69945526123047
-34.421573638916016, -58.69945526123047 -34.418819427490234,
-58.702667236328125 -34.418819427490234))</td>\n",
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFhklEQVR4Xk3W1wpdVRAG4HlNa+yxx240xh57bzExSmKJRlBBsIEXRvFOCYov4BMIXokIcb496+ecizlr77VnzfzzT1mn7qi6eKjllpbrWm5e68H1fGPLtUvn3pYnW15qeb3lxZabWj5v+bPls5a715krauyQq1tuWCth2/71LXVP/zxQY9zho7Vz/FCL71fVAHhq6X7Z8k7L2ZbDS062vNfybI3hy2uAkyuXcM6W1T7wJZL7a+f0thogdy7xTtHzIy1PtzzccmQ9P17j9IuWf1t+W/oHlnCMDY69W+153hh4rIZ+hj5uOVbjTFQB8eBaOaWXlGHnmZY3Wn5s+XutgsEogOhOxFjhPCswJXoGrd/VGBT1yzVGKJ6ucSJywJ6oScdraxXEc
[...]
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFgElEQVR4Xk3X18psRRAF4HlNc84555zjMR8zZj2CCoIJvDDgnSKKL+ATCF6JCGV9u3rNzEX9vXfv7qq1VlV1z7/b7a6t3e6ativaLmy7fI2XrOdL2y5Ya25qe7Dt6bbn255qu6zt07Y/2z5pu2HtOavGDzuv7eI1Mr7NX9S2u7H/3Fbj3Oa76hD4zprv59YAeHit/bzttbYP2m5d9mrbm22P1Tg+swY4O3uZ4HwZzQO/MbmlDkGvqgFy3TLvFnq+p+2Rtrvb7ljP99cE/azt37bf1vpzlglMDYG9G8153hS4r0Z+jj5se6gmGFYBcfsaBbUuKaPOo20vtP3Q9vcakaEogOQOY6oInhGYjT2Hxm9qHGJ9osaJhW/VBMEcsAdq0vHcGpF4v
[...]
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFfElEQVR4Xk3X16pmRRAF4HpNc84555xzdsyYdQQVBBN4YcA7RRRfwCcQvBIRtL6uXv85DHV6797dVWutqur+p+rq+q+uarus7fy2S/d40X6+uO28veaGtvvbnmx7ru2JtkvaPmn7o+3jtuv2njNq/LBz2i7cI+Pb/AVtdX3/uaXGuc131FHg29t8P7sGwIN77Wdtr7a933bztlfa3mh7pMbx6TXA2ZnbBOfLaB74xeSmOgp6RQ2Qa7Z5t9DzXW0Ptd3Zdtt+vrcm6Kdt/7T9uteftU1gagjs3WjO81Lgnhr5Ofqg7YGaYFgFxK17FNS6pIw6D7c93/Z92197RIaiAJI7jKkieEZgFnsOjV/XOMT6qRonFr5ZEwRzwO6rSceze0Ti0RqgW
[...]
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAGOklEQVR4Xk3XWUtXURQF8Ps1s3kwLRvNBm3O5kEtK0srmq3AgqAJemigtyIKv0CfIPBJJDjx27BE4XjuPffcvddaezj33+3Zs6ft3r27DQwMtC1btrTt27fX3NfXV9f9/f1t8+bNtWf//v3t9OnTbWxsrE1OTrYrV660bdu2tZcvX7Y/f/60ubm5tm/fvnpn7dq1ZcfYuHFj27p1a80G29Z7e3tbNzQ01IaHh8u4l48ePbrs+MiRI83zDRs2FICzZ8/W3tevX7eZmZn29OnTdujQoRrT09Pt3r177cKFC2V4zZo1BdxYt25dDc7ZMlsHvsPk4MGDy0537txZQPbu3VvDvY2ujx8/3s6dO9eOHTvWDh8+XNenTp0qp69evWpLS0vt169ft
[...]
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
- " <td>POLYGON ((-58.28658294677734 -34.75862503051758,
-58.28658294677734 -34.76136779785156, -58.28334426879883 -34.76136779785156,
-58.28334426879883 -34.75862503051758, -58.28658294677734
-34.75862503051758))</td>\n",
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAE+UlEQVR42l2XS4/bVBTH/R0p08nDieOx4zhO7LxfM5lH0xnoY9qhtKgMagEJVd0hIbaw4FuwR2zY8QXKvdHvoL+yOLF977nn/M/7JoiD4FPHUelo4Shx9NBR31HlqO3I8zQcHTtqOooctXgv2D9iP2Yt4sxDqIGsnaO/HL1ylDsKUgCkgMgR3GYtFiFeQR3lkfCMHPXg9We7Yog/EwIgRMczzpx4AP5n4miA1TnUEvKHP3P0AEsNkFc0dnTr6AVgauyF4oUO3976P7De8w49gDkAEhDFAEhBn/B+JACOAeQ9cO7oFMtCeI4B0YLfQNw7+hUA3uAbDUEf97UAMCGWU/bash/KM5LnhLMNqH4QzgGhWiDXey2owdjHpRFCvGc2vO842IGar
[...]
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAE/ElEQVR42l2XyXLcVBSG+x0JjntQS62WWq0epJ4nu2M7HZuExCEJQwVTCVBFUeyootjCgrdgT7FhxwuYe5XvmL+8OC3p3jP8Z7y3a7VafFurdRwVjlaOEkcPHQ0clY7C2w88TUfHjlqOIkdt3ofsH7EfsxYh8xBqouvg6C9HbxzljmopAFJA5CgOWYtFiTfQwHgkPBNHfXi9bE8c8TIBAAJsPEem6wH4n5mjEV7nUFvIC3/k6AGeGiBvaOro2tFLwNTZCyQKHb6993/gvecdewBLACQgigGQgj7h/UgAHAPIR+CRoxM8C+A5BkQbfgNx4+hXAHiHrzQFA8LXBsCMXM7ZC2U/kGckzxmyTahxL50jUrVCr49aFbIGRnoo8kp8ZHa8HxDsQ
[...]
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAE+ElEQVR42l2XyW7bVhSG9Y5NHWugSFGkKGogNU+2YjuK3aSJ3SQdkLpomgBFkF2Bott20bfovuimu75Aei/xHeOHF0ck7z3Df8Z7VavFtU+1jqPC0cpR4uiho4Gj0lHoyPM0HR07ajmKHLV5H7J/xH7MWoTMQ6iJroOjvx29dpQ7qqUASAGRozhkLRYl3kAD45HwTBz14fWyPXHEywQACLDxHJmuB+B/Zo5GeJ1DbSEv/JmjB3hqgLyhqaNrRzeAqbMXSBQ6fHvv/8R7zzv2AJYASEAUAyAFfcL7kQA4BpCPwCNHJ3gWwHMMiDb8BuLW0W8A8A5faQoGhK8NgBm5nLMXyn4gz0ieM2SbUONeOkekaoVeH7UqZA2M9FDklfjI7Hg/INiBW
[...]
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAF7klEQVR42k3XyU5WSxQF4POOKCooSmODqKggNoCCiA2tghqUAJIY44zEMNWBb8HcOGHmC9TNt5PFhaSoU1W7Wbut+ruhoaE2ODjY7ty506amptrIyEg7d+5cu3nzZhsfH2+XL19uaPr7+9v58+fbxYsX25UrV9rAwEB9j42N1Xlvb2+d+7aHBg9Zhm+yFhYW2p8/f9rGxkYbHR1t3dWrVwuAGQibBCO2R2CEUNDX11fKKQjN3bt3240bN4oW77Vr104MwXPp0qUCYKZjeXm5eIaHh1vn3/3799utW7fKagAMSjIw9/T0tDNnzpSlAUTRvXv32urqaltbWyswFy5cqDPK4gUGWrP+9+/fZT3a27dvt+7BgwcFAGJgWAEAAujt+6Y4AOwDx
[...]
+ " <td>POLYGON ((-58.286582946777344 -34.75862503051758,
-58.286582946777344 -34.76136779785156, -58.28334426879883 -34.76136779785156,
-58.28334426879883 -34.75862503051758, -58.286582946777344
-34.75862503051758))</td>\n",
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFX0lEQVR4Xk3WydIVRRAF4HxHmRRFEAWRGZRJRJF5kB8FDIEQNMIw2BlBuIUFb+HeYMOOF/jJr6sO9y7ydndVVp6TY936rGpzT8uRllMtn7dsb/mq5WjLpy10drbsaPm4ZXfLrvl+cO5vm/verdFxhi3ina1LLf+3/NJyoKW+qEHAEwmLDFO2xmCMAPioBjiA6Bxr+XLqOruvVo4480kNAp4wbs0ze1vKz4mWQzW8RoAAiTj8QcuWGp6GEKDjLT+13KlB5sO5ByxR4KBv3r+q4T3dwy31TQ0CGCPDCwQoYG/dO+AQsI6QCFxo+baGZ0Do2EcCefoh8ajl3xoEOHy11lLAe+FzCAGk5PLk3AOWfUB58jJPZ5wFSKRrPZ1ApUqtsStqJWQUg
[...]
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFYElEQVR4Xk3WydJURRAF4H5HxQkUcGBQmVUQRBFnGRwQAyEQiSAId0YYbHHhW7g33LjzBdL8qur034vse29VVp6TY/Vms3m1NpvDLSdb3mt5veX5ljdbTrUcrKmzv+WFlgMth1peWe9vrf3n1r53a3ScYYt4Z+uTlr9bvm853rJ5oyYBTyQsMkzZGoMxAuClmuAAonO65djSdfZI7TnizMs1CXjCuLbOvNYyfs62vF3TawQIkIjDz7Q8W9PTEAJ0puVGy82aZF5ce8ASBQ765v2fNb2ne6Jl825NAhgjwwsEKGBv3TvgELCOkAh82HKppmdA6NhHAnn6IXG35UlNAhz+vHZSwHvhcwgBpOTy3NoDln1AefIyT2ecBUikazedQKVKrbEra
[...]
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFXElEQVR4Xk3WWa9VRRAF4PqPMqOIgODEJDOIA86CzBKBiJoY4psJ8VUf/Be+G1988w9ofd29zj25qbv37q6utWrsU3Wk/qvXWk61XGw51rK35a2W0y2vttA52LKv5eWWwy2H1vvba3/P2vdujY4zbBHvbH3c8lfL/ZY3W+r1mgQ8kbDIMGVrDMYIgAM1wQFE50zLG0vX2eO144gzr9Qk4Anj5jpztGX8e7flnZpeI0CARBx+qWVXTU9DCNDZlq9bbtcks3/tAUsUOOib93/U9J7uyZa6UJMAxsjwAgEK2Fv3DjgErCMkAu+3XKvpGRA69pFAnn5IPGn5tSYBDn9WWyngvfA5hABScnlu7QHLPqA8eZmnM84CJNK1nU6gUqXW2BW1ETKKQ
[...]
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAF8UlEQVR4Xk3XyU5XTRAF8PuOKCooyuCIiAriACiIODAqqEEJKIkx7kgMW134FuyNG3a8QH/5VXL4MGnv/3ZX1Tk19qUbGhpqg4OD7c6dO21qaqqNjIy0c+fOtVu3brXx8fF2+fLlRqa/v7+dP3++Xbx4sV25cqUNDAzU79HR0Trv7e2tc7/tkaHDluU3WwsLC+3v379tY2Oj3bx5s3VXr14tAp5I2GSYsD0GYwRAX19fgQOIzN27d9uNGzdKlu61a9dOHKFz6dKlIuAJY3l5uXSGh4db57/79++327dvl9cIWECyKPf09LQzZ86UpyEE6N69e211dbWtra0VmQsXLtQZsESBg955/+fPn/Ke7NjYWOsePHhQBDBGhhcIEMDevt+AQ8A+Q
[...]
" </tr>\n",
" </tbody>\n",
"</table>"
@@ -853,7 +903,7 @@
"source": [
"'''Load and Visualize Written GeoTiff Image.'''\n",
"\n",
- "df =
spark.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\",
\"EPSG:4326\").load(writtenPath)\n",
+ "df =
sedona.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\",
\"EPSG:4326\").load(writtenPath)\n",
"df = df.selectExpr(\"image.origin as
origin\",\"ST_GeomFromWkt(image.geometry) as Geom\", \"image.height as
height\", \"image.width as width\", \"image.data as data\", \"image.nBands as
bands\")\n",
"\n",
"df = df.selectExpr(\"RS_GetBand(data,1,bands) as targetband\",
\"height\", \"width\", \"bands\", \"Geom\")\n",
@@ -871,7 +921,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 37,
"metadata": {},
"outputs": [
{
@@ -881,8 +931,8 @@
"+--------------------+--------------------+------+-----+--------------------+-----+\n",
"| origin| geom|height|width|
data|bands|\n",
"+--------------------+--------------------+------+-----+--------------------+-----+\n",
- "|file:///media/kch...|POLYGON ((-58.702...| 32| 32|[1081.0,
1068.0, ...| 4|\n",
- "|file:///media/kch...|POLYGON ((-58.286...| 32| 32|[1151.0,
1141.0, ...| 4|\n",
+ "|file:/Users/niles...|POLYGON ((-58.702...| 32| 32|[1081.0,
1068.0, ...| 4|\n",
+ "|file:/Users/niles...|POLYGON ((-58.286...| 32| 32|[1151.0,
1141.0, ...| 4|\n",
"+--------------------+--------------------+------+-----+--------------------+-----+\n",
"\n"
]
@@ -890,14 +940,14 @@
],
"source": [
"'''First load GeoTiff Images'''\n",
- "df =
spark.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\",
\"EPSG:4326\").option(\"disableErrorInCRS\", False).load(DATA_DIR)\n",
+ "df =
sedona.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\",
\"EPSG:4326\").option(\"disableErrorInCRS\", False).load(DATA_DIR)\n",
"df = df.selectExpr(\"image.origin as
origin\",\"ST_GeomFromWkt(image.geometry) as geom\", \"image.height as
height\", \"image.width as width\", \"image.data as data\", \"image.nBands as
bands\")\n",
"df.show(5)"
]
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 38,
"metadata": {},
"outputs": [
{
@@ -907,8 +957,8 @@
"+--------------------+--------------------+-----+------+--------------------+-----+--------------------+\n",
"| origin| geom|width|height|
data|bands|normalizedDifference|\n",
"+--------------------+--------------------+-----+------+--------------------+-----+--------------------+\n",
- "|file:///media/kch...|POLYGON ((-58.702...| 32| 32|[1081.0,
1068.0, ...| 4|[0.09, 0.08, 0.11...|\n",
- "|file:///media/kch...|POLYGON ((-58.286...| 32| 32|[1151.0,
1141.0, ...| 4|[0.13, 0.09, 0.09...|\n",
+ "|file:/Users/niles...|POLYGON ((-58.702...| 32| 32|[1081.0,
1068.0, ...| 4|[0.09, 0.08, 0.11...|\n",
+ "|file:/Users/niles...|POLYGON ((-58.286...| 32| 32|[1151.0,
1141.0, ...| 4|[0.13, 0.09, 0.09...|\n",
"+--------------------+--------------------+-----+------+--------------------+-----+--------------------+\n",
"\n"
]
@@ -924,7 +974,7 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 39,
"metadata": {},
"outputs": [
{
@@ -934,8 +984,8 @@
"+--------------------+--------------------+--------------------+------+-----+------------+\n",
"| origin| geom|
data_edited|height|width|nBand_edited|\n",
"+--------------------+--------------------+--------------------+------+-----+------------+\n",
- "|file:///media/kch...|POLYGON ((-58.702...|[1081.0, 1068.0, ...| 32|
32| 5|\n",
- "|file:///media/kch...|POLYGON ((-58.286...|[1151.0, 1141.0, ...| 32|
32| 5|\n",
+ "|file:/Users/niles...|POLYGON ((-58.702...|[1081.0, 1068.0, ...| 32|
32| 5|\n",
+ "|file:/Users/niles...|POLYGON ((-58.286...|[1151.0, 1141.0, ...| 32|
32| 5|\n",
"+--------------------+--------------------+--------------------+------+-----+------------+\n",
"\n"
]
@@ -952,7 +1002,7 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
@@ -970,7 +1020,7 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 42,
"metadata": {},
"outputs": [
{
@@ -997,12 +1047,12 @@
" total+=1\n",
" return total\n",
"\n",
- "df =
spark.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\",
\"EPSG:4326\").load(DATA_DIR)\n",
+ "df =
sedona.read.format(\"geotiff\").option(\"dropInvalid\",True).option(\"readToCRS\",
\"EPSG:4326\").load(DATA_DIR)\n",
"df = df.selectExpr(\"image.origin as
origin\",\"ST_GeomFromWkt(image.geometry) as Geom\", \"image.height as
height\", \"image.width as width\", \"image.data as data\", \"image.nBands as
bands\")\n",
"df = df.selectExpr(\"RS_GetBand(data,1,bands) as targetband\",
\"height\", \"width\", \"bands\", \"Geom\")\n",
" \n",
"calculateSum = udf(SumOfValues, DoubleType())\n",
- "spark.udf.register(\"RS_Sum\", calculateSum)\n",
+ "sedona.udf.register(\"RS_Sum\", calculateSum)\n",
"\n",
"sumDF = df.selectExpr(\"RS_Sum(targetband) as sum\")\n",
"sumDF.show()"
@@ -1010,7 +1060,7 @@
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 43,
"metadata": {},
"outputs": [
{
@@ -1027,13 +1077,13 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
- " <td>POLYGON ((-58.70271939504448 -34.41877544555479,
-58.70277605822864 -34.42156988068061, -58.6994039180242 -34.42161679331493,
-58.69934736692278 -34.4188223533111, -58.70271939504448
-34.41877544555479))</td>\n",
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFlklEQVR42l2Xa28bRRSG529CXV/XG6/t9S1rx/c4iZPGaS6N3RBK2oiGSwCJVqpEAYkPBcQ3EALxB/gFSHxCCGk5M/OMM+qH1zO73p1zznvec2ZWtZRSTUFNEAiqjBvMK4ISz3QFc6XSU8FKcCKIBC8Efwq+FGwK5J30viAABUEo7xbABjbKApUIBiwuL6uJZ3jM/3kcOLDPpq8EV4JPBX3wgeBasBDIwmlGUAJZC2M8z6iDirQDEona8ozGONIGMQ/q+bbggSw2FYwEer6L0ZeCfwW/wUoOZGFDG87hQJa5YWAH+mUhdSvYx1jfc2LIOLLPpcJGKu+kwk56KHgs+EHwN6MEk3ZxcIMU6IgzGHejdsZE32T8VnBI1GekRT/4oUCMqKl1L
[...]
+ " <td>POLYGON ((-58.70271939504447 -34.418775445554786,
-58.702776058228636 -34.421569880680615, -58.6994039180242 -34.42161679331493,
-58.69934736692278 -34.4188223533111, -58.70271939504447
-34.418775445554786))</td>\n",
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAGGUlEQVR4Xk3XW6/dUxQF8Pk13WpX695S15aWFi2qqPutiDuVIJEoEg9FvBEhvoBPIPEkItnGb661ds/DOP/7mmOOOeZc+9SRqrozuD04GNw2jzfP81uCA/Od+4LTVdtngxeDZ4Jbg8+DP4PPgnuCfLO9Ljg4sT+4Kd/un7C2GDcGdW/wwFw8H9dDdSXwifl8U4PA4+Pd7ZfBheCj4NjEG8HbwdkgC2+vDQ5M7Bvo4NZylFTIVyWTOlpXgh6uQeSuCddedH4yeCKLPRwcD5w/OoN+Efwb/FZDlesnBKfGJt/mugkg47wVeKSG/FmoPg7OzGDH5hEenMfj471t1Njmm23U2T4ZvBT8EPw9j0lmG0WbYOTuEsg4qnTwddwEnT15Hb8NsmBnf
[...]
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
- " <td>POLYGON ((-58.28663657626114 -34.75858090620287,
-58.28667994174946 -34.76137571668496, -58.28329340123002 -34.76141146033393,
-58.28325014980316 -34.75861664615162, -58.28663657626114
-34.75858090620287))</td>\n",
- " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAFfklEQVR42k2XSXPbRhCF8R/tSOJOQiRBECTBTaRILdZiybJjKbacpGynvKQqlcotVSlfk0P+Re6pXHLLH0C6e74R5vAEgFj69evXPaMojqKoI5gIVoKuYFcwFOSClkCfqQn2BPUoKtqCpkDPM4HcL3YEe5xnPCPvFLugxrcuBX8L7gWpIOpBoAeJ1AWxh3sE3wVKoEpwDSDPFPJMMRUMCK6k+oIugZVUAwINYnwtkHei/Yg/c8GIrFPQDKAvPxI8FuwEWWmgmeBWcAeZCvcagQodd23Z/0n2mtxYCRxAoAuZGAI9Mu5yvhMQ0KweocCp4EgwIehj7u+i1E5JInov+AIBTfg6LMEQ6ZsQUFJSy2jBvVZ53wI1gyz9Ud4pUgIqqpSEUlnQA
[...]
+ " <td>POLYGON ((-58.286636576261145 -34.758580906202866,
-58.286679941749476 -34.76137571668496, -58.28329340123003 -34.76141146033393,
-58.28325014980317 -34.75861664615162, -58.286636576261145
-34.758580906202866))</td>\n",
+ " <td><img
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAGA0lEQVR4Xk3WSfNeYxAF8P6OhgRJ3BBDiMQYQ5B/BCHmGMpQiCql7FQpWxa+hb2ysfMFrv710/3mv+j33vcOT58+53Q/Nx6KiLMZFzMuZ5zLOJHxRMaljAczPPNAxsmMUxH7lnEmw/mTGXl/vzfjZJ+75pl8Zz/R4X1rXc/4O+NWxvmMeCQWAEcgXDzVD7smOUACgPs7+ZaRz+z5zP50xuOdHKhHM851YqBON4DTneO9jHwnHo7+eTbjQqyqARBnjoWX78q4O0OlU5VEz2R8kPFhLDD39T1Jt5V4P7v+V/V/xqpecU9lxAuxAKAeGBUD4AEVu+48Ex8AqCoBFQOvZ1zJuNhJ7+77QGDK8w0ivs74LRYABb8dvbibqke9igEAKrWM5/oeS
[...]
" </tr>\n",
" </tbody>\n",
"</table>"
@@ -1058,7 +1108,7 @@
" return band\n",
"\n",
"maskValues = udf(generatemask, ArrayType(DoubleType()))\n",
- "spark.udf.register(\"RS_MaskValues\", maskValues)\n",
+ "sedona.udf.register(\"RS_MaskValues\", maskValues)\n",
"\n",
"\n",
"df_base64 = df.selectExpr(\"Geom\",
\"RS_Base64(height,width,RS_Normalize(targetband), RS_Array(height*width,0.0),
RS_Array(height*width, 0.0), RS_MaskValues(targetband,width,height)) as
region\" )\n",
@@ -1076,9 +1126,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "apache-sedona",
"language": "python",
- "name": "python3"
+ "name": "apache-sedona"
},
"language_info": {
"codemirror_mode": {
@@ -1090,7 +1140,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.12"
+ "version": "3.9.6"
}
},
"nbformat": 4,
diff --git a/binder/ApacheSedonaSQL.ipynb b/binder/ApacheSedonaSQL.ipynb
index 5a10271a..a58c264e 100644
--- a/binder/ApacheSedonaSQL.ipynb
+++ b/binder/ApacheSedonaSQL.ipynb
@@ -33,8 +33,7 @@
"import geopandas as gpd\n",
"from pyspark.sql import SparkSession\n",
"\n",
- "from sedona.register import SedonaRegistrator\n",
- "from sedona.utils import SedonaKryoRegistrator, KryoSerializer"
+ "from sedona.spark import *"
]
},
{
@@ -46,110 +45,51 @@
"name": "stdout",
"output_type": "stream",
"text": [
- ":: loading settings :: url =
jar:file:/home/jovyan/spark-3.1.2-bin-hadoop3.2/jars/ivy-2.4.0.jar!/org/apache/ivy/core/settings/ivysettings.xml\n"
+ ":: loading settings :: url =
jar:file:/Users/nileshgajwani/Desktop/spark/spark-3.4.0-bin-hadoop3/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
- "Ivy Default Cache set to: /home/jovyan/.ivy2/cache\n",
- "The jars for the packages stored in: /home/jovyan/.ivy2/jars\n",
- "org.apache.sedona#sedona-python-adapter-3.0_2.12 added as a
dependency\n",
+ "Ivy Default Cache set to: /Users/nileshgajwani/.ivy2/cache\n",
+ "The jars for the packages stored in: /Users/nileshgajwani/.ivy2/jars\n",
+ "org.apache.sedona#sedona-spark-shaded-3.0_2.12 added as a dependency\n",
"org.datasyslab#geotools-wrapper added as a dependency\n",
- ":: resolving dependencies ::
org.apache.spark#spark-submit-parent-3625fe59-a0fc-45df-ae07-108b2b675db5;1.0\n",
+ ":: resolving dependencies ::
org.apache.spark#spark-submit-parent-ade932f0-a9e8-47af-b559-0d52a6a087e9;1.0\n",
"\tconfs: [default]\n",
- "\tfound
org.apache.sedona#sedona-python-adapter-3.0_2.12;1.1.0-incubating in central\n",
- "\tfound org.locationtech.jts#jts-core;1.18.0 in central\n",
- "\tfound org.wololo#jts2geojson;0.16.1 in central\n",
- "\tfound com.fasterxml.jackson.core#jackson-databind;2.12.2 in
central\n",
- "\tfound com.fasterxml.jackson.core#jackson-annotations;2.12.2 in
central\n",
- "\tfound com.fasterxml.jackson.core#jackson-core;2.12.2 in central\n",
- "\tfound org.apache.sedona#sedona-core-3.0_2.12;1.1.0-incubating in
central\n",
- "\tfound org.apache.sedona#sedona-sql-3.0_2.12;1.1.0-incubating in
central\n",
- "\tfound org.datasyslab#geotools-wrapper;1.1.0-25.2 in central\n",
- "downloading
https://repo1.maven.org/maven2/org/apache/sedona/sedona-python-adapter-3.0_2.12/1.1.0-incubating/sedona-python-adapter-3.0_2.12-1.1.0-incubating.jar
...\n",
- "\t[SUCCESSFUL ]
org.apache.sedona#sedona-python-adapter-3.0_2.12;1.1.0-incubating!sedona-python-adapter-3.0_2.12.jar
(85ms)\n",
- "downloading
https://repo1.maven.org/maven2/org/datasyslab/geotools-wrapper/1.1.0-25.2/geotools-wrapper-1.1.0-25.2.jar
...\n",
- "\t[SUCCESSFUL ]
org.datasyslab#geotools-wrapper;1.1.0-25.2!geotools-wrapper.jar (687ms)\n",
- "downloading
https://repo1.maven.org/maven2/org/locationtech/jts/jts-core/1.18.0/jts-core-1.18.0.jar
...\n",
- "\t[SUCCESSFUL ]
org.locationtech.jts#jts-core;1.18.0!jts-core.jar(bundle) (31ms)\n",
- "downloading
https://repo1.maven.org/maven2/org/wololo/jts2geojson/0.16.1/jts2geojson-0.16.1.jar
...\n",
- "\t[SUCCESSFUL ] org.wololo#jts2geojson;0.16.1!jts2geojson.jar (10ms)\n",
- "downloading
https://repo1.maven.org/maven2/org/apache/sedona/sedona-core-3.0_2.12/1.1.0-incubating/sedona-core-3.0_2.12-1.1.0-incubating.jar
...\n",
- "\t[SUCCESSFUL ]
org.apache.sedona#sedona-core-3.0_2.12;1.1.0-incubating!sedona-core-3.0_2.12.jar
(15ms)\n",
- "downloading
https://repo1.maven.org/maven2/org/apache/sedona/sedona-sql-3.0_2.12/1.1.0-incubating/sedona-sql-3.0_2.12-1.1.0-incubating.jar
...\n",
- "\t[SUCCESSFUL ]
org.apache.sedona#sedona-sql-3.0_2.12;1.1.0-incubating!sedona-sql-3.0_2.12.jar
(24ms)\n",
- "downloading
https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-databind/2.12.2/jackson-databind-2.12.2.jar
...\n",
- "\t[SUCCESSFUL ]
com.fasterxml.jackson.core#jackson-databind;2.12.2!jackson-databind.jar(bundle)
(43ms)\n",
- "downloading
https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-annotations/2.12.2/jackson-annotations-2.12.2.jar
...\n",
- "\t[SUCCESSFUL ]
com.fasterxml.jackson.core#jackson-annotations;2.12.2!jackson-annotations.jar(bundle)
(11ms)\n",
- "downloading
https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-core/2.12.2/jackson-core-2.12.2.jar
...\n",
- "\t[SUCCESSFUL ]
com.fasterxml.jackson.core#jackson-core;2.12.2!jackson-core.jar(bundle)
(17ms)\n",
- ":: resolution report :: resolve 9110ms :: artifacts dl 931ms\n",
+ "\tfound org.apache.sedona#sedona-spark-shaded-3.0_2.12;1.4.1 in
central\n",
+ "\tfound org.datasyslab#geotools-wrapper;1.4.0-28.2 in central\n",
+ ":: resolution report :: resolve 81ms :: artifacts dl 2ms\n",
"\t:: modules in use:\n",
- "\tcom.fasterxml.jackson.core#jackson-annotations;2.12.2 from central in
[default]\n",
- "\tcom.fasterxml.jackson.core#jackson-core;2.12.2 from central in
[default]\n",
- "\tcom.fasterxml.jackson.core#jackson-databind;2.12.2 from central in
[default]\n",
- "\torg.apache.sedona#sedona-core-3.0_2.12;1.1.0-incubating from central
in [default]\n",
- "\torg.apache.sedona#sedona-python-adapter-3.0_2.12;1.1.0-incubating
from central in [default]\n",
- "\torg.apache.sedona#sedona-sql-3.0_2.12;1.1.0-incubating from central
in [default]\n",
- "\torg.datasyslab#geotools-wrapper;1.1.0-25.2 from central in
[default]\n",
- "\torg.locationtech.jts#jts-core;1.18.0 from central in [default]\n",
- "\torg.wololo#jts2geojson;0.16.1 from central in [default]\n",
- "\t:: evicted modules:\n",
- "\torg.locationtech.jts#jts-core;1.18.1 by
[org.locationtech.jts#jts-core;1.18.0] in [default]\n",
+ "\torg.apache.sedona#sedona-spark-shaded-3.0_2.12;1.4.1 from central in
[default]\n",
+ "\torg.datasyslab#geotools-wrapper;1.4.0-28.2 from central in
[default]\n",
"\t---------------------------------------------------------------------\n",
"\t| | modules || artifacts
|\n",
"\t| conf | number| search|dwnlded|evicted||
number|dwnlded|\n",
"\t---------------------------------------------------------------------\n",
- "\t| default | 10 | 9 | 9 | 1 || 9 | 9
|\n",
+ "\t| default | 2 | 0 | 0 | 0 || 2 | 0
|\n",
"\t---------------------------------------------------------------------\n",
- ":: retrieving ::
org.apache.spark#spark-submit-parent-3625fe59-a0fc-45df-ae07-108b2b675db5\n",
+ ":: retrieving ::
org.apache.spark#spark-submit-parent-ade932f0-a9e8-47af-b559-0d52a6a087e9\n",
"\tconfs: [default]\n",
- "\t9 artifacts copied, 0 already retrieved (35019kB/52ms)\n",
- "22/08/28 04:59:46 WARN NativeCodeLoader: Unable to load native-hadoop
library for your platform... using builtin-java classes where applicable\n",
- "Using Spark's default log4j profile:
org/apache/spark/log4j-defaults.properties\n",
+ "\t0 artifacts copied, 2 already retrieved (0kB/2ms)\n",
+ "23/07/03 21:13:44 WARN NativeCodeLoader: Unable to load native-hadoop
library for your platform... using builtin-java classes where applicable\n",
"Setting default log level to \"WARN\".\n",
- "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use
setLogLevel(newLevel).\n"
- ]
- }
- ],
- "source": [
- "spark = SparkSession. \\\n",
- " builder. \\\n",
- " appName('appName'). \\\n",
- " config(\"spark.serializer\", KryoSerializer.getName). \\\n",
- " config(\"spark.kryo.registrator\", SedonaKryoRegistrator.getName).
\\\n",
- " config(\"spark.jars.packages\",
\"org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.4.0,org.datasyslab:geotools-wrapper:1.4.0-28.2\")
.\\\n",
- " getOrCreate()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
+ "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use
setLogLevel(newLevel).\n",
+ "23/07/03 21:13:44 WARN Utils: Service 'SparkUI' could not bind on port
4040. Attempting port 4041.\n",
+ "23/07/03 21:13:44 WARN Utils: Service 'SparkUI' could not bind on port
4041. Attempting port 4042.\n",
"
\r"
]
- },
- {
- "data": {
- "text/plain": [
- "True"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
}
],
"source": [
- "SedonaRegistrator.registerAll(spark)"
+ "config = SedonaContext.builder() .\\\n",
+ " config('spark.jars.packages',\n",
+ " 'org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.4.1,'\n",
+ " 'org.datasyslab:geotools-wrapper:1.4.0-28.2'). \\\n",
+ " getOrCreate()\n",
+ "\n",
+ "sedona = SedonaContext.create(config)\n"
]
},
{
@@ -168,7 +108,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -190,14 +130,14 @@
}
],
"source": [
- "point_csv_df = spark.read.format(\"csv\").\\\n",
+ "point_csv_df = sedona.read.format(\"csv\").\\\n",
" option(\"delimiter\", \",\").\\\n",
" option(\"header\", \"false\").\\\n",
" load(\"data/testpoint.csv\")\n",
"\n",
"point_csv_df.createOrReplaceTempView(\"pointtable\")\n",
"\n",
- "point_df = spark.sql(\"select ST_Point(cast(pointtable._c0 as
Decimal(24,20)), cast(pointtable._c1 as Decimal(24,20))) as arealandmark from
pointtable\")\n",
+ "point_df = sedona.sql(\"select ST_Point(cast(pointtable._c0 as
Decimal(24,20)), cast(pointtable._c1 as Decimal(24,20))) as arealandmark from
pointtable\")\n",
"point_df.show(5)"
]
},
@@ -210,7 +150,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -232,13 +172,13 @@
}
],
"source": [
- "polygon_wkt_df = spark.read.format(\"csv\").\\\n",
+ "polygon_wkt_df = sedona.read.format(\"csv\").\\\n",
" option(\"delimiter\", \"\\t\").\\\n",
" option(\"header\", \"false\").\\\n",
" load(\"data/county_small.tsv\")\n",
"\n",
"polygon_wkt_df.createOrReplaceTempView(\"polygontable\")\n",
- "polygon_df = spark.sql(\"select polygontable._c6 as name,
ST_GeomFromText(polygontable._c0) as countyshape from polygontable\")\n",
+ "polygon_df = sedona.sql(\"select polygontable._c6 as name,
ST_GeomFromText(polygontable._c0) as countyshape from polygontable\")\n",
"polygon_df.show(5)"
]
},
@@ -251,7 +191,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -273,13 +213,13 @@
}
],
"source": [
- "polygon_wkb_df = spark.read.format(\"csv\").\\\n",
+ "polygon_wkb_df = sedona.read.format(\"csv\").\\\n",
" option(\"delimiter\", \"\\t\").\\\n",
" option(\"header\", \"false\").\\\n",
" load(\"data/county_small_wkb.tsv\")\n",
"\n",
"polygon_wkb_df.createOrReplaceTempView(\"polygontable\")\n",
- "polygon_df = spark.sql(\"select polygontable._c6 as name,
ST_GeomFromWKB(polygontable._c0) as countyshape from polygontable\")\n",
+ "polygon_df = sedona.sql(\"select polygontable._c6 as name,
ST_GeomFromWKB(polygontable._c0) as countyshape from polygontable\")\n",
"polygon_df.show(5)"
]
},
@@ -292,7 +232,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -314,13 +254,13 @@
}
],
"source": [
- "polygon_json_df = spark.read.format(\"csv\").\\\n",
+ "polygon_json_df = sedona.read.format(\"csv\").\\\n",
" option(\"delimiter\", \"\\t\").\\\n",
" option(\"header\", \"false\").\\\n",
" load(\"data/testPolygon.json\")\n",
"\n",
"polygon_json_df.createOrReplaceTempView(\"polygontable\")\n",
- "polygon_df = spark.sql(\"select ST_GeomFromGeoJSON(polygontable._c0) as
countyshape from polygontable\")\n",
+ "polygon_df = sedona.sql(\"select ST_GeomFromGeoJSON(polygontable._c0) as
countyshape from polygontable\")\n",
"polygon_df.show(5)"
]
},
@@ -340,7 +280,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -348,26 +288,14 @@
"output_type": "stream",
"text": [
"== Physical Plan ==\n",
- "DistanceJoin pointshape1#259: geometry, pointshape2#283: geometry, 2.0,
false\n",
- ":- Project [st_point(cast(_c0#255 as decimal(24,20)), cast(_c1#256 as
decimal(24,20))) AS pointshape1#259, abc AS name1#260]\n",
- ": +- FileScan csv [_c0#255,_c1#256] Batched: false, DataFilters: [],
Format: CSV, Location:
InMemoryFileIndex[file:/home/jovyan/binder/data/testpoint.csv],
PartitionFilters: [], PushedFilters: [], ReadSchema:
struct<_c0:string,_c1:string>\n",
- "+- Project [st_point(cast(_c0#279 as decimal(24,20)), cast(_c1#280 as
decimal(24,20))) AS pointshape2#283, def AS name2#284]\n",
- " +- FileScan csv [_c0#279,_c1#280] Batched: false, DataFilters: [],
Format: CSV, Location:
InMemoryFileIndex[file:/home/jovyan/binder/data/testpoint.csv],
PartitionFilters: [], PushedFilters: [], ReadSchema:
struct<_c0:string,_c1:string>\n",
+ "BroadcastIndexJoin pointshape2#253: geometry, LeftSide, LeftSide,
Inner, INTERSECTS, (
**org.apache.spark.sql.sedona_sql.expressions.ST_Distance** < 2.0)
ST_INTERSECTS(pointshape1#228, pointshape2#253)\n",
+ ":- SpatialIndex pointshape1#228: geometry, QUADTREE, false, 2.0\n",
+ ": +- Project [
**org.apache.spark.sql.sedona_sql.expressions.ST_Point** AS pointshape1#228,
abc AS name1#229]\n",
+ ": +- FileScan csv [_c0#224,_c1#225] Batched: false, DataFilters:
[], Format: CSV, Location: InMemoryFileIndex(1
paths)[file:/Users/nileshgajwani/Desktop/sedona/sedona/binder/data/testpoint....,
PartitionFilters: [], PushedFilters: [], ReadSchema:
struct<_c0:string,_c1:string>\n",
+ "+- Project [ **org.apache.spark.sql.sedona_sql.expressions.ST_Point**
AS pointshape2#253, def AS name2#254]\n",
+ " +- FileScan csv [_c0#249,_c1#250] Batched: false, DataFilters: [],
Format: CSV, Location: InMemoryFileIndex(1
paths)[file:/Users/nileshgajwani/Desktop/sedona/sedona/binder/data/testpoint....,
PartitionFilters: [], PushedFilters: [], ReadSchema:
struct<_c0:string,_c1:string>\n",
+ "\n",
"\n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "22/08/28 05:00:01 WARN JoinQuery: UseIndex is true, but no index
exists. Will build index on the fly.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
"+-----------------+-----+-----------------+-----+\n",
"| pointshape1|name1| pointshape2|name2|\n",
"+-----------------+-----+-----------------+-----+\n",
@@ -383,24 +311,24 @@
}
],
"source": [
- "point_csv_df_1 = spark.read.format(\"csv\").\\\n",
+ "point_csv_df_1 = sedona.read.format(\"csv\").\\\n",
" option(\"delimiter\", \",\").\\\n",
" option(\"header\", \"false\").load(\"data/testpoint.csv\")\n",
"\n",
"point_csv_df_1.createOrReplaceTempView(\"pointtable\")\n",
"\n",
- "point_df1 = spark.sql(\"SELECT ST_Point(cast(pointtable._c0 as
Decimal(24,20)),cast(pointtable._c1 as Decimal(24,20))) as pointshape1,
\\'abc\\' as name1 from pointtable\")\n",
+ "point_df1 = sedona.sql(\"SELECT ST_Point(cast(pointtable._c0 as
Decimal(24,20)),cast(pointtable._c1 as Decimal(24,20))) as pointshape1,
\\'abc\\' as name1 from pointtable\")\n",
"point_df1.createOrReplaceTempView(\"pointdf1\")\n",
"\n",
- "point_csv_df2 = spark.read.format(\"csv\").\\\n",
+ "point_csv_df2 = sedona.read.format(\"csv\").\\\n",
" option(\"delimiter\", \",\").\\\n",
" option(\"header\", \"false\").load(\"data/testpoint.csv\")\n",
"\n",
"point_csv_df2.createOrReplaceTempView(\"pointtable\")\n",
- "point_df2 = spark.sql(\"select ST_Point(cast(pointtable._c0 as
Decimal(24,20)),cast(pointtable._c1 as Decimal(24,20))) as pointshape2,
\\'def\\' as name2 from pointtable\")\n",
+ "point_df2 = sedona.sql(\"select ST_Point(cast(pointtable._c0 as
Decimal(24,20)),cast(pointtable._c1 as Decimal(24,20))) as pointshape2,
\\'def\\' as name2 from pointtable\")\n",
"point_df2.createOrReplaceTempView(\"pointdf2\")\n",
"\n",
- "distance_join_df = spark.sql(\"select * from pointdf1, pointdf2 where
ST_Distance(pointdf1.pointshape1,pointdf2.pointshape2) < 2\")\n",
+ "distance_join_df = sedona.sql(\"select * from pointdf1, pointdf2 where
ST_Distance(pointdf1.pointshape1,pointdf2.pointshape2) < 2\")\n",
"distance_join_df.explain()\n",
"distance_join_df.show(5)"
]
@@ -423,30 +351,21 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 8,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
-
"/srv/conda/envs/notebook/lib/python3.7/site-packages/geopandas/geodataframe.py:35:
ShapelyDeprecationWarning: The array interface is deprecated and will no
longer work in Shapely 2.0. Convert the '.coords' to a numpy array instead.\n",
- " out = from_shapely(data)\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"import pandas as pd\n",
"gdf = gpd.read_file(\"data/gis_osm_pois_free_1.shp\")\n",
"gdf = gdf.replace(pd.NA, '')\n",
- "osm_points = spark.createDataFrame(\n",
+ "osm_points = sedona.createDataFrame(\n",
" gdf\n",
")"
]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -469,16 +388,9 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 10,
"metadata": {},
"outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[Stage 17:> (0
+ 1) / 1]\r"
- ]
- },
{
"name": "stdout",
"output_type": "stream",
@@ -488,20 +400,13 @@
"+--------+----+---------+--------------+--------------------+\n",
"|26860257|2422|camp_site| de Kroon|POINT (15.3393145...|\n",
"|26860294|2406| chalet|Leśne Ustronie|POINT (14.8709625...|\n",
- "|29947493|2402| motel| null|POINT (15.0946636...|\n",
- "|29947498|2602| atm| null|POINT (15.0732014...|\n",
- "|29947499|2401| hotel| null|POINT (15.0696777...|\n",
+ "|29947493|2402| motel| |POINT (15.0946636...|\n",
+ "|29947498|2602| atm| |POINT (15.0732014...|\n",
+ "|29947499|2401| hotel| |POINT (15.0696777...|\n",
"+--------+----+---------+--------------+--------------------+\n",
"only showing top 5 rows\n",
"\n"
]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "
\r"
- ]
}
],
"source": [
@@ -510,7 +415,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@@ -519,11 +424,11 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
- "transformed_df = spark.sql(\n",
+ "transformed_df = sedona.sql(\n",
" \"\"\"\n",
" SELECT osm_id,\n",
" code,\n",
@@ -536,16 +441,9 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 13,
"metadata": {},
"outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[Stage 18:> (0
+ 1) / 1]\r"
- ]
- },
{
"name": "stdout",
"output_type": "stream",
@@ -555,20 +453,13 @@
"+--------+----+---------+--------------+--------------------+\n",
"|26860257|2422|camp_site| de Kroon|POINT (-3288183.3...|\n",
"|26860294|2406| chalet|Leśne Ustronie|POINT (-3341183.9...|\n",
- "|29947493|2402| motel| null|POINT (-3320466.5...|\n",
- "|29947498|2602| atm| null|POINT (-3323205.7...|\n",
- "|29947499|2401| hotel| null|POINT (-3323655.1...|\n",
+ "|29947493|2402| motel| |POINT (-3320466.5...|\n",
+ "|29947498|2602| atm| |POINT (-3323205.7...|\n",
+ "|29947499|2401| hotel| |POINT (-3323655.1...|\n",
"+--------+----+---------+--------------+--------------------+\n",
"only showing top 5 rows\n",
"\n"
]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "
\r"
- ]
}
],
"source": [
@@ -577,7 +468,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
@@ -586,11 +477,11 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
- "neighbours_within_1000m = spark.sql(\"\"\"\n",
+ "neighbours_within_1000m = sedona.sql(\"\"\"\n",
" SELECT a.osm_id AS id_1,\n",
" b.osm_id AS id_2,\n",
" a.geom \n",
@@ -601,15 +492,14 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "22/08/28 05:00:09 WARN JoinQuery: UseIndex is true, but no index
exists. Will build index on the fly.\n",
- "[Stage 22:=============================> (1
+ 1) / 2]\r"
+ "23/07/03 21:13:53 WARN JoinQuery: UseIndex is true, but no index
exists. Will build index on the fly.\n"
]
},
{
@@ -619,16 +509,6 @@
"+----------+---------+--------------------+\n",
"| id_1| id_2| geom|\n",
"+----------+---------+--------------------+\n",
- "| 26860294| 26860294|POINT (-3341183.9...|\n",
- "| 29947493| 29947493|POINT (-3320466.5...|\n",
- "|4165181885| 29947498|POINT (-3323204.4...|\n",
- "|5818905324| 29947498|POINT (-3323210.6...|\n",
- "| 29947498| 29947498|POINT (-3323205.7...|\n",
- "| 29947499| 29947499|POINT (-3323655.1...|\n",
- "| 30077461| 29947499|POINT (-3323697.1...|\n",
- "| 29947505| 29947505|POINT (-3330369.2...|\n",
- "| 29947499| 30077461|POINT (-3323655.1...|\n",
- "| 30077461| 30077461|POINT (-3323697.1...|\n",
"| 197624402|197624402|POINT (-3383818.5...|\n",
"| 197663196|197663196|POINT (-3383367.1...|\n",
"| 197953474|197953474|POINT (-3383763.3...|\n",
@@ -638,18 +518,21 @@
"|1074232906|270281140|POINT (-3385408.6...|\n",
"| 270306609|270306609|POINT (-3383982.8...|\n",
"| 270306746|270306746|POINT (-3383898.4...|\n",
- "| 273101780|273101780|POINT (-3389705.7...|\n",
+ "| 280402616|280402616|POINT (-3378817.6...|\n",
+ "| 839725400|280402616|POINT (-3378841.1...|\n",
+ "| 293896571|293896571|POINT (-3385029.0...|\n",
+ "|3256728465|293896571|POINT (-3385002.4...|\n",
+ "| 310838954|310838954|POINT (-3390510.5...|\n",
+ "| 311395303|311395303|POINT (-3389444.4...|\n",
+ "| 311395425|311395425|POINT (-3389867.6...|\n",
+ "|6339786017|311395425|POINT (-3389850.1...|\n",
+ "| 825853330|311395425|POINT (-3389877.4...|\n",
+ "| 945009922|311395425|POINT (-3389878.6...|\n",
+ "| 320100848|320100848|POINT (-3389610.6...|\n",
"+----------+---------+--------------------+\n",
"only showing top 20 rows\n",
"\n"
]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "
\r"
- ]
}
],
"source": [
@@ -665,15 +548,14 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "22/08/28 05:00:15 WARN JoinQuery: UseIndex is true, but no index
exists. Will build index on the fly.\n",
- "
\r"
+ "23/07/03 21:13:54 WARN JoinQuery: UseIndex is true, but no index
exists. Will build index on the fly.\n"
]
}
],
@@ -683,7 +565,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
@@ -692,7 +574,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
@@ -724,33 +606,33 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
- " <td>26860294</td>\n",
- " <td>26860294</td>\n",
- " <td>POINT (-3341183.976 4318356.064)</td>\n",
+ " <td>197624402</td>\n",
+ " <td>197624402</td>\n",
+ " <td>POINT (-3383818.580 4179182.169)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
- " <td>29947493</td>\n",
- " <td>29947493</td>\n",
- " <td>POINT (-3320466.547 4265941.760)</td>\n",
+ " <td>197663196</td>\n",
+ " <td>197663196</td>\n",
+ " <td>POINT (-3383367.151 4179427.096)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
- " <td>4165181885</td>\n",
- " <td>29947498</td>\n",
- " <td>POINT (-3323204.491 4266510.379)</td>\n",
+ " <td>197953474</td>\n",
+ " <td>197953474</td>\n",
+ " <td>POINT (-3383763.332 4179408.785)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
- " <td>5818905324</td>\n",
- " <td>29947498</td>\n",
- " <td>POINT (-3323210.654 4266502.772)</td>\n",
+ " <td>262310516</td>\n",
+ " <td>262310516</td>\n",
+ " <td>POINT (-3384257.682 4178033.053)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
- " <td>29947498</td>\n",
- " <td>29947498</td>\n",
- " <td>POINT (-3323205.784 4266548.416)</td>\n",
+ " <td>1074233123</td>\n",
+ " <td>262310516</td>\n",
+ " <td>POINT (-3384262.187 4178036.442)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
@@ -760,27 +642,27 @@
" </tr>\n",
" <tr>\n",
" <th>45314</th>\n",
- " <td>6815618439</td>\n",
- " <td>6815618435</td>\n",
- " <td>POINT (-3285827.820 4250345.966)</td>\n",
+ " <td>6785548354</td>\n",
+ " <td>6785548354</td>\n",
+ " <td>POINT (-3271487.870 4337964.529)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45315</th>\n",
- " <td>6815618435</td>\n",
- " <td>6815618439</td>\n",
- " <td>POINT (-3285831.862 4250347.684)</td>\n",
+ " <td>6785548356</td>\n",
+ " <td>6785548356</td>\n",
+ " <td>POINT (-3273379.389 4338379.126)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45316</th>\n",
- " <td>6815618439</td>\n",
- " <td>6815618439</td>\n",
- " <td>POINT (-3285827.820 4250345.966)</td>\n",
+ " <td>6785548357</td>\n",
+ " <td>6785548357</td>\n",
+ " <td>POINT (-3273745.222 4338528.241)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45317</th>\n",
- " <td>6815883980</td>\n",
- " <td>6815883980</td>\n",
- " <td>POINT (-3286165.443 4249818.008)</td>\n",
+ " <td>6785548358</td>\n",
+ " <td>6785548358</td>\n",
+ " <td>POINT (-3273027.996 4338093.401)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45318</th>\n",
@@ -795,22 +677,22 @@
],
"text/plain": [
" id_1 id_2 geom\n",
- "0 26860294 26860294 POINT (-3341183.976 4318356.064)\n",
- "1 29947493 29947493 POINT (-3320466.547 4265941.760)\n",
- "2 4165181885 29947498 POINT (-3323204.491 4266510.379)\n",
- "3 5818905324 29947498 POINT (-3323210.654 4266502.772)\n",
- "4 29947498 29947498 POINT (-3323205.784 4266548.416)\n",
+ "0 197624402 197624402 POINT (-3383818.580 4179182.169)\n",
+ "1 197663196 197663196 POINT (-3383367.151 4179427.096)\n",
+ "2 197953474 197953474 POINT (-3383763.332 4179408.785)\n",
+ "3 262310516 262310516 POINT (-3384257.682 4178033.053)\n",
+ "4 1074233123 262310516 POINT (-3384262.187 4178036.442)\n",
"... ... ... ...\n",
- "45314 6815618439 6815618435 POINT (-3285827.820 4250345.966)\n",
- "45315 6815618435 6815618439 POINT (-3285831.862 4250347.684)\n",
- "45316 6815618439 6815618439 POINT (-3285827.820 4250345.966)\n",
- "45317 6815883980 6815883980 POINT (-3286165.443 4249818.008)\n",
+ "45314 6785548354 6785548354 POINT (-3271487.870 4337964.529)\n",
+ "45315 6785548356 6785548356 POINT (-3273379.389 4338379.126)\n",
+ "45316 6785548357 6785548357 POINT (-3273745.222 4338528.241)\n",
+ "45317 6785548358 6785548358 POINT (-3273027.996 4338093.401)\n",
"45318 6817416704 6817416704 POINT (-3214549.268 4314872.904)\n",
"\n",
"[45319 rows x 3 columns]"
]
},
- "execution_count": 20,
+ "execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
@@ -818,13 +700,20 @@
"source": [
"gdf"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "apache-sedona",
"language": "python",
- "name": "python3"
+ "name": "apache-sedona"
},
"language_info": {
"codemirror_mode": {
@@ -836,7 +725,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.12"
+ "version": "3.9.6"
}
},
"nbformat": 4,
diff --git a/binder/ApacheSedonaSQL_SpatialJoin_AirportsPerCountry.ipynb
b/binder/ApacheSedonaSQL_SpatialJoin_AirportsPerCountry.ipynb
index 119d0c70..43981263 100644
--- a/binder/ApacheSedonaSQL_SpatialJoin_AirportsPerCountry.ipynb
+++ b/binder/ApacheSedonaSQL_SpatialJoin_AirportsPerCountry.ipynb
@@ -34,13 +34,7 @@
"from pyspark.sql import SparkSession\n",
"from pyspark.sql.functions import col, expr, when\n",
"\n",
- "from sedona.register import SedonaRegistrator\n",
- "from sedona.utils import SedonaKryoRegistrator, KryoSerializer\n",
- "from sedona.core.formatMapper.shapefileParser import ShapefileReader\n",
- "from sedona.utils.adapter import Adapter\n",
- "from sedona.core.enums import GridType\n",
- "from sedona.core.enums import IndexType\n",
- "from sedona.core.spatialOperator import JoinQueryRaw"
+ "from sedona.spark import *"
]
},
{
@@ -55,80 +49,53 @@
"execution_count": 2,
"metadata": {},
"outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "21/10/08 19:58:06 WARN Utils: Your hostname, Jia-MacBook-Pro.local
resolves to a loopback address: 127.0.0.1; using 192.168.0.34 instead (on
interface en0)\n",
- "21/10/08 19:58:06 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to
another address\n"
- ]
- },
{
"name": "stdout",
"output_type": "stream",
"text": [
- ":: loading settings :: url =
jar:file:/Users/jiayu/Downloads/spark-3.1.2-bin-hadoop3.2/jars/ivy-2.4.0.jar!/org/apache/ivy/core/settings/ivysettings.xml\n"
+ ":: loading settings :: url =
jar:file:/Users/nileshgajwani/Desktop/spark/spark-3.4.0-bin-hadoop3/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
- "Ivy Default Cache set to: /Users/jiayu/.ivy2/cache\n",
- "The jars for the packages stored in: /Users/jiayu/.ivy2/jars\n",
- "org.apache.sedona#sedona-python-adapter-3.0_2.12 added as a
dependency\n",
+ "Ivy Default Cache set to: /Users/nileshgajwani/.ivy2/cache\n",
+ "The jars for the packages stored in: /Users/nileshgajwani/.ivy2/jars\n",
+ "org.apache.sedona#sedona-spark-shaded-3.0_2.12 added as a dependency\n",
"org.datasyslab#geotools-wrapper added as a dependency\n",
- ":: resolving dependencies ::
org.apache.spark#spark-submit-parent-15133089-5026-4f0c-8975-860f756f148d;1.0\n",
+ ":: resolving dependencies ::
org.apache.spark#spark-submit-parent-c8d6364e-e535-48bc-9161-729cb68c80b1;1.0\n",
"\tconfs: [default]\n",
- "\tfound
org.apache.sedona#sedona-python-adapter-3.0_2.12;1.1.0-incubating in central\n",
- "\tfound org.locationtech.jts#jts-core;1.18.0 in central\n",
- "\tfound org.wololo#jts2geojson;0.16.1 in central\n",
- "\tfound com.fasterxml.jackson.core#jackson-databind;2.12.2 in
central\n",
- "\tfound com.fasterxml.jackson.core#jackson-annotations;2.12.2 in
central\n",
- "\tfound com.fasterxml.jackson.core#jackson-core;2.12.2 in central\n",
- "\tfound org.apache.sedona#sedona-core-3.0_2.12;1.1.0-incubating in
central\n",
- "\tfound org.apache.sedona#sedona-sql-3.0_2.12;1.1.0-incubating in
central\n",
- "\tfound org.datasyslab#geotools-wrapper;1.1.0-25.2-RC1 in central\n",
- ":: resolution report :: resolve 315ms :: artifacts dl 10ms\n",
+ "\tfound org.apache.sedona#sedona-spark-shaded-3.0_2.12;1.4.1 in
central\n",
+ "\tfound org.datasyslab#geotools-wrapper;1.4.0-28.2 in central\n",
+ ":: resolution report :: resolve 87ms :: artifacts dl 2ms\n",
"\t:: modules in use:\n",
- "\tcom.fasterxml.jackson.core#jackson-annotations;2.12.2 from central in
[default]\n",
- "\tcom.fasterxml.jackson.core#jackson-core;2.12.2 from central in
[default]\n",
- "\tcom.fasterxml.jackson.core#jackson-databind;2.12.2 from central in
[default]\n",
- "\torg.apache.sedona#sedona-core-3.0_2.12;1.1.0-incubating from central
in [default]\n",
- "\torg.apache.sedona#sedona-python-adapter-3.0_2.12;1.1.0-incubating
from central in [default]\n",
- "\torg.apache.sedona#sedona-sql-3.0_2.12;1.1.0-incubating from central
in [default]\n",
- "\torg.datasyslab#geotools-wrapper;1.1.0-25.2-RC1 from central in
[default]\n",
- "\torg.locationtech.jts#jts-core;1.18.0 from central in [default]\n",
- "\torg.wololo#jts2geojson;0.16.1 from central in [default]\n",
- "\t:: evicted modules:\n",
- "\torg.locationtech.jts#jts-core;1.18.1 by
[org.locationtech.jts#jts-core;1.18.0] in [default]\n",
+ "\torg.apache.sedona#sedona-spark-shaded-3.0_2.12;1.4.1 from central in
[default]\n",
+ "\torg.datasyslab#geotools-wrapper;1.4.0-28.2 from central in
[default]\n",
"\t---------------------------------------------------------------------\n",
"\t| | modules || artifacts
|\n",
"\t| conf | number| search|dwnlded|evicted||
number|dwnlded|\n",
"\t---------------------------------------------------------------------\n",
- "\t| default | 10 | 0 | 0 | 1 || 9 | 0
|\n",
+ "\t| default | 2 | 0 | 0 | 0 || 2 | 0
|\n",
"\t---------------------------------------------------------------------\n",
- ":: retrieving ::
org.apache.spark#spark-submit-parent-15133089-5026-4f0c-8975-860f756f148d\n",
+ ":: retrieving ::
org.apache.spark#spark-submit-parent-c8d6364e-e535-48bc-9161-729cb68c80b1\n",
"\tconfs: [default]\n",
- "\t0 artifacts copied, 9 already retrieved (0kB/13ms)\n",
- "21/10/08 19:58:06 WARN NativeCodeLoader: Unable to load native-hadoop
library for your platform... using builtin-java classes where applicable\n",
- "Using Spark's default log4j profile:
org/apache/spark/log4j-defaults.properties\n",
+ "\t0 artifacts copied, 2 already retrieved (0kB/5ms)\n",
+ "23/07/03 21:19:15 WARN NativeCodeLoader: Unable to load native-hadoop
library for your platform... using builtin-java classes where applicable\n",
"Setting default log level to \"WARN\".\n",
- "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use
setLogLevel(newLevel).\n",
- "
\r"
+ "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use
setLogLevel(newLevel).\n"
]
}
],
"source": [
- "spark = SparkSession.builder.\\\n",
- " master(\"local[*]\").\\\n",
- " appName(\"SedonaSQL-Example\").\\\n",
- " config(\"spark.serializer\", KryoSerializer.getName).\\\n",
- " config(\"spark.kryo.registrator\", SedonaKryoRegistrator.getName)
.\\\n",
- " config(\"spark.jars.packages\",
\"org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.4.0,org.datasyslab:geotools-wrapper:1.4.0-28.2\")
.\\\n",
- " getOrCreate()\n",
- "SedonaRegistrator.registerAll(spark)\n",
- "sc = spark.sparkContext\n",
+ "config = SedonaContext.builder() .\\\n",
+ " config('spark.jars.packages',\n",
+ " 'org.apache.sedona:sedona-spark-shaded-3.0_2.12:1.4.1,'\n",
+ " 'org.datasyslab:geotools-wrapper:1.4.0-28.2'). \\\n",
+ " getOrCreate()\n",
+ "\n",
+ "sedona = SedonaContext.create(config)\n",
+ "sc = sedona.sparkContext\n",
"sc.setSystemProperty(\"sedona.global.charset\", \"utf8\")"
]
},
@@ -252,13 +219,13 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "21/10/08 19:58:13 WARN package: Truncated the string representation of
a plan since it was too large. This behavior can be adjusted by setting
'spark.sql.debug.maxToStringFields'.\n"
+ "23/07/03 21:19:18 WARN package: Truncated the string representation of
a plan since it was too large. This behavior can be adjusted by setting
'spark.sql.debug.maxToStringFields'.\n"
]
}
],
"source": [
"countries = ShapefileReader.readToGeometryRDD(sc,
\"data/ne_50m_admin_0_countries_lakes/\")\n",
- "countries_df = Adapter.toDf(countries, spark)\n",
+ "countries_df = Adapter.toDf(countries, sedona)\n",
"countries_df.createOrReplaceTempView(\"country\")\n",
"countries_df.printSchema()"
]
@@ -298,7 +265,7 @@
],
"source": [
"airports = ShapefileReader.readToGeometryRDD(sc,
\"data/ne_50m_airports/\")\n",
- "airports_df = Adapter.toDf(airports, spark)\n",
+ "airports_df = Adapter.toDf(airports, sedona)\n",
"airports_df.createOrReplaceTempView(\"airport\")\n",
"airports_df.printSchema()"
]
@@ -316,7 +283,7 @@
"metadata": {},
"outputs": [],
"source": [
- "result = spark.sql(\"SELECT c.geometry as country_geom, c.NAME_EN,
a.geometry as airport_geom, a.name FROM country c, airport a WHERE
ST_Contains(c.geometry, a.geometry)\")"
+ "result = sedona.sql(\"SELECT c.geometry as country_geom, c.NAME_EN,
a.geometry as airport_geom, a.name FROM country c, airport a WHERE
ST_Contains(c.geometry, a.geometry)\")"
]
},
{
@@ -330,7 +297,15 @@
"cell_type": "code",
"execution_count": 6,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[('3.0', '2.12', '1.4.1')]\n"
+ ]
+ }
+ ],
"source": [
"airports_rdd = Adapter.toSpatialRdd(airports_df, \"geometry\")\n",
"# Drop the duplicate name column in countries_df\n",
@@ -351,11 +326,11 @@
"\n",
"result_pair_rdd = JoinQueryRaw.SpatialJoinQueryFlat(airports_rdd,
countries_rdd, usingIndex, considerBoundaryIntersection)\n",
"\n",
- "result2 = Adapter.toDf(result_pair_rdd, countries_rdd.fieldNames,
airports.fieldNames, spark)\n",
+ "result2 = Adapter.toDf(result_pair_rdd, countries_rdd.fieldNames,
airports.fieldNames, sedona)\n",
"\n",
"result2.createOrReplaceTempView(\"join_result_with_all_cols\")\n",
"# Select the columns needed in the join\n",
- "result2 = spark.sql(\"SELECT leftgeometry as country_geom, NAME_EN,
rightgeometry as airport_geom, name FROM join_result_with_all_cols\")"
+ "result2 = sedona.sql(\"SELECT leftgeometry as country_geom, NAME_EN,
rightgeometry as airport_geom, name FROM join_result_with_all_cols\")"
]
},
{
@@ -374,7 +349,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "21/10/08 19:58:15 WARN JoinQuery: UseIndex is true, but no index
exists. Will build index on the fly.\n"
+ "23/07/03 21:19:20 WARN JoinQuery: UseIndex is true, but no index
exists. Will build index on the fly.\n"
]
},
{
@@ -457,13 +432,6 @@
"scrolled": true
},
"outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "
\r"
- ]
- },
{
"name": "stdout",
"output_type": "stream",
@@ -471,36 +439,46 @@
"+--------------------+--------------------+------------+\n",
"| NAME_EN| country_geom|AirportCount|\n",
"+--------------------+--------------------+------------+\n",
- "|Peru ...|POLYGON ((-69.965...| 1|\n",
+ "|Cuba ...|MULTIPOLYGON (((-...| 1|\n",
+ "|Mexico ...|MULTIPOLYGON (((-...| 12|\n",
+ "|Panama ...|MULTIPOLYGON (((-...| 1|\n",
+ "|Nicaragua ...|POLYGON ((-83.157...| 1|\n",
"|Honduras ...|MULTIPOLYGON (((-...| 1|\n",
- "|Mali ...|POLYGON ((-11.389...| 1|\n",
- "|Bermuda ...|POLYGON ((-64.730...| 1|\n",
- "|Ireland ...|MULTIPOLYGON (((-...| 1|\n",
- "|Democratic Republ...|POLYGON ((30.7511...| 2|\n",
- "|Bangladesh ...|MULTIPOLYGON (((8...| 1|\n",
- "|Serbia ...|POLYGON ((21.3600...| 1|\n",
- "|Croatia ...|MULTIPOLYGON (((1...| 1|\n",
- "|Kenya ...|MULTIPOLYGON (((4...| 2|\n",
- "|Malta ...|MULTIPOLYGON (((1...| 1|\n",
- "|United Arab Emira...|MULTIPOLYGON (((5...| 1|\n",
- "|Brazil ...|MULTIPOLYGON (((-...| 12|\n",
- "|Israel ...|POLYGON ((35.8691...| 1|\n",
- "|Lithuania ...|MULTIPOLYGON (((2...| 1|\n",
- "|Japan ...|MULTIPOLYGON (((1...| 3|\n",
- "|Bulgaria ...|POLYGON ((28.0144...| 1|\n",
- "|Guinea ...|POLYGON ((-10.283...| 1|\n",
+ "|Colombia ...|MULTIPOLYGON (((-...| 4|\n",
+ "|United States of ...|MULTIPOLYGON (((-...| 35|\n",
+ "|Ecuador ...|MULTIPOLYGON (((-...| 1|\n",
"|The Bahamas ...|MULTIPOLYGON (((-...| 1|\n",
- "|Afghanistan ...|POLYGON ((66.5222...| 1|\n",
+ "|Peru ...|POLYGON ((-69.965...| 1|\n",
+ "|Guatemala ...|POLYGON ((-92.235...| 1|\n",
+ "|Canada ...|MULTIPOLYGON (((-...| 15|\n",
+ "|Venezuela ...|MULTIPOLYGON (((-...| 3|\n",
+ "|Argentina ...|MULTIPOLYGON (((-...| 3|\n",
+ "|Bolivia ...|MULTIPOLYGON (((-...| 2|\n",
+ "|Paraguay ...|POLYGON ((-58.159...| 1|\n",
+ "|Benin ...|POLYGON ((1.62265...| 1|\n",
+ "|Guinea ...|POLYGON ((-10.283...| 1|\n",
+ "|Chile ...|MULTIPOLYGON (((-...| 5|\n",
+ "|Nigeria ...|MULTIPOLYGON (((7...| 3|\n",
"+--------------------+--------------------+------------+\n",
"only showing top 20 rows\n",
"\n"
]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\r",
+ "[Stage 20:=================================================> (6
+ 1) / 7]\r",
+ "\r",
+ "
\r"
+ ]
}
],
"source": [
"# result.createOrReplaceTempView(\"result\")\n",
"result2.createOrReplaceTempView(\"result\")\n",
- "groupedresult = spark.sql(\"SELECT c.NAME_EN, c.country_geom, count(*) as
AirportCount FROM result c GROUP BY c.NAME_EN, c.country_geom\")\n",
+ "groupedresult = sedona.sql(\"SELECT c.NAME_EN, c.country_geom, count(*)
as AirportCount FROM result c GROUP BY c.NAME_EN, c.country_geom\")\n",
"groupedresult.show()"
]
},
@@ -519,7 +497,7 @@
{
"data": {
"text/plain": [
- "<AxesSubplot:>"
+ "<Axes: >"
]
},
"execution_count": 9,
@@ -528,14 +506,12 @@
},
{
"data": {
- "image/png":
"iVBORw0KGgoAAAANSUhEUgAAAYwAAACXCAYAAAAGX2ymAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABIAElEQVR4nO29eZgsaVng+3sjIpdasvY6+3769N50033obnYQQUDZBgTUUeZeZhgdvep4x7mg4+ijjpfxGa8Pc8dHBYcrOgyIKIjIsMpio9ALvTe9nO4+fZY++zlVp9ZcIt77x/tFLlWZWVlrZtaJ3/PkU5mRGRFfRkV+7/fuoqokJCQkJCQshdfuASQkJCQkdAeJwEhISEhIaIlEYCQkJCQktEQiMBISEhISWiIRGAkJCQkJLRG0ewDVjI2N6b59+9o9jISEhIQ157777juvquP13rtKRGdXcexT8CVVff0qDtES
[...]
+ "image/png":
"iVBORw0KGgoAAAANSUhEUgAAAkYAAADeCAYAAADYd89rAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAACCHUlEQVR4nO29eZxcZZ3v/3nOUvvS+5Z0ZyErgRASIETBQY2E6KgoziCjDjqMjg7oCOpcGa8s3pmBkRnXCzLOHWHmKqLO76qDC4ooCBJQIwHCEpKQlaQ7Sae7q7uraznnfH9/fJ9TW1dXV3VXdVd3P+/Xq17dVXWW59SpOs/nfFdBRASFQqFQKBQKBbTZHoBCoVAoFApFvaCEkUKhUCgUCoVECSOFQqFQKBQKiRJGCoVCoVAoFBIljBQKhUKhUCgkShgpFAqFQqFQSJQwUigUCoVCoZAoYaRQKBQKhUIhMWZ7ANPF
[...]
"text/plain": [
- "<Figure size 432x288 with 2 Axes>"
+ "<Figure size 640x480 with 2 Axes>"
]
},
- "metadata": {
- "needs_background": "light"
- },
+ "metadata": {},
"output_type": "display_data"
}
],
@@ -561,9 +537,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "apache-sedona",
"language": "python",
- "name": "python3"
+ "name": "apache-sedona"
},
"language_info": {
"codemirror_mode": {
@@ -575,7 +551,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.12"
+ "version": "3.9.6"
}
},
"nbformat": 4,
diff --git a/binder/Pipfile b/binder/Pipfile
index 57843ff9..01844df5 100644
--- a/binder/Pipfile
+++ b/binder/Pipfile
@@ -17,7 +17,7 @@ geopandas="==0.11.1"
pyspark="==3.3.2"
attrs="*"
ipykernel = "*"
-apache-sedona="==1.4.0"
+apache-sedona="==1.4.1"
matplotlib = "*"
descartes = "*"