This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 75e8b557 Create VectorAnalisisApacheSedona (#646)
75e8b557 is described below

commit 75e8b557ad4288d1ca802dd200dbe11276dfb32d
Author: Ana Caroline Ferreira <[email protected]>
AuthorDate: Tue Jul 12 21:37:17 2022 -0300

    Create VectorAnalisisApacheSedona (#646)
    
    More complex example using apache sedona functions
---
 binder/VectorAnalisisApacheSedona | 1139 +++++++++++++++++++++++++++++++++++++
 1 file changed, 1139 insertions(+)

diff --git a/binder/VectorAnalisisApacheSedona 
b/binder/VectorAnalisisApacheSedona
new file mode 100644
index 00000000..f1606590
--- /dev/null
+++ b/binder/VectorAnalisisApacheSedona
@@ -0,0 +1,1139 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "5b4f4a55-9ac4-4591-a925-ba032344662a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from IPython.display import display, HTML\n",
+    "from pyspark.sql import SparkSession\n",
+    "from pyspark import StorageLevel\n",
+    "import pandas as pd\n",
+    "from pyspark.sql.types import StructType, StructField,StringType, 
LongType, IntegerType, DoubleType, ArrayType\n",
+    "from pyspark.sql.functions import regexp_replace\n",
+    "from sedona.register import SedonaRegistrator\n",
+    "from sedona.utils import SedonaKryoRegistrator, KryoSerializer\n",
+    "from pyspark.sql.functions import col, split, expr\n",
+    "from pyspark.sql.functions import udf, lit\n",
+    "from sedona.utils import SedonaKryoRegistrator, KryoSerializer\n",
+    "from pyspark.sql.functions import col, split, expr\n",
+    "from pyspark.sql.functions import udf, lit, flatten\n",
+    "from pywebhdfs.webhdfs import PyWebHdfsClient\n",
+    "from datetime import date\n",
+    "from pyspark.sql.functions import monotonically_increasing_id \n",
+    "import json"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "caf280aa-c891-4729-900f-824141ab7c03",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Warning: Ignoring non-Spark config property:  
spark.sql.adaptive.coalescePartitions.enabled\n",
+      "Ivy Default Cache set to: /root/.ivy2/cache\n",
+      "The jars for the packages stored in: /root/.ivy2/jars\n",
+      ":: loading settings :: url = 
jar:file:/usr/local/lib/python3.9/dist-packages/pyspark/jars/ivy-2.4.0.jar!/org/apache/ivy/core/settings/ivysettings.xml\n",
+      "org.apache.sedona#sedona-python-adapter-3.0_2.12 added as a 
dependency\n",
+      "org.datasyslab#geotools-wrapper added as a dependency\n",
+      ":: resolving dependencies :: 
org.apache.spark#spark-submit-parent-c20579c1-d499-4129-ae06-0876b2e0525a;1.0\n",
+      "\tconfs: [default]\n",
+      "\tfound 
org.apache.sedona#sedona-python-adapter-3.0_2.12;1.1.0-incubating in central\n",
+      "\tfound org.locationtech.jts#jts-core;1.18.0 in central\n",
+      "\tfound org.wololo#jts2geojson;0.16.1 in central\n",
+      "\tfound com.fasterxml.jackson.core#jackson-databind;2.12.2 in 
central\n",
+      "\tfound com.fasterxml.jackson.core#jackson-annotations;2.12.2 in 
central\n",
+      "\tfound com.fasterxml.jackson.core#jackson-core;2.12.2 in central\n",
+      "\tfound org.apache.sedona#sedona-core-3.0_2.12;1.1.0-incubating in 
central\n",
+      "\tfound org.apache.sedona#sedona-sql-3.0_2.12;1.1.0-incubating in 
central\n",
+      "\tfound org.datasyslab#geotools-wrapper;1.1.0-25.2 in central\n",
+      ":: resolution report :: resolve 227ms :: artifacts dl 4ms\n",
+      "\t:: modules in use:\n",
+      "\tcom.fasterxml.jackson.core#jackson-annotations;2.12.2 from central in 
[default]\n",
+      "\tcom.fasterxml.jackson.core#jackson-core;2.12.2 from central in 
[default]\n",
+      "\tcom.fasterxml.jackson.core#jackson-databind;2.12.2 from central in 
[default]\n",
+      "\torg.apache.sedona#sedona-core-3.0_2.12;1.1.0-incubating from central 
in [default]\n",
+      "\torg.apache.sedona#sedona-python-adapter-3.0_2.12;1.1.0-incubating 
from central in [default]\n",
+      "\torg.apache.sedona#sedona-sql-3.0_2.12;1.1.0-incubating from central 
in [default]\n",
+      "\torg.datasyslab#geotools-wrapper;1.1.0-25.2 from central in 
[default]\n",
+      "\torg.locationtech.jts#jts-core;1.18.0 from central in [default]\n",
+      "\torg.wololo#jts2geojson;0.16.1 from central in [default]\n",
+      "\t:: evicted modules:\n",
+      "\torg.locationtech.jts#jts-core;1.18.1 by 
[org.locationtech.jts#jts-core;1.18.0] in [default]\n",
+      
"\t---------------------------------------------------------------------\n",
+      "\t|                  |            modules            ||   artifacts   
|\n",
+      "\t|       conf       | number| search|dwnlded|evicted|| 
number|dwnlded|\n",
+      
"\t---------------------------------------------------------------------\n",
+      "\t|      default     |   10  |   0   |   0   |   1   ||   9   |   0   
|\n",
+      
"\t---------------------------------------------------------------------\n",
+      ":: retrieving :: 
org.apache.spark#spark-submit-parent-c20579c1-d499-4129-ae06-0876b2e0525a\n",
+      "\tconfs: [default]\n",
+      "\t0 artifacts copied, 9 already retrieved (0kB/5ms)\n",
+      "22/02/01 12:43:55 WARN NativeCodeLoader: Unable to load native-hadoop 
library for your platform... using builtin-java classes where applicable\n",
+      "Using Spark's default log4j profile: 
org/apache/spark/log4j-defaults.properties\n",
+      "Setting default log level to \"WARN\".\n",
+      "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use 
setLogLevel(newLevel).\n",
+      "22/02/01 12:43:56 WARN SQLConf: The SQL config 
'spark.sql.execution.arrow.fallback.enabled' has been deprecated in Spark v3.0 
and may be removed in the future. Use 
'spark.sql.execution.arrow.pyspark.fallback.enabled' instead of it.\n",
+      "22/02/01 12:43:57 WARN SQLConf: The SQL config 
'spark.sql.execution.arrow.fallback.enabled' has been deprecated in Spark v3.0 
and may be removed in the future. Use 
'spark.sql.execution.arrow.pyspark.fallback.enabled' instead of it.\n",
+      "                                                                        
        \r"
+     ]
+    }
+   ],
+   "source": [
+    "# spark.scheduler.mode', 'FAIR'\n",
+    "spark = SparkSession.\\\n",
+    "    builder.\\\n",
+    "    appName(\"Overpass-API\").\\\n",
+    "    enableHiveSupport().\\\n",
+    "    master(\"local[*]\").\\\n",
+    "    master(\"spark://spark-master:7077\").\\\n",
+    "    config(\"spark.executor.memory\", \"15G\").\\\n",
+    "    config(\"spark.driver.maxResultSize\", \"135G\").\\\n",
+    "    config(\"spark.sql.shuffle.partitions\", \"500\").\\\n",
+    "    config(' spark.sql.adaptive.coalescePartitions.enabled', True).\\\n",
+    "    config('spark.sql.adaptive.enabled', True).\\\n",
+    "    config('spark.sql.adaptive.coalescePartitions.initialPartitionNum', 
125).\\\n",
+    "    config(\"spark.sql.execution.arrow.pyspark.enabled\", True).\\\n",
+    "    config(\"spark.sql.execution.arrow.fallback.enabled\", True).\\\n",
+    "    config('spark.kryoserializer.buffer.max', 2047).\\\n",
+    "    config(\"spark.serializer\", KryoSerializer.getName).\\\n",
+    "    config(\"spark.kryo.registrator\", 
SedonaKryoRegistrator.getName).\\\n",
+    "    config(\"spark.jars.packages\", 
\"org.apache.sedona:sedona-python-adapter-3.0_2.12:1.1.0-incubating,org.datasyslab:geotools-wrapper:1.1.0-25.2\")
 .\\\n",
+    "    enableHiveSupport().\\\n",
+    "    getOrCreate()\n",
+    "\n",
+    "SedonaRegistrator.registerAll(spark)\n",
+    "sc = spark.sparkContext"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "0b3b68fd-d9a4-4c95-bf45-8f8f2d995f7d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import requests\n",
+    "import json\n",
+    "\n",
+    "overpass_url = \"http://overpass-api.de/api/interpreter\"\n";,
+    "# overpass_query = \"\"\"\n",
+    "# [out:json];\n",
+    "# area[\"ISO3166-1\"=\"DE\"][admin_level=2];\n",
+    "# (node[\"amenity\"=\"biergarten\"](area);\n",
+    "#  way[\"amenity\"=\"biergarten\"](area);\n",
+    "#  rel[\"amenity\"=\"biergarten\"](area);\n",
+    "# );\n",
+    "# out center;\n",
+    "# \"\"\"\n",
+    "\n",
+    "# overpass_query = \"\"\"\n",
+    "# [out:json];\n",
+    "# area[name = \"Foz do Iguaçu\"];\n",
+    "# 
(way(area)[\"highway\"~\"^(private|cycleway|footway|bus_guideway|elevator|construction|proposed|bridleway|steps|raceway|motorway_link|path|secondary|motorway|trunk|primary)$\"];>;);\n",
+    "# out center;\n",
+    "# \"\"\"\n",
+    "\n",
+    "overpass_query = \"\"\"\n",
+    "[out:json];\n",
+    "area[name = \"Foz do Iguaçu\"];\n",
+    "way(area)[\"highway\"~\"\"];\n",
+    "out geom;\n",
+    ">;\n",
+    "out skel qt;\n",
+    "\"\"\"\n",
+    "\n",
+    "# response = requests.get(overpass_url, \n",
+    "#                         params={'data': overpass_query})\n",
+    "# data = response.json()\n",
+    "# hdfs = PyWebHdfsClient(host='179.106.229.159',port='50070', 
user_name='root')\n",
+    "file_name = \"foz_roads_osm.json\"\n",
+    "# hdfs.delete_file_dir(file_name)\n",
+    "# hdfs.create_file(file_name, json.dumps(data))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "be17fe8a-17ce-4c78-8103-0e970fc630ef",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "22/02/01 12:44:03 WARN package: Truncated the string representation of 
a plan since it was too large. This behavior can be adjusted by setting 
'spark.sql.debug.maxToStringFields'.\n"
+     ]
+    }
+   ],
+   "source": [
+    "path = \"hdfs://776faf4d6a1e:8020/\"+file_name\n",
+    "df = spark.read.json(path, multiLine = \"true\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "c8fa4b42-5a7c-45c4-9b1f-129e4438b883",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "root\n",
+      " |-- elements: array (nullable = true)\n",
+      " |    |-- element: struct (containsNull = true)\n",
+      " |    |    |-- bounds: struct (nullable = true)\n",
+      " |    |    |    |-- maxlat: double (nullable = true)\n",
+      " |    |    |    |-- maxlon: double (nullable = true)\n",
+      " |    |    |    |-- minlat: double (nullable = true)\n",
+      " |    |    |    |-- minlon: double (nullable = true)\n",
+      " |    |    |-- geometry: array (nullable = true)\n",
+      " |    |    |    |-- element: struct (containsNull = true)\n",
+      " |    |    |    |    |-- lat: double (nullable = true)\n",
+      " |    |    |    |    |-- lon: double (nullable = true)\n",
+      " |    |    |-- id: long (nullable = true)\n",
+      " |    |    |-- lat: double (nullable = true)\n",
+      " |    |    |-- lon: double (nullable = true)\n",
+      " |    |    |-- nodes: array (nullable = true)\n",
+      " |    |    |    |-- element: long (containsNull = true)\n",
+      " |    |    |-- tags: struct (nullable = true)\n",
+      " |    |    |    |-- FIXME: string (nullable = true)\n",
+      " |    |    |    |-- access: string (nullable = true)\n",
+      " |    |    |    |-- addr:city: string (nullable = true)\n",
+      " |    |    |    |-- addr:postcode: string (nullable = true)\n",
+      " |    |    |    |-- alt_name: string (nullable = true)\n",
+      " |    |    |    |-- area: string (nullable = true)\n",
+      " |    |    |    |-- barrier: string (nullable = true)\n",
+      " |    |    |    |-- bicycle: string (nullable = true)\n",
+      " |    |    |    |-- bridge: string (nullable = true)\n",
+      " |    |    |    |-- bridge:structure: string (nullable = true)\n",
+      " |    |    |    |-- bus: string (nullable = true)\n",
+      " |    |    |    |-- covered: string (nullable = true)\n",
+      " |    |    |    |-- crossing: string (nullable = true)\n",
+      " |    |    |    |-- description: string (nullable = true)\n",
+      " |    |    |    |-- destination: string (nullable = true)\n",
+      " |    |    |    |-- destination:ref: string (nullable = true)\n",
+      " |    |    |    |-- embankment: string (nullable = true)\n",
+      " |    |    |    |-- fixme: string (nullable = true)\n",
+      " |    |    |    |-- foot: string (nullable = true)\n",
+      " |    |    |    |-- footway: string (nullable = true)\n",
+      " |    |    |    |-- height: string (nullable = true)\n",
+      " |    |    |    |-- hgv: string (nullable = true)\n",
+      " |    |    |    |-- highway: string (nullable = true)\n",
+      " |    |    |    |-- horse: string (nullable = true)\n",
+      " |    |    |    |-- incline: string (nullable = true)\n",
+      " |    |    |    |-- junction: string (nullable = true)\n",
+      " |    |    |    |-- kerb: string (nullable = true)\n",
+      " |    |    |    |-- lanes: string (nullable = true)\n",
+      " |    |    |    |-- lanes:backward: string (nullable = true)\n",
+      " |    |    |    |-- lanes:forward: string (nullable = true)\n",
+      " |    |    |    |-- layer: string (nullable = true)\n",
+      " |    |    |    |-- lit: string (nullable = true)\n",
+      " |    |    |    |-- maxspeed: string (nullable = true)\n",
+      " |    |    |    |-- motor_vehicle: string (nullable = true)\n",
+      " |    |    |    |-- motorcar: string (nullable = true)\n",
+      " |    |    |    |-- motorroad: string (nullable = true)\n",
+      " |    |    |    |-- mtb:scale: string (nullable = true)\n",
+      " |    |    |    |-- name: string (nullable = true)\n",
+      " |    |    |    |-- name:en: string (nullable = true)\n",
+      " |    |    |    |-- name:es: string (nullable = true)\n",
+      " |    |    |    |-- name:etymology:wikidata: string (nullable = 
true)\n",
+      " |    |    |    |-- name:pt: string (nullable = true)\n",
+      " |    |    |    |-- noname: string (nullable = true)\n",
+      " |    |    |    |-- note: string (nullable = true)\n",
+      " |    |    |    |-- old_name: string (nullable = true)\n",
+      " |    |    |    |-- oneway: string (nullable = true)\n",
+      " |    |    |    |-- operator: string (nullable = true)\n",
+      " |    |    |    |-- owner: string (nullable = true)\n",
+      " |    |    |    |-- postal_code: string (nullable = true)\n",
+      " |    |    |    |-- ref: string (nullable = true)\n",
+      " |    |    |    |-- sac_scale: string (nullable = true)\n",
+      " |    |    |    |-- segregated: string (nullable = true)\n",
+      " |    |    |    |-- service: string (nullable = true)\n",
+      " |    |    |    |-- sidewalk: string (nullable = true)\n",
+      " |    |    |    |-- source: string (nullable = true)\n",
+      " |    |    |    |-- source:name: string (nullable = true)\n",
+      " |    |    |    |-- sport: string (nullable = true)\n",
+      " |    |    |    |-- surface: string (nullable = true)\n",
+      " |    |    |    |-- trail_visibility: string (nullable = true)\n",
+      " |    |    |    |-- tunnel: string (nullable = true)\n",
+      " |    |    |    |-- turn:lanes: string (nullable = true)\n",
+      " |    |    |    |-- turn:lanes:forward: string (nullable = true)\n",
+      " |    |    |    |-- vehicle: string (nullable = true)\n",
+      " |    |    |    |-- wheelchair: string (nullable = true)\n",
+      " |    |    |    |-- width: string (nullable = true)\n",
+      " |    |    |    |-- wikidata: string (nullable = true)\n",
+      " |    |    |    |-- wikipedia: string (nullable = true)\n",
+      " |    |    |-- type: string (nullable = true)\n",
+      " |-- generator: string (nullable = true)\n",
+      " |-- osm3s: struct (nullable = true)\n",
+      " |    |-- copyright: string (nullable = true)\n",
+      " |    |-- timestamp_areas_base: timestamp (nullable = true)\n",
+      " |    |-- timestamp_osm_base: timestamp (nullable = true)\n",
+      " |-- version: double (nullable = true)\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "df.printSchema()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "b3b1560a-27b7-4074-8b6b-efb0cad05479",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "                                                                        
        \r"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      
"+--------------------+--------------------+--------------------+-------+-----------+\n",
+      "|            elements|           generator|               
osm3s|version|total_nodes|\n",
+      
"+--------------------+--------------------+--------------------+-------+-----------+\n",
+      "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|    
0.6|      36560|\n",
+      
"+--------------------+--------------------+--------------------+-------+-----------+\n",
+      "\n",
+      "36560\n",
+      "26122619\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "                                                                        
        \r"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      
"+--------------------+--------------------+--------------------+-------+-----------+--------+\n",
+      "|            elements|           generator|               
osm3s|version|total_nodes|      id|\n",
+      
"+--------------------+--------------------+--------------------+-------+-----------+--------+\n",
+      "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|    
0.6|      36560|25835738|\n",
+      "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|    
0.6|      36560|26122619|\n",
+      "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|    
0.6|      36560|26122631|\n",
+      "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|    
0.6|      36560|26122645|\n",
+      "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|    
0.6|      36560|26122801|\n",
+      
"+--------------------+--------------------+--------------------+-------+-----------+--------+\n",
+      "only showing top 5 rows\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "                                                                        
        \r"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      
"+--------------------+--------------------+--------------------+-------+-----------+--------------------+\n",
+      "|            elements|           generator|               
osm3s|version|total_nodes|                 new|\n",
+      
"+--------------------+--------------------+--------------------+-------+-----------+--------------------+\n",
+      "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|    
0.6|      36560|[25835738, [[-25....|\n",
+      "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|    
0.6|      36560|[26122619, [[-25....|\n",
+      "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|    
0.6|      36560|[26122631, [[-25....|\n",
+      "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|    
0.6|      36560|[26122645, [[-25....|\n",
+      "|[[[-25.5267745, -...|Overpass API 0.7....|[The data include...|    
0.6|      36560|[26122801, [[-25....|\n",
+      
"+--------------------+--------------------+--------------------+-------+-----------+--------------------+\n",
+      "only showing top 5 rows\n",
+      "\n",
+      
"+--------+--------------------+--------------------+--------+-------+--------+--------------------+-----------+\n",
+      "|      id|                geom|               nodes|maxspeed|incline| 
surface|                name|total_nodes|\n",
+      
"+--------+--------------------+--------------------+--------+-------+--------+--------------------+-----------+\n",
+      "|25835738|[[-25.5343718, -5...|[362528326, 28597...|    null|   null|   
 null|Avenida Costa e S...|      36560|\n",
+      "|26122619|[[-25.5868602, -5...|[285975617, 59202...|      60|   null|   
paved|    Avenida Mercosul|      36560|\n",
+      "|26122631|[[-25.5868602, -5...|[285975617, 69310...|      60|   
null|concrete|Ponte Internacion...|      36560|\n",
+      "|26122645|[[-25.691851, -54...|[307420349, 16982...|    null|   null|   
paved|Rodovia das Catar...|      36560|\n",
+      "|26122801|[[-25.5637579, -5...|[5514364259, 5514...|    null|   null|   
 null| Rua Natal Graciotin|      36560|\n",
+      
"+--------+--------------------+--------------------+--------+-------+--------+--------------------+-----------+\n",
+      "only showing top 5 rows\n",
+      "\n",
+      "root\n",
+      " |-- lat: double (nullable = true)\n",
+      " |-- lon: double (nullable = true)\n",
+      " |-- id: long (nullable = true)\n",
+      "\n",
+      "+-----------+-----------+--------+\n",
+      "|        lat|        lon|      id|\n",
+      "+-----------+-----------+--------+\n",
+      "|-25.5343718|-54.5761722|25835738|\n",
+      "|-25.5343227|-54.5760531|25835738|\n",
+      "|-25.5341375|-54.5756818|25835738|\n",
+      "|-25.5337506|-54.5750471|25835738|\n",
+      "|-25.5334046|-54.5746139|25835738|\n",
+      "+-----------+-----------+--------+\n",
+      "only showing top 5 rows\n",
+      "\n",
+      
"+--------------------+--------+--------+-------+--------+--------------------+--------------------+-----------+\n",
+      "|         coordinates|      id|maxspeed|incline| surface|               
 name|               nodes|total_nodes|\n",
+      
"+--------------------+--------+--------+-------+--------+--------------------+--------------------+-----------+\n",
+      "|[-25.5343718,-54....|25835738|    null|   null|    null|Avenida Costa 
e S...|[362528326, 28597...|      36560|\n",
+      "|[-25.5868602,-54....|26122619|      60|   null|   paved|    Avenida 
Mercosul|[285975617, 59202...|      36560|\n",
+      "|[-25.5868602,-54....|26122631|      60|   null|concrete|Ponte 
Internacion...|[285975617, 69310...|      36560|\n",
+      "|[-25.691851,-54.4...|26122645|    null|   null|   paved|Rodovia das 
Catar...|[307420349, 16982...|      36560|\n",
+      "|[-25.5637579,-54....|26122801|    null|   null|    null| Rua Natal 
Graciotin|[5514364259, 5514...|      36560|\n",
+      
"+--------------------+--------+--------+-------+--------+--------------------+--------------------+-----------+\n",
+      "only showing top 5 rows\n",
+      "\n",
+      
"+--------------------+--------+--------+-------+--------+--------------------+--------------------+-----------+\n",
+      "|                geom|      id|maxspeed|incline| surface|               
 name|               nodes|total_nodes|\n",
+      
"+--------------------+--------+--------+-------+--------+--------------------+--------------------+-----------+\n",
+      "|LINESTRING (-25.5...|25835738|    null|   null|    null|Avenida Costa 
e S...|[362528326, 28597...|      36560|\n",
+      "|LINESTRING (-25.5...|26122619|      60|   null|   paved|    Avenida 
Mercosul|[285975617, 59202...|      36560|\n",
+      "|LINESTRING (-25.5...|26122631|      60|   null|concrete|Ponte 
Internacion...|[285975617, 69310...|      36560|\n",
+      "|LINESTRING (-25.6...|26122645|    null|   null|   paved|Rodovia das 
Catar...|[307420349, 16982...|      36560|\n",
+      "|LINESTRING (-25.5...|26122801|    null|   null|    null| Rua Natal 
Graciotin|[5514364259, 5514...|      36560|\n",
+      
"+--------------------+--------+--------+-------+--------+--------------------+--------------------+-----------+\n",
+      "only showing top 5 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "from pyspark.sql.functions import explode, arrays_zip\n",
+    "\n",
+    "df.createOrReplaceTempView(\"df\")\n",
+    "tb = spark.sql(\"select *, size(elements) total_nodes from df\")\n",
+    "tb.show(5)\n",
+    "\n",
+    "isolate_total_nodes = tb.select(\"total_nodes\").toPandas()\n",
+    "total_nodes = isolate_total_nodes[\"total_nodes\"].iloc[0]\n",
+    "print(total_nodes)\n",
+    "\n",
+    "isolate_ids = tb.select(\"elements.id\").toPandas()\n",
+    "ids = pd.DataFrame(isolate_ids[\"id\"].iloc[0]).drop_duplicates()\n",
+    "print(ids[0].iloc[1])\n",
+    "\n",
+    "formatted_df = tb\\\n",
+    ".withColumn(\"id\", explode(\"elements.id\"))\n",
+    "\n",
+    "formatted_df.show(5)\n",
+    "\n",
+    "formatted_df = tb\\\n",
+    ".withColumn(\"new\", arrays_zip(\"elements.id\", \"elements.geometry\", 
\"elements.nodes\", \"elements.tags\"))\\\n",
+    ".withColumn(\"new\", explode(\"new\"))\n",
+    "\n",
+    "formatted_df.show(5)\n",
+    "\n",
+    "# formatted_df.printSchema()\n",
+    "\n",
+    "formatted_df = 
formatted_df.select(\"new.0\",\"new.1\",\"new.2\",\"new.3.maxspeed\",\"new.3.incline\",\"new.3.surface\",
 \"new.3.name\", \"total_nodes\")\n",
+    "formatted_df = 
formatted_df.withColumnRenamed(\"0\",\"id\").withColumnRenamed(\"1\",\"geom\").withColumnRenamed(\"2\",\"nodes\").withColumnRenamed(\"3\",\"tags\")\n",
+    "formatted_df.createOrReplaceTempView(\"formatted_df\")\n",
+    "formatted_df.show(5)\n",
+    "# TODO atualizar daqui para baixo para considerar a linha inteira na 
lógica\n",
+    "points_tb = spark.sql(\"select geom, id from formatted_df where geom IS 
NOT NULL\")\n",
+    "points_tb = points_tb\\\n",
+    ".withColumn(\"new\", arrays_zip(\"geom.lat\", \"geom.lon\"))\\\n",
+    ".withColumn(\"new\", explode(\"new\"))\n",
+    "\n",
+    "points_tb = points_tb.select(\"new.0\",\"new.1\", \"id\")\n",
+    "\n",
+    "points_tb = 
points_tb.withColumnRenamed(\"0\",\"lat\").withColumnRenamed(\"1\",\"lon\")\n",
+    "points_tb.printSchema()\n",
+    "\n",
+    "points_tb.createOrReplaceTempView(\"points_tb\")\n",
+    "\n",
+    "points_tb.show(5)\n",
+    "\n",
+    "# teste = spark.sql(\"select st_point(lat, lon) as geom, id from 
points_tb\")\n",
+    "\n",
+    "# teste = spark.sql(\"select collect_list(array(p1.lat,p1.lon)) as line 
from points_tb p1 where p1.id = 25835738 group by p1.id\")\n",
+    "# teste.show(5)\n",
+    "# print(teste.take(1))\n",
+    "\n",
+    "# SELECT ST_AsText(ST_Envelope(\n",
+    "# \t\tST_Collect(\n",
+    "# \t\t\tST_GeomFromText('LINESTRING(55 75,125 150)'),\n",
+    "# \t\t\t\tST_Point(20, 80))\n",
+    "# \t\t\t\t)) As wktenv;\n",
+    "\n",
+    "coordinates_tb = spark.sql(\"select (select 
collect_list(CONCAT(p1.lat,',',p1.lon)) from points_tb p1 where p1.id = p2.id 
group by p1.id) as coordinates, p2.id, p2.maxspeed, p2.incline, p2.surface, 
p2.name, p2.nodes, p2.total_nodes from formatted_df p2\")\n",
+    "coordinates_tb.createOrReplaceTempView(\"coordinates_tb\")\n",
+    "coordinates_tb.show(5)\n",
+    "\n",
+    "roads_tb = spark.sql(\"SELECT 
ST_LineStringFromText(REPLACE(REPLACE(CAST(coordinates as 
string),'[',''),']',''), ',') as geom, id, maxspeed, incline, surface, name, 
nodes, total_nodes FROM coordinates_tb WHERE coordinates IS NOT NULL\")\n",
+    "roads_tb.createOrReplaceTempView(\"roads_tb\")\n",
+    "roads_tb.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "fc3c7891-3474-49fd-b574-8200e2d93c5f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[Row(geom=<shapely.geometry.linestring.LineString object at 
0x7f8b6c4fefd0>)]\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "print(roads_tb.select('geom').take(1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "4ef77f2d-136f-4881-97d4-c73d8c33491e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      
"+--------------------+-------------------+-------------------+------------------+--------------------+--------+--------+-------+--------+--------------------+-----------+\n",
+      "|               nodes|     distance_toend|           distance|          
geomsize|                geom|      id|maxspeed|incline| surface|               
 name|total_nodes|\n",
+      
"+--------------------+-------------------+-------------------+------------------+--------------------+--------+--------+-------+--------+--------------------+-----------+\n",
+      "|[362528326, 
28597...|0.04960428437877076|0.06191383729684203|11.818939546233448|LINESTRING 
(-25.5...|25835738|    null|   null|    null|Avenida Costa e S...|      
36560|\n",
+      "|[285975617, 59202...| 0.1036513492683037|0.11504041822963577|  
8.00531493113018|LINESTRING (-25.5...|26122619|      60|   null|   paved|    
Avenida Mercosul|      36560|\n",
+      "|[285975617, 
69310...|0.10941553855472455|0.12057253982300521|1.9538901610218935|LINESTRING 
(-25.5...|26122631|      60|   null|concrete|Ponte Internacion...|      
36560|\n",
+      "|[307420349, 16982...|0.24791409977758808| 
0.2619862588434014|11.873439986567648|LINESTRING (-25.6...|26122645|    null|   
null|   paved|Rodovia das Catar...|      36560|\n",
+      "|[5514364259, 5514...|  0.079155366559698|0.08873043404159503| 
6.558866347858073|LINESTRING (-25.5...|26122801|    null|   null|    null| Rua 
Natal Graciotin|      36560|\n",
+      
"+--------------------+-------------------+-------------------+------------------+--------------------+--------+--------+-------+--------+--------------------+-----------+\n",
+      "only showing top 5 rows\n",
+      "\n",
+      
"+--------------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+      "|               nodes|maxspeed|incline| surface|                name|   
   id|                geom|          geomsize|           distance|     
distance_toend|total_nodes|\n",
+      
"+--------------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+      "|[362528326, 28597...|      20|     0%|    soil|Avenida Costa e 
S...|25835738|LINESTRING 
(-25.5...|11.818939546233448|0.06191383729684203|0.04960428437877076|      
36560|\n",
+      "|[285975617, 59202...|      60|     0%|   paved|    Avenida 
Mercosul|26122619|LINESTRING (-25.5...|  8.00531493113018|0.11504041822963577| 
0.1036513492683037|      36560|\n",
+      "|[285975617, 69310...|      60|     0%|concrete|Ponte 
Internacion...|26122631|LINESTRING 
(-25.5...|1.9538901610218935|0.12057253982300521|0.10941553855472455|      
36560|\n",
+      "|[307420349, 16982...|      20|     0%|   paved|Rodovia das 
Catar...|26122645|LINESTRING (-25.6...|11.873439986567648| 
0.2619862588434014|0.24791409977758808|      36560|\n",
+      "|[5514364259, 5514...|      20|     0%|    soil| Rua Natal 
Graciotin|26122801|LINESTRING (-25.5...| 6.558866347858073|0.08873043404159503| 
 0.079155366559698|      36560|\n",
+      
"+--------------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+      "only showing top 5 rows\n",
+      "\n",
+      
"+--------------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+      "|               nodes|surface_index|maxspeed|incline| surface|          
      name|      id|                geom|          geomsize|           
distance|     distance_toend|total_nodes|\n",
+      
"+--------------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+      "|[362528326, 28597...|         0.04|      20|     0%|    soil|Avenida 
Costa e S...|25835738|LINESTRING 
(-25.5...|11.818939546233448|0.06191383729684203|0.04960428437877076|      
36560|\n",
+      "|[285975617, 59202...|         0.03|      60|     0%|   paved|    
Avenida Mercosul|26122619|LINESTRING (-25.5...|  
8.00531493113018|0.11504041822963577| 0.1036513492683037|      36560|\n",
+      "|[285975617, 69310...|         0.02|      60|     0%|concrete|Ponte 
Internacion...|26122631|LINESTRING 
(-25.5...|1.9538901610218935|0.12057253982300521|0.10941553855472455|      
36560|\n",
+      "|[307420349, 16982...|         0.03|      20|     0%|   paved|Rodovia 
das Catar...|26122645|LINESTRING (-25.6...|11.873439986567648| 
0.2619862588434014|0.24791409977758808|      36560|\n",
+      "|[5514364259, 5514...|         0.04|      20|     0%|    soil| Rua 
Natal Graciotin|26122801|LINESTRING (-25.5...| 
6.558866347858073|0.08873043404159503|  0.079155366559698|      36560|\n",
+      
"+--------------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+      "only showing top 5 rows\n",
+      "\n",
+      
"+--------------------+-------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+      "|               nodes|incline_index|surface_index|maxspeed|incline| 
surface|                name|      id|                geom|          geomsize|  
         distance|     distance_toend|total_nodes|\n",
+      
"+--------------------+-------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+      "|[362528326, 28597...|         0.00|         0.04|      20|     0%|    
soil|Avenida Costa e S...|25835738|LINESTRING 
(-25.5...|11.818939546233448|0.06191383729684203|0.04960428437877076|      
36560|\n",
+      "|[285975617, 59202...|         0.00|         0.03|      60|     0%|   
paved|    Avenida Mercosul|26122619|LINESTRING (-25.5...|  
8.00531493113018|0.11504041822963577| 0.1036513492683037|      36560|\n",
+      "|[285975617, 69310...|         0.00|         0.02|      60|     
0%|concrete|Ponte Internacion...|26122631|LINESTRING 
(-25.5...|1.9538901610218935|0.12057253982300521|0.10941553855472455|      
36560|\n",
+      "|[307420349, 16982...|         0.00|         0.03|      20|     0%|   
paved|Rodovia das Catar...|26122645|LINESTRING (-25.6...|11.873439986567648| 
0.2619862588434014|0.24791409977758808|      36560|\n",
+      "|[5514364259, 5514...|         0.00|         0.04|      20|     0%|    
soil| Rua Natal Graciotin|26122801|LINESTRING (-25.5...| 
6.558866347858073|0.08873043404159503|  0.079155366559698|      36560|\n",
+      
"+--------------------+-------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+      "only showing top 5 rows\n",
+      "\n",
+      
"+--------------------+------+-------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+      "|               
nodes|weight|incline_index|surface_index|maxspeed|incline| surface|             
   name|      id|                geom|          geomsize|           distance|   
  distance_toend|total_nodes|\n",
+      
"+--------------------+------+-------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+      "|[362528326, 28597...|  39.2|         0.00|         0.04|      20|     
0%|    soil|Avenida Costa e S...|25835738|LINESTRING 
(-25.5...|11.818939546233448|0.06191383729684203|0.04960428437877076|      
36560|\n",
+      "|[285975617, 59202...| 118.2|         0.00|         0.03|      60|     
0%|   paved|    Avenida Mercosul|26122619|LINESTRING (-25.5...|  
8.00531493113018|0.11504041822963577| 0.1036513492683037|      36560|\n",
+      "|[285975617, 69310...| 118.8|         0.00|         0.02|      60|     
0%|concrete|Ponte Internacion...|26122631|LINESTRING 
(-25.5...|1.9538901610218935|0.12057253982300521|0.10941553855472455|      
36560|\n",
+      "|[307420349, 16982...|  39.4|         0.00|         0.03|      20|     
0%|   paved|Rodovia das Catar...|26122645|LINESTRING 
(-25.6...|11.873439986567648| 0.2619862588434014|0.24791409977758808|      
36560|\n",
+      "|[5514364259, 5514...|  39.2|         0.00|         0.04|      20|     
0%|    soil| Rua Natal Graciotin|26122801|LINESTRING (-25.5...| 
6.558866347858073|0.08873043404159503|  0.079155366559698|      36560|\n",
+      
"+--------------------+------+-------------+-------------+--------+-------+--------+--------------------+--------+--------------------+------------------+-------------------+-------------------+-----------+\n",
+      "only showing top 5 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Não foi considerado que um caminha pode necessitar mais de 1 rua\n",
+    "\n",
+    "start_point = \"-25.4695946,-54.5909028\"\n",
+    "end_point = \"-25.4786993,-54.57938\" \n",
+    "\n",
+    "distance_tb = spark.sql(\"select nodes, st_distance(geom, 
st_point(\"+end_point+\")) as distance_toend, 
st_distance(st_point(\"+start_point+\"), geom) as distance, st_length(geom) * 
1000 as geomsize, geom, id, maxspeed, incline, surface, name , total_nodes from 
roads_tb\")\n",
+    "distance_tb.createOrReplaceTempView(\"distance_tb\")\n",
+    "distance_tb.show(5)\n",
+    "\n",
+    "# considerar distância, direcao(ex: 0 180 e etc), inclinacao(up, down, 
0%), superficie(asphalt,paved, concrete), velocidade(60 80 50 40 e etc)\n",
+    "fill_null_tb = spark.sql(\n",
+    "    \"select nodes, IFNULL(maxspeed, 20) as maxspeed, IFNULL(incline, 
'0%') as incline, IFNULL(surface, 'soil') as surface, name, id, geom, geomsize, 
distance, distance_toend, total_nodes  from distance_tb\")\n",
+    "fill_null_tb.createOrReplaceTempView(\"fill_null_tb\")\n",
+    "fill_null_tb.show(5)\n",
+    "\n",
+    "surface_index_tb = spark.sql(\n",
+    "    \"select nodes, case surface when 'asphalt'\" +\n",
+    "    \"then 0.01 when 'concrete'\" + \n",
+    "    \"then 0.02 when 'paved'\" +\n",
+    "    \"then 0.03 when 'soil'\" +\n",
+    "    \"then 0.04 when 'unpaved'\" + \n",
+    "    \"then 0.04 when 'sett'\" +\n",
+    "    \"then 0.03 ELSE 0.05 end as surface_index,\"+ \n",
+    "    \"maxspeed, incline, surface, name, id, geom, geomsize, distance, 
distance_toend, total_nodes  from fill_null_tb\")\n",
+    "surface_index_tb.createOrReplaceTempView(\"surface_index_tb\")\n",
+    "surface_index_tb.show(5)\n",
+    "\n",
+    "incline_index_tb = spark.sql(\n",
+    "    \"select nodes, case incline when 'top' then -0.10 when 'down' then 
0.10 when '0%' then 0 end as incline_index, surface_index, maxspeed, incline, 
surface, name, id, geom, geomsize, distance, distance_toend, total_nodes  from 
surface_index_tb\")\n",
+    "incline_index_tb.createOrReplaceTempView(\"incline_index_tb\")\n",
+    "incline_index_tb.show(5)\n",
+    "  \n",
+    "weight_index_tb = spark.sql(\n",
+    "    \"select nodes, (maxspeed - (maxspeed * surface_index)) + (maxspeed 
+(maxspeed * incline_index)) as weight, incline_index, surface_index, maxspeed, 
incline, surface, name, id, geom, geomsize, distance, distance_toend, 
total_nodes  from incline_index_tb WHERE geomsize IS NOT NULL\")\n",
+    "weight_index_tb.createOrReplaceTempView(\"weight_index_tb\")\n",
+    "weight_index_tb.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "78f6553a-61d7-4273-b027-675c6bd36cc4",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+-----------+\n",
+      "|min(weight)|\n",
+      "+-----------+\n",
+      "|       39.0|\n",
+      "+-----------+\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "teste = spark.sql(\n",
+    "    \"select min(weight) from weight_index_tb\")\n",
+    "teste.show(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "9032cb75-c459-49b8-a727-6946a068a0dd",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "32344883\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "22/02/01 12:44:21 ERROR Utils: Uncaught exception in thread 
element-tracking-store-worker\n",
+      "java.util.NoSuchElementException: key not found: 4363\n",
+      "\tat scala.collection.MapLike.default(MapLike.scala:235)\n",
+      "\tat scala.collection.MapLike.default$(MapLike.scala:234)\n",
+      "\tat scala.collection.AbstractMap.default(Map.scala:63)\n",
+      "\tat scala.collection.MapLike.apply(MapLike.scala:144)\n",
+      "\tat scala.collection.MapLike.apply$(MapLike.scala:143)\n",
+      "\tat scala.collection.AbstractMap.apply(Map.scala:63)\n",
+      "\tat 
org.apache.spark.sql.execution.ui.SQLAppStatusListener.$anonfun$aggregateMetrics$11(SQLAppStatusListener.scala:257)\n",
+      "\tat 
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)\n",
+      "\tat 
scala.collection.mutable.HashMap.$anonfun$foreach$1(HashMap.scala:149)\n",
+      "\tat 
scala.collection.mutable.HashTable.foreachEntry(HashTable.scala:237)\n",
+      "\tat 
scala.collection.mutable.HashTable.foreachEntry$(HashTable.scala:230)\n",
+      "\tat scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:44)\n",
+      "\tat scala.collection.mutable.HashMap.foreach(HashMap.scala:149)\n",
+      "\tat scala.collection.TraversableLike.map(TraversableLike.scala:238)\n",
+      "\tat 
scala.collection.TraversableLike.map$(TraversableLike.scala:231)\n",
+      "\tat scala.collection.AbstractTraversable.map(Traversable.scala:108)\n",
+      "\tat 
org.apache.spark.sql.execution.ui.SQLAppStatusListener.aggregateMetrics(SQLAppStatusListener.scala:256)\n",
+      "\tat 
org.apache.spark.sql.execution.ui.SQLAppStatusListener.$anonfun$onExecutionEnd$2(SQLAppStatusListener.scala:365)\n",
+      "\tat 
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\n",
+      "\tat org.apache.spark.util.Utils$.tryLog(Utils.scala:1945)\n",
+      "\tat 
org.apache.spark.status.ElementTrackingStore$$anon$1.run(ElementTrackingStore.scala:117)\n",
+      "\tat 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n",
+      "\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n",
+      "\tat 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n",
+      "\tat 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n",
+      "\tat java.lang.Thread.run(Thread.java:748)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+--------+--------------------+\n",
+      "|      id|            distance|\n",
+      "+--------+--------------------+\n",
+      "|32347167|8.330276767022637E-5|\n",
+      "+--------+--------------------+\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "22/02/01 12:44:22 ERROR Utils: Uncaught exception in thread 
element-tracking-store-worker\n",
+      "java.util.NoSuchElementException: key not found: 5357\n",
+      "\tat scala.collection.MapLike.default(MapLike.scala:235)\n",
+      "\tat scala.collection.MapLike.default$(MapLike.scala:234)\n",
+      "\tat scala.collection.AbstractMap.default(Map.scala:63)\n",
+      "\tat scala.collection.MapLike.apply(MapLike.scala:144)\n",
+      "\tat scala.collection.MapLike.apply$(MapLike.scala:143)\n",
+      "\tat scala.collection.AbstractMap.apply(Map.scala:63)\n",
+      "\tat 
org.apache.spark.sql.execution.ui.SQLAppStatusListener.$anonfun$aggregateMetrics$11(SQLAppStatusListener.scala:257)\n",
+      "\tat 
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)\n",
+      "\tat 
scala.collection.mutable.HashMap.$anonfun$foreach$1(HashMap.scala:149)\n",
+      "\tat 
scala.collection.mutable.HashTable.foreachEntry(HashTable.scala:237)\n",
+      "\tat 
scala.collection.mutable.HashTable.foreachEntry$(HashTable.scala:230)\n",
+      "\tat scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:44)\n",
+      "\tat scala.collection.mutable.HashMap.foreach(HashMap.scala:149)\n",
+      "\tat scala.collection.TraversableLike.map(TraversableLike.scala:238)\n",
+      "\tat 
scala.collection.TraversableLike.map$(TraversableLike.scala:231)\n",
+      "\tat scala.collection.AbstractTraversable.map(Traversable.scala:108)\n",
+      "\tat 
org.apache.spark.sql.execution.ui.SQLAppStatusListener.aggregateMetrics(SQLAppStatusListener.scala:256)\n",
+      "\tat 
org.apache.spark.sql.execution.ui.SQLAppStatusListener.$anonfun$onExecutionEnd$2(SQLAppStatusListener.scala:365)\n",
+      "\tat 
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\n",
+      "\tat org.apache.spark.util.Utils$.tryLog(Utils.scala:1945)\n",
+      "\tat 
org.apache.spark.status.ElementTrackingStore$$anon$1.run(ElementTrackingStore.scala:117)\n",
+      "\tat 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n",
+      "\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n",
+      "\tat 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n",
+      "\tat 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n",
+      "\tat java.lang.Thread.run(Thread.java:748)\n",
+      "22/02/01 12:44:23 ERROR Utils: Uncaught exception in thread 
element-tracking-store-worker\n",
+      "java.util.NoSuchElementException: key not found: 6462\n",
+      "\tat scala.collection.MapLike.default(MapLike.scala:235)\n",
+      "\tat scala.collection.MapLike.default$(MapLike.scala:234)\n",
+      "\tat scala.collection.AbstractMap.default(Map.scala:63)\n",
+      "\tat scala.collection.MapLike.apply(MapLike.scala:144)\n",
+      "\tat scala.collection.MapLike.apply$(MapLike.scala:143)\n",
+      "\tat scala.collection.AbstractMap.apply(Map.scala:63)\n",
+      "\tat 
org.apache.spark.sql.execution.ui.SQLAppStatusListener.$anonfun$aggregateMetrics$11(SQLAppStatusListener.scala:257)\n",
+      "\tat 
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)\n",
+      "\tat 
scala.collection.mutable.HashMap.$anonfun$foreach$1(HashMap.scala:149)\n",
+      "\tat 
scala.collection.mutable.HashTable.foreachEntry(HashTable.scala:237)\n",
+      "\tat 
scala.collection.mutable.HashTable.foreachEntry$(HashTable.scala:230)\n",
+      "\tat scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:44)\n",
+      "\tat scala.collection.mutable.HashMap.foreach(HashMap.scala:149)\n",
+      "\tat scala.collection.TraversableLike.map(TraversableLike.scala:238)\n",
+      "\tat 
scala.collection.TraversableLike.map$(TraversableLike.scala:231)\n",
+      "\tat scala.collection.AbstractTraversable.map(Traversable.scala:108)\n",
+      "\tat 
org.apache.spark.sql.execution.ui.SQLAppStatusListener.aggregateMetrics(SQLAppStatusListener.scala:256)\n",
+      "\tat 
org.apache.spark.sql.execution.ui.SQLAppStatusListener.$anonfun$onExecutionEnd$2(SQLAppStatusListener.scala:365)\n",
+      "\tat 
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\n",
+      "\tat org.apache.spark.util.Utils$.tryLog(Utils.scala:1945)\n",
+      "\tat 
org.apache.spark.status.ElementTrackingStore$$anon$1.run(ElementTrackingStore.scala:117)\n",
+      "\tat 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n",
+      "\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n",
+      "\tat 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n",
+      "\tat 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n",
+      "\tat java.lang.Thread.run(Thread.java:748)\n",
+      "                                                                        
        \r"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+--------------------+\n",
+      "|            boundary|\n",
+      "+--------------------+\n",
+      "|POLYGON ((-25.479...|\n",
+      "+--------------------+\n",
+      "\n",
+      "+--------+--------+\n",
+      "|contains|      id|\n",
+      "+--------+--------+\n",
+      "|   false|25835738|\n",
+      "|   false|26122619|\n",
+      "|   false|26122631|\n",
+      "|   false|26122645|\n",
+      "|   false|26122801|\n",
+      "+--------+--------+\n",
+      "only showing top 5 rows\n",
+      "\n",
+      "+---------+--------------------+\n",
+      "|       id|                geom|\n",
+      "+---------+--------------------+\n",
+      "| 32344891|LINESTRING (-25.4...|\n",
+      "|197591238|LINESTRING (-25.4...|\n",
+      "|918520467|LINESTRING (-25.4...|\n",
+      "| 32345349|LINESTRING (-25.4...|\n",
+      "|964560305|LINESTRING (-25.4...|\n",
+      "+---------+--------------------+\n",
+      "only showing top 5 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "closestoend_tb = spark.sql(\n",
+    "    \"select w1.id, w1.distance_toend from weight_index_tb w1 group by 
w1.id, w1.distance_toend having (select min(w2.distance_toend) as 
distance_toend  from weight_index_tb w2) = w1.distance_toend\")\n",
+    "closestoend_tb.createOrReplaceTempView(\"closestoend_tb\")\n",
+    "closestoend = closestoend_tb.take(1)[0]['id']\n",
+    "print(closestoend)\n",
+    "\n",
+    "closestostart_tb = spark.sql(\n",
+    "    \"select w1.id, w1.distance from weight_index_tb w1 group by w1.id, 
w1.distance having (select min(w2.distance) as distance  from weight_index_tb 
w2) = w1.distance\")\n",
+    "closestostart_tb.createOrReplaceTempView(\"closestostart_tb\")\n",
+    "closestostart_tb.show(5)\n",
+    "\n",
+    "closestostart = closestostart_tb.take(1)[0]['id']\n",
+    "\n",
+    "# FOLIUM EM 3857 dado em 4326 
st_transform(st_union_aggr(geom),'epsg:3857','epsg:4326')\n",
+    "json_lines = spark.sql(\n",
+    "    \"select ST_AsGeoJSON(st_envelope_aggr(geom)) AS json  from 
weight_index_tb where id in 
(\"+str(closestostart)+\",\"+str(closestoend)+\")\")\n",
+    "json_lines_string_teste  = json_lines.take(1)[0]['json']\n",
+    "coordinates_teste = json.loads(json_lines_string_teste)['coordinates']\n",
+    "\n",
+    "\n",
+    "# st_boundary st_contains\n",
+    "\n",
+    "# Pegar o limite entre a uniao da geom inicial e final\n",
+    "# select st_boundary(st_union_aggr(geom)) AS boundary  from 
weight_index_tb where id in 
(\"+str(closestostart)+\",\"+str(closestoend)+\")\n",
+    "\n",
+    "boundary_tb = spark.sql(\"select st_envelope_aggr(geom) as boundary from 
weight_index_tb where id in 
(\"+str(closestostart)+\",\"+str(closestoend)+\")\")\n",
+    "boundary_tb.createOrReplaceTempView(\"boundary_tb\")\n",
+    "boundary_tb.show(5)\n",
+    "\n",
+    "contains_tb = spark.sql(\"select st_intersects(boundary,geom) as 
contains, id from weight_index_tb, boundary_tb\")\n",
+    "contains_tb.createOrReplaceTempView(\"contains_tb\")\n",
+    "contains_tb.show(5)\n",
+    "\n",
+    "possible_paths = spark.sql(\"select id, geom from weight_index_tb group 
by id, geom having id in (select id from contains_tb where contains = 
true)\")\n",
+    "possible_paths.createOrReplaceTempView(\"possible_paths\")\n",
+    "possible_paths.show(5)\n",
+    "\n",
+    "paths_collection = spark.sql(\"select ST_AsGeoJSON(st_union_aggr(geom)) 
AS json from possible_paths\")\n",
+    "json_lines_string  = paths_collection.take(1)[0]['json']\n",
+    "coordinates = json.loads(json_lines_string)['coordinates']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "c59c3782-6b49-40f1-a1aa-6e54cacdf196",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+-------------------+\n",
+      "|     distance_toend|\n",
+      "+-------------------+\n",
+      "|0.01362215583892774|\n",
+      "+-------------------+\n",
+      "\n",
+      "select geom, id, weight from touches_tb where touches = true and 
distance_toend < 0.01362215583892774 and distance > 8.330276767022637e-05 and 
id NOT IN (32347167)\n",
+      "+--------------------+---------+------+\n",
+      "|                geom|       id|weight|\n",
+      "+--------------------+---------+------+\n",
+      "|LINESTRING (-25.4...| 32347009|  39.2|\n",
+      "|LINESTRING (-25.4...|197591238|  39.2|\n",
+      "+--------------------+---------+------+\n",
+      "\n",
+      "+---------+--------------------+-----------+\n",
+      "|       id|                geom|max(weight)|\n",
+      "+---------+--------------------+-----------+\n",
+      "|197591238|LINESTRING (-25.4...|       39.2|\n",
+      "| 32347009|LINESTRING (-25.4...|       39.2|\n",
+      "+---------+--------------------+-----------+\n",
+      "\n",
+      "+--------------------+\n",
+      "|      distance_toend|\n",
+      "+--------------------+\n",
+      "|0.011868762484771467|\n",
+      "+--------------------+\n",
+      "\n",
+      "select geom, id, weight from touches_tb where touches = true and 
distance_toend < 0.011868762484771467 and distance > 0.001627280815937359 and 
id NOT IN (32347167, 197591238)\n",
+      "+--------------------+---------+------+\n",
+      "|                geom|       id|weight|\n",
+      "+--------------------+---------+------+\n",
+      "|LINESTRING (-25.4...|197591235|  39.2|\n",
+      "+--------------------+---------+------+\n",
+      "\n",
+      "+---------+--------------------+-----------+\n",
+      "|       id|                geom|max(weight)|\n",
+      "+---------+--------------------+-----------+\n",
+      "|197591235|LINESTRING (-25.4...|       39.2|\n",
+      "+---------+--------------------+-----------+\n",
+      "\n",
+      "+--------------------+\n",
+      "|      distance_toend|\n",
+      "+--------------------+\n",
+      "|0.010329729036617774|\n",
+      "+--------------------+\n",
+      "\n",
+      "select geom, id, weight from touches_tb where touches = true and 
distance_toend < 0.010329729036617774 and distance > 0.0018402611895601347 and 
id NOT IN (32347167, 197591238, 197591235)\n",
+      "+--------------------+---------+------+\n",
+      "|                geom|       id|weight|\n",
+      "+--------------------+---------+------+\n",
+      "|LINESTRING (-25.4...|197591232|  39.2|\n",
+      "+--------------------+---------+------+\n",
+      "\n",
+      "+---------+--------------------+-----------+\n",
+      "|       id|                geom|max(weight)|\n",
+      "+---------+--------------------+-----------+\n",
+      "|197591232|LINESTRING (-25.4...|       39.2|\n",
+      "+---------+--------------------+-----------+\n",
+      "\n",
+      "+--------------------+\n",
+      "|      distance_toend|\n",
+      "+--------------------+\n",
+      "|0.007666208743570536|\n",
+      "+--------------------+\n",
+      "\n",
+      "select geom, id, weight from touches_tb where touches = true and 
distance_toend < 0.007666208743570536 and distance > 0.004980378367153946 and 
id NOT IN (32347167, 197591238, 197591235, 197591232)\n",
+      "+--------------------+---------+------+\n",
+      "|                geom|       id|weight|\n",
+      "+--------------------+---------+------+\n",
+      "|LINESTRING (-25.4...|437818123| 117.6|\n",
+      "+--------------------+---------+------+\n",
+      "\n",
+      "+---------+--------------------+-----------+\n",
+      "|       id|                geom|max(weight)|\n",
+      "+---------+--------------------+-----------+\n",
+      "|437818123|LINESTRING (-25.4...|      117.6|\n",
+      "+---------+--------------------+-----------+\n",
+      "\n",
+      "+--------------------+\n",
+      "|      distance_toend|\n",
+      "+--------------------+\n",
+      "|0.004314646750311...|\n",
+      "+--------------------+\n",
+      "\n",
+      "select geom, id, weight from touches_tb where touches = true and 
distance_toend < 0.0043146467503117755 and distance > 0.005667965772492608 and 
id NOT IN (32347167, 197591238, 197591235, 197591232, 437818123)\n",
+      "+--------------------+---------+------+\n",
+      "|                geom|       id|weight|\n",
+      "+--------------------+---------+------+\n",
+      "|LINESTRING (-25.4...|933800237| 117.6|\n",
+      "+--------------------+---------+------+\n",
+      "\n",
+      "+---------+--------------------+-----------+\n",
+      "|       id|                geom|max(weight)|\n",
+      "+---------+--------------------+-----------+\n",
+      "|933800237|LINESTRING (-25.4...|      117.6|\n",
+      "+---------+--------------------+-----------+\n",
+      "\n",
+      "+--------------------+\n",
+      "|      distance_toend|\n",
+      "+--------------------+\n",
+      "|0.004249921855750084|\n",
+      "+--------------------+\n",
+      "\n",
+      "select geom, id, weight from touches_tb where touches = true and 
distance_toend < 0.004249921855750084 and distance > 0.01157000312230015 and id 
NOT IN (32347167, 197591238, 197591235, 197591232, 437818123, 933800237)\n",
+      "+--------------------+---------+------+\n",
+      "|                geom|       id|weight|\n",
+      "+--------------------+---------+------+\n",
+      "|LINESTRING (-25.4...|933800236| 117.6|\n",
+      "+--------------------+---------+------+\n",
+      "\n",
+      "+---------+--------------------+-----------+\n",
+      "|       id|                geom|max(weight)|\n",
+      "+---------+--------------------+-----------+\n",
+      "|933800236|LINESTRING (-25.4...|      117.6|\n",
+      "+---------+--------------------+-----------+\n",
+      "\n",
+      "+--------------------+\n",
+      "|      distance_toend|\n",
+      "+--------------------+\n",
+      "|0.003896125505221...|\n",
+      "+--------------------+\n",
+      "\n",
+      "select geom, id, weight from touches_tb where touches = true and 
distance_toend < 0.0038961255052215697 and distance > 0.011710186719691087 and 
id NOT IN (32347167, 197591238, 197591235, 197591232, 437818123, 933800237, 
933800236)\n",
+      "+----+---+------+\n",
+      "|geom| id|weight|\n",
+      "+----+---+------+\n",
+      "+----+---+------+\n",
+      "\n",
+      "+---+----+-----------+\n",
+      "| id|geom|max(weight)|\n",
+      "+---+----+-----------+\n",
+      "+---+----+-----------+\n",
+      "\n",
+      "(32347167, 197591238, 197591235, 197591232, 437818123, 933800237, 
933800236, 32344883)\n"
+     ]
+    }
+   ],
+   "source": [
+    "path = [closestostart]\n",
+    "visited = [closestostart]\n",
+    "current_nodes = spark.sql(\"select geom from weight_index_tb where id = 
\"+str(closestostart))\n",
+    "row = current_nodes.rdd.collect()[0][\"geom\"]\n",
+    "id_current = closestostart\n",
+    "\n",
+    "def choose_path(row, path, id_current, visited, copy_row):\n",
+    "\n",
+    "    visited_frm = 
str(visited).replace(\"[\",\"(\").replace(\"]\",\")\")\n",
+    "  \n",
+    "    touches_tb = spark.sql(\"select 
st_touches(st_geomfromwkt('\"+str(row)+\"'),geom) as touches, * from 
weight_index_tb where geom IS NOT NULL and distance_toend IS NOT NULL\")\n",
+    "    touches_tb.createOrReplaceTempView(\"touches_tb\")\n",
+    "# st_distance(st_geomfromwkt('\"+str(row)+\"'),geom)\n",
+    "    fim_distance = spark.sql(\"select distance_toend from touches_tb 
where id = \"+ str(id_current))\n",
+    "    fim_distance.show(5)\n",
+    "    fim_distance_value = 
fim_distance.rdd.collect()[0][\"distance_toend\"]\n",
+    "    \n",
+    "    \n",
+    "    current_distance = spark.sql(\"select distance from touches_tb where 
id = \" + str(id_current))\n",
+    "    current_distance_value = 
current_distance.rdd.collect()[0][\"distance\"]\n",
+    "    \n",
+    "#     st_distance(st_geomfromwkt('\"+str(row)+\"'),geom) = \n",
+    "\n",
+    "    sql = \"select geom, id, weight from touches_tb where \" \\\n",
+    "    +\"touches = true\" \\\n",
+    "    +\" and \" \\\n",
+    "    +\"distance_toend < \" \\\n",
+    "    +str(fim_distance_value) \\\n",
+    "    +\" and \" \\\n",
+    "    +\"distance > \" \\\n",
+    "    +str(current_distance_value) \\\n",
+    "    +\" and \" \\\n",
+    "    +\"id NOT IN \" \\\n",
+    "    +visited_frm \\\n",
+    "    \n",
+    "    print(sql)\n",
+    "    current_nodes = spark.sql(sql)\n",
+    "    current_nodes.createOrReplaceTempView(\"current_nodes\")\n",
+    "    current_nodes.show(5)\n",
+    "    \n",
+    "    current_node = spark.sql(\"select id, geom, max(weight) from 
current_nodes group by id, geom, weight having max(weight) = weight \")\n",
+    "    current_node.show(5)\n",
+    "    \n",
+    "    if len(current_nodes.rdd.collect()) == 0:\n",
+    "        return path\n",
+    "    else:\n",
+    "        row = current_node.rdd.collect()[0][\"geom\"]\n",
+    "        id_current = current_node.rdd.collect()[0][\"id\"]\n",
+    "        path.append(id_current)\n",
+    "        visited.append(id_current)\n",
+    "        return choose_path(row, path, id_current, visited, copy_row)\n",
+    "    \n",
+    "path_ids = choose_path(row, path, id_current, visited, row)\n",
+    "path_ids.append(closestoend)\n",
+    "path_ids_frm = str(path_ids).replace(\"[\",\"(\").replace(\"]\",\")\")\n",
+    "print(path_ids_frm)\n",
+    "\n",
+    "short_path = spark.sql(\"select ST_AsGeoJSON(st_union_aggr(geom)) AS json 
from weight_index_tb where id in \"+path_ids_frm)\n",
+    "short_path_string  = short_path.take(1)[0]['json']\n",
+    "short_path_coordinates = json.loads(short_path_string)['coordinates']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "a918ed65-2778-4cb5-baf4-b9b8a74deb4c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"width:100%;\"><div 
style=\"position:relative;width:100%;height:0;padding-bottom:60%;\"><span 
style=\"color:#565656\">Make this Notebook Trusted to load map: File -> Trust 
Notebook</span><iframe src=\"about:blank\" 
style=\"position:absolute;width:100%;height:100%;left:0;top:0;border:none 
!important;\" 
data-html=%3C%21DOCTYPE%20html%3E%0A%3Chead%3E%20%20%20%20%0A%20%20%20%20%3Cmeta%20http-equiv%3D%22content-type%22%20content%3D%22text/html%3B%20charset%3DUTF-8%22%20/%3
 [...]
+      ],
+      "text/plain": [
+       "<folium.folium.Map at 0x7f8b12411880>"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#\n",
+    "\n",
+    "import folium\n",
+    "\n",
+    "start_point_arr = [-25.4695946,-54.5909028]\n",
+    "end_point_arr = [-25.4786993,-54.57938] \n",
+    "tooltip = \"Click me!\"\n",
+    "# 3857\n",
+    "m = folium.Map(location=[-25.5172662,-54.6170038], zoom_start=12, 
tiles='OpenStreetMap', crs='EPSG3857' )\n",
+    "folium.Marker(\n",
+    "    start_point_arr, popup=\"<i>Inicio</i>\", tooltip=tooltip, 
icon=folium.Icon(color=\"green\")\n",
+    ").add_to(m)\n",
+    "folium.Marker(\n",
+    "    end_point_arr, popup=\"<b>Fim</b>\", tooltip=tooltip, 
icon=folium.Icon(color=\"red\")\n",
+    ").add_to(m)\n",
+    "\n",
+    "# lines = folium.vector_layers.PolyLine(locations=coordinates)\n",
+    "# lines.add_to(m)\n",
+    "\n",
+    "# polygon = folium.vector_layers.Polygon(locations=coordinates_teste)\n",
+    "# polygon.add_to(m)\n",
+    "\n",
+    "polygon_path = 
folium.vector_layers.Polygon(locations=short_path_coordinates)\n",
+    "polygon_path.add_to(m)\n",
+    "\n",
+    "m"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "0e9a7675-f771-4618-98f3-d15352dbc7ae",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "spark.stop()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d280dc3d-6322-42df-82e7-0bd5cb0c0852",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

Reply via email to