This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 1fbac22a6 [SEDONA-662] remove dead code from dbscan code (#1622)
1fbac22a6 is described below
commit 1fbac22a6f1c919cc8b278dcf17c667fa3f15a31
Author: James Willis <[email protected]>
AuthorDate: Fri Oct 11 17:53:26 2024 -0700
[SEDONA-662] remove dead code from dbscan code (#1622)
Co-authored-by: jameswillis <[email protected]>
---
python/sedona/stats/clustering/dbscan.py | 1 -
python/tests/stats/test_dbscan.py | 1 -
.../src/main/scala/org/apache/sedona/stats/clustering/DBSCAN.scala | 5 +----
3 files changed, 1 insertion(+), 6 deletions(-)
diff --git a/python/sedona/stats/clustering/dbscan.py b/python/sedona/stats/clustering/dbscan.py
index bb816e61a..f1501963d 100644
--- a/python/sedona/stats/clustering/dbscan.py
+++ b/python/sedona/stats/clustering/dbscan.py
@@ -25,7 +25,6 @@ from typing import Optional
from pyspark.sql import DataFrame, SparkSession
ID_COLUMN_NAME = "__id"
-DEFAULT_MAX_SAMPLE_SIZE = 1000000 # 1 million
def dbscan(
diff --git a/python/tests/stats/test_dbscan.py b/python/tests/stats/test_dbscan.py
index 60cc8a991..6c6a0d3b3 100644
--- a/python/tests/stats/test_dbscan.py
+++ b/python/tests/stats/test_dbscan.py
@@ -18,7 +18,6 @@
import pyspark.sql.functions as f
import pytest
-from itertools import product
from sedona.sql.st_constructors import ST_MakePoint
from sedona.sql.st_functions import ST_Buffer
from sklearn.cluster import DBSCAN as sklearnDBSCAN
diff --git a/spark/common/src/main/scala/org/apache/sedona/stats/clustering/DBSCAN.scala b/spark/common/src/main/scala/org/apache/sedona/stats/clustering/DBSCAN.scala
index 5bc691c2d..02a6145cb 100644
--- a/spark/common/src/main/scala/org/apache/sedona/stats/clustering/DBSCAN.scala
+++ b/spark/common/src/main/scala/org/apache/sedona/stats/clustering/DBSCAN.scala
@@ -22,7 +22,7 @@ import org.apache.sedona.stats.Util.getGeometryColumnName
import org.apache.spark.sql.functions._
import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
import org.apache.spark.sql.sedona_sql.expressions.st_functions.{ST_Distance, ST_DistanceSpheroid}
-import org.apache.spark.sql.{Column, DataFrame, SparkSession}
+import org.apache.spark.sql.{Column, DataFrame}
import org.graphframes.GraphFrame
object DBSCAN {
@@ -60,9 +60,6 @@ object DBSCAN {
includeOutliers: Boolean = true,
useSpheroid: Boolean = false): DataFrame = {
- // We want to disable broadcast joins because the broadcast reference were using too much driver memory
- val spark = SparkSession.getActiveSession.get
-
val geometryCol = geometry match {
case null => getGeometryColumnName(dataframe)
case _ => geometry