jiayuasu commented on code in PR #1526: URL: https://github.com/apache/sedona/pull/1526#discussion_r1686971821
########## spark/common/src/test/scala/org/apache/sedona/sql/aggregateFunctionTestScala.scala: ########## @@ -97,4 +122,24 @@ class aggregateFunctionTestScala extends TestBaseScala { assertResult(0.0)(intersectionDF.take(1)(0).get(0).asInstanceOf[Geometry].getArea) } } + + def generateRandomPolygon(index: Int): String = { + val random = new Random() + val x = random.nextDouble() * index + val y = random.nextDouble() * index + s"POLYGON (($x $y, ${x + 1} $y, ${x + 1} ${y + 1}, $x ${y + 1}, $x $y))" + } + + def createPolygonDataFrame(numPolygons: Int): Unit = { + val polygons = (1 to numPolygons).map(generateRandomPolygon).toArray + val polygonArray = polygons.map(polygon => s"ST_GeomFromWKT('$polygon')") + val polygonArrayStr = polygonArray.mkString(", ") + + val sqlQuery = + s""" + |SELECT explode(array($polygonArrayStr)) AS geom + """.stripMargin + + sparkSession.sql(sqlQuery).createOrReplaceTempView("geometry_table") Review Comment: Can you return a reference to the DataFrame from the function, instead of creating a new temp view? Otherwise this might pollute the global namespace and lead to bugs that are hard to find. ########## spark/common/src/test/scala/org/apache/sedona/sql/aggregateFunctionTestScala.scala: ########## @@ -62,6 +67,26 @@ class aggregateFunctionTestScala extends TestBaseScala { assert(union.take(1)(0).get(0).asInstanceOf[Geometry].getArea == 10100) } + it("Measured ST_Union_aggr wall time") { + // number of random polygons to generate + val numPolygons = 1000 + createPolygonDataFrame(numPolygons) + + // cache the table to eliminate the time of table scan + sparkSession.sql("cache table geometry_table") Review Comment: Can you also unpersist this table at the end of the test case? Otherwise this will lead to a memory leak. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@sedona.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org