I don't have a minimal reproduction right now, but here are some relevant
code snippets.
Stacktrace:
org.apache.spark.sql.AnalysisException: Undefined function:
'ST_PolygonFromEnvelope'. This function is neither a registered temporary
function nor a permanent function registered in the database 'default'.;
line 2 pos 50
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.failFunctionLookup(SessionCatalog.scala:1562)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunction(SessionCatalog.scala:1660)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunction(SessionCatalog.scala:1677)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveFunctions$$anonfun$apply$27$$anonfun$applyOrElse$114.$anonfun$applyOrElse$116(Analyzer.scala:2150)
at org.apache.spark.sql.catalyst.analysis.package$.withPosition(package.scala:60)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveFunctions$$anonfun$apply$27$$anonfun$applyOrElse$114.applyOrElse(Analyzer.scala:2150)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveFunctions$$anonfun$apply$27$$anonfun$applyOrElse$114.applyOrElse(Analyzer.scala:2137)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
Expression definition:
case class ST_PolygonFromEnvelope(inputExpressions: Seq[Expression])
  extends Expression with CodegenFallback with UserDataGeneratator {

  override def nullable: Boolean = false

  override def eval(input: InternalRow): Any = {
    // Each bound may arrive as a Double or a Decimal depending on how the SQL literal parses
    val minX = inputExpressions(0).eval(input) match {
      case a: Double => a
      case b: Decimal => b.toDouble
    }
    val minY = inputExpressions(1).eval(input) match {
      case a: Double => a
      case b: Decimal => b.toDouble
    }
    val maxX = inputExpressions(2).eval(input) match {
      case a: Double => a
      case b: Decimal => b.toDouble
    }
    val maxY = inputExpressions(3).eval(input) match {
      case a: Double => a
      case b: Decimal => b.toDouble
    }
    // Closed ring for the envelope: the last coordinate repeats the first
    val coordinates = new Array[Coordinate](5)
    coordinates(0) = new Coordinate(minX, minY)
    coordinates(1) = new Coordinate(minX, maxY)
    coordinates(2) = new Coordinate(maxX, maxY)
    coordinates(3) = new Coordinate(maxX, minY)
    coordinates(4) = coordinates(0)
    val geometryFactory = new GeometryFactory()
    val polygon = geometryFactory.createPolygon(coordinates)
    new GenericArrayData(GeometrySerializer.serialize(polygon))
  }

  override def dataType: DataType = GeometryUDT
  override def children: Seq[Expression] = inputExpressions
}
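For reference, here's roughly how the expression evaluates when constructed
directly (an untested sketch, assuming Catalyst's Literal plus the imports
the class already uses; it just shows the eval path, not the failing test):

import org.apache.spark.sql.catalyst.expressions.Literal

val expr = ST_PolygonFromEnvelope(Seq(
  Literal(-126.790180d), Literal(24.863836d),
  Literal(-64.630926d), Literal(50.000d)))
// Literal.eval ignores its input row, so a null row is fine here
val serialized = expr.eval(null) // GenericArrayData wrapping the serialized polygon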
Function registration:
Catalog.expressions.foreach { f =>
  // f is a companion object, so its class name ends in '$'; drop it for the SQL name
  val functionIdentifier =
    FunctionIdentifier(f.getClass.getSimpleName.dropRight(1))
  val expressionInfo = new ExpressionInfo(
    f.getClass.getCanonicalName,
    functionIdentifier.database.orNull,
    functionIdentifier.funcName)
  sparkSession.sessionState.functionRegistry.registerFunction(
    functionIdentifier,
    expressionInfo,
    f
  )
}
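While I work on a proper standalone reproduction, I'd expect an untested
sketch like the following to exercise the same code path. It registers
Spark's built-in Upper expression under a custom name so there's no Sedona
dependency; MY_UPPER is just a placeholder name I made up:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo, Upper}

val spark = SparkSession.builder().master("local[*]").getOrCreate()

// Register a built-in expression under a custom name, the same way Sedona does
val ident = FunctionIdentifier("MY_UPPER")
val info = new ExpressionInfo(
  classOf[Upper].getCanonicalName, ident.database.orNull, ident.funcName)
spark.sessionState.functionRegistry.registerFunction(
  ident, info, (children: Seq[Expression]) => Upper(children.head))

spark.sql("SELECT 'a' AS c").createOrReplaceTempView("t")
// Works: the function resolves in a direct query
spark.sql("SELECT MY_UPPER(c) FROM t").show()
// If it hits the same path as above, this is where the lookup should fail
spark.sql("CREATE OR REPLACE TEMP VIEW v AS SELECT MY_UPPER(c) FROM t")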
On Mon, Nov 1, 2021 at 10:43 AM Wenchen Fan <[email protected]> wrote:
> Hi Adam,
>
> Thanks for reporting this issue! Do you have the full stacktrace or a code
> snippet to reproduce the issue on the Spark side? It looks like a bug, but it's
> not obvious to me how this bug can happen.
>
> Thanks,
> Wenchen
>
> On Sat, Oct 30, 2021 at 1:08 AM Adam Binford <[email protected]> wrote:
>
>> Hi devs,
>>
>> I'm working on getting Apache Sedona upgraded to work with Spark 3.2, and
>> ran into a weird issue I wanted to get some feedback on. The PR and current
>> discussion can be found here:
>> https://github.com/apache/incubator-sedona/pull/557
>>
>> To quickly sum up: this library defines custom expressions and registers
>> them using sparkSession.sessionState.functionRegistry.registerFunction.
>> One of the
>> unit tests is now failing because the function can't be found when a
>> temporary view using that function is created in pure SQL.
>>
>> Examples:
>> This fails with "Undefined function: 'ST_PolygonFromEnvelope'. This
>> function is neither a registered temporary function nor a permanent
>> function registered in the database 'default'.":
>>
>> spark.sql(
>> """
>> |CREATE OR REPLACE TEMP VIEW pixels AS
>> |SELECT pixel, shape FROM pointtable
>> |LATERAL VIEW EXPLODE(ST_Pixelize(shape, 1000, 1000,
>> ST_PolygonFromEnvelope(-126.790180,24.863836,-64.630926,50.000))) AS pixel
>> """.stripMargin)
>>
>> // Test visualization partitioner
>> val zoomLevel = 2
>> val newDf = VizPartitioner(spark.table("pixels"), zoomLevel, "pixel",
>> new Envelope(0, 1000, 0, 1000))
>>
>>
>> But both of these work fine:
>>
>> val table = spark.sql(
>> """
>> |SELECT pixel, shape FROM pointtable
>> |LATERAL VIEW EXPLODE(ST_Pixelize(shape, 1000, 1000,
>> ST_PolygonFromEnvelope(-126.790180,24.863836,-64.630926,50.000))) AS pixel
>> """.stripMargin)
>>
>> // Test visualization partitioner
>> val zoomLevel = 2
>> val newDf = VizPartitioner(table, zoomLevel, "pixel", new Envelope(0,
>> 1000, 0, 1000))
>>
>> val table = spark.sql(
>> """
>> |SELECT pixel, shape FROM pointtable
>> |LATERAL VIEW EXPLODE(ST_Pixelize(shape, 1000, 1000,
>> ST_PolygonFromEnvelope(-126.790180,24.863836,-64.630926,50.000))) AS pixel
>> """.stripMargin)
>> table.createOrReplaceTempView("pixels")
>>
>> // Test visualization partitioner
>> val zoomLevel = 2
>> val newDf = VizPartitioner(spark.table("pixels"), zoomLevel, "pixel",
>> new Envelope(0, 1000, 0, 1000))
>>
>>
>> So the main question is, is this a feature or a bug?
>>
>> --
>> Adam Binford
>>
>
--
Adam Binford