kbendick commented on a change in pull request #1473: URL: https://github.com/apache/iceberg/pull/1473#discussion_r497122281
########## File path: spark3-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveProcedures.scala ########## @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.catalyst.CatalystTypeConverters +import org.apache.spark.sql.catalyst.expressions.{Expression, Literal} +import org.apache.spark.sql.catalyst.plans.logical.{Call, CallArgument, CallStatement, LogicalPlan, NamedArgument, PositionalArgument} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.connector.catalog.{CatalogNotFoundException, CatalogPlugin, Identifier, Procedure, ProcedureCatalog, ProcedureParameter} +import scala.collection.Seq + +object ResolveProcedures extends Rule[LogicalPlan] { + + override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { + case CallStatement(nameParts, args) => + val (catalog, ident) = resolveCatalog(nameParts) + + val procedure = catalog.asProcedureCatalog.loadProcedure(ident) + + validateParams(procedure) + validateMethodHandle(procedure) + + Call(procedure, argValues = buildArgValues(procedure, 
args)) + } + + private def validateParams(procedure: Procedure): Unit = { + // should not be any duplicate param names + val duplicateParamNames = procedure.parameters.groupBy(_.name).collect { + case (name, matchingParams) if matchingParams.length > 1 => name + } + + if (duplicateParamNames.nonEmpty) { + throw new AnalysisException(s"Duplicate parameter names: ${duplicateParamNames.mkString("[", ",", "]")}") + } + + // optional params should be at the end + procedure.parameters.sliding(2).foreach { + case Array(previousParam, currentParam) if previousParam.required && !currentParam.required => + throw new AnalysisException("Optional parameters must be after required ones") + case _ => + } + } + + private def validateMethodHandle(procedure: Procedure): Unit = { + val params = procedure.parameters + val outputType = procedure.outputType + + val methodHandle = procedure.methodHandle + val methodType = methodHandle.`type` + val methodReturnType = methodType.returnType + + // method cannot accept var ags + if (methodHandle.isVarargsCollector) { + throw new AnalysisException("Method must have fixed arity") + } + + // verify the number of params in the procedure match the number of params in the method + if (params.length != methodType.parameterCount) { + throw new AnalysisException("Method parameter count must match the number of procedure parameters") + } + + // the MethodHandle API does not allow us to check the generic type + // so we only verify the return type is either void or iterable + + if (outputType.nonEmpty && methodReturnType != classOf[java.lang.Iterable[_]]) { Review comment: For my own clarification: We're not checking for scala `Iterable` type here. Is this because 1) Our project uses java.lang.Iterable (and avoids scala as much as possible) or 2) Spark's DataSourceV2 has a Java API and therefore should only be returning `java.lang.Iterable[_]` or 3) Something else? 
After typing this out, my guess is option 2, but I would greatly appreciate clarification if you have time. :) ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org