This is an automated email from the ASF dual-hosted git repository.

lixiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new efd9299  [SPARK-28510][SQL] Implement Spark's own GetFunctionsOperation
efd9299 is described below

commit efd92993f403fe40b3abd1dac21f8d7c875f407d
Author: Yuming Wang <yumw...@ebay.com>
AuthorDate: Fri Aug 2 08:50:42 2019 -0700

    [SPARK-28510][SQL] Implement Spark's own GetFunctionsOperation
    
    ## What changes were proposed in this pull request?
    
    This PR implements Spark's own GetFunctionsOperation, which mitigates the differences between Spark SQL and Hive UDFs. Our implementation differs from Hive's in three ways (a JDBC usage sketch follows the list):
    - Our implementation always returns results. Hive only returns results when [(null == catalogName || "".equals(catalogName)) && (null == schemaName || "".equals(schemaName))](https://github.com/apache/hive/blob/rel/release-3.1.1/service/src/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java#L101-L119).
    - Our implementation pads the `REMARKS` field with the function usage, whereas Hive returns an empty string.
    - Our implementation does not support `FUNCTION_TYPE`, but Hive does.
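
    To illustrate the new behavior, here is a minimal JDBC client sketch (not part of this patch). The connection URL, user, and the presence of a running Thrift server are assumptions; the column semantics follow the tests added in this PR:

        // Hypothetical client; assumes a Spark Thrift server at localhost:10000.
        import java.sql.DriverManager

        Class.forName("org.apache.hive.jdbc.HiveDriver")
        val conn = DriverManager.getConnection(
          "jdbc:hive2://localhost:10000/default", "user", "")
        try {
          // getFunctions is now served by SparkGetFunctionsOperation.
          val rs = conn.getMetaData.getFunctions(null, "default", "upper")
          while (rs.next()) {
            // REMARKS carries the function usage instead of Hive's empty string.
            println(rs.getString("FUNCTION_NAME") + ": " + rs.getString("REMARKS"))
          }
        } finally {
          conn.close()
        }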
    
    ## How was this patch tested?
    
    unit tests
    
    Closes #25252 from wangyum/SPARK-28510.
    
    Authored-by: Yuming Wang <yumw...@ebay.com>
    Signed-off-by: gatorsmile <gatorsm...@gmail.com>
---
 .../thriftserver/SparkGetFunctionsOperation.scala  | 115 +++++++++++++++++++++
 .../server/SparkSQLOperationManager.scala          |  15 +++
 .../thriftserver/SparkMetadataOperationSuite.scala |  43 +++++++-
 .../cli/operation/GetFunctionsOperation.java       |   2 +-
 .../cli/operation/GetFunctionsOperation.java       |   2 +-
 5 files changed, 174 insertions(+), 3 deletions(-)

diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala
new file mode 100644
index 0000000..462e5730
--- /dev/null
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.thriftserver
+
+import java.sql.DatabaseMetaData
+import java.util.UUID
+
+import scala.collection.JavaConverters.seqAsJavaListConverter
+
+import org.apache.hadoop.hive.ql.security.authorization.plugin.{HiveOperationType, HivePrivilegeObjectUtils}
+import org.apache.hive.service.cli._
+import org.apache.hive.service.cli.operation.GetFunctionsOperation
+import org.apache.hive.service.cli.operation.MetadataOperation.DEFAULT_HIVE_CATALOG
+import org.apache.hive.service.cli.session.HiveSession
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.util.{Utils => SparkUtils}
+
+/**
+ * Spark's own GetFunctionsOperation
+ *
+ * @param sqlContext SQLContext to use
+ * @param parentSession a HiveSession from SessionManager
+ * @param catalogName catalog name. null if not applicable
+ * @param schemaName database name, null or a concrete database name
+ * @param functionName function name pattern
+ */
+private[hive] class SparkGetFunctionsOperation(
+    sqlContext: SQLContext,
+    parentSession: HiveSession,
+    catalogName: String,
+    schemaName: String,
+    functionName: String)
+  extends GetFunctionsOperation(parentSession, catalogName, schemaName, functionName) with Logging {
+
+  private var statementId: String = _
+
+  override def close(): Unit = {
+    super.close()
+    HiveThriftServer2.listener.onOperationClosed(statementId)
+  }
+
+  override def runInternal(): Unit = {
+    statementId = UUID.randomUUID().toString
+    // Do not change cmdStr. It's used for Hive auditing and authorization.
+    val cmdStr = s"catalog : $catalogName, schemaPattern : $schemaName"
+    val logMsg = s"Listing functions '$cmdStr, functionName : $functionName'"
+    logInfo(s"$logMsg with $statementId")
+    setState(OperationState.RUNNING)
+    // Always use the latest class loader provided by executionHive's state.
+    val executionHiveClassLoader = sqlContext.sharedState.jarClassLoader
+    Thread.currentThread().setContextClassLoader(executionHiveClassLoader)
+
+    val catalog = sqlContext.sessionState.catalog
+    // get databases for schema pattern
+    val schemaPattern = convertSchemaPattern(schemaName)
+    val matchingDbs = catalog.listDatabases(schemaPattern)
+    val functionPattern = CLIServiceUtils.patternToRegex(functionName)
+
+    if (isAuthV2Enabled) {
+      // authorize this call on the schema objects
+      val privObjs =
+        HivePrivilegeObjectUtils.getHivePrivDbObjects(seqAsJavaListConverter(matchingDbs).asJava)
+      authorizeMetaGets(HiveOperationType.GET_FUNCTIONS, privObjs, cmdStr)
+    }
+
+    HiveThriftServer2.listener.onStatementStart(
+      statementId,
+      parentSession.getSessionHandle.getSessionId.toString,
+      logMsg,
+      statementId,
+      parentSession.getUsername)
+
+    try {
+      matchingDbs.foreach { db =>
+        catalog.listFunctions(db, functionPattern).foreach {
+          case (funcIdentifier, _) =>
+            val info = catalog.lookupFunctionInfo(funcIdentifier)
+            val rowData = Array[AnyRef](
+              DEFAULT_HIVE_CATALOG, // FUNCTION_CAT
+              db, // FUNCTION_SCHEM
+              funcIdentifier.funcName, // FUNCTION_NAME
+              info.getUsage, // REMARKS
+              DatabaseMetaData.functionResultUnknown.asInstanceOf[AnyRef], // FUNCTION_TYPE
+              info.getClassName) // SPECIFIC_NAME
+            rowSet.addRow(rowData)
+        }
+      }
+      setState(OperationState.FINISHED)
+    } catch {
+      case e: HiveSQLException =>
+        setState(OperationState.ERROR)
+        HiveThriftServer2.listener.onStatementError(
+          statementId, e.getMessage, SparkUtils.exceptionString(e))
+        throw e
+    }
+    HiveThriftServer2.listener.onStatementFinish(statementId)
+  }
+}
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala
index 9b4198d..dfcd333 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala
@@ -119,6 +119,21 @@ private[thriftserver] class SparkSQLOperationManager()
     operation
   }
 
+  override def newGetFunctionsOperation(
+      parentSession: HiveSession,
+      catalogName: String,
+      schemaName: String,
+      functionName: String): GetFunctionsOperation = synchronized {
+    val sqlContext = sessionToContexts.get(parentSession.getSessionHandle)
+    require(sqlContext != null, s"Session handle: ${parentSession.getSessionHandle} has not been" +
+      " initialized or has already been closed.")
+    val operation = new SparkGetFunctionsOperation(sqlContext, parentSession,
+      catalogName, schemaName, functionName)
+    handleToOperation.put(operation.getHandle, operation)
+    logDebug(s"Created GetFunctionsOperation with session=$parentSession.")
+    operation
+  }
+
   def setConfMap(conf: SQLConf, confMap: java.util.Map[String, String]): Unit = {
     val iterator = confMap.entrySet().iterator()
     while (iterator.hasNext) {
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala
index 80a7db5..45fe8a8 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.hive.thriftserver
 
-import java.sql.ResultSet
+import java.sql.{DatabaseMetaData, ResultSet}
 
 class SparkMetadataOperationSuite extends HiveThriftJdbcTest {
 
@@ -182,4 +182,45 @@ class SparkMetadataOperationSuite extends HiveThriftJdbcTest {
       checkResult(metaData.getTableTypes, Seq("TABLE", "VIEW"))
     }
   }
+
+  test("Spark's own GetFunctionsOperation(SparkGetFunctionsOperation)") {
+    def checkResult(rs: ResultSet, functionName: Seq[String]): Unit = {
+      for (i <- functionName.indices) {
+        assert(rs.next())
+        assert(rs.getString("FUNCTION_SCHEM") === "default")
+        assert(rs.getString("FUNCTION_NAME") === functionName(i))
+        assert(rs.getString("REMARKS").startsWith(s"${functionName(i)}("))
+        assert(rs.getInt("FUNCTION_TYPE") === DatabaseMetaData.functionResultUnknown)
+        assert(rs.getString("SPECIFIC_NAME").startsWith("org.apache.spark.sql.catalyst"))
+      }
+      // Make sure there are no more elements
+      assert(!rs.next())
+    }
+
+    withJdbcStatement() { statement =>
+      val metaData = statement.getConnection.getMetaData
+      // Hive does not have an overlay function, so we use overlay to test.
+      checkResult(metaData.getFunctions(null, null, "overlay"), Seq("overlay"))
+      checkResult(metaData.getFunctions(null, null, "overla*"), Seq("overlay"))
+      checkResult(metaData.getFunctions(null, "", "overla*"), Seq("overlay"))
+      checkResult(metaData.getFunctions(null, null, "does-not-exist*"), Seq.empty)
+      checkResult(metaData.getFunctions(null, "default", "overlay"), Seq("overlay"))
+      checkResult(metaData.getFunctions(null, "default", "shift*"),
+        Seq("shiftleft", "shiftright", "shiftrightunsigned"))
+    }
+
+    withJdbcStatement() { statement =>
+      val metaData = statement.getConnection.getMetaData
+      val rs = metaData.getFunctions(null, "default", "upPer")
+      assert(rs.next())
+      assert(rs.getString("FUNCTION_SCHEM") === "default")
+      assert(rs.getString("FUNCTION_NAME") === "upper")
+      assert(rs.getString("REMARKS") ===
+        "upper(str) - Returns `str` with all characters changed to uppercase.")
+      assert(rs.getInt("FUNCTION_TYPE") === DatabaseMetaData.functionResultUnknown)
+      assert(rs.getString("SPECIFIC_NAME") === 
"org.apache.spark.sql.catalyst.expressions.Upper")
+      // Make sure there are no more elements
+      assert(!rs.next())
+    }
+  }
 }
diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java b/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java
index 5273c38..5dec8bd 100644
--- a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java
+++ b/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java
@@ -63,7 +63,7 @@ public class GetFunctionsOperation extends MetadataOperation {
   private final String schemaName;
   private final String functionName;
 
-  private final RowSet rowSet;
+  protected final RowSet rowSet;
 
   public GetFunctionsOperation(HiveSession parentSession,
       String catalogName, String schemaName, String functionName) {
diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java b/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java
index 6aa0c41..7f906f6 100644
--- a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java
+++ b/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java
@@ -63,7 +63,7 @@ public class GetFunctionsOperation extends MetadataOperation {
   private final String schemaName;
   private final String functionName;
 
-  private final RowSet rowSet;
+  protected final RowSet rowSet;
 
   public GetFunctionsOperation(HiveSession parentSession,
       String catalogName, String schemaName, String functionName) {

