HyukjinKwon commented on a change in pull request #30708:
URL: https://github.com/apache/spark/pull/30708#discussion_r540659765
##########
File path:
sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
##########
@@ -837,6 +847,237 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
}
}
+ private def convertExpDescs(hive: Hive,
+ table: Table,
+ exp: Expression,
+ value: Object,
+ isRecursive: Boolean): JList[ExprNodeDesc] = {
+ val exporNodeDesc = new JArrayList[ExprNodeDesc]()
+ try {
+ logDebug("convertExpDescs-isRecurrsive:" + isRecursive)
+ // construct column genericUDFConcat
+ val funInfo = FunctionRegistry.getFunctionInfo(exp.prettyName)
+ val concatColumns = new JArrayList[ExprNodeDesc]()
+ logDebug("convertExpDescs-exp.children:" + exp.children)
+ exp.children.foreach(column => {
+ column match {
+ case attr: AttributeReference =>
+ logDebug("convertExpDescs-attr:" + attr )
+ concatColumns.add(new ExprNodeColumnDesc(
+ TypeInfoFactory.getPrimitiveTypeInfo(attr.dataType.typeName),
+ attr.name, "", false))
+ case liter: Literal =>
+ logDebug("convertExpDescs-liter:" + liter)
+ val literalValue = liter.value.toString
+ concatColumns.add(new ExprNodeConstantDesc(literalValue))
+ }
+ }
+ )
+ logDebug("convertExpDescs-concatColumns:" + concatColumns)
+ val expGenFunDesc =
ExprNodeGenericFuncDesc.newInstance(funInfo.getGenericUDF, concatColumns)
+ exporNodeDesc.add(expGenFunDesc)
+ if (!isRecursive) {
+ // construct genericUDFConcat ExprNode
+ val expConstDesc = new
ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, value)
+ exporNodeDesc.add(expConstDesc)
+ }
+ } catch {
+ case e: Throwable =>
+ logError(s"conver expression to node failed ${e.getMessage}!")
+ throw e
+ }
+ logDebug("convertExpDescs-exporNodeDesc:" + exporNodeDesc)
+ exporNodeDesc
+ }
+
+ private def convertUDFExpToPartitions(hive: Hive,
+ table: Table,
+ op: BinaryComparison,
+ a: Expression,
+ v: Object): Option[ExprNodeGenericFuncDesc] = {
+ try {
+ val opFun = FunctionRegistry.getFunctionInfo(op.sqlOperator)
+ logDebug("convertUDFExpToPartitions-table:" + table + ",op:" +
+ op + ",a:" + a + ",v:" + v + ",isSubstr")
+ if (opFun.getGenericUDF == null) {
+ throw new UDFArgumentException(s"${op.sqlOperator} is an aggregation "
+
+ s"function or a table function.")
+ }
+ Some(ExprNodeGenericFuncDesc.newInstance(
+ opFun.getGenericUDF, convertExpDescs(hive, table, a, v, false)))
+ } catch {
+ case e: Throwable =>
+ logError(s"convert predicates expression failed ${e.getMessage}!")
+ None
+ }
+ }
+
+ private def convertAttributeExpToPartitions(name: String,
+ dataType: DataType, op: BinaryComparison,
+ value: Object):
Option[ExprNodeGenericFuncDesc] = {
+ val AttributeColumn = new JArrayList[ExprNodeDesc]()
+ AttributeColumn.add(new ExprNodeColumnDesc(
+ TypeInfoFactory.getPrimitiveTypeInfo(dataType.typeName),
+ name, "", false))
+ val expConstDesc = new
ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, value.toString)
+ AttributeColumn.add(expConstDesc)
+ val funInfo = FunctionRegistry.getFunctionInfo(op.symbol)
+ Some(ExprNodeGenericFuncDesc.newInstance(funInfo.getGenericUDF,
AttributeColumn))
+ }
+ def convertUDFFilters(hive: Hive,
+ table: Table, filters: Seq[Expression]): Seq[ExprNodeGenericFuncDesc] = {
+ filters.flatMap(convertExp(hive, table))
+ }
+
+ def convertExp(hive: Hive, table: Table)(expr: Expression):
Option[ExprNodeGenericFuncDesc] = {
+ val useConcatAdvancedFilter = SQLConf.get.metastorePredicateConcatFilter
+ logDebug("convertExp-expr:" + expr)
+ expr match {
+ // Prune partition of Concat
+ case op@BinaryComparison(a: Concat, Literal(v, _: StringType)) =>
+ convertUDFExpToPartitions(hive, table, op, a, v.toString)
+ case op@BinaryComparison(Literal(v, _: StringType), a: Concat) =>
+ convertUDFExpToPartitions(hive, table, op, a, v.toString)
+
+ // Prune partition of ConcatWs
+ case op@BinaryComparison(a: ConcatWs, Literal(v, _: StringType)) =>
Review comment:
Nah .. I don't think this is the right direction for Spark. Maintaining the
list of Hive function case matches would be a very big overhead, and we would
also need to verify that each function behaves identically in Spark and Hive.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]