PHILO-HE commented on code in PR #4939:
URL: https://github.com/apache/incubator-gluten/pull/4939#discussion_r1522442068
##########
backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala:
##########
@@ -174,6 +174,7 @@ object CHExpressionUtil {
ENCODE -> EncodeDecodeValidator(),
ARRAY_EXCEPT -> DefaultValidator(),
ARRAY_REPEAT -> DefaultValidator(),
- DATE_FROM_UNIX_DATE -> DefaultValidator()
+ DATE_FROM_UNIX_DATE -> DefaultValidator(),
+ SKEWNESS -> DefaultValidator()
Review Comment:
cc @taiyang-li, please note this.
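For context (my reading of the validator map, not verified against the backend): registering SKEWNESS with DefaultValidator marks `skewness` as unsupported on the ClickHouse backend, so an aggregate like the one below should fall back to vanilla Spark instead of being offloaded. A minimal sketch, assuming a SparkSession named `spark` and a table `tbl`:

```scala
// With SKEWNESS -> DefaultValidator() registered, this aggregate is expected
// to fall back to vanilla Spark on the CH backend rather than run natively.
spark.sql("SELECT skewness(value) FROM tbl").show()
```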
##########
backends-velox/src/main/scala/io/glutenproject/execution/HashAggregateExecTransformer.scala:
##########
@@ -386,23 +386,33 @@ abstract class HashAggregateExecTransformer(
val adjustedOrders = veloxOrders.map(sparkOrders.indexOf(_))
veloxTypes.zipWithIndex.foreach {
case (veloxType, idx) =>
- val sparkType = sparkTypes(adjustedOrders(idx))
- val attr = rewrittenInputAttributes(adjustedOrders(idx))
- val aggFuncInputAttrNode = ExpressionConverter
- .replaceWithExpressionTransformer(attr, originalInputAttributes)
- .doTransform(args)
- val expressionNode = if (sparkType != veloxType) {
- newInputAttributes +=
- attr.copy(dataType = veloxType)(attr.exprId, attr.qualifier)
- ExpressionBuilder.makeCast(
- ConverterUtils.getTypeNode(veloxType, attr.nullable),
- aggFuncInputAttrNode,
- SQLConf.get.ansiEnabled)
+ val adjustedIdx = adjustedOrders(idx)
+ if (adjustedIdx == -1) {
+ // This column is not found in Spark and is not used in actual
+ // calculations, so it is sufficient to pass a default value of its DataType.
+ val extraAttr = AttributeReference(veloxOrders(idx), veloxType)()
+ newInputAttributes += extraAttr
+ val lt = Literal.default(veloxType)
+ childNodes.add(ExpressionBuilder.makeLiteral(lt.value, lt.dataType, false))
Review Comment:
I'm not quite familiar with this part of the code. Do we still need to add this to
childNodes even if it is useless to the native backend?
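For reference, a minimal sketch of what the new branch feeds to the native plan (assuming only Spark's Catalyst Literal API; ExpressionBuilder and childNodes are the Gluten classes from the diff above):

```scala
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.types.DoubleType

// Literal.default(DoubleType) produces Literal(0.0, DoubleType), so the
// placeholder child node carries a non-null default value, not a null.
val lt = Literal.default(DoubleType)
assert(lt.value == 0.0 && lt.dataType == DoubleType)
```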
##########
backends-velox/src/main/scala/io/glutenproject/execution/HashAggregateExecTransformer.scala:
##########
@@ -386,23 +386,33 @@ abstract class HashAggregateExecTransformer(
val adjustedOrders = veloxOrders.map(sparkOrders.indexOf(_))
veloxTypes.zipWithIndex.foreach {
case (veloxType, idx) =>
- val sparkType = sparkTypes(adjustedOrders(idx))
- val attr = rewrittenInputAttributes(adjustedOrders(idx))
- val aggFuncInputAttrNode = ExpressionConverter
- .replaceWithExpressionTransformer(attr, originalInputAttributes)
- .doTransform(args)
- val expressionNode = if (sparkType != veloxType) {
- newInputAttributes +=
- attr.copy(dataType = veloxType)(attr.exprId, attr.qualifier)
- ExpressionBuilder.makeCast(
- ConverterUtils.getTypeNode(veloxType, attr.nullable),
- aggFuncInputAttrNode,
- SQLConf.get.ansiEnabled)
+ val adjustedIdx = adjustedOrders(idx)
+ if (adjustedIdx == -1) {
+ // This column is not found in Spark and is not used in actual
+ // calculations, so it is sufficient to pass a default value of its DataType.
+ val extraAttr = AttributeReference(veloxOrders(idx), veloxType)()
+ newInputAttributes += extraAttr
+ val lt = Literal.default(veloxType)
+ childNodes.add(ExpressionBuilder.makeLiteral(lt.value, lt.dataType, false))
Review Comment:
If it is really needed, maybe just make a null literal of the corresponding type,
for simplicity?
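A minimal sketch of that suggestion (hypothetical, reusing the makeLiteral signature visible in the diff, with the nullable flag flipped to true):

```scala
// Hypothetical alternative to Literal.default: pass a typed null and mark
// the literal node as nullable, since the value is never read natively.
val lt = Literal(null, veloxType)
childNodes.add(ExpressionBuilder.makeLiteral(lt.value, lt.dataType, true))
```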