rui-mo commented on code in PR #5626:
URL: https://github.com/apache/incubator-gluten/pull/5626#discussion_r1606177888
##########
cpp/velox/substrait/SubstraitToVeloxPlan.cc:
##########
@@ -793,23 +794,34 @@ const core::WindowNode::Frame createWindowFrame(
VELOX_FAIL("the window type only support ROWS and RANGE, and the input
type is ", std::to_string(type));
}
- auto boundTypeConversion = [](::substrait::Expression_WindowFunction_Bound
boundType)
+ auto boundTypeConversion = [&](::substrait::Expression_WindowFunction_Bound
boundType)
-> std::tuple<core::WindowNode::BoundType, core::TypedExprPtr> {
- // TODO: support non-literal expression.
if (boundType.has_current_row()) {
return std::make_tuple(core::WindowNode::BoundType::kCurrentRow,
nullptr);
} else if (boundType.has_unbounded_following()) {
return std::make_tuple(core::WindowNode::BoundType::kUnboundedFollowing,
nullptr);
} else if (boundType.has_unbounded_preceding()) {
return std::make_tuple(core::WindowNode::BoundType::kUnboundedPreceding,
nullptr);
} else if (boundType.has_following()) {
- return std::make_tuple(
- core::WindowNode::BoundType::kFollowing,
- std::make_shared<core::ConstantTypedExpr>(BIGINT(),
variant(boundType.following().offset())));
+ if (boundType.following().has_offset()) {
+ return std::make_tuple(
+ core::WindowNode::BoundType::kFollowing,
+ std::make_shared<core::ConstantTypedExpr>(BIGINT(),
variant(boundType.following().offset())));
+ } else {
+ return std::make_tuple(
+ core::WindowNode::BoundType::kFollowing,
+ exprConverter_->toVeloxExpr(boundType.following().ref(),
inputType));
+ }
} else if (boundType.has_preceding()) {
- return std::make_tuple(
- core::WindowNode::BoundType::kPreceding,
- std::make_shared<core::ConstantTypedExpr>(BIGINT(),
variant(boundType.preceding().offset())));
+ if (boundType.preceding().has_offset()) {
+ return std::make_tuple(
+ core::WindowNode::BoundType::kPreceding,
+ std::make_shared<core::ConstantTypedExpr>(BIGINT(),
variant(boundType.preceding().offset())));
+ } else {
+ return std::make_tuple(
+ core::WindowNode::BoundType::kPreceding,
+ exprConverter_->toVeloxExpr(boundType.preceding().ref(),
inputType));
+ }
Review Comment:
ditto
##########
gluten-core/src/main/java/org/apache/gluten/substrait/expression/WindowFunctionNode.java:
##########
@@ -80,20 +91,50 @@ private Expression.WindowFunction.Bound.Builder setBound(
builder.setUnboundedFollowing(followingBuilder.build());
break;
default:
- try {
- Long offset = Long.valueOf(boundType);
- if (offset < 0) {
- Expression.WindowFunction.Bound.Preceding.Builder
offsetPrecedingBuilder =
- Expression.WindowFunction.Bound.Preceding.newBuilder();
- offsetPrecedingBuilder.setOffset(0 - offset);
- builder.setPreceding(offsetPrecedingBuilder.build());
- } else {
- Expression.WindowFunction.Bound.Following.Builder
offsetFollowingBuilder =
- Expression.WindowFunction.Bound.Following.newBuilder();
- offsetFollowingBuilder.setOffset(offset);
- builder.setFollowing(offsetFollowingBuilder.build());
+ if (boundType instanceof PreComputeRangeFrameBound) {
+ ExpressionNode refNode =
+ ExpressionConverter.replaceWithExpressionTransformer(
+ ((PreComputeRangeFrameBound)
boundType).child().toAttribute(),
+
JavaConverters.asScalaIteratorConverter(originalInputAttributes.iterator())
+ .asScala()
+ .toSeq())
+ .doTransform(new HashMap<String, Long>());
+ try {
+ Long offset = Long.valueOf(boundType.eval(null).toString());
+ if (offset < 0) {
+ Expression.WindowFunction.Bound.Preceding.Builder
refPrecedingBuilder =
+ Expression.WindowFunction.Bound.Preceding.newBuilder();
+ refPrecedingBuilder.setRef(refNode.toProtobuf());
+ builder.setPreceding(refPrecedingBuilder.build());
+ } else {
+ Expression.WindowFunction.Bound.Following.Builder
refFollowingBuilder =
+ Expression.WindowFunction.Bound.Following.newBuilder();
+ refFollowingBuilder.setRef(refNode.toProtobuf());
+ builder.setFollowing(refFollowingBuilder.build());
+ }
+ } catch (NumberFormatException e) {
+ throw new UnsupportedOperationException(
+ "Unsupported Window Function Frame Type:" + boundType);
+ }
+ } else if (boundType.foldable()) {
+ try {
+ Long offset = Long.valueOf(boundType.eval(null).toString());
+ if (offset < 0) {
+ Expression.WindowFunction.Bound.Preceding.Builder
offsetPrecedingBuilder =
+ Expression.WindowFunction.Bound.Preceding.newBuilder();
+ offsetPrecedingBuilder.setOffset(0 - offset);
Review Comment:
Do we need -offset?
##########
cpp/velox/substrait/SubstraitToVeloxPlan.cc:
##########
@@ -793,23 +794,34 @@ const core::WindowNode::Frame createWindowFrame(
VELOX_FAIL("the window type only support ROWS and RANGE, and the input
type is ", std::to_string(type));
}
- auto boundTypeConversion = [](::substrait::Expression_WindowFunction_Bound
boundType)
+ auto boundTypeConversion = [&](::substrait::Expression_WindowFunction_Bound
boundType)
-> std::tuple<core::WindowNode::BoundType, core::TypedExprPtr> {
- // TODO: support non-literal expression.
if (boundType.has_current_row()) {
return std::make_tuple(core::WindowNode::BoundType::kCurrentRow,
nullptr);
} else if (boundType.has_unbounded_following()) {
return std::make_tuple(core::WindowNode::BoundType::kUnboundedFollowing,
nullptr);
} else if (boundType.has_unbounded_preceding()) {
return std::make_tuple(core::WindowNode::BoundType::kUnboundedPreceding,
nullptr);
} else if (boundType.has_following()) {
- return std::make_tuple(
- core::WindowNode::BoundType::kFollowing,
- std::make_shared<core::ConstantTypedExpr>(BIGINT(),
variant(boundType.following().offset())));
+ if (boundType.following().has_offset()) {
+ return std::make_tuple(
+ core::WindowNode::BoundType::kFollowing,
+ std::make_shared<core::ConstantTypedExpr>(BIGINT(),
variant(boundType.following().offset())));
+ } else {
+ return std::make_tuple(
+ core::WindowNode::BoundType::kFollowing,
+ exprConverter_->toVeloxExpr(boundType.following().ref(),
inputType));
+ }
Review Comment:
It seems the only difference of the two branches is the expression. Can we
differentiate the expressions only and reuse the rest of code?
##########
gluten-core/src/main/resources/substrait/proto/substrait/algebra.proto:
##########
@@ -989,18 +989,28 @@ message Expression {
message Bound {
// Defines that the bound extends this far back from the current record.
message Preceding {
- // A strictly positive integer specifying the number of records that
- // the window extends back from the current record. Required. Use
- // CurrentRow for offset zero and Following for negative offsets.
- int64 offset = 1;
+ oneof kind {
+ // A strictly positive integer specifying the number of records that
+ // the window extends back from the current record. Use
+ // CurrentRow for offset zero and Following for negative offsets.
+ int64 offset = 1;
+
+ // the reference to pre-project range frame boundary.
+ Expression ref = 2;
+ }
Review Comment:
Would you document this change to Substrait in
https://github.com/apache/incubator-gluten/blob/main/docs/developers/SubstraitModifications.md?
##########
gluten-core/src/main/java/org/apache/gluten/substrait/expression/WindowFunctionNode.java:
##########
@@ -16,41 +16,50 @@
*/
package org.apache.gluten.substrait.expression;
+import org.apache.gluten.expression.ExpressionConverter;
import org.apache.gluten.substrait.type.TypeNode;
import io.substrait.proto.Expression;
import io.substrait.proto.FunctionArgument;
import io.substrait.proto.FunctionOption;
import io.substrait.proto.WindowType;
+import org.apache.spark.sql.catalyst.expressions.Attribute;
+import org.apache.spark.sql.catalyst.expressions.PreComputeRangeFrameBound;
import java.io.Serializable;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import scala.collection.JavaConverters;
+
public class WindowFunctionNode implements Serializable {
private final Integer functionId;
private final List<ExpressionNode> expressionNodes = new ArrayList<>();
private final String columnName;
private final TypeNode outputTypeNode;
- private final String upperBound;
+ private final org.apache.spark.sql.catalyst.expressions.Expression
upperBound;
- private final String lowerBound;
+ private final org.apache.spark.sql.catalyst.expressions.Expression
lowerBound;
private final String frameType;
private final boolean ignoreNulls;
+ private final List<Attribute> originalInputAttributes;
+
WindowFunctionNode(
Integer functionId,
List<ExpressionNode> expressionNodes,
String columnName,
TypeNode outputTypeNode,
- String upperBound,
- String lowerBound,
+ org.apache.spark.sql.catalyst.expressions.Expression upperBound,
+ org.apache.spark.sql.catalyst.expressions.Expression lowerBound,
Review Comment:
Can we import `org.apache.spark.sql.catalyst.expressions.Expression` and use
it here?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]