chenhao-db commented on code in PR #49609:
URL: https://github.com/apache/spark/pull/49609#discussion_r1926282951
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala:
##########
@@ -265,10 +258,16 @@ case class VariantGet(
override lazy val dataType: DataType = targetType.asNullable
+ private lazy val pathIsFoldable: Boolean = path.foldable
+
@transient private lazy val parsedPath = {
- val pathValue = path.eval().toString
- VariantPathParser.parse(pathValue).getOrElse {
- throw QueryExecutionErrors.invalidVariantGetPath(pathValue, prettyName)
+ if (pathIsFoldable) {
+ val pathValue = path.eval().toString
+ VariantPathParser.parse(pathValue).getOrElse {
Review Comment:
I suggest defining a helper function for this piece of code. That way, the
generated code doesn't have to define `ensureNonEmpty`.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala:
##########
@@ -288,30 +287,84 @@ case class VariantGet(
zoneId)
protected override def nullSafeEval(input: Any, path: Any): Any = {
- VariantGet.variantGet(input.asInstanceOf[VariantVal], parsedPath,
dataType, castArgs)
+ if (pathIsFoldable) {
+ VariantGet.variantGet(input.asInstanceOf[VariantVal], parsedPath,
dataType, castArgs)
+ } else {
+ val pathValue = path.toString
+ val parsedRowPath = VariantPathParser.parse(pathValue).getOrElse {
+ throw QueryExecutionErrors.invalidVariantGetPath(pathValue, prettyName)
+ }
+ VariantGet.variantGet(input.asInstanceOf[VariantVal], parsedRowPath,
dataType, castArgs)
+ }
}
protected override def doGenCode(ctx: CodegenContext, ev: ExprCode):
ExprCode = {
- val childCode = child.genCode(ctx)
- val tmp = ctx.freshVariable("tmp", classOf[Object])
- val parsedPathArg = ctx.addReferenceObj("parsedPath", parsedPath)
- val dataTypeArg = ctx.addReferenceObj("dataType", dataType)
- val castArgsArg = ctx.addReferenceObj("castArgs", castArgs)
- val code = code"""
- ${childCode.code}
- boolean ${ev.isNull} = ${childCode.isNull};
- ${CodeGenerator.javaType(dataType)} ${ev.value} =
${CodeGenerator.defaultValue(dataType)};
- if (!${ev.isNull}) {
- Object $tmp =
org.apache.spark.sql.catalyst.expressions.variant.VariantGet.variantGet(
- ${childCode.value}, $parsedPathArg, $dataTypeArg, $castArgsArg);
- if ($tmp == null) {
- ${ev.isNull} = true;
- } else {
- ${ev.value} = (${CodeGenerator.boxedType(dataType)})$tmp;
+ if (pathIsFoldable) {
+ val childCode = child.genCode(ctx)
+ val tmp = ctx.freshVariable("tmp", classOf[Object])
+ val parsedPathArg = ctx.addReferenceObj("parsedPath", parsedPath)
+ val dataTypeArg = ctx.addReferenceObj("dataType", dataType)
+ val castArgsArg = ctx.addReferenceObj("castArgs", castArgs)
+ val code = code"""
+ ${childCode.code}
+ boolean ${ev.isNull} = ${childCode.isNull};
+ ${CodeGenerator.javaType(dataType)} ${ev.value} =
${CodeGenerator.defaultValue(dataType)};
+ if (!${ev.isNull}) {
+ Object $tmp =
org.apache.spark.sql.catalyst.expressions.variant.VariantGet.variantGet(
+ ${childCode.value}, $parsedPathArg, $dataTypeArg, $castArgsArg);
+ if ($tmp == null) {
+ ${ev.isNull} = true;
+ } else {
+ ${ev.value} = (${CodeGenerator.boxedType(dataType)})$tmp;
+ }
}
- }
- """
- ev.copy(code = code)
+ """
+ ev.copy(code = code)
+ } else {
+ val tmp = ctx.freshVariable("tmp", classOf[Object])
+ val childCode = child.genCode(ctx)
+ val pathCode = path.genCode(ctx)
+ val dataTypeArg = ctx.addReferenceObj("dataType", dataType)
+ val castArgsArg = ctx.addReferenceObj("castArgs", castArgs)
+ val parsedPathVar = ctx.freshName("parsedPath")
+ val ensureNonEmpty = ctx.freshName("ensureNonEmpty")
+ val optionalParsedPathType =
+ CodeGenerator.typeName(classOf[Option[Array[VariantPathSegment]]])
+ val parsedPathType =
CodeGenerator.typeName(classOf[Array[VariantPathSegment]])
+ ctx.addNewFunction(ensureNonEmpty,
+ s"""
+ private void $ensureNonEmpty($optionalParsedPathType p, String
pathString)
+ throws Throwable {
+ if (p.isEmpty()) {
+ throw QueryExecutionErrors.invalidVariantGetPath(pathString,
"$prettyName");
+ }
+ }
+ """)
+ val code = code"""
+ ${childCode.code}
+ ${pathCode.code}
+ boolean ${ev.isNull} = ${childCode.isNull} || ${pathCode.isNull};
+ ${CodeGenerator.javaType(dataType)} ${ev.value} =
${CodeGenerator.defaultValue(dataType)};
+ if (!${ev.isNull}) {
+ $optionalParsedPathType $parsedPathVar = ($optionalParsedPathType)
+
org.apache.spark.sql.catalyst.expressions.variant.VariantPathParser.parse(
+ ${pathCode.value}.toString());
+ try {
Review Comment:
The try-catch is meaningless.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala:
##########
@@ -265,10 +258,16 @@ case class VariantGet(
override lazy val dataType: DataType = targetType.asNullable
+ private lazy val pathIsFoldable: Boolean = path.foldable
+
@transient private lazy val parsedPath = {
Review Comment:
To simplify this, you can make `parsedPath` nullable: a null value then
means that `path` is not foldable.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]