chenhao-db commented on code in PR #45708:
URL: https://github.com/apache/spark/pull/45708#discussion_r1546623105


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala:
##########
@@ -53,3 +66,320 @@ case class ParseJson(child: Expression)
   override protected def withNewChildInternal(newChild: Expression): ParseJson 
=
     copy(child = newChild)
 }
+
+/**
+ * Parser for the literal path of `VariantGet`: a `$` root followed by any
+ * number of object key or array index segments, e.g. `$.a[0]['b']`.
+ */
+object VariantPathParser extends RegexParsers {
+  // A path segment in the `VariantGet` expression represents either an
+  // object key access or an array index access.
+  type PathSegment = Either[String, Int]
+
+  // `RegexParsers` skips whitespace before every literal and regex by
+  // default, which would silently drop the leading blanks of a key
+  // (`$[' a']` would yield the key `a`) and accept stray blanks such as
+  // `$[ 1]`. Whitespace is significant in a path, so turn the skipping off.
+  override def skipWhitespace: Boolean = false
+
+  private def root: Parser[Char] = '$'
+
+  // Parse index segment like `[123]`. The digits are converted defensively:
+  // `\d+` also matches numbers that do not fit in an `Int`, and a bare
+  // `toInt` would throw `NumberFormatException` out of `parse` instead of
+  // letting it return `None`.
+  private def index: Parser[PathSegment] =
+    ('[' ~> "\\d+".r <~ ']').flatMap { digits =>
+      scala.util.Try(digits.toInt).toOption match {
+        case Some(i) => success[PathSegment](scala.util.Right(i))
+        case None => failure(s"array index out of range: $digits")
+      }
+    }
+
+  // Parse key segment like `.name`, `['name']`, or `["name"]`. A dotted key
+  // runs up to the next `.` or `[`; a bracketed key may contain anything
+  // except its own quote character and `?`.
+  private def key: Parser[PathSegment] = {
+    val dotted = '.' ~> "[^\\.\\[]+".r
+    val singleQuoted = "['" ~> "[^\\'\\?]+".r <~ "']"
+    val doubleQuoted = "[\"" ~> "[^\\\"\\?]+".r <~ "\"]"
+    (dotted | singleQuoted | doubleQuoted) ^^ { name => scala.util.Left(name) }
+  }
+
+  // `parseAll` in `parse` already requires the whole input to be consumed,
+  // so no extra `phrase` wrapper is needed here.
+  private val parser: Parser[List[PathSegment]] = root ~> rep(key | index)
+
+  /**
+   * Parses a path string into its segments.
+   * @param str The path to parse; it must start with `$`.
+   * @return The segments in path order, or `None` if `str` is not a valid
+   *         path.
+   */
+  def parse(str: String): Option[Array[PathSegment]] = {
+    this.parseAll(parser, str) match {
+      case Success(result, _) => Some(result.toArray)
+      case _ => None
+    }
+  }
+}
+
+/**
+ * The implementation for `variant_get` and `try_variant_get` expressions.
+ * Extracts a sub-variant value according to a path and casts it into a
+ * concrete data type.
+ * @param child The source variant value to extract from.
+ * @param path A literal path expression. It has the same format as the JSON
+ *             path.
+ * @param targetType The target data type to cast into. Any non-nullable
+ *                   annotations are ignored.
+ * @param failOnError Controls whether the expression should throw an
+ *                    exception or return null if the cast fails.
+ * @param timeZoneId A string identifier of a time zone. It is required by
+ *                   timestamp-related casts.
+ */
+case class VariantGet(
+    child: Expression,
+    path: Expression,
+    targetType: DataType,
+    failOnError: Boolean,
+    timeZoneId: Option[String] = None)
+    extends BinaryExpression
+    with TimeZoneAwareExpression
+    with NullIntolerant
+    with ExpectsInputTypes
+    with CodegenFallback

Review Comment:
   Done. The current tests should be enough because `checkEvaluation` checks 
the codegen path.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to