gengliangwang commented on code in PR #54394:
URL: https://github.com/apache/spark/pull/54394#discussion_r3408213775
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala:
##########
@@ -333,6 +334,18 @@ class V2ExpressionBuilder(e: Expression, isPredicate:
Boolean = false) extends L
case _ =>
None
}
+ case v: VariantGet if v.path.foldable =>
+ (Option(v.path.eval()).map(_.toString), generateExpression(v.child))
match {
+ case (Some(path), Some(colRef: FieldReference)) =>
+ val vg = new V2VariantGet(colRef, path, v.targetType, v.failOnError,
+ v.timeZoneId.orNull)
Review Comment:
`v.timeZoneId.orNull` is only ever exercised with `None` here: every test
builds the catalyst `VariantGet` with the default `timeZoneId = None`, and the
two `toString` tests construct `V2VariantGet` directly. A regression that
dropped this to a hardcoded `null` would pass the whole suite. Worth a builder
test that sets a resolved timezone and asserts it reaches
`V2VariantGet.timeZoneId()` — and, since the `Some(colRef: FieldReference)`
match now admits nested columns, a struct-nested variant column case would pin
that path too.
##########
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala:
##########
@@ -818,6 +819,160 @@ class DataSourceV2StrategySuite extends
SharedSparkSession {
FieldReference("cdouble"))))
}
+ test("VariantGet translates to V2VariantGet connector expression") {
+ val ref = AttributeReference("v", VariantType)()
+ val path = Literal.create("$.city", StringType)
+ val expr = VariantGet(ref, path, StringType, failOnError = true)
+ val gt = GreaterThan(expr, Literal.create("NYC", StringType))
+ val result = new V2ExpressionBuilder(gt, isPredicate = true).build()
+ result match {
+ case Some(v2pred: Predicate) if v2pred.name() == ">" =>
+ v2pred.children()(0) match {
+ case vg: V2VariantGet =>
+ assert(vg.path() == "$.city")
+ assert(vg.targetType() == StringType)
+ assert(vg.failOnError())
+ assert(vg.timeZoneId() == null)
+ assert(vg.children().length == 1)
+ assert(vg.children()(0) == FieldReference("v"))
+ case other => fail(s"expected V2VariantGet, got
${other.getClass.getName}")
+ }
+ case _ => fail("expected predicate with name '>'")
+ }
+ }
+
+ test("try_variant_get translates with failOnError=false") {
+ val ref = AttributeReference("v", VariantType)()
+ val path = Literal.create("$.city", StringType)
+ val expr = VariantGet(ref, path, StringType, failOnError = false)
+ val gt = GreaterThan(expr, Literal.create("NYC", StringType))
+ val result = new V2ExpressionBuilder(gt, isPredicate = true).build()
+ result match {
+ case Some(v2pred: Predicate) if v2pred.name() == ">" =>
+ v2pred.children()(0) match {
+ case vg: V2VariantGet =>
+ assert(!vg.failOnError())
+ assert(vg.path() == "$.city")
+ case other => fail(s"expected V2VariantGet, got
${other.getClass.getName}")
+ }
+ case _ => fail("expected predicate with name '>'")
+ }
+ }
+
+ test("VariantGet predicate is translated by translateFilterV2") {
+ val ref = AttributeReference("v", VariantType)()
+ val path = Literal.create("$.city", StringType)
+ val expr = VariantGet(ref, path, StringType, failOnError = true)
+ val gt = GreaterThan(expr, Literal.create("NYC", StringType))
+ val result = DataSourceV2Strategy.translateFilterV2(gt)
+ assert(result.isDefined)
+ result.get.children()(0) match {
+ case vg: V2VariantGet =>
+ assert(vg.path() == "$.city")
+ assert(vg.targetType() == StringType)
+ assert(vg.failOnError())
+ case other =>
+ fail(s"expected V2VariantGet in translated predicate, got " +
+ s"${other.getClass.getName}")
+ }
+ }
+
+ test("VariantGet with integer targetType preserves type") {
+ val ref = AttributeReference("v", VariantType)()
+ val path = Literal.create("$.count", StringType)
+ val expr = VariantGet(ref, path, IntegerType, failOnError = true)
+ val gt = GreaterThan(expr, Literal(100))
+ val result = new V2ExpressionBuilder(gt, isPredicate = true).build()
+ assert(result.isDefined)
+ result.get.children()(0) match {
+ case vg: V2VariantGet =>
+ assert(vg.path() == "$.count")
+ assert(vg.targetType() == IntegerType)
+ case other => fail(s"expected V2VariantGet, got
${other.getClass.getName}")
+ }
+ }
+
+ test("VariantGet with non-foldable path returns None") {
+ val ref = AttributeReference("v", VariantType)()
+ val s = AttributeReference("s", StringType)()
+ val expr = VariantGet(ref, s, StringType, failOnError = true)
+ val result = new V2ExpressionBuilder(expr).build()
+ assert(result.isEmpty, "non-foldable path should not translate")
+ }
+
+ test("VariantGet with foldable null path returns None") {
+ val ref = AttributeReference("v", VariantType)()
+ val nullPath = Literal.create(null, StringType)
+ val expr = VariantGet(ref, nullPath, StringType, failOnError = true)
+ val result = new V2ExpressionBuilder(expr).build()
+ assert(result.isEmpty, "null path should not translate (graceful, no NPE)")
+ }
+
+ test("VariantGet with non-column child returns None") {
+ val lit = Literal("v")
+ val path = Literal.create("$.a", StringType)
+ val expr = VariantGet(lit, path, StringType, failOnError = true)
+ val result = new V2ExpressionBuilder(expr).build()
+ assert(result.isEmpty, "non-column child should not translate")
+ }
+
+ test("VariantGet boolean targetType wraps in BOOLEAN_EXPRESSION predicate
when isPredicate") {
+ val ref = AttributeReference("v", VariantType)()
+ val path = Literal.create("$.flag", StringType)
+ val expr = VariantGet(ref, path, BooleanType, failOnError = true)
+ val result = new V2ExpressionBuilder(expr, isPredicate = true).build()
+ result match {
+ case Some(p: Predicate) if p.name() == "BOOLEAN_EXPRESSION" =>
+ p.children()(0) match {
+ case vg: V2VariantGet =>
+ assert(vg.targetType() == BooleanType)
+ case other =>
+ fail(s"expected V2VariantGet inside BOOLEAN_EXPRESSION, got " +
+ s"${other.getClass.getName}")
+ }
+ case _ => fail(s"expected BOOLEAN_EXPRESSION predicate, got $result")
+ }
+ }
+
+ test("VariantGet boolean targetType does not crash under Or (isPredicate
path)") {
+ val ref = AttributeReference("v", VariantType)()
+ val path = Literal.create("$.flag", StringType)
+ val boolExpr = VariantGet(ref, path, BooleanType, failOnError = true)
+ val x = AttributeReference("x", IntegerType)()
+ val orExpr = Or(boolExpr, GreaterThan(x, Literal(0)))
+ // Without the fix, And/Or assert V2Predicate and crash with
AssertionError.
+ // With the fix, boolExpr is wrapped in BOOLEAN_EXPRESSION and Or
translates.
Review Comment:
This comment is written against the PR's history — once merged there's no
"the fix" to contrast with. Reframe to the current invariant:
```suggestion
// A boolean-typed VariantGet in predicate position must translate to a
V2Predicate, or the
// enclosing And/Or's `isInstanceOf[V2Predicate]` assert crashes
planning; the BOOLEAN_EXPRESSION
// wrapper provides that.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]