This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 8e73f7ca feat: Improve cast compatibility tests and docs (#379)
8e73f7ca is described below

commit 8e73f7cab5489d5918512b4ae206e39b96242320
Author: Andy Grove <[email protected]>
AuthorDate: Mon May 6 22:41:45 2024 -0600

    feat: Improve cast compatibility tests and docs (#379)
---
 docs/source/user-guide/compatibility-template.md   |  18 +-
 docs/source/user-guide/compatibility.md            | 203 +++++++++------------
 .../main/scala/org/apache/comet/GenerateDocs.scala |  37 ++--
 .../org/apache/comet/expressions/CometCast.scala   | 107 ++++++++---
 .../org/apache/comet/serde/QueryPlanSerde.scala    |   2 +-
 .../scala/org/apache/comet/CometCastSuite.scala    |  59 ++++--
 .../org/apache/comet/exec/CometExecSuite.scala     |   3 +-
 7 files changed, 251 insertions(+), 178 deletions(-)

diff --git a/docs/source/user-guide/compatibility-template.md b/docs/source/user-guide/compatibility-template.md
index deaca2d2..64f87135 100644
--- a/docs/source/user-guide/compatibility-template.md
+++ b/docs/source/user-guide/compatibility-template.md
@@ -44,7 +44,19 @@ Cast operations in Comet fall into three levels of support:
 - **Unsupported**: Comet does not provide a native version of this cast 
expression and the query stage will fall back to
   Spark.
 
-The following table shows the current cast operations supported by Comet. Any 
cast that does not appear in this
-table (such as those involving complex types and timestamp_ntz, for example) 
are not supported by Comet.
+### Compatible Casts
 
-<!--CAST_TABLE-->
+The following cast operations are generally compatible with Spark except for 
the differences noted here.
+
+<!--COMPAT_CAST_TABLE-->
+
+### Incompatible Casts
+
+The following cast operations are not compatible with Spark for all inputs and 
are disabled by default.
+
+<!--INCOMPAT_CAST_TABLE-->
+
+### Unsupported Casts
+
+Any cast not listed in the previous tables is currently unsupported. We are 
working on adding more. See the 
+[tracking issue](https://github.com/apache/datafusion-comet/issues/286) for 
more details.
diff --git a/docs/source/user-guide/compatibility.md b/docs/source/user-guide/compatibility.md
index 9a2478d3..57a4271f 100644
--- a/docs/source/user-guide/compatibility.md
+++ b/docs/source/user-guide/compatibility.md
@@ -38,122 +38,89 @@ Cast operations in Comet fall into three levels of support:
 
 - **Compatible**: The results match Apache Spark
 - **Incompatible**: The results may match Apache Spark for some inputs, but 
there are known issues where some inputs
-  will result in incorrect results or exceptions. The query stage will fall 
back to Spark by default. Setting
-  `spark.comet.cast.allowIncompatible=true` will allow all incompatible casts 
to run natively in Comet, but this is not
-  recommended for production use.
+will result in incorrect results or exceptions. The query stage will fall back 
to Spark by default. Setting
+`spark.comet.cast.allowIncompatible=true` will allow all incompatible casts to 
run natively in Comet, but this is not
+recommended for production use.
 - **Unsupported**: Comet does not provide a native version of this cast 
expression and the query stage will fall back to
-  Spark.
-
-The following table shows the current cast operations supported by Comet. Any 
cast that does not appear in this
-table (such as those involving complex types and timestamp_ntz, for example) 
are not supported by Comet.
-
-| From Type | To Type   | Compatible?  | Notes                               |
-| --------- | --------- | ------------ | ----------------------------------- |
-| boolean   | byte      | Compatible   |                                     |
-| boolean   | short     | Compatible   |                                     |
-| boolean   | integer   | Compatible   |                                     |
-| boolean   | long      | Compatible   |                                     |
-| boolean   | float     | Compatible   |                                     |
-| boolean   | double    | Compatible   |                                     |
-| boolean   | decimal   | Unsupported  |                                     |
-| boolean   | string    | Compatible   |                                     |
-| boolean   | timestamp | Unsupported  |                                     |
-| byte      | boolean   | Compatible   |                                     |
-| byte      | short     | Compatible   |                                     |
-| byte      | integer   | Compatible   |                                     |
-| byte      | long      | Compatible   |                                     |
-| byte      | float     | Compatible   |                                     |
-| byte      | double    | Compatible   |                                     |
-| byte      | decimal   | Compatible   |                                     |
-| byte      | string    | Compatible   |                                     |
-| byte      | binary    | Unsupported  |                                     |
-| byte      | timestamp | Unsupported  |                                     |
-| short     | boolean   | Compatible   |                                     |
-| short     | byte      | Compatible   |                                     |
-| short     | integer   | Compatible   |                                     |
-| short     | long      | Compatible   |                                     |
-| short     | float     | Compatible   |                                     |
-| short     | double    | Compatible   |                                     |
-| short     | decimal   | Compatible   |                                     |
-| short     | string    | Compatible   |                                     |
-| short     | binary    | Unsupported  |                                     |
-| short     | timestamp | Unsupported  |                                     |
-| integer   | boolean   | Compatible   |                                     |
-| integer   | byte      | Compatible   |                                     |
-| integer   | short     | Compatible   |                                     |
-| integer   | long      | Compatible   |                                     |
-| integer   | float     | Compatible   |                                     |
-| integer   | double    | Compatible   |                                     |
-| integer   | decimal   | Compatible   |                                     |
-| integer   | string    | Compatible   |                                     |
-| integer   | binary    | Unsupported  |                                     |
-| integer   | timestamp | Unsupported  |                                     |
-| long      | boolean   | Compatible   |                                     |
-| long      | byte      | Compatible   |                                     |
-| long      | short     | Compatible   |                                     |
-| long      | integer   | Compatible   |                                     |
-| long      | float     | Compatible   |                                     |
-| long      | double    | Compatible   |                                     |
-| long      | decimal   | Compatible   |                                     |
-| long      | string    | Compatible   |                                     |
-| long      | binary    | Unsupported  |                                     |
-| long      | timestamp | Unsupported  |                                     |
-| float     | boolean   | Compatible   |                                     |
-| float     | byte      | Unsupported  |                                     |
-| float     | short     | Unsupported  |                                     |
-| float     | integer   | Unsupported  |                                     |
-| float     | long      | Unsupported  |                                     |
-| float     | double    | Compatible   |                                     |
-| float     | decimal   | Unsupported  |                                     |
-| float     | string    | Incompatible |                                     |
-| float     | timestamp | Unsupported  |                                     |
-| double    | boolean   | Compatible   |                                     |
-| double    | byte      | Unsupported  |                                     |
-| double    | short     | Unsupported  |                                     |
-| double    | integer   | Unsupported  |                                     |
-| double    | long      | Unsupported  |                                     |
-| double    | float     | Compatible   |                                     |
-| double    | decimal   | Incompatible |                                     |
-| double    | string    | Incompatible |                                     |
-| double    | timestamp | Unsupported  |                                     |
-| decimal   | boolean   | Unsupported  |                                     |
-| decimal   | byte      | Unsupported  |                                     |
-| decimal   | short     | Unsupported  |                                     |
-| decimal   | integer   | Unsupported  |                                     |
-| decimal   | long      | Unsupported  |                                     |
-| decimal   | float     | Compatible   |                                     |
-| decimal   | double    | Compatible   |                                     |
-| decimal   | string    | Unsupported  |                                     |
-| decimal   | timestamp | Unsupported  |                                     |
-| string    | boolean   | Compatible   |                                     |
-| string    | byte      | Compatible   |                                     |
-| string    | short     | Compatible   |                                     |
-| string    | integer   | Compatible   |                                     |
-| string    | long      | Compatible   |                                     |
-| string    | float     | Unsupported  |                                     |
-| string    | double    | Unsupported  |                                     |
-| string    | decimal   | Unsupported  |                                     |
-| string    | binary    | Compatible   |                                     |
-| string    | date      | Unsupported  |                                     |
-| string    | timestamp | Incompatible | Not all valid formats are supported |
-| binary    | string    | Incompatible |                                     |
-| date      | boolean   | Unsupported  |                                     |
-| date      | byte      | Unsupported  |                                     |
-| date      | short     | Unsupported  |                                     |
-| date      | integer   | Unsupported  |                                     |
-| date      | long      | Unsupported  |                                     |
-| date      | float     | Unsupported  |                                     |
-| date      | double    | Unsupported  |                                     |
-| date      | decimal   | Unsupported  |                                     |
-| date      | string    | Compatible   |                                     |
-| date      | timestamp | Unsupported  |                                     |
-| timestamp | boolean   | Unsupported  |                                     |
-| timestamp | byte      | Unsupported  |                                     |
-| timestamp | short     | Unsupported  |                                     |
-| timestamp | integer   | Unsupported  |                                     |
-| timestamp | long      | Compatible   |                                     |
-| timestamp | float     | Unsupported  |                                     |
-| timestamp | double    | Unsupported  |                                     |
-| timestamp | decimal   | Unsupported  |                                     |
-| timestamp | string    | Compatible   |                                     |
-| timestamp | date      | Compatible   |                                     |
+Spark.
+
+### Compatible Casts
+
+The following cast operations are generally compatible with Spark except for 
the differences noted here.
+
+| From Type | To Type | Notes |
+|-|-|-|
+| boolean | byte |  |
+| boolean | short |  |
+| boolean | integer |  |
+| boolean | long |  |
+| boolean | float |  |
+| boolean | double |  |
+| boolean | string |  |
+| byte | boolean |  |
+| byte | short |  |
+| byte | integer |  |
+| byte | long |  |
+| byte | float |  |
+| byte | double |  |
+| byte | decimal |  |
+| byte | string |  |
+| short | boolean |  |
+| short | byte |  |
+| short | integer |  |
+| short | long |  |
+| short | float |  |
+| short | double |  |
+| short | decimal |  |
+| short | string |  |
+| integer | boolean |  |
+| integer | byte |  |
+| integer | short |  |
+| integer | long |  |
+| integer | float |  |
+| integer | double |  |
+| integer | string |  |
+| long | boolean |  |
+| long | byte |  |
+| long | short |  |
+| long | integer |  |
+| long | float |  |
+| long | double |  |
+| long | string |  |
+| float | boolean |  |
+| float | double |  |
+| float | string | There can be differences in precision. For example, the 
input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
+| double | boolean |  |
+| double | float |  |
+| double | string | There can be differences in precision. For example, the 
input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
+| decimal | float |  |
+| decimal | double |  |
+| string | boolean |  |
+| string | byte |  |
+| string | short |  |
+| string | integer |  |
+| string | long |  |
+| string | binary |  |
+| date | string |  |
+| timestamp | long |  |
+| timestamp | decimal |  |
+| timestamp | string |  |
+| timestamp | date |  |
+
+### Incompatible Casts
+
+The following cast operations are not compatible with Spark for all inputs and 
are disabled by default.
+
+| From Type | To Type | Notes |
+|-|-|-|
+| integer | decimal  | No overflow check |
+| long | decimal  | No overflow check |
+| float | decimal  | No overflow check |
+| double | decimal  | No overflow check |
+| string | timestamp  | Not all valid formats are supported |
+| binary | string  | Only works for binary data representing valid UTF-8 
strings |
+
+### Unsupported Casts
+
+Any cast not listed in the previous tables is currently unsupported. We are 
working on adding more. See the
+[tracking issue](https://github.com/apache/datafusion-comet/issues/286) for 
more details.
diff --git a/spark/src/main/scala/org/apache/comet/GenerateDocs.scala b/spark/src/main/scala/org/apache/comet/GenerateDocs.scala
index 8c414c7f..1e28efd5 100644
--- a/spark/src/main/scala/org/apache/comet/GenerateDocs.scala
+++ b/spark/src/main/scala/org/apache/comet/GenerateDocs.scala
@@ -25,7 +25,7 @@ import scala.io.Source
 
 import org.apache.spark.sql.catalyst.expressions.Cast
 
-import org.apache.comet.expressions.{CometCast, Compatible, Incompatible, 
Unsupported}
+import org.apache.comet.expressions.{CometCast, Compatible, Incompatible}
 
 /**
  * Utility for generating markdown documentation from the configs.
@@ -64,23 +64,36 @@ object GenerateDocs {
     val outputFilename = "docs/source/user-guide/compatibility.md"
     val w = new BufferedOutputStream(new FileOutputStream(outputFilename))
     for (line <- Source.fromFile(templateFilename).getLines()) {
-      if (line.trim == "<!--CAST_TABLE-->") {
-        w.write("| From Type | To Type | Compatible? | Notes |\n".getBytes)
-        w.write("|-|-|-|-|\n".getBytes)
+      if (line.trim == "<!--COMPAT_CAST_TABLE-->") {
+        w.write("| From Type | To Type | Notes |\n".getBytes)
+        w.write("|-|-|-|\n".getBytes)
         for (fromType <- CometCast.supportedTypes) {
           for (toType <- CometCast.supportedTypes) {
             if (Cast.canCast(fromType, toType) && fromType != toType) {
               val fromTypeName = fromType.typeName.replace("(10,2)", "")
               val toTypeName = toType.typeName.replace("(10,2)", "")
               CometCast.isSupported(fromType, toType, None, "LEGACY") match {
-                case Compatible =>
-                  w.write(s"| $fromTypeName | $toTypeName | Compatible | 
|\n".getBytes)
-                case Incompatible(Some(reason)) =>
-                  w.write(s"| $fromTypeName | $toTypeName | Incompatible | 
$reason |\n".getBytes)
-                case Incompatible(None) =>
-                  w.write(s"| $fromTypeName | $toTypeName | Incompatible | 
|\n".getBytes)
-                case Unsupported =>
-                  w.write(s"| $fromTypeName | $toTypeName | Unsupported | 
|\n".getBytes)
+                case Compatible(notes) =>
+                  val notesStr = notes.getOrElse("").trim
+                  w.write(s"| $fromTypeName | $toTypeName | $notesStr 
|\n".getBytes)
+                case _ =>
+              }
+            }
+          }
+        }
+      } else if (line.trim == "<!--INCOMPAT_CAST_TABLE-->") {
+        w.write("| From Type | To Type | Notes |\n".getBytes)
+        w.write("|-|-|-|\n".getBytes)
+        for (fromType <- CometCast.supportedTypes) {
+          for (toType <- CometCast.supportedTypes) {
+            if (Cast.canCast(fromType, toType) && fromType != toType) {
+              val fromTypeName = fromType.typeName.replace("(10,2)", "")
+              val toTypeName = toType.typeName.replace("(10,2)", "")
+              CometCast.isSupported(fromType, toType, None, "LEGACY") match {
+                case Incompatible(notes) =>
+                  val notesStr = notes.getOrElse("").trim
+                  w.write(s"| $fromTypeName | $toTypeName  | $notesStr 
|\n".getBytes)
+                case _ =>
               }
             }
           }
diff --git a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
index 5641c94a..57e07b8c 100644
--- a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
+++ b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
@@ -24,10 +24,10 @@ import org.apache.spark.sql.types.{DataType, DataTypes, DecimalType}
 sealed trait SupportLevel
 
 /** We support this feature with full compatibility with Spark */
-object Compatible extends SupportLevel
+case class Compatible(notes: Option[String] = None) extends SupportLevel
 
 /** We support this feature but results can be different from Spark */
-case class Incompatible(reason: Option[String] = None) extends SupportLevel
+case class Incompatible(notes: Option[String] = None) extends SupportLevel
 
 /** We do not support this feature */
 object Unsupported extends SupportLevel
@@ -58,7 +58,7 @@ object CometCast {
       evalMode: String): SupportLevel = {
 
     if (fromType == toType) {
-      return Compatible
+      return Compatible()
     }
 
     (fromType, toType) match {
@@ -83,10 +83,14 @@ object CometCast {
         canCastFromDecimal(toType)
       case (DataTypes.BooleanType, _) =>
         canCastFromBoolean(toType)
-      case (
-            DataTypes.ByteType | DataTypes.ShortType | DataTypes.IntegerType | 
DataTypes.LongType,
-            _) =>
+      case (DataTypes.ByteType, _) =>
+        canCastFromByte(toType)
+      case (DataTypes.ShortType, _) =>
+        canCastFromShort(toType)
+      case (DataTypes.IntegerType, _) =>
         canCastFromInt(toType)
+      case (DataTypes.LongType, _) =>
+        canCastFromLong(toType)
       case (DataTypes.FloatType, _) =>
         canCastFromFloat(toType)
       case (DataTypes.DoubleType, _) =>
@@ -101,12 +105,12 @@ object CometCast {
       evalMode: String): SupportLevel = {
     toType match {
       case DataTypes.BooleanType =>
-        Compatible
+        Compatible()
       case DataTypes.ByteType | DataTypes.ShortType | DataTypes.IntegerType |
           DataTypes.LongType =>
-        Compatible
+        Compatible()
       case DataTypes.BinaryType =>
-        Compatible
+        Compatible()
       case DataTypes.FloatType | DataTypes.DoubleType =>
         // https://github.com/apache/datafusion-comet/issues/326
         Unsupported
@@ -130,18 +134,21 @@ object CometCast {
 
   private def canCastToString(fromType: DataType): SupportLevel = {
     fromType match {
-      case DataTypes.BooleanType => Compatible
+      case DataTypes.BooleanType => Compatible()
       case DataTypes.ByteType | DataTypes.ShortType | DataTypes.IntegerType |
           DataTypes.LongType =>
-        Compatible
-      case DataTypes.DateType => Compatible
-      case DataTypes.TimestampType => Compatible
+        Compatible()
+      case DataTypes.DateType => Compatible()
+      case DataTypes.TimestampType => Compatible()
       case DataTypes.FloatType | DataTypes.DoubleType =>
-        // https://github.com/apache/datafusion-comet/issues/326
-        Incompatible()
+        Compatible(
+          Some(
+            "There can be differences in precision. " +
+              "For example, the input \"1.4E-45\" will produce 1.0E-45 " +
+              "instead of 1.4E-45"))
       case DataTypes.BinaryType =>
         // https://github.com/apache/datafusion-comet/issues/377
-        Incompatible()
+        Incompatible(Some("Only works for binary data representing valid UTF-8 
strings"))
       case _ => Unsupported
     }
   }
@@ -155,9 +162,10 @@ object CometCast {
         Unsupported
       case DataTypes.LongType =>
         // https://github.com/apache/datafusion-comet/issues/352
-        Compatible
-      case DataTypes.StringType => Compatible
-      case DataTypes.DateType => Compatible
+        Compatible()
+      case DataTypes.StringType => Compatible()
+      case DataTypes.DateType => Compatible()
+      case _: DecimalType => Compatible()
       case _ => Unsupported
     }
   }
@@ -165,31 +173,72 @@ object CometCast {
   private def canCastFromBoolean(toType: DataType): SupportLevel = toType 
match {
     case DataTypes.ByteType | DataTypes.ShortType | DataTypes.IntegerType | 
DataTypes.LongType |
         DataTypes.FloatType | DataTypes.DoubleType =>
-      Compatible
+      Compatible()
     case _ => Unsupported
   }
 
+  private def canCastFromByte(toType: DataType): SupportLevel = toType match {
+    case DataTypes.BooleanType =>
+      Compatible()
+    case DataTypes.ShortType | DataTypes.IntegerType | DataTypes.LongType =>
+      Compatible()
+    case DataTypes.FloatType | DataTypes.DoubleType | _: DecimalType =>
+      Compatible()
+    case _ =>
+      Unsupported
+  }
+
+  private def canCastFromShort(toType: DataType): SupportLevel = toType match {
+    case DataTypes.BooleanType =>
+      Compatible()
+    case DataTypes.ByteType | DataTypes.IntegerType | DataTypes.LongType =>
+      Compatible()
+    case DataTypes.FloatType | DataTypes.DoubleType | _: DecimalType =>
+      Compatible()
+    case _ =>
+      Unsupported
+  }
+
   private def canCastFromInt(toType: DataType): SupportLevel = toType match {
-    case DataTypes.BooleanType | DataTypes.ByteType | DataTypes.ShortType |
-        DataTypes.IntegerType | DataTypes.LongType | DataTypes.FloatType | 
DataTypes.DoubleType |
-        _: DecimalType =>
-      Compatible
-    case _ => Unsupported
+    case DataTypes.BooleanType =>
+      Compatible()
+    case DataTypes.ByteType | DataTypes.ShortType | DataTypes.LongType =>
+      Compatible()
+    case DataTypes.FloatType | DataTypes.DoubleType =>
+      Compatible()
+    case _: DecimalType =>
+      Incompatible(Some("No overflow check"))
+    case _ =>
+      Unsupported
+  }
+
+  private def canCastFromLong(toType: DataType): SupportLevel = toType match {
+    case DataTypes.BooleanType =>
+      Compatible()
+    case DataTypes.ByteType | DataTypes.ShortType | DataTypes.IntegerType =>
+      Compatible()
+    case DataTypes.FloatType | DataTypes.DoubleType =>
+      Compatible()
+    case _: DecimalType =>
+      Incompatible(Some("No overflow check"))
+    case _ =>
+      Unsupported
   }
 
   private def canCastFromFloat(toType: DataType): SupportLevel = toType match {
-    case DataTypes.BooleanType | DataTypes.DoubleType => Compatible
+    case DataTypes.BooleanType | DataTypes.DoubleType => Compatible()
+    case _: DecimalType => Incompatible(Some("No overflow check"))
     case _ => Unsupported
   }
 
   private def canCastFromDouble(toType: DataType): SupportLevel = toType match 
{
-    case DataTypes.BooleanType | DataTypes.FloatType => Compatible
-    case _: DecimalType => Incompatible()
+    case DataTypes.BooleanType | DataTypes.FloatType => Compatible()
+    case _: DecimalType => Incompatible(Some("No overflow check"))
     case _ => Unsupported
   }
 
   private def canCastFromDecimal(toType: DataType): SupportLevel = toType 
match {
-    case DataTypes.FloatType | DataTypes.DoubleType => Compatible
+    case DataTypes.FloatType | DataTypes.DoubleType => Compatible()
     case _ => Unsupported
   }
 
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
index 1e8877c8..86e9f10b 100644
--- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -636,7 +636,7 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde {
                 reason.map(str => s" ($str)").getOrElse("")
 
             castSupport match {
-              case Compatible =>
+              case Compatible(_) =>
                 castToProto(timeZoneId, dt, childExpr, evalModeStr)
               case Incompatible(reason) =>
                 if (CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.get()) {
diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
index 483301e0..1d698a49 100644
--- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -30,7 +30,7 @@ import org.apache.spark.sql.functions.col
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{DataType, DataTypes}
 
-import org.apache.comet.expressions.CometCast
+import org.apache.comet.expressions.{CometCast, Compatible}
 
 class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
   import testImplicits._
@@ -66,6 +66,23 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
               }
             } else if (!testExists) {
               fail(s"Missing test: $expectedTestName")
+            } else {
+              val testIgnored =
+                tags.get(expectedTestName).exists(s => 
s.contains("org.scalatest.Ignore"))
+              CometCast.isSupported(fromType, toType, None, "LEGACY") match {
+                case Compatible(_) =>
+                  if (testIgnored) {
+                    fail(
+                      s"Cast from $fromType to $toType is reported as 
compatible " +
+                        "with Spark but the test is ignored")
+                  }
+                case _ =>
+                  if (!testIgnored) {
+                    fail(
+                      s"We claim that cast from $fromType to $toType is not 
compatible " +
+                        "with Spark but the test is not ignored")
+                  }
+              }
             }
           } else if (testExists) {
             fail(s"Found test for cast that Spark does not support: 
$expectedTestName")
@@ -347,7 +364,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
       Short.MaxValue.toFloat,
       0.0f) ++
       Range(0, dataSize).map(_ => r.nextFloat())
-    withNulls(values).toDF("a")
+    castTest(withNulls(values).toDF("a"), DataTypes.StringType)
   }
 
   ignore("cast FloatType to TimestampType") {
@@ -401,7 +418,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
       Double.NegativeInfinity,
       0.0d) ++
       Range(0, dataSize).map(_ => r.nextDouble())
-    withNulls(values).toDF("a")
+    castTest(withNulls(values).toDF("a"), DataTypes.StringType)
   }
 
   ignore("cast DoubleType to TimestampType") {
@@ -559,6 +576,14 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     }
   }
 
+  ignore("cast StringType to TimestampType") {
+    // https://github.com/apache/datafusion-comet/issues/328
+    withSQLConf((CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key, "true")) {
+      val values = Seq("2020-01-01T12:34:56.123456", "T2") ++ 
generateStrings(timestampPattern, 8)
+      castTest(values.toDF("a"), DataTypes.TimestampType)
+    }
+  }
+
   test("cast StringType to TimestampType disabled for non-UTC timezone") {
     withSQLConf((SQLConf.SESSION_LOCAL_TIMEZONE.key, "America/Denver")) {
       val values = Seq("2020-01-01T12:34:56.123456", "T2").toDF("a")
@@ -569,15 +594,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     }
   }
 
-  ignore("cast StringType to TimestampType (fuzz test)") {
-    // https://github.com/apache/datafusion-comet/issues/328
-    withSQLConf((CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key, "true")) {
-      val values = Seq("2020-01-01T12:34:56.123456", "T2") ++ 
generateStrings(timestampPattern, 8)
-      castTest(values.toDF("a"), DataTypes.TimestampType)
-    }
-  }
-
-  test("cast StringType to TimestampType") {
+  test("cast StringType to TimestampType - subset of supported values") {
     withSQLConf(
       SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC",
       CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key -> "true") {
@@ -606,8 +623,12 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
   // CAST from BinaryType
 
   ignore("cast BinaryType to StringType") {
-    // TODO implement this
     // https://github.com/apache/datafusion-comet/issues/377
+    castTest(generateBinary(), DataTypes.StringType)
+  }
+
+  test("cast BinaryType to StringType - valid UTF-8 inputs") {
+    castTest(generateStrings(numericPattern, 8).toDF("a"), 
DataTypes.StringType)
   }
 
   // CAST from DateType
@@ -795,7 +816,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
       Seq(
         "2024-01-01T12:34:56.123456",
         "2024-01-01T01:00:00Z",
-        "2024-12-31T01:00:00-02:00",
+        "9999-12-31T01:00:00-02:00",
         "2024-12-31T01:00:00+02:00")
     withNulls(values)
       .toDF("str")
@@ -814,6 +835,16 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     Range(0, dataSize).map(_ => generateString(r, chars, maxLen))
   }
 
+  private def generateBinary(): DataFrame = {
+    val r = new Random(0)
+    val bytes = new Array[Byte](8)
+    val values: Seq[Array[Byte]] = Range(0, dataSize).map(_ => {
+      r.nextBytes(bytes)
+      bytes.clone()
+    })
+    values.toDF("a")
+  }
+
   private def withNulls[T](values: Seq[T]): Seq[Option[T]] = {
     values.map(v => Some(v)) ++ Seq(None)
   }
diff --git a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
index 47c2c696..8f022988 100644
--- a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
@@ -253,7 +253,8 @@ class CometExecSuite extends CometTestBase {
     dataTypes.map { subqueryType =>
       withSQLConf(
         CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true",
-        CometConf.COMET_COLUMNAR_SHUFFLE_ENABLED.key -> "true") {
+        CometConf.COMET_COLUMNAR_SHUFFLE_ENABLED.key -> "true",
+        CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key -> "true") {
         withParquetTable((0 until 5).map(i => (i, i + 1)), "tbl") {
           var column1 = s"CAST(max(_1) AS $subqueryType)"
           if (subqueryType == "BINARY") {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to