This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 8e73f7ca feat: Improve cast compatibility tests and docs (#379)
8e73f7ca is described below
commit 8e73f7cab5489d5918512b4ae206e39b96242320
Author: Andy Grove <[email protected]>
AuthorDate: Mon May 6 22:41:45 2024 -0600
feat: Improve cast compatibility tests and docs (#379)
---
docs/source/user-guide/compatibility-template.md | 18 +-
docs/source/user-guide/compatibility.md | 203 +++++++++------------
.../main/scala/org/apache/comet/GenerateDocs.scala | 37 ++--
.../org/apache/comet/expressions/CometCast.scala | 107 ++++++++---
.../org/apache/comet/serde/QueryPlanSerde.scala | 2 +-
.../scala/org/apache/comet/CometCastSuite.scala | 59 ++++--
.../org/apache/comet/exec/CometExecSuite.scala | 3 +-
7 files changed, 251 insertions(+), 178 deletions(-)
diff --git a/docs/source/user-guide/compatibility-template.md
b/docs/source/user-guide/compatibility-template.md
index deaca2d2..64f87135 100644
--- a/docs/source/user-guide/compatibility-template.md
+++ b/docs/source/user-guide/compatibility-template.md
@@ -44,7 +44,19 @@ Cast operations in Comet fall into three levels of support:
- **Unsupported**: Comet does not provide a native version of this cast
expression and the query stage will fall back to
Spark.
-The following table shows the current cast operations supported by Comet. Any
cast that does not appear in this
-table (such as those involving complex types and timestamp_ntz, for example)
are not supported by Comet.
+### Compatible Casts
-<!--CAST_TABLE-->
+The following cast operations are generally compatible with Spark except for the differences noted here.
+
+<!--COMPAT_CAST_TABLE-->
+
+### Incompatible Casts
+
+The following cast operations are not compatible with Spark for all inputs and are disabled by default.
+
+<!--INCOMPAT_CAST_TABLE-->
+
+### Unsupported Casts
+
+Any cast not listed in the previous tables is currently unsupported. We are working on adding more. See the
+[tracking issue](https://github.com/apache/datafusion-comet/issues/286) for more details.
diff --git a/docs/source/user-guide/compatibility.md
b/docs/source/user-guide/compatibility.md
index 9a2478d3..57a4271f 100644
--- a/docs/source/user-guide/compatibility.md
+++ b/docs/source/user-guide/compatibility.md
@@ -38,122 +38,89 @@ Cast operations in Comet fall into three levels of support:
- **Compatible**: The results match Apache Spark
- **Incompatible**: The results may match Apache Spark for some inputs, but
there are known issues where some inputs
- will result in incorrect results or exceptions. The query stage will fall
back to Spark by default. Setting
- `spark.comet.cast.allowIncompatible=true` will allow all incompatible casts
to run natively in Comet, but this is not
- recommended for production use.
+will result in incorrect results or exceptions. The query stage will fall back
to Spark by default. Setting
+`spark.comet.cast.allowIncompatible=true` will allow all incompatible casts to
run natively in Comet, but this is not
+recommended for production use.
- **Unsupported**: Comet does not provide a native version of this cast
expression and the query stage will fall back to
- Spark.
-
-The following table shows the current cast operations supported by Comet. Any
cast that does not appear in this
-table (such as those involving complex types and timestamp_ntz, for example)
are not supported by Comet.
-
-| From Type | To Type | Compatible? | Notes |
-| --------- | --------- | ------------ | ----------------------------------- |
-| boolean | byte | Compatible | |
-| boolean | short | Compatible | |
-| boolean | integer | Compatible | |
-| boolean | long | Compatible | |
-| boolean | float | Compatible | |
-| boolean | double | Compatible | |
-| boolean | decimal | Unsupported | |
-| boolean | string | Compatible | |
-| boolean | timestamp | Unsupported | |
-| byte | boolean | Compatible | |
-| byte | short | Compatible | |
-| byte | integer | Compatible | |
-| byte | long | Compatible | |
-| byte | float | Compatible | |
-| byte | double | Compatible | |
-| byte | decimal | Compatible | |
-| byte | string | Compatible | |
-| byte | binary | Unsupported | |
-| byte | timestamp | Unsupported | |
-| short | boolean | Compatible | |
-| short | byte | Compatible | |
-| short | integer | Compatible | |
-| short | long | Compatible | |
-| short | float | Compatible | |
-| short | double | Compatible | |
-| short | decimal | Compatible | |
-| short | string | Compatible | |
-| short | binary | Unsupported | |
-| short | timestamp | Unsupported | |
-| integer | boolean | Compatible | |
-| integer | byte | Compatible | |
-| integer | short | Compatible | |
-| integer | long | Compatible | |
-| integer | float | Compatible | |
-| integer | double | Compatible | |
-| integer | decimal | Compatible | |
-| integer | string | Compatible | |
-| integer | binary | Unsupported | |
-| integer | timestamp | Unsupported | |
-| long | boolean | Compatible | |
-| long | byte | Compatible | |
-| long | short | Compatible | |
-| long | integer | Compatible | |
-| long | float | Compatible | |
-| long | double | Compatible | |
-| long | decimal | Compatible | |
-| long | string | Compatible | |
-| long | binary | Unsupported | |
-| long | timestamp | Unsupported | |
-| float | boolean | Compatible | |
-| float | byte | Unsupported | |
-| float | short | Unsupported | |
-| float | integer | Unsupported | |
-| float | long | Unsupported | |
-| float | double | Compatible | |
-| float | decimal | Unsupported | |
-| float | string | Incompatible | |
-| float | timestamp | Unsupported | |
-| double | boolean | Compatible | |
-| double | byte | Unsupported | |
-| double | short | Unsupported | |
-| double | integer | Unsupported | |
-| double | long | Unsupported | |
-| double | float | Compatible | |
-| double | decimal | Incompatible | |
-| double | string | Incompatible | |
-| double | timestamp | Unsupported | |
-| decimal | boolean | Unsupported | |
-| decimal | byte | Unsupported | |
-| decimal | short | Unsupported | |
-| decimal | integer | Unsupported | |
-| decimal | long | Unsupported | |
-| decimal | float | Compatible | |
-| decimal | double | Compatible | |
-| decimal | string | Unsupported | |
-| decimal | timestamp | Unsupported | |
-| string | boolean | Compatible | |
-| string | byte | Compatible | |
-| string | short | Compatible | |
-| string | integer | Compatible | |
-| string | long | Compatible | |
-| string | float | Unsupported | |
-| string | double | Unsupported | |
-| string | decimal | Unsupported | |
-| string | binary | Compatible | |
-| string | date | Unsupported | |
-| string | timestamp | Incompatible | Not all valid formats are supported |
-| binary | string | Incompatible | |
-| date | boolean | Unsupported | |
-| date | byte | Unsupported | |
-| date | short | Unsupported | |
-| date | integer | Unsupported | |
-| date | long | Unsupported | |
-| date | float | Unsupported | |
-| date | double | Unsupported | |
-| date | decimal | Unsupported | |
-| date | string | Compatible | |
-| date | timestamp | Unsupported | |
-| timestamp | boolean | Unsupported | |
-| timestamp | byte | Unsupported | |
-| timestamp | short | Unsupported | |
-| timestamp | integer | Unsupported | |
-| timestamp | long | Compatible | |
-| timestamp | float | Unsupported | |
-| timestamp | double | Unsupported | |
-| timestamp | decimal | Unsupported | |
-| timestamp | string | Compatible | |
-| timestamp | date | Compatible | |
+Spark.
+
+### Compatible Casts
+
+The following cast operations are generally compatible with Spark except for the differences noted here.
+
+| From Type | To Type | Notes |
+|-|-|-|
+| boolean | byte | |
+| boolean | short | |
+| boolean | integer | |
+| boolean | long | |
+| boolean | float | |
+| boolean | double | |
+| boolean | string | |
+| byte | boolean | |
+| byte | short | |
+| byte | integer | |
+| byte | long | |
+| byte | float | |
+| byte | double | |
+| byte | decimal | |
+| byte | string | |
+| short | boolean | |
+| short | byte | |
+| short | integer | |
+| short | long | |
+| short | float | |
+| short | double | |
+| short | decimal | |
+| short | string | |
+| integer | boolean | |
+| integer | byte | |
+| integer | short | |
+| integer | long | |
+| integer | float | |
+| integer | double | |
+| integer | string | |
+| long | boolean | |
+| long | byte | |
+| long | short | |
+| long | integer | |
+| long | float | |
+| long | double | |
+| long | string | |
+| float | boolean | |
+| float | double | |
+| float | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
+| double | boolean | |
+| double | float | |
+| double | string | There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
+| decimal | float | |
+| decimal | double | |
+| string | boolean | |
+| string | byte | |
+| string | short | |
+| string | integer | |
+| string | long | |
+| string | binary | |
+| date | string | |
+| timestamp | long | |
+| timestamp | decimal | |
+| timestamp | string | |
+| timestamp | date | |
+
+### Incompatible Casts
+
+The following cast operations are not compatible with Spark for all inputs and are disabled by default.
+
+| From Type | To Type | Notes |
+|-|-|-|
+| integer | decimal | No overflow check |
+| long | decimal | No overflow check |
+| float | decimal | No overflow check |
+| double | decimal | No overflow check |
+| string | timestamp | Not all valid formats are supported |
+| binary | string | Only works for binary data representing valid UTF-8 strings |
+
+### Unsupported Casts
+
+Any cast not listed in the previous tables is currently unsupported. We are working on adding more. See the
+[tracking issue](https://github.com/apache/datafusion-comet/issues/286) for more details.
diff --git a/spark/src/main/scala/org/apache/comet/GenerateDocs.scala
b/spark/src/main/scala/org/apache/comet/GenerateDocs.scala
index 8c414c7f..1e28efd5 100644
--- a/spark/src/main/scala/org/apache/comet/GenerateDocs.scala
+++ b/spark/src/main/scala/org/apache/comet/GenerateDocs.scala
@@ -25,7 +25,7 @@ import scala.io.Source
import org.apache.spark.sql.catalyst.expressions.Cast
-import org.apache.comet.expressions.{CometCast, Compatible, Incompatible,
Unsupported}
+import org.apache.comet.expressions.{CometCast, Compatible, Incompatible}
/**
* Utility for generating markdown documentation from the configs.
@@ -64,23 +64,36 @@ object GenerateDocs {
val outputFilename = "docs/source/user-guide/compatibility.md"
val w = new BufferedOutputStream(new FileOutputStream(outputFilename))
for (line <- Source.fromFile(templateFilename).getLines()) {
- if (line.trim == "<!--CAST_TABLE-->") {
- w.write("| From Type | To Type | Compatible? | Notes |\n".getBytes)
- w.write("|-|-|-|-|\n".getBytes)
+ if (line.trim == "<!--COMPAT_CAST_TABLE-->") {
+ w.write("| From Type | To Type | Notes |\n".getBytes)
+ w.write("|-|-|-|\n".getBytes)
for (fromType <- CometCast.supportedTypes) {
for (toType <- CometCast.supportedTypes) {
if (Cast.canCast(fromType, toType) && fromType != toType) {
val fromTypeName = fromType.typeName.replace("(10,2)", "")
val toTypeName = toType.typeName.replace("(10,2)", "")
CometCast.isSupported(fromType, toType, None, "LEGACY") match {
- case Compatible =>
- w.write(s"| $fromTypeName | $toTypeName | Compatible |
|\n".getBytes)
- case Incompatible(Some(reason)) =>
- w.write(s"| $fromTypeName | $toTypeName | Incompatible |
$reason |\n".getBytes)
- case Incompatible(None) =>
- w.write(s"| $fromTypeName | $toTypeName | Incompatible |
|\n".getBytes)
- case Unsupported =>
- w.write(s"| $fromTypeName | $toTypeName | Unsupported |
|\n".getBytes)
+ case Compatible(notes) =>
+ val notesStr = notes.getOrElse("").trim
+ w.write(s"| $fromTypeName | $toTypeName | $notesStr
|\n".getBytes)
+ case _ =>
+ }
+ }
+ }
+ }
+ } else if (line.trim == "<!--INCOMPAT_CAST_TABLE-->") {
+ w.write("| From Type | To Type | Notes |\n".getBytes)
+ w.write("|-|-|-|\n".getBytes)
+ for (fromType <- CometCast.supportedTypes) {
+ for (toType <- CometCast.supportedTypes) {
+ if (Cast.canCast(fromType, toType) && fromType != toType) {
+ val fromTypeName = fromType.typeName.replace("(10,2)", "")
+ val toTypeName = toType.typeName.replace("(10,2)", "")
+ CometCast.isSupported(fromType, toType, None, "LEGACY") match {
+ case Incompatible(notes) =>
+ val notesStr = notes.getOrElse("").trim
+ w.write(s"| $fromTypeName | $toTypeName | $notesStr
|\n".getBytes)
+ case _ =>
}
}
}
diff --git a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
index 5641c94a..57e07b8c 100644
--- a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
+++ b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
@@ -24,10 +24,10 @@ import org.apache.spark.sql.types.{DataType, DataTypes,
DecimalType}
sealed trait SupportLevel
/** We support this feature with full compatibility with Spark */
-object Compatible extends SupportLevel
+case class Compatible(notes: Option[String] = None) extends SupportLevel
/** We support this feature but results can be different from Spark */
-case class Incompatible(reason: Option[String] = None) extends SupportLevel
+case class Incompatible(notes: Option[String] = None) extends SupportLevel
/** We do not support this feature */
object Unsupported extends SupportLevel
@@ -58,7 +58,7 @@ object CometCast {
evalMode: String): SupportLevel = {
if (fromType == toType) {
- return Compatible
+ return Compatible()
}
(fromType, toType) match {
@@ -83,10 +83,14 @@ object CometCast {
canCastFromDecimal(toType)
case (DataTypes.BooleanType, _) =>
canCastFromBoolean(toType)
- case (
- DataTypes.ByteType | DataTypes.ShortType | DataTypes.IntegerType |
DataTypes.LongType,
- _) =>
+ case (DataTypes.ByteType, _) =>
+ canCastFromByte(toType)
+ case (DataTypes.ShortType, _) =>
+ canCastFromShort(toType)
+ case (DataTypes.IntegerType, _) =>
canCastFromInt(toType)
+ case (DataTypes.LongType, _) =>
+ canCastFromLong(toType)
case (DataTypes.FloatType, _) =>
canCastFromFloat(toType)
case (DataTypes.DoubleType, _) =>
@@ -101,12 +105,12 @@ object CometCast {
evalMode: String): SupportLevel = {
toType match {
case DataTypes.BooleanType =>
- Compatible
+ Compatible()
case DataTypes.ByteType | DataTypes.ShortType | DataTypes.IntegerType |
DataTypes.LongType =>
- Compatible
+ Compatible()
case DataTypes.BinaryType =>
- Compatible
+ Compatible()
case DataTypes.FloatType | DataTypes.DoubleType =>
// https://github.com/apache/datafusion-comet/issues/326
Unsupported
@@ -130,18 +134,21 @@ object CometCast {
private def canCastToString(fromType: DataType): SupportLevel = {
fromType match {
- case DataTypes.BooleanType => Compatible
+ case DataTypes.BooleanType => Compatible()
case DataTypes.ByteType | DataTypes.ShortType | DataTypes.IntegerType |
DataTypes.LongType =>
- Compatible
- case DataTypes.DateType => Compatible
- case DataTypes.TimestampType => Compatible
+ Compatible()
+ case DataTypes.DateType => Compatible()
+ case DataTypes.TimestampType => Compatible()
case DataTypes.FloatType | DataTypes.DoubleType =>
- // https://github.com/apache/datafusion-comet/issues/326
- Incompatible()
+ Compatible(
+ Some(
+ "There can be differences in precision. " +
+ "For example, the input \"1.4E-45\" will produce 1.0E-45 " +
+ "instead of 1.4E-45"))
case DataTypes.BinaryType =>
// https://github.com/apache/datafusion-comet/issues/377
- Incompatible()
+ Incompatible(Some("Only works for binary data representing valid UTF-8
strings"))
case _ => Unsupported
}
}
@@ -155,9 +162,10 @@ object CometCast {
Unsupported
case DataTypes.LongType =>
// https://github.com/apache/datafusion-comet/issues/352
- Compatible
- case DataTypes.StringType => Compatible
- case DataTypes.DateType => Compatible
+ Compatible()
+ case DataTypes.StringType => Compatible()
+ case DataTypes.DateType => Compatible()
+ case _: DecimalType => Compatible()
case _ => Unsupported
}
}
@@ -165,31 +173,72 @@ object CometCast {
private def canCastFromBoolean(toType: DataType): SupportLevel = toType
match {
case DataTypes.ByteType | DataTypes.ShortType | DataTypes.IntegerType |
DataTypes.LongType |
DataTypes.FloatType | DataTypes.DoubleType =>
- Compatible
+ Compatible()
case _ => Unsupported
}
+ private def canCastFromByte(toType: DataType): SupportLevel = toType match {
+ case DataTypes.BooleanType =>
+ Compatible()
+ case DataTypes.ShortType | DataTypes.IntegerType | DataTypes.LongType =>
+ Compatible()
+ case DataTypes.FloatType | DataTypes.DoubleType | _: DecimalType =>
+ Compatible()
+ case _ =>
+ Unsupported
+ }
+
+ private def canCastFromShort(toType: DataType): SupportLevel = toType match {
+ case DataTypes.BooleanType =>
+ Compatible()
+ case DataTypes.ByteType | DataTypes.IntegerType | DataTypes.LongType =>
+ Compatible()
+ case DataTypes.FloatType | DataTypes.DoubleType | _: DecimalType =>
+ Compatible()
+ case _ =>
+ Unsupported
+ }
+
private def canCastFromInt(toType: DataType): SupportLevel = toType match {
- case DataTypes.BooleanType | DataTypes.ByteType | DataTypes.ShortType |
- DataTypes.IntegerType | DataTypes.LongType | DataTypes.FloatType |
DataTypes.DoubleType |
- _: DecimalType =>
- Compatible
- case _ => Unsupported
+ case DataTypes.BooleanType =>
+ Compatible()
+ case DataTypes.ByteType | DataTypes.ShortType | DataTypes.LongType =>
+ Compatible()
+ case DataTypes.FloatType | DataTypes.DoubleType =>
+ Compatible()
+ case _: DecimalType =>
+ Incompatible(Some("No overflow check"))
+ case _ =>
+ Unsupported
+ }
+
+ private def canCastFromLong(toType: DataType): SupportLevel = toType match {
+ case DataTypes.BooleanType =>
+ Compatible()
+ case DataTypes.ByteType | DataTypes.ShortType | DataTypes.IntegerType =>
+ Compatible()
+ case DataTypes.FloatType | DataTypes.DoubleType =>
+ Compatible()
+ case _: DecimalType =>
+ Incompatible(Some("No overflow check"))
+ case _ =>
+ Unsupported
}
private def canCastFromFloat(toType: DataType): SupportLevel = toType match {
- case DataTypes.BooleanType | DataTypes.DoubleType => Compatible
+ case DataTypes.BooleanType | DataTypes.DoubleType => Compatible()
+ case _: DecimalType => Incompatible(Some("No overflow check"))
case _ => Unsupported
}
private def canCastFromDouble(toType: DataType): SupportLevel = toType match
{
- case DataTypes.BooleanType | DataTypes.FloatType => Compatible
- case _: DecimalType => Incompatible()
+ case DataTypes.BooleanType | DataTypes.FloatType => Compatible()
+ case _: DecimalType => Incompatible(Some("No overflow check"))
case _ => Unsupported
}
private def canCastFromDecimal(toType: DataType): SupportLevel = toType
match {
- case DataTypes.FloatType | DataTypes.DoubleType => Compatible
+ case DataTypes.FloatType | DataTypes.DoubleType => Compatible()
case _ => Unsupported
}
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
index 1e8877c8..86e9f10b 100644
--- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -636,7 +636,7 @@ object QueryPlanSerde extends Logging with
ShimQueryPlanSerde {
reason.map(str => s" ($str)").getOrElse("")
castSupport match {
- case Compatible =>
+ case Compatible(_) =>
castToProto(timeZoneId, dt, childExpr, evalModeStr)
case Incompatible(reason) =>
if (CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.get()) {
diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
index 483301e0..1d698a49 100644
--- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -30,7 +30,7 @@ import org.apache.spark.sql.functions.col
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{DataType, DataTypes}
-import org.apache.comet.expressions.CometCast
+import org.apache.comet.expressions.{CometCast, Compatible}
class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
import testImplicits._
@@ -66,6 +66,23 @@ class CometCastSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
}
} else if (!testExists) {
fail(s"Missing test: $expectedTestName")
+ } else {
+ val testIgnored =
+ tags.get(expectedTestName).exists(s =>
s.contains("org.scalatest.Ignore"))
+ CometCast.isSupported(fromType, toType, None, "LEGACY") match {
+ case Compatible(_) =>
+ if (testIgnored) {
+ fail(
+ s"Cast from $fromType to $toType is reported as
compatible " +
+ "with Spark but the test is ignored")
+ }
+ case _ =>
+ if (!testIgnored) {
+ fail(
+ s"We claim that cast from $fromType to $toType is not
compatible " +
+ "with Spark but the test is not ignored")
+ }
+ }
}
} else if (testExists) {
fail(s"Found test for cast that Spark does not support:
$expectedTestName")
@@ -347,7 +364,7 @@ class CometCastSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
Short.MaxValue.toFloat,
0.0f) ++
Range(0, dataSize).map(_ => r.nextFloat())
- withNulls(values).toDF("a")
+ castTest(withNulls(values).toDF("a"), DataTypes.StringType)
}
ignore("cast FloatType to TimestampType") {
@@ -401,7 +418,7 @@ class CometCastSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
Double.NegativeInfinity,
0.0d) ++
Range(0, dataSize).map(_ => r.nextDouble())
- withNulls(values).toDF("a")
+ castTest(withNulls(values).toDF("a"), DataTypes.StringType)
}
ignore("cast DoubleType to TimestampType") {
@@ -559,6 +576,14 @@ class CometCastSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
}
}
+ ignore("cast StringType to TimestampType") {
+ // https://github.com/apache/datafusion-comet/issues/328
+ withSQLConf((CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key, "true")) {
+ val values = Seq("2020-01-01T12:34:56.123456", "T2") ++
generateStrings(timestampPattern, 8)
+ castTest(values.toDF("a"), DataTypes.TimestampType)
+ }
+ }
+
test("cast StringType to TimestampType disabled for non-UTC timezone") {
withSQLConf((SQLConf.SESSION_LOCAL_TIMEZONE.key, "America/Denver")) {
val values = Seq("2020-01-01T12:34:56.123456", "T2").toDF("a")
@@ -569,15 +594,7 @@ class CometCastSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
}
}
- ignore("cast StringType to TimestampType (fuzz test)") {
- // https://github.com/apache/datafusion-comet/issues/328
- withSQLConf((CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key, "true")) {
- val values = Seq("2020-01-01T12:34:56.123456", "T2") ++
generateStrings(timestampPattern, 8)
- castTest(values.toDF("a"), DataTypes.TimestampType)
- }
- }
-
- test("cast StringType to TimestampType") {
+ test("cast StringType to TimestampType - subset of supported values") {
withSQLConf(
SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC",
CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key -> "true") {
@@ -606,8 +623,12 @@ class CometCastSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
// CAST from BinaryType
ignore("cast BinaryType to StringType") {
- // TODO implement this
// https://github.com/apache/datafusion-comet/issues/377
+ castTest(generateBinary(), DataTypes.StringType)
+ }
+
+ test("cast BinaryType to StringType - valid UTF-8 inputs") {
+ castTest(generateStrings(numericPattern, 8).toDF("a"),
DataTypes.StringType)
}
// CAST from DateType
@@ -795,7 +816,7 @@ class CometCastSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
Seq(
"2024-01-01T12:34:56.123456",
"2024-01-01T01:00:00Z",
- "2024-12-31T01:00:00-02:00",
+ "9999-12-31T01:00:00-02:00",
"2024-12-31T01:00:00+02:00")
withNulls(values)
.toDF("str")
@@ -814,6 +835,16 @@ class CometCastSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
Range(0, dataSize).map(_ => generateString(r, chars, maxLen))
}
+ private def generateBinary(): DataFrame = {
+ val r = new Random(0)
+ val bytes = new Array[Byte](8)
+ val values: Seq[Array[Byte]] = Range(0, dataSize).map(_ => {
+ r.nextBytes(bytes)
+ bytes.clone()
+ })
+ values.toDF("a")
+ }
+
private def withNulls[T](values: Seq[T]): Seq[Option[T]] = {
values.map(v => Some(v)) ++ Seq(None)
}
diff --git a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
index 47c2c696..8f022988 100644
--- a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
@@ -253,7 +253,8 @@ class CometExecSuite extends CometTestBase {
dataTypes.map { subqueryType =>
withSQLConf(
CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true",
- CometConf.COMET_COLUMNAR_SHUFFLE_ENABLED.key -> "true") {
+ CometConf.COMET_COLUMNAR_SHUFFLE_ENABLED.key -> "true",
+ CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key -> "true") {
withParquetTable((0 until 5).map(i => (i, i + 1)), "tbl") {
var column1 = s"CAST(max(_1) AS $subqueryType)"
if (subqueryType == "BINARY") {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]