(incubator-gluten) branch main updated: [VL] Support YearMonthIntervalType and enable make_ym_interval (#4798)

marong Wed, 27 Mar 2024 21:08:59 -0700

This is an automated email from the ASF dual-hosted git repository.

marong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git



The following commit(s) were added to refs/heads/main by this push:
     new 8f5ad48c6 [VL] Support YearMonthIntervalType and enable make_ym_interval (#4798)
8f5ad48c6 is described below

commit 8f5ad48c627f2543f29ba3cb65b794b5e7d2e4ff
Author: Rong Ma <[email protected]>
AuthorDate: Thu Mar 28 12:08:51 2024 +0800

    [VL] Support YearMonthIntervalType and enable make_ym_interval (#4798)
---
 .../io/glutenproject/utils/CHExpressionUtil.scala  |  3 +-
 .../backendsapi/velox/ValidatorApiImpl.scala       |  3 +-
 .../backendsapi/velox/VeloxBackend.scala           |  6 +--
 .../execution/VeloxColumnarToRowExec.scala         |  1 +
 .../execution/ScalarFunctionsValidateSuite.scala   | 32 ++++++++++++++++
 cpp/core/shuffle/Utils.cc                          |  1 +
 cpp/velox/substrait/SubstraitParser.cc             |  3 ++
 cpp/velox/substrait/SubstraitToVeloxExpr.cc        |  2 +
 .../substrait/SubstraitToVeloxPlanValidator.cc     |  7 +++-
 .../substrait/type/IntervalYearTypeNode.java       | 43 ++++++++++++++++++++++
 .../glutenproject/substrait/type/TypeBuilder.java  |  4 ++
 .../glutenproject/expression/ConverterUtils.scala  |  2 +
 .../expression/ExpressionMappings.scala            |  1 +
 .../io/glutenproject/extension/GlutenPlan.scala    |  4 +-
 .../apache/spark/sql/utils/SparkArrowUtil.scala    |  6 ++-
 .../utils/clickhouse/ClickHouseTestSettings.scala  | 12 ++++++
 .../utils/clickhouse/ClickHouseTestSettings.scala  | 16 ++++++++
 .../utils/clickhouse/ClickHouseTestSettings.scala  | 16 ++++++++
 .../glutenproject/expression/ExpressionNames.scala |  1 +
 19 files changed, 154 insertions(+), 9 deletions(-)

diff --git a/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala
index 028e4e9e9..6dfed9dd6 100644
--- a/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala
+++ b/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala
@@ -179,6 +179,7 @@ object CHExpressionUtil {
     SPARK_PARTITION_ID -> DefaultValidator(),
     URL_DECODE -> DefaultValidator(),
     SKEWNESS -> DefaultValidator(),
-    BIT_LENGTH -> DefaultValidator()
+    BIT_LENGTH -> DefaultValidator(),
+    MAKE_YM_INTERVAL -> DefaultValidator()
   )
 }
diff --git 
a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/ValidatorApiImpl.scala
 
b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/ValidatorApiImpl.scala
index a2394e6c9..3acb3aebb 100644
--- 
a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/ValidatorApiImpl.scala
+++ 
b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/ValidatorApiImpl.scala
@@ -45,7 +45,8 @@ class ValidatorApiImpl extends ValidatorApi {
   private def isPrimitiveType(dataType: DataType): Boolean = {
     dataType match {
       case BooleanType | ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType |
-          StringType | BinaryType | _: DecimalType | DateType | TimestampType | NullType =>
+          StringType | BinaryType | _: DecimalType | DateType | TimestampType |
+          YearMonthIntervalType.DEFAULT | NullType =>
         true
       case _ => false
     }
diff --git 
a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/VeloxBackend.scala
 
b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/VeloxBackend.scala
index 9d252149d..25d389f3c 100644
--- 
a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/VeloxBackend.scala
+++ 
b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/VeloxBackend.scala
@@ -27,7 +27,7 @@ import 
io.glutenproject.substrait.rel.LocalFilesNode.ReadFileFormat
 import 
io.glutenproject.substrait.rel.LocalFilesNode.ReadFileFormat.{DwrfReadFormat, 
OrcReadFormat, ParquetReadFormat}
 
 import org.apache.spark.sql.catalyst.catalog.BucketSpec
-import org.apache.spark.sql.catalyst.expressions.{Alias, CumeDist, DenseRank, Descending, Expression, Lag, Lead, Literal, NamedExpression, NthValue, NTile, PercentRank, Rand, RangeFrame, Rank, RowNumber, SortOrder, SpecialFrameBoundary, SpecifiedWindowFrame, Uuid}
+import org.apache.spark.sql.catalyst.expressions.{Alias, CumeDist, DenseRank, Descending, Expression, Lag, Lead, Literal, MakeYMInterval, NamedExpression, NthValue, NTile, PercentRank, Rand, RangeFrame, Rank, RowNumber, SortOrder, SpecialFrameBoundary, SpecifiedWindowFrame, Uuid}
 import 
org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, 
Count, Sum}
 import org.apache.spark.sql.catalyst.plans.JoinType
 import org.apache.spark.sql.catalyst.util.CharVarcharUtils
@@ -200,6 +200,7 @@ object BackendSettings extends BackendSettingsApi {
             case _: StructType => Some("StructType")
             case _: ArrayType => Some("ArrayType")
             case _: MapType => Some("MapType")
+            case _: YearMonthIntervalType => Some("YearMonthIntervalType")
             case _ => None
           }
       }
@@ -398,8 +399,7 @@ object BackendSettings extends BackendSettingsApi {
       expr match {
         // Block directly falling back the below functions by FallbackEmptySchemaRelation.
         case alias: Alias => checkExpr(alias.child)
-        case _: Rand => true
-        case _: Uuid => true
+        case _: Rand | _: Uuid | _: MakeYMInterval => true
         case _ => false
       }
     }
diff --git 
a/backends-velox/src/main/scala/io/glutenproject/execution/VeloxColumnarToRowExec.scala
 
b/backends-velox/src/main/scala/io/glutenproject/execution/VeloxColumnarToRowExec.scala
index 67bb4ba8c..11c076ab8 100644
--- 
a/backends-velox/src/main/scala/io/glutenproject/execution/VeloxColumnarToRowExec.scala
+++ 
b/backends-velox/src/main/scala/io/glutenproject/execution/VeloxColumnarToRowExec.scala
@@ -58,6 +58,7 @@ case class VeloxColumnarToRowExec(child: SparkPlan) extends ColumnarToRowExecBas
         case _: ArrayType =>
         case _: MapType =>
         case _: StructType =>
+        case YearMonthIntervalType.DEFAULT =>
         case _: NullType =>
         case _ =>
           throw new GlutenNotSupportException(
diff --git 
a/backends-velox/src/test/scala/io/glutenproject/execution/ScalarFunctionsValidateSuite.scala
 
b/backends-velox/src/test/scala/io/glutenproject/execution/ScalarFunctionsValidateSuite.scala
index f6998b69e..2419d288b 100644
--- 
a/backends-velox/src/test/scala/io/glutenproject/execution/ScalarFunctionsValidateSuite.scala
+++ 
b/backends-velox/src/test/scala/io/glutenproject/execution/ScalarFunctionsValidateSuite.scala
@@ -556,6 +556,38 @@ class ScalarFunctionsValidateSuite extends 
FunctionsValidateTest {
     }
   }
 
+  test("Test make_ym_interval function") {
+    runQueryAndCompare("select make_ym_interval(1, 1)") {
+      checkOperatorMatch[ProjectExecTransformer]
+    }
+
+    runQueryAndCompare("select make_ym_interval(1)") {
+      checkOperatorMatch[ProjectExecTransformer]
+    }
+
+    runQueryAndCompare("select make_ym_interval()") {
+      checkOperatorMatch[ProjectExecTransformer]
+    }
+
+    withTempPath {
+      path =>
+        Seq[Tuple2[Integer, Integer]]((1, 0), (-1, 1), (null, 1), (1, null))
+          .toDF("year", "month")
+          .write
+          .parquet(path.getCanonicalPath)
+
+        spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("make_ym_interval_tbl")
+
+        runQueryAndCompare("select make_ym_interval(year, month) from make_ym_interval_tbl") {
+          checkOperatorMatch[ProjectExecTransformer]
+        }
+
+        runQueryAndCompare("select make_ym_interval(year) from make_ym_interval_tbl") {
+          checkOperatorMatch[ProjectExecTransformer]
+        }
+    }
+  }
+
   test("Test uuid function") {
     runQueryAndCompare("""SELECT uuid() from lineitem limit 100""".stripMargin, false) {
       checkOperatorMatch[ProjectExecTransformer]
diff --git a/cpp/core/shuffle/Utils.cc b/cpp/core/shuffle/Utils.cc
index f0da2cebc..6854c1978 100644
--- a/cpp/core/shuffle/Utils.cc
+++ b/cpp/core/shuffle/Utils.cc
@@ -290,6 +290,7 @@ 
arrow::Result<std::vector<std::shared_ptr<arrow::DataType>>> gluten::toShuffleTy
       case arrow::LargeListType::type_id:
       case arrow::Decimal128Type::type_id:
       case arrow::NullType::type_id:
+      case arrow::MonthIntervalType::type_id:
         shuffleTypeId.push_back(field->type());
         break;
       default:
diff --git a/cpp/velox/substrait/SubstraitParser.cc 
b/cpp/velox/substrait/SubstraitParser.cc
index ce6a532ef..71966a0b3 100644
--- a/cpp/velox/substrait/SubstraitParser.cc
+++ b/cpp/velox/substrait/SubstraitParser.cc
@@ -83,6 +83,9 @@ TypePtr SubstraitParser::parseType(const ::substrait::Type& 
substraitType, bool
       auto scale = substraitType.decimal().scale();
       return DECIMAL(precision, scale);
     }
+    case ::substrait::Type::KindCase::kIntervalYear: {
+      return INTERVAL_YEAR_MONTH();
+    }
     case ::substrait::Type::KindCase::kNothing:
       return UNKNOWN();
     default:
diff --git a/cpp/velox/substrait/SubstraitToVeloxExpr.cc 
b/cpp/velox/substrait/SubstraitToVeloxExpr.cc
index 8699907de..02d57f276 100644
--- a/cpp/velox/substrait/SubstraitToVeloxExpr.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxExpr.cc
@@ -139,6 +139,8 @@ TypePtr getScalarType(const 
::substrait::Expression::Literal& literal) {
       return VARCHAR();
     case ::substrait::Expression_Literal::LiteralTypeCase::kBinary:
       return VARBINARY();
+    case 
::substrait::Expression_Literal::LiteralTypeCase::kIntervalYearToMonth:
+      return INTERVAL_YEAR_MONTH();
     default:
       return nullptr;
   }
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc 
b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index 5df59a348..f04dcda7c 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -267,8 +267,8 @@ bool SubstraitToVeloxPlanValidator::validateCast(
   }
 
   const auto& toType = SubstraitParser::parseType(castExpr.type());
-  if (toType->kind() == TypeKind::TIMESTAMP) {
-    LOG_VALIDATION_MSG("Casting to TIMESTAMP is not supported.");
+  if (toType->kind() == TypeKind::TIMESTAMP || toType->isIntervalYearMonth()) {
+    LOG_VALIDATION_MSG("Casting to " + toType->toString() + " is not supported.");
     return false;
   }
 
@@ -284,6 +284,9 @@ bool SubstraitToVeloxPlanValidator::validateCast(
       LOG_VALIDATION_MSG("Casting from DATE to " + toType->toString() + " is not supported.");
       return false;
     }
+  } else if (input->type()->isIntervalYearMonth()) {
+    LOG_VALIDATION_MSG("Casting from INTERVAL_YEAR_MONTH is not supported.");
+    return false;
   }
   switch (input->type()->kind()) {
     case TypeKind::ARRAY:
diff --git 
a/gluten-core/src/main/java/io/glutenproject/substrait/type/IntervalYearTypeNode.java
 
b/gluten-core/src/main/java/io/glutenproject/substrait/type/IntervalYearTypeNode.java
new file mode 100644
index 000000000..30538fc31
--- /dev/null
+++ 
b/gluten-core/src/main/java/io/glutenproject/substrait/type/IntervalYearTypeNode.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.glutenproject.substrait.type;
+
+import io.substrait.proto.Type;
+
+import java.io.Serializable;
+
+public class IntervalYearTypeNode implements TypeNode, Serializable {
+
+  private final Boolean nullable;
+
+  public IntervalYearTypeNode(Boolean nullable) {
+    this.nullable = nullable;
+  }
+
+  @Override
+  public Type toProtobuf() {
+    Type.IntervalYear.Builder intervalYearBuilder = Type.IntervalYear.newBuilder();
+    if (nullable) {
+      intervalYearBuilder.setNullability(Type.Nullability.NULLABILITY_NULLABLE);
+    } else {
+      intervalYearBuilder.setNullability(Type.Nullability.NULLABILITY_REQUIRED);
+    }
+    Type.Builder builder = Type.newBuilder();
+    builder.setIntervalYear(intervalYearBuilder.build());
+    return builder.build();
+  }
+}
diff --git 
a/gluten-core/src/main/java/io/glutenproject/substrait/type/TypeBuilder.java 
b/gluten-core/src/main/java/io/glutenproject/substrait/type/TypeBuilder.java
index a3efd6765..8393b1d9a 100644
--- a/gluten-core/src/main/java/io/glutenproject/substrait/type/TypeBuilder.java
+++ b/gluten-core/src/main/java/io/glutenproject/substrait/type/TypeBuilder.java
@@ -69,6 +69,10 @@ public class TypeBuilder {
     return new DateTypeNode(nullable);
   }
 
+  public static TypeNode makeIntervalYear(Boolean nullable) {
+    return new IntervalYearTypeNode(nullable);
+  }
+
   public static TypeNode makeDecimal(Boolean nullable, Integer precision, 
Integer scale) {
     return new DecimalTypeNode(nullable, precision, scale);
   }
diff --git 
a/gluten-core/src/main/scala/io/glutenproject/expression/ConverterUtils.scala 
b/gluten-core/src/main/scala/io/glutenproject/expression/ConverterUtils.scala
index 069700e29..169a9dd0e 100644
--- 
a/gluten-core/src/main/scala/io/glutenproject/expression/ConverterUtils.scala
+++ 
b/gluten-core/src/main/scala/io/glutenproject/expression/ConverterUtils.scala
@@ -238,6 +238,8 @@ object ConverterUtils extends Logging {
         TypeBuilder.makeBinary(nullable)
       case DateType =>
         TypeBuilder.makeDate(nullable)
+      case YearMonthIntervalType.DEFAULT =>
+        TypeBuilder.makeIntervalYear(nullable)
       case DecimalType() =>
         val decimalType = datatype.asInstanceOf[DecimalType]
         val precision = decimalType.precision
diff --git 
a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionMappings.scala
 
b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionMappings.scala
index 180597ebf..745d045fd 100644
--- 
a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionMappings.scala
+++ 
b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionMappings.scala
@@ -179,6 +179,7 @@ object ExpressionMappings {
     Sig[MonthsBetween](MONTHS_BETWEEN),
     Sig[DateFromUnixDate](DATE_FROM_UNIX_DATE),
     Sig[MakeTimestamp](MAKE_TIMESTAMP),
+    Sig[MakeYMInterval](MAKE_YM_INTERVAL),
     // JSON functions
     Sig[GetJsonObject](GET_JSON_OBJECT),
     Sig[LengthOfJsonArray](JSON_ARRAY_LENGTH),
diff --git 
a/gluten-core/src/main/scala/io/glutenproject/extension/GlutenPlan.scala 
b/gluten-core/src/main/scala/io/glutenproject/extension/GlutenPlan.scala
index c86bcdd8e..cf472dc2f 100644
--- a/gluten-core/src/main/scala/io/glutenproject/extension/GlutenPlan.scala
+++ b/gluten-core/src/main/scala/io/glutenproject/extension/GlutenPlan.scala
@@ -76,7 +76,9 @@ trait GlutenPlan extends SparkPlan with LogLevelUtil {
         }
         // FIXME: Use a validation-specific method to catch validation failures
         TestStats.addFallBackClassName(this.getClass.toString)
-        logValidationMessage(s"Validation failed with exception for plan: $nodeName, due to:", e)
+        logValidationMessage(
+          s"Validation failed with exception for plan: $nodeName, due to: ${e.getMessage}",
+          e)
         ValidationResult.notOk(e.getMessage)
     } finally {
       TransformerState.finishValidation
diff --git 
a/gluten-data/src/main/scala/org/apache/spark/sql/utils/SparkArrowUtil.scala 
b/gluten-data/src/main/scala/org/apache/spark/sql/utils/SparkArrowUtil.scala
index 67a963273..014956d84 100644
--- a/gluten-data/src/main/scala/org/apache/spark/sql/utils/SparkArrowUtil.scala
+++ b/gluten-data/src/main/scala/org/apache/spark/sql/utils/SparkArrowUtil.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.utils
 import org.apache.spark.sql.types._
 
 import org.apache.arrow.vector.complex.MapVector
-import org.apache.arrow.vector.types.{DateUnit, FloatingPointPrecision, TimeUnit}
+import org.apache.arrow.vector.types.{DateUnit, FloatingPointPrecision, IntervalUnit, TimeUnit}
 import org.apache.arrow.vector.types.pojo.{ArrowType, Field, FieldType, Schema}
 
 import scala.collection.JavaConverters._
@@ -47,6 +47,8 @@ object SparkArrowUtil {
       } else {
         new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC")
       }
+    case YearMonthIntervalType.DEFAULT =>
+      new ArrowType.Interval(IntervalUnit.YEAR_MONTH)
     case _: ArrayType => ArrowType.List.INSTANCE
     case NullType => ArrowType.Null.INSTANCE
     case _ =>
@@ -69,6 +71,8 @@ object SparkArrowUtil {
     case date: ArrowType.Date if date.getUnit == DateUnit.DAY => DateType
     // TODO: Time unit is not handled.
     case _: ArrowType.Timestamp => TimestampType
+    case interval: ArrowType.Interval if interval.getUnit == IntervalUnit.YEAR_MONTH =>
+      YearMonthIntervalType.DEFAULT
     case ArrowType.Null.INSTANCE => NullType
     case _ => throw new UnsupportedOperationException(s"Unsupported data type: $dt")
   }
diff --git 
a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
index 540520ddf..7d05c8b58 100644
--- 
a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
@@ -1138,6 +1138,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("sorting on ShortType with nullable=false, sortOrder=List('a ASC 
NULLS LAST)")
     .exclude("sorting on ShortType with nullable=false, sortOrder=List('a DESC 
NULLS LAST)")
     .exclude("sorting on ShortType with nullable=false, sortOrder=List('a DESC 
NULLS FIRST)")
+    .excludeByPrefix("sorting on YearMonthIntervalType(0,1) with")
   enableSuite[GlutenTakeOrderedAndProjectSuite]
     .exclude("TakeOrderedAndProject.doExecute without project")
     .exclude("TakeOrderedAndProject.doExecute with project")
@@ -1459,11 +1460,13 @@ class ClickHouseTestSettings extends 
BackendTestSettings {
     .exclude("Loading a JSON dataset primitivesAsString returns complex fields 
as strings")
     .exclude("SPARK-4228 DataFrame to JSON")
     .exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
+    .exclude("SPARK-36830: Support reading and writing ANSI intervals")
   enableSuite[GlutenJsonV2Suite]
     .exclude("Complex field and type inferring")
     .exclude("Loading a JSON dataset primitivesAsString returns complex fields 
as strings")
     .exclude("SPARK-4228 DataFrame to JSON")
     .exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
+    .exclude("SPARK-36830: Support reading and writing ANSI intervals")
   enableSuite[GlutenOrcColumnarBatchReaderSuite]
   enableSuite[GlutenOrcFilterSuite].exclude("SPARK-32622: case sensitivity in 
predicate pushdown")
   enableSuite[GlutenOrcPartitionDiscoverySuite]
@@ -1485,6 +1488,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-36594: ORC vectorized reader should properly check maximal 
number of fields")
     // DISABLED: GLUTEN-4893 Vanilla UT checks scan operator by exactly 
matching the class type
     .exclude("SPARK-34862: Support ORC vectorized reader for nested column")
+    .excludeByPrefix(
+      "SPARK-36931: Support reading and writing ANSI intervals 
(spark.sql.orc.enableVectorizedReader=false,")
     .excludeGlutenTest("SPARK-31238: compatibility with Spark 2.4 in reading 
dates")
     .excludeGlutenTest("SPARK-31238, SPARK-31423: rebasing dates in write")
     .excludeGlutenTest("SPARK-31284: compatibility with Spark 2.4 in reading 
timestamps")
@@ -1723,7 +1728,11 @@ class ClickHouseTestSettings extends BackendTestSettings 
{
   enableSuite[GlutenParquetCompressionCodecPrecedenceSuite]
   enableSuite[GlutenParquetEncodingSuite]
   enableSuite[GlutenParquetFileFormatV1Suite]
+    .exclude(
+      "SPARK-36825, SPARK-36854: year-month/day-time intervals written and 
read as INT32/INT64")
   enableSuite[GlutenParquetFileFormatV2Suite]
+    .exclude(
+      "SPARK-36825, SPARK-36854: year-month/day-time intervals written and 
read as INT32/INT64")
   enableSuite[GlutenParquetIOSuite]
     .exclude("Standard mode - nested map with struct as key type")
     .exclude("Legacy mode - nested map with struct as key type")
@@ -1762,6 +1771,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-25207: exception when duplicate fields in case-insensitive 
mode")
     .exclude("Support Parquet column index")
     .exclude("SPARK-34562: Bloom filter push down")
+    .exclude("SPARK-36866: filter pushdown - year-month interval")
     .excludeGlutenTest("SPARK-25207: exception when duplicate fields in 
case-insensitive mode")
   enableSuite[GlutenParquetV1PartitionDiscoverySuite]
     .exclude("SPARK-7847: Dynamic partition directory path escaping and 
unescaping")
@@ -1967,6 +1977,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-25207: exception when duplicate fields in case-insensitive 
mode")
     .exclude("Support Parquet column index")
     .exclude("SPARK-34562: Bloom filter push down")
+    .exclude("SPARK-36866: filter pushdown - year-month interval")
     .excludeGlutenTest("SPARK-25207: exception when duplicate fields in 
case-insensitive mode")
     .excludeGlutenTest("filter pushdown - date")
   enableSuite[GlutenParquetV2PartitionDiscoverySuite]
@@ -2161,6 +2172,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
   enableSuite[GlutenFiltersSuite]
   enableSuite[GlutenInsertSuite]
   enableSuite[GlutenPartitionedWriteSuite]
+    .exclude("SPARK-37231, SPARK-37240: Dynamic writes/reads of ANSI interval 
partitions")
   enableSuite[GlutenPathOptionSuite]
   enableSuite[GlutenPrunedScanSuite]
   enableSuite[GlutenResolvedDataSourceSuite]
diff --git 
a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
index a81022ee8..da029e575 100644
--- 
a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
@@ -1176,6 +1176,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("sorting on ShortType with nullable=false, sortOrder=List('a DESC 
NULLS LAST)")
     .exclude("sorting on ShortType with nullable=false, sortOrder=List('a DESC 
NULLS FIRST)")
     .exclude("SPARK-40089: decimal values sort correctly")
+    .excludeByPrefix("sorting on YearMonthIntervalType(0,1) with")
   enableSuite[GlutenTakeOrderedAndProjectSuite]
     .exclude("TakeOrderedAndProject.doExecute without project")
     .exclude("TakeOrderedAndProject.doExecute with project")
@@ -1299,6 +1300,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
     .exclude("SPARK-37360: Write and infer TIMESTAMP_NTZ values with a 
non-default pattern")
     .exclude("SPARK-37360: Timestamp type inference for a column with 
TIMESTAMP_NTZ values")
+    .exclude("SPARK-36830: Support reading and writing ANSI intervals")
   enableSuite[GlutenJsonSuite]
     .exclude("Complex field and type inferring")
     .exclude("Loading a JSON dataset primitivesAsString returns complex fields 
as strings")
@@ -1306,6 +1308,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
     .exclude("SPARK-37360: Write and infer TIMESTAMP_NTZ values with a 
non-default pattern")
     .exclude("SPARK-37360: Timestamp type inference for a column with 
TIMESTAMP_NTZ values")
+    .exclude("SPARK-36830: Support reading and writing ANSI intervals")
   enableSuite[GlutenJsonV1Suite]
     .exclude("Complex field and type inferring")
     .exclude("Loading a JSON dataset primitivesAsString returns complex fields 
as strings")
@@ -1313,6 +1316,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
     .exclude("SPARK-37360: Write and infer TIMESTAMP_NTZ values with a 
non-default pattern")
     .exclude("SPARK-37360: Timestamp type inference for a column with 
TIMESTAMP_NTZ values")
+    .exclude("SPARK-36830: Support reading and writing ANSI intervals")
   enableSuite[GlutenJsonV2Suite]
     .exclude("Complex field and type inferring")
     .exclude("Loading a JSON dataset primitivesAsString returns complex fields 
as strings")
@@ -1320,6 +1324,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
     .exclude("SPARK-37360: Write and infer TIMESTAMP_NTZ values with a 
non-default pattern")
     .exclude("SPARK-37360: Timestamp type inference for a column with 
TIMESTAMP_NTZ values")
+    .exclude("SPARK-36830: Support reading and writing ANSI intervals")
   enableSuite[GlutenOrcColumnarBatchReaderSuite]
   enableSuite[GlutenOrcFilterSuite].exclude("SPARK-32622: case sensitivity in 
predicate pushdown")
   enableSuite[GlutenOrcPartitionDiscoverySuite]
@@ -1343,11 +1348,15 @@ class ClickHouseTestSettings extends 
BackendTestSettings {
     .exclude("SPARK-37812: Reuse result row when deserializing a struct")
     // DISABLED: GLUTEN-4893 Vanilla UT checks scan operator by exactly 
matching the class type
     .exclude("SPARK-34862: Support ORC vectorized reader for nested column")
+    .excludeByPrefix(
+      "SPARK-36931: Support reading and writing ANSI intervals 
(spark.sql.orc.enableVectorizedReader=false,")
     .excludeGlutenTest("SPARK-31238: compatibility with Spark 2.4 in reading 
dates")
     .excludeGlutenTest("SPARK-31238, SPARK-31423: rebasing dates in write")
     .excludeGlutenTest("SPARK-31284: compatibility with Spark 2.4 in reading 
timestamps")
     .excludeGlutenTest("SPARK-31284, SPARK-31423: rebasing timestamps in 
write")
     .excludeGlutenTest("SPARK-34862: Support ORC vectorized reader for nested 
column")
+    .excludeGlutenTest(
+      "SPARK-36931: Support reading and writing ANSI intervals 
(spark.sql.orc.enableVectorizedReader=false, 
spark.sql.orc.enableNestedColumnVectorizedReader=false)")
   enableSuite[GlutenOrcV1FilterSuite].exclude("SPARK-32622: case sensitivity 
in predicate pushdown")
   enableSuite[GlutenOrcV1PartitionDiscoverySuite]
   enableSuite[GlutenOrcV1QuerySuite]
@@ -1596,7 +1605,11 @@ class ClickHouseTestSettings extends BackendTestSettings 
{
   enableSuite[GlutenParquetEncodingSuite].exclude("All Types 
Dictionary").exclude("All Types Null")
   enableSuite[GlutenParquetFieldIdIOSuite]
   enableSuite[GlutenParquetFileFormatV1Suite]
+    .exclude(
+      "SPARK-36825, SPARK-36854: year-month/day-time intervals written and 
read as INT32/INT64")
   enableSuite[GlutenParquetFileFormatV2Suite]
+    .exclude(
+      "SPARK-36825, SPARK-36854: year-month/day-time intervals written and 
read as INT32/INT64")
   enableSuite[GlutenParquetIOSuite]
     .exclude("Standard mode - nested map with struct as key type")
     .exclude("Legacy mode - nested map with struct as key type")
@@ -1638,6 +1651,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("Support Parquet column index")
     .exclude("SPARK-34562: Bloom filter push down")
     .exclude("SPARK-38825: in and notIn filters")
+    .exclude("SPARK-36866: filter pushdown - year-month interval")
     .excludeGlutenTest("SPARK-25207: exception when duplicate fields in 
case-insensitive mode")
   enableSuite[GlutenParquetV1PartitionDiscoverySuite]
     .exclude("SPARK-7847: Dynamic partition directory path escaping and 
unescaping")
@@ -1844,6 +1858,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("Support Parquet column index")
     .exclude("SPARK-34562: Bloom filter push down")
     .exclude("SPARK-38825: in and notIn filters")
+    .exclude("SPARK-36866: filter pushdown - year-month interval")
     .excludeGlutenTest("SPARK-25207: exception when duplicate fields in 
case-insensitive mode")
     .excludeGlutenTest("filter pushdown - date")
   enableSuite[GlutenParquetV2PartitionDiscoverySuite]
@@ -2066,6 +2081,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
   enableSuite[GlutenFiltersSuite]
   enableSuite[GlutenInsertSuite]
   enableSuite[GlutenPartitionedWriteSuite]
+    .exclude("SPARK-37231, SPARK-37240: Dynamic writes/reads of ANSI interval 
partitions")
   enableSuite[GlutenPathOptionSuite]
   enableSuite[GlutenPrunedScanSuite]
   enableSuite[GlutenResolvedDataSourceSuite]
diff --git 
a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
index ab204b365..9fabee3ca 100644
--- 
a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
@@ -956,6 +956,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("sorting on ShortType with nullable=false, sortOrder=List('a ASC 
NULLS LAST)")
     .exclude("sorting on ShortType with nullable=false, sortOrder=List('a DESC 
NULLS LAST)")
     .exclude("sorting on ShortType with nullable=false, sortOrder=List('a DESC 
NULLS FIRST)")
+    .excludeByPrefix("sorting on YearMonthIntervalType(0,1) with")
   enableSuite[GlutenTakeOrderedAndProjectSuite]
     .exclude("TakeOrderedAndProject.doExecute without project")
     .exclude("TakeOrderedAndProject.doExecute with project")
@@ -1078,6 +1079,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
     .exclude("SPARK-37360: Write and infer TIMESTAMP_NTZ values with a 
non-default pattern")
     .exclude("SPARK-37360: Timestamp type inference for a column with 
TIMESTAMP_NTZ values")
+    .exclude("SPARK-36830: Support reading and writing ANSI intervals")
   enableSuite[GlutenJsonSuite]
     .exclude("Complex field and type inferring")
     .exclude("Loading a JSON dataset primitivesAsString returns complex fields 
as strings")
@@ -1085,6 +1087,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
     .exclude("SPARK-37360: Write and infer TIMESTAMP_NTZ values with a 
non-default pattern")
     .exclude("SPARK-37360: Timestamp type inference for a column with 
TIMESTAMP_NTZ values")
+    .exclude("SPARK-36830: Support reading and writing ANSI intervals")
   enableSuite[GlutenJsonV1Suite]
     .exclude("Complex field and type inferring")
     .exclude("Loading a JSON dataset primitivesAsString returns complex fields 
as strings")
@@ -1092,6 +1095,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
     .exclude("SPARK-37360: Write and infer TIMESTAMP_NTZ values with a non-default pattern")
     .exclude("SPARK-37360: Timestamp type inference for a column with TIMESTAMP_NTZ values")
+    .exclude("SPARK-36830: Support reading and writing ANSI intervals")
   enableSuite[GlutenJsonV2Suite]
     .exclude("Complex field and type inferring")
     .exclude("Loading a JSON dataset primitivesAsString returns complex fields as strings")
@@ -1099,6 +1103,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
     .exclude("SPARK-37360: Write and infer TIMESTAMP_NTZ values with a non-default pattern")
     .exclude("SPARK-37360: Timestamp type inference for a column with TIMESTAMP_NTZ values")
+    .exclude("SPARK-36830: Support reading and writing ANSI intervals")
   enableSuite[GlutenOrcColumnarBatchReaderSuite]
   enableSuite[GlutenOrcFilterSuite].exclude("SPARK-32622: case sensitivity in predicate pushdown")
   enableSuite[GlutenOrcPartitionDiscoverySuite]
@@ -1122,11 +1127,15 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-37812: Reuse result row when deserializing a struct")
     // DISABLED: GLUTEN-4893 Vanilla UT checks scan operator by exactly matching the class type
     .exclude("SPARK-34862: Support ORC vectorized reader for nested column")
+    .excludeByPrefix(
+      "SPARK-36931: Support reading and writing ANSI intervals (spark.sql.orc.enableVectorizedReader=false,")
     .excludeGlutenTest("SPARK-31238: compatibility with Spark 2.4 in reading dates")
     .excludeGlutenTest("SPARK-31238, SPARK-31423: rebasing dates in write")
     .excludeGlutenTest("SPARK-31284: compatibility with Spark 2.4 in reading timestamps")
     .excludeGlutenTest("SPARK-31284, SPARK-31423: rebasing timestamps in write")
     .excludeGlutenTest("SPARK-34862: Support ORC vectorized reader for nested column")
+    .excludeGlutenTest(
+      "SPARK-36931: Support reading and writing ANSI intervals (spark.sql.orc.enableVectorizedReader=false, spark.sql.orc.enableNestedColumnVectorizedReader=false)")
   enableSuite[GlutenOrcV1FilterSuite].exclude("SPARK-32622: case sensitivity in predicate pushdown")
   enableSuite[GlutenOrcV1PartitionDiscoverySuite]
   enableSuite[GlutenOrcV1QuerySuite]
@@ -1375,7 +1384,11 @@ class ClickHouseTestSettings extends BackendTestSettings {
   enableSuite[GlutenParquetEncodingSuite].exclude("All Types Dictionary").exclude("All Types Null")
   enableSuite[GlutenParquetFieldIdIOSuite]
   enableSuite[GlutenParquetFileFormatV1Suite]
+    .exclude(
+      "SPARK-36825, SPARK-36854: year-month/day-time intervals written and read as INT32/INT64")
   enableSuite[GlutenParquetFileFormatV2Suite]
+    .exclude(
+      "SPARK-36825, SPARK-36854: year-month/day-time intervals written and read as INT32/INT64")
   enableSuite[GlutenParquetIOSuite]
     .exclude("Standard mode - nested map with struct as key type")
     .exclude("Legacy mode - nested map with struct as key type")
@@ -1417,6 +1430,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("Support Parquet column index")
     .exclude("SPARK-34562: Bloom filter push down")
     .exclude("SPARK-38825: in and notIn filters")
+    .exclude("SPARK-36866: filter pushdown - year-month interval")
     .excludeGlutenTest("SPARK-25207: exception when duplicate fields in case-insensitive mode")
   enableSuite[GlutenParquetV1PartitionDiscoverySuite]
     .exclude("SPARK-7847: Dynamic partition directory path escaping and unescaping")
@@ -1623,6 +1637,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("Support Parquet column index")
     .exclude("SPARK-34562: Bloom filter push down")
     .exclude("SPARK-38825: in and notIn filters")
+    .exclude("SPARK-36866: filter pushdown - year-month interval")
     .excludeGlutenTest("SPARK-25207: exception when duplicate fields in case-insensitive mode")
     .excludeGlutenTest("filter pushdown - date")
   enableSuite[GlutenParquetV2PartitionDiscoverySuite]
@@ -1846,6 +1861,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
   enableSuite[GlutenInsertSuite]
     .excludeAllGlutenTests()
   enableSuite[GlutenPartitionedWriteSuite]
+    .exclude("SPARK-37231, SPARK-37240: Dynamic writes/reads of ANSI interval partitions")
   enableSuite[GlutenPathOptionSuite]
   enableSuite[GlutenPrunedScanSuite]
   enableSuite[GlutenResolvedDataSourceSuite]
diff --git a/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala b/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala
index 2f3391629..97fa914e9 100644
--- a/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala
+++ b/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala
@@ -192,6 +192,7 @@ object ExpressionNames {
   final val MONTHS_BETWEEN = "months_between"
   final val DATE_FROM_UNIX_DATE = "date_from_unix_date"
   final val MAKE_TIMESTAMP = "make_timestamp"
+  final val MAKE_YM_INTERVAL = "make_ym_interval"
 
   // JSON functions
   final val GET_JSON_OBJECT = "get_json_object"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(incubator-gluten) branch main updated: [VL] Support YearMonthIntervalType and enable make_ym_interval (#4798)

Reply via email to