This is an automated email from the ASF dual-hosted git repository.
marong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 8f5ad48c6 [VL] Support YearMonthIntervalType and enable make_ym_interval (#4798)
8f5ad48c6 is described below
commit 8f5ad48c627f2543f29ba3cb65b794b5e7d2e4ff
Author: Rong Ma <[email protected]>
AuthorDate: Thu Mar 28 12:08:51 2024 +0800
[VL] Support YearMonthIntervalType and enable make_ym_interval (#4798)
---
.../io/glutenproject/utils/CHExpressionUtil.scala | 3 +-
.../backendsapi/velox/ValidatorApiImpl.scala | 3 +-
.../backendsapi/velox/VeloxBackend.scala | 6 +--
.../execution/VeloxColumnarToRowExec.scala | 1 +
.../execution/ScalarFunctionsValidateSuite.scala | 32 ++++++++++++++++
cpp/core/shuffle/Utils.cc | 1 +
cpp/velox/substrait/SubstraitParser.cc | 3 ++
cpp/velox/substrait/SubstraitToVeloxExpr.cc | 2 +
.../substrait/SubstraitToVeloxPlanValidator.cc | 7 +++-
.../substrait/type/IntervalYearTypeNode.java | 43 ++++++++++++++++++++++
.../glutenproject/substrait/type/TypeBuilder.java | 4 ++
.../glutenproject/expression/ConverterUtils.scala | 2 +
.../expression/ExpressionMappings.scala | 1 +
.../io/glutenproject/extension/GlutenPlan.scala | 4 +-
.../apache/spark/sql/utils/SparkArrowUtil.scala | 6 ++-
.../utils/clickhouse/ClickHouseTestSettings.scala | 12 ++++++
.../utils/clickhouse/ClickHouseTestSettings.scala | 16 ++++++++
.../utils/clickhouse/ClickHouseTestSettings.scala | 16 ++++++++
.../glutenproject/expression/ExpressionNames.scala | 1 +
19 files changed, 154 insertions(+), 9 deletions(-)
diff --git
a/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala
b/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala
index 028e4e9e9..6dfed9dd6 100644
---
a/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala
+++
b/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala
@@ -179,6 +179,7 @@ object CHExpressionUtil {
SPARK_PARTITION_ID -> DefaultValidator(),
URL_DECODE -> DefaultValidator(),
SKEWNESS -> DefaultValidator(),
- BIT_LENGTH -> DefaultValidator()
+ BIT_LENGTH -> DefaultValidator(),
+ MAKE_YM_INTERVAL -> DefaultValidator()
)
}
diff --git
a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/ValidatorApiImpl.scala
b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/ValidatorApiImpl.scala
index a2394e6c9..3acb3aebb 100644
---
a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/ValidatorApiImpl.scala
+++
b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/ValidatorApiImpl.scala
@@ -45,7 +45,8 @@ class ValidatorApiImpl extends ValidatorApi {
private def isPrimitiveType(dataType: DataType): Boolean = {
dataType match {
case BooleanType | ByteType | ShortType | IntegerType | LongType |
FloatType | DoubleType |
- StringType | BinaryType | _: DecimalType | DateType | TimestampType | NullType =>
+ StringType | BinaryType | _: DecimalType | DateType | TimestampType |
+ YearMonthIntervalType.DEFAULT | NullType =>
true
case _ => false
}
diff --git
a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/VeloxBackend.scala
b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/VeloxBackend.scala
index 9d252149d..25d389f3c 100644
---
a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/VeloxBackend.scala
+++
b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/VeloxBackend.scala
@@ -27,7 +27,7 @@ import
io.glutenproject.substrait.rel.LocalFilesNode.ReadFileFormat
import
io.glutenproject.substrait.rel.LocalFilesNode.ReadFileFormat.{DwrfReadFormat,
OrcReadFormat, ParquetReadFormat}
import org.apache.spark.sql.catalyst.catalog.BucketSpec
-import org.apache.spark.sql.catalyst.expressions.{Alias, CumeDist, DenseRank, Descending, Expression, Lag, Lead, Literal, NamedExpression, NthValue, NTile, PercentRank, Rand, RangeFrame, Rank, RowNumber, SortOrder, SpecialFrameBoundary, SpecifiedWindowFrame, Uuid}
+import org.apache.spark.sql.catalyst.expressions.{Alias, CumeDist, DenseRank, Descending, Expression, Lag, Lead, Literal, MakeYMInterval, NamedExpression, NthValue, NTile, PercentRank, Rand, RangeFrame, Rank, RowNumber, SortOrder, SpecialFrameBoundary, SpecifiedWindowFrame, Uuid}
import
org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression,
Count, Sum}
import org.apache.spark.sql.catalyst.plans.JoinType
import org.apache.spark.sql.catalyst.util.CharVarcharUtils
@@ -200,6 +200,7 @@ object BackendSettings extends BackendSettingsApi {
case _: StructType => Some("StructType")
case _: ArrayType => Some("ArrayType")
case _: MapType => Some("MapType")
+ case _: YearMonthIntervalType => Some("YearMonthIntervalType")
case _ => None
}
}
@@ -398,8 +399,7 @@ object BackendSettings extends BackendSettingsApi {
expr match {
// Block directly falling back the below functions by FallbackEmptySchemaRelation.
case alias: Alias => checkExpr(alias.child)
- case _: Rand => true
- case _: Uuid => true
+ case _: Rand | _: Uuid | _: MakeYMInterval => true
case _ => false
}
}
diff --git
a/backends-velox/src/main/scala/io/glutenproject/execution/VeloxColumnarToRowExec.scala
b/backends-velox/src/main/scala/io/glutenproject/execution/VeloxColumnarToRowExec.scala
index 67bb4ba8c..11c076ab8 100644
---
a/backends-velox/src/main/scala/io/glutenproject/execution/VeloxColumnarToRowExec.scala
+++
b/backends-velox/src/main/scala/io/glutenproject/execution/VeloxColumnarToRowExec.scala
@@ -58,6 +58,7 @@ case class VeloxColumnarToRowExec(child: SparkPlan) extends
ColumnarToRowExecBas
case _: ArrayType =>
case _: MapType =>
case _: StructType =>
+ case YearMonthIntervalType.DEFAULT =>
case _: NullType =>
case _ =>
throw new GlutenNotSupportException(
diff --git
a/backends-velox/src/test/scala/io/glutenproject/execution/ScalarFunctionsValidateSuite.scala
b/backends-velox/src/test/scala/io/glutenproject/execution/ScalarFunctionsValidateSuite.scala
index f6998b69e..2419d288b 100644
---
a/backends-velox/src/test/scala/io/glutenproject/execution/ScalarFunctionsValidateSuite.scala
+++
b/backends-velox/src/test/scala/io/glutenproject/execution/ScalarFunctionsValidateSuite.scala
@@ -556,6 +556,38 @@ class ScalarFunctionsValidateSuite extends
FunctionsValidateTest {
}
}
+ test("Test make_ym_interval function") {
+ runQueryAndCompare("select make_ym_interval(1, 1)") {
+ checkOperatorMatch[ProjectExecTransformer]
+ }
+
+ runQueryAndCompare("select make_ym_interval(1)") {
+ checkOperatorMatch[ProjectExecTransformer]
+ }
+
+ runQueryAndCompare("select make_ym_interval()") {
+ checkOperatorMatch[ProjectExecTransformer]
+ }
+
+ withTempPath {
+ path =>
+ Seq[Tuple2[Integer, Integer]]((1, 0), (-1, 1), (null, 1), (1, null))
+ .toDF("year", "month")
+ .write
+ .parquet(path.getCanonicalPath)
+
+ spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("make_ym_interval_tbl")
+
+ runQueryAndCompare("select make_ym_interval(year, month) from make_ym_interval_tbl") {
+ checkOperatorMatch[ProjectExecTransformer]
+ }
+
+ runQueryAndCompare("select make_ym_interval(year) from make_ym_interval_tbl") {
+ checkOperatorMatch[ProjectExecTransformer]
+ }
+ }
+ }
+
test("Test uuid function") {
runQueryAndCompare("""SELECT uuid() from lineitem limit 100""".stripMargin, false) {
checkOperatorMatch[ProjectExecTransformer]
diff --git a/cpp/core/shuffle/Utils.cc b/cpp/core/shuffle/Utils.cc
index f0da2cebc..6854c1978 100644
--- a/cpp/core/shuffle/Utils.cc
+++ b/cpp/core/shuffle/Utils.cc
@@ -290,6 +290,7 @@
arrow::Result<std::vector<std::shared_ptr<arrow::DataType>>> gluten::toShuffleTy
case arrow::LargeListType::type_id:
case arrow::Decimal128Type::type_id:
case arrow::NullType::type_id:
+ case arrow::MonthIntervalType::type_id:
shuffleTypeId.push_back(field->type());
break;
default:
diff --git a/cpp/velox/substrait/SubstraitParser.cc
b/cpp/velox/substrait/SubstraitParser.cc
index ce6a532ef..71966a0b3 100644
--- a/cpp/velox/substrait/SubstraitParser.cc
+++ b/cpp/velox/substrait/SubstraitParser.cc
@@ -83,6 +83,9 @@ TypePtr SubstraitParser::parseType(const ::substrait::Type&
substraitType, bool
auto scale = substraitType.decimal().scale();
return DECIMAL(precision, scale);
}
+ case ::substrait::Type::KindCase::kIntervalYear: {
+ return INTERVAL_YEAR_MONTH();
+ }
case ::substrait::Type::KindCase::kNothing:
return UNKNOWN();
default:
diff --git a/cpp/velox/substrait/SubstraitToVeloxExpr.cc
b/cpp/velox/substrait/SubstraitToVeloxExpr.cc
index 8699907de..02d57f276 100644
--- a/cpp/velox/substrait/SubstraitToVeloxExpr.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxExpr.cc
@@ -139,6 +139,8 @@ TypePtr getScalarType(const
::substrait::Expression::Literal& literal) {
return VARCHAR();
case ::substrait::Expression_Literal::LiteralTypeCase::kBinary:
return VARBINARY();
+ case
::substrait::Expression_Literal::LiteralTypeCase::kIntervalYearToMonth:
+ return INTERVAL_YEAR_MONTH();
default:
return nullptr;
}
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index 5df59a348..f04dcda7c 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -267,8 +267,8 @@ bool SubstraitToVeloxPlanValidator::validateCast(
}
const auto& toType = SubstraitParser::parseType(castExpr.type());
- if (toType->kind() == TypeKind::TIMESTAMP) {
- LOG_VALIDATION_MSG("Casting to TIMESTAMP is not supported.");
+ if (toType->kind() == TypeKind::TIMESTAMP || toType->isIntervalYearMonth()) {
+ LOG_VALIDATION_MSG("Casting to " + toType->toString() + " is not supported.");
return false;
}
@@ -284,6 +284,9 @@ bool SubstraitToVeloxPlanValidator::validateCast(
LOG_VALIDATION_MSG("Casting from DATE to " + toType->toString() + " is not supported.");
return false;
}
+ } else if (input->type()->isIntervalYearMonth()) {
+ LOG_VALIDATION_MSG("Casting from INTERVAL_YEAR_MONTH is not supported.");
+ return false;
}
switch (input->type()->kind()) {
case TypeKind::ARRAY:
diff --git
a/gluten-core/src/main/java/io/glutenproject/substrait/type/IntervalYearTypeNode.java
b/gluten-core/src/main/java/io/glutenproject/substrait/type/IntervalYearTypeNode.java
new file mode 100644
index 000000000..30538fc31
--- /dev/null
+++
b/gluten-core/src/main/java/io/glutenproject/substrait/type/IntervalYearTypeNode.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.glutenproject.substrait.type;
+
+import io.substrait.proto.Type;
+
+import java.io.Serializable;
+
+public class IntervalYearTypeNode implements TypeNode, Serializable {
+
+ private final Boolean nullable;
+
+ public IntervalYearTypeNode(Boolean nullable) {
+ this.nullable = nullable;
+ }
+
+ @Override
+ public Type toProtobuf() {
+ Type.IntervalYear.Builder intervalYearBuilder = Type.IntervalYear.newBuilder();
+ if (nullable) {
+ intervalYearBuilder.setNullability(Type.Nullability.NULLABILITY_NULLABLE);
+ } else {
+ intervalYearBuilder.setNullability(Type.Nullability.NULLABILITY_REQUIRED);
+ }
+ Type.Builder builder = Type.newBuilder();
+ builder.setIntervalYear(intervalYearBuilder.build());
+ return builder.build();
+ }
+}
diff --git
a/gluten-core/src/main/java/io/glutenproject/substrait/type/TypeBuilder.java
b/gluten-core/src/main/java/io/glutenproject/substrait/type/TypeBuilder.java
index a3efd6765..8393b1d9a 100644
--- a/gluten-core/src/main/java/io/glutenproject/substrait/type/TypeBuilder.java
+++ b/gluten-core/src/main/java/io/glutenproject/substrait/type/TypeBuilder.java
@@ -69,6 +69,10 @@ public class TypeBuilder {
return new DateTypeNode(nullable);
}
+ public static TypeNode makeIntervalYear(Boolean nullable) {
+ return new IntervalYearTypeNode(nullable);
+ }
+
public static TypeNode makeDecimal(Boolean nullable, Integer precision,
Integer scale) {
return new DecimalTypeNode(nullable, precision, scale);
}
diff --git
a/gluten-core/src/main/scala/io/glutenproject/expression/ConverterUtils.scala
b/gluten-core/src/main/scala/io/glutenproject/expression/ConverterUtils.scala
index 069700e29..169a9dd0e 100644
---
a/gluten-core/src/main/scala/io/glutenproject/expression/ConverterUtils.scala
+++
b/gluten-core/src/main/scala/io/glutenproject/expression/ConverterUtils.scala
@@ -238,6 +238,8 @@ object ConverterUtils extends Logging {
TypeBuilder.makeBinary(nullable)
case DateType =>
TypeBuilder.makeDate(nullable)
+ case YearMonthIntervalType.DEFAULT =>
+ TypeBuilder.makeIntervalYear(nullable)
case DecimalType() =>
val decimalType = datatype.asInstanceOf[DecimalType]
val precision = decimalType.precision
diff --git
a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionMappings.scala
b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionMappings.scala
index 180597ebf..745d045fd 100644
---
a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionMappings.scala
+++
b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionMappings.scala
@@ -179,6 +179,7 @@ object ExpressionMappings {
Sig[MonthsBetween](MONTHS_BETWEEN),
Sig[DateFromUnixDate](DATE_FROM_UNIX_DATE),
Sig[MakeTimestamp](MAKE_TIMESTAMP),
+ Sig[MakeYMInterval](MAKE_YM_INTERVAL),
// JSON functions
Sig[GetJsonObject](GET_JSON_OBJECT),
Sig[LengthOfJsonArray](JSON_ARRAY_LENGTH),
diff --git
a/gluten-core/src/main/scala/io/glutenproject/extension/GlutenPlan.scala
b/gluten-core/src/main/scala/io/glutenproject/extension/GlutenPlan.scala
index c86bcdd8e..cf472dc2f 100644
--- a/gluten-core/src/main/scala/io/glutenproject/extension/GlutenPlan.scala
+++ b/gluten-core/src/main/scala/io/glutenproject/extension/GlutenPlan.scala
@@ -76,7 +76,9 @@ trait GlutenPlan extends SparkPlan with LogLevelUtil {
}
// FIXME: Use a validation-specific method to catch validation failures
TestStats.addFallBackClassName(this.getClass.toString)
- logValidationMessage(s"Validation failed with exception for plan: $nodeName, due to:", e)
+ logValidationMessage(
+ s"Validation failed with exception for plan: $nodeName, due to: ${e.getMessage}",
+ e)
ValidationResult.notOk(e.getMessage)
} finally {
TransformerState.finishValidation
diff --git
a/gluten-data/src/main/scala/org/apache/spark/sql/utils/SparkArrowUtil.scala
b/gluten-data/src/main/scala/org/apache/spark/sql/utils/SparkArrowUtil.scala
index 67a963273..014956d84 100644
--- a/gluten-data/src/main/scala/org/apache/spark/sql/utils/SparkArrowUtil.scala
+++ b/gluten-data/src/main/scala/org/apache/spark/sql/utils/SparkArrowUtil.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.utils
import org.apache.spark.sql.types._
import org.apache.arrow.vector.complex.MapVector
-import org.apache.arrow.vector.types.{DateUnit, FloatingPointPrecision, TimeUnit}
+import org.apache.arrow.vector.types.{DateUnit, FloatingPointPrecision, IntervalUnit, TimeUnit}
import org.apache.arrow.vector.types.pojo.{ArrowType, Field, FieldType, Schema}
import scala.collection.JavaConverters._
@@ -47,6 +47,8 @@ object SparkArrowUtil {
} else {
new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC")
}
+ case YearMonthIntervalType.DEFAULT =>
+ new ArrowType.Interval(IntervalUnit.YEAR_MONTH)
case _: ArrayType => ArrowType.List.INSTANCE
case NullType => ArrowType.Null.INSTANCE
case _ =>
@@ -69,6 +71,8 @@ object SparkArrowUtil {
case date: ArrowType.Date if date.getUnit == DateUnit.DAY => DateType
// TODO: Time unit is not handled.
case _: ArrowType.Timestamp => TimestampType
+ case interval: ArrowType.Interval if interval.getUnit == IntervalUnit.YEAR_MONTH =>
+ YearMonthIntervalType.DEFAULT
case ArrowType.Null.INSTANCE => NullType
case _ => throw new UnsupportedOperationException(s"Unsupported data type: $dt")
}
diff --git
a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
index 540520ddf..7d05c8b58 100644
---
a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
@@ -1138,6 +1138,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("sorting on ShortType with nullable=false, sortOrder=List('a ASC
NULLS LAST)")
.exclude("sorting on ShortType with nullable=false, sortOrder=List('a DESC
NULLS LAST)")
.exclude("sorting on ShortType with nullable=false, sortOrder=List('a DESC
NULLS FIRST)")
+ .excludeByPrefix("sorting on YearMonthIntervalType(0,1) with")
enableSuite[GlutenTakeOrderedAndProjectSuite]
.exclude("TakeOrderedAndProject.doExecute without project")
.exclude("TakeOrderedAndProject.doExecute with project")
@@ -1459,11 +1460,13 @@ class ClickHouseTestSettings extends
BackendTestSettings {
.exclude("Loading a JSON dataset primitivesAsString returns complex fields
as strings")
.exclude("SPARK-4228 DataFrame to JSON")
.exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
+ .exclude("SPARK-36830: Support reading and writing ANSI intervals")
enableSuite[GlutenJsonV2Suite]
.exclude("Complex field and type inferring")
.exclude("Loading a JSON dataset primitivesAsString returns complex fields
as strings")
.exclude("SPARK-4228 DataFrame to JSON")
.exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
+ .exclude("SPARK-36830: Support reading and writing ANSI intervals")
enableSuite[GlutenOrcColumnarBatchReaderSuite]
enableSuite[GlutenOrcFilterSuite].exclude("SPARK-32622: case sensitivity in
predicate pushdown")
enableSuite[GlutenOrcPartitionDiscoverySuite]
@@ -1485,6 +1488,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-36594: ORC vectorized reader should properly check maximal
number of fields")
// DISABLED: GLUTEN-4893 Vanilla UT checks scan operator by exactly
matching the class type
.exclude("SPARK-34862: Support ORC vectorized reader for nested column")
+ .excludeByPrefix(
+ "SPARK-36931: Support reading and writing ANSI intervals (spark.sql.orc.enableVectorizedReader=false,")
.excludeGlutenTest("SPARK-31238: compatibility with Spark 2.4 in reading
dates")
.excludeGlutenTest("SPARK-31238, SPARK-31423: rebasing dates in write")
.excludeGlutenTest("SPARK-31284: compatibility with Spark 2.4 in reading
timestamps")
@@ -1723,7 +1728,11 @@ class ClickHouseTestSettings extends BackendTestSettings
{
enableSuite[GlutenParquetCompressionCodecPrecedenceSuite]
enableSuite[GlutenParquetEncodingSuite]
enableSuite[GlutenParquetFileFormatV1Suite]
+ .exclude(
+ "SPARK-36825, SPARK-36854: year-month/day-time intervals written and read as INT32/INT64")
enableSuite[GlutenParquetFileFormatV2Suite]
+ .exclude(
+ "SPARK-36825, SPARK-36854: year-month/day-time intervals written and read as INT32/INT64")
enableSuite[GlutenParquetIOSuite]
.exclude("Standard mode - nested map with struct as key type")
.exclude("Legacy mode - nested map with struct as key type")
@@ -1762,6 +1771,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-25207: exception when duplicate fields in case-insensitive
mode")
.exclude("Support Parquet column index")
.exclude("SPARK-34562: Bloom filter push down")
+ .exclude("SPARK-36866: filter pushdown - year-month interval")
.excludeGlutenTest("SPARK-25207: exception when duplicate fields in
case-insensitive mode")
enableSuite[GlutenParquetV1PartitionDiscoverySuite]
.exclude("SPARK-7847: Dynamic partition directory path escaping and
unescaping")
@@ -1967,6 +1977,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-25207: exception when duplicate fields in case-insensitive
mode")
.exclude("Support Parquet column index")
.exclude("SPARK-34562: Bloom filter push down")
+ .exclude("SPARK-36866: filter pushdown - year-month interval")
.excludeGlutenTest("SPARK-25207: exception when duplicate fields in
case-insensitive mode")
.excludeGlutenTest("filter pushdown - date")
enableSuite[GlutenParquetV2PartitionDiscoverySuite]
@@ -2161,6 +2172,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
enableSuite[GlutenFiltersSuite]
enableSuite[GlutenInsertSuite]
enableSuite[GlutenPartitionedWriteSuite]
+ .exclude("SPARK-37231, SPARK-37240: Dynamic writes/reads of ANSI interval partitions")
enableSuite[GlutenPathOptionSuite]
enableSuite[GlutenPrunedScanSuite]
enableSuite[GlutenResolvedDataSourceSuite]
diff --git
a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
index a81022ee8..da029e575 100644
---
a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
@@ -1176,6 +1176,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("sorting on ShortType with nullable=false, sortOrder=List('a DESC
NULLS LAST)")
.exclude("sorting on ShortType with nullable=false, sortOrder=List('a DESC
NULLS FIRST)")
.exclude("SPARK-40089: decimal values sort correctly")
+ .excludeByPrefix("sorting on YearMonthIntervalType(0,1) with")
enableSuite[GlutenTakeOrderedAndProjectSuite]
.exclude("TakeOrderedAndProject.doExecute without project")
.exclude("TakeOrderedAndProject.doExecute with project")
@@ -1299,6 +1300,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
.exclude("SPARK-37360: Write and infer TIMESTAMP_NTZ values with a
non-default pattern")
.exclude("SPARK-37360: Timestamp type inference for a column with
TIMESTAMP_NTZ values")
+ .exclude("SPARK-36830: Support reading and writing ANSI intervals")
enableSuite[GlutenJsonSuite]
.exclude("Complex field and type inferring")
.exclude("Loading a JSON dataset primitivesAsString returns complex fields
as strings")
@@ -1306,6 +1308,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
.exclude("SPARK-37360: Write and infer TIMESTAMP_NTZ values with a
non-default pattern")
.exclude("SPARK-37360: Timestamp type inference for a column with
TIMESTAMP_NTZ values")
+ .exclude("SPARK-36830: Support reading and writing ANSI intervals")
enableSuite[GlutenJsonV1Suite]
.exclude("Complex field and type inferring")
.exclude("Loading a JSON dataset primitivesAsString returns complex fields
as strings")
@@ -1313,6 +1316,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
.exclude("SPARK-37360: Write and infer TIMESTAMP_NTZ values with a
non-default pattern")
.exclude("SPARK-37360: Timestamp type inference for a column with
TIMESTAMP_NTZ values")
+ .exclude("SPARK-36830: Support reading and writing ANSI intervals")
enableSuite[GlutenJsonV2Suite]
.exclude("Complex field and type inferring")
.exclude("Loading a JSON dataset primitivesAsString returns complex fields
as strings")
@@ -1320,6 +1324,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
.exclude("SPARK-37360: Write and infer TIMESTAMP_NTZ values with a
non-default pattern")
.exclude("SPARK-37360: Timestamp type inference for a column with
TIMESTAMP_NTZ values")
+ .exclude("SPARK-36830: Support reading and writing ANSI intervals")
enableSuite[GlutenOrcColumnarBatchReaderSuite]
enableSuite[GlutenOrcFilterSuite].exclude("SPARK-32622: case sensitivity in
predicate pushdown")
enableSuite[GlutenOrcPartitionDiscoverySuite]
@@ -1343,11 +1348,15 @@ class ClickHouseTestSettings extends
BackendTestSettings {
.exclude("SPARK-37812: Reuse result row when deserializing a struct")
// DISABLED: GLUTEN-4893 Vanilla UT checks scan operator by exactly
matching the class type
.exclude("SPARK-34862: Support ORC vectorized reader for nested column")
+ .excludeByPrefix(
+ "SPARK-36931: Support reading and writing ANSI intervals (spark.sql.orc.enableVectorizedReader=false,")
.excludeGlutenTest("SPARK-31238: compatibility with Spark 2.4 in reading
dates")
.excludeGlutenTest("SPARK-31238, SPARK-31423: rebasing dates in write")
.excludeGlutenTest("SPARK-31284: compatibility with Spark 2.4 in reading
timestamps")
.excludeGlutenTest("SPARK-31284, SPARK-31423: rebasing timestamps in
write")
.excludeGlutenTest("SPARK-34862: Support ORC vectorized reader for nested
column")
+ .excludeGlutenTest(
+ "SPARK-36931: Support reading and writing ANSI intervals (spark.sql.orc.enableVectorizedReader=false, spark.sql.orc.enableNestedColumnVectorizedReader=false)")
enableSuite[GlutenOrcV1FilterSuite].exclude("SPARK-32622: case sensitivity
in predicate pushdown")
enableSuite[GlutenOrcV1PartitionDiscoverySuite]
enableSuite[GlutenOrcV1QuerySuite]
@@ -1596,7 +1605,11 @@ class ClickHouseTestSettings extends BackendTestSettings
{
enableSuite[GlutenParquetEncodingSuite].exclude("All Types
Dictionary").exclude("All Types Null")
enableSuite[GlutenParquetFieldIdIOSuite]
enableSuite[GlutenParquetFileFormatV1Suite]
+ .exclude(
+ "SPARK-36825, SPARK-36854: year-month/day-time intervals written and read as INT32/INT64")
enableSuite[GlutenParquetFileFormatV2Suite]
+ .exclude(
+ "SPARK-36825, SPARK-36854: year-month/day-time intervals written and read as INT32/INT64")
enableSuite[GlutenParquetIOSuite]
.exclude("Standard mode - nested map with struct as key type")
.exclude("Legacy mode - nested map with struct as key type")
@@ -1638,6 +1651,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("Support Parquet column index")
.exclude("SPARK-34562: Bloom filter push down")
.exclude("SPARK-38825: in and notIn filters")
+ .exclude("SPARK-36866: filter pushdown - year-month interval")
.excludeGlutenTest("SPARK-25207: exception when duplicate fields in
case-insensitive mode")
enableSuite[GlutenParquetV1PartitionDiscoverySuite]
.exclude("SPARK-7847: Dynamic partition directory path escaping and
unescaping")
@@ -1844,6 +1858,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("Support Parquet column index")
.exclude("SPARK-34562: Bloom filter push down")
.exclude("SPARK-38825: in and notIn filters")
+ .exclude("SPARK-36866: filter pushdown - year-month interval")
.excludeGlutenTest("SPARK-25207: exception when duplicate fields in
case-insensitive mode")
.excludeGlutenTest("filter pushdown - date")
enableSuite[GlutenParquetV2PartitionDiscoverySuite]
@@ -2066,6 +2081,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
enableSuite[GlutenFiltersSuite]
enableSuite[GlutenInsertSuite]
enableSuite[GlutenPartitionedWriteSuite]
+ .exclude("SPARK-37231, SPARK-37240: Dynamic writes/reads of ANSI interval partitions")
enableSuite[GlutenPathOptionSuite]
enableSuite[GlutenPrunedScanSuite]
enableSuite[GlutenResolvedDataSourceSuite]
diff --git
a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
index ab204b365..9fabee3ca 100644
---
a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
@@ -956,6 +956,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("sorting on ShortType with nullable=false, sortOrder=List('a ASC
NULLS LAST)")
.exclude("sorting on ShortType with nullable=false, sortOrder=List('a DESC
NULLS LAST)")
.exclude("sorting on ShortType with nullable=false, sortOrder=List('a DESC
NULLS FIRST)")
+ .excludeByPrefix("sorting on YearMonthIntervalType(0,1) with")
enableSuite[GlutenTakeOrderedAndProjectSuite]
.exclude("TakeOrderedAndProject.doExecute without project")
.exclude("TakeOrderedAndProject.doExecute with project")
@@ -1078,6 +1079,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
.exclude("SPARK-37360: Write and infer TIMESTAMP_NTZ values with a
non-default pattern")
.exclude("SPARK-37360: Timestamp type inference for a column with
TIMESTAMP_NTZ values")
+ .exclude("SPARK-36830: Support reading and writing ANSI intervals")
enableSuite[GlutenJsonSuite]
.exclude("Complex field and type inferring")
.exclude("Loading a JSON dataset primitivesAsString returns complex fields
as strings")
@@ -1085,6 +1087,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
.exclude("SPARK-37360: Write and infer TIMESTAMP_NTZ values with a
non-default pattern")
.exclude("SPARK-37360: Timestamp type inference for a column with
TIMESTAMP_NTZ values")
+ .exclude("SPARK-36830: Support reading and writing ANSI intervals")
enableSuite[GlutenJsonV1Suite]
.exclude("Complex field and type inferring")
.exclude("Loading a JSON dataset primitivesAsString returns complex fields
as strings")
@@ -1092,6 +1095,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
.exclude("SPARK-37360: Write and infer TIMESTAMP_NTZ values with a
non-default pattern")
.exclude("SPARK-37360: Timestamp type inference for a column with
TIMESTAMP_NTZ values")
+ .exclude("SPARK-36830: Support reading and writing ANSI intervals")
enableSuite[GlutenJsonV2Suite]
.exclude("Complex field and type inferring")
.exclude("Loading a JSON dataset primitivesAsString returns complex fields
as strings")
@@ -1099,6 +1103,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-18352: Handle multi-line corrupt documents (PERMISSIVE)")
.exclude("SPARK-37360: Write and infer TIMESTAMP_NTZ values with a
non-default pattern")
.exclude("SPARK-37360: Timestamp type inference for a column with
TIMESTAMP_NTZ values")
+ .exclude("SPARK-36830: Support reading and writing ANSI intervals")
enableSuite[GlutenOrcColumnarBatchReaderSuite]
enableSuite[GlutenOrcFilterSuite].exclude("SPARK-32622: case sensitivity in
predicate pushdown")
enableSuite[GlutenOrcPartitionDiscoverySuite]
@@ -1122,11 +1127,15 @@ class ClickHouseTestSettings extends
BackendTestSettings {
.exclude("SPARK-37812: Reuse result row when deserializing a struct")
// DISABLED: GLUTEN-4893 Vanilla UT checks scan operator by exactly
matching the class type
.exclude("SPARK-34862: Support ORC vectorized reader for nested column")
+ .excludeByPrefix(
+ "SPARK-36931: Support reading and writing ANSI intervals (spark.sql.orc.enableVectorizedReader=false,")
.excludeGlutenTest("SPARK-31238: compatibility with Spark 2.4 in reading
dates")
.excludeGlutenTest("SPARK-31238, SPARK-31423: rebasing dates in write")
.excludeGlutenTest("SPARK-31284: compatibility with Spark 2.4 in reading
timestamps")
.excludeGlutenTest("SPARK-31284, SPARK-31423: rebasing timestamps in
write")
.excludeGlutenTest("SPARK-34862: Support ORC vectorized reader for nested
column")
+ .excludeGlutenTest(
+ "SPARK-36931: Support reading and writing ANSI intervals (spark.sql.orc.enableVectorizedReader=false, spark.sql.orc.enableNestedColumnVectorizedReader=false)")
enableSuite[GlutenOrcV1FilterSuite].exclude("SPARK-32622: case sensitivity
in predicate pushdown")
enableSuite[GlutenOrcV1PartitionDiscoverySuite]
enableSuite[GlutenOrcV1QuerySuite]
@@ -1375,7 +1384,11 @@ class ClickHouseTestSettings extends BackendTestSettings
{
enableSuite[GlutenParquetEncodingSuite].exclude("All Types
Dictionary").exclude("All Types Null")
enableSuite[GlutenParquetFieldIdIOSuite]
enableSuite[GlutenParquetFileFormatV1Suite]
+ .exclude(
+ "SPARK-36825, SPARK-36854: year-month/day-time intervals written and read as INT32/INT64")
enableSuite[GlutenParquetFileFormatV2Suite]
+ .exclude(
+ "SPARK-36825, SPARK-36854: year-month/day-time intervals written and read as INT32/INT64")
enableSuite[GlutenParquetIOSuite]
.exclude("Standard mode - nested map with struct as key type")
.exclude("Legacy mode - nested map with struct as key type")
@@ -1417,6 +1430,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("Support Parquet column index")
.exclude("SPARK-34562: Bloom filter push down")
.exclude("SPARK-38825: in and notIn filters")
+ .exclude("SPARK-36866: filter pushdown - year-month interval")
.excludeGlutenTest("SPARK-25207: exception when duplicate fields in
case-insensitive mode")
enableSuite[GlutenParquetV1PartitionDiscoverySuite]
.exclude("SPARK-7847: Dynamic partition directory path escaping and
unescaping")
@@ -1623,6 +1637,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("Support Parquet column index")
.exclude("SPARK-34562: Bloom filter push down")
.exclude("SPARK-38825: in and notIn filters")
+ .exclude("SPARK-36866: filter pushdown - year-month interval")
.excludeGlutenTest("SPARK-25207: exception when duplicate fields in
case-insensitive mode")
.excludeGlutenTest("filter pushdown - date")
enableSuite[GlutenParquetV2PartitionDiscoverySuite]
@@ -1846,6 +1861,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
enableSuite[GlutenInsertSuite]
.excludeAllGlutenTests()
enableSuite[GlutenPartitionedWriteSuite]
+ .exclude("SPARK-37231, SPARK-37240: Dynamic writes/reads of ANSI interval partitions")
enableSuite[GlutenPathOptionSuite]
enableSuite[GlutenPrunedScanSuite]
enableSuite[GlutenResolvedDataSourceSuite]
diff --git
a/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala
b/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala
index 2f3391629..97fa914e9 100644
---
a/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala
+++
b/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala
@@ -192,6 +192,7 @@ object ExpressionNames {
final val MONTHS_BETWEEN = "months_between"
final val DATE_FROM_UNIX_DATE = "date_from_unix_date"
final val MAKE_TIMESTAMP = "make_timestamp"
+ final val MAKE_YM_INTERVAL = "make_ym_interval"
// JSON functions
final val GET_JSON_OBJECT = "get_json_object"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]