This is an automated email from the ASF dual-hosted git repository.
kejia pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new f6a9665ca7 [GLUTEN-7675][VL] Support parquet write with complex data
type (e.g. MAP, ARRAY) (#7676)
f6a9665ca7 is described below
commit f6a9665ca7b215fd03643fec783898c6f6c162f4
Author: Xiuli Wei <[email protected]>
AuthorDate: Thu Nov 7 11:31:02 2024 +0800
[GLUTEN-7675][VL] Support parquet write with complex data type (e.g. MAP,
ARRAY) (#7676)
---
.../gluten/backendsapi/velox/VeloxBackend.scala | 25 +++--
.../execution/WriteFilesExecTransformer.scala | 39 +++++++-
.../parquet/GlutenParquetFieldIdIOSuite.scala | 25 ++++-
.../gluten/utils/velox/VeloxTestSettings.scala | 7 ++
.../spark/sql/GlutenCharVarcharTestSuite.scala | 101 +++++++++++++++++++-
.../parquet/GlutenParquetFieldIdIOSuite.scala | 25 ++++-
.../gluten/utils/velox/VeloxTestSettings.scala | 7 ++
.../spark/sql/GlutenCharVarcharTestSuite.scala | 102 ++++++++++++++++++++-
.../parquet/GlutenParquetFieldIdIOSuite.scala | 25 ++++-
9 files changed, 339 insertions(+), 17 deletions(-)
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
index 539059cdb6..03d5aa2549 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
@@ -232,15 +232,26 @@ object VeloxBackendSettings extends BackendSettingsApi {
// Validate if all types are supported.
def validateDataTypes(): Option[String] = {
- val unsupportedTypes = fields.flatMap {
- field =>
- field.dataType match {
- case _: StructType => Some("StructType")
- case _: ArrayType => Some("ArrayType")
- case _: MapType => Some("MapType")
- case _: YearMonthIntervalType => Some("YearMonthIntervalType")
+ val unsupportedTypes = format match {
+ case _: ParquetFileFormat =>
+ fields.flatMap {
+ case StructField(_, _: YearMonthIntervalType, _, _) =>
+ Some("YearMonthIntervalType")
+ case StructField(_, _: StructType, _, _) =>
+ Some("StructType")
case _ => None
}
+ case _ =>
+ fields.flatMap {
+ field =>
+ field.dataType match {
+ case _: StructType => Some("StructType")
+ case _: ArrayType => Some("ArrayType")
+ case _: MapType => Some("MapType")
+ case _: YearMonthIntervalType => Some("YearMonthIntervalType")
+ case _ => None
+ }
+ }
}
if (unsupportedTypes.nonEmpty) {
Some(unsupportedTypes.mkString("Found unsupported type:", ",", ""))
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala
index 7034a5a8f0..8162e538cf 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala
@@ -21,6 +21,7 @@ import org.apache.gluten.backendsapi.BackendsApiManager
import org.apache.gluten.expression.ConverterUtils
import org.apache.gluten.extension.ValidationResult
import org.apache.gluten.metrics.MetricsUpdater
+import org.apache.gluten.planner.plan.GlutenPlanModel.GroupLeafExec
import org.apache.gluten.substrait.`type`.ColumnTypeNode
import org.apache.gluten.substrait.SubstraitContext
import org.apache.gluten.substrait.extensions.ExtensionBuilder
@@ -29,12 +30,15 @@ import org.apache.gluten.utils.SubstraitUtil
import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
-import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression,
Literal}
+import org.apache.spark.sql.catalyst.plans.logical.Project
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
-import org.apache.spark.sql.execution.SparkPlan
+import org.apache.spark.sql.execution.{ProjectExec, SparkPlan}
import org.apache.spark.sql.execution.datasources.FileFormat
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.execution.metric.SQLMetric
import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{ArrayType, MapType}
import org.apache.spark.sql.types.MetadataBuilder
import io.substrait.proto.NamedStruct
@@ -127,6 +131,37 @@ case class WriteFilesExecTransformer(
override protected def doValidateInternal(): ValidationResult = {
val finalChildOutput = getFinalChildOutput
+
+ def isConstantComplexType(e: Expression): Boolean = {
+ e match {
+ case Literal(_, _: ArrayType | _: MapType) => true
+ case _ => e.children.exists(isConstantComplexType)
+ }
+ }
+
+ lazy val hasConstantComplexType = child match {
+ case t: ProjectExecTransformer =>
+ t.projectList.exists(isConstantComplexType)
+ case p: ProjectExec =>
+ p.projectList.exists(isConstantComplexType)
+ case g: GroupLeafExec => // support the ras
+ g.metadata
+ .logicalLink()
+ .plan
+ .collectFirst {
+ case p: Project if p.projectList.exists(isConstantComplexType) =>
true
+ }
+ .isDefined
+ case _ => false
+ }
+ // TODO: currently the velox don't support parquet write with complex data
type
+ // with constant.
+ if (fileFormat.isInstanceOf[ParquetFileFormat] && hasConstantComplexType) {
+ return ValidationResult.failed(
+ "Unsupported native parquet write: " +
+ "complex data type with constant")
+ }
+
val validationResult =
BackendsApiManager.getSettings.supportWriteFilesExec(
fileFormat,
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFieldIdIOSuite.scala
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFieldIdIOSuite.scala
index 9e4d94e1c2..bd1c269843 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFieldIdIOSuite.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFieldIdIOSuite.scala
@@ -16,6 +16,27 @@
*/
package org.apache.spark.sql.execution.datasources.parquet
-import org.apache.spark.sql.GlutenSQLTestsBaseTrait
+import org.apache.spark.sql.{GlutenSQLTestsBaseTrait, Row}
-class GlutenParquetFieldIdIOSuite extends ParquetFieldIdIOSuite with
GlutenSQLTestsBaseTrait {}
+class GlutenParquetFieldIdIOSuite extends ParquetFieldIdIOSuite with
GlutenSQLTestsBaseTrait {
+ testGluten("Parquet writer with ARRAY and MAP") {
+ spark.sql("""
+ |CREATE TABLE T1 (
+ | a INT,
+ | b ARRAY<STRING>,
+ | c MAP<STRING,STRING>
+ |)
+ |USING PARQUET
+ |""".stripMargin)
+
+ spark.sql("""
+ | INSERT OVERWRITE T1 VALUES
+ | (1, ARRAY(1, 2, 3), MAP("key1","value1"))
+ |""".stripMargin)
+
+ checkAnswer(
+ spark.sql("SELECT * FROM T1"),
+ Row(1, Array("1", "2", "3"), Map("key1" -> "value1")) :: Nil
+ )
+ }
+}
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 0f3c43dfdf..ec13265f9e 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -963,6 +963,13 @@ class VeloxTestSettings extends BackendTestSettings {
// Extra ColumnarToRow is needed to transform vanilla columnar data to
gluten columnar data.
.exclude("SPARK-37369: Avoid redundant ColumnarToRow transition on
InMemoryTableScan")
enableSuite[GlutenFileSourceCharVarcharTestSuite]
+ .exclude("length check for input string values: nested in array")
+ .exclude("length check for input string values: nested in array")
+ .exclude("length check for input string values: nested in map key")
+ .exclude("length check for input string values: nested in map value")
+ .exclude("length check for input string values: nested in both map key and
value")
+ .exclude("length check for input string values: nested in array of struct")
+ .exclude("length check for input string values: nested in array of array")
enableSuite[GlutenDSV2CharVarcharTestSuite]
enableSuite[GlutenColumnExpressionSuite]
// Velox raise_error('errMsg') throws a velox_user_error exception with
the message 'errMsg'.
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenCharVarcharTestSuite.scala
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenCharVarcharTestSuite.scala
index 84502ace51..89d9114870 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenCharVarcharTestSuite.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenCharVarcharTestSuite.scala
@@ -16,8 +16,107 @@
*/
package org.apache.spark.sql
+import org.apache.spark.SparkException
class GlutenFileSourceCharVarcharTestSuite
extends FileSourceCharVarcharTestSuite
- with GlutenSQLTestsTrait {}
+ with GlutenSQLTestsTrait {
+ private def testTableWrite(f: String => Unit): Unit = {
+ withTable("t")(f("char"))
+ withTable("t")(f("varchar"))
+ }
+
+ private val ERROR_MESSAGE =
+ "Exceeds char/varchar type length limitation: 5"
+
+ testGluten("length check for input string values: nested in struct") {
+ testTableWrite {
+ typeName =>
+ sql(s"CREATE TABLE t(c STRUCT<c: $typeName(5)>) USING $format")
+ sql("INSERT INTO t SELECT struct(null)")
+ checkAnswer(spark.table("t"), Row(Row(null)))
+ val e = intercept[RuntimeException] {
+ sql("INSERT INTO t SELECT struct('123456')")
+ }
+ assert(e.getMessage.contains(ERROR_MESSAGE))
+ }
+ }
+
+ testGluten("length check for input string values: nested in array") {
+ testTableWrite {
+ typeName =>
+ sql(s"CREATE TABLE t(c ARRAY<$typeName(5)>) USING $format")
+ sql("INSERT INTO t VALUES (array(null))")
+ checkAnswer(spark.table("t"), Row(Seq(null)))
+ val e = intercept[SparkException] {
+ sql("INSERT INTO t VALUES (array('a', '123456'))")
+ }
+ assert(e.getMessage.contains(ERROR_MESSAGE))
+ }
+ }
+
+ testGluten("length check for input string values: nested in map key") {
+ testTableWrite {
+ typeName =>
+ sql(s"CREATE TABLE t(c MAP<$typeName(5), STRING>) USING $format")
+ val e = intercept[SparkException](sql("INSERT INTO t VALUES
(map('123456', 'a'))"))
+ assert(e.getMessage.contains(ERROR_MESSAGE))
+ }
+ }
+
+ testGluten("length check for input string values: nested in map value") {
+ testTableWrite {
+ typeName =>
+ sql(s"CREATE TABLE t(c MAP<STRING, $typeName(5)>) USING $format")
+ sql("INSERT INTO t VALUES (map('a', null))")
+ checkAnswer(spark.table("t"), Row(Map("a" -> null)))
+ val e = intercept[SparkException](sql("INSERT INTO t VALUES (map('a',
'123456'))"))
+ assert(e.getMessage.contains(ERROR_MESSAGE))
+ }
+ }
+
+ testGluten("length check for input string values: nested in both map key and
value") {
+ testTableWrite {
+ typeName =>
+ sql(s"CREATE TABLE t(c MAP<$typeName(5), $typeName(5)>) USING $format")
+ val e1 = intercept[SparkException](sql("INSERT INTO t VALUES
(map('123456', 'a'))"))
+ assert(e1.getMessage.contains(ERROR_MESSAGE))
+ val e2 = intercept[SparkException](sql("INSERT INTO t VALUES (map('a',
'123456'))"))
+ assert(e2.getMessage.contains(ERROR_MESSAGE))
+ }
+ }
+
+ testGluten("length check for input string values: nested in struct of
array") {
+ testTableWrite {
+ typeName =>
+ sql(s"CREATE TABLE t(c STRUCT<c: ARRAY<$typeName(5)>>) USING $format")
+ sql("INSERT INTO t SELECT struct(array(null))")
+ checkAnswer(spark.table("t"), Row(Row(Seq(null))))
+ val e = intercept[SparkException](sql("INSERT INTO t SELECT
struct(array('123456'))"))
+ assert(e.getMessage.contains(ERROR_MESSAGE))
+ }
+ }
+
+ testGluten("length check for input string values: nested in array of
struct") {
+ testTableWrite {
+ typeName =>
+ sql(s"CREATE TABLE t(c ARRAY<STRUCT<c: $typeName(5)>>) USING $format")
+ sql("INSERT INTO t VALUES (array(struct(null)))")
+ checkAnswer(spark.table("t"), Row(Seq(Row(null))))
+ val e = intercept[SparkException](sql("INSERT INTO t VALUES
(array(struct('123456')))"))
+ assert(e.getMessage.contains(ERROR_MESSAGE))
+ }
+ }
+
+ testGluten("length check for input string values: nested in array of array")
{
+ testTableWrite {
+ typeName =>
+ sql(s"CREATE TABLE t(c ARRAY<ARRAY<$typeName(5)>>) USING $format")
+ sql("INSERT INTO t VALUES (array(array(null)))")
+ checkAnswer(spark.table("t"), Row(Seq(Seq(null))))
+ val e = intercept[SparkException](sql("INSERT INTO t VALUES
(array(array('123456')))"))
+ assert(e.getMessage.contains(ERROR_MESSAGE))
+ }
+ }
+}
class GlutenDSV2CharVarcharTestSuite extends DSV2CharVarcharTestSuite with
GlutenSQLTestsTrait {}
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFieldIdIOSuite.scala
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFieldIdIOSuite.scala
index 9e4d94e1c2..bd1c269843 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFieldIdIOSuite.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFieldIdIOSuite.scala
@@ -16,6 +16,27 @@
*/
package org.apache.spark.sql.execution.datasources.parquet
-import org.apache.spark.sql.GlutenSQLTestsBaseTrait
+import org.apache.spark.sql.{GlutenSQLTestsBaseTrait, Row}
-class GlutenParquetFieldIdIOSuite extends ParquetFieldIdIOSuite with
GlutenSQLTestsBaseTrait {}
+class GlutenParquetFieldIdIOSuite extends ParquetFieldIdIOSuite with
GlutenSQLTestsBaseTrait {
+ testGluten("Parquet writer with ARRAY and MAP") {
+ spark.sql("""
+ |CREATE TABLE T1 (
+ | a INT,
+ | b ARRAY<STRING>,
+ | c MAP<STRING,STRING>
+ |)
+ |USING PARQUET
+ |""".stripMargin)
+
+ spark.sql("""
+ | INSERT OVERWRITE T1 VALUES
+ | (1, ARRAY(1, 2, 3), MAP("key1","value1"))
+ |""".stripMargin)
+
+ checkAnswer(
+ spark.sql("SELECT * FROM T1"),
+ Row(1, Array("1", "2", "3"), Map("key1" -> "value1")) :: Nil
+ )
+ }
+}
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index a9525b1b0e..876d213b05 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -983,6 +983,13 @@ class VeloxTestSettings extends BackendTestSettings {
// Extra ColumnarToRow is needed to transform vanilla columnar data to
gluten columnar data.
.exclude("SPARK-37369: Avoid redundant ColumnarToRow transition on
InMemoryTableScan")
enableSuite[GlutenFileSourceCharVarcharTestSuite]
+ .exclude("length check for input string values: nested in array")
+ .exclude("length check for input string values: nested in array")
+ .exclude("length check for input string values: nested in map key")
+ .exclude("length check for input string values: nested in map value")
+ .exclude("length check for input string values: nested in both map key and
value")
+ .exclude("length check for input string values: nested in array of struct")
+ .exclude("length check for input string values: nested in array of array")
enableSuite[GlutenDSV2CharVarcharTestSuite]
enableSuite[GlutenColumnExpressionSuite]
// Velox raise_error('errMsg') throws a velox_user_error exception with
the message 'errMsg'.
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenCharVarcharTestSuite.scala
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenCharVarcharTestSuite.scala
index 84502ace51..ce2f1b465e 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenCharVarcharTestSuite.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenCharVarcharTestSuite.scala
@@ -16,8 +16,108 @@
*/
package org.apache.spark.sql
+import org.apache.spark.SparkException
+
class GlutenFileSourceCharVarcharTestSuite
extends FileSourceCharVarcharTestSuite
- with GlutenSQLTestsTrait {}
+ with GlutenSQLTestsTrait {
+ private def testTableWrite(f: String => Unit): Unit = {
+ withTable("t")(f("char"))
+ withTable("t")(f("varchar"))
+ }
+
+ private val ERROR_MESSAGE =
+ "Exceeds char/varchar type length limitation: 5"
+
+ testGluten("length check for input string values: nested in struct") {
+ testTableWrite {
+ typeName =>
+ sql(s"CREATE TABLE t(c STRUCT<c: $typeName(5)>) USING $format")
+ sql("INSERT INTO t SELECT struct(null)")
+ checkAnswer(spark.table("t"), Row(Row(null)))
+ val e = intercept[RuntimeException] {
+ sql("INSERT INTO t SELECT struct('123456')")
+ }
+ assert(e.getMessage.contains(ERROR_MESSAGE))
+ }
+ }
+
+ testGluten("length check for input string values: nested in array") {
+ testTableWrite {
+ typeName =>
+ sql(s"CREATE TABLE t(c ARRAY<$typeName(5)>) USING $format")
+ sql("INSERT INTO t VALUES (array(null))")
+ checkAnswer(spark.table("t"), Row(Seq(null)))
+ val e = intercept[SparkException] {
+ sql("INSERT INTO t VALUES (array('a', '123456'))")
+ }
+ assert(e.getMessage.contains(ERROR_MESSAGE))
+ }
+ }
+
+ testGluten("length check for input string values: nested in map key") {
+ testTableWrite {
+ typeName =>
+ sql(s"CREATE TABLE t(c MAP<$typeName(5), STRING>) USING $format")
+ val e = intercept[SparkException](sql("INSERT INTO t VALUES
(map('123456', 'a'))"))
+ assert(e.getMessage.contains(ERROR_MESSAGE))
+ }
+ }
+
+ testGluten("length check for input string values: nested in map value") {
+ testTableWrite {
+ typeName =>
+ sql(s"CREATE TABLE t(c MAP<STRING, $typeName(5)>) USING $format")
+ sql("INSERT INTO t VALUES (map('a', null))")
+ checkAnswer(spark.table("t"), Row(Map("a" -> null)))
+ val e = intercept[SparkException](sql("INSERT INTO t VALUES (map('a',
'123456'))"))
+ assert(e.getMessage.contains(ERROR_MESSAGE))
+ }
+ }
+
+ testGluten("length check for input string values: nested in both map key and
value") {
+ testTableWrite {
+ typeName =>
+ sql(s"CREATE TABLE t(c MAP<$typeName(5), $typeName(5)>) USING $format")
+ val e1 = intercept[SparkException](sql("INSERT INTO t VALUES
(map('123456', 'a'))"))
+ assert(e1.getMessage.contains(ERROR_MESSAGE))
+ val e2 = intercept[SparkException](sql("INSERT INTO t VALUES (map('a',
'123456'))"))
+ assert(e2.getMessage.contains(ERROR_MESSAGE))
+ }
+ }
+
+ testGluten("length check for input string values: nested in struct of
array") {
+ testTableWrite {
+ typeName =>
+ sql(s"CREATE TABLE t(c STRUCT<c: ARRAY<$typeName(5)>>) USING $format")
+ sql("INSERT INTO t SELECT struct(array(null))")
+ checkAnswer(spark.table("t"), Row(Row(Seq(null))))
+ val e = intercept[SparkException](sql("INSERT INTO t SELECT
struct(array('123456'))"))
+ assert(e.getMessage.contains(ERROR_MESSAGE))
+ }
+ }
+
+ testGluten("length check for input string values: nested in array of
struct") {
+ testTableWrite {
+ typeName =>
+ sql(s"CREATE TABLE t(c ARRAY<STRUCT<c: $typeName(5)>>) USING $format")
+ sql("INSERT INTO t VALUES (array(struct(null)))")
+ checkAnswer(spark.table("t"), Row(Seq(Row(null))))
+ val e = intercept[SparkException](sql("INSERT INTO t VALUES
(array(struct('123456')))"))
+ assert(e.getMessage.contains(ERROR_MESSAGE))
+ }
+ }
+
+ testGluten("length check for input string values: nested in array of array")
{
+ testTableWrite {
+ typeName =>
+ sql(s"CREATE TABLE t(c ARRAY<ARRAY<$typeName(5)>>) USING $format")
+ sql("INSERT INTO t VALUES (array(array(null)))")
+ checkAnswer(spark.table("t"), Row(Seq(Seq(null))))
+ val e = intercept[SparkException](sql("INSERT INTO t VALUES
(array(array('123456')))"))
+ assert(e.getMessage.contains(ERROR_MESSAGE))
+ }
+ }
+}
class GlutenDSV2CharVarcharTestSuite extends DSV2CharVarcharTestSuite with
GlutenSQLTestsTrait {}
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFieldIdIOSuite.scala
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFieldIdIOSuite.scala
index 9e4d94e1c2..bd1c269843 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFieldIdIOSuite.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFieldIdIOSuite.scala
@@ -16,6 +16,27 @@
*/
package org.apache.spark.sql.execution.datasources.parquet
-import org.apache.spark.sql.GlutenSQLTestsBaseTrait
+import org.apache.spark.sql.{GlutenSQLTestsBaseTrait, Row}
-class GlutenParquetFieldIdIOSuite extends ParquetFieldIdIOSuite with
GlutenSQLTestsBaseTrait {}
+class GlutenParquetFieldIdIOSuite extends ParquetFieldIdIOSuite with
GlutenSQLTestsBaseTrait {
+ testGluten("Parquet writer with ARRAY and MAP") {
+ spark.sql("""
+ |CREATE TABLE T1 (
+ | a INT,
+ | b ARRAY<STRING>,
+ | c MAP<STRING,STRING>
+ |)
+ |USING PARQUET
+ |""".stripMargin)
+
+ spark.sql("""
+ | INSERT OVERWRITE T1 VALUES
+ | (1, ARRAY(1, 2, 3), MAP("key1","value1"))
+ |""".stripMargin)
+
+ checkAnswer(
+ spark.sql("SELECT * FROM T1"),
+ Row(1, Array("1", "2", "3"), Map("key1" -> "value1")) :: Nil
+ )
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]