Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/21889#discussion_r209526670
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/SelectedFieldSuite.scala
---
@@ -0,0 +1,415 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution
+
+import org.scalatest.BeforeAndAfterAll
+import org.scalatest.exceptions.TestFailedException
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.dsl.plans._
+import org.apache.spark.sql.catalyst.expressions.NamedExpression
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
+import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
+import org.apache.spark.sql.types._
+
+class SelectedFieldSuite extends SparkFunSuite with BeforeAndAfterAll {
+ private val innerSchemaCol1 = StructField("col2", StructType(
+ StructField("field1", IntegerType) ::
+ StructField("field2", ArrayType(IntegerType, containsNull = false))
::
+ StructField("field3", ArrayType(StructType(
+ StructField("subfield1", IntegerType) ::
+ StructField("subfield2", IntegerType) ::
+ StructField("subfield3", ArrayType(IntegerType)) :: Nil)),
nullable = false) ::
+ StructField("field4", MapType(StringType, StructType(
+ StructField("subfield1", IntegerType) ::
+ StructField("subfield2", ArrayType(IntegerType, containsNull =
false))
+ :: Nil), valueContainsNull = false)) ::
+ StructField("field5", ArrayType(StructType(
+ StructField("subfield1", StructType(
+ StructField("subsubfield1", IntegerType) ::
+ StructField("subsubfield2", IntegerType) :: Nil), nullable =
false) ::
+ StructField("subfield2", StructType(
+ StructField("subsubfield1", StructType(
+ StructField("subsubsubfield1", StringType) :: Nil)) ::
+ StructField("subsubfield2", IntegerType) :: Nil)) :: Nil)),
nullable = false) ::
+ StructField("field6", StructType(
+ StructField("subfield1", StringType, nullable = false) ::
+ StructField("subfield2", StringType) :: Nil)) ::
+ StructField("field7", StructType(
+ StructField("subfield1", StructType(
+ StructField("subsubfield1", IntegerType) ::
+ StructField("subsubfield2", IntegerType) :: Nil)) :: Nil)) ::
+ StructField("field8", MapType(StringType, ArrayType(StructType(
+ StructField("subfield1", IntegerType) ::
+ StructField("subfield2", ArrayType(IntegerType, containsNull =
false))
+ :: Nil)), valueContainsNull = false)) ::
+ StructField("field9", MapType(StringType, IntegerType,
valueContainsNull = false)) :: Nil))
+
+ // The test schema as a tree string, i.e. `schema.treeString`
+ // root
+ // |-- col1: string (nullable = false)
+ // |-- col2: struct (nullable = true)
+ // | |-- field1: integer (nullable = true)
+ // | |-- field2: array (nullable = true)
+ // | | |-- element: integer (containsNull = false)
+ // | |-- field3: array (nullable = false)
+ // | | |-- element: struct (containsNull = true)
+ // | | | |-- subfield1: integer (nullable = true)
+ // | | | |-- subfield2: integer (nullable = true)
+ // | | | |-- subfield3: array (nullable = true)
+ // | | | | |-- element: integer (containsNull = true)
+ // | |-- field4: map (nullable = true)
+ // | | |-- key: string
+ // | | |-- value: struct (valueContainsNull = false)
+ // | | | |-- subfield1: integer (nullable = true)
+ // | | | |-- subfield2: array (nullable = true)
+ // | | | | |-- element: integer (containsNull = false)
+ // | |-- field5: array (nullable = false)
+ // | | |-- element: struct (containsNull = true)
+ // | | | |-- subfield1: struct (nullable = false)
+ // | | | | |-- subsubfield1: integer (nullable = true)
+ // | | | | |-- subsubfield2: integer (nullable = true)
+ // | | | |-- subfield2: struct (nullable = true)
+ // | | | | |-- subsubfield1: struct (nullable = true)
+ // | | | | | |-- subsubsubfield1: string (nullable = true)
+ // | | | | |-- subsubfield2: integer (nullable = true)
+ // | |-- field6: struct (nullable = true)
+ // | | |-- subfield1: string (nullable = false)
+ // | | |-- subfield2: string (nullable = true)
+ // | |-- field7: struct (nullable = true)
+ // | | |-- subfield1: struct (nullable = true)
+ // | | | |-- subsubfield1: integer (nullable = true)
+ // | | | |-- subsubfield2: integer (nullable = true)
+ // | |-- field8: map (nullable = true)
+ // | | |-- key: string
+ // | | |-- value: array (valueContainsNull = false)
+ // | | | |-- element: struct (containsNull = true)
+ // | | | | |-- subfield1: integer (nullable = true)
+ // | | | | |-- subfield2: array (nullable = true)
+ // | | | | | |-- element: integer (containsNull = false)
+ // | |-- field9: map (nullable = true)
+ // | | |-- key: string
+ // | | |-- value: integer (valueContainsNull = false)
+ // |-- col3: array (nullable = false)
+ // | |-- element: struct (containsNull = false)
+ // | | |-- field1: struct (nullable = true)
+ // | | | |-- subfield1: integer (nullable = false)
+ // | | | |-- subfield2: integer (nullable = true)
+ // | | |-- field2: map (nullable = true)
+ // | | | |-- key: string
+ // | | | |-- value: integer (valueContainsNull = false)
+ // |-- col4: map (nullable = false)
+ // | |-- key: string
+ // | |-- value: struct (valueContainsNull = false)
+ // | | |-- field1: struct (nullable = true)
+ // | | | |-- subfield1: integer (nullable = false)
+ // | | | |-- subfield2: integer (nullable = true)
+ // | | |-- field2: map (nullable = true)
+ // | | | |-- key: string
+ // | | | |-- value: integer (valueContainsNull = false)
+ // |-- col5: array (nullable = true)
+ // | |-- element: map (containsNull = true)
+ // | | |-- key: string
+ // | | |-- value: struct (valueContainsNull = false)
+ // | | | |-- field1: struct (nullable = true)
+ // | | | | |-- subfield1: integer (nullable = true)
+ // | | | | |-- subfield2: integer (nullable = true)
+ // |-- col6: map (nullable = true)
+ // | |-- key: string
+ // | |-- value: array (valueContainsNull = true)
+ // | | |-- element: struct (containsNull = false)
+ // | | | |-- field1: struct (nullable = true)
+ // | | | | |-- subfield1: integer (nullable = true)
+ // | | | | |-- subfield2: integer (nullable = true)
+ // |-- col7: array (nullable = true)
+ // | |-- element: struct (containsNull = true)
+ // | | |-- field1: integer (nullable = false)
+ // | | |-- field2: struct (nullable = true)
+ // | | | |-- subfield1: integer (nullable = false)
+ // | | |-- field3: array (nullable = true)
+ // | | | |-- element: struct (containsNull = true)
+ // | | | | |-- subfield1: integer (nullable = false)
+ // |-- col8: array (nullable = true)
+ // | |-- element: struct (containsNull = true)
+ // | | |-- field1: array (nullable = false)
+ // | | | |-- element: integer (containsNull = false)
+ private val schema =
+ StructType(
+ StructField("col1", StringType, nullable = false) ::
+ innerSchemaCol1 ::
+ StructField("col3", ArrayType(StructType(
+ StructField("field1", StructType(
+ StructField("subfield1", IntegerType, nullable = false) ::
+ StructField("subfield2", IntegerType) :: Nil)) ::
+ StructField("field2", MapType(StringType, IntegerType,
valueContainsNull = false))
+ :: Nil), containsNull = false), nullable = false) ::
+ StructField("col4", MapType(StringType, StructType(
+ StructField("field1", StructType(
+ StructField("subfield1", IntegerType, nullable = false) ::
+ StructField("subfield2", IntegerType) :: Nil)) ::
+ StructField("field2", MapType(StringType, IntegerType,
valueContainsNull = false))
+ :: Nil), valueContainsNull = false), nullable = false) ::
+ StructField("col5", ArrayType(MapType(StringType, StructType(
+ StructField("field1", StructType(
+ StructField("subfield1", IntegerType) ::
+ StructField("subfield2", IntegerType) :: Nil)) :: Nil),
valueContainsNull = false))) ::
+ StructField("col6", MapType(StringType, ArrayType(StructType(
+ StructField("field1", StructType(
+ StructField("subfield1", IntegerType) ::
+ StructField("subfield2", IntegerType) :: Nil)) :: Nil),
containsNull = false))) ::
+ StructField("col7", ArrayType(StructType(
+ StructField("field1", IntegerType, nullable = false) ::
+ StructField("field2", StructType(
+ StructField("subfield1", IntegerType, nullable = false) :: Nil))
::
+ StructField("field3", ArrayType(StructType(
+ StructField("subfield1", IntegerType, nullable = false) :: Nil)))
:: Nil))) ::
+ StructField("col8", ArrayType(StructType(
+ StructField("field1", ArrayType(IntegerType, containsNull = false),
nullable = false)
+ :: Nil))) :: Nil)
+
+ private val testRelation = LocalRelation(schema.toAttributes)
--- End diff --
Shall we have multiple relations, each with its own part of the schema? We don't
necessarily need one big, deeply nested schema, which makes this hard to read.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]