This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 95cbd618572  [SPARK-40066][SQL] ANSI mode: always return null on invalid access to map column
95cbd618572 is described below

commit 95cbd6185729b25fc898037e0dd5ceda6caf8759
Author: Gengliang Wang <gengli...@apache.org>
AuthorDate: Mon Aug 15 16:19:38 2022 -0700

    [SPARK-40066][SQL] ANSI mode: always return null on invalid access to map column

    ### What changes were proposed in this pull request?

    Change the semantics of map column access under ANSI mode: always return null instead of throwing `MAP_KEY_DOES_NOT_EXIST` errors. This PR also removes the internal configuration `spark.sql.ansi.strictIndexOperator`.

    ### Why are the changes needed?

    Since https://github.com/apache/spark/pull/30386, Spark always throws an error on invalid access to a map column. The ANSI SQL standard imposes no such requirement, since it has no Map type; its closest analogue, `multiset`, returns null when a non-existing element is accessed. I also investigated PostgreSQL/Snowflake/BigQuery, and all of them return null when a map (JSON) key does not exist. I suggest loosening the behavior here: when users hit this error, most of them just switch to `try_element_at()` to get null-returning semantics, or turn off ANSI SQL mode altogether.

    ### Does this PR introduce _any_ user-facing change?

    Yes, see above.

    ### How was this patch tested?

    Unit tests

Closes #37503 from gengliangwang/returnNullOnInvalidMapAccess.

Authored-by: Gengliang Wang <gengli...@apache.org>
Signed-off-by: Gengliang Wang <gengli...@apache.org>
---
 core/src/main/resources/error/error-classes.json   |  5 --
 docs/sql-migration-guide.md                        |  1 +
 docs/sql-ref-ansi-compliance.md                    |  4 +-
 .../sql/catalyst/analysis/CheckAnalysis.scala      |  2 +-
 .../expressions/ProjectionOverSchema.scala         |  4 +-
 .../sql/catalyst/expressions/SelectedField.scala   |  2 +-
 .../expressions/collectionOperations.scala         | 23 +++---
 .../expressions/complexTypeExtractors.scala        | 43 +++-------
 .../sql/catalyst/optimizer/ComplexTypes.scala      |  2 +-
 .../spark/sql/errors/QueryExecutionErrors.scala    | 13 ---
 .../org/apache/spark/sql/internal/SQLConf.scala    | 17 ++--
 .../expressions/CollectionExpressionsSuite.scala   |  9 +--
 .../catalyst/expressions/ComplexTypeSuite.scala    | 12 +--
 .../test/resources/sql-tests/inputs/ansi/array.sql | 18 +----
 .../test/resources/sql-tests/inputs/ansi/map.sql   |  8 --
 .../resources/sql-tests/results/ansi/array.sql.out | 93 ----------------------
 .../resources/sql-tests/results/ansi/map.sql.out   | 56 +------------
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 19 -----
 .../sql/errors/QueryExecutionAnsiErrorsSuite.scala | 15 ----
 .../execution/datasources/SchemaPruningSuite.scala |  2 +-
 .../sql/execution/datasources/json/JsonSuite.scala |  2 +-
 21 files changed, 44 insertions(+), 306 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index ed6dd112e9f..c2c5f30564c 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -287,11 +287,6 @@
     ],
     "sqlState" : "42000"
   },
-  "MAP_KEY_DOES_NOT_EXIST" : {
-    "message" : [
-      "Key <keyValue> does not exist. Use `try_element_at` to tolerate non-existent key and return NULL instead. If necessary set <config> to \"false\" to bypass this error."
-    ]
-  },
   "MISSING_STATIC_PARTITION_COLUMN" : {
     "message" : [
       "Unknown static partition column: <columnName>"
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 75f7f6c9f8c..42df05f7f70 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -26,6 +26,7 @@ license: |
   - Since Spark 3.4, Number or Number(\*) from Teradata will be treated as Decimal(38,18). In Spark 3.3 or earlier, Number or Number(\*) from Teradata will be treated as Decimal(38, 0), in which case the fractional part will be removed.
   - Since Spark 3.4, v1 database, table, permanent view and function identifier will include 'spark_catalog' as the catalog name if database is defined, e.g. a table identifier will be: `spark_catalog.default.t`. To restore the legacy behavior, set `spark.sql.legacy.v1IdentifierNoCatalog` to `true`.
+  - Since Spark 3.4, when ANSI SQL mode (configuration `spark.sql.ansi.enabled`) is on, Spark SQL always returns a NULL result when getting a map value with a non-existing key. In Spark 3.3 or earlier, it throws an error.

 ## Upgrading from Spark SQL 3.2 to 3.3

diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index 7a8f7dc2ecf..664aa6183da 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -302,7 +302,6 @@ The behavior of some SQL functions can be different under ANSI mode (`spark.sql.
   - `size`: This function returns null for null input.
   - `element_at`:
     - This function throws `ArrayIndexOutOfBoundsException` if using invalid indices.
-    - This function throws `NoSuchElementException` if key does not exist in map.
   - `elt`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices.
   - `parse_url`: This function throws `IllegalArgumentException` if an input string is not a valid url.
   - `to_date`: This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid.
@@ -318,7 +317,6 @@ The behavior of some SQL operators can be different under ANSI mode (`spark.sql.
 The behavior of some SQL operators can be different under ANSI mode (`spark.sql.ansi.enabled=true`).
   - `array_col[index]`: This operator throws `ArrayIndexOutOfBoundsException` if using invalid indices.
-  - `map_col[key]`: This operator throws `NoSuchElementException` if key does not exist in map.

 ### Useful Functions for ANSI Mode

@@ -330,7 +328,7 @@ When ANSI mode is on, it throws exceptions for invalid operations. You can use t
   - `try_divide`: identical to the division operator `/`, except that it returns `NULL` result instead of throwing an exception on dividing 0.
   - `try_sum`: identical to the function `sum`, except that it returns `NULL` result instead of throwing an exception on integral/decimal/interval value overflow.
   - `try_avg`: identical to the function `avg`, except that it returns `NULL` result instead of throwing an exception on decimal/interval value overflow.
-  - `try_element_at`: identical to the function `element_at`, except that it returns `NULL` result instead of throwing an exception on array's index out of bound or map's key not found.
+  - `try_element_at`: identical to the function `element_at`, except that it returns `NULL` result instead of throwing an exception on an out-of-bound array index.
   - `try_to_timestamp`: identical to the function `to_timestamp`, except that it returns `NULL` result instead of throwing an exception on string parsing error.
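For illustration, here is a minimal, self-contained Scala sketch of the behavior change described above. It is not part of the patch; it assumes a Spark build that already includes this commit (master / 3.4.0-SNAPSHOT at the time), and the object and app names are made up for the example.

import org.apache.spark.sql.SparkSession

object AnsiMapAccessDemo {
  def main(args: Array[String]): Unit = {
    // Local session with ANSI mode switched on explicitly.
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("AnsiMapAccessDemo")
      .config("spark.sql.ansi.enabled", "true")
      .getOrCreate()

    // After SPARK-40066, both forms of map access yield NULL for a missing
    // key even under ANSI mode (previously: MAP_KEY_DOES_NOT_EXIST).
    spark.sql("SELECT map(1, 'a', 2, 'b')[5]").show()              // NULL
    spark.sql("SELECT element_at(map(1, 'a', 2, 'b'), 5)").show()  // NULL

    // Array access is unchanged: an invalid index still throws under ANSI
    // mode, and try_element_at remains the NULL-returning alternative.
    spark.sql("SELECT try_element_at(array(1, 2, 3), 5)").show()   // NULL

    spark.stop()
  }
}

Under the same session, `SELECT element_at(array(1, 2, 3), 5)` would still fail with INVALID_ARRAY_INDEX_IN_ELEMENT_AT, matching the expected outputs in the ansi/array.sql.out hunk further down.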
### SQL Keywords (optional, disabled by default) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 3f5b535b947..ec9d8aab3af 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -196,7 +196,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { // If an attribute can't be resolved as a map key of string type, either the key should be // surrounded with single quotes, or there is a typo in the attribute name. - case GetMapValue(map, key: Attribute, _) if isMapWithStringKey(map) && !key.resolved => + case GetMapValue(map, key: Attribute) if isMapWithStringKey(map) && !key.resolved => failUnresolvedAttribute(operator, key, "UNRESOLVED_MAP_KEY") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala index 69d30dd5048..3192ccc655c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala @@ -65,8 +65,8 @@ case class ProjectionOverSchema(schema: StructType, output: AttributeSet) { getProjection(child).map { projection => MapKeys(projection) } case MapValues(child) => getProjection(child).map { projection => MapValues(projection) } - case GetMapValue(child, key, failOnError) => - getProjection(child).map { projection => GetMapValue(projection, key, failOnError) } + case GetMapValue(child, key) => + getProjection(child).map { projection => GetMapValue(projection, key) } case GetStructFieldObject(child, field: StructField) => getProjection(child).map(p => (p, p.dataType)).map { case (projection, projSchema: StructType) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala index bd7028b689b..25afc16a7fa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala @@ -96,7 +96,7 @@ object SelectedField { } val newField = StructField(field.name, newFieldDataType, field.nullable) selectField(child, Option(ArrayType(struct(newField), containsNull))) - case GetMapValue(child, _, _) => + case GetMapValue(child, _) => // GetMapValue does not select a field from a struct (i.e. prune the struct) so it can't be // the top-level extractor. However it can be part of an extractor chain. 
val MapType(keyType, _, valueContainsNull) = child.dataType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index f40f5a98232..6ba94effb4e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -2084,9 +2084,8 @@ case class ArrayPosition(left: Expression, right: Expression) If `spark.sql.ansi.enabled` is set to true, it throws ArrayIndexOutOfBoundsException for invalid indices. - _FUNC_(map, key) - Returns value for given key. The function returns NULL - if the key is not contained in the map and `spark.sql.ansi.enabled` is set to false. - If `spark.sql.ansi.enabled` is set to true, it throws NoSuchElementException instead. + _FUNC_(map, key) - Returns value for given key. The function returns NULL if the key is not + contained in the map. """, examples = """ Examples: @@ -2103,7 +2102,7 @@ case class ElementAt( // The value to return if index is out of bound defaultValueOutOfBound: Option[Literal] = None, failOnError: Boolean = SQLConf.get.ansiEnabled) - extends GetMapValueUtil with GetArrayItemUtil with NullIntolerant { + extends GetMapValueUtil with GetArrayItemUtil with NullIntolerant with SupportQueryContext { def this(left: Expression, right: Expression) = this(left, right, None, SQLConf.get.ansiEnabled) @@ -2171,7 +2170,7 @@ case class ElementAt( override def nullable: Boolean = left.dataType match { case _: ArrayType => computeNullabilityFromArray(left, right, failOnError, nullability) - case _: MapType => if (failOnError) mapValueContainsNull else true + case _: MapType => true } override def nullSafeEval(value: Any, ordinal: Any): Any = doElementAt(value, ordinal) @@ -2207,7 +2206,7 @@ case class ElementAt( } } case _: MapType => - (value, ordinal) => getValueEval(value, ordinal, mapKeyType, ordering, failOnError) + (value, ordinal) => getValueEval(value, ordinal, mapKeyType, ordering) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -2263,7 +2262,7 @@ case class ElementAt( """.stripMargin }) case _: MapType => - doGetValueGenCode(ctx, ev, left.dataType.asInstanceOf[MapType], failOnError) + doGetValueGenCode(ctx, ev, left.dataType.asInstanceOf[MapType]) } } @@ -2272,10 +2271,12 @@ case class ElementAt( override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): ElementAt = copy(left = newLeft, right = newRight) - override def initQueryContext(): Option[SQLQueryContext] = if (failOnError) { - Some(origin.context) - } else { - None + override def initQueryContext(): Option[SQLQueryContext] = { + if (failOnError && left.dataType.isInstanceOf[ArrayType]) { + Some(origin.context) + } else { + None + } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index 7b99b9d1082..188baad6f11 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -236,7 +236,7 @@ case class GetArrayStructFields( case class GetArrayItem( child: Expression, ordinal: Expression, - failOnError: Boolean = 
SQLConf.get.strictIndexOperator) extends BinaryExpression + failOnError: Boolean = SQLConf.get.ansiEnabled) extends BinaryExpression with GetArrayItemUtil with ExpectsInputTypes with ExtractValue @@ -354,16 +354,14 @@ trait GetArrayItemUtil { /** * Common trait for [[GetMapValue]] and [[ElementAt]]. */ -trait GetMapValueUtil - extends BinaryExpression with ImplicitCastInputTypes with SupportQueryContext { +trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { // todo: current search is O(n), improve it. def getValueEval( value: Any, ordinal: Any, keyType: DataType, - ordering: Ordering[Any], - failOnError: Boolean): Any = { + ordering: Ordering[Any]): Any = { val map = value.asInstanceOf[MapData] val length = map.numElements() val keys = map.keyArray() @@ -379,13 +377,7 @@ trait GetMapValueUtil } } - if (!found) { - if (failOnError) { - throw QueryExecutionErrors.mapKeyNotExistError(ordinal, keyType, getContextOrNull()) - } else { - null - } - } else if (values.isNullAt(i)) { + if (!found || values.isNullAt(i)) { null } else { values.get(i, dataType) @@ -395,8 +387,7 @@ trait GetMapValueUtil def doGetValueGenCode( ctx: CodegenContext, ev: ExprCode, - mapType: MapType, - failOnError: Boolean): ExprCode = { + mapType: MapType): ExprCode = { val index = ctx.freshName("index") val length = ctx.freshName("length") val keys = ctx.freshName("keys") @@ -414,15 +405,8 @@ trait GetMapValueUtil } val keyJavaType = CodeGenerator.javaType(keyType) - lazy val errorContext = getContextOrNullCode(ctx) val keyDt = ctx.addReferenceObj("keyType", keyType, keyType.getClass.getName) nullSafeCodeGen(ctx, ev, (eval1, eval2) => { - val keyNotFoundBranch = if (failOnError) { - s"throw QueryExecutionErrors.mapKeyNotExistError($eval2, $keyDt, $errorContext);" - } else { - s"${ev.isNull} = true;" - } - s""" final int $length = $eval1.numElements(); final ArrayData $keys = $eval1.keyArray(); @@ -440,7 +424,7 @@ trait GetMapValueUtil } if (!$found) { - $keyNotFoundBranch + ${ev.isNull} = true; } $nullCheck else { ${ev.value} = ${CodeGenerator.getValue(values, dataType, index)}; } @@ -454,10 +438,7 @@ trait GetMapValueUtil * * We need to do type checking here as `key` expression maybe unresolved. */ -case class GetMapValue( - child: Expression, - key: Expression, - failOnError: Boolean = SQLConf.get.strictIndexOperator) +case class GetMapValue(child: Expression, key: Expression) extends GetMapValueUtil with ExtractValue { @transient private lazy val ordering: Ordering[Any] = @@ -494,20 +475,14 @@ case class GetMapValue( // todo: current search is O(n), improve it. 
override def nullSafeEval(value: Any, ordinal: Any): Any = { - getValueEval(value, ordinal, keyType, ordering, failOnError) + getValueEval(value, ordinal, keyType, ordering) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - doGetValueGenCode(ctx, ev, child.dataType.asInstanceOf[MapType], failOnError) + doGetValueGenCode(ctx, ev, child.dataType.asInstanceOf[MapType]) } override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): GetMapValue = copy(child = newLeft, key = newRight) - - override def initQueryContext(): Option[SQLQueryContext] = if (failOnError) { - Some(origin.context) - } else { - None - } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala index fff894be88d..5c1967c094f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala @@ -63,7 +63,7 @@ object SimplifyExtractValueOps extends Rule[LogicalPlan] { // out of bounds, mimic the runtime behavior and return null Literal(null, ga.dataType) } - case GetMapValue(CreateMap(elems, _), key, _) => CaseKeyWhen(key, elems) + case GetMapValue(CreateMap(elems, _), key) => CaseKeyWhen(key, elems) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 71f42cf4d03..e4481a4c783 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -241,19 +241,6 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { summary = getSummary(context)) } - def mapKeyNotExistError( - key: Any, - dataType: DataType, - context: SQLQueryContext): NoSuchElementException = { - new SparkNoSuchElementException( - errorClass = "MAP_KEY_DOES_NOT_EXIST", - messageParameters = Array( - toSQLValue(key, dataType), - toSQLConf(SQLConf.ANSI_ENABLED.key)), - context = getQueryContext(context), - summary = getSummary(context)) - } - def invalidFractionOfSecondError(): DateTimeException = { new SparkDateTimeException( errorClass = "INVALID_FRACTION_OF_SECOND", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 98e6d2a1360..3ce6ee47958 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2955,15 +2955,6 @@ object SQLConf { .booleanConf .createWithDefault(false) - val ANSI_STRICT_INDEX_OPERATOR = buildConf("spark.sql.ansi.strictIndexOperator") - .internal() - .doc(s"When true and '${ANSI_ENABLED.key}' is true, accessing complex SQL types via [] " + - "operator will throw an exception if array index is out of bound, or map key does not " + - "exist. 
Otherwise, Spark will return a null result when accessing an invalid index.") - .version("3.3.0") - .booleanConf - .createWithDefault(true) - val SORT_BEFORE_REPARTITION = buildConf("spark.sql.execution.sortBeforeRepartition") .internal() @@ -3982,7 +3973,11 @@ object SQLConf { RemovedConfig("spark.sql.optimizer.planChangeLog.rules", "3.1.0", "", s"Please use `${PLAN_CHANGE_LOG_RULES.key}` instead."), RemovedConfig("spark.sql.optimizer.planChangeLog.batches", "3.1.0", "", - s"Please use `${PLAN_CHANGE_LOG_BATCHES.key}` instead.") + s"Please use `${PLAN_CHANGE_LOG_BATCHES.key}` instead."), + RemovedConfig("spark.sql.ansi.strictIndexOperator", "3.4.0", "true", + "This was an internal configuration. It is not needed anymore since Spark SQL always " + + "returns null when getting a map value with a non-existing key. See SPARK-40066 " + + "for more details.") ) Map(configs.map { cfg => cfg.key -> cfg } : _*) @@ -4543,8 +4538,6 @@ class SQLConf extends Serializable with Logging { def enforceReservedKeywords: Boolean = ansiEnabled && getConf(ENFORCE_RESERVED_KEYWORDS) - def strictIndexOperator: Boolean = ansiEnabled && getConf(ANSI_STRICT_INDEX_OPERATOR) - def timestampType: AtomicType = getConf(TIMESTAMP_TYPE) match { case "TIMESTAMP_LTZ" => // For historical reason, the TimestampType maps to TIMESTAMP WITH LOCAL TIME ZONE diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 1e466469973..0bcec56e09a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -2383,17 +2383,12 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper } } - test("SPARK-33460: element_at NoSuchElementException") { + test("SPARK-40066: element_at returns null on invalid map value access") { Seq(true, false).foreach { ansiEnabled => withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) { val map = Literal.create(Map(1 -> "a", 2 -> "b"), MapType(IntegerType, StringType)) val expr: Expression = ElementAt(map, Literal(5)) - if (ansiEnabled) { - val errMsg = "Key 5 does not exist." - checkExceptionInExpression[Exception](expr, errMsg) - } else { - checkEvaluation(expr, null) - } + checkEvaluation(expr, null) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index cdd11085edb..7b482dec7e2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -85,19 +85,11 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { } } - test("SPARK-33460: GetMapValue NoSuchElementException") { + test("SPARK-40066: GetMapValue returns null on invalid map value access") { Seq(true, false).foreach { ansiEnabled => withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) { val map = Literal.create(Map(1 -> "a", 2 -> "b"), MapType(IntegerType, StringType)) - - if (ansiEnabled) { - checkExceptionInExpression[Exception]( - GetMapValue(map, Literal(5)), - "Key 5 does not exist." 
- ) - } else { - checkEvaluation(GetMapValue(map, Literal(5)), null) - } + checkEvaluation(GetMapValue(map, Literal(5)), null) } } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/array.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/array.sql index 90f1b9a74b9..b04abe57cb8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/ansi/array.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/array.sql @@ -1,17 +1 @@ ---IMPORT array.sql - --- index out of range for array elements --- return null results if array index in [] operator is out of bound -set spark.sql.ansi.strictIndexOperator=false; -select array(1, 2, 3)[5]; -select array(1, 2, 3)[-1]; - --- the configuration spark.sql.ansi.strictIndexOperator doesn't affect the function element_at -select element_at(array(1, 2, 3), 5); -select element_at(array(1, 2, 3), -5); -select element_at(array(1, 2, 3), 0); - --- -- the configuration spark.sql.ansi.strictIndexOperator doesn't affect the function elt -select elt(4, '123', '456'); -select elt(0, '123', '456'); -select elt(-1, '123', '456'); +--IMPORT array.sql \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/map.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/map.sql index dc4614ec20d..23e5b956297 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/ansi/map.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/map.sql @@ -1,9 +1 @@ --IMPORT map.sql - --- key does not exist --- return null results if the map key in [] operator doesn't exist -set spark.sql.ansi.strictIndexOperator=false; -select map(1, 'a', 2, 'b')[5]; --- the configuration spark.sql.ansi.strictIndexOperator doesn't affect the function element_at -select element_at(map(1, 'a', 2, 'b'), 5); -select element_at(map('a', 1, 'b', 2), 'c'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out index 2c5cea7bf85..d2438cbaab6 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out @@ -323,96 +323,3 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException cannot resolve 'array_size(map('a', 1, 'b', 2))' due to data type mismatch: argument 1 requires array type, however, 'map('a', 1, 'b', 2)' is of map<string,int> type.; line 1 pos 7 - - --- !query -set spark.sql.ansi.strictIndexOperator=false --- !query schema -struct<key:string,value:string> --- !query output -spark.sql.ansi.strictIndexOperator false - - --- !query -select array(1, 2, 3)[5] --- !query schema -struct<array(1, 2, 3)[5]:int> --- !query output -NULL - - --- !query -select array(1, 2, 3)[-1] --- !query schema -struct<array(1, 2, 3)[-1]:int> --- !query output -NULL - - --- !query -select element_at(array(1, 2, 3), 5) --- !query schema -struct<> --- !query output -org.apache.spark.SparkArrayIndexOutOfBoundsException -[INVALID_ARRAY_INDEX_IN_ELEMENT_AT] The index 5 is out of bounds. The array has 3 elements. Use `try_element_at` to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 8) == -select element_at(array(1, 2, 3), 5) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - --- !query -select element_at(array(1, 2, 3), -5) --- !query schema -struct<> --- !query output -org.apache.spark.SparkArrayIndexOutOfBoundsException -[INVALID_ARRAY_INDEX_IN_ELEMENT_AT] The index -5 is out of bounds. 
The array has 3 elements. Use `try_element_at` to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 8) == -select element_at(array(1, 2, 3), -5) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - --- !query -select element_at(array(1, 2, 3), 0) --- !query schema -struct<> --- !query output -org.apache.spark.SparkRuntimeException -[ELEMENT_AT_BY_INDEX_ZERO] The index 0 is invalid. An index shall be either < 0 or > 0 (the first element has index 1). - - --- !query -select elt(4, '123', '456') --- !query schema -struct<> --- !query output -org.apache.spark.SparkArrayIndexOutOfBoundsException -[INVALID_ARRAY_INDEX] The index 4 is out of bounds. The array has 2 elements. Use `try_element_at` and increase the array index by 1(the starting array index is 1 for `try_element_at`) to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 8) == -select elt(4, '123', '456') - ^^^^^^^^^^^^^^^^^^^^ - - --- !query -select elt(0, '123', '456') --- !query schema -struct<> --- !query output -org.apache.spark.SparkArrayIndexOutOfBoundsException -[INVALID_ARRAY_INDEX] The index 0 is out of bounds. The array has 2 elements. Use `try_element_at` and increase the array index by 1(the starting array index is 1 for `try_element_at`) to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 8) == -select elt(0, '123', '456') - ^^^^^^^^^^^^^^^^^^^^ - - --- !query -select elt(-1, '123', '456') --- !query schema -struct<> --- !query output -org.apache.spark.SparkArrayIndexOutOfBoundsException -[INVALID_ARRAY_INDEX] The index -1 is out of bounds. The array has 2 elements. Use `try_element_at` and increase the array index by 1(the starting array index is 1 for `try_element_at`) to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 8) == -select elt(-1, '123', '456') - ^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out index c9d4186a27b..cd7cf9a60ce 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out @@ -2,25 +2,17 @@ -- !query select element_at(map(1, 'a', 2, 'b'), 5) -- !query schema -struct<> +struct<element_at(map(1, a, 2, b), 5):string> -- !query output -org.apache.spark.SparkNoSuchElementException -[MAP_KEY_DOES_NOT_EXIST] Key 5 does not exist. Use `try_element_at` to tolerate non-existent key and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 8) == -select element_at(map(1, 'a', 2, 'b'), 5) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +NULL -- !query select map(1, 'a', 2, 'b')[5] -- !query schema -struct<> +struct<map(1, a, 2, b)[5]:string> -- !query output -org.apache.spark.SparkNoSuchElementException -[MAP_KEY_DOES_NOT_EXIST] Key 5 does not exist. Use `try_element_at` to tolerate non-existent key and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. 
-== SQL(line 1, position 8) == -select map(1, 'a', 2, 'b')[5] - ^^^^^^^^^^^^^^^^^^^^^^ +NULL -- !query @@ -87,43 +79,3 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException cannot resolve 'map_contains_key(map(1, 'a', 2, 'b'), '1')' due to data type mismatch: Input to function map_contains_key should have been map followed by a value with same key type, but it's [map<int,string>, string].; line 1 pos 7 - - --- !query -set spark.sql.ansi.strictIndexOperator=false --- !query schema -struct<key:string,value:string> --- !query output -spark.sql.ansi.strictIndexOperator false - - --- !query -select map(1, 'a', 2, 'b')[5] --- !query schema -struct<map(1, a, 2, b)[5]:string> --- !query output -NULL - - --- !query -select element_at(map(1, 'a', 2, 'b'), 5) --- !query schema -struct<> --- !query output -org.apache.spark.SparkNoSuchElementException -[MAP_KEY_DOES_NOT_EXIST] Key 5 does not exist. Use `try_element_at` to tolerate non-existent key and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 8) == -select element_at(map(1, 'a', 2, 'b'), 5) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - --- !query -select element_at(map('a', 1, 'b', 2), 'c') --- !query schema -struct<> --- !query output -org.apache.spark.SparkNoSuchElementException -[MAP_KEY_DOES_NOT_EXIST] Key 'c' does not exist. Use `try_element_at` to tolerate non-existent key and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 8) == -select element_at(map('a', 1, 'b', 2), 'c') - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 15a615ce6d4..cc7e51abc4e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4315,25 +4315,6 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } - test("SPARK-39177: Query context of getting map value should be serialized to executors" + - " when WSCG is off") { - withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", - SQLConf.ANSI_ENABLED.key -> "true") { - withTable("t") { - sql("create table t(m map<string, string>) using parquet") - sql("insert into t values map('a', 'b')") - Seq( - "select m['foo'] from t", - "select element_at(m, 'foo') from t").foreach { query => - val msg = intercept[SparkException] { - sql(query).collect() - }.getMessage - assert(msg.contains(query)) - } - } - } - } - test("SPARK-39190,SPARK-39208,SPARK-39210: Query context of decimal overflow error should " + "be serialized to executors when WSCG is off") { withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala index 02492d5619c..1ebca5f5f6a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala @@ -114,21 +114,6 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest with QueryErrorsSuiteBase stop = 41)) } - test("MAP_KEY_DOES_NOT_EXIST: key does not exist in element_at") { - checkError( - exception = intercept[SparkNoSuchElementException] { - sql("select element_at(map(1, 'a', 
2, 'b'), 3)").collect() - }, - errorClass = "MAP_KEY_DOES_NOT_EXIST", - parameters = Map( - "keyValue" -> "3", - "config" -> ansiConf), - context = ExpectedContext( - fragment = "element_at(map(1, 'a', 2, 'b'), 3", - start = 7, - stop = 40)) - } - test("CAST_INVALID_INPUT: cast string to double") { checkError( exception = intercept[SparkNumberFormatException] { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala index 1ff34f87122..72bd56e57ec 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala @@ -59,7 +59,7 @@ abstract class SchemaPruningSuite employer: Employer) override protected def sparkConf: SparkConf = - super.sparkConf.set(SQLConf.ANSI_STRICT_INDEX_OPERATOR.key, "false") + super.sparkConf.set(SQLConf.ANSI_ENABLED.key, "false") case class Employee(id: Int, name: FullName, employer: Company) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 1ecaf748f5d..02225d40c83 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -60,7 +60,7 @@ abstract class JsonSuite override protected def dataSourceFormat = "json" override protected def sparkConf: SparkConf = - super.sparkConf.set(SQLConf.ANSI_STRICT_INDEX_OPERATOR.key, "false") + super.sparkConf.set(SQLConf.ANSI_ENABLED.key, "false") test("Type promotion") { def checkTypePromotion(expected: Any, actual: Any): Unit = { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org