[spark] branch master updated: [SPARK-39877][PYTHON][FOLLOW-UP] Add DataFrame melt to PySpark docs
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new b0c831d3408 [SPARK-39877][PYTHON][FOLLOW-UP] Add DataFrame melt to PySpark docs b0c831d3408 is described below commit b0c831d3408dddfbbf3acacbe8100a9e08b400de Author: Enrico Minack AuthorDate: Tue Aug 2 12:24:02 2022 +0900 [SPARK-39877][PYTHON][FOLLOW-UP] Add DataFrame melt to PySpark docs ### What changes were proposed in this pull request? Same as #37354, but DataFrame.melt is missing from documentation. Also removes erroneous alias from DataFrame.unpivot doc. ### Why are the changes needed? Documenting new method. ### Does this PR introduce _any_ user-facing change? Only documentation. ### How was this patch tested? No. Closes #37356 from EnricoMi/branch-pyspark-unpivot-docs-2. Authored-by: Enrico Minack Signed-off-by: Hyukjin Kwon --- python/docs/source/reference/pyspark.sql/dataframe.rst | 1 + python/pyspark/sql/dataframe.py| 6 -- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/python/docs/source/reference/pyspark.sql/dataframe.rst b/python/docs/source/reference/pyspark.sql/dataframe.rst index 41a325a1198..fdb79f72fc7 100644 --- a/python/docs/source/reference/pyspark.sql/dataframe.rst +++ b/python/docs/source/reference/pyspark.sql/dataframe.rst @@ -73,6 +73,7 @@ DataFrame DataFrame.localCheckpoint DataFrame.mapInPandas DataFrame.mapInArrow +DataFrame.melt DataFrame.na DataFrame.observe DataFrame.orderBy diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 8c9632fe766..41ac701a332 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -2263,8 +2263,6 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): `IntegerType` and `LongType` are cast to `LongType`, while `IntegerType` and `StringType` do not have a common data type and `unpivot` 
fails. -:func:`groupby` is an alias for :func:`groupBy`. - .. versionadded:: 3.4.0 Parameters @@ -2309,6 +2307,10 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): | 2| int|12.0| | 2|double| 1.2| +---+--++ + +See Also + +DataFrame.melt """ def to_jcols( - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-38864][SQL][FOLLOW-UP] Make AnalysisException message deterministic
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 7a9d040814a [SPARK-38864][SQL][FOLLOW-UP] Make AnalysisException message deterministic 7a9d040814a is described below commit 7a9d040814aec5a13967ae14bc1ae54bd0fa355c Author: Enrico Minack AuthorDate: Tue Aug 2 12:22:44 2022 +0900 [SPARK-38864][SQL][FOLLOW-UP] Make AnalysisException message deterministic ### What changes were proposed in this pull request? It turns out that the AnalysisException message depends on the iteration order of the Scala map of data types that is used to generate the error message. This fix makes the error message deterministic. https://github.com/apache/spark/pull/36150#discussion_r933962854 ### Why are the changes needed? This fixes tests. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? The tests should no longer fail for Scala 2.13. Closes #37351 from EnricoMi/branch-fix-unpivot-test-asserts. 
Authored-by: Enrico Minack Signed-off-by: Hyukjin Kwon --- .../apache/spark/sql/errors/QueryCompilationErrors.scala | 3 ++- .../scala/org/apache/spark/sql/DatasetUnpivotSuite.scala | 16 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 430fceb76b5..36bbe167a9b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -101,8 +101,9 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { def unpivotValDataTypeMismatchError(values: Seq[NamedExpression]): Throwable = { val dataTypes = values .groupBy(_.dataType) - .mapValues(values => values.map(value => toSQLId(value.toString))) + .mapValues(values => values.map(value => toSQLId(value.toString)).sorted) .mapValues(values => if (values.length > 3) values.take(3) :+ "..." 
else values) + .toList.sortBy(_._1.sql) .map { case (dataType, values) => s"${toSQLType(dataType)} (${values.mkString(", ")})" } new AnalysisException( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetUnpivotSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetUnpivotSuite.scala index b860f950325..b81383149a5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetUnpivotSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetUnpivotSuite.scala @@ -310,9 +310,9 @@ class DatasetUnpivotSuite extends QueryTest errorClass = "UNPIVOT_VALUE_DATA_TYPE_MISMATCH", parameters = Map( "types" -> - (STRING" \(`str1#\d+`\), """ + + (BIGINT" \(`long1#\d+L`, `long2#\d+L`\), """ + INT" \(`int1#\d+`, `int2#\d+`, `int3#\d+`, ...\), """ + - BIGINT" \(`long1#\d+L`, `long2#\d+L`\)""")), + STRING" \(`str1#\d+`\)""")), matchPVals = true) } @@ -396,9 +396,9 @@ class DatasetUnpivotSuite extends QueryTest exception = e3, errorClass = "UNPIVOT_VALUE_DATA_TYPE_MISMATCH", parameters = Map("types" -> -(INT" \(`id#\d+`, `int1#\d+`\), """ + - STRING" \(`str1#\d+`, `str2#\d+`\), """ + - BIGINT" \(`long1#\d+L`\)""")), +(BIGINT" \(`long1#\d+L`\), """ + + INT" \(`id#\d+`, `int1#\d+`\), """ + + STRING" \(`str1#\d+`, `str2#\d+`\)""")), matchPVals = true) // unpivoting with star id columns so that no value columns are left @@ -429,9 +429,9 @@ class DatasetUnpivotSuite extends QueryTest exception = e5, errorClass = "UNPIVOT_VALUE_DATA_TYPE_MISMATCH", parameters = Map("types" -> -(INT" \(`id#\d+`, `int1#\d+`\), """ + - STRING" \(`str1#\d+`, `str2#\d+`\), """ + - BIGINT" \(`long1#\d+L`\)""")), +(BIGINT" \(`long1#\d+L`\), """ + + INT" \(`id#\d+`, `int1#\d+`\), """ + + STRING" \(`str1#\d+`, `str2#\d+`\)""")), matchPVals = true) // unpivoting without giving values and no non-id columns - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (957eb7a7273 -> d2a18417d90)
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 957eb7a7273 [SPARK-39933][SQL][TESTS] Check query context by `checkError()` add d2a18417d90 [SPARK-39873][SQL] Remove `OptimizeLimitZero` and merge it into `EliminateLimits` No new revisions were added by this update. Summary of changes: .../spark/sql/catalyst/optimizer/Optimizer.scala | 46 -- .../catalyst/optimizer/CombiningLimitsSuite.scala | 2 +- .../optimizer/OptimizeLimitZeroSuite.scala | 2 +- 3 files changed, 10 insertions(+), 40 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (a27edf9ad41 -> 957eb7a7273)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from a27edf9ad41 [SPARK-39902][SQL] Add Scan details to spark plan scan node in SparkUI add 957eb7a7273 [SPARK-39933][SQL][TESTS] Check query context by `checkError()` No new revisions were added by this update. Summary of changes: .../scala/org/apache/spark/SparkFunSuite.scala | 27 +++- .../spark/sql/errors/QueryErrorsSuiteBase.scala| 15 - .../sql/errors/QueryExecutionAnsiErrorsSuite.scala | 37 -- 3 files changed, 67 insertions(+), 12 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39902][SQL] Add Scan details to spark plan scan node in SparkUI
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new a27edf9ad41 [SPARK-39902][SQL] Add Scan details to spark plan scan node in SparkUI a27edf9ad41 is described below commit a27edf9ad4104f7df30dbbf77ec06fcf3cf9feda Author: Sumeet Gajjar AuthorDate: Mon Aug 1 18:41:03 2022 -0700 [SPARK-39902][SQL] Add Scan details to spark plan scan node in SparkUI ### What changes were proposed in this pull request? In this PR, we propose to add a method "String name()" to the Scan interface, which "BatchScanExec" can invoke to set the node name of the plan. This nodeName will eventually be used by "SparkPlanGraphNode" to display it in the header of the UI node. ### Why are the changes needed? For DSv2, the scan node in the Spark plan on SparkUI currently shows only "BatchScan" instead of an informative name. ### Does this PR introduce _any_ user-facing change? Yes, after this change the user will be able to see the scan name in the spark plan on SparkUI. ### How was this patch tested? - Tested this change using existing UTs - Further tested this change by overriding the newly added method in Iceberg's implementation of the Scan interface. Closes #37325 from sumeetgajjar/v2_scan_ui_improvement. 
Authored-by: Sumeet Gajjar Signed-off-by: Dongjoon Hyun --- .../src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala | 2 +- .../main/java/org/apache/spark/sql/connector/read/Scan.java | 12 .../spark/sql/execution/datasources/v2/BatchScanExec.scala | 6 ++ .../apache/spark/sql/execution/datasources/v2/FileScan.scala | 4 .../src/test/scala/org/apache/spark/sql/ExplainSuite.scala | 2 +- .../sql/execution/DataSourceScanExecRedactionSuite.scala | 8 6 files changed, 28 insertions(+), 6 deletions(-) diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index d75e6906719..8a088a43579 100644 --- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -2408,7 +2408,7 @@ class AvroV2Suite extends AvroSuite with ExplainSuiteHelper { val basePath = dir.getCanonicalPath + "/avro" val expected_plan_fragment = s""" - |\\(1\\) BatchScan + |\\(1\\) BatchScan avro |Output \\[2\\]: \\[value#xL, id#x\\] |DataFilters: \\[isnotnull\\(value#xL\\), \\(value#xL > 2\\)\\] |Format: avro diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java index d161de92eb8..941a11b8b1d 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java @@ -60,6 +60,18 @@ public interface Scan { return this.getClass().toString(); } + /** + * The name of the scan, which will be shown in the header of a spark plan scan node on SparkUI. + * E.g. "scan parquet sample_db.sample_table" + * + * By default this returns the simple class name of the implementation. Please override it to + * provide a meaningful name. 
+ * + */ + default String name() { +return this.getClass().getSimpleName(); + } + /** * Returns the physical representation of this scan for batch query. By default this method throws * exception, data sources must overwrite this method to provide an implementation, if the diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala index f1c43b8f60c..8da1123c9fe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala @@ -131,4 +131,10 @@ case class BatchScanExec( val result = s"$nodeName$truncatedOutputString ${scan.description()} $runtimeFiltersString" redact(result) } + + /** + * Returns the name of this type of TreeNode. Defaults to the class name. + * Note that we remove the "Exec" suffix for physical operators here. + */ + override def nodeName: String = s"BatchScan ${scan.name()}" } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala index 21503fda53e..a7b6afc7f4e 100644 ---
[spark] branch master updated: Revert "[SPARK-39917][SQL] Use different error classes for numeric/interval arithmetic overflow"
This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 939c2402c81 Revert "[SPARK-39917][SQL] Use different error classes for numeric/interval arithmetic overflow" 939c2402c81 is described below commit 939c2402c81ad98c5ab65b285ddbcc8825ecffeb Author: Gengliang Wang AuthorDate: Mon Aug 1 14:04:17 2022 -0700 Revert "[SPARK-39917][SQL] Use different error classes for numeric/interval arithmetic overflow" This reverts commit 1b6f14ff8c324454b0a44b2439aa42441af2dd81. --- core/src/main/resources/error/error-classes.json | 8 +--- .../sql/catalyst/expressions/arithmetic.scala | 20 +- .../catalyst/expressions/intervalExpressions.scala | 3 +- .../sql/catalyst/util/IntervalMathUtils.scala | 46 -- .../spark/sql/errors/QueryExecutionErrors.scala| 14 --- .../sql-tests/results/ansi/interval.sql.out| 14 +++ .../resources/sql-tests/results/interval.sql.out | 14 +++ .../sql-tests/results/postgreSQL/int4.sql.out | 12 +++--- .../sql-tests/results/postgreSQL/int8.sql.out | 8 ++-- .../results/postgreSQL/window_part2.sql.out| 4 +- .../apache/spark/sql/DataFrameAggregateSuite.scala | 8 ++-- 11 files changed, 41 insertions(+), 110 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index ed6dd112e9f..c4b59799f88 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -7,7 +7,7 @@ }, "ARITHMETIC_OVERFLOW" : { "message" : [ - ". If necessary set to \"false\" to bypass this error." + ". If necessary set to \"false\" (except for ANSI interval type) to bypass this error." ], "sqlState" : "22003" }, @@ -210,12 +210,6 @@ "" ] }, - "INTERVAL_ARITHMETIC_OVERFLOW" : { -"message" : [ - "." 
-], -"sqlState" : "22003" - }, "INTERVAL_DIVIDED_BY_ZERO" : { "message" : [ "Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 24ac685eace..86e6e6d7323 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.trees.SQLQueryContext import org.apache.spark.sql.catalyst.trees.TreePattern.{BINARY_ARITHMETIC, TreePattern, UNARY_POSITIVE} -import org.apache.spark.sql.catalyst.util.{IntervalMathUtils, IntervalUtils, MathUtils, TypeUtils} +import org.apache.spark.sql.catalyst.util.{IntervalUtils, MathUtils, TypeUtils} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -89,7 +89,7 @@ case class UnaryMinus( defineCodeGen(ctx, ev, c => s"$iu.$method($c)") case _: AnsiIntervalType => nullSafeCodeGen(ctx, ev, eval => { -val mathUtils = IntervalMathUtils.getClass.getCanonicalName.stripSuffix("$") +val mathUtils = MathUtils.getClass.getCanonicalName.stripSuffix("$") s"${ev.value} = $mathUtils.negateExact($eval);" }) } @@ -98,8 +98,8 @@ case class UnaryMinus( case CalendarIntervalType if failOnError => IntervalUtils.negateExact(input.asInstanceOf[CalendarInterval]) case CalendarIntervalType => IntervalUtils.negate(input.asInstanceOf[CalendarInterval]) -case _: DayTimeIntervalType => IntervalMathUtils.negateExact(input.asInstanceOf[Long]) -case _: YearMonthIntervalType => IntervalMathUtils.negateExact(input.asInstanceOf[Int]) +case _: DayTimeIntervalType 
=> MathUtils.negateExact(input.asInstanceOf[Long]) +case _: YearMonthIntervalType => MathUtils.negateExact(input.asInstanceOf[Int]) case _ => numeric.negate(input) } @@ -278,8 +278,6 @@ abstract class BinaryArithmetic extends BinaryOperator throw QueryExecutionErrors.notOverrideExpectedMethodsError("BinaryArithmetics", "calendarIntervalMethod", "genCode") - protected def isAnsiInterval: Boolean = dataType.isInstanceOf[AnsiIntervalType] - // Name of the function for the exact version of this expression in [[Math]]. // If the option "spark.sql.ansi.enabled" is enabled and there is corresponding // function in [[Math]], the exact function will be called instead of evaluation with [[symbol]]. @@ -307,7 +305,7 @@ abstract
[spark] branch master updated: [SPARK-39914][SQL] Add DS V2 Filter to V1 Filter conversion
This is an automated email from the ASF dual-hosted git repository. huaxingao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 2ef738205c0 [SPARK-39914][SQL] Add DS V2 Filter to V1 Filter conversion 2ef738205c0 is described below commit 2ef738205c0d4598a577a248afc117ac0844f3ad Author: huaxingao AuthorDate: Mon Aug 1 11:23:13 2022 -0700 [SPARK-39914][SQL] Add DS V2 Filter to V1 Filter conversion ### What changes were proposed in this pull request? Add util methods to convert DS V2 Filter to V1 Filter. ### Why are the changes needed? Provide convenient methods to convert V2 to V1 Filters. These methods can be used by [`SupportsRuntimeFiltering`](https://github.com/apache/spark/pull/36918/files#diff-0d3268f351817ca948e75e7b6641e5cc67c4d773c3234920a7aa62faf11f6c8e) and later be used by `SupportsDelete` ### Does this PR introduce _any_ user-facing change? No. These are intended for internal use only ### How was this patch tested? new tests Closes #37332 from huaxingao/toV1. 
Authored-by: huaxingao Signed-off-by: huaxingao --- .../sql/internal/connector/PredicateUtils.scala| 92 +- .../datasources/v2/V2PredicateSuite.scala | 85 2 files changed, 174 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/PredicateUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/PredicateUtils.scala index ace6b30d4cc..263edd82197 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/PredicateUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/PredicateUtils.scala @@ -19,14 +19,25 @@ package org.apache.spark.sql.internal.connector import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.connector.expressions.{LiteralValue, NamedReference} -import org.apache.spark.sql.connector.expressions.filter.Predicate -import org.apache.spark.sql.sources.{Filter, In} +import org.apache.spark.sql.connector.expressions.filter.{And => V2And, Not => V2Not, Or => V2Or, Predicate} +import org.apache.spark.sql.sources.{AlwaysFalse, AlwaysTrue, And, EqualNullSafe, EqualTo, Filter, GreaterThan, GreaterThanOrEqual, In, IsNotNull, IsNull, LessThan, LessThanOrEqual, Not, Or, StringContains, StringEndsWith, StringStartsWith} +import org.apache.spark.sql.types.StringType private[sql] object PredicateUtils { def toV1(predicate: Predicate): Option[Filter] = { + +def isValidBinaryPredicate(): Boolean = { + if (predicate.children().length == 2 && +predicate.children()(0).isInstanceOf[NamedReference] && +predicate.children()(1).isInstanceOf[LiteralValue[_]]) { +true + } else { +false + } +} + predicate.name() match { - // TODO: add conversion for other V2 Predicate case "IN" if predicate.children()(0).isInstanceOf[NamedReference] => val attribute = predicate.children()(0).toString val values = predicate.children().drop(1) @@ -43,6 +54,81 @@ private[sql] object PredicateUtils { Some(In(attribute, 
Array.empty[Any])) } + case "=" | "<=>" | ">" | "<" | ">=" | "<=" if isValidBinaryPredicate => +val attribute = predicate.children()(0).toString +val value = predicate.children()(1).asInstanceOf[LiteralValue[_]] +val v1Value = CatalystTypeConverters.convertToScala(value.value, value.dataType) +val v1Filter = predicate.name() match { + case "=" => EqualTo(attribute, v1Value) + case "<=>" => EqualNullSafe(attribute, v1Value) + case ">" => GreaterThan(attribute, v1Value) + case ">=" => GreaterThanOrEqual(attribute, v1Value) + case "<" => LessThan(attribute, v1Value) + case "<=" => LessThanOrEqual(attribute, v1Value) +} +Some(v1Filter) + + case "IS_NULL" | "IS_NOT_NULL" if predicate.children().length == 1 && + predicate.children()(0).isInstanceOf[NamedReference] => +val attribute = predicate.children()(0).toString +val v1Filter = predicate.name() match { + case "IS_NULL" => IsNull(attribute) + case "IS_NOT_NULL" => IsNotNull(attribute) +} +Some(v1Filter) + + case "STARTS_WITH" | "ENDS_WITH" | "CONTAINS" if isValidBinaryPredicate => +val attribute = predicate.children()(0).toString +val value = predicate.children()(1).asInstanceOf[LiteralValue[_]] +if (!value.dataType.sameType(StringType)) return None +val v1Value = value.value.toString +val v1Filter = predicate.name() match { + case "STARTS_WITH" => +StringStartsWith(attribute, v1Value) + case "ENDS_WITH" => +
[spark] branch master updated (e78585b2fbb -> 1b6f14ff8c3)
This is an automated email from the ASF dual-hosted git repository. gengliang pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from e78585b2fbb [SPARK-39848][BUILD] Upgrade Kafka to 3.2.1 add 1b6f14ff8c3 [SPARK-39917][SQL] Use different error classes for numeric/interval arithmetic overflow No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 8 - .../sql/catalyst/expressions/arithmetic.scala | 20 +++-- .../catalyst/expressions/intervalExpressions.scala | 3 +- .../sql/catalyst/util/IntervalMathUtils.scala} | 35 +++--- .../spark/sql/errors/QueryExecutionErrors.scala| 14 + .../sql-tests/results/ansi/interval.sql.out| 14 - .../resources/sql-tests/results/interval.sql.out | 14 - .../sql-tests/results/postgreSQL/int4.sql.out | 12 .../sql-tests/results/postgreSQL/int8.sql.out | 8 ++--- .../results/postgreSQL/window_part2.sql.out| 4 +-- .../apache/spark/sql/DataFrameAggregateSuite.scala | 8 ++--- 11 files changed, 82 insertions(+), 58 deletions(-) copy sql/{core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableExec.scala => catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalMathUtils.scala} (51%) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39848][BUILD] Upgrade Kafka to 3.2.1
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new e78585b2fbb [SPARK-39848][BUILD] Upgrade Kafka to 3.2.1 e78585b2fbb is described below commit e78585b2fbb219a7bd70bd3710ca3b52f2306623 Author: Dongjoon Hyun AuthorDate: Mon Aug 1 09:17:03 2022 -0700 [SPARK-39848][BUILD] Upgrade Kafka to 3.2.1 ### What changes were proposed in this pull request? This PR aims to upgrade Apache Kafka to 3.2.1. ### Why are the changes needed? Apache Kafka 3.2.1 is released. - https://lists.apache.org/thread/b6nonzos2qjhc9tpolld9qxrcxqcg011 Apache Kafka 3.2.1 has the following patches. - https://home.apache.org/~davidarthur/kafka-3.2.1-rc3/RELEASE_NOTES.html [KAFKA-14013](https://issues.apache.org/jira/browse/KAFKA-14013) Limit the length of the `reason` field sent on the wire Bug [KAFKA-13474](https://issues.apache.org/jira/browse/KAFKA-13474) Regression in dynamic update of broker certificate [KAFKA-13572](https://issues.apache.org/jira/browse/KAFKA-13572) Negative value for 'Preferred Replica Imbalance' metric [KAFKA-13773](https://issues.apache.org/jira/browse/KAFKA-13773) Data loss after recovery from crash due to full hard disk [KAFKA-13861](https://issues.apache.org/jira/browse/KAFKA-13861) validateOnly request field does not work for CreatePartition requests in Kraft mode. 
[KAFKA-13899](https://issues.apache.org/jira/browse/KAFKA-13899) Inconsistent error codes returned from AlterConfig APIs [KAFKA-13998](https://issues.apache.org/jira/browse/KAFKA-13998) JoinGroupRequestData 'reason' can be too large [KAFKA-14010](https://issues.apache.org/jira/browse/KAFKA-14010) alterISR request won't retry when receiving retriable error [KAFKA-14024](https://issues.apache.org/jira/browse/KAFKA-14024) Consumer stuck during cooperative rebalance for Commit offset in onJoinPrepare [KAFKA-14035](https://issues.apache.org/jira/browse/KAFKA-14035) QuorumController handleRenounce throws NPE [KAFKA-14055](https://issues.apache.org/jira/browse/KAFKA-14055) Transaction markers may be lost during cleaning if data keys conflict with marker keys [KAFKA-14062](https://issues.apache.org/jira/browse/KAFKA-14062) OAuth client token refresh fails with SASL extensions [KAFKA-14079](https://issues.apache.org/jira/browse/KAFKA-14079) Source task will not commit offsets and develops memory leak if "error.tolerance" is set to "all" ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs Closes #37261 from dongjoon-hyun/SPARK-39848. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 5e29ed57925..acebec1e2ec 100644 --- a/pom.xml +++ b/pom.xml @@ -128,7 +128,7 @@ <hive.version.short>2.3</hive.version.short> - <kafka.version>3.2.0</kafka.version> + <kafka.version>3.2.1</kafka.version> <derby.version>10.14.2.0</derby.version> <parquet.version>1.12.3</parquet.version> - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39923][SQL] Multiple query contexts in Spark exceptions
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 9ee2c753b98 [SPARK-39923][SQL] Multiple query contexts in Spark exceptions 9ee2c753b98 is described below commit 9ee2c753b98b290fab9b2ec1f02d90c7c9441271 Author: Max Gekk AuthorDate: Mon Aug 1 13:40:22 2022 +0500 [SPARK-39923][SQL] Multiple query contexts in Spark exceptions ### What changes were proposed in this pull request? 1. Replace `Option[QueryContext]` by `Array[QueryContext]` in Spark exceptions like in `SparkRuntimeException`. 2. Pass `SQLQueryContext` to `QueryExecutionErrors` functions instead of `Option[SQLQueryContext]`. 3. Add the methods `getContextOrNull()` and `getContextOrNullCode()` to `SupportQueryContext` to get a SQL query context or `null` (if it is missing) of an expression. ### Why are the changes needed? 1. The changes will allow chaining multiple error contexts in Spark exceptions. For instance, if a user's query refers to a view v1, v1 refers to another view v2, and v2 does a division, the error contexts will be: sql fragment of v2 that does division -> sql fragment of v1 that refers to v2 -> sql fragment of your query that refers to v1. 2. Passing `SQLQueryContext` to `QueryExecutionErrors` directly simplifies codegen code because it avoids constructing Scala objects like `scala.None`. ### Does this PR introduce _any_ user-facing change? Yes, this PR changes user-facing exceptions. ### How was this patch tested? By running the modified test suites: ``` $ build/sbt "test:testOnly *DecimalExpressionSuite" ``` and potentially affected tests: ``` $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" ``` Closes #37343 from MaxGekk/array-as-query-context. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../scala/org/apache/spark/SparkException.scala| 28 - .../spark/sql/catalyst/expressions/Cast.scala | 67 +++--- .../sql/catalyst/expressions/Expression.scala | 10 .../catalyst/expressions/aggregate/Average.scala | 6 +- .../sql/catalyst/expressions/aggregate/Sum.scala | 23 .../sql/catalyst/expressions/arithmetic.scala | 48 +--- .../expressions/collectionOperations.scala | 4 +- .../expressions/complexTypeExtractors.scala| 8 +-- .../catalyst/expressions/decimalExpressions.scala | 32 --- .../catalyst/expressions/intervalExpressions.scala | 16 +++--- .../sql/catalyst/expressions/mathExpressions.scala | 2 +- .../catalyst/expressions/stringExpressions.scala | 5 +- .../spark/sql/catalyst/util/DateTimeUtils.scala| 10 ++-- .../spark/sql/catalyst/util/IntervalUtils.scala| 2 +- .../apache/spark/sql/catalyst/util/MathUtils.scala | 14 ++--- .../spark/sql/catalyst/util/UTF8StringUtils.scala | 10 ++-- .../apache/spark/sql/errors/QueryErrorsBase.scala | 9 ++- .../spark/sql/errors/QueryExecutionErrors.scala| 54 - .../scala/org/apache/spark/sql/types/Decimal.scala | 4 +- .../expressions/DecimalExpressionSuite.scala | 2 +- 20 files changed, 182 insertions(+), 172 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkException.scala b/core/src/main/scala/org/apache/spark/SparkException.scala index d6add48ffb1..6548a114d41 100644 --- a/core/src/main/scala/org/apache/spark/SparkException.scala +++ b/core/src/main/scala/org/apache/spark/SparkException.scala @@ -119,7 +119,7 @@ private[spark] class SparkArithmeticException( errorClass: String, errorSubClass: Option[String] = None, messageParameters: Array[String], -context: Option[QueryContext], +context: Array[QueryContext], summary: String) extends ArithmeticException( SparkThrowableHelper.getMessage(errorClass, errorSubClass.orNull, messageParameters, summary)) @@ -128,7 +128,7 @@ private[spark] class SparkArithmeticException( override def getMessageParameters: 
Array[String] = messageParameters override def getErrorClass: String = errorClass override def getErrorSubClass: String = errorSubClass.orNull - override def getQueryContext: Array[QueryContext] = context.toArray + override def getQueryContext: Array[QueryContext] = context } /** @@ -195,7 +195,7 @@ private[spark] class SparkDateTimeException( errorClass: String, errorSubClass: Option[String] = None, messageParameters: Array[String], -context: Option[QueryContext], +context: Array[QueryContext], summary: String) extends DateTimeException( SparkThrowableHelper.getMessage(errorClass, errorSubClass.orNull, messageParameters, summary)) @@ -204,7 +204,7 @@ private[spark] class SparkDateTimeException( override def getMessageParameters:
[spark] branch master updated (8c6c7ae3744 -> 5f7f8163097)
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 8c6c7ae3744 [SPARK-39881][PYTHON] Fix erroneous check for black and reenable black validation add 5f7f8163097 [SPARK-39877][PYTHON][FOLLOW-UP] Add DataFrame unpivot to PySpark docs No new revisions were added by this update. Summary of changes: python/docs/source/reference/pyspark.sql/dataframe.rst | 1 + 1 file changed, 1 insertion(+) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (004430054c2 -> 8c6c7ae3744)
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 004430054c2 [SPARK-39503][SQL][FOLLOWUP] InMemoryCatalog should keep the catalog field when renaming tables add 8c6c7ae3744 [SPARK-39881][PYTHON] Fix erroneous check for black and reenable black validation No new revisions were added by this update. Summary of changes: .github/workflows/build_and_test.yml | 2 +- dev/lint-python| 4 +-- dev/pyproject.toml | 2 +- dev/reformat-python| 9 ++--- dev/requirements.txt | 2 +- python/pyspark/context.py | 5 +-- python/pyspark/ml/feature.py | 6 ++-- python/pyspark/pandas/data_type_ops/boolean_ops.py | 6 ++-- python/pyspark/pandas/frame.py | 4 +-- python/pyspark/pandas/series.py| 4 +-- .../pandas/tests/data_type_ops/test_binary_ops.py | 6 ++-- .../pandas/tests/data_type_ops/test_boolean_ops.py | 36 +-- .../tests/data_type_ops/test_categorical_ops.py| 6 ++-- .../pandas/tests/data_type_ops/test_complex_ops.py | 4 +-- .../pandas/tests/data_type_ops/test_date_ops.py| 10 +++--- .../tests/data_type_ops/test_datetime_ops.py | 10 +++--- .../pandas/tests/data_type_ops/test_null_ops.py| 6 ++-- .../pandas/tests/data_type_ops/test_num_ops.py | 14 .../tests/data_type_ops/test_timedelta_ops.py | 6 ++-- .../pandas/tests/data_type_ops/test_udt_ops.py | 6 ++-- .../pandas/tests/test_ops_on_diff_frames.py| 4 +-- python/pyspark/pandas/tests/test_series.py | 10 +++--- python/pyspark/rdd.py | 2 +- python/pyspark/sql/tests/test_arrow.py | 2 +- python/pyspark/sql/tests/test_column.py| 2 +- python/pyspark/sql/tests/test_context.py | 2 +- python/pyspark/sql/tests/test_pandas_udf_scalar.py | 4 +-- python/pyspark/sql/tests/test_types.py | 40 +++--- python/pyspark/sql/tests/test_udf.py | 4 +-- 29 files changed, 108 insertions(+), 110 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39503][SQL][FOLLOWUP] InMemoryCatalog should keep the catalog field when renaming tables
This is an automated email from the ASF dual-hosted git repository. yumwang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 004430054c2 [SPARK-39503][SQL][FOLLOWUP] InMemoryCatalog should keep the catalog field when renaming tables 004430054c2 is described below commit 004430054c2a1c1599f9451e6c77b64d02de4171 Author: Wenchen Fan AuthorDate: Mon Aug 1 15:26:44 2022 +0800 [SPARK-39503][SQL][FOLLOWUP] InMemoryCatalog should keep the catalog field when renaming tables ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/37021 . The `renameTable` method should keep the catalog name in `TableIdentifier`. This is necessary as methods like `getTablesByName` won't qualify the table identifiers again. This PR also cleans up `InMemoryCatalog` a bit. The caller side `SessionCatalog` will create tables/functions using qualified identifiers with catalog name, and we don't need to attach catalog name again in places like `getTable`. We just need to make sure we don't drop the catalog field during table updating. ### Why are the changes needed? Make sure the v1 identifiers are always qualified with catalog name. ### Does this PR introduce _any_ user-facing change? No. `InMemoryCatalog` is test only ### How was this patch tested? N/A Closes #37347 from cloud-fan/follow. 
Lead-authored-by: Wenchen Fan Co-authored-by: Wenchen Fan Signed-off-by: Yuming Wang --- .../sql/catalyst/catalog/InMemoryCatalog.scala | 13 +--- .../catalyst/catalog/ExternalCatalogSuite.scala| 23 -- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala index 4fe56440c11..218a342e669 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala @@ -25,8 +25,6 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.SparkConf -import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.CatalystIdentifier._ import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._ import org.apache.spark.sql.catalyst.expressions.Expression @@ -282,7 +280,7 @@ class InMemoryCatalog( requireTableExists(db, oldName) requireTableNotExists(db, newName) val oldDesc = catalog(db).tables(oldName) -oldDesc.table = oldDesc.table.copy(identifier = TableIdentifier(newName, Some(db))) +oldDesc.table = oldDesc.table.copy(identifier = oldDesc.table.identifier.copy(table = newName)) if (oldDesc.table.tableType == CatalogTableType.MANAGED) { assert(oldDesc.table.storage.locationUri.isDefined, @@ -344,8 +342,7 @@ class InMemoryCatalog( override def getTable(db: String, table: String): CatalogTable = synchronized { requireTableExists(db, table) -val catalogTable = catalog(db).tables(table).table -catalogTable.copy(identifier = attachSessionCatalog(catalogTable.identifier)) +catalog(db).tables(table).table } override def getTablesByName(db: String, tables: Seq[String]): Seq[CatalogTable] = { @@ -634,15 +631,15 @@ class 
InMemoryCatalog( newName: String): Unit = synchronized { requireFunctionExists(db, oldName) requireFunctionNotExists(db, newName) -val newFunc = getFunction(db, oldName).copy(identifier = FunctionIdentifier(newName, Some(db))) +val oldFunc = getFunction(db, oldName) +val newFunc = oldFunc.copy(identifier = oldFunc.identifier.copy(funcName = newName)) catalog(db).functions.remove(oldName) catalog(db).functions.put(newName, newFunc) } override def getFunction(db: String, funcName: String): CatalogFunction = synchronized { requireFunctionExists(db, funcName) -val catalogFunction = catalog(db).functions(funcName) -catalogFunction.copy(identifier = attachSessionCatalog(catalogFunction.identifier)) +catalog(db).functions(funcName) } override def functionExists(db: String, funcName: String): Boolean = synchronized { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala index bf9bf38b07e..1b0a154a3f4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala +++