[spark] branch master updated: [SPARK-39877][PYTHON][FOLLOW-UP] Add DataFrame melt to PySpark docs

2022-08-01 Thread gurwls223
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new b0c831d3408 [SPARK-39877][PYTHON][FOLLOW-UP] Add DataFrame melt to 
PySpark docs
b0c831d3408 is described below

commit b0c831d3408dddfbbf3acacbe8100a9e08b400de
Author: Enrico Minack 
AuthorDate: Tue Aug 2 12:24:02 2022 +0900

[SPARK-39877][PYTHON][FOLLOW-UP] Add DataFrame melt to PySpark docs

### What changes were proposed in this pull request?
Same as #37354, but `DataFrame.melt` is missing from the documentation.

Also removes an erroneous alias note from the `DataFrame.unpivot` docstring.
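As a quick illustration of the documented method, a minimal sketch using the Scala Dataset API, which is assumed (Spark 3.4+) to expose the same `unpivot`/`melt` pair; the column names and the in-scope `spark` session are illustrative:

```
import org.apache.spark.sql.functions.col

// Wide table: one row per id, one column per measurement.
val df = spark.range(1, 3).toDF("id")
  .withColumn("int", col("id") + 10)
  .withColumn("double", col("id") / 10.0 + 1.0)

// melt is an alias for unpivot: keep "id", turn the remaining columns into
// (variable, value) rows; the values are cast to their common type (double).
val longDf = df.melt(
  Array(col("id")),                  // id columns that stay as-is
  Array(col("int"), col("double")),  // value columns to unpivot
  "var",                             // name of the variable column
  "val")                             // name of the value column
```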

### Why are the changes needed?
Documenting new method.

### Does this PR introduce _any_ user-facing change?
Only documentation.

### How was this patch tested?
No.

Closes #37356 from EnricoMi/branch-pyspark-unpivot-docs-2.

Authored-by: Enrico Minack 
Signed-off-by: Hyukjin Kwon 
---
 python/docs/source/reference/pyspark.sql/dataframe.rst | 1 +
 python/pyspark/sql/dataframe.py| 6 --
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/python/docs/source/reference/pyspark.sql/dataframe.rst 
b/python/docs/source/reference/pyspark.sql/dataframe.rst
index 41a325a1198..fdb79f72fc7 100644
--- a/python/docs/source/reference/pyspark.sql/dataframe.rst
+++ b/python/docs/source/reference/pyspark.sql/dataframe.rst
@@ -73,6 +73,7 @@ DataFrame
 DataFrame.localCheckpoint
 DataFrame.mapInPandas
 DataFrame.mapInArrow
+DataFrame.melt
 DataFrame.na
 DataFrame.observe
 DataFrame.orderBy
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 8c9632fe766..41ac701a332 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -2263,8 +2263,6 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
 `IntegerType` and `LongType` are cast to `LongType`, while 
`IntegerType` and `StringType`
 do not have a common data type and `unpivot` fails.
 
-:func:`groupby` is an alias for :func:`groupBy`.
-
 .. versionadded:: 3.4.0
 
 Parameters
@@ -2309,6 +2307,10 @@ class DataFrame(PandasMapOpsMixin, 
PandasConversionMixin):
 |  2|   int|12.0|
 |  2|double| 1.2|
 +---+--++
+
+See Also
+--------
+DataFrame.melt
 """
 
 def to_jcols(





[spark] branch master updated: [SPARK-38864][SQL][FOLLOW-UP] Make AnalysisException message deterministic

2022-08-01 Thread gurwls223
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 7a9d040814a [SPARK-38864][SQL][FOLLOW-UP] Make AnalysisException 
message deterministic
7a9d040814a is described below

commit 7a9d040814aec5a13967ae14bc1ae54bd0fa355c
Author: Enrico Minack 
AuthorDate: Tue Aug 2 12:22:44 2022 +0900

[SPARK-38864][SQL][FOLLOW-UP] Make AnalysisException message deterministic

### What changes were proposed in this pull request?
It turns out the AnalysisException message is sensitive to the iteration order of the 
Scala map of data types used to generate the error message, which can differ between 
Scala 2.12 and 2.13. This fix makes the error message deterministic by sorting both 
the column names and the grouped types.

https://github.com/apache/spark/pull/36150#discussion_r933962854
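
Roughly, the issue and the fix look like this (simplified to plain strings; the real code in `QueryCompilationErrors.unpivotValDataTypeMismatchError` groups `NamedExpression`s by `DataType`):

```
// groupBy returns a hash map, so iteration order (and therefore the rendered
// error message) can differ between Scala 2.12 and 2.13. Sorting the column
// names within each group and then the groups themselves makes it stable.
val columns = Seq("long1" -> "BIGINT", "str1" -> "STRING", "int2" -> "INT", "int1" -> "INT")

val types = columns
  .groupBy(_._2)                                             // Map[type, columns], hash order
  .map { case (tpe, cols) => tpe -> cols.map(_._1).sorted }  // sort names within each type
  .toList.sortBy(_._1)                                       // sort the (type, names) pairs
  .map { case (tpe, cols) => s"$tpe (${cols.mkString(", ")})" }
  .mkString(", ")
// Always: BIGINT (long1), INT (int1, int2), STRING (str1)
```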

### Why are the changes needed?
This fixes tests.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
The test should no longer fail on Scala 2.13.

Closes #37351 from EnricoMi/branch-fix-unpivot-test-asserts.

Authored-by: Enrico Minack 
Signed-off-by: Hyukjin Kwon 
---
 .../apache/spark/sql/errors/QueryCompilationErrors.scala |  3 ++-
 .../scala/org/apache/spark/sql/DatasetUnpivotSuite.scala | 16 
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 430fceb76b5..36bbe167a9b 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -101,8 +101,9 @@ private[sql] object QueryCompilationErrors extends 
QueryErrorsBase {
   def unpivotValDataTypeMismatchError(values: Seq[NamedExpression]): Throwable 
= {
 val dataTypes = values
   .groupBy(_.dataType)
-  .mapValues(values => values.map(value => toSQLId(value.toString)))
+  .mapValues(values => values.map(value => toSQLId(value.toString)).sorted)
   .mapValues(values => if (values.length > 3) values.take(3) :+ "..." else 
values)
+  .toList.sortBy(_._1.sql)
   .map { case (dataType, values) => s"${toSQLType(dataType)} 
(${values.mkString(", ")})" }
 
 new AnalysisException(
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DatasetUnpivotSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DatasetUnpivotSuite.scala
index b860f950325..b81383149a5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetUnpivotSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetUnpivotSuite.scala
@@ -310,9 +310,9 @@ class DatasetUnpivotSuite extends QueryTest
   errorClass = "UNPIVOT_VALUE_DATA_TYPE_MISMATCH",
   parameters = Map(
 "types" ->
-  (""""STRING" \(`str1#\d+`\), """ +
+  (""""BIGINT" \(`long1#\d+L`, `long2#\d+L`\), """ +
   """"INT" \(`int1#\d+`, `int2#\d+`, `int3#\d+`, ...\), """ +
-   """"BIGINT" \(`long1#\d+L`, `long2#\d+L`\)""")),
+   """"STRING" \(`str1#\d+`\)""")),
   matchPVals = true)
   }
 
@@ -396,9 +396,9 @@ class DatasetUnpivotSuite extends QueryTest
   exception = e3,
   errorClass = "UNPIVOT_VALUE_DATA_TYPE_MISMATCH",
   parameters = Map("types" ->
-(""""INT" \(`id#\d+`, `int1#\d+`\), """ +
- """"STRING" \(`str1#\d+`, `str2#\d+`\), """ +
- """"BIGINT" \(`long1#\d+L`\)""")),
+(""""BIGINT" \(`long1#\d+L`\), """ +
+ """"INT" \(`id#\d+`, `int1#\d+`\), """ +
+ """"STRING" \(`str1#\d+`, `str2#\d+`\)""")),
   matchPVals = true)
 
 // unpivoting with star id columns so that no value columns are left
@@ -429,9 +429,9 @@ class DatasetUnpivotSuite extends QueryTest
   exception = e5,
   errorClass = "UNPIVOT_VALUE_DATA_TYPE_MISMATCH",
   parameters = Map("types" ->
-(""""INT" \(`id#\d+`, `int1#\d+`\), """ +
- """"STRING" \(`str1#\d+`, `str2#\d+`\), """ +
- """"BIGINT" \(`long1#\d+L`\)""")),
+(""""BIGINT" \(`long1#\d+L`\), """ +
+ """"INT" \(`id#\d+`, `int1#\d+`\), """ +
+ """"STRING" \(`str1#\d+`, `str2#\d+`\)""")),
   matchPVals = true)
 
 // unpivoting without giving values and no non-id columns





[spark] branch master updated (957eb7a7273 -> d2a18417d90)

2022-08-01 Thread wenchen
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


from 957eb7a7273 [SPARK-39933][SQL][TESTS] Check query context by 
`checkError()`
 add d2a18417d90 [SPARK-39873][SQL] Remove `OptimizeLimitZero` and merge it 
into `EliminateLimits`

No new revisions were added by this update.

Summary of changes:
 .../spark/sql/catalyst/optimizer/Optimizer.scala   | 46 --
 .../catalyst/optimizer/CombiningLimitsSuite.scala  |  2 +-
 .../optimizer/OptimizeLimitZeroSuite.scala |  2 +-
 3 files changed, 10 insertions(+), 40 deletions(-)





[spark] branch master updated (a27edf9ad41 -> 957eb7a7273)

2022-08-01 Thread maxgekk
This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


from a27edf9ad41 [SPARK-39902][SQL] Add Scan details to spark plan scan 
node in SparkUI
 add 957eb7a7273 [SPARK-39933][SQL][TESTS] Check query context by 
`checkError()`

No new revisions were added by this update.

Summary of changes:
 .../scala/org/apache/spark/SparkFunSuite.scala | 27 +++-
 .../spark/sql/errors/QueryErrorsSuiteBase.scala| 15 -
 .../sql/errors/QueryExecutionAnsiErrorsSuite.scala | 37 --
 3 files changed, 67 insertions(+), 12 deletions(-)





[spark] branch master updated: [SPARK-39902][SQL] Add Scan details to spark plan scan node in SparkUI

2022-08-01 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new a27edf9ad41 [SPARK-39902][SQL] Add Scan details to spark plan scan 
node in SparkUI
a27edf9ad41 is described below

commit a27edf9ad4104f7df30dbbf77ec06fcf3cf9feda
Author: Sumeet Gajjar 
AuthorDate: Mon Aug 1 18:41:03 2022 -0700

[SPARK-39902][SQL] Add Scan details to spark plan scan node in SparkUI

### What changes were proposed in this pull request?

In this PR, we propose to add a method "String name()" to the Scan interface, 
which "BatchScanExec" can invoke to set the node name of the plan. This 
`nodeName` is eventually used by "SparkPlanGraphNode" to display it in the 
header of the UI node.
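
A rough sketch of how a connector could plug into the new hook (the class, schema, and returned name are made up for illustration):

```
import org.apache.spark.sql.connector.read.Scan
import org.apache.spark.sql.types.StructType

// Hypothetical DSv2 scan. Overriding name() makes the scan node header in the
// SparkUI read "BatchScan sample scan sample_db.sample_table" rather than the
// bare "BatchScan".
class SampleTableScan extends Scan {
  override def readSchema(): StructType = new StructType().add("id", "long")

  override def name(): String = "sample scan sample_db.sample_table"
}
```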

### Why are the changes needed?

For DSv2, the scan node in the Spark plan on the SparkUI simply shows 
"BatchScan" instead of an informative name.

### Does this PR introduce _any_ user-facing change?

Yes, after this change the user will be able to see the scan name in the 
spark plan on SparkUI.

### How was this patch tested?

- Tested this change using existing UTs
- Further tested this change by overriding the newly added method in 
Iceberg's implementation of the Scan interface.

Closes #37325 from sumeetgajjar/v2_scan_ui_improvement.

Authored-by: Sumeet Gajjar 
Signed-off-by: Dongjoon Hyun 
---
 .../src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala |  2 +-
 .../main/java/org/apache/spark/sql/connector/read/Scan.java  | 12 
 .../spark/sql/execution/datasources/v2/BatchScanExec.scala   |  6 ++
 .../apache/spark/sql/execution/datasources/v2/FileScan.scala |  4 
 .../src/test/scala/org/apache/spark/sql/ExplainSuite.scala   |  2 +-
 .../sql/execution/DataSourceScanExecRedactionSuite.scala |  8 
 6 files changed, 28 insertions(+), 6 deletions(-)

diff --git 
a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala 
b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
index d75e6906719..8a088a43579 100644
--- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
+++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
@@ -2408,7 +2408,7 @@ class AvroV2Suite extends AvroSuite with 
ExplainSuiteHelper {
   val basePath = dir.getCanonicalPath + "/avro"
   val expected_plan_fragment =
 s"""
-   |\\(1\\) BatchScan
+   |\\(1\\) BatchScan avro
|Output \\[2\\]: \\[value#xL, id#x\\]
|DataFilters: \\[isnotnull\\(value#xL\\), \\(value#xL > 2\\)\\]
|Format: avro
diff --git 
a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java 
b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java
index d161de92eb8..941a11b8b1d 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java
@@ -60,6 +60,18 @@ public interface Scan {
 return this.getClass().toString();
   }
 
+  /**
+   * The name of the scan, which will be shown in the header of a spark plan 
scan node on SparkUI.
+   * E.g. "scan parquet sample_db.sample_table"
+   * 
+   * By default this returns the simple class name of the implementation. 
Please override it to
+   * provide a meaningful name.
+   * 
+   */
+  default String name() {
+return this.getClass().getSimpleName();
+  }
+
   /**
* Returns the physical representation of this scan for batch query. By 
default this method throws
* exception, data sources must overwrite this method to provide an 
implementation, if the
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala
index f1c43b8f60c..8da1123c9fe 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala
@@ -131,4 +131,10 @@ case class BatchScanExec(
 val result = s"$nodeName$truncatedOutputString ${scan.description()} 
$runtimeFiltersString"
 redact(result)
   }
+
+  /**
+   * Returns the name of this type of TreeNode.  Defaults to the class name.
+   * Note that we remove the "Exec" suffix for physical operators here.
+   */
+  override def nodeName: String = s"BatchScan ${scan.name()}"
 }
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala
index 21503fda53e..a7b6afc7f4e 100644
--- 

[spark] branch master updated: Revert "[SPARK-39917][SQL] Use different error classes for numeric/interval arithmetic overflow"

2022-08-01 Thread gengliang
This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 939c2402c81 Revert "[SPARK-39917][SQL] Use different error classes for 
numeric/interval arithmetic overflow"
939c2402c81 is described below

commit 939c2402c81ad98c5ab65b285ddbcc8825ecffeb
Author: Gengliang Wang 
AuthorDate: Mon Aug 1 14:04:17 2022 -0700

Revert "[SPARK-39917][SQL] Use different error classes for numeric/interval 
arithmetic overflow"

This reverts commit 1b6f14ff8c324454b0a44b2439aa42441af2dd81.
---
 core/src/main/resources/error/error-classes.json   |  8 +---
 .../sql/catalyst/expressions/arithmetic.scala  | 20 +-
 .../catalyst/expressions/intervalExpressions.scala |  3 +-
 .../sql/catalyst/util/IntervalMathUtils.scala  | 46 --
 .../spark/sql/errors/QueryExecutionErrors.scala| 14 ---
 .../sql-tests/results/ansi/interval.sql.out| 14 +++
 .../resources/sql-tests/results/interval.sql.out   | 14 +++
 .../sql-tests/results/postgreSQL/int4.sql.out  | 12 +++---
 .../sql-tests/results/postgreSQL/int8.sql.out  |  8 ++--
 .../results/postgreSQL/window_part2.sql.out|  4 +-
 .../apache/spark/sql/DataFrameAggregateSuite.scala |  8 ++--
 11 files changed, 41 insertions(+), 110 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json 
b/core/src/main/resources/error/error-classes.json
index ed6dd112e9f..c4b59799f88 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -7,7 +7,7 @@
   },
   "ARITHMETIC_OVERFLOW" : {
 "message" : [
-  ". If necessary set  to \"false\" to 
bypass this error."
+  ". If necessary set  to \"false\" (except 
for ANSI interval type) to bypass this error."
 ],
 "sqlState" : "22003"
   },
@@ -210,12 +210,6 @@
   ""
 ]
   },
-  "INTERVAL_ARITHMETIC_OVERFLOW" : {
-"message" : [
-  "."
-],
-"sqlState" : "22003"
-  },
   "INTERVAL_DIVIDED_BY_ZERO" : {
 "message" : [
   "Division by zero. Use `try_divide` to tolerate divisor being 0 and 
return NULL instead."
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
index 24ac685eace..86e6e6d7323 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.expressions.codegen.Block._
 import org.apache.spark.sql.catalyst.trees.SQLQueryContext
 import org.apache.spark.sql.catalyst.trees.TreePattern.{BINARY_ARITHMETIC, 
TreePattern, UNARY_POSITIVE}
-import org.apache.spark.sql.catalyst.util.{IntervalMathUtils, IntervalUtils, 
MathUtils, TypeUtils}
+import org.apache.spark.sql.catalyst.util.{IntervalUtils, MathUtils, TypeUtils}
 import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
@@ -89,7 +89,7 @@ case class UnaryMinus(
   defineCodeGen(ctx, ev, c => s"$iu.$method($c)")
 case _: AnsiIntervalType =>
   nullSafeCodeGen(ctx, ev, eval => {
-val mathUtils = 
IntervalMathUtils.getClass.getCanonicalName.stripSuffix("$")
+val mathUtils = MathUtils.getClass.getCanonicalName.stripSuffix("$")
 s"${ev.value} = $mathUtils.negateExact($eval);"
   })
   }
@@ -98,8 +98,8 @@ case class UnaryMinus(
 case CalendarIntervalType if failOnError =>
   IntervalUtils.negateExact(input.asInstanceOf[CalendarInterval])
 case CalendarIntervalType => 
IntervalUtils.negate(input.asInstanceOf[CalendarInterval])
-case _: DayTimeIntervalType => 
IntervalMathUtils.negateExact(input.asInstanceOf[Long])
-case _: YearMonthIntervalType => 
IntervalMathUtils.negateExact(input.asInstanceOf[Int])
+case _: DayTimeIntervalType => 
MathUtils.negateExact(input.asInstanceOf[Long])
+case _: YearMonthIntervalType => 
MathUtils.negateExact(input.asInstanceOf[Int])
 case _ => numeric.negate(input)
   }
 
@@ -278,8 +278,6 @@ abstract class BinaryArithmetic extends BinaryOperator
 throw 
QueryExecutionErrors.notOverrideExpectedMethodsError("BinaryArithmetics",
   "calendarIntervalMethod", "genCode")
 
-  protected def isAnsiInterval: Boolean = 
dataType.isInstanceOf[AnsiIntervalType]
-
   // Name of the function for the exact version of this expression in [[Math]].
   // If the option "spark.sql.ansi.enabled" is enabled and there is 
corresponding
   // function in [[Math]], the exact function will be called instead of 
evaluation with [[symbol]].
@@ -307,7 +305,7 @@ abstract 

[spark] branch master updated: [SPARK-39914][SQL] Add DS V2 Filter to V1 Filter conversion

2022-08-01 Thread huaxingao
This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 2ef738205c0 [SPARK-39914][SQL] Add DS V2 Filter to V1 Filter conversion
2ef738205c0 is described below

commit 2ef738205c0d4598a577a248afc117ac0844f3ad
Author: huaxingao 
AuthorDate: Mon Aug 1 11:23:13 2022 -0700

[SPARK-39914][SQL] Add DS V2 Filter to V1 Filter conversion

### What changes were proposed in this pull request?
Add util methods to convert DS V2 Filter to V1 Filter.

### Why are the changes needed?
Provide convenient methods to convert V2 Filters to V1 Filters. These methods can 
be used by 
[`SupportsRuntimeFiltering`](https://github.com/apache/spark/pull/36918/files#diff-0d3268f351817ca948e75e7b6641e5cc67c4d773c3234920a7aa62faf11f6c8e)
 and later by `SupportsDelete`.
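
A rough sketch of the conversion, mirroring the new `V2PredicateSuite` tests; `PredicateUtils` is `private[sql]`, so the sketch is placed under the `org.apache.spark.sql` package the way the new test suite is, and the column name and literal are illustrative:

```
package org.apache.spark.sql.example   // private[sql] members are visible from here

import org.apache.spark.sql.connector.expressions.{Expression, FieldReference, LiteralValue}
import org.apache.spark.sql.connector.expressions.filter.Predicate
import org.apache.spark.sql.internal.connector.PredicateUtils
import org.apache.spark.sql.types.IntegerType

object ToV1Example {
  // V2 predicate `i > 1`: a column reference and a literal as its two children.
  val v2Predicate = new Predicate(">",
    Array[Expression](FieldReference("i"), LiteralValue(1, IntegerType)))

  // Yields the V1 filter GreaterThan("i", 1); unsupported shapes return None.
  val v1Filter = PredicateUtils.toV1(v2Predicate)
}
```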

### Does this PR introduce _any_ user-facing change?
No. These are intended for internal use only.

### How was this patch tested?
New tests.

Closes #37332 from huaxingao/toV1.

Authored-by: huaxingao 
Signed-off-by: huaxingao 
---
 .../sql/internal/connector/PredicateUtils.scala| 92 +-
 .../datasources/v2/V2PredicateSuite.scala  | 85 
 2 files changed, 174 insertions(+), 3 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/PredicateUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/PredicateUtils.scala
index ace6b30d4cc..263edd82197 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/PredicateUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/PredicateUtils.scala
@@ -19,14 +19,25 @@ package org.apache.spark.sql.internal.connector
 
 import org.apache.spark.sql.catalyst.CatalystTypeConverters
 import org.apache.spark.sql.connector.expressions.{LiteralValue, 
NamedReference}
-import org.apache.spark.sql.connector.expressions.filter.Predicate
-import org.apache.spark.sql.sources.{Filter, In}
+import org.apache.spark.sql.connector.expressions.filter.{And => V2And, Not => 
V2Not, Or => V2Or, Predicate}
+import org.apache.spark.sql.sources.{AlwaysFalse, AlwaysTrue, And, 
EqualNullSafe, EqualTo, Filter, GreaterThan, GreaterThanOrEqual, In, IsNotNull, 
IsNull, LessThan, LessThanOrEqual, Not, Or, StringContains, StringEndsWith, 
StringStartsWith}
+import org.apache.spark.sql.types.StringType
 
 private[sql] object PredicateUtils {
 
   def toV1(predicate: Predicate): Option[Filter] = {
+
+def isValidBinaryPredicate(): Boolean = {
+  if (predicate.children().length == 2 &&
+predicate.children()(0).isInstanceOf[NamedReference] &&
+predicate.children()(1).isInstanceOf[LiteralValue[_]]) {
+true
+  } else {
+false
+  }
+}
+
 predicate.name() match {
-  // TODO: add conversion for other V2 Predicate
   case "IN" if predicate.children()(0).isInstanceOf[NamedReference] =>
 val attribute = predicate.children()(0).toString
 val values = predicate.children().drop(1)
@@ -43,6 +54,81 @@ private[sql] object PredicateUtils {
   Some(In(attribute, Array.empty[Any]))
 }
 
+  case "=" | "<=>" | ">" | "<" | ">=" | "<=" if isValidBinaryPredicate =>
+val attribute = predicate.children()(0).toString
+val value = predicate.children()(1).asInstanceOf[LiteralValue[_]]
+val v1Value = CatalystTypeConverters.convertToScala(value.value, 
value.dataType)
+val v1Filter = predicate.name() match {
+  case "=" => EqualTo(attribute, v1Value)
+  case "<=>" => EqualNullSafe(attribute, v1Value)
+  case ">" => GreaterThan(attribute, v1Value)
+  case ">=" => GreaterThanOrEqual(attribute, v1Value)
+  case "<" => LessThan(attribute, v1Value)
+  case "<=" => LessThanOrEqual(attribute, v1Value)
+}
+Some(v1Filter)
+
+  case "IS_NULL" | "IS_NOT_NULL" if predicate.children().length == 1 &&
+  predicate.children()(0).isInstanceOf[NamedReference] =>
+val attribute = predicate.children()(0).toString
+val v1Filter = predicate.name() match {
+  case "IS_NULL" => IsNull(attribute)
+  case "IS_NOT_NULL" => IsNotNull(attribute)
+}
+Some(v1Filter)
+
+  case "STARTS_WITH" | "ENDS_WITH" | "CONTAINS" if isValidBinaryPredicate 
=>
+val attribute = predicate.children()(0).toString
+val value = predicate.children()(1).asInstanceOf[LiteralValue[_]]
+if (!value.dataType.sameType(StringType)) return None
+val v1Value = value.value.toString
+val v1Filter = predicate.name() match {
+  case "STARTS_WITH" =>
+StringStartsWith(attribute, v1Value)
+  case "ENDS_WITH" =>
+  

[spark] branch master updated (e78585b2fbb -> 1b6f14ff8c3)

2022-08-01 Thread gengliang
This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


from e78585b2fbb [SPARK-39848][BUILD] Upgrade Kafka to 3.2.1
 add 1b6f14ff8c3 [SPARK-39917][SQL] Use different error classes for 
numeric/interval arithmetic overflow

No new revisions were added by this update.

Summary of changes:
 core/src/main/resources/error/error-classes.json   |  8 -
 .../sql/catalyst/expressions/arithmetic.scala  | 20 +++--
 .../catalyst/expressions/intervalExpressions.scala |  3 +-
 .../sql/catalyst/util/IntervalMathUtils.scala} | 35 +++---
 .../spark/sql/errors/QueryExecutionErrors.scala| 14 +
 .../sql-tests/results/ansi/interval.sql.out| 14 -
 .../resources/sql-tests/results/interval.sql.out   | 14 -
 .../sql-tests/results/postgreSQL/int4.sql.out  | 12 
 .../sql-tests/results/postgreSQL/int8.sql.out  |  8 ++---
 .../results/postgreSQL/window_part2.sql.out|  4 +--
 .../apache/spark/sql/DataFrameAggregateSuite.scala |  8 ++---
 11 files changed, 82 insertions(+), 58 deletions(-)
 copy 
sql/{core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableExec.scala
 => 
catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalMathUtils.scala}
 (51%)





[spark] branch master updated: [SPARK-39848][BUILD] Upgrade Kafka to 3.2.1

2022-08-01 Thread dongjoon
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new e78585b2fbb [SPARK-39848][BUILD] Upgrade Kafka to 3.2.1
e78585b2fbb is described below

commit e78585b2fbb219a7bd70bd3710ca3b52f2306623
Author: Dongjoon Hyun 
AuthorDate: Mon Aug 1 09:17:03 2022 -0700

[SPARK-39848][BUILD] Upgrade Kafka to 3.2.1

### What changes were proposed in this pull request?

This PR aims to upgrade Apache Kafka to 3.2.1.

### Why are the changes needed?

Apache Kafka 3.2.1 is released.
- https://lists.apache.org/thread/b6nonzos2qjhc9tpolld9qxrcxqcg011

Apache Kafka 3.2.1 has the following patches.
- https://home.apache.org/~davidarthur/kafka-3.2.1-rc3/RELEASE_NOTES.html

[KAFKA-14013](https://issues.apache.org/jira/browse/KAFKA-14013) Limit the 
length of the `reason` field sent on the wire
[KAFKA-13474](https://issues.apache.org/jira/browse/KAFKA-13474) Regression 
in dynamic update of broker certificate
[KAFKA-13572](https://issues.apache.org/jira/browse/KAFKA-13572) Negative 
value for 'Preferred Replica Imbalance' metric
[KAFKA-13773](https://issues.apache.org/jira/browse/KAFKA-13773) Data loss 
after recovery from crash due to full hard disk
[KAFKA-13861](https://issues.apache.org/jira/browse/KAFKA-13861) 
validateOnly request field does not work for CreatePartition requests in Kraft 
mode.
[KAFKA-13899](https://issues.apache.org/jira/browse/KAFKA-13899) 
Inconsistent error codes returned from AlterConfig APIs
[KAFKA-13998](https://issues.apache.org/jira/browse/KAFKA-13998) 
JoinGroupRequestData 'reason' can be too large
[KAFKA-14010](https://issues.apache.org/jira/browse/KAFKA-14010) alterISR 
request won't retry when receiving retriable error
[KAFKA-14024](https://issues.apache.org/jira/browse/KAFKA-14024) Consumer 
stuck during cooperative rebalance for Commit offset in onJoinPrepare
[KAFKA-14035](https://issues.apache.org/jira/browse/KAFKA-14035) 
QuorumController handleRenounce throws NPE
[KAFKA-14055](https://issues.apache.org/jira/browse/KAFKA-14055) 
Transaction markers may be lost during cleaning if data keys conflict with 
marker keys
[KAFKA-14062](https://issues.apache.org/jira/browse/KAFKA-14062) OAuth 
client token refresh fails with SASL extensions
[KAFKA-14079](https://issues.apache.org/jira/browse/KAFKA-14079) Source 
task will not commit offsets and develops memory leak if "error.tolerance" is 
set to "all"

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Pass the CIs

Closes #37261 from dongjoon-hyun/SPARK-39848.

Authored-by: Dongjoon Hyun 
Signed-off-by: Dongjoon Hyun 
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 5e29ed57925..acebec1e2ec 100644
--- a/pom.xml
+++ b/pom.xml
@@ -128,7 +128,7 @@
 
 2.3
 
-3.2.0
+3.2.1
 
 10.14.2.0
 1.12.3





[spark] branch master updated: [SPARK-39923][SQL] Multiple query contexts in Spark exceptions

2022-08-01 Thread maxgekk
This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 9ee2c753b98 [SPARK-39923][SQL] Multiple query contexts in Spark 
exceptions
9ee2c753b98 is described below

commit 9ee2c753b98b290fab9b2ec1f02d90c7c9441271
Author: Max Gekk 
AuthorDate: Mon Aug 1 13:40:22 2022 +0500

[SPARK-39923][SQL] Multiple query contexts in Spark exceptions

### What changes were proposed in this pull request?
1. Replace `Option[QueryContext]` with `Array[QueryContext]` in Spark 
exceptions such as `SparkRuntimeException`.
2. Pass `SQLQueryContext` to `QueryExecutionErrors` functions instead of 
`Option[SQLQueryContext]`.
3. Add the methods `getContextOrNull()` and `getContextOrNullCode()` to 
`SupportQueryContext` to get an expression's SQL query context, or `null` if it 
is missing.

### Why are the changes needed?
1. The changes allow chaining multiple error contexts in Spark exceptions. For 
instance, if a user's query references a view v1, v1 references another view v2, 
and v2 performs a division, the error contexts will be: the SQL fragment of v2 
that performs the division -> the SQL fragment of v1 that references v2 -> the 
SQL fragment of the user's query that references v1 (see the sketch below).
2. Passing `SQLQueryContext` to `QueryExecutionErrors` directly simplifies the 
generated code because it avoids constructing Scala objects such as 
`scala.None`.
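
A sketch of what the chained contexts look like from the caller's side (assumes ANSI mode, the hypothetical view chain `v1` -> `v2` above, and a `SparkSession` named `spark`):

```
import org.apache.spark.SparkThrowable

try {
  // The failing expression lives inside v2, which is referenced via v1.
  spark.sql("SELECT * FROM v1").collect()
} catch {
  case e: SparkThrowable =>
    // getQueryContext now returns every SQL fragment involved, innermost first:
    // the division in v2, the reference in v1, then the user's query.
    e.getQueryContext.foreach(ctx => println(ctx.fragment()))
}
```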

### Does this PR introduce _any_ user-facing change?
Yes, this PR changes user-facing exceptions.

### How was this patch tested?
By running the modified test suites:
```
$ build/sbt "test:testOnly *DecimalExpressionSuite"
```
and potentially affected tests:
```
$ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite"
```

Closes #37343 from MaxGekk/array-as-query-context.

Authored-by: Max Gekk 
Signed-off-by: Max Gekk 
---
 .../scala/org/apache/spark/SparkException.scala| 28 -
 .../spark/sql/catalyst/expressions/Cast.scala  | 67 +++---
 .../sql/catalyst/expressions/Expression.scala  | 10 
 .../catalyst/expressions/aggregate/Average.scala   |  6 +-
 .../sql/catalyst/expressions/aggregate/Sum.scala   | 23 
 .../sql/catalyst/expressions/arithmetic.scala  | 48 +---
 .../expressions/collectionOperations.scala |  4 +-
 .../expressions/complexTypeExtractors.scala|  8 +--
 .../catalyst/expressions/decimalExpressions.scala  | 32 ---
 .../catalyst/expressions/intervalExpressions.scala | 16 +++---
 .../sql/catalyst/expressions/mathExpressions.scala |  2 +-
 .../catalyst/expressions/stringExpressions.scala   |  5 +-
 .../spark/sql/catalyst/util/DateTimeUtils.scala| 10 ++--
 .../spark/sql/catalyst/util/IntervalUtils.scala|  2 +-
 .../apache/spark/sql/catalyst/util/MathUtils.scala | 14 ++---
 .../spark/sql/catalyst/util/UTF8StringUtils.scala  | 10 ++--
 .../apache/spark/sql/errors/QueryErrorsBase.scala  |  9 ++-
 .../spark/sql/errors/QueryExecutionErrors.scala| 54 -
 .../scala/org/apache/spark/sql/types/Decimal.scala |  4 +-
 .../expressions/DecimalExpressionSuite.scala   |  2 +-
 20 files changed, 182 insertions(+), 172 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/SparkException.scala 
b/core/src/main/scala/org/apache/spark/SparkException.scala
index d6add48ffb1..6548a114d41 100644
--- a/core/src/main/scala/org/apache/spark/SparkException.scala
+++ b/core/src/main/scala/org/apache/spark/SparkException.scala
@@ -119,7 +119,7 @@ private[spark] class SparkArithmeticException(
 errorClass: String,
 errorSubClass: Option[String] = None,
 messageParameters: Array[String],
-context: Option[QueryContext],
+context: Array[QueryContext],
 summary: String)
   extends ArithmeticException(
 SparkThrowableHelper.getMessage(errorClass, errorSubClass.orNull, 
messageParameters, summary))
@@ -128,7 +128,7 @@ private[spark] class SparkArithmeticException(
   override def getMessageParameters: Array[String] = messageParameters
   override def getErrorClass: String = errorClass
   override def getErrorSubClass: String = errorSubClass.orNull
-  override def getQueryContext: Array[QueryContext] = context.toArray
+  override def getQueryContext: Array[QueryContext] = context
 }
 
 /**
@@ -195,7 +195,7 @@ private[spark] class SparkDateTimeException(
 errorClass: String,
 errorSubClass: Option[String] = None,
 messageParameters: Array[String],
-context: Option[QueryContext],
+context: Array[QueryContext],
 summary: String)
   extends DateTimeException(
 SparkThrowableHelper.getMessage(errorClass, errorSubClass.orNull, 
messageParameters, summary))
@@ -204,7 +204,7 @@ private[spark] class SparkDateTimeException(
   override def getMessageParameters: 

[spark] branch master updated (8c6c7ae3744 -> 5f7f8163097)

2022-08-01 Thread gurwls223
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


from 8c6c7ae3744 [SPARK-39881][PYTHON] Fix erroneous check for black and 
reenable black validation
 add 5f7f8163097 [SPARK-39877][PYTHON][FOLLOW-UP] Add DataFrame unpivot to 
PySpark docs

No new revisions were added by this update.

Summary of changes:
 python/docs/source/reference/pyspark.sql/dataframe.rst | 1 +
 1 file changed, 1 insertion(+)





[spark] branch master updated (004430054c2 -> 8c6c7ae3744)

2022-08-01 Thread gurwls223
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


from 004430054c2 [SPARK-39503][SQL][FOLLOWUP] InMemoryCatalog should keep 
the catalog field when renaming tables
 add 8c6c7ae3744 [SPARK-39881][PYTHON] Fix erroneous check for black and 
reenable black validation

No new revisions were added by this update.

Summary of changes:
 .github/workflows/build_and_test.yml   |  2 +-
 dev/lint-python|  4 +--
 dev/pyproject.toml |  2 +-
 dev/reformat-python|  9 ++---
 dev/requirements.txt   |  2 +-
 python/pyspark/context.py  |  5 +--
 python/pyspark/ml/feature.py   |  6 ++--
 python/pyspark/pandas/data_type_ops/boolean_ops.py |  6 ++--
 python/pyspark/pandas/frame.py |  4 +--
 python/pyspark/pandas/series.py|  4 +--
 .../pandas/tests/data_type_ops/test_binary_ops.py  |  6 ++--
 .../pandas/tests/data_type_ops/test_boolean_ops.py | 36 +--
 .../tests/data_type_ops/test_categorical_ops.py|  6 ++--
 .../pandas/tests/data_type_ops/test_complex_ops.py |  4 +--
 .../pandas/tests/data_type_ops/test_date_ops.py| 10 +++---
 .../tests/data_type_ops/test_datetime_ops.py   | 10 +++---
 .../pandas/tests/data_type_ops/test_null_ops.py|  6 ++--
 .../pandas/tests/data_type_ops/test_num_ops.py | 14 
 .../tests/data_type_ops/test_timedelta_ops.py  |  6 ++--
 .../pandas/tests/data_type_ops/test_udt_ops.py |  6 ++--
 .../pandas/tests/test_ops_on_diff_frames.py|  4 +--
 python/pyspark/pandas/tests/test_series.py | 10 +++---
 python/pyspark/rdd.py  |  2 +-
 python/pyspark/sql/tests/test_arrow.py |  2 +-
 python/pyspark/sql/tests/test_column.py|  2 +-
 python/pyspark/sql/tests/test_context.py   |  2 +-
 python/pyspark/sql/tests/test_pandas_udf_scalar.py |  4 +--
 python/pyspark/sql/tests/test_types.py | 40 +++---
 python/pyspark/sql/tests/test_udf.py   |  4 +--
 29 files changed, 108 insertions(+), 110 deletions(-)





[spark] branch master updated: [SPARK-39503][SQL][FOLLOWUP] InMemoryCatalog should keep the catalog field when renaming tables

2022-08-01 Thread yumwang
This is an automated email from the ASF dual-hosted git repository.

yumwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 004430054c2 [SPARK-39503][SQL][FOLLOWUP] InMemoryCatalog should keep 
the catalog field when renaming tables
004430054c2 is described below

commit 004430054c2a1c1599f9451e6c77b64d02de4171
Author: Wenchen Fan 
AuthorDate: Mon Aug 1 15:26:44 2022 +0800

[SPARK-39503][SQL][FOLLOWUP] InMemoryCatalog should keep the catalog field 
when renaming tables

### What changes were proposed in this pull request?

This is a followup of https://github.com/apache/spark/pull/37021. The 
`renameTable` method should keep the catalog name in `TableIdentifier`. This is 
necessary because methods like `getTablesByName` won't qualify the table 
identifiers again.
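
A small sketch of the difference (assuming the `catalog` field that SPARK-39503 added to `TableIdentifier`; the names are illustrative):

```
import org.apache.spark.sql.catalyst.TableIdentifier

// An identifier that the SessionCatalog has qualified with the session catalog.
val oldIdent = TableIdentifier("t_old", Some("db")).copy(catalog = Some("spark_catalog"))

// Rebuilding the identifier from scratch drops the catalog field ...
val lossy = TableIdentifier("t_new", Some("db"))    // catalog = None

// ... while copying only the table name, as this fix does, preserves it.
val fixed = oldIdent.copy(table = "t_new")          // catalog = Some("spark_catalog")
```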

This PR also cleans up `InMemoryCatalog` a bit. The caller side, 
`SessionCatalog`, creates tables/functions using identifiers qualified with the 
catalog name, so we don't need to attach the catalog name again in places like 
`getTable`. We just need to make sure we don't drop the catalog field when 
updating tables.

### Why are the changes needed?

Make sure the v1 identifiers are always qualified with the catalog name.

### Does this PR introduce _any_ user-facing change?

No. `InMemoryCatalog` is test-only.

### How was this patch tested?

N/A

Closes #37347 from cloud-fan/follow.

Lead-authored-by: Wenchen Fan 
Co-authored-by: Wenchen Fan 
Signed-off-by: Yuming Wang 
---
 .../sql/catalyst/catalog/InMemoryCatalog.scala | 13 +---
 .../catalyst/catalog/ExternalCatalogSuite.scala| 23 --
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
index 4fe56440c11..218a342e669 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
@@ -25,8 +25,6 @@ import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.Path
 
 import org.apache.spark.SparkConf
-import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
-import org.apache.spark.sql.catalyst.CatalystIdentifier._
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._
 import org.apache.spark.sql.catalyst.expressions.Expression
@@ -282,7 +280,7 @@ class InMemoryCatalog(
 requireTableExists(db, oldName)
 requireTableNotExists(db, newName)
 val oldDesc = catalog(db).tables(oldName)
-oldDesc.table = oldDesc.table.copy(identifier = TableIdentifier(newName, 
Some(db)))
+oldDesc.table = oldDesc.table.copy(identifier = 
oldDesc.table.identifier.copy(table = newName))
 
 if (oldDesc.table.tableType == CatalogTableType.MANAGED) {
   assert(oldDesc.table.storage.locationUri.isDefined,
@@ -344,8 +342,7 @@ class InMemoryCatalog(
 
   override def getTable(db: String, table: String): CatalogTable = 
synchronized {
 requireTableExists(db, table)
-val catalogTable = catalog(db).tables(table).table
-catalogTable.copy(identifier = 
attachSessionCatalog(catalogTable.identifier))
+catalog(db).tables(table).table
   }
 
   override def getTablesByName(db: String, tables: Seq[String]): 
Seq[CatalogTable] = {
@@ -634,15 +631,15 @@ class InMemoryCatalog(
   newName: String): Unit = synchronized {
 requireFunctionExists(db, oldName)
 requireFunctionNotExists(db, newName)
-val newFunc = getFunction(db, oldName).copy(identifier = 
FunctionIdentifier(newName, Some(db)))
+val oldFunc = getFunction(db, oldName)
+val newFunc = oldFunc.copy(identifier = oldFunc.identifier.copy(funcName = 
newName))
 catalog(db).functions.remove(oldName)
 catalog(db).functions.put(newName, newFunc)
   }
 
   override def getFunction(db: String, funcName: String): CatalogFunction = 
synchronized {
 requireFunctionExists(db, funcName)
-val catalogFunction = catalog(db).functions(funcName)
-catalogFunction.copy(identifier = 
attachSessionCatalog(catalogFunction.identifier))
+catalog(db).functions(funcName)
   }
 
   override def functionExists(db: String, funcName: String): Boolean = 
synchronized {
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala
index bf9bf38b07e..1b0a154a3f4 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala
+++