[spark] branch master updated (1f3eb73 -> c411d26)
This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

 from 1f3eb73  [SPARK-37510][PYTHON] Support basic operations of timedelta Series/Index
  add c411d26  [SPARK-37330][SQL] Migrate ReplaceTableStatement to v2 command

No new revisions were added by this update.

Summary of changes:
 .../sql/catalyst/analysis/ResolveCatalogs.scala    | 12 ---
 .../spark/sql/catalyst/parser/AstBuilder.scala     | 11 ++-
 .../sql/catalyst/plans/logical/statements.scala    | 23 +-
 .../sql/catalyst/plans/logical/v2Commands.scala    | 19 ++
 .../sql/connector/catalog/CatalogV2Util.scala      |  6 +-
 .../spark/sql/catalyst/parser/DDLParserSuite.scala | 20 +--
 .../catalyst/analysis/ResolveSessionCatalog.scala  | 19 +++---
 .../datasources/v2/DataSourceV2Strategy.scala      | 19 --
 .../datasources/v2/ReplaceTableExec.scala          | 11 ---
 9 files changed, 56 insertions(+), 84 deletions(-)
[spark] branch master updated: [SPARK-37510][PYTHON] Support basic operations of timedelta Series/Index
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 1f3eb73  [SPARK-37510][PYTHON] Support basic operations of timedelta Series/Index

1f3eb73 is described below

commit 1f3eb737f15cad101f961e0f153905c76a5dd12d
Author: Xinrong Meng
AuthorDate: Sat Dec 4 10:39:02 2021 +0900

    [SPARK-37510][PYTHON] Support basic operations of timedelta Series/Index

    ### What changes were proposed in this pull request?
    Support basic operations of timedelta Series/Index.

    ### Why are the changes needed?
    To be consistent with pandas.

    ### Does this PR introduce _any_ user-facing change?
    Yes.
    ```py
    >>> psdf = ps.DataFrame(
    ...     {'this': [timedelta(1), timedelta(microseconds=2), timedelta(weeks=3)],
    ...      'that': [timedelta(0), timedelta(microseconds=1), timedelta(seconds=2)]}
    ... )
    >>> psdf
                         this                   that
    0         1 days 00:00:00        0 days 00:00:00
    1  0 days 00:00:00.000002 0 days 00:00:00.000001
    2        21 days 00:00:00        0 days 00:00:02

    # __sub__
    >>> psdf.this - psdf.that
    0          1 days 00:00:00
    1   0 days 00:00:00.000001
    2         20 days 23:59:58
    dtype: timedelta64[ns]
    >>> psdf.this - timedelta(1)
    0            0 days 00:00:00
    1   -1 days +00:00:00.000002
    2           20 days 00:00:00
    Name: this, dtype: timedelta64[ns]

    # __rsub__
    >>> timedelta(1) - psdf.this
    0          0 days 00:00:00
    1   0 days 23:59:59.999998
    2       -20 days +00:00:00
    Name: this, dtype: timedelta64[ns]
    ```

    ### How was this patch tested?
    Unit tests.

    Closes #34787 from xinrong-databricks/timedeltaBasicOps.

    Authored-by: Xinrong Meng
    Signed-off-by: Hyukjin Kwon
---
 .../pyspark/pandas/data_type_ops/datetime_ops.py   |  2 +-
 .../pyspark/pandas/data_type_ops/timedelta_ops.py  | 59 ++--
 .../tests/data_type_ops/test_timedelta_ops.py      | 63 ++
 .../pandas/tests/data_type_ops/testing_utils.py    |  3 ++
 4 files changed, 101 insertions(+), 26 deletions(-)

diff --git a/python/pyspark/pandas/data_type_ops/datetime_ops.py b/python/pyspark/pandas/data_type_ops/datetime_ops.py
index f66f3ba..3ec4109 100644
--- a/python/pyspark/pandas/data_type_ops/datetime_ops.py
+++ b/python/pyspark/pandas/data_type_ops/datetime_ops.py
@@ -143,7 +143,7 @@ class DatetimeOps(DataTypeOps):
         if isinstance(dtype, CategoricalDtype):
             return _as_categorical_type(index_ops, dtype, spark_type)
         elif isinstance(spark_type, BooleanType):
-            raise TypeError("cannot astype a datetimelike from [datetime64[ns]] to [bool]")
+            raise TypeError("cannot astype a %s to [bool]" % self.pretty_name)
         elif isinstance(spark_type, StringType):
             return _as_string_type(index_ops, dtype, null_str=str(pd.NaT))
         else:

diff --git a/python/pyspark/pandas/data_type_ops/timedelta_ops.py b/python/pyspark/pandas/data_type_ops/timedelta_ops.py
index 7e61ab1..56031c7 100644
--- a/python/pyspark/pandas/data_type_ops/timedelta_ops.py
+++ b/python/pyspark/pandas/data_type_ops/timedelta_ops.py
@@ -15,21 +15,26 @@
 # limitations under the License.
 #
-from typing import Union
+from datetime import timedelta
+from typing import Any, Union

 import pandas as pd
 from pandas.api.types import CategoricalDtype

+from pyspark.sql.column import Column
 from pyspark.sql.types import (
     BooleanType,
+    DayTimeIntervalType,
     StringType,
 )

-from pyspark.pandas._typing import Dtype, IndexOpsLike
+from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex
+from pyspark.pandas.base import IndexOpsMixin
 from pyspark.pandas.data_type_ops.base import (
     DataTypeOps,
     _as_categorical_type,
     _as_other_type,
     _as_string_type,
+    _sanitize_list_like,
 )
 from pyspark.pandas.typedef import pandas_on_spark_type

@@ -49,7 +54,7 @@ class TimedeltaOps(DataTypeOps):
         if isinstance(dtype, CategoricalDtype):
             return _as_categorical_type(index_ops, dtype, spark_type)
         elif isinstance(spark_type, BooleanType):
-            raise TypeError("cannot astype a datetimelike from [timedelta64[ns]] to [bool]")
+            raise TypeError("cannot astype a %s to [bool]" % self.pretty_name)
         elif isinstance(spark_type, StringType):
             return _as_string_type(index_ops, dtype, null_str=str(pd.NaT))
         else:
@@ -58,3 +63,51 @@ class TimedeltaOps(DataTypeOps):
     def prepare(self, col: pd.Series) -> pd.Series:
         """Prepare column when from_pandas."""
         return col
+
+    def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        from pyspark.pandas.base import column_op
+
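The new `DayTimeIntervalType` import in the diff above suggests the underlying representation: pandas-on-Spark timedelta values appear to map to Spark's day-time interval type. As a rough illustration under that assumption (an editor's sketch, not code from this commit), the `__sub__` example above corresponds to ANSI interval arithmetic in Spark SQL:

```scala
// Sketch under the assumption that pandas-on-Spark timedeltas are backed by
// DayTimeIntervalType (suggested by the new import above); not part of this commit.
import org.apache.spark.sql.SparkSession

object IntervalSubSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("interval-sub").getOrCreate()

    // timedelta(weeks=3) - timedelta(seconds=2), cf. row 2 of `psdf.this - psdf.that`:
    spark.sql(
      "SELECT INTERVAL '21 00:00:00' DAY TO SECOND - INTERVAL '0 00:00:02' DAY TO SECOND AS diff"
    ).show(truncate = false)  // 20 days 23:59:58 as a day-time interval

    spark.stop()
  }
}
```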
[spark] branch master updated: [SPARK-37508][SQL][DOCS][FOLLOW-UP] Update expression desc of `CONTAINS()` string function
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new b5fc6da  [SPARK-37508][SQL][DOCS][FOLLOW-UP] Update expression desc of `CONTAINS()` string function

b5fc6da is described below

commit b5fc6dade261fe9917ff2c835911bc54da121bbd
Author: Angerszh
AuthorDate: Sat Dec 4 10:36:22 2021 +0900

    [SPARK-37508][SQL][DOCS][FOLLOW-UP] Update expression desc of `CONTAINS()` string function

    ### What changes were proposed in this pull request?
    Update the usage doc of the `CONTAINS()` string function to:
    ```
    _FUNC_(left, right) - Returns a boolean. The value is True if right is found inside left.
    Returns NULL if either input expression is NULL. Otherwise, returns False.
    ```
    This clarifies that when neither the left nor the right expression is `NULL` and right is not found inside left, the function returns `False`.

    ### Why are the changes needed?
    Make the function description clearer.

    ### Does this PR introduce _any_ user-facing change?
    The usage section of `DESCRIBE FUNCTION EXTENDED contains` now returns:
    ```
    contains(left, right) - Returns a boolean. The value is True if right is found inside left.
    Returns NULL if either input expression is NULL. Otherwise, returns False.
    ```

    ### How was this patch tested?
    Verified after a documentation build, and with a SQL command such as `DESCRIBE FUNCTION EXTENDED contains`.

    Closes #34786 from AngersZh/SPARK-37508-FOLLOWUP.

    Authored-by: Angerszh
    Signed-off-by: Hyukjin Kwon
---
 .../org/apache/spark/sql/catalyst/expressions/stringExpressions.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index 3fb30fa..ba2b4b5 100755
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -467,8 +467,8 @@ abstract class StringPredicate extends BinaryExpression
  */
 @ExpressionDescription(
   usage = """
-    _FUNC_(expr1, expr2) - Returns a boolean value if expr2 is found inside expr1.
-    Returns NULL if either input expression is NULL.
+    _FUNC_(left, right) - Returns a boolean. The value is True if right is found inside left.
+    Returns NULL if either input expression is NULL. Otherwise, returns False.
   """,
   examples = """
     Examples:
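The three-valued behavior spelled out by the new usage string can be checked directly from a build that includes `contains()` (added for Spark 3.3 by SPARK-37508). A minimal sketch using literals only; the session setup is the usual boilerplate, not part of the commit:

```scala
// Sketch only: exercises the documented contains() semantics with literal inputs.
import org.apache.spark.sql.SparkSession

object ContainsSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("contains-sketch").getOrCreate()

    spark.sql(
      """SELECT contains('Spark SQL', 'Spark') AS hit,     -- true: right found inside left
        |       contains('Spark SQL', 'flink') AS miss,    -- false: both inputs non-NULL, not found
        |       contains(NULL, 'Spark')        AS null_in  -- NULL: a NULL input yields NULL
        |""".stripMargin).show()

    spark.stop()
  }
}
```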
[spark] branch master updated (f570d01 -> b2a4e8f)
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

 from f570d01  [SPARK-37286][DOCS][FOLLOWUP] Fix the wrong parameter name for Javadoc
  add b2a4e8f  [SPARK-37495][PYTHON] Skip identical index checking of Series.compare when config 'compute.eager_check' is disabled

No new revisions were added by this update.

Summary of changes:
 .../source/user_guide/pandas_on_spark/options.rst    |  7 ---
 python/pyspark/pandas/config.py                      |  5 +++--
 python/pyspark/pandas/series.py                      | 21 -
 .../pyspark/pandas/tests/test_ops_on_diff_frames.py  | 11 +++
 4 files changed, 38 insertions(+), 6 deletions(-)
[spark] branch master updated: [SPARK-37286][DOCS][FOLLOWUP] Fix the wrong parameter name for Javadoc
This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new f570d01  [SPARK-37286][DOCS][FOLLOWUP] Fix the wrong parameter name for Javadoc

f570d01 is described below

commit f570d01c0d009bb035d3c89d77661a5432f982cb
Author: Kousuke Saruta
AuthorDate: Fri Dec 3 11:16:39 2021 -0600

    [SPARK-37286][DOCS][FOLLOWUP] Fix the wrong parameter name for Javadoc

    ### What changes were proposed in this pull request?
    This PR fixes an issue that the Javadoc generation fails due to the wrong parameter name of a method added in SPARK-37286 (#34554).
    https://github.com/apache/spark/runs/4409267346?check_suite_focus=true#step:9:5081

    ### Why are the changes needed?
    To keep the build clean.

    ### Does this PR introduce _any_ user-facing change?
    No.

    ### How was this patch tested?
    GA itself.

    Closes #34801 from sarutak/followup-SPARK-37286.

    Authored-by: Kousuke Saruta
    Signed-off-by: Sean Owen
---
 sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
index fcc2be2..9a647e5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
@@ -196,7 +196,7 @@ abstract class JdbcDialect extends Serializable with Logging {
   /**
    * Converts aggregate function to String representing a SQL expression.
-   * @param aggregate The aggregate function to be converted.
+   * @param aggFunction The aggregate function to be converted.
    * @return Converted value.
    */
   @Since("3.3.0")
[spark] branch branch-3.2 updated (805db87 -> 449c330)
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git.

 from 805db87  [SPARK-37471][SQL] spark-sql support `;` in nested bracketed comment
  add 449c330  [SPARK-37534][BUILD][3.2] Bump dev.ludovic.netlib to 2.2.1

No new revisions were added by this update.

Summary of changes:
 dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 6 +++---
 dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 6 +++---
 pom.xml                                 | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)
[spark] branch branch-3.2 updated: [SPARK-37471][SQL] spark-sql support `;` in nested bracketed comment
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 805db87  [SPARK-37471][SQL] spark-sql support `;` in nested bracketed comment

805db87 is described below

commit 805db872c085b237bf56b31d30252943d743a4e4
Author: Angerszh
AuthorDate: Sat Dec 4 00:19:48 2021 +0800

    [SPARK-37471][SQL] spark-sql support `;` in nested bracketed comment

    ### What changes were proposed in this pull request?
    Currently, when spark-sql is used with -e or -f, it can't handle a nested bracketed comment such as:
    ```
    /* SELECT /*+ BROADCAST(b) */ 4;
    */
    SELECT 1;
    ```
    Running `spark-sql -f` with `--verbose` produced the error below:
    ```
    Spark master: yarn, Application Id: application_1632999510150_6968442
    /* sielect /* BROADCAST(b) */ 4
    Error in query:
    mismatched input '4' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 30)

    == SQL ==
    /* sielect /* BROADCAST(b) */ 4
    --^^^
    ```
    The current code is:
    ```
    } else if (line.charAt(index) == '/' && !insideSimpleComment) {
      val hasNext = index + 1 < line.length
      if (insideSingleQuote || insideDoubleQuote) {
        // Ignores '/' in any case of quotes
      } else if (insideBracketedComment && line.charAt(index - 1) == '*') {
        // Decrements `bracketedCommentLevel` at the beginning of the next loop
        leavingBracketedComment = true
      } else if (hasNext && !insideBracketedComment && line.charAt(index + 1) == '*') {
        bracketedCommentLevel += 1
      }
    }
    ```
    When it meets a `*/`, it marks `leavingBracketedComment` as true; on the next character, the bracketed comment level is decremented:
    ```
    if (leavingBracketedComment) {
      bracketedCommentLevel -= 1
      leavingBracketedComment = false
    }
    ```
    But a `/*` only increments the level under the condition `!insideBracketedComment`. That means that for a case such as:
    ```
    /* aaa /* bbb */ ; ccc */ select 1;
    ```
    when the second `/*` is met, `insideBracketedComment` is already true, so that `/*` is not treated as the start of a bracketed comment. Then, at the first `*/`, the bracketed comment ends and the query is split as:
    ```
    /* aaa /* bbb */;   => comment
    ccc */ select 1;    => query
    ```
    and the query fails. So here we remove the `!insideBracketedComment` condition, allowing `bracketedCommentLevel > 1`, and since
    ```
    def insideBracketedComment: Boolean = bracketedCommentLevel > 0
    ```
    characters between all levels of brackets are treated as comment.

    ### Why are the changes needed?
    Spark #37389 added support for nested bracketed comments in SQL; spark-sql should support them too.

    ### Does this PR introduce _any_ user-facing change?
    Users can use nested bracketed comments in spark-sql.

    ### How was this patch tested?
    Since the spark-sql console mode has special logic for handling `;`:
    ```
    while (line != null) {
      if (!line.startsWith("--")) {
        if (prefix.nonEmpty) {
          prefix += '\n'
        }
        if (line.trim().endsWith(";") && !line.trim().endsWith("\\;")) {
          line = prefix + line
          ret = cli.processLine(line, true)
          prefix = ""
          currentPrompt = promptWithCurrentDB
        } else {
          prefix = prefix + line
          currentPrompt = continuedPromptWithDBSpaces
        }
      }
      line = reader.readLine(currentPrompt + "> ")
    }
    ```
    if we write SQL as below, the `\\;` is escaped:
    ```
    /* SELECT /*+ BROADCAST(b) */ 4\\;
    */
    SELECT 1;
    ```
    Manual test with `spark-sql -f`:
    ```
    (spark.submit.pyFiles,)
    (spark.submit.deployMode,client)
    (spark.master,local[*])
    Classpath elements:

    Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
    Setting default log level to "WARN".
    To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
    21/11/26 16:32:08 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    21/11/26 16:32:10 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout
    ```
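For readers following the splitter logic above, a minimal standalone sketch of the fixed state machine: every `/*` now increments the nesting level (the old `!insideBracketedComment` guard is gone), every `*/` decrements it, and `;` only terminates a statement at level 0. This is an editor's simplification, not the actual SparkSQLCLIDriver code; quote handling and `--` comments are omitted:

```scala
// Standalone sketch of the bracketed-comment nesting described in the commit message.
object NestedCommentSplitSketch {
  def splitStatements(line: String): Seq[String] = {
    val parts = Seq.newBuilder[String]
    val current = new StringBuilder
    var level = 0  // bracketed-comment nesting depth
    var index = 0
    while (index < line.length) {
      val c = line.charAt(index)
      if (c == '/' && index + 1 < line.length && line.charAt(index + 1) == '*') {
        level += 1  // with the old guard, a nested `/*` was ignored here
        current.append("/*"); index += 2
      } else if (c == '*' && index + 1 < line.length && line.charAt(index + 1) == '/' && level > 0) {
        level -= 1
        current.append("*/"); index += 2
      } else if (c == ';' && level == 0) {
        parts += current.toString.trim; current.clear(); index += 1
      } else {
        current.append(c); index += 1
      }
    }
    if (current.toString.trim.nonEmpty) parts += current.toString.trim
    parts.result()
  }

  def main(args: Array[String]): Unit = {
    // The `;` inside the nested comment no longer splits the statement:
    splitStatements("/* aaa /* bbb */ ; ccc */ select 1;").foreach(println)
    // prints: /* aaa /* bbb */ ; ccc */ select 1
  }
}
```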
[spark] branch master updated (544865d -> 6e19125)
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

 from 544865d  [SPARK-37455][SQL] Replace hash with sort aggregate if child is already sorted
  add 6e19125  [SPARK-37471][SQL] spark-sql support `;` in nested bracketed comment

No new revisions were added by this update.

Summary of changes:
 .../apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala  | 2 +-
 .../scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 9 +
 2 files changed, 10 insertions(+), 1 deletion(-)
[spark] branch master updated (16f6295 -> 544865d)
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

 from 16f6295  [SPARK-37286][SQL] Move compileAggregates from JDBCRDD to JdbcDialect
  add 544865d  [SPARK-37455][SQL] Replace hash with sort aggregate if child is already sorted

No new revisions were added by this update.

Summary of changes:
 .../org/apache/spark/sql/internal/SQLConf.scala      |   7 ++
 .../spark/sql/execution/QueryExecution.scala         |   3 +
 .../sql/execution/ReplaceHashWithSortAgg.scala       | 106 +
 .../execution/adaptive/AdaptiveSparkPlanExec.scala   |   1 +
 .../execution/aggregate/HashAggregateExec.scala      |   9 ++
 .../execution/ReplaceHashWithSortAggSuite.scala      | 131 +
 6 files changed, 257 insertions(+)
 create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/ReplaceHashWithSortAgg.scala
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/ReplaceHashWithSortAggSuite.scala
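A quick way to see the new SPARK-37455 rule in action: when the aggregate's child is already sorted on the grouping keys (e.g. the output of a sort-merge join), a hash aggregate can be replaced by the cheaper sort aggregate. The config key `spark.sql.execution.replaceHashWithSortAgg` and its default of `false` are assumptions based on the JIRA and the touched `SQLConf.scala`, not confirmed by this email:

```scala
// Sketch only: exercises the physical rule added by SPARK-37455.
// Config key and default are assumed from the JIRA; verify against SQLConf.scala.
import org.apache.spark.sql.SparkSession

object SortAggSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("sortagg-sketch").getOrCreate()
    spark.conf.set("spark.sql.execution.replaceHashWithSortAgg", "true")   // assumed key
    spark.conf.set("spark.sql.autoBroadcastJoinThreshold", "-1")           // force sort-merge join

    spark.range(100000).createOrReplaceTempView("t")
    // The sort-merge join leaves its output sorted on `id`, so the aggregate's child
    // already satisfies the grouping order and the rule can swap HashAggregate for
    // SortAggregate (no hash table, no extra sort).
    spark.sql(
      "SELECT t1.id, COUNT(*) FROM t t1 JOIN t t2 ON t1.id = t2.id GROUP BY t1.id"
    ).explain()  // look for SortAggregate instead of HashAggregate when the rule applies

    spark.stop()
  }
}
```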
[spark] branch master updated: [SPARK-37286][SQL] Move compileAggregates from JDBCRDD to JdbcDialect
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 16f6295  [SPARK-37286][SQL] Move compileAggregates from JDBCRDD to JdbcDialect

16f6295 is described below

commit 16f6295a3fc7fd1fcc77c6084a60d00fd79d202b
Author: Jiaan Geng
AuthorDate: Fri Dec 3 21:12:25 2021 +0800

    [SPARK-37286][SQL] Move compileAggregates from JDBCRDD to JdbcDialect

    ### What changes were proposed in this pull request?
    Currently, the method `compileAggregates` is a member of `JDBCRDD`. That is not the right place for it, because the JDBC dialect knows best how to compile aggregate expressions into its own SQL dialect.

    ### Why are the changes needed?
    The JDBC dialect knows how to compile aggregate expressions into its own SQL dialect. After this PR, we can extend pushdown (e.g. aggregate pushdown) per dialect across different JDBC databases. There are two situations: first, databases A and B may implement different subsets of the SQL-standard aggregate functions.

    ### Does this PR introduce _any_ user-facing change?
    'No'. This only changes the internal implementation.

    ### How was this patch tested?
    Jenkins tests.

    Closes #34554 from beliefer/SPARK-37286.

    Authored-by: Jiaan Geng
    Signed-off-by: Wenchen Fan
---
 .../sql/execution/datasources/jdbc/JDBCRDD.scala   | 29 
 .../datasources/v2/jdbc/JDBCScanBuilder.scala      |  6 ++---
 .../org/apache/spark/sql/jdbc/JdbcDialects.scala   | 31 ++
 3 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
index 1b8d33b..394ba3f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
@@ -25,7 +25,6 @@ import org.apache.spark.{InterruptibleIterator, Partition, SparkContext, TaskCon
 import org.apache.spark.internal.Logging
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Count, CountStar, Max, Min, Sum}
 import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo
 import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects}
 import org.apache.spark.sql.sources._
@@ -140,34 +139,6 @@ object JDBCRDD extends Logging {
     })
   }

-  def compileAggregates(
-      aggregates: Seq[AggregateFunc],
-      dialect: JdbcDialect): Option[Seq[String]] = {
-    def quote(colName: String): String = dialect.quoteIdentifier(colName)
-
-    Some(aggregates.map {
-      case min: Min =>
-        if (min.column.fieldNames.length != 1) return None
-        s"MIN(${quote(min.column.fieldNames.head)})"
-      case max: Max =>
-        if (max.column.fieldNames.length != 1) return None
-        s"MAX(${quote(max.column.fieldNames.head)})"
-      case count: Count =>
-        if (count.column.fieldNames.length != 1) return None
-        val distinct = if (count.isDistinct) "DISTINCT " else ""
-        val column = quote(count.column.fieldNames.head)
-        s"COUNT($distinct$column)"
-      case sum: Sum =>
-        if (sum.column.fieldNames.length != 1) return None
-        val distinct = if (sum.isDistinct) "DISTINCT " else ""
-        val column = quote(sum.column.fieldNames.head)
-        s"SUM($distinct$column)"
-      case _: CountStar =>
-        s"COUNT(*)"
-      case _ => return None
-    })
-  }
-
   /**
    * Build and return JDBCRDD from the given information.
    *

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala
index 7605b03..d3c141e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala
@@ -72,8 +72,8 @@ case class JDBCScanBuilder(
     if (!jdbcOptions.pushDownAggregate) return false

     val dialect = JdbcDialects.get(jdbcOptions.url)
-    val compiledAgg = JDBCRDD.compileAggregates(aggregation.aggregateExpressions, dialect)
-    if (compiledAgg.isEmpty) return false
+    val compiledAggs = aggregation.aggregateExpressions.flatMap(dialect.compileAggregate(_))
+    if (compiledAggs.length != aggregation.aggregateExpressions.length) return false

     val groupByCols = aggregation.groupByColumns.map { col =>
       if (col.fieldNames.length != 1) return false
@@ -84,7 +84,7 @@ case class JDBCScanBuilder(
     // e.g.
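The practical payoff of this move is that `compileAggregate` becomes a per-dialect hook. A sketch of a custom dialect overriding it: the signature `compileAggregate(aggFunction: AggregateFunc): Option[String]` is inferred from the diff above and the Javadoc fix earlier in this digest, and `MyDialect` and its URL scheme are hypothetical:

```scala
// Sketch of the per-dialect extension point this refactor enables; MyDialect is
// illustrative only, and the default compileAggregate behavior is assumed to live
// in JdbcDialect after this commit.
import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Min}
import org.apache.spark.sql.jdbc.JdbcDialect

case object MyDialect extends JdbcDialect {
  override def canHandle(url: String): Boolean = url.startsWith("jdbc:mydb")

  // A dialect can accept exactly the aggregates its database supports and return
  // None for everything else, which disables pushdown for that function.
  override def compileAggregate(aggFunction: AggregateFunc): Option[String] =
    aggFunction match {
      case min: Min if min.column.fieldNames.length == 1 =>
        Some(s"MIN(${quoteIdentifier(min.column.fieldNames.head)})")
      case _ => super.compileAggregate(aggFunction)  // fall back to the defaults
    }
}
```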
[spark] branch master updated: [SPARK-36902][SQL] Migrate CreateTableAsSelectStatement to v2 command
This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 688fa23  [SPARK-36902][SQL] Migrate CreateTableAsSelectStatement to v2 command

688fa23 is described below

commit 688fa239265b981dc3acc69b79443905afe6a8cf
Author: dch nguyen
AuthorDate: Fri Dec 3 21:08:41 2021 +0800

    [SPARK-36902][SQL] Migrate CreateTableAsSelectStatement to v2 command

    ### What changes were proposed in this pull request?
    Migrate CreateTableAsSelectStatement to v2 command.

    ### Why are the changes needed?
    Migrate CreateTableAsSelectStatement to v2 command.

    ### Does this PR introduce _any_ user-facing change?
    No.

    ### How was this patch tested?
    Existing tests.

    Closes #34667 from dchvn/migrate-CTAS.

    Authored-by: dch nguyen
    Signed-off-by: Wenchen Fan
---
 .../sql/catalyst/analysis/ResolveCatalogs.scala      | 12 -
 .../spark/sql/catalyst/parser/AstBuilder.scala       | 12 ++---
 .../sql/catalyst/plans/logical/statements.scala      | 23 -
 .../sql/catalyst/plans/logical/v2Commands.scala      | 23 ++---
 .../sql/connector/catalog/CatalogV2Util.scala        |  7 +--
 .../CreateTablePartitioningValidationSuite.scala     | 54 --
 .../spark/sql/catalyst/parser/DDLParserSuite.scala   | 26 +--
 .../org/apache/spark/sql/DataFrameWriter.scala       | 48 +++
 .../org/apache/spark/sql/DataFrameWriterV2.scala     | 28 +--
 .../catalyst/analysis/ResolveSessionCatalog.scala    | 29 +---
 .../datasources/v2/DataSourceV2Strategy.scala        | 12 ++---
 .../datasources/v2/WriteToDataSourceV2Exec.scala     |  8 +++-
 .../connector/V2CommandsCaseSensitivitySuite.scala   | 14 +++---
 .../execution/command/PlanResolutionSuite.scala      | 46 +-
 14 files changed, 155 insertions(+), 187 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala
index d7c6301..3e21a60 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala
@@ -37,18 +37,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager)
     case UnresolvedDBObjectName(CatalogAndIdentifier(catalog, identifier), _) =>
       ResolvedDBObjectName(catalog, identifier.namespace :+ identifier.name())

-    case c @ CreateTableAsSelectStatement(
-        NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _, _) =>
-      CreateTableAsSelect(
-        catalog.asTableCatalog,
-        tbl.asIdentifier,
-        // convert the bucket spec and add it as a transform
-        c.partitioning ++ c.bucketSpec.map(_.asTransform),
-        c.asSelect,
-        convertTableProperties(c),
-        writeOptions = c.writeOptions,
-        ignoreIfExists = c.ifNotExists)
-
     case c @ ReplaceTableStatement(
         NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _) =>
       ReplaceTable(

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index f7d96f8..235f6a6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -3410,7 +3410,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
   }

   /**
-   * Create a table, returning a [[CreateTable]] or [[CreateTableAsSelectStatement]] logical plan.
+   * Create a table, returning a [[CreateTable]] or [[CreateTableAsSelect]] logical plan.
    *
    * Expected format:
    * {{{
@@ -3456,6 +3456,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
     }

     val partitioning = partitionExpressions(partTransforms, partCols, ctx)
+    val tableSpec = TableSpec(bucketSpec, properties, provider, options, location, comment,
+      serdeInfo, external)

     Option(ctx.query).map(plan) match {
       case Some(_) if columns.nonEmpty =>
@@ -3470,15 +3472,13 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
         ctx)

       case Some(query) =>
-        CreateTableAsSelectStatement(
-          table, query, partitioning, bucketSpec, properties, provider, options, location, comment,
-          writeOptions = Map.empty, serdeInfo, external = external, ifNotExists = ifNotExists)
+        CreateTableAsSelect(
+          UnresolvedDBObjectName(table, isNamespace = false),
+          partitioning, query, tableSpec, Map.empty, ifNotExists)

       case
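For context, two user-level entry points that, after this migration, are planned through the v2 `CreateTableAsSelect` command: SQL CTAS (via the `AstBuilder` change above) and the `DataFrameWriterV2` path (that file is also touched by this commit). A sketch only; the table names are made up and a session catalog supporting the v2 write path is assumed:

```scala
// Sketch only: user-facing statements that exercise the migrated CTAS planning.
import org.apache.spark.sql.SparkSession

object CtasSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("ctas-sketch").getOrCreate()
    import spark.implicits._

    val df = Seq((1, "a"), (2, "b")).toDF("id", "name")
    df.createOrReplaceTempView("src")

    // SQL CTAS: parsed by AstBuilder into CreateTableAsSelect (see the diff above).
    spark.sql("CREATE TABLE demo_tbl USING parquet AS SELECT id, name FROM src")

    // DataFrameWriterV2 path, also routed through the v2 command after this change:
    df.writeTo("demo_tbl2").using("parquet").create()

    spark.stop()
  }
}
```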
[spark] branch master updated (f99e2e6 -> eba4f5c)
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

 from f99e2e6  [SPARK-37526][INFRA][PYTHON][TESTS] Add Java17 PySpark daily test coverage
  add eba4f5c  [SPARK-37531][INFRA][PYTHON][TESTS] Use PyArrow 6.0.0 in Python 3.9 tests at GitHub Action job

No new revisions were added by this update.

Summary of changes:
 .github/workflows/build_and_test.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)