[spark] branch branch-3.2 updated (b7948ee -> 294bfbb)
This is an automated email from the ASF dual-hosted git repository.

dbtsai pushed a change to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git.

    from b7948ee  [SPARK-37098][SQL] Alter table properties should invalidate cache
     add 294bfbb  [SPARK-37113][3.2][BUILD] Upgrade Parquet to 1.12.2

No new revisions were added by this update.

Summary of changes:
 dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 12 ++--
 dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 12 ++--
 pom.xml                                 |  2 +-
 3 files changed, 13 insertions(+), 13 deletions(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (ce2930c -> 87591c9)
This is an automated email from the ASF dual-hosted git repository.

dbtsai pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

    from ce2930c  [SPARK-37112][BUILD] Fix MiMa failure with Scala 2.13
     add 87591c9  [SPARK-37113][BUILD] Upgrade Parquet to 1.12.2

No new revisions were added by this update.

Summary of changes:
 dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 12 ++--
 dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 12 ++--
 pom.xml                                 |  2 +-
 3 files changed, 13 insertions(+), 13 deletions(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
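[Editor's note] For anyone verifying the bump locally, a hedged one-liner, assuming `parquet-common` is on the classpath: its `org.apache.parquet.Version` class reports the Parquet release Spark actually loaded.

```
// Run in spark-shell; after this upgrade it should report 1.12.2.
println(org.apache.parquet.Version.FULL_VERSION)
```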
[spark] branch branch-3.1 updated: Revert "[SPARK-37098][SQL] Alter table properties should invalidate cache"
This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 05aea97  Revert "[SPARK-37098][SQL] Alter table properties should invalidate cache"
05aea97 is described below

commit 05aea976873e3ca55251f8e6c7e38fa5ae7e8c91
Author: Kent Yao
AuthorDate: Tue Oct 26 11:10:10 2021 +0800

    Revert "[SPARK-37098][SQL] Alter table properties should invalidate cache"

    This reverts commit 286a37663213ef3abfbf9effb8cd5723ec6382ff.
---
 .../apache/spark/sql/execution/command/ddl.scala |  2 --
 .../apache/spark/sql/hive/HiveParquetSuite.scala | 24 --
 2 files changed, 26 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
index 4512a3d0..8722831 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
@@ -277,7 +277,6 @@ case class AlterTableSetPropertiesCommand(
       properties = table.properties ++ properties,
       comment = properties.get(TableCatalog.PROP_COMMENT).orElse(table.comment))
     catalog.alterTable(newTable)
-    catalog.invalidateCachedTable(tableName)
     Seq.empty[Row]
   }

@@ -316,7 +315,6 @@ case class AlterTableUnsetPropertiesCommand(
     val newProperties = table.properties.filter { case (k, _) => !propKeys.contains(k) }
     val newTable = table.copy(properties = newProperties, comment = tableComment)
     catalog.alterTable(newTable)
-    catalog.invalidateCachedTable(tableName)
     Seq.empty[Row]
   }

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
index a5a3367..df96b06 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
@@ -123,28 +123,4 @@ class HiveParquetSuite extends QueryTest with ParquetTest with TestHiveSingleton
       assert(msg.contains("cannot resolve '`c3`' given input columns"))
     }
   }
-
-  test("SPARK-37098: Alter table properties should invalidate cache") {
-    // specify the compression in case we change it in future
-    withSQLConf(SQLConf.PARQUET_COMPRESSION.key -> "snappy") {
-      withTempPath { dir =>
-        withTable("t") {
-          sql(s"CREATE TABLE t (c int) STORED AS PARQUET LOCATION '${dir.getCanonicalPath}'")
-          // cache table metadata
-          sql("SELECT * FROM t")
-          sql("ALTER TABLE t SET TBLPROPERTIES('parquet.compression'='zstd')")
-          sql("INSERT INTO TABLE t values(1)")
-          val files1 = dir.listFiles().filter(_.getName.endsWith("zstd.parquet"))
-          assert(files1.length == 1)
-
-          // cache table metadata again
-          sql("SELECT * FROM t")
-          sql("ALTER TABLE t UNSET TBLPROPERTIES('parquet.compression')")
-          sql("INSERT INTO TABLE t values(1)")
-          val files2 = dir.listFiles().filter(_.getName.endsWith("snappy.parquet"))
-          assert(files2.length == 1)
-        }
-      }
-    }
-  }
 }

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (4e1e33b -> ce2930c)
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

    from 4e1e33b  [SPARK-37011][PYTHON][BUILD] update flake8 on jenkins workers
     add ce2930c  [SPARK-37112][BUILD] Fix MiMa failure with Scala 2.13

No new revisions were added by this update.

Summary of changes:
 dev/change-scala-version.sh | 10 ++
 project/MimaBuild.scala     |  4 ++--
 2 files changed, 12 insertions(+), 2 deletions(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (d5563f3 -> 4e1e33b)
This is an automated email from the ASF dual-hosted git repository.

shaneknapp pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

    from d5563f3  [SPARK-36956][MLLIB] model prediction in .mllib avoid conversion to breeze vector
     add 4e1e33b  [SPARK-37011][PYTHON][BUILD] update flake8 on jenkins workers

No new revisions were added by this update.

Summary of changes:
 .../files/python_environments/py36.txt            | 49 --
 .../files/python_environments/spark-py36-spec.txt | 18
 dev/lint-python                                   |  3 +-
 3 files changed, 10 insertions(+), 60 deletions(-)
 delete mode 100644 dev/ansible-for-test-node/roles/jenkins-worker/files/python_environments/py36.txt

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-36956][MLLIB] model prediction in .mllib avoid conversion to breeze vector
This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new d5563f3  [SPARK-36956][MLLIB] model prediction in .mllib avoid conversion to breeze vector
d5563f3 is described below

commit d5563f3897b925fee7c2f7aae6418d8e03a5
Author: Ruifeng Zheng
AuthorDate: Mon Oct 25 11:11:44 2021 -0700

    [SPARK-36956][MLLIB] model prediction in .mllib avoid conversion to breeze vector

    ### What changes were proposed in this pull request?

    Model prediction in .mllib avoids conversion to Breeze vectors.

    ### Why are the changes needed?

    Avoid unnecessary conversion.

    ### Does this PR introduce _any_ user-facing change?

    no

    ### How was this patch tested?

    existing suites

    Closes #34221 from zhengruifeng/mllib_model_avoid_breeze_conversion.

    Authored-by: Ruifeng Zheng
    Signed-off-by: Huaxin Gao
---
 mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala | 4 ++--
 mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala   | 4 ++--
 .../scala/org/apache/spark/mllib/regression/LinearRegression.scala   | 4 ++--
 .../scala/org/apache/spark/mllib/regression/RidgeRegression.scala    | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
index 90cc4fb..33ce0d7 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.classification
 import org.apache.spark.SparkContext
 import org.apache.spark.annotation.Since
 import org.apache.spark.mllib.classification.impl.GLMClassificationModel
-import org.apache.spark.mllib.linalg.Vector
+import org.apache.spark.mllib.linalg.{BLAS, Vector}
 import org.apache.spark.mllib.optimization._
 import org.apache.spark.mllib.pmml.PMMLExportable
 import org.apache.spark.mllib.regression._
@@ -72,7 +72,7 @@ class SVMModel @Since("1.1.0") (
       dataMatrix: Vector,
       weightMatrix: Vector,
       intercept: Double) = {
-    val margin = weightMatrix.asBreeze.dot(dataMatrix.asBreeze) + intercept
+    val margin = BLAS.dot(weightMatrix, dataMatrix) + intercept
     threshold match {
       case Some(t) => if (margin > t) 1.0 else 0.0
       case None => margin
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
index 47bb1fa9..13920b5 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.regression
 import org.apache.spark.SparkContext
 import org.apache.spark.annotation.Since
-import org.apache.spark.mllib.linalg.Vector
+import org.apache.spark.mllib.linalg.{BLAS, Vector}
 import org.apache.spark.mllib.optimization._
 import org.apache.spark.mllib.pmml.PMMLExportable
 import org.apache.spark.mllib.regression.impl.GLMRegressionModel
@@ -43,7 +43,7 @@ class LassoModel @Since("1.1.0") (
       dataMatrix: Vector,
       weightMatrix: Vector,
       intercept: Double): Double = {
-    weightMatrix.asBreeze.dot(dataMatrix.asBreeze) + intercept
+    BLAS.dot(weightMatrix, dataMatrix) + intercept
   }

   @Since("1.3.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index f68ebc1..bd42d7b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.regression
 import org.apache.spark.SparkContext
 import org.apache.spark.annotation.Since
-import org.apache.spark.mllib.linalg.Vector
+import org.apache.spark.mllib.linalg.{BLAS, Vector}
 import org.apache.spark.mllib.optimization._
 import org.apache.spark.mllib.pmml.PMMLExportable
 import org.apache.spark.mllib.regression.impl.GLMRegressionModel
@@ -43,7 +43,7 @@ class LinearRegressionModel @Since("1.1.0") (
       dataMatrix: Vector,
       weightMatrix: Vector,
       intercept: Double): Double = {
-    weightMatrix.asBreeze.dot(dataMatrix.asBreeze) + intercept
+    BLAS.dot(weightMatrix, dataMatrix) + intercept
   }

   @Since("1.3.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
index 1c3bdce..1f67536 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
+++
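[Editor's note] The change pattern is identical in every file: `weightMatrix.asBreeze.dot(dataMatrix.asBreeze)` becomes `BLAS.dot(weightMatrix, dataMatrix)`, so each prediction no longer wraps both vectors in Breeze objects. A minimal standalone sketch of the idea — Spark's `BLAS` object is package-private, so the direct dense dot is written out by hand here; only `Vector`, `DenseVector`, and `Vectors` from `org.apache.spark.mllib.linalg` are real API:

```
import org.apache.spark.mllib.linalg.{DenseVector, Vector, Vectors}

object DotSketch {
  // Old style built a Breeze wrapper around each operand on every call;
  // a direct dot over the backing arrays does the same arithmetic with
  // no per-call allocation.
  def dot(x: Vector, y: Vector): Double = (x, y) match {
    case (a: DenseVector, b: DenseVector) =>
      val av = a.values
      val bv = b.values
      var s = 0.0
      var i = 0
      while (i < av.length) { s += av(i) * bv(i); i += 1 }
      s
    case _ =>
      // Sparse fallback for the sketch only: densify (the real BLAS.dot
      // has dedicated sparse paths).
      x.toArray.zip(y.toArray).map { case (u, v) => u * v }.sum
  }

  def main(args: Array[String]): Unit = {
    val weights = Vectors.dense(0.5, -1.0, 2.0)
    val data = Vectors.dense(1.0, 2.0, 3.0)
    println(dot(weights, data) + 0.1) // margin = w.x + intercept, as in SVMModel
  }
}
```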
[spark] branch master updated (86f080f -> f44f9b4)
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

    from 86f080f  [SPARK-37072][CORE] Pass all UTs in `repl` with Java 17
     add f44f9b4  [SPARK-37105][TEST] Pass all UTs in `sql/hive` with Java 17

No new revisions were added by this update.

Summary of changes:
 launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java | 1 +
 pom.xml                                                                 | 1 +
 project/SparkBuild.scala                                                | 1 +
 sql/hive/pom.xml                                                        | 2 +-
 4 files changed, 4 insertions(+), 1 deletion(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (2362a6f -> 86f080f)
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

    from 2362a6f  [SPARK-37106][YARN] Pass all UTs in `resource-managers/yarn` with Java 17
     add 86f080f  [SPARK-37072][CORE] Pass all UTs in `repl` with Java 17

No new revisions were added by this update.

Summary of changes:
 .../org/apache/spark/util/ClosureCleaner.scala   | 30 +-
 .../apache/spark/launcher/JavaModuleOptions.java |  1 +
 pom.xml                                          |  1 +
 project/SparkBuild.scala                         |  1 +
 4 files changed, 32 insertions(+), 1 deletion(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (f1727a6 -> 2362a6f)
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

    from f1727a6  [SPARK-35437][SQL] Use expressions to filter Hive partitions at client side
     add 2362a6f  [SPARK-37106][YARN] Pass all UTs in `resource-managers/yarn` with Java 17

No new revisions were added by this update.

Summary of changes:
 .../yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (02d3b3b -> f1727a6)
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git.

    from 02d3b3b  [SPARK-37098][SQL] Alter table properties should invalidate cache
     add f1727a6  [SPARK-35437][SQL] Use expressions to filter Hive partitions at client side

No new revisions were added by this update.

Summary of changes:
 .../catalyst/catalog/ExternalCatalogUtils.scala   | 42 ++-
 .../org/apache/spark/sql/internal/SQLConf.scala   | 14
 .../spark/sql/hive/client/HiveClientImpl.scala    |  2 +-
 .../apache/spark/sql/hive/client/HiveShim.scala   | 84 ++
 .../hive/client/HivePartitionFilteringSuite.scala | 55 ++
 5 files changed, 167 insertions(+), 30 deletions(-)

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
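[Editor's note] The summary alone does not show the mechanism, but the idea behind client-side partition filtering is simple: when the metastore cannot evaluate a pushed-down predicate, fetch the partition specs and evaluate the predicate over each partition's values in Spark itself. A hypothetical sketch with plain Scala stand-ins — `Partition` and `prune` here are illustrative names, not Spark's real `HiveShim`/`ExternalCatalogUtils` types:

```
// Hypothetical stand-in: a Hive partition represented only by its spec
// (partition column -> string value).
final case class Partition(spec: Map[String, String])

object ClientSidePruning {
  // Evaluate the pruning predicate over each partition's values on the
  // client, instead of asking the metastore to evaluate the filter.
  def prune(parts: Seq[Partition],
            pred: Map[String, String] => Boolean): Seq[Partition] =
    parts.filter(p => pred(p.spec))

  def main(args: Array[String]): Unit = {
    val parts = Seq(
      Partition(Map("dt" -> "2021-10-24")),
      Partition(Map("dt" -> "2021-10-25")),
      Partition(Map("dt" -> "2021-10-26")))
    // e.g. a predicate compiled from: WHERE dt >= '2021-10-25'
    println(prune(parts, spec => spec("dt") >= "2021-10-25"))
  }
}
```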
[spark] branch branch-3.1 updated: [SPARK-37098][SQL] Alter table properties should invalidate cache
This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 286a376  [SPARK-37098][SQL] Alter table properties should invalidate cache
286a376 is described below

commit 286a37663213ef3abfbf9effb8cd5723ec6382ff
Author: ulysses-you
AuthorDate: Mon Oct 25 16:02:38 2021 +0800

    [SPARK-37098][SQL] Alter table properties should invalidate cache

    ### What changes were proposed in this pull request?

    Invalidate the table cache after altering table properties (set and unset).

    ### Why are the changes needed?

    Table properties can change the behavior of writing, e.g. a Parquet table with `parquet.compression`. If you execute the following SQL, the new file is written with snappy compression rather than zstd, because the cached table metadata still carries the old property:

    ```
    CREATE TABLE t (c int) STORED AS PARQUET;
    -- cache table metadata
    SELECT * FROM t;
    ALTER TABLE t SET TBLPROPERTIES('parquet.compression'='zstd');
    INSERT INTO TABLE t values(1);
    ```

    So we should invalidate the table cache after altering table properties.

    ### Does this PR introduce _any_ user-facing change?

    Yes, bug fix.

    ### How was this patch tested?

    Added a test.

    Closes #34365 from ulysses-you/SPARK-37098.

    Authored-by: ulysses-you
    Signed-off-by: Kent Yao
    (cherry picked from commit 02d3b3b452892779b3f0df7018a9574fde02afee)
    Signed-off-by: Kent Yao
---
 .../apache/spark/sql/execution/command/ddl.scala |  2 ++
 .../apache/spark/sql/hive/HiveParquetSuite.scala | 24 ++
 2 files changed, 26 insertions(+)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
index 8722831..4512a3d0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
@@ -277,6 +277,7 @@ case class AlterTableSetPropertiesCommand(
       properties = table.properties ++ properties,
       comment = properties.get(TableCatalog.PROP_COMMENT).orElse(table.comment))
     catalog.alterTable(newTable)
+    catalog.invalidateCachedTable(tableName)
     Seq.empty[Row]
   }

@@ -315,6 +316,7 @@ case class AlterTableUnsetPropertiesCommand(
     val newProperties = table.properties.filter { case (k, _) => !propKeys.contains(k) }
     val newTable = table.copy(properties = newProperties, comment = tableComment)
     catalog.alterTable(newTable)
+    catalog.invalidateCachedTable(tableName)
     Seq.empty[Row]
   }

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
index df96b06..a5a3367 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
@@ -123,4 +123,28 @@ class HiveParquetSuite extends QueryTest with ParquetTest with TestHiveSingleton
       assert(msg.contains("cannot resolve '`c3`' given input columns"))
     }
   }
+
+  test("SPARK-37098: Alter table properties should invalidate cache") {
+    // specify the compression in case we change it in future
+    withSQLConf(SQLConf.PARQUET_COMPRESSION.key -> "snappy") {
+      withTempPath { dir =>
+        withTable("t") {
+          sql(s"CREATE TABLE t (c int) STORED AS PARQUET LOCATION '${dir.getCanonicalPath}'")
+          // cache table metadata
+          sql("SELECT * FROM t")
+          sql("ALTER TABLE t SET TBLPROPERTIES('parquet.compression'='zstd')")
+          sql("INSERT INTO TABLE t values(1)")
+          val files1 = dir.listFiles().filter(_.getName.endsWith("zstd.parquet"))
+          assert(files1.length == 1)
+
+          // cache table metadata again
+          sql("SELECT * FROM t")
+          sql("ALTER TABLE t UNSET TBLPROPERTIES('parquet.compression')")
+          sql("INSERT INTO TABLE t values(1)")
+          val files2 = dir.listFiles().filter(_.getName.endsWith("snappy.parquet"))
+          assert(files2.length == 1)
+        }
+      }
+    }
+  }
 }

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
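[Editor's note] To see why the one-line `invalidateCachedTable` matters, here is a minimal self-contained sketch of the stale-cache failure mode the test above exercises. `TinyCatalog` and `TableMeta` are hypothetical stand-ins, not Spark's real `SessionCatalog` API:

```
import scala.collection.mutable

final case class TableMeta(name: String, properties: Map[String, String])

class TinyCatalog {
  private val store = mutable.Map.empty[String, TableMeta] // source of truth
  private val cache = mutable.Map.empty[String, TableMeta] // metadata cache

  def createTable(t: TableMeta): Unit = store(t.name) = t
  // A lookup populates the cache, like SELECT caching table metadata above.
  def lookup(name: String): TableMeta = cache.getOrElseUpdate(name, store(name))
  def alterTable(t: TableMeta): Unit = store(t.name) = t
  def invalidateCachedTable(name: String): Unit = cache.remove(name)
}

object StaleCacheDemo {
  def main(args: Array[String]): Unit = {
    val c = new TinyCatalog
    c.createTable(TableMeta("t", Map("parquet.compression" -> "snappy")))
    c.lookup("t") // caches metadata
    c.alterTable(TableMeta("t", Map("parquet.compression" -> "zstd")))
    // Without invalidation the stale entry wins: prints "snappy".
    println(c.lookup("t").properties("parquet.compression"))
    c.invalidateCachedTable("t") // what the commit adds after alterTable
    println(c.lookup("t").properties("parquet.compression")) // now "zstd"
  }
}
```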
[spark] branch branch-3.2 updated: [SPARK-37098][SQL] Alter table properties should invalidate cache
This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new b7948ee  [SPARK-37098][SQL] Alter table properties should invalidate cache
b7948ee is described below

commit b7948ee52c1682b2870a9a3f765c60ef5712da00
Author: ulysses-you
AuthorDate: Mon Oct 25 16:02:38 2021 +0800

    [SPARK-37098][SQL] Alter table properties should invalidate cache

    ### What changes were proposed in this pull request?

    Invalidate the table cache after altering table properties (set and unset).

    ### Why are the changes needed?

    Table properties can change the behavior of writing, e.g. a Parquet table with `parquet.compression`. If you execute the following SQL, the new file is written with snappy compression rather than zstd, because the cached table metadata still carries the old property:

    ```
    CREATE TABLE t (c int) STORED AS PARQUET;
    -- cache table metadata
    SELECT * FROM t;
    ALTER TABLE t SET TBLPROPERTIES('parquet.compression'='zstd');
    INSERT INTO TABLE t values(1);
    ```

    So we should invalidate the table cache after altering table properties.

    ### Does this PR introduce _any_ user-facing change?

    Yes, bug fix.

    ### How was this patch tested?

    Added a test.

    Closes #34365 from ulysses-you/SPARK-37098.

    Authored-by: ulysses-you
    Signed-off-by: Kent Yao
    (cherry picked from commit 02d3b3b452892779b3f0df7018a9574fde02afee)
    Signed-off-by: Kent Yao
---
 .../apache/spark/sql/execution/command/ddl.scala |  2 ++
 .../apache/spark/sql/hive/HiveParquetSuite.scala | 24 ++
 2 files changed, 26 insertions(+)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
index ea1b656..f320859 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
@@ -275,6 +275,7 @@ case class AlterTableSetPropertiesCommand(
       properties = table.properties ++ properties,
       comment = properties.get(TableCatalog.PROP_COMMENT).orElse(table.comment))
     catalog.alterTable(newTable)
+    catalog.invalidateCachedTable(tableName)
     Seq.empty[Row]
   }

@@ -311,6 +312,7 @@ case class AlterTableUnsetPropertiesCommand(
     val newProperties = table.properties.filter { case (k, _) => !propKeys.contains(k) }
     val newTable = table.copy(properties = newProperties, comment = tableComment)
     catalog.alterTable(newTable)
+    catalog.invalidateCachedTable(tableName)
     Seq.empty[Row]
   }

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
index 8940ab4..df13edd 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
@@ -123,4 +123,28 @@ class HiveParquetSuite extends QueryTest with ParquetTest with TestHiveSingleton
       assert(msg.contains("cannot resolve 'c3' given input columns"))
     }
   }
+
+  test("SPARK-37098: Alter table properties should invalidate cache") {
+    // specify the compression in case we change it in future
+    withSQLConf(SQLConf.PARQUET_COMPRESSION.key -> "snappy") {
+      withTempPath { dir =>
+        withTable("t") {
+          sql(s"CREATE TABLE t (c int) STORED AS PARQUET LOCATION '${dir.getCanonicalPath}'")
+          // cache table metadata
+          sql("SELECT * FROM t")
+          sql("ALTER TABLE t SET TBLPROPERTIES('parquet.compression'='zstd')")
+          sql("INSERT INTO TABLE t values(1)")
+          val files1 = dir.listFiles().filter(_.getName.endsWith("zstd.parquet"))
+          assert(files1.length == 1)
+
+          // cache table metadata again
+          sql("SELECT * FROM t")
+          sql("ALTER TABLE t UNSET TBLPROPERTIES('parquet.compression')")
+          sql("INSERT INTO TABLE t values(1)")
+          val files2 = dir.listFiles().filter(_.getName.endsWith("snappy.parquet"))
+          assert(files2.length == 1)
+        }
+      }
+    }
+  }
 }

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-37098][SQL] Alter table properties should invalidate cache
This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 02d3b3b  [SPARK-37098][SQL] Alter table properties should invalidate cache
02d3b3b is described below

commit 02d3b3b452892779b3f0df7018a9574fde02afee
Author: ulysses-you
AuthorDate: Mon Oct 25 16:02:38 2021 +0800

    [SPARK-37098][SQL] Alter table properties should invalidate cache

    ### What changes were proposed in this pull request?

    Invalidate the table cache after altering table properties (set and unset).

    ### Why are the changes needed?

    Table properties can change the behavior of writing, e.g. a Parquet table with `parquet.compression`. If you execute the following SQL, the new file is written with snappy compression rather than zstd, because the cached table metadata still carries the old property:

    ```
    CREATE TABLE t (c int) STORED AS PARQUET;
    -- cache table metadata
    SELECT * FROM t;
    ALTER TABLE t SET TBLPROPERTIES('parquet.compression'='zstd');
    INSERT INTO TABLE t values(1);
    ```

    So we should invalidate the table cache after altering table properties.

    ### Does this PR introduce _any_ user-facing change?

    Yes, bug fix.

    ### How was this patch tested?

    Added a test.

    Closes #34365 from ulysses-you/SPARK-37098.

    Authored-by: ulysses-you
    Signed-off-by: Kent Yao
---
 .../apache/spark/sql/execution/command/ddl.scala |  2 ++
 .../apache/spark/sql/hive/HiveParquetSuite.scala | 24 ++
 2 files changed, 26 insertions(+)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
index 5012b22..36a17a9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
@@ -276,6 +276,7 @@ case class AlterTableSetPropertiesCommand(
       properties = table.properties ++ properties,
       comment = properties.get(TableCatalog.PROP_COMMENT).orElse(table.comment))
     catalog.alterTable(newTable)
+    catalog.invalidateCachedTable(tableName)
     Seq.empty[Row]
   }

@@ -312,6 +313,7 @@ case class AlterTableUnsetPropertiesCommand(
     val newProperties = table.properties.filter { case (k, _) => !propKeys.contains(k) }
     val newTable = table.copy(properties = newProperties, comment = tableComment)
     catalog.alterTable(newTable)
+    catalog.invalidateCachedTable(tableName)
     Seq.empty[Row]
   }

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
index e058e6a..76a66cf 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
@@ -144,4 +144,28 @@ class HiveParquetSuite extends QueryTest with ParquetTest with TestHiveSingleton
           .plus(123456, ChronoUnit.MICROS)))
     }
   }
+
+  test("SPARK-37098: Alter table properties should invalidate cache") {
+    // specify the compression in case we change it in future
+    withSQLConf(SQLConf.PARQUET_COMPRESSION.key -> "snappy") {
+      withTempPath { dir =>
+        withTable("t") {
+          sql(s"CREATE TABLE t (c int) STORED AS PARQUET LOCATION '${dir.getCanonicalPath}'")
+          // cache table metadata
+          sql("SELECT * FROM t")
+          sql("ALTER TABLE t SET TBLPROPERTIES('parquet.compression'='zstd')")
+          sql("INSERT INTO TABLE t values(1)")
+          val files1 = dir.listFiles().filter(_.getName.endsWith("zstd.parquet"))
+          assert(files1.length == 1)
+
+          // cache table metadata again
+          sql("SELECT * FROM t")
+          sql("ALTER TABLE t UNSET TBLPROPERTIES('parquet.compression')")
+          sql("INSERT INTO TABLE t values(1)")
+          val files2 = dir.listFiles().filter(_.getName.endsWith("snappy.parquet"))
+          assert(files2.length == 1)
+        }
+      }
+    }
+  }
 }

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org