This is an automated email from the ASF dual-hosted git repository.
hvanhovell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 98957380be98 [SPARK-51012][SQL] Remove SparkStrategy from Connect Shims
98957380be98 is described below
commit 98957380be9856de480dadd21e4239322e199295
Author: Herman van Hovell <[email protected]>
AuthorDate: Tue Jan 28 09:59:42 2025 -0400
[SPARK-51012][SQL] Remove SparkStrategy from Connect Shims
### What changes were proposed in this pull request?
This PR removes SparkStrategy from the Connect shims.
### Why are the changes needed?
SparkStrategy in Connect shims is causing some headaches for Scala reflection
based tests in Catalyst (see https://github.com/apache/spark/pull/48818). This
was the smallest change that fixes this particular issue. Another approach
would have been to split the shims project into a sql-shims and a core-shims
project.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing tests.
I manually executed
`org.apache.spark.sql.catalyst.encoders.EncoderResolutionSuite` in both SBT and
Maven.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #49701 from hvanhovell/SPARK-51012.
Authored-by: Herman van Hovell <[email protected]>
Signed-off-by: Herman van Hovell <[email protected]>
---
sql/api/src/main/scala/org/apache/spark/sql/package.scala | 13 -------------
.../shims/src/main/scala/org/apache/spark/shims.scala | 1 -
.../scala/org/apache/spark/sql/ExperimentalMethods.scala | 1 +
.../scala/org/apache/spark/sql/SparkSessionExtensions.scala | 1 +
.../main/scala/org/apache/spark/sql/classic/package.scala | 13 +++++++++++++
.../sql/execution/adaptive/LogicalQueryStageStrategy.scala | 2 +-
.../spark/sql/execution/command/v2/V2CommandStrategy.scala | 2 +-
.../sql/execution/datasources/DataSourceStrategy.scala | 4 ++--
.../sql/execution/datasources/FileSourceStrategy.scala | 1 +
.../apache/spark/sql/internal/BaseSessionStateBuilder.scala | 4 ++--
.../scala/org/apache/spark/sql/ExtraStrategiesSuite.scala | 1 +
.../org/apache/spark/sql/execution/SparkPlannerSuite.scala | 2 +-
.../sql/execution/adaptive/AdaptiveQueryExecSuite.scala | 3 ++-
.../org/apache/spark/sql/hive/HiveSessionStateBuilder.scala | 4 ++--
.../scala/org/apache/spark/sql/hive/HiveStrategies.scala | 1 +
15 files changed, 29 insertions(+), 24 deletions(-)
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/package.scala
b/sql/api/src/main/scala/org/apache/spark/sql/package.scala
index f9ad85d65fb5..284ecdc2fe77 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/package.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/package.scala
@@ -16,9 +16,6 @@
*/
package org.apache.spark
-import org.apache.spark.annotation.{DeveloperApi, Unstable}
-import org.apache.spark.sql.execution.SparkStrategy
-
/**
* Allows the execution of relational queries, including those expressed in
SQL using Spark.
*
@@ -33,16 +30,6 @@ import org.apache.spark.sql.execution.SparkStrategy
*/
package object sql {
- /**
- * Converts a logical plan into zero or more SparkPlans. This API is exposed
for experimenting
- * with the query planner and is not designed to be stable across spark
releases. Developers
- * writing libraries should instead consider using the stable APIs provided
in
- * [[org.apache.spark.sql.sources]]
- */
- @DeveloperApi
- @Unstable
- type Strategy = SparkStrategy
-
type DataFrame = Dataset[Row]
/**
diff --git a/sql/connect/shims/src/main/scala/org/apache/spark/shims.scala
b/sql/connect/shims/src/main/scala/org/apache/spark/shims.scala
index 8753d17f35cb..9c5fb515580a 100644
--- a/sql/connect/shims/src/main/scala/org/apache/spark/shims.scala
+++ b/sql/connect/shims/src/main/scala/org/apache/spark/shims.scala
@@ -35,7 +35,6 @@ package sql {
package execution {
class QueryExecution
- class SparkStrategy
}
package internal {
class SharedState
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
index 302d38cde143..fe1216996c8c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql
import org.apache.spark.annotation.{Experimental, Unstable}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.classic.Strategy
/**
* :: Experimental ::
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala
b/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala
index ec85c73c5ce0..39524e60862d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala
@@ -28,6 +28,7 @@ import
org.apache.spark.sql.catalyst.expressions.ExpressionInfo
import org.apache.spark.sql.catalyst.parser.ParserInterface
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.classic.Strategy
import org.apache.spark.sql.execution.{ColumnarRule, SparkPlan}
/**
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/classic/package.scala
b/sql/core/src/main/scala/org/apache/spark/sql/classic/package.scala
index f4c44a013f94..61ab4c8c1516 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/classic/package.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/classic/package.scala
@@ -17,6 +17,9 @@
package org.apache.spark.sql
+import org.apache.spark.annotation.{DeveloperApi, Unstable}
+import org.apache.spark.sql.execution.SparkStrategy
+
/**
* Allows the execution of relational queries, including those expressed in
SQL using Spark.
*
@@ -30,4 +33,14 @@ package org.apache.spark.sql
*/
package object classic {
type DataFrame = Dataset[Row]
+
+ /**
+ * Converts a logical plan into zero or more SparkPlans. This API is exposed
for experimenting
+ * with the query planner and is not designed to be stable across spark
releases. Developers
+ * writing libraries should instead consider using the stable APIs provided
in
+ * [[org.apache.spark.sql.sources]]
+ */
+ @DeveloperApi
+ @Unstable
+ type Strategy = SparkStrategy
}
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStageStrategy.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStageStrategy.scala
index e424af5343fc..d59fe8222c7f 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStageStrategy.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStageStrategy.scala
@@ -17,11 +17,11 @@
package org.apache.spark.sql.execution.adaptive
-import org.apache.spark.sql.Strategy
import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight}
import org.apache.spark.sql.catalyst.planning.{ExtractEquiJoinKeys,
ExtractSingleColumnNullAwareAntiJoin}
import org.apache.spark.sql.catalyst.plans.LeftAnti
import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan}
+import org.apache.spark.sql.classic.Strategy
import org.apache.spark.sql.execution.{joins, SparkPlan}
import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec,
BroadcastNestedLoopJoinExec}
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/V2CommandStrategy.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/V2CommandStrategy.scala
index ebc2e83e9c5f..704502d118b8 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/V2CommandStrategy.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/v2/V2CommandStrategy.scala
@@ -17,10 +17,10 @@
package org.apache.spark.sql.execution.command.v2
-import org.apache.spark.sql.Strategy
import org.apache.spark.sql.catalyst.analysis.ResolvedIdentifier
import org.apache.spark.sql.catalyst.expressions.VariableReference
import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.classic.Strategy
import org.apache.spark.sql.execution.SparkPlan
object V2CommandStrategy extends Strategy {
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala
index a1bcf575ce58..32decd9c429d 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala
@@ -27,7 +27,7 @@ import org.apache.hadoop.fs.Path
import org.apache.spark.internal.{Logging, MDC}
import org.apache.spark.internal.LogKeys.PREDICATES
import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{Row, SaveMode, Strategy}
+import org.apache.spark.sql.{Row, SaveMode}
import org.apache.spark.sql.catalyst.{expressions, CatalystTypeConverters,
InternalRow, QualifiedTableName, SQLConfHelper}
import org.apache.spark.sql.catalyst.CatalystTypeConverters.convertToScala
import org.apache.spark.sql.catalyst.analysis._
@@ -41,7 +41,7 @@ import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2
import org.apache.spark.sql.catalyst.types.DataTypeUtils
import org.apache.spark.sql.catalyst.util.{GeneratedColumn, IdentityColumn,
ResolveDefaultColumns, V2ExpressionBuilder}
-import org.apache.spark.sql.classic.SparkSession
+import org.apache.spark.sql.classic.{SparkSession, Strategy}
import org.apache.spark.sql.connector.catalog.{SupportsRead, V1Table}
import org.apache.spark.sql.connector.catalog.TableCapability._
import org.apache.spark.sql.connector.expressions.{Expression => V2Expression,
NullOrdering, SortDirection, SortOrder => V2SortOrder, SortValue}
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
index 0f22c23791a1..7291da248294 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
@@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.planning.ScanOperation
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.trees.TreePattern.{PLAN_EXPRESSION,
SCALAR_SUBQUERY}
import org.apache.spark.sql.catalyst.types.DataTypeUtils
+import org.apache.spark.sql.classic.Strategy
import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{DoubleType, FloatType, StructType}
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
index 1936a9aab0de..71a28d643954 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
@@ -17,7 +17,7 @@
package org.apache.spark.sql.internal
import org.apache.spark.annotation.Unstable
-import org.apache.spark.sql.{DataSourceRegistration, ExperimentalMethods,
SparkSessionExtensions, Strategy, UDTFRegistration}
+import org.apache.spark.sql.{DataSourceRegistration, ExperimentalMethods,
SparkSessionExtensions, UDTFRegistration}
import org.apache.spark.sql.artifact.ArtifactManager
import org.apache.spark.sql.catalyst.analysis.{Analyzer,
EvalSubqueriesForTimeTravel, FunctionRegistry, InvokeProcedures,
ReplaceCharWithVarchar, ResolveDataSource, ResolveSessionCatalog,
ResolveTranspose, TableFunctionRegistry}
import org.apache.spark.sql.catalyst.analysis.resolver.ResolverExtension
@@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.optimizer.Optimizer
import org.apache.spark.sql.catalyst.parser.ParserInterface
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.classic.{SparkSession, StreamingQueryManager,
UDFRegistration}
+import org.apache.spark.sql.classic.{SparkSession, Strategy,
StreamingQueryManager, UDFRegistration}
import org.apache.spark.sql.connector.catalog.CatalogManager
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.execution.{ColumnarRule, CommandExecutionMode,
QueryExecution, SparkOptimizer, SparkPlanner, SparkSqlParser}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/ExtraStrategiesSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/ExtraStrategiesSuite.scala
index bec68fae0871..67885d07dddf 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExtraStrategiesSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExtraStrategiesSuite.scala
@@ -21,6 +21,7 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
+import org.apache.spark.sql.classic.Strategy
import org.apache.spark.sql.execution.{LeafExecNode, SparkPlan}
import org.apache.spark.sql.test.SharedSparkSession
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlannerSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlannerSuite.scala
index d5c8cabe5003..4090290aba0d 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlannerSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlannerSuite.scala
@@ -17,9 +17,9 @@
package org.apache.spark.sql.execution
-import org.apache.spark.sql.Strategy
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LocalRelation,
LogicalPlan, ReturnAnswer, Union}
+import org.apache.spark.sql.classic.Strategy
import org.apache.spark.sql.test.SharedSparkSession
class SparkPlannerSuite extends SharedSparkSession {
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
index ad28fd5176d9..8ddbd9af9d53 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala
@@ -28,11 +28,12 @@ import org.apache.spark.SparkException
import org.apache.spark.rdd.RDD
import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent,
SparkListenerJobStart}
import org.apache.spark.shuffle.sort.SortShuffleManager
-import org.apache.spark.sql.{DataFrame, Dataset, QueryTest, Row, SparkSession,
Strategy}
+import org.apache.spark.sql.{DataFrame, Dataset, QueryTest, Row, SparkSession}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight}
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan}
+import org.apache.spark.sql.classic.Strategy
import org.apache.spark.sql.execution._
import org.apache.spark.sql.execution.aggregate.BaseAggregateExec
import org.apache.spark.sql.execution.columnar.{InMemoryTableScanExec,
InMemoryTableScanLike}
diff --git
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
index ff2605b0b3b9..c5b1f3658edb 100644
---
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
+++
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
@@ -24,14 +24,14 @@ import scala.util.control.NonFatal
import org.apache.hadoop.hive.ql.exec.{UDAF, UDF}
import org.apache.hadoop.hive.ql.udf.generic.{AbstractGenericUDAFResolver,
GenericUDF, GenericUDTF}
-import org.apache.spark.sql.{AnalysisException, Strategy}
+import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.{Analyzer,
EvalSubqueriesForTimeTravel, InvokeProcedures, ReplaceCharWithVarchar,
ResolveDataSource, ResolveSessionCatalog, ResolveTranspose}
import org.apache.spark.sql.catalyst.analysis.resolver.ResolverExtension
import org.apache.spark.sql.catalyst.catalog.{ExternalCatalogWithListener,
InvalidUDFClassException}
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.classic.SparkSession
+import org.apache.spark.sql.classic.{SparkSession, Strategy}
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.execution.SparkPlanner
import org.apache.spark.sql.execution.aggregate.ResolveEncodersInScalaAgg
diff --git
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
index 73d0327e2bca..77364f67e223 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
@@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.planning._
import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable,
InsertIntoDir, InsertIntoStatement, LogicalPlan, ScriptTransformation,
Statistics, SubqueryAlias}
import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.classic.Strategy
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.execution._
import org.apache.spark.sql.execution.command.{CreateTableCommand, DDLUtils,
InsertIntoDataSourceDirCommand}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]