ueshin commented on code in PR #47349:
URL: https://github.com/apache/spark/pull/47349#discussion_r1678189190
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala:
##########
@@ -641,14 +641,19 @@ object SQLConf {
.checkValue(_ > 0, "The value of spark.sql.leafNodeDefaultParallelism must be positive.")
.createOptional
+ // Lazily get the number of cores to make sure SparkContext created first.
+ private lazy val defaultShufflePartition: Option[Int] = SparkContext.getActive.flatMap { sc =>
+ if (sc.isLocal) Some(SparkContext.numDriverCores(sc.master)) else None
+ }
+
val SHUFFLE_PARTITIONS = buildConf("spark.sql.shuffle.partitions")
.doc("The default number of partitions to use when shuffling data for joins or aggregations. " +
"Note: For structured streaming, this configuration cannot be changed between query " +
"restarts from the same checkpoint location.")
.version("1.1.0")
.intConf
.checkValue(_ > 0, "The value of spark.sql.shuffle.partitions must be positive")
- .createWithDefault(200)
+ .createWithDefaultFunction(() => defaultShufflePartition.getOrElse(200))
Review Comment:
Will this change also affect Spark Connect local mode?
E.g.,
```
$ ./bin/pyspark --remote local
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]