This is an automated email from the ASF dual-hosted git repository.
yamamuro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 0b237bd [SPARK-31292][CORE][SQL] Replace toSet.toSeq with distinct for readability
0b237bd is described below
commit 0b237bd615da4b2c2b781e72af4ad3a4f2951444
Author: Kengo Seki <[email protected]>
AuthorDate: Sun Mar 29 08:48:08 2020 +0900
[SPARK-31292][CORE][SQL] Replace toSet.toSeq with distinct for readability
### What changes were proposed in this pull request?
This PR replaces calls of the form `toSet.toSeq` with `distinct`.
### Why are the changes needed?
`toSet.toSeq` is intended to make a sequence's elements unique, but it is a bit verbose.
Using `distinct` instead is easier to understand and improves readability.
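A minimal sketch (not part of this patch; the sample values are hypothetical) contrasting the two spellings. One behavioral note: `distinct` keeps first-occurrence order, whereas `toSet.toSeq` makes no ordering guarantee:
```scala
val hosts = Seq("host1", "host2", "host1", "host3")  // hypothetical input

// Before: deduplicate by round-tripping through a Set.
// The element order of the result depends on the Set implementation.
val viaSet: Seq[String] = hosts.toSet.toSeq

// After: a single call that also keeps first-occurrence order.
val viaDistinct: Seq[String] = hosts.distinct
// viaDistinct == Seq("host1", "host2", "host3")
```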
### Does this PR introduce any user-facing change?
No
### How was this patch tested?
Tested with the existing unit tests and found no problems.
Closes #28062 from sekikn/SPARK-31292.
Authored-by: Kengo Seki <[email protected]>
Signed-off-by: Takeshi Yamamuro <[email protected]>
---
core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala | 2 +-
core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala | 2 +-
core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala | 2 +-
core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala | 2 +-
.../test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala | 2 +-
sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 2 +-
6 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala
index 36ef906..162f090 100644
--- a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala
+++ b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala
@@ -150,7 +150,7 @@ private[spark] object ResourceUtils extends Logging {
def listResourceIds(sparkConf: SparkConf, componentName: String): Seq[ResourceID] = {
sparkConf.getAllWithPrefix(s"$componentName.$RESOURCE_PREFIX.").map { case (key, _) =>
key.substring(0, key.indexOf('.'))
- }.toSet.toSeq.map(name => new ResourceID(componentName, name))
+ }.distinct.map(name => new ResourceID(componentName, name))
}
def parseAllResourceRequests(
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala
index 857c89d..15f2161 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala
@@ -69,7 +69,7 @@ private[spark] class ResultTask[T, U](
with Serializable {
@transient private[this] val preferredLocs: Seq[TaskLocation] = {
- if (locs == null) Nil else locs.toSet.toSeq
+ if (locs == null) Nil else locs.distinct
}
override def runTask(context: TaskContext): U = {
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
index 4c0c30a..a0ba920 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
@@ -71,7 +71,7 @@ private[spark] class ShuffleMapTask(
}
@transient private val preferredLocs: Seq[TaskLocation] = {
- if (locs == null) Nil else locs.toSet.toSeq
+ if (locs == null) Nil else locs.distinct
}
override def runTask(context: TaskContext): MapStatus = {
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
index 7e2fbb4..f0f84fe 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
@@ -487,7 +487,7 @@ private[spark] class TaskSchedulerImpl(
newExecAvail = true
}
}
- val hosts = offers.map(_.host).toSet.toSeq
+ val hosts = offers.map(_.host).distinct
for ((host, Some(rack)) <- hosts.zip(getRacksForHosts(hosts))) {
hostsByRack.getOrElseUpdate(rack, new HashSet[String]()) += host
}
diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala
index 9ee84a8..b9a11e7 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala
@@ -761,7 +761,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B
// that are explicitly blacklisted, plus those that have *any* executors blacklisted.
val nodesForBlacklistedExecutors = offers.filter { offer =>
execBlacklist.contains(offer.executorId)
- }.map(_.host).toSet.toSeq
+ }.map(_.host).distinct
val nodesWithAnyBlacklisting = (nodeBlacklist ++ nodesForBlacklistedExecutors).toSet
// Similarly, figure out which executors have any blacklisting. This means all executors
// that are explicitly blacklisted, plus all executors on nodes that are blacklisted.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index e3c6388..e1e3e8e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2455,7 +2455,7 @@ class Dataset[T] private[sql](
def dropDuplicates(colNames: Seq[String]): Dataset[T] = withTypedPlan {
val resolver = sparkSession.sessionState.analyzer.resolver
val allColumns = queryExecution.analyzed.output
- val groupCols = colNames.toSet.toSeq.flatMap { (colName: String) =>
+ val groupCols = colNames.distinct.flatMap { (colName: String) =>
// It is possibly there are more than one columns with the same name,
// so we call filter instead of find.
val cols = allColumns.filter(col => resolver(col.name, colName))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]