This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new cc2cc29 [SPARK-34275][CORE][SQL][MLLIB][3.0] Replaces filter and size with count cc2cc29 is described below commit cc2cc29f5161bfeb9da58ad3d77c4f073a481ab5 Author: yangjie01 <yangji...@baidu.com> AuthorDate: Thu Jan 28 19:00:37 2021 +0900 [SPARK-34275][CORE][SQL][MLLIB][3.0] Replaces filter and size with count ### What changes were proposed in this pull request? Use `count` to simplify the `filter + size(or length)` operation; it's semantically consistent, and looks simpler. **Before** ``` seq.filter(p).size ``` **After** ``` seq.count(p) ``` ### Why are the changes needed? Code Simplifications. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #31375 from LuciferYang/SPARK-34275-30. Authored-by: yangjie01 <yangji...@baidu.com> Signed-off-by: HyukjinKwon <gurwls...@apache.org> --- .../scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala | 2 +- core/src/test/scala/org/apache/spark/SparkContextSuite.scala | 4 ++-- .../scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala | 4 ++-- .../scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala b/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala index 3664d3f..b1af91a 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala @@ -167,7 +167,7 @@ private[spark] class ExecutorMonitor( def pendingRemovalCount: Int = executors.asScala.count { case (_, exec) => exec.pendingRemoval } def pendingRemovalCountPerResourceProfileId(id: Int): Int = { - executors.asScala.filter { case (k, v) => v.resourceProfileId == id && v.pendingRemoval }.size + executors.asScala.count { 
case (k, v) => v.resourceProfileId == id && v.pendingRemoval } } override def onJobStart(event: SparkListenerJobStart): Unit = { diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala index 59611d8..7b2431a 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala @@ -154,7 +154,7 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu } x }).count() - assert(sc.listFiles().filter(_.contains("somesuffix1")).size == 1) + assert(sc.listFiles().count(_.contains("somesuffix1")) == 1) } finally { sc.stop() } @@ -166,7 +166,7 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu try { sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) sc.addJar(jarPath.toString) - assert(sc.listJars().filter(_.contains("TestUDTF.jar")).size == 1) + assert(sc.listJars().count(_.contains("TestUDTF.jar")) == 1) } finally { sc.stop() } diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala index af76f04..6742d61 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala @@ -80,10 +80,10 @@ class NaiveBayesSuite extends MLTest with DefaultReadWriteTest { } def validatePrediction(predictionAndLabels: Seq[Row]): Unit = { - val numOfErrorPredictions = predictionAndLabels.filter { + val numOfErrorPredictions = predictionAndLabels.count { case Row(prediction: Double, label: Double) => prediction != label - }.length + } // At least 80% of the predictions should be on. 
assert(numOfErrorPredictions < predictionAndLabels.length / 5) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala index f2b608b..54fc090 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala @@ -147,9 +147,9 @@ class TypedImperativeAggregateSuite extends QueryTest with SharedSparkSession { val query = df.select(typedMax($"key"), count($"key"), typedMax($"value"), count($"value")) val maxKey = nullableData.map(_._1).filter(_ != null).max - val countKey = nullableData.map(_._1).filter(_ != null).size + val countKey = nullableData.map(_._1).count(_ != null) val maxValue = nullableData.map(_._2).filter(_ != null).max - val countValue = nullableData.map(_._2).filter(_ != null).size + val countValue = nullableData.map(_._2).count(_ != null) val expected = Seq(Row(maxKey, countKey, maxValue, countValue)) checkAnswer(query, expected) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org