This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 800d3729c9c2 [SPARK-53035][CORE][SQL][K8S][MLLIB] Use `String.repeat` instead of Scala string multiplication
800d3729c9c2 is described below

commit 800d3729c9c2c1b1bf2d4c326d1ade610a7f2ada
Author: Dongjoon Hyun <dongj...@apache.org>
AuthorDate: Thu Jul 31 03:42:49 2025 -0700

    [SPARK-53035][CORE][SQL][K8S][MLLIB] Use `String.repeat` instead of Scala string multiplication

    ### What changes were proposed in this pull request?

    This PR aims to use `String.repeat` (Java 11+) instead of Scala string multiplication with `*`. The focus of this change is the `main` source code.

    **BEFORE**
    ```
    $ git grep '" \* ' | grep main | wc -l
    21
    ```

    **AFTER**
    ```
    $ git grep '" \* ' | grep main | wc -l
    0
    ```

    ### Why are the changes needed?

    `String.repeat` is **significantly faster** than the Scala implementation.

    ```scala
    scala> spark.time((" " * 500_000_000).length)
    Time taken: 1408 ms
    val res0: Int = 500000000

    scala> spark.time((" ".repeat(500_000_000)).length)
    Time taken: 22 ms
    val res1: Int = 500000000
    ```

    ### Does this PR introduce _any_ user-facing change?

    No behavior change.

    ### How was this patch tested?

    Pass the existing CIs, because this is a drop-in replacement.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #51740 from dongjoon-hyun/SPARK-53035.

    Authored-by: Dongjoon Hyun <dongj...@apache.org>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
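For readers without a Spark shell at hand, the comparison above can be reproduced in plain Scala on JDK 11+. The following sketch is illustrative only: it is not part of this patch, and the object and method names (`RepeatSketch`, `padScala`, `padJava`, `time`) are invented for the example. It times both padding styles with `System.nanoTime` in place of `spark.time`.

```scala
// Standalone sketch (not from the patch): compare Scala's StringOps `*`
// with Java 11+ String.repeat for building a padding string.
object RepeatSketch {
  // Scala string multiplication: appends the operand n times via a StringBuilder loop.
  def padScala(n: Int): String = " " * n

  // Java String.repeat: allocates the result array once and fills it directly.
  def padJava(n: Int): String = " ".repeat(n)

  // Crude timer standing in for `spark.time`.
  private def time[T](label: String)(body: => T): T = {
    val start = System.nanoTime()
    val result = body
    println(s"$label: ${(System.nanoTime() - start) / 1e6} ms")
    result
  }

  def main(args: Array[String]): Unit = {
    val n = 100_000_000 // smaller than the 500M used in the commit message
    time("\" \" * n")(padScala(n).length)
    time("\" \".repeat(n)")(padJava(n).length)
  }
}
```

The gap reported in the commit message is plausible because, in current JDKs, `String.repeat` builds the result array in one pass, while Scala 2.13's `StringOps.*` appends the operand `n` times through a `java.lang.StringBuilder`; exact numbers will vary with JDK version and hardware.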
---
 .../main/scala/org/apache/spark/util/SparkStringUtils.scala   |  2 +-
 .../apache/spark/deploy/history/HistoryServerArguments.scala  |  2 +-
 .../scala/org/apache/spark/deploy/worker/DriverRunner.scala   |  2 +-
 .../scala/org/apache/spark/deploy/worker/ExecutorRunner.scala |  2 +-
 core/src/main/scala/org/apache/spark/rdd/RDD.scala            |  4 ++--
 .../main/scala/org/apache/spark/ui/ConsoleProgressBar.scala   |  2 +-
 mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala      |  4 ++--
 .../main/scala/org/apache/spark/mllib/tree/model/Node.scala   |  2 +-
 .../scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala   |  2 +-
 .../spark/sql/catalyst/expressions/codegen/CodeFormatter.scala |  4 ++--
 .../org/apache/spark/sql/catalyst/util/ToNumberParser.scala   | 10 +++++-----
 .../src/main/scala/org/apache/spark/sql/classic/Dataset.scala |  2 +-
 .../spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala  |  2 +-
 .../main/scala/org/apache/spark/streaming/util/StateMap.scala |  2 +-
 14 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/common/utils/src/main/scala/org/apache/spark/util/SparkStringUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/SparkStringUtils.scala
index dbe7d78937e9..12622b13d21f 100644
--- a/common/utils/src/main/scala/org/apache/spark/util/SparkStringUtils.scala
+++ b/common/utils/src/main/scala/org/apache/spark/util/SparkStringUtils.scala
@@ -60,7 +60,7 @@ private[spark] trait SparkStringUtils {
     val rightPadded = right ++ Seq.fill(math.max(left.size - right.size, 0))("")
 
     leftPadded.zip(rightPadded).map { case (l, r) =>
-      (if (l == r) " " else "!") + l + (" " * ((maxLeftSize - l.length) + 3)) + r
+      (if (l == r) " " else "!") + l + " ".repeat((maxLeftSize - l.length) + 3) + r
     }
   }

diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala
index f1343a055138..14d6e4a0381a 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala
@@ -83,7 +83,7 @@ private[history] class HistoryServerArguments(conf: SparkConf, args: Array[Strin
     configs.sortBy(_.key).foreach { conf =>
       sb.append(" ").append(conf.key.padTo(maxConfigLength, ' '))
       var currentDocLen = 0
-      val intention = "\n" + " " * (maxConfigLength + 2)
+      val intention = "\n" + " ".repeat(maxConfigLength + 2)
       conf.doc.split("\\s+").foreach { word =>
         if (currentDocLen + word.length > 60) {
           sb.append(intention).append(" ").append(word)

diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
index 22e4c83440f6..e52bb3ae6d22 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
@@ -215,7 +215,7 @@ private[deploy] class DriverRunner(
       val stderr = new File(baseDir, "stderr")
       val redactedCommand = Utils.redactCommandLineArgs(conf, builder.command.asScala.toSeq)
         .mkString("\"", "\" \"", "\"")
-      val header = "Launch Command: %s\n%s\n\n".format(redactedCommand, "=" * 40)
+      val header = "Launch Command: %s\n%s\n\n".format(redactedCommand, "=".repeat(40))
       Files.asCharSink(stderr, StandardCharsets.UTF_8, FileWriteMode.APPEND).write(header)
       CommandUtils.redirectStream(process.getErrorStream, stderr)
     }

diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
index d21904dd16ea..1476bdd5c016 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
@@ -184,7 +184,7 @@ private[deploy] class ExecutorRunner(
       process = builder.start()
       val header = "Spark Executor Command: %s\n%s\n\n".format(
-        redactedCommand, "=" * 40)
+        redactedCommand, "=".repeat(40))
 
       // Redirect its stdout and stderr to files
       val stdout = new File(executorDir, "stdout")

diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 80db818b77e4..b3d412ed0494 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -2016,7 +2016,7 @@ abstract class RDD[T: ClassTag](
     def firstDebugString(rdd: RDD[_]): Seq[String] = {
       val partitionStr = "(" + rdd.partitions.length + ")"
       val leftOffset = (partitionStr.length - 1) / 2
-      val nextPrefix = (" " * leftOffset) + "|" + (" " * (partitionStr.length - leftOffset))
+      val nextPrefix = " ".repeat(leftOffset) + "|" + " ".repeat(partitionStr.length - leftOffset)
 
       debugSelf(rdd).zipWithIndex.map{
         case (desc: String, 0) => s"$partitionStr $desc"
@@ -2030,7 +2030,7 @@ abstract class RDD[T: ClassTag](
       val nextPrefix = (
         thisPrefix
         + (if (isLastChild) " " else "| ")
-        + (" " * leftOffset) + "|" + (" " * (partitionStr.length - leftOffset)))
+        + (" ".repeat(leftOffset)) + "|" + (" ".repeat(partitionStr.length - leftOffset)))
 
       debugSelf(rdd).zipWithIndex.map{
         case (desc: String, 0) => s"$thisPrefix+-$partitionStr $desc"

diff --git a/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala b/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala
index 980cd6e541a2..0ce0f69b2070 100644
--- a/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala
+++ b/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala
@@ -105,7 +105,7 @@ private[spark] class ConsoleProgressBar(sc: SparkContext) extends Logging {
    */
   private def clear(): Unit = {
     if (!lastProgressBar.isEmpty) {
-      console.printf(s"$CR${" " * TerminalWidth}$CR")
+      console.printf(s"$CR${" ".repeat(TerminalWidth)}$CR")
       lastProgressBar = ""
     }
   }

diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala
index 697d98953839..b68a5e079dc4 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala
@@ -134,7 +134,7 @@ class LeafNode private[ml] (
   override private[tree] def numDescendants: Int = 0
 
   override private[tree] def subtreeToString(indentFactor: Int = 0): String = {
-    val prefix: String = " " * indentFactor
+    val prefix: String = " ".repeat(indentFactor)
     prefix + s"Predict: $prediction\n"
   }
 
@@ -212,7 +212,7 @@ class InternalNode private[ml] (
   }
 
   override private[tree] def subtreeToString(indentFactor: Int = 0): String = {
-    val prefix: String = " " * indentFactor
+    val prefix: String = " ".repeat(indentFactor)
     prefix + s"If (${InternalNode.splitToString(split, left = true)})\n" +
       leftChild.subtreeToString(indentFactor + 1) +
       prefix + s"Else (${InternalNode.splitToString(split, left = false)})\n" +

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala
index bac36ce6937b..a0eec8b2afb9 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala
@@ -137,7 +137,7 @@ class Node @Since("1.2.0") (
         }
       }
     }
-    val prefix: String = " " * indentFactor
+    val prefix: String = " ".repeat(indentFactor)
     if (isLeaf) {
       prefix + s"Predict: ${predict.predict}\n"
     } else {

diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala
index c2fcfe179bbe..7f8bdb38b893 100644
--- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala
+++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala
@@ -146,7 +146,7 @@ object KubernetesUtils extends Logging {
   @Since("3.0.0")
   def formatPairsBundle(pairs: Seq[(String, String)], indent: Int = 1) : String = {
     // Use more loggable format if value is null or empty
-    val indentStr = "\t" * indent
+    val indentStr = "\t".repeat(indent)
     pairs.map {
       case (k, v) => s"\n$indentStr $k: ${Option(v).filter(_.nonEmpty).getOrElse("N/A")}"
     }.mkString("")

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
index 2ec31458270f..4fb180a72012 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala
@@ -144,7 +144,7 @@ private class CodeFormatter {
     // Lines starting with '}' should be de-indented even if they contain '{' after;
     // in addition, lines ending with ':' are typically labels
     val thisLineIndent = if (line.startsWith("}") || line.startsWith(")") || line.endsWith(":")) {
-      " " * (indentSize * (indentLevel - 1))
+      " ".repeat(indentSize * (indentLevel - 1))
     } else {
       indentString
     }
@@ -157,7 +157,7 @@ private class CodeFormatter {
     }
     code.append("\n")
     indentLevel = newIndentLevel
-    indentString = " " * (indentSize * newIndentLevel)
+    indentString = " ".repeat(indentSize * newIndentLevel)
     currentLine += 1
   }

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala
index b66658467c1b..ea2f48fafc0d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala
@@ -619,7 +619,7 @@ class ToNumberParser(numberFormat: String, errorOnFail: Boolean) extends Seriali
     // digits as the scale. This is necessary because we must determine the scale from the format
     // string alone but each input string may include a variable number of digits after the decimal
     // point.
-    val extraZeros = "0" * (scale - parsedAfterDecimalPoint.length)
+    val extraZeros = "0".repeat(scale - parsedAfterDecimalPoint.length)
     val afterDecimalPadded = parsedAfterDecimalPoint.toString + extraZeros
     val prefix = if (negateResult) "-" else ""
     val suffix = if (afterDecimalPadded.nonEmpty) "." + afterDecimalPadded else ""
@@ -763,11 +763,11 @@ class ToNumberParser(numberFormat: String, errorOnFail: Boolean) extends Seriali
     // than specified in the format string, this is an overflow.
     if (numFormatDigitsBeforeDecimalPoint < beforeDecimalPoint.length ||
       numFormatDigitsAfterDecimalPoint < afterDecimalPoint.length) {
-      beforeDecimalPoint = "#" * numFormatDigitsBeforeDecimalPoint
-      afterDecimalPoint = "#" * numFormatDigitsAfterDecimalPoint
+      beforeDecimalPoint = "#".repeat(numFormatDigitsBeforeDecimalPoint)
+      afterDecimalPoint = "#".repeat(numFormatDigitsAfterDecimalPoint)
     }
-    val leadingSpaces = " " * (numFormatDigitsBeforeDecimalPoint - beforeDecimalPoint.length)
-    val trailingZeros = "0" * (numFormatDigitsAfterDecimalPoint - afterDecimalPoint.length)
+    val leadingSpaces = " ".repeat(numFormatDigitsBeforeDecimalPoint - beforeDecimalPoint.length)
+    val trailingZeros = "0".repeat(numFormatDigitsAfterDecimalPoint - afterDecimalPoint.length)
     (leadingSpaces + beforeDecimalPoint, afterDecimalPoint + trailingZeros)
   }

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/classic/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/classic/Dataset.scala
index 8327d8181619..940517617425 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/classic/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/classic/Dataset.scala
@@ -404,7 +404,7 @@ class Dataset[T] private[sql](
     }
 
     // Create SeparateLine
-    val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString()
+    val sep: String = colWidths.map("-".repeat(_)).addString(sb, "+", "+", "+\n").toString()
 
     // column names
     paddedRows.head.addString(sb, "|", "|", "|\n")

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala
index 9354f03297c5..00376670956d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala
@@ -488,7 +488,7 @@ case class AdaptiveSparkPlanExec(
       maxFields: Int,
       printNodeId: Boolean,
       printOutputColumns: Boolean): Unit = {
-    append(" " * depth)
+    append(" ".repeat(depth))
     append(s"+- == $header ==\n")
     plan.generateTreeString(
       0,
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala
index c125bd13cd35..674a53b1cdf5 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala
@@ -202,7 +202,7 @@ private[streaming] class OpenHashMapBasedStateMap[K, S](
   /** Get all the data of this map as string formatted as a tree based on the delta depth */
   override def toDebugString(): String = {
     val tabs = if (deltaChainLength > 0) {
-      (" " * (deltaChainLength - 1)) + "+--- "
+      (" ".repeat(deltaChainLength - 1)) + "+--- "
     } else ""
     parentStateMap.toDebugString() + "\n" + deltaMap.iterator.mkString(tabs, "\n" + tabs, "")
   }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org