This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new b968d2a50596 [SPARK-53177][K8S] Use Java `Base64` instead of `com.google.common.io.BaseEncoding` b968d2a50596 is described below commit b968d2a50596d67c374c71ba2def77a06d34484c Author: Dongjoon Hyun <dongj...@apache.org> AuthorDate: Thu Aug 7 10:23:51 2025 -0700 [SPARK-53177][K8S] Use Java `Base64` instead of `com.google.common.io.BaseEncoding` ### What changes were proposed in this pull request? This PR aims to use Java `Base64` instead of `com.google.common.io.BaseEncoding`. In addition, new Scalastyle and Checkstyle rules are added to ban `com.google.common.io.BaseEncoding` in order to prevent a future regression. ### Why are the changes needed? Java implementation is **over 2x faster** than Google one. ```scala scala> val s = "a".repeat(5_000_000).getBytes(java.nio.charset.StandardCharsets.UTF_8) scala> spark.time(java.util.Base64.getDecoder().decode(java.util.Base64.getEncoder().encodeToString(s)).length) Time taken: 18 ms val res0: Int = 5000000 scala> spark.time(com.google.common.io.BaseEncoding.base64().decode(com.google.common.io.BaseEncoding.base64().encode(s)).length) Time taken: 50 ms val res1: Int = 5000000 ``` ### Does this PR introduce _any_ user-facing change? No behavior change. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #51904 from dongjoon-hyun/SPARK-53177. 
Authored-by: Dongjoon Hyun <dongj...@apache.org> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- dev/checkstyle.xml | 1 + .../k8s/features/DriverKubernetesCredentialsFeatureStep.scala | 6 +++--- .../k8s/features/DriverKubernetesCredentialsFeatureStepSuite.scala | 4 ++-- scalastyle-config.xml | 5 +++++ 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/dev/checkstyle.xml b/dev/checkstyle.xml index dea1ff7d0d77..7d34afc99345 100644 --- a/dev/checkstyle.xml +++ b/dev/checkstyle.xml @@ -191,6 +191,7 @@ <property name="illegalClasses" value="org.apache.commons.lang3.SystemUtils" /> <property name="illegalClasses" value="org.apache.hadoop.io.IOUtils" /> <property name="illegalClasses" value="com.google.common.base.Strings" /> + <property name="illegalClasses" value="com.google.common.io.BaseEncoding" /> </module> <module name="RegexpSinglelineJava"> <property name="format" value="Charset\.defaultCharset"/> diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStep.scala index 2941d3e9f9e7..83f8ed01774d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStep.scala @@ -19,10 +19,10 @@ package org.apache.spark.deploy.k8s.features import java.io.File import java.nio.charset.StandardCharsets import java.nio.file.Files +import java.util.Base64 import scala.jdk.CollectionConverters._ -import com.google.common.io.BaseEncoding import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder, Secret, SecretBuilder} import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod} @@ -48,7 +48,7 @@ private[spark] class 
DriverKubernetesCredentialsFeatureStep(kubernetesConf: Kube private val oauthTokenBase64 = kubernetesConf .getOption(s"$KUBERNETES_AUTH_DRIVER_CONF_PREFIX.$OAUTH_TOKEN_CONF_SUFFIX") .map { token => - BaseEncoding.base64().encode(token.getBytes(StandardCharsets.UTF_8)) + Base64.getEncoder().encodeToString(token.getBytes(StandardCharsets.UTF_8)) } private val caCertDataBase64 = safeFileConfToBase64( @@ -154,7 +154,7 @@ private[spark] class DriverKubernetesCredentialsFeatureStep(kubernetesConf: Kube .map { file => require(file.isFile, String.format("%s provided at %s does not exist or is not a file.", fileType, file.getAbsolutePath)) - BaseEncoding.base64().encode(Files.readAllBytes(file.toPath)) + Base64.getEncoder().encodeToString(Files.readAllBytes(file.toPath)) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStepSuite.scala index f12b10a8fddc..33982e929888 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverKubernetesCredentialsFeatureStepSuite.scala @@ -19,10 +19,10 @@ package org.apache.spark.deploy.k8s.features import java.io.File import java.nio.charset.StandardCharsets import java.nio.file.Files +import java.util.Base64 import scala.jdk.CollectionConverters._ -import com.google.common.io.BaseEncoding import io.fabric8.kubernetes.api.model.Secret import org.apache.spark.{SparkConf, SparkFunSuite} @@ -107,7 +107,7 @@ class DriverKubernetesCredentialsFeatureStepSuite extends SparkFunSuite { assert(credentialsSecret.getMetadata.getName === s"${kubernetesConf.resourceNamePrefix}-kubernetes-credentials") val decodedSecretData = 
credentialsSecret.getData.asScala.map { data => - (data._1, new String(BaseEncoding.base64().decode(data._2), StandardCharsets.UTF_8)) + (data._1, new String(Base64.getDecoder().decode(data._2), StandardCharsets.UTF_8)) } val expectedSecretData = Map( DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME -> "ca-cert", diff --git a/scalastyle-config.xml b/scalastyle-config.xml index 3abcdc79d19b..75794f0bbc1a 100644 --- a/scalastyle-config.xml +++ b/scalastyle-config.xml @@ -731,4 +731,9 @@ This file is divided into 3 sections: <parameters><parameter name="regex">org\.apache\.commons\.collections4\.MapUtils\b</parameter></parameters> <customMessage>Use org.apache.spark.util.collection.Utils instead.</customMessage> </check> + + <check customId="baseencoding" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> + <parameters><parameter name="regex">com\.google\.common\.io\.BaseEncoding\b</parameter></parameters> + <customMessage>Use Java APIs (like java.util.Base64) instead.</customMessage> + </check> </scalastyle> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org