This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 87016ce2dd53 [SPARK-53092][CORE][CONNECT] Ban `org.apache.commons.lang3.SystemUtils` 87016ce2dd53 is described below commit 87016ce2dd5357bc9b6a803f7f18b5a8128c17f5 Author: Dongjoon Hyun <dongj...@apache.org> AuthorDate: Mon Aug 4 08:24:09 2025 -0700 [SPARK-53092][CORE][CONNECT] Ban `org.apache.commons.lang3.SystemUtils` ### What changes were proposed in this pull request? This PR aims to ban `org.apache.commons.lang3.SystemUtils`. ### Why are the changes needed? To use `SparkSystemUtils` and `Utils` consistently. ### Does this PR introduce _any_ user-facing change? No behavior changes. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #51807 from dongjoon-hyun/SPARK-53092. Authored-by: Dongjoon Hyun <dongj...@apache.org> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../main/scala/org/apache/spark/util/SparkSystemUtils.scala | 10 ++++++++++ core/src/main/scala/org/apache/spark/SparkContext.scala | 8 ++++---- core/src/main/scala/org/apache/spark/executor/Executor.scala | 8 ++++---- core/src/main/scala/org/apache/spark/util/SizeEstimator.scala | 3 ++- core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala | 4 +--- .../test/scala/org/apache/spark/benchmark/BenchmarkBase.scala | 2 +- .../scala/org/apache/spark/storage/BlockManagerSuite.scala | 2 +- .../test/scala/org/apache/spark/storage/MemoryStoreSuite.scala | 2 +- .../test/scala/org/apache/spark/util/SizeEstimatorSuite.scala | 3 ++- dev/checkstyle.xml | 1 + scalastyle-config.xml | 5 +++++ .../org/apache/spark/sql/connect/ClientE2ETestSuite.scala | 4 ++-- .../org/apache/spark/sql/connect/SQLImplicitsTestSuite.scala | 4 ++-- .../apache/spark/sql/connect/client/SparkConnectClient.scala | 10 +++++----- 14 files changed, 41 insertions(+), 25 deletions(-) diff --git a/common/utils/src/main/scala/org/apache/spark/util/SparkSystemUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/SparkSystemUtils.scala index c855d6bace73..305cc55282bb 100644 --- a/common/utils/src/main/scala/org/apache/spark/util/SparkSystemUtils.scala +++ b/common/utils/src/main/scala/org/apache/spark/util/SparkSystemUtils.scala @@ -22,11 +22,21 @@ private[spark] trait SparkSystemUtils { */ val osName = System.getProperty("os.name") + /** + * The `os.version` system property. + */ + val osVersion = System.getProperty("os.version") + /** * The `os.arch` system property. */ val osArch = System.getProperty("os.arch") + /** + * The `java.version` system property. + */ + val javaVersion = Runtime.version.toString + /** * Whether the underlying operating system is Windows. */ diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index ddb327da5f19..23dda06326d7 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -201,10 +201,10 @@ class SparkContext(config: SparkConf) extends Logging { // log out Spark Version in Spark driver log logInfo(log"Running Spark version ${MDC(LogKeys.SPARK_VERSION, SPARK_VERSION)}") - logInfo(log"OS info ${MDC(LogKeys.OS_NAME, System.getProperty("os.name"))}," + - log" ${MDC(LogKeys.OS_VERSION, System.getProperty("os.version"))}, " + - log"${MDC(LogKeys.OS_ARCH, System.getProperty("os.arch"))}") - logInfo(log"Java version ${MDC(LogKeys.JAVA_VERSION, System.getProperty("java.version"))}") + logInfo(log"OS info ${MDC(LogKeys.OS_NAME, Utils.osName)}," + + log" ${MDC(LogKeys.OS_VERSION, Utils.osVersion)}, " + + log"${MDC(LogKeys.OS_ARCH, Utils.osArch)}") + logInfo(log"Java version ${MDC(LogKeys.JAVA_VERSION, Utils.javaVersion)}") /* ------------------------------------------------------------------------------------- * | Private variables. These variables keep the internal state of the context, and are | diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index 3b066e15386b..4fccd2115eb3 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -84,10 +84,10 @@ private[spark] class Executor( logInfo(log"Starting executor ID ${LogMDC(LogKeys.EXECUTOR_ID, executorId)}" + log" on host ${LogMDC(HOST, executorHostname)}") - logInfo(log"OS info ${LogMDC(OS_NAME, System.getProperty("os.name"))}," + - log" ${LogMDC(OS_VERSION, System.getProperty("os.version"))}, " + - log"${LogMDC(OS_ARCH, System.getProperty("os.arch"))}") - logInfo(log"Java version ${LogMDC(JAVA_VERSION, System.getProperty("java.version"))}") + logInfo(log"OS info ${LogMDC(OS_NAME, Utils.osName)}," + + log" ${LogMDC(OS_VERSION, Utils.osVersion)}, " + + log"${LogMDC(OS_ARCH, Utils.osArch)}") + logInfo(log"Java version ${LogMDC(JAVA_VERSION, Utils.javaVersion)}") private val executorShutdown = new AtomicBoolean(false) val stopHookReference = ShutdownHookManager.addShutdownHook( diff --git a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala index 88fe64859a21..12a417fee3a2 100644 --- a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala +++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala @@ -29,6 +29,7 @@ import com.google.common.collect.MapMaker import org.apache.spark.annotation.DeveloperApi import org.apache.spark.internal.Logging import org.apache.spark.internal.config.Tests.TEST_USE_COMPRESSED_OOPS_KEY +import org.apache.spark.util.Utils import org.apache.spark.util.collection.OpenHashSet /** @@ -106,7 +107,7 @@ object SizeEstimator extends Logging { // Sets object size, pointer size based on architecture and CompressedOops settings // from the JVM. private def initialize(): Unit = { - val arch = System.getProperty("os.arch") + val arch = Utils.osArch is64bit = arch.contains("64") || arch.contains("s390x") isCompressedOops = getIsCompressedOops diff --git a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala index a2cdf033308a..87db697b2a51 100644 --- a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala +++ b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala @@ -233,8 +233,6 @@ private[spark] object Benchmark { def getJVMOSInfo(): String = { val vmName = System.getProperty("java.vm.name") val runtimeVersion = System.getProperty("java.runtime.version") - val osName = System.getProperty("os.name") - val osVersion = System.getProperty("os.version") - s"${vmName} ${runtimeVersion} on ${osName} ${osVersion}" + s"${vmName} ${runtimeVersion} on ${Utils.osName} ${Utils.osVersion}" } } diff --git a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala index 5432c5121b94..cd2a04249827 100644 --- a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala +++ b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala @@ -51,7 +51,7 @@ abstract class BenchmarkBase { System.setProperty(IS_TESTING.key, "true") val regenerateBenchmarkFiles: Boolean = System.getenv("SPARK_GENERATE_BENCHMARK_FILES") == "1" if (regenerateBenchmarkFiles) { - val version = System.getProperty("java.version").split("\\D+")(0).toInt + val version = Utils.javaVersion.split("\\D+")(0).toInt val jdkString = if (version > 17) s"-jdk$version" else "" val resultFileName = s"${this.getClass.getSimpleName.replace("$", "")}$jdkString$suffix-results.txt" diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index b373e295d573..7fd080823c4d 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -152,7 +152,7 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with PrivateMethodTe } // Save modified system properties so that we can restore them after tests. - val originalArch = System.getProperty("os.arch") + val originalArch = Utils.osArch val originalCompressedOops = System.getProperty(TEST_USE_COMPRESSED_OOPS_KEY) def reinitializeSizeEstimator(arch: String, useCompressedOops: String): Unit = { diff --git a/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala b/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala index 0cf3348235c8..ab8c465074f1 100644 --- a/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala @@ -58,7 +58,7 @@ class MemoryStoreSuite def rdd(rddId: Int, splitId: Int): RDDBlockId = RDDBlockId(rddId, splitId) // Save modified system properties so that we can restore them after tests. - val originalArch = System.getProperty("os.arch") + val originalArch = Utils.osArch val originalCompressedOops = System.getProperty(TEST_USE_COMPRESSED_OOPS_KEY) def reinitializeSizeEstimator(arch: String, useCompressedOops: String): Unit = { diff --git a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala index d669f2c655ab..fe86e4988d4b 100644 --- a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala @@ -23,6 +23,7 @@ import org.scalatest.{BeforeAndAfterEach, PrivateMethodTester} import org.apache.spark.SparkFunSuite import org.apache.spark.internal.config.Tests.TEST_USE_COMPRESSED_OOPS_KEY +import org.apache.spark.util.Utils class DummyClass1 {} @@ -74,7 +75,7 @@ class SizeEstimatorSuite with ResetSystemProperties { // Save modified system properties so that we can restore them after tests. - val originalArch = System.getProperty("os.arch") + val originalArch = Utils.osArch val originalCompressedOops = System.getProperty(TEST_USE_COMPRESSED_OOPS_KEY) def reinitializeSizeEstimator(arch: String, useCompressedOops: String): Unit = { diff --git a/dev/checkstyle.xml b/dev/checkstyle.xml index 34082a9aed05..91c7af63a9b6 100644 --- a/dev/checkstyle.xml +++ b/dev/checkstyle.xml @@ -186,6 +186,7 @@ <property name="illegalPkgs" value="org.apache.commons.lang3.tuple" /> <property name="illegalClasses" value="org.apache.commons.lang3.JavaVersion" /> <property name="illegalClasses" value="org.apache.commons.lang3.Strings" /> + <property name="illegalClasses" value="org.apache.commons.lang3.SystemUtils" /> <property name="illegalClasses" value="org.apache.hadoop.io.IOUtils" /> <property name="illegalClasses" value="com.google.common.base.Strings" /> </module> diff --git a/scalastyle-config.xml b/scalastyle-config.xml index 7b6b65a22810..0a1e48551554 100644 --- a/scalastyle-config.xml +++ b/scalastyle-config.xml @@ -399,6 +399,11 @@ This file is divided into 3 sections: <customMessage>Use Utils.strip method instead</customMessage> </check> + <check customId="commonslang3systemutils" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> + <parameters><parameter name="regex">org\.apache\.commons\.lang3\.SystemUtils\b</parameter></parameters> + <customMessage>Use SparkSystemUtils or Utils instead</customMessage> + </check> + <check customId="commonstextstringsubstitutor" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> <parameters><parameter name="regex">org\.apache\.commons\.text\.StringSubstitutor\b</parameter></parameters> <customMessage>Use org.apache.spark.StringSubstitutor instead</customMessage> diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala index 338d532ce2df..24a1d091af2f 100644 --- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala +++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala @@ -45,7 +45,7 @@ import org.apache.spark.sql.connect.test.SparkConnectServerUtils.port import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SqlApiConf import org.apache.spark.sql.types._ -import org.apache.spark.util.{SparkFileUtils, SparkThreadUtils} +import org.apache.spark.util.{SparkFileUtils, SparkSystemUtils, SparkThreadUtils} class ClientE2ETestSuite extends QueryTest @@ -227,7 +227,7 @@ class ClientE2ETestSuite } test("spark deep recursion") { - var recursionDepth = if (System.getProperty("os.arch") == "s390x") 400 else 500 + var recursionDepth = if (SparkSystemUtils.osArch == "s390x") 400 else 500 var df = spark.range(1) for (a <- 1 to recursionDepth) { df = df.union(spark.range(a, a + 1)) diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SQLImplicitsTestSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SQLImplicitsTestSuite.scala index c7b4748f1222..547d5ca7804a 100644 --- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SQLImplicitsTestSuite.scala +++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/SQLImplicitsTestSuite.scala @@ -23,7 +23,6 @@ import java.util.concurrent.atomic.AtomicLong import io.grpc.inprocess.InProcessChannelBuilder import org.apache.arrow.memory.RootAllocator -import org.apache.commons.lang3.SystemUtils import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql.{Column, Encoder, SaveMode} @@ -31,6 +30,7 @@ import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.agnosticEncoderFo import org.apache.spark.sql.connect.client.SparkConnectClient import org.apache.spark.sql.connect.client.arrow.{ArrowDeserializers, ArrowSerializer} import org.apache.spark.sql.connect.test.ConnectFunSuite +import org.apache.spark.util.SparkSystemUtils /** * Test suite for SQL implicits. @@ -173,7 +173,7 @@ class SQLImplicitsTestSuite extends ConnectFunSuite with BeforeAndAfterAll { // Spark always converts them to microseconds, this will cause the // test fail when using Java 17 on Linux, so add `truncatedTo(ChronoUnit.MICROS)` when // testing on Linux using Java 17 to ensure the accuracy of input data is microseconds. - if (SystemUtils.IS_OS_LINUX) { + if (SparkSystemUtils.isLinux) { testImplicit(LocalDateTime.now().truncatedTo(ChronoUnit.MICROS)) testImplicit(Instant.now().truncatedTo(ChronoUnit.MICROS)) testImplicit(Timestamp.from(Instant.now().truncatedTo(ChronoUnit.MICROS))) diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala index e844237a3bb4..48f01a8042a6 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala @@ -33,6 +33,7 @@ import org.apache.spark.connect.proto import org.apache.spark.connect.proto.UserContext import org.apache.spark.sql.connect.common.ProtoUtils import org.apache.spark.sql.connect.common.config.ConnectCommon +import org.apache.spark.util.SparkSystemUtils /** * Conceptually the remote spark session that communicates with the server. @@ -707,12 +708,11 @@ object SparkConnectClient { */ private def genUserAgent(value: String): String = { val scalaVersion = Properties.versionNumberString - val jvmVersion = System.getProperty("java.version").split("_")(0) + val jvmVersion = SparkSystemUtils.javaVersion.split("_")(0) val osName = { - val os = System.getProperty("os.name").toLowerCase(Locale.ROOT) - if (os.contains("mac")) "darwin" - else if (os.contains("linux")) "linux" - else if (os.contains("win")) "windows" + if (SparkSystemUtils.isMac) "darwin" + else if (SparkSystemUtils.isLinux) "linux" + else if (SparkSystemUtils.isWindows) "windows" else "unknown" } List( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org