This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 605aa0c299c [SPARK-45687][CORE][SQL][ML][MLLIB][KUBERNETES][EXAMPLES][CONNECT][STRUCTURED STREAMING] Fix `Passing an explicit array value to a Scala varargs method is deprecated`
605aa0c299c is described below

commit 605aa0c299c1d88f8a31ba888ac8e6b6203be6c5
Author: Tengfei Huang <tengfe...@gmail.com>
AuthorDate: Fri Nov 10 08:10:20 2023 -0600

    [SPARK-45687][CORE][SQL][ML][MLLIB][KUBERNETES][EXAMPLES][CONNECT][STRUCTURED STREAMING] Fix `Passing an explicit array value to a Scala varargs method is deprecated`

    ### What changes were proposed in this pull request?
    Fix the deprecated behavior below:
    `Passing an explicit array value to a Scala varargs method is deprecated (since 2.13.0) and will result in a defensive copy; Use the more efficient non-copying ArraySeq.unsafeWrapArray or an explicit toIndexedSeq call`

    For all of these use cases, we don't need to make a copy of the array. Explicitly use `ArraySeq.unsafeWrapArray` to do the conversion.

    ### Why are the changes needed?
    Eliminate compile warnings and no longer use deprecated Scala APIs.

    ### Does this PR introduce _any_ user-facing change?
    No

    ### How was this patch tested?
    Pass GA. Fixed all the warnings with the build:
    `mvn clean package -DskipTests -Pspark-ganglia-lgpl -Pkinesis-asl -Pdocker-integration-tests -Pyarn -Pkubernetes -Pkubernetes-integration-tests -Phive-thriftserver -Phadoop-cloud`

    ### Was this patch authored or co-authored using generative AI tooling?
    No.

    Closes #43642 from ivoson/SPARK-45687.

    Authored-by: Tengfei Huang <tengfe...@gmail.com>
    Signed-off-by: Sean Owen <sro...@gmail.com>
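For reference, the deprecated pattern and its replacement, as a minimal self-contained sketch (the `describe` method and `names` array are illustrative only, not code from this patch):

```scala
import scala.collection.immutable.ArraySeq

object VarargsDemo {
  // Any Scala varargs method; Spark APIs such as Dataset.select and
  // functions.struct have this shape.
  def describe(xs: String*): String = xs.mkString(", ")

  def main(args: Array[String]): Unit = {
    val names = Array("a", "b", "c")

    // Deprecated since Scala 2.13: expanding an explicit array into varargs
    // forces a defensive copy and emits the warning quoted above.
    // describe(names: _*)

    // The non-copying fix applied throughout this patch: wrap the array in an
    // immutable ArraySeq (no copy) first, then expand that.
    println(describe(ArraySeq.unsafeWrapArray(names): _*))
  }
}
```

In most files below the wrapping goes through the `toImmutableArraySeq` extension imported from `org.apache.spark.util.ArrayImplicits`, which the diff uses as shorthand for the same `ArraySeq.unsafeWrapArray` conversion.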
---
 .../scala/org/apache/spark/sql/KeyValueGroupedDataset.scala  |  9 ++++++---
 .../test/scala/org/apache/spark/sql/ColumnTestSuite.scala    |  3 ++-
 .../apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala   |  5 ++++-
 .../spark/sql/connect/planner/SparkConnectPlanner.scala      |  3 ++-
 .../main/scala/org/apache/spark/api/python/PythonRDD.scala   |  3 ++-
 core/src/main/scala/org/apache/spark/executor/Executor.scala |  3 ++-
 core/src/main/scala/org/apache/spark/rdd/RDD.scala           |  3 ++-
 .../scala/org/apache/spark/examples/graphx/Analytics.scala   |  4 ++--
 .../scala/org/apache/spark/ml/classification/OneVsRest.scala |  3 ++-
 .../scala/org/apache/spark/ml/feature/FeatureHasher.scala    |  4 +++-
 .../src/main/scala/org/apache/spark/ml/feature/Imputer.scala |  8 +++++---
 .../main/scala/org/apache/spark/ml/feature/Interaction.scala |  4 +++-
 .../main/scala/org/apache/spark/ml/feature/RFormula.scala    |  6 ++++--
 .../scala/org/apache/spark/ml/feature/VectorAssembler.scala  |  5 +++--
 mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala  |  3 ++-
 .../src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala  |  3 ++-
 .../src/main/scala/org/apache/spark/ml/r/KSTestWrapper.scala |  3 ++-
 .../apache/spark/ml/regression/DecisionTreeRegressor.scala   |  3 ++-
 .../src/main/scala/org/apache/spark/ml/tree/treeModels.scala |  3 ++-
 .../src/main/scala/org/apache/spark/mllib/util/MLUtils.scala | 12 ++++++++----
 .../scala/org/apache/spark/ml/feature/ImputerSuite.scala     | 12 ++++++++----
 .../apache/spark/ml/source/image/ImageFileFormatSuite.scala  |  3 ++-
 .../apache/spark/ml/stat/KolmogorovSmirnovTestSuite.scala    |  3 ++-
 mllib/src/test/scala/org/apache/spark/ml/util/MLTest.scala   |  6 ++++--
 .../deploy/k8s/features/DriverCommandFeatureStepSuite.scala  |  2 +-
 .../apache/spark/sql/catalyst/expressions/generators.scala   |  8 ++++++--
 .../sql/catalyst/expressions/UnsafeRowConverterSuite.scala   |  4 +++-
 .../scala/org/apache/spark/sql/DataFrameStatFunctions.scala  |  3 ++-
 .../scala/org/apache/spark/sql/KeyValueGroupedDataset.scala  |  8 ++++++--
 .../spark/sql/execution/datasources/jdbc/JDBCRDD.scala       |  2 +-
 .../org/apache/spark/sql/execution/stat/StatFunctions.scala  |  3 ++-
 .../apache/spark/sql/execution/streaming/OffsetSeqLog.scala  |  3 ++-
 .../streaming/continuous/ContinuousRateStreamSource.scala    |  3 ++-
 .../src/test/scala/org/apache/spark/sql/DataFrameSuite.scala |  3 ++-
 .../src/test/scala/org/apache/spark/sql/DatasetSuite.scala   |  6 ++++--
 .../src/test/scala/org/apache/spark/sql/GenTPCDSData.scala   |  3 ++-
 .../test/scala/org/apache/spark/sql/ParametersSuite.scala    |  9 +++++----
 .../spark/sql/connector/SimpleWritableDataSource.scala       |  4 +++-
 .../sql/execution/datasources/FileMetadataStructSuite.scala  |  3 ++-
 .../spark/sql/execution/datasources/csv/CSVBenchmark.scala   |  7 ++++---
 .../scala/org/apache/spark/sql/streaming/StreamSuite.scala   |  2 +-
 .../org/apache/spark/sql/streaming/StreamingQuerySuite.scala |  3 ++-
 .../org/apache/spark/sql/hive/thriftserver/CliSuite.scala    |  3 ++-
 .../spark/sql/hive/execution/AggregationQuerySuite.scala     |  4 +++-
 .../spark/sql/hive/execution/ObjectHashAggregateSuite.scala  |  8 +++++---
 45 files changed, 136 insertions(+), 69 deletions(-)

diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala
index dac89bf3eb5..2e6117abbf3 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala
@@ -194,7 +194,9 @@ class KeyValueGroupedDataset[K, V] private[sql] () extends Serializable {
       SortExprs: Array[Column],
       f: FlatMapGroupsFunction[K, V, U],
       encoder: Encoder[U]): Dataset[U] = {
-    flatMapSortedGroups(SortExprs: _*)(UdfUtils.flatMapGroupsFuncToScalaFunc(f))(encoder)
+    import org.apache.spark.util.ArrayImplicits._
+    flatMapSortedGroups(SortExprs.toImmutableArraySeq: _*)(
+      UdfUtils.flatMapGroupsFuncToScalaFunc(f))(encoder)
   }
 
   /**
@@ -458,8 +460,9 @@ class KeyValueGroupedDataset[K, V] private[sql] () extends Serializable {
       otherSortExprs: Array[Column],
       f: CoGroupFunction[K, V, U, R],
       encoder: Encoder[R]): Dataset[R] = {
-    cogroupSorted(other)(thisSortExprs: _*)(otherSortExprs: _*)(
-      UdfUtils.coGroupFunctionToScalaFunc(f))(encoder)
+    import org.apache.spark.util.ArrayImplicits._
+    cogroupSorted(other)(thisSortExprs.toImmutableArraySeq: _*)(
+      otherSortExprs.toImmutableArraySeq: _*)(UdfUtils.coGroupFunctionToScalaFunc(f))(encoder)
   }
 
   protected[sql] def flatMapGroupsWithStateHelper[S: Encoder, U: Encoder](
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ColumnTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ColumnTestSuite.scala
index c1e4399ccb0..0fb6894e457 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ColumnTestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ColumnTestSuite.scala
@@ -207,5 +207,6 @@ class ColumnTestSuite extends ConnectFunSuite {
   private val structType1 = new StructType().add("a", "int").add("b", "string")
   private val structType2 = structType1.add("c", "binary")
   testColName(structType1, _.struct(structType1))
-  testColName(structType2, _.struct(structType2.fields: _*))
+  import org.apache.spark.util.ArrayImplicits._
+  testColName(structType2, _.struct(structType2.fields.toImmutableArraySeq: _*))
 }
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala
index baf65e7bb33..f7ffe7aa127 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala
@@ -286,7 +286,10 @@ class UserDefinedFunctionE2ETestSuite extends QueryTest {
     import session.implicits._
     val df = Seq((1, 2, 3)).toDF("a", "b", "c")
     val f = udf((row: Row) => row.schema.fieldNames)
-    checkDataset(df.select(f(struct(df.columns map col: _*))), Row(Seq("a", "b", "c")))
+    import org.apache.spark.util.ArrayImplicits._
+    checkDataset(
+      df.select(f(struct((df.columns map col).toImmutableArraySeq: _*))),
+      Row(Seq("a", "b", "c")))
   }
 
   test("Filter with row input encoder") {
diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
index 8b852babb54..4925bc0a5dc 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
@@ -1178,11 +1178,12 @@ class SparkConnectPlanner(
     val normalized = normalize(schema).asInstanceOf[StructType]
 
+    import org.apache.spark.util.ArrayImplicits._
     val project = Dataset
       .ofRows(
         session,
         logicalPlan = logical.LocalRelation(normalize(structType).asInstanceOf[StructType]))
-      .toDF(normalized.names: _*)
+      .toDF(normalized.names.toImmutableArraySeq: _*)
       .to(normalized)
       .logicalPlan
       .asInstanceOf[Project]
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index 3eea0ebcdb2..e98259562c9 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -43,6 +43,7 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.security.{SocketAuthHelper, SocketAuthServer, SocketFuncServer}
 import org.apache.spark.storage.{BroadcastBlockId, StorageLevel}
 import org.apache.spark.util._
+import org.apache.spark.util.ArrayImplicits._
 
 
 private[spark] class PythonRDD(
@@ -179,7 +180,7 @@ private[spark] object PythonRDD extends Logging {
     type UnrolledPartition = Array[ByteArray]
     val allPartitions: Array[UnrolledPartition] =
       sc.runJob(rdd, (x: Iterator[ByteArray]) => x.toArray, partitions.asScala.toSeq)
-    val flattenedPartition: UnrolledPartition = Array.concat(allPartitions: _*)
+    val flattenedPartition: UnrolledPartition = Array.concat(allPartitions.toImmutableArraySeq: _*)
     serveIterator(flattenedPartition.iterator,
       s"serve RDD ${rdd.id} with partitions ${partitions.asScala.mkString(",")}")
   }
diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala
index b12b5e21312..e340667173b 100644
--- a/core/src/main/scala/org/apache/spark/executor/Executor.scala
+++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala
@@ -324,7 +324,8 @@ private[spark] class Executor(
   private val Seq(initialUserJars, initialUserFiles, initialUserArchives) =
     Seq("jar", "file", "archive").map { key =>
       conf.getOption(s"spark.app.initial.$key.urls").map { urls =>
-        immutable.Map(urls.split(",").map(url => (url, appStartTime)): _*)
+        import org.apache.spark.util.ArrayImplicits._
+        immutable.Map(urls.split(",").map(url => (url, appStartTime)).toImmutableArraySeq: _*)
       }.getOrElse(immutable.Map.empty)
     }
   updateDependencies(initialUserFiles, initialUserJars, initialUserArchives, defaultSessionState)
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 5dc666c62d1..610b48ea2ba 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -1044,7 +1044,8 @@ abstract class RDD[T: ClassTag](
    */
   def collect(): Array[T] = withScope {
     val results = sc.runJob(this, (iter: Iterator[T]) => iter.toArray)
-    Array.concat(results: _*)
+    import org.apache.spark.util.ArrayImplicits._
+    Array.concat(results.toImmutableArraySeq: _*)
   }
 
   /**
diff --git a/examples/src/main/scala/org/apache/spark/examples/graphx/Analytics.scala b/examples/src/main/scala/org/apache/spark/examples/graphx/Analytics.scala
index a8f9b32b0f3..5529da74970 100644
--- a/examples/src/main/scala/org/apache/spark/examples/graphx/Analytics.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/graphx/Analytics.scala
@@ -18,7 +18,7 @@
 // scalastyle:off println
 package org.apache.spark.examples.graphx
 
-import scala.collection.mutable
+import scala.collection.{immutable, mutable}
 
 import org.apache.spark._
 import org.apache.spark.graphx._
@@ -51,7 +51,7 @@ object Analytics {
         case _ => throw new IllegalArgumentException(s"Invalid argument: $arg")
       }
     }
-    val options = mutable.Map(optionsList: _*)
+    val options = mutable.Map(immutable.ArraySeq.unsafeWrapArray(optionsList): _*)
 
     val conf = new SparkConf()
     GraphXUtils.registerKryoClasses(conf)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
index 52106f4010f..b70f3ddd4c1 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
@@ -213,9 +213,10 @@ final class OneVsRestModel private[ml] (
         tmpModel.asInstanceOf[ProbabilisticClassificationModel[_, _]].setProbabilityCol("")
       }
 
+      import org.apache.spark.util.ArrayImplicits._
       tmpModel.transform(df)
         .withColumn(accColName, updateUDF(col(accColName), col(tmpRawPredName)))
-        .select(columns: _*)
+        .select(columns.toImmutableArraySeq: _*)
     }
 
     if (handlePersistence) {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala
index f1268bdf6bd..866bf9e5bf3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala
@@ -187,7 +187,9 @@ class FeatureHasher(@Since("2.3.0") override val uid: String) extends Transforme
     }
 
     val metadata = outputSchema($(outputCol)).metadata
-    dataset.withColumn($(outputCol), hashFeatures(struct($(inputCols).map(col): _*)), metadata)
+    import org.apache.spark.util.ArrayImplicits._
+    dataset.withColumn($(outputCol),
+      hashFeatures(struct($(inputCols).map(col).toImmutableArraySeq: _*)), metadata)
   }
 
   @Since("2.3.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala
index 5998887923f..4d38c127d41 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala
@@ -163,24 +163,26 @@ class Imputer @Since("2.2.0") (@Since("2.2.0") override val uid: String)
     }
     val numCols = cols.length
 
+    import org.apache.spark.util.ArrayImplicits._
     val results = $(strategy) match {
       case Imputer.mean =>
         // Function avg will ignore null automatically.
         // For a column only containing null, avg will return null.
-        val row = dataset.select(cols.map(avg): _*).head()
+        val row = dataset.select(cols.map(avg).toImmutableArraySeq: _*).head()
         Array.tabulate(numCols)(i => if (row.isNullAt(i)) Double.NaN else row.getDouble(i))
 
       case Imputer.median =>
         // Function approxQuantile will ignore null automatically.
         // For a column only containing null, approxQuantile will return an empty array.
-        dataset.select(cols: _*).stat.approxQuantile(inputColumns, Array(0.5), $(relativeError))
+        dataset.select(cols.toImmutableArraySeq: _*)
+          .stat.approxQuantile(inputColumns, Array(0.5), $(relativeError))
           .map(_.headOption.getOrElse(Double.NaN))
 
       case Imputer.mode =>
         import spark.implicits._
         // If there is more than one mode, choose the smallest one to keep in line
         // with sklearn.impute.SimpleImputer (using scipy.stats.mode).
-        val modes = dataset.select(cols: _*).flatMap { row =>
+        val modes = dataset.select(cols.toImmutableArraySeq: _*).flatMap { row =>
           // Ignore null.
           Iterator.range(0, numCols)
             .flatMap(i => if (row.isNullAt(i)) None else Some((i, row.getDouble(i))))
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala
index 9a4f1d97c90..a81c55a1715 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala
@@ -108,9 +108,11 @@ class Interaction @Since("1.6.0") (@Since("1.6.0") override val uid: String) ext
         case _: NumericType | BooleanType => dataset(f.name).cast(DoubleType)
       }
     }
+    import org.apache.spark.util.ArrayImplicits._
     dataset.select(
       col("*"),
-      interactFunc(struct(featureCols: _*)).as($(outputCol), featureAttrs.toMetadata()))
+      interactFunc(struct(featureCols.toImmutableArraySeq: _*))
+        .as($(outputCol), featureAttrs.toMetadata()))
   }
 
   /**
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
index 9387ab3daeb..f3f85b40986 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
@@ -476,7 +476,8 @@ private class ColumnPruner(override val uid: String, val columnsToPrune: Set[Str
 
   override def transform(dataset: Dataset[_]): DataFrame = {
     val columnsToKeep = dataset.columns.filter(!columnsToPrune.contains(_))
-    dataset.select(columnsToKeep.map(dataset.col): _*)
+    import org.apache.spark.util.ArrayImplicits._
+    dataset.select(columnsToKeep.map(dataset.col).toImmutableArraySeq: _*)
   }
 
   override def transformSchema(schema: StructType): StructType = {
@@ -564,7 +565,8 @@ private class VectorAttributeRewriter(
     }
     val otherCols = dataset.columns.filter(_ != vectorCol).map(dataset.col)
     val rewrittenCol = dataset.col(vectorCol).as(vectorCol, metadata)
-    dataset.select(otherCols :+ rewrittenCol : _*)
+    import org.apache.spark.util.ArrayImplicits._
+    dataset.select((otherCols :+ rewrittenCol).toImmutableArraySeq : _*)
   }
 
   override def transformSchema(schema: StructType): StructType = {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
index cf5b5ecb201..47c0ca22f96 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
@@ -149,8 +149,9 @@ class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String)
         case _: NumericType | BooleanType => dataset(c).cast(DoubleType).as(s"${c}_double_$uid")
       }
     }
-
-    filteredDataset.select(col("*"), assembleFunc(struct(args: _*)).as($(outputCol), metadata))
+    import org.apache.spark.util.ArrayImplicits._
+    filteredDataset.select(col("*"),
+      assembleFunc(struct(args.toImmutableArraySeq: _*)).as($(outputCol), metadata))
   }
 
   @Since("1.4.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala
index 7fe9aa414f2..081a40bfbe8 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala
@@ -164,7 +164,8 @@ class FPGrowth @Since("2.2.0") (
     instr.logPipelineStage(this)
     instr.logDataset(dataset)
-    instr.logParams(this, params: _*)
+    import org.apache.spark.util.ArrayImplicits._
+    instr.logParams(this, params.toImmutableArraySeq: _*)
 
     val data = dataset.select($(itemsCol))
     val items = data.where(col($(itemsCol)).isNotNull).rdd.map(r => r.getSeq[Any](0).toArray)
     val mllibFP = new MLlibFPGrowth().setMinSupport($(minSupport))
diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala
index 5c98ffa394f..3ea76658d1a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala
@@ -135,7 +135,8 @@ final class PrefixSpan(@Since("2.4.0") override val uid: String) extends Params
   @Since("2.4.0")
   def findFrequentSequentialPatterns(dataset: Dataset[_]): DataFrame = instrumented { instr =>
     instr.logDataset(dataset)
-    instr.logParams(this, params: _*)
+    import org.apache.spark.util.ArrayImplicits._
+    instr.logParams(this, params.toImmutableArraySeq: _*)
 
     val sequenceColParam = $(sequenceCol)
     val inputType = dataset.schema(sequenceColParam).dataType
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/KSTestWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/KSTestWrapper.scala
index 21531eb057a..234b8bbf6f0 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/KSTestWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/KSTestWrapper.scala
@@ -49,7 +49,8 @@ private[r] object KSTestWrapper {
       case Row(feature: Double) => feature
     }
 
-    val ksTestResult = kolmogorovSmirnovTest(rddData, distName, distParams : _*)
+    import org.apache.spark.util.ArrayImplicits._
+    val ksTestResult = kolmogorovSmirnovTest(rddData, distName, distParams.toImmutableArraySeq : _*)
 
     new KSTestWrapper(ksTestResult, distName, distParams)
   }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
index d9942f1c4f3..6c0089b6894 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -129,7 +129,8 @@ class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S
     instr.logPipelineStage(this)
     instr.logDataset(instances)
-    instr.logParams(this, params: _*)
+    import org.apache.spark.util.ArrayImplicits._
+    instr.logParams(this, params.toImmutableArraySeq: _*)
 
     val trees = RandomForest.run(instances, strategy, numTrees = 1, featureSubsetStrategy = "all",
       seed = $(seed), instr = Some(instr), parentUID = Some(uid))
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
index cc917db98b3..47fb8bc9229 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
@@ -535,7 +535,8 @@ private[ml] object EnsembleModelReadWrite {
       val newNodeDataCol = df.schema("nodeData").dataType match {
         case StructType(fields) =>
           val cols = fields.map(f => col(s"nodeData.${f.name}")) :+ lit(-1L).as("rawCount")
-          struct(cols: _*)
+          import org.apache.spark.util.ArrayImplicits._
+          struct(cols.toImmutableArraySeq: _*)
       }
       df = df.withColumn("nodeData", newNodeDataCol)
     }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
index 7bce38cc38a..378f1381e4c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
@@ -359,7 +359,8 @@ object MLUtils extends Logging {
         col(c)
       }
     }
-    dataset.select(exprs: _*)
+    import org.apache.spark.util.ArrayImplicits._
+    dataset.select(exprs.toImmutableArraySeq: _*)
   }
 
   /**
@@ -411,7 +412,8 @@ object MLUtils extends Logging {
         col(c)
       }
     }
-    dataset.select(exprs: _*)
+    import org.apache.spark.util.ArrayImplicits._
+    dataset.select(exprs.toImmutableArraySeq: _*)
   }
 
   /**
@@ -461,7 +463,8 @@ object MLUtils extends Logging {
         col(c)
       }
     }
-    dataset.select(exprs: _*)
+    import org.apache.spark.util.ArrayImplicits._
+    dataset.select(exprs.toImmutableArraySeq: _*)
   }
 
   /**
@@ -511,7 +514,8 @@ object MLUtils extends Logging {
         col(c)
       }
     }
-    dataset.select(exprs: _*)
+    import org.apache.spark.util.ArrayImplicits._
+    dataset.select(exprs.toImmutableArraySeq: _*)
   }
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala
index 5ef22a282c3..4873dacfc0f 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala
@@ -339,9 +339,10 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest {
       .setOutputCols(Array("out1"))
 
     val types = Seq(IntegerType, LongType)
+    import org.apache.spark.util.ArrayImplicits._
     for (mType <- types) {
       // cast all columns to desired data type for testing
-      val df2 = df.select(df.columns.map(c => col(c).cast(mType)): _*)
+      val df2 = df.select(df.columns.map(c => col(c).cast(mType)).toImmutableArraySeq: _*)
       ImputerSuite.iterateStrategyTest(true, imputer, df2)
     }
   }
@@ -360,9 +361,10 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest {
       .setOutputCol("out")
 
     val types = Seq(IntegerType, LongType)
+    import org.apache.spark.util.ArrayImplicits._
     for (mType <- types) {
       // cast all columns to desired data type for testing
-      val df2 = df.select(df.columns.map(c => col(c).cast(mType)): _*)
+      val df2 = df.select(df.columns.map(c => col(c).cast(mType)).toImmutableArraySeq: _*)
       ImputerSuite.iterateStrategyTest(false, imputer, df2)
     }
   }
@@ -382,9 +384,10 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest {
      .setMissingValue(-1.0)
 
     val types = Seq(IntegerType, LongType)
+    import org.apache.spark.util.ArrayImplicits._
     for (mType <- types) {
       // cast all columns to desired data type for testing
-      val df2 = df.select(df.columns.map(c => col(c).cast(mType)): _*)
+      val df2 = df.select(df.columns.map(c => col(c).cast(mType)).toImmutableArraySeq: _*)
       ImputerSuite.iterateStrategyTest(true, imputer, df2)
     }
   }
@@ -404,9 +407,10 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest {
      .setMissingValue(-1.0)
 
     val types = Seq(IntegerType, LongType)
+    import org.apache.spark.util.ArrayImplicits._
     for (mType <- types) {
       // cast all columns to desired data type for testing
-      val df2 = df.select(df.columns.map(c => col(c).cast(mType)): _*)
+      val df2 = df.select(df.columns.map(c => col(c).cast(mType)).toImmutableArraySeq: _*)
       ImputerSuite.iterateStrategyTest(false, imputer, df2)
     }
   }
diff --git a/mllib/src/test/scala/org/apache/spark/ml/source/image/ImageFileFormatSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/source/image/ImageFileFormatSuite.scala
index 411e056bffb..32c50625447 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/source/image/ImageFileFormatSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/source/image/ImageFileFormatSuite.scala
@@ -95,7 +95,8 @@ class ImageFileFormatSuite extends SparkFunSuite with MLlibTestSparkContext {
       .select(substring_index(col("image.origin"), "/", -1).as("origin"), col("cls"), col("date"))
       .collect()
 
-    assert(Set(result: _*) === Set(
+    import org.apache.spark.util.ArrayImplicits._
+    assert(Set(result.toImmutableArraySeq: _*) === Set(
       Row("29.5.a_b_EGDP022204.jpg", "kittens", "2018-01"),
       Row("54893.jpg", "kittens", "2018-02"),
       Row("DP153539.jpg", "kittens", "2018-02"),
diff --git a/mllib/src/test/scala/org/apache/spark/ml/stat/KolmogorovSmirnovTestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/stat/KolmogorovSmirnovTestSuite.scala
index 1312de3a1b5..2ae21401538 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/stat/KolmogorovSmirnovTestSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/stat/KolmogorovSmirnovTestSuite.scala
@@ -60,9 +60,10 @@ class KolmogorovSmirnovTestSuite
       val cdf = (x: Double) => theoreticalDist.cumulativeProbability(x)
       KolmogorovSmirnovTest.test(sampledDF, "sample", cdf).head()
     } else {
+      import org.apache.spark.util.ArrayImplicits._
       KolmogorovSmirnovTest.test(sampledDF, "sample",
         theoreticalDistByName._1,
-        theoreticalDistByName._2: _*
+        theoreticalDistByName._2.toImmutableArraySeq: _*
       ).head()
     }
     val theoreticalDistMath3 = if (theoreticalDist == null) {
diff --git a/mllib/src/test/scala/org/apache/spark/ml/util/MLTest.scala b/mllib/src/test/scala/org/apache/spark/ml/util/MLTest.scala
index b847c905e5f..def04b50118 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/util/MLTest.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/util/MLTest.scala
@@ -112,13 +112,15 @@ trait MLTest extends StreamTest with TempDirectory { self: Suite =>
     val columnsWithMetadata = dataframe.schema.map { structField =>
       col(structField.name).as(structField.name, structField.metadata)
     }
-    val streamDF = stream.toDS().toDF(columnNames: _*).select(columnsWithMetadata: _*)
+    import org.apache.spark.util.ArrayImplicits._
+    val streamDF = stream.toDS()
+      .toDF(columnNames.toImmutableArraySeq: _*).select(columnsWithMetadata: _*)
     val data = dataframe.as[A].collect()
 
     val streamOutput = transformer.transform(streamDF)
       .select(firstResultCol, otherResultCols: _*)
     testStream(streamOutput) (
-      AddData(stream, data: _*),
+      AddData(stream, data.toImmutableArraySeq: _*),
      CheckAnswer(globalCheckFunction)
     )
   }
diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala
index 4c38989955b..b12508573b7 100644
--- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala
+++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala
@@ -79,7 +79,7 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite {
       (
         envPy.map(v => ENV_PYSPARK_PYTHON -> v :: Nil) ++
         envDriverPy.map(v => ENV_PYSPARK_DRIVER_PYTHON -> v :: Nil)
-      ).flatten.toArray: _*)
+      ).flatten.toSeq: _*)
 
     val spec = applyFeatureStep(
       PythonMainAppResource(mainResource),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
index 49cf01d472e..b4be09f333d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
@@ -234,13 +234,15 @@ case class Stack(children: Seq[Expression]) extends Generator {
   override def eval(input: InternalRow): IterableOnce[InternalRow] = {
     val values = children.tail.map(_.eval(input)).toArray
+
+    import org.apache.spark.util.ArrayImplicits._
     for (row <- 0 until numRows) yield {
       val fields = new Array[Any](numFields)
       for (col <- 0 until numFields) {
         val index = row * numFields + col
         fields.update(col, if (index < values.length) values(index) else null)
       }
-      InternalRow(fields: _*)
+      InternalRow(fields.toImmutableArraySeq: _*)
     }
   }
 
@@ -293,12 +295,14 @@ case class ReplicateRows(children: Seq[Expression]) extends Generator with Codeg
   override def eval(input: InternalRow): IterableOnce[InternalRow] = {
     val numRows = children.head.eval(input).asInstanceOf[Long]
     val values = children.tail.map(_.eval(input)).toArray
+
+    import org.apache.spark.util.ArrayImplicits._
     Range.Long(0, numRows, 1).map { _ =>
       val fields = new Array[Any](numColumns)
       for (col <- 0 until numColumns) {
         fields.update(col, values(col))
       }
-      InternalRow(fields: _*)
+      InternalRow(fields.toImmutableArraySeq: _*)
     }
   }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala
index cbab8894cb5..44264a84663 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala
@@ -314,7 +314,9 @@ class UnsafeRowConverterSuite extends SparkFunSuite with Matchers with PlanTestB
     val row = new SpecificInternalRow(fieldTypes)
     val values = Array(new CalendarInterval(0, 7, 0L), null)
-    row.update(0, createArray(values: _*))
+
+    import org.apache.spark.util.ArrayImplicits._
+    row.update(0, createArray(values.toImmutableArraySeq: _*))
     val unsafeRow: UnsafeRow = converter.apply(row)
     testArrayInterval(unsafeRow.getArray(0), values)
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
index a8c4d4f8d2b..2f285776052 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
@@ -98,8 +98,9 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
       cols: Array[String],
       probabilities: Array[Double],
       relativeError: Double): Array[Array[Double]] = withOrigin {
+    import org.apache.spark.util.ArrayImplicits._
     StatFunctions.multipleApproxQuantiles(
-      df.select(cols.map(col): _*),
+      df.select(cols.map(col).toImmutableArraySeq: _*),
       cols,
       probabilities,
       relativeError).map(_.toArray).toArray
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala
index ef0a3e0266c..22dfed3ea4c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala
@@ -241,7 +241,9 @@ class KeyValueGroupedDataset[K, V] private[sql](
       SortExprs: Array[Column],
       f: FlatMapGroupsFunction[K, V, U],
       encoder: Encoder[U]): Dataset[U] = {
-    flatMapSortedGroups(SortExprs: _*)((key, data) => f.call(key, data.asJava).asScala)(encoder)
+    import org.apache.spark.util.ArrayImplicits._
+    flatMapSortedGroups(
+      SortExprs.toImmutableArraySeq: _*)((key, data) => f.call(key, data.asJava).asScala)(encoder)
   }
 
   /**
@@ -901,7 +903,9 @@ class KeyValueGroupedDataset[K, V] private[sql](
       otherSortExprs: Array[Column],
       f: CoGroupFunction[K, V, U, R],
       encoder: Encoder[R]): Dataset[R] = {
-    cogroupSorted(other)(thisSortExprs: _*)(otherSortExprs: _*)(
+    import org.apache.spark.util.ArrayImplicits._
+    cogroupSorted(other)(
+      thisSortExprs.toImmutableArraySeq: _*)(otherSortExprs.toImmutableArraySeq: _*)(
       (key, left, right) => f.call(key, left.asJava, right.asJava).asScala)(encoder)
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
index 53b09179cc3..934ed9ac2a1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
@@ -89,7 +89,7 @@ object JDBCRDD extends Logging {
    * @return A Catalyst schema corresponding to columns in the given order.
    */
   private def pruneSchema(schema: StructType, columns: Array[String]): StructType = {
-    val fieldMap = Map(schema.fields.map(x => x.name -> x): _*)
+    val fieldMap = schema.fields.map(x => x.name -> x).toMap
     new StructType(columns.map(name => fieldMap(name)))
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
index e7f1affbde4..db26f8c7758 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
@@ -253,9 +253,10 @@ object StatFunctions extends Logging {
       val valueColumns = columnNames.map { columnName =>
         new Column(ElementAt(col(columnName).expr, col("summary").expr)).as(columnName)
       }
+      import org.apache.spark.util.ArrayImplicits._
       ds.select(mapColumns: _*)
         .withColumn("summary", explode(lit(selectedStatistics)))
-        .select(Array(col("summary")) ++ valueColumns: _*)
+        .select((Array(col("summary")) ++ valueColumns).toImmutableArraySeq: _*)
     }
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLog.scala
index 5646f61440e..7e490ef4cd5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLog.scala
@@ -64,7 +64,8 @@ class OffsetSeqLog(sparkSession: SparkSession, path: String)
       case "" => None
       case md => Some(md)
     }
-    OffsetSeq.fill(metadata, lines.map(parseOffset).toArray: _*)
+    import org.apache.spark.util.ArrayImplicits._
+    OffsetSeq.fill(metadata, lines.map(parseOffset).toArray.toImmutableArraySeq: _*)
   }
 
   override protected def serialize(offsetSeq: OffsetSeq, out: OutputStream): Unit = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala
index 08840496b05..132d9a9d61e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala
@@ -42,7 +42,8 @@ class RateStreamContinuousStream(rowsPerSecond: Long, numPartitions: Int) extend
       case RateStreamPartitionOffset(i, currVal, nextRead) =>
         (i, ValueRunTimeMsPair(currVal, nextRead))
     }
-    RateStreamOffset(Map(tuples: _*))
+    import org.apache.spark.util.ArrayImplicits._
+    RateStreamOffset(Map(tuples.toImmutableArraySeq: _*))
   }
 
   override def deserializeOffset(json: String): Offset = {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index d3271283baa..b28a23f13f8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -1182,8 +1182,9 @@ class DataFrameSuite extends QueryTest
   }
 
   test("summary advanced") {
+    import org.apache.spark.util.ArrayImplicits._
     val stats = Array("count", "50.01%", "max", "mean", "min", "25%")
-    val orderMatters = person2.summary(stats: _*)
+    val orderMatters = person2.summary(stats.toImmutableArraySeq: _*)
     assert(orderMatters.collect().map(_.getString(0)) === stats)
 
     val onlyPercentiles = person2.summary("0.1%", "99.9%")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index dcbd8948120..9285c31d702 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -2058,13 +2058,14 @@ class DatasetSuite extends QueryTest
   test("SPARK-24569: Option of primitive types are mistakenly mapped to struct type") {
+    import org.apache.spark.util.ArrayImplicits._
     withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
       val a = Seq(Some(1)).toDS()
       val b = Seq(Some(1.2)).toDS()
       val expected = Seq((Some(1), Some(1.2))).toDS()
       val joined = a.joinWith(b, lit(true))
       assert(joined.schema == expected.schema)
-      checkDataset(joined, expected.collect(): _*)
+      checkDataset(joined, expected.collect().toImmutableArraySeq: _*)
     }
   }
 
@@ -2078,7 +2079,8 @@ class DatasetSuite extends QueryTest
     val ds1 = spark.createDataset(rdd)
     val ds2 = spark.createDataset(rdd)(encoder)
     assert(ds1.schema == ds2.schema)
-    checkDataset(ds1.select("_2._2"), ds2.select("_2._2").collect(): _*)
+    import org.apache.spark.util.ArrayImplicits._
+    checkDataset(ds1.select("_2._2"), ds2.select("_2._2").collect().toImmutableArraySeq: _*)
   }
 
   test("SPARK-23862: Spark ExpressionEncoder should support Java Enum type from Scala") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GenTPCDSData.scala b/sql/core/src/test/scala/org/apache/spark/sql/GenTPCDSData.scala
index 6768c5fd07b..5f95ba4f385 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/GenTPCDSData.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/GenTPCDSData.scala
@@ -169,7 +169,8 @@ class TPCDSTables(spark: SparkSession, dsdgenDir: String, scaleFactor: Int)
          }
          c.as(f.name)
        }
-        stringData.select(columns: _*)
+        import org.apache.spark.util.ArrayImplicits._
+        stringData.select(columns.toImmutableArraySeq: _*)
      }
 
      convertedData
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala
index afbe9cdac63..974def7f3b8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala
@@ -532,6 +532,7 @@ class ParametersSuite extends QueryTest with SharedSparkSession {
   }
 
   test("SPARK-45033: maps as parameters") {
+    import org.apache.spark.util.ArrayImplicits._
     def fromArr(keys: Array[_], values: Array[_]): Column = {
       map_from_arrays(Column(Literal(keys)), Column(Literal(values)))
     }
@@ -540,21 +541,21 @@ class ParametersSuite extends QueryTest with SharedSparkSession {
     }
     def createMap(keys: Array[_], values: Array[_]): Column = {
       val zipped = keys.map(k => Column(Literal(k))).zip(values.map(v => Column(Literal(v))))
-      map(zipped.map { case (k, v) => Seq(k, v) }.flatten: _*)
+      map(zipped.flatMap { case (k, v) => Seq(k, v) }.toImmutableArraySeq: _*)
     }
     def callMap(keys: Array[_], values: Array[_]): Column = {
       val zipped = keys.map(k => Column(Literal(k))).zip(values.map(v => Column(Literal(v))))
-      call_function("map", zipped.map { case (k, v) => Seq(k, v) }.flatten: _*)
+      call_function("map", zipped.flatMap { case (k, v) => Seq(k, v) }.toImmutableArraySeq: _*)
     }
     def fromEntries(keys: Array[_], values: Array[_]): Column = {
       val structures = keys.zip(values)
         .map { case (k, v) => struct(Column(Literal(k)), Column(Literal(v)))}
-      map_from_entries(array(structures: _*))
+      map_from_entries(array(structures.toImmutableArraySeq: _*))
     }
     def callFromEntries(keys: Array[_], values: Array[_]): Column = {
       val structures = keys.zip(values)
         .map { case (k, v) => struct(Column(Literal(k)), Column(Literal(v)))}
-      call_function("map_from_entries", call_function("array", structures: _*))
+      call_function("map_from_entries", call_function("array", structures.toImmutableArraySeq: _*))
     }
 
     Seq(fromArr(_, _), createMap(_, _), callFromArr(_, _), callMap(_, _)).foreach { f =>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/SimpleWritableDataSource.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/SimpleWritableDataSource.scala
index 235a8ff3869..de8cf7a7b2d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/SimpleWritableDataSource.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/SimpleWritableDataSource.scala
@@ -157,6 +157,7 @@ class CSVReaderFactory(conf: SerializableConfiguration)
     val fs = filePath.getFileSystem(conf.value)
 
     new PartitionReader[InternalRow] {
+      import org.apache.spark.util.ArrayImplicits._
       private val inputStream = fs.open(filePath)
       private val lines = new BufferedReader(new InputStreamReader(inputStream))
         .lines().iterator().asScala
@@ -172,7 +173,8 @@ class CSVReaderFactory(conf: SerializableConfiguration)
        }
      }
 
-      override def get(): InternalRow = InternalRow(currentLine.split(",").map(_.trim.toInt): _*)
+      override def get(): InternalRow =
+        InternalRow(currentLine.split(",").map(_.trim.toInt).toImmutableArraySeq: _*)
 
       override def close(): Unit = {
        inputStream.close()
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala
index 0e4985bac99..6bf72b82564 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala
@@ -1083,7 +1083,8 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession {
       // Transform the result into a literal that can be used in an expression.
       val metadataColumnFields = metadataColumnRow.schema.fields
         .map(field => lit(metadataColumnRow.getAs[Any](field.name)).as(field.name))
-      val metadataColumnStruct = struct(metadataColumnFields: _*)
+      import org.apache.spark.util.ArrayImplicits._
+      val metadataColumnStruct = struct(metadataColumnFields.toImmutableArraySeq: _*)
       val selectSingleRowDf = spark.read.load(dir.getAbsolutePath)
         .where(col("_metadata").equalTo(lit(metadataColumnStruct)))
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala
index eb561e13fc6..e9cf35d9fab 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala
@@ -70,6 +70,7 @@ object CSVBenchmark extends SqlBasedBenchmark {
     val benchmark = new Benchmark(s"Wide rows with $colsNum columns", rowsNum, output = output)
 
     withTempPath { path =>
+      import org.apache.spark.util.ArrayImplicits._
       val fields = Seq.tabulate(colsNum)(i => StructField(s"col$i", IntegerType))
       val schema = StructType(fields)
       val values = (0 until colsNum).map(i => i.toString).mkString(",")
@@ -87,7 +88,7 @@ object CSVBenchmark extends SqlBasedBenchmark {
       }
       val cols100 = columnNames.take(100).map(Column(_))
       benchmark.addCase(s"Select 100 columns", numIters) { _ =>
-        ds.select(cols100: _*).noop()
+        ds.select(cols100.toImmutableArraySeq: _*).noop()
       }
       benchmark.addCase(s"Select one column", numIters) { _ =>
         ds.select($"col1").noop()
@@ -100,7 +101,7 @@ object CSVBenchmark extends SqlBasedBenchmark {
         (1 until colsNum).map(i => StructField(s"col$i", IntegerType)))
       val dsErr1 = spark.read.schema(schemaErr1).csv(path.getAbsolutePath)
       benchmark.addCase(s"Select 100 columns, one bad input field", numIters) { _ =>
-        dsErr1.select(cols100: _*).noop()
+        dsErr1.select(cols100.toImmutableArraySeq: _*).noop()
       }
 
       val badRecColName = "badRecord"
@@ -109,7 +110,7 @@ object CSVBenchmark extends SqlBasedBenchmark {
         .option("columnNameOfCorruptRecord", badRecColName)
         .csv(path.getAbsolutePath)
       benchmark.addCase(s"Select 100 columns, corrupt record field", numIters) { _ =>
-        dsErr2.select((Column(badRecColName) +: cols100): _*).noop()
+        dsErr2.select((Column(badRecColName) +: cols100).toImmutableArraySeq: _*).noop()
       }
 
       benchmark.run()
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala
index 66d37e996a6..953bbddf6ab 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala
@@ -224,7 +224,7 @@ class StreamSuite extends StreamTest {
       // Parquet write page-level CRC checksums will change the file size and
       // affect the data order when reading these files. Please see PARQUET-1746 for details.
       val outputDf = spark.read.parquet(outputDir.getAbsolutePath).sort($"a").as[Long]
-      checkDataset[Long](outputDf, (0L to 10L).toArray: _*)
+      checkDataset[Long](outputDf, 0L to 10L: _*)
     } finally {
       query.stop()
     }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala
index 1e0fa5b6bc9..e388de21405 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala
@@ -891,7 +891,8 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi
       }
       override def getOffset: Option[Offset] = Some(LongOffset(1))
       override def getBatch(start: Option[Offset], end: Offset): DataFrame = {
-        spark.range(2).toDF(MockSourceProvider.fakeSchema.fieldNames: _*)
+        import org.apache.spark.util.ArrayImplicits._
+        spark.range(2).toDF(MockSourceProvider.fakeSchema.fieldNames.toImmutableArraySeq: _*)
       }
       override def schema: StructType = MockSourceProvider.fakeSchema
     }
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 110ef7b0aff..649c985cade 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -703,6 +703,7 @@ class CliSuite extends SparkFunSuite {
   testRetry("formats of error messages") {
     def check(format: ErrorMessageFormat.Value, errorMessage: String, silent: Boolean): Unit = {
       val expected = errorMessage.split(System.lineSeparator()).map("" -> _)
+      import org.apache.spark.util.ArrayImplicits._
       runCliWithin(
         1.minute,
         extraArgs = Seq(
@@ -710,7 +711,7 @@ class CliSuite extends SparkFunSuite {
           "--conf", s"${SQLConf.ERROR_MESSAGE_FORMAT.key}=$format",
           "--conf", s"${SQLConf.ANSI_ENABLED.key}=true",
           "-e", "select 1 / 0"),
-        errorResponses = Seq("DIVIDE_BY_ZERO"))(expected: _*)
+        errorResponses = Seq("DIVIDE_BY_ZERO"))(expected.toImmutableArraySeq: _*)
     }
     check(
       format = ErrorMessageFormat.PRETTY,
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
index dc8b184fcee..4b000fff0eb 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala
@@ -941,8 +941,10 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te
       data
         .find(r => r.getInt(0) == 50)
         .getOrElse(fail("A row with id 50 should be the expected answer."))
+
+    import org.apache.spark.util.ArrayImplicits._
     checkAnswer(
-      df.agg(udaf(allColumns: _*)),
+      df.agg(udaf(allColumns.toImmutableArraySeq: _*)),
       // udaf returns a Row as the output value.
       Row(expectedAnswer)
     )
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala
index cc95de793ee..4e2db214035 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala
@@ -152,13 +152,15 @@ class ObjectHashAggregateSuite
     val df = spark.createDataFrame(spark.sparkContext.parallelize(data, 1), schema)
     val aggFunctions = schema.fieldNames.map(f => typed_count(col(f)))
 
+    import org.apache.spark.util.ArrayImplicits._
     checkAnswer(
-      df.agg(aggFunctions.head, aggFunctions.tail: _*),
+      df.agg(aggFunctions.head, aggFunctions.tail.toImmutableArraySeq: _*),
       Row.fromSeq(data.map(_.toSeq).transpose.map(_.count(_ != null): Long))
     )
 
     checkAnswer(
-      df.groupBy($"id" % 4 as "mod").agg(aggFunctions.head, aggFunctions.tail: _*),
+      df.groupBy($"id" % 4 as "mod")
+        .agg(aggFunctions.head, aggFunctions.tail.toImmutableArraySeq: _*),
       data.groupBy(_.getInt(0) % 4).map { case (key, value) =>
         key -> Row.fromSeq(value.map(_.toSeq).transpose.map(_.count(_ != null): Long))
       }.toSeq.map {
@@ -168,7 +170,7 @@ class ObjectHashAggregateSuite
 
     withSQLConf(SQLConf.OBJECT_AGG_SORT_BASED_FALLBACK_THRESHOLD.key -> "5") {
       checkAnswer(
-        df.agg(aggFunctions.head, aggFunctions.tail: _*),
+        df.agg(aggFunctions.head, aggFunctions.tail.toImmutableArraySeq: _*),
         Row.fromSeq(data.map(_.toSeq).transpose.map(_.count(_ != null): Long))
       )
     }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org