[spark] branch master updated: [SPARK-45338][MLLIB][PYTHON][FOLLOWUP] Use `immutable.ArraySeq.unsafeWrapArray` instead of `array.toSeq`

dongjoon Sun, 01 Oct 2023 16:37:45 -0700

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 643553a2f91 [SPARK-45338][MLLIB][PYTHON][FOLLOWUP] Use 
`immutable.ArraySeq.unsafeWrapArray` instead of `array.toSeq`
643553a2f91 is described below

commit 643553a2f91122fc08a9b2ce4efb1b039aeaf946
Author: yangjie01 <[email protected]>
AuthorDate: Sun Oct 1 16:37:30 2023 -0700

    [SPARK-45338][MLLIB][PYTHON][FOLLOWUP] Use 
`immutable.ArraySeq.unsafeWrapArray` instead of `array.toSeq`
    
    ### What changes were proposed in this pull request?
    In https://github.com/apache/spark/pull/43126, 
`JavaConverters.seqAsJavaListConverter(array).asJava` was changed to 
`array.toSeq.asJava`. However, `array.toSeq` involves an extra `Array.copyOf`, 
and in the affected code paths the result of `array.toSeq.asJava` is converted 
directly into an `Array[Byte]` result, with no possibility of the array content 
being modified. This PR therefore replaces `array.toSeq` with 
`immutable.ArraySeq.unsafeWrapArray(array)` t [...]
    
    ### Why are the changes needed?
    In the affected code paths there is no risk of the array data being 
modified, so wrapping the array without copying is safe. Compared to 
`array.toSeq`, `immutable.ArraySeq.unsafeWrapArray(array)` avoids an extra 
`Array.copyOf`, which improves performance and reduces memory usage.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Pass GitHub Actions
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #43171 from LuciferYang/SPARK-45338-FOLLOWUP.
    
    Authored-by: yangjie01 <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../spark/mllib/api/python/GaussianMixtureModelWrapper.scala       | 3 ++-
 .../scala/org/apache/spark/mllib/api/python/LDAModelWrapper.scala  | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/api/python/GaussianMixtureModelWrapper.scala
 
b/mllib/src/main/scala/org/apache/spark/mllib/api/python/GaussianMixtureModelWrapper.scala
index 687dc208f3a..1eed97a8d4f 100644
--- 
a/mllib/src/main/scala/org/apache/spark/mllib/api/python/GaussianMixtureModelWrapper.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/mllib/api/python/GaussianMixtureModelWrapper.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.mllib.api.python
 
+import scala.collection.immutable
 import scala.jdk.CollectionConverters._
 
 import org.apache.spark.SparkContext
@@ -37,7 +38,7 @@ private[python] class GaussianMixtureModelWrapper(model: 
GaussianMixtureModel) {
     val modelGaussians = model.gaussians.map { gaussian =>
       Array[Any](gaussian.mu, gaussian.sigma)
     }
-    SerDe.dumps(modelGaussians.toSeq.asJava)
+    SerDe.dumps(immutable.ArraySeq.unsafeWrapArray(modelGaussians).asJava)
   }
 
   def predictSoft(point: Vector): Vector = {
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/api/python/LDAModelWrapper.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/api/python/LDAModelWrapper.scala
index fbc0e85b57b..b919b0a8c3f 100644
--- 
a/mllib/src/main/scala/org/apache/spark/mllib/api/python/LDAModelWrapper.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/mllib/api/python/LDAModelWrapper.scala
@@ -16,6 +16,7 @@
  */
 package org.apache.spark.mllib.api.python
 
+import scala.collection.immutable
 import scala.jdk.CollectionConverters._
 
 import org.apache.spark.SparkContext
@@ -35,11 +36,11 @@ private[python] class LDAModelWrapper(model: LDAModel) {
 
   def describeTopics(maxTermsPerTopic: Int): Array[Byte] = {
     val topics = model.describeTopics(maxTermsPerTopic).map { case (terms, 
termWeights) =>
-      val jTerms = terms.toSeq.asJava
-      val jTermWeights = termWeights.toSeq.asJava
+      val jTerms = immutable.ArraySeq.unsafeWrapArray(terms).asJava
+      val jTermWeights = immutable.ArraySeq.unsafeWrapArray(termWeights).asJava
       Array[Any](jTerms, jTermWeights)
     }
-    SerDe.dumps(topics.toSeq.asJava)
+    SerDe.dumps(immutable.ArraySeq.unsafeWrapArray(topics).asJava)
   }
 
   def save(sc: SparkContext, path: String): Unit = model.save(sc, path)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[spark] branch master updated: [SPARK-45338][MLLIB][PYTHON][FOLLOWUP] Use `immutable.ArraySeq.unsafeWrapArray` instead of `array.toSeq`

Reply via email to