srowen commented on a change in pull request #24963: [SPARK-28159][ML] Make the 
transform natively in ml framework to avoid extra conversion
URL: https://github.com/apache/spark/pull/24963#discussion_r298626903
 
 

 ##########
 File path: 
mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala
 ##########
 @@ -75,40 +77,61 @@ class ChiSqSelectorModel @Since("1.3.0") (
   private def compress(features: Vector): Vector = {
     features match {
       case SparseVector(size, indices, values) =>
-        val newSize = filterIndices.length
-        val newValues = new ArrayBuilder.ofDouble
-        val newIndices = new ArrayBuilder.ofInt
-        var i = 0
-        var j = 0
-        var indicesIdx = 0
-        var filterIndicesIdx = 0
-        while (i < indices.length && j < filterIndices.length) {
-          indicesIdx = indices(i)
-          filterIndicesIdx = filterIndices(j)
-          if (indicesIdx == filterIndicesIdx) {
-            newIndices += j
-            newValues += values(i)
-            j += 1
-            i += 1
-          } else {
-            if (indicesIdx > filterIndicesIdx) {
-              j += 1
-            } else {
-              i += 1
-            }
-          }
-        }
-        // TODO: Sparse representation might be ineffective if (newSize ~= 
newValues.size)
-        Vectors.sparse(newSize, newIndices.result(), newValues.result())
+        val (newIndices, newValues) = compressSparse(indices, values)
+        Vectors.sparse(filterIndices.length, newIndices, newValues)
       case DenseVector(values) =>
-        val values = features.toArray
-        Vectors.dense(filterIndices.map(i => values(i)))
+        Vectors.dense(compressDense(values))
+      case other =>
+        throw new UnsupportedOperationException(
+          s"Only sparse and dense vectors are supported but got 
${other.getClass}.")
+    }
+  }
+
+  private[spark] def compress(features: NewVector): NewVector = {
 
 Review comment:
   These seem like general methods, not specific to chi-squared. Do we not 
already do some of this work in the Vector constructors or an existing utility 
method?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to