This is an automated email from the ASF dual-hosted git repository.

zero323 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new c032928  [SPARK-37430][PYTHON][MLLIB] Inline hints for 
pyspark.mllib.linalg.distributed
c032928 is described below

commit c032928515e74367137c668ce692d8fd53696485
Author: hi-zir <[email protected]>
AuthorDate: Sat Mar 12 23:01:18 2022 +0100

    [SPARK-37430][PYTHON][MLLIB] Inline hints for 
pyspark.mllib.linalg.distributed
    
    ### What changes were proposed in this pull request?
    
    Inline type hints for pyspark.mllib.linalg.distributed
    
    ### Why are the changes needed?
    
    We can take advantage of static type checking within the functions by 
inlining the type hints.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    ### How was this patch tested?
    
    Existing tests.
    
    Closes #35739 from hi-zir/SPARK-37430.
    
    Authored-by: hi-zir <[email protected]>
    Signed-off-by: zero323 <[email protected]>
---
 python/pyspark/mllib/linalg/distributed.py  | 172 +++++++++++++++++-----------
 python/pyspark/mllib/linalg/distributed.pyi | 145 -----------------------
 2 files changed, 103 insertions(+), 214 deletions(-)

diff --git a/python/pyspark/mllib/linalg/distributed.py 
b/python/pyspark/mllib/linalg/distributed.py
index f892d41..d49af66 100644
--- a/python/pyspark/mllib/linalg/distributed.py
+++ b/python/pyspark/mllib/linalg/distributed.py
@@ -20,16 +20,22 @@ Package for distributed linear algebra.
 """
 
 import sys
+from typing import Any, Generic, Optional, Tuple, TypeVar, Union, TYPE_CHECKING
 
 from py4j.java_gateway import JavaObject
 
 from pyspark import RDD, since
 from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper
-from pyspark.mllib.linalg import _convert_to_vector, DenseMatrix, Matrix, 
QRDecomposition
+from pyspark.mllib.linalg import _convert_to_vector, DenseMatrix, Matrix, 
QRDecomposition, Vector
 from pyspark.mllib.stat import MultivariateStatisticalSummary
 from pyspark.sql import DataFrame
 from pyspark.storagelevel import StorageLevel
 
+UT = TypeVar("UT", bound="DistributedMatrix")
+VT = TypeVar("VT", bound="Matrix")
+
+if TYPE_CHECKING:
+    from pyspark.ml._typing import VectorLike
 
 __all__ = [
     "BlockMatrix",
@@ -50,11 +56,11 @@ class DistributedMatrix:
 
     """
 
-    def numRows(self):
+    def numRows(self) -> int:
         """Get or compute the number of rows."""
         raise NotImplementedError
 
-    def numCols(self):
+    def numCols(self) -> int:
         """Get or compute the number of cols."""
         raise NotImplementedError
 
@@ -82,7 +88,12 @@ class RowMatrix(DistributedMatrix):
         the first row.
     """
 
-    def __init__(self, rows, numRows=0, numCols=0):
+    def __init__(
+        self,
+        rows: Union[RDD[Vector], DataFrame],
+        numRows: int = 0,
+        numCols: int = 0,
+    ):
         """
         Note: This docstring is not shown publicly.
 
@@ -121,7 +132,7 @@ class RowMatrix(DistributedMatrix):
         self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
 
     @property
-    def rows(self):
+    def rows(self) -> RDD[Vector]:
         """
         Rows of the RowMatrix stored as an RDD of vectors.
 
@@ -134,7 +145,7 @@ class RowMatrix(DistributedMatrix):
         """
         return self._java_matrix_wrapper.call("rows")
 
-    def numRows(self):
+    def numRows(self) -> int:
         """
         Get or compute the number of rows.
 
@@ -153,7 +164,7 @@ class RowMatrix(DistributedMatrix):
         """
         return self._java_matrix_wrapper.call("numRows")
 
-    def numCols(self):
+    def numCols(self) -> int:
         """
         Get or compute the number of cols.
 
@@ -172,7 +183,7 @@ class RowMatrix(DistributedMatrix):
         """
         return self._java_matrix_wrapper.call("numCols")
 
-    def computeColumnSummaryStatistics(self):
+    def computeColumnSummaryStatistics(self) -> MultivariateStatisticalSummary:
         """
         Computes column-wise summary statistics.
 
@@ -195,7 +206,7 @@ class RowMatrix(DistributedMatrix):
         java_col_stats = 
self._java_matrix_wrapper.call("computeColumnSummaryStatistics")
         return MultivariateStatisticalSummary(java_col_stats)
 
-    def computeCovariance(self):
+    def computeCovariance(self) -> Matrix:
         """
         Computes the covariance matrix, treating each row as an
         observation.
@@ -216,7 +227,7 @@ class RowMatrix(DistributedMatrix):
         """
         return self._java_matrix_wrapper.call("computeCovariance")
 
-    def computeGramianMatrix(self):
+    def computeGramianMatrix(self) -> Matrix:
         """
         Computes the Gramian matrix `A^T A`.
 
@@ -237,7 +248,7 @@ class RowMatrix(DistributedMatrix):
         return self._java_matrix_wrapper.call("computeGramianMatrix")
 
     @since("2.0.0")
-    def columnSimilarities(self, threshold=0.0):
+    def columnSimilarities(self, threshold: float = 0.0) -> "CoordinateMatrix":
         """
         Compute similarities between columns of this matrix.
 
@@ -310,7 +321,9 @@ class RowMatrix(DistributedMatrix):
         java_sims_mat = self._java_matrix_wrapper.call("columnSimilarities", 
float(threshold))
         return CoordinateMatrix(java_sims_mat)
 
-    def tallSkinnyQR(self, computeQ=False):
+    def tallSkinnyQR(
+        self, computeQ: bool = False
+    ) -> QRDecomposition[Optional["RowMatrix"], Matrix]:
         """
         Compute the QR decomposition of this RowMatrix.
 
@@ -360,7 +373,9 @@ class RowMatrix(DistributedMatrix):
         R = decomp.call("R")
         return QRDecomposition(Q, R)
 
-    def computeSVD(self, k, computeU=False, rCond=1e-9):
+    def computeSVD(
+        self, k: int, computeU: bool = False, rCond: float = 1e-9
+    ) -> "SingularValueDecomposition[RowMatrix, Matrix]":
         """
         Computes the singular value decomposition of the RowMatrix.
 
@@ -414,7 +429,7 @@ class RowMatrix(DistributedMatrix):
         j_model = self._java_matrix_wrapper.call("computeSVD", int(k), 
bool(computeU), float(rCond))
         return SingularValueDecomposition(j_model)
 
-    def computePrincipalComponents(self, k):
+    def computePrincipalComponents(self, k: int) -> Matrix:
         """
         Computes the k principal components of the given row matrix
 
@@ -450,7 +465,7 @@ class RowMatrix(DistributedMatrix):
         """
         return self._java_matrix_wrapper.call("computePrincipalComponents", k)
 
-    def multiply(self, matrix):
+    def multiply(self, matrix: Matrix) -> "RowMatrix":
         """
         Multiply this matrix by a local dense matrix on the right.
 
@@ -478,16 +493,16 @@ class RowMatrix(DistributedMatrix):
         return RowMatrix(j_model)
 
 
-class SingularValueDecomposition(JavaModelWrapper):
+class SingularValueDecomposition(JavaModelWrapper, Generic[UT, VT]):
     """
     Represents singular value decomposition (SVD) factors.
 
     .. versionadded:: 2.2.0
     """
 
-    @property
+    @property  # type: ignore[misc]
     @since("2.2.0")
-    def U(self):
+    def U(self) -> Optional[UT]:  # type: ignore[return]
         """
         Returns a distributed matrix whose columns are the left
         singular vectors of the SingularValueDecomposition if computeU was set 
to be True.
@@ -496,23 +511,23 @@ class SingularValueDecomposition(JavaModelWrapper):
         if u is not None:
             mat_name = u.getClass().getSimpleName()
             if mat_name == "RowMatrix":
-                return RowMatrix(u)
+                return RowMatrix(u)  # type: ignore[return-value]
             elif mat_name == "IndexedRowMatrix":
-                return IndexedRowMatrix(u)
+                return IndexedRowMatrix(u)  # type: ignore[return-value]
             else:
                 raise TypeError("Expected RowMatrix/IndexedRowMatrix got %s" % 
mat_name)
 
-    @property
+    @property  # type: ignore[misc]
     @since("2.2.0")
-    def s(self):
+    def s(self) -> Vector:
         """
         Returns a DenseVector with singular values in descending order.
         """
         return self.call("s")
 
-    @property
+    @property  # type: ignore[misc]
     @since("2.2.0")
-    def V(self):
+    def V(self) -> VT:
         """
         Returns a DenseMatrix whose columns are the right singular
         vectors of the SingularValueDecomposition.
@@ -534,15 +549,15 @@ class IndexedRow:
         The row in the matrix at the given index.
     """
 
-    def __init__(self, index, vector):
+    def __init__(self, index: int, vector: "VectorLike") -> None:
         self.index = int(index)
         self.vector = _convert_to_vector(vector)
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return "IndexedRow(%s, %s)" % (self.index, self.vector)
 
 
-def _convert_to_indexed_row(row):
+def _convert_to_indexed_row(row: Any) -> IndexedRow:
     if isinstance(row, IndexedRow):
         return row
     elif isinstance(row, tuple) and len(row) == 2:
@@ -572,7 +587,12 @@ class IndexedRowMatrix(DistributedMatrix):
         the first row.
     """
 
-    def __init__(self, rows, numRows=0, numCols=0):
+    def __init__(
+        self,
+        rows: RDD[Union[Tuple[int, "VectorLike"], IndexedRow]],
+        numRows: int = 0,
+        numCols: int = 0,
+    ):
         """
         Note: This docstring is not shown publicly.
 
@@ -623,7 +643,7 @@ class IndexedRowMatrix(DistributedMatrix):
         self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
 
     @property
-    def rows(self):
+    def rows(self) -> RDD[IndexedRow]:
         """
         Rows of the IndexedRowMatrix stored as an RDD of IndexedRows.
 
@@ -643,7 +663,7 @@ class IndexedRowMatrix(DistributedMatrix):
         rows = rows_df.rdd.map(lambda row: IndexedRow(row[0], row[1]))
         return rows
 
-    def numRows(self):
+    def numRows(self) -> int:
         """
         Get or compute the number of rows.
 
@@ -664,7 +684,7 @@ class IndexedRowMatrix(DistributedMatrix):
         """
         return self._java_matrix_wrapper.call("numRows")
 
-    def numCols(self):
+    def numCols(self) -> int:
         """
         Get or compute the number of cols.
 
@@ -685,7 +705,7 @@ class IndexedRowMatrix(DistributedMatrix):
         """
         return self._java_matrix_wrapper.call("numCols")
 
-    def columnSimilarities(self):
+    def columnSimilarities(self) -> "CoordinateMatrix":
         """
         Compute all cosine similarities between columns.
 
@@ -701,7 +721,7 @@ class IndexedRowMatrix(DistributedMatrix):
         java_coordinate_matrix = 
self._java_matrix_wrapper.call("columnSimilarities")
         return CoordinateMatrix(java_coordinate_matrix)
 
-    def computeGramianMatrix(self):
+    def computeGramianMatrix(self) -> Matrix:
         """
         Computes the Gramian matrix `A^T A`.
 
@@ -722,7 +742,7 @@ class IndexedRowMatrix(DistributedMatrix):
         """
         return self._java_matrix_wrapper.call("computeGramianMatrix")
 
-    def toRowMatrix(self):
+    def toRowMatrix(self) -> RowMatrix:
         """
         Convert this matrix to a RowMatrix.
 
@@ -737,7 +757,7 @@ class IndexedRowMatrix(DistributedMatrix):
         java_row_matrix = self._java_matrix_wrapper.call("toRowMatrix")
         return RowMatrix(java_row_matrix)
 
-    def toCoordinateMatrix(self):
+    def toCoordinateMatrix(self) -> "CoordinateMatrix":
         """
         Convert this matrix to a CoordinateMatrix.
 
@@ -752,7 +772,7 @@ class IndexedRowMatrix(DistributedMatrix):
         java_coordinate_matrix = 
self._java_matrix_wrapper.call("toCoordinateMatrix")
         return CoordinateMatrix(java_coordinate_matrix)
 
-    def toBlockMatrix(self, rowsPerBlock=1024, colsPerBlock=1024):
+    def toBlockMatrix(self, rowsPerBlock: int = 1024, colsPerBlock: int = 
1024) -> "BlockMatrix":
         """
         Convert this matrix to a BlockMatrix.
 
@@ -787,7 +807,9 @@ class IndexedRowMatrix(DistributedMatrix):
         )
         return BlockMatrix(java_block_matrix, rowsPerBlock, colsPerBlock)
 
-    def computeSVD(self, k, computeU=False, rCond=1e-9):
+    def computeSVD(
+        self, k: int, computeU: bool = False, rCond: float = 1e-9
+    ) -> SingularValueDecomposition["IndexedRowMatrix", Matrix]:
         """
         Computes the singular value decomposition of the IndexedRowMatrix.
 
@@ -841,7 +863,7 @@ class IndexedRowMatrix(DistributedMatrix):
         j_model = self._java_matrix_wrapper.call("computeSVD", int(k), 
bool(computeU), float(rCond))
         return SingularValueDecomposition(j_model)
 
-    def multiply(self, matrix):
+    def multiply(self, matrix: Matrix) -> "IndexedRowMatrix":
         """
         Multiply this matrix by a local dense matrix on the right.
 
@@ -884,16 +906,16 @@ class MatrixEntry:
         The (i, j)th entry of the matrix, as a float.
     """
 
-    def __init__(self, i, j, value):
+    def __init__(self, i: int, j: int, value: float) -> None:
         self.i = int(i)
         self.j = int(j)
         self.value = float(value)
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return "MatrixEntry(%s, %s, %s)" % (self.i, self.j, self.value)
 
 
-def _convert_to_matrix_entry(entry):
+def _convert_to_matrix_entry(entry: Any) -> MatrixEntry:
     if isinstance(entry, MatrixEntry):
         return entry
     elif isinstance(entry, tuple) and len(entry) == 3:
@@ -923,7 +945,12 @@ class CoordinateMatrix(DistributedMatrix):
         index plus one.
     """
 
-    def __init__(self, entries, numRows=0, numCols=0):
+    def __init__(
+        self,
+        entries: RDD[Union[Tuple[int, int, float], MatrixEntry]],
+        numRows: int = 0,
+        numCols: int = 0,
+    ):
         """
         Note: This docstring is not shown publicly.
 
@@ -975,7 +1002,7 @@ class CoordinateMatrix(DistributedMatrix):
         self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
 
     @property
-    def entries(self):
+    def entries(self) -> RDD[MatrixEntry]:
         """
         Entries of the CoordinateMatrix stored as an RDD of
         MatrixEntries.
@@ -996,7 +1023,7 @@ class CoordinateMatrix(DistributedMatrix):
         entries = entries_df.rdd.map(lambda row: MatrixEntry(row[0], row[1], 
row[2]))
         return entries
 
-    def numRows(self):
+    def numRows(self) -> int:
         """
         Get or compute the number of rows.
 
@@ -1016,7 +1043,7 @@ class CoordinateMatrix(DistributedMatrix):
         """
         return self._java_matrix_wrapper.call("numRows")
 
-    def numCols(self):
+    def numCols(self) -> int:
         """
         Get or compute the number of cols.
 
@@ -1036,7 +1063,7 @@ class CoordinateMatrix(DistributedMatrix):
         """
         return self._java_matrix_wrapper.call("numCols")
 
-    def transpose(self):
+    def transpose(self) -> "CoordinateMatrix":
         """
         Transpose this CoordinateMatrix.
 
@@ -1059,7 +1086,7 @@ class CoordinateMatrix(DistributedMatrix):
         java_transposed_matrix = self._java_matrix_wrapper.call("transpose")
         return CoordinateMatrix(java_transposed_matrix)
 
-    def toRowMatrix(self):
+    def toRowMatrix(self) -> RowMatrix:
         """
         Convert this matrix to a RowMatrix.
 
@@ -1085,7 +1112,7 @@ class CoordinateMatrix(DistributedMatrix):
         java_row_matrix = self._java_matrix_wrapper.call("toRowMatrix")
         return RowMatrix(java_row_matrix)
 
-    def toIndexedRowMatrix(self):
+    def toIndexedRowMatrix(self) -> IndexedRowMatrix:
         """
         Convert this matrix to an IndexedRowMatrix.
 
@@ -1110,7 +1137,7 @@ class CoordinateMatrix(DistributedMatrix):
         java_indexed_row_matrix = 
self._java_matrix_wrapper.call("toIndexedRowMatrix")
         return IndexedRowMatrix(java_indexed_row_matrix)
 
-    def toBlockMatrix(self, rowsPerBlock=1024, colsPerBlock=1024):
+    def toBlockMatrix(self, rowsPerBlock: int = 1024, colsPerBlock: int = 
1024) -> "BlockMatrix":
         """
         Convert this matrix to a BlockMatrix.
 
@@ -1149,7 +1176,7 @@ class CoordinateMatrix(DistributedMatrix):
         return BlockMatrix(java_block_matrix, rowsPerBlock, colsPerBlock)
 
 
-def _convert_to_matrix_block_tuple(block):
+def _convert_to_matrix_block_tuple(block: Any) -> Tuple[Tuple[int, int], 
Matrix]:
     if (
         isinstance(block, tuple)
         and len(block) == 2
@@ -1198,7 +1225,14 @@ class BlockMatrix(DistributedMatrix):
         invoked.
     """
 
-    def __init__(self, blocks, rowsPerBlock, colsPerBlock, numRows=0, 
numCols=0):
+    def __init__(
+        self,
+        blocks: RDD[Tuple[Tuple[int, int], Matrix]],
+        rowsPerBlock: int,
+        colsPerBlock: int,
+        numRows: int = 0,
+        numCols: int = 0,
+    ):
         """
         Note: This docstring is not shown publicly.
 
@@ -1254,7 +1288,7 @@ class BlockMatrix(DistributedMatrix):
         self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
 
     @property
-    def blocks(self):
+    def blocks(self) -> RDD[Tuple[Tuple[int, int], Matrix]]:
         """
         The RDD of sub-matrix blocks
         ((blockRowIndex, blockColIndex), sub-matrix) that form this
@@ -1279,7 +1313,7 @@ class BlockMatrix(DistributedMatrix):
         return blocks
 
     @property
-    def rowsPerBlock(self):
+    def rowsPerBlock(self) -> int:
         """
         Number of rows that make up each block.
 
@@ -1294,7 +1328,7 @@ class BlockMatrix(DistributedMatrix):
         return self._java_matrix_wrapper.call("rowsPerBlock")
 
     @property
-    def colsPerBlock(self):
+    def colsPerBlock(self) -> int:
         """
         Number of columns that make up each block.
 
@@ -1309,7 +1343,7 @@ class BlockMatrix(DistributedMatrix):
         return self._java_matrix_wrapper.call("colsPerBlock")
 
     @property
-    def numRowBlocks(self):
+    def numRowBlocks(self) -> int:
         """
         Number of rows of blocks in the BlockMatrix.
 
@@ -1324,7 +1358,7 @@ class BlockMatrix(DistributedMatrix):
         return self._java_matrix_wrapper.call("numRowBlocks")
 
     @property
-    def numColBlocks(self):
+    def numColBlocks(self) -> int:
         """
         Number of columns of blocks in the BlockMatrix.
 
@@ -1338,7 +1372,7 @@ class BlockMatrix(DistributedMatrix):
         """
         return self._java_matrix_wrapper.call("numColBlocks")
 
-    def numRows(self):
+    def numRows(self) -> int:
         """
         Get or compute the number of rows.
 
@@ -1357,7 +1391,7 @@ class BlockMatrix(DistributedMatrix):
         """
         return self._java_matrix_wrapper.call("numRows")
 
-    def numCols(self):
+    def numCols(self) -> int:
         """
         Get or compute the number of cols.
 
@@ -1377,7 +1411,7 @@ class BlockMatrix(DistributedMatrix):
         return self._java_matrix_wrapper.call("numCols")
 
     @since("2.0.0")
-    def cache(self):
+    def cache(self) -> "BlockMatrix":
         """
         Caches the underlying RDD.
         """
@@ -1385,7 +1419,7 @@ class BlockMatrix(DistributedMatrix):
         return self
 
     @since("2.0.0")
-    def persist(self, storageLevel):
+    def persist(self, storageLevel: StorageLevel) -> "BlockMatrix":
         """
         Persists the underlying RDD with the specified storage level.
         """
@@ -1396,14 +1430,14 @@ class BlockMatrix(DistributedMatrix):
         return self
 
     @since("2.0.0")
-    def validate(self):
+    def validate(self) -> None:
         """
         Validates the block matrix info against the matrix data (`blocks`)
         and throws an exception if any error is found.
         """
         self._java_matrix_wrapper.call("validate")
 
-    def add(self, other):
+    def add(self, other: "BlockMatrix") -> "BlockMatrix":
         """
         Adds two block matrices together. The matrices must have the
         same size and matching `rowsPerBlock` and `colsPerBlock` values.
@@ -1438,7 +1472,7 @@ class BlockMatrix(DistributedMatrix):
         java_block_matrix = self._java_matrix_wrapper.call("add", 
other_java_block_matrix)
         return BlockMatrix(java_block_matrix, self.rowsPerBlock, 
self.colsPerBlock)
 
-    def subtract(self, other):
+    def subtract(self, other: "BlockMatrix") -> "BlockMatrix":
         """
         Subtracts the given block matrix `other` from this block matrix:
         `this - other`. The matrices must have the same size and
@@ -1476,7 +1510,7 @@ class BlockMatrix(DistributedMatrix):
         java_block_matrix = self._java_matrix_wrapper.call("subtract", 
other_java_block_matrix)
         return BlockMatrix(java_block_matrix, self.rowsPerBlock, 
self.colsPerBlock)
 
-    def multiply(self, other):
+    def multiply(self, other: "BlockMatrix") -> "BlockMatrix":
         """
         Left multiplies this BlockMatrix by `other`, another
         BlockMatrix. The `colsPerBlock` of this matrix must equal the
@@ -1513,7 +1547,7 @@ class BlockMatrix(DistributedMatrix):
         java_block_matrix = self._java_matrix_wrapper.call("multiply", 
other_java_block_matrix)
         return BlockMatrix(java_block_matrix, self.rowsPerBlock, 
self.colsPerBlock)
 
-    def transpose(self):
+    def transpose(self) -> "BlockMatrix":
         """
         Transpose this BlockMatrix. Returns a new BlockMatrix
         instance sharing the same underlying data. Is a lazy operation.
@@ -1533,7 +1567,7 @@ class BlockMatrix(DistributedMatrix):
         java_transposed_matrix = self._java_matrix_wrapper.call("transpose")
         return BlockMatrix(java_transposed_matrix, self.colsPerBlock, 
self.rowsPerBlock)
 
-    def toLocalMatrix(self):
+    def toLocalMatrix(self) -> Matrix:
         """
         Collect the distributed matrix on the driver as a DenseMatrix.
 
@@ -1557,7 +1591,7 @@ class BlockMatrix(DistributedMatrix):
         """
         return self._java_matrix_wrapper.call("toLocalMatrix")
 
-    def toIndexedRowMatrix(self):
+    def toIndexedRowMatrix(self) -> IndexedRowMatrix:
         """
         Convert this matrix to an IndexedRowMatrix.
 
@@ -1582,7 +1616,7 @@ class BlockMatrix(DistributedMatrix):
         java_indexed_row_matrix = 
self._java_matrix_wrapper.call("toIndexedRowMatrix")
         return IndexedRowMatrix(java_indexed_row_matrix)
 
-    def toCoordinateMatrix(self):
+    def toCoordinateMatrix(self) -> CoordinateMatrix:
         """
         Convert this matrix to a CoordinateMatrix.
 
@@ -1598,7 +1632,7 @@ class BlockMatrix(DistributedMatrix):
         return CoordinateMatrix(java_coordinate_matrix)
 
 
-def _test():
+def _test() -> None:
     import doctest
     import numpy
     from pyspark.sql import SparkSession
diff --git a/python/pyspark/mllib/linalg/distributed.pyi 
b/python/pyspark/mllib/linalg/distributed.pyi
deleted file mode 100644
index 3d8a0c5..0000000
--- a/python/pyspark/mllib/linalg/distributed.pyi
+++ /dev/null
@@ -1,145 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from typing import Generic, Sequence, Optional, Tuple, TypeVar, Union
-from pyspark.rdd import RDD
-from pyspark.storagelevel import StorageLevel
-from pyspark.mllib.common import JavaModelWrapper
-from pyspark.mllib.linalg import Vector, Matrix, QRDecomposition
-from pyspark.mllib.stat import MultivariateStatisticalSummary
-import pyspark.sql.dataframe
-from numpy import ndarray  # noqa: F401
-
-VectorLike = Union[Vector, Sequence[Union[float, int]]]
-
-UT = TypeVar("UT")
-VT = TypeVar("VT")
-
-class DistributedMatrix:
-    def numRows(self) -> int: ...
-    def numCols(self) -> int: ...
-
-class RowMatrix(DistributedMatrix):
-    def __init__(
-        self,
-        rows: Union[RDD[Vector], pyspark.sql.dataframe.DataFrame],
-        numRows: int = ...,
-        numCols: int = ...,
-    ) -> None: ...
-    @property
-    def rows(self) -> RDD[Vector]: ...
-    def numRows(self) -> int: ...
-    def numCols(self) -> int: ...
-    def computeColumnSummaryStatistics(self) -> 
MultivariateStatisticalSummary: ...
-    def computeCovariance(self) -> Matrix: ...
-    def computeGramianMatrix(self) -> Matrix: ...
-    def columnSimilarities(self, threshold: float = ...) -> CoordinateMatrix: 
...
-    def tallSkinnyQR(self, computeQ: bool = ...) -> QRDecomposition[RowMatrix, 
Matrix]: ...
-    def computeSVD(
-        self, k: int, computeU: bool = ..., rCond: float = ...
-    ) -> SingularValueDecomposition[RowMatrix, Matrix]: ...
-    def computePrincipalComponents(self, k: int) -> Matrix: ...
-    def multiply(self, matrix: Matrix) -> RowMatrix: ...
-
-class SingularValueDecomposition(JavaModelWrapper, Generic[UT, VT]):
-    @property
-    def U(self) -> Optional[UT]: ...
-    @property
-    def s(self) -> Vector: ...
-    @property
-    def V(self) -> VT: ...
-
-class IndexedRow:
-    index: int
-    vector: VectorLike
-    def __init__(self, index: int, vector: VectorLike) -> None: ...
-
-class IndexedRowMatrix(DistributedMatrix):
-    def __init__(
-        self,
-        rows: RDD[Union[Tuple[int, VectorLike], IndexedRow]],
-        numRows: int = ...,
-        numCols: int = ...,
-    ) -> None: ...
-    @property
-    def rows(self) -> RDD[IndexedRow]: ...
-    def numRows(self) -> int: ...
-    def numCols(self) -> int: ...
-    def columnSimilarities(self) -> CoordinateMatrix: ...
-    def computeGramianMatrix(self) -> Matrix: ...
-    def toRowMatrix(self) -> RowMatrix: ...
-    def toCoordinateMatrix(self) -> CoordinateMatrix: ...
-    def toBlockMatrix(self, rowsPerBlock: int = ..., colsPerBlock: int = ...) 
-> BlockMatrix: ...
-    def computeSVD(
-        self, k: int, computeU: bool = ..., rCond: float = ...
-    ) -> SingularValueDecomposition[IndexedRowMatrix, Matrix]: ...
-    def multiply(self, matrix: Matrix) -> IndexedRowMatrix: ...
-
-class MatrixEntry:
-    i: int
-    j: int
-    value: float
-    def __init__(self, i: int, j: int, value: float) -> None: ...
-
-class CoordinateMatrix(DistributedMatrix):
-    def __init__(
-        self,
-        entries: RDD[Union[Tuple[int, int, float], MatrixEntry]],
-        numRows: int = ...,
-        numCols: int = ...,
-    ) -> None: ...
-    @property
-    def entries(self) -> RDD[MatrixEntry]: ...
-    def numRows(self) -> int: ...
-    def numCols(self) -> int: ...
-    def transpose(self) -> CoordinateMatrix: ...
-    def toRowMatrix(self) -> RowMatrix: ...
-    def toIndexedRowMatrix(self) -> IndexedRowMatrix: ...
-    def toBlockMatrix(self, rowsPerBlock: int = ..., colsPerBlock: int = ...) 
-> BlockMatrix: ...
-
-class BlockMatrix(DistributedMatrix):
-    def __init__(
-        self,
-        blocks: RDD[Tuple[Tuple[int, int], Matrix]],
-        rowsPerBlock: int,
-        colsPerBlock: int,
-        numRows: int = ...,
-        numCols: int = ...,
-    ) -> None: ...
-    @property
-    def blocks(self) -> RDD[Tuple[Tuple[int, int], Matrix]]: ...
-    @property
-    def rowsPerBlock(self) -> int: ...
-    @property
-    def colsPerBlock(self) -> int: ...
-    @property
-    def numRowBlocks(self) -> int: ...
-    @property
-    def numColBlocks(self) -> int: ...
-    def numRows(self) -> int: ...
-    def numCols(self) -> int: ...
-    def cache(self) -> BlockMatrix: ...
-    def persist(self, storageLevel: StorageLevel) -> BlockMatrix: ...
-    def validate(self) -> None: ...
-    def add(self, other: BlockMatrix) -> BlockMatrix: ...
-    def subtract(self, other: BlockMatrix) -> BlockMatrix: ...
-    def multiply(self, other: BlockMatrix) -> BlockMatrix: ...
-    def transpose(self) -> BlockMatrix: ...
-    def toLocalMatrix(self) -> Matrix: ...
-    def toIndexedRowMatrix(self) -> IndexedRowMatrix: ...
-    def toCoordinateMatrix(self) -> CoordinateMatrix: ...

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to