This is an automated email from the ASF dual-hosted git repository.
zero323 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new c032928 [SPARK-37430][PYTHON][MLLIB] Inline hints for pyspark.mllib.linalg.distributed
c032928 is described below
commit c032928515e74367137c668ce692d8fd53696485
Author: hi-zir <[email protected]>
AuthorDate: Sat Mar 12 23:01:18 2022 +0100
[SPARK-37430][PYTHON][MLLIB] Inline hints for pyspark.mllib.linalg.distributed
### What changes were proposed in this pull request?
Inline type hints for pyspark.mllib.linalg.distributed
### Why are the changes needed?
We can take advantage of static type checking within the functions by
inlining the type hints.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing tests.
Closes #35739 from hi-zir/SPARK-37430.
Authored-by: hi-zir <[email protected]>
Signed-off-by: zero323 <[email protected]>
---
python/pyspark/mllib/linalg/distributed.py | 172 +++++++++++++++++-----------
python/pyspark/mllib/linalg/distributed.pyi | 145 -----------------------
2 files changed, 103 insertions(+), 214 deletions(-)
diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py
index f892d41..d49af66 100644
--- a/python/pyspark/mllib/linalg/distributed.py
+++ b/python/pyspark/mllib/linalg/distributed.py
@@ -20,16 +20,22 @@ Package for distributed linear algebra.
"""
import sys
+from typing import Any, Generic, Optional, Tuple, TypeVar, Union, TYPE_CHECKING
from py4j.java_gateway import JavaObject
from pyspark import RDD, since
from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper
-from pyspark.mllib.linalg import _convert_to_vector, DenseMatrix, Matrix,
QRDecomposition
+from pyspark.mllib.linalg import _convert_to_vector, DenseMatrix, Matrix,
QRDecomposition, Vector
from pyspark.mllib.stat import MultivariateStatisticalSummary
from pyspark.sql import DataFrame
from pyspark.storagelevel import StorageLevel
+UT = TypeVar("UT", bound="DistributedMatrix")
+VT = TypeVar("VT", bound="Matrix")
+
+if TYPE_CHECKING:
+ from pyspark.ml._typing import VectorLike
__all__ = [
"BlockMatrix",
@@ -50,11 +56,11 @@ class DistributedMatrix:
"""
- def numRows(self):
+ def numRows(self) -> int:
"""Get or compute the number of rows."""
raise NotImplementedError
- def numCols(self):
+ def numCols(self) -> int:
"""Get or compute the number of cols."""
raise NotImplementedError
@@ -82,7 +88,12 @@ class RowMatrix(DistributedMatrix):
the first row.
"""
- def __init__(self, rows, numRows=0, numCols=0):
+ def __init__(
+ self,
+ rows: Union[RDD[Vector], DataFrame],
+ numRows: int = 0,
+ numCols: int = 0,
+ ):
"""
Note: This docstring is not shown publicly.
@@ -121,7 +132,7 @@ class RowMatrix(DistributedMatrix):
self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
@property
- def rows(self):
+ def rows(self) -> RDD[Vector]:
"""
Rows of the RowMatrix stored as an RDD of vectors.
@@ -134,7 +145,7 @@ class RowMatrix(DistributedMatrix):
"""
return self._java_matrix_wrapper.call("rows")
- def numRows(self):
+ def numRows(self) -> int:
"""
Get or compute the number of rows.
@@ -153,7 +164,7 @@ class RowMatrix(DistributedMatrix):
"""
return self._java_matrix_wrapper.call("numRows")
- def numCols(self):
+ def numCols(self) -> int:
"""
Get or compute the number of cols.
@@ -172,7 +183,7 @@ class RowMatrix(DistributedMatrix):
"""
return self._java_matrix_wrapper.call("numCols")
- def computeColumnSummaryStatistics(self):
+ def computeColumnSummaryStatistics(self) -> MultivariateStatisticalSummary:
"""
Computes column-wise summary statistics.
@@ -195,7 +206,7 @@ class RowMatrix(DistributedMatrix):
java_col_stats =
self._java_matrix_wrapper.call("computeColumnSummaryStatistics")
return MultivariateStatisticalSummary(java_col_stats)
- def computeCovariance(self):
+ def computeCovariance(self) -> Matrix:
"""
Computes the covariance matrix, treating each row as an
observation.
@@ -216,7 +227,7 @@ class RowMatrix(DistributedMatrix):
"""
return self._java_matrix_wrapper.call("computeCovariance")
- def computeGramianMatrix(self):
+ def computeGramianMatrix(self) -> Matrix:
"""
Computes the Gramian matrix `A^T A`.
@@ -237,7 +248,7 @@ class RowMatrix(DistributedMatrix):
return self._java_matrix_wrapper.call("computeGramianMatrix")
@since("2.0.0")
- def columnSimilarities(self, threshold=0.0):
+ def columnSimilarities(self, threshold: float = 0.0) -> "CoordinateMatrix":
"""
Compute similarities between columns of this matrix.
@@ -310,7 +321,9 @@ class RowMatrix(DistributedMatrix):
java_sims_mat = self._java_matrix_wrapper.call("columnSimilarities",
float(threshold))
return CoordinateMatrix(java_sims_mat)
- def tallSkinnyQR(self, computeQ=False):
+ def tallSkinnyQR(
+ self, computeQ: bool = False
+ ) -> QRDecomposition[Optional["RowMatrix"], Matrix]:
"""
Compute the QR decomposition of this RowMatrix.
@@ -360,7 +373,9 @@ class RowMatrix(DistributedMatrix):
R = decomp.call("R")
return QRDecomposition(Q, R)
- def computeSVD(self, k, computeU=False, rCond=1e-9):
+ def computeSVD(
+ self, k: int, computeU: bool = False, rCond: float = 1e-9
+ ) -> "SingularValueDecomposition[RowMatrix, Matrix]":
"""
Computes the singular value decomposition of the RowMatrix.
@@ -414,7 +429,7 @@ class RowMatrix(DistributedMatrix):
j_model = self._java_matrix_wrapper.call("computeSVD", int(k),
bool(computeU), float(rCond))
return SingularValueDecomposition(j_model)
- def computePrincipalComponents(self, k):
+ def computePrincipalComponents(self, k: int) -> Matrix:
"""
Computes the k principal components of the given row matrix
@@ -450,7 +465,7 @@ class RowMatrix(DistributedMatrix):
"""
return self._java_matrix_wrapper.call("computePrincipalComponents", k)
- def multiply(self, matrix):
+ def multiply(self, matrix: Matrix) -> "RowMatrix":
"""
Multiply this matrix by a local dense matrix on the right.
@@ -478,16 +493,16 @@ class RowMatrix(DistributedMatrix):
return RowMatrix(j_model)
-class SingularValueDecomposition(JavaModelWrapper):
+class SingularValueDecomposition(JavaModelWrapper, Generic[UT, VT]):
"""
Represents singular value decomposition (SVD) factors.
.. versionadded:: 2.2.0
"""
- @property
+ @property # type: ignore[misc]
@since("2.2.0")
- def U(self):
+ def U(self) -> Optional[UT]: # type: ignore[return]
"""
Returns a distributed matrix whose columns are the left
singular vectors of the SingularValueDecomposition if computeU was set
to be True.
@@ -496,23 +511,23 @@ class SingularValueDecomposition(JavaModelWrapper):
if u is not None:
mat_name = u.getClass().getSimpleName()
if mat_name == "RowMatrix":
- return RowMatrix(u)
+ return RowMatrix(u) # type: ignore[return-value]
elif mat_name == "IndexedRowMatrix":
- return IndexedRowMatrix(u)
+ return IndexedRowMatrix(u) # type: ignore[return-value]
else:
raise TypeError("Expected RowMatrix/IndexedRowMatrix got %s" %
mat_name)
- @property
+ @property # type: ignore[misc]
@since("2.2.0")
- def s(self):
+ def s(self) -> Vector:
"""
Returns a DenseVector with singular values in descending order.
"""
return self.call("s")
- @property
+ @property # type: ignore[misc]
@since("2.2.0")
- def V(self):
+ def V(self) -> VT:
"""
Returns a DenseMatrix whose columns are the right singular
vectors of the SingularValueDecomposition.
@@ -534,15 +549,15 @@ class IndexedRow:
The row in the matrix at the given index.
"""
- def __init__(self, index, vector):
+ def __init__(self, index: int, vector: "VectorLike") -> None:
self.index = int(index)
self.vector = _convert_to_vector(vector)
- def __repr__(self):
+ def __repr__(self) -> str:
return "IndexedRow(%s, %s)" % (self.index, self.vector)
-def _convert_to_indexed_row(row):
+def _convert_to_indexed_row(row: Any) -> IndexedRow:
if isinstance(row, IndexedRow):
return row
elif isinstance(row, tuple) and len(row) == 2:
@@ -572,7 +587,12 @@ class IndexedRowMatrix(DistributedMatrix):
the first row.
"""
- def __init__(self, rows, numRows=0, numCols=0):
+ def __init__(
+ self,
+ rows: RDD[Union[Tuple[int, "VectorLike"], IndexedRow]],
+ numRows: int = 0,
+ numCols: int = 0,
+ ):
"""
Note: This docstring is not shown publicly.
@@ -623,7 +643,7 @@ class IndexedRowMatrix(DistributedMatrix):
self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
@property
- def rows(self):
+ def rows(self) -> RDD[IndexedRow]:
"""
Rows of the IndexedRowMatrix stored as an RDD of IndexedRows.
@@ -643,7 +663,7 @@ class IndexedRowMatrix(DistributedMatrix):
rows = rows_df.rdd.map(lambda row: IndexedRow(row[0], row[1]))
return rows
- def numRows(self):
+ def numRows(self) -> int:
"""
Get or compute the number of rows.
@@ -664,7 +684,7 @@ class IndexedRowMatrix(DistributedMatrix):
"""
return self._java_matrix_wrapper.call("numRows")
- def numCols(self):
+ def numCols(self) -> int:
"""
Get or compute the number of cols.
@@ -685,7 +705,7 @@ class IndexedRowMatrix(DistributedMatrix):
"""
return self._java_matrix_wrapper.call("numCols")
- def columnSimilarities(self):
+ def columnSimilarities(self) -> "CoordinateMatrix":
"""
Compute all cosine similarities between columns.
@@ -701,7 +721,7 @@ class IndexedRowMatrix(DistributedMatrix):
java_coordinate_matrix =
self._java_matrix_wrapper.call("columnSimilarities")
return CoordinateMatrix(java_coordinate_matrix)
- def computeGramianMatrix(self):
+ def computeGramianMatrix(self) -> Matrix:
"""
Computes the Gramian matrix `A^T A`.
@@ -722,7 +742,7 @@ class IndexedRowMatrix(DistributedMatrix):
"""
return self._java_matrix_wrapper.call("computeGramianMatrix")
- def toRowMatrix(self):
+ def toRowMatrix(self) -> RowMatrix:
"""
Convert this matrix to a RowMatrix.
@@ -737,7 +757,7 @@ class IndexedRowMatrix(DistributedMatrix):
java_row_matrix = self._java_matrix_wrapper.call("toRowMatrix")
return RowMatrix(java_row_matrix)
- def toCoordinateMatrix(self):
+ def toCoordinateMatrix(self) -> "CoordinateMatrix":
"""
Convert this matrix to a CoordinateMatrix.
@@ -752,7 +772,7 @@ class IndexedRowMatrix(DistributedMatrix):
java_coordinate_matrix =
self._java_matrix_wrapper.call("toCoordinateMatrix")
return CoordinateMatrix(java_coordinate_matrix)
- def toBlockMatrix(self, rowsPerBlock=1024, colsPerBlock=1024):
+ def toBlockMatrix(self, rowsPerBlock: int = 1024, colsPerBlock: int =
1024) -> "BlockMatrix":
"""
Convert this matrix to a BlockMatrix.
@@ -787,7 +807,9 @@ class IndexedRowMatrix(DistributedMatrix):
)
return BlockMatrix(java_block_matrix, rowsPerBlock, colsPerBlock)
- def computeSVD(self, k, computeU=False, rCond=1e-9):
+ def computeSVD(
+ self, k: int, computeU: bool = False, rCond: float = 1e-9
+ ) -> SingularValueDecomposition["IndexedRowMatrix", Matrix]:
"""
Computes the singular value decomposition of the IndexedRowMatrix.
@@ -841,7 +863,7 @@ class IndexedRowMatrix(DistributedMatrix):
j_model = self._java_matrix_wrapper.call("computeSVD", int(k),
bool(computeU), float(rCond))
return SingularValueDecomposition(j_model)
- def multiply(self, matrix):
+ def multiply(self, matrix: Matrix) -> "IndexedRowMatrix":
"""
Multiply this matrix by a local dense matrix on the right.
@@ -884,16 +906,16 @@ class MatrixEntry:
The (i, j)th entry of the matrix, as a float.
"""
- def __init__(self, i, j, value):
+ def __init__(self, i: int, j: int, value: float) -> None:
self.i = int(i)
self.j = int(j)
self.value = float(value)
- def __repr__(self):
+ def __repr__(self) -> str:
return "MatrixEntry(%s, %s, %s)" % (self.i, self.j, self.value)
-def _convert_to_matrix_entry(entry):
+def _convert_to_matrix_entry(entry: Any) -> MatrixEntry:
if isinstance(entry, MatrixEntry):
return entry
elif isinstance(entry, tuple) and len(entry) == 3:
@@ -923,7 +945,12 @@ class CoordinateMatrix(DistributedMatrix):
index plus one.
"""
- def __init__(self, entries, numRows=0, numCols=0):
+ def __init__(
+ self,
+ entries: RDD[Union[Tuple[int, int, float], MatrixEntry]],
+ numRows: int = 0,
+ numCols: int = 0,
+ ):
"""
Note: This docstring is not shown publicly.
@@ -975,7 +1002,7 @@ class CoordinateMatrix(DistributedMatrix):
self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
@property
- def entries(self):
+ def entries(self) -> RDD[MatrixEntry]:
"""
Entries of the CoordinateMatrix stored as an RDD of
MatrixEntries.
@@ -996,7 +1023,7 @@ class CoordinateMatrix(DistributedMatrix):
entries = entries_df.rdd.map(lambda row: MatrixEntry(row[0], row[1],
row[2]))
return entries
- def numRows(self):
+ def numRows(self) -> int:
"""
Get or compute the number of rows.
@@ -1016,7 +1043,7 @@ class CoordinateMatrix(DistributedMatrix):
"""
return self._java_matrix_wrapper.call("numRows")
- def numCols(self):
+ def numCols(self) -> int:
"""
Get or compute the number of cols.
@@ -1036,7 +1063,7 @@ class CoordinateMatrix(DistributedMatrix):
"""
return self._java_matrix_wrapper.call("numCols")
- def transpose(self):
+ def transpose(self) -> "CoordinateMatrix":
"""
Transpose this CoordinateMatrix.
@@ -1059,7 +1086,7 @@ class CoordinateMatrix(DistributedMatrix):
java_transposed_matrix = self._java_matrix_wrapper.call("transpose")
return CoordinateMatrix(java_transposed_matrix)
- def toRowMatrix(self):
+ def toRowMatrix(self) -> RowMatrix:
"""
Convert this matrix to a RowMatrix.
@@ -1085,7 +1112,7 @@ class CoordinateMatrix(DistributedMatrix):
java_row_matrix = self._java_matrix_wrapper.call("toRowMatrix")
return RowMatrix(java_row_matrix)
- def toIndexedRowMatrix(self):
+ def toIndexedRowMatrix(self) -> IndexedRowMatrix:
"""
Convert this matrix to an IndexedRowMatrix.
@@ -1110,7 +1137,7 @@ class CoordinateMatrix(DistributedMatrix):
java_indexed_row_matrix =
self._java_matrix_wrapper.call("toIndexedRowMatrix")
return IndexedRowMatrix(java_indexed_row_matrix)
- def toBlockMatrix(self, rowsPerBlock=1024, colsPerBlock=1024):
+ def toBlockMatrix(self, rowsPerBlock: int = 1024, colsPerBlock: int =
1024) -> "BlockMatrix":
"""
Convert this matrix to a BlockMatrix.
@@ -1149,7 +1176,7 @@ class CoordinateMatrix(DistributedMatrix):
return BlockMatrix(java_block_matrix, rowsPerBlock, colsPerBlock)
-def _convert_to_matrix_block_tuple(block):
+def _convert_to_matrix_block_tuple(block: Any) -> Tuple[Tuple[int, int],
Matrix]:
if (
isinstance(block, tuple)
and len(block) == 2
@@ -1198,7 +1225,14 @@ class BlockMatrix(DistributedMatrix):
invoked.
"""
- def __init__(self, blocks, rowsPerBlock, colsPerBlock, numRows=0,
numCols=0):
+ def __init__(
+ self,
+ blocks: RDD[Tuple[Tuple[int, int], Matrix]],
+ rowsPerBlock: int,
+ colsPerBlock: int,
+ numRows: int = 0,
+ numCols: int = 0,
+ ):
"""
Note: This docstring is not shown publicly.
@@ -1254,7 +1288,7 @@ class BlockMatrix(DistributedMatrix):
self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
@property
- def blocks(self):
+ def blocks(self) -> RDD[Tuple[Tuple[int, int], Matrix]]:
"""
The RDD of sub-matrix blocks
((blockRowIndex, blockColIndex), sub-matrix) that form this
@@ -1279,7 +1313,7 @@ class BlockMatrix(DistributedMatrix):
return blocks
@property
- def rowsPerBlock(self):
+ def rowsPerBlock(self) -> int:
"""
Number of rows that make up each block.
@@ -1294,7 +1328,7 @@ class BlockMatrix(DistributedMatrix):
return self._java_matrix_wrapper.call("rowsPerBlock")
@property
- def colsPerBlock(self):
+ def colsPerBlock(self) -> int:
"""
Number of columns that make up each block.
@@ -1309,7 +1343,7 @@ class BlockMatrix(DistributedMatrix):
return self._java_matrix_wrapper.call("colsPerBlock")
@property
- def numRowBlocks(self):
+ def numRowBlocks(self) -> int:
"""
Number of rows of blocks in the BlockMatrix.
@@ -1324,7 +1358,7 @@ class BlockMatrix(DistributedMatrix):
return self._java_matrix_wrapper.call("numRowBlocks")
@property
- def numColBlocks(self):
+ def numColBlocks(self) -> int:
"""
Number of columns of blocks in the BlockMatrix.
@@ -1338,7 +1372,7 @@ class BlockMatrix(DistributedMatrix):
"""
return self._java_matrix_wrapper.call("numColBlocks")
- def numRows(self):
+ def numRows(self) -> int:
"""
Get or compute the number of rows.
@@ -1357,7 +1391,7 @@ class BlockMatrix(DistributedMatrix):
"""
return self._java_matrix_wrapper.call("numRows")
- def numCols(self):
+ def numCols(self) -> int:
"""
Get or compute the number of cols.
@@ -1377,7 +1411,7 @@ class BlockMatrix(DistributedMatrix):
return self._java_matrix_wrapper.call("numCols")
@since("2.0.0")
- def cache(self):
+ def cache(self) -> "BlockMatrix":
"""
Caches the underlying RDD.
"""
@@ -1385,7 +1419,7 @@ class BlockMatrix(DistributedMatrix):
return self
@since("2.0.0")
- def persist(self, storageLevel):
+ def persist(self, storageLevel: StorageLevel) -> "BlockMatrix":
"""
Persists the underlying RDD with the specified storage level.
"""
@@ -1396,14 +1430,14 @@ class BlockMatrix(DistributedMatrix):
return self
@since("2.0.0")
- def validate(self):
+ def validate(self) -> None:
"""
Validates the block matrix info against the matrix data (`blocks`)
and throws an exception if any error is found.
"""
self._java_matrix_wrapper.call("validate")
- def add(self, other):
+ def add(self, other: "BlockMatrix") -> "BlockMatrix":
"""
Adds two block matrices together. The matrices must have the
same size and matching `rowsPerBlock` and `colsPerBlock` values.
@@ -1438,7 +1472,7 @@ class BlockMatrix(DistributedMatrix):
java_block_matrix = self._java_matrix_wrapper.call("add",
other_java_block_matrix)
return BlockMatrix(java_block_matrix, self.rowsPerBlock,
self.colsPerBlock)
- def subtract(self, other):
+ def subtract(self, other: "BlockMatrix") -> "BlockMatrix":
"""
Subtracts the given block matrix `other` from this block matrix:
`this - other`. The matrices must have the same size and
@@ -1476,7 +1510,7 @@ class BlockMatrix(DistributedMatrix):
java_block_matrix = self._java_matrix_wrapper.call("subtract",
other_java_block_matrix)
return BlockMatrix(java_block_matrix, self.rowsPerBlock,
self.colsPerBlock)
- def multiply(self, other):
+ def multiply(self, other: "BlockMatrix") -> "BlockMatrix":
"""
Left multiplies this BlockMatrix by `other`, another
BlockMatrix. The `colsPerBlock` of this matrix must equal the
@@ -1513,7 +1547,7 @@ class BlockMatrix(DistributedMatrix):
java_block_matrix = self._java_matrix_wrapper.call("multiply",
other_java_block_matrix)
return BlockMatrix(java_block_matrix, self.rowsPerBlock,
self.colsPerBlock)
- def transpose(self):
+ def transpose(self) -> "BlockMatrix":
"""
Transpose this BlockMatrix. Returns a new BlockMatrix
instance sharing the same underlying data. Is a lazy operation.
@@ -1533,7 +1567,7 @@ class BlockMatrix(DistributedMatrix):
java_transposed_matrix = self._java_matrix_wrapper.call("transpose")
return BlockMatrix(java_transposed_matrix, self.colsPerBlock,
self.rowsPerBlock)
- def toLocalMatrix(self):
+ def toLocalMatrix(self) -> Matrix:
"""
Collect the distributed matrix on the driver as a DenseMatrix.
@@ -1557,7 +1591,7 @@ class BlockMatrix(DistributedMatrix):
"""
return self._java_matrix_wrapper.call("toLocalMatrix")
- def toIndexedRowMatrix(self):
+ def toIndexedRowMatrix(self) -> IndexedRowMatrix:
"""
Convert this matrix to an IndexedRowMatrix.
@@ -1582,7 +1616,7 @@ class BlockMatrix(DistributedMatrix):
java_indexed_row_matrix =
self._java_matrix_wrapper.call("toIndexedRowMatrix")
return IndexedRowMatrix(java_indexed_row_matrix)
- def toCoordinateMatrix(self):
+ def toCoordinateMatrix(self) -> CoordinateMatrix:
"""
Convert this matrix to a CoordinateMatrix.
@@ -1598,7 +1632,7 @@ class BlockMatrix(DistributedMatrix):
return CoordinateMatrix(java_coordinate_matrix)
-def _test():
+def _test() -> None:
import doctest
import numpy
from pyspark.sql import SparkSession
diff --git a/python/pyspark/mllib/linalg/distributed.pyi b/python/pyspark/mllib/linalg/distributed.pyi
deleted file mode 100644
index 3d8a0c5..0000000
--- a/python/pyspark/mllib/linalg/distributed.pyi
+++ /dev/null
@@ -1,145 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from typing import Generic, Sequence, Optional, Tuple, TypeVar, Union
-from pyspark.rdd import RDD
-from pyspark.storagelevel import StorageLevel
-from pyspark.mllib.common import JavaModelWrapper
-from pyspark.mllib.linalg import Vector, Matrix, QRDecomposition
-from pyspark.mllib.stat import MultivariateStatisticalSummary
-import pyspark.sql.dataframe
-from numpy import ndarray # noqa: F401
-
-VectorLike = Union[Vector, Sequence[Union[float, int]]]
-
-UT = TypeVar("UT")
-VT = TypeVar("VT")
-
-class DistributedMatrix:
- def numRows(self) -> int: ...
- def numCols(self) -> int: ...
-
-class RowMatrix(DistributedMatrix):
- def __init__(
- self,
- rows: Union[RDD[Vector], pyspark.sql.dataframe.DataFrame],
- numRows: int = ...,
- numCols: int = ...,
- ) -> None: ...
- @property
- def rows(self) -> RDD[Vector]: ...
- def numRows(self) -> int: ...
- def numCols(self) -> int: ...
- def computeColumnSummaryStatistics(self) ->
MultivariateStatisticalSummary: ...
- def computeCovariance(self) -> Matrix: ...
- def computeGramianMatrix(self) -> Matrix: ...
- def columnSimilarities(self, threshold: float = ...) -> CoordinateMatrix:
...
- def tallSkinnyQR(self, computeQ: bool = ...) -> QRDecomposition[RowMatrix,
Matrix]: ...
- def computeSVD(
- self, k: int, computeU: bool = ..., rCond: float = ...
- ) -> SingularValueDecomposition[RowMatrix, Matrix]: ...
- def computePrincipalComponents(self, k: int) -> Matrix: ...
- def multiply(self, matrix: Matrix) -> RowMatrix: ...
-
-class SingularValueDecomposition(JavaModelWrapper, Generic[UT, VT]):
- @property
- def U(self) -> Optional[UT]: ...
- @property
- def s(self) -> Vector: ...
- @property
- def V(self) -> VT: ...
-
-class IndexedRow:
- index: int
- vector: VectorLike
- def __init__(self, index: int, vector: VectorLike) -> None: ...
-
-class IndexedRowMatrix(DistributedMatrix):
- def __init__(
- self,
- rows: RDD[Union[Tuple[int, VectorLike], IndexedRow]],
- numRows: int = ...,
- numCols: int = ...,
- ) -> None: ...
- @property
- def rows(self) -> RDD[IndexedRow]: ...
- def numRows(self) -> int: ...
- def numCols(self) -> int: ...
- def columnSimilarities(self) -> CoordinateMatrix: ...
- def computeGramianMatrix(self) -> Matrix: ...
- def toRowMatrix(self) -> RowMatrix: ...
- def toCoordinateMatrix(self) -> CoordinateMatrix: ...
- def toBlockMatrix(self, rowsPerBlock: int = ..., colsPerBlock: int = ...)
-> BlockMatrix: ...
- def computeSVD(
- self, k: int, computeU: bool = ..., rCond: float = ...
- ) -> SingularValueDecomposition[IndexedRowMatrix, Matrix]: ...
- def multiply(self, matrix: Matrix) -> IndexedRowMatrix: ...
-
-class MatrixEntry:
- i: int
- j: int
- value: float
- def __init__(self, i: int, j: int, value: float) -> None: ...
-
-class CoordinateMatrix(DistributedMatrix):
- def __init__(
- self,
- entries: RDD[Union[Tuple[int, int, float], MatrixEntry]],
- numRows: int = ...,
- numCols: int = ...,
- ) -> None: ...
- @property
- def entries(self) -> RDD[MatrixEntry]: ...
- def numRows(self) -> int: ...
- def numCols(self) -> int: ...
- def transpose(self) -> CoordinateMatrix: ...
- def toRowMatrix(self) -> RowMatrix: ...
- def toIndexedRowMatrix(self) -> IndexedRowMatrix: ...
- def toBlockMatrix(self, rowsPerBlock: int = ..., colsPerBlock: int = ...)
-> BlockMatrix: ...
-
-class BlockMatrix(DistributedMatrix):
- def __init__(
- self,
- blocks: RDD[Tuple[Tuple[int, int], Matrix]],
- rowsPerBlock: int,
- colsPerBlock: int,
- numRows: int = ...,
- numCols: int = ...,
- ) -> None: ...
- @property
- def blocks(self) -> RDD[Tuple[Tuple[int, int], Matrix]]: ...
- @property
- def rowsPerBlock(self) -> int: ...
- @property
- def colsPerBlock(self) -> int: ...
- @property
- def numRowBlocks(self) -> int: ...
- @property
- def numColBlocks(self) -> int: ...
- def numRows(self) -> int: ...
- def numCols(self) -> int: ...
- def cache(self) -> BlockMatrix: ...
- def persist(self, storageLevel: StorageLevel) -> BlockMatrix: ...
- def validate(self) -> None: ...
- def add(self, other: BlockMatrix) -> BlockMatrix: ...
- def subtract(self, other: BlockMatrix) -> BlockMatrix: ...
- def multiply(self, other: BlockMatrix) -> BlockMatrix: ...
- def transpose(self) -> BlockMatrix: ...
- def toLocalMatrix(self) -> Matrix: ...
- def toIndexedRowMatrix(self) -> IndexedRowMatrix: ...
- def toCoordinateMatrix(self) -> CoordinateMatrix: ...
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]