Repository: spark Updated Branches: refs/heads/master 10046ea76 -> 8ac71d62d
[SPARK-11084] [ML] [PYTHON] Check if index can contain non-zero value before binary search Currently, `SparseVector.__getitem__` executes `np.searchsorted` first and only afterwards checks whether the result is in the expected range. It is possible to check whether the index can hold a non-zero value before executing `np.searchsorted`, and so skip the binary search entirely when it cannot. Author: zero323 <[email protected]> Closes #9098 from zero323/sparse_vector_getitem_improved. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8ac71d62 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8ac71d62 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8ac71d62 Branch: refs/heads/master Commit: 8ac71d62d976bbfd0159cac6816dd8fa580ae1cb Parents: 10046ea Author: zero323 <[email protected]> Authored: Fri Oct 16 15:53:26 2015 -0700 Committer: Joseph K. Bradley <[email protected]> Committed: Fri Oct 16 15:53:26 2015 -0700 ---------------------------------------------------------------------- python/pyspark/mllib/linalg/__init__.py | 4 ++-- python/pyspark/mllib/tests.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/8ac71d62/python/pyspark/mllib/linalg/__init__.py ---------------------------------------------------------------------- diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index 5276eb4..ae9ce58 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -770,10 +770,10 @@ class SparseVector(Vector): if index < 0: index += self.size - insert_index = np.searchsorted(inds, index) - if insert_index >= inds.size: + if (inds.size == 0) or (index > inds.item(-1)): return 0. 
+ insert_index = np.searchsorted(inds, index) row_ind = inds[insert_index] if row_ind == index: return vals[insert_index] http://git-wip-us.apache.org/repos/asf/spark/blob/8ac71d62/python/pyspark/mllib/tests.py ---------------------------------------------------------------------- diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py index 2a6a5cd..2ad69a0 100644 --- a/python/pyspark/mllib/tests.py +++ b/python/pyspark/mllib/tests.py @@ -252,6 +252,16 @@ class VectorTests(MLlibTestCase): for ind in [7.8, '1']: self.assertRaises(TypeError, sv.__getitem__, ind) + zeros = SparseVector(4, {}) + self.assertEqual(zeros[0], 0.0) + self.assertEqual(zeros[3], 0.0) + for ind in [4, -5]: + self.assertRaises(ValueError, zeros.__getitem__, ind) + + empty = SparseVector(0, {}) + for ind in [-1, 0, 1]: + self.assertRaises(ValueError, empty.__getitem__, ind) + def test_matrix_indexing(self): mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10]) expected = [[0, 6], [1, 8], [4, 10]] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
