Repository: spark Updated Branches: refs/heads/branch-2.3 42c1fdd22 -> f5983823e
[SPARK-25124][ML] VectorSizeHint setSize and getSize don't return values backport to 2.3 ## What changes were proposed in this pull request? In feature.py, the VectorSizeHint methods setSize and getSize don't return a value. Add the missing return statements. (Please fill in changes proposed in this fix) ## How was this patch tested? Unit test added. Closes #22228 from huaxingao/spark-25124-2.3. Authored-by: Huaxin Gao <huax...@us.ibm.com> Signed-off-by: Joseph K. Bradley <jos...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f5983823 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f5983823 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f5983823 Branch: refs/heads/branch-2.3 Commit: f5983823e9b4a3b4762481306ea071a73f5742fc Parents: 42c1fdd Author: Huaxin Gao <huax...@us.ibm.com> Authored: Fri Aug 24 15:41:18 2018 -0700 Committer: Joseph K. Bradley <jos...@databricks.com> Committed: Fri Aug 24 15:41:18 2018 -0700 ---------------------------------------------------------------------- python/pyspark/ml/feature.py | 4 ++-- python/pyspark/ml/tests.py | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/f5983823/python/pyspark/ml/feature.py ---------------------------------------------------------------------- diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 04b07e6..a444fe0 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -3673,12 +3673,12 @@ class VectorSizeHint(JavaTransformer, HasInputCol, HasHandleInvalid, JavaMLReada @since("2.3.0") def getSize(self): """ Gets size param, the size of vectors in `inputCol`.""" - self.getOrDefault(self.size) + return self.getOrDefault(self.size) @since("2.3.0") def setSize(self, value): """ Sets size param, the size of vectors in `inputCol`.""" - self._set(size=value) + 
return self._set(size=value) if __name__ == "__main__": http://git-wip-us.apache.org/repos/asf/spark/blob/f5983823/python/pyspark/ml/tests.py ---------------------------------------------------------------------- diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 1af2b91..49912d2 100755 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -678,6 +678,23 @@ class FeatureTests(SparkSessionTestCase): expected2 = [Row(id=0, indexed=0.0), Row(id=1, indexed=1.0)] self.assertEqual(actual2, expected2) + def test_vector_size_hint(self): + df = self.spark.createDataFrame( + [(0, Vectors.dense([0.0, 10.0, 0.5])), + (1, Vectors.dense([1.0, 11.0, 0.5, 0.6])), + (2, Vectors.dense([2.0, 12.0]))], + ["id", "vector"]) + + sizeHint = VectorSizeHint( + inputCol="vector", + handleInvalid="skip") + sizeHint.setSize(3) + self.assertEqual(sizeHint.getSize(), 3) + + output = sizeHint.transform(df).head().vector + expected = DenseVector([0.0, 10.0, 0.5]) + self.assertEqual(output, expected) + class HasInducedError(Params): --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org