Repository: spark Updated Branches: refs/heads/branch-1.0 3f708f566 -> 898fc3480
fix bugs of dot in python If `self.theta` is not transposed with `transpose()`, a *ValueError: matrices are not aligned* is raised. The former test case simply ignored this situation. Author: Xusen Yin <[email protected]> Closes #463 from yinxusen/python-naive-bayes and squashes the following commits: fcbe3bc [Xusen Yin] fix bugs of dot in python (cherry picked from commit c919798f0912dc03c8365b9a384d9ee6d5b25c51) Signed-off-by: Patrick Wendell <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f2f093c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f2f093c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f2f093c Branch: refs/heads/branch-1.0 Commit: 4f2f093c5b65b74869068d5690a4d2b0e0b5f759 Parents: 3f708f5 Author: Xusen Yin <[email protected]> Authored: Tue Apr 22 11:06:18 2014 -0700 Committer: Patrick Wendell <[email protected]> Committed: Tue Apr 22 11:22:24 2014 -0700 ---------------------------------------------------------------------- python/pyspark/mllib/classification.py | 2 +- python/pyspark/mllib/tests.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/4f2f093c/python/pyspark/mllib/classification.py ---------------------------------------------------------------------- diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index 3a23e08..c584459 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -154,7 +154,7 @@ class NaiveBayesModel(object): def predict(self, x): """Return the most likely class for a data vector x""" - return self.labels[numpy.argmax(self.pi + _dot(x, self.theta))] + return self.labels[numpy.argmax(self.pi + _dot(x, self.theta.transpose()))] class NaiveBayes(object): @classmethod 
http://git-wip-us.apache.org/repos/asf/spark/blob/4f2f093c/python/pyspark/mllib/tests.py ---------------------------------------------------------------------- diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py index d4771d7..1ee96bb 100644 --- a/python/pyspark/mllib/tests.py +++ b/python/pyspark/mllib/tests.py @@ -104,10 +104,10 @@ class ListTests(PySparkTestCase): def test_classification(self): from pyspark.mllib.classification import LogisticRegressionWithSGD, SVMWithSGD, NaiveBayes data = [ - LabeledPoint(0.0, [1, 0]), - LabeledPoint(1.0, [0, 1]), - LabeledPoint(0.0, [2, 0]), - LabeledPoint(1.0, [0, 2]) + LabeledPoint(0.0, [1, 0, 0]), + LabeledPoint(1.0, [0, 1, 1]), + LabeledPoint(0.0, [2, 0, 0]), + LabeledPoint(1.0, [0, 2, 1]) ] rdd = self.sc.parallelize(data) features = [p.features.tolist() for p in data]
