GitHub user jkbradley commented on a diff in the pull request:
https://github.com/apache/spark/pull/7376#discussion_r34618039
--- Diff:
mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala
---
@@ -154,6 +155,29 @@ class NaiveBayesSuite extends SparkFunSuite with
MLlibTestSparkContext {
// Test prediction on Array.
validatePrediction(validationData.map(row =>
model.predict(row.features)), validationData)
+
+ // Test posteriors
+ validationData.map(_.features).foreach { features =>
+ val predicted = model.predictProbabilities(features).toArray
+ assert(predicted.sum ~== 1.0 relTol 1.0e-10)
+ val expected = expectedMultinomialProbabilities(model, features)
+ expected.zip(predicted).foreach { case (e, p) => assert(e ~== p
relTol 1.0e-10) }
+ }
+ }
+
+ /**
+ * @param model Multinomial Naive Bayes model
+ * @param testData input to compute posterior probabilities for
+ * @return posterior class probabilities (in order of labels) for input
+ */
+ private def expectedMultinomialProbabilities(model: NaiveBayesModel,
testData: Vector) = {
+ val piVector = new BDV(model.pi)
+ // model.labels is row-major; treat it as col-major representation of
transpose, and transpose:
--- End diff --
In the code comment above, "model.labels" should be "model.theta" ("labels" --> "theta").
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]