Github user mengxr commented on a diff in the pull request:
https://github.com/apache/spark/pull/21195#discussion_r186555908
--- Diff:
mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala ---
@@ -323,4 +324,21 @@ class LDASuite extends SparkFunSuite with
MLlibTestSparkContext with DefaultRead
assert(model.getOptimizer === optimizer)
}
}
+
+ test("LDA with Array input") {
+ def trainAndLogLikelihoodAndPerplexity(dataset: Dataset[_]): (Double,
Double) = {
+ val model = new
LDA().setK(k).setOptimizer("online").setMaxIter(1).setSeed(1).fit(dataset)
+ (model.logLikelihood(dataset), model.logPerplexity(dataset))
+ }
+
+ val (newDataset, newDatasetD, newDatasetF) =
MLTestingUtils.generateArrayFeatureDataset(dataset)
+ val (ll, lp) = trainAndLogLikelihoodAndPerplexity(newDataset)
--- End diff --
minor: the outputs are not used. I expect they will be used once we fix
SPARK-22210.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org