Github user mengxr commented on a diff in the pull request:
https://github.com/apache/spark/pull/4501#discussion_r24435087
--- Diff: mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala ---
@@ -450,34 +450,25 @@ private[clustering] object LDA {
     // Create vertices.
     // Initially, we use random soft assignments of tokens to topics (random gamma).
-    val edgesWithGamma: RDD[(Edge[TokenCount], TopicCounts)] =
-      edges.mapPartitionsWithIndex { case (partIndex, partEdges) =>
-        val random = new Random(partIndex + randomSeed)
-        partEdges.map { edge =>
-          // Create a random gamma_{wjk}
-          (edge, normalize(BDV.fill[Double](k)(random.nextDouble()), 1.0))
+    def createVertices(): RDD[(VertexId, TopicCounts)] = {
+      val verticesTMP: RDD[(VertexId, TopicCounts)] =
+        edges.mapPartitionsWithIndex { case (partIndex, partEdges) =>
+          val random = new Random(partIndex + randomSeed)
+          partEdges.flatMap { edge =>
+            val gamma = normalize(BDV.fill[Double](k)(random.nextDouble()), 1.0)
+            val sum = BDV.zeros[Double](k)
+            brzAxpy(edge.attr, gamma, sum)
+
+            Seq((edge.srcId, sum), (edge.dstId, sum))
+          }
         }
-      }
-    def createVertices(sendToWhere: Edge[TokenCount] => VertexId): RDD[(VertexId, TopicCounts)] = {
-      val verticesTMP: RDD[(VertexId, (TokenCount, TopicCounts))] =
-        edgesWithGamma.map { case (edge, gamma: TopicCounts) =>
-          (sendToWhere(edge), (edge.attr, gamma))
-        }
-      verticesTMP.aggregateByKey(BDV.zeros[Double](k))(
-        (sum, t) => {
-          brzAxpy(t._1, t._2, sum)
-          sum
-        },
-        (sum0, sum1) => {
-          sum0 += sum1
-        }
-      )
+      verticesTMP.reduceByKey((sum0, sum1) => { sum0 + sum1 })
--- End diff --
This can be written more concisely as `reduceByKey(_ + _)`.
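For reference, a minimal self-contained sketch of why the placeholder form is a drop-in replacement; the object name, local master, and toy data below are made up for illustration (and it assumes the pair-RDD implicits that recent Spark versions pick up automatically). It simply checks that `_ + _` sums breeze `DenseVector` values per key exactly like the explicit `(sum0, sum1) => sum0 + sum1` closure:

```scala
import breeze.linalg.{DenseVector => BDV}
import org.apache.spark.{SparkConf, SparkContext}

object ReduceByKeySketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setAppName("reduceByKey-sketch").setMaster("local[2]"))
    val k = 3
    // Toy (vertexId, topic-count vector) pairs standing in for verticesTMP.
    val verticesTMP = sc.parallelize(Seq(
      (1L, BDV.fill[Double](k)(1.0)),
      (1L, BDV.fill[Double](k)(2.0)),
      (2L, BDV.fill[Double](k)(0.5))
    ))
    // breeze defines elementwise + on DenseVector, so the underscore form is
    // equivalent to (sum0, sum1) => sum0 + sum1.
    val summed = verticesTMP.reduceByKey(_ + _)
    // Expect key 1 -> DenseVector(3.0, 3.0, 3.0), key 2 -> DenseVector(0.5, 0.5, 0.5).
    summed.collect().foreach(println)
    sc.stop()
  }
}
```

Either form allocates a new vector per merge; the placeholder version just drops the redundant braces and parameter names.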