zhengruifeng commented on pull request #30468:
URL: https://github.com/apache/spark/pull/30468#issuecomment-732673641
commit 543a41f:
scala> val start = System.currentTimeMillis;
start: Long = 1606185191500
scala> model.recommendForAllUsers(10).count
res1: Long = 283228
scala> model.recommendForAllItems(10).count
res2: Long = 53889
scala> val end = System.currentTimeMillis;
end: Long = 1606185338722
scala> end - start
res3: Long = 147222
I also tried using `BoundedPriorityQueue[Int]` instead of
`new BoundedPriorityQueue[(Int, Float)]`; it is faster than commit b645968, but
still slower than using `GuavaOrdering`.
```
val ratings = srcFactorsBlocked.crossJoin(dstFactorsBlocked)
.as[(Array[Int], Array[Float], Array[Int], Array[Float])]
.mapPartitions { iter =>
var buffer: Array[Float] = null
var pq: BoundedPriorityQueue[Int] = null
iter.flatMap { case (srcIds, srcMat, dstIds, dstMat) =>
require(srcMat.length == srcIds.length * rank)
require(dstMat.length == dstIds.length * rank)
val m = srcIds.length
val n = dstIds.length
if (buffer == null || buffer.length < n) {
buffer = Array.ofDim[Float](n)
pq = new
BoundedPriorityQueue[Int](num)(Ordering.by(buffer.apply))
}
Iterator.range(0, m).flatMap { i =>
// buffer = i-th vec in srcMat * dstMat
BLAS.f2jBLAS.sgemv("T", rank, n, 1.0F, dstMat, 0, rank,
srcMat, i * rank, 1, 0.0F, buffer, 0, 1)
pq.clear()
pq ++= Iterator.range(0, n)
val srcId = srcIds(i)
pq.iterator.map { j => (srcId, dstIds(j), buffer(j)) }
}
}
}
```
scala> val start = System.currentTimeMillis;
start: Long = 1606187052784
scala> model.recommendForAllUsers(10).count
res1: Long = 283228
scala> model.recommendForAllItems(10).count
res2: Long = 53889
scala> val end = System.currentTimeMillis;
end: Long = 1606187220213
scala> end - start
res3: Long = 167429
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]