[
https://issues.apache.org/jira/browse/SPARK-14886?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
lichenglin updated SPARK-14886:
-------------------------------
Description:
@Since("1.2.0")
def ndcgAt(k: Int): Double = {
require(k > 0, "ranking position k should be positive")
predictionAndLabels.map { case (pred, lab) =>
val labSet = lab.toSet
if (labSet.nonEmpty) {
val labSetSize = labSet.size
val n = math.min(math.max(pred.length, labSetSize), k)
var maxDcg = 0.0
var dcg = 0.0
var i = 0
while (i < n) {
val gain = 1.0 / math.log(i + 2)
if (labSet.contains(pred(i))) {
dcg += gain
}
if (i < labSetSize) {
maxDcg += gain
}
i += 1
}
dcg / maxDcg
} else {
logWarning("Empty ground truth set, check input data")
0.0
}
}.mean()
}
"if (labSet.contains(pred(i)))" will throw ArrayIndexOutOfBoundsException when
pred's size is less than k.
That means the number of true relevant documents is smaller than the param k.
To reproduce, just try this with sample_movielens_data.txt.
precisionAt is OK only because it has
val n = math.min(pred.length, k)
was:
@Since("1.2.0")
def ndcgAt(k: Int): Double = {
require(k > 0, "ranking position k should be positive")
predictionAndLabels.map { case (pred, lab) =>
val labSet = lab.toSet
if (labSet.nonEmpty) {
val labSetSize = labSet.size
val n = math.min(math.max(pred.length, labSetSize), k)
var maxDcg = 0.0
var dcg = 0.0
var i = 0
while (i < n) {
val gain = 1.0 / math.log(i + 2)
if (labSet.contains(pred(i))) {
dcg += gain
}
if (i < labSetSize) {
maxDcg += gain
}
i += 1
}
dcg / maxDcg
} else {
logWarning("Empty ground truth set, check input data")
0.0
}
}.mean()
}
if (labSet.contains(pred(i))) will throw ArrayIndexOutOfBoundsException when
the number of true relevant documents is smaller than the param k.
To reproduce, just try this with sample_movielens_data.txt.
precisionAt is ok just because it has
val n = math.min(pred.length, k)
> RankingMetrics.ndcgAt throw java.lang.ArrayIndexOutOfBoundsException
> ----------------------------------------------------------------------
>
> Key: SPARK-14886
> URL: https://issues.apache.org/jira/browse/SPARK-14886
> Project: Spark
> Issue Type: Bug
> Components: MLlib
> Affects Versions: 1.6.1
> Reporter: lichenglin
>
> @Since("1.2.0")
> def ndcgAt(k: Int): Double = {
> require(k > 0, "ranking position k should be positive")
> predictionAndLabels.map { case (pred, lab) =>
> val labSet = lab.toSet
> if (labSet.nonEmpty) {
> val labSetSize = labSet.size
> val n = math.min(math.max(pred.length, labSetSize), k)
> var maxDcg = 0.0
> var dcg = 0.0
> var i = 0
> while (i < n) {
> val gain = 1.0 / math.log(i + 2)
> if (labSet.contains(pred(i))) {
> dcg += gain
> }
> if (i < labSetSize) {
> maxDcg += gain
> }
> i += 1
> }
> dcg / maxDcg
> } else {
> logWarning("Empty ground truth set, check input data")
> 0.0
> }
> }.mean()
> }
> "if (labSet.contains(pred(i)))" will throw ArrayIndexOutOfBoundsException
> when pred's size is less than k.
> That means the number of true relevant documents is smaller than the param k.
> To reproduce, just try this with sample_movielens_data.txt.
> precisionAt is ok just because it has
> val n = math.min(pred.length, k)
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]