lichenglin created SPARK-14886:
----------------------------------

             Summary: RankingMetrics.ndcgAt throws
java.lang.ArrayIndexOutOfBoundsException
                 Key: SPARK-14886
                 URL: https://issues.apache.org/jira/browse/SPARK-14886
             Project: Spark
          Issue Type: Bug
          Components: MLlib
    Affects Versions: 1.6.1
            Reporter: lichenglin


 /**
  * Computes the mean, over all (prediction, label) pairs in `predictionAndLabels`,
  * of a truncated NDCG score with binary relevance: a predicted item contributes a
  * gain only if it is contained in the label set.
  *
  * For each pair, the first `n` ranking positions are examined, where
  * `n = min(max(pred.length, labSetSize), k)`. Position `i` (0-based) carries the gain
  * `1 / log(i + 2)`. Pairs whose label set is empty log a warning and score 0.0.
  *
  * @param k the ranking position to truncate at; must be positive
  * @return the mean of the per-pair `dcg / maxDcg` values
  * @throws IllegalArgumentException if `k` is not positive (via `require`)
  */
 @Since("1.2.0")
  def ndcgAt(k: Int): Double = {
    require(k > 0, "ranking position k should be positive")
    predictionAndLabels.map { case (pred, lab) =>
      // Set form of the ground truth: gives de-duplicated size and membership tests.
      val labSet = lab.toSet

      if (labSet.nonEmpty) {
        val labSetSize = labSet.size
        // NOTE(review): because of math.max, n can be larger than pred.length
        // (whenever both labSetSize and k exceed pred.length).
        val n = math.min(math.max(pred.length, labSetSize), k)
        var maxDcg = 0.0
        var dcg = 0.0
        var i = 0
        while (i < n) {
          // Natural log is used; the log base cancels in the dcg / maxDcg ratio.
          val gain = 1.0 / math.log(i + 2)
          // NOTE(review): pred(i) is not guarded by i < pred.length, so this indexes past
          // the end of pred when n > pred.length; this is the access the report says
          // throws ArrayIndexOutOfBoundsException. An `i < pred.length &&` guard would
          // avoid it -- confirm against the upstream fix.
          if (labSet.contains(pred(i))) {
            dcg += gain
          }
          // The ideal ranking places one relevant item at each of the first labSetSize positions.
          if (i < labSetSize) {
            maxDcg += gain
          }
          i += 1
        }
        dcg / maxDcg
      } else {
        logWarning("Empty ground truth set, check input data")
        0.0
      }
    }.mean()
  }

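The line `if (labSet.contains(pred(i)))` throws ArrayIndexOutOfBoundsException when
the set of true relevant documents is smaller than the parameter k: because n is
bounded by math.max(pred.length, labSetSize) rather than by pred.length, the index i
can run past the end of pred.
To reproduce, just try this with sample_movielens_data.txt.
precisionAt is not affected only because it uses
val n = math.min(pred.length, k)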






--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to