Author: sslavic
Date: Tue Aug 13 20:08:16 2013
New Revision: 1513632

URL: http://svn.apache.org/r1513632
Log:
MAHOUT-1313: Fixed unwanted integral division bug in RowSimilarityJob 
downsampling code where precision should have been retained

Modified:
    mahout/trunk/CHANGELOG
    
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java

Modified: mahout/trunk/CHANGELOG
URL: 
http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1513632&r1=1513631&r2=1513632&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Tue Aug 13 20:08:16 2013
@@ -2,6 +2,8 @@ Mahout Change Log
 
 Release 0.9 - unreleased
 
+  MAHOUT-1313: Fixed unwanted integral division bug in RowSimilarityJob downsampling code where precision should have been retained (sslavic)
+
   MAHOUT-1301: toString() method of SequentialAccessSparseVector has excess comma at the end (Alexander Senov, smarthi)
 
   MAHOUT-1296: Remove deprecated algorithms (ssc)

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java?rev=1513632&r1=1513631&r2=1513632&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java Tue Aug 13 20:08:16 2013
@@ -288,7 +288,7 @@ public class RowSimilarityJob extends Ab
     private Vector sampleDown(Vector rowVector, Context ctx) {
 
       int observationsPerRow = rowVector.getNumNondefaultElements();
-      double rowSampleRate = Math.min(maxObservationsPerRow, observationsPerRow) / observationsPerRow;
+      double rowSampleRate = (double) Math.min(maxObservationsPerRow, observationsPerRow) / (double) observationsPerRow;
 
       Vector downsampledRow = rowVector.like();
       long usedObservations = 0;
@@ -297,7 +297,7 @@ public class RowSimilarityJob extends Ab
       for (Vector.Element elem : rowVector.nonZeroes()) {
 
         int columnCount = observationsPerColumn.get(elem.index());
-        double columnSampleRate = Math.min(maxObservationsPerColumn, columnCount) / columnCount;
+        double columnSampleRate = (double) Math.min(maxObservationsPerColumn, columnCount) / (double) columnCount;
 
         if (random.nextDouble() <= Math.min(rowSampleRate, columnSampleRate)) {
           downsampledRow.setQuick(elem.index(), elem.get());


Reply via email to