Author: sslavic
Date: Tue Aug 13 20:08:16 2013
New Revision: 1513632
URL: http://svn.apache.org/r1513632
Log:
MAHOUT-1313: Fixed unwanted integral division bug in RowSimilarityJob
downsampling code where precision should have been retained
Modified:
mahout/trunk/CHANGELOG
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
Modified: mahout/trunk/CHANGELOG
URL:
http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1513632&r1=1513631&r2=1513632&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Tue Aug 13 20:08:16 2013
@@ -2,6 +2,8 @@ Mahout Change Log
Release 0.9 - unreleased
+ MAHOUT-1313: Fixed unwanted integral division bug in RowSimilarityJob
downsampling code where precision should have been retained (sslavic)
+
MAHOUT-1301: toString() method of SequentialAccessSparseVector has excess
comma at the end (Alexander Senov, smarthi)
MAHOUT-1296: Remove deprecated algorithms (ssc)
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java?rev=1513632&r1=1513631&r2=1513632&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java Tue Aug 13 20:08:16 2013
@@ -288,7 +288,7 @@ public class RowSimilarityJob extends Ab
private Vector sampleDown(Vector rowVector, Context ctx) {
int observationsPerRow = rowVector.getNumNondefaultElements();
- double rowSampleRate = Math.min(maxObservationsPerRow,
observationsPerRow) / observationsPerRow;
+ double rowSampleRate = (double) Math.min(maxObservationsPerRow,
observationsPerRow) / (double) observationsPerRow;
Vector downsampledRow = rowVector.like();
long usedObservations = 0;
@@ -297,7 +297,7 @@ public class RowSimilarityJob extends Ab
for (Vector.Element elem : rowVector.nonZeroes()) {
int columnCount = observationsPerColumn.get(elem.index());
- double columnSampleRate = Math.min(maxObservationsPerColumn,
columnCount) / columnCount;
+ double columnSampleRate = (double) Math.min(maxObservationsPerColumn,
columnCount) / (double) columnCount;
if (random.nextDouble() <= Math.min(rowSampleRate, columnSampleRate)) {
downsampledRow.setQuick(elem.index(), elem.get());