Author: smarthi
Date: Sun Dec  8 18:18:08 2013
New Revision: 1549089

URL: http://svn.apache.org/r1549089
Log:
MAHOUT-1030: Regression: Clustered Points Should be
WeightedPropertyVectorWritable not WeightedVectorWritable

Modified:
    
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java?rev=1549089&r1=1549088&r2=1549089&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
 Sun Dec  8 18:18:08 2013
@@ -20,14 +20,17 @@ package org.apache.mahout.clustering.cla
 import java.io.IOException;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 
 import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
@@ -190,7 +193,7 @@ public final class ClusterClassification
       Path output, Double clusterClassificationThreshold, boolean 
emitMostLikely) throws IOException {
     Configuration conf = new Configuration();
     SequenceFile.Writer writer = new 
SequenceFile.Writer(input.getFileSystem(conf), conf, new Path(output,
-        "part-m-" + 0), IntWritable.class, WeightedVectorWritable.class);
+        "part-m-" + 0), IntWritable.class, 
WeightedPropertyVectorWritable.class);
     for (VectorWritable vw : new 
SequenceFileDirValueIterable<VectorWritable>(input, PathType.LIST,
         PathFilters.logsCRCFilter(), conf)) {
       Vector pdfPerCluster = clusterClassifier.classify(vw.get());
@@ -203,10 +206,11 @@ public final class ClusterClassification
   
   private static void classifyAndWrite(List<Cluster> clusterModels, Double 
clusterClassificationThreshold,
       boolean emitMostLikely, SequenceFile.Writer writer, VectorWritable vw, 
Vector pdfPerCluster) throws IOException {
+    Map<Text, Text> props = Maps.newHashMap();
     if (emitMostLikely) {
       int maxValueIndex = pdfPerCluster.maxValueIndex();
-      WeightedVectorWritable wvw = new 
WeightedVectorWritable(pdfPerCluster.maxValue(), vw.get());
-      write(clusterModels, writer, wvw, maxValueIndex);
+      WeightedPropertyVectorWritable wpvw = new 
WeightedPropertyVectorWritable(pdfPerCluster.maxValue(), vw.get(), props);
+      write(clusterModels, writer, wpvw, maxValueIndex);
     } else {
       writeAllAboveThreshold(clusterModels, clusterClassificationThreshold, 
writer, vw, pdfPerCluster);
     }
@@ -222,7 +226,7 @@ public final class ClusterClassification
       }
     }
   }
-  
+
   private static void write(List<Cluster> clusterModels, SequenceFile.Writer 
writer, WeightedVectorWritable wvw,
       int maxValueIndex) throws IOException {
     Cluster cluster = clusterModels.get(maxValueIndex);
@@ -258,7 +262,7 @@ public final class ClusterClassification
     job.setNumReduceTasks(0);
     
     job.setOutputKeyClass(IntWritable.class);
-    job.setOutputValueClass(WeightedVectorWritable.class);
+    job.setOutputValueClass(WeightedPropertyVectorWritable.class);
     
     FileInputFormat.addInputPath(job, input);
     FileOutputFormat.setOutputPath(job, output);


Reply via email to