Author: tdunning
Date: Fri Nov  5 03:20:20 2010
New Revision: 1031414

URL: http://svn.apache.org/viewvc?rev=1031414&view=rev
Log:
MAHOUT-539 - Added comments and uncommented key line.

Modified:
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsv.java

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsv.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsv.java?rev=1031414&r1=1031413&r2=1031414&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsv.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsv.java Fri Nov  5 03:20:20 2010
@@ -39,8 +39,19 @@ import java.util.List;
 import java.util.Random;
 
 /**
- * Created by IntelliJ IDEA. User: tdunning Date: Oct 24, 2010 Time: 7:45:24 PM To change this
- * template use File | Settings | File Templates.
+ * Shows how different encoding choices can make big speed differences.
+ * <p/>
+ * Run with command line options --generate 1000000 test.csv to generate a million data lines in
+ * test.csv.
+ * <p/>
+ * Run with command line options --parser test.csv to time how long it takes to parse and encode
+ * those million data points
+ * <p/>
+ * Run with command line options --fast test.csv to time how long it takes to parse and encode those
+ * million data points using byte-level parsing and direct value encoding.
+ * <p/>
+ * This doesn't demonstrate text encoding which is subject to somewhat different tricks.  The basic
+ * idea of caching hash locations and byte level parsing still very much applies to text, however.
  */
 public class SimpleCsv {
   public static final int SEPARATOR_CHAR = '\t';
@@ -92,7 +103,7 @@ public class SimpleCsv {
         for (int i = 0; i < FIELDS; i++) {
           double z = line.getDouble(i);
           s[i].add(z);
-//          encoder[i].addToVector((byte[]) null, z, v);
+          encoder[i].addToVector((byte[]) null, z, v);
         }
         line = in.read();
       }


Reply via email to