Author: gsingers
Date: Fri Mar 25 14:28:12 2011
New Revision: 1085397

URL: http://svn.apache.org/viewvc?rev=1085397&view=rev
Log:
MAHOUT-548: add in some CSV support for creating vectors, as well as a few 
other fixes for working with vectors

Added:
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/
    
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java
    
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java
    mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/
    
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java
    
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java
Modified:
    mahout/trunk/utils/pom.xml
    
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
    
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
    
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
    
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
    
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java

Modified: mahout/trunk/utils/pom.xml
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/pom.xml?rev=1085397&r1=1085396&r2=1085397&view=diff
==============================================================================
--- mahout/trunk/utils/pom.xml (original)
+++ mahout/trunk/utils/pom.xml Fri Mar 25 14:28:12 2011
@@ -142,6 +142,11 @@
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.solr</groupId>
+      <artifactId>solr-commons-csv</artifactId>
+      <version>1.4.1</version>
+    </dependency>
 
     <dependency>
       <groupId>junit</groupId>

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=1085397&r1=1085396&r2=1085397&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
 Fri Mar 25 14:28:12 2011
@@ -77,16 +77,22 @@ public final class VectorDumper {
     Option dictTypeOpt = 
obuilder.withLongName("dictionaryType").withRequired(false).withArgument(
             
abuilder.withName("dictionaryType").withMinimum(1).withMaximum(1).create()).withDescription(
             "The dictionary file type 
(text|sequencefile)").withShortName("dt").create();
-    Option centroidJSonOpt = 
obuilder.withLongName("json").withRequired(false).withDescription(
-            "Output the centroid as JSON.  Otherwise it substitutes in the 
terms for vector cell entries")
+    Option jsonOpt = 
obuilder.withLongName("json").withRequired(false).withDescription(
+            "Output the Vector as JSON.  Otherwise it substitutes in the terms 
for vector cell entries")
             .withShortName("j").create();
+    Option csvOpt = 
obuilder.withLongName("csv").withRequired(false).withDescription(
+            "Output the Vector as CSV.  Otherwise it substitutes in the terms 
for vector cell entries")
+            .withShortName("c").create();
+    Option namesAsCommentsOpt = 
obuilder.withLongName("namesAsComments").withRequired(false).withDescription(
+            "If using CSV output, optionally add a comment line for each 
NamedVector (if the vector is one) printing out the name")
+            .withShortName("n").create();
     Option sizeOpt = obuilder.withLongName("sizeOnly").withRequired(false).
             withDescription("Dump only the size of the 
vector").withShortName("sz").create();
     Option helpOpt = obuilder.withLongName("help").withDescription("Print out 
help").withShortName("h")
             .create();
 
     Group group = 
gbuilder.withName("Options").withOption(seqOpt).withOption(outputOpt).withOption(
-            
dictTypeOpt).withOption(dictOpt).withOption(centroidJSonOpt).withOption(vectorAsKeyOpt).withOption(
+            
dictTypeOpt).withOption(dictOpt).withOption(csvOpt).withOption(vectorAsKeyOpt).withOption(
             printKeyOpt).withOption(sizeOpt).create();
 
     try {
@@ -122,10 +128,12 @@ public final class VectorDumper {
             throw new OptionException(dictTypeOpt);
           }
         }
-        boolean useJSON = cmdLine.hasOption(centroidJSonOpt);
+        boolean useJSON = cmdLine.hasOption(jsonOpt);
+        boolean useCSV = cmdLine.hasOption(csvOpt);
+
         boolean sizeOnly = cmdLine.hasOption(sizeOpt);
         SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
-
+        boolean namesAsComments = cmdLine.hasOption(namesAsCommentsOpt);
         Writable keyWritable = 
reader.getKeyClass().asSubclass(Writable.class).newInstance();
         Writable valueWritable = 
reader.getValueClass().asSubclass(Writable.class).newInstance();
         boolean transposeKeyValue = cmdLine.hasOption(vectorAsKeyOpt);
@@ -140,6 +148,16 @@ public final class VectorDumper {
           try {
             boolean printKey = cmdLine.hasOption(printKeyOpt);
             long i = 0;
+            if (useCSV && dictionary != null){
+              writer.write("#");
+              for (int j = 0; j < dictionary.length; j++) {
+                writer.write(dictionary[j]);
+                if (j < dictionary.length - 1){
+                  writer.write(',');
+                }
+              }
+              writer.write('\n');
+            }
             while (reader.next(keyWritable, valueWritable)) {
               if (printKey) {
                 Writable notTheVectorWritable = transposeKeyValue ? 
valueWritable : keyWritable;
@@ -159,7 +177,14 @@ public final class VectorDumper {
                 writer.write(String.valueOf(vector.size()));
                 writer.write('\n');
               } else {
-                String fmtStr = useJSON ? vector.asFormatString() : 
VectorHelper.vectorToString(vector, dictionary);
+                String fmtStr;
+                if (useJSON){
+                  fmtStr = VectorHelper.vectorToJSONString(vector, dictionary);
+                } else if (useCSV){
+                  fmtStr = VectorHelper.vectorToCSVString(vector, 
namesAsComments);
+                } else {
+                  fmtStr = vector.asFormatString();
+                }
                 writer.write(fmtStr);
                 writer.write('\n');
               }

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java?rev=1085397&r1=1085396&r2=1085397&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
 Fri Mar 25 14:28:12 2011
@@ -40,14 +40,45 @@ import org.apache.mahout.math.map.OpenOb
 public final class VectorHelper {
 
   private static final Pattern TAB_PATTERN = Pattern.compile("\t");
+
   
   private VectorHelper() { }
-  
+
+  /** Renders the vector as CSV text in a new String; any IOException from appending is rethrown unchecked. */
+  public static String vectorToCSVString(Vector vector, boolean namesAsComments){
+    StringBuilder csv = new StringBuilder(2048);
+    try {
+      vectorToCSVString(vector, namesAsComments, csv);
+      return csv.toString();
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /** Appends an optional "#name" line (NamedVector only), then the comma separated values and a newline. */
+  public static void vectorToCSVString(Vector vector, boolean namesAsComments,
+                                       Appendable bldr) throws IOException {
+    if (namesAsComments && vector instanceof NamedVector){
+      bldr.append("#").append(((NamedVector)vector).getName()).append('\n');
+    }
+    Iterator<Vector.Element> elements = vector.iterator();
+    // the first value carries no leading separator
+    if (elements.hasNext()) {
+      bldr.append(String.valueOf(elements.next().get()));
+    }
+    while (elements.hasNext()) {
+      bldr.append(",");
+      bldr.append(String.valueOf(elements.next().get()));
+    }
+    bldr.append('\n');
+  }
+
+
   /**
    * @return a String from a vector that fills in the values with the 
appropriate value from a dictionary where
    * each the ith entry is the term for the ith vector cell.
    */
-  public static String vectorToString(Vector vector, String[] dictionary) {
+  public static String vectorToJSONString(Vector vector, String[] dictionary) {
     StringBuilder bldr = new StringBuilder(2048);
     
     if (vector instanceof NamedVector) {
@@ -67,12 +98,13 @@ public final class VectorHelper {
       if (dictionary != null) {
         bldr.append(dictionary[elt.index()]);
       } else {
-        bldr.append(elt.index());
+        bldr.append(String.valueOf(elt.index()));
       }
-      bldr.append(':').append(elt.get());
+      bldr.append(':').append(String.valueOf(elt.get()));
     }
     return bldr.append('}').toString();
   }
+
   
   /**
    * Read in a dictionary file. Format is:

Added: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java?rev=1085397&view=auto
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java
 (added)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java
 Fri Mar 25 14:28:12 2011
@@ -0,0 +1,94 @@
+package org.apache.mahout.utils.vectors.csv;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVStrategy;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Iterator;
+
+
+/**
+ * Iterates a CSV file and produces {@link org.apache.mahout.math.Vector}.
+ * <br/>
+ * The Iterator returned throws {@link UnsupportedOperationException} for the 
{@link java.util.Iterator#remove()} method.
+ * <p/>
+ * Assumes DenseVector for now, but in the future may have the option of 
mapping columns to sparse format
+ * <p/>
+ * The Iterator is not thread-safe.
+ *
+ *
+ **/
+public class CSVVectorIterable implements Iterable<Vector> {
+  /** Parser over the supplied reader; shared by every iterator this Iterable hands out. */
+  protected CSVParser parser;
+  /** The row that the next call to next() will convert, or null once the parser returns no more rows. */
+  protected String [] line;
+
+  /** Wraps the reader in a CSVParser and pre-reads the first row. */
+  public CSVVectorIterable(Reader reader) throws IOException {
+    parser = new CSVParser(reader);
+    line = parser.getLine();
+  }
+
+  /** Wraps the reader in a CSVParser using the given strategy and pre-reads the first row. */
+  public CSVVectorIterable(Reader reader, CSVStrategy strategy) throws IOException {
+    parser = new CSVParser(reader, strategy);
+    line = parser.getLine();
+  }
+
+  /** Iterators share the single read position held in this object, so rows are consumed only once. */
+  @Override
+  public Iterator<Vector> iterator() {
+    return new CSVIterator();
+  }
+
+  private class CSVIterator implements Iterator<Vector>{
+    @Override
+    public boolean hasNext() {
+      return line != null;
+    }
+
+    /** Converts the current row to a {@link DenseVector} and pre-reads the following row. */
+    @Override
+    public Vector next() {
+      if (line == null) {
+        throw new java.util.NoSuchElementException(); // Iterator contract; previously a NullPointerException
+      }
+      Vector result = new DenseVector(line.length);
+      for (int i = 0; i < line.length; i++) {
+        result.setQuick(i, Double.parseDouble(line[i]));
+      }
+      try {
+        line = parser.getLine(); // move the line forward
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+      return result;
+    }
+
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+}

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
 Fri Mar 25 14:28:12 2011
@@ -25,8 +25,8 @@ import org.apache.mahout.math.Vector;
 /**
  * Write out the vectors to any {@link java.io.Writer} using {@link 
org.apache.mahout.math.Vector#asFormatString()}.
  */
-public class JWriterVectorWriter implements VectorWriter {
-  private final Writer writer;
+public class JWriterVectorWriter extends VectorWriter {
+  protected final Writer writer;
   
   public JWriterVectorWriter(Writer writer) {
     this.writer = writer;
@@ -45,14 +45,22 @@ public class JWriterVectorWriter impleme
       if (result >= maxDocs) {
         break;
       }
-      writer.write(vector.asFormatString());
-      writer.write('\n');
-      
+      formatVector(vector);
       result++;
     }
     return result;
   }
-  
+
+  protected void formatVector(Vector vector) throws IOException { // writes one vector as text, then a newline
+    writer.write(vector.asFormatString());
+    writer.write('\n');
+  }
+
+  @Override
+  public void write(Vector vector) throws IOException { // single-vector entry point; delegates to formatVector
+    formatVector(vector);
+  }
+
   @Override
   public void close() throws IOException {
     writer.flush();

Added: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java?rev=1085397&view=auto
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java
 (added)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java
 Fri Mar 25 14:28:12 2011
@@ -0,0 +1,105 @@
+package org.apache.mahout.utils.vectors.io;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+
+/**
+ * Given a Sequence File containing vectors (actually, {@link org.apache.mahout.math.VectorWritable}), iterate over it.
+ *
+ **/
+public class SequenceFileVectorIterable implements Iterable<Vector>{
+  protected SequenceFile.Reader reader;
+  protected long fileLen;
+  protected Writable keyWritable;
+  protected Writable valueWritable;
+  protected boolean useKey;
+
+  /**
+   * Construct the Iterable. Opens a reader on the file and records the length reported by the
+   * file system; the iterator compares the reader position against that length in hasNext().
+   * @param fs The {@link org.apache.hadoop.fs.FileSystem} containing the {@link org.apache.hadoop.io.SequenceFile}
+   * @param file The {@link org.apache.hadoop.fs.Path} containing the file
+   * @param conf The {@link org.apache.hadoop.conf.Configuration} to use
+   * @param useKey If true, use the key as the {@link org.apache.mahout.math.VectorWritable}, otherwise use the value
+   * @throws IllegalAccessException if the key or value class cannot be accessed for instantiation
+   * @throws InstantiationException if the key or value class cannot be instantiated
+   * @throws IOException if opening the reader or fetching the content summary fails
+   */
+  public SequenceFileVectorIterable(FileSystem fs, Path file, Configuration conf, boolean useKey)
+      throws IllegalAccessException, InstantiationException, IOException {
+    this.reader = new SequenceFile.Reader(fs, file, conf);
+    ContentSummary summary = fs.getContentSummary(file);
+    this.fileLen = summary.getLength();
+    this.useKey = useKey;
+    this.keyWritable = reader.getKeyClass().asSubclass(Writable.class).newInstance();
+    this.valueWritable = reader.getValueClass().asSubclass(Writable.class).newInstance();
+  }
+
+  /**
+   * The Iterator returned does not support remove(). Every iterator reads from the same reader.
+   * @return The {@link java.util.Iterator}
+   */
+  @Override
+  public Iterator<Vector> iterator() {
+    return new SFIterator();
+  }
+
+  private final class SFIterator implements Iterator<Vector>{
+    @Override
+    public boolean hasNext() {
+      //TODO: is this legitimate?  We can't call next here since it breaks the iterator contract
+      try {
+        return reader.getPosition() < fileLen;
+      } catch (IOException e) {
+        return false; // kept from the original: a failed position lookup is reported as "no more input"
+      }
+    }
+
+    /** Reads the next record; throws java.util.NoSuchElementException (instead of returning null) if none is left. */
+    @Override
+    public Vector next() {
+      try {
+        if (!reader.next(keyWritable, valueWritable)) {
+          throw new java.util.NoSuchElementException();
+        }
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+      return ((VectorWritable) (useKey ? keyWritable : valueWritable)).get();
+    }
+
+    /**
+     * Not supported
+     */
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+}

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
 Fri Mar 25 14:28:12 2011
@@ -30,16 +30,16 @@ import org.apache.mahout.math.VectorWrit
  *
  * Closes the writer when done
  */
-public class SequenceFileVectorWriter implements VectorWriter {
+public class SequenceFileVectorWriter extends VectorWriter {
   private final SequenceFile.Writer writer;
-  
+  long recNum = 0;
   public SequenceFileVectorWriter(SequenceFile.Writer writer) {
     this.writer = writer;
   }
   
   @Override
   public long write(Iterable<Vector> iterable, long maxDocs) throws 
IOException {
-    long recNum = 0;
+
     for (Vector point : iterable) {
       if (recNum >= maxDocs) {
         break;
@@ -51,7 +51,13 @@ public class SequenceFileVectorWriter im
     }
     return recNum;
   }
-  
+
+  @Override
+  public void write(Vector vector) throws IOException {
+    // recNum is an instance field, so the key numbering continues across calls on this writer
+    writer.append(new LongWritable(recNum++), new VectorWritable(vector));
+  }
+
   @Override
   public long write(Iterable<Vector> iterable) throws IOException {
     return write(iterable, Long.MAX_VALUE);

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
 Fri Mar 25 14:28:12 2011
@@ -21,7 +21,7 @@ import java.io.IOException;
 
 import org.apache.mahout.math.Vector;
 
-public interface VectorWriter {
+public abstract class VectorWriter {
   /**
    * Write all values in the Iterable to the output
    * @param iterable The {@link Iterable} to loop over
@@ -29,7 +29,15 @@ public interface VectorWriter {
    * @throws IOException if there was a problem writing
    *
    */
-  long write(Iterable<Vector> iterable) throws IOException;
+  public abstract long write(Iterable<Vector> iterable) throws IOException;
+
+  /**
+   * Write out a vector
+   *
+   * @param vector The {@link org.apache.mahout.math.Vector} to write
+   * @throws IOException
+   */
+  public abstract void write(Vector vector) throws IOException;
   
   /**
    * Write the first <code>maxDocs</code> to the output.
@@ -38,12 +46,12 @@ public interface VectorWriter {
    * @return The number of docs written
    * @throws IOException if there was a problem writing
    */
-  long write(Iterable<Vector> iterable, long maxDocs) throws IOException;
+  public abstract long write(Iterable<Vector> iterable, long maxDocs) throws 
IOException;
   
   /**
    * Close any internally held resources.  If external Writers are passed in, 
the implementation should indicate
    * whether it also closes them
    * @throws IOException if there was an issue closing the item
    */
-  void close() throws IOException;
+  public abstract void close() throws IOException;
 }

Added: 
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java?rev=1085397&view=auto
==============================================================================
--- 
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java
 (added)
+++ 
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java
 Fri Mar 25 14:28:12 2011
@@ -0,0 +1,60 @@
+package org.apache.mahout.utils.vectors.csv;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.utils.MahoutTestCase;
+import org.apache.mahout.utils.vectors.RandomVectorIterable;
+import org.apache.mahout.utils.vectors.VectorHelper;
+import org.apache.mahout.utils.vectors.io.JWriterVectorWriter;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.io.StringWriter;
+
+
+/**
+ *
+ *
+ **/
+public class CSVVectorIterableTest extends MahoutTestCase {
+
+  /** Writes the vectors of RandomVectorIterable(50) as CSV text and counts the vectors parsed back from it. */
+  @Test
+  public void test() throws Exception {
+    StringWriter sWriter = new StringWriter();
+    // swap the writer's default vector format for CSV so the output can be fed to CSVVectorIterable
+    JWriterVectorWriter jwvw = new JWriterVectorWriter(sWriter) {
+      @Override
+      protected void formatVector(Vector vector) throws IOException {
+        writer.write(VectorHelper.vectorToCSVString(vector, false));
+      }
+    };
+    Iterable<Vector> iter = new RandomVectorIterable(50);
+    jwvw.write(iter);
+    jwvw.close();
+
+    CSVVectorIterable csvIter = new CSVVectorIterable(new StringReader(sWriter.getBuffer().toString()));
+    // only the number of parsed rows is verified here, not the parsed values
+    int count = 0;
+    for (Vector ignored : csvIter) {
+      count++;
+    }
+    assertEquals(50, count);
+  }
+}

Added: 
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java?rev=1085397&view=auto
==============================================================================
--- 
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java
 (added)
+++ 
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java
 Fri Mar 25 14:28:12 2011
@@ -0,0 +1,39 @@
+package org.apache.mahout.utils.vectors.io;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
+import org.apache.mahout.utils.MahoutTestCase;
+import org.apache.mahout.utils.vectors.RandomVectorIterable;
+import org.junit.Test;
+
+
+/**
+ *
+ *
+ **/
+public class SequenceFileVectorIterableTest extends MahoutTestCase {
+
+  /** Round trip: writes vectors to a sequence file and counts how many the iterable reads back. */
+  @Test
+  public void testSFVI() throws Exception {
+    Path path = getTestTempFilePath("sfvw");
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+    SequenceFileVectorWriter writer = new SequenceFileVectorWriter(
+        new SequenceFile.Writer(fs, conf, path, LongWritable.class, VectorWritable.class));
+    writer.write(new RandomVectorIterable(50));
+    writer.close();
+
+    // the file is declared with LongWritable keys and VectorWritable values, hence useKey == false
+    int count = 0;
+    for (Vector ignored : new SequenceFileVectorIterable(fs, path, conf, false)) {
+      count++;
+    }
+    assertEquals(50, count);
+  }
+}


Reply via email to