Ah, OK.  Good to know.  Hadn't followed that one.   Feel free to change as 
appropriate or I can.

On Mar 25, 2011, at 9:12 PM, Dmitriy Lyubimov wrote:

> That would be a typical change I am trying to fix with 622:
> http://svn.apache.org/viewvc/mahout/trunk/utils/pom.xml?view=diff&r1=1085396&r2=1085397&pathrev=1085397
> 
> 
> 
> On Fri, Mar 25, 2011 at 7:28 AM,  <[email protected]> wrote:
>> Author: gsingers
>> Date: Fri Mar 25 14:28:12 2011
>> New Revision: 1085397
>> 
>> URL: http://svn.apache.org/viewvc?rev=1085397&view=rev
>> Log:
>> MAHOUT-548: add in some CSV support for creating vectors, as well as a few 
>> other fixes for working with vectors
>> 
>> Added:
>>    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/
>>    
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java
>>    
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java
>>    mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/
>>    
>> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java
>>    
>> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java
>> Modified:
>>    mahout/trunk/utils/pom.xml
>>    
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
>>    
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
>>    
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
>>    
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
>>    
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
>> 
>> Modified: mahout/trunk/utils/pom.xml
>> URL: 
>> http://svn.apache.org/viewvc/mahout/trunk/utils/pom.xml?rev=1085397&r1=1085396&r2=1085397&view=diff
>> ==============================================================================
>> --- mahout/trunk/utils/pom.xml (original)
>> +++ mahout/trunk/utils/pom.xml Fri Mar 25 14:28:12 2011
>> @@ -142,6 +142,11 @@
>>       <type>test-jar</type>
>>       <scope>test</scope>
>>     </dependency>
>> +    <dependency>
>> +      <groupId>org.apache.solr</groupId>
>> +      <artifactId>solr-commons-csv</artifactId>
>> +      <version>1.4.1</version>
>> +    </dependency>
>> 
>>     <dependency>
>>       <groupId>junit</groupId>
>> 
>> Modified: 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
>> URL: 
>> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=1085397&r1=1085396&r2=1085397&view=diff
>> ==============================================================================
>> --- 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
>>  (original)
>> +++ 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
>>  Fri Mar 25 14:28:12 2011
>> @@ -77,16 +77,22 @@ public final class VectorDumper {
>>     Option dictTypeOpt = 
>> obuilder.withLongName("dictionaryType").withRequired(false).withArgument(
>>             
>> abuilder.withName("dictionaryType").withMinimum(1).withMaximum(1).create()).withDescription(
>>             "The dictionary file type 
>> (text|sequencefile)").withShortName("dt").create();
>> -    Option centroidJSonOpt = 
>> obuilder.withLongName("json").withRequired(false).withDescription(
>> -            "Output the centroid as JSON.  Otherwise it substitutes in the 
>> terms for vector cell entries")
>> +    Option jsonOpt = 
>> obuilder.withLongName("json").withRequired(false).withDescription(
>> +            "Output the Vector as JSON.  Otherwise it substitutes in the 
>> terms for vector cell entries")
>>             .withShortName("j").create();
>> +    Option csvOpt = 
>> obuilder.withLongName("csv").withRequired(false).withDescription(
>> +            "Output the Vector as CSV.  Otherwise it substitutes in the 
>> terms for vector cell entries")
>> +            .withShortName("c").create();
>> +    Option namesAsCommentsOpt = 
>> obuilder.withLongName("namesAsComments").withRequired(false).withDescription(
>> +            "If using CSV output, optionally add a comment line for each 
>> NamedVector (if the vector is one) printing out the name")
>> +            .withShortName("n").create();
>>     Option sizeOpt = obuilder.withLongName("sizeOnly").withRequired(false).
>>             withDescription("Dump only the size of the 
>> vector").withShortName("sz").create();
>>     Option helpOpt = obuilder.withLongName("help").withDescription("Print 
>> out help").withShortName("h")
>>             .create();
>> 
>>     Group group = 
>> gbuilder.withName("Options").withOption(seqOpt).withOption(outputOpt).withOption(
>> -            
>> dictTypeOpt).withOption(dictOpt).withOption(centroidJSonOpt).withOption(vectorAsKeyOpt).withOption(
>> +            
>> dictTypeOpt).withOption(dictOpt).withOption(csvOpt).withOption(vectorAsKeyOpt).withOption(
>>             printKeyOpt).withOption(sizeOpt).create();
>> 
>>     try {
>> @@ -122,10 +128,12 @@ public final class VectorDumper {
>>             throw new OptionException(dictTypeOpt);
>>           }
>>         }
>> -        boolean useJSON = cmdLine.hasOption(centroidJSonOpt);
>> +        boolean useJSON = cmdLine.hasOption(jsonOpt);
>> +        boolean useCSV = cmdLine.hasOption(csvOpt);
>> +
>>         boolean sizeOnly = cmdLine.hasOption(sizeOpt);
>>         SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
>> -
>> +        boolean namesAsComments = cmdLine.hasOption(namesAsCommentsOpt);
>>         Writable keyWritable = 
>> reader.getKeyClass().asSubclass(Writable.class).newInstance();
>>         Writable valueWritable = 
>> reader.getValueClass().asSubclass(Writable.class).newInstance();
>>         boolean transposeKeyValue = cmdLine.hasOption(vectorAsKeyOpt);
>> @@ -140,6 +148,16 @@ public final class VectorDumper {
>>           try {
>>             boolean printKey = cmdLine.hasOption(printKeyOpt);
>>             long i = 0;
>> +            if (useCSV && dictionary != null){
>> +              writer.write("#");
>> +              for (int j = 0; j < dictionary.length; j++) {
>> +                writer.write(dictionary[j]);
>> +                if (j < dictionary.length - 1){
>> +                  writer.write(',');
>> +                }
>> +              }
>> +              writer.write('\n');
>> +            }
>>             while (reader.next(keyWritable, valueWritable)) {
>>               if (printKey) {
>>                 Writable notTheVectorWritable = transposeKeyValue ? 
>> valueWritable : keyWritable;
>> @@ -159,7 +177,14 @@ public final class VectorDumper {
>>                 writer.write(String.valueOf(vector.size()));
>>                 writer.write('\n');
>>               } else {
>> -                String fmtStr = useJSON ? vector.asFormatString() : 
>> VectorHelper.vectorToString(vector, dictionary);
>> +                String fmtStr;
>> +                if (useJSON){
>> +                  fmtStr = VectorHelper.vectorToJSONString(vector, 
>> dictionary);
>> +                } else if (useCSV){
>> +                  fmtStr = VectorHelper.vectorToCSVString(vector, 
>> namesAsComments);
>> +                } else {
>> +                  fmtStr = vector.asFormatString();
>> +                }
>>                 writer.write(fmtStr);
>>                 writer.write('\n');
>>               }
>> 
>> Modified: 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
>> URL: 
>> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java?rev=1085397&r1=1085396&r2=1085397&view=diff
>> ==============================================================================
>> --- 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
>>  (original)
>> +++ 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
>>  Fri Mar 25 14:28:12 2011
>> @@ -40,14 +40,45 @@ import org.apache.mahout.math.map.OpenOb
>>  public final class VectorHelper {
>> 
>>   private static final Pattern TAB_PATTERN = Pattern.compile("\t");
>> +
>> 
>>   private VectorHelper() { }
>> -
>> +
>> +  public static String vectorToCSVString(Vector vector, boolean 
>> namesAsComments){
>> +    StringBuilder bldr = new StringBuilder(2048);
>> +    try {
>> +      vectorToCSVString(vector, namesAsComments, bldr);
>> +    } catch (IOException e) {
>> +      throw new RuntimeException(e);
>> +    }
>> +    return bldr.toString();
>> +  }
>> +
>> +  public static void vectorToCSVString(Vector vector, boolean 
>> namesAsComments,
>> +                                       Appendable bldr) throws IOException {
>> +    if (namesAsComments && vector instanceof NamedVector){
>> +      bldr.append("#").append(((NamedVector)vector).getName()).append('\n');
>> +    }
>> +    Iterator<Vector.Element> iter = vector.iterator();
>> +    boolean first = true;
>> +    while (iter.hasNext()) {
>> +      if (first) {
>> +        first = false;
>> +      } else {
>> +        bldr.append(",");
>> +      }
>> +      Vector.Element elt = iter.next();
>> +      bldr.append(String.valueOf(elt.get()));
>> +    }
>> +    bldr.append('\n');
>> +  }
>> +
>> +
>>   /**
>>    * @return a String from a vector that fills in the values with the 
>> appropriate value from a dictionary where
>>    * each the ith entry is the term for the ith vector cell.
>>    */
>> -  public static String vectorToString(Vector vector, String[] dictionary) {
>> +  public static String vectorToJSONString(Vector vector, String[] 
>> dictionary) {
>>     StringBuilder bldr = new StringBuilder(2048);
>> 
>>     if (vector instanceof NamedVector) {
>> @@ -67,12 +98,13 @@ public final class VectorHelper {
>>       if (dictionary != null) {
>>         bldr.append(dictionary[elt.index()]);
>>       } else {
>> -        bldr.append(elt.index());
>> +        bldr.append(String.valueOf(elt.index()));
>>       }
>> -      bldr.append(':').append(elt.get());
>> +      bldr.append(':').append(String.valueOf(elt.get()));
>>     }
>>     return bldr.append('}').toString();
>>   }
>> +
>> 
>>   /**
>>    * Read in a dictionary file. Format is:
>> 
>> Added: 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java
>> URL: 
>> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java?rev=1085397&view=auto
>> ==============================================================================
>> --- 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java
>>  (added)
>> +++ 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java
>>  Fri Mar 25 14:28:12 2011
>> @@ -0,0 +1,94 @@
>> +package org.apache.mahout.utils.vectors.csv;
>> +/**
>> + * Licensed to the Apache Software Foundation (ASF) under one or more
>> + * contributor license agreements.  See the NOTICE file distributed with
>> + * this work for additional information regarding copyright ownership.
>> + * The ASF licenses this file to You under the Apache License, Version 2.0
>> + * (the "License"); you may not use this file except in compliance with
>> + * the License.  You may obtain a copy of the License at
>> + *
>> + *     http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + * Unless required by applicable law or agreed to in writing, software
>> + * distributed under the License is distributed on an "AS IS" BASIS,
>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>> + * See the License for the specific language governing permissions and
>> + * limitations under the License.
>> + */
>> +
>> +import org.apache.commons.csv.CSVParser;
>> +import org.apache.commons.csv.CSVStrategy;
>> +import org.apache.mahout.math.DenseVector;
>> +import org.apache.mahout.math.Vector;
>> +
>> +import java.io.BufferedReader;
>> +import java.io.IOException;
>> +import java.io.Reader;
>> +import java.util.Iterator;
>> +
>> +
>> +/**
>> + * Iterates a CSV file and produces {@link org.apache.mahout.math.Vector}.
>> + * <br/>
>> + * The Iterator returned throws {@link UnsupportedOperationException} for 
>> the {@link java.util.Iterator#remove()} method.
>> + * <p/>
>> + * Assumes DenseVector for now, but in the future may have the option of 
>> mapping columns to sparse format
>> + * <p/>
>> + * The Iterator is not thread-safe.
>> + *
>> + *
>> + **/
>> +public class CSVVectorIterable implements Iterable<Vector> {
>> +  protected CSVParser parser;
>> +  protected String [] line;
>> +
>> +  public CSVVectorIterable(Reader reader) throws IOException {
>> +    parser = new CSVParser(reader);
>> +    line = parser.getLine();
>> +  }
>> +
>> +  public CSVVectorIterable(Reader reader, CSVStrategy strategy) throws 
>> IOException {
>> +    parser = new CSVParser(reader, strategy);
>> +    line = parser.getLine();
>> +  }
>> +
>> +
>> +  @Override
>> +  public Iterator<Vector> iterator() {
>> +    return new CSVIterator();
>> +  }
>> +
>> +  private class CSVIterator implements Iterator<Vector>{
>> +
>> +
>> +    public CSVIterator() {
>> +    }
>> +
>> +    @Override
>> +    public boolean hasNext() {
>> +      return line != null;
>> +    }
>> +
>> +    @Override
>> +    public Vector next() {
>> +
>> +      Vector result = null;
>> +      result = new DenseVector(line.length);
>> +      for (int i = 0; i < line.length; i++) {
>> +        result.setQuick(i, Double.parseDouble(line[i]));
>> +      }
>> +      //move the line forward
>> +      try {
>> +        line = parser.getLine();
>> +      } catch (IOException e) {
>> +        throw new RuntimeException(e);
>> +      }
>> +      return result;
>> +    }
>> +
>> +    @Override
>> +    public void remove() {
>> +      throw new UnsupportedOperationException();
>> +    }
>> +  }
>> +}
>> 
>> Modified: 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
>> URL: 
>> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff
>> ==============================================================================
>> --- 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
>>  (original)
>> +++ 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
>>  Fri Mar 25 14:28:12 2011
>> @@ -25,8 +25,8 @@ import org.apache.mahout.math.Vector;
>>  /**
>>  * Write out the vectors to any {@link java.io.Writer} using {@link 
>> org.apache.mahout.math.Vector#asFormatString()}.
>>  */
>> -public class JWriterVectorWriter implements VectorWriter {
>> -  private final Writer writer;
>> +public class JWriterVectorWriter extends VectorWriter {
>> +  protected final Writer writer;
>> 
>>   public JWriterVectorWriter(Writer writer) {
>>     this.writer = writer;
>> @@ -45,14 +45,22 @@ public class JWriterVectorWriter impleme
>>       if (result >= maxDocs) {
>>         break;
>>       }
>> -      writer.write(vector.asFormatString());
>> -      writer.write('\n');
>> -
>> +      formatVector(vector);
>>       result++;
>>     }
>>     return result;
>>   }
>> -
>> +
>> +  protected void formatVector(Vector vector) throws IOException {
>> +    writer.write(vector.asFormatString());
>> +    writer.write('\n');
>> +  }
>> +
>> +  @Override
>> +  public void write(Vector vector) throws IOException {
>> +    formatVector(vector);
>> +  }
>> +
>>   @Override
>>   public void close() throws IOException {
>>     writer.flush();
>> 
>> Added: 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java
>> URL: 
>> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java?rev=1085397&view=auto
>> ==============================================================================
>> --- 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java
>>  (added)
>> +++ 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java
>>  Fri Mar 25 14:28:12 2011
>> @@ -0,0 +1,105 @@
>> +package org.apache.mahout.utils.vectors.io;
>> +/**
>> + * Licensed to the Apache Software Foundation (ASF) under one or more
>> + * contributor license agreements.  See the NOTICE file distributed with
>> + * this work for additional information regarding copyright ownership.
>> + * The ASF licenses this file to You under the Apache License, Version 2.0
>> + * (the "License"); you may not use this file except in compliance with
>> + * the License.  You may obtain a copy of the License at
>> + *
>> + *     http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + * Unless required by applicable law or agreed to in writing, software
>> + * distributed under the License is distributed on an "AS IS" BASIS,
>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>> + * See the License for the specific language governing permissions and
>> + * limitations under the License.
>> + */
>> +
>> +import org.apache.hadoop.conf.Configuration;
>> +import org.apache.hadoop.fs.ContentSummary;
>> +import org.apache.hadoop.fs.FileSystem;
>> +import org.apache.hadoop.fs.Path;
>> +import org.apache.hadoop.io.SequenceFile;
>> +import org.apache.hadoop.io.Writable;
>> +import org.apache.mahout.math.Vector;
>> +import org.apache.mahout.math.VectorWritable;
>> +
>> +import java.io.IOException;
>> +import java.util.Iterator;
>> +
>> +
>> +/**
>> + * Given a Sequence File containing vectors (actually, {@link 
>> org.apache.mahout.math.VectorWritable}, iterate over it.
>> + *
>> + **/
>> +public class SequenceFileVectorIterable implements Iterable<Vector>{
>> +  protected SequenceFile.Reader reader;
>> +  protected long fileLen;
>> +  protected Writable keyWritable;
>> +  protected Writable valueWritable;
>> +  protected boolean useKey;
>> +
>> +  /**
>> +   * Construct the Iterable
>> +   * @param fs The {@link org.apache.hadoop.fs.FileSystem} containing the 
>> {@link org.apache.hadoop.io.SequenceFile}
>> +   * @param file The {@link org.apache.hadoop.fs.Path} containing the file
>> +   * @param conf The {@link org.apache.hadoop.conf.Configuration} to use
>> +   * @param useKey If true, use the key as the {@link 
>> org.apache.mahout.math.VectorWritable}, otherwise use the value
>> +   * @throws IllegalAccessException
>> +   * @throws InstantiationException
>> +   * @throws IOException
>> +   */
>> +  public SequenceFileVectorIterable(FileSystem fs, Path file, Configuration 
>> conf, boolean useKey) throws IllegalAccessException, InstantiationException, 
>> IOException {
>> +    this.reader = new SequenceFile.Reader(fs, file, conf);
>> +    ContentSummary summary = fs.getContentSummary(file);
>> +    fileLen = summary.getLength();
>> +    this.useKey = useKey;
>> +    keyWritable = 
>> reader.getKeyClass().asSubclass(Writable.class).newInstance();
>> +    valueWritable = 
>> reader.getValueClass().asSubclass(Writable.class).newInstance();
>> +  }
>> +
>> +  /**
>> +   * The Iterator returned does not support remove()
>> +   * @return The {@link java.util.Iterator}
>> +   */
>> +  public Iterator<Vector> iterator() {
>> +    return new SFIterator();
>> +
>> +  }
>> +
>> +  private final class SFIterator implements Iterator<Vector>{
>> +    @Override
>> +    public boolean hasNext() {
>> +      //TODO: is this legitimate?  We can't call next here since it breaks 
>> the iterator contract
>> +      try {
>> +        return reader.getPosition() < fileLen;
>> +      } catch (IOException e) {
>> +        return false;
>> +      }
>> +    }
>> +
>> +    @Override
>> +    public Vector next() {
>> +      Vector result = null;
>> +      boolean valid = false;
>> +      try {
>> +        valid = reader.next(keyWritable, valueWritable);
>> +        if (valid){
>> +          result = ((VectorWritable) (useKey ? keyWritable : 
>> valueWritable)).get();
>> +        }
>> +      } catch (IOException e) {
>> +        throw new RuntimeException(e);
>> +      }
>> +
>> +      return result;
>> +    }
>> +
>> +    /**
>> +     * Not supported
>> +     */
>> +    public void remove() {
>> +      throw new UnsupportedOperationException();
>> +    }
>> +  }
>> +}
>> 
>> Modified: 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
>> URL: 
>> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff
>> ==============================================================================
>> --- 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
>>  (original)
>> +++ 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
>>  Fri Mar 25 14:28:12 2011
>> @@ -30,16 +30,16 @@ import org.apache.mahout.math.VectorWrit
>>  *
>>  * Closes the writer when done
>>  */
>> -public class SequenceFileVectorWriter implements VectorWriter {
>> +public class SequenceFileVectorWriter extends VectorWriter {
>>   private final SequenceFile.Writer writer;
>> -
>> +  long recNum = 0;
>>   public SequenceFileVectorWriter(SequenceFile.Writer writer) {
>>     this.writer = writer;
>>   }
>> 
>>   @Override
>>   public long write(Iterable<Vector> iterable, long maxDocs) throws 
>> IOException {
>> -    long recNum = 0;
>> +
>>     for (Vector point : iterable) {
>>       if (recNum >= maxDocs) {
>>         break;
>> @@ -51,7 +51,13 @@ public class SequenceFileVectorWriter im
>>     }
>>     return recNum;
>>   }
>> -
>> +
>> +  @Override
>> +  public void write(Vector vector) throws IOException {
>> +    writer.append(new LongWritable(recNum++), new VectorWritable(vector));
>> +
>> +  }
>> +
>>   @Override
>>   public long write(Iterable<Vector> iterable) throws IOException {
>>     return write(iterable, Long.MAX_VALUE);
>> 
>> Modified: 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
>> URL: 
>> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff
>> ==============================================================================
>> --- 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
>>  (original)
>> +++ 
>> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
>>  Fri Mar 25 14:28:12 2011
>> @@ -21,7 +21,7 @@ import java.io.IOException;
>> 
>>  import org.apache.mahout.math.Vector;
>> 
>> -public interface VectorWriter {
>> +public abstract class VectorWriter {
>>   /**
>>    * Write all values in the Iterable to the output
>>    * @param iterable The {@link Iterable} to loop over
>> @@ -29,7 +29,15 @@ public interface VectorWriter {
>>    * @throws IOException if there was a problem writing
>>    *
>>    */
>> -  long write(Iterable<Vector> iterable) throws IOException;
>> +  public abstract long write(Iterable<Vector> iterable) throws IOException;
>> +
>> +  /**
>> +   * Write out a vector
>> +   *
>> +   * @param vector The {@link org.apache.mahout.math.Vector} to write
>> +   * @throws IOException
>> +   */
>> +  public abstract void write(Vector vector) throws IOException;
>> 
>>   /**
>>    * Write the first <code>maxDocs</code> to the output.
>> @@ -38,12 +46,12 @@ public interface VectorWriter {
>>    * @return The number of docs written
>>    * @throws IOException if there was a problem writing
>>    */
>> -  long write(Iterable<Vector> iterable, long maxDocs) throws IOException;
>> +  public abstract long write(Iterable<Vector> iterable, long maxDocs) 
>> throws IOException;
>> 
>>   /**
>>    * Close any internally held resources.  If external Writers are passed 
>> in, the implementation should indicate
>>    * whether it also closes them
>>    * @throws IOException if there was an issue closing the item
>>    */
>> -  void close() throws IOException;
>> +  public abstract void close() throws IOException;
>>  }
>> 
>> Added: 
>> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java
>> URL: 
>> http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java?rev=1085397&view=auto
>> ==============================================================================
>> --- 
>> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java
>>  (added)
>> +++ 
>> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java
>>  Fri Mar 25 14:28:12 2011
>> @@ -0,0 +1,60 @@
>> +package org.apache.mahout.utils.vectors.csv;
>> +/**
>> + * Licensed to the Apache Software Foundation (ASF) under one or more
>> + * contributor license agreements.  See the NOTICE file distributed with
>> + * this work for additional information regarding copyright ownership.
>> + * The ASF licenses this file to You under the Apache License, Version 2.0
>> + * (the "License"); you may not use this file except in compliance with
>> + * the License.  You may obtain a copy of the License at
>> + *
>> + *     http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + * Unless required by applicable law or agreed to in writing, software
>> + * distributed under the License is distributed on an "AS IS" BASIS,
>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>> + * See the License for the specific language governing permissions and
>> + * limitations under the License.
>> + */
>> +
>> +import org.apache.mahout.math.Vector;
>> +import org.apache.mahout.utils.MahoutTestCase;
>> +import org.apache.mahout.utils.vectors.RandomVectorIterable;
>> +import org.apache.mahout.utils.vectors.VectorHelper;
>> +import org.apache.mahout.utils.vectors.io.JWriterVectorWriter;
>> +import org.junit.Test;
>> +
>> +import java.io.IOException;
>> +import java.io.StringReader;
>> +import java.io.StringWriter;
>> +
>> +
>> +/**
>> + *
>> + *
>> + **/
>> +public class CSVVectorIterableTest extends MahoutTestCase {
>> +
>> +
>> +  @Test
>> +  public void test() throws Exception {
>> +
>> +    StringWriter sWriter = new StringWriter();
>> +    JWriterVectorWriter jwvw = new JWriterVectorWriter(sWriter) {
>> +
>> +      protected void formatVector(Vector vector) throws IOException {
>> +        String vecStr = VectorHelper.vectorToCSVString(vector, false);
>> +        writer.write(vecStr);
>> +      }
>> +    };
>> +    Iterable<Vector> iter = new RandomVectorIterable(50);
>> +    jwvw.write(iter);
>> +    jwvw.close();
>> +    CSVVectorIterable csvIter = new CSVVectorIterable(new 
>> StringReader(sWriter.getBuffer().toString()));
>> +    int count = 0;
>> +    for (Vector vector : csvIter) {
>> +      //System.out.println("Vec: " + vector);
>> +      count++;
>> +    }
>> +    assertEquals(50, count);
>> +  }
>> +}
>> 
>> Added: 
>> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java
>> URL: 
>> http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java?rev=1085397&view=auto
>> ==============================================================================
>> --- 
>> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java
>>  (added)
>> +++ 
>> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java
>>  Fri Mar 25 14:28:12 2011
>> @@ -0,0 +1,39 @@
>> +package org.apache.mahout.utils.vectors.io;
>> +
>> +import org.apache.hadoop.conf.Configuration;
>> +import org.apache.hadoop.fs.FileSystem;
>> +import org.apache.hadoop.fs.Path;
>> +import org.apache.hadoop.io.LongWritable;
>> +import org.apache.hadoop.io.SequenceFile;
>> +import org.apache.mahout.math.Vector;
>> +import org.apache.mahout.math.VectorWritable;
>> +import org.apache.mahout.utils.MahoutTestCase;
>> +import org.apache.mahout.utils.vectors.RandomVectorIterable;
>> +import org.junit.Test;
>> +
>> +
>> +/**
>> + *
>> + *
>> + **/
>> +public class SequenceFileVectorIterableTest extends MahoutTestCase {
>> +
>> +
>> +  @Test
>> +  public void testSFVI() throws Exception {
>> +    Path path = getTestTempFilePath("sfvw");
>> +    Configuration conf = new Configuration();
>> +    FileSystem fs = FileSystem.get(conf);
>> +    SequenceFile.Writer seqWriter = new SequenceFile.Writer(fs, conf, path, 
>> LongWritable.class, VectorWritable.class);
>> +    SequenceFileVectorWriter writer = new 
>> SequenceFileVectorWriter(seqWriter);
>> +    Iterable<Vector> iter = new RandomVectorIterable(50);
>> +    writer.write(iter);
>> +    writer.close();
>> +    SequenceFileVectorIterable sfVIter = new SequenceFileVectorIterable(fs, 
>> path, conf, false);
>> +    int count = 0;
>> +    for (Vector vector : sfVIter) {
>> +      count++;
>> +    }
>> +    assertEquals(50, count);
>> +  }
>> +}
>> 
>> 
>> 

--------------------------
Grant Ingersoll
http://www.lucidimagination.com/

Search the Lucene ecosystem docs using Solr/Lucene:
http://www.lucidimagination.com/search

Reply via email to