That would be a typical change I am trying to fix with 622:
http://svn.apache.org/viewvc/mahout/trunk/utils/pom.xml?view=diff&r1=1085396&r2=1085397&pathrev=1085397



On Fri, Mar 25, 2011 at 7:28 AM,  <[email protected]> wrote:
> Author: gsingers
> Date: Fri Mar 25 14:28:12 2011
> New Revision: 1085397
>
> URL: http://svn.apache.org/viewvc?rev=1085397&view=rev
> Log:
> MAHOUT-548: add in some CSV support for creating vectors, as well as a few 
> other fixes for working with vectors
>
> Added:
>    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/
>    
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java
>    
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java
>    mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/
>    
> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java
>    
> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java
> Modified:
>    mahout/trunk/utils/pom.xml
>    
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
>    
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
>    
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
>    
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
>    
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
>
> Modified: mahout/trunk/utils/pom.xml
> URL: 
> http://svn.apache.org/viewvc/mahout/trunk/utils/pom.xml?rev=1085397&r1=1085396&r2=1085397&view=diff
> ==============================================================================
> --- mahout/trunk/utils/pom.xml (original)
> +++ mahout/trunk/utils/pom.xml Fri Mar 25 14:28:12 2011
> @@ -142,6 +142,11 @@
>       <type>test-jar</type>
>       <scope>test</scope>
>     </dependency>
> +    <dependency>
> +      <groupId>org.apache.solr</groupId>
> +      <artifactId>solr-commons-csv</artifactId>
> +      <version>1.4.1</version>
> +    </dependency>
>
>     <dependency>
>       <groupId>junit</groupId>
>
> Modified: 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
> URL: 
> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=1085397&r1=1085396&r2=1085397&view=diff
> ==============================================================================
> --- 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
>  (original)
> +++ 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
>  Fri Mar 25 14:28:12 2011
> @@ -77,16 +77,22 @@ public final class VectorDumper {
>     Option dictTypeOpt = 
> obuilder.withLongName("dictionaryType").withRequired(false).withArgument(
>             
> abuilder.withName("dictionaryType").withMinimum(1).withMaximum(1).create()).withDescription(
>             "The dictionary file type 
> (text|sequencefile)").withShortName("dt").create();
> -    Option centroidJSonOpt = 
> obuilder.withLongName("json").withRequired(false).withDescription(
> -            "Output the centroid as JSON.  Otherwise it substitutes in the 
> terms for vector cell entries")
> +    Option jsonOpt = 
> obuilder.withLongName("json").withRequired(false).withDescription(
> +            "Output the Vector as JSON.  Otherwise it substitutes in the 
> terms for vector cell entries")
>             .withShortName("j").create();
> +    Option csvOpt = 
> obuilder.withLongName("csv").withRequired(false).withDescription(
> +            "Output the Vector as CSV.  Otherwise it substitutes in the 
> terms for vector cell entries")
> +            .withShortName("c").create();
> +    Option namesAsCommentsOpt = 
> obuilder.withLongName("namesAsComments").withRequired(false).withDescription(
> +            "If using CSV output, optionally add a comment line for each 
> NamedVector (if the vector is one) printing out the name")
> +            .withShortName("n").create();
>     Option sizeOpt = obuilder.withLongName("sizeOnly").withRequired(false).
>             withDescription("Dump only the size of the 
> vector").withShortName("sz").create();
>     Option helpOpt = obuilder.withLongName("help").withDescription("Print out 
> help").withShortName("h")
>             .create();
>
>     Group group = 
> gbuilder.withName("Options").withOption(seqOpt).withOption(outputOpt).withOption(
> -            
> dictTypeOpt).withOption(dictOpt).withOption(centroidJSonOpt).withOption(vectorAsKeyOpt).withOption(
> +            
> dictTypeOpt).withOption(dictOpt).withOption(csvOpt).withOption(vectorAsKeyOpt).withOption(
>             printKeyOpt).withOption(sizeOpt).create();
>
>     try {
> @@ -122,10 +128,12 @@ public final class VectorDumper {
>             throw new OptionException(dictTypeOpt);
>           }
>         }
> -        boolean useJSON = cmdLine.hasOption(centroidJSonOpt);
> +        boolean useJSON = cmdLine.hasOption(jsonOpt);
> +        boolean useCSV = cmdLine.hasOption(csvOpt);
> +
>         boolean sizeOnly = cmdLine.hasOption(sizeOpt);
>         SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
> -
> +        boolean namesAsComments = cmdLine.hasOption(namesAsCommentsOpt);
>         Writable keyWritable = 
> reader.getKeyClass().asSubclass(Writable.class).newInstance();
>         Writable valueWritable = 
> reader.getValueClass().asSubclass(Writable.class).newInstance();
>         boolean transposeKeyValue = cmdLine.hasOption(vectorAsKeyOpt);
> @@ -140,6 +148,16 @@ public final class VectorDumper {
>           try {
>             boolean printKey = cmdLine.hasOption(printKeyOpt);
>             long i = 0;
> +            if (useCSV && dictionary != null){
> +              writer.write("#");
> +              for (int j = 0; j < dictionary.length; j++) {
> +                writer.write(dictionary[j]);
> +                if (j < dictionary.length - 1){
> +                  writer.write(',');
> +                }
> +              }
> +              writer.write('\n');
> +            }
>             while (reader.next(keyWritable, valueWritable)) {
>               if (printKey) {
>                 Writable notTheVectorWritable = transposeKeyValue ? 
> valueWritable : keyWritable;
> @@ -159,7 +177,14 @@ public final class VectorDumper {
>                 writer.write(String.valueOf(vector.size()));
>                 writer.write('\n');
>               } else {
> -                String fmtStr = useJSON ? vector.asFormatString() : 
> VectorHelper.vectorToString(vector, dictionary);
> +                String fmtStr;
> +                if (useJSON){
> +                  fmtStr = VectorHelper.vectorToJSONString(vector, 
> dictionary);
> +                } else if (useCSV){
> +                  fmtStr = VectorHelper.vectorToCSVString(vector, 
> namesAsComments);
> +                } else {
> +                  fmtStr = vector.asFormatString();
> +                }
>                 writer.write(fmtStr);
>                 writer.write('\n');
>               }
>
> Modified: 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
> URL: 
> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java?rev=1085397&r1=1085396&r2=1085397&view=diff
> ==============================================================================
> --- 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
>  (original)
> +++ 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
>  Fri Mar 25 14:28:12 2011
> @@ -40,14 +40,45 @@ import org.apache.mahout.math.map.OpenOb
>  public final class VectorHelper {
>
>   private static final Pattern TAB_PATTERN = Pattern.compile("\t");
> +
>
>   private VectorHelper() { }
> -
> +
> +  public static String vectorToCSVString(Vector vector, boolean 
> namesAsComments){
> +    StringBuilder bldr = new StringBuilder(2048);
> +    try {
> +      vectorToCSVString(vector, namesAsComments, bldr);
> +    } catch (IOException e) {
> +      throw new RuntimeException(e);
> +    }
> +    return bldr.toString();
> +  }
> +
> +  public static void vectorToCSVString(Vector vector, boolean 
> namesAsComments,
> +                                       Appendable bldr) throws IOException {
> +    if (namesAsComments && vector instanceof NamedVector){
> +      bldr.append("#").append(((NamedVector)vector).getName()).append('\n');
> +    }
> +    Iterator<Vector.Element> iter = vector.iterator();
> +    boolean first = true;
> +    while (iter.hasNext()) {
> +      if (first) {
> +        first = false;
> +      } else {
> +        bldr.append(",");
> +      }
> +      Vector.Element elt = iter.next();
> +      bldr.append(String.valueOf(elt.get()));
> +    }
> +    bldr.append('\n');
> +  }
> +
> +
>   /**
>    * @return a String from a vector that fills in the values with the 
> appropriate value from a dictionary where
>    * each the ith entry is the term for the ith vector cell.
>    */
> -  public static String vectorToString(Vector vector, String[] dictionary) {
> +  public static String vectorToJSONString(Vector vector, String[] 
> dictionary) {
>     StringBuilder bldr = new StringBuilder(2048);
>
>     if (vector instanceof NamedVector) {
> @@ -67,12 +98,13 @@ public final class VectorHelper {
>       if (dictionary != null) {
>         bldr.append(dictionary[elt.index()]);
>       } else {
> -        bldr.append(elt.index());
> +        bldr.append(String.valueOf(elt.index()));
>       }
> -      bldr.append(':').append(elt.get());
> +      bldr.append(':').append(String.valueOf(elt.get()));
>     }
>     return bldr.append('}').toString();
>   }
> +
>
>   /**
>    * Read in a dictionary file. Format is:
>
> Added: 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java
> URL: 
> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java?rev=1085397&view=auto
> ==============================================================================
> --- 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java
>  (added)
> +++ 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java
>  Fri Mar 25 14:28:12 2011
> @@ -0,0 +1,94 @@
> +package org.apache.mahout.utils.vectors.csv;
> +/**
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements.  See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License.  You may obtain a copy of the License at
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +import org.apache.commons.csv.CSVParser;
> +import org.apache.commons.csv.CSVStrategy;
> +import org.apache.mahout.math.DenseVector;
> +import org.apache.mahout.math.Vector;
> +
> +import java.io.BufferedReader;
> +import java.io.IOException;
> +import java.io.Reader;
> +import java.util.Iterator;
> +
> +
> +/**
> + * Iterates a CSV file and produces {@link org.apache.mahout.math.Vector}.
> + * <br/>
> + * The Iterator returned throws {@link UnsupportedOperationException} for 
> the {@link java.util.Iterator#remove()} method.
> + * <p/>
> + * Assumes DenseVector for now, but in the future may have the option of 
> mapping columns to sparse format
> + * <p/>
> + * The Iterator is not thread-safe.
> + *
> + *
> + **/
> +public class CSVVectorIterable implements Iterable<Vector> {
> +  protected CSVParser parser;
> +  protected String [] line;
> +
> +  public CSVVectorIterable(Reader reader) throws IOException {
> +    parser = new CSVParser(reader);
> +    line = parser.getLine();
> +  }
> +
> +  public CSVVectorIterable(Reader reader, CSVStrategy strategy) throws 
> IOException {
> +    parser = new CSVParser(reader, strategy);
> +    line = parser.getLine();
> +  }
> +
> +
> +  @Override
> +  public Iterator<Vector> iterator() {
> +    return new CSVIterator();
> +  }
> +
> +  private class CSVIterator implements Iterator<Vector>{
> +
> +
> +    public CSVIterator() {
> +    }
> +
> +    @Override
> +    public boolean hasNext() {
> +      return line != null;
> +    }
> +
> +    @Override
> +    public Vector next() {
> +
> +      Vector result = null;
> +      result = new DenseVector(line.length);
> +      for (int i = 0; i < line.length; i++) {
> +        result.setQuick(i, Double.parseDouble(line[i]));
> +      }
> +      //move the line forward
> +      try {
> +        line = parser.getLine();
> +      } catch (IOException e) {
> +        throw new RuntimeException(e);
> +      }
> +      return result;
> +    }
> +
> +    @Override
> +    public void remove() {
> +      throw new UnsupportedOperationException();
> +    }
> +  }
> +}
>
> Modified: 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
> URL: 
> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff
> ==============================================================================
> --- 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
>  (original)
> +++ 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java
>  Fri Mar 25 14:28:12 2011
> @@ -25,8 +25,8 @@ import org.apache.mahout.math.Vector;
>  /**
>  * Write out the vectors to any {@link java.io.Writer} using {@link 
> org.apache.mahout.math.Vector#asFormatString()}.
>  */
> -public class JWriterVectorWriter implements VectorWriter {
> -  private final Writer writer;
> +public class JWriterVectorWriter extends VectorWriter {
> +  protected final Writer writer;
>
>   public JWriterVectorWriter(Writer writer) {
>     this.writer = writer;
> @@ -45,14 +45,22 @@ public class JWriterVectorWriter impleme
>       if (result >= maxDocs) {
>         break;
>       }
> -      writer.write(vector.asFormatString());
> -      writer.write('\n');
> -
> +      formatVector(vector);
>       result++;
>     }
>     return result;
>   }
> -
> +
> +  protected void formatVector(Vector vector) throws IOException {
> +    writer.write(vector.asFormatString());
> +    writer.write('\n');
> +  }
> +
> +  @Override
> +  public void write(Vector vector) throws IOException {
> +    formatVector(vector);
> +  }
> +
>   @Override
>   public void close() throws IOException {
>     writer.flush();
>
> Added: 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java
> URL: 
> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java?rev=1085397&view=auto
> ==============================================================================
> --- 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java
>  (added)
> +++ 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java
>  Fri Mar 25 14:28:12 2011
> @@ -0,0 +1,105 @@
> +package org.apache.mahout.utils.vectors.io;
> +/**
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements.  See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License.  You may obtain a copy of the License at
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +import org.apache.hadoop.conf.Configuration;
> +import org.apache.hadoop.fs.ContentSummary;
> +import org.apache.hadoop.fs.FileSystem;
> +import org.apache.hadoop.fs.Path;
> +import org.apache.hadoop.io.SequenceFile;
> +import org.apache.hadoop.io.Writable;
> +import org.apache.mahout.math.Vector;
> +import org.apache.mahout.math.VectorWritable;
> +
> +import java.io.IOException;
> +import java.util.Iterator;
> +
> +
> +/**
> + * Given a Sequence File containing vectors (actually, {@link 
> org.apache.mahout.math.VectorWritable}, iterate over it.
> + *
> + **/
> +public class SequenceFileVectorIterable implements Iterable<Vector>{
> +  protected SequenceFile.Reader reader;
> +  protected long fileLen;
> +  protected Writable keyWritable;
> +  protected Writable valueWritable;
> +  protected boolean useKey;
> +
> +  /**
> +   * Construct the Iterable
> +   * @param fs The {@link org.apache.hadoop.fs.FileSystem} containing the 
> {@link org.apache.hadoop.io.SequenceFile}
> +   * @param file The {@link org.apache.hadoop.fs.Path} containing the file
> +   * @param conf The {@link org.apache.hadoop.conf.Configuration} to use
> +   * @param useKey If true, use the key as the {@link 
> org.apache.mahout.math.VectorWritable}, otherwise use the value
> +   * @throws IllegalAccessException
> +   * @throws InstantiationException
> +   * @throws IOException
> +   */
> +  public SequenceFileVectorIterable(FileSystem fs, Path file, Configuration 
> conf, boolean useKey) throws IllegalAccessException, InstantiationException, 
> IOException {
> +    this.reader = new SequenceFile.Reader(fs, file, conf);
> +    ContentSummary summary = fs.getContentSummary(file);
> +    fileLen = summary.getLength();
> +    this.useKey = useKey;
> +    keyWritable = 
> reader.getKeyClass().asSubclass(Writable.class).newInstance();
> +    valueWritable = 
> reader.getValueClass().asSubclass(Writable.class).newInstance();
> +  }
> +
> +  /**
> +   * The Iterator returned does not support remove()
> +   * @return The {@link java.util.Iterator}
> +   */
> +  public Iterator<Vector> iterator() {
> +    return new SFIterator();
> +
> +  }
> +
> +  private final class SFIterator implements Iterator<Vector>{
> +    @Override
> +    public boolean hasNext() {
> +      //TODO: is this legitimate?  We can't call next here since it breaks 
> the iterator contract
> +      try {
> +        return reader.getPosition() < fileLen;
> +      } catch (IOException e) {
> +        return false;
> +      }
> +    }
> +
> +    @Override
> +    public Vector next() {
> +      Vector result = null;
> +      boolean valid = false;
> +      try {
> +        valid = reader.next(keyWritable, valueWritable);
> +        if (valid){
> +          result = ((VectorWritable) (useKey ? keyWritable : 
> valueWritable)).get();
> +        }
> +      } catch (IOException e) {
> +        throw new RuntimeException(e);
> +      }
> +
> +      return result;
> +    }
> +
> +    /**
> +     * Not supported
> +     */
> +    public void remove() {
> +      throw new UnsupportedOperationException();
> +    }
> +  }
> +}
>
> Modified: 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
> URL: 
> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff
> ==============================================================================
> --- 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
>  (original)
> +++ 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
>  Fri Mar 25 14:28:12 2011
> @@ -30,16 +30,16 @@ import org.apache.mahout.math.VectorWrit
>  *
>  * Closes the writer when done
>  */
> -public class SequenceFileVectorWriter implements VectorWriter {
> +public class SequenceFileVectorWriter extends VectorWriter {
>   private final SequenceFile.Writer writer;
> -
> +  long recNum = 0;
>   public SequenceFileVectorWriter(SequenceFile.Writer writer) {
>     this.writer = writer;
>   }
>
>   @Override
>   public long write(Iterable<Vector> iterable, long maxDocs) throws 
> IOException {
> -    long recNum = 0;
> +
>     for (Vector point : iterable) {
>       if (recNum >= maxDocs) {
>         break;
> @@ -51,7 +51,13 @@ public class SequenceFileVectorWriter im
>     }
>     return recNum;
>   }
> -
> +
> +  @Override
> +  public void write(Vector vector) throws IOException {
> +    writer.append(new LongWritable(recNum++), new VectorWritable(vector));
> +
> +  }
> +
>   @Override
>   public long write(Iterable<Vector> iterable) throws IOException {
>     return write(iterable, Long.MAX_VALUE);
>
> Modified: 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
> URL: 
> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff
> ==============================================================================
> --- 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
>  (original)
> +++ 
> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
>  Fri Mar 25 14:28:12 2011
> @@ -21,7 +21,7 @@ import java.io.IOException;
>
>  import org.apache.mahout.math.Vector;
>
> -public interface VectorWriter {
> +public abstract class VectorWriter {
>   /**
>    * Write all values in the Iterable to the output
>    * @param iterable The {@link Iterable} to loop over
> @@ -29,7 +29,15 @@ public interface VectorWriter {
>    * @throws IOException if there was a problem writing
>    *
>    */
> -  long write(Iterable<Vector> iterable) throws IOException;
> +  public abstract long write(Iterable<Vector> iterable) throws IOException;
> +
> +  /**
> +   * Write out a vector
> +   *
> +   * @param vector The {@link org.apache.mahout.math.Vector} to write
> +   * @throws IOException
> +   */
> +  public abstract void write(Vector vector) throws IOException;
>
>   /**
>    * Write the first <code>maxDocs</code> to the output.
> @@ -38,12 +46,12 @@ public interface VectorWriter {
>    * @return The number of docs written
>    * @throws IOException if there was a problem writing
>    */
> -  long write(Iterable<Vector> iterable, long maxDocs) throws IOException;
> +  public abstract long write(Iterable<Vector> iterable, long maxDocs) throws 
> IOException;
>
>   /**
>    * Close any internally held resources.  If external Writers are passed in, 
> the implementation should indicate
>    * whether it also closes them
>    * @throws IOException if there was an issue closing the item
>    */
> -  void close() throws IOException;
> +  public abstract void close() throws IOException;
>  }
>
> Added: 
> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java
> URL: 
> http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java?rev=1085397&view=auto
> ==============================================================================
> --- 
> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java
>  (added)
> +++ 
> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java
>  Fri Mar 25 14:28:12 2011
> @@ -0,0 +1,60 @@
> +package org.apache.mahout.utils.vectors.csv;
> +/**
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements.  See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License.  You may obtain a copy of the License at
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +import org.apache.mahout.math.Vector;
> +import org.apache.mahout.utils.MahoutTestCase;
> +import org.apache.mahout.utils.vectors.RandomVectorIterable;
> +import org.apache.mahout.utils.vectors.VectorHelper;
> +import org.apache.mahout.utils.vectors.io.JWriterVectorWriter;
> +import org.junit.Test;
> +
> +import java.io.IOException;
> +import java.io.StringReader;
> +import java.io.StringWriter;
> +
> +
> +/**
> + *
> + *
> + **/
> +public class CSVVectorIterableTest extends MahoutTestCase {
> +
> +
> +  @Test
> +  public void test() throws Exception {
> +
> +    StringWriter sWriter = new StringWriter();
> +    JWriterVectorWriter jwvw = new JWriterVectorWriter(sWriter) {
> +
> +      protected void formatVector(Vector vector) throws IOException {
> +        String vecStr = VectorHelper.vectorToCSVString(vector, false);
> +        writer.write(vecStr);
> +      }
> +    };
> +    Iterable<Vector> iter = new RandomVectorIterable(50);
> +    jwvw.write(iter);
> +    jwvw.close();
> +    CSVVectorIterable csvIter = new CSVVectorIterable(new 
> StringReader(sWriter.getBuffer().toString()));
> +    int count = 0;
> +    for (Vector vector : csvIter) {
> +      //System.out.println("Vec: " + vector);
> +      count++;
> +    }
> +    assertEquals(50, count);
> +  }
> +}
>
> Added: 
> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java
> URL: 
> http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java?rev=1085397&view=auto
> ==============================================================================
> --- 
> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java
>  (added)
> +++ 
> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java
>  Fri Mar 25 14:28:12 2011
> @@ -0,0 +1,39 @@
> +package org.apache.mahout.utils.vectors.io;
> +
> +import org.apache.hadoop.conf.Configuration;
> +import org.apache.hadoop.fs.FileSystem;
> +import org.apache.hadoop.fs.Path;
> +import org.apache.hadoop.io.LongWritable;
> +import org.apache.hadoop.io.SequenceFile;
> +import org.apache.mahout.math.Vector;
> +import org.apache.mahout.math.VectorWritable;
> +import org.apache.mahout.utils.MahoutTestCase;
> +import org.apache.mahout.utils.vectors.RandomVectorIterable;
> +import org.junit.Test;
> +
> +
> +/**
> + *
> + *
> + **/
> +public class SequenceFileVectorIterableTest extends MahoutTestCase {
> +
> +
> +  @Test
> +  public void testSFVI() throws Exception {
> +    Path path = getTestTempFilePath("sfvw");
> +    Configuration conf = new Configuration();
> +    FileSystem fs = FileSystem.get(conf);
> +    SequenceFile.Writer seqWriter = new SequenceFile.Writer(fs, conf, path, 
> LongWritable.class, VectorWritable.class);
> +    SequenceFileVectorWriter writer = new 
> SequenceFileVectorWriter(seqWriter);
> +    Iterable<Vector> iter = new RandomVectorIterable(50);
> +    writer.write(iter);
> +    writer.close();
> +    SequenceFileVectorIterable sfVIter = new SequenceFileVectorIterable(fs, 
> path, conf, false);
> +    int count = 0;
> +    for (Vector vector : sfVIter) {
> +      count++;
> +    }
> +    assertEquals(50, count);
> +  }
> +}
>
>
>

Reply via email to