That would be a typical change I am trying to fix with 622: http://svn.apache.org/viewvc/mahout/trunk/utils/pom.xml?view=diff&r1=1085396&r2=1085397&pathrev=1085397
On Fri, Mar 25, 2011 at 7:28 AM, <[email protected]> wrote: > Author: gsingers > Date: Fri Mar 25 14:28:12 2011 > New Revision: 1085397 > > URL: http://svn.apache.org/viewvc?rev=1085397&view=rev > Log: > MAHOUT-548: add in some CSV support for creating vectors, as well as a few > other fixes for working with vectors > > Added: > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/ > > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java > > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java > mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/ > > mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java > > mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java > Modified: > mahout/trunk/utils/pom.xml > > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java > > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java > > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java > > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java > > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java > > Modified: mahout/trunk/utils/pom.xml > URL: > http://svn.apache.org/viewvc/mahout/trunk/utils/pom.xml?rev=1085397&r1=1085396&r2=1085397&view=diff > ============================================================================== > --- mahout/trunk/utils/pom.xml (original) > +++ mahout/trunk/utils/pom.xml Fri Mar 25 14:28:12 2011 > @@ -142,6 +142,11 @@ > <type>test-jar</type> > <scope>test</scope> > </dependency> > + <dependency> > + <groupId>org.apache.solr</groupId> > + <artifactId>solr-commons-csv</artifactId> > + <version>1.4.1</version> > + </dependency> > > <dependency> > <groupId>junit</groupId> > > 
Modified: > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java > URL: > http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=1085397&r1=1085396&r2=1085397&view=diff > ============================================================================== > --- > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java > (original) > +++ > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java > Fri Mar 25 14:28:12 2011 > @@ -77,16 +77,22 @@ public final class VectorDumper { > Option dictTypeOpt = > obuilder.withLongName("dictionaryType").withRequired(false).withArgument( > > abuilder.withName("dictionaryType").withMinimum(1).withMaximum(1).create()).withDescription( > "The dictionary file type > (text|sequencefile)").withShortName("dt").create(); > - Option centroidJSonOpt = > obuilder.withLongName("json").withRequired(false).withDescription( > - "Output the centroid as JSON. Otherwise it substitutes in the > terms for vector cell entries") > + Option jsonOpt = > obuilder.withLongName("json").withRequired(false).withDescription( > + "Output the Vector as JSON. Otherwise it substitutes in the > terms for vector cell entries") > .withShortName("j").create(); > + Option csvOpt = > obuilder.withLongName("csv").withRequired(false).withDescription( > + "Output the Vector as CSV. Otherwise it substitutes in the > terms for vector cell entries") > + .withShortName("c").create(); > + Option namesAsCommentsOpt = > obuilder.withLongName("namesAsComments").withRequired(false).withDescription( > + "If using CSV output, optionally add a comment line for each > NamedVector (if the vector is one) printing out the name") > + .withShortName("n").create(); > Option sizeOpt = obuilder.withLongName("sizeOnly").withRequired(false). 
> withDescription("Dump only the size of the > vector").withShortName("sz").create(); > Option helpOpt = obuilder.withLongName("help").withDescription("Print out > help").withShortName("h") > .create(); > > Group group = > gbuilder.withName("Options").withOption(seqOpt).withOption(outputOpt).withOption( > - > dictTypeOpt).withOption(dictOpt).withOption(centroidJSonOpt).withOption(vectorAsKeyOpt).withOption( > + > dictTypeOpt).withOption(dictOpt).withOption(csvOpt).withOption(vectorAsKeyOpt).withOption( > printKeyOpt).withOption(sizeOpt).create(); > > try { > @@ -122,10 +128,12 @@ public final class VectorDumper { > throw new OptionException(dictTypeOpt); > } > } > - boolean useJSON = cmdLine.hasOption(centroidJSonOpt); > + boolean useJSON = cmdLine.hasOption(jsonOpt); > + boolean useCSV = cmdLine.hasOption(csvOpt); > + > boolean sizeOnly = cmdLine.hasOption(sizeOpt); > SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); > - > + boolean namesAsComments = cmdLine.hasOption(namesAsCommentsOpt); > Writable keyWritable = > reader.getKeyClass().asSubclass(Writable.class).newInstance(); > Writable valueWritable = > reader.getValueClass().asSubclass(Writable.class).newInstance(); > boolean transposeKeyValue = cmdLine.hasOption(vectorAsKeyOpt); > @@ -140,6 +148,16 @@ public final class VectorDumper { > try { > boolean printKey = cmdLine.hasOption(printKeyOpt); > long i = 0; > + if (useCSV && dictionary != null){ > + writer.write("#"); > + for (int j = 0; j < dictionary.length; j++) { > + writer.write(dictionary[j]); > + if (j < dictionary.length - 1){ > + writer.write(','); > + } > + } > + writer.write('\n'); > + } > while (reader.next(keyWritable, valueWritable)) { > if (printKey) { > Writable notTheVectorWritable = transposeKeyValue ? > valueWritable : keyWritable; > @@ -159,7 +177,14 @@ public final class VectorDumper { > writer.write(String.valueOf(vector.size())); > writer.write('\n'); > } else { > - String fmtStr = useJSON ? 
vector.asFormatString() : > VectorHelper.vectorToString(vector, dictionary); > + String fmtStr; > + if (useJSON){ > + fmtStr = VectorHelper.vectorToJSONString(vector, > dictionary); > + } else if (useCSV){ > + fmtStr = VectorHelper.vectorToCSVString(vector, > namesAsComments); > + } else { > + fmtStr = vector.asFormatString(); > + } > writer.write(fmtStr); > writer.write('\n'); > } > > Modified: > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java > URL: > http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java?rev=1085397&r1=1085396&r2=1085397&view=diff > ============================================================================== > --- > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java > (original) > +++ > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java > Fri Mar 25 14:28:12 2011 > @@ -40,14 +40,45 @@ import org.apache.mahout.math.map.OpenOb > public final class VectorHelper { > > private static final Pattern TAB_PATTERN = Pattern.compile("\t"); > + > > private VectorHelper() { } > - > + > + public static String vectorToCSVString(Vector vector, boolean > namesAsComments){ > + StringBuilder bldr = new StringBuilder(2048); > + try { > + vectorToCSVString(vector, namesAsComments, bldr); > + } catch (IOException e) { > + throw new RuntimeException(e); > + } > + return bldr.toString(); > + } > + > + public static void vectorToCSVString(Vector vector, boolean > namesAsComments, > + Appendable bldr) throws IOException { > + if (namesAsComments && vector instanceof NamedVector){ > + bldr.append("#").append(((NamedVector)vector).getName()).append('\n'); > + } > + Iterator<Vector.Element> iter = vector.iterator(); > + boolean first = true; > + while (iter.hasNext()) { > + if (first) { > + first = false; > + } else { > + bldr.append(","); > + } > + Vector.Element elt = iter.next(); > + 
bldr.append(String.valueOf(elt.get())); > + } > + bldr.append('\n'); > + } > + > + > /** > * @return a String from a vector that fills in the values with the > appropriate value from a dictionary where > * each the ith entry is the term for the ith vector cell. > */ > - public static String vectorToString(Vector vector, String[] dictionary) { > + public static String vectorToJSONString(Vector vector, String[] > dictionary) { > StringBuilder bldr = new StringBuilder(2048); > > if (vector instanceof NamedVector) { > @@ -67,12 +98,13 @@ public final class VectorHelper { > if (dictionary != null) { > bldr.append(dictionary[elt.index()]); > } else { > - bldr.append(elt.index()); > + bldr.append(String.valueOf(elt.index())); > } > - bldr.append(':').append(elt.get()); > + bldr.append(':').append(String.valueOf(elt.get())); > } > return bldr.append('}').toString(); > } > + > > /** > * Read in a dictionary file. Format is: > > Added: > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java > URL: > http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java?rev=1085397&view=auto > ============================================================================== > --- > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java > (added) > +++ > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java > Fri Mar 25 14:28:12 2011 > @@ -0,0 +1,94 @@ > +package org.apache.mahout.utils.vectors.csv; > +/** > + * Licensed to the Apache Software Foundation (ASF) under one or more > + * contributor license agreements. See the NOTICE file distributed with > + * this work for additional information regarding copyright ownership. > + * The ASF licenses this file to You under the Apache License, Version 2.0 > + * (the "License"); you may not use this file except in compliance with > + * the License. 
You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +import org.apache.commons.csv.CSVParser; > +import org.apache.commons.csv.CSVStrategy; > +import org.apache.mahout.math.DenseVector; > +import org.apache.mahout.math.Vector; > + > +import java.io.BufferedReader; > +import java.io.IOException; > +import java.io.Reader; > +import java.util.Iterator; > + > + > +/** > + * Iterates a CSV file and produces {@link org.apache.mahout.math.Vector}. > + * <br/> > + * The Iterator returned throws {@link UnsupportedOperationException} for > the {@link java.util.Iterator#remove()} method. > + * <p/> > + * Assumes DenseVector for now, but in the future may have the option of > mapping columns to sparse format > + * <p/> > + * The Iterator is not thread-safe. 
> + * > + * > + **/ > +public class CSVVectorIterable implements Iterable<Vector> { > + protected CSVParser parser; > + protected String [] line; > + > + public CSVVectorIterable(Reader reader) throws IOException { > + parser = new CSVParser(reader); > + line = parser.getLine(); > + } > + > + public CSVVectorIterable(Reader reader, CSVStrategy strategy) throws > IOException { > + parser = new CSVParser(reader, strategy); > + line = parser.getLine(); > + } > + > + > + @Override > + public Iterator<Vector> iterator() { > + return new CSVIterator(); > + } > + > + private class CSVIterator implements Iterator<Vector>{ > + > + > + public CSVIterator() { > + } > + > + @Override > + public boolean hasNext() { > + return line != null; > + } > + > + @Override > + public Vector next() { > + > + Vector result = null; > + result = new DenseVector(line.length); > + for (int i = 0; i < line.length; i++) { > + result.setQuick(i, Double.parseDouble(line[i])); > + } > + //move the line forward > + try { > + line = parser.getLine(); > + } catch (IOException e) { > + throw new RuntimeException(e); > + } > + return result; > + } > + > + @Override > + public void remove() { > + throw new UnsupportedOperationException(); > + } > + } > +} > > Modified: > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java > URL: > http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff > ============================================================================== > --- > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java > (original) > +++ > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java > Fri Mar 25 14:28:12 2011 > @@ -25,8 +25,8 @@ import org.apache.mahout.math.Vector; > /** > * Write out the vectors to any {@link java.io.Writer} using {@link > 
org.apache.mahout.math.Vector#asFormatString()}. > */ > -public class JWriterVectorWriter implements VectorWriter { > - private final Writer writer; > +public class JWriterVectorWriter extends VectorWriter { > + protected final Writer writer; > > public JWriterVectorWriter(Writer writer) { > this.writer = writer; > @@ -45,14 +45,22 @@ public class JWriterVectorWriter impleme > if (result >= maxDocs) { > break; > } > - writer.write(vector.asFormatString()); > - writer.write('\n'); > - > + formatVector(vector); > result++; > } > return result; > } > - > + > + protected void formatVector(Vector vector) throws IOException { > + writer.write(vector.asFormatString()); > + writer.write('\n'); > + } > + > + @Override > + public void write(Vector vector) throws IOException { > + formatVector(vector); > + } > + > @Override > public void close() throws IOException { > writer.flush(); > > Added: > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java > URL: > http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java?rev=1085397&view=auto > ============================================================================== > --- > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java > (added) > +++ > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java > Fri Mar 25 14:28:12 2011 > @@ -0,0 +1,105 @@ > +package org.apache.mahout.utils.vectors.io; > +/** > + * Licensed to the Apache Software Foundation (ASF) under one or more > + * contributor license agreements. See the NOTICE file distributed with > + * this work for additional information regarding copyright ownership. > + * The ASF licenses this file to You under the Apache License, Version 2.0 > + * (the "License"); you may not use this file except in compliance with > + * the License. 
You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +import org.apache.hadoop.conf.Configuration; > +import org.apache.hadoop.fs.ContentSummary; > +import org.apache.hadoop.fs.FileSystem; > +import org.apache.hadoop.fs.Path; > +import org.apache.hadoop.io.SequenceFile; > +import org.apache.hadoop.io.Writable; > +import org.apache.mahout.math.Vector; > +import org.apache.mahout.math.VectorWritable; > + > +import java.io.IOException; > +import java.util.Iterator; > + > + > +/** > + * Given a Sequence File containing vectors (actually, {@link > org.apache.mahout.math.VectorWritable}, iterate over it. 
> + * > + **/ > +public class SequenceFileVectorIterable implements Iterable<Vector>{ > + protected SequenceFile.Reader reader; > + protected long fileLen; > + protected Writable keyWritable; > + protected Writable valueWritable; > + protected boolean useKey; > + > + /** > + * Construct the Iterable > + * @param fs The {@link org.apache.hadoop.fs.FileSystem} containing the > {@link org.apache.hadoop.io.SequenceFile} > + * @param file The {@link org.apache.hadoop.fs.Path} containing the file > + * @param conf The {@link org.apache.hadoop.conf.Configuration} to use > + * @param useKey If true, use the key as the {@link > org.apache.mahout.math.VectorWritable}, otherwise use the value > + * @throws IllegalAccessException > + * @throws InstantiationException > + * @throws IOException > + */ > + public SequenceFileVectorIterable(FileSystem fs, Path file, Configuration > conf, boolean useKey) throws IllegalAccessException, InstantiationException, > IOException { > + this.reader = new SequenceFile.Reader(fs, file, conf); > + ContentSummary summary = fs.getContentSummary(file); > + fileLen = summary.getLength(); > + this.useKey = useKey; > + keyWritable = > reader.getKeyClass().asSubclass(Writable.class).newInstance(); > + valueWritable = > reader.getValueClass().asSubclass(Writable.class).newInstance(); > + } > + > + /** > + * The Iterator returned does not support remove() > + * @return The {@link java.util.Iterator} > + */ > + public Iterator<Vector> iterator() { > + return new SFIterator(); > + > + } > + > + private final class SFIterator implements Iterator<Vector>{ > + @Override > + public boolean hasNext() { > + //TODO: is this legitimate? 
We can't call next here since it breaks > the iterator contract > + try { > + return reader.getPosition() < fileLen; > + } catch (IOException e) { > + return false; > + } > + } > + > + @Override > + public Vector next() { > + Vector result = null; > + boolean valid = false; > + try { > + valid = reader.next(keyWritable, valueWritable); > + if (valid){ > + result = ((VectorWritable) (useKey ? keyWritable : > valueWritable)).get(); > + } > + } catch (IOException e) { > + throw new RuntimeException(e); > + } > + > + return result; > + } > + > + /** > + * Not supported > + */ > + public void remove() { > + throw new UnsupportedOperationException(); > + } > + } > +} > > Modified: > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java > URL: > http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff > ============================================================================== > --- > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java > (original) > +++ > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java > Fri Mar 25 14:28:12 2011 > @@ -30,16 +30,16 @@ import org.apache.mahout.math.VectorWrit > * > * Closes the writer when done > */ > -public class SequenceFileVectorWriter implements VectorWriter { > +public class SequenceFileVectorWriter extends VectorWriter { > private final SequenceFile.Writer writer; > - > + long recNum = 0; > public SequenceFileVectorWriter(SequenceFile.Writer writer) { > this.writer = writer; > } > > @Override > public long write(Iterable<Vector> iterable, long maxDocs) throws > IOException { > - long recNum = 0; > + > for (Vector point : iterable) { > if (recNum >= maxDocs) { > break; > @@ -51,7 +51,13 @@ public class SequenceFileVectorWriter im > } > return recNum; > } > - > + > + @Override > + 
public void write(Vector vector) throws IOException { > + writer.append(new LongWritable(recNum++), new VectorWritable(vector)); > + > + } > + > @Override > public long write(Iterable<Vector> iterable) throws IOException { > return write(iterable, Long.MAX_VALUE); > > Modified: > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java > URL: > http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff > ============================================================================== > --- > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java > (original) > +++ > mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java > Fri Mar 25 14:28:12 2011 > @@ -21,7 +21,7 @@ import java.io.IOException; > > import org.apache.mahout.math.Vector; > > -public interface VectorWriter { > +public abstract class VectorWriter { > /** > * Write all values in the Iterable to the output > * @param iterable The {@link Iterable} to loop over > @@ -29,7 +29,15 @@ public interface VectorWriter { > * @throws IOException if there was a problem writing > * > */ > - long write(Iterable<Vector> iterable) throws IOException; > + public abstract long write(Iterable<Vector> iterable) throws IOException; > + > + /** > + * Write out a vector > + * > + * @param vector The {@link org.apache.mahout.math.Vector} to write > + * @throws IOException > + */ > + public abstract void write(Vector vector) throws IOException; > > /** > * Write the first <code>maxDocs</code> to the output. 
> @@ -38,12 +46,12 @@ public interface VectorWriter { > * @return The number of docs written > * @throws IOException if there was a problem writing > */ > - long write(Iterable<Vector> iterable, long maxDocs) throws IOException; > + public abstract long write(Iterable<Vector> iterable, long maxDocs) throws > IOException; > > /** > * Close any internally held resources. If external Writers are passed in, > the implementation should indicate > * whether it also closes them > * @throws IOException if there was an issue closing the item > */ > - void close() throws IOException; > + public abstract void close() throws IOException; > } > > Added: > mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java > URL: > http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java?rev=1085397&view=auto > ============================================================================== > --- > mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java > (added) > +++ > mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java > Fri Mar 25 14:28:12 2011 > @@ -0,0 +1,60 @@ > +package org.apache.mahout.utils.vectors.csv; > +/** > + * Licensed to the Apache Software Foundation (ASF) under one or more > + * contributor license agreements. See the NOTICE file distributed with > + * this work for additional information regarding copyright ownership. > + * The ASF licenses this file to You under the Apache License, Version 2.0 > + * (the "License"); you may not use this file except in compliance with > + * the License. 
You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +import org.apache.mahout.math.Vector; > +import org.apache.mahout.utils.MahoutTestCase; > +import org.apache.mahout.utils.vectors.RandomVectorIterable; > +import org.apache.mahout.utils.vectors.VectorHelper; > +import org.apache.mahout.utils.vectors.io.JWriterVectorWriter; > +import org.junit.Test; > + > +import java.io.IOException; > +import java.io.StringReader; > +import java.io.StringWriter; > + > + > +/** > + * > + * > + **/ > +public class CSVVectorIterableTest extends MahoutTestCase { > + > + > + @Test > + public void test() throws Exception { > + > + StringWriter sWriter = new StringWriter(); > + JWriterVectorWriter jwvw = new JWriterVectorWriter(sWriter) { > + > + protected void formatVector(Vector vector) throws IOException { > + String vecStr = VectorHelper.vectorToCSVString(vector, false); > + writer.write(vecStr); > + } > + }; > + Iterable<Vector> iter = new RandomVectorIterable(50); > + jwvw.write(iter); > + jwvw.close(); > + CSVVectorIterable csvIter = new CSVVectorIterable(new > StringReader(sWriter.getBuffer().toString())); > + int count = 0; > + for (Vector vector : csvIter) { > + //System.out.println("Vec: " + vector); > + count++; > + } > + assertEquals(50, count); > + } > +} > > Added: > mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java > URL: > http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java?rev=1085397&view=auto > 
============================================================================== > --- > mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java > (added) > +++ > mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java > Fri Mar 25 14:28:12 2011 > @@ -0,0 +1,39 @@ > +package org.apache.mahout.utils.vectors.io; > + > +import org.apache.hadoop.conf.Configuration; > +import org.apache.hadoop.fs.FileSystem; > +import org.apache.hadoop.fs.Path; > +import org.apache.hadoop.io.LongWritable; > +import org.apache.hadoop.io.SequenceFile; > +import org.apache.mahout.math.Vector; > +import org.apache.mahout.math.VectorWritable; > +import org.apache.mahout.utils.MahoutTestCase; > +import org.apache.mahout.utils.vectors.RandomVectorIterable; > +import org.junit.Test; > + > + > +/** > + * > + * > + **/ > +public class SequenceFileVectorIterableTest extends MahoutTestCase { > + > + > + @Test > + public void testSFVI() throws Exception { > + Path path = getTestTempFilePath("sfvw"); > + Configuration conf = new Configuration(); > + FileSystem fs = FileSystem.get(conf); > + SequenceFile.Writer seqWriter = new SequenceFile.Writer(fs, conf, path, > LongWritable.class, VectorWritable.class); > + SequenceFileVectorWriter writer = new > SequenceFileVectorWriter(seqWriter); > + Iterable<Vector> iter = new RandomVectorIterable(50); > + writer.write(iter); > + writer.close(); > + SequenceFileVectorIterable sfVIter = new SequenceFileVectorIterable(fs, > path, conf, false); > + int count = 0; > + for (Vector vector : sfVIter) { > + count++; > + } > + assertEquals(50, count); > + } > +} > > >
