Ah, OK. Good to know. I hadn't followed that one. Feel free to change it as appropriate, or I can.
On Mar 25, 2011, at 9:12 PM, Dmitriy Lyubimov wrote: > That would be a typicall change i am trying to fix with 622: > http://svn.apache.org/viewvc/mahout/trunk/utils/pom.xml?view=diff&r1=1085396&r2=1085397&pathrev=1085397 > > > > On Fri, Mar 25, 2011 at 7:28 AM, <[email protected]> wrote: >> Author: gsingers >> Date: Fri Mar 25 14:28:12 2011 >> New Revision: 1085397 >> >> URL: http://svn.apache.org/viewvc?rev=1085397&view=rev >> Log: >> MAHOUT-548: add in some CSV support for creating vectors, as well as a few >> other fixes for working with vectors >> >> Added: >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/ >> >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java >> >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java >> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/ >> >> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java >> >> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java >> Modified: >> mahout/trunk/utils/pom.xml >> >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java >> >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java >> >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java >> >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java >> >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java >> >> Modified: mahout/trunk/utils/pom.xml >> URL: >> http://svn.apache.org/viewvc/mahout/trunk/utils/pom.xml?rev=1085397&r1=1085396&r2=1085397&view=diff >> ============================================================================== >> --- mahout/trunk/utils/pom.xml (original) >> +++ mahout/trunk/utils/pom.xml Fri Mar 25 14:28:12 2011 >> @@ -142,6 
+142,11 @@ >> <type>test-jar</type> >> <scope>test</scope> >> </dependency> >> + <dependency> >> + <groupId>org.apache.solr</groupId> >> + <artifactId>solr-commons-csv</artifactId> >> + <version>1.4.1</version> >> + </dependency> >> >> <dependency> >> <groupId>junit</groupId> >> >> Modified: >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java >> URL: >> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=1085397&r1=1085396&r2=1085397&view=diff >> ============================================================================== >> --- >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java >> (original) >> +++ >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java >> Fri Mar 25 14:28:12 2011 >> @@ -77,16 +77,22 @@ public final class VectorDumper { >> Option dictTypeOpt = >> obuilder.withLongName("dictionaryType").withRequired(false).withArgument( >> >> abuilder.withName("dictionaryType").withMinimum(1).withMaximum(1).create()).withDescription( >> "The dictionary file type >> (text|sequencefile)").withShortName("dt").create(); >> - Option centroidJSonOpt = >> obuilder.withLongName("json").withRequired(false).withDescription( >> - "Output the centroid as JSON. Otherwise it substitutes in the >> terms for vector cell entries") >> + Option jsonOpt = >> obuilder.withLongName("json").withRequired(false).withDescription( >> + "Output the Vector as JSON. Otherwise it substitutes in the >> terms for vector cell entries") >> .withShortName("j").create(); >> + Option csvOpt = >> obuilder.withLongName("csv").withRequired(false).withDescription( >> + "Output the Vector as CSV. 
Otherwise it substitutes in the >> terms for vector cell entries") >> + .withShortName("c").create(); >> + Option namesAsCommentsOpt = >> obuilder.withLongName("namesAsComments").withRequired(false).withDescription( >> + "If using CSV output, optionally add a comment line for each >> NamedVector (if the vector is one) printing out the name") >> + .withShortName("n").create(); >> Option sizeOpt = obuilder.withLongName("sizeOnly").withRequired(false). >> withDescription("Dump only the size of the >> vector").withShortName("sz").create(); >> Option helpOpt = obuilder.withLongName("help").withDescription("Print >> out help").withShortName("h") >> .create(); >> >> Group group = >> gbuilder.withName("Options").withOption(seqOpt).withOption(outputOpt).withOption( >> - >> dictTypeOpt).withOption(dictOpt).withOption(centroidJSonOpt).withOption(vectorAsKeyOpt).withOption( >> + >> dictTypeOpt).withOption(dictOpt).withOption(csvOpt).withOption(vectorAsKeyOpt).withOption( >> printKeyOpt).withOption(sizeOpt).create(); >> >> try { >> @@ -122,10 +128,12 @@ public final class VectorDumper { >> throw new OptionException(dictTypeOpt); >> } >> } >> - boolean useJSON = cmdLine.hasOption(centroidJSonOpt); >> + boolean useJSON = cmdLine.hasOption(jsonOpt); >> + boolean useCSV = cmdLine.hasOption(csvOpt); >> + >> boolean sizeOnly = cmdLine.hasOption(sizeOpt); >> SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); >> - >> + boolean namesAsComments = cmdLine.hasOption(namesAsCommentsOpt); >> Writable keyWritable = >> reader.getKeyClass().asSubclass(Writable.class).newInstance(); >> Writable valueWritable = >> reader.getValueClass().asSubclass(Writable.class).newInstance(); >> boolean transposeKeyValue = cmdLine.hasOption(vectorAsKeyOpt); >> @@ -140,6 +148,16 @@ public final class VectorDumper { >> try { >> boolean printKey = cmdLine.hasOption(printKeyOpt); >> long i = 0; >> + if (useCSV && dictionary != null){ >> + writer.write("#"); >> + for (int j = 0; j < 
dictionary.length; j++) { >> + writer.write(dictionary[j]); >> + if (j < dictionary.length - 1){ >> + writer.write(','); >> + } >> + } >> + writer.write('\n'); >> + } >> while (reader.next(keyWritable, valueWritable)) { >> if (printKey) { >> Writable notTheVectorWritable = transposeKeyValue ? >> valueWritable : keyWritable; >> @@ -159,7 +177,14 @@ public final class VectorDumper { >> writer.write(String.valueOf(vector.size())); >> writer.write('\n'); >> } else { >> - String fmtStr = useJSON ? vector.asFormatString() : >> VectorHelper.vectorToString(vector, dictionary); >> + String fmtStr; >> + if (useJSON){ >> + fmtStr = VectorHelper.vectorToJSONString(vector, >> dictionary); >> + } else if (useCSV){ >> + fmtStr = VectorHelper.vectorToCSVString(vector, >> namesAsComments); >> + } else { >> + fmtStr = vector.asFormatString(); >> + } >> writer.write(fmtStr); >> writer.write('\n'); >> } >> >> Modified: >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java >> URL: >> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java?rev=1085397&r1=1085396&r2=1085397&view=diff >> ============================================================================== >> --- >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java >> (original) >> +++ >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java >> Fri Mar 25 14:28:12 2011 >> @@ -40,14 +40,45 @@ import org.apache.mahout.math.map.OpenOb >> public final class VectorHelper { >> >> private static final Pattern TAB_PATTERN = Pattern.compile("\t"); >> + >> >> private VectorHelper() { } >> - >> + >> + public static String vectorToCSVString(Vector vector, boolean >> namesAsComments){ >> + StringBuilder bldr = new StringBuilder(2048); >> + try { >> + vectorToCSVString(vector, namesAsComments, bldr); >> + } catch (IOException e) { >> + throw new RuntimeException(e); >> + } >> + return 
bldr.toString(); >> + } >> + >> + public static void vectorToCSVString(Vector vector, boolean >> namesAsComments, >> + Appendable bldr) throws IOException { >> + if (namesAsComments && vector instanceof NamedVector){ >> + bldr.append("#").append(((NamedVector)vector).getName()).append('\n'); >> + } >> + Iterator<Vector.Element> iter = vector.iterator(); >> + boolean first = true; >> + while (iter.hasNext()) { >> + if (first) { >> + first = false; >> + } else { >> + bldr.append(","); >> + } >> + Vector.Element elt = iter.next(); >> + bldr.append(String.valueOf(elt.get())); >> + } >> + bldr.append('\n'); >> + } >> + >> + >> /** >> * @return a String from a vector that fills in the values with the >> appropriate value from a dictionary where >> * each the ith entry is the term for the ith vector cell. >> */ >> - public static String vectorToString(Vector vector, String[] dictionary) { >> + public static String vectorToJSONString(Vector vector, String[] >> dictionary) { >> StringBuilder bldr = new StringBuilder(2048); >> >> if (vector instanceof NamedVector) { >> @@ -67,12 +98,13 @@ public final class VectorHelper { >> if (dictionary != null) { >> bldr.append(dictionary[elt.index()]); >> } else { >> - bldr.append(elt.index()); >> + bldr.append(String.valueOf(elt.index())); >> } >> - bldr.append(':').append(elt.get()); >> + bldr.append(':').append(String.valueOf(elt.get())); >> } >> return bldr.append('}').toString(); >> } >> + >> >> /** >> * Read in a dictionary file. 
Format is: >> >> Added: >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java >> URL: >> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java?rev=1085397&view=auto >> ============================================================================== >> --- >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java >> (added) >> +++ >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterable.java >> Fri Mar 25 14:28:12 2011 >> @@ -0,0 +1,94 @@ >> +package org.apache.mahout.utils.vectors.csv; >> +/** >> + * Licensed to the Apache Software Foundation (ASF) under one or more >> + * contributor license agreements. See the NOTICE file distributed with >> + * this work for additional information regarding copyright ownership. >> + * The ASF licenses this file to You under the Apache License, Version 2.0 >> + * (the "License"); you may not use this file except in compliance with >> + * the License. You may obtain a copy of the License at >> + * >> + * http://www.apache.org/licenses/LICENSE-2.0 >> + * >> + * Unless required by applicable law or agreed to in writing, software >> + * distributed under the License is distributed on an "AS IS" BASIS, >> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. >> + * See the License for the specific language governing permissions and >> + * limitations under the License. >> + */ >> + >> +import org.apache.commons.csv.CSVParser; >> +import org.apache.commons.csv.CSVStrategy; >> +import org.apache.mahout.math.DenseVector; >> +import org.apache.mahout.math.Vector; >> + >> +import java.io.BufferedReader; >> +import java.io.IOException; >> +import java.io.Reader; >> +import java.util.Iterator; >> + >> + >> +/** >> + * Iterates a CSV file and produces {@link org.apache.mahout.math.Vector}. 
>> + * <br/> >> + * The Iterator returned throws {@link UnsupportedOperationException} for >> the {@link java.util.Iterator#remove()} method. >> + * <p/> >> + * Assumes DenseVector for now, but in the future may have the option of >> mapping columns to sparse format >> + * <p/> >> + * The Iterator is not thread-safe. >> + * >> + * >> + **/ >> +public class CSVVectorIterable implements Iterable<Vector> { >> + protected CSVParser parser; >> + protected String [] line; >> + >> + public CSVVectorIterable(Reader reader) throws IOException { >> + parser = new CSVParser(reader); >> + line = parser.getLine(); >> + } >> + >> + public CSVVectorIterable(Reader reader, CSVStrategy strategy) throws >> IOException { >> + parser = new CSVParser(reader, strategy); >> + line = parser.getLine(); >> + } >> + >> + >> + @Override >> + public Iterator<Vector> iterator() { >> + return new CSVIterator(); >> + } >> + >> + private class CSVIterator implements Iterator<Vector>{ >> + >> + >> + public CSVIterator() { >> + } >> + >> + @Override >> + public boolean hasNext() { >> + return line != null; >> + } >> + >> + @Override >> + public Vector next() { >> + >> + Vector result = null; >> + result = new DenseVector(line.length); >> + for (int i = 0; i < line.length; i++) { >> + result.setQuick(i, Double.parseDouble(line[i])); >> + } >> + //move the line forward >> + try { >> + line = parser.getLine(); >> + } catch (IOException e) { >> + throw new RuntimeException(e); >> + } >> + return result; >> + } >> + >> + @Override >> + public void remove() { >> + throw new UnsupportedOperationException(); >> + } >> + } >> +} >> >> Modified: >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java >> URL: >> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff >> ============================================================================== >> --- >> 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java >> (original) >> +++ >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/JWriterVectorWriter.java >> Fri Mar 25 14:28:12 2011 >> @@ -25,8 +25,8 @@ import org.apache.mahout.math.Vector; >> /** >> * Write out the vectors to any {@link java.io.Writer} using {@link >> org.apache.mahout.math.Vector#asFormatString()}. >> */ >> -public class JWriterVectorWriter implements VectorWriter { >> - private final Writer writer; >> +public class JWriterVectorWriter extends VectorWriter { >> + protected final Writer writer; >> >> public JWriterVectorWriter(Writer writer) { >> this.writer = writer; >> @@ -45,14 +45,22 @@ public class JWriterVectorWriter impleme >> if (result >= maxDocs) { >> break; >> } >> - writer.write(vector.asFormatString()); >> - writer.write('\n'); >> - >> + formatVector(vector); >> result++; >> } >> return result; >> } >> - >> + >> + protected void formatVector(Vector vector) throws IOException { >> + writer.write(vector.asFormatString()); >> + writer.write('\n'); >> + } >> + >> + @Override >> + public void write(Vector vector) throws IOException { >> + formatVector(vector); >> + } >> + >> @Override >> public void close() throws IOException { >> writer.flush(); >> >> Added: >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java >> URL: >> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java?rev=1085397&view=auto >> ============================================================================== >> --- >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java >> (added) >> +++ >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterable.java >> Fri Mar 25 14:28:12 2011 >> @@ -0,0 +1,105 @@ >> +package org.apache.mahout.utils.vectors.io; >> +/** >> + 
* Licensed to the Apache Software Foundation (ASF) under one or more >> + * contributor license agreements. See the NOTICE file distributed with >> + * this work for additional information regarding copyright ownership. >> + * The ASF licenses this file to You under the Apache License, Version 2.0 >> + * (the "License"); you may not use this file except in compliance with >> + * the License. You may obtain a copy of the License at >> + * >> + * http://www.apache.org/licenses/LICENSE-2.0 >> + * >> + * Unless required by applicable law or agreed to in writing, software >> + * distributed under the License is distributed on an "AS IS" BASIS, >> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. >> + * See the License for the specific language governing permissions and >> + * limitations under the License. >> + */ >> + >> +import org.apache.hadoop.conf.Configuration; >> +import org.apache.hadoop.fs.ContentSummary; >> +import org.apache.hadoop.fs.FileSystem; >> +import org.apache.hadoop.fs.Path; >> +import org.apache.hadoop.io.SequenceFile; >> +import org.apache.hadoop.io.Writable; >> +import org.apache.mahout.math.Vector; >> +import org.apache.mahout.math.VectorWritable; >> + >> +import java.io.IOException; >> +import java.util.Iterator; >> + >> + >> +/** >> + * Given a Sequence File containing vectors (actually, {@link >> org.apache.mahout.math.VectorWritable}, iterate over it. 
>> + * >> + **/ >> +public class SequenceFileVectorIterable implements Iterable<Vector>{ >> + protected SequenceFile.Reader reader; >> + protected long fileLen; >> + protected Writable keyWritable; >> + protected Writable valueWritable; >> + protected boolean useKey; >> + >> + /** >> + * Construct the Iterable >> + * @param fs The {@link org.apache.hadoop.fs.FileSystem} containing the >> {@link org.apache.hadoop.io.SequenceFile} >> + * @param file The {@link org.apache.hadoop.fs.Path} containing the file >> + * @param conf The {@link org.apache.hadoop.conf.Configuration} to use >> + * @param useKey If true, use the key as the {@link >> org.apache.mahout.math.VectorWritable}, otherwise use the value >> + * @throws IllegalAccessException >> + * @throws InstantiationException >> + * @throws IOException >> + */ >> + public SequenceFileVectorIterable(FileSystem fs, Path file, Configuration >> conf, boolean useKey) throws IllegalAccessException, InstantiationException, >> IOException { >> + this.reader = new SequenceFile.Reader(fs, file, conf); >> + ContentSummary summary = fs.getContentSummary(file); >> + fileLen = summary.getLength(); >> + this.useKey = useKey; >> + keyWritable = >> reader.getKeyClass().asSubclass(Writable.class).newInstance(); >> + valueWritable = >> reader.getValueClass().asSubclass(Writable.class).newInstance(); >> + } >> + >> + /** >> + * The Iterator returned does not support remove() >> + * @return The {@link java.util.Iterator} >> + */ >> + public Iterator<Vector> iterator() { >> + return new SFIterator(); >> + >> + } >> + >> + private final class SFIterator implements Iterator<Vector>{ >> + @Override >> + public boolean hasNext() { >> + //TODO: is this legitimate? 
We can't call next here since it breaks >> the iterator contract >> + try { >> + return reader.getPosition() < fileLen; >> + } catch (IOException e) { >> + return false; >> + } >> + } >> + >> + @Override >> + public Vector next() { >> + Vector result = null; >> + boolean valid = false; >> + try { >> + valid = reader.next(keyWritable, valueWritable); >> + if (valid){ >> + result = ((VectorWritable) (useKey ? keyWritable : >> valueWritable)).get(); >> + } >> + } catch (IOException e) { >> + throw new RuntimeException(e); >> + } >> + >> + return result; >> + } >> + >> + /** >> + * Not supported >> + */ >> + public void remove() { >> + throw new UnsupportedOperationException(); >> + } >> + } >> +} >> >> Modified: >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java >> URL: >> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff >> ============================================================================== >> --- >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java >> (original) >> +++ >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java >> Fri Mar 25 14:28:12 2011 >> @@ -30,16 +30,16 @@ import org.apache.mahout.math.VectorWrit >> * >> * Closes the writer when done >> */ >> -public class SequenceFileVectorWriter implements VectorWriter { >> +public class SequenceFileVectorWriter extends VectorWriter { >> private final SequenceFile.Writer writer; >> - >> + long recNum = 0; >> public SequenceFileVectorWriter(SequenceFile.Writer writer) { >> this.writer = writer; >> } >> >> @Override >> public long write(Iterable<Vector> iterable, long maxDocs) throws >> IOException { >> - long recNum = 0; >> + >> for (Vector point : iterable) { >> if (recNum >= maxDocs) { >> break; >> @@ -51,7 +51,13 @@ public class 
SequenceFileVectorWriter im >> } >> return recNum; >> } >> - >> + >> + @Override >> + public void write(Vector vector) throws IOException { >> + writer.append(new LongWritable(recNum++), new VectorWritable(vector)); >> + >> + } >> + >> @Override >> public long write(Iterable<Vector> iterable) throws IOException { >> return write(iterable, Long.MAX_VALUE); >> >> Modified: >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java >> URL: >> http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java?rev=1085397&r1=1085396&r2=1085397&view=diff >> ============================================================================== >> --- >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java >> (original) >> +++ >> mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java >> Fri Mar 25 14:28:12 2011 >> @@ -21,7 +21,7 @@ import java.io.IOException; >> >> import org.apache.mahout.math.Vector; >> >> -public interface VectorWriter { >> +public abstract class VectorWriter { >> /** >> * Write all values in the Iterable to the output >> * @param iterable The {@link Iterable} to loop over >> @@ -29,7 +29,15 @@ public interface VectorWriter { >> * @throws IOException if there was a problem writing >> * >> */ >> - long write(Iterable<Vector> iterable) throws IOException; >> + public abstract long write(Iterable<Vector> iterable) throws IOException; >> + >> + /** >> + * Write out a vector >> + * >> + * @param vector The {@link org.apache.mahout.math.Vector} to write >> + * @throws IOException >> + */ >> + public abstract void write(Vector vector) throws IOException; >> >> /** >> * Write the first <code>maxDocs</code> to the output. 
>> @@ -38,12 +46,12 @@ public interface VectorWriter { >> * @return The number of docs written >> * @throws IOException if there was a problem writing >> */ >> - long write(Iterable<Vector> iterable, long maxDocs) throws IOException; >> + public abstract long write(Iterable<Vector> iterable, long maxDocs) >> throws IOException; >> >> /** >> * Close any internally held resources. If external Writers are passed >> in, the implementation should indicate >> * whether it also closes them >> * @throws IOException if there was an issue closing the item >> */ >> - void close() throws IOException; >> + public abstract void close() throws IOException; >> } >> >> Added: >> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java >> URL: >> http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java?rev=1085397&view=auto >> ============================================================================== >> --- >> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java >> (added) >> +++ >> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/csv/CSVVectorIterableTest.java >> Fri Mar 25 14:28:12 2011 >> @@ -0,0 +1,60 @@ >> +package org.apache.mahout.utils.vectors.csv; >> +/** >> + * Licensed to the Apache Software Foundation (ASF) under one or more >> + * contributor license agreements. See the NOTICE file distributed with >> + * this work for additional information regarding copyright ownership. >> + * The ASF licenses this file to You under the Apache License, Version 2.0 >> + * (the "License"); you may not use this file except in compliance with >> + * the License. 
You may obtain a copy of the License at >> + * >> + * http://www.apache.org/licenses/LICENSE-2.0 >> + * >> + * Unless required by applicable law or agreed to in writing, software >> + * distributed under the License is distributed on an "AS IS" BASIS, >> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. >> + * See the License for the specific language governing permissions and >> + * limitations under the License. >> + */ >> + >> +import org.apache.mahout.math.Vector; >> +import org.apache.mahout.utils.MahoutTestCase; >> +import org.apache.mahout.utils.vectors.RandomVectorIterable; >> +import org.apache.mahout.utils.vectors.VectorHelper; >> +import org.apache.mahout.utils.vectors.io.JWriterVectorWriter; >> +import org.junit.Test; >> + >> +import java.io.IOException; >> +import java.io.StringReader; >> +import java.io.StringWriter; >> + >> + >> +/** >> + * >> + * >> + **/ >> +public class CSVVectorIterableTest extends MahoutTestCase { >> + >> + >> + @Test >> + public void test() throws Exception { >> + >> + StringWriter sWriter = new StringWriter(); >> + JWriterVectorWriter jwvw = new JWriterVectorWriter(sWriter) { >> + >> + protected void formatVector(Vector vector) throws IOException { >> + String vecStr = VectorHelper.vectorToCSVString(vector, false); >> + writer.write(vecStr); >> + } >> + }; >> + Iterable<Vector> iter = new RandomVectorIterable(50); >> + jwvw.write(iter); >> + jwvw.close(); >> + CSVVectorIterable csvIter = new CSVVectorIterable(new >> StringReader(sWriter.getBuffer().toString())); >> + int count = 0; >> + for (Vector vector : csvIter) { >> + //System.out.println("Vec: " + vector); >> + count++; >> + } >> + assertEquals(50, count); >> + } >> +} >> >> Added: >> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java >> URL: >> 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java?rev=1085397&view=auto >> ============================================================================== >> --- >> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java >> (added) >> +++ >> mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorIterableTest.java >> Fri Mar 25 14:28:12 2011 >> @@ -0,0 +1,39 @@ >> +package org.apache.mahout.utils.vectors.io; >> + >> +import org.apache.hadoop.conf.Configuration; >> +import org.apache.hadoop.fs.FileSystem; >> +import org.apache.hadoop.fs.Path; >> +import org.apache.hadoop.io.LongWritable; >> +import org.apache.hadoop.io.SequenceFile; >> +import org.apache.mahout.math.Vector; >> +import org.apache.mahout.math.VectorWritable; >> +import org.apache.mahout.utils.MahoutTestCase; >> +import org.apache.mahout.utils.vectors.RandomVectorIterable; >> +import org.junit.Test; >> + >> + >> +/** >> + * >> + * >> + **/ >> +public class SequenceFileVectorIterableTest extends MahoutTestCase { >> + >> + >> + @Test >> + public void testSFVI() throws Exception { >> + Path path = getTestTempFilePath("sfvw"); >> + Configuration conf = new Configuration(); >> + FileSystem fs = FileSystem.get(conf); >> + SequenceFile.Writer seqWriter = new SequenceFile.Writer(fs, conf, path, >> LongWritable.class, VectorWritable.class); >> + SequenceFileVectorWriter writer = new >> SequenceFileVectorWriter(seqWriter); >> + Iterable<Vector> iter = new RandomVectorIterable(50); >> + writer.write(iter); >> + writer.close(); >> + SequenceFileVectorIterable sfVIter = new SequenceFileVectorIterable(fs, >> path, conf, false); >> + int count = 0; >> + for (Vector vector : sfVIter) { >> + count++; >> + } >> + assertEquals(50, count); >> + } >> +} >> >> >> -------------------------- Grant Ingersoll http://www.lucidimagination.com/ Search the Lucene ecosystem 
docs using Solr/Lucene: http://www.lucidimagination.com/search
