Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPGrowth.java Tue Nov 9 13:19:26 2010 @@ -20,6 +20,7 @@ package org.apache.mahout.fpm.pfpgrowth. import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -28,7 +29,6 @@ import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.Set; import org.apache.commons.lang.mutable.MutableLong; import org.apache.hadoop.conf.Configuration; @@ -36,6 +36,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.OutputCollector; import org.apache.mahout.common.Pair; import org.apache.mahout.fpm.pfpgrowth.convertors.StatusUpdater; @@ -63,7 +64,7 @@ public class FPGrowth<A extends Comparab Path path) throws IOException { List<Pair<String,TopKStringPatterns>> ret = new ArrayList<Pair<String,TopKStringPatterns>>(); - Text key = new Text(); + Writable key = new Text(); TopKStringPatterns value = new TopKStringPatterns(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); // key is feature value is count @@ -146,10 +147,10 @@ public class FPGrowth<A extends Comparab * @throws IOException */ public final void generateTopKFrequentPatterns(Iterator<Pair<List<A>,Long>> transactionStream, - 
List<Pair<A,Long>> frequencyList, + Collection<Pair<A, Long>> frequencyList, long minSupport, int k, - Set<A> returnableFeatures, + Collection<A> returnableFeatures, OutputCollector<A,List<Pair<List<A>,Long>>> output, StatusUpdater updater) throws IOException { @@ -178,7 +179,7 @@ public class FPGrowth<A extends Comparab log.info("Number of unique items {}", frequencyList.size()); - Set<Integer> returnFeatures = new HashSet<Integer>(); + Collection<Integer> returnFeatures = new HashSet<Integer>(); if (returnableFeatures != null && !returnableFeatures.isEmpty()) { for (A attrib : returnableFeatures) { if (attributeIdMapping.containsKey(attrib)) { @@ -206,7 +207,7 @@ public class FPGrowth<A extends Comparab * * @param tree * to be mined - * @param minSupportMutable + * @param minSupportValue * minimum support of the pattern to keep * @param k * Number of top frequent patterns to keep @@ -218,14 +219,12 @@ public class FPGrowth<A extends Comparab * @return Top K Frequent Patterns for each feature and their support */ private Map<Integer,FrequentPatternMaxHeap> fpGrowth(FPTree tree, - MutableLong minSupportMutable, + long minSupportValue, int k, - Set<Integer> requiredFeatures, + Collection<Integer> requiredFeatures, TopKPatternsOutputConverter<A> outputCollector, StatusUpdater updater) throws IOException { - long minSupportValue = minSupportMutable.longValue(); - Map<Integer,FrequentPatternMaxHeap> patterns = new HashMap<Integer,FrequentPatternMaxHeap>(); FPTreeDepthCache treeCache = new FPTreeDepthCache(); for (int i = tree.getHeaderTableCount() - 1; i >= 0; i--) { @@ -250,9 +249,8 @@ public class FPGrowth<A extends Comparab private static FrequentPatternMaxHeap generateSinglePathPatterns(FPTree tree, int k, - MutableLong minSupportMutable) { - FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(k, - false); + long minSupport) { + FrequentPatternMaxHeap frequentPatterns = new FrequentPatternMaxHeap(k, false); int tempNode = FPTree.ROOTNODEID; Pattern 
frequentItem = new Pattern(); @@ -262,7 +260,7 @@ public class FPGrowth<A extends Comparab tempNode); } tempNode = tree.childAtIndex(tempNode, 0); - if (tree.count(tempNode) >= minSupportMutable.intValue()) { + if (tree.count(tempNode) >= minSupport) { frequentItem.add(tree.attribute(tempNode), tree.count(tempNode)); } } @@ -296,8 +294,11 @@ public class FPGrowth<A extends Comparab */ private Map<Integer,FrequentPatternMaxHeap> generateTopKFrequentPatterns( Iterator<Pair<int[],Long>> transactions, - long[] attributeFrequency, long minSupport, int k, int featureSetSize, - Set<Integer> returnFeatures, TopKPatternsOutputConverter<A> topKPatternsOutputCollector, + long[] attributeFrequency, + long minSupport, + int k, + int featureSetSize, + Collection<Integer> returnFeatures, TopKPatternsOutputConverter<A> topKPatternsOutputCollector, StatusUpdater updater) throws IOException { FPTree tree = new FPTree(featureSetSize); @@ -306,7 +307,6 @@ public class FPGrowth<A extends Comparab } // Constructing initial FPTree from the list of transactions - MutableLong minSupportMutable = new MutableLong(minSupport); int nodecount = 0; // int attribcount = 0; int i = 0; @@ -314,8 +314,7 @@ public class FPGrowth<A extends Comparab Pair<int[],Long> transaction = transactions.next(); Arrays.sort(transaction.getFirst()); // attribcount += transaction.length; - nodecount += treeAddCount(tree, transaction.getFirst(), transaction - .getSecond(), minSupportMutable, attributeFrequency); + nodecount += treeAddCount(tree, transaction.getFirst(), transaction.getSecond(), minSupport, attributeFrequency); i++; if (i % 10000 == 0) { log.info("FPTree Building: Read {} Transactions", i); @@ -324,8 +323,7 @@ public class FPGrowth<A extends Comparab log.info("Number of Nodes in the FP Tree: {}", nodecount); - return fpGrowth(tree, minSupportMutable, k, returnFeatures, - topKPatternsOutputCollector, updater); + return fpGrowth(tree, minSupport, k, returnFeatures, topKPatternsOutputCollector, updater); 
} private static FrequentPatternMaxHeap growth(FPTree tree, @@ -350,7 +348,7 @@ public class FPGrowth<A extends Comparab while (i < headerTableCount) { int attribute = tree.getAttributeAtIndex(i); long count = tree.getHeaderSupportCount(attribute); - if (count < minSupportMutable.intValue()) { + if (count < minSupportMutable.longValue()) { i++; continue; } @@ -358,7 +356,7 @@ public class FPGrowth<A extends Comparab FPTree conditionalTree = treeCache.getFirstLevelTree(attribute); if (conditionalTree.isEmpty()) { traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), - minSupportMutable, conditionalTree, tree); + minSupportMutable.longValue(), conditionalTree, tree); // printTree(conditionalTree); } @@ -377,7 +375,7 @@ public class FPGrowth<A extends Comparab frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns, attribute, count, false); } - if (frequentPatterns.isFull() && minSupportMutable.intValue() < frequentPatterns.leastSupport()) { + if (frequentPatterns.isFull() && minSupportMutable.longValue() < frequentPatterns.leastSupport()) { minSupportMutable.setValue(frequentPatterns.leastSupport()); } i++; @@ -413,7 +411,7 @@ public class FPGrowth<A extends Comparab } if (tree.singlePath()) { - return generateSinglePathPatterns(tree, k, minSupportMutable); + return generateSinglePathPatterns(tree, k, minSupportMutable.longValue()); } updater.update("Bottom Up FP Growth"); @@ -428,7 +426,7 @@ public class FPGrowth<A extends Comparab FrequentPatternMaxHeap returnedPatterns; if (conditionalOfCurrentAttribute) { traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), - minSupportMutable, conditionalTree, tree); + minSupportMutable.longValue(), conditionalTree, tree); returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable, k, treeCache, level + 1, true, currentAttribute, updater); @@ -437,7 +435,7 @@ public class FPGrowth<A extends Comparab } else { if (attribute == currentAttribute) { 
traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), - minSupportMutable, conditionalTree, tree); + minSupportMutable.longValue(), conditionalTree, tree); returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable, k, treeCache, level + 1, true, currentAttribute, updater); @@ -445,7 +443,7 @@ public class FPGrowth<A extends Comparab attribute, count, true); } else if (attribute > currentAttribute) { traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), - minSupportMutable, conditionalTree, tree); + minSupportMutable.longValue(), conditionalTree, tree); returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable, k, treeCache, level + 1, false, currentAttribute, updater); frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns, @@ -453,7 +451,7 @@ public class FPGrowth<A extends Comparab } } - if (frequentPatterns.isFull() && minSupportMutable.intValue() < frequentPatterns.leastSupport()) { + if (frequentPatterns.isFull() && minSupportMutable.longValue() < frequentPatterns.leastSupport()) { minSupportMutable.setValue(frequentPatterns.leastSupport()); } } @@ -481,14 +479,14 @@ public class FPGrowth<A extends Comparab } else { int attribute = tree.getAttributeAtIndex(index); long count = tree.getHeaderSupportCount(attribute); - if (count < minSupportMutable.intValue()) { + if (count < minSupportMutable.longValue()) { return frequentPatterns; } } } if (tree.singlePath()) { - return generateSinglePathPatterns(tree, k, minSupportMutable); + return generateSinglePathPatterns(tree, k, minSupportMutable.longValue()); } updater.update("Top Down Growth:"); @@ -505,7 +503,7 @@ public class FPGrowth<A extends Comparab FrequentPatternMaxHeap returnedPatterns; if (conditionalOfCurrentAttribute) { traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), - minSupportMutable, conditionalTree, tree); + minSupportMutable.longValue(), conditionalTree, tree); returnedPatterns = growthBottomUp(conditionalTree, 
minSupportMutable, k, treeCache, level + 1, true, currentAttribute, updater); @@ -515,7 +513,7 @@ public class FPGrowth<A extends Comparab } else { if (attribute == currentAttribute) { traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), - minSupportMutable, conditionalTree, tree); + minSupportMutable.longValue(), conditionalTree, tree); returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable, k, treeCache, level + 1, true, currentAttribute, updater); frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns, @@ -523,7 +521,7 @@ public class FPGrowth<A extends Comparab } else if (attribute > currentAttribute) { traverseAndBuildConditionalFPTreeData(tree.getHeaderNext(attribute), - minSupportMutable, conditionalTree, tree); + minSupportMutable.longValue(), conditionalTree, tree); returnedPatterns = growthBottomUp(conditionalTree, minSupportMutable, k, treeCache, level + 1, false, currentAttribute, updater); frequentPatterns = mergeHeap(frequentPatterns, returnedPatterns, @@ -531,7 +529,7 @@ public class FPGrowth<A extends Comparab } } - if (frequentPatterns.isFull() && minSupportMutable.intValue() < frequentPatterns.leastSupport()) { + if (frequentPatterns.isFull() && minSupportMutable.longValue() < frequentPatterns.leastSupport()) { minSupportMutable.setValue(frequentPatterns.leastSupport()); } } @@ -555,7 +553,7 @@ public class FPGrowth<A extends Comparab } private static void traverseAndBuildConditionalFPTreeData(int firstConditionalNode, - MutableLong minSupportMutable, + long minSupport, FPTree conditionalTree, FPTree tree) { @@ -569,8 +567,7 @@ public class FPGrowth<A extends Comparab while (pathNode != 0) { // dummy root node int attribute = tree.attribute(pathNode); - if (tree.getHeaderSupportCount(attribute) < minSupportMutable - .intValue()) { + if (tree.getHeaderSupportCount(attribute) < minSupport) { pathNode = tree.parent(pathNode); continue; } @@ -612,16 +609,15 @@ public class FPGrowth<A extends Comparab 
tree.clearConditional(); conditionalTree.reorderHeaderTable(); - pruneFPTree(minSupportMutable, conditionalTree); + pruneFPTree(minSupport, conditionalTree); // prune Conditional Tree } - private static void pruneFPTree(MutableLong minSupportMutable, FPTree tree) { + private static void pruneFPTree(long minSupport, FPTree tree) { for (int i = 0; i < tree.getHeaderTableCount(); i++) { int currentAttribute = tree.getAttributeAtIndex(i); - if (tree.getHeaderSupportCount(currentAttribute) < minSupportMutable - .intValue()) { + if (tree.getHeaderSupportCount(currentAttribute) < minSupport) { int nextNode = tree.getHeaderNext(currentAttribute); tree.removeHeaderNext(currentAttribute); while (nextNode != -1) { @@ -650,9 +646,7 @@ public class FPGrowth<A extends Comparab int parent = tree.parent(nextNode); - if (!prevNode.containsKey(parent)) { - prevNode.put(parent, nextNode); - } else { + if (prevNode.containsKey(parent)) { int prevNodeId = prevNode.get(parent); if (tree.childCount(prevNodeId) <= 1 && tree.childCount(nextNode) <= 1) { tree.addCount(prevNodeId, tree.count(nextNode)); @@ -662,6 +656,8 @@ public class FPGrowth<A extends Comparab } tree.setNext(justPrevNode, tree.next(nextNode)); } + } else { + prevNode.put(parent, nextNode); } justPrevNode = nextNode; nextNode = tree.next(nextNode); @@ -692,7 +688,7 @@ public class FPGrowth<A extends Comparab private static int treeAddCount(FPTree tree, int[] myList, long addCount, - Number minSupport, + long minSupport, long[] attributeFrequency) { int temp = FPTree.ROOTNODEID; @@ -700,7 +696,7 @@ public class FPGrowth<A extends Comparab boolean addCountMode = true; for (int attribute : myList) { - if (attributeFrequency[attribute] < minSupport.intValue()) { + if (attributeFrequency[attribute] < minSupport) { return ret; } int child;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FPTree.java Tue Nov 9 13:19:26 2010 @@ -18,7 +18,7 @@ package org.apache.mahout.fpm.pfpgrowth.fpgrowth; import java.util.Arrays; -import java.util.Set; +import java.util.Collection; import java.util.TreeSet; /** @@ -72,7 +72,7 @@ public class FPTree { private boolean singlePath; - private final Set<Integer> sortedSet = new TreeSet<Integer>(); + private final Collection<Integer> sortedSet = new TreeSet<Integer>(); public FPTree() { this(DEFAULT_INITIAL_SIZE, DEFAULT_HEADER_TABLE_INITIAL_SIZE); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java Tue Nov 9 13:19:26 2010 @@ -20,6 +20,7 @@ package org.apache.mahout.ga.watchmaker; import java.io.BufferedWriter; import java.io.IOException; import java.io.OutputStreamWriter; +import java.util.Collection; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -54,8 +55,8 @@ public final class MahoutEvaluator { * <code>List<Double></code> that contains the evaluated fitness for each candidate from the * input population, sorted in the same 
order as the candidates. */ - public static void evaluate(FitnessEvaluator<?> evaluator, List<?> population, List<Double> evaluations) - throws IOException, ClassNotFoundException, InterruptedException { + public static void evaluate(FitnessEvaluator<?> evaluator, Iterable<?> population, Collection<Double> evaluations) + throws IOException, ClassNotFoundException, InterruptedException { Job job = new Job(); job.setJarByClass(MahoutEvaluator.class); Configuration conf = job.getConfiguration(); @@ -78,7 +79,7 @@ public final class MahoutEvaluator { * population to store * @return input <code>Path</code> */ - private static Path prepareInput(FileSystem fs, List<?> population) throws IOException { + private static Path prepareInput(FileSystem fs, Iterable<?> population) throws IOException { Path inpath = new Path(fs.getWorkingDirectory(), "input"); HadoopUtil.overwriteOutput(inpath); storePopulation(fs, new Path(inpath, "population"), population); @@ -122,7 +123,7 @@ public final class MahoutEvaluator { * @param population * population to store */ - static void storePopulation(FileSystem fs, Path f, List<?> population) throws IOException { + static void storePopulation(FileSystem fs, Path f, Iterable<?> population) throws IOException { FSDataOutputStream out = fs.create(f); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out)); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java Tue Nov 9 13:19:26 2010 @@ -19,7 +19,7 @@ package org.apache.mahout.ga.watchmaker; import java.io.IOException; import 
java.util.ArrayList; -import java.util.List; +import java.util.Collection; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -29,6 +29,7 @@ import org.apache.hadoop.io.DoubleWritab import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.SequenceFile.Reader; import org.apache.hadoop.io.SequenceFile.Sorter; +import org.apache.hadoop.io.Writable; /** Utility Class that deals with the output. */ public final class OutputUtils { @@ -48,7 +49,7 @@ public final class OutputUtils { */ public static Path[] listOutputFiles(FileSystem fs, Path outpath) throws IOException { FileStatus[] status = fs.listStatus(outpath); - List<Path> outpaths = new ArrayList<Path>(); + Collection<Path> outpaths = new ArrayList<Path>(); for (FileStatus s : status) { if (!s.isDir()) { outpaths.add(s.getPath()); @@ -72,7 +73,7 @@ public final class OutputUtils { public static void importEvaluations(FileSystem fs, Configuration conf, Path outpath, - List<Double> evaluations) throws IOException { + Collection<Double> evaluations) throws IOException { Sorter sorter = new Sorter(fs, LongWritable.class, DoubleWritable.class, conf); // merge and sort the outputs @@ -81,7 +82,7 @@ public final class OutputUtils { sorter.merge(outfiles, output); // import the evaluations - LongWritable key = new LongWritable(); + Writable key = new LongWritable(); DoubleWritable value = new DoubleWritable(); Reader reader = new Reader(fs, output, conf); try { Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java Tue 
Nov 9 13:19:26 2010 @@ -23,6 +23,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileOutputFormat; @@ -104,7 +105,7 @@ public final class TimesSquaredJob { Path inputVectorPath = new Path(outputVectorPathBase, INPUT_VECTOR + '/' + now); SequenceFile.Writer inputVectorPathWriter = new SequenceFile.Writer(fs, conf, inputVectorPath, NullWritable.class, VectorWritable.class); - VectorWritable inputVW = new VectorWritable(v); + Writable inputVW = new VectorWritable(v); inputVectorPathWriter.append(NullWritable.get(), inputVW); inputVectorPathWriter.close(); URI ivpURI = inputVectorPath.toUri(); @@ -158,9 +159,7 @@ public final class TimesSquaredJob { Path inputVectorPath = new Path(localFiles[0].getPath()); FileSystem fs = inputVectorPath.getFileSystem(conf); - SequenceFile.Reader reader = new SequenceFile.Reader(fs, - inputVectorPath, - conf); + SequenceFile.Reader reader = new SequenceFile.Reader(fs, inputVectorPath, conf); VectorWritable val = new VectorWritable(); NullWritable nw = NullWritable.get(); reader.next(nw, val); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java Tue Nov 9 13:19:26 2010 @@ -27,6 +27,7 @@ import 
org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; @@ -194,7 +195,7 @@ public class DistributedLanczosSolver ex IntWritable iw = new IntWritable(); for (int i = 0; i < eigenVectors.numRows() - 1; i++) { Vector v = eigenVectors.getRow(i); - VectorWritable vw = new VectorWritable(v); + Writable vw = new VectorWritable(v); iw.set(i); seqWriter.append(iw, vw); } Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java Tue Nov 9 13:19:26 2010 @@ -19,6 +19,7 @@ package org.apache.mahout.math.hadoop.de import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -30,6 +31,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.ToolRunner; import org.apache.mahout.common.AbstractJob; @@ -82,7 +84,7 @@ public class EigenVerificationJob extend private double minEigenValue; - private boolean loadEigensInMemory; + //private boolean loadEigensInMemory; private Path tmpOut; @@ -125,7 
+127,6 @@ public class EigenVerificationJob extend * @param minEigenValue a double representing the minimum eigenvalue * @param inMemory a boolean requesting in-memory preparation * @param config the JobConf to use, or null if a default is ok (saves referencing JobConf in calling classes unless needed) - * @throws IOException */ public int run(Path corpusInput, Path eigenInput, @@ -182,7 +183,8 @@ public class EigenVerificationJob extend return OrthonormalityVerifier.pairwiseInnerProducts(eigensToVerify); } - private void saveCleanEigens(Configuration conf, List<Map.Entry<MatrixSlice, EigenStatus>> prunedEigenMeta) throws IOException { + private void saveCleanEigens(Configuration conf, Collection<Map.Entry<MatrixSlice, EigenStatus>> prunedEigenMeta) + throws IOException { Path path = new Path(outPath, CLEAN_EIGENVECTORS); FileSystem fs = FileSystem.get(conf); SequenceFile.Writer seqWriter = new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class); @@ -191,9 +193,12 @@ public class EigenVerificationJob extend for (Map.Entry<MatrixSlice, EigenStatus> pruneSlice : prunedEigenMeta) { MatrixSlice s = pruneSlice.getKey(); EigenStatus meta = pruneSlice.getValue(); - EigenVector ev = new EigenVector((DenseVector) s.vector(), meta.getEigenValue(), Math.abs(1 - meta.getCosAngle()), s.index()); + EigenVector ev = new EigenVector((DenseVector) s.vector(), + meta.getEigenValue(), + Math.abs(1 - meta.getCosAngle()), + s.index()); log.info("appending {} to {}", ev, path); - VectorWritable vw = new VectorWritable(ev); + Writable vw = new VectorWritable(ev); iw.set(s.index()); seqWriter.append(iw, vw); @@ -264,14 +269,8 @@ public class EigenVerificationJob extend /** * Progammatic invocation of run() - * @param conf TODO * @param eigenInput Output of LanczosSolver * @param corpusInput Input of LanczosSolver - * @param output - * @param inMemory - * @param maxError - * @param minEigenValue - * @param maxEigens */ public void runJob(Configuration conf, Path 
eigenInput, @@ -279,7 +278,8 @@ public class EigenVerificationJob extend Path output, boolean inMemory, double maxError, - double minEigenValue, int maxEigens) throws IOException { + double minEigenValue, + int maxEigens) throws IOException { // no need to handle command line arguments outPath = output; tmpOut = new Path(outPath, "tmp"); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java Tue Nov 9 13:19:26 2010 @@ -32,8 +32,6 @@ import org.apache.mahout.common.CommandL import org.apache.mahout.common.HadoopUtil; import org.apache.mahout.vectorizer.collocations.llr.LLRReducer; import org.apache.mahout.vectorizer.common.PartialVectorMerger; -import org.apache.mahout.vectorizer.DictionaryVectorizer; -import org.apache.mahout.vectorizer.DocumentProcessor; import org.apache.mahout.vectorizer.tfidf.TFIDFConverter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TF.java Tue Nov 9 13:19:26 2010 @@ -18,7 +18,7 @@ package org.apache.mahout.vectorizer; /** - * {@link org.apache.mahout.utils.vectors.Weight}
based on term frequency only + * {@link Weight} based on term frequency only */ public class TF implements Weight { Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java Tue Nov 9 13:19:26 2010 @@ -160,7 +160,7 @@ public final class CollocDriver extends } /** - * Generate all ngrams for the {@link org.apache.mahout.utils.vectors.text.DictionaryVectorizer} job + * Generate all ngrams for the {@link org.apache.mahout.vectorizer.DictionaryVectorizer} job * * @param input * input path containing tokenized documents @@ -200,7 +200,7 @@ public final class CollocDriver extends int maxNGramSize, int reduceTasks, int minSupport) - throws IOException, ClassNotFoundException, InterruptedException { + throws IOException, ClassNotFoundException, InterruptedException { Configuration con = new Configuration(baseConf); con.setBoolean(EMIT_UNIGRAMS, emitUnigrams); @@ -247,7 +247,7 @@ public final class CollocDriver extends boolean emitUnigrams, float minLLRValue, int reduceTasks) - throws IOException, InterruptedException, ClassNotFoundException { + throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(baseConf); conf.setLong(LLRReducer.NGRAM_TOTAL, nGramTotal); conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java Tue Nov 9 13:19:26 2010 @@ -48,11 +48,6 @@ public class AdaptiveWordValueEncoder ex } @Override - protected int hashForProbe(byte[] originalForm, int dataSize, String name, int probe) { - return super.hashForProbe(originalForm, dataSize, name, probe); - } - - @Override protected double getWeight(byte[] originalForm, double w) { return w * weight(originalForm); } Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java Tue Nov 9 13:19:26 2010 @@ -49,11 +49,12 @@ public class CachingContinuousValueEncod } protected int hashForProbe(String originalForm, int dataSize, String name, int probe) { - Preconditions.checkArgument(dataSize == this.dataSize, "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + "]"); + Preconditions.checkArgument(dataSize == this.dataSize, + "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + ']'); if (caches[probe].containsKey(originalForm.hashCode())) { return 
caches[probe].get(originalForm.hashCode()); } - int hash = super.hashForProbe(originalForm.getBytes(), dataSize, name, probe); + int hash = hashForProbe(originalForm.getBytes(), dataSize, name, probe); caches[probe].put(originalForm.hashCode(), hash); return hash; } Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java Tue Nov 9 13:19:26 2010 @@ -50,11 +50,12 @@ public class CachingStaticWordValueEncod } protected int hashForProbe(String originalForm, int dataSize, String name, int probe) { - Preconditions.checkArgument(dataSize == this.dataSize, "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + "]"); + Preconditions.checkArgument(dataSize == this.dataSize, + "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + ']'); if (caches[probe].containsKey(originalForm.hashCode())) { return caches[probe].get(originalForm.hashCode()); } - int hash = super.hashForProbe(originalForm.getBytes(), dataSize, name, probe); + int hash = hashForProbe(originalForm.getBytes(), dataSize, name, probe); caches[probe].put(originalForm.hashCode(), hash); return hash; } Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff 
============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java Tue Nov 9 13:19:26 2010 @@ -24,7 +24,7 @@ package org.apache.mahout.vectorizer.enc public abstract class CachingValueEncoder extends FeatureVectorEncoder { private int[] cachedProbes; - public CachingValueEncoder(String name, int seed) { + protected CachingValueEncoder(String name, int seed) { super(name); cacheProbeLocations(seed); } Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/Dictionary.java Tue Nov 9 13:19:26 2010 @@ -45,7 +45,7 @@ public class Dictionary { return dict.size(); } - public static Dictionary fromList(List<String> values) { + public static Dictionary fromList(Iterable<String> values) { Dictionary dict = new Dictionary(); for (String value : values) { dict.intern(value); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java?rev=1032979&r1=1032978&r2=1032979&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java (original) +++ 
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java Tue Nov 9 13:19:26 2010 @@ -24,7 +24,7 @@ import com.google.common.collect.Multise import org.apache.mahout.math.Vector; import java.util.ArrayList; -import java.util.List; +import java.util.Collection; import java.util.regex.Pattern; /** @@ -99,7 +99,7 @@ public class TextValueEncoder extends Fe @Override protected Iterable<Integer> hashesForProbe(byte[] originalForm, int dataSize, String name, int probe){ - List<Integer> hashes = new ArrayList<Integer>(); + Collection<Integer> hashes = new ArrayList<Integer>(); for (String word : tokenize(new String(originalForm, Charsets.UTF_8))){ hashes.add(hashForProbe(bytesForString(word), dataSize, name, probe)); }
