Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDRule.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDRule.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDRule.java (original) +++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/CDRule.java Tue Feb 16 10:41:38 2010 @@ -85,14 +85,14 @@ DataSet dataset = DataSet.getDataSet(); for (int condInd = 0; condInd < nbConditions; condInd++) { - int attrInd = CDRule.attributeIndex(condInd); + int attrInd = attributeIndex(condInd); setW(condInd, rng.nextDouble()); setO(condInd, rng.nextBoolean()); if (dataset.isNumerical(attrInd)) { - setV(condInd, CDRule.randomNumerical(dataset, attrInd, rng)); + setV(condInd, randomNumerical(dataset, attrInd, rng)); } else { - setV(condInd, CDRule.randomCategorical(dataset, attrInd, rng)); + setV(condInd, randomCategorical(dataset, attrInd, rng)); } } } @@ -155,7 +155,7 @@ * @return */ boolean condition(int condInd, DataLine dl) { - int attrInd = CDRule.attributeIndex(condInd); + int attrInd = attributeIndex(condInd); // is the condition active if (getW(condInd) < threshold) { @@ -170,7 +170,7 @@ } boolean numericalCondition(int condInd, DataLine dl) { - int attrInd = CDRule.attributeIndex(condInd); + int attrInd = attributeIndex(condInd); if (getO(condInd)) { return dl.getAttribut(attrInd) >= getV(condInd); @@ -180,7 +180,7 @@ } boolean categoricalCondition(int condInd, DataLine dl) { - int attrInd = CDRule.attributeIndex(condInd); + int attrInd = attributeIndex(condInd); if (getO(condInd)) { return dl.getAttribut(attrInd) == getV(condInd); @@ -201,7 +201,7 @@ buffer.append(" && "); } - buffer.append("attr").append(CDRule.attributeIndex(condInd)).append(' ').append( + buffer.append("attr").append(attributeIndex(condInd)).append(' ').append( getO(condInd) ? ">=" : "<"); buffer.append(' ').append(getV(condInd)); @@ -265,7 +265,7 @@ CDRule rule = (CDRule) obj; for (int index = 0; index < nbConditions; index++) { - if (!CDRule.areGenesEqual(this, rule, index)) { + if (!areGenesEqual(this, rule, index)) { return false; } }
Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/DataSet.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/DataSet.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/DataSet.java (original) +++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/DataSet.java Tue Feb 16 10:41:38 2010 @@ -53,10 +53,10 @@ * @throws RuntimeException if the dataset has not been initialized */ public static DataSet getDataSet() { - if (DataSet.dataset == null) { + if (dataset == null) { throw new IllegalStateException("DataSet not initialized"); } - return DataSet.dataset; + return dataset; } /** Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluator.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluator.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluator.java (original) +++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMahoutEvaluator.java Tue Feb 16 10:41:38 2010 @@ -75,10 +75,10 @@ Path outpath = OutputUtils.prepareOutput(fs); - CDMahoutEvaluator.configureJob(conf, rules, target, inpath, outpath, split); + configureJob(conf, rules, target, inpath, outpath, split); JobClient.runJob(conf); - CDMahoutEvaluator.importEvaluations(fs, conf, outpath, evaluations); + importEvaluations(fs, conf, outpath, evaluations); } /** @@ -109,7 +109,7 @@ DatasetSplit split) throws IOException { List<CDFitness> evals = new ArrayList<CDFitness>(); - CDMahoutEvaluator.evaluate(Arrays.asList(rule), target, inpath, evals, split); + evaluate(Arrays.asList(rule), target, inpath, evals, split); return evals.get(0); } @@ -127,7 +127,7 @@ */ public static void evaluate(List<? extends Rule> rules, int target, Path inpath, List<CDFitness> evaluations) throws IOException { - CDMahoutEvaluator.evaluate(rules, target, inpath, evaluations, new DatasetSplit(1)); + evaluate(rules, target, inpath, evaluations, new DatasetSplit(1)); } /** Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java (original) +++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/CDInfosTool.java Tue Feb 16 10:41:38 2010 @@ -87,10 +87,10 @@ Path outpath = OutputUtils.prepareOutput(fs); - CDInfosTool.configureJob(conf, descriptors, inpath, outpath); + configureJob(conf, descriptors, inpath, outpath); JobClient.runJob(conf); - CDInfosTool.importDescriptions(fs, conf, outpath, descriptions); + importDescriptions(fs, conf, outpath, descriptions); } /** @@ -246,14 +246,14 @@ FileSystem fs = FileSystem.get(inpath.toUri(), new Configuration()); log.info("Loading Descriptors..."); - Descriptors descriptors = CDInfosTool.loadDescriptors(fs, inpath); + Descriptors descriptors = loadDescriptors(fs, inpath); log.info("Gathering informations..."); List<String> descriptions = new ArrayList<String>(); - CDInfosTool.gatherInfos(descriptors, inpath, descriptions); + gatherInfos(descriptors, inpath, descriptions); log.info("Storing Descriptions..."); - CDInfosTool.storeDescriptions(fs, inpath, descriptors, descriptions); + storeDescriptions(fs, inpath, descriptors, descriptions); } catch (OptionException e) { log.error("Error while parsing options", e); CommandLineUtil.printHelp(group); Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java (original) +++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolCombiner.java Tue Feb 16 10:41:38 2010 @@ -88,7 +88,7 @@ if (descriptors.isNominal(index)) { return nominalDescription(values); } else if (descriptors.isNumerical(index)) { - return ToolCombiner.numericalDescription(values); + return numericalDescription(values); } else { throw new IllegalArgumentException("An ignored attribute should never reach the Combiner"); } Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java (original) +++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java Tue Feb 16 10:41:38 2010 @@ -87,7 +87,7 @@ */ String combineDescriptions(int index, Iterator<Text> values) { if (descriptors.isNumerical(index)) { - return ToolReducer.numericDescription(values); + return numericDescription(values); } else if (descriptors.isNominal(index)) { return nominalDescription(values); } else { Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/travellingsalesman/TravellingSalesman.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/travellingsalesman/TravellingSalesman.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/travellingsalesman/TravellingSalesman.java (original) +++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/travellingsalesman/TravellingSalesman.java Tue Feb 16 10:41:38 2010 @@ -107,7 +107,7 @@ @Override protected void postProcessing(List<String> result) { - executionPanel.appendOutput(TravellingSalesman.createResultString(strategy.getDescription(), result, + executionPanel.appendOutput(createResultString(strategy.getDescription(), result, evaluator.getFitness(result, null), elapsedTime)); setEnabled(true); } Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java (original) +++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java Tue Feb 16 10:41:38 2010 @@ -53,7 +53,7 @@ */ public final class SequenceFilesFromDirectory { - private transient static Logger log = LoggerFactory.getLogger(SequenceFilesFromDirectory.class); + private static final Logger log = LoggerFactory.getLogger(SequenceFilesFromDirectory.class); private static ChunkedWriter createNewChunkedWriter(int chunkSizeInMB, String outputDir) throws IOException { return new ChunkedWriter(chunkSizeInMB, outputDir); Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java (original) +++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java Tue Feb 16 10:41:38 2010 @@ -119,7 +119,7 @@ if (cmdLine.hasOption(allOpt)) { all = true; } - WikipediaToSequenceFile.runJob(inputPath, outputPath, catFile, cmdLine.hasOption(exactMatchOpt), all); + runJob(inputPath, outputPath, catFile, cmdLine.hasOption(exactMatchOpt), all); } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java (original) +++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java Tue Feb 16 10:41:38 2010 @@ -99,11 +99,11 @@ int topic = key.getX(); int word = key.getY(); - LDAPrintTopics.ensureQueueSize(queues, topic); + ensureQueueSize(queues, topic); if (word >= 0 && topic >= 0) { double score = value.get(); String realWord = wordList.get(word); - LDAPrintTopics.maybeEnqueue(queues.get(topic), realWord, score, numWordsToPrint); + maybeEnqueue(queues.get(topic), realWord, score, numWordsToPrint); } } reader.close(); @@ -219,7 +219,7 @@ throw new IllegalArgumentException("Invalid dictionary format"); } - List<List<String>> topWords = LDAPrintTopics.topWordsForTopics(input, config, wordList, + List<List<String>> topWords = topWordsForTopics(input, config, wordList, numWords); if (!output.exists()) { Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java (original) +++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java Tue Feb 16 10:41:38 2010 @@ -77,7 +77,7 @@ if (cmdLine.hasOption(helpOpt)) { - SequenceFileDumper.printHelp(group); + printHelp(group); return; } @@ -132,7 +132,7 @@ } catch (OptionException e) { log.error("Exception", e); - SequenceFileDumper.printHelp(group); + printHelp(group); } } Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original) +++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Tue Feb 16 10:41:38 2010 @@ -82,7 +82,7 @@ if (this.pointsDir != null) { JobConf conf = new JobConf(Job.class); // read in the points - clusterIdToPoints = ClusterDumper.readPoints(this.pointsDir, conf); + clusterIdToPoints = readPoints(this.pointsDir, conf); } else { clusterIdToPoints = Collections.emptyMap(); } @@ -139,7 +139,7 @@ writer.append('\n'); if (dictionary != null) { - String topTerms = ClusterDumper.getTopFeatures(center, dictionary, numTopFeatures); + String topTerms = getTopFeatures(center, dictionary, numTopFeatures); writer.write("\tTop Terms: "); writer.write(topTerms); writer.write('\n'); Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java (original) +++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocDriver.java Tue Feb 16 10:41:38 2010 @@ -95,7 +95,7 @@ Option numReduceTasksOpt = obuilder.withLongName("numReducers").withRequired(false).withArgument( abuilder.withName("numReducers").withMinimum(1).withMaximum(1).create()).withDescription( - "(Optional) Number of reduce tasks. Default Value: " + CollocDriver.DEFAULT_PASS1_NUM_REDUCE_TASKS) + "(Optional) Number of reduce tasks. Default Value: " + DEFAULT_PASS1_NUM_REDUCE_TASKS) .withShortName("nr").create(); Option preprocessOpt = obuilder.withLongName("preprocess").withRequired(false).withDescription( @@ -150,7 +150,6 @@ } int minSupport = CollocReducer.DEFAULT_MIN_SUPPORT; - ; if (cmdLine.hasOption(minSupportOpt)) { minSupport = Integer.parseInt(cmdLine.getValue(minSupportOpt).toString()); } Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocMapper.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocMapper.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocMapper.java (original) +++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocMapper.java Tue Feb 16 10:41:38 2010 @@ -53,8 +53,8 @@ public static final String MAX_SHINGLE_SIZE = "maxShingleSize"; public static final int DEFAULT_MAX_SHINGLE_SIZE = 2; - public static enum Count { - NGRAM_TOTAL; + public enum Count { + NGRAM_TOTAL } private static final Logger log = LoggerFactory.getLogger(CollocMapper.class); Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocReducer.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocReducer.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocReducer.java (original) +++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocReducer.java Tue Feb 16 10:41:38 2010 @@ -38,10 +38,10 @@ public static final String MIN_SUPPORT = "minSupport"; public static final int DEFAULT_MIN_SUPPORT = 2; - public static enum Skipped { + public enum Skipped { LESS_THAN_MIN_SUPPORT - }; - + } + private static final Logger log = LoggerFactory.getLogger(CollocReducer.class); private int minSupport; Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java (original) +++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java Tue Feb 16 10:41:38 2010 @@ -34,12 +34,12 @@ */ public class Gram implements WritableComparable<Gram> { - public static enum Type { + public enum Type { HEAD, TAIL, UNIGRAM - }; - + } + private String gram; private int frequency; private Type type; @@ -161,7 +161,7 @@ if (typeValue == 0) { type = Type.TAIL; } else if (typeValue == 1) { - type = Type.HEAD; + type = HEAD; } else { type = Type.UNIGRAM; } @@ -177,7 +177,7 @@ if (type == Type.TAIL) { out.writeByte(0); - } else if (type == Type.HEAD) { + } else if (type == HEAD) { out.writeByte(1); } else { out.writeByte(2); @@ -202,11 +202,11 @@ return 1; } - if (this.type == Type.HEAD && other.type != Type.HEAD) { + if (this.type == HEAD && other.type != HEAD) { return -1; } - if (this.type != Type.HEAD && other.type == Type.HEAD) { + if (this.type != HEAD && other.type == HEAD) { return 1; } @@ -257,7 +257,7 @@ @Override public String toString() { - return "'" + gram + "'[" + (type == Type.UNIGRAM ? "u" : type == Type.HEAD ? "h" : "t") + "]:" + return "'" + gram + "'[" + (type == Type.UNIGRAM ? "u" : type == HEAD ? "h" : "t") + "]:" + frequency; } Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java (original) +++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducer.java Tue Feb 16 10:41:38 2010 @@ -38,7 +38,7 @@ */ public class LLRReducer extends MapReduceBase implements Reducer<Gram,Gram,Text,DoubleWritable> { - public static enum Skipped { + public enum Skipped { EXTRA_HEAD, EXTRA_TAIL, MISSING_HEAD, @@ -46,8 +46,8 @@ LESS_THAN_MIN_LLR, LLR_CALCULATION_ERROR, UNIGRAM_COUNT - }; - + } + private static final Logger log = LoggerFactory.getLogger(LLRReducer.class); public static final String NGRAM_TOTAL = "ngramTotal"; @@ -179,14 +179,14 @@ /** * provide interface so the input to the llr calculation can be captured for validation in unit testing */ - public static interface LLCallback { - public double logLikelihoodRatio(int k11, int k12, int k21, int k22); + public interface LLCallback { + double logLikelihoodRatio(int k11, int k12, int k21, int k22); } /** concrete implementation delegates to LogLikelihood class */ public static final class ConcreteLLCallback implements LLCallback { public double logLikelihoodRatio(int k11, int k12, int k21, int k22) { return LogLikelihood.logLikelihoodRatio(k11, k12, k21, k22); - }; + } } } Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/NGramCollector.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/NGramCollector.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/NGramCollector.java (original) +++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/NGramCollector.java Tue Feb 16 10:41:38 2010 @@ -48,8 +48,8 @@ public static final String ANALYZER_CLASS = "analyzerClass"; public static final String MAX_SHINGLE_SIZE = "maxShingleSize"; - public static enum Count { - NGRAM_TOTAL; + public enum Count { + NGRAM_TOTAL } private static final Logger log = LoggerFactory.getLogger(NGramCollector.class); @@ -61,9 +61,7 @@ /** max size of shingles (ngrams) to create */ private int maxShingleSize; - - public NGramCollector() {} - + /** * Configure the * Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java (original) +++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java Tue Feb 16 10:41:38 2010 @@ -93,7 +93,7 @@ if (cmdLine.hasOption(helpOpt)) { - VectorDumper.printHelp(group); + printHelp(group); return; } @@ -133,7 +133,7 @@ writer = new OutputStreamWriter(System.out); } boolean printKey = cmdLine.hasOption(printKeyOpt); - SeqFileIterator iterator = (SequenceFileVectorIterable.SeqFileIterator) vectorIterable + SeqFileIterator iterator = (SeqFileIterator) vectorIterable .iterator(); int i = 0; while (iterator.hasNext()) { @@ -157,7 +157,7 @@ } catch (OptionException e) { log.error("Exception", e); - VectorDumper.printHelp(group); + printHelp(group); } } Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java (original) +++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java Tue Feb 16 10:41:38 2010 @@ -84,7 +84,7 @@ * @throws IOException */ public static String[] loadTermDictionary(File dictFile) throws IOException { - return VectorHelper.loadTermDictionary(new FileInputStream(dictFile)); + return loadTermDictionary(new FileInputStream(dictFile)); } /** Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java (original) +++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java Tue Feb 16 10:41:38 2010 @@ -131,10 +131,10 @@ }); for (File file : files) { - Driver.writeFile(outWriter, outDir, file, maxDocs, model); + writeFile(outWriter, outDir, file, maxDocs, model); } } else { - Driver.writeFile(outWriter, outDir, input, maxDocs, model); + writeFile(outWriter, outDir, input, maxDocs, model); } log.info("Dictionary Output file: {}", dictOut); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(dictOut), @@ -167,10 +167,10 @@ BufferedWriter writer = new BufferedWriter(new FileWriter(outFile)); vectorWriter = new JWriterVectorWriter(writer); } else { - vectorWriter = Driver.getSeqFileWriter(outFile); + vectorWriter = getSeqFileWriter(outFile); } } else { - vectorWriter = Driver.getSeqFileWriter(outFile); + vectorWriter = getSeqFileWriter(outFile); } long numDocs = vectorWriter.write(iteratable, maxDocs); Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java (original) +++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java Tue Feb 16 10:41:38 2010 @@ -85,7 +85,7 @@ double result = 0.0; switch (type) { case NUMERIC: { - result = MapBackedARFFModel.processNumeric(data); + result = processNumeric(data); break; } case DATE: { @@ -149,7 +149,7 @@ protected double processDate(String data, int idx) { DateFormat format = dateMap.get(idx); if (format == null) { - format = ARFFModel.DEFAULT_DATE_FORMAT; + format = DEFAULT_DATE_FORMAT; } double result; try { Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java (original) +++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java Tue Feb 16 10:41:38 2010 @@ -195,10 +195,10 @@ BufferedWriter writer = new BufferedWriter(new FileWriter(outFile)); vectorWriter = new JWriterVectorWriter(writer); } else { - vectorWriter = Driver.getSeqFileWriter(outFile); + vectorWriter = getSeqFileWriter(outFile); } } else { - vectorWriter = Driver.getSeqFileWriter(outFile); + vectorWriter = getSeqFileWriter(outFile); } long numDocs = vectorWriter.write(iterable, maxDocs); Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java (original) +++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java Tue Feb 16 10:41:38 2010 @@ -39,12 +39,12 @@ private final FieldSelector idFieldSelector; private final VectorMapper mapper; - private double normPower = LuceneIterable.NO_NORMALIZING; + private double normPower = NO_NORMALIZING; public static final double NO_NORMALIZING = -1.0; public LuceneIterable(IndexReader reader, String idField, String field, VectorMapper mapper) { - this(reader, idField, field, mapper, LuceneIterable.NO_NORMALIZING); + this(reader, idField, field, mapper, NO_NORMALIZING); } /** @@ -67,7 +67,7 @@ String field, VectorMapper mapper, double normPower) { - if (normPower != LuceneIterable.NO_NORMALIZING && normPower < 0) { + if (normPower != NO_NORMALIZING && normPower < 0) { throw new IllegalArgumentException("normPower must either be -1 or >= 0"); } idFieldSelector = new SetBasedFieldSelector(Collections.singleton(idField), Collections.emptySet()); @@ -123,7 +123,7 @@ } else { result.setName(String.valueOf(doc)); } - if (normPower != LuceneIterable.NO_NORMALIZING) { + if (normPower != NO_NORMALIZING) { result = result.normalize(normPower); } } catch (IOException e) { Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermCountReducer.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermCountReducer.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermCountReducer.java (original) +++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermCountReducer.java Tue Feb 16 10:41:38 2010 @@ -45,7 +45,7 @@ while (values.hasNext()) { sum += values.next().get(); } - if (sum >= TermCountReducer.minSupport) { + if (sum >= minSupport) { output.collect(key, new LongWritable(sum)); } } @@ -53,7 +53,7 @@ @Override public void configure(JobConf job) { super.configure(job); - TermCountReducer.minSupport = job.getInt(DictionaryVectorizer.MIN_SUPPORT, + minSupport = job.getInt(DictionaryVectorizer.MIN_SUPPORT, DictionaryVectorizer.DEFAULT_MIN_SUPPORT); } } Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountMapper.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountMapper.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountMapper.java (original) +++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/text/term/TermDocumentCountMapper.java Tue Feb 16 10:41:38 2010 @@ -51,8 +51,8 @@ while (it.hasNext()) { Element e = it.next(); - output.collect(new IntWritable(e.index()), TermDocumentCountMapper.ONE); + output.collect(new IntWritable(e.index()), ONE); } - output.collect(TermDocumentCountMapper.TOTAL_COUNT, TermDocumentCountMapper.ONE); + output.collect(TOTAL_COUNT, ONE); } } Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java (original) +++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java Tue Feb 16 10:41:38 2010 @@ -122,7 +122,7 @@ int i = 0; for (Vector vector : iterable) { Assert.assertNotNull(vector); - System.out.println("Vector[" + i++ + "]=" + TestL1ModelClustering.formatVector(vector)); + System.out.println("Vector[" + i++ + "]=" + formatVector(vector)); sampleData.add(new VectorWritable(vector)); } } @@ -204,24 +204,24 @@ public void testDocs() throws Exception { System.out.println("testDocs"); - getSampleData(TestL1ModelClustering.DOCS); + getSampleData(DOCS); DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(sampleData, new L1ModelDistribution(sampleData.get(0)), 1.0, 15, 1, 0); List<Model<VectorWritable>[]> result = dc.cluster(10); Assert.assertNotNull(result); - TestL1ModelClustering.printSamples(result, 0); - printClusters(result.get(result.size() - 1), sampleData, TestL1ModelClustering.DOCS); + printSamples(result, 0); + printClusters(result.get(result.size() - 1), sampleData, DOCS); } public void testDocs2() throws Exception { System.out.println("testDocs2"); - getSampleData(TestL1ModelClustering.DOCS2); + getSampleData(DOCS2); DirichletClusterer<VectorWritable> dc = new DirichletClusterer<VectorWritable>(sampleData, new L1ModelDistribution(sampleData.get(0)), 1.0, 15, 1, 0); List<Model<VectorWritable>[]> result = dc.cluster(10); Assert.assertNotNull(result); - TestL1ModelClustering.printSamples(result, 0); - printClusters(result.get(result.size() - 1), sampleData, TestL1ModelClustering.DOCS2); + printSamples(result, 0); + printClusters(result.get(result.size() - 1), sampleData, DOCS2); } } Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java (original) +++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java Tue Feb 16 10:41:38 2010 @@ -74,7 +74,7 @@ public void testDense() throws Exception { ARFFModel model = new MapBackedARFFModel(); - ARFFVectorIterable iterable = new ARFFVectorIterable(ARFFVectorIterableTest.SAMPLE_DENSE_ARFF, model); + ARFFVectorIterable iterable = new ARFFVectorIterable(SAMPLE_DENSE_ARFF, model); int count = 0; for (Vector vector : iterable) { Assert.assertTrue("Vector is not dense", vector instanceof DenseVector); @@ -85,7 +85,7 @@ public void testSparse() throws Exception { ARFFModel model = new MapBackedARFFModel(); - ARFFVectorIterable iterable = new ARFFVectorIterable(ARFFVectorIterableTest.SAMPLE_SPARSE_ARFF, model); + ARFFVectorIterable iterable = new ARFFVectorIterable(SAMPLE_SPARSE_ARFF, model); int count = 0; for (Vector vector : iterable) { Assert.assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector); @@ -97,7 +97,7 @@ public void testNonNumeric() throws Exception { MapBackedARFFModel model = new MapBackedARFFModel(); - ARFFVectorIterable iterable = new ARFFVectorIterable(ARFFVectorIterableTest.NON_NUMERIC_ARFF, model); + ARFFVectorIterable iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF, model); int count = 0; for (Vector vector : iterable) { Assert.assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector); @@ -125,7 +125,7 @@ public void testMultipleNoms() throws Exception { MapBackedARFFModel model = new MapBackedARFFModel(); - ARFFVectorIterable iterable = new ARFFVectorIterable(ARFFVectorIterableTest.NON_NUMERIC_ARFF, model); + ARFFVectorIterable iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF, model); int count = 0; for (Vector vector : iterable) { Assert.assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector); @@ -150,7 +150,7 @@ Assert.assertEquals(1, integerDateFormatMap.size()); model = new MapBackedARFFModel(model.getWords(), model.getWordCount(), model.getNominalMap()); - iterable = new ARFFVectorIterable(ARFFVectorIterableTest.NON_NUMERIC_ARFF2, model); + iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF2, model); count = 0; for (Vector vector : iterable) { Assert.assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector); Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java (original) +++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java Tue Feb 16 10:41:38 2010 @@ -55,7 +55,7 @@ Field id = new Field("id", "doc_" + i, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); doc.add(id); //Store both position and offset information - Field text = new Field("content", LuceneIterableTest.DOCS[i], Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES); + Field text = new Field("content", DOCS[i], Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES); doc.add(text); writer.addDocument(doc); } Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java?rev=910458&r1=910457&r2=910458&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java (original) +++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java Tue Feb 16 10:41:38 2010 @@ -63,29 +63,29 @@ } public static String getRandomDocument() { - int length = (DictionaryVectorizerTest.AVG_DOCUMENT_LENGTH >> 1) - + DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.AVG_DOCUMENT_LENGTH); - StringBuilder sb = new StringBuilder(length * DictionaryVectorizerTest.AVG_SENTENCE_LENGTH - * DictionaryVectorizerTest.AVG_WORD_LENGTH); + int length = (AVG_DOCUMENT_LENGTH >> 1) + + DictionaryVectorizerTest.random.nextInt(AVG_DOCUMENT_LENGTH); + StringBuilder sb = new StringBuilder(length * AVG_SENTENCE_LENGTH + * AVG_WORD_LENGTH); for (int i = 0; i < length; i++) { - sb.append(DictionaryVectorizerTest.getRandomSentence()); + sb.append(getRandomSentence()); } return sb.toString(); } public static String getRandomSentence() { - int length = (DictionaryVectorizerTest.AVG_SENTENCE_LENGTH >> 1) - + DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.AVG_SENTENCE_LENGTH); - StringBuilder sb = new StringBuilder(length * DictionaryVectorizerTest.AVG_WORD_LENGTH); + int length = (AVG_SENTENCE_LENGTH >> 1) + + DictionaryVectorizerTest.random.nextInt(AVG_SENTENCE_LENGTH); + StringBuilder sb = new StringBuilder(length * AVG_WORD_LENGTH); for (int i = 0; i < length; i++) { - sb.append(DictionaryVectorizerTest.getRandomString()).append(' '); + sb.append(getRandomString()).append(' '); } - sb.append(DictionaryVectorizerTest.getRandomDelimiter()); + sb.append(getRandomDelimiter()); return sb.toString(); } public static String getRandomString() { - int length = (DictionaryVectorizerTest.AVG_WORD_LENGTH >> 1) + DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.AVG_WORD_LENGTH); + int length = (AVG_WORD_LENGTH >> 1) + DictionaryVectorizerTest.random.nextInt(AVG_WORD_LENGTH); StringBuilder sb = new StringBuilder(length); for (int i = 0; i < length; i++) { sb.append(DictionaryVectorizerTest.CHARSET.charAt(DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.CHARSET.length()))); @@ -103,7 +103,7 @@ if (f.isDirectory()) { String[] contents = f.list(); for (String content : contents) { - DictionaryVectorizerTest.rmr(f.toString() + File.separator + content); + rmr(f.toString() + File.separator + content); } } f.delete(); @@ -113,8 +113,8 @@ @Override public void setUp() throws Exception { super.setUp(); - DictionaryVectorizerTest.rmr("output"); - DictionaryVectorizerTest.rmr("testdata"); + rmr("output"); + rmr("testdata"); Configuration conf = new Configuration(); fs = FileSystem.get(conf); } @@ -129,9 +129,9 @@ SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, Text.class); - for (int i = 0; i < DictionaryVectorizerTest.NUM_DOCS; i++) { + for (int i = 0; i < NUM_DOCS; i++) { writer.append(new Text("Document::ID::" + i), new Text( - DictionaryVectorizerTest.getRandomDocument())); + getRandomDocument())); } writer.close(); Class<? extends Analyzer> analyzer = new StandardAnalyzer(