Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/FPGFormatter.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/FPGFormatter.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/FPGFormatter.java (original) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/FPGFormatter.java Mon Dec 5 12:33:12 2011 @@ -1,5 +1,4 @@ -package org.apache.mahout.utils.regex; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -16,17 +15,20 @@ package org.apache.mahout.utils.regex; * limitations under the License. */ +package org.apache.mahout.utils.regex; import java.util.regex.Pattern; /** * Collapses/converts all whitespace to a single tab - * - **/ + */ public class FPGFormatter implements RegexFormatter { + private static final Pattern WHITESPACE = Pattern.compile("\\W+"); + @Override public String format(String toFormat) { - return "\t" + WHITESPACE.matcher(toFormat).replaceAll("|"); + return '\t' + WHITESPACE.matcher(toFormat).replaceAll("|"); } + }
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/IdentityFormatter.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/IdentityFormatter.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/IdentityFormatter.java (original) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/IdentityFormatter.java Mon Dec 5 12:33:12 2011 @@ -1,10 +1,22 @@ -package org.apache.mahout.utils.regex; - - -/** +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * + * http://www.apache.org/licenses/LICENSE-2.0 * - **/ + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.utils.regex; + public class IdentityFormatter implements RegexFormatter { @Override Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/IdentityTransformer.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/IdentityTransformer.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/IdentityTransformer.java (original) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/IdentityTransformer.java Mon Dec 5 12:33:12 2011 @@ -1,5 +1,4 @@ -package org.apache.mahout.utils.regex; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -16,13 +15,16 @@ package org.apache.mahout.utils.regex; * limitations under the License. */ +package org.apache.mahout.utils.regex; /** * No-op */ -public class IdentityTransformer implements RegexTransformer { +public final class IdentityTransformer implements RegexTransformer { + @Override public String transformMatch(String match) { return match; } + } Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexConverterDriver.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexConverterDriver.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexConverterDriver.java (original) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexConverterDriver.java Mon Dec 5 12:33:12 2011 @@ -1,19 +1,4 @@ -package org.apache.mahout.utils.regex; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hadoop.util.ToolRunner; -import org.apache.lucene.analysis.Analyzer; -import org.apache.mahout.common.AbstractJob; -import org.apache.mahout.common.HadoopUtil; -import org.apache.mahout.common.commandline.DefaultOptionCreator; -import org.apache.mahout.vectorizer.DefaultAnalyzer; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -30,6 +15,20 @@ import org.apache.mahout.vectorizer.Defa * limitations under the License. */ +package org.apache.mahout.utils.regex; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hadoop.util.ToolRunner; +import org.apache.lucene.analysis.Analyzer; +import org.apache.mahout.common.AbstractJob; +import org.apache.mahout.common.HadoopUtil; +import org.apache.mahout.common.commandline.DefaultOptionCreator; /** * Experimental @@ -64,14 +63,14 @@ public class RegexConverterDriver extend } String trans = getOption("transformerClass"); if (trans != null) { - if (trans.equalsIgnoreCase("url")) { + if ("url".equalsIgnoreCase(trans)) { trans = URLDecodeTransformer.class.getName(); } conf.set(RegexMapper.TRANSFORMER_CLASS, trans); } String formatter = getOption("formatterClass"); if (formatter != null) { - if (formatter.equalsIgnoreCase("fpg")) { + if ("fpg".equalsIgnoreCase(formatter)) { formatter = FPGFormatter.class.getName(); } conf.set(RegexMapper.FORMATTER_CLASS, formatter); Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexFormatter.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexFormatter.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexFormatter.java (original) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexFormatter.java Mon Dec 5 12:33:12 2011 @@ -1,4 +1,3 @@ -package org.apache.mahout.utils.regex; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,11 +15,10 @@ package org.apache.mahout.utils.regex; * limitations under the License. */ +package org.apache.mahout.utils.regex; -/** - * - * - **/ public interface RegexFormatter { - public String format(String toFormat); + + String format(String toFormat); + } Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexMapper.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexMapper.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexMapper.java (original) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexMapper.java Mon Dec 5 12:33:12 2011 @@ -1,5 +1,4 @@ -package org.apache.mahout.utils.regex; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -16,6 +15,8 @@ package org.apache.mahout.utils.regex; * limitations under the License. */ +package org.apache.mahout.utils.regex; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; @@ -28,12 +29,8 @@ import java.util.ArrayList; import java.util.List; import java.util.regex.Pattern; - -/** - * - * - **/ public class RegexMapper extends Mapper<LongWritable, Text, LongWritable, Text> { + public static final String REGEX = "regex"; public static final String GROUP_MATCHERS = "regex.groups"; public static final String TRANSFORMER_CLASS = "transformer.class"; @@ -54,8 +51,8 @@ public class RegexMapper extends Mapper< regex = Pattern.compile(regexStr); String[] groups = config.getStrings(GROUP_MATCHERS); if (groups != null) { - for (int i = 0; i < groups.length; i++) { - groupsToKeep.add(Integer.parseInt(groups[i])); + for (String group : groups) { + groupsToKeep.add(Integer.parseInt(group)); } } @@ -74,7 +71,7 @@ public class RegexMapper extends Mapper< @Override protected void map(LongWritable key, Text text, Context context) throws IOException, InterruptedException { String result = RegexUtils.extract(text.toString(), regex, groupsToKeep, " ", transformer); - if (result != null && result.length() > 0) { + if (result != null && !result.isEmpty()) { String format = formatter.format(result); context.write(key, new Text(format)); } Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexTransformer.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexTransformer.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexTransformer.java (original) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexTransformer.java Mon Dec 5 12:33:12 2011 @@ -1,4 +1,3 @@ -package org.apache.mahout.utils.regex; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,11 +15,13 @@ package org.apache.mahout.utils.regex; * limitations under the License. */ +package org.apache.mahout.utils.regex; /** * Transforms the match of a regular expression. */ public interface RegexTransformer { - public String transformMatch(String match); + + String transformMatch(String match); } Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexUtils.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexUtils.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexUtils.java (original) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/RegexUtils.java Mon Dec 5 12:33:12 2011 @@ -1,9 +1,4 @@ -package org.apache.mahout.utils.regex; - -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -20,32 +15,44 @@ import java.util.regex.Pattern; * limitations under the License. */ +package org.apache.mahout.utils.regex; + +import java.util.Collection; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public final class RegexUtils { -/** - * - * - **/ -public class RegexUtils { public static final RegexTransformer IDENTITY_TRANSFORMER = new IdentityTransformer(); public static final RegexFormatter IDENTITY_FORMATTER = new IdentityFormatter(); - public static String extract(String line, Pattern pattern, List<Integer> groupsToKeep, + private RegexUtils() { + } + + public static String extract(CharSequence line, Pattern pattern, Collection<Integer> groupsToKeep, String separator, RegexTransformer transformer) { StringBuilder bldr = new StringBuilder(); extract(line, bldr, pattern, groupsToKeep, separator, transformer); return bldr.toString(); } - public static void extract(String line, StringBuilder outputBuffer, - Pattern pattern, List<Integer> groupsToKeep, String separator, + public static void extract(CharSequence line, StringBuilder outputBuffer, + Pattern pattern, Collection<Integer> groupsToKeep, String separator, RegexTransformer transformer) { if (transformer == null) { transformer = IDENTITY_TRANSFORMER; } Matcher matcher = pattern.matcher(line); String match; - if (groupsToKeep.isEmpty() == false) { - while (matcher.find() == true) { + if (groupsToKeep.isEmpty()) { + while (matcher.find()) { + match = matcher.group(); + if (match != null) { + outputBuffer.append(transformer.transformMatch(match)).append(separator); + } + } + } else { + while (matcher.find()) { for (Integer groupNum : groupsToKeep) { match = matcher.group(groupNum); if (match != null) { @@ -53,13 +60,6 @@ public class RegexUtils { } } } - } else { - while (matcher.find() == true) { - match = matcher.group(); - if (match != null) { - outputBuffer.append(transformer.transformMatch(match)).append(separator); - } - } } //trim off the last separator, which is always there if (outputBuffer.length() > 0) { Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/URLDecodeTransformer.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/URLDecodeTransformer.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/URLDecodeTransformer.java (original) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/URLDecodeTransformer.java Mon Dec 5 12:33:12 2011 @@ -1,4 +1,3 @@ -package org.apache.mahout.utils.regex; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,16 +15,14 @@ package org.apache.mahout.utils.regex; * limitations under the License. */ +package org.apache.mahout.utils.regex; + import java.io.UnsupportedEncodingException; import java.net.URLDecoder; +public final class URLDecodeTransformer implements RegexTransformer { -/** - * - * - **/ -public class URLDecodeTransformer implements RegexTransformer { - private String enc; + private final String enc; public URLDecodeTransformer() { enc = "UTF-8"; @@ -40,7 +37,7 @@ public class URLDecodeTransformer implem try { return URLDecoder.decode(match, enc); } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); + throw new IllegalStateException(e); } } } Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java (original) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java Mon Dec 5 12:33:12 2011 @@ -77,10 +77,10 @@ public final class VectorDumper { "Print out the key as well, delimited by a tab (or the value if useKey is true)").withShortName("p") .create(); Option outputOpt = obuilder.withLongName("output").withRequired(false).withArgument( - abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription( + abuilder.withName("output").withMinimum(1).withMaximum(1).create()).withDescription( "The output file. If not specified, dumps to the console").withShortName("o").create(); Option dictOpt = obuilder.withLongName("dictionary").withRequired(false).withArgument( - abuilder.withName("dictionary").withMinimum(1).withMaximum(1).create()).withDescription( + abuilder.withName("dictionary").withMinimum(1).withMaximum(1).create()).withDescription( "The dictionary file. ").withShortName("d").create(); Option dictTypeOpt = obuilder.withLongName("dictionaryType").withRequired(false).withArgument( abuilder.withName("dictionaryType").withMinimum(1).withMaximum(1).create()).withDescription( @@ -97,13 +97,13 @@ public final class VectorDumper { Option sizeOpt = obuilder.withLongName("sizeOnly").withRequired(false). withDescription("Dump only the size of the vector").withShortName("sz").create(); Option numItemsOpt = obuilder.withLongName("numItems").withRequired(false).withArgument( - abuilder.withName("n").withMinimum(1).withMaximum(1).create()). + abuilder.withName("n").withMinimum(1).withMaximum(1).create()). withDescription("Output at most <n> vecors").withShortName("n").create(); Option numIndexesPerVectorOpt = obuilder.withLongName("vectorSize").withShortName("vs") .withRequired(false).withArgument(abuilder.withName("vs").withMinimum(1) - .withMaximum(1).create()) + .withMaximum(1).create()) .withDescription("Truncate vectors to <vs> length when dumping (most useful when in" - + " conjunction with -sort").create(); + + " conjunction with -sort").create(); Option filtersOpt = obuilder.withLongName("filter").withRequired(false).withArgument( abuilder.withName("filter").withMinimum(1).withMaximum(100).create()). withDescription("Only dump out those vectors whose name matches the filter." + @@ -219,7 +219,7 @@ public final class VectorDumper { Vector vector = vectorWritable.get(); if (filters != null && vector instanceof NamedVector - && filters.contains(((NamedVector)vector).getName()) == false){ + && !filters.contains(((NamedVector)vector).getName())){ //we are filtering out this item, skip continue; } Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java (original) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFIterator.java Mon Dec 5 12:33:12 2011 @@ -47,7 +47,7 @@ final class ARFFIterator extends Abstrac try { while ((line = reader.readLine()) != null) { line = line.trim(); - if (line.length() > 0 && !line.startsWith(ARFFModel.ARFF_COMMENT)) { + if (!line.isEmpty() && !line.startsWith(ARFFModel.ARFF_COMMENT)) { break; } } Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java (original) +++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java Mon Dec 5 12:33:12 2011 @@ -139,7 +139,7 @@ public final class Driver { Map<String,Integer> labels = arffModel.getLabelBindings(); Writer writer = Files.newWriterSupplier(dictOut, Charsets.UTF_8, true).getOutput(); try { - writer.write("Label bindings for Relation " + arffModel.getRelation() + "\n"); + writer.write("Label bindings for Relation " + arffModel.getRelation() + '\n'); for (Map.Entry<String,Integer> entry : labels.entrySet()) { writer.write(entry.getKey()); writer.write(delimiter); Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/utils/regex/RegexMapperTest.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/utils/regex/RegexMapperTest.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/integration/src/test/java/org/apache/mahout/utils/regex/RegexMapperTest.java (original) +++ mahout/trunk/integration/src/test/java/org/apache/mahout/utils/regex/RegexMapperTest.java Mon Dec 5 12:33:12 2011 @@ -1,4 +1,3 @@ -package org.apache.mahout.utils.regex; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,6 +15,8 @@ package org.apache.mahout.utils.regex; * limitations under the License. */ +package org.apache.mahout.utils.regex; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; @@ -26,14 +27,7 @@ import org.junit.Test; import java.util.List; - -/** - * - * - **/ - -public class RegexMapperTest extends MahoutTestCase { - +public final class RegexMapperTest extends MahoutTestCase { @Test public void testRegex() throws Exception { @@ -41,7 +35,6 @@ public class RegexMapperTest extends Mah Configuration conf = new Configuration(); conf.set(RegexMapper.REGEX, "(?<=(\\?|&)q=).*?(?=&|$)"); conf.set(RegexMapper.TRANSFORMER_CLASS, URLDecodeTransformer.class.getName()); - //conf.set(RegexMapper.); DummyRecordWriter<LongWritable, Text> mapWriter = new DummyRecordWriter<LongWritable, Text>(); Mapper<LongWritable, Text, LongWritable, Text>.Context mapContext = DummyRecordWriter .build(mapper, conf, mapWriter); @@ -53,7 +46,7 @@ public class RegexMapperTest extends Mah LongWritable key = new LongWritable(i); mapper.map(key, new Text(testStr), mapContext); List<Text> value = mapWriter.getValue(key); - if (RegexUtilsTest.GOLD[i].equals("") == false) { + if (!RegexUtilsTest.GOLD[i].isEmpty()) { assertEquals(1, value.size()); assertEquals(RegexUtilsTest.GOLD[i], value.get(0).toString()); } @@ -67,7 +60,6 @@ public class RegexMapperTest extends Mah conf.set(RegexMapper.REGEX, "(\\d+)\\.(\\d+)\\.(\\d+)"); conf.set(RegexMapper.TRANSFORMER_CLASS, URLDecodeTransformer.class.getName()); conf.setStrings(RegexMapper.GROUP_MATCHERS, "1", "3"); - //conf.set(RegexMapper.); DummyRecordWriter<LongWritable, Text> mapWriter = new DummyRecordWriter<LongWritable, Text>(); Mapper<LongWritable, Text, LongWritable, Text>.Context mapContext = DummyRecordWriter .build(mapper, conf, mapWriter); @@ -79,9 +71,8 @@ public class RegexMapperTest extends Mah LongWritable key = new LongWritable(i); mapper.map(key, new Text(testStr), mapContext); List<Text> value = mapWriter.getValue(key); - String gold = "127 0"; assertEquals(1, value.size()); - assertEquals(gold, value.get(0).toString()); + assertEquals("127 0", value.get(0).toString()); } } @@ -92,20 +83,19 @@ public class RegexMapperTest extends Mah conf.set(RegexMapper.REGEX, "(?<=(\\?|&)q=).*?(?=&|$)"); conf.set(RegexMapper.TRANSFORMER_CLASS, URLDecodeTransformer.class.getName()); conf.set(RegexMapper.FORMATTER_CLASS, FPGFormatter.class.getName()); - //conf.set(RegexMapper.); DummyRecordWriter<LongWritable, Text> mapWriter = new DummyRecordWriter<LongWritable, Text>(); Mapper<LongWritable, Text, LongWritable, Text>.Context mapContext = DummyRecordWriter .build(mapper, conf, mapWriter); mapper.setup(mapContext); - FPGFormatter formatter = new FPGFormatter(); + RegexFormatter formatter = new FPGFormatter(); for (int i = 0; i < RegexUtilsTest.TEST_STRS.length; i++) { String testStr = RegexUtilsTest.TEST_STRS[i]; LongWritable key = new LongWritable(i); mapper.map(key, new Text(testStr), mapContext); List<Text> value = mapWriter.getValue(key); - if (RegexUtilsTest.GOLD[i].equals("") == false) { + if (!RegexUtilsTest.GOLD[i].isEmpty()) { assertEquals(1, value.size()); assertEquals(formatter.format(RegexUtilsTest.GOLD[i]), value.get(0).toString()); } Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/utils/regex/RegexUtilsTest.java URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/utils/regex/RegexUtilsTest.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/integration/src/test/java/org/apache/mahout/utils/regex/RegexUtilsTest.java (original) +++ mahout/trunk/integration/src/test/java/org/apache/mahout/utils/regex/RegexUtilsTest.java Mon Dec 5 12:33:12 2011 @@ -1,5 +1,4 @@ -package org.apache.mahout.utils.regex; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -16,37 +15,32 @@ package org.apache.mahout.utils.regex; * limitations under the License. */ +package org.apache.mahout.utils.regex; import org.apache.mahout.common.MahoutTestCase; import org.junit.Test; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; -import java.util.List; import java.util.regex.Pattern; +public final class RegexUtilsTest extends MahoutTestCase { -/** - * - * - **/ -public class RegexUtilsTest extends MahoutTestCase { - public static final String[] TEST_STRS = new String[]{ + static final String[] TEST_STRS = { "127.0.0.1 - - [01/10/2011:00:01:51 +0000] \"GET /solr/collection1/browse?q=foo&rows=10&wt=json&hl=true&hl.fl=body&hl.fl=content", "127.0.0.1 - - [01/10/2011:00:20:58 +0000] \"GET /solr/collection1/browse?q=Using+Solr+Search+RDBMS&fq=%7B%21tag%3Dsource%7D%28%28source%3Alucid+AND+lucid_facet%3A%28site%29%29%29&rows=10", "127.0.0.1 - - [01/10/2011:00:21:21 +0000] \"GET /solr/collection1/browse?q=language+detection&start=560&rows=10 HTTP/1.1\" 200 45071", "127.0.0.1 - - [01/10/2011:00:21:21 +0000] \"GET /solr/collection1/browse?q=&start=560&rows=10 HTTP/1.1\" 200 45071" }; - public static final String[] GOLD = new String[]{"foo", "Using Solr Search RDBMS", "language detection", ""}; + static final String[] GOLD = {"foo", "Using Solr Search RDBMS", "language detection", ""}; @Test public void testExtract() throws Exception { + Pattern pattern = Pattern.compile("(?<=(\\?|&)q=).*?(?=&|$)"); String line = "127.0.0.1 - - [24/05/2010:01:19:22 +0000] \"GET /solr/select?q=import statement&start=1 HTTP/1.1\" 200 37571"; - String res; - Pattern pattern; - pattern = Pattern.compile("(?<=(\\?|&)q=).*?(?=&|$)"); - res = RegexUtils.extract(line, pattern, Collections.<Integer>emptyList(), " ", RegexUtils.IDENTITY_TRANSFORMER); - assertTrue(res, res.equals("import statement")); + String res = RegexUtils.extract(line, pattern, Collections.<Integer>emptyList(), " ", RegexUtils.IDENTITY_TRANSFORMER); + assertEquals(res, "import statement", res); for (int i = 0; i < TEST_STRS.length; i++) { String testStr = TEST_STRS[i]; @@ -56,12 +50,12 @@ public class RegexUtilsTest extends Maho pattern = Pattern.compile("((?<=(\\?|&)q=)(.*?)(?=(&|$))|(?<=((\\?|&)start=))(\\d+))"); res = RegexUtils.extract(line, pattern, Collections.<Integer>emptyList(), " ", RegexUtils.IDENTITY_TRANSFORMER); - assertTrue(res, res.equals("import statement 1")); + assertEquals(res, "import statement 1", res); pattern = Pattern.compile("(start=1) HTTP"); - List<Integer> groupsToKeep = new ArrayList<Integer>(); + Collection<Integer> groupsToKeep = new ArrayList<Integer>(); groupsToKeep.add(1); res = RegexUtils.extract(line, pattern, groupsToKeep, " ", RegexUtils.IDENTITY_TRANSFORMER); - assertTrue(res, res.equals("start=1")); + assertEquals(res, "start=1", res); } } Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/MurmurHash3.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/MurmurHash3.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/math/src/main/java/org/apache/mahout/math/MurmurHash3.java (original) +++ mahout/trunk/math/src/main/java/org/apache/mahout/math/MurmurHash3.java Mon Dec 5 12:33:12 2011 @@ -1,5 +1,4 @@ -package org.apache.mahout.math; -/** +/* * This code is public domain. * * The MurmurHash3 algorithm was created by Austin Appleby and put into the public domain. See http://code.google.com/p/smhasher/ @@ -8,7 +7,7 @@ package org.apache.mahout.math; * Yonik Seeley and was placed into the public domain per https://github.com/yonik/java_util/blob/master/src/util/hash/MurmurHash3.java. */ -// +package org.apache.mahout.math; /** * <p> @@ -60,7 +59,7 @@ public class MurmurHash3 { k1 |= (data[roundedEnd + 1] & 0xff) << 8; // fallthrough case 1: - k1 |= (data[roundedEnd] & 0xff); + k1 |= data[roundedEnd] & 0xff; k1 *= c1; k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15); k1 *= c2; Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/als/AlternatingLeastSquaresSolver.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/als/AlternatingLeastSquaresSolver.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/math/src/main/java/org/apache/mahout/math/als/AlternatingLeastSquaresSolver.java (original) +++ mahout/trunk/math/src/main/java/org/apache/mahout/math/als/AlternatingLeastSquaresSolver.java Mon Dec 5 12:33:12 2011 @@ -30,7 +30,7 @@ import java.util.Iterator; * See <a href="http://www.hpl.hp.com/personal/Robert_Schreiber/papers/2008%20AAIM%20Netflix/netflix_aaim08(submitted).pdf"> * this paper.</a> */ -public class AlternatingLeastSquaresSolver { +public final class AlternatingLeastSquaresSolver { public Vector solve(Iterable<Vector> featureVectors, Vector ratingVector, double lambda, int numFeatures) { @@ -53,11 +53,11 @@ public class AlternatingLeastSquaresSolv return solve(Ai, Vi); } - protected Vector solve(Matrix Ai, Matrix Vi) { + Vector solve(Matrix Ai, Matrix Vi) { return new QRDecomposition(Ai).solve(Vi).viewColumn(0); } - protected Matrix addLambdaTimesNuiTimesE(Matrix matrix, double lambda, int nui) { + Matrix addLambdaTimesNuiTimesE(Matrix matrix, double lambda, int nui) { Preconditions.checkArgument(matrix.numCols() == matrix.numRows()); for (int n = 0; n < matrix.numCols(); n++) { matrix.setQuick(n, n, matrix.getQuick(n, n) + lambda * nui); @@ -65,7 +65,7 @@ public class AlternatingLeastSquaresSolv return matrix; } - protected Matrix createMiIi(Iterable<Vector> featureVectors, int numFeatures) { + Matrix createMiIi(Iterable<Vector> featureVectors, int numFeatures) { Matrix MiIi = new DenseMatrix(numFeatures, Iterables.size(featureVectors)); int n = 0; for (Vector featureVector : featureVectors) { @@ -77,7 +77,7 @@ public class AlternatingLeastSquaresSolv return MiIi; } - protected Matrix createRiIiMaybeTransposed(Vector ratingVector) { + Matrix createRiIiMaybeTransposed(Vector ratingVector) { Preconditions.checkArgument(ratingVector.isSequentialAccess()); Matrix RiIiMaybeTransposed = new DenseMatrix(ratingVector.getNumNondefaultElements(), 1); Iterator<Vector.Element> ratingsIterator = ratingVector.iterateNonZero(); Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/als/ImplicitFeedbackAlternatingLeastSquaresSolver.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/als/ImplicitFeedbackAlternatingLeastSquaresSolver.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/math/src/main/java/org/apache/mahout/math/als/ImplicitFeedbackAlternatingLeastSquaresSolver.java (original) +++ mahout/trunk/math/src/main/java/org/apache/mahout/math/als/ImplicitFeedbackAlternatingLeastSquaresSolver.java Mon Dec 5 12:33:12 2011 @@ -52,7 +52,7 @@ public class ImplicitFeedbackAlternating return solve(YtransposeY.plus(YtransponseCuMinusIYPlusLambdaI(ratings)), YtransponseCuPu(ratings)); } - private Vector solve(Matrix A, Matrix y) { + private static Vector solve(Matrix A, Matrix y) { return new QRDecomposition(A).solve(y).viewColumn(0); } @@ -116,7 +116,7 @@ public class ImplicitFeedbackAlternating Iterator<Vector.Element> ratings = userRatings.iterateNonZero(); while (ratings.hasNext()) { Vector.Element e = ratings.next(); - YtransponseCuPu.assign((Y.get(e.index()).times(confidence(e.get()))), Functions.PLUS); + YtransponseCuPu.assign(Y.get(e.index()).times(confidence(e.get())), Functions.PLUS); } return columnVectorAsMatrix(YtransponseCuPu); @@ -124,8 +124,9 @@ public class ImplicitFeedbackAlternating private Matrix columnVectorAsMatrix(Vector v) { Matrix matrix = new DenseMatrix(numFeatures, 1); - for (Vector.Element e : v) - matrix.setQuick(e.index(), 0, e.get()); + for (Vector.Element e : v) { + matrix.setQuick(e.index(), 0, e.get()); + } return matrix; } Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosState.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosState.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosState.java (original) +++ mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosState.java Mon Dec 5 12:33:12 2011 @@ -26,20 +26,20 @@ import org.apache.mahout.math.VectorIter import java.util.Map; public class LanczosState { + protected Matrix diagonalMatrix; protected final VectorIterable corpus; protected double scaleFactor; protected int iterationNumber; protected final int desiredRank; protected Map<Integer, Vector> basis; - protected final Map<Integer, Double> singularValues; protected Map<Integer, Vector> singularVectors; - public LanczosState(VectorIterable corpus, int numCols, int desiredRank, Vector initialVector) { + public LanczosState(VectorIterable corpus, int desiredRank, Vector initialVector) { this.corpus = corpus; this.desiredRank = desiredRank; - intitializeBasisAndSingularVectors(numCols, desiredRank); + intitializeBasisAndSingularVectors(); setBasisVector(0, initialVector); scaleFactor = 0; diagonalMatrix = new DenseMatrix(desiredRank, desiredRank); @@ -47,7 +47,7 @@ public class LanczosState { iterationNumber = 1; } - protected void intitializeBasisAndSingularVectors(int numCols, int rank) { + protected void intitializeBasisAndSingularVectors() { basis = Maps.newHashMap(); singularVectors = Maps.newHashMap(); } Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/MurmurHash3Test.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/MurmurHash3Test.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/math/src/test/java/org/apache/mahout/math/MurmurHash3Test.java (original) +++ mahout/trunk/math/src/test/java/org/apache/mahout/math/MurmurHash3Test.java Mon Dec 5 12:33:12 2011 @@ -1,32 +1,41 @@ -package org.apache.mahout.math; /** * Public domain, just like the MurmurHash3 code. */ -import junit.framework.TestCase; +package org.apache.mahout.math; +import org.junit.Test; +public final class MurmurHash3Test extends MahoutTestCase { -public class MurmurHash3Test extends TestCase { + private static final int[] ANSWERS = + {0x0,0xcf9ce026,0x7b1ebceb,0x8a59e474,0xcf337f94,0x8b678f66,0x813ff5a2,0x1c2f4b2b,0xa6fcba77,0xe658f908, + 0x9f2656af,0x826b85ca,0xebb6ceca,0x24c4112c,0x66eff5b0,0xa9aca7d5,0xf7f04d03,0x9d781105,0x6dcde4f3, + 0x69edd8a8,0x5cdcd417,0x18d67f6,0xea040c90,0xdf70ea4a,0x8fb349e6,0x79a89b03,0x7ef9fc34,0x6017f692, + 0x5be02058,0x9e3986f9,0x8fa6dd28,0x6733b993,0x26230d32,0x92051d69,0x8d6f37f7,0xa1653103,0x8491c23f, + 0x2e8f59ce,0x5ae9461e,0xfe286e6,0x844e6959,0x87e9065d,0xe302e21c,0x1b3b3296,0xd29849c9,0x4e625f26, + 0xa8c35ac0,0x71335a06,0xfd256d8f,0x4e5eb258,0x4e2320d1,0xba2e9832,0xb00df8eb,0xbd87594d,0x83b6dce3, + 0xcf8646d0,0x7e79f2e2,0xd41fcd97,0x556a93,0x4419437b,0x39aa0e4e,0x43a57251,0x9430922f,0xd784b08f, + 0xa2772512,0xa2a6ee4b,0x9cb1abae,0xebd2bef0}; + @Test public void testCorrectValues() throws Exception { byte[] bytes = "Now is the time for all good men to come to the aid of their country".getBytes("UTF-8"); - int hash=0; - for (int i=0; i<bytes.length; i++) { - hash = hash*31+(bytes[i]&0xff); - bytes[i] = (byte)hash; + int hash = 0; + for (int i = 0; i < bytes.length; i++) { + hash = hash * 31 + (bytes[i] & 0xff); + bytes[i] = (byte) hash; } // test different offsets. - for (int offset = 0; offset<10; offset++) { + for (int offset = 0; offset < 10; offset++) { byte[] arr = new byte[bytes.length + offset]; System.arraycopy(bytes, 0, arr, offset, bytes.length); - for (int len=0; len<bytes.length; len++) { + for (int len = 0; len < bytes.length; len++) { int h = MurmurHash3.murmurhash3_x86_32(arr, offset, len, len); - assertEquals(answers[len], h); + assertEquals(ANSWERS[len], h); } } } - static int[] answers = new int[] {0x0,0xcf9ce026,0x7b1ebceb,0x8a59e474,0xcf337f94,0x8b678f66,0x813ff5a2,0x1c2f4b2b,0xa6fcba77,0xe658f908,0x9f2656af,0x826b85ca,0xebb6ceca,0x24c4112c,0x66eff5b0,0xa9aca7d5,0xf7f04d03,0x9d781105,0x6dcde4f3,0x69edd8a8,0x5cdcd417,0x18d67f6,0xea040c90,0xdf70ea4a,0x8fb349e6,0x79a89b03,0x7ef9fc34,0x6017f692,0x5be02058,0x9e3986f9,0x8fa6dd28,0x6733b993,0x26230d32,0x92051d69,0x8d6f37f7,0xa1653103,0x8491c23f,0x2e8f59ce,0x5ae9461e,0xfe286e6,0x844e6959,0x87e9065d,0xe302e21c,0x1b3b3296,0xd29849c9,0x4e625f26,0xa8c35ac0,0x71335a06,0xfd256d8f,0x4e5eb258,0x4e2320d1,0xba2e9832,0xb00df8eb,0xbd87594d,0x83b6dce3,0xcf8646d0,0x7e79f2e2,0xd41fcd97,0x556a93,0x4419437b,0x39aa0e4e,0x43a57251,0x9430922f,0xd784b08f,0xa2772512,0xa2a6ee4b,0x9cb1abae,0xebd2bef0}; } \ No newline at end of file Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java (original) +++ mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java Mon Dec 5 12:33:12 2011 @@ -41,7 +41,7 @@ public final class TestLanczosSolver ext initialVector.assign(1.0 / Math.sqrt(size)); LanczosSolver solver = new LanczosSolver(); int desiredRank = 80; - LanczosState state = new LanczosState(m, size, desiredRank, initialVector); + LanczosState state = new LanczosState(m, desiredRank, initialVector); // set initial vector? solver.solve(state, desiredRank, true); @@ -71,7 +71,7 @@ public final class TestLanczosSolver ext Vector initialVector = new DenseVector(numColumns); initialVector.assign(1.0 / Math.sqrt(numColumns)); int rank = 50; - LanczosState state = new LanczosState(corpus, numColumns, rank, initialVector); + LanczosState state = new LanczosState(corpus, rank, initialVector); long time = timeLanczos(corpus, state, rank, false); assertTrue("Lanczos taking too long! Are you in the debugger? :)", time < 10000); assertOrthonormal(state); @@ -88,7 +88,7 @@ public final class TestLanczosSolver ext Vector initialVector = new DenseVector(numCols); initialVector.assign(1.0 / Math.sqrt(numCols)); int rank = 30; - LanczosState state = new LanczosState(corpus, numCols, rank, initialVector); + LanczosState state = new LanczosState(corpus, rank, initialVector); long time = timeLanczos(corpus, state, rank, true); assertTrue("Lanczos taking too long! Are you in the debugger? :)", time < 10000); //assertOrthonormal(state); Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/solver/TestConjugateGradientSolver.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/solver/TestConjugateGradientSolver.java?rev=1210428&r1=1210427&r2=1210428&view=diff ============================================================================== --- mahout/trunk/math/src/test/java/org/apache/mahout/math/solver/TestConjugateGradientSolver.java (original) +++ mahout/trunk/math/src/test/java/org/apache/mahout/math/solver/TestConjugateGradientSolver.java Mon Dec 5 12:33:12 2011 @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.mahout.math.solver; import org.apache.mahout.math.DenseMatrix; @@ -7,8 +24,8 @@ import org.apache.mahout.math.Matrix; import org.apache.mahout.math.Vector; import org.junit.Test; -public class TestConjugateGradientSolver extends MahoutTestCase -{ +public class TestConjugateGradientSolver extends MahoutTestCase { + @Test public void testConjugateGradientSolver() { Matrix a = getA(); @@ -121,7 +138,8 @@ public class TestConjugateGradientSolver -0.26613760880058035, 0.04428280690189126, 0.05917203395002889, 0.14089688752570340, 0.02901858439788401 }, 10, 10); } - + + /* private static Matrix getAsymmetricMatrix() { return reshape(new double[] { 0.1586493402398226, -0.8668244036239467, 0.4335233711065471, -1.1025223577469705, 1.1344100191664601, @@ -199,6 +217,7 @@ public class TestConjugateGradientSolver return m; } + */ private static Matrix reshape(double[] values, int rows, int columns) { Matrix m = new DenseMatrix(rows, columns);
