MAHOUT-1652: Java 7 upgrade, this closes apache/mahout#112
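Nearly all of the churn in this commit reduces to three mechanical Java 7 migrations: the diamond operator replaces Guava's collection factories (Lists.newArrayList(), Maps.newHashMap(), Sets.newHashSet()), try-with-resources replaces try/finally blocks built around Closeables.close(), and import blocks are regrouped. A minimal before/after sketch of the two code-level idioms, with illustrative names that do not come from the Mahout sources:

    import java.io.IOException;
    import java.io.PrintWriter;
    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class Java7Idioms {
      public static void main(String[] args) throws IOException {
        // Before (Guava): List<String> names = Lists.newArrayList();
        // After: the diamond operator lets the compiler infer the type argument.
        List<String> names = new ArrayList<>();
        Map<String, List<String>> stats = new HashMap<>();
        names.add("dense-vector");
        stats.put("serialize", names);

        // Before: PrintWriter out = new PrintWriter("out.txt");
        //         try { out.println("x"); } finally { Closeables.close(out, false); }
        // After: try-with-resources closes the writer automatically, even on exception.
        try (PrintWriter out = new PrintWriter("out.txt")) {
          out.println(stats);
        }
      }
    }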
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/85f9ece6
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/85f9ece6
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/85f9ece6

Branch: refs/heads/master
Commit: 85f9ece660407fe68ad61560ebd723a57ec761e6
Parents: 53e5ada
Author: Suneel Marthi <[email protected]>
Authored: Sun Apr 5 22:21:29 2015 -0400
Committer: Suneel Marthi <[email protected]>
Committed: Sun Apr 5 22:24:06 2015 -0400

----------------------------------------------------------------------
 .../benchmark/ClosestCentroidBenchmark.java | 2 -
 .../benchmark/SerializationBenchmark.java | 31 ++--
 .../mahout/benchmark/VectorBenchmarks.java | 45 ++++----
 .../taste/impl/model/hbase/HBaseDataModel.java | 42 ++++---
 .../impl/model/jdbc/AbstractJDBCDataModel.java | 29 +++--
 .../impl/model/mongodb/MongoDBDataModel.java | 54 +++++----
 .../jdbc/MySQLJDBCItemSimilarity.java | 1 -
 .../classifier/ConfusionMatrixDumper.java | 21 ++--
 .../mahout/clustering/cdbw/CDbwEvaluator.java | 15 ++-
 .../clustering/conversion/InputMapper.java | 16 +--
 .../clustering/evaluation/ClusterEvaluator.java | 11 +-
 .../evaluation/RepresentativePointsDriver.java | 28 ++---
 .../evaluation/RepresentativePointsMapper.java | 10 +-
 .../mahout/clustering/lda/LDAPrintTopics.java | 15 ++-
 .../apache/mahout/text/LuceneIndexHelper.java | 3 +-
 .../mahout/text/LuceneSegmentInputFormat.java | 10 +-
 .../mahout/text/LuceneSegmentRecordReader.java | 1 -
 .../mahout/text/LuceneStorageConfiguration.java | 25 +++--
 .../text/MailArchivesClusteringAnalyzer.java | 1 +
 .../mahout/text/PrefixAdditionFilter.java | 8 +-
 .../mahout/text/SequenceFilesFromDirectory.java | 10 +-
 .../SequenceFilesFromLuceneStorageDriver.java | 15 ++-
 .../SequenceFilesFromLuceneStorageMapper.java | 9 +-
 .../text/SequenceFilesFromMailArchives.java | 18 ++-
 .../mahout/text/WholeFileRecordReader.java | 20 ++--
 .../mahout/text/WikipediaToSequenceFile.java | 4 +-
 .../text/wikipedia/WikipediaAnalyzer.java | 1 +
 .../WikipediaDatasetCreatorDriver.java | 4 +-
 .../WikipediaDatasetCreatorMapper.java | 25 ++---
 .../mahout/text/wikipedia/WikipediaMapper.java | 6 +-
 .../text/wikipedia/WikipediaXmlSplitter.java | 8 +-
 .../mahout/text/wikipedia/XmlInputFormat.java | 6 +-
 .../mahout/utils/ConcatenateVectorsJob.java | 8 +-
 .../org/apache/mahout/utils/MatrixDumper.java | 2 +-
 .../apache/mahout/utils/SequenceFileDumper.java | 11 +-
 .../org/apache/mahout/utils/SplitInput.java | 55 ++++-----
 .../org/apache/mahout/utils/SplitInputJob.java | 1 +
 .../mahout/utils/clustering/ClusterDumper.java | 20 ++--
 .../nlp/collocations/llr/BloomTokenFilter.java | 2 +-
 .../mahout/utils/regex/AnalyzerTransformer.java | 17 +--
 .../apache/mahout/utils/regex/RegexMapper.java | 14 +--
 .../apache/mahout/utils/vectors/RowIdJob.java | 34 ++----
 .../mahout/utils/vectors/VectorDumper.java | 6 +-
 .../mahout/utils/vectors/VectorHelper.java | 15 ++-
 .../mahout/utils/vectors/arff/ARFFIterator.java | 6 +-
 .../mahout/utils/vectors/arff/ARFFModel.java | 1 -
 .../utils/vectors/arff/ARFFVectorIterable.java | 8 +-
 .../mahout/utils/vectors/arff/Driver.java | 111 +++++++++----------
 .../utils/vectors/arff/MapBackedARFFModel.java | 12 +-
 .../utils/vectors/lucene/CachedTermInfo.java | 12 +-
 .../utils/vectors/lucene/ClusterLabels.java | 21 ++--
 .../mahout/utils/vectors/lucene/Driver.java | 39 +++----
 .../utils/vectors/lucene/LuceneIterator.java | 10 +-
 .../mahout/clustering/TestClusterDumper.java | 23 ++--
 .../clustering/cdbw/TestCDbwEvaluator.java | 25 ++---
 .../mahout/text/AbstractLuceneStorageTest.java | 15 ++-
 .../text/LuceneSegmentRecordReaderTest.java | 25 +++--
 .../text/LuceneStorageConfigurationTest.java | 11 +-
 ...equenceFilesFromLuceneStorageDriverTest.java | 32 ++----
 ...SequenceFilesFromLuceneStorageMRJobTest.java | 15 +--
 .../SequenceFilesFromLuceneStorageTest.java | 70 ++++++------
 .../text/SequenceFilesFromMailArchivesTest.java | 19 +---
 .../text/TestSequenceFilesFromDirectory.java | 58 +++-------
 .../mahout/text/doc/NumericFieldDocument.java | 1 -
 .../org/apache/mahout/utils/SplitInputTest.java | 29 ++---
 .../mahout/utils/TestConcatenateVectorsJob.java | 4 +-
 .../mahout/utils/email/MailProcessorTest.java | 8 +-
 .../collocations/llr/BloomTokenFilterTest.java | 3 +-
 .../mahout/utils/regex/RegexMapperTest.java | 4 +-
 .../mahout/utils/regex/RegexUtilsTest.java | 10 +-
 .../mahout/utils/vectors/VectorHelperTest.java | 19 +---
 .../vectors/arff/ARFFVectorIterableTest.java | 3 +-
 .../mahout/utils/vectors/arff/DriverTest.java | 3 +-
 .../vectors/arff/MapBackedARFFModelTest.java | 1 -
 .../vectors/csv/CSVVectorIteratorTest.java | 16 +--
 .../utils/vectors/io/VectorWriterTest.java | 16 +--
 .../org/apache/mahout/math/QRDecomposition.java | 1 -
 .../mahout/math/SingularValueDecomposition.java | 2 +-
 .../apache/mahout/math/SparseColumnMatrix.java | 4 +-
 .../math/decomposer/hebbian/HebbianSolver.java | 10 +-
 .../math/decomposer/hebbian/TrainingState.java | 4 +-
 .../apache/mahout/math/stats/LogLikelihood.java | 8 +-
 .../apache/mahout/common/RandomUtilsTest.java | 2 +-
 .../taste/hadoop/RecommendedItemsWritable.java | 4 +-
 .../mahout/cf/taste/hadoop/TopItemsQueue.java | 10 +-
 .../apache/mahout/cf/taste/hadoop/als/ALS.java | 20 ++--
 .../hadoop/als/FactorizationEvaluator.java | 14 +--
 .../hadoop/als/ParallelALSFactorizationJob.java | 11 +-
 .../mahout/cf/taste/hadoop/item/IDReader.java | 16 +--
 .../item/ItemFilterAsVectorAndPrefsReducer.java | 12 +-
 .../hadoop/item/ToVectorAndPrefReducer.java | 6 +-
 .../hadoop/item/VectorAndPrefsWritable.java | 6 +-
 .../similarity/item/TopSimilarItemsQueue.java | 10 +-
 .../cf/taste/impl/common/RefreshHelper.java | 8 +-
 .../AbstractDifferenceRecommenderEvaluator.java | 13 +--
 .../eval/GenericRelevantItemsDataSplitter.java | 4 +-
 .../cf/taste/impl/eval/LoadEvaluator.java | 4 +-
 .../cf/taste/impl/model/AbstractIDMigrator.java | 3 +-
 .../cf/taste/impl/model/file/FileDataModel.java | 13 +--
 .../GenericItemBasedRecommender.java | 2 +-
 .../impl/recommender/RandomRecommender.java | 4 +-
 .../cf/taste/impl/recommender/TopItems.java | 13 +--
 .../impl/recommender/svd/ALSWRFactorizer.java | 25 ++---
 .../svd/FilePersistenceStrategy.java | 17 +--
 .../recommender/svd/SVDPlusPlusFactorizer.java | 12 +-
 .../precompute/FileSimilarItemsWriter.java | 2 +-
 .../mahout/classifier/ConfusionMatrix.java | 10 +-
 .../classifier/RegressionResultAnalyzer.java | 4 +-
 .../apache/mahout/classifier/df/DFUtils.java | 54 ++++-----
 .../mahout/classifier/df/DecisionForest.java | 10 +-
 .../df/builder/DecisionTreeBuilder.java | 4 +-
 .../apache/mahout/classifier/df/data/Data.java | 20 ++--
 .../mahout/classifier/df/data/DataLoader.java | 11 +-
 .../mahout/classifier/df/data/DataUtils.java | 4 +-
 .../mahout/classifier/df/data/Dataset.java | 12 +-
 .../classifier/df/data/DescriptorUtils.java | 4 +-
 .../classifier/df/mapreduce/Classifier.java | 12 +-
 .../df/mapreduce/inmem/InMemBuilder.java | 16 +--
 .../df/mapreduce/inmem/InMemInputFormat.java | 18 +--
 .../df/mapreduce/partial/Step1Mapper.java | 4 +-
 .../classifier/df/ref/SequentialBuilder.java | 4 +-
 .../mahout/classifier/df/tools/Describe.java | 12 +-
 .../mahout/classifier/mlp/NeuralNetwork.java | 50 ++++-----
 .../classifier/mlp/RunMultilayerPerceptron.java | 24 ++--
 .../mlp/TrainMultilayerPerceptron.java | 59 ++++------
 .../classifier/naivebayes/BayesUtils.java | 21 +---
 .../classifier/naivebayes/NaiveBayesModel.java | 16 +--
 .../naivebayes/test/TestNaiveBayesDriver.java | 11 +-
 .../sequencelearning/hmm/BaumWelchTrainer.java | 10 +-
 .../sequencelearning/hmm/HmmUtils.java | 7 +-
 .../hmm/RandomSequenceGenerator.java | 14 +--
 .../sequencelearning/hmm/ViterbiEvaluator.java | 19 ++--
 .../sgd/AdaptiveLogisticRegression.java | 16 +--
 .../mahout/classifier/sgd/CrossFoldLearner.java | 4 +-
 .../mahout/classifier/sgd/CsvRecordFactory.java | 14 ++-
 .../mahout/classifier/sgd/GradientMachine.java | 4 +-
 .../mahout/classifier/sgd/ModelDissector.java | 14 +--
 .../mahout/classifier/sgd/ModelSerializer.java | 15 +--
 .../mahout/classifier/sgd/RankingGradient.java | 4 +-
 .../mahout/clustering/AbstractCluster.java | 9 +-
 .../mahout/clustering/ClusteringUtils.java | 3 +-
 .../classify/ClusterClassificationDriver.java | 10 +-
 .../classify/ClusterClassificationMapper.java | 10 +-
 .../clustering/classify/ClusterClassifier.java | 91 +++++++--------
 .../fuzzykmeans/FuzzyKMeansDriver.java | 4 +-
 .../mahout/clustering/iterator/CIReducer.java | 4 +-
 .../iterator/FuzzyKMeansClusteringPolicy.java | 7 +-
 .../mahout/clustering/kmeans/KMeansDriver.java | 4 +-
 .../clustering/kmeans/RandomSeedGenerator.java | 13 +--
 .../mahout/clustering/lda/cvb/CVB0Driver.java | 12 +-
 .../cvb/InMemoryCollapsedVariationalBayes0.java | 45 ++------
 .../mahout/clustering/lda/cvb/ModelTrainer.java | 26 ++---
 .../mahout/clustering/lda/cvb/TopicModel.java | 28 ++---
 .../mahout/clustering/spectral/VectorCache.java | 15 +--
 .../spectral/kmeans/EigenSeedGenerator.java | 16 +--
 .../streaming/cluster/BallKMeans.java | 12 +-
 .../streaming/cluster/StreamingKMeans.java | 4 +-
 .../mapreduce/StreamingKMeansDriver.java | 6 +-
 .../mapreduce/StreamingKMeansMapper.java | 4 +-
 .../mapreduce/StreamingKMeansThread.java | 4 +-
 .../mapreduce/StreamingKMeansUtilsMR.java | 17 +--
 .../streaming/tools/ResplitSequenceFiles.java | 4 +-
 .../ClusterOutputPostProcessor.java | 12 +-
 .../org/apache/mahout/common/AbstractJob.java | 24 ++--
 .../apache/mahout/common/CommandLineUtil.java | 2 +-
 .../org/apache/mahout/common/HadoopUtil.java | 17 +--
 .../distance/MahalanobisDistanceMeasure.java | 27 ++---
 .../distance/MinkowskiDistanceMeasure.java | 4 +-
 .../distance/WeightedDistanceMeasure.java | 10 +-
 .../WeightedEuclideanDistanceMeasure.java | 1 -
 .../org/apache/mahout/driver/MahoutDriver.java | 10 +-
 .../apache/mahout/ep/EvolutionaryProcess.java | 17 +--
 172 files changed, 1055 insertions(+), 1487 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java b/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java
index d28ee5a..b1c2ded 100644
--- a/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java
+++ b/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java
@@ -27,8 +27,6 @@ import org.apache.mahout.math.SparseMatrix;
 import org.apache.mahout.math.Vector;
 
 public class ClosestCentroidBenchmark {
-  public static final String SERIALIZE = "Serialize";
-  public static final String DESERIALIZE = "Deserialize";
   private final VectorBenchmarks mark;
 
   public ClosestCentroidBenchmark(VectorBenchmarks mark) {

http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java b/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java
index 10fcd11..cd403c2 100644
--- a/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java
+++ b/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java
@@ -17,9 +17,6 @@
 
 package org.apache.mahout.benchmark;
 
-import java.io.IOException;
-
-import com.google.common.io.Closeables;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -30,6 +27,8 @@ import org.apache.mahout.common.TimingStatistics;
 import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator;
 import org.apache.mahout.math.VectorWritable;
 
+import java.io.IOException;
+
 import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR;
 import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR;
 import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR;
@@ -51,14 +50,14 @@
   public void serializeBenchmark() throws IOException {
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.get(conf);
-    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path("/tmp/dense-vector"), IntWritable.class,
-        VectorWritable.class);
 
     Writable one = new IntWritable(0);
     VectorWritable vec = new VectorWritable();
     TimingStatistics stats = new TimingStatistics();
-    try {
+    try (SequenceFile.Writer writer =
+             new SequenceFile.Writer(fs, conf, new Path("/tmp/dense-vector"),
+                 IntWritable.class, VectorWritable.class)){
       for (int i = 0; i < mark.loop; i++) {
         TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
         vec.set(mark.vectors[0][mark.vIndex(i)]);
@@ -67,15 +66,13 @@
           break;
         }
       }
-    } finally {
-      Closeables.close(writer, false);
     }
     mark.printStats(stats, SERIALIZE, DENSE_VECTOR);
 
-    writer = new SequenceFile.Writer(fs, conf, new Path("/tmp/randsparse-vector"), IntWritable.class,
-        VectorWritable.class);
     stats = new TimingStatistics();
-    try {
+    try (SequenceFile.Writer writer =
+             new SequenceFile.Writer(fs, conf,
+                 new Path("/tmp/randsparse-vector"), IntWritable.class, VectorWritable.class)){
       for (int i = 0; i < mark.loop; i++) {
         TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
         vec.set(mark.vectors[1][mark.vIndex(i)]);
@@ -84,15 +81,13 @@
           break;
         }
       }
-    } finally {
-      Closeables.close(writer, false);
     }
     mark.printStats(stats, SERIALIZE, RAND_SPARSE_VECTOR);
 
-    writer = new SequenceFile.Writer(fs, conf, new Path("/tmp/seqsparse-vector"), IntWritable.class,
-        VectorWritable.class);
     stats = new TimingStatistics();
-    try {
+    try (SequenceFile.Writer writer =
+             new SequenceFile.Writer(fs, conf,
+                 new Path("/tmp/seqsparse-vector"), IntWritable.class, VectorWritable.class)) {
      for (int i = 0; i < mark.loop; i++) {
        TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
        vec.set(mark.vectors[2][mark.vIndex(i)]);
@@ -101,8 +96,6 @@
          break;
        }
      }
-    } finally {
-      Closeables.close(writer, false);
    }
    mark.printStats(stats, SERIALIZE, SEQ_SPARSE_VECTOR);
 
@@ -117,7 +110,7 @@
   private void doDeserializeBenchmark(String name, String pathString) throws IOException {
     TimingStatistics stats = new TimingStatistics();
     TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
-    SequenceFileValueIterator<Writable> iterator = new SequenceFileValueIterator<Writable>(new Path(pathString), true,
+    SequenceFileValueIterator<Writable> iterator = new SequenceFileValueIterator<>(new Path(pathString), true,
         new Configuration());
     while (iterator.hasNext()) {
       iterator.next();
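A behavioral note on the try-with-resources conversions above: they are not purely cosmetic. Guava's Closeables.close(writer, false) rethrows an IOException from close(), and in the old try/finally shape a failing close() could replace an exception already thrown by the body. With try-with-resources the body's exception wins and the close() failure is attached as a suppressed exception. A small self-contained sketch (class names are illustrative, not from the codebase):

    import java.io.Closeable;
    import java.io.IOException;

    public class SuppressedDemo {
      // A resource whose close() always fails, to make the suppression visible.
      static class FailingResource implements Closeable {
        @Override
        public void close() throws IOException {
          throw new IOException("close failed");
        }
      }

      public static void main(String[] args) {
        try (FailingResource r = new FailingResource()) {
          throw new IOException("body failed");
        } catch (IOException e) {
          System.out.println(e.getMessage()); // prints "body failed"
          for (Throwable s : e.getSuppressed()) {
            System.out.println("suppressed: " + s.getMessage()); // "close failed"
          }
        }
      }
    }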
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java b/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
index c29760a..a076322 100644
--- a/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
+++ b/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
@@ -17,17 +17,6 @@
 
 package org.apache.mahout.benchmark;
 
-import java.io.IOException;
-import java.text.DecimalFormat;
-import java.util.BitSet;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Random;
-import java.util.concurrent.TimeUnit;
-import java.util.regex.Pattern;
-
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.Option;
@@ -56,8 +45,18 @@
 import org.apache.mahout.math.Vector;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
 
 public class VectorBenchmarks {
   private static final int MAX_TIME_MS = 5000;
@@ -93,11 +92,11 @@ public class VectorBenchmarks {
   final long maxTimeUsec;
   final long leadTimeUsec;
 
-  private final List<Vector> randomVectors = Lists.newArrayList();
-  private final List<int[]> randomVectorIndices = Lists.newArrayList();
-  private final List<double[]> randomVectorValues = Lists.newArrayList();
-  private final Map<String, Integer> implType = Maps.newHashMap();
-  private final Map<String, List<String[]>> statsMap = Maps.newHashMap();
+  private final List<Vector> randomVectors = new ArrayList<>();
+  private final List<int[]> randomVectorIndices = new ArrayList<>();
+  private final List<double[]> randomVectorValues = new ArrayList<>();
+  private final Map<String, Integer> implType = new HashMap<>();
+  private final Map<String, List<String[]>> statsMap = new HashMap<>();
   private final BenchmarkRunner runner;
   private final Random r = RandomUtils.getRandom();
 
@@ -162,7 +161,7 @@
     }
     int implId = implType.get(implName);
     if (!statsMap.containsKey(benchmarkName)) {
-      statsMap.put(benchmarkName, Lists.<String[]>newArrayList());
+      statsMap.put(benchmarkName, new ArrayList<String[]>());
     }
     List<String[]> implStats = statsMap.get(benchmarkName);
     while (implStats.size() < implId + 1) {
@@ -224,7 +223,7 @@
   private boolean buildVectorIncrementally(TimingStatistics stats, int randomIndex, Vector v, boolean useSetQuick) {
     int[] indexes = randomVectorIndices.get(randomIndex);
     double[] values = randomVectorValues.get(randomIndex);
-    List<Integer> randomOrder = Lists.newArrayList();
+    List<Integer> randomOrder = new ArrayList<>();
     for (int i = 0; i < indexes.length; i++) {
       randomOrder.add(i);
     }
@@ -421,9 +420,9 @@
   }
 
   private String asCsvString() {
-    List<String> keys = Lists.newArrayList(statsMap.keySet());
+    List<String> keys = new ArrayList<>(statsMap.keySet());
     Collections.sort(keys);
-    Map<Integer,String> implMap = Maps.newHashMap();
+    Map<Integer,String> implMap = new HashMap<>();
     for (Entry<String,Integer> e : implType.entrySet()) {
       implMap.put(e.getValue(), e.getKey());
     }
@@ -459,7 +458,7 @@
       }
     }
     sb.append('\n');
-    List<String> keys = Lists.newArrayList(statsMap.keySet());
+    List<String> keys = new ArrayList<>(statsMap.keySet());
     Collections.sort(keys);
     for (String benchmarkName : keys) {
       List<String[]> implTokenizedStats = statsMap.get(benchmarkName);
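One detail in VectorBenchmarks is easy to miss: statsMap.put(benchmarkName, new ArrayList<String[]>()) keeps its explicit type argument while nearly every other allocation switches to <>. That is presumably because Java 7 only infers the diamond's type from an assignment target, not from a method argument's expected type (generalized target typing arrived later, with Java 8), so the explicit form is still required in argument position. An illustrative sketch of the distinction under a Java 7 compiler:

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class DiamondLimits {
      public static void main(String[] args) {
        // Assignment context: the diamond is inferred from the declared type.
        Map<String, List<String[]>> statsMap = new HashMap<>();

        // Argument context: under Java 7 the following would infer
        // ArrayList<Object> and fail to compile, which is why the commit
        // keeps the explicit type argument here:
        //   statsMap.put("benchmark", new ArrayList<>());
        statsMap.put("benchmark", new ArrayList<String[]>());
      }
    }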
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
index 9378b11..9735ffe 100644
--- a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
+++ b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
@@ -17,15 +17,6 @@
 
 package org.apache.mahout.cf.taste.impl.model.hbase;
 
-import java.io.Closeable;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-import java.util.SortedMap;
-
-import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HColumnDescriptor;
@@ -42,13 +33,9 @@
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.filter.FilterList;
-import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
 import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
+import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
 import org.apache.hadoop.hbase.util.Bytes;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import org.apache.mahout.cf.taste.common.NoSuchItemException;
 import org.apache.mahout.cf.taste.common.NoSuchUserException;
 import org.apache.mahout.cf.taste.common.Refreshable;
@@ -59,6 +46,18 @@ import org.apache.mahout.cf.taste.impl.model.GenericItemPreferenceArray;
 import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
 
 /**
  * <p>Naive approach of storing one preference as one value in the table.
@@ -134,15 +133,12 @@
    * Create the table if it doesn't exist
    */
   private void bootstrap(Configuration conf) throws IOException {
-    HBaseAdmin admin = new HBaseAdmin(conf);
     HTableDescriptor tDesc = new HTableDescriptor(Bytes.toBytes(tableName));
     tDesc.addFamily(new HColumnDescriptor(USERS_CF));
     tDesc.addFamily(new HColumnDescriptor(ITEMS_CF));
-    try {
+    try (HBaseAdmin admin = new HBaseAdmin(conf)) {
       admin.createTable(tDesc);
       log.info("Created table {}", tableName);
-    } finally {
-      admin.close();
     }
   }
 
@@ -339,7 +335,7 @@
     Result[] results;
     try {
       HTableInterface table = pool.getTable(tableName);
-      List<Get> gets = Lists.newArrayListWithCapacity(2);
+      List<Get> gets = new ArrayList<>(2);
       gets.add(new Get(itemToBytes(itemID1)));
       gets.add(new Get(itemToBytes(itemID2)));
       gets.get(0).addFamily(USERS_CF);
@@ -380,7 +376,7 @@
   public void setPreference(long userID, long itemID, float value) throws TasteException {
     try {
       HTableInterface table = pool.getTable(tableName);
-      List<Put> puts = Lists.newArrayListWithCapacity(2);
+      List<Put> puts = new ArrayList<>(2);
       puts.add(new Put(userToBytes(userID)));
       puts.add(new Put(itemToBytes(itemID)));
       puts.get(0).add(ITEMS_CF, Bytes.toBytes(itemID), Bytes.toBytes(value));
@@ -396,7 +392,7 @@
   public void removePreference(long userID, long itemID) throws TasteException {
     try {
       HTableInterface table = pool.getTable(tableName);
-      List<Delete> deletes = Lists.newArrayListWithCapacity(2);
+      List<Delete> deletes = new ArrayList<>(2);
       deletes.add(new Delete(userToBytes(userID)));
       deletes.add(new Delete(itemToBytes(itemID)));
       deletes.get(0).deleteColumns(ITEMS_CF, Bytes.toBytes(itemID));
@@ -457,7 +453,7 @@
       Scan scan = new Scan(new byte[]{0x69}, new byte[]{0x70});
       scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, new KeyOnlyFilter(), new FirstKeyOnlyFilter()));
       ResultScanner scanner = table.getScanner(scan);
-      Collection<Long> ids = Lists.newLinkedList();
+      Collection<Long> ids = new LinkedList<>();
       for (Result result : scanner) {
         ids.add(bytesToUserOrItemID(result.getRow()));
       }
@@ -482,7 +478,7 @@
       Scan scan = new Scan(new byte[]{0x75}, new byte[]{0x76});
       scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, new KeyOnlyFilter(), new FirstKeyOnlyFilter()));
       ResultScanner scanner = table.getScanner(scan);
-      Collection<Long> ids = Lists.newLinkedList();
+      Collection<Long> ids = new LinkedList<>();
       for (Result result : scanner) {
         ids.add(bytesToUserOrItemID(result.getRow()));
       }
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
index d9317c0..66f0a77 100644
--- a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
+++ b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
@@ -17,17 +17,7 @@
 
 package org.apache.mahout.cf.taste.impl.model.jdbc;
 
-import java.sql.Connection;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.util.Collection;
-import java.util.List;
-
-import javax.sql.DataSource;
-
-import com.google.common.collect.Lists;
+import com.google.common.base.Preconditions;
 import org.apache.mahout.cf.taste.common.NoSuchItemException;
 import org.apache.mahout.cf.taste.common.NoSuchUserException;
 import org.apache.mahout.cf.taste.common.Refreshable;
@@ -49,7 +39,15 @@ import org.apache.mahout.common.IOUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.google.common.base.Preconditions;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import javax.sql.DataSource;
 
 /**
  * <p>
@@ -96,7 +94,6 @@ public abstract class AbstractJDBCDataModel extends AbstractJDBCComponent implem
   private final String getUsersSQL;
   private final String getItemsSQL;
   private final String getPrefsForItemSQL;
-  //private final String getNumPreferenceForItemSQL;
   private final String getNumPreferenceForItemsSQL;
   private final String getMaxPreferenceSQL;
   private final String getMinPreferenceSQL;
@@ -283,7 +280,7 @@
       log.debug("Executing SQL query: {}", getUserSQL);
       rs = stmt.executeQuery();
 
-      List<Preference> prefs = Lists.newArrayList();
+      List<Preference> prefs = new ArrayList<>();
       while (rs.next()) {
         prefs.add(buildPreference(rs));
       }
@@ -323,7 +320,7 @@
       rs = stmt.executeQuery(getAllUsersSQL);
 
       Long currentUserID = null;
-      List<Preference> currentPrefs = Lists.newArrayList();
+      List<Preference> currentPrefs = new ArrayList<>();
       while (rs.next()) {
         long nextUserID = getLongColumn(rs, 1);
         if (currentUserID != null && !currentUserID.equals(nextUserID) && !currentPrefs.isEmpty()) {
@@ -533,7 +530,7 @@
       log.debug("Executing SQL query: {}", getPrefsForItemSQL);
       rs = stmt.executeQuery();
 
-      List<Preference> prefs = Lists.newArrayList();
+      List<Preference> prefs = new ArrayList<>();
       while (rs.next()) {
         prefs.add(buildPreference(rs));
       }

http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java
index fe6b843..92a4019 100644
--- a/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java
+++ b/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/mongodb/MongoDBDataModel.java
@@ -17,20 +17,15 @@
 
 package org.apache.mahout.cf.taste.impl.model.mongodb;
 
-import java.text.DateFormat;
-import java.text.ParseException;
-import java.util.Collection;
-import java.util.Date;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.concurrent.locks.ReentrantLock;
-import java.net.UnknownHostException;
-import java.text.SimpleDateFormat;
-import java.util.regex.Pattern;
-
 import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
+import com.mongodb.BasicDBObject;
+import com.mongodb.DB;
+import com.mongodb.DBCollection;
+import com.mongodb.DBCursor;
+import com.mongodb.DBObject;
+import com.mongodb.Mongo;
+import org.apache.mahout.cf.taste.common.NoSuchItemException;
+import org.apache.mahout.cf.taste.common.NoSuchUserException;
 import org.apache.mahout.cf.taste.common.Refreshable;
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
@@ -42,19 +37,22 @@ import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.model.Preference;
 import org.apache.mahout.cf.taste.model.PreferenceArray;
-import org.apache.mahout.cf.taste.common.NoSuchUserException;
-import org.apache.mahout.cf.taste.common.NoSuchItemException;
-
 import org.bson.types.ObjectId;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.mongodb.BasicDBObject;
-import com.mongodb.DBObject;
-import com.mongodb.Mongo;
-import com.mongodb.DB;
-import com.mongodb.DBCollection;
-import com.mongodb.DBCursor;
+import java.net.UnknownHostException;
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Date;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.regex.Pattern;
 
 /**
  * <p>A {@link DataModel} backed by a MongoDB database. This class expects a
@@ -408,8 +406,8 @@
     while (cursor.hasNext()) {
       Map<String,Object> user = (Map<String,Object>) cursor.next().toMap();
       String userID = getID(user.get(mongoUserID), true);
-      Collection<List<String>> items = Lists.newArrayList();
-      List<String> item = Lists.newArrayList();
+      Collection<List<String>> items = new ArrayList<>();
+      List<String> item = new ArrayList<>();
       item.add(getID(user.get(mongoItemID), false));
       item.add(Float.toString(getPreference(user.get(mongoPreference))));
       items.add(item);
@@ -431,8 +429,8 @@
       Map<String,Object> user = (Map<String,Object>) cursor.next().toMap();
       if (!user.containsKey("deleted_at")) {
         String userID = getID(user.get(mongoUserID), true);
-        Collection<List<String>> items = Lists.newArrayList();
-        List<String> item = Lists.newArrayList();
+        Collection<List<String>> items = new ArrayList<>();
+        List<String> item = new ArrayList<>();
         item.add(getID(user.get(mongoItemID), false));
         item.add(Float.toString(getPreference(user.get(mongoPreference))));
         items.add(item);
@@ -552,7 +550,7 @@
       Mongo mongoDDBB = new Mongo(mongoHost, mongoPort);
       DB db = mongoDDBB.getDB(mongoDB);
       mongoTimestamp = new Date(0);
-      FastByIDMap<Collection<Preference>> userIDPrefMap = new FastByIDMap<Collection<Preference>>();
+      FastByIDMap<Collection<Preference>> userIDPrefMap = new FastByIDMap<>();
       if (!mongoAuth || db.authenticate(mongoUsername, mongoPassword.toCharArray())) {
         collection = db.getCollection(mongoCollection);
         collectionMap = db.getCollection(mongoMapCollection);
@@ -572,7 +570,7 @@
           float ratingValue = getPreference(user.get(mongoPreference));
           Collection<Preference> userPrefs = userIDPrefMap.get(userID);
           if (userPrefs == null) {
-            userPrefs = Lists.newArrayListWithCapacity(2);
+            userPrefs = new ArrayList<>(2);
             userIDPrefMap.put(userID, userPrefs);
           }
           userPrefs.add(new GenericPreference(userID, itemID, ratingValue));
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java b/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java
index 20d1384..af0742e 100644
--- a/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java
+++ b/integration/src/main/java/org/apache/mahout/cf/taste/impl/similarity/jdbc/MySQLJDBCItemSimilarity.java
@@ -70,7 +70,6 @@ import org.apache.mahout.cf.taste.common.TasteException;
  * Note that for each row, item_id_a should be less than item_id_b. It is redundant to store it both ways,
  * so the pair is always stored as a pair with the lesser one first.
  *
- * @see org.apache.mahout.cf.taste.impl.recommender.slopeone.jdbc.MySQLJDBCDiffStorage
  * @see org.apache.mahout.cf.taste.impl.model.jdbc.MySQLJDBCDataModel
  */
 public class MySQLJDBCItemSimilarity extends SQL92JDBCItemSimilarity {

http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java b/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
index 45f300a..03a3000 100644
--- a/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
+++ b/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
@@ -17,16 +17,8 @@
 
 package org.apache.mahout.classifier;
 
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.io.PrintStream;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
+import org.apache.commons.io.Charsets;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -38,7 +30,14 @@
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.math.Matrix;
 import org.apache.mahout.math.MatrixWritable;
 
-import com.google.common.collect.Lists;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
 
 /**
  * Export a ConfusionMatrix in various text formats: ToString version Grayscale HTML table Summary HTML table
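ConfusionMatrixDumper (and LDAPrintTopics further down) also swaps com.google.common.base.Charsets for org.apache.commons.io.Charsets, trimming the Guava dependency. Worth noting, though it is not what this commit does: Java 7 itself added an equivalent in java.nio.charset.StandardCharsets. A one-line sketch:

    import java.nio.charset.Charset;
    import java.nio.charset.StandardCharsets;

    public class CharsetConstants {
      public static void main(String[] args) {
        // JDK-provided constant, available since Java 7; no external library needed.
        Charset utf8 = StandardCharsets.UTF_8;
        System.out.println(utf8.name()); // UTF-8
      }
    }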
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java b/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
index 972dec1..545c1ff 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
@@ -17,10 +17,6 @@
 
 package org.apache.mahout.clustering.cdbw;
 
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.mahout.clustering.Cluster;
@@ -41,8 +37,11 @@
 import org.apache.mahout.math.VectorWritable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
 
 /**
  * This class calculates the CDbw metric as defined in
@@ -53,7 +52,7 @@
   private static final Logger log = LoggerFactory.getLogger(CDbwEvaluator.class);
 
   private final Map<Integer,List<VectorWritable>> representativePoints;
-  private final Map<Integer,Double> stDevs = Maps.newHashMap();
+  private final Map<Integer,Double> stDevs = new HashMap<>();
   private final List<Cluster> clusters;
   private final DistanceMeasure measure;
   private Double interClusterDensity = null;
@@ -110,7 +109,7 @@
    * @return a List<Cluster> of the clusters
    */
   private static List<Cluster> loadClusters(Configuration conf, Path clustersIn) {
-    List<Cluster> clusters = Lists.newArrayList();
+    List<Cluster> clusters = new ArrayList<>();
     for (ClusterWritable clusterWritable : new SequenceFileDirValueIterable<ClusterWritable>(clustersIn,
         PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
       Cluster cluster = clusterWritable.getValue();

http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java b/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java
index 9dbce5a..e4c72c6 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/conversion/InputMapper.java
@@ -17,13 +17,6 @@
 
 package org.apache.mahout.clustering.conversion;
 
-import java.io.IOException;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
-import java.util.Collection;
-import java.util.regex.Pattern;
-
-import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
@@ -31,6 +24,13 @@
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
+import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.regex.Pattern;
+
 public class InputMapper extends Mapper<LongWritable, Text, Text, VectorWritable> {
 
   private static final Pattern SPACE = Pattern.compile(" ");
@@ -42,7 +42,7 @@ public class InputMapper extends Mapper<LongWritable, Text, Text, VectorWritable
     String[] numbers = SPACE.split(values.toString());
     // sometimes there are multiple separator spaces
-    Collection<Double> doubles = Lists.newArrayList();
+    Collection<Double> doubles = new ArrayList<>();
     for (String value : numbers) {
       if (!value.isEmpty()) {
         doubles.add(Double.valueOf(value));

http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java b/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java
index 3cd06eb..757f38c 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java
@@ -17,10 +17,6 @@
 
 package org.apache.mahout.clustering.evaluation;
 
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.mahout.clustering.Cluster;
@@ -37,7 +33,10 @@
 import org.apache.mahout.math.VectorWritable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.google.common.collect.Lists;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
 
 public class ClusterEvaluator {
 
@@ -89,7 +88,7 @@
    * @return a List<Cluster> of the clusters
    */
   private static List<Cluster> loadClusters(Configuration conf, Path clustersIn) {
-    List<Cluster> clusters = Lists.newArrayList();
+    List<Cluster> clusters = new ArrayList<>();
     for (ClusterWritable clusterWritable : new SequenceFileDirValueIterable<ClusterWritable>(clustersIn,
         PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
       Cluster cluster = clusterWritable.getValue();

http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java b/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
index f18b584..2fe37ef 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
@@ -18,12 +18,10 @@ package org.apache.mahout.clustering.evaluation;
 
 import java.io.IOException;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
-
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -98,8 +96,6 @@
    *          the Path to the directory containing representativePoints-i folders
    * @param numIterations
    *          the int number of iterations to print
-   * @throws IOException
-   *           if errors occur
    */
  public static void printRepresentativePoints(Path output, int numIterations) {
    for (int i = 0; i <= numIterations; i++) {
@@ -141,8 +137,8 @@
     for (FileStatus part : fs.listStatus(inPath, PathFilters.logsCRCFilter())) {
       Path inPart = part.getPath();
       Path path = new Path(output, inPart.getName());
-      SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class);
-      try {
+      try (SequenceFile.Writer writer =
+               new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class)){
         for (ClusterWritable clusterWritable : new SequenceFileValueIterable<ClusterWritable>(inPart, true, conf)) {
           Cluster cluster = clusterWritable.getValue();
           if (log.isDebugEnabled()) {
@@ -150,8 +146,6 @@
           }
           writer.append(new IntWritable(cluster.getId()), new VectorWritable(cluster.getCenter()));
         }
-      } finally {
-        Closeables.close(writer, false);
       }
     }
   }
@@ -184,7 +178,7 @@
       DistanceMeasure measure) throws IOException {
 
     Map<Integer,List<VectorWritable>> repPoints = RepresentativePointsMapper.getRepresentativePoints(conf, stateIn);
-    Map<Integer,WeightedVectorWritable> mostDistantPoints = Maps.newHashMap();
+    Map<Integer,WeightedVectorWritable> mostDistantPoints = new HashMap<>();
     FileSystem fs = FileSystem.get(clusteredPointsIn.toUri(), conf);
     for (Pair<IntWritable,WeightedVectorWritable> record
         : new SequenceFileDirIterable<IntWritable,WeightedVectorWritable>(clusteredPointsIn, PathType.LIST,
@@ -192,25 +186,19 @@ public final class RepresentativePointsDriver extends AbstractJob {
       RepresentativePointsMapper.mapPoint(record.getFirst(), record.getSecond(), measure, repPoints, mostDistantPoints);
     }
     int part = 0;
-    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(stateOut, "part-m-" + part++),
-        IntWritable.class, VectorWritable.class);
-    try {
+    try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(stateOut, "part-m-" + part++),
+        IntWritable.class, VectorWritable.class)){
       for (Entry<Integer,List<VectorWritable>> entry : repPoints.entrySet()) {
         for (VectorWritable vw : entry.getValue()) {
           writer.append(new IntWritable(entry.getKey()), vw);
         }
       }
-    } finally {
-      Closeables.close(writer, false);
     }
-    writer = new SequenceFile.Writer(fs, conf, new Path(stateOut, "part-m-" + part++), IntWritable.class,
-        VectorWritable.class);
-    try {
+    try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(stateOut, "part-m-" + part++),
+        IntWritable.class, VectorWritable.class)){
       for (Map.Entry<Integer,WeightedVectorWritable> entry : mostDistantPoints.entrySet()) {
         writer.append(new IntWritable(entry.getKey()), new VectorWritable(entry.getValue().getVector()));
       }
-    } finally {
-      Closeables.close(writer, false);
     }
   }

http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java b/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java
index 255e4a3..0ae79ad 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java
@@ -18,11 +18,11 @@ package org.apache.mahout.clustering.evaluation;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
@@ -41,7 +41,7 @@ public class RepresentativePointsMapper
     extends Mapper<IntWritable, WeightedVectorWritable, IntWritable, WeightedVectorWritable> {
 
   private Map<Integer, List<VectorWritable>> representativePoints;
-  private final Map<Integer, WeightedVectorWritable> mostDistantPoints = Maps.newHashMap();
+  private final Map<Integer, WeightedVectorWritable> mostDistantPoints = new HashMap<>();
   private DistanceMeasure measure = new EuclideanDistanceMeasure();
 
   @Override
@@ -98,7 +98,7 @@
   }
 
   public static Map<Integer, List<VectorWritable>> getRepresentativePoints(Configuration conf, Path statePath) {
-    Map<Integer, List<VectorWritable>> representativePoints = Maps.newHashMap();
+    Map<Integer, List<VectorWritable>> representativePoints = new HashMap<>();
     for (Pair<IntWritable,VectorWritable> record
         : new SequenceFileDirIterable<IntWritable,VectorWritable>(statePath,
                                                                   PathType.LIST,
@@ -107,7 +107,7 @@
       int keyValue = record.getFirst().get();
       List<VectorWritable> repPoints = representativePoints.get(keyValue);
       if (repPoints == null) {
-        repPoints = Lists.newArrayList();
+        repPoints = new ArrayList<>();
         representativePoints.put(keyValue, repPoints);
       }
       repPoints.add(record.getSecond());

http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java b/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
index cb8d935..392909e 100644
--- a/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
+++ b/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
@@ -17,24 +17,22 @@
 
 package org.apache.mahout.clustering.lda;
 
+import com.google.common.io.Closeables;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.PriorityQueue;
 import java.util.Queue;
-
-import com.google.common.base.Charsets;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
 import org.apache.commons.cli2.Option;
@@ -43,6 +41,7 @@ import org.apache.commons.cli2.builder.ArgumentBuilder;
 import org.apache.commons.cli2.builder.DefaultOptionBuilder;
 import org.apache.commons.cli2.builder.GroupBuilder;
 import org.apache.commons.cli2.commandline.Parser;
+import org.apache.commons.io.Charsets;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.DoubleWritable;
@@ -168,7 +167,7 @@
         out.write("===========");
         out.write('\n');
       }
-      List<Pair<String,Double>> topKasList = Lists.newArrayListWithCapacity(topK.size());
+      List<Pair<String,Double>> topKasList = new ArrayList<>(topK.size());
       for (Pair<String,Double> wordWithScore : topK) {
         topKasList.add(wordWithScore);
       }
@@ -197,8 +196,8 @@
                                    Configuration job,
                                    List<String> wordList,
                                    int numWordsToPrint) {
-    List<Queue<Pair<String,Double>>> queues = Lists.newArrayList();
-    Map<Integer,Double> expSums = Maps.newHashMap();
+    List<Queue<Pair<String,Double>>> queues = new ArrayList<>();
+    Map<Integer,Double> expSums = new HashMap<>();
     for (Pair<IntPairWritable,DoubleWritable> record
         : new SequenceFileDirIterable<IntPairWritable, DoubleWritable>(
             new Path(dir, "part-*"), PathType.GLOB, null, null, true, job)) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java b/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java
index 438beb9..465e51b 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java
@@ -33,7 +33,8 @@ public class LuceneIndexHelper {
   public static void fieldShouldExistInIndex(IndexReader reader, String fieldName) throws IOException {
     IndexableField field = reader.document(0).getField(fieldName);
     if (field == null || !field.fieldType().stored()) {
-      throw new IllegalArgumentException("Field '" + fieldName + "' is possibly not stored since first document in index does not contain this field.");
+      throw new IllegalArgumentException("Field '" + fieldName +
+          "' is possibly not stored since first document in index does not contain this field.");
     }
   }

http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
index fabca54..1c4f8de 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
@@ -16,7 +16,10 @@ package org.apache.mahout.text;
  * limitations under the License.
  */
 
-import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -32,9 +35,6 @@
 import org.apache.lucene.index.SegmentInfos;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
-import java.util.List;
-
 /**
  * {@link InputFormat} implementation which splits a Lucene index at the segment level.
  */
@@ -48,7 +48,7 @@
     LuceneStorageConfiguration lucene2SeqConfiguration = new LuceneStorageConfiguration(configuration);
 
-    List<LuceneSegmentInputSplit> inputSplits = Lists.newArrayList();
+    List<LuceneSegmentInputSplit> inputSplits = new ArrayList<>();
 
     List<Path> indexPaths = lucene2SeqConfiguration.getIndexPaths();
     for (Path indexPath : indexPaths) {

http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
index a0aa6b0..485e856 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
@@ -98,6 +98,5 @@
   @Override
   public void close() throws IOException {
     segmentReader.close();
-    //searcher.close();
   }
 }

http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java b/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
index 88f86c5..b36f3e9 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
@@ -16,9 +16,18 @@ package org.apache.mahout.text;
  * limitations under the License.
 */
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
 import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -36,14 +45,6 @@
 import org.apache.mahout.common.iterator.sequencefile.PathFilters;
 import org.apache.mahout.common.iterator.sequencefile.PathType;
 import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-
 import static org.apache.lucene.util.Version.LUCENE_46;
 
 /**
@@ -186,7 +187,7 @@
   }
 
   public DocumentStoredFieldVisitor getStoredFieldVisitor() {
-    Set<String> fieldSet = Sets.newHashSet(idField);
+    Set<String> fieldSet = new HashSet<>(Collections.singleton(idField));
     fieldSet.addAll(fields);
     return new DocumentStoredFieldVisitor(fieldSet);
   }
@@ -205,7 +206,7 @@
   public void readFields(DataInput in) throws IOException {
     try {
       sequenceFilesOutputPath = new Path(in.readUTF());
-      indexPaths = Lists.newArrayList();
+      indexPaths = new ArrayList<>();
       String[] indexPaths = in.readUTF().split(SEPARATOR_PATHS);
       for (String indexPath : indexPaths) {
         this.indexPaths.add(new Path(indexPath));
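The getStoredFieldVisitor() change in LuceneStorageConfiguration shows the one Guava factory with no direct one-call JDK equivalent: Sets.newHashSet(idField) both creates the set and seeds it with an element, so the replacement goes through Collections.singleton(). A minimal sketch (field names are illustrative):

    import java.util.Collections;
    import java.util.HashSet;
    import java.util.Set;

    public class SeededSet {
      public static void main(String[] args) {
        String idField = "id";
        // Guava: Set<String> fieldSet = Sets.newHashSet(idField);
        // JDK-only replacement: seed the HashSet from a singleton collection,
        // then add any further fields, mirroring fieldSet.addAll(fields).
        Set<String> fieldSet = new HashSet<>(Collections.singleton(idField));
        fieldSet.add("title");
        System.out.println(fieldSet);
      }
    }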
PrefixAdditionFilter(getConf(), dirPath, getOptions(), writer, getCharset(), fs)); } else { - InputStream in = null; - try { - in = fs.open(fst.getPath()); - + try (InputStream in = fs.open(fst.getPath())){ StringBuilder file = new StringBuilder(); for (String aFit : new FileLineIterable(in, getCharset(), false)) { file.append(aFit).append('\n'); @@ -65,8 +61,6 @@ public final class PrefixAdditionFilter extends SequenceFilesFromDirectoryFilter ? current.getName() : current.getName() + Path.SEPARATOR + fst.getPath().getName(); writer.write(getPrefix() + Path.SEPARATOR + name, file.toString()); - } finally { - Closeables.close(in, false); } } } http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java ---------------------------------------------------------------------- diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java index 720078c..311ab8d 100644 --- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java +++ b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectory.java @@ -19,10 +19,9 @@ package org.apache.mahout.text; import java.io.IOException; import java.nio.charset.Charset; +import java.util.HashMap; import java.util.Map; -import com.google.common.collect.Maps; -import com.google.common.io.Closeables; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -100,9 +99,8 @@ public class SequenceFilesFromDirectory extends AbstractJob { Charset charset = Charset.forName(getOption(CHARSET_OPTION[0])); String keyPrefix = getOption(KEY_PREFIX_OPTION[0]); FileSystem fs = FileSystem.get(input.toUri(), conf); - ChunkedWriter writer = new ChunkedWriter(conf, Integer.parseInt(options.get(CHUNK_SIZE_OPTION[0])), output); - try { + try (ChunkedWriter writer = new ChunkedWriter(conf, Integer.parseInt(options.get(CHUNK_SIZE_OPTION[0])), output)) { SequenceFilesFromDirectoryFilter pathFilter; String fileFilterClassName = options.get(FILE_FILTER_CLASS_OPTION[0]); if (PrefixAdditionFilter.class.getName().equals(fileFilterClassName)) { @@ -113,8 +111,6 @@ public class SequenceFilesFromDirectory extends AbstractJob { new Object[] {conf, keyPrefix, options, writer, charset, fs}); } fs.listStatus(input, pathFilter); - } finally { - Closeables.close(writer, false); } return 0; } @@ -209,7 +205,7 @@ public class SequenceFilesFromDirectory extends AbstractJob { * @return Map of options */ protected Map<String, String> parseOptions() { - Map<String, String> options = Maps.newHashMap(); + Map<String, String> options = new HashMap<>(); options.put(CHUNK_SIZE_OPTION[0], getOption(CHUNK_SIZE_OPTION[0])); options.put(FILE_FILTER_CLASS_OPTION[0], getOption(FILE_FILTER_CLASS_OPTION[0])); options.put(CHARSET_OPTION[0], getOption(CHARSET_OPTION[0])); http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java ---------------------------------------------------------------------- diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java index d3903dd..1bd3f3e 100644 --- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java +++ 
b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java @@ -17,7 +17,11 @@ package org.apache.mahout.text; */ -import com.google.common.collect.Lists; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.regex.Pattern; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.ToolRunner; @@ -30,11 +34,6 @@ import org.apache.lucene.util.Version; import org.apache.mahout.common.AbstractJob; import org.apache.mahout.common.commandline.DefaultOptionCreator; -import java.util.List; -import java.util.regex.Pattern; - -import static java.util.Arrays.asList; - /** * Driver class for the lucene2seq program. Converts text contents of stored fields of a lucene index into a Hadoop * SequenceFile. The key of the sequence file is the document ID and the value is the concatenated text of the specified @@ -77,7 +76,7 @@ public class SequenceFilesFromLuceneStorageDriver extends AbstractJob { Configuration configuration = getConf(); String[] paths = getInputPath().toString().split(","); - List<Path> indexPaths = Lists.newArrayList(); + List<Path> indexPaths = new ArrayList<>(); for (String path : paths) { indexPaths.add(new Path(path)); } @@ -91,7 +90,7 @@ public class SequenceFilesFromLuceneStorageDriver extends AbstractJob { indexPaths, sequenceFilesOutputPath, idField, - asList(fields.split(SEPARATOR_FIELDS))); + Arrays.asList(fields.split(SEPARATOR_FIELDS))); Query query = DEFAULT_QUERY; if (hasOption(OPTION_QUERY)) { http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java ---------------------------------------------------------------------- diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java index d87dadc..f31d055 100644 --- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java +++ b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java @@ -18,6 +18,7 @@ package org.apache.mahout.text; import com.google.common.base.Strings; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; @@ -31,8 +32,6 @@ import org.apache.lucene.store.IOContext; import java.io.IOException; import java.util.List; -import static org.apache.commons.lang.StringUtils.isBlank; - /** * Maps document IDs to key value pairs with ID field as the key and the concatenated stored field(s) * as value. 
@@ -64,13 +63,13 @@ public class SequenceFilesFromLuceneStorageMapper extends Mapper<Text, NullWrita Text theValue = new Text(); LuceneSeqFileHelper.populateValues(document, theValue, fields); //if they are both empty, don't write - if (isBlank(theKey.toString()) && isBlank(theValue.toString())) { + if (StringUtils.isBlank(theKey.toString()) && StringUtils.isBlank(theValue.toString())) { context.getCounter(DataStatus.EMPTY_BOTH).increment(1); return; } - if (isBlank(theKey.toString())) { + if (StringUtils.isBlank(theKey.toString())) { context.getCounter(DataStatus.EMPTY_KEY).increment(1); - } else if (isBlank(theValue.toString())) { + } else if (StringUtils.isBlank(theValue.toString())) { context.getCounter(DataStatus.EMPTY_VALUE).increment(1); } context.write(theKey, theValue); http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java ---------------------------------------------------------------------- diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java index 30c2a47..c17cc12 100644 --- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java +++ b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java @@ -16,10 +16,6 @@ */ package org.apache.mahout.text; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.io.Closeables; - import org.apache.commons.io.DirectoryWalker; import org.apache.commons.io.comparator.CompositeFileComparator; import org.apache.commons.io.comparator.DirectoryFileComparator; @@ -46,10 +42,12 @@ import java.io.File; import java.io.IOException; import java.nio.charset.Charset; import java.util.ArrayDeque; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Comparator; import java.util.Deque; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Pattern; @@ -81,9 +79,9 @@ public final class SequenceFilesFromMailArchives extends AbstractJob { private static final int MAX_JOB_SPLIT_LOCATIONS = 1000000; public void createSequenceFiles(MailOptions options) throws IOException { - ChunkedWriter writer = new ChunkedWriter(getConf(), options.getChunkSize(), new Path(options.getOutputDir())); - MailProcessor processor = new MailProcessor(options, options.getPrefix(), writer); - try { + try (ChunkedWriter writer = + new ChunkedWriter(getConf(), options.getChunkSize(), new Path(options.getOutputDir()))){ + MailProcessor processor = new MailProcessor(options, options.getPrefix(), writer); if (options.getInput().isDirectory()) { PrefixAdditionDirectoryWalker walker = new PrefixAdditionDirectoryWalker(processor, writer); walker.walk(options.getInput()); @@ -94,8 +92,6 @@ public final class SequenceFilesFromMailArchives extends AbstractJob { long finish = System.currentTimeMillis(); log.info("Parsed {} messages from {} in time: {}", cnt, options.getInput().getAbsolutePath(), finish - start); } - } finally { - Closeables.close(writer, false); } } @@ -226,11 +222,11 @@ public final class SequenceFilesFromMailArchives extends AbstractJob { options.setChunkSize(chunkSize); options.setCharset(charset); - List<Pattern> patterns = Lists.newArrayListWithCapacity(5); + List<Pattern> patterns = new ArrayList<>(5); // patternOrder is used downstream so that we can know what order the 
text // is in instead of encoding it in the string, which // would require more processing later to remove it pre feature selection. - Map<String, Integer> patternOrder = Maps.newHashMap(); + Map<String, Integer> patternOrder = new HashMap<>(); int order = 0; if (hasOption(FROM_OPTION[0])) { patterns.add(MailProcessor.FROM_PREFIX); http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/WholeFileRecordReader.java ---------------------------------------------------------------------- diff --git a/integration/src/main/java/org/apache/mahout/text/WholeFileRecordReader.java b/integration/src/main/java/org/apache/mahout/text/WholeFileRecordReader.java index fdb3654..b8441b7 100644 --- a/integration/src/main/java/org/apache/mahout/text/WholeFileRecordReader.java +++ b/integration/src/main/java/org/apache/mahout/text/WholeFileRecordReader.java @@ -5,9 +5,9 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * + * <p/> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p/> * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,7 +19,6 @@ package org.apache.mahout.text; import java.io.IOException; -import com.google.common.io.Closeables; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; @@ -55,7 +54,7 @@ public class WholeFileRecordReader extends RecordReader<IntWritable, BytesWritab public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext taskAttemptContext, Integer idx) throws IOException { this.fileSplit = new FileSplit(fileSplit.getPath(idx), fileSplit.getOffset(idx), - fileSplit.getLength(idx), fileSplit.getLocations()); + fileSplit.getLength(idx), fileSplit.getLocations()); this.configuration = taskAttemptContext.getConfiguration(); this.index = new IntWritable(idx); this.fileFilterClassName = this.configuration.get(FILE_FILTER_CLASS_OPTION[0]); @@ -78,8 +77,9 @@ public class WholeFileRecordReader extends RecordReader<IntWritable, BytesWritab @Override public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) - throws IOException, InterruptedException { - if (!StringUtils.isBlank(fileFilterClassName) && !PrefixAdditionFilter.class.getName().equals(fileFilterClassName)) { + throws IOException, InterruptedException { + if (!StringUtils.isBlank(fileFilterClassName) && + !PrefixAdditionFilter.class.getName().equals(fileFilterClassName)) { try { pathFilter = (PathFilter) Class.forName(fileFilterClassName).newInstance(); } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) { @@ -106,15 +106,11 @@ public class WholeFileRecordReader extends RecordReader<IntWritable, BytesWritab fileStatuses = fs.listStatus(file); } - FSDataInputStream in = null; if (fileStatuses.length == 1) { - try { - in = fs.open(fileStatuses[0].getPath()); + try (FSDataInputStream in = fs.open(fileStatuses[0].getPath())) { IOUtils.readFully(in, contents, 0, contents.length); value.setCapacity(contents.length); value.set(contents, 0, contents.length); - } finally { - Closeables.close(in, false); } processed = true; return true; 
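The WholeFileRecordReader change above also shows Java 7 multi-catch: the three reflection-related handlers (ClassNotFoundException, InstantiationException, IllegalAccessException) collapse into a single catch clause. A minimal sketch of the pattern, with a hypothetical newPathFilter helper standing in for the reader's initialize() logic:

    import org.apache.hadoop.fs.PathFilter;

    public final class MultiCatchSketch {
      private MultiCatchSketch() {}

      // Hypothetical helper (not from the patch): reflectively instantiates a
      // PathFilter by class name, as WholeFileRecordReader.initialize() does.
      // One multi-catch clause replaces three identical Java 6 catch blocks.
      static PathFilter newPathFilter(String className) {
        try {
          return (PathFilter) Class.forName(className).newInstance();
        } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
          throw new IllegalStateException("Cannot instantiate " + className, e);
        }
      }
    }
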
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java ---------------------------------------------------------------------- diff --git a/integration/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java b/integration/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java index 1cde4cd..bed4640 100644 --- a/integration/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java +++ b/integration/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java @@ -19,10 +19,10 @@ package org.apache.mahout.text; import java.io.File; import java.io.IOException; +import java.util.HashSet; import java.util.Locale; import java.util.Set; -import com.google.common.collect.Sets; import org.apache.commons.cli2.CommandLine; import org.apache.commons.cli2.Group; import org.apache.commons.cli2.Option; @@ -168,7 +168,7 @@ public final class WikipediaToSequenceFile { "org.apache.hadoop.io.serializer.JavaSerialization," + "org.apache.hadoop.io.serializer.WritableSerialization"); - Set<String> categories = Sets.newHashSet(); + Set<String> categories = new HashSet<>(); if (!catFile.isEmpty()) { for (String line : new FileLineIterable(new File(catFile))) { categories.add(line.trim().toLowerCase(Locale.ENGLISH)); http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java ---------------------------------------------------------------------- diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java index d9df97f..ad55ba7 100644 --- a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java +++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java @@ -18,6 +18,7 @@ package org.apache.mahout.text.wikipedia; import java.io.Reader; + import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.LowerCaseFilter; http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java ---------------------------------------------------------------------- diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java index 6632ad2..7113629 100644 --- a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java +++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java @@ -19,10 +19,10 @@ package org.apache.mahout.text.wikipedia; import java.io.File; import java.io.IOException; +import java.util.HashSet; import java.util.Locale; import java.util.Set; -import com.google.common.collect.Sets; import org.apache.commons.cli2.CommandLine; import org.apache.commons.cli2.Group; import org.apache.commons.cli2.Option; @@ -154,7 +154,7 @@ public final class WikipediaDatasetCreatorDriver { // Dont ever forget this. 
People should keep track of how hadoop conf // parameters can make or break a piece of code - Set<String> categories = Sets.newHashSet(); + Set<String> categories = new HashSet<>(); for (String line : new FileLineIterable(new File(catFile))) { categories.add(line.trim().toLowerCase(Locale.ENGLISH)); } http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java ---------------------------------------------------------------------- diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java index 54a1df3..50e5f37 100644 --- a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java +++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java @@ -17,14 +17,6 @@ package org.apache.mahout.text.wikipedia; -import java.io.IOException; -import java.io.StringReader; -import java.util.List; -import java.util.Locale; -import java.util.Set; -import java.util.regex.Pattern; - -import com.google.common.collect.Sets; import com.google.common.io.Closeables; import org.apache.commons.lang3.StringEscapeUtils; import org.apache.hadoop.conf.Configuration; @@ -40,7 +32,14 @@ import org.apache.mahout.common.ClassUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.collect.Lists; +import java.io.IOException; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Set; +import java.util.regex.Pattern; /** * Maps over Wikipedia xml format and output all document having the category listed in the input category @@ -89,13 +88,13 @@ public class WikipediaDatasetCreatorMapper extends Mapper<LongWritable, Text, Te Configuration conf = context.getConfiguration(); if (inputCategories == null) { - Set<String> newCategories = Sets.newHashSet(); + Set<String> newCategories = new HashSet<>(); DefaultStringifier<Set<String>> setStringifier = - new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(newCategories)); + new DefaultStringifier<>(conf, GenericsUtil.getClass(newCategories)); String categoriesStr = conf.get("wikipedia.categories", setStringifier.toString(newCategories)); Set<String> inputCategoriesSet = setStringifier.fromString(categoriesStr); - inputCategories = Lists.newArrayList(inputCategoriesSet); - inputCategoryPatterns = Lists.newArrayListWithCapacity(inputCategories.size()); + inputCategories = new ArrayList<>(inputCategoriesSet); + inputCategoryPatterns = new ArrayList<>(inputCategories.size()); for (String inputCategory : inputCategories) { inputCategoryPatterns.add(Pattern.compile(".*\\b" + inputCategory + "\\b.*")); } http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaMapper.java ---------------------------------------------------------------------- diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaMapper.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaMapper.java index d880760..abd3a04 100644 --- a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaMapper.java +++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaMapper.java @@ -18,12 +18,12 @@ package org.apache.mahout.text.wikipedia; 
import java.io.IOException; +import java.util.HashSet; import java.util.Locale; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; -import com.google.common.collect.Sets; import org.apache.commons.lang3.StringEscapeUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.DefaultStringifier; @@ -106,9 +106,9 @@ public class WikipediaMapper extends Mapper<LongWritable, Text, Text, Text> { super.setup(context); Configuration conf = context.getConfiguration(); - Set<String> newCategories = Sets.newHashSet(); + Set<String> newCategories = new HashSet<>(); DefaultStringifier<Set<String>> setStringifier = - new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(newCategories)); + new DefaultStringifier<>(conf, GenericsUtil.getClass(newCategories)); String categoriesStr = conf.get("wikipedia.categories"); inputCategories = setStringifier.fromString(categoriesStr); http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java ---------------------------------------------------------------------- diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java index c9a54e9..fc065fe 100644 --- a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java +++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java @@ -26,7 +26,6 @@ import java.net.URI; import java.text.DecimalFormat; import java.text.NumberFormat; -import com.google.common.io.Closeables; import org.apache.commons.cli2.CommandLine; import org.apache.commons.cli2.Group; import org.apache.commons.cli2.Option; @@ -219,12 +218,9 @@ public final class WikipediaXmlSplitter { content.append("</mediawiki>"); fileNumber++; String filename = outputDirPath + "/chunk-" + decimalFormatter.format(fileNumber) + ".xml"; - BufferedWriter chunkWriter = - new BufferedWriter(new OutputStreamWriter(fs.create(new Path(filename)), "UTF-8")); - try { + try (BufferedWriter chunkWriter = + new BufferedWriter(new OutputStreamWriter(fs.create(new Path(filename)), "UTF-8"))) { chunkWriter.write(content.toString(), 0, content.length()); - } finally { - Closeables.close(chunkWriter, false); } if (fileNumber >= numChunks) { break; http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/text/wikipedia/XmlInputFormat.java ---------------------------------------------------------------------- diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/XmlInputFormat.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/XmlInputFormat.java index 7f16f31..afd350f 100644 --- a/integration/src/main/java/org/apache/mahout/text/wikipedia/XmlInputFormat.java +++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/XmlInputFormat.java @@ -17,10 +17,8 @@ package org.apache.mahout.text.wikipedia; -import java.io.IOException; - -import com.google.common.base.Charsets; import com.google.common.io.Closeables; +import org.apache.commons.io.Charsets; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; @@ -36,6 +34,8 @@ import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; + /** * Reads records that are delimited by 
a specific begin/end tag. */ http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java ---------------------------------------------------------------------- diff --git a/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java b/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java index 1814bd5..33d09a0 100644 --- a/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java +++ b/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java @@ -17,11 +17,9 @@ package org.apache.mahout.utils; - import java.io.IOException; import com.google.common.base.Preconditions; -import com.google.common.io.Closeables; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -113,12 +111,8 @@ public class ConcatenateVectorsJob extends AbstractJob { Preconditions.checkArgument(paths.length > 0, path.getName() + " is a file, should be a directory"); Path file = paths[0].getPath(); - SequenceFile.Reader reader = null; - try { - reader = new SequenceFile.Reader(fs, file, fs.getConf()); + try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, fs.getConf())){ return reader.getKeyClass().asSubclass(Writable.class); - } finally { - Closeables.close(reader, true); } } } http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java ---------------------------------------------------------------------- diff --git a/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java b/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java index 8ab57be..f63de83 100644 --- a/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java +++ b/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java @@ -25,7 +25,7 @@ import java.io.PrintStream; import java.util.List; import java.util.Map; -import com.google.common.base.Charsets; +import org.apache.commons.io.Charsets; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.ToolRunner;

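Taken together, the hunks above apply two further Java 7 idioms again and again: the diamond operator replaces Guava's collection factories (Lists.newArrayList(), Maps.newHashMap(), Sets.newHashSet()), and try-with-resources replaces the finally { Closeables.close(...); } blocks. A minimal sketch of both, using hypothetical readAll/toPaths helpers rather than code from the patch:

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IOUtils;

    public final class Java7IdiomsSketch {
      private Java7IdiomsSketch() {}

      // try-with-resources: the stream is closed automatically when the block
      // exits, even if readFully throws, so no finally/Closeables.close() is needed.
      static byte[] readAll(FileSystem fs, Path file, int length) throws IOException {
        byte[] contents = new byte[length];
        try (FSDataInputStream in = fs.open(file)) {
          IOUtils.readFully(in, contents, 0, contents.length);
        }
        return contents;
      }

      // Diamond operator: new ArrayList<>(n) lets the compiler infer <Path>,
      // replacing Lists.newArrayList()/newArrayListWithCapacity(n).
      static List<Path> toPaths(String[] names) {
        List<Path> paths = new ArrayList<>(names.length);
        for (String name : names) {
          paths.add(new Path(name));
        }
        return paths;
      }
    }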