http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java b/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java index 137b174..86f99b6 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java @@ -5,9 +5,9 @@ * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * + * <p/> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p/> * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,10 +22,9 @@ import java.io.DataOutput; import java.io.DataOutputStream; import java.io.IOException; import java.nio.charset.Charset; +import java.util.ArrayList; import java.util.List; -import com.google.common.collect.Lists; -import com.google.common.io.Closeables; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -40,8 +39,9 @@ import org.apache.mahout.common.iterator.sequencefile.PathFilters; */ public final class DFUtils { - private DFUtils() {} - + private DFUtils() { + } + /** * Writes an Node[] into a DataOutput * @throws java.io.IOException @@ -52,7 +52,7 @@ public final class DFUtils { w.write(out); } } - + /** * Reads a Node[] from a DataInput * @throws java.io.IOException @@ -63,10 +63,10 @@ public final class DFUtils { for (int index = 0; index < length; index++) { nodes[index] = Node.read(in); } - + return nodes; } - + /** * Writes a double[] into a DataOutput * @throws java.io.IOException @@ -77,7 +77,7 @@ public final class DFUtils { out.writeDouble(value); } } - + /** * Reads a double[] from a DataInput * @throws java.io.IOException @@ -88,10 +88,10 @@ public final class DFUtils { for (int index = 0; index < length; index++) { array[index] = in.readDouble(); } - + return array; } - + /** * Writes an int[] into a DataOutput * @throws java.io.IOException @@ -102,7 +102,7 @@ public final class DFUtils { out.writeInt(value); } } - + /** * Reads an int[] from a DataInput * @throws java.io.IOException @@ -113,16 +113,16 @@ public final class DFUtils { for (int index = 0; index < length; index++) { array[index] = in.readInt(); } - + return array; } - + /** * Return a list of all files in the output directory * @throws IOException if no file is found */ public static Path[] listOutputFiles(FileSystem fs, Path outputPath) throws IOException { - List<Path> outputFiles = Lists.newArrayList(); + List<Path> outputFiles = new ArrayList<>(); for (FileStatus s : fs.listStatus(outputPath, PathFilters.logsCRCFilter())) { if (!s.isDir() && !s.getPath().getName().startsWith("_")) { outputFiles.add(s.getPath()); @@ -140,27 +140,24 @@ public final class DFUtils { public static String elapsedTime(long milli) { long seconds = milli / 1000; milli %= 1000; - + long minutes = seconds / 60; seconds %= 60; - + long hours = minutes / 60; minutes %= 60; - + return hours + "h " + minutes + "m " + seconds + "s " + milli; } public static void storeWritable(Configuration conf, Path path, 
Writable writable) throws IOException { FileSystem fs = path.getFileSystem(conf); - FSDataOutputStream out = fs.create(path); - try { + try (FSDataOutputStream out = fs.create(path)) { writable.write(out); - } finally { - Closeables.close(out, false); } } - + /** * Write a string to a path. * @param conf From which the file system will be picked @@ -169,13 +166,8 @@ public final class DFUtils { * @throws IOException if things go poorly */ public static void storeString(Configuration conf, Path path, String string) throws IOException { - DataOutputStream out = null; - try { - out = path.getFileSystem(conf).create(path); + try (DataOutputStream out = path.getFileSystem(conf).create(path)) { out.write(string.getBytes(Charset.defaultCharset())); - } finally { - Closeables.close(out, false); } } - }
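The DFUtils diff above is representative of the whole commit: Guava's Lists/Maps/Sets factory methods are replaced by JDK constructors with the diamond operator, and every try/finally block built around com.google.common.io.Closeables.close() becomes a Java 7 try-with-resources statement. As a minimal, self-contained sketch of the resource-handling change (the class and method names below are illustrative, not part of the patch):

    import java.io.IOException;
    import java.io.OutputStream;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Path;

    public class TryWithResourcesSketch {

      // Old shape: close in an explicit finally block. Closeables.close(out, false)
      // behaved like this null-guarded close and rethrew any IOException that
      // close() itself raised.
      static void storeStringOld(Path path, String value) throws IOException {
        OutputStream out = null;
        try {
          out = Files.newOutputStream(path);
          out.write(value.getBytes(StandardCharsets.UTF_8));
        } finally {
          if (out != null) {
            out.close();
          }
        }
      }

      // New shape: the compiler generates the close, which also runs on exception
      // paths; if the body already threw, a close()-time exception is attached to
      // it as a suppressed exception instead of replacing it.
      static void storeStringNew(Path path, String value) throws IOException {
        try (OutputStream out = Files.newOutputStream(path)) {
          out.write(value.getBytes(StandardCharsets.UTF_8));
        }
      }

      public static void main(String[] args) throws IOException {
        Path tmp = Files.createTempFile("twr", ".txt");
        storeStringOld(tmp, "old style\n");
        storeStringNew(tmp, "new style\n");
        System.out.println("Wrote " + tmp);
      }
    }

One behavioral nuance: call sites that used Closeables.close(stream, true) swallowed IOExceptions thrown while closing, whereas try-with-resources propagates them, so close-time failures at those sites now surface to the caller.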
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java b/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java index 1b47ec7..bb4153e 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java @@ -18,8 +18,6 @@ package org.apache.mahout.classifier.df; import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.io.Closeables; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; @@ -34,6 +32,7 @@ import org.apache.mahout.classifier.df.node.Node; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Random; @@ -45,7 +44,7 @@ public class DecisionForest implements Writable { private final List<Node> trees; private DecisionForest() { - trees = Lists.newArrayList(); + trees = new ArrayList<>(); } public DecisionForest(List<Node> trees) { @@ -225,15 +224,12 @@ public class DecisionForest implements Writable { DecisionForest forest = null; for (Path path : files) { - FSDataInputStream dataInput = new FSDataInputStream(fs.open(path)); - try { + try (FSDataInputStream dataInput = new FSDataInputStream(fs.open(path))) { if (forest == null) { forest = read(dataInput); } else { forest.readFields(dataInput); } - } finally { - Closeables.close(dataInput, true); } } http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java index 895188b..8a7d945 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java @@ -17,7 +17,6 @@ package org.apache.mahout.classifier.df.builder; -import com.google.common.collect.Sets; import org.apache.mahout.classifier.df.data.Data; import org.apache.mahout.classifier.df.data.Dataset; import org.apache.mahout.classifier.df.data.Instance; @@ -34,6 +33,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.Collection; +import java.util.HashSet; import java.util.Random; /** @@ -263,7 +263,7 @@ public class DecisionTreeBuilder implements TreeBuilder { // tree is complemented Collection<Double> subsetValues = null; if (complemented) { - subsetValues = Sets.newHashSet(); + subsetValues = new HashSet<>(); for (double value : values) { subsetValues.add(value); } http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java index c1bddd9..c68ce52 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java 
@@ -17,11 +17,11 @@ package org.apache.mahout.classifier.df.data; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; import org.apache.mahout.classifier.df.data.conditions.Condition; +import java.util.ArrayList; import java.util.Collection; +import java.util.HashSet; import java.util.List; import java.util.Random; @@ -38,12 +38,12 @@ public class Data implements Cloneable { public Data(Dataset dataset) { this.dataset = dataset; - this.instances = Lists.newArrayList(); + this.instances = new ArrayList<>(); } public Data(Dataset dataset, List<Instance> instances) { this.dataset = dataset; - this.instances = Lists.newArrayList(instances); + this.instances = new ArrayList<>(instances); } /** @@ -86,7 +86,7 @@ public class Data implements Cloneable { * @return the subset from this data that matches the given condition */ public Data subset(Condition condition) { - List<Instance> subset = Lists.newArrayList(); + List<Instance> subset = new ArrayList<>(); for (Instance instance : instances) { if (condition.isTrueFor(instance)) { @@ -102,7 +102,7 @@ public class Data implements Cloneable { */ public Data bagging(Random rng) { int datasize = size(); - List<Instance> bag = Lists.newArrayListWithCapacity(datasize); + List<Instance> bag = new ArrayList<>(datasize); for (int i = 0; i < datasize; i++) { bag.add(instances.get(rng.nextInt(datasize))); @@ -121,7 +121,7 @@ public class Data implements Cloneable { */ public Data bagging(Random rng, boolean[] sampled) { int datasize = size(); - List<Instance> bag = Lists.newArrayListWithCapacity(datasize); + List<Instance> bag = new ArrayList<>(datasize); for (int i = 0; i < datasize; i++) { int index = rng.nextInt(datasize); @@ -136,7 +136,7 @@ public class Data implements Cloneable { * Splits the data in two, returns one part, and this gets the rest of the data. 
<b>VERY SLOW!</b> */ public Data rsplit(Random rng, int subsize) { - List<Instance> subset = Lists.newArrayListWithCapacity(subsize); + List<Instance> subset = new ArrayList<>(subsize); for (int i = 0; i < subsize; i++) { subset.add(instances.remove(rng.nextInt(instances.size()))); @@ -190,7 +190,7 @@ public class Data implements Cloneable { * finds all distinct values of a given attribute */ public double[] values(int attr) { - Collection<Double> result = Sets.newHashSet(); + Collection<Double> result = new HashSet<>(); for (Instance instance : instances) { result.add(instance.get(attr)); @@ -208,7 +208,7 @@ public class Data implements Cloneable { @Override public Data clone() { - return new Data(dataset, Lists.newArrayList(instances)); + return new Data(dataset, new ArrayList<>(instances)); } @Override http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java index 8eed6cf..c8d9dcd 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java @@ -19,7 +19,6 @@ package org.apache.mahout.classifier.df.data; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; -import com.google.common.collect.Sets; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -28,6 +27,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Scanner; import java.util.Set; @@ -80,7 +81,7 @@ public final class DataLoader { if (attrs[attr].isCategorical() || (!regression && attrs[attr].isLabel())) { // update values if (values[attr] == null) { - values[attr] = Sets.newHashSet(); + values[attr] = new HashSet<>(); } values[attr].add(token); } else { @@ -111,7 +112,7 @@ public final class DataLoader { FSDataInputStream input = fs.open(fpath); Scanner scanner = new Scanner(input, "UTF-8"); - List<Instance> instances = Lists.newArrayList(); + List<Instance> instances = new ArrayList<>(); DataConverter converter = new DataConverter(dataset); @@ -137,7 +138,7 @@ public final class DataLoader { /** Loads the data from multiple paths specified by pathes */ public static Data loadData(Dataset dataset, FileSystem fs, Path[] pathes) throws IOException { - List<Instance> instances = Lists.newArrayList(); + List<Instance> instances = new ArrayList<>(); for (Path path : pathes) { Data loadedData = loadData(dataset, fs, path); @@ -150,7 +151,7 @@ public final class DataLoader { /** Loads the data from a String array */ public static Data loadData(Dataset dataset, String[] data) { - List<Instance> instances = Lists.newArrayList(); + List<Instance> instances = new ArrayList<>(); DataConverter converter = new DataConverter(dataset); http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java index 856d452..3eb126c 100644 --- 
a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java @@ -18,8 +18,8 @@ package org.apache.mahout.classifier.df.data; import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import java.util.ArrayList; import java.util.List; import java.util.Random; @@ -71,7 +71,7 @@ public final class DataUtils { */ public static int maxindex(Random rng, int[] values) { int max = 0; - List<Integer> maxindices = Lists.newArrayList(); + List<Integer> maxindices = new ArrayList<>(); for (int index = 0; index < values.length; index++) { if (values[index] > max) { http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java index d2bec37..413389f 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java @@ -18,8 +18,6 @@ package org.apache.mahout.classifier.df.data; import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import com.google.common.io.Closeables; import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.conf.Configuration; @@ -32,6 +30,8 @@ import org.codehaus.jackson.type.TypeReference; import java.io.IOException; import java.nio.charset.Charset; import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedList; import java.util.List; import java.util.Locale; import java.util.Map; @@ -336,7 +336,7 @@ public class Dataset { * @return some JSON */ public String toJSON() { - List<Map<String, Object>> toWrite = Lists.newLinkedList(); + List<Map<String, Object>> toWrite = new LinkedList<>(); // attributes does not include ignored columns and it does include the class label int ignoredCount = 0; for (int i = 0; i < attributes.length + ignored.length; i++) { @@ -374,8 +374,8 @@ public class Dataset { } catch (Exception ex) { throw new RuntimeException(ex); } - List<Attribute> attributes = Lists.newLinkedList(); - List<Integer> ignored = Lists.newLinkedList(); + List<Attribute> attributes = new LinkedList<>(); + List<Integer> ignored = new LinkedList<>(); String[][] nominalValues = new String[fromJSON.size()][]; Dataset dataset = new Dataset(); for (int i = 0; i < fromJSON.size(); i++) { @@ -412,7 +412,7 @@ public class Dataset { * @return map of (AttributeTypes, Values) */ private Map<String, Object> getMap(Attribute type, String[] values, boolean isLabel) { - Map<String, Object> attribute = Maps.newHashMap(); + Map<String, Object> attribute = new HashMap<>(); attribute.put(TYPE, type.toString().toLowerCase(Locale.getDefault())); attribute.put(VALUES, values); attribute.put(LABEL, isLabel); http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java index a2198b1..f2e0ce4 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java +++ 
b/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java @@ -18,9 +18,9 @@ package org.apache.mahout.classifier.df.data; import com.google.common.base.Splitter; -import com.google.common.collect.Lists; import org.apache.mahout.classifier.df.data.Dataset.Attribute; +import java.util.ArrayList; import java.util.List; import java.util.Locale; @@ -40,7 +40,7 @@ public final class DescriptorUtils { * if a bad token is encountered */ public static Attribute[] parseDescriptor(CharSequence descriptor) throws DescriptorException { - List<Attribute> attributes = Lists.newArrayList(); + List<Attribute> attributes = new ArrayList<>(); for (String token : SPACE.split(descriptor)) { token = token.toUpperCase(Locale.ENGLISH); if ("I".equals(token)) { http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java index b8e5c2d..bdbaf2b 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java @@ -17,11 +17,6 @@ package org.apache.mahout.classifier.df.mapreduce; -import java.io.IOException; -import java.util.List; -import java.util.Random; - -import com.google.common.collect.Lists; import com.google.common.io.Closeables; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.filecache.DistributedCache; @@ -51,6 +46,11 @@ import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + /** * MapReduce implementation that classifies the input data using a previously built decision forest */ @@ -144,7 +144,7 @@ public class Classifier { Path[] outfiles = DFUtils.listOutputFiles(fs, mappersOutputPath); // read all the output - List<double[]> resList = Lists.newArrayList(); + List<double[]> resList = new ArrayList<>(); for (Path path : outfiles) { FSDataOutputStream ofile = null; try { http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java index 573a1e0..4c33e73 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java @@ -17,8 +17,12 @@ package org.apache.mahout.classifier.df.mapreduce.inmem; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.filecache.DistributedCache; import org.apache.hadoop.fs.FileSystem; @@ -36,10 +40,6 @@ import org.apache.mahout.classifier.df.node.Node; import org.apache.mahout.common.Pair; import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable; -import
java.io.IOException; -import java.util.List; -import java.util.Map; - /** * MapReduce implementation where each mapper loads a full copy of the data in-memory. The forest trees are * split across all the mappers */ @@ -80,7 +80,7 @@ public class InMemBuilder extends Builder { protected DecisionForest parseOutput(Job job) throws IOException { Configuration conf = job.getConfiguration(); - Map<Integer,MapredOutput> output = Maps.newHashMap(); + Map<Integer,MapredOutput> output = new HashMap<>(); Path outputPath = getOutputPath(conf); FileSystem fs = outputPath.getFileSystem(conf); @@ -101,7 +101,7 @@ public class InMemBuilder extends Builder { /** * Process the output, extracting the trees */ private static DecisionForest processOutput(Map<Integer,MapredOutput> output) { - List<Node> trees = Lists.newArrayList(); + List<Node> trees = new ArrayList<>(); for (Map.Entry<Integer,MapredOutput> entry : output.entrySet()) { MapredOutput value = entry.getValue(); http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java index a39218e..51e5a3e 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java @@ -17,8 +17,15 @@ package org.apache.mahout.classifier.df.mapreduce.inmem; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.Random; + import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.NullWritable; @@ -33,13 +40,6 @@ import org.apache.mahout.common.RandomUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.util.List; -import java.util.Locale; -import java.util.Random; - /** * Custom InputFormat that generates InputSplits given the desired number of trees.<br> * Each input split contains a subset of the trees.<br> @@ -94,7 +94,7 @@ public class InMemInputFormat extends InputFormat<IntWritable,NullWritable> { int id = 0; - List<InputSplit> splits = Lists.newArrayListWithCapacity(numSplits); + List<InputSplit> splits = new ArrayList<>(numSplits); for (int index = 0; index < numSplits - 1; index++) { splits.add(new InMemInputSplit(id, splitSize, nextSeed())); http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java index eaf0b15..648472c 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java @@ -18,7 +18,6 @@ package org.apache.mahout.classifier.df.mapreduce.partial; import
com.google.common.base.Preconditions; -import com.google.common.collect.Lists; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; @@ -35,6 +34,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Random; @@ -61,7 +61,7 @@ public class Step1Mapper extends MapredMapper<LongWritable,Text,TreeID,MapredOut private int partition; /** will contain all instances of this mapper's split */ - private final List<Instance> instances = Lists.newArrayList(); + private final List<Instance> instances = new ArrayList<>(); public int getFirstTreeId() { return firstTreeId; http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java index 292b591..d7f023b 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java @@ -17,7 +17,6 @@ package org.apache.mahout.classifier.df.ref; -import com.google.common.collect.Lists; import org.apache.mahout.classifier.df.Bagging; import org.apache.mahout.classifier.df.DecisionForest; import org.apache.mahout.classifier.df.builder.TreeBuilder; @@ -26,6 +25,7 @@ import org.apache.mahout.classifier.df.node.Node; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.ArrayList; import java.util.List; import java.util.Random; @@ -56,7 +56,7 @@ public class SequentialBuilder { } public DecisionForest build(int nbTrees) { - List<Node> trees = Lists.newArrayList(); + List<Node> trees = new ArrayList<>(); for (int treeId = 0; treeId < nbTrees; treeId++) { trees.add(bagging.build(rng)); http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/df/tools/Describe.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/tools/Describe.java b/mr/src/main/java/org/apache/mahout/classifier/df/tools/Describe.java index 58814a8..226d3db 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/tools/Describe.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/tools/Describe.java @@ -17,7 +17,11 @@ package org.apache.mahout.classifier.df.tools; -import com.google.common.collect.Lists; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + import org.apache.commons.cli2.CommandLine; import org.apache.commons.cli2.Group; import org.apache.commons.cli2.Option; @@ -38,10 +42,6 @@ import org.apache.mahout.common.CommandLineUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.util.Collection; -import java.util.List; - /** * Generates a file descriptor for a given dataset */ @@ -138,7 +138,7 @@ public final class Describe { } private static List<String> convert(Collection<?> values) { - List<String> list = Lists.newArrayListWithCapacity(values.size()); + List<String> list = new ArrayList<>(values.size()); for (Object value : values) { list.add(value.toString()); }
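The hunks above also show the second recurring pattern of the commit: Guava's collection factory methods, whose main purpose was to infer type arguments before Java 7, give way to plain JDK constructors with the diamond operator. A compact sketch of the mapping (class name illustrative):

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.LinkedList;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;

    public class DiamondOperatorSketch {
      public static void main(String[] args) {
        List<String> list = new ArrayList<>();       // was Lists.newArrayList()
        List<String> sized = new ArrayList<>(128);   // was Lists.newArrayListWithCapacity(128)
        List<String> linked = new LinkedList<>();    // was Lists.newLinkedList()
        Set<Double> set = new HashSet<>();           // was Sets.newHashSet()
        Map<String, Integer> map = new HashMap<>();  // was Maps.newHashMap()

        // The int passed to ArrayList is an initial capacity, not a size, matching
        // newArrayListWithCapacity semantics: the new list is still empty.
        System.out.println(sized.size()); // prints 0
        map.put("total", list.size() + linked.size() + set.size());
        System.out.println(map);          // prints {total=0}
      }
    }

Copy constructors map the same way: new ArrayList<>(instances), as in Data.java above, mirrors Lists.newArrayList(instances).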
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/mlp/NeuralNetwork.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/mlp/NeuralNetwork.java b/mr/src/main/java/org/apache/mahout/classifier/mlp/NeuralNetwork.java index 056bd48..f4e765c 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/mlp/NeuralNetwork.java +++ b/mr/src/main/java/org/apache/mahout/classifier/mlp/NeuralNetwork.java @@ -19,10 +19,12 @@ package org.apache.mahout.classifier.mlp; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; +import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -41,10 +43,6 @@ import org.apache.mahout.math.function.DoubleFunction; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.io.Closeables; - /** * AbstractNeuralNetwork defines the general operations for a neural network * based model. Typically, all derivative models such as Multilayer Perceptron @@ -63,7 +61,7 @@ public abstract class NeuralNetwork { /* The default momentum weight */ public static final double DEFAULT_MOMENTUM_WEIGHT = 0.1; - public static enum TrainingMethod { GRADIENT_DESCENT } + public enum TrainingMethod { GRADIENT_DESCENT } /* The name of the model */ protected String modelType; @@ -113,11 +111,10 @@ public abstract class NeuralNetwork { costFunctionName = "Minus_Squared"; modelType = getClass().getSimpleName(); - layerSizeList = Lists.newArrayList(); - layerSizeList = Lists.newArrayList(); - weightMatrixList = Lists.newArrayList(); - prevWeightUpdatesList = Lists.newArrayList(); - squashingFunctionList = Lists.newArrayList(); + layerSizeList = new ArrayList<>(); + weightMatrixList = new ArrayList<>(); + prevWeightUpdatesList = new ArrayList<>(); + squashingFunctionList = new ArrayList<>(); } /** @@ -350,7 +348,7 @@ public abstract class NeuralNetwork { * existing matrices. */ public void setWeightMatrices(Matrix[] matrices) { - weightMatrixList = Lists.newArrayList(); + weightMatrixList = new ArrayList<>(); Collections.addAll(weightMatrixList, matrices); } @@ -411,7 +409,7 @@ public abstract class NeuralNetwork { * @return Cached output of each layer.
*/ protected List<Vector> getOutputInternal(Vector instance) { - List<Vector> outputCache = Lists.newArrayList(); + List<Vector> outputCache = new ArrayList<>(); // fill with instance Vector intermediateOutput = instance; outputCache.add(intermediateOutput); @@ -592,14 +590,10 @@ public abstract class NeuralNetwork { protected void readFromModel() throws IOException { log.info("Load model from {}", modelPath); Preconditions.checkArgument(modelPath != null, "Model path has not been set."); - FSDataInputStream is = null; - try { - Path path = new Path(modelPath); - FileSystem fs = path.getFileSystem(new Configuration()); - is = new FSDataInputStream(fs.open(path)); + Path path = new Path(modelPath); + FileSystem fs = path.getFileSystem(new Configuration()); + try (FSDataInputStream is = new FSDataInputStream(fs.open(path))) { readFields(is); - } finally { - Closeables.close(is, true); } } @@ -611,14 +605,10 @@ public abstract class NeuralNetwork { public void writeModelToFile() throws IOException { log.info("Write model to {}.", modelPath); Preconditions.checkArgument(modelPath != null, "Model path has not been set."); - FSDataOutputStream stream = null; - try { - Path path = new Path(modelPath); - FileSystem fs = path.getFileSystem(new Configuration()); - stream = fs.create(path, true); + Path path = new Path(modelPath); + FileSystem fs = path.getFileSystem(new Configuration()); + try (FSDataOutputStream stream = fs.create(path, true)) { write(stream); - } finally { - Closeables.close(stream, false); } } @@ -717,7 +707,7 @@ public abstract class NeuralNetwork { // Read layer size list int numLayers = input.readInt(); - layerSizeList = Lists.newArrayList(); + layerSizeList = new ArrayList<>(); for (int i = 0; i < numLayers; i++) { layerSizeList.add(input.readInt()); } @@ -726,15 +716,15 @@ public abstract class NeuralNetwork { // Read squash functions int squashingFunctionSize = input.readInt(); - squashingFunctionList = Lists.newArrayList(); + squashingFunctionList = new ArrayList<>(); for (int i = 0; i < squashingFunctionSize; i++) { squashingFunctionList.add(WritableUtils.readString(input)); } // Read weights and construct matrices of previous updates int numOfMatrices = input.readInt(); - weightMatrixList = Lists.newArrayList(); - prevWeightUpdatesList = Lists.newArrayList(); + weightMatrixList = new ArrayList<>(); + prevWeightUpdatesList = new ArrayList<>(); for (int i = 0; i < numOfMatrices; i++) { Matrix matrix = MatrixWritable.readMatrix(input); weightMatrixList.add(matrix); http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/mlp/RunMultilayerPerceptron.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/mlp/RunMultilayerPerceptron.java b/mr/src/main/java/org/apache/mahout/classifier/mlp/RunMultilayerPerceptron.java index 6130530..270ea43 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/mlp/RunMultilayerPerceptron.java +++ b/mr/src/main/java/org/apache/mahout/classifier/mlp/RunMultilayerPerceptron.java @@ -22,6 +22,7 @@ import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.InputStreamReader; import java.io.OutputStreamWriter; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -41,9 +42,6 @@ import org.apache.mahout.math.Vector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.collect.Lists; -import com.google.common.io.Closeables; - /** Run {@link 
MultilayerPerceptron} classification. * @deprecated as of 0.10.0. * */ @@ -61,11 +59,11 @@ public class RunMultilayerPerceptron { int columnEnd; boolean skipHeader; } - + public static void main(String[] args) throws Exception { - + Parameters parameters = new Parameters(); - + if (parseArgs(args, parameters)) { log.info("Load model from {}.", parameters.modelFilePathStr); MultilayerPerceptron mlp = new MultilayerPerceptron(parameters.modelFilePathStr); @@ -98,15 +96,10 @@ public class RunMultilayerPerceptron { log.info("Read from column {} to column {}.", parameters.columnStart, parameters.columnEnd); - BufferedWriter writer = null; - BufferedReader reader = null; - try { - writer = new BufferedWriter(new OutputStreamWriter(outputFS.create(outputFilePath))); - reader = new BufferedReader(new InputStreamReader(inputFS.open(inputFilePath))); - + try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(outputFS.create(outputFilePath))); + BufferedReader reader = new BufferedReader(new InputStreamReader(inputFS.open(inputFilePath)))) { String line; - if (parameters.skipHeader) { reader.readLine(); } @@ -125,9 +118,6 @@ public class RunMultilayerPerceptron { } mlp.close(); log.info("Labeling finished."); - } finally { - Closeables.close(reader, true); - Closeables.close(writer, true); } } } @@ -154,7 +144,7 @@ public class RunMultilayerPerceptron { .withDescription("type of input file, currently support 'csv'") .create(); - List<Integer> columnRangeDefault = Lists.newArrayList(); + List<Integer> columnRangeDefault = new ArrayList<>(); columnRangeDefault.add(0); columnRangeDefault.add(Integer.MAX_VALUE); http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/mlp/TrainMultilayerPerceptron.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/mlp/TrainMultilayerPerceptron.java b/mr/src/main/java/org/apache/mahout/classifier/mlp/TrainMultilayerPerceptron.java index a194c4c..d634aa5 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/mlp/TrainMultilayerPerceptron.java +++ b/mr/src/main/java/org/apache/mahout/classifier/mlp/TrainMultilayerPerceptron.java @@ -19,9 +19,12 @@ package org.apache.mahout.classifier.mlp; import java.io.BufferedReader; import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; +import com.google.common.base.Preconditions; import org.apache.commons.cli2.CommandLine; import org.apache.commons.cli2.Group; import org.apache.commons.cli2.Option; @@ -38,11 +41,6 @@ import org.apache.mahout.math.Vector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.io.Closeables; - /** Train a {@link MultilayerPerceptron}. * @deprecated as of 0.10.0. * */ @@ -50,7 +48,7 @@ import com.google.common.io.Closeables; public final class TrainMultilayerPerceptron { private static final Logger log = LoggerFactory.getLogger(TrainMultilayerPerceptron.class); - + /** The parameters used by MLP. 
*/ static class Parameters { double learningRate; @@ -59,31 +57,17 @@ public final class TrainMultilayerPerceptron { String inputFilePath; boolean skipHeader; - Map<String, Integer> labelsIndex = Maps.newHashMap(); + Map<String, Integer> labelsIndex = new HashMap<>(); String modelFilePath; boolean updateModel; - List<Integer> layerSizeList = Lists.newArrayList(); + List<Integer> layerSizeList = new ArrayList<>(); String squashingFunctionName; } - /* - private double learningRate; - private double momemtumWeight; - private double regularizationWeight; - - private String inputFilePath; - private boolean skipHeader; - private Map<String, Integer> labelsIndex = Maps.newHashMap(); - - private String modelFilePath; - private boolean updateModel; - private List<Integer> layerSizeList = Lists.newArrayList(); - private String squashingFunctionName;*/ - public static void main(String[] args) throws Exception { Parameters parameters = new Parameters(); - + if (parseArgs(args, parameters)) { log.info("Validate model..."); // check whether the model already exists @@ -109,31 +93,28 @@ public final class TrainMultilayerPerceptron { } mlp.setCostFunction("Minus_Squared"); mlp.setLearningRate(parameters.learningRate) - .setMomentumWeight(parameters.momemtumWeight) - .setRegularizationWeight(parameters.regularizationWeight); + .setMomentumWeight(parameters.momemtumWeight) + .setRegularizationWeight(parameters.regularizationWeight); } mlp.setModelPath(parameters.modelFilePath); } // set the parameters mlp.setLearningRate(parameters.learningRate) - .setMomentumWeight(parameters.momemtumWeight) - .setRegularizationWeight(parameters.regularizationWeight); + .setMomentumWeight(parameters.momemtumWeight) + .setRegularizationWeight(parameters.regularizationWeight); // train by the training data Path trainingDataPath = new Path(parameters.inputFilePath); FileSystem dataFs = trainingDataPath.getFileSystem(new Configuration()); Preconditions.checkArgument(dataFs.exists(trainingDataPath), "Training dataset %s cannot be found!", - parameters.inputFilePath); + parameters.inputFilePath); log.info("Read data and train model..."); - BufferedReader reader = null; - try { - reader = new BufferedReader(new InputStreamReader(dataFs.open(trainingDataPath))); + try (BufferedReader reader = new BufferedReader(new InputStreamReader(dataFs.open(trainingDataPath)))) { String line; - // read training data line by line if (parameters.skipHeader) { reader.readLine(); @@ -163,15 +144,13 @@ public final class TrainMultilayerPerceptron { log.info("Write trained model to {}", parameters.modelFilePath); mlp.writeModelToFile(); mlp.close(); - } finally { - Closeables.close(reader, true); } } } /** * Parse the input arguments. - * + * * @param args The input arguments * @param parameters The parameters parsed. * @return Whether the input arguments are valid. 
@@ -196,7 +175,7 @@ public final class TrainMultilayerPerceptron { .withRequired(true) .withChildren(skipHeaderGroup) .withArgument(argumentBuilder.withName("path").withMinimum(1).withMaximum(1) - .create()).withDescription("the file path of training dataset") + .create()).withDescription("the file path of training dataset") .create(); Option labelsOption = optionBuilder @@ -295,9 +274,9 @@ public final class TrainMultilayerPerceptron { parameters.squashingFunctionName = getString(commandLine, squashingFunctionOption); System.out.printf("Input: %s, Model: %s, Update: %s, Layer size: %s, Squashing function: %s, Learning rate: %f," + - " Momemtum weight: %f, Regularization Weight: %f\n", parameters.inputFilePath, parameters.modelFilePath, - parameters.updateModel, Arrays.toString(parameters.layerSizeList.toArray()), - parameters.squashingFunctionName, parameters.learningRate, parameters.momemtumWeight, + " Momemtum weight: %f, Regularization Weight: %f\n", parameters.inputFilePath, parameters.modelFilePath, + parameters.updateModel, Arrays.toString(parameters.layerSizeList.toArray()), + parameters.squashingFunctionName, parameters.learningRate, parameters.momemtumWeight, parameters.regularizationWeight); return true; @@ -321,7 +300,7 @@ public final class TrainMultilayerPerceptron { static List<Integer> getIntegerList(CommandLine commandLine, Option option) { List<String> list = commandLine.getValues(option); - List<Integer> valList = Lists.newArrayList(); + List<Integer> valList = new ArrayList<>(); for (String str : list) { valList.add(Integer.parseInt(str)); } http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java index 1e5171c..c09dd83 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java +++ b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java @@ -20,9 +20,11 @@ package org.apache.mahout.classifier.naivebayes; import java.io.IOException; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; import java.util.regex.Pattern; +import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -44,11 +46,6 @@ import org.apache.mahout.math.Vector; import org.apache.mahout.math.VectorWritable; import org.apache.mahout.math.map.OpenObjectIntHashMap; -import com.google.common.base.Preconditions; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; -import com.google.common.io.Closeables; - public final class BayesUtils { private static final Pattern SLASH = Pattern.compile("/"); @@ -104,14 +101,11 @@ public final class BayesUtils { public static int writeLabelIndex(Configuration conf, Iterable<String> labels, Path indexPath) throws IOException { FileSystem fs = FileSystem.get(indexPath.toUri(), conf); - SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, indexPath, Text.class, IntWritable.class); int i = 0; - try { + try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, indexPath, Text.class, IntWritable.class)) { for (String label : labels) { writer.append(new Text(label), new IntWritable(i++)); } - } finally { - Closeables.close(writer, 
false); } return i; } @@ -119,10 +113,9 @@ public final class BayesUtils { public static int writeLabelIndex(Configuration conf, Path indexPath, Iterable<Pair<Text,IntWritable>> labels) throws IOException { FileSystem fs = FileSystem.get(indexPath.toUri(), conf); - SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, indexPath, Text.class, IntWritable.class); - Collection<String> seen = Sets.newHashSet(); + Collection<String> seen = new HashSet<>(); int i = 0; - try { + try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, indexPath, Text.class, IntWritable.class)){ for (Object label : labels) { String theLabel = SLASH.split(((Pair<?, ?>) label).getFirst().toString())[1]; if (!seen.contains(theLabel)) { @@ -130,8 +123,6 @@ public final class BayesUtils { seen.add(theLabel); } } - } finally { - Closeables.close(writer, false); } return i; } @@ -154,7 +145,7 @@ public final class BayesUtils { } public static Map<String,Vector> readScoresFromCache(Configuration conf) throws IOException { - Map<String,Vector> sumVectors = Maps.newHashMap(); + Map<String,Vector> sumVectors = new HashMap<>(); for (Pair<Text,VectorWritable> entry : new SequenceFileDirIterable<Text,VectorWritable>(HadoopUtil.getSingleCachedFile(conf), PathType.LIST, PathFilters.partFilter(), conf)) { http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java index f180e8b..9f85aab 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java +++ b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java @@ -31,7 +31,6 @@ import org.apache.mahout.math.Vector; import org.apache.mahout.math.VectorWritable; import com.google.common.base.Preconditions; -import com.google.common.io.Closeables; /** NaiveBayesModel holds the weight matrix, the feature and label sums and the weight normalizer vectors.*/ public class NaiveBayesModel { @@ -102,15 +101,14 @@ public class NaiveBayesModel { public static NaiveBayesModel materialize(Path output, Configuration conf) throws IOException { FileSystem fs = output.getFileSystem(conf); - Vector weightsPerLabel = null; + Vector weightsPerLabel; Vector perLabelThetaNormalizer = null; - Vector weightsPerFeature = null; + Vector weightsPerFeature; Matrix weightsPerLabelAndFeature; float alphaI; boolean isComplementary; - FSDataInputStream in = fs.open(new Path(output, "naiveBayesModel.bin")); - try { + try (FSDataInputStream in = fs.open(new Path(output, "naiveBayesModel.bin"))) { alphaI = in.readFloat(); isComplementary = in.readBoolean(); weightsPerFeature = VectorWritable.readVector(in); @@ -122,9 +120,8 @@ public class NaiveBayesModel { for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) { weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in)); } - } finally { - Closeables.close(in, true); } + NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel, perLabelThetaNormalizer, alphaI, isComplementary); model.validate(); @@ -133,8 +130,7 @@ public class NaiveBayesModel { public void serialize(Path output, Configuration conf) throws IOException { FileSystem fs = output.getFileSystem(conf); - FSDataOutputStream out = fs.create(new 
Path(output, "naiveBayesModel.bin")); - try { + try (FSDataOutputStream out = fs.create(new Path(output, "naiveBayesModel.bin"))) { out.writeFloat(alphaI); out.writeBoolean(isComplementary); VectorWritable.writeVector(out, weightsPerFeature); @@ -145,8 +141,6 @@ public class NaiveBayesModel { for (int row = 0; row < weightsPerLabelAndFeature.numRows(); row++) { VectorWritable.writeVector(out, weightsPerLabelAndFeature.viewRow(row)); } - } finally { - Closeables.close(out, false); } } http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java index 8fd422f..d9eedcf 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java +++ b/mr/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java @@ -17,13 +17,12 @@ package org.apache.mahout.classifier.naivebayes.test; -import com.google.common.base.Preconditions; import java.io.IOException; import java.util.List; import java.util.Map; import java.util.regex.Pattern; -import com.google.common.io.Closeables; +import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -127,10 +126,10 @@ public class TestNaiveBayesDriver extends AbstractJob { } else { classifier = new StandardNaiveBayesClassifier(model); } - SequenceFile.Writer writer = SequenceFile.createWriter(fs, getConf(), new Path(getOutputPath(), "part-r-00000"), - Text.class, VectorWritable.class); - try { + try (SequenceFile.Writer writer = + SequenceFile.createWriter(fs, getConf(), new Path(getOutputPath(), "part-r-00000"), + Text.class, VectorWritable.class)) { SequenceFileDirIterable<Text, VectorWritable> dirIterable = new SequenceFileDirIterable<>(getInputPath(), PathType.LIST, PathFilters.partFilter(), getConf()); // loop through the part-r-* files in getInputPath() and get classification scores for all entries @@ -138,8 +137,6 @@ public class TestNaiveBayesDriver extends AbstractJob { writer.append(new Text(SLASH.split(pair.getFirst().toString())[1]), new VectorWritable(classifier.classifyFull(pair.getSecond().get()))); } - } finally { - Closeables.close(writer, false); } } http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java index 942a101..6d4e2b0 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java +++ b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java @@ -21,12 +21,11 @@ import java.io.DataOutputStream; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; +import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Scanner; -import com.google.common.collect.Lists; -import com.google.common.io.Closeables; import org.apache.commons.cli2.CommandLine; import 
org.apache.commons.cli2.Group; import org.apache.commons.cli2.Option; @@ -95,7 +94,7 @@ public final class BaumWelchTrainer { //constructing random-generated HMM HmmModel model = new HmmModel(nrOfHiddenStates, nrOfObservedStates, new Date().getTime()); - List<Integer> observations = Lists.newArrayList(); + List<Integer> observations = new ArrayList<>(); //reading observations try (Scanner scanner = new Scanner(new FileInputStream(input), "UTF-8")) { @@ -114,11 +113,8 @@ public final class BaumWelchTrainer { observationsArray, epsilon, maxIterations, true); //serializing trained model - DataOutputStream stream = new DataOutputStream(new FileOutputStream(output)); - try { + try (DataOutputStream stream = new DataOutputStream(new FileOutputStream(output))) { LossyHmmSerializer.serialize(trainedModel, stream); - } finally { - Closeables.close(stream, false); } //printing trained model http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java index 521be09..e710816 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java +++ b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/HmmUtils.java @@ -17,11 +17,12 @@ package org.apache.mahout.classifier.sequencelearning.hmm; +import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; -import com.google.common.collect.Lists; +import com.google.common.base.Preconditions; import org.apache.mahout.math.DenseMatrix; import org.apache.mahout.math.DenseVector; import org.apache.mahout.math.Matrix; @@ -29,8 +30,6 @@ import org.apache.mahout.math.RandomAccessSparseVector; import org.apache.mahout.math.SparseMatrix; import org.apache.mahout.math.Vector; -import com.google.common.base.Preconditions; - /** * A collection of utilities for handling HMMModel objects. 
*/ @@ -257,7 +256,7 @@ public final class HmmUtils { int[] sequence, boolean observed, String defaultValue) { - List<String> decoded = Lists.newArrayListWithCapacity(sequence.length); + List<String> decoded = new ArrayList<>(sequence.length); for (int position : sequence) { String nextState; if (observed) { http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java index cd2ced1..02baef1 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java +++ b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java @@ -25,8 +25,6 @@ import java.io.IOException; import java.io.OutputStreamWriter; import java.io.PrintWriter; -import com.google.common.base.Charsets; -import com.google.common.io.Closeables; import org.apache.commons.cli2.CommandLine; import org.apache.commons.cli2.Group; import org.apache.commons.cli2.Option; @@ -35,6 +33,7 @@ import org.apache.commons.cli2.builder.ArgumentBuilder; import org.apache.commons.cli2.builder.DefaultOptionBuilder; import org.apache.commons.cli2.builder.GroupBuilder; import org.apache.commons.cli2.commandline.Parser; +import org.apache.commons.io.Charsets; import org.apache.mahout.common.CommandLineUtil; /** @@ -80,26 +79,21 @@ public final class RandomSequenceGenerator { int length = Integer.parseInt((String) commandLine.getValue(lengthOption)); //reading serialized HMM - DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath)); HmmModel model; - try { + try (DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath))){ model = LossyHmmSerializer.deserialize(modelStream); - } finally { - Closeables.close(modelStream, true); } //generating observations int[] observations = HmmEvaluator.predict(model, length, System.currentTimeMillis()); //writing output - PrintWriter writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true); - try { + try (PrintWriter writer = + new PrintWriter(new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true)){ for (int observation : observations) { writer.print(observation); writer.print(' '); } - } finally { - Closeables.close(writer, false); } } catch (OptionException e) { CommandLineUtil.printHelp(optionGroup); http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java index fb64385..317237d 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java +++ b/mr/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java @@ -23,12 +23,10 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.PrintWriter; +import java.util.ArrayList; import java.util.List; import java.util.Scanner; -import 
com.google.common.base.Charsets; -import com.google.common.collect.Lists; -import com.google.common.io.Closeables; import org.apache.commons.cli2.CommandLine; import org.apache.commons.cli2.Group; import org.apache.commons.cli2.Option; @@ -37,6 +35,7 @@ import org.apache.commons.cli2.builder.ArgumentBuilder; import org.apache.commons.cli2.builder.DefaultOptionBuilder; import org.apache.commons.cli2.builder.GroupBuilder; import org.apache.commons.cli2.commandline.Parser; +import org.apache.commons.io.Charsets; import org.apache.mahout.common.CommandLineUtil; import org.apache.mahout.common.commandline.DefaultOptionCreator; @@ -82,16 +81,13 @@ public final class ViterbiEvaluator { boolean computeLikelihood = commandLine.hasOption(likelihoodOption); //reading serialized HMM - DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath)); HmmModel model; - try { + try (DataInputStream modelStream = new DataInputStream(new FileInputStream(modelPath))) { model = LossyHmmSerializer.deserialize(modelStream); - } finally { - Closeables.close(modelStream, true); } //reading observations - List<Integer> observations = Lists.newArrayList(); + List<Integer> observations = new ArrayList<>(); try (Scanner scanner = new Scanner(new FileInputStream(input), "UTF-8")) { while (scanner.hasNextInt()) { observations.add(scanner.nextInt()); @@ -107,14 +104,12 @@ public final class ViterbiEvaluator { int[] hiddenStates = HmmEvaluator.decode(model, observationsArray, true); //writing output - PrintWriter writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true); - try { + try (PrintWriter writer = + new PrintWriter(new OutputStreamWriter(new FileOutputStream(output), Charsets.UTF_8), true)) { for (int hiddenState : hiddenStates) { writer.print(hiddenState); writer.print(' '); } - } finally { - Closeables.close(writer, false); } if (computeLikelihood) { http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java index d00b021..24e5798 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java +++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java @@ -17,7 +17,6 @@ package org.apache.mahout.classifier.sgd; -import com.google.common.collect.Lists; import org.apache.hadoop.io.Writable; import org.apache.mahout.classifier.OnlineLearner; import org.apache.mahout.ep.EvolutionaryProcess; @@ -33,6 +32,7 @@ import org.slf4j.LoggerFactory; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.concurrent.ExecutionException; @@ -79,7 +79,7 @@ public class AdaptiveLogisticRegression implements OnlineLearner, Writable { private int currentStep = 1000; private int bufferSize = 1000; - private List<TrainingExample> buffer = Lists.newArrayList(); + private List<TrainingExample> buffer = new ArrayList<>(); private EvolutionaryProcess<Wrapper, CrossFoldLearner> ep; private State<Wrapper, CrossFoldLearner> best; private int threadCount = DEFAULT_THREAD_COUNT; @@ -118,7 +118,7 @@
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java index d00b021..24e5798 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java +++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java @@ -17,7 +17,6 @@ package org.apache.mahout.classifier.sgd; -import com.google.common.collect.Lists; import org.apache.hadoop.io.Writable; import org.apache.mahout.classifier.OnlineLearner; import org.apache.mahout.ep.EvolutionaryProcess; @@ -33,6 +32,7 @@ import org.slf4j.LoggerFactory; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.concurrent.ExecutionException; @@ -79,7 +79,7 @@ public class AdaptiveLogisticRegression implements OnlineLearner, Writable { private int currentStep = 1000; private int bufferSize = 1000; - private List<TrainingExample> buffer = Lists.newArrayList(); + private List<TrainingExample> buffer = new ArrayList<>(); private EvolutionaryProcess<Wrapper, CrossFoldLearner> ep; private State<Wrapper, CrossFoldLearner> best; private int threadCount = DEFAULT_THREAD_COUNT; @@ -118,7 +118,7 @@ public class AdaptiveLogisticRegression implements OnlineLearner, Writable { this.numFeatures = numFeatures; this.threadCount = threadCount; this.poolSize = poolSize; - seed = new State<Wrapper, CrossFoldLearner>(new double[2], 10); + seed = new State<>(new double[2], 10); Wrapper w = new Wrapper(numCategories, numFeatures, prior); seed.setPayload(w); @@ -284,7 +284,7 @@ public class AdaptiveLogisticRegression implements OnlineLearner, Writable { } private void setupOptimizer(int poolSize) { - ep = new EvolutionaryProcess<Wrapper, CrossFoldLearner>(threadCount, poolSize, seed); + ep = new EvolutionaryProcess<>(threadCount, poolSize, seed); } /** @@ -561,22 +561,22 @@ public class AdaptiveLogisticRegression implements OnlineLearner, Writable { bufferSize = in.readInt(); int n = in.readInt(); - buffer = Lists.newArrayList(); + buffer = new ArrayList<>(); for (int i = 0; i < n; i++) { TrainingExample example = new TrainingExample(); example.readFields(in); buffer.add(example); } - ep = new EvolutionaryProcess<Wrapper, CrossFoldLearner>(); + ep = new EvolutionaryProcess<>(); ep.readFields(in); - best = new State<Wrapper, CrossFoldLearner>(); + best = new State<>(); best.readFields(in); threadCount = in.readInt(); poolSize = in.readInt(); - seed = new State<Wrapper, CrossFoldLearner>(); + seed = new State<>(); seed.readFields(in); numFeatures = in.readInt();
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java index 36bcae0..f56814b 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java +++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/CrossFoldLearner.java @@ -17,7 +17,6 @@ package org.apache.mahout.classifier.sgd; -import com.google.common.collect.Lists; import org.apache.hadoop.io.Writable; import org.apache.mahout.classifier.AbstractVectorClassifier; import org.apache.mahout.classifier.OnlineLearner; @@ -31,6 +30,7 @@ import org.apache.mahout.math.stats.OnlineAuc; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.ArrayList; import java.util.List; /** @@ -47,7 +47,7 @@ public class CrossFoldLearner extends AbstractVectorClassifier implements Online private static final double MIN_SCORE = 1.0e-50; private OnlineAuc auc = new GlobalOnlineAuc(); private double logLikelihood; - private final List<OnlineLogisticRegression> models = Lists.newArrayList(); + private final List<OnlineLogisticRegression> models = new ArrayList<>(); // lambda, learningRate, perTermOffset, perTermExponent private double[] parameters = new double[4];
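
The State<> and EvolutionaryProcess<> hunks above rely on the Java 7 diamond operator: the compiler infers the constructor's type arguments from the declared type. This is the same inference that Guava's static factories (Lists.newArrayList(), Maps.newHashMap(), and friends) were written to simulate before Java 7, which is why they can be retired wholesale in this commit. A tiny self-contained illustration (class name is ours, not Mahout's):

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public final class DiamondSketch {
      public static void main(String[] args) {
        // Pre-Java-7: type arguments spelled out on both sides.
        Map<String, List<Integer>> verbose = new HashMap<String, List<Integer>>();
        // Java 7 diamond: <String, List<Integer>> is inferred on the right.
        Map<String, List<Integer>> concise = new HashMap<>();
        concise.put("xs", new ArrayList<Integer>());
        System.out.println(verbose.size() + " " + concise.size()); // prints: 0 1
      }
    }
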
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java index b21860f..dbf3198 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java +++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java @@ -22,7 +22,6 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Collections2; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import org.apache.commons.csv.CSVUtils; import org.apache.mahout.math.Vector; @@ -36,11 +35,14 @@ import org.apache.mahout.vectorizer.encoders.TextValueEncoder; import java.io.IOException; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TreeMap; /** * Converts CSV data lines to vectors. @@ -81,7 +83,7 @@ public class CsvRecordFactory implements RecordFactory { .put("t", TextValueEncoder.class) .build(); - private final Map<String, Set<Integer>> traceDictionary = Maps.newTreeMap(); + private final Map<String, Set<Integer>> traceDictionary = new TreeMap<>(); private int target; private final Dictionary targetDictionary; @@ -113,7 +115,7 @@ public class CsvRecordFactory implements RecordFactory { return Arrays.asList(CSVUtils.parseLine(line)); } catch (IOException e) { - List<String> list = Lists.newArrayList(); + List<String> list = new ArrayList<>(); list.add(line); return list; } @@ -186,7 +188,7 @@ public class CsvRecordFactory implements RecordFactory { @Override public void firstLine(String line) { // read variable names, build map of name -> column - final Map<String, Integer> vars = Maps.newHashMap(); + final Map<String, Integer> vars = new HashMap<>(); variableNames = parseCsvLine(line); int column = 0; for (String var : variableNames) { @@ -202,7 +204,7 @@ } // create list of predictor column numbers - predictors = Lists.newArrayList(Collections2.transform(typeMap.keySet(), new Function<String, Integer>() { + predictors = new ArrayList<>(Collections2.transform(typeMap.keySet(), new Function<String, Integer>() { @Override public Integer apply(String from) { Integer r = vars.get(from); @@ -217,7 +219,7 @@ Collections.sort(predictors); // and map from column number to type encoder for each column that is a predictor - predictorEncoders = Maps.newHashMap(); + predictorEncoders = new HashMap<>(); for (Integer predictor : predictors) { String name; Class<? extends FeatureVectorEncoder> c;
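
CsvRecordFactory shows the full range of factory-to-constructor substitutions this commit applies; only the collection factories move to the JDK, while Guava utilities with no JDK equivalent (Collections2.transform, ImmutableMap, Preconditions) stay put. The mapping, as a compilable reference (class and field names are illustrative):

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.LinkedList;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;
    import java.util.TreeMap;

    public final class FactorySubstitutions {
      List<String> list = new ArrayList<>();        // was Lists.newArrayList()
      List<String> sized = new ArrayList<>(16);     // was Lists.newArrayListWithCapacity(16)
      List<String> linked = new LinkedList<>();     // was Lists.newLinkedList()
      Map<String, Integer> hash = new HashMap<>();  // was Maps.newHashMap()
      Map<String, Integer> tree = new TreeMap<>();  // was Maps.newTreeMap()
      Set<Integer> set = new HashSet<>();           // was Sets.newHashSet()
    }
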
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/GradientMachine.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/GradientMachine.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/GradientMachine.java index d158f4d..90ef7a8 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/sgd/GradientMachine.java +++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/GradientMachine.java @@ -17,7 +17,6 @@ package org.apache.mahout.classifier.sgd; -import com.google.common.collect.Sets; import org.apache.hadoop.io.Writable; import org.apache.mahout.classifier.AbstractVectorClassifier; import org.apache.mahout.classifier.OnlineLearner; @@ -31,6 +30,7 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.Collection; +import java.util.HashSet; import java.util.Random; /** @@ -387,7 +387,7 @@ public class GradientMachine extends AbstractVectorClassifier implements OnlineL public void train(long trackingKey, String groupKey, int actual, Vector instance) { Vector hiddenActivation = inputToHidden(instance); hiddenToOutput(hiddenActivation); - Collection<Integer> goodLabels = Sets.newHashSet(); + Collection<Integer> goodLabels = new HashSet<>(); goodLabels.add(actual); updateRanking(hiddenActivation, goodLabels, 2, rnd); }
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java index ebb0614..bcd2ebc 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java +++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelDissector.java @@ -17,14 +17,14 @@ package org.apache.mahout.classifier.sgd; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import com.google.common.collect.Ordering; import org.apache.mahout.classifier.AbstractVectorClassifier; import org.apache.mahout.common.RandomUtils; import org.apache.mahout.math.Vector; +import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.PriorityQueue; @@ -55,7 +55,7 @@ public class ModelDissector { private final Map<String,Vector> weightMap; public ModelDissector() { - weightMap = Maps.newHashMap(); + weightMap = new HashMap<>(); } /** @@ -105,14 +105,14 @@ * @return A list of the top variables.
*/ public List<Weight> summary(int n) { - Queue<Weight> pq = new PriorityQueue<Weight>(); + Queue<Weight> pq = new PriorityQueue<>(); for (Map.Entry<String, Vector> entry : weightMap.entrySet()) { pq.add(new Weight(entry.getKey(), entry.getValue())); while (pq.size() > n) { pq.poll(); } } - List<Weight> r = Lists.newArrayList(pq); + List<Weight> r = new ArrayList<>(pq); Collections.sort(r, Ordering.natural().reverse()); return r; } @@ -170,14 +170,14 @@ public Weight(String feature, Vector weights, int n) { this.feature = feature; // pick out the weight with the largest abs value, but don't forget the sign - Queue<Category> biggest = new PriorityQueue<Category>(n + 1, Ordering.natural()); + Queue<Category> biggest = new PriorityQueue<>(n + 1, Ordering.natural()); for (Vector.Element element : weights.all()) { biggest.add(new Category(element.index(), element.get())); while (biggest.size() > n) { biggest.poll(); } } - categories = Lists.newArrayList(biggest); + categories = new ArrayList<>(biggest); Collections.sort(categories, Ordering.natural().reverse()); value = categories.get(0).weight; maxIndex = categories.get(0).index;
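
The two ModelDissector hunks above preserve a bounded top-N selection idiom worth spelling out: candidates are pushed into a natural-order PriorityQueue (a min-heap), and whenever the size exceeds n the minimum is polled off, so the queue always retains the n largest elements seen so far. A generic sketch of the same idiom (class and method names are illustrative):

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;
    import java.util.PriorityQueue;
    import java.util.Queue;

    public final class TopNSketch {
      static List<Integer> topN(Iterable<Integer> values, int n) {
        Queue<Integer> pq = new PriorityQueue<>();  // min-heap by natural order
        for (int v : values) {
          pq.add(v);
          while (pq.size() > n) {
            pq.poll();  // evict the current minimum
          }
        }
        List<Integer> result = new ArrayList<>(pq);
        Collections.sort(result, Collections.reverseOrder());  // largest first
        return result;
      }
    }

Note also that new ArrayList<>(pq) works here only because PriorityQueue is a Collection; Guava's Lists.newArrayList additionally accepted bare Iterables, so that difference is worth checking when retiring it elsewhere.
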
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java index f0150e9..f89b245 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java +++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java @@ -37,29 +37,20 @@ public final class ModelSerializer { } public static void writeBinary(String path, CrossFoldLearner model) throws IOException { - DataOutputStream out = new DataOutputStream(new FileOutputStream(path)); - try { + try (DataOutputStream out = new DataOutputStream(new FileOutputStream(path))) { PolymorphicWritable.write(out, model); - } finally { - Closeables.close(out, false); } } public static void writeBinary(String path, OnlineLogisticRegression model) throws IOException { - DataOutputStream out = new DataOutputStream(new FileOutputStream(path)); - try { + try (DataOutputStream out = new DataOutputStream(new FileOutputStream(path))) { PolymorphicWritable.write(out, model); - } finally { - Closeables.close(out, false); } } public static void writeBinary(String path, AdaptiveLogisticRegression model) throws IOException { - DataOutputStream out = new DataOutputStream(new FileOutputStream(path)); - try { + try (DataOutputStream out = new DataOutputStream(new FileOutputStream(path))) { PolymorphicWritable.write(out, model); - } finally { - Closeables.close(out, false); } }
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java b/mr/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java index b52cb8c..a04fc8b 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java +++ b/mr/src/main/java/org/apache/mahout/classifier/sgd/RankingGradient.java @@ -17,12 +17,12 @@ package org.apache.mahout.classifier.sgd; -import com.google.common.collect.Lists; import org.apache.mahout.classifier.AbstractVectorClassifier; import org.apache.mahout.math.Vector; import org.apache.mahout.math.function.Functions; import java.util.ArrayDeque; +import java.util.ArrayList; import java.util.Deque; import java.util.List; @@ -40,7 +40,7 @@ public class RankingGradient implements Gradient { private int window = 10; - private final List<Deque<Vector>> history = Lists.newArrayList(); + private final List<Deque<Vector>> history = new ArrayList<>(); public RankingGradient(int window) { this.window = window;
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/AbstractCluster.java b/mr/src/main/java/org/apache/mahout/clustering/AbstractCluster.java index cc05beb..86fa011 100644 --- a/mr/src/main/java/org/apache/mahout/clustering/AbstractCluster.java +++ b/mr/src/main/java/org/apache/mahout/clustering/AbstractCluster.java @@ -22,12 +22,11 @@ import java.io.DataOutput; import java.io.IOException; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.HashMap; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import org.apache.hadoop.conf.Configuration; import org.apache.mahout.common.parameters.Parameter; import org.apache.mahout.math.RandomAccessSparseVector; @@ -359,7 +358,7 @@ public abstract class AbstractCluster implements Cluster { // we assume sequential access in the output Vector provider = v.isSequentialAccess() ? v : new SequentialAccessSparseVector(v); - List<Object> terms = Lists.newLinkedList(); + List<Object> terms = new LinkedList<>(); String term = ""; for (Element elem : provider.nonZeroes()) { @@ -370,7 +369,7 @@ term = String.valueOf(elem.index()); } - Map<String, Object> term_entry = Maps.newHashMap(); + Map<String, Object> term_entry = new HashMap<>(); double roundedWeight = (double) Math.round(elem.get() * 1000) / 1000; if (hasBindings || isSparse) { term_entry.put(term, roundedWeight);
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java b/mr/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java index 421ffcf..ad0f8ec 100644 --- a/mr/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java +++ b/mr/src/main/java/org/apache/mahout/clustering/ClusteringUtils.java @@ -17,6 +17,7 @@ package org.apache.mahout.clustering; +import java.util.ArrayList; import java.util.List; import com.google.common.base.Preconditions; @@ -52,7 +53,7 @@ DistanceMeasure distanceMeasure) { UpdatableSearcher searcher = new ProjectionSearch(distanceMeasure, 3, 1); searcher.addAll(centroids); - List<OnlineSummarizer> summarizers = Lists.newArrayList(); + List<OnlineSummarizer> summarizers = new ArrayList<>(); if (searcher.size() == 0) { return summarizers; }
http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
----------------------------------------------------------------------
diff --git
a/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java b/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java index 6e2c3cf..384e294 100644 --- a/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java +++ b/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java @@ -18,12 +18,12 @@ package org.apache.mahout.clustering.classify; import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -159,7 +159,7 @@ public final class ClusterClassificationDriver extends AbstractJob { * @throws IOException */ private static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException { - List<Cluster> clusterModels = Lists.newArrayList(); + List<Cluster> clusterModels = new ArrayList<>(); Path finalClustersPath = finalClustersPath(conf, clusterOutputPath); Iterator<?> it = new SequenceFileDirValueIterator<Writable>(finalClustersPath, PathType.LIST, PathFilters.partFilter(), null, false, conf); @@ -225,7 +225,7 @@ public final class ClusterClassificationDriver extends AbstractJob { private static void classifyAndWrite(List<Cluster> clusterModels, Double clusterClassificationThreshold, boolean emitMostLikely, SequenceFile.Writer writer, VectorWritable vw, Vector pdfPerCluster) throws IOException { - Map<Text, Text> props = Maps.newHashMap(); + Map<Text, Text> props = new HashMap<>(); if (emitMostLikely) { int maxValueIndex = pdfPerCluster.maxValueIndex(); WeightedPropertyVectorWritable weightedPropertyVectorWritable = @@ -238,7 +238,7 @@ public final class ClusterClassificationDriver extends AbstractJob { private static void writeAllAboveThreshold(List<Cluster> clusterModels, Double clusterClassificationThreshold, SequenceFile.Writer writer, VectorWritable vw, Vector pdfPerCluster) throws IOException { - Map<Text, Text> props = Maps.newHashMap(); + Map<Text, Text> props = new HashMap<>(); for (Element pdf : pdfPerCluster.nonZeroes()) { if (pdf.get() >= clusterClassificationThreshold) { WeightedPropertyVectorWritable wvw = new WeightedPropertyVectorWritable(pdf.get(), vw.get(), props); http://git-wip-us.apache.org/repos/asf/mahout/blob/85f9ece6/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java b/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java index 9edbd8e..dfddab0 100644 --- a/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java +++ b/mr/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java @@ -18,12 +18,12 @@ package org.apache.mahout.clustering.classify; import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -67,7 +67,7 
@@ public class ClusterClassificationMapper extends threshold = conf.getFloat(ClusterClassificationConfigKeys.OUTLIER_REMOVAL_THRESHOLD, 0.0f); emitMostLikely = conf.getBoolean(ClusterClassificationConfigKeys.EMIT_MOST_LIKELY, false); - clusterModels = Lists.newArrayList(); + clusterModels = new ArrayList<>(); if (clustersIn != null && !clustersIn.isEmpty()) { Path clustersInPath = new Path(clustersIn); @@ -128,13 +128,13 @@ public class ClusterClassificationMapper extends DistanceMeasure distanceMeasure = distanceMeasureCluster.getMeasure(); double distance = distanceMeasure.distance(cluster.getCenter(), vw.get()); - Map<Text, Text> props = Maps.newHashMap(); + Map<Text, Text> props = new HashMap<>(); props.put(new Text("distance"), new Text(Double.toString(distance))); context.write(clusterId, new WeightedPropertyVectorWritable(weight, vw.get(), props)); } public static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException { - List<Cluster> clusters = Lists.newArrayList(); + List<Cluster> clusters = new ArrayList<>(); FileSystem fileSystem = clusterOutputPath.getFileSystem(conf); FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter()); Iterator<?> it = new SequenceFileDirValueIterator<Writable>(
