Repository: mahout Updated Branches: refs/heads/master ec8a5a006 -> 87c15bea4
http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDHelper.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDHelper.java b/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDHelper.java index f36f0a0..91ee50e 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDHelper.java +++ b/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDHelper.java @@ -17,15 +17,19 @@ package org.apache.mahout.math.hadoop.stochasticsvd; +import com.google.common.base.Function; +import com.google.common.collect.Iterators; +import com.google.common.io.Closeables; import java.io.Closeable; import java.io.IOException; -import java.util.*; -import java.util.Arrays; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.Deque; +import java.util.Iterator; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; - -import com.google.common.base.Function; -import com.google.common.collect.Iterators; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -35,14 +39,19 @@ import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Writable; import org.apache.mahout.common.IOUtils; import org.apache.mahout.common.Pair; -import org.apache.mahout.common.iterator.sequencefile.*; -import org.apache.mahout.math.*; +import org.apache.mahout.common.iterator.sequencefile.PathFilters; +import org.apache.mahout.common.iterator.sequencefile.PathType; +import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterator; +import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterator; +import org.apache.mahout.math.DenseMatrix; +import org.apache.mahout.math.DenseSymmetricMatrix; +import org.apache.mahout.math.DenseVector; +import org.apache.mahout.math.Matrix; +import org.apache.mahout.math.UpperTriangular; import org.apache.mahout.math.Vector; +import org.apache.mahout.math.VectorWritable; import org.apache.mahout.math.function.Functions; -import com.google.common.collect.Lists; -import com.google.common.io.Closeables; - /** * set of small file manipulation helpers. */ @@ -59,7 +68,7 @@ public final class SSVDHelper { static Vector loadVector(Path glob, Configuration conf) throws IOException { SequenceFileDirValueIterator<VectorWritable> iter = - new SequenceFileDirValueIterator<VectorWritable>(glob, + new SequenceFileDirValueIterator<>(glob, PathType.GLOB, null, null, @@ -93,21 +102,18 @@ public final class SSVDHelper { Configuration conf) throws IOException { VectorWritable vw = new VectorWritable(v); FileSystem fs = FileSystem.get(conf); - SequenceFile.Writer w = - new SequenceFile.Writer(fs, - conf, - vectorFilePath, - IntWritable.class, - VectorWritable.class); - try { + try (SequenceFile.Writer w = new SequenceFile.Writer(fs, + conf, + vectorFilePath, + IntWritable.class, + VectorWritable.class)) { w.append(new IntWritable(), vw); - } finally { + } /* * this is a writer, no quiet close please. we must bail out on incomplete * close. */ - w.close(); - } + } /** @@ -124,7 +130,7 @@ public final class SSVDHelper { } FileStatus firstSeqFile; - if (fstats[0].isDir()) { + if (fstats[0].isDirectory()) { firstSeqFile = fs.listStatus(fstats[0].getPath(), PathFilters.logsCRCFilter())[0]; } else { firstSeqFile = fstats[0]; @@ -169,7 +175,7 @@ public final class SSVDHelper { Deque<Closeable> closeables) throws IOException { SequenceFileDirIterator<Writable, VectorWritable> ret = - new SequenceFileDirIterator<Writable, VectorWritable>(glob, + new SequenceFileDirIterator<>(glob, PathType.GLOB, PathFilters.logsCRCFilter(), PARTITION_COMPARATOR, @@ -195,9 +201,9 @@ public final class SSVDHelper { */ public static DenseMatrix drmLoadAsDense(FileSystem fs, Path glob, Configuration conf) throws IOException { - Deque<Closeable> closeables = new ArrayDeque<Closeable>(); + Deque<Closeable> closeables = new ArrayDeque<>(); try { - List<double[]> denseData = new ArrayList<double[]>(); + List<double[]> denseData = new ArrayList<>(); for (Iterator<Pair<Writable, Vector>> iter = drmIterator(fs, glob, conf, closeables); iter.hasNext(); ) { Pair<Writable, Vector> p = iter.next(); @@ -241,7 +247,7 @@ public final class SSVDHelper { throws IOException { SequenceFileDirValueIterator<VectorWritable> iter = - new SequenceFileDirValueIterator<VectorWritable>(glob, + new SequenceFileDirValueIterator<>(glob, PathType.GLOB, null, PARTITION_COMPARATOR, @@ -276,14 +282,12 @@ public final class SSVDHelper { * contain the matrix. */ - SequenceFileDirValueIterator<VectorWritable> iter = - new SequenceFileDirValueIterator<VectorWritable>(glob, - PathType.GLOB, - null, - null, - true, - conf); - try { + try (SequenceFileDirValueIterator<VectorWritable> iter = new SequenceFileDirValueIterator<>(glob, + PathType.GLOB, + null, + null, + true, + conf)) { if (!iter.hasNext()) { throw new IOException("No triangular matrices found"); } @@ -294,8 +298,6 @@ public final class SSVDHelper { } return result; - } finally { - iter.close(); } } http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java b/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java index ed0256d..94be450 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java +++ b/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java @@ -129,7 +129,6 @@ public final class SSVDSolver { * @param k desired rank * @param p SSVD oversampling parameter * @param reduceTasks Number of reduce tasks (where applicable) - * @throws IOException when IO condition occurs. */ public SSVDSolver(Configuration conf, Path[] inputPath, http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java b/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java index 7534e5a..a6db079 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java +++ b/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java @@ -32,7 +32,6 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; -import org.apache.mahout.math.DenseMatrix; import org.apache.mahout.math.DenseVector; import org.apache.mahout.math.Matrix; import org.apache.mahout.math.NamedVector; http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/QRFirstStep.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/QRFirstStep.java b/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/QRFirstStep.java index 239396e..8509e0a 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/QRFirstStep.java +++ b/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/QRFirstStep.java @@ -149,8 +149,7 @@ public class QRFirstStep implements Closeable, OutputCollector<Writable, Vector> value .setBlock(GivensThinSolver.computeQtHat(value.getBlock(), qCnt, - new CopyConstructorIterator<UpperTriangular>(rSubseq - .iterator()))); + new CopyConstructorIterator<>(rSubseq.iterator()))); if (qCnt == 1) { /* * just merge r[0] <- r[1] so it doesn't have to repeat in subsequent http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/QRLastStep.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/QRLastStep.java b/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/QRLastStep.java index 04429f1..545f1f9 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/QRLastStep.java +++ b/mrlegacy/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/QRLastStep.java @@ -58,7 +58,6 @@ public class QRLastStep implements Closeable, Iterator<Vector> { * all RHat outputs int the group in order of groups * @param blockNum * our RHat number in the group - * @throws IOException */ public QRLastStep(Iterator<DenseBlockWritable> qHatInput, Iterator<VectorWritable> rHatInput, @@ -92,8 +91,7 @@ public class QRLastStep implements Closeable, Iterator<Vector> { GivensThinSolver .computeQtHat(v.getBlock(), blockNum == 0 ? 0 : 1, - new CopyConstructorIterator<UpperTriangular>(mRs - .iterator())); + new CopyConstructorIterator<>(mRs.iterator())); r = mQt[0].length; kp = mQt.length; if (qRow == null) { http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java b/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java index 9014c89..51484c7 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java +++ b/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java @@ -73,7 +73,7 @@ public class BruteSearch extends UpdatableSearcher { // A priority queue of the best @limit elements, ordered from worst to best so that the worst // element is always on top and can easily be removed. PriorityQueue<WeightedThing<Integer>> bestNeighbors = - new PriorityQueue<WeightedThing<Integer>>(limit, Ordering.natural().reverse()); + new PriorityQueue<>(limit, Ordering.natural().reverse()); // The resulting list of weighted WeightedVectors (the weight is the distance from the query). List<WeightedThing<Vector>> results = Lists.newArrayListWithCapacity(limit); @@ -83,7 +83,7 @@ public class BruteSearch extends UpdatableSearcher { // Only add a new neighbor if the result is better than the worst element // in the queue or the queue isn't full. if (bestNeighbors.size() < limit || bestNeighbors.peek().getWeight() > distance) { - bestNeighbors.add(new WeightedThing<Integer>(rowNumber, distance)); + bestNeighbors.add(new WeightedThing<>(rowNumber, distance)); if (bestNeighbors.size() > limit) { bestNeighbors.poll(); } else { @@ -96,7 +96,7 @@ public class BruteSearch extends UpdatableSearcher { } for (int i = limit - 1; i >= 0; --i) { WeightedThing<Integer> neighbor = bestNeighbors.poll(); - results.set(i, new WeightedThing<Vector>( + results.set(i, new WeightedThing<>( referenceVectors.get(neighbor.getValue()), neighbor.getWeight())); } return results; @@ -124,7 +124,7 @@ public class BruteSearch extends UpdatableSearcher { bestVector = row; } } - return new WeightedThing<Vector>(bestVector, bestDistance); + return new WeightedThing<>(bestVector, bestDistance); } /** http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java b/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java index 1954bb7..006f4b6 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java +++ b/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java @@ -150,7 +150,7 @@ public class FastProjectionSearch extends UpdatableSearcher { List<WeightedThing<Vector>> top = Lists.newArrayListWithCapacity(candidates.size() + pendingAdditions.size()); for (Vector candidate : Iterables.concat(candidates, pendingAdditions)) { - top.add(new WeightedThing<Vector>(candidate, distanceMeasure.distance(candidate, query))); + top.add(new WeightedThing<>(candidate, distanceMeasure.distance(candidate, query))); } Collections.sort(top); @@ -205,7 +205,7 @@ public class FastProjectionSearch extends UpdatableSearcher { } } - return new WeightedThing<Vector>(bestVector, bestDistance); + return new WeightedThing<>(bestVector, bestDistance); } @Override @@ -219,7 +219,7 @@ public class FastProjectionSearch extends UpdatableSearcher { Vector projection = basisMatrix.times(vector); for (int i = 0; i < basisMatrix.numRows(); ++i) { List<WeightedThing<Vector>> currProjections = scalarProjections.get(i); - WeightedThing<Vector> searchedThing = new WeightedThing<Vector>(projection.get(i)); + WeightedThing<Vector> searchedThing = new WeightedThing<>(projection.get(i)); int middle = Collections.binarySearch(currProjections, searchedThing); if (middle < 0) { isProjected = false; @@ -264,7 +264,7 @@ public class FastProjectionSearch extends UpdatableSearcher { for (Vector pending : pendingAdditions) { Vector projection = basisMatrix.times(pending); for (int i = 0; i < numProjections; ++i) { - scalarProjections.get(i).add(new WeightedThing<Vector>(pending, projection.get(i))); + scalarProjections.get(i).add(new WeightedThing<>(pending, projection.get(i))); } } pendingAdditions.clear(); http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/LocalitySensitiveHashSearch.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/LocalitySensitiveHashSearch.java b/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/LocalitySensitiveHashSearch.java index cd69b0b..aa1f103 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/LocalitySensitiveHashSearch.java +++ b/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/LocalitySensitiveHashSearch.java @@ -191,7 +191,7 @@ public class LocalitySensitiveHashSearch extends UpdatableSearcher { List<WeightedThing<Vector>> results = Lists.newArrayListWithExpectedSize(top.size()); while (top.size() != 0) { WeightedThing<Vector> wv = top.pop(); - results.add(new WeightedThing<Vector>(((HashedVector) wv.getValue()).getVector(), wv.getWeight())); + results.add(new WeightedThing<>(((HashedVector) wv.getValue()).getVector(), wv.getWeight())); } Collections.reverse(results); if (limit < results.size()) { @@ -235,7 +235,7 @@ public class LocalitySensitiveHashSearch extends UpdatableSearcher { } protected static WeightedThing<Vector> removeHash(WeightedThing<Vector> input) { - return new WeightedThing<Vector>(((HashedVector) input.getValue()).getVector(), input.getWeight()); + return new WeightedThing<>(((HashedVector) input.getValue()).getVector(), input.getWeight()); } @Override http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java b/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java index 651a548..61a9f56 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java +++ b/mrlegacy/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java @@ -96,7 +96,7 @@ public class ProjectionSearch extends UpdatableSearcher { // Add the the new vector and the projected distance to each set separately. int i = 0; for (TreeMultiset<WeightedThing<Vector>> s : scalarProjections) { - s.add(new WeightedThing<Vector>(vector, projection.get(i++))); + s.add(new WeightedThing<>(vector, projection.get(i++))); } int numVectors = scalarProjections.get(0).size(); for (TreeMultiset<WeightedThing<Vector>> s : scalarProjections) { @@ -138,7 +138,7 @@ public class ProjectionSearch extends UpdatableSearcher { Iterator<? extends Vector> projections = basisMatrix.iterator(); for (TreeMultiset<WeightedThing<Vector>> v : scalarProjections) { Vector basisVector = projections.next(); - WeightedThing<Vector> projectedQuery = new WeightedThing<Vector>(query, + WeightedThing<Vector> projectedQuery = new WeightedThing<>(query, query.dot(basisVector)); for (WeightedThing<Vector> candidate : Iterables.concat( Iterables.limit(v.tailMultiset(projectedQuery, BoundType.CLOSED), searchSize), @@ -151,7 +151,7 @@ public class ProjectionSearch extends UpdatableSearcher { // this is probably just as fast as a priority queue here. List<WeightedThing<Vector>> top = Lists.newArrayList(); for (Vector candidate : candidates) { - top.add(new WeightedThing<Vector>(candidate, distanceMeasure.distance(query, candidate))); + top.add(new WeightedThing<>(candidate, distanceMeasure.distance(query, candidate))); } Collections.sort(top); return top.subList(0, Math.min(limit, top.size())); @@ -176,7 +176,7 @@ public class ProjectionSearch extends UpdatableSearcher { Iterator<? extends Vector> projections = basisMatrix.iterator(); for (TreeMultiset<WeightedThing<Vector>> v : scalarProjections) { Vector basisVector = projections.next(); - WeightedThing<Vector> projectedQuery = new WeightedThing<Vector>(query, query.dot(basisVector)); + WeightedThing<Vector> projectedQuery = new WeightedThing<>(query, query.dot(basisVector)); for (WeightedThing<Vector> candidate : Iterables.concat( Iterables.limit(v.tailMultiset(projectedQuery, BoundType.CLOSED), searchSize), Iterables.limit(v.headMultiset(projectedQuery, BoundType.OPEN).descendingMultiset(), searchSize))) { @@ -188,7 +188,7 @@ public class ProjectionSearch extends UpdatableSearcher { } } - return new WeightedThing<Vector>(bestVector, bestDistance); + return new WeightedThing<>(bestVector, bestDistance); } @Override @@ -211,7 +211,7 @@ public class ProjectionSearch extends UpdatableSearcher { if (toRemove.getWeight() < epsilon) { Iterator<? extends Vector> basisVectors = basisMatrix.iterator(); for (TreeMultiset<WeightedThing<Vector>> projection : scalarProjections) { - if (!projection.remove(new WeightedThing<Vector>(vector, vector.dot(basisVectors.next())))) { + if (!projection.remove(new WeightedThing<>(vector, vector.dot(basisVectors.next())))) { throw new RuntimeException("Internal inconsistency in ProjectionSearch"); } } http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/math/random/RandomProjector.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/math/random/RandomProjector.java b/mrlegacy/src/main/java/org/apache/mahout/math/random/RandomProjector.java index b3d3017..79fe4b6 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/math/random/RandomProjector.java +++ b/mrlegacy/src/main/java/org/apache/mahout/math/random/RandomProjector.java @@ -97,7 +97,7 @@ public final class RandomProjector { */ public static Matrix generateBasisZeroPlusMinusOne(int projectedVectorSize, int vectorSize) { Matrix basisMatrix = new DenseMatrix(projectedVectorSize, vectorSize); - Multinomial<Double> choice = new Multinomial<Double>(); + Multinomial<Double> choice = new Multinomial<>(); choice.add(0.0, 2 / 3.0); choice.add(Math.sqrt(3.0), 1 / 6.0); choice.add(-Math.sqrt(3.0), 1 / 6.0); http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvd.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvd.java b/mrlegacy/src/main/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvd.java index 7c08dca..f7724f7 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvd.java +++ b/mrlegacy/src/main/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvd.java @@ -122,11 +122,8 @@ public class SequentialOutOfCoreSvd { // step 1, compute R as in R'R = Y'Y where Y = A \Omega for (File file : partsOfA) { MatrixWritable m = new MatrixWritable(); - DataInputStream in = new DataInputStream(new FileInputStream(file)); - try { + try (DataInputStream in = new DataInputStream(new FileInputStream(file))) { m.readFields(in); - } finally { - in.close(); } Matrix aI = m.get(); @@ -145,11 +142,8 @@ public class SequentialOutOfCoreSvd { int ncols = 0; for (File file : partsOfA) { MatrixWritable m = new MatrixWritable(); - DataInputStream in = new DataInputStream(new FileInputStream(file)); - try { + try (DataInputStream in = new DataInputStream(new FileInputStream(file))) { m.readFields(in); - } finally { - in.close(); } Matrix aI = m.get(); ncols = Math.max(ncols, aI.columnSize()); @@ -168,11 +162,8 @@ public class SequentialOutOfCoreSvd { MatrixWritable bTmp = new MatrixWritable(); for (int j = 0; j < ncols; j += columnsPerSlice) { if (bFile(tmpDir, j).exists()) { - DataInputStream in = new DataInputStream(new FileInputStream(bFile(tmpDir, j))); - try { + try (DataInputStream in = new DataInputStream(new FileInputStream(bFile(tmpDir, j)))) { bTmp.readFields(in); - } finally { - in.close(); } b2.assign(bTmp.get().times(bTmp.get().transpose()), Functions.PLUS); @@ -188,19 +179,13 @@ public class SequentialOutOfCoreSvd { File bPath = bFile(tmpDir, j); if (bPath.exists()) { MatrixWritable m = new MatrixWritable(); - DataInputStream in = new DataInputStream(new FileInputStream(bPath)); - try { + try (DataInputStream in = new DataInputStream(new FileInputStream(bPath))) { m.readFields(in); - } finally { - in.close(); } m.set(l2.solveRight(m.get().transpose()).times(svd.getV())); - DataOutputStream out = new DataOutputStream(new FileOutputStream( - new File(tmpDir, String.format("V-%s", bPath.getName().replaceAll(".*-", ""))))); - try { + try (DataOutputStream out = new DataOutputStream(new FileOutputStream( + new File(tmpDir, String.format("V-%s", bPath.getName().replaceAll(".*-", "")))))) { m.write(out); - } finally { - out.close(); } } } @@ -216,12 +201,9 @@ public class SequentialOutOfCoreSvd { Matrix y = aI.times(new RandomTrinaryMatrix(seed, aI.numCols(), dim, false)); Matrix uI = r2.solveRight(y).times(svd.getU()); m.set(uI); - DataOutputStream out = new DataOutputStream(new FileOutputStream( - new File(tmpDir, String.format("U-%s", file.getName().replaceAll(".*-", ""))))); - try { + try (DataOutputStream out = new DataOutputStream(new FileOutputStream( + new File(tmpDir, String.format("U-%s", file.getName().replaceAll(".*-", "")))))) { m.write(out); - } finally { - out.close(); } } } @@ -229,21 +211,15 @@ public class SequentialOutOfCoreSvd { private static void addToSavedCopy(File file, Matrix matrix) throws IOException { MatrixWritable mw = new MatrixWritable(); if (file.exists()) { - DataInputStream in = new DataInputStream(new FileInputStream(file)); - try { + try (DataInputStream in = new DataInputStream(new FileInputStream(file))) { mw.readFields(in); - } finally { - in.close(); } mw.get().assign(matrix, Functions.PLUS); } else { mw.set(matrix); } - DataOutputStream out = new DataOutputStream(new FileOutputStream(file)); - try { + try (DataOutputStream out = new DataOutputStream(new FileOutputStream(file))) { mw.write(out); - } finally { - out.close(); } } http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java b/mrlegacy/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java index 0fb8fea..8a1f8f8 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java +++ b/mrlegacy/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java @@ -235,7 +235,7 @@ public final class DictionaryVectorizer extends AbstractJob implements Vectorize Path filesPattern = new Path(wordCountPath, OUTPUT_FILES_PATTERN); int i = 0; for (Pair<Writable,Writable> record - : new SequenceFileDirIterable<Writable,Writable>(filesPattern, PathType.GLOB, null, null, true, conf)) { + : new SequenceFileDirIterable<>(filesPattern, PathType.GLOB, null, null, true, conf)) { if (currentChunkSize > chunkSizeLimit) { Closeables.close(dictWriter, false); chunkIndex++; http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapper.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapper.java b/mrlegacy/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapper.java index 9f9714e..81f7ee4 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapper.java +++ b/mrlegacy/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapper.java @@ -96,8 +96,8 @@ public class CollocMapper extends Mapper<Text, StringTuple, GramKey, Gram> { int count = 0; // ngram count OpenObjectIntHashMap<String> ngrams = - new OpenObjectIntHashMap<String>(value.getEntries().size() * (maxShingleSize - 1)); - OpenObjectIntHashMap<String> unigrams = new OpenObjectIntHashMap<String>(value.getEntries().size()); + new OpenObjectIntHashMap<>(value.getEntries().size() * (maxShingleSize - 1)); + OpenObjectIntHashMap<String> unigrams = new OpenObjectIntHashMap<>(value.getEntries().size()); do { String term = sf.getAttribute(CharTermAttribute.class).toString(); @@ -136,9 +136,7 @@ public class CollocMapper extends Mapper<Text, StringTuple, GramKey, Gram> { gramKey.set(tail, ngram.getBytes()); context.write(gramKey, ngram); - } catch (IOException e) { - throw new IllegalStateException(e); - } catch (InterruptedException e) { + } catch (IOException | InterruptedException e) { throw new IllegalStateException(e); } } @@ -153,9 +151,7 @@ public class CollocMapper extends Mapper<Text, StringTuple, GramKey, Gram> { Gram unigram = new Gram(term, frequency, Gram.Type.UNIGRAM); gramKey.set(unigram, EMPTY); context.write(gramKey, unigram); - } catch (IOException e) { - throw new IllegalStateException(e); - } catch (InterruptedException e) { + } catch (IOException | InterruptedException e) { throw new IllegalStateException(e); } return true; http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java b/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java index 4052fd8..0b350c6 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java +++ b/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java @@ -17,10 +17,10 @@ package org.apache.mahout.vectorizer.encoders; -import com.google.common.base.Charsets; -import org.apache.mahout.math.map.OpenIntIntHashMap; +import java.util.Arrays; import com.google.common.base.Preconditions; +import org.apache.mahout.math.map.OpenIntIntHashMap; public class CachingContinuousValueEncoder extends ContinuousValueEncoder { private final int dataSize; @@ -53,7 +53,7 @@ public class CachingContinuousValueEncoder extends ContinuousValueEncoder { protected int hashForProbe(byte[] originalForm, int dataSize, String name, int probe) { Preconditions.checkArgument(dataSize == this.dataSize, "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + ']'); - int originalHashcode = originalForm.hashCode(); + int originalHashcode = Arrays.hashCode(originalForm); if (caches[probe].containsKey(originalHashcode)) { return caches[probe].get(originalHashcode); } http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java b/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java index cf8ea37..258ff84 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java +++ b/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java @@ -17,8 +17,10 @@ package org.apache.mahout.vectorizer.encoders; -import org.apache.mahout.math.map.OpenIntIntHashMap; +import java.util.Arrays; + import com.google.common.base.Preconditions; +import org.apache.mahout.math.map.OpenIntIntHashMap; public class CachingStaticWordValueEncoder extends StaticWordValueEncoder { @@ -52,7 +54,7 @@ public class CachingStaticWordValueEncoder extends StaticWordValueEncoder { protected int hashForProbe(byte[] originalForm, int dataSize, String name, int probe) { Preconditions.checkArgument(dataSize == this.dataSize, "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + ']'); - int originalHashcode = originalForm.hashCode(); + int originalHashcode = Arrays.hashCode(originalForm); if (caches[probe].containsKey(originalHashcode)) { return caches[probe].get(originalHashcode); } http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java b/mrlegacy/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java index e8b24e1..a6fda58 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java +++ b/mrlegacy/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java @@ -50,7 +50,7 @@ import java.util.Iterator; */ public class TFPartialVectorReducer extends Reducer<Text, StringTuple, Text, VectorWritable> { - private final OpenObjectIntHashMap<String> dictionary = new OpenObjectIntHashMap<String>(); + private final OpenObjectIntHashMap<String> dictionary = new OpenObjectIntHashMap<>(); private int dimension; private boolean sequentialAccess; http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/vectorizer/term/TermCountMapper.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/vectorizer/term/TermCountMapper.java b/mrlegacy/src/main/java/org/apache/mahout/vectorizer/term/TermCountMapper.java index 7073a85..9af3d57 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/vectorizer/term/TermCountMapper.java +++ b/mrlegacy/src/main/java/org/apache/mahout/vectorizer/term/TermCountMapper.java @@ -33,7 +33,7 @@ public class TermCountMapper extends Mapper<Text, StringTuple, Text, LongWritabl @Override protected void map(Text key, StringTuple value, final Context context) throws IOException, InterruptedException { - OpenObjectLongHashMap<String> wordCount = new OpenObjectLongHashMap<String>(); + OpenObjectLongHashMap<String> wordCount = new OpenObjectLongHashMap<>(); for (String word : value.getEntries()) { if (wordCount.containsKey(word)) { wordCount.put(word, wordCount.get(word) + 1); http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/mrlegacy/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFConverter.java ---------------------------------------------------------------------- diff --git a/mrlegacy/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFConverter.java b/mrlegacy/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFConverter.java index 21d2418..5f9d666 100644 --- a/mrlegacy/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFConverter.java +++ b/mrlegacy/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFConverter.java @@ -245,7 +245,7 @@ public final class TFIDFConverter { } featureCount++; Long[] counts = {featureCount, vectorCount}; - return new Pair<Long[], List<Path>>(counts, chunkPaths); + return new Pair<>(counts, chunkPaths); } finally { Closeables.close(freqWriter, false); } http://git-wip-us.apache.org/repos/asf/mahout/blob/87c15bea/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 4f5fe88..93afd14 100644 --- a/pom.xml +++ b/pom.xml @@ -95,8 +95,8 @@ </mailingLists> <properties> <skipTests>false</skipTests> - <maven.compiler.source>1.6</maven.compiler.source> - <maven.compiler.target>1.6</maven.compiler.target> + <maven.compiler.source>1.7</maven.compiler.source> + <maven.compiler.target>1.7</maven.compiler.target> <maven.clover.multiproject>true</maven.clover.multiproject> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <msurefire.version>2.16</msurefire.version> @@ -104,11 +104,11 @@ <mcheckstyle.version>2.10</mcheckstyle.version> <mfindbugs.version>2.5.2</mfindbugs.version> <mjavadoc.version>2.9.1</mjavadoc.version> - <hadoop.version>2.2.0</hadoop.version> + <hadoop.version>2.6.0</hadoop.version> <hadoop.classifier>hadoop2</hadoop.classifier> <hbase.version>0.98.0-${hadoop.classifier}</hbase.version> <lucene.version>4.6.1</lucene.version> - <slf4j.version>1.7.5</slf4j.version> + <slf4j.version>1.7.10</slf4j.version> <scala.major>2.10</scala.major> <scala.version>2.10.4</scala.version> <spark.version>1.1.1</spark.version>
